├── .gitignore ├── LICENSE ├── README.md ├── control.clj ├── dledger-jepsen ├── pom.xml ├── src │ └── main │ │ └── java │ │ └── io │ │ └── openmessaging │ │ └── dledger │ │ └── jepsenclient │ │ └── JepsenSetClient.java ├── startup.sh ├── stop.sh └── stop_dropcaches.sh ├── docker ├── .gitignore ├── control │ ├── .gitignore │ ├── Dockerfile │ ├── bashrc │ └── init.sh ├── docker-compose.dev.yml ├── docker-compose.ubuntu.yml ├── docker-compose.yml ├── node │ ├── Dockerfile │ ├── Dockerfile-ubuntu │ └── run.sh ├── secret │ └── .gitkeep └── up.sh ├── nodes ├── project.clj ├── run_test.sh └── src └── dledger_jepsen_test └── core.clj /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | target/ 3 | .classpath 4 | .project 5 | .settings/ 6 | *.log* 7 | *.iml 8 | .DS_Store 9 | nohup.out 10 | store/ 11 | *.tar.gz 12 | 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # dledger.jepsen 2 | 3 | A [jepsen](https://github.com/jepsen-io/jepsen) test for [dledger](https://github.com/openmessaging/openmessaging-storage-dledger). 4 | 5 | ## What is being tested? 6 | 7 | DLedger is a Raft-based Java library for building a highly available, highly durable, strongly consistent commit log. The test runs concurrent operations against dledger from different nodes in a dledger cluster and checks that the operations preserve the consistency properties defined in the test. During the test, various nemeses can be added to interfere with the operations. 8 | 9 | Currently, the checker is **Set**. Given a set of concurrent unique appends to the dledger commit log followed by a final read, it verifies that every successfully appended element is present in the read, and that the read contains only elements for which an append was attempted. 10 | 11 | ## Usage 12 | 13 | 1. 
Prepare **one** control node and **five** db nodes and ensure that the control node can use SSH to log into all of the db nodes. 14 | 2. Install clojure, jepsen and [clojure-control](https://github.com/killme2008/clojure-control) on the control node. 15 | 3. Edit the *nodes*, *control.clj* and *src/dledger_jepsen_test/core.clj* files to set the hostname, user name and store path. Those values are hardcoded in the program for now. 16 | 4. Deploy the dledger server with clojure-control on the control node: 17 | ``` 18 | control run dledger build 19 | control run dledger deploy 20 | ``` 21 | 5. Run the test 22 | ``` 23 | lein run test --nodes-file ./nodes 24 | ``` 25 |    or execute `./run_test.sh` 26 | 27 | ## Quick Start (Docker) 28 | 29 | In one shell, we start the five nodes and the controller using docker compose. 30 | ```shell 31 | cd docker 32 | ./up.sh --dev 33 | ``` 34 | In another shell, use `docker exec -it chaos-control bash` to enter the controller, then 35 | 36 | ```shell 37 | control run dledger build 38 | control run dledger deploy 39 | ./run_test.sh 40 | ``` 41 | 42 | ### Options 43 | 44 | See `lein run test --help` for options. 45 | 46 | **nemesis** 47 | 48 | `--nemesis NAME`, what nemesis should we run? The default value is partition-random-halves. You can also run the following nemeses: 49 | 50 | - partition-random-node: isolates a single node from the rest of the network. 51 | 52 | ![enter image description here](http://assets.processon.com/chart_image/5d05fd1ce4b00d2a1ac788c7.png) 53 | 54 | - partition-random-halves: cuts the network into randomly chosen halves. 55 | 56 | ![enter image description here](http://assets.processon.com/chart_image/5d05fb65e4b0cbb88a5f1815.png) 57 | 58 | - kill-random-processes: kills random processes and restarts them. 59 | 60 | ![enter image description here](http://assets.processon.com/chart_image/5d0c4523e4b0d4ba353ee2dd.png) 61 | 62 | - crash-random-nodes: crashes random nodes and restarts them (kills processes and drops caches). 
63 | 64 | ![enter image description here](http://assets.processon.com/chart_image/5d05feafe4b08ceab31d121a.png) 65 | 66 | - hammer-time: pause random nodes with SIGSTOP/SIGCONT. 67 | 68 | ![enter image description here](http://assets.processon.com/chart_image/5d06012de4b091a8f244ba50.png) 69 | 70 | - bridge: a grudge which cuts the network in half, but preserves a node in the middle which has uninterrupted bidirectional connectivity to both components. 71 | 72 | ![enter image description here](http://assets.processon.com/chart_image/5d06033de4b0d4295989d335.png) 73 | 74 | - partition-majorities-ring: every node can see a majority, but no node sees the _same_ majority as any other. Randomly orders nodes into a ring. 75 | 76 | ![enter image description here](http://assets.processon.com/chart_image/5d0604d3e4b0591fc0e34259.png) 77 | 78 | **Other options:** 79 | 80 | `--rate HZ`, approximate number of requests per second, per thread, the default value is 10. 81 | 82 | `--concurrency NUMBER`, the number of workers (clients), the default value is 5. 83 | 84 | `--time-limit TIME`, test time limit, the default value is 60. 85 | 86 | `--interval TIME`, nemesis interval, the default value is 15. 87 | 88 | `--test-count TIMES`, times to run test, the default value is 1. 89 | 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /control.clj: -------------------------------------------------------------------------------- 1 | ; Licensed to the Apache Software Foundation (ASF) under one or more 2 | ; contributor license agreements. See the NOTICE file distributed with 3 | ; this work for additional information regarding copyright ownership. 4 | ; The ASF licenses this file to You under the Apache License, Version 2.0 5 | ; (the "License"); you may not use this file except in compliance with 6 | ; the License. 
You may obtain a copy of the License at 7 | ; 8 | ; http://www.apache.org/licenses/LICENSE-2.0 9 | ; 10 | ; Unless required by applicable law or agreed to in writing, software 11 | ; distributed under the License is distributed on an "AS IS" BASIS, 12 | ; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | ; See the License for the specific language governing permissions and 14 | ; limitations under the License. 15 | 16 | (defcluster :dledger 17 | :clients [{:host "n1" :user "root"} 18 | {:host "n2" :user "root"} 19 | {:host "n3" :user "root"} 20 | {:host "n4" :user "root"} 21 | {:host "n5" :user "root"}]) 22 | 23 | (deftask :date "echo date on cluster" [] 24 | (ssh "date")) 25 | 26 | (deftask :build [] 27 | (local 28 | (run 29 | (cd "dledger-jepsen" 30 | (run "mvn clean install") 31 | (run "chmod a+x startup.sh") 32 | (run "chmod a+x stop.sh") 33 | (run "chmod a+x stop_dropcaches.sh") 34 | ))) 35 | (local (run "rm dledger-jepsen.tar.gz; tar zcvf dledger-jepsen.tar.gz dledger-jepsen/target/dledger-jepsen.jar dledger-jepsen/startup.sh dledger-jepsen/stop.sh dledger-jepsen/stop_dropcaches.sh"))) 36 | 37 | 38 | (deftask :deploy [] 39 | (scp "dledger-jepsen.tar.gz" "/root/") 40 | (ssh 41 | (run 42 | (cd "/root" 43 | (run "rm -rf dledger-jepsen/") 44 | (run "tar zxvf dledger-jepsen.tar.gz"))))) -------------------------------------------------------------------------------- /dledger-jepsen/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 19 | 20 | 23 | 4.0.0 24 | 25 | io.openmessaging 26 | dledger-jepsen 27 | 1.0.0-SNAPSHOT 28 | 29 | 30 | 31 | io.openmessaging.storage 32 | dledger 33 | 0.1 34 | 35 | 36 | 37 | 38 | UTF-8 39 | 1.8 40 | 1.8 41 | 42 | 43 | 44 | 45 | 46 | maven-assembly-plugin 47 | 2.2.1 48 | 49 | 50 | 51 | io.openmessaging.storage.dledger.cmdline.BossCommand 52 | 53 | 54 | dledger-jepsen 55 | false 56 | false 57 | 58 | jar-with-dependencies 59 | 60 | 61 | 62 | 63 | make-assembly 
64 | package 65 | 66 | single 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /dledger-jepsen/src/main/java/io/openmessaging/dledger/jepsenclient/JepsenSetClient.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package io.openmessaging.dledger.jepsenclient; 19 | 20 | import io.openmessaging.storage.dledger.ShutdownAbleThread; 21 | 22 | import io.openmessaging.storage.dledger.client.DLedgerClientRpcNettyService; 23 | import io.openmessaging.storage.dledger.client.DLedgerClientRpcService; 24 | 25 | import io.openmessaging.storage.dledger.entry.DLedgerEntry; 26 | import io.openmessaging.storage.dledger.protocol.*; 27 | import io.openmessaging.storage.dledger.utils.DLedgerUtils; 28 | import org.apache.rocketmq.remoting.exception.RemotingConnectException; 29 | import org.apache.rocketmq.remoting.exception.RemotingSendRequestException; 30 | import org.apache.rocketmq.remoting.exception.RemotingTimeoutException; 31 | import org.slf4j.Logger; 32 | import org.slf4j.LoggerFactory; 33 | 34 | import java.util.Map; 35 | import java.util.concurrent.CompletableFuture; 36 | import java.util.concurrent.ConcurrentHashMap; 37 | import java.util.concurrent.TimeUnit; 38 | 39 | public class JepsenSetClient { 40 | 41 | private static Logger logger = LoggerFactory.getLogger(JepsenSetClient.class); 42 | private final Map peerMap = new ConcurrentHashMap<>(); 43 | private final String group; 44 | private volatile String leaderId; 45 | private DLedgerClientRpcService dLedgerClientRpcService; 46 | private static int CLIENT_TIMEOUT_CODE = 1001; 47 | private static int CLIENT_CONNECT_REFUSE_CODE = 1002; 48 | private static int CLIENT_SENDREQ_FAIL_CODE = 1003; 49 | 50 | private MetadataUpdater metadataUpdater = new MetadataUpdater("MetadataUpdater", logger); 51 | 52 | public JepsenSetClient(String group, String peers) { 53 | this.group = group; 54 | updatePeers(peers); 55 | dLedgerClientRpcService = new DLedgerClientRpcNettyService(); 56 | dLedgerClientRpcService.updatePeers(peers); 57 | leaderId = peerMap.keySet().iterator().next(); 58 | } 59 | 60 | public int add(String value) { 61 | AppendEntryResponse response = append(value.getBytes()); 62 | return response.getCode(); 63 | } 64 | 
65 | public String read() { 66 | // Construct a string of clojure set 67 | int index = 0; 68 | StringBuilder res = new StringBuilder("#{"); 69 | while (true) { 70 | GetEntriesResponse response = get(index); 71 | if (response.getEntries() != null && response.getEntries().size() > 0) { 72 | for (DLedgerEntry entry : response.getEntries()) { 73 | res.append(new String(entry.getBody())); 74 | res.append(","); 75 | } 76 | } else { 77 | break; 78 | } 79 | index++; 80 | } 81 | res.replace(res.length() - 1, res.length(), "}"); 82 | return res.toString(); 83 | } 84 | 85 | public AppendEntryResponse append(byte[] body) { 86 | try { 87 | waitOnUpdatingMetadata(1500, false); 88 | if (leaderId == null) { 89 | AppendEntryResponse appendEntryResponse = new AppendEntryResponse(); 90 | appendEntryResponse.setCode(DLedgerResponseCode.METADATA_ERROR.getCode()); 91 | return appendEntryResponse; 92 | } 93 | AppendEntryRequest appendEntryRequest = new AppendEntryRequest(); 94 | appendEntryRequest.setGroup(group); 95 | appendEntryRequest.setRemoteId(leaderId); 96 | appendEntryRequest.setBody(body); 97 | AppendEntryResponse response = dLedgerClientRpcService.append(appendEntryRequest).get(); 98 | if (response.getCode() == DLedgerResponseCode.NOT_LEADER.getCode()) { 99 | waitOnUpdatingMetadata(1500, true); 100 | if (leaderId != null) { 101 | appendEntryRequest.setRemoteId(leaderId); 102 | response = dLedgerClientRpcService.append(appendEntryRequest).get(); 103 | } 104 | } 105 | return response; 106 | } catch (RemotingTimeoutException e) { 107 | needFreshMetadata(); 108 | AppendEntryResponse appendEntryResponse = new AppendEntryResponse(); 109 | appendEntryResponse.setCode(CLIENT_TIMEOUT_CODE); 110 | return appendEntryResponse; 111 | } catch (RemotingConnectException e) { 112 | needFreshMetadata(); 113 | AppendEntryResponse appendEntryResponse = new AppendEntryResponse(); 114 | appendEntryResponse.setCode(CLIENT_CONNECT_REFUSE_CODE); 115 | return appendEntryResponse; 116 | } catch 
(RemotingSendRequestException e) { 117 | needFreshMetadata(); 118 | AppendEntryResponse appendEntryResponse = new AppendEntryResponse(); 119 | appendEntryResponse.setCode(CLIENT_SENDREQ_FAIL_CODE); 120 | return appendEntryResponse; 121 | } catch (Exception e) { 122 | needFreshMetadata(); 123 | logger.error("{}", e); 124 | AppendEntryResponse appendEntryResponse = new AppendEntryResponse(); 125 | appendEntryResponse.setCode(DLedgerResponseCode.INTERNAL_ERROR.getCode()); 126 | return appendEntryResponse; 127 | } 128 | } 129 | 130 | public GetEntriesResponse get(long index) { 131 | try { 132 | waitOnUpdatingMetadata(1500, false); 133 | if (leaderId == null) { 134 | GetEntriesResponse response = new GetEntriesResponse(); 135 | response.setCode(DLedgerResponseCode.METADATA_ERROR.getCode()); 136 | return response; 137 | } 138 | 139 | GetEntriesRequest request = new GetEntriesRequest(); 140 | request.setGroup(group); 141 | request.setRemoteId(leaderId); 142 | request.setBeginIndex(index); 143 | GetEntriesResponse response = dLedgerClientRpcService.get(request).get(); 144 | if (response.getCode() == DLedgerResponseCode.NOT_LEADER.getCode()) { 145 | waitOnUpdatingMetadata(1500, true); 146 | if (leaderId != null) { 147 | request.setRemoteId(leaderId); 148 | response = dLedgerClientRpcService.get(request).get(); 149 | } 150 | } 151 | return response; 152 | } catch (Exception t) { 153 | needFreshMetadata(); 154 | logger.error("", t); 155 | GetEntriesResponse getEntriesResponse = new GetEntriesResponse(); 156 | getEntriesResponse.setCode(DLedgerResponseCode.INTERNAL_ERROR.getCode()); 157 | return getEntriesResponse; 158 | } 159 | } 160 | 161 | public void startup() { 162 | this.dLedgerClientRpcService.startup(); 163 | this.metadataUpdater.start(); 164 | } 165 | 166 | public void shutdown() { 167 | this.dLedgerClientRpcService.shutdown(); 168 | this.metadataUpdater.shutdown(); 169 | } 170 | 171 | private void updatePeers(String peers) { 172 | for (String peerInfo : 
peers.split(";")) { 173 | peerMap.put(peerInfo.split("-")[0], peerInfo.split("-")[1]); 174 | } 175 | } 176 | 177 | private synchronized void needFreshMetadata() { 178 | leaderId = null; 179 | metadataUpdater.wakeup(); 180 | } 181 | 182 | private synchronized void waitOnUpdatingMetadata(long maxWaitMs, boolean needFresh) { 183 | if (needFresh) { 184 | leaderId = null; 185 | } else if (leaderId != null) { 186 | return; 187 | } 188 | long start = System.currentTimeMillis(); 189 | while (DLedgerUtils.elapsed(start) < maxWaitMs && leaderId == null) { 190 | metadataUpdater.wakeup(); 191 | try { 192 | wait(1000); 193 | } catch (InterruptedException e) { 194 | break; 195 | } 196 | } 197 | } 198 | 199 | private class MetadataUpdater extends ShutdownAbleThread { 200 | 201 | public MetadataUpdater(String name, Logger logger) { 202 | super(name, logger); 203 | } 204 | 205 | private void getMetadata(String peerId, boolean isLeader) { 206 | try { 207 | MetadataRequest request = new MetadataRequest(); 208 | request.setGroup(group); 209 | request.setRemoteId(peerId); 210 | CompletableFuture future = dLedgerClientRpcService.metadata(request); 211 | MetadataResponse response = future.get(1500, TimeUnit.MILLISECONDS); 212 | if (response.getLeaderId() != null) { 213 | leaderId = response.getLeaderId(); 214 | if (response.getPeers() != null) { 215 | peerMap.putAll(response.getPeers()); 216 | dLedgerClientRpcService.updatePeers(response.getPeers()); 217 | } 218 | } 219 | } catch (Throwable t) { 220 | if (isLeader) { 221 | needFreshMetadata(); 222 | } 223 | logger.warn("Get metadata failed from {}", peerId, t); 224 | } 225 | } 226 | 227 | @Override 228 | public void doWork() { 229 | try { 230 | if (leaderId == null) { 231 | for (String peer : peerMap.keySet()) { 232 | getMetadata(peer, false); 233 | if (leaderId != null) { 234 | synchronized (JepsenSetClient.this) { 235 | JepsenSetClient.this.notifyAll(); 236 | } 237 | DLedgerUtils.sleep(1000); 238 | break; 239 | } 240 | } 241 | } else { 
242 | getMetadata(leaderId, true); 243 | } 244 | waitForRunning(3000); 245 | } catch (Throwable t) { 246 | logger.error("Error", t); 247 | DLedgerUtils.sleep(1000); 248 | } 249 | } 250 | } 251 | 252 | } 253 | 254 | -------------------------------------------------------------------------------- /dledger-jepsen/startup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one or more 4 | # contributor license agreements. See the NOTICE file distributed with 5 | # this work for additional information regarding copyright ownership. 6 | # The ASF licenses this file to You under the Apache License, Version 2.0 7 | # (the "License"); you may not use this file except in compliance with 8 | # the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | source /etc/profile 19 | source ~/.bashrc 20 | nohup java -jar target/dledger-jepsen.jar server $@ >> dledger.log 2>&1 & -------------------------------------------------------------------------------- /dledger-jepsen/stop.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one or more 4 | # contributor license agreements. See the NOTICE file distributed with 5 | # this work for additional information regarding copyright ownership. 6 | # The ASF licenses this file to You under the Apache License, Version 2.0 7 | # (the "License"); you may not use this file except in compliance with 8 | # the License. 
You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | pid=`ps -ef |grep dledger-jepsen |grep java |awk -F' ' '{print $2}'` 19 | if [ "$pid" != "" ] 20 | then 21 | echo "kill $pid" 22 | kill $pid 23 | fi -------------------------------------------------------------------------------- /dledger-jepsen/stop_dropcaches.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one or more 4 | # contributor license agreements. See the NOTICE file distributed with 5 | # this work for additional information regarding copyright ownership. 6 | # The ASF licenses this file to You under the Apache License, Version 2.0 7 | # (the "License"); you may not use this file except in compliance with 8 | # the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | 18 | pid=`ps -ef |grep dledger-jepsen |grep java |awk -F' ' '{print $2}'` 19 | if [ "$pid" != "" ] 20 | then 21 | echo "kill $pid" 22 | kill $pid 23 | fi 24 | 25 | # To free pagecache, dentries and inodes 26 | echo 3 >/proc/sys/vm/drop_caches -------------------------------------------------------------------------------- /docker/.gitignore: -------------------------------------------------------------------------------- 1 | secret/* 2 | !.gitkeep 3 | 4 | -------------------------------------------------------------------------------- /docker/control/.gitignore: -------------------------------------------------------------------------------- 1 | jepsen -------------------------------------------------------------------------------- /docker/control/Dockerfile: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | FROM ubuntu:16.04 19 | MAINTAINER jake@apache.org 20 | 21 | ENV LEIN_ROOT true 22 | 23 | # 24 | # Jepsen dependencies 25 | # 26 | RUN apt-get -y -q update && \ 27 | apt-get -y -q install software-properties-common && \ 28 | add-apt-repository ppa:openjdk-r/ppa && \ 29 | apt-get -y -q update && \ 30 | apt-get install -qqy \ 31 | dos2unix \ 32 | openjdk-8-jdk \ 33 | libjna-java \ 34 | git \ 35 | gnuplot \ 36 | wget \ 37 | vim # not required by jepsen itself, just for ease of use 38 | 39 | RUN wget https://raw.githubusercontent.com/technomancy/leiningen/stable/bin/lein && \ 40 | mv lein /usr/bin && \ 41 | chmod +x /usr/bin/lein && \ 42 | lein self-install 43 | 44 | # without --dev flag up.sh copies jepsen to these subfolders 45 | # with --dev flag they are empty until mounted 46 | COPY jepsen/jepsen /jepsen/jepsen/ 47 | RUN if [ -f /jepsen/jepsen/project.clj ]; then cd /jepsen/jepsen && lein install; fi 48 | COPY jepsen /jepsen/ 49 | 50 | ADD ./bashrc /root/.bashrc 51 | ADD ./init.sh /init.sh 52 | RUN dos2unix /init.sh /root/.bashrc \ 53 | && chmod +x /init.sh 54 | 55 | RUN wget https://raw.githubusercontent.com/killme2008/clojure-control/master/bin/control && \ 56 | mv control /usr/bin && \ 57 | chmod +x /usr/bin/control && \ 58 | control self-install && \ 59 | apt-get -y install maven 60 | 61 | ENV CONTROL_ROOT=true 62 | 63 | 64 | CMD /init.sh 65 | -------------------------------------------------------------------------------- /docker/control/bashrc: -------------------------------------------------------------------------------- 1 | eval $(ssh-agent) &> /dev/null 2 | ssh-add /root/.ssh/id_rsa &> /dev/null 3 | 4 | cat < ~/.ssh/id_rsa 24 | chmod 600 ~/.ssh/id_rsa 25 | echo $SSH_PUBLIC_KEY > ~/.ssh/id_rsa.pub 26 | echo > ~/.ssh/known_hosts 27 | for f in $(seq 1 5);do 28 | ssh-keyscan -t rsa n$f >> ~/.ssh/known_hosts 29 | done 30 | fi 31 | 32 | # TODO: assert that SSH_PRIVATE_KEY==~/.ssh/id_rsa 33 | 34 | cat < Found authorized keys" 21 | mkdir -p 
/root/.ssh 22 | chmod 700 /root/.ssh 23 | touch /root/.ssh/authorized_keys 24 | chmod 600 /root/.ssh/authorized_keys 25 | IFS=$'\n' 26 | arr=$(echo ${AUTHORIZED_KEYS} | tr "," "\n") 27 | for x in $arr 28 | do 29 | x=$(echo $x |sed -e 's/^ *//' -e 's/ *$//') 30 | cat /root/.ssh/authorized_keys | grep "$x" >/dev/null 2>&1 31 | if [ $? -ne 0 ]; then 32 | echo "=> Adding public key to /root/.ssh/authorized_keys: $x" 33 | echo "$x" >> /root/.ssh/authorized_keys 34 | fi 35 | done 36 | fi 37 | 38 | exec /usr/sbin/sshd -D 39 | -------------------------------------------------------------------------------- /docker/secret/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openmessaging/openmessaging-dledger-jepsen/3ab87a0651d775e6bf45510e2860396fa779d2a2/docker/secret/.gitkeep -------------------------------------------------------------------------------- /docker/up.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one or more 4 | # contributor license agreements. See the NOTICE file distributed with 5 | # this work for additional information regarding copyright ownership. 6 | # The ASF licenses this file to You under the Apache License, Version 2.0 7 | # (the "License"); you may not use this file except in compliance with 8 | # the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | # "To provide additional docker-compose args, set the COMPOSE var. 
Ex: 19 | # COMPOSE="-f FILE_PATH_HERE" 20 | 21 | set -e # exit on an error 22 | 23 | ERROR(){ 24 | /bin/echo -e "\e[101m\e[97m[ERROR]\e[49m\e[39m $@" 25 | } 26 | 27 | WARNING(){ 28 | /bin/echo -e "\e[101m\e[97m[WARNING]\e[49m\e[39m $@" 29 | } 30 | 31 | INFO(){ 32 | /bin/echo -e "\e[104m\e[97m[INFO]\e[49m\e[39m $@" 33 | } 34 | 35 | exists() { 36 | type $1 > /dev/null 2>&1 37 | } 38 | 39 | POSITIONAL=() 40 | while [[ $# -gt 0 ]] 41 | do 42 | key="$1" 43 | 44 | case $key in 45 | --help) 46 | HELP=1 47 | shift # past argument 48 | ;; 49 | --init-only) 50 | INIT_ONLY=1 51 | shift # past argument 52 | ;; 53 | --dev) 54 | if [ ! "$JEPSEN_ROOT" ]; then 55 | export JEPSEN_ROOT=$(cd ../ && pwd) 56 | INFO "JEPSEN_ROOT is not set, defaulting to: $JEPSEN_ROOT" 57 | fi 58 | INFO "Running docker-compose with dev config" 59 | DEV="-f docker-compose.dev.yml" 60 | shift # past argument 61 | ;; 62 | --compose) 63 | COMPOSE="-f $2" 64 | shift # past argument 65 | shift # past value 66 | ;; 67 | -d|--daemon) 68 | INFO "Running docker-compose as daemon" 69 | RUN_AS_DAEMON=1 70 | shift # past argument 71 | ;; 72 | *) 73 | POSITIONAL+=("$1") 74 | ERROR "unknown option $1" 75 | shift # past argument 76 | ;; 77 | esac 78 | done 79 | set -- "${POSITIONAL[@]}" # restore positional parameters 80 | 81 | if [ "$HELP" ]; then 82 | echo "Usage: $0 [OPTION]" 83 | echo " --help Display this message" 84 | echo " --init-only Initializes ssh-keys, but does not call docker-compose" 85 | echo " --daemon Runs docker-compose in the background" 86 | echo " --dev Mounts dir at host's JEPSEN_ROOT to /jepsen on jepsen-control container, syncing files for development" 87 | echo " --compose PATH Path to an additional docker-compose yml config." 88 | echo "To provide multiple additional docker-compose args, set the COMPOSE var directly, with the -f flag. 
Ex: COMPOSE=\"-f FILE_PATH_HERE -f ANOTHER_PATH\" ./up.sh --dev" 89 | exit 0 90 | fi 91 | 92 | exists ssh-keygen || { ERROR "Please install ssh-keygen (apt-get install openssh-client)"; exit 1; } 93 | exists perl || { ERROR "Please install perl (apt-get install perl)"; exit 1; } 94 | 95 | # Generate SSH keys for the control node 96 | if [ ! -f ./secret/node.env ]; then 97 | INFO "Generating key pair" 98 | ssh-keygen -t rsa -N "" -f ./secret/id_rsa 99 | 100 | INFO "Generating ./secret/control.env" 101 | echo "# generated by jepsen/docker/up.sh, parsed by jepsen/docker/control/bashrc" > ./secret/control.env 102 | echo "# NOTE: \\n is expressed as ↩" >> ./secret/control.env 103 | echo SSH_PRIVATE_KEY="$(cat ./secret/id_rsa | perl -p -e "s/\n/↩/g")" >> ./secret/control.env 104 | echo SSH_PUBLIC_KEY=$(cat ./secret/id_rsa.pub) >> ./secret/control.env 105 | 106 | INFO "Generating ./secret/node.env" 107 | echo "# generated by jepsen/docker/up.sh, parsed by the \"tutum/debian\" docker image entrypoint script" > ./secret/node.env 108 | echo ROOT_PASS=root >> ./secret/node.env 109 | echo AUTHORIZED_KEYS=$(cat ./secret/id_rsa.pub) >> ./secret/node.env 110 | else 111 | INFO "No need to generate key pair" 112 | fi 113 | 114 | # Make sure folders referenced in control Dockerfile exist and don't contain leftover files 115 | rm -rf ./control/jepsen 116 | mkdir -p ./control/jepsen/jepsen 117 | # Copy the jepsen directory if we're not mounting the JEPSEN_ROOT 118 | if [ ! "$DEV" ]; then 119 | # Dockerfile does not allow `ADD ..`. So we need to copy it here in setup. 120 | INFO "Copying .. to control/jepsen" 121 | ( 122 | (cd ..; tar --exclude=./docker --exclude=./.git --exclude-ignore=.gitignore -cf - .) 
| tar Cxf ./control/jepsen - 123 | ) 124 | fi 125 | 126 | if [ "$INIT_ONLY" ]; then 127 | exit 0 128 | fi 129 | 130 | exists docker || { ERROR "Please install docker (https://docs.docker.com/engine/installation/)"; exit 1; } 131 | exists docker-compose || { ERROR "Please install docker-compose (https://docs.docker.com/compose/install/)"; exit 1; } 132 | 133 | INFO "Running \`docker-compose build\`" 134 | docker-compose -f docker-compose.yml $COMPOSE $DEV build 135 | 136 | INFO "Running \`docker-compose up\`" 137 | if [ "$RUN_AS_DAEMON" ]; then 138 | docker-compose -f docker-compose.yml $COMPOSE $DEV up -d 139 | INFO "All containers started, run \`docker ps\` to view" 140 | exit 0 141 | else 142 | INFO "Please run \`docker exec -it jepsen-control bash\` in another terminal to proceed" 143 | docker-compose -f docker-compose.yml $COMPOSE $DEV up 144 | fi 145 | -------------------------------------------------------------------------------- /nodes: -------------------------------------------------------------------------------- 1 | n1 2 | n2 3 | n3 4 | n4 5 | n5 6 | 7 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | ; Licensed to the Apache Software Foundation (ASF) under one or more 2 | ; contributor license agreements. See the NOTICE file distributed with 3 | ; this work for additional information regarding copyright ownership. 4 | ; The ASF licenses this file to You under the Apache License, Version 2.0 5 | ; (the "License"); you may not use this file except in compliance with 6 | ; the License. You may obtain a copy of the License at 7 | ; 8 | ; http://www.apache.org/licenses/LICENSE-2.0 9 | ; 10 | ; Unless required by applicable law or agreed to in writing, software 11 | ; distributed under the License is distributed on an "AS IS" BASIS, 12 | ; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | ; See the License for the specific language governing permissions and 14 | ; limitations under the License. 15 | 16 | (defproject openmessaging-dledger-jepsen "0.1.0-SNAPSHOT" 17 | :description "A jepsen test for dledger" 18 | :url "https://github.com/openmessaging/openmessaging-dledger-jepsen" 19 | :license {:name "Apache License 2.0"} 20 | :main dledger-jepsen-test.core 21 | :dependencies [ 22 | [org.clojure/clojure "1.10.0"] 23 | [jepsen "0.1.15-SNAPSHOT"] 24 | [io.openmessaging/dledger-jepsen "1.0.0-SNAPSHOT"] 25 | ]) 26 | -------------------------------------------------------------------------------- /run_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one or more 4 | # contributor license agreements. See the NOTICE file distributed with 5 | # this work for additional information regarding copyright ownership. 6 | # The ASF licenses this file to You under the Apache License, Version 2.0 7 | # (the "License"); you may not use this file except in compliance with 8 | # the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | lein run test --nodes-file ./nodes $@ 19 | -------------------------------------------------------------------------------- /src/dledger_jepsen_test/core.clj: -------------------------------------------------------------------------------- 1 | ; Licensed to the Apache Software Foundation (ASF) under one or more 2 | ; contributor license agreements. 
See the NOTICE file distributed with 3 | ; this work for additional information regarding copyright ownership. 4 | ; The ASF licenses this file to You under the Apache License, Version 2.0 5 | ; (the "License"); you may not use this file except in compliance with 6 | ; the License. You may obtain a copy of the License at 7 | ; 8 | ; http://www.apache.org/licenses/LICENSE-2.0 9 | ; 10 | ; Unless required by applicable law or agreed to in writing, software 11 | ; distributed under the License is distributed on an "AS IS" BASIS, 12 | ; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | ; See the License for the specific language governing permissions and 14 | ; limitations under the License. 15 | 16 | (ns dledger-jepsen-test.core 17 | (:require [clojure.tools.logging :refer :all] 18 | [clojure.string :as cstr] 19 | [jepsen [cli :as cli] 20 | [control :as c] 21 | [db :as db] 22 | [tests :as tests] 23 | [checker :as checker] 24 | [client :as client] 25 | [generator :as gen] 26 | [nemesis :as nemesis]] 27 | [jepsen.checker.timeline :as timeline] 28 | [jepsen.control.util :as cu] 29 | [jepsen.os :as os]) 30 | (:import [io.openmessaging.dledger.jepsenclient JepsenSetClient])) 31 | 32 | (defonce dledger-path "/root/dledger-jepsen") 33 | (defonce dledger-port 20911) 34 | (defonce dledger-bin "java") 35 | (defonce dledger-start "startup.sh") 36 | (defonce dledger-stop "stop.sh") 37 | (defonce dledger-stop-dropcaches "stop_dropcaches.sh") 38 | (defonce dledger-data-path "/tmp/dledgerstore") 39 | (defonce dledger-log-path "logs/dledger") 40 | 41 | (defn peer-id [node] 42 | (str node)) 43 | 44 | (defn peer-str [node] 45 | (str (peer-id node) "-" node ":" dledger-port)) 46 | 47 | (defn peers 48 | "Constructs an initial cluster string for a test, like 49 | \"n0-host1:20911;n1-host2:20911,...\"" 50 | [test] 51 | (->> (:nodes test) 52 | (map (fn [node] 53 | (peer-str node))) 54 | (cstr/join ";"))) 55 | 56 | (defn start! 
[test node] 57 | (info "Start DLedgerServer" node) 58 | (c/cd dledger-path 59 | (c/exec :sh 60 | dledger-start 61 | "--group jepsen" 62 | "--id" 63 | (peer-id node) 64 | "--peers" 65 | (peers test)))) 66 | 67 | (defn stop! [node] 68 | (info "Stop DLedgerServer" node) 69 | (c/cd dledger-path 70 | (c/exec :sh 71 | dledger-stop))) 72 | 73 | (defn stop_dropcaches! "Runs the dledger-stop-dropcaches script (stop_dropcaches.sh) from dledger-path; unlike stop!, that script also writes to /proc/sys/vm/drop_caches after killing the server." [node] 74 | (info "Stop DLedgerServer and drop caches" node) 75 | (c/cd dledger-path 76 | (c/exec :sh 77 | dledger-stop-dropcaches))) 78 | 79 | (defn- create-client [test] 80 | (doto (JepsenSetClient. "jepsen" (peers test)) 81 | (.startup))) 82 | 83 | (defn- start-client [client] 84 | (-> client 85 | :conn 86 | (.startup))) 87 | 88 | (defn- shutdown-client [client] 89 | (-> client 90 | :conn 91 | (.shutdown))) 92 | 93 | 94 | (defn- add 95 | "add element to dledger" 96 | [client value] 97 | (-> client 98 | :conn 99 | (.add (pr-str value)))) 100 | 101 | (defn- read-all 102 | "read set from dledger" 103 | [client] 104 | (-> client 105 | :conn 106 | (.read))) 107 | 108 | 109 | (defn db 110 | "DLedger db." 111 | [] 112 | (reify db/DB 113 | (setup! [_ test node] 114 | (start! test node) 115 | (Thread/sleep 20000) 116 | ) 117 | 118 | (teardown! [_ test node] 119 | (stop! node) 120 | (Thread/sleep 20000) 121 | (c/exec :rm 122 | :-rf 123 | dledger-data-path)))) 124 | 125 | (defrecord Client [conn] 126 | client/Client 127 | (open! [this test node] 128 | (-> this 129 | (assoc :node node) 130 | (assoc :conn (create-client test)))) 131 | 132 | (setup! [this test]) 133 | 134 | (invoke!
[this test op] 135 | (try 136 | (case (:f op) 137 | :add (let [res,(add this (:value op))] 138 | (cond 139 | (= res 200) (assoc op :type :ok) 140 | (= res 502) (assoc op :type :info :error "wait qurom ack timeout") 141 | (= res 1001) (assoc op :type :info :error "client append timeout") 142 | (= res 1002) (assoc op :type :fail :error "client connect refuse") 143 | (= res 1003) (assoc op :type :fail :error "client send req fail") 144 | :else (assoc op :type :fail :error (str "error code: " res)))) 145 | 146 | :read (assoc op 147 | :type :ok 148 | :value (read-string (read-all this))) 149 | ) 150 | 151 | (catch Exception e 152 | (assoc op :type :info :error e) 153 | ))) 154 | 155 | (teardown! [this test]) 156 | 157 | (close! [this test] 158 | (shutdown-client this))) 159 | 160 | (defn mostly-small-nonempty-subset 161 | "Returns a subset of the given collection, with a logarithmically decreasing 162 | probability of selecting more elements. Always selects at least one element. 163 | 164 | (->> #(mostly-small-nonempty-subset [1 2 3 4 5]) 165 | repeatedly 166 | (map count) 167 | (take 10000) 168 | frequencies 169 | sort) 170 | ; => ([1 3824] [2 2340] [3 1595] [4 1266] [5 975])" 171 | [xs] 172 | (-> xs 173 | count 174 | inc 175 | Math/log 176 | rand 177 | Math/exp 178 | long 179 | (take (shuffle xs)))) 180 | 181 | (def crash-random-nodes 182 | "A nemesis that crashes a random subset of nodes." 183 | (nemesis/node-start-stopper 184 | mostly-small-nonempty-subset 185 | (fn start [test node] 186 | (info "Crash start" node) 187 | (stop_dropcaches! node) 188 | [:killed node]) 189 | (fn stop [test node] 190 | (info "Crash stop" node) 191 | (start! test node) 192 | [:restarted node]))) 193 | 194 | (def kill-random-processes 195 | "A nemesis that kills a random subset of processes." 196 | (nemesis/node-start-stopper 197 | mostly-small-nonempty-subset 198 | (fn start [test node] 199 | (info "Kill start" node) 200 | (stop! 
node) 201 | [:killed node]) 202 | (fn stop [test node] 203 | (info "Kill stop" node) 204 | (start! test node) 205 | [:restarted node]))) 206 | 207 | (def nemesis-map 208 | "A map of nemesis names to functions that construct nemesis, given opts." 209 | {"partition-random-halves" (nemesis/partition-random-halves) 210 | "partition-random-node" (nemesis/partition-random-node) 211 | "kill-random-processes" kill-random-processes 212 | "crash-random-nodes" crash-random-nodes 213 | "hammer-time" (nemesis/hammer-time dledger-bin) 214 | "bridge" (nemesis/partitioner (comp nemesis/bridge shuffle)) 215 | "partition-majorities-ring" (nemesis/partition-majorities-ring)}) 216 | 217 | (defn- parse-int [s] 218 | (Integer/parseInt s)) 219 | 220 | (def cli-opts 221 | "Additional command line options." 222 | [["-r" "--rate HZ" "Approximate number of requests per second, per thread." 223 | :default 10 224 | :parse-fn read-string 225 | :validate [#(and (number? %) (pos? %)) "Must be a positive number"]] 226 | [nil "--nemesis NAME" "What nemesis should we run?" 227 | :default "partition-random-halves" 228 | :validate [nemesis-map (cli/one-of nemesis-map)]] 229 | ["-i" "--interval TIME" "How long is the nemesis interval?" 230 | :default 15 231 | :parse-fn parse-int 232 | :validate [#(and (number? %) (pos? %)) "Must be a positive number"]]] 233 | ) 234 | 235 | 236 | (defn dledger-test 237 | "Given an options map from the command line runner (e.g. :nodes, :ssh, 238 | :concurrency ...), constructs a test map." 239 | [opts] 240 | (let [nemesis (get nemesis-map (:nemesis opts))] 241 | (merge tests/noop-test 242 | opts 243 | {:name "dledger" 244 | :os os/noop 245 | :db (db) 246 | :client (Client. 
nil) 247 | :nemesis nemesis 248 | :checker (checker/compose 249 | {:perf (checker/perf) 250 | :set (checker/set) 251 | :timeline (timeline/html)}) 252 | :generator (gen/phases 253 | (->> (range) 254 | (map (fn [x] {:type :invoke, :f :add, :value x})) 255 | (gen/seq) 256 | (gen/stagger (/ (:rate opts))) 257 | (gen/nemesis 258 | (gen/seq(cycle [(gen/sleep (:interval opts)) 259 | {:type :info, :f :start} 260 | (gen/sleep (:interval opts)) 261 | {:type :info, :f :stop}]))) 262 | (gen/time-limit (:time-limit opts))) 263 | (gen/log "Healing cluster") 264 | (gen/nemesis (gen/once {:type :info, :f :stop})) 265 | (gen/log "Waiting for recovery") 266 | (gen/sleep 10) 267 | (gen/clients (gen/once {:type :invoke, :f :read, :value nil})))}))) 268 | 269 | (defn -main 270 | "Handles command line arguments. Can either run a test, or a web server for 271 | browsing results." 272 | [& args] 273 | (cli/run! (merge (cli/single-test-cmd {:test-fn dledger-test 274 | :opt-spec cli-opts}) 275 | (cli/serve-cmd)) 276 | args)) 277 | 278 | 279 | --------------------------------------------------------------------------------