├── .github
│   └── workflows
│       ├── regressions.yaml
│       └── simulation.yaml
├── .gitignore
├── LICENSE
├── README.md
├── cmd
│   ├── example
│   │   ├── main.go
│   │   ├── repl.go
│   │   └── server.go
│   └── simulator
│       └── main.go
├── go.mod
├── internal
│   └── simulator
│       ├── array
│       │   └── rand.go
│       ├── constant
│       │   └── constant.go
│       ├── network.go
│       └── simulator.go
├── pkg
│   ├── assert
│   │   ├── assert.go
│   │   └── assert_release.go
│   ├── client
│   │   └── client.go
│   ├── events
│   │   ├── events.go
│   │   └── events_test.go
│   ├── ipv4port
│   │   └── ipv4port.go
│   ├── log
│   │   └── log.go
│   ├── network
│   │   ├── network.go
│   │   └── tcp.go
│   ├── queue
│   │   └── queue.go
│   ├── replica
│   │   └── replica.go
│   ├── time
│   │   ├── real.go
│   │   ├── time.go
│   │   ├── timer.go
│   │   └── virtual.go
│   └── utils
│       ├── pair.go
│       └── rng.go
├── records
│   └── regression_seeds
└── scripts
    └── regression.sh

/.github/workflows/regressions.yaml:
--------------------------------------------------------------------------------
name: Simulation Regression Test (Master)

on:
  pull_request:
    branches:
      - master
  push:
    branches:
      - master
  workflow_dispatch:

jobs:
  simulation_test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      - name: Set up Go
        uses: actions/setup-go@v4
        with:
          go-version: '1.21.3'
      - name: Run simulation on previous issue seeds
        run: bash ./scripts/regression.sh
--------------------------------------------------------------------------------
/.github/workflows/simulation.yaml:
--------------------------------------------------------------------------------
name: Continuous Simulation

on:
  schedule:
    # * is a special character in YAML so you have to quote this string
    - cron: '* * * * *'
  workflow_dispatch:

jobs:
  simulation_test:
    runs-on: ubuntu-latest
    permissions:
      issues: write
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      - name: Set up Go
        uses: actions/setup-go@v4
        with:
          go-version: '1.21.3'
      - name: Run simulation
        id: run_simulation
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          set +e
          export CPU_PROFILE=0
          export HEAP_PROFILE=0
          export REPLICA_DEBUG=-1
          export CLIENT_DEBUG=-1
          export SIMULATOR_DEBUG=0
          SEED=$(python3 -c 'import random; rng = random.SystemRandom(); print(rng.randint(0, 2**64-1))')
          timeout 3h go run ./cmd/simulator ${SEED}
          exit_code=$?
35 | if [ $exit_code -eq 0 ]; then 36 | echo "Simulation finished successfully" 37 | else 38 | issue="" 39 | if [ $exit_code -eq 124 ]; then 40 | echo "Simulation timed out for seed ${SEED}" 41 | issue="Simulation timed out for seed \`${SEED}\`" 42 | else 43 | echo "Simulation failed for seed ${SEED}" 44 | issue="Simulation failed for seed \`${SEED}\`" 45 | fi 46 | curl -L \ 47 | -X POST \ 48 | -H "Accept: application/vnd.github+json" \ 49 | -H "Authorization: Bearer ${GITHUB_TOKEN}" \ 50 | -H "X-GitHub-Api-Version: 2022-11-28" \ 51 | https://api.github.com/repos/tangledbytes/go-vsr/issues \ 52 | -d "{\"title\":\"${issue}\",\"labels\":[\"auto-simulation\"]}" 53 | exit 1 54 | fi 55 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pprof 2 | *.log -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright 2023 Utkarsh Srivastava

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# go-vsr

go-vsr is an implementation of Viewstamped Replication[^1] in Go. The goal is NOT to build a production-ready package but simply to play around with the protocol. As of now it implements:
1. Normal Operation
2. View Change
3. State Transfer

Recovery is not yet supported.
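All protocol messages are exchanged as JSON-encoded events (see `pkg/events`). For a concrete feel, this is what a client request looks like on the wire, taken verbatim from the test cases in `pkg/events/events_test.go` (`@type` 1 is `EventRequest`; 64-bit numbers are encoded as strings):

```json
{"@type":1,"data":{"id":"1","client_id":"10","op":"test command"}}
```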

[^1]: https://pmg.csail.mit.edu/papers/vr-revisited.pdf

## Simulator
The simulator in the repository is still in the works, as is the VSR implementation itself. The simulator is deterministic: as long as the given seed is the same, you should be able to reproduce the same scenarios on repeat. At the moment, the simulator is in working condition and can do the following:
1. Takes a seed and creates a cluster with a random number of replicas and a random number of clients.
2. Simulates packet drops.
3. Simulates unordered deliveries.
4. Simulates network delays.
5. Runs sanity checks in the middle of the simulation, since the simulator drives the entire cluster and all the clients, and has complete control over the passage of time.
6. Performs several assertions at the end of the simulation.

### How to run?
Running the simulator is easy; you just need Go installed on the system.
```console
$ go run ./cmd/simulator
```

If a seed is given then that seed will be used, or else a seed will be generated and printed.

## Example
There is a simple client (REPL) and server example in the repository. It mimics a distributed in-memory KV store.

### How to run the example?
```console
$ #export DEBUG=1 will enable debug logs
$ go run ./cmd/example server -members "0.0.0.0:10000,0.0.0.0:10001,0.0.0.0:10002" -id 0 -port 10000
```
```console
$ #export DEBUG=1 will enable debug logs
$ go run ./cmd/example server -members "0.0.0.0:10000,0.0.0.0:10001,0.0.0.0:10002" -id 1 -port 10001
```
```console
$ #export DEBUG=1 will enable debug logs
$ go run ./cmd/example server -members "0.0.0.0:10000,0.0.0.0:10001,0.0.0.0:10002" -id 2 -port 10002
```

The above will start 3 replicas on ports 10000, 10001 and 10002.

```console
$ #export DEBUG=1 will enable debug logs
$ go run ./cmd/example client -members "0.0.0.0:10000,0.0.0.0:10001,0.0.0.0:10002"
Starting client... members [0.0.0.0:10000 0.0.0.0:10001 0.0.0.0:10002] id 5688340127043569018
client=>GET abc
server=> {"result": ""}
client=>SET abc 123
server=> {"result": "123"}
client=>GET abc
server=> {"result": "123"}
client=>
```

The above will start a client which can communicate with the cluster.

## NOTE
This is below-alpha-level software at the moment and is being actively worked on.
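If you want to embed go-vsr in your own binary rather than run the examples, the core loop is small. Below is a condensed, untested sketch distilled from `cmd/example/server.go`; the echo handler is a stand-in for your own state machine, and none of this is a stable API:

```go
package main

import (
	"github.com/tangledbytes/go-vsr/pkg/network"
	"github.com/tangledbytes/go-vsr/pkg/replica"
	"github.com/tangledbytes/go-vsr/pkg/time"
)

func main() {
	members := []string{"0.0.0.0:10000", "0.0.0.0:10001", "0.0.0.0:10002"}
	net := network.NewTCP("0.0.0.0:10000")
	clock := time.NewReal()

	// Your state machine: any func(string) string that applies an
	// operation and returns the result. Here, a trivial echo handler.
	echoHandler := func(op string) string { return op }

	r, err := replica.New(replica.Config{
		ID:               0,
		Members:          members,
		Network:          net,
		SrvHandler:       echoHandler,
		Time:             clock,
		HeartbeatTimeout: 30 * time.SECOND,
	})
	if err != nil {
		panic(err)
	}

	go net.Run()
	for {
		// Pump network events into the replica and let it make progress.
		if ev, ok := net.Recv(); ok {
			r.Submit(ev)
		}
		r.Run()
		clock.Tick()
	}
}
```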

--------------------------------------------------------------------------------
/cmd/example/main.go:
--------------------------------------------------------------------------------
package main

import (
	"flag"
	"fmt"
	"log/slog"
	"math/rand"
	"os"
	"strings"
)

func logLevel() slog.Level {
	if os.Getenv("DEBUG") == "1" {
		return slog.LevelDebug
	}

	return slog.LevelInfo
}

func handleServerCmd() {
	fset := flag.NewFlagSet("server", flag.ExitOnError)
	members := fset.String("members", "", "comma separated list of members")
	port := fset.String("port", "10000", "port to listen on")
	id := fset.Int("id", 0, "id of the replica")

	fset.Parse(os.Args[2:])

	parsedMembers := strings.Split(*members, ",")

	slog.Info("Starting server...", "members", *members, "port", *port, "id", *id)
	newServer(parsedMembers, *port, *id).run()
}

func handleClientCmd() {
	fset := flag.NewFlagSet("client", flag.ExitOnError)
	members := fset.String("members", "", "comma separated list of members")

	fset.Parse(os.Args[2:])

	parsedMembers := strings.Split(*members, ",")
	id := rand.Int()

	fmt.Println("Starting client...", "members", parsedMembers, "id", id)
	newRepl(parsedMembers, id).run()
}

func main() {
	args := os.Args

	logh := slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{
		Level: logLevel(),
	})
	slog.SetDefault(slog.New(logh))

	if len(args) < 2 {
		fmt.Println("invalid command: only server and client are supported", args)
		return
	}

	if args[1] == "server" {
		handleServerCmd()
		return
	}

	if args[1] == "client" {
		handleClientCmd()
		return
	}

	fmt.Println("invalid command: only server and client are supported")
}
--------------------------------------------------------------------------------
/cmd/example/repl.go:
--------------------------------------------------------------------------------
package main

import (
	"bufio"
	"fmt"
	"os"
	"strings"

	"github.com/tangledbytes/go-vsr/pkg/assert"
	"github.com/tangledbytes/go-vsr/pkg/client"
	"github.com/tangledbytes/go-vsr/pkg/network"
	"github.com/tangledbytes/go-vsr/pkg/time"
)

type repl struct {
	clientid       int
	clustermembers []string

	time *time.Real
	net  *network.TCP
}

func newRepl(members []string, id int) *repl {
	net := network.NewTCP("0.0.0.0:0")
	t := time.NewReal()

	return &repl{
		clustermembers: members,
		time:           t,
		net:            net,
		clientid:       id,
	}
}

func (r *repl) run() {
	client, err := client.New(client.Config{
		ID:             uint64(r.clientid),
		Members:        r.clustermembers,
		Network:        r.net,
		Time:           r.time,
		RequestTimeout: 10 * time.SECOND,
	})
	assert.Assert(err == nil, "err should be nil")

	go r.net.Run()

	waiting := false
	for {
		if !waiting {
			cmd, ok := r.acceptinput()
			if !ok {
				fmt.Println("Error reading input")
				continue
			}

			client.Request(cmd)
			waiting = true
		}

		ev, ok := r.net.Recv()
		if ok {
			client.Submit(ev)
		}

		client.Run()
		r.time.Tick()

		if waiting {
			reply, ok := client.CheckResult()
			if !ok {
				continue
			}

			fmt.Println("server=>", reply.Result)
			waiting = false
		}
	}
}

func (r *repl) acceptinput() (string, bool) {
	fmt.Print("client=>")
	buf := bufio.NewReader(os.Stdin)
	cmd, err := buf.ReadString('\n')
	return strings.TrimSpace(cmd), err == nil
}
--------------------------------------------------------------------------------
/cmd/example/server.go:
--------------------------------------------------------------------------------
package main

import (
	"fmt"
	"strings"

	"github.com/tangledbytes/go-vsr/pkg/assert"
	"github.com/tangledbytes/go-vsr/pkg/network"
	"github.com/tangledbytes/go-vsr/pkg/replica"
	"github.com/tangledbytes/go-vsr/pkg/time"
)

type server struct {
	members []string
	id      int
	net     *network.TCP
	time    *time.Real

	store map[string]string
}

func newServer(members []string, port string, id int) *server {
	net := network.NewTCP("0.0.0.0:" + port)
	return &server{
		members: members,
		id:      id,
		net:     net,
		// The clock must be initialized here: run() ticks it on every
		// iteration and hands it to the replica for its heartbeat timer.
		// It was previously left nil, which panics on the first Tick().
		time:  time.NewReal(),
		store: make(map[string]string),
	}
}

func (s *server) run() {
	replica, err := replica.New(replica.Config{
		ID:               uint64(s.id),
		Members:          s.members,
		Network:          s.net,
		SrvHandler:       s.handleCmd,
		Time:             s.time,
		HeartbeatTimeout: 30 * time.SECOND,
	})

	assert.Assert(err == nil, "err should be nil")

	go s.net.Run()

	for {
		ev, ok := s.net.Recv()
		if ok {
			replica.Submit(ev)
		}

		replica.Run()
		s.time.Tick()
	}
}

func (s *server) handleCmd(m string) string {
	cmd := strings.Split(m, " ")
	if len(cmd) < 2 || len(cmd) > 3 {
		return "{}"
	}

	if len(cmd) == 2 {
		if cmd[0] != "GET" {
			return "{}"
		}

		return fmt.Sprintf("{\"result\": \"%s\"}", s.store[cmd[1]])
	}

	if len(cmd) == 3 {
		if cmd[0] != "SET" {
			return "{}"
		}

		s.store[cmd[1]] = cmd[2]
		return fmt.Sprintf("{\"result\": \"%s\"}", cmd[2])
	}

	return "{}"
}
--------------------------------------------------------------------------------
/cmd/simulator/main.go:
--------------------------------------------------------------------------------
package main

import (
	"fmt"
	"log/slog"
	"math/rand"
	"os"
	"os/signal"
	"runtime/pprof"
	"strconv"

	"github.com/tangledbytes/go-vsr/internal/simulator"
)

func saveHeapProfile() {
	if os.Getenv("HEAP_PROFILE") == "1" {
		f, err := os.Create("heap.pprof")
		if err != nil {
			panic(err)
		}

		if err := pprof.WriteHeapProfile(f); err != nil {
			panic(err)
		}

		if err := f.Close(); err != nil {
			panic(err)
		}
	}
}

func getLogLevel(entity string) slog.Level {
	switch os.Getenv(entity + "_DEBUG") {
	case "1":
		return slog.LevelDebug
	case "0":
		return slog.LevelInfo
	case "-1":
		return slog.Level(1000)
	}

	return slog.LevelInfo
}

func createLogger(level slog.Level) *slog.Logger {
	return slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{
		Level: level,
	}))
}

func exitHooks(fns ...func()) {
	c := make(chan os.Signal, 1)
	signal.Notify(c, os.Interrupt)

	go func() {
		<-c
		for _, fn := range fns {
			fn()
		}

		os.Exit(1)
	}()
}

func setupCPUProfile() {
	if os.Getenv("CPU_PROFILE") == "1" {
		f, err := os.Create("cpu.pprof")
		if err != nil {
			panic(err)
		}

		if err := pprof.StartCPUProfile(f); err != nil {
			panic(err)
		}
	}
}

func stopCPUProfile() {
	if os.Getenv("CPU_PROFILE") == "1" {
		pprof.StopCPUProfile()
	}
}

func main() {
	var seed uint64
	if len(os.Args) < 2 {
		seed = rand.Uint64()
	} else {
		var err error
		// Parse the seed as uint64: the CI workflow generates seeds in the
		// full [0, 2^64-1] range, which ParseInt would reject above 2^63-1
		// as "invalid seed".
		seed, err = strconv.ParseUint(os.Args[1], 10, 64)
		if err != nil {
			fmt.Println("invalid seed")
			return
		}
	}

	setupCPUProfile()
	exitHooks(
		saveHeapProfile,
		stopCPUProfile,
	)

	replicaLogger := createLogger(getLogLevel("REPLICA"))
	clientLogger := createLogger(getLogLevel("CLIENT"))
	simulatorLogger := createLogger(getLogLevel("SIMULATOR"))
	simulator.New(seed, replicaLogger, clientLogger, simulatorLogger).Simulate()
}
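A note on reproducing CI failures: the continuous-simulation workflow above files an issue containing the failing seed, and re-running that seed locally replays the exact scenario. A hypothetical session (the seed value is made up; the environment variables mirror what .github/workflows/simulation.yaml sets):

```console
$ export REPLICA_DEBUG=-1 CLIENT_DEBUG=-1 SIMULATOR_DEBUG=0
$ go run ./cmd/simulator 12345678901234567890
```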
os.Getenv("CPU_PROFILE") == "1" { 67 | f, err := os.Create("cpu.pprof") 68 | if err != nil { 69 | panic(err) 70 | } 71 | 72 | if err := pprof.StartCPUProfile(f); err != nil { 73 | panic(err) 74 | } 75 | } 76 | } 77 | 78 | func stopCPUProfile() { 79 | if os.Getenv("CPU_PROFILE") == "1" { 80 | pprof.StopCPUProfile() 81 | } 82 | } 83 | 84 | func main() { 85 | var seed int64 86 | if len(os.Args) < 2 { 87 | seed = rand.Int63() 88 | } else { 89 | var err error 90 | seed, err = strconv.ParseInt(os.Args[1], 10, 64) 91 | if err != nil { 92 | fmt.Println("invalid seed") 93 | return 94 | } 95 | } 96 | 97 | setupCPUProfile() 98 | exitHooks( 99 | saveHeapProfile, 100 | stopCPUProfile, 101 | ) 102 | 103 | replicaLogger := createLogger(getLogLevel("REPLICA")) 104 | clientLogger := createLogger(getLogLevel("CLIENT")) 105 | simulatorLogger := createLogger(getLogLevel("SIMULATOR")) 106 | simulator.New(uint64(seed), replicaLogger, clientLogger, simulatorLogger).Simulate() 107 | } 108 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/tangledbytes/go-vsr 2 | 3 | go 1.21.3 4 | -------------------------------------------------------------------------------- /internal/simulator/array/rand.go: -------------------------------------------------------------------------------- 1 | package array 2 | 3 | import ( 4 | "log/slog" 5 | "math/rand" 6 | 7 | "github.com/tangledbytes/go-vsr/internal/simulator/constant" 8 | ) 9 | 10 | type Rand[T any] struct { 11 | rng *rand.Rand 12 | randpickpercent float64 13 | data []T 14 | } 15 | 16 | func NewRand[T any](rng *rand.Rand, logger *slog.Logger) *Rand[T] { 17 | randpickpercent := rng.Float64() * constant.UNORDERED_PACKET_DELIVERY_PERCENT 18 | 19 | logger.Debug( 20 | "new rand array created", 21 | "rand pick percent", randpickpercent*100, 22 | ) 23 | 24 | return &Rand[T]{ 25 | rng: rng, 26 | randpickpercent: randpickpercent, 27 | data: make([]T, 0), 28 | } 29 | } 30 | 31 | func (r *Rand[T]) Push(value T) { 32 | r.data = append(r.data, value) 33 | } 34 | 35 | func (r *Rand[T]) Pop() (T, bool) { 36 | randpickChance := r.rng.Float64() 37 | if randpickChance <= r.randpickpercent { 38 | return r.popRandom() 39 | } 40 | 41 | return r.popOrdered() 42 | } 43 | 44 | func (r *Rand[T]) Len() int { 45 | return len(r.data) 46 | } 47 | 48 | func (r *Rand[T]) popOrdered() (T, bool) { 49 | var t T 50 | if len(r.data) == 0 { 51 | return t, false 52 | } 53 | 54 | t = r.data[0] 55 | r.data = r.data[1:] 56 | return t, true 57 | } 58 | 59 | func (r *Rand[T]) popRandom() (T, bool) { 60 | var t T 61 | if len(r.data) == 0 { 62 | return t, false 63 | } 64 | 65 | // Only pick from the first 0.01% of the array 66 | lenFirstPercent := int(float64(len(r.data)) * float64(0.01)) 67 | 68 | i := r.rng.Intn(lenFirstPercent + 1) 69 | t = r.data[i] 70 | r.data = append(r.data[:i], r.data[i+1:]...) 
71 | return t, true 72 | } 73 | -------------------------------------------------------------------------------- /internal/simulator/constant/constant.go: -------------------------------------------------------------------------------- 1 | package constant 2 | 3 | import "github.com/tangledbytes/go-vsr/pkg/time" 4 | 5 | const PACKET_DROP_PERCENT = 0.1 6 | const PACKET_DUPS_PERCENT = 0.01 7 | const UNORDERED_PACKET_DELIVERY_PERCENT = 0.01 8 | const MIN_PACKET_DELAY = 1 * time.MILLISECOND 9 | const MAX_PACKET_DELAY = 30 * time.MILLISECOND 10 | const MAX_PACKET_INQUEUE = 1 << 16 11 | 12 | const MIN_REPLICAS = 1 13 | const MAX_REPLICAS = 9 14 | const MAX_REPLICA_MULTIPLIER = 1 15 | const MIN_CLIENTS = 1 16 | const MAX_CLIENTS = 100 17 | const MAX_CLIENT_MULTIPLIER = 1 18 | const MIN_REQUESTS = 1e3 19 | const MAX_REQUESTS = 1e4 20 | -------------------------------------------------------------------------------- /internal/simulator/network.go: -------------------------------------------------------------------------------- 1 | package simulator 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "io" 7 | "log/slog" 8 | "math/rand" 9 | 10 | "github.com/tangledbytes/go-vsr/internal/simulator/array" 11 | "github.com/tangledbytes/go-vsr/internal/simulator/constant" 12 | "github.com/tangledbytes/go-vsr/pkg/assert" 13 | "github.com/tangledbytes/go-vsr/pkg/events" 14 | "github.com/tangledbytes/go-vsr/pkg/ipv4port" 15 | "github.com/tangledbytes/go-vsr/pkg/network" 16 | "github.com/tangledbytes/go-vsr/pkg/time" 17 | "github.com/tangledbytes/go-vsr/pkg/utils" 18 | ) 19 | 20 | type Route struct { 21 | data *array.Rand[events.NetworkEvent] 22 | 23 | dropPercent float64 24 | dupsPercent float64 25 | delayTimer *time.Timer 26 | } 27 | 28 | type Routes struct { 29 | rng *rand.Rand 30 | time *time.Virtual 31 | idxSrcDest map[string]map[string]int 32 | idxSrcAny map[string][]*Route 33 | 34 | logger *slog.Logger 35 | } 36 | 37 | func NewRoutes(rng *rand.Rand, time *time.Virtual, logger *slog.Logger) *Routes { 38 | return &Routes{ 39 | rng: rng, 40 | time: time, 41 | idxSrcDest: make(map[string]map[string]int), 42 | idxSrcAny: make(map[string][]*Route), 43 | 44 | logger: logger, 45 | } 46 | } 47 | 48 | func (r *Routes) AddPacket(src, dest string, ev events.NetworkEvent) bool { 49 | assert.Assert(ev.Event != nil, "event should not be nil") 50 | 51 | _, ok := r.idxSrcDest[dest] 52 | if !ok { 53 | r.idxSrcDest[dest] = make(map[string]int) 54 | } 55 | 56 | _, ok = r.idxSrcAny[dest] 57 | if !ok { 58 | r.idxSrcAny[dest] = make([]*Route, 0) 59 | } 60 | 61 | _, ok = r.idxSrcDest[dest][src] 62 | if !ok { 63 | r.idxSrcDest[dest][src] = len(r.idxSrcAny[dest]) 64 | 65 | dupspercent := r.rng.Float64() * constant.PACKET_DUPS_PERCENT 66 | droppercent := r.rng.Float64() * constant.PACKET_DROP_PERCENT 67 | delayTimer := utils.RandomIntRange(r.rng, constant.MIN_PACKET_DELAY, constant.MAX_PACKET_DELAY) 68 | r.logger.Debug( 69 | "new route created", 70 | "drop percent", droppercent*100, 71 | "dups percent", dupspercent*100, 72 | "delay timer", delayTimer, 73 | ) 74 | 75 | r.idxSrcAny[dest] = append(r.idxSrcAny[dest], &Route{ 76 | data: array.NewRand[events.NetworkEvent](r.rng, r.logger), 77 | dropPercent: droppercent, 78 | dupsPercent: dupspercent, 79 | delayTimer: time.NewTimer( 80 | r.time, 81 | uint64(delayTimer), 82 | ), 83 | }) 84 | } 85 | 86 | route := r.idxSrcAny[dest][r.idxSrcDest[dest][src]] 87 | if route.data.Len() >= constant.MAX_PACKET_INQUEUE { 88 | return false 89 | } 90 | 91 | dropChance := r.rng.Float64() 92 | if dropChance 
<= route.dropPercent { 93 | return true 94 | } 95 | 96 | route.data.Push(ev) 97 | return true 98 | } 99 | 100 | // SimulatedNetworkWorld is an awful name but can't think of anything better 101 | type SimulatedNetworkWorld struct { 102 | routes *Routes 103 | 104 | time *time.Virtual 105 | rng *rand.Rand 106 | 107 | logger *slog.Logger 108 | } 109 | 110 | func NewSimulatedNetworkWorld(time *time.Virtual, rng *rand.Rand, logger *slog.Logger) *SimulatedNetworkWorld { 111 | return &SimulatedNetworkWorld{ 112 | routes: NewRoutes(rng, time, logger), 113 | time: time, 114 | rng: rng, 115 | 116 | logger: logger, 117 | } 118 | } 119 | 120 | // Node returns a network node for the given address, this can be called 121 | // multiple times for the same address and it will return the same looking 122 | // node. 123 | func (snw *SimulatedNetworkWorld) Node(addr string) *SimulatedNetworkNode { 124 | ipv4port := ipv4port.IPv4Port{} 125 | if err := ipv4port.FromHostPort(addr); err != nil { 126 | panic(err) 127 | } 128 | 129 | return &SimulatedNetworkNode{ 130 | routes: snw.routes, 131 | addr: ipv4port, 132 | rng: snw.rng, 133 | 134 | logger: snw.logger, 135 | } 136 | } 137 | 138 | type SimulatedNetworkNode struct { 139 | routes *Routes 140 | addr ipv4port.IPv4Port 141 | rng *rand.Rand 142 | 143 | logger *slog.Logger 144 | } 145 | 146 | func (n *SimulatedNetworkNode) Send(dest ipv4port.IPv4Port, f func(io.Writer) error) error { 147 | buf := bytes.NewBuffer([]byte{}) 148 | if err := f(buf); err != nil { 149 | return err 150 | } 151 | 152 | ev := &events.Event{} 153 | if err := ev.FromReader(buf); err != nil { 154 | return err 155 | } 156 | 157 | // duplications ? 158 | send := 1 159 | dupChance := n.rng.Float64() 160 | if dupChance <= constant.PACKET_DUPS_PERCENT { 161 | // Not doing for now - client isn't ready 162 | send = 1 163 | } 164 | 165 | for i := 0; i < send; i++ { 166 | if !n.routes.AddPacket(n.addr.String(), dest.String(), events.NewNetworkEvent(n.addr, ev)) { 167 | return fmt.Errorf("failed to add packet to route") 168 | } 169 | } 170 | 171 | return nil 172 | } 173 | 174 | func (n *SimulatedNetworkNode) Recv() (events.NetworkEvent, bool) { 175 | this := n.addr.String() 176 | 177 | l := len(n.routes.idxSrcAny[this]) 178 | if l == 0 { 179 | return events.NetworkEvent{}, false 180 | } 181 | 182 | dest := n.rng.Intn(l) 183 | if !n.routes.idxSrcAny[this][dest].delayTimer.Done() { 184 | return events.NetworkEvent{}, false 185 | } 186 | 187 | n.routes.idxSrcAny[this][dest].delayTimer.Reset() 188 | nev, ok := n.routes.idxSrcAny[this][dest].data.Pop() 189 | if !ok { 190 | return events.NetworkEvent{}, false 191 | } 192 | 193 | assert.Assert(nev.Event != nil, "event should not be nil") 194 | return nev, true 195 | } 196 | 197 | var _ network.Network = (*SimulatedNetworkNode)(nil) 198 | -------------------------------------------------------------------------------- /internal/simulator/simulator.go: -------------------------------------------------------------------------------- 1 | package simulator 2 | 3 | import ( 4 | "fmt" 5 | "log/slog" 6 | "math/rand" 7 | 8 | "github.com/tangledbytes/go-vsr/internal/simulator/constant" 9 | "github.com/tangledbytes/go-vsr/pkg/assert" 10 | "github.com/tangledbytes/go-vsr/pkg/client" 11 | "github.com/tangledbytes/go-vsr/pkg/replica" 12 | "github.com/tangledbytes/go-vsr/pkg/time" 13 | "github.com/tangledbytes/go-vsr/pkg/utils" 14 | ) 15 | 16 | type Simulator struct { 17 | rng *rand.Rand 18 | seed uint64 19 | 20 | net *SimulatedNetworkWorld 21 | time *time.Virtual 22 | 23 | 
replicas []*replica.Replica 24 | replicaCfgs []replica.Config 25 | replicaMultipliers []int 26 | replicaStores []*store 27 | 28 | clients []*client.Client 29 | clientCfgs []client.Config 30 | clientMultipliers []int 31 | 32 | replicaLogger *slog.Logger 33 | clientLogger *slog.Logger 34 | logger *slog.Logger 35 | 36 | simStore *store 37 | } 38 | 39 | type store struct { 40 | data map[string]string 41 | } 42 | 43 | func (s *store) Apply(m string) string { 44 | s.data[m] = m 45 | return m 46 | } 47 | 48 | func New(seed uint64, replicalogger, clientlogger, simlogger *slog.Logger) *Simulator { 49 | time := time.NewVirtual(250 * time.MICROSECOND) // In this world, 1 mus is 1 tick. 50 | rng := rand.New(rand.NewSource(int64(seed))) 51 | 52 | return &Simulator{ 53 | rng: rng, 54 | seed: seed, 55 | 56 | net: NewSimulatedNetworkWorld(time, rng, simlogger), 57 | time: time, 58 | 59 | replicas: make([]*replica.Replica, 0), 60 | replicaCfgs: make([]replica.Config, 0), 61 | replicaStores: make([]*store, 0), 62 | 63 | clients: make([]*client.Client, 0), 64 | clientCfgs: make([]client.Config, 0), 65 | 66 | replicaLogger: replicalogger, 67 | clientLogger: clientlogger, 68 | logger: simlogger, 69 | 70 | simStore: &store{data: make(map[string]string)}, 71 | } 72 | } 73 | 74 | func (s *Simulator) Simulate() { 75 | replicaCount := utils.RandomIntRange(s.rng, constant.MIN_REPLICAS, constant.MAX_REPLICAS) 76 | clientCount := utils.RandomIntRange(s.rng, constant.MIN_CLIENTS, constant.MAX_CLIENTS) 77 | reqCount := utils.RandomIntRange(s.rng, constant.MIN_REQUESTS, constant.MAX_REQUESTS) 78 | 79 | s.logger.Info( 80 | "Simulation starting", 81 | "seed", s.seed, 82 | "replica_count", replicaCount, 83 | "client_count", clientCount, 84 | "request_count", reqCount, 85 | ) 86 | 87 | if err := s.initializeReplicaStores(replicaCount); err != nil { 88 | panic(err) 89 | } 90 | if err := s.initializeReplicas(replicaCount); err != nil { 91 | panic(err) 92 | } 93 | if err := s.initializeClients(clientCount); err != nil { 94 | panic(err) 95 | } 96 | 97 | sentReq := 0 98 | processedReq := 0 99 | progressVerifier := s.clusterProgressVerifier(&sentReq, &processedReq) 100 | for { 101 | s.runReplicas() 102 | 103 | s.clusterSanityChecks() 104 | progressVerifier() 105 | 106 | sentReq += s.simulateRequests(sentReq, reqCount) 107 | processedReq += s.runClients() 108 | 109 | s.time.Tick() 110 | 111 | // Done processing 112 | if processedReq == reqCount { 113 | break 114 | } 115 | } 116 | 117 | s.logger.Info( 118 | "Simulation complete", 119 | "ticks", s.time.Now(), 120 | ) 121 | 122 | assert.Assert(sentReq == reqCount, "sentReq should be equal to reqCount") 123 | 124 | // Verify that cluster state is the same as our global state 125 | s.verifyClusterVSRState() 126 | } 127 | 128 | func (s *Simulator) clusterProgressVerifier(sentReq, processedReq *int) func() { 129 | timer := time.NewTimer(s.time, 5*time.MINUTE) 130 | last := uint64(0) 131 | 132 | timer.Action(func(t *time.Timer) { 133 | opNum := findMaxOpNumber(s.replicas) 134 | if opNum == last { 135 | for _, replica := range s.replicas { 136 | state := replica.VSRState() 137 | s.logger.Info( 138 | "replica progress", 139 | "viewnum", state.ViewNumber, 140 | "opnum", state.OpNum, 141 | "commitnum", state.CommitNumber, 142 | "replicaID", state.ID, 143 | ) 144 | } 145 | } 146 | assert.Assert(opNum > last, "expected opNum to be > %d (last), found %d", opNum, last) 147 | 148 | viewNum := findMaxViewNumber(s.replicas) 149 | s.logger.Info( 150 | "cluster progress:", 151 | "opnum", opNum, 152 | 
"viewnum", viewNum, 153 | "sent req", *sentReq, 154 | "processed req", *processedReq, 155 | ) 156 | 157 | last = opNum 158 | t.Reset() 159 | }) 160 | 161 | return func() { 162 | timer.ActIfDone() 163 | } 164 | } 165 | 166 | func (r *Simulator) clusterSanityChecks() { 167 | for _, replica := range r.replicas { 168 | state := replica.VSRState() 169 | 170 | // 1. At no point any replica's opnum should be greater than its commitnum 171 | assert.Assert( 172 | state.OpNum >= state.CommitNumber, 173 | "expected opNum to be <= commitNum, found opNum: %d, commitNum: %d (viewNum: %d, replicaID: %d)", 174 | state.OpNum, 175 | state.CommitNumber, 176 | state.ViewNumber, 177 | state.ID, 178 | ) 179 | } 180 | } 181 | 182 | func (s *Simulator) verifyClusterVSRState() { 183 | replicaCount := len(s.replicas) 184 | 185 | quorum := replicaCount/2 + 1 186 | vsrStates := make([]replica.VSRState, 0) 187 | for _, replica := range s.replicas { 188 | vsrStates = append(vsrStates, replica.VSRState()) 189 | } 190 | 191 | viewMap := make(map[uint64]int) 192 | maxViews := 0 193 | opsMap := make(map[uint64]int) 194 | maxOps := 0 195 | for _, vsrState := range vsrStates { 196 | viewMap[vsrState.ViewNumber]++ 197 | opsMap[vsrState.OpNum]++ 198 | } 199 | 200 | // 1. Check if >= quorum of replicas have same view number 201 | for _, v := range viewMap { 202 | if v > maxViews { 203 | maxViews = v 204 | } 205 | } 206 | assert.Assert(maxViews >= quorum, "expected maxViews to be >= quorum, found %d", maxViews) 207 | // 2. Check if >= quorum of replicas have same op number 208 | for _, v := range opsMap { 209 | if v > maxOps { 210 | maxOps = v 211 | } 212 | } 213 | assert.Assert(maxOps >= quorum, "expected maxOps to be >= quorum, found %d", maxOps) 214 | // 3. Check if >= quorum of replicas have same commit number (?) 215 | // TODO 216 | // 4. Check if >= quorum of replicas have same log 217 | // TODO 218 | // 5. Check if >= quorum of replicas have same store (?) 219 | invalids := 0 220 | for k, v := range s.simStore.data { 221 | found := 0 222 | for _, store := range s.replicaStores { 223 | if store.data[k] == v { 224 | found++ 225 | } 226 | } 227 | 228 | if found < quorum { 229 | invalids++ 230 | } 231 | } 232 | 233 | // simulation might end before last commit so we check for <= 1 234 | assert.Assert(invalids <= 1, "expected invalids to be <=1 , found %d", invalids) 235 | } 236 | 237 | func (s *Simulator) runReplicas() { 238 | for i, replica := range s.replicas { 239 | cfg := s.replicaCfgs[i] 240 | ev, ok := cfg.Network.Recv() 241 | if ok { 242 | replica.Submit(ev) 243 | } 244 | 245 | // Run the replica a random number of times. 246 | // Hopefully simulates powerful hardware for 247 | // some replicas while weaker for others? 248 | for j := 0; j < s.replicaMultipliers[i]; j++ { 249 | replica.Run() 250 | } 251 | 252 | state := replica.VSRState() 253 | s.logger.Debug( 254 | "VSR State", 255 | "viewnum", state.ViewNumber, 256 | "opnum", state.OpNum, 257 | "commitnum", state.CommitNumber, 258 | "replicaID", state.ID, 259 | ) 260 | } 261 | } 262 | 263 | func (s *Simulator) runClients() int { 264 | processedReq := 0 265 | 266 | for i, client := range s.clients { 267 | cfg := s.clientCfgs[i] 268 | ev, ok := cfg.Network.Recv() 269 | if ok { 270 | client.Submit(ev) 271 | } 272 | 273 | // Run the clients a random number of times. 
274 | // Hopefully simulates more aggresseive clients 275 | for j := 0; j < s.clientMultipliers[i]; j++ { 276 | client.Run() 277 | _, ok = client.CheckResult() 278 | if ok { 279 | processedReq++ 280 | s.logger.Debug("client processed request", "processed", processedReq) 281 | } 282 | } 283 | } 284 | 285 | return processedReq 286 | } 287 | 288 | func (s *Simulator) simulateRequests(sentReq, reqCount int) int { 289 | if sentReq < reqCount { 290 | reqbatch := utils.RandomIntRange(s.rng, 0, 101) 291 | if reqbatch > reqCount-sentReq { 292 | reqbatch = reqCount - sentReq 293 | } 294 | 295 | success := 0 296 | for i := 0; i < reqbatch; i++ { 297 | cID := s.rng.Intn(len(s.clients)) 298 | client := s.clients[cID] 299 | 300 | key := utils.RandomString(s.rng) 301 | if client.Request(key) { 302 | s.simStore.Apply(key) 303 | success += 1 304 | } 305 | } 306 | 307 | return success 308 | } 309 | 310 | return 0 311 | } 312 | 313 | func (s *Simulator) initializeClients(count int) error { 314 | if err := s.initializeClientConfigs(count); err != nil { 315 | return err 316 | } 317 | 318 | for i := 0; i < count; i++ { 319 | client, err := client.New(s.clientCfgs[i]) 320 | if err != nil { 321 | return err 322 | } 323 | 324 | s.clients = append(s.clients, client) 325 | } 326 | 327 | return nil 328 | } 329 | 330 | func (s *Simulator) initializeClientConfigs(count int) error { 331 | s.clientMultipliers = make([]int, count) 332 | 333 | for i := 0; i < count; i++ { 334 | s.clientMultipliers[i] = utils.RandomIntRange(s.rng, 1, constant.MAX_CLIENT_MULTIPLIER+1) 335 | 336 | cfg := client.Config{ 337 | ID: uint64(i), 338 | Network: s.net.Node(clientAddressByID(i)), 339 | Time: s.time, 340 | Members: s.replicaCfgs[0].Members, 341 | RequestTimeout: 30 * time.SECOND, 342 | Logger: s.clientLogger, 343 | } 344 | 345 | s.clientCfgs = append(s.clientCfgs, cfg) 346 | } 347 | 348 | return nil 349 | } 350 | 351 | func (s *Simulator) initializeReplicas(count int) error { 352 | if err := s.initializeReplicaConfigs(count); err != nil { 353 | return err 354 | } 355 | 356 | for i := 0; i < count; i++ { 357 | replica, err := replica.New(s.replicaCfgs[i]) 358 | if err != nil { 359 | return err 360 | } 361 | 362 | s.replicas = append(s.replicas, replica) 363 | } 364 | 365 | return nil 366 | } 367 | 368 | func (s *Simulator) initializeReplicaConfigs(count int) error { 369 | members := []string{} 370 | for i := 0; i < count; i++ { 371 | members = append(members, replicaAddressByID(i)) 372 | } 373 | 374 | if err := s.initializeReplicaStores(count); err != nil { 375 | return err 376 | } 377 | 378 | s.replicaMultipliers = make([]int, count) 379 | 380 | for i := 0; i < count; i++ { 381 | s.replicaMultipliers[i] = utils.RandomIntRange(s.rng, 1, constant.MAX_REPLICA_MULTIPLIER+1) 382 | 383 | cfg := replica.Config{ 384 | ID: uint64(i), 385 | Members: members, 386 | Network: s.net.Node(replicaAddressByID(i)), 387 | Time: s.time, 388 | SrvHandler: s.replicaStores[i].Apply, 389 | HeartbeatTimeout: 60 * time.SECOND, 390 | Logger: s.replicaLogger, 391 | } 392 | s.replicaCfgs = append(s.replicaCfgs, cfg) 393 | } 394 | 395 | return nil 396 | } 397 | 398 | func (s *Simulator) initializeReplicaStores(count int) error { 399 | for i := 0; i < count; i++ { 400 | s.replicaStores = append(s.replicaStores, &store{data: make(map[string]string)}) 401 | } 402 | 403 | return nil 404 | } 405 | 406 | func replicaAddressByID(id int) string { 407 | return fmt.Sprintf("0.0.0.0:%d", 10000+id) 408 | } 409 | 410 | func clientAddressByID(id int) string { 411 | return 
fmt.Sprintf("0.0.0.0:%d", 20000+id) 412 | } 413 | 414 | func findMaxOpNumber(replicas []*replica.Replica) uint64 { 415 | max := uint64(0) 416 | for _, replica := range replicas { 417 | state := replica.VSRState() 418 | if state.OpNum > max { 419 | max = state.OpNum 420 | } 421 | } 422 | 423 | return max 424 | } 425 | 426 | func findMaxViewNumber(replicas []*replica.Replica) uint64 { 427 | max := uint64(0) 428 | for _, replica := range replicas { 429 | state := replica.VSRState() 430 | if state.ViewNumber > max { 431 | max = state.ViewNumber 432 | } 433 | } 434 | 435 | return max 436 | } 437 | -------------------------------------------------------------------------------- /pkg/assert/assert.go: -------------------------------------------------------------------------------- 1 | //go:build !release 2 | 3 | package assert 4 | 5 | import "fmt" 6 | 7 | // Assert panics if cond is false and will 8 | // print msg to the console. 9 | // 10 | // Assert is a no-op when compiled with the 11 | // release build tag. 12 | func Assert(cond bool, format string, args ...any) { 13 | if !cond { 14 | panic(fmt.Sprintln("assertion failed:", fmt.Sprintf(format, args...))) 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /pkg/assert/assert_release.go: -------------------------------------------------------------------------------- 1 | //go:build release 2 | 3 | package assert 4 | 5 | // Assert panics if cond is false and will 6 | // print msg to the console. 7 | // 8 | // Assert is a no-op when compiled with the 9 | // release build tag. 10 | func Assert(cond bool, msg string) { 11 | // no-op 12 | } 13 | -------------------------------------------------------------------------------- /pkg/client/client.go: -------------------------------------------------------------------------------- 1 | package client 2 | 3 | import ( 4 | "io" 5 | "log/slog" 6 | "sort" 7 | 8 | "github.com/tangledbytes/go-vsr/pkg/events" 9 | "github.com/tangledbytes/go-vsr/pkg/ipv4port" 10 | "github.com/tangledbytes/go-vsr/pkg/network" 11 | "github.com/tangledbytes/go-vsr/pkg/queue" 12 | "github.com/tangledbytes/go-vsr/pkg/time" 13 | ) 14 | 15 | type VSRState struct { 16 | // ID is the unique ID of the VSR client. 17 | ID uint64 18 | 19 | // RequestNumber is the monotonically increasing request 20 | // number of the client. 21 | RequestNumber uint64 22 | 23 | // LastKnownClusterMembers is the list of the last known 24 | // cluster members. This may not be the current cluster members. 25 | LastKnownClusterMembers []ipv4port.IPv4Port 26 | 27 | // LastKnownViewNumber is the last known view number of the cluster. 28 | LastKnownViewNumber uint64 29 | } 30 | 31 | type Internal struct { 32 | response *queue.Queue[events.Reply] 33 | 34 | request *events.Request 35 | result *events.Reply 36 | requestTimeout uint64 37 | requestTimer *time.Timer 38 | } 39 | 40 | // Client represents a single client of the VSR cluster. 41 | type Client struct { 42 | state VSRState 43 | internal Internal 44 | 45 | net network.Network 46 | time time.Time 47 | 48 | logger *slog.Logger 49 | } 50 | 51 | type Config struct { 52 | ID uint64 53 | Members []string 54 | Network network.Network 55 | Time time.Time 56 | RequestTimeout uint64 57 | Logger *slog.Logger 58 | } 59 | 60 | // New creates a new client. 
61 | func New(cfg Config) (*Client, error) { 62 | clustermembers := make([]ipv4port.IPv4Port, len(cfg.Members)) 63 | for i, v := range cfg.Members { 64 | if err := clustermembers[i].FromHostPort(v); err != nil { 65 | return nil, err 66 | } 67 | } 68 | 69 | sort.Slice(clustermembers, func(i, j int) bool { 70 | return clustermembers[i].Less(clustermembers[j]) 71 | }) 72 | 73 | client := &Client{ 74 | state: VSRState{ 75 | ID: cfg.ID, 76 | RequestNumber: 0, 77 | LastKnownClusterMembers: clustermembers, 78 | LastKnownViewNumber: 0, 79 | }, 80 | internal: Internal{ 81 | response: queue.New[events.Reply](), 82 | requestTimeout: cfg.RequestTimeout, 83 | }, 84 | net: cfg.Network, 85 | time: cfg.Time, 86 | logger: cfg.Logger, 87 | } 88 | 89 | return client, nil 90 | } 91 | 92 | func (c *Client) Submit(ev events.NetworkEvent) { 93 | switch ev.Event.Type { 94 | case events.EventReply: 95 | c.internal.response.Push(ev.Event.Data.(events.Reply)) 96 | default: 97 | c.logger.Error("Received an invalid event", "event", ev) 98 | } 99 | } 100 | 101 | func (c *Client) Request(op string) bool { 102 | if c.internal.request != nil { 103 | return false 104 | } 105 | 106 | c.state.RequestNumber++ 107 | clusterRequest := &events.Request{ 108 | ID: c.state.RequestNumber, 109 | ClientID: c.state.ID, 110 | Op: op, 111 | } 112 | 113 | c.internal.request = clusterRequest 114 | 115 | if err := c.sendRequest(c.state.LastKnownClusterMembers[c.potentialPrimary()], op); err != nil { 116 | return false 117 | } 118 | 119 | c.internal.requestTimer = time.NewTimer(c.time, c.internal.requestTimeout) 120 | return true 121 | } 122 | 123 | func (c *Client) CheckResult() (events.Reply, bool) { 124 | if c.internal.result == nil { 125 | return events.Reply{}, false 126 | } 127 | 128 | resp := events.Reply{ 129 | ID: c.internal.result.ID, 130 | ClientID: c.internal.result.ClientID, 131 | ViewNum: c.internal.result.ViewNum, 132 | Result: c.internal.result.Result, 133 | } 134 | 135 | c.internal.result = nil 136 | return resp, true 137 | } 138 | 139 | func (c *Client) Run() { 140 | if c.internal.request != nil { 141 | if c.internal.requestTimer.Done() { 142 | c.logger.Debug("client timed out waiting for a reply") 143 | 144 | // Request for broadcast 145 | if err := c.broadcastRequest(c.internal.request.Op); err != nil { 146 | c.logger.Error("client failed to broadcast request", "error", err) 147 | } 148 | 149 | // Reset the timer but don't give up the request slot yet 150 | // next run will check the timer and will attempt the 151 | // broadcast again. 152 | c.internal.requestTimer.Reset() 153 | } 154 | } 155 | 156 | // Check if we have unprocessed replies. 157 | ev, ok := c.internal.response.Pop() 158 | if ok { 159 | c.onReply(ev) 160 | } 161 | } 162 | 163 | func (c *Client) onReply(ev events.Reply) { 164 | c.logger.Debug("client received reply from the cluster", "reply", ev) 165 | 166 | // No requests were pending, why am I here? Probably a duplicate or 167 | // delayed reply. 
168 | if c.internal.request == nil { 169 | c.logger.Debug("client received a reply but no request was pending", "reply", ev) 170 | return 171 | } 172 | 173 | if c.internal.request.ID == ev.ID { 174 | c.logger.Debug("client received reply for the pending request", "reply", ev) 175 | c.internal.request = nil 176 | c.internal.result = &ev 177 | c.state.LastKnownViewNumber = ev.ViewNum 178 | c.internal.requestTimer = nil 179 | } 180 | } 181 | 182 | func (c *Client) sendRequest(to ipv4port.IPv4Port, op string) error { 183 | req := events.Request{ 184 | ID: c.state.RequestNumber, 185 | ClientID: c.state.ID, 186 | Op: op, 187 | } 188 | 189 | return c.net.Send(to, func(w io.Writer) error { 190 | return (&events.Event{ 191 | Type: events.EventRequest, 192 | Data: req, 193 | }).ToWriter(w) 194 | }) 195 | } 196 | 197 | func (c *Client) broadcastRequest(op string) error { 198 | for _, member := range c.state.LastKnownClusterMembers { 199 | if err := c.sendRequest(member, op); err != nil { 200 | return err 201 | } 202 | } 203 | 204 | return nil 205 | } 206 | 207 | func (c *Client) potentialPrimary() uint64 { 208 | return uint64(int(c.state.LastKnownViewNumber) % len(c.state.LastKnownClusterMembers)) 209 | } 210 | -------------------------------------------------------------------------------- /pkg/events/events.go: -------------------------------------------------------------------------------- 1 | package events 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 | "io" 7 | 8 | "github.com/tangledbytes/go-vsr/pkg/ipv4port" 9 | "github.com/tangledbytes/go-vsr/pkg/log" 10 | ) 11 | 12 | type EventType uint8 13 | 14 | const ( 15 | EventRequest EventType = iota + 1 16 | EventReply 17 | EventPrepare 18 | EventPrepareOK 19 | EventCommit 20 | EventStartViewChange 21 | EventDoViewChange 22 | EventStartView 23 | EventGetState 24 | EventNewState 25 | EventHeartbeat 26 | ) 27 | 28 | // ErrInvalidEventType is returned when an invalid event type is encountered. 
29 | var ErrInvalidEventType = errors.New("invalid event type") 30 | 31 | type NetworkEvent struct { 32 | Src ipv4port.IPv4Port 33 | Event *Event 34 | } 35 | 36 | type Event struct { 37 | Type EventType `json:"@type"` 38 | Data any `json:"data"` 39 | } 40 | 41 | type Request struct { 42 | ID uint64 `json:"id,string"` 43 | ClientID uint64 `json:"client_id,string"` 44 | Op string `json:"op"` 45 | } 46 | 47 | type Reply struct { 48 | ID uint64 `json:"id,string"` 49 | ClientID uint64 `json:"client_id,string"` 50 | ViewNum uint64 `json:"view_num,string"` 51 | Result string `json:"result"` 52 | } 53 | 54 | type Prepare struct { 55 | ViewNum uint64 `json:"view_num,string"` 56 | OpNum uint64 `json:"op_num,string"` 57 | CommitNum uint64 `json:"commit_num,string"` 58 | Request Request `json:"request"` 59 | ReplicaID uint64 `json:"replica_id,string"` 60 | } 61 | 62 | type PrepareOK struct { 63 | ViewNum uint64 `json:"view_num,string"` 64 | OpNum uint64 `json:"op_num,string"` 65 | ReplicaID uint64 `json:"replica_id,string"` 66 | } 67 | 68 | type Commit struct { 69 | ViewNum uint64 `json:"view_num,string"` 70 | CommitNum uint64 `json:"commit_num,string"` 71 | ReplicaID uint64 `json:"replica_id,string"` 72 | } 73 | 74 | type StartViewChange struct { 75 | ViewNum uint64 `json:"view_num,string"` 76 | ReplicaID uint64 `json:"replica_id,string"` 77 | } 78 | 79 | type DoViewChange struct { 80 | ViewNum uint64 `json:"view_num,string"` 81 | Logs log.Logs `json:"logs"` 82 | LastStableViewNum uint64 `json:"last_stable_view_num,string"` 83 | OpNum uint64 `json:"op_num,string"` 84 | CommitNum uint64 `json:"commit_num,string"` 85 | ReplicaID uint64 `json:"replica_id,string"` 86 | } 87 | 88 | type StartView struct { 89 | ViewNum uint64 `json:"view_num,string"` 90 | OpNum uint64 `json:"op_num,string"` 91 | CommitNum uint64 `json:"commit_num,string"` 92 | Logs log.Logs `json:"logs"` 93 | ReplicaID uint64 `json:"replica_id,string"` 94 | } 95 | 96 | type GetState struct { 97 | ViewNum uint64 `json:"view_num,string"` 98 | OpNum uint64 `json:"op_num,string"` 99 | ReplicaID uint64 `json:"replica_id,string"` 100 | } 101 | 102 | type NewState struct { 103 | ViewNum uint64 `json:"view_num,string"` 104 | Logs log.Logs `json:"logs"` 105 | CommitNum uint64 `json:"commit_num,string"` 106 | OpNum uint64 `json:"op_num,string"` 107 | ReplicaID uint64 `json:"replica_id,string"` 108 | } 109 | 110 | type Heartbeat struct{} 111 | 112 | func (ev *Event) FromReader(r io.Reader) error { 113 | temp := struct { 114 | Type EventType `json:"@type"` 115 | Data json.RawMessage `json:"data"` 116 | }{} 117 | 118 | if err := json.NewDecoder(r).Decode(&temp); err != nil { 119 | return err 120 | } 121 | 122 | ev.Type = temp.Type 123 | var err error 124 | 125 | switch ev.Type { 126 | case EventRequest: 127 | var v Request 128 | err = json.Unmarshal(temp.Data, &v) 129 | ev.Data = v 130 | case EventPrepare: 131 | var v Prepare 132 | err = json.Unmarshal(temp.Data, &v) 133 | ev.Data = v 134 | case EventPrepareOK: 135 | var v PrepareOK 136 | err = json.Unmarshal(temp.Data, &v) 137 | ev.Data = v 138 | case EventCommit: 139 | var v Commit 140 | err = json.Unmarshal(temp.Data, &v) 141 | ev.Data = v 142 | case EventStartViewChange: 143 | var v StartViewChange 144 | err = json.Unmarshal(temp.Data, &v) 145 | ev.Data = v 146 | case EventDoViewChange: 147 | var v DoViewChange 148 | err = json.Unmarshal(temp.Data, &v) 149 | ev.Data = v 150 | case EventStartView: 151 | var v StartView 152 | err = json.Unmarshal(temp.Data, &v) 153 | ev.Data = v 154 | case 
EventGetState: 155 | var v GetState 156 | err = json.Unmarshal(temp.Data, &v) 157 | ev.Data = v 158 | case EventNewState: 159 | var v NewState 160 | err = json.Unmarshal(temp.Data, &v) 161 | ev.Data = v 162 | case EventHeartbeat: 163 | var v Heartbeat 164 | err = json.Unmarshal(temp.Data, &v) 165 | ev.Data = v 166 | case EventReply: 167 | var v Reply 168 | err = json.Unmarshal(temp.Data, &v) 169 | ev.Data = v 170 | default: 171 | return ErrInvalidEventType 172 | } 173 | 174 | if err != nil { 175 | return err 176 | } 177 | 178 | return nil 179 | } 180 | 181 | func (ev *Event) ToWriter(w io.Writer) error { 182 | if err := json.NewEncoder(w).Encode(ev); err != nil { 183 | return err 184 | } 185 | 186 | return nil 187 | } 188 | 189 | func NewNetworkEvent(src ipv4port.IPv4Port, ev *Event) NetworkEvent { 190 | return NetworkEvent{ 191 | Src: src, 192 | Event: ev, 193 | } 194 | } 195 | -------------------------------------------------------------------------------- /pkg/events/events_test.go: -------------------------------------------------------------------------------- 1 | package events 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | "math" 7 | "reflect" 8 | "testing" 9 | ) 10 | 11 | func TestEvent_FromReader(t *testing.T) { 12 | type args struct { 13 | r io.Reader 14 | } 15 | tests := []struct { 16 | name string 17 | expevent Event 18 | args args 19 | wantErr bool 20 | }{ 21 | { 22 | name: "Invalid JSON", 23 | expevent: Event{ 24 | Type: EventRequest, 25 | Data: Request{ 26 | ID: 1, 27 | ClientID: 10, 28 | Op: "hello", 29 | }, 30 | }, 31 | args: args{ 32 | r: bytes.NewReader([]byte(`{"@type":1,"data":{`)), 33 | }, 34 | wantErr: true, 35 | }, 36 | { 37 | name: "Valid JSON", 38 | expevent: Event{ 39 | Type: EventRequest, 40 | Data: Request{ 41 | ID: 1, 42 | ClientID: 10, 43 | Op: "test command", 44 | }, 45 | }, 46 | args: args{ 47 | r: bytes.NewReader([]byte(`{"@type":1,"data":{"id":"1","client_id":"10","op":"test command"}}`)), 48 | }, 49 | wantErr: false, 50 | }, 51 | { 52 | name: "Valid JSON with newline terminator", 53 | expevent: Event{ 54 | Type: EventRequest, 55 | Data: Request{ 56 | ID: 1, 57 | ClientID: 10, 58 | Op: "test command", 59 | }, 60 | }, 61 | args: args{ 62 | r: bytes.NewReader([]byte(`{"@type":1,"data":{"id":"1","client_id":"10","op":"test command"}}` + "\n")), 63 | }, 64 | wantErr: false, 65 | }, 66 | { 67 | name: "Valid JSON with max UINT64", 68 | expevent: Event{ 69 | Type: EventRequest, 70 | Data: Request{ 71 | ID: math.MaxUint64, 72 | ClientID: 101, 73 | Op: "test command", 74 | }, 75 | }, 76 | args: args{ 77 | r: bytes.NewReader([]byte(`{"@type":1,"data":{"id":"18446744073709551615","client_id":"101","op":"test command"}}`)), 78 | }, 79 | wantErr: false, 80 | }, 81 | { 82 | name: "Valid \"PREPARE\" JSON with max UINT64", 83 | expevent: Event{ 84 | Type: EventPrepare, 85 | Data: Prepare{ 86 | ViewNum: 3, 87 | OpNum: math.MaxUint64, 88 | CommitNum: 101, 89 | Request: Request{ 90 | ID: 12, 91 | ClientID: 101, 92 | Op: "test command", 93 | }, 94 | }, 95 | }, 96 | args: args{ 97 | r: bytes.NewReader([]byte(`{"@type":3,"data":{"view_num":"3","op_num":"18446744073709551615","commit_num":"101","request": {"id": "12", "client_id": "101", "op": "test command"}}}`)), 98 | }, 99 | wantErr: false, 100 | }, 101 | } 102 | for _, tt := range tests { 103 | t.Run(tt.name, func(t *testing.T) { 104 | ev := Event{} 105 | err := ev.FromReader(tt.args.r) 106 | if (err != nil) != tt.wantErr { 107 | t.Fatalf("Event.FromReader() error = %v, wantErr %v", err, tt.wantErr) 108 | } 109 | if err == nil { 
110 | if !reflect.DeepEqual(ev, tt.expevent) { 111 | t.Fatalf("Event.FromReader() got = %+v, want %+v", ev, tt.expevent) 112 | } 113 | } 114 | }) 115 | } 116 | } 117 | 118 | func TestEvent_ToWriter(t *testing.T) { 119 | type fields struct { 120 | Type EventType 121 | Data any 122 | } 123 | tests := []struct { 124 | name string 125 | fields fields 126 | wantW string 127 | wantErr bool 128 | }{ 129 | { 130 | name: "Encode EventRequest", 131 | fields: fields{ 132 | Type: EventRequest, 133 | Data: Request{ 134 | ID: 1, 135 | ClientID: 10, 136 | Op: "hello", 137 | }, 138 | }, 139 | wantW: `{"@type":1,"data":{"id":"1","client_id":"10","op":"hello"}}` + "\n", 140 | wantErr: false, 141 | }, 142 | } 143 | for _, tt := range tests { 144 | t.Run(tt.name, func(t *testing.T) { 145 | ev := &Event{ 146 | Type: tt.fields.Type, 147 | Data: tt.fields.Data, 148 | } 149 | w := bytes.NewBuffer([]byte{}) 150 | if err := ev.ToWriter(w); (err != nil) != tt.wantErr { 151 | t.Errorf("Event.ToWriter() error = %v, wantErr %v", err, tt.wantErr) 152 | return 153 | } 154 | if gotW := w.String(); gotW != tt.wantW { 155 | t.Errorf("Event.ToWriter() = %v, want %v", gotW, tt.wantW) 156 | } 157 | }) 158 | } 159 | } 160 | -------------------------------------------------------------------------------- /pkg/ipv4port/ipv4port.go: -------------------------------------------------------------------------------- 1 | package ipv4port 2 | 3 | import ( 4 | "encoding/binary" 5 | "fmt" 6 | "net" 7 | "strconv" 8 | ) 9 | 10 | // IPv4Port is a pair of IPv4 address and port. 11 | type IPv4Port struct { 12 | First net.IP 13 | Second int 14 | 15 | str string 16 | } 17 | 18 | func (ipPort IPv4Port) GetIP() net.IP { 19 | return ipPort.First 20 | } 21 | 22 | func (ipPort IPv4Port) GetPort() int { 23 | return ipPort.Second 24 | } 25 | 26 | func (ipPort *IPv4Port) SetIP(ip net.IP) { 27 | ipPort.First = ip 28 | ipPort.str = fmt.Sprintf("%s:%d", ipPort.First.String(), ipPort.Second) 29 | } 30 | 31 | func (ipPort *IPv4Port) SetPort(port int) { 32 | ipPort.Second = port 33 | ipPort.str = fmt.Sprintf("%s:%d", ipPort.First.String(), ipPort.Second) 34 | } 35 | 36 | func (ipPort IPv4Port) String() string { 37 | return ipPort.str 38 | } 39 | 40 | func (ipPort *IPv4Port) FromHostPort(hostport string) error { 41 | ip, port, err := net.SplitHostPort(hostport) 42 | if err != nil { 43 | return err 44 | } 45 | 46 | parsedIP := net.ParseIP(ip) 47 | if parsedIP == nil { 48 | return fmt.Errorf("invalid IP address: %s", ip) 49 | } 50 | 51 | if parsedIP.To4() == nil { 52 | return fmt.Errorf("only IPv4 addresses are supported") 53 | } 54 | 55 | parsedPort, err := strconv.Atoi(port) 56 | if err != nil { 57 | return err 58 | } 59 | 60 | ipPort.SetIP(parsedIP) 61 | ipPort.SetPort(parsedPort) 62 | 63 | ipPort.str = fmt.Sprintf("%s:%d", ipPort.First.String(), ipPort.Second) 64 | return nil 65 | } 66 | 67 | func (ipPort IPv4Port) Less(other IPv4Port) bool { 68 | if ipPort.GetIP().Equal(other.GetIP()) { 69 | return ipPort.GetPort() < other.GetPort() 70 | } 71 | 72 | curr := ipPort.GetIP().To4() 73 | otherIP := other.GetIP().To4() 74 | if otherIP == nil { 75 | panic("other IP is not IPv4") 76 | } 77 | 78 | parsedCurr := binary.BigEndian.Uint32(curr) // Uint32 covers all four octets; Uint16 would compare only the first two 79 | parsedOther := binary.BigEndian.Uint32(otherIP) 80 | 81 | return parsedCurr < parsedOther 82 | } 83 | -------------------------------------------------------------------------------- /pkg/log/log.go: -------------------------------------------------------------------------------- 1 | package log 2 | 3 | import "encoding/json" 4 | 5 |
type Log struct { 6 | Data string `json:"data"` 7 | ClientID uint64 `json:"client_id,string"` 8 | RequestID uint64 `json:"request_id,string"` 9 | } 10 | 11 | type Logs []Log 12 | 13 | func NewLogs() Logs { 14 | return make(Logs, 0) 15 | } 16 | 17 | func (l Logs) Len() int { 18 | return len(l) 19 | } 20 | 21 | func (l *Logs) Add(log Log) { 22 | *l = append(*l, log) 23 | } 24 | 25 | func (l Logs) OpAt(i int) string { 26 | return (l)[i-1].Data // op numbers are 1-indexed 27 | } 28 | 29 | func (l Logs) ClientIDAt(i int) uint64 { 30 | return (l)[i-1].ClientID 31 | } 32 | 33 | func (l Logs) RequestIDAt(i int) uint64 { 34 | return (l)[i-1].RequestID 35 | } 36 | 37 | func (l *Logs) TruncateAfter(i int) { 38 | *l = (*l)[:i] // keep ops 1..i; the previous (*l)[:(i - 1)] also dropped op i and panicked for i == 0 39 | } 40 | 41 | func (l *Logs) After(i int) Logs { 42 | return (*l)[i:] 43 | } 44 | 45 | func (l *Logs) Merge(logs Logs) { 46 | *l = append(*l, logs...) 47 | } 48 | 49 | func (l Logs) Last() Log { 50 | return l[len(l)-1] 51 | } 52 | 53 | func (l *Logs) UnmarshalJSON(data []byte) error { 54 | var logs []Log 55 | if err := json.Unmarshal(data, &logs); err != nil { 56 | return err 57 | } 58 | 59 | *l = logs 60 | return nil 61 | } 62 | 63 | func (l Logs) MarshalJSON() ([]byte, error) { 64 | return json.Marshal([]Log(l)) 65 | } 66 | -------------------------------------------------------------------------------- /pkg/network/network.go: -------------------------------------------------------------------------------- 1 | package network 2 | 3 | import ( 4 | "io" 5 | 6 | "github.com/tangledbytes/go-vsr/pkg/events" 7 | "github.com/tangledbytes/go-vsr/pkg/ipv4port" 8 | ) 9 | // Network abstracts the transport: Send writes one encoded event to a destination; Recv pops one received event, if any. 10 | type Network interface { 11 | Send(ipv4port.IPv4Port, func(io.Writer) error) error 12 | Recv() (events.NetworkEvent, bool) 13 | } 14 | -------------------------------------------------------------------------------- /pkg/network/tcp.go: -------------------------------------------------------------------------------- 1 | package network 2 | 3 | import ( 4 | "errors" 5 | "io" 6 | "log/slog" 7 | "net" 8 | "sync" 9 | "github.com/tangledbytes/go-vsr/pkg/assert" 10 | "github.com/tangledbytes/go-vsr/pkg/events" 11 | "github.com/tangledbytes/go-vsr/pkg/ipv4port" 12 | "github.com/tangledbytes/go-vsr/pkg/queue" 13 | ) 14 | 15 | type TCP struct { 16 | ip ipv4port.IPv4Port 17 | 18 | conns map[string]net.Conn 19 | 20 | queue *queue.Queue[events.NetworkEvent] 21 | mu *sync.RWMutex 22 | } 23 | 24 | func NewTCP(hostname string) *TCP { 25 | ipv4port := ipv4port.IPv4Port{} 26 | err := ipv4port.FromHostPort(hostname) 27 | assert.Assert(err == nil, "err should be nil") 28 | 29 | return &TCP{ 30 | ip: ipv4port, 31 | conns: make(map[string]net.Conn), 32 | mu: &sync.RWMutex{}, 33 | queue: queue.New[events.NetworkEvent](), 34 | } 35 | } 36 | 37 | func (tcp *TCP) Run() error { 38 | listener, err := net.Listen("tcp4", tcp.ip.String()) 39 | if err != nil { 40 | return err 41 | } 42 | 43 | slog.Debug("started listener", "addr", tcp.ip.String()) 44 | 45 | for { 46 | conn, err := listener.Accept() 47 | if err != nil { // check the error before touching conn: Accept returns a nil conn on failure 48 | slog.Debug("failed to accept a connection", "err", err) 49 | continue 50 | } 51 | slog.Debug("accepted a connection", "remote-addr", conn.RemoteAddr().String()) 52 | 53 | tcp.mu.Lock() 54 | tcp.conns[conn.RemoteAddr().String()] = conn 55 | tcp.mu.Unlock() 56 | 57 | go tcp.handleConn(conn) 58 | } 59 | } 60 | 61 | func (tcp *TCP) Send(ipv4port ipv4port.IPv4Port, f func(io.Writer) error) error { 62 | slog.Debug("received request to send", "ipv4port", ipv4port.String()) 63 | 64 | tcp.mu.RLock() 65 | conn, ok := tcp.conns[ipv4port.String()] 66 |
tcp.mu.RUnlock() 67 | if !ok { 68 | var err error 69 | conn, err = net.Dial("tcp4", ipv4port.String()) 70 | if err != nil { 71 | slog.Debug("failed to dial", "err", err) 72 | return err 73 | } 74 | 75 | tcp.mu.Lock() 76 | tcp.conns[ipv4port.String()] = conn 77 | tcp.mu.Unlock() 78 | 79 | go tcp.handleConn(conn) 80 | } 81 | 82 | slog.Debug("attempting to send an event", "ipv4port", ipv4port.String()) 83 | if err := f(conn); err != nil { 84 | slog.Debug("failed to send an event", "err", err) 85 | 86 | if errors.Is(err, net.ErrClosed) { // errors.Is also matches the wrapped *net.OpError returned by Write 87 | tcp.mu.Lock() 88 | delete(tcp.conns, ipv4port.String()) 89 | tcp.mu.Unlock() 90 | } 91 | 92 | return err 93 | } 94 | slog.Debug("sent an event", "ipv4port", ipv4port.String()) 95 | 96 | return nil 97 | } 98 | 99 | func (tcp *TCP) Recv() (events.NetworkEvent, bool) { 100 | return tcp.queue.Pop() 101 | } 102 | 103 | func (tcp *TCP) handleConn(conn net.Conn) { 104 | srcaddr := ipv4port.IPv4Port{} 105 | srcaddr.FromHostPort(conn.RemoteAddr().String()) // always an IPv4 host:port here since listener and dialer use "tcp4" 106 | 107 | for { 108 | ev := &events.Event{} 109 | slog.Debug("conn waiting for an event") 110 | if err := ev.FromReader(conn); err != nil { 111 | slog.Debug("failed to read an event", "err", err) 112 | break 113 | } 114 | 115 | slog.Debug("conn received an event", "type", ev.Type, "from", srcaddr.String()) 116 | tcp.queue.Push(events.NewNetworkEvent(srcaddr, ev)) 117 | slog.Debug("conn sent an event to the handler", "type", ev.Type, "from", srcaddr.String()) 118 | } 119 | 120 | tcp.mu.Lock() 121 | if err := conn.Close(); err != nil { 122 | slog.Debug("failed to properly close an errored connection", "err", err) 123 | } 124 | delete(tcp.conns, conn.RemoteAddr().String()) 125 | tcp.mu.Unlock() 126 | } 127 | 128 | var _ Network = (*TCP)(nil) 129 | -------------------------------------------------------------------------------- /pkg/queue/queue.go: -------------------------------------------------------------------------------- 1 | package queue 2 | 3 | import "sync" 4 | 5 | type Queue[T any] struct { 6 | head *node[T] 7 | tail *node[T] 8 | len int 9 | 10 | mu *sync.Mutex 11 | } 12 | 13 | type node[T any] struct { 14 | value T 15 | next *node[T] 16 | } 17 | 18 | func New[T any]() *Queue[T] { 19 | return &Queue[T]{ 20 | mu: &sync.Mutex{}, 21 | } 22 | } 23 | 24 | func (q *Queue[T]) Push(value T) { 25 | q.mu.Lock() 26 | defer q.mu.Unlock() 27 | 28 | n := &node[T]{} 29 | n.value = value 30 | 31 | if q.head == nil { 32 | q.head = n 33 | q.tail = n 34 | } else { 35 | q.tail.next = n 36 | q.tail = n 37 | } 38 | 39 | q.len++ 40 | } 41 | 42 | func (q *Queue[T]) Pop() (T, bool) { 43 | q.mu.Lock() 44 | defer q.mu.Unlock() 45 | 46 | var t T 47 | 48 | if q.head == nil { 49 | return t, false 50 | } 51 | 52 | n := q.head 53 | q.head = n.next 54 | if q.head == nil { 55 | q.tail = nil 56 | } 57 | 58 | q.len-- 59 | return n.value, true 60 | } 61 | 62 | func (q *Queue[T]) Len() int { 63 | q.mu.Lock() 64 | defer q.mu.Unlock() 65 | 66 | return q.len 67 | } 68 | 69 | func (q *Queue[T]) Empty() bool { 70 | q.mu.Lock() 71 | defer q.mu.Unlock() 72 | 73 | return q.len == 0 74 | } 75 | -------------------------------------------------------------------------------- /pkg/replica/replica.go: -------------------------------------------------------------------------------- 1 | package replica 2 | 3 | import ( 4 | "io" 5 | "log/slog" 6 | "sort" 7 | 8 | "github.com/tangledbytes/go-vsr/pkg/assert" 9 | "github.com/tangledbytes/go-vsr/pkg/events" 10 | "github.com/tangledbytes/go-vsr/pkg/ipv4port" 11 | "github.com/tangledbytes/go-vsr/pkg/log" 12 |
"github.com/tangledbytes/go-vsr/pkg/network" 13 | "github.com/tangledbytes/go-vsr/pkg/queue" 14 | "github.com/tangledbytes/go-vsr/pkg/time" 15 | ) 16 | 17 | type ReplicaStatus int 18 | 19 | const ( 20 | // ReplicaStatusNormal is the normal state of the replica. 21 | ReplicaStatusNormal ReplicaStatus = iota 22 | 23 | // ReplicaStatusViewChange is the state of the replica when it is 24 | // performing a view change. 25 | ReplicaStatusViewChange 26 | 27 | // ReplicaStatusRecovery is the state of the replica when it is 28 | // recovering from a failure. 29 | ReplicaStatusRecovery 30 | 31 | // ReplicaStatusStateTransfer is a made up state which is not mentioned 32 | // in the original VSR paper. This state is used to indicate that the 33 | // replica is performing a state transfer. 34 | ReplicaStatusStateTransfer 35 | ) 36 | 37 | type InternalEventType = events.EventType 38 | 39 | const ( 40 | ProposeViewChange InternalEventType = iota + 100 41 | InternalPrepareOK 42 | ) 43 | 44 | type ClientTableData struct { 45 | Result string 46 | RequestNumber uint64 47 | Finished bool 48 | } 49 | 50 | type PendingPrepareOK struct { 51 | ClientAddr ipv4port.IPv4Port 52 | Request events.Request 53 | Responses map[uint64]struct{} 54 | } 55 | 56 | type PendingStartViewChange struct { 57 | Responses map[uint64]struct{} 58 | } 59 | 60 | type PendingDoViewChange struct { 61 | Responses map[uint64]events.DoViewChange 62 | } 63 | 64 | type InternalPrepareOKData struct { 65 | pok events.PrepareOK 66 | req events.Request 67 | clientaddr ipv4port.IPv4Port 68 | } 69 | 70 | // VSRState represents state of a single replica in the VSR cluster. 71 | // This replica could be Primary or Backup. 72 | type VSRState struct { 73 | // ID is nothing but the index of the replica in the ClusterMembers list. 74 | ID uint64 75 | 76 | // ViewNumber is the current view number of the replica. 77 | ViewNumber uint64 78 | 79 | // CommitNumber is the monotonically increasing commit number of the replica. 80 | CommitNumber uint64 81 | 82 | // OpNum is the monotonically increasing operation number of the replica. 83 | OpNum uint64 84 | 85 | // ClientTable is a map of client IDs to the last operation number 86 | // that the client has seen. 87 | ClientTable map[uint64]ClientTableData 88 | 89 | // ClusterMembers is a list of the cluster members IPv4:port addresses. 90 | // The smallest IP address is the initial primary. 91 | ClusterMembers []ipv4port.IPv4Port 92 | 93 | // Logs is a list of the operations that have been requested by the clients. 94 | Logs log.Logs 95 | 96 | // Status is the current status of the replica. 97 | Status ReplicaStatus 98 | } 99 | 100 | type Internal struct { 101 | // pendingPOKs keep track of all the pending prepare OKs for a particular 102 | // operation number. 103 | pendingPOKs map[uint64]PendingPrepareOK 104 | // pendingStartViewChange keeps track of all the pending start view change 105 | // messages for a particular view number. 106 | pendingStartViewChange map[uint64]PendingStartViewChange 107 | // pendingDoViewChange keeps track of all the pending do view change 108 | // messages for a particular view number. 109 | pendingDoViewChange map[uint64]PendingDoViewChange 110 | 111 | // sq is the submission queue for the events that need to be processed. 112 | // everything in the replica is driven by this queue. 113 | sq *queue.Queue[events.NetworkEvent] 114 | 115 | // lastStableViewNumber is the last view number when the replica was in 116 | // stable state. 
117 | lastStableViewNumber uint64 118 | // lastExecutedOpNum is the last operation number that the replica has 119 | // executed on the state machine. This number is tracked separately despite 120 | // the presence of commit number in the state because the commit number 121 | // changes in the view change protocol to the max commit number, but that 122 | // doesn't mean that this replica has executed all the operations up to it. 123 | lastExecutedOpNum uint64 124 | 125 | // viewChangeTimer is used to detect if the replica needs to initiate a 126 | // view change or not. 127 | viewChangeTimer *time.Timer 128 | // getStateTimer keeps track of the time when the replica might need 129 | // to send another getState request to another replica because it didn't 130 | // hear back from the previously asked replica. 131 | getStateTimer *time.Timer 132 | // commitTimer keeps track of the time when the primary (current) sent 133 | // the last commit message to the backups. If the primary didn't send 134 | // any commit message for a certain amount of time, it will send one again. 135 | commitTimer *time.Timer 136 | 137 | // running is a flag that indicates if the replica is running or not. 138 | running bool 139 | } 140 | 141 | type Replica struct { 142 | // state holds all the VSR related state of the replica. 143 | state VSRState 144 | network network.Network 145 | time time.Time 146 | logger *slog.Logger 147 | srvhandler func(msg string) string 148 | 149 | heartbeattimeout uint64 150 | 151 | internal Internal 152 | } 153 | 154 | type Config struct { 155 | HeartbeatTimeout uint64 156 | SrvHandler func(msg string) string 157 | Network network.Network 158 | Time time.Time 159 | ID uint64 160 | Members []string 161 | Logger *slog.Logger 162 | } 163 | 164 | func New(cfg Config) (*Replica, error) { 165 | clustermembers := make([]ipv4port.IPv4Port, len(cfg.Members)) 166 | for i, v := range cfg.Members { 167 | if err := clustermembers[i].FromHostPort(v); err != nil { 168 | return nil, err 169 | } 170 | } 171 | 172 | sort.Slice(clustermembers, func(i, j int) bool { 173 | return clustermembers[i].Less(clustermembers[j]) 174 | }) 175 | 176 | return &Replica{ 177 | state: VSRState{ 178 | ViewNumber: 0, 179 | CommitNumber: 0, 180 | ClientTable: make(map[uint64]ClientTableData), 181 | Logs: log.NewLogs(), 182 | Status: ReplicaStatusNormal, 183 | ClusterMembers: clustermembers, 184 | ID: cfg.ID, 185 | }, 186 | network: cfg.Network, 187 | time: cfg.Time, 188 | srvhandler: cfg.SrvHandler, 189 | heartbeattimeout: cfg.HeartbeatTimeout, 190 | logger: cfg.Logger, 191 | internal: Internal{ 192 | pendingPOKs: make(map[uint64]PendingPrepareOK), 193 | pendingStartViewChange: make(map[uint64]PendingStartViewChange), 194 | pendingDoViewChange: make(map[uint64]PendingDoViewChange), 195 | sq: queue.New[events.NetworkEvent](), 196 | 197 | commitTimer: time.NewTimer(cfg.Time, cfg.HeartbeatTimeout), 198 | viewChangeTimer: time.NewTimer(cfg.Time, cfg.HeartbeatTimeout*2), 199 | }, 200 | }, nil 201 | } 202 | 203 | // Submit will submit an event to the replica. This event will eventually be 204 | // processed by the replica (after a call to Run). 205 | // 206 | // This function should NOT be called from multiple threads. 207 | func (r *Replica) Submit(e events.NetworkEvent) { 208 | assert.Assert(!r.internal.running, "replica should not be running") 209 | r.internal.sq.Push(e) 210 | } 211 | 212 | // Run will run replica event handling ONCE. It will dequeue one event from the 213 | // submission queue and, after processing it, return.
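// A minimal driver loop, for illustration (`replica` and `net` here are assumed
// names for a *Replica and the network.Network it was configured with, not
// identifiers defined in this package):
//
//	for {
//		if ev, ok := net.Recv(); ok {
//			replica.Submit(ev)
//		}
//		replica.Run()
//	}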
214 | // 215 | // It needs to be run in such a loop to keep processing the events. 216 | // 217 | // This function should NOT be called from multiple threads. 218 | func (r *Replica) Run() { 219 | assert.Assert(!r.internal.running, "replica should not be already running") 220 | 221 | r.setIsRunning(true) 222 | defer r.setIsRunning(false) 223 | 224 | r.consumeSQ() 225 | r.checkTimers() 226 | } 227 | 228 | // VSRState returns a shallow copy of the VSR state of the replica. 229 | func (r *Replica) VSRState() VSRState { 230 | return r.state 231 | } 232 | 233 | // InternalState returns a shallow copy of the internal state of the replica. 234 | func (r *Replica) InternalState() Internal { 235 | return r.internal 236 | } 237 | 238 | func (r *Replica) consumeSQ() { 239 | e, ok := r.internal.sq.Pop() 240 | if ok { 241 | r.processEvent(e) 242 | 243 | if r.isEventFromPrimary(&e) { 244 | r.logger.Debug("got event from primary", "event", e.Event.Type) 245 | r.internal.viewChangeTimer.Reset() 246 | } 247 | } 248 | } 249 | 250 | func (r *Replica) processEvent(e events.NetworkEvent) { 251 | switch e.Event.Type { 252 | case events.EventRequest: 253 | r.onRequest(e.Src, e.Event.Data.(events.Request)) 254 | case events.EventPrepare: 255 | r.onPrepare(e.Src, e.Event.Data.(events.Prepare)) 256 | case events.EventPrepareOK: 257 | r.onPrepareOK(e.Src, e.Event.Data.(events.PrepareOK)) 258 | case InternalPrepareOK: 259 | r.onInternalPrepareOK(e.Src, e.Event.Data.(InternalPrepareOKData)) 260 | case events.EventCommit: 261 | r.onCommit(e.Src, e.Event.Data.(events.Commit)) 262 | case events.EventStartViewChange: 263 | r.onStartViewChange(e.Src, e.Event.Data.(events.StartViewChange)) 264 | case ProposeViewChange: 265 | r.initiateViewChange(r.state.ViewNumber + 1) 266 | case events.EventDoViewChange: 267 | r.onDoViewChange(e.Src, e.Event.Data.(events.DoViewChange)) 268 | case events.EventStartView: 269 | r.onStartView(e.Src, e.Event.Data.(events.StartView)) 270 | case events.EventGetState: 271 | r.onGetState(e.Src, e.Event.Data.(events.GetState)) 272 | case events.EventNewState: 273 | r.onNewState(e.Src, e.Event.Data.(events.NewState)) 274 | default: 275 | r.logger.Error("replica received an invalid event", "event", e.Event) 276 | } 277 | } 278 | 279 | // onRequest handles a request from a client. This corresponds to the primary's 280 | // request handling (step 2 of the normal-case protocol) in the VSR-revisited paper. 281 | func (r *Replica) onRequest(from ipv4port.IPv4Port, req events.Request) { 282 | r.logger.Debug("received request", "from", from.String(), "replica id", r.state.ID) 283 | 284 | // Am I in the state to perform the request? 285 | if r.state.Status != ReplicaStatusNormal { 286 | r.logger.Debug("not in normal state - dropping request", "state", r.state.Status) 287 | // I am not in the state to perform the request, drop the request. 288 | return 289 | } 290 | 291 | // Do I think I am primary? The client can have stale data, but does that 292 | // stale data match my (possibly stale) data? 293 | // 294 | // If it does match and I proceed with the request, I am not going to get 295 | // enough prepare OKs anyway and the client request will time out. 296 | if r.state.ID != r.potentialPrimary() { 297 | r.logger.Debug("not primary - dropping request", "id", r.state.ID, "potential primary", r.potentialPrimary()) 298 | // I am not primary, drop the request 299 | 300 | // TODO: Forward the request to the primary 301 | return 302 | } 303 | 304 | // Check if we have already seen this request.
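// As a summary of the checks below, the client table yields at-most-once
// execution; with `entry` standing for the stored ClientTableData of this client:
//
//	req.ID == entry.RequestNumber && entry.Finished  -> resend the saved reply
//	req.ID == entry.RequestNumber && !entry.Finished -> drop; reply is sent on commit
//	req.ID <  entry.RequestNumber                    -> stale retransmit, drop
//	otherwise (new client or newer request)          -> process the request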
305 | client, ok := r.state.ClientTable[req.ClientID] 306 | if ok { 307 | r.logger.Debug("request received from known client", "client id", req.ClientID) 308 | if client.RequestNumber == req.ID { 309 | // We have already seen this request, send 310 | // the saved result 311 | r.logger.Debug("received duplicate request", "client id", req.ClientID, "request id", req.ID) 312 | 313 | if client.Finished { 314 | r.logger.Debug("received duplicate request (finished)", "client id", req.ClientID, "request id", req.ID) 315 | 316 | if err := r.sendReplyToClient(from, req.ClientID, req.ID, client.Result); err != nil { 317 | r.logger.Error("failed to send reply to client", "err", err, "client addr", from.String()) 318 | } 319 | } 320 | 321 | // We have not finished processing this request, so drop it; 322 | // once we finish it, we will send the result. 323 | return 324 | } 325 | 326 | if client.RequestNumber > req.ID { 327 | // We have already seen this request, but the 328 | // client has sent a stale request. Drop the request. 329 | return 330 | } 331 | } 332 | 333 | // At this point we know that we have not seen this request before. Proceed 334 | // to process this request. 335 | 336 | // 1. Increment the operation number. 337 | r.state.OpNum++ 338 | // 2. Append the operation to the log. 339 | r.state.Logs.Add(log.Log{ 340 | Data: req.Op, 341 | ClientID: req.ClientID, 342 | RequestID: req.ID, 343 | }) 344 | 345 | // 3. Update the client table with the new, in-flight request number. 346 | r.state.ClientTable[req.ClientID] = ClientTableData{ 347 | RequestNumber: req.ID, // not client.RequestNumber: a retransmit must be deduped above, not re-logged 348 | } 349 | 350 | // 4. Send the Prepare message to all the backups. 351 | for i, member := range r.state.ClusterMembers { 352 | if i == int(r.state.ID) { 353 | continue 354 | } 355 | 356 | r.logger.Debug("sending prepare to backup", "backup addr", member.String()) 357 | 358 | if err := r.sendPrepareToBackup(member, req); err != nil { 359 | r.logger.Error("failed to send prepare to backup", "err", err, "backup addr", member.String()) 360 | } 361 | } 362 | 363 | r.logger.Debug("sent prepare to all backups, wait for prepareOKs", "opNum", r.state.OpNum) 364 | 365 | // 5. Wait for prepareOKs 366 | r.internal.sq.Push(events.NetworkEvent{ 367 | Src: r.state.ClusterMembers[r.state.ID], 368 | Event: &events.Event{ 369 | Type: InternalPrepareOK, 370 | Data: InternalPrepareOKData{ 371 | pok: events.PrepareOK{ 372 | ViewNum: r.state.ViewNumber, 373 | OpNum: r.state.OpNum, 374 | ReplicaID: r.state.ID, 375 | }, 376 | req: req, 377 | clientaddr: from, 378 | }, 379 | }, 380 | }) 381 | } 382 | 383 | // onPrepare handles prepare messages sent by primary to the backups. 384 | func (r *Replica) onPrepare(from ipv4port.IPv4Port, ev events.Prepare) { 385 | r.logger.Debug("received prepare", "from", from.String(), "replica id", r.state.ID) 386 | 387 | if r.state.Status != ReplicaStatusNormal { 388 | // I am not in the state to process the message, drop the message. 389 | r.logger.Debug("not in state - dropping request", "state", r.state.Status) 390 | return 391 | } 392 | 393 | if r.state.ID == r.potentialPrimary() { 394 | // I am the primary, drop the message. 395 | r.logger.Debug("primary - dropping request", "id", r.state.ID, "view num", r.state.ViewNumber) 396 | return 397 | } 398 | 399 | if r.state.ViewNumber > ev.ViewNum { 400 | // Poor old primary is behind (or network is horrible), drop the message.
401 | r.logger.Debug("old view number", "received view num", ev.ViewNum, "current view num", r.state.ViewNumber) 402 | return 403 | } 404 | 405 | if r.state.ViewNumber < ev.ViewNum { 406 | r.logger.Debug("replica lagging", "received view num", ev.ViewNum, "current view num", r.state.ViewNumber) 407 | 408 | // I am behind, initiate a state transfer. 409 | r.initiateStateTransfer(ev.ViewNum, ev.CommitNum) 410 | r.logger.Debug("initiated state transfer, dropping request") 411 | return 412 | } 413 | 414 | if r.state.OpNum+1 != ev.OpNum { 415 | r.logger.Debug("replica lagging", "received op num", ev.OpNum, "current op num", r.state.OpNum) 416 | 417 | // I am behind, initiate a state transfer. 418 | r.initiateStateTransfer(ev.ViewNum, ev.OpNum) 419 | r.logger.Debug("initiated state transfer, dropping request") 420 | return 421 | } 422 | 423 | // 1. Increment the operation number. 424 | r.state.OpNum++ 425 | // 2. Add the operation to the log. 426 | r.state.Logs.Add(log.Log{ 427 | Data: ev.Request.Op, 428 | ClientID: ev.Request.ClientID, 429 | RequestID: ev.Request.ID, 430 | }) 431 | 432 | // 3. Update the client table 433 | r.state.ClientTable[ev.Request.ClientID] = ClientTableData{ 434 | RequestNumber: ev.Request.ID, // the request number, not the client ID 435 | } 436 | 437 | // 4. Generate a fake Commit message and enqueue it. 438 | // This will be processed immediately after the prepare message. 439 | r.internal.sq.Push(events.NetworkEvent{ 440 | Src: from, 441 | Event: &events.Event{ 442 | Type: events.EventCommit, 443 | Data: events.Commit{ 444 | ViewNum: ev.ViewNum, 445 | CommitNum: ev.CommitNum, 446 | }, 447 | }, 448 | }) 449 | 450 | // 5. Send the PrepareOK message to the primary. 451 | if err := r.sendPrepareOKToPrimary(from, ev.OpNum); err != nil { 452 | r.logger.Error("failed to send prepareOK to primary", "err", err, "primary addr", from.String()) 453 | return 454 | } 455 | 456 | r.logger.Debug("sent prepareOK to primary", "opNum", ev.OpNum) 457 | } 458 | 459 | func (r *Replica) onInternalPrepareOK(from ipv4port.IPv4Port, pok InternalPrepareOKData) { 460 | r.logger.Debug("received internal prepareOK", "from", from.String(), "replica id", r.state.ID) 461 | 462 | _, ok := r.internal.pendingPOKs[pok.pok.OpNum] // keyed by op number, like every other pendingPOKs access 463 | if !ok { 464 | r.internal.pendingPOKs[pok.pok.OpNum] = PendingPrepareOK{ 465 | ClientAddr: pok.clientaddr, 466 | Request: pok.req, 467 | Responses: map[uint64]struct{}{}, 468 | } 469 | } else { 470 | r.internal.pendingPOKs[pok.pok.OpNum] = PendingPrepareOK{ 471 | ClientAddr: pok.clientaddr, 472 | Request: pok.req, 473 | Responses: r.internal.pendingPOKs[pok.pok.OpNum].Responses, 474 | } 475 | } 476 | 477 | // Do normal prepareOK processing 478 | r.onPrepareOK(from, pok.pok) 479 | } 480 | 481 | // onPrepareOK handles prepare ok messages sent by the backups to the primary. 482 | // It will use the pendingPOKs map to keep track of the number of prepare 483 | // ok messages received for a particular operation number. 484 | // 485 | // The message is dropped if the replica is not in the normal status or does 486 | // not currently believe itself to be the primary; both checks are performed 487 | // at the top of the function.
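// For example, in a 3-replica cluster len(ClusterMembers)/2 == 1, so the
// majority test below passes once 2 prepareOKs are recorded: the primary's
// own internal prepareOK plus one from a backup.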
488 | func (r *Replica) onPrepareOK(from ipv4port.IPv4Port, pok events.PrepareOK) { 489 | r.logger.Debug("received prepareOK", "from", from.String(), "replica id", r.state.ID) 490 | 491 | if r.state.Status != ReplicaStatusNormal { 492 | r.logger.Debug("not in state - dropping request", "state", r.state.Status) 493 | return 494 | } 495 | 496 | if r.state.ID != r.potentialPrimary() { 497 | r.logger.Debug("not primary - dropping request", "id", r.state.ID, "potential primary", r.potentialPrimary()) 498 | return 499 | } 500 | 501 | // Check if this prepare OK is even relevant by checking the commit number 502 | if pok.OpNum <= r.state.CommitNumber { 503 | r.logger.Debug("prepareOK not relevant", "opNum", pok.OpNum, "commitNum", r.state.CommitNumber) 504 | return 505 | } 506 | 507 | if _, ok := r.internal.pendingPOKs[pok.OpNum]; !ok { 508 | r.internal.pendingPOKs[pok.OpNum] = PendingPrepareOK{ 509 | Responses: map[uint64]struct{}{}, 510 | } 511 | } 512 | 513 | // Record the prepare OK 514 | r.internal.pendingPOKs[pok.OpNum].Responses[pok.ReplicaID] = struct{}{} 515 | 516 | // Check if we have received prepare OKs from a majority of the replicas (the primary's own internal prepareOK counts toward the majority) 517 | if len(r.internal.pendingPOKs[pok.OpNum].Responses) > len(r.state.ClusterMembers)/2 { 518 | // Process all the logs from last commit number to the op number 519 | // of the prepare OK. 520 | r.logger.Info("received prepareOK from majority", "opNum", pok.OpNum) 521 | 522 | for i := r.internal.lastExecutedOpNum + 1; i <= pok.OpNum; i++ { 523 | r.logger.Debug("processing log", "opNum", i, "log", r.state.Logs.OpAt(int(i))) 524 | 525 | // 1. Apply the operation to the state machine 526 | res := r.srvhandler(r.state.Logs.OpAt(int(i))) 527 | 528 | // 2. Update the commit number 529 | r.state.CommitNumber = i 530 | r.internal.lastExecutedOpNum = i 531 | 532 | // 3. Send the commit message to all the replicas 533 | for mi, member := range r.state.ClusterMembers { 534 | if r.state.ID == uint64(mi) { 535 | continue 536 | } 537 | 538 | // Send commits - not mentioned in the paper 539 | if err := r.sendCommitToBackup(member, i); err != nil { 540 | r.logger.Error("failed to send commit to backup", 541 | "err", err, 542 | "backup addr", member.String(), 543 | "opNum", i, 544 | ) 545 | } 546 | } 547 | 548 | // 4. Store the result in the client table 549 | clientID := r.state.Logs.ClientIDAt(int(i)) 550 | reqID := r.state.Logs.RequestIDAt(int(i)) 551 | r.state.ClientTable[clientID] = ClientTableData{ 552 | Result: res, 553 | RequestNumber: reqID, 554 | Finished: true, 555 | } 556 | 557 | // 5. Send the reply to the client 558 | r.logger.Debug("sending reply to client", "client addr", r.internal.pendingPOKs[i].ClientAddr.String()) 559 | if r.internal.pendingPOKs[i].ClientAddr.String() != "" { 560 | if err := r.sendReplyToClient( 561 | r.internal.pendingPOKs[i].ClientAddr, 562 | clientID, 563 | reqID, 564 | res, 565 | ); err != nil { 566 | r.logger.Error( 567 | "failed to send reply to client", 568 | "err", err, 569 | "client addr", r.internal.pendingPOKs[i].ClientAddr.String(), 570 | "request id", r.internal.pendingPOKs[i].Request.ID, 571 | ) 572 | } 573 | } 574 | 575 | // 6. Delete the pendingPrepareOKs entry 576 | delete(r.internal.pendingPOKs, i) 577 | } 578 | } 579 | } 580 | 581 | // onCommit handles commit messages sent by the primary to the backups. 582 | // 583 | // The message is dropped if the replica is not in the normal status or if it 584 | // currently believes itself to be the primary; both checks are performed at 585 | // the top of the function.
586 | func (r *Replica) onCommit(from ipv4port.IPv4Port, commit events.Commit) { 587 | r.logger.Debug("received commit", "from", from.String(), "replica id", r.state.ID) 588 | 589 | if r.state.Status != ReplicaStatusNormal { 590 | r.logger.Debug("not in state - dropping request", "state", r.state.Status) 591 | return 592 | } 593 | 594 | if r.state.ID == r.potentialPrimary() { 595 | r.logger.Debug("primary - dropping request", "id", r.state.ID, "view num", r.state.ViewNumber) 596 | return 597 | } 598 | 599 | if r.state.ViewNumber > commit.ViewNum { 600 | r.logger.Debug("old view number", "received view num", commit.ViewNum, "current view num", r.state.ViewNumber) 601 | return 602 | } 603 | 604 | if r.state.ViewNumber < commit.ViewNum { 605 | r.logger.Debug("replica lagging", "received view num", commit.ViewNum, "current view num", r.state.ViewNumber) 606 | r.initiateStateTransfer(commit.ViewNum, commit.CommitNum) 607 | r.logger.Debug("initiated state transfer, dropping request") 608 | return 609 | } 610 | 611 | if r.state.OpNum < commit.CommitNum { 612 | r.logger.Debug("replica lagging", "received commit num", commit.CommitNum, "current op num", r.state.OpNum) 613 | r.initiateStateTransfer(commit.ViewNum, commit.CommitNum) 614 | r.logger.Debug("initiated state transfer, dropping request") 615 | return 616 | } 617 | 618 | // Process all the logs from last commit number to the commit number 619 | for i := r.internal.lastExecutedOpNum + 1; i <= commit.CommitNum; i++ { 620 | r.logger.Debug("processing log", "opNum", i, "log", r.state.Logs.OpAt(int(i))) 621 | 622 | // 1. Apply the operation to the state machine 623 | res := r.srvhandler(r.state.Logs.OpAt(int(i))) 624 | 625 | // 2. Update the commit number 626 | r.state.CommitNumber = i 627 | r.internal.lastExecutedOpNum = i 628 | 629 | // 3. Store the result in the client table 630 | clientID := r.state.Logs.ClientIDAt(int(i)) 631 | reqID := r.state.Logs.RequestIDAt(int(i)) 632 | r.state.ClientTable[clientID] = ClientTableData{ 633 | Result: res, 634 | RequestNumber: reqID, 635 | Finished: true, 636 | } 637 | } 638 | } 639 | 640 | func (r *Replica) onStartViewChange(src ipv4port.IPv4Port, ev events.StartViewChange) { 641 | r.logger.Debug("received startViewChange", "from", src.String(), "replica id", r.state.ID) 642 | r.internal.viewChangeTimer.Reset() 643 | // Total 6 cases are possible: (2 replica statuses) x (3 view number relations) 644 | 645 | // Covers 2 cases: normal and view change, when the view number is less 646 | // than ours 647 | // TOTAL: 2 648 | if ev.ViewNum < r.state.ViewNumber { 649 | r.logger.Debug("old view number", "received view num", ev.ViewNum, "current view num", r.state.ViewNumber) 650 | return 651 | } 652 | 653 | // Covers 1 case 654 | // TOTAL: 3 655 | if r.state.Status == ReplicaStatusNormal && ev.ViewNum == r.state.ViewNumber { 656 | r.logger.Debug("already in this view", "received view num", ev.ViewNum, "current view num", r.state.ViewNumber) 657 | return 658 | } 659 | 660 | // If I just received an initiation for a view change (with a higher number) and I am not in the 661 | // view change state, then I need to initiate a view change.
662 | // 663 | // Cover 2 state 664 | // TOTAL: 5 665 | if ev.ViewNum > r.state.ViewNumber { 666 | r.logger.Info("initiating view change", "received view num", ev.ViewNum, "current view num", r.state.ViewNumber) 667 | 668 | r.initiateViewChange(ev.ViewNum) 669 | // Don't return here as we need to process the start view change message 670 | } 671 | 672 | // Already in view change status and the view number matches 673 | // 674 | // Cover 1 state 675 | // TOTAL: 6 676 | if r.state.Status == ReplicaStatusViewChange && ev.ViewNum == r.state.ViewNumber { 677 | _, ok := r.internal.pendingStartViewChange[ev.ViewNum] 678 | if !ok { 679 | r.internal.pendingStartViewChange[ev.ViewNum] = PendingStartViewChange{ 680 | Responses: map[uint64]struct{}{}, 681 | } 682 | } 683 | 684 | r.internal.pendingStartViewChange[ev.ViewNum].Responses[ev.ReplicaID] = struct{}{} 685 | 686 | // Check if we have received view change messages from a majority of the replicas 687 | if len(r.internal.pendingStartViewChange[ev.ViewNum].Responses) > len(r.state.ClusterMembers)/2 { 688 | r.logger.Info("received view change from majority", "view num", ev.ViewNum) 689 | 690 | if err := r.sendDoViewChangeToPrimary(r.state.ClusterMembers[r.potentialPrimary()]); err != nil { 691 | r.logger.Error("failed to send doViewChange to primary", "err", err) 692 | } 693 | 694 | r.logger.Debug("sent doViewChange to primary", "view num", ev.ViewNum) 695 | } 696 | 697 | return 698 | } 699 | 700 | assert.Assert(false, "should be unreachable state") 701 | } 702 | 703 | func (r *Replica) onDoViewChange(src ipv4port.IPv4Port, ev events.DoViewChange) { 704 | r.logger.Debug("received doViewChange", "from", src.String(), "replica id", r.state.ID) 705 | r.internal.viewChangeTimer.Reset() 706 | 707 | if r.state.Status != ReplicaStatusViewChange { 708 | r.logger.Debug("not in state - dropping request", "state", r.state.Status) 709 | return 710 | } 711 | 712 | // Don't do primary test here as it could be that we missed the start view change 713 | // and just getting to know about it now but for sanity ensure that the 714 | // view number is higher than the current view number. 
715 | if ev.ViewNum < r.state.ViewNumber { 716 | r.logger.Debug("old view number", "received view num", ev.ViewNum, "current view num", r.state.ViewNumber) 717 | return 718 | } 719 | 720 | _, ok := r.internal.pendingDoViewChange[ev.ViewNum] 721 | if !ok { 722 | r.internal.pendingDoViewChange[ev.ViewNum] = PendingDoViewChange{ 723 | Responses: map[uint64]events.DoViewChange{}, 724 | } 725 | } 726 | 727 | r.internal.pendingDoViewChange[ev.ViewNum].Responses[ev.ReplicaID] = ev 728 | 729 | // Check if we have received do view change messages from a majority of the replicas 730 | if len(r.internal.pendingDoViewChange[ev.ViewNum].Responses) > len(r.state.ClusterMembers)/2 { 731 | r.logger.Info("received doViewChange from majority", "view num", ev.ViewNum) 732 | 733 | r.finalizeViewChange() 734 | 735 | for mi, member := range r.state.ClusterMembers { 736 | if r.state.ID == uint64(mi) { 737 | continue 738 | } 739 | 740 | r.logger.Debug("sending startView to backup", "backup addr", member.String()) 741 | if err := r.sendStartViewToBackup(member); err != nil { 742 | r.logger.Error("failed to send startView to backup", "err", err, "backup addr", member.String()) 743 | } 744 | } 745 | 746 | r.logger.Info("view change complete", "view num", ev.ViewNum) 747 | } 748 | } 749 | 750 | func (r *Replica) onStartView(src ipv4port.IPv4Port, ev events.StartView) { 751 | r.logger.Debug("received startView", "from", src.String(), "replica id", r.state.ID) 752 | 753 | if ev.ViewNum < r.state.ViewNumber { 754 | r.logger.Debug("old view number", "received view num", ev.ViewNum, "current view num", r.state.ViewNumber) 755 | return 756 | } 757 | 758 | r.state.OpNum = ev.OpNum 759 | r.state.Logs = ev.Logs 760 | r.state.CommitNumber = ev.CommitNum 761 | r.state.ViewNumber = ev.ViewNum 762 | r.state.Status = ReplicaStatusNormal 763 | 764 | // Send PrepareOK for all the non-committed logs 765 | for i := ev.CommitNum + 1; i <= ev.OpNum; i++ { 766 | r.state.ClientTable[ev.Logs.ClientIDAt(int(i))] = ClientTableData{ 767 | RequestNumber: ev.Logs.RequestIDAt(int(i)), 768 | } 769 | 770 | r.logger.Debug("sending prepareOK to primary", "opNum", i) 771 | 772 | if err := r.sendPrepareOKToPrimary(r.state.ClusterMembers[r.potentialPrimary()], i); err != nil { 773 | r.logger.Error("failed to send prepareOK to primary", "err", err, "primary addr", r.state.ClusterMembers[r.potentialPrimary()].String()) 774 | } 775 | } 776 | 777 | delete(r.internal.pendingStartViewChange, ev.ViewNum) 778 | delete(r.internal.pendingDoViewChange, ev.ViewNum) 779 | 780 | // The primary will eventually send either a Commit or a Prepare message, which 781 | // will allow the backup to execute the operations against the state machine 782 | } 783 | 784 | func (r *Replica) onGetState(src ipv4port.IPv4Port, ev events.GetState) { 785 | r.logger.Warn("received getState", "from", src.String(), "replica id", r.state.ID) 786 | if r.state.Status != ReplicaStatusNormal { 787 | r.logger.Warn("not in state - dropping request", "state", r.state.Status) 788 | return 789 | } 790 | 791 | if r.state.ViewNumber != ev.ViewNum { 792 | r.logger.Warn("not current view", "received view num", ev.ViewNum, "current view num", r.state.ViewNumber) 793 | return 794 | } 795 | 796 | // Not mentioned in the paper, but we respond with the state only if the 797 | // requested op number is not ahead of our current op number.
798 | if r.state.OpNum < ev.OpNum { 799 | r.logger.Warn("not current op num", "received op num", ev.OpNum, "current op num", r.state.OpNum, "id", r.state.ID) 800 | return 801 | } 802 | 803 | if err := r.sendNewState(src, int(ev.OpNum)); err != nil { 804 | r.logger.Error("failed to send newState to replica", "err", err, "replica addr", src.String()) 805 | } 806 | } 807 | 808 | func (r *Replica) onNewState(src ipv4port.IPv4Port, ev events.NewState) { 809 | r.logger.Warn("received newState", "from", src.String(), "replica id", r.state.ID) 810 | 811 | if r.state.Status != ReplicaStatusStateTransfer { 812 | r.logger.Debug("not in state - dropping request", "state", r.state.Status) 813 | return 814 | } 815 | 816 | r.state.ViewNumber = ev.ViewNum 817 | r.state.OpNum = ev.OpNum 818 | r.state.Logs.Merge(ev.Logs) 819 | r.state.Status = ReplicaStatusNormal 820 | 821 | r.internal.getStateTimer = nil 822 | } 823 | 824 | func (r *Replica) initiateViewChange(viewnum uint64) { 825 | r.logger.Debug("initiating view change", "view num", viewnum) 826 | 827 | r.state.Status = ReplicaStatusViewChange 828 | r.internal.lastStableViewNumber = r.state.ViewNumber 829 | r.state.ViewNumber = viewnum 830 | 831 | // Remove relevant timers 832 | r.internal.getStateTimer = nil 833 | 834 | for _, member := range r.state.ClusterMembers { 835 | r.logger.Debug("sending startViewChange to replica", "replica addr", member.String(), "view num", viewnum) 836 | if err := r.sendStartViewChange(member, viewnum); err != nil { 837 | r.logger.Error("failed to send startViewChange to replica", "err", err, "replica addr", member.String()) 838 | } 839 | } 840 | } 841 | 842 | func (r *Replica) initiateStateTransfer(newViewNum, newOpNum uint64) { 843 | r.logger.Debug( 844 | "initiate state transfer", 845 | "view num", newViewNum, 846 | "current view num", r.state.ViewNumber, 847 | "op num", newOpNum, 848 | "current op num", r.state.OpNum, 849 | ) 850 | 851 | if r.state.Status != ReplicaStatusNormal && r.state.Status != ReplicaStatusStateTransfer { 852 | // Remove the timer, as we might have entered a view change state or 853 | // recovery state. If a state transfer would still be needed, next calls 854 | // to prepare or commit will trigger it. 855 | r.internal.getStateTimer = nil 856 | return 857 | } 858 | 859 | // Now that we know that we are in the desired state, let's change the status of the replica 860 | // to ensure that we do not process any Prepare or Commit messages while we are trying 861 | // to perform the state transfer. 862 | // ViewChange protocol will still be able to proceed as `onStartViewChange` does not 863 | // care about any status. This is desired as we don't want to race against the state 864 | // updates from view change. 865 | r.state.Status = ReplicaStatusStateTransfer 866 | 867 | if r.state.ViewNumber < newViewNum { 868 | // Hard luck, gotta truncate 869 | // 1. Set op number to the commit number 870 | r.state.OpNum = r.state.CommitNumber 871 | // 2. Truncate the logs 872 | r.state.Logs.TruncateAfter(int(r.state.CommitNumber)) 873 | } 874 | 875 | targetReplica := r.primaryForView(newViewNum) 876 | // If the timer is set that means we have been here and probably attempted 877 | // to perform the state transfer but failed. In that case we need to try 878 | // the next replica in the cluster members list. 879 | // 880 | // NOTE: if the view change protocol kicks in then this timer should be removed!
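// For example, with 5 cluster members, if the previous (timed-out) attempt
// asked replica 3, the retry below asks replica 4, then 0, and so on around
// the ring until a NewState message arrives.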
881 | if r.internal.getStateTimer != nil { 882 | state, ok := r.internal.getStateTimer.State() 883 | assert.Assert(ok, "getStateTimer state should be set") 884 | 885 | castedState := state.([3]uint64) 886 | targetReplica = (castedState[2] + 1) % uint64(len(r.state.ClusterMembers)) 887 | } 888 | 889 | // 3. Set the timer to ensure that we get the state within the timeout 890 | r.internal.getStateTimer = time.NewTimer(r.time, 15*time.SECOND) 891 | r.internal.getStateTimer.SetState([3]uint64{newViewNum, newOpNum, targetReplica}) 892 | 893 | // 4. Send GetState to the chosen target replica 894 | // 895 | // NOTE: it might be better to send the requests to every cluster member and then 896 | // wait for the first response. This will ensure that we get the state from the 897 | // fastest replica. 898 | // 899 | // This isn't done here because the logs are in memory and the simulator simply 900 | // cannot manage this much memory being moved around :( 901 | if err := r.sendGetStateToReplica(r.state.ClusterMembers[targetReplica]); err != nil { 902 | r.logger.Error("failed to send getState to replica", "err", err, "replica addr", r.state.ClusterMembers[targetReplica].String()) 903 | } 904 | } 905 | 906 | func (r *Replica) sendPrepareToBackup(backup ipv4port.IPv4Port, req events.Request) error { 907 | return r.network.Send(backup, func(w io.Writer) error { 908 | ev := events.Event{ 909 | Type: events.EventPrepare, 910 | Data: events.Prepare{ 911 | ViewNum: r.state.ViewNumber, 912 | OpNum: r.state.OpNum, 913 | CommitNum: r.state.CommitNumber, 914 | Request: req, 915 | ReplicaID: r.state.ID, 916 | }, 917 | } 918 | 919 | return ev.ToWriter(w) 920 | }) 921 | } 922 | 923 | func (r *Replica) sendReplyToClient(client ipv4port.IPv4Port, clientID, reqID uint64, res string) error { 924 | return r.network.Send(client, func(w io.Writer) error { 925 | ev := events.Event{ 926 | Type: events.EventReply, 927 | Data: events.Reply{ 928 | ViewNum: r.state.ViewNumber, 929 | ID: reqID, 930 | ClientID: clientID, 931 | Result: res, 932 | }, 933 | } 934 | 935 | return ev.ToWriter(w) 936 | }) 937 | } 938 | 939 | func (r *Replica) sendPrepareOKToPrimary(primary ipv4port.IPv4Port, opnum uint64) error { 940 | return r.network.Send(primary, func(w io.Writer) error { 941 | ev := events.Event{ 942 | Type: events.EventPrepareOK, 943 | Data: events.PrepareOK{ 944 | ViewNum: r.state.ViewNumber, 945 | OpNum: opnum, 946 | ReplicaID: r.state.ID, 947 | }, 948 | } 949 | 950 | return ev.ToWriter(w) 951 | }) 952 | } 953 | 954 | func (r *Replica) sendCommitToBackup(backup ipv4port.IPv4Port, commitnum uint64) error { 955 | return r.network.Send(backup, func(w io.Writer) error { 956 | ev := events.Event{ 957 | Type: events.EventCommit, 958 | Data: events.Commit{ 959 | ViewNum: r.state.ViewNumber, 960 | CommitNum: commitnum, 961 | ReplicaID: r.state.ID, 962 | }, 963 | } 964 | 965 | return ev.ToWriter(w) 966 | }) 967 | } 968 | 969 | func (r *Replica) sendStartViewChange(backup ipv4port.IPv4Port, viewNum uint64) error { 970 | return r.network.Send(backup, func(w io.Writer) error { 971 | ev := events.Event{ 972 | Type: events.EventStartViewChange, 973 | Data: events.StartViewChange{ 974 | ViewNum: viewNum, 975 | ReplicaID: r.state.ID, 976 | }, 977 | } 978 | 979 | return ev.ToWriter(w) 980 | }) 981 | } 982 | 983 | func (r *Replica) sendDoViewChangeToPrimary(primary ipv4port.IPv4Port) error { 984 | return r.network.Send(primary, func(w io.Writer) error { 985 | ev := events.Event{ 986 | Type: events.EventDoViewChange, 987 | Data: 
events.DoViewChange{ 988 | ViewNum: r.state.ViewNumber, 989 | Logs: r.state.Logs, 990 | LastStableViewNum: r.internal.lastStableViewNumber, 991 | OpNum: r.state.OpNum, 992 | CommitNum: r.state.CommitNumber, 993 | ReplicaID: r.state.ID, 994 | }, 995 | } 996 | 997 | return ev.ToWriter(w) 998 | }) 999 | } 1000 | 1001 | func (r *Replica) sendStartViewToBackup(backup ipv4port.IPv4Port) error { 1002 | return r.network.Send(backup, func(w io.Writer) error { 1003 | ev := events.Event{ 1004 | Type: events.EventStartView, 1005 | Data: events.StartView{ 1006 | ViewNum: r.state.ViewNumber, 1007 | OpNum: r.state.OpNum, 1008 | CommitNum: r.state.CommitNumber, 1009 | Logs: r.state.Logs, 1010 | ReplicaID: r.state.ID, 1011 | }, 1012 | } 1013 | 1014 | return ev.ToWriter(w) 1015 | }) 1016 | } 1017 | 1018 | func (r *Replica) sendGetStateToReplica(replica ipv4port.IPv4Port) error { 1019 | return r.network.Send(replica, func(w io.Writer) error { 1020 | ev := events.Event{ 1021 | Type: events.EventGetState, 1022 | Data: events.GetState{ 1023 | ViewNum: r.state.ViewNumber, 1024 | OpNum: r.state.OpNum, 1025 | ReplicaID: r.state.ID, 1026 | }, 1027 | } 1028 | 1029 | return ev.ToWriter(w) 1030 | }) 1031 | } 1032 | 1033 | func (r *Replica) sendNewState(replica ipv4port.IPv4Port, logsFrom int) error { 1034 | return r.network.Send(replica, func(w io.Writer) error { 1035 | ev := events.Event{ 1036 | Type: events.EventNewState, 1037 | Data: events.NewState{ 1038 | ViewNum: r.state.ViewNumber, 1039 | OpNum: r.state.OpNum, 1040 | CommitNum: r.state.CommitNumber, 1041 | Logs: r.state.Logs.After(logsFrom), 1042 | ReplicaID: r.state.ID, 1043 | }, 1044 | } 1045 | 1046 | return ev.ToWriter(w) 1047 | }) 1048 | } 1049 | 1050 | // finalizeViewChange finalizes the view change for this node which 1051 | // will make this node the new primary 1052 | // 1053 | // This should NOT be called until the node has received at least `f` 1054 | // DoViewChange messages from the replicas AND one from itself (important 1055 | // for correctness IMO). 
1056 | func (r *Replica) finalizeViewChange() { 1057 | r.state.ViewNumber = r.largestViewNumInPendingViewChange() 1058 | 1059 | // First pass: find the largest "last stable view" among the responses; only 1060 | // logs from that view may be adopted (ties broken by op number below). 1061 | bestLastStableView := uint64(0) 1062 | for _, v := range r.internal.pendingDoViewChange[r.state.ViewNumber].Responses { 1063 | if v.LastStableViewNum > bestLastStableView { 1064 | bestLastStableView = v.LastStableViewNum 1065 | } 1066 | } 1067 | 1068 | bestOpNum := uint64(0) 1069 | bestLogs := []log.Log{} 1070 | bestCommit := uint64(0) 1071 | for _, v := range r.internal.pendingDoViewChange[r.state.ViewNumber].Responses { 1072 | if v.LastStableViewNum == bestLastStableView && v.OpNum >= bestOpNum { 1073 | bestOpNum = v.OpNum 1074 | bestLogs = v.Logs 1075 | } 1076 | if v.CommitNum > bestCommit { 1077 | bestCommit = v.CommitNum 1078 | } 1079 | } 1080 | 1081 | r.state.OpNum = bestOpNum 1082 | r.state.Logs = bestLogs 1083 | r.state.CommitNumber = bestCommit 1084 | r.state.Status = ReplicaStatusNormal 1085 | 1086 | // Need to seed tables 1087 | for i := r.state.CommitNumber + 1; i <= r.state.OpNum; i++ { 1088 | r.state.ClientTable[r.state.Logs.ClientIDAt(int(i))] = ClientTableData{ 1089 | RequestNumber: r.state.Logs.RequestIDAt(int(i)), 1090 | } 1091 | 1092 | r.internal.pendingPOKs[i] = PendingPrepareOK{ 1093 | Responses: map[uint64]struct{}{ 1094 | r.state.ID: {}, 1095 | }, 1096 | } 1097 | } 1098 | } 1099 | 1100 | func (r *Replica) checkTimers() { 1101 | r.checkGetStateTimeout() 1102 | r.checkCommitTimeout() 1103 | r.checkViewChangeTimer() 1104 | } 1105 | 1106 | func (r *Replica) checkViewChangeTimer() { 1107 | // No need to perform view change if I am the primary 1108 | if r.potentialPrimary() == r.state.ID { 1109 | return 1110 | } 1111 | 1112 | if !r.internal.viewChangeTimer.Done() { 1113 | return 1114 | } 1115 | 1116 | r.logger.Debug("view change timer timed out", "view num", r.state.ViewNumber) 1117 | r.internal.sq.Push(events.NetworkEvent{ 1118 | Event: &events.Event{ 1119 | Type: ProposeViewChange, 1120 | }, 1121 | }) 1122 | 1123 | r.internal.viewChangeTimer.Reset() 1124 | } 1125 | 1126 | func (r *Replica) checkCommitTimeout() { 1127 | if !r.internal.commitTimer.Done() { 1128 | return 1129 | } 1130 | 1131 | if r.state.ID != r.potentialPrimary() { 1132 | return 1133 | } 1134 | 1135 | r.logger.Debug( 1136 | "broadcasting commit message", 1137 | "commit num", r.state.CommitNumber, 1138 | "view num", r.state.ViewNumber, 1139 | "replica id", r.state.ID, 1140 | ) 1141 | 1142 | for mi, member := range r.state.ClusterMembers { 1143 | if r.state.ID == uint64(mi) { 1144 | continue 1145 | } 1146 | 1147 | // It's OK if the broadcast fails 1148 | r.sendCommitToBackup(member, r.state.CommitNumber) 1149 | } 1150 | 1151 | r.internal.commitTimer.Reset() 1152 | } 1153 | 1154 | func (r *Replica) checkGetStateTimeout() { 1155 | if r.internal.getStateTimer == nil { 1156 | return 1157 | } 1158 | 1159 | if !r.internal.getStateTimer.Done() { 1160 | return 1161 | } 1162 | 1163 | state, ok := r.internal.getStateTimer.State() 1164 | assert.Assert(ok, "state should not be nil for getStateTimer") 1165 | 1166 | // Yes, I want this to panic if the state is not of the type I expect 1167 | // as this is a bug in the code.
1168 | // The expected state is [3]uint64 - [viewNum, opNum, lastReplicaIDTried] 1169 | castedState := state.([3]uint64) 1170 | 1171 | r.logger.Warn("getState timed out", "state", state) 1172 | r.initiateStateTransfer(castedState[0], castedState[1]) 1173 | } 1174 | 1175 | func (r *Replica) potentialPrimary() uint64 { 1176 | return r.primaryForView(r.state.ViewNumber) 1177 | } 1178 | 1179 | func (r *Replica) primaryForView(num uint64) uint64 { 1180 | return uint64(int(num) % len(r.state.ClusterMembers)) // e.g. with 3 members, views 0,1,2,3 map to replicas 0,1,2,0 1181 | } 1182 | 1183 | func (r *Replica) isEventFromPrimary(e *events.NetworkEvent) bool { 1184 | if e.Event.Type == events.EventPrepare { 1185 | ev := e.Event.Data.(events.Prepare) 1186 | return ev.ReplicaID == r.potentialPrimary() 1187 | } 1188 | 1189 | if e.Event.Type == events.EventCommit { 1190 | ev := e.Event.Data.(events.Commit) 1191 | return ev.ReplicaID == r.potentialPrimary() 1192 | } 1193 | 1194 | if e.Event.Type == events.EventStartView { 1195 | ev := e.Event.Data.(events.StartView) 1196 | return ev.ReplicaID == r.potentialPrimary() 1197 | } 1198 | 1199 | return false 1200 | } 1201 | 1202 | func (r *Replica) largestViewNumInPendingViewChange() uint64 { 1203 | largest := r.state.ViewNumber 1204 | for v := range r.internal.pendingDoViewChange { 1205 | if v > largest { 1206 | largest = v 1207 | } 1208 | } 1209 | 1210 | return largest 1211 | } 1212 | 1213 | func (r *Replica) setIsRunning(isRunning bool) { 1214 | r.internal.running = isRunning 1215 | } 1216 | -------------------------------------------------------------------------------- /pkg/time/real.go: -------------------------------------------------------------------------------- 1 | package time 2 | 3 | import "time" 4 | 5 | type Real struct{} 6 | 7 | func NewReal() *Real { 8 | return &Real{} 9 | } 10 | 11 | func (r *Real) Now() uint64 { 12 | return uint64(time.Now().UnixNano()) 13 | } 14 | 15 | func (r *Real) Tick() { 16 | // no-op 17 | } 18 | 19 | // Real implements Time 20 | var _ Time = (*Real)(nil) 21 | -------------------------------------------------------------------------------- /pkg/time/time.go: -------------------------------------------------------------------------------- 1 | package time 2 | 3 | const NANOSECOND = 1 4 | const MICROSECOND = 1000 * NANOSECOND 5 | const MILLISECOND = 1000 * MICROSECOND 6 | const SECOND = 1000 * MILLISECOND 7 | const MINUTE = 60 * SECOND 8 | 9 | // Time abstracts time in the system 10 | type Time interface { 11 | Now() uint64 12 | Tick() 13 | } 14 | -------------------------------------------------------------------------------- /pkg/time/timer.go: -------------------------------------------------------------------------------- 1 | package time 2 | 3 | // Timer is a very simple timer that can be used to check if a certain amount of 4 | // time has passed and, if it has, execute an action. The timer doesn't drive 5 | // itself; it needs to be checked manually, and the time needs to be ticked 6 | // manually as well (the OS effectively does this in the case of Real time). 7 | // 8 | // Timer allows storing any opaque state into it. 9 | type Timer struct { 10 | start uint64 11 | ticks uint64 12 | time Time 13 | 14 | state any 15 | action func(*Timer) 16 | } 17 | 18 | // NewTimer takes a time and a number of ticks and returns a new timer that 19 | // reports Done (and can run its action via ActIfDone) once that many ticks have passed.
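// For example, driven by the Virtual clock from this package:
//
//	clock := NewVirtual(1)
//	t := NewTimer(clock, 10)
//	for !t.Done() {
//		clock.Tick() // each Tick advances the clock by the factor (1 here)
//	}
//	// t.Done() is now true: 10 time units have passed.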
func NewTimer(time Time, ticks uint64) *Timer {
	return &Timer{
		start: time.Now(),
		ticks: ticks,
		time:  time,
	}
}

// Reset restarts the timer from the current time
func (t *Timer) Reset() {
	t.start = t.time.Now()
}

// Done returns true if the timer has finished
func (t *Timer) Done() bool {
	return t.time.Now()-t.start >= t.ticks
}

// Action sets the action that will be executed when the timer is done
func (t *Timer) Action(fn func(*Timer)) {
	t.action = fn
}

// ActIfDone executes the registered action (if any) once the timer is done
// and reports whether it was done.
func (t *Timer) ActIfDone() bool {
	if !t.Done() {
		return false
	}

	if t.action != nil {
		t.action(t)
	}

	return true
}

// SetState allows storing arbitrary state in the timer
func (t *Timer) SetState(state any) {
	t.state = state
}

// State returns the state stored in the timer and whether any state was set
func (t *Timer) State() (any, bool) {
	return t.state, t.state != nil
}
--------------------------------------------------------------------------------
/pkg/time/virtual.go:
--------------------------------------------------------------------------------
package time

import "github.com/tangledbytes/go-vsr/pkg/assert"

// Virtual is a simulated Time implementation that only advances when it is
// ticked explicitly.
type Virtual struct {
	ticks  uint64
	factor uint64
}

// NewVirtual returns a new simulated time source with the given factor.
//
// The factor determines how fast the time source ticks. A factor of 1 means
// that every tick increases the ticks by 1. A factor of 2 means that every
// tick increases the ticks by 2.
//
// The factor must be greater than 0.
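//
// A small illustrative sketch (not taken from the repository's own callers):
//
//	v := NewVirtual(2)
//	v.Tick()    // Now() == 2
//	v.Tick()    // Now() == 4
//	v.TickBy(6) // advances by 6/2 = 3, so Now() == 7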
func NewVirtual(factor uint64) *Virtual {
	assert.Assert(factor > 0, "factor must be greater than 0")

	return &Virtual{
		ticks:  0,
		factor: factor,
	}
}

func (s *Virtual) Now() uint64 {
	return s.ticks
}

func (s *Virtual) Tick() {
	s.ticks += s.factor
}

// TickBy advances virtual time by the given number of units scaled down by
// the factor. Note that the integer division drops any remainder, so units
// smaller than the factor do not advance time at all.
func (s *Virtual) TickBy(units uint64) {
	adjusted := units / s.factor
	s.ticks += adjusted
}
--------------------------------------------------------------------------------
/pkg/utils/pair.go:
--------------------------------------------------------------------------------
package utils

// Pair is a generic two-element tuple.
type Pair[A any, B any] struct {
	First  A
	Second B
}

func MakePair[A any, B any](first A, second B) Pair[A, B] {
	return Pair[A, B]{
		First:  first,
		Second: second,
	}
}
--------------------------------------------------------------------------------
/pkg/utils/rng.go:
--------------------------------------------------------------------------------
package utils

import (
	"math/rand"
)

const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

// RandomIntRange returns a random int in [start, end).
func RandomIntRange(rng *rand.Rand, start, end int) int {
	return start + rng.Intn(end-start)
}

// RandomIntSliceRange returns n random ints, each in [min, max).
func RandomIntSliceRange(rng *rand.Rand, min, max, n int) []int {
	res := make([]int, n)
	for i := 0; i < n; i++ {
		res[i] = RandomIntRange(rng, min, max)
	}

	return res
}

// RandomString returns a random 8-character ASCII string.
func RandomString(rng *rand.Rand) string {
	b := RandASCIIBytes(rng, 8)
	return string(b)
}

// RandASCIIBytes returns n random bytes drawn from letterBytes. The modulo
// introduces a slight bias towards the earlier letters, which is acceptable
// for generating test payloads.
func RandASCIIBytes(rng *rand.Rand, n int) []byte {
	output := make([]byte, n)
	randomness := make([]byte, n)
	_, err := rng.Read(randomness)
	if err != nil {
		panic(err)
	}
	l := len(letterBytes)

	// fill output
	for pos := range output {
		randomPos := randomness[pos] % uint8(l)
		output[pos] = letterBytes[randomPos]
	}

	return output
}
--------------------------------------------------------------------------------
/records/regression_seeds:
--------------------------------------------------------------------------------
4194837988223983321
1234
--------------------------------------------------------------------------------
/scripts/regression.sh:
--------------------------------------------------------------------------------
#!/bin/bash

export CPU_PROFILE=0
export HEAP_PROFILE=0
export REPLICA_DEBUG=-1
export CLIENT_DEBUG=-1
export SIMULATOR_DEBUG=0

# Run the simulator once per recorded seed; xargs exits non-zero if any
# invocation fails, which is what fails the CI job.
xargs -n 1 go run ./cmd/simulator < records/regression_seeds
--------------------------------------------------------------------------------
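A note on the RNG helpers above: everything in pkg/utils/rng.go takes an
explicit *rand.Rand instead of the global math/rand source, which is what
lets the simulator reproduce an entire run from the single seed passed on the
command line (regression.sh replays one run per recorded seed the same way).
Below is a minimal illustrative sketch of driving the helpers from a fixed
seed; the main function is hypothetical, and only the utils functions come
from this repository:

package main

import (
	"fmt"
	"math/rand"

	"github.com/tangledbytes/go-vsr/pkg/utils"
)

func main() {
	// A fixed seed makes every derived value reproducible; 1234 is one of
	// the recorded regression seeds above.
	rng := rand.New(rand.NewSource(1234))

	fmt.Println(utils.RandomIntRange(rng, 0, 100))        // one int in [0, 100)
	fmt.Println(utils.RandomIntSliceRange(rng, 0, 10, 5)) // five ints in [0, 10)
	fmt.Println(utils.RandomString(rng))                  // eight ASCII letters
}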