├── .gitignore ├── LICENSE ├── README.md ├── TODO.txt ├── scripts ├── client_metrics.py ├── config.py ├── experiment.py ├── gcloud_topology.py ├── graphs.py ├── main.py ├── results.py └── utils.py └── src ├── README ├── client └── client.go ├── cycles ├── cycles.go └── cycles_amd64.s ├── epaxos ├── epaxos-exec.go └── epaxos.go ├── epaxosproto ├── epaxosproto.go ├── epaxosproto_test.go └── epaxosprotomarsh.go ├── fastrpc └── fastrpc.go ├── genericsmr └── genericsmr.go ├── genericsmrproto ├── genericsmrproto.go ├── genericsmrproto_test.go └── gsmrprotomarsh.go ├── master └── master.go ├── masterproto └── masterproto.go ├── paxos └── paxos.go ├── paxosproto ├── paxosproto.go └── paxosprotomarsh.go ├── poisson ├── poisson.go └── poisson_test.go ├── priorityqueue ├── priorityqueue.go └── priorityqueue_test.go ├── rdtsc ├── rdtsc.s └── rdtsc_decl.go ├── server └── server.go ├── state ├── state.go ├── state.go.1k ├── statemarsh.go └── statemarsh.go.1k ├── timetrace └── timetrace.go ├── zipcalc └── zipcalc.go └── zipfian └── zipfian.go /.gitignore: -------------------------------------------------------------------------------- 1 | bin 2 | stable-store-replica* 3 | output* 4 | results 5 | .DS_Store 6 | pkg 7 | src/golang.org 8 | src/github.com 9 | src/google.golang.org 10 | __pycache__ 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Copyright 2013 Carnegie Mellon University 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | 16 | 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | EPaxos 2 | ====== 3 | 4 | This repo contains code used to evaluate the Egalitarian Paxos consensus 5 | protocol for the NSDI '21 paper [EPaxos Revisited](https://www.usenix.org/conference/nsdi21/presentation/tollman). 6 | This is a fork of the code used for the original EPaxos evaluation for 7 | [the EPaxos SOSP paper](http://dl.acm.org/ft_gateway.cfm?id=2517350&ftid=1403953&dwn=1). 8 | 9 | The `scripts` directory contains code that produces the results of our 10 | re-evaluation. The experiments are configured for Google Cloud, so you will need 11 | a valid Google Cloud project id in order to run them. 12 | You will need to update the constants in `config.py` before running the scripts. 13 | (`git update-index --skip-worktree scripts/config.py` ignores changes to that 14 | file.) 15 | 16 | `python scripts/gcloud_topology.py --create` creates instances of the correct 17 | types in the correct locations. 18 | 19 | `python main.py` runs experiments from our re-evaluation and generates 20 | graphs of the results. 21 | -------------------------------------------------------------------------------- /TODO.txt: -------------------------------------------------------------------------------- 1 | - Updated README with details instructions. 2 | - Average results over multiple trials. 3 | - Create dashboard.txt in addition to graphs. 4 | - virtualenv for experiment scripts. 5 | - Plot grid graphs. 
#!/usr/local/bin/python3

# Copyright (c) 2020 Stanford University
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR(S) DISCLAIM ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL AUTHORS BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

"""
This file computes key metrics from Paxos client logfiles. The logfile format is
specified in src/client/client.go.
"""

import json
import numpy as np
from os import path
import statistics

def get_metrics(dirname):
    """
    Computes key metrics about an experiment from the client-side logfiles, and
    returns them as a dictionary. 'dirname' specifies the directory in which the
    client-side logfiles are stored.

    Raises statistics.StatisticsError if the logfiles contain no samples.
    """
    # Third whitespace-separated column of lattput.txt is the throughput
    # sample (exact format defined in src/client/client.go).
    # split() (rather than split(' ')) tolerates runs of whitespace, and the
    # blank-line guard tolerates a trailing newline in the logfile.
    with open(path.join(dirname, 'lattput.txt')) as f:
        tputs = [float(l.split()[2]) for l in f if l.strip()]

    # latency.txt columns 2 and 3 hold the per-op execution and commit
    # latencies respectively (per src/client/client.go).
    with open(path.join(dirname, 'latency.txt')) as f:
        exec_lats = []
        commit_lats = []
        for l in f:
            if not l.strip():
                continue
            parts = l.split()
            exec_lats.append(float(parts[1]))
            commit_lats.append(float(parts[2]))

    return {
        'mean_lat_commit': statistics.mean(commit_lats),
        'p50_lat_commit': np.percentile(commit_lats, 50),
        'p90_lat_commit': np.percentile(commit_lats, 90),
        'p95_lat_commit': np.percentile(commit_lats, 95),
        'p99_lat_commit': np.percentile(commit_lats, 99),
        'mean_lat_exec': statistics.mean(exec_lats),
        'p50_lat_exec': np.percentile(exec_lats, 50),
        'p90_lat_exec': np.percentile(exec_lats, 90),
        'p95_lat_exec': np.percentile(exec_lats, 95),
        'p99_lat_exec': np.percentile(exec_lats, 99),
        'avg_tput': statistics.mean(tputs),
        'total_ops': len(exec_lats),
    }

if __name__ == '__main__':
    """
    Computes client metrics from the root epaxos directory, which is where the
    files are stored on the remote client machines. Logs the metrics to stdout
    in json format.
    """
    print(json.dumps(get_metrics(path.expanduser('~/epaxos'))))
# IN NO EVENT SHALL AUTHORS BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

"""
This file contains user-specific constants and functions. These should be
updated when the repo is cloned before running the scripts.
"""

# The ID of the Google Cloud Compute Engine project used to run the
# experiments. Must be filled in before the scripts are run.
GCLOUD_PROJECT_ID = 'TODO'

# The expanded path to the directory of the cloned epaxos repo. (This will
# likely be one directory up from the directory containing this file.)
EPAXOS_DIR = 'TODO'

def download_clock_sync_software(instance):
    """
    Given a GCloudInstance object 'instance', downloads (or copies from the
    local machine) the software that instance needs to synchronize its clock
    with other machines. Expected to be asynchronous and to return a function
    handler that waits for the process to complete when called.
    """
    # TODO: return utils.execute(command, desc)
    pass

def install_clock_sync_software(instance):
    """
    Given a GCloudInstance object 'instance' that already has the clock
    synchronization software downloaded, installs that software. Expected to
    be asynchronous and to return a function handler that waits for the
    process to complete when called.
    """
    # TODO: return utils.execute(command, desc)
    pass

def reset_clock_sync(instance):
    """
    Given a GCloudInstance object 'instance' (server or master), kills any
    running clock synchronization software and removes any remnants of it, so
    that it will run cleanly on the next attempt. Expected to be asynchronous
    and to return a function handler that waits for the process to complete
    when called.
    """
    # TODO: return utils.execute(command, desc)
    pass

def synchronize_clocks_master(master):
    """
    Given a GCloudMaster object 'master', runs the clock synchronization
    master software. Expected to be asynchronous and non-blocking.
    """
    # TODO: return utils.execute(command, desc)
    pass

def synchronize_clocks_server(server, master_ip):
    """
    Given a GCloudServer object 'server' and the string IP address 'master_ip'
    of the topology's GCloudMaster (which will be running its clock
    synchronization software), runs the clock synchronization software for a
    server. Expected to be asynchronous and non-blocking.
    """
    # TODO: return utils.execute(command, desc)
    pass
# IN NO EVENT SHALL AUTHORS BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

"""
Describes a single experiment configuration (protocol, workload, arrival rate,
batching, etc.) and converts it to/from the directory name under which its
results are stored.
"""

from os import path

EPAXOS_PROTO = 'epaxos'
MPAXOS_PROTO = 'mpaxos'
VALID_PROTOS = [EPAXOS_PROTO, MPAXOS_PROTO]

CLOCK_SYNC_NONE = 'clock-sync-none'
CLOCK_SYNC_QUORUM = 'clock-sync-quorum'
CLOCK_SYNC_QUORUM_UNION = 'clock-sync-quorum-union'
CLOCK_SYNC_CLUSTER = 'clock-sync-cluster'

TENK = 1e4
HUNDK = 1e5
MILLION = 1e6
TENMIL = 1e7
HUNDMIL = 1e8
BILLION = 1e9

# Human-readable abbreviations used inside directory names.
LARGE_INT_TO_DESC = {
    TENK: '10K',
    HUNDK: '100K',
    MILLION: '1M',
    TENMIL: '10M',
    HUNDMIL: '100M',
    BILLION: '1B',
}

DESC_TO_LARGE_INT = { v: k for k, v in LARGE_INT_TO_DESC.items() }

def _remove_prefix(s, prefix):
    """
    Removes 'prefix' from the front of 's'. (str.strip(prefix) is NOT used
    because it removes a *character set* from both ends, not a prefix, and
    can silently eat legitimate leading/trailing characters.)
    """
    return s[len(prefix):] if s.startswith(prefix) else s

class Experiment():
    """
    One experiment configuration. Serializes to a results-directory name via
    to_dirname() and deserializes via from_dirname().
    """

    class ZipfianWorkload():
        """Keys drawn from a Zipfian distribution with parameter 'theta'."""

        def __init__(self, frac_writes=1, unique_keys=MILLION, theta=.9):
            assert theta > 0 and theta < 1

            self._frac_writes = frac_writes
            self._unique_keys = int(unique_keys)
            self._theta = theta

        def unique_keys(self):
            return self._unique_keys

        def theta(self):
            return self._theta

        def __repr__(self):
            # No dot between unique keys and theta because theta is a float
            return 'zipf{}-{}-{}'.format(self._theta,
                LARGE_INT_TO_DESC[self._unique_keys], self._frac_writes)

    class FixedConflictWorkload():
        """Workload with a fixed percentage of conflicting operations."""

        def __init__(self, perc_conflict, frac_writes=1):
            self._perc_conflict = perc_conflict
            self._frac_writes = frac_writes

        def perc_conflict(self):
            return self._perc_conflict

        def __repr__(self):
            return 'fixedconf{}-{}'.format(self._perc_conflict,
                self._frac_writes)

    class OutstandingReqArrivalRate():
        """Closed-loop load: a fixed number of outstanding requests."""

        def __init__(self, outstanding_reqs):
            self._outstanding_reqs = outstanding_reqs

        def outstanding_reqs(self):
            return self._outstanding_reqs

        def __repr__(self):
            return 'or{}'.format(self._outstanding_reqs)

    class PoissonArrivalRate():
        """Open-loop load: Poisson arrivals with the given rate (per us)."""

        def __init__(self, rate_us):
            self._rate_us = rate_us

        def rate_us(self):
            return self._rate_us

        def __repr__(self):
            return 'psn{}'.format(self._rate_us)

    def __init__(self, proto, workload, arrival_rate, inffix=True,
        batching=False, vclients=10, clock_sync=CLOCK_SYNC_NONE, thrifty=False):
        assert proto in VALID_PROTOS
        self._proto = proto
        self._workload = workload
        self._arrival_rate = arrival_rate
        self._inffix = inffix
        self._batching = batching
        self._vclients = vclients
        self._clock_sync = clock_sync
        self._thrifty = thrifty

    def batching_enabled(self):
        return self._batching

    def is_epaxos(self):
        return self._proto == EPAXOS_PROTO

    def is_mpaxos(self):
        return self._proto == MPAXOS_PROTO

    def inffix(self):
        return self._inffix

    def thrifty(self):
        return self._thrifty

    def theta(self):
        # Only ZipfianWorkload carries a theta; return None for the rest.
        # (Narrowed from a bare 'except:' so real bugs are not swallowed.)
        try:
            return self._workload._theta
        except AttributeError:
            return None

    def vclients(self):
        return self._vclients

    def frac_writes(self):
        return self._workload._frac_writes

    def workload(self):
        return self._workload

    def arrival_rate(self):
        return self._arrival_rate

    def clock_sync_group(self):
        """Returns the integer clock-sync mode passed to the server binary."""
        return {
            CLOCK_SYNC_NONE: 0,
            CLOCK_SYNC_QUORUM: 1,
            CLOCK_SYNC_CLUSTER: 2,
            CLOCK_SYNC_QUORUM_UNION: 3,
        }[self._clock_sync]

    def clock_sync_str(self):
        return self._clock_sync

    def to_dirname(self, trial):
        """Serializes this configuration (plus 'trial') to a directory name."""
        return '_'.join((self._proto, str(self._workload),
            str(self._arrival_rate), 'inffix' if self._inffix else 'no-inffix',
            'batch' if self._batching else 'no-batch',
            'clients{}'.format(self._vclients), self._clock_sync,
            'thrifty' if self._thrifty else 'no-thrifty', str(trial)))

    @staticmethod
    def dirname_to_args(dirname):
        """
        Parses a directory name produced by to_dirname() back into constructor
        arguments. The format of the directory name is:
        protocol_workload_arrival-rate_inffix_batching_vclients_clocksync_thrifty_trial
        Raises ValueError on an unrecognized workload or arrival-rate prefix.
        """
        dirname = path.basename(path.normpath(dirname))

        proto, workload, arrival_rate, inffix, batching, vclients, clock_sync, \
            thrifty, trial = dirname.split('_')

        if workload.startswith('fixedconf'):
            workload = _remove_prefix(workload, 'fixedconf')
            perc_conflict, perc_writes = workload.split('-')
            perc_conflict = int(perc_conflict)
            perc_writes = int(perc_writes)
            workload = Experiment.FixedConflictWorkload(perc_conflict, perc_writes)
        elif workload.startswith('zipf'):
            workload = _remove_prefix(workload, 'zipf')
            theta, unique_keys, frac_writes = workload.split('-')
            frac_writes = float(frac_writes)
            theta = float(theta)
            unique_keys = DESC_TO_LARGE_INT[unique_keys]
            workload = Experiment.ZipfianWorkload(frac_writes, unique_keys, theta)
        else:
            raise ValueError('unrecognized workload: {}'.format(workload))

        if arrival_rate.startswith('psn'):
            arrival_rate = Experiment.PoissonArrivalRate(
                int(_remove_prefix(arrival_rate, 'psn')))
        elif arrival_rate.startswith('or'):
            arrival_rate = Experiment.OutstandingReqArrivalRate(
                int(_remove_prefix(arrival_rate, 'or')))
        else:
            raise ValueError('unrecognized arrival rate: {}'.format(arrival_rate))

        inffix = inffix == 'inffix'
        batching = batching == 'batch'
        vclients = int(_remove_prefix(vclients, 'clients'))
        thrifty = thrifty == 'thrifty'

        return proto, workload, arrival_rate, inffix, batching, vclients, clock_sync, thrifty

    @classmethod
    def from_dirname(cls, dirname):
        """Reconstructs an Experiment from a to_dirname()-style name."""
        return cls(*cls.dirname_to_args(dirname))
20 | """ 21 | 22 | import argparse 23 | import json 24 | import multiprocessing 25 | import operator 26 | import os 27 | from os import path 28 | import socket 29 | import struct 30 | 31 | import config 32 | import experiment 33 | from experiment import Experiment 34 | import utils 35 | 36 | LOCATION_TO_INDEX = { 37 | 'ca': 0, 38 | 'va': 1, 39 | 'eu': 2, 40 | 'or': 3, 41 | 'jp': 4, 42 | } 43 | 44 | class GCloudInstance(): 45 | """ 46 | """ 47 | 48 | def __init__(self, loc): 49 | self._loc = loc 50 | self._internal_ip = None 51 | self._external_ip = None 52 | 53 | def run(self): 54 | # TODO: run should take in an experiment or server/client 55 | raise NotImplementedError() 56 | 57 | def ip(self, internal=True): 58 | if internal and self._internal_ip: return self._internal_ip 59 | if not internal and self._external_ip: return self._external_ip 60 | 61 | command = 'gcloud compute instances list --filter="name={}" ' \ 62 | '--format "get(networkInterfaces[0].{})"'.format(self.id(), 63 | 'networkIP' if internal else 'accessConfigs[0].natIP') 64 | desc = '{} {} IP address'.format('internal' if internal else 'external', 65 | self.id()) 66 | 67 | ip = utils.execute(command, desc)() 68 | 69 | if internal: self._internal_ip = ip 70 | else: self._external_ip = ip 71 | 72 | return ip 73 | 74 | def _gssh(self, cmd, desc): 75 | # To see the commands that are run on each machine, uncomment the 76 | # statements below. 
77 | # print(cmd) 78 | # return lambda: None 79 | return utils.execute(self._gssh_cmd(cmd), '{}: {}'.format(self.id(), 80 | desc)) 81 | 82 | def _gssh_cmd(self, cmd): 83 | if isinstance(cmd, list): cmd = '; '.join(cmd) 84 | 85 | return 'gcloud compute ssh {} --zone {} --command=\'{}\''.format( 86 | self.id(), 87 | self.zone(), 88 | cmd 89 | ) 90 | 91 | def zone(self): 92 | return { 93 | 'ca': 'us-west2-b', 94 | 'va': 'us-east4-a', 95 | 'eu': 'europe-west2-a', 96 | 'or': 'us-west1-a', 97 | 'jp': 'asia-northeast1-a', 98 | }[self._loc] 99 | 100 | def create(self): 101 | return utils.execute('gcloud compute instances create {} ' 102 | '--zone={} ' 103 | '--machine-type=n1-standard-8 ' 104 | '--image-family=ubuntu-1804-lts ' 105 | '--image-project=ubuntu-os-cloud '.format(self.id(), 106 | self.zone()), '{}: creating machine'.format(self.id())) 107 | 108 | def start(self): 109 | return utils.execute('gcloud compute instances start ' 110 | '{} --zone {}'.format(self.id(), self.zone()), 111 | '{}: starting machine'.format(self.id())) 112 | 113 | def stop(self): 114 | return utils.execute('gcloud compute instances stop ' 115 | '{} --zone {}'.format(self.id(), self.zone()), 116 | '{}: stopping machine'.format(self.id())) 117 | 118 | def install_go(self): 119 | return self._gssh([ 120 | 'sudo apt-get purge golang -y', 121 | 'sudo add-apt-repository ppa:longsleep/golang-backports -y', 122 | 'sudo apt-get update -y', 123 | 'sudo apt-get install golang-go -y', 124 | ], 'Installing go') 125 | 126 | def download_packages(self): 127 | return self._gssh([ 128 | 'export GOPATH=~/epaxos', 129 | 'go get golang.org/x/sync/semaphore', 130 | 'go get -u google.golang.org/grpc', 131 | 'go get -u github.com/golang/protobuf/protoc-gen-go', 132 | 'go get -u github.com/VividCortex/ewma', 133 | 'export PATH=$PATH:$GOPATH/bin', 134 | # For client metrics script 135 | 'sudo apt-get install python3-pip -y && pip3 install numpy' 136 | ], 'Downloading packages') 137 | 138 | 139 | def rsync(self, 
install=False): 140 | # This is NOT SECURE, but makes it possible to connect 141 | sshopts = 'ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null' 142 | remote = '{}.{}.{}'.format(self.id(), self.zone(), 143 | config.GCLOUD_PROJECT_ID) 144 | rsync_command = 'rsync --delete --exclude-from "{f}/.gitignore" ' \ 145 | '-re "{sshopts}" {f} {remote}:~'.format(sshopts=sshopts, 146 | f=config.EPAXOS_DIR, remote=remote) 147 | gocmd = ['export GOPATH=~/epaxos'] 148 | if install: 149 | gocmd.extend([ 150 | 'go get golang.org/x/sync/semaphore', 151 | 'go get -u google.golang.org/grpc', 152 | 'go get -u github.com/golang/protobuf/protoc-gen-go', 153 | 'go get -u github.com/VividCortex/ewma', 154 | 'export PATH=$PATH:$GOPATH/bin' 155 | ]) 156 | gocmd.extend([ 157 | 'go clean', 158 | 'go install master', 159 | 'go install server', 160 | 'go install client' 161 | ]) 162 | 163 | install_command = self._gssh_cmd(gocmd) 164 | 165 | return utils.execute([rsync_command, install_command], 'Rsync') 166 | 167 | def kill(self): 168 | return NotImplementedError() 169 | 170 | def metrics_filenames(self): 171 | return NotImplementedError() 172 | 173 | # Returns a handler for when we want to complete the processes 174 | # If start is true, the file sizes are stored as the start points, otherwise 175 | # as the end points. 
176 | def get_file_sizes(self, start): 177 | processes = [ 178 | (self._gssh('stat -c %s epaxos/{}'.format(f), 179 | '{} size'.format(f)), f) for f in self.metrics_filenames() 180 | ] 181 | 182 | def handler(): 183 | sizes = { 184 | f : p() for p, f in processes 185 | } 186 | if start: 187 | self._file_starts = sizes 188 | else: 189 | self._file_ends = sizes 190 | 191 | return handler 192 | 193 | def trim_files(self): 194 | assert (self._file_starts is not None and self._file_ends is not None) 195 | 196 | trim_file = lambda f: self._gssh('cp epaxos/{fname} ' 197 | 'epaxos/{fname}_full && truncate -s {end} epaxos/{fname} && ' 198 | 'echo "$(tail -c +{start} epaxos/{fname})" > epaxos/{fname} && ' 199 | 'sed -i "1d;$d" epaxos/{fname}'.format(fname=f, 200 | start=self._file_starts[f], end=self._file_ends[f]), 201 | 'trimming {}'.format(f)) 202 | processes = [trim_file(f) for f in self.metrics_filenames()] 203 | 204 | def handler(): 205 | for p in processes: p() 206 | return handler 207 | 208 | def copy_output_file(self, dst_dirname): 209 | raise NotImplementedError() 210 | 211 | def copy_files(self, dst_dirname): 212 | copy_file = lambda f: \ 213 | utils.execute('gcloud compute scp {}:epaxos/{} {} --zone {}'.format( 214 | self.id(), 215 | f, 216 | path.join(dst_dirname, f), 217 | self.zone() 218 | ), '{}: copying metrics files'.format(self.id())) 219 | 220 | processes = list(map(copy_file, self.metrics_filenames())) 221 | 222 | def handler(): 223 | for p in processes: p() 224 | return handler 225 | 226 | class GCloudServer(GCloudInstance): 227 | def id(self): 228 | return 'server-{}'.format(self._loc) 229 | 230 | def flags(self, master_ip, expt): 231 | port = 7070 + LOCATION_TO_INDEX[self._loc] 232 | flags = [ 233 | '-port {}'.format(port), 234 | '-maddr {}'.format(master_ip), 235 | '-addr {}'.format(self.ip(internal=True)), 236 | '-clocksync {} -clockepsilon 0'.format(expt.clock_sync_group()), 237 | ] 238 | if expt.is_epaxos(): flags.append('-e') 239 | if 
expt.batching_enabled(): flags.append('-batch') 240 | if expt.inffix(): flags.append('-inffix') 241 | if expt.thrifty(): flags.append('-beacon -thrifty') 242 | 243 | return ' '.join(flags) 244 | 245 | def run(self, master_ip, expt): 246 | flags = self.flags(master_ip, expt) 247 | return self._gssh('cd epaxos && bin/server {} > output.txt 2>&1'.format(flags), 'run') 248 | 249 | def kill(self): 250 | return self._gssh('kill $(pidof bin/server)', 'force kill') 251 | 252 | def get_conflict_rate(self, start): 253 | def target(q): 254 | with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: 255 | port = 7070 + LOCATION_TO_INDEX[self._loc] 256 | s.connect((self.ip(internal=False), port)) 257 | s.sendall(struct.pack('!BB', 4, 0)) 258 | data = s.recv(1+8*2) 259 | kFast, kSlow = struct.unpack('qq', data) 260 | q.put((kFast, kSlow)) 261 | 262 | q = multiprocessing.Queue() 263 | p = multiprocessing.Process(target=target, args=(q,)) 264 | p.start() 265 | def handler(): 266 | p.join(10) 267 | if p.is_alive(): 268 | p.kill() 269 | raise Exception() 270 | if p.exitcode != 0: 271 | raise Exception() 272 | resp = q.get() 273 | if start: 274 | self._start_conflict = resp 275 | else: 276 | self._end_conflict = resp 277 | return handler 278 | 279 | def compute_conflict(self): 280 | assert self._start_conflict and self._end_conflict 281 | 282 | total_fast = self._end_conflict[0] - self._start_conflict[0] 283 | total_slow = self._end_conflict[1] - self._start_conflict[1] 284 | total = total_fast + total_slow 285 | conflict = total_slow/total if total > 0 else 0 286 | return total_fast, total_slow, conflict 287 | 288 | def copy_output_file(self, dst_dirname): 289 | return utils.execute('gcloud compute scp {}:epaxos/output.txt {} --zone {}'.format( 290 | self.id(), 291 | path.join(dst_dirname, 'server_output.txt'), 292 | self.zone() 293 | ), '{}: copying output file'.format(self.id())) 294 | 295 | # Master is co-located with server 296 | class GCloudMaster(GCloudServer): 297 | def 
run_master(self, server_ips): 298 | return self._gssh('cd epaxos && bin/master -N {} -ips {} > moutput.txt 2>&1'.format( 299 | len(server_ips), ','.join(server_ips)), 'run') 300 | 301 | def kill(self): 302 | return self._gssh('kill $(pidof bin/master) $(pidof bin/server)', 'force kill') 303 | 304 | def copy_output_file(self, dst_dirname): 305 | return utils.execute('gcloud compute scp {}:epaxos/moutput.txt {} --zone {}'.format( 306 | self.id(), 307 | path.join(dst_dirname, 'master_output.txt'), 308 | self.zone() 309 | ), '{}: copying output file'.format(self.id())) 310 | 311 | class GCloudClient(GCloudInstance): 312 | def id(self): 313 | return 'client-{}'.format(self._loc) 314 | 315 | def metrics_filenames(self): 316 | return ['latency.txt', 'lattput.txt'] 317 | 318 | def flags(self, master_ip, expt): 319 | flags = [ 320 | '-maddr {}'.format(master_ip), 321 | '-T {}'.format(expt.vclients()), 322 | '-writes {}'.format(expt.frac_writes()), 323 | ] 324 | 325 | # force leader 326 | if expt.is_epaxos(): 327 | flags.append(' -l {}'.format(LOCATION_TO_INDEX[self._loc])) 328 | 329 | workload = expt.workload() 330 | if isinstance(workload, Experiment.FixedConflictWorkload): 331 | # don't overlap start regions 332 | flags.append('-sr {}'.format(LOCATION_TO_INDEX[self._loc] * 500000)) 333 | flags.append('-c {}'.format(workload.perc_conflict())) 334 | elif isinstance(workload, Experiment.ZipfianWorkload): 335 | flags.append('-c -1') 336 | flags.append('-z {}'.format(workload.unique_keys())) 337 | flags.append('-theta {}'.format(workload.theta())) 338 | 339 | arrival_rate = expt.arrival_rate() 340 | if isinstance(arrival_rate, Experiment.OutstandingReqArrivalRate): 341 | flags.append('-or {}'.format(arrival_rate.outstanding_reqs())) 342 | elif isinstance(arrival_rate, Experiment.PoissonArrivalRate): 343 | # flags.append('-or {}'.format(int(15000/expt.vclients()))) # Cap poisson at 1500 344 | flags.append('-or {}'.format(int(10))) # 16 345 | flags.append('-poisson 
{}'.format(arrival_rate.rate_us())) 346 | 347 | return ' '.join(flags) 348 | 349 | def run(self, master_ip, expt): 350 | flags = self.flags(master_ip, expt) 351 | return self._gssh('cd epaxos && bin/client {} > output.txt 2>&1'.format(flags), 'run') 352 | 353 | def kill(self): 354 | return self._gssh('kill $(pidof bin/client)', 'force kill') 355 | 356 | def get_metrics(self): 357 | return self._gssh('python3 epaxos/scripts/client_metrics.py', 358 | 'getting metrics') 359 | 360 | def copy_output_file(self, dst_dirname): 361 | return utils.execute('gcloud compute scp {}:epaxos/output.txt {} --zone {}'.format( 362 | self.id(), 363 | path.join(dst_dirname, 'client_output.txt'), 364 | self.zone() 365 | ), '{}: copying output file'.format(self.id())) 366 | 367 | class GCloudTopology(): 368 | def __init__(self, locs=['ca', 'va', 'eu', 'or', 'jp']): 369 | assert len(locs) > 0 370 | 371 | self._locs = locs 372 | self._master = GCloudMaster(locs[0]) 373 | self._servers = [self._master] + [GCloudServer(l) for l in locs[1:]] 374 | self._clients = [GCloudClient(l) for i, l in enumerate(locs)] 375 | self._instances = self._servers + self._clients 376 | 377 | def _run_all(self, method, args=[], instances=None): 378 | if instances is None: instances = self._instances 379 | 380 | get_method = operator.methodcaller(method, *args) 381 | return list(map(get_method, instances)) # converting to list gets it to actually run 382 | 383 | def _complete_all(self, method, args=[], instances=None): 384 | for handler in self._run_all(method, args, instances): handler() 385 | 386 | def create(self): 387 | self._complete_all('create') 388 | 389 | def install_go(self): 390 | self._complete_all('install_go') 391 | 392 | def download_packages(self): 393 | self._complete_all('download_packages') 394 | 395 | def _expose_ports(self, rulename, ports, instances, description): 396 | return utils.execute('gcloud compute firewall-rules create {rulename} --allow ' 397 | 'tcp:{ports} --source-tags={instances} 
--source-ranges=0.0.0.0/0 ' 398 | '--description="{description}"'.format(rulename=rulename, 399 | ports=ports, instances=','.join([i.id() for i in instances]), 400 | description=description), 401 | 'Exposing ports with description "{}"'.format(description)) 402 | 403 | def expose_ports(self): 404 | """ 405 | Exposes ports on each server so that we can be a mock client and get the 406 | conflict rate metrics. 407 | """ 408 | self._expose_ports('mock-client', '7000-8000', self._servers, 409 | 'Ports through which clients contact servers')() 410 | self._expose_ports('clock-sync-ui', '9001', [self._master], 411 | 'Port for the clock synchronization software UI')() 412 | 413 | def rsync(self, install=False): 414 | utils.execute('gcloud compute config-ssh', 'gssh init')() 415 | self._complete_all('rsync', args=[install]) 416 | 417 | def download_clock_sync(self): 418 | for handler in list(map(config.download_clock_sync_software, 419 | self._servers)): handler() 420 | 421 | def install_clock_sync(self): 422 | for handler in list(map(config.install_clock_sync_software, 423 | self._servers)): handler() 424 | 425 | def synchronize_clocks(self): 426 | for handler in list(map(config.reset_clock_sync, 427 | self._servers)): handler() 428 | 429 | print('Synchronizing clocks ...') 430 | 431 | config.synchronize_clocks_master(self._master) 432 | utils.sleep_verbose('Wait for master to sync', 10) 433 | 434 | for s in self._servers: 435 | config.synchronize_clocks_server(s, self._master.ip()) 436 | 437 | print('Server clocks should be synced. 
UI available at {}:9001'.format( 438 | self._master.ip(internal=False))) 439 | 440 | def start(self): 441 | self._complete_all('start') 442 | 443 | def stop(self): 444 | self._complete_all('stop') 445 | 446 | def kill(self): 447 | self._complete_all('kill') 448 | 449 | def run(self, expt, dirname, stabilize_delay=5, capture_delay=5, full_metrics=False): # TODO: change full metrics to FALSE 450 | with open(path.join(dirname, 'flags.txt'), 'w') as f: 451 | print('Server flags: {}\nClient flags: {}'.format( 452 | self._servers[0].flags(self._master.ip(internal=True), expt), 453 | self._clients[0].flags(self._master.ip(internal=True), expt)), 454 | file=f) 455 | 456 | try: 457 | utils.run_verbose('Starting master', self._master.run_master, 458 | [[_.ip() for _ in self._servers]]) 459 | utils.run_verbose('Starting servers', self._run_all, ['run', 460 | [self._master.ip(), expt], self._servers]) 461 | utils.sleep_verbose('Letting servers connect', 5) 462 | utils.run_verbose('Starting clients', self._run_all, ['run', 463 | [self._master.ip(), expt], self._clients]) 464 | 465 | utils.sleep_verbose('Stabilizing', stabilize_delay) 466 | 467 | utils.run_verbose('Getting conflict rate', self._complete_all, 468 | ['get_conflict_rate', [True], self._servers]) 469 | utils.run_verbose('Getting file sizes', self._complete_all, 470 | ['get_file_sizes', [True], self._clients]) 471 | 472 | utils.sleep_verbose('Capturing', capture_delay) 473 | 474 | utils.run_verbose('Getting conflict rate', self._complete_all, 475 | ['get_conflict_rate', [False], self._servers]) 476 | utils.run_verbose('Getting file sizes', self._complete_all, 477 | ['get_file_sizes', [False], self._clients]) 478 | 479 | finally: 480 | # Kill all machines to cleanup when interrupted 481 | utils.run_verbose('Killing Paxos processes', self.kill) 482 | 483 | utils.run_verbose('Trimming files', self._complete_all, ['trim_files', 484 | [], self._clients]) 485 | 486 | def get_metrics(): 487 | handlers = [c.get_metrics() for 
c in self._clients] 488 | metrics = dict() 489 | for i, h in enumerate(handlers): 490 | metrics[self._locs[i]] = json.loads(h()) 491 | total_fast, total_slow, conflict = \ 492 | self._servers[i].compute_conflict() 493 | metrics[self._locs[i]]['total_fast'] = total_fast 494 | metrics[self._locs[i]]['total_slow'] = total_slow 495 | metrics[self._locs[i]]['conflict_rate'] = conflict 496 | with open(path.join(dirname, 'metrics.txt'), 'w') as f: 497 | print(json.dumps(metrics, indent=4), file=f) 498 | utils.run_verbose('Getting metrics', get_metrics) 499 | 500 | if full_metrics: 501 | def download_files(): 502 | handlers = [] 503 | for i, c in enumerate(self._clients): 504 | metrics_dir = path.join(dirname, self._locs[i]) 505 | os.mkdir(metrics_dir) 506 | h = c.copy_files(metrics_dir) 507 | handlers.append(h) 508 | handlers.append(c.copy_output_file(metrics_dir)) 509 | handlers.append(self._servers[i].copy_output_file(metrics_dir)) 510 | handlers.append(self._master.copy_output_file(dirname)) 511 | for h in handlers: h() 512 | utils.run_verbose('Downloading metrics files', download_files) 513 | 514 | 515 | if __name__ == '__main__': 516 | """ 517 | Performs the action on the Google Cloud topology that is associated with the 518 | provided command line argument. 
519 | """ 520 | parser = argparse.ArgumentParser() 521 | group = parser.add_mutually_exclusive_group(required=True) 522 | group.add_argument('--start', help='Power on Google Cloud instances', 523 | action='store_true') 524 | group.add_argument('--clock_sync', help='Synchronize clocks of servers', 525 | action='store_true') 526 | group.add_argument('--stop', help='Power off Google Cloud instances', 527 | action='store_true') 528 | group.add_argument('--cleanup', help='Manually kill all Paxos processes ' 529 | 'running on Google Cloud instances', action='store_true') 530 | group.add_argument('--create', help='Instantiates Google Cloud instances ' 531 | 'associated with the test topology and provisions them so that they ' 532 | 'are ready to run experiments', action='store_true') 533 | 534 | args = parser.parse_args() 535 | 536 | topo = GCloudTopology() 537 | 538 | if args.create: 539 | # This is broken up into so many different commands so that if something 540 | # times out or fails it is easy to pinpoint which command it was and 541 | # restart just that portion. 
542 | utils.run_verbose('Creating instances', topo.create) 543 | utils.run_verbose('Exposing ports', topo.expose_ports) 544 | utils.run_verbose('Installing go', topo.install_go) 545 | utils.run_verbose('Rsyncing instances', topo.rsync, [True]) 546 | utils.run_verbose('Downloading packages', topo.download_packages) 547 | utils.run_verbose('Downloading clock synchronization software', 548 | topo.download_clock_sync) 549 | utils.run_verbose('Installing clock synchronization software', 550 | topo.install_clock_sync) 551 | 552 | elif args.start: 553 | utils.run_verbose('Starting machines', topo.start) 554 | 555 | elif args.clock_sync: 556 | topo.synchronize_clocks() 557 | 558 | elif args.stop: 559 | utils.run_verbose('Stopping machines', topo.stop) 560 | 561 | elif args.cleanup: 562 | utils.run_verbose('Cleanup machines', topo.kill) 563 | 564 | -------------------------------------------------------------------------------- /scripts/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/local/bin/python3 2 | 3 | # Copyright (c) 2020 Stanford University 4 | # 5 | # Permission to use, copy, modify, and distribute this software for any 6 | # purpose with or without fee is hereby granted, provided that the above 7 | # copyright notice and this permission notice appear in all copies. 8 | # 9 | # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR(S) DISCLAIM ALL WARRANTIES 10 | # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 | # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL AUTHORS BE LIABLE FOR 12 | # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 | # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 | # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 | # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
16 | 17 | """ 18 | TODO(sktollman): add file comment 19 | """ 20 | 21 | import argparse 22 | import os, sys 23 | from os import path 24 | import shutil 25 | 26 | from experiment import Experiment, MPAXOS_PROTO, EPAXOS_PROTO, \ 27 | CLOCK_SYNC_NONE, CLOCK_SYNC_QUORUM, CLOCK_SYNC_QUORUM_UNION, \ 28 | CLOCK_SYNC_CLUSTER 29 | from gcloud_topology import GCloudTopology 30 | import graphs 31 | import utils 32 | 33 | topo = GCloudTopology() 34 | 35 | def run(expts, root_dirname, full_results=False, trials=1): 36 | if not path.exists(root_dirname): 37 | os.mkdir(root_dirname) 38 | 39 | for i in range(trials): 40 | for expt in expts: 41 | expt_dirname = path.join(root_dirname, expt.to_dirname(i)) 42 | 43 | # If the directory already exists, skip it because the experiment 44 | # has already been run 45 | if path.exists(expt_dirname): 46 | continue 47 | 48 | if expt != expts[0]: 49 | utils.sleep_verbose('Letting machines chill', 5) 50 | 51 | for attempt in range(3): # 3 attempts 52 | try: 53 | os.mkdir(expt_dirname) 54 | 55 | print('## {} ##'.format(expt.to_dirname(i))) 56 | 57 | topo.run(expt, expt_dirname, stabilize_delay=20, 58 | capture_delay=60, full_metrics=full_results) 59 | break # Ran successfully, don't need to attempt again 60 | except (KeyboardInterrupt, SystemExit): 61 | raise 62 | except: 63 | shutil.rmtree(expt_dirname) 64 | print('** RETRYING **', file=sys.stderr) 65 | 66 | # If we didn't break out of the loop, we couldn't successfully 67 | # run the expt 68 | else: 69 | print('** FAILED **', file=sys.stderr) 70 | 71 | print() # Newline between experiments 72 | 73 | if __name__ == '__main__': 74 | parser = argparse.ArgumentParser() 75 | parser.add_argument('--rsync', help='Whether to copy the latest version of ' 76 | 'the files with all machines', default=False, action='store_true') 77 | parser.add_argument('--off', help='Whether to power off the machines at the' 78 | ' end of running the experiments', default=False, action='store_true') 79 | args = 
parser.parse_args() 80 | if args.rsync: 81 | utils.run_verbose('Rsyncing instances', topo.rsync) 82 | print() 83 | 84 | try: 85 | print('##################') 86 | print('## Reproduction ##') 87 | print('##################') 88 | repro_arrival_rate = Experiment.OutstandingReqArrivalRate(1) 89 | reproduction_expts = [ 90 | Experiment(MPAXOS_PROTO, Experiment.ZipfianWorkload(), 91 | repro_arrival_rate), 92 | Experiment(EPAXOS_PROTO, Experiment.ZipfianWorkload(), 93 | repro_arrival_rate), 94 | Experiment(EPAXOS_PROTO, Experiment.FixedConflictWorkload(0), 95 | repro_arrival_rate), 96 | Experiment(EPAXOS_PROTO, Experiment.FixedConflictWorkload(2), 97 | repro_arrival_rate), 98 | Experiment(EPAXOS_PROTO, Experiment.FixedConflictWorkload(100), 99 | repro_arrival_rate), 100 | ] 101 | run(reproduction_expts, 'results/reproduction_1or', trials=1) 102 | graphs.reproduction_bar('results/reproduction_1or') 103 | 104 | print('##############') 105 | print('## Batching ##') 106 | print('##############') 107 | # .9, 50% writes, batching, no batching, mpaxos, psn 4500 108 | default_arrival_rate = Experiment.PoissonArrivalRate(4500) 109 | batching_workload = Experiment.ZipfianWorkload(theta=.9, frac_writes=.5) 110 | batching_expts = [ 111 | Experiment(MPAXOS_PROTO, batching_workload, default_arrival_rate), 112 | Experiment(EPAXOS_PROTO, batching_workload, default_arrival_rate), 113 | Experiment(EPAXOS_PROTO, batching_workload, default_arrival_rate, 114 | batching=True), 115 | ] 116 | run(batching_expts, 'results/batching', trials=5) 117 | graphs.batching_bar('results/batching') 118 | 119 | print('#########') 120 | print('## OSC ##') 121 | print('#########') 122 | default_arrival_rate = Experiment.PoissonArrivalRate(4500) 123 | osc_workload = Experiment.ZipfianWorkload(theta=.99, frac_writes=1) 124 | osc_expts = [ 125 | Experiment(MPAXOS_PROTO, osc_workload, default_arrival_rate), 126 | ] 127 | for w in [ 128 | osc_workload, 129 | Experiment.ZipfianWorkload(theta=.8, frac_writes=.5), 
130 | Experiment.ZipfianWorkload(theta=.99, frac_writes=1) 131 | ]: 132 | osc_expts.extend([ 133 | Experiment(EPAXOS_PROTO, w, default_arrival_rate, 134 | clock_sync=CLOCK_SYNC_NONE), 135 | Experiment(EPAXOS_PROTO, w, default_arrival_rate, 136 | clock_sync=CLOCK_SYNC_QUORUM), 137 | Experiment(EPAXOS_PROTO, w, default_arrival_rate, 138 | clock_sync=CLOCK_SYNC_QUORUM_UNION), 139 | Experiment(EPAXOS_PROTO, w, default_arrival_rate, 140 | clock_sync=CLOCK_SYNC_CLUSTER), 141 | ]) 142 | run(osc_expts, 'results/osc', trials=5) 143 | graphs.osc_bar('results/osc') 144 | 145 | print('##############') 146 | print('## Thrifty ##') 147 | print('##############') 148 | default_arrival_rate = Experiment.PoissonArrivalRate(4500) 149 | thrifty_expts = [ 150 | Experiment(EPAXOS_PROTO, Experiment.ZipfianWorkload(theta=.9, 151 | frac_writes=.5), default_arrival_rate), 152 | Experiment(EPAXOS_PROTO, Experiment.ZipfianWorkload(theta=.9, 153 | frac_writes=.5), default_arrival_rate, thrifty=True), 154 | 155 | Experiment(EPAXOS_PROTO, Experiment.ZipfianWorkload(theta=.99, 156 | frac_writes=1), default_arrival_rate), 157 | Experiment(EPAXOS_PROTO, Experiment.ZipfianWorkload(theta=.99, 158 | frac_writes=1), default_arrival_rate, thrifty=True), 159 | 160 | Experiment(EPAXOS_PROTO, Experiment.ZipfianWorkload(theta=.7, 161 | frac_writes=.3), default_arrival_rate), 162 | Experiment(EPAXOS_PROTO, Experiment.ZipfianWorkload(theta=.7, 163 | frac_writes=.3), default_arrival_rate, thrifty=True), 164 | ] 165 | run(thrifty_expts, 'results/thrifty', trials=5) 166 | graphs.thrifty_bar('results/thrifty') 167 | 168 | finally: 169 | os.system('afplay /System/Library/Sounds/Sosumi.aiff') # Beep when done 170 | 171 | if args.off: 172 | utils.run_verbose('Powering off instances', topo.stop) 173 | print() 174 | -------------------------------------------------------------------------------- /scripts/results.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/local/bin/python3

# Copyright (c) 2020 Stanford University
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR(S) DISCLAIM ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL AUTHORS BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

"""
Accessors for the metrics produced by a single experiment run.

A Results object wraps one experiment output directory: the summary metrics
in metrics.txt are parsed eagerly in __init__, while the per-request latency
files (<loc>/latency.txt and <loc>/lattput.txt) are parsed lazily on first
access and cached per location.
"""

import json
from os import path

from experiment import Experiment

# Converts a nanosecond reading (int or numeric string) to seconds.
ns_to_s = lambda x: float(x)/1e9

class Results(Experiment):
    """
    Parsed results of one experiment, keyed by client location (e.g. 'ca',
    'va'). Inherits the experiment parameters by reconstructing them from
    the directory name via Experiment.dirname_to_args.
    """

    def __init__(self, dirname):
        """Loads metrics.txt from 'dirname' and sets up lazy latency caches."""
        super().__init__(*self.dirname_to_args(dirname))
        with open(path.join(dirname, 'metrics.txt')) as f:
            self._data = json.load(f)
        self._dirname = dirname
        # Per-location caches filled on demand by _parse_alllats().
        self._all_lats_timestamps = dict()
        self._all_lats_commit = dict()
        self._all_lats_exec = dict()

    def mean_lat_commit(self, loc):
        """
        Mean commit latency at 'loc'. Falls back to the execution latency
        when the recorded commit latency is not positive.
        """
        # TODO: fix!!
        lat = self._data[loc]['mean_lat_commit']
        if lat > 0: return lat
        else: return self.mean_lat_exec(loc)

    def p50_lat_commit(self, loc):
        """Median commit latency at 'loc'."""
        return self._data[loc]['p50_lat_commit']

    def p90_lat_commit(self, loc):
        """90th-percentile commit latency at 'loc'."""
        return self._data[loc]['p90_lat_commit']

    def p95_lat_commit(self, loc):
        """95th-percentile commit latency at 'loc'."""
        return self._data[loc]['p95_lat_commit']

    def p99_lat_commit(self, loc):
        """
        99th-percentile commit latency at 'loc', falling back to the
        execution latency when the recorded value is not positive.
        """
        lat = self._data[loc]['p99_lat_commit']
        if lat > 0: return lat
        else: return self.p99_lat_exec(loc)

    def mean_lat_exec(self, loc):
        """Mean execution latency at 'loc'."""
        return self._data[loc]['mean_lat_exec']

    def p50_lat_exec(self, loc):
        """Median execution latency at 'loc'."""
        return self._data[loc]['p50_lat_exec']

    def p90_lat_exec(self, loc):
        """90th-percentile execution latency at 'loc'."""
        return self._data[loc]['p90_lat_exec']

    def p95_lat_exec(self, loc):
        """95th-percentile execution latency at 'loc'."""
        return self._data[loc]['p95_lat_exec']

    def p99_lat_exec(self, loc):
        """99th-percentile execution latency at 'loc'."""
        return self._data[loc]['p99_lat_exec']

    def _parse_alllats(self, loc):
        """
        Parses <loc>/latency.txt (one request per line: timestamp, exec
        latency, commit latency) into the three caches. Non-positive commit
        latencies are dropped, so the commit list may be shorter than the
        timestamp/exec lists.
        """
        fname = path.join(self._dirname, loc, 'latency.txt')
        timestamps = []
        commit_lats = []
        exec_lats = []
        with open(fname) as f:
            for l in f:
                toks = l.split()
                timestamp = int(toks[0])
                timestamps.append(timestamp)
                exec_lat = float(toks[1])
                exec_lats.append(exec_lat)
                commit_lat = float(toks[2])
                if commit_lat > 0:
                    commit_lats.append(commit_lat)

        self._all_lats_timestamps[loc] = timestamps
        self._all_lats_commit[loc] = commit_lats
        self._all_lats_exec[loc] = exec_lats

    def all_lats_timestamps(self, loc):
        """Per-request timestamps at 'loc', parsing latency.txt on first use."""
        if not loc in self._all_lats_timestamps:
            self._parse_alllats(loc)

        return self._all_lats_timestamps[loc]

    def all_lats_commit(self, loc):
        """Per-request commit latencies at 'loc' (positive values only)."""
        if not loc in self._all_lats_commit:
            self._parse_alllats(loc)

        return self._all_lats_commit[loc]

    def all_lats_exec(self, loc):
        """Per-request execution latencies at 'loc'."""
        if not loc in self._all_lats_exec:
            self._parse_alllats(loc)

        return self._all_lats_exec[loc]

    def parse_lattput(self, loc):
        """
        Parses <loc>/lattput.txt and returns four parallel lists:
        (timestamps in seconds relative to the first line, average latencies,
        throughputs, outstanding-request counts).
        """
        # time (ns), avg lat over the past second, tput since last line, total count, totalOrs, avg commit lat over the past second
        fname = path.join(self._dirname, loc, 'lattput.txt')
        start_time = None
        timestamps = []
        lats = []
        tputs = []
        oreqs = []
        with open(fname) as f:
            for l in f:
                toks = l.split()
                timestamp_s = ns_to_s(int(toks[0]))
                if start_time is None: start_time = timestamp_s
                # Report times relative to the first sample.
                timestamp_s -= start_time
                avg_lat = float(toks[1])
                avg_tput = float(toks[2])
                oreq = int(toks[4])

                timestamps.append(timestamp_s)
                lats.append(avg_lat)
                tputs.append(avg_tput)
                oreqs.append(oreq)

        return timestamps, lats, tputs, oreqs

    def avg_tput(self, loc):
        """Average throughput observed at 'loc'."""
        return self._data[loc]['avg_tput']

    def total_tput(self):
        """Sum of the average throughputs across all locations."""
        return sum([self._data[loc]['avg_tput'] for loc in self._data])

    def total_ops(self, loc):
        """Total number of operations completed at 'loc'."""
        return self._data[loc]['total_ops']

    def total_fast(self, loc):
        """Number of operations committed on the fast path at 'loc'."""
        return self._data[loc]['total_fast']

    def total_slow(self, loc):
        """Number of operations committed on the slow path at 'loc'."""
        return self._data[loc]['total_slow']

    def conflict_rate(self, loc):
        """Observed conflict rate at 'loc'."""
        return self._data[loc]['conflict_rate']

    def description(self):
        """
        Short human-readable label for this experiment: 'MPaxos' for
        Multi-Paxos, the conflict percentage for fixed-conflict EPaxos
        workloads, or 'Zipf' for Zipfian EPaxos workloads.
        """
        if self.is_mpaxos():
            return 'MPaxos'
        elif self.is_epaxos():
            if isinstance(self._workload, self.FixedConflictWorkload):
                return '{}%'.format(self._workload._perc_conflict)
            elif isinstance(self._workload, self.ZipfianWorkload):
                return 'Zipf'
                # return 'Z.{}'.format(str(self._workload._theta)[2:]) # remove leading 0.
163 | 164 | # If we get here we couldn't come up with a valid name 165 | raise Exception() 166 | 167 | -------------------------------------------------------------------------------- /scripts/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/local/bin/python3 2 | 3 | # Copyright (c) 2020 Stanford University 4 | # 5 | # Permission to use, copy, modify, and distribute this software for any 6 | # purpose with or without fee is hereby granted, provided that the above 7 | # copyright notice and this permission notice appear in all copies. 8 | # 9 | # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR(S) DISCLAIM ALL WARRANTIES 10 | # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 | # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL AUTHORS BE LIABLE FOR 12 | # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 | # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 | # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 | # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 | 17 | """ 18 | This file contains miscellaneous functions that may be useful across modules. 19 | """ 20 | 21 | import psutil 22 | import os 23 | from os import path 24 | import subprocess 25 | import sys 26 | import time 27 | from tqdm import tqdm 28 | 29 | def execute(cmd, desc): 30 | """ 31 | Runs 'command' as a shell process, returning a function handler that will 32 | wait for the process to complete when called. 'desc' provides identifying 33 | information about the command. 
34 | """ 35 | if isinstance(cmd, list): cmd = '; '.join(cmd) 36 | 37 | p = subprocess.Popen( 38 | cmd, 39 | stdout=subprocess.PIPE, 40 | stderr=subprocess.PIPE, 41 | text=True, 42 | shell=True, 43 | executable='/usr/local/bin/bash', 44 | ) 45 | return lambda: complete_process(p, desc) 46 | 47 | def complete_process(process, desc): 48 | """ 49 | Waits for 'process', a shell process, to complete. Returns the stdout of the 50 | process. If the process returns an error code, prints the stderr of the 51 | process. 'desc' provides identifying information about the command, and is 52 | printed in the case of an error. 53 | """ 54 | p = psutil.Process(process.pid) 55 | children = p.children(recursive=True) 56 | 57 | out, err = process.communicate() 58 | retcode = process.returncode 59 | out = out.strip() 60 | 61 | if retcode != 0: 62 | err = err.strip() 63 | if err: print('ERROR when completing process "{}": {}'.format(desc, err), 64 | file=sys.stderr) 65 | 66 | for cp in children: 67 | if psutil.pid_exists(cp.pid): 68 | cp.kill() 69 | 70 | del process 71 | return out 72 | 73 | def sleep_verbose(message, delay): 74 | """ 75 | Pauses program execution for 'delay' seconds. Prints '[message]: x/delay', 76 | where x indicates the number of seconds that have passed so far, updated 77 | every second. 78 | """ 79 | for i in tqdm(range(delay), desc=message, total=delay, 80 | bar_format='{desc}: {n_fmt}/{total_fmt}'): 81 | time.sleep(1) 82 | 83 | def run_verbose(message, fn, args=[]): 84 | """ 85 | Calls the function provided by 'fn' with the parameters provided in 'args'. 86 | Prints '[message] ... done', with '[message] ...' printed at the start of 87 | the function call and 'done' printed at the end of the function call. 
88 | """ 89 | print('{} ...'.format(message), end=' ', flush=True) 90 | result = fn(*args) 91 | print('done') 92 | return result 93 | 94 | def subdirectories(dirname): 95 | """ 96 | Returns all directory entries that are also directories at the path provided 97 | by 'dirname'. Ignores the python cache. 98 | """ 99 | return (d for d in os.listdir(dirname) \ 100 | if path.isdir(path.join(dirname, d)) and d != '__pycache__') 101 | -------------------------------------------------------------------------------- /src/README: -------------------------------------------------------------------------------- 1 | Built with Go version go1.1.2 2 | 3 | To build: 4 | 5 | export GOPATH=[...]/git/epaxos/ 6 | 7 | go install master 8 | go install server 9 | go install client 10 | 11 | To run: 12 | 13 | bin/master & 14 | bin/server -port 7070 & 15 | bin/server -port 7071 & 16 | bin/server -port 7072 & 17 | bin/client 18 | -------------------------------------------------------------------------------- /src/client/client.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "context" 6 | "flag" 7 | "fmt" 8 | "genericsmrproto" 9 | "golang.org/x/sync/semaphore" 10 | "log" 11 | "masterproto" 12 | "math/rand" 13 | "net" 14 | "net/rpc" 15 | "os" 16 | "poisson" 17 | "runtime" 18 | "state" 19 | "sync" 20 | "time" 21 | "zipfian" 22 | ) 23 | 24 | var masterAddr *string = flag.String("maddr", "", "Master address. Defaults to localhost") 25 | var masterPort *int = flag.Int("mport", 7087, "Master port.") 26 | var procs *int = flag.Int("p", 2, "GOMAXPROCS.") 27 | var conflicts *int = flag.Int("c", 0, "Percentage of conflicts. 
If -1, uses Zipfian distribution.") 28 | var forceLeader = flag.Int("l", -1, "Force client to talk to a certain replica.") 29 | var startRange = flag.Int("sr", 0, "Key range start") 30 | var T = flag.Int("T", 10, "Number of threads (simulated clients).") 31 | var outstandingReqs = flag.Int64("or", 1, "Number of outstanding requests a thread can have at any given time.") 32 | var theta = flag.Float64("theta", 0.99, "Theta zipfian parameter") 33 | var zKeys = flag.Uint64("z", 1e9, "Number of unique keys in zipfian distribution.") 34 | var poissonAvg = flag.Int("poisson", -1, "The average number of microseconds between requests. -1 disables Poisson.") 35 | var percentWrites = flag.Float64("writes", 1, "A float between 0 and 1 that corresponds to the percentage of requests that should be writes. The remainder will be reads.") 36 | var blindWrites = flag.Bool("blindwrites", false, "True if writes don't need to execute before clients receive responses.") 37 | 38 | // Information about the latency of an operation 39 | type response struct { 40 | receivedAt time.Time 41 | rtt float64 // The operation latency, in ms 42 | commitLatency float64 // The operation's commit latency, in ms 43 | } 44 | 45 | // Information pertaining to operations that have been issued but that have not 46 | // yet received responses 47 | type outstandingRequestInfo struct { 48 | sync.Mutex 49 | sema *semaphore.Weighted // Controls number of outstanding operations 50 | startTimes map[int32]time.Time // The time at which operations were sent out 51 | } 52 | 53 | // An outstandingRequestInfo per client thread 54 | var orInfos []*outstandingRequestInfo 55 | 56 | func main() { 57 | flag.Parse() 58 | 59 | runtime.GOMAXPROCS(*procs) 60 | 61 | if *conflicts > 100 { 62 | log.Fatalf("Conflicts percentage must be between 0 and 100.\n") 63 | } 64 | 65 | orInfos = make([]*outstandingRequestInfo, *T) 66 | 67 | var master *rpc.Client 68 | var err error 69 | for { 70 | master, err = rpc.DialHTTP("tcp", 
fmt.Sprintf("%s:%d", *masterAddr, *masterPort)) 71 | if err != nil { 72 | log.Println("Error connecting to master", err) 73 | } else { 74 | break 75 | } 76 | } 77 | 78 | rlReply := new(masterproto.GetReplicaListReply) 79 | for !rlReply.Ready { 80 | err := master.Call("Master.GetReplicaList", new(masterproto.GetReplicaListArgs), rlReply) 81 | if err != nil { 82 | log.Println("Error making the GetReplicaList RPC", err) 83 | } 84 | } 85 | 86 | leader := 0 87 | if *forceLeader < 0 { 88 | reply := new(masterproto.GetLeaderReply) 89 | if err = master.Call("Master.GetLeader", new(masterproto.GetLeaderArgs), reply); err != nil { 90 | log.Println("Error making the GetLeader RPC:", err) 91 | } 92 | leader = reply.LeaderId 93 | } else { 94 | leader = *forceLeader 95 | } 96 | log.Printf("The leader is replica %d\n", leader) 97 | 98 | readings := make(chan *response, 100000) 99 | 100 | for i := 0; i < *T; i++ { 101 | server, err := net.Dial("tcp", rlReply.ReplicaList[leader]) 102 | if err != nil { 103 | log.Fatalf("Error connecting to replica %d\n", leader) 104 | } 105 | reader := bufio.NewReader(server) 106 | writer := bufio.NewWriter(server) 107 | 108 | orInfo := &outstandingRequestInfo{ 109 | sync.Mutex{}, 110 | semaphore.NewWeighted(*outstandingReqs), 111 | make(map[int32]time.Time, *outstandingReqs), 112 | } 113 | 114 | go simulatedClientWriter(writer, orInfo) 115 | go simulatedClientReader(reader, orInfo, readings) 116 | 117 | orInfos[i] = orInfo 118 | } 119 | 120 | printer(readings) 121 | } 122 | 123 | func simulatedClientWriter(writer *bufio.Writer, orInfo *outstandingRequestInfo) { 124 | args := genericsmrproto.Propose{0 /* id */, state.Command{state.PUT, 0, 0}, 0 /* timestamp */} 125 | 126 | conflictRand := rand.New(rand.NewSource(time.Now().UnixNano())) 127 | zipf := zipfian.NewZipfianGenerator(*zKeys, *theta) 128 | poissonGenerator := poisson.NewPoisson(*poissonAvg) 129 | opRand := rand.New(rand.NewSource(time.Now().UnixNano())) 130 | 131 | queuedReqs := 0 // The 
number of poisson departures that have been missed 132 | 133 | for id := int32(0); ; id++ { 134 | args.CommandId = id 135 | 136 | // Determine key 137 | if *conflicts >= 0 { 138 | r := conflictRand.Intn(100) 139 | if r < *conflicts { 140 | args.Command.K = 42 141 | } else { 142 | args.Command.K = state.Key(*startRange + 43 + int(id)) 143 | } 144 | } else { 145 | args.Command.K = state.Key(zipf.NextNumber()) 146 | } 147 | 148 | // Determine operation type 149 | if *percentWrites > opRand.Float64() { 150 | if !*blindWrites { 151 | args.Command.Op = state.PUT // write operation 152 | } else { 153 | args.Command.Op = state.PUT_BLIND 154 | } 155 | } else { 156 | args.Command.Op = state.GET // read operation 157 | } 158 | 159 | if *poissonAvg == -1 { // Poisson disabled 160 | orInfo.sema.Acquire(context.Background(), 1) 161 | } else { 162 | for { 163 | if orInfo.sema.TryAcquire(1) { 164 | if queuedReqs == 0 { 165 | time.Sleep(poissonGenerator.NextArrival()) 166 | } else { 167 | queuedReqs -= 1 168 | } 169 | break 170 | } 171 | time.Sleep(poissonGenerator.NextArrival()) 172 | queuedReqs += 1 173 | } 174 | } 175 | 176 | before := time.Now() 177 | writer.WriteByte(genericsmrproto.PROPOSE) 178 | args.Marshal(writer) 179 | writer.Flush() 180 | 181 | orInfo.Lock() 182 | orInfo.startTimes[id] = before 183 | orInfo.Unlock() 184 | } 185 | } 186 | 187 | func simulatedClientReader(reader *bufio.Reader, orInfo *outstandingRequestInfo, readings chan *response) { 188 | var reply genericsmrproto.ProposeReply 189 | 190 | for { 191 | if err := reply.Unmarshal(reader); err != nil || reply.OK == 0 { 192 | log.Println("Error when reading:", err) 193 | break 194 | } 195 | after := time.Now() 196 | 197 | orInfo.sema.Release(1) 198 | 199 | orInfo.Lock() 200 | before := orInfo.startTimes[reply.CommandId] 201 | delete(orInfo.startTimes, reply.CommandId) 202 | orInfo.Unlock() 203 | 204 | rtt := (after.Sub(before)).Seconds() * 1000 205 | commitToExec := float64(reply.Timestamp) / 1e6 206 | 
commitLatency := rtt - commitToExec 207 | 208 | readings <- &response{ 209 | after, 210 | rtt, 211 | commitLatency, 212 | } 213 | } 214 | } 215 | 216 | func printer(readings chan *response) { 217 | lattputFile, err := os.Create("lattput.txt") 218 | if err != nil { 219 | log.Println("Error creating lattput file", err) 220 | return 221 | } 222 | lattputFile.WriteString("# time (ns), avg lat over the past second, tput since last line, total count, totalOrs, avg commit lat over the past second\n") 223 | 224 | latFile, err := os.Create("latency.txt") 225 | if err != nil { 226 | log.Println("Error creating latency file", err) 227 | return 228 | } 229 | latFile.WriteString("# time (ns), latency, commit latency\n") 230 | 231 | startTime := time.Now() 232 | 233 | for { 234 | time.Sleep(time.Second) 235 | 236 | count := len(readings) 237 | var sum float64 = 0 238 | var commitSum float64 = 0 239 | endTime := time.Now() // Set to current time in case there are no readings 240 | for i := 0; i < count; i++ { 241 | resp := <-readings 242 | 243 | // Log all to latency file 244 | latFile.WriteString(fmt.Sprintf("%d %f %f\n", resp.receivedAt.UnixNano(), resp.rtt, resp.commitLatency)) 245 | sum += resp.rtt 246 | commitSum += resp.commitLatency 247 | endTime = resp.receivedAt 248 | } 249 | 250 | var avg float64 251 | var avgCommit float64 252 | var tput float64 253 | if count > 0 { 254 | avg = sum / float64(count) 255 | avgCommit = commitSum / float64(count) 256 | tput = float64(count) / endTime.Sub(startTime).Seconds() 257 | } 258 | 259 | totalOrs := 0 260 | for i := 0; i < *T; i++ { 261 | orInfos[i].Lock() 262 | totalOrs += len(orInfos[i].startTimes) 263 | orInfos[i].Unlock() 264 | } 265 | 266 | // Log summary to lattput file 267 | lattputFile.WriteString(fmt.Sprintf("%d %f %f %d %d %f\n", endTime.UnixNano(), 268 | avg, tput, count, totalOrs, avgCommit)) 269 | 270 | startTime = endTime 271 | } 272 | } 273 | 
-------------------------------------------------------------------------------- /src/cycles/cycles.go: --------------------------------------------------------------------------------
// Package cycles reads and calibrates the CPU timestamp counter so that raw
// cycle counts can be converted to wall-clock durations.
package cycles

import (
	"time"
)

/**
 * Return the current value of the fine-grain CPU cycle counter
 * (accessed via the RDTSC instruction).
 */
func Rdtsc() uint64

/**
 * Return the current value of the fine-grain CPU cycle counter
 * (accessed via the RDTSCP instruction).
 */
func Rdtscp() uint64

// MockCyclesPerSec, when set to a value > 0, overrides the calibrated
// frequency returned by PerSecond.
var MockCyclesPerSec float64

// cyclesPerSec caches the frequency measured by Init; 0 means "not yet
// calibrated".
var cyclesPerSec float64

/**
 * Perform once-only overall initialization for the Cycles class, such
 * as calibrating the clock frequency. This method is invoked automatically
 * during initialization, but it may be invoked explicitly by other modules
 * to ensure that initialization occurs before those modules initialize
 * themselves.
 */
func Init() {
	// Compute the frequency of the fine-grained CPU timer: to do this,
	// take parallel time readings using both rdtsc and gettimeofday.
	// After 10ms have elapsed, take the ratio between these readings.

	var startTime, stopTime time.Time
	var startCycles, stopCycles uint64
	var micros int64
	var oldCycles float64

	// There is one tricky aspect, which is that we could get interrupted
	// between calling gettimeofday and reading the cycle counter, in which
	// case we won't have corresponding readings. To handle this (unlikely)
	// case, compute the overall result repeatedly, and wait until we get
	// two successive calculations that are within 0.1% of each other.
	oldCycles = 0
	for {
		startTime = time.Now()
		startCycles = Rdtsc()
		for {
			stopTime = time.Now()
			stopCycles = Rdtsc()
			micros = stopTime.Sub(startTime).Microseconds()
			if micros > 10000 {
				// Scale the cycle delta over >=10ms up to a per-second rate.
				cyclesPerSec = float64(stopCycles - startCycles)
				cyclesPerSec = 1000000.0 * cyclesPerSec / float64(micros)
				break
			}
		}
		// delta is 0.1% of the current estimate; accept once two successive
		// estimates agree within that tolerance.
		delta := cyclesPerSec / 1000.0
		if oldCycles > (cyclesPerSec-delta) &&
			oldCycles < (cyclesPerSec+delta) {
			return
		}
		oldCycles = cyclesPerSec
	}
}

// PerSecond returns the timestamp-counter frequency in cycles per second.
// MockCyclesPerSec takes precedence when set; otherwise the frequency is
// calibrated lazily on first use.
func PerSecond() float64 {
	if MockCyclesPerSec > 0 {
		return MockCyclesPerSec
	}

	if cyclesPerSec == 0 {
		Init()
	}

	return cyclesPerSec
}

// ToSeconds converts a cycle count to seconds.
func ToSeconds(cycles uint64) float64 {
	return float64(cycles) / PerSecond()
}

// ToNanoseconds converts a cycle count to nanoseconds.
func ToNanoseconds(cycles uint64) float64 {
	return 1e09 * ToSeconds(cycles)
}
-------------------------------------------------------------------------------- /src/cycles/cycles_amd64.s: --------------------------------------------------------------------------------
// Copyright 2016 David Terei. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4 | 5 | #include "textflag.h" 6 | 7 | // func Rdtsc() uint64 8 | TEXT ·Rdtsc(SB),NOSPLIT,$0-8 9 | RDTSC 10 | SHLQ $32, DX 11 | ADDQ DX, AX 12 | MOVQ AX, ret+0(FP) 13 | RET 14 | 15 | // func Rdtscp() uint64 16 | TEXT ·Rdtscp(SB),NOSPLIT,$0-8 17 | BYTE $0x0F // RDTSCP 18 | BYTE $0x01 19 | BYTE $0xF9 20 | SHLQ $32, DX 21 | ADDQ DX, AX 22 | MOVQ AX, ret+0(FP) 23 | CPUID 24 | RET 25 | -------------------------------------------------------------------------------- /src/epaxos/epaxos-exec.go: -------------------------------------------------------------------------------- 1 | package epaxos 2 | 3 | import ( 4 | "epaxosproto" 5 | "genericsmrproto" 6 | "sort" 7 | "state" 8 | "time" 9 | ) 10 | 11 | const ( 12 | WHITE int8 = iota 13 | GRAY 14 | BLACK 15 | ) 16 | 17 | type Exec struct { 18 | r *Replica 19 | } 20 | 21 | type SCComponent struct { 22 | nodes []*Instance 23 | color int8 24 | } 25 | 26 | var overallRoot *Instance 27 | var overallRep int32 28 | var overallInst int32 29 | 30 | func (e *Exec) executeCommand(replica int32, instance int32) bool { 31 | if e.r.InstanceSpace[replica][instance] == nil { 32 | return false 33 | } 34 | inst := e.r.InstanceSpace[replica][instance] 35 | if inst.Status == epaxosproto.EXECUTED { 36 | return true 37 | } 38 | if inst.Status != epaxosproto.COMMITTED { 39 | return false 40 | } 41 | 42 | overallRep = replica 43 | overallInst = instance 44 | overallRoot = inst 45 | 46 | if !e.findSCC(inst) { 47 | return false 48 | } 49 | 50 | return true 51 | } 52 | 53 | var stack []*Instance = make([]*Instance, 0, 100) 54 | 55 | func (e *Exec) findSCC(root *Instance) bool { 56 | index := 1 57 | //find SCCs using Tarjan's algorithm 58 | stack = stack[0:0] 59 | return e.strongconnect(root, &index) 60 | } 61 | 62 | func (e *Exec) strongconnect(v *Instance, index *int) bool { 63 | v.Index = *index 64 | v.Lowlink = *index 65 | *index = *index + 1 66 | 67 | l := len(stack) 68 | if l == cap(stack) { 69 | newSlice := make([]*Instance, l, 2*l) 70 | 
copy(newSlice, stack) 71 | stack = newSlice 72 | } 73 | stack = stack[0 : l+1] 74 | stack[l] = v 75 | 76 | for q := int32(0); q < int32(e.r.N); q++ { 77 | inst := v.Deps[q] 78 | for i := e.r.ExecedUpTo[q] + 1; i <= inst; i++ { 79 | for e.r.InstanceSpace[q][i] == nil || e.r.InstanceSpace[q][i].Cmds == nil || v.Cmds == nil { 80 | // Sarah update: in the original code this was time.Sleep(1000 * 1000) 81 | return false 82 | } 83 | 84 | w := e.r.InstanceSpace[q][i] 85 | 86 | if w.Status == epaxosproto.EXECUTED { 87 | continue 88 | } 89 | 90 | // Instances that don't conflict can be skipped 91 | conflict := false 92 | for ci, _ := range v.Cmds { 93 | for di, _ := range w.Cmds { 94 | if state.Conflict(&v.Cmds[ci], &w.Cmds[di]) { 95 | conflict = true 96 | } 97 | } 98 | } 99 | if !conflict { 100 | continue 101 | } 102 | 103 | // Don't need to wait for reads 104 | allReads := true 105 | for _, cmd := range w.Cmds { 106 | if cmd.Op != state.GET { 107 | allReads = false 108 | } 109 | } 110 | if allReads { 111 | continue 112 | } 113 | 114 | // Livelock fix: any instance that has a high seq and the root 115 | // as a dependency will necessarily execute after it. (As will 116 | // any of its dependencies the root wouldn't already know about.) 
117 | if e.r.infiniteFix && 118 | (w.Seq > overallRoot.Seq || (overallRep < q && w.Seq == overallRoot.Seq)) && 119 | w.Deps[overallRep] >= overallInst { 120 | break 121 | } 122 | 123 | for e.r.InstanceSpace[q][i].Status != epaxosproto.COMMITTED { 124 | // Sarah update: in the original code this was time.Sleep(1000 * 1000) 125 | return false 126 | } 127 | 128 | if w.Index == 0 { 129 | //e.strongconnect(w, index) 130 | if !e.strongconnect(w, index) { 131 | for j := l; j < len(stack); j++ { 132 | stack[j].Index = 0 133 | } 134 | stack = stack[0:l] 135 | return false 136 | } 137 | if w.Lowlink < v.Lowlink { 138 | v.Lowlink = w.Lowlink 139 | } 140 | } else { //if e.inStack(w) //<- probably unnecessary condition, saves a linear search 141 | if w.Index < v.Lowlink { 142 | v.Lowlink = w.Index 143 | } 144 | } 145 | } 146 | } 147 | 148 | if v.Lowlink == v.Index { 149 | //found SCC 150 | list := stack[l:len(stack)] 151 | 152 | //execute commands in the increasing order of the Seq field 153 | sort.Sort(nodeArray(list)) 154 | for _, w := range list { 155 | for w.Cmds == nil { 156 | // Sarah update: in the original code this was time.Sleep(1000 * 1000) 157 | return false 158 | } 159 | for idx := 0; idx < len(w.Cmds); idx++ { 160 | val := w.Cmds[idx].Execute(e.r.State) 161 | if !state.AllBlindWrites(w.Cmds) && 162 | w.lb != nil && w.lb.clientProposals != nil { 163 | e.r.ReplyPropose( 164 | &genericsmrproto.ProposeReply{ 165 | TRUE, 166 | w.lb.clientProposals[idx].CommandId, 167 | val, 168 | // w.lb.clientProposals[idx].Timestamp 169 | // Overload timestamp with time between commit and 170 | // execution 171 | time.Now().Sub(w.lb.commitTime).Nanoseconds()}, 172 | w.lb.clientProposals[idx].Reply) 173 | } 174 | } 175 | w.Status = epaxosproto.EXECUTED 176 | } 177 | stack = stack[0:l] 178 | } 179 | return true 180 | } 181 | 182 | func (e *Exec) inStack(w *Instance) bool { 183 | for _, u := range stack { 184 | if w == u { 185 | return true 186 | } 187 | } 188 | return false 189 | } 190 
| 191 | type nodeArray []*Instance 192 | 193 | func (na nodeArray) Len() int { 194 | return len(na) 195 | } 196 | 197 | func (na nodeArray) Less(i, j int) bool { 198 | return na[i].Seq < na[j].Seq 199 | } 200 | 201 | func (na nodeArray) Swap(i, j int) { 202 | na[i], na[j] = na[j], na[i] 203 | } 204 | -------------------------------------------------------------------------------- /src/epaxosproto/epaxosproto.go: -------------------------------------------------------------------------------- 1 | package epaxosproto 2 | 3 | import ( 4 | "state" 5 | ) 6 | 7 | type Prepare struct { 8 | LeaderId int32 9 | Replica int32 10 | Instance int32 11 | Ballot int32 12 | } 13 | 14 | type PrepareReply struct { 15 | AcceptorId int32 16 | Replica int32 17 | Instance int32 18 | OK uint8 19 | Ballot int32 20 | Status int8 21 | Command []state.Command 22 | Seq int32 23 | Deps [5]int32 24 | } 25 | 26 | type PreAccept struct { 27 | LeaderId int32 28 | Replica int32 29 | Instance int32 30 | Ballot int32 31 | Command []state.Command 32 | Seq int32 33 | Deps [5]int32 34 | SentAt int64 // UTC unix nano 35 | OpenAfter int64 // UTC unix nano 36 | } 37 | 38 | type PreAcceptReply struct { 39 | Replica int32 40 | Instance int32 41 | OK uint8 42 | Ballot int32 43 | Seq int32 44 | Deps [5]int32 45 | CommittedDeps [5]int32 46 | } 47 | 48 | type PreAcceptOK struct { 49 | Instance int32 50 | } 51 | 52 | type Accept struct { 53 | LeaderId int32 54 | Replica int32 55 | Instance int32 56 | Ballot int32 57 | Count int32 58 | Seq int32 59 | Deps [5]int32 60 | SentAt int64 61 | } 62 | 63 | type AcceptReply struct { 64 | Replica int32 65 | Instance int32 66 | OK uint8 67 | Ballot int32 68 | } 69 | 70 | type Commit struct { 71 | LeaderId int32 72 | Replica int32 73 | Instance int32 74 | Command []state.Command 75 | Seq int32 76 | Deps [5]int32 77 | SentAt int64 78 | } 79 | 80 | type CommitShort struct { 81 | LeaderId int32 82 | Replica int32 83 | Instance int32 84 | Count int32 85 | Seq int32 86 | Deps 
[5]int32 87 | SentAt int64 88 | } 89 | 90 | type TryPreAccept struct { 91 | LeaderId int32 92 | Replica int32 93 | Instance int32 94 | Ballot int32 95 | Command []state.Command 96 | Seq int32 97 | Deps [5]int32 98 | } 99 | 100 | type TryPreAcceptReply struct { 101 | AcceptorId int32 102 | Replica int32 103 | Instance int32 104 | OK uint8 105 | Ballot int32 106 | ConflictReplica int32 107 | ConflictInstance int32 108 | ConflictStatus int8 109 | } 110 | 111 | const ( 112 | NONE int8 = iota 113 | PREACCEPTED 114 | PREACCEPTED_EQ 115 | ACCEPTED 116 | COMMITTED 117 | EXECUTED 118 | ) 119 | -------------------------------------------------------------------------------- /src/epaxosproto/epaxosproto_test.go: -------------------------------------------------------------------------------- 1 | package epaxosproto 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "reflect" 7 | "state" 8 | "testing" 9 | ) 10 | 11 | func TestPreAccept(t *testing.T) { 12 | m := &PreAccept{ 13 | LeaderId: 7, 14 | Replica: 29, 15 | Instance: 3, 16 | Command: []state.Command{state.Command{1, 7, 9}}, 17 | Seq: 12, 18 | Deps: [5]int32{1, 2, 3, 4, 5}, 19 | SentAt: 56321, 20 | OpenAfter: 341, 21 | } 22 | var buf bytes.Buffer 23 | w := bufio.NewWriter(&buf) 24 | m.Marshal(w) 25 | w.Flush() 26 | unmarsh := &PreAccept{} 27 | r := bufio.NewReader(&buf) 28 | err := unmarsh.Unmarshal(r) 29 | if err != nil { 30 | t.Fatal("Unexpected error:", err) 31 | } 32 | if !reflect.DeepEqual(m, unmarsh) { 33 | t.Fatalf("Expected %v, got %v", m, unmarsh) 34 | } 35 | } 36 | func TestAccept(t *testing.T) { 37 | m := &Accept{ 38 | LeaderId: 7, 39 | Replica: 29, 40 | Instance: 3, 41 | Count: 5, 42 | Seq: 12, 43 | Deps: [5]int32{1, 2, 3, 4, 5}, 44 | SentAt: 56321, 45 | } 46 | var buf bytes.Buffer 47 | w := bufio.NewWriter(&buf) 48 | m.Marshal(w) 49 | w.Flush() 50 | unmarsh := &Accept{} 51 | r := bufio.NewReader(&buf) 52 | err := unmarsh.Unmarshal(r) 53 | if err != nil { 54 | t.Fatal("Unexpected error:", err) 55 | } 56 | if 
!reflect.DeepEqual(m, unmarsh) { 57 | t.Fatalf("Expected %v, got %v", m, unmarsh) 58 | } 59 | } 60 | 61 | func TestCommit(t *testing.T) { 62 | m := &Commit{ 63 | LeaderId: 7, 64 | Replica: 29, 65 | Instance: 3, 66 | Command: []state.Command{state.Command{1, 7, 9}}, 67 | Seq: 12, 68 | Deps: [5]int32{1, 2, 3, 4, 5}, 69 | SentAt: 56321, 70 | } 71 | var buf bytes.Buffer 72 | w := bufio.NewWriter(&buf) 73 | m.Marshal(w) 74 | w.Flush() 75 | unmarsh := &Commit{} 76 | r := bufio.NewReader(&buf) 77 | err := unmarsh.Unmarshal(r) 78 | if err != nil { 79 | t.Fatal("Unexpected error:", err) 80 | } 81 | if !reflect.DeepEqual(m, unmarsh) { 82 | t.Fatalf("Expected %v, got %v", m, unmarsh) 83 | } 84 | } 85 | 86 | func TestCommitShort(t *testing.T) { 87 | m := &CommitShort{ 88 | LeaderId: 7, 89 | Replica: 29, 90 | Instance: 3, 91 | Count: 5, 92 | Seq: 12, 93 | Deps: [5]int32{1, 2, 3, 4, 5}, 94 | SentAt: 56321, 95 | } 96 | var buf bytes.Buffer 97 | w := bufio.NewWriter(&buf) 98 | m.Marshal(w) 99 | w.Flush() 100 | unmarsh := &CommitShort{} 101 | r := bufio.NewReader(&buf) 102 | err := unmarsh.Unmarshal(r) 103 | if err != nil { 104 | t.Fatal("Unexpected error:", err) 105 | } 106 | if !reflect.DeepEqual(m, unmarsh) { 107 | t.Fatalf("Expected %v, got %v", m, unmarsh) 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /src/epaxosproto/epaxosprotomarsh.go: -------------------------------------------------------------------------------- 1 | package epaxosproto 2 | 3 | import ( 4 | "bufio" 5 | "encoding/binary" 6 | "fastrpc" 7 | "io" 8 | "state" 9 | ) 10 | 11 | type byteReader interface { 12 | io.Reader 13 | ReadByte() (c byte, err error) 14 | } 15 | 16 | func (p *TryPreAccept) New() fastrpc.Serializable { 17 | return new(TryPreAccept) 18 | } 19 | 20 | func (t *TryPreAccept) Marshal(wire io.Writer) { 21 | var b [24]byte 22 | var bs []byte 23 | bs = b[:16] 24 | tmp32 := t.LeaderId 25 | bs[0] = byte(tmp32) 26 | bs[1] = byte(tmp32 >> 8) 27 | 
bs[2] = byte(tmp32 >> 16) 28 | bs[3] = byte(tmp32 >> 24) 29 | tmp32 = t.Replica 30 | bs[4] = byte(tmp32) 31 | bs[5] = byte(tmp32 >> 8) 32 | bs[6] = byte(tmp32 >> 16) 33 | bs[7] = byte(tmp32 >> 24) 34 | tmp32 = t.Instance 35 | bs[8] = byte(tmp32) 36 | bs[9] = byte(tmp32 >> 8) 37 | bs[10] = byte(tmp32 >> 16) 38 | bs[11] = byte(tmp32 >> 24) 39 | tmp32 = t.Ballot 40 | bs[12] = byte(tmp32) 41 | bs[13] = byte(tmp32 >> 8) 42 | bs[14] = byte(tmp32 >> 16) 43 | bs[15] = byte(tmp32 >> 24) 44 | wire.Write(bs) 45 | bs = b[:] 46 | alen1 := int64(len(t.Command)) 47 | if wlen := binary.PutVarint(bs, alen1); wlen >= 0 { 48 | wire.Write(b[0:wlen]) 49 | } 50 | for i := int64(0); i < alen1; i++ { 51 | t.Command[i].Marshal(wire) 52 | } 53 | tmp32 = t.Seq 54 | bs[0] = byte(tmp32) 55 | bs[1] = byte(tmp32 >> 8) 56 | bs[2] = byte(tmp32 >> 16) 57 | bs[3] = byte(tmp32 >> 24) 58 | tmp32 = t.Deps[0] 59 | bs[4] = byte(tmp32) 60 | bs[5] = byte(tmp32 >> 8) 61 | bs[6] = byte(tmp32 >> 16) 62 | bs[7] = byte(tmp32 >> 24) 63 | tmp32 = t.Deps[1] 64 | bs[8] = byte(tmp32) 65 | bs[9] = byte(tmp32 >> 8) 66 | bs[10] = byte(tmp32 >> 16) 67 | bs[11] = byte(tmp32 >> 24) 68 | tmp32 = t.Deps[2] 69 | bs[12] = byte(tmp32) 70 | bs[13] = byte(tmp32 >> 8) 71 | bs[14] = byte(tmp32 >> 16) 72 | bs[15] = byte(tmp32 >> 24) 73 | tmp32 = t.Deps[3] 74 | bs[16] = byte(tmp32) 75 | bs[17] = byte(tmp32 >> 8) 76 | bs[18] = byte(tmp32 >> 16) 77 | bs[19] = byte(tmp32 >> 24) 78 | tmp32 = t.Deps[4] 79 | bs[20] = byte(tmp32) 80 | bs[21] = byte(tmp32 >> 8) 81 | bs[22] = byte(tmp32 >> 16) 82 | bs[23] = byte(tmp32 >> 24) 83 | wire.Write(bs) 84 | } 85 | 86 | func (t *TryPreAccept) Unmarshal(rr io.Reader) error { 87 | var wire byteReader 88 | var ok bool 89 | if wire, ok = rr.(byteReader); !ok { 90 | wire = bufio.NewReader(rr) 91 | } 92 | var b [24]byte 93 | var bs []byte 94 | bs = b[:16] 95 | if _, err := io.ReadAtLeast(wire, bs, 16); err != nil { 96 | return err 97 | } 98 | t.LeaderId = int32((uint32(bs[0]) | (uint32(bs[1]) << 8) | 
(uint32(bs[2]) << 16) | (uint32(bs[3]) << 24))) 99 | t.Replica = int32((uint32(bs[4]) | (uint32(bs[5]) << 8) | (uint32(bs[6]) << 16) | (uint32(bs[7]) << 24))) 100 | t.Instance = int32((uint32(bs[8]) | (uint32(bs[9]) << 8) | (uint32(bs[10]) << 16) | (uint32(bs[11]) << 24))) 101 | t.Ballot = int32((uint32(bs[12]) | (uint32(bs[13]) << 8) | (uint32(bs[14]) << 16) | (uint32(bs[15]) << 24))) 102 | alen1, err := binary.ReadVarint(wire) 103 | if err != nil { 104 | return err 105 | } 106 | t.Command = make([]state.Command, alen1) 107 | for i := int64(0); i < alen1; i++ { 108 | t.Command[i].Unmarshal(wire) 109 | } 110 | bs = b[:24] 111 | if _, err := io.ReadAtLeast(wire, bs, 24); err != nil { 112 | return err 113 | } 114 | t.Seq = int32((uint32(bs[0]) | (uint32(bs[1]) << 8) | (uint32(bs[2]) << 16) | (uint32(bs[3]) << 24))) 115 | t.Deps[0] = int32((uint32(bs[4]) | (uint32(bs[5]) << 8) | (uint32(bs[6]) << 16) | (uint32(bs[7]) << 24))) 116 | t.Deps[1] = int32((uint32(bs[8]) | (uint32(bs[9]) << 8) | (uint32(bs[10]) << 16) | (uint32(bs[11]) << 24))) 117 | t.Deps[2] = int32((uint32(bs[12]) | (uint32(bs[13]) << 8) | (uint32(bs[14]) << 16) | (uint32(bs[15]) << 24))) 118 | t.Deps[3] = int32((uint32(bs[16]) | (uint32(bs[17]) << 8) | (uint32(bs[18]) << 16) | (uint32(bs[19]) << 24))) 119 | t.Deps[4] = int32((uint32(bs[20]) | (uint32(bs[21]) << 8) | (uint32(bs[22]) << 16) | (uint32(bs[23]) << 24))) 120 | return nil 121 | } 122 | 123 | func (p *PreAcceptReply) New() fastrpc.Serializable { 124 | return new(PreAcceptReply) 125 | } 126 | 127 | func (t *PreAcceptReply) Marshal(wire io.Writer) { 128 | var b [57]byte 129 | var bs []byte 130 | bs = b[:57] 131 | tmp32 := t.Replica 132 | bs[0] = byte(tmp32) 133 | bs[1] = byte(tmp32 >> 8) 134 | bs[2] = byte(tmp32 >> 16) 135 | bs[3] = byte(tmp32 >> 24) 136 | tmp32 = t.Instance 137 | bs[4] = byte(tmp32) 138 | bs[5] = byte(tmp32 >> 8) 139 | bs[6] = byte(tmp32 >> 16) 140 | bs[7] = byte(tmp32 >> 24) 141 | bs[8] = byte(t.OK) 142 | tmp32 = t.Ballot 143 | 
bs[9] = byte(tmp32) 144 | bs[10] = byte(tmp32 >> 8) 145 | bs[11] = byte(tmp32 >> 16) 146 | bs[12] = byte(tmp32 >> 24) 147 | tmp32 = t.Seq 148 | bs[13] = byte(tmp32) 149 | bs[14] = byte(tmp32 >> 8) 150 | bs[15] = byte(tmp32 >> 16) 151 | bs[16] = byte(tmp32 >> 24) 152 | tmp32 = t.Deps[0] 153 | bs[17] = byte(tmp32) 154 | bs[18] = byte(tmp32 >> 8) 155 | bs[19] = byte(tmp32 >> 16) 156 | bs[20] = byte(tmp32 >> 24) 157 | tmp32 = t.Deps[1] 158 | bs[21] = byte(tmp32) 159 | bs[22] = byte(tmp32 >> 8) 160 | bs[23] = byte(tmp32 >> 16) 161 | bs[24] = byte(tmp32 >> 24) 162 | tmp32 = t.Deps[2] 163 | bs[25] = byte(tmp32) 164 | bs[26] = byte(tmp32 >> 8) 165 | bs[27] = byte(tmp32 >> 16) 166 | bs[28] = byte(tmp32 >> 24) 167 | tmp32 = t.Deps[3] 168 | bs[29] = byte(tmp32) 169 | bs[30] = byte(tmp32 >> 8) 170 | bs[31] = byte(tmp32 >> 16) 171 | bs[32] = byte(tmp32 >> 24) 172 | tmp32 = t.Deps[4] 173 | bs[33] = byte(tmp32) 174 | bs[34] = byte(tmp32 >> 8) 175 | bs[35] = byte(tmp32 >> 16) 176 | bs[36] = byte(tmp32 >> 24) 177 | tmp32 = t.CommittedDeps[0] 178 | bs[37] = byte(tmp32) 179 | bs[38] = byte(tmp32 >> 8) 180 | bs[39] = byte(tmp32 >> 16) 181 | bs[40] = byte(tmp32 >> 24) 182 | tmp32 = t.CommittedDeps[1] 183 | bs[41] = byte(tmp32) 184 | bs[42] = byte(tmp32 >> 8) 185 | bs[43] = byte(tmp32 >> 16) 186 | bs[44] = byte(tmp32 >> 24) 187 | tmp32 = t.CommittedDeps[2] 188 | bs[45] = byte(tmp32) 189 | bs[46] = byte(tmp32 >> 8) 190 | bs[47] = byte(tmp32 >> 16) 191 | bs[48] = byte(tmp32 >> 24) 192 | tmp32 = t.CommittedDeps[3] 193 | bs[49] = byte(tmp32) 194 | bs[50] = byte(tmp32 >> 8) 195 | bs[51] = byte(tmp32 >> 16) 196 | bs[52] = byte(tmp32 >> 24) 197 | tmp32 = t.CommittedDeps[4] 198 | bs[53] = byte(tmp32) 199 | bs[54] = byte(tmp32 >> 8) 200 | bs[55] = byte(tmp32 >> 16) 201 | bs[56] = byte(tmp32 >> 24) 202 | wire.Write(bs) 203 | } 204 | 205 | func (t *PreAcceptReply) Unmarshal(wire io.Reader) error { 206 | var b [57]byte 207 | var bs []byte 208 | bs = b[:57] 209 | if _, err := io.ReadAtLeast(wire, 
bs, 57); err != nil { 210 | return err 211 | } 212 | t.Replica = int32((uint32(bs[0]) | (uint32(bs[1]) << 8) | (uint32(bs[2]) << 16) | (uint32(bs[3]) << 24))) 213 | t.Instance = int32((uint32(bs[4]) | (uint32(bs[5]) << 8) | (uint32(bs[6]) << 16) | (uint32(bs[7]) << 24))) 214 | t.OK = uint8(bs[8]) 215 | t.Ballot = int32((uint32(bs[9]) | (uint32(bs[10]) << 8) | (uint32(bs[11]) << 16) | (uint32(bs[12]) << 24))) 216 | t.Seq = int32((uint32(bs[13]) | (uint32(bs[14]) << 8) | (uint32(bs[15]) << 16) | (uint32(bs[16]) << 24))) 217 | t.Deps[0] = int32((uint32(bs[17]) | (uint32(bs[18]) << 8) | (uint32(bs[19]) << 16) | (uint32(bs[20]) << 24))) 218 | t.Deps[1] = int32((uint32(bs[21]) | (uint32(bs[22]) << 8) | (uint32(bs[23]) << 16) | (uint32(bs[24]) << 24))) 219 | t.Deps[2] = int32((uint32(bs[25]) | (uint32(bs[26]) << 8) | (uint32(bs[27]) << 16) | (uint32(bs[28]) << 24))) 220 | t.Deps[3] = int32((uint32(bs[29]) | (uint32(bs[30]) << 8) | (uint32(bs[31]) << 16) | (uint32(bs[32]) << 24))) 221 | t.Deps[4] = int32((uint32(bs[33]) | (uint32(bs[34]) << 8) | (uint32(bs[35]) << 16) | (uint32(bs[36]) << 24))) 222 | t.CommittedDeps[0] = int32((uint32(bs[37]) | (uint32(bs[38]) << 8) | (uint32(bs[39]) << 16) | (uint32(bs[40]) << 24))) 223 | t.CommittedDeps[1] = int32((uint32(bs[41]) | (uint32(bs[42]) << 8) | (uint32(bs[43]) << 16) | (uint32(bs[44]) << 24))) 224 | t.CommittedDeps[2] = int32((uint32(bs[45]) | (uint32(bs[46]) << 8) | (uint32(bs[47]) << 16) | (uint32(bs[48]) << 24))) 225 | t.CommittedDeps[3] = int32((uint32(bs[49]) | (uint32(bs[50]) << 8) | (uint32(bs[51]) << 16) | (uint32(bs[52]) << 24))) 226 | t.CommittedDeps[4] = int32((uint32(bs[53]) | (uint32(bs[54]) << 8) | (uint32(bs[55]) << 16) | (uint32(bs[56]) << 24))) 227 | return nil 228 | } 229 | 230 | func (p *TryPreAcceptReply) New() fastrpc.Serializable { 231 | return new(TryPreAcceptReply) 232 | } 233 | 234 | func (t *TryPreAcceptReply) Marshal(wire io.Writer) { 235 | var b [26]byte 236 | var bs []byte 237 | bs = b[:26] 238 | 
tmp32 := t.AcceptorId 239 | bs[0] = byte(tmp32) 240 | bs[1] = byte(tmp32 >> 8) 241 | bs[2] = byte(tmp32 >> 16) 242 | bs[3] = byte(tmp32 >> 24) 243 | tmp32 = t.Replica 244 | bs[4] = byte(tmp32) 245 | bs[5] = byte(tmp32 >> 8) 246 | bs[6] = byte(tmp32 >> 16) 247 | bs[7] = byte(tmp32 >> 24) 248 | tmp32 = t.Instance 249 | bs[8] = byte(tmp32) 250 | bs[9] = byte(tmp32 >> 8) 251 | bs[10] = byte(tmp32 >> 16) 252 | bs[11] = byte(tmp32 >> 24) 253 | bs[12] = byte(t.OK) 254 | tmp32 = t.Ballot 255 | bs[13] = byte(tmp32) 256 | bs[14] = byte(tmp32 >> 8) 257 | bs[15] = byte(tmp32 >> 16) 258 | bs[16] = byte(tmp32 >> 24) 259 | tmp32 = t.ConflictReplica 260 | bs[17] = byte(tmp32) 261 | bs[18] = byte(tmp32 >> 8) 262 | bs[19] = byte(tmp32 >> 16) 263 | bs[20] = byte(tmp32 >> 24) 264 | tmp32 = t.ConflictInstance 265 | bs[21] = byte(tmp32) 266 | bs[22] = byte(tmp32 >> 8) 267 | bs[23] = byte(tmp32 >> 16) 268 | bs[24] = byte(tmp32 >> 24) 269 | bs[25] = byte(t.ConflictStatus) 270 | wire.Write(bs) 271 | } 272 | 273 | func (t *TryPreAcceptReply) Unmarshal(wire io.Reader) error { 274 | var b [26]byte 275 | var bs []byte 276 | bs = b[:26] 277 | if _, err := io.ReadAtLeast(wire, bs, 26); err != nil { 278 | return err 279 | } 280 | t.AcceptorId = int32((uint32(bs[0]) | (uint32(bs[1]) << 8) | (uint32(bs[2]) << 16) | (uint32(bs[3]) << 24))) 281 | t.Replica = int32((uint32(bs[4]) | (uint32(bs[5]) << 8) | (uint32(bs[6]) << 16) | (uint32(bs[7]) << 24))) 282 | t.Instance = int32((uint32(bs[8]) | (uint32(bs[9]) << 8) | (uint32(bs[10]) << 16) | (uint32(bs[11]) << 24))) 283 | t.OK = uint8(bs[12]) 284 | t.Ballot = int32((uint32(bs[13]) | (uint32(bs[14]) << 8) | (uint32(bs[15]) << 16) | (uint32(bs[16]) << 24))) 285 | t.ConflictReplica = int32((uint32(bs[17]) | (uint32(bs[18]) << 8) | (uint32(bs[19]) << 16) | (uint32(bs[20]) << 24))) 286 | t.ConflictInstance = int32((uint32(bs[21]) | (uint32(bs[22]) << 8) | (uint32(bs[23]) << 16) | (uint32(bs[24]) << 24))) 287 | t.ConflictStatus = int8(bs[25]) 288 | return nil 
289 | } 290 | 291 | func (p *CommitShort) New() fastrpc.Serializable { 292 | return new(CommitShort) 293 | } 294 | 295 | func (t *CommitShort) Marshal(wire io.Writer) { 296 | var b [40]byte 297 | var bs []byte 298 | bs = b[:40] 299 | tmp32 := t.LeaderId 300 | bs[0] = byte(tmp32) 301 | bs[1] = byte(tmp32 >> 8) 302 | bs[2] = byte(tmp32 >> 16) 303 | bs[3] = byte(tmp32 >> 24) 304 | tmp32 = t.Replica 305 | bs[4] = byte(tmp32) 306 | bs[5] = byte(tmp32 >> 8) 307 | bs[6] = byte(tmp32 >> 16) 308 | bs[7] = byte(tmp32 >> 24) 309 | tmp32 = t.Instance 310 | bs[8] = byte(tmp32) 311 | bs[9] = byte(tmp32 >> 8) 312 | bs[10] = byte(tmp32 >> 16) 313 | bs[11] = byte(tmp32 >> 24) 314 | tmp32 = t.Count 315 | bs[12] = byte(tmp32) 316 | bs[13] = byte(tmp32 >> 8) 317 | bs[14] = byte(tmp32 >> 16) 318 | bs[15] = byte(tmp32 >> 24) 319 | tmp32 = t.Seq 320 | bs[16] = byte(tmp32) 321 | bs[17] = byte(tmp32 >> 8) 322 | bs[18] = byte(tmp32 >> 16) 323 | bs[19] = byte(tmp32 >> 24) 324 | tmp32 = t.Deps[0] 325 | bs[20] = byte(tmp32) 326 | bs[21] = byte(tmp32 >> 8) 327 | bs[22] = byte(tmp32 >> 16) 328 | bs[23] = byte(tmp32 >> 24) 329 | tmp32 = t.Deps[1] 330 | bs[24] = byte(tmp32) 331 | bs[25] = byte(tmp32 >> 8) 332 | bs[26] = byte(tmp32 >> 16) 333 | bs[27] = byte(tmp32 >> 24) 334 | tmp32 = t.Deps[2] 335 | bs[28] = byte(tmp32) 336 | bs[29] = byte(tmp32 >> 8) 337 | bs[30] = byte(tmp32 >> 16) 338 | bs[31] = byte(tmp32 >> 24) 339 | tmp32 = t.Deps[3] 340 | bs[32] = byte(tmp32) 341 | bs[33] = byte(tmp32 >> 8) 342 | bs[34] = byte(tmp32 >> 16) 343 | bs[35] = byte(tmp32 >> 24) 344 | tmp32 = t.Deps[4] 345 | bs[36] = byte(tmp32) 346 | bs[37] = byte(tmp32 >> 8) 347 | bs[38] = byte(tmp32 >> 16) 348 | bs[39] = byte(tmp32 >> 24) 349 | bs = append(bs, fastrpc.Int64ToByteArray(t.SentAt)...) 
350 | wire.Write(bs) 351 | } 352 | 353 | func (t *CommitShort) Unmarshal(wire io.Reader) error { 354 | const size = 48 355 | var b [size]byte 356 | var bs []byte 357 | bs = b[:size] 358 | if _, err := io.ReadAtLeast(wire, bs, size); err != nil { 359 | return err 360 | } 361 | t.LeaderId = int32((uint32(bs[0]) | (uint32(bs[1]) << 8) | (uint32(bs[2]) << 16) | (uint32(bs[3]) << 24))) 362 | t.Replica = int32((uint32(bs[4]) | (uint32(bs[5]) << 8) | (uint32(bs[6]) << 16) | (uint32(bs[7]) << 24))) 363 | t.Instance = int32((uint32(bs[8]) | (uint32(bs[9]) << 8) | (uint32(bs[10]) << 16) | (uint32(bs[11]) << 24))) 364 | t.Count = int32((uint32(bs[12]) | (uint32(bs[13]) << 8) | (uint32(bs[14]) << 16) | (uint32(bs[15]) << 24))) 365 | t.Seq = int32((uint32(bs[16]) | (uint32(bs[17]) << 8) | (uint32(bs[18]) << 16) | (uint32(bs[19]) << 24))) 366 | t.Deps[0] = int32((uint32(bs[20]) | (uint32(bs[21]) << 8) | (uint32(bs[22]) << 16) | (uint32(bs[23]) << 24))) 367 | t.Deps[1] = int32((uint32(bs[24]) | (uint32(bs[25]) << 8) | (uint32(bs[26]) << 16) | (uint32(bs[27]) << 24))) 368 | t.Deps[2] = int32((uint32(bs[28]) | (uint32(bs[29]) << 8) | (uint32(bs[30]) << 16) | (uint32(bs[31]) << 24))) 369 | t.Deps[3] = int32((uint32(bs[32]) | (uint32(bs[33]) << 8) | (uint32(bs[34]) << 16) | (uint32(bs[35]) << 24))) 370 | t.Deps[4] = int32((uint32(bs[36]) | (uint32(bs[37]) << 8) | (uint32(bs[38]) << 16) | (uint32(bs[39]) << 24))) 371 | t.SentAt = fastrpc.Int64FromByteArray(bs[(size - 8):]) 372 | return nil 373 | } 374 | 375 | func (p *PreAccept) New() fastrpc.Serializable { 376 | return new(PreAccept) 377 | } 378 | 379 | func (t *PreAccept) Marshal(wire io.Writer) { 380 | var b [24]byte 381 | var bs []byte 382 | bs = b[:16] 383 | tmp32 := t.LeaderId 384 | bs[0] = byte(tmp32) 385 | bs[1] = byte(tmp32 >> 8) 386 | bs[2] = byte(tmp32 >> 16) 387 | bs[3] = byte(tmp32 >> 24) 388 | tmp32 = t.Replica 389 | bs[4] = byte(tmp32) 390 | bs[5] = byte(tmp32 >> 8) 391 | bs[6] = byte(tmp32 >> 16) 392 | bs[7] = 
byte(tmp32 >> 24) 393 | tmp32 = t.Instance 394 | bs[8] = byte(tmp32) 395 | bs[9] = byte(tmp32 >> 8) 396 | bs[10] = byte(tmp32 >> 16) 397 | bs[11] = byte(tmp32 >> 24) 398 | tmp32 = t.Ballot 399 | bs[12] = byte(tmp32) 400 | bs[13] = byte(tmp32 >> 8) 401 | bs[14] = byte(tmp32 >> 16) 402 | bs[15] = byte(tmp32 >> 24) 403 | wire.Write(bs) 404 | bs = b[:] 405 | alen1 := int64(len(t.Command)) 406 | if wlen := binary.PutVarint(bs, alen1); wlen >= 0 { 407 | wire.Write(b[0:wlen]) 408 | } 409 | for i := int64(0); i < alen1; i++ { 410 | t.Command[i].Marshal(wire) 411 | } 412 | tmp32 = t.Seq 413 | bs[0] = byte(tmp32) 414 | bs[1] = byte(tmp32 >> 8) 415 | bs[2] = byte(tmp32 >> 16) 416 | bs[3] = byte(tmp32 >> 24) 417 | tmp32 = t.Deps[0] 418 | bs[4] = byte(tmp32) 419 | bs[5] = byte(tmp32 >> 8) 420 | bs[6] = byte(tmp32 >> 16) 421 | bs[7] = byte(tmp32 >> 24) 422 | tmp32 = t.Deps[1] 423 | bs[8] = byte(tmp32) 424 | bs[9] = byte(tmp32 >> 8) 425 | bs[10] = byte(tmp32 >> 16) 426 | bs[11] = byte(tmp32 >> 24) 427 | tmp32 = t.Deps[2] 428 | bs[12] = byte(tmp32) 429 | bs[13] = byte(tmp32 >> 8) 430 | bs[14] = byte(tmp32 >> 16) 431 | bs[15] = byte(tmp32 >> 24) 432 | tmp32 = t.Deps[3] 433 | bs[16] = byte(tmp32) 434 | bs[17] = byte(tmp32 >> 8) 435 | bs[18] = byte(tmp32 >> 16) 436 | bs[19] = byte(tmp32 >> 24) 437 | tmp32 = t.Deps[4] 438 | bs[20] = byte(tmp32) 439 | bs[21] = byte(tmp32 >> 8) 440 | bs[22] = byte(tmp32 >> 16) 441 | bs[23] = byte(tmp32 >> 24) 442 | bs = append(bs, fastrpc.Int64ToByteArray(t.SentAt)...) 443 | bs = append(bs, fastrpc.Int64ToByteArray(t.OpenAfter)...) 
444 | wire.Write(bs) 445 | } 446 | 447 | func (t *PreAccept) Unmarshal(rr io.Reader) error { 448 | var wire byteReader 449 | var ok bool 450 | if wire, ok = rr.(byteReader); !ok { 451 | wire = bufio.NewReader(rr) 452 | } 453 | const size = 40 454 | var b [size]byte 455 | var bs []byte 456 | bs = b[:16] 457 | if _, err := io.ReadAtLeast(wire, bs, 16); err != nil { 458 | return err 459 | } 460 | t.LeaderId = int32((uint32(bs[0]) | (uint32(bs[1]) << 8) | (uint32(bs[2]) << 16) | (uint32(bs[3]) << 24))) 461 | t.Replica = int32((uint32(bs[4]) | (uint32(bs[5]) << 8) | (uint32(bs[6]) << 16) | (uint32(bs[7]) << 24))) 462 | t.Instance = int32((uint32(bs[8]) | (uint32(bs[9]) << 8) | (uint32(bs[10]) << 16) | (uint32(bs[11]) << 24))) 463 | t.Ballot = int32((uint32(bs[12]) | (uint32(bs[13]) << 8) | (uint32(bs[14]) << 16) | (uint32(bs[15]) << 24))) 464 | alen1, err := binary.ReadVarint(wire) 465 | if err != nil { 466 | return err 467 | } 468 | t.Command = make([]state.Command, alen1) 469 | for i := int64(0); i < alen1; i++ { 470 | t.Command[i].Unmarshal(wire) 471 | } 472 | bs = b[:size] 473 | if _, err := io.ReadAtLeast(wire, bs, size); err != nil { 474 | return err 475 | } 476 | t.Seq = int32((uint32(bs[0]) | (uint32(bs[1]) << 8) | (uint32(bs[2]) << 16) | (uint32(bs[3]) << 24))) 477 | t.Deps[0] = int32((uint32(bs[4]) | (uint32(bs[5]) << 8) | (uint32(bs[6]) << 16) | (uint32(bs[7]) << 24))) 478 | t.Deps[1] = int32((uint32(bs[8]) | (uint32(bs[9]) << 8) | (uint32(bs[10]) << 16) | (uint32(bs[11]) << 24))) 479 | t.Deps[2] = int32((uint32(bs[12]) | (uint32(bs[13]) << 8) | (uint32(bs[14]) << 16) | (uint32(bs[15]) << 24))) 480 | t.Deps[3] = int32((uint32(bs[16]) | (uint32(bs[17]) << 8) | (uint32(bs[18]) << 16) | (uint32(bs[19]) << 24))) 481 | t.Deps[4] = int32((uint32(bs[20]) | (uint32(bs[21]) << 8) | (uint32(bs[22]) << 16) | (uint32(bs[23]) << 24))) 482 | t.SentAt = fastrpc.Int64FromByteArray(bs[(size - 16):(size - 8)]) 483 | t.OpenAfter = fastrpc.Int64FromByteArray(bs[(size - 8):]) 484 
	return nil
}

// New allocates an empty PreAcceptOK; used by the fastrpc dispatch table to
// create instances for incoming messages of this type.
func (p *PreAcceptOK) New() fastrpc.Serializable {
	return new(PreAcceptOK)
}

// Marshal writes PreAcceptOK as a single little-endian int32 (Instance).
func (t *PreAcceptOK) Marshal(wire io.Writer) {
	var b [4]byte
	var bs []byte
	bs = b[:4]
	tmp32 := t.Instance
	bs[0] = byte(tmp32)
	bs[1] = byte(tmp32 >> 8)
	bs[2] = byte(tmp32 >> 16)
	bs[3] = byte(tmp32 >> 24)
	wire.Write(bs)
}

// Unmarshal reads the 4-byte little-endian form written by Marshal.
func (t *PreAcceptOK) Unmarshal(wire io.Reader) error {
	var b [4]byte
	var bs []byte
	bs = b[:4]
	if _, err := io.ReadAtLeast(wire, bs, 4); err != nil {
		return err
	}
	t.Instance = int32((uint32(bs[0]) | (uint32(bs[1]) << 8) | (uint32(bs[2]) << 16) | (uint32(bs[3]) << 24)))
	return nil
}

// New allocates an empty PrepareReply for the fastrpc dispatch table.
func (p *PrepareReply) New() fastrpc.Serializable {
	return new(PrepareReply)
}

// Marshal wire layout: an 18-byte fixed header (AcceptorId, Replica,
// Instance, OK, Ballot, Status), then a varint command count followed by
// each command's own marshaling, then 24 bytes for Seq and the five Deps
// entries. All multi-byte integers are little-endian.
func (t *PrepareReply) Marshal(wire io.Writer) {
	var b [24]byte
	var bs []byte
	bs = b[:18]
	tmp32 := t.AcceptorId
	bs[0] = byte(tmp32)
	bs[1] = byte(tmp32 >> 8)
	bs[2] = byte(tmp32 >> 16)
	bs[3] = byte(tmp32 >> 24)
	tmp32 = t.Replica
	bs[4] = byte(tmp32)
	bs[5] = byte(tmp32 >> 8)
	bs[6] = byte(tmp32 >> 16)
	bs[7] = byte(tmp32 >> 24)
	tmp32 = t.Instance
	bs[8] = byte(tmp32)
	bs[9] = byte(tmp32 >> 8)
	bs[10] = byte(tmp32 >> 16)
	bs[11] = byte(tmp32 >> 24)
	bs[12] = byte(t.OK)
	tmp32 = t.Ballot
	bs[13] = byte(tmp32)
	bs[14] = byte(tmp32 >> 8)
	bs[15] = byte(tmp32 >> 16)
	bs[16] = byte(tmp32 >> 24)
	bs[17] = byte(t.Status)
	wire.Write(bs)
	bs = b[:]
	alen1 := int64(len(t.Command))
	// binary.PutVarint always returns >= 1, so the length prefix is always written.
	if wlen := binary.PutVarint(bs, alen1); wlen >= 0 {
		wire.Write(b[0:wlen])
	}
	for i := int64(0); i < alen1; i++ {
		t.Command[i].Marshal(wire)
	}
	// Trailer: Seq + Deps[0..4], reusing the same 24-byte scratch buffer.
	tmp32 = t.Seq
	bs[0] = byte(tmp32)
	bs[1] = byte(tmp32 >> 8)
	bs[2] = byte(tmp32 >> 16)
	bs[3] = byte(tmp32 >> 24)
	tmp32 = t.Deps[0]
	bs[4] = byte(tmp32)
	bs[5] = byte(tmp32 >> 8)
	bs[6] = byte(tmp32 >> 16)
	bs[7] = byte(tmp32 >> 24)
	tmp32 = t.Deps[1]
	bs[8] = byte(tmp32)
	bs[9] = byte(tmp32 >> 8)
	bs[10] = byte(tmp32 >> 16)
	bs[11] = byte(tmp32 >> 24)
	tmp32 = t.Deps[2]
	bs[12] = byte(tmp32)
	bs[13] = byte(tmp32 >> 8)
	bs[14] = byte(tmp32 >> 16)
	bs[15] = byte(tmp32 >> 24)
	tmp32 = t.Deps[3]
	bs[16] = byte(tmp32)
	bs[17] = byte(tmp32 >> 8)
	bs[18] = byte(tmp32 >> 16)
	bs[19] = byte(tmp32 >> 24)
	tmp32 = t.Deps[4]
	bs[20] = byte(tmp32)
	bs[21] = byte(tmp32 >> 8)
	bs[22] = byte(tmp32 >> 16)
	bs[23] = byte(tmp32 >> 24)
	wire.Write(bs)
}

// Unmarshal reads the format produced by PrepareReply.Marshal. rr is wrapped
// in a bufio.Reader when it does not already satisfy byteReader (needed by
// binary.ReadVarint).
func (t *PrepareReply) Unmarshal(rr io.Reader) error {
	var wire byteReader
	var ok bool
	if wire, ok = rr.(byteReader); !ok {
		wire = bufio.NewReader(rr)
	}
	var b [24]byte
	var bs []byte
	bs = b[:18]
	if _, err := io.ReadAtLeast(wire, bs, 18); err != nil {
		return err
	}
	t.AcceptorId = int32((uint32(bs[0]) | (uint32(bs[1]) << 8) | (uint32(bs[2]) << 16) | (uint32(bs[3]) << 24)))
	t.Replica = int32((uint32(bs[4]) | (uint32(bs[5]) << 8) | (uint32(bs[6]) << 16) | (uint32(bs[7]) << 24)))
	t.Instance = int32((uint32(bs[8]) | (uint32(bs[9]) << 8) | (uint32(bs[10]) << 16) | (uint32(bs[11]) << 24)))
	t.OK = uint8(bs[12])
	t.Ballot = int32((uint32(bs[13]) | (uint32(bs[14]) << 8) | (uint32(bs[15]) << 16) | (uint32(bs[16]) << 24)))
	t.Status = int8(bs[17])
	alen1, err := binary.ReadVarint(wire)
	if err != nil {
		return err
	}
	// NOTE(review): errors from Command[i].Unmarshal are discarded here (and
	// in the other Unmarshal methods below) — confirm callers tolerate a
	// partially decoded command slice on a truncated stream.
	t.Command = make([]state.Command, alen1)
	for i := int64(0); i < alen1; i++ {
		t.Command[i].Unmarshal(wire)
	}
	bs = b[:24]
	if _, err := io.ReadAtLeast(wire, bs, 24); err != nil {
		return err
	}
	t.Seq = int32((uint32(bs[0]) | (uint32(bs[1]) << 8) | (uint32(bs[2]) << 16) | (uint32(bs[3]) << 24)))
	t.Deps[0] = int32((uint32(bs[4]) | (uint32(bs[5]) << 8) | (uint32(bs[6]) << 16) | (uint32(bs[7]) << 24)))
	t.Deps[1] = int32((uint32(bs[8]) | (uint32(bs[9]) << 8) | (uint32(bs[10]) << 16) | (uint32(bs[11]) << 24)))
	t.Deps[2] = int32((uint32(bs[12]) | (uint32(bs[13]) << 8) | (uint32(bs[14]) << 16) | (uint32(bs[15]) << 24)))
	t.Deps[3] = int32((uint32(bs[16]) | (uint32(bs[17]) << 8) | (uint32(bs[18]) << 16) | (uint32(bs[19]) << 24)))
	t.Deps[4] = int32((uint32(bs[20]) | (uint32(bs[21]) << 8) | (uint32(bs[22]) << 16) | (uint32(bs[23]) << 24)))
	return nil
}

// New allocates an empty Commit for the fastrpc dispatch table.
func (p *Commit) New() fastrpc.Serializable {
	return new(Commit)
}

// Marshal wire layout: 12-byte header (LeaderId, Replica, Instance), varint
// command count + commands, 24-byte Seq/Deps trailer, then 8 bytes of SentAt
// appended via fastrpc.Int64ToByteArray. All integers little-endian.
func (t *Commit) Marshal(wire io.Writer) {
	var b [24]byte
	var bs []byte
	bs = b[:12]
	tmp32 := t.LeaderId
	bs[0] = byte(tmp32)
	bs[1] = byte(tmp32 >> 8)
	bs[2] = byte(tmp32 >> 16)
	bs[3] = byte(tmp32 >> 24)
	tmp32 = t.Replica
	bs[4] = byte(tmp32)
	bs[5] = byte(tmp32 >> 8)
	bs[6] = byte(tmp32 >> 16)
	bs[7] = byte(tmp32 >> 24)
	tmp32 = t.Instance
	bs[8] = byte(tmp32)
	bs[9] = byte(tmp32 >> 8)
	bs[10] = byte(tmp32 >> 16)
	bs[11] = byte(tmp32 >> 24)
	wire.Write(bs)
	bs = b[:]
	alen1 := int64(len(t.Command))
	if wlen := binary.PutVarint(bs, alen1); wlen >= 0 {
		wire.Write(b[0:wlen])
	}
	for i := int64(0); i < alen1; i++ {
		t.Command[i].Marshal(wire)
	}
	tmp32 = t.Seq
	bs[0] = byte(tmp32)
	bs[1] = byte(tmp32 >> 8)
	bs[2] = byte(tmp32 >> 16)
	bs[3] = byte(tmp32 >> 24)
	tmp32 = t.Deps[0]
	bs[4] = byte(tmp32)
	bs[5] = byte(tmp32 >> 8)
	bs[6] = byte(tmp32 >> 16)
	bs[7] = byte(tmp32 >> 24)
	tmp32 = t.Deps[1]
	bs[8] = byte(tmp32)
	bs[9] = byte(tmp32 >> 8)
	bs[10] = byte(tmp32 >> 16)
	bs[11] = byte(tmp32 >> 24)
	tmp32 = t.Deps[2]
	bs[12] = byte(tmp32)
	bs[13] = byte(tmp32 >> 8)
	bs[14] = byte(tmp32 >> 16)
	bs[15] = byte(tmp32 >> 24)
	tmp32 = t.Deps[3]
	bs[16] = byte(tmp32)
	bs[17] = byte(tmp32 >> 8)
	bs[18] = byte(tmp32 >> 16)
	bs[19] = byte(tmp32 >> 24)
	tmp32 = t.Deps[4]
	bs[20] = byte(tmp32)
	bs[21] = byte(tmp32 >> 8)
	bs[22] = byte(tmp32 >> 16)
	bs[23] = byte(tmp32 >> 24)
	// append grows past the 24-byte backing array, so this allocates a new
	// slice; the trailer is then written in one call.
	bs = append(bs, fastrpc.Int64ToByteArray(t.SentAt)...)
	wire.Write(bs)
}

// Unmarshal reads the format produced by Commit.Marshal; the 32-byte trailer
// is Seq + Deps[0..4] (24 bytes) followed by SentAt (8 bytes).
func (t *Commit) Unmarshal(rr io.Reader) error {
	var wire byteReader
	var ok bool
	if wire, ok = rr.(byteReader); !ok {
		wire = bufio.NewReader(rr)
	}
	const size = 32
	var b [size]byte
	var bs []byte
	bs = b[:12]
	if _, err := io.ReadAtLeast(wire, bs, 12); err != nil {
		return err
	}
	t.LeaderId = int32((uint32(bs[0]) | (uint32(bs[1]) << 8) | (uint32(bs[2]) << 16) | (uint32(bs[3]) << 24)))
	t.Replica = int32((uint32(bs[4]) | (uint32(bs[5]) << 8) | (uint32(bs[6]) << 16) | (uint32(bs[7]) << 24)))
	t.Instance = int32((uint32(bs[8]) | (uint32(bs[9]) << 8) | (uint32(bs[10]) << 16) | (uint32(bs[11]) << 24)))
	alen1, err := binary.ReadVarint(wire)
	if err != nil {
		return err
	}
	t.Command = make([]state.Command, alen1)
	for i := int64(0); i < alen1; i++ {
		t.Command[i].Unmarshal(wire)
	}
	bs = b[:size]
	if _, err := io.ReadAtLeast(wire, bs, size); err != nil {
		return err
	}
	t.Seq = int32((uint32(bs[0]) | (uint32(bs[1]) << 8) | (uint32(bs[2]) << 16) | (uint32(bs[3]) << 24)))
	t.Deps[0] = int32((uint32(bs[4]) | (uint32(bs[5]) << 8) | (uint32(bs[6]) << 16) | (uint32(bs[7]) << 24)))
	t.Deps[1] = int32((uint32(bs[8]) | (uint32(bs[9]) << 8) | (uint32(bs[10]) << 16) | (uint32(bs[11]) << 24)))
	t.Deps[2] = int32((uint32(bs[12]) | (uint32(bs[13]) << 8) | (uint32(bs[14]) << 16) | (uint32(bs[15]) << 24)))
	t.Deps[3] = int32((uint32(bs[16]) | (uint32(bs[17]) << 8) | (uint32(bs[18]) << 16) | (uint32(bs[19]) << 24)))
	t.Deps[4] = int32((uint32(bs[20]) | (uint32(bs[21]) << 8) | (uint32(bs[22]) << 16) | (uint32(bs[23]) << 24)))
	t.SentAt = fastrpc.Int64FromByteArray(bs[(size - 8):])
	return nil
}

// New allocates an empty AcceptReply for the fastrpc dispatch table.
func (p *AcceptReply) New() fastrpc.Serializable {
	return new(AcceptReply)
}

// Marshal writes AcceptReply as 13 bytes: Replica, Instance, OK, Ballot
// (little-endian int32s plus one OK byte).
func (t *AcceptReply) Marshal(wire io.Writer) {
	var b [13]byte
	var bs []byte
	bs = b[:13]
	tmp32 := t.Replica
	bs[0] = byte(tmp32)
	bs[1] = byte(tmp32 >> 8)
	bs[2] = byte(tmp32 >> 16)
	bs[3] = byte(tmp32 >> 24)
	tmp32 = t.Instance
	bs[4] = byte(tmp32)
	bs[5] = byte(tmp32 >> 8)
	bs[6] = byte(tmp32 >> 16)
	bs[7] = byte(tmp32 >> 24)
	bs[8] = byte(t.OK)
	tmp32 = t.Ballot
	bs[9] = byte(tmp32)
	bs[10] = byte(tmp32 >> 8)
	bs[11] = byte(tmp32 >> 16)
	bs[12] = byte(tmp32 >> 24)
	wire.Write(bs)
}

// Unmarshal reads the 13-byte form written by AcceptReply.Marshal.
func (t *AcceptReply) Unmarshal(wire io.Reader) error {
	var b [13]byte
	var bs []byte
	bs = b[:13]
	if _, err := io.ReadAtLeast(wire, bs, 13); err != nil {
		return err
	}
	t.Replica = int32((uint32(bs[0]) | (uint32(bs[1]) << 8) | (uint32(bs[2]) << 16) | (uint32(bs[3]) << 24)))
	t.Instance = int32((uint32(bs[4]) | (uint32(bs[5]) << 8) | (uint32(bs[6]) << 16) | (uint32(bs[7]) << 24)))
	t.OK = uint8(bs[8])
	t.Ballot = int32((uint32(bs[9]) | (uint32(bs[10]) << 8) | (uint32(bs[11]) << 16) | (uint32(bs[12]) << 24)))
	return nil
}

// New allocates an empty Accept for the fastrpc dispatch table.
func (p *Accept) New() fastrpc.Serializable {
	return new(Accept)
}

// Marshal writes Accept as 44 fixed bytes (LeaderId, Replica, Instance,
// Ballot, Count, Seq, Deps[0..4]; little-endian int32s) plus 8 bytes of
// SentAt appended via fastrpc.Int64ToByteArray.
func (t *Accept) Marshal(wire io.Writer) {
	var b [44]byte
	var bs []byte
	bs = b[:44]
	tmp32 := t.LeaderId
	bs[0] = byte(tmp32)
	bs[1] = byte(tmp32 >> 8)
	bs[2] = byte(tmp32 >> 16)
	bs[3] = byte(tmp32 >> 24)
	tmp32 = t.Replica
	bs[4] = byte(tmp32)
	bs[5] = byte(tmp32 >> 8)
	bs[6] = byte(tmp32 >> 16)
	bs[7] = byte(tmp32 >> 24)
	tmp32 = t.Instance
	bs[8] = byte(tmp32)
	bs[9] = byte(tmp32 >> 8)
	bs[10] = byte(tmp32 >> 16)
	bs[11] = byte(tmp32 >> 24)
	tmp32 = t.Ballot
	bs[12] = byte(tmp32)
	bs[13] = byte(tmp32 >> 8)
	bs[14] = byte(tmp32 >> 16)
	bs[15] = byte(tmp32 >> 24)
	tmp32 = t.Count
	bs[16] = byte(tmp32)
	bs[17] = byte(tmp32 >> 8)
	bs[18] = byte(tmp32 >> 16)
	bs[19] = byte(tmp32 >> 24)
	tmp32 = t.Seq
	bs[20] = byte(tmp32)
	bs[21] = byte(tmp32 >> 8)
	bs[22] = byte(tmp32 >> 16)
	bs[23] = byte(tmp32 >> 24)
	tmp32 = t.Deps[0]
	bs[24] = byte(tmp32)
	bs[25] = byte(tmp32 >> 8)
	bs[26] = byte(tmp32 >> 16)
	bs[27] = byte(tmp32 >> 24)
	tmp32 = t.Deps[1]
	bs[28] = byte(tmp32)
	bs[29] = byte(tmp32 >> 8)
	bs[30] = byte(tmp32 >> 16)
	bs[31] = byte(tmp32 >> 24)
	tmp32 = t.Deps[2]
	bs[32] = byte(tmp32)
	bs[33] = byte(tmp32 >> 8)
	bs[34] = byte(tmp32 >> 16)
	bs[35] = byte(tmp32 >> 24)
	tmp32 = t.Deps[3]
	bs[36] = byte(tmp32)
	bs[37] = byte(tmp32 >> 8)
	bs[38] = byte(tmp32 >> 16)
	bs[39] = byte(tmp32 >> 24)
	tmp32 = t.Deps[4]
	bs[40] = byte(tmp32)
	bs[41] = byte(tmp32 >> 8)
	bs[42] = byte(tmp32 >> 16)
	bs[43] = byte(tmp32 >> 24)
	bs = append(bs, fastrpc.Int64ToByteArray(t.SentAt)...)
	bs[4] = byte(tmp32)
	bs[5] = byte(tmp32 >> 8)
	bs[6] = byte(tmp32 >> 16)
	bs[7] = byte(tmp32 >> 24)
	tmp32 = t.Instance
	bs[8] = byte(tmp32)
	bs[9] = byte(tmp32 >> 8)
	bs[10] = byte(tmp32 >> 16)
	bs[11] = byte(tmp32 >> 24)
	tmp32 = t.Ballot
	bs[12] = byte(tmp32)
	bs[13] = byte(tmp32 >> 8)
	bs[14] = byte(tmp32 >> 16)
	bs[15] = byte(tmp32 >> 24)
	wire.Write(bs)
}

// Unmarshal reads the 16-byte Prepare message written by Prepare.Marshal
// (LeaderId, Replica, Instance, Ballot; little-endian int32s).
func (t *Prepare) Unmarshal(wire io.Reader) error {
	var b [16]byte
	var bs []byte
	bs = b[:16]
	if _, err := io.ReadAtLeast(wire, bs, 16); err != nil {
		return err
	}
	t.LeaderId = int32((uint32(bs[0]) | (uint32(bs[1]) << 8) | (uint32(bs[2]) << 16) | (uint32(bs[3]) << 24)))
	t.Replica = int32((uint32(bs[4]) | (uint32(bs[5]) << 8) | (uint32(bs[6]) << 16) | (uint32(bs[7]) << 24)))
	t.Instance = int32((uint32(bs[8]) | (uint32(bs[9]) << 8) | (uint32(bs[10]) << 16) | (uint32(bs[11]) << 24)))
	t.Ballot = int32((uint32(bs[12]) | (uint32(bs[13]) << 8) | (uint32(bs[14]) << 16) | (uint32(bs[15]) << 24)))
	return nil
}
--------------------------------------------------------------------------------
/src/fastrpc/fastrpc.go:
--------------------------------------------------------------------------------
// Package fastrpc defines the interface every replica-to-replica RPC message
// implements, plus little-endian int64 encode/decode helpers shared by the
// per-protocol marshaling code.
package fastrpc

import (
	"io"
)

// Serializable is implemented by every RPC message type. Marshal/Unmarshal
// move the value to/from the wire; New allocates a fresh instance so the
// receiver's dispatch table can decode an incoming message of this type.
type Serializable interface {
	Marshal(io.Writer)
	Unmarshal(io.Reader) error
	New() Serializable
}

// Int64ToByteArray encodes x as 8 little-endian bytes.
func Int64ToByteArray(x int64) []byte {
	b := make([]byte, 8)
	b[0] = byte(x)
	b[1] = byte(x >> 8)
	b[2] = byte(x >> 16)
	b[3] = byte(x >> 24)
	b[4] = byte(x >> 32)
	b[5] = byte(x >> 40)
	b[6] = byte(x >> 48)
	b[7] = byte(x >> 56)
	return b
}

// Int64FromByteArray decodes the little-endian encoding produced by
// Int64ToByteArray. Panics (index out of range) if b has fewer than 8 bytes.
func Int64FromByteArray(b []byte) int64 {
	x := uint64(b[0])
	x |= uint64(b[1]) << 8
	x |= uint64(b[2]) << 16
	x |= uint64(b[3]) << 24
	x |= uint64(b[4]) << 32
	x |= uint64(b[5]) << 40
	x |= uint64(b[6]) << 48
	x |= uint64(b[7]) << 56
	return int64(x)
}
--------------------------------------------------------------------------------
/src/genericsmr/genericsmr.go:
--------------------------------------------------------------------------------
// Package genericsmr provides the protocol-independent replica plumbing:
// peer connections, client connections, the RPC dispatch table, and beacons.
package genericsmr

import (
	"bufio"
	"encoding/binary"
	"fastrpc"
	"fmt"
	"genericsmrproto"
	"io"
	"log"
	"net"
	"os"
	"rdtsc"
	"state"
	"time"
	"timetrace"
)

// CHAN_BUFFER_SIZE is the capacity of the propose/beacon channels.
const CHAN_BUFFER_SIZE = 200000

// RPCMessage wraps a decoded peer message with receive metadata.
type RPCMessage struct {
	Message    fastrpc.Serializable
	ReceivedAt int64 // Unix Nano UTC timestamp
	From       int64 // The rid of the replica who sent this message
}

// RPCPair associates a prototype message (used via New to allocate decode
// targets) with the channel that receives decoded instances.
type RPCPair struct {
	Obj  fastrpc.Serializable
	Chan chan *RPCMessage
}

// Propose is a client proposal together with the writer to reply on.
type Propose struct {
	*genericsmrproto.Propose
	Reply *bufio.Writer
}

// MetricsRequest is an experiment-harness metrics query together with the
// writer to reply on.
type MetricsRequest struct {
	*genericsmrproto.MetricsRequest
	Reply *bufio.Writer
}

// Beacon is a liveness/latency probe received from peer replica Rid.
type Beacon struct {
	Rid       int32
	Timestamp uint64
}

// Replica holds the generic (protocol-independent) state of one replica.
type Replica struct {
	N            int        // total number of replicas
	Id           int32      // the ID of the current replica
	PeerAddrList []string   // array with the IP:port address of every replica
	Peers        []net.Conn // cache of connections to all other replicas
	PeerReaders  []*bufio.Reader
	PeerWriters  []*bufio.Writer
	Alive        []bool // connection status
	Listener     net.Listener

	State *state.State

	ProposeChan chan *Propose        // channel for client proposals
	BeaconChan  chan *Beacon         // channel for beacons from peer replicas
	MetricsChan chan *MetricsRequest // channel to send metrics to experiment code

	Shutdown bool

	Thrifty bool // send only as many messages as strictly required?
	Beacon  bool // send beacons to detect how fast are the other replicas?

	Durable     bool     // log to a stable store?
	StableStore *os.File // file support for the persistent log

	PreferredPeerOrder []int32 // replicas in the preferred order of communication

	rpcTable map[uint8]*RPCPair // message code -> decode prototype + delivery channel
	rpcCode  uint8              // next code handed out by RegisterRPC

	Ewma []float64 // per-peer EWMA of beacon round-trip ticks

	OnClientConnect chan bool
}

// NewReplica builds a replica: allocates per-peer slices and channels,
// creates the stable-store file (fatal on failure), initializes the
// preferred peer order to the rotation id+1, id+2, ..., and initializes
// time tracing. RPC codes handed out later start just past the built-in
// beacon-reply code.
func NewReplica(id int, peerAddrList []string, thrifty bool) *Replica {
	// Positional literal: field order must match the Replica struct above.
	r := &Replica{
		len(peerAddrList),
		int32(id),
		peerAddrList,
		make([]net.Conn, len(peerAddrList)),
		make([]*bufio.Reader, len(peerAddrList)),
		make([]*bufio.Writer, len(peerAddrList)),
		make([]bool, len(peerAddrList)),
		nil,
		state.InitState(),
		make(chan *Propose, CHAN_BUFFER_SIZE),
		make(chan *Beacon, CHAN_BUFFER_SIZE),
		make(chan *MetricsRequest, 1),
		false,
		thrifty,
		false,
		false,
		nil,
		make([]int32, len(peerAddrList)),
		make(map[uint8]*RPCPair),
		genericsmrproto.GENERIC_SMR_BEACON_REPLY + 1,
		make([]float64, len(peerAddrList)),
		make(chan bool, 100)}

	var err error

	if r.StableStore, err = os.Create(fmt.Sprintf("stable-store-replica%d", r.Id)); err != nil {
		log.Fatal(err)
	}

	for i := 0; i < r.N; i++ {
		r.PreferredPeerOrder[i] = int32((int(r.Id) + 1 + i) % r.N)
		r.Ewma[i] = 0.0
	}

	timetrace.Init()

	return r
}

// CurrentTime returns the current time as a UTC unix-nano timestamp.
func CurrentTime() int64 {
	return time.Now().UTC().UnixNano()
}

/* Client API */

// Ping is a no-op RPC used by the master for liveness checks.
func (r *Replica) Ping(args *genericsmrproto.PingArgs, reply *genericsmrproto.PingReply) error {
	return nil
}

// BeTheLeader is a no-op here; protocol implementations override the
// leadership behavior.
func (r *Replica) BeTheLeader(args *genericsmrproto.BeTheLeaderArgs, reply *genericsmrproto.BeTheLeaderReply) error {
	return nil
}

/* ============= */

// ConnectToPeers establishes the full peer mesh: this replica dials every
// peer with a lower id (retrying once per second until the dial succeeds,
// then sending its own id as a 4-byte handshake), while concurrently
// accepting connections from higher-id peers via waitForPeerConnections.
// Blocks until all higher-id peers have connected.
func (r *Replica) ConnectToPeers() {
	var b [4]byte
	bs := b[:4]
	done := make(chan bool)

	go r.waitForPeerConnections(done)

	// Connect to peers with a lower id than ours.
	for i := 0; i < int(r.Id); i++ {
		for done := false; !done; {
			if conn, err := net.Dial("tcp", r.PeerAddrList[i]); err == nil {
				r.Peers[i] = conn
				done = true
			} else {
				time.Sleep(1e9) // retry after 1s
			}
		}
		binary.LittleEndian.PutUint32(bs, uint32(r.Id))
		if _, err := r.Peers[i].Write(bs); err != nil {
			// NOTE(review): on a handshake write failure this peer is left
			// half-connected (no reader/writer, Alive=false) and we move on.
			fmt.Println("Write id error:", err)
			continue
		}
		r.Alive[i] = true
		r.PeerReaders[i] = bufio.NewReader(r.Peers[i])
		r.PeerWriters[i] = bufio.NewWriter(r.Peers[i])

		go r.replicaListener(i, r.PeerReaders[i])
	}
	<-done
	log.Printf("Replica id: %d. Done connecting to peers\n", r.Id)
}

/* Peer (replica) connections dispatcher */

// waitForPeerConnections accepts one connection from every peer with a
// higher id; each incoming connection is identified by the 4-byte
// little-endian id the dialer sends first. Signals done when all higher-id
// peers are connected.
func (r *Replica) waitForPeerConnections(done chan bool) {
	var b [4]byte
	bs := b[:4]

	// NOTE(review): the Listen error is discarded — a failed Listen leaves
	// r.Listener nil and the Accept below would panic; confirm intentional.
	r.Listener, _ = net.Listen("tcp", r.PeerAddrList[r.Id])
	for i := r.Id + 1; i < int32(r.N); i++ {
		conn, err := r.Listener.Accept()
		if err != nil {
			fmt.Println("Accept error:", err)
			continue
		}
		if _, err := io.ReadFull(conn, bs); err != nil {
			fmt.Println("Connection establish error:", err)
			continue
		}
		id := int32(binary.LittleEndian.Uint32(bs))
		r.Peers[id] = conn
		r.PeerReaders[id] = bufio.NewReader(conn)
		r.PeerWriters[id] = bufio.NewWriter(conn)
		r.Alive[id] = true

		go r.replicaListener(int(id), r.PeerReaders[id])
	}

	done <- true
}

/* Client connections dispatcher */

// WaitForClientConnections accepts client connections on the same listener
// used for peers, spawning one clientListener goroutine per client, until
// Shutdown is set.
func (r *Replica) WaitForClientConnections() {
	for !r.Shutdown {
		conn, err := r.Listener.Accept()
		if err != nil {
			log.Println("Accept error:", err)
			continue
		}

		log.Println("Connected to client", conn.RemoteAddr())
		go r.clientListener(conn)

		r.OnClientConnect <- true
	}
}

// replicaListener reads messages from one peer until an error or Shutdown.
// Built-in beacon messages are handled inline; every other message type is
// decoded via the rpcTable prototype and delivered on its registered channel.
func (r *Replica) replicaListener(rid int, reader *bufio.Reader) {
	var msgType uint8
	var err error = nil
	var gbeacon genericsmrproto.Beacon
	var gbeaconReply genericsmrproto.BeaconReply

	for err == nil && !r.Shutdown {

		if msgType, err = reader.ReadByte(); err != nil {
			break
		}

		receivedAt := CurrentTime()

		switch uint8(msgType) {

		case genericsmrproto.GENERIC_SMR_BEACON:
			// The `break`s below exit only the switch; a decode error is
			// still caught by the loop condition on the next iteration.
			if err = gbeacon.Unmarshal(reader); err != nil {
				break
			}
			beacon := &Beacon{int32(rid), gbeacon.Timestamp}
			r.BeaconChan <- beacon
			break

		case genericsmrproto.GENERIC_SMR_BEACON_REPLY:
			if err = gbeaconReply.Unmarshal(reader); err != nil {
				break
			}
			//TODO: UPDATE STUFF
			// EWMA of the beacon round trip measured in CPU ticks.
			r.Ewma[rid] = 0.99*r.Ewma[rid] + 0.01*float64(rdtsc.Cputicks()-gbeaconReply.Timestamp)
			log.Println(r.Ewma)
			break

		default:
			if rpair, present := r.rpcTable[msgType]; present {
				obj := rpair.Obj.New()
				if err = obj.Unmarshal(reader); err != nil {
					break
				}
				rpair.Chan <- &RPCMessage{obj, receivedAt, int64(rid)}
			} else {
				log.Println("Error: received unknown message type")
			}
		}
	}
}

// clientListener reads PROPOSE and METRICS_REQUEST messages from one client
// connection and forwards them on the corresponding channels, until an error
// or Shutdown. Unknown message types are silently skipped.
func (r *Replica) clientListener(conn net.Conn) {
	reader := bufio.NewReader(conn)
	writer := bufio.NewWriter(conn)
	var msgType byte
	var err error
	for !r.Shutdown && err == nil {

		if msgType, err = reader.ReadByte(); err != nil {
			break
		}

		switch uint8(msgType) {

		case genericsmrproto.PROPOSE:
			prop := new(genericsmrproto.Propose)
			if err = prop.Unmarshal(reader); err != nil {
				break
			}
			r.ProposeChan <- &Propose{prop, writer}
			break

		case genericsmrproto.METRICS_REQUEST:
			metrics := new(genericsmrproto.MetricsRequest)
			if err = metrics.Unmarshal(reader); err != nil {
				break
			}
			r.MetricsChan <- &MetricsRequest{metrics, writer}
			break
		}
	}
	if err != nil && err != io.EOF {
		log.Println("Error when reading from client connection:", err)
	}
}

// RegisterRPC assigns the next free message code to msgObj and records the
// channel on which decoded instances should be delivered. Returns the code
// the caller must send as the message-type byte.
func (r *Replica) RegisterRPC(msgObj fastrpc.Serializable, notify chan *RPCMessage) uint8 {
	code := r.rpcCode
	r.rpcCode++
	r.rpcTable[code] = &RPCPair{msgObj, notify}
	return code
}

// SendMsg writes a type byte followed by the marshaled message to peerId and
// flushes. Write errors are ignored.
func (r *Replica) SendMsg(peerId int32, code uint8, msg fastrpc.Serializable) {
	w := r.PeerWriters[peerId]
	w.WriteByte(code)
	msg.Marshal(w)
	w.Flush()
}

// ReplyPropose marshals a proposal reply onto the client's writer and flushes.
func (r *Replica) ReplyPropose(reply *genericsmrproto.ProposeReply, w *bufio.Writer) {
	reply.Marshal(w)
	w.Flush()
}

// SendBeacon sends a beacon stamped with the current CPU tick count to peerId.
func (r *Replica) SendBeacon(peerId int32) {
	w := r.PeerWriters[peerId]
	w.WriteByte(genericsmrproto.GENERIC_SMR_BEACON)
	beacon := &genericsmrproto.Beacon{rdtsc.Cputicks()}
	beacon.Marshal(w)
	w.Flush()
}

// ReplyBeacon echoes a beacon's timestamp back to its sender so the sender
// can measure the round trip.
func (r *Replica) ReplyBeacon(beacon *Beacon) {
	w := r.PeerWriters[beacon.Rid]
	w.WriteByte(genericsmrproto.GENERIC_SMR_BEACON_REPLY)
	rb := &genericsmrproto.BeaconReply{beacon.Timestamp}
	rb.Marshal(w)
	w.Flush()
}

// updates the preferred order in which to communicate with peers according to a preferred quorum
// Quorum members (minus self) come first, in quorum order; all remaining
// peers keep their previous relative order.
func (r *Replica) UpdatePreferredPeerOrder(quorum []int32) {
	aux := make([]int32, r.N)
	i := 0
	for _, p := range quorum {
		if p == r.Id {
			continue
		}
		aux[i] = p
		i++
	}

	for _, p := range r.PreferredPeerOrder {
		found := false
		for j := 0; j < i; j++ {
			if aux[j] == p {
				found = true
				break
			}
		}
		if !found {
			aux[i] = p
			i++
		}
	}

	r.PreferredPeerOrder = aux
}
--------------------------------------------------------------------------------
/src/genericsmrproto/genericsmrproto.go:
--------------------------------------------------------------------------------
// Package genericsmrproto defines the client-facing message types and the
// replica-management (beacon/leader/metrics) message types shared by all
// protocol implementations.
package genericsmrproto

import (
	"state"
)

// Message-type bytes sent on the wire ahead of each message body.
const (
	PROPOSE uint8 = iota
	PROPOSE_REPLY
	GENERIC_SMR_BEACON
	GENERIC_SMR_BEACON_REPLY
	METRICS_REQUEST
	METRICS_REPLY
)

// Op codes carried in a MetricsRequest.
const (
	METRICSOP_CONFLICT_RATE uint8 = iota
	METRICSOP_DUMP_OWD
)

// Propose is a client command submission.
type Propose struct {
	CommandId int32
	Command   state.Command
	Timestamp int64
}

// ProposeReply is the replica's answer to a Propose.
type ProposeReply struct {
	OK        uint8
	CommandId int32
	Value     state.Value
	Timestamp int64
}

// handling stalls and failures

type Beacon struct {
	Timestamp uint64
}

type BeaconReply struct {
	Timestamp uint64
}

type PingArgs struct {
	ActAsLeader uint8
}

type PingReply struct {
}

type BeTheLeaderArgs struct {
}

type BeTheLeaderReply struct {
}

// handling experiment requests for metrics

type MetricsRequest struct {
	OpCode uint8
}

type MetricsReply struct {
	KFast int64 // The number of instances that have committed on the fast path
	KSlow int64 // The number of instances that have committed on the slow path
}
--------------------------------------------------------------------------------
/src/genericsmrproto/genericsmrproto_test.go:
--------------------------------------------------------------------------------
package genericsmrproto

import (
	"bufio"
	"bytes"
	"reflect"
	"testing"
)

// TestMetricsRequest round-trips a MetricsRequest through Marshal/Unmarshal.
func TestMetricsRequest(t *testing.T) {
	mr := &MetricsRequest{5}
	var buf bytes.Buffer
	w := bufio.NewWriter(&buf)
	mr.Marshal(w)
	w.Flush()
	unmarsh := &MetricsRequest{}
	r := bufio.NewReader(&buf)
	err := unmarsh.Unmarshal(r)
	if err != nil {
		t.Fatal("Unexpected error:", err)
	}
	if !reflect.DeepEqual(mr, unmarsh) {
		t.Fatalf("Expected %v, got %v", mr, unmarsh)
	}
}

// TestMetricsReply round-trips a MetricsReply through Marshal/Unmarshal.
func TestMetricsReply(t *testing.T) {
	mr := &MetricsReply{17, 82}
	var buf bytes.Buffer
	w := bufio.NewWriter(&buf)
	mr.Marshal(w)
	w.Flush()
	unmarsh := &MetricsReply{}
	r := bufio.NewReader(&buf)
	err := unmarsh.Unmarshal(r)
	if err != nil {
		t.Fatal("Unexpected error:", err)
	}
	if !reflect.DeepEqual(mr, unmarsh) {
		t.Fatalf("Expected %v, got %v", mr, unmarsh)
	}
}
--------------------------------------------------------------------------------
/src/genericsmrproto/gsmrprotomarsh.go:
--------------------------------------------------------------------------------
// Hand-written marshaling for the genericsmrproto message types. All
// multi-byte integers are encoded little-endian.
package genericsmrproto

import (
	"fastrpc"
	"io"
)

// Marshal writes CommandId (4 bytes), the command's own encoding, then
// Timestamp (8 bytes).
func (t *Propose) Marshal(wire io.Writer) {
	var b [8]byte
	var bs []byte
	bs = b[:4]
	tmp32 := t.CommandId
	bs[0] = byte(tmp32)
	bs[1] = byte(tmp32 >> 8)
	bs[2] = byte(tmp32 >> 16)
	bs[3] = byte(tmp32 >> 24)
	wire.Write(bs)
	t.Command.Marshal(wire)
	bs = b[:8]
	tmp64 := t.Timestamp
	bs[0] = byte(tmp64)
	bs[1] = byte(tmp64 >> 8)
	bs[2] = byte(tmp64 >> 16)
	bs[3] = byte(tmp64 >> 24)
	bs[4] = byte(tmp64 >> 32)
	bs[5] = byte(tmp64 >> 40)
	bs[6] = byte(tmp64 >> 48)
	bs[7] = byte(tmp64 >> 56)
	wire.Write(bs)
}

// Unmarshal reads the form written by Propose.Marshal. The error from
// Command.Unmarshal is discarded (matching the rest of this file).
func (t *Propose) Unmarshal(wire io.Reader) error {
	var b [8]byte
	var bs []byte
	bs = b[:4]
	if _, err := io.ReadAtLeast(wire, bs, 4); err != nil {
		return err
	}
	t.CommandId = int32((uint32(bs[0]) | (uint32(bs[1]) << 8) | (uint32(bs[2]) << 16) | (uint32(bs[3]) << 24)))
	t.Command.Unmarshal(wire)
	bs = b[:8]
	if _, err := io.ReadAtLeast(wire, bs, 8); err != nil {
		return err
	}
	t.Timestamp = int64((uint64(bs[0]) | (uint64(bs[1]) << 8) | (uint64(bs[2]) << 16) | (uint64(bs[3]) << 24) | (uint64(bs[4]) << 32) | (uint64(bs[5]) << 40) | (uint64(bs[6]) << 48) | (uint64(bs[7]) << 56)))
	return nil
}

// Marshal writes the 8-byte little-endian Timestamp.
func (t *BeaconReply) Marshal(wire io.Writer) {
	var b [8]byte
	var bs []byte
	bs = b[:8]
	tmp64 := t.Timestamp
	bs[0] = byte(tmp64)
	bs[1] = byte(tmp64 >> 8)
	bs[2] = byte(tmp64 >> 16)
	bs[3] = byte(tmp64 >> 24)
	bs[4] = byte(tmp64 >> 32)
	bs[5] = byte(tmp64 >> 40)
	bs[6] = byte(tmp64 >> 48)
	bs[7] = byte(tmp64 >> 56)
	wire.Write(bs)
}

// Unmarshal reads the 8-byte form written by BeaconReply.Marshal.
func (t *BeaconReply) Unmarshal(wire io.Reader) error {
	var b [8]byte
	var bs []byte
	bs = b[:8]
	if _, err := io.ReadAtLeast(wire, bs, 8); err != nil {
		return err
	}
	t.Timestamp = uint64((uint64(bs[0]) | (uint64(bs[1]) << 8) | (uint64(bs[2]) << 16) | (uint64(bs[3]) << 24) | (uint64(bs[4]) << 32) | (uint64(bs[5]) << 40) | (uint64(bs[6]) << 48) | (uint64(bs[7]) << 56)))
	return nil
}

// Marshal writes the single ActAsLeader byte.
func (t *PingArgs) Marshal(wire io.Writer) {
	var b [1]byte
	var bs []byte
	bs = b[:1]
	bs[0] = byte(t.ActAsLeader)
	wire.Write(bs)
}

// Unmarshal reads the single ActAsLeader byte.
func (t *PingArgs) Unmarshal(wire io.Reader) error {
	var b [1]byte
	var bs []byte
	bs = b[:1]
	if _, err := io.ReadAtLeast(wire, bs, 1); err != nil {
		return err
	}
	t.ActAsLeader = uint8(bs[0])
	return nil
}

// BeTheLeaderArgs carries no payload.
func (t *BeTheLeaderArgs) Marshal(wire io.Writer) {
}

func (t *BeTheLeaderArgs) Unmarshal(wire io.Reader) error {
	return nil
}

// PingReply carries no payload.
func (t *PingReply) Marshal(wire io.Writer) {
}

func (t *PingReply) Unmarshal(wire io.Reader) error {
	return nil
}

// Marshal writes the 8-byte little-endian Timestamp.
func (t *Beacon) Marshal(wire io.Writer) {
	var b [8]byte
	var bs []byte
	bs = b[:8]
	tmp64 := t.Timestamp
	bs[0] = byte(tmp64)
	bs[1] = byte(tmp64 >> 8)
	bs[2] = byte(tmp64 >> 16)
	bs[3] = byte(tmp64 >> 24)
	bs[4] = byte(tmp64 >> 32)
	bs[5] = byte(tmp64 >> 40)
	bs[6] = byte(tmp64 >> 48)
	bs[7] = byte(tmp64 >> 56)
	wire.Write(bs)
}

// Unmarshal reads the 8-byte form written by Beacon.Marshal.
func (t *Beacon) Unmarshal(wire io.Reader) error {
	var b [8]byte
	var bs []byte
	bs = b[:8]
	if _, err := io.ReadAtLeast(wire, bs, 8); err != nil {
		return err
	}
	t.Timestamp = uint64((uint64(bs[0]) | (uint64(bs[1]) << 8) | (uint64(bs[2]) << 16) | (uint64(bs[3]) << 24) | (uint64(bs[4]) << 32) | (uint64(bs[5]) << 40) | (uint64(bs[6]) << 48) | (uint64(bs[7]) << 56)))
	return nil
}

// BeTheLeaderReply carries no payload.
func (t *BeTheLeaderReply) Marshal(wire io.Writer) {
}

func (t *BeTheLeaderReply) Unmarshal(wire io.Reader) error {
	return nil
}

// Marshal writes OK + CommandId (5 bytes), the value's own encoding, then
// Timestamp (8 bytes).
func (t *ProposeReply) Marshal(wire io.Writer) {
	var b [8]byte
	var bs []byte
	bs = b[:5]
	bs[0] = byte(t.OK)
	tmp32 := t.CommandId
	bs[1] = byte(tmp32)
	bs[2] = byte(tmp32 >> 8)
	bs[3] = byte(tmp32 >> 16)
	bs[4] = byte(tmp32 >> 24)
	wire.Write(bs)
	t.Value.Marshal(wire)
	bs = b[:8]
	tmp64 := t.Timestamp
	bs[0] = byte(tmp64)
	bs[1] = byte(tmp64 >> 8)
	bs[2] = byte(tmp64 >> 16)
	bs[3] = byte(tmp64 >> 24)
	bs[4] = byte(tmp64 >> 32)
	bs[5] = byte(tmp64 >> 40)
	bs[6] = byte(tmp64 >> 48)
	bs[7] = byte(tmp64 >> 56)
	wire.Write(bs)
}

// Unmarshal reads the form written by ProposeReply.Marshal. The error from
// Value.Unmarshal is discarded (matching the rest of this file).
func (t *ProposeReply) Unmarshal(wire io.Reader) error {
	var b [8]byte
	var bs []byte
	bs = b[:5]
	if _, err := io.ReadAtLeast(wire, bs, 5); err != nil {
		return err
	}
	t.OK = uint8(bs[0])
	t.CommandId = int32((uint32(bs[1]) | (uint32(bs[2]) << 8) | (uint32(bs[3]) << 16) | (uint32(bs[4]) << 24)))
	t.Value.Unmarshal(wire)
	bs = b[:8]
	if _, err := io.ReadAtLeast(wire, bs, 8); err != nil {
		return err
	}
	t.Timestamp
= int64((uint64(bs[0]) | (uint64(bs[1]) << 8) | (uint64(bs[2]) << 16) | (uint64(bs[3]) << 24) | (uint64(bs[4]) << 32) | (uint64(bs[5]) << 40) | (uint64(bs[6]) << 48) | (uint64(bs[7]) << 56)))
	return nil
}

// Marshal writes the single OpCode byte. Unlike the other Marshal methods in
// this file it returns an error (always nil).
func (t *MetricsRequest) Marshal(wire io.Writer) error {
	wire.Write([]byte{t.OpCode})
	return nil
}

// Unmarshal reads the single OpCode byte.
func (t *MetricsRequest) Unmarshal(wire io.Reader) error {
	b := make([]byte, 1)
	if _, err := io.ReadAtLeast(wire, b, 1); err != nil {
		return err
	}
	t.OpCode = uint8(b[0])
	return nil
}

// Marshal writes KFast then KSlow as two 8-byte little-endian int64s.
func (t *MetricsReply) Marshal(wire io.Writer) error {
	wire.Write(append(fastrpc.Int64ToByteArray(t.KFast), fastrpc.Int64ToByteArray(t.KSlow)...))
	return nil
}

// Unmarshal reads the 16-byte form written by MetricsReply.Marshal.
func (t *MetricsReply) Unmarshal(wire io.Reader) error {
	size := 8 * 2
	b := make([]byte, size)
	if _, err := io.ReadAtLeast(wire, b, size); err != nil {
		return err
	}
	t.KFast = fastrpc.Int64FromByteArray(b[:8])
	t.KSlow = fastrpc.Int64FromByteArray(b[8:])
	return nil
}
--------------------------------------------------------------------------------
/src/master/master.go:
--------------------------------------------------------------------------------
// The master coordinates a deployment: replicas register with it, it hands
// out replica ids, monitors liveness via Ping, and promotes a new leader
// when the current one stops responding.
package main

import (
	"flag"
	"fmt"
	"genericsmrproto"
	"log"
	"masterproto"
	"net"
	"net/http"
	"net/rpc"
	"strings"
	"sync"
	"time"
)

var portnum *int = flag.Int("port", 7087, "Port # to listen on. Defaults to 7087")
var numNodes *int = flag.Int("N", 3, "Number of replicas. Defaults to 3.")
var nodeIPs *string = flag.String("ips", "", "Space separated list of IP addresses (ordered). The leader will be 0")

// Master tracks the registration and liveness state of all replicas.
// All mutable fields are guarded by lock.
type Master struct {
	N              int
	nodeList       []string // "addr:port" per registered replica
	addrList       []string
	portList       []int
	lock           *sync.Mutex
	nodes          []*rpc.Client
	leader         []bool
	alive          []bool
	expectAddrList []string // expected replica IPs, in id order (from -ips)
	connected      []bool
	nConnected     int
}

func main() {
	flag.Parse()

	log.Printf("Master starting on port %d\n", *portnum)
	log.Printf("...waiting for %d replicas\n", *numNodes)

	// NOTE(review): the flag help says space-separated but the code splits
	// on commas — confirm which the scripts actually pass.
	ips := []string{}
	if *nodeIPs != "" {
		ips = strings.Split(*nodeIPs, ",")
		log.Println("Ordered replica ips:", ips, len(ips))
	}

	master := &Master{
		*numNodes,
		make([]string, *numNodes),
		make([]string, *numNodes),
		make([]int, *numNodes),
		new(sync.Mutex),
		make([]*rpc.Client, *numNodes),
		make([]bool, *numNodes),
		make([]bool, *numNodes),
		ips,
		make([]bool, *numNodes),
		0,
	}

	rpc.Register(master)
	rpc.HandleHTTP()
	l, err := net.Listen("tcp", fmt.Sprintf(":%d", *portnum))
	if err != nil {
		log.Fatal("Master listen error:", err)
	}

	go master.run()

	http.Serve(l, nil)
}

// run waits until all replicas have registered, connects to each replica's
// control RPC port (its own port + 1000), designates replica 0 the leader,
// then pings every replica every 3 seconds and elects the first live replica
// as the new leader whenever the current leader stops answering.
func (master *Master) run() {
	for true {
		master.lock.Lock()
		if master.nConnected == master.N {
			master.lock.Unlock()
			break
		}
		master.lock.Unlock()
		time.Sleep(100000000) // 100ms
	}
	time.Sleep(2000000000) // 2s grace period before dialing control ports

	// connect to SMR servers
	for i := 0; i < master.N; i++ {
		var err error
		addr := fmt.Sprintf("%s:%d", master.addrList[i], master.portList[i]+1000)
		master.nodes[i], err = rpc.DialHTTP("tcp", addr)
		if err != nil {
			log.Fatalf("Error connecting to replica %d: %v\n", i, err)
		}
		master.leader[i] = false
	}
	master.leader[0] = true

	for true {
		time.Sleep(3000 * 1000 * 1000) // 3s between liveness sweeps
		new_leader := false
		for i, node := range master.nodes {
			err := node.Call("Replica.Ping", new(genericsmrproto.PingArgs), new(genericsmrproto.PingReply))
			if err != nil {
				//log.Printf("Replica %d has failed to reply\n", i)
				master.alive[i] = false
				if master.leader[i] {
					// neet to choose a new leader
					new_leader = true
					master.leader[i] = false
				}
			} else {
				master.alive[i] = true
			}
		}
		if !new_leader {
			continue
		}
		// Promote the first live replica that accepts BeTheLeader.
		for i, new_master := range master.nodes {
			if master.alive[i] {
				err := new_master.Call("Replica.BeTheLeader", new(genericsmrproto.BeTheLeaderArgs), new(genericsmrproto.BeTheLeaderReply))
				if err == nil {
					master.leader[i] = true
					log.Printf("Replica %d is the new leader.", i)
					break
				}
			}
		}
	}
}

// Register is the RPC a replica calls at startup. The replica's index is
// resolved by exact addr:port match first, then by matching its IP against
// the expected (ordered) address list; unknown addresses are rejected. Once
// all N replicas are registered the reply carries the full node list.
func (master *Master) Register(args *masterproto.RegisterArgs, reply *masterproto.RegisterReply) error {
	master.lock.Lock()
	defer master.lock.Unlock()

	addrPort := fmt.Sprintf("%s:%d", args.Addr, args.Port)

	// N+1 acts as a "not found" sentinel for the index search below.
	i := master.N + 1

	log.Println("Received Register", addrPort, master.nodeList)

	for index, ap := range master.nodeList {
		if ap == addrPort {
			i = index
			break
		}
	}

	if i == master.N+1 {
		for index, a := range master.expectAddrList {
			if args.Addr == a {
				i = index
				// Keep scanning past already-connected slots so multiple
				// replicas on the same IP get distinct indices.
				if !master.connected[i] {
					break
				}
			}
		}
	}

	if i == master.N+1 {
		log.Println("Received register from bad IP:", addrPort)
		return nil
	}

	log.Println("Ended up with index", i)

	if !master.connected[i] {
		master.nodeList[i] = addrPort
		master.addrList[i] = args.Addr
		master.portList[i] = args.Port
		master.connected[i] = true
		master.nConnected++
	}

	if master.nConnected == master.N {
		log.Println("All connected!")
		reply.Ready = true
		reply.ReplicaId = i
		reply.NodeList = master.nodeList
| } else { 179 | reply.Ready = false 180 | } 181 | 182 | return nil 183 | } 184 | 185 | func (master *Master) GetLeader(args *masterproto.GetLeaderArgs, reply *masterproto.GetLeaderReply) error { 186 | time.Sleep(4 * 1000 * 1000) 187 | for i, l := range master.leader { 188 | if l { 189 | *reply = masterproto.GetLeaderReply{i} 190 | break 191 | } 192 | } 193 | return nil 194 | } 195 | 196 | func (master *Master) GetReplicaList(args *masterproto.GetReplicaListArgs, reply *masterproto.GetReplicaListReply) error { 197 | master.lock.Lock() 198 | defer master.lock.Unlock() 199 | 200 | if master.nConnected == master.N { 201 | reply.ReplicaList = master.nodeList 202 | reply.Ready = true 203 | } else { 204 | reply.Ready = false 205 | } 206 | return nil 207 | } 208 | -------------------------------------------------------------------------------- /src/masterproto/masterproto.go: -------------------------------------------------------------------------------- 1 | package masterproto 2 | 3 | type RegisterArgs struct { 4 | Addr string 5 | Port int 6 | } 7 | 8 | type RegisterReply struct { 9 | ReplicaId int 10 | NodeList []string 11 | Ready bool 12 | } 13 | 14 | type GetLeaderArgs struct { 15 | } 16 | 17 | type GetLeaderReply struct { 18 | LeaderId int 19 | } 20 | 21 | type GetReplicaListArgs struct { 22 | } 23 | 24 | type GetReplicaListReply struct { 25 | ReplicaList []string 26 | Ready bool 27 | } 28 | -------------------------------------------------------------------------------- /src/paxos/paxos.go: -------------------------------------------------------------------------------- 1 | package paxos 2 | 3 | import ( 4 | "encoding/binary" 5 | "genericsmr" 6 | "genericsmrproto" 7 | "io" 8 | "log" 9 | "paxosproto" 10 | "state" 11 | "time" 12 | ) 13 | 14 | const CHAN_BUFFER_SIZE = 200000 15 | const TRUE = uint8(1) 16 | const FALSE = uint8(0) 17 | 18 | const MAX_BATCH = 5000 19 | 20 | type Replica struct { 21 | *genericsmr.Replica // extends a generic Paxos replica 22 | prepareChan 
chan *genericsmr.RPCMessage
	acceptChan       chan *genericsmr.RPCMessage // incoming Accept messages
	commitChan       chan *genericsmr.RPCMessage // incoming full Commit messages
	commitShortChan  chan *genericsmr.RPCMessage // incoming short Commit messages (no commands)
	prepareReplyChan chan *genericsmr.RPCMessage // incoming PrepareReply messages
	acceptReplyChan  chan *genericsmr.RPCMessage // incoming AcceptReply messages
	prepareRPC       uint8 // RPC opcodes assigned by RegisterRPC in NewReplica
	acceptRPC        uint8
	commitRPC        uint8
	commitShortRPC   uint8
	prepareReplyRPC  uint8
	acceptReplyRPC   uint8
	IsLeader        bool        // does this replica think it is the leader
	instanceSpace   []*Instance // the space of all instances (used and not yet used)
	crtInstance     int32       // highest active instance number that this replica knows about
	defaultBallot   int32       // default ballot for new instances (0 until a Prepare(ballot, instance->infinity) from a leader)
	Shutdown        bool
	counter         int
	flush           bool
	committedUpTo   int32 // all instances <= this index are committed (contiguous prefix)
	batchingEnabled bool  // batch up to MAX_BATCH proposals per instance
}

// InstanceStatus is the local lifecycle state of one log slot.
type InstanceStatus int

const (
	PREPARING InstanceStatus = iota
	PREPARED
	ACCEPTED
	COMMITTED
)

// Instance is one slot of the replicated log.
type Instance struct {
	cmds   []state.Command    // commands chosen (or being chosen) for this slot
	ballot int32              // highest ballot seen for this slot
	status InstanceStatus
	lb     *LeaderBookkeeping // non-nil only on the replica acting as leader for this slot
}

// LeaderBookkeeping tracks quorum progress for an instance this replica leads.
type LeaderBookkeeping struct {
	clientProposals []*genericsmr.Propose // proposals to answer once the slot commits/executes
	maxRecvBallot   int32
	prepareOKs      int
	acceptOKs       int
	nacks           int
}

// NewReplica builds a Paxos replica, registers its RPC handlers, and starts
// the main event loop in a goroutine.
func NewReplica(id int, peerAddrList []string, thrifty bool,
	durable bool, batch bool) *Replica {
	// Positional literal: field order must match the Replica struct above.
	r := &Replica{
		genericsmr.NewReplica(id, peerAddrList, thrifty),
		make(chan *genericsmr.RPCMessage, genericsmr.CHAN_BUFFER_SIZE), // prepareChan
		make(chan *genericsmr.RPCMessage, genericsmr.CHAN_BUFFER_SIZE), // acceptChan
		make(chan *genericsmr.RPCMessage, genericsmr.CHAN_BUFFER_SIZE), // commitChan
		make(chan *genericsmr.RPCMessage, genericsmr.CHAN_BUFFER_SIZE), // commitShortChan
		make(chan *genericsmr.RPCMessage, genericsmr.CHAN_BUFFER_SIZE), // prepareReplyChan
		// acceptReplyChan gets a larger buffer: every accept fans in up to
		// N-1 replies.
		make(chan *genericsmr.RPCMessage, 3*genericsmr.CHAN_BUFFER_SIZE),
		0, 0, 0, 0, 0, 0, // RPC opcodes, filled in by RegisterRPC below
		false, // IsLeader
		make([]*Instance, 15*1024*1024), // instanceSpace, preallocated
		0,     // crtInstance
		-1,    // defaultBallot
		false, // Shutdown
		0,     // counter
		true,  // flush
		-1,    // committedUpTo (nothing committed yet)
		batch, // batchingEnabled
	}

	r.Durable = durable

	r.prepareRPC = r.RegisterRPC(new(paxosproto.Prepare), r.prepareChan)
	r.acceptRPC = r.RegisterRPC(new(paxosproto.Accept), r.acceptChan)
	r.commitRPC = r.RegisterRPC(new(paxosproto.Commit), r.commitChan)
	r.commitShortRPC = r.RegisterRPC(new(paxosproto.CommitShort), r.commitShortChan)
	r.prepareReplyRPC = r.RegisterRPC(new(paxosproto.PrepareReply), r.prepareReplyChan)
	r.acceptReplyRPC = r.RegisterRPC(new(paxosproto.AcceptReply), r.acceptReplyChan)

	go r.run()

	return r
}

//append a log entry to stable storage
// Records only the metadata (ballot + status) for an instance; commands are
// written separately by recordCommands. No-op unless durability is enabled.
func (r *Replica) recordInstanceMetadata(inst *Instance) {
	if !r.Durable {
		return
	}

	var b [5]byte
	binary.LittleEndian.PutUint32(b[0:4], uint32(inst.ballot))
	b[4] = byte(inst.status)
	r.StableStore.Write(b[:])
}

//write a sequence of commands to stable storage
func (r *Replica) recordCommands(cmds []state.Command) {
	if !r.Durable {
		return
	}

	if cmds == nil {
		return
	}
	for i := 0; i < len(cmds); i++ {
		cmds[i].Marshal(io.Writer(r.StableStore))
	}
}

//sync with the stable store
// Forces previously recorded entries to disk before acknowledging.
func (r *Replica) sync() {
	if !r.Durable {
		return
	}

	r.StableStore.Sync()
}

/* RPC to be called by master */

// BeTheLeader is invoked by the master during leader re-election.
func (r *Replica) BeTheLeader(args *genericsmrproto.BeTheLeaderArgs, reply *genericsmrproto.BeTheLeaderReply) error {
	r.IsLeader = true
	return nil
}

func (r *Replica) replyPrepare(replicaId int32, reply *paxosproto.PrepareReply) {
	r.SendMsg(replicaId, r.prepareReplyRPC, reply)
}

func (r *Replica) replyAccept(replicaId int32, reply *paxosproto.AcceptReply) {
	r.SendMsg(replicaId, r.acceptReplyRPC, reply)
}

/* =============
*/

var clockChan chan bool

// clock ticks every 5ms; each tick re-enables the proposal channel in run()
// so batched proposals are picked up at a bounded rate.
func (r *Replica) clock() {
	for !r.Shutdown {
		time.Sleep(1000 * 1000 * 5) // 5 ms
		clockChan <- true
	}
}

/* Main event processing loop */

func (r *Replica) run() {

	r.ConnectToPeers()

	go r.WaitForClientConnections()

	go r.executeCommands()

	// Replica 0 starts as the leader; the master may later promote another
	// replica via BeTheLeader.
	if r.Id == 0 {
		r.IsLeader = true
	}

	clockChan = make(chan bool, 1)
	if r.batchingEnabled {
		go r.clock()
	}

	// When batching, onOffProposeChan is set to nil after handling a propose
	// (a nil channel never fires in select), and re-armed on each clock tick.
	onOffProposeChan := r.ProposeChan

	for !r.Shutdown {

		select {

		case <-clockChan:
			//activate the new proposals channel
			onOffProposeChan = r.ProposeChan
			break

		case propose := <-onOffProposeChan:
			//got a Propose from a client
			r.handlePropose(propose)
			//deactivate the new proposals channel to prioritize the handling of protocol messages
			if r.batchingEnabled {
				onOffProposeChan = nil
			}
			break

		case prepareS := <-r.prepareChan:
			prepare := prepareS.Message.(*paxosproto.Prepare)
			//got a Prepare message
			r.handlePrepare(prepare)
			break

		case acceptS := <-r.acceptChan:
			accept := acceptS.Message.(*paxosproto.Accept)
			//got an Accept message
			r.handleAccept(accept)
			break

		case commitS := <-r.commitChan:
			commit := commitS.Message.(*paxosproto.Commit)
			//got a Commit message
			r.handleCommit(commit)
			break

		case commitS := <-r.commitShortChan:
			commit := commitS.Message.(*paxosproto.CommitShort)
			//got a Commit message
			r.handleCommitShort(commit)
			break

		case prepareReplyS := <-r.prepareReplyChan:
			prepareReply := prepareReplyS.Message.(*paxosproto.PrepareReply)
			//got a Prepare reply
			r.handlePrepareReply(prepareReply)
			break

		case acceptReplyS := <-r.acceptReplyChan:
			acceptReply := acceptReplyS.Message.(*paxosproto.AcceptReply)
			//got an Accept reply
			r.handleAcceptReply(acceptReply)
			break

		case metricsRequest := <-r.MetricsChan:
			// Empty reply because there are no relevant metrics
			reply := &genericsmrproto.MetricsReply{}
			reply.Marshal(metricsRequest.Reply)
			metricsRequest.Reply.Flush()
			break
		}
	}
}

// makeUniqueBallot packs the replica id into the low 4 bits of the ballot so
// ballots from different replicas never collide.
// NOTE(review): 4 bits only disambiguates replica ids 0-15 -- confirm the
// deployment never exceeds 16 replicas.
func (r *Replica) makeUniqueBallot(ballot int32) int32 {
	return (ballot << 4) | r.Id
}

// updateCommittedUpTo advances the contiguous committed prefix marker.
func (r *Replica) updateCommittedUpTo() {
	for r.instanceSpace[r.committedUpTo+1] != nil &&
		r.instanceSpace[r.committedUpTo+1].status == COMMITTED {
		r.committedUpTo++
	}
}

// bcastPrepare sends Prepare to n other replicas (a bare majority when
// thrifty, otherwise everyone), skipping dead ones.
func (r *Replica) bcastPrepare(instance int32, ballot int32, toInfinity bool) {
	defer func() {
		if err := recover(); err != nil {
			log.Println("Prepare bcast failed:", err)
		}
	}()
	ti := FALSE
	if toInfinity {
		ti = TRUE
	}
	args := &paxosproto.Prepare{r.Id, instance, ballot, ti}

	n := r.N - 1
	if r.Thrifty {
		n = r.N >> 1
	}
	q := r.Id

	for sent := 0; sent < n; {
		q = (q + 1) % int32(r.N)
		if q == r.Id {
			break
		}
		if !r.Alive[q] {
			continue
		}
		sent++
		r.SendMsg(q, r.prepareRPC, args)
	}
}

// pa is reused across bcastAccept calls to avoid an allocation per broadcast;
// safe only because run() is single-threaded.
var pa paxosproto.Accept

func (r *Replica) bcastAccept(instance int32, ballot int32, command []state.Command) {
	defer func() {
		if err := recover(); err != nil {
			log.Println("Accept bcast failed:", err)
		}
	}()
	pa.LeaderId = r.Id
	pa.Instance = instance
	pa.Ballot = ballot
	pa.Command = command
	args := &pa
	//args := &paxosproto.Accept{r.Id, instance, ballot, command}

	n := r.N - 1
	if r.Thrifty {
		n = r.N >> 1
	}
	q := r.Id

	for sent := 0; sent < n; {
		q = (q + 1) % 
int32(r.N)
		if q == r.Id {
			break
		}
		if !r.Alive[q] {
			continue
		}
		sent++
		r.SendMsg(q, r.acceptRPC, args)
	}
}

// pc/pcs are reused across bcastCommit calls to avoid per-broadcast
// allocations; safe only because run() is single-threaded.
var pc paxosproto.Commit
var pcs paxosproto.CommitShort

// bcastCommit notifies the other replicas that an instance has committed.
// A quorum gets the short form (metadata only); in thrifty mode the remaining
// replicas get the full form with commands, since they never saw the Accept.
func (r *Replica) bcastCommit(instance int32, ballot int32, command []state.Command) {
	defer func() {
		if err := recover(); err != nil {
			log.Println("Commit bcast failed:", err)
		}
	}()
	pc.LeaderId = r.Id
	pc.Instance = instance
	pc.Ballot = ballot
	pc.Command = command
	args := &pc
	pcs.LeaderId = r.Id
	pcs.Instance = instance
	pcs.Ballot = ballot
	pcs.Count = int32(len(command))
	argsShort := &pcs

	//args := &paxosproto.Commit{r.Id, instance, command}

	n := r.N - 1
	if r.Thrifty {
		n = r.N >> 1
	}
	q := r.Id
	sent := 0

	for sent < n {
		q = (q + 1) % int32(r.N)
		if q == r.Id {
			break
		}
		if !r.Alive[q] {
			continue
		}
		sent++
		r.SendMsg(q, r.commitShortRPC, argsShort)
	}
	if r.Thrifty && q != r.Id {
		for sent < r.N-1 {
			q = (q + 1) % int32(r.N)
			if q == r.Id {
				break
			}
			if !r.Alive[q] {
				continue
			}
			sent++
			r.SendMsg(q, r.commitRPC, args)
		}
	}
}

// handlePropose assigns a new instance to one or more client proposals and
// starts Phase 1 (Prepare) or, if a default ballot is established, goes
// straight to Phase 2 (Accept).
func (r *Replica) handlePropose(propose *genericsmr.Propose) {
	if !r.IsLeader {
		preply := &genericsmrproto.ProposeReply{FALSE, -1, state.NIL, 0}
		r.ReplyPropose(preply, propose.Reply)
		return
	}

	// Skip over slots already occupied (e.g. learned via Commit).
	for r.instanceSpace[r.crtInstance] != nil {
		r.crtInstance++
	}

	instNo := r.crtInstance
	r.crtInstance++

	batchSize := 1
	if r.batchingEnabled {
		// BUG FIX: this was `batchSize := len(r.ProposeChan) + 1`, which
		// declared a NEW variable shadowing the outer batchSize, so the
		// outer value stayed 1 and batching was silently disabled.
		batchSize = len(r.ProposeChan) + 1
		if batchSize > MAX_BATCH {
			batchSize = MAX_BATCH
		}
	}

	cmds := make([]state.Command, batchSize)
	proposals := make([]*genericsmr.Propose, batchSize)
	cmds[0] = propose.Command
	proposals[0] = propose

	// Drain the already-queued proposals into the same instance. len() above
	// guarantees these receives do not block.
	for i := 1; i < batchSize; i++ {
		prop := <-r.ProposeChan
		cmds[i] = prop.Command
		proposals[i] = prop
	}

	if r.defaultBallot == -1 {
		// No ballot established yet: run Phase 1 first.
		r.instanceSpace[instNo] = &Instance{
			cmds,
			r.makeUniqueBallot(0),
			PREPARING,
			&LeaderBookkeeping{proposals, 0, 0, 0, 0}}
		r.bcastPrepare(instNo, r.makeUniqueBallot(0), true)
	} else {
		// Ballot already established for all instances: go straight to
		// Phase 2, persisting before broadcasting.
		r.instanceSpace[instNo] = &Instance{
			cmds,
			r.defaultBallot,
			PREPARED,
			&LeaderBookkeeping{proposals, 0, 0, 0, 0}}

		r.recordInstanceMetadata(r.instanceSpace[instNo])
		r.recordCommands(cmds)
		r.sync()

		r.bcastAccept(instNo, r.defaultBallot, cmds)
	}
}

// handlePrepare answers a Prepare with this replica's current ballot (and
// any accepted commands) for the requested instance.
func (r *Replica) handlePrepare(prepare *paxosproto.Prepare) {
	inst := r.instanceSpace[prepare.Instance]
	var preply *paxosproto.PrepareReply

	if inst == nil {
		ok := TRUE
		if r.defaultBallot > prepare.Ballot {
			ok = FALSE
		}
		preply = &paxosproto.PrepareReply{prepare.Instance, ok, r.defaultBallot, make([]state.Command, 0)}
	} else {
		ok := TRUE
		if prepare.Ballot < inst.ballot {
			ok = FALSE
		}
		preply = &paxosproto.PrepareReply{prepare.Instance, ok, inst.ballot, inst.cmds}
	}

	r.replyPrepare(prepare.LeaderId, preply)

	// An infinity-Prepare establishes the leader's ballot as the default for
	// all future instances (lets the leader skip Phase 1 from then on).
	if prepare.ToInfinity == TRUE && prepare.Ballot > r.defaultBallot {
		r.defaultBallot = prepare.Ballot
	}
}

// handleAccept processes a Phase-2 Accept, persisting and acknowledging it
// unless a higher ballot has already been seen for this instance.
func (r *Replica) handleAccept(accept *paxosproto.Accept) {
	inst := r.instanceSpace[accept.Instance]
	var areply *paxosproto.AcceptReply

	if inst == nil {
		if accept.Ballot < r.defaultBallot {
			areply = &paxosproto.AcceptReply{accept.Instance, FALSE, r.defaultBallot}
		} else {
			r.instanceSpace[accept.Instance] = &Instance{
				accept.Command,
accept.Ballot,
				ACCEPTED,
				nil}
			areply = &paxosproto.AcceptReply{accept.Instance, TRUE, r.defaultBallot}
		}
	} else if inst.ballot > accept.Ballot {
		// We already promised a higher ballot: reject, reporting it.
		areply = &paxosproto.AcceptReply{accept.Instance, FALSE, inst.ballot}
	} else if inst.ballot < accept.Ballot {
		// Higher ballot wins: adopt its commands.
		inst.cmds = accept.Command
		inst.ballot = accept.Ballot
		inst.status = ACCEPTED
		areply = &paxosproto.AcceptReply{accept.Instance, TRUE, inst.ballot}
		if inst.lb != nil && inst.lb.clientProposals != nil {
			//TODO: is this correct?
			// try the proposal in a different instance
			for i := 0; i < len(inst.lb.clientProposals); i++ {
				r.ProposeChan <- inst.lb.clientProposals[i]
			}
			inst.lb.clientProposals = nil
		}
	} else {
		// reordered ACCEPT
		r.instanceSpace[accept.Instance].cmds = accept.Command
		if r.instanceSpace[accept.Instance].status != COMMITTED {
			r.instanceSpace[accept.Instance].status = ACCEPTED
		}
		areply = &paxosproto.AcceptReply{accept.Instance, TRUE, r.defaultBallot}
	}

	if areply.OK == TRUE {
		// Persist before acknowledging, so the promise survives a crash.
		r.recordInstanceMetadata(r.instanceSpace[accept.Instance])
		r.recordCommands(accept.Command)
		r.sync()
	}

	r.replyAccept(accept.LeaderId, areply)
}

// handleCommit learns a committed instance (full form: includes commands).
// Any client proposals this replica was leading for the slot are re-queued,
// since the committed value may differ from what those clients proposed.
func (r *Replica) handleCommit(commit *paxosproto.Commit) {
	inst := r.instanceSpace[commit.Instance]

	if inst == nil {
		r.instanceSpace[commit.Instance] = &Instance{
			commit.Command,
			commit.Ballot,
			COMMITTED,
			nil}
	} else {
		r.instanceSpace[commit.Instance].cmds = commit.Command
		r.instanceSpace[commit.Instance].status = COMMITTED
		r.instanceSpace[commit.Instance].ballot = commit.Ballot
		if inst.lb != nil && inst.lb.clientProposals != nil {
			for i := 0; i < len(inst.lb.clientProposals); i++ {
				r.ProposeChan <- inst.lb.clientProposals[i]
			}
			inst.lb.clientProposals = nil
		}
	}

	r.updateCommittedUpTo()

	r.recordInstanceMetadata(r.instanceSpace[commit.Instance])
	r.recordCommands(commit.Command)
}

// handleCommitShort learns a committed instance (short form: no commands;
// the replica must already have them from the Accept, or the slot's cmds
// stay nil until a full Commit arrives).
func (r *Replica) handleCommitShort(commit *paxosproto.CommitShort) {
	inst := r.instanceSpace[commit.Instance]

	if inst == nil {
		r.instanceSpace[commit.Instance] = &Instance{nil,
			commit.Ballot,
			COMMITTED,
			nil}
	} else {
		r.instanceSpace[commit.Instance].status = COMMITTED
		r.instanceSpace[commit.Instance].ballot = commit.Ballot
		if inst.lb != nil && inst.lb.clientProposals != nil {
			for i := 0; i < len(inst.lb.clientProposals); i++ {
				r.ProposeChan <- inst.lb.clientProposals[i]
			}
			inst.lb.clientProposals = nil
		}
	}

	r.updateCommittedUpTo()

	r.recordInstanceMetadata(r.instanceSpace[commit.Instance])
}

// handlePrepareReply tallies Phase-1 replies; on a majority of OKs it moves
// the instance to PREPARED and broadcasts Accepts.
func (r *Replica) handlePrepareReply(preply *paxosproto.PrepareReply) {
	inst := r.instanceSpace[preply.Instance]

	if inst.status != PREPARING {
		// TODO: should replies for non-current ballots be ignored?
		// we've moved on -- these are delayed replies, so just ignore
		return
	}

	if preply.OK == TRUE {
		inst.lb.prepareOKs++

		if preply.Ballot > inst.lb.maxRecvBallot {
			// A replica reported previously-accepted commands under a higher
			// ballot: Paxos requires adopting them for this instance.
			inst.cmds = preply.Command
			inst.lb.maxRecvBallot = preply.Ballot
			if inst.lb.clientProposals != nil {
				// there is already a competing command for this instance,
				// so we put the client proposal back in the queue so that
				// we know to try it in another instance
				for i := 0; i < len(inst.lb.clientProposals); i++ {
					r.ProposeChan <- inst.lb.clientProposals[i]
				}
				inst.lb.clientProposals = nil
			}
		}

		// +1 counts this replica itself; > N/2 is a strict majority.
		if inst.lb.prepareOKs+1 > r.N>>1 {
			inst.status = PREPARED
			inst.lb.nacks = 0
			if inst.ballot > r.defaultBallot {
				r.defaultBallot = inst.ballot
			}
			r.recordInstanceMetadata(r.instanceSpace[preply.Instance])
			r.sync()
			r.bcastAccept(preply.Instance, inst.ballot, inst.cmds)
		}
	} else {
		// TODO: there is probably another active leader
		inst.lb.nacks++
		if preply.Ballot > inst.lb.maxRecvBallot {
			inst.lb.maxRecvBallot = preply.Ballot
		}
		if inst.lb.nacks >= r.N>>1 {
			if inst.lb.clientProposals != nil {
				// try the proposals in another instance
				for i := 0; i < len(inst.lb.clientProposals); i++ {
					r.ProposeChan <- inst.lb.clientProposals[i]
				}
				inst.lb.clientProposals = nil
			}
		}
	}
}

// handleAcceptReply tallies Phase-2 replies; on a majority of OKs the
// instance commits, blind writes are acknowledged immediately, and Commits
// are broadcast. Non-blind commands are acknowledged later by
// executeCommands, after execution produces a value.
func (r *Replica) handleAcceptReply(areply *paxosproto.AcceptReply) {
	inst := r.instanceSpace[areply.Instance]

	if inst.status != PREPARED && inst.status != ACCEPTED {
		// we've move on, these are delayed replies, so just ignore
		return
	}

	if areply.OK == TRUE {
		inst.lb.acceptOKs++
		if inst.lb.acceptOKs+1 > r.N>>1 {
			inst = r.instanceSpace[areply.Instance]
			inst.status = COMMITTED
			if inst.lb.clientProposals != nil && state.AllBlindWrites(inst.cmds) {
				// give client the all clear
				// NOTE(review): this indexes clientProposals by command index;
				// assumes len(clientProposals) == len(cmds) (true for batches
				// built in handlePropose) -- confirm for adopted commands.
				for i := 0; i < len(inst.cmds); i++ {
					propreply := &genericsmrproto.ProposeReply{
						TRUE,
						inst.lb.clientProposals[i].CommandId,
						state.NIL,
						inst.lb.clientProposals[i].Timestamp}
					r.ReplyPropose(propreply, inst.lb.clientProposals[i].Reply)
				}
			}

			r.recordInstanceMetadata(r.instanceSpace[areply.Instance])
			r.sync() //is this necessary?

			r.updateCommittedUpTo()

			r.bcastCommit(areply.Instance, inst.ballot, inst.cmds)
		}
	} else {
		// TODO: there is probably another active leader
		inst.lb.nacks++
		if areply.Ballot > inst.lb.maxRecvBallot {
			inst.lb.maxRecvBallot = areply.Ballot
		}
		if inst.lb.nacks >= r.N>>1 {
			// TODO
		}
	}
}

// executeCommands applies committed instances to the state machine in log
// order, replying to clients for non-blind-write commands. It stalls (with a
// 1ms sleep) whenever the next slot's commands are not yet known (e.g. after
// a short Commit with no prior Accept).
func (r *Replica) executeCommands() {
	i := int32(0)
	for !r.Shutdown {
		executed := false

		for i <= r.committedUpTo {
			if r.instanceSpace[i].cmds != nil {
				inst := r.instanceSpace[i]
				for j := 0; j < len(inst.cmds); j++ {
					val := inst.cmds[j].Execute(r.State)
					// NOTE(review): only inst.lb is nil-checked; assumes
					// lb != nil implies clientProposals has an entry per
					// command -- confirm for re-proposed/adopted batches.
					if inst.lb != nil && !state.AllBlindWrites(inst.cmds) {
						propreply := &genericsmrproto.ProposeReply{
							TRUE,
							inst.lb.clientProposals[j].CommandId,
							val,
							inst.lb.clientProposals[j].Timestamp}
						r.ReplyPropose(propreply, inst.lb.clientProposals[j].Reply)
					}
				}
				i++
				executed = true
			} else {
				break
			}
		}

		if !executed {
			time.Sleep(1000 * 1000) // 1 ms
		}
	}

}

// ------------------------------------------------------------------------- /src/paxosproto/paxosproto.go:

// Package paxosproto defines the wire messages of the Multi-Paxos protocol.
package paxosproto

import (
	"state"
)

const (
	PREPARE uint8 = iota
	PREPARE_REPLY
	ACCEPT
	ACCEPT_REPLY
	COMMIT
13 | COMMIT_SHORT 14 | ) 15 | 16 | type Prepare struct { 17 | LeaderId int32 18 | Instance int32 19 | Ballot int32 20 | ToInfinity uint8 21 | } 22 | 23 | type PrepareReply struct { 24 | Instance int32 25 | OK uint8 26 | Ballot int32 27 | Command []state.Command 28 | } 29 | 30 | type Accept struct { 31 | LeaderId int32 32 | Instance int32 33 | Ballot int32 34 | Command []state.Command 35 | } 36 | 37 | type AcceptReply struct { 38 | Instance int32 39 | OK uint8 40 | Ballot int32 41 | } 42 | 43 | type Commit struct { 44 | LeaderId int32 45 | Instance int32 46 | Ballot int32 47 | Command []state.Command 48 | } 49 | 50 | type CommitShort struct { 51 | LeaderId int32 52 | Instance int32 53 | Count int32 54 | Ballot int32 55 | } 56 | -------------------------------------------------------------------------------- /src/paxosproto/paxosprotomarsh.go: -------------------------------------------------------------------------------- 1 | package paxosproto 2 | 3 | import ( 4 | "bufio" 5 | "encoding/binary" 6 | "fastrpc" 7 | "io" 8 | "state" 9 | "sync" 10 | ) 11 | 12 | type byteReader interface { 13 | io.Reader 14 | ReadByte() (c byte, err error) 15 | } 16 | 17 | func (t *Prepare) New() fastrpc.Serializable { 18 | return new(Prepare) 19 | } 20 | func (t *Prepare) BinarySize() (nbytes int, sizeKnown bool) { 21 | return 13, true 22 | } 23 | 24 | type PrepareCache struct { 25 | mu sync.Mutex 26 | cache []*Prepare 27 | } 28 | 29 | func NewPrepareCache() *PrepareCache { 30 | c := &PrepareCache{} 31 | c.cache = make([]*Prepare, 0) 32 | return c 33 | } 34 | 35 | func (p *PrepareCache) Get() *Prepare { 36 | var t *Prepare 37 | p.mu.Lock() 38 | if len(p.cache) > 0 { 39 | t = p.cache[len(p.cache)-1] 40 | p.cache = p.cache[0:(len(p.cache) - 1)] 41 | } 42 | p.mu.Unlock() 43 | if t == nil { 44 | t = &Prepare{} 45 | } 46 | return t 47 | } 48 | func (p *PrepareCache) Put(t *Prepare) { 49 | p.mu.Lock() 50 | p.cache = append(p.cache, t) 51 | p.mu.Unlock() 52 | } 53 | func (t *Prepare) 
Marshal(wire io.Writer) { 54 | var b [13]byte 55 | var bs []byte 56 | bs = b[:13] 57 | tmp32 := t.LeaderId 58 | bs[0] = byte(tmp32) 59 | bs[1] = byte(tmp32 >> 8) 60 | bs[2] = byte(tmp32 >> 16) 61 | bs[3] = byte(tmp32 >> 24) 62 | tmp32 = t.Instance 63 | bs[4] = byte(tmp32) 64 | bs[5] = byte(tmp32 >> 8) 65 | bs[6] = byte(tmp32 >> 16) 66 | bs[7] = byte(tmp32 >> 24) 67 | tmp32 = t.Ballot 68 | bs[8] = byte(tmp32) 69 | bs[9] = byte(tmp32 >> 8) 70 | bs[10] = byte(tmp32 >> 16) 71 | bs[11] = byte(tmp32 >> 24) 72 | bs[12] = byte(t.ToInfinity) 73 | wire.Write(bs) 74 | } 75 | 76 | func (t *Prepare) Unmarshal(wire io.Reader) error { 77 | var b [13]byte 78 | var bs []byte 79 | bs = b[:13] 80 | if _, err := io.ReadAtLeast(wire, bs, 13); err != nil { 81 | return err 82 | } 83 | t.LeaderId = int32((uint32(bs[0]) | (uint32(bs[1]) << 8) | (uint32(bs[2]) << 16) | (uint32(bs[3]) << 24))) 84 | t.Instance = int32((uint32(bs[4]) | (uint32(bs[5]) << 8) | (uint32(bs[6]) << 16) | (uint32(bs[7]) << 24))) 85 | t.Ballot = int32((uint32(bs[8]) | (uint32(bs[9]) << 8) | (uint32(bs[10]) << 16) | (uint32(bs[11]) << 24))) 86 | t.ToInfinity = uint8(bs[12]) 87 | return nil 88 | } 89 | 90 | func (t *PrepareReply) New() fastrpc.Serializable { 91 | return new(PrepareReply) 92 | } 93 | func (t *PrepareReply) BinarySize() (nbytes int, sizeKnown bool) { 94 | return 0, false 95 | } 96 | 97 | type PrepareReplyCache struct { 98 | mu sync.Mutex 99 | cache []*PrepareReply 100 | } 101 | 102 | func NewPrepareReplyCache() *PrepareReplyCache { 103 | c := &PrepareReplyCache{} 104 | c.cache = make([]*PrepareReply, 0) 105 | return c 106 | } 107 | 108 | func (p *PrepareReplyCache) Get() *PrepareReply { 109 | var t *PrepareReply 110 | p.mu.Lock() 111 | if len(p.cache) > 0 { 112 | t = p.cache[len(p.cache)-1] 113 | p.cache = p.cache[0:(len(p.cache) - 1)] 114 | } 115 | p.mu.Unlock() 116 | if t == nil { 117 | t = &PrepareReply{} 118 | } 119 | return t 120 | } 121 | func (p *PrepareReplyCache) Put(t *PrepareReply) { 122 | 
p.mu.Lock() 123 | p.cache = append(p.cache, t) 124 | p.mu.Unlock() 125 | } 126 | func (t *PrepareReply) Marshal(wire io.Writer) { 127 | var b [10]byte 128 | var bs []byte 129 | bs = b[:9] 130 | tmp32 := t.Instance 131 | bs[0] = byte(tmp32) 132 | bs[1] = byte(tmp32 >> 8) 133 | bs[2] = byte(tmp32 >> 16) 134 | bs[3] = byte(tmp32 >> 24) 135 | bs[4] = byte(t.OK) 136 | tmp32 = t.Ballot 137 | bs[5] = byte(tmp32) 138 | bs[6] = byte(tmp32 >> 8) 139 | bs[7] = byte(tmp32 >> 16) 140 | bs[8] = byte(tmp32 >> 24) 141 | wire.Write(bs) 142 | bs = b[:] 143 | alen1 := int64(len(t.Command)) 144 | if wlen := binary.PutVarint(bs, alen1); wlen >= 0 { 145 | wire.Write(b[0:wlen]) 146 | } 147 | for i := int64(0); i < alen1; i++ { 148 | t.Command[i].Marshal(wire) 149 | } 150 | } 151 | 152 | func (t *PrepareReply) Unmarshal(rr io.Reader) error { 153 | var wire byteReader 154 | var ok bool 155 | if wire, ok = rr.(byteReader); !ok { 156 | wire = bufio.NewReader(rr) 157 | } 158 | var b [10]byte 159 | var bs []byte 160 | bs = b[:9] 161 | if _, err := io.ReadAtLeast(wire, bs, 9); err != nil { 162 | return err 163 | } 164 | t.Instance = int32((uint32(bs[0]) | (uint32(bs[1]) << 8) | (uint32(bs[2]) << 16) | (uint32(bs[3]) << 24))) 165 | t.OK = uint8(bs[4]) 166 | t.Ballot = int32((uint32(bs[5]) | (uint32(bs[6]) << 8) | (uint32(bs[7]) << 16) | (uint32(bs[8]) << 24))) 167 | alen1, err := binary.ReadVarint(wire) 168 | if err != nil { 169 | return err 170 | } 171 | t.Command = make([]state.Command, alen1) 172 | for i := int64(0); i < alen1; i++ { 173 | t.Command[i].Unmarshal(wire) 174 | } 175 | return nil 176 | } 177 | 178 | func (t *Accept) New() fastrpc.Serializable { 179 | return new(Accept) 180 | } 181 | func (t *Accept) BinarySize() (nbytes int, sizeKnown bool) { 182 | return 0, false 183 | } 184 | 185 | type AcceptCache struct { 186 | mu sync.Mutex 187 | cache []*Accept 188 | } 189 | 190 | func NewAcceptCache() *AcceptCache { 191 | c := &AcceptCache{} 192 | c.cache = make([]*Accept, 0) 193 | return 
c 194 | } 195 | 196 | func (p *AcceptCache) Get() *Accept { 197 | var t *Accept 198 | p.mu.Lock() 199 | if len(p.cache) > 0 { 200 | t = p.cache[len(p.cache)-1] 201 | p.cache = p.cache[0:(len(p.cache) - 1)] 202 | } 203 | p.mu.Unlock() 204 | if t == nil { 205 | t = &Accept{} 206 | } 207 | return t 208 | } 209 | func (p *AcceptCache) Put(t *Accept) { 210 | p.mu.Lock() 211 | p.cache = append(p.cache, t) 212 | p.mu.Unlock() 213 | } 214 | func (t *Accept) Marshal(wire io.Writer) { 215 | var b [12]byte 216 | var bs []byte 217 | bs = b[:12] 218 | tmp32 := t.LeaderId 219 | bs[0] = byte(tmp32) 220 | bs[1] = byte(tmp32 >> 8) 221 | bs[2] = byte(tmp32 >> 16) 222 | bs[3] = byte(tmp32 >> 24) 223 | tmp32 = t.Instance 224 | bs[4] = byte(tmp32) 225 | bs[5] = byte(tmp32 >> 8) 226 | bs[6] = byte(tmp32 >> 16) 227 | bs[7] = byte(tmp32 >> 24) 228 | tmp32 = t.Ballot 229 | bs[8] = byte(tmp32) 230 | bs[9] = byte(tmp32 >> 8) 231 | bs[10] = byte(tmp32 >> 16) 232 | bs[11] = byte(tmp32 >> 24) 233 | wire.Write(bs) 234 | bs = b[:] 235 | alen1 := int64(len(t.Command)) 236 | if wlen := binary.PutVarint(bs, alen1); wlen >= 0 { 237 | wire.Write(b[0:wlen]) 238 | } 239 | for i := int64(0); i < alen1; i++ { 240 | t.Command[i].Marshal(wire) 241 | } 242 | } 243 | 244 | func (t *Accept) Unmarshal(rr io.Reader) error { 245 | var wire byteReader 246 | var ok bool 247 | if wire, ok = rr.(byteReader); !ok { 248 | wire = bufio.NewReader(rr) 249 | } 250 | var b [12]byte 251 | var bs []byte 252 | bs = b[:12] 253 | if _, err := io.ReadAtLeast(wire, bs, 12); err != nil { 254 | return err 255 | } 256 | t.LeaderId = int32((uint32(bs[0]) | (uint32(bs[1]) << 8) | (uint32(bs[2]) << 16) | (uint32(bs[3]) << 24))) 257 | t.Instance = int32((uint32(bs[4]) | (uint32(bs[5]) << 8) | (uint32(bs[6]) << 16) | (uint32(bs[7]) << 24))) 258 | t.Ballot = int32((uint32(bs[8]) | (uint32(bs[9]) << 8) | (uint32(bs[10]) << 16) | (uint32(bs[11]) << 24))) 259 | alen1, err := binary.ReadVarint(wire) 260 | if err != nil { 261 | return err 262 | 
} 263 | t.Command = make([]state.Command, alen1) 264 | for i := int64(0); i < alen1; i++ { 265 | t.Command[i].Unmarshal(wire) 266 | } 267 | return nil 268 | } 269 | 270 | func (t *AcceptReply) New() fastrpc.Serializable { 271 | return new(AcceptReply) 272 | } 273 | func (t *AcceptReply) BinarySize() (nbytes int, sizeKnown bool) { 274 | return 9, true 275 | } 276 | 277 | type AcceptReplyCache struct { 278 | mu sync.Mutex 279 | cache []*AcceptReply 280 | } 281 | 282 | func NewAcceptReplyCache() *AcceptReplyCache { 283 | c := &AcceptReplyCache{} 284 | c.cache = make([]*AcceptReply, 0) 285 | return c 286 | } 287 | 288 | func (p *AcceptReplyCache) Get() *AcceptReply { 289 | var t *AcceptReply 290 | p.mu.Lock() 291 | if len(p.cache) > 0 { 292 | t = p.cache[len(p.cache)-1] 293 | p.cache = p.cache[0:(len(p.cache) - 1)] 294 | } 295 | p.mu.Unlock() 296 | if t == nil { 297 | t = &AcceptReply{} 298 | } 299 | return t 300 | } 301 | func (p *AcceptReplyCache) Put(t *AcceptReply) { 302 | p.mu.Lock() 303 | p.cache = append(p.cache, t) 304 | p.mu.Unlock() 305 | } 306 | func (t *AcceptReply) Marshal(wire io.Writer) { 307 | var b [9]byte 308 | var bs []byte 309 | bs = b[:9] 310 | tmp32 := t.Instance 311 | bs[0] = byte(tmp32) 312 | bs[1] = byte(tmp32 >> 8) 313 | bs[2] = byte(tmp32 >> 16) 314 | bs[3] = byte(tmp32 >> 24) 315 | bs[4] = byte(t.OK) 316 | tmp32 = t.Ballot 317 | bs[5] = byte(tmp32) 318 | bs[6] = byte(tmp32 >> 8) 319 | bs[7] = byte(tmp32 >> 16) 320 | bs[8] = byte(tmp32 >> 24) 321 | wire.Write(bs) 322 | } 323 | 324 | func (t *AcceptReply) Unmarshal(wire io.Reader) error { 325 | var b [9]byte 326 | var bs []byte 327 | bs = b[:9] 328 | if _, err := io.ReadAtLeast(wire, bs, 9); err != nil { 329 | return err 330 | } 331 | t.Instance = int32((uint32(bs[0]) | (uint32(bs[1]) << 8) | (uint32(bs[2]) << 16) | (uint32(bs[3]) << 24))) 332 | t.OK = uint8(bs[4]) 333 | t.Ballot = int32((uint32(bs[5]) | (uint32(bs[6]) << 8) | (uint32(bs[7]) << 16) | (uint32(bs[8]) << 24))) 334 | return nil 
335 | } 336 | 337 | func (t *Commit) New() fastrpc.Serializable { 338 | return new(Commit) 339 | } 340 | func (t *Commit) BinarySize() (nbytes int, sizeKnown bool) { 341 | return 0, false 342 | } 343 | 344 | type CommitCache struct { 345 | mu sync.Mutex 346 | cache []*Commit 347 | } 348 | 349 | func NewCommitCache() *CommitCache { 350 | c := &CommitCache{} 351 | c.cache = make([]*Commit, 0) 352 | return c 353 | } 354 | 355 | func (p *CommitCache) Get() *Commit { 356 | var t *Commit 357 | p.mu.Lock() 358 | if len(p.cache) > 0 { 359 | t = p.cache[len(p.cache)-1] 360 | p.cache = p.cache[0:(len(p.cache) - 1)] 361 | } 362 | p.mu.Unlock() 363 | if t == nil { 364 | t = &Commit{} 365 | } 366 | return t 367 | } 368 | func (p *CommitCache) Put(t *Commit) { 369 | p.mu.Lock() 370 | p.cache = append(p.cache, t) 371 | p.mu.Unlock() 372 | } 373 | func (t *Commit) Marshal(wire io.Writer) { 374 | var b [12]byte 375 | var bs []byte 376 | bs = b[:12] 377 | tmp32 := t.LeaderId 378 | bs[0] = byte(tmp32) 379 | bs[1] = byte(tmp32 >> 8) 380 | bs[2] = byte(tmp32 >> 16) 381 | bs[3] = byte(tmp32 >> 24) 382 | tmp32 = t.Instance 383 | bs[4] = byte(tmp32) 384 | bs[5] = byte(tmp32 >> 8) 385 | bs[6] = byte(tmp32 >> 16) 386 | bs[7] = byte(tmp32 >> 24) 387 | tmp32 = t.Ballot 388 | bs[8] = byte(tmp32) 389 | bs[9] = byte(tmp32 >> 8) 390 | bs[10] = byte(tmp32 >> 16) 391 | bs[11] = byte(tmp32 >> 24) 392 | wire.Write(bs) 393 | bs = b[:] 394 | alen1 := int64(len(t.Command)) 395 | if wlen := binary.PutVarint(bs, alen1); wlen >= 0 { 396 | wire.Write(b[0:wlen]) 397 | } 398 | for i := int64(0); i < alen1; i++ { 399 | t.Command[i].Marshal(wire) 400 | } 401 | } 402 | 403 | func (t *Commit) Unmarshal(rr io.Reader) error { 404 | var wire byteReader 405 | var ok bool 406 | if wire, ok = rr.(byteReader); !ok { 407 | wire = bufio.NewReader(rr) 408 | } 409 | var b [12]byte 410 | var bs []byte 411 | bs = b[:12] 412 | if _, err := io.ReadAtLeast(wire, bs, 12); err != nil { 413 | return err 414 | } 415 | t.LeaderId = 
int32((uint32(bs[0]) | (uint32(bs[1]) << 8) | (uint32(bs[2]) << 16) | (uint32(bs[3]) << 24))) 416 | t.Instance = int32((uint32(bs[4]) | (uint32(bs[5]) << 8) | (uint32(bs[6]) << 16) | (uint32(bs[7]) << 24))) 417 | t.Ballot = int32((uint32(bs[8]) | (uint32(bs[9]) << 8) | (uint32(bs[10]) << 16) | (uint32(bs[11]) << 24))) 418 | alen1, err := binary.ReadVarint(wire) 419 | if err != nil { 420 | return err 421 | } 422 | t.Command = make([]state.Command, alen1) 423 | for i := int64(0); i < alen1; i++ { 424 | t.Command[i].Unmarshal(wire) 425 | } 426 | return nil 427 | } 428 | 429 | func (t *CommitShort) New() fastrpc.Serializable { 430 | return new(CommitShort) 431 | } 432 | func (t *CommitShort) BinarySize() (nbytes int, sizeKnown bool) { 433 | return 16, true 434 | } 435 | 436 | type CommitShortCache struct { 437 | mu sync.Mutex 438 | cache []*CommitShort 439 | } 440 | 441 | func NewCommitShortCache() *CommitShortCache { 442 | c := &CommitShortCache{} 443 | c.cache = make([]*CommitShort, 0) 444 | return c 445 | } 446 | 447 | func (p *CommitShortCache) Get() *CommitShort { 448 | var t *CommitShort 449 | p.mu.Lock() 450 | if len(p.cache) > 0 { 451 | t = p.cache[len(p.cache)-1] 452 | p.cache = p.cache[0:(len(p.cache) - 1)] 453 | } 454 | p.mu.Unlock() 455 | if t == nil { 456 | t = &CommitShort{} 457 | } 458 | return t 459 | } 460 | func (p *CommitShortCache) Put(t *CommitShort) { 461 | p.mu.Lock() 462 | p.cache = append(p.cache, t) 463 | p.mu.Unlock() 464 | } 465 | func (t *CommitShort) Marshal(wire io.Writer) { 466 | var b [16]byte 467 | var bs []byte 468 | bs = b[:16] 469 | tmp32 := t.LeaderId 470 | bs[0] = byte(tmp32) 471 | bs[1] = byte(tmp32 >> 8) 472 | bs[2] = byte(tmp32 >> 16) 473 | bs[3] = byte(tmp32 >> 24) 474 | tmp32 = t.Instance 475 | bs[4] = byte(tmp32) 476 | bs[5] = byte(tmp32 >> 8) 477 | bs[6] = byte(tmp32 >> 16) 478 | bs[7] = byte(tmp32 >> 24) 479 | tmp32 = t.Count 480 | bs[8] = byte(tmp32) 481 | bs[9] = byte(tmp32 >> 8) 482 | bs[10] = byte(tmp32 >> 16) 483 | 
bs[11] = byte(tmp32 >> 24) 484 | tmp32 = t.Ballot 485 | bs[12] = byte(tmp32) 486 | bs[13] = byte(tmp32 >> 8) 487 | bs[14] = byte(tmp32 >> 16) 488 | bs[15] = byte(tmp32 >> 24) 489 | wire.Write(bs) 490 | } 491 | 492 | func (t *CommitShort) Unmarshal(wire io.Reader) error { 493 | var b [16]byte 494 | var bs []byte 495 | bs = b[:16] 496 | if _, err := io.ReadAtLeast(wire, bs, 16); err != nil { 497 | return err 498 | } 499 | t.LeaderId = int32((uint32(bs[0]) | (uint32(bs[1]) << 8) | (uint32(bs[2]) << 16) | (uint32(bs[3]) << 24))) 500 | t.Instance = int32((uint32(bs[4]) | (uint32(bs[5]) << 8) | (uint32(bs[6]) << 16) | (uint32(bs[7]) << 24))) 501 | t.Count = int32((uint32(bs[8]) | (uint32(bs[9]) << 8) | (uint32(bs[10]) << 16) | (uint32(bs[11]) << 24))) 502 | t.Ballot = int32((uint32(bs[12]) | (uint32(bs[13]) << 8) | (uint32(bs[14]) << 16) | (uint32(bs[15]) << 24))) 503 | return nil 504 | } 505 | -------------------------------------------------------------------------------- /src/poisson/poisson.go: -------------------------------------------------------------------------------- 1 | package poisson 2 | 3 | import ( 4 | "math" 5 | "math/rand" 6 | "time" 7 | ) 8 | 9 | // Simulates a Poisson distribution 10 | type Poisson struct { 11 | // The average number of microseconds between arrivals 12 | rate int 13 | random *rand.Rand 14 | } 15 | 16 | func NewPoisson(rate int) *Poisson { 17 | return &Poisson{ 18 | rate, 19 | rand.New(rand.NewSource(time.Now().UnixNano())), 20 | } 21 | } 22 | 23 | // The number of microseconds until the next arrival 24 | func (p *Poisson) NextArrival() time.Duration { 25 | return time.Microsecond * time.Duration(-1*math.Log(1.0-p.random.Float64())*float64(p.rate)) 26 | } 27 | -------------------------------------------------------------------------------- /src/poisson/poisson_test.go: -------------------------------------------------------------------------------- 1 | package poisson 2 | 3 | import ( 4 | "math" 5 | "testing" 6 | ) 7 | 8 | const 
POISSON_PARAM = 40 9 | const COUNT = 100000 10 | 11 | func TestPoisson(t *testing.T) { 12 | p := NewPoisson(POISSON_PARAM) 13 | var sum int64 = 0 14 | for i := 0; i < COUNT; i++ { 15 | sum += p.NextArrival().Microseconds() 16 | } 17 | avg := float64(sum) / float64(COUNT) 18 | if math.Abs(avg-POISSON_PARAM) > 1 { 19 | t.Errorf("Expected Poisson avg to be %d, got %f", POISSON_PARAM, avg) 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/priorityqueue/priorityqueue.go: -------------------------------------------------------------------------------- 1 | // Based on https://golang.org/pkg/container/heap/ 2 | package priorityqueue 3 | 4 | import ( 5 | "container/heap" 6 | ) 7 | 8 | const PRI_INVALID int64 = -1 9 | 10 | // An Item is something we manage in a priority queue. 11 | type Item struct { 12 | value interface{} // The value of the item; arbitrary. 13 | priority int64 // The priority of the item in the queue. 14 | // The index is needed by update and is maintained by the heap.Interface methods. 15 | index int // The index of the item in the heap. 16 | } 17 | 18 | // A PriorityQueue implements heap.Interface and holds Items. 19 | type PriorityQueue struct { 20 | pq *priorityQueue 21 | } 22 | 23 | func NewPriorityQueue() *PriorityQueue { 24 | pq := make(priorityQueue, 0, 100) 25 | heap.Init(&pq) 26 | return &PriorityQueue{&pq} 27 | } 28 | 29 | func (pq *PriorityQueue) Push(value interface{}, priority int64) { 30 | // Insert a new item and then modify its priority. 
31 | item := &Item{ 32 | value: value, 33 | priority: priority, 34 | } 35 | heap.Push(pq.pq, item) 36 | } 37 | 38 | func (pq *PriorityQueue) Pop() interface{} { 39 | if pq.pq.Len() == 0 { 40 | return nil 41 | } 42 | return heap.Pop(pq.pq).(*Item).value 43 | } 44 | 45 | func (pq *PriorityQueue) Peak() interface{} { 46 | if pq.pq.Len() == 0 { 47 | return nil 48 | } 49 | item := heap.Pop(pq.pq).(*Item) 50 | value := item.value 51 | heap.Push(pq.pq, item) 52 | return value 53 | } 54 | 55 | func (pq *PriorityQueue) PeakPriority() int64 { 56 | if pq.pq.Len() == 0 { 57 | return PRI_INVALID 58 | } 59 | item := heap.Pop(pq.pq).(*Item) 60 | priority := item.priority 61 | heap.Push(pq.pq, item) 62 | return priority 63 | } 64 | 65 | // A PriorityQueue implements heap.Interface and holds Items. 66 | type priorityQueue []*Item 67 | 68 | func (pq *priorityQueue) Push(x interface{}) { 69 | n := len(*pq) 70 | item := x.(*Item) 71 | item.index = n 72 | *pq = append(*pq, item) 73 | } 74 | 75 | func (pq *priorityQueue) Pop() interface{} { 76 | old := *pq 77 | n := len(old) 78 | item := old[n-1] 79 | old[n-1] = nil // avoid memory leak 80 | item.index = -1 // for safety 81 | *pq = old[0 : n-1] 82 | return item 83 | } 84 | 85 | func (pq priorityQueue) Len() int { 86 | return len(pq) 87 | } 88 | 89 | // The highest priority items have the lowest priority value 90 | func (pq priorityQueue) Less(i, j int) bool { 91 | return pq[i].priority < pq[j].priority 92 | } 93 | 94 | func (pq priorityQueue) Swap(i, j int) { 95 | pq[i], pq[j] = pq[j], pq[i] 96 | pq[i].index = i 97 | pq[j].index = j 98 | } 99 | 100 | // update modifies the priority and value of an Item in the queue. 
101 | func (pq *priorityQueue) update(item *Item, value string, priority int64) { 102 | item.value = value 103 | item.priority = priority 104 | heap.Fix(pq, item.index) 105 | } 106 | -------------------------------------------------------------------------------- /src/priorityqueue/priorityqueue_test.go: -------------------------------------------------------------------------------- 1 | package priorityqueue 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | type testValue struct { 8 | val int32 9 | } 10 | 11 | func TestPriorityQueue(t *testing.T) { 12 | pq := NewPriorityQueue() 13 | pushed1 := &testValue{17} 14 | priority1 := int64(9) 15 | pushed2 := &testValue{3} 16 | priority2 := int64(1) 17 | pq.Push(pushed1, priority1) 18 | pq.Push(pushed2, priority2) 19 | ppeak1 := pq.PeakPriority() 20 | if ppeak1 != priority2 { 21 | t.Fatalf("Expected %d, got %d", priority2, ppeak1) 22 | } 23 | peak1 := pq.Peak() 24 | if peak1 != pushed2 { 25 | t.Fatalf("Expected %v, got %v", pushed2, peak1) 26 | } 27 | popped1 := pq.Pop() 28 | if popped1 != pushed2 { 29 | t.Fatalf("Expected %v, got %v", pushed2, popped1) 30 | } 31 | ppeak2 := pq.PeakPriority() 32 | if ppeak2 != priority1 { 33 | t.Fatalf("Expected %d, got %d", priority1, ppeak2) 34 | } 35 | peak2 := pq.Peak() 36 | if peak2 != pushed1 { 37 | t.Fatalf("Expected %v, got %v", pushed1, peak2) 38 | } 39 | popped2 := pq.Pop() 40 | if popped2 != pushed1 { 41 | t.Fatalf("Expected %v, got %v", pushed1, popped2) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/rdtsc/rdtsc.s: -------------------------------------------------------------------------------- 1 | // func Cputicks(void) (n uint64) 2 | TEXT ·Cputicks(SB),7,$0 3 | RDTSC 4 | SHLQ $32, DX 5 | ADDQ DX, AX 6 | MOVQ AX, n+0(FP) 7 | RET 8 | 9 | -------------------------------------------------------------------------------- /src/rdtsc/rdtsc_decl.go: -------------------------------------------------------------------------------- 1 | 
package rdtsc 2 | 3 | func Cputicks() (t uint64) 4 | -------------------------------------------------------------------------------- /src/server/server.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "epaxos" 5 | "flag" 6 | "fmt" 7 | "log" 8 | "masterproto" 9 | "net" 10 | "net/http" 11 | "net/rpc" 12 | "os" 13 | "os/signal" 14 | "paxos" 15 | "runtime" 16 | "runtime/pprof" 17 | "time" 18 | ) 19 | 20 | var portnum *int = flag.Int("port", 7070, "Port # to listen on. Defaults to 7070") 21 | var masterAddr *string = flag.String("maddr", "", "Master address. Defaults to localhost.") 22 | var masterPort *int = flag.Int("mport", 7087, "Master port. Defaults to 7087.") 23 | var myAddr *string = flag.String("addr", "", "Server address (this machine). Defaults to localhost.") 24 | var doEpaxos *bool = flag.Bool("e", false, "Use EPaxos as the replication protocol. Defaults to false.") 25 | var procs *int = flag.Int("p", 2, "GOMAXPROCS. 
Defaults to 2") 26 | var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to file") 27 | var thrifty = flag.Bool("thrifty", false, "Use only as many messages as strictly required for inter-replica communication.") 28 | var beacon = flag.Bool("beacon", false, "Send beacons to other replicas to compare their relative speeds.") 29 | var durable = flag.Bool("durable", false, "Log to a stable store (i.e., a file in the current dir).") 30 | var batch = flag.Bool("batch", false, "Enables batching of inter-server messages") 31 | var infiniteFix = flag.Bool("inffix", false, "Enables a bound on execution latency for EPaxos") 32 | var clockSyncType = flag.Int("clocksync", 0, "0 to not sync clocks, 1 to delay the opening of messages until the quorum, 2 to delay so that all process at same time, 3 to delay to CA, VA, and OR.") 33 | var clockSyncEpsilon = flag.Float64("clockepsilon", 4, "The number of milliseconds to add as buffer for OpenAfter times.") 34 | 35 | func main() { 36 | flag.Parse() 37 | 38 | runtime.GOMAXPROCS(*procs) 39 | 40 | if *cpuprofile != "" { 41 | f, err := os.Create(*cpuprofile) 42 | if err != nil { 43 | log.Fatal(err) 44 | } 45 | pprof.StartCPUProfile(f) 46 | 47 | interrupt := make(chan os.Signal, 1) 48 | signal.Notify(interrupt) 49 | go catchKill(interrupt) 50 | } 51 | 52 | log.Printf("Server starting on port %d\n", *portnum) 53 | 54 | replicaId, nodeList := registerWithMaster(fmt.Sprintf("%s:%d", *masterAddr, *masterPort)) 55 | 56 | if *doEpaxos { 57 | log.Println("Starting Egalitarian Paxos replica...") 58 | rep := epaxos.NewReplica(replicaId, nodeList, *thrifty, *beacon, 59 | *durable, *batch, *infiniteFix, epaxos.ClockSyncType(*clockSyncType), 60 | int64(*clockSyncEpsilon*1e6) /* ms to ns */) 61 | rpc.Register(rep) 62 | } else { 63 | log.Println("Starting classic Paxos replica...") 64 | rep := paxos.NewReplica(replicaId, nodeList, *thrifty, *durable, *batch) 65 | rpc.Register(rep) 66 | } 67 | 68 | rpc.HandleHTTP() 69 | //listen for RPC on 
a different port (8070 by default) 70 | l, err := net.Listen("tcp", fmt.Sprintf(":%d", *portnum+1000)) 71 | if err != nil { 72 | log.Fatal("listen error:", err) 73 | } 74 | 75 | http.Serve(l, nil) 76 | } 77 | 78 | func registerWithMaster(masterAddr string) (int, []string) { 79 | args := &masterproto.RegisterArgs{*myAddr, *portnum} 80 | var reply masterproto.RegisterReply 81 | 82 | for done := false; !done; { 83 | mcli, err := rpc.DialHTTP("tcp", masterAddr) 84 | if err == nil { 85 | err = mcli.Call("Master.Register", args, &reply) 86 | if err == nil && reply.Ready == true { 87 | done = true 88 | break 89 | } 90 | } 91 | time.Sleep(1e9) 92 | } 93 | 94 | return reply.ReplicaId, reply.NodeList 95 | } 96 | 97 | func catchKill(interrupt chan os.Signal) { 98 | <-interrupt 99 | if *cpuprofile != "" { 100 | pprof.StopCPUProfile() 101 | } 102 | fmt.Println("Caught signal") 103 | os.Exit(0) 104 | } 105 | -------------------------------------------------------------------------------- /src/state/state.go: -------------------------------------------------------------------------------- 1 | package state 2 | 3 | type Operation uint8 4 | 5 | const ( 6 | NONE Operation = iota 7 | PUT 8 | GET 9 | PUT_BLIND // Result not needed immediately 10 | ) 11 | 12 | type Value int64 13 | 14 | const NIL Value = 0 15 | 16 | type Key int64 17 | 18 | type Command struct { 19 | Op Operation 20 | K Key 21 | V Value 22 | } 23 | 24 | type State struct { 25 | Store map[Key]Value 26 | } 27 | 28 | func InitState() *State { 29 | return &State{make(map[Key]Value)} 30 | } 31 | 32 | func Conflict(gamma *Command, delta *Command) bool { 33 | if gamma.K == delta.K { 34 | if gamma.Op == PUT || delta.Op == PUT { 35 | return true 36 | } 37 | } 38 | return false 39 | } 40 | 41 | func ConflictBatch(batch1 []Command, batch2 []Command) bool { 42 | for i := 0; i < len(batch1); i++ { 43 | for j := 0; j < len(batch2); j++ { 44 | if Conflict(&batch1[i], &batch2[j]) { 45 | return true 46 | } 47 | } 48 | } 49 | return 
false 50 | } 51 | 52 | func IsRead(command *Command) bool { 53 | return command.Op == GET 54 | } 55 | 56 | func (c *Command) Execute(st *State) Value { 57 | switch c.Op { 58 | case PUT, PUT_BLIND: 59 | st.Store[c.K] = c.V 60 | return c.V 61 | 62 | case GET: 63 | if val, present := st.Store[c.K]; present { 64 | return val 65 | } 66 | } 67 | 68 | return NIL 69 | } 70 | 71 | func AllReads(cmds []Command) bool { 72 | for i := range cmds { 73 | if cmds[i].Op != GET { 74 | return false 75 | } 76 | } 77 | return true 78 | } 79 | 80 | func AllWrites(cmds []Command) bool { 81 | for i := range cmds { 82 | if cmds[i].Op != PUT { 83 | return false 84 | } 85 | } 86 | return true 87 | } 88 | 89 | func AllBlindWrites(cmds []Command) bool { 90 | for i := range cmds { 91 | if cmds[i].Op != PUT_BLIND { 92 | return false 93 | } 94 | } 95 | return true 96 | } 97 | -------------------------------------------------------------------------------- /src/state/state.go.1k: -------------------------------------------------------------------------------- 1 | package state 2 | 3 | //import "fmt" 4 | 5 | type Operation uint8 6 | 7 | const ( 8 | NONE Operation = iota 9 | PUT 10 | DELETE 11 | RLOCK 12 | WLOCK 13 | ) 14 | 15 | type Value [128]int64 16 | 17 | type Key int64 18 | 19 | type Command struct { 20 | Op Operation 21 | K Key 22 | V Value 23 | } 24 | 25 | type State struct { 26 | Store map[Key]Value 27 | } 28 | 29 | func Conflict(gamma *Command, delta *Command) bool { 30 | if gamma.K == delta.K { 31 | if gamma.Op == PUT || delta.Op == PUT { 32 | return true 33 | } 34 | } 35 | return false 36 | } 37 | 38 | func (c *Command) Execute(st *State) { 39 | //fmt.Printf("Executing (%d, %d)\n", c.K, c.V) 40 | switch (c.Op) { 41 | case PUT: 42 | st.Store[c.K] = c.V 43 | break 44 | } 45 | } 46 | 47 | -------------------------------------------------------------------------------- /src/state/statemarsh.go: -------------------------------------------------------------------------------- 1 | package state 
2 | 3 | import ( 4 | "encoding/binary" 5 | "io" 6 | ) 7 | 8 | func (t *Command) Marshal(w io.Writer) { 9 | var b [8]byte 10 | bs := b[:8] 11 | bs = b[:1] 12 | b[0] = byte(t.Op) 13 | w.Write(bs) 14 | bs = b[:8] 15 | binary.LittleEndian.PutUint64(bs, uint64(t.K)) 16 | w.Write(bs) 17 | binary.LittleEndian.PutUint64(bs, uint64(t.V)) 18 | w.Write(bs) 19 | } 20 | 21 | func (t *Command) Unmarshal(r io.Reader) error { 22 | var b [8]byte 23 | bs := b[:8] 24 | bs = b[:1] 25 | if _, err := io.ReadFull(r, bs); err != nil { 26 | return err 27 | } 28 | t.Op = Operation(b[0]) 29 | bs = b[:8] 30 | if _, err := io.ReadFull(r, bs); err != nil { 31 | return err 32 | } 33 | t.K = Key(binary.LittleEndian.Uint64(bs)) 34 | if _, err := io.ReadFull(r, bs); err != nil { 35 | return err 36 | } 37 | t.V = Value(binary.LittleEndian.Uint64(bs)) 38 | return nil 39 | } 40 | 41 | func (t *Key) Marshal(w io.Writer) { 42 | var b [8]byte 43 | bs := b[:8] 44 | binary.LittleEndian.PutUint64(bs, uint64(*t)) 45 | w.Write(bs) 46 | } 47 | 48 | func (t *Value) Marshal(w io.Writer) { 49 | var b [8]byte 50 | bs := b[:8] 51 | binary.LittleEndian.PutUint64(bs, uint64(*t)) 52 | w.Write(bs) 53 | } 54 | 55 | func (t *Key) Unmarshal(r io.Reader) error { 56 | var b [8]byte 57 | bs := b[:8] 58 | if _, err := io.ReadFull(r, bs); err != nil { 59 | return err 60 | } 61 | *t = Key(binary.LittleEndian.Uint64(bs)) 62 | return nil 63 | } 64 | 65 | func (t *Value) Unmarshal(r io.Reader) error { 66 | var b [8]byte 67 | bs := b[:8] 68 | if _, err := io.ReadFull(r, bs); err != nil { 69 | return err 70 | } 71 | *t = Value(binary.LittleEndian.Uint64(bs)) 72 | return nil 73 | } 74 | -------------------------------------------------------------------------------- /src/state/statemarsh.go.1k: -------------------------------------------------------------------------------- 1 | package state 2 | 3 | import ( 4 | "io" 5 | "encoding/binary" 6 | ) 7 | 8 | func (t *Command) Marshal(w io.Writer) { 9 | var b [1033]byte 10 | bs := b[:] 11 | 
b[0] = byte(t.Op) 12 | binary.LittleEndian.PutUint64(bs[1:9], uint64(t.K)) 13 | for i := 0; i < 128; i++ { 14 | s := 9 + i * 8 15 | e := s + 8 16 | binary.LittleEndian.PutUint64(bs[s:e], uint64(t.V[i])) 17 | } 18 | w.Write(bs) 19 | } 20 | 21 | func (t *Command) Unmarshal(r io.Reader) error { 22 | var b [1033]byte 23 | bs := b[:] 24 | if _, err := io.ReadFull(r, bs); err != nil { 25 | return err 26 | } 27 | t.Op = Operation(b[0]) 28 | t.K = Key(binary.LittleEndian.Uint64(bs[1:9])) 29 | for i := 0; i < 128; i++ { 30 | s := 9 + i * 8 31 | e := s + 8 32 | t.V[i] = int64(binary.LittleEndian.Uint64(bs[s:e])) 33 | } 34 | return nil 35 | } 36 | 37 | func (t *Key) Marshal(w io.Writer) { 38 | var b [8]byte 39 | bs := b[:] 40 | binary.LittleEndian.PutUint64(bs, uint64(*t)) 41 | w.Write(bs) 42 | } 43 | 44 | func (t *Value) Marshal(w io.Writer) { 45 | var b [1024]byte 46 | bs := b[:] 47 | for i := 0; i < 128; i++ { 48 | s := i * 8 49 | e := s + 8 50 | binary.LittleEndian.PutUint64(bs[s:e], uint64((*t)[i])) 51 | } 52 | w.Write(bs) 53 | } 54 | 55 | 56 | func (t *Key) Unmarshal(r io.Reader) error { 57 | var b [8]byte 58 | bs := b[:] 59 | if _, err := io.ReadFull(r, bs); err != nil { 60 | return err 61 | } 62 | *t = Key(binary.LittleEndian.Uint64(bs)) 63 | return nil 64 | } 65 | 66 | 67 | func (t *Value) Unmarshal(r io.Reader) error { 68 | var b [1024]byte 69 | bs := b[:] 70 | if _, err := io.ReadFull(r, bs); err != nil { 71 | return err 72 | } 73 | for i := 0; i < 128; i++ { 74 | s := i * 8 75 | e := s + 8 76 | (*t)[i] = int64(binary.LittleEndian.Uint64(bs[s:e])) 77 | } 78 | return nil 79 | } 80 | -------------------------------------------------------------------------------- /src/timetrace/timetrace.go: -------------------------------------------------------------------------------- 1 | package timetrace 2 | 3 | import ( 4 | "cycles" 5 | "fmt" 6 | "log" 7 | "os" 8 | "strings" 9 | "sync" 10 | ) 11 | 12 | /** 13 | * This class implements a circular buffer of entries, each of which 
 * consists of a fine-grain timestamp, a short descriptive string, and
 * a few additional values. It's typically used to record times at
 * various points in an operation, in order to find performance bottlenecks.
 * It can record a trace relatively efficiently (< 10ns as of 6/2016),
 * and then either return the trace as a string or print it to
 * the system log.
 *
 * This class is thread-safe. By default, trace information is recorded
 * separately for each thread in order to avoid synchronization and cache
 * consistency overheads; the thread-local traces are merged by methods
 * such as printToLog, so the existence of multiple trace buffers is
 * normally invisible.
 *
 * Most callers should not construct a TimeTrace directly; instead use
 * the package-level functions (Init, NewContext, Record*, GetTrace,
 * Reset), which operate on a single global trace.
 *
 * If you want to use a single trace buffer rather than per-thread
 * buffers, see the Buffer type below.
 */
type TimeTrace struct {
	sync.Mutex

	// Holds pointers to all of the thread-private Buffer objects created
	// so far. Entries never get deleted from this slice.
	threadBuffers []*Buffer

	// When true, Record calls become no-ops (it is only checked, never
	// set, in the code visible here).
	frozen bool
}

// Context carries the calling goroutine's private trace buffer; obtain
// one via NewContext and pass it to the Record* functions.
type Context struct {
	// Points to a private per-thread Buffer; nil means no buffer has
	// been created yet for the current thread.
	ThreadBuffer *Buffer
}

// The single global trace that the package-level functions operate on.
var globalTrace *TimeTrace

// Init creates the global trace and calibrates the cycle counter.
// It must be called before any other function in this package
// (the package-level Record*/GetTrace/Reset dereference globalTrace).
func Init() {
	globalTrace = &TimeTrace{}
	globalTrace.threadBuffers = []*Buffer{}
	cycles.Init()
}

/**
 * This structure holds one entry in a TimeTrace::Buffer.
 */
type Event struct {
	// Time when a particular event occurred.
	timestamp uint64

	// Format string describing the event.
	// nil means that this entry is unused.
66 | format *string 67 | 68 | // Number of valid args in the args array 69 | nargs int32 70 | 71 | // Arguments that may be referenced by format 72 | // when printing out this event. 73 | args [10]int64 74 | } 75 | 76 | func NewContext() *Context { 77 | return globalTrace.NewContext() 78 | } 79 | 80 | /** 81 | * Return a thread-local buffer that can be used to record events from the 82 | * calling thread. 83 | */ 84 | func (t *TimeTrace) NewContext() *Context { 85 | b := NewBuffer() 86 | 87 | t.Lock() 88 | t.threadBuffers = append(t.threadBuffers, b) 89 | t.Unlock() 90 | c := Context{b} 91 | return &c 92 | } 93 | 94 | func Record(context *Context, format *string, nargs int32, arg1 int64, arg2 int64, 95 | arg3 int64, arg4 int64) { 96 | globalTrace.Record(context, format, nargs, arg1, arg2, arg3, arg4) 97 | } 98 | 99 | func Record0(context *Context, format *string) { 100 | globalTrace.Record(context, format, 0, 0, 0, 0, 0) 101 | } 102 | 103 | func Record1(context *Context, format *string, arg1 int64) { 104 | globalTrace.Record(context, format, 1, arg1, 0, 0, 0) 105 | } 106 | 107 | func Record2(context *Context, format *string, arg1 int64, arg2 int64) { 108 | globalTrace.Record(context, format, 2, arg1, arg2, 0, 0) 109 | } 110 | 111 | func Record3(context *Context, format *string, arg1 int64, arg2 int64, 112 | arg3 int64) { 113 | globalTrace.Record(context, format, 3, arg1, arg2, arg3, 0) 114 | } 115 | 116 | func Record4(context *Context, format *string, arg1 int64, arg2 int64, 117 | arg3 int64, arg4 int64) { 118 | globalTrace.Record(context, format, 4, arg1, arg2, arg3, arg4) 119 | } 120 | 121 | /** 122 | * Record an event in a thread-local buffer, creating a new buffer 123 | * if this is the first record for this thread. 124 | * 125 | * \param timestamp 126 | * Identifies the time at which the event occurred. 
127 | * \param format 128 | * A format string for snprintf that will be used, along with 129 | * arg0..arg3, to generate a human-readable message describing what 130 | * happened, when the time trace is printed. The message is generated 131 | * by calling snprintf as follows: 132 | * snprintf(buffer, size, format, arg0, arg1, arg2, arg3) 133 | * where format and arg0..arg3 are the corresponding arguments to this 134 | * method. This pointer is stored in the time trace, so the caller must 135 | * ensure that its contents will not change over its lifetime in the 136 | * trace. 137 | * \param args 138 | * Arguments to use when printing a message about this event. 139 | */ 140 | func (t *TimeTrace) Record(context *Context, format *string, nargs int32, arg1 int64, arg2 int64, 141 | arg3 int64, arg4 int64) { 142 | if !t.frozen { 143 | t.record(cycles.Rdtsc(), context, format, nargs, arg1, arg2, arg3, arg4) 144 | } 145 | } 146 | 147 | func (t *TimeTrace) record(timestamp uint64, context *Context, format *string, nargs int32, arg1 int64, arg2 int64, 148 | arg3 int64, arg4 int64) { 149 | if !t.frozen { 150 | context.ThreadBuffer.record(timestamp, format, nargs, arg1, arg2, arg3, arg4, 0, 0, 0, 0, 0) 151 | } 152 | } 153 | 154 | func GetTrace() string { 155 | return globalTrace.GetTrace() 156 | } 157 | 158 | /** 159 | * Return a string containing all of the trace records from all of the 160 | * thread-local buffers. 161 | * NOT safe with logging timetraces 162 | */ 163 | func (t *TimeTrace) GetTrace() string { 164 | var s string 165 | printInternal(t.threadBuffers, &s, nil) 166 | return s 167 | } 168 | 169 | func (t *TimeTrace) LogTrace(f *os.File) { 170 | printInternal(t.threadBuffers, nil, f) 171 | } 172 | 173 | /** 174 | * Discards all records in all of the thread-local buffers. Intended 175 | * primarily for unit testing. 
176 | */ 177 | func (t *TimeTrace) Reset() { 178 | t.Lock() 179 | for _, b := range t.threadBuffers { 180 | b.Reset() 181 | } 182 | t.Unlock() 183 | } 184 | 185 | func Reset() { 186 | globalTrace.Reset() 187 | } 188 | 189 | const ( 190 | // Determines the number of events we can retain as an exponent of 2 191 | BUFFER_SIZE_EXP = 14 192 | 193 | // Total number of events that we can retain any given time. 194 | BUFFER_SIZE = 1 << BUFFER_SIZE_EXP 195 | 196 | // Bit mask used to implement a circular event buffer 197 | BUFFER_MASK = BUFFER_SIZE - 1 198 | ) 199 | 200 | /** 201 | * Represents a sequence of events, typically consisting of all those 202 | * generated by one thread. Has a fixed capacity, so slots are re-used 203 | * on a circular basis. This class is not thread-safe. 204 | */ 205 | type Buffer struct { 206 | // Index within events of the slot to use for the next call to the 207 | // record method. 208 | nextIndex int 209 | 210 | // Holds information from the most recent calls to the record method. 211 | events [BUFFER_SIZE]Event 212 | } 213 | 214 | /** 215 | * Construct a TimeTrace::Buffer. 216 | */ 217 | func NewBuffer() *Buffer { 218 | var b Buffer 219 | b.nextIndex = 0 220 | 221 | // Mark all of the events invalid. 222 | for i := 0; i < BUFFER_SIZE; i++ { 223 | b.events[i].format = nil 224 | } 225 | 226 | return &b 227 | } 228 | 229 | /** 230 | * Return a string containing a printout of the records in the buffer. 
231 | */ 232 | func (b *Buffer) GetTrace() string { 233 | buffers := []*Buffer{b} 234 | var s string 235 | printInternal(buffers, &s, nil) 236 | return s 237 | } 238 | 239 | func (b *Buffer) Record0(format *string) { 240 | b.Record(format, 0, 0, 0, 0, 0) 241 | } 242 | 243 | func (b *Buffer) Record1(format *string, arg1 int64) { 244 | b.Record(format, 1, arg1, 0, 0, 0) 245 | } 246 | 247 | func (b *Buffer) Record2(format *string, arg1 int64, arg2 int64) { 248 | b.Record(format, 2, arg1, arg2, 0, 0) 249 | } 250 | 251 | func (b *Buffer) Record3(format *string, arg1 int64, arg2 int64, 252 | arg3 int64) { 253 | b.Record(format, 3, arg1, arg2, arg3, 0) 254 | } 255 | 256 | func (b *Buffer) Record9(format *string, arg1 int64, arg2 int64, 257 | arg3 int64, arg4 int64, arg5 int64, arg6 int64, arg7 int64, arg8 int64, arg9 int64) { 258 | b.record(cycles.Rdtsc(), format, 9, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9) 259 | } 260 | 261 | func (b *Buffer) Record4(format *string, arg1 int64, arg2 int64, 262 | arg3 int64, arg4 int64) { 263 | b.Record(format, 4, arg1, arg2, arg3, arg4) 264 | } 265 | 266 | /** 267 | * Record an event in the buffer. 268 | * 269 | * \param format 270 | * A format string for snprintf that will be used, along with 271 | * arg0..arg3, to generate a human-readable message describing what 272 | * happened, when the time trace is printed. The message is generated 273 | * by calling snprintf as follows: 274 | * snprintf(buffer, size, format, arg0, arg1, arg2, arg3) 275 | * where format and arg0..arg3 are the corresponding arguments to this 276 | * method. This pointer is stored in the buffer, so the caller must 277 | * ensure that its contents will not change over its lifetime in the 278 | * trace. 279 | * \param args 280 | * Arguments to use when printing a message about this event. 
281 | */ 282 | func (b *Buffer) record(timestamp uint64, format *string, nargs int32, arg1 int64, arg2 int64, 283 | arg3 int64, arg4 int64, arg5 int64, arg6 int64, arg7 int64, arg8 int64, arg9 int64) { 284 | event := &b.events[b.nextIndex] 285 | event.timestamp = timestamp 286 | event.format = format 287 | event.nargs = nargs 288 | event.args[0] = arg1 289 | event.args[1] = arg2 290 | event.args[2] = arg3 291 | event.args[3] = arg4 292 | event.args[4] = arg5 293 | event.args[5] = arg6 294 | event.args[6] = arg7 295 | event.args[7] = arg8 296 | event.args[8] = arg9 297 | b.nextIndex = (b.nextIndex + 1) & BUFFER_MASK 298 | } 299 | 300 | func (b *Buffer) Record(format *string, nargs int32, arg1 int64, arg2 int64, 301 | arg3 int64, arg4 int64) { 302 | b.record(cycles.Rdtsc(), format, nargs, arg1, arg2, arg3, arg4, 0, 0, 0, 0, 0) 303 | } 304 | 305 | /** 306 | * Discard any existing trace records. 307 | */ 308 | func (b *Buffer) Reset() { 309 | for i := 0; i < BUFFER_SIZE; i++ { 310 | b.events[i].format = nil 311 | } 312 | b.nextIndex = 0 313 | } 314 | 315 | func (b *Buffer) DumpTrace(fname string) { 316 | f, err := os.Create(fname) 317 | if err != nil { 318 | log.Println("Couldn't log time trace in the background:", err) 319 | return 320 | } 321 | 322 | printInternal([]*Buffer{b}, nil, f) 323 | } 324 | 325 | /** 326 | * This private method does most of the work for both printToLog and 327 | * getTrace. 328 | * 329 | * \param buffers 330 | * Contains one or more TimeTrace::Buffers, whose contents will be merged 331 | * in the resulting output. Note: some of the buffers may extend 332 | * farther back in time than others. The output will cover only the 333 | * time period covered by *all* of the traces, ignoring older entries 334 | * from some traces. 335 | * \param s 336 | * If non-NULL, refers to a string that will hold a printout of the 337 | * time trace. If NULL, the trace will be printed on the system log. 
 * \param f
 *      If s is NULL and f is non-NULL, the formatted trace is written to
 *      this file instead.
 */
func printInternal(buffers []*Buffer, s *string, f *os.File) {
	printedAnything := false

	// Holds the index of the next event to consider from each trace.
	current := make([]int, len(buffers))

	// Find the first (oldest) event in each trace. This will be events[0]
	// if we never completely filled the buffer, otherwise events[nextIndex+1].
	// This means we don't print the entry at nextIndex; this is convenient
	// because it simplifies boundary conditions in the code below.
	for i, buffer := range buffers {
		index := (buffer.nextIndex + 1) % BUFFER_SIZE
		if buffer.events[index].format != nil {
			current[i] = index
		} else {
			current[i] = 0
		}
	}

	// Decide on the time of the first event to be included in the output.
	// This is most recent of the oldest times in all the traces (an empty
	// trace has an "oldest time" of 0). The idea here is to make sure
	// that there's no missing data in what we print (if trace A goes back
	// farther than trace B, skip the older events in trace A, since there
	// might have been related events that were once in trace B but have since
	// been overwritten).
	var startTime uint64 = 0
	for i := 0; i < len(buffers); i++ {
		event := buffers[i].events[current[i]]
		if event.format != nil && event.timestamp > startTime {
			startTime = event.timestamp
		}
	}
	log.Printf("Starting TSC %d, cyclesPerSec %.0f\n", startTime,
		cycles.PerSecond())

	// Skip all events before the starting time.
	for i := 0; i < len(buffers); i++ {
		buffer := buffers[i]
		for buffer.events[current[i]].format != nil &&
			buffer.events[current[i]].timestamp < startTime &&
			current[i] != buffer.nextIndex {
			current[i] = (current[i] + 1) % BUFFER_SIZE
		}
	}

	// Each iteration through this loop processes one event (the one with
	// the earliest timestamp). This is a k-way merge over the buffers.
	prevTime := 0.0

	var stringBuilder strings.Builder
	for {
		var buffer *Buffer
		var event *Event

		// Check all the traces to find the earliest available event.
		currentBuffer := -1
		var earliestTime uint64 = ^uint64(0)
		for i, buffer := range buffers {
			event := buffer.events[current[i]]
			// current[i] == buffer.nextIndex means trace i is exhausted.
			if current[i] != buffer.nextIndex && event.format != nil && event.timestamp < earliestTime {
				currentBuffer = i
				earliestTime = event.timestamp
			}
		}
		if currentBuffer < 0 {
			// None of the traces have any more events to process.
			break
		}
		printedAnything = true
		buffer = buffers[currentBuffer]
		event = &buffer.events[current[currentBuffer]]
		current[currentBuffer] = (current[currentBuffer] + 1) % BUFFER_SIZE

		// Elapsed time since the chosen common start, in nanoseconds.
		ns := cycles.ToNanoseconds(event.timestamp - startTime)

		args := make([]interface{}, event.nargs)
		for i := int32(0); i < event.nargs; i++ {
			args[i] = event.args[i]
		}

		// "T<n>" identifies the source buffer (1-based); "+x ns" is the
		// delta from the previously printed event across all buffers.
		stringBuilder.WriteString(fmt.Sprintf("T%d %8.1f ns (+%6.1f ns): ",
			currentBuffer+1, ns, ns-prevTime))
		stringBuilder.WriteString(fmt.Sprintf(*event.format, args...))
		stringBuilder.WriteString("\n")

		prevTime = ns
	}

	if !printedAnything {
		stringBuilder.WriteString("No time trace events to print\n")
	}

	if s != nil {
		*s = stringBuilder.String()
	} else if f != nil {
		// NOTE(review): the WriteString error is ignored; a short write
		// to f would go unnoticed — consider surfacing it to the caller.
		f.WriteString(stringBuilder.String())
	}
}
--------------------------------------------------------------------------------
/src/zipcalc/zipcalc.go:
--------------------------------------------------------------------------------
package main

import (
	"fmt"
	"zipfian"
)

// main prints Zeta(n, theta) for a grid of (n, theta) pairs —
// presumably to generate the constants hardcoded in zipfian.Zeta's
// fast-path table (the 1e9 values there match this grid); verify
// against that table before changing either side.
func main() {
	for _, n := range []uint64{2, 1e9, 1e5, 1e4, 1e10} {
		for _, theta := range []float64{0.99, 0.95, 0.90, 0.85, 0.80, 0.75, 0.70, 0.65, 0.60, 0.55, 0.50} {
			fmt.Printf("N: %d, Theta: %f, Zeta: %.55f\n", n, theta, zipfian.Zeta(n, theta))
		}
	}
}
--------------------------------------------------------------------------------
/src/zipfian/zipfian.go:
--------------------------------------------------------------------------------
package zipfian

import (
	"math"
	"math/rand"
	"time"
)

/**
 * Used to generate zipfian distributed random numbers where the distribution is
 * skewed toward the lower integers; e.g. 0 will be the most popular, 1 the next
 * most popular, etc.
13 | * 14 | * This class implements the core algorithm from YCSB's ZipfianGenerator; it, in 15 | * turn, uses the algorithm from "Quickly Generating Billion-Record Synthetic 16 | * Databases", Jim Gray et al, SIGMOD 1994. 17 | */ 18 | type ZipfianGenerator struct { 19 | n float64 // Range of numbers to be generated. 20 | theta float64 // Parameter of the zipfian distribution. 21 | alpha float64 // Special intermediate result used for generation. 22 | zetan float64 // Special intermediate result used for generation. 23 | eta float64 // Special intermediate result used for generation. 24 | r *rand.Rand 25 | } 26 | 27 | /** 28 | * Construct a generator. This may be expensive if n is large. 29 | * 30 | * \param n 31 | * The generator will output random numbers between 0 and n-1. 32 | * \param theta 33 | * The zipfian parameter where 0 < theta < 1 defines the skew; the 34 | * smaller the value the more skewed the distribution will be. Default 35 | * value of 0.99 comes from the YCSB default value. 36 | */ 37 | func NewZipfianGenerator(n uint64, theta float64) ZipfianGenerator { 38 | zetan := Zeta(n, theta) 39 | return ZipfianGenerator{ 40 | n: float64(n), 41 | theta: theta, 42 | alpha: (1.0 / (1.0 - theta)), 43 | zetan: zetan, 44 | eta: (1.0 - math.Pow(2.0/float64(n), 1.0-theta)) / 45 | (1.0 - Zeta(2.0, theta)/zetan), 46 | r: rand.New(rand.NewSource(time.Now().UnixNano())), 47 | } 48 | } 49 | 50 | /** 51 | * Return the zipfian distributed random number between 0 and n-1. 
52 | * Partially inspired by https://github.com/cockroachdb/cockroach/blob/2eebbddbb133eea7102a47fbe7f5d13ec6f8f670/pkg/workload/ycsb/zipfgenerator.go 53 | */ 54 | func (z ZipfianGenerator) NextNumber() uint64 { 55 | u := z.r.Float64() 56 | uz := u * z.zetan 57 | if uz < 1 { 58 | return 0 59 | } 60 | if uz < 1+math.Pow(0.5, z.theta) { 61 | return 1 62 | } 63 | return 0 + uint64(float64(z.n)*math.Pow(z.eta*u-z.eta+1.0, z.alpha)) 64 | } 65 | 66 | /** 67 | * Returns the nth harmonic number with parameter theta; e.g. H_{n,theta}. 68 | */ 69 | func Zeta(n uint64, theta float64) float64 { 70 | // Some of these take quite a while to compute, so return 71 | // common results immediately 72 | if theta == 0.99 { 73 | if n == 1e9 { 74 | return 23.60336399999999912324710749089717864990234375 75 | } 76 | } 77 | if theta == 0.95 { 78 | if n == 1e9 { 79 | return 36.94122142977597178514770348556339740753173828125 80 | } 81 | } 82 | if theta == 0.90 { 83 | if n == 1e9 { 84 | return 70.0027094570042294208178645931184291839599609375 85 | } 86 | } 87 | if theta == 0.85 { 88 | if n == 1e9 { 89 | return 143.14759472538497675486723892390727996826171875 90 | } 91 | } 92 | if theta == 0.80 { 93 | if n == 1e9 { 94 | return 311.04113385576732753179385326802730560302734375 95 | } 96 | } 97 | if theta == 0.75 { 98 | if n == 1e9 { 99 | return 707.87047871782715446897782385349273681640625 100 | } 101 | } 102 | if theta == 0.70 { 103 | if n == 1e9 { 104 | return 1667.845723895596393049345351755619049072265625 105 | } 106 | } 107 | if theta == 0.65 { 108 | if n == 1e9 { 109 | return 4033.51556666213946300558745861053466796875 110 | } 111 | } 112 | if theta == 0.60 { 113 | if n == 1e9 { 114 | return 9950.726604378511183313094079494476318359375 115 | } 116 | } 117 | if theta == 0.55 { 118 | if n == 1e9 { 119 | return 24932.0647149890646687708795070648193359375 120 | } 121 | } 122 | if theta == 0.50 { 123 | if n == 1e9 { 124 | return 63244.092864672114956192672252655029296875 125 | } 126 | } 127 | 
128 | var sum float64 = 0 129 | var i float64 130 | for i = 0; i < float64(n); i++ { 131 | sum = sum + 1.0/(math.Pow(i+1.0, theta)) 132 | } 133 | 134 | return sum 135 | } 136 | --------------------------------------------------------------------------------