├── .gitignore
├── Buchman_Ethan_201606_MAsc.pdf
├── Thesis_Defense.pdf
├── bib
    ├── applied.bib
    ├── consensus.bib
    ├── crypto.bib
    ├── formal.bib
    └── programming.bib
├── build.sh
├── chapters
    ├── abstract.tex
    ├── appendix.tex
    ├── apps.tex
    ├── background.tex
    ├── clients.tex
    ├── conclusion.tex
    ├── economics.tex
    ├── frontmatter.tex
    ├── governance.tex
    ├── implementation.tex
    ├── introduction.tex
    ├── performance.tex
    ├── related.tex
    ├── subprotocols.tex
    ├── tendermint.tex
    ├── theory.tex
    └── title.tex
├── figures
    ├── descriptions
    │   ├── block_header.tex
    │   ├── consensus_rules.tex
    │   ├── data_structures.tex
    │   ├── safety_guarantees.tex
    │   ├── security_guarantees.tex
    │   ├── tendermint-pi1.tex
    │   └── tendermint-pi2.tex
    ├── diagrams
    │   ├── abci.png
    │   ├── byzantine.pdf
    │   ├── byzantine.png
    │   ├── consensus_logic.pdf
    │   ├── consensus_logic.png
    │   ├── state_machine.pdf
    │   ├── state_machine.png
    │   ├── tmsp.pdf
    │   └── tmsp.png
    └── throughput
    │   ├── byz_tables.tex
    │   ├── crash_tables.tex
    │   ├── delay_tables.tex
    │   ├── large_instances
    │       ├── latency-throughput.pdf
    │       ├── latency-throughput.png
    │       ├── throughput-blocksize.pdf
    │       └── throughput-blocksize.png
    │   ├── latency-throughput.pdf
    │   ├── latency-throughput.png
    │   ├── single_datacenter
    │       ├── latency-throughput.pdf
    │       ├── latency-throughput.png
    │       ├── throughput-blocksize.pdf
    │       └── throughput-blocksize.png
    │   ├── throughput-blocksize.pdf
    │   └── throughput-blocksize.png
├── listings-golang.sty
├── main.tex
└── tendermint-pi.tex


/.gitignore:
--------------------------------------------------------------------------------
 1 | *.aux
 2 | *.log
 3 | *.out
 4 | *.pdf
 5 | auto/
 6 | chapters/auto/
 7 | *.bbl
 8 | *.blg
 9 | *.fdb_latexmk
10 | *.fls
11 | *.lof
12 | *.lot
13 | *.toc
14 | 


--------------------------------------------------------------------------------
/Buchman_Ethan_201606_MAsc.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/Buchman_Ethan_201606_MAsc.pdf


--------------------------------------------------------------------------------
/Thesis_Defense.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/Thesis_Defense.pdf


--------------------------------------------------------------------------------
/bib/applied.bib:
--------------------------------------------------------------------------------
  1 | 
  2 | @article{rao2011using,
  3 |   title={Using Paxos to build a scalable, consistent, and highly available datastore},
  4 |   author={Rao, Jun and Shekita, Eugene J and Tata, Sandeep},
  5 |   journal={Proceedings of the VLDB Endowment},
  6 |   volume={4},
  7 |   number={4},
  8 |   pages={243--254},
  9 |   year={2011},
 10 |   publisher={VLDB Endowment}
 11 | }
 12 | 
 13 | @incollection{lampson1996paxos,
 14 |   title={How to build a highly available system using consensus},
 15 |   author={Lampson, Butler W},
 16 |   booktitle={Distributed Algorithms},
 17 |   pages={1--17},
 18 |   year={1996},
 19 |   publisher={Springer},
 20 |   annote={practical guide to using paxos}
 21 | }
 22 | @inproceedings{dynamo,
 23 |   title={Dynamo: amazon's highly available key-value store},
 24 |   author={DeCandia, Giuseppe and Hastorun, Deniz and Jampani, Madan and Kakulapati, Gunavardhan and Lakshman, Avinash and Pilchin, Alex and Sivasubramanian, Swaminathan and Vosshall, Peter and Vogels, Werner},
 25 |   booktitle={ACM SIGOPS Operating Systems Review},
 26 |   volume={41},
 27 |   number={6},
 28 |   pages={205--220},
 29 |   year={2007},
 30 |   organization={ACM}
 31 | }
 32 | 
 33 | @inproceedings{chubby,
 34 |   title={The Chubby lock service for loosely-coupled distributed systems},
 35 |   author={Burrows, Mike},
 36 |   booktitle={Proceedings of the 7th symposium on Operating systems design and implementation},
 37 |   pages={335--350},
 38 |   year={2006},
 39 |   organization={USENIX Association}
 40 | }
 41 | 
 42 | @inproceedings{zookeeper,
 43 |   title={ZooKeeper: Wait-free Coordination for Internet-scale Systems.},
 44 |   author={Hunt, Patrick and Konar, Mahadev and Junqueira, Flavio Paiva and Reed, Benjamin},
 45 |   booktitle={USENIX Annual Technical Conference},
 46 |   volume={8},
 47 |   pages={9},
 48 |   year={2010}
 49 | }
 50 | 
 51 | 
 52 | @article{sift,
 53 |   title={SIFT: Design and analysis of a fault-tolerant computer for aircraft control},
 54 |   author={Wensley, John H and Lamport, Leslie and Goldberg, Jack and Green, Milton W and Levitt, Karl N and Melliar-Smith, Po Mo and Shostak, Robert E and Einstock, Charles B},
 55 |   journal={Proceedings of the IEEE},
 56 |   volume={66},
 57 |   number={10},
 58 |   pages={1240--1255},
 59 |   year={1978},
 60 |   publisher={IEEE}
 61 | }
 62 | 
 63 | 
 64 | @article{ftmp,
 65 |   title={FTMP---a highly reliable fault-tolerant multiprocessor for aircraft},
 66 |   author={Hopkins Jr, Albert L and Smith III, T Basil and Lala, Jaynarayan H},
 67 |   journal={Proceedings of the IEEE},
 68 |   volume={66},
 69 |   number={10},
 70 |   pages={1221--1239},
 71 |   year={1978},
 72 |   publisher={IEEE}
 73 | }
 74 | 
 75 | @inproceedings{miner2000analysis,
 76 |   title={Analysis of the SPIDER fault-tolerance protocols},
 77 |   author={Miner, PS},
 78 |   booktitle={Proceedings of the 5th NASA Langley Formal Methods Workshop},
 79 |   year={2000}
 80 | }
 81 | 
 82 | @inproceedings{miner2004unified,
 83 |   title={A unified fault-tolerance protocol},
 84 |   author={Miner, Paul and Geser, Alfons and Pike, Lee and Maddalon, Jeffrey},
 85 |   organization={Springer}
 86 | }
 87 | 
 88 | @article{hoyme1993safebus,
 89 |   title={SAFEbus (for avionics)},
 90 |   author={Hoyme, Kenneth and Driscoll, Kevin},
 91 |   journal={Aerospace and Electronic Systems Magazine, IEEE},
 92 |   volume={8},
 93 |   number={3},
 94 |   pages={34--39},
 95 |   year={1993},
 96 |   publisher={IEEE}
 97 | }
 98 | 
 99 | @misc{coreos_raft,
100 |   title = {ETCD Distributed Key-Value Store Source Code Repository},
101 |   howpublished = {https://github.com/coreos/etcd},
102 |   note = {Accessed: 2016-04-01}
103 | }
104 | 
105 | @misc{hashicorp_raft,
106 |   title = {Hashicorp's Implementation of Raft in Go},
107 |   howpublished = {https://github.com/hashicorp/raft},
108 |   note = {Accessed: 2016-04-01}
109 | }
110 | 
111 | @misc{influxdb,
112 |   title = {InfluxDB: Scalable datastore for metrics, events, and real-time analytics},
113 |   howpublished = {https://github.com/influxdata/influxdb},
114 |   note = {Accessed: 2016-04-01}
115 | }
116 | 
117 | @inproceedings{chandra2007paxos,
118 |   title={Paxos made live: an engineering perspective},
119 |   author={Chandra, Tushar D and Griesemer, Robert and Redstone, Joshua},
120 |   booktitle={Proceedings of the twenty-sixth annual ACM symposium on Principles of distributed computing},
121 |   pages={398--407},
122 |   year={2007},
123 |   organization={ACM},
124 |   annote={ experience from building Chubby }
125 | }
126 | 
127 | @article{posner2013quadratic,
128 |   title={Quadratic voting as efficient corporate governance},
129 |   author={Posner, Eric A and Weyl, E Glen},
130 |   journal={University of Chicago Law Review, Forthcoming},
131 |   year={2013}
132 | }
133 | 
134 | @article{zak2001trust,
135 |   title={Trust and growth},
136 |   author={Zak, Paul J and Knack, Stephen},
137 |   journal={The economic journal},
138 |   volume={111},
139 |   number={470},
140 |   pages={295--321},
141 |   year={2001},
142 |   publisher={Wiley Online Library}
143 | }
144 | 
145 | @article{corbett2013spanner,
146 |   title={Spanner: Google's globally distributed database},
147 |   author={Corbett, James C and Dean, Jeffrey and Epstein, Michael and Fikes, Andrew and Frost, Christopher and Furman, Jeffrey John and Ghemawat, Sanjay and Gubarev, Andrey and Heiser, Christopher and Hochschild, Peter and others},
148 |   journal={ACM Transactions on Computer Systems (TOCS)},
149 |   volume={31},
150 |   number={3},
151 |   pages={8},
152 |   year={2013},
153 |   publisher={ACM}
154 | }
155 | 
156 | 
157 | 


--------------------------------------------------------------------------------
/bib/consensus.bib:
--------------------------------------------------------------------------------
  1 | 
  2 | @article{flp,
  3 |   title={Impossibility of distributed consensus with one faulty process},
  4 |   author={Fischer, Michael J and Lynch, Nancy A and Paterson, Michael S},
  5 |   journal={Journal of the ACM (JACM)},
  6 |   volume={32},
  7 |   number={2},
  8 |   pages={374--382},
  9 |   year={1985},
 10 |   publisher={ACM},
 11 |   annote={fully async deterministic consensus is impossible.
 12 | 	"no completely asynchronous consensus protocol can tolerate even a single unannounced process death" 
 13 |   }
 14 | }
 15 | 
 16 | @article{impossibility,
 17 |   title={Easy impossibility proofs for distributed consensus problems},
 18 |   author={Fischer, Michael J and Lynch, Nancy A and Merritt, Michael},
 19 |   journal={Distributed Computing},
 20 |   volume={1},
 21 |   number={1},
 22 |   pages={26--39},
 23 |   year={1986},
 24 |   publisher={Springer}
 25 | }
 26 | 
 27 | 
 28 | @article{defago2004total,
 29 |   title={Total order broadcast and multicast algorithms: Taxonomy and survey},
 30 |   author={D{\'e}fago, Xavier and Schiper, Andr{\'e} and Urb{\'a}n, P{\'e}ter},
 31 |   journal={ACM Computing Surveys (CSUR)},
 32 |   volume={36},
 33 |   number={4},
 34 |   pages={372--421},
 35 |   year={2004},
 36 |   publisher={ACM}
 37 | }
 38 | 
 39 | @inproceedings{free-choice,
 40 |   title={Another advantage of free choice (extended abstract): Completely asynchronous agreement protocols},
 41 |   author={Ben-Or, Michael},
 42 |   booktitle={Proceedings of the second annual ACM symposium on Principles of distributed computing},
 43 |   pages={27--30},
 44 |   year={1983},
 45 |   organization={ACM},
 46 |   annote={somehow this cites flp.
 47 |   Can solve flp with probability 1 using non-determinism.
 48 |   2t+1 for non-byzantine, 5t+1 for byzantine version.
 49 |   "dont know whether N > 5t is best possible bound " ... :p
 50 |   each node flips a coin locally and broadcasts result - if enough of them, its the common coin.
 51 |   number of rounds to reach consensus is exponential in async case
 52 |   }
 53 | }
 54 | 
 55 | @inproceedings{rabin1983randomized,
 56 | 	title={Randomized byzantine generals},
 57 | 	author={Rabin, Michael O},
 58 | 	booktitle={Foundations of Computer Science, 1983., 24th Annual Symposium on},
 59 | 	pages={403--409},
 60 | 	year={1983},
 61 | 	organization={IEEE},
 62 | 	annote={
 63 | 		use shamir secret sharing to distribute the coin.
 64 | 		requires trusted dealer for initial setup.
 65 | 		constant time. expected number of rounds is O(T(n)).
 66 | 	}
 67 | }
 68 | 
 69 | @article{chor1985simple,
 70 |   title={A simple and efficient randomized byzantine agreement algorithm},
 71 |   author={Chor, Benny and Coan, Brian A},
 72 |   journal={Software Engineering, IEEE Transactions on},
 73 |   number={6},
 74 |   pages={531--539},
 75 |   year={1985},
 76 |   publisher={IEEE},
 77 |   annote={synchronous only. small set of size g flip, terminates in 2t/g rounds with a common coin.
 78 | 	no crypto, less redundancy required than Ben-Or}
 79 | }
 80 | 
 81 | @article{paxos_simple,
 82 |   title={Paxos made simple},
 83 |   author={Lamport, Leslie and others},
 84 |   journal={ACM Sigact News},
 85 |   volume={32},
 86 |   number={4},
 87 |   pages={18--25},
 88 |   year={2001}
 89 | }
 90 | 
 91 | 
 92 | @incollection{draper_lab,
 93 |   title={The evolution of fault tolerant computing at the Charles Stark Draper Laboratory, 1955--85},
 94 |   author={Hopkins Jr, Albert L and Lala, Jaynarayan H and Smith III, T Basil},
 95 |   booktitle={The Evolution of fault-tolerant computing},
 96 |   pages={121--140},
 97 |   year={1987},
 98 |   publisher={Springer},
 99 |   annote={early days for aviation}
100 | }
101 | 
102 | 
103 | 
104 | 
105 | 
106 | # a great history
107 | http://betathoughts.blogspot.ca/2007/06/brief-history-of-consensus-2pc-and.html
108 | 
109 | @article{clocks,
110 |   title={Time, clocks, and the ordering of events in a distributed system},
111 |   author={Lamport, Leslie},
112 |   journal={Communications of the ACM},
113 |   volume={21},
114 |   number={7},
115 |   pages={558--565},
116 |   year={1978},
117 |   publisher={ACM},
118 |   annote={first consensus paper
119 | 	relativity of concurrent processes
120 |   	ordering based on message passing.
121 | 	introduces distributed state machine
122 |   }
123 | }
124 | 
125 | @article{pease1980reaching,
126 |   title={Reaching agreement in the presence of faults},
127 |   author={Pease, Marshall and Shostak, Robert and Lamport, Leslie},
128 |   journal={Journal of the ACM (JACM)},
129 |   volume={27},
130 |   number={2},
131 |   pages={228--234},
132 |   year={1980},
133 |   publisher={ACM},
134 |   annote={ first to show that the best we can do in byzantine is 3t+1 
135 | 	  statement of byzantine generals before it was called byzantine generals.
136 | 	an algo where t faults requires t+1 rounds (ie. 1 fault requires 2 rounds ...)
137 | 	however, if authentication is used, then we can tolerate an arbitrary number of byzantine faults t<=N given t+1 rounds
138 |   }
139 | }
140 | 
141 | @article{gettier,
142 |   title={On the logical unsolvability of the Gettier problem},
143 |   author={Floridi, Luciano},
144 |   journal={Synthese},
145 |   volume={142},
146 |   number={1},
147 |   pages={61--79},
148 |   year={2004},
149 |   publisher={Springer}
150 | }
151 | 
152 | 
153 | 
154 | @article{lamport1982byzantine,
155 |   title={The Byzantine generals problem},
156 |   author={Lamport, Leslie and Shostak, Robert and Pease, Marshall},
157 |   journal={ACM Transactions on Programming Languages and Systems (TOPLAS)},
158 |   volume={4},
159 |   number={3},
160 |   pages={382--401},
161 |   year={1982},
162 |   publisher={ACM},
163 |   annote={
164 | 	  coins Byzantine and expands on pease1980reaching
165 |   }
166 | }
167 | 
168 | @article{paxos,
169 |   title={The part-time parliament},
170 |   author={Lamport, Leslie},
171 |   journal={ACM Transactions on Computer Systems (TOCS)},
172 |   volume={16},
173 |   number={2},
174 |   pages={133--169},
175 |   year={1998},
176 |   publisher={ACM}
177 | }
178 | 
179 | 
180 | @article{dls,
181 |   title={Consensus in the presence of partial synchrony},
182 |   author={Dwork, Cynthia and Lynch, Nancy and Stockmeyer, Larry},
183 |   journal={Journal of the ACM (JACM)},
184 |   volume={35},
185 |   number={2},
186 |   pages={288--323},
187 |   year={1988},
188 |   publisher={ACM}
189 | }
190 | 
191 | @incollection{ppbft,
192 |   title={On the practicality of practical Byzantine fault tolerance},
193 |   author={Chondros, Nikos and Kokordelis, Konstantinos and Roussopoulos, Mema},
194 |   booktitle={Proceedings of ACM/IFIP/USENIX International Middleware Conference (MIDDLEWARE)},
195 |   pages={436--455},
196 |   year={2012},
197 |   publisher={Springer}
198 | }
199 | 
200 | 
201 | @inproceedings{pbft,
202 | 	  title={Practical Byzantine fault tolerance},
203 | 	  author={Castro, Miguel and Liskov, Barbara and others},
204 | 	  booktitle={Proceedings of the Third Symposium on Operating Systems Design and Implementation},
205 | 	  year={1999},
206 | 	  annote={
207 | 		  tolerates byzantine faults in asynchronous networks
208 | 		  public keys only used when there are faults
209 | 		  implemented NFS, performance on par UNIX
210 | 		  "We  do  assume  that  the  adversary cannot delay correct nodes indefinitely. "
211 | 		  "The algorithm does not rely on synchrony to provide safety.  Therefore, it must rely on synchrony to provide liveness; else violate FLP"
212 | 		  	- delay(t) does not grow faster than t indefinitely
213 | 
214 | 		  Protocol:
215 | 		  	- "buffer requests and multicast as a group" as optimization -> ie. make blocks
216 | 			- 3 steps: pre-prepare, prepare, commit
217 | 			- "The pre-prepare and prepare phases are used to totally
218 | 			order  requests  sent  in  the  same  view  even  when  the
219 | 			primary,  which  proposes  the  ordering  of  requests,  is
220 | 			faulty. The prepare and commit phases are used to ensure
221 | 			that requests that commit are totally ordered across views"
222 | 			- request not included in the pre-prepare: " decouples the  protocol to  totally  order
223 | 			requests from the protocol to transmit the request to the
224 | 			replicas"
225 | 			- generate state proofs at checkpoints, say every 100 requests
226 | 			- garbage collect messages upto checkpoints
227 | 			- complicated view change semantics ...
228 | 			- "We also believe that it is possible to reduce the number of copies of the state to f+1 but the details remain to be worked out"
229 | 
230 | 	  }
231 | }
232 | 
233 | @inproceedings{yin2003separating,
234 |   title={Separating agreement from execution for byzantine fault tolerant services},
235 |   author={Yin, Jian and Martin, Jean-Philippe and Venkataramani, Arun and Alvisi, Lorenzo and Dahlin, Mike},
236 |   booktitle={ACM SIGOPS Operating Systems Review},
237 |   volume={37},
238 |   number={5},
239 |   pages={253--267},
240 |   year={2003},
241 |   organization={ACM},
242 |   annote={
243 | 	realized that we can have 3f+1 in agreement replicas and 2f+1 in executing replicas
244 |   }
245 | }
246 | 
247 | @incollection{mutex,
248 |   title={Solution of a problem in concurrent programming control},
249 |   author={Dijkstra, Edsger W},
250 |   booktitle={Pioneers and Their Contributions to Software Engineering},
251 |   pages={289--294},
252 |   year={2001},
253 |   publisher={Springer}
254 | }
255 | 
256 | 
257 | @inproceedings{gray1981transaction,
258 |   title={The transaction concept: Virtues and limitations},
259 |   author={Gray, Jim and others},
260 |   booktitle={VLDB},
261 |   volume={81},
262 |   pages={144--154},
263 |   year={1981}
264 | }
265 | 
266 | @book{gray1978notes,
267 |   title={Notes on data base operating systems},
268 |   author={Gray, James N},
269 |   year={1978},
270 |   publisher={Springer}
271 | }
272 | 
273 | @article{eswaran1976notions,
274 |   title={The notions of consistency and predicate locks in a database system},
275 |   author={Eswaran, Kapali P. and Gray, Jim N and Lorie, Raymond A. and Traiger, Irving L.},
276 |   journal={Communications of the ACM},
277 |   volume={19},
278 |   number={11},
279 |   pages={624--633},
280 |   year={1976},
281 |   publisher={ACM}
282 | }
283 | 
284 | @article{skeen1983formal,
285 |   title={A formal model of crash recovery in a distributed system},
286 |   author={Skeen, Dale and Stonebraker, Michael},
287 |   journal={Software Engineering, IEEE Transactions on},
288 |   number={3},
289 |   pages={219--228},
290 |   year={1983},
291 |   publisher={IEEE},
292 |   note={three phase commit}
293 | }
294 | 
295 | 
296 | @inproceedings{raft,
297 |   title={In search of an understandable consensus algorithm},
298 |   author={Ongaro, Diego and Ousterhout, John},
299 |   booktitle={2014 USENIX Annual Technical Conference (USENIX ATC 14)},
300 |   pages={305--319},
301 |   year={2014}
302 | }
303 | 
304 | @phdthesis{raft_thesis,
305 |   title={Consensus: Bridging theory and practice},
306 |   author={Ongaro, Diego},
307 |   year={2014},
308 |   school={Stanford University}
309 | }
310 | 
311 | @article{cigarette_smokers,
312 |   title={On a solution and a generalization of the Cigarette Smokers' Problem},
313 |   author={Habermann, A Nico},
314 |   year={1972}
315 | }
316 | 
317 | @article{dining_philosophers,
318 |   title={Hierarchical ordering of sequential processes},
319 |   author={Dijkstra, Edsger W.},
320 |   journal={Acta informatica},
321 |   volume={1},
322 |   number={2},
323 |   pages={115--138},
324 |   year={1971},
325 |   publisher={Springer}
326 | }
327 | 
328 | 
329 | @inproceedings{kotla2004high,
330 |   title={High throughput Byzantine fault tolerance},
331 |   author={Kotla, Ramakrishna and Dahlin, Mike},
332 |   booktitle={Dependable Systems and Networks, 2004 International Conference on},
333 |   pages={575--584},
334 |   year={2004},
335 |   organization={IEEE}
336 | }
337 | 
338 | @inproceedings{kotla2007zyzzyva,
339 |   title={Zyzzyva: speculative byzantine fault tolerance},
340 |   author={Kotla, Ramakrishna and Alvisi, Lorenzo and Dahlin, Mike and Clement, Allen and Wong, Edmund},
341 |   booktitle={ACM SIGOPS Operating Systems Review},
342 |   volume={41},
343 |   number={6},
344 |   pages={45--58},
345 |   year={2007},
346 |   organization={ACM}
347 | }
348 | 
349 | 
350 | @inproceedings{garcia2011efficient,
351 |   title={Efficient middleware for byzantine fault tolerant database replication},
352 |   author={Garcia, Rui and Rodrigues, Rodrigo and Pregui{\c{c}}a, Nuno},
353 |   booktitle={Proceedings of the sixth conference on Computer systems},
354 |   pages={107--122},
355 |   year={2011},
356 |   organization={ACM}
357 | }
358 | 
359 | @inproceedings{canetti1993fast,
360 |   title={Fast asynchronous Byzantine agreement with optimal resilience},
361 |   author={Canetti, Ran and Rabin, Tal},
362 |   booktitle={Proceedings of the twenty-fifth annual ACM symposium on Theory of computing},
363 |   pages={42--51},
364 |   year={1993},
365 |   organization={ACM}
366 | }
367 | 
368 | @inproceedings{cachin2000random,
369 |   title={Random oracles in {C}onstantinople: practical asynchronous Byzantine agreement using cryptography},
370 |   author={Cachin, Christian and Kursawe, Klaus and Shoup, Victor},
371 |   booktitle={Proceedings of the nineteenth annual ACM symposium on Principles of distributed computing},
372 |   pages={123--132},
373 |   year={2000},
374 |   organization={ACM}
375 | }
376 | 
377 | @inproceedings{feldman1988optimal,
378 |   title={Optimal algorithms for Byzantine agreement},
379 |   author={Feldman, Paul and Micali, Silvio},
380 |   booktitle={Proceedings of the twentieth annual ACM symposium on Theory of computing},
381 |   pages={148--161},
382 |   year={1988},
383 |   organization={ACM}
384 | }
385 | 
386 | @inproceedings{oki1988viewstamped,
387 |   title={Viewstamped replication: A new primary copy method to support highly-available distributed systems},
388 |   author={Oki, Brian M and Liskov, Barbara H},
389 |   booktitle={Proceedings of the seventh annual ACM Symposium on Principles of distributed computing},
390 |   pages={8--17},
391 |   year={1988},
392 |   organization={ACM}
393 | }
394 | 
395 | 
396 | @inproceedings{junqueira2011zab,
397 |   title={Zab: High-performance broadcast for primary-backup systems},
398 |   author={Junqueira, Flavio P and Reed, Benjamin C and Serafini, Marco},
399 |   booktitle={Dependable Systems \& Networks (DSN), 2011 IEEE/IFIP 41st International Conference on},
400 |   pages={245--256},
401 |   year={2011},
402 |   organization={IEEE}
403 | }
404 | 
405 | @article{van2015vive,
406 |   title={Vive la diff{\'e}rence: Paxos vs. viewstamped replication vs. zab},
407 |   author={Van Renesse, Robbert and Schiper, Nicolas and Schneider, Fred B},
408 |   journal={Dependable and Secure Computing, IEEE Transactions on},
409 |   volume={12},
410 |   number={4},
411 |   pages={472--484},
412 |   year={2015},
413 |   publisher={IEEE}
414 | }
415 | 
416 | @article{cachin2016non,
417 |   title={Non-determinism in Byzantine Fault-Tolerant Replication},
418 |   author={Cachin, Christian and Schubert, Simon and Vukoli{\'c}, Marko},
419 |   journal={arXiv preprint arXiv:1603.07351},
420 |   year={2016}
421 | }
422 | 
423 | @misc{raft.github.io,
424 |   title = {The Raft Consensus Algorithm},
425 |   howpublished = {http://raft.github.io},
426 |   note = {Accessed: 2016-04-01}
427 | }
428 | 
429 | @article{tangaroa,
430 |   title={Tangaroa: a Byzantine Fault Tolerant Raft},
431 |   author={Copeland, Christopher and Zhong, Hongxia}
432 | }
433 | 
434 | @article{taleb2014skin,
435 |   title={The skin in the game heuristic for protection against tail events},
436 |   author={Taleb, Nassim Nicholas and Sandis, Constantine},
437 |   journal={Review of Behavioral Economics},
438 |   volume={1},
439 |   pages={1--21},
440 |   year={2014}
441 | }
442 | 
443 | 
444 | @article{schneider1990implementing,
445 |   title={Implementing fault-tolerant services using the state machine approach: A tutorial},
446 |   author={Schneider, Fred B},
447 |   journal={ACM Computing Surveys (CSUR)},
448 |   volume={22},
449 |   number={4},
450 |   pages={299--319},
451 |   year={1990},
452 |   publisher={ACM}
453 | }
454 | 
455 | @misc{jepsen,
456 |   title = {JEPSEN - Distributed Systems Safety Analysis},
457 |   howpublished = {http://jepsen.io},
458 |   note = {Accessed: 2016-05-12}
459 | }
460 | https://aphyr.com/tags/Jepsen
461 | 


--------------------------------------------------------------------------------
/bib/crypto.bib:
--------------------------------------------------------------------------------
  1 | 
  2 | @misc{bitcoin,
  3 |   title={Bitcoin: A peer-to-peer electronic cash system},
  4 |   author={Nakamoto, Satoshi},
  5 |   year={2008}
  6 | }
  7 | 
  8 | 
  9 | @article{ethereum,
 10 |   title={Ethereum: A secure decentralised generalised transaction ledger},
 11 |   author={Wood, Gavin},
 12 |   journal={Ethereum Project Yellow Paper},
 13 |   year={2014}
 14 | }
 15 | 
 16 | @article{sidechains,
 17 |   title={Enabling blockchain innovations with pegged sidechains},
 18 |   author={Back, Adam and Maxwell, G and Corallo, M and Friedenbach, Mark and Dashjr, L},
 19 |   year={2014}
 20 | }
 21 | 
 22 | 
 23 | @article{peercoin,
 24 |   title={Ppcoin: Peer-to-peer crypto-currency with proof-of-stake},
 25 |   author={King, Sunny and Nadal, Scott},
 26 |   journal={self-published paper, August},
 27 |   volume={19},
 28 |   year={2012}
 29 | }
 30 | 
 31 | 
 32 | @misc{bittorrent,
 33 |   title={The BitTorrent protocol specification},
 34 |   author={Cohen, Bram},
 35 |   year={2008},
 36 |   publisher={BITTORRENT}
 37 | }
 38 | 
 39 | @inproceedings{libswift,
 40 |   title={Performance analysis of the libswift p2p streaming protocol},
 41 |   author={Petrocco, Riccardo and Pouwelse, Johan and Epema, Dick HJ},
 42 |   booktitle={Peer-to-Peer Computing (P2P), 2012 IEEE 12th International Conference on},
 43 |   pages={103--114},
 44 |   year={2012},
 45 |   organization={IEEE}
 46 | }
 47 | 
 48 | 
 49 | @inproceedings{merkle1987digital,
 50 |   title={A digital signature based on a conventional encryption function},
 51 |   author={Merkle, Ralph C},
 52 |   booktitle={Advances in Cryptology---CRYPTO'87},
 53 |   pages={369--378},
 54 |   year={1987},
 55 |   organization={Springer}
 56 | }
 57 | 
 58 | @article{shamir1979share,
 59 |   title={How to share a secret},
 60 |   author={Shamir, Adi},
 61 |   journal={Communications of the ACM},
 62 |   volume={22},
 63 |   number={11},
 64 |   pages={612--613},
 65 |   year={1979},
 66 |   publisher={ACM}
 67 | }
 68 | 
 69 | @inproceedings{replay,
 70 |   title={A taxonomy of replay attacks [cryptographic protocols]},
 71 |   author={Syverson, Paul},
 72 |   booktitle={Computer Security Foundations Workshop VII, 1994. CSFW 7. Proceedings},
 73 |   pages={187--191},
 74 |   year={1994},
 75 |   organization={IEEE}
 76 | }
 77 | 
 78 | @inproceedings{vukolic11quest,
 79 |   title={The quest for scalable blockchain fabric: Proof-of-work vs. BFT replication},
 80 |   author={Vukoli{\'c}, Marko},
 81 |   booktitle={Proc. IFIP WG 11.4 Workshop on Open Research Problems in Network Security (iNetSec 2015)}
 82 | }
 83 | 
 84 | @techreport{honeybadger,
 85 |   title={The Honey Badger of BFT Protocols},
 86 |   author={Miller, Andrew and Xia, Yu and Croman, Kyle and Shi, Elaine and Song, Dawn},
 87 |   year={2016},
 88 |   institution={Cryptology ePrint Archive 2016/199}
 89 | }
 90 | 
 91 | 
 92 | @techreport{poon2015bitcoin,
 93 |   title={The bitcoin lightning network: Scalable off-chain instant payments},
 94 |   author={Poon, Joseph and Dryja, Thaddeus},
 95 |   year={2015},
 96 |   institution={Technical Report (draft). https://lightning.network}
 97 | }
 98 | 
 99 | @article{miller2014anonymous,
100 |   title={Anonymous byzantine consensus from moderately-hard puzzles: A model for bitcoin},
101 |   author={Miller, Andrew and LaViola Jr, Joseph J},
102 |   journal={Retrieved from Anonymous Byzantine Consensus from Moderately-Hard Puzzles: A Model for Bitcoin},
103 |   year={2014}
104 | }
105 | 
106 | @inproceedings{miller2015nonoutsourceable,
107 |   title={Nonoutsourceable Scratch-Off Puzzles to Discourage Bitcoin Mining Coalitions},
108 |   author={Miller, Andrew and Kosba, Ahmed and Katz, Jonathan and Shi, Elaine},
109 |   booktitle={Proceedings of the 22nd ACM SIGSAC Conference on Computer and Communications Security},
110 |   pages={680--691},
111 |   year={2015},
112 |   organization={ACM}
113 | }
114 | 
115 | @article{eyal2015bitcoin,
116 |   title={Bitcoin-ng: A scalable blockchain protocol},
117 |   author={Eyal, Ittay and Gencer, Adem Efe and Sirer, Emin G{\"u}n and van Renesse, Robbert},
118 |   journal={arXiv preprint arXiv:1510.02037},
119 |   year={2015}
120 | }
121 | 
122 | @incollection{eyal2014majority,
123 |   title={Majority is not enough: Bitcoin mining is vulnerable},
124 |   author={Eyal, Ittay and Sirer, Emin G{\"u}n},
125 |   booktitle={Financial Cryptography and Data Security},
126 |   pages={436--454},
127 |   year={2014},
128 |   publisher={Springer}
129 | }
130 | 
131 | @article{courtois2014subversive,
132 |   title={On subversive miner strategies and block withholding attack in bitcoin digital currency},
133 |   author={Courtois, Nicolas T and Bahack, Lear},
134 |   journal={arXiv preprint arXiv:1402.1718},
135 |   year={2014}
136 | }
137 | 
138 | @misc{buterin2013ethereum,
139 |   title={Ethereum white paper: a next generation smart contract \& decentralized application platform},
140 |   author={Buterin, Vitalik},
141 |   year={2013}
142 | }
143 | 
144 | @incollection{ghost,
145 |   title={Secure high-rate transaction processing in Bitcoin},
146 |   author={Sompolinsky, Yonatan and Zohar, Aviv},
147 |   booktitle={Financial Cryptography and Data Security},
148 |   pages={507--527},
149 |   year={2015},
150 |   publisher={Springer}
151 | }
152 | 
153 | @misc{blockchaininfo,
154 |   title = {Bitcoin Blockchain Charts},
155 |   howpublished = {https://blockchain.info/charts},
156 |   note = {Accessed: 2016-04-01}
157 | }
158 | 
159 | @misc{obc,
160 |   title = {OpenBlockChain: Blockchain Fabric Code},
161 |   howpublished = {https://github.com/openblockchain/obc-peer},
162 |   note = {Accessed: 2016-04-01}
163 | }
164 | 
165 | @misc{deterministicjs,
166 |   title = {A Deterministic Version of Javascript},
167 |   howpublished = {https://github.com/NodeGuy/Deterministic.js},
168 |   note = {Accessed: 2016-04-01}
169 | }
170 | 
171 | @misc{slasher,
172 |   title = {Slasher: a punitive proof of stake algorithm},
173 |   author={Buterin, Vitalik},
174 |   howpublished = {https://blog.ethereum.org/2014/01/15/slasher-a-punitive-proof-of-stake-algorithm/},
175 |   note = {Accessed: 2016-04-01}
176 | }
177 | 
178 | 
179 | 
180 | @incollection{ed25519,
181 |   title={Curve25519: new Diffie-Hellman speed records},
182 |   author={Bernstein, Daniel J},
183 |   booktitle={Public Key Cryptography-PKC 2006},
184 |   pages={207--228},
185 |   year={2006},
186 |   publisher={Springer}
187 | }
188 | 
189 | @incollection{economist_blockchains,
190 |   title={The Trust Machine},
191 |   year={2015},
192 |   publisher={The Economist}
193 | }
194 | 
195 | @article{authenticated_encryption,
196 |   title={Authentication and authenticated key exchanges},
197 |   author={Diffie, Whitfield and Van Oorschot, Paul C and Wiener, Michael J},
198 |   journal={Designs, Codes and cryptography},
199 |   volume={2},
200 |   number={2},
201 |   pages={107--125},
202 |   year={1992},
203 |   publisher={Springer}
204 | }
205 | 
206 | @book{snowden,
207 |   title={No place to hide: Edward Snowden, the NSA, and the US surveillance state},
208 |   author={Greenwald, Glenn},
209 |   year={2014},
210 |   publisher={Macmillan}
211 | }
212 | 
213 | @article{schneier2007did,
214 |   title={Did NSA put a secret backdoor in new encryption standard?, 2007},
215 |   author={Schneier, Bruce},
216 |   journal={URL http://archive.wired.com/politics/security/commentary/securitymatters/2007/11/securitymatters},
217 |   volume={1115}
218 | }
219 | 
220 | @book{levy2001crypto,
221 |   title={Crypto: How the Code Rebels Beat the Government--Saving Privacy in the Digital Age},
222 |   author={Levy, Steven},
223 |   year={2001},
224 |   publisher={Penguin}
225 | }
226 | 
227 | @article{szabo1997formalizing,
228 |   title={Formalizing and securing relationships on public networks},
229 |   author={Szabo, Nick},
230 |   journal={First Monday},
231 |   volume={2},
232 |   number={9},
233 |   year={1997}
234 | }
235 | 
236 | 
237 | @misc{juno,
238 |   author = {Buckie},
239 |   title = {Juno - Smart Contracts Running on a BFT Hardened Raft},
240 |   year = {2016},
241 |   publisher = {GitHub},
242 |   journal = {GitHub repository},
243 |   howpublished = {\url{https://github.com/buckie/juno}},
244 |   commit = {3426e7344389a66b19b64635f8b43fc0ec95aafd}
245 | }
246 | 
247 | 
248 | @misc{casper,
249 |   title = {Introducing Casper “the Friendly Ghost”},
250 |   author = {Vlad Zamfir},
251 |   howpublished = {https://blog.ethereum.org/2015/08/01/introducing-casper-friendly-ghost/},
252 |   note = {Accessed: 2016-05-12}
253 | }
254 | 
255 | @article{king2012ppcoin,
256 |   title={Ppcoin: Peer-to-peer crypto-currency with proof-of-stake},
257 |   author={King, Sunny and Nadal, Scott},
258 |   journal={self-published paper, August},
259 |   volume={19},
260 |   year={2012}
261 | }
262 | 
263 | @misc{poelstra2014distributed,
264 |   title={Distributed Consensus from Proof of Stake is Impossible},
265 |   author={Poelstra, Andrew and others},
266 |   year={2014},
267 |   publisher={May}
268 | }
269 | 
270 | 
271 | @inproceedings{ben1988completeness,
272 |   title={Completeness theorems for non-cryptographic fault-tolerant distributed computation},
273 |   author={Ben-Or, Michael and Goldwasser, Shafi and Wigderson, Avi},
274 |   booktitle={Proceedings of the twentieth annual ACM symposium on Theory of computing},
275 |   pages={1--10},
276 |   year={1988},
277 |   organization={ACM}
278 | }
279 | 
280 | 


--------------------------------------------------------------------------------
/bib/formal.bib:
--------------------------------------------------------------------------------
  1 | 
  2 | @article{girard1987linear,
  3 |   title={Linear logic},
  4 |   author={Girard, Jean-Yves},
  5 |   journal={Theoretical computer science},
  6 |   volume={50},
  7 |   number={1},
  8 |   pages={1--101},
  9 |   year={1987},
 10 |   publisher={Elsevier}
 11 | }
 12 | 
 13 | @incollection{bove2009dependent,
 14 |   title={Dependent types at work},
 15 |   author={Bove, Ana and Dybjer, Peter},
 16 |   booktitle={Language engineering and rigorous software development},
 17 |   pages={57--99},
 18 |   year={2009},
 19 |   publisher={Springer}
 20 | }
 21 | 
 22 | @inproceedings{wilcox2015verdi,
 23 |   title={Verdi: A framework for implementing and formally verifying distributed systems},
 24 |   author={Wilcox, James R and Woos, Doug and Panchekha, Pavel and Tatlock, Zachary and Wang, Xi and Ernst, Michael D and Anderson, Thomas},
 25 |   booktitle={Proceedings of the 36th ACM SIGPLAN Conference on Programming Language Design and Implementation},
 26 |   pages={357--368},
 27 |   year={2015},
 28 |   organization={ACM}
 29 | }
 30 | 
 31 | @inproceedings{woos2016planning,
 32 |   title={Planning for change in a formal verification of the raft consensus protocol},
 33 |   author={Woos, Doug and Wilcox, James R and Anton, Steve and Tatlock, Zachary and Ernst, Michael D and Anderson, Thomas},
 34 |   booktitle={Proceedings of the 5th ACM SIGPLAN Conference on Certified Programs and Proofs},
 35 |   pages={154--165},
 36 |   year={2016},
 37 |   organization={ACM}
 38 | }
 39 | 
 40 | @book{csp,
 41 | 	title={Communicating sequential processes},
 42 | 	author={Hoare, Charles Antony Richard},
 43 | 	year={1978},
 44 | 	publisher={Springer}
 45 | }
 46 | 
 47 | @article{misra1989simple,
 48 |   title={A simple proof of a simple consensus algorithm},
 49 |   author={Misra, Jayadev},
 50 |   journal={Information processing letters},
 51 |   volume={33},
 52 |   number={1},
 53 |   pages={21--24},
 54 |   year={1989},
 55 |   publisher={Elsevier},
 56 |   note={proof of simple consensus with N > 4t+1 using equational reasoning}
 57 | }
 58 | 
 59 | 
 60 | @inproceedings{tsuchiya2007model,
 61 |   title={Model Checking of Consensus Algorithms},
 62 |   author={Tsuchiya, Tatsuhiro and Schiper, Andr{\'e}},
 63 |   booktitle={Reliable Distributed Systems, 2007. SRDS 2007. 26th IEEE International Symposium on},
 64 |   pages={137--148},
 65 |   year={2007},
 66 |   organization={IEEE},
 67 |   annote={
 68 | 	only uses model checking (ie temporal logic) to verify.
 69 |  	uses Heard-Of model, which captures synchrony degree and any type of non-malicious fault
 70 | 	"first time standard model checking allows one to completely verify asynchronous consensus algorithms"
 71 |   }
 72 | }
 73 | 
 74 | @techreport{charron2006heard,
 75 |   title={The heard-of model: Unifying all benign failures},
 76 |   author={Charron-Bost, Bernadette and Schiper, Andr{\'e}},
 77 |   year={2006}
 78 | }
 79 | 
 80 | 
 81 | @incollection{francalanza2007fault,
 82 |   title={A fault tolerance bisimulation proof for consensus},
 83 |   author={Francalanza, Adrian and Hennessy, Matthew},
 84 |   booktitle={Programming Languages and Systems},
 85 |   pages={395--410},
 86 |   year={2007},
 87 |   publisher={Springer},
 88 |   annote={
 89 | 	fine tune bisimulation techniques for partial failure settings
 90 | 	some processes are reliable, thus immortal
 91 | 	decompose to two parts: non-fault tolerant basic correctness, correctness preservation given faults
 92 | 	based on riely1997distributed
 93 | 	"We view our calculus as a partial-failure calculus rather than a distributed calculus as it permits action synchronisations across locations. This implies a tighter synchronisation assumption between locations, which in our calculus merely embody units of failure."
 94 | 	op semantics are basic CCS plus perfect failure detection (susp) and dynamic failures (halt)
 95 | 
 96 |   }
 97 | }
 98 | 
 99 | 
100 | @book{riely1997distributed,
101 |   title={Distributed processes and location failures},
102 |   author={Riely, James and Hennessy, Matthew},
103 |   year={1997},
104 |   publisher={Springer}
105 | }
106 | 
107 | 
108 | @article{chandra1996unreliable,
109 |   title={Unreliable failure detectors for reliable distributed systems},
110 |   author={Chandra, Tushar Deepak and Toueg, Sam},
111 |   journal={Journal of the ACM (JACM)},
112 |   volume={43},
113 |   number={2},
114 |   pages={225--267},
115 |   year={1996},
116 |   publisher={ACM},
117 |   note={
118 |     show atomic broadcast and consensus are same thing in asynchronous networks
119 |     failure detectors: completeness and accuracy
120 |     completeness - all faulty processes eventually suspected
121 |     accuracy - restrict mistakes
122 |     defines weakest failure detector necessary and sufficient for consensus
123 |     defines the hierarchy and shows where we go from tolerating any num of faults to only (n-1)/2
124 |     
125 |   }
126 | }
127 | 
128 | @incollection{nestmann2003modeling,
129 |   title={Modeling consensus in a process calculus},
130 |   author={Nestmann, Uwe and Fuzzati, Rachele and Merro, Massimo},
131 |   booktitle={CONCUR 2003-Concurrency Theory},
132 |   pages={399--414},
133 |   year={2003},
134 |   publisher={Springer},
135 |   note={builds on chandra1996unreliable using simpler form of weakest failure detector
136 | 	models failure detector in pi calc, 
137 | 	complete pi calc description of algorithm!
138 | 	"matrix" view of network history allows to see all sent messages and state of receval,
139 | 		useful for proofs
140 |   }
141 | }
142 | 
143 | 
144 | @article{palamidessi2003comparing,
145 |   title={Comparing the expressive power of the synchronous and asynchronous $\pi$-calculi},
146 |   author={Palamidessi, Catuscia},
147 |   journal={Mathematical Structures in Computer Science},
148 |   volume={13},
149 |   number={05},
150 |   pages={685--719},
151 |   year={2003},
152 |   publisher={Cambridge Univ Press},
153 |   note={
154 | 	this paper is actually an update of the original, from 1997
155 | 	asynch pi-calc can't encode mixed guarded choice, 
156 | 	where the encoding is uniform, ie. [[ P | Q ]] = [[ P ]] | [[ Q ]].
157 | 	and [[ \sigma(P) ]]  = \sigma( [[ P ]] ) where sigma is an automorphism (injective renaming function)
158 | 	mixed guard has an initial symmetry that needs to be broken, 
159 | 	which can be seen as a leader election process, which is impossible in async
160 | 	pi calc for symmetric network.
161 | 	electoral system is one in which the projection of a computation on each process
162 | 	has the processes all outputting the same value on their out chan
163 |   }
164 | }
165 | 
166 | @article{nestmann2000good,
167 |   title={What is a “good” encoding of guarded choice?},
168 |   author={Nestmann, Uwe},
169 |   journal={Information and computation},
170 |   volume={156},
171 |   number={1},
172 |   pages={287--319},
173 |   year={2000},
174 |   publisher={Elsevier},
175 |   note={
176 | 	important are compositionality and divergence freedom (no infinite loops)
177 | 	clarifies the Palamidessi result
178 | 	mixed choice can be done if willing to relax Palamidessi assumptions,
179 | 	either with randomized agreement (though apparently this disqualifies Palamidessi's reasonableness,
180 | 	though we know it can converge with probability 1), or arbitrary total order over the nodes, 
181 | 	which would break symmetry.
182 | 	an awesome paper
183 |   }
184 | }
185 | 
186 | @article{phillips2004correct,
187 |   title={A correct abstract machine for the stochastic pi-calculus},
188 |   author={Phillips, Andrew and Cardelli, Luca},
189 |   journal={Electronic Notes in Theoretical Computer Science},
190 |   year={2004},
191 |   note={
192 | 	how to implement the stochastic pi calculus
193 |   }
194 | }
195 | 
196 | 
197 | @article{priami1995stochastic,
198 |   title={Stochastic $\pi$-calculus},
199 |   author={Priami, Corrado},
200 |   journal={The Computer Journal},
201 |   volume={38},
202 |   number={7},
203 |   pages={578--589},
204 |   year={1995},
205 |   publisher={Br Computer Soc},
206 |   note={originally extension of pi-calc to probabilistic rates
207 | 	choice becomes probabilistic rather than non-deterministic
208 | 	delays are drawn from exponential distribution. memoryless.
209 | 	continuity of prob distribution ensures the prob of two events ending at same time is 0 (contention in the choice)
210 | 	rates on input/output can be independent, so rate of synch is that of the smaller one
211 | 	prob of a transition is rate of its action divided by the total rate leaving the state
212 | 	
213 |   }
214 | }
215 | 
216 | 
217 | @inproceedings{ene1999expressiveness,
218 |   title={Expressiveness of point-to-point versus broadcast communications},
219 |   author={Ene, Cristian and Muntean, Traian},
220 |   booktitle={Fundamentals of Computation Theory},
221 |   pages={258--268},
222 |   year={1999},
223 |   organization={Springer}
224 | }
225 | 
226 | 
227 | 
228 | @article{lucchi2007pi,
229 |   title={A pi-calculus based semantics for WS-BPEL},
230 |   author={Lucchi, Roberto and Mazzara, Manuel},
231 |   journal={The Journal of Logic and Algebraic Programming},
232 |   volume={70},
233 |   number={1},
234 |   pages={96--118},
235 |   year={2007},
236 |   publisher={Elsevier}
237 | }
238 | 
239 | @inproceedings{phillips2007efficient,
240 |   title={Efficient, correct simulation of biological processes in the stochastic pi-calculus},
241 |   author={Phillips, Andrew and Cardelli, Luca},
242 |   booktitle={Computational methods in systems biology},
243 |   pages={184--199},
244 |   year={2007},
245 |   organization={Springer}
246 | }
247 | 
248 | @article{abramsky1994proofs,
249 |   title={Proofs as processes},
250 |   author={Abramsky, Samson},
251 |   journal={Theoretical Computer Science},
252 |   volume={135},
253 |   number={1},
254 |   pages={5--9},
255 |   year={1994},
256 |   publisher={Elsevier}
257 | }
258 | 
259 | @inproceedings{ryan2015beyond,
260 |   title={Beyond Flash Boys: Improving Transparency and Fairness in Financial Markets},
261 |   author={Ryan, Ronan},
262 |   booktitle={CFA Institute Conference Proceedings Quarterly},
263 |   volume={32},
264 |   number={4},
265 |   pages={10--17},
266 |   year={2015},
267 |   organization={CFA Institute}
268 | }
269 | 
270 | 
271 | @article{milner1992calculus,
272 |   title={A calculus of mobile processes, i},
273 |   author={Milner, Robin and Parrow, Joachim and Walker, David},
274 |   journal={Information and computation},
275 |   volume={100},
276 |   number={1},
277 |   pages={1--40},
278 |   year={1992},
279 |   publisher={Elsevier}
280 | }
281 | 
282 | @article{stirling1991local,
283 |   title={Local model checking in the modal mu-calculus},
284 |   author={Stirling, Colin and Walker, David},
285 |   journal={Theoretical Computer Science},
286 |   volume={89},
287 |   number={1},
288 |   pages={161--177},
289 |   year={1991},
290 |   publisher={Elsevier}
291 | }
292 | 
293 | @article{caires2003spatial,
294 |   title={A spatial logic for concurrency (part I)},
295 |   author={Caires, Lu{\'\i}s and Cardelli, Luca},
296 |   journal={Information and Computation},
297 |   volume={186},
298 |   number={2},
299 |   pages={194--235},
300 |   year={2003},
301 |   publisher={Elsevier}
302 | }
303 | 
304 | @article{vieira2004spatial,
305 |   title={The spatial logic model checker user’s manual},
306 |   author={Vieira, Hugo and Caires, Lu{\'\i}s and Viegas, Ruben},
307 |   year={2004},
308 |   publisher={Citeseer}
309 | }
310 | 
311 | @article{milner1993modal,
312 |   title={Modal logics for mobile processes},
313 |   author={Milner, Robin and Parrow, Joachim and Walker, David},
314 |   journal={Theoretical Computer Science},
315 |   volume={114},
316 |   number={1},
317 |   pages={149--171},
318 |   year={1993},
319 |   publisher={Elsevier}
320 | }
321 | 
322 | 


--------------------------------------------------------------------------------
/bib/programming.bib:
--------------------------------------------------------------------------------
 1 | 
 2 | @techreport{avl,
 3 |   title={An algorithm for the organization of information},
 4 |   author={AdelsonVelskii, M and Landis, Evgenii Mikhailovich},
 5 |   year={1963},
 6 |   institution={DTIC Document}
 7 | }
 8 | 
 9 | @article{golang,
10 |   title={The Go Programming Language},
11 |   author={Pike, Rob},
12 |   journal={Talk given at Google’s Tech Talks},
13 |   year={2009}
14 | }
15 | 
16 | 
17 | @misc{openssl,
18 |   title = {OpenSSL Vulnerabilities},
19 |   howpublished = {https://www.openssl.org/news/vulnerabilities.html},
20 |   note = {Accessed: 2016-04-01}
21 | }
22 | 
23 | @misc{csp_go,
24 |   title = {Share Memory By Communicating},
25 |   howpublished = {https://blog.golang.org/share-memory-by-communicating},
26 |   note = {Accessed: 2016-05-12}
27 | }
28 | 
29 | @misc{jsonrpc,
30 |   title = {JSON-RPC},
31 |   howpublished = {http://json-rpc.org/},
32 |   note = {Accessed: 2016-04-01}
33 | }
34 | 
35 | @article{halting,
36 |   title={On computable numbers, with an application to the Entscheidungsproblem},
37 |   author={Turing, Alan Mathison},
38 |   journal={Proceedings of the London Mathematical Society},
39 |   volume={s2-42},
40 |   number={1},
41 |   pages={230--265},
42 |   year={1936}
43 | }
44 | 
45 | @book{davis1958computability,
46 |   title={Computability \& unsolvability},
47 |   author={Davis, Martin},
48 |   year={1958},
49 |   publisher={Courier Corporation}
50 | }
51 | 
52 | 
53 | @article{protobuf,
54 |   title={Protocol buffers: Google’s data interchange format},
55 |   author={Varda, Kenton},
56 |   journal={Google Open Source Blog, Available at least as early as Jul},
57 |   year={2008}
58 | }
59 | 
60 | @inproceedings{rarest_first,
61 |   title={Rarest first and choke algorithms are enough},
62 |   author={Legout, Arnaud and Urvoy-Keller, Guillaume and Michiardi, Pietro},
63 |   booktitle={Proceedings of the 6th ACM SIGCOMM conference on Internet measurement},
64 |   pages={203--216},
65 |   year={2006},
66 |   organization={ACM}
67 | }
68 | 
69 | @article{hursch1995separation,
70 |   title={Separation of concerns},
71 |   author={H{\"u}rsch, Walter L and Lopes, Cristina Videira},
72 |   year={1995},
73 |   publisher={Citeseer}
74 | }
75 | 
76 | 
77 | 


--------------------------------------------------------------------------------
/build.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | set -e  # stop at the first command that fails
 3 | # The shell functions below take precedence over PATH lookup, so every pdflatex/bibtex call in this script runs the TeX Live 2015 binary at the hard-coded path.
 4 | pdflatex(){
 5 | 	/usr/local/texlive/2015/bin/universal-darwin/pdflatex "$@"
 6 | }
 7 | 
 8 | bibtex(){
 9 | 	/usr/local/texlive/2015/bin/universal-darwin/bibtex "$@"
10 | }
11 | # Optional first argument: the chapter to build on its own; when empty the whole document is built.
12 | CHAPTER=$1
13 | 
14 | if [[  "$CHAPTER" == "" ]]; then  # no chapter given: full build
15 | 	pdflatex main.tex
16 | 	bibtex main
17 | 	pdflatex main.tex  # two further passes after bibtex (the usual LaTeX/BibTeX sequence for resolving citations and references)
18 | 	pdflatex main.tex
19 | 	cp main.pdf Buchman_Ethan_201606_MAsc.pdf  # publish the result under the final file name
20 | else  # chapter given: a single pdflatex pass, no bibliography run
21 | 	pdflatex -jobname=chapBuild "\includeonly{$CHAPTER}\input{main.tex}"  # output goes to chapBuild.*; presumably main.tex pulls chapters in via \include so that \includeonly applies -- confirm
22 | #	bibtex chapBuild
23 | #	pdflatex -jobname=chapBuild "\includeonly{$CHAPTER}\input{main.tex}"
24 | #	pdflatex -jobname=chapBuild "\includeonly{$CHAPTER}\input{main.tex}"
25 | fi
26 | 
27 | 


--------------------------------------------------------------------------------
/chapters/abstract.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | \thispagestyle{plain}
 3 | \begin{center}
 4 |     \textbf{ABSTRACT} \\ 
 5 |     \vspace{0.6cm}
 6 |     \textbf{TENDERMINT: BYZANTINE FAULT TOLERANCE IN THE AGE OF BLOCKCHAINS}
 7 | \end{center}
 8 |     
 9 | \vspace{0.6cm}
10 | \begin{tabular}{ p{0.5\textwidth} p{0.5\textwidth} }
11 | Ethan Buchman & Advisor:\\ 
12 | University of Guelph, 2016 & Professor Graham Taylor
13 | \end{tabular}
14 | 
15 | 
16 | \vspace{0.9cm}
17 | Tendermint is a new protocol for ordering events in a distributed network under adversarial conditions.
18 | More commonly known as consensus or atomic broadcast, the problem has attracted significant attention
19 | recently due to the widespread success of digital currencies, such as Bitcoin and Ethereum,
20 | which successfully solve the problem in public settings without a central authority.
21 | Tendermint modernizes classic academic work on the subject to provide a secure consensus protocol with 
22 | accountability guarantees, as well as an interface for building arbitrary applications above the consensus.
23 | Tendermint is high performance, achieving thousands of transactions per second on dozens of nodes distributed around the globe,
24 | with latencies of about one second, and performance degrading moderately in the face of adversarial attacks.
25 | 
26 | \clearpage
27 | 
28 | 


--------------------------------------------------------------------------------
/chapters/appendix.tex:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | \appendix
4 | \chapter{Appendix Title}
5 | 


--------------------------------------------------------------------------------
/chapters/apps.tex:
--------------------------------------------------------------------------------
  1 | \chapter{Building Applications}
  2 | \label{ch:apps}
  3 | 
  4 | Tendermint is designed to be a general purpose algorithm for replicating a deterministic state machine.
  5 | It uses the Tendermint Socket Protocol (TMSP) to standardize communication between the consensus engine and the state machine,
  6 | enabling application developers to build their state machines in any programming language, 
  7 | and have it automatically replicated via Tendermint's BFT algorithm.
  8 | 
  9 | \section{Background}
 10 | 
 11 | Applications on the Internet can in general be characterized as containing two fundamental components:
 12 | 
 13 | \begin{itemize}
 14 | \item{Engine: handles core security, networking, replication. 
 15 | This is typically a webserver, like Apache or Nginx, when powering a web app, 
 16 | or a consensus algorithm when powering a distributed application.}
 17 | \item{State-machine: the actual application logic that processes transactions received from the engine and updates internal state.}
 18 | \end{itemize}
 19 | 
 20 | This separation of concerns enables application developers to write state-machines in any programming language representing arbitrary applications,
 21 | on top of an engine which may be specialized for its performance, security, usability, support, and other considerations.
 22 | 
 23 | Unlike web-servers and their applications, which often take the form of processes communicating over a socket via the Common Gateway Interface (CGI) protocol,
 24 | consensus algorithms have traditionally had much less usable or less general purpose interfaces to build applications on top of.
 25 | Some, like ZooKeeper, etcd, Consul, and other distributed key-value stores, 
 26 | provide HTTP interfaces to a particular instance of a simple key-value application, 
 27 | with some more interesting features like atomic compare-and-swap operations and push notifications.
 28 | But they do not give the application developer control of the state-machine code itself.
 29 | 
 30 | Demand for such a high level of control over the state-machine running above a consensus engine has been driven primarily by the success of Bitcoin and the consequent interest in blockchain technology.
 31 | By building more advanced applications directly into the consensus, 
 32 | users, developers, regulators, etc.~can achieve greater security guarantees on arbitrary state-machines, 
 33 | far beyond key-value stores, like currencies, exchanges, supply-chain management, governance, and so on.
 34 | What has captured the attention of so many is the potential of a system which permits collective enforcement of the execution of code.
 35 | It is practically a re-invention of many dimensions of the legal system, using distributed consensus algorithms and deterministically executable contracts,
 36 | rather than policemen, lawyers, judges, juries, and the like.
 37 | The ramifications for the development of human society are explosive, much as the introduction of the democratic rule of law was in the first place.
 38 | 
 39 | Tendermint aims to provide the fundamental interface and consensus engine upon which such applications might be built.
 40 | 
 41 | \section{Tendermint Socket Protocol}
 42 | 
 43 | The Tendermint Socket Protocol (TMSP) defines the core interface by which the consensus engine communicates with the application state machine.
 44 | The interface definition consists of a number of message types, specified using Google's Protocol Buffers \cite{protobuf}, 
 45 | that are length-prefixed and transmitted over a socket. 
 46 | A list of message types, their arguments, return values, and purpose is given in Figure \ref{fig:tmsp_msgs},
 47 | and an overview of the architecture and message flow is shown in Figure \ref{fig:tmsp}.
 48 | 
 49 | \begin{figure}[]
 50 | \vspace*{-1.5in}
 51 |     	\centering
 52 | \begin{lstlisting}
 53 | 
 54 | type Application interface {
 55 | 	// Return application info
 56 | 	Info() (info string)
 57 | 
 58 | 	// Set application option
 59 | 	SetOption(key string, value string) (log string)
 60 | 
 61 | 	// Append a tx
 62 | 	AppendTx(tx []byte) Result
 63 | 
 64 | 	// Validate a tx for the mempool
 65 | 	CheckTx(tx []byte) Result
 66 | 
 67 | 	// Return the application Merkle root hash
 68 | 	Commit() Result
 69 | 
 70 | 	// Query for state
 71 | 	Query(query []byte) Result
 72 | 
 73 | 	// Signals the beginning of a block
 74 | 	BeginBlock(height uint64) 
 75 | 
 76 | 	// Signals the end of a block
 77 | 	// validators: changed validators from app to TendermintCore
 78 | 	EndBlock(height uint64) (validators []*Validator)
 79 | }
 80 | 
 81 | type CodeType int32
 82 | 
 83 | type Result struct {
 84 | 	Code CodeType
 85 | 	Data []byte
 86 | 	Log  string // Can be non-deterministic
 87 | }
 88 | 
 89 | type Validator struct {
 90 | 	PubKey []byte 
 91 | 	Power  uint64 
 92 | }
 93 | \end{lstlisting}
 94 | 	\caption[TMSP Message Types]{
 95 | The TMSP application interface as defined in Go.
 96 | TMSP messages are defined using Google's Protocol Buffers, and their serialized form is length prefixed before 
 97 | being sent over the TMSP socket. 
 98 | Return values include a \emph{Code}, similar to an HTTP Status Code, representing any errors,
 99 | and $0$ is used to indicate no error.
100 | Messages are buffered client side until a \emph{Flush} message is sent,
101 | at which point all messages are transmitted. While the server design is asynchronous, message responses must be correctly ordered and match their request.
102 | }
103 | 	\label{fig:tmsp_msgs}
104 | \end{figure}
105 | 
106 | 
107 | \begin{figure}[]
108 | 	\includegraphics[width=\linewidth,height=\textheight,keepaspectratio]{figures/diagrams/tmsp.pdf}
109 |     	\centering
110 | 	\caption[TMSP Architecture]{
111 | The consensus logic communicates with the application logic via TMSP, a socket protocol. 
112 | Two sockets are maintained, one for the mempool to check the validity of new transactions,
113 | and one for the consensus to execute newly committed blocks.
114 | }
115 | 	\label{fig:tmsp}
116 | \end{figure}
117 | 
118 | TMSP is implemented as an ordered, asynchronous server, 
119 | where message types come in pairs of request and response,
120 | and where a special message type, Flush, pushes any buffered messages over the connection and awaits all responses.
121 | 
122 | At the core of the TMSP are two messages: \emph{AppendTx} and \emph{Commit}.
123 | Once a block is decided by the consensus, 
124 | the engine calls \emph{AppendTx} on each transaction in the block, 
125 | passing it to the application state-machine to be processed.
126 | If the transaction is valid, it will result in a state-transition in the application.
127 | 
128 | Once all \emph{AppendTx} calls have returned, the consensus engine calls \emph{Commit},
129 | causing the application to commit to the latest state, and persist it to disk.
130 | 
131 | \section{Separating Agreement and Execution}
132 | 
133 | Using the TMSP affords us an explicit separation between consensus, or agreement on the order of transactions, 
134 | and their actual execution in the state-machine.
135 | In particular, we achieve consensus on the order first, and then execute the ordered transactions.
136 | This separation actually improves the system's fault tolerance \cite{yin2003separating}:
137 | while $3f+1$ replicas are still needed for agreement to tolerate $f$ Byzantine failures, 
138 | only $2f+1$ replicas are needed for execution.
139 | That is, while we still need a two-thirds majority for ordering, we only need a one-half majority for execution.
140 | 
141 | On the other hand, the fact that transactions are executed after they are ordered results in possibly invalid transactions,
142 | which can waste system resources. 
143 | This is solved using an additional TMSP message, \emph{CheckTx}, which is called by the mempool,
144 | allowing it to check whether the transaction would be valid against the latest state.
145 | Note, however, that the fact that commits come in blocks at a time introduces complexity in the handling of \emph{CheckTx} messages.
146 | In particular, applications are expected to maintain a second state-machine that executes only those rules of the main state-machine pertaining to a transaction's validity. 
147 | This second state-machine is updated by \emph{CheckTx} messages and is reset to the latest committed state after every commit.
148 | In essence, the second state machine describes the transaction pool's filter rules.
149 | 
150 | To some extent, \emph{CheckTx} can be used as an \emph{optimistic execution} 
151 | returning a result to the transaction sender with the caveat that 
152 | the result may be wrong if a block is committed with a conflicting transaction
153 | before the transaction of interest is committed.
154 | This sort of optimistic execution is the focus of an approach to scalable BFT systems 
155 | that can work quite well for particular applications where conflicts between transactions are rare. 
156 | At the same time, it adds additional complexity to the client, by virtue of needing to handle possibly invalid results.
157 | The approach is discussed further in Chapter \ref{ch:related}.\looseness=-1
158 | 
159 | \section{Microservice Architecture}
160 | 
161 | Adopting separation of concerns as a strategy in application design is generally considered wise practice \cite{hursch1995separation}.
162 | In particular, many large scale application deployments today adopt a microservice architecture,
163 | wherein each functional component is implemented as a standalone network service, 
164 | and typically encapsulated in a Linux container (e.g.~using Docker) for efficient deployment, scalability, and upgradeability.
165 | 
166 | Applications running above Tendermint consensus will often be decomposable into microservices.
167 | For instance, many applications will utilize a key-value store for storing state.
168 | Running the key-value store as an independent service is quite common, 
169 | in order to take advantage of the data store's specialized features, such as high-performance data types or Merkle trees.
170 | 
171 | Another important microservice for applications is a governance module, 
172 | which manages a certain subset of TMSP messages, enabling the application to control validator set changes.
173 | Such a module can become a powerful paradigm for governance in BFT systems.
174 | 
175 | Some applications may utilize a native currency or account structure for users.
176 | It may thus be useful to provide a module which supports basic elements of, for instance,
177 | handling digital signatures and managing account dynamics.
178 | 
179 | The list of possible microservices to compose a complex TMSP application goes on. 
180 | In fact, one might even build an application which can launch sub-applications using data sent in transactions.
181 | For instance, a transaction could include the hash of a Docker image,
182 | such that the image could be pulled from some file-storage backend and run as a sub-application that
183 | future transactions in the consensus could cause to execute.
184 | This is the approach of Ethereum,
185 | which allows developers to deploy bits of code to the network that can be triggered to run within the Ethereum Virtual Machine by future transactions \cite{ethereum},
186 | and of IBM's recent OpenBlockChain (OBC) project, which allows developers to send full Docker contexts in transactions,
187 | defining containers that run arbitrary code in response to transactions addressed to them \cite{obc}.
188 | 
189 | \section{Determinism}
190 | 
191 | The most critical caveat about building applications using TMSP is that they must be deterministic.
192 | That is, for the replicated state-machine to not compromise safety, 
193 | every node must obtain the same result when executing the same transaction against the same state.
194 | 
195 | This requirement is not unique to Tendermint. Bitcoin, Raft, Ethereum, any other distributed consensus algorithm,
196 | and applications like lock-step multi-player gaming must all be strictly deterministic, lest a consensus failure arise.
197 | 
198 | There are many sources of non-determinism in programming languages, most obviously via random numbers and time,
199 | but also, for instance, via the use of floating point precision, and by iteration over hash tables 
200 | (some languages, such as Go, enforce randomized iteration over hash tables to force programmers to be explicit about when they need ordered data structures).
201 | The strict requirement for determinism, and its notable absence from every major programming language,
202 | prompted Ethereum to develop its own, Turing-complete, fully deterministic virtual machine,
203 | which forms the platform for application developers to build applications above the Ethereum blockchain.
204 | While deterministic, it has many quirks, such as 32-byte stack words, storage keys, and storage values,
205 | and no support for byte-shifting operations - everything is big number arithmetic.
206 | 
207 | Deterministic programming is well studied in the world of real-time, lockstep, multi-party gaming.
208 | Such games constitute another example of replicated state machines, and are quite similar in many ways to consensus algorithms.
209 | Application developers building with TMSP are encouraged to study their methods, and to take care when implementing an application.
210 | On the one hand, the use of functional programming languages and proof methods can enable the construction of correct programs.
211 | On the other, compilers are being built to translate possibly non-deterministic programs to canonically deterministic ones \cite{deterministicjs}.
212 | 
213 | \section{Termination}
214 | 
215 | If determinism is critical for preserving safety, termination of transaction execution is critical for preserving liveness.
216 | It is, however, not in general possible to determine whether a given program halts for even a single input, let alone all of them,
217 | a problem known as the Halting Problem \cite{halting, davis1958computability}.
218 | 
219 | Ethereum's virtual machine solves the problem by \emph{metering}, that is, charging for each operation in the execution.
220 | This way, a transaction is guaranteed to terminate when the sender runs out of funds.
221 | Such metering may be possible in a more general case, 
222 | via compilers that compile programs to metered versions of themselves.
223 | 
224 | It is difficult to solve this problem without significant overhead.
225 | In essence, a validator cannot tell if an execution is in an infinite loop or is just slow, but nearly complete.
226 | It may be possible to use the Tendermint consensus protocol to decide on transaction timeouts, 
227 | such that more than two-thirds of validators must agree that a transaction timed out and is thus considered invalid (i.e., having no effect on the state).
228 | However, we do not pursue the idea further here, leaving it to future work. 
229 | In the meantime, it is expected that applications will undergo thorough testing before being deployed in any consensus system,
230 | and that monitoring and governance mechanisms will be used to resurrect the system in the event of consensus failure.
231 | 
232 | \section{Examples}
233 | 
234 | In this section, examples of increasingly complex TMSP applications are introduced and discussed,
235 | with particular focus on \emph{CheckTx} and managing the mempool.
236 | 
237 | \subsection{Merkleeyes}
238 | 
239 | A simple example of a TMSP application is a Merkle tree based key-value store. 
240 | Tendermint provides Merkleeyes, a TMSP application which wraps a self-balancing, Merkle binary search tree.
241 | The first byte of a transaction determines if the transaction is a get, set, or remove operation. 
242 | For get and remove operations, the remaining bytes are the key. 
243 | For the set operation, the remaining bytes are a serialized list containing the key and value.
244 | Merkleeyes may utilize a simple implementation of \emph{CheckTx} that only decodes the transaction,
245 | to ensure it is properly formatted.
246 | One could also make a more advanced \emph{CheckTx}, where get and remove operations on unknown keys are invalid.
247 | Once Commit is called, the latest updates are added into the Merkle tree, all hashes are computed, 
248 | and the latest state of the tree is committed to disk.
249 | 
250 | Note that Merkleeyes was designed to be a module used by other TMSP applications for a Merkle tree based key-value store,
251 | rather than a stand alone TMSP application, though the simplicity of the TMSP interface makes it amenable to both.
252 | 
253 | \subsection{Basecoin}
254 | 
255 | A more complete example is a simple currency, 
256 | using an account structure pioneered by Ethereum, 
257 | where each user has a public key and an account with the balance for that public key.
258 | The account also contains a sequence number, which is equal to the number of transactions sent by the account.
259 | Transactions can send funds from the account if they include the correct sequence number
260 | and are signed by the correct private key.
261 | Without the sequence number, the system would be susceptible to replay attacks \cite{replay},
262 | where a signed transaction debiting an account could be replayed, 
263 | causing the debit to occur multiple times.
264 | Furthermore, to prevent replay attacks in a multi-chain environment, 
265 | transaction signatures should include a network or blockchain identifier.
266 | 
267 | An application supporting a currency has naturally more logic than a simple key-value store.
268 | In particular, certain transactions are distinctly invalid, 
269 | such as those with an invalid signature, incorrect sequence number,
270 | or sending an amount greater than the sender's account balance.
271 | These conditions can be checked in \emph{CheckTx}.
272 | 
273 | Furthermore, a supplementary application state must be maintained for \emph{CheckTx} 
274 | in order to update sequence numbers and account balances 
275 | when there are multiple transactions involving the same accounts in the mempool at once.
276 | When commit is called, the supplementary application state is reset to the latest committed state.
277 | Any transactions still in the mempool can be replayed via \emph{CheckTx} against the latest state.
278 | 
279 | \subsection{Ethereum}
280 | 
281 | Ethereum uses the mechanisms already described to filter transactions out of the mempool,
282 | but it also runs some transactions in a virtual machine, 
283 | which updates state and returns results.
284 | The virtual machine execution is not done in \emph{CheckTx}, 
285 | as it is much more expensive and depends heavily on the ultimate order of transactions as they are included in blocks.
286 | 
287 | \section{Conclusion}
288 | 
289 | TMSP provides a simple yet flexible means to build arbitrary applications,
290 | in any programming language,
291 | that inherit BFT state-machine replication from the Tendermint consensus algorithm.
292 | It plays much the same role for a consensus engine and an application that, for instance, CGI plays for Apache and WordPress.
293 | However, application developers must take special care to ensure their applications are deterministic, and that transaction executions terminate.
294 | 
295 | 


--------------------------------------------------------------------------------
/chapters/background.tex:
--------------------------------------------------------------------------------
  1 | \chapter{Background}
  2 | \label{ch:background}
  3 | 
  4 | \renewcommand{\|}{\;|\;}
  5 | 
  6 | Distributed consensus systems have become a critical component of modern Internet infrastructure, 
  7 | powering every major Internet application at some level or another.
  8 | This chapter introduces the necessary background material for understanding and discussing these systems.
  9 | In addition, it introduces the $\pi$-calculus, a formal language for describing concurrent processes,
 10 | which will be used to specify the Tendermint algorithm in Chapter \ref{ch:tendermint}.
 11 | 
 12 | \section{Replicated State Machine}
 13 | 
 14 | The most common paradigm for studying and implementing distributed consensus is that of the Replicated State Machine, 
 15 | wherein a \emph{deterministic} state machine is replicated across a set of processes, 
 16 | such that it functions as a single state machine 
 17 | despite the failure of some processes \cite{schneider1990implementing}.
 18 | The state machine is driven by a set of inputs, known as \emph{transactions}, 
 19 | where each transaction may or may not, depending on its validity, cause a state transition and return a result.
 20 | More formally, a transaction is an \emph{atomic} operation on a database, 
 21 | meaning it either completes or doesn't occur at all, 
 22 | and can't be left in an intermediate state \cite{gray1981transaction}.
 23 | The state transition logic is governed by the state machine's state transition function,
 24 | which maps a transaction and the current state to a new state and a return value.
 25 | The state transition function is also sometimes referred to as \emph{application logic}.
 26 | 
 27 | It is the responsibility of the consensus protocol to order the transactions so that the resulting 
 28 | \emph{transaction log} is replicated exactly by every process.
 29 | Using a deterministic state transition function implies that 
 30 | every process will compute the same state given the same transaction log.
 31 | 
 32 | A summary of the replicated state machine architecture is given in Figure \ref{fig:replicated_state_machine}.
 33 | 
 34 | \begin{figure}[]
 35 | 	\includegraphics[width=\linewidth,height=\textheight,keepaspectratio]{figures/diagrams/state_machine.pdf}
 36 |     	\centering
 37 | 	\caption[Overview of replicated state machine architecture]{
 38 | A replicated state machine replicates a transaction log and resulting state across multiple machines. 
 39 | Transactions are received from the client, 
 40 | run through the consensus protocol, 
 41 | ordered in the transaction log,
 42 | and executed against the state. 
 43 | In the figure, each diamond represents a single machine, 
 44 | with dotted lines representing communication between machines to carry out the consensus protocol for ordering transactions.}
 45 | 	\label{fig:replicated_state_machine}
 46 | \end{figure}
 47 | 
 48 | Tendermint was motivated by the desire to create a general purpose, high-performance, secure, and robust replicated state machine.
 49 | 
 50 | \section{Asynchrony}
 51 | 
 52 | The purpose of a fault-tolerant replicated state machine is to co-ordinate 
 53 | a network of computers to stay in sync while providing a useful service, 
 54 | despite the presence of faults.
 55 | 
 56 | Staying in sync amounts to replicating the transaction log successfully; 
 57 | providing a useful service amounts to keeping the state machine available for new transactions.
 58 | These aspects of the system are traditionally known as \emph{safety} and \emph{liveness}, respectively.
 59 | Colloquially, safety means nothing bad happens; liveness means that something good eventually happens.
 60 | A violation of safety implies two or more valid, competing transaction logs.
 61 | Violating liveness implies an unresponsive network.
 62 | 
 63 | It is trivial to satisfy liveness by accepting all transactions. And it is trivial to satisfy safety by accepting none.
 64 | Hence, state machine replication algorithms can be seen to operate on a spectrum defined by these extremes.
 65 | Typically, processes require some threshold of received information from other processes before they commit a new transaction.
 66 | In synchronous environments, 
 67 | where we make assumptions about the maximum delay of network messages or the maximum speed of processor clocks,
 68 |  it is easy enough to take turns proposing new transactions, poll for a majority vote, 
 69 | and skip a proposer's turn if they don't propose within the bounds of the synchrony assumptions.
 70 | 
 71 | In asynchronous environments, where no such assumptions about network delays or processor speeds are warranted,
 72 | the trade-off is much more difficult to manage.
 73 | In fact, the so-called FLP impossibility result demonstrates the 
 74 | impossibility of distributed consensus among deterministic asynchronous\footnote{Prior to FLP, the distinction between sync/async wasn't as prominent.} processes 
 75 | if even a single process can crash \cite{flp}.
 76 | The proof amounts to showing that, because processes can fail, 
 77 | there are valid executions of the protocol in which processes fail at the exact opportune times to prevent consensus.
 78 | Hence, we have no guarantee of consensus.
 79 | 
 80 | Typically, synchrony in a protocol is reflected by the use of timeouts to manage certain transitions.
 81 | In asynchronous environments, where messages can be arbitrarily delayed, relying on synchrony (timeouts) for safety
 82 | can lead to a fork in the transaction log.
 83 | Relying on synchrony to ensure liveness can cause the consensus to halt, and the service to become unresponsive.
 84 | The former case is usually considered more severe, as reconciling conflicting logs can be a daunting or impossible task. 
 85 | 
 86 | In practice, synchronous solutions are only used where the message latency is under 
 87 | extremely well defined control, for instance between controllers on an airplane \cite{hoyme1993safebus},
 88 | or between datacenters utilizing synchronized atomic clocks \cite{corbett2013spanner}.
 89 | Thus, while many efficient synchronous solutions exist,
 90 | the general unreliability of computer networks is too great a risk for them to be used in practice
 91 | without significant additional costs.
 92 | 
 93 | There are fundamentally two ways to overcome the FLP impossibility result.
 94 | The first is to use stronger synchrony assumptions - 
 95 | even rather weak assumptions are sufficient, 
 96 | for instance, that only eventually, 
 97 | crashed processes are suspected of crashing and correct ones are not \cite{chandra1996unreliable}.
 98 | Typically, this approach utilizes \emph{leaders}, 
 99 | which play a special co-ordinating role, 
100 | and which can be skipped if they are suspected of being faulty after some timeout.
101 | In practice, such leader-election mechanisms can be difficult to get right.
102 | 
103 | The second way to overcome FLP is to use non-determinism - 
104 | include randomization elements such that
105 | the probability of coming to consensus tends to $1$.
106 | While clever, relying on randomization is typically much slower, 
107 | though certain advanced cryptographic techniques have in recent years
108 | achieved tremendous improvements in speed \cite{honeybadger}.
109 | 
110 | 
111 | \section{Broadcast and Consensus}
112 | 
113 | In order for a process to replicate its state on other processes, 
114 | it must have access to basic communication primitives which allow it to disseminate, or deliver, information.
115 | One of the most useful such primitives is \emph{reliable broadcast}.
116 | Reliable broadcast (RBC) is a broadcast primitive satisfying, for message $m$ \cite{chandra1996unreliable}:
117 | 
118 | \begin{itemize}
119 | \item validity - if a correct process broadcasts $m$, it eventually delivers $m$
120 | \item agreement - if a correct process delivers $m$, all correct processes eventually deliver $m$
121 | \item integrity - $m$ is only delivered once, and only if broadcast by its sender
122 | \end{itemize}
123 | 
124 | In essence, RBC enables a message to be eventually delivered once on all correct processes.
125 | 
126 | Another, more useful primitive is \emph{atomic broadcast} (ABC), 
127 | which satisfies RBC and an additional property \cite{chandra1996unreliable}:
128 | 
129 | \begin{itemize}
130 | \item total order - if correct processes $p$ and $q$ deliver $m$ and $m'$, then $p$ delivers $m$ before $m'$ iff $q$ delivers $m$ before $m'$
131 | \end{itemize}
132 | 
133 | Atomic broadcast is thus a reliable broadcast where values are delivered in the same order on each host. 
134 | Note this is exactly the problem of replicating a transaction log.
135 | While colloquially, the problem may be referred to as consensus, 
136 | the standard definition of the consensus primitive satisfies the following \cite{chandra1996unreliable}:
137 | \begin{itemize}
138 | \item termination - every correct process eventually decides
139 | \item integrity - every correct process decides at most once
140 | \item agreement - if one correct process decides $v_1$ and another decides $v_2$, then $v_1=v_2$
141 | \item validity - if a correct process decides $v$, at least one process proposed $v$
142 | \end{itemize}
143 | 
144 | Intuitively, consensus and ABC appear remarkably similar, 
145 | with the critical difference that ABC is a continuous protocol,
146 | whereas consensus expects to terminate.
147 | That said, it is well known that each can be reduced to the other \cite{chandra1996unreliable}.
148 | Consensus is easily reduced to ABC by deciding the first value to be atomically broadcast.
149 | ABC can be reduced to consensus by running many instances of the consensus protocol, 
150 | in sequence, 
151 | though certain subtle considerations must be made, 
152 | especially for handling Byzantine faults.
153 | A complete description of the parameter space surrounding
154 | the reduction of ABC to consensus remains an open topic of research.
155 | 
156 | Historically, despite the fact that most use cases actually require ABC,
157 | the most widely adopted algorithm has been a consensus algorithm called Paxos, 
158 | introduced, and proven correct, by Leslie Lamport in the 90s \cite{paxos}.
159 | Paxos simultaneously empowered and confused the discipline of consensus science,
160 | on the one hand by providing the first real-world, practical, fault-tolerant consensus algorithm,
161 | and on the other by being so difficult to understand and explain.
162 | Each implementation of the algorithm used its own unique bag of ad-hoc techniques
163 | to build ABC from Paxos, making the ecosystem difficult to navigate, understand, and utilize.
164 | Unfortunately, there was little work on improving the problem framing to make it more understandable,
165 | though there were efforts to delineate solutions to the various difficulties \cite{chandra2007paxos}.
166 | 
167 | In 2013, Ongaro and Ousterhout published Raft \cite{raft},
168 | a state machine replication algorithm whose motivating design goal was understandability.
169 | Rather than starting from a consensus algorithm, and attempting to build what was needed (ABC),
170 | the design of Raft considered first and foremost the transaction log,
171 | and sought orthogonal components which could fit together to provide what is ultimately ABC,
172 | though it is not described as such.
173 | 
174 | Paxos has been the staple consensus algorithm for industry, 
175 | upon which the likes of Amazon \cite{dynamo}, Google \cite{chubby}, 
176 | and others have built out highly available global Internet services.
177 | The Paxos consensus sits at the bottom of the application stack, 
178 | providing a consistent interface to resource management and allocation, 
179 | operating at much slower time scales than the highly-available applications facing the users.
180 | 
181 | Since its debut, however, Raft has seen tremendous adoption, especially in the open source community,
182 | with implementations in virtually every major language \cite{raft.github.io},
183 | and use as the backbone in major projects, 
184 | including CoreOS's distributed Linux distribution \cite{coreos_raft} 
185 | and the open source time-series database InfluxDB \cite{influxdb,hashicorp_raft}.
186 | 
187 | Raft's major divergent design decision from Paxos was to 
188 | focus on the transaction-log first, rather than a single value,
189 | in particular to allow a leader to persist in committing transactions until it goes down, 
190 | at which point leadership election can kick in. 
191 | In some ways, this is similar to the approach taken by blockchains, 
192 | though the major advantage of blockchains is the ability to tolerate a different kind of fault.
193 | 
194 | \section{Byzantine Fault Tolerance}
195 | 
196 | Blockchains have been described as ``trust machines'' \cite{economist_blockchains} on account of the way they reduce counterparty risk through the decentralization of responsibility over a shared database.
197 | Bitcoin, in particular, is noted for its ability to withstand attacks and malicious behaviour by any of the participants. 
198 | Traditionally, consensus protocols tolerant of malicious behaviour were known as Byzantine Fault Tolerant (BFT) consensus protocols.
199 | The term Byzantine was used due to the similarity of the problem to that faced by generals of the Byzantine army attempting to co-ordinate themselves to attack Rome using only messengers,
200 | where one of the generals may be a traitor \cite{lamport1982byzantine}.
201 | 
202 | In a crash fault, a process simply halts. In a Byzantine fault, it can behave arbitrarily.
203 | Crash faults are easier to handle, as no process can \emph{lie} to another process.
204 | Systems which only tolerate crash faults can operate via simple majority rule, 
205 | and therefore typically tolerate simultaneous failure of fewer than half of the processes in the system.
206 | If the number of failures the system can tolerate is $f$, such systems must have at least $2f+1$ processes.
207 | 
208 | Byzantine failures are more complicated. In a system of $2f+1$ processes, if $f$ are Byzantine, 
209 | they can co-ordinate to say arbitrary things to the other $f+1$ processes.
210 | For instance, suppose we are trying to agree on the value of a single bit, 
211 | and $f=1$, so we have $N=3$ processes, $A$, $B$, and $C$, where $C$ is Byzantine, as in Figure \ref{fig:byzantine}.
212 | $C$ can tell $A$ that the value is $0$ and tell $B$ that it's $1$. 
213 | If $A$ agrees that it's $0$, and $B$ agrees that it's $1$, then they will both think they have a majority and commit, 
214 | thereby violating the safety condition.
215 | Hence, the upper bound on faults tolerated by a Byzantine system is strictly lower than that of a non-Byzantine one.
216 | 
217 | \begin{figure}[]
218 | 	\includegraphics[width=\linewidth,height=\textheight,keepaspectratio]{figures/diagrams/byzantine.pdf}
219 |     	\centering
220 | 	\caption[Byzantine processes tell lies]{
221 | A Byzantine process, C, tells A one thing and B another, causing them to come to different conclusions about the network.
222 | Here, simple majority vote results in a violation of safety due to only a single Byzantine process.}
223 | 	\label{fig:byzantine}
224 | \end{figure}
225 | 
226 | 
227 | In fact, it can be shown that the upper limit on $f$ for Byzantine faults is $f < N/3$ \cite{pease1980reaching}.
228 | Thus, to tolerate a single Byzantine process, we require at least $N=4$. 
229 | Then the faulty process can't split the vote the way it was able to when $N=3$.
230 | 
231 | In 1999, Castro and Liskov published Practical Byzantine Fault Tolerance \cite{pbft}, or \emph{PBFT}, 
232 | which provided the first optimal Byzantine fault tolerant algorithm for practical use.
233 | It set a new precedent for the practicality of Byzantine fault tolerance in industrial systems by being capable 
234 | of processing tens of thousands of transactions per second.
235 | Despite this success, Byzantine fault tolerance was still considered expensive and largely unnecessary, 
236 | and the most popular implementation was difficult to build on top of \cite{ppbft}.
237 | Hence, despite a resurgence in academic interest, including numerous improved variations \cite{yin2003separating, kotla2007zyzzyva},
238 | not much progress was made in the way of implementations and deployment.
239 | Furthermore, PBFT provides no guarantees if a third or more of the network co-ordinates to violate safety.
240 | 
241 | \section{Cryptography, Trust, and Economics}
242 | 
243 | Fundamentally, fault tolerance is a problem deriving from a lack of trust - 
244 | an inability to know how some process will behave.
245 | Formally, trust might be defined information theoretically as a means
246 | for reducing the entropy of one's model of the world - 
247 | to trust someone is to optimistically reduce one's uncertainty about the world,
248 | enabling more focused attention on higher order forms of organization.
249 | 
250 | Cryptographic primitives are also fundamentally related to the problem of trust,
251 | and may similarly be defined as mechanisms which allow for a massive reduction in entropy -
252 | successfully authenticating a cryptographic function collapses a distribution 
253 | over possible outcomes to a single outcome, or in some cases a small number of outcomes.
254 | 
255 | It is well known that civilizations that have greater forms of institutional trust,
256 | such as the rule-of-law, 
257 | have higher productivity and more vibrant economies \cite{zak2001trust}.
258 | The result makes intuitive sense, as being able to trust more about an interaction 
259 | reduces the space of possible outcomes that need to be actively modelled,
260 | making it easier to co-ordinate.
261 | Unfortunately, it is becoming increasingly difficult to evaluate the trustworthiness 
262 | of modern institutions as their complexity has skyrocketed in recent decades,
263 | increasing the likelihood that the certainty they allegedly provide is an illusion.
264 | 
265 | Fortunately, cryptography can form the basis for new institutions of trust in society 
266 | which may dramatically improve the capacity for human co-ordination at global scale on account
267 | of reduced risk of fraudulent and/or unaccountable activity.
268 | Of particular interest is the importance of cryptographic primitives in BFT algorithms,
269 | both for authentication and for seeding non-determinism.
270 | 
271 | Most interestingly, economic mechanisms may also serve as means for reducing entropy,
272 | in so far as economic agents can be incentivized - 
273 | which is to say be made more likely to execute a particular behaviour.
274 | In fact, Bitcoin's great insight was that cryptographic primitives could be used in
275 | conjunction with economic incentives to sufficiently reduce the entropy of a public consensus network
276 | to achieve secure replication of state.
277 | 
278 | A more formal investigation of the information theoretic grounds of trust, cryptography,
279 | consensus, and economics, and in particular their inter-relationship, remains for future work.
280 | 
281 | \section{Blockchain}
282 | 
283 | A blockchain is, at heart, an integrity-focused approach to Byzantine Fault Tolerant Atomic Broadcast.
284 | The Bitcoin blockchain, for instance, uses a combination of economics and cryptographic randomization 
285 | to provide a strong probabilistic guarantee that safety will not be violated, 
286 | given a weak synchrony assumption, namely, 
287 | that blocks are gossipped much more rapidly than they are found via the partial-hash collision lottery.
288 | In practice, however, it is well known that Bitcoin's security guarantees are vulnerable to a number 
289 | of subtle attacks \cite{courtois2014subversive,eyal2014majority}.
290 | 
291 | 
292 | The blockchain gets its name from the two key optimizations it employs in solving ABC.
293 | The first is that it groups transactions in blocks in order to amortize the high commit latency 
294 | (on the order of ten minutes) over many transactions.
295 | The second is to link blocks via cryptographic hashes into an immutable chain,
296 | such that it is easy to verify the historical record.
297 | Both optimizations are natural improvements to a naive BFT-ABC,
298 | the former improving performance, the latter improving tolerance to certain kinds 
299 | of difficult-to-model Byzantine faults.
300 | 
301 | Over the last few years, it has become common to ``blockchainize'' consensus algorithms,
302 | that is, to adapt them to ABC using the blockchain paradigm of hash-linked transaction batches.
303 | To the author's knowledge, Tendermint was the first such proposal, 
304 | upgrading a well known BFT algorithm from the late 80s \cite{dls},
305 | though it has since evolved to a consensus algorithm of its own.
306 | It has been followed by IBM, which upgraded PBFT to a blockchain \cite{cachin2016non,obc},
307 | and by JP Morgan, which upgraded a BFT version of Raft \cite{juno}.
308 | 
309 | \section{Process Calculus}
310 | 
311 | Distributed systems, where pieces of the system execute concurrently with one another,
312 | are notorious for being difficult to design, build, and debug.
313 | They are further difficult to formally verify, 
314 | as most techniques for formal verification, and in fact the very foundations of computer science,
315 | have been specifically developed with sequential computation in mind.
316 | 
317 | Process calculi are a family of models introduced 
318 | to provide a formal basis for concurrent computation.
319 | The most popular calculus, Communicating Sequential Processes (CSP) \cite{csp},
320 | forms the theoretical foundation for many modern programming languages,
321 | such as Go, which include concurrency primitives in the language design \cite{csp_go}.
322 | 
323 | In the 80s, Robin Milner introduced the Calculus of Communicating Systems (CCS), 
324 | designed to be a concurrent analog of the sequential lambda calculus that underlies most functional programming languages.
325 | While the lambda calculus has function application as its basic unit of computation,
326 | CCS uses communication between two concurrent processes over a shared channel as its basic operational primitive.
327 | A more general form of CCS, the $\pi$-calculus, 
328 | enables mobility in the communication graph between processes, 
329 | such that the channels of communication can themselves be passed along other channels,
330 | thereby blurring the distinction between data, variables, and channels.
331 | The result is a coherent, minimalistic model of computation more powerful than its sequential predecessors.
332 | 
333 | The $\pi$-calculus has proven to be a highly effective tool for the study of concurrent systems,
334 | with applications from business process management \cite{lucchi2007pi} to cellular biology \cite{phillips2007efficient}.
335 | The remarkably simple notation simplifies the description of concurrent protocols.
336 | Furthermore, the well known equivalence between computation and logic \cite{abramsky1994proofs} enables
337 | logical systems to be defined complementary to the various process calculi,
338 | providing formal means to discuss and verify the properties of systems specified in an appropriate calculus.
339 | 
340 | Our presentation of the $\pi$-calculus is sufficient merely to specify the Tendermint algorithm.
341 | For a more complete introduction, see \cite{milner1992calculus}.
342 | 
343 | The grammar of a simple $\pi$-calculus, in Backus-Naur form, is as follows:
344 | 
345 | 
346 | \begin{center}
347 | 	\begin{tabular}{l }
348 | 		{$\!\begin{aligned}
349 | 			P & := & 0  & & \text{ \emph{void}}\\
350 | 			    & \; \| & P \| P  & & \text{ \emph{par}} \\
351 | 			    & \; \| & \alpha.P  & & \text{ \emph{guard}} \\
352 | 			    & \; \| & \alpha.P  + \alpha.P & & \text{ \emph{guarded-choice}} \\
353 | 			    & \; \| & (\nu x) P & & \text{ \emph{fresh}}\\ \\
354 | 			    & \; \| & F^{s}(y) & & \text{ \emph{func}}\\ \\
355 | 
356 | 			\alpha & := & \tau & & \text{ \emph{null}} \\
357 | 			    & \; \| & x!(y) & & \text{ \emph{send}} \\
358 | 			    & \; \| & x?(y) & & \text{ \emph{receive}}\\
359 | 			    & \; \| & susp_i & & \text{ \emph{suspect}}\\
360 | 		\end{aligned}$} \\ 
361 | 	\end{tabular}
362 | \end{center}
363 | 
364 | Each grammatical rule is labelled with a reference to its functional meaning.
365 | A process may be the empty process, $0$.
366 | It may be the parallel composition of two processes, $P \| P$,
367 | denoting two processes running concurrently.
368 | A guarded process, $\alpha.P$, only allows process $P$ to execute after an action, $\alpha$,
369 | has occurred.
370 | The action can be a null action, $\tau$, or it can be the sending, $x!(y)$, 
371 | or receiving, $x?(y)$, of $y$ along $x$.
372 | Guarded choice injects non-determinism into the operation of the calculus, 
373 | such that the process $\alpha.P + \beta.Q$ will non-deterministically execute
374 | $\alpha$ or $\beta$, and then run $P$ or $Q$, respectively.
375 | A new channel, $x$, can be created via $(\nu x) P$, such that $x$ is only accessible in $P$.
376 | Functional forms $F^{s}(y)$ allow us to pass variables $s$ and $y$ into 
377 | the process called $F$, which may cause itself to execute recursively. 
378 | Typically, we let $s$ be state-like variables, while $y$ are channels in the calculus.
379 | Finally, since we are interested in consensus in asynchronous networks, 
380 | we employ an abstraction of timeouts known as unreliable failure detectors \cite{chandra1996unreliable},
381 | and model them as a non-deterministic action \cite{nestmann2003modeling}.
382 | The $susp_i$ action is triggered when process $i$ is suspected of having failed ---
383 | in other words, after some timeout.
384 | 
385 | Note that we may use $\sum P$ to denote guarded-choice over more than two processes,
386 | and $\prod P$ to denote the parallel composition of more than two processes.
387 | We also admit a polyadic form of send and receive, for instance the process $x?(v,w) \| x!(y,z)$ is equivalent to
388 | $x?(d).d?(v).d?(w) \| (\nu c) x!(c).c!(y).c!(z)$.
389 | 
390 | An operational semantics defines the actual non-reversible computational steps that a process may execute.
391 | Effectively, the only relevant operation is communication, known as the \emph{comm} rule:
392 | 
393 | \begin{equation}
394 | ( x?(y).P \| x!(z) )  \rightarrow P\{z/y\}
395 | \end{equation} 
396 | The notation $P\{z/y\}$ means that all occurrences of $y$ in $P$ are replaced with $z$.
397 | In other words, $z$ was sent on $x$, received as $y$, and fed to $P$.
398 | 
399 | Given a $\pi$-calculus process, we can follow its execution by applying the comm rule.
400 | For instance, 
401 | 
402 | \begin{equation}
403 | ( x?(y).y!(x) \| x!(z) )  \rightarrow z!(x)
404 | \end{equation} 
405 | 
406 | Now, we can use a formal logic to express properties a process might satisfy.
407 | For instance, the modal Hennessy--Milner logic can express that a process
408 | will satisfy some other logical expression after some or all forms of an action have occurred \cite{milner1993modal}.
409 | By adding more complex operators to the logic, 
410 | formal systems can be built up which easily describe important properties of distributed systems,
411 | such as safety and liveness \cite{stirling1991local}, and localization \cite{caires2003spatial}.
412 | Systems written in the $\pi$-calculus can then be formally verified to satisfy 
413 | the relevant properties using model checking software \cite{vieira2004spatial}.
414 | 
415 | While we use the $\pi$-calculus to specify the Tendermint algorithm, 
416 | we leave use of an associated formal logic, 
417 | and the corresponding verification of properties, to future work.
418 | 
419 | \section{The Need For Tendermint}
420 | 
421 | The success of Bitcoin and its derivatives, especially Ethereum \cite{ethereum}, and their promise of secure, autonomous, distributed, fault-tolerant execution of arbitrary code has caused virtually every major financial institution on the planet to become interested in the blockchain phenomenon. 
422 | In particular, there has emerged an understanding of two forms of the technology:
423 | On the one hand are the public blockchains, known affectionately as the Big Bad Public Blockchains or BBPBs, 
424 | whose protocols are dominated by in-built economic incentives bootstrapped by a native currency.
425 | On the other are so-called private blockchains, which might more accurately be called ``consortia blockchains'',
426 | and which are effectively improvements on traditional consensus and BFT algorithms through the use of hash trees, digital signatures, 
427 | peer-to-peer networking, and enhanced accountability.
428 | 
429 | As the infrastructure of our societies continues to decentralize, and as the nature of business becomes more inter-organizational,
430 | there is increasing need for a transparent, accountable, high performance BFT system, which can support applications from finance to domain registration to electronic voting,
431 | and which comes equipped with advanced mechanisms for governance and evolution into the future.
432 | Tendermint is that solution, optimized for consortia, or inter-organizational logic, but flexible enough to accommodate anyone from private enterprise to global currency,
433 | and high-performance enough to compete with the major, non-BFT, consensus solutions available today, such as etcd, consul, and zookeeper, while providing greater resilience, security guarantees, and flexibility to application developers.
434 | 
435 | A more comprehensive discussion of consensus science and related algorithms is reserved for Chapter \ref{ch:related}.
436 | 
437 | 


--------------------------------------------------------------------------------
/chapters/clients.tex:
--------------------------------------------------------------------------------
  1 | \chapter{Client Considerations}
  2 | \label{ch:clients}
  3 | 
  4 | This chapter reviews some considerations pertaining to clients that interact with an application hosted on Tendermint.
  5 | 
  6 | \section{Discovery}
  7 | 
  8 | Network discovery occurs simply by dialing some set of seed nodes over TCP.
  9 | The p2p network uses authenticated encryption, 
 10 | but the public keys of the validators must be verified somehow out of band, 
 11 | that is, via an alternative medium not within the purview of the protocol.
 12 | Indeed, in these systems, the genesis state itself must be communicated out of band, 
 13 | and ideally is the only thing that must be communicated, 
 14 | as it should also contain the public keys used by validators for authenticated encryption, 
 15 | which are different than those used for signing votes in consensus.
 16 | 
 17 | For validator sets that may change over time, it is useful to register all validators via DNS, 
 18 | and to register new validators before they actually become validators, and remove them after they are removed as validators.
 19 | Alternatively, validator locations can be registered in another fault-tolerant distributed data store, 
 20 | including possibly another Tendermint cluster itself.
 21 | 
 22 | \section{Broadcasting Transactions}
 23 | 
 24 | As a generalized application platform, Tendermint provides only a simple interface to clients for broadcasting transactions.
 25 | The general paradigm is that a client connects to a Tendermint consensus network through a proxy, which is either run locally on its machine,
 26 | or hosted by some other provider. The proxy functions as a non-validator node on the network, 
 27 | which means it keeps up with the consensus and processes transactions, but does not sign votes.
 28 | The proxy enables client transactions to be quickly broadcast to the whole network via the gossip layer.
 29 | 
 30 | A node need only connect to one other node on the network to broadcast transactions, but by default will connect to many,
 31 | minimizing the chances that the transaction will not be received.
 32 | Transactions are passed into the mempool, 
 33 | and gossiped through the mempool reactor to be cached in the mempool of all nodes, 
 34 | so that eventually one of them will include it in a block. 
 35 | 
 36 | Note that the transaction does not execute against the state until it gets into a block,
 37 | so the client does not get a result back right away, other than confirmation that it was accepted into the mempool and broadcast to other peers.
 38 | Clients should register with the proxy to receive the result as a push notification when it is computed during the commit of a block.
 39 | 
 40 | It is not essential that a client connect to the current proposer, 
 41 | as eventually any validator which has the transaction in its mempool may propose it.
 42 | However, preferential broadcasting to the next proposer in line may lead to lower latency for the transaction
 43 | in certain cases where the network is under high load. Otherwise, the transaction should be quickly gossiped to every validator.
 44 | 
 45 | \section{Mempool}
 46 | 
 47 | The mempool is responsible for caching transactions in memory before they are included in blocks.
 48 | Its behaviour is subtle, and poses a number of challenges for the overall system architecture.
 49 | First and foremost, caching arbitrary numbers of transactions in the mempool is a direct denial of service attack
 50 | that could trivially cripple the network. Most blockchains solve this problem using their native currency,
 51 | and permitting only transactions which spend a certain fee to reside in the mempool.
 52 | 
 53 | In a more generalized system, like Tendermint, where there is not necessarily a currency to pay fees with,
 54 | the system must establish stricter filtering rules and rely on more intelligent clients to resubmit transactions that are dropped.
 55 | The situation is even more subtle, however, because the rule set for filtering transactions in the mempool must be a function of the application itself.
 56 | Hence the \emph{CheckTx} message of TMSP,
 57 | which the mempool can use to run a transaction against a transient state of the application to determine if it should be kept around or dropped.
 58 | 
 59 | Handling the transient state is non-trivial, and is something left to the application developer, 
 60 | though examples are provided in the many example applications. 
 61 | In any case, clients must monitor the state of the mempool (i.e.~the unconfirmed transactions) to determine if they need to rebroadcast their transactions,
 62 | which may occur in highly concurrent settings where the validity of one transaction depends on having processed another.
 63 | 
 64 | \section{Semantics}
 65 | 
 66 | Tendermint's core consensus algorithm provides only \emph{at-least-once semantics}, 
 67 | which is to say the system is subject to replay attacks, 
 68 | where the same transaction can be committed many times.
 69 | However, many users and applications expect stronger guarantees from a database system.
 70 | The flexibility of the Tendermint system leaves the strictness of these semantics up to the application developer.
 71 | By utilizing the \emph{CheckTx} message, and by adequately managing state in the application, 
 72 | application developers can provide the database semantics that suit them and their users' needs.
 73 | For instance, as discussed in Chapter \ref{ch:apps}, 
 74 | using an account based system with sequence numbers mitigates replay attacks,
 75 | and changes the semantics from \emph{at-least-once} to \emph{exactly-once}.
 76 | 
 77 | \section{Reads} 
 78 | 
 79 | Clients issue read requests to the same proxy node they use for broadcasting transactions (writes).
 80 | The proxy is always available for reads, even if the network halts.
 81 | However, in the event of a partition, the proxy may be partitioned from the rest of the network, which continues making blocks.
 82 | In that case, reads from the proxy might be stale.
 83 | 
 84 | To avoid stale reads, the read request can be sent as a transaction, presuming the application permits such queries.
 85 | By using transactions, reads are guaranteed to return the latest committed state, i.e.~when the read transaction is committed in the next block.
 86 | This is of course much more expensive than simply querying the proxy for the state.
 87 | It is possible to use heuristics to determine if a read will be stale,
 88 | such as if the proxy is well-connected to its peers and is making blocks, 
 89 | or if it's stuck in a round with votes from one-third or more of validators,
 90 | but there is no substitute for performing an actual transaction.
 91 | 
 92 | \section{Light Client Proofs}
 93 | 
 94 | One of the major innovations of blockchains over traditional databases is their deliberate use of Merkle hash trees to enable the production
 95 | of compact proofs of system substates, so-called light-client proofs.
 96 | A light client proof is a path through a Merkle tree that allows a client to verify that some key-value pair is in the Merkle tree with a given root hash.
 97 | The state's Merkle root hash is included in the block header, such that it is sufficient for a client to have only the latest header to verify any component of the state.
 98 | Of course, to know that the header itself is valid, they must have either validated the whole chain, 
 99 | or kept up-to-date with validator set changes only and rely on economic guarantees that the state transitions were correct.
100 | 
101 | \section{Conclusion}
102 | 
103 | Clients of a Tendermint network function similarly to those of any other distributed database,
104 | though considerations must be made for the block-based nature of commits and the behaviour of the mempool.
105 | Additionally, clients must be designed with a particular application in mind. 
106 | Though this adds some complexity, it enables tremendous flexibility.
107 | 


--------------------------------------------------------------------------------
/chapters/conclusion.tex:
--------------------------------------------------------------------------------
 1 | \chapter{Conclusion}
 2 | 
 3 | Byzantine Fault Tolerant consensus provides a rich basis upon which to build services 
 4 | that do not depend on centralized, trusted parties, and which may be adopted by society
 5 | to manage critical components of socioeconomic infrastructure.
 6 | Tendermint, as presented in this thesis, was designed to meet the needs of such systems,
 7 | and to do so in a way that is understandably secure and easily high performance,
 8 | and which allows arbitrary systems to have transactions ordered by the consensus protocol,
 9 | with minimal fuss.
10 | 
11 | Careful considerations are necessary when deploying a distributed consensus system,
12 | especially one without an agreed upon central authority to mediate potential disputes and reset the system in the event of a crisis.
13 | Tendermint seeks to address such problems using explicit governance modules and accountability guarantees,
14 | enabling integration of Tendermint deployments into modern legal and economic infrastructure.
15 | 
16 | There is still considerable work to do. This includes formal verification of the algorithm's guarantees, 
17 | performance optimizations, and architectural changes to enable the system to increase capacity with the addition of machines.
18 | And of course, many, many TMSP applications remain to be built.
19 | 
20 | We hope that this thesis better illuminates some of the problems in distributed consensus and blockchain architecture,
21 | and inspires others to build something better.
22 | 


--------------------------------------------------------------------------------
/chapters/economics.tex:
--------------------------------------------------------------------------------
 1 | \chapter{Economics}
 2 | \label{ch:economics}
 3 | 
 4 | \section{Cryptoeconomics}
 5 | 
 6 | \section{Proof-of-Work}
 7 | 
 8 | \section{Proof-of-Stake}
 9 | 
10 | \section{Conclusion}
11 | 


--------------------------------------------------------------------------------
/chapters/frontmatter.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | %\thispagestyle{plain}
 3 | \par\vspace*{.35\textheight}{\centering Dedicated to Theda. \par}
 4 | 
 5 | \chapter*{Preface}
 6 | The structure and presentation of this thesis was much inspired by Diego Ongaro's 2014 Doctoral Dissertation, 
 7 | ``Consensus: Bridging Theory and Practice'', wherein he specifies and evaluates the Raft consensus algorithm.
 8 | 
 9 | Much of the work done in this thesis was done in collaboration with Jae Kwon, who initiated the Tendermint project.
10 | Please see the GitHub repository, at \url{https://github.com/tendermint/tendermint}, for a more direct account of contributions to the codebase.
11 | 
12 | 
13 | \chapter*{Acknowledgments}
14 | I learned early in life from Tony Montana that a man has only two things in this world, his word and his balls, and he should break em for nobody.
15 | This thesis would not have been completed if I had not given my word to certain people that I would complete it.
16 | These include my family, in particular my parents, grandparents, and great uncle Paul, and my primary adviser, Graham,
17 | who has, for one reason or another, permitted me a practically abusive amount of flexibility to pursue the topic of my choosing.
18 | Thanks Graham.
19 | 
20 | Were it not for another set of individuals, this thesis would probably have been about machine learning.
21 | These include Vlad Zamfir, with whom I have experienced countless moments of discovery and insight;
22 | my previous employer and favorite company, Eris Industries, and especially their CEO and COO, Casey Kuhlman and Preston Byrne,
23 | for hiring me, mentoring me, and giving me such freedom to research and tinker and ultimately start my own company with technology they helped fund;
24 | Jae Kwon, for his direct mentorship in consensus science and programming, for being a great collaborator, and for being the core founder and CEO at Tendermint;
25 | Lucius Meredith, for mentoring me in the process calculi;
26 | Zach Ramsay, for being, for all intents and purposes, my heterosexual husband;
27 | and of course, Satoshi Nakamoto, whoever you are, for sending me down this damned rabbit hole in the first place.
28 | 
29 | There are of course many other people who have influenced my life during the course of this graduate degree;
30 | you know who you are, and I thank you for being that person and for all you've done for me.
31 | 
32 | \tableofcontents
33 | \listoffigures
34 | \listoftables
35 | 
36 | 


--------------------------------------------------------------------------------
/chapters/governance.tex:
--------------------------------------------------------------------------------
  1 | \chapter{Governance}
  2 | \label{ch:governance}
  3 | 
  4 | So far, this thesis has reviewed the basic elements of the Tendermint consensus protocol and application environment.
  5 | Critical elements of operating the system in the real world, such as managing validator set changes
  6 | and recovering from a crisis, have not yet been discussed. 
  7 | 
  8 | This chapter proposes an approach to these problems that formalizes the role of governance in a consensus system.
  9 | As validator sets come to encompass more decentralized sets of agents, competent governance systems 
 10 | for maintaining the network will be increasingly paramount to the network's success.
 11 | 
 12 | \section{Governmint}
 13 | 
 14 | The basic functionality of governance is to filter proposals for action, typically through a form of voting.
 15 | The most basic implementation of governance as software is a module that enables users to make proposals,
 16 | vote on them, and tally the votes. 
 17 | Proposals may be programmatic, in which case they may execute automatically following a successful vote,
 18 | or they may be non-programmatic, in which case their execution is a manual exercise.
 19 | 
 20 | To enable certain actions in Tendermint, such as changing the validator set or upgrading the software,
 21 | a governance module has been implemented, called Governmint.
 22 | Governmint is a minimum viable governance application with support for multiple groups of entities,
 23 | each of which can vote internally on proposals, some of which may result in programmatic execution of actions,
 24 | like changing the validator set, or upgrading Governmint itself (for instance to add new proposal types or other voting mechanisms).
 25 | 
 26 | The system utilizes digital signatures to authenticate voters, 
 27 | and may use a variety of possible voting schemes.
 28 | Of particular interest are quadratic voting schemes,
 29 | where the cost to vote is quadratic in the weight of the vote,
 30 | which have been shown to have a superior ability to satisfy voter preferences \cite{posner2013quadratic}.
 31 | 
 32 | \section{Validator Set Changes}
 33 | 
 34 | Validator set changes are a critical component of real world consensus algorithms that many previous approaches have failed to specify 
 35 | or have left as a black art. 
 36 | Raft took pains to expound a sound protocol for validator set changes, which required the change pass through consensus, 
 37 | using a new message type.
 38 | Tendermint takes a similar approach, though it is standardized through the TMSP interface using the \emph{EndBlock} message,
 39 | which is run after all the \emph{AppendTx} messages, but before \emph{Commit}.
 40 | If a transaction, or set of transactions, is included in a block with the intended effect of updating the validator set,
 41 | the application can return a list of validators to update by specifying their public key and new voting power 
 42 | in response to the \emph{EndBlock} message.
 43 | Validators can be removed by setting their voting power to zero.
 44 | This provides a generic means for applications to update the validator set without having to specify transaction types.
 45 | 
 46 | If the block at height $H$ returns an updated validator set, 
 47 | then the block at height $H+1$ will reflect the update.
 48 | Note, however, that the \emph{LastCommit} in block $H+1$
 49 | must utilize the validator set as it was at $H$, 
 50 | since it may contain signatures from a validator that was removed.
 51 | 
 52 | Changes to voting power are applied for $H+1$ such that the next proposer 
 53 | is affected by the update. 
 54 | In particular, the validator that otherwise should have been the next proposer may be removed.
 55 | The round robin algorithm should handle this gracefully, simply moving on to the next proposer in line.
 56 | Since the same block is replicated on at least two-thirds of validators, 
 57 | and the round robin is deterministic,
 58 | they will all make the same update and expect the same next proposer.
 59 | 
 60 | \section{Punishing Byzantine Validators}
 61 | 
 62 | One of the salient points of Bitcoin's design is its incentive structure, 
 63 | in so far as the goal of the protocol was to incentivize validators to behave correctly
 64 | by rewarding them. While this makes sense in the context of Bitcoin's consensus protocol,
 65 | a superior incentive may be to provide strong disincentives, such that validators
 66 | have real \emph{skin-in-the-game} \cite{taleb2014skin}, rather than a soft opportunity cost.
 67 | 
 68 | Disincentives can be achieved in Tendermint using an approach first proposed by Vitalik Buterin \cite{slasher} as a so-called Proof-of-Stake protocol.
 69 | In essence, validators must make a security deposit (``they must bond some stake'')
 70 | in order to participate in consensus.
 71 | In the event that they are found to double-sign proposals or votes, 
 72 | other validators can publish evidence of the transgression in the form of a transaction, 
 73 | which the application state can use to change the validator set by removing the transgressor, burning its deposit.
 74 | This has the effect of associating an explicit economic cost with Byzantine behaviour, 
 75 | and enables one to estimate the cost of violating safety by bribing a third or more of the validators to be Byzantine.
 76 | 
 77 | Note that a consensus protocol may specify more behaviours to be punished than just double signing.
 78 | In particular, we are interested in punishing any strong signalling behaviour which is unjustified --- typically, any reported change in state that is not based on the reported state of others.
 79 | For instance, in a version of Tendermint where all pre-commits 
 80 | must come with the polka that justifies them,
 81 | validators may be punished for broadcasting unjustified pre-commits.
 82 | Note, however, that we cannot just punish for any unexpected behaviour --- 
 83 | for instance, a validator proposing when it is not their round to propose
 84 | may be a basis for optimizations which pre-empt asynchrony or crashed nodes.
 85 | 
 86 | In fact, a generalization of Tendermint along these two lines, 
 87 | of 1) looser forms of justification and 2) allowing validators to propose before their term,
 88 | gives rise to a family of protocols similar in nature to that proposed by Vlad Zamfir,
 89 | under the name Casper, as the consensus mechanism for a future version of Ethereum \cite{casper}.
 90 | A more formal account of the relationship between the protocols, 
 91 | and of the characteristics of anti-Byzantine justifications, remains for future work.
 92 | 
 93 | \section{Software Upgrades}
 94 | 
 95 | Governmint can also be used as a natural means for negotiating software upgrades on a possibly decentralized network.
 96 | Software upgrades on the public Internet are a notoriously challenging operation,
 97 | requiring careful planning to maintain backwards compatibility for users that don't upgrade right away,
 98 | and to not upset loyal users of the software by introducing bugs, removing features, adding complexity, or,
 99 | perhaps worst of all, updating automatically without permission.
100 | 
101 | The challenge of upgrading a decentralized consensus system is made especially apparent with Bitcoin.
102 | While Ethereum has already managed a successful, non-backwards-compatible upgrade, 
103 | due to its strong leadership and unified community,
104 | Bitcoin has been unable to make some needed upgrades,
105 | despite a plethora of software engineering ills,
106 | on account of a viciously divided community and a lack of strong leadership.
107 | 
108 | Upgrades to blockchains are typically differentiated as being \emph{soft forks} or \emph{hard forks},
109 | on account of the scope of the changes.
110 | Soft forks are meant to be backwards compatible, and to use degrees of freedom in the protocol that may be ignored
111 | by users who have not upgraded, but which provide new features to users which do.
112 | Hard forks, on the other hand, are non-backwards compatible upgrades that,
113 | in Bitcoin's case, may cause violations of safety, 
114 | and in Tendermint's case, cause the system to halt.
115 | 
116 | To cope, developers of the Bitcoin software have rolled out a series of soft forks for which validators can vote by signalling in new blocks. 
117 | Once a certain threshold of validators are signalling for the update,
118 | it automatically takes effect across the network, at least for users with a version of the software supporting the update.
119 | The utility of the Bitcoin system has grown tremendously on account of these soft forks, 
120 | and is expected to continue to do so on account of upcoming ones.
121 | Interestingly, the failure of the community to successfully hard fork the software has
122 | on the one hand raised concerns about the long term stability of the system,
123 | and on the other triggered excitement and inspiration about the system's resilience to corrupt governance --- its ungovernability.
124 | 
125 | There are many reasons to take the latter stance, 
126 | given the overwhelming government corruption apparent in the world today.
127 | Still, cryptography and distributed consensus provide a new set of tools that enables a degree
128 | of transparency and accountability otherwise not imaginable in the paper-pen-handshake world of modern governments,
129 | nor even the digital world of the traditional web, which suffers tremendously from a lack of sufficiently robust authentication systems.
130 | 
131 | In a system using Governmint, developers would be identifiable entities on the blockchain,
132 | and may submit proposals for software upgrades. 
133 | The mechanism is quite similar to that of a Pull Request on GitHub, 
134 | only it is integrated into a live running system,
135 | and the agreement passes through the consensus protocol.
136 | Clients should be written with configurable update parameters, 
137 | so they can specify whether to update automatically or to require that they are notified first.
138 | 
139 | Of course, any software upgrade which is not thoroughly vetted could pose a danger to the system,
140 | and a conservative approach to upgrades should be taken in general.
141 | 
142 | \section{Crisis Recovery}
143 | 
144 | In the event of a crisis, such as a fork in the transaction log,
145 | or the system coming to a halt, 
146 | a traditional consensus system provides few or no guarantees,
147 | and typically requires manual intervention.
148 | 
149 | Tendermint assures that those responsible for violating safety can be identified,
150 | such that any client who can access at least one honest validator 
151 | can discern with cryptographic certainty who the dishonest validators are,
152 | and thereby choose to follow the honest validators onto a new chain with a validator set excluding those who were Byzantine.
153 | 
154 | For instance, suppose a third or more validators violate locking rules,
155 | causing two blocks to be committed at height $H$.
156 | The honest validators can determine who double-signed by gossipping all the votes.
157 | At this point, they cannot use the consensus protocol, because the basic fault assumptions have been violated.
158 | Note that being able to at this point accumulate all votes for $H$ 
159 | implies strong assumptions about network connectivity and availability during the crisis,
160 | which, if it cannot be provided by the p2p network, may require validators use alternative means, 
161 | such as social media and high availability services, to communicate evidence.
162 | A new blockchain can be started by the full set of remaining honest nodes, 
163 | once at least two-thirds of them have gathered all the evidence.
164 | 
165 | Alternatively, modifying the Tendermint protocol so that pre-commits require a polka
166 | would ensure that those responsible for the fork could be punished immediately,
167 | and would not require an additional publishing period. 
168 | This modification remains for future work.
169 | 
170 | More complex uses of Governmint are possible for accommodating various particularities of crisis,
171 | such as permanent crash failures and the compromise of private keys.
172 | However, such approaches must be carefully thought out, 
173 | as they may undermine the safety guarantees of the underlying protocol.
174 | We leave investigation of these methods to future work, 
175 | but note the importance of the socio-economic context in which a blockchain is embedded, in terms of understanding its ability to recover from crisis.
176 | 
177 | Regardless of how crisis recovery proceeds, its success depends on integration with clients.
178 | If clients do not accept the new blockchain, the service is effectively offline.
179 | Thus, clients must be aware of the rules used by the particular blockchain to recover.
180 | In the cases of safety violation described above, they must also gather the evidence,
181 | determine which validators to remove, and compute the new state with the remaining validators.
182 | In the case of the liveness violation, they must keep up with Governmint.
183 | 
184 | \section{Conclusion}
185 | 
186 | Governance is a critical element of a distributed consensus system, 
187 | though competent governance systems remain poorly understood.
188 | Tendermint provides governance as a TMSP module called Governmint,
189 | which aims to facilitate increased experimentation in software-based governance for distributed systems.
190 | 
191 | 


--------------------------------------------------------------------------------
/chapters/implementation.tex:
--------------------------------------------------------------------------------
  1 | \chapter{Implementation}
  2 | \label{ch:implementation}
  3 | 
  4 | The reference implementation of Tendermint is written in Go \cite{golang} and hosted at \url{https://github.com/tendermint/tendermint}.
  5 | Go is a C-like language with a rich standard library, concurrency primitives for light-weight massively concurrent executions,
  6 | and a development environment optimized for simplicity and efficiency.
  7 | 
  8 | The code uses a number of packages which are modular enough to be isolated as their own libraries.
  9 | These packages were written for the most part by Jae Kwon, with bug fixes, tests, and the occasional feature contributed by the author.
 10 | The most important of these packages are described in the following sub-sections.
 11 | 
 12 | \section{Binary Serialization}
 13 | 
 14 | Tendermint uses a binary serialization algorithm optimized for simplicity and determinism.
 15 | It supports all integer types (including varints, which are encoded with a one-byte length prefix),
 16 | strings, byte arrays, and time (unix time with millisecond precision).
 17 | It also supports arrays of any type and structs (encoded as a list of ordered values, ignoring keys).
 18 | It is somewhat inspired by Go's type system, especially its use of interface types, 
 19 | which can be implemented as one of many concrete types.
 20 | Interfaces can be registered and each concrete implementation given a leading type-byte in its encoding.
 21 | 
 22 | See \url{https://github.com/tendermint/go-wire} for more details.
 23 | 
 24 | \section{Cryptography}
 25 | 
 26 | Consensus algorithms such as Tendermint use three primary cryptographic primitives: digital signatures, hash functions, and authenticated encryption.
 27 | While many implementations of these primitives exist, 
 28 | choosing a cryptography library for enterprise software is no trivial task, given especially the profound insecurity of the world's most used security library, OpenSSL \cite{openssl}.
 29 | 
 30 | Contributing to the insecurity of cryptographic systems is the potential deliberate undermining of their security properties by government agencies 
 31 | such as the NSA, who, in collaboration with the NIST, have designed and standardized many of the most popular cryptographic algorithms in use today. 
 32 | Given the apparent unlawfulness of such agencies, as made evident, for instance, by Edward Snowden \cite{snowden},
 33 | and a history of trying to compromise public cryptographic standards \cite{levy2001crypto},
 34 | many in the cryptography community prefer to use algorithms designed in an open, academic environment.
 35 | Tendermint, similarly, uses only such algorithms.
 36 | 
 37 | Tendermint uses RIPEMD160 as its cryptographic hash function, which produces 20-byte outputs. 
 38 | It is used in the Merkle trees of transactions and validator signatures, and for computing the block hash.
 39 | Go provides an implementation in its extended library. RIPEMD160 is also used as one of two hashing functions by Bitcoin in the derivation of addresses from public keys.
 40 | 
 41 | As its digital signature scheme, Tendermint uses Schnorr signatures over the ED25519 elliptic curve. 
 42 | ED25519 was designed in the open by Dan Bernstein \cite{ed25519}, with the intention of being high performance and easy to implement without introducing vulnerabilities.
 43 | Bernstein also introduced NaCl, a high level library for doing authenticated encryption that uses the ED25519 curve. Tendermint uses the implementation provided by Go in its extended library.
 44 | 
 45 | \section{Merkle Hash Tree}
 46 | 
 47 | Merkle trees function much like other tree-based data-structures, 
 48 | with the additional feature that it is possible to produce a proof of membership of a key in the tree that is logarithmic in the size of the tree.
 49 | This is done by recursively concatenating and hashing keys in pairs until only a single hash is left, the root hash of the tree.
 50 | For any leaf in the tree, a trail of hashes leading from it to the root serves as proof of its membership.
 51 | This makes Merkle trees particularly useful for p2p file-sharing applications, where pieces of a large file can be verified as belonging to the file without
 52 | having all the pieces. Tendermint uses this mechanism to gossip block parts on the network, where the root hash is included in the block proposal.
 53 | 
 54 | Tendermint also provides a self-balancing, Merkle binary tree, modeled after the AVL tree \cite{avl}, as a TMSP service called Merkleeyes.
 55 | This tree, referred to as the IAVL tree, can be used for storing state of dynamic size, allowing lookups, inserts, and removals in logarithmic time.
 56 | 
 57 | \section{RPC}
 58 | 
 59 | Tendermint exposes HTTP APIs for querying the blockchain, network information, and consensus state, and for broadcasting transactions.
 60 | The same API is available via three methods: GET requests using URI encoded parameters, POST requests using the JSONRPC standard \cite{jsonrpc}, 
 61 | and websockets using the JSONRPC standard. Websockets are the preferred method for high transaction throughput, 
 62 | and are necessary for receiving events.
 63 | 
 64 | 
 65 | \section{P2P Networking}
 66 | 
 67 | The P2P subprotocols used by Tendermint are described more fully in Chapter \ref{ch:subprotocols}.
 68 | 
 69 | \section{Reactors}
 70 | 
 71 | The Tendermint node is composed of multiple concurrent reactors, 
 72 | each managing a state machine sending and receiving messages to peers over the network, as described in Chapter \ref{ch:subprotocols}.
 73 | Reactors synchronize by locking shared datastructures, but the points of synchronization are kept to a minimum,
 74 | so that each reactor runs mostly concurrently with the others.
 75 | 
 76 | \subsection{Mempool}
 77 | 
 78 | The mempool reactor manages the mempool, 
 79 | which caches transactions before they are packed in blocks and committed.
 80 | The mempool uses a subset of the application's state machine to check the validity of transactions.
 81 | Transactions are kept in a concurrent linked list structure, allowing safe writes and many concurrent reads.
 82 | New, valid transactions are added to the end of the list. 
 83 | A routine for each peer traverses the list, sending each transaction to the peer, in order, only once.
 84 | The list is also scanned to collect transactions for a new proposal, 
 85 | and is updated every time a block is committed: committed transactions are removed, 
 86 | uncommitted transactions are re-run through CheckTx, and those that have become invalid are removed.
 87 | 
 88 | \subsection{Consensus}
 89 | 
 90 | The consensus reactor manages the consensus state machine, which handles proposals, voting, locking, 
 91 | and the actual committing of blocks.
 92 | The state machine is managed using a few persistent go-routines, 
 93 | which order received messages and enable them to be played back deterministically to debug the state.
 94 | These go-routines include the readLoop, for reading off the queue of received messages, 
 95 | and the timeoutLoop, for registering and triggering timeout events. 
 96 | 
 97 | Transitions in the consensus state machine are made either when a complete proposal and block are received,
 98 | or when more than two-thirds of either pre-votes or pre-commits have been received at a given round.
 99 | Transitions result in the broadcast of proposals, block data, or votes, which are queued on the internalReqQueue,
100 | and processed by the readLoop in serial with messages received from peers.
101 | This puts internal messages and peer messages on equal footing as far as being inputs to the consensus state machine, 
102 | but allows internal messages to be processed faster, as they don't sit in the same queue as those from peers.
103 | 
104 | \subsection{Blockchain}
105 | 
106 | The blockchain reactor syncs the blockchain using a much faster technique than the consensus reactor.
107 | Namely, validators request blocks of incrementing height until none of their peers have blocks of any higher height.
108 | Blocks are collected in a blockpool and synced to the blockchain by a worker routine that periodically takes blocks from the pool
109 | and validates them against the current chain.
110 | 
111 | Once the blockchain reactor finishes syncing up, it turns on the consensus reactor to take over.
112 | 
113 | \section{Conclusion}
114 | 
115 | The implementation of Tendermint in Go takes advantage of the language's concurrency primitives, garbage collection, 
116 | and type safety, to provide a clear, modular, easy to read code base with many reusable components. 
117 | As will be shown in Chapter \ref{ch:performance}, the implementation obtains high performance and is robust to many different kinds of fault.
118 | 


--------------------------------------------------------------------------------
/chapters/introduction.tex:
--------------------------------------------------------------------------------
  1 | \chapter{Introduction}
  2 | \label{ch:intro}
  3 | 
  4 | The cold, hard truth about computer engineering today is that computers are faulty ---
  5 | they crash, corrupt, slow down, perform voodoo. 
  6 | What's worse, we're typically interested in connecting computers over a network (like the Internet),
  7 | and networks can be more unpredictable than the computers themselves.
  8 | These challenges are primarily the concern of ``fault tolerant distributed computing'',
  9 | whose aim is to discover principled protocol designs enabling faulty computers communicating over a faulty network 
 10 | to stay in sync while providing a useful service.
 11 | In essence, to make a reliable system from unreliable parts.
 12 | 
 13 | In an increasingly digital and globalized world, however, 
 14 | systems must not only be reliable in the face of unreliable parts, but in the face of malicious or ``Byzantine'' ones.
 15 | Over the last decade, major components of critical infrastructure have been ported to networked systems,
 16 | as have vast components of the world's finances.
 17 | In response, there has been an explosion of cyber warfare and financial fraud,
 18 | and a complete distortion of economic and political fundamentals.
 19 | 
 20 | \section{Bitcoin}
 21 | 
 22 | In 2009, an anonymous software developer known only as Satoshi Nakamoto introduced an approach to the resolution of these issues 
 23 | that was simultaneously an experiment in computer science, economics, and politics. 
 24 | It was a digital currency called Bitcoin \cite{bitcoin}.
 25 | Bitcoin was the first protocol to solve the problem of fault tolerant distributed computing in the face of malicious adversaries in a public setting.
 26 | The solution, dubbed a ``blockchain'', hosts a digital currency, 
 27 | where consent on the order of transactions is negotiated via an economically incentivized cryptographic random lottery based on partial hash collisions.
 28 | In essence, transactions are ordered in batches (blocks) by those who find partial hash collisions of the transaction data, 
 29 | in such a way that the correct ordering is the one where the collisions have the greatest cumulative difficulty.
 30 | The solution was dubbed Proof-of-Work (PoW).
 31 | 
 32 | Bitcoin's subtle brilliance was to invent a currency, a cryptocurrency, and to issue it to those solving the hash collisions, 
 33 | in exchange for their doing such an expensive thing as solving partial hash collisions.
 34 | In spirit, it might be assumed that the capacity to solve such problems would be distributed as computing power is, 
 35 | such that anyone with a CPU could participate.
 36 | Unfortunately, the reality is that the Bitcoin network has grown into the largest supercomputing entity on the planet, greater than all others combined,
 37 | evaluating only a single function, distributed across a few large data centers running Application Specific Integrated Circuits (ASICs) 
 38 | produced by a small number of primarily Chinese companies, 
 39 | and costing on the order of two million USD per day in electricity \cite{blockchaininfo}.
 40 | Further, its technical design has limitations: it takes up to an hour to confirm transactions, is difficult to build applications on top of, and does not scale in a way which preserves its security guarantees.
 41 | This is not to mention the internal bout of political struggles resulting from the immaturity of the Bitcoin community's governance mechanisms.
 42 | 
 43 | Despite these troubles, Bitcoin, astonishingly, continues to churn,
 44 | and its technology, 
 45 | of cryptography and distributed databases and co-operative economics,
 46 | continues to attract billions in investment capital,
 47 | both in the form of new companies and new cryptocurrencies,
 48 | each diverging from Bitcoin in its own unique way.
 49 | 
 50 | \section{Tendermint}
 51 | 
 52 | In 2014, Jae Kwon began the development of Tendermint, which sought to solve the consensus problem,
 53 | of ordering and executing a set of transactions in an adversarial environment, 
 54 | by modernizing solutions to the problem that have existed for decades,
 55 | but have lacked the social context to be deployed widely until now.
 56 | 
 57 | In early 2015, in an effort led by Eris Industries to bring a practical blockchain solution to industry,
 58 | the author joined Jae Kwon in the development of the Tendermint software and protocols.
 59 | 
 60 | The result of that collaboration is the Tendermint platform, consisting of a consensus protocol, a high-performance implementation in Go,
 61 | a flexible interface for building arbitrary applications above the consensus, and a suite of tools for deployments and their management.
 62 | We believe Tendermint achieves a superior design and implementation compared to previous approaches, 
 63 | including those of the classical academic literature \cite{dls,pbft,raft} as well as Bitcoin \cite{bitcoin} and its derivatives \cite{ethereum,sidechains,peercoin},
 64 | by combining the right elements of each to achieve a practical balance of security, performance, and simplicity.
 65 | 
 66 | The Tendermint platform is available open source at \url{https://github.com/tendermint/tendermint}, 
 67 | and in associated repositories at \url{https://github.com/tendermint}.
 68 | The core is licensed GPLv3 and most of the libraries are Apache 2.0.
 69 | 
 70 | \section{Contributions}
 71 | 
 72 | The primary contributions of this thesis can be found in Chapters \ref{ch:tendermint} and \ref{ch:performance}, 
 73 | and in the many commits on \url{https://github.com/tendermint/tendermint} and related repositories.
 74 | Of particular significance are:
 75 | \begin{itemize}  
 76 |     \item A formal specification of Tendermint in the $\pi$-calculus and 
 77 | an informal proof of correctness of its safety and accountability (Chapter \ref{ch:tendermint}).
 78 | 
 79 |     \item A refactor of the core consensus state machine in the spirit of the formal specification to be more robust, deterministic, and understandable (\url{https://github.com/tendermint/tendermint/}).
 80 | 
 81 |     \item Evaluation of the software's performance and characteristics in normal, faulty, and malicious conditions on large deployments (Chapter \ref{ch:performance}). 
 82 | 
 83 |     \item Countless additional tests, leading to innumerable bug fixes and performance improvements (\url{https://github.com/tendermint/tendermint/}).
 84 | \end{itemize}
 85 | 
 86 | Chapters \ref{ch:subprotocols}--\ref{ch:implementation} describe the many other components of a complete system.
 87 | Some of these, like the subprotocols used to gossip data (Chapter \ref{ch:subprotocols}) and the various low-level software libraries (Chapter \ref{ch:implementation}),
 88 | were designed and implemented by Jae Kwon before the author joined the project. 
 89 | The rest was designed and implemented with regular consultation and inspiration from the author.
 90 | For a more direct accounting of contributions, please see the Github repositories.
 91 | 
 92 | Though not recounted in this thesis, the author made various contributions during this time to the Ethereum Project%
 93 | \footnote{Most notably tests, bug-fixes, and performance improvements in the Go implementation at \url{https://github.com/ethereum/go-ethereum}},
 94 | an alternative to Bitcoin which generalizes the use of the technology from currency to arbitrary computations.
 95 | In addition, the author has been invited on numerous occasions to speak privately and publicly about both Ethereum and Tendermint,
 96 | including as an instructor%
 97 | \footnote{Private instructor to a major financial institution, 2015}%
 98 | \footnote{Blockchain University, 2015, \url{http://blockchainu.co}}, 
 99 |  and a presenter%
100 | \footnote{Cryptoeconomicon, 2015}%
101 | \footnote{International Workshop on Technical Computing for Machine Learning and Mathematical Engineering, 2014, \url{http://www.esat.kuleuven.be/stadius/tcmm2014/}}%
102 | \footnote{The Blockchain Workshops, 2016, \url{http://nyc.blockchainworkshops.org/}}.
103 | 
104 | A final note on thesis structure: Despite being placed at the end, Chapter \ref{ch:related} provides significant context 
105 | and may enhance understanding of the thesis if read before Chapter \ref{ch:tendermint}. However, in order to not delay the reader's introduction to Tendermint,
106 | it is placed at the end.
107 | 


--------------------------------------------------------------------------------
/chapters/performance.tex:
--------------------------------------------------------------------------------
  1 | \chapter{Performance and Fault Tolerance}
  2 | \label{ch:performance}
  3 | 
  4 | Tendermint is designed as a Byzantine fault tolerant state-machine replication algorithm.
  5 | It guarantees safety so long as less than a third of validators are Byzantine, 
  6 | and guarantees liveness similarly, so long as network messages are eventually delivered,
  7 | with weak assumptions about network synchrony for gossiping proposals.
  8 | In this chapter, we evaluate Tendermint's fault tolerance empirically by injecting 
  9 | crash faults and Byzantine faults.
 10 | The goal is to show that the implementation of Tendermint consensus does not compromise safety in the event of such failures,
 11 | that it suffers minimum performance impact, and that it is quick to recover.
 12 | 
 13 | Performance of the Tendermint algorithm can be evaluated in a few key ways.
 14 | The most obvious measures are the block commit time, which is a measure of finalization latency, 
 15 | and transaction throughput, which measures the network's capacity.
 16 | We collect measurements for each on networks with validators distributed over the globe, 
 17 | where the number of validators ranges, in powers of 2, from 2 to 64.
 18 | 
 19 | \section{Overview}
 20 | 
 21 | The experiments in this chapter can be reproduced using the repository at \url{https://github.com/tendermint/network\_testing}.
 22 | All experiments take place in docker containers 
 23 | running on \emph{Amazon EC2} instances of type \emph{t2.medium} or \emph{c3.8xlarge}.
 24 | The \emph{t2.medium} has 2 vCPUs and 4 GB of RAM,
 25 | and the \emph{c3.8xlarge} has 32 vCPUs and 60 GB of RAM.
 26 | Instances are distributed across seven datacenters, spanning five continents.
 27 | A second docker container, responsible for generating transactions, is run on each instance.
 28 | Transactions are 250 bytes in size (a reasonable size for including a few 32 or 64 byte hashes and signatures),
 29 | and were constructed to be debuggable, to be quick to generate, and to contain some stochasticity.
 30 | Thus, the leading bytes are Big-Endian encoded integers 
 31 | representing transaction number and validator index for that instance,
 32 | the trailing 16 bytes are randomly drawn from the operating system, 
 33 | and the intermediate bytes are just zeros.
 34 | 
 35 | A network monitoring tool is used to maintain active websocket connections 
 36 | to each validator's Tendermint RPC server,
 37 | and uses its local time when it receives a new committed block 
 38 | for the first time as the official commit time for that block.
 39 | Experiments were first run without the monitor by copying 
 40 | all data from the validators for analysis and using the local time
 41 | at which two-thirds of the validators had committed a block as the commit time. 
 42 | Using the monitor is much faster, amenable to online monitoring, 
 43 | and was found to not impact the results 
 44 | so long as only block header information (and not the whole block) was passed over the websockets.
 45 | 
 46 | Docker containers on remote machines are easily managed using the \emph{docker-machine} tool, 
 47 | and the network\_testing repository provides some tools 
 48 | which take advantage of Go's concurrency features
 49 | to perform actions on docker containers on many remote machines at once.
 50 | 
 51 | Each validator connects directly to every other validator to avoid confounding effects of network topology.
 52 | 
 53 | For experiments involving crash faults or Byzantine behaviour, 
 54 | the number of faulty nodes is given by $N_{fault} = \lfloor(N-1)/3\rfloor$,
 55 | where $N$ is the total number of validators.
 56 | 
 57 | \section{Throughput and Latency}
 58 | 
 59 | This section describes experiments which measure the raw performance 
 60 | of Tendermint in non-adversarial conditions,
 61 | where all nodes are online and synced and no accommodations are made for asynchrony.
 62 | That is, an artificially high TimeoutPropose is used (10 seconds), 
 63 | and all other timeout parameters are set to 1 millisecond.
 64 | Additionally, all mempool activity is disabled 
 65 | (no gossiping of transactions or rechecking them after commits),
 66 | and an in-process nil application is used to bypass TMSP.
 67 | This serves as a control scenario for evaluating the performance drop in the face of faults and/or asynchrony.
 68 | 
 69 | Experiments are run on validator set sizes doubling in size from two to 64, and on block sizes doubling from 128 to 32768.
 70 | Transactions are preloaded on each validator. Each experiment is run for 16 blocks. 
 71 | 
 72 | \begin{figure}[]
 73 | 	\centering
 74 | 	\begin{subfigure}{0.8 \textwidth}
 75 | 		\includegraphics[width=\linewidth,height=\textheight,keepaspectratio]{figures/throughput/latency-throughput.pdf}
 76 | 	\end{subfigure}
 77 | 
 78 | 	\begin{subfigure}{0.8 \textwidth}
 79 | 		\includegraphics[width=\linewidth,height=\textheight,keepaspectratio]{figures/throughput/throughput-blocksize.pdf}
 80 | 	\end{subfigure}
 81 | 	\centering
 82 | 	\caption[Latency-Throughput trade-off in non-faulty global network]{Latency-throughput trade-off.
 83 | Larger blocks incur diminishing 
 84 | returns in transaction throughput, with an ultimate capacity at around 10,000 txs/s}
 85 | 	\label{fig:exp:throughput}
 86 | \end{figure}
 87 | 
 88 | As can be seen in Figure \ref{fig:exp:throughput},
 89 | Tendermint easily handles thousands of transactions per second with around one second block latency,
 90 | though there appears to be a capacity limit at around ten thousand transactions per second.
 91 | A block of 16384 transactions is about 4 MB in size, and analysis of network bandwidth shows each connection
 92 | easily reaching upwards of 20 MB/s, though analysis of the logs shows that at high block sizes, 
 93 | validators can spend upwards of two seconds waiting for block parts.
 94 | Additionally, experiments in single data centers, as shown in Figure \ref{fig:exp:throughput:single},
 95 | demonstrate that much higher throughputs are possible,
 96 | while experiments on much larger machines exhibit more consistent performance,
 97 | relieving the capacity limit, as shown in Figure \ref{fig:exp:throughput:large}.
 98 | We leave further investigations of this capacity limit to future work.
 99 | 
100 | \begin{figure}[]
101 | 	\centering
102 | 	\begin{subfigure}{0.8 \textwidth}
103 | 		\includegraphics[width=\linewidth,height=\textheight,keepaspectratio]{figures/throughput/single_datacenter/latency-throughput.pdf}
104 | 		\centering
105 | 	\end{subfigure}
106 | 
107 | 	\begin{subfigure}{0.8 \textwidth}
108 | 		\includegraphics[width=\linewidth,height=\textheight,keepaspectratio]{figures/throughput/single_datacenter/throughput-blocksize.pdf}
109 | 	\end{subfigure}
110 | 	\caption[Latency-throughput trade-off in non-faulty local network]{Single datacenter.
111 | When messages don't need to cross the public Internet, Tendermint is capable of tens of thousands of transactions per second.}
112 | 	\label{fig:exp:throughput:single}
113 | \end{figure}
114 | 
115 | 
116 | 
117 | \begin{figure}[]
118 | 	\centering
119 | 	\begin{subfigure}{0.8 \textwidth}
120 | 		\includegraphics[width=\linewidth,height=\textheight,keepaspectratio]{figures/throughput/large_instances/latency-throughput.pdf}
121 | 		\centering
122 | 	\end{subfigure}
123 | 
124 | 	\begin{subfigure}{0.8 \textwidth}
125 | 		\includegraphics[width=\linewidth,height=\textheight,keepaspectratio]{figures/throughput/large_instances/throughput-blocksize.pdf}
126 | 	\end{subfigure}
127 | 	\centering
128 | 	\caption[Latency-Throughput trade-off in non-faulty global network of large machines]{Large machines.
129 | With 32 vCPUs and 60 GB of RAM, transaction throughput increases linearly with block-size, 
130 | relieving the capacity limits found on smaller machines.}
131 | 	\label{fig:exp:throughput:large}
132 | \end{figure}
133 | 
134 | In the experiments that follow, various forms of fault are injected
135 | and latency statistics presented.
136 | Each experiment was run for validator set sizes doubling from 4 to 32, 
137 | for varying values of TimeoutPropose, and with a block size of 2048 transactions.
138 | 
139 | \section{Crash Failures}
140 | 
141 | To evaluate the performance of a network subject to crash failures, 
142 | every three seconds $N_{fault}$ validators were randomly selected,
143 | stopped, and restarted three seconds later.
144 | 
145 | The results in Table \ref{fig:exp:crash_failure} demonstrate that 
146 | performance under this crash failure scenario drops by about 
147 | $50\%$, and that larger TimeoutPropose values help moderate latencies. 
148 | While the average latency increases to about two seconds,
149 | the median is closer to one second, and latencies may run as high as ten or twenty seconds,
150 | though in one case it was as high as seventy seconds.
151 | It is likely that modifying TimeoutPropose to be slightly non-deterministic may
152 | reduce the probability of such extreme latencies.
153 | 
154 | \begin{table}
155 | 	\input{figures/throughput/crash_tables}
156 | 	\caption[Latency statistics under crash faults]{Crash-fault latency statistics. Every three seconds, a random selection of $N_{fault}$ validators were crashed, and restarted three seconds later. This crash-restart procedure continued for 200 blocks. Each table reports the minimum, maximum, average, median, and $95^{th}$ percentile of the block latencies, for varying values of the TimeoutPropose parameter.}
157 | 	\label{fig:exp:crash_failure}
158 | \end{table}
159 | 
160 | \section{Random Network Delay}
161 | 
162 | Another form of fault, which may be attributed either to Byzantine behaviour or to network asynchrony,
163 | is to inject random delays into every read and write to a network connection.
164 | In this experiment, before every read and write on every network connection,
165 | $N_{fault}$ of the validators slept for $X$ milliseconds, 
166 | where $X$ was drawn uniformly on $(0, 3000)$.
167 | As can be seen in Table \ref{fig:exp:delay}, 
168 | latencies are similar to the crash failure scenario, 
169 | though increasing the TimeoutPropose has the opposite effect.
170 | Since not all validators were faulty, 
171 | small values of TimeoutPropose allow faulty validators to be skipped quickly. 
172 | If all validators were subject to the network delays, 
173 | larger TimeoutPropose values would be expected to reduce latency
174 | since there would be no non-faulty validators to skip to,
175 | and more time would be provided to receive delayed messages.
176 | 
177 | \begin{table}[]
178 | 	\input{figures/throughput/delay_tables}
179 | 	\caption[Latency statistics under randomized delays]{Random delay latency statistics. $N_{fault}$ validators were set to inject a random delay
180 | before every read and write, where the delay time was chosen uniformly on $(0, 3000)$ milliseconds.}
181 | 	\label{fig:exp:delay}
182 | \end{table}
183 | 
184 | 
185 | \section{Byzantine Failures}
186 | 
187 | A more explicit Byzantine failure can be injected through the following modifications
188 | to the state machine:
189 | 
190 | \begin{itemize}
191 | \item{Conflicting proposals: during its time to propose, a Byzantine validator signs two conflicting proposals and broadcasts each, along with a pre-vote and pre-commit, to separate halves of its connected peers.} 
192 | \item{No nil votes: a Byzantine validator never signs a nil-vote.}
193 | \item{Sign every proposal: a Byzantine validator submits a pre-vote and a pre-commit for every proposal it sees, as soon as it sees it.}
194 | \end{itemize}
195 | 
196 | Taken together, these behaviours explicitly violate the double signing and locking rules. 
197 | Note, however, that the behaviour is dominated by the broadcast of conflicting proposals,
198 | and the eventual committing of one of them.
199 | More complex arrangements of Byzantine strategies are left for future work.
200 |  
201 | Despite the injected Byzantine faults, 
202 | which would cause many systems to fail completely and immediately,
203 | Tendermint maintains respectable latencies, as can be seen from Table \ref{fig:exp:byz_failure}.
204 | Since these faults have little to do with asynchrony,
205 | there is no real discernible effect from TimeoutPropose.
206 | The performance also falls off with larger validator sets,
207 | which may be the result of a naive algorithm for handling Byzantine votes.
208 | 
209 | \begin{table}[]
210 | 	\input{figures/throughput/byz_tables}
211 | 	\caption[Latency statistics under Byzantine faults]{Byzantine-fault latency statistics.
212 | Byzantine validators propose conflicting blocks and vote on any proposal as soon as they see it.
213 | Each table reports the minimum, maximum, average, median, and $95^{th}$ percentile of the block latencies, for varying values of the TimeoutPropose parameter.}
214 | 	\label{fig:exp:byz_failure}
215 | \end{table}
216 | 
217 | \iffalse
218 | \section{A real application: ErisDB}
219 | 
220 | The experiments presented so far have been artificial to the extent that transactions incur no processing logic.
221 | This was done deliberately to benchmark the core consensus engine. 
222 | To get a handle on a real application, we present throughput and latency results for ErisDB, 
223 | a blockchain application developed primarily by the author at Eris Industries, in collaboration with Jae Kwon.
224 | ErisDB provides a rich set of features, including a native currency, the Ethereum Virtual Machine (EVM),
225 | a native name registry, and a rich permissioning system.
226 | Transactions must be digitally signed using ED25519 signatures to be valid, and all state queries and updates are done on a merkle IAVL tree.
227 | 
228 | For this experiment, a simple contract with two methods, get and set, is deployed to the virtual machine.
229 | The contract is written in solidity, a high-level, javascript-like language developed by Ethereum which compiles down to EVM byte code.
230 | The application state is preloaded with 1000 accounts, and transactions are signed by private keys drawn uniformly from those accounts.
231 | Keys and values for the get and set methods are fixed at 32-bytes each, to reflect the native architecture of the EVM \cite{ethereum_yellow_paper}.
232 | Transactions are generated for a read/write load of 10/90 (i.e. 90\% of transactions call the set method).
233 | 
234 | \fi
235 | 
236 | \section{Related Work}
237 | 
238 | The throughput experiments in this chapter were modeled after those in \cite{honeybadger},
239 | which benchmarks the performance of a PBFT implementation 
240 | and a new randomized BFT protocol called HoneyBadgerBFT.
241 | In their results, PBFT achieves over 15,000 transactions per second on four nodes,
242 | but decays exponentially as the number of nodes increases, 
243 | while HoneyBadgerBFT attains roughly even performance
244 | of between 10,000 and 15,000 transactions per second.
245 | Block latencies in HoneyBadgerBFT, however, are much higher, 
246 | closer to 10 seconds for validator sets of size 8, 16, and 32, and even more for larger ones.
247 | 
248 | A well known tool for studying consensus implementations is Jepsen \cite{jepsen},
249 | which is used to test the consistency guarantees of databases by simulating 
250 | many forms of network partition. 
251 | Testing Tendermint with Jepsen remains an exciting area for future work.
252 | 
253 | The author is not aware of any throughput experiments in the face of persistent Byzantine failures,
254 | like those presented here.
255 | 
256 | \section{Conclusion}
257 | 
258 | The implementation of Tendermint written by the author and Jae Kwon easily achieves 
259 | thousands of transactions per second on up to 64 nodes on machines distributed around the globe, 
260 | with latencies mostly in the one to two second range.
261 | This is highly competitive with other solutions, and especially with the current state of blockchains,
262 | with Bitcoin, for instance, capping out at around 7 transactions per second.
263 | Furthermore, our implementation is shown to be robust to crash faults, message delays,
264 | and deliberate Byzantine faults,
265 | being able to maintain over a thousand transactions per second in each scenario.
266 | 
267 | 
268 | 


--------------------------------------------------------------------------------
/chapters/subprotocols.tex:
--------------------------------------------------------------------------------
  1 | \chapter{Tendermint Subprotocols}
  2 | \label{ch:subprotocols}
  3 | 
  4 | The presentation of Tendermint consensus in the previous chapter left out a number of details
  5 | regarding the gossip protocols used to disseminate blocks, votes, transactions, 
  6 | and other peer information. 
  7 | This was done in order to focus in on the consensus protocol itself, 
  8 | without distraction from the hydra of practical software engineering.
  9 | This chapter describes one particular approach to filling in these details,
 10 | by implementing components as relatively independent reactors that are multiplexed over each peer connection.
 11 | 
 12 | \section{P2P-Networking}
 13 | 
 14 | On startup, each Tendermint node receives an initial list of peers to dial.
 15 | For each peer, a node maintains a persistent TCP connection over which multiple subprotocols are multiplexed in a rate-limited fashion.
 16 | Messages are serialized into a compact binary representation to be sent on the wire, and 
 17 | connections are encrypted via an authenticated encryption protocol \cite{authenticated_encryption}.
 18 | 
 19 | Each remaining section of this chapter describes a separate reactor that is multiplexed over each peer connection.
 20 | An additional peer exchange reactor can be run which allows nodes to request other peer addresses from each other and keep track of peers they have connected to before,
 21 | in order to stay connected to some minimum number of other peers.
 22 | 
 23 | \section{Consensus Gossip}
 24 | 
 25 | The consensus reactor wraps the consensus state machine, 
 26 | and ensures each node broadcasts to all peers its current state every time it changes.
 27 | In this way, each node keeps track of the consensus state of all its peers, 
 28 | allowing it to optimize the gossiping of messages to only send peers information they need at the very moment,
 29 | and which they don't already have.
 30 | For each peer, a node maintains two routines which continuously check for new information to send the peer,
 31 | namely, proposals and votes. 
 32 | Information should be gossiped in a ``rarest first'' manner in order to maximize 
 33 | gossip efficiency and minimize the chance that some information becomes unavailable \cite{rarest_first}.
 34 | 
 35 | 
 36 | \subsection{Block Data}
 37 | In Chapter \ref{ch:tendermint}, it was assumed that proposal messages include the block.
 38 | However, since blocks emerge from a single source and can be quite large, 
 39 | this puts undue pressure on the block proposer to upload the data to all other nodes;
 40 | blocks can be disseminated much more quickly if they are split into parts and gossiped.
 41 | 
 42 | A common approach to securely gossiping data, as popularized by various p2p protocols \cite{bittorrent,libswift}, 
 43 | is to use a Merkle tree \cite{merkle1987digital},
 44 | allowing each piece of the data to be accompanied by a short proof (logarithmic in the size of the data)
 45 | that the piece is a part of the whole. 
 46 | To use this approach, 
 47 | blocks are serialized and split into chunks of an appropriate size 
 48 | for the expected block size and number of validators,
 49 | and chunks are hashed into a Merkle tree. 
 50 | The signed proposal, instead of including the entire block, includes just the Merkle root hash,
 51 | allowing the network to co-operate in gossiping the chunks.
 52 | A node informs its peers every time it receives a chunk, 
 53 | in order to minimize the bandwidth wasted by transmitting the same chunk to a node more than once.
 54 | 
 55 | Once all the chunks are received, the block is deserialized and validated to ensure it refers correctly to the previous 
 56 | block, and that its various checksums, implemented as Merkle trees, are correct. 
 57 | While it was previously assumed that a validator does not pre-vote until the proposal (including the block) is received,
 58 | some performance benefit may be obtained by allowing validators to pre-vote after receiving a proposal, 
 59 | but before receiving the full block. This would imply that it is okay to pre-vote for what turns out to be an invalid block.
 60 | However, pre-committing for an invalid block must always be considered Byzantine.
 61 | 
 62 | Peers that are catching up (i.e.~are on an earlier height) are sent chunks for the height they are on,
 63 | and progress one block at a time.
 64 | 
 65 | \subsection{Votes}
 66 | 
 67 | At each step in the consensus state machine, after the proposal, a node is waiting for votes (or a local timeout) to progress.
 68 | If a peer has just entered a new height, it is sent pre-commits from the previous block,
 69 | so it may include them in the next block's \emph{LastCommit} if it's a proposer.
 70 | If a peer has pre-voted but has yet to pre-commit, or has pre-committed, but has yet to go to the next round,
 71 | it is sent pre-votes or pre-commits, respectively.
 72 | If a peer is catching up, it is sent the pre-commits for the committed block at its current height.
 73 | 
 74 | \section{Mempool}
 75 | 
 76 | Chapter \ref{ch:tendermint} made little mention of transactions, 
 77 | as Tendermint operates on blocks of transactions at a time, and has no concern for individual transactions,
 78 | so long as their checksum in the block is correct.
 79 | 
 80 | Transactions are managed independently in an in-memory cache, 
 81 | which, following Bitcoin, has come to be known as the \emph{mempool}.
 82 | Transactions are validated by the application logic when they are received and, if valid, 
 83 | added to the mempool and gossiped using an ordered multicast algorithm.
 84 | A node maintains a routine for each peer which ensures that transactions 
 85 | in the mempool are sent to the peer in the same order in which they were processed by the node.
 86 | 
 87 | Proposers reap transactions from the ordered list in the mempool for new block proposals.
 88 | Once a block is committed, all transactions included in the block are removed from the mempool,
 89 | and the remaining transactions are re-validated by the application logic,
 90 | as their validity may have changed on account of other transactions being committed, 
 91 | which the node may not have had in its mempool.
 92 | 
 93 | \section{Syncing the Blockchain}
 94 | 
 95 | The consensus reactor provides a relatively slow means of syncing with the latest state of the blockchain,
 96 | as it was designed for real-time consensus,
 97 | meaning peers wait to receive all information to commit a single block before worrying about the next block.
 98 | To accommodate peers that may be more than just a few blocks behind, 
 99 | an additional reactor, the blockchain reactor, allows peers to download many blocks in parallel,
100 | enabling a peer to sync hundreds of times faster than via the consensus reactor.
101 | 
102 | When a node connects to a new peer, the peer sends its current height.
103 | The node will request blocks, in order, beginning with its current height,
104 | from all peers that self-reported higher heights, and download the blocks concurrently, adding them to the block pool.
105 | Another routine continuously attempts to remove blocks from the pool and add them to the blockchain by validating and executing them, 
106 | two blocks at a time, against the latest state of the blockchain.
107 | Blocks must be validated two blocks at a time because the commit for one block is included as the LastCommit data in the next one.
108 | 
109 | The node continuously queries its peers for their current height, 
110 | and continues to concurrently request blocks until it has caught up to the highest height among its peers, 
111 | at which point it stops making requests for peer heights and starts the consensus reactor.
112 | 
113 | \section{Conclusion}
114 | 
115 | A number of subprotocols are required for a practical implementation of the Tendermint blockchain.
116 | These include the gossiping of consensus data (votes and proposals), of block data, and of transactions,
117 | and some means for new peers to quickly catch up with the latest state of the blockchain.
118 | 


--------------------------------------------------------------------------------
/chapters/theory.tex:
--------------------------------------------------------------------------------
  1 | \chapter{Theory}
  2 | 
  3 | This chapter introduces some theoretical formalisms for describing consensus networks.
  4 | First and foremost, we introduce a formal definition of trust on the basis of mutual information,
  5 | and show how the use of cryptography can increase the amount of trust in a system, 
  6 | enabling higher-level forms of communication.
  7 | Second, we formalize the consensus and atomic broadcast problems using process calculi 
  8 | and define a blockchain as a generic means for transforming consensus into atomic broadcast.
  9 | Third, we introduce Byzantine Failure Detectors for the detection of malicious processes,
 10 | and show how they can be used in a consensus protocol to achieve accountability.
 11 | Fourth, we consider probabilistic solutions to consensus, formalizing the common coin and proof-of-work approaches,
 12 | and show how a generalization of PoW in asynchronous conditions results in a protocol like Casper.
 13 | Finally, we describe how to formally introduce economics into the model, and discuss the resulting problem space.
 14 | 
 15 | \section{Trust and Information}
 16 | 
 17 | It is well known that \emph{trust}, defined as ...,
 18 | is a crucial element to maintaining productive socioeconomic systems \cite{trust}.
 19 | Intuitively, trust reduces uncertainty about the world, 
 20 | and enables higher-order forms of organization to flourish.
 21 | 
 22 | - communications channel, capacity as MI
 23 | - crypto primitives are axes for high entropy systems with high MI 'paths' for 'correct' processes
 24 | 
 25 | - crypto systems are stronger than purely info-theoretic ones \cite{ben1988completeness}
 26 | 
 27 | 
 28 | 
 29 | Suppose we have agents Alice and Bob, represented by random variables $A$ and $B$, 
 30 | operating in an uncertain environment, $X$.
 31 | Each agent maintains a representation of the world that defines a distribution over possible events 
 32 | in the universe, which for each agent consists of the other agent and the environment.
 33 | Let Alice's distribution be denoted $p_A(B, X) = p_A(B | X)p_A(X) $. 
 34 | The distribution has some entropy, $H[p_A]$. 
 35 | If Alice trusts Bob, we expect that the entropy, in particular that related to Bob, should decrease. 
 36 | 
 37 | 
 38 | We then define ``$A$ trusts $B$'' as an
 39 | 
 40 | 
 41 | Formally, we can define trust as a reduction in entropy 
 42 | 
 43 | 
 44 | 
 45 | 
 46 | 
 47 | \emph{Trust} as expected mutual information.
 48 | \emph{Correct-trust} as mutual information where you can see when it fails (crypto).
 49 | Show that correct-trust increases possible trust.
 50 | 
 51 | Digital signatures.
 52 | Merkle trees, erasure codes for broadcast.
 53 | Hash-chain links to simplify proposer logic.
 54 | Reduce complexity of network protocols by moving elements from data to authenticators.
 55 | 
 56 | 
 57 | 
 58 | 
 59 | 
 60 | 
 61 | \section{Consensus and Atomic Broadcast}
 62 | The problem has been pitched as consensus or atomic broadcast (ABC).
 63 | Consensus commits a value; ABC orders transactions.
 64 | Can show they are the same \cite{chandra1996unreliable}.
 65 | We show they are the same with generalized process calculus forms of each and a bisimulation between them.
 66 | Atomic broadcast is the more natural form for real systems.
 67 | 
 68 | Note the pi calculus doesn't allow a strictly composable encoding of broadcast \cite{ene1999expressiveness},
 69 | but we don't need it, since in practice each node has a network stack/kernel that manages broadcasts.
 70 | Further, we really do want point-to-point, rather than broadcast,
 71 | because we want connections to be encrypted on a per-connection basis,
 72 | though group-encrypted broadcast primitives would be an interesting pursuit.
 73 | 
 74 | Reliable broadcast (RBC) is a broadcast primitive satisfying
 75 | 
 76 | \begin{itemize}
 77 | \item validity - if a correct process broadcasts m, it eventually delivers m
 78 | \item agreement - if a correct process delivers m, all correct processes eventually deliver m
 79 | \item integrity - m is only delivered once, and only if broadcast by its sender
 80 | \end{itemize}
 81 | 
 82 | We model RBC as a pi-calculus process, 
 83 | $rbc(\hat{r}, \hat{d}) = (\nu \hat{x}) \prod_i rbc_i(r_i, d_i, \hat{x})$,
 84 | where $rbc_i$ is the instance of RBC running on node $i$, 
 85 | $\hat{r}$ are input channels, with one for each node, 
 86 | on which new requests from clients can be received, 
 87 | $d_i$ are $delivery$ channels, on which a node outputs RBC-delivered values,
 88 | and $\hat{x}$ are some shared variables.
 89 | 
 90 | We can state the properties in a temporal Hennessy-Milner logic with fixed-point operators:
 91 | \begin{itemize}
 92 | \item validity - $ \forall m$, and correct $i$, $rbc \models [ r_i?(m) ] . \nu Z . ( d_i!(m)T \vee [*]Z) $
 93 | \item agreement - $ \forall m$, and correct $i$, $rbc \models [ d_i!(m) ] . \bigwedge_{j \neq i} ( \nu Z . (d_j!(m)T \vee [*]Z) ) $
 94 | \item integrity - $ \forall m$, and correct $i$, $rbc \models [ d_i!(m) ] . [ * ] . \langle d_i!(m) \rangle ff $, and only if broadcast by its sender ...
 95 | \end{itemize}
 96 | 
 97 | Let us now model atomic broadcast ABC after RBC, as 
 98 | $abc(\hat{r}, \hat{d}) = (\nu \hat{x}) \prod_i abc_i(r_i, d_i, \hat{x})$,
 99 | with the same properties as $rbc$, but with the addition of \emph{total order},
100 | \begin{itemize}
101 | \item total order - if correct processes p and q deliver m and m', then p delivers m before m' iff q delivers m before m'
102 | \end{itemize}
103 | 
104 | 
105 | That is, ABC is identical to RBC, with the added constraint that reads off of any $d_i$ 
106 | must return the same values in the same order.
107 | 
108 | We can model consensus similarly, as 
109 | $cns(\hat{r}, \hat{d}) = (\nu \hat{x}) \prod_i cns_i(r_i, d_i, \hat{x})$,
110 | 
111 | with the following properties
112 | 
113 | \begin{itemize}
114 | \item termination - every correct process eventually decides
115 | \item integrity - every correct process decides at most once
116 | \item agreement - if one correct process decides $v1$ and another decides $v2$, then $v1=v2$
117 | \item validity - if a correct process decides $v$, at least one process proposed $v$
118 | \end{itemize}
119 | 
120 | Note that the forms of consensus and ABC are identical (save some function names),
121 | with the major difference in the properties relating to the fact that consensus
122 | manages only one value, while atomic broadcast may handle many.
123 | 
124 | To show an equivalence between ABC and consensus,
125 | we create a process context for each,
126 | yielding $ C_{CNS}[ abc_i ] $ and $ C_{ABC}[ cns_i ] $ 
127 | where we intend to show that 
128 | $ C_{CNS}[ abc_i ] \sim cns_i $ and $ C_{ABC}[ cns_i ] \sim abc_i $ for
129 | some weak bisimulation $\sim$.
130 | 
131 | Intuitively, consensus can be derived from ABC by deciding the first value fired on $d_i$,
132 | while ABC can be derived from consensus by running the consensus protocol multiple times,
133 | once for each value, or batch of values, to be atomically broadcast.
134 | Thus $ C_{CNS}[ ] $ is a context which restricts $d_i$, such that it is only read from once,
135 | while $ C_{ABC}[ ] $ is a context which manages multiple instances of consensus, delivering on $d_i$ many times.
136 | 
137 | 
138 | \section{Byzantine Failure Detectors}
139 | Failure detectors (FDs), an abstraction of timeouts,
140 | were introduced and used to solve consensus \cite{chandra1996unreliable}.
141 | FDs enable processes to keep a list of other processes they suspect to have crashed.
142 | Though unreliable, in that they may be suspicious of correct processes,
143 | FDs can be constrained by abstract asymptotic properties ensuring that 
144 | eventually, crashed processes are suspect, and correct processes are not.
145 | Notably, the formalism of FDs enables refined investigation of consensus algorithms.
146 | 
147 | Here, we extend the model to the Byzantine case, yielding Byzantine Failure Detectors,
148 | and show how they can be used as building blocks for the construction of BFT algorithms.
149 | 
150 | 
151 | Notes
152 | - "correct" behaviour vs arbitrary behaviour
153 | - nodes keep state of other nodes in order to know what is "allowed" and detect variation
154 | - depending on the protocol, there may be a tradeoff between detection and asynchrony
155 | 	- non-byz may be suspected as byz cuz of asynchrony
156 | 	- conjecture: this only possible in non-strongly-consistent protocols
157 | - tendermint has perfect byz detection since no tradeoff against asynchrony!
158 | - what about pbft? need to review view change
159 | - pi calc allows us to describe many of the BFDs implicitly by whether or not
160 | we even listen for the message (eg. proposing when its not your turn)
161 | - economics as a modulator for moving to next proposer
162 | 
163 | 
164 | 
165 | 
166 | 
167 | Byzantine Failure Detectors (BFDs) are a different breed.
168 | While Byzantine traditionally means ``arbitrary'', 
169 | it is in practice trivial to enforce simple rules which restrict
170 | the set of messages which might affect the state.
171 | Furthermore, Byzantine behaviour wherein a process does not send a message when it should have 
172 | is indistinguishable from asynchrony.
173 | Thus, BFDs must only be concerned with a particular class of Byzantine behaviour,
174 | namely, that which is \emph{malicious}.
175 | Unlike FDs, BFDs are not unreliable - they can not mistakenly suspect other processes
176 | of being Byzantine, as triggering the BFD requires cryptographic proof.
177 | 
178 | There are two forms of malicious behaviour, which we call divergent-broadcast (DBC)
179 | and unjustified-broadcast (UBC). In DBC, a process sends conflicting messages to peers.
180 | Detecting DBC simply requires receiving the conflicting messages.
181 | In UBC, a process sends a message which claims something about its internal state which is untrue.
182 | To detect UBC requires the use of functions from the particular consensus protocol itself,
183 | which must define a set of justification rules.
184 | Either form of malicious behaviour is sufficient to violate safety in a non-Byzantine protocol.
185 | 
186 | We now define a BFD as satisfying the following property:
187 | 
188 | \begin{itemize}
189 | \item{Eventually, every DBC and UBC is detected by at least one correct process}
190 | \end{itemize}
191 | 
192 | In practice, using a BFD requires a correct process to keep a list of all messages it has delivered,
193 | and to reliably broadcast those messages to all other correct processes.
194 | Further, the BFD must be informed by the rules of the consensus protocol as to what constitutes a UBC.
195 | 
196 | Note that something which is a UBC in one protocol may not be in another.
197 | We are interested in this boundary, particularly the weakest UBCs necessary for consensus.
198 | Introduction of economics can weaken the UBCs necessary
199 | 	- eg PoW: mining an alternate chain doesn't get detected and punished, but economics yields an op cost
200 | 	- eg Casper: changing bets might incur small economic cost for larger reward of getting consensus sooner
201 | 
202 | 
203 | Many BFT protocols are tolerant of Byzantine faults, but don't emphasize detection.
204 | Thus, while a BFD is not necessary for Byzantine consensus, we show that, for some 
205 | forms of the justification function, it is sufficient,
206 | and that their use elucidates deeper structure in the BFT problem.
207 | 
208 | 
209 | 
210 | 
211 | 
212 | 
213 | When no processes are malicious,
214 | every message from a process can be trusted as an accurate reflection of that process' state.
215 | 
216 | 
217 | 
218 | FDs can be formalized with the pi-calculus, 
219 | and resulting consensus protocols subject to a matrix analysis \cite{nestmann2003modeling}.
220 | We'd like a similar analysis, with a more general notion of justification.
221 | Most previous byz algos dont focus on detection, just tolerance.
222 | 
223 | Further, we'd like to show that justifications can be removed from the real-time
224 | protocol and moved to a post-failure recovery mode protocol, under some weak network assumptions,
225 | without compromising accountability.
226 | 
227 | Start by defining messages as consisting of three parts: indices, authenticators, data.
228 | Indices are things like height number, round number, message type number, etc.
229 | Authenticators are signatures and hashes.
230 | Cant do BFT without authenticators (tho wtf about some of those papers ...)
231 | Byzantine msgs are those with the same indices and authenticators, but different data.
232 | Note this assumes deterministic authenticators, and implies that detection requires gossip.
233 | We also want byzantine msgs to be those that are "unjustified".
234 | Introduce "justification" rules which map $(AUTH, DATA)$ to $\{True, False\}$.
235 | 
236 | Also note how moving data/indices into auth using hashes can simplify protocols
237 | (eg. the way linking to the previous block avoids subtle leader crash/recover scenarios).
238 | 
239 | \section{Probabilistic Solutions}
240 | Consensus can be solved with FDs or with randomness.
241 | Common coin gives probabilistic liveness, where randomness is over what value sent.
242 | Bitcoin gives probabilistic safety, where randomness is over when value sent.
243 | There seems to be a duality here, common coin being like $\wedge$ and bitcoin like $\vee$.
244 | How to reflect in stochastic-pi calc logic.
245 | 
246 | Bitcoin makes synchrony assumption that network latency is much less than block time,
247 | allowing it to give strong (economic/probabilistic) serializability guarantees.
248 | GHOST weakens the synchrony assumption by using additional network information to inform fork choice.
249 | Is the asynchronous generalization of GHOST something like casper?
250 | How does the move from PoW to PoS complement that from synchrony to asynchrony?
251 | 
252 | \section{Economics}
253 | Suppose the consensus system is probabilistic, ala some stochastic process calculus.
254 | Economics are a way to parameterize the Comm rates of the calculus,
255 | such that the param values may change, subject to some constraints 
256 | (eg. the avg value over time is constant, etc.).
257 | The point of the system is to be valuable,
258 | and have this value be contributed back to the processes as wealth.
259 | Economics makes the system reflexive, in the sense that,
260 | given finite critical resources and a driving energy source,
261 | the system must increase its efficiency (ie. innovate, build wealth, etc),
262 | to maintain liveness during growth.
263 | Integration with food systems, be an organism, etc.
264 | 
265 | Note economics can also act as a weak/parametrized form of synchrony!
266 | 
267 | \section{Residence Times}
268 | Drawing inspiration from ecology and biophysics, 
269 | where it's been suggested that residence time of energy in a non-equilibrium system is a 
270 | measure of its organizational complexity.
271 | 
272 | Consider a network of processes in such a light.
273 | Energy input is receipt of a msg. 
274 | Causes a tree of execution. 
275 | Residence time is (eg.) time until all branches of the tree either communicate with other trees or halt.
276 | Here, txs are the inputs (ie. they should pay fees!).
277 | Another energy input is eg. POW - can be measured as a packet of energy arriving as a new block.
278 | Without inflation, packets arrive and are immediately released as heat, minus what is paid in fees,
279 | which hang around as a balance and prolong the residence time.
280 | The inflation increases the residence time, but is clearly unsustainable - distribution mechanisms are important tho!
281 | Alternatively, in POS, packets come in as security deposits, which sit around for a long time ...
282 | 


--------------------------------------------------------------------------------
/chapters/title.tex:
--------------------------------------------------------------------------------
 1 | \begin{titlepage}
 2 |     \begin{center}
 3 |         \vspace*{1cm}
 4 |         
 5 |         \textbf{\large{Tendermint: Byzantine Fault Tolerance in the Age of Blockchains}}\\
 6 |         
 7 |         \vspace{1 cm}
 8 | 
 9 |         \textbf{by} \\
10 |         \vspace{1 cm}
11 |         \textbf{Ethan Buchman}
12 |         
13 |         \vfill
14 |         
15 |         A Thesis \\
16 | 	presented to \\
17 | 	The University of Guelph 
18 | 
19 |         \vspace{0.8cm}
20 | 
21 | 	In partial fulfilment of requirements \\
22 | 	for the degree of \\
23 | 	Master of Applied Science \\
24 |        	in \\
25 | 	Engineering Systems and Computing
26 | 
27 | 	\vspace{0.8cm}
28 | 	Guelph, Ontario, Canada
29 | 
30 | 	\vspace{0.8cm}
31 | 	\copyright Ethan Buchman, June, 2016
32 |     \end{center}
33 | \end{titlepage}
34 | 


--------------------------------------------------------------------------------
/figures/descriptions/block_header.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | \begin{verbatim}
 3 | type Header struct {
 4 | 	ChainID            string        
 5 | 	Height             int           
 6 | 	Time               time.Time     
 7 | 	NumTxs             int           
 8 | 	LastBlockHash      []byte        
 9 | 	LastBlockParts     PartSetHeader 
10 | 	LastCommitHash []byte       // Merkle root hash of LastCommit
11 | 	DataHash           []byte   // Merkle root hash of transactions
12 | 	ValidatorsHash     []byte   // Merkle root hash of validator set
13 | 	AppHash            []byte   // state Merkle root from previous block's transactions
14 | }
15 | 
16 | type PartSetHeader struct {
17 | 	Total int    
18 | 	Hash  []byte 
19 | }
20 | \end{verbatim}
21 | 	\caption[Block Header Structure]{The fields required for a valid block header. The validity of all fields is checked before pre-commit}
22 | 


--------------------------------------------------------------------------------
/figures/descriptions/consensus_rules.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | \underline{Consensus State Rules}
 3 | \begin{description}
 4 | 	\item[Proposal:] Wait up to \emph{TimeoutPropose} for a proposal from the correct validator for the current height and round.
 5 | 	\item[Prevote:]  If a proposal comes with a valid signature from the correct proposer for a validator’s current height and round, and the validator is not locked, it should prevote for the proposal block. Else, prevote nil.
 6 | 	\item[Precommit:] If a validator receives prevotes from $+\frac{2}{3}$ validators for the same block, it should precommit for that block. If the $+\frac{2}{3}$ prevotes are not for the same block, it should wait \emph{TimeoutPrevote}, and then precommit nil.
 7 | 	\item[Commit:] If a validator receives precommits from $+\frac{2}{3}$ validators for the same block, it should commit that block, and go to the next height.  If the $+\frac{2}{3}$ precommits are not for the same block, it should wait \emph{TimeoutPrecommit}, and then go to the next round.
 8 | \end{description}
 9 | 
10 | \underline{Broadcast Rules}
11 | \begin{description}
12 | 	\item[No Double Signing:] a validator only signs for each message type (proposal, prevote, precommit) once at a given height and round.
13 | 	\item[Prevote the Lock:] A validator is locked on the last block they precommitted. They must propose it and prevote for it in future rounds, until they unlock.
14 | 	\item[Unlock on Polka:] a validator may only unlock if there has been a polka at a round after they locked.
15 | \end{description}
16 | Violation of any of the Broadcast Rules is detectable and should be punished.
17 | 
18 | \caption[Summary of Tendermint protocol rules]{Summary of rules in the tendermint protocol. 
19 | $+\frac{2}{3}$ validators is short for ``more than two-thirds of validators''}
20 | 


--------------------------------------------------------------------------------
/figures/descriptions/data_structures.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | \vspace*{-1.5in}
 3 | 
 4 | \begin{lstlisting}
 5 | 
 6 | // Proposal for a block at a given height and round, signed by the proposer
 7 | type Proposal struct {
 8 |   Height           int                     
 9 |   Round            int                     
10 |   BlockHash        []byte                  
11 |   Signature        crypto.SignatureEd25519  // 64 bytes
12 | }
13 | 
14 | // Represents a prevote or precommit vote from validators for consensus.
15 | type Vote struct {
16 |   Height           int                     
17 |   Round            int                     
18 |   Type             byte                     // 1 for prevote, 2 for precommit
19 |   BlockHash        []byte                   // empty if vote is nil
20 |   Signature        crypto.SignatureEd25519  // 64 bytes
21 | }
22 | 
23 | // A vote message, gossiped to peers
24 | type VoteMessage struct {
25 |   ValidatorIndex int
26 |   Vote           *types.Vote
27 | }
28 | 
29 | // A proposal message, gossiped to peers
30 | type ProposalMessage struct {
31 |   Proposal *types.Proposal
32 | }
33 | 
34 | // Current local state of a validator's consensus machine
35 | type RoundState struct {
36 |   Height             int // Height we are working on
37 |   Round              int
38 |   Step               RoundStepType
39 |   CommitTime         time.Time // Subjective time we received +2/3 precommits 
40 |   Validators         *types.ValidatorSet
41 |   Proposal           *types.Proposal
42 |   ProposalBlock      *types.Block
43 |   LockedRound        int
44 |   LockedBlock        *types.Block
45 |   Votes              *HeightVoteSet // Votes from all rounds at this height
46 |   CommitRound        int            //
47 |   LastCommit         *types.VoteSet // Last precommits at Height-1
48 |   LastValidators     *types.ValidatorSet
49 | }
50 | \end{lstlisting}
51 | 
52 | \caption[Summary of Tendermint protocol data types]{Summary of data types in the Tendermint protocol}
53 | 


--------------------------------------------------------------------------------
/figures/descriptions/safety_guarantees.tex:
--------------------------------------------------------------------------------
 1 | \textbf{Tendermint Safety Guarantees}
 2 | \begin{description}
 3 |   \item[Proposer Safety] \hfill \\
 4 | 	There is at most one valid proposer for every term.
 5 |   \item[Validator Append Only] \hfill \\
 6 | 	A validator never overwrites or deletes blocks it has committed.
 7 |   \item[Proposer Completeness] \hfill \\
 8 | 	If a block is committed at a given height, then that block will be present in the chain of all proposers at greater heights.
 9 |   \item[State Machine Safety] \hfill \\
10 | 	If a validator has applied a block at a given height to its state machine, no other validator will ever apply a different block for the same height.
11 | \end{description}
12 | \caption[Tendermint Safety Guarantees]{Tendermint guarantees that all of these properties are true, at all times, within the security guarantee. This set of properties was taken practically verbatim from \cite{raft_thesis}.}
13 | 


--------------------------------------------------------------------------------
/figures/descriptions/security_guarantees.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | \textbf{Tendermint Security Guarantees}
 3 | \begin{description}
 4 |   \item[Byzantine Fault Tolerance] \hfill \\
 5 | 	All properties in Figure~\ref{fig:tendermint_guarantees} are satisfied so long as fewer than one-third of validators are Byzantine.
 6 |   \item[Deterministic Accountability] \hfill \\
 7 | 	If one-third or more of validators, but less than half, are Byzantine, and thereby compromise safety, 
 8 | 	they can be specifically identified and held accountable for their actions.
 9 | \end{description}
10 | \caption[Tendermint Security Guarantees]{Tendermint guarantees these security properties, making it more suitable than algorithms like Raft and Paxos, and even other BFT algorithms like PBFT, for consortia with potentially malicious or untrusted actors}
11 | 


--------------------------------------------------------------------------------
/figures/descriptions/tendermint-pi1.tex:
--------------------------------------------------------------------------------
 1 | 		
 2 | \begin{tabular}{l}
 3 | 	\hline\\
 4 | 	$Consensus := \prod_{i=1}^N  PR_i^{0,\emptyset,\emptyset} $ \\\\
 5 | 
 6 | 		\hline \\
 7 | 		{$\!\begin{aligned}
 8 | 		PR_i^{r,p,v} := 
 9 | 			& \text{if } i=proposer(r) \text{ then } \\
10 | 				& \quad propose_i ! (prop) \| PV_i^{r,prop,v} \text{, where } prop = chooseProposal(p)\\
11 | 			& \text{ else if } p \neq \emptyset \text{ then}  \\
12 | 				& \quad PV_i^{r,p,v}  \\
13 | 			& \text{else} \\ 
14 | 				& \quad propose_{proposer(r)} ? (prop).PV_i^{r,prop,v} + susp_{proposer(r)}.PV_i^{r,\emptyset,v} \\
15 | 		\end{aligned}$} \\\\
16 | 
17 | 		\hline \\
18 | 		$PV_i^{r,p,v}:= prevote_i ! (p) \| (\nu \> c) ( \prod_{j=1}^N prevote_j ? (w) . c!(prevote_j, w)  \| PV1_i^{r,p,v}(c))$ \\\\
19 | 
20 | 		\hline \\
21 | 		{$\!\begin{aligned}
22 | 		PV1_i^{r,p,v}(c) := & \text{ if } max_{b}(|\left\{ w \in v_r^1 : w.block = b\right\}|) > \frac{2}{3} N \text{ then} \\
23 | 				& \quad PC_i^{r,b,v} \\
24 | 			& \text{else if }  | v_r^1 | > \frac{2}{3} N \text{ then} \\ 
25 | 				& \quad PC_i^{r,\emptyset,v} \\ 
26 | 			& \text{else} \\
27 | 				& \quad c?(pv, vote) . \text{ if } vote.round < r \text{ then} \\ 
28 | 						& \quad  \quad	pv?(w).c!(pv, w) \| PV1_i^{r,p,v}(c)\\ 
29 | 						& \quad  \text{else if } vote.round = r \text{ then} \\ 
30 | 						& \quad  \quad	PV1_i^{r,p,vote::v}(c) \\
31 | 						& \quad  \text{else } \\
32 | 						& \quad  \quad PR_i^{vote.round, p, vote::v} \\
33 | 		\end{aligned}$} \\\\
34 | 		\hline\\
35 | 
36 | \end{tabular}
37 | 


--------------------------------------------------------------------------------
/figures/descriptions/tendermint-pi2.tex:
--------------------------------------------------------------------------------
 1 | 		
 2 | \begin{tabular}{l}
 3 | 		\hline \\
 4 | 		$PC_i^{r,p,v}:= precommit_i ! (p) \| (\nu \> c) ( \prod_{j=1}^N precommit_j ? (w) . c!(precommit_j, w)  \| PC1_i^{r,p,v}(c))$ \\\\
 5 | 
 6 | 		\hline \\
 7 | 		{$\!\begin{aligned}
 8 | 		PC1_i^{r,p,v}(c) := 
 9 | 			& \text{ if } max_{b}(|\left\{ w \in v_r^2 : w.block = b\right\}|) > \frac{2}{3} N \text{ then} \\
10 | 				& \quad d_i!(b) \\
11 | 			& \text{else if }  | v_r^2 | > \frac{2}{3} N \text{ then} \\ 
12 | 				& \quad PR_i^{r+1,\emptyset,v} \\ 
13 | 			& \text{else} \\
14 | 				& \quad c?(pc, vote) .\text{ if } vote.round < r \text{ then} \\ 
15 | 						& \quad  \quad	pc?(w).c!(pc, w) \| PC1_i^{r,p,v}(c) \\ 
16 | 						& \quad  \text{else if } vote.round = r \text{ then} \\ 
17 | 						% (intentionally not blank: a paragraph break inside aligned is an error in math mode)
18 | 						& \quad  \quad	PC1_i^{r,p,vote::v}(c) \\
19 | 						& \quad  \text{else } \\
20 | 						& \quad  \quad PR_i^{vote.round, p, vote::v} \\
21 | 		\end{aligned}$} \\\\
22 | 		\hline\\
23 | 
24 | \end{tabular}
25 | 


--------------------------------------------------------------------------------
/figures/diagrams/abci.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/diagrams/abci.png


--------------------------------------------------------------------------------
/figures/diagrams/byzantine.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/diagrams/byzantine.pdf


--------------------------------------------------------------------------------
/figures/diagrams/byzantine.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/diagrams/byzantine.png


--------------------------------------------------------------------------------
/figures/diagrams/consensus_logic.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/diagrams/consensus_logic.pdf


--------------------------------------------------------------------------------
/figures/diagrams/consensus_logic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/diagrams/consensus_logic.png


--------------------------------------------------------------------------------
/figures/diagrams/state_machine.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/diagrams/state_machine.pdf


--------------------------------------------------------------------------------
/figures/diagrams/state_machine.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/diagrams/state_machine.png


--------------------------------------------------------------------------------
/figures/diagrams/tmsp.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/diagrams/tmsp.pdf


--------------------------------------------------------------------------------
/figures/diagrams/tmsp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/diagrams/tmsp.png


--------------------------------------------------------------------------------
/figures/throughput/byz_tables.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | \begin{subtable}{.5 \linewidth}
 3 | 	\centering
 4 | 	\begin{tabular}{| l | l | l | l | l | l | }
 5 | 		\hline
 6 | 		TimeoutPropose & Min & Max & Mean & Median & $95^{th} \ \%-ile$ \\ \hline
 7 | 		1000 & 868 & 3888 & 1450 & 1086 & 3320 \\ \hline
 8 | 		2000 & 929 & 4375 & 1786 & 1272 & 4166 \\ \hline
 9 | 		3000 & 881 & 4363 & 1224 & 1099 & 1680 \\ \hline
10 | 		4000 & 824 & 8256 & 1693 & 1272 & 2607 \\ \hline
11 | 	\end{tabular}
12 | 	\caption{4 Validators}
13 | \end{subtable}
14 | 
15 | 
16 | \begin{subtable}{.5 \linewidth}
17 | 	\centering
18 | 	\begin{tabular}{| l | l | l | l | l | l | }
19 | 		\hline
20 | 		TimeoutPropose & Min & Max & Mean & Median & $95^{th} \ \%-ile$ \\ \hline
21 | 		1000 & 771 & 3445 & 1472 & 916 & 3288 \\ \hline
22 | 		2000 & 731 & 3661 & 1426 & 902 & 3339 \\ \hline
23 | 		3000 & 835 & 6402 & 1912 & 962 & 6155 \\ \hline
24 | 		4000 & 811 & 4462 & 1512 & 964 & 3592 \\ \hline
25 | 	\end{tabular}
26 | 	\caption{8 Validators}
27 | \end{subtable}
28 | 
29 | 
30 | \begin{subtable}{.5 \linewidth}
31 | 	\centering
32 | 	\begin{tabular}{| l | l | l | l | l | l | }
33 | 		\hline
34 | 		TimeoutPropose & Min & Max & Mean & Median & $95^{th} \ \%-ile$ \\ \hline
35 | 		1000 & 877 & 15930 & 2086 & 1024 & 5844 \\ \hline
36 | 		2000 & 808 & 5737 & 1580 & 1027 & 4155 \\ \hline
37 | 		3000 & 919 & 10533 & 1801 & 1110 & 4174 \\ \hline
38 | 		4000 & 915 & 5589 & 1745 & 1095 & 4181 \\ \hline
39 | 	\end{tabular}
40 | 	\caption{16 Validators}
41 | \end{subtable}
42 | 
43 | 
44 | \begin{subtable}{.5 \linewidth}
45 | 	\centering
46 | 	\begin{tabular}{| l | l | l | l | l | l | }
47 | 		\hline
48 | 		TimeoutPropose & Min & Max & Mean & Median & $95^{th} \ \%-ile$ \\ \hline
49 | 		1000 & 1594 & 11730 & 2680 & 1854 & 5016 \\ \hline
50 | 		2000 & 1496 & 17801 & 3430 & 1874 & 11730 \\ \hline
51 | 		3000 & 1504 & 15963 & 3280 & 1736 & 9569 \\ \hline
52 | 		4000 & 1490 & 24836 & 3940 & 1773 & 12866 \\ \hline
53 | 	\end{tabular}
54 | 	\caption{32 Validators}
55 | \end{subtable}
56 | 
57 | 


--------------------------------------------------------------------------------
/figures/throughput/crash_tables.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | \begin{subtable}{.5 \linewidth}
 3 | 	\centering
 4 | 	\begin{tabular}{| l | l | l | l | l | l | }
 5 | 		\hline
 6 | 		TimeoutPropose & Min & Max & Mean & Median & $95^{th} \ \%-ile$ \\ \hline
 7 | 		500 & 434 & 15318 & 2179 & 1102 & 5575 \\ \hline
 8 | 		1000 & 516 & 18149 & 2180 & 1046 & 5677 \\ \hline
 9 | 		2000 & 473 & 15067 & 2044 & 1049 & 5479 \\ \hline
10 | 		3000 & 428 & 9964 & 2005 & 1096 & 5502 \\ \hline
11 | 	\end{tabular}
12 | 	\caption{4 Validators}
13 | \end{subtable}
14 | 
15 | 
16 | \begin{subtable}{.5 \linewidth}
17 | 	\centering
18 | 	\begin{tabular}{| l | l | l | l | l | l | }
19 | 		\hline
20 | 		TimeoutPropose & Min & Max & Mean & Median & $95^{th} \ \%-ile$ \\ \hline
21 | 		500 & 618 & 126481 & 2679 & 990 & 5589 \\ \hline
22 | 		1000 & 570 & 9832 & 1763 & 962 & 5835 \\ \hline
23 | 		2000 & 594 & 8869 & 1658 & 968 & 5481 \\ \hline
24 | 		3000 & 535 & 10101 & 1633 & 959 & 5485 \\ \hline
25 | 	\end{tabular}
26 | 	\caption{8 Validators}
27 | \end{subtable}
28 | 
29 | 
30 | \begin{subtable}{.5 \linewidth}
31 | 	\centering
32 | 	\begin{tabular}{| l | l | l | l | l | l | }
33 | 		\hline
34 | 		TimeoutPropose & Min & Max & Mean & Median & $95^{th} \ \%-ile$ \\ \hline
35 | 		500 & 782 & 21354 & 1977 & 1001 & 5930 \\ \hline
36 | 		1000 & 758 & 12659 & 1761 & 981 & 5642 \\ \hline
37 | 		2000 & 751 & 21285 & 2041 & 1005 & 6872 \\ \hline
38 | 		3000 & 719 & 72406 & 2395 & 991 & 5987 \\ \hline
39 | 	\end{tabular}
40 | 	\caption{16 Validators}
41 | \end{subtable}
42 | 
43 | 
44 | \begin{subtable}{.5 \linewidth}
45 | 	\centering
46 | 	\begin{tabular}{| l | l | l | l | l | l | }
47 | 		\hline
48 | 		TimeoutPropose & Min & Max & Mean & Median & $95^{th} \ \%-ile$ \\ \hline
49 | 		500 & 760 & 24692 & 2591 & 1087 & 14025 \\ \hline
50 | 		1000 & 755 & 19696 & 2328 & 1119 & 9321 \\ \hline
51 | 		2000 & 852 & 21044 & 2178 & 1141 & 6514 \\ \hline
52 | 		3000 & 763 & 25587 & 2289 & 1119 & 6707 \\ \hline
53 | 	\end{tabular}
54 | 	\caption{32 Validators}
55 | \end{subtable}
56 | 
57 | 


--------------------------------------------------------------------------------
/figures/throughput/delay_tables.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | \begin{subtable}{.5 \linewidth}
 3 | 	\centering
 4 | 	\begin{tabular}{| l | l | l | l | l | l | }
 5 | 		\hline
 6 | 		TimeoutPropose & Min & Max & Mean & Median & $95^{th} \ \%-ile$ \\ \hline
 7 | 		1000 & 873 & 2796 & 1437 & 1036 & 2627 \\ \hline
 8 | 		2000 & 831 & 4549 & 1843 & 1180 & 4036 \\ \hline
 9 | 		3000 & 921 & 5782 & 2273 & 1251 & 5491 \\ \hline
10 | 		4000 & 967 & 6875 & 2700 & 1413 & 6781 \\ \hline
11 | 	\end{tabular}
12 | 	\caption{4 Validators}
13 | \end{subtable}
14 | 
15 | 
16 | \begin{subtable}{.5 \linewidth}
17 | 	\centering
18 | 	\begin{tabular}{| l | l | l | l | l | l | }
19 | 		\hline
20 | 		TimeoutPropose & Min & Max & Mean & Median & $95^{th} \ \%-ile$ \\ \hline
21 | 		1000 & 870 & 2840 & 1449 & 1040 & 2786 \\ \hline
22 | 		2000 & 957 & 4268 & 1848 & 1076 & 4148 \\ \hline
23 | 		3000 & 859 & 5724 & 2156 & 1100 & 5649 \\ \hline
24 | 		4000 & 897 & 11859 & 3055 & 1093 & 11805 \\ \hline
25 | 	\end{tabular}
26 | 	\caption{8 Validators}
27 | \end{subtable}
28 | 
29 | 
30 | \begin{subtable}{.5 \linewidth}
31 | 	\centering
32 | 	\begin{tabular}{| l | l | l | l | l | l | }
33 | 		\hline
34 | 		TimeoutPropose & Min & Max & Mean & Median & $95^{th} \ \%-ile$ \\ \hline
35 | 		1000 & 914 & 5595 & 1821 & 1135 & 5466 \\ \hline
36 | 		2000 & 950 & 7782 & 2490 & 1165 & 7650 \\ \hline
37 | 		3000 & 978 & 10305 & 3049 & 1163 & 9890 \\ \hline
38 | 		4000 & 1018 & 6890 & 2808 & 1174 & 6813 \\ \hline
39 | 	\end{tabular}
40 | 	\caption{16 Validators}
41 | \end{subtable}
42 | 
43 | 
44 | \begin{subtable}{.5 \linewidth}
45 | 	\centering
46 | 	\begin{tabular}{| l | l | l | l | l | l | }
47 | 		\hline
48 | 		TimeoutPropose & Min & Max & Mean & Median & $95^{th} \ \%-ile$ \\ \hline
49 | 		1000 & 1202 & 8562 & 2219 & 1349 & 5733 \\ \hline
50 | 		2000 & 1196 & 7878 & 2549 & 1365 & 7579 \\ \hline
51 | 		3000 & 1164 & 10082 & 3003 & 1382 & 9805 \\ \hline
52 | 		4000 & 1223 & 17571 & 3696 & 1392 & 12014 \\ \hline
53 | 	\end{tabular}
54 | 	\caption{32 Validators}
55 | \end{subtable}
56 | 
57 | 


--------------------------------------------------------------------------------
/figures/throughput/large_instances/latency-throughput.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/throughput/large_instances/latency-throughput.pdf


--------------------------------------------------------------------------------
/figures/throughput/large_instances/latency-throughput.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/throughput/large_instances/latency-throughput.png


--------------------------------------------------------------------------------
/figures/throughput/large_instances/throughput-blocksize.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/throughput/large_instances/throughput-blocksize.pdf


--------------------------------------------------------------------------------
/figures/throughput/large_instances/throughput-blocksize.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/throughput/large_instances/throughput-blocksize.png


--------------------------------------------------------------------------------
/figures/throughput/latency-throughput.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/throughput/latency-throughput.pdf


--------------------------------------------------------------------------------
/figures/throughput/latency-throughput.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/throughput/latency-throughput.png


--------------------------------------------------------------------------------
/figures/throughput/single_datacenter/latency-throughput.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/throughput/single_datacenter/latency-throughput.pdf


--------------------------------------------------------------------------------
/figures/throughput/single_datacenter/latency-throughput.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/throughput/single_datacenter/latency-throughput.png


--------------------------------------------------------------------------------
/figures/throughput/single_datacenter/throughput-blocksize.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/throughput/single_datacenter/throughput-blocksize.pdf


--------------------------------------------------------------------------------
/figures/throughput/single_datacenter/throughput-blocksize.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/throughput/single_datacenter/throughput-blocksize.png


--------------------------------------------------------------------------------
/figures/throughput/throughput-blocksize.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/throughput/throughput-blocksize.pdf


--------------------------------------------------------------------------------
/figures/throughput/throughput-blocksize.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/throughput/throughput-blocksize.png


--------------------------------------------------------------------------------
/listings-golang.sty:
--------------------------------------------------------------------------------
 1 | %% Golang definition for listings
 2 | %% http://github.io/julienc91/lstlistings-golang
 3 | %%
 4 | \RequirePackage{listings}
 5 | 
 6 | \lstdefinelanguage{Golang}%
 7 |   {morekeywords=[1]{package,import,func,type,struct,return,defer,panic,%
 8 |      recover,select,var,const,iota,},%
 9 |    morekeywords=[2]{string,uint,uint8,uint16,uint32,uint64,int,int8,int16,%
10 |      int32,int64,bool,float32,float64,complex64,complex128,byte,rune,uintptr,%
11 |      error},%
12 |    morekeywords=[3]{interface,map,slice,make,new,nil,len,cap,copy,close,true,false,%
13 |      delete,append,real,imag,complex,chan,},%
14 |    morekeywords=[4]{for,break,continue,range,goto,switch,case,fallthrough,if,%
15 |      else,default,},%
16 |    morekeywords=[5]{Println,Printf,Error,},%
17 |    sensitive=true,%
18 |    morecomment=[l]{//},%
19 |    morecomment=[s]{/*}{*/},%
20 |    morestring=[b]',%
21 |    morestring=[b]",%
22 |    morestring=[s]{`}{`},%
23 |    }
24 | 


--------------------------------------------------------------------------------
/main.tex:
--------------------------------------------------------------------------------
 1 | 
 2 | \documentclass[12pt]{report}
 3 | \usepackage[utf8]{inputenc}
 4 | \usepackage{graphicx} 
 5 | \usepackage{listings} % code listings
 6 | \usepackage{listings-golang}
 7 | \usepackage{subcaption}
 8 | \usepackage{amsmath} 
 9 | \usepackage[backend=bibtex,block=ragged]{biblatex} % block=ragged wraps lines in the bib
10 | \graphicspath{ {images/} }
11 | \usepackage{float}
12 | \usepackage{xcolor}
13 | 
14 | %\floatstyle{boxed} 
15 | \restylefloat{figure}
16 | 
17 | 
18 | % color not working :(
19 | \lstset{ % add your own preferences
20 |     frame=single,
21 |     basicstyle=\footnotesize,
22 |     keywordstyle=\color{orange},
23 |     keywordstyle=[2]\color{green},
24 |     commentstyle=\color{blue},
25 |     %numbers=left,
26 |     %numbersep=5pt,
27 |     showstringspaces=false, 
28 |     stringstyle=\color{red},
29 |     tabsize=4,
30 |     language=Golang % this is it !
31 | }
32 | 
33 | 
34 | \pagenumbering{gobble}
35 | 
36 | \usepackage{hyperref}
37 | \hypersetup{
38 |     colorlinks=true,
39 |     linkcolor=blue,
40 |     filecolor=magenta,      
41 |     urlcolor=cyan,
42 | }
43 |  
44 | \urlstyle{same}
45 | 
46 | %\bibliographystyle{IEEEtran}
47 | \bibliography{bib/consensus,bib/crypto,bib/programming,bib/applied,bib/formal}
48 | \AtEveryBibitem{%
49 |   \clearfield{note}%
50 | }
51 | 
52 | % Fix to put commas between multiple footnotes (e.g. in Chapter 1)
53 | % Normal solution is to use:
54 | %   \usepackage[multiple]{footmisc}  
55 | % However, this is incompatible with hyperref
56 | % Suggested solution here:
57 | % http://tex.stackexchange.com/a/62091
58 | \let\oldFootnote\footnote
59 | \newcommand\nextToken\relax
60 | 
61 | \renewcommand\footnote[1]{%
62 |     \oldFootnote{#1}\futurelet\nextToken\isFootnote}
63 | 
64 | \newcommand\isFootnote{%
65 |     \ifx\footnote\nextToken\textsuperscript{,}\fi}
66 | 
67 | 
68 | \begin{document}
69 | \include{chapters/title}
70 | \include{chapters/abstract}
71 | \pagenumbering{roman}
72 | \setcounter{page}{3}
73 | \include{chapters/frontmatter}
74 | \pagenumbering{arabic}
75 | \include{chapters/introduction}
76 | \include{chapters/background}
77 | \include{chapters/tendermint}
78 | \include{chapters/subprotocols}
79 | \include{chapters/apps}
80 | \include{chapters/governance}
81 | \include{chapters/clients}
82 | \include{chapters/implementation}
83 | \include{chapters/performance}
84 | \include{chapters/related}
85 | \include{chapters/conclusion}
86 | 
87 | 
88 | 
89 | \printbibliography
90 | 
91 | \end{document}
92 | 


--------------------------------------------------------------------------------
/tendermint-pi.tex:
--------------------------------------------------------------------------------
  1 | 
  2 | \documentclass[12pt]{report}
  3 | \usepackage[utf8]{inputenc}
  4 | \usepackage{graphicx}
  5 | \usepackage{listings}
  6 | \usepackage{lstautogobble}
  7 | \usepackage{amsmath} 
  8 | \graphicspath{ {images/} }
  9 | \usepackage{float}
 10 | \floatstyle{boxed} 
 11 | \restylefloat{figure}
 12 | 
 13 | 
 14 | \renewcommand{\|}{\;|\;}
 15 | 
 16 | \begin{document}
 17 | 
 18 | Here we attempt a general depiction of consensus protocols, drawing on Nestmann (2003),
 19 | which models the non-Byzantine consensus protocol of Chandra and Toueg (1996).
 20 | We model only the consensus, rather than full ABC, but describe how to 
 21 | easily extend the model to ABC.
 22 | 
 23 | Let $Consensus := \prod_{i=1}^N Y_i $ represent a consensus protocol
 24 | over a set of $N$ validators, each executing one of a mutually exclusive set of processes, $Y_i$.
 25 | Internal state $s = \{r, p, v \}$ consists of a strictly increasing round, $r$;
 26 | a proposal, $p$, containing the proposed block for this round;
 27 | and a set of votes, $v$, containing all votes at all rounds.
 28 | We denote by $v_r^1$ and $v_r^2$ the set of prevotes and precommits, respectively, at round $r$.
 29 | We define $proposer(r) = r \bmod N$ to be the index of the proposer at round $r$.
 30 | We represent a peer at a particular point in the protocol as $Y_i^{r, p, v}$.
 31 | Processes $Y_i$ range over $PR_i$, $PV_i$, $PC_i$, $C_i$,
 32 | respectively abbreviating 
 33 | \emph{propose}, \emph{prevote}, \emph{precommit}, \emph{commit}.
 34 | We introduce additional sub-functions for $PV$ and $PC$ to capture the recursion,
 35 | denoted $PV1$ and $PC1$, respectively.
 36 | 
 37 | Peers are connected using broadcast channels for each message type,
 38 | namely $propose_i$, $prevote_i$, and $precommit_i$,
 39 | as well as a channel for broadcasting new transactions, $b_i$,
 40 | and one for deciding on, or committing, the next block, $d_i$.
 41 | Via an abuse of notation, a single send on some $x_i$ can be received by each process along
 42 | $x_i$.
 43 | 
 44 | We use only two message types: proposals and votes. 
 45 | Each contains a round number, block (hash), and signature, 
 46 | denoted $msg.round$, $msg.block$, $msg.sig$.
 47 | Note we can absorb the signature into the broadcast channel itself,
 48 | but we need it for use as evidence in the event of Byzantine behaviour.
 49 | 
 50 | 
 51 | \begin{center}
 52 | 	\begin{tabular}{l }
 53 | 		\hline \\
 54 | 		$Consensus := \prod_{i=1}^N [ PR_i^{0,\emptyset,\emptyset} \| D_i]$ \\\\
 55 | 
 56 | 		\hline \\
 57 | 		{$\!\begin{aligned}
 58 | 		PR_i^{r,p,v} := 
 59 | 			& \text{if } i=proposer(r) \text{ then } \\
 60 | 				& \quad propose_i ! (prop) \| PV_i^{r,prop,v} \text{, where } prop = chooseProposal(p)\\
 61 | 			& \text{ else if } p \neq \emptyset \text{ then}  \\
 62 | 				& \quad PV_i^{r,p,v}  \\
 63 | 			& \text{else} \\ 
 64 | 				& \quad propose_{proposer(r)} ? (prop).PV_i^{r,prop,v} + susp_{proposer(r)}.PV_i^{r,\emptyset,v} \\
 65 | 		\end{aligned}$} \\\\
 66 | 
 67 | 		\hline \\
 68 | 		$PV_i^{r,p,v}:= prevote_i ! (r,p) \| (\nu \> c) ( \prod_{j=1}^N prevote_j ? (w) . c!(w)  \| PV1_i^{r,p,v}(c))$ \\\\
 69 | 
 70 | 		\hline \\
 71 | 		{$\!\begin{aligned}
 72 | 		PV1_i^{r,p,v}(c) := 
 73 | 			& \text{ if } max_{b}(|\left\{ w \in v_r^1 : w.block = b\right\}|) > \frac{2}{3} N \text{ then} \\
 74 | 				& \quad PC_i^{r,b,v} \\
 75 | 			& \text{else if }  | v_r^1 | > \frac{2}{3} N \text{ then} \\ 
 76 | 				& \quad PC_i^{r,\emptyset,v} \\ 
 77 | 			& \text{else} \\
 78 | 				& \quad c?(vote) . PV1_i^{r,p,vote::v}(c) \\
 79 | 		\end{aligned}$} \\\\
 80 | 
 81 | 		\hline \\
 82 | 		$PC_i^{r,p,v}:= precommit_i ! (r,p) \| (\nu \> c) ( \prod_{j=1}^N precommit_j ? (w) . c!(w)  \| PC1_i^{r,p,v}(c))$ \\\\
 83 | 
 84 | 		\hline \\
 85 | 		{$\!\begin{aligned}
 86 | 		PC1_i^{r,p,v}(c) := 
 87 | 			& \text{ if } max_{b}(|\left\{ w \in v_r^2 : w.block = b\right\}|) > \frac{2}{3} N \text{ then} \\
 88 | 				& \quad C_i^{r,b,v} \\
 89 | 			& \text{else if }  | v_r^2 | > \frac{2}{3} N \text{ then} \\ 
 90 | 				& \quad PR_i^{r+1,\emptyset,v} \\ 
 91 | 			& \text{else} \\
 92 | 				& \quad c?(vote) . PC1_i^{r,p,vote::v}(c) \\
 93 | 		\end{aligned}$}
 94 | 
 95 | 
 96 | 	\end{tabular}
 97 | \end{center}
 98 | 
 99 | 
100 | 
101 | 
102 | 
103 | 
104 | 
105 | 
106 | 
107 | 
108 | \end{document}
109 | 


--------------------------------------------------------------------------------