├── .gitignore ├── Buchman_Ethan_201606_MAsc.pdf ├── Thesis_Defense.pdf ├── bib ├── applied.bib ├── consensus.bib ├── crypto.bib ├── formal.bib └── programming.bib ├── build.sh ├── chapters ├── abstract.tex ├── appendix.tex ├── apps.tex ├── background.tex ├── clients.tex ├── conclusion.tex ├── economics.tex ├── frontmatter.tex ├── governance.tex ├── implementation.tex ├── introduction.tex ├── performance.tex ├── related.tex ├── subprotocols.tex ├── tendermint.tex ├── theory.tex └── title.tex ├── figures ├── descriptions │ ├── block_header.tex │ ├── consensus_rules.tex │ ├── data_structures.tex │ ├── safety_guarantees.tex │ ├── security_guarantees.tex │ ├── tendermint-pi1.tex │ └── tendermint-pi2.tex ├── diagrams │ ├── abci.png │ ├── byzantine.pdf │ ├── byzantine.png │ ├── consensus_logic.pdf │ ├── consensus_logic.png │ ├── state_machine.pdf │ ├── state_machine.png │ ├── tmsp.pdf │ └── tmsp.png └── throughput │ ├── byz_tables.tex │ ├── crash_tables.tex │ ├── delay_tables.tex │ ├── large_instances │ ├── latency-throughput.pdf │ ├── latency-throughput.png │ ├── throughput-blocksize.pdf │ └── throughput-blocksize.png │ ├── latency-throughput.pdf │ ├── latency-throughput.png │ ├── single_datacenter │ ├── latency-throughput.pdf │ ├── latency-throughput.png │ ├── throughput-blocksize.pdf │ └── throughput-blocksize.png │ ├── throughput-blocksize.pdf │ └── throughput-blocksize.png ├── listings-golang.sty ├── main.tex └── tendermint-pi.tex /.gitignore: -------------------------------------------------------------------------------- 1 | *.aux 2 | *.log 3 | *.out 4 | *.pdf 5 | auto/ 6 | chapters/auto/ 7 | *.bbl 8 | *.blg 9 | *.fdb_latexmk 10 | *.fls 11 | *.lof 12 | *.lot 13 | *.toc 14 | -------------------------------------------------------------------------------- /Buchman_Ethan_201606_MAsc.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/Buchman_Ethan_201606_MAsc.pdf -------------------------------------------------------------------------------- /Thesis_Defense.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/Thesis_Defense.pdf -------------------------------------------------------------------------------- /bib/applied.bib: -------------------------------------------------------------------------------- 1 | 2 | @article{rao2011using, 3 | title={Using Paxos to build a scalable, consistent, and highly available datastore}, 4 | author={Rao, Jun and Shekita, Eugene J and Tata, Sandeep}, 5 | journal={Proceedings of the VLDB Endowment}, 6 | volume={4}, 7 | number={4}, 8 | pages={243--254}, 9 | year={2011}, 10 | publisher={VLDB Endowment} 11 | } 12 | 13 | @incollection{lampson1996paxos, 14 | title={How to build a highly available system using consensus}, 15 | author={Lampson, Butler W}, 16 | booktitle={Distributed Algorithms}, 17 | pages={1--17}, 18 | year={1996}, 19 | publisher={Springer}, 20 | annote={practical guide to using paxos} 21 | } 22 | @inproceedings{dynamo, 23 | title={Dynamo: amazon's highly available key-value store}, 24 | author={DeCandia, Giuseppe and Hastorun, Deniz and Jampani, Madan and Kakulapati, Gunavardhan and Lakshman, Avinash and Pilchin, Alex and Sivasubramanian, Swaminathan and Vosshall, Peter and Vogels, Werner}, 25 | booktitle={ACM SIGOPS Operating Systems Review}, 26 | volume={41}, 27 | number={6}, 28 | pages={205--220}, 29 | year={2007}, 30 | organization={ACM} 31 | } 32 | 33 | @inproceedings{chubby, 34 | title={The Chubby lock service for loosely-coupled distributed systems}, 35 | author={Burrows, Mike}, 36 | booktitle={Proceedings of the 7th symposium on Operating systems design and implementation}, 37 | pages={335--350}, 38 | year={2006}, 39 | 
organization={USENIX Association} 40 | } 41 | 42 | @inproceedings{zookeeper, 43 | title={ZooKeeper: Wait-free Coordination for Internet-scale Systems.}, 44 | author={Hunt, Patrick and Konar, Mahadev and Junqueira, Flavio Paiva and Reed, Benjamin}, 45 | booktitle={USENIX Annual Technical Conference}, 46 | volume={8}, 47 | pages={9}, 48 | year={2010} 49 | } 50 | 51 | 52 | @article{sift, 53 | title={SIFT: Design and analysis of a fault-tolerant computer for aircraft control}, 54 | author={Wensley, John H and Lamport, Leslie and Goldberg, Jack and Green, Milton W and Levitt, Karl N and Melliar-Smith, Po Mo and Shostak, Robert E and Einstock, Charles B}, 55 | journal={Proceedings of the IEEE}, 56 | volume={66}, 57 | number={10}, 58 | pages={1240--1255}, 59 | year={1978}, 60 | publisher={IEEE} 61 | } 62 | 63 | 64 | @article{ftmp, 65 | title={FTMP—a highly reliable fault-tolerant multiprocess for aircraft}, 66 | author={Hopkins Jr, Albert L and Smith III, T and Lala, Jaynarayan H}, 67 | journal={Proceedings of the IEEE}, 68 | volume={66}, 69 | number={10}, 70 | pages={1221--1239}, 71 | year={1978}, 72 | publisher={IEEE} 73 | } 74 | 75 | @inproceedings{miner2000analysis, 76 | title={Analysis of the SPIDER fault-tolerance protocols}, 77 | author={Miner, PS}, 78 | booktitle={Proceedings of the 5th NASA Langley Formal Methods Workshop}, 79 | year={2000} 80 | } 81 | 82 | @inproceedings{miner2004unified, 83 | title={A unified fault-tolerance protocol}, 84 | author={Miner, Paul and Geser, Alfons and Pike, Lee and Maddalon, Jeffrey}, 85 | organization={Springer} 86 | } 87 | 88 | @article{hoyme1993safebus, 89 | title={SAFEbus (for avionics)}, 90 | author={Hoyme, Kenneth and Driscoll, Kevin}, 91 | journal={Aerospace and Electronic Systems Magazine, IEEE}, 92 | volume={8}, 93 | number={3}, 94 | pages={34--39}, 95 | year={1993}, 96 | publisher={IEEE} 97 | } 98 | 99 | @misc{coreos_raft, 100 | title = {ETCD Distributed Key-Value Store Source Code Repository}, 101 | howpublished = 
{https://github.com/coreos/etcd}, 102 | note = {Accessed: 2016-04-01} 103 | } 104 | 105 | @misc{hashicorp_raft, 106 | title = {Hashicorp's Implementation of Raft in Go}, 107 | howpublished = {https://github.com/hashicorp/raft}, 108 | note = {Accessed: 2016-04-01} 109 | } 110 | 111 | @misc{influxdb, 112 | title = {InfluxDB: Scalable datastore for metrics, events, and real-time analytics}, 113 | howpublished = {https://github.com/influxdata/influxdb}, 114 | note = {Accessed: 2016-04-01} 115 | } 116 | 117 | @inproceedings{chandra2007paxos, 118 | title={Paxos made live: an engineering perspective}, 119 | author={Chandra, Tushar D and Griesemer, Robert and Redstone, Joshua}, 120 | booktitle={Proceedings of the twenty-sixth annual ACM symposium on Principles of distributed computing}, 121 | pages={398--407}, 122 | year={2007}, 123 | organization={ACM}, 124 | annote={ experience from building Chubby } 125 | } 126 | 127 | @article{posner2013quadratic, 128 | title={Quadratic voting as efficient corporate governance}, 129 | author={Posner, Eric A and Weyl, E Glen}, 130 | journal={University of Chicago Law Review, Forthcoming}, 131 | year={2013} 132 | } 133 | 134 | @article{zak2001trust, 135 | title={Trust and growth}, 136 | author={Zak, Paul J and Knack, Stephen}, 137 | journal={The economic journal}, 138 | volume={111}, 139 | number={470}, 140 | pages={295--321}, 141 | year={2001}, 142 | publisher={Wiley Online Library} 143 | } 144 | 145 | @article{corbett2013spanner, 146 | title={Spanner: Google’s globally distributed database}, 147 | author={Corbett, James C and Dean, Jeffrey and Epstein, Michael and Fikes, Andrew and Frost, Christopher and Furman, Jeffrey John and Ghemawat, Sanjay and Gubarev, Andrey and Heiser, Christopher and Hochschild, Peter and others}, 148 | journal={ACM Transactions on Computer Systems (TOCS)}, 149 | volume={31}, 150 | number={3}, 151 | pages={8}, 152 | year={2013}, 153 | publisher={ACM} 154 | } 155 | 156 | 157 | 
-------------------------------------------------------------------------------- /bib/consensus.bib: -------------------------------------------------------------------------------- 1 | 2 | @article{flp, 3 | title={Impossibility of distributed consensus with one faulty process}, 4 | author={Fischer, Michael J and Lynch, Nancy A and Paterson, Michael S}, 5 | journal={Journal of the ACM (JACM)}, 6 | volume={32}, 7 | number={2}, 8 | pages={374--382}, 9 | year={1985}, 10 | publisher={ACM}, 11 | annote={fully async deterministic consensus is impossible. 12 | "no completely asynchronous consensus protocol can tolerate even a single unannounced process death" 13 | } 14 | } 15 | 16 | @article{impossibility, 17 | title={Easy impossibility proofs for distributed consensus problems}, 18 | author={Fischer, Michael J and Lynch, Nancy A and Merritt, Michael}, 19 | journal={Distributed Computing}, 20 | volume={1}, 21 | number={1}, 22 | pages={26--39}, 23 | year={1986}, 24 | publisher={Springer} 25 | } 26 | 27 | 28 | @article{defago2004total, 29 | title={Total order broadcast and multicast algorithms: Taxonomy and survey}, 30 | author={D{\'e}fago, Xavier and Schiper, Andr{\'e} and Urb{\'a}n, P{\'e}ter}, 31 | journal={ACM Computing Surveys (CSUR)}, 32 | volume={36}, 33 | number={4}, 34 | pages={372--421}, 35 | year={2004}, 36 | publisher={ACM} 37 | } 38 | 39 | @inproceedings{free-choice, 40 | title={Another advantage of free choice (extended abstract): Completely asynchronous agreement protocols}, 41 | author={Ben-Or, Michael}, 42 | booktitle={Proceedings of the second annual ACM symposium on Principles of distributed computing}, 43 | pages={27--30}, 44 | year={1983}, 45 | organization={ACM}, 46 | annote={somehow this cites flp. 47 | Can solve flp with probability 1 using non-determinism. 48 | 2t+1 for non-byzantine, 5t+1 for byzantine version. 49 | "dont know whether N > 5t is best possible bound " ... 
:p 50 | each node flips a coin locally and broadcasts result - if enough of them, its the common coin. 51 | number of rounds to reach consensus is exponential in async case 52 | } 53 | } 54 | 55 | @inproceedings{rabin1983randomized, 56 | title={Randomized byzantine generals}, 57 | author={Rabin, Michael O}, 58 | booktitle={Foundations of Computer Science, 1983., 24th Annual Symposium on}, 59 | pages={403--409}, 60 | year={1983}, 61 | organization={IEEE}, 62 | annote={ 63 | use shamir secret sharing to distribute the coin. 64 | requires trusted dealer for initial setup. 65 | constant time. expected number of rounds is O(T(n)). 66 | } 67 | } 68 | 69 | @article{chor1985simple, 70 | title={A simple and efficient randomized byzantine agreement algorithm}, 71 | author={Chor, Benny and Coan, Brian A}, 72 | journal={Software Engineering, IEEE Transactions on}, 73 | number={6}, 74 | pages={531--539}, 75 | year={1985}, 76 | publisher={IEEE}, 77 | annote={synchronous only. small set of size g flip, terminates in 2t/g rounds with a common coin. 
78 | no crypto, less redundancy required than BenOr} 79 | } 80 | 81 | @article{paxos_simple, 82 | title={Paxos made simple}, 83 | author={Lamport, Leslie and others}, 84 | journal={ACM Sigact News}, 85 | volume={32}, 86 | number={4}, 87 | pages={18--25}, 88 | year={2001} 89 | } 90 | 91 | 92 | @incollection{draper_lab, 93 | title={The evolution of fault tolerant computing at the Charles Stark Draper Laboratory, 1955--85}, 94 | author={Hopkins Jr, Albert L and Lala, Jaynarayan H and Smith III, T Basil}, 95 | booktitle={The Evolution of fault-tolerant computing}, 96 | pages={121--140}, 97 | year={1987}, 98 | publisher={Springer}, 99 | annote={early days for aviation} 100 | } 101 | 102 | 103 | 104 | 105 | 106 | # a great history 107 | http://betathoughts.blogspot.ca/2007/06/brief-history-of-consensus-2pc-and.html 108 | 109 | @article{clocks, 110 | title={Time, clocks, and the ordering of events in a distributed system}, 111 | author={Lamport, Leslie}, 112 | journal={Communications of the ACM}, 113 | volume={21}, 114 | number={7}, 115 | pages={558--565}, 116 | year={1978}, 117 | publisher={ACM}, 118 | annote={first consensus paper 119 | relativity of concurrent processes 120 | ordering based on message passing. 121 | introduces distributed state machine 122 | } 123 | } 124 | 125 | @article{pease1980reaching, 126 | title={Reaching agreement in the presence of faults}, 127 | author={Pease, Marshall and Shostak, Robert and Lamport, Leslie}, 128 | journal={Journal of the ACM (JACM)}, 129 | volume={27}, 130 | number={2}, 131 | pages={228--234}, 132 | year={1980}, 133 | publisher={ACM}, 134 | annote={ first to show that the best we can do in byzantine is 3t+1 135 | statement of byzantine generals before it was called byzantine generals. 136 | an algo where t faults requires t+1 rounds (ie. 1 fault requires 2 rounds ...) 
137 | however, if authentication is used, then we can tolerate an arbitrary number of byzantine faults t<=N given t+1 rounds 138 | } 139 | } 140 | 141 | @article{gettier, 142 | title={On the logical unsolvability of the Gettier problem}, 143 | author={Floridi, Luciano}, 144 | journal={Synthese}, 145 | volume={142}, 146 | number={1}, 147 | pages={61--79}, 148 | year={2004}, 149 | publisher={Springer} 150 | } 151 | 152 | 153 | 154 | @article{lamport1982byzantine, 155 | title={The Byzantine generals problem}, 156 | author={Lamport, Leslie and Shostak, Robert and Pease, Marshall}, 157 | journal={ACM Transactions on Programming Languages and Systems (TOPLAS)}, 158 | volume={4}, 159 | number={3}, 160 | pages={382--401}, 161 | year={1982}, 162 | publisher={ACM}, 163 | annote={coins Byzantine and expands on pease1980reaching 164 | 165 | } 166 | } 167 | 168 | @article{paxos, 169 | title={The part-time parliament}, 170 | author={Lamport, Leslie}, 171 | journal={ACM Transactions on Computer Systems (TOCS)}, 172 | volume={16}, 173 | number={2}, 174 | pages={133--169}, 175 | year={1998}, 176 | publisher={ACM} 177 | } 178 | 179 | 180 | @article{dls, 181 | title={Consensus in the presence of partial synchrony}, 182 | author={Dwork, Cynthia and Lynch, Nancy and Stockmeyer, Larry}, 183 | journal={Journal of the ACM (JACM)}, 184 | volume={35}, 185 | number={2}, 186 | pages={288--323}, 187 | year={1988}, 188 | publisher={ACM} 189 | } 190 | 191 | @incollection{ppbft, 192 | title={On the practicality of practical Byzantine fault tolerance}, 193 | author={Chondros, Nikos and Kokordelis, Konstantinos and Roussopoulos, Mema}, 194 | booktitle={Proceedings of ACM/IFIP/USENIX International Middleware Conference (MIDDLEWARE)}, 195 | pages={436--455}, 196 | year={2012}, 197 | publisher={Springer} 198 | } 199 | 200 | 201 | @inproceedings{pbft, 202 | title={Practical Byzantine fault tolerance}, 203 | author={Castro, Miguel and Liskov, Barbara and others}, 204 | booktitle={Proceedings of the Third 
Symposium on Operating Systems Design and Implementation}, 205 | year={1999}, 206 | annote={ 207 | tolerates byzantine faults in asynchronous networks 208 | public keys only used when there are faults 209 | implemented NFS, performance on par with UNIX 210 | "We do assume that the adversary cannot delay correct nodes indefinitely. " 211 | "The algorithm does not rely on synchrony to provide safety. Therefore, it must rely on synchrony to provide liveness; else violate FLP" 212 | - delay(t) does not grow faster than t indefinitely 213 | 214 | Protocol: 215 | - "buffer requests and multicast as a group" as optimization -> ie. make blocks 216 | - 3 steps: pre-prepare, prepare, commit 217 | - "The pre-prepare and prepare phases are used to totally 218 | order requests sent in the same view even when the 219 | primary, which proposes the ordering of requests, is 220 | faulty. The prepare and commit phases are used to ensure 221 | that requests that commit are totally ordered across views" 222 | - request not included in the pre-prepare: " decouples the protocol to totally order 223 | requests from the protocol to transmit the request to the 224 | replicas" 225 | - generate state proofs at checkpoints, say every 100 requests 226 | - garbage collect messages up to checkpoints 227 | - complicated view change semantics ... 
228 | - "We also believe that it is possible to reduce the number of copies of the state to but the details remain to be worked out" 229 | 230 | } 231 | } 232 | 233 | @inproceedings{yin2003separating, 234 | title={Separating agreement from execution for byzantine fault tolerant services}, 235 | author={Yin, Jian and Martin, Jean-Philippe and Venkataramani, Arun and Alvisi, Lorenzo and Dahlin, Mike}, 236 | booktitle={ACM SIGOPS Operating Systems Review}, 237 | volume={37}, 238 | number={5}, 239 | pages={253--267}, 240 | year={2003}, 241 | organization={ACM}, 242 | annote={ 243 | realized that we can have 3f+1 in agreement replicas and 2f+1 in executing replicas 244 | } 245 | } 246 | 247 | @incollection{mutex, 248 | title={Solution of a problem in concurrent programming control}, 249 | author={Dijkstra, Edsger W}, 250 | booktitle={Pioneers and Their Contributions to Software Engineering}, 251 | pages={289--294}, 252 | year={2001}, 253 | publisher={Springer} 254 | } 255 | 256 | 257 | @inproceedings{gray1981transaction, 258 | title={The transaction concept: Virtues and limitations}, 259 | author={Gray, Jim and others}, 260 | booktitle={VLDB}, 261 | volume={81}, 262 | pages={144--154}, 263 | year={1981} 264 | } 265 | 266 | @book{gray1978notes, 267 | title={Notes on data base operating systems}, 268 | author={Gray, James N}, 269 | year={1978}, 270 | publisher={Springer} 271 | } 272 | 273 | @article{eswaran1976notions, 274 | title={The notions of consistency and predicate locks in a database system}, 275 | author={Eswaran, Kapali P. and Gray, Jim N and Lorie, Raymond A. 
and Traiger, Irving L.}, 276 | journal={Communications of the ACM}, 277 | volume={19}, 278 | number={11}, 279 | pages={624--633}, 280 | year={1976}, 281 | publisher={ACM} 282 | } 283 | 284 | @article{skeen1983formal, 285 | title={A formal model of crash recovery in a distributed system}, 286 | author={Skeen, Dale and Stonebraker, Michael}, 287 | journal={Software Engineering, IEEE Transactions on}, 288 | number={3}, 289 | pages={219--228}, 290 | year={1983}, 291 | publisher={IEEE}, 292 | note={three phase commit} 293 | } 294 | 295 | 296 | @inproceedings{raft, 297 | title={In search of an understandable consensus algorithm}, 298 | author={Ongaro, Diego and Ousterhout, John}, 299 | booktitle={2014 USENIX Annual Technical Conference (USENIX ATC 14)}, 300 | pages={305--319}, 301 | year={2014} 302 | } 303 | 304 | @phdthesis{raft_thesis, 305 | title={Consensus: Bridging theory and practice}, 306 | author={Ongaro, Diego}, 307 | year={2014}, 308 | school={Stanford University} 309 | } 310 | 311 | @article{cigarette_smokers, 312 | title={On a solution and a generalization of the Cigarette Smokers' Problem}, 313 | author={Habermann, A Nico}, 314 | year={1972} 315 | } 316 | 317 | @article{dining_philosophers, 318 | title={Hierarchical ordering of sequential processes}, 319 | author={Dijkstra, Edsger W.}, 320 | journal={Acta informatica}, 321 | volume={1}, 322 | number={2}, 323 | pages={115--138}, 324 | year={1971}, 325 | publisher={Springer} 326 | } 327 | 328 | 329 | @inproceedings{kotla2004high, 330 | title={High throughput Byzantine fault tolerance}, 331 | author={Kotla, Ramakrishna and Dahlin, Mike}, 332 | booktitle={Dependable Systems and Networks, 2004 International Conference on}, 333 | pages={575--584}, 334 | year={2004}, 335 | organization={IEEE} 336 | } 337 | 338 | @inproceedings{kotla2007zyzzyva, 339 | title={Zyzzyva: speculative byzantine fault tolerance}, 340 | author={Kotla, Ramakrishna and Alvisi, Lorenzo and Dahlin, Mike and Clement, Allen and Wong, Edmund}, 341 
| booktitle={ACM SIGOPS Operating Systems Review}, 342 | volume={41}, 343 | number={6}, 344 | pages={45--58}, 345 | year={2007}, 346 | organization={ACM} 347 | } 348 | 349 | 350 | @inproceedings{garcia2011efficient, 351 | title={Efficient middleware for byzantine fault tolerant database replication}, 352 | author={Garcia, Rui and Rodrigues, Rodrigo and Pregui{\c{c}}a, Nuno}, 353 | booktitle={Proceedings of the sixth conference on Computer systems}, 354 | pages={107--122}, 355 | year={2011}, 356 | organization={ACM} 357 | } 358 | 359 | @inproceedings{canetti1993fast, 360 | title={Fast asynchronous Byzantine agreement with optimal resilience}, 361 | author={Canetti, Ran and Rabin, Tal}, 362 | booktitle={Proceedings of the twenty-fifth annual ACM symposium on Theory of computing}, 363 | pages={42--51}, 364 | year={1993}, 365 | organization={ACM} 366 | } 367 | 368 | @inproceedings{cachin2000random, 369 | title={Random oracles in Constantinople: practical asynchronous Byzantine agreement using cryptography}, 370 | author={Cachin, Christian and Kursawe, Klaus and Shoup, Victor}, 371 | booktitle={Proceedings of the nineteenth annual ACM symposium on Principles of distributed computing}, 372 | pages={123--132}, 373 | year={2000}, 374 | organization={ACM} 375 | } 376 | 377 | @inproceedings{feldman1988optimal, 378 | title={Optimal algorithms for Byzantine agreement}, 379 | author={Feldman, Paul and Micali, Silvio}, 380 | booktitle={Proceedings of the twentieth annual ACM symposium on Theory of computing}, 381 | pages={148--161}, 382 | year={1988}, 383 | organization={ACM} 384 | } 385 | 386 | @inproceedings{oki1988viewstamped, 387 | title={Viewstamped replication: A new primary copy method to support highly-available distributed systems}, 388 | author={Oki, Brian M and Liskov, Barbara H}, 389 | booktitle={Proceedings of the seventh annual ACM Symposium on Principles of distributed computing}, 390 | pages={8--17}, 391 | year={1988}, 392 | organization={ACM} 393 | } 394 | 395 | 
396 | @inproceedings{junqueira2011zab, 397 | title={Zab: High-performance broadcast for primary-backup systems}, 398 | author={Junqueira, Flavio P and Reed, Benjamin C and Serafini, Marco}, 399 | booktitle={Dependable Systems \& Networks (DSN), 2011 IEEE/IFIP 41st International Conference on}, 400 | pages={245--256}, 401 | year={2011}, 402 | organization={IEEE} 403 | } 404 | 405 | @article{van2015vive, 406 | title={Vive la diff{\'e}rence: Paxos vs. viewstamped replication vs. zab}, 407 | author={Van Renesse, Robbert and Schiper, Nicolas and Schneider, Fred B}, 408 | journal={Dependable and Secure Computing, IEEE Transactions on}, 409 | volume={12}, 410 | number={4}, 411 | pages={472--484}, 412 | year={2015}, 413 | publisher={IEEE} 414 | } 415 | 416 | @article{cachin2016non, 417 | title={Non-determinism in Byzantine Fault-Tolerant Replication}, 418 | author={Cachin, Christian and Schubert, Simon and Vukoli{\'c}, Marko}, 419 | journal={arXiv preprint arXiv:1603.07351}, 420 | year={2016} 421 | } 422 | 423 | @misc{raft.github.io, 424 | title = {The Raft Consensus Algorithm}, 425 | howpublished = {http://raft.github.io}, 426 | note = {Accessed: 2016-04-01} 427 | } 428 | 429 | @article{tangaroa, 430 | title={Tangaroa: a Byzantine Fault Tolerant Raft}, 431 | author={Copeland, Christopher and Zhong, Hongxia} 432 | } 433 | 434 | @article{taleb2014skin, 435 | title={The skin in the game heuristic for protection against tail events}, 436 | author={Taleb, Nassim Nicholas and Sandis, Constantine}, 437 | journal={Review of Behavioral Economics}, 438 | volume={1}, 439 | pages={1--21}, 440 | year={2014} 441 | } 442 | 443 | 444 | @article{schneider1990implementing, 445 | title={Implementing fault-tolerant services using the state machine approach: A tutorial}, 446 | author={Schneider, Fred B}, 447 | journal={ACM Computing Surveys (CSUR)}, 448 | volume={22}, 449 | number={4}, 450 | pages={299--319}, 451 | year={1990}, 452 | publisher={ACM} 453 | } 454 | 455 | @misc{jepsen, 456 | 
title = {JEPSEN - Distributed Systems Safety Analysis}, 457 | howpublished = {http://jepsen.io}, 458 | note = {Accessed: 2016-05-12} 459 | } 460 | https://aphyr.com/tags/Jepsen 461 | -------------------------------------------------------------------------------- /bib/crypto.bib: -------------------------------------------------------------------------------- 1 | 2 | @misc{bitcoin, 3 | title={Bitcoin: A peer-to-peer electronic cash system}, 4 | author={Nakamoto, Satoshi}, 5 | year={2008} 6 | } 7 | 8 | 9 | @article{ethereum, 10 | title={Ethereum: A secure decentralised generalised transaction ledger}, 11 | author={Wood, Gavin}, 12 | journal={Ethereum Project Yellow Paper}, 13 | year={2014} 14 | } 15 | 16 | @article{sidechains, 17 | title={Enabling blockchain innovations with pegged sidechains}, 18 | author={Back, Adam and Maxwell, G and Corallo, M and Friedenbach, Mark and Dashjr, L}, 19 | year={2014} 20 | } 21 | 22 | 23 | @article{peercoin, 24 | title={Ppcoin: Peer-to-peer crypto-currency with proof-of-stake}, 25 | author={King, Sunny and Nadal, Scott}, 26 | journal={self-published paper, August}, 27 | volume={19}, 28 | year={2012} 29 | } 30 | 31 | 32 | @misc{bittorrent, 33 | title={The BitTorrent protocol specification}, 34 | author={Cohen, Bram}, 35 | year={2008}, 36 | publisher={BITTORRENT} 37 | } 38 | 39 | @inproceedings{libswift, 40 | title={Performance analysis of the libswift p2p streaming protocol}, 41 | author={Petrocco, Riccardo and Pouwelse, Johan and Epema, Dick HJ}, 42 | booktitle={Peer-to-Peer Computing (P2P), 2012 IEEE 12th International Conference on}, 43 | pages={103--114}, 44 | year={2012}, 45 | organization={IEEE} 46 | } 47 | 48 | 49 | @inproceedings{merkle1987digital, 50 | title={A digital signature based on a conventional encryption function}, 51 | author={Merkle, Ralph C}, 52 | booktitle={Advances in Cryptology—CRYPTO’87}, 53 | pages={369--378}, 54 | year={1987}, 55 | organization={Springer} 56 | } 57 | 58 | @article{shamir1979share, 59 | 
title={How to share a secret}, 60 | author={Shamir, Adi}, 61 | journal={Communications of the ACM}, 62 | volume={22}, 63 | number={11}, 64 | pages={612--613}, 65 | year={1979}, 66 | publisher={ACM} 67 | } 68 | 69 | @inproceedings{replay, 70 | title={A taxonomy of replay attacks [cryptographic protocols]}, 71 | author={Syverson, Paul}, 72 | booktitle={Computer Security Foundations Workshop VII, 1994. CSFW 7. Proceedings}, 73 | pages={187--191}, 74 | year={1994}, 75 | organization={IEEE} 76 | } 77 | 78 | @inproceedings{vukolic11quest, 79 | title={The quest for scalable blockchain fabric: Proof-of-work vs. BFT replication}, 80 | author={Vukolic, Marko}, 81 | booktitle={Proc. IFIP WG 11.4 Workshop on Open Research Problems in Network Security (iNetSec 2015)} 82 | } 83 | 84 | @techreport{honeybadger, 85 | title={The Honey Badger of BFT Protocols}, 86 | author={Miller, Andrew and Xia, Yu and Croman, Kyle and Shi, Elaine and Song, Dawn}, 87 | year={2016}, 88 | institution={Cryptology ePrint Archive 2016/199} 89 | } 90 | 91 | 92 | @techreport{poon2015bitcoin, 93 | title={The bitcoin lightning network: Scalable off-chain instant payments}, 94 | author={Poon, Joseph and Dryja, Thaddeus}, 95 | year={2015}, 96 | institution={Technical Report (draft). https://lightning. 
network} 97 | } 98 | 99 | @article{miller2014anonymous, 100 | title={Anonymous byzantine consensus from moderately-hard puzzles: A model for bitcoin}, 101 | author={Miller, Andrew and LaViola Jr, Joseph J}, 102 | journal={Retrieved from Anonymous Byzantine Consensus from Moderately-Hard Puzzles: A Model for Bitcoin}, 103 | year={2014} 104 | } 105 | 106 | @inproceedings{miller2015nonoutsourceable, 107 | title={Nonoutsourceable Scratch-Off Puzzles to Discourage Bitcoin Mining Coalitions}, 108 | author={Miller, Andrew and Kosba, Ahmed and Katz, Jonathan and Shi, Elaine}, 109 | booktitle={Proceedings of the 22nd ACM SIGSAC Conference on Computer and Communications Security}, 110 | pages={680--691}, 111 | year={2015}, 112 | organization={ACM} 113 | } 114 | 115 | @article{eyal2015bitcoin, 116 | title={Bitcoin-ng: A scalable blockchain protocol}, 117 | author={Eyal, Ittay and Gencer, Adem Efe and Sirer, Emin Gun and van Renesse, Robbert}, 118 | journal={arXiv preprint arXiv:1510.02037}, 119 | year={2015} 120 | } 121 | 122 | @incollection{eyal2014majority, 123 | title={Majority is not enough: Bitcoin mining is vulnerable}, 124 | author={Eyal, Ittay and Sirer, Emin G{\"u}n}, 125 | booktitle={Financial Cryptography and Data Security}, 126 | pages={436--454}, 127 | year={2014}, 128 | publisher={Springer} 129 | } 130 | 131 | @article{courtois2014subversive, 132 | title={On subversive miner strategies and block withholding attack in bitcoin digital currency}, 133 | author={Courtois, Nicolas T and Bahack, Lear}, 134 | journal={arXiv preprint arXiv:1402.1718}, 135 | year={2014} 136 | } 137 | 138 | @misc{buterin2013ethereum, 139 | title={Ethereum white paper: a next generation smart contract \& decentralized application platform}, 140 | author={Buterin, Vitalik}, 141 | year={2013} 142 | } 143 | 144 | @incollection{ghost, 145 | title={Secure high-rate transaction processing in Bitcoin}, 146 | author={Sompolinsky, Yonatan and Zohar, Aviv}, 147 | booktitle={Financial Cryptography and 
Data Security}, 148 | pages={507--527}, 149 | year={2015}, 150 | publisher={Springer} 151 | } 152 | 153 | @misc{blockchaininfo, 154 | title = {Bitcoin Blockchain Charts}, 155 | howpublished = {https://blockchain.info/charts}, 156 | note = {Accessed: 2016-04-01} 157 | } 158 | 159 | @misc{obc, 160 | title = {OpenBlockChain: Blockchain Fabric Code}, 161 | howpublished = {https://github.com/openblockchain/obc-peer}, 162 | note = {Accessed: 2016-04-01} 163 | } 164 | 165 | @misc{deterministicjs, 166 | title = {A Deterministic Version of Javascript}, 167 | howpublished = {https://github.com/NodeGuy/Deterministic.js}, 168 | note = {Accessed: 2016-04-01} 169 | } 170 | 171 | @misc{slasher, 172 | title = {Slasher: a punitive proof of stake algorithm}, 173 | author={Buterin, Vitalik}, 174 | howpublished = {https://blog.ethereum.org/2014/01/15/slasher-a-punitive-proof-of-stake-algorithm/}, 175 | note = {Accessed: 2016-04-01} 176 | } 177 | 178 | 179 | 180 | @incollection{ed25519, 181 | title={Curve25519: new Diffie-Hellman speed records}, 182 | author={Bernstein, Daniel J}, 183 | booktitle={Public Key Cryptography-PKC 2006}, 184 | pages={207--228}, 185 | year={2006}, 186 | publisher={Springer} 187 | } 188 | 189 | @incollection{economist_blockchains, 190 | title={The Trust Machine}, 191 | year={2015}, 192 | publisher={The Economist} 193 | } 194 | 195 | @article{authenticated_encryption, 196 | title={Authentication and authenticated key exchanges}, 197 | author={Diffie, Whitfield and Van Oorschot, Paul C and Wiener, Michael J}, 198 | journal={Designs, Codes and cryptography}, 199 | volume={2}, 200 | number={2}, 201 | pages={107--125}, 202 | year={1992}, 203 | publisher={Springer} 204 | } 205 | 206 | @book{snowden, 207 | title={No place to hide: Edward Snowden, the NSA, and the US surveillance state}, 208 | author={Greenwald, Glenn}, 209 | year={2014}, 210 | publisher={Macmillan} 211 | } 212 | 213 | @article{schneier2007did, 214 | title={Did NSA put a secret backdoor in new 
encryption standard?, 2007}, 215 | author={Schneier, Bruce}, 216 | journal={URL http://archive.wired.com/politics/security/commentary/securitymatters/2007/11/securitymatters}, 217 | volume={1115} 218 | } 219 | 220 | @book{levy2001crypto, 221 | title={Crypto: How the Code Rebels Beat the Government--Saving Privacy in the Digital Age}, 222 | author={Levy, Steven}, 223 | year={2001}, 224 | publisher={Penguin} 225 | } 226 | 227 | @article{szabo1997formalizing, 228 | title={Formalizing and securing relationships on public networks}, 229 | author={Szabo, Nick}, 230 | journal={First Monday}, 231 | volume={2}, 232 | number={9}, 233 | year={1997} 234 | } 235 | 236 | 237 | @misc{juno, 238 | author = {Buckie}, 239 | title = {Juno - Smart Contracts Running on a BFT Hardened Raft}, 240 | year = {2016}, 241 | publisher = {GitHub}, 242 | journal = {GitHub repository}, 243 | howpublished = {\url{https://github.com/buckie/juno}}, 244 | commit = {3426e7344389a66b19b64635f8b43fc0ec95aafd} 245 | } 246 | 247 | 248 | @misc{casper, 249 | title = {Introducing Casper “the Friendly Ghost”}, 250 | author = {Vlad Zamfir}, 251 | howpublished = {https://blog.ethereum.org/2015/08/01/introducing-casper-friendly-ghost/}, 252 | note = {Accessed: 2016-05-12} 253 | } 254 | 255 | @article{king2012ppcoin, 256 | title={Ppcoin: Peer-to-peer crypto-currency with proof-of-stake}, 257 | author={King, Sunny and Nadal, Scott}, 258 | journal={self-published paper, August}, 259 | volume={19}, 260 | year={2012} 261 | } 262 | 263 | @misc{poelstra2014distributed, 264 | title={Distributed Consensus from Proof of Stake is Impossible}, 265 | author={Poelstra, Andrew and others}, 266 | year={2014}, 267 | publisher={May} 268 | } 269 | 270 | 271 | @inproceedings{ben1988completeness, 272 | title={Completeness theorems for non-cryptographic fault-tolerant distributed computation}, 273 | author={Ben-Or, Michael and Goldwasser, Shafi and Wigderson, Avi}, 274 | booktitle={Proceedings of the twentieth annual ACM symposium 
on Theory of computing}, 275 | pages={1--10}, 276 | year={1988}, 277 | organization={ACM} 278 | } 279 | 280 | -------------------------------------------------------------------------------- /bib/formal.bib: -------------------------------------------------------------------------------- 1 | 2 | @article{girard1987linear, 3 | title={Linear logic}, 4 | author={Girard, Jean-Yves}, 5 | journal={Theoretical computer science}, 6 | volume={50}, 7 | number={1}, 8 | pages={1--101}, 9 | year={1987}, 10 | publisher={Elsevier} 11 | } 12 | 13 | @incollection{bove2009dependent, 14 | title={Dependent types at work}, 15 | author={Bove, Ana and Dybjer, Peter}, 16 | booktitle={Language engineering and rigorous software development}, 17 | pages={57--99}, 18 | year={2009}, 19 | publisher={Springer} 20 | } 21 | 22 | @inproceedings{wilcox2015verdi, 23 | title={Verdi: A framework for implementing and formally verifying distributed systems}, 24 | author={Wilcox, James R and Woos, Doug and Panchekha, Pavel and Tatlock, Zachary and Wang, Xi and Ernst, Michael D and Anderson, Thomas}, 25 | booktitle={Proceedings of the 36th ACM SIGPLAN Conference on Programming Language Design and Implementation}, 26 | pages={357--368}, 27 | year={2015}, 28 | organization={ACM} 29 | } 30 | 31 | @inproceedings{woos2016planning, 32 | title={Planning for change in a formal verification of the raft consensus protocol}, 33 | author={Woos, Doug and Wilcox, James R and Anton, Steve and Tatlock, Zachary and Ernst, Michael D and Anderson, Thomas}, 34 | booktitle={Proceedings of the 5th ACM SIGPLAN Conference on Certified Programs and Proofs}, 35 | pages={154--165}, 36 | year={2016}, 37 | organization={ACM} 38 | } 39 | 40 | @book{csp, 41 | title={Communicating sequential processes}, 42 | author={Hoare, Charles Antony Richard}, 43 | year={1978}, 44 | publisher={Springer} 45 | } 46 | 47 | @article{misra1989simple, 48 | title={A simple proof of a simple consensus algorithm}, 49 | author={Misra, Jayadev}, 50 | 
journal={Information processing letters}, 51 | volume={33}, 52 | number={1}, 53 | pages={21--24}, 54 | year={1989}, 55 | publisher={Elsevier}, 56 | note={proof of simple consensus with N > 4t+1 using equational reasoning} 57 | } 58 | 59 | 60 | @inproceedings{tsuchiya2007model, 61 | title={Model Checking of Consensus Algorithms}, 62 | author={Tsuchiya, Tatsuhiro and Schiper, Andr{\'e}}, 63 | booktitle={Reliable Distributed Systems, 2007. SRDS 2007. 26th IEEE International Symposium on}, 64 | pages={137--148}, 65 | year={2007}, 66 | organization={IEEE}, 67 | annote={ 68 | only uses model checking (ie temporal logic) to verify. 69 | uses Heard-Of model, which captures synchrony degree and any type of non-malicious fault 70 | "first time standard model checking allows one to completely verify asynchronous consensus algorithms" 71 | } 72 | } 73 | 74 | @techreport{charron2006heard, 75 | title={The heard-of model: Unifying all benign failures}, 76 | author={Charron-Bost, Bernadette and Schiper, Andr{\'e}}, 77 | year={2006} 78 | } 79 | 80 | 81 | @incollection{francalanza2007fault, 82 | title={A fault tolerance bisimulation proof for consensus}, 83 | author={Francalanza, Adrian and Hennessy, Matthew}, 84 | booktitle={Programming Languages and Systems}, 85 | pages={395--410}, 86 | year={2007}, 87 | publisher={Springer}, 88 | annote={ 89 | fine tune bisimulation techniques for partial failure settings 90 | some processes are reliable, thus immortal 91 | decompose to two parts: non-fault tolerant basic correctness, correctness preservation given faults 92 | based on riely1997distributed 93 | "We view our calculus as a partial-failure calculus rather than a distributed calculus as it permits action synchronisations across locations. This implies a tighter synchronisation assumption between locations, which in our calculus merely embody units of failure." 
94 | op semantics are basic CCS plus perfect failure detection (susp) and dynamic failures (halt) 95 | 96 | } 97 | } 98 | 99 | 100 | @book{riely1997distributed, 101 | title={Distributed processes and location failures}, 102 | author={Riely, James and Hennessy, Matthew}, 103 | year={1997}, 104 | publisher={Springer} 105 | } 106 | 107 | 108 | @article{chandra1996unreliable, 109 | title={Unreliable failure detectors for reliable distributed systems}, 110 | author={Chandra, Tushar Deepak and Toueg, Sam}, 111 | journal={Journal of the ACM (JACM)}, 112 | volume={43}, 113 | number={2}, 114 | pages={225--267}, 115 | year={1996}, 116 | publisher={ACM}, 117 | note={ 118 | show atomic broadcast and consensus are same thing in asynchronous networks 119 | failure detectors: completeness and accuracy 120 | completeness - all faulty processes eventually suspected 121 | accuracy - restrict mistakes 122 | defines weakes failure detector necessary and sufficient for consensus 123 | defines the hierarchy and shows where we go from tolerating any num of faults to only (n-1)/2 124 | 125 | } 126 | } 127 | 128 | @incollection{nestmann2003modeling, 129 | title={Modeling consensus in a process calculus}, 130 | author={Nestmann, Uwe and Fuzzati, Rachele and Merro, Massimo}, 131 | booktitle={CONCUR 2003-Concurrency Theory}, 132 | pages={399--414}, 133 | year={2003}, 134 | publisher={Springer}, 135 | note={builds on chandra1996unreliable using simpler form of weakest failure detector 136 | models failure detector in pi calc, 137 | complete pi calc description of algorithm! 
138 | "matrix" view of network history allows to see all sent messages and state of receipt, 139 | useful for proofs 140 | } 141 | } 142 | 143 | 144 | @article{palamidessi2003comparing, 145 | title={Comparing the expressive power of the synchronous and asynchronous $\pi$-calculi}, 146 | author={Palamidessi, Catuscia}, 147 | journal={Mathematical Structures in Computer Science}, 148 | volume={13}, 149 | number={05}, 150 | pages={685--719}, 151 | year={2003}, 152 | publisher={Cambridge Univ Press}, 153 | note={ 154 | this paper is actually an updated version of the original, from 1997 155 | asynch pi-calc can't encode mixed guarded choice, 156 | where the encoding is uniform, ie. [[ P | Q ]] = [[ P ]] | [[ Q ]]. 157 | and [[ \sigma(P) ]] = \sigma( [[ P ]] ) where sigma is an automorphism (injective renaming function) 158 | mixed guard has an initial symmetry that needs to be broken, 159 | which can be seen as a leader election process, which is impossible in async 160 | pi calc for symmetric network. 161 | electoral system is one in which the projection of a computation on each process 162 | has the processes all outputting the same value on their out chan 163 | } 164 | } 165 | 166 | @article{nestmann2000good, 167 | title={What is a “good” encoding of guarded choice?}, 168 | author={Nestmann, Uwe}, 169 | journal={Information and computation}, 170 | volume={156}, 171 | number={1}, 172 | pages={287--319}, 173 | year={2000}, 174 | publisher={Elsevier}, 175 | note={ 176 | important are compositionality and divergence freedom (no infinite loops) 177 | clarifies the Palamidessi result 178 | mixed choice can be done if willing to relax Palamidessi assumptions, 179 | either with randomized agreement (though apparently this disqualifies Palamidessi's reasonableness, 180 | though we know it can converge with probability 1), or arbitrary total order over the nodes, 181 | which would break symmetry. 
182 | an awesome paper 183 | } 184 | } 185 | 186 | @article{phillips2004correct, 187 | title={A correct abstract machine for the stochastic pi-calculus}, 188 | author={Phillips, Andrew and Cardelli, Luca}, 189 | journal={Electronic Notes in Theoretical Computer Science}, 190 | year={2004}, 191 | note={ 192 | how to implement the stochastic pi calculus 193 | } 194 | } 195 | 196 | 197 | @article{priami1995stochastic, 198 | title={Stochastic $\pi$-calculus}, 199 | author={Priami, Corrado}, 200 | journal={The Computer Journal}, 201 | volume={38}, 202 | number={7}, 203 | pages={578--589}, 204 | year={1995}, 205 | publisher={Br Computer Soc}, 206 | note={originally extension of pi-calc to probabilistic rates 207 | choice becomes probabilistic rather than non-deterministic 208 | delays are drawn from exponential distribution. memoryless. 209 | continuity of prob distribution ensures the prob of two events ending at same time is 0 (contention in the choice) 210 | rates on input/output can be independent, so rate of synch is that of the smaller one 211 | prob of a transition is rate of its action divided by the total rate leaving the state 212 | 213 | } 214 | } 215 | 216 | 217 | @inproceedings{ene1999expressiveness, 218 | title={Expressiveness of point-to-point versus broadcast communications}, 219 | author={Ene, Cristian and Muntean, Traian}, 220 | booktitle={Fundamentals of Computation Theory}, 221 | pages={258--268}, 222 | year={1999}, 223 | organization={Springer} 224 | } 225 | 226 | 227 | 228 | @article{lucchi2007pi, 229 | title={A pi-calculus based semantics for WS-BPEL}, 230 | author={Lucchi, Roberto and Mazzara, Manuel}, 231 | journal={The Journal of Logic and Algebraic Programming}, 232 | volume={70}, 233 | number={1}, 234 | pages={96--118}, 235 | year={2007}, 236 | publisher={Elsevier} 237 | } 238 | 239 | @inproceedings{phillips2007efficient, 240 | title={Efficient, correct simulation of biological processes in the stochastic pi-calculus}, 241 | author={Phillips, 
Andrew and Cardelli, Luca}, 242 | booktitle={Computational methods in systems biology}, 243 | pages={184--199}, 244 | year={2007}, 245 | organization={Springer} 246 | } 247 | 248 | @article{abramsky1994proofs, 249 | title={Proofs as processes}, 250 | author={Abramsky, Samson}, 251 | journal={Theoretical Computer Science}, 252 | volume={135}, 253 | number={1}, 254 | pages={5--9}, 255 | year={1994}, 256 | publisher={Elsevier} 257 | } 258 | 259 | @inproceedings{ryan2015beyond, 260 | title={Beyond Flash Boys: Improving Transparency and Fairness in Financial Markets}, 261 | author={Ryan, Ronan}, 262 | booktitle={CFA Institute Conference Proceedings Quarterly}, 263 | volume={32}, 264 | number={4}, 265 | pages={10--17}, 266 | year={2015}, 267 | organization={CFA Institute} 268 | } 269 | 270 | 271 | @article{milner1992calculus, 272 | title={A calculus of mobile processes, i}, 273 | author={Milner, Robin and Parrow, Joachim and Walker, David}, 274 | journal={Information and computation}, 275 | volume={100}, 276 | number={1}, 277 | pages={1--40}, 278 | year={1992}, 279 | publisher={Elsevier} 280 | } 281 | 282 | @article{stirling1991local, 283 | title={Local model checking in the modal mu-calculus}, 284 | author={Stirling, Colin and Walker, David}, 285 | journal={Theoretical Computer Science}, 286 | volume={89}, 287 | number={1}, 288 | pages={161--177}, 289 | year={1991}, 290 | publisher={Elsevier} 291 | } 292 | 293 | @article{caires2003spatial, 294 | title={A spatial logic for concurrency (part I)}, 295 | author={Caires, Lu{\'\i}s and Cardelli, Luca}, 296 | journal={Information and Computation}, 297 | volume={186}, 298 | number={2}, 299 | pages={194--235}, 300 | year={2003}, 301 | publisher={Elsevier} 302 | } 303 | 304 | @article{vieira2004spatial, 305 | title={The spatial logic model checker user’s manual}, 306 | author={Vieira, Hugo and Caires, Lu{\'\i}s and Viegas, Ruben}, 307 | year={2004}, 308 | publisher={Citeseer} 309 | } 310 | 311 | @article{milner1993modal, 312 | 
title={Modal logics for mobile processes}, 313 | author={Milner, Robin and Parrow, Joachim and Walker, David}, 314 | journal={Theoretical Computer Science}, 315 | volume={114}, 316 | number={1}, 317 | pages={149--171}, 318 | year={1993}, 319 | publisher={Elsevier} 320 | } 321 | 322 | -------------------------------------------------------------------------------- /bib/programming.bib: -------------------------------------------------------------------------------- 1 | 2 | @techreport{avl, 3 | title={An algorithm for the organization of information}, 4 | author={AdelsonVelskii, M and Landis, Evgenii Mikhailovich}, 5 | year={1963}, 6 | institution={DTIC Document} 7 | } 8 | 9 | @article{golang, 10 | title={The Go Programming Language}, 11 | author={Pike, Rob}, 12 | journal={Talk given at Google’s Tech Talks}, 13 | year={2009} 14 | } 15 | 16 | 17 | @misc{openssl, 18 | title = {OpenSSL Vulnerabilities}, 19 | howpublished = {https://www.openssl.org/news/vulnerabilities.html}, 20 | note = {Accessed: 2016-04-01} 21 | } 22 | 23 | @misc{csp_go, 24 | title = {Share Memory By Communicating}, 25 | howpublished = {https://blog.golang.org/share-memory-by-communicating}, 26 | note = {Accessed: 2016-05-12} 27 | } 28 | 29 | @misc{jsonrpc, 30 | title = {JSON-RPC}, 31 | howpublished = {http://json-rpc.org/}, 32 | note = {Accessed: 2016-04-01} 33 | } 34 | 35 | @article{halting, 36 | title={On computable numbers, with an application to the Entscheidungsproblem}, 37 | author={Turing, Alan Mathison}, 38 | journal={Proceedings of the London 
Mathematical Society}, 39 | volume={s2-42}, 40 | number={1}, 41 | pages={230--265}, 42 | year={1936} 43 | } 44 | 45 | @book{davis1958computability, 46 | title={Computability \& unsolvability}, 47 | author={Davis, Martin}, 48 | year={1958}, 49 | publisher={Courier Corporation} 50 | } 51 | 52 | 53 | @article{protobuf, 54 | title={Protocol buffers: Google’s data interchange format}, 55 | author={Varda, Kenton}, 56 | journal={Google Open Source Blog, Available at least as early as Jul}, 57 | year={2008} 58 | } 59 | 60 | @inproceedings{rarest_first, 61 | title={Rarest first and choke algorithms are enough}, 62 | author={Legout, Arnaud and Urvoy-Keller, Guillaume and Michiardi, Pietro}, 63 | booktitle={Proceedings of the 6th ACM SIGCOMM conference on Internet measurement}, 64 | pages={203--216}, 65 | year={2006}, 66 | organization={ACM} 67 | } 68 | 69 | @article{hursch1995separation, 70 | title={Separation of concerns}, 71 | author={H{\"u}rsch, Walter L and Lopes, Cristina Videira}, 72 | year={1995}, 73 | publisher={Citeseer} 74 | } 75 | 76 | 77 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | set -e 3 | 4 | pdflatex(){ 5 | /usr/local/texlive/2015/bin/universal-darwin/pdflatex "$@" 6 | } 7 | 8 | bibtex(){ 9 | /usr/local/texlive/2015/bin/universal-darwin/bibtex "$@" 10 | } 11 | 12 | CHAPTER=$1 13 | 14 | if [[ "$CHAPTER" == "" ]]; then 15 | pdflatex main.tex 16 | bibtex main 17 | pdflatex main.tex 18 | pdflatex main.tex 19 | cp main.pdf Buchman_Ethan_201606_MAsc.pdf 20 | else 21 | pdflatex -jobname=chapBuild "\includeonly{$CHAPTER}\input{main.tex}" 22 | # bibtex chapBuild 23 | # pdflatex -jobname=chapBuild "\includeonly{$CHAPTER}\input{main.tex}" 24 | # pdflatex -jobname=chapBuild "\includeonly{$CHAPTER}\input{main.tex}" 25 | fi 26 | 27 | -------------------------------------------------------------------------------- /chapters/abstract.tex: -------------------------------------------------------------------------------- 1 | 2 | \thispagestyle{plain} 3 | \begin{center} 4 | \textbf{ABSTRACT} \\ 5 | \vspace{0.6cm} 6 | \textbf{TENDERMINT: BYZANTINE FAULT TOLERANCE IN THE AGE OF BLOCKCHAINS} 7 | \end{center} 8 | 9 | \vspace{0.6cm} 10 | \begin{tabular}{ p{0.5\textwidth} p{0.5\textwidth} } 11 | Ethan Buchman & Advisor:\\ 12 | University of Guelph, 2016 & Professor Graham Taylor 13 | \end{tabular} 14 | 15 | 16 | \vspace{0.9cm} 17 | Tendermint is a new protocol for ordering events in a distributed network under adversarial conditions. 18 | More commonly known as consensus or atomic broadcast, the problem has attracted significant attention 19 | recently due to the widespread success of digital currencies, such as Bitcoin and Ethereum, 20 | which successfully solve the problem in public settings without a central authority. 21 | Tendermint modernizes classic academic work on the subject to provide a secure consensus protocol with 22 | accountability guarantees, as well as an interface for building arbitrary applications above the consensus. 
23 | Tendermint is high performance, achieving thousands of transactions per second on dozens of nodes distributed around the globe, 24 | with latencies of about one second, and performance degrading moderately in the face of adversarial attacks. 25 | 26 | \clearpage 27 | 28 | -------------------------------------------------------------------------------- /chapters/appendix.tex: -------------------------------------------------------------------------------- 1 | 2 | 3 | \appendix 4 | \chapter{Appendix Title} 5 | -------------------------------------------------------------------------------- /chapters/apps.tex: -------------------------------------------------------------------------------- 1 | \chapter{Building Applications} 2 | \label{ch:apps} 3 | 4 | Tendermint is designed to be a general purpose algorithm for replicating a deterministic state machine. 5 | It uses the Tendermint Socket Protocol (TMSP) to standardize communication between the consensus engine and the state machine, 6 | enabling application developers to build their state machines in any programming language, 7 | and have it automatically replicated via Tendermint's BFT algorithm. 8 | 9 | \section{Background} 10 | 11 | Applications on the Internet can in general be characterized as containing two fundamental components: 12 | 13 | \begin{itemize} 14 | \item{Engine: handles core security, networking, replication. 15 | This is typically a webserver, like Apache or Nginx, when powering a web app, 16 | or a consensus algorithm when powering a distributed application.} 17 | \item{State-machine: the actual application logic that processes transactions received from the engine and updates internal state.} 18 | \end{itemize} 19 | 20 | This separation of concerns enables application developers to write state-machines in any programming language representing arbitrary applications, 21 | on top of an engine which may be specialized for its performance, security, usability, support, and other considerations. 
22 | 23 | Unlike web-servers and their applications, which often take the form of processes communicating over a socket via the Common Gateway Interface (CGI) protocol, 24 | consensus algorithms have traditionally had much less usable or less general-purpose interfaces to build applications on top of. 25 | Some, like ZooKeeper, etcd, Consul, and other distributed key-value stores, 26 | provide HTTP interfaces to a particular instance of a simple key-value application, 27 | with some more interesting features like atomic compare-and-swap operations and push notifications. 28 | But they do not give the application developer control of the state-machine code itself. 29 | 30 | Demand for such a high level of control over the state-machine running above a consensus engine has been driven primarily by the success of Bitcoin and the consequent interest in blockchain technology. 31 | By building more advanced applications directly into the consensus, 32 | users, developers, regulators, etc.~can achieve greater security guarantees on arbitrary state-machines, 33 | far beyond key-value stores, such as currencies, exchanges, supply-chain management, governance, and so on. 34 | What has captured the attention of so many is the potential of a system which permits collective enforcement of the execution of code. 35 | It is practically a re-invention of many dimensions of the legal system, using distributed consensus algorithms and deterministically executable contracts, 36 | rather than policemen, lawyers, judges, juries, and the like. 37 | The ramifications for the development of human society are explosive, much as the introduction of the democratic rule of law was in the first place. 38 | 39 | Tendermint aims to provide the fundamental interface and consensus engine upon which such applications might be built. 
40 | 41 | \section{Tendermint Socket Protocol} 42 | 43 | The Tendermint Socket Protocol (TMSP) defines the core interface by which the consensus engine communicates with the application state machine. 44 | The interface definition consists of a number of message types, specified using Google's Protocol Buffers \cite{protobuf}, 45 | that are length-prefixed and transmitted over a socket. 46 | A list of message types, their arguments, return values, and purpose is given in Figure \ref{fig:tmsp_msgs}, 47 | and an overview of the architecture and message flow is shown in Figure \ref{fig:tmsp}. 48 | 49 | \begin{figure}[] 50 | \vspace*{-1.5in} 51 | \centering 52 | \begin{lstlisting} 53 | 54 | type Application interface { 55 | // Return application info 56 | Info() (info string) 57 | 58 | // Set application option 59 | SetOption(key string, value string) (log string) 60 | 61 | // Append a tx 62 | AppendTx(tx []byte) Result 63 | 64 | // Validate a tx for the mempool 65 | CheckTx(tx []byte) Result 66 | 67 | // Return the application Merkle root hash 68 | Commit() Result 69 | 70 | // Query for state 71 | Query(query []byte) Result 72 | 73 | // Signals the beginning of a block 74 | BeginBlock(height uint64) 75 | 76 | // Signals the end of a block 77 | // validators: changed validators from app to TendermintCore 78 | EndBlock(height uint64) (validators []*Validator) 79 | } 80 | 81 | type CodeType int32 82 | 83 | type Result struct { 84 | Code CodeType 85 | Data []byte 86 | Log string // Can be non-deterministic 87 | } 88 | 89 | type Validator struct { 90 | PubKey []byte 91 | Power uint64 92 | } 93 | \end{lstlisting} 94 | \caption[TMSP Message Types]{ 95 | The TMSP application interface as defined in Go. 96 | TMSP messages are defined using Google's Protocol Buffers, and their serialized form is length prefixed before 97 | being sent over the TMSP socket. 
98 | Return values include a \emph{Code}, similar to an HTTP Status Code, representing any errors, 99 | and $0$ is used to indicate no error. 100 | Messages are buffered client side until a \emph{Flush} message is sent, 101 | at which point all messages are transmitted. While the server design is asynchronous, message responses must be correctly ordered and match their request. 102 | } 103 | \label{fig:tmsp_msgs} 104 | \end{figure} 105 | 106 | 107 | \begin{figure}[] 108 | \includegraphics[width=\linewidth,height=\textheight,keepaspectratio]{figures/diagrams/tmsp.pdf} 109 | \centering 110 | \caption[TMSP Architecture]{ 111 | The consensus logic communicates with the application logic via TMSP, a socket protocol. 112 | Two sockets are maintained, one for the mempool to check the validity of new transactions, 113 | and one for the consensus to execute newly committed blocks. 114 | } 115 | \label{fig:tmsp} 116 | \end{figure} 117 | 118 | TMSP is implemented as an ordered, asynchronous server, 119 | where message types come in pairs of request and response, 120 | and where a special message type, Flush, pushes any buffered messages over the connection and awaits all responses. 121 | 122 | At the core of the TMSP are two messages: \emph{AppendTx} and \emph{Commit}. 123 | Once a block is decided by the consensus, 124 | the engine calls \emph{AppendTx} on each transaction in the block, 125 | passing it to the application state-machine to be processed. 126 | If the transaction is valid, it will result in a state-transition in the application. 127 | 128 | Once all \emph{AppendTx} calls have returned, the consensus engine calls Commit, 129 | causing the application to commit to the latest state, and persist it to disk. 130 | 131 | \section{Separating Agreement and Execution} 132 | 133 | Using the TMSP affords us an explicit separation between consensus, or agreement on the order of transactions, 134 | and their actual execution in the state-machine. 
135 | In particular, we achieve consensus on the order first, and then execute the ordered transactions. 136 | This separation actually improves the system's fault tolerance \cite{yin2003separating}: 137 | while $3f+1$ replicas are still needed for agreement to tolerate $f$ Byzantine failures, 138 | only $2f+1$ replicas are needed for execution. 139 | That is, while we still need a two-thirds majority for ordering, we only need a one-half majority for execution. 140 | 141 | On the other hand, the fact that transactions are executed after they are ordered results in possibly invalid transactions, 142 | which can waste system resources. 143 | This is solved using an additional TMSP message, \emph{CheckTx}, which is called by the mempool, 144 | allowing it to check whether the transaction would be valid against the latest state. 145 | Note, however, that the fact that commits come in blocks at a time introduces complexity in the handling of \emph{CheckTx} messages. 146 | In particular, applications are expected to maintain a second state-machine that executes only those rules of the main state-machine pertaining to a transaction's validity. 147 | This second state-machine is updated by \emph{CheckTx} messages and is reset to the latest committed state after every commit. 148 | In essence, the second state machine describes the transaction pool's filter rules. 149 | 150 | To some extent, \emph{CheckTx} can be used as an \emph{optimistic execution} 151 | returning a result to the transaction sender with the caveat that 152 | the result may be wrong if a block is committed with a conflicting transaction 153 | before the transaction of interest is committed. 154 | This sort of optimistic execution is the focus of an approach to scalable BFT systems 155 | that can work quite well for particular applications where conflicts between transactions are rare. 
156 | At the same time, it adds additional complexity to the client, by virtue of needing to handle possibly invalid results. 157 | The approach is discussed further in Chapter \ref{ch:related}.\looseness=-1 158 | 159 | \section{Microservice Architecture} 160 | 161 | Adopting separation of concerns as a strategy in application design is generally considered wise practice \cite{hursch1995separation}. 162 | In particular, many large scale application deployments today adopt a microservice architecture, 163 | wherein each functional component is implemented as a standalone network service, 164 | and typically encapsulated in a Linux container (e.g.~using Docker) for efficient deployment, scalability, and upgradeability. 165 | 166 | Applications running above Tendermint consensus will often be decomposable into microservices. 167 | For instance, many applications will utilize a key-value store for storing state. 168 | Running the key-value store as an independent service is quite common, 169 | in order to take advantage of the data store's specialized features, such as high-performance data types or Merkle trees. 170 | 171 | Another important microservice for applications is a governance module, 172 | which manages a certain subset of TMSP messages, enabling the application to control validator set changes. 173 | Such a module can become a powerful paradigm for governance in BFT systems. 174 | 175 | Some applications may utilize a native currency or account structure for users. 176 | It may thus be useful to provide a module which supports basic elements of, for instance, 177 | handling digital signatures and managing account dynamics. 178 | 179 | The list of possible microservices to compose a complex TMSP application goes on. 180 | In fact, one might even build an application which can launch sub-applications using data sent in transactions. 
181 | For instance, a transaction could include the hash of a Docker image, 182 | such that the image could be pulled from some file-storage backend and run as a sub-application, which 183 | future transactions in the consensus could cause to execute. 184 | This is the approach of Ethereum, 185 | which allows developers to deploy bits of code to the network that can be triggered to run within the Ethereum Virtual Machine by future transactions \cite{ethereum}, 186 | and of IBM's recent OpenBlockChain (OBC) project, which allows developers to send full Docker contexts in transactions, 187 | defining containers that run arbitrary code in response to transactions addressed to them \cite{obc}. 188 | 189 | \section{Determinism} 190 | 191 | The most critical caveat about building applications using TMSP is that they must be deterministic. 192 | That is, for the replicated state-machine to not compromise safety, 193 | every node must obtain the same result when executing the same transaction against the same state. 194 | 195 | This is not a unique requirement for Tendermint. Bitcoin, Raft, Ethereum, any other distributed consensus algorithm, 196 | and applications like lock-step multi-player gaming must all be strictly deterministic, lest a consensus failure arise. 197 | 198 | There are many sources of non-determinism in programming languages, most obviously via random numbers and time, 199 | but also, for instance, via the use of floating-point arithmetic, and by iteration over hash tables 200 | (some languages, such as Go, enforce randomized iteration over hash tables to force programmers to be explicit about when they need ordered data structures). 201 | The strict requirement of determinism, and its notable absence from every major programming language, 202 | prompted Ethereum to develop its own, Turing-complete, fully deterministic virtual machine, 203 | which forms the platform for application developers to build applications above the Ethereum blockchain. 
204 | While deterministic, it has many quirks, such as 32-byte stack words, storage keys, and storage values, 205 | and no support for byte-shifting operations - everything is big number arithmetic. 206 | 207 | Deterministic programming is well studied in the world of real-time, lockstep, multi-party gaming. 208 | Such games constitute another example of replicated state machines, and are quite similar in many ways to consensus algorithms. 209 | Application developers building with TMSP are encouraged to study their methods, and to take care when implementing an application. 210 | On the one hand, the use of functional programming languages and proof methods can enable the construction of correct programs. 211 | On the other, compilers are being built to translate possibly non-deterministic programs to canonically deterministic ones \cite{deterministicjs}. 212 | 213 | \section{Termination} 214 | 215 | If determinism is critical for preserving safety, termination of transaction execution is critical for preserving liveness. 216 | It is, however, not in general possible to determine whether a given program halts for even a single input, let alone all of them, 217 | a problem known as the Halting Problem \cite{halting, davis1958computability}. 218 | 219 | Ethereum's virtual machine solves the problem by \emph{metering}, that is, charging for each operation in the execution. 220 | This way, a transaction is guaranteed to terminate when the sender runs out of funds. 221 | Such metering may be possible in a more general case, 222 | via compilers that compile programs to metered versions of themselves. 223 | 224 | It is difficult to solve this problem without significant overhead. 225 | In essence, a validator cannot tell if an execution is in an infinite loop or is just slow, but nearly complete. 
226 | It may be possible to use the Tendermint consensus protocol to decide on transaction timeouts, 227 | such that more than two-thirds of validators must agree that a transaction timed out and is thus considered invalid (i.e., having no effect on the state). 228 | However, we do not pursue the idea further here, leaving it to future work. 229 | In the meantime, it is expected that applications will undergo thorough testing before being deployed in any consensus system, 230 | and that monitoring and governance mechanisms will be used to resurrect the system in the event of consensus failure. 231 | 232 | \section{Examples} 233 | 234 | In this section, examples of increasingly complex TMSP applications are introduced and discussed, 235 | with particular focus on \emph{CheckTx} and managing the mempool. 236 | 237 | \subsection{Merkleeyes} 238 | 239 | A simple example of a TMSP application is a Merkle tree based key-value store. 240 | Tendermint provides Merkleeyes, a TMSP application which wraps a self-balancing, Merkle binary search tree. 241 | The first byte of a transaction determines if the transaction is a get, set, or remove operation. 242 | For get and remove operations, the remaining bytes are the key. 243 | For the set operation, the remaining bytes are a serialized list containing the key and value. 244 | Merkleeyes may utilize a simple implementation of \emph{CheckTx} that only decodes the transaction, 245 | to ensure it is properly formatted. 246 | One could also make a more advanced \emph{CheckTx}, where get and remove operations on unknown keys are invalid. 247 | Once \emph{Commit} is called, the latest updates are added into the Merkle tree, all hashes are computed, 248 | and the latest state of the tree is committed to disk. 
249 | 250 | Note that Merkleeyes was designed to be a module used by other TMSP applications for a Merkle tree based key-value store, 251 | rather than a stand alone TMSP application, though the simplicity of the TMSP interface makes it amenable to both. 252 | 253 | \subsection{Basecoin} 254 | 255 | A more complete example is a simple currency, 256 | using an account structure pioneered by Ethereum, 257 | where each user has a public key and an account with the balance for that public key. 258 | The account also contains a sequence number, which is equal to the number of transactions sent by the account. 259 | Transactions can send funds from the account if they include the correct sequence number 260 | and are signed by the correct private key. 261 | Without the sequence number, the system would be susceptible to replay attacks \cite{replay}, 262 | where a signed transaction debiting an account could be replayed, 263 | causing the debit to occur multiple times. 264 | Furthermore, to prevent replay attacks in a multi-chain environment, 265 | transaction signatures should include a network or blockchain identifier. 266 | 267 | An application supporting a currency has naturally more logic than a simple key-value store. 268 | In particular, certain transactions are distinctly invalid, 269 | such as those with an invalid signature, incorrect sequence number, 270 | or sending an amount greater than the sender's account balance. 271 | These conditions can be checked in \emph{CheckTx}. 272 | 273 | Furthermore, a supplementary application state must be maintained for \emph{CheckTx} 274 | in order to update sequence numbers and account balances 275 | when there are multiple transactions involving the same accounts in the mempool at once. 276 | When commit is called, the supplementary application state is reset to the latest committed state. 277 | Any transactions still in the mempool can be replayed via \emph{CheckTx} against the latest state. 
278 | 279 | \subsection{Ethereum} 280 | 281 | Ethereum uses the mechanisms already described to filter transactions out of the mempool, 282 | but it also runs some transactions in a virtual machine, 283 | which updates state and returns results. 284 | The virtual machine execution is not done in \emph{CheckTx}, 285 | as it is much more expensive and depends heavily on the ultimate order of transactions as they are included in blocks. 286 | 287 | \section{Conclusion} 288 | 289 | TMSP provides a simple yet flexible means to build arbitrary applications, 290 | in any programming language, 291 | that inherit BFT state-machine replication from the Tendermint consensus algorithm. 292 | It plays much the same role for a consensus engine and an application that, for instance, CGI plays for Apache and Wordpress. 293 | However, application developers must take special care to ensure their applications are deterministic, and that transaction executions terminate. 294 | 295 | -------------------------------------------------------------------------------- /chapters/background.tex: -------------------------------------------------------------------------------- 1 | \chapter{Background} 2 | \label{ch:background} 3 | 4 | \renewcommand{\|}{\;|\;} 5 | 6 | Distributed consensus systems have become a critical component of modern Internet infrastructure, 7 | powering every major Internet application at some level or another. 8 | This chapter introduces the necessary background material for understanding and discussing these systems. 9 | In addition, it introduces the $\pi$-calculus, a formal language for describing concurrent processes, 10 | which will be used to specify the Tendermint algorithm in Chapter \ref{ch:tendermint}. 
11 | 12 | \section{Replicated State Machine} 13 | 14 | The most common paradigm for studying and implementing distributed consensus is that of the Replicated State Machine, 15 | wherein a \emph{deterministic} state machine is replicated across a set of processes, 16 | such that it functions as a single state machine 17 | despite the failure of some processes \cite{schneider1990implementing}. 18 | The state machine is driven by a set of inputs, known as \emph{transactions}, 19 | where each transaction may or may not, depending on its validity, cause a state transition and return a result. 20 | More formally, a transaction is an \emph{atomic} operation on a database, 21 | meaning it either completes or doesn't occur at all, 22 | and can't be left in an intermediate state \cite{gray1981transaction}. 23 | The state transition logic is governed by the state machine's state transition function, 24 | which maps a transaction and the current state to a new state and a return value. 25 | The state transition function is also sometimes referred to as \emph{application logic}. 26 | 27 | It is the responsibility of the consensus protocol to order the transactions so that the resulting 28 | \emph{transaction log} is replicated exactly by every process. 29 | Using a deterministic state transition function implies that 30 | every process will compute the same state given the same transaction log. 31 | 32 | A summary of the replicated state machine architecture is given in Figure \ref{fig:replicated_state_machine}. 33 | 34 | \begin{figure}[] 35 | \includegraphics[width=\linewidth,height=\textheight,keepaspectratio]{figures/diagrams/state_machine.pdf} 36 | \centering 37 | \caption[Overview of replicated state machine architecture]{ 38 | A replicated state machine replicates a transaction log and resulting state across multiple machines. 
39 | Transactions are received from the client, 40 | run through the consensus protocol, 41 | ordered in the transaction log, 42 | and executed against the state. 43 | In the figure, each diamond represents a single machine, 44 | with dotted lines representing communication between machines to carry out the consensus protocol for ordering transactions.} 45 | \label{fig:replicated_state_machine} 46 | \end{figure} 47 | 48 | Tendermint was motivated by the desire to create a general purpose, high-performance, secure, and robust replicated state machine. 49 | 50 | \section{Asynchrony} 51 | 52 | The purpose of a fault-tolerant replicated state machine is to co-ordinate 53 | a network of computers to stay in sync while providing a useful service, 54 | despite the presence of faults. 55 | 56 | Staying in sync amounts to replicating the transaction log successfully; 57 | providing a useful service amounts to keeping the state machine available for new transactions. 58 | These aspects of the system are traditionally known as \emph{safety} and \emph{liveness}, respectively. 59 | Colloquially, safety means nothing bad happens; liveness means that something good eventually happens. 60 | A violation of safety implies two or more valid, competing transaction logs. 61 | Violating liveness implies an unresponsive network. 62 | 63 | It is trivial to satisfy liveness by accepting all transactions. And it is trivial to satisfy safety by accepting none. 64 | Hence, state machine replication algorithms can be seen to operate on a spectrum defined by these extremes. 65 | Typically, processes require some threshold of received information from other processes before they commit a new transaction.
66 | In synchronous environments, 67 | where we make assumptions about the maximum delay of network messages or the maximum speed of processor clocks, 68 | it is easy enough to take turns proposing new transactions, poll for a majority vote, 69 | and skip a proposer's turn if they don't propose within the bounds of the synchrony assumptions. 70 | 71 | In asynchronous environments, where no such assumptions about network delays or processor speeds are warranted, 72 | the trade-off is much more difficult to manage. 73 | In fact, the so called FLP impossibility result demonstrates the 74 | impossibility of distributed consensus among deterministic asynchronous\footnote{Prior to FLP, the distinction between sync/async wasn't as prominent.} processes 75 | if even a single process can crash \cite{flp}. 76 | The proof amounts to showing that, because processes can fail, 77 | there are valid executions of the protocol in which processes fail at the exact opportune times to prevent consensus. 78 | Hence, we have no guarantee of consensus. 79 | 80 | Typically, synchrony in a protocol is reflected by the use of timeouts to manage certain transitions. 81 | In asynchronous environments, where messages can be arbitrarily delayed, relying on synchrony (timeouts) for safety 82 | can lead to a fork in the transaction log. 83 | Relying on synchrony to ensure liveness can cause the consensus to halt, and the service to become unresponsive. 84 | The former case is usually considered more severe, as reconciling conflicting logs can be a daunting or impossible task. 85 | 86 | In practice, synchronous solutions are only used where the message latency is under 87 | extremely well defined control, for instance between controllers on an airplane \cite{hoyme1993safebus}, 88 | or between datacenters utilizing synchronized atomic clocks \cite{corbett2013spanner}.
89 | Thus, while many efficient synchronous solutions exist, 90 | the general unreliability of computer networks is too great a risk for them to be used in practice 91 | without significant additional costs. 92 | 93 | There are fundamentally two ways to overcome the FLP impossibility result. 94 | The first is to use stronger synchrony assumptions - 95 | even rather weak assumptions are sufficient, 96 | for instance, that only eventually, 97 | crashed processes are suspected of crashing and correct ones are not \cite{chandra1996unreliable}. 98 | Typically, this approach utilizes \emph{leaders}, 99 | which play a special co-ordinating role, 100 | and which can be skipped if they are suspected of being faulty after some timeout. 101 | In practice, such leader-election mechanisms can be difficult to get right. 102 | 103 | The second way to overcome FLP is to use non-determinism - 104 | include randomization elements such that 105 | the probability of coming to consensus tends to $1$. 106 | While clever, relying on randomization is typically much slower, 107 | though certain advanced cryptographic techniques have in recent years 108 | achieved tremendous improvements in speed \cite{honeybadger}. 109 | 110 | 111 | \section{Broadcast and Consensus} 112 | 113 | In order for a process to replicate its state on other processes, 114 | it must have access to basic communication primitives which allow it to disseminate, or deliver, information. 115 | One of the most useful such primitives is \emph{reliable broadcast}.
116 | Reliable broadcast (RBC) is a broadcast primitive satisfying, for message $m$ \cite{chandra1996unreliable}: 117 | 118 | \begin{itemize} 119 | \item validity - if a correct process broadcasts $m$, it eventually delivers $m$ 120 | \item agreement - if a correct process delivers $m$, all correct processes eventually deliver $m$ 121 | \item integrity - $m$ is only delivered once, and only if broadcast by its sender 122 | \end{itemize} 123 | 124 | In essence, RBC enables a message to be eventually delivered once on all correct processes. 125 | 126 | Another, more useful primitive is \emph{atomic broadcast} (ABC), 127 | which satisfies RBC and an additional property \cite{chandra1996unreliable}: 128 | 129 | \begin{itemize} 130 | \item total order - if correct processes $p$ and $q$ deliver $m$ and $m'$, then $p$ delivers $m$ before $m'$ iff $q$ delivers $m$ before $m'$ 131 | \end{itemize} 132 | 133 | Atomic broadcast is thus a reliable broadcast where values are delivered in the same order on each host. 134 | Note this is exactly the problem of replicating a transaction log. 135 | While colloquially, the problem may be referred to as consensus, 136 | the standard definition of the consensus primitive satisfies the following \cite{chandra1996unreliable}: 137 | \begin{itemize} 138 | \item termination - every correct process eventually decides 139 | \item integrity - every correct process decides at most once 140 | \item agreement - if one correct process decides $v1$ and another decides $v2$, then $v1=v2$ 141 | \item validity - if a correct process decides $v$, at least one process proposed $v$ 142 | \end{itemize} 143 | 144 | Intuitively, consensus and ABC appear remarkably similar, 145 | with the critical difference that ABC is a continuous protocol, 146 | whereas consensus expects to terminate. 147 | That said, it is well known that each can be reduced to the other \cite{chandra1996unreliable}. 
148 | Consensus is easily reduced to ABC by deciding the first value to be atomically broadcast. 149 | ABC can be reduced to consensus by running many instances of the consensus protocol, 150 | in sequence, 151 | though certain subtle considerations must be made, 152 | especially for handling Byzantine faults. 153 | A complete description of the parameter space surrounding 154 | the reduction of ABC to consensus remains an open topic of research. 155 | 156 | Historically, despite the fact that most use cases actually require ABC, 157 | the most widely adopted algorithm has been a consensus algorithm called Paxos, 158 | introduced, and proven correct, by Leslie Lamport in the 90s \cite{paxos}. 159 | Paxos simultaneously empowered and confused the discipline of consensus science, 160 | on the one hand by providing the first real-world, practical, fault-tolerant consensus algorithm, 161 | and on the other by being so difficult to understand and explain. 162 | Each implementation of the algorithm used its own unique bag of ad-hoc techniques 163 | to build ABC from Paxos, making the ecosystem difficult to navigate, understand, and utilize. 164 | Unfortunately, there was little work on improving the problem framing to make it more understandable, 165 | though there were efforts to delineate solutions to the various difficulties \cite{chandra2007paxos}. 166 | 167 | In 2013, Ongaro and Ousterhout published Raft \cite{raft}, 168 | a state machine replication algorithm whose motivating design goal was understandability. 169 | Rather than starting from a consensus algorithm, and attempting to build what was needed (ABC), 170 | the design of Raft considered first and foremost the transaction log, 171 | and sought orthogonal components which could fit together to provide what is ultimately ABC, 172 | though it is not described as such. 
173 | 174 | Paxos has been the staple consensus algorithm for industry, 175 | upon which the likes of Amazon \cite{dynamo}, Google \cite{chubby}, 176 | and others have built out highly available global Internet services. 177 | The Paxos consensus sits at the bottom of the application stack, 178 | providing a consistent interface to resource management and allocation, 179 | operating at much slower time scales than the highly-available applications facing the users. 180 | 181 | Since its debut, however, Raft has seen tremendous adoption, especially in the open source community, 182 | with implementations in virtually every major language \cite{raft.github.io}, 183 | and use as the backbone in major projects, 184 | including CoreOs's distributed Linux distribution \cite{coreos_raft} 185 | and the open source time-series database InfluxDB \cite{influxdb,hashicorp_raft}. 186 | 187 | Raft's major divergent design decision from Paxos was to 188 | focus on the transaction-log first, rather than a single value, 189 | in particular to allow a leader to persist in committing transactions until he goes down, 190 | at which point leadership election can kick in. 191 | In some ways, this is similar to the approach taken by blockchains, 192 | though the major advantage of blockchains is the ability to tolerate a different kind of fault. 193 | 194 | \section{Byzantine Fault Tolerance} 195 | 196 | Blockchains have been described as ``trust machines'' \cite{economist_blockchains} on account of the way they reduce counter party risk through the decentralization of responsibility over a shared database. 197 | Bitcoin, in particular, is noted for its ability to withstand attacks and malicious behaviour by any of the participants. 198 | Traditionally, consensus protocols tolerant of malicious behaviour were known as Byzantine Fault Tolerant (BFT) consensus protocols.
199 | The term Byzantine was used due to the similarity of the problem to that faced by generals of the Byzantine army attempting to co-ordinate themselves to attack Rome using only messengers, 200 | where one of the generals may be a traitor \cite{lamport1982byzantine}. 201 | 202 | In a crash fault, a process simply halts. In a Byzantine fault, it can behave arbitrarily. 203 | Crash faults are easier to handle, as no process can \emph{lie} to another process. 204 | Systems which only tolerate crash faults can operate via simple majority rule, 205 | and therefore typically tolerate simultaneous failure of up to half of the system. 206 | If the number of failures the system can tolerate is $f$, such systems must have at least $2f+1$ processes. 207 | 208 | Byzantine failures are more complicated. In a system of $2f+1$ processes, if $f$ are Byzantine, 209 | they can co-ordinate to say arbitrary things to the other $f+1$ processes. 210 | For instance, suppose we are trying to agree on the value of a single bit, 211 | and $f=1$, so we have $N=3$ processes, $A$, $B$, and $C$, where $C$ is Byzantine, as in Figure \ref{fig:byzantine}. 212 | $C$ can tell $A$ that the value is $0$ and tell $B$ that it's $1$. 213 | If $A$ agrees that it's $0$, and $B$ agrees that it's $1$, then they will both think they have a majority and commit, 214 | thereby violating the safety condition. 215 | Hence, the upper bound on faults tolerated by a Byzantine system is strictly lower than a non-Byzantine one. 216 | 217 | \begin{figure}[] 218 | \includegraphics[width=\linewidth,height=\textheight,keepaspectratio]{figures/diagrams/byzantine.pdf} 219 | \centering 220 | \caption[Byzantine processes tell lies]{ 221 | A Byzantine process, C, tells A one thing and B another, causing them to come to different conclusions about the network.
222 | Here, simple majority vote results in a violation of safety due to only a single Byzantine process.} 223 | \label{fig:byzantine} 224 | \end{figure} 225 | 226 | 227 | In fact, it can be shown that the upper limit on $f$ for Byzantine faults is $f < N/3$ \cite{pease1980reaching}. 228 | Thus, to tolerate a single Byzantine process, we require at least $N=4$. 229 | Then the faulty process can't split the vote the way it was able to when $N=3$. 230 | 231 | In 1999, Castro and Liskov published Practical Byzantine Fault Tolerance \cite{pbft}, or \emph{PBFT}, 232 | which provided the first optimal Byzantine fault tolerant algorithm for practical use. 233 | It set a new precedent for the practicality of Byzantine fault tolerance in industrial systems by being capable 234 | of processing tens of thousands of transactions per second. 235 | Despite this success, Byzantine fault tolerance was still considered expensive and largely unnecessary, 236 | and the most popular implementation was difficult to build on top of \cite{ppbft}. 237 | Hence, despite a resurgence in academic interest, including numerous improved variations \cite{yin2003separating, kotla2007zyzzyva}, 238 | not much progress was made in the way of implementations and deployment. 239 | Furthermore, PBFT provides no guarantees if a third or more of the network co-ordinates to violate safety. 240 | 241 | \section{Cryptography, Trust, and Economics} 242 | 243 | Fundamentally, fault tolerance is a problem deriving from a lack of trust - 244 | an inability to know how some process will behave. 245 | Formally, trust might be defined information theoretically as a means 246 | for reducing the entropy of one's model of the world - 247 | to trust someone is to optimistically reduce one's uncertainty about the world, 248 | enabling more focused attention on higher order forms of organization.
249 | 250 | Cryptographic primitives are also fundamentally related to the problem of trust, 251 | and may similarly be defined as mechanisms which allow for a massive reduction in entropy - 252 | successfully authenticating a cryptographic function collapses a distribution 253 | over possible outcomes to a single, or in some cases a small number, of outcomes. 254 | 255 | It is well known that civilizations that have greater forms of institutional trust, 256 | such as the rule-of-law, 257 | have higher productivity and more vibrant economies \cite{zak2001trust}. 258 | The result makes intuitive sense, as being able to trust more about an interaction 259 | reduces the space of possible outcomes that need to be actively modelled, 260 | making it easier to co-ordinate. 261 | Unfortunately, it is becoming increasingly difficult to evaluate the trustworthiness 262 | of modern institutions as their complexity has skyrocketed in recent decades, 263 | increasing the likelihood that the certainty they allegedly provide is an illusion. 264 | 265 | Fortunately, cryptography can form the basis for new institutions of trust in society 266 | which may dramatically improve the capacity for human co-ordination at global scale on account 267 | of reduced risk of fraudulent and/or unaccountable activity. 268 | Of particular interest is the importance of cryptographic primitives in BFT algorithms, 269 | both for authentication and for seeding non-determinism. 270 | 271 | Most interestingly, economic mechanisms may also serve as means for reducing entropy, 272 | in so far as economic agents can be incentivized - 273 | which is to say be made more likely to execute a particular behaviour. 274 | In fact, Bitcoin's great insight was that cryptographic primitives could be used in 275 | conjunction with economic incentives to sufficiently reduce the entropy of a public consensus network 276 | to achieve secure replication of state. 
277 | 278 | A more formal investigation of the information theoretic grounds of trust, cryptography, 279 | consensus, and economics, and in particular their inter-relationship, remains for future work. 280 | 281 | \section{Blockchain} 282 | 283 | A blockchain is, at heart, an integrity-focused approach to Byzantine Fault Tolerant Atomic Broadcast. 284 | The Bitcoin blockchain, for instance, uses a combination of economics and cryptographic randomization 285 | to provide a strong probabilistic guarantee that safety will not be violated, 286 | given a weak synchrony assumption, namely, 287 | that blocks are gossipped much more rapidly than they are found via the partial-hash collision lottery. 288 | In practice, however, it is well known that Bitcoin's security guarantees are vulnerable to a number 289 | of subtle attacks \cite{courtois2014subversive,eyal2014majority}. 290 | 291 | 292 | The blockchain gets its name from the two key optimizations it employs in solving ABC. 293 | The first is that it groups transactions in blocks in order to amortize the high commit latency 294 | (on the order of ten minutes) over many transactions. 295 | The second is to link blocks via cryptographic hashes into an immutable chain, 296 | such that it is easy to verify the historical record. 297 | Both optimizations are natural improvements to a naive BFT-ABC, 298 | the former improving performance, the latter improving tolerance to certain kinds 299 | of difficult to model Byzantine faults. 300 | 301 | Over the last few years, it has become common to ``blockchainize'' consensus algorithms, 302 | that is, to adapt them to ABC using the blockchain paradigm of hash-linked transaction batches. 303 | To the author's knowledge, Tendermint was the first such proposal, 304 | upgrading a well known BFT algorithm from the late 80s \cite{dls}, 305 | though it has since evolved to a consensus algorithm of its own.
306 | It has been followed by IBM, which upgraded PBFT to a blockchain \cite{cachin2016non,obc}, 307 | and by JP Morgan, which upgraded a BFT version of Raft \cite{juno}. 308 | 309 | \section{Process Calculus} 310 | 311 | Distributed systems, where pieces of the system execute concurrently with one another, 312 | are notorious for being difficult to design, build, and debug. 313 | They are further difficult to formally verify, 314 | as most techniques for formal verification, and in fact the very foundations of computer science, 315 | have been specifically developed with sequential computation in mind. 316 | 317 | Process calculi are a family of models introduced 318 | to provide a formal basis for concurrent computation. 319 | The most popular calculus, the Communicating Sequential Processes (CSP) \cite{csp} 320 | forms the theoretical foundation for many modern programming languages, 321 | such as Go, which include concurrency primitives in the language design \cite{csp_go}. 322 | 323 | In the 80s, Robin Milner introduced the Calculus of Communicating Systems (CCS), 324 | designed to be a concurrent analog of the sequential lambda calculus that underlies most functional programming languages. 325 | While the lambda calculus has function application as its basic unit of computation, 326 | CCS uses communication between two concurrent processes over a shared channel as its basic operational primitive. 327 | A more general form of CCS, the $\pi$-calculus, 328 | enables mobility in the communication graph between processes, 329 | such that the channels of communication can themselves be passed along other channels, 330 | thereby blurring the distinction between data, variables, and channels. 331 | The result is a coherent, minimalistic model of computation more powerful than its sequential predecessors. 
332 | 333 | The $\pi$-calculus has proven to be a highly effective tool for the study of concurrent systems, 334 | with applications from business process management \cite{lucchi2007pi} to cellular biology \cite{phillips2007efficient}. 335 | The remarkably simple notation simplifies the description of concurrent protocols. 336 | Furthermore, the well known equivalence between computation and logic \cite{abramsky1994proofs} enables 337 | logical systems to be defined complementary to the various process calculi, 338 | providing formal means to discuss and verify the properties of systems specified in an appropriate calculus. 339 | 340 | Our presentation of the $\pi$-calculus is sufficient merely to specify the Tendermint algorithm. 341 | For a more complete introduction, see \cite{milner1992calculus}. 342 | 343 | The grammar of a simple $\pi$-calculus, in Backus-Naur form, is as follows: 344 | 345 | 346 | \begin{center} 347 | \begin{tabular}{l } 348 | {$\!\begin{aligned} 349 | P & := & 0 & & \text{ \emph{void}}\\ 350 | & \; \| & P \| P & & \text{ \emph{par}} \\ 351 | & \; \| & \alpha.P & & \text{ \emph{guard}} \\ 352 | & \; \| & \alpha.P + \alpha.P & & \text{ \emph{guarded-choice}} \\ 353 | & \; \| & (\nu x) P & & \text{ \emph{fresh}}\\ \\ 354 | & \; \| & F^{s}(y) & & \text{ \emph{func}}\\ \\ 355 | 356 | \alpha & := & \tau & & \text{ \emph{null}} \\ 357 | & \; \| & x!(y) & & \text{ \emph{send}} \\ 358 | & \; \| & x?(y) & & \text{ \emph{receive}}\\ 359 | & \; \| & susp_i & & \text{ \emph{suspect}}\\ 360 | \end{aligned}$} \\ 361 | \end{tabular} 362 | \end{center} 363 | 364 | Each grammatical rule is labelled with a reference to its functional meaning. 365 | A process may be the empty process, $0$. 366 | It may be the parallel composition of two processes, $P \| P$, 367 | denoting two processes running concurrently. 368 | A guarded process, $\alpha.P$, only allows process $P$ to execute after an action, $\alpha$, 369 | has occurred.
370 | The action can be a null action, $\tau$, or it can be the sending, $x!(y)$, 371 | or receiving, $x?(y)$, of $y$ along $x$. 372 | Guarded choice injects non-determinism into the operation of the calculus, 373 | such that the process $\alpha.P + \beta.Q$ will non-deterministically execute 374 | $\alpha$ or $\beta$, and then run $P$ or $Q$, respectively. 375 | A new channel, $x$, can be created via $(\nu x) P$, such that $x$ is only accessible in $P$. 376 | Functional forms $F^{s}(y)$ allow us to pass variables $s$ and $y$ into 377 | the process called $F$, which may cause itself to execute recursively. 378 | Typically, we let $s$ be state-like variables, while $y$ are channels in the calculus. 379 | Finally, since we are interested in consensus in asynchronous networks, 380 | we employ an abstraction of timeouts known as unreliable failure detectors \cite{chandra1996unreliable}, 381 | and model them as a non-deterministic action \cite{nestmann2003modeling}. 382 | The $susp_i$ action is triggered when process $i$ is suspected of having failed - 383 | in other words, after some timeout. 384 | 385 | Note that we may use $\sum P$ to denote guarded-choice over more than two processes, 386 | and $\prod P$ to denote the parallel composition of more than two processes. 387 | We also admit a polyadic form of send and receive, for instance the process $x?(v,w) \| x!(y,z)$ is equivalent to 388 | $x?(d).d?(v).d?(w) \| (\nu c) x!(c).c!(y).c!(z)$. 389 | 390 | An operational semantics defines the actual non-reversible computational steps that a process may execute. 391 | Effectively, the only relevant operation is communication, known as the \emph{comm} rule: 392 | 393 | \begin{equation} 394 | ( x?(y).P | x!(z) ) \rightarrow P\{z/y\} 395 | \end{equation} 396 | The notation $P\{z/y\}$ means that all occurrences of $y$ in $P$ are replaced with $z$. 397 | In other words, $z$ was sent on $x$, received as $y$, and fed to $P$.
398 | 399 | Given a $\pi$-calculus process, we can follow its execution by applying the comm rule. 400 | For instance, 401 | 402 | \begin{equation} 403 | ( x?(y).y!(x) | x!(z) ) \rightarrow z!(x) 404 | \end{equation} 405 | 406 | Now, we can use a formal logic to express properties a process might satisfy. 407 | For instance, the modal Hennessy–Milner logic can express that a process 408 | will satisfy some other logic expression after some or all forms of an action have occurred \cite{milner1993modal}. 409 | By adding more complex operators to the logic, 410 | formal systems can be built up which easily describe important properties of distributed systems, 411 | such as safety and liveness \cite{stirling1991local}, and localization \cite{caires2003spatial}. 412 | Systems written in the $\pi$-calculus can then be formally verified to satisfy 413 | the relevant properties using model checking software \cite{vieira2004spatial}. 414 | 415 | While we use the $\pi$-calculus to specify the Tendermint algorithm, 416 | we leave use of an associated formal logic, 417 | and the corresponding verification of properties, to future work. 418 | 419 | \section{The Need For Tendermint} 420 | 421 | The success of Bitcoin and its derivatives, especially Ethereum \cite{ethereum}, and their promise of secure, autonomous, distributed, fault-tolerant execution of arbitrary code has caused virtually every major financial institution on the planet to become interested in the blockchain phenomenon. 422 | In particular, there has emerged an understanding of two forms of the technology: 423 | On the one hand are the public blockchains, known affectionately as the Big Bad Public Blockchains or BBPBs, 424 | whose protocols are dominated by in-built economic incentives bootstrapped by a native currency. 
425 | On the other are so called private blockchains, which might more accurately be called ``consortia blockchains'', 426 | and which are effectively improvements on traditional consensus and BFT algorithms through the use of hash trees, digital signatures, 427 | peer-to-peer networking, and enhanced accountability. 428 | 429 | As the infrastructure of our societies continues to decentralize, and as the nature of business becomes more inter-organizational, 430 | there is increasing need for a transparent, accountable, high performance BFT system, which can support applications from finance to domain registration to electronic voting, 431 | and which comes equipped with advanced mechanisms for governance and evolution into the future. 432 | Tendermint is that solution, optimized for consortia, or inter-organizational logic, but flexible enough to accommodate anyone from private enterprise to global currency, 433 | and high-performance enough to compete with the major, non-BFT, consensus solutions available today, such as etcd, consul, and zookeeper, while providing greater resilience, security guarantees, and flexibility to application developers. 434 | 435 | A more comprehensive discussion of consensus science and related algorithms is reserved for Chapter \ref{ch:related}. 436 | 437 | -------------------------------------------------------------------------------- /chapters/clients.tex: -------------------------------------------------------------------------------- 1 | \chapter{Client Considerations} 2 | \label{ch:clients} 3 | 4 | This chapter reviews some considerations pertaining to clients that interact with an application hosted on Tendermint. 5 | 6 | \section{Discovery} 7 | 8 | Network discovery occurs simply by dialing some set of seed nodes over TCP. 
9 | The p2p network uses authenticated encryption, 10 | but the public keys of the validators must be verified somehow out of band, 11 | that is, via an alternative medium not within the purview of the protocol. 12 | Indeed, in these systems, the genesis state itself must be communicated out of band, 13 | and ideally is the only thing that must be communicated, 14 | as it should also contain the public keys used by validators for authenticated encryption, 15 | which are different than those used for signing votes in consensus. 16 | 17 | For validator sets that may change over time, it is useful to register all validators via DNS, 18 | and to register new validators before they actually become validators, and remove them after they are removed as validators. 19 | Alternatively, validator locations can be registered in another fault-tolerant distributed data store, 20 | including possibly another Tendermint cluster itself. 21 | 22 | \section{Broadcasting Transactions} 23 | 24 | As a generalized application platform, Tendermint provides only a simple interface to clients for broadcasting transactions. 25 | The general paradigm is that a client connects to a Tendermint consensus network through a proxy, which is either run locally on its machine, 26 | or hosted by some other provider. The proxy functions as a non-validator node on the network, 27 | which means it keeps up with the consensus and processes transactions, but does not sign votes. 28 | The proxy enables client transactions to be quickly broadcast to the whole network via the gossip layer. 29 | 30 | A node need only connect to one other node on the network to broadcast transactions, but by default will connect to many, 31 | minimizing the chances that the transaction will not be received. 32 | Transactions are passed into the mempool, 33 | and gossiped through the mempool reactor to be cached in the mempool of all nodes, 34 | so that eventually one of them will include it in a block. 
35 | 36 | Note that the transaction does not execute against the state until it gets into a block, 37 | so the client does not get a result back right away, other than confirmation that it was accepted into the mempool and broadcast to other peers. 38 | Clients should register with the proxy to receive the result as a push notification when it is computed during the commit of a block. 39 | 40 | It is not essential that a client connect to the current proposer, 41 | as eventually any validator which has the transaction in its mempool may propose it. 42 | However, preferential broadcasting to the next proposer in line may lead to lower latency for the transaction 43 | in certain cases where the network is under high load. Otherwise, the transaction should be quickly gossiped to every validator. 44 | 45 | \section{Mempool} 46 | 47 | The mempool is responsible for caching transactions in memory before they are included in blocks. 48 | Its behaviour is subtle, and poses a number of challenges for the overall system architecture. 49 | First and foremost, caching arbitrary numbers of transactions in the mempool is a direct denial-of-service attack vector 50 | that could trivially cripple the network. Most blockchains solve this problem using their native currency, 51 | and permitting only transactions which spend a certain fee to reside in the mempool. 52 | 53 | In a more generalized system, like Tendermint, where there is not necessarily a currency to pay fees with, 54 | the system must establish stricter filtering rules and rely on more intelligent clients to resubmit transactions that are dropped. 55 | The situation is even more subtle, however, because the rule set for filtering transactions in the mempool must be a function of the application itself. 56 | Hence the \emph{CheckTx} message of TMSP, 57 | which the mempool can use to run a transaction against a transient state of the application to determine if it should be kept around or dropped.
58 | 59 | Handling the transient state is non-trivial, and is something left to the application developer, 60 | though examples are provided in the many example applications. 61 | In any case, clients must monitor the state of the mempool (i.e.~the unconfirmed transactions) to determine if they need to rebroadcast their transactions, 62 | which may occur in highly concurrent settings where the validity of one transaction depends on having processed another. 63 | 64 | \section{Semantics} 65 | 66 | Tendermint's core consensus algorithm provides only \emph{at-least-once semantics}, 67 | which is to say the system is subject to replay attacks, 68 | where the same transaction can be committed many times. 69 | However, many users and applications expect stronger guarantees from a database system. 70 | The flexibility of the Tendermint system leaves the strictness of these semantics up to the application developer. 71 | By utilizing the \emph{CheckTx} message, and by adequately managing state in the application, 72 | application developers can provide the database semantics that suit them and their users' needs. 73 | For instance, as discussed in Chapter \ref{ch:apps}, 74 | using an account based system with sequence numbers mitigates replay attacks, 75 | and changes the semantics from \emph{at-least-once} to \emph{exactly-once}. 76 | 77 | \section{Reads} 78 | 79 | Clients issue read requests to the same proxy node they use for broadcasting transactions (writes). 80 | The proxy is always available for reads, even if the network halts. 81 | However, in the event of a partition, the proxy may be partitioned from the rest of the network, which continues making blocks. 82 | In that case, reads from the proxy might be stale. 83 | 84 | To avoid stale reads, the read request can be sent as a transaction, presuming the application permits such queries. 
85 | By using transactions, reads are guaranteed to return the latest committed state, i.e.~when the read transaction is committed in the next block. 86 | This is of course much more expensive than simply querying the proxy for the state. 87 | It is possible to use heuristics to determine if a read will be stale, 88 | such as if the proxy is well-connected to its peers and is making blocks, 89 | or if it's stuck in a round with votes from one-third or more of validators, 90 | but there is no substitute for performing an actual transaction. 91 | 92 | \section{Light Client Proofs} 93 | 94 | One of the major innovations of blockchains over traditional databases is their deliberate use of Merkle hash trees to enable the production 95 | of compact proofs of system substates, so-called light client proofs. 96 | A light client proof is a path through a Merkle tree that allows a client to verify that some key-value pair is in the Merkle tree with a given root hash. 97 | The state's Merkle root hash is included in the block header, such that it is sufficient for a client to have only the latest header to verify any component of the state. 98 | Of course, to know that the header itself is valid, they must have either validated the whole chain, 99 | or kept up-to-date with validator set changes only and relied on economic guarantees that the state transitions were correct. 100 | 101 | \section{Conclusion} 102 | 103 | Clients of a Tendermint network function similarly to those of any other distributed database, 104 | though considerations must be made for the block-based nature of commits and the behaviour of the mempool. 105 | Additionally, clients must be designed with a particular application in mind. 106 | Though this adds some complexity, it enables tremendous flexibility.
107 | -------------------------------------------------------------------------------- /chapters/conclusion.tex: -------------------------------------------------------------------------------- 1 | \chapter{Conclusion} 2 | 3 | Byzantine Fault Tolerant consensus provides a rich basis upon which to build services 4 | that do not depend on centralized, trusted parties, and which may be adopted by society 5 | to manage critical components of socioeconomic infrastructure. 6 | Tendermint, as presented in this thesis, was designed to meet the needs of such systems, 7 | and to do so in a way that is understandably secure and easily high performance, 8 | and which allows arbitrary systems to have transactions ordered by the consensus protocol, 9 | with minimal fuss. 10 | 11 | Careful considerations are necessary when deploying a distributed consensus system, 12 | especially one without an agreed upon central authority to mediate potential disputes and reset the system in the event of a crisis. 13 | Tendermint seeks to address such problems using explicit governance modules and accountability guarantees, 14 | enabling integration of Tendermint deployments into modern legal and economic infrastructure. 15 | 16 | There is still considerable work to do. This includes formal verification of the algorithm's guarantees, 17 | performance optimizations, and architectural changes to enable the system to increase capacity with the addition of machines. 18 | And of course, many, many TMSP applications remain to be built. 19 | 20 | We hope that this thesis better illuminates some of the problems in distributed consensus and blockchain architecture, 21 | and inspires others to build something better. 
22 | -------------------------------------------------------------------------------- /chapters/economics.tex: -------------------------------------------------------------------------------- 1 | \chapter{Economics} 2 | \label{ch:economics} 3 | 4 | \section{Cryptoeconomics} 5 | 6 | \section{Proof-of-Work} 7 | 8 | \section{Proof-of-Stake} 9 | 10 | \section{Conclusion} 11 | -------------------------------------------------------------------------------- /chapters/frontmatter.tex: -------------------------------------------------------------------------------- 1 | 2 | %\thispagestyle{plain} 3 | \par\vspace*{.35\textheight}{\centering Dedicated to Theda. \par} 4 | 5 | \chapter*{Preface} 6 | The structure and presentation of this thesis was much inspired by Diego Ongaro's 2014 Doctoral Dissertation, 7 | ``Consensus: Bridging Theory and Practice'', wherein he specifies and evaluates the Raft consensus algorithm. 8 | 9 | Much of the work done in this thesis was done in collaboration with Jae Kwon, who initiated the Tendermint project. 10 | Please see the Github repository, at \url{https://github.com/tendermint/tendermint}, for a more direct account of contributions to the codebase. 11 | 12 | 13 | \chapter*{Acknowledgments} 14 | I learned early in life from Tony Montana that a man has only two things in this world, his word and his balls, and he should break em for nobody. 15 | This thesis would not have been completed if I had not given my word to certain people that I would complete it. 16 | These include my family, in particular my parents, grandparents, and great uncle Paul, and my primary adviser, Graham, 17 | who has, for one reason or another, permitted me a practically abusive amount of flexibility to pursue the topic of my choosing. 18 | Thanks Graham. 19 | 20 | Were it not for another set of individuals, this thesis would probably have been about machine learning. 
21 | These include Vlad Zamfir, with whom I have experienced countless moments of discovery and insight; 22 | my previous employer and favorite company, Eris Industries, and especially their CEO and COO, Casey Kuhlman and Preston Byrne, 23 | for hiring me, mentoring me, and giving me such freedom to research and tinker and ultimately start my own company with technology they helped fund; 24 | Jae Kwon, for his direct mentorship in consensus science and programming, for being a great collaborator, and for being the core founder and CEO at Tendermint; 25 | Lucius Meredith, for mentoring me in the process calculi; 26 | Zach Ramsay, for being, for all intents and purposes, my heterosexual husband; 27 | and of course, Satoshi Nakamoto, whoever you are, for sending me down this damned rabbit hole in the first place. 28 | 29 | There are of course many other people who have influenced my life during the course of this graduate degree; 30 | you know who you are, and I thank you for being that person and for all you've done for me. 31 | 32 | \tableofcontents 33 | \listoffigures 34 | \listoftables 35 | 36 | -------------------------------------------------------------------------------- /chapters/governance.tex: -------------------------------------------------------------------------------- 1 | \chapter{Governance} 2 | \label{ch:governance} 3 | 4 | So far, this thesis has reviewed the basic elements of the Tendermint consensus protocol and application environment. 5 | Critical elements of operating the system in the real world, such as managing validator set changes 6 | and recovering from a crisis, have not yet been discussed. 7 | 8 | This chapter proposes an approach to these problems that formalizes the role of governance in a consensus system. 9 | As validator sets come to encompass more decentralized sets of agents, competent governance systems 10 | for maintaining the network will be increasingly paramount to the network's success.
11 | 12 | \section{Governmint} 13 | 14 | The basic functionality of governance is to filter proposals for action, typically through a form of voting. 15 | The most basic implementation of governance as software is a module that enables users to make proposals, 16 | vote on them, and tally the votes. 17 | Proposals may be programmatic, in which case they may execute automatically following a successful vote, 18 | or they may be non-programmatic, in which case their execution is a manual exercise. 19 | 20 | To enable certain actions in Tendermint, such as changing the validator set or upgrading the software, 21 | a governance module has been implemented, called Governmint. 22 | Governmint is a minimum viable governance application with support for multiple groups of entities, 23 | each of which can vote internally on proposals, some of which may result in programmatic execution of actions, 24 | like changing the validator set, or upgrading Governmint itself (for instance to add new proposal types or other voting mechanisms). 25 | 26 | The system utilizes digital signatures to authenticate voters, 27 | and may use a variety of possible voting schemes. 28 | Of particular interest are quadratic voting schemes, 29 | where the cost to vote is quadratic in the weight of the vote, 30 | which have been shown to have a superior ability to satisfy voter preferences \cite{posner2013quadratic}. 31 | 32 | \section{Validator Set Changes} 33 | 34 | Validator set changes are a critical component of real-world consensus algorithms that many previous approaches have failed to specify 35 | or have left as a black art. 36 | Raft took pains to expound a sound protocol for validator set changes, which required that the change pass through consensus, 37 | using a new message type.
38 | Tendermint takes a similar approach, though it is standardized through the TMSP interface using the \emph{EndBlock} message, 39 | which is run after all the \emph{AppendTx} messages, but before \emph{Commit}. 40 | If a transaction, or set of transactions, is included in a block with the intended effect of updating the validator set, 41 | the application can return a list of validators to update by specifying their public key and new voting power 42 | in response to the \emph{EndBlock} message. 43 | Validators can be removed by setting their voting power to zero. 44 | This provides a generic means for applications to update the validator set without having to specify transaction types. 45 | 46 | If the block at height $H$ returns an updated validator set, 47 | then the block at height $H+1$ will reflect the update. 48 | Note, however, that the \emph{LastCommit} in block $H+1$ 49 | must utilize the validator set as it was at $H$, 50 | since it may contain signatures from a validator that was removed. 51 | 52 | Changes to voting power are applied for $H+1$ such that the next proposer 53 | is affected by the update. 54 | In particular, the validator that otherwise should have been the next proposer may be removed. 55 | The round robin algorithm should handle this gracefully, simply moving on to the next proposer in line. 56 | Since the same block is replicated on at least two-thirds of validators, 57 | and the round robin is deterministic, 58 | they will all make the same update and expect the same next proposer. 59 | 60 | \section{Punishing Byzantine Validators} 61 | 62 | One of the salient points of Bitcoin's design is its incentive structure, 63 | in so far as the goal of the protocol was to incentivize validators to behave correctly 64 | by rewarding them. 
While this makes sense in the context of Bitcoin's consensus protocol, 65 | a superior incentive may be to provide strong dis-incentives, such that validators 66 | have real \emph{skin-in-the-game} \cite{taleb2014skin}, rather than a soft opportunity cost. 67 | 68 | Disincentives can be achieved in Tendermint using an approach first proposed by Vitalik Buterin \cite{slasher} as a so-called Proof-of-Stake protocol. 69 | In essence, validators must make a security deposit (``they must bond some stake'') 70 | in order to participate in consensus. 71 | In the event that they are found to double-sign proposals or votes, 72 | other validators can publish evidence of the transgression in the form of a transaction, 73 | which the application state can use to change the validator set by removing the transgressor, burning its deposit. 74 | This has the effect of associating an explicit economic cost with Byzantine behaviour, 75 | and enables one to estimate the cost of violating safety by bribing a third or more of the validators to be Byzantine. 76 | 77 | Note that a consensus protocol may specify more behaviours to be punished than just double signing. 78 | In particular, we are interested in punishing any strong signalling behaviour which is unjustified - typically, any reported change in state that is not based on the reported state of others. 79 | For instance, in a version of Tendermint where all pre-commits 80 | must come with the polka that justifies them, 81 | validators may be punished for broadcasting unjustified pre-commits. 82 | Note, however, that we cannot just punish for any unexpected behaviour - 83 | for instance, a validator proposing when it is not their round to propose 84 | may be a basis for optimizations which pre-empt asynchrony or crashed nodes. 
85 | 86 | In fact, a generalization of Tendermint along these two lines, 87 | of 1) looser forms of justification and 2) allowing validators to propose before their term, 88 | gives rise to a family of protocols similar in nature to that proposed by Vlad Zamfir, 89 | under the name Casper, as the consensus mechanism for a future version of Ethereum \cite{casper}. 90 | A more formal account of the relationship between the protocols, 91 | and of the characteristics of anti-Byzantine justifications, remains for future work. 92 | 93 | \section{Software Upgrades} 94 | 95 | Governmint can also be used as a natural means for negotiating software upgrades on a possibly decentralized network. 96 | Software upgrades on the public Internet are a notoriously challenging operation, 97 | requiring careful planning to maintain backwards compatibility for users that don't upgrade right away, 98 | and to not upset loyal users of the software by introducing bugs, removing features, adding complexity, or, 99 | perhaps worst of all, updating automatically without permission. 100 | 101 | The challenge of upgrading a decentralized consensus system is made especially apparent with Bitcoin. 102 | While Ethereum has already managed a successful, non-backwards-compatible upgrade, 103 | due to its strong leadership and unified community, 104 | Bitcoin has been unable to make some needed upgrades, 105 | despite a plethora of software engineering ills, 106 | on account of a viciously divided community and a lack of strong leadership. 107 | 108 | Upgrades to blockchains are typically differentiated as being \emph{soft forks} or \emph{hard forks}, 109 | on account of the scope of the changes. 110 | Soft forks are meant to be backwards compatible, and to use degrees of freedom in the protocol that may be ignored 111 | by users who have not upgraded, but which provide new features to users who have.
112 | Hard forks, on the other hand, are non-backwards-compatible upgrades that, 113 | in Bitcoin's case, may cause violations of safety, 114 | and in Tendermint's case, cause the system to halt. 115 | 116 | To cope, developers of the Bitcoin software have rolled out a series of soft forks for which validators can vote by signalling in new blocks. 117 | Once a certain threshold of validators are signalling for the update, 118 | it automatically takes effect across the network, at least for users with a version of the software supporting the update. 119 | The utility of the Bitcoin system has grown tremendously on account of these soft forks, 120 | and is expected to continue to do so on account of upcoming ones. 121 | Interestingly, the failure of the community to successfully hard fork the software has 122 | on the one hand raised concerns about the long-term stability of the system, 123 | and on the other triggered excitement and inspiration about the system's resilience to corrupt governance - its ungovernability. 124 | 125 | There are many reasons to take the latter stance, 126 | given the overwhelming government corruption apparent in the world today. 127 | Still, cryptography and distributed consensus provide a new set of tools that enables a degree 128 | of transparency and accountability otherwise not imaginable in the paper-pen-handshake world of modern governments, 129 | nor even the digital world of the traditional web, which suffers tremendously from a lack of sufficiently robust authentication systems. 130 | 131 | In a system using Governmint, developers would be identifiable entities on the blockchain, 132 | and may submit proposals for software upgrades. 133 | The mechanism is quite similar to that of a Pull Request on GitHub, 134 | only it is integrated into a live running system, 135 | and the agreement passes through the consensus protocol.
136 | Clients should be written with configurable update parameters, 137 | so they can specify whether to update automatically or to require that they are notified first. 138 | 139 | Of course, any software upgrade which is not thoroughly vetted could pose a danger to the system, 140 | and a conservative approach to upgrades should be taken in general. 141 | 142 | \section{Crisis Recovery} 143 | 144 | In the event of a crisis, such as a fork in the transaction log, 145 | or the system coming to a halt, 146 | a traditional consensus system provides few or no guarantees, 147 | and typically requires manual intervention. 148 | 149 | Tendermint ensures that those responsible for violating safety can be identified, 150 | such that any client who can access at least one honest validator 151 | can discern with cryptographic certainty who the dishonest validators are, 152 | and thereby choose to follow the honest validators onto a new chain with a validator set excluding those who were Byzantine. 153 | 154 | For instance, suppose a third or more of the validators violate locking rules, 155 | causing two blocks to be committed at height $H$. 156 | The honest validators can determine who double-signed by gossiping all the votes. 157 | At this point, they cannot use the consensus protocol, because the basic fault assumptions have been violated. 158 | Note that being able to at this point accumulate all votes for $H$ 159 | implies strong assumptions about network connectivity and availability during the crisis, 160 | which, if they cannot be provided by the p2p network, may require that validators use alternative means, 161 | such as social media and high availability services, to communicate evidence. 162 | A new blockchain can be started by the full set of remaining honest nodes, 163 | once at least two-thirds of them have gathered all the evidence.
164 | 165 | Alternatively, modifying the Tendermint protocol so that pre-commits require polka 166 | would ensure that those responsible for the fork could be punished immediately, 167 | and would not require an additional publishing period. 168 | This modification remains for future work. 169 | 170 | More complex uses of Governmint are possible for accommodating various particularities of crisis, 171 | such as permanent crash failures and the compromise of private keys. 172 | However, such approaches must be carefully thought out, 173 | as they may undermine the safety guarantees of the underlying protocol. 174 | We leave investigation of these methods to future work, 175 | but note the importance of the socio-economic context in which a blockchain is embedded, in terms of understanding its ability to recover from crisis. 176 | 177 | Regardless of how crisis recovery proceeds, its success depends on integration with clients. 178 | If clients do not accept the new blockchain, the service is effectively offline. 179 | Thus, clients must be aware of the rules used by the particular blockchain to recover. 180 | In the cases of safety violation described above, they must also gather the evidence, 181 | determine which validators to remove, and compute the new state with the remaining validators. 182 | In the case of the liveness violation, they must keep up with Governmint. 183 | 184 | \section{Conclusion} 185 | 186 | Governance is a critical element of a distributed consensus system, 187 | though competent governance systems remain poorly understood. 188 | Tendermint provides governance as a TMSP module called Governmint, 189 | which aims to facilitate increased experimentation in software-based governance for distributed systems. 
190 | 191 | -------------------------------------------------------------------------------- /chapters/implementation.tex: -------------------------------------------------------------------------------- 1 | \chapter{Implementation} 2 | \label{ch:implementation} 3 | 4 | The reference implementation of Tendermint is written in Go \cite{golang} and hosted at \url{https://github.com/tendermint/tendermint}. 5 | Go is a C-like language with a rich standard library, concurrency primitives for light-weight massively concurrent executions, 6 | and a development environment optimized for simplicity and efficiency. 7 | 8 | The code uses a number of packages which are modular enough to be isolated as their own libraries. 9 | These packages were written for the most part by Jae Kwon, with bug fixes, tests, and the occasional feature contributed by the author. 10 | The most important of these packages are described in the following sub-sections. 11 | 12 | \section{Binary Serialization} 13 | 14 | Tendermint uses a binary serialization algorithm optimized for simplicity and determinism. 15 | It supports all integer types (including varints, which are encoded with a one-byte length prefix), 16 | strings, byte arrays, and time (unix time with millisecond precision). 17 | It also supports arrays of any type and structs (encoded as a list of ordered values, ignoring keys). 18 | It is somewhat inspired by Go's type system, especially its use of interface types, 19 | which can be implemented as one of many concrete types. 20 | Interfaces can be registered and each concrete implementation given a leading type-byte in its encoding. 21 | 22 | See \url{https://github.com/tendermint/go-wire} for more details. 23 | 24 | \section{Cryptography} 25 | 26 | Consensus algorithms such as Tendermint use three primary cryptographic primitives: digital signatures, hash functions, and authenticated encryption. 
27 | While many implementations of these primitives exist, 28 | choosing a cryptography library for enterprise software is no trivial task, given especially the profound insecurity of the world's most used security library, OpenSSL \cite{openssl}. 29 | 30 | Contributing to the insecurity of cryptographic systems is the potential deliberate undermining of their security properties by government agencies 31 | such as the NSA, who, in collaboration with the NIST, have designed and standardized many of the most popular cryptographic algorithms in use today. 32 | Given the apparent unlawfulness of such agencies, as made evident, for instance, by Edward Snowden \cite{snowden}, 33 | and a history of trying to compromise public cryptographic standards \cite{levy2001crypto}, 34 | many in the cryptography community prefer to use algorithms designed in an open, academic environment. 35 | Tendermint, similarly, uses only such algorithms. 36 | 37 | Tendermint uses RIPEMD160 as its cryptographic hash function, which produces 20-byte outputs. 38 | It is used in the Merkle trees of transactions and validator signatures, and for computing the block hash. 39 | Go provides an implementation in its extended library. RIPEMD160 is also used as one of two hashing functions by Bitcoin in the derivation of addresses from public keys. 40 | 41 | As its digital signature scheme, Tendermint uses Schnorr signatures over the ED25519 elliptic curve. 42 | ED25519 was designed in the open by Dan Bernstein \cite{ed25519}, with the intention of being high performance and easy to implement without introducing vulnerabilities. 43 | Bernstein also introduced NaCl, a high level library for doing authenticated encryption that uses the ED25519 curve. Tendermint uses the implementation provided by Go in its extended library. 
44 | 45 | \section{Merkle Hash Tree} 46 | 47 | Merkle trees function much like other tree-based data-structures, 48 | with the additional feature that it is possible to produce a proof of membership of a key in the tree that is logarithmic in the size of the tree. 49 | This is done by recursively concatenating and hashing keys in pairs until only a single hash is left, the root hash of the tree. 50 | For any leaf in the tree, a trail of hashes leading from it to the root serves as proof of its membership. 51 | This makes Merkle trees particularly useful for p2p file-sharing applications, where pieces of a large file can be verified as belonging to the file without 52 | having all the pieces. Tendermint uses this mechanism to gossip block parts on the network, where the root hash is included in the block proposal. 53 | 54 | Tendermint also provides a self-balancing, Merkle binary tree, modeled after the AVL tree \cite{avl}, as a TMSP service called Merkleeyes. 55 | The IAVL tree can be used for storing state of dynamic size, allowing lookups, inserts, and removals in logarithmic time. 56 | 57 | \section{RPC} 58 | 59 | Tendermint exposes HTTP APIs for querying the blockchain, network information, and consensus state, and for broadcasting transactions. 60 | The same API is available via three methods: GET requests using URI encoded parameters, POST requests using the JSONRPC standard \cite{jsonrpc}, 61 | and websockets using the JSONRPC standard. Websockets are the preferred method for high transaction throughput, 62 | and are necessary for receiving events. 63 | 64 | 65 | \section{P2P Networking} 66 | 67 | The P2P subprotocols used by Tendermint are described more fully in Chapter \ref{ch:subprotocols}. 68 | 69 | \section{Reactors} 70 | 71 | The Tendermint node is composed of multiple concurrent reactors, 72 | each managing a state machine sending and receiving messages to peers over the network, as described in Chapter \ref{ch:subprotocols}. 
73 | Reactors synchronize by locking shared datastructures, but the points of synchronization are kept to a minimum, 74 | so that each reactor runs mostly concurrently with the others. 75 | 76 | \subsection{Mempool} 77 | 78 | The mempool reactor manages the mempool, 79 | which caches transactions before they are packed in blocks and committed. 80 | The mempool uses a subset of the application's state machine to check the validity of transactions. 81 | Transactions are kept in a concurrent linked list structure, allowing safe writes and many concurrent reads. 82 | New, valid transactions are added to the end of the list. 83 | A routine for each peer traverses the list, sending each transaction to the peer, in order, only once. 84 | The list is also scanned to collect transactions for a new proposal, 85 | and is updated every time a block is committed: committed transactions are removed, 86 | uncommitted transactions are re-run through CheckTx, and those that have become invalid are removed. 87 | 88 | \subsection{Consensus} 89 | 90 | The consensus reactor manages the consensus state machine, which handles proposals, voting, locking, 91 | and the actual committing of blocks. 92 | The state machine is managed using a few persistent go-routines, 93 | which order received messages and enable them to be played back deterministically to debug the state. 94 | These go-routines include the readLoop, for reading off the queue of received messages, 95 | and the timeoutLoop, for registering and triggering timeout events. 96 | 97 | Transitions in the consensus state machine are made either when a complete proposal and block are received, 98 | or when more than two-thirds of either pre-votes or pre-commits have been received at a given round. 99 | Transitions result in the broadcast of proposals, block data, or votes, which are queued on the internalReqQueue, 100 | and processed by the readLoop in serial with messages received from peers. 
101 | This puts internal messages and peer messages on equal footing as far as being inputs to the consensus state machine, 102 | but allows internal messages to be processed faster, as they don't sit in the same queue as those from peers. 103 | 104 | \subsection{Blockchain} 105 | 106 | The blockchain reactor syncs the blockchain using a much faster technique than the consensus reactor. 107 | Namely, validators request blocks of incrementing height until none of their peers have blocks of any higher height. 108 | Blocks are collected in a blockpool and synced to the blockchain by a worker routine that periodically takes blocks from the pool 109 | and validates them against the current chain. 110 | 111 | Once the blockchain reactor finishes syncing up, it turns on the consensus reactor to take over. 112 | 113 | \section{Conclusion} 114 | 115 | The implementation of Tendermint in Go takes advantage of the language's concurrency primitives, garbage collection, 116 | and type safety, to provide a clear, modular, easy to read code base with many reusable components. 117 | As will be shown in Chapter \ref{ch:performance}, the implementation obtains high performance and is robust to many different kinds of fault. 118 | -------------------------------------------------------------------------------- /chapters/introduction.tex: -------------------------------------------------------------------------------- 1 | \chapter{Introduction} 2 | \label{ch:intro} 3 | 4 | The cold, hard truth about computer engineering today is that computers are faulty - 5 | they crash, corrupt, slow down, perform voodoo. 6 | What's worse, we're typically interested in connecting computers over a network (like the Internet), 7 | and networks can be more unpredictable than the computers themselves. 
8 | These challenges are primarily the concern of ``fault tolerant distributed computing'', 9 | whose aim is to discover principled protocol designs enabling faulty computers communicating over a faulty network 10 | to stay in sync while providing a useful service. 11 | In essence, to make a reliable system from unreliable parts. 12 | 13 | In an increasingly digital and globalized world, however, 14 | systems must not only be reliable in the face of unreliable parts, but in the face of malicious or ``Byzantine'' ones. 15 | Over the last decade, major components of critical infrastructure have been ported to networked systems, 16 | as have vast components of the world's finances. 17 | In response, there has been an explosion of cyber warfare and financial fraud, 18 | and a complete distortion of economic and political fundamentals. 19 | 20 | \section{Bitcoin} 21 | 22 | In 2009, an anonymous software developer known only as Satoshi Nakamoto introduced an approach to the resolution of these issues 23 | that was simultaneously an experiment in computer science, economics, and politics. 24 | It was a digital currency called Bitcoin \cite{bitcoin}. 25 | Bitcoin was the first protocol to solve the problem of fault tolerant distributed computing in the face of malicious adversaries in a public setting. 26 | The solution, dubbed a ``blockchain'', hosts a digital currency, 27 | where consent on the order of transactions is negotiated via an economically incentivized cryptographic random lottery based on partial hash collisions. 28 | In essence, transactions are ordered in batches (blocks) by those who find partial hash collisions of the transaction data, 29 | in such a way that the correct ordering is the one where the collisions have the greatest cumulative difficulty. 30 | The solution was dubbed Proof-of-Work (PoW). 
31 | 32 | Bitcoin's subtle brilliance was to invent a currency, a cryptocurrency, and to issue it to those solving the hash collisions, 33 | in exchange for their doing such an expensive thing as solving partial hash collisions. 34 | In spirit, it might be assumed that the capacity to solve such problems would be distributed as computing power is, 35 | such that anyone with a CPU could participate. 36 | Unfortunately, the reality is that the Bitcoin network has grown into the largest supercomputing entity on the planet, greater than all others combined, 37 | evaluating only a single function, distributed across a few large data centers running Application Specific Integrated Circuits (ASICs) 38 | produced by a small number of primarily Chinese companies, 39 | and costing on the order of two million USD per day in electricity \cite{blockchaininfo}. 40 | Further, its technical design has limitations: it takes up to an hour to confirm transactions, is difficult to build applications on top of, and does not scale in a way which preserves its security guarantees. 41 | This is not to mention the internal bout of political struggles resulting from the immaturity of the Bitcoin community's governance mechanisms. 42 | 43 | Despite these troubles, Bitcoin, astonishingly, continues to churn, 44 | and its technology, 45 | of cryptography and distributed databases and co-operative economics, 46 | continues to attract billions in investment capital, 47 | both in the form of new companies and new cryptocurrencies, 48 | each diverging from Bitcoin in its own unique way. 49 | 50 | \section{Tendermint} 51 | 52 | In 2014, Jae Kwon began the development of Tendermint, which sought to solve the consensus problem, 53 | of ordering and executing a set of transactions in an adversarial environment, 54 | by modernizing solutions to the problem that have existed for decades, 55 | but have lacked the social context to be deployed widely until now.
56 | 57 | In early 2015, in an effort led by Eris Industries to bring a practical blockchain solution to industry, 58 | the author joined Jae Kwon in the development of the Tendermint software and protocols. 59 | 60 | The result of that collaboration is the Tendermint platform, consisting of a consensus protocol, a high-performance implementation in Go, 61 | a flexible interface for building arbitrary applications above the consensus, and a suite of tools for deployments and their management. 62 | We believe Tendermint achieves a superior design and implementation compared to previous approaches, 63 | including that of the classical academic literature \cite{dls,pbft,raft} as well as Bitcoin \cite{bitcoin} and its derivatives \cite{ethereum,sidechains,peercoin} 64 | by combining the right elements of each to achieve a practical balance of security, performance, and simplicity. 65 | 66 | The Tendermint platform is available open source at \url{https://github.com/tendermint/tendermint}, 67 | and in associated repositories at \url{https://github.com/tendermint}. 68 | The core is licensed GPLv3 and most of the libraries are Apache 2.0. 69 | 70 | \section{Contributions} 71 | 72 | The primary contributions of this thesis can be found in Chapters \ref{ch:tendermint} and \ref{ch:performance}, 73 | and in the many commits on \url{https://github.com/tendermint/tendermint} and related repositories. 74 | Of particular significance are: 75 | \begin{itemize} 76 | \item A formal specification of Tendermint in the $\pi$-calculus and 77 | an informal proof of correctness of its safety and accountability (Chapter \ref{ch:tendermint}). 78 | 79 | \item A refactor of the core consensus state machine in the spirit of the formal specification to be more robust, deterministic, and understandable (\url{https://github.com/tendermint/tendermint/}). 
80 | 81 | \item Evaluation of the software's performance and characteristics in normal, faulty, and malicious conditions on large deployments (Chapter \ref{ch:performance}). 82 | 83 | \item Countless additional tests, leading to innumerable bug fixes and performance improvements (\url{https://github.com/tendermint/tendermint/}). 84 | \end{itemize} 85 | 86 | Chapters \ref{ch:subprotocols}-\ref{ch:implementation} describe the many other components of a complete system. 87 | Some of these, like the subprotocols used to gossip data (Chapter \ref{ch:subprotocols}) and the various low-level software libraries (Chapter \ref{ch:implementation}), 88 | were designed and implemented by Jae Kwon before being joined by the author. 89 | The rest was designed and implemented with regular consultation and inspiration from the author. 90 | For a more direct accounting of contributions, please see the Github repositories. 91 | 92 | Though not recounted in this thesis, the author made various contributions during this time to the Ethereum Project% 93 | \footnote{Most notably tests, bug-fixes, and performance improvements in the Go implementation at \url{https://github.com/ethereum/go-ethereum}}, 94 | an alternative to Bitcoin which generalizes the use of the technology from currency to arbitrary computations. 95 | In addition, the author has been invited on numerous occasions to speak privately and publicly about both Ethereum and Tendermint, 96 | including as an instructor% 97 | \footnote{Private instructor to a major financial institution, 2015}% 98 | \footnote{Blockchain University, 2015, \url{http://blockchainu.co}}, 99 | and a presenter% 100 | \footnote{Cryptoeconomicon, 2015}% 101 | \footnote{International Workshop on Technical Computing for Machine Learning and Mathematical Engineering, 2014, \url{http://www.esat.kuleuven.be/stadius/tcmm2014/}}% 102 | \footnote{The Blockchain Workshops, 2016 \url{http://nyc.blockchainworkshops.org/}}. 
103 | 104 | A final note on thesis structure: Despite being placed at the end, Chapter \ref{ch:related} provides significant context 105 | and may enhance understanding of the thesis if read before Chapter \ref{ch:tendermint}. However, in order to not delay the reader's introduction to Tendermint, 106 | it is placed at the end. 107 | -------------------------------------------------------------------------------- /chapters/performance.tex: -------------------------------------------------------------------------------- 1 | \chapter{Performance and Fault Tolerance} 2 | \label{ch:performance} 3 | 4 | Tendermint is designed as a Byzantine fault tolerant state-machine replication algorithm. 5 | It guarantees safety so long as less than a third of validators are Byzantine, 6 | and guarantees liveness similarly, so long as network messages are eventually delivered, 7 | with weak assumptions about network synchrony for gossiping proposals. 8 | In this section, we evaluate Tendermint's fault tolerance empirically by injecting 9 | crash faults and Byzantine faults. 10 | The goal is to show that the implementation of Tendermint consensus does not compromise safety in the event of such failures, 11 | that it suffers minimum performance impact, and that it is quick to recover. 12 | 13 | Performance of the Tendermint algorithm can be evaluated in a few key ways. 14 | The most obvious measures are the block commit time, which is a measure of finalization latency, 15 | and transaction throughput, which measures the network's capacity. 16 | We collect measurements for each on networks with validators distributed over the globe, 17 | where the number of validators ranges, in multiples of 2, from 2 to 64. 18 | 19 | \section{Overview} 20 | 21 | The experiments in this chapter can be reproduced using the repository at \url{https://github.com/tendermint/network\_testing}. 
22 | All experiments take place in docker containers 23 | running on \emph{Amazon EC2} instances of type \emph{t2.medium} or \emph{c3.8xlarge}. 24 | The \emph{t2.medium} has 2 vCPUs and 4 GB of RAM, 25 | and the \emph{c3.8xlarge} has 32 vCPUs and 60 GB of RAM. 26 | Instances are distributed across seven datacenters, spanning five continents. 27 | A second docker container, responsible for generating transactions, is run on each instance. 28 | Transactions are 250 bytes in size (a reasonable size for including a few 32 or 64 byte hashes and signatures), 29 | and were constructed to be debuggable, to be quick to generate, and to contain some stochasticity. 30 | Thus, the leading bytes are Big-Endian encoded integers 31 | representing transaction number and validator index for that instance, 32 | the trailing 16 bytes are randomly drawn from the operating system, 33 | and the intermediate bytes are just zeros. 34 | 35 | A network monitoring tool is used to maintain active websocket connections 36 | to each validator's Tendermint RPC server, 37 | and uses its local time when it receives a new committed block 38 | for the first time as the official commit time for that block. 39 | Experiments were first run without the monitor by copying 40 | all data from the validators for analysis and using the local time 41 | of the 2/3th validator committing a block as the commit time. 42 | Using the monitor is much faster, amenable to online monitoring, 43 | and was found to not impact the results 44 | so long as only block header information (and not the whole block) was passed over the websockets. 45 | 46 | Docker containers on remote machines are easily managed using the \emph{docker-machine} tool, 47 | and the network\_testing repository provides some tools 48 | which take advantage of Go's concurrency features 49 | to perform actions on docker containers on many remote machines at once.
50 | 51 | Each validator connects directly to each other to avoid confounding effects of network topology. 52 | 53 | For experiments involving crash faults or Byzantine behaviour, 54 | the number of faulty nodes is given by $N_{fault} = \lfloor(N-1)/3\rfloor$, 55 | where $N$ is the total number of validators. 56 | 57 | \section{Throughput and Latency} 58 | 59 | This section describes experiments which measure the raw performance 60 | of Tendermint in non-adversarial conditions, 61 | where all nodes are online and synced and no accommodations are made for asynchrony. 62 | That is, an artificially high TimeoutPropose is used (10 seconds), 63 | and all other timeout parameters are set to 1 millisecond. 64 | Additionally, all mempool activity is disabled 65 | (no gossiping of transactions or rechecking them after commits), 66 | and an in-process nil application is used to bypass TMSP. 67 | This serves as a control scenario for evaluating the performance drop in the face of faults and/or asynchrony. 68 | 69 | Experiments are run on validator set sizes doubling in size from two to 64, and on block sizes doubling from 128 to 32768. 70 | Transactions are preloaded on each validator. Each experiment is run for 16 blocks. 71 | 72 | \begin{figure}[] 73 | \centering 74 | \begin{subfigure}{0.8 \textwidth} 75 | \includegraphics[width=\linewidth,height=\textheight,keepaspectratio]{figures/throughput/latency-throughput.pdf} 76 | \end{subfigure} 77 | 78 | \begin{subfigure}{0.8 \textwidth} 79 | \includegraphics[width=\linewidth,height=\textheight,keepaspectratio]{figures/throughput/throughput-blocksize.pdf} 80 | \end{subfigure} 81 | \centering 82 | \caption[Latency-Throughput trade-off in non-faulty global network]{Latency-throughput trade-off. 
83 | Larger blocks incur diminishing 84 | returns in transaction throughput, with an ultimate capacity at around 10,000 txs/s} 85 | \label{fig:exp:throughput} 86 | \end{figure} 87 | 88 | As can be seen in Figure \ref{fig:exp:throughput}, 89 | Tendermint easily handles thousands of transactions per second with around one second block latency, 90 | though there appears to be a capacity limit at around ten thousand transactions per second. 91 | A block of 16384 transactions is about 4 MB in size, and analysis of network bandwidth shows each connection 92 | easily reaching upwards of 20MB/s, though analysis of the logs shows that at high block sizes, 93 | validators can spend upwards of two seconds waiting for block parts. 94 | Additionally, experiments in single data centers, as shown in Figure \ref{fig:exp:throughput:single}, 95 | demonstrate that much higher throughputs are possible, 96 | while experiments on much larger machines exhibit more consistent performance, 97 | relieving the capacity limit, as shown in Figure \ref{fig:exp:throughput:large}. 98 | We leave further investigations of this capacity limit to future work. 99 | 100 | \begin{figure}[] 101 | \centering 102 | \begin{subfigure}{0.8 \textwidth} 103 | \includegraphics[width=\linewidth,height=\textheight,keepaspectratio]{figures/throughput/single_datacenter/latency-throughput.pdf} 104 | \centering 105 | \end{subfigure} 106 | 107 | \begin{subfigure}{0.8 \textwidth} 108 | \includegraphics[width=\linewidth,height=\textheight,keepaspectratio]{figures/throughput/single_datacenter/throughput-blocksize.pdf} 109 | \end{subfigure} 110 | \caption[Latency-throughput trade-off in non-faulty local network]{Single datacenter. 
111 | When messages don't need to cross the public Internet, Tendermint is capable of tens of thousands of transactions per second.} 112 | \label{fig:exp:throughput:single} 113 | \end{figure} 114 | 115 | 116 | 117 | \begin{figure}[] 118 | \centering 119 | \begin{subfigure}{0.8 \textwidth} 120 | \includegraphics[width=\linewidth,height=\textheight,keepaspectratio]{figures/throughput/large_instances/latency-throughput.pdf} 121 | \centering 122 | \end{subfigure} 123 | 124 | \begin{subfigure}{0.8 \textwidth} 125 | \includegraphics[width=\linewidth,height=\textheight,keepaspectratio]{figures/throughput/large_instances/throughput-blocksize.pdf} 126 | \end{subfigure} 127 | \centering 128 | \caption[Latency-Throughput trade-off in non-faulty global network of large machines]{Large machines. 129 | With 32 vCPU and 60 GB of RAM, transaction throughput increases linearly with block-size, 130 | relieving the capacity limits found on smaller machines.} 131 | \label{fig:exp:throughput:large} 132 | \end{figure} 133 | 134 | In the experiments that follow, various forms of fault are injected 135 | and latency statistics presented. 136 | Each experiment was run for validator set sizes doubling from 4 to 32, 137 | for varying values of TimeoutPropose, and with a block size of 2048 transactions. 138 | 139 | \section{Crash Failures} 140 | 141 | To evaluate the performance of a network subject to crash failures, 142 | every three seconds $N_{fault}$ validators were randomly selected, 143 | stopped, and restarted three seconds later. 144 | 145 | The results in Table \ref{fig:exp:crash_failure} demonstrate that 146 | performance under this crash failure scenario drops by about 147 | $50\%$, and that larger TimeoutPropose values help mediate latencies. 148 | While the average latency increases to about two seconds, 149 | the median is closer to one second, and latencies may run as high as ten or twenty seconds, 150 | though in one case it was as high as seventy seconds.
151 | It is likely that modifying TimeoutPropose to be slightly non-deterministic may 152 | reduce the probability of such extreme latencies. 153 | 154 | \begin{table} 155 | \input{figures/throughput/crash_tables} 156 | \caption[Latency statistics under crash faults]{Crash-fault latency statistics. Every three seconds, a random selection of $N_{fault}$ validators were crashed, and restarted three seconds later. This crash-restart procedure continued for 200 blocks. Each table reports the minimum, maximum, average, median, and $95^{th}$ percentile of the block latencies, for varying values of the TimeoutPropose parameter.} 157 | \label{fig:exp:crash_failure} 158 | \end{table} 159 | 160 | \section{Random Network Delay} 161 | 162 | Another form of fault, which may be attributed either to Byzantine behaviour or to network asynchrony, 163 | is to inject random delays into every read and write to a network connection. 164 | In this experiment, before every read and write on every network connection, 165 | $N_{fault}$ of the validators slept for $X$ milliseconds, 166 | where $X$ was drawn uniformly on $(0, 3000)$. 167 | As can be seen in Table \ref{fig:exp:delay}, 168 | latencies are similar to the crash failure scenario, 169 | though increasing the TimeoutPropose has the opposite effect. 170 | Since not all validators were faulty, 171 | small values of TimeoutPropose allow faulty validators to be skipped quickly. 172 | If all validators were subject to the network delays, 173 | larger TimeoutPropose values would be expected to reduce latency 174 | since there would be no non-faulty validators to skip to, 175 | and more time would be provided to receive delayed messages. 176 | 177 | \begin{table}[] 178 | \input{figures/throughput/delay_tables} 179 | \caption[Latency statistics under randomized delays]{Random delay latency statistics.
$N_{fault}$ validators were set to inject a random delay 180 | before every read and write, where the delay time was chosen uniformly on $(0, 3000)$ milliseconds.} 181 | \label{fig:exp:delay} 182 | \end{table} 183 | 184 | 185 | \section{Byzantine Failures} 186 | 187 | A more explicit Byzantine failure can be injected through the following modifications 188 | to the state machine: 189 | 190 | \begin{itemize} 191 | \item{Conflicting proposals: during its time to propose, a Byzantine validator signs two conflicting proposals and broadcasts each, along with a pre-vote and pre-commit, to separate halves of its connected peers.} 192 | \item{No nil votes: a Byzantine validator never signs a nil-vote.} 193 | \item{Sign every proposal: a Byzantine validator submits a pre-vote and a pre-commit for every proposal it sees, as soon as it sees it.} 194 | \end{itemize} 195 | 196 | Taken together, these behaviours explicitly violate the double signing and locking rules. 197 | Note, however, that the behaviour is dominated by the broadcast of conflicting proposals, 198 | and the eventual committing of one of them. 199 | More complex arrangements of Byzantine strategies are left for future work. 200 | 201 | Despite the injected Byzantine faults, 202 | which would cause many systems to fail completely and immediately, 203 | Tendermint maintains respectable latencies, as can be seen from Table \ref{fig:exp:byz_failure}. 204 | Since these faults have little to do with asynchrony, 205 | there is no real discernible effect from TimeoutPropose. 206 | The performance also falls off with larger validator sets, 207 | which may be the result of a naive algorithm for handling Byzantine votes. 208 | 209 | \begin{table}[] 210 | \input{figures/throughput/byz_tables} 211 | \caption[Latency statistics under Byzantine faults]{Byzantine-fault latency statistics. 212 | Byzantine validators propose conflicting blocks and vote on any proposal as soon as they see it. 
213 | Each table reports the minimum, maximum, average, median, and $95^{th}$ percentile of the block latencies, for varying values of the TimeoutPropose parameter.} 214 | \label{fig:exp:byz_failure} 215 | \end{table} 216 | 217 | \ifx 218 | \section{A real application: ErisDB} 219 | 220 | The experiments presented so far have been artificial to the extent that transactions incur no processing logic. 221 | This was done deliberately to benchmark the core consensus engine. 222 | To get a handle on a real application, we present throughput and latency results for ErisDB, 223 | a blockchain application developed primarily by the author at Eris Industries, in collaboration with Jae Kwon. 224 | ErisDB provides a rich set of features, including a native currency, the Ethereum Virtual Machine (EVM). 225 | a native name registry, and a rich permissioning system. 226 | Transactions must be digitally signed using ED25519 signatures to be valid, and all state queries and updates are done on a merkle IAVL tree. 227 | 228 | For this experiment, a simple contract with two methods, get and set, is deployed to the virtual machine. 229 | The contract is written in solidity, a high-level, javascript-like language developed by Ethereum which compiles down to EVM byte code. 230 | The application state is preloaded with 1000 accounts, and transactions are signed by private keys drawn uniformly from those accounts. 231 | Keys and values for the get and set methods are fixed at 32-bytes each, to reflect the native architecture of the EVM \cite{ethereum_yellow_paper}. 232 | Transactions are generated for a read/write load of 10/90 (i.e. 90\% of transaction call the set method). 233 | 234 | \fi 235 | 236 | \section{Related Work} 237 | 238 | The throughput experiments in this chapter were modeled after those in \cite{honeybadger}, 239 | which benchmarks the performance of a PBFT implementation 240 | and a new randomized BFT protocol called HoneyBadgerBFT. 
241 | In their results, PBFT achieves over 15,000 transactions per second on four nodes, 242 | but decays exponentially as the number of nodes increases, 243 | while HoneyBadgerBFT attains roughly even performance 244 | of between 10,000 and 15,000 transactions per second. 245 | Block latencies in HoneyBadgerBFT, however, are much higher, 246 | closer to 10 seconds for validator sets of size 8, 16, and 32, and even more for larger ones. 247 | 248 | A well known tool for studying consensus implementations is Jepsen \cite{jepsen}, 249 | which is used to test the consistency guarantees of databases by simulating 250 | many forms of network partition. 251 | Testing Tendermint with Jepsen remains an exciting area for future work. 252 | 253 | The author is not aware of any throughput experiments in the face of persistent Byzantine failures, 254 | like those presented here. 255 | 256 | \section{Conclusion} 257 | 258 | The implementation of Tendermint written by the author and Jae Kwon easily achieves 259 | thousands of transactions per second on up to 64 nodes on machines distributed around the globe, 260 | with latencies mostly in the one to two second range. 261 | This is highly competitive with other solutions, and especially with the current state of blockchains, 262 | with Bitcoin, for instance, capping out at around 7 transactions per second. 263 | Furthermore, our implementation is shown to be robust to both crash faults, message delays, 264 | and deliberate Byzantine faults, 265 | being able to maintain over a thousand transactions per second in each scenario. 
266 | 267 | 268 | -------------------------------------------------------------------------------- /chapters/subprotocols.tex: -------------------------------------------------------------------------------- 1 | \chapter{Tendermint Subprotocols} 2 | \label{ch:subprotocols} 3 | 4 | The presentation of Tendermint consensus in the previous chapter left out a number of details 5 | regarding the gossip protocols used to disseminate blocks, votes, transactions, 6 | and other peer information. 7 | This was done in order to focus in on the consensus protocol itself, 8 | without distraction from the hydra of practical software engineering. 9 | This chapter describes one particular approach to filling in these details, 10 | by implementing components as relatively independent reactors that are multiplexed over each peer connection. 11 | 12 | \section{P2P-Networking} 13 | 14 | On startup, each Tendermint node receives an initial list of peers to dial. 15 | For each peer, a node maintains a persistent TCP connection over which multiple subprotocols are multiplexed in a rate-limited fashion. 16 | Messages are serialized into a compact binary representation to be sent on the wire, and 17 | connections are encrypted via an authenticated encryption protocol \cite{authenticated_encryption}. 18 | 19 | Each remaining section of this chapter describes a separate reactor that is multiplexed over each peer connection. 20 | An additional peer exchange reactor can be run which allows nodes to request other peer addresses from each other and keep track of peers they have connected to before, 21 | in order to stay connected to some minimum number of other peers. 22 | 23 | \section{Consensus Gossip} 24 | 25 | The consensus reactor wraps the consensus state machine, 26 | and ensures each node broadcasts to all peers its current state every time it changes. 
27 | In this way, each node keeps track of the consensus state of all its peers, 28 | allowing it to optimize the gossiping of messages to only send peers information they need at the very moment, 29 | and which they don't already have. 30 | For each peer, a node maintains two routines which continuously check for new information to send the peer, 31 | namely, proposals and votes. 32 | Information should be gossiped in a ``rarest first'' manner in order to maximize 33 | gossip efficiency and minimize the chance that some information becomes unavailable \cite{rarest_first}. 34 | 35 | 36 | \subsection{Block Data} 37 | In Chapter \ref{ch:tendermint}, it was assumed that proposal messages include the block. 38 | However, since blocks emerge from a single source and can be quite large, 39 | this puts undue pressure on the block proposer to upload the data to all other nodes; 40 | blocks can be disseminated much more quickly if they are split into parts and gossiped. 41 | 42 | A common approach to securely gossiping data, as popularized by various p2p protocols \cite{bittorrent,libswift}, 43 | is to use a Merkle tree \cite{merkle1987digital}, 44 | allowing each piece of the data to be accompanied by a short proof (logarithmic in the size of the data) 45 | that the piece is a part of the whole. 46 | To use this approach, 47 | blocks are serialized and split into chunks of an appropriate size 48 | for the expected block size and number of validators, 49 | and chunks are hashed into a Merkle tree. 50 | The signed proposal, instead of including the entire block, includes just the Merkle root hash, 51 | allowing the network to co-operate in gossiping the chunks. 52 | A node informs its peers every time it receives a chunk, 53 | in order to minimize the bandwidth wasted by transmitting the same chunk to a node more than once.
54 | 55 | Once all the chunks are received, the block is deserialized and validated to ensure it refers correctly to the previous 56 | block, and that its various checksums, implemented as Merkle trees, are correct. 57 | While it was previously assumed that a validator does not pre-vote until the proposal (including the block) is received, 58 | some performance benefit may be obtained by allowing validators to pre-vote after receiving a proposal, 59 | but before receiving the full block. This would imply that it is okay to pre-vote for what turns out to be an invalid block. 60 | However, pre-committing for an invalid block must always be considered Byzantine. 61 | 62 | Peers that are catching up (i.e.~are on an earlier height) are sent chunks for the height they are on, 63 | and progress one block at a time. 64 | 65 | \subsection{Votes} 66 | 67 | At each step in the consensus state machine, after the proposal, a node is waiting for votes (or a local timeout) to progress. 68 | If a peer has just entered a new height, it is sent pre-commits from the previous block, 69 | so it may include them in the next block's \emph{LastCommit} if it's a proposer. 70 | If a peer has pre-voted but has yet to pre-commit, or has pre-committed, but has yet to go to the next round, 71 | it is sent pre-votes or pre-commits, respectively. 72 | If a peer is catching up, it is sent the pre-commits for the committed block at its current height. 73 | 74 | \section{Mempool} 75 | 76 | Chapter \ref{ch:tendermint} made little mention of transactions, 77 | as Tendermint operates on blocks of transactions at a time, and has no concern for individual transactions, 78 | so long as their checksum in the block is correct. 79 | 80 | Transactions are managed independently in an in-memory cache, 81 | which, following Bitcoin, has come to be known as the \emph{mempool}.
82 | Transactions are validated by the application logic when they are received and, if valid, 83 | added to the mempool and gossiped using an ordered multicast algorithm. 84 | A node maintains a routine for each peer which ensures that transactions 85 | in the mempool are sent to the peer in the same order in which they were processed by the node. 86 | 87 | Proposers reap transactions from the ordered list in the mempool for new block proposals. 88 | Once a block is committed, all transactions included in the block are removed from the mempool, 89 | and the remaining transactions are re-validated by the application logic, 90 | as their validity may have changed on account of other transactions being committed, 91 | which the node may not have had in its mempool. 92 | 93 | \section{Syncing the Blockchain} 94 | 95 | The consensus reactor provides a relatively slow means of syncing with the latest state of the blockchain, 96 | as it was designed for real-time consensus, 97 | meaning peers wait to receive all information to commit a single block before worrying about the next block. 98 | To accommodate peers that may be more than just a few blocks behind, 99 | an additional reactor, the blockchain reactor, allows peers to download many blocks in parallel, 100 | enabling a peer to sync hundreds of times faster than via the consensus reactor. 101 | 102 | When a node connects to a new peer, the peer sends its current height. 103 | The node will request blocks, in order, beginning with its current height, 104 | from all peers that self-reported higher heights, and download the blocks concurrently, adding them to the block pool. 105 | Another routine continuously attempts to remove blocks from the pool and add them to the blockchain by validating and executing them, 106 | two blocks at a time, against the latest state of the blockchain. 107 | Blocks must be validated two blocks at a time because the commit for one block is included as the LastCommit data in the next one. 
108 | 109 | The node continuously queries its peers for their current height, 110 | and continues to concurrently request blocks until it has caught up to the highest height among its peers, 111 | at which point it stops making requests for peer heights and starts the consensus reactor. 112 | 113 | \section{Conclusion} 114 | 115 | A number of subprotocols are required for a practical implementation of the Tendermint blockchain. 116 | These include the gossiping of consensus data (votes and proposals), of block data, and of transactions, 117 | and some means for new peers to quickly catch up with the latest state of the blockchain. 118 | -------------------------------------------------------------------------------- /chapters/theory.tex: -------------------------------------------------------------------------------- 1 | \chapter{Theory} 2 | 3 | This chapter introduces some theoretical formalisms for describing consensus networks. 4 | First and foremost, we introduce a formal definition of trust on the basis of mutual information, 5 | and show how the use of cryptography can increase the amount of trust in a system, 6 | enabling higher-level forms of communication. 7 | Second, we formalize the consensus and atomic broadcast problems using process calculi 8 | and define a blockchain as a generic means for transforming consensus into atomic broadcast. 9 | Third, we introduce Byzantine Failure Detectors for the detection of malicious processes, 10 | and show how they can be used in a consensus protocol to achieve accountability. 11 | Fourth, we consider probabilistic solutions to consensus, formalizing the common coin and proof-of-work approaches, 12 | and show how a generalization of PoW in asynchronous conditions results in a protocol like Casper. 13 | Finally, we describe how to formally introduce economics into the model, and discuss the resulting problem space. 
14 | 15 | \section{Trust and Information} 16 | 17 | It is well known that \emph{trust}, defined as ..., 18 | is a crucial element to maintaining productive socioeconomic systems \cite{trust}. 19 | Intuitively, trust reduces uncertainty about the world, 20 | and enables higher-order forms of organization to flourish. 21 | 22 | - communications channel, capacity as MI 23 | - crypto primitives are axes for high entropy systems with high MI 'paths' for 'correct' processes 24 | 25 | - crypto systems are stronger than purely info-theoretic ones \cite{ben1988completeness} 26 | 27 | 28 | 29 | Suppose we have agents Alice and Bob, represented by random variables $A$ and $B$, 30 | operating in an uncertain environment, $X$. 31 | Each agent maintains a representation of the world that defines a distribution over possible events 32 | in the universe, which for each agent consists of the other agent and the environment. 33 | Let Alice's distribution be denoted $p_A(B, X) = p_A(B | X)p_A(X) $. 34 | The distribution has some entropy, $H[p_A]$. 35 | If Alice trusts Bob, we expect that the entropy, in particular that related to Bob, should decrease. 36 | 37 | 38 | We then define ``$A$ trusts $B$'' as an 39 | 40 | 41 | Formally, we can define trust as a reduction in entropy 42 | 43 | 44 | 45 | 46 | 47 | \emph{Trust} as expected mutual information. 48 | \emph{Correct-trust} as mutual information where you can see when it fails (crypto). 49 | Show that correct-trust increases possible trust. 50 | 51 | Digital signatures. 52 | Merkle trees, erasure codes for broadcast. 53 | Hash-chain links to simplify proposer logic. 54 | Reduce complexity of network protocols by moving elements from data to authenticators. 55 | 56 | 57 | 58 | 59 | 60 | 61 | \section{Consensus and Atomic Broadcast} 62 | The problem has been pitched as consensus or atomic broadcast (ABC). 63 | Consensus commits a value; ABC orders transactions. 
64 | Can show they are the same \cite{chandra1996unreliable} 65 | We show they are the same with generalized process calculus forms of each and a bisimulation between them. 66 | Atomic broadcast is the more natural form for real systems. 67 | 68 | Note the pi calculus doesn't allow a strictly composable encoding of broadcast \cite{ene1999expressiveness}, 69 | but we don't need it, since in practice each node has a network stack/kernel that manages broadcasts. 70 | Further, we really do want point-to-point, rather than broadcast, 71 | because we want connections to be encrypted on a per-connection basis, 72 | though group-encrypted broadcast primitives would be an interesting pursuit. 73 | 74 | Reliable broadcast (RBC) is a broadcast primitive satisfying 75 | 76 | \begin{itemize} 77 | \item validity - if a correct process broadcasts m, it eventually delivers m 78 | \item agreement - if a correct process delivers m, all correct processes eventually deliver m 79 | \item integrity - m is only delivered once, and only if broadcast by its sender 80 | \end{itemize} 81 | 82 | We model RBC as a pi-calculus process, 83 | $rbc(\hat{r}, \hat{d}) = (\nu \hat{x}) \prod_i rbc_i(r_i, d_i, \hat{x})$, 84 | where $rbc_i$ is the instance of RBC running on node $i$, 85 | $\hat{r}$ are input channels, with one for each node, 86 | on which new requests from clients can be received, 87 | $\hat{d}$ are \emph{delivery} channels, on which a node outputs RBC-delivered values, 88 | and $\hat{x}$ are some shared variables. 89 | 90 | We can state the properties in a temporal Hennessy-Milner logic with fixed-point operators: 91 | \begin{itemize} 92 | \item validity - $ \forall m$, and correct $i$, $rbc |= [ r_i?(m) ] . \nu Z . ( d_i!(m)T \vee [*]Z) $ 93 | \item agreement - $ \forall m$, and correct $i$, $rbc |= [ d_i!(m) ] . \wedge_{j \neq i} ( \nu Z . (d_j!(m)T \vee [*]Z) ) $ 94 | \item integrity - $ \forall m$, and correct $i$, $rbc |= [ d_i!(m) ] . [ * ] . 
< d_i!(m) > ff $, and only if broadcast by its sender ... 95 | \end{itemize} 96 | 97 | Let us now model atomic broadcast ABC after RBC, as 98 | $abc(\hat{r}, \hat{d}) = (\nu \hat{x}) \prod_i abc_i(r_i, d_i, \hat{x})$, 99 | with the same properties as $rbc$, but with the addition of \emph{total order}, 100 | \begin{itemize} 101 | \item total order - if correct processes p and q deliver m and m', then p delivers m before m' iff q delivers m before m' 102 | \end{itemize} 103 | 104 | 105 | That is, ABC is identical to RBC, with the added constraint that reads off of any $d_i$ 106 | must return the same values in the same order. 107 | 108 | We can model consensus similarly, as 109 | $cns(\hat{r}, \hat{d}) = (\nu \hat{x}) \prod_i cns_i(r_i, d_i, \hat{x})$, 110 | 111 | with the following properties 112 | 113 | \begin{itemize} 114 | \item termination - every correct process eventually decides 115 | \item integrity - every correct process decides at most once 116 | \item agreement - if one correct process decides $v1$ and another decides $v2$, then $v1=v2$ 117 | \item validity - if a correct process decides $v$, at least one process proposed $v$ 118 | \end{itemize} 119 | 120 | Note that the forms of consensus and ABC are identical (save some function names), 121 | with the major difference in the properties relating to the fact that consensus 122 | manages only one value, while atomic broadcast may handle many. 123 | 124 | To show an equivalence between ABC and consensus, 125 | we create a process context for each, 126 | yielding $ C_{CNS}[ abc_i ] $ and $ C_{ABC}[ cns_i ] $ 127 | where we intend to show that 128 | $ C_{CNS}[ abc_i ] \sim cns_i $ and $ C_{ABC}[ cns_i ] \sim abc_i $ for 129 | some weak bisimulation $\sim$. 
130 | 131 | Intuitively, consensus can be derived from ABC by deciding the first value fired on $d_i$, 132 | while ABC can be derived from consensus by running the consensus protocol multiple times, 133 | once for each value, or batch of values, to be atomically broadcast. 134 | Thus $ C_{CNS}[ ] $ is a context which restricts $d_i$, such that it is only read from once, 135 | while $ C_{ABC}[ ] $ is a context which manages multiple instances of consensus, delivering on $d_i$ many times. 136 | 137 | 138 | \section{Byzantine Failure Detectors} 139 | Failure detectors (FDs), an abstraction of timeouts, 140 | were introduced and used to solve consensus \cite{chandra1996unreliable}. 141 | FDs enable processes to keep a list of other processes they suspect to have crashed. 142 | Though unreliable, in that they may be suspicious of correct processes, 143 | FDs can be constrained by abstract asymptotic properties ensuring that 144 | eventually, crashed processes are suspect, and correct processes are not. 145 | Notably, the formalism of FDs enables refined investigation of consensus algorithms. 146 | 147 | Here, we extend the model to the Byzantine case, yielding Byzantine Failure Detectors, 148 | and show how they can be used as building blocks for the construction of BFT algorithms. 149 | 150 | 151 | Notes 152 | - "correct" behaviour vs arbitrary behaviour 153 | - nodes keep state of other nodes in order to know what is "allowed" and detect variation 154 | - depending on the protocol, there may be a tradeoff between detection and asynchrony 155 | - non-byz may be suspected as byz cuz of asynchrony 156 | - conjecture: this only possible in non-strongly-consistent protocols 157 | - tendermint has perfect byz detection since no tradeoff against asynchrony! 158 | - what about pbft? need to review view change 159 | - pi calc allows us to describe many of the BFDs implicitly by whether or not 160 | we even listen for the message (eg. 
proposing when its not your turn) 161 | - economics as a modulator for moving to next proposer 162 | 163 | 164 | 165 | 166 | 167 | Byzantine Failure Detectors (BFDs) are a different breed. 168 | While Byzantine traditionally means "arbitrary", 169 | it is in practice trivial to enforce simple rules which restrict 170 | the set of messages which might affect the state. 171 | Furthermore, Byzantine behaviour wherein a process does not send a message when it should have 172 | is indistinguishable from asynchrony. 173 | Thus, BFDs must only be concerned with a particular class of Byzantine behaviour, 174 | namely, that which is \emph{malicious}. 175 | Unlike FDs, BFDs are not unreliable - they can not mistakenly suspect other processes 176 | of being Byzantine, as triggering the BFD requires cryptographic proof. 177 | 178 | There are two forms of malicious behaviour, which we call divergent-broadcast (DBC) 179 | and unjustified-broadcast (UBC). In DBC, a process sends conflicting messages to peers. 180 | Detecting DBC simply requires receiving the conflicting messages. 181 | In UBC, a process sends a message which claims something about its internal state which is untrue. 182 | To detect UBC requires the use of functions from the particular consensus protocol itself, 183 | which must define a set of justification rules. 184 | Either form of malicious behaviour is sufficient to violate safety in a non-Byzantine protocol. 185 | 186 | We now define a BFD as satisfying the following property: 187 | 188 | \begin{itemize} 189 | \item{Eventually, every DBC and UBC is detected by at least one correct process} 190 | \end{itemize} 191 | 192 | In practice, using a BFD requires a correct process to keep a list of all messages it has delivered, 193 | and to reliably broadcast those messages to all other correct processes. 194 | Further, the BFD must be informed by the rules of the consensus protocol as to what constitutes a UBC. 
195 | 196 | Note that something which is a UBC in one protocol may not be in another. 197 | We are interested in this boundary, particularly the weakest UBCs necessary for consensus. 198 | Introduction of economics can weaken the UBCs necessary 199 | - eg PoW: mining an alternate chain doesn't get detected and punished, but economics yields an op cost 200 | - eg Casper: changing bets might incur small economic cost for larger reward of getting consensus sooner 201 | 202 | 203 | Many BFT protocols are tolerant of Byzantine faults, but don't emphasize detection. 204 | Thus, while a BFD is not necessary for Byzantine consensus, we show that, for some 205 | forms of the justification function, it is sufficient, 206 | and that their use elucidates deeper structure in the BFT problem. 207 | 208 | 209 | 210 | 211 | 212 | 213 | When no processes are malicious, 214 | every message from a process can be trusted as an accurate reflection of that process' state. 215 | 216 | 217 | 218 | FDs can be formalized with the pi-calculus, 219 | and resulting consensus protocols subject to a matrix analysis \cite{nestmann2003modeling}. 220 | We'd like a similar analysis, with a more general notion of justification. 221 | Most previous byz algos don't focus on detection, just tolerance. 222 | 223 | Further, we'd like to show that justifications can be removed from the real-time 224 | protocol and moved to a post-failure recovery mode protocol, under some weak network assumptions, 225 | without compromising accountability. 226 | 227 | Start by defining messages as consisting of three parts: indices, authenticators, data. 228 | Indices are things like height number, round number, message type number, etc. 229 | Authenticators are signatures and hashes. 230 | Can't do BFT without authenticators (tho wtf about some of those papers ...) 231 | Byzantine msgs are those with the same indices and authenticators, but different data. 
232 | Note this assumes deterministic authenticators, and implies that detection requires gossip. 233 | We also want byzantine msgs to be those that are "unjustified". 234 | Introduce "justification" rules which map $(AUTH, DATA)$ to $\{True, False\}$. 235 | 236 | Also note how moving data/indices into auth using hashes can simplify protocols 237 | (eg. the way linking to the previous block avoids subtle leader crash/recover scenarios). 238 | 239 | \section{Probabilistic Solutions} 240 | Consensus can be solved with FDs or with randomness. 241 | Common coin gives probabilistic liveness, where randomness is over what value sent. 242 | Bitcoin gives probabilistic safety, where randomness is over when value sent. 243 | There seems to be a duality here, common coin being like $\wedge$ and bitcoin like $\vee$. 244 | How to reflect in stochastic-pi calc logic. 245 | 246 | Bitcoin makes synchrony assumption that network latency is much less than block time, 247 | allowing it to give strong (economic/probabilistic) serializability guarantees. 248 | GHOST weakens the synchrony assumption by using additional network information to inform fork choice. 249 | Is the asynchronous generalization of GHOST something like casper? 250 | How does the move from PoW to PoS complement that from synchrony to asynchrony? 251 | 252 | \section{Economics} 253 | Suppose the consensus system is probabilistic, ala some stochastic process calculus. 254 | Economics are a way to parameterize the Comm rates of the calculus, 255 | such that the param values may change, subject to some constraints 256 | (eg. the avg value over time is constant, etc.). 257 | The point of the system is to be valuable, 258 | and have this value be contributed back to the processes as wealth. 259 | Economics makes the system reflexive, in the sense that, 260 | given finite critical resources and a driving energy source, 261 | the system must increase its efficiency (ie. 
innovate, build wealth, etc), 262 | to maintain liveness during growth. 263 | Integration with food systems, be an organism, etc. 264 | 265 | Note economics can also act as a weak/parametrized form of synchrony! 266 | 267 | \section{Residence Times} 268 | Drawing inspiration from ecology and biophysics, 269 | where it's been suggested that residence time of energy in a non-equilibrium system is a 270 | measure of its organizational complexity. 271 | 272 | Consider a network of processes in such a light. 273 | Energy input is receipt of a msg. 274 | Causes a tree of execution. 275 | Residence time is (eg.) time until all branches of the tree either communicate with other trees or halt. 276 | Here, txs are the inputs (ie. they should pay fees!). 277 | Another energy input is eg. POW - can be measured as a packet of energy arriving as a new block. 278 | Without inflation, packets arrive and are immediately released as heat, minus what is paid in fees, 279 | which hang around as a balance and prolong the residence time. 280 | The inflation increases the residence time, but is clearly unsustainable - distribution mechanisms are important tho! 281 | Alternatively, in POS, packets come in as security deposits, which sit around for a long time ... 
282 | -------------------------------------------------------------------------------- /chapters/title.tex: -------------------------------------------------------------------------------- 1 | \begin{titlepage} 2 | \begin{center} 3 | \vspace*{1cm} 4 | 5 | \textbf{\large{Tendermint: Byzantine Fault Tolerance in the Age of Blockchains}}\\ 6 | 7 | \vspace{1 cm} 8 | 9 | \textbf{by} \\ 10 | \vspace{1 cm} 11 | \textbf{Ethan Buchman} 12 | 13 | \vfill 14 | 15 | A Thesis \\ 16 | presented to \\ 17 | The University of Guelph 18 | 19 | \vspace{0.8cm} 20 | 21 | In partial fulfilment of requirements \\ 22 | for the degree of \\ 23 | Master of Applied Science \\ 24 | in \\ 25 | Engineering Systems and Computing 26 | 27 | \vspace{0.8cm} 28 | Guelph, Ontario, Canada 29 | 30 | \vspace{0.8cm} 31 | \copyright Ethan Buchman, June, 2016 32 | \end{center} 33 | \end{titlepage} 34 | -------------------------------------------------------------------------------- /figures/descriptions/block_header.tex: -------------------------------------------------------------------------------- 1 | 2 | \begin{verbatim} 3 | type Header struct { 4 | ChainID string 5 | Height int 6 | Time time.Time 7 | NumTxs int 8 | LastBlockHash []byte 9 | LastBlockParts PartSetHeader 10 | LastCommitHash []byte // Merkle root hash of LastCommit 11 | DataHash []byte // Merkle root hash of transaction 12 | ValidatorsHash []byte // Merkle root hash of validator set 13 | AppHash []byte // state Merkle root from previous block's transactions 14 | } 15 | 16 | type PartSetHeader struct { 17 | Total int 18 | Hash []byte 19 | } 20 | \end{verbatim} 21 | \caption[Block Header Structure]{The fields required for a valid block header. 
The validity of all fields is checked before pre-commit} 22 | -------------------------------------------------------------------------------- /figures/descriptions/consensus_rules.tex: -------------------------------------------------------------------------------- 1 | 2 | \underline{Consensus State Rules} 3 | \begin{description} 4 | \item[Proposal:] Wait up to \emph{TimeoutPropose} for a proposal from the correct validator for the current height and round. 5 | \item[Prevote:] If a proposal comes with a valid signature from the correct proposer for a validator’s current height and round, and the validator is not locked, it should prevote for the proposal block. Else, prevote nil. 6 | \item[Precommit:] If a validator receives prevotes from $+\frac{2}{3}$ validators for the same block, it should precommit for that block. If the $+\frac{2}{3}$ prevotes are not for the same block, it should wait \emph{TimeoutPrevote}, and then precommit nil. 7 | \item[Commit:] If a validator receives precommits from $+\frac{2}{3}$ validators for the same block, it should commit that block, and go to the next height. If the $+\frac{2}{3}$ precommits are not for the same block, it should wait \emph{TimeoutPrecommit}, and then go to the next round. 8 | \end{description} 9 | 10 | \underline{Broadcast Rules} 11 | \begin{description} 12 | \item[No Double Signing:] a validator only signs for each message type (proposal, prevote, precommit) once at a given height and round. 13 | \item[Prevote the Lock:] A validator is locked on the last block they precommitted. They must propose it and prevote for it in future rounds, until they unlock. 14 | \item[Unlock on Polka:] a validator may only unlock if there has been a polka at a round after they locked. 15 | \end{description} 16 | Violation of any of the Broadcast Rules is detectable and should be punished. 17 | 18 | \caption[Summary of Tendermint protocol rules]{Summary of rules in the Tendermint protocol. 
19 | $+\frac{2}{3}$ validators is short for ``more than two-thirds of validators''} 20 | -------------------------------------------------------------------------------- /figures/descriptions/data_structures.tex: -------------------------------------------------------------------------------- 1 | 2 | \vspace*{-1.5in} 3 | 4 | \begin{lstlisting} 5 | 6 | // Proposal for a block at a given height and round, signed by the proposer 7 | type Proposal struct { 8 | Height int 9 | Round int 10 | BlockHash []byte 11 | Signature crypto.SignatureEd25519 // 64 bytes 12 | } 13 | 14 | // Represents a prevote or precommit vote from validators for consensus. 15 | type Vote struct { 16 | Height int 17 | Round int 18 | Type byte // 1 for prevote, 2 for precommit 19 | BlockHash []byte // empty if vote is nil 20 | Signature crypto.SignatureEd25519 // 64 bytes 21 | } 22 | 23 | // A vote message, gossiped to peers 24 | type VoteMessage struct { 25 | ValidatorIndex int 26 | Vote *types.Vote 27 | } 28 | 29 | // A proposal message, gossiped to peers 30 | type ProposalMessage struct { 31 | Proposal *types.Proposal 32 | } 33 | 34 | // Current local state of a validator's consensus machine 35 | type RoundState struct { 36 | Height int // Height we are working on 37 | Round int 38 | Step RoundStepType 39 | CommitTime time.Time // Subjective time we received +2/3 precommits 40 | Validators *types.ValidatorSet 41 | Proposal *types.Proposal 42 | ProposalBlock *types.Block 43 | LockedRound int 44 | LockedBlock *types.Block 45 | Votes *HeightVoteSet // Votes from all rounds at this height 46 | CommitRound int // 47 | LastCommit *types.VoteSet // Last precommits at Height-1 48 | LastValidators *types.ValidatorSet 49 | } 50 | \end{lstlisting} 51 | 52 | \caption[Summary of Tendermint protocol data types]{Summary of data types in the Tendermint protocol} 53 | -------------------------------------------------------------------------------- /figures/descriptions/safety_guarantees.tex: 
-------------------------------------------------------------------------------- 1 | \textbf{Tendermint Safety Guarantees} 2 | \begin{description} 3 | \item[Proposer Safety] \hfill \\ 4 | There is at most one valid proposer for every term. 5 | \item[Validator Append Only] \hfill \\ 6 | A validator never overwrites or deletes blocks it has committed. 7 | \item[Proposer Completeness] \hfill \\ 8 | If a block is committed at a given height, then that block will be present in the chain of all proposers at greater heights. 9 | \item[State Machine Safety] \hfill \\ 10 | If a validator has applied a block at a given height to its state machine, no other validator will ever apply a different block for the same height. 11 | \end{description} 12 | \caption[Tendermint Safety Guarantees]{Tendermint guarantees that all of these properties are true, at all times, within the security guarantee. This set of properties was taken practically verbatim from \cite{raft_thesis}.} 13 | -------------------------------------------------------------------------------- /figures/descriptions/security_guarantees.tex: -------------------------------------------------------------------------------- 1 | 2 | \textbf{Tendermint Security Guarantees} 3 | \begin{description} 4 | \item[Byzantine Fault Tolerance] \hfill \\ 5 | All properties in \ref{fig:tendermint_guarantees} are satisfied so long as fewer than one-third of validators are Byzantine. 6 | \item[Deterministic Accountability] \hfill \\ 7 | If one-third or more of validators, but less than half, are Byzantine, and thereby compromise safety, 8 | they can be specifically identified and held accountable to their actions. 
9 | \end{description} 10 | \caption[Tendermint Security Guarantees]{Tendermint guarantees these security properties, making it more suitable than algorithms like Raft and Paxos, and even other BFT algorithms like PBFT, for consortia with potentially malicious or untrusted actors} 11 | -------------------------------------------------------------------------------- /figures/descriptions/tendermint-pi1.tex: -------------------------------------------------------------------------------- 1 | 2 | \begin{tabular}{l} 3 | \hline\\ 4 | $Consensus := \prod_{i=1}^N PR_i^{0,\emptyset,\emptyset,} $ \\\\ 5 | 6 | \hline \\ 7 | {$\!\begin{aligned} 8 | PR_i^{r,p,v} := 9 | & \text{if } i=proposer(r) \text{ then } \\ 10 | & \quad propose_i ! (prop) \| PV_i^{r,prop,v} \text{, where } prop = chooseProposal(p)\\ 11 | & \text{ else if } p \neq \emptyset \text{ then} \\ 12 | & \quad PV_i^{r,p,v} \\ 13 | & \text{else} \\ 14 | & \quad propose_{proposer(r)} ? (prop).PV_i^{r,prop,v} + susp_{proposer(r)}.PV_i^{r,\emptyset,v} \\ 15 | \end{aligned}$} \\\\ 16 | 17 | \hline \\ 18 | $PV_i^{r,p,v}:= prevote_i ! (p) \| (\nu \> c) ( \prod_{j=1}^n prevote_j ? (w) . c!(prevote_j, w) \| PV1_i^{r,p,v}(c))$ \\\\ 19 | 20 | \hline \\ 21 | {$\!\begin{aligned} 22 | PV1_i^{r,p,v}(c) := & \text{ if } max_{b}(|\left\{ w \in v_r^1 : w.block = b\right\}|) > \frac{2}{3} N \text{ then} \\ 23 | & \quad PC_i^{r,b,v} \\ 24 | & \text{else if } | v_r^1 | > \frac{2}{3} N \text{ then} \\ 25 | & \quad PC_i^{r,\emptyset,v} \\ 26 | & \text{else} \\ 27 | & \quad c?(pv, vote) . 
\text{ if } vote.round < r \text{ then} \\ 28 | & \quad \quad pv?(w).c!(pv, w) \| PV1_i^{r,p,v}(c)\\ 29 | & \quad \text{else if } vote.round = r \text{ then} \\ 30 | & \quad \quad PV1_i^{r,p,vote::v}(c) \\ 31 | & \quad \text{else } \\ 32 | & \quad \quad PR_i^{vote.round, p, vote::v} \\ 33 | \end{aligned}$} \\\\ 34 | \hline\\ 35 | 36 | \end{tabular} 37 | -------------------------------------------------------------------------------- /figures/descriptions/tendermint-pi2.tex: -------------------------------------------------------------------------------- 1 | 2 | \begin{tabular}{l} 3 | \hline \\ 4 | $PC_i^{r,p,v}:= precommit_i ! (p) \| (\nu \> c) ( \prod_{j=1}^n precommit_j ? (w) . c!(precommit_j, w) \| PC1_i^{r,p,v}(c))$ \\\\ 5 | 6 | \hline \\ 7 | {$\!\begin{aligned} 8 | PC1_i^{r,p,v}(c) := 9 | & \text{ if } max_{b}(|\left\{ w \in v_r^2 : w.block = b\right\}|) > \frac{2}{3} N \text{ then} \\ 10 | & \quad d_i!(b) \\ 11 | & \text{else if } | v_r^2 | > \frac{2}{3} N \text{ then} \\ 12 | & \quad PR_i^{r+1,\emptyset,v} \\ 13 | & \text{else} \\ 14 | & \quad c?(pc, vote) .\text{ if } vote.round < r \text{ then} \\ 15 | & \quad \quad pc?(w).c!(pc, w) \| PC1_i^{r,p,v}(c) \\ 16 | & \quad \text{else if } vote.round = r \text{ then} \\ 17 | 18 | & \quad \quad PC1_i^{r,p,vote::v}(c) \\ 19 | & \quad \text{else } \\ 20 | & \quad \quad PR_i^{vote.round, p, vote::v} \\ 21 | \end{aligned}$} \\\\ 22 | \hline\\ 23 | 24 | \end{tabular} 25 | -------------------------------------------------------------------------------- /figures/diagrams/abci.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/diagrams/abci.png -------------------------------------------------------------------------------- /figures/diagrams/byzantine.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/diagrams/byzantine.pdf -------------------------------------------------------------------------------- /figures/diagrams/byzantine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/diagrams/byzantine.png -------------------------------------------------------------------------------- /figures/diagrams/consensus_logic.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/diagrams/consensus_logic.pdf -------------------------------------------------------------------------------- /figures/diagrams/consensus_logic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/diagrams/consensus_logic.png -------------------------------------------------------------------------------- /figures/diagrams/state_machine.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/diagrams/state_machine.pdf -------------------------------------------------------------------------------- /figures/diagrams/state_machine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/diagrams/state_machine.png -------------------------------------------------------------------------------- /figures/diagrams/tmsp.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/diagrams/tmsp.pdf -------------------------------------------------------------------------------- /figures/diagrams/tmsp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/diagrams/tmsp.png -------------------------------------------------------------------------------- /figures/throughput/byz_tables.tex: -------------------------------------------------------------------------------- 1 | 2 | \begin{subtable}{.5 \linewidth} 3 | \centering 4 | \begin{tabular}{| l | l | l | l | l | l | } 5 | \hline 6 | TimeoutPropose & Min & Max & Mean & Median & $95^{th} \ \%-ile$ \\ \hline 7 | 1000 & 868 & 3888 & 1450 & 1086 & 3320 \\ \hline 8 | 2000 & 929 & 4375 & 1786 & 1272 & 4166 \\ \hline 9 | 3000 & 881 & 4363 & 1224 & 1099 & 1680 \\ \hline 10 | 4000 & 824 & 8256 & 1693 & 1272 & 2607 \\ \hline 11 | \end{tabular} 12 | \caption{4 Validators} 13 | \end{subtable} 14 | 15 | 16 | \begin{subtable}{.5 \linewidth} 17 | \centering 18 | \begin{tabular}{| l | l | l | l | l | l | } 19 | \hline 20 | TimeoutPropose & Min & Max & Mean & Median & $95^{th} \ \%-ile$ \\ \hline 21 | 1000 & 771 & 3445 & 1472 & 916 & 3288 \\ \hline 22 | 2000 & 731 & 3661 & 1426 & 902 & 3339 \\ \hline 23 | 3000 & 835 & 6402 & 1912 & 962 & 6155 \\ \hline 24 | 4000 & 811 & 4462 & 1512 & 964 & 3592 \\ \hline 25 | \end{tabular} 26 | \caption{8 Validators} 27 | \end{subtable} 28 | 29 | 30 | \begin{subtable}{.5 \linewidth} 31 | \centering 32 | \begin{tabular}{| l | l | l | l | l | l | } 33 | \hline 34 | TimeoutPropose & Min & Max & Mean & Median & $95^{th} \ \%-ile$ \\ \hline 35 | 1000 & 877 & 15930 & 2086 & 1024 & 5844 \\ \hline 36 | 2000 & 808 & 5737 & 1580 & 1027 & 4155 \\ \hline 37 | 3000 & 919 & 10533 & 1801 & 1110 & 4174 \\ \hline 38 | 4000 & 915 & 5589 & 1745 & 1095 & 4181 \\ \hline 39 
| \end{tabular} 40 | \caption{16 Validators} 41 | \end{subtable} 42 | 43 | 44 | \begin{subtable}{.5 \linewidth} 45 | \centering 46 | \begin{tabular}{| l | l | l | l | l | l | } 47 | \hline 48 | TimeoutPropose & Min & Max & Mean & Median & $95^{th} \ \%-ile$ \\ \hline 49 | 1000 & 1594 & 11730 & 2680 & 1854 & 5016 \\ \hline 50 | 2000 & 1496 & 17801 & 3430 & 1874 & 11730 \\ \hline 51 | 3000 & 1504 & 15963 & 3280 & 1736 & 9569 \\ \hline 52 | 4000 & 1490 & 24836 & 3940 & 1773 & 12866 \\ \hline 53 | \end{tabular} 54 | \caption{32 Validators} 55 | \end{subtable} 56 | 57 | -------------------------------------------------------------------------------- /figures/throughput/crash_tables.tex: -------------------------------------------------------------------------------- 1 | 2 | \begin{subtable}{.5 \linewidth} 3 | \centering 4 | \begin{tabular}{| l | l | l | l | l | l | } 5 | \hline 6 | TimeoutPropose & Min & Max & Mean & Median & $95^{th} \ \%-ile$ \\ \hline 7 | 500 & 434 & 15318 & 2179 & 1102 & 5575 \\ \hline 8 | 1000 & 516 & 18149 & 2180 & 1046 & 5677 \\ \hline 9 | 2000 & 473 & 15067 & 2044 & 1049 & 5479 \\ \hline 10 | 3000 & 428 & 9964 & 2005 & 1096 & 5502 \\ \hline 11 | \end{tabular} 12 | \caption{4 Validators} 13 | \end{subtable} 14 | 15 | 16 | \begin{subtable}{.5 \linewidth} 17 | \centering 18 | \begin{tabular}{| l | l | l | l | l | l | } 19 | \hline 20 | TimeoutPropose & Min & Max & Mean & Median & $95^{th} \ \%-ile$ \\ \hline 21 | 500 & 618 & 126481 & 2679 & 990 & 5589 \\ \hline 22 | 1000 & 570 & 9832 & 1763 & 962 & 5835 \\ \hline 23 | 2000 & 594 & 8869 & 1658 & 968 & 5481 \\ \hline 24 | 3000 & 535 & 10101 & 1633 & 959 & 5485 \\ \hline 25 | \end{tabular} 26 | \caption{8 Validators} 27 | \end{subtable} 28 | 29 | 30 | \begin{subtable}{.5 \linewidth} 31 | \centering 32 | \begin{tabular}{| l | l | l | l | l | l | } 33 | \hline 34 | TimeoutPropose & Min & Max & Mean & Median & $95^{th} \ \%-ile$ \\ \hline 35 | 500 & 782 & 21354 & 1977 & 1001 & 5930 \\ \hline 36 | 1000 & 
758 & 12659 & 1761 & 981 & 5642 \\ \hline 37 | 2000 & 751 & 21285 & 2041 & 1005 & 6872 \\ \hline 38 | 3000 & 719 & 72406 & 2395 & 991 & 5987 \\ \hline 39 | \end{tabular} 40 | \caption{16 Validators} 41 | \end{subtable} 42 | 43 | 44 | \begin{subtable}{.5 \linewidth} 45 | \centering 46 | \begin{tabular}{| l | l | l | l | l | l | } 47 | \hline 48 | TimeoutPropose & Min & Max & Mean & Median & $95^{th} \ \%-ile$ \\ \hline 49 | 500 & 760 & 24692 & 2591 & 1087 & 14025 \\ \hline 50 | 1000 & 755 & 19696 & 2328 & 1119 & 9321 \\ \hline 51 | 2000 & 852 & 21044 & 2178 & 1141 & 6514 \\ \hline 52 | 3000 & 763 & 25587 & 2289 & 1119 & 6707 \\ \hline 53 | \end{tabular} 54 | \caption{32 Validators} 55 | \end{subtable} 56 | 57 | -------------------------------------------------------------------------------- /figures/throughput/delay_tables.tex: -------------------------------------------------------------------------------- 1 | 2 | \begin{subtable}{.5 \linewidth} 3 | \centering 4 | \begin{tabular}{| l | l | l | l | l | l | } 5 | \hline 6 | TimeoutPropose & Min & Max & Mean & Median & $95^{th} \ \%-ile$ \\ \hline 7 | 1000 & 873 & 2796 & 1437 & 1036 & 2627 \\ \hline 8 | 2000 & 831 & 4549 & 1843 & 1180 & 4036 \\ \hline 9 | 3000 & 921 & 5782 & 2273 & 1251 & 5491 \\ \hline 10 | 4000 & 967 & 6875 & 2700 & 1413 & 6781 \\ \hline 11 | \end{tabular} 12 | \caption{4 Validators} 13 | \end{subtable} 14 | 15 | 16 | \begin{subtable}{.5 \linewidth} 17 | \centering 18 | \begin{tabular}{| l | l | l | l | l | l | } 19 | \hline 20 | TimeoutPropose & Min & Max & Mean & Median & $95^{th} \ \%-ile$ \\ \hline 21 | 1000 & 870 & 2840 & 1449 & 1040 & 2786 \\ \hline 22 | 2000 & 957 & 4268 & 1848 & 1076 & 4148 \\ \hline 23 | 3000 & 859 & 5724 & 2156 & 1100 & 5649 \\ \hline 24 | 4000 & 897 & 11859 & 3055 & 1093 & 11805 \\ \hline 25 | \end{tabular} 26 | \caption{8 Validators} 27 | \end{subtable} 28 | 29 | 30 | \begin{subtable}{.5 \linewidth} 31 | \centering 32 | \begin{tabular}{| l | l | l | l | l | l | } 33 | 
\hline 34 | TimeoutPropose & Min & Max & Mean & Median & $95^{th} \ \%-ile$ \\ \hline 35 | 1000 & 914 & 5595 & 1821 & 1135 & 5466 \\ \hline 36 | 2000 & 950 & 7782 & 2490 & 1165 & 7650 \\ \hline 37 | 3000 & 978 & 10305 & 3049 & 1163 & 9890 \\ \hline 38 | 4000 & 1018 & 6890 & 2808 & 1174 & 6813 \\ \hline 39 | \end{tabular} 40 | \caption{16 Validators} 41 | \end{subtable} 42 | 43 | 44 | \begin{subtable}{.5 \linewidth} 45 | \centering 46 | \begin{tabular}{| l | l | l | l | l | l | } 47 | \hline 48 | TimeoutPropose & Min & Max & Mean & Median & $95^{th} \ \%-ile$ \\ \hline 49 | 1000 & 1202 & 8562 & 2219 & 1349 & 5733 \\ \hline 50 | 2000 & 1196 & 7878 & 2549 & 1365 & 7579 \\ \hline 51 | 3000 & 1164 & 10082 & 3003 & 1382 & 9805 \\ \hline 52 | 4000 & 1223 & 17571 & 3696 & 1392 & 12014 \\ \hline 53 | \end{tabular} 54 | \caption{32 Validators} 55 | \end{subtable} 56 | 57 | -------------------------------------------------------------------------------- /figures/throughput/large_instances/latency-throughput.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/throughput/large_instances/latency-throughput.pdf -------------------------------------------------------------------------------- /figures/throughput/large_instances/latency-throughput.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/throughput/large_instances/latency-throughput.png -------------------------------------------------------------------------------- /figures/throughput/large_instances/throughput-blocksize.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/throughput/large_instances/throughput-blocksize.pdf 
-------------------------------------------------------------------------------- /figures/throughput/large_instances/throughput-blocksize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/throughput/large_instances/throughput-blocksize.png -------------------------------------------------------------------------------- /figures/throughput/latency-throughput.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/throughput/latency-throughput.pdf -------------------------------------------------------------------------------- /figures/throughput/latency-throughput.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/throughput/latency-throughput.png -------------------------------------------------------------------------------- /figures/throughput/single_datacenter/latency-throughput.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/throughput/single_datacenter/latency-throughput.pdf -------------------------------------------------------------------------------- /figures/throughput/single_datacenter/latency-throughput.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/throughput/single_datacenter/latency-throughput.png -------------------------------------------------------------------------------- /figures/throughput/single_datacenter/throughput-blocksize.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/throughput/single_datacenter/throughput-blocksize.pdf -------------------------------------------------------------------------------- /figures/throughput/single_datacenter/throughput-blocksize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/throughput/single_datacenter/throughput-blocksize.png -------------------------------------------------------------------------------- /figures/throughput/throughput-blocksize.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/throughput/throughput-blocksize.pdf -------------------------------------------------------------------------------- /figures/throughput/throughput-blocksize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ebuchman/thesis/5f20effb0f478aec91aa7f99fdbc6b33fc2539c4/figures/throughput/throughput-blocksize.png -------------------------------------------------------------------------------- /listings-golang.sty: -------------------------------------------------------------------------------- 1 | %% Golang definition for listings 2 | %% http://github.io/julienc91/lstlistings-golang 3 | %% 4 | \RequirePackage{listings} 5 | 6 | \lstdefinelanguage{Golang}% 7 | {morekeywords=[1]{package,import,func,type,struct,return,defer,panic,% 8 | recover,select,var,const,iota,},% 9 | morekeywords=[2]{string,uint,uint8,uint16,uint32,uint64,int,int8,int16,% 10 | int32,int64,bool,float32,float64,complex64,complex128,byte,rune,uintptr,% 11 | error},% 12 | 
morekeywords=[3]{interface,map,slice,make,new,nil,len,cap,copy,close,true,false,% 13 | delete,append,real,imag,complex,chan,},% 14 | morekeywords=[4]{for,break,continue,range,goto,switch,case,fallthrough,if,% 15 | else,default,},% 16 | morekeywords=[5]{Println,Printf,Error,},% 17 | sensitive=true,% 18 | morecomment=[l]{//},% 19 | morecomment=[s]{/*}{*/},% 20 | morestring=[b]',% 21 | morestring=[b]",% 22 | morestring=[s]{`}{`},% 23 | } 24 | -------------------------------------------------------------------------------- /main.tex: -------------------------------------------------------------------------------- 1 | 2 | \documentclass[12pt]{report} 3 | \usepackage[utf8]{inputenc} 4 | \usepackage{graphicx} 5 | \usepackage{listings} % code listings 6 | \usepackage{listings-golang} 7 | \usepackage{subcaption} 8 | \usepackage{amsmath} 9 | \usepackage[backend=bibtex,block=ragged]{biblatex} % block=ragged wraps lines in the bib 10 | \graphicspath{ {images/} } 11 | \usepackage{float} 12 | \usepackage{xcolor} 13 | 14 | %\floatstyle{boxed} 15 | \restylefloat{figure} 16 | 17 | 18 | % color not working :( 19 | \lstset{ % add your own preferences 20 | frame=single, 21 | basicstyle=\footnotesize, 22 | keywordstyle=\color{orange}, 23 | keywordstyle=[2]\color{green}, 24 | commentstyle=\color{blue}, 25 | %numbers=left, 26 | %numbersep=5pt, 27 | showstringspaces=false, 28 | stringstyle=\color{red}, 29 | tabsize=4, 30 | language=Golang % this is it ! 31 | } 32 | 33 | 34 | \pagenumbering{gobble} 35 | 36 | \usepackage{hyperref} 37 | \hypersetup{ 38 | colorlinks=true, 39 | linkcolor=blue, 40 | filecolor=magenta, 41 | urlcolor=cyan, 42 | } 43 | 44 | \urlstyle{same} 45 | 46 | %\bibliographystyle{IEEEtran} 47 | \bibliography{bib/consensus,bib/crypto,bib/programming,bib/applied,bib/formal} 48 | \AtEveryBibitem{% 49 | \clearfield{note}% 50 | } 51 | 52 | % Fix to put commas between multiple footnotes (e.g. 
in Chapter 1) 53 | % Normal solution is to use: 54 | % \usepackage[multiple]{footmisc} 55 | % However, this is incompatible with hyperref 56 | % Suggested solution here: 57 | % http://tex.stackexchange.com/a/62091 58 | \let\oldFootnote\footnote 59 | \newcommand\nextToken\relax 60 | 61 | \renewcommand\footnote[1]{% 62 | \oldFootnote{#1}\futurelet\nextToken\isFootnote} 63 | 64 | \newcommand\isFootnote{% 65 | \ifx\footnote\nextToken\textsuperscript{,}\fi} 66 | 67 | 68 | \begin{document} 69 | \include{chapters/title} 70 | \include{chapters/abstract} 71 | \pagenumbering{roman} 72 | \setcounter{page}{3} 73 | \include{chapters/frontmatter} 74 | \pagenumbering{arabic} 75 | \include{chapters/introduction} 76 | \include{chapters/background} 77 | \include{chapters/tendermint} 78 | \include{chapters/subprotocols} 79 | \include{chapters/apps} 80 | \include{chapters/governance} 81 | \include{chapters/clients} 82 | \include{chapters/implementation} 83 | \include{chapters/performance} 84 | \include{chapters/related} 85 | \include{chapters/conclusion} 86 | 87 | 88 | 89 | \printbibliography 90 | 91 | \end{document} 92 | -------------------------------------------------------------------------------- /tendermint-pi.tex: -------------------------------------------------------------------------------- 1 | 2 | \documentclass[12pt]{report} 3 | \usepackage[utf8]{inputenc} 4 | \usepackage{graphicx} 5 | \usepackage{listings} 6 | \usepackage{lstautogobble} 7 | \usepackage{amsmath} 8 | \graphicspath{ {images/} } 9 | \usepackage{float} 10 | \floatstyle{boxed} 11 | \restylefloat{figure} 12 | 13 | 14 | \renewcommand{\|}{\;|\;} 15 | 16 | \begin{document} 17 | 18 | Here we attempt a general depiction of consensus protocols, drawing on Nestmann (2003), 19 | which models the non-Byzantine consensus protocol of Chandra and Toueg (1996). 20 | We model only the consensus, rather than full ABC, but describe how to 21 | easily extend the model to ABC. 
22 | 23 | Let $Consensus := \prod_{i=1}^N Y_i $ represent a consensus protocol 24 | over a set of $N$ validators, each executing one of a mutually exclusive set of processes, $Y_i$. 25 | Internal state $s = \{r, p, v \}$ consists of a strictly increasing round, $r$, 26 | a proposal, $p$, containing the proposed block for this round, 27 | and a set of votes, $v$, containing all votes at all rounds. 28 | We denote by $v_r^1$ and $v_r^2$ the sets of prevotes and precommits, respectively, at round $r$. 29 | We define $proposer(r) = r \bmod N$ to be the index of the proposer at round $r$. 30 | We represent a peer at a particular point in the protocol as $Y_i^{r, p, v}$. 31 | Processes $Y_i$ range over $PR_i$, $PV_i$, $PC_i$, $C_i$, 32 | respectively abbreviating 33 | \emph{propose}, \emph{prevote}, \emph{precommit}, \emph{commit}. 34 | We introduce additional sub-functions for $PV$ and $PC$ to capture the recursion, 35 | denoted $PV1$, $PC1$, etc. 36 | 37 | Peers are connected using broadcast channels for each message type, 38 | namely $propose_i$, $prevote_i$, and $precommit_i$, 39 | as well as a channel for broadcasting new transactions, $b_i$, 40 | and one for deciding on, or committing, the next block, $d_i$. 41 | Via an abuse of notation, a single send on some $x_i$ can be received by each process along 42 | $x_i$. 43 | 44 | We use only two message types: proposals and votes. 45 | Each contains a round number, block (hash), and signature, 46 | denoted $msg.round$, $msg.block$, $msg.sig$. 47 | Note that we could absorb the signature into the broadcast channel itself, 48 | but we keep it explicit because it is needed as evidence in the event of Byzantine behaviour. 49 | 50 | 51 | \begin{center} 52 | \begin{tabular}{l } 53 | \hline \\ 54 | $Consensus := \prod_{i=1}^N [ PR_i^{0,\emptyset,\emptyset} \| D_i]$ \\\\ 55 | 56 | \hline \\ 57 | {$\!\begin{aligned} 58 | PR_i^{r,p,v} := 59 | & \text{if } i=proposer(r) \text{ then } \\ 60 | & \quad propose_i ! 
(prop) \| PV_i^{r,prop,v} \text{, where } prop = chooseProposal(p)\\ 61 | & \text{ else if } p \neq \emptyset \text{ then} \\ 62 | & \quad PV_i^{r,p,v} \\ 63 | & \text{else} \\ 64 | & \quad propose_{proposer(r)} ? (prop).PV_i^{r,prop,v} + susp_{proposer(r)}.PV_i^{r,\emptyset,v} \\ 65 | \end{aligned}$} \\\\ 66 | 67 | \hline \\ 68 | $PV_i^{r,p,v}:= prevote_i ! (r,p) \| (\nu \> c) ( \prod_{j=1}^N prevote_j ? (w) . c!(w) \| PV1_i^{r,p,v}(c))$ \\\\ 69 | 70 | \hline \\ 71 | {$\!\begin{aligned} 72 | PV1_i^{r,p,v}(c) := 73 | & \text{ if } \max_{b}(|\left\{ w \in v_r^1 : w.block = b\right\}|) > \frac{2}{3} N \text{ then} \\ 74 | & \quad PC_i^{r,b,v} \\ 75 | & \text{else if } | v_r^1 | > \frac{2}{3} N \text{ then} \\ 76 | & \quad PC_i^{r,\emptyset,v} \\ 77 | & \text{else} \\ 78 | & \quad c?(vote) . PV1_i^{r,p,vote::v}(c) \\ 79 | \end{aligned}$} \\\\ 80 | 81 | \hline \\ 82 | $PC_i^{r,p,v}:= precommit_i ! (r,p) \| (\nu \> c) ( \prod_{j=1}^N precommit_j ? (w) . c!(w) \| PC1_i^{r,p,v}(c))$ \\\\ 83 | 84 | \hline \\ 85 | {$\!\begin{aligned} 86 | PC1_i^{r,p,v}(c) := 87 | & \text{ if } \max_{b}(|\left\{ w \in v_r^2 : w.block = b\right\}|) > \frac{2}{3} N \text{ then} \\ 88 | & \quad C_i^{r,b,v} \\ 89 | & \text{else if } | v_r^2 | > \frac{2}{3} N \text{ then} \\ 90 | & \quad PR_i^{r+1,\emptyset,v} \\ 91 | & \text{else} \\ 92 | & \quad c?(vote) . PC1_i^{r,p,vote::v}(c) \\ 93 | \end{aligned}$} 94 | 95 | 96 | \end{tabular} 97 | \end{center} 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | \end{document} 109 | --------------------------------------------------------------------------------