├── .gitignore
├── .travis.yml
├── README.md
├── erlang
│   └── model
│       ├── .gitignore
│       ├── README.md
│       ├── qsc.erl
│       └── run.sh
├── go
│   ├── dist
│   │   ├── README.md
│   │   ├── causal.go
│   │   ├── dist_test.go
│   │   ├── doc.go
│   │   ├── node.go
│   │   ├── qsc.go
│   │   ├── set.go
│   │   ├── tlc.go
│   │   └── vec.go
│   ├── lib
│   │   ├── backoff
│   │   │   ├── retry.go
│   │   │   ├── retry_test.go
│   │   │   └── rfq
│   │   │       └── doc.go
│   │   ├── cas
│   │   │   ├── cas.go
│   │   │   └── test
│   │   │       ├── cas.go
│   │   │       └── cas_test.go
│   │   ├── doc.go
│   │   └── fs
│   │       ├── atomic
│   │       │   ├── atomic.go
│   │       │   └── atomic_test.go
│   │       ├── casdir
│   │       │   └── state.go
│   │       └── verst
│   │           └── state.go
│   └── model
│       ├── README.md
│       ├── doc.go
│       ├── model_test.go
│       ├── node.go
│       ├── qsc.go
│       ├── qscod
│       │   ├── README.md
│       │   ├── core
│       │   │   ├── cli.go
│       │   │   └── test
│       │   │       ├── cli.go
│       │   │       └── cli_test.go
│       │   ├── encoding
│       │   │   └── enc.go
│       │   ├── fs
│       │   │   ├── casdir
│       │   │   │   └── cas_test.go
│       │   │   ├── simple
│       │   │   │   ├── store.go
│       │   │   │   └── store_test.go
│       │   │   └── store
│       │   │       ├── store.go
│       │   │       └── store_test.go
│       │   └── qscas
│       │       ├── doc.go
│       │       ├── group.go
│       │       ├── group_test.go
│       │       ├── rand.go
│       │       └── store.go
│       ├── quepaxa
│       │   ├── consensus.go
│       │   ├── isr.go
│       │   └── proposal.go
│       └── tlc.go
├── spin
│   ├── README.md
│   ├── qp.pml
│   ├── qpm.pml
│   ├── qsc.pml
│   ├── results-qp.txt
│   ├── results-qpm.txt
│   └── run.sh
└── tools
    └── qsc
        ├── .gitignore
        ├── group.go
        ├── main.go
        └── string.go
/.gitignore:
--------------------------------------------------------------------------------
1 | pan
2 | pan.*
3 | *.pml.trail
4 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | dist: bionic
2 |
3 | language: go
4 |
5 | go:
6 | - 1.12.x
7 |
8 | before_install:
9 | - sudo apt-get install -y spin
10 |
11 | install:
12 | - go get -u golang.org/x/lint/golint
13 |
14 | script:
15 | - go vet ./...
16 | - if [ "$( gofmt -l . )" ]; then gofmt -d .; exit 1; fi
17 | - golint -set_exit_status ./...
18 | - go test ./...
19 | - cd spin; ./run.sh
20 |
21 | notifications:
22 | email: false
23 |
24 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | This repository contains multiple prototype implementations of
3 | Threshold Logical Clocks (TLC) and Que Sera Consensus (QSC),
4 | as described in the following papers:
5 |
6 | * [Threshold Logical Clocks for Asynchronous Distributed Coordination and Consensus](https://arxiv.org/abs/1907.07010)
7 | * [Que Sera Consensus: Simple Asynchronous Agreement with Private Coins and Threshold Logical Clocks](https://arxiv.org/abs/2003.02291)
8 |
9 | The following prototype implementations of TLC and QSC are available
10 | in multiple languages:
11 |
12 | * [erlang/model](erlang/model/) contains a minimalistic model implementation
13 | of the QSC, TLCB, and TLCR algorithms detailed in the
14 | [new QSC preprint](https://arxiv.org/abs/2003.02291).
15 | This model implements QSC using Erlang processes and communication
16 | on a single machine for illustrative simplicity, although
17 | [distributed Erlang](https://erlang.org/doc/reference_manual/distributed.html)
18 | should make it straightforward to extend this model
19 | to true distributed consensus.
20 | Erlang's [selective receive](https://ndpar.blogspot.com/2010/11/erlang-explained-selective-receive.html)
21 | is particularly well-suited to implementing TLCR concisely.
22 | The model consists of only 73 lines of code
23 | as measured by [cloc](https://github.com/AlDanial/cloc),
24 | including test code,
25 | or only 37 lines comprising the consensus algorithm alone.
26 |
27 | * [go/model](go/model/) contains a minimalistic model implementation in Go
28 | of TLC and QSC as described in the
29 | [original TLC preprint](https://arxiv.org/abs/1907.07010).
30 | This model illustrates the key concepts
31 | using goroutines and shared memory communication for simplicity.
32 | It is not useful in an actual distributed context,
33 | but being less than 200 code lines long
34 | as measured by [cloc](https://github.com/AlDanial/cloc),
35 | it is ideal for studying and understanding TLC and QSC.
36 |
37 | * [go/model/qscod](go/model/qscod/)
38 | contains a model implementation in Go of QSCOD,
39 | the client-driven "on-demand" consensus algorithm outlined in the
40 | [new QSC preprint](https://arxiv.org/abs/2003.02291).
41 | This formulation of QSC consumes no bandwidth or computation
42 | when there is no work to be done (hence on-demand),
43 | and incurs only O(n²) communication complexity
44 | per client-driven agreement.
45 |
46 | * [go/dist](go/dist/) contains a simple but working
47 | "real" distributed implementation of TLC and QSC in Go
48 | for a fail-stop (Paxos-like) threat model.
49 | It uses TCP, TLS encryption and authentication,
50 | and Go's native Gob encoding for inter-node communication.
51 | At less than 1000 code lines long
52 | as measured by [cloc](https://github.com/AlDanial/cloc),
53 | it is still probably one of the simplest implementations
54 | of asynchronous consensus around.
55 |
56 | * [spin](spin/) contains a simple Promela model of the core of TLC and QSC
57 | for the [Spin model checker](https://spinroot.com/spin/whatispin.html).
58 | Although this implementation models TLC and QSC only at a
59 | very high, abstract level, it captures the basic logic enough
60 | to lend confidence to the correctness of the algorithm.
61 |
62 | All of this code is still extremely early and experimental;
63 | use at your own risk.
64 |
65 | [![Build Status](https://travis-ci.com/dedis/tlc.svg?branch=master)](https://travis-ci.com/dedis/tlc)
66 |
67 |
--------------------------------------------------------------------------------
/erlang/model/.gitignore:
--------------------------------------------------------------------------------
1 | *.beam
2 |
--------------------------------------------------------------------------------
/erlang/model/README.md:
--------------------------------------------------------------------------------
1 | This directory contains a minimal implementation of
2 | Que Sera Consensus (QSC) built on Threshold Logical Clocks (TLC)
3 | in [Erlang](https://www.erlang.org)
4 | for fail-stop, non-Byzantine environments.
5 | This model implements the QSC, TLCB, and TLCR algorithms detailed in the
6 | [new QSC preprint](https://arxiv.org/abs/2003.02291)
7 | in only 37 lines of code representing the actual algorithm.
8 |
9 | For background information on QSC and TLC,
10 | and other model implementations in several languages, please see the
11 | [top level of this repository](https://github.com/dedis/tlc/).
12 |
--------------------------------------------------------------------------------
/erlang/model/qsc.erl:
--------------------------------------------------------------------------------
1 | -module(qsc).
2 | -export([qsc/1, test/0]).
3 |
4 | % Node configuration is a tuple defined as a record.
5 | % nn: node number from 1..len(pids)
6 | % tr: receive threshold
7 | % tb: broadcast threshold
8 | % ts: spread threshold
9 | % pids: list of process IDs of all nodes
10 | % steps: maximum number of time steps to run, nil to run forever
11 | % choose: function choose(Config, Step) -> Msg to choose application message
12 | % random: function random() -> Value to choose a random priority value
13 | % deliver: function deliver(History) to deliver a committed history
14 | -record(config, {nn, tr, tb, ts, pids, steps, choose, random, deliver}).
15 |
16 | % A history is a record representing the most recent entry in a chain.
17 | -record(hist, {step, nn, msg, pri, pred}).
18 |
19 | % qsc(C) -> (never returns)
20 | % Implements Que Sera Consensus (QSC) atop TLCB and TLCR.
21 | qsc(C) -> qsc(C, 1, #hist{step=0}). % start at step 1 with placeholder pred
22 | qsc(#config{steps=Max}, S0, _) when S0 > Max -> {}; % stop after Max steps
23 | qsc(#config{nn=I, choose=Ch, random=Rv, deliver=D} = C, S0, H0) ->
24 | H1 = #hist{step=S0, nn=I, msg=Ch(C, S0), pri=Rv(), pred=H0},
25 | {S1, R1, B1} = tlcb(C, S0, H1), % Try to broadcast (confirm) proposal
26 | {H2, _} = best(B1), % Choose some best eligible proposal
27 | {S2, R2, B2} = tlcb(C, S1, H2), % Re-broadcast it to reconfirm proposal
28 | {Hn, _} = best(R2), % Choose best eligible for next round
29 | {H3, Unique} = best(R1), % What is the best potential history?
30 | Final = lists:member(Hn, B2) and (Hn == H3) and Unique,
31 | if Final -> D(Hn), qsc(C, S2, Hn); % Deliver history Hn
32 | true -> qsc(C, S2, Hn) % Just proceed to next consensus round
33 | end.
34 |
35 | % best(L) -> {B, U}
36 | % Find and return the best (highest-priority) history B in a nonempty list L,
37 | % and a flag U indicating whether B is uniquely best (highest priority) in L.
38 | best([H]) -> {H, true}; % trivial singleton case
39 | best(L) ->
40 | Compare = fun(#hist{pri=AR}, #hist{pri=BR}) -> AR >= BR end,
41 | [#hist{pri=BR} = B, #hist{pri=NR} | _] = lists:sort(Compare, L),
42 | {B, (BR /= NR)}.
43 |
44 |
45 | % tlcb(C, S, H) -> {S, R, B}
46 | % Implements the TLCB algorithm for full-spread synchronous broadcast.
47 | tlcb(#config{ts=Ts} = C, S0, H) ->
48 | {S1, R1, _} = tlcr(C, S0, H), % Step 1: broadcast history H
49 | {S2, R2, _} = tlcr(C, S1, R1), % Step 2: re-broadcast list we received
50 | R = sets:to_list(sets:union([sets:from_list(L) || L <- [R1 | R2]])),
51 | B = [Hc || Hc <- R, count(R2, Hc) >= Ts],
52 | {S2, R, B}. % New state, receive and broadcast sets
53 |
54 | % count(LL, H) -> N
55 | % Return N the number of lists in list-of-lists LL that include history H.
56 | count(LL, H) -> length([L || L <- LL, lists:member(H, L)]).
57 |
58 |
59 | % tlcr(C, S, M) -> {S, R, nil}
60 | % Implements the TLCR algorithm for receive-threshold synchronous broadcast.
61 | tlcr(#config{pids=Pids} = C, S, M) ->
62 | [P ! {S, M} || P <- Pids], % broadcast next message
63 | tlcr_wait(C, S, []). % wait for receive threshold
64 | tlcr_wait(#config{tr=Tr} = C, S, R) when length(R) < Tr ->
65 | receive {RS, RM} when RS == S -> tlcr_wait(C, S, [RM | R]);
66 | {RS, _} when RS < S -> tlcr_wait(C, S, R) % drop old msgs
67 | end; % when RS > S message stays in the inbox to be received later
68 | tlcr_wait(_, S, R) -> {S+1, R, nil}.
69 |
70 |
71 | % Run a test-case configured for a given number of potentially-failing nodes F,
72 | % then signal Parent process when done.
73 | test_run(F, Steps) ->
74 | % Generate a standard valid configuration from number of failures F.
75 | N = 3*F, Tr = 2*F, Tb = F, Ts = F+1,
76 | io:fwrite("Test N=~p F=~p~n", [N, F]),
77 |
78 | % Function to choose message for node I to propose at TLC time-step S.
79 | Choose = fun(#config{nn=I}, S) -> {msg, S, I} end,
80 |
81 | % Choose a random value to attach to a proposal in time-step S.
82 | % This low-entropy random distribution is intended only for testing,
83 | % so as to ensure a significant rate of ties for best priority.
84 | % Production code should use high-entropy cryptographic randomness for
85 | % maximum efficiency and strength against intelligent DoS attackers.
86 | Random = fun() -> rand:uniform(N) end,
87 |
88 | % Spawn a process to receive and consistency-check committed histories.
89 | Checker = spawn(fun() -> test_checker(#hist{step=0}) end),
90 |
91 | % The nodes will "deliver" histories by sending them back to us.
92 | Deliver = fun(H) -> Checker ! {check, H} end,
93 |
94 | % Launch a process representing each of the N nodes.
95 | Self = self(),
96 | Pids = [spawn(fun() -> test_node(Self) end) || _ <- lists:seq(1, N)],
97 |
98 | % Send each node its complete configuration record to get it started.
99 | C = #config{ tr = Tr, tb = Tb, ts = Ts, pids = Pids, steps = Steps,
100 | choose = Choose, random = Random, deliver = Deliver},
101 | [lists:nth(I, Pids) ! C#config{nn=I} || I <- lists:seq(1, N)],
102 |
103 | % Wait until all nodes run the designated number of time steps.
104 | [test_wait(I) || I <- lists:seq(1, N)],
105 | Checker ! {stop}. % Terminate our checker process
106 |
107 | % Receive a node configuration, run a QSC node simulation with it,
108 | % then send a completion signal to our parent process.
109 | test_node(Parent) ->
110 | receive #config{} = C -> qsc(C), Parent ! {done, C#config.nn} end.
111 |
112 | % Wait to receive a signal that node I is finished.
113 | test_wait(I) -> receive {done, I} -> {} end.
114 |
115 | % test_checker() -> {}
116 | % Receive committed histories from all nodes and consistency-check them
117 | test_checker(Hp) ->
118 | receive {check, H} ->
119 | %io:fwrite("committed ~P~n", [H, 8]),
120 | test_checker(test_check(Hp, H));
121 | {stop} -> {}
122 | end.
123 |
124 | % test_check(A, B) -> H
125 | % Check two histories A and B for consistency, and return the longer one.
126 | test_check(#hist{step=AC,pred=AP} = A, #hist{step=BC} = B) when AC > BC ->
127 | test_check(AP, B), A; % compare shorter prefix of A with B
128 | test_check(#hist{step=AC} = A, #hist{step=BC,pred=BP} = B) when BC > AC ->
129 | test_check(A, BP), B; % compare A with shorter prefix of B
130 | test_check(A, B) when A == B -> A;
131 | test_check(A, B) -> erlang:error({inconsistency, A, B}).
132 |
133 | % Run QSC and TLC through a test suite.
134 | test() ->
135 | [test_run(F, 1000) || F <- [1,2,3,4,5]], % simple test suite
136 | io:fwrite("Tests completed~n").
137 |
138 |
--------------------------------------------------------------------------------
/erlang/model/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | erl -make && erl -noshell -run qsc test -run init stop
3 |
--------------------------------------------------------------------------------
/go/dist/README.md:
--------------------------------------------------------------------------------
1 | This Go package provides a simple but "real" distributed implementation of
2 | Que Sera Consensus (QSC) built on Threshold Logical Clocks (TLC)
3 | for fail-stop, non-Byzantine environments.
4 | For background information on QSC and TLC,
5 | and other model implementations in several languages, please see the
6 | [top level of this repository](https://github.com/dedis/tlc/).
7 | For more details on this package see the code and its
8 | [GoDoc documentation](https://godoc.org/github.com/dedis/tlc/go/dist).
9 |
--------------------------------------------------------------------------------
/go/dist/causal.go:
--------------------------------------------------------------------------------
1 | package dist
2 |
3 | // Broadcast a copy of our current message template to all nodes.
4 | func (n *Node) broadcastCausal(msg *Message) {
5 |
6 | //println(n.self, n.tmpl.Step, "broadcastCausal",
7 | // "mat", len(n.mat))
8 |
9 | // Assign the new message a sequence number
10 | msg.Seq = len(n.seqLog[n.self]) // Assign sequence number
11 | msg.Vec = n.mat[n.self].copy() // Include vector time update
12 | n.logCausal(n.self, msg) // Add msg to our log
13 | //println(n.self, n.tmpl.Step, "broadcastCausal step", msg.Step,
14 | // "typ", msg.Typ, "seq", msg.Seq,
15 | // "vec", fmt.Sprintf("%v", msg.Vec))
16 |
17 | // We always receive our own message first.
18 | n.receiveTLC(msg)
19 |
20 | // Send it to all other peers.
21 | for dest := range n.peer {
22 | if dest != n.self {
23 | n.sendCausal(dest, msg)
24 | }
25 | }
26 | }
27 |
28 | // Log a peer's message, either our own (just sent)
29 | // or another node's (received and ready to be delivered).
30 | func (n *Node) logCausal(peer int, msg *Message) {
31 |
32 | // Update peer's matrix clock and our record of what it saw by msg
33 | for i := range n.peer {
34 | //println(i, "mat", len(n.mat), "vec", len(msg.Vec))
35 | for n.mat[peer][i] < msg.Vec[i] {
36 | n.sawCausal(peer, n.seqLog[i][n.mat[peer][i]])
37 | n.mat[peer][i]++
38 | }
39 | }
40 | n.sawCausal(peer, msg) // msg has been seen by the peer that sent it
41 | n.sawCausal(n.self, msg) // and now we've seen the message too
42 |
43 | n.seqLog[peer] = append(n.seqLog[peer], msg) // log this msg
44 | n.mat[n.self][peer] = len(n.seqLog[peer]) // update our vector time
45 | if len(n.seqLog[peer]) != msg.Seq+1 { // sanity check
46 | panic("out of sync")
47 | }
48 | }
49 |
50 | // Record the fact that a given peer is now known to have seen a given message.
51 | // For Wit messages, record the fact that the proposal was threshold witnessed.
52 | func (n *Node) sawCausal(peer int, msg *Message) {
53 | n.saw[peer].add(msg)
54 | if msg.Typ == Wit {
55 | prop := n.seqLog[msg.From][msg.Prop]
56 | if prop.Typ != Prop {
57 | panic("not a proposal!")
58 | }
59 | n.wit[peer].add(prop)
60 | }
61 | }
62 |
63 | // Transmit a message to a particular node.
64 | func (n *Node) sendCausal(dest int, msg *Message) {
65 | //println(n.self, n.tmpl.Step, "sendCausal to", dest, "typ", msg.Typ,
66 | // "seq", msg.Seq)
67 | n.peer[dest].Send(msg)
68 | }
69 |
70 | // Receive a possibly out-of-order message from the network.
71 | // Enqueue it and actually deliver messages as soon as we can.
72 | func (n *Node) receiveCausal(msg *Message) {
73 |
74 | // Unicast acknowledgments don't get sequence numbers or reordering.
75 | if msg.Typ == Ack {
76 | n.receiveTLC(msg) // Just send it up the stack
77 | return
78 | }
79 |
80 | // Ignore duplicate message deliveries
81 | if msg.Seq < n.mat[n.self][msg.From] {
82 | println(n.self, n.tmpl.Step, "duplicate message from", msg.From,
83 | "seq", msg.Seq)
84 | panic("XXX")
85 | }
86 |
87 | // Enqueue broadcast message for delivery in causal order.
88 | //println(n.self, n.tmpl.Step, "receiveCausal from", msg.From,
89 | // "type", msg.Typ, "seq", msg.Seq,
90 | // "vec", fmt.Sprintf("%v", msg.Vec))
91 | //if len(n.oom[msg.From]) <= msg.Seq - n.mat[n.self][msg.From] - 1000 {
92 | // panic("huge jump")
93 | //}
94 | for len(n.oom[msg.From]) <= msg.Seq-n.mat[n.self][msg.From] {
95 | n.oom[msg.From] = append(n.oom[msg.From], nil)
96 | }
97 | n.oom[msg.From][msg.Seq-n.mat[n.self][msg.From]] = msg
98 |
99 | // Deliver whatever messages we can consistently with causal order.
100 | for progress := true; progress; {
101 | progress = false
102 | for i := range n.peer {
103 | progress = progress || n.deliverCausal(i)
104 | }
105 | }
106 | }
107 |
108 | // Try to deliver out-of-order messages held from a given peer.
109 | // Returns true if we made progress, false if nothing to do for this peer.
110 | func (n *Node) deliverCausal(peer int) bool {
111 | if len(n.oom[peer]) == 0 || n.oom[peer][0] == nil ||
112 | !n.oom[peer][0].Vec.le(n.mat[n.self]) {
113 | return false
114 | }
115 |
116 | // Log the message now that it's in causal order.
117 | //println(n.self, n.tmpl.Step, "enqueueCausal",
118 | // "deliver type", msg.Typ,
119 | // "seq", msg.Seq, "#oom", len(n.oom[i]))
120 | msg := n.oom[peer][0]
121 | n.logCausal(peer, msg)
122 |
123 | // Remove it from this peer's out-of-order message queue.
124 | n.oom[peer] = n.oom[peer][1:]
125 |
126 | // Deliver the message to upper layers.
127 | n.receiveTLC(msg)
128 |
129 | return true // made progress
130 | }
131 |
132 | // Initialize the causality and higher layer state for a node.
133 | func (n *Node) initCausal() {
134 | n.mat = make([]vec, len(n.peer))
135 | n.oom = make([][]*Message, len(n.peer))
136 | n.seqLog = make([][]*Message, len(n.peer))
137 | n.saw = make([]set, len(n.peer))
138 | n.wit = make([]set, len(n.peer))
139 | for i := range n.peer {
140 | n.mat[i] = make(vec, len(n.peer))
141 | n.saw[i] = make(set)
142 | n.wit[i] = make(set)
143 | }
144 |
145 | n.initTLC()
146 | }
147 |
--------------------------------------------------------------------------------
/go/dist/dist_test.go:
--------------------------------------------------------------------------------
1 | package dist
2 |
3 | import (
4 | "bytes"
5 | "context"
6 | "crypto/ecdsa"
7 | "crypto/elliptic"
8 | crand "crypto/rand"
9 | "crypto/tls"
10 | "crypto/x509"
11 | "encoding/gob"
12 | "encoding/json"
13 | "encoding/pem"
14 | "fmt"
15 | "io"
16 | "math/big"
17 | mrand "math/rand"
18 | "net"
19 | "os"
20 | "os/exec"
21 | "sync"
22 | "testing"
23 | "time"
24 | )
25 |
26 | // MaxSteps to take
27 | var MaxSteps int
28 |
29 | // Maximum random delays to add to message deliveries for testing
30 | var MaxSleep time.Duration
31 |
32 | // Whether to run consensus among multiple separate processes
33 | var MultiProcess = true
34 |
35 | // Whether to use TLS encryption and authentication atop TCP
36 | var UseTLS = true
37 |
38 | // Information about each virtual host passed to child processes via JSON
39 | type testHost struct {
40 | Name string // Virtual host name
41 | Addr string // Host IP address and TCP port
42 | Cert []byte // Host's self-signed x509 certificate
43 | }
44 |
45 | // Configuration information each child goroutine or process needs to launch
46 | type testConfig struct {
47 | Self int // Which participant number we are
48 | Nnodes int // Total number of participants
49 | HostName string // This child's virtual hostname
50 |
51 | MaxSteps int
52 | MaxTicket int32
53 | MaxSleep time.Duration
54 | }
55 |
56 | func TestQSC(t *testing.T) {
57 |
58 | testCase(t, 1, 1, 10000, 0, 0) // Trivial case: 1 of 1 consensus!
59 | testCase(t, 2, 2, 10000, 0, 0) // Another trivial case: 2 of 2
60 |
61 | testCase(t, 2, 3, 1000, 0, 0) // Standard f=1 case
62 | testCase(t, 3, 5, 1000, 0, 0) // Standard f=2 case
63 | testCase(t, 4, 7, 100, 0, 0) // Standard f=3 case
64 | testCase(t, 5, 9, 100, 0, 0) // Standard f=4 case
65 | testCase(t, 11, 21, 20, 0, 0) // Standard f=10 case
66 | //testCase(t, 101, 201, 10, 0, 0) // Standard f=100 case - blows up
67 |
68 | testCase(t, 3, 3, 100, 0, 0) // Larger-than-minimum thresholds
69 | testCase(t, 6, 7, 100, 0, 0)
70 | testCase(t, 9, 10, 100, 0, 0)
71 |
72 | // Test with low-entropy tickets:
73 | // commit success rate will be bad, but still must remain safe!
74 | testCase(t, 2, 3, 10, 1, 0) // Limit case: will never commit
75 | testCase(t, 2, 3, 100, 2, 0) // Extreme low-entropy: rarely commits
76 | testCase(t, 2, 3, 100, 3, 0) // A bit better but still bad...
77 |
78 | // Test with random delays inserted
79 | testCase(t, 2, 3, 100, 0, 1*time.Nanosecond)
80 | testCase(t, 2, 3, 100, 0, 1*time.Microsecond)
81 | testCase(t, 2, 3, 100, 0, 1*time.Millisecond)
82 | testCase(t, 4, 7, 100, 0, 1*time.Microsecond)
83 | testCase(t, 4, 7, 100, 0, 1*time.Millisecond)
84 | }
85 |
86 | func testCase(t *testing.T, threshold, nnodes, maxSteps, maxTicket int,
87 | maxSleep time.Duration) {
88 |
89 | if maxTicket == 0 { // Default to moderate-entropy tickets
90 | maxTicket = 10 * nnodes
91 | }
92 |
93 | desc := fmt.Sprintf("T=%v,N=%v,Steps=%v,Tickets=%v,Sleep=%v",
94 | threshold, nnodes, maxSteps, maxTicket, maxSleep)
95 | t.Run(desc, func(t *testing.T) {
96 |
97 | // Configure and run the test case.
98 | MaxSteps = maxSteps
99 | MaxTicket = int32(maxTicket)
100 | MaxSleep = maxSleep
101 | Threshold = threshold
102 |
103 | testExec(t, threshold, nnodes)
104 | })
105 | }
106 |
107 | func testExec(t *testing.T, threshold, nnodes int) {
108 |
109 | // Create a cancelable context in which to execute helper processes
110 | ctx, cancel := context.WithCancel(context.Background())
111 | defer cancel() // kill child processes
112 |
113 | // Create a public/private keypair and self-signed cert for each node.
114 | conf := make([]testConfig, nnodes) // each node's config information
115 | for i := range conf {
116 | conf[i].Self = i
117 | conf[i].Nnodes = nnodes
118 | conf[i].HostName = fmt.Sprintf("host%v", i)
119 | conf[i].MaxSteps = MaxSteps
120 | conf[i].MaxTicket = MaxTicket
121 | conf[i].MaxSleep = MaxSleep
122 | }
123 |
124 | // Start the per-node child processes,
125 | // and gather network addresses and certificates from each one.
126 | childGroup := &sync.WaitGroup{}
127 | host := make([]testHost, nnodes)
128 | enc := make([]*json.Encoder, nnodes)
129 | dec := make([]*json.Decoder, nnodes)
130 | for i := range host {
131 |
132 | childGroup.Add(1)
133 | childIn, childOut := testExecChild(ctx, &conf[i], t, childGroup)
134 |
135 | // We'll communicate with the child via JSON-encoded stdin/out
136 | enc[i] = json.NewEncoder(childIn)
137 | dec[i] = json.NewDecoder(childOut)
138 |
139 | // Send the child its configuration information
140 | if err := enc[i].Encode(&conf[i]); err != nil {
141 | t.Fatalf("Encode: " + err.Error())
142 | }
143 |
144 | // Get the network address the child is listening on
145 | if err := dec[i].Decode(&host[i]); err != nil {
146 | t.Fatalf("Decode: %v", err.Error())
147 | }
148 | if host[i].Name != conf[i].HostName { // sanity check
149 | panic("hostname mismatch")
150 | }
151 | //println("child", i, "listening on", host[i].Addr)
152 | }
153 |
154 | // Send the array of addresses to all the child processes
155 | for i := range host {
156 | if err := enc[i].Encode(host); err != nil {
157 | t.Fatalf("Encode: " + err.Error())
158 | }
159 | }
160 |
161 | // Wait and collect the consensus histories of each child
162 | hist := make([][]choice, nnodes)
163 | for i := range host {
164 | if err := dec[i].Decode(&hist[i]); err != nil {
165 | t.Fatalf("Decode: %v", err.Error())
166 | }
167 | }
168 |
169 | // Let all the children know they can exit
170 | for i := range host {
171 | if err := enc[i].Encode(struct{}{}); err != nil {
172 | t.Fatalf("Encode: " + err.Error())
173 | }
174 | }
175 |
176 | // Wait for the helper processes to complete
177 | childGroup.Wait()
178 | }
179 |
180 | // Exec a child as a separate process.
181 | func testExecChild(ctx context.Context, conf *testConfig, t *testing.T,
182 | grp *sync.WaitGroup) (io.Writer, io.Reader) {
183 |
184 | if !MultiProcess {
185 | // Run a child as a separate goroutine in the same process.
186 | childInRd, childInWr := io.Pipe()
187 | childOutRd, childOutWr := io.Pipe()
188 | go func() {
189 | testChild(childInRd, childOutWr)
190 | grp.Done()
191 | }()
192 | return childInWr, childOutRd
193 | }
194 |
195 | // Run the child as a separate helper process
196 | cmd := exec.CommandContext(ctx, os.Args[0],
197 | "-test.run=TestHelper")
198 | cmd.Env = append(os.Environ(), "TLC_HELPER=1")
199 |
200 | // Arrange to send standard input to the child via pipe
201 | childIn, err := cmd.StdinPipe()
202 | if err != nil {
203 | t.Fatalf("StdinPipe: %v", err.Error())
204 | }
205 |
206 | // Copy child's standard output to parent via pipe
207 | childOut, err := cmd.StdoutPipe()
208 | if err != nil {
209 | t.Fatalf("StdoutPipe: %v", err.Error())
210 | }
211 |
212 | // Copy child's standard error to parent's standard error
213 | childErr, err := cmd.StderrPipe()
214 | if err != nil {
215 | t.Fatalf("StderrPipe: %v", err.Error())
216 | }
217 | go copyAll(os.Stderr, childErr)
218 |
219 | // Start the command running
220 | if err := cmd.Start(); err != nil {
221 | t.Fatalf("cmd.Start: %v", err.Error())
222 | }
223 |
224 | // Arrange to signal the provided WaitGroup when child terminates
225 | go func() {
226 | if err := cmd.Wait(); err != nil {
227 | t.Fatalf("cmd.Wait: %v", err.Error())
228 | }
229 | grp.Done()
230 | }()
231 |
232 | return childIn, childOut
233 | }
234 |
235 | func TestHelper(t *testing.T) {
236 |
237 | if os.Getenv("TLC_HELPER") == "" {
238 | return // Do nothing except when called as a helper
239 | }
240 |
241 | // Exit with error status if anything goes wrong.
242 | defer os.Exit(1)
243 |
244 | testChild(os.Stdin, os.Stdout)
245 | os.Exit(0)
246 | }
247 |
248 | func copyAll(dst io.Writer, src io.Reader) {
249 | if _, err := io.Copy(dst, src); err != nil {
250 | println("Copy: " + err.Error())
251 | }
252 | }
253 |
254 | func createCert(hostName string) (certPemBytes, privPemBytes []byte) {
255 |
256 | priv, err := ecdsa.GenerateKey(elliptic.P256(), crand.Reader)
257 | if err != nil {
258 | panic("createCert: " + err.Error())
259 | }
260 |
261 | notBefore := time.Now() // valid starting now
262 | notAfter := notBefore.Add(365 * 24 * time.Hour) // valid for a year
263 | tmpl := x509.Certificate{
264 | NotBefore: notBefore,
265 | NotAfter: notAfter,
266 | IsCA: true,
267 | KeyUsage: x509.KeyUsageKeyEncipherment |
268 | x509.KeyUsageDigitalSignature |
269 | x509.KeyUsageCertSign,
270 | ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth,
271 | x509.ExtKeyUsageClientAuth},
272 | BasicConstraintsValid: true,
273 | DNSNames: []string{hostName},
274 | SerialNumber: big.NewInt(1),
275 | }
276 | certb, err := x509.CreateCertificate(crand.Reader, &tmpl, &tmpl,
277 | &priv.PublicKey, priv)
278 | if err != nil {
279 | panic("createCert: " + err.Error())
280 | }
281 |
282 | cert, err := x509.ParseCertificate(certb)
283 | if err != nil {
284 | panic("ParseCertificate: " + err.Error())
285 | }
286 |
287 | if err := cert.VerifyHostname(hostName); err != nil {
288 | panic("VerifyHostname: " + err.Error())
289 | }
290 |
291 | // Sanity-check the certificate just to make sure it actually works.
292 | pool := x509.NewCertPool()
293 | pool.AddCert(cert)
294 | vo := x509.VerifyOptions{DNSName: hostName, Roots: pool}
295 | if _, err := cert.Verify(vo); err != nil {
296 | panic("Verify: " + err.Error())
297 | }
298 | //println("verified for", hostName)
299 |
300 | // PEM-encode our certificate
301 | certPem := bytes.NewBuffer(nil)
302 | if err := pem.Encode(certPem, &pem.Block{Type: "CERTIFICATE",
303 | Bytes: certb}); err != nil {
304 | panic("pem.Encode: " + err.Error())
305 | }
306 |
307 | // PEM-encode our private key
308 | privb, err := x509.MarshalECPrivateKey(priv)
309 | if err != nil {
310 | panic("x509.MarshalECPrivateKey: " + err.Error())
311 | }
312 | privPem := bytes.NewBuffer(nil)
313 | if err := pem.Encode(privPem, &pem.Block{Type: "EC PRIVATE KEY",
314 | Bytes: privb}); err != nil {
315 | panic("pem.Encode: " + err.Error())
316 | }
317 |
318 | return certPem.Bytes(), privPem.Bytes()
319 | }
320 |
321 | func testChild(in io.Reader, out io.Writer) {
322 |
323 | // We'll use JSON over stdin/stdout to coordinate with our parent.
324 | dec := json.NewDecoder(in)
325 | enc := json.NewEncoder(out)
326 |
327 | // Get the child process config information via JSON
328 | conf := testConfig{}
329 | if err := dec.Decode(&conf); err != nil {
330 | panic("Decode: " + err.Error())
331 | }
332 | self := conf.Self
333 | MaxSteps = conf.MaxSteps
334 | MaxTicket = conf.MaxTicket
335 | MaxSleep = conf.MaxSleep
336 |
337 | // Initialize the node appropriately
338 | //println("self", self, "nnodes", conf.Nnodes)
339 | n := &Node{}
340 | n.init(self, make([]peer, conf.Nnodes))
341 | n.mutex.Lock() // keep node's TLC state locked until fully set up
342 |
343 | // Create a TLS/TCP listen socket for this child
344 | tcpl, err := net.Listen("tcp", "")
345 | if err != nil {
346 | panic("Listen: " + err.Error())
347 | }
348 |
349 | // Create an x509 certificate and private key for this child
350 | //println(self, "createCert for", conf.HostName)
351 | certb, privb := createCert(conf.HostName)
352 |
353 | // Create a TLS certificate from it
354 | tlscert, err := tls.X509KeyPair(certb, privb)
355 | if err != nil {
356 | panic("tls.X509KeyPair: " + err.Error())
357 | }
358 |
359 | // Report our network address and certificate to the parent process
360 | myHost := testHost{
361 | Name: conf.HostName,
362 | Addr: tcpl.Addr().String(),
363 | Cert: certb,
364 | }
365 | if err := enc.Encode(myHost); err != nil {
366 | panic("Encode: " + err.Error())
367 | }
368 |
369 | // Get the list of all host names, addresses, and certs from the parent
370 | host := []testHost{}
371 | if err := dec.Decode(&host); err != nil {
372 | panic("Decode: " + err.Error())
373 | }
374 |
375 | // Create a certificate pool containing all nodes' certificates
376 | pool := x509.NewCertPool()
377 | for i := range host {
378 | if !pool.AppendCertsFromPEM(host[i].Cert) {
379 | panic("failed to append cert from " + host[i].Name)
380 | }
381 | }
382 |
383 | //println("hostName", conf.HostName, "pool", len(pool.Subjects()))
384 |
385 | // Listen and accept TCP/TLS connections
386 | donegrp := &sync.WaitGroup{}
387 | go func() {
388 | for {
389 | // Accept a TCP connection
390 | tcpc, err := tcpl.Accept()
391 | if err != nil {
392 | panic("Accept: " + err.Error())
393 | }
394 |
395 | // Launch a goroutine to process it
396 | donegrp.Add(1)
397 | go n.acceptNetwork(tcpc, &tls.Config{
398 | RootCAs: pool,
399 | Certificates: []tls.Certificate{tlscert},
400 | ServerName: conf.HostName,
401 | ClientAuth: tls.RequireAndVerifyClientCert,
402 | ClientCAs: pool,
403 | }, host, donegrp)
404 | }
405 | }()
406 |
407 | // Open TCP and optionally TLS connections to each peer
408 | //println(self, "open TLS connections to", len(host), "peers")
409 | stepgrp := &sync.WaitGroup{}
410 | for i := range host {
411 | // Open an authenticated TLS connection to peer i
412 | peerConf := tls.Config{
413 | RootCAs: pool,
414 | Certificates: []tls.Certificate{tlscert},
415 | ServerName: conf.HostName,
416 | ClientAuth: tls.RequireAndVerifyClientCert,
417 | ClientCAs: pool,
418 | }
419 | peerConf.ServerName = host[i].Name
420 | //println(self, "Dial", host[i].Name, host[i].Addr)
421 | var conn net.Conn
422 | if UseTLS {
423 | conn, err = tls.Dial("tcp", host[i].Addr, &peerConf)
424 | } else {
425 | conn, err = net.Dial("tcp", host[i].Addr)
426 | }
427 | if err != nil {
428 | panic("Dial: " + err.Error())
429 | }
430 |
431 | // Tell the server which client we are.
432 | enc := gob.NewEncoder(conn)
433 | if err := enc.Encode(self); err != nil {
434 | panic("gob.Encode: " + err.Error())
435 | }
436 |
437 | // Set up a peer sender object.
438 | // It signals stepgrp.Done() after enough steps pass.
439 | stepgrp.Add(1)
440 | n.peer[i] = &testPeer{enc, stepgrp, conn}
441 | }
442 | //println(self, "opened TLS connections")
443 |
444 | // Start the consensus test
445 | n.advanceTLC(0)
446 |
447 | // Now we can let the receive goroutines process incoming messages
448 | n.mutex.Unlock()
449 |
450 | // Wait to finish enough consensus rounds
451 | //println(self, "wait for test to complete")
452 | stepgrp.Wait()
453 |
454 | // Report our observed consensus history to the parent
455 | if err := enc.Encode(n.choice); err != nil {
456 | panic("Encode: " + err.Error())
457 | }
458 |
459 | // Finally, wait for our parent to signal when the test is complete.
460 | if err := dec.Decode(&struct{}{}); err != nil {
461 | panic("Decode: " + err.Error())
462 | }
463 |
464 | //println(self, "child finished")
465 | }
466 |
467 | // Accept a new TLS connection on a TCP server socket.
468 | func (n *Node) acceptNetwork(conn net.Conn, tlsConf *tls.Config,
469 | host []testHost, donegrp *sync.WaitGroup) {
470 |
471 | // Enable TLS on the connection and run the handshake.
472 | if UseTLS {
473 | conn = tls.Server(conn, tlsConf)
474 | }
475 | defer func() { conn.Close() }()
476 |
477 | // Receive the client's nodenumber indication
478 | dec := gob.NewDecoder(conn)
479 | var peer int
480 | if err := dec.Decode(&peer); err != nil {
481 | println(n.self, "acceptNetwork gob.Decode: "+err.Error())
482 | return
483 | //panic("acceptNetwork gob.Decode: " + err.Error())
484 | }
485 | if peer < 0 || peer >= len(host) {
486 | println("acceptNetwork: bad peer number")
487 | return
488 | }
489 |
490 | // Authenticate the client with TLS.
491 | // XXX Why doesn't VerifyHostname work to verify a client auth?
492 | // Go TLS bug to report?
493 | //if err := tlsc.VerifyHostname(host[peer].Name); err != nil {
494 | // panic("VerifyHostname: " + err.Error())
495 | //}
496 | if UseTLS {
497 | cs := conn.(*tls.Conn).ConnectionState()
498 | if len(cs.PeerCertificates) < 1 {
499 | println("acceptNetwork: no certificate from client")
500 | return
501 | }
502 | err := cs.PeerCertificates[0].VerifyHostname(host[peer].Name)
503 | if err != nil {
504 | println("VerifyHostname: " + err.Error())
505 | return
506 | }
507 | }
508 |
509 | // Receive and process arriving messages
510 | n.runReceiveNetwork(peer, dec, donegrp)
511 | }
512 |
513 | // Receive messages from a connection and dispatch them into the TLC stack.
514 | func (n *Node) runReceiveNetwork(peer int, dec *gob.Decoder,
515 | grp *sync.WaitGroup) {
516 | for {
517 | // Get next message from this peer
518 | msg := Message{}
519 | err := dec.Decode(&msg)
520 | if err == io.EOF {
521 | break
522 | } else if err != nil {
523 | panic("receiveCausal:" + err.Error())
524 | }
525 | //println(n.self, n.tmpl.Step, "runReceiveNetwork: recv from",
526 | // msg.From, "type", msg.Typ, "seq", msg.Seq,
527 | // "step", msg.Step)
528 |
529 | // Optionally insert random delays on a message basis
530 | time.Sleep(time.Duration(mrand.Int63n(int64(MaxSleep + 1))))
531 |
532 | grp.Add(1)
533 | go n.receiveNetwork(&msg, grp)
534 | }
535 | grp.Done() // signal that we're done
536 | }
537 |
538 | func (n *Node) receiveNetwork(msg *Message, grp *sync.WaitGroup) {
539 |
540 | // Keep the stack single-threaded.
541 | n.mutex.Lock()
542 | defer func() {
543 | n.mutex.Unlock()
544 | grp.Done()
545 | }()
546 |
547 | // Dispatch up to the causal ordering layer
548 | //println(n.self, n.tmpl.Step, "receiveNetwork from", msg.From,
549 | // "type", msg.Typ, "seq", msg.Seq, "vec", len(msg.Vec))
550 | n.receiveCausal(msg)
551 | }
552 |
553 | type testPeer struct {
554 | e *gob.Encoder
555 | w *sync.WaitGroup
556 | c io.Closer
557 | }
558 |
559 | func (tp *testPeer) Send(msg *Message) {
560 | if tp.e != nil {
561 | //println("testPeer.Send seq", msg.Seq, "step", msg.Step,
562 | // "MaxSteps", MaxSteps)
563 | if err := tp.e.Encode(msg); err != nil {
564 | println("Encode:", err.Error())
565 | }
566 | }
567 | if tp.w != nil && MaxSteps > 1 && msg.Step >= MaxSteps {
568 | //println("testPeer.Send done")
569 | tp.w.Done()
570 | tp.w = nil
571 | }
572 | }
573 |
--------------------------------------------------------------------------------
/go/dist/doc.go:
--------------------------------------------------------------------------------
1 | // Package dist implements a minimalistic distributed implementation
2 | // of TLC and QSC for the non-Byzantine (fail-stop) threat model.
3 | // It uses TLS/TCP for communication, gob encoding for serialization, and
4 | // a basic causal ordering protocol based on vector time.
5 | package dist
6 |
--------------------------------------------------------------------------------
/go/dist/node.go:
--------------------------------------------------------------------------------
1 | package dist
2 |
3 | import (
4 | "sync"
5 | )
6 |
7 | // Threshold is the TLC and consensus threshold
8 | var Threshold int
9 |
10 | // MaxTicket is the amount of entropy in lottery tickets
11 | var MaxTicket int32 = 100
12 |
13 | // Type of message
14 | type Type int
15 |
16 | const (
17 | // Prop is a raw unwitnessed proposal
18 | Prop Type = iota
19 | // Ack is an acknowledgment of a proposal
20 | Ack
21 | // Wit is a threshold witness confirmation of proposal
22 | Wit
23 | )
24 |
25 | // Message over the network
26 | type Message struct {
27 | // Network/peering layer
28 |
29 | // From designates the node which originally sent this message
30 | From int
31 |
32 | // Causality layer
33 | // Seq is the Node-local sequence number for vector time
34 | Seq int
35 | // Vec is the vector clock update from the sender node
36 | Vec vec
37 |
38 | // Threshold time (TLC) layer
39 | // Step is the logical time step this message is for
40 | Step int
41 | // typ is the message type
42 | Typ Type
43 | // Prop is the proposal Seq this Ack or Wit is about
44 | Prop int
45 | // Ticket is the genetic fitness ticket for this proposal
46 | Ticket int32
47 | }
48 |
49 | // Node definition
50 | type Node struct {
51 | // Network/peering layer
52 | self int // This node's participant number
53 | peer []peer // How to send messages to each peer
54 | mutex sync.Mutex // Mutex protecting node's protocol stack
55 |
56 | // Causal history layer
57 | mat []vec // Node's current matrix clock
58 | oom [][]*Message // Out-of-order messages not yet delivered
59 | seqLog [][]*Message // Each node's messages received and delivered, by seq
60 | saw []set // Messages each node saw recently
61 | wit []set // Witnessed messages each node saw recently
62 |
63 | // Threshold time (TLC) layer
64 | tmpl Message // Template for messages we send
65 | save int // Earliest step for which we maintain history
66 | acks set // Acknowledgments we've received in this step
67 | wits set // Threshold witnessed messages seen this step
68 | stepLog [][]logEntry // Nodes' messages seen by start of recent steps
69 |
70 | // This node's record of QSC consensus history
71 | choice []choice // Best proposal this node chose each round
72 | }
73 |
74 | type peer interface {
75 | Send(msg *Message)
76 | }
77 |
78 | // Info each node logs about other nodes' views at the start of each time-step
79 | type logEntry struct {
80 | saw set // All nodes' messages the node had seen by then
81 | wit set // Threshold witnessed messages it had seen
82 | }
83 |
84 | // Record of one node's QSC decision in one time-step
85 | type choice struct {
86 | best int // Best proposal this node chose in this round
87 | commit bool // Whether node observed successful commitment
88 | }
89 |
90 | func (n *Node) init(self int, peer []peer) {
91 | n.self = self
92 | n.peer = peer
93 |
94 | n.initCausal()
95 | n.initTLC()
96 | }
97 |
--------------------------------------------------------------------------------
/go/dist/qsc.go:
--------------------------------------------------------------------------------
1 | package dist
2 |
3 | // RoundSteps is three because the witnessed QSC requires three TLC
4 | // time-steps per consensus round.
5 | const RoundSteps = 3
6 |
7 | // The TLC layer upcalls this method on advancing to a new time-step,
8 | // with sets of proposals seen (saw) and threshold witnessed (wit) recently.
9 | func (n *Node) advanceQSC(saw, wit set) {
10 | //println(n.self, n.tmpl.Step, "advanceQSC saw", len(saw),
11 | // "wit", len(wit))
12 |
13 | // Calculate the starting step of the round that's just now completing.
14 | s := n.tmpl.Step - RoundSteps
15 | if s < 0 {
16 | return // Nothing to be done until the first round completes
17 | }
18 |
19 | // Find the best eligible proposal that was broadcast at s+0
20 | // and that is in our view by the end of the round at s+3.
21 | var bestProp *Message
22 | var bestTicket int32
23 | for p := range wit {
24 | if p.Typ != Prop {
25 | panic("wit should contain only proposals")
26 | }
27 | if p.Step == s+0 && p.Ticket >= bestTicket {
28 | bestProp = p
29 | bestTicket = p.Ticket
30 | }
31 | }
32 |
33 | // Determine if we can consider this proposal permanently committed.
34 | spoiled := n.spoiledQSC(s, saw, bestProp, bestTicket)
35 | reconfirmed := n.reconfirmedQSC(s, wit, bestProp)
36 | committed := !spoiled && reconfirmed
37 |
38 | // Record the consensus results for this round (from s to s+3).
39 | n.choice = append(n.choice, choice{bestProp.From, committed})
40 | //println(n.self, n.tmpl.Step, "choice", bestProp.From, "spoiled", spoiled,
41 | // "reconfirmed", reconfirmed, "committed", committed)
42 |
43 | // Don't bother saving history before the start of the next round.
44 | n.save = s + 1
45 | }
46 |
47 | // Return true if there's another proposal competitive with a given candidate.
48 | func (n *Node) spoiledQSC(s int, saw set, prop *Message, ticket int32) bool {
49 | for p := range saw {
50 | if p.Step == s+0 && p.Typ == Prop && p != prop &&
51 | p.Ticket >= ticket {
52 | return true // victory spoiled by competition!
53 | }
54 | }
55 | return false
56 | }
57 |
58 | // Return true if given proposal was doubly confirmed (reconfirmed).
59 | func (n *Node) reconfirmedQSC(s int, wit set, prop *Message) bool {
60 | for p := range wit { // search for a paparazzi witness at s+1
61 | if p.Step == s+1 && n.stepLog[p.From][s+1].wit.has(prop) {
62 | return true
63 | }
64 | }
65 | return false
66 | }
67 |
--------------------------------------------------------------------------------
/go/dist/set.go:
--------------------------------------------------------------------------------
1 | package dist
2 |
3 | // Use a map to represent a set of messages
4 | type set map[*Message]struct{}
5 |
6 | // Test if msg is in set s.
7 | func (s set) has(msg *Message) bool {
8 | _, present := s[msg]
9 | return present
10 | }
11 |
12 | // Add msg to set s.
13 | func (s set) add(msg *Message) {
14 | s[msg] = struct{}{}
15 | }
16 |
17 | // Return a copy of message set s,
18 | // dropping any messages before earliest.
19 | func (s set) copy(earliest int) set {
20 | n := make(set)
21 | for k, v := range s {
22 | if k.Step >= earliest {
23 | n[k] = v
24 | }
25 | }
26 | return n
27 | }
28 |
--------------------------------------------------------------------------------
/go/dist/tlc.go:
--------------------------------------------------------------------------------
1 | package dist
2 |
3 | import (
4 | "math/rand"
5 | )
6 |
7 | // Initialize the TLC layer state in a Node
8 | func (n *Node) initTLC() {
9 | n.tmpl = Message{From: n.self, Step: -1}
10 | n.stepLog = make([][]logEntry, len(n.peer))
11 | }
12 |
13 | // Broadcast a copy of our current message template to all nodes
14 | func (n *Node) broadcastTLC() *Message {
15 |
16 | //println(n.self, n.tmpl.Step, "broadcast", msg, "typ", msg.Typ)
17 | msg := n.tmpl
18 | n.broadcastCausal(&msg)
19 | return &msg
20 | }
21 |
22 | // Unicast an acknowledgment of a given proposal to its sender
23 | func (n *Node) acknowledgeTLC(prop *Message) {
24 |
25 | msg := n.tmpl
26 | msg.Typ = Ack
27 | msg.Prop = prop.Seq
28 | n.sendCausal(prop.From, &msg)
29 | }
30 |
31 | // Advance to a new time step.
32 | func (n *Node) advanceTLC(step int) {
33 | //println(n.self, step, "advanceTLC",
34 | // "saw", len(n.saw[n.self]), "wit", len(n.wit[n.self]))
35 |
36 | // Initialize our message template for new time step
37 | n.tmpl.Step = step // Advance to new time step
38 | n.tmpl.Typ = Prop // Raw unwitnessed proposal message initially
39 | n.tmpl.Ticket = rand.Int31n(MaxTicket) // Choose a ticket
40 |
41 | n.acks = make(set) // No acknowledgments received yet in this step
42 | n.wits = make(set) // No threshold witnessed messages received yet
43 |
44 | // Notify the upper (QSC) layer of the advancement of time,
45 | // and let it fill in its part of the new message to broadcast.
46 | n.advanceQSC(n.saw[n.self], n.wit[n.self])
47 |
48 | prop := n.broadcastTLC() // broadcast our raw proposal
49 | n.tmpl.Prop = prop.Seq // save proposal's sequence number
50 | n.acks.add(prop) // automatically self-acknowledge it
51 | }
52 |
53 | func (n *Node) receiveTLC(msg *Message) {
54 |
55 | // Now process this message according to type.
56 | //println(n.self, n.tmpl.Step, "receivedTLC from", msg.From,
57 | // "step", msg.Step, "typ", msg.Typ)
58 | switch msg.Typ {
59 | case Prop: // A raw unwitnessed proposal broadcast.
60 |
61 | // Record the set of messages this node had seen
62 | // by the time it advanced to this new time-step.
63 | if len(n.stepLog[msg.From]) != msg.Step {
64 | panic("out of sync")
65 | }
66 | n.stepLog[msg.From] = append(n.stepLog[msg.From],
67 | logEntry{n.saw[msg.From], n.wit[msg.From]})
68 |
69 | // Continue from pruned copies in the next time step
70 | n.saw[msg.From] = n.saw[msg.From].copy(n.save)
71 | n.wit[msg.From] = n.wit[msg.From].copy(n.save)
72 |
73 | if msg.Step == n.tmpl.Step {
74 | //println(n.self, n.tmpl.Step, "ack", msg.From)
75 | n.acknowledgeTLC(msg)
76 | }
77 |
78 | case Ack: // An acknowledgment. Collect a threshold of acknowledgments.
79 | if msg.Prop == n.tmpl.Prop { // only if it acks our proposal
80 | n.acks.add(msg)
81 | //println(n.self, n.tmpl.Step, "got ack", len(n.acks))
82 | if n.tmpl.Typ == Prop && len(n.acks) >= Threshold {
83 |
84 | // Broadcast a threshold-witnessed certification
85 | n.tmpl.Typ = Wit
86 | n.broadcastTLC()
87 | }
88 | }
89 |
90 | case Wit: // A threshold-witnessed message. Collect a threshold of them.
91 | prop := n.seqLog[msg.From][msg.Prop]
92 | if prop.Typ != Prop {
93 | panic("doesn't refer to a proposal!")
94 | }
95 | if msg.Step == n.tmpl.Step {
96 |
97 | // Collect a threshold of Wit witnessed messages.
98 | n.wits.add(prop) // witnessed messages in this step
99 | if len(n.wits) >= Threshold {
100 |
101 | // We've met the condition to advance time.
102 | n.advanceTLC(n.tmpl.Step + 1)
103 | }
104 | }
105 | }
106 | }
107 |
--------------------------------------------------------------------------------
/go/dist/vec.go:
--------------------------------------------------------------------------------
1 | package dist
2 |
3 | // Vector timestamp
4 | type vec []int
5 |
6 | // Return a copy of this vector
7 | func (v vec) copy() vec {
8 | return append(vec{}, v...)
9 | }
10 |
11 | // Return true if vector timestamp v is causally before or equal to y.
12 | func (v vec) le(y vec) bool {
13 | for i := range v {
14 | if v[i] > y[i] {
15 | return false
16 | }
17 | }
18 | return true
19 | }
20 |
21 | // Set v to the elementwise maximum of vectors x and y.
22 | // Inputs x and/or y can be the same as target v.
23 | func (v vec) max(x, y vec) {
24 | for i := range v {
25 | if x[i] > y[i] {
26 | v[i] = x[i]
27 | } else {
28 | v[i] = y[i]
29 | }
30 | }
31 | }
32 |
33 | //func (v vec) String() {
34 | // fmt.Sprintf("%v", []int(v))
35 | //}
36 |
--------------------------------------------------------------------------------
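A brief illustration (not part of the repository) of how the vec helpers above support the causal-delivery rule used in causal.go; it is written as if it were test code inside the dist package, and the concrete timestamps are invented:

    package dist

    // exampleVec sketches (hypothetically) how vector timestamps gate delivery:
    // a message stamped with mv may be delivered once mv.le(ours) holds, i.e.
    // we have already delivered everything its sender had seen when sending it.
    func exampleVec() {
        ours := vec{3, 1, 4} // messages delivered so far from nodes 0, 1, 2
        mv := vec{2, 1, 4}   // timestamp carried by an incoming message

        deliverable := mv.le(ours) // true: every entry of mv is <= ours

        merged := ours.copy()  // copy so the original is left untouched
        merged.max(merged, mv) // elementwise maximum; still {3, 1, 4} here

        _, _ = deliverable, merged
    }
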
/go/lib/backoff/retry.go:
--------------------------------------------------------------------------------
1 | // Package backoff converts errors into time delays via random exponential backoff.
2 | // It is designed to be extremely simple to use but robust and automatic.
3 | //
4 | package backoff
5 |
6 | import (
7 | "context"
8 | "log"
9 | "math/rand"
10 | "time"
11 | )
12 |
13 | // Retry calls try() repeatedly until it returns without an error,
14 | // with the default exponential backoff configuration.
15 | //
16 | // By default, Retry continues to try forever until it succeeds.
17 | // The caller may pass a cancelable context in the ctx parameter, however,
18 | // in which case Retry gives up calling try when the context is cancelled.
19 | // If the context was already cancelled on the call to Retry,
20 | // then Retry returns ctx.Err() immediately without calling try.
21 | //
22 | func Retry(ctx context.Context, try func() error) error {
23 | return Config{}.Retry(ctx, try)
24 | }
25 |
26 | // Config represents configuration parameters for exponential backoff.
27 | // To use, initialize a Config structure with the desired parameters
28 | // and then call Config.Retry().
29 | //
30 | // Report, if non-nil, is a function called by Retry to report errors
31 | // in an appropriate fashion specific to the application.
32 | // If nil, Retry reports errors via log.Println by default.
33 | // Report may also return a non-nil error to abort the Retry loop if it
34 | // determines that the detected error is permanent and waiting will not help.
35 | //
36 | type Config struct {
37 | Report func(error) error // Function to report errors
38 | MaxWait time.Duration // Maximum backoff wait period
39 |
40 | mayGrow struct{} // Ensure Config remains extensible
41 | }
42 |
43 | func defaultReport(err error) error {
44 | log.Println(err.Error())
45 | return nil
46 | }
47 |
48 | // Retry calls try() repeatedly until it returns without an error,
49 | // using exponential backoff configuration c.
50 | func (c Config) Retry(ctx context.Context, try func() error) error {
51 |
52 | // Make sure we have a valid error reporter
53 | if c.Report == nil {
54 | c.Report = defaultReport
55 | }
56 |
57 | // Return immediately if ctx was already cancelled
58 | if ctx.Err() != nil {
59 | return ctx.Err()
60 | }
61 |
62 | backoff := time.Duration(1) // minimum backoff duration
63 | for {
64 | before := time.Now()
65 | err := try()
66 | if err == nil { // success
67 | return nil
68 | }
69 | elapsed := time.Since(before)
70 |
71 | // Report the error as appropriate
72 | err = c.Report(err)
73 | if err != nil {
74 | return err // abort the retry loop
75 | }
76 |
77 | // Wait for an exponentially-growing random backoff period,
78 | // with the duration of each operation attempt as the minimum
79 | if backoff <= elapsed {
80 | backoff = elapsed
81 | }
82 | backoff += time.Duration(rand.Int63n(int64(backoff)))
83 | if c.MaxWait > 0 && backoff > c.MaxWait {
84 | backoff = c.MaxWait
85 | }
86 |
87 | // Wait for either the backoff timer or a cancel signal.
88 | t := time.NewTimer(backoff)
89 | select {
90 | case <-t.C: // Backoff timer expired
91 | continue
92 |
93 | case <-ctx.Done(): // Our context got cancelled
94 | t.Stop()
95 | return ctx.Err()
96 | }
97 | }
98 | }
99 |
--------------------------------------------------------------------------------
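A short usage sketch (not part of the repository) of the Retry and Config API defined above; the import path assumes this repository's module layout, and the operation, sentinel error, and report function are hypothetical:

    package main

    import (
        "context"
        "errors"
        "log"
        "time"

        "github.com/dedis/tlc/go/lib/backoff"
    )

    var errPermanent = errors.New("permanent failure") // hypothetical sentinel

    // doRequest stands in for a real operation that may fail transiently.
    func doRequest() error { return nil }

    func main() {
        // Give up after 30 seconds overall, and cap each backoff wait at 5s.
        ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
        defer cancel()

        cfg := backoff.Config{
            MaxWait: 5 * time.Second,
            Report: func(err error) error {
                if errors.Is(err, errPermanent) {
                    return err // abort the retry loop for permanent errors
                }
                log.Println("retrying after error:", err)
                return nil // keep retrying transient errors
            },
        }

        if err := cfg.Retry(ctx, doRequest); err != nil {
            log.Println("gave up:", err)
        }
    }
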
/go/lib/backoff/retry_test.go:
--------------------------------------------------------------------------------
1 | package backoff
2 |
3 | import (
4 | "context"
5 | "errors"
6 | "fmt"
7 | "testing"
8 | "time"
9 | )
10 |
11 | func TestRetry(t *testing.T) {
12 |
13 | n := 0
14 | try := func() error {
15 | n++
16 | if n < 30 {
17 | return fmt.Errorf("test error %d", n)
18 | }
19 | return nil
20 | }
21 | Retry(context.Background(), try)
22 | }
23 |
24 | func TestTimeout(t *testing.T) {
25 |
26 | ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
27 | try := func() error {
28 | return errors.New("haha, never going to succeed")
29 | }
30 | if err := Retry(ctx, try); err != context.DeadlineExceeded {
31 | t.Errorf("got wrong error from Retry: %v", err.Error())
32 | }
33 |
34 | // Now test with an already-cancelled context
35 | try = func() error {
36 | panic("shouldn't get here!")
37 | }
38 | if err := Retry(ctx, try); err != context.DeadlineExceeded {
39 | t.Errorf("got wrong error from Retry: %v", err.Error())
40 | }
41 |
42 | // for good measure
43 | cancel()
44 | }
45 |
--------------------------------------------------------------------------------
/go/lib/backoff/rfq/doc.go:
--------------------------------------------------------------------------------
1 | // Package rfq implements responsively-fair queueing (RFQ).
2 | // or distributed queueing? RFDQ
3 | //
4 | // If a server has limited resources to serve a potentially-unlimited
5 | // number of clients (especially in a flash crowd or DDoS attack setting),
6 | // we wish to allocate the server's limited resources fairly among clients,
7 | // so that (for example) fast clients cannot indefinitely starve slower ones.
8 | // This definition of fairness implies wait-freedom, i.e., lack of starvation.
9 | // We would like the server to be able to serve an arbitrary number of clients
10 | // in a fair or at least starvation-free way, with only constant server state.
11 | //
12 | // One approach is for the server to organize the clients into a literal queue,
13 | // with each client responsible for remembering who is next in the queue,
14 | // so the space required for next pointers is distributed among the clients.
15 | // This is what queue-based multiprocessor shared memory locking algorithms do.
16 | // But it works only when the clients are perfectly reliable and trustworthy:
17 | // if not, a single crashed client breaks the chain
18 | // and leaves all clients waiting behind it "dangling" and blocked forever
19 | // (at least without introducing timeouts or similar recovery mechanisms).
20 | //
21 | // Another baseline approach is to have all clients run a backoff algorithm
22 | // when they submit a request that the server must reject due to a full queue.
23 | // This approach might be statistically fair and starvation-free
24 | // if all the clients have similar processing speed and connectivity,
25 | // but clients that are much faster than others can starve slow clients.
26 | // This is because if some number of fast clients can saturate the server,
27 | // re-filling the server's queue only a brief moment after space opens up,
28 | // and a slow client's network round-trip time and/or client-side delay
29 | // add up to significantly more than the server's work-item processing time,
30 | // then each time the slow client attempts to retry it will always find
31 | // that the server's queue has already been filled again by the fast clients.
32 | //
33 | // A next-step solution to ensure approximate fairness across all clients
34 | // would be for the server to propagate maximum backoff delays among clients.
35 | // For example, suppose a slow client attempts to submit a request,
36 | // is rejected due to a full server queue, resubmits it t ms later,
37 | // and is again rejected, increasing its backoff timer to 2t ms.
38 | // If the original t ms value was dominated by client-side or network delays,
39 | // and the work-item processing times for fast clients is significantly less,
40 | // then with independent backoff delays the slow client will be starved.
41 | // But if the server notices that the slow client has backed off to 2t ms,
42 | // and in response forces *all* clients to use a maximum backoff of 2t ms
43 | // until the slow client's request has been satisfied,
44 | // then the slow client will no longer be starved
45 | // and allocation of the server's resources will be approximately fair.
46 | //
47 | // This approach fails to be responsive, however: the server's response times
48 | // to fast clients are slowed to those of the slowest client at a given time.
49 | // This approach can also greatly underutilize the server's resources:
50 | // the server may be perfectly able to process many work-items every 2t ms,
51 | // but has slowed itself down to the rate of the slowest client for fairness.
52 | // Pursuing such strong fairness also creates DoS attack vectors,
53 | // since it is trivial for a misbehaved client simply to pretend to be slow.
54 | // In practice we cannot achieve both perfect fairness and responsiveness:
55 | // utilizing the server's full capacity to service fast clients quickly
56 | // inherently means that fast clients obtain more resources than slow clients.
57 | // But we would still like to be "reasonably" fair while also responsive,
58 | // and particularly to ensure that no client, however slow, is starved.
59 | //
60 | // RFQ thus provides "responsively-fair queueing",
61 | // which ensures statistical fairness among clients that see similar delays,
62 | // and similarly ensures fairness among clients in different delay classes.
63 | // ...
64 | //
65 | // Server has a limited internal queue, which it keeps sorted
66 | // oldest-request-first as judged by the server's own clock.
67 | // An externally-queued request can bump an internally-queued request
68 | // if the server has previously outsourced it to the client and forgotten it
69 | // but its approximate service time has arrived and the client resubmitted it.
70 | //
71 | // Tolerating misbehaving (Byzantine) clients:
72 | // use server-side MAC (and optionally encryption) to protect the state
73 | // the server outsources to clients.
74 | //
75 | // Issue: replay attacks, since server doesn't have storage to remember
76 | // which tokens have and haven't been "used" or how many times.
77 | // Full processing might reveal and neutralize the effect of a replay --
78 | // e.g., where a cryptocurrency server finds a UTXO was already spent --
79 | // but full processing might be significantly more costly in resources.
80 | // One simple defense is to have the server keep a record (e.g., hash table)
81 | // of all the tokens that have been processed within some past epoch.
82 | // The server can then detect and trivially discard replays within one epoch,
83 | // thereby rate-limiting the effectiveness of token replays to one per epoch.
84 | // This takes storage linear in the epoch length and server's processing rate,
85 | // but is independent of the number of clients contending to submit requests.
86 | //
87 | // If it is acceptable to impose a maximum round-trip delay on any client,
88 | // denying service to clients that can't resubmit a request within one epoch,
89 | // then the server can presume requests from earlier epochs to be replays
90 | // and reject them unconditionally, thereby eliminating replay attacks.
91 | //
92 | package rfq
93 |
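94 | // The sketch below illustrates the per-epoch replay filter described above.
95 | // It is only an illustrative assumption, not part of this package's design:
96 | // the names replayFilter and accept are hypothetical, and a real server
97 | // would also need to synchronize access and MAC-protect tokens as noted above.
98 | type replayFilter struct {
99 | 	epoch uint64              // epoch the current record covers
100 | 	seen  map[string]struct{} // tokens already processed during this epoch
101 | }
102 | 
103 | // accept reports whether token should be processed during epoch now,
104 | // recording it so that replays within the same epoch are discarded.
105 | // Storage grows only with the number of tokens processed per epoch,
106 | // independent of the number of clients contending.
107 | func (rf *replayFilter) accept(now uint64, token string) bool {
108 | 	if rf.seen == nil || now != rf.epoch {
109 | 		rf.epoch = now // new epoch: forget the old record entirely
110 | 		rf.seen = make(map[string]struct{})
111 | 	}
112 | 	if _, dup := rf.seen[token]; dup {
113 | 		return false // replay within this epoch: discard
114 | 	}
115 | 	rf.seen[token] = struct{}{}
116 | 	return true
117 | }
118 | 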
--------------------------------------------------------------------------------
/go/lib/cas/cas.go:
--------------------------------------------------------------------------------
1 | // Package cas defines a simple compare-and-set (CAS) state interface.
2 | // It defines a generic access interface called Store,
3 | // and a simple in-memory CAS register called Register.
4 | //
5 | package cas
6 |
7 | import (
8 | "context"
9 | "sync"
10 | )
11 |
12 | // Store defines a CAS storage abstraction via a single CompareAndSet method.
13 | //
14 | // CompareAndSet writes a proposed new value to the state,
15 | // provided the state still has the specified old value.
16 | // The compare and conditional write are guaranteed to be atomic,
17 | // ensuring that the caller can avoid undetected state loss due to races.
18 | // CompareAndSet then reads and returns the latest actual state value.
19 | //
20 | // State values are arbitrary opaque Go strings, and may contain binary data.
21 | // While values in principle have no particular length limit, in practice
22 | // Store implementations may expect them to be "reasonably small", i.e.,
23 | // efficient for storing metadata but not necessarily for bulk data storage.
24 | //
25 | // The Store assigns a version number to each value CompareAndSet returns.
26 | // Version numbers must be monotonic but need not be assigned consecutively.
27 | // The version number must increase when the stored value changes,
28 | // and may increase at other times even when the value hasn't changed.
29 | // The caller may simply ignore the version numbers CompareAndSet returns,
30 | // or may use them for consistency-checking and debugging:
31 | // see the Checked wrapper function in the test subpackage for example.
32 | // Version numbers do not impose a burden on Store interface implementations,
33 | // in part because it's easy to adapt a non-versioned underlying CAS interface
34 | // with a simple wrapper that attaches a version number to each proposed value.
35 | //
36 | // CompareAndSet takes a Context parameter so that long-running implementations,
37 | // particularly those accessing remote storage in a distributed system,
38 | // can respond to cancellation requests and timeouts appropriately.
39 | // For robust asynchronous operation, CompareAndSet should return err != nil
40 | // only when its context is cancelled or when it encounters an error
41 | // that it has determined to be permanent and unrecoverable.
42 | // On encountering errors that may be temporary (e.g., due to network outages),
43 | // it is better for the Store to keep trying until success or cancellation,
44 | // using the lib/backoff package for example.
45 | //
46 | type Store interface {
47 | CompareAndSet(ctx context.Context, old, new string) (
48 | version int64, actual string, err error)
49 | }
50 |
51 | // Register implements a simple local-memory CAS register.
52 | // It is thread-safe and ready for use on instantiation.
53 | type Register struct {
54 | mut sync.Mutex // for synchronizing accesses
55 | ver int64 // version number of the latest value
56 | val string // the latest value written
57 | }
58 |
59 | // CompareAndSet implements the Store interface for the CAS register.
60 | func (r *Register) CompareAndSet(ctx context.Context, old, new string) (
61 | version int64, actual string, err error) {
62 |
63 | r.mut.Lock()
64 | defer r.mut.Unlock()
65 |
66 | // Update the value only if the current value is as expected.
67 | if r.val == old {
68 | r.ver, r.val = r.ver+1, new
69 | }
70 |
71 | // Return the actual new value, changed or not.
72 | return r.ver, r.val, nil
73 | }
74 |
75 |
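76 | // The function below is a usage sketch, not part of the package API;
77 | // the name update is hypothetical, and it assumes the register starts
78 | // at the empty string. It shows the typical optimistic-update loop:
79 | // propose a new value derived from the last value observed, and adopt
80 | // whatever actual value the Store reports until our proposal takes effect.
81 | func update(ctx context.Context, st Store, f func(old string) string) (string, error) {
82 | 	old := ""
83 | 	for {
84 | 		new := f(old)
85 | 		_, actual, err := st.CompareAndSet(ctx, old, new)
86 | 		if err != nil {
87 | 			return "", err // cancelled or permanently failed
88 | 		}
89 | 		if actual == new {
90 | 			return actual, nil // our proposal is now the current value
91 | 		}
92 | 		old = actual // another writer won the race: retry from its value
93 | 	}
94 | }
95 | 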
--------------------------------------------------------------------------------
/go/lib/cas/test/cas.go:
--------------------------------------------------------------------------------
1 | // Package test implements shareable code for testing instantiations
2 | // of the cas.Store compare-and-set storage interface.
3 | package test
4 |
5 | import (
6 | "context"
7 | "fmt"
8 | "math/rand"
9 | "sync"
10 | "testing"
11 |
12 | "github.com/dedis/tlc/go/lib/cas"
13 | )
14 |
15 | // History records a history of cas.Store version/value observations,
16 | // typically made across concurrent goroutines or even distributed nodes,
17 | // and checks all these observations for consistency.
18 | //
19 | type History struct {
20 | hist map[int64]string // version-value map defining observed history
21 | 	mut  sync.Mutex       // mutex protecting the observation history
22 | }
23 |
24 | // Observe records an old/new value pair that was observed via a cas.Store,
25 | // checks it for consistency against all prior recorded old/new value pairs,
26 | // and reports any errors via testing context t.
27 | //
28 | func (to *History) Observe(t *testing.T, version int64, value string) {
29 | to.mut.Lock()
30 | defer to.mut.Unlock()
31 |
32 | // Create the version/value map if it doesn't already exist
33 | if to.hist == nil {
34 | to.hist = make(map[int64]string)
35 | }
36 |
37 | // If there is a recorded value for this version, it must be the same.
38 | if old, exist := to.hist[version]; exist && old != value {
39 | t.Errorf("\nInconsistency:\n ver %v\n old %q\n new %q\n",
40 | version, old, value)
41 | }
42 |
43 | // Record the new successor
44 | to.hist[version] = value
45 | }
46 |
47 | // Checked wraps the provided CAS store with a consistency-checker
48 | // that records all requested and observed accesses against history h,
49 | // reporting any inconsistency errors discovered via testing context t.
50 | //
51 | // The wrapper also consistency-checks the caller's accesses to the Store,
52 | // e.g., that the provided old value is indeed the last version retrieved.
53 | // This means that when checking a Store that is shared across goroutines,
54 | // each goroutine must have its own Checked wrapper around that Store.
55 | //
56 | func Checked(t *testing.T, h *History, store cas.Store) cas.Store {
57 | return &checkedStore{t: t, h: h, s: store}
58 | }
59 |
60 | type checkedStore struct {
61 | t *testing.T // Testing context
62 | h *History // History we're using for consistency-checking
63 | s cas.Store // Underlying compare-and-set Store
64 |
65 | lver int64 // Last version number read from the underlying Store
66 | lval string // Last value read from the underlying Store
67 |
68 | rver int64 // Our fudged informational version numbers for testing
69 | }
70 |
71 | func (cs *checkedStore) CompareAndSet(ctx context.Context, old, new string) (
72 | version int64, actual string, err error) {
73 |
74 | // Sanity-check the arguments we're passed
75 | if old != cs.lval {
76 | cs.t.Errorf("CompareAndSet: wrong old value %q != %q",
77 | old, cs.lval)
78 | }
79 | if new == "" {
80 | cs.t.Errorf("CompareAndSet: new value empty")
81 | }
82 | if new == old {
83 | cs.t.Errorf("CompareAndSet: new value identical to old")
84 | }
85 |
86 | // Try to change old to new atomically.
87 | version, actual, err = cs.s.CompareAndSet(ctx, old, new)
88 |
89 | // Sanity-check the Store-assigned version numbers
90 | if version < cs.lver {
91 | cs.t.Errorf("CompareAndSet: Store version number decreased")
92 | }
93 | if version == cs.lver && actual != cs.lval {
94 | cs.t.Errorf("CompareAndSet: Store version failed to increase")
95 | }
96 |
97 | // Record and consistency-check all version/value pairs we observe.
98 | cs.h.Observe(cs.t, version, actual)
99 |
100 | // Produce our own informational version numbers to return
101 | 	// that increase a bit unpredictably for testing purposes.
102 | if version > cs.lver {
103 | cs.rver++
104 | }
105 | cs.rver += rand.Int63n(3)
106 |
107 | // Update our cached record of the underlying Store's last state
108 | cs.lver, cs.lval = version, actual
109 |
110 | // Return the actual new value regardless.
111 | return cs.rver, actual, err
112 | }
113 |
114 | // Stores torture-tests one or more cas.Store interfaces
115 | // that are all supposed to represent the same consistent underlying state.
116 | // The test is driven by nthreads goroutines per Store interface,
117 | // each of which performs naccesses CAS operations on its interface.
118 | //
119 | func Stores(t *testing.T, nthreads, naccesses int, store ...cas.Store) {
120 |
121 | bg := context.Background()
122 | wg := sync.WaitGroup{}
123 | h := &History{}
124 |
125 | tester := func(i, j int) {
126 | cs := Checked(t, h, store[i])
127 | old, err := "", error(nil)
128 | for k := 0; k < naccesses; k++ {
129 | new := fmt.Sprintf("store %v thread %v access %v",
130 | i, j, k)
131 | //println("tester", i, j, "access", k)
132 | _, old, err = cs.CompareAndSet(bg, old, new)
133 | if err != nil {
134 | t.Error("CompareAndSet: " + err.Error())
135 | }
136 | }
137 | //println("tester", i, j, "done")
138 | wg.Done()
139 | }
140 |
141 | // Launch a set of goroutines for each Store interface.
142 | // To maximize cross-store concurrency,
143 | // launch the first thread per store, then the second per store, etc.
144 | for j := 0; j < nthreads; j++ {
145 | for i := range store {
146 | wg.Add(1)
147 | go tester(i, j)
148 | }
149 | }
150 |
151 | // Wait for all tester goroutines to complete
152 | wg.Wait()
153 | }
154 |
--------------------------------------------------------------------------------
/go/lib/cas/test/cas_test.go:
--------------------------------------------------------------------------------
1 | package test
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/dedis/tlc/go/lib/cas"
7 | )
8 |
9 | // Torture-test the trivial in-memory Register CAS Store implementation.
10 | func TestRegister(t *testing.T) {
11 | Stores(t, 100, 100000, &cas.Register{})
12 | }
13 |
--------------------------------------------------------------------------------
/go/lib/doc.go:
--------------------------------------------------------------------------------
1 | // Sub-packages of this package contain common library functionality
2 | // useful in implementations of threshold logical clocks and consensus.
3 | package lib
4 |
--------------------------------------------------------------------------------
/go/lib/fs/atomic/atomic.go:
--------------------------------------------------------------------------------
1 | // Package atomic supports writing files atomically
2 | // while ensuring "at-most-once" write semantics.
3 | package atomic
4 |
5 | import (
6 | "errors"
7 | "fmt"
8 | "io/ioutil"
9 | "os"
10 | "path/filepath"
11 | )
12 |
13 | // WriteFileOnce attempts to write data to filename atomically, only once,
14 | // failing with ErrExist if someone else already wrote a file at filename.
15 | //
16 | // Ensures that no one ever sees a zero-length or incomplete file
17 | // at the target filename, by writing data to a temporary file first,
18 | // synchronizing it to stable storage, then atomically linking it into place.
19 | //
20 | // This code solves a different problem from, but is partly inspired by:
21 | // https://github.com/google/renameio
22 | // https://github.com/natefinch/atomic
23 | //
24 | func WriteFileOnce(filename string, data []byte, perm os.FileMode) error {
25 |
26 | // Create a temporary file in the target directory,
27 | // mainly to ensure that it's on the same volume for hard linking.
28 | dir, name := filepath.Split(filename)
29 | pattern := fmt.Sprintf("%s-*.tmp", name)
30 | tmpfile, err := ioutil.TempFile(dir, pattern)
31 | if err != nil {
32 | return err
33 | }
34 |
35 | // Make sure it gets closed and removed regardless of outcome.
36 | tmpname := tmpfile.Name()
37 | defer func() {
38 | tmpfile.Close()
39 | os.Remove(tmpname)
40 | }()
41 |
42 | // Write the data to the temporary file.
43 | n, err := tmpfile.Write(data)
44 | if err != nil {
45 | return err
46 | }
47 | if n < len(data) {
48 | return errors.New("short write")
49 | }
50 |
51 | // Set the correct file permissions
52 | if err := tmpfile.Chmod(perm); err != nil {
53 | return err
54 | }
55 |
56 | // Force the newly-written data to stable storage.
57 | 	// For background on this see comments for CloseAtomicallyReplace
58 | // at https://github.com/google/renameio/blob/master/tempfile.go
59 | //
60 | if err := tmpfile.Sync(); err != nil {
61 | return err
62 | }
63 |
64 | if err := tmpfile.Close(); err != nil {
65 | return err
66 | }
67 |
68 | // Atomically hard-link the temporary file into the target filename.
69 | // Unlike os.Rename, this fails if target filename already exists.
70 | if err := os.Link(tmpname, filename); err != nil {
71 | return err
72 | }
73 |
74 | return nil
75 | }
76 |
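77 | // writeIfAbsent is a usage sketch, not part of the package API
78 | // (the name and the 0644 permission are illustrative assumptions):
79 | // it writes a file once and treats a lost creation race as success,
80 | // since in that case a complete file already exists at the target name.
81 | func writeIfAbsent(filename string, data []byte) error {
82 | 	err := WriteFileOnce(filename, data, 0644)
83 | 	if err != nil && os.IsExist(err) {
84 | 		return nil // another writer already created the file
85 | 	}
86 | 	return err
87 | }
88 | 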
--------------------------------------------------------------------------------
/go/lib/fs/atomic/atomic_test.go:
--------------------------------------------------------------------------------
1 | package atomic
2 |
3 | import (
4 | "io/ioutil"
5 | "math/rand"
6 | "os"
7 | "sync"
8 | "testing"
9 | "time"
10 | )
11 |
12 | func TestWriteFileOnce(t *testing.T) {
13 |
14 | filename := "testfile.tmp"
15 | var wg sync.WaitGroup
16 | writer := func(i int) {
17 |
18 | // Sleep a small random duration to jitter the test
19 | time.Sleep(time.Duration(rand.Int63n(int64(time.Microsecond))))
20 | //println("thread",i,"writing")
21 |
22 | // Try to write the file
23 | b := make([]byte, i) // create i-length file filled with i's
24 | for j := range b {
25 | b[j] = byte(i)
26 | }
27 | err := WriteFileOnce(filename, b, 0644)
28 | if err != nil && !os.IsExist(err) {
29 | t.Error("WriteFileOnce:", err)
30 | }
31 |
32 | // Now try to read the file that got written
33 | b, err = ioutil.ReadFile(filename)
34 | if err != nil {
35 | t.Error("ReadFile", err)
36 | }
37 |
38 | // Check that what we read back is valid
39 | //println("thread",i,"read",len(b))
40 | i = len(b)
41 | if i == 0 {
42 | t.Error("zero-length file shouldn't be possible")
43 | }
44 | for j := range b {
45 | if b[j] != byte(i) {
46 | t.Error("read file has wrong byte at", j)
47 | }
48 | }
49 |
50 | wg.Done()
51 | }
52 |
53 | // Test with increasing numbers of threads
54 | for n := 1; n <= 128; n *= 2 {
55 |
56 | //println("\ntesting", n, "threads")
57 | for i := 1; i <= n; i++ {
58 | wg.Add(1)
59 | go writer(i)
60 | }
61 | wg.Wait()
62 |
63 | os.Remove(filename)
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/go/lib/fs/casdir/state.go:
--------------------------------------------------------------------------------
1 | // Package casdir implements a versioned compare-and-set (CAS) state abstraction
2 | // in a directory on a standard POSIX-compatible file system.
3 | //
4 | // See the tlc/go/lib/cas package for general information
5 | // on this CAS state abstraction.
6 | //
7 | // This implementation is just a simple wrapper around the verst package,
8 | // which provides a slightly-more-general versioned state abstraction.
9 | // To implement CAS, in essence, we simply expire old versions immediately
10 | // as soon as any new version is written.
11 | //
12 | package casdir
13 |
14 | import (
15 | "context"
16 |
17 | "github.com/dedis/tlc/go/lib/fs/verst"
18 | )
19 |
20 | // Store implements the compare-and-set state abstraction
21 | // generically defined by the cas.Store interface,
22 | // holding the underlying state in a POSIX directory.
23 | //
24 | // The underlying state directory may be shared locally or remotely
25 | // (e.g., via NFS-mounted file systems),
26 | // provided that file system accesses ensure file-level POSIX atomicity.
27 | //
28 | // Each Store instance is intended for use by only one goroutine at a time,
29 | // so the client must synchronize shared uses across multiple goroutines.
30 | //
31 | type Store struct {
32 | vs verst.State // underlying versioned state
33 | lver int64 // last version we've read
34 | lval string // application value associated with lver
35 | }
36 |
37 | // Init sets Store to refer to a CAS register at a given file system path.
38 | // If create is true, creates the designated directory if it doesn't exist.
39 | // If excl is true, fails if the designated directory already exists.
40 | //
41 | func (st *Store) Init(path string, create, excl bool) error {
42 | return st.vs.Init(path, create, excl)
43 | }
44 |
45 | // CompareAndSet writes value new provided the state still holds value old,
46 | // then reads and returns the actual current state version and value.
47 | //
48 | func (st *Store) CompareAndSet(ctx context.Context, old, new string) (
49 | version int64, actual string, err error) {
50 |
51 | if old != st.lval {
52 | panic("CompareAndSet: wrong old value")
53 | }
54 |
55 | // Try to write the new version to the underlying versioned store -
56 | // but don't fret if someone else wrote it or if it has expired.
57 | ver := st.lver + 1
58 | err = st.vs.WriteVersion(ver, new)
59 | if err != nil && !verst.IsExist(err) && !verst.IsNotExist(err) {
60 | return 0, "", err
61 | }
62 |
63 | // Now read back whatever value was successfully written.
64 | val, err := st.vs.ReadVersion(ver)
65 | if err != nil && verst.IsNotExist(err) {
66 |
67 | // The requested version has probably been aged out,
68 | // so catch up to the most recent committed value.
69 | ver, val, err = st.vs.ReadLatest()
70 | }
71 | if err != nil {
72 | return 0, "", err
73 | }
74 |
75 | // Expire all versions before this latest one
76 | st.vs.Expire(ver)
77 |
78 | // Return the actual version and value that we read
79 | st.lver, st.lval = ver, val
80 | return ver, val, err
81 | }
82 |
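83 | // exampleOpenAndSet is a usage sketch, not part of the package API
84 | // (the function name is an illustrative assumption): it opens or creates
85 | // a CAS state directory and makes one compare-and-set attempt against it,
86 | // returning whatever value the store actually holds afterwards.
87 | func exampleOpenAndSet(ctx context.Context, path, new string) (string, error) {
88 | 	st := &Store{}
89 | 	if err := st.Init(path, true, false); err != nil {
90 | 		return "", err
91 | 	}
92 | 	_, actual, err := st.CompareAndSet(ctx, "", new)
93 | 	return actual, err
94 | }
95 | 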
--------------------------------------------------------------------------------
/go/lib/fs/verst/state.go:
--------------------------------------------------------------------------------
1 | // Package verst implements a simple persistent versioned state abstraction
2 | // in a directory on a standard POSIX-compatible file system.
3 | //
4 | // The abstraction that verst presents is essentially a key/value store,
5 | // in which the keys are sequentially-increasing version numbers,
6 | // and the values are opaque byte strings (which we represent as Go strings).
7 | // The main operations verst provides are
8 | // reading a particular (or the latest) version,
9 | // and writing a new version as a successor to the latest version.
10 | // The implementation ensures that new version writes are atomic:
11 | // clients will never read partially-written values, for example.
12 | // If several clients attempt to write the same new version concurrently,
13 | // one will succeed while all the others will fail,
14 | // and potentially need to retry with respect to the new latest version.
15 | //
16 | // The package is designed assuming that values are small,
17 | // e.g., metadata rather than bulk data, appropriate for Go strings
18 | // and for reading and writing all at once as atomic units.
19 | // Bulk data should be handled by other means.
20 | //
21 | // The verst package uses simple atomic POSIX file system operations,
22 | // with no locking, to manage concurrency in the underlying file system.
23 | // It supports garbage-collection of old state versions
24 | // by using atomic POSIX directory-manipulation operations.
25 | // Barring bugs, it "should" not be possible to violate
26 | // the guaranteed atomicity properties or corrupt the state store
27 | // regardless of how many clients may be competing to access it
28 | // or with what access patterns or delays.
29 | // This atomicity is necessarily only as good as the underlying file system's
30 | // guarantee of atomicity and consistency of the underlying operations:
31 | // e.g., if the underlying file system can leave a rename operation
32 | // half-completed after a badly-timed crash, the state could be corrupted.
33 | //
34 | // The design of verst guarantees progress, but not fairness:
35 | // that is, by standard definitions it is lock-free but not wait-free
36 | // (https://en.wikipedia.org/wiki/Non-blocking_algorithm).
37 | // Regardless of the amount of contention to write a new version, for example,
38 | // verst guarantees that at least one client will be able to make progress.
39 | // It makes no guarantee of a "fair" rotation among clients, however,
40 | // or that some particularly slow or otherwise unlucky client will not starve.
41 | //
42 | // While this package currently lives in the tlc repository,
43 | // it is not particularly specific to TLC and depends on nothing else in it,
44 | // and hence might eventually be moved to a more generic home if appropriate.
45 | //
46 | // XXX describe the techniques in a bit more detail.
47 | //
48 | package verst
49 |
50 | import (
51 | "fmt"
52 | "io/ioutil"
53 | "os"
54 | "path/filepath"
55 | // "errors"
56 |
57 | "github.com/bford/cofo/cbe"
58 | "github.com/dedis/tlc/go/lib/fs/atomic"
59 | )
60 |
61 | //const versPerGen = 100 // Number of versions between generation subdirectories
62 | const versPerGen = 10 // Number of versions between generation subdirectories
63 |
64 | const genFormat = "gen-%d" // Format for generation directory names
65 | const verFormat = "ver-%d" // Format for register version file names
66 |
67 | // State holds cached state for a single verst versioned register.
68 | type State struct {
69 | path string // Base pathname of directory containing register state
70 | genVer int64 // Version number of highest generation subdirectory
71 | genPath string // Pathname to generation subdirectory
72 | ver int64 // Highest register version known to exist already
73 | val string // Cached register value for highest known version
74 | expVer int64 // Version number before which state is expired
75 | }
76 |
77 | // Initialize State to refer to a verst register at a given file system path.
78 | // If create is true, create the designated directory if it doesn't exist.
79 | // If excl is true, fail if the designated directory already exists.
80 | func (st *State) Init(path string, create, excl bool) error {
81 | *st = State{path: path} // Set path and clear cached state
82 |
83 | // First check if the path already exists and is a directory.
84 | stat, err := os.Stat(path)
85 | switch {
86 | case err == nil && !stat.IsDir():
87 | return os.ErrExist // already exists, but not a directory
88 |
89 | case err == nil && !excl:
90 | return st.refresh() // exists: load our cache from it
91 |
92 | case err != nil && (!IsNotExist(err) || !create):
93 | return err // didn't exist and we can't create it
94 | }
95 |
96 | // Create and initialize the version state directory,
97 | // initially with a temporary name for atomicity.
98 | dir, name := filepath.Split(path)
99 | if dir == "" {
100 | dir = "." // Ensure dir is nonempty
101 | }
102 | tmpPath, err := ioutil.TempDir(dir, name+"-*.tmp")
103 | if err != nil {
104 | return err
105 | }
106 | defer func() { // Clean up on return if we can't move it into place
107 | os.RemoveAll(tmpPath)
108 | }()
109 |
110 | // Create an initial generation directory for state version 0
111 | genPath := filepath.Join(tmpPath, fmt.Sprintf(genFormat, 0))
112 | err = os.Mkdir(genPath, 0777)
113 | if err != nil {
114 | return err
115 | }
116 |
117 | // Create an initial state version 0 with the empty string as its value
118 | err = writeVerFile(genPath, fmt.Sprintf(verFormat, 0), "", "")
119 | if err != nil {
120 | return err
121 | }
122 |
123 | // Atomically move the temporary version state directory into place.
124 | err = os.Rename(tmpPath, path)
125 | if err != nil && (excl || !IsExist(err)) {
126 | return err
127 | }
128 |
129 | // Finally, load our cache from the state directory.
130 | return st.refresh()
131 | }
132 |
133 | // Refresh our cached state in an attempt to "catch up" to the
134 | // latest register version on the file system.
135 | // Of course the file system may be a constantly-moving target
136 | // so the refreshed state could be stale again immediately on return.
137 | func (st *State) refresh() error {
138 |
139 | // First find the highest-numbered state generation subdirectory
140 | genver, genname, _, err := scan(st.path, genFormat, 0)
141 | if err != nil {
142 | return err
143 | }
144 |
145 | // Then find the highest-numbered register version in that subdirectory
146 | genpath := filepath.Join(st.path, genname)
147 | regver, regname, _, err := scan(genpath, verFormat, 0)
148 | if err != nil {
149 | return err
150 | }
151 |
152 | // Read that highest register version file
153 | val, _, err := readVerFile(genpath, regname)
154 | if err != nil {
155 | return err
156 | }
157 |
158 | st.genVer = genver
159 | st.genPath = genpath
160 |
161 | st.ver = regver
162 | st.val = val
163 |
164 | return nil
165 | }
166 |
167 | // Scan a directory for highest-numbered file or subdirectory matching format.
168 | // If upTo > 0, returns the highest-numbered version no higher than upTo.
169 | func scan(path, format string, upTo int64) (
170 | maxver int64, maxname string, names []string, err error) {
171 |
172 | // Scan the verst directory for the highest-numbered subdirectory.
173 | dir, err := os.Open(path)
174 | if err != nil {
175 | return 0, "", nil, err
176 | }
177 | info, err := dir.Readdir(0)
178 | if err != nil {
179 | return 0, "", nil, err
180 | }
181 |
182 | // Find the highest-numbered generation subdirectory
183 | maxver = -1
184 | for i := range info {
185 | name := info[i].Name()
186 |
187 | // Scan the version number embedded in the name, if any,
188 | // and confirm that the filename exactly matches the format.
189 | var ver int64
190 | n, err := fmt.Sscanf(name, format, &ver)
191 | if n < 1 || err != nil || name != fmt.Sprintf(format, ver) {
192 | continue
193 | }
194 |
195 | // Find the highest extant version number
196 | // (no greater than upTo, if upTo is nonzero)
197 | if ver > maxver && (upTo == 0 || ver <= upTo) {
198 | maxver, maxname = ver, name
199 | }
200 |
201 | // If upTo is nonzero, collect all the matching names.
202 | if upTo > 0 && ver <= upTo {
203 | names = append(names, name)
204 | }
205 | }
206 | if maxver < 0 { // No highest version!? oops
207 | return 0, "", nil, os.ErrNotExist
208 | }
209 | return
210 | }
211 |
212 | // Read and parse the register version file at regpath.
213 | func readVerFile(genPath, verName string) (val, nextGen string, err error) {
214 |
215 | regPath := filepath.Join(genPath, verName)
216 | b, err := ioutil.ReadFile(regPath)
217 | if err != nil {
218 | return "", "", err
219 | }
220 |
221 | // The encoded value is always first and not optional
222 | rb, b, err := cbe.Decode(b)
223 | if err != nil {
224 | println("corrupt verst version file " + regPath)
225 | return "", "", err
226 | }
227 |
228 | // The encoded next-generation directory name is optional
229 | nxg, b, err := cbe.Decode(b)
230 | // (ignore decoding errors)
231 |
232 | return string(rb), string(nxg), nil
233 | }
234 |
235 | // Read the latest version of the stored state,
236 | // returning both the highest version number (key) and associated value.
237 | // Of course a new version might be written at any time,
238 | // so the caller must assume this information could become stale immediately.
239 | func (st *State) ReadLatest() (ver int64, val string, err error) {
240 |
241 | if err := st.refresh(); err != nil {
242 | return 0, "", err
243 | }
244 | return st.ver, st.val, nil
245 | }
246 |
247 | // Read a specific version of the stored state,
248 | // returning the associated value if possible.
249 | // Returns ErrNotExist if the specified version does not exist,
250 | // either because it has never been written or because it has been expired.
251 | func (st *State) ReadVersion(ver int64) (val string, err error) {
252 |
253 | // In the common case of reading back the last-written version,
254 | // just return its value from our cache.
255 | if ver == st.ver {
256 | return st.val, nil
257 | }
258 |
259 | // Find and read the appropriate version file
260 | val, err = st.readUncached(ver)
261 | if err != nil {
262 | return "", err
263 | }
264 |
265 | // Update our cached state as appropriate.
266 | if ver > st.ver {
267 | st.ver = ver
268 | st.val = val
269 | }
270 |
271 | return val, nil
272 | }
273 |
274 | func (st *State) readUncached(ver int64) (val string, err error) {
275 |
276 | // Optimize for sequential reads of the "next" version
277 | verName := fmt.Sprintf(verFormat, ver)
278 | if ver >= st.genVer {
279 | val, _, err := readVerFile(st.genPath, verName)
280 | if err == nil {
281 | return val, nil // success
282 | }
283 | if !IsNotExist(err) {
284 | return "", err // error other than non-existent
285 | }
286 | }
287 |
288 | // Fallback: scan for the generation containing requested version.
289 | //println("readUncached: fallback at", ver)
290 | genVer, genName, _, err := scan(st.path, genFormat, ver)
291 | if err != nil {
292 | return "", err
293 | }
294 | //println("readUncached: found", ver, "in gen", genVer)
295 |
296 | // The requested version should be in directory genName if it exists.
297 | genPath := filepath.Join(st.path, genName)
298 | val, _, err = readVerFile(genPath, verName)
299 | if err != nil {
300 | return "", err
301 | }
302 |
303 | // Update our cached generation state
304 | if ver >= st.ver {
305 | 		//println("moving to generation", genVer, "at ver", ver)
306 | st.genVer = genVer
307 | st.genPath = genPath
308 | }
309 |
310 | return val, err
311 | }
312 |
313 | // Write version ver with associated value val if ver is not yet written.
314 | // The caller may skip version numbers, e.g., to catch up a delayed store,
315 | // but must never try to (re-)write a version at or below the last one written.
316 | //
317 | func (st *State) WriteVersion(ver int64, val string) (err error) {
318 |
319 | if ver <= st.ver {
320 | return ErrExist
321 | }
322 | verName := fmt.Sprintf(verFormat, ver)
323 |
324 | // Should this register version start a new generation?
325 | tmpGenName := ""
326 | if ver%versPerGen == 0 {
327 |
328 | // Prepare the new generation in a temporary directory first
329 | pattern := fmt.Sprintf(genFormat+"-*.tmp", ver)
330 | tmpPath, err := ioutil.TempDir(st.path, pattern)
331 | if err != nil {
332 | return err
333 | }
334 | defer func() {
335 | os.RemoveAll(tmpPath)
336 | }()
337 | tmpGenName = filepath.Base(tmpPath)
338 |
339 | // Write the new register version in the new directory (too)
340 | err = writeVerFile(tmpPath, verName, val, tmpGenName)
341 | if err != nil {
342 | return err
343 | }
344 | }
345 |
346 | // Write version into the (old) generation directory
347 | err = writeVerFile(st.genPath, verName, val, tmpGenName)
348 | if err != nil && !IsExist(err) {
349 | return err
350 | }
351 |
352 | // Read back whatever register version file actually got written,
353 | // which might be from someone else's write that won over ours.
354 | val, tmpGenName, err = readVerFile(st.genPath, verName)
355 | if err != nil {
356 | return err
357 | }
358 |
359 | // If the (actual) new version indicates a new generation directory,
360 | // try to move the temporary directory into its place.
361 | // It's harmless if multiple writers attempt this redundantly:
362 | // it fails if either the old temporary directory no longer exists
363 | // or if a directory with the new name already exists.
364 | if tmpGenName != "" {
365 | oldGenPath := filepath.Join(st.path, tmpGenName)
366 | newGenPath := filepath.Join(st.path,
367 | fmt.Sprintf(genFormat, ver))
368 | err := os.Rename(oldGenPath, newGenPath)
369 | if err != nil && !IsExist(err) && !IsNotExist(err) {
370 | return err
371 | }
372 |
373 | // It's a good time to expire old generations when feasible
374 | st.expireOld()
375 |
376 | // Update our cached generation state
377 | st.genVer = ver
378 | st.genPath = newGenPath
379 | }
380 |
381 | // Update our cached version state
382 | st.ver = ver
383 | st.val = val
384 | return nil
385 | }
386 |
387 | func writeVerFile(genPath, verName, val, nextGen string) error {
388 |
389 | // Encode the new register version file
390 | b := cbe.Encode(nil, []byte(val))
391 | b = cbe.Encode(b, []byte(nextGen))
392 |
393 | // Write it atomically
394 | verPath := filepath.Join(genPath, verName)
395 | if err := atomic.WriteFileOnce(verPath, b, 0644); err != nil {
396 | return err
397 | }
398 |
399 | return nil
400 | }
401 |
402 | // Expire indicates that state versions earlier than before may be deleted.
403 | // It does not necessarily delete these older versions immediately, however.
404 | // Attempts either to read or to write expired versions will fail.
405 | //
406 | func (st *State) Expire(before int64) {
407 | if st.expVer < before {
408 | st.expVer = before
409 | }
410 | }
411 |
412 | // Actually try to delete expired versions.
413 | // We do this only about once per generation for efficiency.
414 | func (st *State) expireOld() {
415 |
416 | // Find all existing generation directories up to version 'before'
417 | maxVer, maxName, names, err := scan(st.path, genFormat, st.expVer)
418 | if err != nil || len(names) == 0 {
419 | return // ignore errors, e.g., no expired generations
420 | }
421 | if maxVer < 0 || maxVer > st.expVer {
422 | println("expireOld oops", len(names), maxVer, st.expVer)
423 | panic("shouldn't happen")
424 | }
425 |
426 | // Delete all generation directories before maxVer,
427 | // since those can only contain versions strictly before maxVer.
428 | for _, genName := range names {
429 | if genName != maxName {
430 | genPath := filepath.Join(st.path, genName)
431 | atomicRemoveAll(genPath)
432 | }
433 | }
434 | }
435 |
436 | // Atomically remove the directory at path,
437 | // ensuring that no one sees inconsistent states within it,
438 | // by renaming it before starting to delete its contents.
439 | func atomicRemoveAll(path string) error {
440 |
441 | tmpPath := fmt.Sprintf("%s.old", path)
442 | if err := os.Rename(path, tmpPath); err != nil {
443 | return err
444 | }
445 |
446 | return os.RemoveAll(tmpPath)
447 | }
448 |
449 | // IsExist returns true when an error from State.WriteVersion indicates
450 | // that the version the caller asked to write already exists.
451 | func IsExist(err error) bool {
452 | return os.IsExist(err)
453 | }
454 |
455 | // IsNotExist returns true when an error from State.ReadVersion indicates
456 | // that the version the caller asked to read does not exist.
457 | func IsNotExist(err error) bool {
458 | return os.IsNotExist(err)
459 | }
460 |
461 | var ErrExist = os.ErrExist       // the version to be written already exists
462 | var ErrNotExist = os.ErrNotExist // the version to be read does not exist
463 |
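464 | // exampleAdvance is a usage sketch, not part of the package API
465 | // (the function name is an illustrative assumption). It shows the
466 | // intended write/read cycle: catch up to the latest version, try to
467 | // write its successor, and read back whichever value actually won
468 | // if another client managed to write that version first.
469 | func exampleAdvance(st *State, val string) (ver int64, actual string, err error) {
470 | 	ver, _, err = st.ReadLatest()
471 | 	if err != nil {
472 | 		return 0, "", err
473 | 	}
474 | 	ver++ // propose the next version
475 | 	if err = st.WriteVersion(ver, val); err != nil && !IsExist(err) {
476 | 		return 0, "", err
477 | 	}
478 | 	if actual, err = st.ReadVersion(ver); err != nil {
479 | 		return 0, "", err // e.g., the version has already been expired
480 | 	}
481 | 	return ver, actual, nil
482 | }
483 | 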
--------------------------------------------------------------------------------
/go/model/README.md:
--------------------------------------------------------------------------------
1 | This Go package provides a minimal implementation of
2 | Que Sera Consensus (QSC) built on Threshold Logical Clocks (TLC)
3 | for fail-stop, non-Byzantine environments.
4 | For background information on QSC and TLC,
5 | and other model implementations in several languages, please see the
6 | [top level of this repository](https://github.com/dedis/tlc/).
7 | For more details on this package see the code and its
8 | [GoDoc documentation](https://godoc.org/github.com/dedis/tlc/go/model).
9 |
--------------------------------------------------------------------------------
/go/model/doc.go:
--------------------------------------------------------------------------------
1 | // Package model implements a simple pedagogic model of TLC and QSC.
2 | // It uses no cryptography and supports only failstop, non-Byzantine consensus,
3 | // but should be usable in scenarios that would typically employ Paxos or Raft.
4 | //
5 | // This implementation is less than 200 lines of actual code as counted by CLOC,
6 | // so a good way to understand it is to read the code directly at
7 | // https://github.com/dedis/tlc/tree/master/go/model.
8 | // You can test this implementation in a variety of consensus configurations
9 | // using only goroutines and channels for communication via:
10 | //
11 | // go test -v
12 | //
13 | // To read about the principles underlying TLC and QSC, please refer to
14 | // https://arxiv.org/abs/1907.07010.
15 | // For a high-level overview of the different implementations of TLC/QSC
16 | // in different languages that live in this repository, please see
17 | // https://github.com/dedis/tlc/.
18 | //
19 | // Configuring and launching consensus groups
20 | //
21 | // To use this implementation of QSC,
22 | // a user of this package must first configure and launch
23 | // a threshold group of nodes.
24 | // This package handles only the core consensus logic,
25 | // leaving matters such as node configuration, network names, connections,
26 | // and wire-format marshaling and unmarshaling to the client of this package.
27 | //
28 | // The client using this package
29 | // must assign each node a unique number from 0 through nnode-1,
30 | // e.g., by configuring the group with a well-known ordering of its members.
31 | // Only node numbers are important to this package; it is oblivious to names.
32 | //
33 | // When each node in the consensus group starts,
34 | // the client calls NewNode to initialize the node's TLC and QSC state.
35 | // The client may then change optional Node configuration parameters,
36 | // such as Node.Rand, before actually commencing protocol message processing.
37 | // The client then calls Node.Advance to launch TLC and the consensus protocol,
38 | // advance to time-step zero, and broadcast a proposal for this time-step.
39 | // Thereafter, the protocol self-clocks asynchronously using TLC
40 | // based on network communication.
41 | //
42 | // Consensus protocol operation and results
43 | //
44 | // This package implements QSC in pipelined fashion, which means that
45 | // a sliding window of three concurrent QSC rounds is active at any time.
46 | // At the start of any given time step s when Advance broadcasts a Raw message,
47 | // this event initiates a new consensus round starting at s and ending at s+3,
48 | // and (in the steady state) completes a consensus round that started at s-3.
49 | // Each Message a node broadcasts includes QSC state from four rounds:
50 | // Message.QSC[0] holds the results of the consensus round just completed,
51 | // while QSC[1] through QSC[3] hold the state of the three still-active rounds,
52 | // with QSC[3] being the newest round just launched.
53 | //
54 | // If Message.QSC[0].Commit is true in the Raw message commencing a time-step,
55 | // then this node saw the round ending at step Message.Step as fully committed.
56 | // In this case, all nodes will have agreed on the same proposal in that round,
57 | // which is the proposal made by node number Message.QSC[0].Conf.From.
58 | // If the client was waiting for a particular transaction to be ordered
59 | // or definitely committed/aborted according to the client's transaction rules,
60 | // then seeing that Message.QSC[0].Commit is true means that the client may
61 | // resolve the status of transactions proposed up to Message.Step-3.
62 | // Other nodes might not have observed this same round as committed, however,
63 | // so the client must not assume that other nodes are also necessarily aware
64 | // that this consensus round successfully committed.
65 | //
66 | // If Message.QSC[0].Commit is false, the round may or may not have converged:
67 | // this node simply cannot determine conclusively whether the round converged.
68 | // Other nodes might have chosen different "best confirmed" proposals,
69 | // as indicated in their respective QSC[0].Conf.From broadcasts for this step.
70 | // Alternatively, the round may in fact have converged,
71 | // and other nodes might observe that fact, even though this node did not.
72 | //
73 | // Message transmission, marshaling
74 | //
75 | // This package invokes the send function provided to NewNode to send messages,
76 | // leaving any wire-format marshaling required to the provided function.
77 | // This allows the client complete control over the desired wire format,
78 | // and to include other information beyond the fields defined in Message,
79 | // such as any semantic content on which the client wishes to achieve consensus.
80 | // On receipt of a message from another node,
81 | // the client must unmarshal it as appropriate
82 | // and invoke Node.Receive with the unmarshaled Message.
83 | //
84 | // Concurrency control
85 | //
86 | // The consensus protocol logic in this package is not thread safe:
87 | // it must be run in a single goroutine,
88 | // or else the client must implement appropriate locking.
89 | //
90 | package model
91 |
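92 | // exampleLaunch is a wiring sketch, not a prescribed use of this package
93 | // (the function name and channel capacities are illustrative assumptions).
94 | // It shows the minimal glue a client provides: a send function,
95 | // per-node delivery channels standing in for the network,
96 | // and one goroutine per node feeding incoming messages to Receive.
97 | // A real client would also marshal messages and wait for the goroutines.
98 | func exampleLaunch(thres, nnode, maxSteps int) []*Node {
99 | 	peer := make([]chan *Message, nnode)
100 | 	send := func(dst int, msg *Message) { peer[dst] <- msg }
101 | 
102 | 	nodes := make([]*Node, nnode)
103 | 	for i := range nodes {
104 | 		peer[i] = make(chan *Message, 3*nnode*maxSteps) // ample buffering
105 | 		nodes[i] = NewNode(i, thres, nnode, send)
106 | 	}
107 | 	for i, n := range nodes {
108 | 		go func(i int, n *Node) {
109 | 			n.Advance() // broadcast the proposal for time-step 0
110 | 			for n.m.Step < maxSteps {
111 | 				n.Receive(<-peer[i]) // process one incoming message
112 | 			}
113 | 		}(i, n)
114 | 	}
115 | 	return nodes
116 | }
117 | 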
--------------------------------------------------------------------------------
/go/model/model_test.go:
--------------------------------------------------------------------------------
1 | package model
2 |
3 | import (
4 | "fmt"
5 | "math/rand"
6 | "sync"
7 | "testing"
8 | )
9 |
10 | func (n *Node) run(maxSteps int, peer []chan *Message, wg *sync.WaitGroup) {
11 |
12 | // broadcast message for initial time step s=0
13 | n.Advance() // broadcast message for initial time step
14 |
15 | // run the required number of time steps for the test
16 | for n.m.Step < maxSteps {
17 | msg := <-peer[n.m.From] // Receive a message
18 | n.Receive(msg) // Process it
19 | }
20 |
21 | // signal that we're done
22 | wg.Done()
23 | }
24 |
25 | // Run a consensus test case with the specified parameters.
26 | func testRun(t *testing.T, thres, nnode, maxSteps, maxTicket int) {
27 | if maxTicket == 0 { // Default to moderate-entropy tickets
28 | maxTicket = 10 * nnode
29 | }
30 | desc := fmt.Sprintf("T=%v,N=%v,Steps=%v,Tickets=%v",
31 | thres, nnode, maxSteps, maxTicket)
32 | t.Run(desc, func(t *testing.T) {
33 | all := make([]*Node, nnode)
34 | peer := make([]chan *Message, nnode)
35 | send := func(dst int, msg *Message) { peer[dst] <- msg }
36 |
37 | for i := range all { // Initialize all the nodes
38 | peer[i] = make(chan *Message, 3*nnode*maxSteps)
39 | all[i] = NewNode(i, thres, nnode, send)
40 | if maxTicket > 0 {
41 | all[i].Rand = func() int64 {
42 | return rand.Int63n(int64(maxTicket))
43 | }
44 | }
45 | }
46 | wg := &sync.WaitGroup{}
47 | for _, n := range all { // Run the nodes on separate goroutines
48 | wg.Add(1)
49 | go n.run(maxSteps, peer, wg)
50 | }
51 | wg.Wait()
52 | testResults(t, all) // Report test results
53 | })
54 | }
55 |
56 | // Dump the consensus state of node n in round s
57 | func (n *Node) testDump(t *testing.T, s, nnode int) {
58 | r := &n.m.QSC[s]
59 | t.Errorf("%v %v conf %v %v re %v %v spoil %v %v",
60 | n.m.From, s, r.Conf.From, r.Conf.Tkt,
61 | r.Reconf.From, r.Reconf.Tkt, r.Spoil.From, r.Spoil.Tkt)
62 | }
63 |
64 | // Globally sanity-check and summarize each node's observed results.
65 | func testResults(t *testing.T, all []*Node) {
66 | for i, ni := range all {
67 | commits := 0
68 | for s, si := range ni.m.QSC {
69 | if si.Commit {
70 | commits++
71 | for _, nj := range all { // verify consensus
72 | if nj.m.QSC[s].Conf.From !=
73 | si.Conf.From {
74 |
75 | t.Errorf("%v %v UNSAFE", i, s)
76 | ni.testDump(t, s, len(all))
77 | nj.testDump(t, s, len(all))
78 | }
79 | }
80 | }
81 | }
82 | t.Logf("node %v committed %v of %v (%v%% success rate)",
83 | i, commits, len(ni.m.QSC), (commits*100)/len(ni.m.QSC))
84 | }
85 | }
86 |
87 | // Run QSC consensus for a variety of test cases.
88 | func TestQSC(t *testing.T) {
89 | testRun(t, 1, 1, 100000, 0) // Trivial case: 1 of 1 consensus!
90 | testRun(t, 2, 2, 100000, 0) // Another trivial case: 2 of 2
91 |
92 | testRun(t, 2, 3, 100000, 0) // Standard f=1 case
93 | testRun(t, 3, 5, 100000, 0) // Standard f=2 case
94 | testRun(t, 4, 7, 10000, 0) // Standard f=3 case
95 | testRun(t, 5, 9, 10000, 0) // Standard f=4 case
96 | testRun(t, 11, 21, 10000, 0) // Standard f=10 case
97 |
98 | testRun(t, 3, 3, 100000, 0) // Larger-than-minimum thresholds
99 | testRun(t, 6, 7, 10000, 0)
100 | testRun(t, 9, 10, 10000, 0)
101 |
102 | // Test with low-entropy tickets: hurts commit rate, but still safe!
103 | testRun(t, 2, 3, 100000, 1) // Limit case: will never commit
104 | testRun(t, 2, 3, 100000, 2) // Extreme low-entropy: rarely commits
105 | 	testRun(t, 2, 3, 100000, 3) // A bit better but still bad...
106 | }
107 |
--------------------------------------------------------------------------------
/go/model/node.go:
--------------------------------------------------------------------------------
1 | package model
2 |
3 | import "math/rand"
4 |
5 | // Type represents the type of a QSC message: either Raw, Ack, or Wit.
6 | //
7 | // At the start of each time step a node broadcasts a Raw message,
8 | // which proposes a block for the consensus round starting at this step
9 | // and solicits witness acknowledgments to the proposal.
10 | //
11 | // Nodes that receive a Raw message during the same time step
12 | // reply with a unicast Ack message to Raw message's sender,
13 | // acknowledging that they have seen the sender's proposal
14 | // and merged in its QSC state.
15 | //
16 | // Once a node has received a threshold of Ack messages to its Raw proposal,
17 | // the node broadcasts a Wit message to announce that its proposal is witnessed.
18 | // Nodes wait to collect a threshold of Wit messages as their condition
19 | // to advance to the next time step and broadcast their next Raw message.
20 | //
21 | type Type int
22 |
23 | const (
24 | // Raw unwitnessed QSC proposal
25 | Raw Type = iota
26 | // Ack is the acknowledgment of a proposal
27 | Ack
28 | // Wit is the threshold witness confirmation of a proposal
29 | Wit
30 | )
31 |
32 | // Message contains the information nodes must pass in messages
33 | // both to run the TLC clocking protocol and achieve QSC consensus.
34 | //
35 | // This implementation of QSC performs no message marshaling or unmarshalling;
36 | // the client using it must handle message wire-format serialization.
37 | // However, the Message struct is defined so as to be compatible with
38 | // standard Go encoders such as encoding/gob or encoding/json.
39 | // The client may also marshal/unmarshal its own larger message struct
40 | // containing a superset of the information here,
41 | // such as to attach semantic content in some form to consensus proposals.
42 | type Message struct {
43 | From int // Node number of node that sent this message
44 | Step int // Logical time step this message is for
45 | 	Type Type   // Message type: Raw, Ack, or Wit
46 | Tkt uint64 // Genetic fitness ticket for consensus
47 | QSC []Round // QSC consensus state for rounds ending at Step or later
48 | }
49 |
50 | // Node contains per-node state and configuration for TLC and QSC.
51 | // Use NewNode to create and properly initialize an instance
52 | // with the mandatory configuration parameters.
53 | // Public fields in this struct are optional configuration settings,
54 | // which NewNode initializes to defaults but the caller may change
55 | // after calling NewNode but before commencing protocol execution.
56 | //
57 | // Consensus uses the configurable Rand function to choose "genetic fitness"
58 | // lottery tickets for each node's proposal in each round.
59 | // Only the low 63 bits of the returned int64 are used.
60 | // This defaults to using the system's math/rand.Int63().
61 | // To tolerate sophisticated network denial-of-service attackers,
62 | // a full implementation should use cryptographic randomness
63 | // and hide the tickets from the network using encryption (e.g., TLS).
64 | //
65 | // The Rand function must not be changed once the Node is in operation.
66 | // All nodes must use the same nonnegative random number distribution.
67 | // Ticket collisions are not a problem as long as they are rare,
68 | // which is why 63 bits of entropy is sufficient.
69 | //
70 | type Node struct {
71 | m Message // Template for messages we send
72 |
73 | thres int // TLC message and witness thresholds
74 | nnode int // Total number of nodes
75 | send func(peer int, msg *Message) // Function to send message to a peer
76 |
77 | acks int // # acknowledgments we've received in this step
78 | wits int // # threshold witnessed messages seen this step
79 |
80 | Rand func() int64 // Function to generate random genetic fitness tickets
81 | }
82 |
83 | // NewNode creates and initializes a new Node with the specified group configuration.
84 | // The parameters to NewNode are the mandatory Node configuration parameters:
85 | // self is this node's number, thres is the TLC message and witness threshold,
86 | // nnode is the total number of nodes,
87 | // and send is a function to send a Message to a given peer node number.
88 | //
89 | // Optional configuration is represented by fields in the created Node struct,
90 | // which the caller may modify before commencing the consensus protocol.
91 | //
92 | func NewNode(self, thres, nnode int, send func(peer int, msg *Message)) (n *Node) {
93 | return &Node{
94 | m: Message{From: self, Step: -1,
95 | QSC: make([]Round, 3)}, // "rounds" ending in steps 0-2
96 | thres: thres, nnode: nnode, send: send,
97 | Rand: rand.Int63}
98 | }
99 |
--------------------------------------------------------------------------------
/go/model/qsc.go:
--------------------------------------------------------------------------------
1 | package model
2 |
3 | // Best is a record representing either a best confirmed proposal,
4 | // or a best potential spoiler competing with the best confirmed proposal,
5 | // used in the Round struct.
6 | //
7 | // In each case, the only information we really need is
8 | // the genetic fitness lottery ticket of the "best" proposal seen so far,
9 | // and which node produced that proposal.
10 | // This optimization works only in the non-Byzantine QSC consensus protocol,
11 | // because Byzantine consensus requires that the lottery tickets be
12 | // unknown and unbiasable to everyone until the consensus round completes.
13 | //
14 | // When we're collecting the best potential spoiler proposal -
15 | // the proposal with the highest ticket regardless of whether it's confirmed -
16 | // we must keep track of ticket collisions,
17 | // in case one colliding proposal might "win" if not spoiled by the other.
18 | // When we detect a spoiler collision, we simply set From to -1,
19 | // an invalid node number that will be unequal to, and hence properly "spoil",
20 | // a confirmed or reconfirmed proposal with the same ticket from any node.
21 | //
22 | type Best struct {
23 | From int // Node the proposal is from (spoiler: -1 for tied tickets)
24 | Tkt uint64 // Proposal's genetic fitness ticket
25 | }
26 |
27 | // Find the Best of two records primarily according to highest ticket number.
28 | // For spoilers, detect and record ticket collisions with invalid node number.
29 | func (b *Best) merge(o *Best, spoiler bool) {
30 | if o.Tkt > b.Tkt {
31 | *b = *o // strictly better ticket
32 | } else if o.Tkt == b.Tkt && o.From != b.From && spoiler {
33 | b.From = -1 // record ticket collision
34 | }
35 | }
36 |
37 | // Round encapsulates all the QSC state needed for one consensus round:
38 | // the best potential "spoiler" proposal regardless of confirmation status,
39 | // the best confirmed (witnessed) proposal we've seen so far in the round,
40 | // and the best reconfirmed (double-witnessed) proposal we've seen so far.
41 | // Finally, at the end of the round, we set Commit to true if
42 | // the best confirmed proposal in Conf has definitely been committed.
43 | type Round struct {
44 | Spoil Best // Best potential spoiler(s) we've found so far
45 | Conf Best // Best confirmed proposal we've found so far
46 | Reconf Best // Best reconfirmed proposal we've found so far
47 | Commit bool // Whether we confirm this round successfully committed
48 | }
49 |
50 | // Merge QSC round info from an incoming message into our round history
51 | func mergeQSC(b, o []Round) {
52 | for i := range b {
53 | b[i].Spoil.merge(&o[i].Spoil, true)
54 | b[i].Conf.merge(&o[i].Conf, false)
55 | b[i].Reconf.merge(&o[i].Reconf, false)
56 | }
57 | }
58 |
59 | // The TLC layer upcalls this method on advancing to a new time-step,
60 | // with sets of proposals recently seen (saw) and threshold witnessed (wit).
61 | func (n *Node) advanceQSC() {
62 |
63 | // Choose a fresh genetic fitness ticket for this proposal
64 | n.m.Tkt = uint64(n.Rand()) | (1 << 63) // Ensure it's greater than zero
65 |
66 | // Initialize consensus state for the round starting at step.
67 | // Find best spoiler, breaking ticket ties in favor of higher node
68 | newRound := Round{Spoil: Best{From: n.m.From, Tkt: n.m.Tkt}}
69 | n.m.QSC = append(n.m.QSC, newRound)
70 |
71 | // Decide if the just-completed consensus round successfully committed.
72 | r := &n.m.QSC[n.m.Step]
73 | r.Commit = r.Conf.From == r.Reconf.From && r.Conf.From == r.Spoil.From
74 | }
75 |
76 | // TLC layer upcalls this to inform us that our proposal is threshold witnessed
77 | func (n *Node) witnessedQSC() {
78 |
79 | // Our proposal is now confirmed in the consensus round just starting
80 | // Find best confirmed proposal, breaking ties in favor of lower node
81 | myBest := &Best{From: n.m.From, Tkt: n.m.Tkt}
82 | n.m.QSC[n.m.Step+3].Conf.merge(myBest, false)
83 |
84 | // Find reconfirmed proposals for the consensus round that's in step 1
85 | n.m.QSC[n.m.Step+2].Reconf.merge(&n.m.QSC[n.m.Step+2].Conf, false)
86 | }
87 |
--------------------------------------------------------------------------------
/go/model/qscod/README.md:
--------------------------------------------------------------------------------
1 | This Go package provides a minimal implementation of
2 | Que Sera Consensus (QSC) built on Threshold Logical Clocks (TLC)
3 | for fail-stop, non-Byzantine environments.
4 | For background information on QSC and TLC,
5 | and other model implementations in several languages, please see the
6 | [top level of this repository](https://github.com/dedis/tlc/).
7 | For more details on this package see the code and its
8 | [GoDoc documentation](https://godoc.org/github.com/dedis/tlc/go/model/qscod).
9 |
--------------------------------------------------------------------------------
/go/model/qscod/core/cli.go:
--------------------------------------------------------------------------------
1 | // Package core implements the minimal core of the QSCOD consensus algorithm
2 | // for client-driven "on-demand" consensus.
3 | //
4 | // This implementation of QSCOD builds on the TLCB and TLCR
5 | // threshold logical clock algorithms.
6 | // These algorithms are extremely simple but do impose one constraint:
7 | // the number of failing nodes must be at most one-third the group size.
8 | //
9 | // The unit tests for this package are in the test sub-package,
10 | // so that useful test framework code can be shared with other packages
11 | // without requiring any of it to be imported into development builds.
12 | // (Too bad Go doesn't allow packages to export and import test code.)
13 | //
14 | package core
15 |
16 | //import "fmt"
17 | import "sync"
18 | import "context"
19 |
20 | // Store represents an interface to one of the n key/value stores
21 | // representing the persistent state of each of the n consensus group members.
22 | // A Store's keys are integer TLC time-steps,
23 | // and its values are Value structures.
24 | //
25 | // WriteRead(v) attempts to write value v to the store at step v.S,
26 | // returning the first value successfully written at that step by any client.
27 | // WriteRead may also return a value from any higher time-step,
28 | // if other clients have moved the store's state beyond v.S.
29 | //
30 | // This interface intentionally provides no means to return an error.
31 | // If WriteRead encounters an error that might be temporary or recoverable,
32 | // then it should just keep trying (perhaps with appropriate backoff).
33 | // This is the fundamental idea of asynchronous fault tolerant consensus:
34 | // to tolerate individual storage node faults, persistently without giving up,
35 | // waiting for as long as it takes for the store to become available again.
36 | //
37 | // If the application encounters an error that warrants a true global failure,
38 | // then it should cancel the context passed to Client.Run,
39 | // which will eventually cause all the worker threads to terminate.
40 | // In this case, the application can cancel any active WriteRead calls,
41 | // which may simply return the value v that was requested to be written
42 | // in order to allow the per-node worker thread to terminate cleanly.
43 | //
44 | type Store interface {
45 | WriteRead(v Value) Value
46 | }
47 |
48 | // Value represents the values that a consensus node's key/value Store maps to.
49 | type Value struct {
50 | S int64 // TLC step number this broadcast value is for
51 | P string // Application data string for this proposal
52 | I int64 // Random integer priority for this proposal
53 | R, B Set // Read set and broadcast set from TLCB
54 | }
55 |
56 | // Set represents a set of proposed values from the same time-step,
57 | // indexed by integer node numbers.
58 | type Set map[int]Value
59 |
60 | // best returns some maximum-priority Value in a Set,
61 | // together with a flag indicating whether the returned value is uniquely best,
62 | // i.e., no proposal with different application data is tied for best priority.
63 | func (S Set) best() (bn int, bv Value, bu bool) {
64 | for n, v := range S {
65 | if v.I >= bv.I {
66 | // A new best value is unique (so far)
67 | // if its priority is strictly higher than the last,
68 | // or if it has equal priority, was unique so far,
69 | // and is proposing identical application data.
70 | bn, bv, bu = n, v, v.I > bv.I || (bu && v.P == bv.P)
71 | }
72 | }
73 | return bn, bv, bu
74 | }
75 |
76 | // Client represents a logical client that can propose transactions
77 | // to the consensus group and drive the QSC/TLC state machine forward
78 | // asynchronously across the key/value stores defining the group's state.
79 | //
80 | // The caller must initialize the public variables
81 | // to represent a valid QSCOD configuration,
82 | // before invoking Client.Run to run the consensus algorithm.
83 | // The public configuration variables must not be changed
84 | // after starting the client.
85 | //
86 | // KV is a slice containing interfaces to each of the key/value stores
87 | // that hold the persistent state of each node in the consensus group.
88 | // The total number of nodes N is defined to be len(KV).
89 | //
90 | // Tr and Ts are the receive and spread thresholds, respectively.
91 | // To ensure liveness against up to F slow or crashed nodes,
92 | // the receive threshold must exclude the F failed nodes: i.e., Tr <= N-F.
93 | // To ensure consistency (safety), the constraint Tr+Ts > N must hold.
94 | // Finally, to ensure that each round enjoys a high probability
95 | // of successful commitment, it should be the case that N >= 3F.
96 | // Thus, given F and N >= 3F, it is safe to set Tr = N-F and Ts = N-Tr+1.
97 | // The precise minimum threshold requirements are slightly more subtle,
98 | // but this is a safe and simpler configuration rule.
99 | //
100 | // Pr is a callback function that the Client calls regularly while running,
101 | // to update the caller's knowledge of committed proposals
102 | // and to obtain the proposal data the client should next try to commit.
103 | // Client passes to Pr the step number and proposal Data string
104 | // of the most recent state it knows about, together with a flag
105 | // indicating whether that proposal is known to have been committed.
106 | // This known state will advance regularly across calls,
107 | // but may not change on each call and may not even be monotonic.
108 | // Pr returns a string representing the new preferred proposal Data
109 | // that the Client will subsequently attempt to commit,
110 | // together with a random priority value for that proposal.
111 | //
112 | // The returned priorities are the non-negative random numbers that QSCOD
113 | // requires for symmetry breaking.
114 | // In a production system, these random numbers should have high entropy
115 | // for maximum performance (minimum likelihood of collisions),
116 | // and should be generated from a cryptographically strong private source
117 | // for maximum protection against denial-of-service attacks in the network.
118 | //
119 | type Client struct {
120 | KV []Store // Per-node key/value state storage interfaces
121 | Tr, Ts int // Receive and spread threshold configuration
122 |
123 | Pr func(int64, string, bool) (string, int64) // Proposal function
124 |
125 | mut sync.Mutex // Mutex protecting this client's state
126 | }
127 |
128 | type work struct {
129 | cond *sync.Cond // For awaiting threshold conditions
130 | val Value // Value template each worker will try to write
131 | kvc Set // Key/value cache collected for this time-step
132 | max Value // Value with highest time-step we must catch up to
133 | next *work // Forward pointer to next work item
134 | }
135 |
136 | // Run starts a client running with its given configuration parameters,
137 | // proposing transactions and driving the consensus state machine continuously
138 | // forever or until the passed context is cancelled.
139 | //
140 | func (c *Client) Run(ctx context.Context) (err error) {
141 |
142 | // Keep the mutex locked whenever we're not waiting.
143 | c.mut.Lock()
144 | defer c.mut.Unlock()
145 |
146 | // Launch one client thread to drive each of the n consensus nodes.
147 | w := &work{kvc: make(Set), cond: sync.NewCond(&c.mut)}
148 | for i := range c.KV {
149 | go c.worker(i, w)
150 | }
151 |
152 | // Drive consensus state forever or until our context gets cancelled.
153 | for ; ctx.Err() == nil; w = w.next {
154 |
155 | // Wait for a threshold number of worker threads
156 | // to complete the current work-item
157 | for len(w.kvc) < c.Tr {
158 | w.cond.Wait()
159 | }
160 |
161 | //str := fmt.Sprintf("at %v kvc contains:", w.val.S)
162 | //for i, v := range w.kvc {
163 | // str += fmt.Sprintf(
164 | // "\n node %v step %v data %q pri %v R %v B %v",
165 | // i, v.S, v.P, v.I, len(v.R), len(v.B))
166 | //}
167 | //println(str)
168 |
169 | // Set the next work-item pointer in the current work-item,
170 | // so that the worker threads know there will be a next item.
171 | w.next = &work{kvc: make(Set), cond: sync.NewCond(&c.mut)}
172 |
173 | // Wake up worker threads waiting for a next item to appear
174 | w.cond.Broadcast()
175 |
176 | // Decide on the next step number and value to broadcast,
177 | // based on the threshold set we collected,
178 | // which is now immutable and consistent across threads.
179 | // v := Value{P:Head{Step:w.max.S+1}}
180 | nv := &w.next.val
181 | // v.S = w.max.S+1
182 | nv.S = w.val.S + 1
183 | switch {
184 |
185 | case w.max.S > w.val.S:
186 |
187 | // Some node already reached a higher time-step.
188 | // Our next work item is simply to catch up all nodes
189 | // at least to the highest-known step we discovered.
190 | //println("catching up from", w.val.S, "to", w.max.S)
191 | *nv = w.max
192 |
193 | case (w.val.S & 1) == 0: // finishing even-numbered step
194 |
195 | // Complete the first TLCR broadcast
196 | // and start the second within a TLCB round.
197 | // The value for the second broadcast is simply
198 | // the threshold receive set from the first.
199 | nv.R = w.kvc
200 |
201 | case (w.val.S & 3) == 1:
202 |
203 | // Complete the first TLCB call in a QSCOD round
204 | // and start the second TLCB call for the round.
205 |
206 | // Calculate valid potential (still tentative)
207 | // R and B sets from the first TLCB call in this round,
208 | // and include them in the second TLCB broadcast.
209 | R0, B0 := c.tlcbRB(w.kvc)
210 |
211 | // Pick any best confirmed proposal from B0
212 | // as our broadcast for the second TLCB round.
213 | _, v2, _ := B0.best()
214 |
215 | // Set the value for the second TLCB call to broadcast
216 | nv.I, nv.R, nv.B = v2.I, R0, B0
217 |
218 | case (w.val.S & 3) == 3:
219 |
220 | // Complete a prior QSCOD round and start a new one.
221 |
222 | // First, calculate valid potential R2 and B2 sets from
223 | // the second TLCB call in the completed QSCOD round.
224 | R2, B2 := c.tlcbRB(w.kvc)
225 |
226 | // We always adopt some best confirmed proposal from R2
227 | // as our own (still tentative so far) view of history.
228 | // If this round successfully commits,
229 | // then our b2 will be the same as everyone else's,
230 | // even if we fail below to realize that fact.
231 | _, b2, _ := R2.best()
232 |
233 | // Find the best-known proposal b0 in some node's R0.
234 | // We can get an R0 set from the first round in b2.R.
235 | // Also determine if b0 was uniquely best in this R0.
236 | // Our R2 and B2 sets will be subsets of any valid R0.
237 | n0, b0, u0 := b2.R.best()
238 |
239 | // See if we can determine b2 to have been committed:
240 | // if b0==b2 is the uniquely-best eligible proposal.
241 | // This test may succeed only for some nodes in a round.
242 | // If b0 is uniquely-best in R0 we can compare priorities
243 | // to see if two values are the same node's proposal.
244 | // // Never commit proposals that don't change the Data,
245 | // // since we use those to represent "no-op" proposals.
246 | com := u0 && b0.I == b2.I && b0.I == B2[n0].I
247 | if com {
248 | // if u0 && b0.I == b2.I && b0.I == B2[n0].I &&
249 | // b0.P.Data != v.C.Data
250 |
251 | // b0.P is the original proposal with data,
252 | // which becomes the new current commit C.
253 | // The previous current commit
254 | // becomes the last commit L.
255 | //println("committed", b0.S, "data", b0.P)
256 | // v.L, v.C = v.C, b0.P
257 | }
258 |
259 | // Set the value for the first TLCB call
260 | // in the next QSCOD round to broadcast,
261 | // containing a proposal for the next round.
262 | nv.P, nv.I = c.Pr(b0.S, b0.P, com)
263 | }
264 |
265 | //fmt.Printf("at %v next step %v pri %v prop %q R %v B %v\n",
266 | // w.val.S, nv.S, nv.I, nv.P, len(nv.R), len(nv.B))
267 |
268 | //if nv.S < w.max.S {
269 | // println("no progress: s", w.val.S, "lv", w.max.S,
270 | // "to", nv.S)
271 | //}
272 | }
273 |
274 | // Signal the worker threads to terminate with an all-nil work-item
275 | w.next = &work{}
276 | w.cond.Broadcast()
277 |
278 | // Any slow client threads will continue in the background
279 | // until they catch up with the others or successfully get cancelled.
280 | return ctx.Err()
281 | }
282 |
283 | // worker handles a goroutine dedicated to submitting WriteRead requests
284 | // to each consensus group node asynchronously without delaying the main thread.
285 | //
286 | // We could in principle launch a separate goroutine per node each time step,
287 | // which would be even simpler to manage and provide higher parallelism.
288 | // But this would risk creating a ton of outstanding concurrent goroutines
289 | // trying to access the same slow node(s) and overloading those nodes further,
290 | // or creating local resource pressures such as too many open file descriptors
291 | // in case each WriteRead call opens a new file descriptor or socket, etc.
292 | // So we have only one worker per consensus group node do everything serially,
293 | // limiting resource usage while protecting the main thread from slow nodes.
294 | //
295 | func (c *Client) worker(node int, w *work) {
296 |
297 | // Keep Client state locked while we're not waiting
298 | c.mut.Lock()
299 |
300 | // Process work-items defined by the main thread in sequence,
301 | // terminating when we encounter a work-item with a nil kvc.
302 | for ; w.kvc != nil; w = w.next {
303 |
304 | // // Pull the next Value template we're supposed to write
305 | // v := w.val
306 |
307 | // // In steps that start a new QSC round with new proposals,
308 | // // each node gets its own independent random priority
309 | // // even when they're proposals of the same application value.
310 | // if (v.S & 3) == 0 {
311 | // v.I = c.RV()
312 | // }
313 |
314 | //println(w, "before WriteRead step", w.val.S)
315 |
316 | // Try to write new value, then read whatever the winner wrote.
317 | c.mut.Unlock()
318 | v := c.KV[node].WriteRead(w.val)
319 | c.mut.Lock()
320 |
321 | //println(w, "after WriteRead step", w.val.S, "read", v.S)
322 |
323 | //if v.S < w.val.S {
324 | // println("read back value from old step", v.S, w.val.S)
325 | //}
326 |
327 | // Collect a threshold number of last-step values in w.kvc,
328 | // after which work-item w will be considered complete.
329 | // Don't modify kvc or max after reaching the threshold tr,
330 | // because they are expected to be immutable afterwards.
331 | if len(w.kvc) < c.Tr {
332 |
333 | // Record the actual value read in the work-item
334 | w.kvc[node] = v
335 |
336 | // Track the highest last-step value read on any node,
337 | // which may be higher than the one we tried to write
338 | // if we need to catch up with a faster node.
339 | if v.S > w.max.S {
340 | w.max = v
341 | }
342 |
343 | // Wake up the main thread when we reach the threshold
344 | if len(w.kvc) == c.Tr {
345 | w.cond.Broadcast()
346 | }
347 | }
348 |
349 | // Wait until the main thread has created a next work-item.
350 | for w.next == nil {
351 | w.cond.Wait()
352 | }
353 | }
354 |
355 | c.mut.Unlock()
356 | }
357 |
358 | // tlcbRB calculates the receive (R) and broadcast (B) sets
359 | // returned by the TLCB algorithm after its second TLCR call.
360 | //
361 | // The returned R and B sets are only tentative,
362 | // representing possible threshold receive-set and broadcast-set outcomes
363 | // from this TLCB invocation, computed locally by this client.
364 | // These locally-computed sets cannot be relied on to be definite for this node
365 | // until the values computed from them are committed via Store.WriteRead.
366 | //
367 | func (c *Client) tlcbRB(kvc Set) (Set, Set) {
368 |
369 | // Using the tentative client-side receive-set from the second TLCR,
370 | // compute potential receive-set (R) and broadcast-set (B) sets
371 | // to return from TLCB.
372 | R, B, Bc := make(Set), make(Set), make([]int, len(c.KV))
373 | for _, v := range kvc {
374 | for j, vv := range v.R {
375 | R[j] = vv // R has all values we've seen
376 | Bc[j]++ // How many nodes have seen vv?
377 | if Bc[j] >= c.Ts { // B has only those reaching ts
378 | B[j] = vv
379 | }
380 | }
381 | }
382 | return R, B
383 | }
384 |
--------------------------------------------------------------------------------
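As a rough usage sketch (not part of the package; the example package, wrapper function, and proposal strings below are hypothetical), a caller might configure the Client above as follows, with thresholds computed by the rule Tr = N-F and Ts = N-Tr+1 from the comment. For F = 1 and N = 3F = 3 this gives Tr = 2 and Ts = 2, so Tr+Ts = 4 > N.

    package example // hypothetical wrapper package, for illustration only

    import (
        "context"
        "fmt"
        "math/rand"

        "github.com/dedis/tlc/go/model/qscod/core"
    )

    // runConsensus drives one QSCOD client over an existing set of stores,
    // such as the FileStore implementations elsewhere in this repository.
    func runConsensus(ctx context.Context, kv []core.Store, f int) error {
        n := len(kv) // group size N, expected to be at least 3F
        c := core.Client{
            KV: kv,
            Tr: n - f, // receive threshold Tr = N-F
            Ts: f + 1, // spread threshold Ts = N-Tr+1, so Tr+Ts > N
            Pr: func(step int64, cur string, committed bool) (string, int64) {
                if committed {
                    fmt.Printf("committed at step %d: %q\n", step, cur)
                }
                // propose fresh data with a random symmetry-breaking priority
                return fmt.Sprintf("proposal after step %d", step), rand.Int63()
            },
        }
        return c.Run(ctx) // runs until ctx is cancelled
    }
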
/go/model/qscod/core/test/cli.go:
--------------------------------------------------------------------------------
1 | // Package test contains shareable code for testing instantiations of QSCOD.
2 | package test
3 |
4 | import (
5 | "context"
6 | "fmt"
7 | "math/rand"
8 | "sync"
9 | "testing"
10 |
11 | . "github.com/dedis/tlc/go/model/qscod/core"
12 | )
13 |
14 | // Object to record the common total order and verify it for consistency
15 | type testOrder struct {
16 | hist []string // all history known to be committed so far
17 | mut sync.Mutex // mutex protecting this reference order
18 | }
19 |
20 | // When a client reports a proposal committed at a given step,
21 | // record that in the testOrder and check it for global consistency.
22 | func (to *testOrder) committed(t *testing.T, step int64, prop string) {
23 | to.mut.Lock()
24 | defer to.mut.Unlock()
25 |
26 | // Ensure history slice is long enough
27 | for step >= int64(len(to.hist)) {
28 | to.hist = append(to.hist, "")
29 | }
30 |
31 | // Check commit consistency across all concurrent clients
32 | switch {
33 | case to.hist[step] == "":
34 | to.hist[step] = prop
35 | case to.hist[step] != prop:
36 | t.Errorf("Inconsistency at %v:\n old %q\n new %q",
37 | step, to.hist[step], prop)
38 | }
39 | }
40 |
41 | // testCli creates a test client with particular configuration parameters.
42 | func testCli(t *testing.T, self, f, maxstep, maxpri int,
43 | kv []Store, to *testOrder, wg *sync.WaitGroup) {
44 |
45 | // Create a cancelable context for the test run
46 | ctx, cancel := context.WithCancel(context.Background())
47 |
48 | // Our proposal function simply collects and consistency-checks
49 | // committed Heads until a designated time-step is reached.
50 | pr := func(step int64, cur string, com bool) (string, int64) {
51 | //fmt.Printf("cli %v saw commit %v %q\n", self, C.Step, C.Data)
52 |
53 | // Consistency-check the proposal known to be committed
54 | if com {
55 | to.committed(t, step, cur)
56 | }
57 |
58 | // Stop once we reach the step limit
59 | if step >= int64(maxstep) {
60 | cancel()
61 | }
62 |
63 | // Use the simple Int63n for random number generation,
64 | // with values constrained to be lower than maxpri for testing.
65 | // A real deployment should use cryptographic randomness
66 | // and should preferably be high-entropy,
67 | // close to the full 64 bits.
68 | pri := rand.Int63n(int64(maxpri))
69 |
70 | return fmt.Sprintf("cli %v proposal %v", self, step), pri
71 | }
72 |
73 | // Start the test client with appropriate parameters assuming
74 | // n=3f, tr=2f, tb=f, and ts=f+1, satisfying TLCB's constraints.
75 | c := Client{KV: kv, Tr: 2 * f, Ts: f + 1, Pr: pr}
76 | c.Run(ctx)
77 |
78 | wg.Done()
79 | }
80 |
81 | // Run a consensus test case on a given set of Store interfaces
82 | // and with the specified group configuration and test parameters.
83 | func TestRun(t *testing.T, kv []Store, nfail, ncli, maxstep, maxpri int) {
84 |
85 | // Create a reference total order for safety checking
86 | to := &testOrder{}
87 |
88 | desc := fmt.Sprintf("F=%v,N=%v,Clients=%v,Commits=%v,Tickets=%v",
89 | nfail, len(kv), ncli, maxstep, maxpri)
90 | t.Run(desc, func(t *testing.T) {
91 |
92 | // Simulate the appropriate number of concurrent clients
93 | wg := &sync.WaitGroup{}
94 | for i := 0; i < ncli; i++ {
95 | wg.Add(1)
96 | go testCli(t, i, nfail, maxstep, maxpri, kv, to, wg)
97 | }
98 | wg.Wait()
99 | })
100 | }
101 |
--------------------------------------------------------------------------------
/go/model/qscod/core/test/cli_test.go:
--------------------------------------------------------------------------------
1 | package test
2 |
3 | import (
4 | "sync"
5 | "testing"
6 |
7 | . "github.com/dedis/tlc/go/model/qscod/core"
8 | )
9 |
10 | // Trivial intra-process key-value store implementation for testing
11 | type testStore struct {
12 | mut sync.Mutex // synchronization for testStore state
13 | v Value // the latest value written
14 | }
15 |
16 | // WriteRead implements the Store interface with a simple intra-process map.
17 | func (ts *testStore) WriteRead(v Value) Value {
18 | ts.mut.Lock()
19 | defer ts.mut.Unlock()
20 |
21 | // Write value v only if it's newer than the last value written.
22 | if v.S > ts.v.S {
23 | ts.v = v
24 | }
25 |
26 | // Then return whatever was last written, regardless.
27 | return ts.v
28 | }
29 |
30 | // Run a consensus test case with the specified parameters.
31 | func testRun(t *testing.T, nfail, nnode, ncli, maxstep, maxpri int) {
32 |
33 | // Create a simple test key/value store representing each node
34 | kv := make([]Store, nnode)
35 | for i := range kv {
36 | kv[i] = &testStore{}
37 | }
38 |
39 | TestRun(t, kv, nfail, ncli, maxstep, maxpri)
40 | }
41 |
42 | // Test the Client with a trivial in-memory key/value Store implementation.
43 | func TestClient(t *testing.T) {
44 | testRun(t, 1, 3, 1, 100000, 100) // Standard f=1 case
45 | testRun(t, 1, 3, 2, 100000, 100)
46 | testRun(t, 1, 3, 10, 100000, 100)
47 | testRun(t, 1, 3, 20, 100000, 100)
48 | testRun(t, 1, 3, 50, 100000, 100)
49 | testRun(t, 1, 3, 100, 100000, 100)
50 |
51 | testRun(t, 2, 6, 10, 100000, 100) // Standard f=2 case
52 | testRun(t, 3, 9, 10, 100000, 100) // Standard f=3 case
53 | testRun(t, 4, 12, 10, 100000, 100) // Standard f=4 case
54 | testRun(t, 5, 15, 10, 100000, 100) // Standard f=5 case
55 |
56 | // Test with low-entropy tickets: hurts commit rate, but still safe!
57 | testRun(t, 1, 3, 10, 100000, 2) // Extreme low-entropy: rarely commits
58 | testRun(t, 1, 3, 10, 100000, 3) // A bit better but still bad...
59 | }
60 |
--------------------------------------------------------------------------------
/go/model/qscod/encoding/enc.go:
--------------------------------------------------------------------------------
1 | // Package encoding implements serialization of Values for QSCOD.
2 | // It currently just uses GOB encoding for simplicity,
3 | // but we should change that to something not Go-specific.
4 | package encoding
5 |
6 | import (
7 | "bytes"
8 | "encoding/gob"
9 |
10 | . "github.com/dedis/tlc/go/model/qscod/core"
11 | )
12 |
13 | // Encode a Value for serialized transmission.
14 | func EncodeValue(v Value) ([]byte, error) {
15 | buf := &bytes.Buffer{}
16 | enc := gob.NewEncoder(buf)
17 | if err := enc.Encode(v); err != nil {
18 | return nil, err
19 | }
20 | return buf.Bytes(), nil
21 | }
22 |
23 | // Decode a Value from its serialized format.
24 | func DecodeValue(b []byte) (v Value, err error) {
25 | r := bytes.NewReader(b)
26 | dec := gob.NewDecoder(r)
27 | err = dec.Decode(&v)
28 | return
29 | }
30 |
--------------------------------------------------------------------------------
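A brief round-trip sketch of the two functions above (the helper name and value are hypothetical; assumes imports of the core and encoding packages):

    // roundTrip shows EncodeValue and DecodeValue used together.
    func roundTrip() (core.Value, error) {
        v := core.Value{S: 4, P: "hello", I: 42}
        buf, err := encoding.EncodeValue(v)
        if err != nil {
            return core.Value{}, err
        }
        // DecodeValue returns a Value carrying the same S, P, and I fields as v
        return encoding.DecodeValue(buf)
    }
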
/go/model/qscod/fs/casdir/cas_test.go:
--------------------------------------------------------------------------------
1 | // Package casdir tests CAS-based QSCOD over a set of file system CAS stores.
2 | package casdir
3 |
4 | import (
5 | "context"
6 | "fmt"
7 | "os"
8 | "testing"
9 |
10 | . "github.com/dedis/tlc/go/lib/cas"
11 | "github.com/dedis/tlc/go/lib/cas/test"
12 | "github.com/dedis/tlc/go/lib/fs/casdir"
13 | . "github.com/dedis/tlc/go/model/qscod/cas"
14 | )
15 |
16 | // Run a consensus test case with the specified parameters.
17 | func testRun(t *testing.T, nfail, nnode, nclients, nthreads, naccesses int) {
18 |
19 | // Create a test key/value store representing each node
20 | dirs := make([]string, nnode)
21 | for i := range dirs {
22 | dirs[i] = fmt.Sprintf("test-store-%d", i)
23 |
24 | // Remove the test directory if one is left-over
25 | // from a previous test run.
26 | os.RemoveAll(dirs[i])
27 |
28 | // Create the test directory afresh.
29 | fs := &casdir.Store{}
30 | if err := fs.Init(dirs[i], true, true); err != nil {
31 | t.Fatal(err)
32 | }
33 |
34 | // Clean it up once the test is done.
35 | defer os.RemoveAll(dirs[i])
36 | }
37 |
38 | desc := fmt.Sprintf("F=%v,N=%v,Clients=%v,Threads=%v,Accesses=%v",
39 | nfail, nnode, nclients, nthreads, naccesses)
40 | t.Run(desc, func(t *testing.T) {
41 |
42 | // Create a context and cancel it at the end of the test
43 | ctx, cancel := context.WithCancel(context.Background())
44 | defer cancel()
45 |
46 | // Create simulated clients to access the consensus group
47 | clients := make([]Store, nclients)
48 | for i := range clients {
49 |
50 | // Create a set of Store objects for each client
51 | members := make([]Store, nnode)
52 | for j := range members {
53 | fs := &casdir.Store{}
54 | err := fs.Init(dirs[j], false, false)
55 | if err != nil {
56 | t.Fatal(err)
57 | }
58 | members[j] = fs
59 | }
60 |
61 | clients[i] = (&Group{}).Start(ctx, members, nfail)
62 | }
63 |
64 | // Run a standard torture test across all the clients
65 | test.Stores(t, nthreads, naccesses, clients...)
66 | })
67 | }
68 |
69 | func TestConsensus(t *testing.T) {
70 | testRun(t, 1, 3, 1, 10, 10) // Standard f=1 case,
71 | testRun(t, 1, 3, 2, 10, 10) // varying number of clients
72 | testRun(t, 1, 3, 10, 10, 10)
73 | testRun(t, 1, 3, 20, 10, 10)
74 |
75 | testRun(t, 2, 6, 10, 10, 10) // Standard f=2 case
76 | testRun(t, 3, 9, 10, 10, 10) // Standard f=3 case
77 |
78 | // Note: when nnode * nclients gets to be around 120-ish,
79 | // we start running into default max-open-file limits.
80 | }
81 |
--------------------------------------------------------------------------------
/go/model/qscod/fs/simple/store.go:
--------------------------------------------------------------------------------
1 | // Package simple provides a simple file system key/value Store for QSCOD,
2 | // with no support for garbage collection.
3 | // It is intended only for education, testing, and experimentation,
4 | // and not for any production use.
5 | //
6 | package simple
7 |
8 | import (
9 | "context"
10 | "fmt"
11 | "io/ioutil"
12 | "os"
13 | "path/filepath"
14 |
15 | "github.com/dedis/tlc/go/lib/backoff"
16 | "github.com/dedis/tlc/go/lib/fs/atomic"
17 | . "github.com/dedis/tlc/go/model/qscod/core"
18 | "github.com/dedis/tlc/go/model/qscod/encoding"
19 | )
20 |
21 | // FileStore implements a simple QSCOD key/value store
22 | // as a directory in a file system.
23 | // The caller must create the directory designated by Path.
24 | //
25 | type FileStore struct {
26 | Path string // Directory to contain files representing key/value state
27 | }
28 |
29 | // Attempt to write the value v to a file associated with time-step step,
30 | // then read back whichever value was successfully written first.
31 | //
32 | // This implementation simply panics if any file system error occurs.
33 | // A more robust approach suited to asynchronous consensus would be
34 | // to log the error then retry in an exponential-backoff loop.
35 | //
36 | func (fs *FileStore) WriteRead(v Value) (rv Value) {
37 |
38 | try := func() (err error) {
39 |
40 | // Serialize the proposed value
41 | buf, err := encoding.EncodeValue(v)
42 | if err != nil {
43 | return err
44 | }
45 |
46 | // Try to write the file, ignoring already-exists errors
47 | name := fmt.Sprintf("ver-%d", v.S)
48 | path := filepath.Join(fs.Path, name)
49 | err = atomic.WriteFileOnce(path, buf, 0666)
50 | if err != nil && !os.IsExist(err) {
51 | return err
52 | }
53 |
54 | // Read back whatever file was successfully written first there
55 | rbuf, err := ioutil.ReadFile(path)
56 | if err != nil {
57 | return err
58 | }
59 |
60 | // Deserialize the value read
61 | rv, err = encoding.DecodeValue(rbuf)
62 | if err != nil {
63 | return err
64 | }
65 |
66 | return nil
67 | }
68 |
69 | backoff.Retry(context.Background(), try)
70 | return rv
71 | }
72 |
73 | // QSCOD calls Committed to inform us that history comh is committed,
74 | // so we can garbage-collect entries before it in the key/value store.
75 | // But this Store does not implement garbage-collection.
76 | //
77 | func (fs *FileStore) Committed(comh Value) {
78 | // do nothing - no garbage collection
79 | }
80 |
--------------------------------------------------------------------------------
/go/model/qscod/fs/simple/store_test.go:
--------------------------------------------------------------------------------
1 | package simple
2 |
3 | import (
4 | "fmt"
5 | "os"
6 | "testing"
7 |
8 | . "github.com/dedis/tlc/go/model/qscod/core"
9 | . "github.com/dedis/tlc/go/model/qscod/core/test"
10 | )
11 |
12 | // Run a consensus test case with the specified parameters.
13 | func testRun(t *testing.T, nfail, nnode, ncli, maxstep, maxpri int) {
14 |
15 | // Create a test key/value store representing each node
16 | kv := make([]Store, nnode)
17 | for i := range kv {
18 | path := fmt.Sprintf("test-store-%d", i)
19 | ss := &FileStore{path}
20 | kv[i] = ss
21 |
22 | // Remove the test directory if one is left-over
23 | // from a previous test run.
24 | os.RemoveAll(path)
25 |
26 | // Create the test directory afresh.
27 | if err := os.Mkdir(path, 0744); err != nil {
28 | t.Fatal(err)
29 | }
30 |
31 | // Clean it up once the test is done.
32 | defer os.RemoveAll(path)
33 | }
34 |
35 | TestRun(t, kv, nfail, ncli, maxstep, maxpri)
36 | }
37 |
38 | func TestSimpleStore(t *testing.T) {
39 | testRun(t, 1, 3, 1, 10, 100) // Standard f=1 case,
40 | testRun(t, 1, 3, 2, 10, 100) // varying number of clients
41 | testRun(t, 1, 3, 10, 3, 100)
42 | testRun(t, 1, 3, 20, 2, 100)
43 | testRun(t, 1, 3, 40, 2, 100)
44 |
45 | testRun(t, 2, 6, 10, 5, 100) // Standard f=2 case
46 | testRun(t, 3, 9, 10, 3, 100) // Standard f=3 case
47 | testRun(t, 4, 12, 10, 2, 100) // Standard f=4 case
48 | testRun(t, 5, 15, 10, 2, 100) // Standard f=5 case
49 | }
50 |
--------------------------------------------------------------------------------
/go/model/qscod/fs/store/store.go:
--------------------------------------------------------------------------------
1 | // Package store provides a file system key/value Store for QSCOD.
2 | // It uses the cas package to implement versioned write-once and read,
3 | // with garbage collection of old versions before the last known commit.
4 | //
5 | package store
6 |
7 | import (
8 | "context"
9 |
10 | "github.com/dedis/tlc/go/lib/backoff"
11 | "github.com/dedis/tlc/go/lib/fs/verst"
12 | . "github.com/dedis/tlc/go/model/qscod/core"
13 | "github.com/dedis/tlc/go/model/qscod/encoding"
14 | )
15 |
16 | // FileStore implements a QSCOD key/value store
17 | // as a directory in a file system.
18 | //
19 | type FileStore struct {
20 | state verst.State
21 | ctx context.Context
22 | bc backoff.Config
23 | }
24 |
25 | // Initialize FileStore to use a directory at a given file system path.
26 | // If create is true, create the designated directory if it doesn't exist.
27 | // If excl is true, fail if the designated directory already exists.
28 | func (fs *FileStore) Init(ctx context.Context, path string, create, excl bool) error {
29 |
30 | fs.ctx = ctx
31 | return fs.state.Init(path, create, excl)
32 | }
33 |
34 | // SetReport sets the backoff configuration for handling errors that occur
35 | // while attempting to access the key/value store on the file system.
36 | //
37 | // Since we don't know in general which errors may be transitory
38 | // and which are permanent failures, especially on remote file systems,
39 | // FileStore assumes all errors may be transitory, just reports them,
40 | // and keeps trying the access after a random exponential backoff.
41 | //
42 | func (fs *FileStore) SetReport(bc backoff.Config) {
43 | fs.bc = bc
44 | }
45 |
46 | // Attempt to write the value v to a file associated with time-step step,
47 | // then read back whichever value was successfully written first.
48 | // Implements the qscod.Store interface.
49 | //
50 | func (fs *FileStore) WriteRead(v Value) (rv Value) {
51 |
52 | // Don't try to write version 0; that's a virtual placeholder.
53 | if v.S == 0 {
54 | return v
55 | }
56 |
57 | try := func() (err error) {
58 | rv, err = fs.tryWriteRead(v)
59 | return err
60 | }
61 |
62 | fs.bc.Retry(fs.ctx, try)
63 | return rv
64 | }
65 |
66 | func (fs *FileStore) tryWriteRead(val Value) (Value, error) {
67 | ver := val.S
68 |
69 | // Serialize the proposed value
70 | valb, err := encoding.EncodeValue(val)
71 | if err != nil {
72 | return Value{}, err
73 | }
74 | vals := string(valb)
75 |
76 | // Try to write it to the versioned store -
77 | // but don't fret if someone else wrote it or if it has expired.
78 | err = fs.state.WriteVersion(ver, vals)
79 | if err != nil && !verst.IsExist(err) && !verst.IsNotExist(err) {
80 | return Value{}, err
81 | }
82 |
83 | // Now read back whatever value was successfully written.
84 | vals, err = fs.state.ReadVersion(ver)
85 | if err != nil && verst.IsNotExist(err) {
86 |
87 | // The requested version has probably been aged out,
88 | // so catch up to the most recent committed Head.
89 | _, vals, err = fs.state.ReadLatest()
90 | }
91 | if err != nil {
92 | return Value{}, err
93 | }
94 |
95 | // Deserialize the value we read
96 | val, err = encoding.DecodeValue([]byte(vals))
97 | if err != nil {
98 | return Value{}, err
99 | }
100 |
101 | // Expire all versions before this latest one
102 | fs.state.Expire(val.S)
103 |
104 | // Return the value v that we read
105 | return val, err
106 | }
107 |
--------------------------------------------------------------------------------
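A short sketch of creating one FileStore per consensus group member, much as the test below does (the helper name and directory names are hypothetical; assumes imports of context, fmt, and the core package):

    // openGroupStores creates and initializes one FileStore per member.
    func openGroupStores(ctx context.Context, n int) ([]core.Store, error) {
        kv := make([]core.Store, n)
        for i := range kv {
            fs := &FileStore{}
            // create each node's backing directory, failing if it already exists
            if err := fs.Init(ctx, fmt.Sprintf("node-%d", i), true, true); err != nil {
                return nil, err
            }
            kv[i] = fs
        }
        return kv, nil // usable as the KV slice of a core.Client
    }
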
/go/model/qscod/fs/store/store_test.go:
--------------------------------------------------------------------------------
1 | package store
2 |
3 | import (
4 | "context"
5 | "fmt"
6 | "os"
7 | "testing"
8 |
9 | . "github.com/dedis/tlc/go/model/qscod/core"
10 | . "github.com/dedis/tlc/go/model/qscod/core/test"
11 | )
12 |
13 | // Run a consensus test case with the specified parameters.
14 | func testRun(t *testing.T, nfail, nnode, ncli, maxstep, maxpri int) {
15 |
16 | // Create a test key/value store representing each node
17 | kv := make([]Store, nnode)
18 | ctx := context.Background()
19 | for i := range kv {
20 | path := fmt.Sprintf("test-store-%d", i)
21 |
22 | // Remove the test directory if one is left-over
23 | // from a previous test run.
24 | os.RemoveAll(path)
25 |
26 | // Create the test directory afresh.
27 | ss := &FileStore{}
28 | if err := ss.Init(ctx, path, true, true); err != nil {
29 | t.Fatal(err)
30 | }
31 | kv[i] = ss
32 |
33 | // Clean it up once the test is done.
34 | defer os.RemoveAll(path)
35 | }
36 |
37 | TestRun(t, kv, nfail, ncli, maxstep, maxpri)
38 | }
39 |
40 | func TestSimpleStore(t *testing.T) {
41 | testRun(t, 1, 3, 1, 10, 100) // Standard f=1 case,
42 | testRun(t, 1, 3, 2, 10, 100) // varying number of clients
43 | testRun(t, 1, 3, 10, 3, 100)
44 | testRun(t, 1, 3, 20, 2, 100)
45 | testRun(t, 1, 3, 40, 2, 100)
46 |
47 | testRun(t, 2, 6, 10, 5, 100) // Standard f=2 case
48 | testRun(t, 3, 9, 10, 3, 100) // Standard f=3 case
49 |
50 | // Note: when nnode * ncli gets to be around 120-ish,
51 | // we start running into default max-open-file limits.
52 | }
53 |
--------------------------------------------------------------------------------
/go/model/qscod/qscas/doc.go:
--------------------------------------------------------------------------------
1 | // Package qscas provides an implementation of QSCOD consensus
2 | // that both builds on, and provides, a Check-and-Set (CAS) Store interface
3 | // as defined by the tlc/go/lib/cas package.
4 | //
5 | package qscas
6 |
--------------------------------------------------------------------------------
/go/model/qscod/qscas/group.go:
--------------------------------------------------------------------------------
1 | package qscas
2 |
3 | import (
4 | "context"
5 | "sync"
6 |
7 | "github.com/dedis/tlc/go/lib/cas"
8 | "github.com/dedis/tlc/go/model/qscod/core"
9 | )
10 |
11 | // Group implements the cas.Store interface as a QSCOD consensus group.
12 | // After creation, invoke Start to configure the consensus group state,
13 | // then call CompareAndSet to perform CAS operations on the logical state.
14 | type Group struct {
15 | c core.Client // consensus client core
16 | ctx context.Context // group operation context
17 |
18 | mut sync.Mutex // for synchronizing shutdown
19 | wg sync.WaitGroup // counts active CAS operations
20 | done bool // set after group shutdown
21 |
22 | // channel that CAS calls use to propose work to do
23 | ch chan func(int64, string, bool) (string, int64)
24 | }
25 |
26 | // Start initializes g to represent a consensus group comprised of
27 | // particular member nodes, starts it operating, and returns g.
28 | //
29 | // Consensus thresholds are determined by the faulty parameter,
30 | // the maximum number of faulty nodes the group should tolerate.
31 | // For this implementation of QSCOD based on the TLCB and TLCR algorithms,
32 | // faulty should be at most one-third of the total group size.
33 | // If faulty < 0, it is set to one-third of the group size, rounded down.
34 | //
35 | // Start launches worker goroutines that help service CAS requests,
36 | // which will run and consume resources forever unless cancelled.
37 | // To define their lifetime, the caller should pass a cancelable context,
38 | // and cancel it when operations on the Group are no longer required.
39 | //
40 | func (g *Group) Start(ctx context.Context, members []cas.Store, faulty int) *Group {
41 |
42 | // Calculate and sanity-check the threshold configuration parameters.
43 | // For details on where these calculations come from, see:
44 | // https://arxiv.org/abs/2003.02291
45 | N := len(members)
46 | if faulty < 0 {
47 | faulty = N / 3 // Default fault tolerance threshold
48 | }
49 | Tr := N - faulty // receive threshold
50 | Ts := N - Tr + 1 // spread threshold
51 | if Tr <= 0 || Tr > N || Ts <= 0 || Ts > Tr || (Ts+Tr) <= N {
52 | panic("faulty threshold yields unsafe configuration")
53 | }
54 | if N*(Tr-Ts+1)-Tr*(N-Tr) <= 0 { // test if Tb <= 0
55 | panic("faulty threshold yields non-live configuration")
56 | }
57 | //println("N", N, "Tr", Tr, "Ts", Ts)
58 |
59 | // Create a consensus group state instance
60 | g.c = core.Client{Tr: Tr, Ts: Ts}
61 | g.ctx = ctx
62 | g.ch = make(chan func(s int64, p string, c bool) (string, int64))
63 |
64 | // Create a core.Store wrapper around each cas.Store group member
65 | g.c.KV = make([]core.Store, N)
66 | for i := range members {
67 | g.c.KV[i] = &coreStore{Store: members[i], g: g}
68 | }
69 |
70 | // Our proposal function normally just "punts" by waiting for
71 | // an actual proposal to get sent on the group's channel,
72 | // and then we call that to form the proposal as appropriate.
73 | // But we concurrently listen for context cancellation
74 | // and return promptly with a no-op proposal in that case.
75 | g.c.Pr = func(s int64, p string, c bool) (prop string, pri int64) {
76 | for {
77 | select {
78 | case f := <-g.ch: // got a CAS work function to call
79 | if f == nil { // channel was closed
80 | println("Pr: channel closed")
81 | return p, 0 // no-op proposal
82 | }
83 | //println("got work function\n")
84 | prop, pri = f(s, p, c) // call work function
85 | if prop != "" || pri != 0 {
86 | return prop, pri // return its result
87 | }
88 | //println("work function yielded no work")
89 |
90 | case <-ctx.Done(): // our context got cancelled
91 | //println("Pr: cancelled")
92 | return p, 0 // produce no-op proposal
93 | }
94 | }
95 | }
96 |
97 | // Launch the underlying consensus core as a separate goroutine.
98 | // Make sure the group's WaitGroup remains nonzero until
99 | // the context is cancelled and we're ready to shut down.
100 | g.wg.Add(1)
101 | go g.run(ctx)
102 |
103 | return g
104 | }
105 |
106 | // Run consensus in a goroutine
107 | func (g *Group) run(ctx context.Context) {
108 |
109 | // Run the consensus protocol until our context gets cancelled
110 | g.c.Run(ctx)
111 |
112 | // Drain any remaining proposal function sends to the group's channel.
113 | // CompareAndSet won't add any more after g.ctx has been cancelled.
114 | go func() {
115 | for range g.ch {
116 | }
117 | }()
118 |
119 | g.mut.Lock()
120 |
121 | // Wait until no threads are in active CompareAndSet calls.
122 | g.wg.Done()
123 | g.wg.Wait()
124 |
125 | // Now it's safe to close the group's channel.
126 | close(g.ch)
127 | g.done = true
128 |
129 | g.mut.Unlock()
130 | }
131 |
132 | // CompareAndSet conditionally writes a new version and reads the latest,
133 | // implementing the cas.Store interface.
134 | //
135 | func (g *Group) CompareAndSet(ctx context.Context, old, new string) (
136 | version int64, actual string, err error) {
137 |
138 | //println("CAS lastVer", lastVer, "reqVal", reqVal)
139 |
140 | // Record active CompareAndSet calls in a WaitGroup
141 | // so that the group's main goroutine can wait for them to complete
142 | // when shutting down gracefully in response to context cancellation.
143 | // Atomically check that the group is still active before wg.Add.
144 | g.mut.Lock()
145 | if g.done {
146 | //println("CAS after done")
147 | // This should only ever happen once the context is cancelled
148 | if g.ctx.Err() == nil {
149 | panic("group done but context not cancelled?")
150 | }
151 | g.mut.Unlock()
152 | return 0, "", g.ctx.Err()
153 | }
154 | g.wg.Add(1)
155 | g.mut.Unlock()
156 | defer g.wg.Done()
157 |
158 | // We'll need a mutex to protect concurrent accesses to our locals.
159 | mut := sync.Mutex{}
160 |
161 | // Define the proposal formulation function that will do our work.
162 | // Returns the empty string to keep this worker thread waiting
163 | // for something to propose while letting other threads progress.
164 | pr := func(s int64, cur string, com bool) (prop string, pri int64) {
165 | mut.Lock()
166 | defer mut.Unlock()
167 |
168 | //println("CAS step", s, cur, com, "prop", old, "->", new)
169 |
170 | // Now check the situation of what's known to be committed.
171 | switch {
172 |
173 | // It's safe to propose new as the new string to commit
174 | // if the prior value we're building on is equal to old.
175 | case cur == old:
176 | prop, pri = new, randValue()
177 |
178 | // Complete the CAS operation as soon as we commit anything,
179 | // whether it was our new proposal or some other string.
180 | case com:
181 | version, actual = int64(s), cur
182 |
183 | // Otherwise, if the current proposal isn't the same as old
184 | // but also isn't committed, we have to make no-op proposals
185 | // until we manage to get something committed.
186 | default:
187 | println("no-op proposal")
188 | prop, pri = cur, randValue()
189 |
190 | //case int64(s) > lastVer && c && p != prop:
191 | // err = cas.Changed
192 | // fallthrough
193 | //// XXX get rid of Changed?
194 |
195 | //case int64(s) > lastVer && c:
196 | // actualVer = int64(s)
197 | // actualVal = reqVal
198 |
199 | //case int64(s) > lastVer:
200 | // // do nothing
201 |
202 | // Our CAS has succeeded if we've committed a new version
203 | // that builds immediately on the version we were expecting
204 | // and that commits the reqVal we were trying to propose.
205 | // Return "" in prop to have this worker thread keep waiting
206 | // for a future CAS operation to propose something useful.
207 | // case int64(L.Step) == lastVer && C.Data == reqVal:
208 | // println("proposal committed at step", C.Step)
209 | // if int64(C.Step) <= lastVer {
210 | // panic("XXX")
211 | // }
212 | // actualVer = int64(s)
213 | // actualVal = reqVal
214 |
215 | // Otherwise, our CAS fails with a Changed error as soon as
216 | // anything else gets committed on top of lastVer.
217 | // Return "" in prop to keep this worker thread waiting.
218 | // case int64(C.Step) > lastVer:
219 | // println("proposal overridden at step", C.Step)
220 | // actualVer = int64(C.Step)
221 | // actualVal = C.Data
222 | // err = cas.Changed
223 |
224 | // If C.Step < lastVer, we're choosing a proposal for a node
225 | // that doesn't yet "know" that lastVer was committed.
226 | // Just return a "safe" no-op proposal for this node,
227 | // although we know it has no chance of being committed.
228 | // case int64(C.Step) < lastVer:
229 | // println(i, "outdated at", C.Step, "<", lastVer,
230 | // "data", C.Data)
231 | // prop, pri = C.Data, 0
232 |
233 | //default:
234 | // panic("lastVer appears to be from the future")
235 | }
236 | return
237 | }
238 |
239 | // A simple helper function to test if we've completed our work.
240 | done := func() bool {
241 | mut.Lock()
242 | defer mut.Unlock()
243 | return actual != "" || err != nil
244 | }
245 |
246 | // Continuously send references to our proposal function
247 | // to the group's channel so it will get called until it finishes
248 | // or until one of the contexts (ours or the group's) is cancelled.
249 | // Since the channel is unbuffered, each send will block
250 | // until some consensus worker thread is ready to receive it.
251 | for !done() && ctx.Err() == nil && g.ctx.Err() == nil {
252 | //println("CAS sending", old, "->", new)
253 | g.ch <- pr
254 | }
255 | // println("CAS done", lastVer, "reqVal", reqVal,
256 | // "actualVer", actualVer, "actualVal", actualVal, "err", err)
257 | return version, actual, err
258 | }
259 |
--------------------------------------------------------------------------------
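A hedged sketch (the function name is hypothetical; assumes imports of context, fmt, and github.com/dedis/tlc/go/lib/cas) of using Group as a cas.Store over in-memory cas.Register members, much as group_test.go below does with checking wrappers added:

    func runGroupCAS() {
        ctx, cancel := context.WithCancel(context.Background())
        defer cancel() // stops the group's worker goroutines

        members := make([]cas.Store, 3)
        for i := range members {
            members[i] = &cas.Register{} // one in-memory CAS register per member
        }

        g := (&Group{}).Start(ctx, members, 1) // tolerate one faulty member

        // Attempt to advance the group's state from "" to "first";
        // actual reports whatever value was actually committed at version ver.
        ver, actual, err := g.CompareAndSet(ctx, "", "first")
        fmt.Println(ver, actual, err)
    }
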
/go/model/qscod/qscas/group_test.go:
--------------------------------------------------------------------------------
1 | package qscas
2 |
3 | import (
4 | "context"
5 | "fmt"
6 | "testing"
7 |
8 | "github.com/dedis/tlc/go/lib/cas"
9 | "github.com/dedis/tlc/go/lib/cas/test"
10 | )
11 |
12 | // Run a consensus test case with the specified parameters.
13 | func testRun(t *testing.T, nfail, nnode, nclients, nthreads, naccesses int) {
14 |
15 | desc := fmt.Sprintf("F=%v,N=%v,Clients=%v,Threads=%v,Accesses=%v",
16 | nfail, nnode, nclients, nthreads, naccesses)
17 | t.Run(desc, func(t *testing.T) {
18 |
19 | // Create a cancelable context for the test run
20 | ctx, cancel := context.WithCancel(context.Background())
21 |
22 | // Create an in-memory CAS register representing each node
23 | members := make([]cas.Store, nnode)
24 | memhist := make([]test.History, nnode)
25 | for i := range members {
26 | members[i] = &cas.Register{}
27 | }
28 |
29 | // Create a consensus group Store for each simulated client
30 | clients := make([]cas.Store, nclients)
31 | for i := range clients {
32 |
33 | // Interpose checking wrappers on the CAS registers
34 | checkers := make([]cas.Store, nnode)
35 | for i := range checkers {
36 | checkers[i] = test.Checked(t, &memhist[i],
37 | members[i])
38 | }
39 |
40 | clients[i] = (&Group{}).Start(ctx, checkers, nfail)
41 | }
42 |
43 | // Run a standard torture-test across all the clients
44 | test.Stores(t, nthreads, naccesses, clients...)
45 |
46 | // Shut down all the clients by canceling the context
47 | cancel()
48 | })
49 | }
50 |
51 | // Test the Client with a trivial in-memory key/value Store implementation.
52 | func TestClient(t *testing.T) {
53 | testRun(t, 1, 3, 1, 1, 1000) // Standard f=1 case
54 | testRun(t, 1, 3, 2, 1, 1000)
55 | testRun(t, 1, 3, 10, 1, 1000)
56 | testRun(t, 1, 3, 20, 1, 100)
57 | testRun(t, 1, 3, 50, 1, 10)
58 | testRun(t, 1, 3, 100, 1, 10)
59 |
60 | testRun(t, 2, 6, 10, 10, 100) // Standard f=2 case
61 | testRun(t, 3, 9, 10, 10, 100) // Standard f=3 case
62 | testRun(t, 4, 12, 10, 10, 100) // Standard f=4 case
63 | testRun(t, 5, 15, 10, 10, 100) // Standard f=5 case
64 |
65 | // Additional f=1 runs with more concurrent threads per client
66 | testRun(t, 1, 3, 10, 10, 1000)
67 | testRun(t, 1, 3, 10, 10, 1000)
68 | }
69 |
--------------------------------------------------------------------------------
/go/model/qscod/qscas/rand.go:
--------------------------------------------------------------------------------
1 | package qscas
2 |
3 | import (
4 | "crypto/rand"
5 | "encoding/binary"
6 | )
7 |
8 | // Generate a 63-bit positive integer from strong cryptographic randomness.
9 | func randValue() int64 {
10 | var b [8]byte
11 | _, err := rand.Read(b[:])
12 | if err != nil {
13 | panic("error reading cryptographic randomness: " + err.Error())
14 | }
15 | return int64(binary.BigEndian.Uint64(b[:]) &^ (1 << 63))
16 | }
17 |
--------------------------------------------------------------------------------
/go/model/qscod/qscas/store.go:
--------------------------------------------------------------------------------
1 | package qscas
2 |
3 | import (
4 | "github.com/dedis/tlc/go/lib/backoff"
5 | "github.com/dedis/tlc/go/lib/cas"
6 | "github.com/dedis/tlc/go/model/qscod/core"
7 | "github.com/dedis/tlc/go/model/qscod/encoding"
8 | )
9 |
10 | // coreStore implements QSCOD core's native Store interface
11 | // based on a cas.Store interface.
12 | type coreStore struct {
13 | cas.Store // underlying CAS state store
14 | g *Group // group this store is associated with
15 | lvals string // last value we observed in the underlying Store
16 | lval core.Value // deserialized last value
17 | }
18 |
19 | func (cs *coreStore) WriteRead(v core.Value) (rv core.Value) {
20 |
21 | try := func() (err error) {
22 | rv, err = cs.tryWriteRead(v)
23 | return err
24 | }
25 |
26 | // Try to perform the atomic operation until it succeeds
27 | // or until the group's context gets cancelled.
28 | err := backoff.Retry(cs.g.ctx, try)
29 | if err != nil && cs.g.ctx.Err() != nil {
30 |
31 | // The group's context got cancelled,
32 | // so just silently return nil Values
33 | // until the consensus worker threads catch up and terminate.
34 | //println("WriteRead cancelled")
35 | return core.Value{}
36 | }
37 | if err != nil {
38 | panic("backoff.Retry inexplicably gave up: " + err.Error())
39 | }
40 | return rv
41 | }
42 |
43 | func (cs *coreStore) tryWriteRead(val core.Value) (core.Value, error) {
44 |
45 | // Serialize the proposed value
46 | valb, err := encoding.EncodeValue(val)
47 | if err != nil {
48 | println("encoding error", err.Error())
49 | return core.Value{}, err
50 | }
51 | vals := string(valb)
52 |
53 | // Try to set the underlying CAS register to the proposed value
54 | // only as long as doing so would strictly increase its TLC step
55 | for val.S > cs.lval.S {
56 |
57 | // Write the serialized value to the underlying CAS interface
58 | _, avals, err := cs.CompareAndSet(cs.g.ctx, cs.lvals, vals)
59 | if err != nil {
60 | println("CompareAndSet error", err.Error())
61 | return core.Value{}, err
62 | }
63 |
64 | // Deserialize the actual value we read back
65 | aval, err := encoding.DecodeValue([]byte(avals))
66 | if err != nil {
67 | println("decoding error", err.Error())
68 | return core.Value{}, err
69 | }
70 |
71 | // println("tryWriteRead step",
72 | // cs.lval.S, "w", val.S, "->", aval.S,
73 | // "casver", cs.lver, "->", aver)
74 |
75 | if aval.S <= cs.lval.S {
76 | panic("CAS failed to advance TLC step!")
77 | }
78 |
79 | // Update our record of the underlying CAS version and value
80 | //println("update from step", cs.lval.S, "to step", aval.S)
81 | cs.lvals, cs.lval = avals, aval
82 | }
83 |
84 | //println("cs returning newer step", cs.lval.S)
85 | return cs.lval, nil
86 | }
87 |
--------------------------------------------------------------------------------
/go/model/quepaxa/consensus.go:
--------------------------------------------------------------------------------
1 | package quepaxa
2 |
3 | import (
4 | "context"
5 | "sync"
6 | )
7 |
8 | type Node int32
9 | type Choice int64
10 | type Step int32
11 |
12 | // A logical time consists of
13 | // a Choice (consensus decision or slot number) and
14 | // a Step (consensus attempt number within a turn).
15 | type Time struct {
16 | c Choice
17 | s Step
18 | }
19 |
20 | // Returns true if logical time T1 is strictly less than T2.
21 | func (t1 Time) LT(t2 Time) bool {
22 | return t1.c < t2.c || (t1.c == t2.c && t1.s < t2.s)
23 | }
24 |
25 | type Replica[P Proposal[P]] interface {
26 | Record(ctx context.Context, t Time, p P) (
27 | rt Time, rf P, rl P, err error)
28 | }
29 |
30 | type Proposer[P Proposal[P]] struct {
31 |
32 | // configuration state
33 | w []worker[P] // one worker per replica
34 | th int // consensus threshold (n-f)
35 |
36 | // synchronization state
37 | m sync.Mutex
38 | c sync.Cond
39 |
40 | t Time // proposer's current logical time
41 |
42 | // per-choice state
43 | ld Node // which replica is the leader, -1 if none
44 | dp P // decision proposal from last choice
45 | nf int // number of fast-path responses this choice
46 |
47 | // per-step state
48 | pp P // preferred proposal for this step
49 | bp P // best of appropriate replies this step
50 | nr int // number of responses seen so far this step
51 |
52 | // graceful termination state
53 | stop bool // signal when workers should shut down
54 | ctx context.Context // cancelable context for all of our workers
55 | cancel context.CancelFunc // cancellation function
56 | }
57 |
58 | func (p *Proposer[P]) Init(replicas []Replica[P]) {
59 |
60 | if p.w != nil {
61 | panic("Proposer.Init must not be invoked twice")
62 | }
63 |
64 | // set up a cancelable context for when we want to stop
65 | p.ctx, p.cancel = context.WithCancel(context.Background())
66 | p.c.L = &p.m // the condition variable must use the proposer's mutex
67 | // set the threshold appropriately for group size
68 | p.th = len(replicas)/2 + 1
69 |
70 | p.w = make([]worker[P], len(replicas))
71 | for i := range replicas {
72 | p.w[i].p = p
73 | p.w[i].r = replicas[i]
74 | p.w[i].i = Node(i)
75 |
76 | go p.w[i].work()
77 | }
78 | }
79 |
80 | func (p *Proposer[P]) Agree(preferred P) (choice Choice, decision P) {
81 |
82 | // keep our mutex locked except while waiting on a condition
83 | p.m.Lock()
84 | defer p.m.Unlock()
85 |
86 | c := p.t.c
87 | if p.t.s < 4 {
88 | p.advance(Time{p.t.c, 4}, preferred)
89 | }
90 | p.c.Broadcast() // signal any non-busy workers that there's new work to do
91 | for !p.stop && p.t.c == c {
92 | p.c.Wait() // wait until a decision is reached for this choice
93 | }
94 |
95 | // return choice at which last decision was made, and that decision
96 | return p.t.c - 1, p.dp
97 | }
98 |
99 | // Advance to time t with preferred proposal pp.
100 | // Proposer's mutex must be locked.
101 | func (p *Proposer[P]) advance(t Time, pp P) {
102 | p.t = t // new time step
103 | p.pp = pp // preferred proposal entering new step
104 | p.bp = pp.Nil() // initial best proposal from new step
105 | p.nr = 0 // count responses toward threshold
106 |
107 | if t.s == 4 { // only when advancing to fast-path step...
108 | p.nf = 0 // initialize fast-path response count
109 | }
110 | }
111 |
112 | // Each worker thread calls workDone when it gets a response from a recorder.
113 | //
114 | // This function gets called at most once per recorder per time step,
115 | // so it can count responses without worrying about duplicates.
116 | func (p *Proposer[P]) workDone(rt Time, rf, rl P) {
117 |
118 | // When we receive fast-path responses from phase 4 of current choice,
119 | // count them towards the fast-path threshold even if they come late.
120 | if rt.c == p.t.c && rt.s == 4 {
121 | p.nf++
122 | if p.nf == p.th {
123 | p.decided(rf) // fast-path decision
124 | }
125 | }
126 |
127 | // where is the proposer with respect to the response in logical time?
128 | if rt.LT(p.t) { // is the response behind the proposer?
129 | return // the work done is obsolete - just discard
130 | }
131 | if p.t.LT(rt) { // is the response ahead of the proposer?
132 | p.advance(rt, rf) // advance to newer time in response
133 | return
134 | }
135 | // the response is from proposer's current time step exactly
136 |
137 | // what we do with the response depends on which phase we're in
138 | if rt.s&3 == 0 {
139 | p.bp = p.bp.Best(rf) // Phase 0: best of first proposals
140 | } else if rt.s&2 != 0 {
141 | p.bp = p.bp.Best(rl) // Phase 2-3: best of last aggregate
142 | }
143 |
144 | // have we reached the response threshold for this step?
145 | p.nr++
146 | if p.nr < p.th {
147 | return // not yet, wait for more responses
148 | }
149 | // threshold reached, so we can complete this time step
150 |
151 | // in phase 2, check if we've reached a consensus decision
152 | if rt.s&3 == 2 && p.pp.EqD(p.bp) {
153 | p.decided(p.pp)
154 | return
155 | }
156 | // no decision yet but still end of current time step
157 |
158 | // in phases 0 and 3, new preferred proposal is best from replies
159 | pp := p.pp
160 | if rt.s&3 == 0 || rt.s&3 == 3 {
161 | pp = p.bp
162 | }
163 |
164 | // advance to next logical time step
165 | p.advance(Time{p.t.c, p.t.s + 1}, pp)
166 | }
167 |
168 | func (p *Proposer[P]) decided(dp P) {
169 |
170 | // record the decision in local state
171 | p.t.c++ // last choice is decided, now on to next
172 | p.t.s = 0 // idle but ready for a new agreement
173 | p.dp = dp // record decision proposal from last choice
174 | p.ld = -1 // default to no leader, but caller can change
175 |
176 | // signal the main proposer thread to return the decision,
177 | // while the workers inform the recorders asynchronously.
178 | p.c.Broadcast()
179 | }
180 |
181 | // SetLeader lets the application, immediately after observing a decision,
182 | // select a new leader based on that decision.
183 | // If SetLeader is not called, the next choice is leaderless.
184 | // The choice of leader (or lack thereof) must be deterministic
185 | // based on prior decisions and set the same on all nodes.
186 | func (p *Proposer[P]) SetLeader(leader Node) {
187 | p.ld = leader
188 | }
189 |
190 | // Stop permanently shuts down this proposer and its worker threads.
191 | func (p *Proposer[P]) Stop() {
192 |
193 | p.stop = true // signal that workers should stop
194 | p.c.Broadcast() // wake them up to see the signal
195 | p.cancel() // also cancel all Record calls in progress
196 | }
197 |
198 | // We create one worker per replica.
199 | type worker[P Proposal[P]] struct {
200 | p *Proposer[P] // back pointer to Proposer
201 | r Replica[P] // Replica interface of this replica
202 | i Node // replica number of this replica
203 | }
204 |
205 | func (w *worker[P]) work() {
206 | p := w.p // keep handy pointer back to proposer
207 | p.m.Lock()
208 | for !p.stop {
209 | // we're done with prior steps so wait until proposer advances
210 | t := p.t // save proposer's current time
211 | for p.t == t && !p.stop { // wait until the proposer advances (or stops)
212 | p.c.Wait()
213 | }
214 |
215 | t, pp := p.t, p.pp // adopt the proposer's new time and preferred proposal
216 | if t.s&3 == 0 { // in phase zero we must re-rank proposals
217 | pp = pp.Rank(w.i, t.s == 4 && w.i == p.ld)
218 | }
219 |
220 | // asynchronously record the proposal with the mutex unlocked
221 | p.m.Unlock()
222 | rt, rf, rl, err := w.r.Record(p.ctx, t, pp)
223 | if err != nil { // canceled
224 | return
225 | // XXX backoff retry?
226 | }
227 | p.m.Lock()
228 |
229 | // inform the Proposer that this recorder's work is done
230 | p.workDone(rt, rf, rl)
231 | }
232 | p.m.Unlock()
233 | }
234 |
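
Taken together, Agree and SetLeader are the whole application-facing surface of this proposer: each Agree call drives one choice to a decision through the four phases keyed off the step number modulo four (propose, spread E, gather E / spread C, gather C), and SetLeader optionally installs a leader for the next choice. The following is a minimal sketch of an application loop driving a Proposer that has already been initialized with its replicas as shown earlier in this file; the helpers next and chooseLeader are illustrative and not part of the repository.

// Illustrative only (not part of the repository): drive successive
// agreements with a Proposer that has already been initialized.
func runChoices[P Proposal[P]](p *Proposer[P], next func() P, chooseLeader func(P) Node) {
	for {
		// Agree blocks until the current choice reaches a decision.
		choice, decision := p.Agree(next())
		_ = choice // e.g., use choice as the log index for this decision

		// Leader selection must be deterministic from prior decisions
		// so that every node installs the same leader (or none).
		p.SetLeader(chooseLeader(decision))
	}
}
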
--------------------------------------------------------------------------------
/go/model/quepaxa/isr.go:
--------------------------------------------------------------------------------
1 | package quepaxa
2 |
3 | // Interval Summary Register (ISR)
4 | type ISR[P Proposal[P]] struct {
5 | t Time // current logical time step
6 | f P // first value seen in this step
7 | a P // aggregated values so far in this step
8 | l P // aggregated values seen in last step
9 | }
10 |
11 | func (r *ISR[P]) Record(t Time, p P) (Time, P, P) {
12 |
13 | if r.t.LT(t) {
14 | // Our recorder state needs to catch up to time t
15 | if t.s == r.t.s+1 {
16 | r.l = r.a
17 | } else {
18 | r.l = r.l.Nil()
19 | }
20 | r.t = t
21 | r.f = p
22 | r.a = p
23 |
24 | } else if !t.LT(r.t) {
25 |
26 | // At exactly the right time step - just aggregate proposals
27 | r.a = r.a.Best(p)
28 |
29 | } else {
30 | // proposal p is obsolete - just discard it
31 | }
32 |
33 | // In any case, return the latest recorder state
34 | return r.t, r.f, r.l
35 | }
36 |
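
To make the register's behavior concrete, here is a minimal sketch (not part of the repository) exercising it from within this package, using the BasicProposal type defined in proposal.go; the ranks, node numbers, and data values are made up.

// isrExample is illustrative only: it shows how the ISR hands the
// previous step's aggregate back to the proposer at the next step.
func isrExample() {
	var r ISR[BasicProposal[string]]
	a := BasicProposal[string]{R: 3, N: 1, D: "a"}
	b := BasicProposal[string]{R: 7, N: 2, D: "b"}

	r.Record(Time{0, 4}, a)              // step 4: first = a, aggregate = a
	r.Record(Time{0, 4}, b)              // step 4: aggregate becomes Best(a, b) = b
	_, rf, rl := r.Record(Time{0, 5}, a) // step 5: rf = a (first of step 5), rl = b (best of step 4)
	_, _ = rf, rl
}
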
--------------------------------------------------------------------------------
/go/model/quepaxa/proposal.go:
--------------------------------------------------------------------------------
1 | package quepaxa
2 |
3 | import (
4 | "crypto/rand"
5 | "encoding/binary"
6 | "math"
7 | )
8 |
9 | // The Proposal interface defines constraints for a concrete proposal type P.
10 | //
11 | // The Rank method must return the same proposal with rank set appropriately:
12 | // - to the maximum rank High if leader is set (this replica is the leader)
13 | // - to a freshly-chosen random rank between 1 and High-1 otherwise
14 | //
15 | // In addition, if proposal ranks are low-entropy so there is a chance of ties,
16 | // and P is using replica numbers for tiebreaking,
17 | // then the Rank function also sets the replica number in the proposal.
18 | type Proposal[P any] interface {
19 | Nil() P // the nil proposal
20 | Best(other P) P // best of this and other
21 | Rank(replica Node, leader bool) P // randomly rank proposal
22 | EqD(other P) bool // equality for deciding
23 | }
24 |
25 | // BasicProposal provides a basic proposal design
26 | // that represents a reasonable "sweet spot" for most purposes.
27 | //
28 | // Proposals are randomly ranked using 31 bits of private randomness,
29 | // drawn from the cryptographic random source for strong unpredictability,
30 | // which might conceivably be needed to protect against a strong DoS attacker.
31 | // Since 31-bit random ranks do not have high entropy,
32 | // BasicProposal uses replica numbers for breaking ties.
33 | //
34 | // BasicProposal contains a field D of parameterized type Data,
35 | // containing any application-defined data associated with the proposal.
36 | // This type may contain pointers or slices (e.g., referring to bulk data)
37 | // provided the referenced data objects do not change during consensus.
38 | // The BasicProposal does nothing with this data field other than copy it.
39 | type BasicProposal[Data any] struct {
40 | R uint32 // Randomized rank or priority
41 | N Node // Replica for which proposal was created
42 | D Data // Application-defined data
43 | }
44 |
45 | const basicProposalHighRank = math.MaxUint32
46 |
47 | func (_ BasicProposal[D]) Nil() BasicProposal[D] {
48 | return BasicProposal[D]{}
49 | }
50 |
51 | func (p BasicProposal[D]) Best(o BasicProposal[D]) BasicProposal[D] {
52 | if o.R > p.R || (o.R == p.R && o.N > p.N) {
53 | return o
54 | }
55 | return p
56 | }
57 |
58 | func (p BasicProposal[D]) Rank(node Node, leader bool) BasicProposal[D] {
59 |
60 | // record the replica number that this proposal was produced for
61 | p.N = node
62 |
63 | if leader {
64 | // the leader always uses the reserved maximum rank
65 | p.R = basicProposalHighRank
66 |
67 | } else {
68 | // read 32 bits of randomness
69 | var b [4]byte
70 | _, err := rand.Read(b[:])
71 | if err != nil {
72 | panic("unable to read cryptographically random bits: " +
73 | err.Error())
74 | }
75 |
76 | // produce a 31-bit rank, avoiding the zero rank
77 | p.R = (binary.BigEndian.Uint32(b[:]) & 0x7fffffff) + 1
78 | }
79 | return p
80 | }
81 |
82 | func (p BasicProposal[D]) EqD(o BasicProposal[D]) bool {
83 | return p.R == o.R && p.N == o.N
84 | }
85 |
86 | var bp BasicProposal[struct{}] // instantiate BasicProposal to ensure it compiles
87 | var prop Proposer[BasicProposal[struct{}]] // check that BasicProposal satisfies the Proposal constraint
88 |
--------------------------------------------------------------------------------
/go/model/tlc.go:
--------------------------------------------------------------------------------
1 | package model
2 |
3 | // Create a copy of our message template for transmission.
4 | // Sends QSC state only for the rounds still in our window.
5 | func (n *Node) newMsg() *Message {
6 | msg := n.m // copy template
7 | msg.QSC = append([]Round{}, n.m.QSC[n.m.Step:]...) // active QSC state
8 | return &msg
9 | }
10 |
11 | // Broadcast a copy of our current message template to all nodes
12 | func (n *Node) broadcastTLC() {
13 | msg := n.newMsg()
14 | for i := 0; i < n.nnode; i++ {
15 | n.send(i, msg)
16 | }
17 | }
18 |
19 | // Advance to the next TLC time step.
20 | //
21 | // The client must invoke this function once after calling NewNode
22 | // to launch the protocol and broadcast the message for TLC time-step zero.
23 | // Thereafter, TLC advances time automatically based on network communication.
24 | //
25 | func (n *Node) Advance() {
26 |
27 | // Initialize message template with a proposal for the new time step
28 | n.m.Step++ // Advance to next time step
29 | n.m.Type = Raw // Broadcast raw proposal first
30 | n.acks = 0 // No acknowledgments received yet in this step
31 | n.wits = 0 // No threshold witnessed messages received yet
32 |
33 | // Notify the upper (QSC) layer of the advancement of time,
34 | // and let it fill in its part of the new message to broadcast.
35 | n.advanceQSC()
36 |
37 | n.broadcastTLC() // broadcast our raw proposal
38 | }
39 |
40 | // Receive is called by the client or network layer on receipt of a Message
41 | // from a peer.
42 | // Any unmarshaling that may be required must have already been done.
43 | //
44 | // This function assumes that peer-to-peer connections are ordered and reliable,
45 | // as they are when sent over Go channels or TCP/TLS connections.
46 | // It also assumes that connection or peer failures are permanent:
47 | // this implementation of QSC does not support restarting/resuming connections.
48 | //
49 | func (n *Node) Receive(msg *Message) {
50 |
51 | // Process only messages from the current or next time step.
52 | // We could accept and merge in information from older messages,
53 | // but it's perfectly safe and simpler just to ignore old messages.
54 | if msg.Step >= n.m.Step {
55 |
56 | // If msg is ahead of us, then virally catch up to it
57 | // Since we receive messages from a given peer in order,
58 | // a message we receive can be at most one step ahead of ours.
59 | if msg.Step > n.m.Step {
60 | n.Advance()
61 | }
62 |
63 | // Merge in received QSC state for rounds still in our pipeline
64 | mergeQSC(n.m.QSC[msg.Step:], msg.QSC)
65 |
66 | // Now process this message according to type.
67 | switch msg.Type {
68 | case Raw: // Acknowledge unwitnessed proposals.
69 | ack := n.newMsg()
70 | ack.Type = Ack
71 | n.send(msg.From, ack)
72 |
73 | case Ack: // Collect a threshold of acknowledgments.
74 | n.acks++
75 | if n.m.Type == Raw && n.acks >= n.thres {
76 | n.m.Type = Wit // Prop now threshold witnessed
77 | n.witnessedQSC()
78 | n.broadcastTLC()
79 | }
80 |
81 | case Wit: // Collect a threshold of threshold witnessed messages
82 | n.wits++ // witnessed messages in this step
83 | if n.wits >= n.thres {
84 | n.Advance() // tick the clock
85 | }
86 | }
87 | }
88 | }
89 |
--------------------------------------------------------------------------------
/spin/README.md:
--------------------------------------------------------------------------------
1 | This directory contains minimal models of QuePaxa consensus,
2 | Que Sera Consensus (QSC), and Threshold Logical Clocks (TLC)
3 | for the [Spin model checker](https://spinroot.com/spin/whatispin.html).
4 | To test a model, run the provided `run.sh` script on it (e.g., `./run.sh qsc.pml`) after installing Spin.
5 |
6 | For background information on QSC and TLC,
7 | and other model implementations in several languages, please see the
8 | [top level of this repository](https://github.com/dedis/tlc/).
9 |
--------------------------------------------------------------------------------
/spin/qp.pml:
--------------------------------------------------------------------------------
1 | // Simple model of QuePaxa consensus.
2 | // Recorder logic runs atomically in-line within the proposer code.
3 |
4 | #define N 3 // total number of recorder (state) nodes
5 | #define F 1 // number of failures tolerated
6 | #define T (N-F) // consensus threshold required
7 |
8 | #define M 2 // number of proposers (clients)
9 |
10 | #define STEPHI 11 // highest step number to simulate
11 | #define RAND 2 // random part of fitness space is 1..RAND
12 | #define HI (RAND+1) // top priority for proposals by leader
13 | #define VALS 2 // space of preferred values is 1..VALS
14 |
15 | // A proposal is an integer divided into two bit-fields: fitness and value.
16 | #define VALBITS 4
17 | #define FITBITS 4
18 | #define VALSHIFT (0)
19 | #define FITSHIFT (VALBITS)
20 | #define PROP(f,v) (((f) << FITSHIFT) | ((v) << VALSHIFT))
21 | #define VAL(p) (((p) >> VALSHIFT) & ((1 << VALBITS)-1))
22 | #define FIT(p) (((p) >> FITSHIFT) & ((1 << FITBITS)-1))
23 |
24 | #define MAX(a, b) ((a) > (b) -> (a) : (b))
25 |
26 | // Recorder state: implements an interval summary register (ISR),
27 | // which returns the first value submitted in this time step
28 | // and the maximum of all values submitted in the prior time step
29 | typedef Rec {
30 | byte s; // step number
31 | byte f; // first value submitted in this step
32 | byte a; // maximum value seen so far in this step
33 | byte m; // maximum value seen in prior step (s-1)
34 | }
35 |
36 | Rec rec[1+N]; // state of recorder nodes 1..N
37 | byte decided; // proposed value that we've decided on
38 | byte leader; // which proposer is the well-known leader
39 |
40 | #define DECIDE(j, s, p) atomic { \
41 | printf("%d step %d decided <%d,%d>", j, s, FIT(p), VAL(p)); \
42 | assert(decided == 0 || decided == VAL(p)); \
43 | decided = VAL(p); \
44 | }
45 |
46 |
47 | // We model one process per proposer.
48 | proctype Proposer(byte j) { // We're proposer j in 1..M
49 | byte s, t;
50 | byte p, g;
51 | byte i, recs, mask; // recorders we've interacted with
52 | bit done; // for detecting early-decision opportunities
53 |
54 | // Choose the arbitrary initial "preferred value" of this proposer
55 | s = 4;
56 | select (t : 1 .. VALS); // select a "random" value into temporary
57 | p = PROP(HI, t);
58 | printf("%d proposing %d\n", j, t);
59 |
60 | do // iterate over time-steps
61 | :: s <= STEPHI ->
62 | printf("%d step %d\n", j, s);
63 |
64 | // Send and get reply from threshold of recorders
65 | recs = 0; // number of recorders we've heard from
66 | mask = 0; // bit mask of those recorders
67 | g = 0; // gather best response proposer saw so far
68 | done = true;
69 | select (i : 1 .. N); // first recorder to interact with
70 | do // interact with the recorders in any order
71 | :: recs < T && (mask & (1 << i)) == 0 ->
72 |
73 | atomic {
74 | // Randomize fitnesses if we're not the leader
75 | if
76 | :: (s & 3) == 0 && j != leader ->
77 | select(t : 1 .. RAND);
78 | p = PROP(t, VAL(p));
79 | :: else -> skip
80 | fi
81 | assert(FIT(p) > 0 && VAL(p) > 0);
82 |
83 | // enter the recorder/ISR role (via "RPC").
84 | printf("%d step %d ISR <%d,%d> to %d\n",
85 | j, s, FIT(p), VAL(p), i);
86 |
87 | // first catch up the recorder if appropriate
88 | if
89 | :: s > rec[i].s ->
90 | rec[i].m = ((s == (rec[i].s+1)) ->
91 | rec[i].a : 0);
92 | rec[i].s = s;
93 | rec[i].f = p;
94 | rec[i].a = p;
95 |
96 | :: s == rec[i].s ->
97 | rec[i].a = MAX(rec[i].a, p);
98 |
99 | :: else -> skip
100 | fi
101 |
102 | // we're back to the proposer's logic now,
103 | // incorporating the recorder's "response".
104 | assert(s <= rec[i].s);
105 | if
106 | :: s == rec[i].s && (s & 3) == 0 ->
107 | g = MAX(g, rec[i].f); // gather props
108 | done = done && (FIT(rec[i].f) == HI);
109 |
110 | :: s == rec[i].s && (s & 3) == 1 -> skip
111 |
112 | :: s == rec[i].s && (s & 3) >= 2 ->
113 | printf("%d step %d got <%d,%d> from %d\n", j, s, FIT(rec[i].m), VAL(rec[i].m), i);
114 | g = MAX(g, rec[i].m); // gather E/C
115 |
116 | :: s < rec[i].s -> // catch up proposer
117 | s = rec[i].s;
118 | p = rec[i].f;
119 | break;
120 | fi
121 | assert(s == rec[i].s);
122 |
123 | recs++; // this recorder has now "replied"
124 | mask = mask | (1 << i);
125 |
126 | select (i : 1 .. N); // choose next recorder
127 |
128 | } // atomic
129 |
130 | :: recs < T && (mask & (1 << i)) != 0 ->
131 | // we've already gotten a reply from this recorder,
132 | // so just pick a different one.
133 | select (i : 1 .. N);
134 |
135 | :: recs == T -> // we've heard from a threshold of recorders
136 |
137 | if
138 | :: (s & 3) == 0 -> // propose phase
139 | assert(FIT(g) > 0 && VAL(g) > 0);
140 | p = g; // pick best of some E set
141 |
142 | // Decide early if all proposals were HI fit
143 | if
144 | :: done ->
145 | DECIDE(j, s, p);
146 | :: else -> skip
147 | fi
148 |
149 | :: (s & 3) == 1 -> skip // spreadE phase
150 |
151 | :: (s & 3) == 2 -> // gatherEspreadC phase
152 | // p is now the best of a U set;
153 | // g is the best of all gathered E sets
154 | assert(FIT(g) > 0 && VAL(g) > 0);
155 | if
156 | :: p == g ->
157 | DECIDE(j, s, p);
158 | :: else -> skip
159 | fi
160 |
161 | :: (s & 3) == 3 -> // gatherC phase
162 | // g is the best of all gathered C sets.
163 | // this is our proposal for the next round.
164 | assert(FIT(g) > 0 && VAL(g) > 0);
165 | p = g;
166 | fi
167 | s = s + 1;
168 | break;
169 | od
170 |
171 | :: s > STEPHI -> // we've simulated enough time-steps
172 | break;
173 | od
174 | }
175 |
176 | init {
177 | assert(HI < 1 << FITBITS);
178 | assert(VALS < 1 << VALBITS);
179 |
180 | decided = 0; // we haven't decided yet
181 |
182 | // first choose the "well-known" leader, or 0 for no leader
183 | //leader = 0; // no leader
184 | leader = 1; // fixed leader
185 | //select (leader : 0 .. M); // any (or no) leader
186 |
187 | atomic {
188 | int i;
189 | for (i : 1 .. M) { // Launch M proposers
190 | run Proposer(i)
191 | }
192 | }
193 | }
194 |
195 |
--------------------------------------------------------------------------------
/spin/qpm.pml:
--------------------------------------------------------------------------------
1 | // Simple model of QuePaxa consensus.
2 | // Uses explicit message-based communication with recorders.
3 |
4 | #define N 3 // total number of recorder (state) nodes
5 | #define F 1 // number of failures tolerated
6 | #define T (N-F) // consensus threshold required
7 |
8 | #define M 2 // number of proposers (clients)
9 |
10 | #define STEPHI 11 // highest step number to simulate
11 | #define RAND 2 // random part of fitness space is 1..RAND
12 | #define HI (RAND+1) // top priority for proposals by leader
13 | #define VALS 2 // space of preferred values is 1..VALS
14 |
15 | // A proposal is an integer divided into two bit-fields: fitness and value.
16 | #define VALBITS 4
17 | #define FITBITS 4
18 | #define VALSHIFT (0)
19 | #define FITSHIFT (VALBITS)
20 | #define PROP(f,v) (((f) << FITSHIFT) | ((v) << VALSHIFT))
21 | #define VAL(p) (((p) >> VALSHIFT) & ((1 << VALBITS)-1))
22 | #define FIT(p) (((p) >> FITSHIFT) & ((1 << FITBITS)-1))
23 |
24 | #define MAX(a, b) ((a) > (b) -> (a) : (b))
25 |
26 | byte leader; // which proposer is the well-known leader
27 | byte decided; // proposed value that we've decided on
28 | byte propsdone; // number of proposers that have finished
29 |
30 | // Channels for recorder/proposer communication.
31 | chan creq[1+N] = [0] of { byte, byte, byte } // proposer j, step s, proposal p
32 | chan crep[1+M] = [0] of { byte, byte, byte, byte, byte} // recorder i, request step, recorder step, first, prior max
33 |
34 | #define DECIDE(j, s, p) atomic { \
35 | printf("%d step %d decided <%d,%d>", j, s, FIT(p), VAL(p)); \
36 | assert(decided == 0 || decided == VAL(p)); \
37 | decided = VAL(p); \
38 | }
39 |
40 | // Each proposer is a process.
41 | proctype Proposer(byte j) { // We're proposer j in 1..M
42 | byte s;
43 | byte p, g;
44 | byte ri, rs, rsn, rfn, rmn; // responses we get from recorders
45 | byte i, sent, recs; // request send and reply receiving state
46 | bit done; // for detecting early-decision opportunities
47 |
48 | // Choose the arbitrary initial "preferred value" of this proposer
49 | s = 4;
50 | select (p : 1 .. VALS); // select a "random" preferred value
51 | printf("%d proposing %d\n", j, p);
52 | p = PROP(HI, p);
53 |
54 | // Initialize per-step state for the first step of the first round.
55 | printf("%d step %d\n", j, s);
56 | sent = 0; // bit mask of recorders we've sent to
57 | recs = 0; // number of recorders we've heard from
58 | g = 0; // gather best response proposer saw so far
59 | done = true;
60 |
61 | i = 0; // first, send to a channel no one listens on
62 | do
63 | :: creq[i] ! j, s, p -> // send a request for this step to recorder i
64 | printf("%d step %d sent <%d,%d> to %d\n",
65 | j, s, FIT(p), VAL(p), i);
66 | sent = sent | (1 << i); // successfully sent
67 | i = 0; // now we have no target again
68 |
69 | :: s <= STEPHI && recs < T -> // choose a recorder to send to
70 |
71 | // randomize fitness in phase 0 if we're not the leader
72 | if
73 | :: (s & 3) == 0 && j != leader ->
74 | byte r;
75 | select(r : 1 .. RAND);
76 | p = PROP(r, VAL(p));
77 | :: else -> skip
78 | fi
79 | assert(FIT(p) > 0 && VAL(p) > 0);
80 |
81 | // choose a recorder that we haven't already sent a request to
82 | // revert to i=0 if we've already sent to selected recorder
83 | select (i : 1 .. N);
84 | i = ((sent & (1 << i)) == 0 -> i : 0);
85 |
86 | :: crep[j] ? ri, rs, rsn, rfn, rmn -> // get response from a recorder
87 | printf("%d step %d recv %d %d <%d,%d>,<%d,%d> from %d\n",
88 | j, s, rs, rsn, FIT(rfn), VAL(rfn),
89 | FIT(rmn), VAL(rmn), i);
90 | assert(rs <= s); // should get replies only to requests
91 | if
92 | :: rs < s -> skip // discard old unneeded replies
93 |
94 | :: rs == s && rsn > s -> // catch up to new recorder state
95 | s = rsn; // adopt recorder's round start state
96 | p = rfn;
97 |
98 | // initialize per-step state for the new time-step
99 | printf("%d step %d\n", j, s);
100 | sent = 0; // bit mask of recorders we've sent to
101 | recs = 0; // number of recorders we've heard from
102 | g = 0; // best response proposer saw so far
103 | done = true;
104 |
105 | :: rs == s && rsn == s && (s & 3) == 0 -> // propose phase
106 | g = MAX(g, rfn); // gather best of all first proposals
107 | done = done && (FIT(rfn) == HI);
108 | recs++; // this recorder has now replied
109 |
110 | :: rs == s && rsn == s && (s & 3) == 1 -> // spread E phase
111 | recs++; // this recorder has now replied
112 |
113 | :: rs == s && rsn == s && (s & 3) >= 2 -> // gather E spread C
114 | g = MAX(g, rmn); // gather best of E or C sets
115 | recs++; // this recorder has now replied
116 | fi
117 | assert(recs <= N); // shouldn't get any extra replies
118 |
119 | ri = 0; // clear temporaries
120 | rs = 0;
121 | rsn = 0;
122 | rfn = 0;
123 | rmn = 0;
124 |
125 | :: s <= STEPHI && recs >= T -> // got a quorum of replies
126 |
127 | // handle the proposer's completion of this round
128 | if
129 | :: (s & 3) == 0 -> // propose phase
130 | assert(FIT(g) > 0 && VAL(g) > 0);
131 | p = g; // pick best of some E set
132 |
133 | // Decide early if all proposals were HI fit
134 | if
135 | :: done ->
136 | DECIDE(j, s, p);
137 | :: else -> skip
138 | fi
139 |
140 | :: (s & 3) == 1 -> skip // spread E phase: nothing to do
141 |
142 | :: (s & 3) == 2 -> // gather E spread C phase
143 | // p is now the best of some universal (U) set;
144 | // g is the best of all the E sets we gathered.
145 | assert(FIT(g) > 0 && VAL(g) > 0);
146 | if
147 | :: p == g ->
148 | DECIDE(j, s, p);
149 | :: else -> skip
150 | fi
151 |
152 | :: (s & 3) == 3 -> // gather C phase
153 | // g is the best of all common (C) sets we gathered;
154 | // this becomes our proposal for the next round.
155 | assert(FIT(g) > 0 && VAL(g) > 0);
156 | p = g;
157 | fi
158 |
159 | // proceed to next logical time-step
160 | s = s + 1;
161 |
162 | // initialize per-step state for the new time-step
163 | printf("%d step %d\n", j, s);
164 | sent = 0; // bit mask of recorders we've sent to
165 | recs = 0; // number of recorders we've heard from
166 | g = 0; // best response proposer saw so far
167 | done = true;
168 |
169 | :: s > STEPHI -> // we've simulated enough time-steps
170 | break;
171 | od
172 |
173 | // count terminated proposers so recorders can terminate too
174 | atomic {
175 | propsdone++;
176 | }
177 | }
178 |
179 | // Each recorder is a process implementing an interval summary register (ISR).
180 | proctype Recorder(byte i) { // We're recorder i in 1..N
181 | byte s, f, a, m;
182 | byte rj, rs, rv;
183 |
184 | do
185 | :: creq[i] ? rj, rs, rv -> // got request from proposer rj
186 | if
187 | :: rs == s ->
188 | a = MAX(a, rv); // accumulate max of all values
189 |
190 | :: rs > s -> // forward to a later step
191 | m = (rs == s+1 -> a : 0);
192 | s = rs;
193 | f = rv;
194 | a = rv;
195 |
196 | :: else -> skip
197 | fi
198 |
199 | // send reply to the proposer --
200 | // but don't block forever if all proposers terminate.
201 | if
202 | :: crep[rj] ! i, rs, s, f, m // reply succeeded
203 | :: propsdone == M -> break // done while trying to send
204 | fi
205 |
206 | rj = 0; // clear temporaries
207 | rs = 0;
208 | rv = 0;
209 |
210 | :: propsdone == M -> // all proposers terminated?
211 | break; // terminate recorder thread
212 | od
213 | }
214 |
215 | // The initialization process just gets things launched.
216 | init {
217 | assert(HI < 1 << FITBITS);
218 | assert(VALS < 1 << VALBITS);
219 |
220 | decided = 0; // we haven't decided yet
221 |
222 | // first choose the "well-known" leader, or 0 for no leader
223 | //leader = 0; // no leader
224 | leader = 1; // fixed leader
225 | //select (leader : 0 .. M); // any (or no) leader
226 |
227 | atomic {
228 | int i, j;
229 |
230 | for (i : 1 .. N) { // Launch N recorders
231 | run Recorder(i)
232 | }
233 | for (j : 1 .. M) { // Launch M proposers
234 | run Proposer(j)
235 | }
236 | }
237 | }
238 |
239 |
--------------------------------------------------------------------------------
/spin/qsc.pml:
--------------------------------------------------------------------------------
1 |
2 | #define N 3 // total number of nodes
3 | #define Fa 1 // max number of availability failures
4 | #define Fc 0 // max number of correctness failures
5 | #define T (Fa+Fc+1) // consensus threshold required
6 |
7 | #define STEPS 3 // TLC time-steps per consensus round
8 | #define ROUNDS 2 // number of consensus rounds to run
9 | #define TICKETS 3 // proposal lottery ticket space
10 |
11 | // TLC state for each logical time-step
12 | typedef Step {
13 | bit sent; // true if we've sent our raw proposal
14 | bit seen[1+N]; // nodes whose raw proposals we've received
15 | bit ackd[1+N]; // nodes who have acknowledged our raw proposal
16 | bit witd; // true if our proposal is threshold witnessed
17 | bit witn[1+N]; // nodes we've gotten threshold witnessed msgs from
18 | }
19 |
20 | // QSC summary information for a "best" proposal seen so far
21 | typedef Best {
22 | byte from; // node number the proposal is from, 0 if tied spoiler
23 | byte tkt; // proposal's genetic fitness ticket value
24 | }
25 |
26 | // TLC and QSC state per round
27 | typedef Round {
28 | Step step[STEPS]; // TLC state for each logical time-step
29 |
30 | byte ticket; // QSC lottery ticket assigned to proposal at t+0
31 | Best spoil; // best potential spoiler(s) we've found so far
32 | Best conf; // best confirmed proposal we've seen so far
33 | Best reconf; // best reconfirmed proposal we've seen so far
34 | byte picked; // which proposal this node picked this round, 0 if not yet
35 | }
36 |
37 | // Per-node state
38 | typedef Node {
39 | Round rnd[ROUNDS]; // each node's per-consensus-round information
40 | }
41 |
42 | Node node[1+N]; // all state of each node 1..N
43 |
44 |
45 | // Implement a given node i.
46 | proctype NodeProc(byte i) {
47 | byte j, r, s, tkt, step, acks, wits;
48 |
49 | for (r : 0 .. ROUNDS-1) {
50 |
51 | atomic {
52 | // select a "random" (here just arbitrary) ticket
53 | select (tkt : 1 .. TICKETS);
54 | node[i].rnd[r].ticket = tkt;
55 |
56 | // start with our own proposal as best potential spoiler
57 | node[i].rnd[r].spoil.from = i;
58 | node[i].rnd[r].spoil.tkt = tkt;
59 | } // atomic
60 |
61 | // Run the round to completion
62 | for (s : 0 .. STEPS-1) {
63 |
64 | // "send" the broadcast for this time-step
65 | node[i].rnd[r].step[s].sent = 1;
66 |
67 | // collect a threshold of other nodes' broadcasts
68 | acks = 0;
69 | wits = 0;
70 | do
71 | :: // Pick another node to "receive" a message from
72 | select (j : 1 .. N);
73 | atomic {
74 |
75 | // Track the best potential spoiler we encounter
76 | if
77 | // Node j knows about a strictly better potential spoiler
78 | :: node[j].rnd[r].spoil.tkt > node[i].rnd[r].spoil.tkt ->
79 | node[i].rnd[r].spoil.from = node[j].rnd[r].spoil.from;
80 | node[i].rnd[r].spoil.tkt = node[j].rnd[r].spoil.tkt;
81 |
82 | // Node j knows about a spoiler that's tied with our best
83 | :: node[j].rnd[r].spoil.tkt == node[i].rnd[r].spoil.tkt &&
84 | node[j].rnd[r].spoil.from != node[i].rnd[r].spoil.from ->
85 | node[i].rnd[r].spoil.from = 0; // tied, so mark invalid
86 |
87 | :: else -> skip
88 | fi
89 |
90 | // Track the best confirmed proposal we encounter
91 | if
92 | :: node[j].rnd[r].conf.tkt > node[i].rnd[r].conf.tkt ->
93 | node[i].rnd[r].conf.from = node[j].rnd[r].conf.from;
94 | node[i].rnd[r].conf.tkt = node[j].rnd[r].conf.tkt;
95 | :: else -> skip
96 | fi
97 |
98 | // Track the best reconfirmed proposal we encounter
99 | if
100 | :: node[j].rnd[r].reconf.tkt > node[i].rnd[r].reconf.tkt ->
101 | node[i].rnd[r].reconf.from = node[j].rnd[r].reconf.from;
102 | node[i].rnd[r].reconf.tkt = node[j].rnd[r].reconf.tkt;
103 | :: else -> skip
104 | fi
105 |
106 | // Now handle specific types of messages: Raw, Ack, or Wit.
107 | if
108 |
109 | // We "receive" a raw unwitnessed message from node j
110 | :: node[j].rnd[r].step[s].sent && !node[i].rnd[r].step[s].seen[j] ->
111 |
112 | node[i].rnd[r].step[s].seen[j] = 1;
113 |
114 | // We "receive" an acknowledgment of our message from node j
115 | :: node[j].rnd[r].step[s].seen[i] && !node[i].rnd[r].step[s].ackd[j] ->
116 |
117 | node[i].rnd[r].step[s].ackd[j] = 1;
118 | acks++;
119 | if
120 | :: acks >= T ->
121 | // Our proposal is now fully threshold witnessed
122 | node[i].rnd[r].step[s].witd = 1;
123 |
124 | // See if our proposal is now the best confirmed proposal
125 | if
126 | :: s == 0 &&
127 | node[i].rnd[r].ticket > node[i].rnd[r].conf.tkt ->
128 | node[i].rnd[r].conf.from = i;
129 | node[i].rnd[r].conf.tkt = node[i].rnd[r].ticket;
130 | :: else -> skip
131 | fi
132 |
133 | // See if we're reconfirming a best confirmed proposal
134 | if
135 | :: s == 1 &&
136 | node[i].rnd[r].conf.tkt > node[i].rnd[r].reconf.tkt ->
137 | node[i].rnd[r].reconf.from = node[i].rnd[r].conf.from;
138 | node[i].rnd[r].reconf.tkt = node[i].rnd[r].conf.tkt;
139 | :: else -> skip
140 | fi
141 |
142 | :: else -> skip
143 | fi
144 |
145 | // We "receive" a fully threshold witnessed message from node j
146 | :: node[j].rnd[r].step[s].witd && !node[i].rnd[r].step[s].witn[j] ->
147 |
148 | node[i].rnd[r].step[s].witn[j] = 1;
149 | wits++;
150 |
151 | // End this step if we've seen enough witnessed proposals
152 | :: wits >= T -> break;
153 |
154 | :: else -> skip
155 | fi
156 | } // atomic
157 | od
158 | }
159 |
160 | atomic {
161 | printf("%d best spoiler %d ticket %d\n",
162 | i, node[i].rnd[r].spoil.from, node[i].rnd[r].spoil.tkt);
163 | printf("%d best confirmed %d ticket %d\n",
164 | i, node[i].rnd[r].conf.from, node[i].rnd[r].conf.tkt);
165 | printf("%d best reconfirmed %d ticket %d\n",
166 | i, node[i].rnd[r].reconf.from, node[i].rnd[r].reconf.tkt);
167 |
168 | // The round is now complete in terms of picking a proposal.
169 | node[i].rnd[r].picked = node[i].rnd[r].conf.from;
170 |
171 | // We can be sure everyone has converged on this proposal
172 | // if it is also the best spoiler and best reconfirmed proposal.
173 | if
174 | :: node[i].rnd[r].spoil.from == node[i].rnd[r].picked &&
175 | node[i].rnd[r].reconf.from == node[i].rnd[r].picked ->
176 | printf("%d round %d definitely COMMITTED\n", i, r);
177 |
178 | // Verify that what we decided doesn't conflict with
179 | // the proposal any other node chooses.
180 | select (j : 1 .. N);
181 | assert(!node[j].rnd[r].picked ||
182 | (node[j].rnd[r].picked == node[i].rnd[r].picked));
183 |
184 | :: node[i].rnd[r].reconf.from != node[i].rnd[r].picked ->
185 | printf("%d round %d FAILED to be reconfirmed\n", i, r);
186 |
187 | :: node[i].rnd[r].spoil.from != node[i].rnd[r].picked ->
188 | printf("%d round %d FAILED due to spoiler\n", i, r);
189 |
190 | :: node[i].rnd[r].spoil.from == 0 ->
191 | printf("%d round %d FAILED due to tie\n", i, r);
192 |
193 | :: else -> skip
194 | fi
195 | } // atomic
196 | }
197 | }
198 |
199 | init {
200 | atomic {
201 | int i;
202 | for (i : 1 .. N) {
203 | run NodeProc(i)
204 | }
205 | }
206 | }
207 |
208 |
--------------------------------------------------------------------------------
/spin/results-qp.txt:
--------------------------------------------------------------------------------
1 | qp.pml verification:
2 |
3 | Exhaustive verification using spin -search -O2 -safety -DMEMLIM=60000 $1
4 | Results from running on Bryan's 2019 MacBook Pro M1 Max.
5 |
6 | ---
7 | 7 steps (1 full consensus round in steps 4-7):
8 |
9 | Depth= 180 States= 1.4e+07 Transitions= 2.51e+07 Memory= 1088.105 t= 5.64 R= 2e+06
10 |
11 | (Spin Version 6.5.2 -- 6 December 2019)
12 | + Partial Order Reduction
13 |
14 | Full statespace search for:
15 | never claim - (none specified)
16 | assertion violations +
17 | cycle checks - (disabled by -DSAFETY)
18 | invalid end states +
19 |
20 | State-vector 60 byte, depth reached 180, errors: 0
21 | 14561376 states, stored
22 | 11554544 states, matched
23 | 26115920 transitions (= stored+matched)
24 | 14444193 atomic steps
25 | hash conflicts: 3406143 (resolved)
26 |
27 | Stats on memory usage (in Megabytes):
28 | 1222.039 equivalent memory usage for states (stored*(State-vector + overhead))
29 | 998.371 actual memory usage for states (compression: 81.70%)
30 | state-vector as stored = 44 byte + 28 byte overhead
31 | 128.000 memory used for hash table (-w24)
32 | 0.534 memory used for DFS stack (-m10000)
33 | 1126.581 total actual memory usage
34 |
35 |
36 | unreached in proctype Proposer
37 | (0 of 114 states)
38 | unreached in init
39 | (0 of 16 states)
40 |
41 | pan: elapsed time 5.92 seconds
42 | pan: rate 2459691.9 states/second
43 |
44 |
45 | ---
46 | 8 steps:
47 |
48 | Depth= 220 States= 4.3e+07 Transitions= 7.36e+07 Memory= 3559.675 t= 22.4 R= 2e+06
49 |
50 | (Spin Version 6.5.2 -- 6 December 2019)
51 | + Partial Order Reduction
52 |
53 | Full statespace search for:
54 | never claim - (none specified)
55 | assertion violations +
56 | cycle checks - (disabled by -DSAFETY)
57 | invalid end states +
58 |
59 | State-vector 60 byte, depth reached 220, errors: 0
60 | 43443684 states, stored
61 | 31011944 states, matched
62 | 74455628 transitions (= stored+matched)
63 | 38765865 atomic steps
64 | hash conflicts: 24394276 (resolved)
65 |
66 | Stats on memory usage (in Megabytes):
67 | 3645.939 equivalent memory usage for states (stored*(State-vector + overhead))
68 | 3078.502 actual memory usage for states (compression: 84.44%)
69 | state-vector as stored = 46 byte + 28 byte overhead
70 | 512.000 memory used for hash table (-w26)
71 | 0.534 memory used for DFS stack (-m10000)
72 | 3590.144 total actual memory usage
73 |
74 |
75 | unreached in proctype Proposer
76 | (0 of 114 states)
77 | unreached in init
78 | (0 of 16 states)
79 |
80 | pan: elapsed time 22.6 seconds
81 | pan: rate 1919738.6 states/second
82 |
83 |
84 | ---
85 | 9 steps:
86 |
87 | Depth= 262 States= 1.17e+08 Transitions= 2.12e+08 Memory= 8642.683 t= 67.9 R= 2e+06
88 |
89 | (Spin Version 6.5.2 -- 6 December 2019)
90 | + Partial Order Reduction
91 |
92 | Full statespace search for:
93 | never claim - (none specified)
94 | assertion violations +
95 | cycle checks - (disabled by -DSAFETY)
96 | invalid end states +
97 |
98 | State-vector 60 byte, depth reached 262, errors: 0
99 | 1.1701493e+08 states, stored
100 | 95266996 states, matched
101 | 2.1228193e+08 transitions (= stored+matched)
102 | 1.3433094e+08 atomic steps
103 | hash conflicts: 80036981 (resolved)
104 |
105 | Stats on memory usage (in Megabytes):
106 | 9820.284 equivalent memory usage for states (stored*(State-vector + overhead))
107 | 8133.282 actual memory usage for states (compression: 82.82%)
108 | state-vector as stored = 45 byte + 28 byte overhead
109 | 512.000 memory used for hash table (-w26)
110 | 0.534 memory used for DFS stack (-m10000)
111 | 2.157 memory lost to fragmentation
112 | 8643.659 total actual memory usage
113 |
114 |
115 | unreached in proctype Proposer
116 | (0 of 114 states)
117 | unreached in init
118 | (0 of 16 states)
119 |
120 | pan: elapsed time 67.9 seconds
121 | pan: rate 1722834.7 states/second
122 |
123 |
124 | ---
125 | 10 steps:
126 |
127 | Depth= 302 States= 1.93e+08 Transitions= 3.66e+08 Memory= 15855.624 t= 121 R= 2e+06
128 |
129 | (Spin Version 6.5.2 -- 6 December 2019)
130 | + Partial Order Reduction
131 |
132 | Full statespace search for:
133 | never claim - (none specified)
134 | assertion violations +
135 | cycle checks - (disabled by -DSAFETY)
136 | invalid end states +
137 |
138 | State-vector 60 byte, depth reached 302, errors: 0
139 | 1.9397366e+08 states, stored
140 | 1.7384229e+08 states, matched
141 | 3.6781595e+08 transitions (= stored+matched)
142 | 2.374664e+08 atomic steps
143 | hash conflicts: 1.350289e+08 (resolved)
144 |
145 | Stats on memory usage (in Megabytes):
146 | 16278.918 equivalent memory usage for states (stored*(State-vector + overhead))
147 | 13877.107 actual memory usage for states (compression: 85.25%)
148 | state-vector as stored = 47 byte + 28 byte overhead
149 | 2048.000 memory used for hash table (-w28)
150 | 0.534 memory used for DFS stack (-m10000)
151 | 3.122 memory lost to fragmentation
152 | 15922.519 total actual memory usage
153 |
154 |
155 | unreached in proctype Proposer
156 | (0 of 114 states)
157 | unreached in init
158 | (0 of 16 states)
159 |
160 | pan: elapsed time 122 seconds
161 | pan: rate 1595047 states/second
162 |
163 |
164 | ---
165 | 11 steps (2 full consensus rounds: steps 4-7 and 8-11):
166 |
167 | Depth= 338 States= 2.45e+08 Transitions= 4.68e+08 Memory= 19425.351 t= 153 R= 2e+06
168 |
169 | (Spin Version 6.5.2 -- 6 December 2019)
170 | + Partial Order Reduction
171 |
172 | Full statespace search for:
173 | never claim - (none specified)
174 | assertion violations +
175 | cycle checks - (disabled by -DSAFETY)
176 | invalid end states +
177 |
178 | State-vector 60 byte, depth reached 338, errors: 0
179 | 2.4529035e+08 states, stored
180 | 2.2295857e+08 states, matched
181 | 4.6824892e+08 transitions (= stored+matched)
182 | 3.0213691e+08 atomic steps
183 | hash conflicts: 1.5778641e+08 (resolved)
184 |
185 | Stats on memory usage (in Megabytes):
186 | 20585.585 equivalent memory usage for states (stored*(State-vector + overhead))
187 | 17400.834 actual memory usage for states (compression: 84.53%)
188 | state-vector as stored = 46 byte + 28 byte overhead
189 | 2048.000 memory used for hash table (-w28)
190 | 0.534 memory used for DFS stack (-m10000)
191 | 4.096 memory lost to fragmentation
192 | 19445.272 total actual memory usage
193 |
194 |
195 | unreached in proctype Proposer
196 | (0 of 114 states)
197 | unreached in init
198 | (0 of 16 states)
199 |
200 | pan: elapsed time 153 seconds
201 | pan: rate 1600380.7 states/second
202 |
203 |
204 | ---
205 | 12 steps:
206 |
207 | Depth= 378 States= 3.85e+08 Transitions= 7.09e+08 Memory= 28987.069 t= 244 R= 2e+06
208 |
209 | (Spin Version 6.5.2 -- 6 December 2019)
210 | + Partial Order Reduction
211 |
212 | Full statespace search for:
213 | never claim - (none specified)
214 | assertion violations +
215 | cycle checks - (disabled by -DSAFETY)
216 | invalid end states +
217 |
218 | State-vector 60 byte, depth reached 378, errors: 0
219 | 3.8578596e+08 states, stored
220 | 3.2452935e+08 states, matched
221 | 7.1031531e+08 transitions (= stored+matched)
222 | 4.3387088e+08 atomic steps
223 | hash conflicts: 2.5898853e+08 (resolved)
224 |
225 | Stats on memory usage (in Megabytes):
226 | 32376.446 equivalent memory usage for states (stored*(State-vector + overhead))
227 | 26998.934 actual memory usage for states (compression: 83.39%)
228 | state-vector as stored = 45 byte + 28 byte overhead
229 | 2048.000 memory used for hash table (-w28)
230 | 0.534 memory used for DFS stack (-m10000)
231 | 6.396 memory lost to fragmentation
232 | 29041.073 total actual memory usage
233 |
234 |
235 | unreached in proctype Proposer
236 | (0 of 114 states)
237 | unreached in init
238 | (0 of 16 states)
239 |
240 | pan: elapsed time 244 seconds
241 | pan: rate 1579665.7 states/second
242 |
243 |
--------------------------------------------------------------------------------
/spin/results-qpm.txt:
--------------------------------------------------------------------------------
1 | qpm.pml verification:
2 |
3 | Bitstate verification using spin -search -O2 -safety -bitstate -w38 $1
4 | (32GB state hash table).
5 | Results from running on Bryan's 2019 MacBook Pro M1 Max.
6 |
7 | ---
8 | 4 steps:
9 |
10 | Depth= 1007 States= 3e+06 Transitions= 4.93e+06 Memory= 32768.925 t= 3.14 R= 1e+06
11 |
12 | (Spin Version 6.5.2 -- 6 December 2019)
13 | + Partial Order Reduction
14 |
15 | Bit statespace search for:
16 | never claim - (none specified)
17 | assertion violations +
18 | cycle checks - (disabled by -DSAFETY)
19 | invalid end states +
20 |
21 | State-vector 168 byte, depth reached 1007, errors: 0
22 | 3871180 states, stored
23 | 2503227 states, matched
24 | 6374407 transitions (= stored+matched)
25 | 20 atomic steps
26 |
27 | hash factor: 71006.2 (best if > 100.)
28 |
29 | bits set per state: 3 (-k3)
30 |
31 | Stats on memory usage (in Megabytes):
32 | 694.067 equivalent memory usage for states (stored*(State-vector + overhead))
33 | 32768.000 memory used for hash array (-w38)
34 | 0.076 memory used for bit stack
35 | 0.534 memory used for DFS stack (-m10000)
36 | 32768.925 total actual memory usage
37 |
38 |
39 | unreached in proctype Proposer
40 | qpm.pml:115, state 58, "recs = (recs+1)"
41 | qpm.pml:140, state 81, "(1)"
42 | qpm.pml:148, state 87, "decided = ((p>>0)&((1<<4)-1))"
43 | qpm.pml:149, state 90, "(1)"
44 | qpm.pml:147, state 91, "((p==g))"
45 | qpm.pml:147, state 91, "else"
46 | qpm.pml:156, state 95, "p = g"
47 | (6 of 111 states)
48 | unreached in proctype Recorder
49 | (0 of 26 states)
50 | unreached in init
51 | (0 of 26 states)
52 |
53 | pan: elapsed time 3.96 seconds
54 | pan: rate 977570.71 states/second
55 |
56 |
57 | ---
58 | 5 steps:
59 |
60 | Depth= 1743 States= 2.13e+08 Transitions= 3.74e+08 Memory= 32769.120 t= 213 R= 1e+06
61 |
62 | (Spin Version 6.5.2 -- 6 December 2019)
63 | + Partial Order Reduction
64 |
65 | Bit statespace search for:
66 | never claim - (none specified)
67 | assertion violations +
68 | cycle checks - (disabled by -DSAFETY)
69 | invalid end states +
70 |
71 | State-vector 168 byte, depth reached 1743, errors: 0
72 | 2.1362823e+08 states, stored
73 | 1.6166948e+08 states, matched
74 | 3.7529772e+08 transitions (= stored+matched)
75 | 20 atomic steps
76 |
77 | hash factor: 1286.71 (best if > 100.)
78 |
79 | bits set per state: 3 (-k3)
80 |
81 | Stats on memory usage (in Megabytes):
82 | 38301.571 equivalent memory usage for states (stored*(State-vector + overhead))
83 | 32768.000 memory used for hash array (-w38)
84 | 0.076 memory used for bit stack
85 | 0.534 memory used for DFS stack (-m10000)
86 | 32769.120 total actual memory usage
87 |
88 |
89 | unreached in proctype Proposer
90 | qpm.pml:148, state 87, "decided = ((p>>0)&((1<<4)-1))"
91 | qpm.pml:149, state 90, "(1)"
92 | qpm.pml:147, state 91, "((p==g))"
93 | qpm.pml:147, state 91, "else"
94 | qpm.pml:156, state 95, "p = g"
95 | (4 of 111 states)
96 | unreached in proctype Recorder
97 | (0 of 26 states)
98 | unreached in init
99 | (0 of 26 states)
100 |
101 | pan: elapsed time 213 seconds
102 | pan: rate 1001116.4 states/second
103 |
104 | ---
105 | 6 steps:
106 |
107 | Depth= 2323 States= 1.19e+09 Transitions= 2.14e+09 Memory= 32769.218 t= 1.11e+03 R= 1e+06
108 |
109 | (Spin Version 6.5.2 -- 6 December 2019)
110 | + Partial Order Reduction
111 |
112 | Bit statespace search for:
113 | never claim - (none specified)
114 | assertion violations +
115 | cycle checks - (disabled by -DSAFETY)
116 | invalid end states +
117 |
118 | State-vector 168 byte, depth reached 2323, errors: 0
119 | 1.1925986e+09 states, stored
120 | 9.5240049e+08 states, matched
121 | 2.1449991e+09 transitions (= stored+matched)
122 | 20 atomic steps
123 |
124 | hash factor: 230.487 (best if > 100.)
125 |
126 | bits set per state: 3 (-k3)
127 |
128 | Stats on memory usage (in Megabytes):
129 | 213821.928 equivalent memory usage for states (stored*(State-vector + overhead))
130 | 32768.000 memory used for hash array (-w38)
131 | 0.076 memory used for bit stack
132 | 0.534 memory used for DFS stack (-m10000)
133 | 32769.218 total actual memory usage
134 |
135 |
136 | unreached in proctype Proposer
137 | qpm.pml:156, state 95, "p = g"
138 | (1 of 111 states)
139 | unreached in proctype Recorder
140 | (0 of 26 states)
141 | unreached in init
142 | (0 of 26 states)
143 |
144 | pan: elapsed time 1.11e+03 seconds
145 | pan: rate 1070699.5 states/second
146 |
147 | ---
148 | 7 steps:
149 |
150 | Depth= 3018 States= 3.57e+09 Transitions= 6.48e+09 Memory= 32769.315 t= 3.44e+03 R= 1e+06
151 |
152 | (Spin Version 6.5.2 -- 6 December 2019)
153 | + Partial Order Reduction
154 |
155 | Bit statespace search for:
156 | never claim - (none specified)
157 | assertion violations +
158 | cycle checks - (disabled by -DSAFETY)
159 | invalid end states +
160 |
161 | State-vector 168 byte, depth reached 3018, errors: 0
162 | 3.5701183e+09 states, stored
163 | 2.9060201e+09 states, matched
164 | 6.4761385e+09 transitions (= stored+matched)
165 | 20 atomic steps
166 |
167 | hash factor: 76.9941 (best if > 100.)
168 |
169 | bits set per state: 3 (-k3)
170 |
171 | Stats on memory usage (in Megabytes):
172 | 640089.267 equivalent memory usage for states (stored*(State-vector + overhead))
173 | 32768.000 memory used for hash array (-w38)
174 | 0.076 memory used for bit stack
175 | 0.534 memory used for DFS stack (-m10000)
176 | 32769.315 total actual memory usage
177 |
178 |
179 | unreached in proctype Proposer
180 | (0 of 111 states)
181 | unreached in proctype Recorder
182 | (0 of 26 states)
183 | unreached in init
184 | (0 of 26 states)
185 |
186 | pan: elapsed time 3.44e+03 seconds
187 | pan: rate 1037038.3 states/second
188 |
189 |
190 | ---
191 | 8 steps:
192 |
193 | Depth= 3741 States= 1.55e+10 Transitions= 2.7e+10 Memory= 32769.511 t= 1.6e+04 R= 1e+06
194 |
195 | (Spin Version 6.5.2 -- 6 December 2019)
196 | + Partial Order Reduction
197 |
198 | Bit statespace search for:
199 | never claim - (none specified)
200 | assertion violations +
201 | cycle checks - (disabled by -DSAFETY)
202 | invalid end states +
203 |
204 | State-vector 168 byte, depth reached 3741, errors: 0
205 | 1.5529605e+10 states, stored
206 | 1.1502249e+10 states, matched
207 | 2.7031855e+10 transitions (= stored+matched)
208 | 20 atomic steps
209 |
210 | hash factor: 17.7003 (best if > 100.)
211 |
212 | bits set per state: 3 (-k3)
213 |
214 | Stats on memory usage (in Megabytes):
215 | 2784314.887 equivalent memory usage for states (stored*(State-vector + overhead))
216 | 32768.000 memory used for hash array (-w38)
217 | 0.076 memory used for bit stack
218 | 0.534 memory used for DFS stack (-m10000)
219 | 32769.511 total actual memory usage
220 |
221 |
222 | unreached in proctype Proposer
223 | (0 of 111 states)
224 | unreached in proctype Recorder
225 | (0 of 26 states)
226 | unreached in init
227 | (0 of 26 states)
228 |
229 | pan: elapsed time 1.6e+04 seconds
230 | pan: rate 969476.33 states/second
231 |
232 | ---
233 | 10 steps:
234 |
235 | Depth= 4912 States= 6.2e+10 Transitions= 1.1e+11 Memory= 32769.706 t= 1.06e+05 R= 6e+05
236 |
237 | (Spin Version 6.5.2 -- 6 December 2019)
238 | + Partial Order Reduction
239 |
240 | Bit statespace search for:
241 | never claim - (none specified)
242 | assertion violations +
243 | cycle checks - (disabled by -DSAFETY)
244 | invalid end states +
245 |
246 | State-vector 168 byte, depth reached 4912, errors: 0
247 | 6.1979144e+10 states, stored
248 | 4.7682823e+10 states, matched
249 | 1.0966197e+11 transitions (= stored+matched)
250 | 20 atomic steps
251 |
252 | hash factor: 4.43501 (best if > 100.)
253 |
254 | bits set per state: 3 (-k3)
255 |
256 | Stats on memory usage (in Megabytes):
257 | 11112288.506 equivalent memory usage for states (stored*(State-vector + overhead))
258 | 32768.000 memory used for hash array (-w38)
259 | 0.076 memory used for bit stack
260 | 0.534 memory used for DFS stack (-m10000)
261 | 1.014 other (proc and chan stacks)
262 | 32769.706 total actual memory usage
263 |
264 |
265 | unreached in proctype Proposer
266 | (0 of 111 states)
267 | unreached in proctype Recorder
268 | (0 of 26 states)
269 | unreached in init
270 | (0 of 26 states)
271 |
272 | pan: elapsed time 1.06e+05 seconds
273 | pan: rate 583824.35 states/second
274 |
275 |
276 | ---
277 | 11 steps:
278 |
279 |
280 | Depth= 5465 States= 7.1e+10 Transitions= 1.25e+11 Memory= 32769.804 t= 8.45e+04 R= 8e+05
281 |
282 | (Spin Version 6.5.2 -- 6 December 2019)
283 | + Partial Order Reduction
284 |
285 | Bit statespace search for:
286 | never claim - (none specified)
287 | assertion violations +
288 | cycle checks - (disabled by -DSAFETY)
289 | invalid end states +
290 |
291 | State-vector 168 byte, depth reached 5465, errors: 0
292 | 7.0950964e+10 states, stored
293 | 5.4281076e+10 states, matched
294 | 1.2523204e+11 transitions (= stored+matched)
295 | 20 atomic steps
296 |
297 | hash factor: 3.8742 (best if > 100.)
298 |
299 | bits set per state: 3 (-k3)
300 |
301 | Stats on memory usage (in Megabytes):
302 | 12720853.000 equivalent memory usage for states (stored*(State-vector + overhead))
303 | 32768.000 memory used for hash array (-w38)
304 | 0.076 memory used for bit stack
305 | 0.534 memory used for DFS stack (-m10000)
306 | 1.111 other (proc and chan stacks)
307 | 32769.804 total actual memory usage
308 |
309 |
310 | unreached in proctype Proposer
311 | (0 of 111 states)
312 | unreached in proctype Recorder
313 | (0 of 26 states)
314 | unreached in init
315 | (0 of 26 states)
316 |
317 | pan: elapsed time 8.45e+04 seconds
318 | pan: rate 839344.87 states/second
319 |
320 |
321 | ---
322 | 12 steps:
323 |
324 |
325 |
326 |
--------------------------------------------------------------------------------
/spin/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Analyze the consensus model using the Spin model checker.
3 |
4 | # Exhaustive verification.
5 | # MEMLIM is the memory-usage limit in megabytes.
6 | #spin -search -O2 -safety -DMEMLIM=60000 $1
7 |
8 | # Set maximum search depth (-m), making it an error to exceed this depth (-b).
9 | #spin -search -O2 -safety -DMEMLIM=60000 -m3870 -b $1
10 |
11 | # Exhaustive verification with state vector compression.
12 | #spin -search -O2 -safety -DMEMLIM=60000 -collapse $1
13 | #spin -search -O2 -safety -DMEMLIM=60000 -hc $1
14 |
15 | # Bitstate verification - most aggressive state compression.
16 | # -w defines the power of two of the hash table size in bits.
17 | # examples: -w28: 32MB, -w33: 1GB, -w38: 32GB
18 | #spin -search -O2 -safety -bitstate -w28 $1
19 | spin -search -O2 -safety -bitstate -w38 $1
20 |
21 |
--------------------------------------------------------------------------------
/tools/qsc/.gitignore:
--------------------------------------------------------------------------------
1 | qsc
2 |
--------------------------------------------------------------------------------
/tools/qsc/group.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "context"
5 | "errors"
6 | "net/url"
7 | "strings"
8 |
9 | "github.com/bford/cofo/cri"
10 |
11 | "github.com/dedis/tlc/go/lib/cas"
12 | "github.com/dedis/tlc/go/lib/fs/casdir"
13 | "github.com/dedis/tlc/go/model/qscod/qscas"
14 | )
15 |
16 | // Group represents a QSC consensus group.
17 | // XXX move to a suitable generic package.
18 | type group struct {
19 | qscas.Group
20 | }
21 |
22 | // Open a consensus group identified by the resource identifier ri.
23 | // Creates the group if create is true; otherwise opens existing group state.
24 | //
25 | // Supports composable resource identifiers (CRIs) as the preferred group syntax
26 | // because CRIs cleanly support nesting of resource identifiers.
27 | //
28 | func (g *group) Open(ctx context.Context, ri string, create bool) error {
29 |
30 | // Parse the group resource identifier into individual members
31 | paths, err := parseGroupRI(ri)
32 | if err != nil {
33 | return err
34 | }
35 | n := len(paths) // number of members in the consensus group
36 |
37 | // Create a POSIX directory-based CAS interface to each store
38 | stores := make([]cas.Store, n)
39 | for i, path := range paths {
40 | st := &casdir.Store{}
41 | if err := st.Init(path, create, create); err != nil {
42 | return err
43 | }
44 | stores[i] = st
45 | }
46 |
47 | // Start a CAS-based consensus group across this set of stores,
48 | // with the default threshold configuration.
49 | // (XXX make this configurable eventually.)
50 | g.Group.Start(ctx, stores, -1)
51 |
52 | return nil
53 | }
54 |
55 | // Parse a group resource identifier into individual member identifiers.
56 | func parseGroupRI(group string) ([]string, error) {
57 |
58 | // Allow just '[...]' as a command-line shorthand for 'qsc[...]'
59 | if len(group) > 0 && group[0] == '[' {
60 | group = "qsc" + group
61 | }
62 |
63 | // Parsing it as an actual CRI/URI is kind of unnecessary so far,
64 | // but may get more interesting with query-string options and such.
65 | rawurl, err := cri.URI.From(group)
66 | if err != nil {
67 | return nil, err
68 | }
69 | //println("rawurl:", rawurl)
70 | url, err := url.Parse(rawurl)
71 | if err != nil {
72 | return nil, err
73 | }
74 | if url.Scheme != "qsc" {
75 | return nil, errors.New("consensus groups must use qsc scheme")
76 | }
77 |
78 | // Parse the nested member paths from the opaque string in the URL.
79 | str, path := url.Opaque, ""
80 | var paths []string
81 | for str != "" {
82 | if i := strings.IndexByte(str, ','); i >= 0 {
83 | path, str = str[:i], str[i+1:]
84 | } else {
85 | path, str = str, ""
86 | }
87 | paths = append(paths, path)
88 | }
89 | if len(paths) < 3 {
90 | return nil, errors.New(
91 | "consensus groups must have minimum three members")
92 | }
93 |
94 | return paths, nil
95 | }
96 |
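
For illustration, the full form and the bare-bracket shorthand accepted by Open look as follows; this is a minimal sketch, not part of the repository, and the member paths are hypothetical local directories so that the casdir stores can be created.

// Illustrative only: create a three-member consensus group backed by
// hypothetical local directories ("[...]" alone is shorthand for "qsc[...]").
func exampleGroup(ctx context.Context) error {
	var g group
	return g.Open(ctx, "qsc[/tmp/m1,/tmp/m2,/tmp/m3]", true)
}
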
--------------------------------------------------------------------------------
/tools/qsc/main.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | //"flag"
6 | //"log"
7 | "context"
8 | "os"
9 | )
10 |
11 | var verbose bool = false
12 |
13 | const usageStr = `
14 | The qsc command provides tools using Que Sera Consensus (QSC).
15 |
16 | Usage:
17 |
18 | qsc <type> <command> [arguments]
19 |
20 | The types of consensus groups are:
21 |
22 | string Consensus on simple strings
23 | git Consensus on Git repositories
24 | hg Consensus on Mercurial repositories
25 |
26 | Run qsc <type> help for commands that apply to each type.
27 | `
28 |
29 | func usage(usageString string) {
30 | fmt.Println(usageString)
31 | os.Exit(1)
32 | }
33 |
34 | func main() {
35 | if len(os.Args) < 2 {
36 | usage(usageStr)
37 | }
38 |
39 | // Create a cancelable top-level context and cancel it when we're done,
40 | // to shut down asynchronous consensus access operations cleanly.
41 | ctx, cancel := context.WithCancel(context.Background())
42 | defer cancel()
43 |
44 | // Parse consensus group kind
45 | switch os.Args[1] {
46 | case "string":
47 | stringCommand(ctx, os.Args[2:])
48 | default:
49 | usage(usageStr)
50 | }
51 | }
52 |
--------------------------------------------------------------------------------
/tools/qsc/string.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "context"
5 | "fmt"
6 | "log"
7 | "os"
8 | )
9 |
10 | func stringCommand(ctx context.Context, args []string) {
11 | if len(args) == 0 {
12 | usage(stringUsageStr)
13 | }
14 | switch args[0] {
15 | case "init":
16 | stringInitCommand(ctx, args[1:])
17 | case "get":
18 | stringGetCommand(ctx, args[1:])
19 | case "set":
20 | stringSetCommand(ctx, args[1:])
21 | default:
22 | usage(stringUsageStr)
23 | }
24 | }
25 |
26 | const stringUsageStr = `
27 | Usage: qsc string <command> [arguments]
28 |
29 | The commands for string-value consensus groups are:
30 |
31 | init initialize a new consensus group
32 | get output the current consensus state as a quoted string
33 | set change the consensus state via atomic compare-and-set
34 | `
35 |
36 | func stringInitCommand(ctx context.Context, args []string) {
37 | if len(args) != 1 {
38 | usage(stringInitUsageStr)
39 | }
40 |
41 | // Create the consensus group state on each member node
42 | var g group
43 | err := g.Open(ctx, args[0], true)
44 | if err != nil {
45 | log.Fatal(err)
46 | }
47 | }
48 |
49 | const stringInitUsageStr = `
50 | Usage: qsc string init <group>
51 |
52 | where <group> specifies the consensus group
53 | as a composable resource identifier (CRI).
54 | For example:
55 |
56 | qsc string init qsc[host1:path1,host2:path2,host3:path3]
57 | `
58 |
59 | func stringGetCommand(ctx context.Context, args []string) {
60 | if len(args) != 1 {
61 | usage(stringGetUsageStr)
62 | }
63 |
64 | // Open the file stores
65 | var g group
66 | err := g.Open(ctx, args[0], false)
67 | if err != nil {
68 | log.Fatal(err)
69 | }
70 |
71 | // Find a consensus view of the last known commit.
72 | ver, val, err := g.CompareAndSet(ctx, "", "")
73 | if err != nil {
74 | log.Fatal(err)
75 | }
76 |
77 | fmt.Printf("version %d state %q\n", ver, val)
78 | }
79 |
80 | const stringGetUsageStr = `
81 | Usage: qsc string get <group>
82 |
83 | where <group> specifies the consensus group.
84 | Reads and prints the version number and string last committed.
85 | `
86 |
87 | func stringSetCommand(ctx context.Context, args []string) {
88 | if len(args) != 3 {
89 | usage(stringSetUsageStr)
90 | }
91 |
92 | old := args[1]
93 | new := args[2]
94 | if new == "" {
95 | log.Fatal("The empty string is reserved for the starting state")
96 | }
97 |
98 | // Open the file stores
99 | var g group
100 | err := g.Open(ctx, args[0], false)
101 | if err != nil {
102 | log.Fatal(err)
103 | }
104 |
105 | // Invoke the requested compare-and-set operation.
106 | ver, val, err := g.CompareAndSet(ctx, old, new)
107 | if err != nil {
108 | log.Fatal(err)
109 | }
110 |
111 | fmt.Printf("version %d state %q\n", ver, val)
112 |
113 | // Return success only if the next commit was what we wanted
114 | if val != new {
115 | os.Exit(1)
116 | }
117 | os.Exit(0)
118 | }
119 |
120 | const stringSetUsageStr = `
121 | Usage: qsc string set <group> <old> <new>
122 |
123 | where:
124 | <group> specifies the consensus group
125 | <old> is the expected existing value string
126 | <new> is the new value to set if it hasn't yet changed from <old>
127 |
128 | Prints the version number and string last committed,
129 | regardless of success or failure.
130 | `
131 |
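
The get and set commands above compose into the usual optimistic-concurrency pattern: read the current consensus value, compute a successor, and retry the compare-and-set until our value is the one committed. The following is a minimal sketch of that loop, assuming only the Group.CompareAndSet call already used above; the update function and the helper itself are illustrative, not part of the repository.

// Illustrative only: repeat CompareAndSet until our update is what gets committed.
func updateUntilCommitted(ctx context.Context, g *group, update func(string) string) (string, error) {
	_, cur, err := g.CompareAndSet(ctx, "", "") // consensus view of the current value
	for err == nil {
		next := update(cur)
		var val string
		if _, val, err = g.CompareAndSet(ctx, cur, next); err == nil && val == next {
			return val, nil // our proposal won this version
		}
		cur = val // another proposal was committed first; retry from it
	}
	return "", err
}
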
--------------------------------------------------------------------------------