├── .gitignore
├── LICENSE
├── README.md
├── ReplicaEngine
└── tla
│ ├── ReplicaEngine.tla
│ └── ReplicaEngine.toolbox
│ ├── .project
│ ├── .settings
│ └── org.lamport.tla.toolbox.prefs
│ └── ReplicaEngine___model.launch
├── Storage
└── tla
│ ├── Storage.tla
│ └── Storage.toolbox
│ └── Storage___model.launch
├── ZenWithTerms
└── tla
│ ├── ZenWithTerms.tla
│ └── ZenWithTerms.toolbox
│ ├── .project
│ ├── .settings
│ └── org.lamport.tla.toolbox.prefs
│ └── ZenWithTerms___model.launch
├── cluster
├── isabelle
│ ├── Implementation.thy
│ ├── Monadic.thy
│ ├── OneSlot.thy
│ ├── Preliminaries.thy
│ ├── ROOT
│ ├── Zen.thy
│ └── document
│ │ └── root.tex
└── tla
│ ├── consensus.tla
│ └── consensus.toolbox
│ ├── .project
│ ├── .settings
│ └── org.lamport.tla.toolbox.prefs
│ └── consensus___model.launch
└── data
└── tla
├── replication.tla
└── replication.toolbox
├── .project
├── .settings
└── org.lamport.tla.toolbox.prefs
└── replication___model.launch
/.gitignore:
--------------------------------------------------------------------------------
1 | **/.DS_Store
2 | **/tla/*.toolbox/model
3 | **/tla/*.toolbox/*aux
4 | **/tla/*.toolbox/*.log
5 | **/tla/*.toolbox/*.pdf
6 | **/tla/*.toolbox/*.tex
7 | **/tla/*.toolbox/*___model_SnapShot*.launch
8 | **/tla/*.toolbox/**/*.tla
9 | **/tla/*.toolbox/**/*.out
10 | **/tla/*.toolbox/**/MC.cfg
11 | **/tla/*.pdf
12 | **/tla/*.old
13 | **/*~
14 | cluster/isabelle/output
15 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Formal models of core Elasticsearch algorithms
2 |
3 | This repository contains formal models of core [Elasticsearch](https://github.com/elastic/elasticsearch) algorithms and is directly related to implementation efforts around [data replication](https://github.com/elastic/elasticsearch/issues/10708) and [cluster coordination](https://github.com/elastic/elasticsearch/issues/32006). The models in this repository might represent past, current and future designs of Elasticsearch and can differ from their implementations in substantial ways. The formal models mainly serve to illustrate some of the high-level concepts and help to validate resiliency-related aspects.
4 |
5 | ## Models
6 |
7 | ### Cluster coordination model
8 |
9 | The cluster coordination TLA+ model ensures the consistency of cluster state updates and represents the core [cluster coordination](https://github.com/elastic/elasticsearch/issues/32006) and metadata replication algorithm implemented in Elasticsearch 7.0. It consists of two files:
10 |
11 | - [TLA+ specification](ZenWithTerms/tla/ZenWithTerms.tla) which has a [direct one-to-one implementation in Elasticsearch](https://github.com/elastic/elasticsearch/blob/master/server/src/main/java/org/elasticsearch/cluster/coordination/CoordinationState.java)
12 | - [TLC model checking configuration](ZenWithTerms/tla/ZenWithTerms.toolbox/ZenWithTerms___model.launch)
13 |
14 | ### Data replication model
15 |
16 | The data replication TLA+ model describes the Elasticsearch [sequence number](https://github.com/elastic/elasticsearch/issues/10708) based data replication approach, implemented since Elasticsearch 6.0. It consists of two files:
17 |
18 | - [TLA+ specification](data/tla/replication.tla)
19 | - [TLC model checking configuration](data/tla/replication.toolbox/replication___model.launch)
20 |
21 | ### Replica engine
22 |
23 | A TLA+ model of how the
24 | [engine](https://github.com/elastic/elasticsearch/blob/00fd73acc4a2991f96438f8c1948016c5b9eefb2/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java)
25 | handles replication requests.
26 |
27 | - [TLA+ specification](ReplicaEngine/tla/ReplicaEngine.tla)
28 | - [TLC model checking configuration](ReplicaEngine/tla/ReplicaEngine.toolbox/ReplicaEngine___model.launch)
29 |
30 | ### Alternative cluster coordination model
31 |
32 | The alternative cluster coordination TLA+ model consists of two files:
33 |
34 | - [TLA+ specification](cluster/tla/consensus.tla)
35 | - [TLC model checking configuration](cluster/tla/consensus.toolbox/consensus___model.launch)
36 |
37 | The alternative cluster consensus Isabelle model consists of the following theories:
38 |
39 | - [Basic definitions](cluster/isabelle/Preliminaries.thy)
40 | - [An implementation in functional style](cluster/isabelle/Implementation.thy)
41 | - [An implementation in monadic style, along with a proof that it is equivalent to the functional one](cluster/isabelle/Monadic.thy)
42 | - [The proof that each slot is consistent, based on Lamport's Synod algorithm](cluster/isabelle/OneSlot.thy)
43 | - [The proof that the implementation ensures consistency](cluster/isabelle/Zen.thy)
44 |
45 | ## How to edit/run TLA+:
46 |
47 | - Install the [TLA Toolbox](http://research.microsoft.com/en-us/um/people/lamport/tla/toolbox.html)
48 | - If on Mac OS, [move the downloaded app to the Applications folder first](https://groups.google.com/forum/#!topic/tlaplus/bL04c6BiYxo)
49 | - Read some [documentation](http://research.microsoft.com/en-us/um/people/lamport/tla/book.html)
50 |
51 | How to run the model checker in headless mode:
52 |
53 | - Download [tla2tools.jar](http://research.microsoft.com/en-us/um/people/lamport/tla/tools.html)
54 | - Run the model checker once in TLA+ Toolbox on desktop (can be aborted once started). This generates the folder `<spec>.toolbox/model/` (for example `ZenWithTerms/tla/ZenWithTerms.toolbox/model/`), which contains all model files that are required to run the model checker in headless mode.
55 | - Copy the above folder and `tla2tools.jar` to the server running in headless mode.
56 | - `cd` to the folder and run `java -Xmx30G -cp ../tla2tools.jar tlc2.TLC MC -deadlock -workers 12`. The setting `-Xmx30G` denotes the amount of memory to allocate to the model checker and `-workers 12` the number of worker threads (should be equal to the number of cores on the machine). The setting `-deadlock` turns off deadlock checking, so that TLC explores the full reachable state space instead of stopping at the first state that has no successor.
57 |
--------------------------------------------------------------------------------
/ReplicaEngine/tla/ReplicaEngine.toolbox/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | ReplicaEngine
4 |
5 |
6 |
7 |
8 |
9 | toolbox.builder.TLAParserBuilder
10 |
11 |
12 |
13 |
14 | toolbox.builder.PCalAlgorithmSearchingBuilder
15 |
16 |
17 |
18 |
19 |
20 | toolbox.natures.TLANature
21 |
22 |
23 |
24 | ReplicaEngine.tla
25 | 1
26 | PARENT-1-PROJECT_LOC/ReplicaEngine.tla
27 |
28 |
29 |
30 |
--------------------------------------------------------------------------------
/ReplicaEngine/tla/ReplicaEngine.toolbox/.settings/org.lamport.tla.toolbox.prefs:
--------------------------------------------------------------------------------
1 | ProjectRootFile=PARENT-1-PROJECT_LOC/ReplicaEngine.tla
2 | eclipse.preferences.version=1
3 |
--------------------------------------------------------------------------------
/ReplicaEngine/tla/ReplicaEngine.toolbox/ReplicaEngine___model.launch:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
--------------------------------------------------------------------------------
/Storage/tla/Storage.tla:
--------------------------------------------------------------------------------
1 | ------------------------------ MODULE Storage ------------------------------
2 | EXTENDS Integers, FiniteSets, TLC
3 |
4 | CONSTANTS
5 | MaxNewMeta, \* maximum generation of newMeta to limit the state space
6 | MetaDataContent \* content that is written to the metadata file
7 |
8 | VARIABLES
9 | metadata, \* metadata[i] = MetaDataContent if the metadata file of generation i is present
10 | manifest, \* manifest[j] is the metadata generation that the manifest of generation j references
11 | newMeta, \* generation of the most recently created metadata file (-1 initially)
12 | newManifest, \* generation of the most recently created manifest file (-1 initially)
13 | state, \* name of the step to perform next; Next dispatches on this string
14 | possibleStates \* set of metadata generations that limits what can be read from disk
15 |
16 | --------------------------------------------------------------------------
17 | (*************************************************************************)
18 | (* First we define some helper functions to work with files abstraction. *)
19 | (* Files is a function from file generation to some content. *)
20 | (*************************************************************************)
21 |
22 | (*************************************************************************)
23 | (* CurrentGeneration returns the maximum file generation. If there are *)
24 | (* no files then -1 is returned. *)
25 | (*************************************************************************)
26 | CurrentGeneration(files) ==
27 |     LET gens == DOMAIN files IN                  \* generations that currently exist
28 |         IF gens /= {}
29 |         THEN CHOOSE top \in gens :               \* the unique maximum of gens
30 |                  \A other \in gens : top >= other
31 |         ELSE -1                                  \* sentinel: there is no file at all
32 |
33 | (*************************************************************************)
34 | (* DeleteFile removes file with generation delGen. *)
35 | (*************************************************************************)
36 | DeleteFile(files, delGen) == [g \in (DOMAIN files) \ {delGen} |-> files[g]]  \* files is returned unchanged when delGen is absent
37 |
38 | (*************************************************************************)
39 | (* DeleteFilesExcept removes all files except keepGen. *)
40 | (*************************************************************************)
41 | DeleteFilesExcept(files, keepGen) == [g \in {keepGen} |-> files[g]]  \* one-entry function; files[keepGen] is only defined when keepGen \in DOMAIN files
42 |
43 | (*************************************************************************)
44 | (* WriteFile creates new file with specified generation and content. *)
45 | (*************************************************************************)
46 | WriteFile(files, gen, content) == [g \in {gen} \cup DOMAIN files |-> IF g = gen THEN content ELSE files[g]]  \* an existing entry for gen is overwritten
47 |
48 | --------------------------------------------------------------------------
49 | (*************************************************************************)
50 | (* Now we define functions to emulate write and cleanup of the metadata. *)
51 | (*************************************************************************)
52 | WriteMetaOk(gen) ==                    \* the write succeeded: the file of generation gen is present afterwards
53 |     /\ state' = "writeManifest"        \* continue with the manifest write
54 |     /\ metadata' = WriteFile(metadata, gen, MetaDataContent)
55 |
56 | WriteMetaFail(gen) ==                  \* the write failed cleanly: no file was created (gen is not used)
57 |     /\ UNCHANGED metadata
58 |     /\ state' = "writeMeta"            \* the next step is another metadata write
59 |
60 | WriteMetaDirty(gen) ==                 \* outcome unknown: the file may or may not have been written
61 |     /\ metadata' \in { WriteFile(metadata, gen, MetaDataContent),
62 |                        metadata }
63 |     /\ state' = "deleteNewMeta"        \* the next step tries to remove the file again
64 |
65 | DeleteNewMeta ==                       \* try to remove the metadata file of generation newMeta
66 |     /\ \/ metadata' = DeleteFile(metadata, newMeta)   \* the delete took effect
67 |        \/ metadata' = metadata                        \* the delete had no effect
68 |     /\ state' = "writeMeta"                           \* either way, go back to writing metadata
69 |     /\ UNCHANGED <<manifest, newMeta, newManifest, possibleStates>>  \* frame: every variable not assigned above
70 |
71 | DeleteOldMeta ==                       \* try to remove every metadata file except generation newMeta
72 |     /\ \/ metadata' = DeleteFilesExcept(metadata, newMeta)   \* the cleanup took effect
73 |        \/ metadata' = metadata                               \* the cleanup had no effect
74 |     /\ state' = "writeMeta"                                  \* the cycle restarts with a metadata write
75 |     /\ UNCHANGED <<manifest, newMeta, newManifest, possibleStates>>  \* frame: every variable not assigned above
76 |
77 | WriteMeta ==                           \* attempt to write a metadata file with the next free generation
78 |     LET gen == CurrentGeneration(metadata) + 1 IN    \* one past the highest existing generation (0 when there is none)
79 |     /\ newMeta' = gen                                \* recorded for all three outcomes below
80 |     /\ \/ WriteMetaOk(gen)
81 |        \/ WriteMetaFail(gen)
82 |        \/ WriteMetaDirty(gen)
83 |     /\ UNCHANGED <<manifest, newManifest, possibleStates>>  \* frame: the outcomes assign metadata and state
84 |
85 | --------------------------------------------------------------------------
86 | (*************************************************************************)
87 | (* Now we define functions to emulate write and cleanup of the manifest *)
88 | (* file. *)
89 | (*************************************************************************)
90 | WriteManifestOk(gen) ==                \* the write succeeded: manifest gen references newMeta
91 |     /\ possibleStates' = {newMeta}     \* newMeta becomes the only readable metadata generation
92 |     /\ state' = "deleteOldManifest"
93 |     /\ manifest' = WriteFile(manifest, gen, newMeta)
94 |
95 | WriteManifestFail(gen) ==              \* the write failed cleanly: no manifest was created (gen is not used)
96 |     /\ UNCHANGED manifest
97 |     /\ UNCHANGED possibleStates
98 |     /\ state' = "deleteNewMeta"        \* the next step removes the metadata file of generation newMeta
99 |
100 | WriteManifestDirty(gen) ==            \* outcome unknown: the manifest may or may not have been written
101 |     /\ manifest' \in { WriteFile(manifest, gen, newMeta),
102 |                        manifest }
103 |     /\ state' = "deleteNewManifest"
104 |     /\ possibleStates' = possibleStates \cup {newMeta}  \* newMeta is added to what may be read from disk
105 |
106 | WriteManifest ==                      \* attempt to write a manifest with the next free generation
107 |     LET gen == CurrentGeneration(manifest) + 1 IN   \* one past the highest existing generation (0 when there is none)
108 |     /\ newManifest' = gen                           \* recorded for all three outcomes below
109 |     /\ \/ WriteManifestOk(gen)
110 |        \/ WriteManifestFail(gen)
111 |        \/ WriteManifestDirty(gen)
112 |     /\ UNCHANGED <<metadata, newMeta>>              \* frame: the outcomes assign manifest, state and possibleStates
113 |
114 | DeleteOldManifest ==                  \* try to remove every manifest except generation newManifest
115 |     /\ \/ manifest' = DeleteFilesExcept(manifest, newManifest)   \* the cleanup took effect
116 |        \/ manifest' = manifest                                   \* the cleanup had no effect
117 |     /\ state' = "deleteOldMeta"                                  \* old metadata files are cleaned up next
118 |     /\ UNCHANGED <<metadata, newMeta, newManifest, possibleStates>>  \* frame: every variable not assigned above
119 |
120 | --------------------------------------------------------------------------
121 | (*************************************************************************)
122 | (* Below are 3 versions of the same function, that is called when *)
123 | (* manifest write was dirty. The buggy one was initially implemented and *)
124 | (* was caught by https://github.com/elastic/elasticsearch/issues/39077. *)
125 | (* Pick one and use in Next function. *)
126 | (* https://github.com/elastic/elasticsearch/pull/40519 implements *)
127 | (* DeleteNewManifestEasy. *)
128 | (*************************************************************************)
129 | DeleteNewManifestBuggy ==             \* always moves on to deleteNewMeta, even when the manifest was kept
130 |     /\ \/ manifest' = DeleteFile(manifest, newManifest)   \* the delete took effect
131 |        \/ manifest' = manifest                            \* the delete had no effect
132 |     /\ state' = "deleteNewMeta"
133 |     /\ UNCHANGED <<metadata, newMeta, newManifest, possibleStates>>  \* frame: every variable not assigned above
134 |
135 | DeleteNewManifestEasy ==              \* always moves on to writeMeta, so the new metadata file is not deleted here
136 |     /\ \/ manifest' = DeleteFile(manifest, newManifest)   \* the delete took effect
137 |        \/ manifest' = manifest                            \* the delete had no effect
138 |     /\ state' = "writeMeta"
139 |     /\ UNCHANGED <<metadata, newMeta, newManifest, possibleStates>>  \* frame: every variable not assigned above
140 |
141 | DeleteNewManifestHard ==              \* the next step depends on whether the delete took effect
142 |     /\ \/ /\ manifest' = DeleteFile(manifest, newManifest)   \* manifest newManifest is gone ...
143 |           /\ state' = "deleteNewMeta"                        \* ... so move on to deleting the new metadata file
144 |        \/ /\ manifest' = manifest                            \* manifest set is unchanged ...
145 |           /\ state' = "writeMeta"                            \* ... so skip the metadata delete
146 |     /\ UNCHANGED <<metadata, newMeta, newManifest, possibleStates>>  \* frame: every variable not assigned above
147 | --------------------------------------------------------------------------
148 | (*************************************************************************)
149 | (* We can define Init and Next functions now. *)
150 | (*************************************************************************)
151 | Init ==                               \* empty disk, about to perform the first metadata write
152 |     /\ state = "writeMeta"            \* the first step is a metadata write
153 |     /\ possibleStates = {}            \* nothing can be read back from disk yet
154 |     /\ newMeta = -1                   \* no metadata file has been created yet
155 |     /\ newManifest = -1               \* no manifest file has been created yet
156 |     /\ metadata = <<>>                \* no metadata files (empty domain)
157 |     /\ manifest = <<>>                \* no manifest files (empty domain)
158 |
159 | Next ==                               \* dispatch on state; the guards compare against distinct strings
160 |     \/ state = "writeMeta"         /\ WriteMeta
161 |     \/ state = "writeManifest"     /\ WriteManifest
162 |     \/ state = "deleteOldManifest" /\ DeleteOldManifest
163 |     \/ state = "deleteOldMeta"     /\ DeleteOldMeta
164 |     \/ state = "deleteNewManifest" /\ DeleteNewManifestEasy \* try DeleteNewManifestBuggy and DeleteNewManifestHard
165 |     \/ state = "deleteNewMeta"     /\ DeleteNewMeta
166 | --------------------------------------------------------------------------
167 | (*************************************************************************)
168 | (* Our model has 2 invariants. *)
169 | (*************************************************************************)
170 | MetadataFileReferencedByManifestExists ==
171 |     LET cur == CurrentGeneration(manifest) IN        \* highest manifest generation, -1 if there is none
172 |         cur /= -1
173 |             => manifest[cur] \in DOMAIN metadata     \* the metadata generation it references is present
174 |
175 | MetadataReferencedByManifestIsValid ==
176 |     LET cur == CurrentGeneration(manifest) IN        \* highest manifest generation, -1 if there is none
177 |         cur /= -1
178 |             => manifest[cur] \in possibleStates      \* the referenced generation is one of the allowed ones
179 | ============
--------------------------------------------------------------------------------
/Storage/tla/Storage.toolbox/Storage___model.launch:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
--------------------------------------------------------------------------------
/ZenWithTerms/tla/ZenWithTerms.tla:
--------------------------------------------------------------------------------
1 | -------------------------------------------------------------------------------------
2 |
3 | -------------------------------- MODULE ZenWithTerms --------------------------------
4 | \* Imported modules used in this specification
5 | EXTENDS Naturals, FiniteSets, Sequences, TLC
6 |
7 | ----
8 |
9 | CONSTANTS Values
10 |
11 | \* Set of node ids (all master-eligible nodes)
12 | CONSTANTS Nodes
13 |
14 | \* RPC message types
15 | CONSTANTS
16 | Join,
17 | PublishRequest,
18 | PublishResponse,
19 | Commit
20 |
21 | ----
22 |
23 | \* Set of requests and responses sent between nodes.
24 | VARIABLE messages
25 |
26 | \* Transitive closure of value updates as done by leaders
27 | VARIABLE descendant
28 |
29 | \* Values to bootstrap the cluster
30 | VARIABLE initialConfiguration
31 | VARIABLE initialValue
32 | VARIABLE initialAcceptedVersion
33 |
34 | \* node state (map from node id to state)
35 | VARIABLE currentTerm
36 | VARIABLE lastCommittedConfiguration
37 | VARIABLE lastAcceptedTerm
38 | VARIABLE lastAcceptedVersion
39 | VARIABLE lastAcceptedValue
40 | VARIABLE lastAcceptedConfiguration
41 | VARIABLE joinVotes
42 | VARIABLE startedJoinSinceLastReboot
43 | VARIABLE electionWon
44 | VARIABLE lastPublishedVersion
45 | VARIABLE lastPublishedConfiguration
46 | VARIABLE publishVotes
47 |
48 | ----
49 |
50 | Terms == Nat \* presumably the range of currentTerm / lastAcceptedTerm (both start at 0 in Init) - TODO confirm
51 |
52 | Versions == Nat \* presumably the range of cluster-state version numbers - TODO confirm
53 |
54 | \* valid configurations: every subset of Nodes that has at least one member
55 | ValidConfigs == {cfg \in SUBSET Nodes : cfg /= {}}
56 |
57 | \* cluster-state versions that might have come from older systems; Init draws initialAcceptedVersion from [Nodes -> InitialVersions]
58 | InitialVersions == Nat \* NOTE(review): Nat is infinite - presumably overridden by a finite set in the model; confirm
59 |
60 | \* votes is a quorum for config when it contains a strict majority of config's members (so an empty config never has one)
61 | IsQuorum(votes, config) == 2 * Cardinality(config \cap votes) > Cardinality(config)
62 |
63 | IsElectionQuorum(n, votes) ==          \* votes must be a quorum in both n's last-committed and last-accepted configuration
64 |     \A cfg \in { lastCommittedConfiguration[n],
65 |                  lastAcceptedConfiguration[n] } : IsQuorum(votes, cfg)
66 |
67 | IsPublishQuorum(n, votes) ==           \* votes must be a quorum in both n's last-committed and last-published configuration
68 |     \A cfg \in { lastCommittedConfiguration[n],
69 |                  lastPublishedConfiguration[n] } : IsQuorum(votes, cfg)
70 |
71 | \* initial model state: no messages, every node in term 0 with empty configurations (NOTE(review): the /\ list looks de-indented in this copy; the conjuncts should align under the first /\)
72 | Init == /\ messages = {}
73 | /\ descendant = {}
74 | /\ initialConfiguration \in ValidConfigs \* bootstrap configuration, chosen nondeterministically
75 | /\ initialValue \in Values \* bootstrap value, chosen nondeterministically
76 | /\ initialAcceptedVersion \in [Nodes -> InitialVersions] \* one starting version per node
77 | /\ currentTerm = [n \in Nodes |-> 0]
78 | /\ lastCommittedConfiguration = [n \in Nodes |-> {}] \* empty config
79 | /\ lastAcceptedTerm = [n \in Nodes |-> 0]
80 | /\ lastAcceptedVersion = initialAcceptedVersion \* reads the value chosen three conjuncts above
81 | /\ lastAcceptedValue \in {[n \in Nodes |-> v] : v \in Values} \* all agree on initial value
82 | /\ lastAcceptedConfiguration = [n \in Nodes |-> lastCommittedConfiguration[n]] \* i.e. empty for every node
83 | /\ joinVotes = [n \in Nodes |-> {}]
84 | /\ startedJoinSinceLastReboot = [n \in Nodes |-> FALSE]
85 | /\ electionWon = [n \in Nodes |-> FALSE]
86 | /\ lastPublishedVersion = [n \in Nodes |-> 0]
87 | /\ lastPublishedConfiguration = [n \in Nodes |-> lastCommittedConfiguration[n]] \* i.e. empty for every node
88 | /\ publishVotes = [n \in Nodes |-> {}]
89 |
90 | \* Bootstrap node n with the initial state and config; enabled only while n's accepted configuration is still empty
91 | SetInitialState(n) ==
92 |     /\ lastAcceptedConfiguration[n] = {} \* not already bootstrapped
93 |     /\ Assert(lastAcceptedTerm[n] = 0, "lastAcceptedTerm should be 0") \* sanity checks on the un-bootstrapped node
94 |     /\ Assert(lastCommittedConfiguration[n] = {}, "lastCommittedConfiguration should be empty")
95 |     /\ Assert(lastPublishedVersion[n] = 0, "lastPublishedVersion should be 0")
96 |     /\ Assert(lastPublishedConfiguration[n] = {}, "lastPublishedConfiguration should be empty")
97 |     /\ Assert(electionWon[n] = FALSE, "electionWon should be FALSE")
98 |     /\ Assert(joinVotes[n] = {}, "joinVotes should be empty")
99 |     /\ Assert(publishVotes[n] = {}, "publishVotes should be empty")
100 |     /\ lastAcceptedConfiguration' = [lastAcceptedConfiguration EXCEPT ![n] = initialConfiguration]
101 |     /\ lastAcceptedValue' = [lastAcceptedValue EXCEPT ![n] = initialValue]
102 |     /\ lastCommittedConfiguration' = [lastCommittedConfiguration EXCEPT ![n] = initialConfiguration]
103 |     /\ Assert(lastAcceptedTerm[n] = 0, "lastAcceptedTerm should be 0")
104 |     /\ Assert(lastAcceptedConfiguration'[n] /= {}, "lastAcceptedConfiguration should be non-empty")
105 |     /\ Assert(lastCommittedConfiguration'[n] /= {}, "lastCommittedConfiguration should be non-empty")
106 |     /\ UNCHANGED <<descendant, initialConfiguration, initialValue, messages, lastAcceptedTerm, lastAcceptedVersion, \* frame: all variables not assigned above
107 |                    lastPublishedVersion, lastPublishedConfiguration, electionWon, joinVotes, publishVotes,
108 |                    startedJoinSinceLastReboot, currentTerm, initialAcceptedVersion>>
109 |
110 | \* Send join request from node n to node nm for term t; n adopts term t and resets its election and publication state
111 | HandleStartJoin(n, nm, t) ==
112 |     /\ t > currentTerm[n] \* only a strictly newer term is accepted
113 |     /\ LET
114 |          joinRequest == [method |-> Join,
115 |                          source |-> n,
116 |                          dest |-> nm,
117 |                          term |-> t,
118 |                          laTerm |-> lastAcceptedTerm[n],
119 |                          laVersion |-> lastAcceptedVersion[n]]
120 |        IN
121 |          /\ currentTerm' = [currentTerm EXCEPT ![n] = t]
122 |          /\ lastPublishedVersion' = [lastPublishedVersion EXCEPT ![n] = 0]
123 |          /\ lastPublishedConfiguration' = [lastPublishedConfiguration EXCEPT ![n] = lastAcceptedConfiguration[n]]
124 |          /\ startedJoinSinceLastReboot' = [startedJoinSinceLastReboot EXCEPT ![n] = TRUE]
125 |          /\ electionWon' = [electionWon EXCEPT ![n] = FALSE]
126 |          /\ joinVotes' = [joinVotes EXCEPT ![n] = {}]
127 |          /\ publishVotes' = [publishVotes EXCEPT ![n] = {}]
128 |          /\ messages' = messages \cup { joinRequest }
129 |          /\ UNCHANGED <<lastCommittedConfiguration, lastAcceptedConfiguration, lastAcceptedVersion, \* frame: all variables not assigned above
130 |                         lastAcceptedValue, lastAcceptedTerm, descendant, initialConfiguration, initialValue, initialAcceptedVersion>>
131 |
132 | \* node n handles a join request and checks if it has received enough joins (= votes)
133 | \* for its term to be elected as master
134 | HandleJoin(n, m) ==
135 |   /\ m.method = Join
136 |   /\ m.term = currentTerm[n]
137 |   /\ startedJoinSinceLastReboot[n]
138 |   /\ \/ m.laTerm < lastAcceptedTerm[n] \* the voter's accepted (term, version) must not be ahead of n's own
139 |      \/ /\ m.laTerm = lastAcceptedTerm[n]
140 |         /\ m.laVersion <= lastAcceptedVersion[n]
141 |   /\ lastAcceptedConfiguration[n] /= {} \* must be bootstrapped
142 |   /\ joinVotes' = [joinVotes EXCEPT ![n] = @ \cup { m.source }]
143 |   /\ electionWon' = [electionWon EXCEPT ![n] = IsElectionQuorum(n, joinVotes'[n])]
144 |   /\ IF electionWon[n] = FALSE /\ electionWon'[n]
145 |      THEN
146 |        \* initiating publish version with last accepted version to enable client requests
147 |        /\ lastPublishedVersion' = [lastPublishedVersion EXCEPT ![n] = lastAcceptedVersion[n]]
148 |      ELSE
149 |        UNCHANGED <<lastPublishedVersion>>
150 |   /\ UNCHANGED <<messages, descendant, initialConfiguration, initialValue,
151 |                  initialAcceptedVersion, currentTerm, lastCommittedConfiguration, lastAcceptedTerm,
152 |                  lastAcceptedVersion, lastAcceptedValue, lastAcceptedConfiguration,
153 |                  startedJoinSinceLastReboot, lastPublishedConfiguration, publishVotes>> \* NOTE(review): both frames restored as "variables not primed in that branch" -- confirm against VARIABLES
154 |
155 | \* client causes a cluster state change val with configuration cfg; master n publishes it as version v in term t
156 | HandleClientValue(n, t, v, val, cfg) ==
157 |   /\ electionWon[n]
158 |   /\ lastPublishedVersion[n] = lastAcceptedVersion[n] \* means we have the last published value / config (useful for CAS operations, where we need to read the previous value first)
159 |   /\ t = currentTerm[n]
160 |   /\ v > lastPublishedVersion[n]
161 |   /\ (cfg /= lastAcceptedConfiguration[n]) => (lastCommittedConfiguration[n] = lastAcceptedConfiguration[n]) \* only allow reconfiguration if there is not already a reconfiguration in progress
162 |   /\ IsQuorum(joinVotes[n], cfg) \* only allow reconfiguration if we have a quorum of (join) votes for the new config
163 |   /\ LET
164 |        publishRequests == { [method |-> PublishRequest,
165 |                              source |-> n,
166 |                              dest |-> ns,
167 |                              term |-> t,
168 |                              version |-> v,
169 |                              value |-> val,
170 |                              config |-> cfg,
171 |                              commConf |-> lastCommittedConfiguration[n]] : ns \in Nodes }
172 |        newEntry == [prevT |-> lastAcceptedTerm[n], \* edge from n's last accepted (term, version) to the new (t, v)
173 |                     prevV |-> lastAcceptedVersion[n],
174 |                     nextT |-> t,
175 |                     nextV |-> v]
176 |        matchingElems == { e \in descendant : \* existing edges that end where newEntry starts
177 |                             /\ e.nextT = newEntry.prevT
178 |                             /\ e.nextV = newEntry.prevV }
179 |        newTransitiveElems == { [prevT |-> e.prevT, \* extend each of those edges to the new (t, v)
180 |                                 prevV |-> e.prevV,
181 |                                 nextT |-> newEntry.nextT,
182 |                                 nextV |-> newEntry.nextV] : e \in matchingElems }
183 |      IN
184 |        /\ descendant' = descendant \cup {newEntry} \cup newTransitiveElems
185 |        /\ lastPublishedVersion' = [lastPublishedVersion EXCEPT ![n] = v]
186 |        /\ lastPublishedConfiguration' = [lastPublishedConfiguration EXCEPT ![n] = cfg]
187 |        /\ publishVotes' = [publishVotes EXCEPT ![n] = {}] \* publishVotes are only counted per publish version
188 |        /\ messages' = messages \cup publishRequests
189 |        /\ UNCHANGED <<initialConfiguration, initialValue, initialAcceptedVersion, currentTerm,
190 |                       lastCommittedConfiguration, lastAcceptedTerm, lastAcceptedVersion, lastAcceptedValue,
191 |                       lastAcceptedConfiguration, joinVotes, startedJoinSinceLastReboot, electionWon>> \* NOTE(review): frame restored as "all variables not primed above" -- confirm against VARIABLES
192 |
193 | \* handle publish request m on node n: accept its term, version, value and configurations, then reply to the sender
194 | HandlePublishRequest(n, m) ==
195 |   /\ m.method = PublishRequest
196 |   /\ m.term = currentTerm[n]
197 |   /\ (m.term = lastAcceptedTerm[n]) => (m.version > lastAcceptedVersion[n]) \* within the same term only accept a strictly newer version
198 |   /\ lastAcceptedTerm' = [lastAcceptedTerm EXCEPT ![n] = m.term]
199 |   /\ lastAcceptedVersion' = [lastAcceptedVersion EXCEPT ![n] = m.version]
200 |   /\ lastAcceptedValue' = [lastAcceptedValue EXCEPT ![n] = m.value]
201 |   /\ lastAcceptedConfiguration' = [lastAcceptedConfiguration EXCEPT ![n] = m.config]
202 |   /\ lastCommittedConfiguration' = [lastCommittedConfiguration EXCEPT ![n] = m.commConf]
203 |   /\ LET
204 |        response == [method |-> PublishResponse,
205 |                     source |-> n,
206 |                     dest |-> m.source,
207 |                     term |-> m.term,
208 |                     version |-> m.version]
209 |      IN
210 |        /\ messages' = messages \cup {response}
211 |        /\ UNCHANGED <<descendant, initialConfiguration, initialValue, initialAcceptedVersion,
212 |                       currentTerm, joinVotes, startedJoinSinceLastReboot, electionWon,
213 |                       lastPublishedVersion, lastPublishedConfiguration, publishVotes>> \* NOTE(review): frame restored as "all variables not primed above" -- confirm against VARIABLES
214 |
215 | \* master n records publish response m and, once it holds a publish quorum, broadcasts a commit for the published version
216 | HandlePublishResponse(n, m) ==
217 |   /\ m.method = PublishResponse
218 |   /\ electionWon[n]
219 |   /\ m.term = currentTerm[n]
220 |   /\ m.version = lastPublishedVersion[n] \* only responses for the version currently being published count
221 |   /\ publishVotes' = [publishVotes EXCEPT ![n] = @ \cup {m.source}]
222 |   /\ IF
223 |        IsPublishQuorum(n, publishVotes'[n])
224 |      THEN
225 |        LET
226 |          commitRequests == { [method |-> Commit,
227 |                               source |-> n,
228 |                               dest |-> ns,
229 |                               term |-> currentTerm[n],
230 |                               version |-> lastPublishedVersion[n]] : ns \in Nodes }
231 |        IN
232 |          /\ messages' = messages \cup commitRequests
233 |      ELSE
234 |        UNCHANGED <<messages>>
235 |   /\ UNCHANGED <<descendant, initialConfiguration, initialValue, initialAcceptedVersion,
236 |                  currentTerm, lastCommittedConfiguration, lastAcceptedTerm, lastAcceptedVersion,
237 |                  lastAcceptedValue, lastAcceptedConfiguration, joinVotes, startedJoinSinceLastReboot,
238 |                  electionWon, lastPublishedVersion, lastPublishedConfiguration>> \* NOTE(review): both frames restored as "variables not primed in that branch" -- confirm against VARIABLES
239 |
240 | \* apply committed configuration to node n: its last accepted configuration becomes its last committed one
241 | HandleCommit(n, m) ==
242 |   /\ m.method = Commit
243 |   /\ m.term = currentTerm[n]
244 |   /\ m.term = lastAcceptedTerm[n]
245 |   /\ m.version = lastAcceptedVersion[n] \* the commit must refer to exactly what n last accepted
246 |   /\ (electionWon[n] => lastAcceptedVersion[n] = lastPublishedVersion[n])
247 |   /\ lastCommittedConfiguration' = [lastCommittedConfiguration EXCEPT ![n] = lastAcceptedConfiguration[n]]
248 |   /\ UNCHANGED <<messages, descendant, initialConfiguration, initialValue, initialAcceptedVersion,
249 |                  currentTerm, lastAcceptedTerm, lastAcceptedVersion, lastAcceptedValue, lastAcceptedConfiguration,
250 |                  joinVotes, startedJoinSinceLastReboot, electionWon, lastPublishedVersion, lastPublishedConfiguration, publishVotes>> \* NOTE(review): frame restored as "all variables not primed above" -- confirm against VARIABLES
251 |
252 | \* crash/restart node n (loses ephemeral state: votes, election result and publish bookkeeping are reset)
253 | RestartNode(n) ==
254 |   /\ joinVotes' = [joinVotes EXCEPT ![n] = {}]
255 |   /\ startedJoinSinceLastReboot' = [startedJoinSinceLastReboot EXCEPT ![n] = FALSE]
256 |   /\ electionWon' = [electionWon EXCEPT ![n] = FALSE]
257 |   /\ lastPublishedVersion' = [lastPublishedVersion EXCEPT ![n] = 0]
258 |   /\ lastPublishedConfiguration' = [lastPublishedConfiguration EXCEPT ![n] = lastAcceptedConfiguration[n]]
259 |   /\ publishVotes' = [publishVotes EXCEPT ![n] = {}]
260 |   /\ UNCHANGED <<messages, descendant, initialConfiguration, initialValue, initialAcceptedVersion,
261 |                  currentTerm, lastCommittedConfiguration, lastAcceptedTerm, lastAcceptedVersion,
262 |                  lastAcceptedValue, lastAcceptedConfiguration>> \* NOTE(review): frame restored as "all variables not primed above" -- confirm against VARIABLES
263 |
264 | \* next-state relation: one node, or the destination of one in-flight message, takes a single step
265 | Next ==
266 |   \/ \E nd \in Nodes : SetInitialState(nd)
267 |   \/ \E nd \in Nodes, dst \in Nodes, tm \in Terms : HandleStartJoin(nd, dst, tm)
268 |   \/ \E msg \in messages : HandleJoin(msg.dest, msg)
269 |   \/ \E nd \in Nodes, tm \in Terms, ver \in Versions, val \in Values, cfg \in ValidConfigs : HandleClientValue(nd, tm, ver, val, cfg)
270 |   \/ \E msg \in messages : HandlePublishRequest(msg.dest, msg)
271 |   \/ \E msg \in messages : HandlePublishResponse(msg.dest, msg)
272 |   \/ \E msg \in messages : HandleCommit(msg.dest, msg)
273 |   \/ \E nd \in Nodes : RestartNode(nd)
274 |
275 | ----
276 |
277 | \* Invariants
278 |
279 | SingleNodeInvariant == \* per-node consistency of term, election and publish bookkeeping
280 |   \A node \in Nodes :
281 |     /\ currentTerm[node] >= lastAcceptedTerm[node]
282 |     /\ electionWon[node] = IsElectionQuorum(node, joinVotes[node]) \* cached value is consistent
283 |     /\ IF electionWon[node] THEN lastAcceptedVersion[node] <= lastPublishedVersion[node] ELSE lastPublishedVersion[node] = 0
284 |     /\ electionWon[node] => startedJoinSinceLastReboot[node]
285 |     /\ publishVotes[node] /= {} => electionWon[node]
286 |
287 | OneMasterPerTerm == \* all publish requests of one term come from the same source
288 |   \A reqA, reqB \in messages :
289 |     (reqA.method = PublishRequest /\
290 |      reqB.method = PublishRequest /\
291 |      reqA.term = reqB.term)
292 |     => reqA.source = reqB.source
293 |
294 | LogMatching == \* publish requests that agree on term and version also agree on value
295 |   \A reqA, reqB \in messages :
296 |     (reqA.method = PublishRequest /\
297 |      reqB.method = PublishRequest /\
298 |      reqA.term = reqB.term /\
299 |      reqA.version = reqB.version)
300 |     => reqA.value = reqB.value
301 |
302 | CommittedPublishRequest(mp) == \* mp is a publish request for which a commit with the same term and version was sent
303 |   /\ mp.method = PublishRequest
304 |   /\ \E mc \in messages:
305 |        /\ mc.method = Commit
306 |        /\ mp.term = mc.term
307 |        /\ mp.version = mc.version
308 |
309 | DescendantRelationIsStrictlyOrdered == \* every edge goes to a strictly larger version and never to a smaller term
310 |   \A edge \in descendant :
311 |     /\ edge.nextT >= edge.prevT
312 |     /\ edge.nextV > edge.prevV
313 |
314 | DescendantRelationIsTransitive == \* two chained edges imply the direct edge from the first start to the second end
315 |   \A first, second \in descendant :
316 |     (first.nextT = second.prevT /\ first.nextV = second.prevV)
317 |       => [prevT |-> first.prevT, prevV |-> first.prevV, nextT |-> second.nextT, nextV |-> second.nextV] \in descendant
318 |
319 | NewerOpsBasedOnOlderCommittedOps == \* a later publish request descends from every earlier committed one
320 |   \A older, newer \in messages :
321 |     (CommittedPublishRequest(older) /\
322 |      newer.method = PublishRequest /\
323 |      newer.term >= older.term /\
324 |      newer.version > older.version)
325 |     => [prevT |-> older.term, prevV |-> older.version, nextT |-> newer.term, nextV |-> newer.version] \in descendant
326 |
327 | \* main invariant (follows from NewerOpsBasedOnOlderCommittedOps): any two distinct committed requests are related by descendant
328 | CommittedValuesDescendantsFromCommittedValues ==
329 |   \A m1, m2 \in messages :
330 |     /\ CommittedPublishRequest(m1)
331 |     /\ CommittedPublishRequest(m2)
332 |     /\ \/ m1.term /= m2.term
333 |        \/ m1.version /= m2.version
334 |     =>
335 |       \/ [prevT |-> m1.term, prevV |-> m1.version, nextT |-> m2.term, nextV |-> m2.version] \in descendant
336 |       \/ [prevT |-> m2.term, prevV |-> m2.version, nextT |-> m1.term, nextV |-> m1.version] \in descendant
337 |
338 | CommittedValuesDescendantsFromInitialValue == \* NOTE(review): nesting below re-derived after indentation was lost -- confirm against the original layout
339 |   \E v \in InitialVersions :
340 |     /\ \E n \in Nodes : v = initialAcceptedVersion[n] \* v is some node's initial accepted version
341 |     /\ \E votes \in SUBSET(initialConfiguration) : \* a quorum of the initial configuration starts at or below v
342 |          /\ IsQuorum(votes, initialConfiguration)
343 |          /\ \A n \in votes : initialAcceptedVersion[n] <= v
344 |     /\ \A m \in messages : \* every committed publish request descends from (term 0, version v)
345 |          CommittedPublishRequest(m)
346 |            =>
347 |          [prevT |-> 0, prevV |-> v, nextT |-> m.term, nextV |-> m.version] \in descendant
348 |
349 | CommitHasQuorumVsPreviousCommittedConfiguration == \* a commit implies publish responses from a quorum of the matching request's commConf
350 |   \A commit \in messages : commit.method = Commit
351 |     => (\A req \in messages :
352 |           (req.method = PublishRequest /\ req.term = commit.term /\
353 |            req.version = commit.version)
354 |
355 |           => IsQuorum({resp.source : resp \in {r \in messages :
356 |                          r.method = PublishResponse /\
357 |                          r.term = req.term /\ r.version = req.version
358 |                        }}, req.commConf))
359 |
360 | P2bInvariant == \* a publish request in a term above a commit's term also has a version above that commit's version
361 |   \A commit \in messages : commit.method = Commit
362 |     => (\A publish \in messages :
363 |           (publish.method = PublishRequest /\ publish.term > commit.term) => publish.version > commit.version)
364 |
365 | \* State-exploration limits: cap the published version per node (2 up to term 1, else 3) and the number of messages
366 | StateConstraint ==
367 |   /\ \A node \in Nodes : lastPublishedVersion[node] <= (IF currentTerm[node] <= 1 THEN 2 ELSE 3)
368 |   /\ Cardinality(messages) <= 15
369 |
370 | ====================================================================================================
371 |
--------------------------------------------------------------------------------
/ZenWithTerms/tla/ZenWithTerms.toolbox/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | ZenWithTerms
4 |
5 |
6 |
7 |
8 |
9 | toolbox.builder.TLAParserBuilder
10 |
11 |
12 |
13 |
14 | toolbox.builder.PCalAlgorithmSearchingBuilder
15 |
16 |
17 |
18 |
19 |
20 | toolbox.natures.TLANature
21 |
22 |
23 |
24 | ZenWithTerms.tla
25 | 1
26 | PARENT-1-PROJECT_LOC/ZenWithTerms.tla
27 |
28 |
29 |
30 |
--------------------------------------------------------------------------------
/ZenWithTerms/tla/ZenWithTerms.toolbox/.settings/org.lamport.tla.toolbox.prefs:
--------------------------------------------------------------------------------
1 | ProjectRootFile=PARENT-1-PROJECT_LOC/ZenWithTerms.tla
2 | eclipse.preferences.version=1
3 |
--------------------------------------------------------------------------------
/ZenWithTerms/tla/ZenWithTerms.toolbox/ZenWithTerms___model.launch:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
--------------------------------------------------------------------------------
/cluster/isabelle/Implementation.thy:
--------------------------------------------------------------------------------
1 | section \Implementation\
2 |
3 | text \This section presents the implementation of the algorithm.\
4 |
5 | theory Implementation
6 | imports Preliminaries
7 | begin
8 |
9 | subsection \Protocol messages\
10 |
11 | text \The
12 | proven-safe core of the protocol works by sending messages as described here. The remainder of the
13 | protocol may send other messages too, and may drop, reorder or duplicate any of these messages, but
14 | must not send these messages itself to ensure safety. Another way of thinking of these messages is
15 | to consider them as ``fire-and-forget'' RPC invocations that, on receipt, call some local method, maybe
16 | update the receiving node's state, and maybe yield some further messages. The @{type nat} parameter to each
17 | message refers to a slot number.\
18 |
19 | datatype TermOption = NO_TERM | SomeTerm Term (* an optional term *)
20 |
21 | instantiation TermOption :: linorder (* NO_TERM is the least element; SomeTerm values are ordered by their terms *)
22 | begin
23 |
24 | fun less_TermOption :: "TermOption \<Rightarrow> TermOption \<Rightarrow> bool"
25 |   where "t < NO_TERM = False"
26 |   | "NO_TERM < SomeTerm t = True"
27 |   | "SomeTerm t\<^sub>1 < SomeTerm t\<^sub>2 = (t\<^sub>1 < t\<^sub>2)"
28 |
29 | definition less_eq_TermOption :: "TermOption \<Rightarrow> TermOption \<Rightarrow> bool"
30 |   where "(t\<^sub>1 :: TermOption) \<le> t\<^sub>2 \<equiv> t\<^sub>1 = t\<^sub>2 \<or> t\<^sub>1 < t\<^sub>2"
31 |
32 | instance proof
33 |   fix x y z :: TermOption
34 |   show "(x < y) = (x \<le> y \<and> \<not> y \<le> x)" unfolding less_eq_TermOption_def apply auto
35 |     using less_TermOption.elims apply fastforce
36 |     by (metis less_TermOption.elims(2) less_TermOption.simps(3) less_not_sym)
37 |
38 |   show "x \<le> x" by (simp add: less_eq_TermOption_def)
39 |
40 |   show "x \<le> y \<Longrightarrow> y \<le> z \<Longrightarrow> x \<le> z" unfolding less_eq_TermOption_def apply auto
41 |     by (metis TermOption.distinct(1) TermOption.inject dual_order.strict_trans less_TermOption.elims(2) less_TermOption.elims(3))
42 |
43 |   show "x \<le> y \<Longrightarrow> y \<le> x \<Longrightarrow> x = y" unfolding less_eq_TermOption_def apply auto
44 |     using \<open>(x < y) = (x \<le> y \<and> \<not> y \<le> x)\<close> less_eq_TermOption_def by blast
45 |
46 |   show "x \<le> y \<or> y \<le> x" unfolding less_eq_TermOption_def apply auto
47 |     by (metis TermOption.distinct(1) TermOption.inject less_TermOption.elims(3) neqE)
48 | qed
49 |
50 | end
51 |
52 | lemma NO_TERM_le [simp]: "NO_TERM \<le> t" by (cases t, simp_all add: less_eq_TermOption_def) (* NO_TERM is below everything *)
53 | lemma le_NO_TERM [simp]: "(t \<le> NO_TERM) = (t = NO_TERM)" by (cases t, simp_all add: less_eq_TermOption_def) (* only NO_TERM is below NO_TERM *)
54 | lemma le_SomeTerm [simp]: "(SomeTerm t\<^sub>1 \<le> SomeTerm t\<^sub>2) = (t\<^sub>1 \<le> t\<^sub>2)" by (auto simp add: less_eq_TermOption_def) (* SomeTerm preserves the order on terms *)
55 |
56 | datatype Message (* payloads exchanged between nodes; ProcessMessage dispatches on these constructors *)
57 |   = StartJoin Term
58 |   | Vote Slot Term TermOption
59 |   | ClientValue Value
60 |   | PublishRequest Slot Term Value
61 |   | PublishResponse Slot Term
62 |   | ApplyCommit Slot Term
63 |   | CatchUpRequest
64 |   | CatchUpResponse Slot "Node set" ClusterState
65 |   | DiscardJoinVotes
66 |   | Reboot
67 |
68 | text \<open>Some prose descriptions of these messages follow, in order to give a bit more of an
69 | intuitive understanding of their purposes.\<close>
70 |
71 | text \The message @{term "StartJoin t"} may be sent by any node to attempt to start a master
72 | election in the given term @{term t}.\
73 |
74 | text \<open>The message @{term "Vote i t a"} may be sent by a node in response
75 | to a @{term StartJoin} message. It indicates that the sender knows all committed values for slots
76 | strictly below @{term i}, and that the sender will no longer vote (i.e. send a @{term
77 | PublishResponse}) in any term prior to @{term t}. The field @{term a} is either @{term
78 | NO_TERM} or @{term "SomeTerm t'"}. In the former case this indicates that
79 | the node has not yet sent any @{term PublishResponse} message in slot @{term i}, and in the latter
80 | case it indicates that the largest term in which it has previously sent a @{term PublishResponse}
81 | message is @{term t'}. All
82 | nodes must avoid sending a @{term Vote} message to two different masters in the same term.\<close>
83 |
84 | text \The message @{term "ClientValue x"} may be sent by any node and indicates an attempt to
85 | reach consensus on the value @{term x}.\
86 |
87 | text \The message @{term "PublishRequest i t v"} may be sent by the elected master of term
88 | @{term t} to request the other master-eligible nodes to vote for value @{term v} to be committed in
89 | slot @{term i}.\
90 |
91 | text \<open>The message @{term "PublishResponse i t"} may be sent by a node in response to
92 | the corresponding @{term PublishRequest} message, indicating that the sender votes for the value
93 | proposed by the master of term @{term t} to be committed in slot @{term i}.\<close>
94 |
95 | text \The message @{term "ApplyCommit i t"} indicates that the value proposed by the master of
96 | term @{term t} in slot @{term i} received a quorum of votes and is therefore committed.\
97 |
98 | text \The message @{term Reboot} may be sent by any node to represent the restart of a node, which
99 | loses any ephemeral state.\
100 |
101 | text \The abstract model of Zen keeps track of the set of all messages that have ever been
102 | sent, and asserts that this set obeys certain invariants, listed below. Further below, it will be
103 | shown that these invariants imply that each slot obeys the @{term oneSlot} invariants above and
104 | hence that each slot cannot see inconsistent committed values.\
105 |
106 | datatype Destination = Broadcast | OneNode Node (* either every node or one specific node *)
107 |
108 | record RoutedMessage = (* a message payload together with its sender and destination *)
109 |   sender :: Node
110 |   destination :: Destination
111 |   payload :: Message
112 |
113 | text \It will be useful to be able to choose the optional term with the greater term,
114 | so here is a function that does that.\
115 |
116 | subsection \Node implementation\
117 |
118 | text \Each node holds the following local data.\
119 |
120 | record TermValue = (* a value paired with a term; stored in lastAcceptedData *)
121 |   tvTerm :: Term
122 |   tvValue :: Value
123 |
124 | record NodeData = (* the local state held by each node *)
125 |   currentNode :: Node
126 |   currentTerm :: Term
127 |   (* committed state *)
128 |   firstUncommittedSlot :: Slot
129 |   currentVotingNodes :: "Node set"
130 |   currentClusterState :: ClusterState
131 |   (* accepted state *)
132 |   lastAcceptedData :: "TermValue option"
133 |   (* election state *)
134 |   joinVotes :: "Node set"
135 |   electionWon :: bool
136 |   (* publish state *)
137 |   publishPermitted :: bool
138 |   publishVotes :: "Node set"
139 |
140 | definition lastAcceptedValue :: "NodeData \<Rightarrow> Value" (* value part of lastAcceptedData; only meaningful when that is Some *)
141 |   where "lastAcceptedValue nd \<equiv> tvValue (THE lad. lastAcceptedData nd = Some lad)"
142 |
143 | definition lastAcceptedTerm :: "NodeData \<Rightarrow> TermOption" (* term part of lastAcceptedData, or NO_TERM when nothing is accepted *)
144 |   where "lastAcceptedTerm nd \<equiv> case lastAcceptedData nd of None \<Rightarrow> NO_TERM | Some lad \<Rightarrow> SomeTerm (tvTerm lad)"
145 |
146 | definition isQuorum :: "NodeData \<Rightarrow> Node set \<Rightarrow> bool" (* q is a majority of the node's current voting nodes *)
147 |   where "isQuorum nd q \<equiv> q \<in> majorities (currentVotingNodes nd)"
148 |
149 | lemma lastAcceptedValue_joinVotes_update[simp]: "lastAcceptedValue (joinVotes_update f nd) = lastAcceptedValue nd" (* updating joinVotes leaves the accepted value alone *)
150 |   by (simp add: lastAcceptedValue_def)
151 | lemma lastAcceptedTerm_joinVotes_update[simp]: "lastAcceptedTerm (joinVotes_update f nd) = lastAcceptedTerm nd" by (simp add: lastAcceptedTerm_def)
152 | lemma lastAcceptedValue_electionWon_update[simp]: "lastAcceptedValue (electionWon_update f nd) = lastAcceptedValue nd" (* likewise for electionWon *)
153 |   by (simp add: lastAcceptedValue_def) lemma lastAcceptedTerm_electionWon_update[simp]: "lastAcceptedTerm (electionWon_update f nd) = lastAcceptedTerm nd" by (simp add: lastAcceptedTerm_def)
154 |
155 | text \This method publishes a value via a @{term PublishRequest} message.\
156 |
157 | definition publishValue :: "Value \<Rightarrow> NodeData \<Rightarrow> (NodeData * Message option)" (* publishes x only if elected and permitted; clears publishPermitted *)
158 |   where
159 |     "publishValue x nd \<equiv>
160 |         if electionWon nd \<and> publishPermitted nd
161 |               then ( nd \<lparr> publishPermitted := False \<rparr>
162 |                    , Some (PublishRequest
163 |                            (firstUncommittedSlot nd)
164 |                            (currentTerm nd) x) )
165 |               else (nd, None)"
166 |
167 | text \This method updates the node's current term (if necessary) and discards any data associated
168 | with the previous term.\
169 |
170 | definition ensureCurrentTerm :: "Term \<Rightarrow> NodeData \<Rightarrow> NodeData" (* no-op unless t is newer; otherwise adopts t and resets election/publish state *)
171 |   where
172 |     "ensureCurrentTerm t nd \<equiv>
173 |         if t \<le> currentTerm nd
174 |             then nd
175 |             else nd
176 |               \<lparr> joinVotes := {}
177 |               , currentTerm := t
178 |               , electionWon := False
179 |               , publishPermitted := True
180 |               , publishVotes := {} \<rparr>"
181 |
182 | text \This method updates the node's state on receipt of a vote (a @{term Vote}) in an election.\
183 |
184 | definition addElectionVote :: "Node \<Rightarrow> Slot => TermOption \<Rightarrow> NodeData \<Rightarrow> NodeData" (* records s as a voter and recomputes electionWon; i and a are unused here *)
185 |   where
186 |     "addElectionVote s i a nd \<equiv> let newVotes = insert s (joinVotes nd)
187 |       in nd \<lparr> joinVotes := newVotes
188 |             , electionWon := isQuorum nd newVotes \<rparr>"
189 |
190 | text \Clients request the cluster to achieve consensus on certain values using the @{term ClientValue}
191 | message which is handled as follows.\
192 |
193 | definition handleClientValue :: "Value \<Rightarrow> NodeData \<Rightarrow> (NodeData * Message option)" (* publishes x only when nothing is currently accepted *)
194 |   where
195 |     "handleClientValue x nd \<equiv> if lastAcceptedTerm nd = NO_TERM then publishValue x nd else (nd, None)"
196 |
197 | text \A @{term StartJoin} message is checked for acceptability and then handled by updating the
198 | node's term and yielding a @{term Vote} message as follows.\
199 |
200 | definition handleStartJoin :: "Term \<Rightarrow> NodeData \<Rightarrow> (NodeData * Message option)" (* for a strictly newer term t: adopt it and answer with a Vote *)
201 |   where
202 |     "handleStartJoin t nd \<equiv>
203 |         if currentTerm nd < t
204 |           then ( ensureCurrentTerm t nd
205 |                , Some (Vote (firstUncommittedSlot nd)
206 |                              t
207 |                             (lastAcceptedTerm nd)))
208 |           else (nd, None)"
209 |
210 | text \A @{term Vote} message is checked for acceptability and then handled as follows, perhaps
211 | yielding a @{term PublishRequest} message.\
212 |
213 | definition handleVote :: "Node \<Rightarrow> Slot \<Rightarrow> Term \<Rightarrow> TermOption \<Rightarrow> NodeData \<Rightarrow> (NodeData * Message option)" (* accepts a vote that is not ahead of this node, then republishes any accepted value *)
214 |   where
215 |     "handleVote s i t a nd \<equiv>
216 |          if t = currentTerm nd
217 |              \<and> (i < firstUncommittedSlot nd
218 |                 \<or> (i = firstUncommittedSlot nd \<and> a \<le> lastAcceptedTerm nd))
219 |           then let nd1 = addElectionVote s i a nd
220 |                in (if lastAcceptedTerm nd = NO_TERM then (nd1, None) else publishValue (lastAcceptedValue nd1) nd1)
221 |           else (nd, None)"
222 |
223 | text \A @{term PublishRequest} message is checked for acceptability and then handled as follows,
224 | yielding a @{term PublishResponse} message.\
225 |
226 | definition handlePublishRequest :: "Slot \<Rightarrow> Term \<Rightarrow> Value \<Rightarrow> NodeData \<Rightarrow> (NodeData * Message option)" (* accepts (t, x) for the current slot and term, replying with a PublishResponse *)
227 |   where
228 |     "handlePublishRequest i t x nd \<equiv>
229 |           if i = firstUncommittedSlot nd
230 |                 \<and> t = currentTerm nd
231 |           then ( nd \<lparr> lastAcceptedData := Some \<lparr> tvTerm = t, tvValue = x \<rparr> \<rparr>
232 |                , Some (PublishResponse i t))
233 |           else (nd, None)"
234 |
235 | text \This method sends an @{term ApplyCommit} message if a quorum of votes has been received.\
236 |
237 | definition commitIfQuorate :: "NodeData \<Rightarrow> (NodeData * Message option)" (* state is unchanged; yields ApplyCommit iff publishVotes is a quorum *)
238 |   where
239 |     "commitIfQuorate nd = (nd, if isQuorum nd (publishVotes nd)
240 |                                   then Some (ApplyCommit (firstUncommittedSlot nd) (currentTerm nd)) else None)"
241 |
242 | text \A @{term PublishResponse} message is checked for acceptability and handled as follows. If
243 | this message, together with the previously-received messages, forms a quorum of votes then the
244 | value is committed, yielding an @{term ApplyCommit} message.\
245 |
246 | definition handlePublishResponse :: "Node \<Rightarrow> Slot \<Rightarrow> Term \<Rightarrow> NodeData \<Rightarrow> (NodeData * Message option)" (* counts s as a publish vote for the current slot and term, then commits if quorate *)
247 |   where
248 |     "handlePublishResponse s i t nd \<equiv>
249 |         if i = firstUncommittedSlot nd \<and> t = currentTerm nd
250 |         then commitIfQuorate (nd \<lparr> publishVotes := insert s (publishVotes nd) \<rparr>)
251 |         else (nd, None)"
252 |
253 | text \This method updates the node's state when a value is committed.\
254 |
255 | definition applyAcceptedValue :: "NodeData \<Rightarrow> NodeData" (* applies the last accepted value: no-op, reconfiguration, or cluster-state diff *)
256 |   where
257 |     "applyAcceptedValue nd \<equiv> case lastAcceptedValue nd of
258 |         NoOp \<Rightarrow> nd
259 |       | Reconfigure votingNodes \<Rightarrow> nd
260 |           \<lparr> currentVotingNodes := set votingNodes
261 |           , electionWon := joinVotes nd \<in> majorities (set votingNodes) \<rparr>
262 |       | ClusterStateDiff diff \<Rightarrow> nd \<lparr> currentClusterState := diff (currentClusterState nd) \<rparr>"
263 |
264 | text \<open>An @{term ApplyCommit} message is applied to the current node's state, updating its configuration
265 | and \texttt{ClusterState} via the @{term applyAcceptedValue} method. It yields no messages.\<close>
266 |
267 | definition handleApplyCommit :: "Slot \<Rightarrow> Term \<Rightarrow> NodeData \<Rightarrow> NodeData" (* commits the accepted value for slot i in term t and advances to slot i + 1 *)
268 |   where
269 |     "handleApplyCommit i t nd \<equiv>
270 |         if i = firstUncommittedSlot nd \<and> lastAcceptedTerm nd = SomeTerm t
271 |           then (applyAcceptedValue nd)
272 |                   \<lparr> firstUncommittedSlot := i + 1
273 |                   , lastAcceptedData := None
274 |                   , publishPermitted := True
275 |                   , publishVotes := {} \<rparr>
276 |           else nd"
277 |
278 | definition handleCatchUpRequest :: "NodeData \<Rightarrow> (NodeData * Message option)" (* state is unchanged; replies with this node's committed slot, voting nodes and cluster state *)
279 |   where
280 |     "handleCatchUpRequest nd = (nd, Some (CatchUpResponse (firstUncommittedSlot nd)
281 |                                               (currentVotingNodes nd) (currentClusterState nd)))"
282 |
283 | definition handleCatchUpResponse :: "Slot \<Rightarrow> Node set \<Rightarrow> ClusterState \<Rightarrow> NodeData \<Rightarrow> NodeData" (* if slot i is ahead of this node, adopt the received state and reset election/publish state *)
284 |   where
285 |     "handleCatchUpResponse i conf cs nd \<equiv>
286 |       if firstUncommittedSlot nd < i
287 |         then nd \<lparr> firstUncommittedSlot := i
288 |                 , publishPermitted := True
289 |                 , publishVotes := {}
290 |                 , currentVotingNodes := conf
291 |                 , currentClusterState := cs
292 |                 , lastAcceptedData := None
293 |                 , joinVotes := {}
294 |                 , electionWon := False \<rparr>
295 |         else nd"
296 |
297 | text \A @{term Reboot} message simulates the effect of a reboot, discarding any ephemeral state but
298 | preserving the persistent state. It yields no messages.\
299 |
300 | definition handleReboot :: "NodeData \<Rightarrow> NodeData" (* copies the first six fields unchanged and resets the election and publish fields *)
301 |   where
302 |     "handleReboot nd \<equiv>
303 |       \<lparr> currentNode = currentNode nd
304 |       , currentTerm = currentTerm nd
305 |       , firstUncommittedSlot = firstUncommittedSlot nd
306 |       , currentVotingNodes = currentVotingNodes nd
307 |       , currentClusterState = currentClusterState nd
308 |       , lastAcceptedData = lastAcceptedData nd
309 |       , joinVotes = {}
310 |       , electionWon = False
311 |       , publishPermitted = False
312 |       , publishVotes = {} \<rparr>"
313 |
314 | text \A @{term DiscardJoinVotes} message discards the votes received by a node. It yields
315 | no messages.\
316 |
317 | definition handleDiscardJoinVotes :: "NodeData \<Rightarrow> NodeData" (* forgets all join votes, and with them any election win *)
318 |   where
319 |     "handleDiscardJoinVotes nd \<equiv> nd \<lparr> electionWon := False, joinVotes := {} \<rparr>"
320 |
321 | text \<open>This function dispatches incoming messages to the appropriate handler method, and
322 | routes any responses to the appropriate places. In particular, @{term Vote} messages
323 | (sent by the @{term handleStartJoin} method), @{term PublishResponse} messages (sent by the
324 | @{term handlePublishRequest} method) and @{term CatchUpResponse} messages (sent by the
325 | @{term handleCatchUpRequest} method) are only sent to a single node, whereas all other responses are broadcast to all nodes.\<close>
326 |
327 | definition ProcessMessage :: "NodeData \<Rightarrow> RoutedMessage \<Rightarrow> (NodeData * RoutedMessage option)" (* dispatches on the payload; messages addressed to another node are ignored *)
328 |   where
329 |     "ProcessMessage nd msg \<equiv>
330 |       let respondTo =
331 |           (\<lambda> d (nd, mmsg). case mmsg of
332 |                None \<Rightarrow> (nd, None)
333 |              | Some msg \<Rightarrow> (nd,
334 |                  Some \<lparr> sender = currentNode nd, destination = d,
335 |                              payload = msg \<rparr>));
336 |           respondToSender = respondTo (OneNode (sender msg));
337 |           respondToAll = respondTo Broadcast
338 |       in
339 |         if destination msg \<in> { Broadcast, OneNode (currentNode nd) }
340 |         then case payload msg of
341 |           StartJoin t
342 |               \<Rightarrow> respondToSender (handleStartJoin t nd)
343 |           | Vote i t a
344 |               \<Rightarrow> respondToAll (handleVote (sender msg) i t a nd)
345 |           | ClientValue x
346 |               \<Rightarrow> respondToAll (handleClientValue x nd)
347 |           | PublishRequest i t x
348 |               \<Rightarrow> respondToSender (handlePublishRequest i t x nd)
349 |           | PublishResponse i t
350 |               \<Rightarrow> respondToAll (handlePublishResponse (sender msg) i t nd)
351 |           | ApplyCommit i t
352 |               \<Rightarrow> (handleApplyCommit i t nd, None)
353 |           | CatchUpRequest
354 |               \<Rightarrow> respondToSender (handleCatchUpRequest nd)
355 |           | CatchUpResponse i conf cs
356 |               \<Rightarrow> (handleCatchUpResponse i conf cs nd, None)
357 |           | DiscardJoinVotes
358 |               \<Rightarrow> (handleDiscardJoinVotes nd, None)
359 |           | Reboot
360 |               \<Rightarrow> (handleReboot nd, None)
361 |         else (nd, None)"
362 |
363 | text \<open>Nodes are initialised to this state. The data required is the initial configuration, @{term V\<^sub>0}
364 | and the initial \texttt{ClusterState}, here shown as @{term CS\<^sub>0}.\<close>
365 |
366 | definition initialNodeState :: "Node \<Rightarrow> NodeData" (* state of node n before it has processed any message *)
367 |   where "initialNodeState n =
368 |       \<lparr> currentNode = n
369 |       , currentTerm = 0
370 |       , firstUncommittedSlot = 0
371 |       , currentVotingNodes = V\<^sub>0
372 |       , currentClusterState = CS\<^sub>0
373 |       , lastAcceptedData = None
374 |       , joinVotes = {}
375 |       , electionWon = False
376 |       , publishPermitted = False
377 |       , publishVotes = {} \<rparr>"
378 | (* Note: publishPermitted could be True initially, but in the actual implementation we call the
379 | same constructor whether we're starting up from afresh or recovering from a reboot, and the value
380 | is really unimportant as we need to run an election in a new term before becoming master anyway,
381 | so it's hard to justify putting any effort into calculating different values for these two cases.
382 | Instead just set it to False initially.*)
383 |
384 | end
385 |
--------------------------------------------------------------------------------
/cluster/isabelle/Monadic.thy:
--------------------------------------------------------------------------------
1 | theory Monadic
2 | imports Implementation "~~/src/HOL/Library/Monad_Syntax"
3 | begin
4 |
5 | datatype Exception = IllegalArgumentException
6 |
7 | datatype ('e,'a) Result = Success 'a | Exception 'e (* either a result of type 'a or an exception of type 'e *)
8 |
9 | datatype 'a Action = Action "NodeData \<Rightarrow> (NodeData * RoutedMessage list * (Exception,'a) Result)" (* maps a node state to a new state, emitted messages and a result *)
10 |
11 | definition runM :: "'a Action \<Rightarrow> NodeData \<Rightarrow> (NodeData * RoutedMessage list * (Exception,'a) Result)" (* unwraps an Action to the function it carries *)
12 |   where "runM ma \<equiv> case ma of Action unwrapped_ma \<Rightarrow> unwrapped_ma"
13 |
14 | lemma runM_Action[simp]: "runM (Action f) = f" by (simp add: runM_def)
15 | lemma runM_inject[intro]: "(\<And>nd. runM ma nd = runM mb nd) \<Longrightarrow> ma = mb" by (cases ma, cases mb, auto simp add: runM_def)
16 |
17 | definition return :: "'a \<Rightarrow> 'a Action" where "return a \<equiv> Action (\<lambda> nd. (nd, [], Success a))" (* leaves the state alone, emits nothing, succeeds with a *)
18 |
19 | lemma runM_return[simp]: "runM (return a) nd = (nd, [], Success a)" unfolding runM_def return_def by simp
20 |
21 | definition Action_bind :: "'a Action \<Rightarrow> ('a \<Rightarrow> 'b Action) \<Rightarrow> 'b Action" (* runs ma, then mf on its result; an exception from ma stops the chain; messages are concatenated *)
22 |   where "Action_bind ma mf \<equiv> Action (\<lambda> nd0. case runM ma nd0 of
23 |       (nd1, msgs1, result1) \<Rightarrow> (case result1 of
24 |           Exception e \<Rightarrow> (nd1, msgs1, Exception e)
25 |         | Success a \<Rightarrow> (case runM (mf a) nd1 of
26 |             (nd2, msgs2, result2) \<Rightarrow> (nd2, msgs1 @ msgs2, result2))))"
27 |
28 | adhoc_overloading bind Action_bind
29 |
30 | lemma runM_bind: "runM (a \ f) nd0 = (case runM a nd0 of (nd1, msgs1, result1) \ (case result1 of Exception e \ (nd1, msgs1, Exception e) | Success b \