├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── backbuild
├── ckite-core
└── src
│ ├── main
│ ├── resources
│ │ └── reference.conf
│ └── scala
│ │ └── ckite
│ │ ├── CKite.scala
│ │ ├── CKiteBuilder.scala
│ │ ├── CKiteClient.scala
│ │ ├── ConfigAware.scala
│ │ ├── ConfigSupport.scala
│ │ ├── Configuration.scala
│ │ ├── Consensus.scala
│ │ ├── LeaderAnnouncer.scala
│ │ ├── LocalMember.scala
│ │ ├── Member.scala
│ │ ├── Membership.scala
│ │ ├── RLog.scala
│ │ ├── Raft.scala
│ │ ├── RemoteMember.scala
│ │ ├── exception
│ │ ├── LeaderTimeoutException.scala
│ │ ├── LostLeadershipException.scala
│ │ └── WriteTimeoutException.scala
│ │ ├── rlog
│ │ ├── FixedSizeLogCompactionPolicy.scala
│ │ ├── Log.scala
│ │ ├── LogAppender.scala
│ │ ├── LogCompactionPolicy.scala
│ │ ├── Snapshot.scala
│ │ ├── SnapshotManager.scala
│ │ └── Storage.scala
│ │ ├── rpc
│ │ ├── AppendEntries.scala
│ │ ├── AppendEntriesResponse.scala
│ │ ├── ClusterConfigurationCommand.scala
│ │ ├── Command.scala
│ │ ├── GetMembersRequest.scala
│ │ ├── GetMembersResponse.scala
│ │ ├── InstallSnapshot.scala
│ │ ├── JoinMember.scala
│ │ ├── JoinMemberResponse.scala
│ │ ├── JointConfiguration.scala
│ │ ├── LogEntry.scala
│ │ ├── NewConfiguration.scala
│ │ ├── NoOps.scala
│ │ ├── ReadCommand.scala
│ │ ├── RequestVote.scala
│ │ ├── RequestVoteResponse.scala
│ │ ├── Rpc.scala
│ │ ├── RpcClient.scala
│ │ ├── RpcService.scala
│ │ └── WriteCommand.scala
│ │ ├── statemachine
│ │ ├── CommandExecutor.scala
│ │ ├── StateMachine.scala
│ │ └── j
│ │ │ ├── StateMachine.scala
│ │ │ └── StateMachineWrapper.scala
│ │ ├── states
│ │ ├── Candidate.scala
│ │ ├── Follower.scala
│ │ ├── Joiner.scala
│ │ ├── Leader.scala
│ │ ├── Starter.scala
│ │ ├── State.scala
│ │ └── Stopped.scala
│ │ ├── stats
│ │ ├── StateInfo.scala
│ │ └── Stats.scala
│ │ ├── storage
│ │ └── MemoryStorage.scala
│ │ └── util
│ │ ├── ConcurrencySupport.scala
│ │ ├── Conversions.scala
│ │ ├── CustomThreadFactory.scala
│ │ ├── LockSupport.scala
│ │ ├── Logging.scala
│ │ └── Serializer.scala
│ └── test
│ ├── resources
│ └── logback-test.xml
│ └── scala
│ └── ckite
│ ├── CKiteIntegrationTest.scala
│ ├── SerializerTest.scala
│ ├── TestRpc.scala
│ └── example
│ ├── Get.scala
│ ├── KVStore.scala
│ └── Put.scala
├── ckite-finagle
└── src
│ └── main
│ ├── resources
│ └── reference.conf
│ ├── scala
│ └── ckite
│ │ └── rpc
│ │ ├── FinagleThriftRpc.scala
│ │ └── thrift
│ │ ├── FinagleThriftClient.scala
│ │ ├── FinagleThriftServer.scala
│ │ └── ThriftConverters.scala
│ └── thrift
│ └── ckite
│ └── rpc
│ └── thrift
│ └── ckite.thrift
├── ckite-mapdb
└── src
│ ├── main
│ └── scala
│ │ └── ckite
│ │ └── mapdb
│ │ ├── FileSupport.scala
│ │ ├── MapDBPersistentLog.scala
│ │ └── MapDBStorage.scala
│ └── test
│ └── scala
│ └── ckite
│ └── mapdb
│ └── MapDBStorageTest.scala
└── project
├── Build.scala
├── Dependencies.scala
├── Settings.scala
├── build.properties
└── plugins.sbt
/.gitignore:
--------------------------------------------------------------------------------
1 | bin
2 | target
3 | .cache
4 | .classpath
5 | .project
6 | .idea
7 | *.iml
8 | *.ipr
9 | *.iws
10 | src/main/scala/ckite/rpc/thrift/CKiteService.scala
11 | src/main/scala/ckite/rpc/thrift/CKiteService.scala
12 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: scala
2 | sbt_args: -sbt-version 0.13.7
3 | scala:
4 | - 2.11.7
5 | jdk:
6 | - oraclejdk8
7 | sudo: false
8 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | This software is licensed under the Apache 2 license, quoted below.
2 |
3 | Copyright © 2013 the CKite project
4 |
5 | Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 | use this file except in compliance with the License. You may obtain a copy of
7 | the License at
8 |
9 | [http://www.apache.org/licenses/LICENSE-2.0]
10 |
11 | Unless required by applicable law or agreed to in writing, software
12 | distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 | WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 | License for the specific language governing permissions and limitations under
15 | the License.
16 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | CKite - JVM Raft [](https://travis-ci.org/pablosmedina/ckite)
2 | =====
3 |
4 | ## Overview
5 |
6 | A __JVM__ implementation of the [Raft distributed consensus algorithm](http://raftconsensus.github.io/) written in Scala. CKite is a `consensus library` with an easy to use API intended to be used by distributed applications needing consensus agreement.
7 |
8 | It is designed to be agnostic of both the mechanism used to exchange messages between members `(RPC)` and the medium to store the Log `(Storage)`. CKite has a modular architecture with pluggable `RPC` and `Storage` implementations. Custom RPCs and Storages can be easily implemented and configured to be used by CKite.
9 |
10 | ## Status
11 |
12 | CKite covers all the major topics of Raft including leader election, log replication, log compaction and cluster membership changes. It currently has two implemented modules:
13 |
14 | * ckite-finagle: Finagle based RPC module
15 | * ckite-mapdb: MapDB based Storage module
16 |
17 | Check out the latest __Release 0.2.1__ following the instructions detailed below to start playing with it.
18 |
19 | ## Features
20 |
21 | * Leader Election
22 | * Log Replication
23 | * Cluster Membership Changes
24 | * Log Compaction
25 | * Twitter Finagle integration
26 | * MapDB integration
27 |
28 | ## Architecture
29 |
30 | * `ckite-core` - The core of the library. It implements the Raft consensus protocol. It can be configured with RPCs and Storages.
31 |
32 | * `ckite-finagle` - Twitter Finagle based RPC implementation. It uses a Thrift protocol to exchange Raft messages between members.
33 |
34 | * `ckite-mapdb` - MapDB based storage implementation. MapDB provides concurrent Maps, Sets and Queues backed by disk storage or off-heap-memory. It is a fast and easy to use embedded Java database engine.
35 |
36 | Coming soon: ckite-chronicle, ckite-akka.
37 |
38 | ## Getting started (Scala)
39 |
40 | #### SBT settings
41 |
42 | The latest release 0.2.1 is in Maven central. Add the following sbt dependency to your project settings:
43 |
44 | ```scala
45 | libraryDependencies += "io.ckite" %% "ckite-core" % "0.2.1"
46 | ```
47 | ```scala
48 | libraryDependencies += "io.ckite" %% "ckite-finagle" % "0.2.1"
49 | ```
50 | ```scala
51 | libraryDependencies += "io.ckite" %% "ckite-mapdb" % "0.2.1"
52 | ```
53 |
54 | ## Getting started (Java)
55 |
56 | #### Maven settings
57 |
58 | Add the following maven dependency to your pom.xml:
59 |
60 | ```xml
61 |
62 | io.ckite
63 | ckite-core
64 | 0.2.1
65 |
66 | ```
67 |
68 | ## Example (See [KVStore](https://github.com/pablosmedina/kvstore))
69 |
70 | #### 1) Create a StateMachine
71 | ```scala
72 | //KVStore is an in-memory distributed Map allowing Puts and Gets operations
73 | class KVStore extends StateMachine {
74 |
75 | private var map = Map[String, String]()
76 | private var lastIndex: Long = 0
77 |
78 | //Called when a consensus has been reached for a WriteCommand
79 | //index associated to the write is provided to implement your own persistent semantics
80 | //see lastAppliedIndex
81 | def applyWrite = {
82 | case (index, Put(key: String, value: String)) => {
83 | map.put(key, value)
84 | lastIndex = index
85 | value
86 | }
87 | }
88 |
89 | //called when a read command has been received
90 | def applyRead = {
91 | case Get(key) => map.get(key)
92 | }
93 |
94 | //CKite needs to know the last applied write on log replay to
95 | //provide exactly-once semantics
96 | //If no persistence is needed then state machines can just return zero
97 | def getLastAppliedIndex: Long = lastIndex
98 |
99 | //called during Log replay on startup and upon installSnapshot requests
100 | def restoreSnapshot(byteBuffer: ByteBuffer) = {
101 | map = Serializer.deserialize[Map[String, String]](byteBuffer.array())
102 | }
103 | //called when Log compaction is required
104 | def takeSnapshot(): ByteBuffer = ByteBuffer.wrap(Serializer.serialize(map))
105 |
106 | }
107 |
108 | //WriteCommands are replicated under Raft rules
109 | case class Put(key: String, value: String) extends WriteCommand[String]
110 |
111 | //ReadCommands are not replicated but forwarded to the Leader
112 | case class Get(key: String) extends ReadCommand[Option[String]]
113 | ```
114 | #### 2) Create a CKite instance using the builder (minimal)
115 | ```scala
116 | val ckite = CKiteBuilder().listenAddress("node1:9091").rpc(FinagleThriftRpc) //Finagle based transport
117 | .stateMachine(new KVStore()) //KVStore is an implementation of the StateMachine trait
118 | .bootstrap(true) //bootstraps a new cluster. only needed just the first time for the very first node
119 | .build
120 | ```
121 |
122 | #### 3) Create a CKite instance using the builder (extended)
123 | ```scala
124 | val ckite = CKiteBuilder().listenAddress("localhost:9091").rpc(FinagleThriftRpc)
125 | .members(Seq("localhost:9092","localhost:9093")) //optional seeds to join the cluster
126 | .minElectionTimeout(1000).maxElectionTimeout(1500) //optional
127 | .heartbeatsPeriod(250) //optional. period to send heartbeats interval when being Leader
128 | .dataDir("/home/ckite/data") //dataDir for persistent state (log, terms, snapshots, etc...)
129 | .stateMachine(new KVStore()) //KVStore is an implementation of the StateMachine trait
130 | .sync(false) //disables log sync to disk
131 | .flushSize(10) //max batch size when flushing log to disk
132 | .build
133 | ```
134 | #### 4) Start ckite
135 | ```scala
136 | ckite.start()
137 | ```
138 |
139 | #### 5) Send a write command
140 | ```scala
141 | //this Put command is forwarded to the Leader and applied under Raft rules
142 | val writeFuture:Future[String] = ckite.write(Put("key1","value1"))
143 | ```
144 |
145 | #### 6) Send a consistent read command
146 | ```scala
147 | //consistent read commands are forwarded to the Leader
148 | val readFuture:Future[Option[String]] = ckite.read(Get("key1"))
149 | ```
150 | #### 7) Add a new Member
151 | ```scala
152 | //as write commands, cluster membership changes are forwarded to the Leader
153 | ckite.addMember("someHost:9094")
154 | ```
155 |
156 | #### 8) Remove a Member
157 | ```scala
158 | //as write commands, cluster membership changes are forwarded to the Leader
159 | ckite.removeMember("someHost:9094")
160 | ```
161 |
162 | #### 9) Send a local read command
163 | ```scala
164 | //alternatively you can read from its local state machine allowing possible stale values
165 | val value = ckite.readLocal(Get("key1"))
166 | ```
167 |
168 | #### 10) Check leadership
169 | ```scala
170 | //if necessary waits for elections to end
171 | ckite.isLeader()
172 | ```
173 | #### 11) Stop ckite
174 | ```scala
175 | ckite.stop()
176 | ```
177 |
178 | ## How CKite bootstraps
179 |
180 | To start a new cluster you have to run the very first node turning on the bootstrap parameter. This will create an initial configuration with just the first node. The next nodes start by pointing to the existing ones to join the cluster.
181 | You can bootstrap the first node using the builder, overriding ckite.bootstrap in your application.conf or by starting your application with a system property -Dckite.bootstrap=true. See [KVStore](https://github.com/pablosmedina/kvstore) for more details.
182 |
183 |
184 | #### bootstrapping the first node using the builder
185 | ```scala
186 | val ckite = CKiteBuilder().listenAddress("node1:9091").rpc(FinagleThriftRpc)
187 | .dataDir("/home/ckite/data") //dataDir for persistent state (log, terms, snapshots, etc...)
188 | .stateMachine(new KVStore()) //KVStore is an implementation of the StateMachine trait
189 | .bootstrap(true) //bootstraps a new cluster. only needed just the first time for the very first node
190 | .build
191 | ```
192 |
193 | ## Implementation details
194 |
195 | * Built in Scala 2.11.7 and JDK 8.
196 | * [Twitter Finagle](http://twitter.github.io/finagle/).
197 | * [Thrift](http://thrift.apache.org/).
198 | * [Twitter Scrooge](http://twitter.github.io/scrooge/).
199 | * [MapDB](http://www.mapdb.org/)
200 | * [Kryo](https://github.com/EsotericSoftware/kryo)
201 | * Chronicle (to be implemented)
202 |
203 |
204 | ## Contributions
205 |
206 | Feel free to contribute to CKite! Any kind of help will be very welcome. We are happy to receive pull requests, issues, discuss implementation details, analyze the raft algorithm and whatever makes CKite a better library. Check out the issues. You can start from there!
207 |
208 |
209 | ## Importing the project into IntelliJ IDEA
210 |
211 | To generate the necessary IDE config files first run the following command and then open the project as usual:
212 |
213 | sbt gen-idea
214 |
215 | ## Importing the project into Eclipse
216 |
217 | To generate the necessary IDE config files first run the following command and then open the project as usual:
218 |
219 | sbt eclipse
220 |
--------------------------------------------------------------------------------
/backbuild:
--------------------------------------------------------------------------------
1 | name := "ckite"
2 |
3 | organization := "io.ckite"
4 |
5 | version := "0.2.0-SNAPSHOT"
6 |
7 | scalaVersion := "2.11.4"
8 |
9 | publishMavenStyle := true
10 |
11 | publishArtifact in Test := false
12 |
13 | pomIncludeRepository := { x => false}
14 |
15 | libraryDependencies ++= Seq(
16 | "org.slf4j" % "slf4j-api" % "1.6.4",
17 | "com.twitter" %% "scrooge-core" % "3.17.0" exclude("org.scala-lang", "scala-library"),
18 | "org.apache.thrift" % "libthrift" % "0.9.1" exclude("org.apache.httpcomponents", "httpclient") exclude("org.apache.httpcomponents", "httpcore") exclude("org.slf4j", "slf4j-api") exclude("org.apache.commons", "commons-lang3"),
19 | "com.twitter" %% "finagle-core" % "6.24.0" exclude("com.twitter", "util-logging_2.11") exclude("com.twitter", "util-app_2.11"),
20 | "com.twitter" %% "finagle-thrift" % "6.24.0" exclude("org.scala-lang", "scala-library") exclude("org.apache.thrift", "libthrift"),
21 | "com.typesafe" % "config" % "1.0.2",
22 | "org.mapdb" % "mapdb" % "0.9.13",
23 | "com.esotericsoftware.kryo" % "kryo" % "2.22",
24 | "com.twitter" %% "finagle-http" % "6.24.0" % "test",
25 | "com.fasterxml.jackson.module" %% "ja" +
26 | "ckson-module-scala" % "2.4.4" % "test",
27 | "org.scalatest" %% "scalatest" % "2.2.2" % "test",
28 | "ch.qos.logback" % "logback-classic" % "1.1.1" % "test",
29 | "junit" % "junit" % "4.8.1" % "test"
30 | )
31 |
32 |
33 | unmanagedSourceDirectories in Compile <++= baseDirectory { base =>
34 | Seq(
35 | base / "src/main/resources",
36 | base / "src/main/thrift"
37 | )
38 | }
39 |
40 | unmanagedSourceDirectories in Test <++= baseDirectory { base =>
41 | Seq(
42 | base / "src/test/resources"
43 | )
44 | }
45 |
46 | com.twitter.scrooge.ScroogeSBT.newSettings
47 |
48 | /*
49 | scroogeThriftOutputFolder in Compile <<= baseDirectory {
50 | _ / "src/main/scala"
51 | }
52 | */
53 | publishTo <<= version { v: String =>
54 |   val nexus = "https://oss.sonatype.org/"
55 |   if (v.trim.endsWith("SNAPSHOT"))
56 |     Some("snapshots" at nexus + "content/repositories/snapshots")
57 |   else if (v.trim.endsWith("LOCAL")) // fix: was a separate `if`, so the SNAPSHOT branch's value was discarded and snapshots went to the releases repo
58 |     Some(Resolver.file("file", new File(Path.userHome.absolutePath + "/.m2/repository")))
59 |   else
60 |     Some("releases" at nexus + "service/local/staging/deploy/maven2")
61 | }
62 |
63 | pomExtra := {
64 | http://ckite.io
65 |
66 |
67 | Apache 2
68 | http://www.apache.org/licenses/LICENSE-2.0.txt
69 | repo
70 |
71 |
72 |
73 | scm:git:github.com/pablosmedina/ckite.git
74 | scm:git:git@github.com:pablosmedina/ckite.git
75 | github.com/pablosmedina/ckite.git
76 |
77 |
78 |
79 | pmedina
80 | Pablo S. Medina
81 | https://twitter.com/pablosmedina
82 |
83 |
84 | }
--------------------------------------------------------------------------------
/ckite-core/src/main/resources/reference.conf:
--------------------------------------------------------------------------------
1 | ckite {
2 |
3 | # Address and port where this Server listens for RPCs (host:port)
4 | listen-address = "localhost:9091"
5 |
6 | # Bootstraps a new Cluster
7 | bootstrap = false
8 |
9 | # Addresses and ports of Members to be used as seeds when joining an existing Cluster
10 | members = []
11 |
12 | # Directory location where CKite stores its persistent data
13 | datadir = "/tmp"
14 |
15 | # Timeout for waiting a Leader to be elected
16 | leader-timeout = 10s
17 |
18 | # Timeout for committing writes
19 | write-timeout = 2s
20 |
21 | append-entries {
22 | # Time between heartbeat (empty AppendEntries) pulses sent by the Leader
23 | period = 200ms
24 | }
25 |
26 | election {
27 | # Minimum timeout for starting an election when not receiving RPCs from the Leader
28 | min-timeout = 1s
29 |
30 | # Maximum timeout for starting an election when not receiving RPCs from the Leader
31 | max-timeout = 2s
32 |
33 | # Timeout for collecting votes
34 | voting-timeout = 1 s
35 | }
36 |
37 | log {
38 | # Max amount of entries to be flushed in a batch
39 | flush-size = 1000
40 |
41 | # Enable or disable disk sync
42 | sync = true
43 |
44 | # Threshold of LogEntries to start a Compaction
45 | compaction-threshold = 10000
46 | }
47 |
48 | }
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/CKite.scala:
--------------------------------------------------------------------------------
1 | package ckite
2 |
3 | import ckite.rpc.{ ReadCommand, WriteCommand }
4 |
5 | import scala.concurrent.Future
6 |
7 | /**
8 | * A CKite is a member of the cluster. It exchanges messages with its peers to achieve consensus
9 | * on the submitted write and read commands according to the Raft consensus protocol.
10 | */
11 | trait CKite {
12 |
13 | /**
14 | * Starts CKite. It begins the communication with the rest of the cluster.
15 | */
16 | def start(): Unit
17 |
18 | /**
19 | * Stops CKite. It no longer receives or sends messages to the cluster. It can't be started again.
20 | */
21 | def stop(): Unit
22 |
23 | /**
24 | * Consistently replicates and applies a command under Raft consensus rules.
25 | *
26 | * @param writeCommand to be applied
27 | * @tparam T
28 | * @return a Future with the result of applying the Write to the StateMachine
29 | */
30 | def write[T](writeCommand: WriteCommand[T]): Future[T]
31 |
32 | /**
33 | * Consistent read. It is forwarded and answered by the Leader according to Raft consensus rules.
34 | *
35 | * @param readCommand to be forwarded and applied to the Leader StateMachine
36 | * @tparam T
37 | * @return a Future with the result of applying the Read to the StateMachine
38 | */
39 | def read[T](readCommand: ReadCommand[T]): Future[T]
40 |
41 | /**
42 | * Consistently adds a new member to the cluster.
43 | *
44 | * @param memberId to be added
45 | * @return future with true if the memberId could be added to the cluster and false if not
46 | */
47 | def addMember(memberId: String): Future[Boolean]
48 |
49 | /**
50 | * Consistently removes an existing member from the cluster.
51 | *
52 | * @param memberId to be removed
53 | * @return future with true if the memberId could be removed from the cluster and false if not
54 | */
55 | def removeMember(memberId: String): Future[Boolean]
56 |
57 | }
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/CKiteBuilder.scala:
--------------------------------------------------------------------------------
1 | package ckite
2 |
3 | import ckite.rlog.Storage
4 | import ckite.rpc.Rpc
5 | import ckite.statemachine.StateMachine
6 | import ckite.statemachine.j.StateMachineWrapper
7 | import ckite.storage.MemoryStorage
8 | import com.typesafe.config.ConfigFactory
9 |
10 | class CKiteBuilder {
11 |
12 | private val configuration = new Configuration(ConfigFactory.load())
13 | private var stateMachine: Option[StateMachine] = None
14 | private var rpc: Option[Rpc] = None
15 | private var storage: Storage = MemoryStorage()
16 |
17 | def minElectionTimeout(minElectionTimeout: Int): CKiteBuilder = {
18 | configuration.withMinElectionTimeout(minElectionTimeout)
19 | CKiteBuilder.this
20 | }
21 |
22 | def maxElectionTimeout(maxElectionTimeout: Int): CKiteBuilder = {
23 | configuration.withMaxElectionTimeout(maxElectionTimeout)
24 | CKiteBuilder.this
25 | }
26 |
27 | def heartbeatsPeriod(heartbeatsInterval: Int): CKiteBuilder = {
28 | configuration.withHeartbeatsInterval(heartbeatsInterval)
29 | CKiteBuilder.this
30 | }
31 |
32 | def listenAddress(localBinding: String): CKiteBuilder = {
33 | configuration.withLocalBinding(localBinding)
34 | CKiteBuilder.this
35 | }
36 |
37 | def members(memberBindings: Seq[String]): CKiteBuilder = {
38 | configuration.withMemberBindings(memberBindings)
39 | CKiteBuilder.this
40 | }
41 |
42 | def members(memberBindings: String): CKiteBuilder = {
43 | configuration.withMemberBindings(memberBindings.split(","))
44 | CKiteBuilder.this
45 | }
46 |
47 | def compactionThreshold(threshold: Int): CKiteBuilder = {
48 | configuration.withLogCompactionThreshold(threshold)
49 | CKiteBuilder.this
50 | }
51 |
52 | def stateMachine(stateMachine: StateMachine): CKiteBuilder = {
53 | CKiteBuilder.this.stateMachine = Some(stateMachine)
54 | CKiteBuilder.this
55 | }
56 |
57 | def stateMachine(stateMachine: ckite.statemachine.j.StateMachine): CKiteBuilder = {
58 | CKiteBuilder.this.stateMachine = Some(new StateMachineWrapper(stateMachine))
59 | CKiteBuilder.this
60 | }
61 |
62 | def bootstrap(enabled: Boolean): CKiteBuilder = {
63 | configuration.bootstrap(enabled)
64 | CKiteBuilder.this
65 | }
66 |
67 | def rpc(someRpc: Rpc): CKiteBuilder = {
68 | rpc = Some(someRpc)
69 | CKiteBuilder.this
70 | }
71 |
72 | def storage(someStorage: Storage): CKiteBuilder = {
73 | storage = someStorage
74 | CKiteBuilder.this
75 | }
76 |
77 | private def configuredStateMachine() = {
78 | stateMachine.getOrElse(throw new IllegalStateException("StateMachine required"))
79 | }
80 |
81 | private def configuredRpc() = {
82 | rpc.getOrElse(throw new IllegalStateException("RPC required"))
83 | }
84 |
85 | def build: CKite = {
86 | val stateMachine = configuredStateMachine()
87 | val rpc = configuredRpc()
88 | val raft = Raft(stateMachine, rpc, storage, configuration)
89 | CKiteClient(raft, rpc.createServer(raft, configuration.config), CKiteBuilder.this)
90 | }
91 |
92 | }
93 |
94 | object CKiteBuilder {
95 | def apply() = new CKiteBuilder()
96 | }
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/CKiteClient.scala:
--------------------------------------------------------------------------------
1 | package ckite
2 |
3 | import java.util.concurrent.atomic.AtomicBoolean
4 |
5 | import ckite.rpc.{ ReadCommand, RpcServer, WriteCommand }
6 | import ckite.stats.{ Stats, ConsensusStats }
7 |
8 | import scala.concurrent.ExecutionContext.Implicits.global
9 | import scala.concurrent.Future
10 |
11 | class CKiteClient(raft: Raft, rpcServer: RpcServer, private[ckite] val builder: CKiteBuilder) extends CKite {
12 |
13 | private val stopped = new AtomicBoolean(false)
14 |
15 | def write[T](writeCommand: WriteCommand[T]): Future[T] = raft.onCommandReceived[T](writeCommand)
16 |
17 | def read[T](readCommand: ReadCommand[T]): Future[T] = raft.onCommandReceived[T](readCommand)
18 |
19 | def addMember(memberBinding: String) = raft.onMemberJoinReceived(memberBinding).map(_.success)
20 |
21 | def removeMember(memberBinding: String) = raft.onMemberLeaveReceived(memberBinding)
22 |
23 | def readLocal[T](readCommand: ReadCommand[T]): Future[T] = raft.onLocalReadReceived(readCommand)
24 |
25 | private[ckite] def isLeader: Boolean = raft.isLeader
26 |
27 | private[ckite] def members: Set[String] = raft.membership.members
28 |
29 | private[ckite] def id(): String = raft.membership.myId
30 |
31 | private[ckite] def stats(): Stats = raft.stats()
32 |
33 | def start() = {
34 | rpcServer.start()
35 | raft.start()
36 | }
37 |
38 | def stop() = {
39 | if (!stopped.getAndSet(true)) {
40 | rpcServer.stop()
41 | raft.stop()
42 | }
43 | }
44 |
45 | override val toString = s"CKite($id)"
46 | }
47 |
48 | object CKiteClient {
49 | def apply(raft: Raft, rpcServer: RpcServer, builder: CKiteBuilder) = {
50 | new CKiteClient(raft, rpcServer, builder)
51 | }
52 | }
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/ConfigAware.scala:
--------------------------------------------------------------------------------
1 | package ckite
2 |
3 | import com.typesafe.config.Config
4 |
5 | trait ConfigAware {
6 |
7 | implicit def config: Config
8 | }
9 |
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/ConfigSupport.scala:
--------------------------------------------------------------------------------
1 | package ckite
2 |
3 | import com.typesafe.config.Config
4 |
5 | trait ConfigSupport {
6 |
7 | implicit val config: Config
8 |
9 | val Id = "ckite.finagle.listen-address"
10 | val Bootstrap = "ckite.bootstrap"
11 |
12 | val MinElectionTimeout = "ckite.election.min-timeout"
13 | val MaxElectionTimeout = "ckite.election.max-timeout"
14 | val VotingTimeout = "ckite.election.voting-timeout"
15 | val ElectionWorkers = "ckite.election.workers"
16 |
17 | val WriteTimeout = "ckite.write-timeout"
18 |
19 | val HeartbeatsPeriod = "ckite.append-entries.period"
20 | val AppendEntriesWorkers = "ckite.append-entries.workers"
21 |
22 | val Members = "ckite.members"
23 | val LeaderTimeout = "ckite.leader-timeout"
24 |
25 | val ListenAddress = "ckite.finagle.listen-address"
26 | val ThriftWorkers = "ckite.finagle.thrift.workers"
27 |
28 | val CompactionThreshold = "ckite.log.compaction-threshold"
29 | val FlushSize = "ckite.log.flush-size"
30 | val Sync = "ckite.log.sync"
31 | val DataDir = "ckite.datadir"
32 |
33 | }
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/Configuration.scala:
--------------------------------------------------------------------------------
1 | package ckite
2 |
3 | import com.typesafe.config.Config
4 | import com.typesafe.config.ConfigValueFactory
5 | import scala.collection.JavaConverters._
6 |
7 | class Configuration(var config: Config) {
8 |
9 | val Bootstrap = "ckite.bootstrap"
10 |
11 | val MinElectionTimeout = "ckite.election.min-timeout"
12 | val MaxElectionTimeout = "ckite.election.max-timeout"
13 | val VotingTimeout = "ckite.election.voting-timeout"
14 | val ElectionWorkers = "ckite.election.workers"
15 |
16 | val WriteTimeout = "ckite.write-timeout"
17 |
18 | val HeartbeatsPeriod = "ckite.append-entries.period"
19 | val AppendEntriesWorkers = "ckite.append-entries.workers"
20 |
21 | val Members = "ckite.members"
22 | val LeaderTimeout = "ckite.leader-timeout"
23 |
24 | val ListenAddress = "ckite.listen-address"
25 |
26 | val CompactionThreshold = "ckite.log.compaction-threshold"
27 | val FlushSize = "ckite.log.flush-size"
28 | val Sync = "ckite.log.sync"
29 | val DataDir = "ckite.datadir"
30 |
31 | def withMinElectionTimeout(minElectionTimeout: Int) = {
32 | config = config.withValue(MinElectionTimeout, ConfigValueFactory.fromAnyRef(minElectionTimeout))
33 | }
34 |
35 | def minElectionTimeout: Long = {
36 | config.getMilliseconds(MinElectionTimeout)
37 | }
38 |
39 | def withMaxElectionTimeout(maxElectionTimeout: Int) = {
40 | config = config.withValue(MaxElectionTimeout, ConfigValueFactory.fromAnyRef(maxElectionTimeout))
41 | }
42 |
43 | def maxElectionTimeout: Long = {
44 | config.getMilliseconds(MaxElectionTimeout)
45 | }
46 |
47 | def withHeartbeatsInterval(heartbeatsInterval: Int) = {
48 | config = config.withValue(HeartbeatsPeriod, ConfigValueFactory.fromAnyRef(heartbeatsInterval))
49 | }
50 |
51 | def heartbeatsInterval: Long = {
52 | config.getMilliseconds(HeartbeatsPeriod)
53 | }
54 |
55 | def withLocalBinding(localBinding: String) = {
56 | config = config.withValue(ListenAddress, ConfigValueFactory.fromAnyRef(localBinding))
57 | }
58 |
59 | def withDataDir(dataDir: String) = {
60 | config = config.withValue(DataDir, ConfigValueFactory.fromAnyRef(dataDir))
61 | }
62 |
63 | def dataDir: String = {
64 | config.getString(DataDir)
65 | }
66 |
67 | def localBinding: String = {
68 | config.getString(ListenAddress)
69 | }
70 |
71 | def withMemberBindings(membersBindings: Seq[String]) = {
72 | config = config.withValue(Members, ConfigValueFactory.fromIterable(membersBindings.asJava))
73 | }
74 |
75 | def withLogCompactionThreshold(threshold: Int) = {
76 | config = config.withValue(CompactionThreshold, ConfigValueFactory.fromAnyRef(threshold))
77 | }
78 |
79 | def withFlushSize(flushSize: Long) = {
80 | config = config.withValue(FlushSize, ConfigValueFactory.fromAnyRef(flushSize))
81 | }
82 |
83 | def withSyncEnabled(syncEnabled: Boolean) = {
84 | config = config.withValue(Sync, ConfigValueFactory.fromAnyRef(syncEnabled))
85 | }
86 |
87 | def withWaitForLeaderTimeout(waitForLeaderTimeout: Int) = {
88 | config = config.withValue(LeaderTimeout, ConfigValueFactory.fromAnyRef(waitForLeaderTimeout))
89 | }
90 |
91 | def withCollectVotesTimeout(collectVotesTimeout: Int) = {
92 | config = config.withValue(VotingTimeout, ConfigValueFactory.fromAnyRef(collectVotesTimeout))
93 | }
94 |
95 | def waitForLeaderTimeout: Long = {
96 | config.getMilliseconds(LeaderTimeout)
97 | }
98 |
99 | def memberBindings: Set[String] = {
100 | config.getStringList(Members).asScala.toSet
101 | }
102 |
103 | def bootstrap: Boolean = {
104 | config.getBoolean(Bootstrap)
105 | }
106 |
107 | def bootstrap(enabled: Boolean) = {
108 | config = config.withValue(Bootstrap, ConfigValueFactory.fromAnyRef(enabled))
109 | }
110 |
111 | def collectVotesTimeout: Long = {
112 | config.getMilliseconds(VotingTimeout)
113 | }
114 |
115 | def logCompactionThreshold: Long = {
116 | config.getLong(CompactionThreshold)
117 | }
118 |
119 | def appendEntriesTimeout: Long = {
120 | config.getMilliseconds(WriteTimeout) // NOTE(review): reads the `ckite.write-timeout` key, not an append-entries key — confirm intended
121 | }
122 |
123 | def appendEntriesWorkers: Int = {
124 | config.getInt(AppendEntriesWorkers)
125 | }
126 |
127 | def electionWorkers: Int = {
128 | config.getInt(ElectionWorkers)
129 | }
130 |
131 | def syncEnabled: Boolean = config.getBoolean(Sync)
132 |
133 | def flushSize: Long = config.getLong(FlushSize)
134 | }
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/Consensus.scala:
--------------------------------------------------------------------------------
1 | package ckite
2 |
3 | import java.util.concurrent.atomic.AtomicReference
4 |
5 | import ckite.rlog.{ Storage, Vote }
6 | import ckite.rpc.LogEntry._
7 | import ckite.rpc._
8 | import ckite.states._
9 | import ckite.stats.ConsensusStats
10 | import ckite.util.Logging
11 |
12 | import scala.concurrent.duration._
13 | import scala.concurrent.{ Await, Future }
14 | import scala.util.control.Breaks._
15 | import scala.concurrent.ExecutionContext.Implicits.global
16 |
/**
 * Raft consensus module: holds the node's current State (Starter, Follower, Candidate,
 * Leader, Joiner or Stopped) and dispatches every incoming RPC and command to it.
 * State transitions are performed through a compareAndSet retry loop so that concurrent
 * triggers (elections, higher terms, stop) resolve safely.
 */
case class Consensus(raft: Raft, storage: Storage, configuration: Configuration) extends Logging {

  // The active State; mutated only via compareAndSet (see changeState).
  private val currentState = new AtomicReference[State](Starter)
  private val ZERO_TERM = 0

  def membership = raft.membership

  private def log = raft.log

  private def state = currentState.get()

  /** Bootstrap start: become Follower at term 0. */
  def startAsBootstrapper() = {
    becomeFollower(ZERO_TERM)
  }

  /** Normal start: restore the latest persisted term/vote (if any) and become Follower. */
  def startAsFollower() = {
    storage.retrieveLatestVote() match {
      case Some(Vote(term, member)) ⇒ becomeFollower(term = term, vote = Option(member))
      case None ⇒ becomeFollower(ZERO_TERM)
    }
  }

  /** Tries each configured seed member in turn until one accepts the join request. */
  def startAsJoiner() = {
    //no configuration. will try to join an existing cluster
    logger.info("Start as Joiner. Using seeds: {}", configuration.memberBindings)

    becomeJoiner(ZERO_TERM) //don't start elections

    breakable {
      for (remoteMemberBinding ← configuration.memberBindings) {
        logger.info("Try to join with {}", remoteMemberBinding)
        val remoteMember = membership.get(remoteMemberBinding).get
        val response = Await.result(remoteMember.join(membership.myId), 3 seconds) //TODO: Refactor me
        if (response.success) {
          logger.info("Join successful")

          //becomeFollower(ZERO_TERM)

          break
        }
      }
      //TODO: Implement retries/shutdown here
    }
  }

  def onAppendEntries(appendEntries: AppendEntries): Future[AppendEntriesResponse] = state.onAppendEntries(appendEntries)

  def onRequestVote(requestVote: RequestVote): Future[RequestVoteResponse] = state.onRequestVote(requestVote)

  // Only notifies the state when the committed joint configuration is still the current one.
  def onJointConfigurationCommitted(index: Index, jointConfiguration: JointConfiguration) = {
    if (membership.isCurrent(index)) { //TODO: move to Leader
      state.onJointConfigurationCommitted(jointConfiguration)
    }
    true
  }

  def onNewConfigurationCommitted(index: Index, configuration: NewConfiguration): Boolean = {
    true
  }

  def onCommand[T](command: Command): Future[T] = state.onCommand[T](command)

  def onInstallSnapshot(installSnapshot: InstallSnapshot): Future[InstallSnapshotResponse] = {
    state.onInstallSnapshot(installSnapshot)
  }

  /** Adds a member via a joint configuration (Cold,new) unless it is already in the cluster. */
  def onMemberJoin(member: String): Future[JoinMemberResponse] = {
    if (!membership.contains(member)) {
      onCommand[Boolean](JointConfiguration(membership.members, membership.members + member)).map(JoinMemberResponse(_))
    } else {
      logger.info(s"$member is already part of the cluster")
      Future.successful(JoinMemberResponse(true))
    }
  }

  /** Removes a member via a joint configuration; trivially succeeds if it is not a member. */
  def onMemberLeave(member: String): Future[Boolean] = {
    if (membership.contains(member)) {
      onCommand(JointConfiguration(membership.members, membership.members - member))
    } else {
      Future.successful(true)
    }
  }

  def becomeLeader(term: Term) = {
    become(Leader(this, membership, log, term, leaderAnnouncer))
  }

  def becomeCandidate(term: Term) = {
    become(Candidate(this, membership, log, term, leaderAnnouncer.onElection))
  }

  def becomeFollower(term: Term, leaderAnnouncer: LeaderAnnouncer = LeaderAnnouncer(membership, configuration), vote: Option[String] = None) = {
    become(Follower(this, membership, log, term, leaderAnnouncer, vote))
  }

  def becomeJoiner(term: Term): Unit = {
    become(Joiner(this, membership, log, term, configuration))
  }

  def isLeader = {
    state.isLeader
  }

  def becomeStarter = changeState(Starter, Starter)

  def leaderAnnouncer = state.leaderAnnouncer

  // CAS transition loop: keeps retrying while the current state still allows the transition.
  // On a successful swap the new term/vote is persisted, the old state is stopped and the
  // new one is started.
  private def become(newState: State) = {
    logger.trace("Trying to become {}", newState)
    var current = state
    //stops when current == newState or current.term < newState.term
    while (current.canTransitionTo(newState)) {
      if (changeState(current, newState)) {
        logger.debug(s"Transition from $current to $newState")
        persistState()
        current.stop(newState.term)
        newState.begin()
      }
      current = state
    }
    logger.trace("State is {}", current)
  }

  /** Persists the current term and vote; skipped once the node is Stopped. */
  def persistState() = {
    val st = state
    if (st != Stopped) {
      storage.saveVote(Vote(st.term, st.votedFor.get().getOrElse("")))
    }
  }

  private def changeState(current: State, newState: State) = currentState.compareAndSet(current, newState)

  def term(): Term = state.term

  def stop(): Unit = {
    become(Stopped)
  }

  def stats(): ConsensusStats = ConsensusStats(term, currentState.toString, currentState.get().stats())

}
158 |
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/LeaderAnnouncer.scala:
--------------------------------------------------------------------------------
1 | package ckite
2 |
3 | import java.util.concurrent.TimeoutException
4 |
5 | import ckite.exception.LeaderTimeoutException
6 | import ckite.util.Logging
7 |
8 | import scala.concurrent.ExecutionContext.Implicits.global
9 | import scala.concurrent.duration._
10 | import scala.concurrent.{ Await, Future, Promise }
11 |
/**
 * Announces the elected Leader through a single-completion Promise. A fresh announcer is
 * created on every new election or step-down (see onElection/onStepDown), so each instance
 * is completed at most once.
 */
case class LeaderAnnouncer(membership: Membership, configuration: Configuration) extends Logging {

  private val waitForLeaderTimeout = configuration.waitForLeaderTimeout millis
  // Completed (at most once) with the announced Leader member.
  private val promise = Promise[Member]()

  /** Resolves the announced leader id to a Member and completes the promise. */
  def announce(leaderId: String) = {
    val leader: Member = if (membership.myId == leaderId) membership.localMember else membership.get(leaderId).getOrElse(unknownAnnouncedLeader(leaderId))
    promise.trySuccess(leader)
  }

  /** A new election invalidates an already announced Leader: hand out a fresh announcer. */
  def onElection = {
    if (isLeaderAnnounced) LeaderAnnouncer(membership, configuration) else this
  }

  def onStepDown = {
    if (isLeaderAnnounced) LeaderAnnouncer(membership, configuration) else this
  }

  /** Runs the given block once a Leader is known. */
  def onLeader[T](block: Member ⇒ Future[T]): Future[T] = {
    leader().flatMap(block(_))
  }

  /**
   * Blocks until a Leader is announced.
   *
   * @throws LeaderTimeoutException if no Leader is announced within waitForLeaderTimeout.
   */
  def awaitLeader: Member = {
    try {
      if (!promise.isCompleted) {
        logger.info("Waiting for a Leader to be announced...")
      }
      Await.result(promise.future, waitForLeaderTimeout)
    } catch {
      case e: TimeoutException ⇒ {
        // Fix: the previous message had two {} placeholders but only one argument,
        // producing a garbled log line.
        logger.warn("Wait for Leader timed out after {}", waitForLeaderTimeout)
        throw new LeaderTimeoutException(e)
      }
    }
  }

  def isLeaderAnnounced = promise.isCompleted

  private def leader(): Future[Member] = {
    promise.future
  }

  // Announced leader id not resolvable to a known member: fail fast.
  private def unknownAnnouncedLeader(leaderId: String) = {
    logger.info(s"Unknown Leader $leaderId")
    throw new RuntimeException("Announced Leader member is unknown")
  }
}
59 |
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/LocalMember.scala:
--------------------------------------------------------------------------------
1 | package ckite
2 |
3 | import ckite.rpc.Command
4 |
5 | import scala.concurrent.Future
6 |
/** The member representing this node itself: commands are applied directly through consensus. */
case class LocalMember(raft: Raft, configuration: Configuration) extends Member(configuration.localBinding) {

  override def forwardCommand[T](command: Command): Future[T] = raft.consensus.onCommand[T](command)

}
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/Member.scala:
--------------------------------------------------------------------------------
1 | package ckite
2 |
3 | import ckite.rpc.Command
4 | import ckite.util.Logging
5 |
6 | import scala.concurrent.Future
7 |
/** Base class for cluster members; a member is identified by its binding address. */
abstract class Member(binding: String) extends Logging {

  // The binding string itself is the member id.
  def id() = binding

  /** Forwards a command to this member for execution. */
  def forwardCommand[T](command: Command): Future[T]

  override def toString() = id

}
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/Membership.scala:
--------------------------------------------------------------------------------
1 | package ckite
2 |
3 | import java.util.concurrent.atomic.AtomicReference
4 |
5 | import ckite.rpc.LogEntry.Index
6 | import ckite.rpc.{ Rpc, ClusterConfigurationCommand, JointConfiguration, NewConfiguration }
7 | import ckite.util.Logging
8 |
/** A cluster membership view, versioned by the Log index where it was appended. */
trait ClusterConfiguration {
  /** The index in the Log where this ClusterConfiguration is located */
  def index: Index

  /** All the members included in this ClusterConfiguration. This can include both new and old members */
  def members: Set[String]

  /** Checks if the given members form a quorum in this ClusterConfiguration. */
  def reachQuorum(someMembers: Set[String]): Boolean

  /** Checks if the given members form SOME quorum. Useful in the case of JointClusterConfiguration */
  def reachSomeQuorum(someMembers: Set[String]): Boolean
}
22 |
/** Plain (non-joint) configuration: one member set with simple majority quorum. */
case class SingleClusterConfiguration(members: Set[String], index: Index = -1) extends ClusterConfiguration {

  // Majority threshold: strictly more than half of the members.
  private val quorum = members.size / 2 + 1

  def reachQuorum(someMembers: Set[String]) = someMembers.count(members.contains) >= quorum

  def reachSomeQuorum(someMembers: Set[String]) = reachQuorum(someMembers)
}
30 |
/** Joint consensus configuration (Cold,new): decisions must account for both member sets. */
case class JointClusterConfiguration(cold: SingleClusterConfiguration, cnew: SingleClusterConfiguration, index: Index) extends ClusterConfiguration {

  val members = cold.members union cnew.members

  // A full quorum requires a majority in BOTH the old and the new configuration.
  def reachQuorum(someMembers: Set[String]) = cold.reachQuorum(someMembers) && cnew.reachQuorum(someMembers)

  // Some quorum: a majority in EITHER configuration.
  def reachSomeQuorum(someMembers: Set[String]) = cold.reachQuorum(someMembers) || cnew.reachQuorum(someMembers)

  override def toString = s"JointClusterConfiguration(cold=${cold.members}, cnew=${cnew.members}, index= $index)"
}
40 |
object JointClusterConfiguration {
  /** Lets plain member sets be used where a SingleClusterConfiguration is expected (index defaults to -1). */
  implicit def fromMembersSetToSimpleClusterConfiguration(members: Set[String]): SingleClusterConfiguration = {
    SingleClusterConfiguration(members)
  }
}
46 |
47 | object EmptyClusterConfiguration extends SingleClusterConfiguration(Set())
48 |
/**
 * Tracks cluster membership: the current ClusterConfiguration (single or joint) plus the
 * RemoteMember instances (rpc clients) known so far, both held in AtomicReferences.
 */
case class Membership(localMember: LocalMember, rpc: Rpc, configuration: Configuration) extends Logging {

  import ckite.JointClusterConfiguration._

  private val currentClusterConfiguration = new AtomicReference[ClusterConfiguration](EmptyClusterConfiguration)
  private val currentKnownMembers = new AtomicReference[Map[String, RemoteMember]](Map())

  // Eagerly register the configured seed members.
  register(configuration.memberBindings)

  def clusterConfiguration = currentClusterConfiguration.get()

  private def knownMembers = currentKnownMembers.get()

  def members = clusterConfiguration.members

  def remoteMembers = (clusterConfiguration.members - localMember.id()).map(member ⇒ knownMembers(member))

  def hasRemoteMembers = !remoteMembers.isEmpty

  def reachQuorum(someMembers: Set[String]) = clusterConfiguration.reachQuorum(someMembers)

  def reachSomeQuorum(someMembers: Set[String]) = clusterConfiguration.reachSomeQuorum(someMembers)

  /** Returns the RemoteMember for the given binding, registering it first if unknown. */
  def get(member: String): Option[RemoteMember] = {
    knownMembers.get(member).orElse {
      register(Set(member))
      knownMembers.get(member)
    }
  }

  /** Applies a committed configuration change unless an equal or newer one is already in place. */
  def changeConfiguration(index: Index, clusterConfiguration: ClusterConfigurationCommand) = {
    if (happensBefore(index)) {
      clusterConfiguration match {
        case JointConfiguration(oldMembers, newMembers) ⇒ {
          //JointConfiguration received. Switch membership to JointClusterConfiguration
          transitionTo(JointClusterConfiguration(oldMembers, newMembers, index))
        }
        case NewConfiguration(members) ⇒ {
          //NewConfiguration received. A new membership has been set. Switch to SimpleClusterConfiguration or shutdown If no longer part of the cluster.
          transitionTo(SingleClusterConfiguration(members, index))
        }
      }
    }
  }

  def transitionTo(newClusterConfiguration: ClusterConfiguration) = {
    val newMembers = newClusterConfiguration.members.filterNot(member ⇒ knownMembers.contains(member) || member == myId)

    register(newMembers)
    currentClusterConfiguration.set(newClusterConfiguration)
    logger.info("Cluster Configuration changed to {}", clusterConfiguration)
  }

  /**
   * Registers RemoteMembers (rpc clients) for the given bindings.
   *
   * Fixed: the previous read-then-set update was not atomic, so registrations made by a
   * concurrent caller between the read and the set were silently lost. A compareAndSet
   * retry loop keeps the update atomic; members already known are skipped so their rpc
   * clients are not recreated.
   */
  def register(newMembers: Set[String]) {
    var done = false
    while (!done) {
      val current = knownMembers
      val added = newMembers.filterNot(current.contains).map(id ⇒ (id, createRemoteMember(id)))
      done = added.isEmpty || currentKnownMembers.compareAndSet(current, current ++ added)
    }
  }

  def happensBefore(index: Index) = clusterConfiguration.index < index

  def isCurrent(index: Index) = index == clusterConfiguration.index

  def contains(member: String) = members.contains(member)

  def myId = localMember.id()

  def bootstrap() = {
    //validate empty log and no snapshot
    transitionTo(SingleClusterConfiguration(Set(myId), 1))
  }

  def createRemoteMember(id: String): RemoteMember = new RemoteMember(rpc, id)

  def isInitialized = clusterConfiguration != EmptyClusterConfiguration
}
121 |
122 |
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/RLog.scala:
--------------------------------------------------------------------------------
1 | package ckite
2 |
3 | import java.util.concurrent._
4 | import java.util.concurrent.atomic.AtomicLong
5 |
6 | import ckite.rlog._
7 | import ckite.rpc.LogEntry.{ Index, Term }
8 | import ckite.rpc._
9 | import ckite.statemachine.{ CommandExecutor, StateMachine }
10 | import ckite.stats.LogStats
11 | import ckite.util.CKiteConversions._
12 | import ckite.util.{ CustomThreadFactory, Logging }
13 |
14 | import scala.Option.option2Iterable
15 | import scala.collection.immutable.NumericRange
16 | import scala.concurrent.ExecutionContext.Implicits.global
17 | import scala.concurrent.{ Future, Promise }
18 |
/**
 * Replicated Log: persists LogEntries, applies committed ones to the StateMachine and
 * coordinates snapshots/log compaction. All mutations (appends, commits, reads, snapshot
 * installs) are serialized through a single log-worker thread consuming a message queue,
 * which makes the apply order deterministic.
 *
 * Cleanup: the unused appendsQueue field and its companion next() method (dead code,
 * superseded by messageQueue) were removed.
 */
case class RLog(raft: Raft, stateMachine: StateMachine, storage: Storage, configuration: Configuration) extends Logging {

  val log = storage.log()

  private def consensus = raft.consensus

  private def membership = raft.membership

  // Last index handed out for leader appends; reset from the persisted log via resetLastIndex.
  private val _lastIndex = new AtomicLong(0)

  private val snapshotManager = SnapshotManager(membership, this, storage, configuration)

  // Single-threaded executor running the log worker loop (see startLogWorker/runLogWorker).
  private val logWorker = new ThreadPoolExecutor(0, 1,
    10L, TimeUnit.SECONDS, new SynchronousQueue[Runnable](), CustomThreadFactory("Log-worker"))

  private val commandExecutor = new CommandExecutor(stateMachine)

  // All log mutations are funneled through this queue to the log worker.
  private val messageQueue = new LinkedBlockingQueue[Message]()

  @volatile
  var commitIndex: Long = 0
  @volatile
  var lastApplied: Long = stateMachine.getLastAppliedIndex

  // Promises completed once the entry at the given index has been applied (see notifyResult).
  private val applyPromises = new ConcurrentHashMap[Long, Promise[_]]()

  def bootstrap() = {
    assertEmptyLog()
    assertNoSnapshot()
  }

  //Leader append path
  def append[T](term: Term, write: WriteCommand[T]): Future[(LogEntry, Promise[T])] = {
    append(LeaderAppend[T](term, write))
  }

  //Follower append path
  def tryAppend(appendEntries: AppendEntries): Future[Boolean] = {
    logger.trace("Try appending {}", appendEntries)
    val canAppend = hasPreviousLogEntry(appendEntries)
    if (canAppend) {
      appendAll(appendEntries.entries) map { _ ⇒
        commit(appendEntries.commitIndex)
        canAppend
      }
    } else {
      logger.trace("Rejecting {}", appendEntries)
      Future.successful(canAppend)
    }
  }

  private def hasPreviousLogEntry(appendEntries: AppendEntries) = {
    containsEntry(appendEntries.prevLogIndex, appendEntries.prevLogTerm)
  }

  //Follower appends all these entries and waits for them to be flushed to the persistentLog
  private def appendAll(entries: List[LogEntry]): Future[List[Index]] = {
    val appendPromises = entries.map { entry ⇒
      logger.trace(s"Try appending $entry")
      Some(append(FollowerAppend(entry)))
    }
    composeFutures(appendPromises)
  }

  private def composeFutures(appendPromises: List[Option[Future[Index]]]) = {
    val futures = for {
      append ← appendPromises
      future ← append
    } yield future
    Future.sequence(futures)
  }

  private def hasIndex(index: Long) = log.getLastIndex >= index

  /** Schedules applying entries up to index; no-op when already applied. */
  def commit(index: Long) = {
    if (lastApplied < index) {
      messageQueue.offer(WriteCommitMessage(index))
    }
  }

  def execute[T](command: ReadCommand[T]) = applyRead(command)

  /** Entry at index; optionally reports a compacted placeholder for snapshotted indexes. */
  def entry(index: Long, allowCompactedEntry: Boolean = false): Option[LogEntry] = {
    val entry = log.getEntry(index)
    if (entry != null) Some(entry)
    else if (allowCompactedEntry && snapshotManager.isInSnapshot(index)) Some(snapshotManager.compactedEntry)
    else None
  }

  def lastEntry: Option[LogEntry] = {
    val lastLogIndex = findLastLogIndex()
    if (snapshotManager.isInSnapshot(lastLogIndex)) {
      Some(snapshotManager.compactedEntry)
    } else {
      entry(lastLogIndex)
    }
  }

  def getPreviousLogEntry(logEntry: LogEntry): Option[LogEntry] = entry(logEntry.index - 1, true)

  // Raft consistency check: the entry exists with the same term, or is the zero entry,
  // or is covered by a snapshot.
  private def containsEntry(index: Long, term: Int) = {
    val logEntryOption = entry(index)
    if (logEntryOption.isDefined) logEntryOption.get.term == term else (isZeroEntry(index, term) || snapshotManager.isInSnapshot(index, term))
  }

  // The (index = -1, term = -1) placeholder preceding the first real entry.
  private def isZeroEntry(index: Long, term: Int): Boolean = index == -1 && term == -1

  def resetLastIndex() = _lastIndex.set(findLastLogIndex())

  private def findLastLogIndex(): Long = {
    val lastIndex = log.getLastIndex
    if (lastIndex > 0) lastIndex else snapshotManager.latestSnapshotIndex
  }

  private def nextLogIndex() = _lastIndex.incrementAndGet()

  def size() = log.size

  def stop() = {
    logWorker.shutdownNow()
    logWorker.awaitTermination(10, TimeUnit.SECONDS)
    log.close()
  }

  def serializeStateMachine = stateMachine.takeSnapshot()

  private def assertEmptyLog() = {
    if (log.size > 0) throw new IllegalStateException("Log is not empty")
  }

  private def assertNoSnapshot() = {
    if (snapshotManager.latestSnapshotIndex > 0) throw new IllegalStateException("A Snapshot was found")
  }

  def initialize() = {
    logger.info("Initializing RLog...")
    restoreLatestClusterConfiguration()
    replay()
    startLogWorker()
    logger.info("Done initializing RLog")
  }

  // Re-applies committed-but-unapplied entries after restart: snapshot first, then the log tail.
  private def replay(): Unit = {
    lastApplied = reloadSnapshot()
    val from = lastApplied + 1
    val to = commitIndex
    if (from <= to) replay(from, to)
    else logger.info("No entry to replay. commitIndex is #{}", commitIndex)
  }

  // Returns the last applied index after considering the latest snapshot (if any).
  private def reloadSnapshot(): Long = {
    val latestSnapshot = snapshotManager.latestSnapshot()
    val lastAppliedIndex: Long = latestSnapshot map { snapshot ⇒
      logger.info("Found a {}", snapshot)
      if (snapshot.index > lastApplied) {
        logger.info("The Snapshot has more recent data than the StateMachine. Will reload it...")
        snapshotManager.reload(snapshot)
        snapshot.index
      } else {
        logger.info("The StateMachine has more recent data than the Snapshot")
        membership.transitionTo(snapshot.clusterConfiguration)
        lastApplied
      }
    } getOrElse {
      logger.info("No Snapshot was found")
      0
    }
    lastAppliedIndex
  }

  def installSnapshot(snapshot: Snapshot) = {
    val promise = Promise[Unit]()
    messageQueue.offer(InstallSnapshotMessage(promise, snapshot))
    promise.future
  }

  def isInSnapshot(index: Index) = snapshotManager.isInSnapshot(index)

  def latestSnapshot() = snapshotManager.latestSnapshot()

  private def restoreLatestClusterConfiguration() = {
    val latestClusterConfigurationEntry = findLatestClusterConfiguration()
    latestClusterConfigurationEntry foreach { entry ⇒
      logger.info("Found cluster configuration in the log: {}", entry.command)
      consensus.membership.changeConfiguration(entry.index, entry.command.asInstanceOf[ClusterConfigurationCommand])
    }
  }

  // Scans the log backwards for the newest ClusterConfigurationCommand entry.
  private def findLatestClusterConfiguration(): Option[LogEntry] = {
    traversingInReversal find { index ⇒
      val logEntry = entry(index)
      if (!logEntry.isDefined) return None
      logEntry.collect { case LogEntry(term, entry, c: ClusterConfigurationCommand) ⇒ true }.getOrElse(false)
    } map { index ⇒ entry(index) } flatten
  }

  def traversingInReversal: NumericRange[Long] = {
    findLastLogIndex to 1 by -1
  }

  def rollLog(index: Long) = {
    log.rollLog(index)
  }

  def lastIndex(): Index = _lastIndex.longValue()

  def isEmpty: Boolean = lastIndex() == 0L

  private def startLogWorker() = logWorker.execute(new Runnable {
    override def run(): Unit = runLogWorker()
  })

  // Enqueues an append; its promise completes once the log worker persisted the entry.
  private def append[T](append: Append[T]): Future[T] = {
    logger.trace(s"Append $append")
    val promise: Promise[T] = append.promise
    messageQueue.offer(AppendMessage(append))
    promise.future
  }

  // Log worker loop: consumes and executes messages until interrupted by stop().
  private def runLogWorker() = {
    logger.info(s"Starting Log from index #{}", lastApplied)
    try {
      // nextMessage returns a Message; apply() executes it (made explicit, was nextMessage()).
      while (true) nextMessage.apply()
    } catch {
      case e: InterruptedException ⇒ logger.info("Shutdown LogWorker...")
    }
  }

  private def applyLogCompactionPolicy() = snapshotManager.applyLogCompactionPolicy()

  // Configuration entries take effect as soon as they are appended (not when committed).
  private def onLogEntryAppended(append: Append[_])(entry: LogEntry) = {
    entry.command match {
      case configuration: ClusterConfigurationCommand ⇒ membership.changeConfiguration(entry.index, configuration)
      case _ ⇒ ;
    }
    append.onComplete(entry)
  }

  private def replay(from: Long, to: Long): Unit = {
    logger.debug("Start log replay from index #{} to #{}", from, to)
    entry(to).foreach {
      entry ⇒
        applyUntil(entry)
    }
    logger.debug("Finished log replay")
  }

  private def isFromCurrentTerm(entryOption: Option[LogEntry]) = {
    entryOption.exists(entry ⇒ entry.term.equals(consensus.term))
  }

  // Applies every committed-but-unapplied entry up to (and including) the given one.
  private def applyUntil(entry: LogEntry) = {
    (lastApplied + 1) to entry.index foreach { index ⇒
      entryToApply(index, entry).map { entry ⇒
        updateCommitIndex(index)
        logger.debug("Will apply committed entry {}", entry)
        val result = execute(entry.index, entry.command)
        updateLastAppliedIndex(index)
        notifyResult(index, result)
      }.orElse {
        logger.error(s"Missing index #$index")
        None
      }
    }
  }

  private def updateCommitIndex(index: Long) = {
    commitIndex = index
    logger.debug("New commitIndex is #{}", index)
  }

  private def updateLastAppliedIndex(index: Long) = {
    lastApplied = index //TODO: What do we assume about the StateMachine persistence?
    logger.debug("Last applied index is #{}", index)
  }

  private def entryToApply(index: Long, logEntry: LogEntry) = {
    if (index == logEntry.index) Some(logEntry) else entry(index)
  }

  // Completes the promise (if any) registered for this index by the leader append path.
  private def notifyResult(index: Long, result: Any) = {
    val applyPromise = applyPromises.get(index).asInstanceOf[Promise[Any]]
    if (applyPromise != null) {
      applyPromise.success(result)
      applyPromises.remove(index)
    }
  }

  private def execute(index: Long, command: Command): Any = {
    command match {
      case jointConfiguration: JointConfiguration ⇒ consensus.onJointConfigurationCommitted(index, jointConfiguration)
      case newConfiguration: NewConfiguration ⇒ consensus.onNewConfigurationCommitted(index, newConfiguration)
      case NoOp() ⇒ true
      case write: WriteCommand[_] ⇒ executeInStateMachine(index, write)
    }
  }

  def executeInStateMachine(index: Long, write: WriteCommand[_]): Any = {
    logger.debug("Executing write {}", write)
    commandExecutor.applyWrite(index, write)
  }

  def applyRead[T](read: ReadCommand[T]) = {
    val promise = Promise[T]()
    messageQueue.offer(ReadApplyMessage(promise, read))
    promise.future
  }

  // Blocking take when idle, non-blocking poll otherwise.
  private def nextMessage = {
    if (messageQueue.isEmpty) {
      messageQueue.take()
    } else {
      messageQueue.poll
    }
  }

  def stats(): LogStats = LogStats(size(), commitIndex, lastEntry)

  // Unit of work executed by the log worker thread.
  trait Message {
    def apply()
  }

  case class WriteCommitMessage(index: Long) extends Message {
    def apply() = {
      if (lastApplied < index) {
        val logEntry = entry(index)
        // Only apply when the entry's term matches the current term (see isFromCurrentTerm).
        if (isFromCurrentTerm(logEntry)) {
          applyUntil(logEntry.get)
        }
      }
    }
  }

  case class ReadApplyMessage[T](promise: Promise[T], read: ReadCommand[T]) extends Message {
    def apply() = promise.trySuccess(commandExecutor.applyRead(read))
  }

  case class InstallSnapshotMessage(promise: Promise[Unit], snapshot: Snapshot) extends Message {
    def apply() = snapshotManager.installSnapshot(snapshot)
  }

  case class AppendMessage[T](append: Append[T]) extends Message {
    def apply() = {
      val logEntry = append.logEntry

      logger.debug(s"Appending $logEntry")

      if (!containsEntry(logEntry.index, logEntry.term)) {
        if (hasIndex(logEntry.index)) {
          //If an entry is overridden then all the subsequent entries must be removed
          logger.debug("Will discard inconsistent entries starting from index #{} to follow Leader's log", logEntry.index)
          log.discardEntriesFrom(logEntry.index)
        }
        log.append(logEntry).map { _ ⇒
          onLogEntryAppended(append)(logEntry)
        }
        applyLogCompactionPolicy()
      } else {
        logger.debug("Discarding append of a duplicate entry {}", logEntry)
      }
    }
  }

  // An append request plus the promise completed once the entry has been persisted.
  trait Append[T] {
    def promise: Promise[T]

    def logEntry: LogEntry

    def onComplete(logEntry: LogEntry)
  }

  case class LeaderAppend[T](term: Int, write: WriteCommand[T]) extends Append[(LogEntry, Promise[T])] {
    val _promise = Promise[(LogEntry, Promise[T])]()
    val _valuePromise = Promise[T]()

    def promise = _promise

    // Assigns the next log index and registers the value promise for completion on apply.
    val logEntry = {
      val logEntry = LogEntry(term, nextLogIndex, write)
      applyPromises.put(logEntry.index, _valuePromise)
      logEntry
    }

    def onComplete(logEntry: LogEntry) = _promise.success((logEntry, _valuePromise))
  }

  case class FollowerAppend(entry: LogEntry) extends Append[Long] {
    val _promise = Promise[Long]()

    def promise = _promise

    def logEntry = entry

    def onComplete(logEntry: LogEntry) = _promise.success(logEntry.index)
  }

}
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/Raft.scala:
--------------------------------------------------------------------------------
1 | package ckite
2 |
3 | import ckite.rlog.Storage
4 | import ckite.rpc._
5 | import ckite.statemachine.StateMachine
6 | import ckite.stats.{ ConsensusStats, Stats }
7 | import ckite.util.{ ConcurrencySupport, Logging }
8 |
9 | import scala.concurrent.Future
10 |
/**
 * Entry point wiring the Consensus module, the cluster Membership and the replicated Log
 * together. Implements RpcService by delegating every received message to the right
 * collaborator.
 */
class Raft(stateMachine: StateMachine, rpc: Rpc, storage: Storage, configuration: Configuration) extends RpcService with ConcurrencySupport with Logging {

  val consensus = Consensus(this, storage, configuration)
  val membership = Membership(LocalMember(this, configuration), rpc, configuration)
  val log = RLog(this, stateMachine, storage, configuration)

  /** Initializes the log and then picks the appropriate startup mode. */
  def start() = {
    logger.info(s"Starting CKite ${membership.myId}...")
    initializeLog()
    if (configuration.bootstrap) bootstrapStart()
    else if (!isInitialized) joinStart()
    else normalStart()
  }

  def initializeLog() = log.initialize()

  /** Startup for a node that must join an existing cluster. */
  def joinStart() = {
    logger.info("CKite not initialized. Join start")
    consensus.startAsJoiner()
  }

  /** Startup for an already-initialized node: simply resume as Follower. */
  private def normalStart() = {
    logger.info("CKite already initialized. Simple start")
    consensus.startAsFollower()
  }

  /** Startup for the first node of a brand new cluster; blocks until a Leader is announced. */
  private def bootstrapStart() = {
    logger.info("Bootstrapping a new CKite consensus cluster...")
    membership.bootstrap()
    log.bootstrap()
    consensus.startAsBootstrapper()
    consensus.leaderAnnouncer.awaitLeader
  }

  private def isInitialized = membership.isInitialized

  def stop() = {
    logger.info(s"Stopping CKite ${membership.myId}...")
    consensus.stop()
    log.stop()
  }

  def onRequestVoteReceived(requestVote: RequestVote): Future[RequestVoteResponse] = {
    logger.debug("RequestVote received: {}", requestVote)
    consensus.onRequestVote(requestVote)
  }

  def onAppendEntriesReceived(appendEntries: AppendEntries): Future[AppendEntriesResponse] = {
    logger.trace(s"Received $appendEntries")
    consensus.onAppendEntries(appendEntries)
  }

  def onCommandReceived[T](command: Command): Future[T] = {
    logger.debug("Command received: {}", command)
    consensus.onCommand[T](command)
  }

  def onInstallSnapshotReceived(installSnapshot: InstallSnapshot): Future[InstallSnapshotResponse] = {
    logger.debug("InstallSnapshot received")
    consensus.onInstallSnapshot(installSnapshot)
  }

  /** Local reads are executed directly against the replicated log / state machine. */
  def onLocalReadReceived[T](read: ReadCommand[T]) = log.execute(read)

  def onMemberJoinReceived(member: String): Future[JoinMemberResponse] = {
    logger.info(s"Join member $member request received")
    consensus.onMemberJoin(member)
  }

  def onMemberLeaveReceived(member: String): Future[Boolean] = {
    logger.info(s"Leave member $member request received")
    consensus.onMemberLeave(member)
  }

  def isLeader = consensus.isLeader

  def stats(): Stats = Stats(consensus.stats(), log.stats())

}
101 |
/** Factory for [[Raft]] instances. */
object Raft {
  def apply(stateMachine: StateMachine, rpc: Rpc, storage: Storage, configuration: Configuration) =
    new Raft(stateMachine, rpc, storage, configuration)
}
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/RemoteMember.scala:
--------------------------------------------------------------------------------
1 | package ckite
2 |
3 | import java.util.concurrent.ConcurrentHashMap
4 | import java.util.concurrent.atomic.{ AtomicBoolean, AtomicLong }
5 |
6 | import ckite.rpc.LogEntry.Index
7 | import ckite.rpc._
8 |
9 | import scala.concurrent.ExecutionContext.Implicits.global
10 | import scala.concurrent.Future
11 |
/**
 * A remote cluster member reached through an RpcClient. Tracks per-peer replication state:
 * nextLogIndex (next entry to send) and matchIndex (highest index known to be replicated).
 */
class RemoteMember(rpc: Rpc, binding: String) extends Member(binding) {

  logger.debug(s"Creating RemoteMember client for $binding")

  val nextLogIndex = new AtomicLong(1)
  val matchIndex = new AtomicLong(0)

  private val client: RpcClient = rpc.createClient(id)

  // Leader-side gate used to pause/resume replication to this member.
  private val replicationsEnabled = new AtomicBoolean(true)
  // Indexes with an in-flight replication, to avoid sending the same entry twice.
  private val replicationsInProgress = new ConcurrentHashMap[Long, Boolean]()

  override def forwardCommand[T](command: Command): Future[T] = {
    logger.debug(s"Forward command ${command} to ${id}")
    client.send[T](command)
  }

  def sendAppendEntries(appendEntries: AppendEntries) = {
    client.send(appendEntries)
  }

  def sendRequestVote(requestVote: RequestVote) = {
    client.send(requestVote)
  }

  private def markAsReplicated(index: Index): Unit = replicationsInProgress.remove(index)

  def canReplicateIndex(index: Index): Boolean = isReplicationEnabled && !isBeingReplicated(index)

  // put returns the previous value: null (unboxed to false) when the index was not in flight.
  private def isBeingReplicated(index: Long) = replicationsInProgress.put(index, true)

  private def isReplicationEnabled = replicationsEnabled.get()

  /** A successful replication acknowledges the index: advance matchIndex and nextLogIndex. */
  def acknowledgeIndex(index: Long) = {
    updateMatchIndex(index)
    updateNextLogIndex(index)
    markAsReplicated(index)
  }

  def markReplicationsNotInProgress(indexes: List[Long]) = {
    indexes.foreach(index ⇒ replicationsInProgress.remove(index))
  }

  // CAS loop: matchIndex only ever moves forward.
  private def updateMatchIndex(index: Long) = {
    var currentMatchIndex = matchIndex.longValue()
    while (currentMatchIndex < index && !matchIndex.compareAndSet(currentMatchIndex, index)) {
      currentMatchIndex = matchIndex.longValue()
    }
  }

  private def updateNextLogIndex(index: Long) = nextLogIndex.set(index + 1)

  /** Backs off nextLogIndex after a rejected AppendEntries (floor of 1). */
  def decrementNextLogIndex() = {
    val currentIndex = nextLogIndex.decrementAndGet()
    if (currentIndex == 0) nextLogIndex.set(1)
    // Fix: the map is keyed by Long; the previous intValue() boxed to java.lang.Integer,
    // which never equals a boxed Long, so the remove was a no-op and the index stayed
    // marked as in-progress forever.
    replicationsInProgress.remove(nextLogIndex.longValue())
  }

  def sendInstallSnapshot(installSnapshot: InstallSnapshot) = {
    client.send(installSnapshot)
  }

  def setNextLogIndex(index: Long) = nextLogIndex.set(index)

  def resetMatchIndex = matchIndex.set(0)

  def enableReplications() = {
    val wasEnabled = replicationsEnabled.getAndSet(true)
    if (!wasEnabled) logger.debug(s"Enabling replications to $id")
    wasEnabled
  }

  def disableReplications() = {
    val wasEnabled = replicationsEnabled.getAndSet(false)
    if (wasEnabled) logger.debug(s"Disabling replications to $id")
    wasEnabled
  }

  /** Asks this member to let joiningMemberId into the cluster; failures map to an unsuccessful response. */
  def join(joiningMemberId: String): Future[JoinMemberResponse] = {
    logger.debug(s"Joining with $id")
    client.send(JoinMember(joiningMemberId)).recover {
      case reason: Throwable ⇒
        logger.warn(s"Can't join to member $id", reason)
        JoinMemberResponse(false)
    }
  }

}
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/exception/LeaderTimeoutException.scala:
--------------------------------------------------------------------------------
1 | package ckite.exception
2 |
3 | import java.util.concurrent.TimeoutException
4 |
/**
 * Raised when waiting for a Leader to be elected timed out.
 * Wraps the underlying [[java.util.concurrent.TimeoutException]] as the cause.
 */
case class LeaderTimeoutException(exception: TimeoutException) extends RuntimeException(exception)
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/exception/LostLeadershipException.scala:
--------------------------------------------------------------------------------
1 | package ckite.exception
2 |
/**
 * Raised when a node waiting on its leadership loses it.
 * This can happen during reads on a Leader that gets partitioned from the rest of the cluster.
 */
case class LostLeadershipException(reason: String) extends RuntimeException(reason)
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/exception/WriteTimeoutException.scala:
--------------------------------------------------------------------------------
1 | package ckite.exception
2 |
3 | import ckite.rpc.LogEntry
4 |
/**
 * Raised when waiting for a WriteCommand to be committed timed out.
 * The message carries the affected [[LogEntry]] for diagnosis.
 */
case class WriteTimeoutException(logEntry: LogEntry) extends RuntimeException(s"$logEntry")
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/rlog/FixedSizeLogCompactionPolicy.scala:
--------------------------------------------------------------------------------
1 | package ckite.rlog
2 |
3 | import ckite.statemachine.StateMachine
4 | import ckite.util.Logging
5 |
/** Compaction policy that fires once the log reaches `fixedSize` entries. */
class FixedSizeLogCompactionPolicy(fixedSize: Long) extends LogCompactionPolicy with Logging {

  /** Returns true (and logs it) when the log size reached the configured threshold. */
  def applies(persistentLog: Log, stateMachine: StateMachine) = {
    val currentSize = persistentLog.size
    val thresholdReached = currentSize >= fixedSize
    if (thresholdReached) {
      logger.info(s"Log size is ${currentSize} and exceeds the maximum threshold of ${fixedSize}")
    }
    thresholdReached
  }

}
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/rlog/Log.scala:
--------------------------------------------------------------------------------
1 | package ckite.rlog
2 |
3 | import ckite.rpc.LogEntry
4 | import ckite.rpc.LogEntry.Index
5 |
6 | import scala.concurrent.Future
7 |
/** Persistent replicated-log abstraction used by RLog. */
trait Log {

  /** Appends the entry; the Future completes once the append has been performed. */
  def append(entry: LogEntry): Future[Unit]

  /** Rolls (compacts) the log up to and including `upToIndex`. */
  def rollLog(upToIndex: Index): Unit

  def getEntry(index: Index): LogEntry //TODO: change it to Option[LogEntry]

  /** Index of the last appended entry. */
  def getLastIndex: Long

  /** Discards every entry from `index` onwards. */
  def discardEntriesFrom(index: Index): Unit

  /** Number of entries currently held by the log. */
  def size: Long

  def close(): Unit

}
25 |
26 |
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/rlog/LogAppender.scala:
--------------------------------------------------------------------------------
1 | package ckite.rlog
2 |
3 | import java.util.concurrent.{ LinkedBlockingQueue, SynchronousQueue, ThreadPoolExecutor, TimeUnit }
4 |
5 | import ckite.RLog
6 | import ckite.rpc.{ ClusterConfigurationCommand, Command, LogEntry, WriteCommand }
7 | import ckite.util.CKiteConversions.fromFunctionToRunnable
8 | import ckite.util.{ CustomThreadFactory, Logging }
9 |
10 | import scala.collection.mutable.ArrayBuffer
11 | import scala.concurrent.{ ExecutionContext, Future, Promise }
12 |
//NOTE(review): this class has an empty body — no appending logic lives here.
//Either dead code or a placeholder; the imports above suggest it once queued
//appends onto a worker thread. Confirm before removing.
class LogAppender(rlog: RLog, localLog: Log) extends Logging {

}
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/rlog/LogCompactionPolicy.scala:
--------------------------------------------------------------------------------
1 | package ckite.rlog
2 |
3 | import ckite.statemachine.StateMachine
4 |
/** Decides when the persistent log should be compacted into a snapshot. */
trait LogCompactionPolicy {

  /** True when compaction should run for the given log and state machine. */
  def applies(persistentLog: Log, stateMachine: StateMachine): Boolean

}
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/rlog/Snapshot.scala:
--------------------------------------------------------------------------------
1 | package ckite.rlog
2 |
3 | import java.nio.ByteBuffer
4 |
5 | import ckite.ClusterConfiguration
6 | import ckite.rpc.LogEntry._
7 |
/** Point-in-time capture of the replicated state: the (term, index) it covers, the cluster configuration at that point, and the serialized StateMachine. */
case class Snapshot(term: Term, index: Index, clusterConfiguration: ClusterConfiguration, stateMachineSerialized: ByteBuffer)
9 |
10 |
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/rlog/SnapshotManager.scala:
--------------------------------------------------------------------------------
1 | package ckite.rlog
2 |
3 | import java.util.concurrent.atomic.{ AtomicBoolean, AtomicReference }
4 | import java.util.concurrent.{ Executors, SynchronousQueue, ThreadPoolExecutor, TimeUnit }
5 |
6 | import ckite.rpc.LogEntry.{ Index, Term }
7 | import ckite.rpc.{ CompactedEntry, LogEntry }
8 | import ckite.util.CKiteConversions.fromFunctionToRunnable
9 | import ckite.util.{ CustomThreadFactory, Logging }
10 | import ckite.{ Configuration, Membership, RLog }
11 |
12 | import scala.concurrent.{ ExecutionContext, Future }
13 |
/**
 * Coordinates log compaction: decides when to snapshot, persists snapshots,
 * and restores/installs them, keeping track of the latest snapshot (index, term).
 */
case class SnapshotManager(membership: Membership, rlog: RLog, storage: Storage, configuration: Configuration) extends Logging {

  val logCompactionPolicy = new FixedSizeLogCompactionPolicy(configuration.logCompactionThreshold)

  val stateMachine = rlog.stateMachine

  //(index, term) of the latest snapshot; (0, 0) means no snapshot taken yet.
  val latestSnapshotCoordinates = new AtomicReference[(Index, Term)]((0, 0))

  /** Compacts the log when the policy says it grew beyond the threshold. */
  def applyLogCompactionPolicy() = {
    val compactionRequired = logCompactionPolicy.applies(rlog.log, rlog.stateMachine)
    if (compactionRequired) {
      logger.debug(s"Log compaction is required")
      compact()
    }
  }

  //Takes a snapshot, persists it, rolls the log and records the new coordinates.
  private def compact() = {
    val snapshot = takeSnapshot()
    save(snapshot)
    //rolls the log up to the given logIndex
    rlog.rollLog(snapshot.index)
    updateLatestSnapshotCoordinates(snapshot)
  }

  private def updateLatestSnapshotCoordinates(snapshot: Snapshot) =
    latestSnapshotCoordinates.set((snapshot.index, snapshot.term))

  private def save(snapshot: Snapshot) = {
    logger.debug(s"Saving Snapshot $snapshot")
    storage.saveSnapshot(snapshot)
    logger.debug(s"Finished saving Snapshot ${snapshot}")
  }

  //Builds a Snapshot out of the last applied entry, the current cluster
  //configuration and the serialized state machine.
  private def takeSnapshot(): Snapshot = {
    val latestEntry = rlog.entry(rlog.lastApplied).get
    val clusterConfiguration = membership.clusterConfiguration
    val serializedStateMachine = rlog.serializeStateMachine
    Snapshot(latestEntry.term, latestEntry.index, clusterConfiguration, serializedStateMachine)
  }

  /** Persists and applies a snapshot received from the Leader. */
  def installSnapshot(snapshot: Snapshot) = {
    logger.debug(s"Installing $snapshot")
    storage.saveSnapshot(snapshot)
    stateMachine.restoreSnapshot(snapshot.stateMachineSerialized)
    membership.transitionTo(snapshot.clusterConfiguration)
    logger.debug(s"Finished installing $snapshot")
  }

  /** Restores state machine, membership and coordinates from a stored snapshot (startup path). */
  def reload(snapshot: Snapshot) = {
    logger.info(s"Reloading $snapshot")
    stateMachine.restoreSnapshot(snapshot.stateMachineSerialized)
    membership.transitionTo(snapshot.clusterConfiguration)
    latestSnapshotCoordinates.set((snapshot.index, snapshot.term))
    logger.info(s"Finished reloading $snapshot")
  }

  def latestSnapshot(): Option[Snapshot] = storage.retrieveLatestSnapshot()

  def latestSnapshotIndex = latestSnapshotCoordinates.get()._1

  /** True when (index, term) falls within the latest snapshot. */
  def isInSnapshot(index: Index, term: Term): Boolean = {
    val (snapshotIndex, snapshotTerm) = latestSnapshotCoordinates.get()
    snapshotTerm >= term && snapshotIndex >= index
  }

  /** True when `index` falls within the latest snapshot. */
  def isInSnapshot(index: Index): Boolean = {
    val (snapshotIndex, _) = latestSnapshotCoordinates.get()
    snapshotIndex >= index
  }

  /** Placeholder LogEntry standing in for entries compacted into the snapshot. */
  def compactedEntry = {
    val (snapshotIndex, snapshotTerm) = latestSnapshotCoordinates.get()
    LogEntry(snapshotTerm, snapshotIndex, CompactedEntry())
  }

}
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/rlog/Storage.scala:
--------------------------------------------------------------------------------
1 | package ckite.rlog
2 |
3 | import ckite.rpc.LogEntry.Term
4 |
/** Persistence abstraction: the log plus durable vote and snapshot storage. */
trait Storage {
  /** The persistent Log backing this storage. */
  def log(): Log

  /** Stores the given vote. */
  def saveVote(vote: Vote)

  /** The most recently saved vote, if any. */
  def retrieveLatestVote(): Option[Vote]

  /** Stores the given snapshot. */
  def saveSnapshot(snapshot: Snapshot)

  /** The most recently saved snapshot, if any. */
  def retrieveLatestSnapshot(): Option[Snapshot]
}

/** The vote granted to `member` during `term`. */
case class Vote(term: Term, member: String)
18 |
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/rpc/AppendEntries.scala:
--------------------------------------------------------------------------------
1 | package ckite.rpc
2 |
/**
 * Raft AppendEntries RPC: carries log entries (or acts as a heartbeat when
 * `entries` is empty). `prevLogIndex`/`prevLogTerm` default to -1, meaning
 * there is no preceding entry to check consistency against.
 */
case class AppendEntries(term: Int, leaderId: String, commitIndex: Long, prevLogIndex: Long = -1,
                         prevLogTerm: Int = -1, entries: List[LogEntry] = List()) {
  override def toString = s"AppendEntries(term=$term,leaderId=$leaderId,commitIndex=$commitIndex,prevLogIndex=$prevLogIndex,prevLogTerm=$prevLogTerm,entries=$entries)"
}
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/rpc/AppendEntriesResponse.scala:
--------------------------------------------------------------------------------
1 | package ckite.rpc
2 |
/** Follower's reply to AppendEntries: its current term and whether the entries were accepted. */
case class AppendEntriesResponse(term: Int, success: Boolean)
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/rpc/ClusterConfigurationCommand.scala:
--------------------------------------------------------------------------------
1 | package ckite.rpc
2 |
/** Write command that changes the cluster membership configuration; commits to a Boolean success flag. */
trait ClusterConfigurationCommand extends WriteCommand[Boolean]
4 |
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/rpc/Command.scala:
--------------------------------------------------------------------------------
1 | package ckite.rpc
2 |
/** Base marker for every command sent through the cluster; Serializable so it can travel over RPC. */
trait Command extends Serializable
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/rpc/GetMembersRequest.scala:
--------------------------------------------------------------------------------
1 | package ckite.rpc
2 |
/** Request asking a node for the current cluster member list. */
case class GetMembersRequest()
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/rpc/GetMembersResponse.scala:
--------------------------------------------------------------------------------
1 | package ckite.rpc
2 |
/** Reply to [[GetMembersRequest]] with the known member bindings. */
case class GetMembersResponse(success: Boolean, members: Seq[String])
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/rpc/InstallSnapshot.scala:
--------------------------------------------------------------------------------
1 | package ckite.rpc
2 |
3 | import ckite.rlog.Snapshot
4 | import ckite.rpc.LogEntry.Term
5 |
/** Leader-to-follower RPC shipping a full [[Snapshot]] when the follower is too far behind the compacted log. */
case class InstallSnapshot(term: Term, leaderId: String, snapshot: Snapshot)

/** Whether the snapshot was accepted and installed by the receiver. */
case class InstallSnapshotResponse(success: Boolean)
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/rpc/JoinMember.scala:
--------------------------------------------------------------------------------
1 | package ckite.rpc
2 |
/** Request asking the cluster to add `memberId` to its configuration. */
case class JoinMember(memberId: String)
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/rpc/JoinMemberResponse.scala:
--------------------------------------------------------------------------------
1 | package ckite.rpc
2 |
/** Whether the join request was accepted. */
case class JoinMemberResponse(success: Boolean)
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/rpc/JointConfiguration.scala:
--------------------------------------------------------------------------------
1 | package ckite.rpc
2 |
3 | import com.esotericsoftware.kryo.{ Kryo, KryoSerializable }
4 | import com.esotericsoftware.kryo.io.{ Input, Output }
5 |
/**
 * Joint-consensus configuration (Raft C_old,new): holds both the old and the
 * new member sets while a membership change is in flight. Fields are vars
 * because Kryo mutates them in place on deserialization.
 */
case class JointConfiguration(var oldMembers: Set[String], var newMembers: Set[String]) extends ClusterConfigurationCommand with KryoSerializable {

  def write(kryo: Kryo, output: Output) = {
    output.writeString(oldMembers.mkString(","))
    output.writeString(newMembers.mkString(","))
  }

  def read(kryo: Kryo, input: Input) = {
    oldMembers = readMembers(input)
    newMembers = readMembers(input)
  }

  //Bug fix: an empty set serializes as "" and "".split(",") yields Array(""),
  //so the round trip used to produce Set("") instead of the empty set.
  private def readMembers(input: Input): Set[String] = {
    val serialized = input.readString()
    if (serialized.isEmpty) Set.empty else serialized.split(",").toSet
  }
}
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/rpc/LogEntry.scala:
--------------------------------------------------------------------------------
1 | package ckite.rpc
2 |
3 | import LogEntry._
4 |
/** One replicated log slot: the command plus the term and index it was appended at. */
case class LogEntry(term: Term, index: Index, command: Command) {
  override def toString = s"LogEntry(term=$term,index=$index,$command)"
}

object LogEntry {
  //Type aliases so Raft signatures read as (Index, Term) instead of bare (Long, Int).
  type Index = Long
  type Term = Int
}
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/rpc/NewConfiguration.scala:
--------------------------------------------------------------------------------
1 | package ckite.rpc
2 |
3 | import com.esotericsoftware.kryo.{ Kryo, KryoSerializable }
4 | import com.esotericsoftware.kryo.io.{ Input, Output }
5 |
/**
 * Final cluster configuration (Raft C_new) committed at the end of a
 * membership change. The field is a var because Kryo mutates it in place
 * on deserialization.
 */
case class NewConfiguration(var bindings: Set[String]) extends ClusterConfigurationCommand with KryoSerializable {

  def write(kryo: Kryo, output: Output) = {
    output.writeString(bindings.mkString(","))
  }

  def read(kryo: Kryo, input: Input) = {
    //Bug fix: an empty set serializes as "" and "".split(",") yields Array(""),
    //so the round trip used to produce Set("") instead of the empty set.
    val serialized = input.readString()
    bindings = if (serialized.isEmpty) Set.empty else serialized.split(",").toSet
  }
}
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/rpc/NoOps.scala:
--------------------------------------------------------------------------------
1 | package ckite.rpc
2 |
/** A write command that carries no operation. */
case class NoOp() extends WriteCommand[Unit]

/** Marker value for responses with no payload. */
case object Void

/** Placeholder read command standing in for entries removed by log compaction. */
case class CompactedEntry() extends ReadCommand[Unit]
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/rpc/ReadCommand.scala:
--------------------------------------------------------------------------------
1 | package ckite.rpc
2 |
/** Read-only command producing a T; applied to the StateMachine without being logged as a write. */
trait ReadCommand[T] extends Command
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/rpc/RequestVote.scala:
--------------------------------------------------------------------------------
1 | package ckite.rpc
2 |
/** Raft RequestVote RPC from a candidate. `lastLogIndex`/`lastLogTerm` default to -1, meaning the candidate's log is empty. */
case class RequestVote(memberId: String, term: Int, lastLogIndex: Long = -1, lastLogTerm: Int = -1) {
  override def toString(): String = s"RequestVote($memberId,term=$term,lastLogIndex=$lastLogIndex,lastLogTerm=$lastLogTerm)"
}
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/rpc/RequestVoteResponse.scala:
--------------------------------------------------------------------------------
1 | package ckite.rpc
2 |
/** Reply to RequestVote: the responder's current term and whether it granted the vote. */
case class RequestVoteResponse(currentTerm: Int, granted: Boolean)
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/rpc/Rpc.scala:
--------------------------------------------------------------------------------
1 | package ckite.rpc
2 |
3 | import com.typesafe.config.Config
4 |
/** Transport abstraction: builds the server and the per-member clients. */
trait Rpc {
  /** Creates the server side exposing `service` over this transport. */
  def createServer(service: RpcService, config: Config): RpcServer

  /** Creates a client connected to the member bound at `address`. */
  def createClient(address: String): RpcClient

}

/** Lifecycle of the transport's server side. */
trait RpcServer {

  def start(): Unit

  def stop(): Unit

}
19 |
20 |
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/rpc/RpcClient.scala:
--------------------------------------------------------------------------------
1 | package ckite.rpc
2 |
3 | import scala.concurrent.Future
4 |
/** Client side of the transport: one overload per Raft RPC, all asynchronous. */
trait RpcClient {

  def send(request: RequestVote): Future[RequestVoteResponse]

  def send(appendEntries: AppendEntries): Future[AppendEntriesResponse]

  def send(installSnapshot: InstallSnapshot): Future[InstallSnapshotResponse]

  /** Sends an arbitrary command; T is the caller-expected response type. */
  def send[T](command: Command): Future[T]

  def send(joinMember: JoinMember): Future[JoinMemberResponse]

}
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/rpc/RpcService.scala:
--------------------------------------------------------------------------------
1 | package ckite.rpc
2 |
3 | import scala.concurrent.Future
4 |
/** Server-side handler interface: one callback per inbound Raft RPC. */
trait RpcService {

  def onRequestVoteReceived(requestVote: RequestVote): Future[RequestVoteResponse]
  def onAppendEntriesReceived(appendEntries: AppendEntries): Future[AppendEntriesResponse]
  def onCommandReceived[T](command: Command): Future[T]
  def onInstallSnapshotReceived(installSnapshot: InstallSnapshot): Future[InstallSnapshotResponse]
  def onMemberJoinReceived(memberId: String): Future[JoinMemberResponse]

}
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/rpc/WriteCommand.scala:
--------------------------------------------------------------------------------
1 | package ckite.rpc
2 |
/** Command replicated through the log before being applied; produces a T once committed. */
trait WriteCommand[T] extends Command
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/statemachine/CommandExecutor.scala:
--------------------------------------------------------------------------------
1 | package ckite.statemachine
2 |
3 | import ckite.rpc.Command
4 | import ckite.rpc.WriteCommand
5 | import ckite.rpc.ReadCommand
6 | import ckite.util.Logging
7 |
/**
 * Bridges committed commands to the user's StateMachine partial functions,
 * failing loudly when the StateMachine declares no handler for a command.
 */
class CommandExecutor(stateMachine: StateMachine) extends Logging {

  val writeFunction = stateMachine.applyWrite
  val readFunction = stateMachine.applyRead

  /** Applies a committed write at `index`; throws IllegalStateException if unhandled. */
  def applyWrite[T](index: Long, write: WriteCommand[T]): T = {
    val input = (index, write)
    if (!writeFunction.isDefinedAt(input)) {
      logger.warn(s"No handler for ${write} is available in the StateMachine")
      throw new IllegalStateException(s"No handler for ${write}")
    }
    writeFunction(input).asInstanceOf[T]
  }

  /** Applies a read-only command; throws IllegalStateException if unhandled. */
  def applyRead[T](read: ReadCommand[T]): T = {
    if (!readFunction.isDefinedAt(read)) {
      logger.warn(s"No handler for ${read} is available in the StateMachine")
      throw new IllegalStateException(s"No handler for ${read}")
    }
    readFunction(read).asInstanceOf[T]
  }

}
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/statemachine/StateMachine.scala:
--------------------------------------------------------------------------------
1 | package ckite.statemachine
2 |
3 | import java.nio.ByteBuffer
4 |
5 | import ckite.rpc.{ ReadCommand, WriteCommand }
6 |
/** User-implemented replicated state machine driven by CKite. */
trait StateMachine {

  /**
   * Called when consensus has been reached on a WriteCommand.
   * Along with the WriteCommand an index is provided to allow
   * persistent StateMachines to save atomically both the WriteCommand's
   * updates and the index.
   * CKite will ask the lastAppliedIndex when deciding which WriteCommands can be replayed during startup.
   *
   * Memory consistency effects: Since all the operations on the StateMachine are done by
   * a single thread then every read, write or snapshot operation happens-before the subsequent
   * read, write or snapshot operation.
   */
  def applyWrite: PartialFunction[(Long, WriteCommand[_]), Any]

  /**
   * The last applied index in the StateMachine. Used on startup to decide
   * which WriteCommands need replaying.
   */
  def getLastAppliedIndex: Long

  /**
   * Called when readonly commands are requested. Reads are not appended
   * to the log and so are not passed an index.
   */
  def applyRead: PartialFunction[ReadCommand[_], Any]

  /**
   * Restore the StateMachine state from a Snapshot previously produced by
   * [[takeSnapshot]].
   */
  def restoreSnapshot(byteBuffer: ByteBuffer)

  /**
   * Captures the StateMachine state as a Snapshot for log compaction.
   */
  def takeSnapshot(): ByteBuffer

}
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/statemachine/j/StateMachine.scala:
--------------------------------------------------------------------------------
1 | package ckite.statemachine.j
2 |
3 | import java.nio.ByteBuffer
4 | import ckite.rpc.Command
5 | import ckite.rpc.WriteCommand
6 | import ckite.rpc.ReadCommand
7 |
/**
 * Java-friendly StateMachine contract, adapted to the Scala API by
 * StateMachineWrapper.
 */
trait StateMachine {

  /** Restores the state from a previously serialized snapshot. */
  def deserialize(byteBuffer: ByteBuffer)

  /** Captures the current state as a snapshot. */
  def serialize(): ByteBuffer

  /** Applies a committed write command at the given log index. */
  def applyWrite(index: Long, write: WriteCommand[_]): Any

  /** Applies a read-only command. */
  def applyRead(read: ReadCommand[_]): Any

  /** The index of the last write applied to this StateMachine. */
  def lastAppliedIndex: Long

}
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/statemachine/j/StateMachineWrapper.scala:
--------------------------------------------------------------------------------
1 | package ckite.statemachine.j
2 |
3 | import java.nio.ByteBuffer
4 | import ckite.rpc.Command
5 | import ckite.rpc.WriteCommand
6 | import ckite.rpc.ReadCommand
7 |
/** Adapts the Java-friendly StateMachine interface to the Scala partial-function API. */
class StateMachineWrapper(jstateMachine: StateMachine) extends ckite.statemachine.StateMachine {

  /** Snapshot restore delegates to the wrapped instance. */
  def restoreSnapshot(byteBuffer: ByteBuffer) = jstateMachine.deserialize(byteBuffer)

  /** Snapshot capture delegates to the wrapped instance. */
  def takeSnapshot(): ByteBuffer = jstateMachine.serialize()

  //Total function: every (index, command) pair is forwarded to the delegate.
  def applyWrite: PartialFunction[(Long, WriteCommand[_]), Any] = {
    case (appliedIndex, writeCommand) ⇒ jstateMachine.applyWrite(appliedIndex, writeCommand)
  }

  //Total function: every read command is forwarded to the delegate.
  def applyRead: PartialFunction[ReadCommand[_], Any] = {
    case readCommand ⇒ jstateMachine.applyRead(readCommand)
  }

  def getLastAppliedIndex: Long = jstateMachine.lastAppliedIndex
}
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/states/Candidate.scala:
--------------------------------------------------------------------------------
1 | package ckite.states
2 |
3 | import java.util.concurrent.atomic.AtomicReference
4 | import java.util.concurrent.{ ConcurrentHashMap, SynchronousQueue, ThreadPoolExecutor, TimeUnit }
5 |
6 | import ckite._
7 | import ckite.rpc.LogEntry.Term
8 | import ckite.rpc._
9 | import ckite.util.CKiteConversions._
10 | import ckite.util.CustomThreadFactory
11 |
12 | import scala.collection.JavaConverters._
13 | import scala.concurrent.ExecutionContext.Implicits.global
14 | import scala.concurrent.duration._
15 | import scala.concurrent.{ Await, Future, Promise }
16 | import scala.util.{ Failure, Success, Try }
17 |
/**
 * Raft Candidate state: starts an election for `term`, collecting votes from
 * all members. Wins on a quorum of granted votes (becomes Leader), otherwise
 * steps down to Follower. Election runs on its own single worker thread so it
 * can be cancelled when a competing Leader or newer term shows up.
 */
case class Candidate(consensus: Consensus, membership: Membership, log: RLog, term: Term, leaderAnnouncer: LeaderAnnouncer) extends State(Some(membership.myId)) {

  //Single worker: at most one election task runs at a time for this Candidate.
  private val electionWorker = new ThreadPoolExecutor(1, 1,
    60L, TimeUnit.SECONDS,
    new SynchronousQueue[Runnable](),
    CustomThreadFactory(s"CandidateElection-worker-${membership.myId}"))

  //Handle to the in-flight election so it can be cancelled on step down.
  private val runningElection = new AtomicReference[java.util.concurrent.Future[_]]()
  //Votes received so far, keyed by member id: true = granted, false = rejected.
  private val votes = new ConcurrentHashMap[String, Boolean]()
  private val maxVotesExpected = membership.members.size

  /** Entering Candidate state immediately kicks off an election. */
  override def begin() = {
    logger.debug(s"Start election")
    startElection()
  }

  private def startElection() = {
    val electionTask: Runnable = () ⇒ {
      election()
    }
    runningElection.set(electionWorker.submit(electionTask))
  }

  //One election round: quorum of granted votes wins, anything else loses.
  private def election() = {
    val votes = collectVotes()
    logger.debug(s"Got ${votes.size} votes")
    if (membership.reachQuorum(votes)) wonElection() else lostElection()
  }

  def lostElection() {
    logger.info(s"Not enough votes to be a Leader")
    consensus.becomeFollower(term = term, vote = Some(membership.myId)) //voted for my self when Candidate
  }

  def wonElection() {
    logger.info(s"Won the election. Will become Leader...")
    consensus.becomeLeader(term)
  }

  //Requests votes from every remote member and blocks until a quorum is
  //granted, a quorum is rejected, all replies arrived, or the timeout expires.
  private def collectVotes(): Set[String] = {
    if (!membership.hasRemoteMembers) return Set(membership.myId)
    val votesPromise = Promise[Set[String]]()

    voteForMyself()

    val lastLogEntry = log.lastEntry
    membership.remoteMembers.foreach { remoteMember ⇒
      Future {
        (remoteMember.id, requestVote(remoteMember, lastLogEntry))
      } onComplete {
        case Success((member, vote)) ⇒ {
          vote map { granted ⇒
            votes.put(member, granted)
            val grantedVotes = votes.asScala.filter { tuple ⇒ tuple._2 }.keySet.toSet
            val rejectedVotes = votes.asScala.filterNot { tuple ⇒ tuple._2 }.keySet.toSet
            //Resolve the promise as soon as the outcome is decided either way,
            //or once every expected vote has been recorded.
            if (membership.reachQuorum(grantedVotes) ||
              membership.reachSomeQuorum(rejectedVotes) ||
              maxVotesExpected == votes.size())
              votesPromise.trySuccess(grantedVotes)
          }
        }
        case Failure(reason) ⇒ {
          logger.error("failure collecting votes", reason)
        }
      }
    }
    Try {
      Await.result(votesPromise.future, consensus.configuration.collectVotesTimeout millis) //TODO: Refactor me
    } getOrElse {
      //Timed out: settle for whatever granted votes arrived so far.
      votes.asScala.filter { tuple ⇒ tuple._2 }.keySet.toSet
    }
  }

  //Sends RequestVote carrying the last log entry coordinates (defaults when the
  //log is empty). Any failure or stale-term reply counts as a rejected vote.
  private def requestVote(remoteMember: RemoteMember, lastLogEntry: Option[LogEntry]): Future[Boolean] = {
    logger.debug(s"Requesting vote to ${remoteMember.id}")
    remoteMember.sendRequestVote(lastLogEntry match {
      case None ⇒ RequestVote(membership.myId, term)
      case Some(entry) ⇒ RequestVote(membership.myId, term, entry.index, entry.term)
    }).map { voteResponse ⇒
      logger.debug(s"Got $voteResponse from ${remoteMember.id}")
      voteResponse.granted && voteResponse.currentTerm == term
    } recover {
      case reason: Throwable ⇒
        logger.debug(s"Error requesting vote: ${reason.getMessage()}")
        false
    }
  }

  private def voteForMyself() {
    //This is in conflict with the vote set upon Candidate creation
    if (membership.members.contains(membership.myId)) {
      votes.put(membership.myId, true) //Can Local vote???
    }
  }

  private def abortElection() = {
    logger.debug("Abort Election")
    val future = runningElection.get()
    if (future != null) future.cancel(true)
  }

  //Accept: a Leader of this same term, a Follower of this or a newer term,
  //or any other state of a strictly newer term.
  override def canTransitionTo(newState: State) = {
    newState match {
      case leader: Leader ⇒ leader.term == term
      case follower: Follower ⇒ follower.term >= term //in case of split vote or being an old candidate
      case _ ⇒ newState.term > term
    }
  }

  override def onAppendEntries(appendEntries: AppendEntries): Future[AppendEntriesResponse] = {
    appendEntries.term match {
      case leaderTerm if leaderTerm < term ⇒ rejectOldLeader(appendEntries)
      case leaderTerm if leaderTerm >= term ⇒ stepDownAndPropagate(appendEntries)
    }
  }

  /** Another Leader was already elected: abort the election and step down. */
  override def stepDownAndPropagate(appendEntries: AppendEntries): Future[AppendEntriesResponse] = {
    logger.debug("Leader already elected in term[{}]", appendEntries.term)
    abortElection()
    super.stepDownAndPropagate(appendEntries)
  }

  /** A newer-term candidate appeared: abort the election and step down. */
  override def stepDownAndPropagate(requestVote: RequestVote): Future[RequestVoteResponse] = {
    abortElection()
    super.stepDownAndPropagate(requestVote)
  }

  override def onRequestVote(requestVote: RequestVote): Future[RequestVoteResponse] = {
    requestVote.term match {
      case candidateTerm if candidateTerm < term ⇒ rejectOldCandidate(requestVote.memberId)
      case candidateTerm if candidateTerm == term ⇒ rejectVote(requestVote.memberId, "contender candidate of the same term")
      case candidateTerm if candidateTerm > term ⇒ stepDownAndPropagate(requestVote)
    }
  }

  //A Candidate always rejects InstallSnapshot requests.
  override def onInstallSnapshot(installSnapshot: InstallSnapshot): Future[InstallSnapshotResponse] = {
    Future.successful(InstallSnapshotResponse(false))
  }

  //Commands are forwarded to the Leader once one gets announced.
  override def onCommand[T](command: Command): Future[T] = {
    leaderAnnouncer.onLeader(_.forwardCommand[T](command))
  }

  override def toString = s"Candidate[$term]"

}
164 |
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/states/Follower.scala:
--------------------------------------------------------------------------------
1 | package ckite.states
2 |
3 | import java.util.Random
4 | import java.util.concurrent.atomic.AtomicReference
5 | import java.util.concurrent.{ ScheduledFuture, TimeUnit }
6 |
7 | import ckite._
8 | import ckite.rpc._
9 | import ckite.util.CKiteConversions.fromFunctionToRunnable
10 | import ckite.util.{ ConcurrencySupport, Logging }
11 |
12 | import scala.concurrent.ExecutionContext.Implicits.global
13 | import scala.concurrent.Future
14 | import scala.util.Try
15 |
/**
 * Raft Follower state: passively replicates entries from the Leader, grants votes,
 * and converts to Candidate when the election timeout expires without hearing from a Leader.
 *
 * @param vote the vote already cast in this term, if any (seeds State#votedFor)
 */
case class Follower(consensus: Consensus, membership: Membership, log: RLog, term: Int, leaderAnnouncer: LeaderAnnouncer, vote: Option[String]) extends State(vote) with Logging {

  private val electionTimeout = new ElectionTimeout(consensus, term)

  override def begin() = {
    resetElectionTimeout() //start the election timeout if no communication from the Leader
  }

  override def onAppendEntries(appendEntries: AppendEntries): Future[AppendEntriesResponse] = {
    appendEntries.term match {
      case leaderTerm if leaderTerm < term  ⇒ rejectOldLeader(appendEntries)
      case leaderTerm if leaderTerm > term  ⇒ stepDownAndPropagate(appendEntries)
      case leaderTerm if leaderTerm == term ⇒ receivedAppendEntriesFromLeader(appendEntries)
    }
  }

  override def onRequestVote(requestVote: RequestVote): Future[RequestVoteResponse] = {
    requestVote.term match {
      case requestTerm if requestTerm < term  ⇒ rejectOldCandidate(requestVote.memberId)
      case requestTerm if requestTerm > term  ⇒ stepDownAndPropagate(requestVote)
      case requestTerm if requestTerm == term ⇒ analyzeRequestVote(requestVote)
    }
  }

  private def receivedAppendEntriesFromLeader(appendEntries: AppendEntries): Future[AppendEntriesResponse] = {
    Try {
      resetElectionTimeout() //Leader is alive. God save the Leader!
      announceLeader(appendEntries.leaderId)
      append(appendEntries)
    }.recover {
      // Any synchronous failure is turned into a rejection so the Leader retries.
      case reason: Exception ⇒ rejectAppendEntries(appendEntries, reason.getMessage)
    }.get
  }

  private def analyzeRequestVote(requestVote: RequestVote): Future[RequestVoteResponse] = {
    val couldGrantVote = checkGrantVotePolicy(requestVote)
    if (couldGrantVote) {
      if (tryGrantVoteTo(requestVote.memberId)) {
        logger.debug(s"Granting vote to ${requestVote.memberId} in term[${term}]")
        resetElectionTimeout()
        consensus.persistState()
        grantVote()
      } else {
        rejectVote(requestVote.memberId, s"already voted for ${votedFor.get()}")
      }
    } else {
      rejectVote(requestVote.memberId, s"not granted vote policy")
    }
  }

  /** Atomically casts the vote, or succeeds idempotently if we already voted for `member`. */
  private def tryGrantVoteTo(member: String): Boolean = {
    votedFor.compareAndSet(None, Some(member)) || votedFor.get().equals(Some(member))
  }

  override def onCommand[T](command: Command): Future[T] = leaderAnnouncer.onLeader(_.forwardCommand[T](command))

  def stepDownAndPropagate(installSnapshot: InstallSnapshot): Future[InstallSnapshotResponse] = {
    stepDown(installSnapshot.term)
    consensus.onInstallSnapshot(installSnapshot)
  }

  override def onInstallSnapshot(installSnapshot: InstallSnapshot): Future[InstallSnapshotResponse] = {
    installSnapshot.term match {
      case leaderTerm if leaderTerm < term  ⇒ Future.successful(InstallSnapshotResponse(REJECTED))
      case leaderTerm if leaderTerm > term  ⇒ stepDownAndPropagate(installSnapshot)
      case leaderTerm if leaderTerm == term ⇒ log.installSnapshot(installSnapshot.snapshot).map(_ ⇒ InstallSnapshotResponse(ACCEPTED))
    }
  }

  private def resetElectionTimeout() = electionTimeout.restart

  private def append(appendEntries: AppendEntries): Future[AppendEntriesResponse] = {
    log.tryAppend(appendEntries) map { success ⇒
      AppendEntriesResponse(term, success)
    }
  }

  private def announceLeader(leaderId: String): Unit = {
    // announce returns true only the first time a leader is announced for this term
    if (leaderAnnouncer.announce(leaderId)) {
      logger.info("Following {} in term[{}]", leaderId, term)
    }
  }

  private def checkGrantVotePolicy(requestVote: RequestVote) = {
    (hastNotVotedYet() || hasVotedFor(requestVote.memberId)) && isMuchUpToDate(requestVote)
  }

  /**
   * True if the vote currently held is for `member`.
   * Bug fix: this previously read the constructor `vote` (`vote.get`), which threw
   * NoSuchElementException when the Follower started with no vote and one was granted
   * later, and ignored votes cast after construction. It now reads the live `votedFor`.
   */
  def hasVotedFor(member: String): Boolean = votedFor.get().exists(_ == member)

  // Name kept (including the typo) for source compatibility with existing callers.
  def hastNotVotedYet(): Boolean = votedFor.get().isEmpty

  /** Raft up-to-date check: the candidate's log must be at least as advanced as ours. */
  private def isMuchUpToDate(requestVote: RequestVote) = {
    val lastLogEntry = log.lastEntry
    lastLogEntry.isEmpty || (requestVote.lastLogTerm >= lastLogEntry.get.term && requestVote.lastLogIndex >= lastLogEntry.get.index)
  }

  override def stop(stopTerm: Int) = {
    if (stopTerm > term) {
      electionTimeout.stop()
    }
  }

  override val toString = s"Follower[$term]"

}
121 |
/**
 * Schedules the randomized Raft election timeout for a given term.
 * When the timeout fires without being restarted, the member becomes a Candidate
 * for the next term.
 */
class ElectionTimeout(consensus: Consensus, term: Int) extends Logging {

  import ckite.states.ElectionTimeout._

  // Holds the currently scheduled timeout task, if any.
  private val pending = new AtomicReference[ScheduledFuture[_]]()

  /** Cancels any pending timeout and schedules a fresh, re-randomized one. */
  def restart = {
    stop
    start
  }

  private def start = {
    val electionTimeout = randomTimeout
    logger.trace(s"New timeout is $electionTimeout ms")
    val timeoutTask: Runnable = () ⇒ {
      logger.debug("Timeout reached! Time to elect a new leader")
      consensus.becomeCandidate(term + 1)
    }
    val scheduled = electionTimeoutScheduler.schedule(timeoutTask, electionTimeout, TimeUnit.MILLISECONDS)
    // Swap in the new task and cancel whatever was pending before, if anything.
    cancel(pending.getAndSet(scheduled))
  }

  /** Picks a timeout uniformly within [min, max) to reduce the chance of split votes. */
  private def randomTimeout = {
    val config = consensus.configuration
    val spread = config.maxElectionTimeout - config.minElectionTimeout
    val bound = if (spread > 0) spread.toInt else 1
    config.minElectionTimeout + random.nextInt(bound)
  }

  def stop() = cancel(pending.get())

  private def cancel(future: java.util.concurrent.Future[_]) = if (future != null) future.cancel(false)

}
159 |
/** Shared infrastructure for all ElectionTimeout instances: one RNG and one scheduler thread. */
object ElectionTimeout extends ConcurrencySupport {
  private val random = new Random()
  // Fixed: the string had an `s` interpolator but nothing to interpolate.
  private val electionTimeoutScheduler = scheduler("ElectionTimeout-worker")
}
164 |
/** Mixin that disables the election timeout entirely by skipping State#begin. */
trait NoElectionTimeout extends State {
  override def begin() = ()
}
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/states/Joiner.scala:
--------------------------------------------------------------------------------
1 | package ckite.states
2 |
3 | import ckite._
4 | import ckite.rpc.LogEntry.Term
5 |
/** Factory for the joining state: a Follower that never starts elections on its own. */
object Joiner {
  def apply(consensus: Consensus, membership: Membership, log: RLog, term: Term, configuration: Configuration): Follower = {
    // A joining member must not disrupt the cluster, hence the NoElectionTimeout mixin.
    val announcer = LeaderAnnouncer(membership, configuration)
    new Follower(consensus, membership, log, term, announcer, None) with NoElectionTimeout {
      override val toString = s"Joiner[$term]"
    }
  }
}
13 |
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/states/Leader.scala:
--------------------------------------------------------------------------------
1 | package ckite.states
2 |
3 | import java.lang.Boolean
4 | import java.util.concurrent.{ ConcurrentHashMap, TimeUnit }
5 |
6 | import ckite._
7 | import ckite.exception.LostLeadershipException
8 | import ckite.rpc.LogEntry.{ Index, Term }
9 | import ckite.rpc._
10 | import ckite.stats.{ LeaderInfo, FollowerInfo }
11 | import ckite.util.CKiteConversions.fromFunctionToRunnable
12 | import ckite.util.ConcurrencySupport
13 |
14 | import scala.collection.concurrent.TrieMap
15 | import scala.collection.JavaConverters._
16 | import scala.concurrent.ExecutionContext.Implicits.global
17 | import scala.concurrent.duration._
18 | import scala.concurrent.{ Await, Future, Promise }
19 | import scala.util.{ Try, Failure, Success }
20 |
/**
 * Raft Leader state: periodically broadcasts AppendEntries (heartbeats or replication),
 * tracks per-follower progress, commits entries once a quorum acknowledges them, and
 * steps down when it observes a higher term.
 */
case class Leader(consensus: Consensus, membership: Membership, log: RLog, term: Term, leaderAnnouncer: LeaderAnnouncer) extends State(Some(membership.myId)) with ConcurrencySupport {

  // NOTE(review): ReplicationTimeout and AppendEntriesTimeout both read appendEntriesTimeout
  // and neither appears to be referenced inside this class — confirm before removing.
  private val ReplicationTimeout = consensus.configuration.appendEntriesTimeout
  private val AppendEntriesTimeout = consensus.configuration.appendEntriesTimeout millis
  private val waitForLeaderTimeout = consensus.configuration.waitForLeaderTimeout millis
  // Single scheduler used for the periodic heartbeat broadcast.
  private val scheduledHeartbeatsPool = scheduler("HeartbeatThread")
  // memberId -> timestamp (ms) of the last AppendEntries response, used by stats().
  private val followersStats = new ConcurrentHashMap[String, Long]()
  private val startTime = now()

  override def begin() = {
    if (term < consensus.term) {
      // A newer term already exists; this leadership is stale before it started.
      logger.debug(s"Can't be a Leader of term $term. Current term is ${consensus.term}")
      consensus.becomeFollower(consensus.term)
    } else {
      resetLastIndex()
      resetNextAndMatchIndexes()
      startBroadcasting()
      // Leadership is announced only after the initial no-op (or initial config) commits.
      appendNoOp() andThen {
        case Success(_)      ⇒ announceLeadership()
        case Failure(reason) ⇒ logger.error("Failed to commit noop command", reason)
      }
    }
  }

  /** Starts the fixed-rate heartbeat/replication broadcast to all remote members. */
  def startBroadcasting() {
    logger.debug("Start broadcasting...")
    scheduledHeartbeatsPool.scheduleAtFixedRate(() ⇒ {
      broadcast()
    }, 0, consensus.configuration.heartbeatsInterval, TimeUnit.MILLISECONDS)
  }

  /** Sends one AppendEntries to every remote member, returning the in-flight responses. */
  private def broadcast(): Set[(RemoteMember, Future[AppendEntriesResponse])] = {
    logger.trace(s"Leader[$term] broadcasting AppendEntries")
    membership.remoteMembers map { member ⇒ (member, sendAppendEntries(member)) }
  }

  private def sendAppendEntries(member: RemoteMember): Future[AppendEntriesResponse] = {
    val request = createAppendEntriesFor(member)
    if (!request.entries.isEmpty) {
      logger.trace("Sending {} entries to {}", request.entries.size, member.id)
    }
    member.sendAppendEntries(request).map { response ⇒
      logger.trace(s"AppendEntries response ${response} from ${member.id}")
      if (response.term > term) {
        // A follower knows of a newer term: this Leader must step down.
        receivedHigherTerm(response.term)
      } else {
        receivedAppendEntriesResponse(member, request, response)
      }
      response
    }.andThen {
      case Failure(reason) ⇒
        logger.trace("Error sending appendEntries {}", reason.getMessage())
        // Release the in-flight marks so these indexes can be retried later.
        if (!request.entries.isEmpty) {
          member.markReplicationsNotInProgress(request.entries.map(_.index))
        }
    }
  }

  // Entries pending for this member → replication request; none → plain heartbeat.
  private def createAppendEntriesFor(member: RemoteMember) = toReplicateEntriesOf(member) match {
    case head :: list ⇒ replication(head, list)
    case Nil          ⇒ heartbeat()
  }

  private def receivedHigherTerm(higherTerm: Int) = {
    val currentTerm = consensus.term
    if (higherTerm > currentTerm) {
      logger.debug("Detected a term {} higher than current term {}. Step down", higherTerm, currentTerm)
      stepDown(higherTerm)
    }
  }

  private def replication(head: LogEntry, tail: List[LogEntry]) = {
    val entries = head :: tail
    // The previous entry (if any) lets the follower run the Raft consistency check.
    log.getPreviousLogEntry(head) match {
      case Some(previous) ⇒ normalReplication(previous, entries)
      case None           ⇒ firstReplication(entries)
    }
  }

  private def normalReplication(previous: LogEntry, entries: List[LogEntry]) = {
    AppendEntries(term, membership.myId, log.commitIndex, previous.index, previous.term, entries)
  }

  // Replication of the very first entries: there is no previous entry to reference.
  private def firstReplication(entries: List[LogEntry]) = {
    AppendEntries(term, membership.myId, log.commitIndex, entries = entries)
  }

  private def heartbeat() = AppendEntries(term, membership.myId, log.commitIndex)

  /** At most one entry per broadcast, and only if replication to `member` isn't already in flight. */
  private def toReplicateEntriesOf(member: RemoteMember): List[LogEntry] = {
    val index = member.nextLogIndex.longValue()
    val entries = for (
      entry ← log.entry(index) if (member.canReplicateIndex(index))
    ) yield entry
    List(entries).flatten
  }

  def stopBroadcasting() = {
    logger.debug("Stop broadcasting")
    scheduledHeartbeatsPool.shutdownNow()
  }

  private def announceLeadership() = {
    logger.info(s"Start being $this")
    leaderAnnouncer.announce(membership.myId)
  }

  // Per Raft, a new Leader commits an entry of its own term before serving reads.
  private def appendNoOp() = {
    if (log.isEmpty) {
      logger.info("Log is empty. First Leader. Appending initial cluster configuration")
      onCommand[Boolean](NewConfiguration(Set(membership.myId))) //the initial configuration must go through the log
    } else {
      logger.debug("Append a NoOp as part of Leader initialization")
      onCommand[Unit](NoOp())
    }
  }

  private def resetLastIndex() = log.resetLastIndex()

  // Followers start optimistic: nextIndex = lastIndex + 1, matchIndex reset.
  private def resetNextAndMatchIndexes() = {
    val nextIndex = log.lastIndex + 1
    membership.remoteMembers.foreach { member ⇒ member.setNextLogIndex(nextIndex); member.resetMatchIndex }
  }

  override def stop(stopTerm: Int) = {
    if (stopTerm > term) {
      stopBroadcasting()
      logger.debug("Stop being Leader")
    }
  }

  override def onCommand[T](command: Command): Future[T] = {
    command match {
      case write: WriteCommand[T] ⇒ onWriteCommand[T](write)
      case read: ReadCommand[T]   ⇒ onReadCommand[T](read)
    }
  }

  // Writes are appended locally, replicated, and resolved once committed and applied.
  private def onWriteCommand[T](write: WriteCommand[T]): Future[T] = {
    log.append[T](term, write) flatMap { tuple ⇒
      val logEntry = tuple._1
      val valuePromise = tuple._2
      broadcast(logEntry)
      valuePromise.future
    }
  }

  private def broadcast(logEntry: LogEntry): Unit = {
    if (membership.hasRemoteMembers) {
      broadcast()
    } else {
      // Single-member cluster: the entry is trivially committed by the quorum of one.
      logger.debug("No member to broadcast")
      log.commit(logEntry.index)
    }
  }

  // Reads first confirm leadership with a quorum round-trip to avoid stale reads.
  private def onReadCommand[T](command: ReadCommand[T]): Future[T] = onStillBeingLeader.flatMap(_ ⇒ log.execute(command))

  /**
   * Confirms this member is still Leader by collecting AppendEntries acks until a quorum
   * responds (success) or a quorum of failures / a term change is observed (failure).
   */
  private def onStillBeingLeader = {
    logger.trace(s"$this checking if still being Leader...")
    if (membership.hasRemoteMembers) {
      val promise = Promise[Unit]()
      val term = consensus.term()

      // This member counts towards its own quorum.
      val membersAck = TrieMap[String, Unit](membership.myId -> Unit)
      val memberFailures = TrieMap[String, Unit]()

      broadcast().foreach {
        case (member, futureResponse) ⇒
          futureResponse.andThen {
            case Success(response) ⇒ {
              if (consensus.term != term) lostLeadership(promise, "New term received")
              else {
                membersAck.put(member.id(), Unit)
                if (membership.reachQuorum(membersAck.keys.toSet)) {
                  logger.debug(s"$this ${membership.myId} is still the current Leader")
                  promise.trySuccess(())
                }
              }
            }
            case Failure(reason) ⇒ {
              memberFailures.put(member.id(), Unit)
              if (membership.reachSomeQuorum(memberFailures.keys.toSet)) {
                lostLeadership(promise, "Failed to reach quorum of members")
              }
            }
          }
      }
      promise.future
    } else Future.successful(())
  }

  private def lostLeadership(promise: Promise[Unit], reason: String) = {
    logger.debug(s"$this ${membership.myId} lost leadership. Reason: $reason")
    promise.tryFailure(LostLeadershipException(reason))
  }

  // Blocking variant used by the public API; bounded by waitForLeaderTimeout.
  override def isLeader() = {
    Try {
      Await.result(onStillBeingLeader, waitForLeaderTimeout)
      true
    }.getOrElse(false)
  }

  override def onAppendEntries(appendEntries: AppendEntries): Future[AppendEntriesResponse] = {
    if (appendEntries.term < term) {
      rejectOldLeader(appendEntries)
    } else {
      // Equal or higher term from another Leader: yield and let the new state handle it.
      stepDownAndPropagate(appendEntries)
    }
  }

  override def onRequestVote(requestVote: RequestVote): Future[RequestVoteResponse] = {
    if (requestVote.term <= term) {
      rejectVote(requestVote.memberId, s"being Leader in term $term")
    } else {
      stepDownAndPropagate(requestVote)
    }
  }

  // Second phase of joint consensus: once Cold,new commits, broadcast Cnew.
  override def onJointConfigurationCommitted(jointConfiguration: JointConfiguration) = {
    logger.debug(s"JointConfiguration is committed... will use and broadcast a NewConfiguration")
    onCommand[Boolean](NewConfiguration(jointConfiguration.newMembers))
  }

  // A Leader never accepts snapshots from others.
  override def onInstallSnapshot(installSnapshot: InstallSnapshot): Future[InstallSnapshotResponse] = {
    Future.successful(InstallSnapshotResponse(false))
  }

  private def receivedAppendEntriesResponse(member: RemoteMember, request: AppendEntries, response: AppendEntriesResponse) = {
    followersStats.put(member.id(), now())
    if (!request.entries.isEmpty) {
      updateNextLogIndex(member, request, response)
    }
    val nextIndex = member.nextLogIndex.intValue()
    // If the follower is so far behind that its next entry was compacted away, send a snapshot.
    if (isLogEntryInSnapshot(nextIndex)) {
      val wasEnabled = member.disableReplications()
      if (wasEnabled) {
        logger.debug(s"Next LogIndex #$nextIndex to be sent to ${member} is contained in a Snapshot. An InstallSnapshot will be sent.")
        sendInstallSnapshot(member)
      }
    }
  }

  private def updateNextLogIndex(member: RemoteMember, appendEntries: AppendEntries, appendEntriesResponse: AppendEntriesResponse) = {
    val lastIndexSent = appendEntries.entries.last.index
    if (appendEntriesResponse.success) {
      member.acknowledgeIndex(lastIndexSent)
      logger.debug(s"Member ${member} ack - index sent #$lastIndexSent - next index #${member.nextLogIndex}")
      tryToCommitEntries(lastIndexSent)
    } else {
      // Consistency check failed on the follower: back off one index and retry.
      member.decrementNextLogIndex()
      if (!appendEntries.entries.isEmpty) {
        member.markReplicationsNotInProgress(appendEntries.entries.map(_.index))
      }
      logger.debug(s"Member ${member} reject - index sent #$lastIndexSent - next index is #${member.nextLogIndex}")
    }
  }

  // Advance commitIndex over every index that has now reached quorum acknowledgment.
  private def tryToCommitEntries(lastEntrySent: Long) = {
    val currentCommitIndex = log.commitIndex
    (currentCommitIndex + 1) to lastEntrySent foreach { index ⇒
      if (reachQuorum(index)) {
        log.commit(index)
      }
    }
  }

  private def reachQuorum(index: Index) = membership.reachQuorum(membersHavingAtLeast(index) + membership.myId)

  private def membersHavingAtLeast(index: Long): Set[String] = {
    membership.remoteMembers.filter { remoteMember ⇒ remoteMember.matchIndex.longValue() >= index } map {
      _.id
    }
  }

  private def isLogEntryInSnapshot(logIndex: Int): Boolean = {
    log.isInSnapshot(logIndex)
  }

  /** Ships the latest snapshot to a lagging member, then re-enables normal replication. */
  def sendInstallSnapshot(member: RemoteMember) = {
    log.latestSnapshot map { snapshot ⇒
      val installSnapshot = InstallSnapshot(term, membership.myId, snapshot)
      logger.debug(s"Sending $installSnapshot to ${member}")
      member.sendInstallSnapshot(installSnapshot).map { response ⇒
        if (response.success) {
          logger.debug("Successful InstallSnapshot")
          member.acknowledgeIndex(snapshot.index)
          tryToCommitEntries(snapshot.index)
        } else {
          logger.debug("Failed InstallSnapshot")
        }
        member.enableReplications()
      }

    }
  }

  override def stats() = {
    val currentTime = now()
    val followers = followersStats.asScala.map {
      tuple ⇒
        val member = membership.get(tuple._1).get
        (tuple._1, FollowerInfo(lastAck(tuple._2, currentTime), member.matchIndex.intValue(), member.nextLogIndex.intValue()))
    }
    LeaderInfo(leaderUptime.toString, followers.toMap)
  }

  private def leaderUptime = (now() - startTime millis).toCoarsest

  private def lastAck(ackTime: Long, now: Long) = if (ackTime > 0) (now - ackTime millis).toString else "Never"

  private def now(): Long = System.currentTimeMillis()

  override def toString = s"Leader[$term]"

}
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/states/Starter.scala:
--------------------------------------------------------------------------------
1 | package ckite.states
2 |
3 | import ckite.rpc.LogEntry.Term
4 | import ckite.rpc._
5 | import ckite.{ Consensus, LeaderAnnouncer, Membership }
6 |
7 | import scala.concurrent.Future
8 |
/** Initial state before the consensus module finishes starting: everything is refused. */
case object Starter extends State {

  // Single reason reported for all refusals while starting.
  private val NotReadyYet = "not ready yet"

  override val term: Int = -1

  override def begin() = {}

  override def stepDown(term: Term) = {}

  /** Replication is refused until the member is ready. */
  override def onAppendEntries(appendEntries: AppendEntries): Future[AppendEntriesResponse] = rejectAppendEntries(appendEntries, NotReadyYet)

  /** Votes are refused until the member is ready. */
  override def onRequestVote(requestVote: RequestVote): Future[RequestVoteResponse] = rejectVote(requestVote.memberId, NotReadyYet)

  override def onInstallSnapshot(installSnapshot: InstallSnapshot): Future[InstallSnapshotResponse] = rejectInstallSnapshot()

  // No cluster context exists yet, so these collaborators are unavailable by design.
  override def leaderAnnouncer: LeaderAnnouncer = throw new UnsupportedOperationException()

  override protected def membership: Membership = throw new UnsupportedOperationException()

  override protected def consensus: Consensus = throw new UnsupportedOperationException()

}
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/states/State.scala:
--------------------------------------------------------------------------------
1 | package ckite.states
2 |
3 | import java.util.concurrent.atomic.AtomicReference
4 |
5 | import ckite._
6 | import ckite.rpc.LogEntry.Term
7 | import ckite.rpc._
8 | import ckite.stats.{ NonLeaderInfo, StateInfo }
9 | import ckite.util.Logging
10 |
11 | import scala.concurrent.Future
12 |
/**
 * Base class for all Raft states (Follower, Candidate, Leader, Starter, Stopped).
 * Holds the vote cast in the current term and provides the shared grant/reject and
 * step-down helpers used by concrete states.
 *
 * @param vote the vote already cast in this term, if any
 */
abstract class State(vote: Option[String] = None) extends Logging {

  /** Live vote for the current term; may be set after construction via CAS. */
  val votedFor = new AtomicReference[Option[String]](vote)

  protected val GRANTED, ACCEPTED = true
  protected val REJECTED = false

  def leaderAnnouncer: LeaderAnnouncer

  def term: Term

  protected def membership: Membership

  protected def consensus: Consensus

  /** Hook invoked when this state becomes current. Default: no-op. */
  def begin() = {
  }

  /** Hook invoked when this state is being replaced. Default: no-op. */
  def stop(term: Term) = {
  }

  def onRequestVote(requestVote: RequestVote): Future[RequestVoteResponse]

  def onAppendEntries(appendEntries: AppendEntries): Future[AppendEntriesResponse]

  /** Commands are unsupported by default; concrete states override as appropriate. */
  def onCommand[T](command: Command): Future[T] = throw new UnsupportedOperationException()

  def onJointConfigurationCommitted(jointConfiguration: JointConfiguration) = {}

  def onInstallSnapshot(installSnapshot: InstallSnapshot): Future[InstallSnapshotResponse]

  /** States only move forward: a transition requires a strictly higher term. */
  def canTransitionTo(newState: State): Boolean = newState.term > term

  protected def stepDown(term: Term): Unit = {
    logger.debug(s"${membership.myId} Step down from being $this")
    consensus.becomeFollower(term = term, leaderAnnouncer = leaderAnnouncer.onStepDown)
  }

  /** Steps down and re-dispatches the request so the new state handles it. */
  protected def stepDownAndPropagate(appendEntries: AppendEntries): Future[AppendEntriesResponse] = {
    stepDown(appendEntries.term)
    consensus.onAppendEntries(appendEntries)
  }

  /** Steps down and re-dispatches the vote request so the new state handles it. */
  protected def stepDownAndPropagate(requestVote: RequestVote): Future[RequestVoteResponse] = {
    stepDown(requestVote.term)
    consensus.onRequestVote(requestVote)
  }

  protected def rejectVote(candidateRejected: String, reason: String): Future[RequestVoteResponse] = {
    logger.debug(s"Rejecting vote to $candidateRejected due to $reason")
    Future.successful(RequestVoteResponse(term, REJECTED))
  }

  protected def rejectOldCandidate(candidateRejected: String) = {
    rejectVote(candidateRejected, "old Candidate term")
  }

  protected def rejectOldLeader(appendEntries: AppendEntries) = {
    rejectAppendEntries(appendEntries, "old Leader term")
  }

  protected def grantVote() = {
    Future.successful(RequestVoteResponse(term, GRANTED))
  }

  protected def rejectAppendEntries(appendEntries: AppendEntries, reason: String): Future[AppendEntriesResponse] = {
    // Bug fix: this interpolated the AppendEntries companion object ($AppendEntries),
    // logging a meaningless constant instead of the rejected request itself.
    logger.debug(s"Rejecting $appendEntries due to $reason")
    Future.successful(AppendEntriesResponse(term, REJECTED))
  }

  protected def rejectInstallSnapshot() = Future.successful(InstallSnapshotResponse(REJECTED))

  /** Blocks until a Leader is announced, then checks whether it is this member. */
  def isLeader = {
    leaderAnnouncer.awaitLeader.id().equals(membership.myId)
  }

  def stats(): StateInfo = NonLeaderInfo(if (leaderAnnouncer.isLeaderAnnounced) Some(leaderAnnouncer.awaitLeader.id()) else None)
}
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/states/Stopped.scala:
--------------------------------------------------------------------------------
1 | package ckite.states
2 |
3 | import ckite.rpc.LogEntry._
4 | import ckite.rpc._
5 | import ckite.{ Consensus, LeaderAnnouncer, Membership }
6 |
7 | import scala.concurrent.Future
8 |
/** Terminal state of a stopped member: refuses everything and never transitions again. */
case object Stopped extends State {

  // Max term guarantees canTransitionTo on other states never picks Stopped as stale.
  override val term: Int = Int.MaxValue

  override def begin() = {}

  override def stepDown(term: Term) = {}

  /** No transition out of Stopped is ever allowed. */
  override def canTransitionTo(state: State): Boolean = false

  override def isLeader = false

  /** Replication is acknowledged negatively, echoing the sender's term. */
  override def onAppendEntries(appendEntries: AppendEntries): Future[AppendEntriesResponse] = {
    Future.successful(AppendEntriesResponse(appendEntries.term, success = false))
  }

  /** Votes are never granted once stopped, echoing the candidate's term. */
  override def onRequestVote(requestVote: RequestVote): Future[RequestVoteResponse] = {
    Future.successful(RequestVoteResponse(requestVote.term, granted = false))
  }

  override def onInstallSnapshot(installSnapshot: InstallSnapshot): Future[InstallSnapshotResponse] = rejectInstallSnapshot()

  // A stopped member has no collaborators left.
  override def leaderAnnouncer: LeaderAnnouncer = throw new UnsupportedOperationException()

  override protected def membership: Membership = throw new UnsupportedOperationException()

  override protected def consensus: Consensus = throw new UnsupportedOperationException()

}
34 |
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/stats/StateInfo.scala:
--------------------------------------------------------------------------------
1 | package ckite.stats
2 |
/** Base type for the state-specific diagnostic information exposed via stats. */
class StateInfo

/** Stats reported while being Leader: uptime plus per-follower replication progress. */
case class LeaderInfo(leaderUptime: String, followers: Map[String, FollowerInfo]) extends StateInfo

/** Stats reported while not being Leader: the member currently followed, if known. */
case class NonLeaderInfo(following: Option[String]) extends StateInfo

/** Replication status of a single follower as seen by the Leader. */
case class FollowerInfo(lastHeartbeatACK: String, matchIndex: Int, nextIndex: Int)
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/stats/Stats.scala:
--------------------------------------------------------------------------------
1 | package ckite.stats
2 |
3 | import ckite.rpc.LogEntry
4 |
/** Top-level diagnostics snapshot: consensus-side and log-side statistics. */
case class Stats(consensus: ConsensusStats, log: LogStats)

/** Current term, state name, and the state-specific info (LeaderInfo/NonLeaderInfo). */
case class ConsensusStats(term: Int, state: String, stateInfo: StateInfo)

/** Log dimensions: number of entries, commit index, and the last appended entry. */
case class LogStats(length: Long, commitIndex: Long, lastEntry: Option[LogEntry])
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/storage/MemoryStorage.scala:
--------------------------------------------------------------------------------
1 | package ckite.storage
2 |
3 | import java.util.concurrent.atomic.AtomicReference
4 |
5 | import ckite.rlog.{ Log, Snapshot, Storage, Vote }
6 | import ckite.rpc.LogEntry
7 | import ckite.util.Logging
8 |
9 | import scala.annotation.tailrec
10 | import scala.collection.concurrent.TrieMap
11 | import scala.concurrent.Future
12 |
/** Non-durable Storage implementation keeping the log, snapshot and vote in memory. */
case class MemoryStorage() extends Storage {

  private val inMemoryLog = new MemoryLog()
  // Only the most recent snapshot/vote is retained.
  private val lastSnapshot = new AtomicReference[Option[Snapshot]](None)
  private val lastVote = new AtomicReference[Option[Vote]](None)

  override def log(): Log = inMemoryLog

  override def saveSnapshot(snapshot: Snapshot): Unit = lastSnapshot.set(Some(snapshot))

  override def retrieveLatestSnapshot(): Option[Snapshot] = lastSnapshot.get()

  override def saveVote(vote: Vote): Unit = lastVote.set(Some(vote))

  override def retrieveLatestVote(): Option[Vote] = lastVote.get()
}
29 |
/** In-memory Log backed by a concurrent TrieMap keyed by log index. */
class MemoryLog extends Log with Logging {

  // Log entries indexed by their log index.
  val map = TrieMap[Long, LogEntry]()

  override def append(entry: LogEntry): Future[Unit] = {
    map.put(entry.index, entry)
    Future.successful(())
  }

  /** Removes all entries up to and including `upToIndex` (log compaction). */
  override def rollLog(upToIndex: Long): Unit = {
    (1L to upToIndex) foreach { index ⇒
      logger.info(s"Removing entry #${index}")
      map.remove(index)
    }
  }

  override def size(): Long = map.size

  /** Returns the entry at `index`, or null when absent (Log interface contract). */
  override def getEntry(index: Long): LogEntry = map.get(index).orNull

  override def discardEntriesFrom(index: Long): Unit = {
    discardEntriesFromRecursive(index)
  }

  // Bug fix: TrieMap#remove returns an Option, which is never null, so the previous
  // `map.remove(index) != null` check was always true and the recursion never
  // terminated. Recur only while an entry was actually removed.
  @tailrec
  private def discardEntriesFromRecursive(index: Long): Unit = {
    if (map.remove(index).isDefined) discardEntriesFromRecursive(index + 1)
  }

  override def close(): Unit = {}

  /** Highest index present in the log, or 0 when empty (no full sort needed). */
  override def getLastIndex(): Long = {
    if (map.isEmpty) 0 else map.keys.max
  }

}
66 |
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/util/ConcurrencySupport.scala:
--------------------------------------------------------------------------------
1 | package ckite.util
2 |
3 | import java.util.concurrent.{ Executors, ScheduledExecutorService }
4 |
/** Mixin providing named, single-threaded schedulers. */
trait ConcurrencySupport {

  /** Creates a single-thread scheduler whose worker threads are named after `name`. */
  def scheduler(name: String): ScheduledExecutorService =
    Executors.newScheduledThreadPool(1, CustomThreadFactory(name))

}
12 |
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/util/Conversions.scala:
--------------------------------------------------------------------------------
1 | package ckite.util
2 |
3 | import java.util.concurrent.Callable
4 |
/** Implicit bridges from Scala 0-ary functions to java.util.concurrent task types. */
object CKiteConversions {

  /** Lets a `() ⇒ Any` be used wherever a Runnable is expected. */
  implicit def fromFunctionToRunnable(f: () ⇒ Any): Runnable = new Runnable() {
    override def run() = f()
  }

  /** Lets a `() ⇒ V` be used wherever a Callable[V] is expected. */
  implicit def fromFunctionToCallable[V](f: () ⇒ V): Callable[V] = new Callable[V]() {
    override def call() = f()
  }

  /** Wraps `block` in a Runnable that tags the executing thread's name with `taskName`. */
  def task(taskName: String)(block: () ⇒ Any): Runnable = new Runnable() {
    override def run() = {
      val currentThreadName = Thread.currentThread().getName
      Thread.currentThread().setName(s"$currentThreadName-$taskName")
      // Always restore the original thread name, even if block() throws.
      try block() finally Thread.currentThread().setName(currentThreadName)
    }
  }

}
34 |
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/util/CustomThreadFactory.scala:
--------------------------------------------------------------------------------
1 | package ckite.util
2 |
3 | import java.util.concurrent.ThreadFactory
4 | import java.util.concurrent.atomic.AtomicInteger
5 |
/** ThreadFactory producing threads named "<name>-<n>" inside a ThreadGroup named `name`. */
case class CustomThreadFactory(name: String, makeDaemons: Boolean = true) extends ThreadFactory {
  // NOTE(review): this auxiliary constructor uses makeDaemons = false while the default
  // parameter above uses true, so `new CustomThreadFactory(n)` and the case-class
  // apply(n) disagree — confirm which default is intended.
  def this(name: String) = this(name, false)

  // All created threads share a group nested under the creating thread's group.
  val group = new ThreadGroup(Thread.currentThread().getThreadGroup(), name)
  // Monotonic suffix for thread names.
  val threadNumber = new AtomicInteger(1)

  def newThread(r: Runnable) = {
    val thread = new Thread(group, r, name + "-" + threadNumber.getAndIncrement())
    thread.setDaemon(makeDaemons)
    // Normalize priority in case it was inherited from a non-default parent thread.
    if (thread.getPriority != Thread.NORM_PRIORITY) {
      thread.setPriority(Thread.NORM_PRIORITY)
    }
    thread
  }
}
21 |
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/util/LockSupport.scala:
--------------------------------------------------------------------------------
1 | package ckite.util
2 |
3 | import java.util.concurrent.locks.ReentrantReadWriteLock
4 |
/** Mixin exposing a read/write lock as `shared` (read) and `exclusive` (write) scopes. */
trait LockSupport {

  private val rwLock = new ReentrantReadWriteLock()
  private val writeLock = rwLock.writeLock()
  private val readLock = rwLock.readLock()

  /** Evaluates `block` while holding the shared (read) lock. */
  def shared[T](block: ⇒ T): T = {
    readLock.lock()
    try block finally readLock.unlock()
  }

  /** Evaluates `block` while holding the exclusive (write) lock. */
  def exclusive[T](block: ⇒ T): T = {
    writeLock.lock()
    try block finally writeLock.unlock()
  }

}
30 |
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/util/Logging.scala:
--------------------------------------------------------------------------------
1 | package ckite.util
2 |
3 | import org.slf4j.LoggerFactory
4 |
/** Mixin providing an slf4j logger named after the concrete runtime class. */
trait Logging {

  val logger = LoggerFactory.getLogger(this.getClass())

  /** Evaluates `f`, logging and rethrowing any Exception it raises. */
  def loggingErrors[T](f: ⇒ T) = {
    try f
    catch {
      case e: Exception ⇒
        logger.error("Error", e)
        throw e
    }
  }

}
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/util/Serializer.scala:
--------------------------------------------------------------------------------
1 | package ckite.util
2 |
3 | import com.twitter.chill.ScalaKryoInstantiator
4 |
/** Serialization facade used across the project; delegates to the Kryo implementation. */
object Serializer {
  def serialize[T](anObject: T): Array[Byte] = KryoSerializer.serialize(anObject)
  def deserialize[T](bytes: Array[Byte]): T = KryoSerializer.deserialize(bytes)
}
9 |
/** Kryo-backed serializer using twitter-chill's pooled instantiator. */
object KryoSerializer {
  private val kryoPool = ScalaKryoInstantiator.defaultPool

  // toBytesWithClass embeds the class name, so deserialization needs no target class up front.
  def serialize[T](anObject: T): Array[Byte] = kryoPool.toBytesWithClass(anObject)
  def deserialize[T](bytes: Array[Byte]): T = kryoPool.fromBytes(bytes).asInstanceOf[T]
}
--------------------------------------------------------------------------------
/ckite-core/src/test/resources/logback-test.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | %d{HH:mm:ss.SSS} %-5level [%thread] %logger{36} - %msg%n
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/ckite-core/src/test/scala/ckite/CKiteIntegrationTest.scala:
--------------------------------------------------------------------------------
1 | package ckite
2 |
3 | import java.util.concurrent.TimeoutException
4 |
5 | import ckite.example.{ Get, KVStore, Put }
6 | import ckite.exception.LostLeadershipException
7 | import ckite.storage.MemoryStorage
8 | import ckite.util.Logging
9 | import org.scalatest._
10 |
11 | import scala.concurrent.{ Await, Future }
12 | import scala.concurrent.duration._
13 |
/**
 * End-to-end tests exercising single-member and 3-member ckite clusters over
 * the in-memory [[TestRpc]] transport (no real sockets).
 */
class CKiteIntegrationTest extends FlatSpec with Matchers with Logging {

  val Key1 = "key1"
  val Value1 = "value1"
  val Value2 = "value2"

  val BOOTSTRAP = true

  val Member1Address = "localhost:9091"
  val Member2Address = "localhost:9092"
  val Member3Address = "localhost:9093"
  val Member4Address = "localhost:9094"

  "A single member cluster" should "elect a Leader" in {
    val ckite = CKiteBuilder().listenAddress(Member1Address)
      .stateMachine(new KVStore()).bootstrap(BOOTSTRAP).storage(MemoryStorage()).rpc(TestRpc).build.asInstanceOf[CKiteClient]
    ckite.start()

    //FIX: the previous bare `ckite.isLeader should be` compiled but performed no assertion.
    ckite.isLeader should be(true)

    ckite.stop()
  }

  it should "read committed writes" in {
    val ckite = CKiteBuilder().listenAddress(Member1Address)
      .stateMachine(new KVStore()).bootstrap(BOOTSTRAP).rpc(TestRpc).build
    ckite.start()

    await(ckite.write(Put(Key1, Value1)))

    val readValue = await(ckite.read(Get(Key1)))

    readValue should be(Value1)

    ckite.stop()
  }

  it should "compact a log & reload snapshot" in {
    val ckite = CKiteBuilder().listenAddress(Member1Address)
      .compactionThreshold(5 + 1) //5 writes + 1 NoOp
      .stateMachine(new KVStore()).bootstrap(BOOTSTRAP).rpc(TestRpc).build
    ckite.start()

    await(ckite.write(Put("key1", "value1")))
    await(ckite.write(Put("key2", "value2")))
    await(ckite.write(Put("key3", "value3")))
    await(ckite.write(Put("key4", "value4")))
    await(ckite.write(Put("key5", "value5")))

    //log should be compacted at this point

    await(ckite.write(Put("key6", "value6")))

    waitSomeTimeForElection()

    ckite.stop()

    val ckiteRestarted = restart(ckite)

    await(ckiteRestarted.read(Get("key1"))) should be("value1")
    await(ckiteRestarted.read(Get("key2"))) should be("value2")
    await(ckiteRestarted.read(Get("key3"))) should be("value3")
    await(ckiteRestarted.read(Get("key4"))) should be("value4")
    await(ckiteRestarted.read(Get("key5"))) should be("value5")

    ckiteRestarted.stop()
  }

  it should "restore latest cluster configuration from Log" in {
    val ckite = CKiteBuilder().listenAddress(Member1Address)
      .stateMachine(new KVStore()).bootstrap(BOOTSTRAP).rpc(TestRpc).build
    ckite.start()

    //It is expected to timeout since Member2 is not up and the configuration must be committed under the new configuration (member1 and member2)
    //TODO: What if two subsequent JointConfiguration ???
    intercept[TimeoutException] {
      await(ckite.addMember(Member2Address))
    }

    ckite.stop()

    val ckiteRestarted = restart(ckite)

    val members = ckiteRestarted.members

    members should contain(Member2Address)

    ckiteRestarted.stop()
  }

  it should "restore latest cluster configuration from Snapshot" in {
    val ckite = CKiteBuilder().listenAddress(Member1Address)
      .compactionThreshold(2 + 1) //1 writes + 1 NoOp
      .stateMachine(new KVStore()).bootstrap(BOOTSTRAP).rpc(TestRpc).build
    ckite.start()

    //It is expected to timeout since 9092 is not up and the configuration needs to be committed under the new configuration (9091 and 9092)
    //TODO: What if two subsequent EnterJointConsensus ???
    intercept[TimeoutException] {
      await(ckite.addMember(Member2Address))
    }

    //This will force the Snapshot. Again, it is expected to timeout.
    intercept[TimeoutException] {
      await(ckite.write(Put(Key1, Value1)))
    }

    waitSomeTimeForAppendEntries()

    ckite.stop()

    val ckiteRestarted = restart(ckite)

    val members = ckiteRestarted.members

    members should contain(Member2Address)

    ckiteRestarted.stop()
  }

  "A 3 member cluster" should "elect a single Leader" in withStartedThreeMemberCluster { members ⇒
    val leader = members.leader
    val followers = members.followers

    leader should not be null
    followers.length should be(2)
  }

  it should "failover Leader" in withStartedThreeMemberCluster { members ⇒
    val originalLeader = members.leader
    val followers = members.followers

    originalLeader.stop()

    waitSomeTimeForElection()

    //a leader must be elected from the followers
    val newLeader = followers.leader

    newLeader should not be null
    newLeader should not be originalLeader
  }

  it should "read committed writes" in withStartedThreeMemberCluster { members ⇒

    val leader = members.leader

    await(leader.write(Put(Key1, Value1)))

    members.foreach { member ⇒
      await(member.read(Get(Key1))) should be(Value1)
    }

  }

  it should "forward writes to the Leader" in withStartedThreeMemberCluster { members ⇒

    val someFollower = members.followers.head

    //this write is forwarded to the Leader
    await(someFollower.write(Put(Key1, Value1)))

    members.foreach { member ⇒
      await(member.read(Get(Key1))) should be(Value1)
    }
  }

  it should "maintain quorum when 1 member goes down" in withStartedThreeMemberCluster { members ⇒

    val someFollower = members.followers.head

    //a member goes down
    someFollower.stop()

    val leader = members.leader

    //leader still has quorum. this write is going to be committed
    await(leader.write(Put(Key1, Value1)))

    members.diff(Seq(someFollower)).foreach { member ⇒
      await(member.read(Get(Key1))) should be(Value1)
    }
  }

  it should "loose quorum when 2 members goes down" in withStartedThreeMemberCluster { members ⇒

    val leader = members.leader

    //all the followers go down
    members.followers.foreach {
      _.stop()
    }

    //leader no longer has quorum. this write is going to be rejected
    intercept[TimeoutException] {
      await(leader.write(Put(Key1, Value1)))
    }
  }

  it should "forward join on restarted member" in withStartedThreeMemberCluster { members ⇒

    val leader = members.leader

    //one follower goes down
    val follower = members.followers.head

    follower.stop()

    //seed the restarted member with the other two members' addresses
    val seeds = Set(Member1Address, Member2Address, Member3Address) - id(leader) - (id(follower))
    builder(follower).storage(MemoryStorage()).members(seeds.toSeq)

    restart(follower)

    waitSomeTimeForAppendEntries()
  }

  it should "replicate missing commands on restarted member" in {

    val member1 = CKiteBuilder().listenAddress(Member1Address)
      .stateMachine(new KVStore()).bootstrap(BOOTSTRAP).rpc(TestRpc).build

    val member2 = CKiteBuilder().listenAddress(Member2Address).members(Seq(Member1Address, Member3Address))
      .minElectionTimeout(1000).maxElectionTimeout(1000)
      .stateMachine(new KVStore()).rpc(TestRpc).build

    val member3 = CKiteBuilder().listenAddress(Member3Address).members(Seq(Member2Address, Member1Address))
      .minElectionTimeout(2000).maxElectionTimeout(2000)
      .stateMachine(new KVStore()).rpc(TestRpc).build

    val members = Seq(member1, member2, member3)

    members.foreach {
      _.start()
    }

    try {

      val leader = members.leader

      //member3 goes down
      member3.stop()

      //still having a quorum. This write is committed.
      await(leader.write(Put(Key1, Value1)))

      val seeds = Set(Member1Address, Member2Address, Member3Address) - id(leader) - (id(member3))
      builder(member3).storage(MemoryStorage()).members(seeds.toSeq)

      //member3 is back
      val restartedMember3 = restart(member3)

      //wait some time (> heartbeatsInterval) for missing appendEntries to arrive
      waitSomeTimeForAppendEntries()

      //read from its local state machine to check if missing appendEntries have been replicated
      val readValue = await(restartedMember3.readLocal(Get(Key1)))

      readValue should be(Value1)
      restartedMember3.stop()
    } finally {
      member1.stop()
      member2.stop()
    }
  }

  it should "add a new member" in withStartedThreeMemberCluster { members ⇒

    val leader = members.leader

    await(leader.write(Put(Key1, Value1)))

    //add member4 to the cluster
    await(leader.addMember(Member4Address))

    val member4 = CKiteBuilder().listenAddress(Member4Address).members(Seq(Member2Address, Member1Address, Member3Address))
      .minElectionTimeout(2000).maxElectionTimeout(3000).stateMachine(new KVStore()).rpc(TestRpc).build.asInstanceOf[CKiteClient]
    //start member4
    member4.start()

    //get value for k1. this is going to be forwarded to the Leader.
    val replicatedValue = await(member4.read(Get(Key1)))
    replicatedValue should be(Value1)

    //wait some time (> heartbeatsInterval) for missing appendEntries to arrive
    waitSomeTimeForAppendEntries()

    //get value for Key1 from local
    val localValue = await(member4.readLocal(Get(Key1)))

    localValue should be(replicatedValue)

    member4.stop()
  }

  it should "overwrite uncommitted entries on an old Leader" in withStartedThreeMemberCluster { members ⇒

    val leader = members.leader

    val followers = members.followers

    //stop the followers
    followers.foreach {
      _.stop()
    }

    //these two writes will timeout since no majority can be reached
    for (_ ← 1 to 2) {
      intercept[TimeoutException] {
        await(leader.write(Put(Key1, Value1)))
      }
    }
    //at this point the leader has two uncommitted entries

    //leader stops
    leader.stop()

    //followers come back
    val rebuiltFollowers = followers.map {
      restart(_)
    }

    val livemembers = rebuiltFollowers

    waitSomeTimeForElection()

    //a new leader is elected
    val newleader = livemembers.leader

    //old leader comes back
    val oldleader = restart(leader)

    waitSomeTimeForAppendEntries()

    //those two uncommitted entries of the oldleader must be overridden and removed by the new Leader as part of appendEntries
    await(newleader.read(Get(Key1))) should be(null)

    oldleader.stop()
    rebuiltFollowers.foreach {
      _.stop()
    }

  }

  it should "avoid partioned leader stale reads" in withStartedThreeMemberCluster { members ⇒

    val oldLeader = members.leader

    await(oldLeader.write(Put(Key1, Value1)))

    //partition the current leader away from the others
    TestRpc.blockTraffic(id(oldLeader))

    waitSomeTimeForElection()

    val newLeader = members.leader

    await(newLeader.write(Put(Key1, Value2)))

    await(newLeader.read(Get(Key1))) should be(Value2)

    //the partitioned old leader must refuse to serve a stale read
    intercept[LostLeadershipException] {
      await(oldLeader.read(Get(Key1)))
    }

  }

  implicit def membersSequence(members: Seq[CKite]): CKiteSequence = {
    new CKiteSequence(members)
  }

  /** Convenience view over a cluster: splits members into leader and followers. */
  class CKiteSequence(members: Seq[CKite]) {

    def followers = members.filterNot {
      _.asInstanceOf[CKiteClient].isLeader
    }

    /** The unique leader; fails the test if more than one member claims leadership. */
    def leader = {
      val leaders = members diff followers
      val theLeader = leaders.head
      withClue(s"Leader $theLeader is not unique") {
        leaders diff Seq(theLeader) should be('empty)
      }
      theLeader
    }

  }

  private def withThreeMemberCluster(block: Seq[CKite] ⇒ Any) = {
    //member1 has default election timeout (500ms - 700ms). It is intended to be the first to start an election and rise as the leader.
    val member1 = CKiteBuilder().listenAddress(Member1Address)
      .bootstrap(true)
      .stateMachine(new KVStore()).rpc(TestRpc).build

    val member2 = CKiteBuilder().listenAddress(Member2Address).members(Seq(Member1Address))
      .minElectionTimeout(1250).maxElectionTimeout(1500) //higher election timeout
      .stateMachine(new KVStore()).rpc(TestRpc).build

    val member3 = CKiteBuilder().listenAddress(Member3Address).members(Seq(Member2Address, Member1Address))
      .minElectionTimeout(1750).maxElectionTimeout(2000) //higher election timeout
      .stateMachine(new KVStore()).rpc(TestRpc).build
    val members = Seq(member1, member2, member3)
    try {
      block(members)
    } finally {
      members.foreach { member ⇒
        //FIX: the old `try { member.stop() } finally {}` caught nothing, so one
        //failing stop aborted the teardown of the remaining members.
        try member.stop() catch {
          case e: Exception ⇒ logger.warn(s"Error stopping member $member", e)
        }
      }
    }
  }

  private def withStartedThreeMemberCluster(test: Seq[CKite] ⇒ Any) = withThreeMemberCluster { members ⇒
    logger.info(s"Starting all the members")
    members.foreach(_.start())

    waitSomeTimeForElection()
    try {
      logger.info(s"Running test...")
      test(members)
    } finally {
      logger.info(s"Stopping all the members")
      members.foreach { member ⇒
        //best-effort stop; always unblock simulated traffic so later tests start clean
        try member.stop() catch {
          case e: Exception ⇒ logger.warn(s"Error stopping member $member", e)
        } finally {
          TestRpc.unblockTraffic(id(member))
        }
      }
    }
  }

  private def builder(ckite: CKite) = ckite.asInstanceOf[CKiteClient].builder

  private def id(ckite: CKite): String = ckite.asInstanceOf[CKiteClient].id()

  /** Rebuilds a member from its builder (fresh KVStore, no bootstrap) and starts it. */
  private def restart(ckite: CKite): CKiteClient = {
    val clonedCKite = ckite.asInstanceOf[CKiteClient].builder.stateMachine(new KVStore).bootstrap(false).build.asInstanceOf[CKiteClient]
    clonedCKite.start()
    clonedCKite
  }

  private def waitSomeTimeForElection() = Thread.sleep(3000)

  private def waitSomeTimeForAppendEntries() = Thread.sleep(5000)

  private def await[T](future: Future[T]): T = {
    Await.result(future, 3.seconds)
  }

}
--------------------------------------------------------------------------------
/ckite-core/src/test/scala/ckite/SerializerTest.scala:
--------------------------------------------------------------------------------
1 | package ckite
2 |
3 | import org.scalatest.Matchers
4 | import ckite.util.Logging
5 | import org.scalatest.junit.JUnitRunner
6 | import org.scalatest._
7 | import ckite.rpc.LogEntry
8 | import ckite.rpc.NoOp
9 | import ckite.util.Serializer
10 | import ckite.rpc.LogEntry
11 |
/** Round-trip test for the Kryo-backed [[Serializer]]. */
class SerializerTest extends FlatSpec with Matchers with Logging {

  "a serializer" should "serialize and deserialize" in {
    val logEntry = LogEntry(1, 1, NoOp())

    val bytes = Serializer.serialize(logEntry)

    val deserialized: LogEntry = Serializer.deserialize(bytes)

    //FIX: the round-trip result was never checked before, so the test could
    //not fail on a broken serializer.
    deserialized should be(logEntry)
  }
}
--------------------------------------------------------------------------------
/ckite-core/src/test/scala/ckite/TestRpc.scala:
--------------------------------------------------------------------------------
1 | package ckite
2 |
3 | import java.io.IOException
4 | import java.util.concurrent.ConcurrentHashMap
5 | import java.util.concurrent.atomic.AtomicBoolean
6 |
7 | import ckite.rpc._
8 | import com.typesafe.config.Config
9 |
10 | import scala.concurrent.Future
11 | import scala.util.Try
12 |
/**
 * In-memory [[Rpc]] transport used by the integration tests: members talk
 * through a shared registry of [[TestServer]]s instead of real sockets, so
 * crashes and partitions can be simulated by stopping/blocking a server.
 */
object TestRpc extends Rpc {

  // binding ("host:port") -> in-memory server; shared by every test in the JVM.
  val servers = new ConcurrentHashMap[String, TestServer]()

  /**
   * Resolves the Raft instance listening at `binding`.
   * A missing, stopped or blocked server behaves like a refused TCP connection.
   */
  def server(binding: String): Raft = {
    val server = servers.get(binding)
    if (server == null || server.isStopped() || server.isBlocked) {
      throw new IOException("Connection refused")
    }
    server.cluster
  }

  /** Simulates a network partition of `binding`. FIX: no-op (instead of NPE) for unregistered members. */
  def blockTraffic(binding: String) = {
    Option(servers.get(binding)).foreach(_.block())
  }

  /** Heals a simulated partition; safe to call for members that never registered. */
  def unblockTraffic(binding: String) = {
    Option(servers.get(binding)).foreach(_.unblock())
  }

  /** True when `binding` is registered and currently partitioned. */
  def isBlocked(binding: String) = Option(servers.get(binding)).exists(_.isBlocked)

  override def createServer(service: RpcService, config: Config): RpcServer = {
    val testServer: TestServer = new TestServer(service.asInstanceOf[Raft])
    // Register under the member's own id so clients can look it up by binding.
    servers.put(service.asInstanceOf[Raft].membership.myId, testServer)
    testServer
  }

  override def createClient(binding: String): RpcClient = new TestClient(binding)
}
43 |
/**
 * In-memory stand-in for a member's RPC server. Two independent flags model
 * failure modes: `stopped` (member down) and `blocked` (network partition).
 */
class TestServer(val cluster: Raft) extends RpcServer {

  // AtomicBooleans: the flags are flipped from test threads and read from
  // cluster threads, so plain vars would not guarantee visibility.
  val stopped = new AtomicBoolean()
  val blocked = new AtomicBoolean()

  override def start(): Unit = stopped.set(false)

  override def stop(): Unit = stopped.set(true)

  /** Simulates a partition: traffic to this server is refused while blocked. */
  def block() = blocked.set(true)

  /** Heals the simulated partition. */
  def unblock() = blocked.set(false)

  def isStopped() = stopped.get()

  def isBlocked = blocked.get()

}
70 |
/**
 * In-memory RPC client: delivers each request directly to the registered
 * [[TestServer]] for `binding`, surfacing simulated connection failures as
 * failed futures (as a real network client would).
 */
class TestClient(binding: String) extends RpcClient {

  override def send(request: RequestVote): Future[RequestVoteResponse] = ioTry {
    failIfBlocked(request.memberId)
    TestRpc.server(binding).onRequestVoteReceived(request)
  }

  override def send(appendEntries: AppendEntries): Future[AppendEntriesResponse] = ioTry {
    failIfBlocked(appendEntries.leaderId)
    TestRpc.server(binding).onAppendEntriesReceived(appendEntries)
  }

  override def send(installSnapshot: InstallSnapshot): Future[InstallSnapshotResponse] = ioTry {
    failIfBlocked(installSnapshot.leaderId)
    TestRpc.server(binding).onInstallSnapshotReceived(installSnapshot)
  }

  override def send[T](command: Command): Future[T] = ioTry {
    TestRpc.server(binding).onCommandReceived(command)
  }

  override def send(joinMember: JoinMember): Future[JoinMemberResponse] = ioTry {
    failIfBlocked(joinMember.memberId)
    TestRpc.server(binding).onMemberJoinReceived(joinMember.memberId)
  }

  // A blocked sender cannot reach anyone: behave like a refused connection.
  private def failIfBlocked(memberId: String): Unit = {
    if (TestRpc.isBlocked(memberId)) {
      throw new IOException("Connection refused")
    }
  }

  // Converts a synchronously-thrown IOException into a failed Future, the way
  // a real asynchronous client would report a connection error.
  def ioTry[T](block: ⇒ Future[T]): Future[T] = {
    try {
      block
    } catch {
      case e: IOException ⇒ Future.failed(e)
    }
  }
}
112 |
--------------------------------------------------------------------------------
/ckite-core/src/test/scala/ckite/example/Get.scala:
--------------------------------------------------------------------------------
1 | package ckite.example
2 | import ckite.rpc.ReadCommand
3 |
/** Read-only command asking the state machine for the value associated with `key`. */
case class Get(key: String) extends ReadCommand[String]
--------------------------------------------------------------------------------
/ckite-core/src/test/scala/ckite/example/KVStore.scala:
--------------------------------------------------------------------------------
1 | package ckite.example
2 |
3 | import java.nio.ByteBuffer
4 | import java.util.HashMap
5 |
6 | import ckite.statemachine.StateMachine
7 | import ckite.util.{ Logging, Serializer }
8 |
/**
 * Minimal key-value [[StateMachine]] used by the tests: applies Put writes,
 * serves Get reads, and snapshots its whole map via [[Serializer]].
 * NOTE(review): uses a plain java.util.HashMap; assumes ckite serializes
 * apply/snapshot calls — confirm against the StateMachine contract.
 */
class KVStore extends StateMachine with Logging {

  // Backing store; replaced wholesale when a snapshot is restored.
  private var map = new HashMap[String, String]()
  // Index of the last applied write, reported back for snapshotting.
  private var lastIndex: Long = 0

  def applyWrite = {
    case (index, Put(key: String, value: String)) ⇒ {
      logger.debug(s"Put $key=$value")
      map.put(key, value)
      lastIndex = index
      value
    }
  }

  def applyRead = {
    case Get(key) ⇒ {
      logger.debug(s"Get $key")
      map.get(key) // java.util.HashMap: returns null when the key is absent
    }
  }

  def getLastAppliedIndex: Long = lastIndex

  def restoreSnapshot(byteBuffer: ByteBuffer) = {
    // FIX: copy only the readable window. The previous byteBuffer.array()
    // throws on direct (non-array-backed) buffers and reads wrong bytes when
    // the buffer has a non-zero position or arrayOffset (e.g. a slice).
    val bytes = new Array[Byte](byteBuffer.remaining())
    byteBuffer.duplicate().get(bytes) // duplicate() leaves the caller's position untouched
    map = Serializer.deserialize(bytes)
  }

  def takeSnapshot(): ByteBuffer = {
    ByteBuffer.wrap(Serializer.serialize(map))
  }

}
--------------------------------------------------------------------------------
/ckite-core/src/test/scala/ckite/example/Put.scala:
--------------------------------------------------------------------------------
1 | package ckite.example
2 |
3 | import ckite.rpc.WriteCommand
4 |
/** Write command that associates `value` with `key` in the replicated state machine. */
case class Put(key: String, value: String) extends WriteCommand[String]
--------------------------------------------------------------------------------
/ckite-finagle/src/main/resources/reference.conf:
--------------------------------------------------------------------------------
1 | ckite {
2 | finagle {
3 | thrift {
4 | # Workers handling incoming requests
5 | workers = 4
6 | }
7 | }
8 | }
--------------------------------------------------------------------------------
/ckite-finagle/src/main/scala/ckite/rpc/FinagleThriftRpc.scala:
--------------------------------------------------------------------------------
1 | package ckite.rpc
2 |
3 | import ckite.rpc.thrift.{ FinagleThriftClient, FinagleThriftServer }
4 | import com.typesafe.config.Config
5 |
/** [[Rpc]] implementation backed by Finagle's Thrift transport. */
object FinagleThriftRpc extends Rpc {

  // Server side: `config` supplies the listen address and worker-pool size.
  override def createServer(rpcService: RpcService, config: Config): RpcServer = FinagleThriftServer(rpcService, config)

  // Client side for the remote member reachable at `address`.
  override def createClient(address: String): RpcClient = FinagleThriftClient(address)

}
--------------------------------------------------------------------------------
/ckite-finagle/src/main/scala/ckite/rpc/thrift/FinagleThriftClient.scala:
--------------------------------------------------------------------------------
1 | package ckite.rpc.thrift
2 |
3 | import java.util.concurrent.TimeUnit
4 |
5 | import ckite.rpc._
6 | import ckite.rpc.thrift.ThriftConverters._
7 | import ckite.util.Logging
8 | import com.twitter.finagle.builder.ClientBuilder
9 | import com.twitter.finagle.service.RetryPolicy
10 | import com.twitter.finagle.thrift.ThriftClientFramedCodec
11 | import com.twitter.util.{ Duration, Future }
12 |
13 | import scala.concurrent.{ Promise, Future ⇒ ScalaFuture }
14 |
/**
 * Finagle Thrift [[RpcClient]] pointing at the remote member bound to `binding`.
 * Domain requests/responses are converted to and from their Thrift (`*ST`)
 * counterparts by the implicits imported from ThriftConverters.
 */
case class FinagleThriftClient(binding: String) extends RpcClient with Logging {

  // Framed Thrift client: no retries (Raft re-sends on its own), fail-fast off,
  // up to 10 connections per host, 60s request timeout.
  val client = new CKiteService.FinagledClient(ClientBuilder().hosts(binding)
    .retryPolicy(NoRetry).codec(ThriftClientFramedCodec()).failFast(false)
    .hostConnectionLimit(10).hostConnectionCoresize(1).requestTimeout(Duration(60, TimeUnit.SECONDS)).build())

  override def send(request: RequestVote): ScalaFuture[RequestVoteResponse] = {
    logger.debug(s"Sending $request to $binding")
    val f = client.sendRequestVote(request)
    // Bridge the Twitter future to a Scala one. The ST->domain conversion is
    // applied implicitly when `value` is passed to promise.success.
    // NOTE(review): each send method repeats this bridge inline; the implicit
    // `toScalaFuture` below only covers the case where no element conversion
    // is needed.
    val promise = Promise[RequestVoteResponse]()
    f.onSuccess(value ⇒ promise.success(value))
    f.onFailure(e ⇒ promise.failure(e))
    promise.future
  }

  override def send(appendEntries: AppendEntries): ScalaFuture[AppendEntriesResponse] = {
    logger.trace(s"Sending $appendEntries to $binding")
    val f = client.sendAppendEntries(appendEntries)
    val promise = Promise[AppendEntriesResponse]()
    f.onSuccess(value ⇒ promise.success(value))
    f.onFailure(e ⇒ promise.failure(e))
    promise.future
  }

  // Commands travel as Kryo-serialized ByteBuffers; the response is
  // deserialized to T implicitly at promise.success.
  override def send[T](command: Command): ScalaFuture[T] = {
    val future = client.sendCommand(command)
    val promise = Promise[T]()
    future.onSuccess(value ⇒ promise.success(value))
    future.onFailure(e ⇒ promise.failure(e))
    promise.future
  }

  override def send(installSnapshot: InstallSnapshot): ScalaFuture[InstallSnapshotResponse] = {
    val future = client.sendInstallSnapshot(installSnapshot)
    val promise = Promise[InstallSnapshotResponse]()
    future.onSuccess(value ⇒ promise.success(value))
    future.onFailure(e ⇒ promise.failure(e))
    promise.future
  }

  override def send(joinRequest: JoinMember): ScalaFuture[JoinMemberResponse] = {
    val future = client.sendJoinMember(joinRequest)
    val promise = Promise[JoinMemberResponse]()
    future.onSuccess(value ⇒ promise.success(value))
    future.onFailure(e ⇒ promise.failure(e))
    promise.future
  }

  private implicit def toScalaFuture[T](twitterFuture: Future[T]): ScalaFuture[T] = {
    val promise = Promise[T]()
    twitterFuture.onSuccess(value ⇒ promise.success(value))
    twitterFuture.onFailure(e ⇒ promise.failure(e))
    promise.future
  }

}
71 |
/** Retry policy that never retries: every failure surfaces directly to the caller. */
object NoRetry extends RetryPolicy[com.twitter.util.Try[Nothing]] {
  def apply(e: com.twitter.util.Try[Nothing]) = {
    None // no (backoff, next-policy) pair means "do not retry"
  }
}
--------------------------------------------------------------------------------
/ckite-finagle/src/main/scala/ckite/rpc/thrift/FinagleThriftServer.scala:
--------------------------------------------------------------------------------
1 | package ckite.rpc.thrift
2 |
3 | import java.nio.ByteBuffer
4 | import java.util.concurrent.{ SynchronousQueue, ThreadPoolExecutor, TimeUnit }
5 |
6 | import ckite.rpc.thrift.ThriftConverters._
7 | import ckite.rpc.{ RpcServer, RpcService }
8 | import ckite.util.CustomThreadFactory
9 | import com.twitter.finagle.{ ListeningServer, Thrift }
10 | import com.twitter.util.{ Future, FuturePool, Promise }
11 | import com.typesafe.config.Config
12 | import org.apache.thrift.protocol.TBinaryProtocol
13 |
14 | import scala.concurrent.ExecutionContext.Implicits.global
15 | import scala.concurrent.{ Future ⇒ ScalaFuture }
16 | import scala.util.{ Failure, Success }
17 |
/**
 * Finagle-based Thrift [[RpcServer]]: exposes the local `rpcService` to the
 * other cluster members on the port taken from `ckite.listen-address`.
 */
case class FinagleThriftServer(rpcService: RpcService, config: Config) extends RpcServer {
  // Guarded by `synchronized` in stop(); makes stop() idempotent.
  var closed = false
  var finagleServer: ListeningServer = _

  /** Binds the Thrift service; the listen port is the part after ':'. */
  def start() = {
    val localPort = config.getString("ckite.listen-address").split(":")(1)
    finagleServer = Thrift.serve(s":$localPort", ckiteService)
  }

  /**
   * Bridges a Scala Future into a Twitter Future.
   *
   * FIX: failures are now propagated with `promise.setException(t)`. The
   * previous `promise.raise(t)` only delivers an interrupt to the producer
   * side and never satisfies the promise, so a failed Scala future left the
   * Thrift response pending forever.
   */
  implicit def toTwitterFuture[T](scalaFuture: ScalaFuture[T]): Future[T] = {
    val promise = Promise[T]
    scalaFuture.onComplete {
      case Success(value) ⇒ promise.setValue(value)
      case Failure(t)     ⇒ promise.setException(t)
    }
    promise
  }

  /** Thrift-generated service facade that forwards every RPC to `rpcService`. */
  def ckiteService = {
    val ckiteService = new CKiteService[Future]() {

      override def sendRequestVote(requestVote: RequestVoteST): Future[RequestVoteResponseST] = {
        rpcService.onRequestVoteReceived(requestVote).map[RequestVoteResponseST](r ⇒ r)
      }

      override def sendAppendEntries(appendEntries: AppendEntriesST): Future[AppendEntriesResponseST] = {
        rpcService.onAppendEntriesReceived(appendEntries).map[AppendEntriesResponseST](r ⇒ r)
      }

      override def sendCommand(bb: ByteBuffer): Future[ByteBuffer] = {
        rpcService.onCommandReceived[Any](bb).map[ByteBuffer](r ⇒ r)
      }

      override def sendJoinMember(joinRequest: JoinMemberST): Future[JoinMemberResponseST] = {
        rpcService.onMemberJoinReceived(joinRequest._1).map[JoinMemberResponseST](r ⇒ r)
      }

      override def sendInstallSnapshot(installSnapshot: InstallSnapshotST) = {
        rpcService.onInstallSnapshotReceived(installSnapshot).map[InstallSnapshotResponseST](r ⇒ r)
      }
    }

    new CKiteService$FinagleService(ckiteService, new TBinaryProtocol.Factory())
  }

  /** Idempotent shutdown: stops the worker pool and closes the listener. */
  def stop() = synchronized {
    if (!closed) {
      futurePool.executor.shutdownNow()
      // Guard against stop() being invoked before start() ever ran.
      if (finagleServer != null) {
        finagleServer.close()
      }
      closed = true
    }
  }

  // Worker pool sized by `ckite.finagle.thrift.workers`. NOTE(review): the
  // `true` flag to CustomThreadFactory presumably marks daemon threads —
  // confirm against CustomThreadFactory.
  val futurePool = FuturePool(new ThreadPoolExecutor(0, config.getInt("ckite.finagle.thrift.workers"),
    15L, TimeUnit.SECONDS,
    new SynchronousQueue[Runnable](),
    CustomThreadFactory("Thrift-worker", true)))

}
77 |
--------------------------------------------------------------------------------
/ckite-finagle/src/main/scala/ckite/rpc/thrift/ThriftConverters.scala:
--------------------------------------------------------------------------------
1 | package ckite.rpc.thrift
2 |
3 | import java.nio.ByteBuffer
4 |
5 | import ckite.rlog.Snapshot
6 | import ckite.rpc._
7 | import ckite.util.Logging
8 | import ckite.util.Serializer
9 |
/**
 * Implicit conversions between ckite's domain RPC objects and the
 * Thrift-generated (`*ST`) wire representations, in both directions.
 * Arbitrary commands and state-machine payloads travel as Kryo-serialized
 * ByteBuffers (see anyToThrift / anyFromThrift).
 */
object ThriftConverters extends Logging {

  // --- RequestVote ---

  implicit def requestVoteToThrift(request: RequestVote): RequestVoteST = {
    RequestVoteST(request.memberId, request.term, request.lastLogIndex, request.lastLogTerm)
  }

  implicit def requestVoteFromThrift(requestVote: RequestVoteST): RequestVote = {
    RequestVote(requestVote.memberId, requestVote.term, requestVote.lastLogIndex, requestVote.lastLogTerm)
  }

  // --- AppendEntries ---

  implicit def appendEntriesToThrift(request: AppendEntries): AppendEntriesST = {
    val entries: Seq[LogEntryST] = request.entries.map(entry ⇒ logEntryToThrift(entry)).toSeq
    AppendEntriesST(request.term, request.leaderId, request.commitIndex, request.prevLogIndex, request.prevLogTerm, Some(entries))
  }

  implicit def appendEntriesFromThrift(request: AppendEntriesST): AppendEntries = {
    // NOTE(review): `entries.get` assumes the optional field is always present;
    // the outbound converter above always sends Some(entries).
    val entries = request.entries.get.map(entry ⇒ logEntryFromThrift(entry)).toList
    AppendEntries(request.term, request.leaderId, request.commitIndex, request.prevLogIndex, request.prevLogTerm, entries)
  }

  implicit def requestVoteResponseToThrift(response: RequestVoteResponse): RequestVoteResponseST = {
    RequestVoteResponseST(response.currentTerm, response.granted)
  }

  implicit def requestVoteResponseFromThrift(response: RequestVoteResponseST): RequestVoteResponse = {
    RequestVoteResponse(response.currentTerm, response.granted)
  }

  implicit def appendEntriesResponseFromThrift(response: AppendEntriesResponseST): AppendEntriesResponse = {
    AppendEntriesResponse(response.term, response.success)
  }

  implicit def appendEntriesResponseToThrift(response: AppendEntriesResponse): AppendEntriesResponseST = {
    AppendEntriesResponseST(response.term, response.success)
  }

  implicit def logEntryToThrift(entry: LogEntry): LogEntryST = {
    LogEntryST(entry.term, entry.index, entry.command)
  }

  implicit def logEntryFromThrift(entry: LogEntryST): LogEntry = {
    LogEntry(entry.term, entry.index, entry.command)
  }

  // --- Opaque payloads: Kryo-serialized bytes ---

  implicit def anyToThrift[T](command: T): ByteBuffer = {
    val bb = ByteBuffer.wrap(Serializer.serialize(command))
    bb
  }

  implicit def anyFromThrift[T](byteBuffer: ByteBuffer): T = {
    // Copy only the readable window; works for both heap and direct buffers.
    val remaining = byteBuffer.remaining()
    val bytes = new Array[Byte](remaining)
    byteBuffer.get(bytes)
    val c = Serializer.deserialize[T](bytes)
    c
  }

  // --- Snapshots ---

  implicit def snapshotToThrift(snapshot: Snapshot): SnapshotST = {
    val bb2: ByteBuffer = snapshot.clusterConfiguration
    val bb: ByteBuffer = snapshot.stateMachineSerialized
    SnapshotST(bb, snapshot.index, snapshot.term, bb2)
  }

  implicit def snapshotFromThrift(snapshotST: SnapshotST): Snapshot = {
    Snapshot(snapshotST.lastLogEntryTerm, snapshotST.lastLogEntryIndex, snapshotST.membershipState, snapshotST.stateMachineState)
  }

  implicit def installSnapshotToThrift(installSnapshot: InstallSnapshot): InstallSnapshotST = {
    InstallSnapshotST(installSnapshot.term, installSnapshot.leaderId, installSnapshot.snapshot)
  }

  implicit def installSnapshotFromThrift(installSnapshotST: InstallSnapshotST): InstallSnapshot = {
    InstallSnapshot(installSnapshotST.term, installSnapshotST.leaderId, installSnapshotST.snapshot)
  }

  implicit def installSnapshotResponseFromThrift(installSnapshotResponseST: InstallSnapshotResponseST): InstallSnapshotResponse = {
    InstallSnapshotResponse(installSnapshotResponseST.success)
  }

  implicit def installSnapshotResponseToThrift(installSnapshotResponse: InstallSnapshotResponse): InstallSnapshotResponseST = {
    InstallSnapshotResponseST(installSnapshotResponse.success)
  }

  // --- Membership ---

  implicit def joinMemberToThrift(joinRequest: JoinMember): JoinMemberST = {
    JoinMemberST(joinRequest.memberId)
  }

  implicit def joinMemberResponseToThrift(joinResponse: JoinMemberResponse): JoinMemberResponseST = {
    JoinMemberResponseST(joinResponse.success)
  }

  implicit def joinMemberResponseFromThrift(joinResponse: JoinMemberResponseST): JoinMemberResponse = {
    JoinMemberResponse(joinResponse.success)
  }

}
--------------------------------------------------------------------------------
/ckite-finagle/src/main/thrift/ckite/rpc/thrift/ckite.thrift:
--------------------------------------------------------------------------------
1 | namespace java ckite.rpc.thrift
2 |
// Wire form of a single replicated-log entry; mirrors ckite.rpc.LogEntry
// (see the logEntryToThrift/logEntryFromThrift converters).
struct LogEntryST {
  1: required i32 term;
  2: required i64 index;
  // Opaque serialized command bytes (serialized by the Scala side).
  3: required binary command;
}
8 |
// AppendEntries RPC request; with no entries it serves as a heartbeat.
// Optional fields default to -1, meaning "not set".
struct AppendEntriesST {
  1: required i32 term;
  2: required string leaderId;
  3: optional i64 commitIndex = -1;
  4: optional i64 prevLogIndex = -1;
  5: optional i32 prevLogTerm = -1;
  // BUG FIX: a bare `list` with no element type is invalid Thrift IDL;
  // these entries are LogEntry values on the Scala side, so the element
  // type is LogEntryST.
  6: optional list<LogEntryST> entries;
}
17 |
// AppendEntries RPC reply; mirrors ckite's AppendEntriesResponse(term, success).
struct AppendEntriesResponseST {
  1: required i32 term;
  2: required bool success;
}
22 |
// RequestVote RPC request. Optional fields default to -1, meaning "not set".
struct RequestVoteST {
  1: required string memberId;
  2: required i32 term;
  3: optional i64 lastLogIndex = -1;
  4: optional i32 lastLogTerm = -1;
}
29 |
// RequestVote RPC reply; mirrors ckite's RequestVoteResponse(currentTerm, granted).
struct RequestVoteResponseST {
  1: required i32 currentTerm;
  2: required bool granted;
}
34 |
// Wire form of a state-machine snapshot. Both binary fields carry bytes
// produced by the Scala-side serializer (see snapshotToThrift/FromThrift).
struct SnapshotST {
  1: required binary stateMachineState;
  2: required i64 lastLogEntryIndex;
  3: required i32 lastLogEntryTerm;
  4: required binary membershipState;
}
41 |
// InstallSnapshot RPC request: term and id of the sending leader plus the snapshot.
struct InstallSnapshotST {
  1: required i32 term;
  2: required string leaderId;
  3: required SnapshotST snapshot;
}
47 |
// InstallSnapshot RPC reply: success/failure only.
struct InstallSnapshotResponseST {
  1: required bool success;
}
51 |
// Join request carrying the id of the member that wants to join the cluster.
struct JoinMemberST {
  1: required string memberId;
}
55 |
// Join request reply: success/failure only.
struct JoinMemberResponseST {
  1: required bool success;
}
59 |
// RPC surface exposed by a ckite node: one operation per message type
// defined above, plus an opaque byte-in/byte-out command channel.
service CKiteService {

  RequestVoteResponseST sendRequestVote(1:RequestVoteST requestVote);

  AppendEntriesResponseST sendAppendEntries(1:AppendEntriesST appendEntries);

  // Command and reply are opaque serialized bytes (serialization happens on
  // the Scala side via the anyToThrift/anyFromThrift converters).
  binary sendCommand(1:binary command);

  InstallSnapshotResponseST sendInstallSnapshot(1:InstallSnapshotST installSnapshot);

  JoinMemberResponseST sendJoinMember(1:JoinMemberST memberId);

}
--------------------------------------------------------------------------------
/ckite-mapdb/src/main/scala/ckite/mapdb/FileSupport.scala:
--------------------------------------------------------------------------------
1 | package ckite.mapdb
2 |
3 | import java.io.File
4 |
/** Small mixin for resolving files inside a (possibly not yet existing) data directory. */
trait FileSupport {

  /** Returns a File for `fileName` under `dataDir`, creating the directory tree if absent. */
  protected def file(dataDir: String, fileName: String): File = {
    val directory = new File(dataDir)
    directory.mkdirs()
    new File(directory, fileName)
  }
}
15 |
--------------------------------------------------------------------------------
/ckite-mapdb/src/main/scala/ckite/mapdb/MapDBPersistentLog.scala:
--------------------------------------------------------------------------------
1 | package ckite.mapdb
2 |
3 | import java.util.concurrent.atomic.AtomicLong
4 |
5 | import ckite.rlog.Log
6 | import ckite.rpc.LogEntry
7 | import ckite.util.{ Logging, Serializer }
8 | import org.mapdb.DBMaker
9 |
10 | import scala.concurrent.Future
11 |
/** Persistent Raft log backed by a single MapDB file under `dataDir`.
  * Transactions are disabled, so durability relies on the explicit commit()
  * performed after every append.
  */
case class MapDBPersistentLog(dataDir: String) extends Log with FileSupport with Logging {

  val logDB = DBMaker.newFileDB(file(dataDir, "ckite-mapdb-log")).mmapFileEnable().closeOnJvmShutdown().transactionDisable().cacheDisable().make()

  // index -> serialized LogEntry
  val entries = logDB.getTreeMap[Long, Array[Byte]]("logEntries")
  // Entry count cached to avoid repeated size() scans (size() is 0 for an empty map,
  // so no special empty-check is needed here).
  val cachedSize = new AtomicLong(entries.size())
  // -1 means "log is empty".
  val lastIndex = new AtomicLong(if (entries.isEmpty) -1 else entries.lastKey())

  /** Appends and commits an entry. The block passed to Future.successful is
    * evaluated eagerly, so the append happens synchronously on the calling thread.
    */
  def append(entry: LogEntry): Future[Unit] = Future.successful {
    entries.put(entry.index, Serializer.serialize(entry))
    cachedSize.incrementAndGet()
    lastIndex.set(entry.index)
    commit()
  }

  /** Returns the entry at `index`, or null when no such entry exists. */
  def getEntry(index: Long): LogEntry = {
    val bytes = entries.get(index)
    if (bytes != null) Serializer.deserialize(bytes) else null.asInstanceOf[LogEntry]
  }

  /** Deletes all entries from the first stored index up to and including `upToIndex`
    * (log compaction after a snapshot).
    */
  def rollLog(upToIndex: Long) = {
    val range = firstIndex to upToIndex
    logger.debug(s"Compacting ${range.size} LogEntries")
    range foreach { index ⇒ remove(index) }
    logger.debug(s"Finished compaction")
  }

  def getLastIndex: Long = lastIndex.longValue()

  def size = cachedSize.longValue()

  /** Deletes all entries from `index` to the end of the log and rewinds lastIndex. */
  def discardEntriesFrom(index: Long) = {
    index to lastIndex.longValue() foreach { i ⇒
      remove(i)
    }
    lastIndex.set(index - 1)
  }

  def close() = logDB.close()

  private def commit() = logDB.commit()

  private def firstIndex: Long = if (!entries.isEmpty) entries.firstKey else 1

  private def remove(index: Long) = {
    if (index > 0) {
      // BUG FIX: decrement the cached size only when an entry was actually removed.
      // Map.remove returns null for absent keys; previously the counter drifted
      // negative when remove() was called for an index that was already gone.
      if (entries.remove(index) != null) {
        cachedSize.decrementAndGet()
      }
    }
  }
}
--------------------------------------------------------------------------------
/ckite-mapdb/src/main/scala/ckite/mapdb/MapDBStorage.scala:
--------------------------------------------------------------------------------
1 | package ckite.mapdb
2 |
3 | import ckite.rlog._
4 | import ckite.util.Serializer
5 | import com.typesafe.config.ConfigFactory
6 | import org.mapdb.DBMaker
7 |
/** MapDB-backed implementation of ckite's Storage: persists the log, the latest
  * vote and the latest snapshot in three separate MapDB files under one data dir.
  */
class MapDBStorage(dataDirOption: Option[String] = None) extends Storage with FileSupport {

  private val loadedConfig = ConfigFactory.load()
  // The explicit constructor argument wins over the ckite.datadir config key.
  private val dataDir = dataDirOption getOrElse loadedConfig.getString("ckite.datadir")

  private val logDir = s"$dataDir/log"
  private val snapshotsDir = s"$dataDir/snapshots"
  private val stateDir = s"$dataDir/state"

  override val log: Log = new MapDBPersistentLog(logDir)

  // Durable vote state: current term plus the member voted for.
  private val stateDB = DBMaker.newFileDB(file(stateDir, "ckite-mapdb-state")).make()
  private val voteTerm = stateDB.getAtomicInteger("term")
  private val voteMember = stateDB.getAtomicString("memberId")

  // Latest snapshot, kept under a single fixed key.
  private val snapshotsDB = DBMaker.newFileDB(file(snapshotsDir, "ckite-mapdb-snapshots")).mmapFileEnable().make()
  private val snapshotsMap = snapshotsDB.getHashMap[String, Array[Byte]]("snapshotsMap")

  override def saveVote(vote: Vote): Unit = {
    voteTerm.set(vote.term)
    voteMember.set(vote.member)
    stateDB.commit()
  }

  override def retrieveLatestVote(): Option[Vote] = {
    val term = voteTerm.get()
    val member = voteMember.get()
    // Term 0 with an empty member id means no vote was ever persisted.
    if (term == 0 && member.isEmpty) None else Some(Vote(term, member))
  }

  override def saveSnapshot(snapshot: Snapshot): Unit = {
    snapshotsMap.put("snapshot", serializeSnapshot(snapshot))
    snapshotsDB.commit()
  }

  override def retrieveLatestSnapshot(): Option[Snapshot] =
    Option(snapshotsMap.get("snapshot")) map deserializeSnapshot

  private def serializeSnapshot(snapshot: Snapshot): Array[Byte] = Serializer.serialize(snapshot)

  private def deserializeSnapshot(bytes: Array[Byte]): Snapshot = Serializer.deserialize(bytes)

}
56 |
/** Factory helpers for [[MapDBStorage]]. */
object MapDBStorage {
  /** Storage rooted at an explicit data directory. */
  def apply(dataDir: String) = new MapDBStorage(Some(dataDir))
  /** Storage using the configured ckite.datadir entry. */
  def apply() = new MapDBStorage()
}
61 |
--------------------------------------------------------------------------------
/ckite-mapdb/src/test/scala/ckite/mapdb/MapDBStorageTest.scala:
--------------------------------------------------------------------------------
1 | package ckite.mapdb
2 |
3 | import java.nio.ByteBuffer
4 |
5 | import ckite.SingleClusterConfiguration
6 | import ckite.rlog.{ Vote, Snapshot }
7 | import ckite.rpc.{ NoOp, LogEntry }
8 | import org.scalatest.{ Matchers, FlatSpec }
9 |
10 | import scala.concurrent.{ Await, Future }
11 | import scala.concurrent.duration._
12 | import scala.concurrent.ExecutionContext.Implicits.global
13 |
/** Round-trip tests for MapDBStorage: snapshot, vote and log persistence. */
class MapDBStorageTest extends FlatSpec with Matchers {

  "A MapDBStorage" should "store and retrieve snapshots" in {
    val storage = MapDBStorage(dataDir)
    val expected = Snapshot(1, 1, SingleClusterConfiguration(Set("m1", "m2"), 1), ByteBuffer.wrap(Array[Byte](1)))
    storage.saveSnapshot(expected)

    storage.retrieveLatestSnapshot() shouldBe Some(expected)
  }

  it should "save and restore latest vote" in {
    val storage = MapDBStorage(dataDir)
    val vote = Vote(1, "m1")

    storage.saveVote(vote)

    storage.retrieveLatestVote() shouldBe Some(vote)
  }

  "A MapDBStorage log" should "store and retrieve entries" in {
    val storage = MapDBStorage(dataDir)

    storage.log.discardEntriesFrom(1)

    val appends = (1 to 5) map { i ⇒
      storage.log.append(LogEntry(1, i, NoOp()))
    }
    Await.ready(Future.sequence(appends), 3.seconds)

    storage.log.size shouldEqual 5
    (1 to 5) foreach { i ⇒
      storage.log.getEntry(i) shouldEqual LogEntry(1, i, NoOp())
    }
  }

  // Fresh directory per call so test cases never share on-disk state.
  private def dataDir = s"/tmp/ckite/test-${System.currentTimeMillis()}"
}
56 |
--------------------------------------------------------------------------------
/project/Build.scala:
--------------------------------------------------------------------------------
1 | import sbt.Keys._
2 | import sbt._
3 | import sbt.Defaults.itSettings
4 | import spray.revolver.RevolverPlugin._
5 |
object CKite extends Build {

  import Dependencies._
  import Settings._

  // Root project: aggregates every module so compile/test fan out, but
  // publishes no artifact itself (noPublishing).
  lazy val ckite: Project = Project("ckite", file("."))
    .aggregate(ckiteCore, ckiteFinagle, ckiteMapDB)
    .settings(basicSettings: _*)
    .settings(sonatypeSettings: _*)
    .settings(formatSettings: _*)
    .settings(noPublishing: _*)

  // Core consensus implementation; compile deps limited to slf4j, typesafe
  // config and chill (serialization).
  lazy val ckiteCore: Project = Project("ckite-core", file("ckite-core"))
    .settings(basicSettings: _*)
    .settings(sonatypeSettings: _*)
    .settings(formatSettings: _*)
    .settings(libraryDependencies ++=
      compile(slf4j, config, chill) ++
      test(scalaTest, logback))

  // Finagle/Thrift RPC transport module, layered on ckite-core.
  lazy val ckiteFinagle: Project = Project("ckite-finagle", file("ckite-finagle"))
    .dependsOn(ckiteCore)
    .settings(basicSettings: _*)
    .settings(sonatypeSettings: _*)
    .settings(formatSettings: _*)
    .settings(libraryDependencies ++=
      compile(slf4j, scrooge, finagleCore, finagleThrift) ++
      test(scalaTest, logback, finagleHttp, jacksonAfterBurner, jacksonScala))

  // MapDB-backed persistent storage module, layered on ckite-core.
  lazy val ckiteMapDB: Project = Project("ckite-mapdb", file("ckite-mapdb"))
    .dependsOn(ckiteCore)
    .settings(basicSettings: _*)
    .settings(sonatypeSettings: _*)
    .settings(formatSettings: _*)
    .settings(libraryDependencies ++=
      compile(mapdb) ++
      test(scalaTest, logback))

}
--------------------------------------------------------------------------------
/project/Dependencies.scala:
--------------------------------------------------------------------------------
1 | import sbt._
2 |
// Central declaration of all third-party module ids and the scope helpers
// used by Build.scala.
object Dependencies {

  val finagleV = "6.43.0"
  val jacksonV = "2.4.4"

  val slf4j = "org.slf4j" % "slf4j-api" % "1.7.7"
  val scrooge = "com.twitter" %% "scrooge-core" % "4.15.0"
  // NOTE(review): these excludes hard-code the _2.11 artifact suffix, so they
  // are silently ineffective when cross-building for 2.12 (see
  // CrossScalaVersions in Settings) — confirm and consider a cross-version
  // aware exclusion.
  val finagleCore = "com.twitter" %% "finagle-core" % finagleV exclude("com.twitter", "util-logging_2.11") exclude("com.twitter", "util-app_2.11")
  val finagleThrift = "com.twitter" %% "finagle-thrift" % finagleV
  val finagleHttp = "com.twitter" %% "finagle-http" % finagleV
  val config = "com.typesafe" % "config" % "1.0.2"
  val mapdb = "org.mapdb" % "mapdb" % "0.9.13"
  val chill = "com.twitter" %% "chill" % "0.9.3"
  val jacksonAfterBurner = "com.fasterxml.jackson.module" % "jackson-module-afterburner" % jacksonV
  val jacksonScala = "com.fasterxml.jackson.module" %% "jackson-module-scala" % jacksonV
  val scalaTest = "org.scalatest" %% "scalatest" % "3.0.5"
  val logback = "ch.qos.logback" % "logback-classic" % "1.1.2"
  val thrift = "org.apache.thrift" % "libthrift" % "0.9.2"

  // Scope helpers: tag a set of modules with an ivy configuration.
  def compile(deps: ModuleID*): Seq[ModuleID] = deps map (_ % "compile")
  def provided(deps: ModuleID*): Seq[ModuleID] = deps map (_ % "provided")
  def test(deps: ModuleID*): Seq[ModuleID] = deps map (_ % "test")
  def runtime(deps: ModuleID*): Seq[ModuleID] = deps map (_ % "runtime")
  def it(deps: ModuleID*): Seq[ModuleID] = deps map (_ % "it")

}
--------------------------------------------------------------------------------
/project/Settings.scala:
--------------------------------------------------------------------------------
1 | import com.typesafe.sbteclipse.core.EclipsePlugin.{EclipseCreateSrc, EclipseKeys}
2 | import sbt._
3 | import Keys._
4 | import com.typesafe.sbt.SbtScalariform
5 | import com.typesafe.sbt.SbtScalariform.ScalariformKeys
6 | import scalariform.formatter.preferences._
7 |
// Shared sbt settings: compiler flags, publishing/POM metadata, scalariform
// formatting and eclipse/integration-test helpers.
object Settings {

  val ScalaVersion = "2.11.12"
  val CrossScalaVersions = Seq("2.12.8", "2.11.12")

  lazy val basicSettings = Seq(
    scalaVersion := ScalaVersion,
    crossScalaVersions := CrossScalaVersions,
    organization := "io.ckite",
    version := "0.2.2-SNAPSHOT",
    // FIX: HTTPS instead of plain HTTP — newer sbt rejects insecure resolvers
    // and plain HTTP allows artifact tampering.
    resolvers ++= Seq("twitter-repo" at "https://maven.twttr.com"),
    fork in(Test, run) := true,
    javacOptions := Seq(
      "-source", "1.8", "-target", "1.8"
    ),
    scalacOptions := Seq(
      "-encoding",
      "utf8",
      "-g:vars",
      "-feature",
      "-unchecked",
      "-optimise",
      "-deprecation",
      "-target:jvm-1.8",
      "-language:postfixOps",
      "-language:implicitConversions",
      "-language:reflectiveCalls",
      "-Xlog-reflective-calls"
    ))

  lazy val sonatypeSettings = Seq(
    publishMavenStyle := true,
    publishArtifact in Test := false,
    pomIncludeRepository := { x => false},
    crossPaths := false,
    // Snapshots and releases go to Sonatype OSS; *-LOCAL versions publish to
    // the local maven repository for testing.
    publishTo := {
      val nexus = "https://oss.sonatype.org/"
      if (version.value.trim.endsWith("SNAPSHOT"))
        Some("snapshots" at nexus + "content/repositories/snapshots")
      else if (version.value.trim.endsWith("LOCAL"))
        Some(Resolver.file("file", new File(Path.userHome.absolutePath+"/.m2/repository")))
      else
        Some("releases" at nexus + "service/local/staging/deploy/maven2")
    },
    // FIX: the pomExtra block had lost its XML markup (bare text like
    // "http://ckite.io" is not valid Scala). Reconstructed the standard
    // url/licenses/scm/developers POM sections from the surviving text.
    pomExtra := {
      <url>http://ckite.io</url>
      <licenses>
        <license>
          <name>Apache 2</name>
          <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
          <distribution>repo</distribution>
        </license>
      </licenses>
      <scm>
        <connection>scm:git:github.com/pablosmedina/ckite.git</connection>
        <developerConnection>scm:git:git@github.com:pablosmedina/ckite.git</developerConnection>
        <url>github.com/pablosmedina/ckite.git</url>
      </scm>
      <developers>
        <developer>
          <id>pmedina</id>
          <name>Pablo S. Medina</name>
          <url>https://twitter.com/pablosmedina</url>
        </developer>
      </developers>
    }
  )

  lazy val formatSettings = SbtScalariform.scalariformSettings ++ Seq(
    ScalariformKeys.preferences in Compile := formattingPreferences,
    ScalariformKeys.preferences in Test := formattingPreferences
  )

  def formattingPreferences =
    FormattingPreferences()
      .setPreference(RewriteArrowSymbols, true)
      .setPreference(AlignParameters, false)
      .setPreference(AlignSingleLineCaseStatements, true)
      .setPreference(DoubleIndentClassDeclaration, true)

  lazy val eclipseSettings = Seq(EclipseKeys.configurations := Set(Compile, Test, IntegrationTest), EclipseKeys.createSrc := EclipseCreateSrc.Default + EclipseCreateSrc.Resource)

  lazy val itExtraSettings = Seq(
    parallelExecution in IntegrationTest := false
  )

  // Attached to the root aggregate so it never publishes an artifact.
  val noPublishing = Seq(publish :=(), publishLocal :=(), publishArtifact := false)

}
--------------------------------------------------------------------------------
/project/build.properties:
--------------------------------------------------------------------------------
# sbt launcher version (0.13.x line) this project is built with.
sbt.version = 0.13.18
--------------------------------------------------------------------------------
/project/plugins.sbt:
--------------------------------------------------------------------------------
// FIX: serve the Twitter repo over HTTPS — newer sbt rejects plain-HTTP
// resolvers and plain HTTP is open to artifact tampering.
resolvers += "twitter-repo" at "https://maven.twttr.com"
resolvers += "JBoss" at "https://repository.jboss.org"

addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "2.4.0")

addSbtPlugin("com.github.mpeltonen" % "sbt-idea" % "1.6.0")

// NOTE(review): %% with addSbtPlugin is unusual (addSbtPlugin already appends
// the sbt/Scala cross suffix); kept as-is because the scrooge docs published
// it this way — confirm the resolved artifact name before changing.
addSbtPlugin("com.twitter" %% "scrooge-sbt-plugin" % "4.15.0")

addSbtPlugin("io.spray" % "sbt-revolver" % "0.7.2")

addSbtPlugin("com.typesafe.sbt" % "sbt-scalariform" % "1.3.0")

addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.7.5")
--------------------------------------------------------------------------------