├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── backbuild ├── ckite-core └── src │ ├── main │ ├── resources │ │ └── reference.conf │ └── scala │ │ └── ckite │ │ ├── CKite.scala │ │ ├── CKiteBuilder.scala │ │ ├── CKiteClient.scala │ │ ├── ConfigAware.scala │ │ ├── ConfigSupport.scala │ │ ├── Configuration.scala │ │ ├── Consensus.scala │ │ ├── LeaderAnnouncer.scala │ │ ├── LocalMember.scala │ │ ├── Member.scala │ │ ├── Membership.scala │ │ ├── RLog.scala │ │ ├── Raft.scala │ │ ├── RemoteMember.scala │ │ ├── exception │ │ ├── LeaderTimeoutException.scala │ │ ├── LostLeadershipException.scala │ │ └── WriteTimeoutException.scala │ │ ├── rlog │ │ ├── FixedSizeLogCompactionPolicy.scala │ │ ├── Log.scala │ │ ├── LogAppender.scala │ │ ├── LogCompactionPolicy.scala │ │ ├── Snapshot.scala │ │ ├── SnapshotManager.scala │ │ └── Storage.scala │ │ ├── rpc │ │ ├── AppendEntries.scala │ │ ├── AppendEntriesResponse.scala │ │ ├── ClusterConfigurationCommand.scala │ │ ├── Command.scala │ │ ├── GetMembersRequest.scala │ │ ├── GetMembersResponse.scala │ │ ├── InstallSnapshot.scala │ │ ├── JoinMember.scala │ │ ├── JoinMemberResponse.scala │ │ ├── JointConfiguration.scala │ │ ├── LogEntry.scala │ │ ├── NewConfiguration.scala │ │ ├── NoOps.scala │ │ ├── ReadCommand.scala │ │ ├── RequestVote.scala │ │ ├── RequestVoteResponse.scala │ │ ├── Rpc.scala │ │ ├── RpcClient.scala │ │ ├── RpcService.scala │ │ └── WriteCommand.scala │ │ ├── statemachine │ │ ├── CommandExecutor.scala │ │ ├── StateMachine.scala │ │ └── j │ │ │ ├── StateMachine.scala │ │ │ └── StateMachineWrapper.scala │ │ ├── states │ │ ├── Candidate.scala │ │ ├── Follower.scala │ │ ├── Joiner.scala │ │ ├── Leader.scala │ │ ├── Starter.scala │ │ ├── State.scala │ │ └── Stopped.scala │ │ ├── stats │ │ ├── StateInfo.scala │ │ └── Stats.scala │ │ ├── storage │ │ └── MemoryStorage.scala │ │ └── util │ │ ├── ConcurrencySupport.scala │ │ ├── Conversions.scala │ │ ├── CustomThreadFactory.scala │ │ ├── LockSupport.scala │ │ ├── 
Logging.scala │ │ └── Serializer.scala │ └── test │ ├── resources │ └── logback-test.xml │ └── scala │ └── ckite │ ├── CKiteIntegrationTest.scala │ ├── SerializerTest.scala │ ├── TestRpc.scala │ └── example │ ├── Get.scala │ ├── KVStore.scala │ └── Put.scala ├── ckite-finagle └── src │ └── main │ ├── resources │ └── reference.conf │ ├── scala │ └── ckite │ │ └── rpc │ │ ├── FinagleThriftRpc.scala │ │ └── thrift │ │ ├── FinagleThriftClient.scala │ │ ├── FinagleThriftServer.scala │ │ └── ThriftConverters.scala │ └── thrift │ └── ckite │ └── rpc │ └── thrift │ └── ckite.thrift ├── ckite-mapdb └── src │ ├── main │ └── scala │ │ └── ckite │ │ └── mapdb │ │ ├── FileSupport.scala │ │ ├── MapDBPersistentLog.scala │ │ └── MapDBStorage.scala │ └── test │ └── scala │ └── ckite │ └── mapdb │ └── MapDBStorageTest.scala └── project ├── Build.scala ├── Dependencies.scala ├── Settings.scala ├── build.properties └── plugins.sbt /.gitignore: -------------------------------------------------------------------------------- 1 | bin 2 | target 3 | .cache 4 | .classpath 5 | .project 6 | .idea 7 | *.iml 8 | *.ipr 9 | *.iws 10 | src/main/scala/ckite/rpc/thrift/CKiteService.scala 11 | src/main/scala/ckite/rpc/thrift/CKiteService.scala 12 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: scala 2 | sbt_args: -sbt-version 0.13.7 3 | scala: 4 | - 2.11.7 5 | jdk: 6 | - oraclejdk8 7 | sudo: false 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This software is licensed under the Apache 2 license, quoted below. 2 | 3 | Copyright © 2013 the CKite project 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); you may not 6 | use this file except in compliance with the License. 
You may obtain a copy of 7 | the License at 8 | 9 | [http://www.apache.org/licenses/LICENSE-2.0] 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 13 | WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 14 | License for the specific language governing permissions and limitations under 15 | the License. 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | CKite - JVM Raft [![Build Status](https://api.travis-ci.org/pablosmedina/ckite.png)](https://travis-ci.org/pablosmedina/ckite) 2 | ===== 3 | 4 | ## Overview 5 | 6 | A __JVM__ implementation of the [Raft distributed consensus algorithm](http://raftconsensus.github.io/) written in Scala. CKite is a `consensus library` with an easy to use API intended to be used by distributed applications needing consensus agreement. 7 | 8 | It is designed to be agnostic of both the mechanism used to exchange messages between members `(RPC)` and the medium to store the Log `(Storage)`. CKite has a modular architecture with pluggable `RPC` and `Storage` implementations. Custom RPCs and Storages can be easily implemented and configured to be used by CKite. 9 | 10 | ## Status 11 | 12 | CKite covers all the major topics of Raft including leader election, log replication, log compaction and cluster membership changes. It currently has two implemented modules: 13 | 14 | * ckite-finagle: Finagle based RPC module 15 | * ckite-mapdb: MapDB based Storage module 16 | 17 | Checkout the latest __Release 0.2.1__ following the instructions detailed below to start playing with it. 
18 | 19 | ## Features 20 | 21 | * Leader Election 22 | * Log Replication 23 | * Cluster Membership Changes 24 | * Log Compaction 25 | * Twitter Finagle integration 26 | * MapDB integration 27 | 28 | ## Architecture 29 | 30 | * `ckite-core` - The core of the library. It implements the Raft consensus protocol. It can be configured with RPCs and Storages. 31 | 32 | * `ckite-finagle` - Twitter Finagle based RPC implementation. It uses a Thrift protocol to exchange Raft messages between members. 33 | 34 | * `ckite-mapdb` - MapDB based storage implementation. MapDB provides concurrent Maps, Sets and Queues backed by disk storage or off-heap-memory. It is a fast and easy to use embedded Java database engine. 35 | 36 | Comming soon: ckite-chronicle, ckite-akka. 37 | 38 | ## Getting started (Scala) 39 | 40 | #### SBT settings 41 | 42 | The latest release 0.2.1 is in Maven central. Add the following sbt dependency to your project settings: 43 | 44 | ```scala 45 | libraryDependencies += "io.ckite" %% "ckite-core" % "0.2.1" 46 | ``` 47 | ```scala 48 | libraryDependencies += "io.ckite" %% "ckite-finagle" % "0.2.1" 49 | ``` 50 | ```scala 51 | libraryDependencies += "io.ckite" %% "ckite-mapdb" % "0.2.1" 52 | ``` 53 | 54 | ## Getting started (Java) 55 | 56 | #### Maven settings 57 | 58 | Add the following maven dependency to your pom.xml: 59 | 60 | ```xml 61 | 62 | io.ckite 63 | ckite-core 64 | 0.2.1 65 | 66 | ``` 67 | 68 | ## Example (See [KVStore](https://github.com/pablosmedina/kvstore)) 69 | 70 | #### 1) Create a StateMachine 71 | ```scala 72 | //KVStore is an in-memory distributed Map allowing Puts and Gets operations 73 | class KVStore extends StateMachine { 74 | 75 | private var map = Map[String, String]() 76 | private var lastIndex: Long = 0 77 | 78 | //Called when a consensus has been reached for a WriteCommand 79 | //index associated to the write is provided to implement your own persistent semantics 80 | //see lastAppliedIndex 81 | def applyWrite = { 82 | case (index, 
Put(key: String, value: String)) => { 83 | map.put(key, value) 84 | lastIndex = index 85 | value 86 | } 87 | } 88 | 89 | //called when a read command has been received 90 | def applyRead = { 91 | case Get(key) => map.get(key) 92 | } 93 | 94 | //CKite needs to know the last applied write on log replay to 95 | //provide exactly-once semantics 96 | //If no persistence is needed then state machines can just return zero 97 | def getLastAppliedIndex: Long = lastIndex 98 | 99 | //called during Log replay on startup and upon installSnapshot requests 100 | def restoreSnapshot(byteBuffer: ByteBuffer) = { 101 | map = Serializer.deserialize[Map[String, String]](byteBuffer.array()) 102 | } 103 | //called when Log compaction is required 104 | def takeSnapshot(): ByteBuffer = ByteBuffer.wrap(Serializer.serialize(map)) 105 | 106 | } 107 | 108 | //WriteCommands are replicated under Raft rules 109 | case class Put(key: String, value: String) extends WriteCommand[String] 110 | 111 | //ReadCommands are not replicated but forwarded to the Leader 112 | case class Get(key: String) extends ReadCommand[Option[String]] 113 | ``` 114 | #### 2) Create a CKite instance using the builder (minimal) 115 | ```scala 116 | val ckite = CKiteBuilder().listenAddress("node1:9091").rpc(FinagleThriftRpc) //Finagle based transport 117 | .stateMachine(new KVStore()) //KVStore is an implementation of the StateMachine trait 118 | .bootstrap(true) //bootstraps a new cluster. only needed just the first time for the very first node 119 | .build 120 | ``` 121 | 122 | #### 3) Create a CKite instance using the builder (extended) 123 | ```scala 124 | val ckite = CKiteBuilder().listenAddress("localhost:9091").rpc(FinagleThriftRpc) 125 | .members(Seq("localhost:9092","localhost:9093")) //optional seeds to join the cluster 126 | .minElectionTimeout(1000).maxElectionTimeout(1500) //optional 127 | .heartbeatsPeriod(250) //optional. 
period to send heartbeats interval when being Leader 128 | .dataDir("/home/ckite/data") //dataDir for persistent state (log, terms, snapshots, etc...) 129 | .stateMachine(new KVStore()) //KVStore is an implementation of the StateMachine trait 130 | .sync(false) //disables log sync to disk 131 | .flushSize(10) //max batch size when flushing log to disk 132 | .build 133 | ``` 134 | #### 4) Start ckite 135 | ```scala 136 | ckite.start() 137 | ``` 138 | 139 | #### 4) Send a write command 140 | ```scala 141 | //this Put command is forwarded to the Leader and applied under Raft rules 142 | val writeFuture:Future[String] = ckite.write(Put("key1","value1")) 143 | ``` 144 | 145 | #### 5) Send a consistent read command 146 | ```scala 147 | //consistent read commands are forwarded to the Leader 148 | val readFuture:Future[Option[String]] = ckite.read(Get("key1")) 149 | ``` 150 | #### 6) Add a new Member 151 | ```scala 152 | //as write commands, cluster membership changes are forwarded to the Leader 153 | ckite.addMember("someHost:9094") 154 | ``` 155 | 156 | #### 7) Remove a Member 157 | ```scala 158 | //as write commands, cluster membership changes are forwarded to the Leader 159 | ckite.removeMember("someHost:9094") 160 | ``` 161 | 162 | #### 8) Send a local read command 163 | ```scala 164 | //alternatively you can read from its local state machine allowing possible stale values 165 | val value = ckite.readLocal(Get("key1")) 166 | ``` 167 | 168 | #### 9) Check leadership 169 | ```scala 170 | //if necessary waits for elections to end 171 | ckite.isLeader() 172 | ``` 173 | #### 10) Stop ckite 174 | ```scala 175 | ckite.stop() 176 | ``` 177 | 178 | ## How CKite bootstraps 179 | 180 | To start a new cluster you have to run the very first node turning on the bootstrap parameter. This will create an initial configuration with just the first node. The next nodes starts by pointing to the existing ones to join the cluster. 
181 | You can bootstrap the first node using the builder, overriding ckite.bootstrap in your application.conf or by starting your application with a system property -Dckite.bootstrap=true. See [KVStore](https://github.com/pablosmedina/kvstore) for more details. 182 | 183 | 184 | #### bootstrapping the first node using the builder 185 | ```scala 186 | val ckite = CKiteBuilder().listenAddress("node1:9091").rpc(FinagleThriftRpc) 187 | .dataDir("/home/ckite/data") //dataDir for persistent state (log, terms, snapshots, etc...) 188 | .stateMachine(new KVStore()) //KVStore is an implementation of the StateMachine trait 189 | .bootstrap(true) //bootstraps a new cluster. only needed just the first time for the very first node 190 | .build 191 | ``` 192 | 193 | ## Implementation details 194 | 195 | * Built in Scala 2.11.7 and JDK 8. 196 | * [Twitter Finagle](http://twitter.github.io/finagle/). 197 | * [Thrift](http://thrift.apache.org/). 198 | * [Twitter Scrooge](http://twitter.github.io/scrooge/). 199 | * [MapDB](http://www.mapdb.org/) 200 | * [Kryo](https://github.com/EsotericSoftware/kryo) 201 | * Chronicle (to be implemented) 202 | 203 | 204 | ## Contributions 205 | 206 | Feel free to contribute to CKite!. Any kind of help will be very welcome. We are happy to receive pull requests, issues, discuss implementation details, analyze the raft algorithm and whatever it makes CKite a better library. Checkout the issues. You can start from there! 
207 | 208 | 209 | ## Importing the project into IntelliJ IDEA 210 | 211 | To generate the necessary IDE config files first run the following command and then open the project as usual: 212 | 213 | sbt gen-idea 214 | 215 | ## Importing the project into Eclipse 216 | 217 | To generate the necessary IDE config files first run the following command and then open the project as usual: 218 | 219 | sbt eclipse 220 | -------------------------------------------------------------------------------- /backbuild: -------------------------------------------------------------------------------- 1 | name := "ckite" 2 | 3 | organization := "io.ckite" 4 | 5 | version := "0.2.0-SNAPSHOT" 6 | 7 | scalaVersion := "2.11.4" 8 | 9 | publishMavenStyle := true 10 | 11 | publishArtifact in Test := false 12 | 13 | pomIncludeRepository := { x => false} 14 | 15 | libraryDependencies ++= Seq( 16 | "org.slf4j" % "slf4j-api" % "1.6.4", 17 | "com.twitter" %% "scrooge-core" % "3.17.0" exclude("org.scala-lang", "scala-library"), 18 | "org.apache.thrift" % "libthrift" % "0.9.1" exclude("org.apache.httpcomponents", "httpclient") exclude("org.apache.httpcomponents", "httpcore") exclude("org.slf4j", "slf4j-api") exclude("org.apache.commons", "commons-lang3"), 19 | "com.twitter" %% "finagle-core" % "6.24.0" exclude("com.twitter", "util-logging_2.11") exclude("com.twitter", "util-app_2.11"), 20 | "com.twitter" %% "finagle-thrift" % "6.24.0" exclude("org.scala-lang", "scala-library") exclude("org.apache.thrift", "libthrift"), 21 | "com.typesafe" % "config" % "1.0.2", 22 | "org.mapdb" % "mapdb" % "0.9.13", 23 | "com.esotericsoftware.kryo" % "kryo" % "2.22", 24 | "com.twitter" %% "finagle-http" % "6.24.0" % "test", 25 | "com.fasterxml.jackson.module" %% "ja" + 26 | "ckson-module-scala" % "2.4.4" % "test", 27 | "org.scalatest" %% "scalatest" % "2.2.2" % "test", 28 | "ch.qos.logback" % "logback-classic" % "1.1.1" % "test", 29 | "junit" % "junit" % "4.8.1" % "test" 30 | ) 31 | 32 | 33 | 
// Selects the publish repository based on the version suffix.
//
// BUG FIX: the original body was two *separate* `if` expressions. The first
// one (the SNAPSHOT check) had no `else`, so its Some(...) result was
// computed and silently discarded; evaluation then fell through to the
// second if/else, and SNAPSHOT versions were wrongly published to the
// release staging repository. Chaining the branches with `else if` makes
// the whole block a single expression that returns the correct resolver:
//   *SNAPSHOT -> Sonatype snapshots repo
//   *LOCAL    -> local ~/.m2 repository
//   otherwise -> Sonatype release staging repo
publishTo <<= version { v: String =>
  val nexus = "https://oss.sonatype.org/"
  if (v.trim.endsWith("SNAPSHOT"))
    Some("snapshots" at nexus + "content/repositories/snapshots")
  else if (v.trim.endsWith("LOCAL"))
    Some(Resolver.file("file", new File(Path.userHome.absolutePath + "/.m2/repository")))
  else
    Some("releases" at nexus + "service/local/staging/deploy/maven2")
}
Medina 81 | https://twitter.com/pablosmedina 82 | 83 | 84 | } -------------------------------------------------------------------------------- /ckite-core/src/main/resources/reference.conf: -------------------------------------------------------------------------------- 1 | ckite { 2 | 3 | # Address and port where this Server listens for RPCs (host:port) 4 | listen-address = "localhost:9091" 5 | 6 | # Bootstraps a new Cluster 7 | bootstrap = false 8 | 9 | # Addresses and ports of Members to be used as seeds when joining an existing Cluster 10 | members = [] 11 | 12 | # Directory location where CKite stores its persistent data 13 | datadir = "/tmp" 14 | 15 | # Timeout for waiting a Leader to be elected 16 | leader-timeout = 10s 17 | 18 | # Timeout for committing writes 19 | write-timeout = 2s 20 | 21 | append-entries { 22 | # Time between heartbeat (empty AppendEntries) pulses sent by the Leader 23 | period = 200ms 24 | } 25 | 26 | election { 27 | # Minimum timeout for starting an election when no receiving RPCs from the Leader 28 | min-timeout = 1s 29 | 30 | # Maximum timeout for starting an election when no receiving RPCs from the Leader 31 | max-timeout = 2s 32 | 33 | # Timeout for collecting votes 34 | voting-timeout = 1 s 35 | } 36 | 37 | log { 38 | # Max amount of entries to be flushed in a batch 39 | flush-size = 1000 40 | 41 | # Enable or disable disk sync 42 | sync = true 43 | 44 | # Threshold of LogEntries to start a Compaction 45 | compaction-threshold = 10000 46 | } 47 | 48 | } -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/CKite.scala: -------------------------------------------------------------------------------- 1 | package ckite 2 | 3 | import ckite.rpc.{ ReadCommand, WriteCommand } 4 | 5 | import scala.concurrent.Future 6 | 7 | /** 8 | * A CKite is a member of the cluster. 
It exchanges messages with its peers to achieve consensus 9 | * on the submitted write and read commands according to the Raft consensus protocol. 10 | */ 11 | trait CKite { 12 | 13 | /** 14 | * Starts CKite. It begins the communication with the rest of the cluster. 15 | */ 16 | def start(): Unit 17 | 18 | /** 19 | * Stops CKite. It no longer receives or sends messages to the cluster. It can't be started again. 20 | */ 21 | def stop(): Unit 22 | 23 | /** 24 | * Consistently replicates and applies a command under Raft consensus rules. 25 | * 26 | * @param writeCommand to be applied 27 | * @tparam T 28 | * @return a Future with the result of applying the Write to the StateMachine 29 | */ 30 | def write[T](writeCommand: WriteCommand[T]): Future[T] 31 | 32 | /** 33 | * Consistent read. It is forwarded and answered by the Leader according to Raft consensus rules. 34 | * 35 | * @param readCommand to be forwarded and applied to the Leader StateMachine 36 | * @tparam T 37 | * @return a Future with the result of applying the Read to the StateMachine 38 | */ 39 | def read[T](readCommand: ReadCommand[T]): Future[T] 40 | 41 | /** 42 | * Consistently adds a new member to the cluster. 43 | * * 44 | * @param memberId to be added 45 | * @return future with true if the memberId could be added to the cluster and false if not 46 | */ 47 | def addMember(memberId: String): Future[Boolean] 48 | 49 | /** 50 | * Consistently removes a new member to the cluster. 
package ckite

import ckite.rlog.Storage
import ckite.rpc.Rpc
import ckite.statemachine.StateMachine
import ckite.statemachine.j.StateMachineWrapper
import ckite.storage.MemoryStorage
import com.typesafe.config.ConfigFactory

/**
 * Fluent builder for [[CKite]] instances.
 *
 * Collects the StateMachine, Rpc and Storage implementations plus any
 * configuration overrides and wires them together in [[build]].
 * StateMachine and Rpc are mandatory; Storage defaults to [[MemoryStorage]].
 */
class CKiteBuilder {

  private val configuration = new Configuration(ConfigFactory.load())
  private var stateMachine: Option[StateMachine] = None
  private var rpc: Option[Rpc] = None
  private var storage: Storage = MemoryStorage()

  /** Minimum election timeout in milliseconds. */
  def minElectionTimeout(minElectionTimeout: Int): CKiteBuilder = {
    configuration.withMinElectionTimeout(minElectionTimeout)
    this
  }

  /** Maximum election timeout in milliseconds. */
  def maxElectionTimeout(maxElectionTimeout: Int): CKiteBuilder = {
    configuration.withMaxElectionTimeout(maxElectionTimeout)
    this
  }

  /** Period in milliseconds between heartbeats sent while being Leader. */
  def heartbeatsPeriod(heartbeatsInterval: Int): CKiteBuilder = {
    configuration.withHeartbeatsInterval(heartbeatsInterval)
    this
  }

  /** Address and port (host:port) this member listens on for RPCs. */
  def listenAddress(localBinding: String): CKiteBuilder = {
    configuration.withLocalBinding(localBinding)
    this
  }

  /** Seed members (host:port) used when joining an existing cluster. */
  def members(memberBindings: Seq[String]): CKiteBuilder = {
    configuration.withMemberBindings(memberBindings)
    this
  }

  /** Comma-separated variant of [[members]]. */
  def members(memberBindings: String): CKiteBuilder = {
    configuration.withMemberBindings(memberBindings.split(","))
    this
  }

  /** Amount of log entries that triggers a log compaction. */
  def compactionThreshold(threshold: Int): CKiteBuilder = {
    configuration.withLogCompactionThreshold(threshold)
    this
  }

  /**
   * Directory for persistent state (log, terms, snapshots, etc).
   *
   * FIX (consistency): this builder method was missing although the README
   * documents `.dataDir(...)` and Configuration exposes withDataDir.
   */
  def dataDir(dataDir: String): CKiteBuilder = {
    configuration.withDataDir(dataDir)
    this
  }

  /**
   * Enables or disables log sync to disk.
   *
   * FIX (consistency): missing although the README documents `.sync(false)`
   * and Configuration exposes withSyncEnabled.
   */
  def sync(enabled: Boolean): CKiteBuilder = {
    configuration.withSyncEnabled(enabled)
    this
  }

  /**
   * Max batch size when flushing the log to disk.
   *
   * FIX (consistency): missing although the README documents `.flushSize(10)`
   * and Configuration exposes withFlushSize.
   */
  def flushSize(flushSize: Long): CKiteBuilder = {
    configuration.withFlushSize(flushSize)
    this
  }

  /** Scala StateMachine to which committed commands are applied. Mandatory. */
  def stateMachine(stateMachine: StateMachine): CKiteBuilder = {
    this.stateMachine = Some(stateMachine)
    this
  }

  /** Java StateMachine variant; wrapped to the Scala trait. */
  def stateMachine(stateMachine: ckite.statemachine.j.StateMachine): CKiteBuilder = {
    this.stateMachine = Some(new StateMachineWrapper(stateMachine))
    this
  }

  /** Bootstraps a new cluster. Only needed the first time, on the very first node. */
  def bootstrap(enabled: Boolean): CKiteBuilder = {
    configuration.bootstrap(enabled)
    this
  }

  /** Rpc implementation used to exchange Raft messages. Mandatory. */
  def rpc(someRpc: Rpc): CKiteBuilder = {
    this.rpc = Some(someRpc)
    this
  }

  /** Storage implementation for persistent state. Defaults to in-memory. */
  def storage(someStorage: Storage): CKiteBuilder = {
    this.storage = someStorage
    this
  }

  // Fails fast with an IllegalStateException if no StateMachine was provided.
  private def configuredStateMachine() = {
    stateMachine.getOrElse(throw new IllegalStateException("StateMachine required"))
  }

  // Fails fast with an IllegalStateException if no Rpc was provided.
  private def configuredRpc() = {
    rpc.getOrElse(throw new IllegalStateException("RPC required"))
  }

  /**
   * Builds the CKite instance from the collected pieces.
   *
   * @throws IllegalStateException if StateMachine or Rpc were not configured
   */
  def build: CKite = {
    val stateMachine = configuredStateMachine()
    val rpc = configuredRpc()
    val raft = Raft(stateMachine, rpc, storage, configuration)
    CKiteClient(raft, rpc.createServer(raft, configuration.config), this)
  }

}

object CKiteBuilder {
  def apply() = new CKiteBuilder()
}
package ckite

import com.typesafe.config.Config

/**
 * Mixin for components that carry the Typesafe Config of this CKite member.
 * The accessor is implicit so the Config can be passed transparently to
 * collaborators that take an implicit Config parameter.
 */
trait ConfigAware {

  implicit def config: Config
}
package ckite

import com.typesafe.config.Config
import com.typesafe.config.ConfigValueFactory
import scala.collection.JavaConverters._

/**
 * Typed read/write view over the Typesafe Config backing a CKite member.
 *
 * Each `withX` setter replaces the underlying immutable Config with a new one
 * holding the overridden value; each getter reads from the current Config
 * (defaults come from reference.conf).
 *
 * NOTE(review): `config` is a plain var mutated without synchronization —
 * presumably it is only written from a single thread during builder setup;
 * confirm before relying on it concurrently.
 * NOTE(review): `getMilliseconds` is deprecated in newer Typesafe Config
 * releases in favour of `getDuration`; kept as-is here.
 */
class Configuration(var config: Config) {

  // --- Configuration keys (paths into the Typesafe Config tree) ---

  val Bootstrap = "ckite.bootstrap"

  val MinElectionTimeout = "ckite.election.min-timeout"
  val MaxElectionTimeout = "ckite.election.max-timeout"
  val VotingTimeout = "ckite.election.voting-timeout"
  val ElectionWorkers = "ckite.election.workers"

  val WriteTimeout = "ckite.write-timeout"

  val HeartbeatsPeriod = "ckite.append-entries.period"
  val AppendEntriesWorkers = "ckite.append-entries.workers"

  val Members = "ckite.members"
  val LeaderTimeout = "ckite.leader-timeout"

  val ListenAddress = "ckite.listen-address"

  val CompactionThreshold = "ckite.log.compaction-threshold"
  val FlushSize = "ckite.log.flush-size"
  val Sync = "ckite.log.sync"
  val DataDir = "ckite.datadir"

  // --- Election timeouts ---

  /** Overrides the minimum election timeout (milliseconds). */
  def withMinElectionTimeout(minElectionTimeout: Int) = {
    config = config.withValue(MinElectionTimeout, ConfigValueFactory.fromAnyRef(minElectionTimeout))
  }

  /** Minimum election timeout in milliseconds. */
  def minElectionTimeout: Long = {
    config.getMilliseconds(MinElectionTimeout)
  }

  /** Overrides the maximum election timeout (milliseconds). */
  def withMaxElectionTimeout(maxElectionTimeout: Int) = {
    config = config.withValue(MaxElectionTimeout, ConfigValueFactory.fromAnyRef(maxElectionTimeout))
  }

  /** Maximum election timeout in milliseconds. */
  def maxElectionTimeout: Long = {
    config.getMilliseconds(MaxElectionTimeout)
  }

  // --- Heartbeats ---

  /** Overrides the period between Leader heartbeats (milliseconds). */
  def withHeartbeatsInterval(heartbeatsInterval: Int) = {
    config = config.withValue(HeartbeatsPeriod, ConfigValueFactory.fromAnyRef(heartbeatsInterval))
  }

  /** Period between Leader heartbeats in milliseconds. */
  def heartbeatsInterval: Long = {
    config.getMilliseconds(HeartbeatsPeriod)
  }

  // --- Local member binding and data directory ---

  /** Overrides the local listen address (host:port). */
  def withLocalBinding(localBinding: String) = {
    config = config.withValue(ListenAddress, ConfigValueFactory.fromAnyRef(localBinding))
  }

  /** Overrides the directory used for persistent state. */
  def withDataDir(dataDir: String) = {
    config = config.withValue(DataDir, ConfigValueFactory.fromAnyRef(dataDir))
  }

  /** Directory where persistent state (log, terms, snapshots) is stored. */
  def dataDir: String = {
    config.getString(DataDir)
  }

  /** Local listen address (host:port). */
  def localBinding: String = {
    config.getString(ListenAddress)
  }

  // --- Cluster membership seeds ---

  /** Overrides the seed members used to join an existing cluster. */
  def withMemberBindings(membersBindings: Seq[String]) = {
    config = config.withValue(Members, ConfigValueFactory.fromIterable(membersBindings.asJava))
  }

  // --- Log tuning ---

  /** Overrides the number of log entries that triggers compaction. */
  def withLogCompactionThreshold(threshold: Int) = {
    config = config.withValue(CompactionThreshold, ConfigValueFactory.fromAnyRef(threshold))
  }

  /** Overrides the max batch size when flushing the log to disk. */
  def withFlushSize(flushSize: Long) = {
    config = config.withValue(FlushSize, ConfigValueFactory.fromAnyRef(flushSize))
  }

  /** Enables or disables disk sync for the log. */
  def withSyncEnabled(syncEnabled: Boolean) = {
    config = config.withValue(Sync, ConfigValueFactory.fromAnyRef(syncEnabled))
  }

  // --- Leader / voting timeouts ---

  /** Overrides the timeout for waiting a Leader to be elected. */
  def withWaitForLeaderTimeout(waitForLeaderTimeout: Int) = {
    config = config.withValue(LeaderTimeout, ConfigValueFactory.fromAnyRef(waitForLeaderTimeout))
  }

  /** Overrides the timeout for collecting votes during an election. */
  def withCollectVotesTimeout(collectVotesTimeout: Int) = {
    config = config.withValue(VotingTimeout, ConfigValueFactory.fromAnyRef(collectVotesTimeout))
  }

  /** Timeout in milliseconds for waiting a Leader to be elected. */
  def waitForLeaderTimeout: Long = {
    config.getMilliseconds(LeaderTimeout)
  }

  /** Seed member addresses (host:port) as a Set. */
  def memberBindings: Set[String] = {
    config.getStringList(Members).asScala.toSet
  }

  /** Whether this member bootstraps a new cluster. */
  def bootstrap: Boolean = {
    config.getBoolean(Bootstrap)
  }

  /** Enables or disables bootstrapping a new cluster. */
  def bootstrap(enabled: Boolean) = {
    config = config.withValue(Bootstrap, ConfigValueFactory.fromAnyRef(enabled))
  }

  /** Timeout in milliseconds for collecting votes during an election. */
  def collectVotesTimeout: Long = {
    config.getMilliseconds(VotingTimeout)
  }

  /** Number of log entries that triggers a compaction. */
  def logCompactionThreshold: Long = {
    config.getLong(CompactionThreshold)
  }

  // Note: reads the write-timeout key; the name reflects its use for
  // AppendEntries replication rounds.
  def appendEntriesTimeout: Long = {
    config.getMilliseconds(WriteTimeout)
  }

  /** Worker pool size for AppendEntries processing. */
  def appendEntriesWorkers: Int = {
    config.getInt(AppendEntriesWorkers)
  }

  /** Worker pool size for election processing. */
  def electionWorkers: Int = {
    config.getInt(ElectionWorkers)
  }

  /** Whether log writes are synced to disk. */
  def syncEnabled: Boolean = config.getBoolean(Sync)

  /** Max batch size when flushing the log to disk. */
  def flushSize: Long = config.getLong(FlushSize)
}
scala.concurrent.ExecutionContext.Implicits.global 16 | 17 | case class Consensus(raft: Raft, storage: Storage, configuration: Configuration) extends Logging { 18 | 19 | private val currentState = new AtomicReference[State](Starter) 20 | private val ZERO_TERM = 0 21 | 22 | def membership = raft.membership 23 | 24 | private def log = raft.log 25 | 26 | private def state = currentState.get() 27 | 28 | def startAsBootstrapper() = { 29 | becomeFollower(ZERO_TERM) 30 | } 31 | 32 | def startAsFollower() = { 33 | storage.retrieveLatestVote() match { 34 | case Some(Vote(term, member)) ⇒ becomeFollower(term = term, vote = Option(member)) 35 | case None ⇒ becomeFollower(ZERO_TERM) 36 | } 37 | } 38 | 39 | def startAsJoiner() = { 40 | //no configuration. will try to join an existing cluster 41 | logger.info("Start as Joiner. Using seeds: {}", configuration.memberBindings) 42 | 43 | becomeJoiner(ZERO_TERM) //don't start elections 44 | 45 | breakable { 46 | for (remoteMemberBinding ← configuration.memberBindings) { 47 | logger.info("Try to join with {}", remoteMemberBinding) 48 | val remoteMember = membership.get(remoteMemberBinding).get 49 | val response = Await.result(remoteMember.join(membership.myId), 3 seconds) //TODO: Refactor me 50 | if (response.success) { 51 | logger.info("Join successful") 52 | 53 | //becomeFollower(ZERO_TERM) 54 | 55 | break 56 | } 57 | } 58 | //TODO: Implement retries/shutdown here 59 | } 60 | } 61 | 62 | def onAppendEntries(appendEntries: AppendEntries): Future[AppendEntriesResponse] = state.onAppendEntries(appendEntries) 63 | 64 | def onRequestVote(requestVote: RequestVote): Future[RequestVoteResponse] = state.onRequestVote(requestVote) 65 | 66 | def onJointConfigurationCommitted(index: Index, jointConfiguration: JointConfiguration) = { 67 | if (membership.isCurrent(index)) { //TODO: move to Leader 68 | state.onJointConfigurationCommitted(jointConfiguration) 69 | } 70 | true 71 | } 72 | 73 | def onNewConfigurationCommitted(index: Index, 
configuration: NewConfiguration): Boolean = { 74 | true 75 | } 76 | 77 | def onCommand[T](command: Command): Future[T] = state.onCommand[T](command) 78 | 79 | def onInstallSnapshot(installSnapshot: InstallSnapshot): Future[InstallSnapshotResponse] = { 80 | state.onInstallSnapshot(installSnapshot) 81 | } 82 | 83 | def onMemberJoin(member: String): Future[JoinMemberResponse] = { 84 | if (!membership.contains(member)) { 85 | onCommand[Boolean](JointConfiguration(membership.members, membership.members + member)).map(JoinMemberResponse(_)) 86 | } else { 87 | logger.info(s"$member is already part of the cluster") 88 | Future.successful(JoinMemberResponse(true)) 89 | } 90 | } 91 | 92 | def onMemberLeave(member: String): Future[Boolean] = { 93 | if (membership.contains(member)) { 94 | onCommand(JointConfiguration(membership.members, membership.members - member)) 95 | } else { 96 | Future.successful(true) 97 | } 98 | } 99 | 100 | def becomeLeader(term: Term) = { 101 | become(Leader(this, membership, log, term, leaderAnnouncer)) 102 | } 103 | 104 | def becomeCandidate(term: Term) = { 105 | become(Candidate(this, membership, log, term, leaderAnnouncer.onElection)) 106 | } 107 | 108 | def becomeFollower(term: Term, leaderAnnouncer: LeaderAnnouncer = LeaderAnnouncer(membership, configuration), vote: Option[String] = None) = { 109 | become(Follower(this, membership, log, term, leaderAnnouncer, vote)) 110 | } 111 | 112 | def becomeJoiner(term: Term): Unit = { 113 | become(Joiner(this, membership, log, term, configuration)) 114 | } 115 | 116 | def isLeader = { 117 | state.isLeader 118 | } 119 | 120 | def becomeStarter = changeState(Starter, Starter) 121 | 122 | def leaderAnnouncer = state.leaderAnnouncer 123 | 124 | private def become(newState: State) = { 125 | logger.trace("Trying to become {}", newState) 126 | var current = state 127 | //stops when current == newState or current.term < newState.term 128 | while (current.canTransitionTo(newState)) { 129 | if (changeState(current, 
newState)) { 130 | logger.debug(s"Transition from $current to $newState") 131 | persistState() 132 | current.stop(newState.term) 133 | newState.begin() 134 | } 135 | current = state 136 | } 137 | logger.trace("State is {}", current) 138 | } 139 | 140 | def persistState() = { 141 | val st = state 142 | if (st != Stopped) { 143 | storage.saveVote(Vote(st.term, st.votedFor.get().getOrElse(""))) 144 | } 145 | } 146 | 147 | private def changeState(current: State, newState: State) = currentState.compareAndSet(current, newState) 148 | 149 | def term(): Term = state.term 150 | 151 | def stop(): Unit = { 152 | become(Stopped) 153 | } 154 | 155 | def stats(): ConsensusStats = ConsensusStats(term, currentState.toString, currentState.get().stats()) 156 | 157 | } 158 | -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/LeaderAnnouncer.scala: -------------------------------------------------------------------------------- 1 | package ckite 2 | 3 | import java.util.concurrent.TimeoutException 4 | 5 | import ckite.exception.LeaderTimeoutException 6 | import ckite.util.Logging 7 | 8 | import scala.concurrent.ExecutionContext.Implicits.global 9 | import scala.concurrent.duration._ 10 | import scala.concurrent.{ Await, Future, Promise } 11 | 12 | case class LeaderAnnouncer(membership: Membership, configuration: Configuration) extends Logging { 13 | 14 | private val waitForLeaderTimeout = configuration.waitForLeaderTimeout millis 15 | private val promise = Promise[Member]() 16 | 17 | def announce(leaderId: String) = { 18 | val leader: Member = if (membership.myId == leaderId) membership.localMember else membership.get(leaderId).getOrElse(unknownAnnouncedLeader(leaderId)) 19 | promise.trySuccess(leader) 20 | } 21 | 22 | def onElection = { 23 | if (isLeaderAnnounced) LeaderAnnouncer(membership, configuration) else this 24 | } 25 | 26 | def onStepDown = { 27 | if (isLeaderAnnounced) LeaderAnnouncer(membership, configuration) 
else this
  }

  /** Runs `block` with the Leader once one has been announced. */
  def onLeader[T](block: Member ⇒ Future[T]): Future[T] = {
    leader().flatMap(block(_))
  }

  /**
   * Blocks until a Leader is announced or the configured wait-for-leader timeout expires.
   *
   * @return the announced Leader Member
   * @throws LeaderTimeoutException if no Leader is announced within `waitForLeaderTimeout`
   */
  def awaitLeader: Member = {
    try {
      if (!promise.isCompleted) {
        logger.info("Waiting for a Leader to be announced...")
      }
      Await.result(promise.future, waitForLeaderTimeout)
    } catch {
      case e: TimeoutException ⇒ {
        // Fix: the original message had two "{}" placeholders but only one argument,
        // so SLF4J rendered a dangling placeholder. One placeholder, one argument.
        logger.warn("Wait for Leader timed out after {}", waitForLeaderTimeout)
        throw new LeaderTimeoutException(e)
      }
    }
  }

  /** True once a Leader has been announced (the promise is completed). */
  def isLeaderAnnounced = promise.isCompleted

  private def leader(): Future[Member] = {
    promise.future
  }

  /** Fails fast when an announced leader id does not map to a known member. */
  private def unknownAnnouncedLeader(leaderId: String) = {
    logger.info(s"Unknown Leader $leaderId")
    throw new RuntimeException("Announced Leader member is unknown")
  }
}

--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/LocalMember.scala:
--------------------------------------------------------------------------------
package ckite

import ckite.rpc.Command

import scala.concurrent.Future

/** The Member representing this node, bound to the local binding from the Configuration. */
case class LocalMember(raft: Raft, configuration: Configuration) extends Member(configuration.localBinding) {

  private def consensus = raft.consensus

  /** "Forwarding" to the local member just hands the command to the local consensus. */
  override def forwardCommand[T](command: Command): Future[T] = {
    consensus.onCommand(command)
  }

}

--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/Member.scala:
--------------------------------------------------------------------------------
package ckite

import ckite.rpc.Command
import ckite.util.Logging

import scala.concurrent.Future

/** Base class for cluster members. A member is identified by its network binding. */
abstract class Member(binding: String) extends Logging {

  def id() = s"$binding"

  /** Forwards a command to this member for execution. */
  def forwardCommand[T](command: Command): Future[T]

  override def toString() = id

}
--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/Membership.scala:
--------------------------------------------------------------------------------
package ckite

import java.util.concurrent.atomic.AtomicReference

import ckite.rpc.LogEntry.Index
import ckite.rpc.{ Rpc, ClusterConfigurationCommand, JointConfiguration, NewConfiguration }
import ckite.util.Logging

/** A cluster membership configuration as recorded in the replicated Log. */
trait ClusterConfiguration {
  /** The index in the Log where this ClusterConfiguration is located */
  def index: Index

  /** All the members included in this ClusterConfiguration. This can include both new and old members. */
  def members: Set[String]

  /** Checks if the given members form a quorum in this ClusterConfiguration. */
  def reachQuorum(someMembers: Set[String]): Boolean

  /** Checks if the given members form SOME quorum. Useful in the case of JointClusterConfiguration. */
  def reachSomeQuorum(someMembers: Set[String]): Boolean
}

/** A plain (non-joint) configuration: one member set with simple majority quorum. */
case class SingleClusterConfiguration(members: Set[String], index: Index = -1) extends ClusterConfiguration {
  // Majority: strictly more than half of the members.
  private val quorum = (members.size / 2) + 1

  def reachQuorum(someMembers: Set[String]) = someMembers.intersect(members).size >= quorum

  // With a single member set, "some quorum" coincides with "the quorum".
  def reachSomeQuorum(someMembers: Set[String]) = reachQuorum(someMembers)
}

/**
 * Joint configuration used during membership changes: decisions require
 * agreement of both the old (cold) and the new (cnew) configurations.
 */
case class JointClusterConfiguration(cold: SingleClusterConfiguration, cnew: SingleClusterConfiguration, index: Index) extends ClusterConfiguration {
  val members = cold.members ++ cnew.members

  // A joint quorum needs a majority in BOTH cold and cnew.
  def reachQuorum(someMembers: Set[String]) = cold.reachQuorum(someMembers) && cnew.reachQuorum(someMembers)

  // Some quorum: a majority in EITHER cold or cnew.
  def reachSomeQuorum(someMembers: Set[String]) = cold.reachQuorum(someMembers) || cnew.reachQuorum(someMembers)

  override def toString = s"JointClusterConfiguration(cold=${cold.members}, cnew=${cnew.members}, index= $index)"
}

41 | object JointClusterConfiguration { 42 | implicit def fromMembersSetToSimpleClusterConfiguration(members: Set[String]): SingleClusterConfiguration = { 43 | SingleClusterConfiguration(members) 44 | } 45 | } 46 | 47 | object EmptyClusterConfiguration extends SingleClusterConfiguration(Set()) 48 | 49 | case class Membership(localMember: LocalMember, rpc: Rpc, configuration: Configuration) extends Logging { 50 | 51 | import ckite.JointClusterConfiguration._ 52 | 53 | private val currentClusterConfiguration = new AtomicReference[ClusterConfiguration](EmptyClusterConfiguration) 54 | private val currentKnownMembers = new AtomicReference[Map[String, RemoteMember]](Map()) 55 | 56 | register(configuration.memberBindings) 57 | 58 | def clusterConfiguration = currentClusterConfiguration.get() 59 | 60 | private def knownMembers = currentKnownMembers.get() 61 | 62 | def members = clusterConfiguration.members 63 | def remoteMembers = (clusterConfiguration.members - localMember.id()).map(member ⇒ knownMembers(member)) 64 | def hasRemoteMembers = !remoteMembers.isEmpty 65 | 66 | def reachQuorum(someMembers: Set[String]) = clusterConfiguration.reachQuorum(someMembers) 67 | 68 | def reachSomeQuorum(someMembers: Set[String]) = clusterConfiguration.reachSomeQuorum(someMembers) 69 | 70 | def get(member: String): Option[RemoteMember] = { 71 | knownMembers.get(member).orElse { 72 | register(Set(member)) 73 | knownMembers.get(member) 74 | } 75 | } 76 | 77 | def changeConfiguration(index: Index, clusterConfiguration: ClusterConfigurationCommand) = { 78 | if (happensBefore(index)) { 79 | clusterConfiguration match { 80 | case JointConfiguration(oldMembers, newMembers) ⇒ { 81 | //JointConfiguration received. Switch membership to JointClusterConfiguration 82 | transitionTo(JointClusterConfiguration(oldMembers, newMembers, index)) 83 | } 84 | case NewConfiguration(members) ⇒ { 85 | //NewConfiguration received. A new membership has been set. 
Switch to SimpleClusterConfiguration or shutdown If no longer part of the cluster. 86 | transitionTo(SingleClusterConfiguration(members, index)) 87 | } 88 | } 89 | } 90 | } 91 | 92 | def transitionTo(newClusterConfiguration: ClusterConfiguration) = { 93 | val newMembers = newClusterConfiguration.members.filterNot(member ⇒ knownMembers.contains(member) || member == myId) 94 | 95 | register(newMembers) 96 | currentClusterConfiguration.set(newClusterConfiguration) 97 | logger.info("Cluster Configuration changed to {}", clusterConfiguration) 98 | } 99 | 100 | def register(newMembers: Set[String]) { 101 | currentKnownMembers.set(knownMembers ++ newMembers.map(id ⇒ (id, createRemoteMember(id)))) 102 | } 103 | 104 | def happensBefore(index: Index) = clusterConfiguration.index < index 105 | 106 | def isCurrent(index: Index) = index == clusterConfiguration.index 107 | 108 | def contains(member: String) = members.contains(member) 109 | 110 | def myId = localMember.id() 111 | 112 | def bootstrap() = { 113 | //validate empty log and no snapshot 114 | transitionTo(SingleClusterConfiguration(Set(myId), 1)) 115 | } 116 | 117 | def createRemoteMember(id: String): RemoteMember = new RemoteMember(rpc, id) 118 | 119 | def isInitialized = clusterConfiguration != EmptyClusterConfiguration 120 | } 121 | 122 | -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/RLog.scala: -------------------------------------------------------------------------------- 1 | package ckite 2 | 3 | import java.util.concurrent._ 4 | import java.util.concurrent.atomic.AtomicLong 5 | 6 | import ckite.rlog._ 7 | import ckite.rpc.LogEntry.{ Index, Term } 8 | import ckite.rpc._ 9 | import ckite.statemachine.{ CommandExecutor, StateMachine } 10 | import ckite.stats.LogStats 11 | import ckite.util.CKiteConversions._ 12 | import ckite.util.{ CustomThreadFactory, Logging } 13 | 14 | import scala.Option.option2Iterable 15 | import 
scala.collection.immutable.NumericRange 16 | import scala.concurrent.ExecutionContext.Implicits.global 17 | import scala.concurrent.{ Future, Promise } 18 | 19 | case class RLog(raft: Raft, stateMachine: StateMachine, storage: Storage, configuration: Configuration) extends Logging { 20 | 21 | val log = storage.log() 22 | 23 | private def consensus = raft.consensus 24 | 25 | private def membership = raft.membership 26 | 27 | private val _lastIndex = new AtomicLong(0) 28 | 29 | private val snapshotManager = SnapshotManager(membership, this, storage, configuration) 30 | 31 | private val logWorker = new ThreadPoolExecutor(0, 1, 32 | 10L, TimeUnit.SECONDS, new SynchronousQueue[Runnable](), CustomThreadFactory("Log-worker")) 33 | 34 | private val appendsQueue = new LinkedBlockingQueue[Append[_]]() 35 | 36 | private val commandExecutor = new CommandExecutor(stateMachine) 37 | 38 | private val messageQueue = new LinkedBlockingQueue[Message]() 39 | 40 | @volatile 41 | var commitIndex: Long = 0 42 | @volatile 43 | var lastApplied: Long = stateMachine.getLastAppliedIndex 44 | 45 | private val applyPromises = new ConcurrentHashMap[Long, Promise[_]]() 46 | 47 | def bootstrap() = { 48 | assertEmptyLog() 49 | assertNoSnapshot() 50 | } 51 | 52 | //Leader append path 53 | def append[T](term: Term, write: WriteCommand[T]): Future[(LogEntry, Promise[T])] = { 54 | append(LeaderAppend[T](term, write)) 55 | } 56 | 57 | //Follower append path 58 | def tryAppend(appendEntries: AppendEntries): Future[Boolean] = { 59 | logger.trace("Try appending {}", appendEntries) 60 | val canAppend = hasPreviousLogEntry(appendEntries) 61 | if (canAppend) { 62 | appendAll(appendEntries.entries) map { _ ⇒ 63 | commit(appendEntries.commitIndex) 64 | canAppend 65 | } 66 | } else { 67 | logger.trace("Rejecting {}", appendEntries) 68 | Future.successful(canAppend) 69 | } 70 | } 71 | 72 | private def hasPreviousLogEntry(appendEntries: AppendEntries) = { 73 | containsEntry(appendEntries.prevLogIndex, 
appendEntries.prevLogTerm) 74 | } 75 | 76 | //Follower appends all these entries and waits for them to be flushed to the persistentLog 77 | private def appendAll(entries: List[LogEntry]): Future[List[Index]] = { 78 | val appendPromises = entries.map { entry ⇒ 79 | logger.trace(s"Try appending $entry") 80 | Some(append(FollowerAppend(entry))) 81 | } 82 | composeFutures(appendPromises) 83 | } 84 | 85 | private def composeFutures(appendPromises: List[Option[Future[Index]]]) = { 86 | val futures = for { 87 | append ← appendPromises 88 | future ← append 89 | } yield future 90 | Future.sequence(futures) 91 | } 92 | 93 | private def hasIndex(index: Long) = log.getLastIndex >= index 94 | 95 | def commit(index: Long) = { 96 | if (lastApplied < index) { 97 | messageQueue.offer(WriteCommitMessage(index)) 98 | } 99 | } 100 | 101 | def execute[T](command: ReadCommand[T]) = applyRead(command) 102 | 103 | def entry(index: Long, allowCompactedEntry: Boolean = false): Option[LogEntry] = { 104 | val entry = log.getEntry(index) 105 | if (entry != null) Some(entry) 106 | else if (allowCompactedEntry && snapshotManager.isInSnapshot(index)) Some(snapshotManager.compactedEntry) 107 | else None 108 | } 109 | 110 | def lastEntry: Option[LogEntry] = { 111 | val lastLogIndex = findLastLogIndex() 112 | if (snapshotManager.isInSnapshot(lastLogIndex)) { 113 | Some(snapshotManager.compactedEntry) 114 | } else { 115 | entry(lastLogIndex) 116 | } 117 | } 118 | 119 | def getPreviousLogEntry(logEntry: LogEntry): Option[LogEntry] = entry(logEntry.index - 1, true) 120 | 121 | private def containsEntry(index: Long, term: Int) = { 122 | val logEntryOption = entry(index) 123 | if (logEntryOption.isDefined) logEntryOption.get.term == term else (isZeroEntry(index, term) || snapshotManager.isInSnapshot(index, term)) 124 | } 125 | 126 | private def isZeroEntry(index: Long, term: Int): Boolean = index == -1 && term == -1 127 | 128 | def resetLastIndex() = _lastIndex.set(findLastLogIndex()) 129 | 130 | private 
def findLastLogIndex(): Long = { 131 | val lastIndex = log.getLastIndex 132 | if (lastIndex > 0) lastIndex else snapshotManager.latestSnapshotIndex 133 | } 134 | 135 | private def nextLogIndex() = _lastIndex.incrementAndGet() 136 | 137 | def size() = log.size 138 | 139 | def stop() = { 140 | logWorker.shutdownNow() 141 | logWorker.awaitTermination(10, TimeUnit.SECONDS) 142 | log.close() 143 | } 144 | 145 | def serializeStateMachine = stateMachine.takeSnapshot() 146 | 147 | private def assertEmptyLog() = { 148 | if (log.size > 0) throw new IllegalStateException("Log is not empty") 149 | } 150 | 151 | private def assertNoSnapshot() = { 152 | if (snapshotManager.latestSnapshotIndex > 0) throw new IllegalStateException("A Snapshot was found") 153 | } 154 | 155 | def initialize() = { 156 | logger.info("Initializing RLog...") 157 | restoreLatestClusterConfiguration() 158 | replay() 159 | startLogWorker() 160 | logger.info("Done initializing RLog") 161 | } 162 | 163 | private def replay(): Unit = { 164 | lastApplied = reloadSnapshot() 165 | val from = lastApplied + 1 166 | val to = commitIndex 167 | if (from <= to) replay(from, to) 168 | else logger.info("No entry to replay. commitIndex is #{}", commitIndex) 169 | } 170 | 171 | private def reloadSnapshot(): Long = { 172 | val latestSnapshot = snapshotManager.latestSnapshot() 173 | val lastAppliedIndex: Long = latestSnapshot map { snapshot ⇒ 174 | logger.info("Found a {}", snapshot) 175 | if (snapshot.index > lastApplied) { 176 | logger.info("The Snapshot has more recent data than the StateMachine. 
Will reload it...") 177 | snapshotManager.reload(snapshot) 178 | snapshot.index 179 | } else { 180 | logger.info("The StateMachine has more recent data than the Snapshot") 181 | membership.transitionTo(snapshot.clusterConfiguration) 182 | lastApplied 183 | } 184 | } getOrElse { 185 | logger.info("No Snapshot was found") 186 | 0 187 | } 188 | lastAppliedIndex 189 | } 190 | 191 | def installSnapshot(snapshot: Snapshot) = { 192 | val promise = Promise[Unit]() 193 | messageQueue.offer(InstallSnapshotMessage(promise, snapshot)) 194 | promise.future 195 | } 196 | 197 | def isInSnapshot(index: Index) = snapshotManager.isInSnapshot(index) 198 | 199 | def latestSnapshot() = snapshotManager.latestSnapshot() 200 | 201 | private def restoreLatestClusterConfiguration() = { 202 | val latestClusterConfigurationEntry = findLatestClusterConfiguration() 203 | latestClusterConfigurationEntry foreach { entry ⇒ 204 | logger.info("Found cluster configuration in the log: {}", entry.command) 205 | consensus.membership.changeConfiguration(entry.index, entry.command.asInstanceOf[ClusterConfigurationCommand]) 206 | } 207 | } 208 | 209 | private def findLatestClusterConfiguration(): Option[LogEntry] = { 210 | traversingInReversal find { index ⇒ 211 | val logEntry = entry(index) 212 | if (!logEntry.isDefined) return None 213 | logEntry.collect { case LogEntry(term, entry, c: ClusterConfigurationCommand) ⇒ true }.getOrElse(false) 214 | } map { index ⇒ entry(index) } flatten 215 | } 216 | 217 | def traversingInReversal: NumericRange[Long] = { 218 | findLastLogIndex to 1 by -1 219 | } 220 | 221 | def rollLog(index: Long) = { 222 | log.rollLog(index) 223 | } 224 | 225 | def lastIndex(): Index = _lastIndex.longValue() 226 | 227 | def isEmpty: Boolean = lastIndex().equals(0L) 228 | 229 | private def startLogWorker() = logWorker.execute(new Runnable { 230 | override def run(): Unit = runLogWorker() 231 | }) 232 | 233 | private def append[T](append: Append[T]): Future[T] = { 234 | 
logger.trace(s"Append $append") 235 | val promise: Promise[T] = append.promise 236 | messageQueue.offer(AppendMessage(append)) 237 | promise.future 238 | } 239 | 240 | private def runLogWorker() = { 241 | logger.info(s"Starting Log from index #{}", lastApplied) 242 | try { 243 | while (true) nextMessage() 244 | } catch { 245 | case e: InterruptedException ⇒ logger.info("Shutdown LogWorker...") 246 | } 247 | } 248 | 249 | private def applyLogCompactionPolicy() = snapshotManager.applyLogCompactionPolicy() 250 | 251 | private def onLogEntryAppended(append: Append[_])(entry: LogEntry) = { 252 | entry.command match { 253 | case configuration: ClusterConfigurationCommand ⇒ membership.changeConfiguration(entry.index, configuration) 254 | case _ ⇒ ; 255 | } 256 | append.onComplete(entry) 257 | } 258 | 259 | private def next: Append[_] = { 260 | if (appendsQueue.isEmpty()) { 261 | appendsQueue.take() 262 | } else { 263 | appendsQueue.poll() 264 | } 265 | } 266 | 267 | private def replay(from: Long, to: Long): Unit = { 268 | logger.debug("Start log replay from index #{} to #{}", from, to) 269 | entry(to).foreach { 270 | entry ⇒ 271 | applyUntil(entry) 272 | } 273 | logger.debug("Finished log replay") 274 | } 275 | 276 | private def isFromCurrentTerm(entryOption: Option[LogEntry]) = { 277 | entryOption.exists(entry ⇒ entry.term.equals(consensus.term)) 278 | } 279 | 280 | private def applyUntil(entry: LogEntry) = { 281 | (lastApplied + 1) to entry.index foreach { index ⇒ 282 | entryToApply(index, entry).map { entry ⇒ 283 | updateCommitIndex(index) 284 | logger.debug("Will apply committed entry {}", entry) 285 | val result = execute(entry.index, entry.command) 286 | updateLastAppliedIndex(index) 287 | notifyResult(index, result) 288 | }.orElse { 289 | logger.error(s"Missing index #$index") 290 | None 291 | } 292 | } 293 | } 294 | 295 | private def updateCommitIndex(index: Long) = { 296 | commitIndex = index 297 | logger.debug("New commitIndex is #{}", index) 298 | } 299 | 300 | 
private def updateLastAppliedIndex(index: Long) = { 301 | lastApplied = index //TODO: What do we assume about the StateMachine persistence? 302 | logger.debug("Last applied index is #{}", index) 303 | } 304 | 305 | private def entryToApply(index: Long, logEntry: LogEntry) = { 306 | if (index == logEntry.index) Some(logEntry) else entry(index) 307 | } 308 | 309 | private def notifyResult(index: Long, result: Any) = { 310 | val applyPromise = applyPromises.get(index).asInstanceOf[Promise[Any]] 311 | if (applyPromise != null) { 312 | applyPromise.success(result) 313 | applyPromises.remove(index) 314 | } 315 | } 316 | 317 | private def execute(index: Long, command: Command): Any = { 318 | command match { 319 | case jointConfiguration: JointConfiguration ⇒ consensus.onJointConfigurationCommitted(index, jointConfiguration) 320 | case newConfiguration: NewConfiguration ⇒ consensus.onNewConfigurationCommitted(index, newConfiguration) 321 | case NoOp() ⇒ true 322 | case write: WriteCommand[_] ⇒ executeInStateMachine(index, write) 323 | } 324 | } 325 | 326 | def executeInStateMachine(index: Long, write: WriteCommand[_]): Any = { 327 | logger.debug("Executing write {}", write) 328 | commandExecutor.applyWrite(index, write) 329 | } 330 | 331 | def applyRead[T](read: ReadCommand[T]) = { 332 | val promise = Promise[T]() 333 | messageQueue.offer(ReadApplyMessage(promise, read)) 334 | promise.future 335 | } 336 | 337 | private def nextMessage = { 338 | if (messageQueue.isEmpty) { 339 | messageQueue.take() 340 | } else { 341 | messageQueue.poll 342 | } 343 | } 344 | 345 | def stats(): LogStats = LogStats(size(), commitIndex, lastEntry) 346 | 347 | trait Message { 348 | def apply() 349 | } 350 | 351 | case class WriteCommitMessage(index: Long) extends Message { 352 | def apply() = { 353 | if (lastApplied < index) { 354 | val logEntry = entry(index) 355 | if (isFromCurrentTerm(logEntry)) { 356 | applyUntil(logEntry.get) 357 | } 358 | } 359 | } 360 | } 361 | 362 | case class 
ReadApplyMessage[T](promise: Promise[T], read: ReadCommand[T]) extends Message { 363 | def apply() = promise.trySuccess(commandExecutor.applyRead(read)) 364 | } 365 | 366 | case class InstallSnapshotMessage(promise: Promise[Unit], snapshot: Snapshot) extends Message { 367 | def apply() = snapshotManager.installSnapshot(snapshot) 368 | } 369 | 370 | case class AppendMessage[T](append: Append[T]) extends Message { 371 | def apply() = { 372 | val logEntry = append.logEntry 373 | 374 | logger.debug(s"Appending $logEntry") 375 | 376 | if (!containsEntry(logEntry.index, logEntry.term)) { 377 | if (hasIndex(logEntry.index)) { 378 | //If an entry is overridden then all the subsequent entries must be removed 379 | logger.debug("Will discard inconsistent entries starting from index #{} to follow Leader's log", logEntry.index) 380 | log.discardEntriesFrom(logEntry.index) 381 | } 382 | log.append(logEntry).map { _ ⇒ 383 | onLogEntryAppended(append)(logEntry) 384 | } 385 | applyLogCompactionPolicy() 386 | } else { 387 | logger.debug("Discarding append of a duplicate entry {}", logEntry) 388 | } 389 | } 390 | } 391 | 392 | trait Append[T] { 393 | def promise: Promise[T] 394 | 395 | def logEntry: LogEntry 396 | 397 | def onComplete(logEntry: LogEntry) 398 | } 399 | 400 | case class LeaderAppend[T](term: Int, write: WriteCommand[T]) extends Append[(LogEntry, Promise[T])] { 401 | val _promise = Promise[(LogEntry, Promise[T])]() 402 | val _valuePromise = Promise[T]() 403 | 404 | def promise = _promise 405 | 406 | val logEntry = { 407 | val logEntry = LogEntry(term, nextLogIndex, write) 408 | applyPromises.put(logEntry.index, _valuePromise) 409 | logEntry 410 | } 411 | 412 | def onComplete(logEntry: LogEntry) = _promise.success((logEntry, _valuePromise)) 413 | } 414 | 415 | case class FollowerAppend(entry: LogEntry) extends Append[Long] { 416 | val _promise = Promise[Long]() 417 | 418 | def promise = _promise 419 | 420 | def logEntry = entry 421 | 422 | def onComplete(logEntry: 
LogEntry) = _promise.success(logEntry.index) 423 | } 424 | 425 | } -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/Raft.scala: -------------------------------------------------------------------------------- 1 | package ckite 2 | 3 | import ckite.rlog.Storage 4 | import ckite.rpc._ 5 | import ckite.statemachine.StateMachine 6 | import ckite.stats.{ ConsensusStats, Stats } 7 | import ckite.util.{ ConcurrencySupport, Logging } 8 | 9 | import scala.concurrent.Future 10 | 11 | class Raft(stateMachine: StateMachine, rpc: Rpc, storage: Storage, configuration: Configuration) extends RpcService with ConcurrencySupport with Logging { 12 | 13 | val consensus = Consensus(this, storage, configuration) 14 | val membership = Membership(LocalMember(this, configuration), rpc, configuration) 15 | val log = RLog(this, stateMachine, storage, configuration) 16 | 17 | def start() = { 18 | logger.info(s"Starting CKite ${membership.myId}...") 19 | initializeLog() 20 | if (configuration.bootstrap) { 21 | bootstrapStart() 22 | } else if (!isInitialized) { 23 | joinStart() 24 | } else { 25 | normalStart() 26 | } 27 | } 28 | 29 | def initializeLog() = log.initialize() 30 | 31 | def joinStart() = { 32 | logger.info("CKite not initialized. Join start") 33 | consensus.startAsJoiner() 34 | } 35 | 36 | private def normalStart() = { 37 | logger.info("CKite already initialized. 
Simple start") 38 | consensus.startAsFollower() 39 | } 40 | 41 | private def bootstrapStart() = { 42 | logger.info("Bootstrapping a new CKite consensus cluster...") 43 | 44 | membership.bootstrap() 45 | log.bootstrap() 46 | 47 | consensus.startAsBootstrapper() 48 | 49 | consensus.leaderAnnouncer.awaitLeader 50 | } 51 | 52 | private def isInitialized = membership.isInitialized 53 | 54 | def stop() = { 55 | logger.info(s"Stopping CKite ${membership.myId}...") 56 | consensus.stop() 57 | log.stop() 58 | } 59 | 60 | def onRequestVoteReceived(requestVote: RequestVote): Future[RequestVoteResponse] = { 61 | logger.debug("RequestVote received: {}", requestVote) 62 | consensus.onRequestVote(requestVote) 63 | } 64 | 65 | def onAppendEntriesReceived(appendEntries: AppendEntries): Future[AppendEntriesResponse] = { 66 | logger.trace(s"Received $appendEntries") 67 | consensus.onAppendEntries(appendEntries) 68 | } 69 | 70 | def onCommandReceived[T](command: Command): Future[T] = { 71 | logger.debug("Command received: {}", command) 72 | consensus.onCommand[T](command) 73 | } 74 | 75 | def onInstallSnapshotReceived(installSnapshot: InstallSnapshot): Future[InstallSnapshotResponse] = { 76 | logger.debug("InstallSnapshot received") 77 | consensus.onInstallSnapshot(installSnapshot) 78 | } 79 | 80 | def onLocalReadReceived[T](read: ReadCommand[T]) = { 81 | log.execute(read) 82 | } 83 | 84 | def onMemberJoinReceived(member: String): Future[JoinMemberResponse] = { 85 | logger.info(s"Join member $member request received") 86 | consensus.onMemberJoin(member) 87 | } 88 | 89 | def onMemberLeaveReceived(member: String): Future[Boolean] = { 90 | logger.info(s"Leave member $member request received") 91 | consensus.onMemberLeave(member) 92 | } 93 | 94 | def isLeader = { 95 | consensus.isLeader 96 | } 97 | 98 | def stats(): Stats = Stats(consensus.stats(), log.stats()) 99 | 100 | } 101 | 102 | object Raft { 103 | def apply(stateMachine: StateMachine, rpc: Rpc, storage: Storage, configuration: 
Configuration) = { 104 | new Raft(stateMachine, rpc, storage, configuration) 105 | } 106 | } -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/RemoteMember.scala: -------------------------------------------------------------------------------- 1 | package ckite 2 | 3 | import java.util.concurrent.ConcurrentHashMap 4 | import java.util.concurrent.atomic.{ AtomicBoolean, AtomicLong } 5 | 6 | import ckite.rpc.LogEntry.Index 7 | import ckite.rpc._ 8 | 9 | import scala.concurrent.ExecutionContext.Implicits.global 10 | import scala.concurrent.Future 11 | 12 | class RemoteMember(rpc: Rpc, binding: String) extends Member(binding) { 13 | 14 | logger.debug(s"Creating RemoteMember client for $binding") 15 | 16 | val nextLogIndex = new AtomicLong(1) 17 | val matchIndex = new AtomicLong(0) 18 | 19 | private val client: RpcClient = rpc.createClient(id) 20 | 21 | private val replicationsEnabled = new AtomicBoolean(true) 22 | private val replicationsInProgress = new ConcurrentHashMap[Long, Boolean]() 23 | 24 | override def forwardCommand[T](command: Command): Future[T] = { 25 | logger.debug(s"Forward command ${command} to ${id}") 26 | client.send[T](command) 27 | } 28 | 29 | def sendAppendEntries(appendEntries: AppendEntries) = { 30 | client.send(appendEntries) 31 | } 32 | 33 | def sendRequestVote(requestVote: RequestVote) = { 34 | client.send(requestVote) 35 | } 36 | 37 | private def markAsReplicated(index: Index): Unit = replicationsInProgress.remove(index) 38 | 39 | def canReplicateIndex(index: Index): Boolean = isReplicationEnabled && !isBeingReplicated(index) 40 | 41 | private def isBeingReplicated(index: Long) = replicationsInProgress.put(index, true) 42 | 43 | private def isReplicationEnabled = replicationsEnabled.get() 44 | 45 | def acknowledgeIndex(index: Long) = { 46 | updateMatchIndex(index) 47 | updateNextLogIndex(index) 48 | markAsReplicated(index) 49 | } 50 | 51 | def 
markReplicationsNotInProgress(indexes: List[Long]) = {
    indexes.foreach(index ⇒ replicationsInProgress.remove(index))
  }

  /** CAS loop: only moves matchIndex forward, never backwards. */
  private def updateMatchIndex(index: Long) = {
    var currentMatchIndex = matchIndex.longValue()
    while (currentMatchIndex < index && !matchIndex.compareAndSet(currentMatchIndex, index)) {
      currentMatchIndex = matchIndex.longValue()
    }
  }

  private def updateNextLogIndex(index: Long) = nextLogIndex.set(index + 1)

  /** Steps nextLogIndex back (floor 1) after a rejected AppendEntries and clears its in-progress mark. */
  def decrementNextLogIndex() = {
    val currentIndex = nextLogIndex.decrementAndGet()
    if (currentIndex == 0) nextLogIndex.set(1)
    // Fix: the map is keyed by Long; passing intValue() boxed an Integer that never
    // equals a boxed Long key (and truncated the index), so the in-progress marker
    // was never actually removed. Pass the Long value instead.
    replicationsInProgress.remove(nextLogIndex.get())
  }

  def sendInstallSnapshot(installSnapshot: InstallSnapshot) = {
    client.send(installSnapshot)
  }

  def setNextLogIndex(index: Long) = nextLogIndex.set(index)

  def resetMatchIndex = matchIndex.set(0)

  /** Re-enables replications to this member. Returns the previous enabled state. */
  def enableReplications() = {
    val wasEnabled = replicationsEnabled.getAndSet(true)
    if (!wasEnabled) logger.debug(s"Enabling replications to $id")
    wasEnabled
  }

  /** Disables replications to this member. Returns the previous enabled state. */
  def disableReplications() = {
    val wasEnabled = replicationsEnabled.getAndSet(false)
    if (wasEnabled) logger.debug(s"Disabling replications to $id")
    wasEnabled
  }

  /** Asks this member to let `joiningMemberId` join the cluster; a failed RPC yields JoinMemberResponse(false). */
  def join(joiningMemberId: String): Future[JoinMemberResponse] = {
    logger.debug(s"Joining with $id")
    client.send(JoinMember(joiningMemberId)).recover {
      case reason: Throwable ⇒
        logger.warn(s"Can't join to member $id", reason)
        JoinMemberResponse(false)
    }
  }

}

--------------------------------------------------------------------------------
/ckite-core/src/main/scala/ckite/exception/LeaderTimeoutException.scala:
--------------------------------------------------------------------------------
package ckite.exception

import java.util.concurrent.TimeoutException

/**
 * Raised when waiting for a Leader to be elected timed
out 7 | */ 8 | case class LeaderTimeoutException(exception: TimeoutException) extends RuntimeException(exception) -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/exception/LostLeadershipException.scala: -------------------------------------------------------------------------------- 1 | package ckite.exception 2 | 3 | /** 4 | * Raised when waiting for a Leader loses its leadership. 5 | * This can happen during reads on a Leader that gets partitioned from the rest of the cluster 6 | */ 7 | case class LostLeadershipException(reason: String) extends RuntimeException(reason) -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/exception/WriteTimeoutException.scala: -------------------------------------------------------------------------------- 1 | package ckite.exception 2 | 3 | import ckite.rpc.LogEntry 4 | 5 | /** 6 | * Waiting for WriteCommand commit timed out 7 | */ 8 | case class WriteTimeoutException(logEntry: LogEntry) extends RuntimeException(s"$logEntry") -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/rlog/FixedSizeLogCompactionPolicy.scala: -------------------------------------------------------------------------------- 1 | package ckite.rlog 2 | 3 | import ckite.statemachine.StateMachine 4 | import ckite.util.Logging 5 | 6 | class FixedSizeLogCompactionPolicy(fixedSize: Long) extends LogCompactionPolicy with Logging { 7 | 8 | def applies(persistentLog: Log, stateMachine: StateMachine) = { 9 | val size = persistentLog.size 10 | val applies = size >= fixedSize 11 | if (applies) { 12 | logger.info(s"Log size is ${size} and exceeds the maximum threshold of ${fixedSize}") 13 | } 14 | applies 15 | } 16 | 17 | } -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/rlog/Log.scala: 
-------------------------------------------------------------------------------- 1 | package ckite.rlog 2 | 3 | import ckite.rpc.LogEntry 4 | import ckite.rpc.LogEntry.Index 5 | 6 | import scala.concurrent.Future 7 | 8 | trait Log { 9 | 10 | def append(entry: LogEntry): Future[Unit] 11 | 12 | def rollLog(upToIndex: Index): Unit 13 | 14 | def getEntry(index: Index): LogEntry //TODO: change it to Option[LogEntry] 15 | 16 | def getLastIndex: Long 17 | 18 | def discardEntriesFrom(index: Index): Unit 19 | 20 | def size: Long 21 | 22 | def close(): Unit 23 | 24 | } 25 | 26 | -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/rlog/LogAppender.scala: -------------------------------------------------------------------------------- 1 | package ckite.rlog 2 | 3 | import java.util.concurrent.{ LinkedBlockingQueue, SynchronousQueue, ThreadPoolExecutor, TimeUnit } 4 | 5 | import ckite.RLog 6 | import ckite.rpc.{ ClusterConfigurationCommand, Command, LogEntry, WriteCommand } 7 | import ckite.util.CKiteConversions.fromFunctionToRunnable 8 | import ckite.util.{ CustomThreadFactory, Logging } 9 | 10 | import scala.collection.mutable.ArrayBuffer 11 | import scala.concurrent.{ ExecutionContext, Future, Promise } 12 | 13 | class LogAppender(rlog: RLog, localLog: Log) extends Logging { 14 | 15 | } -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/rlog/LogCompactionPolicy.scala: -------------------------------------------------------------------------------- 1 | package ckite.rlog 2 | 3 | import ckite.statemachine.StateMachine 4 | 5 | trait LogCompactionPolicy { 6 | 7 | def applies(persistentLog: Log, stateMachine: StateMachine): Boolean 8 | 9 | } -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/rlog/Snapshot.scala: -------------------------------------------------------------------------------- 1 | 
package ckite.rlog 2 | 3 | import java.nio.ByteBuffer 4 | 5 | import ckite.ClusterConfiguration 6 | import ckite.rpc.LogEntry._ 7 | 8 | case class Snapshot(term: Term, index: Index, clusterConfiguration: ClusterConfiguration, stateMachineSerialized: ByteBuffer) 9 | 10 | -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/rlog/SnapshotManager.scala: -------------------------------------------------------------------------------- 1 | package ckite.rlog 2 | 3 | import java.util.concurrent.atomic.{ AtomicBoolean, AtomicReference } 4 | import java.util.concurrent.{ Executors, SynchronousQueue, ThreadPoolExecutor, TimeUnit } 5 | 6 | import ckite.rpc.LogEntry.{ Index, Term } 7 | import ckite.rpc.{ CompactedEntry, LogEntry } 8 | import ckite.util.CKiteConversions.fromFunctionToRunnable 9 | import ckite.util.{ CustomThreadFactory, Logging } 10 | import ckite.{ Configuration, Membership, RLog } 11 | 12 | import scala.concurrent.{ ExecutionContext, Future } 13 | 14 | case class SnapshotManager(membership: Membership, rlog: RLog, storage: Storage, configuration: Configuration) extends Logging { 15 | 16 | val logCompactionPolicy = new FixedSizeLogCompactionPolicy(configuration.logCompactionThreshold) 17 | 18 | val stateMachine = rlog.stateMachine 19 | 20 | val latestSnapshotCoordinates = new AtomicReference[(Index, Term)]((0, 0)) 21 | 22 | def applyLogCompactionPolicy() = { 23 | if (logCompactionPolicy.applies(rlog.log, rlog.stateMachine)) { 24 | logger.debug(s"Log compaction is required") 25 | compact() 26 | } 27 | } 28 | 29 | private def compact() = { 30 | val snapshot = takeSnapshot() 31 | save(snapshot) 32 | //rolls the log up to the given logIndex 33 | rlog.rollLog(snapshot.index) 34 | updateLatestSnapshotCoordinates(snapshot) 35 | } 36 | 37 | private def updateLatestSnapshotCoordinates(snapshot: Snapshot) = { 38 | latestSnapshotCoordinates.set((snapshot.index, snapshot.term)) 39 | } 40 | 41 | private def 
save(snapshot: Snapshot) = { 42 | logger.debug(s"Saving Snapshot $snapshot") 43 | 44 | storage.saveSnapshot(snapshot) 45 | 46 | logger.debug(s"Finished saving Snapshot ${snapshot}") 47 | } 48 | 49 | private def takeSnapshot(): Snapshot = { 50 | val latestEntry = rlog.entry(rlog.lastApplied).get 51 | val clusterConfiguration = membership.clusterConfiguration 52 | val stateMachineSerialized = rlog.serializeStateMachine 53 | Snapshot(latestEntry.term, latestEntry.index, clusterConfiguration, stateMachineSerialized) 54 | } 55 | 56 | def installSnapshot(snapshot: Snapshot) = { 57 | logger.debug(s"Installing $snapshot") 58 | storage.saveSnapshot(snapshot) 59 | 60 | stateMachine.restoreSnapshot(snapshot.stateMachineSerialized) 61 | 62 | membership.transitionTo(snapshot.clusterConfiguration) 63 | 64 | logger.debug(s"Finished installing $snapshot") 65 | } 66 | 67 | def reload(snapshot: Snapshot) = { 68 | logger.info(s"Reloading $snapshot") 69 | stateMachine.restoreSnapshot(snapshot.stateMachineSerialized) 70 | membership.transitionTo(snapshot.clusterConfiguration) 71 | latestSnapshotCoordinates.set((snapshot.index, snapshot.term)) 72 | logger.info(s"Finished reloading $snapshot") 73 | } 74 | 75 | def latestSnapshot(): Option[Snapshot] = { 76 | storage.retrieveLatestSnapshot() 77 | } 78 | 79 | def latestSnapshotIndex = latestSnapshotCoordinates.get()._1 80 | 81 | def isInSnapshot(index: Index, term: Term): Boolean = { 82 | val coordinates = latestSnapshotCoordinates.get() 83 | coordinates._2 >= term && coordinates._1 >= index 84 | } 85 | 86 | def isInSnapshot(index: Index): Boolean = { 87 | val coordinates = latestSnapshotCoordinates.get() 88 | coordinates._1 >= index 89 | } 90 | 91 | def compactedEntry = { 92 | val coordinates = latestSnapshotCoordinates.get() 93 | LogEntry(coordinates._2, coordinates._1, CompactedEntry()) 94 | } 95 | 96 | } -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/rlog/Storage.scala: 
-------------------------------------------------------------------------------- 1 | package ckite.rlog 2 | 3 | import ckite.rpc.LogEntry.Term 4 | 5 | trait Storage { 6 | def log(): Log 7 | 8 | def saveVote(vote: Vote) 9 | 10 | def retrieveLatestVote(): Option[Vote] 11 | 12 | def saveSnapshot(snapshot: Snapshot) 13 | 14 | def retrieveLatestSnapshot(): Option[Snapshot] 15 | } 16 | 17 | case class Vote(term: Term, member: String) 18 | -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/rpc/AppendEntries.scala: -------------------------------------------------------------------------------- 1 | package ckite.rpc 2 | 3 | case class AppendEntries(term: Int, leaderId: String, commitIndex: Long, prevLogIndex: Long = -1, 4 | prevLogTerm: Int = -1, entries: List[LogEntry] = List()) { 5 | override def toString = s"AppendEntries(term=$term,leaderId=$leaderId,commitIndex=$commitIndex,prevLogIndex=$prevLogIndex,prevLogTerm=$prevLogTerm,entries=$entries)" 6 | } -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/rpc/AppendEntriesResponse.scala: -------------------------------------------------------------------------------- 1 | package ckite.rpc 2 | 3 | case class AppendEntriesResponse(term: Int, success: Boolean) -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/rpc/ClusterConfigurationCommand.scala: -------------------------------------------------------------------------------- 1 | package ckite.rpc 2 | 3 | trait ClusterConfigurationCommand extends WriteCommand[Boolean] 4 | -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/rpc/Command.scala: -------------------------------------------------------------------------------- 1 | package ckite.rpc 2 | 3 | trait Command extends Serializable 
-------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/rpc/GetMembersRequest.scala: -------------------------------------------------------------------------------- 1 | package ckite.rpc 2 | 3 | case class GetMembersRequest() -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/rpc/GetMembersResponse.scala: -------------------------------------------------------------------------------- 1 | package ckite.rpc 2 | 3 | case class GetMembersResponse(success: Boolean, members: Seq[String]) -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/rpc/InstallSnapshot.scala: -------------------------------------------------------------------------------- 1 | package ckite.rpc 2 | 3 | import ckite.rlog.Snapshot 4 | import ckite.rpc.LogEntry.Term 5 | 6 | case class InstallSnapshot(term: Term, leaderId: String, snapshot: Snapshot) 7 | 8 | case class InstallSnapshotResponse(success: Boolean) -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/rpc/JoinMember.scala: -------------------------------------------------------------------------------- 1 | package ckite.rpc 2 | 3 | case class JoinMember(memberId: String) -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/rpc/JoinMemberResponse.scala: -------------------------------------------------------------------------------- 1 | package ckite.rpc 2 | 3 | case class JoinMemberResponse(success: Boolean) -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/rpc/JointConfiguration.scala: -------------------------------------------------------------------------------- 1 | package ckite.rpc 2 | 3 | import com.esotericsoftware.kryo.{ Kryo, KryoSerializable } 4 | import 
com.esotericsoftware.kryo.io.{ Input, Output } 5 | 6 | case class JointConfiguration(var oldMembers: Set[String], var newMembers: Set[String]) extends ClusterConfigurationCommand with KryoSerializable { 7 | def write(kryo: Kryo, output: Output) = { 8 | output.writeString(oldMembers.mkString(",")) 9 | output.writeString(newMembers.mkString(",")) 10 | } 11 | 12 | def read(kryo: Kryo, input: Input) = { 13 | oldMembers = input.readString().split(",").toSet 14 | newMembers = input.readString().split(",").toSet 15 | } 16 | } -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/rpc/LogEntry.scala: -------------------------------------------------------------------------------- 1 | package ckite.rpc 2 | 3 | import LogEntry._ 4 | 5 | case class LogEntry(term: Term, index: Index, command: Command) { 6 | override def toString = s"LogEntry(term=$term,index=$index,$command)" 7 | } 8 | 9 | object LogEntry { 10 | type Index = Long 11 | type Term = Int 12 | } -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/rpc/NewConfiguration.scala: -------------------------------------------------------------------------------- 1 | package ckite.rpc 2 | 3 | import com.esotericsoftware.kryo.{ Kryo, KryoSerializable } 4 | import com.esotericsoftware.kryo.io.{ Input, Output } 5 | 6 | case class NewConfiguration(var bindings: Set[String]) extends ClusterConfigurationCommand with KryoSerializable { 7 | def write(kryo: Kryo, output: Output) = { 8 | output.writeString(bindings.mkString(",")) 9 | } 10 | 11 | def read(kryo: Kryo, input: Input) = { 12 | bindings = input.readString().split(",").toSet 13 | } 14 | } -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/rpc/NoOps.scala: -------------------------------------------------------------------------------- 1 | package ckite.rpc 2 | 3 | case class NoOp() extends 
WriteCommand[Unit] 4 | 5 | case object Void 6 | 7 | case class CompactedEntry() extends ReadCommand[Unit] -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/rpc/ReadCommand.scala: -------------------------------------------------------------------------------- 1 | package ckite.rpc 2 | 3 | trait ReadCommand[T] extends Command -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/rpc/RequestVote.scala: -------------------------------------------------------------------------------- 1 | package ckite.rpc 2 | 3 | case class RequestVote(memberId: String, term: Int, lastLogIndex: Long = -1, lastLogTerm: Int = -1) { 4 | override def toString(): String = s"RequestVote($memberId,term=$term,lastLogIndex=$lastLogIndex,lastLogTerm=$lastLogTerm)" 5 | } -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/rpc/RequestVoteResponse.scala: -------------------------------------------------------------------------------- 1 | package ckite.rpc 2 | 3 | case class RequestVoteResponse(currentTerm: Int, granted: Boolean) -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/rpc/Rpc.scala: -------------------------------------------------------------------------------- 1 | package ckite.rpc 2 | 3 | import com.typesafe.config.Config 4 | 5 | trait Rpc { 6 | def createServer(service: RpcService, config: Config): RpcServer 7 | 8 | def createClient(address: String): RpcClient 9 | 10 | } 11 | 12 | trait RpcServer { 13 | 14 | def start(): Unit 15 | 16 | def stop(): Unit 17 | 18 | } 19 | 20 | -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/rpc/RpcClient.scala: -------------------------------------------------------------------------------- 1 | package ckite.rpc 2 | 3 | import 
scala.concurrent.Future 4 | 5 | trait RpcClient { 6 | 7 | def send(request: RequestVote): Future[RequestVoteResponse] 8 | 9 | def send(appendEntries: AppendEntries): Future[AppendEntriesResponse] 10 | 11 | def send(installSnapshot: InstallSnapshot): Future[InstallSnapshotResponse] 12 | 13 | def send[T](command: Command): Future[T] 14 | 15 | def send(joinMember: JoinMember): Future[JoinMemberResponse] 16 | 17 | } -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/rpc/RpcService.scala: -------------------------------------------------------------------------------- 1 | package ckite.rpc 2 | 3 | import scala.concurrent.Future 4 | 5 | trait RpcService { 6 | 7 | def onRequestVoteReceived(requestVote: RequestVote): Future[RequestVoteResponse] 8 | def onAppendEntriesReceived(appendEntries: AppendEntries): Future[AppendEntriesResponse] 9 | def onCommandReceived[T](command: Command): Future[T] 10 | def onInstallSnapshotReceived(installSnapshot: InstallSnapshot): Future[InstallSnapshotResponse] 11 | def onMemberJoinReceived(memberId: String): Future[JoinMemberResponse] 12 | 13 | } -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/rpc/WriteCommand.scala: -------------------------------------------------------------------------------- 1 | package ckite.rpc 2 | 3 | trait WriteCommand[T] extends Command -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/statemachine/CommandExecutor.scala: -------------------------------------------------------------------------------- 1 | package ckite.statemachine 2 | 3 | import ckite.rpc.Command 4 | import ckite.rpc.WriteCommand 5 | import ckite.rpc.ReadCommand 6 | import ckite.util.Logging 7 | 8 | class CommandExecutor(stateMachine: StateMachine) extends Logging { 9 | 10 | val writeFunction = stateMachine.applyWrite 11 | val readFunction = 
stateMachine.applyRead 12 | 13 | def applyWrite[T](index: Long, write: WriteCommand[T]): T = { 14 | val params = (index, write) 15 | if (writeFunction.isDefinedAt(params)) writeFunction(params).asInstanceOf[T] 16 | else { 17 | logger.warn(s"No handler for ${write} is available in the StateMachine") 18 | throw new IllegalStateException(s"No handler for ${write}") 19 | } 20 | } 21 | 22 | def applyRead[T](read: ReadCommand[T]): T = { 23 | if (readFunction.isDefinedAt(read)) readFunction(read).asInstanceOf[T] 24 | else { 25 | logger.warn(s"No handler for ${read} is available in the StateMachine") 26 | throw new IllegalStateException(s"No handler for ${read}") 27 | } 28 | } 29 | 30 | } -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/statemachine/StateMachine.scala: -------------------------------------------------------------------------------- 1 | package ckite.statemachine 2 | 3 | import java.nio.ByteBuffer 4 | 5 | import ckite.rpc.{ ReadCommand, WriteCommand } 6 | 7 | trait StateMachine { 8 | 9 | /** 10 | * Called when consensus has been reached on a WriteCommand. 11 | * Along with the WriteCommand an index is provided to allow 12 | * persistent StateMachines to save atomically both the WriteCommand's 13 | * updates and the index. 14 | * CKite will ask the lastAppliedIndex when deciding which WriteCommands can be replayed during startup. 15 | * 16 | * Memory consistency effects: Since all the operations on the StateMachine are done by 17 | * a single thread then every read, write or snapshot operation happens-before the subsequent 18 | * read, write or snapshot operation. 19 | */ 20 | def applyWrite: PartialFunction[(Long, WriteCommand[_]), Any] 21 | 22 | /** 23 | * The last applied index in the StateMachine. 24 | */ 25 | def getLastAppliedIndex: Long 26 | 27 | /** 28 | * Called when readonly commands are requested. 
29 | */ 30 | def applyRead: PartialFunction[ReadCommand[_], Any] 31 | 32 | /** 33 | * Restore the StateMachine state from a Snapshot 34 | */ 35 | def restoreSnapshot(byteBuffer: ByteBuffer) 36 | 37 | /** 38 | * Captures the StateMachine state as a Snapshot 39 | */ 40 | def takeSnapshot(): ByteBuffer 41 | 42 | } -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/statemachine/j/StateMachine.scala: -------------------------------------------------------------------------------- 1 | package ckite.statemachine.j 2 | 3 | import java.nio.ByteBuffer 4 | import ckite.rpc.Command 5 | import ckite.rpc.WriteCommand 6 | import ckite.rpc.ReadCommand 7 | 8 | trait StateMachine { 9 | 10 | def deserialize(byteBuffer: ByteBuffer) 11 | 12 | def serialize(): ByteBuffer 13 | 14 | def applyWrite(index: Long, write: WriteCommand[_]): Any 15 | 16 | def applyRead(read: ReadCommand[_]): Any 17 | 18 | def lastAppliedIndex: Long 19 | 20 | } -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/statemachine/j/StateMachineWrapper.scala: -------------------------------------------------------------------------------- 1 | package ckite.statemachine.j 2 | 3 | import java.nio.ByteBuffer 4 | import ckite.rpc.Command 5 | import ckite.rpc.WriteCommand 6 | import ckite.rpc.ReadCommand 7 | 8 | class StateMachineWrapper(jstateMachine: StateMachine) extends ckite.statemachine.StateMachine { 9 | 10 | def restoreSnapshot(byteBuffer: ByteBuffer) = jstateMachine.deserialize(byteBuffer) 11 | 12 | def takeSnapshot(): ByteBuffer = jstateMachine.serialize 13 | 14 | def applyWrite: PartialFunction[(Long, WriteCommand[_]), Any] = { 15 | case (index, write) ⇒ jstateMachine.applyWrite(index, write) 16 | } 17 | 18 | def applyRead: PartialFunction[ReadCommand[_], Any] = { 19 | case read ⇒ jstateMachine.applyRead(read) 20 | } 21 | 22 | def getLastAppliedIndex: Long = jstateMachine.lastAppliedIndex 
23 | } -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/states/Candidate.scala: -------------------------------------------------------------------------------- 1 | package ckite.states 2 | 3 | import java.util.concurrent.atomic.AtomicReference 4 | import java.util.concurrent.{ ConcurrentHashMap, SynchronousQueue, ThreadPoolExecutor, TimeUnit } 5 | 6 | import ckite._ 7 | import ckite.rpc.LogEntry.Term 8 | import ckite.rpc._ 9 | import ckite.util.CKiteConversions._ 10 | import ckite.util.CustomThreadFactory 11 | 12 | import scala.collection.JavaConverters._ 13 | import scala.concurrent.ExecutionContext.Implicits.global 14 | import scala.concurrent.duration._ 15 | import scala.concurrent.{ Await, Future, Promise } 16 | import scala.util.{ Failure, Success, Try } 17 | 18 | case class Candidate(consensus: Consensus, membership: Membership, log: RLog, term: Term, leaderAnnouncer: LeaderAnnouncer) extends State(Some(membership.myId)) { 19 | 20 | private val electionWorker = new ThreadPoolExecutor(1, 1, 21 | 60L, TimeUnit.SECONDS, 22 | new SynchronousQueue[Runnable](), 23 | CustomThreadFactory(s"CandidateElection-worker-${membership.myId}")) 24 | 25 | private val runningElection = new AtomicReference[java.util.concurrent.Future[_]]() 26 | private val votes = new ConcurrentHashMap[String, Boolean]() 27 | private val maxVotesExpected = membership.members.size 28 | 29 | override def begin() = { 30 | logger.debug(s"Start election") 31 | startElection() 32 | } 33 | 34 | private def startElection() = { 35 | val electionTask: Runnable = () ⇒ { 36 | election() 37 | } 38 | runningElection.set(electionWorker.submit(electionTask)) 39 | } 40 | 41 | private def election() = { 42 | val votes = collectVotes() 43 | logger.debug(s"Got ${votes.size} votes") 44 | if (membership.reachQuorum(votes)) wonElection() else lostElection() 45 | } 46 | 47 | def lostElection() { 48 | logger.info(s"Not enough votes to be a Leader") 49 | 
consensus.becomeFollower(term = term, vote = Some(membership.myId)) //voted for my self when Candidate 50 | } 51 | 52 | def wonElection() { 53 | logger.info(s"Won the election. Will become Leader...") 54 | consensus.becomeLeader(term) 55 | } 56 | 57 | private def collectVotes(): Set[String] = { 58 | if (!membership.hasRemoteMembers) return Set(membership.myId) 59 | val votesPromise = Promise[Set[String]]() 60 | 61 | voteForMyself() 62 | 63 | val lastLogEntry = log.lastEntry 64 | membership.remoteMembers.foreach { remoteMember ⇒ 65 | Future { 66 | (remoteMember.id, requestVote(remoteMember, lastLogEntry)) 67 | } onComplete { 68 | case Success((member, vote)) ⇒ { 69 | vote map { granted ⇒ 70 | votes.put(member, granted) 71 | val grantedVotes = votes.asScala.filter { tuple ⇒ tuple._2 }.keySet.toSet 72 | val rejectedVotes = votes.asScala.filterNot { tuple ⇒ tuple._2 }.keySet.toSet 73 | if (membership.reachQuorum(grantedVotes) || 74 | membership.reachSomeQuorum(rejectedVotes) || 75 | maxVotesExpected == votes.size()) 76 | votesPromise.trySuccess(grantedVotes) 77 | } 78 | } 79 | case Failure(reason) ⇒ { 80 | logger.error("failure collecting votes", reason) 81 | } 82 | } 83 | } 84 | Try { 85 | Await.result(votesPromise.future, consensus.configuration.collectVotesTimeout millis) //TODO: Refactor me 86 | } getOrElse { 87 | votes.asScala.filter { tuple ⇒ tuple._2 }.keySet.toSet 88 | } 89 | } 90 | 91 | private def requestVote(remoteMember: RemoteMember, lastLogEntry: Option[LogEntry]): Future[Boolean] = { 92 | logger.debug(s"Requesting vote to ${remoteMember.id}") 93 | remoteMember.sendRequestVote(lastLogEntry match { 94 | case None ⇒ RequestVote(membership.myId, term) 95 | case Some(entry) ⇒ RequestVote(membership.myId, term, entry.index, entry.term) 96 | }).map { voteResponse ⇒ 97 | logger.debug(s"Got $voteResponse from ${remoteMember.id}") 98 | voteResponse.granted && voteResponse.currentTerm == term 99 | } recover { 100 | case reason: Throwable ⇒ 101 | 
logger.debug(s"Error requesting vote: ${reason.getMessage()}") 102 | false 103 | } 104 | } 105 | 106 | private def voteForMyself() { 107 | //This is in conflict with the vote set upon Candidate creation 108 | if (membership.members.contains(membership.myId)) { 109 | votes.put(membership.myId, true) //Can Local vote??? 110 | } 111 | } 112 | 113 | private def abortElection() = { 114 | logger.debug("Abort Election") 115 | val future = runningElection.get() 116 | if (future != null) future.cancel(true) 117 | } 118 | 119 | override def canTransitionTo(newState: State) = { 120 | newState match { 121 | case leader: Leader ⇒ leader.term == term 122 | case follower: Follower ⇒ follower.term >= term //in case of split vote or being an old candidate 123 | case _ ⇒ newState.term > term 124 | } 125 | } 126 | 127 | override def onAppendEntries(appendEntries: AppendEntries): Future[AppendEntriesResponse] = { 128 | appendEntries.term match { 129 | case leaderTerm if leaderTerm < term ⇒ rejectOldLeader(appendEntries) 130 | case leaderTerm if leaderTerm >= term ⇒ stepDownAndPropagate(appendEntries) 131 | } 132 | } 133 | 134 | override def stepDownAndPropagate(appendEntries: AppendEntries): Future[AppendEntriesResponse] = { 135 | logger.debug("Leader already elected in term[{}]", appendEntries.term) 136 | abortElection() 137 | super.stepDownAndPropagate(appendEntries) 138 | } 139 | 140 | override def stepDownAndPropagate(requestVote: RequestVote): Future[RequestVoteResponse] = { 141 | abortElection() 142 | super.stepDownAndPropagate(requestVote) 143 | } 144 | 145 | override def onRequestVote(requestVote: RequestVote): Future[RequestVoteResponse] = { 146 | requestVote.term match { 147 | case candidateTerm if candidateTerm < term ⇒ rejectOldCandidate(requestVote.memberId) 148 | case candidateTerm if candidateTerm == term ⇒ rejectVote(requestVote.memberId, "contender candidate of the same term") 149 | case candidateTerm if candidateTerm > term ⇒ stepDownAndPropagate(requestVote) 150 | } 
151 | } 152 | 153 | override def onInstallSnapshot(installSnapshot: InstallSnapshot): Future[InstallSnapshotResponse] = { 154 | Future.successful(InstallSnapshotResponse(false)) 155 | } 156 | 157 | override def onCommand[T](command: Command): Future[T] = { 158 | leaderAnnouncer.onLeader(_.forwardCommand[T](command)) 159 | } 160 | 161 | override def toString = s"Candidate[$term]" 162 | 163 | } 164 | -------------------------------------------------------------------------------- /ckite-core/src/main/scala/ckite/states/Follower.scala: -------------------------------------------------------------------------------- 1 | package ckite.states 2 | 3 | import java.util.Random 4 | import java.util.concurrent.atomic.AtomicReference 5 | import java.util.concurrent.{ ScheduledFuture, TimeUnit } 6 | 7 | import ckite._ 8 | import ckite.rpc._ 9 | import ckite.util.CKiteConversions.fromFunctionToRunnable 10 | import ckite.util.{ ConcurrencySupport, Logging } 11 | 12 | import scala.concurrent.ExecutionContext.Implicits.global 13 | import scala.concurrent.Future 14 | import scala.util.Try 15 | 16 | case class Follower(consensus: Consensus, membership: Membership, log: RLog, term: Int, leaderAnnouncer: LeaderAnnouncer, vote: Option[String]) extends State(vote) with Logging { 17 | 18 | private val electionTimeout = new ElectionTimeout(consensus, term) 19 | 20 | override def begin() = { 21 | resetElectionTimeout() //start the election timeout if no communication from the Leader 22 | } 23 | 24 | override def onAppendEntries(appendEntries: AppendEntries): Future[AppendEntriesResponse] = { 25 | appendEntries.term match { 26 | case leaderTerm if leaderTerm < term ⇒ rejectOldLeader(appendEntries) 27 | case leaderTerm if leaderTerm > term ⇒ stepDownAndPropagate(appendEntries) 28 | case leaderTerm if leaderTerm == term ⇒ receivedAppendEntriesFromLeader(appendEntries) 29 | } 30 | } 31 | 32 | override def onRequestVote(requestVote: RequestVote): Future[RequestVoteResponse] = { 33 | 
requestVote.term match { 34 | case requestTerm if requestTerm < term ⇒ rejectOldCandidate(requestVote.memberId) 35 | case requestTerm if requestTerm > term ⇒ stepDownAndPropagate(requestVote) 36 | case requestTerm if requestTerm == term ⇒ analyzeRequestVote(requestVote) 37 | } 38 | } 39 | 40 | private def receivedAppendEntriesFromLeader(appendEntries: AppendEntries): Future[AppendEntriesResponse] = { 41 | Try { 42 | resetElectionTimeout() //Leader is alive. God save the Leader! 43 | announceLeader(appendEntries.leaderId) 44 | append(appendEntries) 45 | }.recover { 46 | case reason: Exception ⇒ rejectAppendEntries(appendEntries, reason.getMessage) 47 | }.get 48 | } 49 | 50 | private def analyzeRequestVote(requestVote: RequestVote): Future[RequestVoteResponse] = { 51 | val couldGrantVote = checkGrantVotePolicy(requestVote) 52 | if (couldGrantVote) { 53 | if (tryGrantVoteTo(requestVote.memberId)) { 54 | logger.debug(s"Granting vote to ${requestVote.memberId} in term[${term}]") 55 | resetElectionTimeout() 56 | consensus.persistState() 57 | grantVote() 58 | } else { 59 | rejectVote(requestVote.memberId, s"already voted for ${votedFor.get()}") 60 | } 61 | } else { 62 | rejectVote(requestVote.memberId, s"not granted vote policy") 63 | } 64 | } 65 | 66 | private def tryGrantVoteTo(member: String): Boolean = { 67 | votedFor.compareAndSet(None, Some(member)) || votedFor.get().equals(Some(member)) 68 | } 69 | 70 | override def onCommand[T](command: Command): Future[T] = leaderAnnouncer.onLeader(_.forwardCommand[T](command)) 71 | 72 | def stepDownAndPropagate(installSnapshot: InstallSnapshot): Future[InstallSnapshotResponse] = { 73 | stepDown(installSnapshot.term) 74 | consensus.onInstallSnapshot(installSnapshot) 75 | } 76 | 77 | override def onInstallSnapshot(installSnapshot: InstallSnapshot): Future[InstallSnapshotResponse] = { 78 | installSnapshot.term match { 79 | case leaderTerm if leaderTerm < term ⇒ Future.successful(InstallSnapshotResponse(REJECTED)) 80 | case 
leaderTerm if leaderTerm > term ⇒ stepDownAndPropagate(installSnapshot) 81 | case leaderTerm if leaderTerm == term ⇒ log.installSnapshot(installSnapshot.snapshot).map(_ ⇒ InstallSnapshotResponse(ACCEPTED)) 82 | } 83 | } 84 | 85 | private def resetElectionTimeout() = electionTimeout restart 86 | 87 | private def append(appendEntries: AppendEntries): Future[AppendEntriesResponse] = { 88 | log.tryAppend(appendEntries) map { success ⇒ 89 | AppendEntriesResponse(term, success) 90 | } 91 | } 92 | 93 | private def announceLeader(leaderId: String) { 94 | if (leaderAnnouncer.announce(leaderId)) { 95 | logger.info("Following {} in term[{}]", leaderId, term) 96 | } 97 | } 98 | 99 | private def checkGrantVotePolicy(requestVote: RequestVote) = { 100 | (hastNotVotedYet() || hasVotedFor(requestVote.memberId)) && isMuchUpToDate(requestVote) 101 | } 102 | 103 | def hasVotedFor(member: String): Boolean = vote.get == member 104 | 105 | def hastNotVotedYet(): Boolean = !votedFor.get().isDefined 106 | 107 | private def isMuchUpToDate(requestVote: RequestVote) = { 108 | val lastLogEntry = log.lastEntry 109 | lastLogEntry.isEmpty || (requestVote.lastLogTerm >= lastLogEntry.get.term && requestVote.lastLogIndex >= lastLogEntry.get.index) 110 | } 111 | 112 | override def stop(stopTerm: Int) = { 113 | if (stopTerm > term) { 114 | electionTimeout stop 115 | } 116 | } 117 | 118 | override val toString = s"Follower[$term]" 119 | 120 | } 121 | 122 | class ElectionTimeout(consensus: Consensus, term: Int) extends Logging { 123 | 124 | import ckite.states.ElectionTimeout._ 125 | 126 | private val scheduledFuture = new AtomicReference[ScheduledFuture[_]]() 127 | 128 | def restart = { 129 | stop 130 | start 131 | } 132 | 133 | private def start = { 134 | val electionTimeout = randomTimeout 135 | logger.trace(s"New timeout is $electionTimeout ms") 136 | val task: Runnable = () ⇒ { 137 | logger.debug("Timeout reached! 
/**
 * Factory for the "joining" state: a member that is catching up with an existing
 * cluster behaves exactly like a Follower, except that it must never time out and
 * start an election of its own — hence the NoElectionTimeout mixin.
 */
object Joiner {
  def apply(consensus: Consensus, membership: Membership, log: RLog, term: Term, configuration: Configuration): Follower = {
    // A joiner starts with a fresh announcer and no vote granted in this term.
    val announcer = LeaderAnnouncer(membership, configuration)
    new Follower(consensus, membership, log, term, announcer, None) with NoElectionTimeout {
      override val toString = s"Joiner[$term]"
    }
  }
}
/**
 * The Leader state of the Raft consensus protocol. Responsibilities:
 *  - periodically broadcast AppendEntries (heartbeats and replication) to all remote members,
 *  - accept client commands, append them to the local log and replicate them,
 *  - advance the commit index once a quorum of members has acknowledged an entry,
 *  - fall back to InstallSnapshot when a follower is too far behind,
 *  - step down when a higher term is observed.
 */
case class Leader(consensus: Consensus, membership: Membership, log: RLog, term: Term, leaderAnnouncer: LeaderAnnouncer) extends State(Some(membership.myId)) with ConcurrencySupport {

  // NOTE(review): ReplicationTimeout and AppendEntriesTimeout are not referenced anywhere
  // in this class's visible code — possibly dead configuration reads; confirm before removing.
  private val ReplicationTimeout = consensus.configuration.appendEntriesTimeout
  private val AppendEntriesTimeout = consensus.configuration.appendEntriesTimeout millis
  // Used by isLeader() to bound the quorum round-trip check.
  private val waitForLeaderTimeout = consensus.configuration.waitForLeaderTimeout millis
  // Single-threaded scheduler driving the heartbeat/replication loop.
  private val scheduledHeartbeatsPool = scheduler("HeartbeatThread")
  // memberId -> timestamp of the last AppendEntries ack; feeds stats() only.
  private val followersStats = new ConcurrentHashMap[String, Long]()
  private val startTime = now()

  /**
   * Leader initialization: sanity-check the term, reset replication bookkeeping,
   * start heartbeats, and commit a no-op (or the initial configuration) so that
   * entries from previous terms become committable (Raft §5.4.2).
   */
  override def begin() = {
    if (term < consensus.term) {
      // A newer term already exists; this leadership is stale before it started.
      logger.debug(s"Can't be a Leader of term $term. Current term is ${consensus.term}")
      consensus.becomeFollower(consensus.term)
    } else {
      resetLastIndex()
      resetNextAndMatchIndexes()
      startBroadcasting()
      // Leadership is only announced once the no-op commit round-trip succeeds.
      appendNoOp() andThen {
        case Success(_)      ⇒ announceLeadership()
        case Failure(reason) ⇒ logger.error("Failed to commit noop command", reason)
      }
    }
  }

  /** Schedules the periodic broadcast at the configured heartbeat interval. */
  def startBroadcasting() {
    logger.debug("Start broadcasting...")
    scheduledHeartbeatsPool.scheduleAtFixedRate(() ⇒ {
      broadcast()
    }, 0, consensus.configuration.heartbeatsInterval, TimeUnit.MILLISECONDS)
  }

  /** Sends one AppendEntries (heartbeat or replication) to every remote member. */
  private def broadcast(): Set[(RemoteMember, Future[AppendEntriesResponse])] = {
    logger.trace(s"Leader[$term] broadcasting AppendEntries")
    membership.remoteMembers map { member ⇒ (member, sendAppendEntries(member)) }
  }

  /**
   * Builds and sends the per-member AppendEntries request, then processes the
   * response (possibly stepping down on a higher term). On send failure the
   * in-flight replication marks are cleared so the entries can be retried.
   */
  private def sendAppendEntries(member: RemoteMember): Future[AppendEntriesResponse] = {
    val request = createAppendEntriesFor(member)
    if (!request.entries.isEmpty) {
      logger.trace("Sending {} entries to {}", request.entries.size, member.id)
    }
    member.sendAppendEntries(request).map { response ⇒
      logger.trace(s"AppendEntries response ${response} from ${member.id}")
      if (response.term > term) {
        receivedHigherTerm(response.term)
      } else {
        receivedAppendEntriesResponse(member, request, response)
      }
      response
    }.andThen {
      case Failure(reason) ⇒
        logger.trace("Error sending appendEntries {}", reason.getMessage())
        if (!request.entries.isEmpty) {
          // Allow these indices to be re-sent on the next broadcast.
          member.markReplicationsNotInProgress(request.entries.map(_.index))
        }
    }
  }

  /** Replication request when there are pending entries, plain heartbeat otherwise. */
  private def createAppendEntriesFor(member: RemoteMember) = toReplicateEntriesOf(member) match {
    case head :: list ⇒ replication(head, list)
    case Nil          ⇒ heartbeat()
  }

  /** A response carrying a higher term means another Leader exists: step down. */
  private def receivedHigherTerm(higherTerm: Int) = {
    val currentTerm = consensus.term
    if (higherTerm > currentTerm) {
      logger.debug("Detected a term {} higher than current term {}. Step down", higherTerm, currentTerm)
      stepDown(higherTerm)
    }
  }

  /** Builds an AppendEntries carrying `head :: tail`, with prevLog info when available. */
  private def replication(head: LogEntry, tail: List[LogEntry]) = {
    val entries = head :: tail
    log.getPreviousLogEntry(head) match {
      case Some(previous) ⇒ normalReplication(previous, entries)
      case None           ⇒ firstReplication(entries)
    }
  }

  private def normalReplication(previous: LogEntry, entries: List[LogEntry]) = {
    AppendEntries(term, membership.myId, log.commitIndex, previous.index, previous.term, entries)
  }

  /** Replication of the very first entries: there is no previous entry to reference. */
  private def firstReplication(entries: List[LogEntry]) = {
    AppendEntries(term, membership.myId, log.commitIndex, entries = entries)
  }

  /** Empty AppendEntries used purely as a liveness signal. */
  private def heartbeat() = AppendEntries(term, membership.myId, log.commitIndex)

  /**
   * Entries pending for `member`, starting at its nextLogIndex.
   * NOTE(review): `log.entry(index)` appears to yield at most one entry, so each
   * broadcast round replicates a single entry per member — confirm against RLog.entry.
   */
  private def toReplicateEntriesOf(member: RemoteMember): List[LogEntry] = {
    val index = member.nextLogIndex.longValue()
    val entries = for (
      entry ← log.entry(index) if (member.canReplicateIndex(index))
    ) yield entry
    List(entries).flatten
  }

  def stopBroadcasting() = {
    logger.debug("Stop broadcasting")
    scheduledHeartbeatsPool.shutdownNow()
  }

  /** Tells the announcer (and thus waiting clients) that this member is the Leader. */
  private def announceLeadership() = {
    logger.info(s"Start being $this")
    leaderAnnouncer.announce(membership.myId)
  }

  /**
   * First commit of this term: either the initial cluster configuration (empty log)
   * or a NoOp; both force a committed entry belonging to the current term.
   */
  private def appendNoOp() = {
    if (log.isEmpty) {
      logger.info("Log is empty. First Leader. Appending initial cluster configuration")
      onCommand[Boolean](NewConfiguration(Set(membership.myId))) //the initial configuration must go through the log
    } else {
      logger.debug("Append a NoOp as part of Leader initialization")
      onCommand[Unit](NoOp())
    }
  }

  private def resetLastIndex() = log.resetLastIndex()

  /** Raft leader bookkeeping: nextIndex := lastIndex + 1, matchIndex := 0 for every follower. */
  private def resetNextAndMatchIndexes() = {
    val nextIndex = log.lastIndex + 1
    membership.remoteMembers.foreach { member ⇒ member.setNextLogIndex(nextIndex); member.resetMatchIndex }
  }

  /** Stops heartbeats when superseded by a strictly higher term. */
  override def stop(stopTerm: Int) = {
    if (stopTerm > term) {
      stopBroadcasting()
      logger.debug("Stop being Leader")
    }
  }

  /** Dispatches client commands: writes are replicated, reads are quorum-checked. */
  override def onCommand[T](command: Command): Future[T] = {
    command match {
      case write: WriteCommand[T] ⇒ onWriteCommand[T](write)
      case read: ReadCommand[T]   ⇒ onReadCommand[T](read)
    }
  }

  /**
   * Appends the write locally, triggers replication, and completes with the
   * state-machine result once the entry is committed and applied.
   */
  private def onWriteCommand[T](write: WriteCommand[T]): Future[T] = {
    log.append[T](term, write) flatMap { tuple ⇒
      val logEntry = tuple._1
      val valuePromise = tuple._2
      broadcast(logEntry)
      valuePromise.future
    }
  }

  /** Single-node cluster shortcut: with no remote members the entry commits immediately. */
  private def broadcast(logEntry: LogEntry): Unit = {
    if (membership.hasRemoteMembers) {
      broadcast()
    } else {
      logger.debug("No member to broadcast")
      log.commit(logEntry.index)
    }
  }

  /** Linearizable read: only executed after confirming this member still leads a quorum. */
  private def onReadCommand[T](command: ReadCommand[T]): Future[T] = onStillBeingLeader.flatMap(_ ⇒ log.execute(command))

  /**
   * Confirms leadership by collecting heartbeat acks from a quorum.
   * Succeeds once a quorum (including self) answers in the captured term;
   * fails with LostLeadershipException on a newer term or a quorum of failures.
   */
  private def onStillBeingLeader = {
    logger.trace(s"$this checking if still being Leader...")
    if (membership.hasRemoteMembers) {
      val promise = Promise[Unit]()
      // Local `term` deliberately shadows the class member: captures the term at check time.
      val term = consensus.term()

      // Used as concurrent sets; the value is the Unit companion (ignored).
      val membersAck = TrieMap[String, Unit](membership.myId -> Unit)
      val memberFailures = TrieMap[String, Unit]()

      broadcast().foreach {
        case (member, futureResponse) ⇒
          futureResponse.andThen {
            case Success(response) ⇒ {
              if (consensus.term != term) lostLeadership(promise, "New term received")
              else {
                membersAck.put(member.id(), Unit)
                if (membership.reachQuorum(membersAck.keys.toSet)) {
                  logger.debug(s"$this ${membership.myId} is still the current Leader")
                  promise.trySuccess(())
                }
              }
            }
            case Failure(reason) ⇒ {
              memberFailures.put(member.id(), Unit)
              if (membership.reachSomeQuorum(memberFailures.keys.toSet)) {
                lostLeadership(promise, "Failed to reach quorum of members")
              }
            }
          }
      }
      promise.future
    } else Future.successful(())
  }

  private def lostLeadership(promise: Promise[Unit], reason: String) = {
    logger.debug(s"$this ${membership.myId} lost leadership. Reason: $reason")
    promise.tryFailure(LostLeadershipException(reason))
  }

  /** Blocking leadership check bounded by waitForLeaderTimeout; false on timeout/failure. */
  override def isLeader() = {
    Try {
      Await.result(onStillBeingLeader, waitForLeaderTimeout)
      true
    }.getOrElse(false)
  }

  /** An AppendEntries with term >= ours means a legitimate Leader exists: step down. */
  override def onAppendEntries(appendEntries: AppendEntries): Future[AppendEntriesResponse] = {
    if (appendEntries.term < term) {
      rejectOldLeader(appendEntries)
    } else {
      stepDownAndPropagate(appendEntries)
    }
  }

  /** A Leader never votes within its own (or an older) term. */
  override def onRequestVote(requestVote: RequestVote): Future[RequestVoteResponse] = {
    if (requestVote.term <= term) {
      rejectVote(requestVote.memberId, s"being Leader in term $term")
    } else {
      stepDownAndPropagate(requestVote)
    }
  }

  /** Second phase of membership change: once C(old,new) commits, replicate C(new). */
  override def onJointConfigurationCommitted(jointConfiguration: JointConfiguration) = {
    logger.debug(s"JointConfiguration is committed... will use and broadcast a NewConfiguration")
    onCommand[Boolean](NewConfiguration(jointConfiguration.newMembers))
  }

  /** A Leader never accepts snapshots from anyone in its own term. */
  override def onInstallSnapshot(installSnapshot: InstallSnapshot): Future[InstallSnapshotResponse] = {
    Future.successful(InstallSnapshotResponse(false))
  }

  /**
   * Handles a same-term AppendEntries response: updates indexes and, if the
   * follower's next entry was already compacted away, switches it to snapshot transfer.
   */
  private def receivedAppendEntriesResponse(member: RemoteMember, request: AppendEntries, response: AppendEntriesResponse) = {
    followersStats.put(member.id(), now())
    if (!request.entries.isEmpty) {
      updateNextLogIndex(member, request, response)
    }
    val nextIndex = member.nextLogIndex.intValue()
    if (isLogEntryInSnapshot(nextIndex)) {
      // disableReplications() returns the previous flag, ensuring only one snapshot send.
      val wasEnabled = member.disableReplications()
      if (wasEnabled) {
        logger.debug(s"Next LogIndex #$nextIndex to be sent to ${member} is contained in a Snapshot. An InstallSnapshot will be sent.")
        sendInstallSnapshot(member)
      }
    }
  }

  /**
   * On success: advance the member's indexes and try to commit.
   * On rejection: decrement nextIndex (Raft log backtracking) and free the entries for retry.
   */
  private def updateNextLogIndex(member: RemoteMember, appendEntries: AppendEntries, appendEntriesResponse: AppendEntriesResponse) = {
    val lastIndexSent = appendEntries.entries.last.index
    if (appendEntriesResponse.success) {
      member.acknowledgeIndex(lastIndexSent)
      logger.debug(s"Member ${member} ack - index sent #$lastIndexSent - next index #${member.nextLogIndex}")
      tryToCommitEntries(lastIndexSent)
    } else {
      member.decrementNextLogIndex()
      if (!appendEntries.entries.isEmpty) {
        member.markReplicationsNotInProgress(appendEntries.entries.map(_.index))
      }
      logger.debug(s"Member ${member} reject - index sent #$lastIndexSent - next index is #${member.nextLogIndex}")
    }
  }

  /** Commits every index above commitIndex (up to lastEntrySent) that has quorum. */
  private def tryToCommitEntries(lastEntrySent: Long) = {
    val currentCommitIndex = log.commitIndex
    (currentCommitIndex + 1) to lastEntrySent foreach { index ⇒
      if (reachQuorum(index)) {
        log.commit(index)
      }
    }
  }

  /** Quorum check for one index; the Leader counts itself. */
  private def reachQuorum(index: Index) = membership.reachQuorum(membersHavingAtLeast(index) + membership.myId)

  private def membersHavingAtLeast(index: Long): Set[String] = {
    membership.remoteMembers.filter { remoteMember ⇒ remoteMember.matchIndex.longValue() >= index } map {
      _.id
    }
  }

  private def isLogEntryInSnapshot(logIndex: Int): Boolean = {
    log.isInSnapshot(logIndex)
  }

  /**
   * Transfers the latest snapshot to a lagging member, then re-enables normal
   * replication regardless of the outcome.
   */
  def sendInstallSnapshot(member: RemoteMember) = {
    log.latestSnapshot map { snapshot ⇒
      val installSnapshot = InstallSnapshot(term, membership.myId, snapshot)
      logger.debug(s"Sending $installSnapshot to ${member}")
      member.sendInstallSnapshot(installSnapshot).map { response ⇒
        if (response.success) {
          logger.debug("Successful InstallSnapshot")
          member.acknowledgeIndex(snapshot.index)
          tryToCommitEntries(snapshot.index)
        } else {
          logger.debug("Failed InstallSnapshot")
        }
        member.enableReplications()
      }

    }
  }

  /** Per-follower ack/index stats for monitoring. */
  override def stats() = {
    val currentTime = now()
    val followers = followersStats.asScala.map {
      tuple ⇒
        val member = membership.get(tuple._1).get
        (tuple._1, FollowerInfo(lastAck(tuple._2, currentTime), member.matchIndex.intValue(), member.nextLogIndex.intValue()))
    }
    LeaderInfo(leaderUptime.toString, followers.toMap)
  }

  private def leaderUptime = (now() - startTime millis).toCoarsest

  private def lastAck(ackTime: Long, now: Long) = if (ackTime > 0) (now - ackTime millis).toString else "Never"

  private def now(): Long = System.currentTimeMillis()

  override def toString = s"Leader[$term]"

}
/**
 * Base class for all Raft states (Follower, Candidate, Leader, Joiner, Starter, Stopped).
 * Holds the vote granted in this term and provides the shared grant/reject/step-down
 * helpers used by the concrete states.
 *
 * @param vote the vote already granted when entering this state, if any
 */
abstract class State(vote: Option[String] = None) extends Logging {

  // Current vote for this term; updated atomically (see Follower.tryGrantVoteTo).
  val votedFor = new AtomicReference[Option[String]](vote)

  // Readable aliases for the boolean RPC response flags.
  protected val GRANTED, ACCEPTED = true
  protected val REJECTED = false

  def leaderAnnouncer: LeaderAnnouncer

  def term: Term

  protected def membership: Membership

  protected def consensus: Consensus

  /** Hook invoked when the state is entered. Default: nothing. */
  def begin() = {
  }

  /** Hook invoked when the state is superseded by `term`. Default: nothing. */
  def stop(term: Term) = {
  }

  def onRequestVote(requestVote: RequestVote): Future[RequestVoteResponse]

  def onAppendEntries(appendEntries: AppendEntries): Future[AppendEntriesResponse]

  /** Client commands are unsupported unless a concrete state overrides this. */
  def onCommand[T](command: Command): Future[T] = throw new UnsupportedOperationException()

  def onJointConfigurationCommitted(jointConfiguration: JointConfiguration) = {}

  def onInstallSnapshot(installSnapshot: InstallSnapshot): Future[InstallSnapshotResponse]

  /** Transitions are only allowed towards strictly higher terms by default. */
  def canTransitionTo(newState: State): Boolean = newState.term > term

  /** Becomes Follower of `term`, keeping the announcer lineage via onStepDown. */
  protected def stepDown(term: Term): Unit = {
    logger.debug(s"${membership.myId} Step down from being $this")
    consensus.becomeFollower(term = term, leaderAnnouncer = leaderAnnouncer.onStepDown)
  }

  /** Steps down and re-dispatches the request so the new Follower state handles it. */
  protected def stepDownAndPropagate(appendEntries: AppendEntries): Future[AppendEntriesResponse] = {
    stepDown(appendEntries.term)
    consensus.onAppendEntries(appendEntries)
  }

  protected def stepDownAndPropagate(requestVote: RequestVote): Future[RequestVoteResponse] = {
    stepDown(requestVote.term)
    consensus.onRequestVote(requestVote)
  }

  protected def rejectVote(candidateRejected: String, reason: String): Future[RequestVoteResponse] = {
    logger.debug(s"Rejecting vote to $candidateRejected due to $reason")
    Future.successful(RequestVoteResponse(term, REJECTED))
  }

  protected def rejectOldCandidate(candidateRejected: String) = {
    rejectVote(candidateRejected, "old Candidate term")
  }

  protected def rejectOldLeader(appendEntries: AppendEntries) = {
    rejectAppendEntries(appendEntries, "old Leader term")
  }

  protected def grantVote() = {
    Future.successful(RequestVoteResponse(term, GRANTED))
  }

  protected def rejectAppendEntries(appendEntries: AppendEntries, reason: String): Future[AppendEntriesResponse] = {
    // Bug fix: this previously interpolated $AppendEntries (the companion object),
    // logging the literal object instead of the rejected request.
    logger.debug(s"Rejecting $appendEntries due to $reason")
    Future.successful(AppendEntriesResponse(term, REJECTED))
  }

  protected def rejectInstallSnapshot() = Future.successful(InstallSnapshotResponse(REJECTED))

  /** Blocks until a Leader is announced; true if that Leader is this member. */
  def isLeader = {
    leaderAnnouncer.awaitLeader.id().equals(membership.myId)
  }

  def stats(): StateInfo = NonLeaderInfo(if (leaderAnnouncer.isLeaderAnnounced) Some(leaderAnnouncer.awaitLeader.id()) else None)
}
// Monitoring/stats DTOs exposed by State.stats().
// NOTE(review): StateInfo is an open (non-sealed) class used as the common supertype;
// consider sealing it so stats consumers can match exhaustively — confirm no external subclasses.
class StateInfo

/** Stats reported by a Leader: its uptime plus per-follower replication info. */
case class LeaderInfo(leaderUptime: String, followers: Map[String, FollowerInfo]) extends StateInfo

/** Stats reported by any non-Leader state: the Leader it follows, if one is announced. */
case class NonLeaderInfo(following: Option[String]) extends StateInfo

/** Per-follower view from the Leader: last ack ("Never" if none) and log indexes. */
case class FollowerInfo(lastHeartbeatACK: String, matchIndex: Int, nextIndex: Int)
/**
 * In-memory Log backed by a concurrent TrieMap keyed by entry index.
 * Used by MemoryStorage; provides no durability.
 */
class MemoryLog extends Log with Logging {

  val map = TrieMap[Long, LogEntry]()

  override def append(entry: LogEntry): Future[Unit] = {
    map.put(entry.index, entry)
    Future.successful(())
  }

  /** Drops entries 1..upToIndex after they have been captured in a snapshot. */
  override def rollLog(upToIndex: Long): Unit = {
    (1L to upToIndex) foreach { index ⇒
      logger.info(s"Removing entry #${index}")
      map.remove(index)
    }
  }

  override def size(): Long = map.size

  // Returns null for a missing index — the Log interface appears to expect a
  // nullable result here (callers elsewhere null-check); kept for compatibility.
  override def getEntry(index: Long): LogEntry = map.get(index).getOrElse(null)

  /** Removes `index` and everything after it (log truncation on conflict). */
  override def discardEntriesFrom(index: Long): Unit = {
    discardEntriesFromRecursive(index)
  }

  @tailrec
  private def discardEntriesFromRecursive(index: Long): Unit = {
    // Bug fix: TrieMap.remove returns Option[LogEntry], which is never null, so the
    // previous `!= null` check was always true and this tail-recursion never
    // terminated. Recurse only while an entry was actually removed.
    if (map.remove(index).isDefined) discardEntriesFromRecursive(index + 1)
  }

  override def close(): Unit = {}

  override def getLastIndex(): Long = {
    // max over the key set instead of the previous sort-then-last (O(n) vs O(n log n)).
    if (map.isEmpty) 0 else map.keys.max
  }

}
/**
 * ThreadFactory that names threads "name-N" (N starting at 1), groups them under a
 * dedicated ThreadGroup, and marks them daemon according to `makeDaemons`.
 *
 * NOTE(review): the auxiliary constructor hard-codes makeDaemons = false while the
 * case-class default is true, so `new CustomThreadFactory(n)` and
 * `CustomThreadFactory(n)` differ — looks intentional for Java callers, but confirm.
 */
case class CustomThreadFactory(name: String, makeDaemons: Boolean = true) extends ThreadFactory {
  def this(name: String) = this(name, false)

  val group = new ThreadGroup(Thread.currentThread().getThreadGroup(), name)
  val threadNumber = new AtomicInteger(1)

  def newThread(r: Runnable) = {
    val created = new Thread(group, r, s"$name-${threadNumber.getAndIncrement()}")
    created.setDaemon(makeDaemons)
    // Normalize priority: the new thread inherits the creator's priority, which may differ.
    if (created.getPriority != Thread.NORM_PRIORITY) created.setPriority(Thread.NORM_PRIORITY)
    created
  }
}
/**
 * Mixin providing an slf4j logger named after the concrete class, plus a helper
 * that logs and rethrows any Exception raised by the wrapped block.
 */
trait Logging {

  val logger = LoggerFactory.getLogger(this.getClass())

  /** Evaluates `f`; on Exception, logs it under "Error" and rethrows unchanged. */
  def loggingErrors[T](f: ⇒ T) = {
    try {
      f
    } catch {
      case error: Exception ⇒
        logger.error("Error", error)
        throw error
    }
  }

}
[%thread] %logger{36} - %msg%n 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /ckite-core/src/test/scala/ckite/CKiteIntegrationTest.scala: -------------------------------------------------------------------------------- 1 | package ckite 2 | 3 | import java.util.concurrent.TimeoutException 4 | 5 | import ckite.example.{ Get, KVStore, Put } 6 | import ckite.exception.LostLeadershipException 7 | import ckite.storage.MemoryStorage 8 | import ckite.util.Logging 9 | import org.scalatest._ 10 | 11 | import scala.concurrent.{ Await, Future } 12 | import scala.concurrent.duration._ 13 | 14 | class CKiteIntegrationTest extends FlatSpec with Matchers with Logging { 15 | 16 | val Key1 = "key1" 17 | val Value1 = "value1" 18 | val Value2 = "value2" 19 | 20 | val BOOTSTRAP = true 21 | 22 | val Member1Address = "localhost:9091" 23 | val Member2Address = "localhost:9092" 24 | val Member3Address = "localhost:9093" 25 | val Member4Address = "localhost:9094" 26 | 27 | "A single member cluster" should "elect a Leader" in { 28 | val ckite = CKiteBuilder().listenAddress(Member1Address) 29 | .stateMachine(new KVStore()).bootstrap(BOOTSTRAP).storage(MemoryStorage()).rpc(TestRpc).build.asInstanceOf[CKiteClient] 30 | ckite start 31 | 32 | ckite.isLeader should be 33 | 34 | ckite stop 35 | } 36 | 37 | it should "read committed writes" in { 38 | val ckite = CKiteBuilder().listenAddress(Member1Address) 39 | .stateMachine(new KVStore()).bootstrap(BOOTSTRAP).rpc(TestRpc).build 40 | ckite start 41 | 42 | await(ckite.write(Put(Key1, Value1))) 43 | 44 | val readValue = await(ckite.read(Get(Key1))) 45 | 46 | readValue should be(Value1) 47 | 48 | ckite stop 49 | } 50 | 51 | it should "compact a log & reload snapshot" in { 52 | val ckite = CKiteBuilder().listenAddress(Member1Address) 53 | .compactionThreshold(5 + 1) //5 writes + 1 NoOp 54 | .stateMachine(new 
KVStore()).bootstrap(BOOTSTRAP).rpc(TestRpc).build 55 | ckite start 56 | 57 | await(ckite.write(Put("key1", "value1"))) 58 | await(ckite.write(Put("key2", "value2"))) 59 | await(ckite.write(Put("key3", "value3"))) 60 | await(ckite.write(Put("key4", "value4"))) 61 | await(ckite.write(Put("key5", "value5"))) 62 | 63 | //log should be compacted at this point 64 | 65 | await(ckite.write(Put("key6", "value6"))) 66 | 67 | waitSomeTimeForElection 68 | 69 | ckite stop 70 | 71 | val ckiteRestarted = restart(ckite) 72 | 73 | await(ckiteRestarted.read(Get("key1"))) should be("value1") 74 | await(ckiteRestarted.read(Get("key2"))) should be("value2") 75 | await(ckiteRestarted.read(Get("key3"))) should be("value3") 76 | await(ckiteRestarted.read(Get("key4"))) should be("value4") 77 | await(ckiteRestarted.read(Get("key5"))) should be("value5") 78 | 79 | ckiteRestarted.stop 80 | } 81 | 82 | it should "restore latest cluster configuration from Log" in { 83 | val ckite = CKiteBuilder().listenAddress(Member1Address) 84 | .stateMachine(new KVStore()).bootstrap(BOOTSTRAP).rpc(TestRpc).build 85 | ckite start 86 | 87 | //It is expected to timeout since Member2 is not up and the configuration must to committed under the new configuration (member1 and member2) 88 | //TODO: What if two subsequent JointConfiguration ??? 
  // Verifies that the cluster configuration survives a restart when it was captured
  // in a snapshot (rather than replayed from the log).
  it should "restore latest cluster configuration from Snapshot" in {
    val ckite = CKiteBuilder().listenAddress(Member1Address)
      .compactionThreshold(2 + 1) //1 write + 1 NoOp trigger compaction
      .stateMachine(new KVStore()).bootstrap(BOOTSTRAP).rpc(TestRpc).build
    ckite start

    //Expected to time out: 9092 is not up, and the change must be committed under the new joint configuration (9091 and 9092)
    //TODO: What if two subsequent EnterJointConsensus ???
    intercept[TimeoutException] {
      await(ckite.addMember(Member2Address))
    }

    //This write crosses the compaction threshold and forces the Snapshot. Again, it is expected to time out (no quorum).
    intercept[TimeoutException] {
      await(ckite.write(Put(Key1, Value1)))
    }

    waitSomeTimeForAppendEntries

    ckite.stop

    val ckiteRestarted = restart(ckite)

    val members = ckiteRestarted.members

    // The restarted member must have recovered the configuration (including Member2) from the snapshot.
    members should contain(Member2Address)

    ckiteRestarted.stop
  }
should "read committed writes" in withStartedThreeMemberCluster { members ⇒

    val leader = members leader

    await(leader.write(Put(Key1, Value1)))

    // Every member (leader and followers) must serve the committed value.
    members foreach { member ⇒
      await(member.read(Get(Key1))) should be(Value1)
    }

  }

  it should "forward writes to the Leader" in withStartedThreeMemberCluster { members ⇒

    val someFollower = (members followers) head

    //this write is forwarded to the Leader
    await(someFollower.write(Put(Key1, Value1)))

    members foreach { member ⇒
      await(member.read(Get(Key1))) should be(Value1)
    }
  }

  it should "maintain quorum when 1 member goes down" in withStartedThreeMemberCluster { members ⇒

    val someFollower = (members followers) head

    //a member goes down
    someFollower.stop

    val leader = members leader

    //leader still have quorum. this write is going to be committed
    await(leader.write(Put(Key1, Value1)))

    // Only the surviving members are checked; the stopped follower is excluded.
    (members diff Seq(someFollower)) foreach { member ⇒
      await(member.read(Get(Key1))) should be(Value1)
    }
  }

  it should "loose quorum when 2 members goes down" in withStartedThreeMemberCluster { members ⇒

    val leader = members leader

    //all the followers goes down
    (members followers) foreach {
      _.stop
    }

    //leader no longer have quorum.
this write is going to be rejected
    intercept[TimeoutException] {
      await(leader.write(Put(Key1, Value1)))
    }
  }

  it should "forward join on restarted member" in withStartedThreeMemberCluster { members ⇒

    val leader = members leader

    //all the followers goes down
    val follower = members.followers.head

    follower.stop()

    // Rebuild the follower's seed list without itself and without the current
    // leader, so its join request must be forwarded to the leader.
    val seeds = Set(Member1Address, Member2Address, Member3Address) - id(leader) - (id(follower))
    builder(follower).storage(MemoryStorage()).members(seeds.toSeq)

    restart(follower)

    waitSomeTimeForAppendEntries()
  }

  it should "replicate missing commands on restarted member" in {

    // Hand-built cluster (not the shared fixture) so member3 can be restarted
    // with a fresh in-memory storage below.
    val member1 = CKiteBuilder().listenAddress(Member1Address)
      .stateMachine(new KVStore()).bootstrap(BOOTSTRAP).rpc(TestRpc).build

    val member2 = CKiteBuilder().listenAddress(Member2Address).members(Seq(Member1Address, Member3Address))
      .minElectionTimeout(1000).maxElectionTimeout(1000)
      .stateMachine(new KVStore()).rpc(TestRpc).build

    val member3 = CKiteBuilder().listenAddress(Member3Address).members(Seq(Member2Address, Member1Address))
      .minElectionTimeout(2000).maxElectionTimeout(2000)
      .stateMachine(new KVStore()).rpc(TestRpc).build

    val members = Seq(member1, member2, member3)

    members foreach {
      _ start
    }

    try {

      val leader = members leader

      //member3 goes down
      member3.stop

      //still having a quorum. This write is committed.
      await(leader.write(Put(Key1, Value1)))

      val seeds = Set(Member1Address, Member2Address, Member3Address) - id(leader) - (id(member3))
      builder(member3).storage(MemoryStorage()).members(seeds.toSeq)

      //member3 is back
      val restartedMember3 = restart(member3)

      //wait some time (> heartbeatsInterval) for missing appendEntries to arrive
      waitSomeTimeForAppendEntries

      //read from its local state machine to check if missing appendEntries have been replicated
      val readValue = await(restartedMember3.readLocal(Get(Key1)))

      readValue should be(Value1)
      restartedMember3.stop
    } finally {
      member1.stop
      member2.stop
    }
  }

  it should "add a new member" in withStartedThreeMemberCluster { members ⇒

    val leader = members leader

    await(leader.write(Put(Key1, Value1)))

    //add member4 to the cluster
    await(leader.addMember(Member4Address))

    val member4 = CKiteBuilder().listenAddress(Member4Address).members(Seq(Member2Address, Member1Address, Member3Address))
      .minElectionTimeout(2000).maxElectionTimeout(3000).stateMachine(new KVStore()).rpc(TestRpc).build.asInstanceOf[CKiteClient]
    //start member4
    member4.start

    //get value for k1. this is going to be forwarded to the Leader.
294 | val replicatedValue = await(member4.read(Get(Key1))) 295 | replicatedValue should be(Value1) 296 | 297 | //wait some time (> heartbeatsInterval) for missing appendEntries to arrive 298 | waitSomeTimeForAppendEntries 299 | 300 | //get value for Key1 from local 301 | val localValue = await(member4.readLocal(Get(Key1))) 302 | 303 | localValue should be(replicatedValue) 304 | 305 | member4.stop 306 | } 307 | 308 | it should "overwrite uncommitted entries on an old Leader" in withStartedThreeMemberCluster { members ⇒ 309 | 310 | val leader = members leader 311 | 312 | val followers = (members followers) 313 | 314 | //stop the followers 315 | followers foreach { 316 | _.stop 317 | } 318 | 319 | //this two writes will timeout since no majority can be reached 320 | for (i ← (1 to 2)) { 321 | intercept[TimeoutException] { 322 | await(leader.write(Put(Key1, Value1))) 323 | } 324 | } 325 | //at this point the leader has two uncommitted entries 326 | 327 | //leader stops 328 | leader.stop 329 | 330 | //followers came back 331 | val rebuiltFollowers = followers map { 332 | restart(_) 333 | } 334 | 335 | val livemembers = rebuiltFollowers 336 | 337 | waitSomeTimeForElection 338 | 339 | //a new leader is elected 340 | val newleader = livemembers leader 341 | 342 | //old leader came back 343 | val oldleader = restart(leader) 344 | 345 | waitSomeTimeForAppendEntries 346 | 347 | //those two uncommitted entries of the oldleader must be overridden and removed by the new Leader as part of appendEntries 348 | await(newleader.read(Get(Key1))) should be(null) 349 | 350 | oldleader.stop 351 | rebuiltFollowers foreach { 352 | _.stop 353 | } 354 | 355 | } 356 | 357 | it should "avoid partioned leader stale reads" in withStartedThreeMemberCluster { members ⇒ 358 | 359 | val oldLeader = members leader 360 | 361 | await(oldLeader.write(Put(Key1, Value1))) 362 | 363 | TestRpc.blockTraffic(id(oldLeader)) 364 | 365 | waitSomeTimeForElection() 366 | 367 | val newLeader = members leader 368 | 
369 | await(newLeader.write(Put(Key1, Value2))) 370 | 371 | await(newLeader.read(Get(Key1))) should be(Value2) 372 | 373 | intercept[LostLeadershipException] { 374 | await(oldLeader.read(Get(Key1))) 375 | } 376 | 377 | } 378 | 379 | implicit def membersSequence(members: Seq[CKite]): CKiteSequence = { 380 | new CKiteSequence(members) 381 | } 382 | 383 | class CKiteSequence(members: Seq[CKite]) { 384 | 385 | def followers = members filterNot { 386 | _.asInstanceOf[CKiteClient].isLeader 387 | } 388 | 389 | def leader = { 390 | val leaders = (members diff followers) 391 | val theLeader = leaders.head 392 | withClue(s"Leader $theLeader is not unique") { 393 | leaders diff Seq(theLeader) should be('empty) 394 | } 395 | theLeader 396 | } 397 | 398 | } 399 | 400 | private def withThreeMemberCluster(block: Seq[CKite] ⇒ Any) = { 401 | //member1 has default election timeout (500ms - 700ms). It is intended to be the first to start an election and raise as the leader. 402 | val member1 = CKiteBuilder().listenAddress(Member1Address) 403 | .bootstrap(true) 404 | .stateMachine(new KVStore()).rpc(TestRpc).build 405 | 406 | val member2 = CKiteBuilder().listenAddress(Member2Address).members(Seq(Member1Address)) 407 | .minElectionTimeout(1250).maxElectionTimeout(1500) //higher election timeout 408 | 409 | .stateMachine(new KVStore()).rpc(TestRpc).build 410 | 411 | val member3 = CKiteBuilder().listenAddress(Member3Address).members(Seq(Member2Address, Member1Address)) 412 | .minElectionTimeout(1750).maxElectionTimeout(2000) //higher election timeout 413 | .stateMachine(new KVStore()).rpc(TestRpc).build 414 | val members = Seq(member1, member2, member3) 415 | try { 416 | block(members) 417 | } finally { 418 | members.foreach { member ⇒ 419 | try { member.stop() } finally {} 420 | } 421 | } 422 | } 423 | 424 | private def withStartedThreeMemberCluster(test: Seq[CKite] ⇒ Any) = withThreeMemberCluster { members ⇒ 425 | logger.info(s"Starting all the members") 426 | 
members.foreach(_.start()) 427 | 428 | waitSomeTimeForElection 429 | try { 430 | logger.info(s"Running test...") 431 | test(members) 432 | } finally { 433 | logger.info(s"Stopping all the members") 434 | members foreach { member ⇒ 435 | try { member stop } 436 | finally { 437 | TestRpc.unblockTraffic(id(member)) 438 | } 439 | } 440 | } 441 | } 442 | 443 | private def builder(ckite: CKite) = ckite.asInstanceOf[CKiteClient].builder 444 | 445 | private def id(ckite: CKite): String = ckite.asInstanceOf[CKiteClient].id() 446 | 447 | private def restart(ckite: CKite): CKiteClient = { 448 | val clonedCKite = ckite.asInstanceOf[CKiteClient].builder.stateMachine(new KVStore).bootstrap(false).build.asInstanceOf[CKiteClient] 449 | clonedCKite.start() 450 | clonedCKite 451 | } 452 | 453 | private def waitSomeTimeForElection() = Thread.sleep(3000) 454 | 455 | private def waitSomeTimeForAppendEntries() = Thread.sleep(5000) 456 | 457 | private def await[T](future: Future[T]): T = { 458 | Await.result(future, 3 seconds) 459 | } 460 | 461 | } -------------------------------------------------------------------------------- /ckite-core/src/test/scala/ckite/SerializerTest.scala: -------------------------------------------------------------------------------- 1 | package ckite 2 | 3 | import org.scalatest.Matchers 4 | import ckite.util.Logging 5 | import org.scalatest.junit.JUnitRunner 6 | import org.scalatest._ 7 | import ckite.rpc.LogEntry 8 | import ckite.rpc.NoOp 9 | import ckite.util.Serializer 10 | import ckite.rpc.LogEntry 11 | 12 | class SerializerTest extends FlatSpec with Matchers with Logging { 13 | 14 | "a serializer" should "serialize and deserialize" in { 15 | val logEntry = LogEntry(1, 1, NoOp()) 16 | 17 | val bytes = Serializer.serialize(logEntry) 18 | 19 | val deserialized: LogEntry = Serializer.deserialize(bytes) 20 | } 21 | } -------------------------------------------------------------------------------- /ckite-core/src/test/scala/ckite/TestRpc.scala: 
--------------------------------------------------------------------------------
package ckite

import java.io.IOException
import java.util.concurrent.ConcurrentHashMap
import java.util.concurrent.atomic.AtomicBoolean

import ckite.rpc._
import com.typesafe.config.Config

import scala.concurrent.Future
import scala.util.Try

/** In-memory Rpc implementation used by the integration tests: servers are kept
  * in a shared registry and "network" failures are simulated with flags. */
object TestRpc extends Rpc {

  val servers = new ConcurrentHashMap[String, TestServer]()

  /** Resolves the Raft instance listening at `binding`, simulating a refused
    * connection when the server is absent, stopped or has its traffic blocked. */
  def server(binding: String): Raft = {
    val candidate = servers.get(binding)
    val unreachable = candidate == null || candidate.isStopped() || candidate.isBlocked
    if (unreachable) {
      throw new IOException("Connection refused")
    }
    candidate.cluster
  }

  def blockTraffic(binding: String) = servers.get(binding).block()

  def unblockTraffic(binding: String) = servers.get(binding).unblock()

  def isBlocked(binding: String) = servers.get(binding).isBlocked

  override def createServer(service: RpcService, config: Config): RpcServer = {
    val raft = service.asInstanceOf[Raft]
    val testServer = new TestServer(raft)
    servers.put(raft.membership.myId, testServer)
    testServer
  }

  override def createClient(binding: String): RpcClient = new TestClient(binding)
}

/** Fake RPC server: start/stop and block/unblock just flip flags that
  * TestRpc.server inspects when routing calls. */
class TestServer(val cluster: Raft) extends RpcServer {
  val stopped = new AtomicBoolean()
  val blocked = new AtomicBoolean()

  override def start(): Unit = stopped.set(false)

  override def stop(): Unit = stopped.set(true)

  def block() = blocked.set(true)

  def unblock() = blocked.set(false)

  def isStopped() = stopped.get()

  def isBlocked = blocked.get()

}

/** Fake RPC client: dispatches directly to the registered in-memory server,
  * refusing the call when either endpoint has its traffic blocked. */
class TestClient(binding: String) extends RpcClient {

  override def send(request: RequestVote): Future[RequestVoteResponse] = ioTry {
    refuseIfBlocked(request.memberId)
    TestRpc.server(binding).onRequestVoteReceived(request)
  }

  override def send(appendEntries: AppendEntries): Future[AppendEntriesResponse] = ioTry {
    refuseIfBlocked(appendEntries.leaderId)
    TestRpc.server(binding).onAppendEntriesReceived(appendEntries)
  }

  override def send(installSnapshot: InstallSnapshot): Future[InstallSnapshotResponse] = ioTry {
    refuseIfBlocked(installSnapshot.leaderId)
    TestRpc.server(binding).onInstallSnapshotReceived(installSnapshot)
  }

  override def send[T](command: Command): Future[T] = ioTry {
    TestRpc.server(binding).onCommandReceived(command)
  }

  override def send(joinMember: JoinMember): Future[JoinMemberResponse] = ioTry {
    refuseIfBlocked(joinMember.memberId)
    TestRpc.server(binding).onMemberJoinReceived(joinMember.memberId)
  }

  // Simulates a refused connection when the *sender* has its traffic blocked.
  private def refuseIfBlocked(memberId: String): Unit = {
    if (TestRpc.isBlocked(memberId)) {
      throw new IOException("Connection refused")
    }
  }

  /** Turns a synchronously thrown IOException into a failed Future; any other
    * exception propagates to the caller unchanged. */
  def ioTry[T](block: ⇒ Future[T]): Future[T] =
    Try(block).recover { case e: IOException ⇒ Future.failed(e) }.get
}
--------------------------------------------------------------------------------
/ckite-core/src/test/scala/ckite/example/Get.scala:
--------------------------------------------------------------------------------
package ckite.example
import ckite.rpc.ReadCommand

case class Get(key: String) extends ReadCommand[String]
--------------------------------------------------------------------------------
/ckite-core/src/test/scala/ckite/example/KVStore.scala:
--------------------------------------------------------------------------------
package ckite.example

import java.nio.ByteBuffer
import java.util.HashMap

import
ckite.statemachine.StateMachine
import ckite.util.{ Logging, Serializer }

/** Minimal key/value StateMachine used by the tests: an in-memory map updated
  * by Put commands and queried by Get commands.
  * NOTE(review): the mutable HashMap assumes single-threaded application of
  * commands by the replicated log — confirm against RLog's execution model. */
class KVStore extends StateMachine with Logging {

  private var map = new HashMap[String, String]()
  private var lastIndex: Long = 0

  /** Applies a Put, recording the index of the last applied entry; returns the
    * written value as the command result. */
  def applyWrite = {
    case (index, Put(key: String, value: String)) ⇒ {
      logger.debug(s"Put $key=$value")
      map.put(key, value)
      lastIndex = index
      value
    }
  }

  /** Applies a Get; returns null for an absent key (java.util.HashMap semantics,
    * relied upon by the integration tests). */
  def applyRead = {
    case Get(key) ⇒ {
      logger.debug(s"Get $key")
      map.get(key)
    }
  }

  def getLastAppliedIndex: Long = lastIndex

  /** Restores the map from a serialized snapshot.
    * FIX: previously used byteBuffer.array(), which ignores the buffer's
    * position/arrayOffset and throws for read-only or non-array-backed buffers
    * (e.g. one received over the wire). Read exactly the remaining bytes. */
  def restoreSnapshot(byteBuffer: ByteBuffer) = {
    val bytes = new Array[Byte](byteBuffer.remaining())
    byteBuffer.get(bytes)
    map = Serializer.deserialize(bytes)
  }

  /** Serializes the whole map as the snapshot payload. */
  def takeSnapshot(): ByteBuffer = {
    ByteBuffer.wrap(Serializer.serialize(map))
  }

}
--------------------------------------------------------------------------------
/ckite-core/src/test/scala/ckite/example/Put.scala:
--------------------------------------------------------------------------------
package ckite.example

import ckite.rpc.WriteCommand

case class Put(key: String, value: String) extends WriteCommand[String]
--------------------------------------------------------------------------------
/ckite-finagle/src/main/resources/reference.conf:
--------------------------------------------------------------------------------
ckite {
  finagle {
    thrift {
      # Workers handling incoming requests
      workers = 4
    }
  }
}
--------------------------------------------------------------------------------
/ckite-finagle/src/main/scala/ckite/rpc/FinagleThriftRpc.scala:
--------------------------------------------------------------------------------
package ckite.rpc

import ckite.rpc.thrift.{ FinagleThriftClient, FinagleThriftServer }
import com.typesafe.config.Config

object FinagleThriftRpc extends Rpc {

  override def createServer(rpcService: RpcService, config: Config):
RpcServer = FinagleThriftServer(rpcService, config)

  override def createClient(address: String): RpcClient = FinagleThriftClient(address)

}
--------------------------------------------------------------------------------
/ckite-finagle/src/main/scala/ckite/rpc/thrift/FinagleThriftClient.scala:
--------------------------------------------------------------------------------
package ckite.rpc.thrift

import java.util.concurrent.TimeUnit

import ckite.rpc._
import ckite.rpc.thrift.ThriftConverters._
import ckite.util.Logging
import com.twitter.finagle.builder.ClientBuilder
import com.twitter.finagle.service.RetryPolicy
import com.twitter.finagle.thrift.ThriftClientFramedCodec
import com.twitter.util.{ Duration, Future }

import scala.concurrent.{ Promise, Future ⇒ ScalaFuture }

/** Finagle-based thrift RpcClient: converts domain requests to thrift structs
  * (via the implicits in ThriftConverters) and adapts Twitter Futures returned
  * by the generated client into Scala Futures of domain types. */
case class FinagleThriftClient(binding: String) extends RpcClient with Logging {

  val client = new CKiteService.FinagledClient(ClientBuilder().hosts(binding)
    .retryPolicy(NoRetry).codec(ThriftClientFramedCodec()).failFast(false)
    .hostConnectionLimit(10).hostConnectionCoresize(1).requestTimeout(Duration(60, TimeUnit.SECONDS)).build())

  override def send(request: RequestVote): ScalaFuture[RequestVoteResponse] = {
    logger.debug(s"Sending $request to $binding")
    adapt(client.sendRequestVote(request))
  }

  override def send(appendEntries: AppendEntries): ScalaFuture[AppendEntriesResponse] = {
    logger.trace(s"Sending $appendEntries to $binding")
    adapt(client.sendAppendEntries(appendEntries))
  }

  override def send[T](command: Command): ScalaFuture[T] =
    adapt(client.sendCommand(command))

  override def send(installSnapshot: InstallSnapshot): ScalaFuture[InstallSnapshotResponse] =
    adapt(client.sendInstallSnapshot(installSnapshot))

  override def send(joinRequest: JoinMember): ScalaFuture[JoinMemberResponse] =
    adapt(client.sendJoinMember(joinRequest))

  /** Bridges a Twitter Future of a thrift struct to a Scala Future of the
    * domain type, applying the implicit thrift→domain conversion on success.
    * FIX(DRY): the five send methods previously duplicated this promise
    * plumbing inline, and a dead private `toScalaFuture` implicit (which
    * skipped the element conversion) was never used — both replaced by this
    * single adapter. */
  private def adapt[S, T](twitterFuture: Future[S])(implicit convert: S ⇒ T): ScalaFuture[T] = {
    val promise = Promise[T]()
    twitterFuture.onSuccess(value ⇒ promise.success(convert(value)))
    twitterFuture.onFailure(e ⇒ promise.failure(e))
    promise.future
  }

}

/** Retry policy that never retries: failures surface immediately to CKite. */
object NoRetry extends RetryPolicy[com.twitter.util.Try[Nothing]] {
  def apply(e: com.twitter.util.Try[Nothing]) = {
    None
  }
}
--------------------------------------------------------------------------------
/ckite-finagle/src/main/scala/ckite/rpc/thrift/FinagleThriftServer.scala:
--------------------------------------------------------------------------------
package ckite.rpc.thrift

import java.nio.ByteBuffer
import java.util.concurrent.{ SynchronousQueue, ThreadPoolExecutor, TimeUnit }

import ckite.rpc.thrift.ThriftConverters._
import ckite.rpc.{ RpcServer, RpcService }
import ckite.util.CustomThreadFactory
import com.twitter.finagle.{ ListeningServer, Thrift }
import com.twitter.util.{ Future, FuturePool, Promise }
import
com.typesafe.config.Config
import org.apache.thrift.protocol.TBinaryProtocol

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.{ Future ⇒ ScalaFuture }
import scala.util.{ Failure, Success }

/** Finagle thrift RpcServer exposing the CKite RpcService over the generated
  * CKiteService interface. */
case class FinagleThriftServer(rpcService: RpcService, config: Config) extends RpcServer {
  var closed = false
  var finagleServer: ListeningServer = _

  def start() = {
    // Binds on the port part of the "host:port" listen-address setting.
    val localPort = config.getString("ckite.listen-address").split(":")(1)
    finagleServer = Thrift.serve(s":$localPort", ckiteService)
  }

  /** Adapts a Scala Future into a Twitter Future for the generated service. */
  implicit def toTwitterFuture[T](scalaFuture: ScalaFuture[T]): Future[T] = {
    val promise = Promise[T]
    scalaFuture.onComplete {
      case Success(value) ⇒ promise.setValue(value)
      // FIX: was promise.raise(t) — raise only *interrupts* a Twitter Promise
      // and never completes it, so a failed Scala future left the thrift caller
      // waiting forever. setException completes the promise with the failure.
      case Failure(t)     ⇒ promise.setException(t)
    }
    promise
  }

  def ckiteService = {
    val ckiteService = new CKiteService[Future]() {

      override def sendRequestVote(requestVote: RequestVoteST): Future[RequestVoteResponseST] = {
        rpcService.onRequestVoteReceived(requestVote).map[RequestVoteResponseST](r ⇒ r)
      }

      override def sendAppendEntries(appendEntries: AppendEntriesST): Future[AppendEntriesResponseST] = {
        rpcService.onAppendEntriesReceived(appendEntries).map[AppendEntriesResponseST](r ⇒ r)
      }

      override def sendCommand(bb: ByteBuffer): Future[ByteBuffer] = {
        rpcService.onCommandReceived[Any](bb).map[ByteBuffer](r ⇒ r)
      }

      override def sendJoinMember(joinRequest: JoinMemberST): Future[JoinMemberResponseST] = {
        rpcService.onMemberJoinReceived(joinRequest._1).map[JoinMemberResponseST](r ⇒ r)
      }

      override def sendInstallSnapshot(installSnapshot: InstallSnapshotST) = {
        rpcService.onInstallSnapshotReceived(installSnapshot).map[InstallSnapshotResponseST](r ⇒ r)
      }
    }

    new CKiteService$FinagleService(ckiteService, new TBinaryProtocol.Factory())
  }

  def stop() = synchronized {
    if (!closed) {
      futurePool.executor.shutdownNow()
      finagleServer.close()
      closed = true
    }
  }

  val futurePool = FuturePool(new ThreadPoolExecutor(0, config.getInt("ckite.finagle.thrift.workers"),
    15L, TimeUnit.SECONDS,
    new SynchronousQueue[Runnable](),
    CustomThreadFactory("Thrift-worker", true)))

}
--------------------------------------------------------------------------------
/ckite-finagle/src/main/scala/ckite/rpc/thrift/ThriftConverters.scala:
--------------------------------------------------------------------------------
package ckite.rpc.thrift

import java.nio.ByteBuffer

import ckite.rlog.Snapshot
import ckite.rpc._
import ckite.util.Logging
import ckite.util.Serializer

/** Implicit conversions between CKite domain messages and the thrift structs
  * generated from ckite.thrift. */
object ThriftConverters extends Logging {

  implicit def requestVoteToThrift(request: RequestVote): RequestVoteST = {
    RequestVoteST(request.memberId, request.term, request.lastLogIndex, request.lastLogTerm)
  }

  implicit def requestVoteFromThrift(requestVote: RequestVoteST): RequestVote = {
    RequestVote(requestVote.memberId, requestVote.term, requestVote.lastLogIndex, requestVote.lastLogTerm)
  }

  implicit def appendEntriesToThrift(request: AppendEntries): AppendEntriesST = {
    val entries: Seq[LogEntryST] = request.entries.map(entry ⇒ logEntryToThrift(entry)).toSeq
    AppendEntriesST(request.term, request.leaderId, request.commitIndex, request.prevLogIndex, request.prevLogTerm, Some(entries))
  }

  implicit def appendEntriesFromThrift(request: AppendEntriesST): AppendEntries = {
    // FIX: `entries` is optional in the IDL; `.get` crashed on a request without
    // the field (e.g. from a non-CKite client). Treat absence as an empty batch.
    val entries = request.entries.getOrElse(Seq.empty).map(entry ⇒ logEntryFromThrift(entry)).toList
    AppendEntries(request.term, request.leaderId, request.commitIndex, request.prevLogIndex, request.prevLogTerm, entries)
  }

  implicit def requestVoteResponseToThrift(response: RequestVoteResponse): RequestVoteResponseST = {
RequestVoteResponseST(response.currentTerm, response.granted)
  }

  implicit def requestVoteResponseFromThrift(response: RequestVoteResponseST): RequestVoteResponse =
    RequestVoteResponse(response.currentTerm, response.granted)

  implicit def appendEntriesResponseFromThrift(response: AppendEntriesResponseST): AppendEntriesResponse =
    AppendEntriesResponse(response.term, response.success)

  implicit def appendEntriesResponseToThrift(response: AppendEntriesResponse): AppendEntriesResponseST =
    AppendEntriesResponseST(response.term, response.success)

  implicit def logEntryToThrift(entry: LogEntry): LogEntryST =
    LogEntryST(entry.term, entry.index, entry.command)

  implicit def logEntryFromThrift(entry: LogEntryST): LogEntry =
    LogEntry(entry.term, entry.index, entry.command)

  /** Serializes any value into a wire ByteBuffer. */
  implicit def anyToThrift[T](command: T): ByteBuffer =
    ByteBuffer.wrap(Serializer.serialize(command))

  /** Deserializes a value of type T from exactly the buffer's remaining bytes. */
  implicit def anyFromThrift[T](byteBuffer: ByteBuffer): T = {
    val length = byteBuffer.remaining()
    val bytes = new Array[Byte](length)
    byteBuffer.get(bytes)
    Serializer.deserialize[T](bytes)
  }

  implicit def snapshotToThrift(snapshot: Snapshot): SnapshotST = {
    // Explicit ByteBuffer ascriptions trigger the implicit serialization above.
    val membershipBytes: ByteBuffer = snapshot.clusterConfiguration
    val stateMachineBytes: ByteBuffer = snapshot.stateMachineSerialized
    SnapshotST(stateMachineBytes, snapshot.index, snapshot.term, membershipBytes)
  }

  implicit def snapshotFromThrift(snapshotST: SnapshotST): Snapshot =
    Snapshot(snapshotST.lastLogEntryTerm, snapshotST.lastLogEntryIndex, snapshotST.membershipState, snapshotST.stateMachineState)

  implicit def installSnapshotToThrift(installSnapshot: InstallSnapshot): InstallSnapshotST =
    InstallSnapshotST(installSnapshot.term, installSnapshot.leaderId, installSnapshot.snapshot)

  implicit def
installSnapshotFromThrift(installSnapshotST: InstallSnapshotST): InstallSnapshot = {
    InstallSnapshot(installSnapshotST.term, installSnapshotST.leaderId, installSnapshotST.snapshot)
  }

  implicit def installSnapshotResponseFromThrift(installSnapshotResponseST: InstallSnapshotResponseST): InstallSnapshotResponse = {
    InstallSnapshotResponse(installSnapshotResponseST.success)
  }

  implicit def installSnapshotResponseToThrift(installSnapshotResponse: InstallSnapshotResponse): InstallSnapshotResponseST = {
    InstallSnapshotResponseST(installSnapshotResponse.success)
  }

  implicit def joinMemberToThrift(joinRequest: JoinMember): JoinMemberST = {
    JoinMemberST(joinRequest.memberId)
  }

  implicit def joinMemberResponseToThrift(joinResponse: JoinMemberResponse): JoinMemberResponseST = {
    JoinMemberResponseST(joinResponse.success)
  }

  implicit def joinMemberResponseFromThrift(joinResponse: JoinMemberResponseST): JoinMemberResponse = {
    JoinMemberResponse(joinResponse.success)
  }

}
--------------------------------------------------------------------------------
/ckite-finagle/src/main/thrift/ckite/rpc/thrift/ckite.thrift:
--------------------------------------------------------------------------------
namespace java ckite.rpc.thrift

struct LogEntryST {
  1: required i32 term;
  2: required i64 index;
  3: required binary command;
}

struct AppendEntriesST {
  1: required i32 term;
  2: required string leaderId;
  3: optional i64 commitIndex = -1;
  4: optional i64 prevLogIndex = -1;
  5: optional i32 prevLogTerm = -1;
  // FIX: `list` requires an element type in Thrift IDL — the bare `list entries`
  // does not compile. The converters (logEntryToThrift/FromThrift) show the
  // elements are LogEntryST; the angle brackets were likely lost in extraction.
  6: optional list<LogEntryST> entries;
}

struct AppendEntriesResponseST {
  1: required i32 term;
  2: required bool success;
}

struct RequestVoteST {
  1: required string memberId;
  2: required i32 term;
  3: optional i64 lastLogIndex = -1;
  4: optional i32 lastLogTerm = -1;
}

struct RequestVoteResponseST {
  1: required i32 currentTerm;
  2: required bool granted;
}

struct SnapshotST {
  1: required binary stateMachineState;
  2: required i64 lastLogEntryIndex;
  3: required i32 lastLogEntryTerm;
  4: required binary membershipState;
}

struct InstallSnapshotST {
  1: required i32 term;
  2: required string leaderId;
  3: required SnapshotST snapshot;
}

struct InstallSnapshotResponseST {
  1: required bool success;
}

struct JoinMemberST {
  1: required string memberId;
}

struct JoinMemberResponseST {
  1: required bool success;
}

service CKiteService {

  RequestVoteResponseST sendRequestVote(1:RequestVoteST requestVote);

  AppendEntriesResponseST sendAppendEntries(1:AppendEntriesST appendEntries);

  binary sendCommand(1:binary command);

  InstallSnapshotResponseST sendInstallSnapshot(1:InstallSnapshotST installSnapshot);

  JoinMemberResponseST sendJoinMember(1:JoinMemberST memberId);

}
--------------------------------------------------------------------------------
/ckite-mapdb/src/main/scala/ckite/mapdb/FileSupport.scala:
--------------------------------------------------------------------------------
package ckite.mapdb

import java.io.File

trait FileSupport {

  /** Returns a File handle for fileName under dataDir, creating the directory
    * (including parents) if it does not yet exist. */
  protected def file(dataDir: String, fileName: String): File = {
    val dir = new File(dataDir)
    dir.mkdirs()
    val file = new File(dir, fileName)
    file
  }

}
--------------------------------------------------------------------------------
/ckite-mapdb/src/main/scala/ckite/mapdb/MapDBPersistentLog.scala:
--------------------------------------------------------------------------------
package ckite.mapdb

import java.util.concurrent.atomic.AtomicLong

import ckite.rlog.Log
import ckite.rpc.LogEntry
import
ckite.util.{ Logging, Serializer }
import org.mapdb.DBMaker

import scala.concurrent.Future

/** MapDB-backed persistent Raft log: entries are serialized into a file-based
  * TreeMap keyed by log index. */
case class MapDBPersistentLog(dataDir: String) extends Log with FileSupport with Logging {

  // Transactions are disabled, so MapDB writes directly to the store.
  val logDB = DBMaker.newFileDB(file(dataDir, "ckite-mapdb-log")).mmapFileEnable().closeOnJvmShutdown().transactionDisable().cacheDisable().make()

  val entries = logDB.getTreeMap[Long, Array[Byte]]("logEntries")
  // Cached counters avoid querying MapDB for size/lastIndex on every call.
  val cachedSize = new AtomicLong(if (entries.isEmpty) 0 else entries.size())
  val lastIndex = new AtomicLong(if (entries.isEmpty) -1 else entries.lastKey())

  /** Persists the entry under its index and commits; completes synchronously
    * (the returned Future is already fulfilled). */
  def append(entry: LogEntry): Future[Unit] = Future.successful {
    entries.put(entry.index, Serializer.serialize(entry))
    cachedSize.incrementAndGet()
    lastIndex.set(entry.index)
    commit()
  }

  /** Returns the entry at `index`, or null when absent. */
  def getEntry(index: Long): LogEntry = {
    val bytes = entries.get(index)
    if (bytes != null) Serializer.deserialize(bytes) else null.asInstanceOf[LogEntry]
  }

  /** Log compaction: removes every entry from the first index up to and
    * including upToIndex.
    * NOTE(review): unlike append, no commit() is issued here — presumably safe
    * because transactionDisable makes puts/removes direct; confirm durability. */
  def rollLog(upToIndex: Long) = {
    val range = firstIndex to upToIndex
    logger.debug(s"Compacting ${range.size} LogEntries")
    range foreach { index ⇒ remove(index) }
    logger.debug(s"Finished compaction")
  }

  def getLastIndex: Long = lastIndex.longValue()

  def size = cachedSize.longValue()

  /** Drops all entries from `index` (inclusive) to the end, used when a new
    * leader overwrites conflicting uncommitted entries. */
  def discardEntriesFrom(index: Long) = {
    index to lastIndex.longValue() foreach { i ⇒
      remove(i)
    }
    lastIndex.set(index - 1)
  }

  def close() = logDB.close()

  private def commit() = logDB.commit()

  // NOTE(review): defaults to 1 when the log is empty — looks like it relies on
  // Raft's 1-based log indexing so that `firstIndex to upToIndex` is empty; confirm.
  private def firstIndex: Long = if (!entries.isEmpty) entries.firstKey else 1

  // Guard against index 0/negative so the size counter stays consistent.
  private def remove(index: Long) = {
    if (index > 0) {
      entries.remove(index)
      cachedSize.decrementAndGet()
    }
  }
}
--------------------------------------------------------------------------------
/ckite-mapdb/src/main/scala/ckite/mapdb/MapDBStorage.scala:
package ckite.mapdb

import ckite.rlog._
import ckite.util.Serializer
import com.typesafe.config.ConfigFactory
import org.mapdb.DBMaker

/**
 * MapDB-backed [[ckite.rlog.Storage]]. Persists the three durable Raft artifacts
 * in separate MapDB files under `dataDir`: the replicated log, the latest vote
 * (term + voted-for member) and state-machine snapshots.
 */
class MapDBStorage(dataDirOption: Option[String] = None) extends Storage with FileSupport {

  // Falls back to the `ckite.datadir` setting when no explicit directory is given.
  private val config = ConfigFactory.load()
  private val dataDir = dataDirOption.getOrElse(config.getString("ckite.datadir"))

  private val logDir = s"$dataDir/log"
  private val snapshotsDir = s"$dataDir/snapshots"
  private val stateDir = s"$dataDir/state"

  override val log: Log = new MapDBPersistentLog(logDir)

  // Durable vote state: two MapDB atomics, both written before a single commit in saveVote.
  private val stateDB = DBMaker.newFileDB(file(stateDir, "ckite-mapdb-state")).make()
  private val voteTerm = stateDB.getAtomicInteger("term")
  private val voteMember = stateDB.getAtomicString("memberId")

  // Only the latest snapshot is retained, always under the single "snapshot" key.
  private val snapshotsDB = DBMaker.newFileDB(file(snapshotsDir, "ckite-mapdb-snapshots")).mmapFileEnable().make()
  private val snapshotsMap = snapshotsDB.getHashMap[String, Array[Byte]]("snapshotsMap")

  /** Latest stored snapshot, or None when none has ever been saved. */
  override def retrieveLatestSnapshot(): Option[Snapshot] = {
    Option(snapshotsMap.get("snapshot")).map(deserializeSnapshot)
  }

  /** Durably records the given vote: both fields are set, then flushed in one commit. */
  override def saveVote(vote: Vote): Unit = {
    voteTerm.set(vote.term)
    voteMember.set(vote.member)
    stateDB.commit()
  }

  /** Replaces the previously stored snapshot (single-slot storage) and commits. */
  override def saveSnapshot(snapshot: Snapshot): Unit = {
    snapshotsMap.put("snapshot", serializeSnapshot(snapshot))
    snapshotsDB.commit()
  }

  private def serializeSnapshot(snapshot: Snapshot): Array[Byte] = Serializer.serialize(snapshot)

  private def deserializeSnapshot(bytes: Array[Byte]): Snapshot = Serializer.deserialize(bytes)

  /**
   * Latest persisted vote, or None when nothing was ever saved.
   * NOTE(review): this relies on the MapDB atomic defaults (0 / empty string) to
   * detect the "never voted" state — a legitimate vote at term 0 would be
   * indistinguishable; confirm terms always start above 0.
   */
  override def retrieveLatestVote(): Option[Vote] = {
    val term = voteTerm.get()
    val member = voteMember.get()
    if (term == 0 && member.isEmpty) {
      None
    } else {
      Some(Vote(term, member))
    }
  }

}

/** Factory helpers mirroring the two construction modes: explicit dir vs. config-driven. */
object MapDBStorage {
  def apply(dataDir: String) = new MapDBStorage(Some(dataDir))
  def apply() = new MapDBStorage()
}
import sbt.Keys._
import sbt._
import sbt.Defaults.itSettings
import spray.revolver.RevolverPlugin._
// NOTE(review): `itSettings` and the Revolver import look unused in this file — confirm
// before removing (they may be needed for plugin side effects in some sbt versions).

/**
 * Multi-project sbt build: an aggregate-only root plus three modules
 * (core, finagle transport, mapdb storage). Settings sequences come from
 * Settings.scala; artifact coordinates from Dependencies.scala.
 */
object CKite extends Build {

  import Dependencies._
  import Settings._

  // Aggregate-only root: builds all modules but publishes nothing itself (noPublishing).
  lazy val ckite: Project = Project("ckite", file("."))
    .aggregate(ckiteCore, ckiteFinagle, ckiteMapDB)
    .settings(basicSettings: _*)
    .settings(sonatypeSettings: _*)
    .settings(formatSettings: _*)
    .settings(noPublishing: _*)

  // Core consensus implementation; transport- and storage-agnostic.
  lazy val ckiteCore: Project = Project("ckite-core", file("ckite-core"))
    .settings(basicSettings: _*)
    .settings(sonatypeSettings: _*)
    .settings(formatSettings: _*)
    .settings(libraryDependencies ++=
      compile(slf4j, config, chill) ++
        test(scalaTest, logback))

  // Finagle/Thrift RPC transport for ckite-core.
  lazy val ckiteFinagle: Project = Project("ckite-finagle", file("ckite-finagle"))
    .dependsOn(ckiteCore)
    .settings(basicSettings: _*)
    .settings(sonatypeSettings: _*)
    .settings(formatSettings: _*)
    .settings(libraryDependencies ++=
      compile(slf4j, scrooge, finagleCore, finagleThrift) ++
        test(scalaTest, logback, finagleHttp, jacksonAfterBurner, jacksonScala))

  // MapDB-based persistent storage for ckite-core.
  lazy val ckiteMapDB: Project = Project("ckite-mapdb", file("ckite-mapdb"))
    .dependsOn(ckiteCore)
    .settings(basicSettings: _*)
    .settings(sonatypeSettings: _*)
    .settings(formatSettings: _*)
    .settings(libraryDependencies ++=
      compile(mapdb) ++
        test(scalaTest, logback))

}
/** Central catalogue of third-party artifacts and ivy-scope helpers for the build. */
object Dependencies {

  // Shared version numbers.
  val finagleV = "6.43.0"
  val jacksonV = "2.4.4"

  // --- Artifact coordinates --------------------------------------------------
  val slf4j = "org.slf4j" % "slf4j-api" % "1.7.7"
  val scrooge = "com.twitter" %% "scrooge-core" % "4.15.0"
  val finagleCore = "com.twitter" %% "finagle-core" % finagleV exclude("com.twitter", "util-logging_2.11") exclude("com.twitter", "util-app_2.11")
  val finagleThrift = "com.twitter" %% "finagle-thrift" % finagleV
  val finagleHttp = "com.twitter" %% "finagle-http" % finagleV
  val config = "com.typesafe" % "config" % "1.0.2"
  val mapdb = "org.mapdb" % "mapdb" % "0.9.13"
  val chill = "com.twitter" %% "chill" % "0.9.3"
  val jacksonAfterBurner = "com.fasterxml.jackson.module" % "jackson-module-afterburner" % jacksonV
  val jacksonScala = "com.fasterxml.jackson.module" %% "jackson-module-scala" % jacksonV
  val scalaTest = "org.scalatest" %% "scalatest" % "3.0.5"
  val logback = "ch.qos.logback" % "logback-classic" % "1.1.2"
  val thrift = "org.apache.thrift" % "libthrift" % "0.9.2"

  // --- Scope helpers: tag a list of modules with an ivy configuration. -------
  private def inScope(scope: String)(modules: Seq[ModuleID]): Seq[ModuleID] =
    modules.map(module => module % scope)

  def compile(deps: ModuleID*): Seq[ModuleID] = inScope("compile")(deps)
  def provided(deps: ModuleID*): Seq[ModuleID] = inScope("provided")(deps)
  def test(deps: ModuleID*): Seq[ModuleID] = inScope("test")(deps)
  def runtime(deps: ModuleID*): Seq[ModuleID] = inScope("runtime")(deps)
  def it(deps: ModuleID*): Seq[ModuleID] = inScope("it")(deps)

}
  // Compiler and project-wide defaults shared by every module.
  lazy val basicSettings = Seq(
    scalaVersion := ScalaVersion,
    crossScalaVersions := CrossScalaVersions,
    organization := "io.ckite",
    version := "0.2.2-SNAPSHOT",
    // NOTE(review): plain-http resolver — consider https for the twitter repo.
    resolvers ++= Seq("twitter-repo" at "http://maven.twttr.com"),
    fork in(Test, run) := true,
    javacOptions := Seq(
      "-source", "1.8", "-target", "1.8"
    ),
    scalacOptions := Seq(
      "-encoding",
      "utf8",
      "-g:vars",
      "-feature",
      "-unchecked",
      "-optimise",
      "-deprecation",
      "-target:jvm-1.8",
      "-language:postfixOps",
      "-language:implicitConversions",
      "-language:reflectiveCalls",
      "-Xlog-reflective-calls"
    ))

  // Sonatype OSSRH publishing; destination chosen by the version suffix
  // (SNAPSHOT -> snapshots repo, LOCAL -> ~/.m2, otherwise staging).
  lazy val sonatypeSettings = Seq(
    publishMavenStyle := true,
    publishArtifact in Test := false,
    pomIncludeRepository := { x => false},
    // NOTE(review): crossPaths := false suppresses the _2.1x artifact suffix, which
    // contradicts the crossScalaVersions declared in basicSettings — confirm which
    // publishing scheme is intended.
    crossPaths := false,
    publishTo := {
      val nexus = "https://oss.sonatype.org/"
      if (version.value.trim.endsWith("SNAPSHOT"))
        Some("snapshots" at nexus + "content/repositories/snapshots")
      else if (version.value.trim.endsWith("LOCAL"))
        Some(Resolver.file("file", new File(Path.userHome.absolutePath+"/.m2/repository")))
      else
        Some("releases" at nexus + "service/local/staging/deploy/maven2")
    },
    // NOTE(review): the XML element tags of this literal (url/licenses/scm/developers)
    // appear to have been stripped by the extraction that produced this dump — restore
    // the original markup from version control; as shown it is not valid Scala XML.
    pomExtra := {
      http://ckite.io
      Apache 2 http://www.apache.org/licenses/LICENSE-2.0.txt repo
      scm:git:github.com/pablosmedina/ckite.git
      scm:git:git@github.com:pablosmedina/ckite.git
      github.com/pablosmedina/ckite.git
      pmedina Pablo S. Medina https://twitter.com/pablosmedina
    }
  )

  // Scalariform auto-formatting for both main and test sources.
  lazy val formatSettings = SbtScalariform.scalariformSettings ++ Seq(
    ScalariformKeys.preferences in Compile := formattingPreferences,
    ScalariformKeys.preferences in Test := formattingPreferences
  )

  // Project-wide scalariform preferences (note: RewriteArrowSymbols produces the
  // unicode arrows seen throughout the sources).
  def formattingPreferences =
    FormattingPreferences()
      .setPreference(RewriteArrowSymbols, true)
      .setPreference(AlignParameters, false)
      .setPreference(AlignSingleLineCaseStatements, true)
      .setPreference(DoubleIndentClassDeclaration, true)

  // IDE + integration-test conveniences.
  lazy val eclipseSettings = Seq(EclipseKeys.configurations := Set(Compile, Test, IntegrationTest), EclipseKeys.createSrc := EclipseCreateSrc.Default + EclipseCreateSrc.Resource)
  lazy val itExtraSettings = Seq(
    parallelExecution in IntegrationTest := false
  )

  // No-op publish tasks for modules (like the aggregate root) that must not be published.
  val noPublishing = Seq(publish :=(), publishLocal :=(), publishArtifact := false)

}