├── project ├── build.properties ├── plugins.sbt └── CentralRequirementsPlugin.scala ├── version.sbt ├── .gitignore ├── docs └── img │ └── logo.png ├── NOTICE ├── project.sbt ├── core ├── build.sbt └── src │ ├── main │ └── scala │ │ ├── messages.scala │ │ ├── ReconcileState.scala │ │ ├── OneTaskPerSlaveStateManager.scala │ │ ├── SchedulerState.scala │ │ └── Scheduler.scala │ └── test │ └── scala │ ├── OneTaskPerSlaveStateManagerSpec.scala │ ├── testimpl.scala │ └── SchedulerSpec.scala ├── example ├── src │ └── main │ │ ├── resources │ │ └── logback.xml │ │ └── scala │ │ ├── CustomMessageHandler.scala │ │ ├── Main.scala │ │ └── Service.scala ├── Dockerfile └── build.sbt ├── .travis.yml ├── README.md └── LICENSE /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=0.13.12 2 | -------------------------------------------------------------------------------- /version.sbt: -------------------------------------------------------------------------------- 1 | version in ThisBuild := "0.1.0-SNAPSHOT" 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .idea 3 | target 4 | log 5 | project/project/ 6 | tmp 7 | -------------------------------------------------------------------------------- /docs/img/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Verizon/ark/HEAD/docs/img/logo.png -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Mesos Scheduler 2 | Copyright (c) 2016 Verizon. All rights reserved. 3 | 4 | This project includes code developed at Verizon. 5 | 6 | Licensed under Apache License 2.0. See LICENSE for terms. 
7 | -------------------------------------------------------------------------------- /project.sbt: -------------------------------------------------------------------------------- 1 | 2 | organization in Global := "io.verizon.ark" 3 | 4 | scalaVersion in Global := "2.10.6" 5 | 6 | lazy val ark = project.in(file(".")).aggregate(core, example) 7 | 8 | lazy val core = project 9 | 10 | lazy val example = project.dependsOn(core % "test->test;compile->compile") 11 | 12 | enablePlugins(DisablePublishingPlugin) 13 | -------------------------------------------------------------------------------- /core/build.sbt: -------------------------------------------------------------------------------- 1 | 2 | libraryDependencies ++= Seq( 3 | "io.verizon.journal" %% "core" % "2.3.15", 4 | "org.scalaz.stream" %% "scalaz-stream" % "0.7.3a", 5 | "org.apache.mesos" % "mesos" % "0.26.0" 6 | ) 7 | 8 | scalacOptions in Test ~= (_.filterNot(Set("-Ywarn-value-discard"))) 9 | 10 | ivyScala := ivyScala.value map { _.copy(overrideScalaVersion = true) } 11 | -------------------------------------------------------------------------------- /example/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | 2 | resolvers += Resolver.url( 3 | "tpolecat-sbt-plugin-releases", 4 | url("http://dl.bintray.com/content/tpolecat/sbt-plugin-releases"))( 5 | Resolver.ivyStylePatterns) 6 | 7 | addSbtPlugin("io.verizon.build" % "sbt-rig" % "1.1.20") 8 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.11.2") 9 | 10 | // docs 11 | addSbtPlugin("com.typesafe.sbt" % "sbt-site" % "0.8.1") 12 | addSbtPlugin("com.typesafe.sbt" % "sbt-ghpages" % "0.5.3") 13 | 
addSbtPlugin("org.tpolecat" % "tut-plugin" % "0.3.2") 14 | addSbtPlugin("com.eed3si9n" % "sbt-unidoc" % "0.3.2") 15 | 16 | scalacOptions += "-deprecation" 17 | -------------------------------------------------------------------------------- /project/CentralRequirementsPlugin.scala: -------------------------------------------------------------------------------- 1 | package verizon.build 2 | 3 | import sbt._, Keys._ 4 | import xerial.sbt.Sonatype.autoImport.sonatypeProfileName 5 | 6 | object CentralRequirementsPlugin extends AutoPlugin { 7 | 8 | override def trigger = allRequirements 9 | 10 | override def requires = RigPlugin 11 | 12 | override lazy val projectSettings = Seq( 13 | sonatypeProfileName := "io.verizon", 14 | pomExtra in Global := { 15 | 16 | 17 | rolandomanrique 18 | Rolando Manrique 19 | http://github.com/rolandomanrique 20 | 21 | 22 | stew 23 | Stew O'Connor 24 | http://github.com/stew 25 | 26 | 27 | }, 28 | licenses := Seq("Apache-2.0" -> url("https://www.apache.org/licenses/LICENSE-2.0.html")), 29 | homepage := Some(url("http://verizon.github.io/ark/")), 30 | scmInfo := Some(ScmInfo(url("https://github.com/verizon/ark"), 31 | "git@github.com:verizon/ark.git")) 32 | ) 33 | } 34 | -------------------------------------------------------------------------------- /example/Dockerfile: -------------------------------------------------------------------------------- 1 | #: ---------------------------------------------------------------------------- 2 | #: Copyright (C) 2016 Verizon. All Rights Reserved. 3 | #: 4 | #: Licensed under the Apache License, Version 2.0 (the "License"); 5 | #: you may not use this file except in compliance with the License. 
6 | #: You may obtain a copy of the License at 7 | #: 8 | #: http://www.apache.org/licenses/LICENSE-2.0 9 | #: 10 | #: Unless required by applicable law or agreed to in writing, software 11 | #: distributed under the License is distributed on an "AS IS" BASIS, 12 | #: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | #: See the License for the specific language governing permissions and 14 | #: limitations under the License. 15 | #: 16 | #: ---------------------------------------------------------------------------- 17 | FROM ubuntu:14.04 18 | 19 | RUN echo "deb http://repos.mesosphere.io/ubuntu/ trusty main" > /etc/apt/sources.list.d/mesosphere.list && \ 20 | apt-key adv --keyserver keyserver.ubuntu.com --recv E56151BF && \ 21 | apt-get -y update && \ 22 | apt-get -y install mesos=0.25.* && \ 23 | apt-get clean && rm -rf /var/lib/apt/lists/* 24 | 25 | VOLUME /opt/app 26 | 27 | WORKDIR /opt/app 28 | 29 | ENTRYPOINT java -jar example-assembly-*.jar 30 | -------------------------------------------------------------------------------- /example/build.sbt: -------------------------------------------------------------------------------- 1 | import AssemblyKeys._ 2 | 3 | assemblySettings 4 | 5 | artifact in (Compile, assembly) ~= { art => 6 | art.copy(`classifier` = Some("assembly")) 7 | } 8 | 9 | addArtifact(artifact in (Compile, assembly), assembly) 10 | 11 | Keys.test in assembly := {} 12 | 13 | libraryDependencies ++= Seq( 14 | "org.http4s" %% "http4s-dsl" % "0.9.3", 15 | "org.http4s" %% "http4s-blaze-server" % "0.9.3", 16 | "org.http4s" %% "http4s-argonaut" % "0.9.3" 17 | ) 18 | 19 | scalacOptions in Test ~= (_.filterNot(Set("-Ywarn-value-discard"))) 20 | 21 | mergeStrategy in assembly <<= (mergeStrategy in assembly) { (old) => { 22 | case x if x.contains("journal") => MergeStrategy.first 23 | case x if x.contains("log4j") => MergeStrategy.discard 24 | case x if x.contains("logback.xml") => MergeStrategy.first 25 | case x if 
x.contains("BuildInfo") => MergeStrategy.first
  case x if x.contains("Pimped") => MergeStrategy.first
  case x if x.contains("package") => MergeStrategy.first
  case x if x.contains("ServiceConfig") => MergeStrategy.first
  case x if x.contains("JsonUtil") => MergeStrategy.first
  case x if x.contains("io.netty") => MergeStrategy.first
  case x if x.contains("ConfigLoader") => MergeStrategy.first
  case x => old(x)
}}

// The example entry point is `object Main` declared under `package ark` / `package example`
// (example/src/main/scala/Main.scala), so its fully-qualified name is `ark.example.Main`.
// The previous value pointed at a package that does not exist in this build, which leaves
// `sbt run` and the assembly jar's Main-Class manifest entry unable to locate the class.
mainClass in run := Some("ark.example.Main")

mainClass in assembly := Some("ark.example.Main")
-------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: scala 2 | 3 | jdk: oraclejdk8 4 | 5 | scala: 2.10.5 6 | 7 | branches: 8 | only: 9 | - master 10 | 11 | before_script: 12 | - "if [ $TRAVIS_PULL_REQUEST = 'false' ]; then git checkout -qf $TRAVIS_BRANCH; fi" 13 | 14 | script: 15 | - | 16 | if [ $TRAVIS_PULL_REQUEST = 'false' ]; then 17 | if [ $RELEASE_ON_PUSH = 'false' ]; then 18 | sbt test coverageReport 19 | else 20 | sbt ++$TRAVIS_SCALA_VERSION 'release with-defaults' 21 | fi 22 | else 23 | sbt test coverageReport 24 | fi 25 | - find $HOME/.sbt -name "*.lock" | xargs rm 26 | - find $HOME/.ivy2 -name "ivydata-*.properties" | xargs rm 27 | 28 | cache: 29 | directories: 30 | - $HOME/.ivy2/cache 31 | - $HOME/.sbt/boot/scala-$TRAVIS_SCALA_VERSION 32 | 33 | after_success: 34 | - find $HOME/.sbt -name "*.lock" | xargs rm 35 | - find $HOME/.ivy2 -name "ivydata-*.properties" | xargs rm 36 | - "bash <(curl -s https://codecov.io/bash) -r $TRAVIS_REPO_SLUG -t $CODECOV_TOKEN" 37 | 38 | env: 39 | global: 40 | - secure: "COUrXj1IJiY+yvmBABFW8+17lGs6ct8FuGt+XMgtkXO/ZXQYkffnqKjDyZYClCvgKDRyBNZiZ8HexXEcu3g/KKG9Qzx+/f4YcZEW6u/d/KxzWNHo7yuk3cX1p+mMCFrqWss7PjFeoKy4VhoZb8LmHVOBNgI6zrOB9KeyrUHmnbM=" 41 | - secure:
"bBqIDTVUUIHxj6ARiKBQ6MVbAVw+bRtF6OrPiyLYMhQrOEwLXHjvPb3sKowUAPJtv6CHruiFjZVK1mgYofiybps6PJPDfoRME/yCAbu8KNv1bWJLf58uvwWoH30dEoGliv0Lw90KxHxs7WFXkksOTvJi9B8G7WFK/4y6ACjCZew=" 42 | - secure: "KmIhYTvl5jhRbZSwfOJPwyc0c84pPJdKD4DE7hx+DPookbClTfP0s5HB0m1wiM0jRs+GO5pNpasFMuyze0F5PAHbz5BZj312704QovUUWA9kEAnWHiTj3KxTN26hA8aCUhfzYwFhbRH/9ayw8HTzmQSuytlEmbtfOMa8vNaDMfE=" 43 | -------------------------------------------------------------------------------- /core/src/main/scala/messages.scala: -------------------------------------------------------------------------------- 1 | //: ---------------------------------------------------------------------------- 2 | //: Copyright (C) 2016 Verizon. All Rights Reserved. 3 | //: 4 | //: Licensed under the Apache License, Version 2.0 (the "License"); 5 | //: you may not use this file except in compliance with the License. 6 | //: You may obtain a copy of the License at 7 | //: 8 | //: http://www.apache.org/licenses/LICENSE-2.0 9 | //: 10 | //: Unless required by applicable law or agreed to in writing, software 11 | //: distributed under the License is distributed on an "AS IS" BASIS, 12 | //: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | //: See the License for the specific language governing permissions and 14 | //: limitations under the License. 
//:
//: ----------------------------------------------------------------------------
package ark

import org.apache.mesos.Protos._
import org.apache.mesos.SchedulerDriver

/**
 * Models every possible message coming from the mesos master.
 *
 * Every message exposes the `SchedulerDriver` it is associated with.
 */
sealed trait MesosMessage {
  def driver: SchedulerDriver
}

/** Carries a single resource `Offer`. */
case class ResourceOffersMessage(
  override val driver: SchedulerDriver,
  offer: Offer
) extends MesosMessage

/** Carries the id of an offer that has been rescinded. */
case class OfferRescindedMessage(
  override val driver: SchedulerDriver,
  offerId: OfferID
) extends MesosMessage

/** Carries the framework id and master info received on registration. */
case class RegisteredMessage(
  override val driver: SchedulerDriver,
  frameworkId: FrameworkID,
  masterInfo: MasterInfo
) extends MesosMessage

/** Carries the master info received on re-registration. */
case class ReregisteredMessage(
  override val driver: SchedulerDriver,
  masterInfo: MasterInfo
) extends MesosMessage

/** Carries an opaque payload together with the executor and slave ids it relates to. */
case class FrameworkMessageMessage(
  override val driver: SchedulerDriver,
  executorId: ExecutorID,
  slaveId: SlaveID,
  data: Array[Byte]
) extends MesosMessage

/** Carries a `TaskStatus` update. */
case class StatusUpdateMessage(
  override val driver: SchedulerDriver,
  status: TaskStatus
) extends MesosMessage

/** Carries the id of a lost slave. */
case class SlaveLostMessage(
  override val driver: SchedulerDriver,
  slaveId: SlaveID
) extends MesosMessage

/** Carries the executor id, slave id and integer status of a lost executor. */
case class ExecutorLostMessage(
  override val driver: SchedulerDriver,
  executorId: ExecutorID,
  slaveId: SlaveID,
  status: Int
) extends MesosMessage

/** Carries an error description. */
case class ErrorMessage(
  override val driver: SchedulerDriver,
  message: String
) extends MesosMessage

/**
 * Extension point: users can extend CustomMessage to trigger state mutations
 * through the Scheduler.customEvents Process.
 */
trait CustomMessage extends MesosMessage

/** Built-in custom message that carries nothing but the driver. */
case class ReconcileMessage(
  override val driver: SchedulerDriver
) extends CustomMessage
-------------------------------------------------------------------------------- /example/src/main/scala/CustomMessageHandler.scala: -------------------------------------------------------------------------------- 1 | //:
----------------------------------------------------------------------------
//: Copyright (C) 2016 Verizon. All Rights Reserved.
//:
//: Licensed under the Apache License, Version 2.0 (the "License");
//: you may not use this file except in compliance with the License.
//: You may obtain a copy of the License at
//:
//: http://www.apache.org/licenses/LICENSE-2.0
//:
//: Unless required by applicable law or agreed to in writing, software
//: distributed under the License is distributed on an "AS IS" BASIS,
//: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//: See the License for the specific language governing permissions and
//: limitations under the License.
//:
//: ----------------------------------------------------------------------------
package ark
package example

import org.apache.mesos.MesosSchedulerDriver

import scalaz.\/

/** Scheduler settings handed to the callback of a `GetInfo` message. */
case class SchedulerInfo(mesosMaster: String, frameworkId: String, frameworkName: String, reqcpu: Double, reqmem: Double)

/** Requests a `SchedulerInfo`; the handler answers by invoking `cb`. */
case class GetInfo(override val driver: MesosSchedulerDriver, cb: (Throwable \/ SchedulerInfo) => Unit) extends CustomMessage

/** Requests that `slaveId` be added to the blacklist. */
case class Blacklist(override val driver: MesosSchedulerDriver, slaveId: String) extends CustomMessage

/** Requests that `slaveId` be removed from the blacklist. */
case class Unblacklist(override val driver: MesosSchedulerDriver, slaveId: String) extends CustomMessage

/**
 * Mix-in for `OneTaskPerSlaveStateManager` that handles the example's custom messages.
 */
trait CustomMessageHandler { self: OneTaskPerSlaveStateManager =>

  /** Mesos master address reported back in `SchedulerInfo`. */
  def master: String

  /**
   * Applies a custom message to the current state and returns the resulting state.
   *
   * Messages this handler does not know about leave the state untouched.
   */
  override def processCustomMessage(msg: CustomMessage)(state: OneTaskPerSlaveState): OneTaskPerSlaveState = msg match {
    // GetInfo message comes from HTTP service endpoints and provides a callback function that will be serialized into
    // the http response, we just need to create SchedulerInfo and pass it to the callback function
    case GetInfo(_, cb) =>
      val info: SchedulerInfo = SchedulerInfo(master, state.frameworkId, state.frameworkName, reqcpu, reqmem)
      cb(\/.right(info))
      state

    // Stop tracking any task recorded for slaveId and add slaveId to the blacklist.
    // Note this only edits the tracked state; nothing here asks the driver to kill a task.
    case Blacklist(_, slaveId) =>
      state.copy(
        reconcileTasks = state.reconcileTasks.filterNot(_.slaveId == slaveId),
        blacklist = state.blacklist + slaveId)

    // Remove slaveId from blacklist
    case Unblacklist(_, slaveId) =>
      state.copy(blacklist = state.blacklist - slaveId)

    // CustomMessage is an open trait (ReconcileMessage, user-defined messages, ...), so the
    // cases above can never be exhaustive. Pass anything else through unchanged instead of
    // failing with a MatchError.
    case _ =>
      state

  }

}
-------------------------------------------------------------------------------- /core/src/main/scala/ReconcileState.scala: -------------------------------------------------------------------------------- 1 | //: ---------------------------------------------------------------------------- 2 | //: Copyright (C) 2016 Verizon. All Rights Reserved. 3 | //: 4 | //: Licensed under the Apache License, Version 2.0 (the "License"); 5 | //: you may not use this file except in compliance with the License. 6 | //: You may obtain a copy of the License at 7 | //: 8 | //: http://www.apache.org/licenses/LICENSE-2.0 9 | //: 10 | //: Unless required by applicable law or agreed to in writing, software 11 | //: distributed under the License is distributed on an "AS IS" BASIS, 12 | //: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | //: See the License for the specific language governing permissions and 14 | //: limitations under the License.
//:
//: ----------------------------------------------------------------------------
package ark

import java.util
import org.apache.mesos.Protos
import scala.collection.JavaConverters._

/**
 * State for the reconciliation algorithm
 * (see http://mesos.apache.org/documentation/latest/reconciliation/).
 *
 * Holds the tasks pending reconciliation; no offers will be accepted until the list is empty.
 *
 * @param reconciledAt              timestamp in epoch milliseconds, compared against `System.currentTimeMillis()`
 * @param reconcilingTasks          tasks still pending reconciliation
 * @param minTaskReconciliationWait milliseconds that must pass before `minTimeElapsed` becomes true
 * @param maxTaskReconciliationWait milliseconds that must pass before `maxTimeElapsed` becomes true
 */
case class ReconcileState(
  reconciledAt: Long,
  reconcilingTasks: Set[ReconcileTaskStatus],
  minTaskReconciliationWait: Long = 5000,
  maxTaskReconciliationWait: Long = 30000
) {

  /** Number of tasks pending reconciliation, captured at construction. */
  val size = reconcilingTasks.size

  /** True once more than `minTaskReconciliationWait` ms have passed since `reconciledAt`. */
  def minTimeElapsed: Boolean = {
    val elapsed = System.currentTimeMillis() - reconciledAt
    elapsed > minTaskReconciliationWait
  }

  /** True once more than `maxTaskReconciliationWait` ms have passed since `reconciledAt`. */
  def maxTimeElapsed: Boolean = {
    val elapsed = System.currentTimeMillis() - reconciledAt
    elapsed > maxTaskReconciliationWait
  }

  /** Still reconciling while tasks are pending or the minimum wait has not passed. */
  def reconciling: Boolean = !(reconcilingTasks.isEmpty && minTimeElapsed)

  /** Expired when tasks are still pending after the maximum wait. */
  def expired: Boolean = reconcilingTasks.nonEmpty && maxTimeElapsed

  /** Pending tasks converted to mesos `TaskStatus` values, as a Java collection. */
  def getJavaCollection: util.Collection[Protos.TaskStatus] = {
    val statuses = reconcilingTasks.map(task => task.toTaskStatus)
    statuses.asJavaCollection
  }

}

object ReconcileState {

  /** State with no pending tasks and a `reconciledAt` of zero. */
  val empty = ReconcileState(0L, Set.empty)

  /** Starts a reconciliation now for the tasks tracked by the given scheduler state. */
  def apply(state: SchedulerState[_]): ReconcileState = {
    val now = System.currentTimeMillis
    ReconcileState(now, state.reconcileTasks)
  }
}

/** Min info required to create TaskStatus for reconciliation. */
case class ReconcileTaskStatus(taskId: String, slaveId: String) {

  /** Builds a `TASK_RUNNING` status carrying this task id and slave id. */
  def toTaskStatus: Protos.TaskStatus = {
    val task = Protos.TaskID.newBuilder.setValue(taskId).build()
    val slave = Protos.SlaveID.newBuilder.setValue(slaveId).build()
    Protos.TaskStatus.newBuilder()
      .setState(Protos.TaskState.TASK_RUNNING)
      .setTaskId(task)
      .setSlaveId(slave)
      .build()
  }
}
-------------------------------------------------------------------------------- /example/src/main/scala/Main.scala: -------------------------------------------------------------------------------- 1 | //:
----------------------------------------------------------------------------
//: Copyright (C) 2016 Verizon. All Rights Reserved.
//:
//: Licensed under the Apache License, Version 2.0 (the "License");
//: you may not use this file except in compliance with the License.
//: You may obtain a copy of the License at
//:
//: http://www.apache.org/licenses/LICENSE-2.0
//:
//: Unless required by applicable law or agreed to in writing, software
//: distributed under the License is distributed on an "AS IS" BASIS,
//: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//: See the License for the specific language governing permissions and
//: limitations under the License.
//:
//: ----------------------------------------------------------------------------
package ark
package example

import org.apache.mesos.{MesosSchedulerDriver, Protos}
import org.http4s.server.blaze.BlazeBuilder
import scala.concurrent.duration._
import scala.language.postfixOps

/**
 * Example entry point: wires a `OneTaskPerSlaveStateManager` (extended with
 * `CustomMessageHandler`) into a `Scheduler`, exposes the HTTP service on port 9000
 * and runs the scheduler with a periodic reconcile process.
 */
object Main extends scala.App {
  val mesosMaster = "zk://127.0.0.1:2181/mesos"
  val frameworkName = "sample-scheduler"
  val reqcpu = 0.1
  val reqmem = 64.0
  val cmd = Protos.CommandInfo.newBuilder.setShell(true)
    .setValue("""echo "SAMPLE SCHEDULER! sleeping for 120 secs" && sleep 120""")

  val reconciliationInterval = 1 minute
  val frameworkInfo = Protos.FrameworkInfo.newBuilder
    .setName(frameworkName)
    .setUser("")
    .build

  val initialState = OneTaskPerSlaveState(frameworkName)
  val manager = new OneTaskPerSlaveStateManager(reqcpu, reqmem, cmd) with CustomMessageHandler {
    override val master = mesosMaster
  }
  val scheduler = new Scheduler(manager)
  val driver = new MesosSchedulerDriver(scheduler, frameworkInfo, mesosMaster)

  // Set up http service
  val (service, httpStream) = Service.setup(driver)

  // System.getenv returns null when LIBPROCESS_IP is not set, and a null host is rejected
  // when the socket address is built. Fall back to binding on all interfaces so the example
  // still starts outside of a Mesos-provided environment.
  private val httpHost = Option(System.getenv("LIBPROCESS_IP")).filter(_.nonEmpty).getOrElse("0.0.0.0")
  val server = BlazeBuilder.bindHttp(9000, httpHost).mountService(service, "/").run

  sys addShutdownHook {
    server.shutdownNow()
    scheduler.shutdown(driver)
  }

  // Scheduler companion object provides a process
  // that triggers a reconcile message on a given interval
  val reconcileProcess = Scheduler.reconcileProcess(driver, reconciliationInterval)

  // When running the scheduler we can pass a list of scalaz stream
  // processes to send messages to the state manager, in this case
  // we provide the reconcile process and the stream fed by the http service
  scheduler.init(initialState, driver, Seq(reconcileProcess, httpStream)).run

}
-------------------------------------------------------------------------------- /example/src/main/scala/Service.scala: -------------------------------------------------------------------------------- 1 | //: ---------------------------------------------------------------------------- 2 | //: Copyright (C) 2016 Verizon. All Rights Reserved. 3 | //: 4 | //: Licensed under the Apache License, Version 2.0 (the "License"); 5 | //: you may not use this file except in compliance with the License.
//: You may obtain a copy of the License at
//:
//: http://www.apache.org/licenses/LICENSE-2.0
//:
//: Unless required by applicable law or agreed to in writing, software
//: distributed under the License is distributed on an "AS IS" BASIS,
//: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//: See the License for the specific language governing permissions and
//: limitations under the License.
//:
//: ----------------------------------------------------------------------------
package ark
package example

import org.apache.mesos.MesosSchedulerDriver
import org.http4s.EntityEncoder
import org.http4s.server.{HttpService, Router}
import org.http4s.argonaut._
import org.http4s.dsl._
import argonaut._
import Argonaut._
import scala.concurrent.ExecutionContext
import scala.language.postfixOps
import scalaz.concurrent.Task
import scalaz.stream.async
import scalaz.stream.async.mutable.Queue

/**
 * HTTP front-end of the example: every endpoint turns a request into a
 * `CustomMessage` and places it on a bounded queue.
 */
object Service {

  implicit val infoEncoder: EntityEncoder[SchedulerInfo] = jsonEncoderOf[SchedulerInfo]

  implicit def infoJson: CodecJson[SchedulerInfo] =
    casecodec5(SchedulerInfo.apply, SchedulerInfo.unapply)(
      "mesosMaster", "frameworkId", "frameworkName", "reqcpu", "reqmem")

  /**
   * Creates the inbound queue (capacity 100) and returns the http service that feeds it
   * together with the stream that drains it.
   */
  def setup(driver: MesosSchedulerDriver) = {
    val queue = async.boundedQueue[CustomMessage](100)(Scheduler.defaultExecutor)
    (service(queue, driver), queue.dequeue)
  }

  /** Mounts `rootService` under the empty prefix. */
  def service(inbound: Queue[CustomMessage], driver: MesosSchedulerDriver)(
    implicit executionContext: ExecutionContext = ExecutionContext.global): HttpService = {
    val routes = rootService(inbound, driver)
    Router("" -> routes)
  }

  /** Routes: `GET /info`, `POST /blacklist/{slaveId}`, `DELETE /blacklist/{slaveId}`. */
  def rootService(inbound: Queue[CustomMessage], driver: MesosSchedulerDriver)(
    implicit executionContext: ExecutionContext) = HttpService {

    case _ -> Root =>
      MethodNotAllowed()

    case GET -> Root / "info" =>
      // When request comes we only block until message makes it to the queue
      // After that is just waiting for state manager to call callback function
      val pendingInfo: Task[SchedulerInfo] = Task.async[SchedulerInfo] { callback =>
        inbound.enqueueOne(GetInfo(driver, callback)).run
      }
      Ok(pendingInfo)

    case POST -> Root / "blacklist" / slaveId =>
      val request = Blacklist(driver, slaveId)
      inbound.enqueueOne(request).run
      Ok(s"Requested Blacklist $slaveId")

    case DELETE -> Root / "blacklist" / slaveId =>
      val request = Unblacklist(driver, slaveId)
      inbound.enqueueOne(request).run
      Ok(s"Requested Unblacklist $slaveId")
  }

}
-------------------------------------------------------------------------------- /core/src/test/scala/OneTaskPerSlaveStateManagerSpec.scala: -------------------------------------------------------------------------------- 1 | //: ---------------------------------------------------------------------------- 2 | //: Copyright (C) 2016 Verizon. All Rights Reserved. 3 | //: 4 | //: Licensed under the Apache License, Version 2.0 (the "License"); 5 | //: you may not use this file except in compliance with the License. 6 | //: You may obtain a copy of the License at 7 | //: 8 | //: http://www.apache.org/licenses/LICENSE-2.0 9 | //: 10 | //: Unless required by applicable law or agreed to in writing, software 11 | //: distributed under the License is distributed on an "AS IS" BASIS, 12 | //: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | //: See the License for the specific language governing permissions and 14 | //: limitations under the License.
//:
//: ----------------------------------------------------------------------------
package ark

import org.scalatest._
import scala.collection.JavaConverters._
import org.apache.mesos.Protos

/** Behaviour checks for `OneTaskPerSlaveStateManager.processOffer`. */
class OneTaskPerSlaveStateManagerSpec extends FlatSpec with MustMatchers {

  def cmdBuilder(cmd:String) = Protos.CommandInfo.newBuilder.setShell(true).setValue(cmd)

  // Fixture values shared by every test below.
  private val frameworkName = "some-framework"
  private val reqcpu = 1.0
  private val reqmem = 1024.0
  private val assemblyCommand = "java -jar my-assembly.jar"

  // Each test gets its own manager so nothing is shared between tests.
  private def managerFor(cmd: Protos.CommandInfo.Builder) =
    new OneTaskPerSlaveStateManager(reqcpu, reqmem, cmd)

  behavior of "OneTaskPerSlaveStateManager"

  it should "accept any offer with enough resources and run the correct command" in {
    val cmd = cmdBuilder(assemblyCommand)
    val initial = OneTaskPerSlaveState(frameworkName)
    val mgr = managerFor(cmd)

    val (_, launched) = mgr.processOffer(2.0, 2048.0, "someslaveid")(initial)
    val task = launched.head
    task.getCommand.getValue must equal(cmd.getValue)
    for (resource <- task.getResourcesList.asScala) {
      resource.getName match {
        case "cpus" => resource.getScalar.getValue must equal(reqcpu)
        case "mem" => resource.getScalar.getValue must equal(reqmem)
      }
    }
  }

  it should "reject any offer in a slave where a task is already running" in {
    val mgr = managerFor(cmdBuilder(assemblyCommand))
    val initial = OneTaskPerSlaveState(frameworkName)

    val (afterFirst, firstTasks) = mgr.processOffer(2.0, 2048.0, "someslaveid")(initial)
    firstTasks must not equal Seq.empty
    val (_, secondTasks) = mgr.processOffer(2.0, 2048.0, "someslaveid")(afterFirst)
    secondTasks must equal(Seq.empty)
  }

  it should "list reconcile tasks correctly" in {
    val mgr = managerFor(cmdBuilder(assemblyCommand))
    val state1 = OneTaskPerSlaveState(frameworkName)

    val (state2, tasks1) = mgr.processOffer(2.0, 2048.0, "someslaveid1")(state1)
    tasks1 must not equal Seq.empty
    val (state3, tasks2) = mgr.processOffer(2.0, 2048.0, "someslaveid2")(state2)
    tasks2 must not equal Seq.empty
    val (state4, tasks3) = mgr.processOffer(2.0, 2048.0, "someslaveid3")(state3)
    tasks3 must not equal Seq.empty

    // The task launched from an offer uses the task id of the state the offer was applied to.
    val expected = List(
      ReconcileTaskStatus(state1.taskId, "someslaveid1"),
      ReconcileTaskStatus(state2.taskId, "someslaveid2"),
      ReconcileTaskStatus(state3.taskId, "someslaveid3"))
    state4.reconcileTasks.toList.sortBy(_.slaveId) must equal(expected)
  }
}
-------------------------------------------------------------------------------- /core/src/test/scala/testimpl.scala: -------------------------------------------------------------------------------- 1 | //: ---------------------------------------------------------------------------- 2 | //: Copyright (C) 2016 Verizon. All Rights Reserved. 3 | //: 4 | //: Licensed under the Apache License, Version 2.0 (the "License"); 5 | //: you may not use this file except in compliance with the License. 6 | //: You may obtain a copy of the License at 7 | //: 8 | //: http://www.apache.org/licenses/LICENSE-2.0 9 | //: 10 | //: Unless required by applicable law or agreed to in writing, software 11 | //: distributed under the License is distributed on an "AS IS" BASIS, 12 | //: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | //: See the License for the specific language governing permissions and 14 | //: limitations under the License.
//:
//: ----------------------------------------------------------------------------
package ark

import java.util.concurrent.atomic.AtomicInteger

import org.apache.mesos.Protos._
import org.apache.mesos.SchedulerDriver
import scala.collection.JavaConversions._
import java.util

import scala.collection.mutable.ListBuffer

/**
 * Test double for `SchedulerDriver`.
 *
 * Records declined offers, accepted offers, launched tasks and the number of
 * `reconcileTasks` calls; every other operation is a stub returning a fixed status.
 */
class DriverImpl extends SchedulerDriver {

  val declinedOffers = new ListBuffer[OfferID]()
  val acceptedOffers = new ListBuffer[OfferID]()
  val launchedTasks = new ListBuffer[TaskInfo]()
  val reconciledCount = new AtomicInteger(0)

  /** True once `reconcileTasks` has been called at least once. */
  def reconciled = reconciledCount.intValue > 0

  // ---- operations that record their arguments ----

  override def declineOffer(offerId: OfferID): Status = {
    declinedOffers.append(offerId)
    Status.DRIVER_RUNNING
  }

  override def launchTasks(offerIds: util.Collection[OfferID], tasks: util.Collection[TaskInfo]): Status = {
    acceptedOffers.appendAll(offerIds)
    launchedTasks.appendAll(tasks)
    Status.DRIVER_RUNNING
  }

  override def reconcileTasks(statuses: util.Collection[TaskStatus]): Status = {
    reconciledCount.incrementAndGet()
    Status.DRIVER_RUNNING
  }

  // ---- stubs that report DRIVER_RUNNING ----

  // Mesos 0.23.x
  override def acceptOffers(
    offerIds: util.Collection[OfferID],
    ops: util.Collection[Offer.Operation],
    filters: Filters): Status = Status.DRIVER_RUNNING

  override def killTask(taskId: TaskID): Status = Status.DRIVER_RUNNING

  override def suppressOffers(): Status = Status.DRIVER_RUNNING

  override def reviveOffers(): Status = Status.DRIVER_RUNNING

  override def declineOffer(offerId: OfferID, filters: Filters): Status = Status.DRIVER_RUNNING

  override def launchTasks(
    offerIds: util.Collection[OfferID],
    tasks: util.Collection[TaskInfo],
    filters: Filters): Status = Status.DRIVER_RUNNING

  override def launchTasks(
    offerId: OfferID,
    tasks: util.Collection[TaskInfo],
    filters: Filters): Status = Status.DRIVER_RUNNING

  override def launchTasks(
    offerId: OfferID,
    tasks: util.Collection[TaskInfo]): Status = Status.DRIVER_RUNNING

  override def requestResources(requests: util.Collection[Request]): Status = Status.DRIVER_RUNNING

  override def sendFrameworkMessage(
    executorId: ExecutorID,
    slaveId: SlaveID,
    data: Array[Byte]): Status = Status.DRIVER_RUNNING

  override def acknowledgeStatusUpdate(ackStatus: TaskStatus): Status = Status.DRIVER_RUNNING

  override def join(): Status = Status.DRIVER_RUNNING

  override def run(): Status = Status.DRIVER_RUNNING

  override def start(): Status = Status.DRIVER_RUNNING

  // ---- stubs that report DRIVER_STOPPED ----

  override def abort(): Status = Status.DRIVER_STOPPED

  override def stop(): Status = Status.DRIVER_STOPPED

  override def stop(failover: Boolean): Status = Status.DRIVER_STOPPED

}

/** Minimal scheduler state that only carries the reconcile task set. */
case class StateImpl(override val reconcileTasks: Set[ReconcileTaskStatus]) extends SchedulerState[StateImpl]

/**
 * State manager that never changes the state; it appends a message to `received`
 * for every callback so tests can inspect what was delivered.
 */
class StateManagerImpl(driver: SchedulerDriver, frameworkID: FrameworkID, masterInfo: MasterInfo)
  extends SchedulerStateManager[StateImpl] {

  val received = new ListBuffer[MesosMessage]

  override def processOffer(offer: Offer)(state: StateImpl): (StateImpl, Seq[TaskInfo.Builder]) = {
    received.append(ResourceOffersMessage(driver,offer))
    (state, Seq.empty)
  }

  override def statusUpdate(status: TaskStatus)(state: StateImpl): (StateImpl, Option[TaskID]) = {
    received.append(StatusUpdateMessage(driver, status))
    (state, None)
  }

  override def registered(id: String)(state: StateImpl): StateImpl = {
    // The id argument is ignored; the message is built from the constructor arguments.
    received.append(RegisteredMessage(driver, frameworkID, masterInfo))
    state
  }

  override def processCustomMessage(msg: CustomMessage)(state: StateImpl): StateImpl = {
    received.append(msg)
    state
  }

}
-------------------------------------------------------------------------------- /core/src/main/scala/OneTaskPerSlaveStateManager.scala: --------------------------------------------------------------------------------
//: ----------------------------------------------------------------------------
//: Copyright (C) 2016 Verizon. All Rights Reserved.
//:
//: Licensed under the Apache License, Version 2.0 (the "License");
//: you may not use this file except in compliance with the License.
//: You may obtain a copy of the License at
//:
//:   http://www.apache.org/licenses/LICENSE-2.0
//:
//: Unless required by applicable law or agreed to in writing, software
//: distributed under the License is distributed on an "AS IS" BASIS,
//: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//: See the License for the specific language governing permissions and
//: limitations under the License.
//:
//: ----------------------------------------------------------------------------
package ark

import journal.Logger
import org.apache.mesos.Protos

object OneTaskPerSlaveState {
  /**
   * Builds the starting state for a framework: nothing tracked, nothing blacklisted,
   * a placeholder framework id and the id counter at zero.
   */
  def apply(frameworkName: String): OneTaskPerSlaveState =
    new OneTaskPerSlaveState(
      reconcileTasks = Set.empty,
      blacklist = Set.empty,
      frameworkName = frameworkName,
      frameworkId = "not-registered-yet",
      nextId = 0)
}

/**
 * Scheduler state for the one-task-per-slave sample.
 *
 * @param reconcileTasks tasks currently tracked, each paired with the slave it is on
 * @param blacklist      slave ids on which offers are never accepted
 * @param frameworkName  name used as the prefix of generated task ids
 * @param frameworkId    id set by `registered`; a placeholder until then
 * @param nextId         counter embedded in the next generated task id
 */
case class OneTaskPerSlaveState(override val reconcileTasks: Set[ReconcileTaskStatus], blacklist: Set[String],
  frameworkName: String, frameworkId: String, nextId: Int) extends SchedulerState[OneTaskPerSlaveState] {
  /** Id that the next launched task will receive. */
  val taskId = s"$frameworkName-$nextId-$frameworkId"
}

/**
 * Sample mesos scheduler state manager implementation that runs one task per slave
 */
class OneTaskPerSlaveStateManager(val reqcpu: Double, val reqmem: Double, val cmd: Protos.CommandInfo.Builder)
  extends SimpleSchedulerStateManager[OneTaskPerSlaveState] {

  private val log = Logger[this.type]

  // True when a tracked task is already associated with the given slave.
  private def occupied(state: OneTaskPerSlaveState, slaveId: String): Boolean =
    state.reconcileTasks.exists(_.slaveId == slaveId)

  // Copy of the state without any tracked task associated with the given slave.
  private def evict(state: OneTaskPerSlaveState, slaveId: String): OneTaskPerSlaveState =
    state.copy(reconcileTasks = state.reconcileTasks.filterNot(_.slaveId == slaveId))

  // Accept the offer only if it is big enough and the slave is neither blacklisted nor in use.
  override def processOffer(cpus: Double, mem: Double, slaveId: String)(state: OneTaskPerSlaveState)
  : (OneTaskPerSlaveState, Seq[Protos.TaskInfo.Builder]) = {
    val enoughResources = reqcpu <= cpus && reqmem <= mem
    val slaveAvailable = !(state.blacklist.contains(slaveId) || occupied(state, slaveId))
    if (!(enoughResources && slaveAvailable)) {
      (state, Seq.empty)
    } else {
      log.debug(s"accepting offer on $slaveId")
      val id = state.taskId
      val tracked = state.reconcileTasks + ReconcileTaskStatus(id, slaveId)
      val advanced = state.copy(nextId = state.nextId + 1, reconcileTasks = tracked)
      (advanced, Seq(makeTask(id, reqcpu, reqmem, cmd)))
    }
  }

  // Remember the framework id handed over at registration time.
  override def registered(frameworkId: String)(state: OneTaskPerSlaveState): OneTaskPerSlaveState =
    state.copy(frameworkId = frameworkId)

  // return false if task should be killed, called when TASK_RUNNING | TASK_STAGING
  // This implementation never asks for a kill; it only starts tracking a task whose slave is unknown.
  override def taskRunning(taskId: String, executorId: String, slaveId: String)(state: OneTaskPerSlaveState)
  : (OneTaskPerSlaveState, Boolean) = {
    val next =
      if (occupied(state, slaveId)) state
      else state.copy(
        nextId = state.nextId + 1,
        reconcileTasks = state.reconcileTasks + ReconcileTaskStatus(taskId, slaveId))
    (next, true)
  }

  // called when TASK_FINISHED
  override def taskFinished(taskId: String, executorId: String, slaveId: String)(state: OneTaskPerSlaveState)
  : OneTaskPerSlaveState = evict(state, slaveId)

  // called when TASK_FAILED | TASK_LOST | TASK_ERROR | TASK_KILLED
  override def taskFailed(taskId: String, executorId: String, slaveId: String)(state: OneTaskPerSlaveState)
  : OneTaskPerSlaveState = evict(state, slaveId)

  // Return Seq[String] with task ids running in the executor
  // (always empty here; the slave's tracked tasks are simply dropped from the state)
  override def executorLost(executorId: String, slaveId: String, status: Int)(state: OneTaskPerSlaveState)
  : (OneTaskPerSlaveState, Seq[String]) = (evict(state, slaveId), Seq.empty)

  // Return Seq[String] with task ids running in the slave
  // (always empty here; the slave's tracked tasks are simply dropped from the state)
  override def slaveLost(slaveId: String)(state: OneTaskPerSlaveState)
  : (OneTaskPerSlaveState, Seq[String]) = (evict(state, slaveId), Seq.empty)


  /** Task builder named after `id`, requesting the given cpus/mem and running `cmd`. */
  def makeTask(id: String, cpus: Double, mem: Double, cmd: Protos.CommandInfo.Builder): Protos.TaskInfo.Builder = {
    val taskIdBuilder = Protos.TaskID.newBuilder.setValue(id)
    val cpuResource = scalarResource("cpus", cpus)
    val memResource = scalarResource("mem", mem)
    Protos.TaskInfo.newBuilder
      .setTaskId(taskIdBuilder)
      .setName(id)
      .addResources(cpuResource)
      .addResources(memResource)
      .setCommand(cmd)
  }

  /** Builder for a SCALAR resource with the given name and amount. */
  protected def scalarResource(name: String, value: Double): Protos.Resource.Builder = {
    val amount = Protos.Value.Scalar.newBuilder.setValue(value)
    Protos.Resource.newBuilder
      .setType(Protos.Value.Type.SCALAR)
      .setName(name)
      .setScalar(amount)
  }

}
-------------------------------------------------------------------------------- /core/src/main/scala/SchedulerState.scala: -------------------------------------------------------------------------------- 1 | //: ---------------------------------------------------------------------------- 2 | //: Copyright (C) 2016 Verizon. All Rights Reserved. 3 | //: 4 | //: Licensed under the Apache License, Version 2.0 (the "License"); 5 | //: you may not use this file except in compliance with the License.
//: You may obtain a copy of the License at
//:
//:   http://www.apache.org/licenses/LICENSE-2.0
//:
//: Unless required by applicable law or agreed to in writing, software
//: distributed under the License is distributed on an "AS IS" BASIS,
//: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//: See the License for the specific language governing permissions and
//: limitations under the License.
//:
//: ----------------------------------------------------------------------------
package ark

import org.apache.mesos.Protos.TaskState._
import org.apache.mesos.Protos._
import org.apache.mesos.Protos

import scala.collection.JavaConverters._


/**
 * Base trait for scheduler state. The self-type forces an implementation to be
 * its own type parameter, i.e. `class S extends SchedulerState[S]`.
 */
trait SchedulerState[T] { self: T =>
  // Return list of tasks that the framework thinks is running
  def reconcileTasks: Set[ReconcileTaskStatus]
}

/** State that carries nothing beyond the set of tasks to reconcile. */
case class SimpleSchedulerState(override val reconcileTasks: Set[ReconcileTaskStatus]) extends SchedulerState[SimpleSchedulerState]

/**
 * Very similar interface to org.apache.mesos.Scheduler but completely thread safe as each call is queued as a message
 * and processed one at a time to call the corresponding function in this interface. Each function receives the
 * current scheduler state and it is expected to return the new state after processing each message.
 */
trait SchedulerStateManager[T <: SchedulerState[T]] {

  // Return tasks to run
  def processOffer(offer: Offer)(state: T): (T, Seq[TaskInfo.Builder])

  // Return Some(TaskID) if we need to kill this task
  def statusUpdate(status: TaskStatus)(state: T): (T, Option[TaskID]) = (state, None)

  // Return task ids running on the lost slave
  def slaveLost(slaveId: SlaveID)(state: T): (T, Seq[TaskID]) = (state, Seq.empty)

  // Return task ids running on the lost executor
  def executorLost(executorId: ExecutorID, slaveId: SlaveID, status: Int)(state: T): (T, Seq[TaskID]) = (state, Seq.empty)

  // Registered also has MasterInfo but its never used so ignoring in this case
  def registered(frameworkId: String)(state: T): T = state
  def reregistered(state: T): T = state

  // Handle custom messages
  def processCustomMessage(msg: CustomMessage)(state: T): T = state

  /*** Non-required methods, less common used ***/
  def rescindOffer(offerId: OfferID)(state: T): T = state
  def frameworkMessage(executorId: ExecutorID, slaveId: SlaveID, data: Array[Byte])(state: T): T = state
  def error(message: String)(state: T): T = state

}

/**
 * SimpleSchedulerStateManager is a very simple implementation of SchedulerStateManager that hides
 * org.apache.mesos.Protos as much as possible in favor of native scala types.
 *
 * Each Protos-based callback is implemented here by unwrapping the ids to plain strings and
 * delegating to the abstract String-based overload of the same name.
 */
trait SimpleSchedulerStateManager[T <: SchedulerState[T]] extends SchedulerStateManager[T] {

  // called when new offers come in
  def processOffer(cpus: Double, mem: Double, slaveId: String)(state: T): (T, Seq[TaskInfo.Builder])

  // simple impl of process offer that only extracts cpus/mem resources and slave ID
  // (a missing "cpus" or "mem" resource is passed on as 0.0)
  override def processOffer(offer: Offer)(state: T): (T, Seq[TaskInfo.Builder]) = {
    val res = scalarResources(offer)
    processOffer(res.getOrElse("cpus", 0.0), res.getOrElse("mem", 0.0), offer.getSlaveId.getValue)(state)
  }

  // return false if task should be killed, called when TASK_RUNNING | TASK_STAGING | TASK_STARTING
  def taskRunning(taskId: String, executorId: String, slaveId: String)(state: T): (T, Boolean)

  // called when TASK_FINISHED
  def taskFinished(taskId: String, executorId: String, slaveId: String)(state: T): T

  // called when TASK_FAILED | TASK_LOST | TASK_ERROR | TASK_KILLED
  def taskFailed(taskId: String, executorId: String, slaveId: String)(state: T): T

  // Simpler implementation of statusUpdate that opaques Protos.TaskState from user implementation.
  // Returns Some(task id) only when taskRunning answers false, i.e. asks for the task to be killed.
  override def statusUpdate(status: TaskStatus)(state: T): (T, Option[TaskID]) = {
    val tid = status.getTaskId
    val eid = status.getExecutorId
    val sid = status.getSlaveId
    status.getState match {
      case TASK_RUNNING | TASK_STAGING | TASK_STARTING =>
        val (next, keepRunning) = taskRunning(tid.getValue, eid.getValue, sid.getValue)(state)
        (next, if (keepRunning) None else Option(tid))

      case TASK_FINISHED =>
        (taskFinished(tid.getValue, eid.getValue, sid.getValue)(state), None)

      case TASK_FAILED | TASK_LOST | TASK_ERROR | TASK_KILLED =>
        (taskFailed(tid.getValue, eid.getValue, sid.getValue)(state), None)

      // Any TaskState not handled above (for example one introduced by a different Mesos
      // client version) leaves the state untouched instead of failing with a MatchError.
      case _ =>
        (state, None)
    }
  }

  // Simpler implementation of executorLost that opaques Protos._ from user implementation
  override def executorLost(executorId: ExecutorID, slaveId: SlaveID, status: Int)(state: T): (T, Seq[TaskID]) = {
    val (next, taskIds) = executorLost(executorId.getValue, slaveId.getValue, status)(state)
    (next, taskIds.map(TaskID.newBuilder.setValue(_).build))
  }

  // Return Seq[String] with task ids running in the executor
  def executorLost(executorId: String, slaveId: String, status: Int)(state: T): (T, Seq[String])

  // Simpler implementation of slaveLost that opaques Protos._ from user implementation
  override def slaveLost(slaveId: SlaveID)(state: T): (T, Seq[TaskID]) = {
    val (next, taskIds) = slaveLost(slaveId.getValue)(state)
    (next, taskIds.map(TaskID.newBuilder.setValue(_).build))
  }

  // Return Seq[String] with task ids running in the slave
  def slaveLost(slaveId: String)(state: T): (T, Seq[String])

  /**
   * Totals the SCALAR resources of an offer by resource name.
   *
   * The resources are processed as a list, not a set: an offer may carry several entries
   * with the same name and the same amount, and every one of them must count towards the
   * total. (Going through a Set would merge such entries and under-report the sum.)
   *
   * @param offer offer whose resource list is inspected
   * @return resource name -> sum of the scalar values of all entries with that name;
   *         names with no SCALAR entry are absent from the map
   */
  def scalarResources(offer: Offer): Map[String, Double] = {
    offer.getResourcesList.asScala.toList
      // Filter scalar resources
      .filter(_.getType == Protos.Value.Type.SCALAR)
      // Extract resource name and scalar value
      .map(r => (r.getName, r.getScalar.getValue))
      // Group by name
      .groupBy(_._1)
      // Add up values (strict map, so the result is a plain immutable Map rather than a lazy view)
      .map { case (name, entries) => (name, entries.map(_._2).sum) }
  }

}

-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Ark 2 | 3 | ![image](docs/img/logo.png) 4 | 5 | [![Build Status](https://travis-ci.org/Verizon/ark.svg?branch=master)](https://travis-ci.org/Verizon/ark) 6 | [![Maven Central](https://maven-badges.herokuapp.com/maven-central/io.verizon.ark/core_2.10/badge.svg)](https://maven-badges.herokuapp.com/maven-central/io.verizon.ark/core_2.10) 7 | [![codecov](https://codecov.io/gh/Verizon/ark/branch/master/graph/badge.svg)](https://codecov.io/gh/Verizon/ark) 8 | 9 | This library
provides a functional Scala implementation of the `org.apache.mesos.Scheduler` interface provided by the [Mesos Java API](http://mesos.apache.org/api/latest/java/). 10 | 11 | The goal of this library is to ease development of mesos schedulers by providing out-of-the-box implementations of common operational requirements of a framework, allowing developers to focus on domain logic implementation of task state transitions. 12 | 13 | Features: 14 | 15 | * Pure functional implementation of mesos scheduler tasks state. 16 | * Scalaz stream to queue all messages sent to the framework (from mesos master or custom user defined messages) to be processed one at a time making it completely thread safe. 17 | * Recurring reconciliation based on [Mesos Reconciliation Algorithm](http://mesos.apache.org/documentation/latest/reconciliation/). 18 | * *TODO:* Re-registration on mesos master failures. 19 | * *TODO:* High-Availability mode and leader election. 20 | 21 | From the current state of the project there is a clear path to implement the missing features above by enhancing the `ark.Scheduler.processMessage` function. 22 | 23 | 24 | ## Messages 25 | 26 | The core of this Mesos Scheduler implementation is handled by a scalaz async message queue. When Mesos calls any of the functions provided by the `Scheduler` interface, the scheduler creates one or many `ark.MesosMessage` and enqueues them in the scalaz stream. 27 | 28 | ```scala 29 | sealed trait MesosMessage { def driver: org.apache.mesos.SchedulerDriver } 30 | ``` 31 | 32 | The main scalaz stream is created inside `ark.Scheduler` to handle calls from Mesos to the Scheduler interface. Users can provide any number of `scalaz.stream.Process[scalaz.concurrent.Task, CustomMessage]` when initializing the `Scheduler`. These custom streams get merged into the internal scalaz stream.
This way the user can trigger any `CustomMessage` to the scheduler which is handled by the same `processMessage` function that handles messages from Mesos. 33 | 34 | ```scala 35 | trait CustomMessage extends MesosMessage 36 | ``` 37 | 38 | ## Reconciliation 39 | 40 | Mesos has very good documentation on how to implement the [Reconciliation Algorithm](http://mesos.apache.org/documentation/latest/reconciliation/). Since most frameworks need to perform reconciliation, this was the first feature to address in a common Mesos scheduler library. 41 | 42 | Reconciliation is triggered by sending a `ReconcileMessage` to the stream: 43 | 44 | ```scala 45 | case class ReconcileMessage(override val driver: SchedulerDriver) extends CustomMessage 46 | ``` 47 | 48 | The `ark.Scheduler` companion object provides a convenient function to initialize a timed reconciliation stream: 49 | 50 | ```scala 51 | def reconcileProcess(driver: SchedulerDriver, reconcileInterval: FiniteDuration): Process[Task, ReconcileMessage] = { 52 | time.awakeEvery(reconcileInterval)(defaultExecutor, timeOutScheduler) 53 | .map(_ => ReconcileMessage(driver)) 54 | } 55 | ``` 56 | 57 | The user can create a reconcile process by calling the function above and passing it to the scheduler `init` function; this will trigger reconciliation every `reconcileInterval`, and all offers will be declined until reconciliation is over. 58 | 59 | ```scala 60 | val reconciliationInterval = 1 hour 61 | val customStreams = Seq( Scheduler.reconcileProcess(driver, reconciliationInterval) ) 62 | scheduler.init(state, driver, customStreams).run 63 | ``` 64 | 65 | *TODO:* The wait time to reconcile all tasks is currently fixed; Mesos recommends using truncated exponential backoff to "avoid a snowball effect in the case of the driver or master being backed up".
66 | 67 | 68 | ## Usage 69 | 70 | A full Mesos scheduler implementation needs to implement the `ark.SchedulerState` and `ark.SchedulerStateManager` traits and run the scheduler like this: 71 | 72 | ```scala 73 | // implement state and state manager 74 | case class MyState( ... ) extends SchedulerState[MyState] 75 | class MyStateManager extends SchedulerStateManager[MyState] { ... } 76 | 77 | // initialize state and state manager 78 | val initialState = MyState( ... ) 79 | val stateManager = new MyStateManager( ... ) 80 | 81 | // define framework info 82 | val frameworkInfo = Protos.FrameworkInfo.newBuilder 83 | .setName("my-framework") 84 | .setOtherFrameworkattributes( ... ) 85 | .build 86 | 87 | // initialize scheduler and mesos driver 88 | val scheduler = new ark.Scheduler(stateManager) 89 | val driver = new org.apache.mesos.MesosSchedulerDriver(scheduler, frameworkInfo, mesosMaster) 90 | 91 | // shutdown scheduler on exit 92 | sys addShutdownHook { 93 | scheduler.shutdown(driver) 94 | } 95 | 96 | // Seq[Process[Task,CustomMessage]] pass custom state mutation messages 97 | // Scheduler.reconcileProcess triggers reconciliation every "reconciliationInterval" 98 | val reconciliationInterval = 1 hour 99 | val customStreams = Seq(Scheduler.reconcileProcess(driver, reconciliationInterval)) 100 | 101 | // run scheduler (blocking) 102 | scheduler.init(initialState, driver, customStreams).run 103 | ``` 104 | 105 | ### Example 106 | 107 | The provided example implementation creates a scheduler that triggers the provided task on every slave in the cluster. 108 | This example also uses [http4s](http://http4s.org/) to set up REST endpoints to query current scheduler state by 109 | sending custom messages to the queue. Users can query scheduler info and add or remove slaves from a blacklist.
110 | 111 | Running example module on a local mesos cluster with 2 slaves using docker-machine on mac (see 112 | https://github.com/mesosphere/docker-containers/tree/master/mesos): 113 | 114 | 1. Run ZK: 115 | 116 | ```bash 117 | docker run -d --net=host netflixoss/exhibitor:1.5.2 118 | ``` 119 | 120 | 1. Run master: 121 | 122 | ```bash 123 | docker run -d --net=host \ 124 | -e LIBPROCESS_IP=$(docker-machine ip) \ 125 | -e HOSTNAME=$(docker-machine ip) \ 126 | -e MESOS_PORT=5050 \ 127 | -e MESOS_ZK=zk://127.0.0.1:2181/mesos \ 128 | -e MESOS_QUORUM=1 \ 129 | -e MESOS_REGISTRY=in_memory \ 130 | -e MESOS_LOG_DIR=/var/log/mesos \ 131 | -e MESOS_WORK_DIR=/var/tmp/mesos \ 132 | -v "$(pwd)/log/mesos:/var/log/mesos" \ 133 | -v "$(pwd)/tmp/mesos:/var/tmp/mesos" \ 134 | mesosphere/mesos-master:0.25.0-0.2.70.ubuntu1404 135 | ``` 136 | 137 | 1. Run slaves, notice `MESOS_PORT` and mount points change for `/var/log/mesos` and `/var/tmp/mesos`: 138 | 139 | ```bash 140 | docker run -d --net=host --privileged \ 141 | -e LIBPROCESS_IP=$(docker-machine ip) \ 142 | -e HOSTNAME=$(docker-machine ip) \ 143 | -e MESOS_PORT=5051 \ 144 | -e MESOS_MASTER=zk://127.0.0.1:2181/mesos \ 145 | -e MESOS_SWITCH_USER=0 \ 146 | -e MESOS_CONTAINERIZERS=docker,mesos \ 147 | -e MESOS_LOG_DIR=/var/log/mesos \ 148 | -e MESOS_WORK_DIR=/var/tmp/mesos \ 149 | -v "$(pwd)/log/mesos1:/var/log/mesos" \ 150 | -v "$(pwd)/tmp/mesos1:/var/tmp/mesos" \ 151 | -v /var/run/docker.sock:/var/run/docker.sock \ 152 | -v /cgroup:/cgroup \ 153 | -v /sys:/sys \ 154 | -v /usr/local/bin/docker:/usr/local/bin/docker \ 155 | mesosphere/mesos-slave:0.25.0-0.2.70.ubuntu1404 156 | 157 | docker run -d --net=host --privileged \ 158 | -e LIBPROCESS_IP=$(docker-machine ip) \ 159 | -e HOSTNAME=$(docker-machine ip) \ 160 | -e MESOS_PORT=5052 \ 161 | -e MESOS_MASTER=zk://127.0.0.1:2181/mesos \ 162 | -e MESOS_SWITCH_USER=0 \ 163 | -e MESOS_CONTAINERIZERS=docker,mesos \ 164 | -e MESOS_LOG_DIR=/var/log/mesos \ 165 | -e 
MESOS_WORK_DIR=/var/tmp/mesos \ 166 | -v "$(pwd)/log/mesos2:/var/log/mesos" \ 167 | -v "$(pwd)/tmp/mesos2:/var/tmp/mesos" \ 168 | -v /var/run/docker.sock:/var/run/docker.sock \ 169 | -v /cgroup:/cgroup \ 170 | -v /sys:/sys \ 171 | -v /usr/local/bin/docker:/usr/local/bin/docker \ 172 | mesosphere/mesos-slave:0.25.0-0.2.70.ubuntu1404 173 | ``` 174 | 175 | 1. Build scheduler assembly jar 176 | 177 | ```bash 178 | sbt "project example" assembly 179 | ``` 180 | 1. Build scheduler container from example/Dockerfile: 181 | 182 | ```bash 183 | docker build -t mysched example/ 184 | ``` 185 | 186 | 1. Run scheduler container interactively: 187 | 188 | ```bash 189 | docker run --rm --net=host -it \ 190 | -e LIBPROCESS_IP=$(docker-machine ip) \ 191 | -v $(pwd)/example/target/scala-2.10:/opt/app \ 192 | mysched 193 | ``` 194 | 195 | 196 | -------------------------------------------------------------------------------- /core/src/test/scala/SchedulerSpec.scala: -------------------------------------------------------------------------------- 1 | //: ---------------------------------------------------------------------------- 2 | //: Copyright (C) 2016 Verizon. All Rights Reserved. 3 | //: 4 | //: Licensed under the Apache License, Version 2.0 (the "License"); 5 | //: you may not use this file except in compliance with the License. 6 | //: You may obtain a copy of the License at 7 | //: 8 | //: http://www.apache.org/licenses/LICENSE-2.0 9 | //: 10 | //: Unless required by applicable law or agreed to in writing, software 11 | //: distributed under the License is distributed on an "AS IS" BASIS, 12 | //: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | //: See the License for the specific language governing permissions and 14 | //: limitations under the License. 
//:
//: ----------------------------------------------------------------------------
package ark

import java.util.UUID

import journal.Logger
import org.apache.mesos.Protos.{MasterInfo, FrameworkID, Offer}
import org.apache.mesos.{SchedulerDriver, Protos}
import org.scalatest._
import scala.language.postfixOps
import scala.concurrent.duration._
import scala.collection.JavaConverters._
import scalaz.concurrent.Task
import scalaz.stream.async


/**
 * Behavioural spec for `Scheduler`.
 *
 * Every test follows the same outline: start the scheduler stream asynchronously with
 * `init(...).runAsync`, call `registered`, sleep, assert that the driver saw at least one
 * `reconcileTasks` call, drive the scheduler through its callbacks, shut it down and then
 * compare what `StateManagerImpl` / `DriverImpl` recorded against the expectation.
 *
 * The tests synchronise with the asynchronously running stream purely through
 * `Thread.sleep`, so the sleep durations and the statement order matter.
 */
class SchedulerSpec extends FlatSpec with MustMatchers {

  private val log = Logger[this.type]

  behavior of "Scheduler"

  // Mesos callbacks must reach the state manager in the order they were issued.
  it should "process messages" in {
    val driver = new DriverImpl
    val expFwId = fwId()
    val expOffer = offer(expFwId)
    val expmi = masterInfo()
    val expStatus = taskStatus()

    // StateManagerImpl rebuilds a MesosMessage for every callback and appends it to `received`
    val st = StateImpl(Set.empty)
    val mgr = new StateManagerImpl(driver, expFwId, expmi)
    val scheduler = new Scheduler(mgr)
    scheduler.init(st, driver, Seq.empty).runAsync { case _ => /* noop */ }
    Thread.sleep(500)
    scheduler.registered(driver, expFwId, expmi)
    Thread.sleep(6000) // TODO: need to sleep ReconcileState.minTaskReconciliationWait or offers will be declined
    driver.reconciled must equal(true)
    scheduler.resourceOffers(driver, List(expOffer).asJava)
    scheduler.statusUpdate(driver, expStatus)
    Thread.sleep(1000)
    scheduler.shutdown(driver)

    val expReceived = RegisteredMessage(driver, expFwId, expmi) ::
      ResourceOffersMessage(driver, expOffer) ::
      StatusUpdateMessage(driver, expStatus) ::
      Nil
    mgr.received.toList must equal(expReceived)

  }

  // A message enqueued on a user-supplied stream must reach processCustomMessage.
  it should "process custom messages" in {
    case class MyCustomMessage(driver: SchedulerDriver, id: String) extends CustomMessage
    val driver = new DriverImpl
    val expFwId = fwId()
    val expmi = masterInfo()
    val expCustMsg = MyCustomMessage(driver, genid)

    // Queue whose dequeue side is handed to the scheduler as a custom stream
    val customEventsQueue = async.boundedQueue[MyCustomMessage](100)(Scheduler.defaultExecutor)
    val customEvents = customEventsQueue.dequeue

    // StateManagerImpl rebuilds a MesosMessage for every callback and appends it to `received`
    val st = StateImpl(Set.empty)
    val mgr = new StateManagerImpl(driver, expFwId, expmi)
    val scheduler = new Scheduler(mgr)
    scheduler.init(st, driver, Seq(customEvents)).runAsync { case _ => /* noop */ }
    Thread.sleep(500)
    scheduler.registered(driver, expFwId, expmi)
    Thread.sleep(6000) // TODO: need to sleep ReconcileState.minTaskReconciliationWait or offers will be declined
    driver.reconciled must equal(true)
    customEventsQueue.enqueueOne(expCustMsg).run
    Thread.sleep(1000)
    scheduler.shutdown(driver)

    val expReceived = RegisteredMessage(driver, expFwId, expmi) :: expCustMsg :: Nil
    mgr.received.toList must equal(expReceived)

  }

  // The initial state contains one task to reconcile, so offers are declined until its status arrives.
  it should "decline all offers when reconciling and remove task from pending when status arrives" in {
    val driver = new DriverImpl
    val expFwId = fwId()
    val declinedOffer = offer(expFwId)
    val passedOffer = offer(expFwId)
    val expmi = masterInfo()
    val expTaskId = genid
    val expSlaveId = genid
    // Status for exactly the task/slave pair that the initial state lists for reconciliation
    val expStatus = taskStatus(taskId = expTaskId, slaveId = expSlaveId)

    // StateManagerImpl rebuilds a MesosMessage for every callback and appends it to `received`
    val st = StateImpl(Set(ReconcileTaskStatus(expTaskId, expSlaveId)))
    val mgr = new StateManagerImpl(driver, expFwId, expmi)
    val scheduler = new Scheduler(mgr)
    scheduler.init(st, driver, Seq.empty).runAsync { case _ => /* noop */ }
    Thread.sleep(500)
    scheduler.registered(driver, expFwId, expmi)
    Thread.sleep(5000) // TODO: need to sleep ReconcileState.minTaskReconciliationWait or offers will be declined
    driver.reconciled must equal(true)

    // offers should be declined when reconciling and it should not even make it to the state manager
    scheduler.resourceOffers(driver, List(declinedOffer).asJava)
    scheduler.resourceOffers(driver, List(declinedOffer).asJava)
    scheduler.resourceOffers(driver, List(declinedOffer).asJava)
    scheduler.resourceOffers(driver, List(declinedOffer).asJava)
    Thread.sleep(500)

    // The same offer was submitted four times, so its id is expected four times
    val declinedOffers = List(declinedOffer.getId, declinedOffer.getId, declinedOffer.getId, declinedOffer.getId)
    driver.declinedOffers.toList must equal(declinedOffers)
    val expReceived = RegisteredMessage(driver, expFwId, expmi) :: Nil
    mgr.received.toList must equal(expReceived)

    // once status arrives for pending tasks, offers should make it to the state manager
    scheduler.statusUpdate(driver, expStatus)

    scheduler.resourceOffers(driver, List(passedOffer).asJava)
    Thread.sleep(1000)
    scheduler.shutdown(driver)

    val expReceived2 = expReceived ++ (
      StatusUpdateMessage(driver, expStatus) ::
      ResourceOffersMessage(driver, passedOffer) ::
      Nil)
    mgr.received.toList must equal(expReceived2)
    // The passed offer is expected among the declined ones as well
    // (presumably because StateManagerImpl returns no tasks for it; confirm against Scheduler)
    driver.declinedOffers.toList must equal(declinedOffers ++ List(passedOffer.getId))
  }

  // Feeding Scheduler.reconcileProcess into init must keep producing reconcileTasks calls.
  it should "start reconciliation every reconcileInterval" in {
    val reconciliationInterval = 1 second
    val driver = new DriverImpl
    val expFwId = fwId()
    val expmi = masterInfo()

    val reconcileEvents = Scheduler.reconcileProcess(driver, reconciliationInterval)

    // StateManagerImpl rebuilds a MesosMessage for every callback and appends it to `received`
    val st = StateImpl(Set.empty)
    val mgr = new StateManagerImpl(driver, expFwId, expmi)
    val scheduler = new Scheduler(mgr)
    scheduler.init(st, driver, Seq(reconcileEvents)).runAsync { case _ => /* noop */ }
    Thread.sleep(500)
    scheduler.registered(driver, expFwId, expmi)
    Thread.sleep(5000) // TODO: need to sleep ReconcileState.minTaskReconciliationWait or offers will be declined
    driver.reconciled must equal(true)
    Thread.sleep(5000)
    scheduler.shutdown(driver)

    // slept 5 seconds so make sure we got reconcile message at least 4 times
    driver.reconciledCount.intValue must be >= 4
  }

  // Wrapped in pendingUntilFixed: the body is expected to fail for now (see the TODO near the end).
  it should "run one big task by combining smaller offers" in pendingUntilFixed {
    val driver = new DriverImpl
    val expFwId = fwId()
    val slaveId = genid
    val expOffer1 = offer(expFwId, slaveId = slaveId) // 3 cpus 4000 mem
    val expOffer2 = offer(expFwId, slaveId = slaveId) // 3 cpus 4000 mem
    val expmi = masterInfo()
    // Task that asks for more than either single offer provides (5 cpus / 6000 mem)
    val expLaunchedTask = Protos.TaskInfo.newBuilder
      .setTaskId(Protos.TaskID.newBuilder.setValue("myTaskId"))
      .setName("myTaskName")
      .addResources(scalarResource("cpus", 5))
      .addResources(scalarResource("mem", 6000))
      .setCommand(Protos.CommandInfo.newBuilder.setShell(true).setValue("some command here"))

    val st = StateImpl(Set.empty)
    // Hands out the big task on the first offer only; once it is tracked, later offers get nothing
    val mgr = new StateManagerImpl(driver, expFwId, expmi) {
      override def processOffer(offer: Offer)(state: StateImpl) = {
        if (state.reconcileTasks.isEmpty) {
          val newState = state.copy(reconcileTasks = state.reconcileTasks + ReconcileTaskStatus("myTaskId", ""))
          (newState, Seq(expLaunchedTask))
        } else {
          (state, Seq.empty)
        }
      }
    }

    val scheduler = new Scheduler(mgr)
    scheduler.init(st, driver, Seq.empty).runAsync { case _ => /* noop */ }
    Thread.sleep(500)
    scheduler.registered(driver, expFwId, expmi)
    Thread.sleep(5000) // TODO: need to sleep ReconcileState.minTaskReconciliationWait or offers will be declined
    driver.reconciled must equal(true)
    scheduler.resourceOffers(driver, List(expOffer1, expOffer2).asJava)
    Thread.sleep(1000)
    scheduler.shutdown(driver)

    // The launched task is expected to carry the slave id of the offers
    val expLaunchedTasks = List(expLaunchedTask.setSlaveId(Protos.SlaveID.newBuilder.setValue(slaveId)).build)
    val expAcceptedOffers = List(expOffer1.getId, expOffer2.getId)
    driver.launchedTasks.toList must equal(expLaunchedTasks)

    // TODO: This will fail until we combine offers
    driver.acceptedOffers.toList must equal(expAcceptedOffers)

  }

  // Fresh random identifier (a UUID string) on every call
  private def genid = UUID.randomUUID().toString

  // FrameworkID wrapping the given id (random by default)
  private def fwId(id: String = genid) = Protos.FrameworkID.newBuilder.setValue(id).build

  // Built SCALAR resource with the given name and amount
  private def scalarResource(name: String, value: Double): Protos.Resource = {
    Protos.Resource.newBuilder
      .setName(name)
      .setType(Protos.Value.Type.SCALAR)
      .setScalar(Protos.Value.Scalar.newBuilder().setValue(value))
      .build
  }

  // Offer with fixed resources (3 cpus, 4000 mem, 1000000 disk); offer id and slave id are random unless given
  private def offer(fwid: Protos.FrameworkID, id: String = genid, slaveId: String = genid): Protos.Offer = {
    val resources = Seq(
      scalarResource("cpus", 3),
      scalarResource("mem", 4000),
      scalarResource("disk", 1000000)
    ).asJava

    Protos.Offer.newBuilder
      .setId(Protos.OfferID.newBuilder.setValue(id))
      .setFrameworkId(fwid)
      .setSlaveId(Protos.SlaveID.newBuilder.setValue(slaveId))
      .setHostname("hostname")
      .addAllResources(resources)
      .build
  }

  // MasterInfo with fixed host/ip/port/version and the given id (random by default)
  private def masterInfo(id: String = genid): Protos.MasterInfo = {
    Protos.MasterInfo.newBuilder
      .setHostname("localhost")
      .setIp(123)
      .setId(id)
      .setPort(5050)
      .setVersion("0.25.0")
      .build

  }

  // TaskStatus in the given state (TASK_RUNNING by default); task and slave ids are random unless given,
  // the executor id is always random
  private def taskStatus(state: Protos.TaskState = Protos.TaskState.TASK_RUNNING, taskId: String = genid,
                         slaveId: String = genid) = {
    Protos.TaskStatus.newBuilder
      .setTaskId(Protos.TaskID.newBuilder.setValue(taskId).build)
      .setSlaveId(Protos.SlaveID.newBuilder.setValue(slaveId))
      .setExecutorId(Protos.ExecutorID.newBuilder.setValue(genid))
      .setState(state)
      .build
  }

}
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 
40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner]. 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /core/src/main/scala/Scheduler.scala: -------------------------------------------------------------------------------- 1 | //: ---------------------------------------------------------------------------- 2 | //: Copyright (C) 2016 Verizon. All Rights Reserved. 3 | //: 4 | //: Licensed under the Apache License, Version 2.0 (the "License"); 5 | //: you may not use this file except in compliance with the License. 6 | //: You may obtain a copy of the License at 7 | //: 8 | //: http://www.apache.org/licenses/LICENSE-2.0 9 | //: 10 | //: Unless required by applicable law or agreed to in writing, software 11 | //: distributed under the License is distributed on an "AS IS" BASIS, 12 | //: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | //: See the License for the specific language governing permissions and 14 | //: limitations under the License. 
15 | //: 16 | //: ---------------------------------------------------------------------------- 17 | package ark 18 | 19 | import java.util.Collections 20 | 21 | import journal.Logger 22 | import org.apache.mesos 23 | import org.apache.mesos.Protos._ 24 | import org.apache.mesos.{Protos, SchedulerDriver} 25 | 26 | import scala.collection.JavaConverters._ 27 | import scala.language.postfixOps 28 | import scala.concurrent.duration._ 29 | import scalaz.Nondeterminism 30 | import scalaz.concurrent.{Strategy, Task} 31 | import scalaz.stream.{time, Process, Process1, async} 32 | import java.util.concurrent.{Executors, ExecutorService, ThreadFactory} 33 | 34 | /** 35 | * Thread safe implementation of org.apache.mesos.Scheduler based on scalaz streams. Every call to the Scheduler 36 | * interface is enqueued and processed one at a time. 37 | * 38 | * User is required to implement and provide a SchedulerStateManager. Users are expected to implement 39 | * CustomMessage's to trigger state mutation events in their state managers. CustomMessage's can be passed into 40 | * processMessage by providing scalaz.stream.Processes in the `customEvents` param passed to `init` function. 41 | * 42 | * This class also implements reconciliation algorithm as an additional message in the queue. User can pass 43 | * Scheduler.reconcileProcess to `init` function to trigger reconciliation in periodic intervals or define custom 44 | * reconciliation triggers. User must also provide list of tasks that it expects to be running, all offers will 45 | * be declined until state for all tasks has been received.
46 | */ 47 | class Scheduler[T <: SchedulerState[T]](stateManager: SchedulerStateManager[T]) extends mesos.Scheduler { 48 | 49 | private val log = Logger[this.type] 50 | private val inbound = async.boundedQueue[MesosMessage](100)(Scheduler.defaultExecutor) 51 | 52 | /** 53 | * @param state initial state 54 | * @param driver SchedulerDriver 55 | * @param customEvents Seq[ Process[ Task, CustomMessage ] ] Processes that generate custom messages to trigger 56 | * state mutation events in the manager. These need to be managed by the SchedulerStateManager 57 | * implementation in the `processCustomMessage(msg: CustomMessage)` function. 58 | */ 59 | def init(state: T, driver: SchedulerDriver, customEvents: Seq[Process[Task,CustomMessage]]): Task[Unit] = { 60 | // merge all inbound processes and define pipe into Process1 61 | val inboundProcess: Process[Task, MesosMessage] = 62 | customEvents.foldLeft(inbound.dequeue)((a,b) => a.merge(b)(Scheduler.defaultExecutor)) 63 | val sunkProcess: Process[Task,Unit] = inboundProcess pipe processMessage(state, ReconcileState.empty) 64 | 65 | // prepare to run mesos driver 66 | val driverTask = Task.fork(Task.delay{ 67 | driver.run() 68 | () 69 | })(Scheduler.defaultPool) 70 | 71 | // prepare to run process 72 | val streamTask = Task.fork(sunkProcess.run)(Scheduler.defaultPool) 73 | 74 | // return composed Task to be run by caller 75 | Nondeterminism[Task].gatherUnordered(Seq(driverTask, streamTask)).map(_ => ()) 76 | } 77 | 78 | /** 79 | * graceful shutdown 80 | * @param driver SchedulerDriver 81 | * @param failover see SchedulerDriver.stop docs 82 | */ 83 | def shutdown(driver: SchedulerDriver, failover: Boolean = false): Unit = { 84 | log.info(s"stopping driver...") 85 | driver.stop(failover) 86 | log.debug(s"stopping stream process...") 87 | inbound.kill.run 88 | } 89 | 90 | def processMessage(initialState: T, initialReconcileState: ReconcileState): Process1[MesosMessage,Unit] = { 91 | 92 | def receive(state: T, reconcileState: 
ReconcileState, msg: MesosMessage): (T, ReconcileState) = msg match { 93 | case ResourceOffersMessage(driver, offer) => 94 | if (!reconcileState.reconciling) { 95 | stateManager.processOffer(offer)(state) match { 96 | case (newState, s) if s.nonEmpty => 97 | val x: Seq[TaskInfo] = s.map(_.setSlaveId(offer.getSlaveId).build()) 98 | log.info(s"accepting offer ${offer.getId.getValue}@${offer.getSlaveId.getValue} " + 99 | s"launching tasks: ${x.map(_.getTaskId.getValue)}") 100 | driver.launchTasks(Collections.singleton(offer.getId), x.asJava) 101 | (newState, reconcileState) 102 | case (newState, _) => 103 | log.debug(s"declining offer ${offer.getId.getValue}@${offer.getSlaveId.getValue}") 104 | driver.declineOffer(offer.getId) 105 | (newState, reconcileState) 106 | } 107 | } else { 108 | log.info(s"declining all offers while reconciling task status, ${reconcileState.size} remaining") 109 | driver.declineOffer(offer.getId) 110 | val newReconcileState = checkReconciliation(reconcileState, driver) 111 | (state, newReconcileState) 112 | } 113 | 114 | case OfferRescindedMessage(driver, offerId) => 115 | (stateManager.rescindOffer(offerId)(state), reconcileState) 116 | 117 | case RegisteredMessage(driver, frameworkId, masterInfo) => 118 | (stateManager.registered(frameworkId.getValue)(state), reconcileState) 119 | 120 | case ReregisteredMessage(driver, masterInfo) => 121 | (stateManager.reregistered(state), reconcileState) 122 | 123 | case FrameworkMessageMessage(driver, executorId, slaveId, data) => 124 | (stateManager.frameworkMessage(executorId, slaveId, data)(state), reconcileState) 125 | 126 | case StatusUpdateMessage(driver, status) => 127 | // if reconciling, remove task from remaining tasks 128 | val newReconcileState = if (reconcileState.reconciling) { 129 | val filteredState = reconcileState.copy( 130 | reconcilingTasks = reconcileState.reconcilingTasks.filterNot(_.taskId == status.getTaskId.getValue)) 131 | checkReconciliation(filteredState, driver) 132 | } else 
{ 133 | reconcileState 134 | } 135 | 136 | val (newState, killTasks) = stateManager.statusUpdate(status)(state) 137 | killTasks.map(driver.killTask) 138 | (newState, newReconcileState) 139 | 140 | case SlaveLostMessage(driver, slaveId) => 141 | // TODO: Do we need to kill this task in case slave comes back up??? We would want to kill it in that case 142 | val (newState, killTasks) = stateManager.slaveLost(slaveId)(state) 143 | killTasks.map(driver.killTask) 144 | (newState, reconcileState) 145 | 146 | case ExecutorLostMessage(driver, executorId, slaveId, status) => 147 | // TODO: Do we need to kill this task in case executor comes back up??? We would want to kill it in that case 148 | val (newState, killTasks) = stateManager.executorLost(executorId, slaveId, status)(state) 149 | killTasks.map(driver.killTask) 150 | (newState, reconcileState) 151 | 152 | case ErrorMessage(driver, message) => 153 | log.error(s"error message from mesos master: $message") 154 | (stateManager.error(message)(state), reconcileState) 155 | 156 | case ReconcileMessage(driver) => 157 | (state, startReconciliation(state, driver)) 158 | 159 | case x: CustomMessage => 160 | (stateManager.processCustomMessage(x)(state), reconcileState) 161 | } 162 | 163 | def go(state: T, reconcileState: ReconcileState): Process1[MesosMessage,Unit] = Process.receive1 { msg => 164 | log.debug(s"message received ${msg.getClass}") 165 | val (next, nextReconcile) = receive(state, reconcileState, msg) 166 | 167 | // after processing each message we need to compare tasks in new state against tasks in previous state 168 | // to determine what tasks need to be killed 169 | state.reconcileTasks.diff(next.reconcileTasks).foreach(t => 170 | msg.driver.killTask(Protos.TaskID.newBuilder().setValue(t.taskId).build)) 171 | 172 | Process.emit(()) ++ go(next, nextReconcile) 173 | } 174 | 175 | go(initialState, initialReconcileState) 176 | } 177 | 178 | // Build the reconciliation state from the current scheduler state and request all tasks to be reconciled
179 | // NOT THREAD SAFE!!! it should only be called within `processMessage` function 180 | private def startReconciliation(state: T, driver: SchedulerDriver): ReconcileState = { 181 | log.info(s"starting task reconciliation for all tasks") 182 | val reconcileState = ReconcileState(state) 183 | driver.reconcileTasks(Seq.empty[Protos.TaskStatus].asJavaCollection) 184 | reconcileState 185 | } 186 | 187 | // Check if max reconciliation wait time has elapsed, resend reconciliation request for remaining tasks 188 | // NOT THREAD SAFE!!! it should only be called within `processMessage` function 189 | private def checkReconciliation(reconcileState: ReconcileState, driver: SchedulerDriver): ReconcileState = { 190 | if (reconcileState.expired) { 191 | log.info(s"starting task reconciliation for remaining ${reconcileState.size} tasks") 192 | val newReconcileState = reconcileState.copy(reconciledAt = System.currentTimeMillis) 193 | driver.reconcileTasks(newReconcileState.getJavaCollection) 194 | newReconcileState 195 | } else { 196 | reconcileState 197 | } 198 | } 199 | 200 | override def resourceOffers(driver: SchedulerDriver, offers: java.util.List[Offer]): Unit = { 201 | // log.debug(s"received ${offers.size} offers") 202 | // TODO: Group offers by slave ID to be able to combine one big task in the resources of multiple offers 203 | inbound.enqueueAll(offers.asScala.map(o => ResourceOffersMessage(driver, o))).run 204 | } 205 | 206 | 207 | override def offerRescinded(driver: SchedulerDriver, offerId: OfferID): Unit = { 208 | log.info(s"offer [${offerId.getValue}] has been rescinded") 209 | inbound.enqueueOne(OfferRescindedMessage(driver, offerId)).run 210 | } 211 | 212 | override def frameworkMessage(driver: SchedulerDriver, executorId: ExecutorID, slaveId: SlaveID, data: Array[Byte]) 213 | : Unit = { 214 | log.info(s"frameworkMessage slave=${slaveId.getValue} executor=${executorId.getValue} data size=${data.length}") 215 | inbound.enqueueOne(FrameworkMessageMessage(driver, 
executorId, slaveId, data)).run 216 | } 217 | 218 | override def statusUpdate(driver: SchedulerDriver, status: TaskStatus): Unit = { 219 | log.info(s"statusUpdate ${status.getState} ${status.getTaskId.getValue}: ${status.getMessage}") 220 | inbound.enqueueOne(StatusUpdateMessage(driver, status)).run 221 | } 222 | 223 | override def slaveLost(driver: SchedulerDriver, slaveId: SlaveID): Unit = { 224 | log.info(s"slaveLost ${slaveId.getValue}") 225 | inbound.enqueueOne(SlaveLostMessage(driver, slaveId)).run 226 | } 227 | 228 | override def executorLost(driver: SchedulerDriver, executorId: ExecutorID, slaveId: SlaveID, status: Int): Unit = { 229 | log.info(s"executorLost slave=${slaveId.getValue} executor=${executorId.getValue} status=$status") 230 | inbound.enqueueOne(ExecutorLostMessage(driver, executorId, slaveId, status)).run 231 | } 232 | 233 | override def error(driver: SchedulerDriver, message: String): Unit = { 234 | log.error(s"Scheduler error: $message") 235 | inbound.enqueueOne(ErrorMessage(driver, message)).run 236 | } 237 | 238 | // When framework registers it is recommended to trigger reconciliation, sending a RegisteredMessage first 239 | // to allow state manager to initialize state before starting reconciliation. 240 | override def registered(driver: SchedulerDriver, frameworkId: FrameworkID, masterInfo: MasterInfo): Unit = { 241 | val host = masterInfo.getHostname 242 | val port = masterInfo.getPort 243 | val id = frameworkId.getValue 244 | log.info(s"Registered with Mesos master [$host:$port] frameworkID=$id") 245 | inbound.enqueueAll(Seq(RegisteredMessage(driver, frameworkId, masterInfo), ReconcileMessage(driver))).run 246 | } 247 | 248 | // When framework reregisters it is recommended to trigger reconciliation, sending a ReregisteredMessage first 249 | // to allow state manager to initialize state before starting reconciliation.
250 | override def reregistered(driver: SchedulerDriver, masterInfo: MasterInfo): Unit = { 251 | log.info(s"Reregistered with Mesos master ${masterInfo.getHostname}:${masterInfo.getPort}") 252 | inbound.enqueueAll(Seq(ReregisteredMessage(driver, masterInfo), ReconcileMessage(driver))).run 253 | } 254 | 255 | override def disconnected(driver: SchedulerDriver): Unit = { 256 | log.error(s"Disconnected from Mesos master...") 257 | } 258 | 259 | } 260 | 261 | object Scheduler { 262 | // this process is used to trigger reconciliation every few mins and it can be passed in to Scheduler 263 | def reconcileProcess(driver: SchedulerDriver, reconcileInterval: FiniteDuration): Process[Task, ReconcileMessage] = { 264 | time.awakeEvery(reconcileInterval)(defaultExecutor, timeOutScheduler) 265 | .map(_ => ReconcileMessage(driver)) 266 | } 267 | 268 | private def daemonThreads(name: String) = new ThreadFactory { 269 | def newThread(r: Runnable) = { 270 | val t = Executors.defaultThreadFactory.newThread(r) 271 | t.setDaemon(true) 272 | t.setName(name) 273 | t 274 | } 275 | } 276 | 277 | val defaultPool: ExecutorService = Executors.newFixedThreadPool(10, daemonThreads("scheduler")) 278 | val defaultExecutor: Strategy = Strategy.Executor(defaultPool) 279 | val timeOutScheduler = Executors.newScheduledThreadPool(10, daemonThreads("scheduler-sleep")) 280 | 281 | } 282 | 283 | object `package` { 284 | implicit val dontUseTheDefaultStrategy: scalaz.concurrent.Strategy = null 285 | implicit val theDefaultStrategyCausesProblems: scalaz.concurrent.Strategy = null 286 | } 287 | --------------------------------------------------------------------------------