├── project
├── build.properties
├── plugins.sbt
└── CentralRequirementsPlugin.scala
├── version.sbt
├── .gitignore
├── docs
└── img
│ └── logo.png
├── NOTICE
├── project.sbt
├── core
├── build.sbt
└── src
│ ├── main
│ └── scala
│ │ ├── messages.scala
│ │ ├── ReconcileState.scala
│ │ ├── OneTaskPerSlaveStateManager.scala
│ │ ├── SchedulerState.scala
│ │ └── Scheduler.scala
│ └── test
│ └── scala
│ ├── OneTaskPerSlaveStateManagerSpec.scala
│ ├── testimpl.scala
│ └── SchedulerSpec.scala
├── example
├── src
│ └── main
│ │ ├── resources
│ │ └── logback.xml
│ │ └── scala
│ │ ├── CustomMessageHandler.scala
│ │ ├── Main.scala
│ │ └── Service.scala
├── Dockerfile
└── build.sbt
├── .travis.yml
├── README.md
└── LICENSE
/project/build.properties:
--------------------------------------------------------------------------------
1 | sbt.version=0.13.12
2 |
--------------------------------------------------------------------------------
/version.sbt:
--------------------------------------------------------------------------------
1 | version in ThisBuild := "0.1.0-SNAPSHOT"
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | .idea
3 | target
4 | log
5 | project/project/
6 | tmp
7 |
--------------------------------------------------------------------------------
/docs/img/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Verizon/ark/HEAD/docs/img/logo.png
--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
1 | Mesos Scheduler
2 | Copyright (c) 2016 Verizon. All rights reserved.
3 |
4 | This project includes code developed at Verizon.
5 |
6 | Licensed under Apache License 2.0. See LICENSE for terms.
7 |
--------------------------------------------------------------------------------
/project.sbt:
--------------------------------------------------------------------------------
1 |
2 | organization in Global := "io.verizon.ark"
3 |
4 | scalaVersion in Global := "2.10.6"
5 |
6 | lazy val ark = project.in(file(".")).aggregate(core, example)
7 |
8 | lazy val core = project
9 |
10 | lazy val example = project.dependsOn(core % "test->test;compile->compile")
11 |
12 | enablePlugins(DisablePublishingPlugin)
13 |
--------------------------------------------------------------------------------
/core/build.sbt:
--------------------------------------------------------------------------------
1 |
2 | libraryDependencies ++= Seq(
3 | "io.verizon.journal" %% "core" % "2.3.15",
4 | "org.scalaz.stream" %% "scalaz-stream" % "0.7.3a",
5 | "org.apache.mesos" % "mesos" % "0.26.0"
6 | )
7 |
8 | scalacOptions in Test ~= (_.filterNot(Set("-Ywarn-value-discard")))
9 |
10 | ivyScala := ivyScala.value map { _.copy(overrideScalaVersion = true) }
11 |
--------------------------------------------------------------------------------
/example/src/main/resources/logback.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/project/plugins.sbt:
--------------------------------------------------------------------------------
1 |
2 | resolvers += Resolver.url(
3 | "tpolecat-sbt-plugin-releases",
4 | url("http://dl.bintray.com/content/tpolecat/sbt-plugin-releases"))(
5 | Resolver.ivyStylePatterns)
6 |
7 | addSbtPlugin("io.verizon.build" % "sbt-rig" % "1.1.20")
8 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.11.2")
9 |
10 | // docs
11 | addSbtPlugin("com.typesafe.sbt" % "sbt-site" % "0.8.1")
12 | addSbtPlugin("com.typesafe.sbt" % "sbt-ghpages" % "0.5.3")
13 | addSbtPlugin("org.tpolecat" % "tut-plugin" % "0.3.2")
14 | addSbtPlugin("com.eed3si9n" % "sbt-unidoc" % "0.3.2")
15 |
16 | scalacOptions += "-deprecation"
17 |
--------------------------------------------------------------------------------
/project/CentralRequirementsPlugin.scala:
--------------------------------------------------------------------------------
1 | package verizon.build
2 |
3 | import sbt._, Keys._
4 | import xerial.sbt.Sonatype.autoImport.sonatypeProfileName
5 |
6 | object CentralRequirementsPlugin extends AutoPlugin {
7 |
8 | override def trigger = allRequirements
9 |
10 | override def requires = RigPlugin
11 |
12 | override lazy val projectSettings = Seq(
13 | sonatypeProfileName := "io.verizon",
14 | pomExtra in Global := {
15 |
16 |
17 | rolandomanrique
18 | Rolando Manrique
19 | http://github.com/rolandomanrique
20 |
21 |
22 | stew
23 | Stew O'Connor
24 | http://github.com/stew
25 |
26 |
27 | },
28 | licenses := Seq("Apache-2.0" -> url("https://www.apache.org/licenses/LICENSE-2.0.html")),
29 | homepage := Some(url("http://verizon.github.io/ark/")),
30 | scmInfo := Some(ScmInfo(url("https://github.com/verizon/ark"),
31 | "git@github.com:verizon/ark.git"))
32 | )
33 | }
34 |
--------------------------------------------------------------------------------
/example/Dockerfile:
--------------------------------------------------------------------------------
1 | #: ----------------------------------------------------------------------------
2 | #: Copyright (C) 2016 Verizon. All Rights Reserved.
3 | #:
4 | #: Licensed under the Apache License, Version 2.0 (the "License");
5 | #: you may not use this file except in compliance with the License.
6 | #: You may obtain a copy of the License at
7 | #:
8 | #: http://www.apache.org/licenses/LICENSE-2.0
9 | #:
10 | #: Unless required by applicable law or agreed to in writing, software
11 | #: distributed under the License is distributed on an "AS IS" BASIS,
12 | #: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | #: See the License for the specific language governing permissions and
14 | #: limitations under the License.
15 | #:
16 | #: ----------------------------------------------------------------------------
17 | FROM ubuntu:14.04
18 |
19 | RUN echo "deb http://repos.mesosphere.io/ubuntu/ trusty main" > /etc/apt/sources.list.d/mesosphere.list && \
20 | apt-key adv --keyserver keyserver.ubuntu.com --recv E56151BF && \
21 | apt-get -y update && \
22 | apt-get -y install mesos=0.25.* && \
23 | apt-get clean && rm -rf /var/lib/apt/lists/*
24 |
25 | VOLUME /opt/app
26 |
27 | WORKDIR /opt/app
28 | # `exec` replaces the /bin/sh wrapper that a shell-form ENTRYPOINT adds, so the JVM itself receives SIGTERM on `docker stop`.
29 | ENTRYPOINT exec java -jar example-assembly-*.jar
30 |
--------------------------------------------------------------------------------
/example/build.sbt:
--------------------------------------------------------------------------------
1 | import AssemblyKeys._
2 |
3 | assemblySettings
4 |
5 | artifact in (Compile, assembly) ~= { art =>
6 | art.copy(`classifier` = Some("assembly"))
7 | }
8 |
9 | addArtifact(artifact in (Compile, assembly), assembly)
10 |
11 | Keys.test in assembly := {}
12 |
13 | libraryDependencies ++= Seq(
14 | "org.http4s" %% "http4s-dsl" % "0.9.3",
15 | "org.http4s" %% "http4s-blaze-server" % "0.9.3",
16 | "org.http4s" %% "http4s-argonaut" % "0.9.3"
17 | )
18 |
19 | scalacOptions in Test ~= (_.filterNot(Set("-Ywarn-value-discard")))
20 |
21 | mergeStrategy in assembly <<= (mergeStrategy in assembly) { (old) => {
22 | case x if x.contains("journal") => MergeStrategy.first
23 | case x if x.contains("log4j") => MergeStrategy.discard
24 | case x if x.contains("logback.xml") => MergeStrategy.first
25 | case x if x.contains("BuildInfo") => MergeStrategy.first
26 | case x if x.contains("Pimped") => MergeStrategy.first
27 | case x if x.contains("package") => MergeStrategy.first
28 | case x if x.contains("ServiceConfig") => MergeStrategy.first
29 | case x if x.contains("JsonUtil") => MergeStrategy.first
30 | case x if x.contains("io.netty") => MergeStrategy.first
31 | case x if x.contains("ConfigLoader") => MergeStrategy.first
32 | case x => old(x)
33 | }}
34 |
35 | mainClass in run := Some("ark.example.Main")
36 | // `object Main` is declared in package ark.example (example/src/main/scala/Main.scala); `run` above and `assembly` below must both name it.
37 | mainClass in assembly := Some("ark.example.Main")
38 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: scala
2 |
3 | jdk: oraclejdk8
4 | # Keep in sync with `scalaVersion in Global` in project.sbt; this value also feeds `++$TRAVIS_SCALA_VERSION` and the sbt boot cache path below.
5 | scala: 2.10.6
6 |
7 | branches:
8 | only:
9 | - master
10 |
11 | before_script:
12 | - "if [ $TRAVIS_PULL_REQUEST = 'false' ]; then git checkout -qf $TRAVIS_BRANCH; fi"
13 |
14 | script:
15 | - |
16 | if [ $TRAVIS_PULL_REQUEST = 'false' ]; then
17 | if [ $RELEASE_ON_PUSH = 'false' ]; then
18 | sbt test coverageReport
19 | else
20 | sbt ++$TRAVIS_SCALA_VERSION 'release with-defaults'
21 | fi
22 | else
23 | sbt test coverageReport
24 | fi
25 | - find $HOME/.sbt -name "*.lock" -delete # -delete succeeds when nothing matches; piping into xargs rm fails the step on empty input
26 | - find $HOME/.ivy2 -name "ivydata-*.properties" -delete
27 |
28 | cache:
29 | directories:
30 | - $HOME/.ivy2/cache
31 | - $HOME/.sbt/boot/scala-$TRAVIS_SCALA_VERSION
32 |
33 | after_success:
34 | - find $HOME/.sbt -name "*.lock" -delete # -delete succeeds when nothing matches; piping into xargs rm errors on empty input
35 | - find $HOME/.ivy2 -name "ivydata-*.properties" -delete
36 | - "bash <(curl -s https://codecov.io/bash) -r $TRAVIS_REPO_SLUG -t $CODECOV_TOKEN"
37 |
38 | env:
39 | global:
40 | - secure: "COUrXj1IJiY+yvmBABFW8+17lGs6ct8FuGt+XMgtkXO/ZXQYkffnqKjDyZYClCvgKDRyBNZiZ8HexXEcu3g/KKG9Qzx+/f4YcZEW6u/d/KxzWNHo7yuk3cX1p+mMCFrqWss7PjFeoKy4VhoZb8LmHVOBNgI6zrOB9KeyrUHmnbM="
41 | - secure: "bBqIDTVUUIHxj6ARiKBQ6MVbAVw+bRtF6OrPiyLYMhQrOEwLXHjvPb3sKowUAPJtv6CHruiFjZVK1mgYofiybps6PJPDfoRME/yCAbu8KNv1bWJLf58uvwWoH30dEoGliv0Lw90KxHxs7WFXkksOTvJi9B8G7WFK/4y6ACjCZew="
42 | - secure: "KmIhYTvl5jhRbZSwfOJPwyc0c84pPJdKD4DE7hx+DPookbClTfP0s5HB0m1wiM0jRs+GO5pNpasFMuyze0F5PAHbz5BZj312704QovUUWA9kEAnWHiTj3KxTN26hA8aCUhfzYwFhbRH/9ayw8HTzmQSuytlEmbtfOMa8vNaDMfE="
43 |
--------------------------------------------------------------------------------
/core/src/main/scala/messages.scala:
--------------------------------------------------------------------------------
1 | //: ----------------------------------------------------------------------------
2 | //: Copyright (C) 2016 Verizon. All Rights Reserved.
3 | //:
4 | //: Licensed under the Apache License, Version 2.0 (the "License");
5 | //: you may not use this file except in compliance with the License.
6 | //: You may obtain a copy of the License at
7 | //:
8 | //: http://www.apache.org/licenses/LICENSE-2.0
9 | //:
10 | //: Unless required by applicable law or agreed to in writing, software
11 | //: distributed under the License is distributed on an "AS IS" BASIS,
12 | //: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | //: See the License for the specific language governing permissions and
14 | //: limitations under the License.
15 | //:
16 | //: ----------------------------------------------------------------------------
17 | package ark
18 |
19 | import org.apache.mesos.Protos._
20 | import org.apache.mesos.SchedulerDriver
21 |
22 | // Models every message that can come from the Mesos master; each message carries a SchedulerDriver
23 | sealed trait MesosMessage { def driver: SchedulerDriver }
24 | case class ResourceOffersMessage(override val driver: SchedulerDriver, offer: Offer)
25 | extends MesosMessage // carries a single Offer
26 | case class OfferRescindedMessage(override val driver: SchedulerDriver, offerId: OfferID)
27 | extends MesosMessage
28 | case class RegisteredMessage(override val driver: SchedulerDriver, frameworkId: FrameworkID, masterInfo: MasterInfo)
29 | extends MesosMessage
30 | case class ReregisteredMessage(override val driver: SchedulerDriver, masterInfo: MasterInfo)
31 | extends MesosMessage
32 | case class FrameworkMessageMessage(override val driver: SchedulerDriver, executorId: ExecutorID, slaveId: SlaveID,
33 | data: Array[Byte]) extends MesosMessage // NOTE: Array fields compare by reference, so two messages with equal bytes are not ==
34 | case class StatusUpdateMessage(override val driver: SchedulerDriver, status: TaskStatus)
35 | extends MesosMessage
36 | case class SlaveLostMessage(override val driver: SchedulerDriver, slaveId: SlaveID)
37 | extends MesosMessage
38 | case class ExecutorLostMessage(override val driver: SchedulerDriver, executorId: ExecutorID, slaveId: SlaveID,
39 | status: Int) extends MesosMessage
40 | case class ErrorMessage(override val driver: SchedulerDriver, message: String)
41 | extends MesosMessage
42 |
43 | // Users can extend CustomMessage to trigger state mutations through the Scheduler.customEvents Process. The trait is not sealed, so matches on it are not exhaustiveness-checked.
44 | trait CustomMessage extends MesosMessage
45 | case class ReconcileMessage(override val driver: SchedulerDriver)
46 | extends CustomMessage // built-in custom message; presumably emitted by Scheduler.reconcileProcess - confirm in Scheduler.scala
47 |
--------------------------------------------------------------------------------
/example/src/main/scala/CustomMessageHandler.scala:
--------------------------------------------------------------------------------
1 | //: ----------------------------------------------------------------------------
2 | //: Copyright (C) 2016 Verizon. All Rights Reserved.
3 | //:
4 | //: Licensed under the Apache License, Version 2.0 (the "License");
5 | //: you may not use this file except in compliance with the License.
6 | //: You may obtain a copy of the License at
7 | //:
8 | //: http://www.apache.org/licenses/LICENSE-2.0
9 | //:
10 | //: Unless required by applicable law or agreed to in writing, software
11 | //: distributed under the License is distributed on an "AS IS" BASIS,
12 | //: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | //: See the License for the specific language governing permissions and
14 | //: limitations under the License.
15 | //:
16 | //: ----------------------------------------------------------------------------
17 | package ark
18 | package example
19 |
20 | import org.apache.mesos.MesosSchedulerDriver
21 |
22 | import scalaz.\/
23 |
24 | case class SchedulerInfo(mesosMaster: String, frameworkId: String, frameworkName: String, reqcpu: Double, reqmem: Double) // payload returned by GET /info
25 | case class GetInfo(override val driver: MesosSchedulerDriver, cb: (Throwable \/ SchedulerInfo) => Unit) extends CustomMessage // cb is invoked with the SchedulerInfo when the message is processed
26 | case class Blacklist(override val driver: MesosSchedulerDriver, slaveId: String) extends CustomMessage // slaveId is the raw id string from the request path
27 | case class Unblacklist(override val driver: MesosSchedulerDriver, slaveId: String) extends CustomMessage // slaveId is the raw id string from the request path
28 | // Mixin for OneTaskPerSlaveStateManager that handles the example's custom messages (GetInfo, Blacklist, Unblacklist).
29 | trait CustomMessageHandler { self: OneTaskPerSlaveStateManager =>
30 | // Mesos master address reported back as SchedulerInfo.mesosMaster.
31 | def master: String
32 | // Applies one custom message to the state. NOTE(review): there is no default case, so any other CustomMessage (e.g. ReconcileMessage) reaching this method throws MatchError - confirm the Scheduler never forwards those here.
33 | override def processCustomMessage(msg: CustomMessage)(state: OneTaskPerSlaveState): OneTaskPerSlaveState = msg match {
34 | // GetInfo comes from the HTTP service endpoints and carries a callback function; we build a SchedulerInfo from the
35 | // current state and pass it to the callback. The state itself is returned unchanged.
36 | case GetInfo(_, cb) =>
37 | val info: SchedulerInfo = SchedulerInfo(master, state.frameworkId, state.frameworkName, reqcpu, reqmem)
38 | cb(\/.right(info))
39 | state
40 |
41 | // Drop the reconcile entries recorded for that slave and add slaveId to the blacklist (only the state is changed here)
42 | case Blacklist(_, slaveId) =>
43 | state.copy(reconcileTasks = state.reconcileTasks.filterNot(_.slaveId == slaveId), blacklist = state.blacklist + slaveId)
44 |
45 | // Remove slaveId from the blacklist
46 | case Unblacklist(_, slaveId) =>
47 | state.copy(blacklist = state.blacklist.filterNot(_ == slaveId))
48 |
49 | }
50 |
51 | }
--------------------------------------------------------------------------------
/core/src/main/scala/ReconcileState.scala:
--------------------------------------------------------------------------------
1 | //: ----------------------------------------------------------------------------
2 | //: Copyright (C) 2016 Verizon. All Rights Reserved.
3 | //:
4 | //: Licensed under the Apache License, Version 2.0 (the "License");
5 | //: you may not use this file except in compliance with the License.
6 | //: You may obtain a copy of the License at
7 | //:
8 | //: http://www.apache.org/licenses/LICENSE-2.0
9 | //:
10 | //: Unless required by applicable law or agreed to in writing, software
11 | //: distributed under the License is distributed on an "AS IS" BASIS,
12 | //: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | //: See the License for the specific language governing permissions and
14 | //: limitations under the License.
15 | //:
16 | //: ----------------------------------------------------------------------------
17 | package ark
18 |
19 | import java.util
20 | import org.apache.mesos.Protos
21 | import scala.collection.JavaConverters._
22 |
23 | // State for the reconciliation algorithm (see http://mesos.apache.org/documentation/latest/reconciliation/).
24 | // Holds the tasks still pending reconciliation; no offers will be accepted until that set is empty.
25 | case class ReconcileState(reconciledAt: Long, reconcilingTasks: Set[ReconcileTaskStatus],
26 | minTaskReconciliationWait: Long = 5000, maxTaskReconciliationWait: Long = 30000) { // waits are compared with currentTimeMillis deltas, i.e. milliseconds
27 | // reconciledAt is compared against System.currentTimeMillis(), so it is an epoch-millisecond timestamp.
28 | val size = reconcilingTasks.size // number of tasks still pending
29 | def minTimeElapsed: Boolean = System.currentTimeMillis() - reconciledAt > minTaskReconciliationWait
30 | def maxTimeElapsed: Boolean = System.currentTimeMillis() - reconciledAt > maxTaskReconciliationWait
31 | def reconciling: Boolean = reconcilingTasks.nonEmpty || !minTimeElapsed // true while tasks remain or the minimum wait has not passed
32 | def expired: Boolean = reconcilingTasks.nonEmpty && maxTimeElapsed // true when tasks remain although the maximum wait has passed
33 | def getJavaCollection: util.Collection[Protos.TaskStatus] = reconcilingTasks.map(_.toTaskStatus).asJavaCollection // pending tasks as protobuf statuses
34 |
35 | }
36 | // Factories for ReconcileState.
37 | object ReconcileState {
38 | val empty = ReconcileState(0L, Set.empty) // no pending tasks, reconciledAt = 0
39 | def apply(state: SchedulerState[_]): ReconcileState = ReconcileState(System.currentTimeMillis, state.reconcileTasks) // starts a round now from the state's reconcileTasks
40 | }
41 |
42 | // Minimal information required to build a Protos.TaskStatus for reconciliation
43 | case class ReconcileTaskStatus(taskId: String, slaveId: String) {
44 | def toTaskStatus: Protos.TaskStatus = Protos.TaskStatus.newBuilder()
45 | .setState(Protos.TaskState.TASK_RUNNING) // every status is built with TASK_RUNNING, whatever the task's real state
46 | .setTaskId(Protos.TaskID.newBuilder.setValue(taskId).build())
47 | .setSlaveId(Protos.SlaveID.newBuilder.setValue(slaveId).build())
48 | .build()
49 | }
50 |
--------------------------------------------------------------------------------
/example/src/main/scala/Main.scala:
--------------------------------------------------------------------------------
1 | //: ----------------------------------------------------------------------------
2 | //: Copyright (C) 2016 Verizon. All Rights Reserved.
3 | //:
4 | //: Licensed under the Apache License, Version 2.0 (the "License");
5 | //: you may not use this file except in compliance with the License.
6 | //: You may obtain a copy of the License at
7 | //:
8 | //: http://www.apache.org/licenses/LICENSE-2.0
9 | //:
10 | //: Unless required by applicable law or agreed to in writing, software
11 | //: distributed under the License is distributed on an "AS IS" BASIS,
12 | //: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | //: See the License for the specific language governing permissions and
14 | //: limitations under the License.
15 | //:
16 | //: ----------------------------------------------------------------------------
17 | package ark
18 | package example
19 |
20 | import org.apache.mesos.{MesosSchedulerDriver, Protos}
21 | import org.http4s.server.blaze.BlazeBuilder
22 | import scala.concurrent.duration._
23 | import scala.language.postfixOps
24 |
25 | /** Example entry point: wires a OneTaskPerSlaveStateManager-backed Scheduler to a Mesos driver
26 |   * and a small HTTP control service listening on port 9000.
27 |   *
28 |   * NOTE(review): `scala.App` runs this body through delayed initialization, so the vals below are
29 |   * only assigned once `main` has executed.
30 |   */
31 | object Main extends scala.App {
32 |   val mesosMaster = "zk://127.0.0.1:2181/mesos"
33 |   val frameworkName = "sample-scheduler"
34 |   val reqcpu = 0.1
35 |   val reqmem = 64.0
36 |   val cmd = Protos.CommandInfo.newBuilder.setShell(true)
37 |     .setValue("""echo "SAMPLE SCHEDULER! sleeping for 120 secs" && sleep 120""")
38 |
39 |   val reconciliationInterval = 1.minute
40 |   val frameworkInfo = Protos.FrameworkInfo.newBuilder
41 |     .setName(frameworkName)
42 |     .setUser("")
43 |     .build
44 |
45 |   val initialState = OneTaskPerSlaveState(frameworkName)
46 |   val manager = new OneTaskPerSlaveStateManager(reqcpu, reqmem, cmd) with CustomMessageHandler {
47 |     override val master = mesosMaster
48 |   }
49 |   val scheduler = new Scheduler(manager)
50 |   val driver = new MesosSchedulerDriver(scheduler, frameworkInfo, mesosMaster)
51 |
52 |   // System.getenv returns null when LIBPROCESS_IP is unset (e.g. when run outside Mesos), and a null
53 |   // host makes the server bind fail at startup. Fall back to listening on all interfaces instead.
54 |   val httpHost = Option(System.getenv("LIBPROCESS_IP")).filter(_.nonEmpty).getOrElse("0.0.0.0")
55 |
56 |   // Set up http service
57 |   val (service, httpStream) = Service.setup(driver)
58 |   val server = BlazeBuilder.bindHttp(9000, httpHost).mountService(service, "/").run
59 |
60 |   // Stop the http server first, then shut the scheduler down, when the JVM exits
61 |   sys.addShutdownHook {
62 |     server.shutdownNow()
63 |     scheduler.shutdown(driver)
64 |   }
65 |
66 |   // Scheduler companion object provides a process
67 |   // that triggers a reconcile message on a given interval
68 |   val reconcileProcess = Scheduler.reconcileProcess(driver, reconciliationInterval)
69 |
70 |   // When running the scheduler we can pass a list of scalaz stream
71 |   // processes that send messages to the state manager; here we pass
72 |   // the periodic reconcile process and the http service's message stream
73 |   scheduler.init(initialState, driver, Seq(reconcileProcess, httpStream)).run
74 |
75 | }
76 |
--------------------------------------------------------------------------------
/example/src/main/scala/Service.scala:
--------------------------------------------------------------------------------
1 | //: ----------------------------------------------------------------------------
2 | //: Copyright (C) 2016 Verizon. All Rights Reserved.
3 | //:
4 | //: Licensed under the Apache License, Version 2.0 (the "License");
5 | //: you may not use this file except in compliance with the License.
6 | //: You may obtain a copy of the License at
7 | //:
8 | //: http://www.apache.org/licenses/LICENSE-2.0
9 | //:
10 | //: Unless required by applicable law or agreed to in writing, software
11 | //: distributed under the License is distributed on an "AS IS" BASIS,
12 | //: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | //: See the License for the specific language governing permissions and
14 | //: limitations under the License.
15 | //:
16 | //: ----------------------------------------------------------------------------
17 | package ark
18 | package example
19 |
20 | import org.apache.mesos.MesosSchedulerDriver
21 | import org.http4s.EntityEncoder
22 | import org.http4s.server.{HttpService, Router}
23 | import org.http4s.argonaut._
24 | import org.http4s.dsl._
25 | import argonaut._
26 | import Argonaut._
27 | import scala.concurrent.ExecutionContext
28 | import scala.language.postfixOps
29 | import scalaz.concurrent.Task
30 | import scalaz.stream.async
31 | import scalaz.stream.async.mutable.Queue
32 |
33 | object Service {
34 | // JSON codec and entity encoder so a SchedulerInfo can be returned directly as a response body.
35 | implicit val infoEncoder: EntityEncoder[SchedulerInfo] = jsonEncoderOf[SchedulerInfo]
36 | implicit def infoJson: CodecJson[SchedulerInfo] = casecodec5(SchedulerInfo.apply, SchedulerInfo.unapply)(
37 | "mesosMaster", "frameworkId", "frameworkName", "reqcpu", "reqmem")
38 | // Creates a bounded queue (size 100) of custom messages; returns the http service that enqueues into it and the stream that dequeues from it.
39 | def setup(driver: MesosSchedulerDriver) = {
40 | val inbound = async.boundedQueue[CustomMessage](100)(Scheduler.defaultExecutor)
41 | val stream = inbound.dequeue
42 | (service(inbound, driver), stream)
43 | }
44 | // Mounts rootService under the empty prefix.
45 | def service(inbound: Queue[CustomMessage], driver: MesosSchedulerDriver)(
46 | implicit executionContext: ExecutionContext = ExecutionContext.global): HttpService =
47 | Router("" -> rootService(inbound, driver))
48 | // Routes: GET /info, POST /blacklist/{slaveId}, DELETE /blacklist/{slaveId}; any method on the root path answers MethodNotAllowed.
49 | def rootService(inbound: Queue[CustomMessage], driver: MesosSchedulerDriver)(
50 | implicit executionContext: ExecutionContext) = HttpService {
51 |
52 | case _ -> Root => MethodNotAllowed()
53 |
54 | case GET -> Root / "info" => {
55 | // When a request comes in we only block until the message has made it onto the queue;
56 | // after that the response just waits for the state manager to call the callback function
57 | val res: Task[SchedulerInfo] = Task.async[SchedulerInfo](cb => inbound.enqueueOne(GetInfo(driver, cb)).run)
58 | Ok(res)
59 | }
60 | // The enqueue is run synchronously; the response only confirms the request was queued, not that it was applied.
61 | case POST -> Root / "blacklist" / slaveId => {
62 | inbound.enqueueOne(Blacklist(driver, slaveId)).run
63 | Ok(s"Requested Blacklist $slaveId")
64 | }
65 | // Same as above for removing a slave from the blacklist.
66 | case DELETE -> Root / "blacklist" / slaveId => {
67 | inbound.enqueueOne(Unblacklist(driver, slaveId)).run
68 | Ok(s"Requested Unblacklist $slaveId")
69 | }
70 | }
71 |
72 | }
73 |
--------------------------------------------------------------------------------
/core/src/test/scala/OneTaskPerSlaveStateManagerSpec.scala:
--------------------------------------------------------------------------------
1 | //: ----------------------------------------------------------------------------
2 | //: Copyright (C) 2016 Verizon. All Rights Reserved.
3 | //:
4 | //: Licensed under the Apache License, Version 2.0 (the "License");
5 | //: you may not use this file except in compliance with the License.
6 | //: You may obtain a copy of the License at
7 | //:
8 | //: http://www.apache.org/licenses/LICENSE-2.0
9 | //:
10 | //: Unless required by applicable law or agreed to in writing, software
11 | //: distributed under the License is distributed on an "AS IS" BASIS,
12 | //: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | //: See the License for the specific language governing permissions and
14 | //: limitations under the License.
15 | //:
16 | //: ----------------------------------------------------------------------------
17 | package ark
18 |
19 | import org.scalatest._
20 | import scala.collection.JavaConverters._
21 | import org.apache.mesos.Protos
22 |
23 | class OneTaskPerSlaveStateManagerSpec extends FlatSpec with MustMatchers {
24 | // Exercises OneTaskPerSlaveStateManager.processOffer directly; no scheduler driver is involved.
25 | def cmdBuilder(cmd:String) = Protos.CommandInfo.newBuilder.setShell(true).setValue(cmd)
26 | // (cmdBuilder above returns a shell CommandInfo builder for the given command line)
27 | behavior of "OneTaskPerSlaveStateManager"
28 | it should "accept any offer with enough resources and run the correct command" in {
29 | val frameworkName = "some-framework"
30 | val reqcpu = 1.0
31 | val reqmem = 1024.0
32 | val cmd = cmdBuilder("java -jar my-assembly.jar")
33 | val state = OneTaskPerSlaveState(frameworkName)
34 | val mgr = new OneTaskPerSlaveStateManager(reqcpu, reqmem, cmd)
35 | // Offer twice the required cpu/mem and inspect the first task that comes back
36 | val t = mgr.processOffer(2.0, 2048.0, "someslaveid")(state)._2.head
37 | t.getCommand.getValue must equal(cmd.getValue)
38 | t.getResourcesList.asScala.foreach(x => x.getName match { // NOTE(review): non-exhaustive match (MatchError on any other resource name); asserts nothing if the list is empty
39 | case "cpus" => x.getScalar.getValue must equal(reqcpu)
40 | case "mem" => x.getScalar.getValue must equal(reqmem)
41 | })
42 | }
43 |
44 | it should "reject any offer in a slave where a task is already running" in {
45 | val frameworkName = "some-framework"
46 | val reqcpu = 1.0
47 | val reqmem = 1024.0
48 | val cmd = cmdBuilder("java -jar my-assembly.jar")
49 | val state1 = OneTaskPerSlaveState(frameworkName)
50 | val mgr = new OneTaskPerSlaveStateManager(reqcpu, reqmem, cmd)
51 | // First offer for the slave yields a task; a second offer for the same slave must yield none
52 | val (state2, s1) = mgr.processOffer(2.0, 2048.0, "someslaveid")(state1)
53 | s1 must not equal Seq.empty
54 | val (state3, s2) = mgr.processOffer(2.0, 2048.0, "someslaveid")(state2)
55 | s2 must equal(Seq.empty)
56 | }
57 |
58 | it should "list reconcile tasks correctly" in {
59 | val frameworkName = "some-framework"
60 | val reqcpu = 1.0
61 | val reqmem = 1024.0
62 | val cmd = cmdBuilder("java -jar my-assembly.jar")
63 | val state1 = OneTaskPerSlaveState(frameworkName)
64 | val mgr = new OneTaskPerSlaveStateManager(reqcpu, reqmem, cmd)
65 | val (state2, s1) = mgr.processOffer(2.0, 2048.0, "someslaveid1")(state1)
66 | s1 must not equal Seq.empty
67 | val (state3, s2) = mgr.processOffer(2.0, 2048.0, "someslaveid2")(state2)
68 | s2 must not equal Seq.empty
69 | val (state4, s3) = mgr.processOffer(2.0, 2048.0, "someslaveid3")(state3)
70 | s3 must not equal Seq.empty
71 | // Expected ids are read from the state each offer was processed against; presumably taskId is the id given to the next launched task - confirm in OneTaskPerSlaveState
72 | val exp = ReconcileTaskStatus(state1.taskId, "someslaveid1") ::
73 | ReconcileTaskStatus(state2.taskId, "someslaveid2") ::
74 | ReconcileTaskStatus(state3.taskId, "someslaveid3") ::
75 | Nil
76 | state4.reconcileTasks.toList.sortBy(_.slaveId) must equal(exp)
77 | }
78 | }
--------------------------------------------------------------------------------
/core/src/test/scala/testimpl.scala:
--------------------------------------------------------------------------------
1 | //: ----------------------------------------------------------------------------
2 | //: Copyright (C) 2016 Verizon. All Rights Reserved.
3 | //:
4 | //: Licensed under the Apache License, Version 2.0 (the "License");
5 | //: you may not use this file except in compliance with the License.
6 | //: You may obtain a copy of the License at
7 | //:
8 | //: http://www.apache.org/licenses/LICENSE-2.0
9 | //:
10 | //: Unless required by applicable law or agreed to in writing, software
11 | //: distributed under the License is distributed on an "AS IS" BASIS,
12 | //: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | //: See the License for the specific language governing permissions and
14 | //: limitations under the License.
15 | //:
16 | //: ----------------------------------------------------------------------------
17 | package ark
18 |
19 | import java.util.concurrent.atomic.AtomicInteger
20 |
21 | import org.apache.mesos.Protos._
22 | import org.apache.mesos.SchedulerDriver
23 | import scala.collection.JavaConversions._
24 | import java.util
25 |
26 | import scala.collection.mutable.ListBuffer
27 |
28 | class DriverImpl extends SchedulerDriver {
29 | // Test double for SchedulerDriver: records some calls in the buffers below and answers every call with a fixed Status.
30 | val declinedOffers = new ListBuffer[OfferID]()
31 | val acceptedOffers = new ListBuffer[OfferID]()
32 | val launchedTasks = new ListBuffer[TaskInfo]()
33 | val reconciledCount = new AtomicInteger(0)
34 | def reconciled = reconciledCount.intValue > 0
35 | // Records the declined offer id.
36 | override def declineOffer(offerId: OfferID): Status = {
37 | declinedOffers += offerId
38 | Status.DRIVER_RUNNING
39 | }
40 | // Records accepted offer ids and launched tasks; the Java collections are appended via the implicit JavaConversions imported at the top of the file.
41 | override def launchTasks(offerIds: util.Collection[OfferID], tasks: util.Collection[TaskInfo]): Status = {
42 | acceptedOffers ++= offerIds
43 | launchedTasks ++= tasks
44 | Status.DRIVER_RUNNING
45 | }
46 |
47 | // Mesos 0.23.x API; nothing is recorded for it by this stub
48 | override def acceptOffers(offerIds: util.Collection[OfferID], ops: util.Collection[Offer.Operation],
49 | filters: Filters): Status = Status.DRIVER_RUNNING
50 |
51 | override def killTask(taskId: TaskID): Status = Status.DRIVER_RUNNING
52 | // Counts reconcile requests; the statuses themselves are ignored.
53 | override def reconcileTasks(statuses: util.Collection[TaskStatus]): Status = {
54 | reconciledCount.getAndIncrement()
55 | Status.DRIVER_RUNNING
56 | }
57 | // Everything below only returns a fixed Status. NOTE(review): the overloads taking Filters or a single OfferID do not record anything.
58 | override def suppressOffers(): Status = Status.DRIVER_RUNNING
59 |
60 | override def reviveOffers(): Status = Status.DRIVER_RUNNING
61 |
62 | override def declineOffer(offerId: OfferID, filters: Filters): Status = Status.DRIVER_RUNNING
63 |
64 | override def launchTasks(offerIds: util.Collection[OfferID], tasks: util.Collection[TaskInfo],
65 | filters: Filters): Status = Status.DRIVER_RUNNING
66 |
67 | override def launchTasks(offerId: OfferID, tasks: util.Collection[TaskInfo], filters: Filters)
68 | : Status = Status.DRIVER_RUNNING
69 |
70 | override def launchTasks(offerId: OfferID, tasks: util.Collection[TaskInfo])
71 | : Status = Status.DRIVER_RUNNING
72 |
73 | override def requestResources(requests: util.Collection[Request])
74 | : Status = Status.DRIVER_RUNNING
75 |
76 | override def sendFrameworkMessage(executorId: ExecutorID, slaveId: SlaveID, data: Array[Byte])
77 | : Status = Status.DRIVER_RUNNING
78 |
79 | override def acknowledgeStatusUpdate(ackStatus: TaskStatus)
80 | : Status = Status.DRIVER_RUNNING
81 |
82 | override def abort(): Status = Status.DRIVER_STOPPED
83 |
84 | override def join(): Status = Status.DRIVER_RUNNING
85 |
86 | override def run(): Status = Status.DRIVER_RUNNING
87 |
88 | override def start(): Status = Status.DRIVER_RUNNING
89 |
90 | override def stop(): Status = Status.DRIVER_STOPPED
91 |
92 | override def stop(failover: Boolean): Status = Status.DRIVER_STOPPED
93 |
94 | }
95 | // Minimal SchedulerState for tests: it only carries the set of tasks to reconcile.
96 | case class StateImpl(override val reconcileTasks: Set[ReconcileTaskStatus]) extends SchedulerState[StateImpl]
97 | // SchedulerStateManager stub: appends each callback to `received` as the matching MesosMessage and always returns the state unchanged.
98 | class StateManagerImpl(driver: SchedulerDriver, frameworkID: FrameworkID, masterInfo: MasterInfo)
99 | extends SchedulerStateManager[StateImpl] {
100 | val received = new ListBuffer[MesosMessage]
101 | // Records the offer; launches no tasks.
102 | override def processOffer(offer: Offer)(state: StateImpl): (StateImpl, Seq[TaskInfo.Builder]) = {
103 | received += ResourceOffersMessage(driver,offer)
104 | (state, Seq.empty)
105 | }
106 | // Records the status update; the Option[TaskID] part of the result is always None.
107 | override def statusUpdate(status: TaskStatus)(state: StateImpl): (StateImpl, Option[TaskID]) = {
108 | received += StatusUpdateMessage(driver, status)
109 | (state, None)
110 | }
111 | // Records a RegisteredMessage built from the constructor's frameworkID/masterInfo; the `id` argument is ignored.
112 | override def registered(id: String)(state: StateImpl): StateImpl = {
113 | received += RegisteredMessage(driver, frameworkID, masterInfo)
114 | state
115 | }
116 | // Records the custom message as-is.
117 | override def processCustomMessage(msg: CustomMessage)(state: StateImpl): StateImpl = {
118 | received += msg
119 | state
120 | }
121 |
122 | }
--------------------------------------------------------------------------------
/core/src/main/scala/OneTaskPerSlaveStateManager.scala:
--------------------------------------------------------------------------------
1 | //: ----------------------------------------------------------------------------
2 | //: Copyright (C) 2016 Verizon. All Rights Reserved.
3 | //:
4 | //: Licensed under the Apache License, Version 2.0 (the "License");
5 | //: you may not use this file except in compliance with the License.
6 | //: You may obtain a copy of the License at
7 | //:
8 | //: http://www.apache.org/licenses/LICENSE-2.0
9 | //:
10 | //: Unless required by applicable law or agreed to in writing, software
11 | //: distributed under the License is distributed on an "AS IS" BASIS,
12 | //: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | //: See the License for the specific language governing permissions and
14 | //: limitations under the License.
15 | //:
16 | //: ----------------------------------------------------------------------------
17 | package ark
18 |
19 | import journal.Logger
20 | import org.apache.mesos.Protos
21 |
22 | object OneTaskPerSlaveState {
23 |   // State before registration: nothing tracked, nothing blacklisted, placeholder framework id, ids from 0
24 |   def apply(frameworkName: String): OneTaskPerSlaveState = new OneTaskPerSlaveState(Set.empty, Set.empty, frameworkName, "not-registered-yet", 0)
25 | }
26 |
27 | case class OneTaskPerSlaveState(override val reconcileTasks: Set[ReconcileTaskStatus], blacklist: Set[String],
28 |     frameworkName: String, frameworkId: String, nextId: Int) extends SchedulerState[OneTaskPerSlaveState] {
29 |   val taskId: String = List(frameworkName, nextId.toString, frameworkId).mkString("-") // <frameworkName>-<nextId>-<frameworkId>
30 | }
31 |
32 | /**
33 |  * Sample mesos scheduler state manager implementation that runs one task per slave.
34 |  *
35 |  * A slave is treated as taken while `reconcileTasks` holds an entry with its id; a finished or
36 |  * failed task, a lost executor and a lost slave all drop the entries recorded for that slave.
37 |  *
38 |  * @param reqcpu cpus an offer must provide; also the cpus given to each launched task
39 |  * @param reqmem mem an offer must provide; also the mem given to each launched task
40 |  * @param cmd command attached to each launched task
41 |  */
42 | class OneTaskPerSlaveStateManager(val reqcpu: Double, val reqmem: Double, val cmd: Protos.CommandInfo.Builder)
43 |   extends SimpleSchedulerStateManager[OneTaskPerSlaveState] {
44 |
45 |   private val log = Logger[this.type]
46 |
47 |   // true when a tracked task is already recorded against the slave
48 |   private def occupied(state: OneTaskPerSlaveState, slaveId: String): Boolean =
49 |     state.reconcileTasks.exists(_.slaveId == slaveId)
50 |
51 |   // state tracking one more task, with the id counter advanced by one
52 |   private def track(state: OneTaskPerSlaveState, taskId: String, slaveId: String): OneTaskPerSlaveState =
53 |     state.copy(nextId = state.nextId + 1, reconcileTasks = state.reconcileTasks + ReconcileTaskStatus(taskId, slaveId))
54 |
55 |   // state with every task recorded against the slave dropped
56 |   private def release(state: OneTaskPerSlaveState, slaveId: String): OneTaskPerSlaveState =
57 |     state.copy(reconcileTasks = state.reconcileTasks.filterNot(_.slaveId == slaveId))
58 |
59 |   // Launches one task when the offer is big enough and the slave is neither blacklisted nor taken
60 |   override def processOffer(cpus: Double, mem: Double, slaveId: String)(state: OneTaskPerSlaveState)
61 |     : (OneTaskPerSlaveState, Seq[Protos.TaskInfo.Builder]) = {
62 |     val bigEnough = reqcpu <= cpus && reqmem <= mem
63 |     val slaveFree = !(state.blacklist.contains(slaveId) || occupied(state, slaveId))
64 |     if (bigEnough && slaveFree) {
65 |       log.debug(s"accepting offer on $slaveId")
66 |       val id = state.taskId
67 |       (track(state, id, slaveId), Seq(makeTask(id, reqcpu, reqmem, cmd)))
68 |     } else {
69 |       (state, Seq.empty)
70 |     }
71 |   }
72 |
73 |   // Stores the framework id handed over at registration
74 |   override def registered(frameworkId: String)(state: OneTaskPerSlaveState): OneTaskPerSlaveState =
75 |     state.copy(frameworkId = frameworkId)
76 |
77 |   // Starts tracking a task reported on a slave that has no tracked task yet.
78 |   // The answer is always true, so this manager never asks for a task to be killed
79 |   override def taskRunning(taskId: String, executorId: String, slaveId: String)(state: OneTaskPerSlaveState)
80 |     : (OneTaskPerSlaveState, Boolean) = {
81 |     val next = if (occupied(state, slaveId)) state else track(state, taskId, slaveId)
82 |     (next, true)
83 |   }
84 |
85 |   // called when TASK_FINISHED
86 |   override def taskFinished(taskId: String, executorId: String, slaveId: String)(state: OneTaskPerSlaveState)
87 |     : OneTaskPerSlaveState = release(state, slaveId)
88 |
89 |   // called when TASK_FAILED | TASK_LOST | TASK_ERROR | TASK_KILLED
90 |   override def taskFailed(taskId: String, executorId: String, slaveId: String)(state: OneTaskPerSlaveState)
91 |     : OneTaskPerSlaveState = release(state, slaveId)
92 |
93 |   // Frees the slave of the lost executor; the returned list of task ids is always empty
94 |   override def executorLost(executorId: String, slaveId: String, status: Int)(state: OneTaskPerSlaveState)
95 |     : (OneTaskPerSlaveState, Seq[String]) = (release(state, slaveId), Seq.empty)
96 |
97 |   // Frees the lost slave; the returned list of task ids is always empty
98 |   override def slaveLost(slaveId: String)(state: OneTaskPerSlaveState)
99 |     : (OneTaskPerSlaveState, Seq[String]) = (release(state, slaveId), Seq.empty)
100 |
101 |   // Task builder using `id` as both task id and name, with cpus/mem scalar resources and the command
102 |   def makeTask(id: String, cpus: Double, mem: Double, cmd: Protos.CommandInfo.Builder): Protos.TaskInfo.Builder = {
103 |     val taskId = Protos.TaskID.newBuilder.setValue(id)
104 |     val task = Protos.TaskInfo.newBuilder
105 |     task.setTaskId(taskId)
106 |     task.setName(id)
107 |     task.addResources(scalarResource("cpus", cpus))
108 |     task.addResources(scalarResource("mem", mem))
109 |     task.setCommand(cmd)
110 |   }
111 |
112 |   // Resource builder of type SCALAR with the given name and value
113 |   protected def scalarResource(name: String, value: Double): Protos.Resource.Builder = {
114 |     val amount = Protos.Value.Scalar.newBuilder.setValue(value)
115 |     Protos.Resource.newBuilder
116 |       .setName(name)
117 |       .setType(Protos.Value.Type.SCALAR)
118 |       .setScalar(amount)
119 |   }
120 |
121 | }
--------------------------------------------------------------------------------
/core/src/main/scala/SchedulerState.scala:
--------------------------------------------------------------------------------
1 | //: ----------------------------------------------------------------------------
2 | //: Copyright (C) 2016 Verizon. All Rights Reserved.
3 | //:
4 | //: Licensed under the Apache License, Version 2.0 (the "License");
5 | //: you may not use this file except in compliance with the License.
6 | //: You may obtain a copy of the License at
7 | //:
8 | //: http://www.apache.org/licenses/LICENSE-2.0
9 | //:
10 | //: Unless required by applicable law or agreed to in writing, software
11 | //: distributed under the License is distributed on an "AS IS" BASIS,
12 | //: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | //: See the License for the specific language governing permissions and
14 | //: limitations under the License.
15 | //:
16 | //: ----------------------------------------------------------------------------
17 | package ark
18 |
19 | import org.apache.mesos.Protos.TaskState._
20 | import org.apache.mesos.Protos._
21 | import org.apache.mesos.Protos
22 |
23 | import scala.collection.JavaConverters._
24 |
25 |
26 | trait SchedulerState[T] { self: T => // self-type: whatever mixes this trait in must itself be a T
27 | // Returns the set of tasks that the framework thinks are running
28 | def reconcileTasks: Set[ReconcileTaskStatus]
29 | }
30 |
31 | case class SimpleSchedulerState(override val reconcileTasks: Set[ReconcileTaskStatus]) extends SchedulerState[SimpleSchedulerState] // SchedulerState carrying nothing beyond the reconcile task set
32 |
33 | /**
34 |  * Counterpart of org.apache.mesos.Scheduler in which every callback takes the current scheduler state
35 |  * and returns the new one. Per the design of this library each call is queued as a message and the
36 |  * messages are processed one at a time, so the callbacks never run concurrently with each other.
37 |  */
38 | trait SchedulerStateManager[T <: SchedulerState[T]] {
39 |
40 |   // Answer the tasks to run on this offer
41 |   def processOffer(offer: Offer)(state: T): (T, Seq[TaskInfo.Builder])
42 |
43 |   // Answer Some(TaskID) when that task has to be killed; nothing is killed by default
44 |   def statusUpdate(status: TaskStatus)(state: T): (T, Option[TaskID]) = (state, None)
45 |
46 |   // Answer the ids of the tasks running on the lost slave; none by default
47 |   def slaveLost(slaveId: SlaveID)(state: T): (T, Seq[TaskID]) = (state, Nil)
48 |
49 |   // Answer the ids of the tasks running on the lost executor; none by default
50 |   def executorLost(executorId: ExecutorID, slaveId: SlaveID, status: Int)(state: T): (T, Seq[TaskID]) = (state, Nil)
51 |
52 |   // Mesos also provides MasterInfo on registration, but it is never used so it is left out here
53 |   def registered(frameworkId: String)(state: T): T = state
54 |   def reregistered(state: T): T = state
55 |
56 |   // Hook for user defined messages
57 |   def processCustomMessage(msg: CustomMessage)(state: T): T = state
58 |
59 |   /*** Optional callbacks, less commonly used; each keeps the state as is by default ***/
60 |   def rescindOffer(offerId: OfferID)(state: T): T = state
61 |   def frameworkMessage(executorId: ExecutorID, slaveId: SlaveID, data: Array[Byte])(state: T): T = state
62 |   def error(message: String)(state: T): T = state
63 |
64 | }
65 |
66 | /**
67 |  * SimpleSchedulerStateManager is a very simple flavor of SchedulerStateManager that hides
68 |  * org.apache.mesos.Protos as much as possible in favor of native scala types.
69 |  */
70 | trait SimpleSchedulerStateManager[T <: SchedulerState[T]] extends SchedulerStateManager[T] {
71 |
72 |   // called when new offers come in, with the offer's total scalar cpus/mem and its slave ID
73 |   def processOffer(cpus: Double, mem: Double, slaveId: String)(state: T): (T, Seq[TaskInfo.Builder])
74 |
75 |   // simple impl of process offer that only extracts cpus/mem (0.0 when not offered) and slave ID
76 |   override def processOffer(offer: Offer)(state: T): (T, Seq[TaskInfo.Builder]) = {
77 |     val res = scalarResources(offer)
78 |     processOffer(res.getOrElse("cpus", 0.0), res.getOrElse("mem", 0.0), offer.getSlaveId.getValue)(state)
79 |   }
80 |
81 |   // return false if task should be killed, called when TASK_RUNNING | TASK_STAGING | TASK_STARTING
82 |   def taskRunning(taskId: String, executorId: String, slaveId: String)(state: T): (T, Boolean)
83 |
84 |   // called when TASK_FINISHED
85 |   def taskFinished(taskId: String, executorId: String, slaveId: String)(state: T): T
86 |
87 |   // called when TASK_FAILED | TASK_LOST | TASK_ERROR | TASK_KILLED
88 |   def taskFailed(taskId: String, executorId: String, slaveId: String)(state: T): T
89 |
90 |   // Simpler implementation of statusUpdate that hides Protos.TaskState from user implementation
91 |   override def statusUpdate(status: TaskStatus)(state: T): (T, Option[TaskID]) = {
92 |     val tid = status.getTaskId
93 |     val eid = status.getExecutorId
94 |     val sid = status.getSlaveId
95 |     status.getState match {
96 |       case TASK_RUNNING | TASK_STAGING | TASK_STARTING =>
97 |         val (newState, keep) = taskRunning(tid.getValue, eid.getValue, sid.getValue)(state)
98 |         // keep == false is the implementation asking for this task to be killed
99 |         (newState, if (keep) None else Option(tid))
100 |
101 |       case TASK_FINISHED =>
102 |         (taskFinished(tid.getValue, eid.getValue, sid.getValue)(state), None)
103 |
104 |       case TASK_FAILED | TASK_LOST | TASK_ERROR | TASK_KILLED =>
105 |         (taskFailed(tid.getValue, eid.getValue, sid.getValue)(state), None)
106 |
107 |       // Any other state (e.g. one added by a newer Mesos release) used to end in a MatchError;
108 |       // keep the state as is and kill nothing instead
109 |       case _ => (state, None)
110 |     }
111 |   }
112 |
113 |   // Simpler implementation of executorLost that hides Protos._ from user implementation
114 |   override def executorLost(executorId: ExecutorID, slaveId: SlaveID, status: Int)(state: T): (T, Seq[TaskID]) = {
115 |     val (newState, taskIds) = executorLost(executorId.getValue, slaveId.getValue, status)(state)
116 |     (newState, taskIds.map(id => TaskID.newBuilder.setValue(id).build))
117 |   }
118 |
119 |   // Return Seq[String] with task ids running in the executor
120 |   def executorLost(executorId: String, slaveId: String, status: Int)(state: T): (T, Seq[String])
121 |
122 |   // Simpler implementation of slaveLost that hides Protos._ from user implementation
123 |   override def slaveLost(slaveId: SlaveID)(state: T): (T, Seq[TaskID]) = {
124 |     val (newState, taskIds) = slaveLost(slaveId.getValue)(state)
125 |     (newState, taskIds.map(id => TaskID.newBuilder.setValue(id).build))
126 |   }
127 |
128 |   // Return Seq[String] with task ids running in the slave
129 |   def slaveLost(slaveId: String)(state: T): (T, Seq[String])
130 |
131 |   def scalarResources(offer: Offer): Map[String,Double] = {
132 |     // Sum of every SCALAR resource in the offer, keyed by resource name. The resources are kept in a
133 |     // List: going through a Set would merge entries that share name and value and under-count them
134 |     offer.getResourcesList.asScala.toList
135 |       .filter(_.getType == Protos.Value.Type.SCALAR)
136 |       .map(r => (r.getName, r.getScalar.getValue))
137 |       // Group by name
138 |       .groupBy(_._1)
139 |       // Add up values (strict map, mapValues would only return a lazy view)
140 |       .map { case (name, entries) => (name, entries.map(_._2).sum) }
141 |   }
142 |
143 | }
144 |
145 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Ark
2 |
3 | 
4 |
5 | [](https://travis-ci.org/Verizon/ark)
6 | [](https://maven-badges.herokuapp.com/maven-central/io.verizon.ark/core_2.10)
7 | [](https://codecov.io/gh/Verizon/ark)
8 |
9 | This library provides a functional scala implementation of `org.apache.mesos.Scheduler` interface provided by [Mesos java API](http://mesos.apache.org/api/latest/java/).
10 |
11 | The goal of this library is to ease development of mesos schedulers by providing out-of-the-box implementations of common operational requirements of a framework, allowing developers to focus on domain logic implementation of task state transitions.
12 |
13 | Features:
14 |
15 | * Pure functional implementation of mesos scheduler tasks state.
16 | * Scalaz stream to queue all messages sent to the framework (from mesos master or custom user defined messages) to be processed one at a time making it completely thread safe.
17 | * Recurring reconciliation based on [Mesos Reconciliation Algorithm](http://mesos.apache.org/documentation/latest/reconciliation/).
18 | * *TODO:* Re-registration on mesos master failures.
19 | * *TODO:* High-Availability mode and leader election.
20 |
21 | From the current state of the project there is a clear path to implementing the missing features above by enhancing the `ark.Scheduler.processMessage` function.
22 |
23 |
24 | ## Messages
25 |
26 | The core of this Mesos Scheduler implementation is handled by a scalaz async message queue. When Mesos calls any of the functions provided by the `Scheduler` interface, the scheduler creates one or more `ark.MesosMessage`s and enqueues them in the scalaz stream.
27 |
28 | ```scala
29 | sealed trait MesosMessage { def driver: org.apache.mesos.SchedulerDriver }
30 | ```
31 |
32 | The main scalaz stream is created inside `ark.Scheduler` to handle calls from Mesos to the Scheduler interface. Users can provide any number of `scalaz.stream.Process[scalaz.concurrent.Task, CustomMessage]` when initializing the `Scheduler`. These custom streams get merged into the internal scalaz stream. This way the user can send any `CustomMessage` to the scheduler, and it is handled by the same `processMessage` function that handles messages from Mesos.
33 |
34 | ```scala
35 | trait CustomMessage extends MesosMessage
36 | ```
37 |
38 | ## Reconciliation
39 |
40 | Mesos has very good documentation on how to implement the [Reconciliation Algorithm](http://mesos.apache.org/documentation/latest/reconciliation/). Since most frameworks need to perform reconciliation, this was the first feature to address in a common Mesos scheduler library.
41 |
42 | Reconciliation is triggered by sending a `ReconcileMessage` to the stream:
43 |
44 | ```scala
45 | case class ReconcileMessage(override val driver: SchedulerDriver) extends CustomMessage
46 | ```
47 |
48 | The `ark.Scheduler` companion object provides a convenient function to initialize a timed reconciliation stream:
49 |
50 | ```scala
51 | def reconcileProcess(driver: SchedulerDriver, reconcileInterval: FiniteDuration): Process[Task, ReconcileMessage] = {
52 | time.awakeEvery(reconcileInterval)(defaultExecutor, timeOutScheduler)
53 | .map(_ => ReconcileMessage(driver))
54 | }
55 | ```
56 |
57 | The user can create a reconcile process by calling the function above and passing it to the scheduler `init` function. This will trigger reconciliation every `reconcileInterval`, and all offers will be declined until reconciliation is over.
58 |
59 | ```scala
60 | val reconciliationInterval = 1 hour
61 | val customStreams = Seq( Scheduler.reconcileProcess(driver, reconciliationInterval) )
62 | scheduler.init(state, driver, customStreams).run
63 | ```
64 |
65 | *TODO:* The wait time to reconcile all tasks is currently fixed. Mesos recommends using truncated exponential backoff to "avoid a snowball effect in the case of the driver or master being backed up".
66 |
67 |
68 | ## Usage
69 |
70 | A full Mesos scheduler needs to implement the `ark.SchedulerState` and `ark.SchedulerStateManager` traits and run the scheduler like this:
71 |
72 | ```scala
73 | // implement state and state manager
74 | case class MyState( ... ) extends SchedulerState[MyState]
75 | class MyStateManager extends SchedulerStateManager[MyState] { ... }
76 |
77 | // initialize state and state manager
78 | val initialState = MyState( ... )
79 | val stateManager = new MyStateManager( ... )
80 |
81 | // define framework info
82 | val frameworkInfo = Protos.FrameworkInfo.newBuilder
83 | .setName("my-framework")
84 | .setOtherFrameworkattributes( ... )
85 | .build
86 |
87 | // initialize scheduler and mesos driver
88 | val scheduler = new ark.Scheduler(stateManager)
89 | val driver = new org.apache.mesos.MesosSchedulerDriver(scheduler, frameworkInfo, mesosMaster)
90 |
91 | // shutdown scheduler on exit
92 | sys addShutdownHook {
93 | scheduler.shutdown(driver)
94 | }
95 |
96 | // Seq[Process[Task,CustomMessage]] pass custom state mutation messages
97 | // Scheduler.reconcileProcess triggers reconciliation every "reconciliationInterval"
98 | val reconciliationInterval = 1 hour
99 | val customStreams = Seq(Scheduler.reconcileProcess(driver, reconciliationInterval))
100 |
101 | // run scheduler (blocking)
102 | scheduler.init(initialState, driver, customStreams).run
103 | ```
104 |
105 | ### Example
106 |
107 | The provided example implementation creates a scheduler that triggers the provided task on every slave in the cluster.
108 | This example also uses [http4s](http://http4s.org/) to set up REST endpoints to query current scheduler state by
109 | sending custom messages to the queue. User can query scheduler info and add or remove slaves from a blacklist.
110 |
111 | Running example module on a local mesos cluster with 2 slaves using docker-machine on mac (see
112 | https://github.com/mesosphere/docker-containers/tree/master/mesos):
113 |
114 | 1. Run ZK:
115 |
116 | ```bash
117 | docker run -d --net=host netflixoss/exhibitor:1.5.2
118 | ```
119 |
120 | 1. Run master:
121 |
122 | ```bash
123 | docker run -d --net=host \
124 | -e LIBPROCESS_IP=$(docker-machine ip) \
125 | -e HOSTNAME=$(docker-machine ip) \
126 | -e MESOS_PORT=5050 \
127 | -e MESOS_ZK=zk://127.0.0.1:2181/mesos \
128 | -e MESOS_QUORUM=1 \
129 | -e MESOS_REGISTRY=in_memory \
130 | -e MESOS_LOG_DIR=/var/log/mesos \
131 | -e MESOS_WORK_DIR=/var/tmp/mesos \
132 | -v "$(pwd)/log/mesos:/var/log/mesos" \
133 | -v "$(pwd)/tmp/mesos:/var/tmp/mesos" \
134 | mesosphere/mesos-master:0.25.0-0.2.70.ubuntu1404
135 | ```
136 |
137 | 1. Run slaves, notice `MESOS_PORT` and mount points change for `/var/log/mesos` and `/var/tmp/mesos`:
138 |
139 | ```bash
140 | docker run -d --net=host --privileged \
141 | -e LIBPROCESS_IP=$(docker-machine ip) \
142 | -e HOSTNAME=$(docker-machine ip) \
143 | -e MESOS_PORT=5051 \
144 | -e MESOS_MASTER=zk://127.0.0.1:2181/mesos \
145 | -e MESOS_SWITCH_USER=0 \
146 | -e MESOS_CONTAINERIZERS=docker,mesos \
147 | -e MESOS_LOG_DIR=/var/log/mesos \
148 | -e MESOS_WORK_DIR=/var/tmp/mesos \
149 | -v "$(pwd)/log/mesos1:/var/log/mesos" \
150 | -v "$(pwd)/tmp/mesos1:/var/tmp/mesos" \
151 | -v /var/run/docker.sock:/var/run/docker.sock \
152 | -v /cgroup:/cgroup \
153 | -v /sys:/sys \
154 | -v /usr/local/bin/docker:/usr/local/bin/docker \
155 | mesosphere/mesos-slave:0.25.0-0.2.70.ubuntu1404
156 |
157 | docker run -d --net=host --privileged \
158 | -e LIBPROCESS_IP=$(docker-machine ip) \
159 | -e HOSTNAME=$(docker-machine ip) \
160 | -e MESOS_PORT=5052 \
161 | -e MESOS_MASTER=zk://127.0.0.1:2181/mesos \
162 | -e MESOS_SWITCH_USER=0 \
163 | -e MESOS_CONTAINERIZERS=docker,mesos \
164 | -e MESOS_LOG_DIR=/var/log/mesos \
165 | -e MESOS_WORK_DIR=/var/tmp/mesos \
166 | -v "$(pwd)/log/mesos2:/var/log/mesos" \
167 | -v "$(pwd)/tmp/mesos2:/var/tmp/mesos" \
168 | -v /var/run/docker.sock:/var/run/docker.sock \
169 | -v /cgroup:/cgroup \
170 | -v /sys:/sys \
171 | -v /usr/local/bin/docker:/usr/local/bin/docker \
172 | mesosphere/mesos-slave:0.25.0-0.2.70.ubuntu1404
173 | ```
174 |
175 | 1. Build scheduler assembly jar
176 |
177 | ```bash
178 | sbt "project example" assembly
179 | ```
180 | 1. Build scheduler container from example/Dockerfile:
181 |
182 | ```bash
183 | docker build -t mysched example/
184 | ```
185 |
186 | 1. Run scheduler container interactively:
187 |
188 | ```bash
189 | docker run --rm --net=host -it \
190 | -e LIBPROCESS_IP=$(docker-machine ip) \
191 | -v $(pwd)/example/target/scala-2.10:/opt/app \
192 | mysched
193 | ```
194 |
195 |
196 |
--------------------------------------------------------------------------------
/core/src/test/scala/SchedulerSpec.scala:
--------------------------------------------------------------------------------
1 | //: ----------------------------------------------------------------------------
2 | //: Copyright (C) 2016 Verizon. All Rights Reserved.
3 | //:
4 | //: Licensed under the Apache License, Version 2.0 (the "License");
5 | //: you may not use this file except in compliance with the License.
6 | //: You may obtain a copy of the License at
7 | //:
8 | //: http://www.apache.org/licenses/LICENSE-2.0
9 | //:
10 | //: Unless required by applicable law or agreed to in writing, software
11 | //: distributed under the License is distributed on an "AS IS" BASIS,
12 | //: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | //: See the License for the specific language governing permissions and
14 | //: limitations under the License.
15 | //:
16 | //: ----------------------------------------------------------------------------
17 | package ark
18 |
19 | import java.util.UUID
20 |
21 | import journal.Logger
22 | import org.apache.mesos.Protos.{MasterInfo, FrameworkID, Offer}
23 | import org.apache.mesos.{SchedulerDriver, Protos}
24 | import org.scalatest._
25 | import scala.language.postfixOps
26 | import scala.concurrent.duration._
27 | import scala.collection.JavaConverters._
28 | import scalaz.concurrent.Task
29 | import scalaz.stream.async
30 |
31 |
32 | class SchedulerSpec extends FlatSpec with MustMatchers {
33 | // Drives Scheduler against the DriverImpl and StateManagerImpl doubles; the tests wait with Thread.sleep between steps
34 | private val log = Logger[this.type]
35 |
36 | behavior of "Scheduler"
37 |
38 | it should "process messages" in {
39 | val driver = new DriverImpl
40 | val expFwId = fwId()
41 | val expOffer = offer(expFwId)
42 | val expmi = masterInfo()
43 | val expStatus = taskStatus()
44 |
45 | // StateManagerImpl appends every message it is handed to mgr.received
46 | val st = StateImpl(Set.empty)
47 | val mgr = new StateManagerImpl(driver, expFwId, expmi)
48 | val scheduler = new Scheduler(mgr)
49 | scheduler.init(st, driver, Seq.empty).runAsync { case _ => /* noop */ }
50 | Thread.sleep(500)
51 | scheduler.registered(driver, expFwId, expmi)
52 | Thread.sleep(6000) // TODO: need to sleep ReconcileState.minTaskReconciliationWait or offers will be declined
53 | driver.reconciled must equal(true)
54 | scheduler.resourceOffers(driver, List(expOffer).asJava)
55 | scheduler.statusUpdate(driver, expStatus)
56 | Thread.sleep(1000)
57 | scheduler.shutdown(driver)
58 |
59 | val expReceived = RegisteredMessage(driver, expFwId, expmi) ::
60 | ResourceOffersMessage(driver, expOffer) ::
61 | StatusUpdateMessage(driver, expStatus) ::
62 | Nil
63 | mgr.received.toList must equal(expReceived)
64 |
65 | }
66 |
67 | it should "process custom messages" in {
68 | case class MyCustomMessage(driver: SchedulerDriver, id: String) extends CustomMessage
69 | val driver = new DriverImpl
70 | val expFwId = fwId()
71 | val expmi = masterInfo()
72 | val expCustMsg = MyCustomMessage(driver, genid)
73 |
74 | val customEventsQueue = async.boundedQueue[MyCustomMessage](100)(Scheduler.defaultExecutor)
75 | val customEvents = customEventsQueue.dequeue
76 |
77 | // StateManagerImpl appends every message it is handed to mgr.received
78 | val st = StateImpl(Set.empty)
79 | val mgr = new StateManagerImpl(driver, expFwId, expmi)
80 | val scheduler = new Scheduler(mgr)
81 | scheduler.init(st, driver, Seq(customEvents)).runAsync { case _ => /* noop */ }
82 | Thread.sleep(500)
83 | scheduler.registered(driver, expFwId, expmi)
84 | Thread.sleep(6000) // TODO: need to sleep ReconcileState.minTaskReconciliationWait or offers will be declined
85 | driver.reconciled must equal(true)
86 | customEventsQueue.enqueueOne(expCustMsg).run
87 | Thread.sleep(1000)
88 | scheduler.shutdown(driver)
89 |
90 | val expReceived = RegisteredMessage(driver, expFwId, expmi) :: expCustMsg :: Nil
91 | mgr.received.toList must equal(expReceived)
92 |
93 | }
94 |
95 | it should "decline all offers when reconciling and remove task from pending when status arrives" in {
96 | val driver = new DriverImpl
97 | val expFwId = fwId()
98 | val declinedOffer = offer(expFwId)
99 | val passedOffer = offer(expFwId)
100 | val expmi = masterInfo()
101 | val expTaskId = genid
102 | val expSlaveId = genid
103 | val expStatus = taskStatus(taskId = expTaskId, slaveId = expSlaveId)
104 |
105 | // the initial state already lists one task to reconcile, the one expStatus reports on
106 | val st = StateImpl(Set(ReconcileTaskStatus(expTaskId, expSlaveId)))
107 | val mgr = new StateManagerImpl(driver, expFwId, expmi)
108 | val scheduler = new Scheduler(mgr)
109 | scheduler.init(st, driver, Seq.empty).runAsync { case _ => /* noop */ }
110 | Thread.sleep(500)
111 | scheduler.registered(driver, expFwId, expmi)
112 | Thread.sleep(5000) // TODO: need to sleep ReconcileState.minTaskReconciliationWait or offers will be declined
113 | driver.reconciled must equal(true)
114 |
115 | // offers must be declined while reconciling and must never make it to the state manager
116 | scheduler.resourceOffers(driver, List(declinedOffer).asJava)
117 | scheduler.resourceOffers(driver, List(declinedOffer).asJava)
118 | scheduler.resourceOffers(driver, List(declinedOffer).asJava)
119 | scheduler.resourceOffers(driver, List(declinedOffer).asJava)
120 | Thread.sleep(500)
121 |
122 | val declinedOffers = List(declinedOffer.getId, declinedOffer.getId, declinedOffer.getId, declinedOffer.getId)
123 | driver.declinedOffers.toList must equal(declinedOffers)
124 | val expReceived = RegisteredMessage(driver, expFwId, expmi) :: Nil
125 | mgr.received.toList must equal(expReceived)
126 |
127 | // once status arrives for pending tasks, offers should make it to the state manager
128 | scheduler.statusUpdate(driver, expStatus)
129 |
130 | scheduler.resourceOffers(driver, List(passedOffer).asJava)
131 | Thread.sleep(1000)
132 | scheduler.shutdown(driver)
133 |
134 | val expReceived2 = expReceived ++ (
135 | StatusUpdateMessage(driver, expStatus) ::
136 | ResourceOffersMessage(driver, passedOffer) ::
137 | Nil)
138 | mgr.received.toList must equal(expReceived2)
139 | driver.declinedOffers.toList must equal(declinedOffers ++ List(passedOffer.getId))
140 | }
141 |
142 | it should "start reconciliation every reconcileInterval" in {
143 | val reconciliationInterval = 1 second
144 | val driver = new DriverImpl
145 | val expFwId = fwId()
146 | val expmi = masterInfo()
147 |
148 | val reconcileEvents = Scheduler.reconcileProcess(driver, reconciliationInterval)
149 |
150 | // StateManagerImpl appends every message it is handed to mgr.received
151 | val st = StateImpl(Set.empty)
152 | val mgr = new StateManagerImpl(driver, expFwId, expmi)
153 | val scheduler = new Scheduler(mgr)
154 | scheduler.init(st, driver, Seq(reconcileEvents)).runAsync { case _ => /* noop */ }
155 | Thread.sleep(500)
156 | scheduler.registered(driver, expFwId, expmi)
157 | Thread.sleep(5000) // TODO: need to sleep ReconcileState.minTaskReconciliationWait or offers will be declined
158 | driver.reconciled must equal(true)
159 | Thread.sleep(5000)
160 | scheduler.shutdown(driver)
161 |
162 | // slept 5 seconds with a 1 second reconcile interval, so expect at least 4 reconcileTasks calls on the driver
163 | driver.reconciledCount.intValue must be >= 4
164 | }
165 | // pendingUntilFixed: the final assertion is expected to fail for now, see the TODO at the end of this test
166 | it should "run one big task by combining smaller offers" in pendingUntilFixed {
167 | val driver = new DriverImpl
168 | val expFwId = fwId()
169 | val slaveId = genid
170 | val expOffer1 = offer(expFwId, slaveId = slaveId) // 3 cpus 4000 mem
171 | val expOffer2 = offer(expFwId, slaveId = slaveId) // 3 cpus 4000 mem
172 | val expmi = masterInfo()
173 | val expLaunchedTask = Protos.TaskInfo.newBuilder
174 | .setTaskId(Protos.TaskID.newBuilder.setValue("myTaskId"))
175 | .setName("myTaskName")
176 | .addResources(scalarResource("cpus", 5))
177 | .addResources(scalarResource("mem", 6000))
178 | .setCommand(Protos.CommandInfo.newBuilder.setShell(true).setValue("some command here"))
179 | // state manager that hands out expLaunchedTask for the first offer it sees and nothing afterwards
180 | val st = StateImpl(Set.empty)
181 | val mgr = new StateManagerImpl(driver, expFwId, expmi) {
182 | override def processOffer(offer: Offer)(state: StateImpl) = {
183 | if (state.reconcileTasks.isEmpty) {
184 | val newState = state.copy(reconcileTasks = state.reconcileTasks + ReconcileTaskStatus("myTaskId", ""))
185 | (newState, Seq(expLaunchedTask))
186 | } else {
187 | (state, Seq.empty)
188 | }
189 | }
190 | }
191 |
192 | val scheduler = new Scheduler(mgr)
193 | scheduler.init(st, driver, Seq.empty).runAsync { case _ => /* noop */ }
194 | Thread.sleep(500)
195 | scheduler.registered(driver, expFwId, expmi)
196 | Thread.sleep(5000) // TODO: need to sleep ReconcileState.minTaskReconciliationWait or offers will be declined
197 | driver.reconciled must equal(true)
198 | scheduler.resourceOffers(driver, List(expOffer1, expOffer2).asJava)
199 | Thread.sleep(1000)
200 | scheduler.shutdown(driver)
201 |
202 | val expLaunchedTasks = List(expLaunchedTask.setSlaveId(Protos.SlaveID.newBuilder.setValue(slaveId)).build)
203 | val expAcceptedOffers = List(expOffer1.getId, expOffer2.getId)
204 | driver.launchedTasks.toList must equal(expLaunchedTasks)
205 |
206 | // TODO: This will fail until we combine offers
207 | driver.acceptedOffers.toList must equal(expAcceptedOffers)
208 |
209 | }
210 | // Fixture helpers: random ids and Protos values with fixed sample content
211 | private def genid = UUID.randomUUID().toString
212 |
213 | private def fwId(id: String = genid) = Protos.FrameworkID.newBuilder.setValue(id).build
214 |
215 | private def scalarResource(name: String, value: Double): Protos.Resource = {
216 | Protos.Resource.newBuilder
217 | .setName(name)
218 | .setType(Protos.Value.Type.SCALAR)
219 | .setScalar(Protos.Value.Scalar.newBuilder().setValue(value))
220 | .build
221 | }
222 | // Offer with 3 cpus, 4000 mem and 1000000 disk; offer id and slave id are random unless given
223 | private def offer(fwid: Protos.FrameworkID, id: String = genid, slaveId: String = genid): Protos.Offer = {
224 | val resources = Seq(
225 | scalarResource("cpus", 3),
226 | scalarResource("mem", 4000),
227 | scalarResource("disk", 1000000)
228 | ).asJava
229 |
230 | Protos.Offer.newBuilder
231 | .setId(Protos.OfferID.newBuilder.setValue(id))
232 | .setFrameworkId(fwid)
233 | .setSlaveId(Protos.SlaveID.newBuilder.setValue(slaveId))
234 | .setHostname("hostname")
235 | .addAllResources(resources)
236 | .build
237 | }
238 |
239 | private def masterInfo(id: String = genid): Protos.MasterInfo = {
240 | Protos.MasterInfo.newBuilder
241 | .setHostname("localhost")
242 | .setIp(123)
243 | .setId(id)
244 | .setPort(5050)
245 | .setVersion("0.25.0")
246 | .build
247 |
248 | }
249 | // TaskStatus defaulting to TASK_RUNNING with random task and slave ids; the executor id is always random
250 | private def taskStatus(state: Protos.TaskState = Protos.TaskState.TASK_RUNNING, taskId: String = genid,
251 | slaveId: String = genid) = {
252 | Protos.TaskStatus.newBuilder
253 | .setTaskId(Protos.TaskID.newBuilder.setValue(taskId).build)
254 | .setSlaveId(Protos.SlaveID.newBuilder.setValue(slaveId))
255 | .setExecutorId(Protos.ExecutorID.newBuilder.setValue(genid))
256 | .setState(state)
257 | .build
258 | }
259 |
260 | }
261 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner].
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------
/core/src/main/scala/Scheduler.scala:
--------------------------------------------------------------------------------
1 | //: ----------------------------------------------------------------------------
2 | //: Copyright (C) 2016 Verizon. All Rights Reserved.
3 | //:
4 | //: Licensed under the Apache License, Version 2.0 (the "License");
5 | //: you may not use this file except in compliance with the License.
6 | //: You may obtain a copy of the License at
7 | //:
8 | //: http://www.apache.org/licenses/LICENSE-2.0
9 | //:
10 | //: Unless required by applicable law or agreed to in writing, software
11 | //: distributed under the License is distributed on an "AS IS" BASIS,
12 | //: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | //: See the License for the specific language governing permissions and
14 | //: limitations under the License.
15 | //:
16 | //: ----------------------------------------------------------------------------
17 | package ark
18 |
19 | import java.util.Collections
20 |
21 | import journal.Logger
22 | import org.apache.mesos
23 | import org.apache.mesos.Protos._
24 | import org.apache.mesos.{Protos, SchedulerDriver}
25 |
26 | import scala.collection.JavaConverters._
27 | import scala.language.postfixOps
28 | import scala.concurrent.duration._
29 | import scalaz.Nondeterminism
30 | import scalaz.concurrent.{Strategy, Task}
31 | import scalaz.stream.{time, Process, Process1, async}
32 | import java.util.concurrent.{Executors, ExecutorService, ThreadFactory}
33 |
/**
 * Thread safe implementation of org.apache.mesos.Scheduler based on scalaz streams. Every call to the Scheduler
 * interface is enqueued and processed one at a time.
 *
 * The user is required to implement and provide a SchedulerStateManager. Users are expected to implement
 * CustomMessages to trigger state mutation events in their state managers. CustomMessages can be fed into
 * processMessage by providing scalaz.stream.Processes in the `customEvents` param passed to the `init` function.
 *
 * This class also implements the reconciliation algorithm as an additional message in the queue. The user can pass
 * Scheduler.reconcileProcess to the `init` function to trigger reconciliation at periodic intervals, or define
 * custom reconciliation triggers. The user must also provide the list of tasks that are expected to be running;
 * all offers will be declined until the state of all those tasks has been received.
 */
class Scheduler[T <: SchedulerState[T]](stateManager: SchedulerStateManager[T]) extends mesos.Scheduler {

  private val log = Logger[this.type]

  // Every Mesos callback below is converted into a message and pushed onto this bounded queue (capacity 100);
  // `processMessage` consumes the queue one message at a time.
  private val inbound = async.boundedQueue[MesosMessage](100)(Scheduler.defaultExecutor)

  /**
   * Builds (but does not run) the Task that drives the scheduler.
   *
   * @param state initial state
   * @param driver SchedulerDriver
   * @param customEvents Seq[ Process[ Task, CustomMessage ] ] Processes that generate custom messages to trigger
   *                     state mutation events in the manager. These need to be managed by the
   *                     SchedulerStateManager implementation in its `processCustomMessage(msg: CustomMessage)`
   *                     function.
   * @return a Task that, once run by the caller, runs the Mesos driver and the message-processing stream, each
   *         forked onto Scheduler.defaultPool, and gathers both
   */
  def init(state: T, driver: SchedulerDriver, customEvents: Seq[Process[Task,CustomMessage]]): Task[Unit] = {
    // merge all inbound processes and define pipe into Process1
    val inboundProcess: Process[Task, MesosMessage] =
      customEvents.foldLeft(inbound.dequeue)((a,b) => a.merge(b)(Scheduler.defaultExecutor))
    val sunkProcess: Process[Task,Unit] = inboundProcess pipe processMessage(state, ReconcileState.empty)

    // prepare to run mesos driver
    val driverTask = Task.fork(Task.delay{
      driver.run()
      ()
    })(Scheduler.defaultPool)

    // prepare to run process
    val streamTask = Task.fork(sunkProcess.run)(Scheduler.defaultPool)

    // return composed Task to be run by caller
    Nondeterminism[Task].gatherUnordered(Seq(driverTask, streamTask)).map(_ => ())
  }

  /**
   * Graceful shutdown: stops the driver first, then kills the inbound queue that feeds the stream.
   *
   * @param driver SchedulerDriver
   * @param failover see SchedulerDriver.stop docs
   */
  def shutdown(driver: SchedulerDriver, failover: Boolean = false): Unit = {
    log.info("stopping driver...")
    driver.stop(failover)
    log.debug("stopping stream process...")
    inbound.kill.run
  }

  /**
   * Builds the Process1 that handles messages sequentially, threading the scheduler state and the reconciliation
   * state from one message to the next and emitting `()` after each message.
   *
   * @param initialState scheduler state used for the first message
   * @param initialReconcileState reconciliation state used for the first message
   */
  def processMessage(initialState: T, initialReconcileState: ReconcileState): Process1[MesosMessage,Unit] = {

    // Handles a single message and returns the states to use for the next one.
    def receive(state: T, reconcileState: ReconcileState, msg: MesosMessage): (T, ReconcileState) = msg match {
      case ResourceOffersMessage(driver, offer) =>
        if (!reconcileState.reconciling) {
          stateManager.processOffer(offer)(state) match {
            case (newState, builders) if builders.nonEmpty =>
              // the state manager wants to launch tasks: pin them to the slave that made the offer
              val tasks: Seq[TaskInfo] = builders.map(_.setSlaveId(offer.getSlaveId).build())
              log.info(s"accepting offer ${offer.getId.getValue}@${offer.getSlaveId.getValue} " +
                s"launching tasks: ${tasks.map(_.getTaskId.getValue)}")
              driver.launchTasks(Collections.singleton(offer.getId), tasks.asJava)
              (newState, reconcileState)
            case (newState, _) =>
              log.debug(s"declining offer ${offer.getId.getValue}@${offer.getSlaveId.getValue}")
              driver.declineOffer(offer.getId)
              (newState, reconcileState)
          }
        } else {
          // offers are not offered to the state manager until reconciliation has finished
          log.info(s"declining all offers while reconciling task status, ${reconcileState.size} remaining")
          driver.declineOffer(offer.getId)
          val newReconcileState = checkReconciliation(reconcileState, driver)
          (state, newReconcileState)
        }

      case OfferRescindedMessage(_, offerId) =>
        (stateManager.rescindOffer(offerId)(state), reconcileState)

      case RegisteredMessage(_, frameworkId, _) =>
        (stateManager.registered(frameworkId.getValue)(state), reconcileState)

      case ReregisteredMessage(_, _) =>
        (stateManager.reregistered(state), reconcileState)

      case FrameworkMessageMessage(_, executorId, slaveId, data) =>
        (stateManager.frameworkMessage(executorId, slaveId, data)(state), reconcileState)

      case StatusUpdateMessage(driver, status) =>
        // if reconciling, remove task from remaining tasks
        val newReconcileState = if (reconcileState.reconciling) {
          val filteredState = reconcileState.copy(
            reconcilingTasks = reconcileState.reconcilingTasks.filterNot(_.taskId == status.getTaskId.getValue))
          checkReconciliation(filteredState, driver)
        } else {
          reconcileState
        }

        val (newState, killTasks) = stateManager.statusUpdate(status)(state)
        // foreach rather than map: killTask is called only for its side effect
        killTasks.foreach(id => driver.killTask(id))
        (newState, newReconcileState)

      case SlaveLostMessage(driver, slaveId) =>
        // TODO: Do we need to kill this task in case slave comes back up??? We would want to kill it in that case
        val (newState, killTasks) = stateManager.slaveLost(slaveId)(state)
        killTasks.foreach(id => driver.killTask(id))
        (newState, reconcileState)

      case ExecutorLostMessage(driver, executorId, slaveId, status) =>
        // TODO: Do we need to kill this task in case executor comes back up??? We would want to kill it in that case
        val (newState, killTasks) = stateManager.executorLost(executorId, slaveId, status)(state)
        killTasks.foreach(id => driver.killTask(id))
        (newState, reconcileState)

      case ErrorMessage(_, message) =>
        log.error(s"error message from mesos master: $message")
        (stateManager.error(message)(state), reconcileState)

      case ReconcileMessage(driver) =>
        (state, startReconciliation(state, driver))

      case x: CustomMessage =>
        (stateManager.processCustomMessage(x)(state), reconcileState)
    }

    // Waits for one message, handles it, emits `()` and recurses with the resulting states.
    def go(state: T, reconcileState: ReconcileState): Process1[MesosMessage,Unit] = Process.receive1 { msg =>
      log.debug(s"message received ${msg.getClass}")
      val (next, nextReconcile) = receive(state, reconcileState, msg)

      // after processing each message we need to compare tasks in new state against tasks in previous state
      // to determine what tasks need to be killed: every task present before but absent now is killed
      state.reconcileTasks.diff(next.reconcileTasks).foreach(t =>
        msg.driver.killTask(Protos.TaskID.newBuilder().setValue(t.taskId).build))

      Process.emit(()) ++ go(next, nextReconcile)
    }

    go(initialState, initialReconcileState)
  }

  // Builds a new reconciliation state from the current scheduler state and sends the driver a reconciliation
  // request with an empty status list (per the Mesos docs an empty list requests implicit reconciliation).
  // NOT THREAD SAFE!!! it should only be called within `processMessage` function
  private def startReconciliation(state: T, driver: SchedulerDriver): ReconcileState = {
    log.info("starting task reconciliation for all tasks")
    val reconcileState = ReconcileState(state)
    driver.reconcileTasks(Seq.empty[Protos.TaskStatus].asJavaCollection)
    reconcileState
  }

  // Check if the reconciliation state has expired; if so, resend the reconciliation request for the remaining
  // tasks and restart the timer by stamping the state with the current time. Otherwise return it unchanged.
  // NOT THREAD SAFE!!! it should only be called within `processMessage` function
  private def checkReconciliation(reconcileState: ReconcileState, driver: SchedulerDriver): ReconcileState = {
    if (reconcileState.expired) {
      log.info(s"starting task reconciliation for remaining ${reconcileState.size} tasks")
      val newReconcileState = reconcileState.copy(reconciledAt = System.currentTimeMillis)
      driver.reconcileTasks(newReconcileState.getJavaCollection)
      newReconcileState
    } else {
      reconcileState
    }
  }

  /** Enqueues one ResourceOffersMessage per received offer. */
  override def resourceOffers(driver: SchedulerDriver, offers: java.util.List[Offer]): Unit = {
    // log.debug(s"received ${offers.size} offers")
    // TODO: Group offers by slave ID to be able to combine one big task in the resources of multiple offers
    inbound.enqueueAll(offers.asScala.map(o => ResourceOffersMessage(driver, o))).run
  }

  /** Logs the rescinded offer and enqueues an OfferRescindedMessage. */
  override def offerRescinded(driver: SchedulerDriver, offerId: OfferID): Unit = {
    log.info(s"offer [${offerId.getValue}] has been rescinded")
    inbound.enqueueOne(OfferRescindedMessage(driver, offerId)).run
  }

  /** Logs the origin and size of the framework message and enqueues a FrameworkMessageMessage. */
  override def frameworkMessage(driver: SchedulerDriver, executorId: ExecutorID, slaveId: SlaveID,
                                data: Array[Byte]): Unit = {
    log.info(s"frameworkMessage slave=${slaveId.getValue} executor=${executorId.getValue} data size=${data.length}")
    inbound.enqueueOne(FrameworkMessageMessage(driver, executorId, slaveId, data)).run
  }

  /** Logs the task status and enqueues a StatusUpdateMessage. */
  override def statusUpdate(driver: SchedulerDriver, status: TaskStatus): Unit = {
    log.info(s"statusUpdate ${status.getState} ${status.getTaskId.getValue}: ${status.getMessage}")
    inbound.enqueueOne(StatusUpdateMessage(driver, status)).run
  }

  /** Logs the lost slave and enqueues a SlaveLostMessage. */
  override def slaveLost(driver: SchedulerDriver, slaveId: SlaveID): Unit = {
    log.info(s"slaveLost ${slaveId.getValue}")
    inbound.enqueueOne(SlaveLostMessage(driver, slaveId)).run
  }

  /** Logs the lost executor and enqueues an ExecutorLostMessage. */
  override def executorLost(driver: SchedulerDriver, executorId: ExecutorID, slaveId: SlaveID, status: Int): Unit = {
    log.info(s"executorLost slave=${slaveId.getValue} executor=${executorId.getValue} status=$status")
    inbound.enqueueOne(ExecutorLostMessage(driver, executorId, slaveId, status)).run
  }

  /** Logs the error reported through the Scheduler interface and enqueues an ErrorMessage. */
  override def error(driver: SchedulerDriver, message: String): Unit = {
    log.error(s"Scheduler error: $message")
    inbound.enqueueOne(ErrorMessage(driver, message)).run
  }

  // When framework registers it is recommended to trigger reconciliation, sending a RegisteredMessage first
  // to allow state manager to initialize state before starting reconciliation.
  override def registered(driver: SchedulerDriver, frameworkId: FrameworkID, masterInfo: MasterInfo): Unit = {
    val host = masterInfo.getHostname
    val port = masterInfo.getPort
    val id = frameworkId.getValue
    log.info(s"Registered with Mesos master [$host:$port] frameworkID=$id")
    inbound.enqueueAll(Seq(RegisteredMessage(driver, frameworkId, masterInfo), ReconcileMessage(driver))).run
  }

  // When framework reregisters it is recommended to trigger reconciliation, sending a ReregisteredMessage first
  // to allow state manager to initialize state before starting reconciliation.
  override def reregistered(driver: SchedulerDriver, masterInfo: MasterInfo): Unit = {
    log.info(s"Reregistered with Mesos master ${masterInfo.getHostname}:${masterInfo.getPort}")
    inbound.enqueueAll(Seq(ReregisteredMessage(driver, masterInfo), ReconcileMessage(driver))).run
  }

  /** Only logs the disconnection; no message is enqueued. */
  override def disconnected(driver: SchedulerDriver): Unit = {
    log.error("Disconnected from Mesos master...")
  }

}
260 |
object Scheduler {

  // Thread factory that delegates to the JDK default factory, then marks each thread as a daemon and gives it
  // the supplied name (every thread from one factory shares that name).
  private def daemonThreads(name: String): ThreadFactory = new ThreadFactory {
    override def newThread(r: Runnable): Thread = {
      val thread = Executors.defaultThreadFactory.newThread(r)
      thread.setDaemon(true)
      thread.setName(name)
      thread
    }
  }

  // Fixed pool of 10 daemon threads named "scheduler"; defaultExecutor below wraps this pool, so it must be
  // initialised first.
  val defaultPool: ExecutorService = Executors.newFixedThreadPool(10, daemonThreads("scheduler"))
  val defaultExecutor: Strategy = Strategy.Executor(defaultPool)

  // Scheduled pool of 10 daemon threads named "scheduler-sleep", used for the timer in reconcileProcess.
  val timeOutScheduler: java.util.concurrent.ScheduledExecutorService =
    Executors.newScheduledThreadPool(10, daemonThreads("scheduler-sleep"))

  // this process is used to trigger reconciliation every `reconcileInterval` and it can be passed in to Scheduler
  def reconcileProcess(driver: SchedulerDriver, reconcileInterval: FiniteDuration): Process[Task, ReconcileMessage] = {
    val ticks = time.awakeEvery(reconcileInterval)(defaultExecutor, timeOutScheduler)
    ticks.map(_ => ReconcileMessage(driver))
  }

}
282 |
object `package` {
  // NOTE(review): two implicit Strategy values of the same type, both null. Judging by their names this is
  // presumably deliberate, so that an implicit Strategy lookup in this package is ambiguous and callers have to
  // pass a Strategy explicitly instead of picking up scalaz's default one -- confirm before changing.
  implicit val dontUseTheDefaultStrategy: Strategy = null
  implicit val theDefaultStrategyCausesProblems: Strategy = null
}
287 |
--------------------------------------------------------------------------------