├── .dir-locals.el ├── .gitignore ├── .gitmodules ├── .projectile ├── LICENSE ├── Makefile ├── README.md ├── TODO.md ├── bin └── update-version ├── build.sbt ├── docs ├── launch-node-interaction.pu └── launch-node-interaction.svg ├── marathon-submodule └── src │ └── main │ └── scala │ └── mesosphere │ └── marathon │ ├── Exception.scala │ └── core │ ├── externalvolume │ └── ExternalVolumes.scala │ └── launcher │ └── impl │ ├── ReservationLabels.scala │ └── TaskLabels.scala ├── project ├── plugins.sbt └── version.properties ├── src ├── main │ ├── resources │ │ ├── application.conf │ │ ├── deployment-config.conf │ │ ├── logback.xml │ │ ├── reference.conf │ │ └── ui │ │ │ ├── index.html │ │ │ └── js │ └── scala │ │ └── com │ │ └── vivint │ │ └── ceph │ │ ├── AppConfiguration.scala │ │ ├── Behaviors.scala │ │ ├── ClusterSecretStore.scala │ │ ├── ConfigStore.scala │ │ ├── Constants.scala │ │ ├── FrameworkActor.scala │ │ ├── FrameworkIdStore.scala │ │ ├── JobBehavior.scala │ │ ├── JobFSM.scala │ │ ├── JobStore.scala │ │ ├── JobsState.scala │ │ ├── Main.scala │ │ ├── OfferMatchFactory.scala │ │ ├── OfferOperations.scala │ │ ├── PendingOffer.scala │ │ ├── ProtoHelpers.scala │ │ ├── ReleaseStore.scala │ │ ├── ReservationReaperActor.scala │ │ ├── SameThreadExecutionContext.scala │ │ ├── TaskActor.scala │ │ ├── api │ │ ├── ApiMarshalling.scala │ │ ├── HttpService.scala │ │ └── model │ │ │ ├── ApiPlayJsonFormats.scala │ │ │ └── ErrorResponse.scala │ │ ├── kvstore │ │ ├── CrashingKVStore.scala │ │ ├── FileStore.scala │ │ ├── KVStore.scala │ │ ├── MemStore.scala │ │ └── ZookeeperStore.scala │ │ ├── lib │ │ ├── Enum.scala │ │ ├── FutureHelpers.scala │ │ ├── FutureMonitor.scala │ │ ├── PortMatcher.scala │ │ ├── TgzHelper.scala │ │ └── package.scala │ │ ├── model │ │ ├── CephConfig.scala │ │ ├── ClusterSecrets.scala │ │ ├── Job.scala │ │ ├── JobRole.scala │ │ ├── Location.scala │ │ ├── PersistentState.scala │ │ ├── PlayJsonFormats.scala │ │ ├── ReservationRelease.scala │ │ ├── RunState.scala │ │ ├── TaskState.scala │ │ └── TaskStatus.scala │ │ ├── orchestrator │ │ ├── Bootstrap.scala │ │ ├── Orchestrator.scala │ │ └── OrchestratorFSM.scala │ │ └── views │ │ └── ConfigTemplates.scala └── test │ ├── resources │ └── application.conf │ └── scala │ └── com │ └── vivint │ └── ceph │ ├── ConfigStoreTest.scala │ ├── IntegrationTest.scala │ ├── JobBehaviorTest.scala │ ├── MesosTestHelper.scala │ ├── ReservationReaperActorTest.scala │ ├── Workbench.scala │ ├── lib │ ├── CephActorTest.scala │ ├── PortMatcherTest.scala │ ├── TestHelpers.scala │ └── TgzHelperTest.scala │ ├── model │ ├── CephConfigTest.scala │ ├── PlayJsonFormatsTest.scala │ └── TaskTest.scala │ └── views │ └── ConfigTemplatesTest.scala └── ui ├── Caddyfile ├── index.html └── src └── main └── scala └── cephui ├── ReactApp.scala ├── components ├── Footer.scala ├── TopNav.scala └── items │ ├── Item1Data.scala │ ├── Item2Data.scala │ └── ItemsInfo.scala ├── css ├── AppCSS.scala └── GlobalStyle.scala ├── elements └── ReactBootstrapComponent.scala ├── lib └── Http.scala ├── models ├── DanglingReservation.scala ├── ErrorResponse.scala ├── Job.scala ├── JsFormats.scala └── Menu.scala ├── pages ├── ConfigPage.scala ├── DanglingReservationsPage.scala └── HomePage.scala └── routes └── AppRouter.scala /.dir-locals.el: -------------------------------------------------------------------------------- 1 | ((nil . ((fill-column . 120) 2 | (column-enforce-column . 120) 3 | (scala-test:main-file-format . "%s/src/main/scala/%s%s.scala") 4 | (scala-test:test-file-format . 
"%s/src/test/scala/%s%sTest.scala") 5 | (mode . column-enforce)))) 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | .ensime* 3 | .project 4 | .idea 5 | *.iml 6 | .classpath 7 | scratch 8 | *.DS_Store 9 | target 10 | libmesos.dylib 11 | data/ 12 | 13 | ui/js/ -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "marathon-submodule/marathon"] 2 | path = marathon-submodule/marathon 3 | url = https://github.com/timcharper/marathon.git 4 | -------------------------------------------------------------------------------- /.projectile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vivint-smarthome/ceph-on-mesos/89b84b5fd1e6bdba6825695463762ac1dd85480d/.projectile -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: 2 | .SECONDARY: 3 | 4 | README.md: project/version.properties 5 | bin/update-version 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Ceph on Mesos 2 | 3 | Ceph on Mesos is working Mesos Framework with a boring name. It can be used to reliably deploy and manage a persistent, 4 | Ceph cluster. 5 | 6 | Some highlights: 7 | 8 | - Orchestrated bootstrap and deployment. 9 | - Reserves and launches Monitors and OSDs on reserved resources. This keeps other tasks from taking resources away from 10 | an OSD when it restarts. 11 | - Low dependencies. 12 | - Launch OSD containers in a "paused" state to support manual intervention. 13 | 14 | For a list of planned features (and progress thus far), see the [TODO.md](./TODO.md) file in this repository. 15 | 16 | # Introductory video 17 | 18 | To help break the ice and make it easier to learn about the Ceph on Mesos framework, an introductionory video is available 19 | for your viewing pleasure: 20 | 21 | [![Deploying Ceph on Mesos](http://img.youtube.com/vi/X5xqVwohhHA/0.jpg)](https://youtu.be/X5xqVwohhHA "Deploying Ceph on Mesos") 22 | 23 | # Requirements 24 | 25 | - Mesos 26 | - Docker (the ceph-docker image is used) 27 | - Zookeeper (mesos master discovery, exclusive framework lock, state store) 28 | - XFS formatted mount-disk resources for 29 | [OSDs](http://docs.ceph.com/docs/jewel/rados/configuration/filesystem-recommendations/). 30 | 31 | # Deployment 32 | 33 | If you have the JVM installed on some or all of your slaves, you can launch Ceph-on-Mesos using the Mesos 34 | Containerizer. You may also package the artifact up in a container, if you choose. It is easiest to use host 35 | networking, although so long as the framework has an IP which the Mesos masters can directly reach, you may use any 36 | networking abstraction you like. 
37 |
38 | ## Step 1: Deploy the framework
39 |
40 | Here is an example of a Marathon job which deploys the artifact directly using the Mesos containerizer:
41 |
42 | ```
43 | {
44 |   "id": "/frameworks/ceph",
45 |   "cmd": "cd ceph-on-mesos-*\nbin/ceph-on-mesos --api-port=$PORT0",
46 |   "cpus": 0.1,
47 |   "mem": 512,
48 |   "disk": 0,
49 |   "instances": 1,
50 |   "env": {
51 |     "MESOS_ROLE": "ceph",
52 |     "MESOS_PRINCIPAL": "ceph",
53 |     "MESOS_SECRET": "your-principal-super-secret",
54 |     "PUBLIC_NETWORK": "172.0.0.0/24",
55 |     "CLUSTER_NETWORK": "172.0.0.0/24",
56 |     "ZOOKEEPER": "172.0.0.11:2181,172.0.0.12:2181,172.0.0.13:2181,172.0.0.14:2181,172.0.0.15:2181/",
57 |     "API_HOST": "0.0.0.0",
58 |     "MESOS_MASTER": "zk://172.0.0.11:2181,172.0.0.12:2181,172.0.0.13:2181,172.0.0.14:2181,172.0.0.15:2181/mesos"
59 |   },
60 |   "uris": ["https://dl.bintray.com/vivint-smarthome/ceph-on-mesos/ceph-on-mesos-0.2.11.tgz"],
61 |   "portDefinitions": [{"protocol": "tcp", "name": "api"}],
62 |   "healthChecks": [
63 |     {
64 |       "path": "/v1/tasks",
65 |       "protocol": "HTTP",
66 |       "portIndex": 0,
67 |       "gracePeriodSeconds": 300,
68 |       "intervalSeconds": 60,
69 |       "timeoutSeconds": 20,
70 |       "maxConsecutiveFailures": 3,
71 |       "ignoreHttp1xx": false
72 |     }
73 |   ],
74 |   "upgradeStrategy": {
75 |     "minimumHealthCapacity": 0,
76 |     "maximumOverCapacity": 0
77 |   },
78 |   "labels": {
79 |     "MARATHON_SINGLE_INSTANCE_APP": "true"
80 |   }
81 | }
82 | ```
83 |
84 | ## Step 2: Update the default configuration to launch monitors
85 |
86 | Once the framework is deployed, it creates and populates a default configuration file in ZooKeeper at the node
87 | `/ceph-on-mesos/ceph.conf`. You can either use the web UI (point your browser to the Marathon-assigned host and port if
88 | you used the job specification above), or use your favorite ZooKeeper editor (Exhibitor,
89 | [zk-web](https://github.com/qiuxiafei/zk-web), etc.) to edit it. Set the resource requirements and update the mon count
90 | to 3 (as seen below). Save it.
91 |
92 | ```
93 | deployment {
94 |   mon {
95 |     count = 3
96 |     cpus = 1
97 |     mem = 1024.0
98 |
99 |     # # The type of multi-disk volume to use; valid values are root, path, and mount.
100 |     disk_type = root
101 |
102 |     # # Size of persistent volume. In the case of diskType = mount, the minimum size of disk to allocate.
103 |     disk = 40960
104 |   }
105 |
106 |   ...
107 | ```
108 |
109 | You can find the default configuration in this repository at
110 | [src/main/resources/deployment-config.conf](https://github.com/vivint-smarthome/ceph-on-mesos/blob/master/src/main/resources/deployment-config.conf).
111 |
112 | The framework detects changes to this configuration and will automatically deploy them. Don't panic! It will only add
113 | nodes in response to a config change, never remove them. Watch the framework logs (go to the framework's Mesos sandbox
114 | and open stdout). If there are issues matching resources, you will see errors in the log. By default, `ceph-on-mesos`
115 | will only deploy one monitor per host.
116 |
117 | Once the monitors are deployed, you can attach to any of the launched Docker containers and run `ceph -s`. If successful,
118 | you'll see a status which says "3 mons at ...". If no Docker containers were launched, check the Ceph on Mesos stdout
119 | output for errors. You can watch the status of jobs by opening the UI (again, if you followed the Marathon config above,
120 | it is the port assigned to the task); alternatively, you can issue a request directly to the REST API to `GET /v1/jobs`.
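
For example, a minimal check from a host running one of the monitor containers (the container ID below is a
placeholder; look up the real one with `docker ps`):

```
# find the ID of the monitor container, then run `ceph -s` inside of it
docker ps | grep ceph
docker exec -it <container-id> ceph -s
```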
121 |
122 | ## Step 3: Update the default configuration to launch OSDs
123 |
124 | As with the monitors, edit the deployment configuration to set the OSD count and resource requirements. For example:
125 |
126 | ```
127 | deployment {
128 |   ...
129 |
130 |   osd {
131 |     # # Number of OSD instances to spawn
132 |     count = 6
133 |
134 |     cpus = 4
135 |
136 |     mem = 4096
137 |     # # The type of multi-disk volume to use for the persistent volume; valid values are root, path, and mount.
138 |     disk_type = mount
139 |
140 |     # # Size of persistent volume. In the case of diskType = mount, the minimum size of disk to allocate. Using
141 |     # # anything except mount disks for OSDs is strongly discouraged.
142 |     disk = 4700000
143 |
144 |     # # For diskType = mount, don't allocate drives larger than this.
145 |
146 |     # disk_max = 1048576
147 |
148 |     # # pathConstraint will tell the ceph framework to only allocate persistent mount volumes at a path which FULLY
149 |     # # matches the provided regular expression (I.E. pretend an implicit '^' is added at the beginning of your regex
150 |     # # and a '$' at the end).
151 |
152 |     # path_constraint = "/mnt/ssd-.+"
153 |   }
154 |
155 | ```
156 |
157 | # REST API
158 |
159 | ## Get / Update deployment config
160 |
161 | ### Fetch the current deployment configuration
162 |
163 | ```
164 | curl http://127.0.0.1:8080/v1/config/deployment-config.conf
165 | ```
166 |
167 | ### Update the current deployment configuration
168 |
169 | ```
170 | # where deployment-config.conf is a file residing on the local filesystem:
171 |
172 | curl -X PUT http://127.0.0.1:8080/v1/config/deployment-config.conf -d @deployment-config.conf
173 | ```
174 |
175 | Please note that access to this endpoint carries an inherent security risk: it is possible to specify the
176 | docker_image used to deploy Ceph, so the endpoint could be used to gain access to the Ceph secrets and to run an
177 | arbitrary container with host networking privileges. Protect the endpoint accordingly.
178 |
179 | ## List jobs
180 |
181 | ```
182 | curl http://127.0.0.1:8080/v1/jobs
183 | ```
184 |
185 | ## Update job goal
186 |
187 | ```
188 | # pause a job
189 | curl -X PUT http://127.0.0.1:8080/v1/jobs/aa76c126-16e8-4b43-a861-663332657f61/paused
190 |
191 | # resume a job
192 | curl -X PUT http://127.0.0.1:8080/v1/jobs/aa76c126-16e8-4b43-a861-663332657f61/running
193 | ```
194 |
195 |
196 | # Updating configuration
197 |
198 | Config updates to `(ceph)/ceph.conf` happen as follows:
199 |
200 | - Increases to the number of nodes to deploy are picked up immediately.
201 | - Deployment resource config is picked up immediately, but is used for LAUNCHING new jobs only. Currently there is no
202 |   support for changing the resources of an existing job.
203 | - Ceph configuration is picked up immediately, but applied as jobs are launched (i.e., as they are restarted).
204 |
205 | Note that the latest configuration is deployed when a job is relaunched.
206 |
--------------------------------------------------------------------------------
/TODO.md:
--------------------------------------------------------------------------------
1 | # Functionality
2 |
3 | ## definitely
4 |
5 | - [ ] Health checks. Leader launch pattern should wait for the leader node to return successful health before launching
6 |       other tasks.
7 | - [ ] Add orchestrator events such as rolling restart, rolling repair, etc.
8 | - [ ] Version running states so we can rolling-restart the nodes.
9 | - [ ] Support SSL for the framework endpoint.
10 | - [ ] Add authentication for the SSL framework endpoint. (SSL client auth?)
11 | - [ ] Support for deploying Ceph metadata servers
12 | - [ ] Support for launching Ceph on other container engines, such as rkt.
13 |
14 | ## maybe
15 |
16 | - [ ] Consider supporting unsupported file systems by allocating contiguous blocks.
17 | - [ ] Configurable secret to protect access to pulling the ceph config
18 |
19 | ## done
20 |
21 | - [x] Better task reconciliation handling with transitional agents. Assign TASK_LOST and disown after timeout (causing
22 |       it to be killed if it is seen again).
23 | - [x] Support Mesos SSL (verified working via LIBPROCESS_SSL_* vars)
24 | - [-] Validate configuration before saving
25 | - [x] Permit OSD btrfs deployment
26 | - [x] Explicit resource releases. It would be better for a bug to result in resources not being released than to
27 |       release resources that shouldn't be released.
28 | - [x] Simple web UI
29 | - [x] Configurable ceph-docker image, allowing a local / private repository to be used.
30 | - [x] Packaging
31 | - [x] Mesos DNS discovery / SRV records [0.2.0]
32 | - [x] Exclusive lock. It would be catastrophic if two ceph-on-mesos frameworks launched concurrently. [0.1.0]
33 | - [x] Extract leader launch pattern into orchestrator [0.1.0]
34 | - [x] Simple API endpoints to pull the configuration necessary to connect to ceph. [0.1.0]
35 | - [x] Unique reservation IDs to survive SlaveID changes.
36 |
37 | # Code cleanup
38 |
39 | - [x] Extract task state tracking from TaskActor. Guard operations.
40 | - [x] Emit TaskChanged messages as part of nodeUpdated; concern too scattered.
41 | - [x] Extract TaskFSM concerns from TaskActor.
42 | - [x] Extract launch strategy from node behavior and extract common behaviors into its own library.
43 | - [x] Consider emitting separate events for different kinds of node changes. IE - goal updated, task status changed,
44 |       lastLaunched changed. (not doing)
45 |
46 |
--------------------------------------------------------------------------------
/bin/update-version:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | . project/version.properties
3 | gsed -E -i "s/ceph-on-mesos-[0-9.]+\.tgz/ceph-on-mesos-${version}.tgz/" README.md
4 |
5 | echo $version 1>&2
6 |
--------------------------------------------------------------------------------
/build.sbt:
--------------------------------------------------------------------------------
1 | import java.util.Properties
2 |
3 | val akkaVersion = "2.4.11"
4 | val mesosVersion = "1.0.0"
5 | val curatorVer = "2.11.0"
6 | val playVersion = "2.5.8"
7 | val logbackVersion = "1.1.7"
8 | val zookeeperVersion = "3.4.9"
9 | val commonSettings = Seq(
10 |   scalaVersion := "2.11.8",
11 |   ensimeScalaVersion in ThisBuild := "2.11.8"
12 | )
13 |
14 | val appProperties = {
15 |   val prop = new Properties()
16 |   IO.load(prop, new File("project/version.properties"))
17 |   prop
18 | }
19 |
20 | ensimeScalaVersion in ThisBuild := "2.11.8"
21 |
22 | lazy val root = (project in file(".")).
23 |   settings(
24 |     commonSettings : _*).
25 |   settings(
26 |     name := "ceph-on-mesos",
27 |     version := appProperties.getProperty("version"),
28 |     resolvers += "Mesosphere Public Repo" at "http://downloads.mesosphere.com/maven",
29 |
30 |     libraryDependencies ++= Seq(
31 |       "org.kamranzafar" % "jtar" % "2.3",
32 |       "commons-io" % "commons-io" % "2.5",
33 |       "com.github.kxbmap" %% "configs" % "0.4.3",
34 |       "org.scala-lang.modules" %% "scala-async" % "0.9.5",
35 |       "org.apache.mesos" % "mesos" % mesosVersion,
36 |       "com.typesafe.akka" %% "akka-actor" % akkaVersion,
37 |       "com.typesafe.akka" %% "akka-http-experimental" % akkaVersion,
38 |       "com.typesafe.akka" %% "akka-stream" % akkaVersion,
39 |       "com.typesafe.akka" %% "akka-slf4j" % akkaVersion,
40 |       "com.typesafe.play" %% "play-json" % playVersion,
42 |       "com.typesafe.akka" %% "akka-testkit" % akkaVersion,
43 |       "org.scaldi" %% "scaldi" % "0.5.7",
44 |       "org.scalatest" %% "scalatest" % "3.0.0" % "test",
45 |       "org.rogach" %% "scallop" % "2.0.2",
46 |       "ch.qos.logback" % "logback-classic" % logbackVersion,
47 |       ("org.apache.curator" % "curator-framework" % curatorVer),
48 |       "org.apache.curator" % "curator-recipes" % curatorVer,
49 |       ("org.apache.zookeeper" % "zookeeper" % zookeeperVersion).exclude("org.slf4j", "slf4j-log4j12")
50 |     )
51 |   ).
52 |   dependsOn(marathon)
53 |
54 | enablePlugins(JavaAppPackaging)
55 |
56 | lazy val marathon = (project in file("marathon-submodule/")).
57 |   settings(
58 |     commonSettings: _*).
59 |   settings(
60 |     resolvers ++= Seq(
61 |       "Mesosphere Public Repo" at "http://downloads.mesosphere.com/maven"
62 |     ),
63 |     libraryDependencies ++= Seq(
64 |       "org.apache.mesos" % "mesos" % mesosVersion,
65 |       "com.google.protobuf" % "protobuf-java" % "2.6.1",
66 |       "com.wix" %% "accord-core" % "0.5",
67 |       "com.typesafe.play" %% "play-json" % playVersion,
68 |       "ch.qos.logback" % "logback-core" % logbackVersion,
69 |       "org.apache.curator" % "curator-framework" % curatorVer,
70 |       ("com.twitter" %% "util-zk" % "6.34.0").exclude("org.apache.zookeeper", "zookeeper"),
71 |       "mesosphere" %% "mesos-utils" % "1.1.0-mesos-healthchecks",
72 |       "com.fasterxml.uuid" % "java-uuid-generator" % "3.1.4",
73 |       ("org.apache.zookeeper" % "zookeeper" % zookeeperVersion).exclude("org.slf4j", "slf4j-log4j12")
74 |     ),
75 |     unmanagedSources in Compile ++= Seq(
76 |       baseDirectory.value / "marathon/plugin-interface/src/main/scala/mesosphere/marathon/plugin/PathId.scala",
77 |       baseDirectory.value / "marathon/src/main/java/mesosphere/marathon/Protos.java",
78 |       baseDirectory.value / "marathon/src/main/scala/mesosphere/marathon/Features.scala",
79 |       baseDirectory.value / "marathon/src/main/scala/mesosphere/marathon/api/v2/Validation.scala", // move to util
80 |       baseDirectory.value / "marathon/src/main/scala/mesosphere/marathon/core/task/Task.scala",
81 |       baseDirectory.value / "marathon/src/main/scala/mesosphere/marathon/core/task/TaskStateOp.scala",
82 |       baseDirectory.value / "marathon/src/main/scala/mesosphere/marathon/core/task/state/MarathonTaskStatus.scala",
83 |       baseDirectory.value / "marathon/src/main/scala/mesosphere/marathon/core/task/state/MarathonTaskStatusMapping.scala",
84 |       baseDirectory.value / "marathon/src/main/scala/mesosphere/marathon/state/FetchUri.scala",
85 |       baseDirectory.value / "marathon/src/main/scala/mesosphere/marathon/state/MarathonState.scala",
86 |       baseDirectory.value / "marathon/src/main/scala/mesosphere/marathon/state/PathId.scala",
87 |       baseDirectory.value /
"marathon/src/main/scala/mesosphere/marathon/state/ResourceRole.scala", 88 | baseDirectory.value / "marathon/src/main/scala/mesosphere/marathon/state/Timestamp.scala", 89 | baseDirectory.value / "marathon/src/main/scala/mesosphere/marathon/state/Volume.scala", 90 | baseDirectory.value / "marathon/src/main/scala/mesosphere/util/Logging.scala", 91 | baseDirectory.value / "marathon/src/main/scala/mesosphere/util/state/FrameworkId.scala" 92 | ), 93 | unmanagedSourceDirectories in Compile ++= Seq( 94 | baseDirectory.value / "marathon/src/main/scala/mesosphere/mesos/" 95 | ) 96 | ) 97 | 98 | val scalaJSReactVersion = "0.11.2" 99 | val scalaCssVersion = "0.5.0" 100 | val reactJSVersion = "15.3.2" 101 | 102 | 103 | val uiBuildPath = file("ui") / "js" 104 | lazy val ui = (project in file("ui")). 105 | enablePlugins(ScalaJSPlugin). 106 | settings( 107 | commonSettings: _*). 108 | settings( 109 | resolvers += "mmreleases" at "https://artifactory.mediamath.com/artifactory/libs-release-global", 110 | libraryDependencies ++= Seq("com.github.japgolly.scalajs-react" %%% "core" % scalaJSReactVersion, 111 | "com.github.japgolly.scalajs-react" %%% "extra" % scalaJSReactVersion, 112 | "com.github.japgolly.scalacss" %%% "core" % scalaCssVersion, 113 | "com.github.japgolly.scalacss" %%% "ext-react" % scalaCssVersion, 114 | "com.github.chandu0101.scalajs-react-components" %%% "core" % "0.5.0", 115 | "com.mediamath" %%% "scala-json" % "1.0"), 116 | 117 | 118 | // React JS itself (Note the filenames, adjust as needed, eg. to remove addons.) 119 | jsDependencies ++= Seq( 120 | 121 | "org.webjars.bower" % "react-bootstrap" % "0.30.3" 122 | / "react-bootstrap.js" 123 | minified "react-bootstrap.min.js" 124 | dependsOn "react-with-addons.js" 125 | commonJSName "ReactBootstrap", 126 | 127 | "org.webjars.bower" % "react" % reactJSVersion 128 | / "react-with-addons.js" 129 | minified "react-with-addons.min.js" 130 | commonJSName "React", 131 | 132 | "org.webjars.bower" % "react" % reactJSVersion 133 | / "react-dom.js" 134 | minified "react-dom.min.js" 135 | dependsOn "react-with-addons.js" 136 | commonJSName "ReactDOM", 137 | 138 | "org.webjars.bower" % "react" % reactJSVersion 139 | / "react-dom-server.js" 140 | minified "react-dom-server.min.js" 141 | dependsOn "react-dom.js" 142 | commonJSName "ReactDOMServer"), 143 | 144 | 145 | // create launcher file ( its search for object extends JSApp , make sure there is only one file) 146 | persistLauncher := true, 147 | 148 | persistLauncher in Test := false, 149 | 150 | skip in packageJSDependencies := false, 151 | 152 | crossTarget in (Compile, fullOptJS) := uiBuildPath, 153 | 154 | crossTarget in (Compile, fastOptJS) := uiBuildPath, 155 | 156 | crossTarget in (Compile, packageJSDependencies) := uiBuildPath, 157 | 158 | crossTarget in (Compile, packageScalaJSLauncher) := uiBuildPath, 159 | 160 | crossTarget in (Compile, packageMinifiedJSDependencies) := uiBuildPath, 161 | 162 | artifactPath in (Compile, fastOptJS) := ((crossTarget in (Compile, fastOptJS)).value / 163 | ((moduleName in fastOptJS).value + "-opt.js")), 164 | 165 | scalacOptions += "-feature" 166 | ). 
167 |   dependsOn(
168 |     ProjectRef(uri("git://github.com/timcharper/scalajs-react-bridge.git"), "scalajs-react-bridge"))
169 |
--------------------------------------------------------------------------------
/docs/launch-node-interaction.pu:
--------------------------------------------------------------------------------
1 | @startuml
2 |
3 | Participant Zookeeper
4 | Participant FrameworkActor
5 | Participant TaskActor
6 | Participant NodeActor
7 |
8 | Zookeeper -> TaskActor: Update config to add mon nodes
9 | hnote over TaskActor: Add pending task
10 | TaskActor -> NodeActor: Create
11 | TaskActor -> NodeActor: Send initial state
12 | NodeActor -> TaskActor: Persist
13 | TaskActor -> NodeActor: Success; send version
14 | TaskActor -> Zookeeper: Persist
15 | Zookeeper -> TaskActor: Persist okay
16 | hnote over TaskActor: Update persistence version
17 | TaskActor -> NodeActor: Send
18 | hnote over NodeActor: Detects transaction was persisted\nIntrospects state
19 | NodeActor -> TaskActor: Listen for resources
20 | hnote over NodeActor: Becomes waiting for offer\nwith timeout to re-request
21 |
22 | FrameworkActor -> TaskActor: Offer
23 | hnote over TaskActor: Look for reservations\nfor my frameworkId
24 | hnote over TaskActor: If reservation for\nunknown taskId\nreject
25 | hnote over TaskActor: If reservation for\nknown taskId\nbut wrong slave\nreject
26 | TaskActor -> NodeActor: Send resources
27 | hnote over NodeActor: Clear offer request
28 | NodeActor -> TaskActor: Accept & reserve
29 | hnote over TaskActor: Forms reservation ops to reserve resources\n and create persistent volume
30 | TaskActor -> FrameworkActor : Accept offer
31 | FrameworkActor -> TaskActor: Send reserved offer
32 | TaskActor -> NodeActor: Forward reserved offer
33 | NodeActor -> TaskActor: Set slaveId
34 | TaskActor -> NodeActor: Okay
35 | TaskActor -> Zookeeper: Persist
36 | hnote over NodeActor: At this point this node\nis forever locked to this\nslave
37 | Zookeeper -> TaskActor: Okay
38 | TaskActor -> NodeActor: Update state
39 | NodeActor -> TaskActor: Launch task\nwith reserved\noffer
40 | TaskActor -> FrameworkActor: Launch task
41 | FrameworkActor -> TaskActor: TaskStatus
42 | TaskActor -> NodeActor: Update state
43 |
44 | @enduml
45 |
--------------------------------------------------------------------------------
/marathon-submodule/src/main/scala/mesosphere/marathon/Exception.scala:
--------------------------------------------------------------------------------
1 | package mesosphere.marathon
2 |
3 | import com.wix.accord.Failure
4 |
5 | /**
6 |   * Is thrown if an object validation is not successful.
7 |   * @param obj object which is not valid
8 |   * @param failure validation information kept in a Failure object
9 |   */
10 | case class ValidationFailedException(obj: Any, failure: Failure) extends Exception("Validation failed")
11 |
--------------------------------------------------------------------------------
/marathon-submodule/src/main/scala/mesosphere/marathon/core/externalvolume/ExternalVolumes.scala:
--------------------------------------------------------------------------------
1 | package mesosphere.marathon.core.externalvolume
2 |
3 | import com.wix.accord.Validator
4 | import mesosphere.marathon.state.ExternalVolume
5 | trait ExternalVolumes
6 |
7 | object ExternalVolumes {
8 |   def validExternalVolume: Validator[ExternalVolume] = ???
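  // Vendored stub: ??? throws scala.NotImplementedError if this validator is ever invoked. It appears to
  // exist only so that the vendored Marathon sources compile without pulling in Marathon's external-volume
  // support.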
9 | }
--------------------------------------------------------------------------------
/marathon-submodule/src/main/scala/mesosphere/marathon/core/launcher/impl/ReservationLabels.scala:
--------------------------------------------------------------------------------
1 | package mesosphere.marathon.core.launcher.impl
2 |
3 | import org.apache.mesos.{ Protos => MesosProtos }
4 |
5 | object LabelsSerializer {
6 |   def toMesos(labels: Map[String, String]): Iterable[MesosProtos.Label] = {
7 |     for {
8 |       (key, value) <- labels
9 |     } yield MesosProtos.Label.newBuilder.setKey(key).setValue(value).build
10 |   }
11 |
12 |   def toMesosLabelsBuilder(labels: Map[String, String]): MesosProtos.Labels.Builder = {
13 |     val builder = MesosProtos.Labels.newBuilder
14 |     toMesos(labels).foreach(builder.addLabels)
15 |     builder
16 |   }
17 | }
18 |
19 | /**
20 |   * Encapsulates information about a reserved resource and its (possibly empty) list of reservation labels.
21 |   */
22 | case class ReservationLabels(labels: Map[String, String]) {
23 |   lazy val mesosLabels: MesosProtos.Labels = {
24 |     LabelsSerializer.toMesosLabelsBuilder(labels).build
25 |   }
26 |
27 |   def get(key: String): Option[String] = labels.get(key)
28 |
29 |   override def toString: String = labels.map { case (k, v) => s"$k: $v" }.mkString(", ")
30 | }
31 |
32 | object ReservationLabels {
33 |   def withoutLabels: ReservationLabels = new ReservationLabels(Map.empty)
34 |
35 |   def apply(resource: MesosProtos.Resource): ReservationLabels = {
36 |     if (resource.hasReservation && resource.getReservation.hasLabels)
37 |       ReservationLabels(resource.getReservation.getLabels)
38 |     else
39 |       ReservationLabels.withoutLabels
40 |   }
41 |   def apply(labels: MesosProtos.Labels): ReservationLabels = {
42 |     import scala.collection.JavaConverters._
43 |     ReservationLabels(labels.getLabelsList.asScala.iterator.map(l => l.getKey -> l.getValue).toMap)
44 |   }
45 | }
46 |
--------------------------------------------------------------------------------
/marathon-submodule/src/main/scala/mesosphere/marathon/core/launcher/impl/TaskLabels.scala:
--------------------------------------------------------------------------------
1 | package mesosphere.marathon.core.launcher.impl
2 |
3 | import mesosphere.marathon.core.task.Task
4 | import mesosphere.util.state.FrameworkId
5 | import org.apache.mesos.{ Protos => MesosProtos }
6 |
7 | object TaskLabels {
8 |   private[this] final val FRAMEWORK_ID_LABEL = "ceph_framework_id"
9 |   private[this] final val TASK_ID_LABEL = "ceph_task_id"
10 |
11 |   /**
12 |     * Returns the task id for which this reservation has been performed, if the reservation was
13 |     * labeled by this framework.
14 |     */
15 |   def taskIdForResource(frameworkId: FrameworkId, resource: MesosProtos.Resource): Option[Task.Id] = {
16 |     val labels = ReservationLabels(resource)
17 |
18 |     val maybeMatchingFrameworkId = labels.get(FRAMEWORK_ID_LABEL).filter(_ == frameworkId.id)
19 |     def maybeTaskId = labels.get(TASK_ID_LABEL).map(Task.Id(_))
20 |
21 |     maybeMatchingFrameworkId.flatMap(_ => maybeTaskId)
22 |   }
23 |
24 |   def labelsForTask(frameworkId: FrameworkId, task: Task): ReservationLabels =
25 |     labelsForTask(frameworkId, task.taskId)
26 |
27 |   def labelsForTask(frameworkId: FrameworkId, taskId: Task.Id): ReservationLabels =
28 |     ReservationLabels(Map(
29 |       FRAMEWORK_ID_LABEL -> frameworkId.id,
30 |       TASK_ID_LABEL -> taskId.idString
31 |     ))
32 |
33 |   def labelKeysForTaskReservations: Set[String] = Set(FRAMEWORK_ID_LABEL, TASK_ID_LABEL)
34 |
35 | }
36 |
--------------------------------------------------------------------------------
/project/plugins.sbt:
--------------------------------------------------------------------------------
1 | addSbtPlugin("io.spray" % "sbt-revolver" % "0.8.0")
2 |
3 | addSbtPlugin("com.typesafe.sbt" % "sbt-native-packager" % "1.1.4")
4 |
5 | addSbtPlugin("org.scala-js" % "sbt-scalajs" % "0.6.13")
6 |
--------------------------------------------------------------------------------
/project/version.properties:
--------------------------------------------------------------------------------
1 | version=0.2.11
--------------------------------------------------------------------------------
/src/main/resources/application.conf:
--------------------------------------------------------------------------------
1 | akka {
2 |   loggers = ["akka.event.slf4j.Slf4jLogger"]
3 |   loglevel = "DEBUG"
4 |   logging-filter = "akka.event.slf4j.Slf4jLoggingFilter"
5 | }
--------------------------------------------------------------------------------
/src/main/resources/deployment-config.conf:
--------------------------------------------------------------------------------
1 | deployment {
2 |   # # The docker image to use to launch Ceph.
3 |   # docker_image = "ceph/daemon:tag-build-master-jewel-ubuntu-14.04"
4 |
5 |   mon {
6 |     count = 0
7 |     cpus = 1
8 |     mem = 256.0
9 |
10 |     # # The type of multi-disk volume to use; valid values are root, path, and mount.
11 |     disk_type = root
12 |
13 |     # # Size of persistent volume (in MB). In the case of diskType = mount, the minimum size of disk to allocate.
14 |     disk = 16
15 |
16 |     # # Uncomment to cause the ceph framework to allocate a consistent port; otherwise, the first offered port is taken
17 |     # port = 2015
18 |   }
19 |
20 |   osd {
21 |     # # Number of OSD instances to spawn
22 |     count = 0
23 |
24 |     cpus = 1
25 |
26 |     mem = 1024
27 |     # # The type of multi-disk volume to use for the persistent volume; valid values are root, path, and mount.
28 |     disk_type = mount
29 |
30 |     # # Size of persistent volume (in MB). In the case of diskType = mount, the minimum size of disk to allocate.
31 |     # # Using anything except mount disks for OSDs is strongly discouraged.
32 |     disk = 512000
33 |
34 |     # # For diskType = mount, don't allocate drives larger than this.
35 |
36 |     # disk_max = 1048576
37 |
38 |     # # pathConstraint will tell the ceph framework to only allocate persistent mount volumes at a path which FULLY
39 |     # # matches the provided regular expression (I.E. pretend an implicit '^' is added at the beginning of your regex
40 |     # # and a '$' at the end).
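# # For example, with path_constraint = "/mnt/ssd-.+", a persistent volume may be allocated at /mnt/ssd-1,
# # but not at /data/ssd-1.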
41 |
42 |     # path_constraint = "/mnt/ssd-.+"
43 |   }
44 |
45 |   rgw {
46 |     count = 0
47 |     cpus = 1
48 |     mem = 256
49 |
50 |     # # If port is specified then a port resource is not requested, and it is implied that the container is running on a
51 |     # # network where that port is guaranteed to be available
52 |     # port = 80
53 |
54 |     # # docker_args specifies arbitrary launch parameters for the docker container
55 |     docker_args = {
56 |       # network = weave
57 |       # hostname = "cephrgw.weave.local"
58 |     }
59 |   }
60 | }
61 |
62 | settings {
63 |   # These settings are transparently inserted into the generated ceph.conf file; values in the 'auth {}' block will be
64 |   # inserted in the corresponding [auth] ceph.conf section.
65 |   auth {
66 |     cephx = true
67 |     cephx_require_signatures = false
68 |     cephx_cluster_require_signatures = true
69 |     cephx_service_require_signatures = false
70 |   }
71 |
72 |   global {
73 |     max_open_files = 131072
74 |     osd_pool_default_pg_num = 128
75 |     osd_pool_default_pgp_num = 128
76 |     osd_pool_default_size = 3
77 |     osd_pool_default_min_size = 1
78 |
79 |     mon_osd_full_ratio = .95
80 |     mon_osd_nearfull_ratio = .85
81 |   }
82 |
83 |   mon {
84 |     mon_osd_down_out_interval = 600
85 |     mon_osd_min_down_reporters = 4
86 |     mon_clock_drift_allowed = .15
87 |     mon_clock_drift_warn_backoff = 30
88 |     mon_osd_report_timeout = 300
89 |   }
90 |
91 |   osd {
92 |     osd_journal_size = 100
93 |
94 |     osd_mon_heartbeat_interval = 30
95 |
96 |     # # crush
97 |     pool_default_crush_rule = 0
98 |     osd_crush_update_on_start = true
99 |
100 |     # # backend
101 |     osd_objectstore = filestore
102 |
103 |     # # performance tuning
104 |     filestore_merge_threshold = 40
105 |     filestore_split_multiple = 8
106 |     osd_op_threads = 8
107 |     filestore_op_threads = 8
108 |     filestore_max_sync_interval = 5
109 |     osd_max_scrubs = 1
110 |
111 |     # # recovery tuning
112 |     osd_recovery_max_active = 5
113 |     osd_max_backfills = 2
114 |     osd_recovery_op_priority = 2
115 |     osd_client_op_priority = 63
116 |     osd_recovery_max_chunk = 1048576
117 |     osd_recovery_threads = 1
118 |   }
119 |
120 |   client {
121 |     rbd_cache_enabled = true
122 |     rbd_cache_writethrough_until_flush = true
123 |   }
124 |
125 |   mds {
126 |     mds_cache_size = 100000
127 |   }
128 | }
129 |
--------------------------------------------------------------------------------
/src/main/resources/logback.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <configuration>
3 |
4 |   <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
5 |     <encoder>
6 |       <pattern>
7 |         %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
8 |       </pattern>
9 |     </encoder>
10 |   </appender>
11 |
12 |
13 |   <root level="INFO">
14 |     <appender-ref ref="STDOUT" />
15 |   </root>
16 | </configuration>
--------------------------------------------------------------------------------
/src/main/resources/reference.conf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vivint-smarthome/ceph-on-mesos/89b84b5fd1e6bdba6825695463762ac1dd85480d/src/main/resources/reference.conf
--------------------------------------------------------------------------------
/src/main/resources/ui/index.html:
--------------------------------------------------------------------------------
1 | ../../../../ui/index.html
--------------------------------------------------------------------------------
/src/main/resources/ui/js:
--------------------------------------------------------------------------------
1 | ../../../../ui/js/
--------------------------------------------------------------------------------
/src/main/scala/com/vivint/ceph/AppConfiguration.scala:
--------------------------------------------------------------------------------
1 | package com.vivint.ceph
2 |
3 | import org.rogach.scallop._
4 | import scala.concurrent.duration._
5 |
6 | class CephFrameworkOptions(args: List[String]) extends ScallopConf(args) {
7 |
8 |   def env(key: String) = Option(System.getenv(key))
9 |
10 |   val master = opt[String]("master", 'm',
11 |     required = true,
12 |     descr = "Mesos master location; in zk://ip1:port1,ip2:port2/mesos format. Can set via MESOS_MASTER",
13 |     default = env("MESOS_MASTER"))
14 |
15 |   val name = opt[String]("name",
16 |     descr = "framework name; can set via FRAMEWORK_NAME. Default = ceph",
17 |     default = env("FRAMEWORK_NAME").orElse(Some("ceph")))
18 |
19 |   val principal = opt[String]("principal",
20 |     descr = "mesos principal as which to authenticate; can set via MESOS_PRINCIPAL. Default = ceph.",
21 |     required = true,
22 |     default = env("MESOS_PRINCIPAL").orElse(Some("ceph")))
23 |
24 |   val role = opt[String]("role",
25 |     descr = "mesos role to use for reservations; can set via MESOS_ROLE. Default = ceph.",
26 |     default = env("MESOS_ROLE").orElse(Some("ceph")))
27 |
28 |   val secret = opt[String]("secret",
29 |     descr = "mesos secret with which to authenticate; can set via MESOS_SECRET.",
30 |     default = env("MESOS_SECRET"))
31 |
32 |   val zookeeper = opt[String]("zookeeper",
33 |     required = true,
34 |     descr = "Location of the zookeeper in which ceph-framework is to store its state. Don't prefix with zk://. Can set via ZOOKEEPER",
35 |     default = env("ZOOKEEPER"))
36 |
37 |   val publicNetwork = opt[String]("public-network",
38 |     required = true,
39 |     descr = "CIDR of the public network, in 0.0.0.0/24 format; can be set via PUBLIC_NETWORK",
40 |     default = env("PUBLIC_NETWORK") )
41 |
42 |   val clusterNetwork = opt[String]("cluster-network",
43 |     descr = "CIDR of the ceph cluster network, in 0.0.0.0/24 format; can be set via CLUSTER_NETWORK. Defaults to the public network.",
44 |     default = env("CLUSTER_NETWORK"))
45 |
46 |   val offerTimeout = opt[Int]("offer-timeout",
47 |     descr = "Duration in seconds after which offers time out; Default = 30.",
48 |     required = false,
49 |     default = env("OFFER_TIMEOUT").map(_.toInt).orElse(Some(30)))
50 |
51 |   val storageBackend = opt[String]("storage-backend",
52 |     descr = "KV storage backend. Options: file, memory, or zookeeper. 
Default = zookeeper.", 53 | required = false, 54 | default = Some("zookeeper")) 55 | 56 | val failoverTimeout = opt[Long]("failover-timeout", 57 | descr = "Duration in seconds after which to timeout the framework (stopping all tasks); Default = 31536000 (1 year)", 58 | required = false, 59 | default = env("FAILOVER_TIMEOUT").map(_.toLong).orElse(Some(31536000L))) 60 | 61 | val apiPort = opt[Int]("api-port", 62 | descr = s"HTTP API port; can be set via API_PORT; default 8080", 63 | default = env("API_PORT").map(_.toInt)).orElse(Some(8080)) 64 | 65 | val apiHost = opt[String]("api-host", 66 | descr = s"HTTP API host; can be set via API_HOST; default 127.0.0.1", 67 | default = env("API_HOST")).orElse(Some("127.0.0.1")) 68 | } 69 | 70 | case class AppConfiguration( 71 | master: String, 72 | name: String, 73 | principal: String, 74 | secret: Option[String], 75 | role: String, 76 | zookeeper: String, 77 | offerTimeout: FiniteDuration, 78 | publicNetwork: String, 79 | clusterNetwork: String, 80 | storageBackend: String, 81 | failoverTimeout: Long = 31536000L, 82 | apiPort: Int = 8080, 83 | apiHost: String = "127.0.0.1" 84 | ) { 85 | require(AppConfiguration.validStorageBackends.contains(storageBackend)) 86 | } 87 | 88 | object AppConfiguration { 89 | val validStorageBackends = Set("zookeeper", "file", "memory") 90 | def fromOpts(o: CephFrameworkOptions): AppConfiguration = { 91 | AppConfiguration( 92 | master = o.master(), 93 | name = o.name(), 94 | principal = o.principal(), 95 | secret = o.secret.toOption, 96 | role = o.role.toOption. 97 | orElse(o.principal.toOption). 98 | getOrElse(Constants.DefaultRole), 99 | zookeeper = o.zookeeper(), 100 | offerTimeout = o.offerTimeout().seconds, 101 | publicNetwork = o.publicNetwork(), 102 | clusterNetwork = o.clusterNetwork.toOption.getOrElse(o.publicNetwork()), 103 | storageBackend = o.storageBackend(), 104 | failoverTimeout = o.failoverTimeout(), 105 | apiPort = o.apiPort(), 106 | apiHost = o.apiHost() 107 | ) 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/ClusterSecretStore.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph 2 | 3 | import com.vivint.ceph.kvstore.KVStore 4 | import scala.concurrent.{ ExecutionContext, Future } 5 | import java.nio.charset.StandardCharsets.UTF_8 6 | import model.ClusterSecrets 7 | import model.PlayJsonFormats._ 8 | import play.api.libs.json._ 9 | 10 | // We should probably encrypt this 11 | object ClusterSecretStore { 12 | def createOrGenerateSecrets(kvStore: KVStore): Future[ClusterSecrets] = { 13 | import ExecutionContext.Implicits.global 14 | 15 | kvStore.get("secrets.json").flatMap { 16 | case Some(secrets) => 17 | Future.successful( 18 | Json.parse(secrets).as[ClusterSecrets]) 19 | case None => 20 | val secrets = ClusterSecrets.generate 21 | kvStore.create("secrets.json", Json.toJson(secrets).toString().getBytes(UTF_8)).map { case _ => 22 | secrets } 23 | } 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/ConfigStore.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph 2 | 3 | import akka.stream.scaladsl.Flow 4 | import java.nio.charset.StandardCharsets.UTF_8 5 | import model.CephConfigHelper 6 | import org.slf4j.LoggerFactory 7 | import scala.concurrent.{ ExecutionContext, Future } 8 | 9 | case class ConfigStore(kvStore: 
kvstore.KVStore) { 10 | private val log = LoggerFactory.getLogger(getClass) 11 | val configPath = "ceph.conf" 12 | 13 | def storeConfigIfNotExist(): Future[Unit] = { 14 | import ExecutionContext.Implicits.global 15 | 16 | kvStore.get(configPath).flatMap { 17 | case None => 18 | kvStore.createOrSet(configPath, ConfigStore.default()) 19 | case Some(_) => 20 | Future.successful(()) 21 | } 22 | } 23 | 24 | val configParsingFlow = Flow[Option[Array[Byte]]]. 25 | map { 26 | case Some(bytes) => 27 | try Some(CephConfigHelper.parse(bytes)) 28 | catch { case ex: Throwable => 29 | log.error("Error parsing configuration", ex) 30 | None 31 | } 32 | case None => 33 | log.error("No configuration detected.") 34 | None 35 | } 36 | def stream = 37 | kvStore.watch(configPath). 38 | via(configParsingFlow) 39 | 40 | def getText = 41 | kvStore.get(configPath) 42 | 43 | def storeText(str: String) = { 44 | try { 45 | model.CephConfigHelper.parse(str) 46 | kvStore.set(configPath, str.getBytes(UTF_8)) 47 | } catch { 48 | case ex: Throwable => 49 | Future.failed(ex) 50 | } 51 | } 52 | 53 | def get = { 54 | getText. 55 | map { 56 | case Some(bytes) => 57 | CephConfigHelper.parse(bytes) 58 | case None => 59 | throw new RuntimeException("No configuration detected") 60 | }(ExecutionContext.global) 61 | } 62 | } 63 | 64 | object ConfigStore { 65 | def default(environment: Map[String, String] = sys.env): Array[Byte] = { 66 | val setMonInitFixedPort: String => String = { input => 67 | environment.get("CEPH_MON_INIT_PORT") match { 68 | case Some(port) => 69 | input.replace("# port = 2015", s"port = ${port}") 70 | case None => 71 | input 72 | } 73 | } 74 | 75 | import org.apache.commons.io.IOUtils 76 | val f = getClass.getResourceAsStream("/deployment-config.conf") 77 | try { 78 | val defaultTemplate = new String(IOUtils.toByteArray(f)) 79 | setMonInitFixedPort(defaultTemplate). 
80 | getBytes(UTF_8) 81 | } 82 | finally { f.close() } 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/Constants.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph 2 | 3 | object Constants { 4 | val LockPath = "master-lock" 5 | val FrameworkIdLabel = "framework-id" 6 | val ReservationIdLabel = "reservation-id" 7 | val JobIdLabel = "job-id" 8 | val PortLabel = "port" 9 | val HostnameLabel = "hostname" 10 | val DefaultCluster = "ceph" 11 | val DefaultRole = "ceph" 12 | } 13 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/FrameworkActor.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph 2 | 3 | import akka.actor.{ Actor, ActorContext, ActorLogging, ActorRef, Cancellable, DeadLetter, Stash } 4 | import akka.pattern.pipe 5 | import java.util.Collections 6 | import java.util.concurrent.TimeoutException 7 | import org.apache.mesos.Protos._ 8 | import org.apache.mesos._ 9 | import org.slf4j.LoggerFactory 10 | import com.vivint.ceph.kvstore.{KVStore, CrashingKVStore} 11 | import scala.concurrent.Await 12 | import scaldi.Injectable._ 13 | import scaldi.Injector 14 | import scala.concurrent.duration._ 15 | import scala.collection.mutable 16 | import scala.collection.immutable.Iterable 17 | import scala.collection.JavaConverters._ 18 | import FrameworkActor._ 19 | 20 | class FrameworkActor(implicit val injector: Injector) extends Actor with ActorLogging with Stash { 21 | val kvStore = CrashingKVStore(inject[KVStore]) 22 | val frameworkStore = inject[FrameworkIdStore] 23 | val frameworkTemplate = inject[FrameworkInfo] 24 | val credentials = inject[Option[Credential]] 25 | val options = inject[AppConfiguration] 26 | val pendingOffers = mutable.Map.empty[OfferID, Cancellable] 27 | lazy val taskActor = inject[ActorRef](classOf[TaskActor]) 28 | 29 | case class FrameworkIdLoaded(o: Option[FrameworkID]) 30 | override def preStart(): Unit = { 31 | import context.dispatcher 32 | frameworkStore.initial.map(FrameworkIdLoaded) pipeTo self 33 | } 34 | 35 | override def preRestart(reason: Throwable, message: Option[Any]): Unit = { 36 | // crash hard 37 | context.stop(self) 38 | log.error(reason, s"Exiting due to Framework Actor crash. 
Last message = {}", message) 39 | Thread.sleep(100) 40 | System.exit(1) 41 | } 42 | 43 | var frameworkId: Option[FrameworkID] = None 44 | 45 | def receive = { 46 | case FrameworkIdLoaded(optFrameworkId) => 47 | frameworkId = optFrameworkId 48 | log.info("frameworkId state read; optFrameworkId = {}", optFrameworkId) 49 | 50 | unstashAll() 51 | connect() 52 | case _ => 53 | stash() 54 | } 55 | 56 | // def waitForFirstRegistration(timer: Cancellable): Receive = 57 | 58 | def connect(): Unit = { 59 | val framework = frameworkId.map { id => 60 | frameworkTemplate.toBuilder().setId(id).build 61 | } getOrElse { 62 | frameworkTemplate 63 | } 64 | 65 | log.info("starting scheduler") 66 | 67 | val scheduler = new FrameworkActorScheduler 68 | 69 | val driver = credentials match { 70 | case Some(c) => 71 | new MesosSchedulerDriver(scheduler, framework, options.master, true, c) 72 | case None => 73 | new MesosSchedulerDriver(scheduler, framework, options.master, true) 74 | } 75 | // We exit on exception in this actor so we don't have to worry about closing the driver 76 | val status = driver.start() 77 | if (status != Status.DRIVER_RUNNING) 78 | throw new RuntimeException(s"Error starting framework: ${status}") 79 | // status.getNumber == Stat 80 | 81 | import context.dispatcher 82 | val timeout = context.system.scheduler.scheduleOnce(30.seconds, self, 'timeout) 83 | context.become({ 84 | case 'timeout => 85 | throw new TimeoutException("timed out while attempting to register framework with mesos") 86 | case r: Registered => 87 | log.info("Registered! ID = " + r.frameworkId.getValue) 88 | frameworkId match { 89 | case None => 90 | // It's pretty crucial that we don't continue if this fails 91 | frameworkId = Some(r.frameworkId) 92 | Await.result(frameworkStore.set(r.frameworkId), 30.seconds) 93 | case Some(frameworkId) if frameworkId != r.frameworkId => 94 | throw new RuntimeException(s"Framework launched with different value than that which was persisted. " + 95 | s"${frameworkId.getValue} != ${r.frameworkId.getValue}. Cowardly refusing to proceed.") 96 | case _ => 97 | () 98 | } 99 | 100 | timeout.cancel() 101 | stash() // we want to re-handle this message in the next behavior 102 | unstashAll() 103 | context.become(registationHandler) 104 | case _ => 105 | stash() 106 | }) 107 | } 108 | 109 | context.system.eventStream.subscribe(self, classOf[DeadLetter]) 110 | 111 | val registationHandler: Receive = { 112 | case Registered(driver, newFrameworkId, masterInfo) => 113 | taskActor ! Connected 114 | unstashAll() 115 | context.become(connected(driver)) 116 | 117 | case Reregistered(driver, masterInfo) => 118 | log.info("Reregistered") 119 | taskActor ! Connected 120 | unstashAll() 121 | context.become(connected(driver)) 122 | } 123 | 124 | def disconnected: Receive = registationHandler orElse { 125 | case Error(er) => 126 | throw new RuntimeException("Framework error: s{er}") 127 | case _ => 128 | stash() 129 | } 130 | 131 | def connected(driver: SchedulerDriver): Receive = registationHandler orElse { 132 | case newDriver: SchedulerDriver => 133 | context.become(connected(driver)) 134 | 135 | case Disconnected => 136 | pendingOffers.clear() 137 | context.become(disconnected) 138 | 139 | case statusUpdate: StatusUpdate => 140 | taskActor ! statusUpdate 141 | 142 | case o @ ResourceOffers(offers) => 143 | log.debug("received {} offers from mesos. 
Forwarding to TaskActor", offers.length) 144 | offers.foreach { offer => 145 | pendingOffers(offer.getId) = context.system.scheduler.scheduleOnce(options.offerTimeout) { 146 | log.debug(s"Timing out offer {}", offer.getId) 147 | self ! DeclineOffer(offer.getId, Some(0.seconds)) 148 | }(context.dispatcher) 149 | } 150 | taskActor ! o 151 | 152 | case d: DeadLetter => 153 | d match { 154 | case DeadLetter(resourceOffers: ResourceOffers, self, taskActor) => 155 | log.info("offer was not received. Declining") 156 | resourceOffers.offers.foreach { offer => 157 | self ! DeclineOffer(offer.getId, Some(30.seconds)) 158 | } 159 | case _ => 160 | } 161 | 162 | case cmd: Command => 163 | cmd match { 164 | case ReviveOffers => 165 | driver.reviveOffers() 166 | 167 | /* commands */ 168 | case DeclineOffer(offerId, refuseFor) => 169 | processingOffer(offerId) { 170 | log.debug(s"Decline offer {}", offerId) 171 | driver.declineOffer( 172 | offerId, 173 | ProtoHelpers.newFilters(refuseFor)) 174 | } 175 | case AcceptOffer(offerId, operations, refuseFor) => 176 | processingOffer(offerId) { 177 | if(log.isDebugEnabled) 178 | log.debug(s"Operations on ${offerId.getValue}:\n${operations.mkString("\n")}") 179 | driver.acceptOffers( 180 | Collections.singleton(offerId), 181 | operations.asJavaCollection, 182 | ProtoHelpers.newFilters(refuseFor.orElse(Some(0.seconds)))) 183 | } 184 | 185 | case Reconcile(tasks) => 186 | val status = driver.reconcileTasks(tasks.asJava) 187 | log.info("beginning reconciliation; status is {}", status) 188 | case KillTask(taskId) => 189 | driver.killTask(taskId) 190 | } 191 | } 192 | 193 | 194 | def processingOffer(offerId: OfferID)(body: => Unit): Unit = { 195 | pendingOffers.get(offerId).map { timer => 196 | body 197 | timer.cancel() 198 | pendingOffers.remove(offerId) 199 | true 200 | } 201 | } 202 | } 203 | 204 | object FrameworkActor { 205 | sealed trait ConnectEvent 206 | /* Mesos updates */ 207 | case class Registered(driver: SchedulerDriver, frameworkId: FrameworkID, masterInfo: MasterInfo) extends ConnectEvent 208 | case class Reregistered(driver: SchedulerDriver, masterInfo: MasterInfo) extends ConnectEvent 209 | case object Connected 210 | case object Disconnected 211 | case class ResourceOffers(offers: List[Offer]) 212 | case class OfferRescinded(offerId: OfferID) 213 | case class StatusUpdate(status: TaskStatus) 214 | case class FrameworkMessage(executorId: ExecutorID, slaveId: SlaveID, data: Array[Byte]) 215 | case class SlaveLost(slaveId: SlaveID) 216 | case class ExecutorLost(executorId: ExecutorID, slaveId: SlaveID, status: Int) 217 | case class Error(message: String) 218 | 219 | /* Mesos commands */ 220 | sealed trait Command 221 | sealed trait OfferResponseCommand extends Command { 222 | def offerId: OfferID 223 | } 224 | 225 | case class DeclineOffer(offerId: OfferID, refuseFor: Option[FiniteDuration] = None) extends OfferResponseCommand 226 | case class AcceptOffer(offerId: OfferID, operations: Seq[Offer.Operation] = Nil, 227 | refuseFor: Option[FiniteDuration] = None) extends OfferResponseCommand 228 | case class Reconcile(tasks: List[TaskStatus]) extends Command 229 | case class KillTask(taskId: TaskID) extends Command 230 | case object ReviveOffers extends Command 231 | } 232 | 233 | 234 | class FrameworkActorScheduler(implicit context: ActorContext) 235 | extends Scheduler { 236 | val log = LoggerFactory.getLogger(getClass) 237 | import FrameworkActor._ 238 | override def registered(driver: SchedulerDriver, frameworkId: FrameworkID, masterInfo: MasterInfo): 
Unit = {
239 |     log.info("framework registered; frameworkId = {}, masterInfo = {}", frameworkId: Any, masterInfo : Any)
240 |     context.self ! Registered(driver, frameworkId, masterInfo)
241 |     log.info("context.self = {}", context.self)
242 |   }
243 |
244 |   override def reregistered(driver: SchedulerDriver, masterInfo: MasterInfo): Unit = {
245 |     context.self ! Reregistered(driver, masterInfo)
246 |   }
247 |
248 |   override def disconnected(driver: SchedulerDriver): Unit = {
249 |     context.self ! Disconnected
250 |   }
251 |
252 |   override def resourceOffers(driver: SchedulerDriver, offers: java.util.List[Offer]): Unit = {
253 |     import scala.collection.JavaConversions._
254 |     context.self ! driver
255 |     context.self ! ResourceOffers(offers.toList)
256 |   }
257 |
258 |   override def offerRescinded(driver: SchedulerDriver, offerId: OfferID): Unit = {
259 |     context.self ! driver
260 |     context.self ! OfferRescinded(offerId)
261 |   }
262 |
263 |   override def statusUpdate(driver: SchedulerDriver, status: TaskStatus): Unit = {
264 |     context.self ! driver
265 |     context.self ! StatusUpdate(status)
266 |   }
267 |
268 |   override def frameworkMessage(driver: SchedulerDriver, executorId: ExecutorID, slaveId: SlaveID, data: Array[Byte]):
269 |     Unit = {
270 |     context.self ! driver
271 |     context.self ! FrameworkMessage(executorId, slaveId, data)
272 |   }
273 |
274 |   override def slaveLost(driver: SchedulerDriver, slaveId: SlaveID): Unit = {
275 |     context.self ! driver
276 |     context.self ! SlaveLost(slaveId)
277 |   }
278 |
279 |   override def executorLost(driver: SchedulerDriver, executorId: ExecutorID, slaveId: SlaveID, status: Int): Unit = {
280 |     context.self ! driver
281 |     context.self ! ExecutorLost(executorId, slaveId, status)
282 |   }
283 |
284 |   override def error(driver: SchedulerDriver, message: String): Unit = {
285 |     log.error("Error: {}", message)
286 |     context.self ! Error(message)
287 |   }
288 | }
--------------------------------------------------------------------------------
/src/main/scala/com/vivint/ceph/FrameworkIdStore.scala:
--------------------------------------------------------------------------------
1 | package com.vivint.ceph
2 |
3 | import com.vivint.ceph.kvstore.KVStore
4 | import scala.concurrent.{ ExecutionContext, Future, Promise }
5 | import org.apache.mesos.Protos
6 | import scaldi.Injector
7 | import scaldi.Injectable._
8 |
9 | case class FrameworkIdStore(implicit injector: Injector) {
10 |   private val kvStore = inject[KVStore]
11 |   private val ksPath = "frameworkId"
12 |
13 |   private val getP = Promise[Protos.FrameworkID]
14 |   val get = getP.future
15 |
16 |   import ExecutionContext.Implicits.global
17 |
18 |   val initial = kvStore.get(ksPath).map { _.map { bytes =>
19 |     Protos.FrameworkID.newBuilder().setValue(new String(bytes, "UTF-8")).build
20 |   }}
21 |
22 |   initial.foreach {
23 |     case Some(fId) => getP.trySuccess(fId)
24 |     case None => ()
25 |   }
26 |
27 |
28 |   /**
29 |     * Asynchronously updates the value; the returned Future fails on error. 
30 | */ 31 | def set(value: Protos.FrameworkID): Future[Unit] = { 32 | val bytes = value.getValue.getBytes("UTF-8") 33 | kvStore.createOrSet(ksPath, bytes).andThen { case _ => 34 | getP.trySuccess(value) 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/JobFSM.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph 2 | 3 | import akka.actor.Cancellable 4 | import akka.event.LoggingAdapter 5 | import com.vivint.ceph.model.{ RunState, ServiceLocation, JobRole } 6 | import java.util.UUID 7 | import org.apache.mesos.Protos 8 | import mesosphere.mesos.matcher.ResourceMatcher 9 | import com.vivint.ceph.model.{PersistentState,Job} 10 | import JobFSM._ 11 | import scala.concurrent.duration._ 12 | import scala.collection.immutable.Iterable 13 | 14 | object JobFSM { 15 | sealed trait Event 16 | /** 17 | * external event caused the task state to change 18 | */ 19 | case class JobUpdated(prior: Job) extends Event { 20 | def taskStatusChanged(current: Job): Boolean = 21 | prior.taskStatus != current.taskStatus 22 | } 23 | case class MatchedOffer(offer: PendingOffer, matchResult: Option[ResourceMatcher.ResourceMatch]) extends Event 24 | case class Timer(id: Any) extends Event 25 | 26 | } 27 | 28 | class JobFSM(jobs: JobsState, log: LoggingAdapter, behaviorSet: BehaviorSet, 29 | setTimer: (Job, String, FiniteDuration) => Cancellable, 30 | revive: () => Unit, 31 | killTask: (String => Unit) 32 | ) { 33 | // TODO - for each task 34 | type JobId = UUID 35 | type TimerName = String 36 | import scala.collection.mutable 37 | private val jobTimers = mutable.Map.empty[JobId, mutable.Map[TimerName, Cancellable]] 38 | 39 | jobs.addSubscriber { 40 | case (Some(before), Some(after)) => 41 | handleEvent(after, JobFSM.JobUpdated(before)) 42 | } 43 | 44 | private def setBehaviorTimer(job: Job, timerName: TimerName, duration: FiniteDuration): Unit = { 45 | val timers = jobTimers.get(job.id).getOrElse { 46 | jobTimers(job.id) = mutable.Map.empty 47 | jobTimers(job.id) 48 | } 49 | 50 | timers.get(timerName).foreach(_.cancel()) 51 | timers(timerName) = setTimer(job, timerName, duration) 52 | } 53 | 54 | private def clearTimers(job: Job): Unit = 55 | jobTimers. 56 | remove(job.id). 57 | getOrElse(mutable.Map.empty). 58 | foreach { case (id, cancellable) => 59 | cancellable.cancel() 60 | } 61 | 62 | def onTimer(jobId: JobId, timerName: TimerName, behavior: Behavior): Unit = 63 | for { 64 | cancellable <- jobTimers(jobId).remove(timerName) 65 | job <- jobs.get(jobId) 66 | if behavior == job.behavior 67 | } { 68 | cancellable.cancel() // just in case it was manually invoked? 
69 | log.debug("Timer {} for jobId {} fired", timerName, jobId) 70 | handleEvent(job, JobFSM.Timer(timerName)) 71 | } 72 | 73 | 74 | private def processEvents(job: Job, events: List[JobFSM.Event]): Job = events match { 75 | case event :: rest => 76 | log.debug("{} - sending event {}", job.id, event.getClass.getName) 77 | 78 | processEvents( 79 | processDirective( 80 | job, 81 | job.behavior.handleEvent(event, job, jobs.all)), 82 | rest) 83 | case Nil => 84 | job 85 | } 86 | 87 | private def processHeldEvents(job: Job): Job = { 88 | job.heldOffer match { 89 | case Some((offer, resourceMatch)) => 90 | processEvents( 91 | job.copy(heldOffer = None), 92 | JobFSM.MatchedOffer(offer, resourceMatch) :: Nil) 93 | case None => 94 | job 95 | } 96 | } 97 | 98 | private def processAction(job: Job, action: Directives.Action): Job = { 99 | log.debug("{} - processing directive response action {}", job.id, action.getClass.getName) 100 | action match { 101 | case Directives.Hold(offer, resourceMatch) => 102 | // Decline existing held offer 103 | job.heldOffer.foreach { 104 | case (pending, _) => pending.decline() 105 | } 106 | job.copy(heldOffer = Some((offer, resourceMatch))) 107 | case Directives.Persist(data) => 108 | job.copy( 109 | pState = data, 110 | taskState = if (job.taskId != data.taskId) None else job.taskState 111 | ) 112 | case Directives.SetBehaviorTimer(name, duration: FiniteDuration) => 113 | setBehaviorTimer(job, name, duration) 114 | job 115 | case Directives.Revive => 116 | revive() 117 | job 118 | case Directives.WantOffers => 119 | if (job.heldOffer.isEmpty) { 120 | revive() 121 | job.copy(wantingNewOffer = true) 122 | } else { 123 | job 124 | } 125 | case Directives.KillTask => 126 | job.taskId.foreach(killTask) 127 | job 128 | case Directives.OfferResponse(pendingOffer, operations) => 129 | pendingOffer.respond(operations.toList) 130 | job 131 | } 132 | } 133 | 134 | final def handleEvent(job: Job, event: JobFSM.Event): Unit = { 135 | jobs.updateJob( 136 | processEvents(job, List(event))) 137 | } 138 | 139 | final def initialize(job: Job): Unit = { 140 | jobs.updateJob( 141 | initializeBehavior(job)) 142 | } 143 | 144 | private final def processDirective(job: Job, directive: Directives.Directive): Job = { 145 | val jobAfterAction = directive.action.foldLeft(job)(processAction) 146 | 147 | directive.transition match { 148 | case Some(nextBehavior) => 149 | clearTimers(job) 150 | log.info("job {}: Transition {} -> {}", job.id, job.behavior.name, nextBehavior.name) 151 | processHeldEvents( 152 | initializeBehavior(jobAfterAction.copy(behavior = nextBehavior))) 153 | 154 | case None => 155 | jobAfterAction 156 | } 157 | } 158 | 159 | private final def initializeBehavior(job: Job): Job = { 160 | log.info("job {}: Initializing behavior {}", job.id, job.behavior.name) 161 | val maybeRemoveHeldOffer = 162 | if (job.heldOffer.map(_._1.responded).contains(true)) 163 | job.copy(heldOffer = None) 164 | else 165 | job 166 | processDirective(maybeRemoveHeldOffer, 167 | job.behavior.preStart(maybeRemoveHeldOffer, jobs.all)) 168 | } 169 | 170 | def defaultBehavior(role: JobRole.EnumVal): Behavior = 171 | behaviorSet.defaultBehavior(role) 172 | } 173 | 174 | object Directives { 175 | sealed trait Action { 176 | def withTransition(b: Behavior): Directive = 177 | Directive(List(this), Some(b)) 178 | def andAlso(other: Action): ActionList = 179 | ActionList(this :: other :: Nil) 180 | def andAlso(other: Directive): Directive = 181 | other.copy(action = this :: other.action) 182 | } 183 | 184 | case 
class ActionList(actions: List[Action]) { 185 | def withTransition(b: Behavior): Directive = 186 | Directive(actions, Some(b)) 187 | 188 | def andAlso(other: Action): ActionList = 189 | ActionList(actions :+ other) 190 | } 191 | 192 | /** Update the persistent state for a job. State is stored asynchronously and success can be tracked by version and 193 | * persistentVersion job fields. 194 | * If changing taskId then TaskState is assumed unknown (None) 195 | */ 196 | case class Persist(data: PersistentState) extends Action 197 | case object KillTask extends Action 198 | case class Hold(offer: PendingOffer, resourceMatch: Option[ResourceMatcher.ResourceMatch]) extends Action 199 | case object WantOffers extends Action 200 | case object Revive extends Action 201 | case class OfferResponse(offer: PendingOffer, operations: Iterable[Protos.Offer.Operation]) extends Action 202 | case class SetBehaviorTimer(id: String, duration: FiniteDuration) extends Action 203 | case class Directive(action: List[Action] = Nil, transition: Option[Behavior] = None) 204 | val Stay = Directive() 205 | final def Transition(behavior: Behavior) = Directive(Nil, Some(behavior)) 206 | 207 | object Directive { 208 | import scala.language.implicitConversions 209 | implicit def fromAction(action: Action): Directive = { 210 | Directive(List(action), None) 211 | } 212 | implicit def fromActionList(actionList: ActionList): Directive = { 213 | Directive(actionList.actions, None) 214 | } 215 | } 216 | 217 | } 218 | 219 | trait Behavior { 220 | import Directives._ 221 | lazy val name = getClass.getSimpleName.replace("$", "") 222 | 223 | @deprecated("use preStart", "now") 224 | final def initialize(state: Job, fullState: Map[UUID, Job]): Directive = 225 | preStart(state, fullState) 226 | 227 | @deprecated("use handleEvent", "now") 228 | final def submit(event: Event, state: Job, fullState: Map[UUID, Job]): Directive = 229 | handleEvent(event, state, fullState) 230 | 231 | /** 232 | * Method provides an opportunity to set the next step 233 | */ 234 | def preStart(state: Job, fullState: Map[UUID, Job]): Directive = Stay 235 | def handleEvent(event: Event, state: Job, fullState: Map[UUID, Job]): Directive 236 | 237 | protected def handleWith(event: Event)(handler: PartialFunction[JobFSM.Event, Directive]): Directive = { 238 | if (handler.isDefinedAt(event)) 239 | handler(event) 240 | else 241 | Stay 242 | } 243 | } 244 | 245 | object Behavior { 246 | type DecideFunction = (Job, Map[UUID, Job]) => Directives.Directive 247 | type TransitionFunction = (Job, Map[UUID, Job]) => Behavior 248 | } 249 | 250 | trait BehaviorSet { 251 | def defaultBehavior(role: model.JobRole.EnumVal): Behavior 252 | } 253 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/JobStore.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph 2 | 3 | import com.vivint.ceph.kvstore.KVStore 4 | import scala.collection.immutable.Seq 5 | import scala.concurrent.{ ExecutionContext, Future } 6 | import scala.async.Async.{async, await} 7 | import play.api.libs.json._ 8 | import java.nio.charset.StandardCharsets.UTF_8 9 | 10 | case class JobStore(kvStore: KVStore) { 11 | private val tasksPath = "tasks" 12 | import ExecutionContext.Implicits.global 13 | import model._ 14 | import PlayJsonFormats._ 15 | 16 | def getTasks: Future[Seq[PersistentState]] = async { 17 | val paths = await(kvStore.children(tasksPath)). 
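// children() returns names relative to tasksPath; prefix them so getAll below receives full keys.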
18 | map { path => 19 | (tasksPath + "/" + path) 20 | } 21 | 22 | await(kvStore.getAll(paths)). 23 | zip(paths). 24 | map { 25 | case (Some(bytes), _) => 26 | (Json.parse(bytes).as[PersistentState]) 27 | case (None, path) => 28 | throw new RuntimeException(s"Error: empty task state at path ${path}") 29 | } 30 | } 31 | 32 | def save(task: PersistentState): Future[Unit] = { 33 | val data = Json.toJson(task).toString 34 | kvStore.createOrSet(s"tasks/${task.role}:" + task.id.toString, data.getBytes(UTF_8)) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/JobsState.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph 2 | 3 | import akka.event.LoggingAdapter 4 | import model.Job 5 | import JobsState._ 6 | import java.util.UUID 7 | 8 | object JobsState { 9 | type Subscriber = PartialFunction[(Option[Job], Option[Job]), Unit] 10 | } 11 | 12 | class JobsState(log: LoggingAdapter) { 13 | private [this] var _jobs: Map[UUID, Job] = Map.empty 14 | private [this] var subscribers = List.empty[Subscriber] 15 | def all = _jobs 16 | def values = _jobs.values 17 | def addSubscriber(subscriber: Subscriber): Unit = { 18 | subscribers = subscriber :: subscribers 19 | } 20 | 21 | def get(id: UUID): Option[Job] = _jobs.get(id) 22 | 23 | def apply(id: UUID): Job = _jobs(id) 24 | 25 | def contains(id: UUID): Boolean = _jobs contains id 26 | 27 | def getByTaskId(taskId: String) = _jobs.values.find(_.taskId.contains(taskId)) 28 | 29 | def containsTaskId(taskId: String) = _jobs.values.exists(_.taskId.contains(taskId)) 30 | 31 | def getByReservationId(reservationId: UUID): Option[Job] = 32 | _jobs.values.find(_.reservationId.contains(reservationId)) 33 | 34 | def containsReservationId(reservationId: UUID) = 35 | _jobs.values.exists(_.reservationId.contains(reservationId)) 36 | 37 | /** Given an updated task status, increments persistent state version if it has been changed. Calls all registered 38 | * subscribers. 
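 * Example subscriber (illustrative; mirrors the registration pattern used by JobFSM):
 * {{{
 *   jobs.addSubscriber {
 *     case (Some(before), Some(after)) if before.pState != after.pState =>
 *       log.debug("persistent state changed for job {}", after.id)
 *   }
 * }}}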
39 | */ 40 | def updateJob(update: Job): Job = { 41 | val prior = _jobs.get(update.id) 42 | if (prior.contains(update)) 43 | return update 44 | 45 | if (update.purged) { 46 | log.debug("task purged: {}", model.PlayJsonFormats.JobWriter.writes(update)) 47 | _jobs = _jobs - update.id 48 | callSubscribers((prior, None)) 49 | update 50 | } else { 51 | val nextTask = 52 | if (_jobs.get(update.id).map(_.pState) != Some(update.pState)) { 53 | val nextVersion = update.version + 1 54 | update.copy( 55 | version = nextVersion) 56 | } else { 57 | update 58 | } 59 | if (log.isDebugEnabled) 60 | log.debug("task updated: {}", model.PlayJsonFormats.JobWriter.writes(nextTask)) 61 | 62 | _jobs = _jobs.updated(update.id, nextTask) 63 | callSubscribers((prior, Some(nextTask))) 64 | 65 | nextTask 66 | } 67 | } 68 | 69 | 70 | private def callSubscribers(event: (Option[Job], Option[Job])): Unit = 71 | subscribers.foreach { subscriber => 72 | if(subscriber.isDefinedAt(event)) 73 | subscriber(event) 74 | } 75 | 76 | def updatePersistence(id: UUID, version: Long) = { 77 | _jobs.get(id) foreach { task => 78 | updateJob( 79 | task.copy(persistentVersion = Math.max(task.persistentVersion, version))) 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/Main.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph 2 | 3 | import akka.actor.{ ActorRef, ActorSystem, Props } 4 | import akka.pattern.BackoffSupervisor 5 | import org.apache.mesos.Protos._ 6 | import scala.concurrent.ExecutionContext 7 | import scala.concurrent.duration._ 8 | import scala.concurrent.Await 9 | import com.vivint.ceph.kvstore.KVStore 10 | import scaldi.Module 11 | 12 | trait FrameworkModule extends Module { 13 | 14 | bind [FrameworkInfo] to { 15 | val config = inject[AppConfiguration] 16 | val kvStore = inject[KVStore] 17 | 18 | val frameworkBuilder = FrameworkInfo.newBuilder(). 19 | setUser(""). 20 | setName(config.name). 21 | setCheckpoint(true). 22 | setRole(config.role). 23 | setPrincipal(config.principal). 24 | setCheckpoint(true). 25 | setFailoverTimeout(config.failoverTimeout.toDouble) 26 | 27 | frameworkBuilder.build() 28 | } 29 | } 30 | 31 | class Universe(config: AppConfiguration) extends FrameworkModule with Module { 32 | implicit val system = ActorSystem("ceph-on-mesos") 33 | 34 | bind [() => java.time.ZonedDateTime] identifiedBy 'now to { () => java.time.ZonedDateTime.now() } 35 | bind [AppConfiguration] to config 36 | 37 | bind [KVStore] to { 38 | config.storageBackend match { 39 | case "zookeeper" => 40 | new kvstore.ZookeeperStore 41 | case "file" => 42 | new kvstore.FileStore(new java.io.File("data")) 43 | case "memory" => 44 | new kvstore.MemStore 45 | } 46 | } 47 | 48 | bind [FrameworkIdStore] to (new FrameworkIdStore) 49 | bind [ActorSystem] to system destroyWith { _ => 50 | try { 51 | System.err.println("Shutting down actorSystem") 52 | Await.result(system.terminate(), 10.seconds) 53 | System.err.println("Actor system shut down") 54 | } catch { 55 | case ex: Throwable => 56 | System.err.println(s"Unable to shutdown actor system within timeout: ${ex.getMessage}") 57 | ex.printStackTrace(System.err) 58 | } 59 | } 60 | 61 | bind [views.ConfigTemplates] to new views.ConfigTemplates 62 | bind [OfferOperations] to new OfferOperations 63 | bind [Option[Credential]] to { 64 | config.secret.map { secret => 65 | Credential.newBuilder(). 66 | setPrincipal(config.principal). 
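// The principal / secret pair is presented to the Mesos master for framework authentication.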
67 | setSecret(secret). 68 | build() 69 | } 70 | } 71 | 72 | bind [ActorRef] identifiedBy (classOf[ReservationReaperActor]) to { 73 | system.actorOf( 74 | BackoffSupervisor.props(childProps = Props(new ReservationReaperActor), 75 | childName = "release-actor", 76 | minBackoff = 1.second, 77 | maxBackoff = 10.seconds, 78 | randomFactor = 0.2), 79 | "release-actor-backoff" 80 | ) 81 | } 82 | 83 | bind [ActorRef] identifiedBy (classOf[TaskActor]) to { 84 | system.actorOf( 85 | BackoffSupervisor.props(childProps = Props(new TaskActor), 86 | childName = "task-actor", 87 | minBackoff = 1.second, 88 | maxBackoff = 10.seconds, 89 | randomFactor = 0.2), 90 | "task-actor-backoff" 91 | ) 92 | } 93 | 94 | bind [ActorRef] identifiedBy (classOf[FrameworkActor]) to { 95 | system.actorOf(Props(new FrameworkActor), "framework-actor") 96 | } 97 | bind [api.HttpService] to { new api.HttpService } 98 | } 99 | 100 | object Main extends App { 101 | val cmdLineOpts = new CephFrameworkOptions(args.toList) 102 | cmdLineOpts.verify() 103 | val config = AppConfiguration.fromOpts(cmdLineOpts) 104 | 105 | val module = new Universe(config) 106 | import module.injector 107 | import scaldi.Injectable._ 108 | 109 | implicit val actorSystem = inject[ActorSystem] 110 | 111 | val taskActor = inject[ActorRef](classOf[TaskActor]) 112 | val frameworkActor = inject[ActorRef](classOf[FrameworkActor]) 113 | val httpService = inject[api.HttpService] 114 | 115 | def dieWith(ex: Throwable): Unit = { 116 | System.err.println(s"Error starting API service: ${ex.getMessage}") 117 | ex.printStackTrace(System.err) 118 | try { module.destroy(_ => true) } 119 | catch { case ex: Throwable => println("le problem") } 120 | System.exit(1) 121 | } 122 | 123 | httpService.run().onFailure { 124 | case ex: Throwable => 125 | dieWith(ex) 126 | }(ExecutionContext.global) 127 | } 128 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/OfferMatchFactory.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph 2 | 3 | import java.util.UUID 4 | import mesosphere.marathon.Protos.Constraint 5 | import mesosphere.marathon.Protos.Constraint.Operator 6 | import mesosphere.marathon.state.{ PersistentVolume, PersistentVolumeInfo, DiskType } 7 | import mesosphere.mesos.matcher.{ DiskResourceMatcher, ResourceMatcher, ScalarMatchResult, ScalarResourceMatcher } 8 | import mesosphere.mesos.protos.Resource.{CPUS, MEM, DISK, PORTS} 9 | import org.apache.mesos.Protos 10 | import com.vivint.ceph.model.{ CephConfig, Job, JobRole } 11 | import OfferMatchFactory.{OfferMatcher, getPeers, peersAssignedToSlave, newPathConstraints} 12 | import scaldi.Injector 13 | import scaldi.Injectable._ 14 | 15 | object OfferMatchFactory { 16 | type OfferMatcher = (Protos.Offer, Job, Iterable[Job]) => Option[ResourceMatcher.ResourceMatch] 17 | def getPeers(task: Job, allTasks: Iterable[Job]): Stream[Job] = { 18 | allTasks.toStream.filter { other => 19 | other.id != task.id && other.role == task.role 20 | } 21 | } 22 | 23 | def peersAssignedToSlave(slaveId: Protos.SlaveID, task: Job, allTasks: Iterable[Job]): Int = { 24 | val peers = getPeers(task, allTasks) 25 | val offerSlaveId = slaveId.getValue 26 | peers.map(_.pState.slaveId).collect { 27 | case Some(peerSlaveId) if peerSlaveId == offerSlaveId => 1 28 | }.length 29 | } 30 | 31 | def newPathConstraints(matcherOpt: Option[String]): Set[Constraint] = matcherOpt match { 32 | case Some(matcher) => 33 | Set( 34 | 
Constraint.newBuilder. 35 | setField("path"). 36 | setOperator(Operator.LIKE). 37 | setValue(matcher). 38 | build) 39 | case None => 40 | Set.empty 41 | } 42 | } 43 | 44 | trait OfferMatchFactory extends (CephConfig => Map[JobRole.EnumVal, OfferMatcher]) { 45 | } 46 | 47 | class RGWOfferMatcher(cephConfig: CephConfig, frameworkRole: String) extends OfferMatcher { 48 | val selector = ResourceMatcher.ResourceSelector.any(Set("*", frameworkRole)) 49 | 50 | val resourceMatchers = { 51 | val selector = ResourceMatcher.ResourceSelector.any(Set("*", frameworkRole)) 52 | val rgwConfig = cephConfig.deployment.rgw 53 | 54 | val portMatcher = if (rgwConfig.port.isEmpty) 55 | Some(new lib.SinglePortMatcher(selector)) 56 | else 57 | None 58 | 59 | List( 60 | new ScalarResourceMatcher( 61 | CPUS, cephConfig.deployment.rgw.cpus, selector, ScalarMatchResult.Scope.NoneDisk), 62 | new ScalarResourceMatcher( 63 | MEM, cephConfig.deployment.rgw.mem, selector, ScalarMatchResult.Scope.NoneDisk) 64 | ) ++ portMatcher 65 | } 66 | 67 | def apply(offer: Protos.Offer, task: Job, allTasks: Iterable[Job]): Option[ResourceMatcher.ResourceMatch] = { 68 | val count = peersAssignedToSlave(offer.getSlaveId, task, allTasks) 69 | if (count < cephConfig.deployment.rgw.max_per_host) { 70 | ResourceMatcher.matchResources(offer, resourceMatchers, selector) 71 | } else { 72 | None 73 | } 74 | } 75 | } 76 | 77 | class OSDOfferMatcher(cephConfig: CephConfig, frameworkRole: String) extends OfferMatcher { 78 | val selector = ResourceMatcher.ResourceSelector.any(Set("*", frameworkRole)) 79 | 80 | val resourceMatchers = { 81 | val selector = ResourceMatcher.ResourceSelector.any(Set("*", frameworkRole)) 82 | val osdConfig = cephConfig.deployment.osd 83 | 84 | val volume = PersistentVolume.apply( 85 | "state", 86 | PersistentVolumeInfo( 87 | osdConfig.disk, 88 | constraints = newPathConstraints(cephConfig.deployment.osd.path_constraint), 89 | `maxSize` = osdConfig.disk_max.filter(_ => osdConfig.disk_type == DiskType.Mount), 90 | `type` = cephConfig.deployment.osd.disk_type), 91 | Protos.Volume.Mode.RW) 92 | 93 | List( 94 | new ScalarResourceMatcher( 95 | CPUS, cephConfig.deployment.osd.cpus, selector, ScalarMatchResult.Scope.NoneDisk), 96 | new ScalarResourceMatcher( 97 | MEM, cephConfig.deployment.osd.mem, selector, ScalarMatchResult.Scope.NoneDisk), 98 | new DiskResourceMatcher( 99 | selector, 0.0, List(volume), ScalarMatchResult.Scope.IncludingLocalVolumes), 100 | new lib.ContiguousPortMatcher(5, 101 | selector)) 102 | } 103 | 104 | def apply(offer: Protos.Offer, task: Job, allTasks: Iterable[Job]): Option[ResourceMatcher.ResourceMatch] = { 105 | val count = peersAssignedToSlave(offer.getSlaveId, task, allTasks) 106 | if (count < cephConfig.deployment.osd.max_per_host) { 107 | ResourceMatcher.matchResources(offer, resourceMatchers, selector) 108 | } else { 109 | None 110 | } 111 | } 112 | } 113 | 114 | class MonOfferMatcher(cephConfig: CephConfig, frameworkRole: String) extends OfferMatcher { 115 | val selector = ResourceMatcher.ResourceSelector.any(Set("*", frameworkRole)) 116 | 117 | val resourceMatchers = { 118 | val selector = ResourceMatcher.ResourceSelector.any(Set("*", frameworkRole)) 119 | 120 | val volume = PersistentVolume.apply( 121 | "state", 122 | PersistentVolumeInfo( 123 | cephConfig.deployment.mon.disk, 124 | constraints = newPathConstraints(cephConfig.deployment.mon.path_constraint), 125 | `type` = cephConfig.deployment.mon.disk_type), 126 | Protos.Volume.Mode.RW) 127 | 128 | // TODO - if matching reserved resources 
set matchers appropriately 129 | List( 130 | new ScalarResourceMatcher( 131 | CPUS, cephConfig.deployment.mon.cpus, selector, ScalarMatchResult.Scope.NoneDisk), 132 | new ScalarResourceMatcher( 133 | MEM, cephConfig.deployment.mon.mem, selector, ScalarMatchResult.Scope.NoneDisk), 134 | new DiskResourceMatcher( 135 | selector, 0.0, List(volume), ScalarMatchResult.Scope.IncludingLocalVolumes), 136 | cephConfig.deployment.mon.port match { 137 | case Some(port) => 138 | new lib.SpecificPortMatcher(port, selector) 139 | case None => 140 | new lib.SinglePortMatcher(selector) 141 | }) 142 | } 143 | 144 | def apply(offer: Protos.Offer, task: Job, allTasks: Iterable[Job]): Option[ResourceMatcher.ResourceMatch] = { 145 | val count = peersAssignedToSlave(offer.getSlaveId, task, allTasks) 146 | if (count < cephConfig.deployment.mon.max_per_host) { 147 | ResourceMatcher.matchResources(offer, resourceMatchers, selector) 148 | } else { 149 | None 150 | } 151 | } 152 | } 153 | 154 | class MasterOfferMatchFactory(implicit inj: Injector) extends OfferMatchFactory { 155 | val config = inject[AppConfiguration] 156 | 157 | def apply(cephConfig: CephConfig): Map[JobRole.EnumVal, OfferMatcher] = { 158 | Map( 159 | JobRole.Monitor -> (new MonOfferMatcher(cephConfig, frameworkRole = config.role)), 160 | JobRole.OSD -> (new OSDOfferMatcher(cephConfig, frameworkRole = config.role)), 161 | JobRole.RGW -> (new RGWOfferMatcher(cephConfig, frameworkRole = config.role)) 162 | ) 163 | } 164 | } 165 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/OfferOperations.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph 2 | 3 | import akka.actor.{ Actor, ActorContext, ActorLogging, ActorRef, Cancellable, FSM, Kill, Props, Stash } 4 | import akka.pattern.pipe 5 | import akka.stream.ActorMaterializer 6 | import akka.stream.scaladsl.{ Flow, Keep, Sink } 7 | import java.util.UUID 8 | import java.util.concurrent.TimeoutException 9 | import lib.FutureHelpers.tSequence 10 | import mesosphere.marathon.state.{ PersistentVolume, PersistentVolumeInfo, DiskSource } 11 | import mesosphere.mesos.matcher._ 12 | import mesosphere.mesos.protos 13 | import org.apache.mesos.Protos._ 14 | import scala.collection.immutable.Seq 15 | import scala.concurrent.Future 16 | import mesosphere.mesos.protos.Resource.DISK 17 | import scaldi.Injector 18 | import scaldi.Injectable._ 19 | 20 | class OfferOperations(implicit inj: Injector) { 21 | private val config = inject[AppConfiguration] 22 | import ProtoHelpers._ 23 | /** Destroy the persistent volumes and unreserve the reserved resources associated with this offer 24 | */ 25 | def unreserveOffer(resources: Iterable[Resource]): Seq[Offer.Operation] = { 26 | val volumesToDestroy = resources. 27 | filter { r => r.hasDisk && r.getDisk.hasPersistence } 28 | 29 | val offersToRelease = resources. 30 | filter(_.hasReservation) 31 | 32 | Seq( 33 | newOfferOperation(newDestroyOperation(volumesToDestroy)), 34 | newOfferOperation(newUnreserveOperation(offersToRelease))) 35 | } 36 | 37 | def volId(reservationId: UUID, containerPath: String): String = 38 | reservationId + "#" + containerPath 39 | 40 | /** Adapted from Marathon code.
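 * Builds a CREATE operation whose disk resources carry the framework, reservation, and job ids as labels, so orphaned reservations can later be identified and reaped (see ReservationReaperActor).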
41 | * TODO - put appropriate copyright information 42 | */ 43 | def createVolumes( 44 | frameworkId: FrameworkID, 45 | jobId: UUID, 46 | reservationId: UUID, 47 | localVolumes: Iterable[(DiskSource, PersistentVolume)]): Offer.Operation = { 48 | import scala.collection.JavaConverters._ 49 | 50 | val volumes: Iterable[Resource] = localVolumes.map { 51 | case (source, vol) => 52 | val disk = { 53 | val persistence = Resource.DiskInfo.Persistence.newBuilder(). 54 | setId(volId(reservationId = reservationId, containerPath = vol.containerPath)). 55 | setPrincipal(config.principal) 56 | 57 | val volume = Volume.newBuilder. 58 | setContainerPath(vol.containerPath). 59 | setMode(vol.mode) 60 | 61 | val builder = Resource.DiskInfo.newBuilder. 62 | setPersistence(persistence). 63 | setVolume(volume) 64 | source.asMesos.foreach(builder.setSource) 65 | builder 66 | } 67 | 68 | val reservation = Resource.ReservationInfo.newBuilder. 69 | setLabels(newLabels( 70 | Constants.FrameworkIdLabel -> frameworkId.getValue, 71 | Constants.ReservationIdLabel -> reservationId.toString, 72 | Constants.JobIdLabel -> jobId.toString)). 73 | setPrincipal(config.principal) 74 | 75 | Resource.newBuilder. 76 | setName(DISK). 77 | setType(Value.Type.SCALAR). 78 | setScalar(newScalar(vol.persistent.size.toDouble)). 79 | setRole(config.role). 80 | setReservation(reservation). 81 | setDisk(disk). 82 | build() 83 | } 84 | 85 | val create = Offer.Operation.Create.newBuilder(). 86 | addAllVolumes(volumes.asJava) 87 | 88 | newOfferOperation(create.build) 89 | } 90 | 91 | /** Adapted from Marathon code. 92 | * TODO - put appropriate copyright information */ 93 | def reserve(frameworkId: FrameworkID, jobId: UUID, reservationId: UUID, resources: Iterable[Resource]): 94 | Offer.Operation = { 95 | import scala.collection.JavaConverters._ 96 | val reservedResources = resources.map { resource => 97 | 98 | val reservation = Resource.ReservationInfo.newBuilder(). 99 | setLabels(newLabels( 100 | Constants.FrameworkIdLabel -> frameworkId.getValue, 101 | Constants.ReservationIdLabel -> reservationId.toString, 102 | Constants.JobIdLabel -> jobId.toString)). 103 | setPrincipal(config.principal) 104 | 105 | Resource.newBuilder(resource). 106 | setRole(config.role). 107 | setReservation(reservation). 108 | build() 109 | } 110 | 111 | val reserve = Offer.Operation.Reserve.newBuilder(). 112 | addAllResources(reservedResources.asJava). 113 | build() 114 | 115 | newOfferOperation(reserve) 116 | } 117 | 118 | def reserveAndCreateVolumes( 119 | frameworkId: FrameworkID, 120 | jobId: UUID, 121 | reservationId: UUID, 122 | resourceMatch: ResourceMatcher.ResourceMatch): List[Offer.Operation] = { 123 | 124 | val localVolumes = resourceMatch.matches. 125 | collect { case r: DiskResourceMatch => 126 | r.consumed 127 | }. 128 | flatten. 129 | collect { 130 | case DiskResourceMatch.Consumption(_, _, _, source, Some(volume)) => 131 | (source, volume) 132 | }. 133 | toList. 
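// distinct: drop duplicate (source, volume) pairs, if any, before building the reserve/create operations.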
134 | distinct 135 | 136 | List( 137 | reserve( 138 | frameworkId, 139 | jobId = jobId, 140 | reservationId = reservationId, 141 | resources = resourceMatch.resources), 142 | createVolumes( 143 | frameworkId, 144 | jobId = jobId, 145 | reservationId = reservationId, 146 | localVolumes = localVolumes)) 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/PendingOffer.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph 2 | 3 | import org.apache.mesos.Protos.Offer 4 | import scala.concurrent.Promise 5 | import scala.collection.immutable.Seq 6 | 7 | case class PendingOffer(offer: Offer) { 8 | private val resultingOperationsPromise = Promise[Seq[Offer.Operation]] 9 | def decline(): Boolean = respond(Nil) 10 | def respond(ops: Seq[Offer.Operation]): Boolean = 11 | resultingOperationsPromise.trySuccess(ops) 12 | def responded: Boolean = 13 | resultingOperationsPromise.isCompleted 14 | 15 | val resultingOperations = resultingOperationsPromise.future 16 | 17 | def slaveId = offer.getSlaveId.getValue 18 | } 19 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/ProtoHelpers.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph 2 | import org.apache.mesos.Protos._ 3 | import scala.collection.immutable.NumericRange 4 | import scala.concurrent.duration._ 5 | import scala.collection.JavaConversions._ 6 | import scala.collection.breakOut 7 | 8 | object ProtoHelpers { 9 | def newFilters(refuseDuration: Option[FiniteDuration]): Filters = { 10 | val b = Filters.newBuilder 11 | refuseDuration.foreach { d => 12 | b.setRefuseSeconds(d.toMillis / 1000.0) 13 | } 14 | b.build 15 | } 16 | 17 | def newDisk(source: Option[Resource.DiskInfo.Source]): Resource.DiskInfo = { 18 | val b = Resource.DiskInfo.newBuilder 19 | source.foreach(b.setSource) 20 | b.build 21 | } 22 | 23 | sealed trait OfferOperationMagnet { 24 | def apply(b: Offer.Operation.Builder): Offer.Operation.Builder 25 | } 26 | case class UnreserveOperationMagnet(operation: Offer.Operation.Unreserve) extends OfferOperationMagnet { 27 | def apply(b: Offer.Operation.Builder): Offer.Operation.Builder = { 28 | b. 29 | setType(Offer.Operation.Type.UNRESERVE). 30 | setUnreserve(operation) 31 | } 32 | } 33 | case class DestroyOperationMagnet(destroy: Offer.Operation.Destroy) extends OfferOperationMagnet { 34 | def apply(b: Offer.Operation.Builder): Offer.Operation.Builder = { 35 | b. 36 | setType(Offer.Operation.Type.DESTROY). 37 | setDestroy(destroy) 38 | } 39 | } 40 | case class CreateOperationMagnet(create: Offer.Operation.Create) extends OfferOperationMagnet { 41 | def apply(b: Offer.Operation.Builder): Offer.Operation.Builder = { 42 | b. 43 | setType(Offer.Operation.Type.CREATE). 44 | setCreate(create) 45 | } 46 | } 47 | case class ReserveOperationMagnet(reserve: Offer.Operation.Reserve) extends OfferOperationMagnet { 48 | def apply(b: Offer.Operation.Builder): Offer.Operation.Builder = { 49 | b. 50 | setType(Offer.Operation.Type.RESERVE). 51 | setReserve(reserve) 52 | } 53 | } 54 | case class LaunchOperationMagnet(launch: Offer.Operation.Launch) extends OfferOperationMagnet { 55 | def apply(b: Offer.Operation.Builder): Offer.Operation.Builder = { 56 | b. 57 | setType(Offer.Operation.Type.LAUNCH). 
58 | setLaunch(launch) 59 | } 60 | } 61 | object OfferOperationMagnet { 62 | import scala.language.implicitConversions 63 | implicit def fromUnreserve(r: Offer.Operation.Unreserve): OfferOperationMagnet = UnreserveOperationMagnet(r) 64 | implicit def fromDestroy(r: Offer.Operation.Destroy): OfferOperationMagnet = DestroyOperationMagnet(r) 65 | implicit def fromCreate(r: Offer.Operation.Create): OfferOperationMagnet = CreateOperationMagnet(r) 66 | implicit def fromReserve(r: Offer.Operation.Reserve): OfferOperationMagnet = ReserveOperationMagnet(r) 67 | implicit def fromLaunch(r: Offer.Operation.Launch): OfferOperationMagnet = LaunchOperationMagnet(r) 68 | } 69 | 70 | def newOfferOperation(operation: OfferOperationMagnet) = { 71 | val b = Offer.Operation.newBuilder 72 | operation(b) 73 | b.build 74 | } 75 | 76 | def newUnreserveOperation(resources: Iterable[Resource]): Offer.Operation.Unreserve = { 77 | val b = Offer.Operation.Unreserve.newBuilder 78 | resources.foreach(b.addResources) 79 | b.build 80 | } 81 | 82 | def newDestroyOperation(resources: Iterable[Resource]): Offer.Operation.Destroy = { 83 | val b = Offer.Operation.Destroy.newBuilder 84 | resources.foreach(b.addVolumes) 85 | b.build 86 | } 87 | 88 | def newLaunchOperation(taskInfos: Iterable[TaskInfo]): Offer.Operation.Launch = { 89 | val b = Offer.Operation.Launch.newBuilder 90 | taskInfos.foreach(b.addTaskInfos) 91 | b.build 92 | } 93 | 94 | def newLabels(kvs: (String, String)*): Labels = { 95 | val b = Labels.newBuilder 96 | 97 | kvs.foreach { case (key, value) => 98 | b.addLabels(newLabel(key,value)) 99 | } 100 | b.build 101 | } 102 | def newLabel(key: String, value: String): Label = { 103 | Label.newBuilder.setKey(key).setValue(value).build 104 | } 105 | 106 | 107 | def newParameters(kvs: (String, String)*): Parameters = { 108 | val b = Parameters.newBuilder 109 | kvs.foreach { case (key, value) => 110 | b.addParameter(newParameter(key,value)) 111 | } 112 | b.build 113 | } 114 | 115 | def newParameter(key: String, value: String): Parameter = { 116 | Parameter.newBuilder.setKey(key).setValue(value).build 117 | } 118 | 119 | def newTaskId(taskId: String): TaskID = { 120 | TaskID.newBuilder. 121 | setValue(taskId). 122 | build 123 | } 124 | 125 | def newVolume(containerPath: String, hostPath: String, mode: Volume.Mode = Volume.Mode.RW) = { 126 | Volume.newBuilder. 127 | setContainerPath(containerPath). 128 | setHostPath(hostPath). 129 | setMode(mode). 130 | build 131 | } 132 | 133 | def newVariable(name: String, value: String) = 134 | Environment.Variable.newBuilder. 135 | setName(name). 136 | setValue(value). 137 | build 138 | 139 | def newEnvironment(vars: (String, String)*) = { 140 | val b = Environment.newBuilder 141 | vars.foreach { case (name, value) => b.addVariables(newVariable(name, value)) } 142 | b.build 143 | } 144 | 145 | 146 | def newRanges(ranges: Iterable[NumericRange.Inclusive[Long]]): Value.Ranges = { 147 | val b = Value.Ranges.newBuilder 148 | ranges.foreach { r => 149 | b.addRange( 150 | Value.Range.newBuilder. 151 | setBegin(r.min). 
152 | setEnd(r.max)) 153 | } 154 | b.build 155 | } 156 | 157 | def newScalar(amount: Double): Value.Scalar = { 158 | Value.Scalar.newBuilder.setValue(amount).build 159 | } 160 | 161 | def newOfferId(id: String): OfferID = { 162 | OfferID.newBuilder.setValue(id).build 163 | } 164 | 165 | def newFrameworkId(id: String): FrameworkID = { 166 | FrameworkID.newBuilder.setValue(id).build 167 | } 168 | 169 | def newSlaveId(id: String): SlaveID = { 170 | SlaveID.newBuilder.setValue(id).build 171 | } 172 | 173 | 174 | def newRangesResource(name: String, ranges: Iterable[NumericRange.Inclusive[Long]], role: String): Resource = 175 | Resource.newBuilder. 176 | setName(name). 177 | setRole(role). 178 | setType(Value.Type.RANGES). 179 | setRanges(newRanges(ranges)). 180 | build 181 | 182 | 183 | def newScalarResource(name: String, amount: Double, role: String = "*", 184 | reservation: Option[Resource.ReservationInfo] = None, disk: Option[Resource.DiskInfo] = None): Resource = { 185 | val b = Resource.newBuilder. 186 | setName(name). 187 | setScalar(newScalar(amount)). 188 | setType(Value.Type.SCALAR). 189 | setRole(role) 190 | reservation.foreach(b.setReservation) 191 | disk.foreach(b.setDisk) 192 | b.build 193 | } 194 | 195 | def newTaskStatus(taskId: String, slaveId: String, state: TaskState = TaskState.TASK_LOST): TaskStatus = 196 | TaskStatus.newBuilder. 197 | setTaskId(TaskID.newBuilder.setValue(taskId)). 198 | setSlaveId(SlaveID.newBuilder.setValue(slaveId)). 199 | setState(state). 200 | build 201 | 202 | implicit class RichCommand(command: CommandInfo) { 203 | implicit def environment: Map[String, String] = { 204 | if (command.hasEnvironment) 205 | command.getEnvironment.toMap 206 | else 207 | Map.empty 208 | } 209 | } 210 | 211 | implicit class RichOffer(offer: Offer) { 212 | def resources = 213 | offer.getResourcesList.toList 214 | 215 | def withResources(resources: Iterable[Resource]): Offer = { 216 | val b = offer.toBuilder.clearResources() 217 | resources.foreach(b.addResources) 218 | b.build 219 | } 220 | 221 | def slaveId: Option[String] = 222 | if (offer.hasSlaveId) 223 | Some(offer.getSlaveId.getValue) 224 | else 225 | None 226 | 227 | def id: String = 228 | offer.getId.getValue 229 | def hostname: Option[String] = 230 | if (offer.hasHostname) 231 | Some(offer.getHostname) 232 | else 233 | None 234 | } 235 | 236 | implicit class RichReservationInfo(reservation: Resource.ReservationInfo) { 237 | def labels = 238 | if (reservation.hasLabels) Some(reservation.getLabels) else None 239 | } 240 | 241 | implicit class RichEnvironment(env: Environment) { 242 | def get(key: String): Option[String] = 243 | env.getVariablesList.iterator.collectFirst { 244 | case variable if variable.getName == key => variable.getValue 245 | } 246 | 247 | def toMap: Map[String, String] = { 248 | env.getVariablesList.map { l => 249 | l.getName -> l.getValue 250 | }(breakOut) 251 | } 252 | } 253 | 254 | implicit class RichResource(resource: Resource) { 255 | 256 | def reservation = 257 | if (resource.hasReservation) Some(resource.getReservation) else None 258 | 259 | def diskSourceOption: Option[Resource.DiskInfo.Source] = 260 | if (resource.hasDisk && resource.getDisk.hasSource) 261 | Some(resource.getDisk.getSource) 262 | else 263 | None 264 | def ranges: List[NumericRange.Inclusive[Long]] = 265 | if (resource.hasRanges) { 266 | resource.getRanges.getRangeList.toList.map { r => 267 | NumericRange.inclusive(r.getBegin, r.getEnd, 1) 268 | } 269 | } else 270 | Nil 271 | } 272 | 273 | implicit class RichLabels(labels: 
Labels) { 274 | def get(key: String): Option[String] = 275 | labels.getLabelsList.iterator.collectFirst { 276 | case label if label.getKey == key => label.getValue 277 | } 278 | 279 | def toMap: Map[String, String] = { 280 | labels.getLabelsList.map { l => 281 | l.getKey -> l.getValue 282 | }(breakOut) 283 | } 284 | } 285 | 286 | implicit class RichLaunch(launch: Offer.Operation.Launch) { 287 | def tasks: List[TaskInfo] = { 288 | launch.getTaskInfosList.toList 289 | } 290 | } 291 | 292 | implicit class RichDocker(docker: ContainerInfo.DockerInfo) { 293 | def params: Map[String, String] = { 294 | docker.getParametersList.map { p => 295 | p.getKey -> p.getValue 296 | }(breakOut) 297 | } 298 | } 299 | } 300 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/ReleaseStore.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph 2 | 3 | import java.util.UUID 4 | import org.slf4j.LoggerFactory 5 | import scala.async.Async.{async, await} 6 | import scala.collection.immutable.Seq 7 | import scala.concurrent.{ExecutionContext, Future} 8 | import java.nio.charset.StandardCharsets.UTF_8 9 | 10 | import com.vivint.ceph.kvstore.KVStore 11 | import model._ 12 | import play.api.libs.json._ 13 | 14 | case class ReleaseStore(kvStore: KVStore) { 15 | import PlayJsonFormats._ 16 | import ExecutionContext.Implicits.global 17 | val log = LoggerFactory.getLogger(getClass) 18 | 19 | val releasesPath = "reservation_releases" 20 | def getReleases: Future[Seq[ReservationRelease]] = async { 21 | val paths = await(kvStore.children(releasesPath)). 22 | map { path => 23 | (releasesPath + "/" + path) 24 | } 25 | 26 | val result = await(kvStore.getAll(paths)). 27 | flatten. 
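// flatten drops entries whose data disappeared between the children() and getAll() calls.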
28 | map { bytes => Json.parse(bytes).as[ReservationRelease] } 29 | log.debug("loaded {} reservation releases", result.length) 30 | result 31 | } 32 | 33 | def save(release: ReservationRelease): Future[Unit] = { 34 | log.debug("saving release for reservation {}", release.id) 35 | val data = Json.toJson(release).toString 36 | kvStore.createOrSet(s"${releasesPath}/${release.id}", data.getBytes(UTF_8)) 37 | } 38 | 39 | def delete(reservationId: UUID): Future[Unit] = { 40 | log.debug("deleting release for reservation {}", reservationId) 41 | kvStore.delete(s"${releasesPath}/${reservationId}") 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/ReservationReaperActor.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph 2 | 3 | import akka.actor.{ Actor, ActorLogging, Cancellable, Kill, Stash } 4 | import akka.stream.ActorMaterializer 5 | import akka.stream.scaladsl.Sink 6 | import akka.pattern.pipe 7 | import com.vivint.ceph.kvstore.KVStore 8 | import com.vivint.ceph.model.{ ReservationRelease, ReservationReleaseDetails } 9 | import java.util.UUID 10 | import scaldi.Injector 11 | import scaldi.Injectable._ 12 | import scala.concurrent.duration._ 13 | import ProtoHelpers._ 14 | import akka.stream.scaladsl.Source 15 | import java.time.ZonedDateTime 16 | import lib.zonedDateTimeOrdering 17 | 18 | object ReservationReaperActor { 19 | sealed trait Command 20 | case class OrderUnreserve(reservationId: UUID) extends Command 21 | 22 | case class UnknownReservation(reservationId: UUID, pendingOffer: PendingOffer) extends Command 23 | case object Cleanup extends Command 24 | case object GetPendingReleases extends Command 25 | } 26 | 27 | class ReservationReaperActor(implicit inj: Injector) extends Actor with ActorLogging with Stash { 28 | import ReservationReaperActor._ 29 | 30 | val releaseStore = ReleaseStore(inject[KVStore]) 31 | val offerOperations = inject[OfferOperations] 32 | val now = inject[() => ZonedDateTime]('now) 33 | val cleanupTimer = context.system.scheduler.schedule(1.day, 1.day, self, Cleanup)(context.dispatcher) 34 | case class InitialState(releases: Seq[ReservationRelease]) 35 | 36 | private var pendingReleases: Map[UUID, ReservationReleaseDetails] = 37 | Map.empty.withDefault { id => ReservationReleaseDetails(id, lastSeen = now()) } 38 | 39 | override def preStart(): Unit = { 40 | val timer = context.system.scheduler.scheduleOnce(15.seconds, self, 'timeout)(context.dispatcher) 41 | import context.dispatcher 42 | releaseStore.getReleases.map(InitialState(_)).pipeTo(self) 43 | context.become { 44 | case 'timeout => 45 | log.error("Couldn't load state in time") 46 | context.stop(self) 47 | case InitialState(releases) => 48 | log.info("loaded initial state") 49 | pendingReleases ++= releases.map { r => r.id -> r.toDetailed()} 50 | unstashAll() 51 | timer.cancel() 52 | context.become(receive) 53 | case _ => 54 | stash() 55 | } 56 | } 57 | 58 | override def postStop(): Unit = { 59 | cleanupTimer.cancel() 60 | } 61 | 62 | def receive = { 63 | case UnknownReservation(reservationId, pendingOffer) => 64 | val next = pendingReleases(reservationId).copy( 65 | lastSeen = now(), 66 | details = Some(pendingOffer.offer.toString)) 67 | update(next) 68 | 69 | if (next.unreserve) 70 | pendingOffer.respond(offerOperations.unreserveOffer(pendingOffer.offer.resources)) 71 | else 72 | pendingOffer.decline() 73 | 74 | case OrderUnreserve(reservationId) => 
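// Mark the reservation for release; the unreserve/destroy operations themselves are issued the next time the reserved resources are offered (see UnknownReservation above).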
update( 76 | pendingReleases(reservationId).copy(lastSeen = now(), unreserve = true)) 77 | 78 | case Cleanup => 79 | val currentTime = now() 80 | val expireIfOlderThan = currentTime.minusDays(7L) 81 | for { 82 | (reservationId, pendingRelease) <- pendingReleases 83 | if pendingRelease.lastSeen isBefore expireIfOlderThan 84 | } { 85 | log.info(s"Cleaning up pending release for reservation last seen ${pendingRelease.lastSeen}") 86 | destroy(reservationId) 87 | } 88 | 89 | case GetPendingReleases => 90 | sender ! pendingReleases.values.toList 91 | } 92 | 93 | def destroy(reservationId: UUID): Unit = { 94 | pendingReleases -= reservationId 95 | releaseStore.delete(reservationId) 96 | } 97 | 98 | def update(next: ReservationReleaseDetails): Unit = { 99 | val prior = pendingReleases(next.id) 100 | if (prior != next) { 101 | pendingReleases += prior.id -> next 102 | releaseStore.save(next.withoutDetails) 103 | } 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/SameThreadExecutionContext.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph 2 | 3 | import scala.concurrent.ExecutionContext 4 | 5 | object SameThreadExecutionContext extends ExecutionContext { 6 | def execute(r: Runnable): Unit = 7 | r.run() 8 | override def reportFailure(t: Throwable): Unit = 9 | throw new IllegalStateException("problem in internal callback", t) 10 | } 11 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/api/ApiMarshalling.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph.api 2 | 3 | import akka.http.scaladsl.marshalling.{Marshaller, ToEntityMarshaller} 4 | import akka.http.scaladsl.model.ParsingException 5 | import akka.http.scaladsl.unmarshalling.{FromEntityUnmarshaller, Unmarshaller} 6 | import akka.http.scaladsl.model.MediaTypes 7 | import com.vivint.ceph.model.{ Job, PlayJsonFormats, RunState, ReservationReleaseDetails } 8 | import model.ErrorResponse 9 | import play.api.libs.json._ 10 | 11 | object ApiMarshalling { 12 | def fromJsonResponse[T](implicit reader: Reads[T]): FromEntityUnmarshaller[T] = 13 | Unmarshaller.stringUnmarshaller. 14 | forContentTypes(MediaTypes.`application/json`). 15 | map { str => Json.parse(str).as[T] } 16 | 17 | def toJsonResponse[T](implicit writer: Writes[T]): ToEntityMarshaller[T] = 18 | Marshaller.stringMarshaller(MediaTypes.`application/json`). 
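// compose runs first: serialize the value with Play JSON, then hand the resulting string to the marshaller.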
19 | compose { data: T => 20 | Json.stringify(Json.toJson(data)) } 21 | 22 | import PlayJsonFormats._ 23 | import model.ApiPlayJsonFormats._ 24 | implicit val jobsWriter = toJsonResponse[Iterable[Job]] 25 | implicit val reservationReleaseWriter = toJsonResponse[Iterable[ReservationReleaseDetails]] 26 | implicit val errorWriter = toJsonResponse[ErrorResponse] 27 | 28 | def uuidFromString(str: String) = 29 | try { 30 | java.util.UUID.fromString(str) 31 | } catch { 32 | case ex: IllegalArgumentException => 33 | throw ParsingException(s"Couldn't parse UUID: ${ex.getMessage}") 34 | } 35 | 36 | def runStateFromString(str: String) = 37 | RunState.values.find(_.name == str).getOrElse { 38 | throw ParsingException(s"invalid runState '${str}', expected one of '${RunState.values.mkString(", ")}'") 39 | } 40 | 41 | 42 | implicit val runStateTextReader: Unmarshaller[String, RunState.EnumVal] = 43 | Unmarshaller.strict[String, RunState.EnumVal](runStateFromString) 44 | } 45 | 46 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/api/HttpService.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph 2 | package api 3 | 4 | import akka.actor.{ ActorRef, ActorSystem } 5 | import akka.http.scaladsl.model.{ ContentTypes, MediaTypes, StatusCodes } 6 | import akka.http.scaladsl.model.headers.`Content-Type` 7 | import akka.http.scaladsl.model.{ HttpHeader, ParsingException } 8 | import akka.http.scaladsl.server.ExceptionHandler 9 | import akka.util.Timeout 10 | import akka.pattern.ask 11 | import akka.http.scaladsl.Http 12 | import akka.stream.ActorMaterializer 13 | import com.vivint.ceph.kvstore.KVStore 14 | import com.vivint.ceph.views.ConfigTemplates 15 | import scala.collection.breakOut 16 | import com.vivint.ceph.model.{ RunState, ServiceLocation, JobRole, ReservationReleaseDetails, Job } 17 | import java.util.UUID 18 | import scaldi.Injector 19 | import scaldi.Injectable._ 20 | import scala.concurrent.duration._ 21 | import akka.http.scaladsl.server.Directives._ 22 | import scala.concurrent.ExecutionContext.Implicits.global 23 | import scala.concurrent.Future 24 | import scala.util.{Success,Failure} 25 | import lib.FutureHelpers.tSequence 26 | import lib.TgzHelper.makeTgz 27 | import play.api.libs.json._ 28 | 29 | class HttpService(implicit inj: Injector) { 30 | implicit val actorSystem = inject[ActorSystem] 31 | implicit val materializer = ActorMaterializer() 32 | import ApiMarshalling._ 33 | 34 | val config = inject[AppConfiguration] 35 | val taskActor = inject[ActorRef](classOf[TaskActor]) 36 | val releaseActor = inject[ActorRef](classOf[ReservationReaperActor]) 37 | val configTemplates = inject[ConfigTemplates] 38 | 39 | 40 | 41 | implicit val timeout = Timeout(5.seconds) 42 | val kvStore = inject[KVStore] 43 | 44 | implicit def myExceptionHandler: ExceptionHandler = 45 | ExceptionHandler { 46 | case ex: ParsingException => 47 | complete((400, s"Error parsing: ${ex.getMessage}")) 48 | } 49 | 50 | def getJobs: Future[Map[UUID, Job]] = 51 | (taskActor ? TaskActor.GetJobs).mapTo[Map[UUID, Job]] 52 | 53 | def getReleases: Future[List[ReservationReleaseDetails]] = 54 | (releaseActor ? 
ReservationReaperActor.GetPendingReleases).mapTo[List[ReservationReleaseDetails]] 55 | 56 | def findJobByUUID(id: UUID) = 57 | getJobs.map { _.values.find(_.id == id) } 58 | 59 | val configStore = ConfigStore(kvStore) 60 | def getConfig: Future[String] = { 61 | tSequence( 62 | ClusterSecretStore.createOrGenerateSecrets(kvStore), 63 | configStore.get, 64 | getJobs).map { 65 | case (secrets, cfg, jobs) => 66 | val monitors: Set[ServiceLocation] = 67 | jobs.values.filter(_.role == JobRole.Monitor).flatMap(_.pState.serviceLocation)(breakOut) 68 | configTemplates.cephConf(secrets, monitors, cfg.settings, None) 69 | } 70 | } 71 | 72 | def route = { 73 | path("" | "index.html") { 74 | getFromResource("ui/index.html") 75 | } ~ 76 | pathPrefix("js") { 77 | getFromResourceDirectory("ui/js") 78 | } ~ 79 | pathPrefix("v1") { 80 | // TODO - protect with key 81 | pathPrefix("config") { 82 | path("ceph.conf") { 83 | complete(getConfig) 84 | } ~ 85 | path("deployment-config.conf") { 86 | get { 87 | complete(configStore.getText) 88 | } ~ 89 | (put & entity(as[String])) { cfg => 90 | onComplete(configStore.storeText(cfg)) { 91 | case Success(_) => 92 | complete("ok") 93 | case Failure(ex) => 94 | complete((StatusCodes.BadRequest, model.ErrorResponse(ex.getMessage))) 95 | } 96 | } 97 | } 98 | } ~ 99 | pathPrefix("reservation-reaper") { 100 | (pathEnd & get) { 101 | complete(getReleases) 102 | } ~ 103 | (path("dangling")) { 104 | onSuccess(getReleases) { releases => 105 | complete(releases.filterNot(_.unreserve)) 106 | } 107 | } ~ 108 | (put & path(Segment.map(uuidFromString) / "unreserve")) { id => 109 | releaseActor ! ReservationReaperActor.OrderUnreserve(id) 110 | complete(s"Reservation ${id} release order submitted") 111 | } 112 | } ~ 113 | pathPrefix("jobs") { 114 | (pathEnd & get) { 115 | onSuccess(getJobs) { jobs => 116 | complete(jobs.values.toList) 117 | } 118 | } ~ 119 | (put & path(Segment.map(uuidFromString) / Segment.map(runStateFromString))) { (id, runState) => 120 | onSuccess(findJobByUUID(id)) { 121 | case Some(job) => 122 | taskActor ! 
TaskActor.UpdateGoal(job.id, runState) 123 | complete(s"Job ID ${job.id} state change submitted: ${job.goal} -> ${Some(runState)}") 124 | case None => 125 | complete((400, s"Couldn't find job with UUID ${id}.")) 126 | } 127 | } 128 | } 129 | } 130 | } 131 | 132 | def run() = { 133 | Http().bindAndHandle( 134 | route, 135 | config.apiHost, 136 | config.apiPort) 137 | } 138 | } 139 | 140 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/api/model/ApiPlayJsonFormats.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph.api.model 2 | 3 | import play.api.libs.json._ 4 | object ApiPlayJsonFormats { 5 | implicit val ErrorResponseFormat = Json.format[ErrorResponse] 6 | } 7 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/api/model/ErrorResponse.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph.api.model 2 | 3 | case class ErrorResponse(message: String) 4 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/kvstore/CrashingKVStore.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph 2 | package kvstore 3 | 4 | import akka.stream.scaladsl.Source 5 | import scala.collection.immutable.Seq 6 | import scala.concurrent.{ Future, Promise } 7 | import scala.util.Failure 8 | 9 | /** KVStore which forwards the first failure through the crashed channel. 10 | * If this fails then the CrashingKVStore should be reinitialized 11 | */ 12 | case class CrashingKVStore(kvStore: KVStore) extends KVStore { 13 | private [this] val p = Promise[Unit] 14 | val crashed: Future[Unit] = p.future 15 | 16 | private def wrap[T](f: () => Future[T]): Future[T] = { 17 | if (crashed.isCompleted) { 18 | // throw the future in the calling thread to make it obvious that this shouldn't be used any more 19 | val Failure(ex) = crashed.value.get 20 | throw new IllegalStateException("This kvstore has crashed and should not be used any more", ex) 21 | } 22 | val resultingFuture = f() 23 | 24 | resultingFuture.onFailure { case ex => 25 | p.tryFailure(ex) 26 | }(SameThreadExecutionContext) 27 | resultingFuture 28 | } 29 | 30 | override def getAll(paths: Seq[String]): Future[Seq[Option[Array[Byte]]]] = wrap(() => kvStore.getAll(paths)) 31 | def create(path: String, data: Array[Byte]): Future[Unit] = wrap(() => kvStore.create(path, data)) 32 | def set(path: String, data: Array[Byte]): Future[Unit] = wrap(() => kvStore.set(path, data)) 33 | def createOrSet(path: String, data: Array[Byte]): Future[Unit] = wrap(() => kvStore.createOrSet(path, data)) 34 | def delete(path: String): Future[Unit] = wrap(() => kvStore.delete(path)) 35 | def get(path: String): Future[Option[Array[Byte]]] = wrap(() => kvStore.get(path)) 36 | def children(path: String): Future[Seq[String]] = wrap(() => kvStore.children(path)) 37 | def lock(path: String): Future[KVStore.CancellableWithResult] = { 38 | val f1 = wrap(() => kvStore.lock(path)) 39 | f1.onSuccess { case cancellable => 40 | try { wrap(() => cancellable.result) } 41 | catch { case ex: Throwable => println(ex) } 42 | }(SameThreadExecutionContext) 43 | f1 44 | } 45 | 46 | def watch(path: String, bufferSize: Int = 1): Source[Option[Array[Byte]], KVStore.CancellableWithResult] = 47 | kvStore.watch(path).mapMaterializedValue { r => 48 | 
wrap(() => r.result) 49 | r 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/kvstore/FileStore.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph.kvstore 2 | 3 | import akka.Done 4 | import akka.stream.scaladsl.Keep 5 | import java.io.{ File, FileInputStream, FileOutputStream, RandomAccessFile } 6 | import java.util.Arrays 7 | import java.util.concurrent.Executors 8 | import org.apache.commons.io.IOUtils 9 | import scala.collection.immutable.Seq 10 | import scala.concurrent.{ ExecutionContext, Future, Promise } 11 | import scala.concurrent.duration._ 12 | import akka.stream.scaladsl.Source 13 | 14 | /** For use in development. Connecting to zookeeper with curator takes time. 15 | */ 16 | class FileStore(basePath: File) extends KVStore { 17 | 18 | implicit private val ec = ExecutionContext.fromExecutor( 19 | Executors.newSingleThreadExecutor()) 20 | 21 | private def fileFor(path: String) = 22 | new File(basePath, path) 23 | def create(path: String, data: Array[Byte]): Future[Unit] = Future { 24 | val output = fileFor(path) 25 | if (output.exists()) { 26 | throw new RuntimeException(s"path ${path} already exists") 27 | } 28 | 29 | output.getParentFile.mkdirs() 30 | val f = new FileOutputStream(output) 31 | IOUtils.writeChunked(data, f) 32 | f.close() 33 | } 34 | 35 | def set(path: String, data: Array[Byte]): Future[Unit] = Future { 36 | val output = fileFor(path) 37 | val f = new FileOutputStream(output) 38 | IOUtils.writeChunked(data, f) 39 | f.close() 40 | } 41 | 42 | private [this] def createOrSetSync(output: File, data: Array[Byte]): Unit = { 43 | output.getParentFile.mkdirs() 44 | val f = new FileOutputStream(output) 45 | IOUtils.writeChunked(data, f) 46 | f.close() 47 | } 48 | 49 | def createOrSet(path: String, data: Array[Byte]): Future[Unit] = Future { 50 | val output = fileFor(path) 51 | createOrSetSync(output, data) 52 | } 53 | 54 | def delete(path: String): Future[Unit] = Future { 55 | val output = fileFor(path) 56 | if (output.exists()) 57 | output.delete() 58 | } 59 | 60 | def get(path: String): Future[Option[Array[Byte]]] = Future { 61 | val input = fileFor(path) 62 | if (input.exists()) { 63 | val f = new FileInputStream(input) 64 | val data = IOUtils.toByteArray(f) 65 | f.close() 66 | Some(data) 67 | } else { 68 | None 69 | } 70 | } 71 | 72 | def lock(path: String): Future[KVStore.CancellableWithResult] = Future { 73 | val lockFile = fileFor(path) 74 | createOrSetSync(lockFile, Array.empty) 75 | val raf = new RandomAccessFile(lockFile, "rw") 76 | val lock = raf.getChannel().tryLock() 77 | if (lock == null) 78 | throw new RuntimeException(s"Could not acquire lock for ${path}") 79 | 80 | val p = Promise[Done] 81 | p.future.onComplete(_ => lock.release()) 82 | 83 | new KVStore.CancellableWithResult { 84 | def result = p.future 85 | def cancel(): Boolean = { p.trySuccess(Done) } 86 | def isCancelled = p.future.isCompleted 87 | } 88 | } 89 | 90 | def children(path: String): Future[Seq[String]] = Future { 91 | Option(fileFor(path).listFiles). 92 | map(_.toList). 93 | getOrElse(Nil). 
94 | map { f => 95 | f.getName 96 | } 97 | } 98 | 99 | def watch(path: String, bufferSize: Int = 1): Source[Option[Array[Byte]], KVStore.CancellableWithResult] = { 100 | val first = Source.fromIterator( () => Iterator(path) ).mapAsync(1)(get) 101 | 102 | val updates = Source.tick(0.seconds, 1.second, path).mapMaterializedValue { cancellable => 103 | val cancelled = Promise[Done] 104 | 105 | new KVStore.CancellableWithResult { 106 | def result = cancelled.future 107 | def cancel(): Boolean = { 108 | cancelled.trySuccess(Done) 109 | cancellable.cancel 110 | } 111 | def isCancelled = { 112 | cancellable.isCancelled 113 | } 114 | } 115 | }.mapAsync(1)(get). 116 | sliding(2,1). 117 | mapConcat { 118 | case Vector(None, e @ Some(_)) => 119 | List(e) 120 | case Vector(Some(_), None) => 121 | List(None) 122 | case Vector(Some(a), e @ Some(b)) if ! Arrays.equals(a, b) => 123 | List(e) 124 | case Vector(e) => 125 | List(e) 126 | case _ => 127 | Nil 128 | } 129 | 130 | first.concatMat(updates)(Keep.right) 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/kvstore/KVStore.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph.kvstore 2 | 3 | import akka.Done 4 | import akka.actor.Cancellable 5 | import akka.stream.scaladsl.Source 6 | import scala.collection.immutable.Seq 7 | import scala.concurrent.{ ExecutionContext, Future } 8 | 9 | /** KVStore trait. Asynchronous but implementations MUST PROVIDE linearized read/write guarantees; IE, if two 10 | * overlapping async writes are fired off in the same thread, the second write request must always win. 11 | */ 12 | trait KVStore { 13 | def create(path: String, data: Array[Byte]): Future[Unit] 14 | def set(path: String, data: Array[Byte]): Future[Unit] 15 | def createOrSet(path: String, data: Array[Byte]): Future[Unit] 16 | def delete(path: String): Future[Unit] 17 | def get(path: String): Future[Option[Array[Byte]]] 18 | 19 | def lock(path: String): Future[KVStore.CancellableWithResult] 20 | 21 | def getAll(paths: Seq[String]): Future[Seq[Option[Array[Byte]]]] = { 22 | import ExecutionContext.Implicits.global 23 | Future.sequence { 24 | paths.map(get) 25 | } 26 | } 27 | 28 | def children(path: String): Future[Seq[String]] 29 | 30 | def watch(path: String, bufferSize: Int = 1): Source[Option[Array[Byte]], KVStore.CancellableWithResult] 31 | } 32 | 33 | object KVStore { 34 | trait CancellableWithResult extends Cancellable { 35 | def result: Future[Done] 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/kvstore/MemStore.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph.kvstore 2 | 3 | 4 | import akka.Done 5 | import akka.stream.OverflowStrategy 6 | import akka.stream.scaladsl.SourceQueueWithComplete 7 | import java.io.File 8 | import java.util.concurrent.Executors 9 | import scala.collection.immutable.Seq 10 | import scala.concurrent.{ ExecutionContext, Future, Promise } 11 | import akka.stream.scaladsl.Source 12 | 13 | /** For use in tests. 
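 * Example (illustrative):
 * {{{
 *   val kv = new MemStore
 *   Await.result(kv.createOrSet("tasks/demo", "{}".getBytes("UTF-8")), 1.second)
 * }}}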
14 | */ 15 | class MemStore extends KVStore { 16 | 17 | implicit private val ec = ExecutionContext.fromExecutor( 18 | Executors.newSingleThreadExecutor()) 19 | 20 | private var state = Map.empty[File, Array[Byte]] 21 | private var subscriptions = Set.empty[SourceQueueWithComplete[File]] 22 | 23 | private def createFolders(path: File): Unit = { 24 | val parent = path.getParentFile 25 | if ((parent != null) && (!state.contains(parent))) { 26 | state = state + (parent -> Array.empty) 27 | createFolders(parent) 28 | } 29 | } 30 | 31 | private [kvstore] def removeSubscription(queue: SourceQueueWithComplete[File]) = Future { 32 | subscriptions = subscriptions - queue 33 | } 34 | 35 | private [kvstore] def addSubscription(queue: SourceQueueWithComplete[File]) = Future { 36 | subscriptions = subscriptions + queue 37 | } 38 | 39 | def fileFor(path: String) = 40 | new File("/", path) 41 | 42 | def create(path: String, data: Array[Byte]): Future[Unit] = Future { 43 | val output = fileFor(path) 44 | if (state.contains(output)) 45 | throw new RuntimeException(s"path ${path} already exists") 46 | 47 | if (! state.contains(output.getParentFile)) 48 | throw new RuntimeException(s"no such parent for ${output}: ${output.getParentFile}") 49 | 50 | state = state.updated(output, data) 51 | subscriptions.foreach(_.offer(output)) 52 | } 53 | 54 | def set(path: String, data: Array[Byte]): Future[Unit] = Future { 55 | val output = fileFor(path) 56 | if (!state.contains(output)) 57 | throw new RuntimeException(s"path ${path} doesn't exist") 58 | state = state.updated(output, data) 59 | subscriptions.foreach(_.offer(output)) 60 | } 61 | 62 | def createOrSet(path: String, data: Array[Byte]): Future[Unit] = Future { 63 | val output = fileFor(path) 64 | createFolders(output) 65 | state = state.updated(output, data) 66 | subscriptions.foreach(_.offer(output)) 67 | } 68 | 69 | def delete(path: String): Future[Unit] = Future { 70 | val deleteFile = fileFor(path) 71 | if (!state.contains(deleteFile)) 72 | throw new RuntimeException(s"path ${path} doesn't exist") 73 | state = state - deleteFile 74 | subscriptions.foreach(_.offer(deleteFile)) 75 | } 76 | 77 | def get(path: String): Future[Option[Array[Byte]]] = Future { 78 | val input = fileFor(path) 79 | state.get(input) 80 | } 81 | private [kvstore] def get(path: File): Future[Option[Array[Byte]]] = Future { 82 | state.get(path) 83 | } 84 | 85 | def lock(path: String): Future[KVStore.CancellableWithResult] = Future { 86 | val lockFile = fileFor(path) 87 | if (state.contains(lockFile)) { 88 | throw new RuntimeException("Couldn't acquire lock in memory lock") 89 | } 90 | 91 | state = state.updated(lockFile, Array.empty) 92 | 93 | val p = Promise[Done] 94 | new KVStore.CancellableWithResult { 95 | def result = p.future 96 | def cancel(): Boolean = { 97 | delete(path) 98 | p.trySuccess(Done); true 99 | } 100 | def isCancelled = p.isCompleted 101 | } 102 | } 103 | 104 | def children(path: String): Future[Seq[String]] = Future { 105 | val parent = fileFor(path) 106 | state.keys. 107 | filter { _.getParentFile == parent }. 108 | map ( _.getName ). 109 | toList 110 | } 111 | 112 | def watch(path: String, bufferSize: Int = 1): Source[Option[Array[Byte]], KVStore.CancellableWithResult] = { 113 | val input = fileFor(path) 114 | 115 | Source.queue[File](bufferSize, OverflowStrategy.dropHead). 
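// OverflowStrategy.dropHead discards the oldest buffered change notification
// when a subscriber consumes slowly, so a watcher always catches up to the
// most recent updates instead of replaying a backlog.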
116 | mapMaterializedValue { queue => 117 | addSubscription(queue).onComplete { _ => queue.offer(input) } 118 | 119 | new KVStore.CancellableWithResult { 120 | def result = queue.watchCompletion() 121 | def cancel(): Boolean = { queue.complete(); true } 122 | def isCancelled = queue.watchCompletion().isCompleted 123 | } 124 | }. 125 | filter(_ == input). 126 | mapAsync(1)(get) 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/kvstore/ZookeeperStore.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph.kvstore 2 | 3 | import akka.Done 4 | import java.util.concurrent.{ Executors, TimeUnit } 5 | import org.apache.curator.framework.CuratorFramework 6 | import org.apache.curator.framework.state.{ ConnectionState, ConnectionStateListener } 7 | import org.apache.zookeeper.KeeperException.ConnectionLossException 8 | 9 | import scala.collection.JavaConversions._ 10 | import scala.collection.immutable.Seq 11 | import scala.concurrent.{ExecutionContext, Future, Promise} 12 | import scala.util.Try 13 | 14 | import akka.stream.OverflowStrategy 15 | import akka.stream.scaladsl.{Source, SourceQueueWithComplete} 16 | import org.apache.curator.framework.CuratorFrameworkFactory 17 | import org.apache.curator.framework.recipes.cache.{NodeCache, NodeCacheListener} 18 | import org.apache.curator.framework.recipes.locks.InterProcessSemaphoreMutex 19 | import org.apache.curator.retry.ExponentialBackoffRetry 20 | import org.apache.zookeeper.KeeperException 21 | import com.vivint.ceph.AppConfiguration 22 | import scaldi.Injectable._ 23 | import scaldi.Injector 24 | 25 | /** Serializes every read and write through a single-threaded executor to give linearized read/write guarantees. */ 26 | class ZookeeperStore(namespace: String = "ceph-on-mesos")(implicit injector: Injector) extends KVStore { 27 | private val retryPolicy = new ExponentialBackoffRetry(1000, 3) 28 | val appConfiguration = inject[AppConfiguration] 29 | val client = CuratorFrameworkFactory.builder. 30 | connectString(appConfiguration.zookeeper). 31 | retryPolicy(retryPolicy). 32 | build() 33 | client.start() 34 | 35 | 36 | val executor = Executors.newSingleThreadExecutor() 37 | implicit private val ec = ExecutionContext.fromExecutor(executor) 38 | 39 | Future { 40 | val p = s"/${namespace}" 41 | Option(client.checkExists.forPath(p)). 42 | getOrElse(client.create.forPath(p)) 43 | }.onFailure { 44 | case ex: Throwable => 45 | System.err.println(s"Something went seriously wrong; couldn't assert the existence of ${namespace}. ${ex}") 46 | ex.printStackTrace(System.err) 47 | } 48 | 49 | Runtime.getRuntime.addShutdownHook(new Thread { 50 | override def run(): Unit = { 51 | System.err.println("Draining ZK writes") 52 | executor.shutdown() 53 | executor.awaitTermination(1, TimeUnit.MINUTES) 54 | System.err.println("Successfully drained") 55 | } 56 | }) 57 | 58 | private def sanitizePath(path: String): String = { 59 | if (path.startsWith("/")) 60 | s"/${namespace}${path}" 61 | else 62 | s"/${namespace}/${path}" 63 | } 64 | 65 | def create(path: String, data: Array[Byte]): Future[Unit] = Future { 66 | val sPath = sanitizePath(path) 67 | client.create. 68 | forPath(sPath, data) 69 | } 70 | 71 | def set(path: String, data: Array[Byte]): Future[Unit] = Future { 72 | val sPath = sanitizePath(path) 73 | client.setData. 
forPath(sPath, data) 74 | } 75 | 76 | def createOrSet(path: String, data: Array[Byte]): Future[Unit] = Future { 77 | val sPath = sanitizePath(path) 78 | try { 79 | client.setData. 80 | forPath(sPath, data) 81 | } catch { case ex: KeeperException.NoNodeException => 82 | client.create. 83 | creatingParentsIfNeeded. 84 | forPath(sPath, data) 85 | } 86 | } 87 | 88 | def delete(path: String): Future[Unit] = Future { 89 | val sPath = sanitizePath(path) 90 | client.delete.forPath(sPath) 91 | } 92 | 93 | def get(path: String): Future[Option[Array[Byte]]] = Future { 94 | val sPath = sanitizePath(path) 95 | try Some(client.getData.forPath(sPath)) 96 | catch { 97 | case _: KeeperException.NoNodeException => 98 | None 99 | } 100 | } 101 | 102 | def children(path: String): Future[Seq[String]] = Future { 103 | val sPath = sanitizePath(path) 104 | try client.getChildren.forPath(sPath).toList 105 | catch { 106 | case ex: KeeperException.NoNodeException => 107 | Nil 108 | } 109 | } 110 | 111 | def lock(path: String): Future[KVStore.CancellableWithResult] = Future { 112 | val sPath = sanitizePath(path) 113 | val lock = new InterProcessSemaphoreMutex(client, sPath) 114 | lock.acquire() 115 | val p = Promise[Done] 116 | 117 | val listener = new ConnectionStateListener { 118 | def stateChanged(client: CuratorFramework, newState: ConnectionState): Unit = { 119 | if (!newState.isConnected()) { 120 | p.failure(new ConnectionLossException) 121 | } 122 | } 123 | } 124 | client.getConnectionStateListenable.addListener(listener) 125 | p.future.onComplete { _ => 126 | client.getConnectionStateListenable.removeListener(listener) 127 | lock.release() 128 | } 129 | 130 | new KVStore.CancellableWithResult { 131 | def result = p.future 132 | def cancel(): Boolean = { p.trySuccess(Done) } 133 | def isCancelled = p.future.isCompleted 134 | } 135 | } 136 | 137 | private def wireSourceQueue(path: String, queue: SourceQueueWithComplete[Option[Array[Byte]]]): Future[Unit] = Future { 138 | val sPath = sanitizePath(path) 139 | val l = new NodeCache(client, sPath) 140 | l.getListenable.addListener(new NodeCacheListener { 141 | override def nodeChanged(): Unit = { 142 | queue.offer(Option(l.getCurrentData).map(_.getData)) 143 | } 144 | }) 145 | l.start() 146 | 147 | queue.watchCompletion().onComplete { _ => 148 | l.close() 149 | }(ExecutionContext.global) 150 | } 151 | 152 | def watch(path: String, bufferSize: Int = 1): Source[Option[Array[Byte]], KVStore.CancellableWithResult] = 153 | Source.queue[Option[Array[Byte]]](bufferSize, OverflowStrategy.dropHead).mapMaterializedValue { queue => 154 | val wired = wireSourceQueue(path, queue) 155 | wired.onFailure { case ex => 156 | queue.fail(ex) 157 | }(ExecutionContext.global) 158 | 159 | new KVStore.CancellableWithResult { 160 | def result = queue.watchCompletion() 161 | def cancel(): Boolean = { queue.complete(); true } 162 | def isCancelled = queue.watchCompletion().isCompleted 163 | } 164 | } 165 | } 166 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/lib/Enum.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph.lib 2 | 3 | trait Enum { //DIY enum type 4 | type EnumVal <: Value //This is a type that needs to be found in the implementing class 5 | 6 | //This is the trait that we need to extend our EnumVal type with; it does the book-keeping for us 7 | protected trait Value { self: EnumVal => //Enforce that no one mixes in Value in a non-EnumVal type 8 | def name: String //All enum values should have a name 9 | 10 | override def toString = name //And that name is used for the toString operation 11 | override def equals(other: Any) = this eq other.asInstanceOf[AnyRef] 12 | override def hashCode = 31 * (this.getClass.## + name.##) 13 | } 14 | 15 | def values: Vector[EnumVal] 16 | } 17 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/lib/FutureHelpers.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph.lib 2 | 3 | 4 | import scala.concurrent.{ ExecutionContext, Future } 5 | object FutureHelpers { 6 | def tSequence[A,B,C](a: Future[A], b: Future[B], c: Future[C])(implicit ec: ExecutionContext): Future[(A,B,C)] = { 7 | a.flatMap { a => b.flatMap { b => c.map { c => (a,b,c) } } } 8 | } 9 | def tSequence[A,B,C,D](a: Future[A], b: Future[B], c: Future[C], d: Future[D])(implicit ec: ExecutionContext): Future[(A,B,C,D)] = { 10 | a.flatMap { a => b.flatMap { b => c.flatMap { c => d.map { d => (a,b,c,d) } } } } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/lib/FutureMonitor.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph.lib 2 | 3 | import akka.actor.{ ActorContext, Kill } 4 | import akka.event.LoggingAdapter 5 | import scala.concurrent.{Future, ExecutionContext} 6 | import scala.util.{Success,Failure} 7 | 8 | object FutureMonitor { 9 | def logSuccess[T](log: LoggingAdapter, f: Future[T], desc: String)(implicit ex: ExecutionContext): Future[T] = { 10 | log.debug(s"${desc} : pulling state") 11 | f.onComplete { 12 | case Success(_) => log.debug("{} : success", desc) 13 | case Failure(ex) => 14 | log.error(ex, "{}: failure", desc) 15 | } 16 | f 17 | } 18 | 19 | def crashSelfOnFailure(f: Future[Any], log: LoggingAdapter, description: String)( 20 | implicit context: ActorContext): Unit = { 21 | if (f.value.exists(_.isFailure)) // prevent infinite loops if all children actors get restarted 22 | context.stop(context.self) 23 | else 24 | f.onFailure { 25 | case ex => 26 | context.self ! 
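// akka.actor.Kill makes the receiving actor throw ActorKilledException,
// deferring to its supervisor strategy (which stops the actor by default).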
Kill 29 | log.error(ex, s"Unexpected error for ${description}") 30 | }(context.dispatcher) 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/lib/PortMatcher.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph.lib 2 | 3 | import mesosphere.mesos.matcher.PortsMatchResult 4 | import mesosphere.mesos.matcher.PortsMatchResult.PortWithRole 5 | import mesosphere.mesos.matcher.ResourceMatcher.ResourceSelector 6 | import mesosphere.mesos.matcher.{ MatchResult, ResourceMatcher } 7 | import mesosphere.mesos.protos.Resource 8 | import org.apache.mesos.Protos 9 | import scala.annotation.tailrec 10 | import com.vivint.ceph.ProtoHelpers 11 | 12 | /** Matches only a specific, fixed port */ 13 | class SpecificPortMatcher(port: Int, resourceSelector: ResourceSelector) extends ResourceMatcher { 14 | import ProtoHelpers._ 15 | val resourceName = Resource.PORTS 16 | 17 | def apply(offerId: String, resources: Iterable[Protos.Resource]): Iterable[MatchResult] = { 18 | 19 | (for { 20 | resource <- resources.filter(resourceSelector(_)).toStream 21 | range <- resource.ranges.toStream 22 | if range.contains(port.toLong) 23 | } yield { 24 | List( 25 | PortsMatchResult( 26 | true, 27 | List(Some(PortWithRole("main", port.toInt, resource.reservation))), 28 | List( 29 | resource.toBuilder(). 30 | setRanges(newRanges(List(port.toLong to port.toLong))) 31 | .build))) 32 | }).headOption getOrElse { 33 | List( 34 | PortsMatchResult( 35 | false, 36 | Nil, 37 | Nil)) 38 | } 39 | } 40 | } 41 | 42 | /** Matches any solitary port from the offer */ 43 | class SinglePortMatcher(resourceSelector: ResourceSelector) extends ResourceMatcher { 44 | import ProtoHelpers._ 45 | val resourceName = Resource.PORTS 46 | def apply(offerId: String, resources: Iterable[Protos.Resource]): Iterable[MatchResult] = { 47 | (for { 48 | resource <- resources.filter(resourceSelector(_)).headOption 49 | range <- resource.ranges.headOption 50 | } yield { 51 | val port = range.min 52 | 53 | // ReservationInfo 54 | List( 55 | PortsMatchResult( 56 | true, 57 | List(Some(PortWithRole("main", port.toInt, resource.reservation))), 58 | List( 59 | resource.toBuilder(). 60 | setRanges(newRanges(List(port to port))) 61 | .build))) 62 | }) getOrElse { 63 | List( 64 | PortsMatchResult( 65 | false, 66 | Nil, 67 | Nil)) 68 | } 69 | } 70 | } 71 | 72 | /** Matches n contiguous ports from the offer */ 73 | class ContiguousPortMatcher(ports: Int, resourceSelector: ResourceSelector) extends ResourceMatcher { 74 | import ProtoHelpers._ 75 | val resourceName = Resource.PORTS 76 | def apply(offerId: String, resources: Iterable[Protos.Resource]): Iterable[MatchResult] = 77 | doMatch(resources.toList) 78 | 79 | @tailrec private def doMatch(resources: List[Protos.Resource]): Iterable[MatchResult] = resources match { 80 | case Nil => 81 | List( 82 | PortsMatchResult( 83 | false, 84 | Nil, 85 | Nil)) 86 | case resource :: rest => 87 | if (!resourceSelector(resource)) 88 | doMatch(rest) 89 | else 90 | resource.ranges.find(_.length >= ports) match { 91 | case Some(range) => 92 | val reserveRange = range.min to (range.min + ports - 1) 93 | // ReservationInfo 94 | List( 95 | PortsMatchResult( 96 | true, 97 | reserveRange.map { n => Some(PortWithRole("main", n.toInt, resource.reservation)) }, 98 | List( 99 | resource.toBuilder. 100 | setRanges( 101 | newRanges(List(reserveRange))). 
102 | build))) 103 | case None => 104 | doMatch(rest) 105 | } 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/lib/TgzHelper.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph.lib 2 | 3 | import java.io.{ ByteArrayInputStream, ByteArrayOutputStream } 4 | import java.util.zip.GZIPInputStream 5 | import org.apache.commons.io.IOUtils 6 | import org.kamranzafar.jtar.{ TarEntry, TarHeader, TarInputStream, TarOutputStream } 7 | import java.util.zip.GZIPOutputStream 8 | import scala.collection.{Iterator,breakOut} 9 | import java.nio.charset.StandardCharsets.UTF_8 10 | 11 | object TgzHelper { 12 | def octal(digits: String): Int = 13 | BigInt(digits, 8).toInt 14 | 15 | case class FileEntry(mode: Int, data: Array[Byte]) 16 | object FileEntry extends ((Int, Array[Byte]) => FileEntry) { 17 | import scala.language.implicitConversions 18 | implicit def apply(contents: String): FileEntry = 19 | FileEntry(octal("644"), contents.getBytes(UTF_8)) 20 | } 21 | 22 | def makeTgz(files: Map[String, String]): Array[Byte] = { 23 | makeTgz(files.toSeq.map { case (k, v) => 24 | k -> FileEntry(v) 25 | } : _*) 26 | } 27 | 28 | def makeTgz(files: (String, FileEntry)*): Array[Byte] = { 29 | val dest = new ByteArrayOutputStream 30 | val tgz = new TarOutputStream(new GZIPOutputStream(dest)) 31 | val now = System.currentTimeMillis / 1000 32 | 33 | files.foreach { case (file, entry) => 34 | tgz.putNextEntry(new TarEntry( 35 | TarHeader.createHeader( 36 | file, 37 | entry.data.length.toLong, 38 | now, false, entry.mode))) 39 | tgz.write(entry.data, 0, entry.data.length) 40 | } 41 | tgz.close() 42 | dest.toByteArray() 43 | } 44 | 45 | class TarIterator(s: TarInputStream) extends Iterator[(TarEntry, Array[Byte])] { 46 | var _nextEntry: (TarEntry, Array[Byte]) = null 47 | private def loadNext(): Unit = 48 | _nextEntry = s.getNextEntry match { 49 | case null => null 50 | case entry => (entry, IOUtils.toByteArray(s)) 51 | } 52 | 53 | def hasNext = _nextEntry != null 54 | def next() = { 55 | val nextResult = _nextEntry 56 | loadNext() 57 | nextResult 58 | } 59 | loadNext() 60 | } 61 | 62 | def readTgz(tgz: Array[Byte]): Iterator[(String, Array[Byte])] = { 63 | val input = new ByteArrayInputStream(tgz) 64 | val stream = new TarInputStream(new GZIPInputStream(input)) 65 | 66 | new TarIterator(stream).map { 67 | case (entry, data) => entry.getName -> data 68 | } 69 | } 70 | 71 | } 72 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/lib/package.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph 2 | 3 | package object lib { 4 | 5 | import java.time.ZonedDateTime 6 | 7 | implicit val zonedDateTimeOrdering = new Ordering[ZonedDateTime] { 8 | def compare(a: ZonedDateTime, b: ZonedDateTime): Int = 9 | a.compareTo(b) 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/model/CephConfig.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph.model 2 | 3 | import com.typesafe.config.{ Config, ConfigFactory, ConfigObject } 4 | import configs.FromString 5 | import mesosphere.marathon.state.DiskType 6 | import configs.syntax._ 7 | import java.nio.charset.StandardCharsets.UTF_8 8 | 9 | 10 | case class MonDeploymentConfig( 11 | count: 
Int, 12 | cpus: Double, 13 | mem: Double, 14 | disk_type: DiskType, 15 | disk: Long, 16 | path_constraint: Option[String], 17 | max_per_host: Int, 18 | port: Option[Int] 19 | ) 20 | 21 | case class OSDDeploymentConfig( 22 | count: Int, 23 | cpus: Double, 24 | mem: Double, 25 | disk_type: DiskType, 26 | disk: Long, 27 | disk_max: Option[Long], 28 | path_constraint: Option[String], 29 | max_per_host: Int 30 | ) 31 | 32 | case class RGWDeploymentConfig( 33 | count: Int, 34 | cpus: Double, 35 | mem: Double, 36 | port: Option[Int], 37 | docker_args: Map[String, String], 38 | max_per_host: Int 39 | ) 40 | 41 | case class DeploymentConfig( 42 | mon: MonDeploymentConfig, 43 | osd: OSDDeploymentConfig, 44 | rgw: RGWDeploymentConfig, 45 | docker_image: String 46 | ) 47 | 48 | case class CephSettings( 49 | global: ConfigObject, 50 | auth: ConfigObject, 51 | mon: ConfigObject, 52 | osd: ConfigObject, 53 | client: ConfigObject, 54 | mds: ConfigObject 55 | ) 56 | 57 | case class CephConfig( 58 | deployment: DeploymentConfig, 59 | settings: CephSettings 60 | ) 61 | 62 | 63 | object CephConfigHelper { 64 | val defaultConfig = ConfigFactory.parseString(s""" 65 | deployment { 66 | docker_image = "ceph/daemon:tag-build-master-jewel-ubuntu-14.04" 67 | 68 | mon { 69 | disk_type = root 70 | disk = 16 71 | max_per_host = 1 72 | } 73 | 74 | osd { 75 | disk_type = root 76 | max_per_host = 1 77 | } 78 | 79 | rgw { 80 | max_per_host = 1 81 | docker_args { 82 | } 83 | } 84 | } 85 | 86 | settings { 87 | auth {} 88 | global {} 89 | mon {} 90 | osd {} 91 | client {} 92 | mds {} 93 | } 94 | """) 95 | 96 | implicit val readDiskType: FromString[DiskType] = 97 | FromString.fromTry { str => 98 | DiskType.all. 99 | find(_.toString == str). 100 | getOrElse { 101 | throw(new RuntimeException(s"${str} is not a valid disk type")) 102 | } 103 | } 104 | 105 | def parse(str: String): CephConfig = { 106 | val config = ConfigFactory.parseString(str).withFallback(defaultConfig) 107 | config.extract[CephConfig].value 108 | } 109 | 110 | def parse(bytes: Array[Byte]): CephConfig = { 111 | parse(new String(bytes, UTF_8)) 112 | } 113 | 114 | } 115 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/model/ClusterSecrets.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph.model 2 | 3 | import akka.util.ByteString 4 | import java.security.SecureRandom 5 | import java.util.UUID 6 | import java.nio.{ByteOrder,ByteBuffer} 7 | case class ClusterSecrets( 8 | fsid: UUID, 9 | adminRing: ByteString, 10 | monRing: ByteString, 11 | mdsRing: ByteString, 12 | osdRing: ByteString, 13 | rgwRing: ByteString) 14 | 15 | object ClusterSecrets { 16 | lazy val random = new SecureRandom() 17 | val KeySize = 16 18 | def generateKey = { 19 | // 0000000: 0100 3c64 f357 3dfe bd34 1000 2a00 93c7 .. 
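// The hexdump above matches the cephx keyring layout (little-endian): a u16
// type tag, a u32 seconds / u32 nanoseconds creation stamp, a u16 secret
// length, then KeySize random bytes. A hypothetical sketch of building such
// a blob (illustrative only; the implementation elided here may differ):
//
//   val buf = ByteBuffer.allocate(12 + KeySize).order(ByteOrder.LITTLE_ENDIAN)
//   buf.putShort(1.toShort)                              // type tag: cephx
//   buf.putInt((System.currentTimeMillis / 1000).toInt)  // created, seconds
//   buf.putInt(0)                                        // created, nanoseconds
//   buf.putShort(KeySize.toShort)                        // secret length (16)
//   val key = new Array[Byte](KeySize)
//   random.nextBytes(key)
//   buf.put(key)                                         // the secret itself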
(peer.role == this.role) && peer.id != this.id } 36 | 37 | def reservationId = pState.reservationId 38 | def reservationConfirmed = pState.reservationConfirmed 39 | def goal = pState.goal 40 | def lastLaunched = pState.lastLaunched 41 | def slaveId = pState.slaveId 42 | 43 | def withGoal(goal: Option[RunState.EnumVal]): Job = 44 | copy(pState = pState.copy(goal = goal)) 45 | 46 | /** Given a mesos status, reads the task status, infers and stores IP address if it is avail 47 | */ 48 | def withTaskStatus(taskStatus: Protos.TaskStatus): Job = { 49 | import scala.collection.JavaConversions._ 50 | val inferredIp = taskStatus.getContainerStatus.getNetworkInfosList. 51 | toStream. 52 | flatMap { _.getIpAddressesList.toStream }. 53 | map { _.getIpAddress }. 54 | headOption 55 | 56 | val nextLocation = (inferredIp, pState.location.ipOpt) match { 57 | case (None, Some(ip)) => pState.location.withIP(ip) 58 | case _ => pState.location 59 | } 60 | 61 | copy( 62 | taskState = Some(TaskState.fromMesos(taskStatus.getState)), 63 | pState = pState.copy( 64 | location = nextLocation, 65 | taskId = Some(taskStatus.getTaskId.getValue), 66 | slaveId = Some(taskStatus.getSlaveId.getValue))) 67 | } 68 | 69 | /** If task is running 70 | */ 71 | def runningState: Option[RunState.EnumVal] = for { 72 | status <- taskStatus 73 | launched <- pState.lastLaunched 74 | if (status.state == TaskState.TaskRunning) 75 | } yield { 76 | launched 77 | } 78 | } 79 | 80 | object Job { 81 | def newTask(id: UUID, cluster: String, role: JobRole.EnumVal, pState: PersistentState, 82 | defaultBehavior: JobRole.EnumVal => Behavior): Job = { 83 | 84 | Job( 85 | id = id, 86 | cluster = cluster, 87 | role = role, 88 | pState = pState, 89 | behavior = defaultBehavior(role), 90 | persistentVersion = 0, 91 | version = 1 92 | ) 93 | } 94 | 95 | def forRole(role: JobRole.EnumVal, defaultBehavior: JobRole.EnumVal => Behavior): Job = { 96 | val id = UUID.randomUUID 97 | newTask( 98 | id = id, 99 | cluster = Constants.DefaultCluster, 100 | role = role, 101 | pState = PersistentState(id = id, cluster = Constants.DefaultCluster, role = role), 102 | defaultBehavior = defaultBehavior) 103 | } 104 | 105 | 106 | def fromState(state: PersistentState, defaultBehavior: JobRole.EnumVal => Behavior): Job = { 107 | newTask( 108 | id = state.id, 109 | cluster = state.cluster, 110 | role = state.role, 111 | pState = state, 112 | defaultBehavior = defaultBehavior 113 | ) 114 | } 115 | def makeTaskId(role: JobRole.EnumVal, cluster: String): String = 116 | s"${cluster}.${role}.${UUID.randomUUID.toString}" 117 | 118 | } 119 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/model/JobRole.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph 2 | package model 3 | 4 | object JobRole extends lib.Enum { 5 | sealed trait EnumVal extends Value 6 | 7 | case object Monitor extends EnumVal { val name = "mon" } 8 | case object OSD extends EnumVal { val name = "osd" } 9 | case object RGW extends EnumVal { val name = "rgw" } 10 | val values = Vector(Monitor, OSD, RGW) 11 | } 12 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/model/Location.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph.model 2 | 3 | sealed trait Location { 4 | def ipOpt: Option[String] 5 | def portOpt: Option[Int] 6 | def hostnameOpt: Option[String] 7 | 
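// Each concrete Location below models progressively more complete knowledge
// of where a task runs, and withIP upgrades one to the next. Illustrative
// values: PartialLocation(None, Some(6789)).withIP("10.0.0.5") yields
// IPLocation("10.0.0.5", 6789), while IPLocation("10.0.0.5", 6789).withIP("10.0.0.6")
// just replaces the address.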
def withIP(ip: String): Location 8 | } 9 | object Location { 10 | val empty = PartialLocation(None, None) 11 | } 12 | 13 | case class PartialLocation(ip: Option[String], port: Option[Int]) extends Location { 14 | def ipOpt: Option[String] = ip 15 | def portOpt: Option[Int] = port 16 | def hostnameOpt: Option[String] = None 17 | def withIP(ip: String) = port match { 18 | case Some(p) => IPLocation(ip, p) 19 | case None => PartialLocation(Some(ip), None) 20 | } 21 | } 22 | 23 | sealed trait IPLocationLike extends Location { 24 | def ip: String 25 | def port: Int 26 | 27 | def ipOpt: Option[String] = Some(ip) 28 | def portOpt: Option[Int] = Some(port) 29 | } 30 | 31 | case class IPLocation(ip: String, port: Int) extends Location with IPLocationLike { 32 | def withIP(ip: String) = copy(ip = ip) 33 | def hostnameOpt = None 34 | } 35 | 36 | case class ServiceLocation(hostname: String, ip: String, port: Int) extends Location with IPLocationLike { 37 | def withIP(ip: String) = copy(ip = ip) 38 | def hostnameOpt = Some(hostname) 39 | } 40 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/model/PersistentState.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph 2 | package model 3 | 4 | import java.util.UUID 5 | 6 | case class PersistentState( 7 | id: UUID, 8 | cluster: String, 9 | role: JobRole.EnumVal, 10 | goal: Option[RunState.EnumVal] = None, 11 | lastLaunched: Option[RunState.EnumVal] = None, 12 | reservationConfirmed: Boolean = false, 13 | reservationId: Option[UUID] = None, 14 | slaveId: Option[String] = None, 15 | taskId: Option[String] = None, 16 | location: Location = Location.empty) { 17 | 18 | def ipLocation: Option[IPLocationLike] = location match { 19 | case i: IPLocationLike => Some(i) 20 | case _ => None 21 | } 22 | 23 | def serviceLocation: Option[ServiceLocation] = location match { 24 | case s: ServiceLocation => Some(s) 25 | case _ => None 26 | } 27 | 28 | if (reservationConfirmed) { 29 | require(slaveId.nonEmpty, "slaveId must be set if reservationConfirmed is set") 30 | require(reservationId.nonEmpty, "reservationId must be set if reservationConfirmed is set") 31 | } 32 | 33 | def resourcesReserved = 34 | slaveId.nonEmpty 35 | } 36 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/model/PlayJsonFormats.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph 2 | package model 3 | import akka.util.ByteString 4 | import java.util.Base64 5 | import play.api.libs.json._ 6 | 7 | object PlayJsonFormats{ 8 | implicit val ByteStringFormat = new Format[ByteString] { 9 | val base64Decoder = Base64.getDecoder 10 | val base64Encoder = Base64.getEncoder 11 | 12 | def reads(js: JsValue): JsResult[ByteString] = 13 | js.validate[String].map { str => 14 | ByteString(base64Decoder.decode(str)) 15 | } 16 | 17 | def writes(byteString: ByteString): JsValue = 18 | JsString( 19 | base64Encoder.encodeToString(byteString.toArray)) 20 | } 21 | 22 | def enumFormat[T <: lib.Enum](e: T): Format[T#EnumVal] = new Format[T#EnumVal] { 23 | def reads(js: JsValue): JsResult[T#EnumVal] = js match { 24 | case JsString(str) => 25 | e.values.find(_.name == str) match { 26 | case Some(enumVal) => JsSuccess(enumVal) 27 | case None => 28 | JsError(s"invalid value for ${e}: '${str}'. 
Valid values are ${e.values.map { v => "'" + v.name + "'" }.mkString(",")}") 29 | } 30 | case other => 31 | JsError(s"string expected, got ${other}") 32 | } 33 | 34 | def writes(enumVal: T#EnumVal): JsValue = 35 | JsString(enumVal.name) 36 | } 37 | 38 | implicit val RunStateFormat = enumFormat(RunState) 39 | implicit val JobRoleFormat = enumFormat(JobRole) 40 | implicit val LocationFormat = new Format[Location] { 41 | def reads(js: JsValue): JsResult[Location] = 42 | for { 43 | hostname <- (js \ "hostname").validateOpt[String] 44 | ip <- (js \ "ip").validateOpt[String] 45 | port <- (js \ "port").validateOpt[Int] 46 | } yield { 47 | (hostname, ip, port) match { 48 | case (Some(hostname), Some(ip), Some(port)) => 49 | ServiceLocation(hostname, ip, port) 50 | case (None, Some(ip), Some(port)) => 51 | IPLocation(ip, port) 52 | case (_, ip, port) => 53 | PartialLocation(ip, port) 54 | } 55 | } 56 | 57 | def writes(location: Location): JsValue = { 58 | location match { 59 | case ServiceLocation(hostname, ip, port) => 60 | Json.obj("hostname" -> hostname, "ip" -> ip, "port" -> port) 61 | case IPLocation(ip, port) => 62 | Json.obj("ip" -> ip, "port" -> port) 63 | case PartialLocation(ip, port) => 64 | Json.obj("ip" -> ip, "port" -> port) 65 | } 66 | } 67 | } 68 | 69 | implicit val MonTaskFormat = Json.format[PersistentState] 70 | implicit val ClusterSecretsFormat = Json.format[ClusterSecrets] 71 | 72 | implicit val JobWriter = Writes[Job] { task => 73 | Json.toJson(task.pState).as[JsObject] ++ 74 | Json.obj( 75 | "version" -> task.version, 76 | "persistentVersion" -> task.persistentVersion, 77 | "behavior" -> task.behavior.name, 78 | "wantingNewOffer" -> task.wantingNewOffer, 79 | "taskStatus" -> task.taskStatus.map(_.state.name) 80 | ) 81 | 82 | } 83 | 84 | implicit val ReservationReleaseFormat = Json.format[ReservationRelease] 85 | implicit val ReservationReleaseDetailsFormat = Json.format[ReservationReleaseDetails] 86 | } 87 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/model/ReservationRelease.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph.model 2 | 3 | import java.util.UUID 4 | import java.time.ZonedDateTime 5 | 6 | case class ReservationRelease( 7 | id: UUID, 8 | lastSeen: ZonedDateTime, 9 | unreserve: Boolean) { 10 | def toDetailed(details: Option[String] = None) = 11 | ReservationReleaseDetails( 12 | id = id, 13 | unreserve = unreserve, 14 | lastSeen = lastSeen, 15 | details = details) 16 | } 17 | 18 | case class ReservationReleaseDetails( 19 | id: UUID, 20 | lastSeen: ZonedDateTime, 21 | unreserve: Boolean = false, 22 | details: Option[String] = None) { 23 | 24 | def withoutDetails = 25 | ReservationRelease( 26 | id = id, 27 | unreserve = unreserve, 28 | lastSeen = lastSeen) 29 | } 30 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/model/RunState.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph 2 | package model 3 | 4 | object RunState extends lib.Enum { 5 | sealed trait EnumVal extends Value 6 | 7 | case object Paused extends EnumVal { val name = "paused" } 8 | case object Running extends EnumVal { val name = "running" } 9 | val values = Vector(Paused, Running) 10 | } 11 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/model/TaskState.scala: 
-------------------------------------------------------------------------------- 1 | package com.vivint.ceph 2 | package model 3 | 4 | import org.apache.mesos.Protos 5 | import scala.collection.breakOut 6 | 7 | object TaskState extends lib.Enum { 8 | sealed trait EnumVal extends Value { 9 | val id: Int 10 | } 11 | 12 | sealed trait Active extends EnumVal 13 | sealed trait Limbo extends EnumVal 14 | sealed trait Terminal extends EnumVal 15 | 16 | case object TaskStarting extends Active { val id = Protos.TaskState.TASK_STARTING_VALUE ; val name = "TASK_STARTING" } 17 | case object TaskStaging extends Active { val id = Protos.TaskState.TASK_STAGING_VALUE ; val name = "TASK_STAGING" } 18 | case object TaskRunning extends Active { val id = Protos.TaskState.TASK_RUNNING_VALUE ; val name = "TASK_RUNNING" } 19 | case object TaskKilling extends Active { val id = Protos.TaskState.TASK_KILLING_VALUE ; val name = "TASK_KILLING" } 20 | case object TaskFinished extends Terminal { val id = Protos.TaskState.TASK_FINISHED_VALUE ; val name = "TASK_FINISHED" } 21 | case object TaskFailed extends Terminal { val id = Protos.TaskState.TASK_FAILED_VALUE ; val name = "TASK_FAILED" } 22 | case object TaskKilled extends Terminal { val id = Protos.TaskState.TASK_KILLED_VALUE ; val name = "TASK_KILLED" } 23 | case object TaskError extends Terminal { val id = Protos.TaskState.TASK_ERROR_VALUE ; val name = "TASK_ERROR" } 24 | case object TaskLost extends Limbo { val id = Protos.TaskState.TASK_LOST_VALUE ; val name = "TASK_LOST" } 25 | 26 | val values = Vector(TaskStarting, TaskStaging, TaskRunning, TaskKilling, TaskFinished, TaskFailed, TaskKilled, 27 | TaskError, TaskLost) 28 | 29 | val valuesById: Map[Int, EnumVal] = 30 | values.map { v => v.id -> v}(breakOut) 31 | 32 | def fromMesos(p: Protos.TaskState): TaskState.EnumVal = { 33 | TaskState.valuesById(p.getNumber) 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/model/TaskStatus.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph 2 | package model 3 | 4 | import org.apache.mesos.Protos 5 | 6 | case class TaskStatus(taskId: String, slaveId: String, state: TaskState.EnumVal) { 7 | def toMesos: Protos.TaskStatus = { 8 | ProtoHelpers.newTaskStatus(taskId, slaveId, Protos.TaskState.valueOf(state.id)) 9 | } 10 | } 11 | 12 | object TaskStatus extends ((String, String, TaskState.EnumVal) => TaskStatus) { 13 | def fromMesos(p: Protos.TaskStatus) = { 14 | TaskStatus( 15 | p.getTaskId.getValue, 16 | p.getSlaveId.getValue, 17 | TaskState.valuesById(p.getState.getNumber)) 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/orchestrator/Bootstrap.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph.orchestrator 2 | 3 | import com.vivint.ceph.model.{ RunState, Job, JobRole } 4 | import java.util.UUID 5 | import OrchestratorFSM._ 6 | 7 | object Bootstrap { 8 | def isMonitor(job: Job) = 9 | job.role == JobRole.Monitor 10 | 11 | case object Start extends Behavior { 12 | override def preStart(fullState: Map[UUID, Job]): Directive = 13 | fullState.values.filter(_.goal.nonEmpty).toList match { 14 | case Nil => 15 | Stay 16 | case List(single) => 17 | Transition(WaitingMonLeader(single.id)) 18 | case more => 19 | Transition(WaitingMonQuorum) 20 | } 21 | 22 | override def onUpdate(oldJob: 
Option[Job], newJob: Option[Job], fullState: Map[UUID, Job]): Directive = 23 | newJob match { 24 | case Some(job) 25 | if job.pState.reservationConfirmed && job.role == JobRole.Monitor => 26 | 27 | UpdateJob( 28 | job.withGoal(Some(RunState.Running))). 29 | andAlso( 30 | Transition(WaitingMonLeader(job.id))) 31 | case _ => 32 | Stay 33 | } 34 | } 35 | 36 | // TODO - wait for ready / health check to pass when those are implemented 37 | case class WaitingMonLeader(id: UUID) extends Behavior { 38 | override def preStart(fullState: Map[UUID, Job]): Directive = 39 | if (fullState(id).runningState.nonEmpty) 40 | Transition(WaitingMonQuorum) 41 | else 42 | Stay 43 | 44 | override def onUpdate(oldJob: Option[Job], newJob: Option[Job], fullState: Map[UUID, Job]): Directive = { 45 | newJob match { 46 | case Some(job) if job.id == id && job.runningState.nonEmpty => 47 | Transition(WaitingMonQuorum) 48 | case _ => 49 | Stay 50 | } 51 | } 52 | } 53 | 54 | /* 55 | TODO - implement condition pattern (with optimization hint to know when to re-evaluate the condition) 56 | */ 57 | case object WaitingMonQuorum extends Behavior { 58 | private def getMonitors(fullState: Map[UUID, Job]) = { 59 | fullState.values.view.filter(isMonitor) 60 | } 61 | 62 | override def preStart(fullState: Map[UUID, Job]): Directive = { 63 | Directive( 64 | getMonitors(fullState). 65 | filter(_.goal.isEmpty). 66 | map(_.withGoal(Some(RunState.Running))). 67 | map(UpdateJob). 68 | toList) 69 | } 70 | 71 | override def onUpdate(oldJob: Option[Job], newJob: Option[Job], fullState: Map[UUID, Job]): Directive = { 72 | if (quorumMonitorsAreRunning(fullState)) 73 | Transition(Up) 74 | else newJob match { 75 | case Some(job) if job.role == JobRole.Monitor => 76 | if (quorumMonitorsAreRunning(fullState)) 77 | Transition(Up) 78 | else 79 | Stay 80 | case _ => Stay // non-monitor updates cannot affect monitor quorum 81 | } 82 | } 83 | private def quorumMonitorsAreRunning(fullState: Map[UUID, Job]) = { 84 | val monitors = getMonitors(fullState) 85 | val runningMonitors = monitors.filter(_.runningState == Some(RunState.Running)) 86 | runningMonitors.size > (monitors.size / 2) // NOTE: vacuously false while no monitor jobs exist 87 | } 88 | } 89 | 90 | /** 91 | * Behavior when we are up - we give every task a goal of running 92 | */ 93 | case object Up extends Behavior { 94 | override def preStart(fullState: Map[UUID, Job]): Directive = { 95 | fullState.values. 96 | iterator. 97 | filter(_.goal.isEmpty). 98 | map(_.withGoal(Some(RunState.Running))). 99 | map(UpdateJob). 
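// The foldLeft below merges every UpdateJob action into a single Directive
// via andAlso; illustratively, Stay andAlso a1 andAlso a2 ==
// Directive(List(a1, a2), None).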
100 | foldLeft(Stay) { _ andAlso _ } 101 | } 102 | 103 | override def onUpdate(oldJob: Option[Job], newJob: Option[Job], fullState: Map[UUID, Job]): Directive = { 104 | newJob match { 105 | case Some(job) if job.goal.isEmpty => 106 | UpdateJob(job.withGoal(Some(RunState.Running))) 107 | case _ => 108 | Stay 109 | } 110 | } 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/orchestrator/Orchestrator.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph.orchestrator 2 | 3 | import com.vivint.ceph.JobsState 4 | 5 | object Orchestrator { 6 | /** 7 | * Bootstraps the orchestrator 8 | */ 9 | def run(jobs: JobsState): Unit = { 10 | val orchestratorFSM = new OrchestratorFSM(Bootstrap.Start, jobs) 11 | jobs.addSubscriber(orchestratorFSM.update) 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/orchestrator/OrchestratorFSM.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph.orchestrator 2 | 3 | import com.vivint.ceph.JobsState 4 | import com.vivint.ceph.model.Job 5 | import java.util.UUID 6 | import OrchestratorFSM._ 7 | 8 | class OrchestratorFSM(initialBehavior: Behavior, jobs: JobsState) { 9 | var currentBehavior: Behavior = initializeBehavior(initialBehavior) 10 | 11 | def initializeBehavior(behavior: Behavior): Behavior = 12 | processDirective(behavior, behavior.preStart(jobs.all)) 13 | 14 | def applyAction(action: Action): Unit = action match { 15 | case UpdateJob(job) => 16 | jobs.updateJob(job) 17 | } 18 | 19 | def processDirective(behavior: Behavior, directive: Directive): Behavior = { 20 | directive.action.foreach(applyAction) 21 | directive.transition match { 22 | case None => behavior 23 | case Some(newBehavior) => 24 | initializeBehavior(newBehavior) 25 | } 26 | } 27 | 28 | val update: JobsState.Subscriber = { case (oldJobState, newJobState) => 29 | currentBehavior = processDirective( 30 | currentBehavior, 31 | currentBehavior.onUpdate(oldJobState, newJobState, jobs.all)) 32 | } 33 | } 34 | 35 | object OrchestratorFSM { 36 | trait Behavior { 37 | def preStart(fullState: Map[UUID, Job]): Directive = Stay 38 | def onUpdate(oldJob: Option[Job], newJob: Option[Job], fullState: Map[UUID, Job]): Directive 39 | } 40 | 41 | case class Directive(action: List[Action] = Nil, transition: Option[Behavior] = None) { 42 | def andAlso(other: Action): Directive = 43 | copy(action = action :+ other) 44 | } 45 | object Directive { 46 | import scala.language.implicitConversions 47 | implicit def fromAction(action: Action): Directive = { 48 | Directive(List(action), None) 49 | } 50 | implicit def fromActionList(actionList: ActionList): Directive = { 51 | Directive(actionList.actions, None) 52 | } 53 | } 54 | 55 | sealed trait Action { 56 | def withTransition(b: Behavior): Directive = 57 | Directive(List(this), Some(b)) 58 | def andAlso(other: Action): ActionList = 59 | ActionList(this :: other :: Nil) 60 | def andAlso(other: Directive): Directive = 61 | other.copy(action = this :: other.action) 62 | } 63 | case class ActionList(actions: List[Action]) { 64 | def withTransition(b: Behavior): Directive = 65 | Directive(actions, Some(b)) 66 | 67 | def andAlso(other: Action): ActionList = 68 | ActionList(actions :+ other) 69 | } 70 | 71 | final def Transition(behavior: Behavior) = Directive(Nil, Some(behavior)) 72 | case class 
UpdateJob(job: Job) extends Action 73 | 74 | final val Stay = Directive() 75 | } 76 | -------------------------------------------------------------------------------- /src/main/scala/com/vivint/ceph/views/ConfigTemplates.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph 2 | package views 3 | 4 | import akka.util.ByteString 5 | import com.typesafe.config.ConfigObject 6 | import java.net.{ Inet4Address, InetAddress } 7 | import java.util.Base64 8 | import model._ 9 | import org.apache.mesos.Protos 10 | import ProtoHelpers._ 11 | import mesosphere.mesos.protos.Resource 12 | import scala.collection.JavaConversions._ 13 | import scala.collection.immutable.NumericRange 14 | import scaldi.Injector 15 | import scaldi.Injectable._ 16 | import configs.syntax._ 17 | import mesosphere.mesos.protos.Resource.PORTS 18 | import java.nio.charset.StandardCharsets.UTF_8 19 | 20 | object ConfigTemplates { 21 | private[views] def renderSettings(cfg: ConfigObject): String = { 22 | val b = new StringBuilder 23 | cfg.keySet.toSeq.sorted.foreach { k => 24 | b.append(s"${k} = ${cfg(k).render}\n") 25 | } 26 | b.result 27 | } 28 | } 29 | 30 | class ConfigTemplates(implicit inj: Injector) { 31 | import ConfigTemplates._ 32 | val config = inject[AppConfiguration] 33 | 34 | def base64Encode(bs: ByteString): String = { 35 | Base64.getEncoder.encodeToString(bs.toArray) 36 | } 37 | 38 | def cephConf(secrets: ClusterSecrets, monitors: Set[ServiceLocation], cephSettings: CephSettings, 39 | osdPort: Option[NumericRange.Inclusive[Long]]) = { 40 | 41 | val sortedMonitors = monitors.toSeq.sortBy(_.hostname) 42 | 43 | val osdPortSection = osdPort.map { port => 44 | s""" 45 | |## ports 46 | |ms_bind_port_min = ${port.min} 47 | |ms_bind_port_max = ${port.max} 48 | |""".stripMargin 49 | }.getOrElse("") 50 | 51 | s""" 52 | [global] 53 | fsid = ${secrets.fsid} 54 | mon initial members = ${sortedMonitors.map(_.hostname).mkString(",")} 55 | mon host = ${sortedMonitors.map { m => m.ip + ":" + m.port }.mkString(",")} 56 | auth cluster required = cephx 57 | auth service required = cephx 58 | auth client required = cephx 59 | public network = ${config.publicNetwork} 60 | cluster network = ${config.clusterNetwork} 61 | ${renderSettings(cephSettings.global)} 62 | 63 | [auth] 64 | ${renderSettings(cephSettings.auth)} 65 | 66 | [mon] 67 | ${renderSettings(cephSettings.mon)} 68 | 69 | [osd] 70 | ${renderSettings(cephSettings.osd)} 71 | ${osdPortSection} 72 | 73 | [client] 74 | ${renderSettings(cephSettings.client)} 75 | 76 | [mds] 77 | ${renderSettings(cephSettings.mds)} 78 | """ 79 | } 80 | 81 | def cephClientAdminRing(secrets: ClusterSecrets) = { 82 | s""" 83 | [client.admin] 84 | key = ${base64Encode(secrets.adminRing)} 85 | auid = 0 86 | caps mds = "allow" 87 | caps mon = "allow *" 88 | caps osd = "allow *" 89 | """ 90 | } 91 | 92 | def cephMonRing(secrets: ClusterSecrets) = { 93 | s""" 94 | [mon.] 
95 | key = ${base64Encode(secrets.monRing)} 96 | caps mon = "allow *" 97 | """ 98 | } 99 | 100 | def bootstrapMdsRing(secrets: ClusterSecrets) = { 101 | s""" 102 | [client.bootstrap-mds] 103 | key = ${base64Encode(secrets.mdsRing)} 104 | caps mon = "allow profile bootstrap-mds" 105 | """ 106 | } 107 | 108 | def bootstrapOsdRing(secrets: ClusterSecrets) = { 109 | s""" 110 | [client.bootstrap-osd] 111 | key = ${base64Encode(secrets.osdRing)} 112 | caps mon = "allow profile bootstrap-osd" 113 | """ 114 | } 115 | 116 | def bootstrapRgwRing(secrets: ClusterSecrets) = { 117 | s""" 118 | [client.bootstrap-rgw] 119 | key = ${base64Encode(secrets.rgwRing)} 120 | caps mon = "allow profile bootstrap-rgw" 121 | """ 122 | } 123 | 124 | def files(secrets: ClusterSecrets, monitors: Set[ServiceLocation], cephSettings: CephSettings, 125 | osdPort: Option[NumericRange.Inclusive[Long]] = None): Map[String, String] = { 126 | 127 | Map( 128 | "etc/ceph/ceph.conf" -> cephConf(secrets, monitors, cephSettings, osdPort), 129 | "etc/ceph/ceph.client.admin.keyring" -> cephClientAdminRing(secrets), 130 | "etc/ceph/ceph.mon.keyring" -> cephMonRing(secrets), 131 | "var/lib/ceph/bootstrap-mds/ceph.keyring" -> bootstrapMdsRing(secrets), 132 | "var/lib/ceph/bootstrap-osd/ceph.keyring" -> bootstrapOsdRing(secrets), 133 | "var/lib/ceph/bootstrap-rgw/ceph.keyring" -> bootstrapRgwRing(secrets)) 134 | } 135 | 136 | def tgz(secrets: ClusterSecrets, monitors: Set[ServiceLocation], cephSettings: CephSettings, 137 | osdPort: Option[NumericRange.Inclusive[Long]] = None): Array[Byte] = { 138 | import lib.TgzHelper.makeTgz 139 | 140 | makeTgz(files(secrets, monitors, cephSettings, osdPort)) 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /src/test/resources/application.conf: -------------------------------------------------------------------------------- 1 | akka { 2 | loggers = ["akka.event.slf4j.Slf4jLogger"] 3 | loglevel = "DEBUG" 4 | logging-filter = "akka.event.slf4j.Slf4jLoggingFilter" 5 | } -------------------------------------------------------------------------------- /src/test/scala/com/vivint/ceph/ConfigStoreTest.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph 2 | 3 | import com.vivint.ceph.model.CephConfigHelper 4 | import org.scalatest.{FunSpec, Matchers} 5 | 6 | class ConfigStoreTest extends FunSpec with Matchers { 7 | describe("default config") { 8 | it("provides default values based on the presence of environment variables") { 9 | val cfgString = ConfigStore.default( 10 | Map( 11 | "CEPH_MON_INIT_PORT" -> "999")) 12 | val cfg = CephConfigHelper.parse(cfgString) 13 | cfg.deployment.mon.port.shouldBe(Some(999)) 14 | } 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /src/test/scala/com/vivint/ceph/JobBehaviorTest.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph 2 | 3 | 
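// The specs in this file drive the JobBehavior state machines directly and
// destructure the resulting Directive values. A schematic of the pattern
// used throughout (the names here are illustrative):
//
//   val Directive(actions, transition) = job.behavior.preStart(job, Map.empty)
//   // actions:    side effects to apply (Persist, OfferResponse, WantOffers, ...)
//   // transition: Some(nextBehavior) when the FSM should advance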
import com.vivint.ceph.Directives.{ Directive, OfferResponse, Persist, SetBehaviorTimer } 4 | import com.vivint.ceph.model.{ CephConfigHelper, ClusterSecrets, Job, RunState, TaskState, Location } 5 | import org.scalatest.{ FunSpec, Inside, Matchers } 6 | import scaldi.Module 7 | 8 | class JobBehaviorTest extends FunSpec with Matchers with Inside { 9 | val module = new Module { 10 | bind [OfferOperations] to new OfferOperations 11 | bind [views.ConfigTemplates] to new views.ConfigTemplates 12 | bind [AppConfiguration] to Workbench.newAppConfiguration() 13 | } 14 | 15 | import module.injector 16 | val jobBehavior = new JobBehavior( 17 | ClusterSecrets.generate, 18 | frameworkId = MesosTestHelper.frameworkID, 19 | deploymentConfig = { () => CephConfigHelper.parse(ConfigStore.default(Map.empty)) } 20 | ) 21 | describe("MatchAndLaunchEphemeral") { 22 | it("waits for the task status to be known before requesting offers") { 23 | val rgwJob = Job.fromState(Workbench.newRunningRGWJob(), jobBehavior.defaultBehavior) 24 | 25 | val response = rgwJob.behavior.preStart(rgwJob, Map.empty) 26 | response.action shouldBe Nil 27 | response.transition.get shouldBe (jobBehavior.rgw.EphemeralRunning) 28 | } 29 | 30 | it("persists lastLaunched when accepting an offer") { 31 | val rgwJob = Job.fromState( 32 | Workbench.newPendingRGWJob( 33 | goal = Some(RunState.Running)), 34 | jobBehavior.defaultBehavior) 35 | 36 | val Directive(List(action), None) = rgwJob.behavior.preStart(rgwJob, Map.empty) 37 | action shouldBe (Directives.WantOffers) 38 | 39 | val pendingOffer = PendingOffer(MesosTestHelper.makeBasicOffer().build) 40 | 41 | val Directive(List(persistAction, offerResponseAction), Some(transition)) = 42 | rgwJob.behavior.handleEvent(JobFSM.MatchedOffer(pendingOffer, None), rgwJob, Map.empty) 43 | 44 | inside(persistAction) { 45 | case Persist(data) => 46 | data.taskId.isEmpty shouldBe false 47 | data.lastLaunched shouldBe Some(RunState.Running) 48 | data.slaveId.isEmpty shouldBe false 49 | } 50 | 51 | inside(offerResponseAction) { 52 | case OfferResponse(_, List(launchCommand)) => 53 | launchCommand.hasLaunch() shouldBe true 54 | } 55 | 56 | transition shouldBe jobBehavior.rgw.EphemeralRunning 57 | } 58 | 59 | it("relaunches tasks that are TASK_LOST after timeout") { 60 | val rgwJob = Job.fromState( 61 | Workbench.newRunningRGWJob(), 62 | jobBehavior.defaultBehavior) 63 | 64 | val Directive(Nil, Some(nextBehavior)) = rgwJob.behavior.preStart(rgwJob, Map.empty) 65 | 66 | nextBehavior shouldBe jobBehavior.rgw.EphemeralRunning 67 | 68 | val Directive(List(SetBehaviorTimer(timerId, _)), None) = nextBehavior.preStart(rgwJob, Map.empty) 69 | 70 | val taskLost = rgwJob.copy(taskState = Some(TaskState.TaskLost)) 71 | 72 | val Directive(List(SetBehaviorTimer(`timerId`, _)), None) = 73 | nextBehavior.handleEvent(JobFSM.JobUpdated(rgwJob), taskLost, Map.empty) 74 | 75 | val Directive(List(Persist(relaunchState)), Some(relaunchBehavior)) = 76 | nextBehavior.handleEvent(JobFSM.Timer(timerId), taskLost, Map.empty) 77 | 78 | relaunchState.taskId shouldBe None 79 | relaunchState.slaveId shouldBe None 80 | relaunchState.location shouldBe Location.empty 81 | 82 | relaunchBehavior shouldBe jobBehavior.rgw.MatchAndLaunchEphemeral 83 | 84 | } 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/test/scala/com/vivint/ceph/MesosTestHelper.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph 2 | 3 | import 
mesosphere.marathon.state.ResourceRole 4 | import org.apache.mesos.Protos.Offer 5 | import org.apache.mesos.{Protos => Mesos} 6 | import mesosphere.mesos.protos.Resource.{CPUS, MEM, PORTS, DISK} 7 | import scala.collection.JavaConverters._ 8 | import scala.collection.JavaConversions._ 9 | import scala.collection.immutable.NumericRange 10 | 11 | /** Adapted from MarathonTestHelper; TODO licensing 12 | */ 13 | object MesosTestHelper { 14 | import ProtoHelpers._ 15 | val idx = new java.util.concurrent.atomic.AtomicInteger 16 | val frameworkID: Mesos.FrameworkID = newFrameworkId("ceph-framework-id") 17 | 18 | def makeBasicOffer(cpus: Double = 4.0, mem: Double = 16000, disk: Double = 1024.0, 19 | ports: NumericRange.Inclusive[Long] = (31000L to 32000L), role: String = ResourceRole.Unreserved, 20 | reservationLabels: Option[Mesos.Labels] = None, slaveId: Int = 0, offerId: Int = idx.incrementAndGet): Offer.Builder = { 21 | 22 | require(role != ResourceRole.Unreserved || reservationLabels.isEmpty, "reserved resources cannot have role *") 23 | 24 | def heedReserved(resource: Mesos.Resource): Mesos.Resource = { 25 | reservationLabels match { 26 | case Some(labels) => 27 | val reservation = 28 | Mesos.Resource.ReservationInfo.newBuilder() 29 | .setPrincipal("ceph") 30 | .setLabels(labels) 31 | resource.toBuilder.setReservation(reservation).build() 32 | case None => 33 | resource 34 | } 35 | } 36 | 37 | val cpusResource = heedReserved(newScalarResource(CPUS, cpus, role = role)) 38 | val memResource = heedReserved(newScalarResource(MEM, mem, role = role)) 39 | val diskResource = heedReserved(newScalarResource(DISK, disk, role = role)) 40 | val portsResource = heedReserved(newRangesResource(PORTS, Seq(ports), role)) 41 | 42 | val offerBuilder = Offer.newBuilder. 43 | setId(newOfferId(offerId.toString)). 44 | setFrameworkId(frameworkID). 45 | setSlaveId(newSlaveId(s"slave-${slaveId}")). 46 | setHostname("localhost"). 47 | addResources(cpusResource). 48 | addResources(memResource). 49 | addResources(diskResource). 50 | addResources(portsResource) 51 | 52 | offerBuilder 53 | } 54 | 55 | def mountSource(path: Option[String]): Mesos.Resource.DiskInfo.Source = { 56 | val b = Mesos.Resource.DiskInfo.Source.newBuilder. 57 | setType(Mesos.Resource.DiskInfo.Source.Type.MOUNT) 58 | path.foreach { p => 59 | b.setMount(Mesos.Resource.DiskInfo.Source.Mount.newBuilder. 60 | setRoot(p)) 61 | } 62 | 63 | b.build 64 | } 65 | 66 | def mountSource(path: String): Mesos.Resource.DiskInfo.Source = 67 | mountSource(Some(path)) 68 | 69 | def mountDisk(path: Option[String]): Mesos.Resource.DiskInfo = { 70 | Mesos.Resource.DiskInfo.newBuilder. 71 | setSource( 72 | mountSource(path)). 73 | build 74 | } 75 | 76 | def mountDisk(path: String): Mesos.Resource.DiskInfo = 77 | mountDisk(Some(path)) 78 | 79 | def pathSource(path: Option[String]): Mesos.Resource.DiskInfo.Source = { 80 | val b = Mesos.Resource.DiskInfo.Source.newBuilder. 81 | setType(Mesos.Resource.DiskInfo.Source.Type.PATH) 82 | path.foreach { p => 83 | b.setPath(Mesos.Resource.DiskInfo.Source.Path.newBuilder. 84 | setRoot(p)) 85 | } 86 | 87 | b.build 88 | } 89 | 90 | def pathSource(path: String): Mesos.Resource.DiskInfo.Source = 91 | pathSource(Some(path)) 92 | 93 | def pathDisk(path: Option[String]): Mesos.Resource.DiskInfo = { 94 | Mesos.Resource.DiskInfo.newBuilder. 95 | setSource( 96 | pathSource(path)). 
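// PATH disk sources describe a directory whose space Mesos may subdivide
// across many reservations; MOUNT sources (mountDisk above) are offered and
// consumed only as a whole volume.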
97 | build 98 | } 99 | 100 | def pathDisk(path: String): Mesos.Resource.DiskInfo = 101 | pathDisk(Some(path)) 102 | 103 | /** 104 | * crude method which currently assumes that all disk resources are represented by volumes */ 105 | def mergeReservation(offer: Mesos.Offer, 106 | resources: Mesos.Offer.Operation.Reserve, 107 | volumes: Mesos.Offer.Operation.Create): Mesos.Offer = { 108 | 109 | offer.toBuilder.clearResources(). 110 | addAllResources(resources.getResourcesList.filterNot(_.getName == DISK)). 111 | addAllResources(volumes.getVolumesList). 112 | setId(newOfferId(idx.incrementAndGet.toString)). 113 | build 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /src/test/scala/com/vivint/ceph/ReservationReaperActorTest.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph 2 | 3 | import akka.actor.{ ActorRef, ActorSystem, Props } 4 | import akka.pattern.ask 5 | import akka.util.Timeout 6 | import akka.testkit.{ TestKit, TestProbe } 7 | import com.vivint.ceph.ReservationReaperActor._ 8 | import com.vivint.ceph.kvstore.KVStore 9 | import com.vivint.ceph.model.{ ReservationRelease, ReservationReleaseDetails } 10 | import java.time.ZonedDateTime 11 | import java.util.UUID 12 | import org.scalatest.{ BeforeAndAfterAll, Inside } 13 | import org.scalatest.path.FunSpecLike 14 | import org.scalatest.{FunSpec, Matchers} 15 | import scala.concurrent.duration._ 16 | import scaldi.{ Injector, Module } 17 | import scaldi.Injectable._ 18 | import ProtoHelpers._ 19 | 20 | class ReservationReaperActorTest extends lib.CephActorTest("releaseActorTest") 21 | with FunSpecLike with Matchers with BeforeAndAfterAll with Inside with lib.TestHelpers { 22 | 23 | class TestBindings extends Module { 24 | val id = idx.incrementAndGet() 25 | 26 | var now: ZonedDateTime = ZonedDateTime.parse("2016-01-01T00:00:00Z") 27 | bind [TestProbe] to { TestProbe() } 28 | bind [AppConfiguration] to Workbench.newAppConfiguration() 29 | bind [OfferOperations] to { new OfferOperations } 30 | bind [KVStore] to { new kvstore.MemStore } 31 | bind [ActorRef] identifiedBy(classOf[ReservationReaperActor]) to { 32 | system.actorOf(Props(new ReservationReaperActor), s"release-actor-${id}") 33 | } destroyWith (system.stop) 34 | bind [() => ZonedDateTime] identifiedBy 'now to { () => now } 35 | } 36 | 37 | def getReleases(implicit inj: Injector) = { 38 | val releaseActor = inject[ActorRef](classOf[ReservationReaperActor]) 39 | await((releaseActor ? GetPendingReleases).mapTo[List[ReservationReleaseDetails]]) 40 | } 41 | 42 | trait Fixtures { 43 | val module = new TestBindings 44 | implicit val injector = module.injector 45 | 46 | val probe = inject[TestProbe] 47 | implicit val sender = probe.ref 48 | lazy val releaseActor = inject[ActorRef](classOf[ReservationReaperActor]) 49 | } 50 | 51 | implicit val timeout = Timeout(5.seconds) 52 | 53 | def reservedOffer(reservationId: UUID) = 54 | MesosTestHelper. 55 | makeBasicOffer( 56 | role = "ceph", 57 | reservationLabels = Some( 58 | newLabels(Constants.ReservationIdLabel -> reservationId.toString))). 59 | build 60 | 61 | describe("ReservationReaperActor") { 62 | it("does not register an item for release until it is permitted to do so") { 63 | new Fixtures { 64 | val reservationId = UUID.randomUUID() 65 | 66 | val pendingOffer = PendingOffer( 67 | reservedOffer(reservationId)) 68 | 69 | releaseActor ! 
UnknownReservation(reservationId, pendingOffer) 70 | await(pendingOffer.resultingOperations) shouldBe Nil 71 | val List(release) = await((releaseActor ? GetPendingReleases).mapTo[List[ReservationReleaseDetails]]) 72 | 73 | release.unreserve shouldBe false 74 | release.lastSeen shouldBe module.now 75 | release.details.nonEmpty shouldBe true 76 | } 77 | } 78 | 79 | it("releases an offer which is permitted to be released") { 80 | new Fixtures { 81 | val reservationId = UUID.randomUUID() 82 | 83 | val pendingOffer = PendingOffer( 84 | reservedOffer(reservationId)) 85 | 86 | releaseActor ! OrderUnreserve(reservationId) 87 | releaseActor ! UnknownReservation(reservationId, pendingOffer) 88 | await(pendingOffer.resultingOperations).nonEmpty shouldBe true 89 | } 90 | } 91 | 92 | it("loads the initial state from the kvStore") { 93 | new Fixtures { 94 | val releaseStore = ReleaseStore(inject[KVStore]) 95 | val reservationId = UUID.randomUUID() 96 | releaseStore.save(ReservationRelease(reservationId, unreserve = true, lastSeen = module.now)) 97 | 98 | val List(release) = getReleases 99 | release.id shouldBe reservationId 100 | release.unreserve shouldBe true 101 | } 102 | } 103 | 104 | it("cleans up pending releases after 7 days") { 105 | new Fixtures { 106 | val releaseStore = ReleaseStore(inject[KVStore]) 107 | val reservationId = UUID.randomUUID() 108 | releaseStore.save(ReservationRelease(reservationId, unreserve = true, lastSeen = module.now)) 109 | 110 | releaseActor ! Cleanup 111 | // Shouldn't have an effect 112 | inside(getReleases) { 113 | case List(release) => 114 | release.id shouldBe reservationId 115 | release.unreserve shouldBe true 116 | } 117 | inside(await(releaseStore.getReleases)) { 118 | case List(release) => 119 | release.id shouldBe reservationId 120 | release.unreserve shouldBe true 121 | } 122 | 123 | // fast forward 8 days and run cleanup again 124 | module.now = module.now.plusDays(8L) 125 | 126 | releaseActor !
Cleanup 127 | 128 | getReleases.isEmpty shouldBe true 129 | await(releaseStore.getReleases).isEmpty shouldBe true 130 | } 131 | } 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /src/test/scala/com/vivint/ceph/Workbench.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph 2 | 3 | import com.vivint.ceph.model.{ IPLocation, Job, JobRole, Location, PersistentState, RunState } 4 | import java.util.UUID 5 | import scala.concurrent.duration._ 6 | 7 | object Workbench { 8 | def newAppConfiguration() = { 9 | AppConfiguration(master = "hai", name = "ceph-test", principal = "ceph", secret = None, role = "ceph", 10 | zookeeper = "zk://test", offerTimeout = 5.seconds, publicNetwork = "10.11.12.0/24", 11 | clusterNetwork = "10.11.12.0/24", storageBackend = "memory") 12 | } 13 | 14 | def newPendingRGWJob(goal: Option[RunState.EnumVal] = None) = { 15 | PersistentState( 16 | id = UUID.randomUUID(), 17 | cluster = "ceph", 18 | role = JobRole.RGW, 19 | lastLaunched = None, 20 | goal = goal, 21 | reservationConfirmed = false, 22 | slaveId = None, 23 | reservationId = None, 24 | taskId = None, 25 | location = Location.empty) 26 | } 27 | 28 | def newRunningRGWJob( 29 | taskId: String = Job.makeTaskId(JobRole.RGW, "ceph"), 30 | location: Location = IPLocation("10.11.12.13", 31001)) = { 31 | PersistentState( 32 | id = UUID.randomUUID(), 33 | cluster = "ceph", 34 | role = JobRole.RGW, 35 | lastLaunched = Some(RunState.Running), 36 | goal = Some(RunState.Running), 37 | reservationConfirmed = false, 38 | slaveId = Some("slave-12"), 39 | reservationId = None, 40 | taskId = Some(taskId), 41 | location = location) 42 | } 43 | def newRunningMonitorJob( 44 | taskId: String = Job.makeTaskId(JobRole.Monitor, "ceph"), 45 | location: Location = IPLocation("10.11.12.13", 31001)) = { 46 | PersistentState( 47 | id = UUID.randomUUID(), 48 | cluster = "ceph", 49 | role = JobRole.Monitor, 50 | lastLaunched = Some(RunState.Running), 51 | goal = Some(RunState.Running), 52 | reservationConfirmed = true, 53 | slaveId = Some("slave-12"), 54 | reservationId = Some(UUID.randomUUID()), 55 | taskId = Some(taskId), 56 | location = location) 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /src/test/scala/com/vivint/ceph/lib/CephActorTest.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph.lib 2 | 3 | import akka.actor.ActorSystem 4 | import akka.testkit.TestKit 5 | import java.util.concurrent.atomic.AtomicInteger 6 | import org.scalatest.BeforeAndAfterAll 7 | import org.scalatest.fixture.Suite 8 | 9 | 10 | abstract class CephActorTest(name: String) extends TestKit(ActorSystem(name)) with Suite with BeforeAndAfterAll { 11 | val idx = new AtomicInteger() 12 | override def afterAll(): Unit = { 13 | super.afterAll() 14 | TestKit.shutdownActorSystem(system) 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /src/test/scala/com/vivint/ceph/lib/PortMatcherTest.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph 2 | package lib 3 | 4 | import org.scalatest.{FunSpec, Matchers} 5 | import mesosphere.mesos.protos.Resource.PORTS 6 | import mesosphere.mesos.matcher.ResourceMatcher.ResourceSelector 7 | 8 | class PortMatcherTest extends FunSpec with Matchers { 9 | import ProtoHelpers._ 10 | 
describe("SpecificPortMatcher") { 11 | 12 | it("matches only the specified port") { 13 | val matcher = new SpecificPortMatcher(1000, ResourceSelector.any(Set("*"))) 14 | val Seq(result) = matcher("offer-0", Seq(newRangesResource(PORTS, Seq(1000L to 1005L), role = "*"))) 15 | 16 | result.matches shouldBe true 17 | val Seq(consumed) = result.consumedResources 18 | consumed.getRole shouldBe "*" 19 | consumed.getName shouldBe PORTS 20 | consumed.getRanges.getRange(0).getBegin shouldBe 1000L 21 | consumed.getRanges.getRange(0).getEnd shouldBe 1000L 22 | } 23 | 24 | it("matches ports in subsequent ranges") { 25 | val matcher = new SpecificPortMatcher(1000, ResourceSelector.any(Set("*"))) 26 | val Seq(result) = matcher("offer-0", 27 | Seq( 28 | newRangesResource(PORTS, Seq(800L to 900L), role = "*"), 29 | newRangesResource(PORTS, Seq(1000L to 1005L), role = "*"))) 30 | 31 | result.matches shouldBe true 32 | val Seq(consumed) = result.consumedResources 33 | consumed.getRole shouldBe "*" 34 | consumed.getName shouldBe PORTS 35 | consumed.getRanges.getRange(0).getBegin shouldBe 1000L 36 | consumed.getRanges.getRange(0).getEnd shouldBe 1000L 37 | } 38 | 39 | it("does not match if the specified port is not offered") { 40 | val matcher = new SpecificPortMatcher(999, ResourceSelector.any(Set("*"))) 41 | val Seq(result) = matcher("offer-0", Seq(newRangesResource(PORTS, Seq(1000L to 1005L), role = "*"))) 42 | 43 | result.matches shouldBe false 44 | } 45 | } 46 | 47 | describe("ContiguousPortMatcher") { 48 | it("matches a port block in subsequent ranges") { 49 | val matcher = new ContiguousPortMatcher(5, ResourceSelector.any(Set("*"))) 50 | val Seq(result) = matcher("offer-0", 51 | Seq( 52 | newRangesResource(PORTS, Seq(800L to 801L), role = "*"), 53 | newRangesResource(PORTS, Seq(1000L to 1005L), role = "*"))) 54 | 55 | result.matches shouldBe true 56 | val Seq(consumed) = result.consumedResources 57 | consumed.getRole shouldBe "*" 58 | consumed.getName shouldBe PORTS 59 | consumed.getRanges.getRange(0).getBegin shouldBe 1000L 60 | consumed.getRanges.getRange(0).getEnd shouldBe 1004L 61 | } 62 | 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/test/scala/com/vivint/ceph/lib/TestHelpers.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph.lib 2 | 3 | import scala.concurrent.{ Await, Awaitable } 4 | import scala.concurrent.duration._ 5 | 6 | 7 | trait TestHelpers { 8 | def await[T](f: Awaitable[T], duration: FiniteDuration = 5.seconds) = { 9 | Await.result(f, duration) 10 | } 11 | 12 | } 13 | -------------------------------------------------------------------------------- /src/test/scala/com/vivint/ceph/lib/TgzHelperTest.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph.lib 2 | 3 | import org.scalatest.{FunSpec,Matchers} 4 | import java.nio.charset.StandardCharsets.UTF_8 5 | 6 | class TgzHelperTest extends FunSpec with Matchers { 7 | it("should tar and untar a file") { 8 | import TgzHelper._ 9 | val tgz = makeTgz( 10 | "path/to/file" -> FileEntry(octal("777"), "hello".getBytes(UTF_8)), 11 | "path/to/file2" -> FileEntry(octal("777"), "bye".getBytes(UTF_8)) 12 | ) 13 | readTgz(tgz).map { case (k,v) => (k, new String(v, UTF_8)) }.toMap shouldBe Map( 14 | "path/to/file" -> "hello", 15 | "path/to/file2" -> "bye") 16 | } 17 | } 18 | -------------------------------------------------------------------------------- 
/src/test/scala/com/vivint/ceph/model/CephConfigTest.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph.model 2 | 3 | import org.apache.commons.io.IOUtils 4 | import org.scalatest.{FunSpec, Matchers} 5 | import scala.collection.JavaConversions._ 6 | import java.nio.charset.StandardCharsets.UTF_8 7 | import mesosphere.marathon.state.DiskType 8 | 9 | class CephConfigTest extends FunSpec with Matchers { 10 | val exampleFile = IOUtils.toString(getClass.getResourceAsStream("/deployment-config.conf"), UTF_8) 11 | describe("parsing") { 12 | it("should be able to parse the example file") { 13 | val config = CephConfigHelper.parse(exampleFile) 14 | 15 | config.deployment.mon.count shouldBe 0 16 | config.deployment.mon.cpus shouldBe 1.0 17 | config.deployment.mon.mem shouldBe 256.0 18 | config.deployment.mon.disk_type shouldBe (DiskType.Root) 19 | config.deployment.mon.disk shouldBe (16) 20 | 21 | config.deployment.osd.count shouldBe 0 22 | config.deployment.osd.cpus shouldBe 1.0 23 | config.deployment.osd.mem shouldBe 1024.0 24 | config.deployment.osd.disk_type shouldBe (DiskType.Mount) 25 | config.deployment.osd.disk shouldBe (512000) 26 | config.deployment.osd.disk_max shouldBe (None) 27 | config.deployment.osd.path_constraint shouldBe (None) 28 | 29 | config.settings.auth.keySet.toSet shouldBe Set( 30 | "cephx_service_require_signatures", "cephx", "cephx_cluster_require_signatures", 31 | "cephx_require_signatures") 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/test/scala/com/vivint/ceph/model/PlayJsonFormatsTest.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph.model 2 | 3 | import org.scalatest.{FunSpec, Matchers} 4 | import play.api.libs.json._ 5 | 6 | class PlayJsonFormatsTest extends FunSpec with Matchers { 7 | import PlayJsonFormats._ 8 | describe("LocationFormat") { 9 | it("goes in and out properly") { 10 | Json.toJson(Location.empty).as[Location] shouldBe Location.empty 11 | Json.toJson(PartialLocation(Some("ip"), None)).as[Location] shouldBe PartialLocation(Some("ip"), None) 12 | Json.toJson(PartialLocation(None, Some(1234))).as[Location] shouldBe PartialLocation(None, Some(1234)) 13 | Json.toJson(IPLocation("ip", 1234)).as[Location] shouldBe IPLocation("ip", 1234) 14 | Json.toJson(ServiceLocation("hostname", "ip", 1234)).as[Location] shouldBe ServiceLocation("hostname", "ip", 1234) 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /src/test/scala/com/vivint/ceph/model/TaskTest.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph.model 2 | 3 | import org.scalatest.{FunSpec, Matchers} 4 | 5 | class TaskTest extends FunSpec with Matchers { 6 | describe("TaskStatus") { 7 | it("converts to and from mesos") { 8 | val original = TaskStatus("abc", "123", TaskState.TaskRunning) 9 | TaskStatus.fromMesos(original.toMesos) shouldBe original 10 | } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /src/test/scala/com/vivint/ceph/views/ConfigTemplatesTest.scala: -------------------------------------------------------------------------------- 1 | package com.vivint.ceph.views 2 | 3 | import org.scalatest.{FunSpec, Matchers} 4 | import com.typesafe.config.{ConfigFactory,ConfigObject} 5 | import configs.syntax._ 6 | 7 | class
ConfigTemplatesTest extends FunSpec with Matchers { 8 | val config = ConfigFactory.parseString(""" 9 | auth { 10 | number = 1 11 | double = 1.5 12 | boolean = false 13 | string = "very string" 14 | } 15 | """) 16 | 17 | describe("renderSettings") { 18 | it("renders various keys of a config object") { 19 | val cObj = config.get[ConfigObject]("auth").value 20 | ConfigTemplates.renderSettings(cObj) shouldBe ( 21 | """boolean = false 22 | |double = 1.5 23 | |number = 1 24 | |string = "very string" 25 | |""".stripMargin) 26 | } 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /ui/Caddyfile: -------------------------------------------------------------------------------- 1 | localhost:2015 2 | # proxy /v1 localhost:8081 3 | proxy /v1 localhost:8080 4 | root ./ -------------------------------------------------------------------------------- /ui/index.html: -------------------------------------------------------------------------------- [markup stripped during extraction; the page is titled "Ceph on Mesos" and its body holds a "loading app..." placeholder inside the mount element that ReactApp.scala below renders into (id "ceph-ui"), presumably followed by script tags for the compiled Scala.js bundles] -------------------------------------------------------------------------------- /ui/src/main/scala/cephui/ReactApp.scala: -------------------------------------------------------------------------------- 1 | package cephui 2 | 3 | import japgolly.scalajs.react._ 4 | import org.scalajs.dom 5 | 6 | import scala.scalajs.js.JSApp 7 | import scala.scalajs.js.annotation.JSExport 8 | import cephui.css.AppCSS 9 | import cephui.routes.AppRouter 10 | 11 | @JSExport 12 | object ReactApp extends JSApp { 13 | 14 | @JSExport 15 | override def main(): Unit = { 16 | AppCSS.load 17 | AppRouter.router().render(dom.document.getElementById("ceph-ui")) 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /ui/src/main/scala/cephui/components/Footer.scala: -------------------------------------------------------------------------------- 1 | package cephui.components 2 | 3 | import japgolly.scalajs.react._ 4 | import japgolly.scalajs.react.vdom.prefix_<^._ 5 | 6 | object Footer { 7 | 8 | val component = ReactComponentB.static("Footer", 9 | <.footer(^.textAlign.center, 10 | <.div(^.borderBottom := "1px solid grey", ^.padding := "0px"), 11 | <.p(^.paddingTop := "5px", "Built using scalajs/scalajs-react/scalacss") 12 | ) 13 | ).buildU 14 | 15 | def apply() = component() 16 | } 17 | -------------------------------------------------------------------------------- /ui/src/main/scala/cephui/components/TopNav.scala: -------------------------------------------------------------------------------- 1 | package cephui 2 | package components 3 | 4 | import japgolly.scalajs.react._ 5 | import japgolly.scalajs.react.extra.Reusability 6 | import japgolly.scalajs.react.extra.router.RouterCtl 7 | import japgolly.scalajs.react.vdom.prefix_<^._ 8 | 9 | import scala.scalajs.js 10 | import scalacss.Defaults._ 11 | import scalacss.ScalaCssReact._ 12 | import cephui.models.Menu 13 | import cephui.routes.AppRouter.AppPage 14 | 15 | 16 | object TopNav { 17 | 18 | object Style extends StyleSheet.Inline { 19 | 20 | import dsl._ 21 | 22 | val navMenu = style(display.flex, 23 | alignItems.center, 24 | backgroundColor(c"#F2706D"), 25 | margin.`0`, 26 | listStyle := "none") 27 | 28 | val menuItem = styleF.bool(selected => styleS( 29 | padding(20.px), 30 | fontSize(1.5.em), 31 | cursor.pointer, 32 | color(c"rgb(244, 233, 233)"), 33 | mixinIfElse(selected)( 34 | backgroundColor(c"#E8433F"), 35 | fontWeight._500) 36 | (&.hover( 37 | backgroundColor(c"#B6413E"))) 38 | )) 39 | 40 | } 41 | 42 | case class Props(menus: Vector[Menu], selectedPage: AppPage, ctrl: RouterCtl[AppPage]) 43 | 44 | implicit val currentPageReuse = Reusability.by_==[AppPage] 45 | implicit val propsReuse = Reusability.by((_:Props).selectedPage) 46 | 47 | val component = ReactComponentB[Props]("TopNav") 48 | .render_P { P => 49 | <.header( 50 | elements.Nav( 51 | bsStyle = "pills", 52 | activeKey = P.menus.find(_.route == P.selectedPage).map(_.name).getOrElse(js.undefined) 53 | )( 54 | P.menus.map { item => 55 | elements.NavItem( 56 | key = item.name, 57 | eventKey = item.name, 58 | // onClick = hi 59 | onSelect = P.ctrl.set(item.route).toScalaFn 60 | )(item.name) 61 | 62 | // <.li(^.key := item.name, 63 | // Style.menuItem(item.route.getClass == P.selectedPage.getClass), 64 | // item.name, 65 | // P.ctrl setOnClick item.route) 66 | } 67 | ) 68 | ) 69 | } 70 | .configure(Reusability.shouldComponentUpdate) 71 | .build 72 | 73 | def apply(props: Props, ref: js.UndefOr[String] = "", key:
js.Any = {}) = component.set(key, ref)(props) 74 | 75 | } 76 | -------------------------------------------------------------------------------- /ui/src/main/scala/cephui/components/items/Item1Data.scala: -------------------------------------------------------------------------------- 1 | package cephui.components.items 2 | 3 | import japgolly.scalajs.react._ 4 | import japgolly.scalajs.react.vdom.prefix_<^._ 5 | 6 | object Item1Data { 7 | 8 | val component = ReactComponentB.static("Item1", 9 | <.div("This is Item1 Page ") 10 | ).buildU 11 | 12 | def apply() = component() 13 | } 14 | -------------------------------------------------------------------------------- /ui/src/main/scala/cephui/components/items/Item2Data.scala: -------------------------------------------------------------------------------- 1 | package cephui.components.items 2 | 3 | import japgolly.scalajs.react._ 4 | import japgolly.scalajs.react.vdom.prefix_<^._ 5 | 6 | object Item2Data { 7 | 8 | val component = ReactComponentB.static("Item2", 9 | <.div("This is Item2 Page ") 10 | ).buildU 11 | 12 | def apply() = component() 13 | } 14 | -------------------------------------------------------------------------------- /ui/src/main/scala/cephui/components/items/ItemsInfo.scala: -------------------------------------------------------------------------------- 1 | package cephui.components.items 2 | 3 | import japgolly.scalajs.react.{ReactComponentB, _} 4 | import japgolly.scalajs.react.vdom.prefix_<^._ 5 | 6 | object ItemsInfo { 7 | 8 | val component = ReactComponentB.static("ItemsInfo", 9 | <.div(" Items Root Page ") 10 | ).buildU 11 | 12 | def apply() = component() 13 | } 14 | -------------------------------------------------------------------------------- /ui/src/main/scala/cephui/css/AppCSS.scala: -------------------------------------------------------------------------------- 1 | package cephui.css 2 | 3 | import scalacss.ScalaCssReact._ 4 | import scalacss.internal.mutable.GlobalRegistry 5 | import scalacss.Defaults._ 6 | 7 | object AppCSS { 8 | object Style extends StyleSheet.Inline { 9 | import scalacss.Defaults._ 10 | import dsl._ 11 | 12 | val lightGrey = Color("#DDD") 13 | 14 | val hiddenTableRow = style("hiddentablerow")( 15 | padding.`0`.important, 16 | backgroundColor(lightGrey)) 17 | } 18 | 19 | def load = { 20 | 21 | GlobalRegistry.register(Style) 22 | 23 | GlobalRegistry.onRegistration(_.addToDocument()) 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /ui/src/main/scala/cephui/css/GlobalStyle.scala: -------------------------------------------------------------------------------- 1 | package cephui.css 2 | 3 | import scalacss.Defaults._ 4 | 5 | object GlobalStyle extends StyleSheet.Inline { 6 | 7 | import dsl._ 8 | 9 | style(unsafeRoot("body")( 10 | margin.`0`, 11 | padding.`0`, 12 | fontSize(14.px), 13 | fontFamily := "Roboto, sans-serif" 14 | )) 15 | } 16 | -------------------------------------------------------------------------------- /ui/src/main/scala/cephui/lib/Http.scala: -------------------------------------------------------------------------------- 1 | package cephui.lib 2 | 3 | import scala.concurrent.{Future,Promise} 4 | import org.scalajs.dom 5 | import scala.util.Try 6 | import scala.scalajs.js 7 | 8 | object Http { 9 | sealed class HttpFailure(msg: String) extends Exception(msg) 10 | case class RequestFailure(req: dom.XMLHttpRequest) extends HttpFailure( 11 | s"response ${req.status} received; ${req.responseText}") 12 | case class 
MarshallingFailure(str: String) extends HttpFailure(str) 13 | 14 | trait Unmarshaller[T] extends (dom.XMLHttpRequest => T) 15 | object Unmarshaller { 16 | implicit def stringUnmarshaller = new Unmarshaller[String] { 17 | def apply(e: dom.XMLHttpRequest) = e.responseText 18 | } 19 | implicit def unitUnmarshaller = new Unmarshaller[Unit] { 20 | def apply(e: dom.XMLHttpRequest) = () 21 | } 22 | } 23 | 24 | def request[T](method: String, uri: String, headers: Map[String, String] = Map.empty, data: js.Any = js.undefined )( 25 | implicit um: Unmarshaller[T]): Future[T] = { 26 | val p = Promise[T] 27 | val xhr = new dom.XMLHttpRequest() 28 | xhr.open(method, uri) 29 | headers.foreach { case (h, v) => 30 | xhr.setRequestHeader(h, v) 31 | } 32 | 33 | xhr.onload = { (e: dom.Event) => 34 | if ((200 until 300) contains xhr.status) { 35 | p.complete(Try(um.apply(xhr))) 36 | } else { 37 | p.failure(RequestFailure(xhr)) 38 | } 39 | } 40 | xhr.send(data) 41 | p.future 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /ui/src/main/scala/cephui/models/DanglingReservation.scala: -------------------------------------------------------------------------------- 1 | package cephui.models 2 | 3 | case class DanglingReservation( 4 | id: String, 5 | lastSeen: String, 6 | unreserve: Boolean = false, 7 | details: Option[String] = None) 8 | -------------------------------------------------------------------------------- /ui/src/main/scala/cephui/models/ErrorResponse.scala: -------------------------------------------------------------------------------- 1 | package cephui.models 2 | 3 | case class ErrorResponse(message: String) 4 | -------------------------------------------------------------------------------- /ui/src/main/scala/cephui/models/Job.scala: -------------------------------------------------------------------------------- 1 | package cephui.models 2 | 3 | case class Location( 4 | hostname: Option[String], 5 | ip: Option[String], 6 | port: Option[Int]) 7 | 8 | case class Job( 9 | id: String, 10 | cluster: String, 11 | role: String, 12 | goal: Option[String], 13 | lastLaunched: Option[String], 14 | reservationConfirmed: Boolean, 15 | reservationId: Option[String], 16 | slaveId: Option[String], 17 | taskId: Option[String], 18 | location: Location, 19 | version: Int, 20 | persistentVersion: Int, 21 | behavior: String, 22 | wantingNewOffer: Boolean, 23 | taskStatus: Option[String]) 24 | -------------------------------------------------------------------------------- /ui/src/main/scala/cephui/models/JsFormats.scala: -------------------------------------------------------------------------------- 1 | package cephui.models 2 | 3 | import json._ 4 | 5 | object JsFormats { 6 | implicit val locationAccessor = ObjectAccessor.create[Location] 7 | implicit val jobAccessor = ObjectAccessor.create[Job] 8 | implicit val danglingReservationAccessor = ObjectAccessor.create[DanglingReservation] 9 | implicit val errorReservationAccessor = ObjectAccessor.create[ErrorResponse] 10 | } 11 | -------------------------------------------------------------------------------- /ui/src/main/scala/cephui/models/Menu.scala: -------------------------------------------------------------------------------- 1 | package cephui.models 2 | 3 | import cephui.routes.AppRouter.AppPage 4 | 5 | case class Menu(name: String, route: AppPage) 6 | -------------------------------------------------------------------------------- /ui/src/main/scala/cephui/pages/ConfigPage.scala: 
-------------------------------------------------------------------------------- 1 | package cephui 2 | package pages 3 | 4 | import lib.Http 5 | import japgolly.scalajs.react._ 6 | import japgolly.scalajs.react.vdom.prefix_<^._ 7 | import org.scalajs.dom 8 | import elements._ 9 | import scalacss.Defaults._ 10 | import scalacss.ScalaCssReact._ 11 | import scala.concurrent.ExecutionContext.Implicits.global 12 | import scala.util.{Success,Failure} 13 | import json._ 14 | import models.JsFormats._ 15 | 16 | object ConfigPage { 17 | object Style extends StyleSheet.Inline { 18 | import dsl._ 19 | val content = style(textAlign.center, 20 | fontSize(30.px), 21 | minHeight(450.px), 22 | paddingTop(40.px)) 23 | } 24 | 25 | case class Message(error: Boolean, text: String) 26 | case class State(config: Option[String], saving: Boolean = false, message: Option[Message] = None) 27 | class Backend($: BackendScope[Unit, State]) { 28 | def start() = CallbackTo { 29 | Http.request[String]("GET", "/v1/config/deployment-config.conf"). 30 | onComplete { 31 | case Success(cfg) => $.modState { ste => ste.copy(Some(cfg)) }.runNow() 32 | case Failure(ex) => 33 | $.modState { _.copy(message = Some(Message(error = true, text = ex.getMessage))) }.runNow() 34 | } 35 | } 36 | 37 | def updateText(e: ReactEventI) = { 38 | val v = e.target.value 39 | $.modState { ste => 40 | dom.console.log(e) 41 | ste.copy(config = Some(v)) 42 | } 43 | } 44 | 45 | def saveConfig() = 46 | $.modState { ste => 47 | ste.config match { 48 | case Some(cfg) => 49 | Http.request[Unit]("PUT", "/v1/config/deployment-config.conf", 50 | headers = Map("Content-Type" -> "application/text"), 51 | data = cfg). 52 | onComplete { 53 | case Success(_) => 54 | $.modState( 55 | _.copy(saving = false, message = Some(Message(error = false, text = "Saved successfully")))). 56 | runNow() 57 | case Failure(Http.RequestFailure(xhr)) if (xhr.status == 400) => 58 | val err = JValue.fromString(xhr.responseText).toObject[models.ErrorResponse] 59 | $.modState( 60 | _.copy(saving = false, message = Some(Message(error = true, text = err.message)))).runNow() 61 | case Failure(ex) => 62 | $.modState( 63 | _.copy(saving = false, message = Some(Message(error = true, text = ex.getMessage)))).runNow() 64 | } 65 | ste.copy(saving = true, message = None) 66 | case None => 67 | ste 68 | } 69 | }.runNow() 70 | 71 | def render(s: State) = 72 | <.div( 73 | s.config.map { cfg => 74 | Grid()( 75 | Row()( 76 | <.textarea( 77 | ^.className := "col-xs-12", 78 | ^.rows := 25, 79 | ^.defaultValue := cfg, 80 | ^.disabled := s.saving, 81 | ^.onChange ==> updateText)), 82 | Row()( 83 | Col( 84 | xs = 4)( 85 | Button( 86 | bsStyle = "success", 87 | disabled = s.saving, 88 | onClick = { () => saveConfig() })("Save Changes")) 89 | ), 90 | s.message match { 91 | case Some(Message(false, text)) => 92 | Row()( 93 | Col(xs = 4)( 94 | Alert( 95 | bsStyle = "success", 96 | closeLabel = "Dismiss")( 97 | <.h4("Success!"), 98 | <.p(text)))) 99 | case Some(Message(true, text)) => 100 | Row()( 101 | Col(xs = 4)( 102 | Alert( 103 | bsStyle = "danger", 104 | closeLabel = "Dismiss")( 105 | <.h4("An error occurred!"), 106 | <.p(text)))) 107 | case _ => 108 | Nil 109 | } 110 | ) 111 | } 112 | ) 113 | } 114 | 115 | val component = ReactComponentB[Unit]("ConfigPage"). 116 | initialState(State(None)). 117 | renderBackend[Backend]. 118 | componentDidMount(_.backend.start()). 
119 | build 120 | 121 | 122 | def apply() = component() 123 | } 124 | -------------------------------------------------------------------------------- /ui/src/main/scala/cephui/pages/DanglingReservationsPage.scala: -------------------------------------------------------------------------------- 1 | package cephui 2 | package pages 3 | 4 | import cephui.css.AppCSS 5 | import japgolly.scalajs.react._ 6 | import japgolly.scalajs.react.extra.Reusability 7 | import japgolly.scalajs.react.vdom.prefix_<^._ 8 | import scalacss.Defaults._ 9 | import scala.scalajs.js 10 | import scalacss.ScalaCssReact._ 11 | import org.scalajs.dom 12 | import models.DanglingReservation 13 | import json._ 14 | import scala.concurrent.duration._ 15 | import scala.concurrent.{Future,Promise} 16 | import scala.concurrent.ExecutionContext.Implicits.global 17 | import models.JsFormats._ 18 | import lib.Http 19 | import scala.util.{Success, Failure} 20 | 21 | object DanglingReservationsPage { 22 | 23 | object Style extends StyleSheet.Inline { 24 | import dsl._ 25 | val content = style(textAlign.center, 26 | fontSize(30.px), 27 | minHeight(450.px), 28 | paddingTop(40.px)) 29 | } 30 | 31 | 32 | case class State(dangling: Seq[DanglingReservation], expanded: Set[String] = Set.empty) 33 | 34 | import elements.{Table, Grid, Row, Col, Button, Alert, Glyphicon} 35 | 36 | implicit val danglingReservationsDeserializer = new Http.Unmarshaller[Seq[DanglingReservation]] { 37 | def apply(xhr: dom.XMLHttpRequest): Seq[DanglingReservation] = 38 | JValue.fromString(xhr.responseText).toObject[Seq[DanglingReservation]] 39 | } 40 | 41 | def unreserve(reservation: DanglingReservation): Unit = 42 | Http.request[Unit]("PUT", s"/v1/reservation-reaper/${reservation.id}/unreserve"). 43 | onComplete { 44 | case Success(_) => 45 | dom.console.log(s"reservation ${reservation.id} set to be unreserved") 46 | case Failure(ex) => 47 | dom.console.log(ex.getMessage) 48 | } 49 | 50 | val danglingReservationDetails = ReactComponentB[DanglingReservation]("DanglingReservationDetails"). 51 | render_P { danglingReservation => 52 | Row()( 53 | Col(xs = 1)(), 54 | Col(xs = 6)( 55 | Table()( 56 | <.thead( 57 | <.tr( 58 | <.th("Field"), 59 | <.th("Value"))), 60 | <.tbody( 61 | <.tr(<.td("ID"), <.td(danglingReservation.id)), 62 | <.tr(<.td("details"), <.td( 63 | <.pre(danglingReservation.details.getOrElse[String](""))))))), 64 | Col( 65 | xs = 4)( 66 | Button(bsStyle = "warning", 67 | onClick = { () => unreserve(danglingReservation) })("Destroy"))) 68 | }. 69 | build 70 | 71 | class Backend($: BackendScope[Unit, State]) { 72 | private var running = true 73 | 74 | def poll(): Unit = 75 | Http.request[Seq[DanglingReservation]]("GET", "/v1/reservation-reaper/dangling"). 76 | onComplete { 77 | case Success(danglingReservations) => 78 | js.timers.setTimeout(3.seconds) { 79 | if (running) poll() 80 | } 81 | $.modState { ste => State(danglingReservations, ste.expanded)}.runNow() 82 | case Failure(ex) => 83 | println(ex.getMessage) 84 | ex.printStackTrace(System.out) 85 | } 86 | 87 | def start() = CallbackTo { 88 | dom.console.log("le start") 89 | running = true 90 | poll() 91 | } 92 | 93 | def clear() = CallbackTo { 94 | dom.console.log("le end") 95 | running = false 96 | } 97 | 98 | def render(s: State) = 99 | <.div( 100 | Row()( 101 | Col(xs = 1)(), 102 | Col(xs = 10)( 103 | <.p(""" 104 | |Dangling Reservations are resource reservations that are associated with a job that the framework doesn't 105 | |recognize. 
They can occur in the following scenarios: 106 | |""".stripMargin), 107 | <.ul( 108 | <.li("A job is deleted manually from the framework's persistent store, and the framework is restarted."), 109 | <.li("An issue with the persistence layer or the framework itself.")), 110 | <.p(""" 111 | |In order to prevent ceph-on-mesos from deleting important persistent data in these cases, reservations 112 | |are listed here so they can be marked to be unreserved manually once the framework operator has confirmed 113 | |that the resources are in fact okay to release. Restoring jobs with these resource reservations is a 114 | |manual effort. 115 | |""".stripMargin), 116 | <.p(""" 117 | |Resources are not unreserved immediately; the framework must wait until the reservation is offered again, 118 | |which could take several minutes. To accelerate this process you can try restarting the framework. 119 | |""".stripMargin))), 120 | Row()( 121 | Col(xs = 1)(), 122 | Col(xs = 10)( 123 | if(s.dangling.isEmpty) { 124 | Alert(bsStyle = "success")("There are no dangling reservations") 125 | } else { 126 | Table()( 127 | <.thead( 128 | <.tr( 129 | <.th("id"), 130 | <.th("lastSeen"), 131 | <.th())), 132 | <.tbody( 133 | s.dangling.sortBy(_.id).map { dangling => 134 | Seq( 135 | <.tr( 136 | ^.onClick --> $.modState { _ => 137 | s.copy(expanded = 138 | if (s.expanded.contains(dangling.id)) 139 | s.expanded - dangling.id 140 | else 141 | s.expanded + dangling.id) }, 142 | <.td(dangling.id.take(7)), 143 | <.td(dangling.lastSeen), 144 | <.td(Glyphicon( 145 | if (s.expanded contains dangling.id) 146 | "menu-down" 147 | else 148 | "menu-up")()) 149 | ), 150 | <.tr( 151 | <.td( 152 | AppCSS.Style.hiddenTableRow, 153 | ^.colSpan := 3, 154 | if (s.expanded contains dangling.id) 155 | danglingReservationDetails(dangling) 156 | else 157 | <.span()))) 158 | } 159 | )) 160 | } 161 | )) 162 | ) 163 | } 164 | 165 | val DanglingReservationsComponent = ReactComponentB[Unit]("DanglingReservations"). 166 | initialState(State(Nil)). 167 | renderBackend[Backend]. 168 | componentDidMount(_.backend.start()). 169 | componentWillUnmount(_.backend.clear()). 
170 | build 171 | 172 | def apply() = { 173 | DanglingReservationsComponent() 174 | } 175 | 176 | } 177 | -------------------------------------------------------------------------------- /ui/src/main/scala/cephui/pages/HomePage.scala: -------------------------------------------------------------------------------- 1 | package cephui 2 | package pages 3 | 4 | import cephui.css.AppCSS 5 | import japgolly.scalajs.react._ 6 | import japgolly.scalajs.react.extra.Reusability 7 | import japgolly.scalajs.react.vdom.prefix_<^._ 8 | import scalacss.Defaults._ 9 | import scala.scalajs.js 10 | import scalacss.ScalaCssReact._ 11 | import org.scalajs.dom 12 | import models.Job 13 | import json._ 14 | import scala.concurrent.duration._ 15 | import scala.concurrent.{Future,Promise} 16 | import scala.concurrent.ExecutionContext.Implicits.global 17 | import models.JsFormats._ 18 | import lib.Http 19 | import scala.util.{Success, Failure} 20 | 21 | object HomePage { 22 | 23 | object Style extends StyleSheet.Inline { 24 | import dsl._ 25 | val content = style(textAlign.center, 26 | fontSize(30.px), 27 | minHeight(450.px), 28 | paddingTop(40.px)) 29 | } 30 | 31 | 32 | case class State(jobs: Seq[Job], expanded: Set[String] = Set.empty) 33 | 34 | import elements.{Table, Grid, Row, Col, Button, Glyphicon} 35 | def renderLocation(jobId: String, location: models.Location): ReactNode = { 36 | val portSuffix = location.port.map(p => s":${p}").getOrElse("") 37 | 38 | (location.ip, location.hostname) match { 39 | case (Some(ip), Some(hostname)) => 40 | elements.OverlayTrigger( 41 | overlay = elements.Tooltip(id = jobId + "-location")(ip), 42 | placement = "top")(<.div(s"${hostname}${portSuffix}")) 43 | case (Some(ip), None) => 44 | <.span(s"${ip}${portSuffix}") 45 | case _ => 46 | <.span("") 47 | } 48 | } 49 | 50 | val `data-toggle` = "data-toggle".reactAttr 51 | val `data-target` = "data-target".reactAttr 52 | 53 | implicit val jobsDeserializer = new Http.Unmarshaller[Seq[Job]] { 54 | def apply(xhr: dom.XMLHttpRequest): Seq[Job] = 55 | JValue.fromString(xhr.responseText).toObject[Seq[Job]] 56 | } 57 | 58 | def setGoal(job: Job, state: String): Unit = 59 | Http.request[Unit]("PUT", s"/v1/jobs/${job.id}/${state}"). 60 | onComplete { 61 | case Success(_) => 62 | dom.console.log(s"transition job ${job.id} to ${state} success") 63 | case Failure(ex) => 64 | dom.console.log(ex.getMessage) 65 | } 66 | 67 | val jobDetails = ReactComponentB[Job]("JobDetails"). 68 | render_P { job => 69 | Row()( 70 | Col(xs = 1)(), 71 | Col(xs = 6)( 72 | Table()( 73 | <.thead( 74 | <.tr( 75 | <.th("Field"), 76 | <.th("Value"))), 77 | <.tbody( 78 | <.tr(<.td("ID"), <.td(job.id)), 79 | <.tr(<.td("Behavior"), <.td(job.behavior)), 80 | <.tr(<.td("lastLaunched"), <.td(job.lastLaunched.getOrElse[String](""))), 81 | <.tr(<.td("goal"), <.td(job.goal.getOrElse[String](""))), 82 | <.tr(<.td("persistence"), <.td(s"${job.version} / ${job.persistentVersion}")), 83 | <.tr(<.td("wantingNewOffer"), <.td(job.wantingNewOffer.toString))))), 84 | Col( 85 | xs = 4)( 86 | job.goal match { 87 | case Some("running") => 88 | Button(bsStyle = "warning", 89 | onClick = { () => setGoal(job, "paused") })( 90 | "Pause") 91 | case Some("paused") => 92 | Button(bsStyle = "success", 93 | onClick = { () => setGoal(job, "running") })( 94 | "Run") 95 | case _ => 96 | <.span() 97 | } 98 | )) 99 | }. 100 | build 101 | 102 | class Backend($: BackendScope[Unit, State]) { 103 | private var running = true 104 | 105 | def poll(): Unit = 106 | Http.request[Seq[Job]]("GET", "/v1/jobs"). 
107 | onComplete { 108 | case Success(jobs) => 109 | js.timers.setTimeout(3.seconds) { 110 | if (running) poll() 111 | } 112 | $.modState { ste => State(jobs, ste.expanded)}.runNow() 113 | case Failure(ex) => 114 | println(ex.getMessage) 115 | ex.printStackTrace(System.out) 116 | } 117 | 118 | def start() = CallbackTo { 119 | dom.console.log("le start") 120 | running = true 121 | poll() 122 | } 123 | 124 | def clear() = CallbackTo { 125 | dom.console.log("le end") 126 | running = false 127 | } 128 | 129 | def render(s: State) = 130 | <.div( 131 | s.jobs.groupBy { _.role }.toSeq.sortBy(_._1).map { 132 | case (role, roleJobs) => 133 | <.div( 134 | <.h2(role), 135 | Table()( 136 | <.thead( 137 | <.tr( 138 | <.th("id"), 139 | <.th("location"), 140 | <.th("goal"), 141 | <.th("state"), 142 | <.th())), 143 | <.tbody( 144 | roleJobs.sortBy(_.id).flatMap { job => 145 | Seq( 146 | <.tr( 147 | ^.onClick --> $.modState { _ => 148 | s.copy(expanded = 149 | if (s.expanded.contains(job.id)) s.expanded - job.id else s.expanded + job.id) }, 150 | <.td(job.id.take(7)), 151 | <.td(renderLocation(job.id, job.location)), 152 | <.td(job.goal.getOrElse[String]("")), 153 | <.td(job.taskStatus.map(_.toString).getOrElse[String]("")), 154 | <.td(Glyphicon( 155 | if (s.expanded contains job.id) 156 | "menu-down" 157 | else 158 | "menu-up")()) 159 | ), 160 | <.tr( 161 | <.td( 162 | AppCSS.Style.hiddenTableRow, 163 | ^.colSpan := 5, 164 | if (s.expanded contains job.id) 165 | jobDetails(job) 166 | else 167 | <.span()))) 168 | } 169 | ) 170 | ) 171 | ) 172 | 173 | } 174 | ) 175 | } 176 | 177 | val JobsComponent = ReactComponentB[Unit]("Jobs"). 178 | initialState(State(Nil)). 179 | renderBackend[Backend]. 180 | componentDidMount(_.backend.start()). 181 | componentWillUnmount(_.backend.clear()). 
182 | build 183 | 184 | def apply() = { 185 | JobsComponent() 186 | } 187 | 188 | } 189 | -------------------------------------------------------------------------------- /ui/src/main/scala/cephui/routes/AppRouter.scala: -------------------------------------------------------------------------------- 1 | package cephui 2 | package routes 3 | 4 | import japgolly.scalajs.react.extra.router.{Resolution, RouterConfigDsl, RouterCtl, _} 5 | import japgolly.scalajs.react.vdom.prefix_<^._ 6 | 7 | import cephui.components.{TopNav, Footer} 8 | import cephui.models.Menu 9 | import cephui.pages.{HomePage,ConfigPage,DanglingReservationsPage} 10 | 11 | object AppRouter { 12 | 13 | sealed trait AppPage 14 | 15 | case object Home extends AppPage 16 | case object Config extends AppPage 17 | case object DanglingReservations extends AppPage 18 | 19 | val config = RouterConfigDsl[AppPage].buildConfig { dsl => 20 | import dsl._ 21 | (trimSlashes 22 | | staticRoute(root, Home) ~> render(HomePage()) 23 | | staticRoute("#config", Config) ~> render(ConfigPage()) 24 | | staticRoute("#dangling-reservations", DanglingReservations) ~> render(DanglingReservationsPage()) 25 | ).notFound(redirectToPage(Home)(Redirect.Replace)) 26 | .renderWith(layout) 27 | } 28 | 29 | 30 | val mainMenu = Vector( 31 | Menu("Home",Home), 32 | Menu("Config",Config), 33 | Menu("Dangling Reservations",DanglingReservations) 34 | ) 35 | 36 | def layout(c: RouterCtl[AppPage], r: Resolution[AppPage]) = { 37 | <.div( 38 | <.div( 39 | ^.cls := "container-fluid", 40 | TopNav(TopNav.Props(mainMenu,r.page,c))), 41 | <.div( 42 | r.render(), 43 | Footer())) 44 | } 45 | 46 | val baseUrl = BaseUrl.fromWindowOrigin // / "scalajs-react-template/" 47 | 48 | val router = Router(baseUrl, config) 49 | } 50 | --------------------------------------------------------------------------------
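Closing note on the UI wiring: AppRouter is the single place where pages, URL fragments, and menu entries meet. A hedged sketch of what adding a page would involve (StatusPage and the "#status" fragment are hypothetical; AppPage is sealed, so the new case object must live in AppRouter.scala):

// 1. A new route target alongside Home, Config and DanglingReservations:
case object Status extends AppPage

// 2. An extra rule composed into the RouterConfigDsl block with `|`:
//      | staticRoute("#status", Status) ~> render(StatusPage())

// 3. A menu entry so TopNav renders a pill for it:
val menuWithStatus = mainMenu :+ Menu("Status", Status)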