├── .gitignore ├── LICENSE.md ├── Makefile ├── README.md ├── examples ├── example-job │ ├── LICENSE.md │ ├── pom.xml │ └── src │ │ └── main │ │ └── scala │ │ └── com.job │ │ └── SparkJobImplemented.scala └── s3-download-job │ ├── pom.xml │ └── src │ └── main │ └── scala │ └── com.job │ ├── ExecuteDownload.scala │ ├── S3DownloadJob.scala │ └── S3Utils.scala ├── pom.xml ├── spark-job-rest-api ├── pom.xml └── src │ └── main │ └── scala │ ├── api │ ├── ContextLike.scala │ └── SparkJob.scala │ └── responses │ ├── JobStates.scala │ └── ResponseObjects.scala ├── spark-job-rest-client ├── pom.xml └── src │ └── main │ └── scala │ └── client │ └── SparkJobRestClient.scala ├── spark-job-rest-sql ├── pom.xml └── src │ ├── main │ └── scala │ │ ├── api │ │ └── SparkSqlJob.scala │ │ └── context │ │ ├── HiveContextFactory.scala │ │ ├── SQLContextFactory.scala │ │ └── SparkSQLContextFactory.scala │ └── test │ └── scala │ └── context │ ├── HiveContextFactorySpec.scala │ ├── SQLContextFactorySpec.scala │ └── SparkSQLContextFactorySpec.scala └── spark-job-rest ├── pom.xml └── src ├── main ├── assembly │ └── archive.xml ├── resources │ ├── application.conf │ ├── context_start.sh │ ├── log4j.properties │ ├── settings.sh │ └── webapp │ │ ├── assets │ │ ├── css │ │ │ └── style.css │ │ ├── fonts │ │ │ └── bootstrap │ │ │ │ ├── glyphicons-halflings-regular.eot │ │ │ │ ├── glyphicons-halflings-regular.svg │ │ │ │ ├── glyphicons-halflings-regular.ttf │ │ │ │ ├── glyphicons-halflings-regular.woff │ │ │ │ └── glyphicons-halflings-regular.woff2 │ │ └── img │ │ │ ├── halftone.png │ │ │ ├── loading-sm.gif │ │ │ └── loading.gif │ │ ├── index.html │ │ └── js │ │ ├── behaviour.js │ │ ├── bootstrap-notify.min.js │ │ ├── bootstrap.min.js │ │ ├── fileinput.min.js │ │ └── spin.min.js ├── scala │ ├── context │ │ ├── JobContextFactory.scala │ │ └── SparkContextFactory.scala │ ├── logging │ │ └── LoggingOutputStream.scala │ ├── server │ │ ├── CORSDirectives.scala │ │ ├── Controller.scala │ │ ├── Main.scala │ │ ├── MainContext.scala │ │ └── domain │ │ │ └── actors │ │ │ ├── ContextActor.scala │ │ │ ├── ContextManagerActor.scala │ │ │ ├── ContextProcessActor.scala │ │ │ ├── JarActor.scala │ │ │ ├── JobActor.scala │ │ │ ├── ReTry.scala │ │ │ ├── Supervisor.scala │ │ │ └── package.scala │ └── utils │ │ ├── ActorUtils.scala │ │ ├── ContextUtils.scala │ │ ├── FileUtils.scala │ │ ├── HdfsUtils.scala │ │ └── JarUtils.scala └── scripts │ ├── deploy.sh │ ├── restart_server.sh │ ├── start_server.sh │ └── stop_server.sh └── test ├── resources └── application.conf └── scala ├── context ├── JobContextFactorySpec.scala └── SparkContextFactorySpec.scala ├── integration └── IntegrationTests.scala └── server └── domain └── actors ├── ContextActorSpec.scala └── JarActorTest.scala /.gitignore: -------------------------------------------------------------------------------- 1 | .cache 2 | syntax: glob 3 | *.log 4 | *.log* 5 | RemoteSystemsTempFiles 6 | *.metadata 7 | target/* 8 | .DS_Store 9 | *.settings 10 | *.classpath 11 | *.idea 12 | *.iml 13 | *.orig 14 | *.txt~ 15 | *.sh~ 16 | *.scala~ 17 | *.md~ 18 | *.jar 19 | .metadata 20 | target 21 | .DS_Store 22 | .settings 23 | .classpath 24 | .project 25 | classpath 26 | .idea 27 | .iml 28 | .orig 29 | .java.orig 30 | dependency-reduced-pom.xml 31 | /deploy 32 | **/resources/deploy.conf 33 | **/resources/deploy-settings.sh 34 | # TODO: Implement basic fairscheduler config 35 | **/resources/fairscheduler.xml 36 | metastore_db 37 | 
-------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright 2014 Atigeo, LLC. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at 4 | 5 | http://www.apache.org/licenses/LICENSE-2.0 6 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CURRENT_DIR := $(shell pwd) 2 | 3 | # 4 | # Deployment configuration 5 | # 6 | # Default path to the tarball 7 | PACKAGE_PATH := $(CURRENT_DIR)/spark-job-rest/target/spark-job-rest.tar.gz 8 | # Override this to set deploy path 9 | SJR_DEPLOY_PATH ?= $(CURRENT_DIR)/deploy 10 | 11 | # 12 | # We strongly suggest not to keep remote deployment configuration variables out of Git control! 13 | # 14 | # Overrides SJR_DEPLOY_PATH in remote deploy mode if not empty 15 | SJR_REMOTE_DEPLOY_PATH ?= 16 | # Set this the [user]@hostname of the machine you want to deploy to 17 | SJR_DEPLOY_HOST ?= 18 | # Optionally set path to SSH key here 19 | SJR_DEPLOY_KEY ?= 20 | 21 | # 22 | # Remote deployment parameters 23 | # 24 | REMOTE_PARAMS := SJR_DEPLOY_PATH=$(SJR_DEPLOY_PATH) \ 25 | SJR_DEPLOY_HOST=$(SJR_DEPLOY_HOST) \ 26 | SJR_DEPLOY_KEY=$(SJR_DEPLOY_KEY) \ 27 | SJR_PACKAGE_PATH=$(PACKAGE_PATH) \ 28 | SJR_IS_REMOTE_DEPLOY="true" \ 29 | SJR_REMOTE_DEPLOY_PATH=$(SJR_REMOTE_DEPLOY_PATH) 30 | 31 | all: build deploy 32 | 33 | build: 34 | @mvn clean install 35 | 36 | deploy: 37 | @SJR_DEPLOY_PATH=$(SJR_DEPLOY_PATH) \ 38 | $(CURRENT_DIR)/spark-job-rest/src/main/scripts/deploy.sh deploy 39 | 40 | remote-deploy: 41 | @$(REMOTE_PARAMS) $(CURRENT_DIR)/spark-job-rest/src/main/scripts/deploy.sh deploy 42 | 43 | remote-start: 44 | @$(REMOTE_PARAMS) $(CURRENT_DIR)/spark-job-rest/src/main/scripts/deploy.sh start 45 | 46 | remote-stop: 47 | @$(REMOTE_PARAMS) $(CURRENT_DIR)/spark-job-rest/src/main/scripts/deploy.sh stop 48 | 49 | remote-log: 50 | @$(REMOTE_PARAMS) $(CURRENT_DIR)/spark-job-rest/src/main/scripts/deploy.sh log 51 | 52 | start: 53 | @SJR_DEPLOY_PATH=$(SJR_DEPLOY_PATH) \ 54 | $(CURRENT_DIR)/spark-job-rest/src/main/scripts/deploy.sh start 55 | 56 | stop: 57 | @SJR_DEPLOY_PATH=$(SJR_DEPLOY_PATH) \ 58 | $(CURRENT_DIR)/spark-job-rest/src/main/scripts/deploy.sh stop 59 | 60 | .PHONY: all build deploy -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/Atigeo/spark-job-rest?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) 2 | 3 | ## Features: 4 | 5 | **Supports multiple spark contexts created from the same server** 6 | 7 | The main problem this project solves is the inability to run multiple Spark contexts from the same JVM. This is a bug in Spark core that was also present in Ooyala's Spark Job Server, from which this project is inspired. 
The project launches a new process for each Spark context/application, with its own driver memory setting and its own driver log. Each driver JVM is created with its own Spark UI port, which is returned to the API caller. Inter-process communication is achieved with Akka actors, and each process is shut down when a Spark context/application is deleted.
8 |
9 | ## Version compatibility
10 |
11 | SJR Version | Spark Version
12 | ------------- | -------------
13 | 0.3.0 | 1.1.0
14 | 0.3.1 | 1.3.1
15 |
16 | ## Building Spark-Job-Rest (SJR)
17 |
18 | The project is built with Maven 3 and Java 7.
19 | ```
20 | make build
21 | ```
22 | SJR can now be deployed from `spark-job-rest/spark-job-rest/target/spark-job-rest.tar.gz`.
23 |
24 | If your build fails with this error:
25 | ```
26 | [ERROR] spark-job-rest/src/main/scala/server/domain/actors/ContextManagerActor.scala:171: error: value redirectOutput is not a member of ProcessBuilder
27 | ```
28 | This happens because Maven uses Java 6. You can run `mvn -version` to check the Java version that Maven uses.
29 | ```sh
30 | $ mvn -version
31 | Apache Maven 3.2.5
32 | Java version: 1.6.0_65
33 | ```
34 | If Maven uses Java 6, you need to switch it to Java 7. This can be done by adding the `JAVA_HOME` export to your `~/.mavenrc` file:
35 | ```sh
36 | # OSX:
37 | export JAVA_HOME=/Library/Java/JavaVirtualMachines/{jdk-version}/Contents/Home
38 | ```
39 | ```sh
40 | # Ubuntu:
41 | export JAVA_HOME=/usr/lib/jvm/{jdk-version}
42 | ```
43 |
44 | If running from an IDE fails with:
45 | ```
46 | Exception in thread "main" java.lang.NoClassDefFoundError: akka/actor/Props
47 | ```
48 | This happens because the Spark dependency has the `provided` scope. To run from an IDE, either remove the `provided` scope for the Spark dependency (inside `pom.xml`) or add the Spark assembly jar to the run classpath.
49 |
50 | ## Deploying Spark-Job-Rest
51 |
52 | You can deploy Spark-Job-Rest locally to the `deploy` directory inside the project with:
53 | ```sh
54 | make deploy
55 | ```
56 | Optionally, you can specify the install directory via the `$SJR_DEPLOY_PATH` environment variable:
57 | ```sh
58 | SJR_DEPLOY_PATH=/opt/spark-job-rest make deploy
59 | ```
60 |
61 | Before running SJR, ensure that the [working environment](#configure-spark-environment) is configured.
62 |
63 | In order to have a proper installation you should set `$SPARK_HOME` to your Apache Spark distribution and `$SPARK_CONF_HOME` to the directory which contains `spark-env.sh` (usually `$SPARK_HOME/conf` or `$SPARK_HOME/libexec/conf`).
64 | You can do this in your bash profile (`~/.bash_profile` or `~/.bashrc`) by adding the following lines:
65 | ```sh
66 | export SPARK_HOME=
67 | export SPARK_CONF_HOME=$SPARK_HOME/libexec/conf # or $SPARK_HOME/conf depending on your distribution
68 | ```
69 | After that, either open a new terminal session or source your bash profile.
70 |
71 | ### Deploying to remote host
72 |
73 | You can deploy Spark-Job-REST to a remote host via:
74 | ```sh
75 | make remote-deploy
76 | ```
77 |
78 | For remote deployment you should set the following environment variables:
79 | ```sh
80 | # Mandatory connection string
81 | export SJR_DEPLOY_HOST=
82 | # Optional parameters
83 | export SJR_DEPLOY_KEY=
84 | export SJR_REMOTE_DEPLOY_PATH=
85 | ```
86 | If `SJR_REMOTE_DEPLOY_PATH` is not set then `SJR_DEPLOY_PATH` will be used during remote deploy.
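For example, a one-off remote deployment can pass these variables inline; the host, key and path below are placeholders rather than values from this project:
```sh
# Hypothetical values -- substitute your own host, SSH key and target path
SJR_DEPLOY_HOST=ubuntu@spark-master.example.com \
SJR_DEPLOY_KEY=~/.ssh/spark_deploy_key \
SJR_REMOTE_DEPLOY_PATH=/opt/spark-job-rest \
make remote-deploy
```
The same variables apply to the `make remote-start`, `make remote-stop` and `make remote-log` targets.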
87 |
88 | ## Starting Spark-Job-Rest
89 |
90 | To start/stop SJR, use
91 | ```sh
92 | cd $SJR_DEPLOY_PATH
93 | bin/start_server.sh
94 | bin/stop_server.sh
95 | ```
96 |
97 | or, if it is deployed to the default destination, simply
98 | ```sh
99 | make start
100 | make stop
101 | ```
102 |
103 | ## Configure Spark-job-rest
104 |
105 | The Spark-Job-REST default configuration is stored in `resources/application.conf` (here and below, paths are relative to `spark-job-rest/src/main/`).
106 | To add or override settings, create `resources/deploy.conf` (ignored by VCS).
107 |
108 | ### Spark context settings
109 | Configure the default Spark properties for context creation; they are regular Spark configuration options:
110 | ```
111 | spark.executor.memory=2g
112 | spark.master="local"
113 | spark.path="/Users/user/spark-1.1.0"
114 | ........
115 | ```
116 | To set how much memory should be allocated for the driver, use `driver.xmxMemory` (default is `1g`).
117 |
118 | ### Application settings
119 |
120 | Configure settings like the web server port and Akka system ports:
121 | ```
122 | appConf{
123 | web.services.port=8097
124 | spark.ui.first.port = 16000
125 | ........
126 | }
127 | ```
128 |
129 | ### Configure folders & class paths
130 |
131 | You may configure folders by setting environment variables and by creating and editing `resources/deploy-settings.sh` (under `spark-job-rest/src/main/`):
132 |
133 | ```sh
134 | export SJR_LOG_DIR=
135 | export SJR_JAR_PATH=
136 | export SJR_CONTEXTS_BASE_DIR=
137 | export JSR_EXTRA_CLASSPATH=
138 | ```
139 |
140 | ### Java & GC options
141 |
142 | You can extend or override Java and GC options in `resources/deploy-settings.sh`:
143 |
144 | ```sh
145 | JAVA_OPTS="${JAVA_OPTS}
146 | ${YOUR_EXTRA_JAVA_OPTIONS}"
147 | GC_OPTS="${GC_OPTS}
148 | ${YOUR_EXTRA_GC_OPTIONS}"
149 | ```
150 |
151 | ## Custom contexts
152 |
153 | Spark-Job-REST supports custom job context factories, defined via the `context.job-context-factory` configuration property.
154 | By default SJR uses `context.SparkContextFactory`, which creates one Spark context per JVM.
155 |
156 | ### SQL contexts
157 |
158 | To run jobs against the provided SQL contexts, include `spark-job-rest-sql` in your project, set the context factory to one of the SQL context factories provided by this library, and derive your job from `api.SparkSqlJob`.
159 | Currently supported contexts:
160 |
161 | 1. `context.SparkSQLContextFactory` creates a plain SQL context.
162 | 2. `context.HiveContextFactory` creates a Hive SQL context.
163 |
164 | ## Configure Spark environment
165 |
166 | In order to have a proper installation you should set `$SPARK_HOME` to your Apache Spark distribution and `$SPARK_CONF_HOME` to the directory which contains `spark-env.sh` (usually `$SPARK_HOME/conf` or `$SPARK_HOME/libexec/conf`).
167 | You can do this in your bash profile (`~/.bash_profile` or `~/.bashrc`) by adding the following lines:
168 | ```sh
169 | export SPARK_HOME=
170 | export SPARK_CONF_HOME=$SPARK_HOME/libexec/conf # or $SPARK_HOME/conf depending on your distribution
171 | ```
172 | After that, either open a new terminal session or source your bash profile.
173 |
174 | SJR can be run from outside the Spark cluster, but you need at least to copy the deployment folder from one of the slave or master nodes.
175 |
176 | ## Run Spark-job-rest
177 |
178 | After editing all the configuration files, SJR can be started by executing the script `start_server.sh`.
179 |
180 | The UI can be accessed at `<server address>:<web.services.port>` (port `8097` by default).
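Once the server is up, a quick sanity check is to query it over HTTP (assuming the default `web.services.port=8097`; the `curl` call below is illustrative and hits the same route as the bundled HTTP client's `getContexts()`):
```sh
# Lists the currently running contexts; an empty list means the server is up but no contexts exist yet
curl 'localhost:8097/contexts'
```
The full set of endpoints is described in the next section.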
181 |
182 | ## API
183 |
184 | **Contexts**
185 |
186 | - POST /contexts/{contextName} - Create Context
187 |
188 | * Body: Raw entity with key-value pairs.
189 | * The `jars` key is required and should be a comma-separated list of jar paths. These jars will be added at Spark context creation time to the classpath of the newly created context's JVM process. Three types of jar paths are supported:
190 | * Absolute path on the server side: `/home/ubuntu/example.jar`
191 | * Name of a jar that was uploaded to the server: `example.jar`
192 | * HDFS path: `hdfs://devbox.local:8020/user/test/example.jar`
193 |
194 | ```
195 | # Body example:
196 | jars="/home/ubuntu/example.jar,example.jar,hdfs://devbox.local:8020/user/test/example.jar"
197 | spark.executor.memory=2g
198 | driver.xmxMemory = 1g
199 | ```
200 |
201 | - GET /contexts/{contextName} - Returns the Context JSON object, or a "No such context" error.
202 |
203 | - DELETE /contexts/{contextName} - Delete Context
204 |
205 | **Jobs**
206 |
207 | - POST /jobs?runningClass={runningClass}&contextName={contextName} - Job Submission
208 |
209 | * Body: Raw entity with key-value pairs. Here you can set any configuration properties that will be passed to the config parameter of the validate and runJob methods of the provided jar (see the SparkJob definition below).
210 |
211 | - GET /jobs/{jobId}?contextName={contextName} - Gets the result or state of a specific job
212 |
213 | - GET /jobs - Gets the states/results of all jobs from all running contexts
214 |
215 | **Jars**
216 |
217 | - POST /jars/{jarName} - Upload jar
218 | * Body: Jar Bytes
219 |
220 | - POST /jars - Upload jar
221 | * Body: MultiPart Form
222 |
223 | - GET /jars - Gets all the uploaded jars
224 |
225 | - DELETE /jars/{jarName} - Delete jar
226 |
227 | ## HTTP Client
228 |
229 | All the API methods can be called from Scala/Java with the help of an HTTP client.
230 |
231 | Maven Spark-Job-Rest-Client dependency:
232 | ```xml
233 | <dependency>
234 |     <groupId>com.xpatterns</groupId>
235 |     <artifactId>spark-job-rest-client</artifactId>
236 |     <version>0.3.1</version>
237 | </dependency>
238 | ```
239 |
240 | ## Create Spark Job Project
241 |
242 | Add the Maven Spark-Job-Rest-Api dependency:
243 | ```xml
244 | <dependency>
245 |     <groupId>com.xpatterns</groupId>
246 |     <artifactId>spark-job-rest-api</artifactId>
247 |     <version>0.3.1</version>
248 | </dependency>
249 | ```
250 |
251 | To create a job that can be submitted through the server, the class must implement the `SparkJob` trait.
252 |
253 | ```scala
254 | import com.typesafe.config.Config
255 | import org.apache.spark.SparkContext
256 | import api.{SparkJobInvalid, SparkJobValid, SparkJobValidation, SparkJob}
257 |
258 | class Example extends SparkJob {
259 |   override def runJob(sc: SparkContext, jobConfig: Config): Any = { ... }
260 |   override def validate(sc: SparkContext, config: Config): SparkJobValidation = { ... }
261 | }
262 | ```
263 |
264 | - The `runJob` method contains the implementation of the job. The `SparkContext` and `Config` objects are provided as parameters.
265 | - The `validate` method allows for an initial validation. In order to run the job, return `SparkJobValid()`; otherwise return `SparkJobInvalid(message)`.
266 |
267 | ## Example
268 |
269 | An example for this project can be found here: ```spark-job-rest/examples/example-job```.
In order to package it, run 270 | ```sh 271 | mvn clean install 272 | ``` 273 | 274 | **Upload JAR** 275 | ```sh 276 | # In the project root directory 277 | curl --data-binary @spark-job-rest/examples/example-job/target/example-job.jar 'localhost:8097/jars/example-job.jar' 278 | 279 | { 280 | "contextName": "test-context", 281 | "sparkUiPort": "16003" 282 | } 283 | ``` 284 | 285 | **Create a context** 286 | ```sh 287 | curl -X POST -d "jars=example-job.jar" 'localhost:8097/contexts/test-context' 288 | 289 | { 290 | "contextName": "test-context", 291 | "sparkUiPort": "16003" 292 | } 293 | ``` 294 | 295 | **Check if context exists** 296 | 297 | ```sh 298 | curl 'localhost:8097/contexts/test-context' 299 | 300 | { 301 | "contextName": "test-context", 302 | "sparkUiPort": "16003" 303 | } 304 | ``` 305 | 306 | **Run job** - The example job creates an RDD from a Range(0,input) and applies count on it. 307 | 308 | ```sh 309 | curl -X POST -d "input=10000" 'localhost:8097/jobs?runningClass=com.job.SparkJobImplemented&contextName=test-context' 310 | 311 | { 312 | "jobId": "2bd438a2-ac1e-401a-b767-5fa044b2bd69", 313 | "contextName": "test-context", 314 | "status": "Running", 315 | "result": "", 316 | "startTime": 1430287260144 317 | } 318 | ``` 319 | 320 | ```2bd438a2-ac1e-401a-b767-5fa044b2bd69``` represents the jobId. This id can be used to query for the job status/results. 321 | 322 | **Query for results** 323 | 324 | ```sh 325 | curl 'localhost:8097/jobs/2bd438a2-ac1e-401a-b767-5fa044b2bd69?contextName=test-context' 326 | 327 | { 328 | "jobId": "2bd438a2-ac1e-401a-b767-5fa044b2bd69", 329 | "contextName": "test-context", 330 | "status": "Finished", 331 | "result": "10000", 332 | "startTime": 1430287261108 333 | } 334 | ``` 335 | 336 | **Delete context** 337 | 338 | ```sh 339 | curl -X DELETE 'localhost:8097/contexts/test-context' 340 | 341 | { 342 | "message": "Context deleted." 343 | } 344 | ``` 345 | 346 | **HTTP Client Example** 347 | 348 | ```scala 349 | object Example extends App { 350 | implicit val system = ActorSystem() 351 | val contextName = "testContext" 352 | 353 | try { 354 | val sjrc = new SparkJobRestClient("http://localhost:8097") 355 | 356 | val context = sjrc.createContext(contextName, Map("jars" -> "/Users/raduchilom/projects/spark-job-rest/examples/example-job/target/example-job.jar")) 357 | println(context) 358 | 359 | val job = sjrc.runJob("com.job.SparkJobImplemented", contextName, Map("input" -> "10")) 360 | println(job) 361 | 362 | var jobFinal = sjrc.getJob(job.jobId, job.contextName) 363 | while (jobFinal.status.equals(JobStates.RUNNING.toString())) { 364 | Thread.sleep(1000) 365 | jobFinal = sjrc.getJob(job.jobId, job.contextName) 366 | } 367 | println(jobFinal) 368 | 369 | sjrc.deleteContext(contextName) 370 | } catch { 371 | case e:Exception => { 372 | e.printStackTrace() 373 | } 374 | } 375 | 376 | system.shutdown() 377 | } 378 | ``` 379 | Running this would produce the output: 380 | 381 | ``` 382 | Context(testContext,16002) 383 | Job(ab63c19f-bbb4-461e-8c6f-f0a35f73a943,testContext,Running,,1430291077689) 384 | Job(ab63c19f-bbb4-461e-8c6f-f0a35f73a943,testContext,Finished,10,1430291078694) 385 | ``` 386 | 387 | 388 | ## UI 389 | 390 | The UI was added in a compiled and minified state. For sources and changes please refer to [spark-job-rest-ui](https://github.com/marianbanita82/spark-job-rest-ui) project. 
391 | -------------------------------------------------------------------------------- /examples/example-job/LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright 2014 Atigeo, LLC. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at 4 | 5 | http://www.apache.org/licenses/LICENSE-2.0 6 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -------------------------------------------------------------------------------- /examples/example-job/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.xpatterns 6 | example-job 7 | 1.0.0 8 | jar 9 | 10 | example-job 11 | http://maven.apache.org 12 | 13 | 14 | 15 | mvnrepository 16 | http://repo1.maven.org/maven2 17 | 18 | 19 | cloudera-repo-releases 20 | https://repository.cloudera.com/artifactory/repo/ 21 | 22 | 23 | Akka repository 24 | http://repo.akka.io/releases 25 | 26 | 27 | 28 | 29 | UTF-8 30 | 2.10.3 31 | 2.10 32 | 33 | 34 | 35 | 36 | com.xpatterns 37 | spark-job-rest-api 38 | 0.3.2 39 | 40 | 41 | 42 | org.apache.spark 43 | spark-core_2.10 44 | 1.3.1 45 | provided 46 | 47 | 48 | 49 | 50 | 51 | 52 | src/main/resources 53 | 54 | * 55 | 56 | false 57 | 58 | 59 | 60 | ${project.artifactId} 61 | 62 | 63 | 64 | 65 | org.scala-tools 66 | maven-scala-plugin 67 | 2.9.1 68 | 69 | 70 | org.apache.maven.plugins 71 | maven-compiler-plugin 72 | 2.0.2 73 | 74 | 75 | 76 | 77 | 78 | 79 | org.apache.maven.plugins 80 | maven-compiler-plugin 81 | 82 | 83 | 1.7 84 | 1.7 85 | 86 | 87 | 88 | 89 | org.scala-tools 90 | maven-scala-plugin 91 | 92 | 93 | scala-compile-first 94 | process-resources 95 | 96 | add-source 97 | compile 98 | 99 | 100 | 101 | scala-test-compile 102 | process-test-resources 103 | 104 | testCompile 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | -------------------------------------------------------------------------------- /examples/example-job/src/main/scala/com.job/SparkJobImplemented.scala: -------------------------------------------------------------------------------- 1 | package com.job 2 | 3 | import com.typesafe.config.Config 4 | import org.apache.spark.SparkContext 5 | import api.{SparkJobInvalid, SparkJobValid, SparkJobValidation, SparkJob} 6 | 7 | /** 8 | * Created by raduc on 03/11/14. 
9 | */ 10 | class SparkJobImplemented extends SparkJob 11 | { 12 | override def runJob(sc: SparkContext, jobConfig: Config): Any = { 13 | 14 | val nr = jobConfig.getInt("input") 15 | 16 | val list = Range(0,nr) 17 | val rdd = sc.parallelize(list) 18 | rdd.count() 19 | 20 | } 21 | 22 | override def validate(sc: SparkContext, config: Config): SparkJobValidation = { 23 | if(config.hasPath("input")) SparkJobValid() else SparkJobInvalid("The input parameter is missing.") 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /examples/s3-download-job/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.xpatterns 6 | s3-download-job 7 | 1.0.0 8 | jar 9 | 10 | s3-download-job 11 | http://maven.apache.org 12 | 13 | 14 | 15 | mvnrepository 16 | http://repo1.maven.org/maven2 17 | 18 | 19 | cloudera-repo-releases 20 | https://repository.cloudera.com/artifactory/repo/ 21 | 22 | 23 | Akka repository 24 | http://repo.akka.io/releases 25 | 26 | 27 | 28 | 29 | UTF-8 30 | 2.10.3 31 | 2.10 32 | 33 | 34 | 35 | 36 | org.apache.httpcomponents 37 | httpclient 38 | 4.3.4 39 | 40 | 41 | 42 | com.xpatterns 43 | spark-job-rest-api 44 | 0.3.2 45 | 46 | 47 | 48 | com.xpatterns 49 | spark-job-rest-client 50 | 0.3.1 51 | 52 | 53 | 54 | com.amazonaws 55 | aws-java-sdk 56 | 1.8.3 57 | 58 | 59 | 60 | org.apache.spark 61 | spark-core_2.10 62 | 1.3.1 63 | provided 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | src/main/resources 72 | 73 | * 74 | 75 | false 76 | 77 | 78 | ${project.artifactId} 79 | 80 | 81 | 82 | org.scala-tools 83 | maven-scala-plugin 84 | 2.9.1 85 | 86 | 87 | org.apache.maven.plugins 88 | maven-compiler-plugin 89 | 2.0.2 90 | 91 | 92 | 93 | 94 | 95 | org.apache.maven.plugins 96 | maven-compiler-plugin 97 | 98 | 99 | 1.7 100 | 1.7 101 | 102 | 103 | 104 | org.scala-tools 105 | maven-scala-plugin 106 | 107 | 108 | scala-compile-first 109 | process-resources 110 | 111 | add-source 112 | compile 113 | 114 | 115 | 116 | scala-test-compile 117 | process-test-resources 118 | 119 | testCompile 120 | 121 | 122 | 123 | 124 | 125 | 126 | org.apache.maven.plugins 127 | maven-shade-plugin 128 | 2.2 129 | 130 | 131 | 132 | package 133 | 134 | shade 135 | 136 | 137 | 138 | 139 | reference.conf 140 | 141 | 142 | 143 | 144 | *:* 145 | 146 | META-INF/*.SF 147 | META-INF/*.DSA 148 | META-INF/*.RSA 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | -------------------------------------------------------------------------------- /examples/s3-download-job/src/main/scala/com.job/ExecuteDownload.scala: -------------------------------------------------------------------------------- 1 | package com.job 2 | 3 | import akka.actor.ActorSystem 4 | import client.SparkJobRestClient 5 | import com.typesafe.config.{ConfigFactory, Config} 6 | import org.apache.spark.{SparkContext, SparkConf} 7 | import responses.JobStates 8 | 9 | /** 10 | * Created by raduchilom on 5/4/15. 
11 | */ 12 | 13 | object ExecuteDownload extends App { 14 | implicit val system = ActorSystem() 15 | val contextName = "downloadDataContext" 16 | 17 | try { 18 | val sjrc = new SparkJobRestClient("http://localhost:8097") 19 | 20 | val context = sjrc.createContext(contextName, Map("jars" -> "/home/ubuntu/s3-download-job.jar")) 21 | println(context) 22 | 23 | val bucketName="public-financial-transactions" 24 | val numPartitions=10 25 | val outputFolder="\"tachyon://localhost:19998/user/ubuntu/downloaded_data\"" 26 | 27 | val job = sjrc.runJob("com.job.S3DownloadJob", contextName, 28 | Map("s3.bucket" -> bucketName, 29 | "num.partitions" -> String.valueOf(numPartitions), 30 | "fs.output" -> outputFolder 31 | )) 32 | println(job) 33 | 34 | 35 | var jobFinal = sjrc.getJob(job.jobId, job.contextName) 36 | while (jobFinal.status.equals(JobStates.RUNNING.toString())) { 37 | Thread.sleep(1000) 38 | jobFinal = sjrc.getJob(job.jobId, job.contextName) 39 | } 40 | println(jobFinal) 41 | 42 | sjrc.deleteContext(contextName) 43 | } catch { 44 | case e:Exception => { 45 | e.printStackTrace() 46 | } 47 | } 48 | 49 | system.shutdown() 50 | } -------------------------------------------------------------------------------- /examples/s3-download-job/src/main/scala/com.job/S3DownloadJob.scala: -------------------------------------------------------------------------------- 1 | package com.job 2 | 3 | import com.typesafe.config.Config 4 | import org.apache.spark.SparkContext 5 | import api.{SparkJobInvalid, SparkJobValid, SparkJobValidation, SparkJob} 6 | import org.slf4j.LoggerFactory 7 | 8 | import scala.collection.mutable.ListBuffer 9 | import scala.util.{Success, Failure, Try} 10 | 11 | /** 12 | * Created by raduc on 03/11/14. 13 | */ 14 | class S3DownloadJob extends SparkJob { 15 | 16 | val log = LoggerFactory.getLogger(getClass) 17 | 18 | override def runJob(sc: SparkContext, jobConfig: Config): Any = { 19 | 20 | val bucketName = jobConfig.getString("s3.bucket") 21 | 22 | val numPartitions = jobConfig.getInt("num.partitions") 23 | val outputFolder = jobConfig.getString("fs.output") 24 | 25 | // slow for a large number of files 26 | // val fileList = S3Utils.getFiles(bucketName) 27 | // val files = sc.parallelize(fileList, numPartitions) 28 | 29 | val filesRdd = S3Utils.getFilesDistributed(bucketName, sc, numPartitions) 30 | // known bug in spark 1.1.0 31 | // filesRdd.partitions 32 | val files = filesRdd.repartition(numPartitions) 33 | 34 | log.info(s"Number of partitions: ${files.partitions.length}") 35 | 36 | val results = files.mapPartitions{ iterator => 37 | 38 | val listBuffer = ListBuffer[(Try[Any], String)]() 39 | val s3Client = S3Utils.getS3Client() 40 | 41 | while(iterator.hasNext) { 42 | val tuple = iterator.next() 43 | listBuffer += S3Utils.downloadFile(bucketName, tuple._2, outputFolder, s3Client) 44 | } 45 | 46 | listBuffer.iterator 47 | } 48 | 49 | log.info("Listing the file with errors:") 50 | val errorFiles = results.filter { x => 51 | x._1 match { 52 | case Success(v) => false 53 | case Failure(e) => true 54 | } 55 | }.collect() 56 | log.warn(s"There were ${errorFiles.size} files with error") 57 | errorFiles.foreach(t => log.error("", t._1.get)) 58 | 59 | if(errorFiles.size == 0){ 60 | s"Number of failed files: ${errorFiles.size}" 61 | } else { 62 | errorFiles 63 | } 64 | 65 | } 66 | 67 | override def validate(sc: SparkContext, config: Config): SparkJobValidation = { 68 | if(!config.hasPath("s3.bucket")) return SparkJobInvalid("The \"s3.bucket\" parameter is missing.") 69 | 
if(!config.hasPath("fs.output")) return SparkJobInvalid("The \"fs.output\" parameter is missing.") 70 | if(!config.hasPath("num.partitions")) return SparkJobInvalid("The \"num.partitions\" parameter is missing.") 71 | 72 | SparkJobValid() 73 | } 74 | 75 | } -------------------------------------------------------------------------------- /examples/s3-download-job/src/main/scala/com.job/S3Utils.scala: -------------------------------------------------------------------------------- 1 | package com.job 2 | 3 | import com.amazonaws.auth.profile.ProfileCredentialsProvider 4 | import com.amazonaws.regions.{Regions, Region} 5 | import com.amazonaws.services.s3.AmazonS3Client 6 | import com.amazonaws.services.s3.model.{S3ObjectInputStream, GetObjectRequest, ObjectListing, ListObjectsRequest} 7 | import org.apache.hadoop.conf.Configuration 8 | import org.apache.hadoop.fs.{FSDataOutputStream, Path, FileSystem} 9 | import org.apache.hadoop.io.IOUtils 10 | import org.apache.spark.SparkContext 11 | import org.slf4j.LoggerFactory 12 | 13 | import scala.collection.mutable.ListBuffer 14 | import scala.util.{Failure, Try} 15 | 16 | /** 17 | * Created by raduchilom on 4/18/15. 18 | */ 19 | object S3Utils { 20 | 21 | val log = LoggerFactory.getLogger(getClass) 22 | 23 | def getFiles(bucketName: String):List[(Int, String)] = { 24 | 25 | val s3Client: AmazonS3Client = getS3Client() 26 | 27 | val fileList = ListBuffer[(Int, String)]() 28 | 29 | try { 30 | log.info("Listing objects from S3") 31 | var counter = 0 32 | 33 | val listObjectsRequest = new ListObjectsRequest() 34 | .withBucketName(bucketName) 35 | var objectListing: ObjectListing = null 36 | 37 | do { 38 | import scala.collection.JavaConversions._ 39 | objectListing = s3Client.listObjects(listObjectsRequest) 40 | objectListing.getObjectSummaries.foreach { objectSummary => 41 | if(!objectSummary.getKey.endsWith(Path.SEPARATOR)) { 42 | fileList += Tuple2(counter, objectSummary.getKey) 43 | counter += 1 44 | } 45 | } 46 | listObjectsRequest.setMarker(objectListing.getNextMarker()); 47 | } while (objectListing.isTruncated()) 48 | 49 | log.info("Finished listing objects from S3") 50 | 51 | } catch { 52 | case e: Exception => { 53 | log.error("Failed listing files. 
", e) 54 | throw e 55 | } 56 | } 57 | 58 | fileList.toList 59 | } 60 | 61 | def getFilesDistributed(bucketName: String, sc: SparkContext, numPartitions: Int) = { 62 | val s3Client: AmazonS3Client = getS3Client() 63 | 64 | val fileList = ListBuffer[String]() 65 | val folderList = ListBuffer[String]() 66 | 67 | val listObjectsRequest = new ListObjectsRequest() 68 | .withBucketName(bucketName) 69 | .withPrefix("") 70 | .withDelimiter("/") 71 | 72 | var objectListing: ObjectListing = null 73 | 74 | do { 75 | import scala.collection.JavaConversions._ 76 | objectListing = s3Client.listObjects(listObjectsRequest) 77 | folderList ++= objectListing.getCommonPrefixes 78 | objectListing.getObjectSummaries.foreach { objectSummary => 79 | if(!objectSummary.getKey.endsWith(Path.SEPARATOR)) { 80 | fileList += objectSummary.getKey 81 | } 82 | } 83 | listObjectsRequest.setMarker(objectListing.getNextMarker()); 84 | } while (objectListing.isTruncated()) 85 | 86 | val folderRdd = sc.parallelize(folderList.toList, folderList.size) 87 | val filesRdd1 = folderRdd.flatMap{ folder => 88 | getFilesFromFolder(bucketName, folder) 89 | } 90 | filesRdd1.cache() 91 | filesRdd1.count() 92 | 93 | val filesRdd2 = sc.parallelize(fileList, numPartitions) 94 | val filesRdd = filesRdd1.union(filesRdd2) 95 | 96 | filesRdd.zipWithIndex().map { 97 | case (value, index) => (index, value) 98 | } 99 | } 100 | 101 | def getFilesFromFolder(bucketName: String, folderKey: String):List[String] = { 102 | 103 | val s3Client: AmazonS3Client = getS3Client() 104 | 105 | val fileList = ListBuffer[String]() 106 | 107 | try { 108 | log.info("Listing objects from S3") 109 | var counter = 0 110 | 111 | val listObjectsRequest = new ListObjectsRequest() 112 | .withBucketName(bucketName) 113 | .withPrefix(folderKey) 114 | var objectListing: ObjectListing = null 115 | 116 | do { 117 | import scala.collection.JavaConversions._ 118 | objectListing = s3Client.listObjects(listObjectsRequest) 119 | objectListing.getObjectSummaries.foreach { objectSummary => 120 | if(!objectSummary.getKey.endsWith(Path.SEPARATOR)) { 121 | fileList += objectSummary.getKey 122 | counter += 1 123 | } 124 | } 125 | listObjectsRequest.setMarker(objectListing.getNextMarker()); 126 | } while (objectListing.isTruncated()) 127 | 128 | log.info(s"Finished listing objects for folder $folderKey") 129 | 130 | } catch { 131 | case e: Exception => { 132 | log.error("Failed listing files. 
", e) 133 | throw e 134 | } 135 | } 136 | 137 | fileList.toList 138 | } 139 | 140 | def getS3Client(): AmazonS3Client = { 141 | // val awsCreds = new ProfileCredentialsProvider() 142 | // val s3client = new AmazonS3Client(awsCreds) 143 | val s3client = new AmazonS3Client() 144 | // s3client.setRegion(Region.getRegion(Regions.EU_WEST_1)) 145 | s3client 146 | } 147 | 148 | def downloadFile(bucketName: String, key: String, outputFolder: String, s3Client: AmazonS3Client): (Try[Any], String) = { 149 | 150 | val downloadTry = Try { 151 | 152 | var inputStream: S3ObjectInputStream = null 153 | var outputStream: FSDataOutputStream = null 154 | 155 | try { 156 | 157 | log.info(s"Downloading file: $key") 158 | val s3object = s3Client.getObject(new GetObjectRequest(bucketName, key)) 159 | inputStream = s3object.getObjectContent 160 | 161 | val outputPath = outputFolder + Path.SEPARATOR + key 162 | log.info(s"Writing file to: $outputPath") 163 | 164 | val conf = new Configuration(); 165 | conf.set("fs.defaultFS", outputFolder) 166 | conf.set("fs.tachyon.impl","tachyon.hadoop.TFS") 167 | // new instance & set file in configuration 168 | val fs = FileSystem.get(conf); 169 | outputStream = fs.create(new Path(outputPath)); 170 | 171 | IOUtils.copyBytes(inputStream, outputStream, 8192) 172 | 173 | fs.getFileStatus(new Path(outputPath)).getLen 174 | 175 | } finally { 176 | if (inputStream != null) { 177 | inputStream.close() 178 | } 179 | if (outputStream != null) { 180 | outputStream.close() 181 | } 182 | } 183 | } 184 | 185 | downloadTry match { 186 | case Failure(e:Throwable) => log.error("Error: ", e) 187 | case _ => 188 | } 189 | 190 | (downloadTry, key) 191 | } 192 | 193 | 194 | } 195 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.xpatterns 6 | spark-job-REST 7 | 0.3.2 8 | pom 9 | 10 | spark-job-REST 11 | http://maven.apache.org 12 | 13 | 14 | spark-job-rest-api 15 | spark-job-rest-sql 16 | spark-job-rest 17 | spark-job-rest-client 18 | examples/example-job 19 | examples/s3-download-job 20 | 21 | -------------------------------------------------------------------------------- /spark-job-rest-api/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.xpatterns 6 | spark-job-rest-api 7 | 0.3.2 8 | jar 9 | 10 | spark-job-rest-api 11 | https://github.com/Atigeo/spark-job-rest 12 | 13 | The API for Spark-Job-Rest. 14 | Contains the SparkJob interface that must be extended in order to run jobs on the server. 
15 | 16 | 17 | 18 | 19 | The Apache License, Version 2.0 20 | http://www.apache.org/licenses/LICENSE-2.0.txt 21 | 22 | 23 | 24 | 25 | 26 | Radu Chilom 27 | raduchilom@gmail.com 28 | Atigeo 29 | http://www.atigeo.com 30 | 31 | 32 | 33 | 34 | scm:git:git@github.com:Atigeo/spark-job-rest.git 35 | scm:git:git@github.com:Atigeo/spark-job-rest.git 36 | git@github.com:Atigeo/spark-job-rest.git 37 | 38 | 39 | 40 | 41 | ossrh 42 | https://oss.sonatype.org/content/repositories/snapshots 43 | 44 | 45 | ossrh 46 | https://oss.sonatype.org/service/local/staging/deploy/maven2/ 47 | 48 | 49 | 50 | 51 | 52 | mvnrepository 53 | http://repo1.maven.org/maven2 54 | 55 | 56 | Akka repository 57 | http://repo.akka.io/releases 58 | 59 | 60 | cloudera-repo-releases 61 | https://repository.cloudera.com/artifactory/repo/ 62 | 63 | 64 | 65 | 66 | UTF-8 67 | 2.10.3 68 | 2.10 69 | 70 | 71 | 72 | 73 | org.apache.spark 74 | spark-core_2.10 75 | 1.3.1 76 | provided 77 | 78 | 79 | io.spray 80 | spray-json_2.10 81 | 1.2.6 82 | 83 | 84 | 85 | 86 | 87 | 88 | src/main/resources 89 | 90 | * 91 | 92 | false 93 | 94 | 95 | ${project.artifactId} 96 | 97 | 98 | org.apache.maven.plugins 99 | maven-compiler-plugin 100 | 3.1 101 | 102 | 1.7 103 | 1.7 104 | 105 | 106 | 107 | net.alchim31.maven 108 | scala-maven-plugin 109 | 3.2.0 110 | 111 | 112 | scala-compile-first 113 | process-resources 114 | 115 | add-source 116 | compile 117 | 118 | 119 | 120 | scala-test-compile 121 | process-test-resources 122 | 123 | testCompile 124 | 125 | 126 | 127 | doc 128 | generate-sources 129 | 130 | doc-jar 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | release 141 | 142 | 143 | 144 | org.sonatype.plugins 145 | nexus-staging-maven-plugin 146 | 1.6.3 147 | true 148 | 149 | ossrh 150 | https://oss.sonatype.org/ 151 | true 152 | 153 | 154 | 155 | 156 | org.apache.maven.plugins 157 | maven-source-plugin 158 | 2.2.1 159 | 160 | 161 | attach-sources 162 | 163 | jar-no-fork 164 | 165 | 166 | 167 | 168 | 169 | org.apache.maven.plugins 170 | maven-javadoc-plugin 171 | 2.9.1 172 | 173 | 174 | attach-javadocs 175 | 176 | jar 177 | 178 | 179 | 180 | 181 | 182 | org.apache.maven.plugins 183 | maven-gpg-plugin 184 | 1.5 185 | 186 | 187 | sign-artifacts 188 | verify 189 | 190 | sign 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | -------------------------------------------------------------------------------- /spark-job-rest-api/src/main/scala/api/ContextLike.scala: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import org.apache.spark.SparkContext 4 | 5 | trait ContextLike { 6 | /** 7 | * Type of the context for representation 8 | */ 9 | val contextClass: String 10 | 11 | override def toString: String = { 12 | super.toString + s"($contextClass)" 13 | } 14 | 15 | /** 16 | * Underlying Spark context 17 | * @return 18 | */ 19 | def sparkContext: SparkContext 20 | 21 | /** 22 | * Validates whether job is valid for this context 23 | * @param job job to validate 24 | * @return 25 | */ 26 | def validateJob(job: SparkJobBase): SparkJobValidation = 27 | if (isValidJob(job)) 28 | SparkJobValid() 29 | else 30 | SparkJobInvalid(s"Job ${job.toString} doesn't match context $this.") 31 | 32 | /** 33 | * Validates whether job is valid for this context 34 | * Should be implemented in concrete classes. 
35 | * @param job job to validate 36 | * @return 37 | */ 38 | def isValidJob(job: SparkJobBase): Boolean 39 | 40 | /** 41 | * This method should be called during cleanup 42 | */ 43 | def stop() 44 | } 45 | -------------------------------------------------------------------------------- /spark-job-rest-api/src/main/scala/api/SparkJob.scala: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import com.typesafe.config.Config 4 | import org.apache.spark.SparkContext 5 | 6 | sealed trait SparkJobValidation { 7 | // NOTE(harish): We tried using lazy eval here by passing in a function 8 | // instead, which worked fine with tests but when run with the job-server 9 | // it would just hang and timeout. This is something worth investigating 10 | def &&(sparkValidation: SparkJobValidation): SparkJobValidation = this match { 11 | case x => x 12 | } 13 | } 14 | case class SparkJobValid() extends SparkJobValidation 15 | case class SparkJobInvalid(reason: String) extends SparkJobValidation 16 | 17 | /** 18 | * This trait is the main API for Spark jobs submitted to the Job Server. 19 | */ 20 | trait SparkJobBase { 21 | type C 22 | /** 23 | * This is the entry point for a Spark Job Server to execute Spark jobs. 24 | * This function should create or reuse RDDs and return the result at the end, which the 25 | * Job Server will cache or display. 26 | * @param sc a SparkContext for the job. May be reused across jobs. 27 | * @param jobConfig the Typesafe Config object passed into the job request 28 | * @return the job result 29 | */ 30 | def runJob(sc: C, jobConfig: Config): Any 31 | 32 | /** 33 | * This method is called by the job server to allow jobs to validate their input and reject 34 | * invalid job requests. If SparkJobInvalid is returned, then the job server returns 400 35 | * to the user. 36 | * NOTE: this method should return very quickly. If it responds slowly then the job server may time out 37 | * trying to start this job. 38 | * @return either SparkJobValid or SparkJobInvalid 39 | */ 40 | def validate(sc: C, config: Config): SparkJobValidation 41 | } 42 | 43 | trait SparkJob extends SparkJobBase { 44 | type C = SparkContext 45 | } 46 | -------------------------------------------------------------------------------- /spark-job-rest-api/src/main/scala/responses/JobStates.scala: -------------------------------------------------------------------------------- 1 | package responses 2 | 3 | /** 4 | * States of Spark jobs. 5 | */ 6 | object JobStates { 7 | 8 | sealed abstract class JobState(val name: String) { 9 | override def toString = name 10 | } 11 | 12 | case object RUNNING extends JobState("Running") 13 | 14 | case object ERROR extends JobState("Error") 15 | 16 | case object FINISHED extends JobState("Finished") 17 | 18 | case object QUEUED extends JobState("Queued") 19 | 20 | } 21 | -------------------------------------------------------------------------------- /spark-job-rest-api/src/main/scala/responses/ResponseObjects.scala: -------------------------------------------------------------------------------- 1 | package responses 2 | 3 | import spray.json.DefaultJsonProtocol._ 4 | 5 | /** 6 | * Created by raduc on 24/04/15. 
7 | */ 8 | 9 | case class Context(contextName: String, sparkUiPort: String) 10 | 11 | object Context { 12 | implicit val logJson = jsonFormat2(apply) 13 | } 14 | 15 | case class Contexts(contexts: Array[Context]) 16 | 17 | object Contexts { 18 | implicit val logJson = jsonFormat1(apply) 19 | } 20 | 21 | case class Job(jobId: String, contextName: String, status: String, result: String, startTime: Long) 22 | 23 | object Job { 24 | implicit val logJson = jsonFormat5(apply) 25 | } 26 | 27 | case class Jobs(jobs: Array[Job]) 28 | 29 | object Jobs { 30 | implicit val logJson = jsonFormat1(apply) 31 | } 32 | 33 | case class JarInfo(name: String, size: Long, timestamp: Long) 34 | 35 | object JarInfo { 36 | implicit val logJson = jsonFormat3(apply) 37 | } 38 | 39 | case class JarsInfo(jars: Array[JarInfo]) 40 | 41 | object JarsInfo { 42 | implicit val logJson = jsonFormat1(apply) 43 | } 44 | 45 | case class ErrorResponse(error: String) 46 | 47 | object ErrorResponse { 48 | implicit val logJson = jsonFormat1(apply) 49 | } 50 | 51 | case class SimpleMessage(message: String) 52 | 53 | object SimpleMessage { 54 | implicit val logJson = jsonFormat1(apply) 55 | } 56 | 57 | 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /spark-job-rest-client/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.xpatterns 6 | spark-job-rest-client 7 | 0.3.2 8 | jar 9 | 10 | spark-job-rest-client 11 | https://github.com/Atigeo/spark-job-rest 12 | 13 | The Http Spray Client for Spark-Job-Rest. 14 | 15 | 16 | 17 | The Apache License, Version 2.0 18 | http://www.apache.org/licenses/LICENSE-2.0.txt 19 | 20 | 21 | 22 | 23 | 24 | Radu Chilom 25 | raduchilom@gmail.com 26 | Atigeo 27 | http://www.atigeo.com 28 | 29 | 30 | 31 | 32 | scm:git:git@github.com:Atigeo/spark-job-rest.git 33 | scm:git:git@github.com:Atigeo/spark-job-rest.git 34 | git@github.com:Atigeo/spark-job-rest.git 35 | 36 | 37 | 38 | 39 | ossrh 40 | https://oss.sonatype.org/content/repositories/snapshots 41 | 42 | 43 | ossrh 44 | https://oss.sonatype.org/service/local/staging/deploy/maven2/ 45 | 46 | 47 | 48 | 49 | 50 | mvnrepository 51 | http://repo1.maven.org/maven2 52 | 53 | 54 | Akka repository 55 | http://repo.akka.io/releases 56 | 57 | 58 | cloudera-repo-releases 59 | https://repository.cloudera.com/artifactory/repo/ 60 | 61 | 62 | 63 | 64 | UTF-8 65 | 2.10.3 66 | 2.10 67 | 68 | 69 | 70 | 71 | io.spray 72 | spray-client 73 | 1.2.1 74 | 75 | 76 | com.typesafe 77 | config 78 | 1.2.1 79 | 80 | 81 | com.typesafe.akka 82 | akka-actor_2.10 83 | 2.3.4 84 | 85 | 86 | com.xpatterns 87 | spark-job-rest-api 88 | ${version} 89 | 90 | 91 | org.slf4j 92 | slf4j-api 93 | 1.7.10 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | src/main/resources 102 | 103 | * 104 | 105 | false 106 | 107 | 108 | ${project.artifactId} 109 | 110 | 111 | org.apache.maven.plugins 112 | maven-compiler-plugin 113 | 3.1 114 | 115 | 1.7 116 | 1.7 117 | 118 | 119 | 120 | net.alchim31.maven 121 | scala-maven-plugin 122 | 3.2.0 123 | 124 | 125 | scala-compile-first 126 | process-resources 127 | 128 | add-source 129 | compile 130 | 131 | 132 | 133 | scala-test-compile 134 | process-test-resources 135 | 136 | testCompile 137 | 138 | 139 | 140 | doc 141 | generate-sources 142 | 143 | doc-jar 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | release 156 | 157 | 158 | 159 | org.sonatype.plugins 160 | nexus-staging-maven-plugin 161 | 1.6.3 162 | true 163 | 164 | ossrh 165 | 
https://oss.sonatype.org/ 166 | true 167 | 168 | 169 | 170 | 171 | org.apache.maven.plugins 172 | maven-source-plugin 173 | 2.2.1 174 | 175 | 176 | attach-sources 177 | 178 | jar-no-fork 179 | 180 | 181 | 182 | 183 | 184 | org.apache.maven.plugins 185 | maven-javadoc-plugin 186 | 2.9.1 187 | 188 | 189 | attach-javadocs 190 | 191 | jar 192 | 193 | 194 | 195 | 196 | 197 | org.apache.maven.plugins 198 | maven-gpg-plugin 199 | 1.5 200 | 201 | 202 | sign-artifacts 203 | verify 204 | 205 | sign 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | -------------------------------------------------------------------------------- /spark-job-rest-client/src/main/scala/client/SparkJobRestClient.scala: -------------------------------------------------------------------------------- 1 | package client 2 | 3 | import java.io.File 4 | import java.util.concurrent.TimeUnit 5 | 6 | import akka.actor.{ActorSystem} 7 | import akka.util.Timeout 8 | import org.slf4j.LoggerFactory 9 | import spray.http._ 10 | import spray.client.pipelining._ 11 | import responses._ 12 | import spray.httpx.SprayJsonSupport.sprayJsonUnmarshaller 13 | import spray.httpx.UnsuccessfulResponseException 14 | import spray.httpx.unmarshalling.Unmarshaller 15 | import spray.json.DefaultJsonProtocol._ 16 | 17 | import scala.concurrent.duration.Duration 18 | import scala.concurrent.{Await, Future} 19 | import scala.util.{Failure, Success} 20 | 21 | /** 22 | * Created by raduc on 23/04/15. 23 | */ 24 | class SparkJobRestClient(serverAddress: String)(implicit system: ActorSystem) { 25 | import system.dispatcher 26 | val log = LoggerFactory.getLogger(getClass) 27 | 28 | val contextsRoute = "/contexts" 29 | val jobsRoute = "/jobs" 30 | val jarsRoute = "/jars" 31 | val heartBeatRoute = "/heartbeat" 32 | 33 | val SEPARATOR = "/" 34 | 35 | implicit val timeout = Timeout(30, TimeUnit.SECONDS) 36 | 37 | 38 | // ============ Contexts Route ============ 39 | @throws(classOf[Exception]) 40 | def getContexts() : Contexts = { 41 | 42 | val pipeline: HttpRequest => Future[Contexts] = sendReceive ~> unmarshal[Contexts] 43 | 44 | val response: Future[Contexts] = pipeline(Get(serverAddress + contextsRoute)) 45 | 46 | Await.ready(response, Duration.create(30, TimeUnit.SECONDS)).value.get match { 47 | 48 | case Success(contexts: Contexts) => { 49 | return contexts 50 | } 51 | case Failure(e) => { 52 | log.error("Failed request: ", e) 53 | throw e 54 | } 55 | 56 | } 57 | 58 | null 59 | } 60 | 61 | @throws(classOf[Exception]) 62 | def getContext() : Context = { 63 | 64 | val pipeline: HttpRequest => Future[Context] = sendReceive ~> unmarshal[Context] 65 | 66 | val response: Future[Context] = pipeline(Get(serverAddress + contextsRoute)) 67 | 68 | Await.ready(response, Duration.create(30, TimeUnit.SECONDS)).value.get match { 69 | 70 | case Success(context: Context) => { 71 | return context 72 | } 73 | case Failure(e: UnsuccessfulResponseException) => { 74 | log.error("Unsuccessful response: ", e) 75 | throw e 76 | } 77 | case Failure(e) => { 78 | log.error("Failed request: ", e) 79 | throw e 80 | } 81 | 82 | } 83 | 84 | null 85 | } 86 | 87 | @throws(classOf[Exception]) 88 | def checkIfContextExists(contextName: String) : Boolean = { 89 | 90 | val pipeline: HttpRequest => Future[Context] = sendReceive ~> unmarshal[Context] 91 | 92 | val response: Future[Context] = pipeline(Get(serverAddress + contextsRoute + SEPARATOR + contextName)) 93 | 94 | Await.ready(response, Duration.create(30, TimeUnit.SECONDS)).value.get match { 95 | 96 | case Success(context: 
Context) => { 97 | return true 98 | } 99 | case Failure(e: UnsuccessfulResponseException) => { 100 | log.error("Unsuccessful response: ", e) 101 | return false 102 | } 103 | case Failure(e: Throwable) => { 104 | log.error("Unsuccessful request: ", e) 105 | throw e 106 | } 107 | 108 | } 109 | 110 | false 111 | } 112 | 113 | @throws(classOf[Exception]) 114 | def deleteContext(contextName: String) : Boolean = { 115 | 116 | val pipeline: HttpRequest => Future[SimpleMessage] = sendReceive ~> unmarshal[SimpleMessage] 117 | 118 | val response: Future[SimpleMessage] = pipeline(Delete(serverAddress + contextsRoute + SEPARATOR + contextName)) 119 | 120 | Await.ready(response, Duration.create(30, TimeUnit.SECONDS)).value.get match { 121 | 122 | case Success(simpleMessage: SimpleMessage) => { 123 | return true 124 | } 125 | case Failure(e: UnsuccessfulResponseException) => { 126 | log.error("Unsuccessful response: ", e) 127 | throw e 128 | } 129 | case Failure(e: Throwable) => { 130 | log.error("Unsuccessful request: ", e) 131 | throw e 132 | } 133 | 134 | } 135 | 136 | false 137 | } 138 | 139 | @throws(classOf[Exception]) 140 | def createContext(contextName: String, parameters: Map[String, String]) : Context = { 141 | 142 | val body = createParametersString(parameters) 143 | 144 | val pipeline: HttpRequest => Future[Context] = sendReceive ~> unmarshal[Context] 145 | 146 | val response: Future[Context] = pipeline(Post(serverAddress + contextsRoute + SEPARATOR + contextName, body)) 147 | 148 | Await.ready(response, Duration.create(30, TimeUnit.SECONDS)).value.get match { 149 | 150 | case Success(context: Context) => { 151 | return context 152 | } 153 | case Failure(e: UnsuccessfulResponseException) => { 154 | log.error("Unsuccessful response: ", e) 155 | throw e 156 | } 157 | case Failure(e: Throwable) => { 158 | log.error("Unsuccessful request: ", e) 159 | throw e 160 | } 161 | 162 | } 163 | 164 | null 165 | } 166 | 167 | // ============ Jobs Route ============ 168 | @throws(classOf[Exception]) 169 | def getJobs() : Jobs = { 170 | 171 | val pipeline: HttpRequest => Future[Jobs] = sendReceive ~> unmarshal[Jobs] 172 | 173 | val response: Future[Jobs] = pipeline(Get(serverAddress + jobsRoute)) 174 | 175 | Await.ready(response, Duration.create(30, TimeUnit.SECONDS)).value.get match { 176 | 177 | case Success(jobs: Jobs) => { 178 | return jobs 179 | } 180 | case Failure(e) => { 181 | log.error("Failed request: ", e) 182 | throw e 183 | } 184 | 185 | } 186 | 187 | null 188 | } 189 | 190 | @throws(classOf[Exception]) 191 | def getJob(jobId: String, contextName: String) : Job = { 192 | 193 | val pipeline: HttpRequest => Future[Job] = sendReceive ~> unmarshal[Job] 194 | 195 | val response: Future[Job] = pipeline(Get(serverAddress + jobsRoute + SEPARATOR + jobId + "?contextName=" + contextName)) 196 | 197 | Await.ready(response, Duration.create(30, TimeUnit.SECONDS)).value.get match { 198 | 199 | case Success(job: Job) => { 200 | return job 201 | } 202 | case Failure(e: UnsuccessfulResponseException) => { 203 | log.error("Unsuccessful response: ", e) 204 | throw e 205 | } 206 | case Failure(e: Throwable) => { 207 | log.error("Unsuccessful request: ", e) 208 | throw e 209 | } 210 | 211 | } 212 | 213 | null 214 | } 215 | 216 | @throws(classOf[Exception]) 217 | def runJob(runningClass: String, contextName: String, parameters: Map[String, String]) : Job = { 218 | 219 | val body = createParametersString(parameters) 220 | 221 | val pipeline: HttpRequest => Future[Job] = sendReceive ~> unmarshal[Job] 222 | 223 | val 
response: Future[Job] = pipeline(Post(serverAddress + jobsRoute + "?runningClass=" + runningClass + "&contextName=" + contextName, body)) 224 | 225 | Await.ready(response, Duration.create(30, TimeUnit.SECONDS)).value.get match { 226 | 227 | case Success(job: Job) => { 228 | return job 229 | } 230 | case Failure(e: UnsuccessfulResponseException) => { 231 | log.error("Unsuccessful response: ", e) 232 | throw e 233 | } 234 | case Failure(e: Throwable) => { 235 | log.error("Unsuccessful request: ", e) 236 | throw e 237 | } 238 | 239 | } 240 | 241 | null 242 | } 243 | 244 | // ============ Jars Route ============ 245 | @throws(classOf[Exception]) 246 | def getJars() : JarsInfo = { 247 | 248 | val pipeline: HttpRequest => Future[JarsInfo] = sendReceive ~> unmarshal[JarsInfo] 249 | 250 | val response: Future[JarsInfo] = pipeline(Get(serverAddress + jarsRoute)) 251 | 252 | Await.ready(response, Duration.create(30, TimeUnit.SECONDS)).value.get match { 253 | 254 | case Success(jarsInfo: JarsInfo) => { 255 | return jarsInfo 256 | } 257 | case Failure(e) => { 258 | log.error("Failed request: ", e) 259 | throw e 260 | } 261 | 262 | } 263 | 264 | null 265 | } 266 | 267 | @throws(classOf[Exception]) 268 | def deleteJar(jarName: String) : Boolean = { 269 | 270 | val pipeline: HttpRequest => Future[SimpleMessage] = sendReceive ~> unmarshal[SimpleMessage] 271 | 272 | val response: Future[SimpleMessage] = pipeline(Delete(serverAddress + jarsRoute + SEPARATOR + jarName)) 273 | 274 | Await.ready(response, Duration.create(30, TimeUnit.SECONDS)).value.get match { 275 | 276 | case Success(simpleMessage: SimpleMessage) => { 277 | return true 278 | } 279 | case Failure(e: UnsuccessfulResponseException) => { 280 | log.error("Unsuccessful response: ", e) 281 | return false 282 | } 283 | case Failure(e: Throwable) => { 284 | log.error("Unsuccessful request: ", e) 285 | throw e 286 | } 287 | 288 | } 289 | 290 | false 291 | } 292 | 293 | @throws(classOf[Exception]) 294 | def uploadJar(jarName: String, jarPath: String) : JarInfo = { 295 | 296 | val pipeline: HttpRequest => Future[JarInfo] = sendReceive ~> unmarshal[JarInfo] 297 | 298 | val body = MultipartFormData(Seq(BodyPart(new File(jarPath), jarName, MediaTypes.`application/java-archive`))) 299 | 300 | val response: Future[JarInfo] = pipeline(Post(serverAddress + jarsRoute , body)) 301 | 302 | Await.ready(response, Duration.create(30, TimeUnit.SECONDS)).value.get match { 303 | 304 | case Success(jarInfo: JarInfo) => { 305 | return jarInfo 306 | } 307 | case Failure(e: UnsuccessfulResponseException) => { 308 | log.error("Unsuccessful response: ", e) 309 | throw e 310 | } 311 | case Failure(e: Throwable) => { 312 | log.error("Unsuccessful request: ", e) 313 | throw e 314 | } 315 | 316 | } 317 | 318 | null 319 | } 320 | 321 | def createParametersString(parameters: Map[String, String]): String = { 322 | parameters.foldLeft("") { case (acc, (key, value)) => { 323 | acc + key + "=" + value + "\n" 324 | } 325 | } 326 | } 327 | } 328 | -------------------------------------------------------------------------------- /spark-job-rest-sql/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.xpatterns 6 | spark-job-rest-sql 7 | 0.3.2 8 | jar 9 | 10 | spark-job-rest-sql 11 | http://maven.apache.org 12 | 13 | 14 | 15 | mvnrepository 16 | http://repo1.maven.org/maven2 17 | 18 | 19 | cloudera-repo-releases 20 | https://repository.cloudera.com/artifactory/repo/ 21 | 22 | 23 | Akka repository 24 | 
http://repo.akka.io/releases 25 | 26 | 27 | 28 | 29 | 30 | UTF-8 31 | 2.10.3 32 | 2.10 33 | 34 | 35 | 36 | 37 | org.apache.spark 38 | spark-core_2.10 39 | 1.3.1 40 | provided 41 | 42 | 43 | 44 | org.apache.spark 45 | spark-hive_2.10 46 | 1.3.1 47 | provided 48 | 49 | 50 | 51 | org.apache.spark 52 | spark-sql_2.10 53 | 1.3.1 54 | provided 55 | 56 | 57 | 58 | com.xpatterns 59 | spark-job-rest-api 60 | ${version} 61 | provided 62 | 63 | 64 | 65 | com.xpatterns 66 | spark-job-rest 67 | ${version} 68 | provided 69 | 70 | 71 | 72 | com.typesafe 73 | config 74 | 1.2.1 75 | provided 76 | 77 | 78 | 79 | 80 | junit 81 | junit 82 | 4.4 83 | test 84 | 85 | 86 | 87 | org.scalatest 88 | scalatest_2.10 89 | 2.2.4 90 | test 91 | 92 | 93 | 94 | 95 | 96 | 97 | src/main/resources 98 | 99 | * 100 | 101 | false 102 | 103 | 104 | ${project.artifactId} 105 | 106 | 107 | org.apache.maven.plugins 108 | maven-compiler-plugin 109 | 3.1 110 | 111 | 1.7 112 | 1.7 113 | 114 | 115 | 116 | net.alchim31.maven 117 | scala-maven-plugin 118 | 3.2.0 119 | 120 | 121 | scala-compile-first 122 | process-resources 123 | 124 | add-source 125 | compile 126 | 127 | 128 | 129 | scala-test-compile 130 | process-test-resources 131 | 132 | testCompile 133 | 134 | 135 | 136 | doc 137 | generate-sources 138 | 139 | doc-jar 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | -------------------------------------------------------------------------------- /spark-job-rest-sql/src/main/scala/api/SparkSqlJob.scala: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import org.apache.spark.sql.SQLContext 4 | 5 | trait SparkSqlJob extends SparkJobBase { 6 | type C = SQLContext 7 | } 8 | -------------------------------------------------------------------------------- /spark-job-rest-sql/src/main/scala/context/HiveContextFactory.scala: -------------------------------------------------------------------------------- 1 | package context 2 | 3 | import api.{ContextLike, SparkJobBase, SparkSqlJob} 4 | import com.typesafe.config.Config 5 | import org.apache.spark.SparkContext 6 | import org.apache.spark.sql.hive.HiveContext 7 | import org.slf4j.LoggerFactory 8 | 9 | /** 10 | * Factory which creates Hive context. 
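 *
 * A minimal sketch of how this factory is wired in; it mirrors the `context` section of the
 * default `application.conf` shipped in resources and is illustrative, not normative:
 * {{{
 *   context {
 *     # Context factory that will be dynamically loaded to instantiate the job context
 *     job-context-factory = "context.HiveContextFactory"
 *   }
 * }}}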
11 | */ 12 | class HiveContextFactory extends SQLContextFactory { 13 | type C = HiveContext with ContextLike 14 | val logger = LoggerFactory.getLogger(getClass) 15 | 16 | def makeContext(config: Config, sc: SparkContext): C = { 17 | logger.info(s"Creating Hive context for Spark context $sc.") 18 | new HiveContext(sc) with ContextLike { 19 | val contextClass = classOf[HiveContext].getName 20 | def isValidJob(job: SparkJobBase) = job.isInstanceOf[SparkSqlJob] 21 | def stop() = sparkContext.stop() 22 | } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /spark-job-rest-sql/src/main/scala/context/SQLContextFactory.scala: -------------------------------------------------------------------------------- 1 | package context 2 | 3 | import com.typesafe.config.Config 4 | import org.apache.spark.SparkContext 5 | 6 | import scala.util.Try 7 | 8 | trait SQLContextFactory extends JobContextFactory { 9 | /** 10 | * Creates Spark context from class specified under [[SQLContextFactory.sparkContextFactoryClassNameConfigEntry]] 11 | * config entry or from [[JobContextFactory.defaultFactoryClassName]] 12 | * @param config general configuration 13 | * @param contextName context name 14 | * @return 15 | */ 16 | def makeContext(config: Config, contextName: String): C = { 17 | val sparkContext = getSparkContextFactory(config) 18 | .makeContext(config: Config, contextName: String) 19 | .asInstanceOf[SparkContext] 20 | makeContext(config, sparkContext) 21 | } 22 | 23 | /** 24 | * Creates SQL context for specified Spark context. 25 | * Should be implemented by concrete SQL context factory 26 | * @param config general configuration 27 | * @param sparkContext underlying Spark context 28 | * @return 29 | */ 30 | def makeContext(config: Config, sparkContext: SparkContext): C 31 | 32 | /** 33 | * Loads factory for Spark context. 34 | * @param config general configuration as in [[JobContextFactory.getFactory()]] 35 | * @return 36 | */ 37 | def getSparkContextFactory(config: Config): JobContextFactory = { 38 | val className = Try { 39 | config.getString(SQLContextFactory.sparkContextFactoryClassNameConfigEntry) 40 | }.getOrElse(JobContextFactory.defaultFactoryClassName) 41 | JobContextFactory.getFactory(className) 42 | } 43 | } 44 | 45 | object SQLContextFactory { 46 | val sparkContextFactoryClassNameConfigEntry = "context.spark-context-factory" 47 | } 48 | -------------------------------------------------------------------------------- /spark-job-rest-sql/src/main/scala/context/SparkSQLContextFactory.scala: -------------------------------------------------------------------------------- 1 | package context 2 | 3 | import api.{ContextLike, SparkJobBase, SparkSqlJob} 4 | import com.typesafe.config.Config 5 | import org.apache.spark.SparkContext 6 | import org.apache.spark.sql.SQLContext 7 | import org.slf4j.LoggerFactory 8 | 9 | /** 10 | * Factory which creates simple SQL context. 
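 *
 * Selected the same way as the other job context factories; a minimal sketch, assuming the
 * default Spark context factory from `application.conf`:
 * {{{
 *   context {
 *     job-context-factory   = "context.SparkSQLContextFactory"
 *     spark-context-factory = "context.SparkContextFactory"
 *   }
 * }}}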
11 | */ 12 | class SparkSQLContextFactory extends SQLContextFactory { 13 | type C = SQLContext with ContextLike 14 | val logger = LoggerFactory.getLogger(getClass) 15 | 16 | def makeContext(config: Config, sc: SparkContext): C = { 17 | logger.info(s"Creating SQL context for Spark context $sc.") 18 | new SQLContext(sc) with ContextLike { 19 | val contextClass = classOf[SQLContext].getName 20 | def isValidJob(job: SparkJobBase) = job.isInstanceOf[SparkSqlJob] 21 | def stop() = sparkContext.stop() 22 | } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /spark-job-rest-sql/src/test/scala/context/HiveContextFactorySpec.scala: -------------------------------------------------------------------------------- 1 | package context 2 | 3 | import api.ContextLike 4 | import com.typesafe.config.ConfigFactory 5 | import org.apache.spark.sql.hive.HiveContext 6 | import org.junit.runner.RunWith 7 | import org.scalatest._ 8 | import org.scalatest.junit.JUnitRunner 9 | 10 | import scala.util.Try 11 | 12 | /** 13 | * Test suite for [[HiveContextFactory]]. 14 | */ 15 | @RunWith(classOf[JUnitRunner]) 16 | class HiveContextFactorySpec extends WordSpec with MustMatchers with BeforeAndAfter { 17 | type C = HiveContext with ContextLike 18 | 19 | var hiveContext: C = _ 20 | 21 | val hiveContextFactory = new HiveContextFactory() 22 | 23 | // Clean Spark context after each test 24 | after { 25 | Try{ hiveContext.stop() } 26 | } 27 | 28 | "HiveContextFactory" should { 29 | "create Hive context" in { 30 | hiveContext = hiveContextFactory.makeContext(config, this.getClass.getName) 31 | hiveContext.sparkContext.appName mustEqual this.getClass.getName 32 | } 33 | 34 | "stop underlying Spark context if context is stopped" in { 35 | hiveContext = hiveContextFactory.makeContext(config, "context1") 36 | hiveContext.stop() 37 | hiveContext = hiveContextFactory.makeContext(config, "context2") 38 | hiveContext.sparkContext.appName mustEqual "context2" 39 | } 40 | } 41 | 42 | val config = ConfigFactory.parseString( 43 | """ 44 | |{ 45 | | context.jars = [], 46 | | spark.master = "local" 47 | |} 48 | """.stripMargin) 49 | } 50 | -------------------------------------------------------------------------------- /spark-job-rest-sql/src/test/scala/context/SQLContextFactorySpec.scala: -------------------------------------------------------------------------------- 1 | package context 2 | 3 | import api.{ContextLike, SparkJobBase} 4 | import com.typesafe.config.{Config, ConfigFactory} 5 | import org.apache.spark.SparkContext 6 | import org.apache.spark.sql.SQLContext 7 | import org.apache.spark.sql.hive.HiveContext 8 | import org.junit.runner.RunWith 9 | import org.scalatest._ 10 | import org.scalatest.junit.JUnitRunner 11 | import utils.ContextUtils.configToSparkConf 12 | 13 | import scala.util.Try 14 | 15 | trait FakeContext 16 | 17 | class FakeJobContextFactory extends JobContextFactory { 18 | type C = ContextLike 19 | def makeContext(config: Config, contextName: String): ContextLike = { 20 | val sparkConf = configToSparkConf(config, contextName) 21 | new SparkContext(sparkConf) with ContextLike with FakeContext { 22 | val contextClass = classOf[FakeContext].getName 23 | override def isValidJob(job: SparkJobBase): Boolean = true 24 | override def sparkContext: SparkContext = this 25 | } 26 | } 27 | } 28 | 29 | /** 30 | * Test suite for [[SQLContextFactory]]. 
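 * Exercises factory resolution through the `context.job-context-factory` and
 * `context.spark-context-factory` configuration entries (see the configs at the bottom of
 * this spec), using the [[FakeJobContextFactory]] defined above as a stand-in Spark context factory.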
31 | */ 32 | @RunWith(classOf[JUnitRunner]) 33 | class SQLContextFactorySpec extends WordSpec with MustMatchers with BeforeAndAfter { 34 | type C <: ContextLike 35 | 36 | var sqlContext: C = _ 37 | 38 | // Clean up Spark context after each test 39 | after { 40 | Try{ sqlContext.stop() } 41 | } 42 | 43 | "SQLContextFactory" should { 44 | "create SQL context" in { 45 | sqlContext = JobContextFactory.makeContext(sqlContextFactoryConfig, "test").asInstanceOf[C] 46 | sqlContext.isInstanceOf[SQLContext] mustEqual true 47 | sqlContext.sparkContext.isInstanceOf[SparkContext] mustEqual true 48 | sqlContext.sparkContext.appName mustEqual "test" 49 | } 50 | 51 | "create SQL context on top of specified Spark context factory" in { 52 | sqlContext = JobContextFactory.makeContext(hiveSqlFactoryWithCustomSparkContextConfig, "test").asInstanceOf[C] 53 | sqlContext.isInstanceOf[HiveContext] mustEqual true 54 | sqlContext.sparkContext.isInstanceOf[FakeContext] mustEqual true 55 | sqlContext.sparkContext.appName mustEqual "test" 56 | } 57 | } 58 | 59 | val sqlContextFactoryConfig = ConfigFactory.parseString( 60 | """ 61 | |{ 62 | | context.jars = [], 63 | | context.job-context-factory = "context.SparkSQLContextFactory" 64 | | spark.master = "local", 65 | | spark.app.id = "test" 66 | |} 67 | """.stripMargin).resolve() 68 | 69 | val hiveSqlFactoryWithCustomSparkContextConfig = ConfigFactory.parseString( 70 | """ 71 | |{ 72 | | context.jars = [], 73 | | context.job-context-factory = "context.HiveContextFactory" 74 | | context.spark-context-factory = "context.FakeJobContextFactory" 75 | | spark.master = "local", 76 | | spark.app.id = "test" 77 | |} 78 | """.stripMargin).resolve() 79 | } 80 | -------------------------------------------------------------------------------- /spark-job-rest-sql/src/test/scala/context/SparkSQLContextFactorySpec.scala: -------------------------------------------------------------------------------- 1 | package context 2 | 3 | import api.ContextLike 4 | import com.typesafe.config.ConfigFactory 5 | import org.apache.spark.sql.SQLContext 6 | import org.junit.runner.RunWith 7 | import org.scalatest._ 8 | import org.scalatest.junit.JUnitRunner 9 | 10 | import scala.util.Try 11 | 12 | /** 13 | * Test suite for [[HiveContextFactory]]. 
14 | */ 15 | @RunWith(classOf[JUnitRunner]) 16 | class SparkSQLContextFactorySpec extends WordSpec with MustMatchers with BeforeAndAfter { 17 | type C = SQLContext with ContextLike 18 | 19 | var sqlContext: C = _ 20 | 21 | val sqlContextFactory = new SparkSQLContextFactory() 22 | 23 | // Clean up Spark context after each test 24 | after { 25 | Try{ sqlContext.stop() } 26 | } 27 | 28 | "SQLContextFactory" should { 29 | "create SQL context" in { 30 | sqlContext = sqlContextFactory.makeContext(config, this.getClass.getName) 31 | sqlContext.sparkContext.appName mustEqual this.getClass.getName 32 | } 33 | 34 | "stop underlying Spark context if context is stopped" in { 35 | sqlContext = sqlContextFactory.makeContext(config, "context1") 36 | sqlContext.stop() 37 | sqlContext = sqlContextFactory.makeContext(config, "context2") 38 | sqlContext.sparkContext.appName mustEqual "context2" 39 | } 40 | } 41 | 42 | val config = ConfigFactory.parseString( 43 | """ 44 | |{ 45 | | context.jars = [], 46 | | spark.master = "local" 47 | |} 48 | """.stripMargin) 49 | } 50 | -------------------------------------------------------------------------------- /spark-job-rest/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.xpatterns 6 | spark-job-rest 7 | 0.3.2 8 | jar 9 | 10 | spark-job-rest 11 | http://maven.apache.org 12 | 13 | 14 | 15 | mvnrepository 16 | http://repo1.maven.org/maven2 17 | 18 | 19 | cloudera-repo-releases 20 | https://repository.cloudera.com/artifactory/repo/ 21 | 22 | 23 | Akka repository 24 | http://repo.akka.io/releases 25 | 26 | 27 | 28 | 29 | UTF-8 30 | 3.0.5.RELEASE 31 | 1.2.3 32 | 2.10.3 33 | 2.10 34 | 2.3.4 35 | 1.3.1 36 | 37 | 38 | 39 | 40 | 41 | org.apache.spark 42 | spark-core_2.10 43 | 1.3.1 44 | provided 45 | 46 | 47 | 48 | com.xpatterns 49 | spark-job-rest-api 50 | ${version} 51 | 52 | 53 | 54 | com.google.code.gson 55 | gson 56 | 2.3.1 57 | 58 | 59 | 60 | 61 | io.spray 62 | spray-client 63 | ${spray.io.version} 64 | 65 | 66 | 67 | io.spray 68 | spray-routing 69 | ${spray.io.version} 70 | 71 | 72 | 73 | io.spray 74 | spray-can 75 | ${spray.io.version} 76 | 77 | 78 | 79 | io.spray 80 | spray-caching 81 | ${spray.io.version} 82 | 83 | 84 | 85 | com.google.code.findbugs 86 | jsr305 87 | 2.0.3 88 | 89 | 90 | 91 | com.fasterxml.jackson.core 92 | jackson-annotations 93 | 2.4.4 94 | 95 | 96 | 97 | commons-cli 98 | commons-cli 99 | 1.2 100 | 101 | 102 | 103 | log4j 104 | log4j 105 | 1.2.17 106 | 107 | 108 | 109 | com.typesafe 110 | config 111 | 1.2.1 112 | 113 | 114 | 115 | joda-time 116 | joda-time 117 | 2.7 118 | 119 | 120 | 121 | org.joda 122 | joda-convert 123 | 1.7 124 | 125 | 126 | 127 | 128 | org.apache.httpcomponents 129 | httpclient 130 | 4.3.5 131 | 132 | 133 | 134 | org.apache.httpcomponents 135 | httpcore 136 | 4.3.2 137 | 138 | 139 | 140 | 141 | io.spray 142 | spray-testkit 143 | 1.2.1 144 | 145 | 146 | com.typesafe 147 | config 148 | 149 | 150 | akka-actor_2.10 151 | com.typesafe.akka 152 | 153 | 154 | test 155 | 156 | 157 | 158 | com.typesafe.akka 159 | akka-testkit_2.10 160 | ${akka.version} 161 | test 162 | 163 | 164 | 165 | junit 166 | junit 167 | 4.4 168 | test 169 | 170 | 171 | 172 | org.scalatest 173 | scalatest_2.10 174 | 2.2.4 175 | test 176 | 177 | 178 | 179 | com.xpatterns 180 | spark-job-rest-client 181 | ${version} 182 | test 183 | 184 | 185 | 186 | 187 | 188 | 189 | src/main/resources 190 | 191 | deploy.conf 192 | deploy-settings.sh 193 | 194 | false 195 | 196 | 197 | ${project.artifactId} 198 | 
199 | 200 | 201 | org.scala-tools 202 | maven-scala-plugin 203 | 2.15.2 204 | 205 | 206 | org.apache.maven.plugins 207 | maven-compiler-plugin 208 | 2.0.2 209 | 210 | 211 | 212 | 213 | 214 | org.apache.maven.plugins 215 | maven-compiler-plugin 216 | 3.2 217 | 218 | 1.7 219 | 1.7 220 | 221 | 222 | 223 | org.scala-tools 224 | maven-scala-plugin 225 | 226 | 227 | scala-compile-first 228 | process-resources 229 | 230 | add-source 231 | compile 232 | 233 | 234 | 235 | scala-test-compile 236 | process-test-resources 237 | 238 | testCompile 239 | 240 | 241 | 242 | 243 | 244 | 245 | org.apache.maven.plugins 246 | maven-shade-plugin 247 | 2.2 248 | 249 | 250 | 251 | package 252 | 253 | shade 254 | 255 | 256 | 257 | 258 | reference.conf 259 | 260 | 261 | 262 | 263 | *:* 264 | 265 | META-INF/*.SF 266 | META-INF/*.DSA 267 | META-INF/*.RSA 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | org.codehaus.mojo 278 | appassembler-maven-plugin 279 | 1.8.1 280 | 281 | 282 | 283 | 284 | server.Main 285 | main-server.sh 286 | 287 | 288 | 289 | src/main/resources 290 | resources 291 | true 292 | true 293 | ${project.build.directory}/temp_build 294 | 295 | 296 | 297 | 298 | package 299 | 300 | assemble 301 | 302 | 303 | 304 | 305 | 306 | org.apache.maven.plugins 307 | maven-antrun-plugin 308 | 309 | 310 | package 311 | 312 | run 313 | 314 | 315 | 316 | 318 | 320 | 322 | 324 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | org.apache.maven.plugins 333 | maven-assembly-plugin 334 | 2.2.1 335 | 336 | 337 | src/main/assembly/archive.xml 338 | 339 | false 340 | 341 | 342 | 343 | make-assembly 344 | package 345 | 346 | single 347 | 348 | 349 | 350 | 351 | 352 | 353 | org.apache.maven.plugins 354 | maven-surefire-plugin 355 | 2.7 356 | 357 | true 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/assembly/archive.xml: -------------------------------------------------------------------------------- 1 | 5 | archive 6 | 7 | tar.gz 8 | 9 | 10 | 11 | ${project.build.directory}/temp_build 12 | / 13 | 14 | 15 | ${basedir}/resources 16 | /resources 17 | 18 | 19 | ${project.build.directory} 20 | / 21 | 22 | spark-job-rest.jar 23 | 24 | 25 | 26 | 27 | 28 | ${basedir}/src/main/scripts/start_server.sh 29 | /bin/ 30 | 31 | 32 | ${basedir}/src/main/scripts/stop_server.sh 33 | /bin/ 34 | 35 | 36 | ${basedir}/src/main/scripts/restart_server.sh 37 | /bin/ 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/resources/application.conf: -------------------------------------------------------------------------------- 1 | # spark default configuration 2 | spark.executor.memory=2g 3 | spark.mesos.coarse=false 4 | spark.scheduler.mode=FAIR 5 | spark.cores.max=2 6 | spark.master="local" 7 | spark.path=${SPARK_HOME} 8 | spark.default.parallelism=384 9 | spark.storage.memoryFraction=0.3 10 | spark.shuffle.memoryFraction=0.6 11 | spark.shuffle.compress=true 12 | spark.shuffle.spill-compress=true 13 | spark.reducer.maxMbInFlight=48 14 | spark.akka.frameSize=100 15 | spark.akka.threads=4 16 | spark.akka.timeout=100 17 | spark.task.maxFailures=4 18 | spark.shuffle.consolidateFiles=true 19 | spark.deploy.spreadOut=true 20 | spark.shuffle.spill=false 21 | spark.kryo.referenceTracking=false 22 | 23 | #Default Spark Driver JVM memory 24 | driver.xmxMemory = 1g 25 | 26 | # application configuration 27 | appConf{ 28 | # This ip on which to deploy the apis 29 | 
web.services.ip="0.0.0.0" 30 | # The port on which to deploy the apis 31 | web.services.port=8097 32 | # Implicit akka timeout 33 | timeout=1000000 34 | # Remote context initialization 35 | init { 36 | # Implicit sleep (milliseconds) before sending init message 37 | sleep=3000 38 | # Tries before consider remote context as dead 39 | tries=20 40 | # Timeout for each attempt (milliseconds) 41 | retry-timeout=1000 42 | # Inteval beetween attempts to reach remote context (milliseconds) 43 | retry-interval=1500 44 | } 45 | # The port where the range for actor system starts 46 | actor.systems.first.port = 11000 47 | # The port where the range for spark ui starts 48 | spark.ui.first.port = 16000 49 | # The path to the folder where to keep the jars 50 | jars.path = ${JAR_PATH} 51 | } 52 | 53 | context{ 54 | # Path to context process work directory 55 | contexts-base-dir = ${CONTEXTS_BASE_DIR} 56 | # Amount of jobs which can be executed on context in parallel. Zero means infinit concurency. 57 | cuncurrent-jobs = 0 58 | # Context factory that will be dynamically loaded to instantiate job context 59 | job-context-factory = "context.HiveContextFactory" 60 | # Spark context factory that will be used for non-Spark job contexts (SQL or Hive) 61 | spark-context-factory = "context.SparkContextFactory" 62 | } 63 | 64 | manager { 65 | akka { 66 | log-dead-letters = 1 67 | actor { 68 | provider = "akka.remote.RemoteActorRefProvider" 69 | } 70 | remote { 71 | log-remote-lifecycle-events = off 72 | enabled-transports = ["akka.remote.netty.tcp"] 73 | log-sent-messages = on 74 | log-received-messages = on 75 | netty.tcp { 76 | transport-class = "akka.remote.transport.netty.NettyTransport" 77 | hostname = "127.0.0.1" 78 | port = 4042 79 | maximum-frame-size = 256000b 80 | } 81 | } 82 | } 83 | 84 | spray.can.server { 85 | # uncomment the next line for making this an HTTPS example 86 | # ssl-encryption = on 87 | idle-timeout = 61 s 88 | request-timeout = 60 s 89 | parsing.max-content-length = 200m 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/resources/context_start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Script to start the job server 3 | set -e 4 | 5 | get_abs_script_path() { 6 | pushd . >/dev/null 7 | cd $(dirname $0) 8 | appdir=$(pwd) 9 | popd >/dev/null 10 | } 11 | get_abs_script_path 12 | 13 | parentdir="$(dirname "$appdir")" 14 | 15 | classpathParam=$1 16 | contextName=$2 17 | port=$3 18 | xmxMemory=$4 19 | processDir=$5 20 | 21 | echo "classpathParam = $classpathParam" 22 | echo "contextName = $contextName" 23 | echo "port = $port" 24 | 25 | 26 | GC_OPTS="-XX:+UseConcMarkSweepGC 27 | -verbose:gc -XX:+PrintGCTimeStamps -Xloggc:$appdir/gc.out 28 | -XX:MaxPermSize=512m 29 | -XX:+CMSClassUnloadingEnabled" 30 | 31 | JAVA_OPTS="-Xmx$xmxMemory -XX:MaxDirectMemorySize=512M 32 | -XX:+HeapDumpOnOutOfMemoryError -Djava.net.preferIPv4Stack=true 33 | -Dcom.sun.management.jmxremote.authenticate=false 34 | -Dcom.sun.management.jmxremote.ssl=false" 35 | 36 | MAIN="server.MainContext" 37 | 38 | if [ -f "$appdir/settings.sh" ]; then 39 | . $appdir/settings.sh 40 | else 41 | echo "Missing $appdir/settings.sh, exiting" 42 | exit 1 43 | fi 44 | 45 | if [ -z "$SPARK_HOME" ]; then 46 | echo "Please set SPARK_HOME or put it in $appdir/settings.sh first" 47 | exit 1 48 | fi 49 | 50 | # Pull in other env vars in spark config, such as MESOS_NATIVE_LIBRARY 51 | . 
$SPARK_CONF_HOME/spark-env.sh 52 | 53 | mkdir -p $LOG_DIR 54 | 55 | LOGGING_OPTS="-Dlog4j.configuration=log4j.properties 56 | -DLOG_DIR=$LOG_DIR 57 | -DLOG_FILE=$contextName.log" 58 | 59 | # For Mesos 60 | #CONFIG_OVERRIDES="-Dspark.executor.uri=$SPARK_EXECUTOR_URI " 61 | # For Mesos/Marathon, use the passed-in port 62 | if [ "$PORT" != "" ]; then 63 | CONFIG_OVERRIDES+="-Dspark.jobserver.port=$PORT " 64 | fi 65 | 66 | # The following should be exported in order to be accessible in Config substitutions 67 | export SPARK_HOME 68 | export APP_DIR 69 | export JAR_PATH 70 | export CONTEXTS_BASE_DIR 71 | 72 | # job server jar needs to appear first so its deps take higher priority 73 | # need to explicitly include app dir in classpath so logging configs can be found 74 | CLASSPATH="$parentdir/resources:$appdir:$parentdir/spark-job-rest.jar:$classpathParam:$EXTRA_CLASSPATH:$($SPARK_HOME/bin/compute-classpath.sh)" 75 | echo "CLASSPATH = ${CLASSPATH}" 76 | 77 | # Create context process directory 78 | mkdir -p "$processDir" 79 | 80 | cd "$processDir" 81 | exec java -cp $CLASSPATH $GC_OPTS $JAVA_OPTS $LOGGING_OPTS $CONFIG_OVERRIDES $MAIN $contextName $port 82 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # 3 | # LOG4J CONFIG FILE 4 | # 5 | # Possible Log Levels: 6 | # FATAL, ERROR, WARN, INFO, DEBUG 7 | # 8 | #------------------------------------------------------------------------------ 9 | 10 | log4j.rootCategory = INFO, defaultFile 11 | 12 | #------------------------------------------------------------------------------ 13 | # 14 | # The following properties configure the Daily Rolling File appender. 15 | # See http://logging.apache.org/log4j/docs/api/index.html for details. 
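# Note: ${LOG_DIR} and ${LOG_FILE} are not defined in this file; they are passed in as JVM
# system properties by the launcher scripts (e.g. context_start.sh sets -DLOG_DIR and -DLOG_FILE).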
16 | # 17 | #------------------------------------------------------------------------------ 18 | 19 | log4j.appender.defaultFile = org.apache.log4j.DailyRollingFileAppender 20 | log4j.appender.defaultFile.File = ${LOG_DIR}/${LOG_FILE} 21 | log4j.appender.defaultFile.Append = true 22 | log4j.appender.defaultFile.DatePattern = '.'yyyy-MM-dd 23 | log4j.appender.defaultFile.layout = org.apache.log4j.PatternLayout 24 | log4j.appender.defaultFile.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} %c{1} [%p] %m%n -------------------------------------------------------------------------------- /spark-job-rest/src/main/resources/settings.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CDIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) 4 | APP_DIR="${CDIR}/../" 5 | DEPLOY_CONFIG="${CDIR}/deploy-settings.sh" 6 | 7 | # Load optional deployment settings 8 | if [ -f "${DEPLOY_CONFIG}" ]; then 9 | source "${DEPLOY_CONFIG}" 10 | fi 11 | 12 | if [ -z "${SPARK_HOME}" ]; then 13 | SPARK_HOME="/opt/spark" 14 | fi 15 | 16 | if [ -z "${SPARK_CONF_HOME}" ]; then 17 | SPARK_CONF_HOME=$SPARK_HOME/conf 18 | fi 19 | 20 | # Only needed for Mesos deploys 21 | #SPARK_EXECUTOR_URI=/home/spark/spark-1.1.0.tar.gz 22 | 23 | # Logging directory 24 | LOG_DIR=${SJR_LOG_DIR-"${APP_DIR}/logs"} 25 | 26 | # Extra classes: 27 | EXTRA_CLASSPATH="${JSR_EXTRA_CLASSPATH}" 28 | 29 | # Set proper jar path 30 | JAR_PATH=${SJR_JAR_PATH-"${APP_DIR}/jars"} 31 | 32 | # Root location for contexts process directories 33 | CONTEXTS_BASE_DIR=${SJR_CONTEXTS_BASE_DIR-"${APP_DIR}/contexts"} -------------------------------------------------------------------------------- /spark-job-rest/src/main/resources/webapp/assets/fonts/bootstrap/glyphicons-halflings-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VeritoneAlpha/spark-job-rest/9ab6629141361bad3acbbad61b347d3227fa9ed0/spark-job-rest/src/main/resources/webapp/assets/fonts/bootstrap/glyphicons-halflings-regular.eot -------------------------------------------------------------------------------- /spark-job-rest/src/main/resources/webapp/assets/fonts/bootstrap/glyphicons-halflings-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VeritoneAlpha/spark-job-rest/9ab6629141361bad3acbbad61b347d3227fa9ed0/spark-job-rest/src/main/resources/webapp/assets/fonts/bootstrap/glyphicons-halflings-regular.ttf -------------------------------------------------------------------------------- /spark-job-rest/src/main/resources/webapp/assets/fonts/bootstrap/glyphicons-halflings-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VeritoneAlpha/spark-job-rest/9ab6629141361bad3acbbad61b347d3227fa9ed0/spark-job-rest/src/main/resources/webapp/assets/fonts/bootstrap/glyphicons-halflings-regular.woff -------------------------------------------------------------------------------- /spark-job-rest/src/main/resources/webapp/assets/fonts/bootstrap/glyphicons-halflings-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VeritoneAlpha/spark-job-rest/9ab6629141361bad3acbbad61b347d3227fa9ed0/spark-job-rest/src/main/resources/webapp/assets/fonts/bootstrap/glyphicons-halflings-regular.woff2 
-------------------------------------------------------------------------------- /spark-job-rest/src/main/resources/webapp/assets/img/halftone.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VeritoneAlpha/spark-job-rest/9ab6629141361bad3acbbad61b347d3227fa9ed0/spark-job-rest/src/main/resources/webapp/assets/img/halftone.png -------------------------------------------------------------------------------- /spark-job-rest/src/main/resources/webapp/assets/img/loading-sm.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VeritoneAlpha/spark-job-rest/9ab6629141361bad3acbbad61b347d3227fa9ed0/spark-job-rest/src/main/resources/webapp/assets/img/loading-sm.gif -------------------------------------------------------------------------------- /spark-job-rest/src/main/resources/webapp/assets/img/loading.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VeritoneAlpha/spark-job-rest/9ab6629141361bad3acbbad61b347d3227fa9ed0/spark-job-rest/src/main/resources/webapp/assets/img/loading.gif -------------------------------------------------------------------------------- /spark-job-rest/src/main/resources/webapp/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Spark-Job-Rest 6 | 7 | 8 | 9 |
249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/resources/webapp/js/bootstrap-notify.min.js: -------------------------------------------------------------------------------- 1 | /* Project: Bootstrap Growl = v3.0.2 | Description: Turns standard Bootstrap alerts into "Growl-like" notifications. | Author: Mouse0270 aka Robert McIntosh | License: MIT License | Website: https://github.com/mouse0270/bootstrap-growl */ 2 | !function(t){"function"==typeof define&&define.amd?define(["jquery"],t):t("object"==typeof exports?require("jquery"):jQuery)}(function(t){function e(e,i,n){var i={content:{message:"object"==typeof i?i.message:i,title:i.title?i.title:"",icon:i.icon?i.icon:"",url:i.url?i.url:"#",target:i.target?i.target:"-"}};n=t.extend(!0,{},i,n),this.settings=t.extend(!0,{},s,n),this._defaults=s,"-"==this.settings.content.target&&(this.settings.content.target=this.settings.url_target),this.animations={start:"webkitAnimationStart oanimationstart MSAnimationStart animationstart",end:"webkitAnimationEnd oanimationend MSAnimationEnd animationend"},"number"==typeof this.settings.offset&&(this.settings.offset={x:this.settings.offset,y:this.settings.offset}),this.init()}var s={element:"body",position:null,type:"info",allow_dismiss:!0,newest_on_top:!1,showProgressbar:!1,placement:{from:"top",align:"right"},offset:20,spacing:10,z_index:1031,delay:5e3,timer:1e3,url_target:"_blank",mouse_over:null,animate:{enter:"animated fadeInDown",exit:"animated fadeOutUp"},onShow:null,onShown:null,onClose:null,onClosed:null,icon_type:"class",template:''};String.format=function(){for(var t=arguments[0],e=1;e .progress-bar').removeClass("progress-bar-"+t.settings.type),t.settings.type=i[e],this.$ele.addClass("alert-"+i[e]).find('[data-notify="progressbar"] > .progress-bar').addClass("progress-bar-"+i[e]);break;case"icon":var n=this.$ele.find('[data-notify="icon"]');"class"==t.settings.icon_type.toLowerCase()?n.removeClass(t.settings.content.icon).addClass(i[e]):(n.is("img")||n.find("img"),n.attr("src",i[e]));break;case"progress":var a=t.settings.delay-t.settings.delay*(i[e]/100);this.$ele.data("notify-delay",a),this.$ele.find('[data-notify="progressbar"] > div').attr("aria-valuenow",i[e]).css("width",i[e]+"%");break;case"url":this.$ele.find('[data-notify="url"]').attr("href",i[e]);break;case"target":this.$ele.find('[data-notify="url"]').attr("target",i[e]);break;default:this.$ele.find('[data-notify="'+e+'"]').html(i[e])}var o=this.$ele.outerHeight()+parseInt(t.settings.spacing)+parseInt(t.settings.offset.y);t.reposition(o)},close:function(){t.close()}}},buildNotify:function(){var e=this.settings.content;this.$ele=t(String.format(this.settings.template,this.settings.type,e.title,e.message,e.url,e.target)),this.$ele.attr("data-notify-position",this.settings.placement.from+"-"+this.settings.placement.align),this.settings.allow_dismiss||this.$ele.find('[data-notify="dismiss"]').css("display","none"),(this.settings.delay<=0&&!this.settings.showProgressbar||!this.settings.showProgressbar)&&this.$ele.find('[data-notify="progressbar"]').remove()},setIcon:function(){"class"==this.settings.icon_type.toLowerCase()?this.$ele.find('[data-notify="icon"]').addClass(this.settings.content.icon):this.$ele.find('[data-notify="icon"]').is("img")?this.$ele.find('[data-notify="icon"]').attr("src",this.settings.content.icon):this.$ele.find('[data-notify="icon"]').append('Notify 
Icon')},styleURL:function(){this.$ele.find('[data-notify="url"]').css({backgroundImage:"url(data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7)",height:"100%",left:"0px",position:"absolute",top:"0px",width:"100%",zIndex:this.settings.z_index+1}),this.$ele.find('[data-notify="dismiss"]').css({position:"absolute",right:"10px",top:"5px",zIndex:this.settings.z_index+2})},placement:function(){var e=this,s=this.settings.offset.y,i={display:"inline-block",margin:"0px auto",position:this.settings.position?this.settings.position:"body"===this.settings.element?"fixed":"absolute",transition:"all .5s ease-in-out",zIndex:this.settings.z_index},n=!1,a=this.settings;switch(t('[data-notify-position="'+this.settings.placement.from+"-"+this.settings.placement.align+'"]:not([data-closing="true"])').each(function(){return s=Math.max(s,parseInt(t(this).css(a.placement.from))+parseInt(t(this).outerHeight())+parseInt(a.spacing))}),1==this.settings.newest_on_top&&(s=this.settings.offset.y),i[this.settings.placement.from]=s+"px",this.settings.placement.align){case"left":case"right":i[this.settings.placement.align]=this.settings.offset.x+"px";break;case"center":i.left=0,i.right=0}this.$ele.css(i).addClass(this.settings.animate.enter),t(this.settings.element).append(this.$ele),1==this.settings.newest_on_top&&(s=parseInt(s)+parseInt(this.settings.spacing)+this.$ele.outerHeight(),this.reposition(s)),t.isFunction(e.settings.onShow)&&e.settings.onShow.call(this.$ele),this.$ele.one(this.animations.start,function(){n=!0}).one(this.animations.end,function(){t.isFunction(e.settings.onShown)&&e.settings.onShown.call(this)}),setTimeout(function(){n||t.isFunction(e.settings.onShown)&&e.settings.onShown.call(this)},600)},bind:function(){var e=this;if(this.$ele.find('[data-notify="dismiss"]').on("click",function(){e.close()}),this.$ele.mouseover(function(){t(this).data("data-hover","true")}).mouseout(function(){t(this).data("data-hover","false")}),this.$ele.data("data-hover","false"),this.settings.delay>0){e.$ele.data("notify-delay",e.settings.delay);var s=setInterval(function(){var t=parseInt(e.$ele.data("notify-delay"))-e.settings.timer;if("false"===e.$ele.data("data-hover")&&"pause"==e.settings.mouse_over||"pause"!=e.settings.mouse_over){var i=(e.settings.delay-t)/e.settings.delay*100;e.$ele.data("notify-delay",t),e.$ele.find('[data-notify="progressbar"] > div').attr("aria-valuenow",i).css("width",i+"%")}t<=-e.settings.timer&&(clearInterval(s),e.close())},e.settings.timer)}},close:function(){var e=this,s=parseInt(this.$ele.css(this.settings.placement.from)),i=!1;this.$ele.data("closing","true").addClass(this.settings.animate.exit),e.reposition(s),t.isFunction(e.settings.onClose)&&e.settings.onClose.call(this.$ele),this.$ele.one(this.animations.start,function(){i=!0}).one(this.animations.end,function(){t(this).remove(),t.isFunction(e.settings.onClosed)&&e.settings.onClosed.call(this)}),setTimeout(function(){i||(e.$ele.remove(),e.settings.onClosed&&e.settings.onClosed(e.$ele))},600)},reposition:function(e){var s=this,i='[data-notify-position="'+this.settings.placement.from+"-"+this.settings.placement.align+'"]:not([data-closing="true"])',n=this.$ele.nextAll(i);1==this.settings.newest_on_top&&(n=this.$ele.prevAll(i)),n.each(function(){t(this).css(s.settings.placement.from,e),e=parseInt(e)+parseInt(s.settings.spacing)+t(this).outerHeight()})}}),t.notify=function(t,s){var i=new e(this,t,s);return i.notify},t.notifyDefaults=function(e){return 
s=t.extend(!0,{},s,e)},t.notifyClose=function(e){"undefined"==typeof e||"all"==e?t("[data-notify]").find('[data-notify="dismiss"]').trigger("click"):t('[data-notify-position="'+e+'"]').find('[data-notify="dismiss"]').trigger("click")}}); -------------------------------------------------------------------------------- /spark-job-rest/src/main/resources/webapp/js/spin.min.js: -------------------------------------------------------------------------------- 1 | //fgnass.github.com/spin.js#v2.1.0 2 | !function(a,b){"object"==typeof exports?module.exports=b():"function"==typeof define&&define.amd?define(b):a.Spinner=b()}(this,function(){"use strict";function a(a,b){var c,d=document.createElement(a||"div");for(c in b)d[c]=b[c];return d}function b(a){for(var b=1,c=arguments.length;c>b;b++)a.appendChild(arguments[b]);return a}function c(a,b,c,d){var e=["opacity",b,~~(100*a),c,d].join("-"),f=.01+c/d*100,g=Math.max(1-(1-a)/b*(100-f),a),h=j.substring(0,j.indexOf("Animation")).toLowerCase(),i=h&&"-"+h+"-"||"";return m[e]||(k.insertRule("@"+i+"keyframes "+e+"{0%{opacity:"+g+"}"+f+"%{opacity:"+a+"}"+(f+.01)+"%{opacity:1}"+(f+b)%100+"%{opacity:"+a+"}100%{opacity:"+g+"}}",k.cssRules.length),m[e]=1),e}function d(a,b){var c,d,e=a.style;for(b=b.charAt(0).toUpperCase()+b.slice(1),d=0;d',c)}k.addRule(".spin-vml","behavior:url(#default#VML)"),h.prototype.lines=function(a,d){function f(){return e(c("group",{coordsize:k+" "+k,coordorigin:-j+" "+-j}),{width:k,height:k})}function h(a,h,i){b(m,b(e(f(),{rotation:360/d.lines*a+"deg",left:~~h}),b(e(c("roundrect",{arcsize:d.corners}),{width:j,height:d.scale*d.width,left:d.scale*d.radius,top:-d.scale*d.width>>1,filter:i}),c("fill",{color:g(d.color,a),opacity:d.opacity}),c("stroke",{opacity:0}))))}var i,j=d.scale*(d.length+d.width),k=2*d.scale*j,l=-(d.width+d.length)*d.scale*2+"px",m=e(f(),{position:"absolute",top:l,left:l});if(d.shadow)for(i=1;i<=d.lines;i++)h(i,-2,"progid:DXImageTransform.Microsoft.Blur(pixelradius=2,makeshadow=1,shadowopacity=.3)");for(i=1;i<=d.lines;i++)h(i);return b(a,m)},h.prototype.opacity=function(a,b,c,d){var e=a.firstChild;d=d.shadow&&d.lines||0,e&&b+d>1)+"px"})}for(var i,k=0,l=(f.lines-1)*(1-f.direction)/2;k 10 | private def respondWithCORSHeaders(origin: String, rh: Seq[HttpHeader]) = { 11 | val headers: List[HttpHeader] = List( 12 | HttpHeaders.`Access-Control-Allow-Origin`(SomeOrigins(List(origin))), 13 | HttpHeaders.`Access-Control-Allow-Credentials`(true), 14 | HttpHeaders.`Access-Control-Allow-Headers`("Origin", "X-Requested-With", "Content-Type", "Accept", "apiKey", "affiliationid") 15 | ) ++ rh.toList 16 | 17 | respondWithHeaders(headers) 18 | } 19 | private def respondWithCORSHeadersAllOrigins(rh: Seq[HttpHeader]) = { 20 | val headers: List[HttpHeader] = List( 21 | HttpHeaders.`Access-Control-Allow-Origin`(AllOrigins), 22 | HttpHeaders.`Access-Control-Allow-Credentials`(true), 23 | HttpHeaders.`Access-Control-Allow-Headers`("Origin", "X-Requested-With", "Content-Type", "Accept"), 24 | HttpHeaders.`Access-Control-Allow-Methods`(HttpMethods.DELETE, HttpMethods.GET, HttpMethods.POST ) 25 | ) ++ rh.toList 26 | 27 | respondWithHeaders(headers) 28 | } 29 | 30 | def corsFilter(origins: List[String], rh: HttpHeader*)(route: Route) = 31 | if (origins.contains("*")) 32 | respondWithCORSHeadersAllOrigins(rh)(route) 33 | else 34 | optionalHeaderValueByName("Origin") { 35 | case None => 36 | route 37 | case Some(clientOrigin) => { 38 | if (origins.contains(clientOrigin)) 39 | respondWithCORSHeaders(clientOrigin, rh)(route) 40 | else { 41 | // Maybe, 
a Rejection will fit better 42 | complete(StatusCodes.Forbidden, "Invalid origin") 43 | } 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/server/Controller.scala: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import akka.actor.{ActorRef, ActorSystem} 4 | import akka.pattern.ask 5 | import akka.util.Timeout 6 | import com.typesafe.config.{Config, ConfigFactory} 7 | import org.slf4j.LoggerFactory 8 | import responses._ 9 | import server.domain.actors.ContextActor.FailedInit 10 | import server.domain.actors.ContextManagerActor._ 11 | import server.domain.actors.JarActor._ 12 | import server.domain.actors.JobActor._ 13 | import server.domain.actors.getValueFromConfig 14 | import spray.http._ 15 | import spray.httpx.SprayJsonSupport.sprayJsonMarshaller 16 | import spray.routing.{Route, SimpleRoutingApp} 17 | 18 | import scala.concurrent.ExecutionContext.Implicits.global 19 | import scala.concurrent.duration._ 20 | import scala.util.{Failure, Success, Try} 21 | 22 | /** 23 | * Spark-Job-REST HTTP service for Web UI and REST API. 24 | */ 25 | class Controller(config: Config, contextManagerActor: ActorRef, jobManagerActor: ActorRef, jarActor: ActorRef, originalSystem: ActorSystem) 26 | extends SimpleRoutingApp with CORSDirectives{ 27 | 28 | implicit val system = originalSystem 29 | implicit val timeout: Timeout = 60 seconds 30 | 31 | val log = LoggerFactory.getLogger(getClass) 32 | log.info("Starting web service.") 33 | 34 | var StateKey = "state" 35 | var ResultKey = "result" 36 | 37 | // Get ip from config, "0.0.0.0" as default 38 | val webIp = getValueFromConfig(config, "appConf.web.services.ip", "0.0.0.0") 39 | val webPort = getValueFromConfig(config, "appConf.web.services.port", 8097) 40 | 41 | val route = jobRoute ~ contextRoute ~ indexRoute ~ jarRoute 42 | 43 | startServer(webIp, webPort) (route) map { 44 | case bound => log.info(s"Started web service: $bound") 45 | } onFailure { 46 | case e: Exception => 47 | log.error("Failed to start Spark-Job-REST web service", e) 48 | throw e 49 | } 50 | 51 | def indexRoute: Route = pathPrefix(""){ 52 | pathEnd { 53 | get { 54 | getFromResource("webapp/index.html") 55 | } 56 | } ~ 57 | options { 58 | corsFilter(List("*"), HttpHeaders.`Access-Control-Allow-Methods`(Seq(HttpMethods.OPTIONS, HttpMethods.GET))) { 59 | complete { 60 | "OK" 61 | } 62 | } 63 | } 64 | } ~ 65 | pathPrefix("assets"){ 66 | get { 67 | getFromResourceDirectory("webapp/assets") 68 | } ~ options { 69 | corsFilter(List("*"), HttpHeaders.`Access-Control-Allow-Methods`(Seq(HttpMethods.OPTIONS, HttpMethods.GET))) { 70 | complete { 71 | "OK" 72 | } 73 | } 74 | } 75 | } ~ 76 | pathPrefix("js"){ 77 | get { 78 | getFromResourceDirectory("webapp/js") 79 | } ~ options { 80 | corsFilter(List("*"), HttpHeaders.`Access-Control-Allow-Methods`(Seq(HttpMethods.OPTIONS, HttpMethods.GET))) { 81 | complete { 82 | "OK" 83 | } 84 | } 85 | } 86 | } ~ 87 | path("hearbeat") { 88 | get { 89 | complete { 90 | "Spark Job Rest is up and running!" 
91 | } ~ options { 92 | corsFilter(List("*"), HttpHeaders.`Access-Control-Allow-Methods`(Seq(HttpMethods.OPTIONS, HttpMethods.GET))) { 93 | respondWithMediaType(MediaTypes.`application/json`) { ctx => 94 | ctx.complete(StatusCodes.OK) 95 | } 96 | } 97 | } 98 | } 99 | } 100 | 101 | 102 | def jobRoute: Route = pathPrefix("jobs"){ 103 | pathEnd { 104 | get { 105 | corsFilter(List("*")) { 106 | respondWithMediaType(MediaTypes.`application/json`) { ctx => 107 | val resultFuture = jobManagerActor ? GetAllJobsStatus() 108 | resultFuture.map { 109 | case jobs: Jobs => ctx.complete(StatusCodes.OK, jobs) 110 | case e: Throwable => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(e.getMessage)) 111 | case x: Any => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(x.toString)) 112 | } 113 | } 114 | } 115 | } 116 | } ~ 117 | get { 118 | path(Segment) { jobId => 119 | parameters('contextName) { contextName => 120 | corsFilter(List("*")) { 121 | respondWithMediaType(MediaTypes.`application/json`) { ctx => 122 | val resultFuture = jobManagerActor ? JobStatusEnquiry(contextName, jobId) 123 | resultFuture.map { 124 | case job:Job => ctx.complete(StatusCodes.OK, job) 125 | case JobDoesNotExist() => ctx.complete(StatusCodes.BadRequest, ErrorResponse("JobId does not exist!")) 126 | case NoSuchContext => ctx.complete(StatusCodes.BadRequest, ErrorResponse("Context does not exist!")) 127 | case e: Throwable => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(e.getMessage)) 128 | case x: Any => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(x.toString)) 129 | } 130 | } 131 | } 132 | } 133 | } 134 | } ~ 135 | post { 136 | parameters('runningClass, 'contextName) { (runningClass, context) => 137 | entity(as[String]) { configString => 138 | corsFilter(List("*")) { 139 | respondWithMediaType(MediaTypes.`application/json`) { ctx => 140 | Try{ 141 | ConfigFactory.parseString(configString) 142 | } match { 143 | case Success(requestConfig) => 144 | val resultFuture = jobManagerActor ? RunJob(runningClass, context, requestConfig) 145 | resultFuture.map { 146 | case job: Job => ctx.complete(StatusCodes.OK, job) 147 | case NoSuchContext => ctx.complete(StatusCodes.BadRequest, ErrorResponse("No such context.")) 148 | case e: Exception => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(e.getMessage)) 149 | case x: Any => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(x.toString)) 150 | } 151 | case Failure(e) => ctx.complete(StatusCodes.BadRequest, ErrorResponse("Invalid parameter: " + e.getMessage)) 152 | } 153 | } 154 | } 155 | } 156 | } 157 | } ~ 158 | options { 159 | corsFilter(List("*"), HttpHeaders.`Access-Control-Allow-Methods`(Seq(HttpMethods.OPTIONS, HttpMethods.GET, HttpMethods.POST))) { 160 | complete { 161 | "OK" 162 | } 163 | } 164 | } 165 | 166 | } 167 | def contextRoute : Route = pathPrefix("contexts"){ 168 | post { 169 | path(Segment) { contextName => 170 | entity(as[String]) { configString => 171 | corsFilter(List("*")) { 172 | respondWithMediaType(MediaTypes.`application/json`) { ctx => 173 | Try{ 174 | ConfigFactory.parseString(configString) 175 | } match { 176 | case Success(requestConfig) => 177 | val resultFuture = contextManagerActor ? 
CreateContext(contextName, getValueFromConfig(requestConfig, "jars", ""), requestConfig) 178 | resultFuture.map { 179 | case context:Context => ctx.complete(StatusCodes.OK, context) 180 | case e: FailedInit => ctx.complete(StatusCodes.InternalServerError, ErrorResponse("Failed Init: " + e.message)) 181 | case ContextAlreadyExists => ctx.complete(StatusCodes.BadRequest, ErrorResponse("Context already exists.")) 182 | case e: Throwable => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(e.getMessage)) 183 | case x: Any => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(x.toString)) 184 | } 185 | case Failure(e) => ctx.complete(StatusCodes.BadRequest, ErrorResponse("Invalid parameters: " + e.getMessage)) 186 | } 187 | 188 | 189 | } 190 | } 191 | } 192 | } 193 | } ~ 194 | get { 195 | path(Segment) { contextName => 196 | corsFilter(List("*")) { 197 | val resultFuture = contextManagerActor ? GetContextInfo(contextName) 198 | respondWithMediaType(MediaTypes.`application/json`) { ctx => 199 | resultFuture.map { 200 | case context: Context => ctx.complete(StatusCodes.OK, context) 201 | case NoSuchContext => ctx.complete(StatusCodes.BadRequest, ErrorResponse("No such context.")) 202 | case x: Any => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(x.toString)) 203 | } 204 | } 205 | } 206 | } 207 | } ~ 208 | pathEnd { 209 | get { 210 | corsFilter(List("*")) { 211 | respondWithMediaType(MediaTypes.`application/json`) { ctx => 212 | val resultFuture = contextManagerActor ? GetAllContextsForClient() 213 | resultFuture.map { 214 | case contexts: Contexts => ctx.complete(StatusCodes.OK, contexts) 215 | case e: Exception => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(e.getMessage)) 216 | case x: Any => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(x.toString)) 217 | } 218 | } 219 | } 220 | } 221 | } ~ 222 | delete { 223 | path(Segment) { contextName => 224 | corsFilter(List("*")) { 225 | val resultFuture = contextManagerActor ? DeleteContext(contextName) 226 | respondWithMediaType(MediaTypes.`application/json`) { ctx => 227 | resultFuture.map { 228 | case Success => ctx.complete(StatusCodes.OK, SimpleMessage("Context deleted.")) 229 | case NoSuchContext => ctx.complete(StatusCodes.BadRequest, ErrorResponse("No such context.")) 230 | case x: Any => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(x.toString)) 231 | } 232 | } 233 | } 234 | } 235 | } ~ options { 236 | corsFilter(List("*"), HttpHeaders.`Access-Control-Allow-Methods`(Seq(HttpMethods.OPTIONS, HttpMethods.GET, HttpMethods.POST, HttpMethods.DELETE))) { 237 | complete { 238 | "OK" 239 | } 240 | } 241 | } 242 | 243 | } 244 | 245 | def jarRoute : Route = pathPrefix("jars"){ 246 | post { 247 | path(Segment) { jarName => 248 | entity(as[Array[Byte]]) { jarBytes => 249 | corsFilter(List("*")) { 250 | val resultFuture = jarActor ? 
AddJar(jarName, jarBytes) 251 | respondWithMediaType(MediaTypes.`application/json`) { ctx => 252 | resultFuture.map { 253 | case Success(jarInfo: JarInfo) => ctx.complete(StatusCodes.OK, jarInfo) 254 | case Failure(e) => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(e.getMessage)) 255 | case x: Any => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(x.toString)) 256 | } 257 | } 258 | } 259 | } 260 | } ~ 261 | pathEnd { 262 | entity(as[MultipartFormData]) { formData => 263 | corsFilter(List("*")) { 264 | respondWithMediaType(MediaTypes.`application/json`) { ctx => 265 | formData.fields.foreach { 266 | case bodyPart: BodyPart => 267 | val resultFuture = jarActor ? AddJar(bodyPart.filename.get, bodyPart.entity.data.toByteArray) 268 | resultFuture.map { 269 | case Success(jarInfo: JarInfo) => ctx.complete(StatusCodes.OK, jarInfo) 270 | case Failure(e) => 271 | log.error("Error uploading jar: ", e) 272 | ctx.complete(StatusCodes.BadRequest, "") 273 | case x: Any => ctx.complete(StatusCodes.InternalServerError, "") 274 | // TODO: Message is empty due to a bug on the Ui File Upload part. When fixed used ErrorResponse(e.getMessage) 275 | } 276 | } 277 | } 278 | } 279 | } 280 | } 281 | } ~ 282 | delete { 283 | path(Segment) { jarName => 284 | corsFilter(List("*")) { 285 | val resultFuture = jarActor ? DeleteJar(jarName) 286 | respondWithMediaType(MediaTypes.`application/json`) { ctx => 287 | resultFuture.map { 288 | case Success(message: String) => ctx.complete(StatusCodes.OK, SimpleMessage(message)) 289 | case NoSuchJar() => ctx.complete(StatusCodes.BadRequest,ErrorResponse("No such jar.")) 290 | case e: Throwable => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(e.getMessage)) 291 | case x: Any => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(x.toString)) 292 | } 293 | } 294 | } 295 | } 296 | } ~ 297 | pathEnd { 298 | get { 299 | corsFilter(List("*")) { 300 | respondWithMediaType(MediaTypes.`application/json`) { ctx => 301 | val future = jarActor ? GetAllJars() 302 | future.map { 303 | case jarsInfo: JarsInfo => ctx.complete(StatusCodes.OK, jarsInfo) 304 | case e: Throwable => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(e.getMessage)) 305 | case x: Any => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(x.toString)) 306 | } 307 | } 308 | } 309 | } 310 | } ~ options { 311 | corsFilter(List("*"), HttpHeaders.`Access-Control-Allow-Methods`(Seq(HttpMethods.OPTIONS, HttpMethods.GET, HttpMethods.POST, HttpMethods.DELETE))) { 312 | complete { 313 | "OK" 314 | } 315 | } 316 | } 317 | } 318 | 319 | } 320 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/server/Main.scala: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import akka.actor.{ActorRef, ActorSystem, Props} 4 | import akka.pattern.ask 5 | import com.typesafe.config.ConfigFactory 6 | import logging.LoggingOutputStream 7 | import server.domain.actors._ 8 | 9 | import scala.concurrent.Await 10 | 11 | /** 12 | * Spark-Job-REST entry point. 
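 *
 * Once started, the REST API and Web UI listen on `appConf.web.services.ip` /
 * `appConf.web.services.port` (0.0.0.0:8097 in the default `application.conf`). A quick
 * liveness check, assuming a local deployment on the default port (endpoint path spelled
 * exactly as registered in [[Controller]]):
 * {{{
 *   curl http://localhost:8097/hearbeat
 * }}}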
13 | */ 14 | object Main { 15 | def main(args: Array[String]) { 16 | 17 | LoggingOutputStream.redirectConsoleOutput 18 | 19 | // Loads deployment configuration `deploy.conf` on top of application defaults `application.conf` 20 | val defaultConfig = ConfigFactory.load("deploy").withFallback(ConfigFactory.load()) 21 | 22 | val masterConfig = defaultConfig.getConfig("manager") 23 | val system = ActorSystem("ManagerSystem", masterConfig) 24 | 25 | val supervisor = system.actorOf(Props(classOf[Supervisor]), "Supervisor") 26 | 27 | val jarActor = createActor(Props(new JarActor(defaultConfig)), "JarActor", system, supervisor) 28 | val contextManagerActor = createActor(Props(new ContextManagerActor(defaultConfig, jarActor)), "ContextManager", system, supervisor) 29 | val jobManagerActor = createActor(Props(new JobActor(defaultConfig, contextManagerActor)), "JobManager", system, supervisor) 30 | new Controller(defaultConfig, contextManagerActor, jobManagerActor, jarActor, system) 31 | } 32 | 33 | def createActor(props: Props, name: String, customSystem: ActorSystem, supervisor: ActorRef): ActorRef = { 34 | val actorRefFuture = ask(supervisor, (props, name)) 35 | Await.result(actorRefFuture, timeout.duration).asInstanceOf[ActorRef] 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/server/MainContext.scala: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import akka.actor.{ActorSystem, Props} 4 | import com.typesafe.config.ConfigFactory 5 | import logging.LoggingOutputStream 6 | import org.slf4j.LoggerFactory 7 | import server.domain.actors.ContextActor 8 | import utils.ActorUtils 9 | 10 | /** 11 | * Spark context container entry point. 
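 *
 * Normally launched in a separate JVM by `resources/context_start.sh`; a sketch of the final
 * exec line, with classpath and JVM options elided:
 * {{{
 *   java -cp $CLASSPATH ... server.MainContext <contextName> <port>
 * }}}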
12 | */ 13 | object MainContext { 14 | 15 | LoggingOutputStream.redirectConsoleOutput 16 | val log = LoggerFactory.getLogger(getClass) 17 | 18 | def main(args: Array[String]) { 19 | val contextName = args(0) 20 | val port = args(1).toInt 21 | 22 | log.info(s"Started new process for contextName = $contextName with port = $port") 23 | 24 | val defaultConfig = ConfigFactory.load("deploy").withFallback(ConfigFactory.load()) 25 | val config = ActorUtils.remoteConfig("localhost", port, defaultConfig) 26 | val system = ActorSystem(ActorUtils.PREFIX_CONTEXT_SYSTEM + contextName, config) 27 | 28 | system.actorOf(Props(new ContextActor(defaultConfig)), ActorUtils.PREFIX_CONTEXT_ACTOR + contextName) 29 | 30 | log.info(s"Initialized system ${ActorUtils.PREFIX_CONTEXT_SYSTEM}$contextName and actor ${ActorUtils.PREFIX_CONTEXT_SYSTEM}$contextName") 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/server/domain/actors/ContextActor.scala: -------------------------------------------------------------------------------- 1 | package server.domain.actors 2 | 3 | import akka.actor.{Actor, Terminated} 4 | import api._ 5 | import com.google.gson.Gson 6 | import com.typesafe.config.{Config, ConfigValueFactory} 7 | import context.JobContextFactory 8 | import org.apache.commons.lang.exception.ExceptionUtils 9 | import org.slf4j.LoggerFactory 10 | import responses.{Job, JobStates} 11 | import server.domain.actors.ContextActor._ 12 | import server.domain.actors.JobActor._ 13 | import utils.ActorUtils 14 | 15 | import scala.collection.JavaConverters._ 16 | import scala.collection.mutable 17 | import scala.concurrent.ExecutionContext.Implicits.global 18 | import scala.concurrent.Future 19 | import scala.util.{Failure, Success, Try} 20 | 21 | /** 22 | * Context actor messages 23 | */ 24 | object ContextActor { 25 | case class Initialize(contextName: String, config: Config, jarsForSpark: List[String]) 26 | case class Initialized() 27 | case class FailedInit(message: String) 28 | case class ShutDown() 29 | } 30 | 31 | /** 32 | * Context actor responsible for creation and managing Spark Context 33 | * @param localConfig config of the context application 34 | */ 35 | class ContextActor(localConfig: Config) extends Actor { 36 | import context.become 37 | 38 | val log = LoggerFactory.getLogger(getClass) 39 | var jobContext: ContextLike = _ 40 | var defaultConfig: Config = _ 41 | var jobStateMap = new mutable.HashMap[String, JobStatus]() with mutable.SynchronizedMap[String, JobStatus] 42 | 43 | var name = "" 44 | val gsonTransformer = new Gson() 45 | 46 | startWatchingManagerActor() 47 | 48 | /** 49 | * Initial actor mode when it responds to IsAwake message and can be initialized 50 | * @return 51 | */ 52 | override def receive: Receive = { 53 | case ContextManagerActor.IsAwake => 54 | sender ! ContextManagerActor.IsAwake 55 | 56 | case Initialize(contextName, config, jarsForSpark) => 57 | log.info(s"Received InitializeContext message : contextName=$contextName") 58 | log.info("Initializing context " + contextName) 59 | name = contextName 60 | 61 | try { 62 | defaultConfig = config.withValue("context.jars", ConfigValueFactory.fromAnyRef(jarsForSpark.asJava)) 63 | jobContext = JobContextFactory.makeContext(defaultConfig, name) 64 | 65 | sender ! Initialized() 66 | log.info("Successfully initialized context " + contextName) 67 | } catch { 68 | case e: Exception => 69 | log.error("Exception while initializing", e) 70 | sender ! 
FailedInit(ExceptionUtils.getStackTrace(e)) 71 | gracefullyShutdown() 72 | } 73 | 74 | become(initialized) 75 | } 76 | 77 | /** 78 | * Main actor mode when it can run jobs 79 | * @return 80 | */ 81 | def initialized: Receive = { 82 | case ShutDown() => 83 | log.info(s"Context received ShutDown message : contextName=$name") 84 | log.info(s"Shutting down SparkContext $name") 85 | 86 | gracefullyShutdown() 87 | 88 | case RunJob(runningClass, contextName, jobConfig, uuid) => 89 | log.info(s"Received RunJob message : runningClass=$runningClass contextName=$contextName uuid=$uuid ") 90 | jobStateMap += (uuid -> JobStarted()) 91 | 92 | Future { 93 | Try { 94 | val classLoader = Thread.currentThread.getContextClassLoader 95 | val runnableClass = classLoader.loadClass(runningClass) 96 | val sparkJob = runnableClass.newInstance.asInstanceOf[SparkJobBase] 97 | 98 | jobContext.validateJob(sparkJob) match { 99 | case SparkJobValid() => log.info(s"Job $uuid passed context validation.") 100 | case SparkJobInvalid(message) => throw new IllegalArgumentException(s"Invalid job $uuid: $message") 101 | } 102 | 103 | val jobConfigValidation = sparkJob.validate(jobContext.asInstanceOf[sparkJob.C], jobConfig.withFallback(defaultConfig)) 104 | jobConfigValidation match { 105 | case SparkJobInvalid(message) => throw new IllegalArgumentException(message) 106 | case SparkJobValid() => log.info("Job config validation passed.") 107 | } 108 | 109 | sparkJob.runJob(jobContext.asInstanceOf[sparkJob.C], jobConfig.withFallback(defaultConfig)) 110 | } 111 | } andThen { 112 | case Success(futureResult) => futureResult match { 113 | case Success(result) => 114 | log.info(s"Finished running job : runningClass=$runningClass contextName=$contextName uuid=$uuid ") 115 | jobStateMap += (uuid -> JobRunSuccess(gsonTransformer.toJson(result))) 116 | case Failure(e: Throwable) => 117 | jobStateMap += (uuid -> JobRunError(ExceptionUtils.getStackTrace(e))) 118 | log.error(s"Error running job : runningClass=$runningClass contextName=$contextName uuid=$uuid ", e) 119 | case x: Any => 120 | log.error("Received ANY from running job !!! " + x) 121 | } 122 | 123 | case Failure(e: Throwable) => 124 | jobStateMap += (uuid -> JobRunError(ExceptionUtils.getStackTrace(e))) 125 | log.error(s"Error running job : runningClass=$runningClass contextName=$contextName uuid=$uuid ", e) 126 | 127 | case x: Any => 128 | log.error("Received ANY from running job !!! " + x) 129 | } 130 | 131 | case Terminated(actor) => 132 | if (actor.path.toString.contains("Supervisor/ContextManager")) { 133 | log.info(s"Received Terminated message from: ${actor.path.toString}") 134 | log.warn("Shutting down the system because the ManagerSystem terminated.") 135 | gracefullyShutdown() 136 | } 137 | 138 | case JobStatusEnquiry(contextName, jobId) => 139 | val jobState = jobStateMap.getOrElse(jobId, JobDoesNotExist()) 140 | import JobStates._ 141 | jobState match { 142 | case x: JobRunSuccess => sender ! Job(jobId, name, FINISHED.toString, x.result, x.startTime) 143 | case e: JobRunError => sender ! Job(jobId, name, ERROR.toString, e.errorMessage, e.startTime) 144 | case x: JobStarted => sender ! Job(jobId, name, RUNNING.toString, "", x.startTime) 145 | case x: JobDoesNotExist => sender ! 
JobDoesNotExist 146 | } 147 | 148 | case GetAllJobsStatus() => 149 | import JobStates._ 150 | val jobsList = jobStateMap.map { 151 | case (id: String, x: JobRunSuccess) => Job(id, name, FINISHED.toString, x.result, x.startTime) 152 | case (id: String, e: JobRunError) => Job(id, name, ERROR.toString, e.errorMessage, e.startTime) 153 | case (id: String, x: JobStarted) => Job(id, name, RUNNING.toString, "", x.startTime) 154 | }.toList 155 | sender ! jobsList 156 | 157 | case x @ _ => 158 | log.info(s"Received UNKNOWN message type $x") 159 | } 160 | 161 | def gracefullyShutdown() { 162 | Option(jobContext).foreach(_.stop()) 163 | context.system.shutdown() 164 | } 165 | 166 | def startWatchingManagerActor() = { 167 | val managerPort = getValueFromConfig(localConfig, ActorUtils.PORT_PROPERTY_NAME, 4042) 168 | val managerHost = getValueFromConfig(localConfig, ActorUtils.HOST_PROPERTY_NAME, "127.0.0.1") 169 | log.info("Trying to watch the manager actor at : " + managerHost + ":" + managerPort) 170 | val managerActor = context.actorSelection(ActorUtils.getActorAddress("ManagerSystem", managerHost, managerPort, "Supervisor/ContextManager")) 171 | managerActor.resolveOne().onComplete { 172 | case Success(actorRef) => 173 | log.info(s"Now watching the ContextManager from this actor.") 174 | context.watch(actorRef) 175 | case x @ _ => log.info(s"Received message of type $x") 176 | } 177 | } 178 | } 179 | 180 | 181 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/server/domain/actors/ContextManagerActor.scala: -------------------------------------------------------------------------------- 1 | package server.domain.actors 2 | 3 | import java.util 4 | 5 | import akka.actor.{Actor, ActorRef, ActorSelection, Props} 6 | import akka.pattern.ask 7 | import com.typesafe.config.{Config, ConfigFactory} 8 | import org.apache.commons.lang.exception.ExceptionUtils 9 | import org.slf4j.LoggerFactory 10 | import responses.{Context, Contexts} 11 | import server.domain.actors.ContextManagerActor._ 12 | import server.domain.actors.JarActor.{GetJarsPathForAll, ResultJarsPathForAll} 13 | import utils.ActorUtils 14 | 15 | import scala.collection.mutable 16 | import scala.concurrent.ExecutionContext.Implicits.global 17 | import scala.concurrent.duration._ 18 | import scala.sys.process.{Process, ProcessBuilder} 19 | import scala.util.Success 20 | 21 | /** 22 | * Context management messages 23 | */ 24 | object ContextManagerActor { 25 | case class CreateContext(contextName: String, jars: String, config: Config) 26 | case class ContextInitialized(port: String) 27 | case class DeleteContext(contextName: String) 28 | case class ContextProcessTerminated(contextName: String, statusCode: Int) 29 | case class GetContext(contextName: String) 30 | case class GetContextInfo(contextName: String) 31 | case class GetAllContextsForClient() 32 | case class GetAllContexts() 33 | case class NoSuchContext() 34 | case class ContextAlreadyExists() 35 | case class DestroyProcess(process: Process) 36 | case class IsAwake() 37 | case class ContextInfo(contextName: String, sparkUiPort: String, @transient referenceActor: ActorSelection) 38 | } 39 | 40 | /** 41 | * Actor that creates, monitors and destroys contexts and corresponding processes. 
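 *
 * Rough lifecycle of a `CreateContext` request, as implemented by the handlers below (the message
 * values are illustrative only):
 * {{{
 *   contextManagerActor ? CreateContext("demoContext", "example-job.jar", contextConfig)
 * }}}
 * The actor reserves a free actor-system port (and a Spark UI port if none is configured), asks the
 * [[JarActor]] to resolve the jar list, launches a separate JVM through `context_start.sh` wrapped in
 * a [[ContextProcessActor]], and finally initializes the remote [[ContextActor]] with retries before
 * replying to the caller.
 *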
42 | * @param defaultConfig configuration defaults 43 | * @param jarActor actor that responsible for jars which may be included to context classpath 44 | */ 45 | class ContextManagerActor(defaultConfig: Config, jarActor: ActorRef) extends Actor { 46 | 47 | val log = LoggerFactory.getLogger(getClass) 48 | 49 | var lastUsedPort = getValueFromConfig(defaultConfig, "appConf.actor.systems.first.port", 11000) 50 | var lastUsedPortSparkUi = getValueFromConfig(defaultConfig, "appConf.spark.ui.first.port", 16000) 51 | 52 | val contextMap = new mutable.HashMap[String, ContextInfo]() with mutable.SynchronizedMap[String, ContextInfo] 53 | val processMap = new mutable.HashMap[String, ActorRef]() with mutable.SynchronizedMap[String, ActorRef] 54 | 55 | val sparkUIConfigPath: String = "spark.ui.port" 56 | 57 | override def receive: Receive = { 58 | case CreateContext(contextName, jars, config) => 59 | if (contextMap contains contextName) { 60 | sender ! ContextAlreadyExists 61 | } else if (jars.isEmpty) { 62 | sender ! ContextActor.FailedInit("jars property is not defined or is empty.") 63 | } else { 64 | //adding the default configs 65 | var mergedConfig = config.withFallback(defaultConfig) 66 | 67 | //The port for the actor system 68 | val port = ActorUtils.findAvailablePort(lastUsedPort) 69 | lastUsedPort = port 70 | 71 | //If not defined, setting the spark.ui port 72 | if (!config.hasPath(sparkUIConfigPath)) { 73 | mergedConfig = addSparkUiPortToConfig(mergedConfig) 74 | } 75 | 76 | val webSender = sender() 77 | log.info(s"Received CreateContext message : context=$contextName jars=$jars") 78 | 79 | val jarsFuture = jarActor ? GetJarsPathForAll(jars, contextName) 80 | jarsFuture map { 81 | case result @ ResultJarsPathForAll(pathForClasspath, pathForSpark) => 82 | log.info(s"Received jars path: $result") 83 | val processBuilder = createProcessBuilder(contextName, port, pathForClasspath, mergedConfig) 84 | val command = processBuilder.toString 85 | log.info(s"Starting new process for context $contextName: '$command'") 86 | val processActor = context.actorOf(Props(classOf[ContextProcessActor], processBuilder, contextName)) 87 | processMap += contextName -> processActor 88 | 89 | val host = getValueFromConfig(defaultConfig, ActorUtils.HOST_PROPERTY_NAME, "127.0.0.1") 90 | val actorRef = context.actorSelection(ActorUtils.getContextActorAddress(contextName, host, port)) 91 | sendInitMessage(contextName, port, actorRef, webSender, mergedConfig, pathForSpark) 92 | } onFailure { 93 | case e: Exception => 94 | log.error(s"Failed! ${ExceptionUtils.getStackTrace(e)}") 95 | webSender ! e 96 | } 97 | } 98 | 99 | case DeleteContext(contextName) => 100 | log.info(s"Received DeleteContext message : context=$contextName") 101 | if (contextMap contains contextName) { 102 | for ( 103 | contextInfo <- contextMap remove contextName; 104 | processRef <- processMap remove contextName 105 | ) { 106 | contextInfo.referenceActor ! ContextActor.ShutDown() 107 | sender ! Success 108 | 109 | // Terminate process 110 | processRef ! ContextProcessActor.Terminate() 111 | } 112 | } else { 113 | sender ! NoSuchContext 114 | } 115 | 116 | case ContextProcessTerminated(contextName, statusCode) => 117 | log.info(s"Received ContextProcessTerminated message : context=$contextName, statusCode=$statusCode") 118 | contextMap remove contextName foreach { 119 | case contextInfo: ContextInfo => 120 | log.error(s"Removing context $contextName due to corresponding process exit with status code $statusCode") 121 | contextInfo.referenceActor ! 
DeleteContext(contextName) 122 | } 123 | 124 | case GetContext(contextName) => 125 | log.info(s"Received GetContext message : context=$contextName") 126 | if (contextMap contains contextName) { 127 | sender ! contextMap(contextName).referenceActor 128 | } else { 129 | sender ! NoSuchContext 130 | } 131 | 132 | case GetContextInfo(contextName) => 133 | log.info(s"Received GetContext message : context=$contextName") 134 | if (contextMap contains contextName) { 135 | sender ! Context(contextName, contextMap(contextName).sparkUiPort) 136 | } else { 137 | sender ! NoSuchContext 138 | } 139 | 140 | case GetAllContextsForClient() => 141 | log.info(s"Received GetAllContexts message.") 142 | sender ! Contexts(contextMap.values.map(contextInfo => Context(contextInfo.contextName, contextInfo.sparkUiPort)).toArray) 143 | 144 | case GetAllContexts() => 145 | sender ! contextMap.values.map(_.referenceActor) 146 | log.info(s"Received GetAllContexts message.") 147 | } 148 | 149 | def sendInitMessage(contextName: String, port: Int, actorRef: ActorSelection, sender: ActorRef, config: Config, jarsForSpark: List[String]): Unit = { 150 | 151 | val sleepTime = getValueFromConfig(config, "appConf.init.sleep", 3000) 152 | val tries = config.getInt("appConf.init.tries") 153 | val retryTimeOut = config.getLong("appConf.init.retry-timeout") millis 154 | val retryInterval = config.getLong("appConf.init.retry-interval") millis 155 | val sparkUiPort = config.getString(sparkUIConfigPath) 156 | 157 | context.system.scheduler.scheduleOnce(sleepTime millis) { 158 | val isAwakeFuture = context.actorOf(ReTry.props(tries, retryTimeOut, retryInterval, actorRef)) ? IsAwake 159 | isAwakeFuture.map { 160 | case isAwake => 161 | log.info(s"Remote context actor is awaken: $isAwake") 162 | val initializationFuture = actorRef ? ContextActor.Initialize(contextName, config, jarsForSpark) 163 | initializationFuture map { 164 | case success: ContextActor.Initialized => 165 | log.info(s"Context '$contextName' initialized: $success") 166 | contextMap += contextName -> ContextInfo(contextName, sparkUiPort, actorRef) 167 | sender ! Context(contextName, sparkUiPort) 168 | case error @ ContextActor.FailedInit(reason) => 169 | log.error(s"Init failed for context $contextName", reason) 170 | sender ! error 171 | processMap.remove(contextName).get ! ContextProcessActor.Terminate() 172 | } onFailure { 173 | case e: Exception => 174 | log.error("FAILED to send init message!", e) 175 | sender ! ContextActor.FailedInit(ExceptionUtils.getStackTrace(e)) 176 | processMap.remove(contextName).get ! ContextProcessActor.Terminate() 177 | } 178 | } onFailure { 179 | case e: Exception => 180 | log.error("Refused to wait for remote actor, consider it as dead!", e) 181 | sender ! 
ContextActor.FailedInit(ExceptionUtils.getStackTrace(e)) 182 | } 183 | } 184 | } 185 | 186 | def addSparkUiPortToConfig(config: Config): Config = { 187 | lastUsedPortSparkUi = ActorUtils.findAvailablePort(lastUsedPortSparkUi) 188 | val map = new util.HashMap[String, String]() 189 | map.put(sparkUIConfigPath, lastUsedPortSparkUi.toString) 190 | val newConf = ConfigFactory.parseMap(map) 191 | newConf.withFallback(config) 192 | } 193 | 194 | def createProcessBuilder(contextName: String, port: Int, jarsForClasspath: String, config: Config): ProcessBuilder = { 195 | val scriptPath = ContextManagerActor.getClass.getClassLoader.getResource("context_start.sh").getPath 196 | val xmxMemory = getValueFromConfig(config, "driver.xmxMemory", "1g") 197 | 198 | // Create context process directory 199 | val processDirName = new java.io.File(defaultConfig.getString("context.contexts-base-dir")).toString + s"/$contextName" 200 | 201 | Process(scriptPath, Seq(jarsForClasspath, contextName, port.toString, xmxMemory, processDirName)) 202 | } 203 | } 204 | 205 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/server/domain/actors/ContextProcessActor.scala: -------------------------------------------------------------------------------- 1 | package server.domain.actors 2 | 3 | import akka.actor.Actor 4 | import org.slf4j.LoggerFactory 5 | 6 | import scala.concurrent.ExecutionContext.Implicits.global 7 | import scala.concurrent.duration._ 8 | import scala.sys.process.{Process, ProcessBuilder, ProcessLogger} 9 | 10 | object ContextProcessActor { 11 | case class Terminate() 12 | } 13 | 14 | class ContextProcessActor(processBuilder: ProcessBuilder, contextName: String) extends Actor { 15 | import ContextProcessActor._ 16 | 17 | val log = LoggerFactory.getLogger(s"$getClass::$contextName") 18 | 19 | class Slf4jProcessLogger extends ProcessLogger { 20 | def out(line: => String): Unit = log.info(line) 21 | def err(line: => String): Unit = log.error(line) 22 | def buffer[T](f: => T): T = f 23 | } 24 | 25 | val process: Process = processBuilder.run(new Slf4jProcessLogger) 26 | 27 | context.system.scheduler.scheduleOnce(1 seconds) { 28 | val statusCode = process.exitValue() 29 | 30 | if (statusCode < 0) { 31 | log.error(s"Context $contextName exit with error code $statusCode.") 32 | } else { 33 | log.info(s"Context process exit with status $statusCode") 34 | } 35 | 36 | context.parent ! ContextManagerActor.ContextProcessTerminated(contextName, statusCode) 37 | context.system.stop(self) 38 | } 39 | 40 | def receive: Receive = { 41 | case Terminate => 42 | log.info(s"Received Terminate message") 43 | context.system.scheduler.scheduleOnce(5 seconds) { 44 | process.destroy() 45 | context.system.stop(self) 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/server/domain/actors/JarActor.scala: -------------------------------------------------------------------------------- 1 | package server.domain.actors 2 | 3 | import java.io.File 4 | 5 | import akka.actor.{Actor, ActorLogging} 6 | import com.typesafe.config.Config 7 | import org.slf4j.LoggerFactory 8 | import responses.{JarsInfo, JarInfo} 9 | import server.domain.actors.JarActor._ 10 | import utils.{FileUtils, JarUtils} 11 | import scala.collection.mutable.ListBuffer 12 | import scala.util.{Failure, Success, Try} 13 | 14 | /** 15 | * Created by raduc on 04/11/14. 
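 *
 * The messages defined in the [[JarActor]] companion below cover uploading a jar as raw bytes,
 * deleting and listing jars, and resolving a comma-separated list of jar references (an uploaded jar
 * name, an absolute path, or an `hdfs://` URL) into a classpath string plus the list of paths handed
 * to Spark. A sketch of the resolution call, with purely illustrative jar names:
 * {{{
 *   jarActor ? GetJarsPathForAll("example-job.jar,/opt/libs/extra.jar", "demoContext")
 *   // replies with ResultJarsPathForAll(pathForClasspath, pathForSpark), or the failure cause
 * }}}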
16 | */ 17 | 18 | object JarActor { 19 | case class AddJar(jarName: String, bytes: Array[Byte]) 20 | case class NoSuchJar() 21 | case class DeleteJar(jarName: String) 22 | case class GetAllJars() 23 | case class GetAllJarsNames() 24 | case class GetJarsPathForClasspath(paths: String, contextName: String) 25 | case class GetJarsPathForSpark(paths: String) 26 | case class GetJarsPathForAll(paths: String, contextName: String) 27 | case class DeleteJarFolder() 28 | case class CreateJarFolder(overwrite: Boolean) 29 | case class JarFolderExists() 30 | case class ResultJarsPathForAll(pathForClasspath: String, pathForSpark: List[String]) 31 | 32 | 33 | 34 | val CLASSPATH_JAR_SEPARATOR = ":" 35 | val JAR_FOLDER_PROPERTY_PATH = "appConf.jars.path" 36 | 37 | 38 | } 39 | 40 | class JarActor(config: Config) extends Actor { 41 | 42 | val log = LoggerFactory.getLogger(getClass) 43 | 44 | val jarFolder = getValueFromConfig(config, JAR_FOLDER_PROPERTY_PATH, "") 45 | FileUtils.createFolder(jarFolder, false) 46 | 47 | override def receive: Receive = { 48 | case AddJar(jarName, bytes) => { 49 | log.info(s"Received AddJar request for jar $jarName") 50 | Try { 51 | if(!JarUtils.validateJar(bytes)){ 52 | log.error("Jar " + jarName + " is not valid!") 53 | throw new Exception("Jar " + jarName + " is not valid!") 54 | } 55 | FileUtils.writeToFile(jarName, jarFolder, bytes) 56 | } match { 57 | case Success(v) => { 58 | val fileJar = new File(jarFolder + File.separator + jarName) 59 | if(fileJar.exists()) { 60 | sender ! Success(JarInfo(jarName, fileJar.length(), fileJar.lastModified())) 61 | } else { 62 | sender ! Failure(new Exception("Jar was wrote to disk.")) 63 | } 64 | } 65 | case Failure(e) => sender ! Failure(e) 66 | } 67 | } 68 | case DeleteJar(jarName) => { 69 | val file = new File(jarFolder + File.separator + jarName) 70 | if(file.exists()){ 71 | file.delete() 72 | sender ! Success("Jar deleted.") 73 | } else { 74 | sender ! NoSuchJar() 75 | } 76 | } 77 | case GetAllJars() => { 78 | val folderJar = new File(jarFolder) 79 | val files = folderJar.listFiles() 80 | if(files != null){ 81 | val jarInfos = JarsInfo(files.map(jarFile => JarInfo(jarFile.getName, jarFile.length, jarFile.lastModified)).filter(_.name.endsWith(".jar"))) 82 | sender ! jarInfos 83 | } else { 84 | sender ! List() 85 | } 86 | } 87 | case GetAllJarsNames() => { 88 | val folderJar = new File(jarFolder) 89 | val files = folderJar.listFiles() 90 | if(files != null){ 91 | val jarNames = files.map(_.getName).filter(_.endsWith(".jar")).toList 92 | sender ! jarNames 93 | } else { 94 | sender ! List() 95 | } 96 | } 97 | case GetJarsPathForClasspath(path, contextName) => { 98 | 99 | Try { 100 | getJarsPathForClasspath(path, contextName) 101 | } match { 102 | case Success(path) => sender ! path 103 | case Failure(e) => sender ! e 104 | } 105 | 106 | } 107 | case GetJarsPathForSpark(path) => { 108 | Try { 109 | sender ! getJarsPathForSpark(path) 110 | } match { 111 | case Success(path) => sender ! path 112 | case Failure(e) => sender ! e 113 | } 114 | } 115 | 116 | case GetJarsPathForAll(paths: String, contextName: String) => { 117 | Try { 118 | ResultJarsPathForAll(getJarsPathForClasspath(paths, contextName), getJarsPathForSpark(paths)) 119 | } match { 120 | case Success(result) => sender ! result 121 | case Failure(e) => sender ! 
e 122 | } 123 | 124 | } 125 | 126 | case DeleteJarFolder() => { 127 | FileUtils.deleteFolder(jarFolder) 128 | } 129 | 130 | case CreateJarFolder(overwrite: Boolean) => { 131 | FileUtils.createFolder(jarFolder, overwrite) 132 | } 133 | 134 | case JarFolderExists() => { 135 | val file = new File(jarFolder) 136 | sender ! file.exists() 137 | } 138 | } 139 | 140 | 141 | def getJarsPathForSpark(path: String): List[String] = { 142 | var jarSparkPathList = ListBuffer[String]() 143 | path.split(",").foreach { x => 144 | jarSparkPathList += (JarUtils.getJarPathForSpark(x, jarFolder)) 145 | } 146 | jarSparkPathList.toList 147 | } 148 | 149 | def getJarsPathForClasspath(path: String, contextName: String) = { 150 | var jarClasspath = "" 151 | path.split(",").foreach { x => 152 | jarClasspath += JarUtils.getPathForClasspath(x, jarFolder, contextName) + CLASSPATH_JAR_SEPARATOR 153 | } 154 | jarClasspath.substring(0, jarClasspath.size - 1) 155 | } 156 | } 157 | 158 | 159 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/server/domain/actors/JobActor.scala: -------------------------------------------------------------------------------- 1 | package server.domain.actors 2 | 3 | import java.util.UUID 4 | 5 | import akka.actor.{Actor, ActorRef, ActorSelection} 6 | import akka.pattern.ask 7 | import com.typesafe.config.Config 8 | import org.joda.time.{DateTimeZone, DateTime} 9 | import responses.{JobStates, Jobs, Job} 10 | import server.domain.actors.ContextManagerActor.{GetAllContexts, GetContext, NoSuchContext} 11 | import org.slf4j.LoggerFactory 12 | import server.domain.actors.JobActor._ 13 | import scala.concurrent.ExecutionContext.Implicits.global 14 | import scala.concurrent.Future 15 | import scala.util.{Success, Failure} 16 | 17 | /** 18 | * Created by raduc on 03/11/14. 19 | */ 20 | 21 | 22 | object JobActor { 23 | 24 | trait JobStatus { 25 | val startTime: Long = new DateTime (DateTimeZone.UTC).getMillis 26 | } 27 | 28 | case class JobStatusEnquiry(contextName: String, jobId: String) 29 | 30 | case class RunJob(runningClass: String, contextName: String, config: Config, uuid: String = UUID.randomUUID().toString) 31 | 32 | case class JobRunError(errorMessage: String) extends JobStatus 33 | 34 | case class JobRunSuccess(result:String) extends JobStatus 35 | 36 | case class JobStarted() extends JobStatus 37 | 38 | case class JobDoesNotExist() extends JobStatus 39 | 40 | case class UpdateJobStatus(uuid: String, status: JobStatus) 41 | 42 | case class GetAllJobsStatus() 43 | 44 | } 45 | 46 | 47 | class JobActor(config: Config, contextManagerActor: ActorRef) extends Actor { 48 | 49 | val log = LoggerFactory.getLogger(getClass) 50 | 51 | override def receive: Receive = { 52 | case job: RunJob => { 53 | log.info(s"Received RunJob message : runningClass=${job.runningClass} context=${job.contextName} uuid=${job.uuid}") 54 | 55 | val fromWebApi = sender 56 | 57 | val future = contextManagerActor ? GetContext(job.contextName) 58 | future onSuccess { 59 | case contextRef: ActorSelection => { 60 | 61 | import JobStates.RUNNING 62 | fromWebApi ! Job(job.uuid, job.contextName, RUNNING.toString, "", DateTime.now(DateTimeZone.UTC).getMillis) 63 | 64 | log.info(s"Sending RunJob message to actor $contextRef") 65 | contextRef ! job 66 | } 67 | case NoSuchContext => fromWebApi ! NoSuchContext 68 | case e @ _ => log.warn(s"Received UNKNOWN TYPE when asked for context. Type received $e") 69 | } 70 | future onFailure { 71 | case e => { 72 | fromWebApi ! 
e 73 | log.error(s"An error has occured.", e) 74 | } 75 | } 76 | } 77 | 78 | 79 | case jobEnquiry:JobStatusEnquiry => { 80 | log.info(s"Received JobStatusEnquiry message : uuid=${jobEnquiry.jobId}") 81 | val fromWebApi = sender 82 | 83 | 84 | val contextActorFuture = contextManagerActor ? GetContext(jobEnquiry.contextName) 85 | 86 | contextActorFuture onSuccess { 87 | case contextRef: ActorSelection => { 88 | 89 | val enquiryFuture = contextRef ? jobEnquiry 90 | 91 | enquiryFuture onSuccess{ 92 | case state:JobStatus => { 93 | log.info("Job with id: " + jobEnquiry.jobId + " has state : " + state) 94 | fromWebApi ! state 95 | } 96 | case x:Any => { 97 | log.info(s"Received $x TYPE when asked for job enquiry.") 98 | fromWebApi ! x 99 | } 100 | } 101 | 102 | enquiryFuture onFailure { 103 | case e => { 104 | fromWebApi ! e 105 | log.error(s"An error has occured.", e) 106 | } 107 | } 108 | } 109 | case NoSuchContext => fromWebApi ! NoSuchContext 110 | case e @ _ => log.warn(s"Received UNKNOWN TYPE when asked for context. Type received $e") 111 | } 112 | 113 | contextActorFuture onFailure { 114 | case e => { 115 | fromWebApi ! e 116 | log.error(s"An error has occured.", e) 117 | } 118 | } 119 | } 120 | 121 | case GetAllJobsStatus() => { 122 | 123 | val webApi = sender 124 | val future = contextManagerActor ? GetAllContexts() 125 | 126 | val future2: Future[Future[List[List[Job]]]] = future map { 127 | case contexts: List[ActorSelection] => { 128 | val contextsList = contexts.map { context => 129 | val oneContextFuture = context ? GetAllJobsStatus() 130 | oneContextFuture.map{ 131 | case jobs: List[Job] => jobs 132 | } 133 | } 134 | Future.sequence(contextsList) 135 | } 136 | } 137 | val future3: Future[List[List[Job]]] = future2.flatMap(identity) 138 | val future4: Future[List[Job]] = future3.map(x => x.flatMap(identity)) 139 | 140 | future4 onComplete { 141 | case Success(jobsList:List[Job]) => { 142 | webApi ! Jobs(jobsList.toArray.sortWith(_.startTime > _.startTime)) 143 | } 144 | case Failure(e) => webApi ! 
e 145 | } 146 | 147 | } 148 | } 149 | } 150 | 151 | 152 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/server/domain/actors/ReTry.scala: -------------------------------------------------------------------------------- 1 | package server.domain.actors 2 | 3 | import akka.actor._ 4 | import akka.pattern.ask 5 | import org.slf4j.LoggerFactory 6 | 7 | import scala.concurrent.duration._ 8 | import scala.util.{Failure, Success} 9 | 10 | /* 11 | See http://www.codetinkerhack.com/2014/01/re-try-pattern-using-akka-actor-ask.html 12 | */ 13 | 14 | object ReTry { 15 | private case class Retry(originalSender: ActorRef, message: Any, times: Int) 16 | 17 | private case class Response(originalSender: ActorRef, result: Any) 18 | 19 | def props(tries: Int, retryTimeOut: FiniteDuration, retryInterval: FiniteDuration, forwardTo: ActorSelection): Props = Props(new ReTry(tries: Int, retryTimeOut: FiniteDuration, retryInterval: FiniteDuration, forwardTo: ActorSelection)) 20 | 21 | } 22 | 23 | class ReTry(val tries: Int, retryTimeOut: FiniteDuration, retryInterval: FiniteDuration, forwardTo: ActorSelection) extends Actor { 24 | 25 | import context.dispatcher 26 | import server.domain.actors.ReTry._ 27 | val log = LoggerFactory.getLogger(getClass) 28 | 29 | // Retry loop that keep on Re-trying the request 30 | def retryLoop: Receive = { 31 | 32 | // Response from future either Success or Failure is a Success - we propagate it back to a original sender 33 | case Response(originalSender, result) => 34 | originalSender ! result 35 | context stop self 36 | 37 | case Retry(originalSender, message, triesLeft) => 38 | 39 | // Process (Re)try here. When future completes it sends result to self 40 | (forwardTo ? message) (retryTimeOut) onComplete { 41 | 42 | case Success(result) => 43 | self ! Response(originalSender, result) // sending responses via self synchronises results from futures that may come potentially in any order. It also helps the case when the actor is stopped (in this case responses will become deadletters) 44 | 45 | case Failure(ex) => 46 | if (triesLeft - 1 == 0) {// In case of last try and we got a failure (timeout) lets send Retries exceeded error 47 | self ! Response(originalSender, Failure(new Exception("Retries exceeded"))) 48 | } 49 | else 50 | log.error("Error occurred: " + ex) 51 | } 52 | 53 | // Send one more retry after interval 54 | if (triesLeft - 1 > 0) 55 | context.system.scheduler.scheduleOnce(retryInterval, self, Retry(originalSender, message, triesLeft - 1)) 56 | 57 | case m @ _ => 58 | log.error("No handling defined for message: " + m) 59 | 60 | } 61 | 62 | // Initial receive loop 63 | def receive: Receive = { 64 | 65 | case message @ _ => 66 | context.system.scheduler.scheduleOnce(retryInterval, self, Retry(sender, message, tries)) 67 | // self ! Retry(sender, message, tries) 68 | 69 | // Lets swap to a retry loop here. 
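      // `discardOld = false` stacks `retryLoop` on top of the initial receive rather than replacing it;
      // the actor never calls `unbecome()` and simply stops itself after delivering a Response, so the
      // stacked behaviour is never revisited.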
70 | context.become(retryLoop, false) 71 | 72 | } 73 | 74 | } 75 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/server/domain/actors/Supervisor.scala: -------------------------------------------------------------------------------- 1 | package server.domain.actors 2 | 3 | import akka.actor.SupervisorStrategy._ 4 | import akka.actor.{Actor, OneForOneStrategy, Props, actorRef2Scala} 5 | import org.slf4j.LoggerFactory 6 | 7 | import scala.concurrent.duration._ 8 | 9 | 10 | class Supervisor extends Actor { 11 | 12 | val log = LoggerFactory.getLogger(getClass) 13 | 14 | override val supervisorStrategy = 15 | OneForOneStrategy(maxNrOfRetries = 10, withinTimeRange = 1 minute) { 16 | case e: Exception => { 17 | log.error("Exception", e) 18 | Resume 19 | } 20 | } 21 | 22 | def receive = { 23 | case (p: Props, name: String) => sender ! context.actorOf(p, name) 24 | } 25 | 26 | } -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/server/domain/actors/package.scala: -------------------------------------------------------------------------------- 1 | package server.domain 2 | 3 | import akka.util.Timeout 4 | import com.typesafe.config.Config 5 | 6 | import scala.concurrent.duration._ 7 | 8 | /** 9 | * Utility functions for actors 10 | */ 11 | package object actors { 12 | implicit val timeout: Timeout = 50 seconds 13 | 14 | def getValueFromConfig[T](config: Config, configPath: String, defaultValue: T): T ={ 15 | if (config.hasPath(configPath)) config.getAnyRef(configPath).asInstanceOf[T] else defaultValue 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/utils/ActorUtils.scala: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import java.io.IOException 4 | import java.net.ServerSocket 5 | 6 | import com.typesafe.config.{Config, ConfigFactory} 7 | import server.domain.actors._ 8 | 9 | /** 10 | * Created by raduc on 11/11/14. 
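 *
 * Helpers for wiring up remote Akka actors: building `akka.tcp` actor addresses, probing for a free
 * TCP port, and deriving the remoting configuration of a context process. For illustration, a context
 * named `demo` on the default host resolves to an address of the form
 * `akka.tcp://S-demo@127.0.0.1:11001/user/A-demo` (the port shown is hypothetical).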
11 | */ 12 | object ActorUtils { 13 | 14 | val PREFIX_CONTEXT_ACTOR = "A-" 15 | val PREFIX_CONTEXT_SYSTEM = "S-" 16 | 17 | val HOST_PROPERTY_NAME = "manager.akka.remote.netty.tcp.hostname" 18 | val PORT_PROPERTY_NAME = "manager.akka.remote.netty.tcp.port" 19 | 20 | def getContextActorAddress(contextName: String, host: String, port: Int): String ={ 21 | getActorAddress(PREFIX_CONTEXT_SYSTEM + contextName, host, port, PREFIX_CONTEXT_ACTOR + contextName) 22 | } 23 | 24 | def getActorAddress(systemName: String, host: String, port: Int, actorName: String): String = { 25 | "akka.tcp://" + systemName + "@" + host + ":" + port + "/user/" + actorName 26 | } 27 | 28 | def findAvailablePort(lastUsedPort: Int): Integer = { 29 | val notFound = true; 30 | var port = lastUsedPort + 1 31 | while (notFound) { 32 | try { 33 | new ServerSocket(port).close() 34 | return port 35 | } 36 | catch { 37 | case e: IOException => { 38 | port += 1 39 | } 40 | } 41 | } 42 | return 0 43 | } 44 | 45 | def remoteConfig(hostname: String, port: Int, commonConfig: Config): Config = { 46 | 47 | val host = getValueFromConfig(commonConfig, ActorUtils.HOST_PROPERTY_NAME, "127.0.0.1") 48 | 49 | val configStr = """ 50 | akka{ 51 | log-dead-letters = 0 52 | actor { 53 | provider = "akka.remote.RemoteActorRefProvider" 54 | } 55 | remote { 56 | enabled-transports = ["akka.remote.netty.tcp"] 57 | log-sent-messages = on 58 | log-received-messages = on 59 | log-remote-lifecycle-events = off 60 | netty.tcp { 61 | maximum-frame-size = 512000b 62 | hostname = """" + host + """" 63 | port = """ + port + 64 | """ } 65 | } 66 | }""" 67 | 68 | ConfigFactory.parseString(configStr).withFallback(commonConfig) 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/utils/ContextUtils.scala: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import com.typesafe.config.Config 4 | import org.apache.spark.SparkConf 5 | 6 | import scala.collection.JavaConverters._ 7 | 8 | object ContextUtils { 9 | def configToSparkConf(config:Config, contextName:String): SparkConf ={ 10 | val sparkConf = new SparkConf() 11 | .setAppName(contextName) 12 | .setJars(config.getStringList("context.jars").asScala) 13 | 14 | for(x <- config.entrySet().asScala if x.getKey.startsWith("spark.")) { 15 | sparkConf.set(x.getKey, x.getValue.unwrapped().toString) 16 | } 17 | 18 | sparkConf 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/utils/FileUtils.scala: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import java.io.{File, FileOutputStream} 4 | import org.apache.commons 5 | 6 | /** 7 | * Created by raduchilom on 22/03/15. 
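 *
 * Thin wrappers around `java.io` and Apache commons-io used for writing uploaded jar bytes to disk
 * and for creating or deleting the jar folder.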
8 | */ 9 | object FileUtils { 10 | 11 | def writeToFile(fileName: String, folderName: String, bytes: Array[Byte]): Unit = { 12 | val fos = new FileOutputStream(folderName + File.separator + fileName) 13 | fos.write(bytes) 14 | fos.close() 15 | } 16 | 17 | def createFolder(folder: String, overwrite: Boolean) = { 18 | val file = new File(folder) 19 | if(!file.exists()){ 20 | file.mkdir() 21 | } else if (overwrite){ 22 | commons.io.FileUtils.deleteDirectory(file) 23 | file.mkdir() 24 | } 25 | } 26 | 27 | def deleteFolder(folder: String): Unit = { 28 | val file = new File(folder) 29 | if(file.exists()){ 30 | commons.io.FileUtils.deleteDirectory(file) 31 | } 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/utils/HdfsUtils.scala: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import org.apache.hadoop.conf.Configuration 4 | import org.apache.hadoop.fs.{Path, FileSystem} 5 | 6 | /** 7 | * Created by raduchilom on 22/03/15. 8 | */ 9 | object HdfsUtils { 10 | 11 | def copyJarFromHdfs(hdfsPath: String, outputFolder: String) = { 12 | 13 | // if(!config.hasPath("hdfs.namenode")){ 14 | // println("ERROR: HDFS NameNode is not set in application.conf!") 15 | // throw new Exception("HDFS NameNode is not set in application.conf!") 16 | // } 17 | 18 | val conf = new Configuration() 19 | // conf.set("fs.defaultFS", getValueFromConfig(config, "hdfs.namenode", "")) 20 | conf.set("fs.defaultFS", hdfsPath) 21 | val hdfsFileSystem = FileSystem.get(conf) 22 | 23 | hdfsFileSystem.copyToLocalFile(new Path(hdfsPath), new Path(outputFolder)) 24 | } 25 | 26 | } 27 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/utils/JarUtils.scala: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import java.io.File 4 | 5 | /** 6 | * Created by raduchilom on 22/03/15. 
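 *
 * Jar handling helpers: a jar is accepted when it starts with the ZIP magic bytes (0x50 0x4b 0x03 0x04),
 * and a jar reference is resolved as an absolute path, an `hdfs://` URL, or the name of a previously
 * uploaded jar inside the configured jar folder. A purely illustrative resolution:
 * {{{
 *   JarUtils.getJarPathForSpark("example-job.jar", "/tmp/spark-job-rest/jars")
 *   // => "/tmp/spark-job-rest/jars/example-job.jar", provided that file exists
 * }}}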
7 | */ 8 | object JarUtils { 9 | 10 | def validateJar(bytes: Array[Byte]): Boolean = { 11 | // For now just check the first few bytes are the ZIP signature: 0x04034b50 little endian 12 | if(bytes.size < 4 || bytes(0) != 0x50 || bytes(1) != 0x4b || bytes(2) != 0x03 || bytes(3) != 0x04){ 13 | false 14 | } else { 15 | true 16 | } 17 | } 18 | 19 | def getPathForClasspath(path: String, jarFolder: String, contextName: String): String = { 20 | val diskPath = 21 | if(path.startsWith("/")){ 22 | path 23 | } else if (path.startsWith("hdfs")){ 24 | val tempFolder = jarFolder + "tmp" + File.pathSeparator + contextName 25 | FileUtils.createFolder(tempFolder, true) 26 | HdfsUtils.copyJarFromHdfs(path, tempFolder) 27 | tempFolder + File.pathSeparator + getJarName(path) 28 | } else { 29 | jarFolder + File.separator + getJarName(path) 30 | } 31 | 32 | val diskFile = new File(diskPath) 33 | if (diskFile.exists()) { 34 | return diskPath 35 | } 36 | 37 | throw new Exception(s"Jar $path not found.") 38 | } 39 | 40 | 41 | def getJarName(path: String): String = { 42 | if(path.contains('\\')) { 43 | path.substring(path.lastIndexOf('\\')) 44 | } else { 45 | path 46 | } 47 | } 48 | 49 | def getJarPathForSpark(path: String, jarFolder: String): String = { 50 | if(path.startsWith("hdfs")){ 51 | //TODO: perform hdfs validation 52 | return path 53 | } else { 54 | val diskPath = 55 | if(path.startsWith("/")){ 56 | path 57 | } else { 58 | jarFolder + File.separator + getJarName(path) 59 | } 60 | val diskFile = new File(diskPath) 61 | if (diskFile.exists()) { 62 | return diskPath 63 | } 64 | } 65 | 66 | throw new Exception(s"Jar $path not found.") 67 | } 68 | 69 | } 70 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scripts/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | CMD=$1 6 | ARG1=$2 7 | 8 | CDIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) 9 | PROJECT_DIR="${CDIR}/../../../.." 10 | 11 | SJR_IS_REMOTE_DEPLOY=${SJR_IS_REMOTE_DEPLOY-false} 12 | SJR_PACKAGE_PATH=${SJR_PACKAGE_PATH-${PROJECT_DIR}/spark-job-rest/target/spark-job-rest.tar.gz} 13 | 14 | SJR_DEPLOY_PATH="${SJR_DEPLOY_PATH}" # Empty variable will cause error in action 15 | SJR_REMOTE_DEPLOY_PATH="${SJR_REMOTE_DEPLOY_PATH}" # Overrides SJR_DEPLOY_PATH in case of remote deploy 16 | SJR_DEPLOY_KEY="${SJR_DEPLOY_KEY}" # Empty by default 17 | SJR_DEPLOY_HOST="${SJR_DEPLOY_HOST}" # Empty for local deploy 18 | 19 | CONFIGURATION_IS_SET="false" 20 | 21 | function setup_defaults() { 22 | if [ -z "${SJR_DEPLOY_PATH}" ]; then 23 | echo "Spark-Job-REST deployment path is not defined. Set 'SJR_DEPLOY_PATH' before running this script." 24 | exit -1 25 | fi 26 | } 27 | 28 | function setup_remote() { 29 | SSH_KEY_EXPRESSION="" 30 | if [ ! -z "${SJR_DEPLOY_KEY}" ]; then 31 | echo "Using SSH key from '${SJR_DEPLOY_KEY}'" 32 | SSH_KEY_EXPRESSION="-i ${SJR_DEPLOY_KEY}" 33 | fi 34 | 35 | if [ -z "${SJR_DEPLOY_HOST}" ]; then 36 | echo "Spark-Job-REST deployment host is not defined. Set 'SJR_DEPLOY_HOST' before running this script." 37 | exit -1 38 | fi 39 | 40 | # Override deploy path in remote mode 41 | if [ ! 
-z "${SJR_REMOTE_DEPLOY_PATH}" ]; then 42 | SJR_DEPLOY_PATH="${SJR_REMOTE_DEPLOY_PATH}" 43 | fi 44 | } 45 | 46 | function setup() { 47 | if [ "${CONFIGURATION_IS_SET}" = "false" ]; then 48 | CONFIGURATION_IS_SET="true" 49 | setup_defaults 50 | if [ "${SJR_IS_REMOTE_DEPLOY}" = "true" ]; then 51 | setup_remote 52 | else 53 | SJR_DEPLOY_HOST="localhost" 54 | fi 55 | fi 56 | } 57 | 58 | function exec_remote() { 59 | setup 60 | ssh -i "${SJR_DEPLOY_KEY}" "${SJR_DEPLOY_HOST}" "$1" 61 | } 62 | 63 | function exec_local() { 64 | setup 65 | eval "$1" 66 | } 67 | 68 | function exec_cmd() { 69 | if [ "$SJR_IS_REMOTE_DEPLOY" = "true" ]; then 70 | exec_remote "$1" 71 | else 72 | exec_local "$1" 73 | fi 74 | } 75 | 76 | function stop_server() { 77 | echo "Stopping server" 78 | exec_cmd "if [ -d ${SJR_DEPLOY_PATH} ]; then ${SJR_DEPLOY_PATH}/bin/stop_server.sh; fi" 79 | exec_cmd "pkill -f 'java.*spark-job-rest.jar'" || true 80 | } 81 | 82 | function remove_server() { 83 | echo "Remove server" 84 | setup 85 | exec_cmd "rm -rf ${SJR_DEPLOY_PATH}" 86 | } 87 | 88 | function upload_tarball() { 89 | if [ "${SJR_IS_REMOTE_DEPLOY}" = "true" ]; then 90 | echo "Upload tarball" 91 | scp "${SSH_KEY_EXPRESSION}" "$SJR_PACKAGE_PATH" "${SJR_DEPLOY_HOST}":"/tmp/" 92 | fi 93 | } 94 | 95 | function extract_package() { 96 | echo "Extract from tarball" 97 | exec_cmd "mkdir -p ${SJR_DEPLOY_PATH}" 98 | if [ "${SJR_IS_REMOTE_DEPLOY}" = "true" ]; then 99 | exec_remote "tar zxf /tmp/spark-job-rest.tar.gz -C ${SJR_DEPLOY_PATH} --strip-components=1" 100 | else 101 | exec_local "tar zxf ${SJR_PACKAGE_PATH} -C ${SJR_DEPLOY_PATH} --strip-components=1" 102 | fi 103 | } 104 | 105 | function deploy_server() { 106 | echo "Deploing to ${SJR_DEPLOY_HOST}:${SJR_DEPLOY_PATH}" 107 | stop_server 108 | remove_server 109 | upload_tarball 110 | extract_package 111 | start_server 112 | } 113 | 114 | function start_server() { 115 | echo "Run server" 116 | exec_cmd "${SJR_DEPLOY_PATH}/bin/start_server.sh" 117 | } 118 | 119 | function server_log() { 120 | echo "Spark-Job-REST main log:" 121 | exec_cmd "tail -f ${SJR_DEPLOY_PATH}/logs/spark-job-rest.log" 122 | } 123 | 124 | function server_log_context() { 125 | CONTEXT_NAME=$ARG1 126 | echo "Spark-Job-REST '${CONTEXT_NAME}' log:" 127 | exec_cmd "tail -f ${SJR_DEPLOY_PATH}/logs/${CONTEXT_NAME}.log" 128 | } 129 | 130 | function show_help() { 131 | echo "Spark-Job-REST deployment tool" 132 | echo "Usage: deploy.sh [deploy|start|stop|restart|log|log-context ]" 133 | } 134 | 135 | function show_vars() { 136 | echo "SJR_DEPLOY_PATH=${SJR_DEPLOY_PATH}" 137 | echo "SJR_DEPLOY_HOST=${SJR_DEPLOY_HOST}" 138 | echo "SJR_DEPLOY_KEY=${SJR_DEPLOY_KEY}" 139 | echo "SJR_PACKAGE_PATH=${SJR_PACKAGE_PATH}" 140 | echo "SJR_IS_REMOTE_DEPLOY=${SJR_IS_REMOTE_DEPLOY}" 141 | echo "SJR_REMOTE_DEPLOY_PATH=${SJR_REMOTE_DEPLOY_PATH}" 142 | } 143 | 144 | function main() { 145 | case "$CMD" in 146 | deploy) setup 147 | deploy_server 148 | ;; 149 | stop) setup 150 | stop_server 151 | ;; 152 | start) setup 153 | start_server 154 | ;; 155 | restart) setup 156 | stop_server 157 | start_server 158 | ;; 159 | log) setup 160 | server_log 161 | ;; 162 | log-context) setup 163 | server_log_context 164 | ;; 165 | debug) show_vars 166 | ;; 167 | help) show_help 168 | ;; 169 | *) show_help 170 | ;; 171 | esac 172 | } 173 | 174 | main -------------------------------------------------------------------------------- /spark-job-rest/src/main/scripts/restart_server.sh: -------------------------------------------------------------------------------- 1 
| #!/bin/bash 2 | 3 | get_abs_script_path() { 4 | pushd . >/dev/null 5 | cd $(dirname $0) 6 | appdir=$(pwd) 7 | popd >/dev/null 8 | } 9 | get_abs_script_path 10 | 11 | "$appdir/stop_server.sh" 12 | "$appdir/start_server.sh" 13 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scripts/start_server.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Script to start the job server 3 | set -e 4 | 5 | get_abs_script_path() { 6 | pushd . >/dev/null 7 | cd $(dirname $0) 8 | appdir=$(pwd) 9 | popd >/dev/null 10 | } 11 | get_abs_script_path 12 | 13 | parentdir="$(dirname "$appdir")" 14 | 15 | GC_OPTS="-XX:+UseConcMarkSweepGC 16 | -verbose:gc -XX:+PrintGCTimeStamps -Xloggc:$appdir/gc.out 17 | -XX:MaxPermSize=512m 18 | -XX:+CMSClassUnloadingEnabled " 19 | 20 | JAVA_OPTS="-Xmx1g -XX:MaxDirectMemorySize=512M 21 | -XX:+HeapDumpOnOutOfMemoryError -Djava.net.preferIPv4Stack=true 22 | -Dcom.sun.management.jmxremote.authenticate=false 23 | -Dcom.sun.management.jmxremote.ssl=false" 24 | 25 | MAIN="server.Main" 26 | 27 | conffile="$parentdir/resources/application.conf" 28 | 29 | if [ ! -f "$conffile" ]; then 30 | echo "No configuration file $conffile found" 31 | exit 1 32 | fi 33 | 34 | if [ -f "$parentdir/resources/settings.sh" ]; then 35 | . $parentdir/resources/settings.sh 36 | else 37 | echo "Missing $parentdir/resources/settings.sh, exiting" 38 | exit 1 39 | fi 40 | 41 | if [ -z "$SPARK_HOME" ]; then 42 | echo "Please set SPARK_HOME or put it in $parentdir/resources/settings.sh first" 43 | exit 1 44 | fi 45 | 46 | # Pull in other env vars in spark config, such as MESOS_NATIVE_LIBRARY 47 | . $SPARK_CONF_HOME/spark-env.sh 48 | 49 | 50 | mkdir -p "${LOG_DIR}" 51 | 52 | LOGGING_OPTS="-Dlog4j.configuration=log4j.properties 53 | -DLOG_DIR=$LOG_DIR 54 | -DLOG_FILE=spark-job-rest.log" 55 | 56 | # For Mesos 57 | #CONFIG_OVERRIDES="-Dspark.executor.uri=$SPARK_EXECUTOR_URI " 58 | # For Mesos/Marathon, use the passed-in port 59 | if [ -n "$PORT" ]; then 60 | CONFIG_OVERRIDES+="-Dspark.jobserver.port=$PORT " 61 | fi 62 | 63 | # The following should be exported in order to be accessible in Config substitutions 64 | export SPARK_HOME 65 | export APP_DIR 66 | export JAR_PATH 67 | export CONTEXTS_BASE_DIR 68 | 69 | # job server jar needs to appear first so its deps take higher priority 70 | # need to explicitly include app dir in classpath so logging configs can be found 71 | #CLASSPATH="$appdir:$appdir/spark-job-server.jar:$($SPARK_HOME/bin/compute-classpath.sh)" 72 | CLASSPATH="$parentdir/resources:$appdir:$parentdir/spark-job-rest.jar:$($SPARK_HOME/bin/compute-classpath.sh)" 73 | echo "CLASSPATH = $CLASSPATH" 74 | 75 | 76 | exec java -cp $CLASSPATH $GC_OPTS $JAVA_OPTS $LOGGING_OPTS $CONFIG_OVERRIDES $MAIN $conffile > /dev/null 2>&1 & 77 | echo $! > $appdir/server.pid 78 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scripts/stop_server.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | appdir="$(dirname "$0")" 4 | 5 | if [ -f "$appdir/server.pid" ]; then 6 | pid="$(cat "$appdir/server.pid")" 7 | proc="$(ps axu | grep "$pid" | grep spark-job-rest.jar | awk '{print $2}')" 8 | if [ -n "$proc" ]; then 9 | echo "Killing pid $proc" 10 | kill -9 $proc 11 | else 12 | echo "Pid $pid does not exist or it's not for spark-job-rest." 
13 | fi 14 | else 15 | echo "Pid file $appdir/server.pid was not found" 16 | fi 17 | 18 | -------------------------------------------------------------------------------- /spark-job-rest/src/test/resources/application.conf: -------------------------------------------------------------------------------- 1 | # spark default configuration 2 | spark.executor.memory=2g 3 | spark.mesos.coarse=false 4 | spark.scheduler.mode=FAIR 5 | spark.cores.max=2 6 | spark.master="local" 7 | spark.path=${SPARK_HOME} 8 | spark.default.parallelism=384 9 | spark.storage.memoryFraction=0.3 10 | spark.shuffle.memoryFraction=0.6 11 | spark.shuffle.compress=true 12 | spark.shuffle.spill-compress=true 13 | spark.reducer.maxMbInFlight=48 14 | spark.akka.frameSize=100 15 | spark.akka.threads=4 16 | spark.akka.timeout=100 17 | spark.task.maxFailures=4 18 | spark.shuffle.consolidateFiles=true 19 | spark.deploy.spreadOut=true 20 | spark.shuffle.spill=false 21 | spark.kryo.referenceTracking=false 22 | 23 | # Default Spark driver JVM memory 24 | driver.xmxMemory = 1g 25 | 26 | # application configuration 27 | appConf{ 28 | # The IP on which to deploy the APIs 29 | web.services.ip="0.0.0.0" 30 | # The port on which to deploy the APIs 31 | web.services.port=8097 32 | # Implicit akka timeout 33 | timeout=1000000 34 | # Remote context initialization 35 | init { 36 | # Implicit sleep (milliseconds) before sending the init message 37 | sleep=3000 38 | # Tries before considering the remote context dead 39 | tries=20 40 | # Timeout for each attempt (milliseconds) 41 | retry-timeout=1000 42 | # Interval between attempts to reach the remote context (milliseconds) 43 | retry-interval=1500 44 | } 45 | # The port where the range for actor systems starts 46 | actor.systems.first.port = 11000 47 | # The port where the range for the Spark UI starts 48 | spark.ui.first.port = 16000 49 | # The path to the folder where jars are kept 50 | jars.path = /tmp/spark-job-rest/jars 51 | } 52 | 53 | context{ 54 | # Path to the context process work directory 55 | contexts-base-dir = /tmp/spark-job-rest/contexts 56 | # Number of jobs which can be executed on a context in parallel. Zero means infinite concurrency. 
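# (for example, a hypothetical `cuncurrent-jobs = 4` would cap a context at four jobs running at once)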
57 | cuncurrent-jobs = 0 58 | # Context factory that will be dynamically loaded to instantiate job context 59 | job-context-factory = "context.SparkContextFactory" 60 | } 61 | 62 | manager { 63 | akka { 64 | log-dead-letters = 1 65 | actor { 66 | provider = "akka.remote.RemoteActorRefProvider" 67 | } 68 | remote { 69 | log-remote-lifecycle-events = off 70 | enabled-transports = ["akka.remote.netty.tcp"] 71 | log-sent-messages = on 72 | log-received-messages = on 73 | netty.tcp { 74 | transport-class = "akka.remote.transport.netty.NettyTransport" 75 | hostname = "localhost" 76 | port = 4042 77 | maximum-frame-size = 256000b 78 | } 79 | } 80 | } 81 | 82 | spray.can.server { 83 | # uncomment the next line for making this an HTTPS example 84 | # ssl-encryption = on 85 | idle-timeout = 61 s 86 | request-timeout = 60 s 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /spark-job-rest/src/test/scala/context/JobContextFactorySpec.scala: -------------------------------------------------------------------------------- 1 | package context 2 | 3 | import api.{ContextLike, SparkJobBase} 4 | import com.typesafe.config.{Config, ConfigFactory} 5 | import org.apache.spark.SparkContext 6 | import org.junit.runner.RunWith 7 | import org.scalatest._ 8 | import org.scalatest.junit.JUnitRunner 9 | 10 | trait FakeContext 11 | 12 | class FakeJobContextFactory extends JobContextFactory { 13 | type C = ContextLike 14 | def makeContext(config: Config, contextName: String): ContextLike = new ContextLike with FakeContext { 15 | val contextClass = classOf[FakeContext].getName 16 | override def stop(): Unit = {} 17 | override def isValidJob(job: SparkJobBase): Boolean = true 18 | override def sparkContext: SparkContext = null 19 | } 20 | } 21 | 22 | /** 23 | * Test suite for [[JobContextFactory]]. 
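 *
 * Verifies that a factory can be loaded by class name, that [[SparkContextFactory]] is the default,
 * and that contexts are built through whichever factory the configuration names.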
24 | */ 25 | @RunWith(classOf[JUnitRunner]) 26 | class JobContextFactorySpec extends WordSpec with MustMatchers with BeforeAndAfter { 27 | "JobContextFactory" should { 28 | "load specified factory" in { 29 | JobContextFactory 30 | .getFactory("context.SparkContextFactory") 31 | .isInstanceOf[SparkContextFactory] mustEqual true 32 | } 33 | 34 | "load default factory" in { 35 | JobContextFactory 36 | .getFactory() 37 | .isInstanceOf[SparkContextFactory] mustEqual true 38 | } 39 | 40 | "make context with default factory if other is not specified" in { 41 | val context = JobContextFactory.makeContext(ConfigFactory.parseString( 42 | """ 43 | |{ 44 | | context.jars = [], 45 | | spark.master = "local", 46 | | spark.app.id = "test" 47 | |} 48 | """.stripMargin).resolve(), "test") 49 | context.isInstanceOf[SparkContext] mustEqual true 50 | context.stop() 51 | } 52 | 53 | "make context with specified factory if other is not specified" in { 54 | JobContextFactory.makeContext(ConfigFactory.parseString( 55 | """ 56 | |{ 57 | | context.jars = [], 58 | | context.job-context-factory = "context.FakeJobContextFactory", 59 | | spark.master = "local", 60 | | spark.app.id = "test" 61 | |} 62 | """.stripMargin).resolve(), "test") 63 | .isInstanceOf[FakeContext] mustEqual true 64 | } 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /spark-job-rest/src/test/scala/context/SparkContextFactorySpec.scala: -------------------------------------------------------------------------------- 1 | package context 2 | 3 | import api.ContextLike 4 | import com.typesafe.config.ConfigFactory 5 | import org.apache.spark.SparkContext 6 | import org.junit.runner.RunWith 7 | import org.scalatest._ 8 | import org.scalatest.junit.JUnitRunner 9 | 10 | import scala.util.Try 11 | 12 | /** 13 | * Test suite for [[SparkContextFactory]]. 14 | */ 15 | @RunWith(classOf[JUnitRunner]) 16 | class SparkContextFactorySpec extends WordSpec with MustMatchers with BeforeAndAfter { 17 | type C = SparkContext with ContextLike 18 | 19 | var sparkContext: C = _ 20 | val sparkContextFactory = new SparkContextFactory() 21 | 22 | // Destroy Spark context after each test 23 | after { 24 | Try{ sparkContext.stop() } 25 | } 26 | 27 | "SingletonSparkContextFactory" should { 28 | "create Spark context" in { 29 | sparkContext = sparkContextFactory.makeContext(config, this.getClass.getName) 30 | sparkContext.appName mustEqual this.getClass.getName 31 | } 32 | } 33 | 34 | val config = ConfigFactory.parseString( 35 | """ 36 | |{ 37 | | context.jars = [], 38 | | spark.master = "local" 39 | |} 40 | """.stripMargin) 41 | } 42 | -------------------------------------------------------------------------------- /spark-job-rest/src/test/scala/integration/IntegrationTests.scala: -------------------------------------------------------------------------------- 1 | package integration 2 | 3 | import akka.actor.ActorSystem 4 | import akka.util.Timeout 5 | import client.SparkJobRestClient 6 | import org.junit.runner.RunWith 7 | import org.scalatest.concurrent.ScalaFutures 8 | import org.scalatest.{Matchers, BeforeAndAfter, FunSuite} 9 | import org.scalatest.junit.JUnitRunner 10 | import responses.Job 11 | 12 | /** 13 | * Created by raduchilom on 4/25/15. 
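 *
 * End-to-end tests that exercise a running server through [[SparkJobRestClient]]. They assume the
 * REST API is reachable at http://localhost:8097 and that `example-job.jar` has been built; the
 * hard-coded `exampleJarPath` below is machine-specific and will almost certainly need adjusting.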
14 | */ 15 | @RunWith(classOf[JUnitRunner]) 16 | class IntegrationTests extends FunSuite with BeforeAndAfter with ScalaFutures with Matchers { 17 | 18 | implicit val timeout = Timeout(10000) 19 | implicit val system = ActorSystem("localSystem") 20 | 21 | val client = new SparkJobRestClient("http://localhost:8097") 22 | val contextName = "testContext" 23 | val exampleJarPath = "/Users/raduchilom/projects/spark-job-rest/examples/example-job/target/example-job.jar" 24 | val parameters = Map[String, String]("jars" -> exampleJarPath, 25 | "input" -> "100") 26 | 27 | 28 | before { 29 | } 30 | 31 | after { 32 | } 33 | 34 | test("Create Context & Delete Context") { 35 | val context = client.createContext(contextName, parameters) 36 | context.contextName should be(contextName) 37 | 38 | var contexts = client.getContexts() 39 | contexts.contexts.size should be(1) 40 | contexts.contexts.contains(context) should be(true) 41 | 42 | client.deleteContext(contextName) should be(true) 43 | contexts = client.getContexts() 44 | contexts.contexts.size should be(0) 45 | } 46 | 47 | test("Create Contexts & Delete Contexts") { 48 | 49 | for(i <- 0 to 4) { 50 | val context = client.createContext(contextName + i, parameters) 51 | context.contextName should be(contextName + i) 52 | } 53 | 54 | var contexts = client.getContexts() 55 | contexts.contexts.size should be(5) 56 | 57 | for(i <- 0 to 4) { 58 | client.deleteContext(contextName + i) should be(true) 59 | contexts = client.getContexts() 60 | contexts.contexts.size should be(4 - i) 61 | } 62 | } 63 | 64 | test("Create Context & Run Job") { 65 | val context = client.createContext(contextName, parameters) 66 | context.contextName should be(contextName) 67 | 68 | var contexts = client.getContexts() 69 | contexts.contexts.size should be(1) 70 | contexts.contexts.contains(context) should be(true) 71 | 72 | val job = client.runJob("com.job.SparkJobImplemented", contextName, parameters) 73 | job shouldBe a [Job] 74 | 75 | Thread.sleep(2000) 76 | 77 | val jobResult = client.getJob(job.jobId, contextName) 78 | jobResult.result should be("100") 79 | 80 | client.deleteContext(contextName) should be(true) 81 | contexts = client.getContexts() 82 | contexts.contexts.size should be(0) 83 | } 84 | 85 | test("Upload Jar") { 86 | val jarInfo = client.uploadJar("example-job.jar", exampleJarPath) 87 | jarInfo.name should be("example-job.jar") 88 | } 89 | 90 | } 91 | -------------------------------------------------------------------------------- /spark-job-rest/src/test/scala/server/domain/actors/ContextActorSpec.scala: -------------------------------------------------------------------------------- 1 | package server.domain.actors 2 | 3 | import java.util.concurrent.TimeUnit 4 | 5 | import akka.actor.ActorSystem 6 | import akka.pattern.ask 7 | import akka.testkit.TestActorRef 8 | import akka.util.Timeout 9 | import com.typesafe.config.ConfigFactory 10 | import context.{FakeContext, JobContextFactory} 11 | import org.apache.spark.SparkContext 12 | import org.junit.runner.RunWith 13 | import org.scalatest._ 14 | import org.scalatest.concurrent.TimeLimitedTests 15 | import org.scalatest.junit.JUnitRunner 16 | import org.scalatest.time.SpanSugar._ 17 | 18 | import scala.util.Success 19 | 20 | /** 21 | * Test suit for [[ContextActor]] 22 | */ 23 | @RunWith(classOf[JUnitRunner]) 24 | class ContextActorSpec extends WordSpec with MustMatchers with BeforeAndAfter with TimeLimitedTests { 25 | val timeLimit = 10 seconds 26 | 27 | val config = ConfigFactory.load() 28 | 29 | implicit val 
timeout = Timeout(10, TimeUnit.SECONDS) 30 | implicit val system = ActorSystem("localSystem") 31 | 32 | var contextActorRef: TestActorRef[ContextActor] = _ 33 | def contextActor = contextActorRef.underlyingActor 34 | 35 | val contextName = "demoContext" 36 | 37 | before { 38 | contextActorRef = TestActorRef(new ContextActor(config)) 39 | } 40 | 41 | after { 42 | contextActor.jobContext.stop() 43 | } 44 | 45 | "ContextActor" should { 46 | "create Spark context when initialized" in { 47 | val future = contextActorRef ? ContextActor.Initialize(contextName, config, List()) 48 | val Success(result: ContextActor.Initialized) = future.value.get 49 | result must not equal null 50 | contextActor.jobContext.isInstanceOf[SparkContext] mustEqual true 51 | } 52 | 53 | "have default factory for Spark context" in { 54 | val configWithoutFactory = config.withoutPath(JobContextFactory.classNameConfigEntry) 55 | val future = contextActorRef ? ContextActor.Initialize(contextName, configWithoutFactory, List()) 56 | val Success(result: ContextActor.Initialized) = future.value.get 57 | result must not equal null 58 | contextActor.jobContext.isInstanceOf[SparkContext] mustEqual true 59 | } 60 | 61 | "create context from specified factory" in { 62 | val future = contextActorRef ? ContextActor.Initialize(contextName, fakeContextFactoryConfig, List()) 63 | val Success(result: ContextActor.Initialized) = future.value.get 64 | result must not equal null 65 | contextActor.jobContext.isInstanceOf[FakeContext] mustEqual true 66 | } 67 | } 68 | 69 | val fakeContextFactoryConfig = ConfigFactory.parseString( 70 | """ 71 | |{ 72 | | context.job-context-factory = "context.FakeJobContextFactory", 73 | |} 74 | """.stripMargin).withFallback(config) 75 | } 76 | -------------------------------------------------------------------------------- /spark-job-rest/src/test/scala/server/domain/actors/JarActorTest.scala: -------------------------------------------------------------------------------- 1 | package server.domain.actors 2 | 3 | import java.io.File 4 | import java.util.concurrent.TimeUnit 5 | 6 | import akka.actor.ActorSystem 7 | import akka.pattern.ask 8 | import akka.testkit.TestActorRef 9 | import akka.util.Timeout 10 | import com.typesafe.config.ConfigFactory 11 | import org.junit.runner.RunWith 12 | import org.scalatest.concurrent.ScalaFutures 13 | import org.scalatest.junit.JUnitRunner 14 | import org.scalatest.{BeforeAndAfter, FunSuite, Matchers} 15 | import responses.JarInfo 16 | import server.domain.actors.JarActor._ 17 | import utils.FileUtils 18 | 19 | import scala.util.{Random, Success} 20 | 21 | /** 22 | * Test suite for [[JarActor]]. 23 | */ 24 | @RunWith(classOf[JUnitRunner]) 25 | class JarActorTest extends FunSuite with BeforeAndAfter with ScalaFutures with Matchers { 26 | 27 | val config = ConfigFactory.load() 28 | 29 | implicit val timeout = Timeout(10, TimeUnit.SECONDS) 30 | implicit val system = ActorSystem("localSystem") 31 | 32 | val jarActor = TestActorRef(new JarActor(config)) 33 | val contextName = "demoContext" 34 | 35 | val jarFolder = config.getString(JarActor.JAR_FOLDER_PROPERTY_PATH) 36 | 37 | before { 38 | jarActor ! CreateJarFolder 39 | } 40 | 41 | after { 42 | jarActor ! DeleteJarFolder 43 | } 44 | 45 | test("Delete & Create Jar Folder") { 46 | 47 | jarActor ! DeleteJarFolder() 48 | 49 | var future = jarActor ? JarFolderExists() 50 | val Success(resultNotExists: Boolean) = future.value.get 51 | resultNotExists should be(false) 52 | 53 | jarActor ! 
--------------------------------------------------------------------------------
/spark-job-rest/src/test/scala/server/domain/actors/JarActorTest.scala:
--------------------------------------------------------------------------------
1 | package server.domain.actors
2 | 
3 | import java.io.File
4 | import java.util.concurrent.TimeUnit
5 | 
6 | import akka.actor.ActorSystem
7 | import akka.pattern.ask
8 | import akka.testkit.TestActorRef
9 | import akka.util.Timeout
10 | import com.typesafe.config.ConfigFactory
11 | import org.junit.runner.RunWith
12 | import org.scalatest.concurrent.ScalaFutures
13 | import org.scalatest.junit.JUnitRunner
14 | import org.scalatest.{BeforeAndAfter, FunSuite, Matchers}
15 | import responses.JarInfo
16 | import server.domain.actors.JarActor._
17 | import utils.FileUtils
18 | 
19 | import scala.util.{Random, Success}
20 | 
21 | /**
22 |  * Test suite for [[JarActor]].
23 |  */
24 | @RunWith(classOf[JUnitRunner])
25 | class JarActorTest extends FunSuite with BeforeAndAfter with ScalaFutures with Matchers {
26 | 
27 |   val config = ConfigFactory.load()
28 | 
29 |   implicit val timeout = Timeout(10, TimeUnit.SECONDS)
30 |   implicit val system = ActorSystem("localSystem")
31 | 
32 |   val jarActor = TestActorRef(new JarActor(config))
33 |   val contextName = "demoContext"
34 | 
35 |   val jarFolder = config.getString(JarActor.JAR_FOLDER_PROPERTY_PATH)
36 | 
37 |   before {
38 |     jarActor ! CreateJarFolder
39 |   }
40 | 
41 |   after {
42 |     jarActor ! DeleteJarFolder
43 |   }
44 | 
45 |   test("Delete & Create Jar Folder") {
46 | 
47 |     jarActor ! DeleteJarFolder()
48 | 
49 |     var future = jarActor ? JarFolderExists()
50 |     val Success(resultNotExists: Boolean) = future.value.get
51 |     resultNotExists should be(false)
52 | 
53 |     jarActor ! CreateJarFolder(true)
54 | 
55 |     future = jarActor ? JarFolderExists()
56 |     val Success(resultExists: Boolean) = future.value.get
57 |     resultExists should be(true)
58 | 
59 |     future = jarActor ? GetAllJarsNames()
60 |     val Success(result: List[String]) = future.value.get
61 |     result should be( Nil )
62 | 
63 |   }
64 | 
65 |   test("Write & Delete Jar") {
66 | 
67 |     val jarName = Random.nextString(5) + ".jar"
68 | 
69 |     var future = jarActor ? AddJar(jarName, getTestJarBytes)
70 |     val Success(result: Success[JarInfo]) = future.value.get
71 |     result shouldBe a [Success[JarInfo]]
72 | 
73 |     future = jarActor ? GetAllJarsNames()
74 |     val Success(resultJars: List[String]) = future.value.get
75 |     resultJars should be( List(jarName) )
76 | 
77 |     future = jarActor ? DeleteJar(jarName)
78 |     val Success(deleteResult: Success[String]) = future.value.get
79 |     deleteResult should be( Success("Jar deleted.") )
80 | 
81 |     future = jarActor ? GetAllJarsNames()
82 |     val Success(emptyJarList: List[String]) = future.value.get
83 |     emptyJarList should be( Nil )
84 | 
85 |   }
86 | 
87 |   test("Write 10 Jars") {
88 |     for( i <- 1 to 10) {
89 |       val jarName = Random.nextString(5) + ".jar"
90 |       var future = jarActor ? AddJar(jarName, getTestJarBytes)
91 |     }
92 | 
93 |     val future = jarActor ? GetAllJarsNames()
94 |     val Success(emptyJarList: List[String]) = future.value.get
95 |     emptyJarList.size should be( 10 )
96 |   }
97 | 
98 |   test("Get Classpath For Uploaded Jar"){
99 |     val jarName = Random.nextString(5) + ".jar"
100 |     var future = jarActor ? AddJar(jarName, getTestJarBytes)
101 | 
102 |     future = jarActor ? GetJarsPathForClasspath(jarName, contextName)
103 |     val Success(result: String) = future.value.get
104 |     result should be( config.getString(JarActor.JAR_FOLDER_PROPERTY_PATH) + File.separator + jarName)
105 |   }
106 | 
107 |   def createLocalJar(jarName: String) = {
108 |     val jarPath = jarFolder + jarName
109 |     FileUtils.writeToFile(jarName, jarFolder, getTestJarBytes)
110 |   }
111 | 
112 |   test("Get Classpath For Local Jar"){
113 | 
114 |     val jarName = "test.jar"
115 |     val jarPath = jarFolder + File.separator + jarName
116 |     createLocalJar(jarName)
117 | 
118 |     val future = jarActor ? GetJarsPathForClasspath(jarPath, contextName)
119 |     val Success(result: String) = future.value.get
120 |     result should be( jarPath )
121 |   }
122 | 
123 |   // test("Get Classpath For Hdfs Jar"){
124 |   // TODO: Add test for hdfs jar
125 |   //   val jarPath = "/home/ubuntu/test.jar"
126 |   //
127 |   //   val future = jarActor ? GetJarsPathForClasspath(jarPath)
128 |   //   val Success(result: String) = future.value.get
129 |   //   result should be( jarPath )
130 |   // }
131 | 
132 |   test("Get Classpath For Multiple Jars"){
133 | 
134 |     // TODO: Add hdfs jar to this test
135 |     val localJarName = "test.jar"
136 |     val jarPath = jarFolder + File.separator + localJarName
137 |     createLocalJar(localJarName)
138 | 
139 |     val jarName = Random.nextString(5) + ".jar"
140 |     var future = jarActor ? AddJar(jarName, getTestJarBytes)
141 | 
142 |     future = jarActor ? GetJarsPathForClasspath(jarPath + "," + jarName, contextName)
143 |     val Success(result: String) = future.value.get
144 |     result should be( jarPath + JarActor.CLASSPATH_JAR_SEPARATOR + config.getString(JarActor.JAR_FOLDER_PROPERTY_PATH) + File.separator + jarName)
145 |   }
146 | 
147 |   test("Get Spark Jars For Uploaded Jar"){
148 |     val jarName = Random.nextString(5) + ".jar"
149 |     var future = jarActor ? AddJar(jarName, getTestJarBytes)
150 | 
151 |     future = jarActor ? GetJarsPathForSpark(jarName)
152 |     val Success(result: List[String]) = future.value.get
153 |     result should be( List(config.getString(JarActor.JAR_FOLDER_PROPERTY_PATH) + File.separator + jarName))
154 |   }
155 | 
156 |   test("Get Spark Jars For Local Jar"){
157 | 
158 |     val localJarName = "test.jar"
159 |     val jarPath = jarFolder + File.separator + localJarName
160 |     createLocalJar(localJarName)
161 | 
162 |     val future = jarActor ? GetJarsPathForSpark(jarPath)
163 |     val Success(result: List[String]) = future.value.get
164 |     result should be( List(jarPath) )
165 |   }
166 | 
167 |   test("Get Spark Jars For Hdfs Jar"){
168 |     val jarPath = "hdfs://devbox.local:8020/home/ubuntu/test.jar"
169 | 
170 |     val future = jarActor ? GetJarsPathForSpark(jarPath)
171 |     val Success(result: List[String]) = future.value.get
172 |     result should be( List(jarPath) )
173 |   }
174 | 
175 |   test("Get Spark Jars For Multiple Jars"){
176 | 
177 |     val localJarName = "test.jar"
178 |     val jarPath = jarFolder + File.separator + localJarName
179 |     createLocalJar(localJarName)
180 | 
181 |     val hdfsJarPath = "hdfs://devbox.local:8020/home/ubuntu/test.jar"
182 | 
183 |     val jarName = Random.nextString(5) + ".jar"
184 |     var future = jarActor ? AddJar(jarName, getTestJarBytes)
185 | 
186 |     future = jarActor ? GetJarsPathForSpark(jarPath + "," + jarName + "," + hdfsJarPath)
187 |     val Success(result: List[String]) = future.value.get
188 |     result should be( List(jarPath, config.getString(JarActor.JAR_FOLDER_PROPERTY_PATH) + File.separator + jarName, hdfsJarPath))
189 |   }
190 | 
191 |   test("Get All Jars Paths For Multiple Jars"){
192 | 
193 |     val localJarName = "test.jar"
194 |     val jarPath = jarFolder + File.separator + localJarName
195 |     createLocalJar(localJarName)
196 | 
197 |     // val hdfsJarPath = "hdfs://devbox.local:8020/home/ubuntu/test.jar"
198 | 
199 |     val jarName = Random.nextString(5) + ".jar"
200 |     var future = jarActor ? AddJar(jarName, getTestJarBytes)
201 | 
202 |     future = jarActor ? GetJarsPathForAll(jarPath + "," + jarName, contextName)
203 |     val Success(result: ResultJarsPathForAll) = future.value.get
204 |     result.pathForSpark should be( List(jarPath, config.getString(JarActor.JAR_FOLDER_PROPERTY_PATH) + File.separator + jarName))
205 |     result.pathForClasspath should be ( jarPath + JarActor.CLASSPATH_JAR_SEPARATOR + jarFolder + File.separator + jarName)
206 |   }
207 | 
208 | 
209 |   def getTestJarBytes: Array[Byte] = {
210 |     val bytes: Array[Byte] = Array(0x50.toByte, 0x4b.toByte, 0x03.toByte, 0x04.toByte)
211 | 
212 |     val randomBytes = new Array[Byte](20)
213 |     Random.nextBytes(randomBytes)
214 | 
215 |     bytes ++ randomBytes
216 |   }
217 | }
218 | 
--------------------------------------------------------------------------------
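`getTestJarBytes` fabricates a payload whose first four bytes are 0x50 0x4B 0x03 0x04, the zip local-file-header signature ("PK\3\4"), presumably so whatever validation JarActor applies accepts it as a jar. A standalone sketch of that kind of check, with a hypothetical helper name that is not part of JarActor:

    // Illustrative only: a jar is a zip archive, so a quick upload sanity check
    // can look for the "PK\3\4" signature at the start of the byte stream.
    def looksLikeJar(bytes: Array[Byte]): Boolean =
      bytes.length >= 4 &&
        bytes(0) == 0x50.toByte && bytes(1) == 0x4b.toByte &&
        bytes(2) == 0x03.toByte && bytes(3) == 0x04.toByte

Under that assumption, `looksLikeJar(getTestJarBytes)` would return true for the payload these tests upload.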