├── .gitignore ├── LICENSE.md ├── Makefile ├── README.md ├── examples ├── example-job │ ├── LICENSE.md │ ├── pom.xml │ └── src │ │ └── main │ │ └── scala │ │ └── com.job │ │ └── SparkJobImplemented.scala └── s3-download-job │ ├── pom.xml │ └── src │ └── main │ └── scala │ └── com.job │ ├── ExecuteDownload.scala │ ├── S3DownloadJob.scala │ └── S3Utils.scala ├── pom.xml ├── spark-job-rest-api ├── pom.xml └── src │ └── main │ └── scala │ ├── api │ ├── ContextLike.scala │ └── SparkJob.scala │ └── responses │ ├── JobStates.scala │ └── ResponseObjects.scala ├── spark-job-rest-client ├── pom.xml └── src │ └── main │ └── scala │ └── client │ └── SparkJobRestClient.scala ├── spark-job-rest-sql ├── pom.xml └── src │ ├── main │ └── scala │ │ ├── api │ │ └── SparkSqlJob.scala │ │ └── context │ │ ├── HiveContextFactory.scala │ │ ├── SQLContextFactory.scala │ │ └── SparkSQLContextFactory.scala │ └── test │ └── scala │ └── context │ ├── HiveContextFactorySpec.scala │ ├── SQLContextFactorySpec.scala │ └── SparkSQLContextFactorySpec.scala └── spark-job-rest ├── pom.xml └── src ├── main ├── assembly │ └── archive.xml ├── resources │ ├── application.conf │ ├── context_start.sh │ ├── log4j.properties │ ├── settings.sh │ └── webapp │ │ ├── assets │ │ ├── css │ │ │ └── style.css │ │ ├── fonts │ │ │ └── bootstrap │ │ │ │ ├── glyphicons-halflings-regular.eot │ │ │ │ ├── glyphicons-halflings-regular.svg │ │ │ │ ├── glyphicons-halflings-regular.ttf │ │ │ │ ├── glyphicons-halflings-regular.woff │ │ │ │ └── glyphicons-halflings-regular.woff2 │ │ └── img │ │ │ ├── halftone.png │ │ │ ├── loading-sm.gif │ │ │ └── loading.gif │ │ ├── index.html │ │ └── js │ │ ├── behaviour.js │ │ ├── bootstrap-notify.min.js │ │ ├── bootstrap.min.js │ │ ├── fileinput.min.js │ │ └── spin.min.js ├── scala │ ├── context │ │ ├── JobContextFactory.scala │ │ └── SparkContextFactory.scala │ ├── logging │ │ └── LoggingOutputStream.scala │ ├── server │ │ ├── CORSDirectives.scala │ │ ├── Controller.scala │ │ ├── Main.scala │ │ ├── MainContext.scala │ │ └── domain │ │ │ └── actors │ │ │ ├── ContextActor.scala │ │ │ ├── ContextManagerActor.scala │ │ │ ├── ContextProcessActor.scala │ │ │ ├── JarActor.scala │ │ │ ├── JobActor.scala │ │ │ ├── ReTry.scala │ │ │ ├── Supervisor.scala │ │ │ └── package.scala │ └── utils │ │ ├── ActorUtils.scala │ │ ├── ContextUtils.scala │ │ ├── FileUtils.scala │ │ ├── HdfsUtils.scala │ │ └── JarUtils.scala └── scripts │ ├── deploy.sh │ ├── restart_server.sh │ ├── start_server.sh │ └── stop_server.sh └── test ├── resources └── application.conf └── scala ├── context ├── JobContextFactorySpec.scala └── SparkContextFactorySpec.scala ├── integration └── IntegrationTests.scala └── server └── domain └── actors ├── ContextActorSpec.scala └── JarActorTest.scala /.gitignore: -------------------------------------------------------------------------------- 1 | .cache 2 | syntax: glob 3 | *.log 4 | *.log* 5 | RemoteSystemsTempFiles 6 | *.metadata 7 | target/* 8 | .DS_Store 9 | *.settings 10 | *.classpath 11 | *.idea 12 | *.iml 13 | *.orig 14 | *.txt~ 15 | *.sh~ 16 | *.scala~ 17 | *.md~ 18 | *.jar 19 | .metadata 20 | target 21 | .DS_Store 22 | .settings 23 | .classpath 24 | .project 25 | classpath 26 | .idea 27 | .iml 28 | .orig 29 | .java.orig 30 | dependency-reduced-pom.xml 31 | /deploy 32 | **/resources/deploy.conf 33 | **/resources/deploy-settings.sh 34 | # TODO: Implement basic fairscheduler config 35 | **/resources/fairscheduler.xml 36 | metastore_db 37 | 
-------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright 2014 Atigeo, LLC. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at 4 | 5 | http://www.apache.org/licenses/LICENSE-2.0 6 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CURRENT_DIR := $(shell pwd) 2 | 3 | # 4 | # Deployment configuration 5 | # 6 | # Default path to the tarball 7 | PACKAGE_PATH := $(CURRENT_DIR)/spark-job-rest/target/spark-job-rest.tar.gz 8 | # Override this to set deploy path 9 | SJR_DEPLOY_PATH ?= $(CURRENT_DIR)/deploy 10 | 11 | # 12 | # We strongly suggest not to keep remote deployment configuration variables out of Git control! 13 | # 14 | # Overrides SJR_DEPLOY_PATH in remote deploy mode if not empty 15 | SJR_REMOTE_DEPLOY_PATH ?= 16 | # Set this the [user]@hostname of the machine you want to deploy to 17 | SJR_DEPLOY_HOST ?= 18 | # Optionally set path to SSH key here 19 | SJR_DEPLOY_KEY ?= 20 | 21 | # 22 | # Remote deployment parameters 23 | # 24 | REMOTE_PARAMS := SJR_DEPLOY_PATH=$(SJR_DEPLOY_PATH) \ 25 | SJR_DEPLOY_HOST=$(SJR_DEPLOY_HOST) \ 26 | SJR_DEPLOY_KEY=$(SJR_DEPLOY_KEY) \ 27 | SJR_PACKAGE_PATH=$(PACKAGE_PATH) \ 28 | SJR_IS_REMOTE_DEPLOY="true" \ 29 | SJR_REMOTE_DEPLOY_PATH=$(SJR_REMOTE_DEPLOY_PATH) 30 | 31 | all: build deploy 32 | 33 | build: 34 | @mvn clean install 35 | 36 | deploy: 37 | @SJR_DEPLOY_PATH=$(SJR_DEPLOY_PATH) \ 38 | $(CURRENT_DIR)/spark-job-rest/src/main/scripts/deploy.sh deploy 39 | 40 | remote-deploy: 41 | @$(REMOTE_PARAMS) $(CURRENT_DIR)/spark-job-rest/src/main/scripts/deploy.sh deploy 42 | 43 | remote-start: 44 | @$(REMOTE_PARAMS) $(CURRENT_DIR)/spark-job-rest/src/main/scripts/deploy.sh start 45 | 46 | remote-stop: 47 | @$(REMOTE_PARAMS) $(CURRENT_DIR)/spark-job-rest/src/main/scripts/deploy.sh stop 48 | 49 | remote-log: 50 | @$(REMOTE_PARAMS) $(CURRENT_DIR)/spark-job-rest/src/main/scripts/deploy.sh log 51 | 52 | start: 53 | @SJR_DEPLOY_PATH=$(SJR_DEPLOY_PATH) \ 54 | $(CURRENT_DIR)/spark-job-rest/src/main/scripts/deploy.sh start 55 | 56 | stop: 57 | @SJR_DEPLOY_PATH=$(SJR_DEPLOY_PATH) \ 58 | $(CURRENT_DIR)/spark-job-rest/src/main/scripts/deploy.sh stop 59 | 60 | .PHONY: all build deploy -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/Atigeo/spark-job-rest?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) 2 | 3 | ## Features: 4 | 5 | **Supports multiple spark contexts created from the same server** 6 | 7 | The main problem this project solves is the inability to run multiple Spark contexts from the same JVM. This is a bug in Spark core that was also present in Ooyala's Spark Job Server, from which this project is inspired. 
The project launches a new process for each Spark context/application, with its own driver memory setting and its own driver log. Each driver JVM is created with its own Spark UI port, which is returned to the API caller. Inter-process communication is achieved with Akka actors, and each process is shut down when a Spark context/application is deleted.
8 |
9 | ## Version compatibility
10 |
11 | SJR Version | Spark Version
12 | ------------- | -------------
13 | 0.3.0 | 1.1.0
14 | 0.3.1 | 1.3.1
15 |
16 | ## Building Spark-Job-Rest (SJR)
17 |
18 | The project is built with Maven 3 and Java 7.
19 | ```
20 | make build
21 | ```
22 | SJR can now be deployed from `spark-job-rest/spark-job-rest/target/spark-job-rest.tar.gz`.
23 |
24 | If your build fails with this error:
25 | ```
26 | [ERROR] spark-job-rest/src/main/scala/server/domain/actors/ContextManagerActor.scala:171: error: value redirectOutput is not a member of ProcessBuilder
27 | ```
28 | This happens because Maven uses Java 6. You can run `mvn -version` to check the Java version that Maven uses.
29 | ```sh
30 | $ mvn -version
31 | Apache Maven 3.2.5
32 | Java version: 1.6.0_65
33 | ```
34 | If Maven uses Java 6, you need to switch it to Java 7. This can be done by adding the `JAVA_HOME` export to your `~/.mavenrc` file:
35 | ```sh
36 | # OSX:
37 | export JAVA_HOME=/Library/Java/JavaVirtualMachines/{jdk-version}/Contents/Home
38 | ```
39 | ```sh
40 | # Ubuntu:
41 | export JAVA_HOME=/usr/lib/jvm/{jdk-version}
42 | ```
43 |
44 | If running from an IDE fails with:
45 | ```
46 | Exception in thread "main" java.lang.NoClassDefFoundError: akka/actor/Props
47 | ```
48 | This happens because the Spark dependency has the `provided` scope. To run from an IDE, either remove the `provided` scope for the Spark dependency (inside `pom.xml`) or add the Spark assembly jar to the run classpath.
49 |
50 | ## Deploying Spark-Job-Rest
51 |
52 | You can deploy Spark-Job-Rest locally to the `deploy` directory inside the project with:
53 | ```sh
54 | make deploy
55 | ```
56 | Optionally, you can specify the install directory via the `$SJR_DEPLOY_PATH` environment variable:
57 | ```sh
58 | SJR_DEPLOY_PATH=/opt/spark-job-rest make deploy
59 | ```
60 |
61 | Before running SJR, ensure that the [working environment](#configure-spark-environment) is configured.
62 |
63 | In order to have a proper installation you should set `$SPARK_HOME` to your Apache Spark distribution and `$SPARK_CONF_HOME` to the directory which contains `spark-env.sh` (usually `$SPARK_HOME/conf` or `$SPARK_HOME/libexec/conf`).
64 | You can do this in your bash profile (`~/.bash_profile` or `~/.bashrc`) by adding the following lines:
65 | ```sh
66 | export SPARK_HOME=
67 | export SPARK_CONF_HOME=$SPARK_HOME/libexec/conf # or $SPARK_HOME/conf depending on your distribution
68 | ```
69 | After that, either open a new terminal session or source your bash profile.
70 |
71 | ### Deploying to remote host
72 |
73 | You can deploy Spark-Job-REST to a remote host via:
74 | ```sh
75 | make remote-deploy
76 | ```
77 |
78 | For remote deployment you should set the following environment variables:
79 | ```sh
80 | # Mandatory connection string
81 | export SJR_DEPLOY_HOST=
82 | # Optional parameters
83 | export SJR_DEPLOY_KEY=
84 | export SJR_REMOTE_DEPLOY_PATH=
85 | ```
86 | If `SJR_REMOTE_DEPLOY_PATH` is not set then `SJR_DEPLOY_PATH` will be used during remote deploy.
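For example, a one-off remote deployment can pass these variables inline; the host, key and path below are placeholders rather than values from this project:
```sh
# Hypothetical values -- substitute your own host, SSH key and target path
SJR_DEPLOY_HOST=ubuntu@spark-master.example.com \
SJR_DEPLOY_KEY=~/.ssh/spark_deploy_key \
SJR_REMOTE_DEPLOY_PATH=/opt/spark-job-rest \
make remote-deploy
```
The same variables apply to the `make remote-start`, `make remote-stop` and `make remote-log` targets.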
87 |
88 | ## Starting Spark-Job-Rest
89 |
90 | To start/stop SJR, use
91 | ```sh
92 | cd $SJR_DEPLOY_PATH
93 | bin/start_server.sh
94 | bin/stop_server.sh
95 | ```
96 |
97 | or, if it is deployed to the default destination, simply
98 | ```sh
99 | make start
100 | make stop
101 | ```
102 |
103 | ## Configure Spark-job-rest
104 |
105 | The Spark-Job-REST default configuration is stored in `resources/application.conf` (here and below, paths are relative to `spark-job-rest/src/main/`).
106 | To add or override settings, create `resources/deploy.conf` (ignored by VCS).
107 |
108 | ### Spark context settings
109 | Configure the default Spark properties for context creation; they are regular Spark configuration options:
110 | ```
111 | spark.executor.memory=2g
112 | spark.master="local"
113 | spark.path="/Users/user/spark-1.1.0"
114 | ........
115 | ```
116 | To set how much memory should be allocated for the driver, use `driver.xmxMemory` (default is `1g`).
117 |
118 | ### Application settings
119 |
120 | Configure settings like the web server port and Akka system ports:
121 | ```
122 | appConf{
123 | web.services.port=8097
124 | spark.ui.first.port = 16000
125 | ........
126 | }
127 | ```
128 |
129 | ### Configure folders & class paths
130 |
131 | You may configure folders by setting environment variables and by creating and editing `resources/deploy-settings.sh` (under `spark-job-rest/src/main/`):
132 |
133 | ```sh
134 | export SJR_LOG_DIR=
135 | export SJR_JAR_PATH=
136 | export SJR_CONTEXTS_BASE_DIR=
137 | export JSR_EXTRA_CLASSPATH=
138 | ```
139 |
140 | ### Java & GC options
141 |
142 | You can extend or override Java and GC options in `resources/deploy-settings.sh`:
143 |
144 | ```sh
145 | JAVA_OPTS="${JAVA_OPTS}
146 | ${YOUR_EXTRA_JAVA_OPTIONS}"
147 | GC_OPTS="${GC_OPTS}
148 | ${YOUR_EXTRA_GC_OPTIONS}"
149 | ```
150 |
151 | ## Custom contexts
152 |
153 | Spark-Job-REST supports custom job context factories, defined via the `context.job-context-factory` configuration property.
154 | By default SJR uses `context.SparkContextFactory`, which creates one Spark context per JVM.
155 |
156 | ### SQL contexts
157 |
158 | To run jobs against the provided SQL contexts, include `spark-job-rest-sql` in your project, set the context factory to one of the SQL context factories provided by this library, and derive your job from `api.SparkSqlJob`.
159 | Currently supported contexts:
160 |
161 | 1. `context.SparkSQLContextFactory` creates a plain SQL context.
162 | 2. `context.HiveContextFactory` creates a Hive SQL context.
163 |
164 | ## Configure Spark environment
165 |
166 | In order to have a proper installation you should set `$SPARK_HOME` to your Apache Spark distribution and `$SPARK_CONF_HOME` to the directory which contains `spark-env.sh` (usually `$SPARK_HOME/conf` or `$SPARK_HOME/libexec/conf`).
167 | You can do this in your bash profile (`~/.bash_profile` or `~/.bashrc`) by adding the following lines:
168 | ```sh
169 | export SPARK_HOME=
170 | export SPARK_CONF_HOME=$SPARK_HOME/libexec/conf # or $SPARK_HOME/conf depending on your distribution
171 | ```
172 | After that, either open a new terminal session or source your bash profile.
173 |
174 | SJR can be run from outside the Spark cluster, but you need at least to copy the deployment folder from one of the slave or master nodes.
175 |
176 | ## Run Spark-job-rest
177 |
178 | After editing all the configuration files, SJR can be started by executing the script `start_server.sh`.
179 |
180 | The UI can be accessed at `<server address>:<web.services.port>` (port `8097` by default).
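Once the server is up, a quick sanity check is to query it over HTTP (assuming the default `web.services.port=8097`; the `curl` call below is illustrative and hits the same route as the bundled HTTP client's `getContexts()`):
```sh
# Lists the currently running contexts; an empty list means the server is up but no contexts exist yet
curl 'localhost:8097/contexts'
```
The full set of endpoints is described in the next section.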
181 |
182 | ## API
183 |
184 | **Contexts**
185 |
186 | - POST /contexts/{contextName} - Create Context
187 |
188 | * Body: Raw entity with key-value pairs.
189 | * The `jars` key is required and should be a comma-separated list of jar paths. These jars will be added at Spark context creation time to the classpath of the newly created context's JVM process. Three types of jar paths are supported:
190 | * Absolute path on the server side: `/home/ubuntu/example.jar`
191 | * Name of a jar that was uploaded to the server: `example.jar`
192 | * HDFS path: `hdfs://devbox.local:8020/user/test/example.jar`
193 |
194 | ```
195 | # Body example:
196 | jars="/home/ubuntu/example.jar,example.jar,hdfs://devbox.local:8020/user/test/example.jar"
197 | spark.executor.memory=2g
198 | driver.xmxMemory = 1g
199 | ```
200 |
201 | - GET /contexts/{contextName} - Returns the Context JSON object, or a "No such context" error.
202 |
203 | - DELETE /contexts/{contextName} - Delete Context
204 |
205 | **Jobs**
206 |
207 | - POST /jobs?runningClass={runningClass}&contextName={contextName} - Job Submission
208 |
209 | * Body: Raw entity with key-value pairs. Here you can set any configuration properties that will be passed to the config parameter of the validate and runJob methods of the provided jar (see the SparkJob definition below).
210 |
211 | - GET /jobs/{jobId}?contextName={contextName} - Gets the result or state of a specific job
212 |
213 | - GET /jobs - Gets the states/results of all jobs from all running contexts
214 |
215 | **Jars**
216 |
217 | - POST /jars/{jarName} - Upload jar
218 | * Body: Jar Bytes
219 |
220 | - POST /jars - Upload jar
221 | * Body: MultiPart Form
222 |
223 | - GET /jars - Gets all the uploaded jars
224 |
225 | - DELETE /jars/{jarName} - Delete jar
226 |
227 | ## HTTP Client
228 |
229 | All the API methods can be called from Scala/Java with the help of an HTTP client.
230 |
231 | Maven Spark-Job-Rest-Client dependency:
232 | ```xml
233 | <dependency>
234 |     <groupId>com.xpatterns</groupId>
235 |     <artifactId>spark-job-rest-client</artifactId>
236 |     <version>0.3.1</version>
237 | </dependency>
238 | ```
239 |
240 | ## Create Spark Job Project
241 |
242 | Add the Maven Spark-Job-Rest-Api dependency:
243 | ```xml
244 | <dependency>
245 |     <groupId>com.xpatterns</groupId>
246 |     <artifactId>spark-job-rest-api</artifactId>
247 |     <version>0.3.1</version>
248 | </dependency>
249 | ```
250 |
251 | To create a job that can be submitted through the server, the class must implement the `SparkJob` trait.
252 |
253 | ```scala
254 | import com.typesafe.config.Config
255 | import org.apache.spark.SparkContext
256 | import api.{SparkJobInvalid, SparkJobValid, SparkJobValidation, SparkJob}
257 |
258 | class Example extends SparkJob {
259 |   override def runJob(sc: SparkContext, jobConfig: Config): Any = { ... }
260 |   override def validate(sc: SparkContext, config: Config): SparkJobValidation = { ... }
261 | }
262 | ```
263 |
264 | - The `runJob` method contains the implementation of the job. The `SparkContext` and `Config` objects are provided as parameters.
265 | - The `validate` method allows for an initial validation. In order to run the job, return `SparkJobValid()`; otherwise return `SparkJobInvalid(message)`.
266 |
267 | ## Example
268 |
269 | An example for this project can be found here: ```spark-job-rest/examples/example-job```.
In order to package it, run 270 | ```sh 271 | mvn clean install 272 | ``` 273 | 274 | **Upload JAR** 275 | ```sh 276 | # In the project root directory 277 | curl --data-binary @spark-job-rest/examples/example-job/target/example-job.jar 'localhost:8097/jars/example-job.jar' 278 | 279 | { 280 | "contextName": "test-context", 281 | "sparkUiPort": "16003" 282 | } 283 | ``` 284 | 285 | **Create a context** 286 | ```sh 287 | curl -X POST -d "jars=example-job.jar" 'localhost:8097/contexts/test-context' 288 | 289 | { 290 | "contextName": "test-context", 291 | "sparkUiPort": "16003" 292 | } 293 | ``` 294 | 295 | **Check if context exists** 296 | 297 | ```sh 298 | curl 'localhost:8097/contexts/test-context' 299 | 300 | { 301 | "contextName": "test-context", 302 | "sparkUiPort": "16003" 303 | } 304 | ``` 305 | 306 | **Run job** - The example job creates an RDD from a Range(0,input) and applies count on it. 307 | 308 | ```sh 309 | curl -X POST -d "input=10000" 'localhost:8097/jobs?runningClass=com.job.SparkJobImplemented&contextName=test-context' 310 | 311 | { 312 | "jobId": "2bd438a2-ac1e-401a-b767-5fa044b2bd69", 313 | "contextName": "test-context", 314 | "status": "Running", 315 | "result": "", 316 | "startTime": 1430287260144 317 | } 318 | ``` 319 | 320 | ```2bd438a2-ac1e-401a-b767-5fa044b2bd69``` represents the jobId. This id can be used to query for the job status/results. 321 | 322 | **Query for results** 323 | 324 | ```sh 325 | curl 'localhost:8097/jobs/2bd438a2-ac1e-401a-b767-5fa044b2bd69?contextName=test-context' 326 | 327 | { 328 | "jobId": "2bd438a2-ac1e-401a-b767-5fa044b2bd69", 329 | "contextName": "test-context", 330 | "status": "Finished", 331 | "result": "10000", 332 | "startTime": 1430287261108 333 | } 334 | ``` 335 | 336 | **Delete context** 337 | 338 | ```sh 339 | curl -X DELETE 'localhost:8097/contexts/test-context' 340 | 341 | { 342 | "message": "Context deleted." 343 | } 344 | ``` 345 | 346 | **HTTP Client Example** 347 | 348 | ```scala 349 | object Example extends App { 350 | implicit val system = ActorSystem() 351 | val contextName = "testContext" 352 | 353 | try { 354 | val sjrc = new SparkJobRestClient("http://localhost:8097") 355 | 356 | val context = sjrc.createContext(contextName, Map("jars" -> "/Users/raduchilom/projects/spark-job-rest/examples/example-job/target/example-job.jar")) 357 | println(context) 358 | 359 | val job = sjrc.runJob("com.job.SparkJobImplemented", contextName, Map("input" -> "10")) 360 | println(job) 361 | 362 | var jobFinal = sjrc.getJob(job.jobId, job.contextName) 363 | while (jobFinal.status.equals(JobStates.RUNNING.toString())) { 364 | Thread.sleep(1000) 365 | jobFinal = sjrc.getJob(job.jobId, job.contextName) 366 | } 367 | println(jobFinal) 368 | 369 | sjrc.deleteContext(contextName) 370 | } catch { 371 | case e:Exception => { 372 | e.printStackTrace() 373 | } 374 | } 375 | 376 | system.shutdown() 377 | } 378 | ``` 379 | Running this would produce the output: 380 | 381 | ``` 382 | Context(testContext,16002) 383 | Job(ab63c19f-bbb4-461e-8c6f-f0a35f73a943,testContext,Running,,1430291077689) 384 | Job(ab63c19f-bbb4-461e-8c6f-f0a35f73a943,testContext,Finished,10,1430291078694) 385 | ``` 386 | 387 | 388 | ## UI 389 | 390 | The UI was added in a compiled and minified state. For sources and changes please refer to [spark-job-rest-ui](https://github.com/marianbanita82/spark-job-rest-ui) project. 
391 | -------------------------------------------------------------------------------- /examples/example-job/LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright 2014 Atigeo, LLC. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at 4 | 5 | http://www.apache.org/licenses/LICENSE-2.0 6 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -------------------------------------------------------------------------------- /examples/example-job/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.xpatterns 6 | example-job 7 | 1.0.0 8 | jar 9 | 10 | example-job 11 | http://maven.apache.org 12 | 13 | 14 | 15 | mvnrepository 16 | http://repo1.maven.org/maven2 17 | 18 | 19 | cloudera-repo-releases 20 | https://repository.cloudera.com/artifactory/repo/ 21 | 22 | 23 | Akka repository 24 | http://repo.akka.io/releases 25 | 26 | 27 | 28 | 29 | UTF-8 30 | 2.10.3 31 | 2.10 32 | 33 | 34 | 35 | 36 | com.xpatterns 37 | spark-job-rest-api 38 | 0.3.2 39 | 40 | 41 | 42 | org.apache.spark 43 | spark-core_2.10 44 | 1.3.1 45 | provided 46 | 47 | 48 | 49 | 50 | 51 | 52 | src/main/resources 53 | 54 | * 55 | 56 | false 57 | 58 | 59 | 60 | ${project.artifactId} 61 | 62 | 63 | 64 | 65 | org.scala-tools 66 | maven-scala-plugin 67 | 2.9.1 68 | 69 | 70 | org.apache.maven.plugins 71 | maven-compiler-plugin 72 | 2.0.2 73 | 74 | 75 | 76 | 77 | 78 | 79 | org.apache.maven.plugins 80 | maven-compiler-plugin 81 | 82 | 83 | 1.7 84 | 1.7 85 | 86 | 87 | 88 | 89 | org.scala-tools 90 | maven-scala-plugin 91 | 92 | 93 | scala-compile-first 94 | process-resources 95 | 96 | add-source 97 | compile 98 | 99 | 100 | 101 | scala-test-compile 102 | process-test-resources 103 | 104 | testCompile 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | -------------------------------------------------------------------------------- /examples/example-job/src/main/scala/com.job/SparkJobImplemented.scala: -------------------------------------------------------------------------------- 1 | package com.job 2 | 3 | import com.typesafe.config.Config 4 | import org.apache.spark.SparkContext 5 | import api.{SparkJobInvalid, SparkJobValid, SparkJobValidation, SparkJob} 6 | 7 | /** 8 | * Created by raduc on 03/11/14. 
9 | */ 10 | class SparkJobImplemented extends SparkJob 11 | { 12 | override def runJob(sc: SparkContext, jobConfig: Config): Any = { 13 | 14 | val nr = jobConfig.getInt("input") 15 | 16 | val list = Range(0,nr) 17 | val rdd = sc.parallelize(list) 18 | rdd.count() 19 | 20 | } 21 | 22 | override def validate(sc: SparkContext, config: Config): SparkJobValidation = { 23 | if(config.hasPath("input")) SparkJobValid() else SparkJobInvalid("The input parameter is missing.") 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /examples/s3-download-job/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.xpatterns 6 | s3-download-job 7 | 1.0.0 8 | jar 9 | 10 | s3-download-job 11 | http://maven.apache.org 12 | 13 | 14 | 15 | mvnrepository 16 | http://repo1.maven.org/maven2 17 | 18 | 19 | cloudera-repo-releases 20 | https://repository.cloudera.com/artifactory/repo/ 21 | 22 | 23 | Akka repository 24 | http://repo.akka.io/releases 25 | 26 | 27 | 28 | 29 | UTF-8 30 | 2.10.3 31 | 2.10 32 | 33 | 34 | 35 | 36 | org.apache.httpcomponents 37 | httpclient 38 | 4.3.4 39 | 40 | 41 | 42 | com.xpatterns 43 | spark-job-rest-api 44 | 0.3.2 45 | 46 | 47 | 48 | com.xpatterns 49 | spark-job-rest-client 50 | 0.3.1 51 | 52 | 53 | 54 | com.amazonaws 55 | aws-java-sdk 56 | 1.8.3 57 | 58 | 59 | 60 | org.apache.spark 61 | spark-core_2.10 62 | 1.3.1 63 | provided 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | src/main/resources 72 | 73 | * 74 | 75 | false 76 | 77 | 78 | ${project.artifactId} 79 | 80 | 81 | 82 | org.scala-tools 83 | maven-scala-plugin 84 | 2.9.1 85 | 86 | 87 | org.apache.maven.plugins 88 | maven-compiler-plugin 89 | 2.0.2 90 | 91 | 92 | 93 | 94 | 95 | org.apache.maven.plugins 96 | maven-compiler-plugin 97 | 98 | 99 | 1.7 100 | 1.7 101 | 102 | 103 | 104 | org.scala-tools 105 | maven-scala-plugin 106 | 107 | 108 | scala-compile-first 109 | process-resources 110 | 111 | add-source 112 | compile 113 | 114 | 115 | 116 | scala-test-compile 117 | process-test-resources 118 | 119 | testCompile 120 | 121 | 122 | 123 | 124 | 125 | 126 | org.apache.maven.plugins 127 | maven-shade-plugin 128 | 2.2 129 | 130 | 131 | 132 | package 133 | 134 | shade 135 | 136 | 137 | 138 | 139 | reference.conf 140 | 141 | 142 | 143 | 144 | *:* 145 | 146 | META-INF/*.SF 147 | META-INF/*.DSA 148 | META-INF/*.RSA 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | -------------------------------------------------------------------------------- /examples/s3-download-job/src/main/scala/com.job/ExecuteDownload.scala: -------------------------------------------------------------------------------- 1 | package com.job 2 | 3 | import akka.actor.ActorSystem 4 | import client.SparkJobRestClient 5 | import com.typesafe.config.{ConfigFactory, Config} 6 | import org.apache.spark.{SparkContext, SparkConf} 7 | import responses.JobStates 8 | 9 | /** 10 | * Created by raduchilom on 5/4/15. 
11 | */ 12 | 13 | object ExecuteDownload extends App { 14 | implicit val system = ActorSystem() 15 | val contextName = "downloadDataContext" 16 | 17 | try { 18 | val sjrc = new SparkJobRestClient("http://localhost:8097") 19 | 20 | val context = sjrc.createContext(contextName, Map("jars" -> "/home/ubuntu/s3-download-job.jar")) 21 | println(context) 22 | 23 | val bucketName="public-financial-transactions" 24 | val numPartitions=10 25 | val outputFolder="\"tachyon://localhost:19998/user/ubuntu/downloaded_data\"" 26 | 27 | val job = sjrc.runJob("com.job.S3DownloadJob", contextName, 28 | Map("s3.bucket" -> bucketName, 29 | "num.partitions" -> String.valueOf(numPartitions), 30 | "fs.output" -> outputFolder 31 | )) 32 | println(job) 33 | 34 | 35 | var jobFinal = sjrc.getJob(job.jobId, job.contextName) 36 | while (jobFinal.status.equals(JobStates.RUNNING.toString())) { 37 | Thread.sleep(1000) 38 | jobFinal = sjrc.getJob(job.jobId, job.contextName) 39 | } 40 | println(jobFinal) 41 | 42 | sjrc.deleteContext(contextName) 43 | } catch { 44 | case e:Exception => { 45 | e.printStackTrace() 46 | } 47 | } 48 | 49 | system.shutdown() 50 | } -------------------------------------------------------------------------------- /examples/s3-download-job/src/main/scala/com.job/S3DownloadJob.scala: -------------------------------------------------------------------------------- 1 | package com.job 2 | 3 | import com.typesafe.config.Config 4 | import org.apache.spark.SparkContext 5 | import api.{SparkJobInvalid, SparkJobValid, SparkJobValidation, SparkJob} 6 | import org.slf4j.LoggerFactory 7 | 8 | import scala.collection.mutable.ListBuffer 9 | import scala.util.{Success, Failure, Try} 10 | 11 | /** 12 | * Created by raduc on 03/11/14. 13 | */ 14 | class S3DownloadJob extends SparkJob { 15 | 16 | val log = LoggerFactory.getLogger(getClass) 17 | 18 | override def runJob(sc: SparkContext, jobConfig: Config): Any = { 19 | 20 | val bucketName = jobConfig.getString("s3.bucket") 21 | 22 | val numPartitions = jobConfig.getInt("num.partitions") 23 | val outputFolder = jobConfig.getString("fs.output") 24 | 25 | // slow for a large number of files 26 | // val fileList = S3Utils.getFiles(bucketName) 27 | // val files = sc.parallelize(fileList, numPartitions) 28 | 29 | val filesRdd = S3Utils.getFilesDistributed(bucketName, sc, numPartitions) 30 | // known bug in spark 1.1.0 31 | // filesRdd.partitions 32 | val files = filesRdd.repartition(numPartitions) 33 | 34 | log.info(s"Number of partitions: ${files.partitions.length}") 35 | 36 | val results = files.mapPartitions{ iterator => 37 | 38 | val listBuffer = ListBuffer[(Try[Any], String)]() 39 | val s3Client = S3Utils.getS3Client() 40 | 41 | while(iterator.hasNext) { 42 | val tuple = iterator.next() 43 | listBuffer += S3Utils.downloadFile(bucketName, tuple._2, outputFolder, s3Client) 44 | } 45 | 46 | listBuffer.iterator 47 | } 48 | 49 | log.info("Listing the file with errors:") 50 | val errorFiles = results.filter { x => 51 | x._1 match { 52 | case Success(v) => false 53 | case Failure(e) => true 54 | } 55 | }.collect() 56 | log.warn(s"There were ${errorFiles.size} files with error") 57 | errorFiles.foreach(t => log.error("", t._1.get)) 58 | 59 | if(errorFiles.size == 0){ 60 | s"Number of failed files: ${errorFiles.size}" 61 | } else { 62 | errorFiles 63 | } 64 | 65 | } 66 | 67 | override def validate(sc: SparkContext, config: Config): SparkJobValidation = { 68 | if(!config.hasPath("s3.bucket")) return SparkJobInvalid("The \"s3.bucket\" parameter is missing.") 69 | 
if(!config.hasPath("fs.output")) return SparkJobInvalid("The \"fs.output\" parameter is missing.") 70 | if(!config.hasPath("num.partitions")) return SparkJobInvalid("The \"num.partitions\" parameter is missing.") 71 | 72 | SparkJobValid() 73 | } 74 | 75 | } -------------------------------------------------------------------------------- /examples/s3-download-job/src/main/scala/com.job/S3Utils.scala: -------------------------------------------------------------------------------- 1 | package com.job 2 | 3 | import com.amazonaws.auth.profile.ProfileCredentialsProvider 4 | import com.amazonaws.regions.{Regions, Region} 5 | import com.amazonaws.services.s3.AmazonS3Client 6 | import com.amazonaws.services.s3.model.{S3ObjectInputStream, GetObjectRequest, ObjectListing, ListObjectsRequest} 7 | import org.apache.hadoop.conf.Configuration 8 | import org.apache.hadoop.fs.{FSDataOutputStream, Path, FileSystem} 9 | import org.apache.hadoop.io.IOUtils 10 | import org.apache.spark.SparkContext 11 | import org.slf4j.LoggerFactory 12 | 13 | import scala.collection.mutable.ListBuffer 14 | import scala.util.{Failure, Try} 15 | 16 | /** 17 | * Created by raduchilom on 4/18/15. 18 | */ 19 | object S3Utils { 20 | 21 | val log = LoggerFactory.getLogger(getClass) 22 | 23 | def getFiles(bucketName: String):List[(Int, String)] = { 24 | 25 | val s3Client: AmazonS3Client = getS3Client() 26 | 27 | val fileList = ListBuffer[(Int, String)]() 28 | 29 | try { 30 | log.info("Listing objects from S3") 31 | var counter = 0 32 | 33 | val listObjectsRequest = new ListObjectsRequest() 34 | .withBucketName(bucketName) 35 | var objectListing: ObjectListing = null 36 | 37 | do { 38 | import scala.collection.JavaConversions._ 39 | objectListing = s3Client.listObjects(listObjectsRequest) 40 | objectListing.getObjectSummaries.foreach { objectSummary => 41 | if(!objectSummary.getKey.endsWith(Path.SEPARATOR)) { 42 | fileList += Tuple2(counter, objectSummary.getKey) 43 | counter += 1 44 | } 45 | } 46 | listObjectsRequest.setMarker(objectListing.getNextMarker()); 47 | } while (objectListing.isTruncated()) 48 | 49 | log.info("Finished listing objects from S3") 50 | 51 | } catch { 52 | case e: Exception => { 53 | log.error("Failed listing files. 
", e) 54 | throw e 55 | } 56 | } 57 | 58 | fileList.toList 59 | } 60 | 61 | def getFilesDistributed(bucketName: String, sc: SparkContext, numPartitions: Int) = { 62 | val s3Client: AmazonS3Client = getS3Client() 63 | 64 | val fileList = ListBuffer[String]() 65 | val folderList = ListBuffer[String]() 66 | 67 | val listObjectsRequest = new ListObjectsRequest() 68 | .withBucketName(bucketName) 69 | .withPrefix("") 70 | .withDelimiter("/") 71 | 72 | var objectListing: ObjectListing = null 73 | 74 | do { 75 | import scala.collection.JavaConversions._ 76 | objectListing = s3Client.listObjects(listObjectsRequest) 77 | folderList ++= objectListing.getCommonPrefixes 78 | objectListing.getObjectSummaries.foreach { objectSummary => 79 | if(!objectSummary.getKey.endsWith(Path.SEPARATOR)) { 80 | fileList += objectSummary.getKey 81 | } 82 | } 83 | listObjectsRequest.setMarker(objectListing.getNextMarker()); 84 | } while (objectListing.isTruncated()) 85 | 86 | val folderRdd = sc.parallelize(folderList.toList, folderList.size) 87 | val filesRdd1 = folderRdd.flatMap{ folder => 88 | getFilesFromFolder(bucketName, folder) 89 | } 90 | filesRdd1.cache() 91 | filesRdd1.count() 92 | 93 | val filesRdd2 = sc.parallelize(fileList, numPartitions) 94 | val filesRdd = filesRdd1.union(filesRdd2) 95 | 96 | filesRdd.zipWithIndex().map { 97 | case (value, index) => (index, value) 98 | } 99 | } 100 | 101 | def getFilesFromFolder(bucketName: String, folderKey: String):List[String] = { 102 | 103 | val s3Client: AmazonS3Client = getS3Client() 104 | 105 | val fileList = ListBuffer[String]() 106 | 107 | try { 108 | log.info("Listing objects from S3") 109 | var counter = 0 110 | 111 | val listObjectsRequest = new ListObjectsRequest() 112 | .withBucketName(bucketName) 113 | .withPrefix(folderKey) 114 | var objectListing: ObjectListing = null 115 | 116 | do { 117 | import scala.collection.JavaConversions._ 118 | objectListing = s3Client.listObjects(listObjectsRequest) 119 | objectListing.getObjectSummaries.foreach { objectSummary => 120 | if(!objectSummary.getKey.endsWith(Path.SEPARATOR)) { 121 | fileList += objectSummary.getKey 122 | counter += 1 123 | } 124 | } 125 | listObjectsRequest.setMarker(objectListing.getNextMarker()); 126 | } while (objectListing.isTruncated()) 127 | 128 | log.info(s"Finished listing objects for folder $folderKey") 129 | 130 | } catch { 131 | case e: Exception => { 132 | log.error("Failed listing files. 
", e) 133 | throw e 134 | } 135 | } 136 | 137 | fileList.toList 138 | } 139 | 140 | def getS3Client(): AmazonS3Client = { 141 | // val awsCreds = new ProfileCredentialsProvider() 142 | // val s3client = new AmazonS3Client(awsCreds) 143 | val s3client = new AmazonS3Client() 144 | // s3client.setRegion(Region.getRegion(Regions.EU_WEST_1)) 145 | s3client 146 | } 147 | 148 | def downloadFile(bucketName: String, key: String, outputFolder: String, s3Client: AmazonS3Client): (Try[Any], String) = { 149 | 150 | val downloadTry = Try { 151 | 152 | var inputStream: S3ObjectInputStream = null 153 | var outputStream: FSDataOutputStream = null 154 | 155 | try { 156 | 157 | log.info(s"Downloading file: $key") 158 | val s3object = s3Client.getObject(new GetObjectRequest(bucketName, key)) 159 | inputStream = s3object.getObjectContent 160 | 161 | val outputPath = outputFolder + Path.SEPARATOR + key 162 | log.info(s"Writing file to: $outputPath") 163 | 164 | val conf = new Configuration(); 165 | conf.set("fs.defaultFS", outputFolder) 166 | conf.set("fs.tachyon.impl","tachyon.hadoop.TFS") 167 | // new instance & set file in configuration 168 | val fs = FileSystem.get(conf); 169 | outputStream = fs.create(new Path(outputPath)); 170 | 171 | IOUtils.copyBytes(inputStream, outputStream, 8192) 172 | 173 | fs.getFileStatus(new Path(outputPath)).getLen 174 | 175 | } finally { 176 | if (inputStream != null) { 177 | inputStream.close() 178 | } 179 | if (outputStream != null) { 180 | outputStream.close() 181 | } 182 | } 183 | } 184 | 185 | downloadTry match { 186 | case Failure(e:Throwable) => log.error("Error: ", e) 187 | case _ => 188 | } 189 | 190 | (downloadTry, key) 191 | } 192 | 193 | 194 | } 195 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.xpatterns 6 | spark-job-REST 7 | 0.3.2 8 | pom 9 | 10 | spark-job-REST 11 | http://maven.apache.org 12 | 13 | 14 | spark-job-rest-api 15 | spark-job-rest-sql 16 | spark-job-rest 17 | spark-job-rest-client 18 | examples/example-job 19 | examples/s3-download-job 20 | 21 | -------------------------------------------------------------------------------- /spark-job-rest-api/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.xpatterns 6 | spark-job-rest-api 7 | 0.3.2 8 | jar 9 | 10 | spark-job-rest-api 11 | https://github.com/Atigeo/spark-job-rest 12 | 13 | The API for Spark-Job-Rest. 14 | Contains the SparkJob interface that must be extended in order to run jobs on the server. 
15 | 16 | 17 | 18 | 19 | The Apache License, Version 2.0 20 | http://www.apache.org/licenses/LICENSE-2.0.txt 21 | 22 | 23 | 24 | 25 | 26 | Radu Chilom 27 | raduchilom@gmail.com 28 | Atigeo 29 | http://www.atigeo.com 30 | 31 | 32 | 33 | 34 | scm:git:git@github.com:Atigeo/spark-job-rest.git 35 | scm:git:git@github.com:Atigeo/spark-job-rest.git 36 | git@github.com:Atigeo/spark-job-rest.git 37 | 38 | 39 | 40 | 41 | ossrh 42 | https://oss.sonatype.org/content/repositories/snapshots 43 | 44 | 45 | ossrh 46 | https://oss.sonatype.org/service/local/staging/deploy/maven2/ 47 | 48 | 49 | 50 | 51 | 52 | mvnrepository 53 | http://repo1.maven.org/maven2 54 | 55 | 56 | Akka repository 57 | http://repo.akka.io/releases 58 | 59 | 60 | cloudera-repo-releases 61 | https://repository.cloudera.com/artifactory/repo/ 62 | 63 | 64 | 65 | 66 | UTF-8 67 | 2.10.3 68 | 2.10 69 | 70 | 71 | 72 | 73 | org.apache.spark 74 | spark-core_2.10 75 | 1.3.1 76 | provided 77 | 78 | 79 | io.spray 80 | spray-json_2.10 81 | 1.2.6 82 | 83 | 84 | 85 | 86 | 87 | 88 | src/main/resources 89 | 90 | * 91 | 92 | false 93 | 94 | 95 | ${project.artifactId} 96 | 97 | 98 | org.apache.maven.plugins 99 | maven-compiler-plugin 100 | 3.1 101 | 102 | 1.7 103 | 1.7 104 | 105 | 106 | 107 | net.alchim31.maven 108 | scala-maven-plugin 109 | 3.2.0 110 | 111 | 112 | scala-compile-first 113 | process-resources 114 | 115 | add-source 116 | compile 117 | 118 | 119 | 120 | scala-test-compile 121 | process-test-resources 122 | 123 | testCompile 124 | 125 | 126 | 127 | doc 128 | generate-sources 129 | 130 | doc-jar 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | release 141 | 142 | 143 | 144 | org.sonatype.plugins 145 | nexus-staging-maven-plugin 146 | 1.6.3 147 | true 148 | 149 | ossrh 150 | https://oss.sonatype.org/ 151 | true 152 | 153 | 154 | 155 | 156 | org.apache.maven.plugins 157 | maven-source-plugin 158 | 2.2.1 159 | 160 | 161 | attach-sources 162 | 163 | jar-no-fork 164 | 165 | 166 | 167 | 168 | 169 | org.apache.maven.plugins 170 | maven-javadoc-plugin 171 | 2.9.1 172 | 173 | 174 | attach-javadocs 175 | 176 | jar 177 | 178 | 179 | 180 | 181 | 182 | org.apache.maven.plugins 183 | maven-gpg-plugin 184 | 1.5 185 | 186 | 187 | sign-artifacts 188 | verify 189 | 190 | sign 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | -------------------------------------------------------------------------------- /spark-job-rest-api/src/main/scala/api/ContextLike.scala: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import org.apache.spark.SparkContext 4 | 5 | trait ContextLike { 6 | /** 7 | * Type of the context for representation 8 | */ 9 | val contextClass: String 10 | 11 | override def toString: String = { 12 | super.toString + s"($contextClass)" 13 | } 14 | 15 | /** 16 | * Underlying Spark context 17 | * @return 18 | */ 19 | def sparkContext: SparkContext 20 | 21 | /** 22 | * Validates whether job is valid for this context 23 | * @param job job to validate 24 | * @return 25 | */ 26 | def validateJob(job: SparkJobBase): SparkJobValidation = 27 | if (isValidJob(job)) 28 | SparkJobValid() 29 | else 30 | SparkJobInvalid(s"Job ${job.toString} doesn't match context $this.") 31 | 32 | /** 33 | * Validates whether job is valid for this context 34 | * Should be implemented in concrete classes. 
35 | * @param job job to validate 36 | * @return 37 | */ 38 | def isValidJob(job: SparkJobBase): Boolean 39 | 40 | /** 41 | * This method should be called during cleanup 42 | */ 43 | def stop() 44 | } 45 | -------------------------------------------------------------------------------- /spark-job-rest-api/src/main/scala/api/SparkJob.scala: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import com.typesafe.config.Config 4 | import org.apache.spark.SparkContext 5 | 6 | sealed trait SparkJobValidation { 7 | // NOTE(harish): We tried using lazy eval here by passing in a function 8 | // instead, which worked fine with tests but when run with the job-server 9 | // it would just hang and timeout. This is something worth investigating 10 | def &&(sparkValidation: SparkJobValidation): SparkJobValidation = this match { 11 | case x => x 12 | } 13 | } 14 | case class SparkJobValid() extends SparkJobValidation 15 | case class SparkJobInvalid(reason: String) extends SparkJobValidation 16 | 17 | /** 18 | * This trait is the main API for Spark jobs submitted to the Job Server. 19 | */ 20 | trait SparkJobBase { 21 | type C 22 | /** 23 | * This is the entry point for a Spark Job Server to execute Spark jobs. 24 | * This function should create or reuse RDDs and return the result at the end, which the 25 | * Job Server will cache or display. 26 | * @param sc a SparkContext for the job. May be reused across jobs. 27 | * @param jobConfig the Typesafe Config object passed into the job request 28 | * @return the job result 29 | */ 30 | def runJob(sc: C, jobConfig: Config): Any 31 | 32 | /** 33 | * This method is called by the job server to allow jobs to validate their input and reject 34 | * invalid job requests. If SparkJobInvalid is returned, then the job server returns 400 35 | * to the user. 36 | * NOTE: this method should return very quickly. If it responds slowly then the job server may time out 37 | * trying to start this job. 38 | * @return either SparkJobValid or SparkJobInvalid 39 | */ 40 | def validate(sc: C, config: Config): SparkJobValidation 41 | } 42 | 43 | trait SparkJob extends SparkJobBase { 44 | type C = SparkContext 45 | } 46 | -------------------------------------------------------------------------------- /spark-job-rest-api/src/main/scala/responses/JobStates.scala: -------------------------------------------------------------------------------- 1 | package responses 2 | 3 | /** 4 | * States of Spark jobs. 5 | */ 6 | object JobStates { 7 | 8 | sealed abstract class JobState(val name: String) { 9 | override def toString = name 10 | } 11 | 12 | case object RUNNING extends JobState("Running") 13 | 14 | case object ERROR extends JobState("Error") 15 | 16 | case object FINISHED extends JobState("Finished") 17 | 18 | case object QUEUED extends JobState("Queued") 19 | 20 | } 21 | -------------------------------------------------------------------------------- /spark-job-rest-api/src/main/scala/responses/ResponseObjects.scala: -------------------------------------------------------------------------------- 1 | package responses 2 | 3 | import spray.json.DefaultJsonProtocol._ 4 | 5 | /** 6 | * Created by raduc on 24/04/15. 
7 | */ 8 | 9 | case class Context(contextName: String, sparkUiPort: String) 10 | 11 | object Context { 12 | implicit val logJson = jsonFormat2(apply) 13 | } 14 | 15 | case class Contexts(contexts: Array[Context]) 16 | 17 | object Contexts { 18 | implicit val logJson = jsonFormat1(apply) 19 | } 20 | 21 | case class Job(jobId: String, contextName: String, status: String, result: String, startTime: Long) 22 | 23 | object Job { 24 | implicit val logJson = jsonFormat5(apply) 25 | } 26 | 27 | case class Jobs(jobs: Array[Job]) 28 | 29 | object Jobs { 30 | implicit val logJson = jsonFormat1(apply) 31 | } 32 | 33 | case class JarInfo(name: String, size: Long, timestamp: Long) 34 | 35 | object JarInfo { 36 | implicit val logJson = jsonFormat3(apply) 37 | } 38 | 39 | case class JarsInfo(jars: Array[JarInfo]) 40 | 41 | object JarsInfo { 42 | implicit val logJson = jsonFormat1(apply) 43 | } 44 | 45 | case class ErrorResponse(error: String) 46 | 47 | object ErrorResponse { 48 | implicit val logJson = jsonFormat1(apply) 49 | } 50 | 51 | case class SimpleMessage(message: String) 52 | 53 | object SimpleMessage { 54 | implicit val logJson = jsonFormat1(apply) 55 | } 56 | 57 | 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /spark-job-rest-client/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.xpatterns 6 | spark-job-rest-client 7 | 0.3.2 8 | jar 9 | 10 | spark-job-rest-client 11 | https://github.com/Atigeo/spark-job-rest 12 | 13 | The Http Spray Client for Spark-Job-Rest. 14 | 15 | 16 | 17 | The Apache License, Version 2.0 18 | http://www.apache.org/licenses/LICENSE-2.0.txt 19 | 20 | 21 | 22 | 23 | 24 | Radu Chilom 25 | raduchilom@gmail.com 26 | Atigeo 27 | http://www.atigeo.com 28 | 29 | 30 | 31 | 32 | scm:git:git@github.com:Atigeo/spark-job-rest.git 33 | scm:git:git@github.com:Atigeo/spark-job-rest.git 34 | git@github.com:Atigeo/spark-job-rest.git 35 | 36 | 37 | 38 | 39 | ossrh 40 | https://oss.sonatype.org/content/repositories/snapshots 41 | 42 | 43 | ossrh 44 | https://oss.sonatype.org/service/local/staging/deploy/maven2/ 45 | 46 | 47 | 48 | 49 | 50 | mvnrepository 51 | http://repo1.maven.org/maven2 52 | 53 | 54 | Akka repository 55 | http://repo.akka.io/releases 56 | 57 | 58 | cloudera-repo-releases 59 | https://repository.cloudera.com/artifactory/repo/ 60 | 61 | 62 | 63 | 64 | UTF-8 65 | 2.10.3 66 | 2.10 67 | 68 | 69 | 70 | 71 | io.spray 72 | spray-client 73 | 1.2.1 74 | 75 | 76 | com.typesafe 77 | config 78 | 1.2.1 79 | 80 | 81 | com.typesafe.akka 82 | akka-actor_2.10 83 | 2.3.4 84 | 85 | 86 | com.xpatterns 87 | spark-job-rest-api 88 | ${version} 89 | 90 | 91 | org.slf4j 92 | slf4j-api 93 | 1.7.10 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | src/main/resources 102 | 103 | * 104 | 105 | false 106 | 107 | 108 | ${project.artifactId} 109 | 110 | 111 | org.apache.maven.plugins 112 | maven-compiler-plugin 113 | 3.1 114 | 115 | 1.7 116 | 1.7 117 | 118 | 119 | 120 | net.alchim31.maven 121 | scala-maven-plugin 122 | 3.2.0 123 | 124 | 125 | scala-compile-first 126 | process-resources 127 | 128 | add-source 129 | compile 130 | 131 | 132 | 133 | scala-test-compile 134 | process-test-resources 135 | 136 | testCompile 137 | 138 | 139 | 140 | doc 141 | generate-sources 142 | 143 | doc-jar 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | release 156 | 157 | 158 | 159 | org.sonatype.plugins 160 | nexus-staging-maven-plugin 161 | 1.6.3 162 | true 163 | 164 | ossrh 165 | 
https://oss.sonatype.org/ 166 | true 167 | 168 | 169 | 170 | 171 | org.apache.maven.plugins 172 | maven-source-plugin 173 | 2.2.1 174 | 175 | 176 | attach-sources 177 | 178 | jar-no-fork 179 | 180 | 181 | 182 | 183 | 184 | org.apache.maven.plugins 185 | maven-javadoc-plugin 186 | 2.9.1 187 | 188 | 189 | attach-javadocs 190 | 191 | jar 192 | 193 | 194 | 195 | 196 | 197 | org.apache.maven.plugins 198 | maven-gpg-plugin 199 | 1.5 200 | 201 | 202 | sign-artifacts 203 | verify 204 | 205 | sign 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | -------------------------------------------------------------------------------- /spark-job-rest-client/src/main/scala/client/SparkJobRestClient.scala: -------------------------------------------------------------------------------- 1 | package client 2 | 3 | import java.io.File 4 | import java.util.concurrent.TimeUnit 5 | 6 | import akka.actor.{ActorSystem} 7 | import akka.util.Timeout 8 | import org.slf4j.LoggerFactory 9 | import spray.http._ 10 | import spray.client.pipelining._ 11 | import responses._ 12 | import spray.httpx.SprayJsonSupport.sprayJsonUnmarshaller 13 | import spray.httpx.UnsuccessfulResponseException 14 | import spray.httpx.unmarshalling.Unmarshaller 15 | import spray.json.DefaultJsonProtocol._ 16 | 17 | import scala.concurrent.duration.Duration 18 | import scala.concurrent.{Await, Future} 19 | import scala.util.{Failure, Success} 20 | 21 | /** 22 | * Created by raduc on 23/04/15. 23 | */ 24 | class SparkJobRestClient(serverAddress: String)(implicit system: ActorSystem) { 25 | import system.dispatcher 26 | val log = LoggerFactory.getLogger(getClass) 27 | 28 | val contextsRoute = "/contexts" 29 | val jobsRoute = "/jobs" 30 | val jarsRoute = "/jars" 31 | val heartBeatRoute = "/heartbeat" 32 | 33 | val SEPARATOR = "/" 34 | 35 | implicit val timeout = Timeout(30, TimeUnit.SECONDS) 36 | 37 | 38 | // ============ Contexts Route ============ 39 | @throws(classOf[Exception]) 40 | def getContexts() : Contexts = { 41 | 42 | val pipeline: HttpRequest => Future[Contexts] = sendReceive ~> unmarshal[Contexts] 43 | 44 | val response: Future[Contexts] = pipeline(Get(serverAddress + contextsRoute)) 45 | 46 | Await.ready(response, Duration.create(30, TimeUnit.SECONDS)).value.get match { 47 | 48 | case Success(contexts: Contexts) => { 49 | return contexts 50 | } 51 | case Failure(e) => { 52 | log.error("Failed request: ", e) 53 | throw e 54 | } 55 | 56 | } 57 | 58 | null 59 | } 60 | 61 | @throws(classOf[Exception]) 62 | def getContext() : Context = { 63 | 64 | val pipeline: HttpRequest => Future[Context] = sendReceive ~> unmarshal[Context] 65 | 66 | val response: Future[Context] = pipeline(Get(serverAddress + contextsRoute)) 67 | 68 | Await.ready(response, Duration.create(30, TimeUnit.SECONDS)).value.get match { 69 | 70 | case Success(context: Context) => { 71 | return context 72 | } 73 | case Failure(e: UnsuccessfulResponseException) => { 74 | log.error("Unsuccessful response: ", e) 75 | throw e 76 | } 77 | case Failure(e) => { 78 | log.error("Failed request: ", e) 79 | throw e 80 | } 81 | 82 | } 83 | 84 | null 85 | } 86 | 87 | @throws(classOf[Exception]) 88 | def checkIfContextExists(contextName: String) : Boolean = { 89 | 90 | val pipeline: HttpRequest => Future[Context] = sendReceive ~> unmarshal[Context] 91 | 92 | val response: Future[Context] = pipeline(Get(serverAddress + contextsRoute + SEPARATOR + contextName)) 93 | 94 | Await.ready(response, Duration.create(30, TimeUnit.SECONDS)).value.get match { 95 | 96 | case Success(context: 
Context) => { 97 | return true 98 | } 99 | case Failure(e: UnsuccessfulResponseException) => { 100 | log.error("Unsuccessful response: ", e) 101 | return false 102 | } 103 | case Failure(e: Throwable) => { 104 | log.error("Unsuccessful request: ", e) 105 | throw e 106 | } 107 | 108 | } 109 | 110 | false 111 | } 112 | 113 | @throws(classOf[Exception]) 114 | def deleteContext(contextName: String) : Boolean = { 115 | 116 | val pipeline: HttpRequest => Future[SimpleMessage] = sendReceive ~> unmarshal[SimpleMessage] 117 | 118 | val response: Future[SimpleMessage] = pipeline(Delete(serverAddress + contextsRoute + SEPARATOR + contextName)) 119 | 120 | Await.ready(response, Duration.create(30, TimeUnit.SECONDS)).value.get match { 121 | 122 | case Success(simpleMessage: SimpleMessage) => { 123 | return true 124 | } 125 | case Failure(e: UnsuccessfulResponseException) => { 126 | log.error("Unsuccessful response: ", e) 127 | throw e 128 | } 129 | case Failure(e: Throwable) => { 130 | log.error("Unsuccessful request: ", e) 131 | throw e 132 | } 133 | 134 | } 135 | 136 | false 137 | } 138 | 139 | @throws(classOf[Exception]) 140 | def createContext(contextName: String, parameters: Map[String, String]) : Context = { 141 | 142 | val body = createParametersString(parameters) 143 | 144 | val pipeline: HttpRequest => Future[Context] = sendReceive ~> unmarshal[Context] 145 | 146 | val response: Future[Context] = pipeline(Post(serverAddress + contextsRoute + SEPARATOR + contextName, body)) 147 | 148 | Await.ready(response, Duration.create(30, TimeUnit.SECONDS)).value.get match { 149 | 150 | case Success(context: Context) => { 151 | return context 152 | } 153 | case Failure(e: UnsuccessfulResponseException) => { 154 | log.error("Unsuccessful response: ", e) 155 | throw e 156 | } 157 | case Failure(e: Throwable) => { 158 | log.error("Unsuccessful request: ", e) 159 | throw e 160 | } 161 | 162 | } 163 | 164 | null 165 | } 166 | 167 | // ============ Jobs Route ============ 168 | @throws(classOf[Exception]) 169 | def getJobs() : Jobs = { 170 | 171 | val pipeline: HttpRequest => Future[Jobs] = sendReceive ~> unmarshal[Jobs] 172 | 173 | val response: Future[Jobs] = pipeline(Get(serverAddress + jobsRoute)) 174 | 175 | Await.ready(response, Duration.create(30, TimeUnit.SECONDS)).value.get match { 176 | 177 | case Success(jobs: Jobs) => { 178 | return jobs 179 | } 180 | case Failure(e) => { 181 | log.error("Failed request: ", e) 182 | throw e 183 | } 184 | 185 | } 186 | 187 | null 188 | } 189 | 190 | @throws(classOf[Exception]) 191 | def getJob(jobId: String, contextName: String) : Job = { 192 | 193 | val pipeline: HttpRequest => Future[Job] = sendReceive ~> unmarshal[Job] 194 | 195 | val response: Future[Job] = pipeline(Get(serverAddress + jobsRoute + SEPARATOR + jobId + "?contextName=" + contextName)) 196 | 197 | Await.ready(response, Duration.create(30, TimeUnit.SECONDS)).value.get match { 198 | 199 | case Success(job: Job) => { 200 | return job 201 | } 202 | case Failure(e: UnsuccessfulResponseException) => { 203 | log.error("Unsuccessful response: ", e) 204 | throw e 205 | } 206 | case Failure(e: Throwable) => { 207 | log.error("Unsuccessful request: ", e) 208 | throw e 209 | } 210 | 211 | } 212 | 213 | null 214 | } 215 | 216 | @throws(classOf[Exception]) 217 | def runJob(runningClass: String, contextName: String, parameters: Map[String, String]) : Job = { 218 | 219 | val body = createParametersString(parameters) 220 | 221 | val pipeline: HttpRequest => Future[Job] = sendReceive ~> unmarshal[Job] 222 | 223 | val 
response: Future[Job] = pipeline(Post(serverAddress + jobsRoute + "?runningClass=" + runningClass + "&contextName=" + contextName, body)) 224 | 225 | Await.ready(response, Duration.create(30, TimeUnit.SECONDS)).value.get match { 226 | 227 | case Success(job: Job) => { 228 | return job 229 | } 230 | case Failure(e: UnsuccessfulResponseException) => { 231 | log.error("Unsuccessful response: ", e) 232 | throw e 233 | } 234 | case Failure(e: Throwable) => { 235 | log.error("Unsuccessful request: ", e) 236 | throw e 237 | } 238 | 239 | } 240 | 241 | null 242 | } 243 | 244 | // ============ Jars Route ============ 245 | @throws(classOf[Exception]) 246 | def getJars() : JarsInfo = { 247 | 248 | val pipeline: HttpRequest => Future[JarsInfo] = sendReceive ~> unmarshal[JarsInfo] 249 | 250 | val response: Future[JarsInfo] = pipeline(Get(serverAddress + jarsRoute)) 251 | 252 | Await.ready(response, Duration.create(30, TimeUnit.SECONDS)).value.get match { 253 | 254 | case Success(jarsInfo: JarsInfo) => { 255 | return jarsInfo 256 | } 257 | case Failure(e) => { 258 | log.error("Failed request: ", e) 259 | throw e 260 | } 261 | 262 | } 263 | 264 | null 265 | } 266 | 267 | @throws(classOf[Exception]) 268 | def deleteJar(jarName: String) : Boolean = { 269 | 270 | val pipeline: HttpRequest => Future[SimpleMessage] = sendReceive ~> unmarshal[SimpleMessage] 271 | 272 | val response: Future[SimpleMessage] = pipeline(Delete(serverAddress + jarsRoute + SEPARATOR + jarName)) 273 | 274 | Await.ready(response, Duration.create(30, TimeUnit.SECONDS)).value.get match { 275 | 276 | case Success(simpleMessage: SimpleMessage) => { 277 | return true 278 | } 279 | case Failure(e: UnsuccessfulResponseException) => { 280 | log.error("Unsuccessful response: ", e) 281 | return false 282 | } 283 | case Failure(e: Throwable) => { 284 | log.error("Unsuccessful request: ", e) 285 | throw e 286 | } 287 | 288 | } 289 | 290 | false 291 | } 292 | 293 | @throws(classOf[Exception]) 294 | def uploadJar(jarName: String, jarPath: String) : JarInfo = { 295 | 296 | val pipeline: HttpRequest => Future[JarInfo] = sendReceive ~> unmarshal[JarInfo] 297 | 298 | val body = MultipartFormData(Seq(BodyPart(new File(jarPath), jarName, MediaTypes.`application/java-archive`))) 299 | 300 | val response: Future[JarInfo] = pipeline(Post(serverAddress + jarsRoute , body)) 301 | 302 | Await.ready(response, Duration.create(30, TimeUnit.SECONDS)).value.get match { 303 | 304 | case Success(jarInfo: JarInfo) => { 305 | return jarInfo 306 | } 307 | case Failure(e: UnsuccessfulResponseException) => { 308 | log.error("Unsuccessful response: ", e) 309 | throw e 310 | } 311 | case Failure(e: Throwable) => { 312 | log.error("Unsuccessful request: ", e) 313 | throw e 314 | } 315 | 316 | } 317 | 318 | null 319 | } 320 | 321 | def createParametersString(parameters: Map[String, String]): String = { 322 | parameters.foldLeft("") { case (acc, (key, value)) => { 323 | acc + key + "=" + value + "\n" 324 | } 325 | } 326 | } 327 | } 328 | -------------------------------------------------------------------------------- /spark-job-rest-sql/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.xpatterns 6 | spark-job-rest-sql 7 | 0.3.2 8 | jar 9 | 10 | spark-job-rest-sql 11 | http://maven.apache.org 12 | 13 | 14 | 15 | mvnrepository 16 | http://repo1.maven.org/maven2 17 | 18 | 19 | cloudera-repo-releases 20 | https://repository.cloudera.com/artifactory/repo/ 21 | 22 | 23 | Akka repository 24 | 
http://repo.akka.io/releases 25 | 26 | 27 | 28 | 29 | 30 | UTF-8 31 | 2.10.3 32 | 2.10 33 | 34 | 35 | 36 | 37 | org.apache.spark 38 | spark-core_2.10 39 | 1.3.1 40 | provided 41 | 42 | 43 | 44 | org.apache.spark 45 | spark-hive_2.10 46 | 1.3.1 47 | provided 48 | 49 | 50 | 51 | org.apache.spark 52 | spark-sql_2.10 53 | 1.3.1 54 | provided 55 | 56 | 57 | 58 | com.xpatterns 59 | spark-job-rest-api 60 | ${version} 61 | provided 62 | 63 | 64 | 65 | com.xpatterns 66 | spark-job-rest 67 | ${version} 68 | provided 69 | 70 | 71 | 72 | com.typesafe 73 | config 74 | 1.2.1 75 | provided 76 | 77 | 78 | 79 | 80 | junit 81 | junit 82 | 4.4 83 | test 84 | 85 | 86 | 87 | org.scalatest 88 | scalatest_2.10 89 | 2.2.4 90 | test 91 | 92 | 93 | 94 | 95 | 96 | 97 | src/main/resources 98 | 99 | * 100 | 101 | false 102 | 103 | 104 | ${project.artifactId} 105 | 106 | 107 | org.apache.maven.plugins 108 | maven-compiler-plugin 109 | 3.1 110 | 111 | 1.7 112 | 1.7 113 | 114 | 115 | 116 | net.alchim31.maven 117 | scala-maven-plugin 118 | 3.2.0 119 | 120 | 121 | scala-compile-first 122 | process-resources 123 | 124 | add-source 125 | compile 126 | 127 | 128 | 129 | scala-test-compile 130 | process-test-resources 131 | 132 | testCompile 133 | 134 | 135 | 136 | doc 137 | generate-sources 138 | 139 | doc-jar 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | -------------------------------------------------------------------------------- /spark-job-rest-sql/src/main/scala/api/SparkSqlJob.scala: -------------------------------------------------------------------------------- 1 | package api 2 | 3 | import org.apache.spark.sql.SQLContext 4 | 5 | trait SparkSqlJob extends SparkJobBase { 6 | type C = SQLContext 7 | } 8 | -------------------------------------------------------------------------------- /spark-job-rest-sql/src/main/scala/context/HiveContextFactory.scala: -------------------------------------------------------------------------------- 1 | package context 2 | 3 | import api.{ContextLike, SparkJobBase, SparkSqlJob} 4 | import com.typesafe.config.Config 5 | import org.apache.spark.SparkContext 6 | import org.apache.spark.sql.hive.HiveContext 7 | import org.slf4j.LoggerFactory 8 | 9 | /** 10 | * Factory which creates Hive context. 
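 *
 * A minimal sketch of how this factory is wired in; it mirrors the `context` section of the
 * default `application.conf` shipped in resources and is illustrative, not normative:
 * {{{
 *   context {
 *     # Context factory that will be dynamically loaded to instantiate the job context
 *     job-context-factory = "context.HiveContextFactory"
 *   }
 * }}}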
11 | */ 12 | class HiveContextFactory extends SQLContextFactory { 13 | type C = HiveContext with ContextLike 14 | val logger = LoggerFactory.getLogger(getClass) 15 | 16 | def makeContext(config: Config, sc: SparkContext): C = { 17 | logger.info(s"Creating Hive context for Spark context $sc.") 18 | new HiveContext(sc) with ContextLike { 19 | val contextClass = classOf[HiveContext].getName 20 | def isValidJob(job: SparkJobBase) = job.isInstanceOf[SparkSqlJob] 21 | def stop() = sparkContext.stop() 22 | } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /spark-job-rest-sql/src/main/scala/context/SQLContextFactory.scala: -------------------------------------------------------------------------------- 1 | package context 2 | 3 | import com.typesafe.config.Config 4 | import org.apache.spark.SparkContext 5 | 6 | import scala.util.Try 7 | 8 | trait SQLContextFactory extends JobContextFactory { 9 | /** 10 | * Creates Spark context from class specified under [[SQLContextFactory.sparkContextFactoryClassNameConfigEntry]] 11 | * config entry or from [[JobContextFactory.defaultFactoryClassName]] 12 | * @param config general configuration 13 | * @param contextName context name 14 | * @return 15 | */ 16 | def makeContext(config: Config, contextName: String): C = { 17 | val sparkContext = getSparkContextFactory(config) 18 | .makeContext(config: Config, contextName: String) 19 | .asInstanceOf[SparkContext] 20 | makeContext(config, sparkContext) 21 | } 22 | 23 | /** 24 | * Creates SQL context for specified Spark context. 25 | * Should be implemented by concrete SQL context factory 26 | * @param config general configuration 27 | * @param sparkContext underlying Spark context 28 | * @return 29 | */ 30 | def makeContext(config: Config, sparkContext: SparkContext): C 31 | 32 | /** 33 | * Loads factory for Spark context. 34 | * @param config general configuration as in [[JobContextFactory.getFactory()]] 35 | * @return 36 | */ 37 | def getSparkContextFactory(config: Config): JobContextFactory = { 38 | val className = Try { 39 | config.getString(SQLContextFactory.sparkContextFactoryClassNameConfigEntry) 40 | }.getOrElse(JobContextFactory.defaultFactoryClassName) 41 | JobContextFactory.getFactory(className) 42 | } 43 | } 44 | 45 | object SQLContextFactory { 46 | val sparkContextFactoryClassNameConfigEntry = "context.spark-context-factory" 47 | } 48 | -------------------------------------------------------------------------------- /spark-job-rest-sql/src/main/scala/context/SparkSQLContextFactory.scala: -------------------------------------------------------------------------------- 1 | package context 2 | 3 | import api.{ContextLike, SparkJobBase, SparkSqlJob} 4 | import com.typesafe.config.Config 5 | import org.apache.spark.SparkContext 6 | import org.apache.spark.sql.SQLContext 7 | import org.slf4j.LoggerFactory 8 | 9 | /** 10 | * Factory which creates simple SQL context. 
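 *
 * Selected the same way as the other job context factories; a minimal sketch, assuming the
 * default Spark context factory from `application.conf`:
 * {{{
 *   context {
 *     job-context-factory   = "context.SparkSQLContextFactory"
 *     spark-context-factory = "context.SparkContextFactory"
 *   }
 * }}}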
11 | */ 12 | class SparkSQLContextFactory extends SQLContextFactory { 13 | type C = SQLContext with ContextLike 14 | val logger = LoggerFactory.getLogger(getClass) 15 | 16 | def makeContext(config: Config, sc: SparkContext): C = { 17 | logger.info(s"Creating SQL context for Spark context $sc.") 18 | new SQLContext(sc) with ContextLike { 19 | val contextClass = classOf[SQLContext].getName 20 | def isValidJob(job: SparkJobBase) = job.isInstanceOf[SparkSqlJob] 21 | def stop() = sparkContext.stop() 22 | } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /spark-job-rest-sql/src/test/scala/context/HiveContextFactorySpec.scala: -------------------------------------------------------------------------------- 1 | package context 2 | 3 | import api.ContextLike 4 | import com.typesafe.config.ConfigFactory 5 | import org.apache.spark.sql.hive.HiveContext 6 | import org.junit.runner.RunWith 7 | import org.scalatest._ 8 | import org.scalatest.junit.JUnitRunner 9 | 10 | import scala.util.Try 11 | 12 | /** 13 | * Test suite for [[HiveContextFactory]]. 14 | */ 15 | @RunWith(classOf[JUnitRunner]) 16 | class HiveContextFactorySpec extends WordSpec with MustMatchers with BeforeAndAfter { 17 | type C = HiveContext with ContextLike 18 | 19 | var hiveContext: C = _ 20 | 21 | val hiveContextFactory = new HiveContextFactory() 22 | 23 | // Clean Spark context after each test 24 | after { 25 | Try{ hiveContext.stop() } 26 | } 27 | 28 | "HiveContextFactory" should { 29 | "create Hive context" in { 30 | hiveContext = hiveContextFactory.makeContext(config, this.getClass.getName) 31 | hiveContext.sparkContext.appName mustEqual this.getClass.getName 32 | } 33 | 34 | "stop underlying Spark context if context is stopped" in { 35 | hiveContext = hiveContextFactory.makeContext(config, "context1") 36 | hiveContext.stop() 37 | hiveContext = hiveContextFactory.makeContext(config, "context2") 38 | hiveContext.sparkContext.appName mustEqual "context2" 39 | } 40 | } 41 | 42 | val config = ConfigFactory.parseString( 43 | """ 44 | |{ 45 | | context.jars = [], 46 | | spark.master = "local" 47 | |} 48 | """.stripMargin) 49 | } 50 | -------------------------------------------------------------------------------- /spark-job-rest-sql/src/test/scala/context/SQLContextFactorySpec.scala: -------------------------------------------------------------------------------- 1 | package context 2 | 3 | import api.{ContextLike, SparkJobBase} 4 | import com.typesafe.config.{Config, ConfigFactory} 5 | import org.apache.spark.SparkContext 6 | import org.apache.spark.sql.SQLContext 7 | import org.apache.spark.sql.hive.HiveContext 8 | import org.junit.runner.RunWith 9 | import org.scalatest._ 10 | import org.scalatest.junit.JUnitRunner 11 | import utils.ContextUtils.configToSparkConf 12 | 13 | import scala.util.Try 14 | 15 | trait FakeContext 16 | 17 | class FakeJobContextFactory extends JobContextFactory { 18 | type C = ContextLike 19 | def makeContext(config: Config, contextName: String): ContextLike = { 20 | val sparkConf = configToSparkConf(config, contextName) 21 | new SparkContext(sparkConf) with ContextLike with FakeContext { 22 | val contextClass = classOf[FakeContext].getName 23 | override def isValidJob(job: SparkJobBase): Boolean = true 24 | override def sparkContext: SparkContext = this 25 | } 26 | } 27 | } 28 | 29 | /** 30 | * Test suite for [[SQLContextFactory]]. 
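 * Exercises factory resolution through the `context.job-context-factory` and
 * `context.spark-context-factory` configuration entries (see the configs at the bottom of
 * this spec), using the [[FakeJobContextFactory]] defined above as a stand-in Spark context factory.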
31 | */ 32 | @RunWith(classOf[JUnitRunner]) 33 | class SQLContextFactorySpec extends WordSpec with MustMatchers with BeforeAndAfter { 34 | type C <: ContextLike 35 | 36 | var sqlContext: C = _ 37 | 38 | // Clean up Spark context after each test 39 | after { 40 | Try{ sqlContext.stop() } 41 | } 42 | 43 | "SQLContextFactory" should { 44 | "create SQL context" in { 45 | sqlContext = JobContextFactory.makeContext(sqlContextFactoryConfig, "test").asInstanceOf[C] 46 | sqlContext.isInstanceOf[SQLContext] mustEqual true 47 | sqlContext.sparkContext.isInstanceOf[SparkContext] mustEqual true 48 | sqlContext.sparkContext.appName mustEqual "test" 49 | } 50 | 51 | "create SQL context on top of specified Spark context factory" in { 52 | sqlContext = JobContextFactory.makeContext(hiveSqlFactoryWithCustomSparkContextConfig, "test").asInstanceOf[C] 53 | sqlContext.isInstanceOf[HiveContext] mustEqual true 54 | sqlContext.sparkContext.isInstanceOf[FakeContext] mustEqual true 55 | sqlContext.sparkContext.appName mustEqual "test" 56 | } 57 | } 58 | 59 | val sqlContextFactoryConfig = ConfigFactory.parseString( 60 | """ 61 | |{ 62 | | context.jars = [], 63 | | context.job-context-factory = "context.SparkSQLContextFactory" 64 | | spark.master = "local", 65 | | spark.app.id = "test" 66 | |} 67 | """.stripMargin).resolve() 68 | 69 | val hiveSqlFactoryWithCustomSparkContextConfig = ConfigFactory.parseString( 70 | """ 71 | |{ 72 | | context.jars = [], 73 | | context.job-context-factory = "context.HiveContextFactory" 74 | | context.spark-context-factory = "context.FakeJobContextFactory" 75 | | spark.master = "local", 76 | | spark.app.id = "test" 77 | |} 78 | """.stripMargin).resolve() 79 | } 80 | -------------------------------------------------------------------------------- /spark-job-rest-sql/src/test/scala/context/SparkSQLContextFactorySpec.scala: -------------------------------------------------------------------------------- 1 | package context 2 | 3 | import api.ContextLike 4 | import com.typesafe.config.ConfigFactory 5 | import org.apache.spark.sql.SQLContext 6 | import org.junit.runner.RunWith 7 | import org.scalatest._ 8 | import org.scalatest.junit.JUnitRunner 9 | 10 | import scala.util.Try 11 | 12 | /** 13 | * Test suite for [[HiveContextFactory]]. 
14 | */ 15 | @RunWith(classOf[JUnitRunner]) 16 | class SparkSQLContextFactorySpec extends WordSpec with MustMatchers with BeforeAndAfter { 17 | type C = SQLContext with ContextLike 18 | 19 | var sqlContext: C = _ 20 | 21 | val sqlContextFactory = new SparkSQLContextFactory() 22 | 23 | // Clean up Spark context after each test 24 | after { 25 | Try{ sqlContext.stop() } 26 | } 27 | 28 | "SQLContextFactory" should { 29 | "create SQL context" in { 30 | sqlContext = sqlContextFactory.makeContext(config, this.getClass.getName) 31 | sqlContext.sparkContext.appName mustEqual this.getClass.getName 32 | } 33 | 34 | "stop underlying Spark context if context is stopped" in { 35 | sqlContext = sqlContextFactory.makeContext(config, "context1") 36 | sqlContext.stop() 37 | sqlContext = sqlContextFactory.makeContext(config, "context2") 38 | sqlContext.sparkContext.appName mustEqual "context2" 39 | } 40 | } 41 | 42 | val config = ConfigFactory.parseString( 43 | """ 44 | |{ 45 | | context.jars = [], 46 | | spark.master = "local" 47 | |} 48 | """.stripMargin) 49 | } 50 | -------------------------------------------------------------------------------- /spark-job-rest/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.xpatterns 6 | spark-job-rest 7 | 0.3.2 8 | jar 9 | 10 | spark-job-rest 11 | http://maven.apache.org 12 | 13 | 14 | 15 | mvnrepository 16 | http://repo1.maven.org/maven2 17 | 18 | 19 | cloudera-repo-releases 20 | https://repository.cloudera.com/artifactory/repo/ 21 | 22 | 23 | Akka repository 24 | http://repo.akka.io/releases 25 | 26 | 27 | 28 | 29 | UTF-8 30 | 3.0.5.RELEASE 31 | 1.2.3 32 | 2.10.3 33 | 2.10 34 | 2.3.4 35 | 1.3.1 36 | 37 | 38 | 39 | 40 | 41 | org.apache.spark 42 | spark-core_2.10 43 | 1.3.1 44 | provided 45 | 46 | 47 | 48 | com.xpatterns 49 | spark-job-rest-api 50 | ${version} 51 | 52 | 53 | 54 | com.google.code.gson 55 | gson 56 | 2.3.1 57 | 58 | 59 | 60 | 61 | io.spray 62 | spray-client 63 | ${spray.io.version} 64 | 65 | 66 | 67 | io.spray 68 | spray-routing 69 | ${spray.io.version} 70 | 71 | 72 | 73 | io.spray 74 | spray-can 75 | ${spray.io.version} 76 | 77 | 78 | 79 | io.spray 80 | spray-caching 81 | ${spray.io.version} 82 | 83 | 84 | 85 | com.google.code.findbugs 86 | jsr305 87 | 2.0.3 88 | 89 | 90 | 91 | com.fasterxml.jackson.core 92 | jackson-annotations 93 | 2.4.4 94 | 95 | 96 | 97 | commons-cli 98 | commons-cli 99 | 1.2 100 | 101 | 102 | 103 | log4j 104 | log4j 105 | 1.2.17 106 | 107 | 108 | 109 | com.typesafe 110 | config 111 | 1.2.1 112 | 113 | 114 | 115 | joda-time 116 | joda-time 117 | 2.7 118 | 119 | 120 | 121 | org.joda 122 | joda-convert 123 | 1.7 124 | 125 | 126 | 127 | 128 | org.apache.httpcomponents 129 | httpclient 130 | 4.3.5 131 | 132 | 133 | 134 | org.apache.httpcomponents 135 | httpcore 136 | 4.3.2 137 | 138 | 139 | 140 | 141 | io.spray 142 | spray-testkit 143 | 1.2.1 144 | 145 | 146 | com.typesafe 147 | config 148 | 149 | 150 | akka-actor_2.10 151 | com.typesafe.akka 152 | 153 | 154 | test 155 | 156 | 157 | 158 | com.typesafe.akka 159 | akka-testkit_2.10 160 | ${akka.version} 161 | test 162 | 163 | 164 | 165 | junit 166 | junit 167 | 4.4 168 | test 169 | 170 | 171 | 172 | org.scalatest 173 | scalatest_2.10 174 | 2.2.4 175 | test 176 | 177 | 178 | 179 | com.xpatterns 180 | spark-job-rest-client 181 | ${version} 182 | test 183 | 184 | 185 | 186 | 187 | 188 | 189 | src/main/resources 190 | 191 | deploy.conf 192 | deploy-settings.sh 193 | 194 | false 195 | 196 | 197 | ${project.artifactId} 198 | 
199 | 200 | 201 | org.scala-tools 202 | maven-scala-plugin 203 | 2.15.2 204 | 205 | 206 | org.apache.maven.plugins 207 | maven-compiler-plugin 208 | 2.0.2 209 | 210 | 211 | 212 | 213 | 214 | org.apache.maven.plugins 215 | maven-compiler-plugin 216 | 3.2 217 | 218 | 1.7 219 | 1.7 220 | 221 | 222 | 223 | org.scala-tools 224 | maven-scala-plugin 225 | 226 | 227 | scala-compile-first 228 | process-resources 229 | 230 | add-source 231 | compile 232 | 233 | 234 | 235 | scala-test-compile 236 | process-test-resources 237 | 238 | testCompile 239 | 240 | 241 | 242 | 243 | 244 | 245 | org.apache.maven.plugins 246 | maven-shade-plugin 247 | 2.2 248 | 249 | 250 | 251 | package 252 | 253 | shade 254 | 255 | 256 | 257 | 258 | reference.conf 259 | 260 | 261 | 262 | 263 | *:* 264 | 265 | META-INF/*.SF 266 | META-INF/*.DSA 267 | META-INF/*.RSA 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | org.codehaus.mojo 278 | appassembler-maven-plugin 279 | 1.8.1 280 | 281 | 282 | 283 | 284 | server.Main 285 | main-server.sh 286 | 287 | 288 | 289 | src/main/resources 290 | resources 291 | true 292 | true 293 | ${project.build.directory}/temp_build 294 | 295 | 296 | 297 | 298 | package 299 | 300 | assemble 301 | 302 | 303 | 304 | 305 | 306 | org.apache.maven.plugins 307 | maven-antrun-plugin 308 | 309 | 310 | package 311 | 312 | run 313 | 314 | 315 | 316 | 318 | 320 | 322 | 324 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | org.apache.maven.plugins 333 | maven-assembly-plugin 334 | 2.2.1 335 | 336 | 337 | src/main/assembly/archive.xml 338 | 339 | false 340 | 341 | 342 | 343 | make-assembly 344 | package 345 | 346 | single 347 | 348 | 349 | 350 | 351 | 352 | 353 | org.apache.maven.plugins 354 | maven-surefire-plugin 355 | 2.7 356 | 357 | true 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/assembly/archive.xml: -------------------------------------------------------------------------------- 1 | 5 | archive 6 | 7 | tar.gz 8 | 9 | 10 | 11 | ${project.build.directory}/temp_build 12 | / 13 | 14 | 15 | ${basedir}/resources 16 | /resources 17 | 18 | 19 | ${project.build.directory} 20 | / 21 | 22 | spark-job-rest.jar 23 | 24 | 25 | 26 | 27 | 28 | ${basedir}/src/main/scripts/start_server.sh 29 | /bin/ 30 | 31 | 32 | ${basedir}/src/main/scripts/stop_server.sh 33 | /bin/ 34 | 35 | 36 | ${basedir}/src/main/scripts/restart_server.sh 37 | /bin/ 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/resources/application.conf: -------------------------------------------------------------------------------- 1 | # spark default configuration 2 | spark.executor.memory=2g 3 | spark.mesos.coarse=false 4 | spark.scheduler.mode=FAIR 5 | spark.cores.max=2 6 | spark.master="local" 7 | spark.path=${SPARK_HOME} 8 | spark.default.parallelism=384 9 | spark.storage.memoryFraction=0.3 10 | spark.shuffle.memoryFraction=0.6 11 | spark.shuffle.compress=true 12 | spark.shuffle.spill-compress=true 13 | spark.reducer.maxMbInFlight=48 14 | spark.akka.frameSize=100 15 | spark.akka.threads=4 16 | spark.akka.timeout=100 17 | spark.task.maxFailures=4 18 | spark.shuffle.consolidateFiles=true 19 | spark.deploy.spreadOut=true 20 | spark.shuffle.spill=false 21 | spark.kryo.referenceTracking=false 22 | 23 | #Default Spark Driver JVM memory 24 | driver.xmxMemory = 1g 25 | 26 | # application configuration 27 | appConf{ 28 | # This ip on which to deploy the apis 29 | 
web.services.ip="0.0.0.0" 30 | # The port on which to deploy the apis 31 | web.services.port=8097 32 | # Implicit akka timeout 33 | timeout=1000000 34 | # Remote context initialization 35 | init { 36 | # Implicit sleep (milliseconds) before sending init message 37 | sleep=3000 38 | # Tries before consider remote context as dead 39 | tries=20 40 | # Timeout for each attempt (milliseconds) 41 | retry-timeout=1000 42 | # Inteval beetween attempts to reach remote context (milliseconds) 43 | retry-interval=1500 44 | } 45 | # The port where the range for actor system starts 46 | actor.systems.first.port = 11000 47 | # The port where the range for spark ui starts 48 | spark.ui.first.port = 16000 49 | # The path to the folder where to keep the jars 50 | jars.path = ${JAR_PATH} 51 | } 52 | 53 | context{ 54 | # Path to context process work directory 55 | contexts-base-dir = ${CONTEXTS_BASE_DIR} 56 | # Amount of jobs which can be executed on context in parallel. Zero means infinit concurency. 57 | cuncurrent-jobs = 0 58 | # Context factory that will be dynamically loaded to instantiate job context 59 | job-context-factory = "context.HiveContextFactory" 60 | # Spark context factory that will be used for non-Spark job contexts (SQL or Hive) 61 | spark-context-factory = "context.SparkContextFactory" 62 | } 63 | 64 | manager { 65 | akka { 66 | log-dead-letters = 1 67 | actor { 68 | provider = "akka.remote.RemoteActorRefProvider" 69 | } 70 | remote { 71 | log-remote-lifecycle-events = off 72 | enabled-transports = ["akka.remote.netty.tcp"] 73 | log-sent-messages = on 74 | log-received-messages = on 75 | netty.tcp { 76 | transport-class = "akka.remote.transport.netty.NettyTransport" 77 | hostname = "127.0.0.1" 78 | port = 4042 79 | maximum-frame-size = 256000b 80 | } 81 | } 82 | } 83 | 84 | spray.can.server { 85 | # uncomment the next line for making this an HTTPS example 86 | # ssl-encryption = on 87 | idle-timeout = 61 s 88 | request-timeout = 60 s 89 | parsing.max-content-length = 200m 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/resources/context_start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Script to start the job server 3 | set -e 4 | 5 | get_abs_script_path() { 6 | pushd . >/dev/null 7 | cd $(dirname $0) 8 | appdir=$(pwd) 9 | popd >/dev/null 10 | } 11 | get_abs_script_path 12 | 13 | parentdir="$(dirname "$appdir")" 14 | 15 | classpathParam=$1 16 | contextName=$2 17 | port=$3 18 | xmxMemory=$4 19 | processDir=$5 20 | 21 | echo "classpathParam = $classpathParam" 22 | echo "contextName = $contextName" 23 | echo "port = $port" 24 | 25 | 26 | GC_OPTS="-XX:+UseConcMarkSweepGC 27 | -verbose:gc -XX:+PrintGCTimeStamps -Xloggc:$appdir/gc.out 28 | -XX:MaxPermSize=512m 29 | -XX:+CMSClassUnloadingEnabled" 30 | 31 | JAVA_OPTS="-Xmx$xmxMemory -XX:MaxDirectMemorySize=512M 32 | -XX:+HeapDumpOnOutOfMemoryError -Djava.net.preferIPv4Stack=true 33 | -Dcom.sun.management.jmxremote.authenticate=false 34 | -Dcom.sun.management.jmxremote.ssl=false" 35 | 36 | MAIN="server.MainContext" 37 | 38 | if [ -f "$appdir/settings.sh" ]; then 39 | . $appdir/settings.sh 40 | else 41 | echo "Missing $appdir/settings.sh, exiting" 42 | exit 1 43 | fi 44 | 45 | if [ -z "$SPARK_HOME" ]; then 46 | echo "Please set SPARK_HOME or put it in $appdir/settings.sh first" 47 | exit 1 48 | fi 49 | 50 | # Pull in other env vars in spark config, such as MESOS_NATIVE_LIBRARY 51 | . 
$SPARK_CONF_HOME/spark-env.sh 52 | 53 | mkdir -p $LOG_DIR 54 | 55 | LOGGING_OPTS="-Dlog4j.configuration=log4j.properties 56 | -DLOG_DIR=$LOG_DIR 57 | -DLOG_FILE=$contextName.log" 58 | 59 | # For Mesos 60 | #CONFIG_OVERRIDES="-Dspark.executor.uri=$SPARK_EXECUTOR_URI " 61 | # For Mesos/Marathon, use the passed-in port 62 | if [ "$PORT" != "" ]; then 63 | CONFIG_OVERRIDES+="-Dspark.jobserver.port=$PORT " 64 | fi 65 | 66 | # The following should be exported in order to be accessible in Config substitutions 67 | export SPARK_HOME 68 | export APP_DIR 69 | export JAR_PATH 70 | export CONTEXTS_BASE_DIR 71 | 72 | # job server jar needs to appear first so its deps take higher priority 73 | # need to explicitly include app dir in classpath so logging configs can be found 74 | CLASSPATH="$parentdir/resources:$appdir:$parentdir/spark-job-rest.jar:$classpathParam:$EXTRA_CLASSPATH:$($SPARK_HOME/bin/compute-classpath.sh)" 75 | echo "CLASSPATH = ${CLASSPATH}" 76 | 77 | # Create context process directory 78 | mkdir -p "$processDir" 79 | 80 | cd "$processDir" 81 | exec java -cp $CLASSPATH $GC_OPTS $JAVA_OPTS $LOGGING_OPTS $CONFIG_OVERRIDES $MAIN $contextName $port 82 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # 3 | # LOG4J CONFIG FILE 4 | # 5 | # Possible Log Levels: 6 | # FATAL, ERROR, WARN, INFO, DEBUG 7 | # 8 | #------------------------------------------------------------------------------ 9 | 10 | log4j.rootCategory = INFO, defaultFile 11 | 12 | #------------------------------------------------------------------------------ 13 | # 14 | # The following properties configure the Daily Rolling File appender. 15 | # See http://logging.apache.org/log4j/docs/api/index.html for details. 
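# Note: ${LOG_DIR} and ${LOG_FILE} are not defined in this file; they are passed in as JVM
# system properties by the launcher scripts (e.g. context_start.sh sets -DLOG_DIR and -DLOG_FILE).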
16 | # 17 | #------------------------------------------------------------------------------ 18 | 19 | log4j.appender.defaultFile = org.apache.log4j.DailyRollingFileAppender 20 | log4j.appender.defaultFile.File = ${LOG_DIR}/${LOG_FILE} 21 | log4j.appender.defaultFile.Append = true 22 | log4j.appender.defaultFile.DatePattern = '.'yyyy-MM-dd 23 | log4j.appender.defaultFile.layout = org.apache.log4j.PatternLayout 24 | log4j.appender.defaultFile.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} %c{1} [%p] %m%n -------------------------------------------------------------------------------- /spark-job-rest/src/main/resources/settings.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CDIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) 4 | APP_DIR="${CDIR}/../" 5 | DEPLOY_CONFIG="${CDIR}/deploy-settings.sh" 6 | 7 | # Load optional deployment settings 8 | if [ -f "${DEPLOY_CONFIG}" ]; then 9 | source "${DEPLOY_CONFIG}" 10 | fi 11 | 12 | if [ -z "${SPARK_HOME}" ]; then 13 | SPARK_HOME="/opt/spark" 14 | fi 15 | 16 | if [ -z "${SPARK_CONF_HOME}" ]; then 17 | SPARK_CONF_HOME=$SPARK_HOME/conf 18 | fi 19 | 20 | # Only needed for Mesos deploys 21 | #SPARK_EXECUTOR_URI=/home/spark/spark-1.1.0.tar.gz 22 | 23 | # Logging directory 24 | LOG_DIR=${SJR_LOG_DIR-"${APP_DIR}/logs"} 25 | 26 | # Extra classes: 27 | EXTRA_CLASSPATH="${JSR_EXTRA_CLASSPATH}" 28 | 29 | # Set proper jar path 30 | JAR_PATH=${SJR_JAR_PATH-"${APP_DIR}/jars"} 31 | 32 | # Root location for contexts process directories 33 | CONTEXTS_BASE_DIR=${SJR_CONTEXTS_BASE_DIR-"${APP_DIR}/contexts"} -------------------------------------------------------------------------------- /spark-job-rest/src/main/resources/webapp/assets/fonts/bootstrap/glyphicons-halflings-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VeritoneAlpha/spark-job-rest/9ab6629141361bad3acbbad61b347d3227fa9ed0/spark-job-rest/src/main/resources/webapp/assets/fonts/bootstrap/glyphicons-halflings-regular.eot -------------------------------------------------------------------------------- /spark-job-rest/src/main/resources/webapp/assets/fonts/bootstrap/glyphicons-halflings-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VeritoneAlpha/spark-job-rest/9ab6629141361bad3acbbad61b347d3227fa9ed0/spark-job-rest/src/main/resources/webapp/assets/fonts/bootstrap/glyphicons-halflings-regular.ttf -------------------------------------------------------------------------------- /spark-job-rest/src/main/resources/webapp/assets/fonts/bootstrap/glyphicons-halflings-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VeritoneAlpha/spark-job-rest/9ab6629141361bad3acbbad61b347d3227fa9ed0/spark-job-rest/src/main/resources/webapp/assets/fonts/bootstrap/glyphicons-halflings-regular.woff -------------------------------------------------------------------------------- /spark-job-rest/src/main/resources/webapp/assets/fonts/bootstrap/glyphicons-halflings-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VeritoneAlpha/spark-job-rest/9ab6629141361bad3acbbad61b347d3227fa9ed0/spark-job-rest/src/main/resources/webapp/assets/fonts/bootstrap/glyphicons-halflings-regular.woff2 
-------------------------------------------------------------------------------- /spark-job-rest/src/main/resources/webapp/assets/img/halftone.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VeritoneAlpha/spark-job-rest/9ab6629141361bad3acbbad61b347d3227fa9ed0/spark-job-rest/src/main/resources/webapp/assets/img/halftone.png -------------------------------------------------------------------------------- /spark-job-rest/src/main/resources/webapp/assets/img/loading-sm.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VeritoneAlpha/spark-job-rest/9ab6629141361bad3acbbad61b347d3227fa9ed0/spark-job-rest/src/main/resources/webapp/assets/img/loading-sm.gif -------------------------------------------------------------------------------- /spark-job-rest/src/main/resources/webapp/assets/img/loading.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/VeritoneAlpha/spark-job-rest/9ab6629141361bad3acbbad61b347d3227fa9ed0/spark-job-rest/src/main/resources/webapp/assets/img/loading.gif -------------------------------------------------------------------------------- /spark-job-rest/src/main/resources/webapp/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Spark-Job-Rest 6 | 7 | 8 | 9 |
249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/resources/webapp/js/bootstrap-notify.min.js: -------------------------------------------------------------------------------- 1 | /* Project: Bootstrap Growl = v3.0.2 | Description: Turns standard Bootstrap alerts into "Growl-like" notifications. | Author: Mouse0270 aka Robert McIntosh | License: MIT License | Website: https://github.com/mouse0270/bootstrap-growl */ 2 | !function(t){"function"==typeof define&&define.amd?define(["jquery"],t):t("object"==typeof exports?require("jquery"):jQuery)}(function(t){function e(e,i,n){var i={content:{message:"object"==typeof i?i.message:i,title:i.title?i.title:"",icon:i.icon?i.icon:"",url:i.url?i.url:"#",target:i.target?i.target:"-"}};n=t.extend(!0,{},i,n),this.settings=t.extend(!0,{},s,n),this._defaults=s,"-"==this.settings.content.target&&(this.settings.content.target=this.settings.url_target),this.animations={start:"webkitAnimationStart oanimationstart MSAnimationStart animationstart",end:"webkitAnimationEnd oanimationend MSAnimationEnd animationend"},"number"==typeof this.settings.offset&&(this.settings.offset={x:this.settings.offset,y:this.settings.offset}),this.init()}var s={element:"body",position:null,type:"info",allow_dismiss:!0,newest_on_top:!1,showProgressbar:!1,placement:{from:"top",align:"right"},offset:20,spacing:10,z_index:1031,delay:5e3,timer:1e3,url_target:"_blank",mouse_over:null,animate:{enter:"animated fadeInDown",exit:"animated fadeOutUp"},onShow:null,onShown:null,onClose:null,onClosed:null,icon_type:"class",template:''};String.format=function(){for(var t=arguments[0],e=1;e .progress-bar').removeClass("progress-bar-"+t.settings.type),t.settings.type=i[e],this.$ele.addClass("alert-"+i[e]).find('[data-notify="progressbar"] > .progress-bar').addClass("progress-bar-"+i[e]);break;case"icon":var n=this.$ele.find('[data-notify="icon"]');"class"==t.settings.icon_type.toLowerCase()?n.removeClass(t.settings.content.icon).addClass(i[e]):(n.is("img")||n.find("img"),n.attr("src",i[e]));break;case"progress":var a=t.settings.delay-t.settings.delay*(i[e]/100);this.$ele.data("notify-delay",a),this.$ele.find('[data-notify="progressbar"] > div').attr("aria-valuenow",i[e]).css("width",i[e]+"%");break;case"url":this.$ele.find('[data-notify="url"]').attr("href",i[e]);break;case"target":this.$ele.find('[data-notify="url"]').attr("target",i[e]);break;default:this.$ele.find('[data-notify="'+e+'"]').html(i[e])}var o=this.$ele.outerHeight()+parseInt(t.settings.spacing)+parseInt(t.settings.offset.y);t.reposition(o)},close:function(){t.close()}}},buildNotify:function(){var e=this.settings.content;this.$ele=t(String.format(this.settings.template,this.settings.type,e.title,e.message,e.url,e.target)),this.$ele.attr("data-notify-position",this.settings.placement.from+"-"+this.settings.placement.align),this.settings.allow_dismiss||this.$ele.find('[data-notify="dismiss"]').css("display","none"),(this.settings.delay<=0&&!this.settings.showProgressbar||!this.settings.showProgressbar)&&this.$ele.find('[data-notify="progressbar"]').remove()},setIcon:function(){"class"==this.settings.icon_type.toLowerCase()?this.$ele.find('[data-notify="icon"]').addClass(this.settings.content.icon):this.$ele.find('[data-notify="icon"]').is("img")?this.$ele.find('[data-notify="icon"]').attr("src",this.settings.content.icon):this.$ele.find('[data-notify="icon"]').append('Notify 
Icon')},styleURL:function(){this.$ele.find('[data-notify="url"]').css({backgroundImage:"url(data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7)",height:"100%",left:"0px",position:"absolute",top:"0px",width:"100%",zIndex:this.settings.z_index+1}),this.$ele.find('[data-notify="dismiss"]').css({position:"absolute",right:"10px",top:"5px",zIndex:this.settings.z_index+2})},placement:function(){var e=this,s=this.settings.offset.y,i={display:"inline-block",margin:"0px auto",position:this.settings.position?this.settings.position:"body"===this.settings.element?"fixed":"absolute",transition:"all .5s ease-in-out",zIndex:this.settings.z_index},n=!1,a=this.settings;switch(t('[data-notify-position="'+this.settings.placement.from+"-"+this.settings.placement.align+'"]:not([data-closing="true"])').each(function(){return s=Math.max(s,parseInt(t(this).css(a.placement.from))+parseInt(t(this).outerHeight())+parseInt(a.spacing))}),1==this.settings.newest_on_top&&(s=this.settings.offset.y),i[this.settings.placement.from]=s+"px",this.settings.placement.align){case"left":case"right":i[this.settings.placement.align]=this.settings.offset.x+"px";break;case"center":i.left=0,i.right=0}this.$ele.css(i).addClass(this.settings.animate.enter),t(this.settings.element).append(this.$ele),1==this.settings.newest_on_top&&(s=parseInt(s)+parseInt(this.settings.spacing)+this.$ele.outerHeight(),this.reposition(s)),t.isFunction(e.settings.onShow)&&e.settings.onShow.call(this.$ele),this.$ele.one(this.animations.start,function(){n=!0}).one(this.animations.end,function(){t.isFunction(e.settings.onShown)&&e.settings.onShown.call(this)}),setTimeout(function(){n||t.isFunction(e.settings.onShown)&&e.settings.onShown.call(this)},600)},bind:function(){var e=this;if(this.$ele.find('[data-notify="dismiss"]').on("click",function(){e.close()}),this.$ele.mouseover(function(){t(this).data("data-hover","true")}).mouseout(function(){t(this).data("data-hover","false")}),this.$ele.data("data-hover","false"),this.settings.delay>0){e.$ele.data("notify-delay",e.settings.delay);var s=setInterval(function(){var t=parseInt(e.$ele.data("notify-delay"))-e.settings.timer;if("false"===e.$ele.data("data-hover")&&"pause"==e.settings.mouse_over||"pause"!=e.settings.mouse_over){var i=(e.settings.delay-t)/e.settings.delay*100;e.$ele.data("notify-delay",t),e.$ele.find('[data-notify="progressbar"] > div').attr("aria-valuenow",i).css("width",i+"%")}t<=-e.settings.timer&&(clearInterval(s),e.close())},e.settings.timer)}},close:function(){var e=this,s=parseInt(this.$ele.css(this.settings.placement.from)),i=!1;this.$ele.data("closing","true").addClass(this.settings.animate.exit),e.reposition(s),t.isFunction(e.settings.onClose)&&e.settings.onClose.call(this.$ele),this.$ele.one(this.animations.start,function(){i=!0}).one(this.animations.end,function(){t(this).remove(),t.isFunction(e.settings.onClosed)&&e.settings.onClosed.call(this)}),setTimeout(function(){i||(e.$ele.remove(),e.settings.onClosed&&e.settings.onClosed(e.$ele))},600)},reposition:function(e){var s=this,i='[data-notify-position="'+this.settings.placement.from+"-"+this.settings.placement.align+'"]:not([data-closing="true"])',n=this.$ele.nextAll(i);1==this.settings.newest_on_top&&(n=this.$ele.prevAll(i)),n.each(function(){t(this).css(s.settings.placement.from,e),e=parseInt(e)+parseInt(s.settings.spacing)+t(this).outerHeight()})}}),t.notify=function(t,s){var i=new e(this,t,s);return i.notify},t.notifyDefaults=function(e){return 
s=t.extend(!0,{},s,e)},t.notifyClose=function(e){"undefined"==typeof e||"all"==e?t("[data-notify]").find('[data-notify="dismiss"]').trigger("click"):t('[data-notify-position="'+e+'"]').find('[data-notify="dismiss"]').trigger("click")}}); -------------------------------------------------------------------------------- /spark-job-rest/src/main/resources/webapp/js/spin.min.js: -------------------------------------------------------------------------------- 1 | //fgnass.github.com/spin.js#v2.1.0 2 | !function(a,b){"object"==typeof exports?module.exports=b():"function"==typeof define&&define.amd?define(b):a.Spinner=b()}(this,function(){"use strict";function a(a,b){var c,d=document.createElement(a||"div");for(c in b)d[c]=b[c];return d}function b(a){for(var b=1,c=arguments.length;c>b;b++)a.appendChild(arguments[b]);return a}function c(a,b,c,d){var e=["opacity",b,~~(100*a),c,d].join("-"),f=.01+c/d*100,g=Math.max(1-(1-a)/b*(100-f),a),h=j.substring(0,j.indexOf("Animation")).toLowerCase(),i=h&&"-"+h+"-"||"";return m[e]||(k.insertRule("@"+i+"keyframes "+e+"{0%{opacity:"+g+"}"+f+"%{opacity:"+a+"}"+(f+.01)+"%{opacity:1}"+(f+b)%100+"%{opacity:"+a+"}100%{opacity:"+g+"}}",k.cssRules.length),m[e]=1),e}function d(a,b){var c,d,e=a.style;for(b=b.charAt(0).toUpperCase()+b.slice(1),d=0;d',c)}k.addRule(".spin-vml","behavior:url(#default#VML)"),h.prototype.lines=function(a,d){function f(){return e(c("group",{coordsize:k+" "+k,coordorigin:-j+" "+-j}),{width:k,height:k})}function h(a,h,i){b(m,b(e(f(),{rotation:360/d.lines*a+"deg",left:~~h}),b(e(c("roundrect",{arcsize:d.corners}),{width:j,height:d.scale*d.width,left:d.scale*d.radius,top:-d.scale*d.width>>1,filter:i}),c("fill",{color:g(d.color,a),opacity:d.opacity}),c("stroke",{opacity:0}))))}var i,j=d.scale*(d.length+d.width),k=2*d.scale*j,l=-(d.width+d.length)*d.scale*2+"px",m=e(f(),{position:"absolute",top:l,left:l});if(d.shadow)for(i=1;i<=d.lines;i++)h(i,-2,"progid:DXImageTransform.Microsoft.Blur(pixelradius=2,makeshadow=1,shadowopacity=.3)");for(i=1;i<=d.lines;i++)h(i);return b(a,m)},h.prototype.opacity=function(a,b,c,d){var e=a.firstChild;d=d.shadow&&d.lines||0,e&&b+d>1)+"px"})}for(var i,k=0,l=(f.lines-1)*(1-f.direction)/2;k 10 | private def respondWithCORSHeaders(origin: String, rh: Seq[HttpHeader]) = { 11 | val headers: List[HttpHeader] = List( 12 | HttpHeaders.`Access-Control-Allow-Origin`(SomeOrigins(List(origin))), 13 | HttpHeaders.`Access-Control-Allow-Credentials`(true), 14 | HttpHeaders.`Access-Control-Allow-Headers`("Origin", "X-Requested-With", "Content-Type", "Accept", "apiKey", "affiliationid") 15 | ) ++ rh.toList 16 | 17 | respondWithHeaders(headers) 18 | } 19 | private def respondWithCORSHeadersAllOrigins(rh: Seq[HttpHeader]) = { 20 | val headers: List[HttpHeader] = List( 21 | HttpHeaders.`Access-Control-Allow-Origin`(AllOrigins), 22 | HttpHeaders.`Access-Control-Allow-Credentials`(true), 23 | HttpHeaders.`Access-Control-Allow-Headers`("Origin", "X-Requested-With", "Content-Type", "Accept"), 24 | HttpHeaders.`Access-Control-Allow-Methods`(HttpMethods.DELETE, HttpMethods.GET, HttpMethods.POST ) 25 | ) ++ rh.toList 26 | 27 | respondWithHeaders(headers) 28 | } 29 | 30 | def corsFilter(origins: List[String], rh: HttpHeader*)(route: Route) = 31 | if (origins.contains("*")) 32 | respondWithCORSHeadersAllOrigins(rh)(route) 33 | else 34 | optionalHeaderValueByName("Origin") { 35 | case None => 36 | route 37 | case Some(clientOrigin) => { 38 | if (origins.contains(clientOrigin)) 39 | respondWithCORSHeaders(clientOrigin, rh)(route) 40 | else { 41 | // Maybe, 
a Rejection will fit better 42 | complete(StatusCodes.Forbidden, "Invalid origin") 43 | } 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/server/Controller.scala: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import akka.actor.{ActorRef, ActorSystem} 4 | import akka.pattern.ask 5 | import akka.util.Timeout 6 | import com.typesafe.config.{Config, ConfigFactory} 7 | import org.slf4j.LoggerFactory 8 | import responses._ 9 | import server.domain.actors.ContextActor.FailedInit 10 | import server.domain.actors.ContextManagerActor._ 11 | import server.domain.actors.JarActor._ 12 | import server.domain.actors.JobActor._ 13 | import server.domain.actors.getValueFromConfig 14 | import spray.http._ 15 | import spray.httpx.SprayJsonSupport.sprayJsonMarshaller 16 | import spray.routing.{Route, SimpleRoutingApp} 17 | 18 | import scala.concurrent.ExecutionContext.Implicits.global 19 | import scala.concurrent.duration._ 20 | import scala.util.{Failure, Success, Try} 21 | 22 | /** 23 | * Spark-Job-REST HTTP service for Web UI and REST API. 24 | */ 25 | class Controller(config: Config, contextManagerActor: ActorRef, jobManagerActor: ActorRef, jarActor: ActorRef, originalSystem: ActorSystem) 26 | extends SimpleRoutingApp with CORSDirectives{ 27 | 28 | implicit val system = originalSystem 29 | implicit val timeout: Timeout = 60 seconds 30 | 31 | val log = LoggerFactory.getLogger(getClass) 32 | log.info("Starting web service.") 33 | 34 | var StateKey = "state" 35 | var ResultKey = "result" 36 | 37 | // Get ip from config, "0.0.0.0" as default 38 | val webIp = getValueFromConfig(config, "appConf.web.services.ip", "0.0.0.0") 39 | val webPort = getValueFromConfig(config, "appConf.web.services.port", 8097) 40 | 41 | val route = jobRoute ~ contextRoute ~ indexRoute ~ jarRoute 42 | 43 | startServer(webIp, webPort) (route) map { 44 | case bound => log.info(s"Started web service: $bound") 45 | } onFailure { 46 | case e: Exception => 47 | log.error("Failed to start Spark-Job-REST web service", e) 48 | throw e 49 | } 50 | 51 | def indexRoute: Route = pathPrefix(""){ 52 | pathEnd { 53 | get { 54 | getFromResource("webapp/index.html") 55 | } 56 | } ~ 57 | options { 58 | corsFilter(List("*"), HttpHeaders.`Access-Control-Allow-Methods`(Seq(HttpMethods.OPTIONS, HttpMethods.GET))) { 59 | complete { 60 | "OK" 61 | } 62 | } 63 | } 64 | } ~ 65 | pathPrefix("assets"){ 66 | get { 67 | getFromResourceDirectory("webapp/assets") 68 | } ~ options { 69 | corsFilter(List("*"), HttpHeaders.`Access-Control-Allow-Methods`(Seq(HttpMethods.OPTIONS, HttpMethods.GET))) { 70 | complete { 71 | "OK" 72 | } 73 | } 74 | } 75 | } ~ 76 | pathPrefix("js"){ 77 | get { 78 | getFromResourceDirectory("webapp/js") 79 | } ~ options { 80 | corsFilter(List("*"), HttpHeaders.`Access-Control-Allow-Methods`(Seq(HttpMethods.OPTIONS, HttpMethods.GET))) { 81 | complete { 82 | "OK" 83 | } 84 | } 85 | } 86 | } ~ 87 | path("hearbeat") { 88 | get { 89 | complete { 90 | "Spark Job Rest is up and running!" 
91 | } ~ options { 92 | corsFilter(List("*"), HttpHeaders.`Access-Control-Allow-Methods`(Seq(HttpMethods.OPTIONS, HttpMethods.GET))) { 93 | respondWithMediaType(MediaTypes.`application/json`) { ctx => 94 | ctx.complete(StatusCodes.OK) 95 | } 96 | } 97 | } 98 | } 99 | } 100 | 101 | 102 | def jobRoute: Route = pathPrefix("jobs"){ 103 | pathEnd { 104 | get { 105 | corsFilter(List("*")) { 106 | respondWithMediaType(MediaTypes.`application/json`) { ctx => 107 | val resultFuture = jobManagerActor ? GetAllJobsStatus() 108 | resultFuture.map { 109 | case jobs: Jobs => ctx.complete(StatusCodes.OK, jobs) 110 | case e: Throwable => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(e.getMessage)) 111 | case x: Any => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(x.toString)) 112 | } 113 | } 114 | } 115 | } 116 | } ~ 117 | get { 118 | path(Segment) { jobId => 119 | parameters('contextName) { contextName => 120 | corsFilter(List("*")) { 121 | respondWithMediaType(MediaTypes.`application/json`) { ctx => 122 | val resultFuture = jobManagerActor ? JobStatusEnquiry(contextName, jobId) 123 | resultFuture.map { 124 | case job:Job => ctx.complete(StatusCodes.OK, job) 125 | case JobDoesNotExist() => ctx.complete(StatusCodes.BadRequest, ErrorResponse("JobId does not exist!")) 126 | case NoSuchContext => ctx.complete(StatusCodes.BadRequest, ErrorResponse("Context does not exist!")) 127 | case e: Throwable => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(e.getMessage)) 128 | case x: Any => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(x.toString)) 129 | } 130 | } 131 | } 132 | } 133 | } 134 | } ~ 135 | post { 136 | parameters('runningClass, 'contextName) { (runningClass, context) => 137 | entity(as[String]) { configString => 138 | corsFilter(List("*")) { 139 | respondWithMediaType(MediaTypes.`application/json`) { ctx => 140 | Try{ 141 | ConfigFactory.parseString(configString) 142 | } match { 143 | case Success(requestConfig) => 144 | val resultFuture = jobManagerActor ? RunJob(runningClass, context, requestConfig) 145 | resultFuture.map { 146 | case job: Job => ctx.complete(StatusCodes.OK, job) 147 | case NoSuchContext => ctx.complete(StatusCodes.BadRequest, ErrorResponse("No such context.")) 148 | case e: Exception => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(e.getMessage)) 149 | case x: Any => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(x.toString)) 150 | } 151 | case Failure(e) => ctx.complete(StatusCodes.BadRequest, ErrorResponse("Invalid parameter: " + e.getMessage)) 152 | } 153 | } 154 | } 155 | } 156 | } 157 | } ~ 158 | options { 159 | corsFilter(List("*"), HttpHeaders.`Access-Control-Allow-Methods`(Seq(HttpMethods.OPTIONS, HttpMethods.GET, HttpMethods.POST))) { 160 | complete { 161 | "OK" 162 | } 163 | } 164 | } 165 | 166 | } 167 | def contextRoute : Route = pathPrefix("contexts"){ 168 | post { 169 | path(Segment) { contextName => 170 | entity(as[String]) { configString => 171 | corsFilter(List("*")) { 172 | respondWithMediaType(MediaTypes.`application/json`) { ctx => 173 | Try{ 174 | ConfigFactory.parseString(configString) 175 | } match { 176 | case Success(requestConfig) => 177 | val resultFuture = contextManagerActor ? 
CreateContext(contextName, getValueFromConfig(requestConfig, "jars", ""), requestConfig) 178 | resultFuture.map { 179 | case context:Context => ctx.complete(StatusCodes.OK, context) 180 | case e: FailedInit => ctx.complete(StatusCodes.InternalServerError, ErrorResponse("Failed Init: " + e.message)) 181 | case ContextAlreadyExists => ctx.complete(StatusCodes.BadRequest, ErrorResponse("Context already exists.")) 182 | case e: Throwable => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(e.getMessage)) 183 | case x: Any => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(x.toString)) 184 | } 185 | case Failure(e) => ctx.complete(StatusCodes.BadRequest, ErrorResponse("Invalid parameters: " + e.getMessage)) 186 | } 187 | 188 | 189 | } 190 | } 191 | } 192 | } 193 | } ~ 194 | get { 195 | path(Segment) { contextName => 196 | corsFilter(List("*")) { 197 | val resultFuture = contextManagerActor ? GetContextInfo(contextName) 198 | respondWithMediaType(MediaTypes.`application/json`) { ctx => 199 | resultFuture.map { 200 | case context: Context => ctx.complete(StatusCodes.OK, context) 201 | case NoSuchContext => ctx.complete(StatusCodes.BadRequest, ErrorResponse("No such context.")) 202 | case x: Any => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(x.toString)) 203 | } 204 | } 205 | } 206 | } 207 | } ~ 208 | pathEnd { 209 | get { 210 | corsFilter(List("*")) { 211 | respondWithMediaType(MediaTypes.`application/json`) { ctx => 212 | val resultFuture = contextManagerActor ? GetAllContextsForClient() 213 | resultFuture.map { 214 | case contexts: Contexts => ctx.complete(StatusCodes.OK, contexts) 215 | case e: Exception => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(e.getMessage)) 216 | case x: Any => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(x.toString)) 217 | } 218 | } 219 | } 220 | } 221 | } ~ 222 | delete { 223 | path(Segment) { contextName => 224 | corsFilter(List("*")) { 225 | val resultFuture = contextManagerActor ? DeleteContext(contextName) 226 | respondWithMediaType(MediaTypes.`application/json`) { ctx => 227 | resultFuture.map { 228 | case Success => ctx.complete(StatusCodes.OK, SimpleMessage("Context deleted.")) 229 | case NoSuchContext => ctx.complete(StatusCodes.BadRequest, ErrorResponse("No such context.")) 230 | case x: Any => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(x.toString)) 231 | } 232 | } 233 | } 234 | } 235 | } ~ options { 236 | corsFilter(List("*"), HttpHeaders.`Access-Control-Allow-Methods`(Seq(HttpMethods.OPTIONS, HttpMethods.GET, HttpMethods.POST, HttpMethods.DELETE))) { 237 | complete { 238 | "OK" 239 | } 240 | } 241 | } 242 | 243 | } 244 | 245 | def jarRoute : Route = pathPrefix("jars"){ 246 | post { 247 | path(Segment) { jarName => 248 | entity(as[Array[Byte]]) { jarBytes => 249 | corsFilter(List("*")) { 250 | val resultFuture = jarActor ? 
AddJar(jarName, jarBytes) 251 | respondWithMediaType(MediaTypes.`application/json`) { ctx => 252 | resultFuture.map { 253 | case Success(jarInfo: JarInfo) => ctx.complete(StatusCodes.OK, jarInfo) 254 | case Failure(e) => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(e.getMessage)) 255 | case x: Any => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(x.toString)) 256 | } 257 | } 258 | } 259 | } 260 | } ~ 261 | pathEnd { 262 | entity(as[MultipartFormData]) { formData => 263 | corsFilter(List("*")) { 264 | respondWithMediaType(MediaTypes.`application/json`) { ctx => 265 | formData.fields.foreach { 266 | case bodyPart: BodyPart => 267 | val resultFuture = jarActor ? AddJar(bodyPart.filename.get, bodyPart.entity.data.toByteArray) 268 | resultFuture.map { 269 | case Success(jarInfo: JarInfo) => ctx.complete(StatusCodes.OK, jarInfo) 270 | case Failure(e) => 271 | log.error("Error uploading jar: ", e) 272 | ctx.complete(StatusCodes.BadRequest, "") 273 | case x: Any => ctx.complete(StatusCodes.InternalServerError, "") 274 | // TODO: Message is empty due to a bug on the Ui File Upload part. When fixed used ErrorResponse(e.getMessage) 275 | } 276 | } 277 | } 278 | } 279 | } 280 | } 281 | } ~ 282 | delete { 283 | path(Segment) { jarName => 284 | corsFilter(List("*")) { 285 | val resultFuture = jarActor ? DeleteJar(jarName) 286 | respondWithMediaType(MediaTypes.`application/json`) { ctx => 287 | resultFuture.map { 288 | case Success(message: String) => ctx.complete(StatusCodes.OK, SimpleMessage(message)) 289 | case NoSuchJar() => ctx.complete(StatusCodes.BadRequest,ErrorResponse("No such jar.")) 290 | case e: Throwable => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(e.getMessage)) 291 | case x: Any => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(x.toString)) 292 | } 293 | } 294 | } 295 | } 296 | } ~ 297 | pathEnd { 298 | get { 299 | corsFilter(List("*")) { 300 | respondWithMediaType(MediaTypes.`application/json`) { ctx => 301 | val future = jarActor ? GetAllJars() 302 | future.map { 303 | case jarsInfo: JarsInfo => ctx.complete(StatusCodes.OK, jarsInfo) 304 | case e: Throwable => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(e.getMessage)) 305 | case x: Any => ctx.complete(StatusCodes.InternalServerError, ErrorResponse(x.toString)) 306 | } 307 | } 308 | } 309 | } 310 | } ~ options { 311 | corsFilter(List("*"), HttpHeaders.`Access-Control-Allow-Methods`(Seq(HttpMethods.OPTIONS, HttpMethods.GET, HttpMethods.POST, HttpMethods.DELETE))) { 312 | complete { 313 | "OK" 314 | } 315 | } 316 | } 317 | } 318 | 319 | } 320 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/server/Main.scala: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import akka.actor.{ActorRef, ActorSystem, Props} 4 | import akka.pattern.ask 5 | import com.typesafe.config.ConfigFactory 6 | import logging.LoggingOutputStream 7 | import server.domain.actors._ 8 | 9 | import scala.concurrent.Await 10 | 11 | /** 12 | * Spark-Job-REST entry point. 
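 *
 * Once started, the REST API and Web UI listen on `appConf.web.services.ip` /
 * `appConf.web.services.port` (0.0.0.0:8097 in the default `application.conf`). A quick
 * liveness check, assuming a local deployment on the default port (endpoint path spelled
 * exactly as registered in [[Controller]]):
 * {{{
 *   curl http://localhost:8097/hearbeat
 * }}}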
13 | */ 14 | object Main { 15 | def main(args: Array[String]) { 16 | 17 | LoggingOutputStream.redirectConsoleOutput 18 | 19 | // Loads deployment configuration `deploy.conf` on top of application defaults `application.conf` 20 | val defaultConfig = ConfigFactory.load("deploy").withFallback(ConfigFactory.load()) 21 | 22 | val masterConfig = defaultConfig.getConfig("manager") 23 | val system = ActorSystem("ManagerSystem", masterConfig) 24 | 25 | val supervisor = system.actorOf(Props(classOf[Supervisor]), "Supervisor") 26 | 27 | val jarActor = createActor(Props(new JarActor(defaultConfig)), "JarActor", system, supervisor) 28 | val contextManagerActor = createActor(Props(new ContextManagerActor(defaultConfig, jarActor)), "ContextManager", system, supervisor) 29 | val jobManagerActor = createActor(Props(new JobActor(defaultConfig, contextManagerActor)), "JobManager", system, supervisor) 30 | new Controller(defaultConfig, contextManagerActor, jobManagerActor, jarActor, system) 31 | } 32 | 33 | def createActor(props: Props, name: String, customSystem: ActorSystem, supervisor: ActorRef): ActorRef = { 34 | val actorRefFuture = ask(supervisor, (props, name)) 35 | Await.result(actorRefFuture, timeout.duration).asInstanceOf[ActorRef] 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/server/MainContext.scala: -------------------------------------------------------------------------------- 1 | package server 2 | 3 | import akka.actor.{ActorSystem, Props} 4 | import com.typesafe.config.ConfigFactory 5 | import logging.LoggingOutputStream 6 | import org.slf4j.LoggerFactory 7 | import server.domain.actors.ContextActor 8 | import utils.ActorUtils 9 | 10 | /** 11 | * Spark context container entry point. 
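 *
 * Normally launched in a separate JVM by `resources/context_start.sh`; a sketch of the final
 * exec line, with classpath and JVM options elided:
 * {{{
 *   java -cp $CLASSPATH ... server.MainContext <contextName> <port>
 * }}}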
12 | */ 13 | object MainContext { 14 | 15 | LoggingOutputStream.redirectConsoleOutput 16 | val log = LoggerFactory.getLogger(getClass) 17 | 18 | def main(args: Array[String]) { 19 | val contextName = args(0) 20 | val port = args(1).toInt 21 | 22 | log.info(s"Started new process for contextName = $contextName with port = $port") 23 | 24 | val defaultConfig = ConfigFactory.load("deploy").withFallback(ConfigFactory.load()) 25 | val config = ActorUtils.remoteConfig("localhost", port, defaultConfig) 26 | val system = ActorSystem(ActorUtils.PREFIX_CONTEXT_SYSTEM + contextName, config) 27 | 28 | system.actorOf(Props(new ContextActor(defaultConfig)), ActorUtils.PREFIX_CONTEXT_ACTOR + contextName) 29 | 30 | log.info(s"Initialized system ${ActorUtils.PREFIX_CONTEXT_SYSTEM}$contextName and actor ${ActorUtils.PREFIX_CONTEXT_SYSTEM}$contextName") 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/server/domain/actors/ContextActor.scala: -------------------------------------------------------------------------------- 1 | package server.domain.actors 2 | 3 | import akka.actor.{Actor, Terminated} 4 | import api._ 5 | import com.google.gson.Gson 6 | import com.typesafe.config.{Config, ConfigValueFactory} 7 | import context.JobContextFactory 8 | import org.apache.commons.lang.exception.ExceptionUtils 9 | import org.slf4j.LoggerFactory 10 | import responses.{Job, JobStates} 11 | import server.domain.actors.ContextActor._ 12 | import server.domain.actors.JobActor._ 13 | import utils.ActorUtils 14 | 15 | import scala.collection.JavaConverters._ 16 | import scala.collection.mutable 17 | import scala.concurrent.ExecutionContext.Implicits.global 18 | import scala.concurrent.Future 19 | import scala.util.{Failure, Success, Try} 20 | 21 | /** 22 | * Context actor messages 23 | */ 24 | object ContextActor { 25 | case class Initialize(contextName: String, config: Config, jarsForSpark: List[String]) 26 | case class Initialized() 27 | case class FailedInit(message: String) 28 | case class ShutDown() 29 | } 30 | 31 | /** 32 | * Context actor responsible for creation and managing Spark Context 33 | * @param localConfig config of the context application 34 | */ 35 | class ContextActor(localConfig: Config) extends Actor { 36 | import context.become 37 | 38 | val log = LoggerFactory.getLogger(getClass) 39 | var jobContext: ContextLike = _ 40 | var defaultConfig: Config = _ 41 | var jobStateMap = new mutable.HashMap[String, JobStatus]() with mutable.SynchronizedMap[String, JobStatus] 42 | 43 | var name = "" 44 | val gsonTransformer = new Gson() 45 | 46 | startWatchingManagerActor() 47 | 48 | /** 49 | * Initial actor mode when it responds to IsAwake message and can be initialized 50 | * @return 51 | */ 52 | override def receive: Receive = { 53 | case ContextManagerActor.IsAwake => 54 | sender ! ContextManagerActor.IsAwake 55 | 56 | case Initialize(contextName, config, jarsForSpark) => 57 | log.info(s"Received InitializeContext message : contextName=$contextName") 58 | log.info("Initializing context " + contextName) 59 | name = contextName 60 | 61 | try { 62 | defaultConfig = config.withValue("context.jars", ConfigValueFactory.fromAnyRef(jarsForSpark.asJava)) 63 | jobContext = JobContextFactory.makeContext(defaultConfig, name) 64 | 65 | sender ! Initialized() 66 | log.info("Successfully initialized context " + contextName) 67 | } catch { 68 | case e: Exception => 69 | log.error("Exception while initializing", e) 70 | sender ! 
FailedInit(ExceptionUtils.getStackTrace(e)) 71 | gracefullyShutdown() 72 | } 73 | 74 | become(initialized) 75 | } 76 | 77 | /** 78 | * Main actor mode when it can run jobs 79 | * @return 80 | */ 81 | def initialized: Receive = { 82 | case ShutDown() => 83 | log.info(s"Context received ShutDown message : contextName=$name") 84 | log.info(s"Shutting down SparkContext $name") 85 | 86 | gracefullyShutdown() 87 | 88 | case RunJob(runningClass, contextName, jobConfig, uuid) => 89 | log.info(s"Received RunJob message : runningClass=$runningClass contextName=$contextName uuid=$uuid ") 90 | jobStateMap += (uuid -> JobStarted()) 91 | 92 | Future { 93 | Try { 94 | val classLoader = Thread.currentThread.getContextClassLoader 95 | val runnableClass = classLoader.loadClass(runningClass) 96 | val sparkJob = runnableClass.newInstance.asInstanceOf[SparkJobBase] 97 | 98 | jobContext.validateJob(sparkJob) match { 99 | case SparkJobValid() => log.info(s"Job $uuid passed context validation.") 100 | case SparkJobInvalid(message) => throw new IllegalArgumentException(s"Invalid job $uuid: $message") 101 | } 102 | 103 | val jobConfigValidation = sparkJob.validate(jobContext.asInstanceOf[sparkJob.C], jobConfig.withFallback(defaultConfig)) 104 | jobConfigValidation match { 105 | case SparkJobInvalid(message) => throw new IllegalArgumentException(message) 106 | case SparkJobValid() => log.info("Job config validation passed.") 107 | } 108 | 109 | sparkJob.runJob(jobContext.asInstanceOf[sparkJob.C], jobConfig.withFallback(defaultConfig)) 110 | } 111 | } andThen { 112 | case Success(futureResult) => futureResult match { 113 | case Success(result) => 114 | log.info(s"Finished running job : runningClass=$runningClass contextName=$contextName uuid=$uuid ") 115 | jobStateMap += (uuid -> JobRunSuccess(gsonTransformer.toJson(result))) 116 | case Failure(e: Throwable) => 117 | jobStateMap += (uuid -> JobRunError(ExceptionUtils.getStackTrace(e))) 118 | log.error(s"Error running job : runningClass=$runningClass contextName=$contextName uuid=$uuid ", e) 119 | case x: Any => 120 | log.error("Received ANY from running job !!! " + x) 121 | } 122 | 123 | case Failure(e: Throwable) => 124 | jobStateMap += (uuid -> JobRunError(ExceptionUtils.getStackTrace(e))) 125 | log.error(s"Error running job : runningClass=$runningClass contextName=$contextName uuid=$uuid ", e) 126 | 127 | case x: Any => 128 | log.error("Received ANY from running job !!! " + x) 129 | } 130 | 131 | case Terminated(actor) => 132 | if (actor.path.toString.contains("Supervisor/ContextManager")) { 133 | log.info(s"Received Terminated message from: ${actor.path.toString}") 134 | log.warn("Shutting down the system because the ManagerSystem terminated.") 135 | gracefullyShutdown() 136 | } 137 | 138 | case JobStatusEnquiry(contextName, jobId) => 139 | val jobState = jobStateMap.getOrElse(jobId, JobDoesNotExist()) 140 | import JobStates._ 141 | jobState match { 142 | case x: JobRunSuccess => sender ! Job(jobId, name, FINISHED.toString, x.result, x.startTime) 143 | case e: JobRunError => sender ! Job(jobId, name, ERROR.toString, e.errorMessage, e.startTime) 144 | case x: JobStarted => sender ! Job(jobId, name, RUNNING.toString, "", x.startTime) 145 | case x: JobDoesNotExist => sender ! 
JobDoesNotExist 146 | } 147 | 148 | case GetAllJobsStatus() => 149 | import JobStates._ 150 | val jobsList = jobStateMap.map { 151 | case (id: String, x: JobRunSuccess) => Job(id, name, FINISHED.toString, x.result, x.startTime) 152 | case (id: String, e: JobRunError) => Job(id, name, ERROR.toString, e.errorMessage, e.startTime) 153 | case (id: String, x: JobStarted) => Job(id, name, RUNNING.toString, "", x.startTime) 154 | }.toList 155 | sender ! jobsList 156 | 157 | case x @ _ => 158 | log.info(s"Received UNKNOWN message type $x") 159 | } 160 | 161 | def gracefullyShutdown() { 162 | Option(jobContext).foreach(_.stop()) 163 | context.system.shutdown() 164 | } 165 | 166 | def startWatchingManagerActor() = { 167 | val managerPort = getValueFromConfig(localConfig, ActorUtils.PORT_PROPERTY_NAME, 4042) 168 | val managerHost = getValueFromConfig(localConfig, ActorUtils.HOST_PROPERTY_NAME, "127.0.0.1") 169 | log.info("Trying to watch the manager actor at : " + managerHost + ":" + managerPort) 170 | val managerActor = context.actorSelection(ActorUtils.getActorAddress("ManagerSystem", managerHost, managerPort, "Supervisor/ContextManager")) 171 | managerActor.resolveOne().onComplete { 172 | case Success(actorRef) => 173 | log.info(s"Now watching the ContextManager from this actor.") 174 | context.watch(actorRef) 175 | case x @ _ => log.info(s"Received message of type $x") 176 | } 177 | } 178 | } 179 | 180 | 181 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/server/domain/actors/ContextManagerActor.scala: -------------------------------------------------------------------------------- 1 | package server.domain.actors 2 | 3 | import java.util 4 | 5 | import akka.actor.{Actor, ActorRef, ActorSelection, Props} 6 | import akka.pattern.ask 7 | import com.typesafe.config.{Config, ConfigFactory} 8 | import org.apache.commons.lang.exception.ExceptionUtils 9 | import org.slf4j.LoggerFactory 10 | import responses.{Context, Contexts} 11 | import server.domain.actors.ContextManagerActor._ 12 | import server.domain.actors.JarActor.{GetJarsPathForAll, ResultJarsPathForAll} 13 | import utils.ActorUtils 14 | 15 | import scala.collection.mutable 16 | import scala.concurrent.ExecutionContext.Implicits.global 17 | import scala.concurrent.duration._ 18 | import scala.sys.process.{Process, ProcessBuilder} 19 | import scala.util.Success 20 | 21 | /** 22 | * Context management messages 23 | */ 24 | object ContextManagerActor { 25 | case class CreateContext(contextName: String, jars: String, config: Config) 26 | case class ContextInitialized(port: String) 27 | case class DeleteContext(contextName: String) 28 | case class ContextProcessTerminated(contextName: String, statusCode: Int) 29 | case class GetContext(contextName: String) 30 | case class GetContextInfo(contextName: String) 31 | case class GetAllContextsForClient() 32 | case class GetAllContexts() 33 | case class NoSuchContext() 34 | case class ContextAlreadyExists() 35 | case class DestroyProcess(process: Process) 36 | case class IsAwake() 37 | case class ContextInfo(contextName: String, sparkUiPort: String, @transient referenceActor: ActorSelection) 38 | } 39 | 40 | /** 41 | * Actor that creates, monitors and destroys contexts and corresponding processes. 
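 *
 * Rough lifecycle of a `CreateContext` request, as implemented by the handlers below (the message
 * values are illustrative only):
 * {{{
 *   contextManagerActor ? CreateContext("demoContext", "example-job.jar", contextConfig)
 * }}}
 * The actor reserves a free actor-system port (and a Spark UI port if none is configured), asks the
 * [[JarActor]] to resolve the jar list, launches a separate JVM through `context_start.sh` wrapped in
 * a [[ContextProcessActor]], and finally initializes the remote [[ContextActor]] with retries before
 * replying to the caller.
 *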
42 | * @param defaultConfig configuration defaults 43 | * @param jarActor actor that responsible for jars which may be included to context classpath 44 | */ 45 | class ContextManagerActor(defaultConfig: Config, jarActor: ActorRef) extends Actor { 46 | 47 | val log = LoggerFactory.getLogger(getClass) 48 | 49 | var lastUsedPort = getValueFromConfig(defaultConfig, "appConf.actor.systems.first.port", 11000) 50 | var lastUsedPortSparkUi = getValueFromConfig(defaultConfig, "appConf.spark.ui.first.port", 16000) 51 | 52 | val contextMap = new mutable.HashMap[String, ContextInfo]() with mutable.SynchronizedMap[String, ContextInfo] 53 | val processMap = new mutable.HashMap[String, ActorRef]() with mutable.SynchronizedMap[String, ActorRef] 54 | 55 | val sparkUIConfigPath: String = "spark.ui.port" 56 | 57 | override def receive: Receive = { 58 | case CreateContext(contextName, jars, config) => 59 | if (contextMap contains contextName) { 60 | sender ! ContextAlreadyExists 61 | } else if (jars.isEmpty) { 62 | sender ! ContextActor.FailedInit("jars property is not defined or is empty.") 63 | } else { 64 | //adding the default configs 65 | var mergedConfig = config.withFallback(defaultConfig) 66 | 67 | //The port for the actor system 68 | val port = ActorUtils.findAvailablePort(lastUsedPort) 69 | lastUsedPort = port 70 | 71 | //If not defined, setting the spark.ui port 72 | if (!config.hasPath(sparkUIConfigPath)) { 73 | mergedConfig = addSparkUiPortToConfig(mergedConfig) 74 | } 75 | 76 | val webSender = sender() 77 | log.info(s"Received CreateContext message : context=$contextName jars=$jars") 78 | 79 | val jarsFuture = jarActor ? GetJarsPathForAll(jars, contextName) 80 | jarsFuture map { 81 | case result @ ResultJarsPathForAll(pathForClasspath, pathForSpark) => 82 | log.info(s"Received jars path: $result") 83 | val processBuilder = createProcessBuilder(contextName, port, pathForClasspath, mergedConfig) 84 | val command = processBuilder.toString 85 | log.info(s"Starting new process for context $contextName: '$command'") 86 | val processActor = context.actorOf(Props(classOf[ContextProcessActor], processBuilder, contextName)) 87 | processMap += contextName -> processActor 88 | 89 | val host = getValueFromConfig(defaultConfig, ActorUtils.HOST_PROPERTY_NAME, "127.0.0.1") 90 | val actorRef = context.actorSelection(ActorUtils.getContextActorAddress(contextName, host, port)) 91 | sendInitMessage(contextName, port, actorRef, webSender, mergedConfig, pathForSpark) 92 | } onFailure { 93 | case e: Exception => 94 | log.error(s"Failed! ${ExceptionUtils.getStackTrace(e)}") 95 | webSender ! e 96 | } 97 | } 98 | 99 | case DeleteContext(contextName) => 100 | log.info(s"Received DeleteContext message : context=$contextName") 101 | if (contextMap contains contextName) { 102 | for ( 103 | contextInfo <- contextMap remove contextName; 104 | processRef <- processMap remove contextName 105 | ) { 106 | contextInfo.referenceActor ! ContextActor.ShutDown() 107 | sender ! Success 108 | 109 | // Terminate process 110 | processRef ! ContextProcessActor.Terminate() 111 | } 112 | } else { 113 | sender ! NoSuchContext 114 | } 115 | 116 | case ContextProcessTerminated(contextName, statusCode) => 117 | log.info(s"Received ContextProcessTerminated message : context=$contextName, statusCode=$statusCode") 118 | contextMap remove contextName foreach { 119 | case contextInfo: ContextInfo => 120 | log.error(s"Removing context $contextName due to corresponding process exit with status code $statusCode") 121 | contextInfo.referenceActor ! 
DeleteContext(contextName) 122 | } 123 | 124 | case GetContext(contextName) => 125 | log.info(s"Received GetContext message : context=$contextName") 126 | if (contextMap contains contextName) { 127 | sender ! contextMap(contextName).referenceActor 128 | } else { 129 | sender ! NoSuchContext 130 | } 131 | 132 | case GetContextInfo(contextName) => 133 | log.info(s"Received GetContext message : context=$contextName") 134 | if (contextMap contains contextName) { 135 | sender ! Context(contextName, contextMap(contextName).sparkUiPort) 136 | } else { 137 | sender ! NoSuchContext 138 | } 139 | 140 | case GetAllContextsForClient() => 141 | log.info(s"Received GetAllContexts message.") 142 | sender ! Contexts(contextMap.values.map(contextInfo => Context(contextInfo.contextName, contextInfo.sparkUiPort)).toArray) 143 | 144 | case GetAllContexts() => 145 | sender ! contextMap.values.map(_.referenceActor) 146 | log.info(s"Received GetAllContexts message.") 147 | } 148 | 149 | def sendInitMessage(contextName: String, port: Int, actorRef: ActorSelection, sender: ActorRef, config: Config, jarsForSpark: List[String]): Unit = { 150 | 151 | val sleepTime = getValueFromConfig(config, "appConf.init.sleep", 3000) 152 | val tries = config.getInt("appConf.init.tries") 153 | val retryTimeOut = config.getLong("appConf.init.retry-timeout") millis 154 | val retryInterval = config.getLong("appConf.init.retry-interval") millis 155 | val sparkUiPort = config.getString(sparkUIConfigPath) 156 | 157 | context.system.scheduler.scheduleOnce(sleepTime millis) { 158 | val isAwakeFuture = context.actorOf(ReTry.props(tries, retryTimeOut, retryInterval, actorRef)) ? IsAwake 159 | isAwakeFuture.map { 160 | case isAwake => 161 | log.info(s"Remote context actor is awaken: $isAwake") 162 | val initializationFuture = actorRef ? ContextActor.Initialize(contextName, config, jarsForSpark) 163 | initializationFuture map { 164 | case success: ContextActor.Initialized => 165 | log.info(s"Context '$contextName' initialized: $success") 166 | contextMap += contextName -> ContextInfo(contextName, sparkUiPort, actorRef) 167 | sender ! Context(contextName, sparkUiPort) 168 | case error @ ContextActor.FailedInit(reason) => 169 | log.error(s"Init failed for context $contextName", reason) 170 | sender ! error 171 | processMap.remove(contextName).get ! ContextProcessActor.Terminate() 172 | } onFailure { 173 | case e: Exception => 174 | log.error("FAILED to send init message!", e) 175 | sender ! ContextActor.FailedInit(ExceptionUtils.getStackTrace(e)) 176 | processMap.remove(contextName).get ! ContextProcessActor.Terminate() 177 | } 178 | } onFailure { 179 | case e: Exception => 180 | log.error("Refused to wait for remote actor, consider it as dead!", e) 181 | sender ! 
ContextActor.FailedInit(ExceptionUtils.getStackTrace(e)) 182 | } 183 | } 184 | } 185 | 186 | def addSparkUiPortToConfig(config: Config): Config = { 187 | lastUsedPortSparkUi = ActorUtils.findAvailablePort(lastUsedPortSparkUi) 188 | val map = new util.HashMap[String, String]() 189 | map.put(sparkUIConfigPath, lastUsedPortSparkUi.toString) 190 | val newConf = ConfigFactory.parseMap(map) 191 | newConf.withFallback(config) 192 | } 193 | 194 | def createProcessBuilder(contextName: String, port: Int, jarsForClasspath: String, config: Config): ProcessBuilder = { 195 | val scriptPath = ContextManagerActor.getClass.getClassLoader.getResource("context_start.sh").getPath 196 | val xmxMemory = getValueFromConfig(config, "driver.xmxMemory", "1g") 197 | 198 | // Create context process directory 199 | val processDirName = new java.io.File(defaultConfig.getString("context.contexts-base-dir")).toString + s"/$contextName" 200 | 201 | Process(scriptPath, Seq(jarsForClasspath, contextName, port.toString, xmxMemory, processDirName)) 202 | } 203 | } 204 | 205 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/server/domain/actors/ContextProcessActor.scala: -------------------------------------------------------------------------------- 1 | package server.domain.actors 2 | 3 | import akka.actor.Actor 4 | import org.slf4j.LoggerFactory 5 | 6 | import scala.concurrent.ExecutionContext.Implicits.global 7 | import scala.concurrent.duration._ 8 | import scala.sys.process.{Process, ProcessBuilder, ProcessLogger} 9 | 10 | object ContextProcessActor { 11 | case class Terminate() 12 | } 13 | 14 | class ContextProcessActor(processBuilder: ProcessBuilder, contextName: String) extends Actor { 15 | import ContextProcessActor._ 16 | 17 | val log = LoggerFactory.getLogger(s"$getClass::$contextName") 18 | 19 | class Slf4jProcessLogger extends ProcessLogger { 20 | def out(line: => String): Unit = log.info(line) 21 | def err(line: => String): Unit = log.error(line) 22 | def buffer[T](f: => T): T = f 23 | } 24 | 25 | val process: Process = processBuilder.run(new Slf4jProcessLogger) 26 | 27 | context.system.scheduler.scheduleOnce(1 seconds) { 28 | val statusCode = process.exitValue() 29 | 30 | if (statusCode < 0) { 31 | log.error(s"Context $contextName exit with error code $statusCode.") 32 | } else { 33 | log.info(s"Context process exit with status $statusCode") 34 | } 35 | 36 | context.parent ! ContextManagerActor.ContextProcessTerminated(contextName, statusCode) 37 | context.system.stop(self) 38 | } 39 | 40 | def receive: Receive = { 41 | case Terminate => 42 | log.info(s"Received Terminate message") 43 | context.system.scheduler.scheduleOnce(5 seconds) { 44 | process.destroy() 45 | context.system.stop(self) 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/server/domain/actors/JarActor.scala: -------------------------------------------------------------------------------- 1 | package server.domain.actors 2 | 3 | import java.io.File 4 | 5 | import akka.actor.{Actor, ActorLogging} 6 | import com.typesafe.config.Config 7 | import org.slf4j.LoggerFactory 8 | import responses.{JarsInfo, JarInfo} 9 | import server.domain.actors.JarActor._ 10 | import utils.{FileUtils, JarUtils} 11 | import scala.collection.mutable.ListBuffer 12 | import scala.util.{Failure, Success, Try} 13 | 14 | /** 15 | * Created by raduc on 04/11/14. 
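 *
 * The messages defined in the [[JarActor]] companion below cover uploading a jar as raw bytes,
 * deleting and listing jars, and resolving a comma-separated list of jar references (an uploaded jar
 * name, an absolute path, or an `hdfs://` URL) into a classpath string plus the list of paths handed
 * to Spark. A sketch of the resolution call, with purely illustrative jar names:
 * {{{
 *   jarActor ? GetJarsPathForAll("example-job.jar,/opt/libs/extra.jar", "demoContext")
 *   // replies with ResultJarsPathForAll(pathForClasspath, pathForSpark), or the failure cause
 * }}}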
16 | */ 17 | 18 | object JarActor { 19 | case class AddJar(jarName: String, bytes: Array[Byte]) 20 | case class NoSuchJar() 21 | case class DeleteJar(jarName: String) 22 | case class GetAllJars() 23 | case class GetAllJarsNames() 24 | case class GetJarsPathForClasspath(paths: String, contextName: String) 25 | case class GetJarsPathForSpark(paths: String) 26 | case class GetJarsPathForAll(paths: String, contextName: String) 27 | case class DeleteJarFolder() 28 | case class CreateJarFolder(overwrite: Boolean) 29 | case class JarFolderExists() 30 | case class ResultJarsPathForAll(pathForClasspath: String, pathForSpark: List[String]) 31 | 32 | 33 | 34 | val CLASSPATH_JAR_SEPARATOR = ":" 35 | val JAR_FOLDER_PROPERTY_PATH = "appConf.jars.path" 36 | 37 | 38 | } 39 | 40 | class JarActor(config: Config) extends Actor { 41 | 42 | val log = LoggerFactory.getLogger(getClass) 43 | 44 | val jarFolder = getValueFromConfig(config, JAR_FOLDER_PROPERTY_PATH, "") 45 | FileUtils.createFolder(jarFolder, false) 46 | 47 | override def receive: Receive = { 48 | case AddJar(jarName, bytes) => { 49 | log.info(s"Received AddJar request for jar $jarName") 50 | Try { 51 | if(!JarUtils.validateJar(bytes)){ 52 | log.error("Jar " + jarName + " is not valid!") 53 | throw new Exception("Jar " + jarName + " is not valid!") 54 | } 55 | FileUtils.writeToFile(jarName, jarFolder, bytes) 56 | } match { 57 | case Success(v) => { 58 | val fileJar = new File(jarFolder + File.separator + jarName) 59 | if(fileJar.exists()) { 60 | sender ! Success(JarInfo(jarName, fileJar.length(), fileJar.lastModified())) 61 | } else { 62 | sender ! Failure(new Exception("Jar was wrote to disk.")) 63 | } 64 | } 65 | case Failure(e) => sender ! Failure(e) 66 | } 67 | } 68 | case DeleteJar(jarName) => { 69 | val file = new File(jarFolder + File.separator + jarName) 70 | if(file.exists()){ 71 | file.delete() 72 | sender ! Success("Jar deleted.") 73 | } else { 74 | sender ! NoSuchJar() 75 | } 76 | } 77 | case GetAllJars() => { 78 | val folderJar = new File(jarFolder) 79 | val files = folderJar.listFiles() 80 | if(files != null){ 81 | val jarInfos = JarsInfo(files.map(jarFile => JarInfo(jarFile.getName, jarFile.length, jarFile.lastModified)).filter(_.name.endsWith(".jar"))) 82 | sender ! jarInfos 83 | } else { 84 | sender ! List() 85 | } 86 | } 87 | case GetAllJarsNames() => { 88 | val folderJar = new File(jarFolder) 89 | val files = folderJar.listFiles() 90 | if(files != null){ 91 | val jarNames = files.map(_.getName).filter(_.endsWith(".jar")).toList 92 | sender ! jarNames 93 | } else { 94 | sender ! List() 95 | } 96 | } 97 | case GetJarsPathForClasspath(path, contextName) => { 98 | 99 | Try { 100 | getJarsPathForClasspath(path, contextName) 101 | } match { 102 | case Success(path) => sender ! path 103 | case Failure(e) => sender ! e 104 | } 105 | 106 | } 107 | case GetJarsPathForSpark(path) => { 108 | Try { 109 | sender ! getJarsPathForSpark(path) 110 | } match { 111 | case Success(path) => sender ! path 112 | case Failure(e) => sender ! e 113 | } 114 | } 115 | 116 | case GetJarsPathForAll(paths: String, contextName: String) => { 117 | Try { 118 | ResultJarsPathForAll(getJarsPathForClasspath(paths, contextName), getJarsPathForSpark(paths)) 119 | } match { 120 | case Success(result) => sender ! result 121 | case Failure(e) => sender ! 
e 122 | } 123 | 124 | } 125 | 126 | case DeleteJarFolder() => { 127 | FileUtils.deleteFolder(jarFolder) 128 | } 129 | 130 | case CreateJarFolder(overwrite: Boolean) => { 131 | FileUtils.createFolder(jarFolder, overwrite) 132 | } 133 | 134 | case JarFolderExists() => { 135 | val file = new File(jarFolder) 136 | sender ! file.exists() 137 | } 138 | } 139 | 140 | 141 | def getJarsPathForSpark(path: String): List[String] = { 142 | var jarSparkPathList = ListBuffer[String]() 143 | path.split(",").foreach { x => 144 | jarSparkPathList += (JarUtils.getJarPathForSpark(x, jarFolder)) 145 | } 146 | jarSparkPathList.toList 147 | } 148 | 149 | def getJarsPathForClasspath(path: String, contextName: String) = { 150 | var jarClasspath = "" 151 | path.split(",").foreach { x => 152 | jarClasspath += JarUtils.getPathForClasspath(x, jarFolder, contextName) + CLASSPATH_JAR_SEPARATOR 153 | } 154 | jarClasspath.substring(0, jarClasspath.size - 1) 155 | } 156 | } 157 | 158 | 159 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/server/domain/actors/JobActor.scala: -------------------------------------------------------------------------------- 1 | package server.domain.actors 2 | 3 | import java.util.UUID 4 | 5 | import akka.actor.{Actor, ActorRef, ActorSelection} 6 | import akka.pattern.ask 7 | import com.typesafe.config.Config 8 | import org.joda.time.{DateTimeZone, DateTime} 9 | import responses.{JobStates, Jobs, Job} 10 | import server.domain.actors.ContextManagerActor.{GetAllContexts, GetContext, NoSuchContext} 11 | import org.slf4j.LoggerFactory 12 | import server.domain.actors.JobActor._ 13 | import scala.concurrent.ExecutionContext.Implicits.global 14 | import scala.concurrent.Future 15 | import scala.util.{Success, Failure} 16 | 17 | /** 18 | * Created by raduc on 03/11/14. 19 | */ 20 | 21 | 22 | object JobActor { 23 | 24 | trait JobStatus { 25 | val startTime: Long = new DateTime (DateTimeZone.UTC).getMillis 26 | } 27 | 28 | case class JobStatusEnquiry(contextName: String, jobId: String) 29 | 30 | case class RunJob(runningClass: String, contextName: String, config: Config, uuid: String = UUID.randomUUID().toString) 31 | 32 | case class JobRunError(errorMessage: String) extends JobStatus 33 | 34 | case class JobRunSuccess(result:String) extends JobStatus 35 | 36 | case class JobStarted() extends JobStatus 37 | 38 | case class JobDoesNotExist() extends JobStatus 39 | 40 | case class UpdateJobStatus(uuid: String, status: JobStatus) 41 | 42 | case class GetAllJobsStatus() 43 | 44 | } 45 | 46 | 47 | class JobActor(config: Config, contextManagerActor: ActorRef) extends Actor { 48 | 49 | val log = LoggerFactory.getLogger(getClass) 50 | 51 | override def receive: Receive = { 52 | case job: RunJob => { 53 | log.info(s"Received RunJob message : runningClass=${job.runningClass} context=${job.contextName} uuid=${job.uuid}") 54 | 55 | val fromWebApi = sender 56 | 57 | val future = contextManagerActor ? GetContext(job.contextName) 58 | future onSuccess { 59 | case contextRef: ActorSelection => { 60 | 61 | import JobStates.RUNNING 62 | fromWebApi ! Job(job.uuid, job.contextName, RUNNING.toString, "", DateTime.now(DateTimeZone.UTC).getMillis) 63 | 64 | log.info(s"Sending RunJob message to actor $contextRef") 65 | contextRef ! job 66 | } 67 | case NoSuchContext => fromWebApi ! NoSuchContext 68 | case e @ _ => log.warn(s"Received UNKNOWN TYPE when asked for context. Type received $e") 69 | } 70 | future onFailure { 71 | case e => { 72 | fromWebApi ! 
e 73 | log.error(s"An error has occured.", e) 74 | } 75 | } 76 | } 77 | 78 | 79 | case jobEnquiry:JobStatusEnquiry => { 80 | log.info(s"Received JobStatusEnquiry message : uuid=${jobEnquiry.jobId}") 81 | val fromWebApi = sender 82 | 83 | 84 | val contextActorFuture = contextManagerActor ? GetContext(jobEnquiry.contextName) 85 | 86 | contextActorFuture onSuccess { 87 | case contextRef: ActorSelection => { 88 | 89 | val enquiryFuture = contextRef ? jobEnquiry 90 | 91 | enquiryFuture onSuccess{ 92 | case state:JobStatus => { 93 | log.info("Job with id: " + jobEnquiry.jobId + " has state : " + state) 94 | fromWebApi ! state 95 | } 96 | case x:Any => { 97 | log.info(s"Received $x TYPE when asked for job enquiry.") 98 | fromWebApi ! x 99 | } 100 | } 101 | 102 | enquiryFuture onFailure { 103 | case e => { 104 | fromWebApi ! e 105 | log.error(s"An error has occured.", e) 106 | } 107 | } 108 | } 109 | case NoSuchContext => fromWebApi ! NoSuchContext 110 | case e @ _ => log.warn(s"Received UNKNOWN TYPE when asked for context. Type received $e") 111 | } 112 | 113 | contextActorFuture onFailure { 114 | case e => { 115 | fromWebApi ! e 116 | log.error(s"An error has occured.", e) 117 | } 118 | } 119 | } 120 | 121 | case GetAllJobsStatus() => { 122 | 123 | val webApi = sender 124 | val future = contextManagerActor ? GetAllContexts() 125 | 126 | val future2: Future[Future[List[List[Job]]]] = future map { 127 | case contexts: List[ActorSelection] => { 128 | val contextsList = contexts.map { context => 129 | val oneContextFuture = context ? GetAllJobsStatus() 130 | oneContextFuture.map{ 131 | case jobs: List[Job] => jobs 132 | } 133 | } 134 | Future.sequence(contextsList) 135 | } 136 | } 137 | val future3: Future[List[List[Job]]] = future2.flatMap(identity) 138 | val future4: Future[List[Job]] = future3.map(x => x.flatMap(identity)) 139 | 140 | future4 onComplete { 141 | case Success(jobsList:List[Job]) => { 142 | webApi ! Jobs(jobsList.toArray.sortWith(_.startTime > _.startTime)) 143 | } 144 | case Failure(e) => webApi ! 
e 145 | } 146 | 147 | } 148 | } 149 | } 150 | 151 | 152 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/server/domain/actors/ReTry.scala: -------------------------------------------------------------------------------- 1 | package server.domain.actors 2 | 3 | import akka.actor._ 4 | import akka.pattern.ask 5 | import org.slf4j.LoggerFactory 6 | 7 | import scala.concurrent.duration._ 8 | import scala.util.{Failure, Success} 9 | 10 | /* 11 | See http://www.codetinkerhack.com/2014/01/re-try-pattern-using-akka-actor-ask.html 12 | */ 13 | 14 | object ReTry { 15 | private case class Retry(originalSender: ActorRef, message: Any, times: Int) 16 | 17 | private case class Response(originalSender: ActorRef, result: Any) 18 | 19 | def props(tries: Int, retryTimeOut: FiniteDuration, retryInterval: FiniteDuration, forwardTo: ActorSelection): Props = Props(new ReTry(tries: Int, retryTimeOut: FiniteDuration, retryInterval: FiniteDuration, forwardTo: ActorSelection)) 20 | 21 | } 22 | 23 | class ReTry(val tries: Int, retryTimeOut: FiniteDuration, retryInterval: FiniteDuration, forwardTo: ActorSelection) extends Actor { 24 | 25 | import context.dispatcher 26 | import server.domain.actors.ReTry._ 27 | val log = LoggerFactory.getLogger(getClass) 28 | 29 | // Retry loop that keep on Re-trying the request 30 | def retryLoop: Receive = { 31 | 32 | // Response from future either Success or Failure is a Success - we propagate it back to a original sender 33 | case Response(originalSender, result) => 34 | originalSender ! result 35 | context stop self 36 | 37 | case Retry(originalSender, message, triesLeft) => 38 | 39 | // Process (Re)try here. When future completes it sends result to self 40 | (forwardTo ? message) (retryTimeOut) onComplete { 41 | 42 | case Success(result) => 43 | self ! Response(originalSender, result) // sending responses via self synchronises results from futures that may come potentially in any order. It also helps the case when the actor is stopped (in this case responses will become deadletters) 44 | 45 | case Failure(ex) => 46 | if (triesLeft - 1 == 0) {// In case of last try and we got a failure (timeout) lets send Retries exceeded error 47 | self ! Response(originalSender, Failure(new Exception("Retries exceeded"))) 48 | } 49 | else 50 | log.error("Error occurred: " + ex) 51 | } 52 | 53 | // Send one more retry after interval 54 | if (triesLeft - 1 > 0) 55 | context.system.scheduler.scheduleOnce(retryInterval, self, Retry(originalSender, message, triesLeft - 1)) 56 | 57 | case m @ _ => 58 | log.error("No handling defined for message: " + m) 59 | 60 | } 61 | 62 | // Initial receive loop 63 | def receive: Receive = { 64 | 65 | case message @ _ => 66 | context.system.scheduler.scheduleOnce(retryInterval, self, Retry(sender, message, tries)) 67 | // self ! Retry(sender, message, tries) 68 | 69 | // Lets swap to a retry loop here. 
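      // `discardOld = false` stacks `retryLoop` on top of the initial receive rather than replacing it;
      // the actor never calls `unbecome()` and simply stops itself after delivering a Response, so the
      // stacked behaviour is never revisited.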
70 | context.become(retryLoop, false) 71 | 72 | } 73 | 74 | } 75 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/server/domain/actors/Supervisor.scala: -------------------------------------------------------------------------------- 1 | package server.domain.actors 2 | 3 | import akka.actor.SupervisorStrategy._ 4 | import akka.actor.{Actor, OneForOneStrategy, Props, actorRef2Scala} 5 | import org.slf4j.LoggerFactory 6 | 7 | import scala.concurrent.duration._ 8 | 9 | 10 | class Supervisor extends Actor { 11 | 12 | val log = LoggerFactory.getLogger(getClass) 13 | 14 | override val supervisorStrategy = 15 | OneForOneStrategy(maxNrOfRetries = 10, withinTimeRange = 1 minute) { 16 | case e: Exception => { 17 | log.error("Exception", e) 18 | Resume 19 | } 20 | } 21 | 22 | def receive = { 23 | case (p: Props, name: String) => sender ! context.actorOf(p, name) 24 | } 25 | 26 | } -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/server/domain/actors/package.scala: -------------------------------------------------------------------------------- 1 | package server.domain 2 | 3 | import akka.util.Timeout 4 | import com.typesafe.config.Config 5 | 6 | import scala.concurrent.duration._ 7 | 8 | /** 9 | * Utility functions for actors 10 | */ 11 | package object actors { 12 | implicit val timeout: Timeout = 50 seconds 13 | 14 | def getValueFromConfig[T](config: Config, configPath: String, defaultValue: T): T ={ 15 | if (config.hasPath(configPath)) config.getAnyRef(configPath).asInstanceOf[T] else defaultValue 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/utils/ActorUtils.scala: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import java.io.IOException 4 | import java.net.ServerSocket 5 | 6 | import com.typesafe.config.{Config, ConfigFactory} 7 | import server.domain.actors._ 8 | 9 | /** 10 | * Created by raduc on 11/11/14. 
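 *
 * Helpers for wiring up remote Akka actors: building `akka.tcp` actor addresses, probing for a free
 * TCP port, and deriving the remoting configuration of a context process. For illustration, a context
 * named `demo` on the default host resolves to an address of the form
 * `akka.tcp://S-demo@127.0.0.1:11001/user/A-demo` (the port shown is hypothetical).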
11 | */ 12 | object ActorUtils { 13 | 14 | val PREFIX_CONTEXT_ACTOR = "A-" 15 | val PREFIX_CONTEXT_SYSTEM = "S-" 16 | 17 | val HOST_PROPERTY_NAME = "manager.akka.remote.netty.tcp.hostname" 18 | val PORT_PROPERTY_NAME = "manager.akka.remote.netty.tcp.port" 19 | 20 | def getContextActorAddress(contextName: String, host: String, port: Int): String ={ 21 | getActorAddress(PREFIX_CONTEXT_SYSTEM + contextName, host, port, PREFIX_CONTEXT_ACTOR + contextName) 22 | } 23 | 24 | def getActorAddress(systemName: String, host: String, port: Int, actorName: String): String = { 25 | "akka.tcp://" + systemName + "@" + host + ":" + port + "/user/" + actorName 26 | } 27 | 28 | def findAvailablePort(lastUsedPort: Int): Integer = { 29 | val notFound = true; 30 | var port = lastUsedPort + 1 31 | while (notFound) { 32 | try { 33 | new ServerSocket(port).close() 34 | return port 35 | } 36 | catch { 37 | case e: IOException => { 38 | port += 1 39 | } 40 | } 41 | } 42 | return 0 43 | } 44 | 45 | def remoteConfig(hostname: String, port: Int, commonConfig: Config): Config = { 46 | 47 | val host = getValueFromConfig(commonConfig, ActorUtils.HOST_PROPERTY_NAME, "127.0.0.1") 48 | 49 | val configStr = """ 50 | akka{ 51 | log-dead-letters = 0 52 | actor { 53 | provider = "akka.remote.RemoteActorRefProvider" 54 | } 55 | remote { 56 | enabled-transports = ["akka.remote.netty.tcp"] 57 | log-sent-messages = on 58 | log-received-messages = on 59 | log-remote-lifecycle-events = off 60 | netty.tcp { 61 | maximum-frame-size = 512000b 62 | hostname = """" + host + """" 63 | port = """ + port + 64 | """ } 65 | } 66 | }""" 67 | 68 | ConfigFactory.parseString(configStr).withFallback(commonConfig) 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/utils/ContextUtils.scala: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import com.typesafe.config.Config 4 | import org.apache.spark.SparkConf 5 | 6 | import scala.collection.JavaConverters._ 7 | 8 | object ContextUtils { 9 | def configToSparkConf(config:Config, contextName:String): SparkConf ={ 10 | val sparkConf = new SparkConf() 11 | .setAppName(contextName) 12 | .setJars(config.getStringList("context.jars").asScala) 13 | 14 | for(x <- config.entrySet().asScala if x.getKey.startsWith("spark.")) { 15 | sparkConf.set(x.getKey, x.getValue.unwrapped().toString) 16 | } 17 | 18 | sparkConf 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/utils/FileUtils.scala: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import java.io.{File, FileOutputStream} 4 | import org.apache.commons 5 | 6 | /** 7 | * Created by raduchilom on 22/03/15. 
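 *
 * Thin wrappers around `java.io` and Apache commons-io used for writing uploaded jar bytes to disk
 * and for creating or deleting the jar folder.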
8 | */ 9 | object FileUtils { 10 | 11 | def writeToFile(fileName: String, folderName: String, bytes: Array[Byte]): Unit = { 12 | val fos = new FileOutputStream(folderName + File.separator + fileName) 13 | fos.write(bytes) 14 | fos.close() 15 | } 16 | 17 | def createFolder(folder: String, overwrite: Boolean) = { 18 | val file = new File(folder) 19 | if(!file.exists()){ 20 | file.mkdir() 21 | } else if (overwrite){ 22 | commons.io.FileUtils.deleteDirectory(file) 23 | file.mkdir() 24 | } 25 | } 26 | 27 | def deleteFolder(folder: String): Unit = { 28 | val file = new File(folder) 29 | if(file.exists()){ 30 | commons.io.FileUtils.deleteDirectory(file) 31 | } 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/utils/HdfsUtils.scala: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import org.apache.hadoop.conf.Configuration 4 | import org.apache.hadoop.fs.{Path, FileSystem} 5 | 6 | /** 7 | * Created by raduchilom on 22/03/15. 8 | */ 9 | object HdfsUtils { 10 | 11 | def copyJarFromHdfs(hdfsPath: String, outputFolder: String) = { 12 | 13 | // if(!config.hasPath("hdfs.namenode")){ 14 | // println("ERROR: HDFS NameNode is not set in application.conf!") 15 | // throw new Exception("HDFS NameNode is not set in application.conf!") 16 | // } 17 | 18 | val conf = new Configuration() 19 | // conf.set("fs.defaultFS", getValueFromConfig(config, "hdfs.namenode", "")) 20 | conf.set("fs.defaultFS", hdfsPath) 21 | val hdfsFileSystem = FileSystem.get(conf) 22 | 23 | hdfsFileSystem.copyToLocalFile(new Path(hdfsPath), new Path(outputFolder)) 24 | } 25 | 26 | } 27 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scala/utils/JarUtils.scala: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import java.io.File 4 | 5 | /** 6 | * Created by raduchilom on 22/03/15. 
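 *
 * Jar handling helpers: a jar is accepted when it starts with the ZIP magic bytes (0x50 0x4b 0x03 0x04),
 * and a jar reference is resolved as an absolute path, an `hdfs://` URL, or the name of a previously
 * uploaded jar inside the configured jar folder. A purely illustrative resolution:
 * {{{
 *   JarUtils.getJarPathForSpark("example-job.jar", "/tmp/spark-job-rest/jars")
 *   // => "/tmp/spark-job-rest/jars/example-job.jar", provided that file exists
 * }}}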
7 | */ 8 | object JarUtils { 9 | 10 | def validateJar(bytes: Array[Byte]): Boolean = { 11 | // For now just check the first few bytes are the ZIP signature: 0x04034b50 little endian 12 | if(bytes.size < 4 || bytes(0) != 0x50 || bytes(1) != 0x4b || bytes(2) != 0x03 || bytes(3) != 0x04){ 13 | false 14 | } else { 15 | true 16 | } 17 | } 18 | 19 | def getPathForClasspath(path: String, jarFolder: String, contextName: String): String = { 20 | val diskPath = 21 | if(path.startsWith("/")){ 22 | path 23 | } else if (path.startsWith("hdfs")){ 24 | val tempFolder = jarFolder + "tmp" + File.pathSeparator + contextName 25 | FileUtils.createFolder(tempFolder, true) 26 | HdfsUtils.copyJarFromHdfs(path, tempFolder) 27 | tempFolder + File.pathSeparator + getJarName(path) 28 | } else { 29 | jarFolder + File.separator + getJarName(path) 30 | } 31 | 32 | val diskFile = new File(diskPath) 33 | if (diskFile.exists()) { 34 | return diskPath 35 | } 36 | 37 | throw new Exception(s"Jar $path not found.") 38 | } 39 | 40 | 41 | def getJarName(path: String): String = { 42 | if(path.contains('\\')) { 43 | path.substring(path.lastIndexOf('\\')) 44 | } else { 45 | path 46 | } 47 | } 48 | 49 | def getJarPathForSpark(path: String, jarFolder: String): String = { 50 | if(path.startsWith("hdfs")){ 51 | //TODO: perform hdfs validation 52 | return path 53 | } else { 54 | val diskPath = 55 | if(path.startsWith("/")){ 56 | path 57 | } else { 58 | jarFolder + File.separator + getJarName(path) 59 | } 60 | val diskFile = new File(diskPath) 61 | if (diskFile.exists()) { 62 | return diskPath 63 | } 64 | } 65 | 66 | throw new Exception(s"Jar $path not found.") 67 | } 68 | 69 | } 70 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scripts/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | CMD=$1 6 | ARG1=$2 7 | 8 | CDIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) 9 | PROJECT_DIR="${CDIR}/../../../.." 10 | 11 | SJR_IS_REMOTE_DEPLOY=${SJR_IS_REMOTE_DEPLOY-false} 12 | SJR_PACKAGE_PATH=${SJR_PACKAGE_PATH-${PROJECT_DIR}/spark-job-rest/target/spark-job-rest.tar.gz} 13 | 14 | SJR_DEPLOY_PATH="${SJR_DEPLOY_PATH}" # Empty variable will cause error in action 15 | SJR_REMOTE_DEPLOY_PATH="${SJR_REMOTE_DEPLOY_PATH}" # Overrides SJR_DEPLOY_PATH in case of remote deploy 16 | SJR_DEPLOY_KEY="${SJR_DEPLOY_KEY}" # Empty by default 17 | SJR_DEPLOY_HOST="${SJR_DEPLOY_HOST}" # Empty for local deploy 18 | 19 | CONFIGURATION_IS_SET="false" 20 | 21 | function setup_defaults() { 22 | if [ -z "${SJR_DEPLOY_PATH}" ]; then 23 | echo "Spark-Job-REST deployment path is not defined. Set 'SJR_DEPLOY_PATH' before running this script." 24 | exit -1 25 | fi 26 | } 27 | 28 | function setup_remote() { 29 | SSH_KEY_EXPRESSION="" 30 | if [ ! -z "${SJR_DEPLOY_KEY}" ]; then 31 | echo "Using SSH key from '${SJR_DEPLOY_KEY}'" 32 | SSH_KEY_EXPRESSION="-i ${SJR_DEPLOY_KEY}" 33 | fi 34 | 35 | if [ -z "${SJR_DEPLOY_HOST}" ]; then 36 | echo "Spark-Job-REST deployment host is not defined. Set 'SJR_DEPLOY_HOST' before running this script." 37 | exit -1 38 | fi 39 | 40 | # Override deploy path in remote mode 41 | if [ ! 
-z "${SJR_REMOTE_DEPLOY_PATH}" ]; then 42 | SJR_DEPLOY_PATH="${SJR_REMOTE_DEPLOY_PATH}" 43 | fi 44 | } 45 | 46 | function setup() { 47 | if [ "${CONFIGURATION_IS_SET}" = "false" ]; then 48 | CONFIGURATION_IS_SET="true" 49 | setup_defaults 50 | if [ "${SJR_IS_REMOTE_DEPLOY}" = "true" ]; then 51 | setup_remote 52 | else 53 | SJR_DEPLOY_HOST="localhost" 54 | fi 55 | fi 56 | } 57 | 58 | function exec_remote() { 59 | setup 60 | ssh -i "${SJR_DEPLOY_KEY}" "${SJR_DEPLOY_HOST}" "$1" 61 | } 62 | 63 | function exec_local() { 64 | setup 65 | eval "$1" 66 | } 67 | 68 | function exec_cmd() { 69 | if [ "$SJR_IS_REMOTE_DEPLOY" = "true" ]; then 70 | exec_remote "$1" 71 | else 72 | exec_local "$1" 73 | fi 74 | } 75 | 76 | function stop_server() { 77 | echo "Stopping server" 78 | exec_cmd "if [ -d ${SJR_DEPLOY_PATH} ]; then ${SJR_DEPLOY_PATH}/bin/stop_server.sh; fi" 79 | exec_cmd "pkill -f 'java.*spark-job-rest.jar'" || true 80 | } 81 | 82 | function remove_server() { 83 | echo "Remove server" 84 | setup 85 | exec_cmd "rm -rf ${SJR_DEPLOY_PATH}" 86 | } 87 | 88 | function upload_tarball() { 89 | if [ "${SJR_IS_REMOTE_DEPLOY}" = "true" ]; then 90 | echo "Upload tarball" 91 | scp "${SSH_KEY_EXPRESSION}" "$SJR_PACKAGE_PATH" "${SJR_DEPLOY_HOST}":"/tmp/" 92 | fi 93 | } 94 | 95 | function extract_package() { 96 | echo "Extract from tarball" 97 | exec_cmd "mkdir -p ${SJR_DEPLOY_PATH}" 98 | if [ "${SJR_IS_REMOTE_DEPLOY}" = "true" ]; then 99 | exec_remote "tar zxf /tmp/spark-job-rest.tar.gz -C ${SJR_DEPLOY_PATH} --strip-components=1" 100 | else 101 | exec_local "tar zxf ${SJR_PACKAGE_PATH} -C ${SJR_DEPLOY_PATH} --strip-components=1" 102 | fi 103 | } 104 | 105 | function deploy_server() { 106 | echo "Deploing to ${SJR_DEPLOY_HOST}:${SJR_DEPLOY_PATH}" 107 | stop_server 108 | remove_server 109 | upload_tarball 110 | extract_package 111 | start_server 112 | } 113 | 114 | function start_server() { 115 | echo "Run server" 116 | exec_cmd "${SJR_DEPLOY_PATH}/bin/start_server.sh" 117 | } 118 | 119 | function server_log() { 120 | echo "Spark-Job-REST main log:" 121 | exec_cmd "tail -f ${SJR_DEPLOY_PATH}/logs/spark-job-rest.log" 122 | } 123 | 124 | function server_log_context() { 125 | CONTEXT_NAME=$ARG1 126 | echo "Spark-Job-REST '${CONTEXT_NAME}' log:" 127 | exec_cmd "tail -f ${SJR_DEPLOY_PATH}/logs/${CONTEXT_NAME}.log" 128 | } 129 | 130 | function show_help() { 131 | echo "Spark-Job-REST deployment tool" 132 | echo "Usage: deploy.sh [deploy|start|stop|restart|log|log-context ]" 133 | } 134 | 135 | function show_vars() { 136 | echo "SJR_DEPLOY_PATH=${SJR_DEPLOY_PATH}" 137 | echo "SJR_DEPLOY_HOST=${SJR_DEPLOY_HOST}" 138 | echo "SJR_DEPLOY_KEY=${SJR_DEPLOY_KEY}" 139 | echo "SJR_PACKAGE_PATH=${SJR_PACKAGE_PATH}" 140 | echo "SJR_IS_REMOTE_DEPLOY=${SJR_IS_REMOTE_DEPLOY}" 141 | echo "SJR_REMOTE_DEPLOY_PATH=${SJR_REMOTE_DEPLOY_PATH}" 142 | } 143 | 144 | function main() { 145 | case "$CMD" in 146 | deploy) setup 147 | deploy_server 148 | ;; 149 | stop) setup 150 | stop_server 151 | ;; 152 | start) setup 153 | start_server 154 | ;; 155 | restart) setup 156 | stop_server 157 | start_server 158 | ;; 159 | log) setup 160 | server_log 161 | ;; 162 | log-context) setup 163 | server_log_context 164 | ;; 165 | debug) show_vars 166 | ;; 167 | help) show_help 168 | ;; 169 | *) show_help 170 | ;; 171 | esac 172 | } 173 | 174 | main -------------------------------------------------------------------------------- /spark-job-rest/src/main/scripts/restart_server.sh: -------------------------------------------------------------------------------- 1 
| #!/bin/bash 2 | 3 | get_abs_script_path() { 4 | pushd . >/dev/null 5 | cd $(dirname $0) 6 | appdir=$(pwd) 7 | popd >/dev/null 8 | } 9 | get_abs_script_path 10 | 11 | "$appdir/stop_server.sh" 12 | "$appdir/start_server.sh" 13 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scripts/start_server.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Script to start the job server 3 | set -e 4 | 5 | get_abs_script_path() { 6 | pushd . >/dev/null 7 | cd $(dirname $0) 8 | appdir=$(pwd) 9 | popd >/dev/null 10 | } 11 | get_abs_script_path 12 | 13 | parentdir="$(dirname "$appdir")" 14 | 15 | GC_OPTS="-XX:+UseConcMarkSweepGC 16 | -verbose:gc -XX:+PrintGCTimeStamps -Xloggc:$appdir/gc.out 17 | -XX:MaxPermSize=512m 18 | -XX:+CMSClassUnloadingEnabled " 19 | 20 | JAVA_OPTS="-Xmx1g -XX:MaxDirectMemorySize=512M 21 | -XX:+HeapDumpOnOutOfMemoryError -Djava.net.preferIPv4Stack=true 22 | -Dcom.sun.management.jmxremote.authenticate=false 23 | -Dcom.sun.management.jmxremote.ssl=false" 24 | 25 | MAIN="server.Main" 26 | 27 | conffile="$parentdir/resources/application.conf" 28 | 29 | if [ ! -f "$conffile" ]; then 30 | echo "No configuration file $conffile found" 31 | exit 1 32 | fi 33 | 34 | if [ -f "$parentdir/resources/settings.sh" ]; then 35 | . $parentdir/resources/settings.sh 36 | else 37 | echo "Missing $parentdir/resources/settings.sh, exiting" 38 | exit 1 39 | fi 40 | 41 | if [ -z "$SPARK_HOME" ]; then 42 | echo "Please set SPARK_HOME or put it in $parentdir/resources/settings.sh first" 43 | exit 1 44 | fi 45 | 46 | # Pull in other env vars in spark config, such as MESOS_NATIVE_LIBRARY 47 | . $SPARK_CONF_HOME/spark-env.sh 48 | 49 | 50 | mkdir -p "${LOG_DIR}" 51 | 52 | LOGGING_OPTS="-Dlog4j.configuration=log4j.properties 53 | -DLOG_DIR=$LOG_DIR 54 | -DLOG_FILE=spark-job-rest.log" 55 | 56 | # For Mesos 57 | #CONFIG_OVERRIDES="-Dspark.executor.uri=$SPARK_EXECUTOR_URI " 58 | # For Mesos/Marathon, use the passed-in port 59 | if [ -n "$PORT" ]; then 60 | CONFIG_OVERRIDES+="-Dspark.jobserver.port=$PORT " 61 | fi 62 | 63 | # The following should be exported in order to be accessible in Config substitutions 64 | export SPARK_HOME 65 | export APP_DIR 66 | export JAR_PATH 67 | export CONTEXTS_BASE_DIR 68 | 69 | # job server jar needs to appear first so its deps take higher priority 70 | # need to explicitly include app dir in classpath so logging configs can be found 71 | #CLASSPATH="$appdir:$appdir/spark-job-server.jar:$($SPARK_HOME/bin/compute-classpath.sh)" 72 | CLASSPATH="$parentdir/resources:$appdir:$parentdir/spark-job-rest.jar:$($SPARK_HOME/bin/compute-classpath.sh)" 73 | echo "CLASSPATH = $CLASSPATH" 74 | 75 | 76 | exec java -cp $CLASSPATH $GC_OPTS $JAVA_OPTS $LOGGING_OPTS $CONFIG_OVERRIDES $MAIN $conffile > /dev/null 2>&1 & 77 | echo $! > $appdir/server.pid 78 | -------------------------------------------------------------------------------- /spark-job-rest/src/main/scripts/stop_server.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | appdir="$(dirname "$0")" 4 | 5 | if [ -f "$appdir/server.pid" ]; then 6 | pid="$(cat "$appdir/server.pid")" 7 | proc="$(ps axu | grep "$pid" | grep spark-job-rest.jar | awk '{print $2}')" 8 | if [ -n "$proc" ]; then 9 | echo "Killing pid $proc" 10 | kill -9 $proc 11 | else 12 | echo "Pid $pid does not exist or it's not for spark-job-rest." 
13 | fi 14 | else 15 | echo "Pid file $appdir/server.pid was not found" 16 | fi 17 | 18 | -------------------------------------------------------------------------------- /spark-job-rest/src/test/resources/application.conf: -------------------------------------------------------------------------------- 1 | # spark default configuration 2 | spark.executor.memory=2g 3 | spark.mesos.coarse=false 4 | spark.scheduler.mode=FAIR 5 | spark.cores.max=2 6 | spark.master="local" 7 | spark.path=${SPARK_HOME} 8 | spark.default.parallelism=384 9 | spark.storage.memoryFraction=0.3 10 | spark.shuffle.memoryFraction=0.6 11 | spark.shuffle.compress=true 12 | spark.shuffle.spill-compress=true 13 | spark.reducer.maxMbInFlight=48 14 | spark.akka.frameSize=100 15 | spark.akka.threads=4 16 | spark.akka.timeout=100 17 | spark.task.maxFailures=4 18 | spark.shuffle.consolidateFiles=true 19 | spark.deploy.spreadOut=true 20 | spark.shuffle.spill=false 21 | spark.kryo.referenceTracking=false 22 | 23 | # Default Spark driver JVM memory 24 | driver.xmxMemory = 1g 25 | 26 | # application configuration 27 | appConf{ 28 | # The IP on which to deploy the APIs 29 | web.services.ip="0.0.0.0" 30 | # The port on which to deploy the APIs 31 | web.services.port=8097 32 | # Implicit akka timeout 33 | timeout=1000000 34 | # Remote context initialization 35 | init { 36 | # Implicit sleep (milliseconds) before sending the init message 37 | sleep=3000 38 | # Tries before considering the remote context dead 39 | tries=20 40 | # Timeout for each attempt (milliseconds) 41 | retry-timeout=1000 42 | # Interval between attempts to reach the remote context (milliseconds) 43 | retry-interval=1500 44 | } 45 | # The port where the range for actor systems starts 46 | actor.systems.first.port = 11000 47 | # The port where the range for the Spark UI starts 48 | spark.ui.first.port = 16000 49 | # The path to the folder where jars are kept 50 | jars.path = /tmp/spark-job-rest/jars 51 | } 52 | 53 | context{ 54 | # Path to the context process work directory 55 | contexts-base-dir = /tmp/spark-job-rest/contexts 56 | # Number of jobs which can be executed on a context in parallel. Zero means infinite concurrency. 
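# (for example, a hypothetical `cuncurrent-jobs = 4` would cap a context at four jobs running at once)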
57 | cuncurrent-jobs = 0 58 | # Context factory that will be dynamically loaded to instantiate job context 59 | job-context-factory = "context.SparkContextFactory" 60 | } 61 | 62 | manager { 63 | akka { 64 | log-dead-letters = 1 65 | actor { 66 | provider = "akka.remote.RemoteActorRefProvider" 67 | } 68 | remote { 69 | log-remote-lifecycle-events = off 70 | enabled-transports = ["akka.remote.netty.tcp"] 71 | log-sent-messages = on 72 | log-received-messages = on 73 | netty.tcp { 74 | transport-class = "akka.remote.transport.netty.NettyTransport" 75 | hostname = "localhost" 76 | port = 4042 77 | maximum-frame-size = 256000b 78 | } 79 | } 80 | } 81 | 82 | spray.can.server { 83 | # uncomment the next line for making this an HTTPS example 84 | # ssl-encryption = on 85 | idle-timeout = 61 s 86 | request-timeout = 60 s 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /spark-job-rest/src/test/scala/context/JobContextFactorySpec.scala: -------------------------------------------------------------------------------- 1 | package context 2 | 3 | import api.{ContextLike, SparkJobBase} 4 | import com.typesafe.config.{Config, ConfigFactory} 5 | import org.apache.spark.SparkContext 6 | import org.junit.runner.RunWith 7 | import org.scalatest._ 8 | import org.scalatest.junit.JUnitRunner 9 | 10 | trait FakeContext 11 | 12 | class FakeJobContextFactory extends JobContextFactory { 13 | type C = ContextLike 14 | def makeContext(config: Config, contextName: String): ContextLike = new ContextLike with FakeContext { 15 | val contextClass = classOf[FakeContext].getName 16 | override def stop(): Unit = {} 17 | override def isValidJob(job: SparkJobBase): Boolean = true 18 | override def sparkContext: SparkContext = null 19 | } 20 | } 21 | 22 | /** 23 | * Test suite for [[JobContextFactory]]. 
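 *
 * Verifies that a factory can be loaded by class name, that [[SparkContextFactory]] is the default,
 * and that contexts are built through whichever factory the configuration names.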
24 | */ 25 | @RunWith(classOf[JUnitRunner]) 26 | class JobContextFactorySpec extends WordSpec with MustMatchers with BeforeAndAfter { 27 | "JobContextFactory" should { 28 | "load specified factory" in { 29 | JobContextFactory 30 | .getFactory("context.SparkContextFactory") 31 | .isInstanceOf[SparkContextFactory] mustEqual true 32 | } 33 | 34 | "load default factory" in { 35 | JobContextFactory 36 | .getFactory() 37 | .isInstanceOf[SparkContextFactory] mustEqual true 38 | } 39 | 40 | "make context with default factory if other is not specified" in { 41 | val context = JobContextFactory.makeContext(ConfigFactory.parseString( 42 | """ 43 | |{ 44 | | context.jars = [], 45 | | spark.master = "local", 46 | | spark.app.id = "test" 47 | |} 48 | """.stripMargin).resolve(), "test") 49 | context.isInstanceOf[SparkContext] mustEqual true 50 | context.stop() 51 | } 52 | 53 | "make context with specified factory if other is not specified" in { 54 | JobContextFactory.makeContext(ConfigFactory.parseString( 55 | """ 56 | |{ 57 | | context.jars = [], 58 | | context.job-context-factory = "context.FakeJobContextFactory", 59 | | spark.master = "local", 60 | | spark.app.id = "test" 61 | |} 62 | """.stripMargin).resolve(), "test") 63 | .isInstanceOf[FakeContext] mustEqual true 64 | } 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /spark-job-rest/src/test/scala/context/SparkContextFactorySpec.scala: -------------------------------------------------------------------------------- 1 | package context 2 | 3 | import api.ContextLike 4 | import com.typesafe.config.ConfigFactory 5 | import org.apache.spark.SparkContext 6 | import org.junit.runner.RunWith 7 | import org.scalatest._ 8 | import org.scalatest.junit.JUnitRunner 9 | 10 | import scala.util.Try 11 | 12 | /** 13 | * Test suite for [[SparkContextFactory]]. 14 | */ 15 | @RunWith(classOf[JUnitRunner]) 16 | class SparkContextFactorySpec extends WordSpec with MustMatchers with BeforeAndAfter { 17 | type C = SparkContext with ContextLike 18 | 19 | var sparkContext: C = _ 20 | val sparkContextFactory = new SparkContextFactory() 21 | 22 | // Destroy Spark context after each test 23 | after { 24 | Try{ sparkContext.stop() } 25 | } 26 | 27 | "SingletonSparkContextFactory" should { 28 | "create Spark context" in { 29 | sparkContext = sparkContextFactory.makeContext(config, this.getClass.getName) 30 | sparkContext.appName mustEqual this.getClass.getName 31 | } 32 | } 33 | 34 | val config = ConfigFactory.parseString( 35 | """ 36 | |{ 37 | | context.jars = [], 38 | | spark.master = "local" 39 | |} 40 | """.stripMargin) 41 | } 42 | -------------------------------------------------------------------------------- /spark-job-rest/src/test/scala/integration/IntegrationTests.scala: -------------------------------------------------------------------------------- 1 | package integration 2 | 3 | import akka.actor.ActorSystem 4 | import akka.util.Timeout 5 | import client.SparkJobRestClient 6 | import org.junit.runner.RunWith 7 | import org.scalatest.concurrent.ScalaFutures 8 | import org.scalatest.{Matchers, BeforeAndAfter, FunSuite} 9 | import org.scalatest.junit.JUnitRunner 10 | import responses.Job 11 | 12 | /** 13 | * Created by raduchilom on 4/25/15. 
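 *
 * End-to-end tests that exercise a running server through [[SparkJobRestClient]]. They assume the
 * REST API is reachable at http://localhost:8097 and that `example-job.jar` has been built; the
 * hard-coded `exampleJarPath` below is machine-specific and will almost certainly need adjusting.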
14 | */ 15 | @RunWith(classOf[JUnitRunner]) 16 | class IntegrationTests extends FunSuite with BeforeAndAfter with ScalaFutures with Matchers { 17 | 18 | implicit val timeout = Timeout(10000) 19 | implicit val system = ActorSystem("localSystem") 20 | 21 | val client = new SparkJobRestClient("http://localhost:8097") 22 | val contextName = "testContext" 23 | val exampleJarPath = "/Users/raduchilom/projects/spark-job-rest/examples/example-job/target/example-job.jar" 24 | val parameters = Map[String, String]("jars" -> exampleJarPath, 25 | "input" -> "100") 26 | 27 | 28 | before { 29 | } 30 | 31 | after { 32 | } 33 | 34 | test("Create Context & Delete Context") { 35 | val context = client.createContext(contextName, parameters) 36 | context.contextName should be(contextName) 37 | 38 | var contexts = client.getContexts() 39 | contexts.contexts.size should be(1) 40 | contexts.contexts.contains(context) should be(true) 41 | 42 | client.deleteContext(contextName) should be(true) 43 | contexts = client.getContexts() 44 | contexts.contexts.size should be(0) 45 | } 46 | 47 | test("Create Contexts & Delete Contexts") { 48 | 49 | for(i <- 0 to 4) { 50 | val context = client.createContext(contextName + i, parameters) 51 | context.contextName should be(contextName + i) 52 | } 53 | 54 | var contexts = client.getContexts() 55 | contexts.contexts.size should be(5) 56 | 57 | for(i <- 0 to 4) { 58 | client.deleteContext(contextName + i) should be(true) 59 | contexts = client.getContexts() 60 | contexts.contexts.size should be(4 - i) 61 | } 62 | } 63 | 64 | test("Create Context & Run Job") { 65 | val context = client.createContext(contextName, parameters) 66 | context.contextName should be(contextName) 67 | 68 | var contexts = client.getContexts() 69 | contexts.contexts.size should be(1) 70 | contexts.contexts.contains(context) should be(true) 71 | 72 | val job = client.runJob("com.job.SparkJobImplemented", contextName, parameters) 73 | job shouldBe a [Job] 74 | 75 | Thread.sleep(2000) 76 | 77 | val jobResult = client.getJob(job.jobId, contextName) 78 | jobResult.result should be("100") 79 | 80 | client.deleteContext(contextName) should be(true) 81 | contexts = client.getContexts() 82 | contexts.contexts.size should be(0) 83 | } 84 | 85 | test("Upload Jar") { 86 | val jarInfo = client.uploadJar("example-job.jar", exampleJarPath) 87 | jarInfo.name should be("example-job.jar") 88 | } 89 | 90 | } 91 | -------------------------------------------------------------------------------- /spark-job-rest/src/test/scala/server/domain/actors/ContextActorSpec.scala: -------------------------------------------------------------------------------- 1 | package server.domain.actors 2 | 3 | import java.util.concurrent.TimeUnit 4 | 5 | import akka.actor.ActorSystem 6 | import akka.pattern.ask 7 | import akka.testkit.TestActorRef 8 | import akka.util.Timeout 9 | import com.typesafe.config.ConfigFactory 10 | import context.{FakeContext, JobContextFactory} 11 | import org.apache.spark.SparkContext 12 | import org.junit.runner.RunWith 13 | import org.scalatest._ 14 | import org.scalatest.concurrent.TimeLimitedTests 15 | import org.scalatest.junit.JUnitRunner 16 | import org.scalatest.time.SpanSugar._ 17 | 18 | import scala.util.Success 19 | 20 | /** 21 | * Test suit for [[ContextActor]] 22 | */ 23 | @RunWith(classOf[JUnitRunner]) 24 | class ContextActorSpec extends WordSpec with MustMatchers with BeforeAndAfter with TimeLimitedTests { 25 | val timeLimit = 10 seconds 26 | 27 | val config = ConfigFactory.load() 28 | 29 | implicit val 
timeout = Timeout(10, TimeUnit.SECONDS) 30 | implicit val system = ActorSystem("localSystem") 31 | 32 | var contextActorRef: TestActorRef[ContextActor] = _ 33 | def contextActor = contextActorRef.underlyingActor 34 | 35 | val contextName = "demoContext" 36 | 37 | before { 38 | contextActorRef = TestActorRef(new ContextActor(config)) 39 | } 40 | 41 | after { 42 | contextActor.jobContext.stop() 43 | } 44 | 45 | "ContextActor" should { 46 | "create Spark context when initialized" in { 47 | val future = contextActorRef ? ContextActor.Initialize(contextName, config, List()) 48 | val Success(result: ContextActor.Initialized) = future.value.get 49 | result must not equal null 50 | contextActor.jobContext.isInstanceOf[SparkContext] mustEqual true 51 | } 52 | 53 | "have default factory for Spark context" in { 54 | val configWithoutFactory = config.withoutPath(JobContextFactory.classNameConfigEntry) 55 | val future = contextActorRef ? ContextActor.Initialize(contextName, configWithoutFactory, List()) 56 | val Success(result: ContextActor.Initialized) = future.value.get 57 | result must not equal null 58 | contextActor.jobContext.isInstanceOf[SparkContext] mustEqual true 59 | } 60 | 61 | "create context from specified factory" in { 62 | val future = contextActorRef ? ContextActor.Initialize(contextName, fakeContextFactoryConfig, List()) 63 | val Success(result: ContextActor.Initialized) = future.value.get 64 | result must not equal null 65 | contextActor.jobContext.isInstanceOf[FakeContext] mustEqual true 66 | } 67 | } 68 | 69 | val fakeContextFactoryConfig = ConfigFactory.parseString( 70 | """ 71 | |{ 72 | | context.job-context-factory = "context.FakeJobContextFactory", 73 | |} 74 | """.stripMargin).withFallback(config) 75 | } 76 | -------------------------------------------------------------------------------- /spark-job-rest/src/test/scala/server/domain/actors/JarActorTest.scala: -------------------------------------------------------------------------------- 1 | package server.domain.actors 2 | 3 | import java.io.File 4 | import java.util.concurrent.TimeUnit 5 | 6 | import akka.actor.ActorSystem 7 | import akka.pattern.ask 8 | import akka.testkit.TestActorRef 9 | import akka.util.Timeout 10 | import com.typesafe.config.ConfigFactory 11 | import org.junit.runner.RunWith 12 | import org.scalatest.concurrent.ScalaFutures 13 | import org.scalatest.junit.JUnitRunner 14 | import org.scalatest.{BeforeAndAfter, FunSuite, Matchers} 15 | import responses.JarInfo 16 | import server.domain.actors.JarActor._ 17 | import utils.FileUtils 18 | 19 | import scala.util.{Random, Success} 20 | 21 | /** 22 | * Test suite for [[JarActor]]. 23 | */ 24 | @RunWith(classOf[JUnitRunner]) 25 | class JarActorTest extends FunSuite with BeforeAndAfter with ScalaFutures with Matchers { 26 | 27 | val config = ConfigFactory.load() 28 | 29 | implicit val timeout = Timeout(10, TimeUnit.SECONDS) 30 | implicit val system = ActorSystem("localSystem") 31 | 32 | val jarActor = TestActorRef(new JarActor(config)) 33 | val contextName = "demoContext" 34 | 35 | val jarFolder = config.getString(JarActor.JAR_FOLDER_PROPERTY_PATH) 36 | 37 | before { 38 | jarActor ! CreateJarFolder 39 | } 40 | 41 | after { 42 | jarActor ! DeleteJarFolder 43 | } 44 | 45 | test("Delete & Create Jar Folder") { 46 | 47 | jarActor ! DeleteJarFolder() 48 | 49 | var future = jarActor ? JarFolderExists() 50 | val Success(resultNotExists: Boolean) = future.value.get 51 | resultNotExists should be(false) 52 | 53 | jarActor ! 
--------------------------------------------------------------------------------
/spark-job-rest/src/test/scala/server/domain/actors/JarActorTest.scala:
--------------------------------------------------------------------------------
1 | package server.domain.actors
2 | 
3 | import java.io.File
4 | import java.util.concurrent.TimeUnit
5 | 
6 | import akka.actor.ActorSystem
7 | import akka.pattern.ask
8 | import akka.testkit.TestActorRef
9 | import akka.util.Timeout
10 | import com.typesafe.config.ConfigFactory
11 | import org.junit.runner.RunWith
12 | import org.scalatest.concurrent.ScalaFutures
13 | import org.scalatest.junit.JUnitRunner
14 | import org.scalatest.{BeforeAndAfter, FunSuite, Matchers}
15 | import responses.JarInfo
16 | import server.domain.actors.JarActor._
17 | import utils.FileUtils
18 | 
19 | import scala.util.{Random, Success}
20 | 
21 | /**
22 |  * Test suite for [[JarActor]].
23 |  */
24 | @RunWith(classOf[JUnitRunner])
25 | class JarActorTest extends FunSuite with BeforeAndAfter with ScalaFutures with Matchers {
26 | 
27 |   val config = ConfigFactory.load()
28 | 
29 |   implicit val timeout = Timeout(10, TimeUnit.SECONDS)
30 |   implicit val system = ActorSystem("localSystem")
31 | 
32 |   val jarActor = TestActorRef(new JarActor(config))
33 |   val contextName = "demoContext"
34 | 
35 |   val jarFolder = config.getString(JarActor.JAR_FOLDER_PROPERTY_PATH)
36 | 
37 |   before {
38 |     jarActor ! CreateJarFolder
39 |   }
40 | 
41 |   after {
42 |     jarActor ! DeleteJarFolder
43 |   }
44 | 
45 |   test("Delete & Create Jar Folder") {
46 | 
47 |     jarActor ! DeleteJarFolder()
48 | 
49 |     var future = jarActor ? JarFolderExists()
50 |     val Success(resultNotExists: Boolean) = future.value.get
51 |     resultNotExists should be(false)
52 | 
53 |     jarActor ! CreateJarFolder(true)
54 | 
55 |     future = jarActor ? JarFolderExists()
56 |     val Success(resultExists: Boolean) = future.value.get
57 |     resultExists should be(true)
58 | 
59 |     future = jarActor ? GetAllJarsNames()
60 |     val Success(result: List[String]) = future.value.get
61 |     result should be( Nil )
62 | 
63 |   }
64 | 
65 |   test("Write & Delete Jar") {
66 | 
67 |     val jarName = Random.nextString(5) + ".jar"
68 | 
69 |     var future = jarActor ? AddJar(jarName, getTestJarBytes)
70 |     val Success(result: Success[JarInfo]) = future.value.get
71 |     result shouldBe a [Success[JarInfo]]
72 | 
73 |     future = jarActor ? GetAllJarsNames()
74 |     val Success(resultJars: List[String]) = future.value.get
75 |     resultJars should be( List(jarName) )
76 | 
77 |     future = jarActor ? DeleteJar(jarName)
78 |     val Success(deleteResult: Success[String]) = future.value.get
79 |     deleteResult should be( Success("Jar deleted.") )
80 | 
81 |     future = jarActor ? GetAllJarsNames()
82 |     val Success(emptyJarList: List[String]) = future.value.get
83 |     emptyJarList should be( Nil )
84 | 
85 |   }
86 | 
87 |   test("Write 10 Jars") {
88 |     for( i <- 1 to 10) {
89 |       val jarName = Random.nextString(5) + ".jar"
90 |       var future = jarActor ? AddJar(jarName, getTestJarBytes)
91 |     }
92 | 
93 |     val future = jarActor ? GetAllJarsNames()
94 |     val Success(emptyJarList: List[String]) = future.value.get
95 |     emptyJarList.size should be( 10 )
96 |   }
97 | 
98 |   test("Get Classpath For Uploaded Jar"){
99 |     val jarName = Random.nextString(5) + ".jar"
100 |     var future = jarActor ? AddJar(jarName, getTestJarBytes)
101 | 
102 |     future = jarActor ? GetJarsPathForClasspath(jarName, contextName)
103 |     val Success(result: String) = future.value.get
104 |     result should be( config.getString(JarActor.JAR_FOLDER_PROPERTY_PATH) + File.separator + jarName)
105 |   }
106 | 
107 |   def createLocalJar(jarName: String) = {
108 |     val jarPath = jarFolder + jarName
109 |     FileUtils.writeToFile(jarName, jarFolder, getTestJarBytes)
110 |   }
111 | 
112 |   test("Get Classpath For Local Jar"){
113 | 
114 |     val jarName = "test.jar"
115 |     val jarPath = jarFolder + File.separator + jarName
116 |     createLocalJar(jarName)
117 | 
118 |     val future = jarActor ? GetJarsPathForClasspath(jarPath, contextName)
119 |     val Success(result: String) = future.value.get
120 |     result should be( jarPath )
121 |   }
122 | 
123 |   // test("Get Classpath For Hdfs Jar"){
124 |   // TODO: Add test for hdfs jar
125 |   //   val jarPath = "/home/ubuntu/test.jar"
126 |   //
127 |   //   val future = jarActor ? GetJarsPathForClasspath(jarPath)
128 |   //   val Success(result: String) = future.value.get
129 |   //   result should be( jarPath )
130 |   // }
131 | 
132 |   test("Get Classpath For Multiple Jars"){
133 | 
134 |     // TODO: Add hdfs jar to this test
135 |     val localJarName = "test.jar"
136 |     val jarPath = jarFolder + File.separator + localJarName
137 |     createLocalJar(localJarName)
138 | 
139 |     val jarName = Random.nextString(5) + ".jar"
140 |     var future = jarActor ? AddJar(jarName, getTestJarBytes)
141 | 
142 |     future = jarActor ? GetJarsPathForClasspath(jarPath + "," + jarName, contextName)
143 |     val Success(result: String) = future.value.get
144 |     result should be( jarPath + JarActor.CLASSPATH_JAR_SEPARATOR + config.getString(JarActor.JAR_FOLDER_PROPERTY_PATH) + File.separator + jarName)
145 |   }
146 | 
147 |   test("Get Spark Jars For Uploaded Jar"){
148 |     val jarName = Random.nextString(5) + ".jar"
149 |     var future = jarActor ? AddJar(jarName, getTestJarBytes)
150 | 
151 |     future = jarActor ? GetJarsPathForSpark(jarName)
152 |     val Success(result: List[String]) = future.value.get
153 |     result should be( List(config.getString(JarActor.JAR_FOLDER_PROPERTY_PATH) + File.separator + jarName))
154 |   }
155 | 
156 |   test("Get Spark Jars For Local Jar"){
157 | 
158 |     val localJarName = "test.jar"
159 |     val jarPath = jarFolder + File.separator + localJarName
160 |     createLocalJar(localJarName)
161 | 
162 |     val future = jarActor ? GetJarsPathForSpark(jarPath)
163 |     val Success(result: List[String]) = future.value.get
164 |     result should be( List(jarPath) )
165 |   }
166 | 
167 |   test("Get Spark Jars For Hdfs Jar"){
168 |     val jarPath = "hdfs://devbox.local:8020/home/ubuntu/test.jar"
169 | 
170 |     val future = jarActor ? GetJarsPathForSpark(jarPath)
171 |     val Success(result: List[String]) = future.value.get
172 |     result should be( List(jarPath) )
173 |   }
174 | 
175 |   test("Get Spark Jars For Multiple Jars"){
176 | 
177 |     val localJarName = "test.jar"
178 |     val jarPath = jarFolder + File.separator + localJarName
179 |     createLocalJar(localJarName)
180 | 
181 |     val hdfsJarPath = "hdfs://devbox.local:8020/home/ubuntu/test.jar"
182 | 
183 |     val jarName = Random.nextString(5) + ".jar"
184 |     var future = jarActor ? AddJar(jarName, getTestJarBytes)
185 | 
186 |     future = jarActor ? GetJarsPathForSpark(jarPath + "," + jarName + "," + hdfsJarPath)
187 |     val Success(result: List[String]) = future.value.get
188 |     result should be( List(jarPath, config.getString(JarActor.JAR_FOLDER_PROPERTY_PATH) + File.separator + jarName, hdfsJarPath))
189 |   }
190 | 
191 |   test("Get All Jars Paths For Multiple Jars"){
192 | 
193 |     val localJarName = "test.jar"
194 |     val jarPath = jarFolder + File.separator + localJarName
195 |     createLocalJar(localJarName)
196 | 
197 |     // val hdfsJarPath = "hdfs://devbox.local:8020/home/ubuntu/test.jar"
198 | 
199 |     val jarName = Random.nextString(5) + ".jar"
200 |     var future = jarActor ? AddJar(jarName, getTestJarBytes)
201 | 
202 |     future = jarActor ? GetJarsPathForAll(jarPath + "," + jarName, contextName)
203 |     val Success(result: ResultJarsPathForAll) = future.value.get
204 |     result.pathForSpark should be( List(jarPath, config.getString(JarActor.JAR_FOLDER_PROPERTY_PATH) + File.separator + jarName))
205 |     result.pathForClasspath should be ( jarPath + JarActor.CLASSPATH_JAR_SEPARATOR + jarFolder + File.separator + jarName)
206 |   }
207 | 
208 | 
209 |   def getTestJarBytes: Array[Byte] = {
210 |     val bytes: Array[Byte] = Array(0x50.toByte, 0x4b.toByte, 0x03.toByte, 0x04.toByte)
211 | 
212 |     val randomBytes = new Array[Byte](20)
213 |     Random.nextBytes(randomBytes)
214 | 
215 |     bytes ++ randomBytes
216 |   }
217 | }
218 | 
--------------------------------------------------------------------------------
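`getTestJarBytes` fabricates a payload whose first four bytes are 0x50 0x4B 0x03 0x04, the zip local-file-header signature ("PK\3\4"), presumably so whatever validation JarActor applies accepts it as a jar. A standalone sketch of that kind of check, with a hypothetical helper name that is not part of JarActor:

    // Illustrative only: a jar is a zip archive, so a quick upload sanity check
    // can look for the "PK\3\4" signature at the start of the byte stream.
    def looksLikeJar(bytes: Array[Byte]): Boolean =
      bytes.length >= 4 &&
        bytes(0) == 0x50.toByte && bytes(1) == 0x4b.toByte &&
        bytes(2) == 0x03.toByte && bytes(3) == 0x04.toByte

Under that assumption, `looksLikeJar(getTestJarBytes)` would return true for the payload these tests upload.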