├── .gitignore
├── src
│   └── main
│       ├── resources
│       │   └── application.conf
│       └── scala
│           ├── MainApp.scala
│           └── com
│               └── spoddutur
│                   ├── spark
│                   │   └── SparkFactory.scala
│                   ├── util
│                   │   └── AppConfig.scala
│                   └── web
│                       ├── HttpService.scala
│                       └── WebServer.scala
├── README.md
└── pom.xml
/.gitignore:
--------------------------------------------------------------------------------
target/
.idea
.project
.classpath
*.iml
dependency-reduced-pom.xml
--------------------------------------------------------------------------------
/src/main/resources/application.conf:
--------------------------------------------------------------------------------
spark.master=local
spark.appname=spark-as-service-using-embedded-server
akka.http.port=8001
--------------------------------------------------------------------------------
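These keys are consumed via Typesafe Config in AppConfig.scala below. A minimal sketch of the resolution behaviour, assuming standard ConfigFactory.load() semantics where JVM system properties override application.conf (ConfigPeek is a hypothetical helper, not part of the repo):

import com.typesafe.config.ConfigFactory

// Minimal sketch: ConfigFactory.load() layers JVM system properties over
// application.conf, so running with -Dakka.http.port=9090 would override
// the file value above.
object ConfigPeek extends App {
  val conf = ConfigFactory.load()
  println(conf.getString("spark.master"))  // "local" unless overridden
  println(conf.getInt("akka.http.port"))   // 8001 unless overridden
}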
/src/main/scala/com/spoddutur/spark/SparkFactory.scala:
--------------------------------------------------------------------------------
package com.spoddutur.spark

import com.spoddutur.util.AppConfig
import org.apache.spark.sql.SparkSession

/**
  * Created by sruthi on 03/07/17.
  * Creates one SparkSession which is shared and reused among multiple HttpRequests.
  */
object SparkFactory {
  val spark: SparkSession = SparkSession.builder
    .master(AppConfig.sparkMaster)
    .appName(AppConfig.sparkAppName)
    .getOrCreate()

  val sc = spark.sparkContext
  val sparkConf = sc.getConf
}
--------------------------------------------------------------------------------
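Because SparkFactory is a Scala object, the session is created once on first access and shared thereafter. A quick illustration of that reuse (illustrative snippet, not part of the repo):

// Object initialization runs exactly once, so every HTTP request handler
// that touches SparkFactory sees the same SparkSession instance.
val s1 = SparkFactory.spark
val s2 = SparkFactory.spark
assert(s1 eq s2) // same shared instance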
/src/main/scala/MainApp.scala:
--------------------------------------------------------------------------------
import akka.http.scaladsl.settings.ServerSettings
import com.spoddutur.util.AppConfig
import com.spoddutur.web.WebServer
import com.typesafe.config.ConfigFactory

/**
  * Created by sruthi on 03/07/17.
  */
object MainApp extends App {

  // init config params from cmd-line args
  AppConfig.parse(this.args.toList)

  println(s"Server online at http://localhost:${AppConfig.akkaHttpPort}/")

  // Start the server. HttpApp.startServer blocks until the server is shut
  // down, which is why the address is announced before the call.
  WebServer.startServer("localhost", AppConfig.akkaHttpPort, ServerSettings(ConfigFactory.load))
}
--------------------------------------------------------------------------------
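Since ServerSettings is built from the same loaded config, akka.http.server.* keys in application.conf tune the server without code changes. A hedged sketch (the idle-timeout key is only an example of such a setting, not something the repo sets):

import akka.http.scaladsl.settings.ServerSettings
import com.typesafe.config.ConfigFactory

// Example: the settings passed to startServer come from the loaded config,
// so a line like `akka.http.server.idle-timeout = 120 s` in application.conf
// would take effect here with no code change.
val settings: ServerSettings = ServerSettings(ConfigFactory.load)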
/src/main/scala/com/spoddutur/web/HttpService.scala:
--------------------------------------------------------------------------------
package com.spoddutur.web

import com.spoddutur.spark.SparkFactory

/**
  * Created by sruthi on 03/07/17.
  * Service class computing the values for the "/count" and "/activeStreams" route bindings respectively.
  */
object HttpService {

  val sc = SparkFactory.sc

  // To serve the http://host:port/count route binding:
  // a random Spark job counting a seq of integers split into 25 partitions.
  def count(): String = sc.parallelize(0 to 500000, 25).count.toString

  // To serve the http://host:port/activeStreams route binding:
  // returns how many streams are currently active in the SparkSession.
  def activeStreamsInSparkContext(): Int = SparkFactory.spark.streams.active.length
}
--------------------------------------------------------------------------------
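The same pattern extends to any small Spark job on the shared context. A hypothetical addition (not in the repo) that could back a /sum route in the same style as count():

// Hypothetical extension to HttpService: sums the same range on the shared
// SparkContext. A matching route would have to be added to WebServer.
def sum(): String =
  SparkFactory.sc.parallelize(0 to 500000, 25).reduce(_ + _).toString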
/src/main/scala/com/spoddutur/web/WebServer.scala:
--------------------------------------------------------------------------------
package com.spoddutur.web

import akka.http.scaladsl.model.{ContentTypes, HttpEntity}
import akka.http.scaladsl.server.Directives._
import akka.http.scaladsl.server.{HttpApp, Route}
import com.spoddutur.spark.SparkFactory

/**
  * Created by sruthi on 03/07/17.
  * Http Server definition.
  * Configured 4 routes:
  * 1. homepage - http://host:port - says "hello world"
  * 2. version - http://host:port/version - tells "spark version"
  * 3. activeStreams - http://host:port/activeStreams - tells how many spark streams are currently active
  * 4. count - http://host:port/count - random spark job to count a seq of integers
  */
object WebServer extends HttpApp {

  // Models an RGB value; each channel must lie in 0..255.
  // Note: not yet wired into the "/color" route below.
  case class Colour(r: Int, g: Int, b: Int) {
    require(r >= 0 && r <= 255, "Wrong colour palette")
    require(g >= 0 && g <= 255, "Wrong colour palette")
    require(b >= 0 && b <= 255, "Wrong colour palette")
  }

  override def routes: Route = {
    pathEndOrSingleSlash {
      get {
        complete(HttpEntity(ContentTypes.`text/html(UTF-8)`, "Hello World!! This is Akka responding.."))
      }
    } ~
    path("version") {
      get {
        complete(HttpEntity(ContentTypes.`text/html(UTF-8)`, s"Spark version: ${SparkFactory.sc.version}"))
      }
    } ~
    path("activeStreams") {
      get {
        complete(HttpEntity(ContentTypes.`text/html(UTF-8)`, s"Current active streams in SparkContext: ${HttpService.activeStreamsInSparkContext()}"))
      }
    } ~
    path("count") {
      get {
        complete(HttpEntity(ContentTypes.`text/html(UTF-8)`, s"Count 0 to 500000 using Spark with 25 partitions: ${HttpService.count()}"))
      }
    } ~
    path("customer" / IntNumber) { id =>
      complete {
        s"CustId: $id"
      }
    } ~
    path("customer") {
      parameter('id.as[Int]) { id =>
        complete {
          s"CustId: $id"
        }
      }
    } ~
    path("color") {
      parameters('r.as[Int], 'g.as[Int], 'b.as[Int]) { (r, g, b) =>
        complete {
          s"(R,G,B): $r, $g, $b"
        }
      }
    }
  }
}
--------------------------------------------------------------------------------
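A sketch (an assumption, not wired up in the repo) of how Colour could validate the /color parameters; out-of-range channels make require throw IllegalArgumentException, which akka-http surfaces as an error response unless an exception handler maps it:

// Sketch only: wiring Colour into the /color route for validation.
path("color") {
  parameters('r.as[Int], 'g.as[Int], 'b.as[Int]) { (r, g, b) =>
    val c = Colour(r, g, b) // throws outside 0..255
    complete(s"(R,G,B): ${c.r}, ${c.g}, ${c.b}")
  }
}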
/src/main/scala/com/spoddutur/util/AppConfig.scala:
--------------------------------------------------------------------------------
package com.spoddutur.util

import com.typesafe.config.ConfigFactory

/**
  * Created by sruthi on 03/07/17.
  * Loads default config params from the application.conf file.
  * It also supports cmd-line args to override the default values.
  */
object AppConfig {

  val conf = ConfigFactory.load
  val sparkMasterDef = conf.getString("spark.master")
  val sparkAppNameDef = conf.getString("spark.appname")
  val akkaHttpPortDef = conf.getInt("akka.http.port")

  var akkaHttpPort = akkaHttpPortDef
  var sparkMaster = sparkMasterDef
  var sparkAppName = sparkAppNameDef

  // Ad-hoc self-test: parse sample args and print the resulting config.
  def main(args: Array[String]): Unit = {
    parse("-m localhost1 --akkaHttpPort 8080".split(" ").toList)
    println(s"$sparkMaster, $sparkAppName, $akkaHttpPort")
  }

  val usage =
    s"""
      This application comes as Spark2.1-REST-Service-Provider using an embedded,
      Reactive-Streams-based, fully asynchronous HTTP server (i.e., using akka-http).
      So, this application needs config params like AkkaWebPort to bind to, SparkMaster
      and SparkAppName.

      Usage: spark-submit spark-as-service-using-embedded-server.jar [options]
        Options:
        -h, --help
        -m, --master spark://host:port, mesos://host:port, yarn, or local. Default: $sparkMasterDef
        -n, --name A name of your application. Default: $sparkAppNameDef
        -p, --akkaHttpPort Port where akka-http is bound. Default: $akkaHttpPortDef

      Configured 4 routes:
      1. homepage - http://host:port - says "hello world"
      2. version - http://host:port/version - tells "spark version"
      3. activeStreams - http://host:port/activeStreams - tells how many spark streams are currently active
      4. count - http://host:port/count - random spark job to count a seq of integers
      """

  // Recursively folds the cmd-line args into the mutable config fields above.
  def parse(list: List[String]): this.type = {
    list match {
      case Nil => this
      case ("--master" | "-m") :: value :: tail =>
        sparkMaster = value
        parse(tail)
      case ("--name" | "-n") :: value :: tail =>
        sparkAppName = value
        parse(tail)
      case ("--akkaHttpPort" | "-p") :: value :: tail =>
        akkaHttpPort = value.toInt
        parse(tail)
      case ("--help" | "-h") :: tail =>
        printUsage(0)
      case _ =>
        printUsage(1)
    }
  }

  def printUsage(exitNumber: Int) = {
    println(usage)
    sys.exit(status = exitNumber)
  }
}
--------------------------------------------------------------------------------
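A quick illustration of how parse folds flags into the mutable fields (the values here are illustrative):

// Each recognised flag consumes its value, and parse recurses on the rest.
AppConfig.parse(List("--master", "spark://host:7077", "-p", "9090"))
assert(AppConfig.sparkMaster == "spark://host:7077")
assert(AppConfig.akkaHttpPort == 9090)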
/README.md:
--------------------------------------------------------------------------------
# spark-as-service-using-embedded-server
This application comes as Spark2.1-REST-Service-Provider using an embedded, Reactive-Streams-based, fully asynchronous HTTP server.

## 1. Central Idea
I wanted to build an interactive REST API service on top of my Apache Spark application, serving use-cases like:
- Load the trained model in SparkSession and quickly do the prediction for a user-given query.
- Have your big data cached in the cluster and provide the user an endpoint to query it.
- Run some recurrent Spark queries with varying parameters.

As you can see, the ```core``` of the application is not primarily a web application or browser interaction, but a REST service performing big-data cluster computation on Apache Spark.

## 2. Akka-HTTP as apt-fit
With Akka-Http, you normally don't build your application ```on top of``` Akka HTTP; you build your application on top of whatever makes sense and use Akka HTTP merely for the HTTP integration needs. So, I found Akka-HTTP to be the right fit for the use-cases mentioned above.

## 3. Architecture
### 3.1 To demo this, I've configured the following four routes:
1. **homepage** - [http://localhost:8001](#homepage) - says "hello world"
2. **version** - [http://localhost:8001/version](#version) - queries the shared SparkSession and tells the "spark version"
3. **activeStreams** - [http://localhost:8001/activeStreams](#activeStreams) - tells how many spark streams are currently active
4. **count** - [http://localhost:8001/count](#count) - random spark job to count the number of elements in a sequence

The following picture illustrates the routing of an HttpRequest:
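Once the server is up (default port 8001 from `application.conf`), the routes can be exercised from the command line; the annotated responses below follow from the handlers in `WebServer.scala`:
```
curl http://localhost:8001/              # Hello World!! This is Akka responding..
curl http://localhost:8001/version       # Spark version: 2.1.0
curl http://localhost:8001/activeStreams # 0 when no streaming queries are running
curl http://localhost:8001/count         # Count 0 to 500000 using Spark with 25 partitions: 500001
```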
## 4. Building
It uses [Scala 2.11](#scala), [Spark 2.1](#spark) and [Akka-Http](#akka-http).
```
mvn clean install
```
## 5. Execution
We can start our application as a stand-alone jar like this:
```
mvn exec:java
```
### 5.1 cmd-line args
Optionally, you can provide configuration params like spark-master, akka-port etc. from the command line. To see the list of configurable params, just type:
```
mvn exec:java -Dexec.args="--help"
OR
mvn exec:java -Dexec.args="-h"
```

Help content will look something like this:
```
This application comes as Spark2.1-REST-Service-Provider using an embedded,
Reactive-Streams-based, fully asynchronous HTTP server (i.e., using akka-http).
So, this application needs config params like AkkaWebPort to bind to, SparkMaster
and SparkAppName.

Usage: spark-submit spark-as-service-using-embedded-server.jar [options]
  Options:
  -h, --help
  -m, --master spark://host:port, mesos://host:port, yarn, or local. Default: local
  -n, --name A name of your application. Default: spark-as-service-using-embedded-server
  -p, --akkaHttpPort Port where akka-http is bound. Default: 8001
```
### 5.2 Tweak default cmd-line args
There are 2 ways to change the default param values:
1. Update the ```src/main/resources/application.conf``` file directly, then build and run.
2. Pass them on the command line: ```mvn exec:java -Dexec.args="--master <master> --name <name> --akkaHttpPort <port>"``` (see the example below).

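For example, with illustrative values:
```
mvn exec:java -Dexec.args="--master local[4] --name my-spark-service --akkaHttpPort 9090"
```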
## 6. References
[Akka](http://doc.akka.io/docs/akka-http/current/scala/http/introduction.html)
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.spoddutur</groupId>
  <artifactId>spark-as-service-using-embedded-server</artifactId>
  <version>1.0-SNAPSHOT</version>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <java.version>1.8</java.version>
    <scala.version>2.11.8</scala.version>
    <scala.binary.version>2.11</scala.binary.version>
    <spark.version>2.1.0</spark.version>
    <!-- property name not recoverable from the dump; 1.10.50 matches the AWS SDK -->
    <aws.sdk.version>1.10.50</aws.sdk.version>
  </properties>

  <build>
    <plugins>
      <plugin>
        <groupId>net.alchim31.maven</groupId>
        <artifactId>scala-maven-plugin</artifactId>
        <version>3.2.0</version>
        <executions>
          <execution>
            <phase>process-sources</phase>
            <goals>
              <goal>compile</goal>
              <goal>testCompile</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.3</version>
        <configuration>
          <source>${java.version}</source>
          <target>${java.version}</target>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-shade-plugin</artifactId>
        <executions>
          <execution>
            <phase>package</phase>
            <goals>
              <goal>shade</goal>
            </goals>
            <configuration>
              <filters>
                <filter>
                  <artifact>*:*</artifact>
                  <excludes>
                    <exclude>META-INF/*.SF</exclude>
                    <exclude>META-INF/*.DSA</exclude>
                    <exclude>META-INF/*.RSA</exclude>
                  </excludes>
                </filter>
              </filters>
              <transformers>
                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                  <mainClass>at.seresunit.lecturemanager_connector.App</mainClass>
                </transformer>
                <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                  <resource>META-INF/spring.handlers</resource>
                </transformer>
                <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                  <resource>META-INF/spring.schemas</resource>
                </transformer>
              </transformers>
            </configuration>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>org.codehaus.mojo</groupId>
        <artifactId>exec-maven-plugin</artifactId>
        <version>1.6.0</version>
        <configuration>
          <mainClass>MainApp</mainClass>
        </configuration>
      </plugin>
    </plugins>
    <resources>
      <resource>
        <directory>src/main/resources/</directory>
      </resource>
    </resources>
  </build>

  <dependencies>
    <dependency>
      <groupId>com.typesafe</groupId>
      <artifactId>config</artifactId>
      <version>1.3.1</version>
    </dependency>
    <dependency>
      <groupId>org.scala-lang</groupId>
      <artifactId>scala-library</artifactId>
      <version>${scala.version}</version>
    </dependency>
    <dependency>
      <groupId>org.scalacheck</groupId>
      <artifactId>scalacheck_${scala.binary.version}</artifactId>
      <version>1.11.4</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.scalatest</groupId>
      <artifactId>scalatest_${scala.binary.version}</artifactId>
      <version>2.2.0</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_${scala.binary.version}</artifactId>
      <version>${spark.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-sql_${scala.binary.version}</artifactId>
      <version>${spark.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-streaming_${scala.binary.version}</artifactId>
      <version>${spark.version}</version>
    </dependency>
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-log4j12</artifactId>
      <version>1.7.13</version>
    </dependency>
    <dependency>
      <groupId>log4j</groupId>
      <artifactId>log4j</artifactId>
      <version>1.2.17</version>
    </dependency>
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-api</artifactId>
      <version>1.7.13</version>
    </dependency>
    <dependency>
      <groupId>com.typesafe.akka</groupId>
      <artifactId>akka-actor_2.11</artifactId>
      <version>2.5.3</version>
    </dependency>
    <dependency>
      <groupId>com.typesafe.akka</groupId>
      <artifactId>akka-stream_2.11</artifactId>
      <version>2.5.3</version>
    </dependency>
    <dependency>
      <groupId>com.typesafe.akka</groupId>
      <artifactId>akka-http_2.11</artifactId>
      <version>10.0.9</version>
    </dependency>
  </dependencies>
</project>
--------------------------------------------------------------------------------