├── .gitignore
├── src
│   └── main
│       ├── resources
│       │   └── application.conf
│       └── scala
│           ├── com
│           │   └── spoddutur
│           │       ├── spark
│           │       │   └── SparkFactory.scala
│           │       ├── web
│           │       │   ├── HttpService.scala
│           │       │   └── WebServer.scala
│           │       └── util
│           │           └── AppConfig.scala
│           └── MainApp.scala
├── README.md
└── pom.xml

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
target/
.idea
.project
.classpath
*.iml
dependency-reduced-pom.xml

--------------------------------------------------------------------------------
/src/main/resources/application.conf:
--------------------------------------------------------------------------------
spark.master=local
spark.appname=spark-as-service-using-embedded-server
akka.http.port=8001

--------------------------------------------------------------------------------
/src/main/scala/com/spoddutur/spark/SparkFactory.scala:
--------------------------------------------------------------------------------
package com.spoddutur.spark

import com.spoddutur.util.AppConfig
import org.apache.spark.sql.SparkSession

/**
 * Created by sruthi on 03/07/17.
 * Creates one SparkSession which is shared and reused among multiple HttpRequests.
 */
object SparkFactory {
  val spark: SparkSession = SparkSession.builder
    .master(AppConfig.sparkMaster)
    .appName(AppConfig.sparkAppName)
    .getOrCreate

  val sc = spark.sparkContext
  val sparkConf = sc.getConf
}

--------------------------------------------------------------------------------
/src/main/scala/MainApp.scala:
--------------------------------------------------------------------------------
import akka.http.scaladsl.settings.ServerSettings
import com.spoddutur.util.AppConfig
import com.spoddutur.web.WebServer
import com.typesafe.config.ConfigFactory

/**
 * Created by sruthi on 03/07/17.
 */
object MainApp extends App {

  // init config params from cmd-line args
  AppConfig.parse(this.args.toList)

  println(s"Starting server at http://localhost:${AppConfig.akkaHttpPort}/")

  // Start the server. HttpApp.startServer blocks until the server is shut down,
  // so anything printed after this call only appears on shutdown.
  WebServer.startServer("localhost", AppConfig.akkaHttpPort, ServerSettings(ConfigFactory.load))
}
--------------------------------------------------------------------------------
/src/main/scala/com/spoddutur/web/HttpService.scala:
--------------------------------------------------------------------------------
package com.spoddutur.web

import com.spoddutur.spark.SparkFactory

/**
 * Created by sruthi on 03/07/17.
 * Service class computing the values for the "/activeStreams" and "/count" route bindings respectively.
 */
object HttpService {

  val sc = SparkFactory.sc

  // Serves the http://host:port/count route binding:
  // a random Spark job counting a seq of integers split into 25 partitions.
  def count(): String = sc.parallelize(0 to 500000, 25).count.toString

  // Serves the http://host:port/activeStreams route binding:
  // returns how many streams are currently active in the shared SparkSession.
  def activeStreamsInSparkContext(): Int = SparkFactory.spark.streams.active.length
}
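Both service methods above take no request input. When a route needs to feed query parameters into the Spark job, the same pattern extends naturally; below is a minimal sketch of a parameterised variant (hypothetical: neither `ParameterisedService` nor `countUpTo` is part of this repo):

```scala
package com.spoddutur.web

import com.spoddutur.spark.SparkFactory

// Hypothetical parameterised sibling of HttpService.count():
// the upper bound and partition count come from the HTTP request
// instead of being hard-coded.
object ParameterisedService {

  private val sc = SparkFactory.sc

  def countUpTo(n: Int, partitions: Int = 25): String = {
    require(n >= 0, "n must be non-negative")
    require(partitions > 0, "partitions must be positive")
    sc.parallelize(0 to n, partitions).count.toString
  }
}
```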
--------------------------------------------------------------------------------
/src/main/scala/com/spoddutur/web/WebServer.scala:
--------------------------------------------------------------------------------
package com.spoddutur.web

/**
 * Created by sruthi on 03/07/17.
 */
import akka.http.scaladsl.model.{ContentTypes, HttpEntity}
import akka.http.scaladsl.server.Directives._
import akka.http.scaladsl.server.{HttpApp, Route}
import com.spoddutur.spark.SparkFactory

/**
 * Http Server definition.
 * Configured 4 routes:
 * 1. homepage - http://host:port - says "hello world"
 * 2. version - http://host:port/version - tells the spark version
 * 3. activeStreams - http://host:port/activeStreams - tells how many spark streams are currently active
 * 4. count - http://host:port/count - random spark job to count a seq of integers
 */
object WebServer extends HttpApp {

  // Validated RGB triple (not yet wired into the /color route below)
  case class Colour(r: Int, g: Int, b: Int) {
    require(r >= 0 && r <= 255, "Wrong color palette")
    require(g >= 0 && g <= 255, "Wrong color palette")
    require(b >= 0 && b <= 255, "Wrong color palette")
  }

  override def routes: Route = {
    pathEndOrSingleSlash {
      get {
        complete(HttpEntity(ContentTypes.`text/html(UTF-8)`,
          "<html><body>Hello World!! This is Akka responding..</body></html>"))
      }
    } ~
    path("version") {
      get {
        complete(HttpEntity(ContentTypes.`text/html(UTF-8)`,
          s"<html><body>Spark version: ${SparkFactory.sc.version}</body></html>"))
      }
    } ~
    path("activeStreams") {
      get {
        complete(HttpEntity(ContentTypes.`text/html(UTF-8)`,
          s"<html><body>Current active streams in SparkContext: ${HttpService.activeStreamsInSparkContext()}</body></html>"))
      }
    } ~
    path("count") {
      get {
        complete(HttpEntity(ContentTypes.`text/html(UTF-8)`,
          s"<html><body>Count 0 to 500000 using Spark with 25 partitions: ${HttpService.count()}</body></html>"))
      }
    } ~
    path("customer" / IntNumber) { id =>
      complete {
        s"CustId: $id"
      }
    } ~
    path("customer") {
      parameter('id.as[Int]) { id =>
        complete {
          s"CustId: $id"
        }
      }
    } ~
    path("color") {
      parameters('r.as[Int], 'g.as[Int], 'b.as[Int]) { (r, g, b) =>
        complete {
          s"(R,G,B): $r, $g, $b"
        }
      }
    }
  }
}
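Every endpoint above follows the same shape: a `path` directive, a verb directive, and a `complete` that calls into the shared SparkSession. As a sketch of how a fifth, Spark-backed route could be added (a hypothetical `/sum` endpoint, not one of the four configured routes), the following `Route` value could be chained into `routes` with the `~` operator:

```scala
package com.spoddutur.web

import akka.http.scaladsl.server.Directives._
import akka.http.scaladsl.server.Route
import com.spoddutur.spark.SparkFactory

// Hypothetical extra route: GET /sum?n=100 adds the integers 0..n with a Spark job.
object ExtraRoutes {
  val sum: Route =
    path("sum") {
      get {
        parameter('n.as[Int]) { n =>
          complete {
            s"Sum 0 to $n: ${SparkFactory.sc.parallelize(0 to n).sum.toLong}"
          }
        }
      }
    }
}
```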
--------------------------------------------------------------------------------
/src/main/scala/com/spoddutur/util/AppConfig.scala:
--------------------------------------------------------------------------------
package com.spoddutur.util

import com.typesafe.config.ConfigFactory

/**
 * Created by sruthi on 03/07/17.
 * Loads default config params from the application.conf file.
 * It also supports cmd-line args to override the default values.
 */
object AppConfig {

  val conf = ConfigFactory.load
  val sparkMasterDef = conf.getString("spark.master")
  val sparkAppNameDef = conf.getString("spark.appname")
  val akkaHttpPortDef = conf.getInt("akka.http.port")

  var akkaHttpPort = akkaHttpPortDef
  var sparkMaster = sparkMasterDef
  var sparkAppName = sparkAppNameDef

  // Quick manual test: parse sample args and print the overridden config
  def main(args: Array[String]): Unit = {
    parse("-m localhost1 --akkaHttpPort 8080".split(" ").toList)
    println(s"$sparkMaster, $sparkAppName, $akkaHttpPort")
  }

  val usage =
    s"""
      |This application comes as a Spark2.1-REST-Service-Provider using an embedded,
      |Reactive-Streams-based, fully asynchronous HTTP server (i.e., using akka-http).
      |So, this application needs config params like the AkkaWebPort to bind to, SparkMaster
      |and SparkAppName.
      |
      |Usage: spark-submit spark-as-service-using-embedded-server.jar [options]
      |Options:
      |  -h, --help
      |  -m, --master <master>      spark://host:port, mesos://host:port, yarn, or local. Default: $sparkMasterDef
      |  -n, --name <name>          A name of your application. Default: $sparkAppNameDef
      |  -p, --akkaHttpPort <port>  Port where akka-http is bound. Default: $akkaHttpPortDef
      |
      |Configured 4 routes:
      |  1. homepage      - http://host:port                - says "hello world"
      |  2. version       - http://host:port/version        - tells the spark version
      |  3. activeStreams - http://host:port/activeStreams  - tells how many spark streams are currently active
      |  4. count         - http://host:port/count          - random spark job to count a seq of integers
      """.stripMargin

  // Recursively consume cmd-line args, overriding the defaults loaded from application.conf
  def parse(list: List[String]): this.type = {
    list match {
      case Nil => this
      case ("--master" | "-m") :: value :: tail =>
        sparkMaster = value
        parse(tail)
      case ("--name" | "-n") :: value :: tail =>
        sparkAppName = value
        parse(tail)
      case ("--akkaHttpPort" | "-p") :: value :: tail =>
        akkaHttpPort = value.toInt
        parse(tail)
      case ("--help" | "-h") :: tail =>
        printUsage(0)
      case _ =>
        printUsage(1)
    }
  }

  def printUsage(exitNumber: Int) = {
    println(usage)
    sys.exit(status = exitNumber)
  }
}

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# spark-as-service-using-embedded-server
This application comes as a Spark2.1-REST-Service-Provider using an embedded, Reactive-Streams-based, fully asynchronous HTTP server.

## 1. Central Idea
I wanted to build an interactive REST API service on top of my ApacheSpark application to serve use-cases like:
- Load a trained model in the SparkSession and quickly run predictions for a user-given query.
- Keep your big data cached in the cluster and give the user an endpoint to query it.
- Run recurrent spark queries with varying parameters.

As you can see, the ```core``` of the application is not primarily a web application or browser interaction, but a REST service performing big-data cluster computation on ApacheSpark.

## 2. Akka-HTTP as apt fit
With Akka-HTTP, you normally don't build your application ```on top of``` Akka HTTP; you build your application on top of whatever makes sense and use Akka HTTP merely for the HTTP integration needs. So, I found Akka-HTTP to be the right fit for the use-cases mentioned above.

## 3. Architecture
### 3.1 To demo this, I've configured the following four routes:
1. **homepage** - [http://localhost:8001](#homepage) - says "hello world"
2. **version** - [http://localhost:8001/version](#version) - queries the shared SparkSession and tells the spark version
3. **activeStreams** - [http://localhost:8001/activeStreams](#activeStreams) - tells how many spark streams are currently active
4. **count** - [http://localhost:8001/count](#count) - random spark job to count the number of elements in a sequence

The following picture illustrates the routing of an HttpRequest:

## 4. Building
It uses [Scala 2.11](#scala), [Spark 2.1](#spark) and [Akka-Http](#akka-http):
```
mvn clean install
```

## 5. Execution
We can start our application as a stand-alone jar like this:
```
mvn exec:java
```

### 5.1 cmd-line args
Optionally, you can provide configuration params like spark-master, akka-port etc. from the command line. To see the list of configurable params, just type:
```
mvn exec:java -Dexec.args="--help"
OR
mvn exec:java -Dexec.args="-h"
```

The help content will look something like this:
```
This application comes as a Spark2.1-REST-Service-Provider using an embedded,
Reactive-Streams-based, fully asynchronous HTTP server (i.e., using akka-http).
So, this application needs config params like the AkkaWebPort to bind to, SparkMaster
and SparkAppName.

Usage: spark-submit spark-as-service-using-embedded-server.jar [options]
Options:
  -h, --help
  -m, --master <master>      spark://host:port, mesos://host:port, yarn, or local. Default: local
  -n, --name <name>          A name of your application. Default: spark-as-service-using-embedded-server
  -p, --akkaHttpPort <port>  Port where akka-http is bound. Default: 8001
```

### 5.2 Tweak default cmd-line args
There are 2 ways to change the default param values:
1. Update the ```src/main/resources/application.conf``` file directly, build and then run.
2. ```mvn exec:java -Dexec.args="--master <master> --name <name> --akkaHttpPort <port>"```
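### 5.3 Quick sanity check
Once the server is online, the four configured routes can be exercised from another terminal, for example with curl (assuming the default port 8001 from ```application.conf```):
```
curl http://localhost:8001/
curl http://localhost:8001/version
curl http://localhost:8001/activeStreams
curl http://localhost:8001/count
```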
## 6. References
[Akka](http://doc.akka.io/docs/akka-http/current/scala/http/introduction.html)

--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.spoddutur</groupId>
    <artifactId>spark-as-service-using-embedded-server</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <java.version>1.8</java.version>
        <scala.version>2.11.8</scala.version>
        <scala.binary.version>2.11</scala.binary.version>
        <spark.version>2.1.0</spark.version>
    </properties>

    <build>
        <plugins>
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>3.2.0</version>
                <executions>
                    <execution>
                        <phase>process-sources</phase>
                        <goals>
                            <goal>compile</goal>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.3</version>
                <configuration>
                    <source>${java.version}</source>
                    <target>${java.version}</target>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>MainApp</mainClass>
                                </transformer>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                                    <resource>META-INF/spring.handlers</resource>
                                </transformer>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                                    <resource>META-INF/spring.schemas</resource>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.codehaus.mojo</groupId>
                <artifactId>exec-maven-plugin</artifactId>
                <version>1.6.0</version>
                <configuration>
                    <mainClass>MainApp</mainClass>
                </configuration>
            </plugin>
        </plugins>
        <resources>
            <resource>
                <directory>src/main/resources/</directory>
            </resource>
        </resources>
    </build>

    <dependencies>
        <dependency>
            <groupId>com.typesafe</groupId>
            <artifactId>config</artifactId>
            <version>1.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>${scala.version}</version>
        </dependency>
        <dependency>
            <groupId>org.scalacheck</groupId>
            <artifactId>scalacheck_${scala.binary.version}</artifactId>
            <version>1.11.4</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.scalatest</groupId>
            <artifactId>scalatest_${scala.binary.version}</artifactId>
            <version>2.2.0</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_${scala.binary.version}</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_${scala.binary.version}</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_${scala.binary.version}</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.13</version>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>1.7.13</version>
        </dependency>
        <dependency>
            <groupId>com.typesafe.akka</groupId>
            <artifactId>akka-actor_2.11</artifactId>
            <version>2.5.3</version>
        </dependency>
        <dependency>
            <groupId>com.typesafe.akka</groupId>
            <artifactId>akka-stream_2.11</artifactId>
            <version>2.5.3</version>
        </dependency>
        <dependency>
            <groupId>com.typesafe.akka</groupId>
            <artifactId>akka-http_2.11</artifactId>
            <version>10.0.9</version>
        </dependency>
    </dependencies>
</project>

--------------------------------------------------------------------------------
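Since the maven-shade-plugin above is bound to the ```package``` phase, ```mvn clean package``` also yields a self-contained jar, which matches the ```spark-submit``` form shown in the usage text. A sketch, assuming Maven's default ```artifactId-version.jar``` naming:
```
mvn clean package
spark-submit --class MainApp target/spark-as-service-using-embedded-server-1.0-SNAPSHOT.jar -p 8001
```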