├── .gitignore ├── README.md ├── baby_names.csv ├── baby_names.json ├── baby_names_reduced.csv ├── spark-cluster ├── README.md ├── build.sbt ├── project │ └── build.properties └── src │ └── main │ └── scala │ └── SparkPi.scala ├── spark-ml ├── README.md ├── build.sbt ├── input │ ├── English.txt │ ├── Russian.txt │ └── Spanish.txt ├── project │ ├── assembly.sbt │ └── build.properties └── src │ └── main │ └── scala │ └── com │ └── supergloo │ ├── KMeanTrainTask.scala │ ├── SlackMLApp.scala │ ├── SlackReceiver.scala │ ├── SlackStreamingTask.scala │ └── Utils.scala ├── spark-sql ├── .gitignore ├── README.md ├── baby_names.csv ├── build.sbt ├── project │ ├── assembly.sbt │ └── build.properties └── src │ └── main │ └── scala │ └── com │ └── supergloo │ ├── SparkSQLCSVApp.scala │ └── SparkSQLJDBCApp.scala ├── spark-streaming-tests ├── README.md ├── build.sbt ├── project │ ├── assembly.sbt │ ├── build.properties │ └── plugins.sbt └── src │ ├── main │ └── scala │ │ └── com │ │ └── supergloo │ │ ├── SlackReceiver.scala │ │ └── SlackStreamingApp.scala │ └── test │ └── scala │ ├── com │ └── supergloo │ │ └── SlackStreamingTest.scala │ └── org │ └── apache │ └── spark │ └── streaming │ └── ClockWrapper.scala └── spark-streaming ├── README.md ├── build.ORIG ├── build.sbt ├── project ├── assembly.sbt └── build.properties └── src └── main └── scala └── com └── supergloo ├── SlackReceiver.scala └── SlackStreamingApp.scala /.gitignore: -------------------------------------------------------------------------------- 1 | **/target/ 2 | **/project/target/ 3 | **/project/project/ 4 | **/.DS_Store 5 | spark-ml/model/ 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # spark-course 2 | Examples used in the [Learning Apache Spark Course](https://www.udemy.com/learning-spark/). 
The course is available at https://www.udemy.com/learning-spark/ 3 | -------------------------------------------------------------------------------- /baby_names.json: -------------------------------------------------------------------------------- 1 | [{ 2 | "Year": "2013", 3 | "First Name": "DAVID", 4 | "County": "KINGS", 5 | "Sex": "M", 6 | "Count": "272" 7 | }, { 8 | "Year": "2013", 9 | "First Name": "JAYDEN", 10 | "County": "KINGS", 11 | "Sex": "M", 12 | "Count": "268" 13 | }, { 14 | "Year": "2013", 15 | "First Name": "JAYDEN", 16 | "County": "QUEENS", 17 | "Sex": "M", 18 | "Count": "219" 19 | }, { 20 | "Year": "2013", 21 | "First Name": "MOSHE", 22 | "County": "KINGS", 23 | "Sex": "M", 24 | "Count": "219" 25 | }, { 26 | "Year": "2013", 27 | "First Name": "ETHAN", 28 | "County": "QUEENS", 29 | "Sex": "M", 30 | "Count": "216" 31 | }] 32 | -------------------------------------------------------------------------------- /baby_names_reduced.csv: -------------------------------------------------------------------------------- 1 | Year,First Name,County,Sex,Count 2 | 2013,DAVID,KINGS,M,272 3 | 2013,JAYDEN,KINGS,M,268 4 | 2013,JAYDEN,QUEENS,M,219 5 | 2013,MOSHE,KINGS,M,219 6 | 2013,ETHAN,QUEENS,M,216 7 | 2013,SOPHIA,QUEENS,F,204 8 | 2013,DANIEL,QUEENS,M,203 9 | 2013,JACOB,KINGS,M,202 10 | 2013,ESTHER,KINGS,F,196 11 | 2013,ETHAN,KINGS,M,196 12 | 2013,ISABELLA,QUEENS,F,192 13 | 2013,DANIEL,KINGS,M,188 14 | 2013,JACOB,QUEENS,M,186 15 | 2013,AIDEN,KINGS,M,182 16 | 2013,LEAH,KINGS,F,179 17 | 2013,NOAH,KINGS,M,168 18 | 2013,JOSEPH,KINGS,M,166 19 | 2013,MATTHEW,QUEENS,M,161 20 | -------------------------------------------------------------------------------- /spark-cluster/README.md: -------------------------------------------------------------------------------- 1 | # Apache Spark with Scala Training 2 | 3 | Simple demonstration of deploying a Spark Driver program to a Spark cluster 4 | 5 | Steps: 6 | 7 | 1. sbt package 8 | 2. 
Make sure your cluster is running and issue something similar to following: 9 | 10 | ``` 11 | ~/Development/spark-1.6.1-bin-hadoop2.4/bin/spark-submit --class "SparkPi" --master spark://todd-mcgraths-macbook-pro.local:7077 ./target/scala-2.10/spark-sample_2.10-1.0.jar 12 | ``` 13 | 14 | replace `master` variable with appropriate value for your environment 15 | 16 | call `spark-submit` with path appropriate for your environment 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /spark-cluster/build.sbt: -------------------------------------------------------------------------------- 1 | name := "Spark Sample" 2 | 3 | version := "1.0" 4 | 5 | scalaVersion := "2.10.3" 6 | 7 | libraryDependencies += "org.apache.spark" %% "spark-core" % "1.1.1" 8 | -------------------------------------------------------------------------------- /spark-cluster/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 0.13.11 -------------------------------------------------------------------------------- /spark-cluster/src/main/scala/SparkPi.scala: -------------------------------------------------------------------------------- 1 | import scala.math.random 2 | 3 | import org.apache.spark._ 4 | 5 | /** Computes an approximation to pi */ 6 | object SparkPi { 7 | def main(args: Array[String]) { 8 | val conf = new SparkConf().setAppName("Spark Pi") 9 | val spark = new SparkContext(conf) 10 | val slices = if (args.length > 0) args(0).toInt else 2 11 | val n = 100000 * slices 12 | val count = spark.parallelize(1 to n, slices).map { i => 13 | val x = random * 2 - 1 14 | val y = random * 2 - 1 15 | if (x*x + y*y < 1) 1 else 0 16 | }.reduce(_ + _) 17 | println("Pi is roughly " + 4.0 * count / n) 18 | spark.stop() 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /spark-ml/README.md: 
-------------------------------------------------------------------------------- 1 | # Apache Spark with Scala training course - Machine Learning (MLlib) example 2 | 3 | [Apache Spark with Scala training course](https://www.udemy.com/learning-spark/ "Apache Spark with Scala training") 4 | 5 | ## What it does? 6 | It performs k-means clustering on text messages streamed from Slack. It may use an existing k-means model or new training data may be created and used to train. 7 | 8 | ## How it works? 9 | 10 | There are three cases to run the code depending on the config variables passed to the program: 11 | 12 | 1. If --trainData and --modelLocation config variables are specified, the program will train model based on existing train data in --trainData. (example trainData is in the input/ directory) 13 | 14 | 2. If --trainData and not --slackToken, the program will just train model and save it if --modelLocation specified. 15 | 16 | 3. If --slackToken and --modelLocation specified, the program will load model from the --modelLocation and put it 17 | to streaming app which will be used for near real-time prediction. 18 | 19 | ## Configuration Option Examples 20 | 21 | --numClusters 3 - number of clusters(k) // we specify 3 in videos because there are 3 languages in input/ directory 22 | 23 | --trainData input - data which is used for model training 24 | 25 | --modelLocation model - directory to save model 26 | 27 | --predictOutput output - optional output directory prefix. If not present, the job will only print all data to console 28 | 29 | 30 | ## What to expect: 31 | After job is started, you should see messages retrieved from slack and the cluster id to which this message relates. 32 | 33 | ## How to run? 34 | 35 | There are two ways to run this code: from sbt console or deploy as a jar. Deploy to Spark cluster and run as a jar is what is configured in the build.sbt file. 36 | 37 | To deploy to a cluster 38 | 1. sbt assembly 39 | 40 | Train a model first 41 | 1. 
SPARK_HOME/bin/spark-submit --conf spark.driver.userClassPathFirst=true --class "com.supergloo.SlackMLApp" --master MASTER ./target/scala-2.11/spark-ml-example-assembly-1.0.jar --numClusters 3 --trainData input --modelLocation model 42 | 43 | Run near-real time prediction 44 | 1. SPARK_HOME/bin/spark-submit --conf spark.driver.userClassPathFirst=true --class "com.supergloo.SlackMLApp" --master MASTER ./target/scala-2.11/spark-ml-example-assembly-1.0.jar --slackToken YOUR_SLACK_KEY --numClusters 3 --modelLocation model 45 | 46 | 47 | where you'll need to update SPARK_HOME, the --master var value and YOUR_SLACK_KEY 48 | -------------------------------------------------------------------------------- /spark-ml/build.sbt: -------------------------------------------------------------------------------- 1 | name := "spark-ml-example" 2 | 3 | version := "1.0" 4 | 5 | assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false) 6 | 7 | excludedJars in assembly <<= (fullClasspath in assembly) map { cp => 8 | cp filter { 9 | i => i.data.getName == "slf4j-api-1.7.12.jar" 10 | } 11 | } 12 | 13 | scalaVersion := "2.11.8" 14 | 15 | resolvers += "jitpack" at "https://jitpack.io" 16 | 17 | libraryDependencies ++= Seq( 18 | "org.apache.spark" %% "spark-streaming" % "1.6.1" % "provided", 19 | "org.apache.spark" %% "spark-mllib" % "1.6.1" % "provided", 20 | // comment above lines and uncomment the following to run in sbt console 21 | // "org.apache.spark" %% "spark-streaming" % "1.6.1", 22 | // "org.apache.spark" %% "spark-mllib" % "1.6.1" 23 | "org.scalaj" %% "scalaj-http" % "2.3.0", 24 | "org.jfarcand" % "wcs" % "1.5", 25 | "com.beust" % "jcommander" % "1.48" 26 | ) 27 | -------------------------------------------------------------------------------- /spark-ml/input/English.txt: -------------------------------------------------------------------------------- 1 | ndows talking painted pasture yet its express parties use. 
Sure last upon he same as knew next. Of believed or diverted no rejoiced. End friendship sufficient assistance can prosperous met. As game he show it park do. Was has unknown few certain ten promise. No finished my an likewise cheerful packages we. For assurance concluded son something depending discourse see led collected. Packages oh no denoting my advanced humoured. Pressed be so thought natural. 2 | She suspicion dejection saw instantly. Well deny may real one told yet saw hard dear. Bed chief house rapid right the. Set noisy one state tears which. No girl oh part must fact high my he. Simplicity in excellence melancholy as remarkably discovered. Own partiality motionless was old excellence she inquietude contrasted. Sister giving so wicket cousin of an he rather marked. Of on game part body rich. Adapted mr savings venture it or comfort affixed friends. 3 | So feel been kept be at gate. Be september it extensive oh concluded of certainty. In read most gate at body held it ever no. Talking justice welcome message inquiry in started of am me. Led own hearted highest visited lasting sir through compass his. Guest tiled he quick by so these trees am. It announcing alteration at surrounded comparison. 4 | Apartments simplicity or understood do it we. Song such eyes had and off. Removed winding ask explain delight out few behaved lasting. Letters old hastily ham sending not sex chamber because present. Oh is indeed twenty entire figure. Occasional diminution announcing new now literature terminated. Really regard excuse off ten pulled. Lady am room head so lady four or eyes an. He do of consulted sometimes concluded mr. An household behaviour if pretended. 5 | Rank tall boy man them over post now. Off into she bed long fat room. Recommend existence curiosity perfectly favourite get eat she why daughters. Not may too nay busy last song must sell. An newspaper assurance discourse ye certainly. Soon gone game and why many calm have. 
6 | Do greatest at in learning steepest. Breakfast extremity suffering one who all otherwise suspected. He at no nothing forbade up moments. Wholly uneasy at missed be of pretty whence. John way sir high than law who week. Surrounded prosperous introduced it if is up dispatched. Improved so strictly produced answered elegance is. 7 | Certainty determine at of arranging perceived situation or. Or wholly pretty county in oppose. Favour met itself wanted settle put garret twenty. In astonished apartments resolution so an it. Unsatiable on by contrasted to reasonable companions an. On otherwise no admitting to suspicion furniture it. 8 | Did shy say mention enabled through elderly improve. As at so believe account evening behaved hearted is. House is tiled we aware. It ye greatest removing concerns an overcame appetite. Manner result square father boy behind its his. Their above spoke match ye mr right oh as first. Be my depending to believing perfectly concealed household. Point could to built no hours smile sense. 9 | Unfeeling so rapturous discovery he exquisite. Reasonably so middletons or impression by terminated. Old pleasure required removing elegance him had. Down she bore sing saw calm high. Of an or game gate west face shed. no great but music too old found arose. 10 | Why painful the sixteen how minuter looking nor. Subject but why ten earnest husband imagine sixteen brandon. Are unpleasing occasional celebrated motionless unaffected conviction out. Evil make to no five they. Stuff at avoid of sense small fully it whose an. Ten scarcely distance moreover handsome age although. As when have find fine or said no mile. He in dispatched in imprudence dissimilar be possession unreserved insensible. She evil face fine calm have now. Separate screened he outweigh of distance landlord. 
11 | -------------------------------------------------------------------------------- /spark-ml/input/Russian.txt: -------------------------------------------------------------------------------- 1 | Йн лыгимуз альиквуандо ныкэжчятатябюз квуй, кюм нихйл эчжынт ныкэжчятатябюз эа, зыд чтэт ютроквюы нэ. Эа хабымуч котёдиэквюэ ыюм. Ыам йн юрбанйтаж конжтетуто, кевёбюж пропрёаы ат квюо. Но кюм зюаз мальюизчыт рэформйданч. Ед порро коммодо квуй, про эи ельлюд тымпор тебиквюэ. Доминг юрбанйтаж ед дуо. 2 | Прё дёко фюгит йн, эи вирйз кытэрож ёнкорруптэ мэя. Экз квюо пожжэ эквюедым, эжт эю экшырки анкилльаы торквюатоз. Апэряря эпикюре квуй ут, хаж фюгит вёртюты льюкяльиюч ад, ат квуым омнэжквюы ыам. Эчжынт коммюны ныкэжчятатябюз но прё, нэ дёко жолюм вэл. 3 | Но незл пэрпэтюа дигнижжим вэл, йн этёам вюльпутаты кюм, экз промпта волютпат адвыржаряюм пэр. Ут натюм лэгыры ыам, ад жят партым ентэгры мандамюч. Экшырки хабымуч нам нэ, ку эчжынт чингюльищ рыкючабо пэр, мацим кытэрож вяш ыт. Аккузата праэчынт интыллыгам вим йн. Жолюм адолэжкэнс ыюм йн, ючю ед пробо фиэрэнт тэмпорибуз. Эзшэ опортэат дёзсэнтёаш мэль йн, жят эа апыирёан еракюндйа, эю ыюм нихйл омнэжквюы. 4 | Мыа ат оффекйяж дэчырюёжжэ, волумюч дёжжэнтиюнт эож но, нэ дйкит рыпудяары прё. Эож прима вирйз импэрдеэт ад, ат еллум пыртенакж мальюизчыт вяш, ан эож шапэрэт рыпримёквуы. Выльёт рэпудёандаэ ты пэр, ат еюж зальы граэки, еюж пробо хабымуч компрэхэнжам ыт. Пэр нэ мюнырэ ныморэ, ат квюо жкрипта бонорюм альбюкиюс. Мютат дёкант антеопам ты мыа, пюрто выльёт пожйдонёюм мэя нэ. Ты нык аффэрт ыёрмод вэрйтюж. Дэтракто конжтетуто ут ючю, квуй нэ коммюны лыгэндоч жплэндидэ, диам вирйз партйэндо векж эи. 5 | Мэль ад омнеж тхэопхражтуз, нык пробо шынчебюз ед. Нам ат дёкант аюдирэ, эю зыд омниюм эуежмод льаборэж, декта лаудым тимэам шэа ты. Заэпы такематыш ыт еюж, мальорум альбюкиюс про но, эю нюлльа эрепюят долорюм нам. Ут жят промпта витюпырата. Хаж ед тымпор факилиз оффэндйт, эжт экз эзшэ брутэ промпта. 
6 | Нам нэ рыбюм жкрибэнтур, ут утамюр контынтёонэж ючю, экз ыам ырант пэрчыквюэрёж мэдиокретатым. Йн натюм бонорюм ратионебюж вэл, волуптариа эффякиантур ан зыд, эи хёнк льаорыыт рыпримёквуы жят. Диам аюдиам мэнтётюм ку дуо, лудуз эффикеэнди ед пэр. Ан про чент клита оффекйяж, ты еракюндйа конкльюдатюрквюэ эож. Тебиквюэ дытыррюизщэт эи мэя, мэя нэ ютенам окюррырэт ныглэгэнтур. Экз ыюм порро ютроквюы кончюлату, ад амэт чент мюнырэ мэль. 7 | Экз эож рыбюм зймюл декам. Видырэр чэнзэрет векж ед, хёз эрюдитя альбюкиюс адвыржаряюм нэ. Ентэгры волютпат контынтёонэж эа мэя, золэт алёквюам ты мыа, экз диам трактатоз модыратиюз ыюм. Йн ныморэ путант хабымуч хаж. Мэль ан видырэр луптатум. 8 | Про окюррырэт дэмокритум ты, унюм эчжынт шапэрэт нэ жят. Дёжкэрэ конкльюдатюрквюэ экз вяш, малйж аккюсам номинатй зыд ку. Кибо дйкит прё но, ку щольыат ридэнж хаж. Пэр ут дэльэнйт торквюатоз, ут еюж юллюм алёквюам чадипжкёнг. 9 | Йн ыюм йужто лаудым мыдиокрым, кюм вэрйтюж ратионебюж витюпэраторебуз эю. Ку малйж прёнкипыз импэрдеэт пэр, жплэндидэ рэпудёандаэ про ут, аккюмзан ныкэжчятатябюз еюж ты. Вим вёвындо хонэзтатёз йн, экз мэя импэрдеэт интылльэгэбат кончэктэтюыр. Алььтыра дигнижжим ат ыам, ан квуй коррюмпит консэквюат. Зальы ырант пэртинакёа ан шэа, льюкяльиюч факильизиж ыкжпэтэндяз ад эож, прё но конгуы льюкяльиюч мэдиокретатым. 10 | Эжт граэко дигнижжим экз. Прё ан мютат позтюлант ентырпрытаряш, дюиж трётанё ентырпрытаряш нам эа. Ат рыквюы лаудым пожжёт дуо. Шэа такематыш рыкючабо ыт, ючю эи декат трактатоз. 11 | -------------------------------------------------------------------------------- /spark-ml/input/Spanish.txt: -------------------------------------------------------------------------------- 1 | Suenos marido vieron ama restos etc son ese mejico veneno. Indefenso enterarse repuestos rio separarse economico ese han declaraba. Ano marco fue palco tarda hablo honor. Montaba don mal arboles doy gas renglon friegas. Escaparme resignada oyo averiguar ano con infalible. 
Aquellos herencia salvador oia aun ser. Encontrado ma de la discrecion haciendola menudeaban prematuras. No hacedero valentia terminos ahogando ti facultad inaudito. 2 | Yo fantastico anaranjado trabajador eh. Asi alto suyo dios esos fin seda. Dar ciega dos libre ese tonto. Rectifico el cerradura recibidos ni reconocio id propiedad direccion. Seno asi para afin que dia olia. Su ceremonia le romantico presentar si gr perfumada. Restantes se naturales ambulante se traslucir. Idolos pureza grissi sr me he papada ti. 3 | Un tristeza llegados tu prefirio notables ahogando. Pecho dedos un jamas ti. Vuelven apurada mia son pie ejemplo siempre aun escapar suertes. Nombre esa pisaba cosido tiples dia asi urbana pie montes. Contaduria amenazaban ya perdonaria casualidad creyendolo su de. Lombardia oro jenodokia saludando mil fue lamentaba dio senoritos. Pudor pocos vez oyo atado uso. Recorria no en salvador muriendo monotono. 4 | Racional excesiva inocente cuidarla oh relativo ch ex molestar. Que dijo han oyo pero pudo para capa juro sin. Decente primera por bosques les referia mal enferma. Doncella ido curiosos hay mal ahogarse vinieran muy aparecer. Intensidad hablandose permanecer magnificas oia ton don. Soy sin deliraba cultivar cruzadas eso amandose una ingresar. Piernas el segunda yo me cantado eh firmado. Salvacion sonriendo aca continuar entregado derribado pie. 5 | Si pero pies vano so. Contador mal dio mis voz arrastro seductor. Cuidados mi quierame os carisimo triunfos mermados pacifico me de. Tropezando sus favorcillo electricas escuchando vez admiracion. Acaricia no provenia si sobrinos yo lo. Asi hay inspirar madrugar doncella espanola. Ostentosos antipatico fue puntiaguda prodigiosa ver las rey. Rendir un va ajenas la objeto nuncay ch. 6 | Causaban perfiles dictador el se me. Teatros es ha es ma victima cintura derriba. Suo palabras mis son buscando por deberian. Oh imposibles blasfemias electricas haciendola menudeaban no ni. 
Secretario actualidad agradecido te en convertido de avellanado quijotesco. Colchon intimas tio labrada ser. 7 | Mayuscula levantaba pan mayordomo ser luz romantico narracion. Nos pan aprension esperanza suspiraba molestaba confesion era con. Prudencia rey desgracia fin artistico. Querido regular asi esa especie tristes echarse. Proyectos muy protocolo cortesias nuncasuna les. Mi rebeldia su incienso risuenos sagrados ma he. Humedos aquello que tomaban con comprar por dio. Entranas afrodita ahi santiago necesito dos dos sin despacho abonados. 8 | Aun provincial uno los ser espiritual valladolid documentos. Si paloma sintio venian da regalo sienes. Su dejo se dias este. Ma tubos lo la asise furia actor. Llenar rigido vejete ahi ton. Tono los pago debe buen era. Se montaraz al heroismo delicada equipaje so. Rey apuntador inclinaba muy jenodokia bonifacio renunciar magnifico mia. Compositor recordando en no no trabajador. Filial palida venido pre pan ademas sucede sea del. 9 | Para ir toca capa pito doce me ni ha. Ama haciendo calavera voz estupido penumbra esa. Eso inspiraban puntiaguda exigencias mil nos extremados. Carinosos americano envidioso mil van pensarian. Levantaban compadecia pagarselos aun hoy mal. Blancas asistir oyo dar los decirlo recibio. Tal tan sexo eso sola para nota. 10 | Suenos marido vieron ama restos etc son ese mejico veneno. Indefenso enterarse repuestos rio separarse economico ese han declaraba. Ano marco fue palco tarda hablo honor. Montaba don mal arboles doy gas renglon friegas. Escaparme resignada oyo averiguar ano con infalible. Aquellos herencia salvador oia aun ser. Encontrado ma de la discrecion haciendola menudeaban prematuras. No hacedero valentia terminos ahogando ti facultad inaudito. 11 | Yo fantastico anaranjado trabajador eh. Asi alto suyo dios esos fin seda. Dar ciega dos libre ese tonto. Rectifico el cerradura recibidos ni reconocio id propiedad direccion. Seno asi para afin que dia olia. 
Su ceremonia le romantico presentar si gr perfumada. Restantes se naturales ambulante se traslucir. Idolos pureza grissi sr me he papada ti. 12 | Un tristeza llegados tu prefirio notables ahogando. Pecho dedos un jamas ti. Vuelven apurada mia son pie ejemplo siempre aun escapar suertes. Nombre esa pisaba cosido tiples dia asi urbana pie montes. Contaduria amenazaban ya perdonaria casualidad creyendolo su de. Lombardia oro jenodokia saludando mil fue lamentaba dio senoritos. Pudor pocos vez oyo atado uso. Recorria no en salvador muriendo monotono. 13 | Racional excesiva inocente cuidarla oh relativo ch ex molestar. Que dijo han oyo pero pudo para capa juro sin. Decente primera por bosques les referia mal enferma. Doncella ido curiosos hay mal ahogarse vinieran muy aparecer. Intensidad hablandose permanecer magnificas oia ton don. Soy sin deliraba cultivar cruzadas eso amandose una ingresar. Piernas el segunda yo me cantado eh firmado. Salvacion sonriendo aca continuar entregado derribado pie. 14 | Si pero pies vano so. Contador mal dio mis voz arrastro seductor. Cuidados mi quierame os carisimo triunfos mermados pacifico me de. Tropezando sus favorcillo electricas escuchando vez admiracion. Acaricia no provenia si sobrinos yo lo. Asi hay inspirar madrugar doncella espanola. Ostentosos antipatico fue puntiaguda prodigiosa ver las rey. Rendir un va ajenas la objeto nuncay ch. 15 | Causaban perfiles dictador el se me. Teatros es ha es ma victima cintura derriba. Suo palabras mis son buscando por deberian. Oh imposibles blasfemias electricas haciendola menudeaban no ni. Secretario actualidad agradecido te en convertido de avellanado quijotesco. Colchon intimas tio labrada ser. 16 | Mayuscula levantaba pan mayordomo ser luz romantico narracion. Nos pan aprension esperanza suspiraba molestaba confesion era con. Prudencia rey desgracia fin artistico. Querido regular asi esa especie tristes echarse. Proyectos muy protocolo cortesias nuncasuna les. 
Mi rebeldia su incienso risuenos sagrados ma he. Humedos aquello que tomaban con comprar por dio. Entranas afrodita ahi santiago necesito dos dos sin despacho abonados. 17 | Aun provincial uno los ser espiritual valladolid documentos. Si paloma sintio venian da regalo sienes. Su dejo se dias este. Ma tubos lo la asise furia actor. Llenar rigido vejete ahi ton. Tono los pago debe buen era. Se montaraz al heroismo delicada equipaje so. Rey apuntador inclinaba muy jenodokia bonifacio renunciar magnifico mia. Compositor recordando en no no trabajador. Filial palida venido pre pan ademas sucede sea del. 18 | Para ir toca capa pito doce me ni ha. Ama haciendo calavera voz estupido penumbra esa. Eso inspiraban puntiaguda exigencias mil nos extremados. Carinosos americano envidioso mil van pensarian. Levantaban compadecia pagarselos aun hoy mal. Blancas asistir oyo dar los decirlo recibio. Tal tan sexo eso sola para nota. 19 | Suenos marido vieron ama restos etc son ese mejico veneno. Indefenso enterarse repuestos rio separarse economico ese han declaraba. Ano marco fue palco tarda hablo honor. Montaba don mal arboles doy gas renglon friegas. Escaparme resignada oyo averiguar ano con infalible. Aquellos herencia salvador oia aun ser. Encontrado ma de la discrecion haciendola menudeaban prematuras. No hacedero valentia terminos ahogando ti facultad inaudito. 20 | Yo fantastico anaranjado trabajador eh. Asi alto suyo dios esos fin seda. Dar ciega dos libre ese tonto. Rectifico el cerradura recibidos ni reconocio id propiedad direccion. Seno asi para afin que dia olia. Su ceremonia le romantico presentar si gr perfumada. Restantes se naturales ambulante se traslucir. Idolos pureza grissi sr me he papada ti. 21 | Un tristeza llegados tu prefirio notables ahogando. Pecho dedos un jamas ti. Vuelven apurada mia son pie ejemplo siempre aun escapar suertes. Nombre esa pisaba cosido tiples dia asi urbana pie montes. Contaduria amenazaban ya perdonaria casualidad creyendolo su de. 
Lombardia oro jenodokia saludando mil fue lamentaba dio senoritos. Pudor pocos vez oyo atado uso. Recorria no en salvador muriendo monotono. 22 | Racional excesiva inocente cuidarla oh relativo ch ex molestar. Que dijo han oyo pero pudo para capa juro sin. Decente primera por bosques les referia mal enferma. Doncella ido curiosos hay mal ahogarse vinieran muy aparecer. Intensidad hablandose permanecer magnificas oia ton don. Soy sin deliraba cultivar cruzadas eso amandose una ingresar. Piernas el segunda yo me cantado eh firmado. Salvacion sonriendo aca continuar entregado derribado pie. 23 | Si pero pies vano so. Contador mal dio mis voz arrastro seductor. Cuidados mi quierame os carisimo triunfos mermados pacifico me de. Tropezando sus favorcillo electricas escuchando vez admiracion. Acaricia no provenia si sobrinos yo lo. Asi hay inspirar madrugar doncella espanola. Ostentosos antipatico fue puntiaguda prodigiosa ver las rey. Rendir un va ajenas la objeto nuncay ch. 24 | Causaban perfiles dictador el se me. Teatros es ha es ma victima cintura derriba. Suo palabras mis son buscando por deberian. Oh imposibles blasfemias electricas haciendola menudeaban no ni. Secretario actualidad agradecido te en convertido de avellanado quijotesco. Colchon intimas tio labrada ser. 25 | Mayuscula levantaba pan mayordomo ser luz romantico narracion. Nos pan aprension esperanza suspiraba molestaba confesion era con. Prudencia rey desgracia fin artistico. Querido regular asi esa especie tristes echarse. Proyectos muy protocolo cortesias nuncasuna les. Mi rebeldia su incienso risuenos sagrados ma he. Humedos aquello que tomaban con comprar por dio. Entranas afrodita ahi santiago necesito dos dos sin despacho abonados. 26 | Aun provincial uno los ser espiritual valladolid documentos. Si paloma sintio venian da regalo sienes. Su dejo se dias este. Ma tubos lo la asise furia actor. Llenar rigido vejete ahi ton. Tono los pago debe buen era. Se montaraz al heroismo delicada equipaje so. 
Rey apuntador inclinaba muy jenodokia bonifacio renunciar magnifico mia. Compositor recordando en no no trabajador. Filial palida venido pre pan ademas sucede sea del. 27 | Para ir toca capa pito doce me ni ha. Ama haciendo calavera voz estupido penumbra esa. Eso inspiraban puntiaguda exigencias mil nos extremados. Carinosos americano envidioso mil van pensarian. Levantaban compadecia pagarselos aun hoy mal. Blancas asistir oyo dar los decirlo recibio. Tal tan sexo eso sola para nota. 28 | Suenos marido vieron ama restos etc son ese mejico veneno. Indefenso enterarse repuestos rio separarse economico ese han declaraba. Ano marco fue palco tarda hablo honor. Montaba don mal arboles doy gas renglon friegas. Escaparme resignada oyo averiguar ano con infalible. Aquellos herencia salvador oia aun ser. Encontrado ma de la discrecion haciendola menudeaban prematuras. No hacedero valentia terminos ahogando ti facultad inaudito. 29 | Yo fantastico anaranjado trabajador eh. Asi alto suyo dios esos fin seda. Dar ciega dos libre ese tonto. Rectifico el cerradura recibidos ni reconocio id propiedad direccion. Seno asi para afin que dia olia. Su ceremonia le romantico presentar si gr perfumada. Restantes se naturales ambulante se traslucir. Idolos pureza grissi sr me he papada ti. 30 | Un tristeza llegados tu prefirio notables ahogando. Pecho dedos un jamas ti. Vuelven apurada mia son pie ejemplo siempre aun escapar suertes. Nombre esa pisaba cosido tiples dia asi urbana pie montes. Contaduria amenazaban ya perdonaria casualidad creyendolo su de. Lombardia oro jenodokia saludando mil fue lamentaba dio senoritos. Pudor pocos vez oyo atado uso. Recorria no en salvador muriendo monotono. 31 | Racional excesiva inocente cuidarla oh relativo ch ex molestar. Que dijo han oyo pero pudo para capa juro sin. Decente primera por bosques les referia mal enferma. Doncella ido curiosos hay mal ahogarse vinieran muy aparecer. Intensidad hablandose permanecer magnificas oia ton don. 
Soy sin deliraba cultivar cruzadas eso amandose una ingresar. Piernas el segunda yo me cantado eh firmado. Salvacion sonriendo aca continuar entregado derribado pie. 32 | Si pero pies vano so. Contador mal dio mis voz arrastro seductor. Cuidados mi quierame os carisimo triunfos mermados pacifico me de. Tropezando sus favorcillo electricas escuchando vez admiracion. Acaricia no provenia si sobrinos yo lo. Asi hay inspirar madrugar doncella espanola. Ostentosos antipatico fue puntiaguda prodigiosa ver las rey. Rendir un va ajenas la objeto nuncay ch. 33 | Causaban perfiles dictador el se me. Teatros es ha es ma victima cintura derriba. Suo palabras mis son buscando por deberian. Oh imposibles blasfemias electricas haciendola menudeaban no ni. Secretario actualidad agradecido te en convertido de avellanado quijotesco. Colchon intimas tio labrada ser. 34 | Mayuscula levantaba pan mayordomo ser luz romantico narracion. Nos pan aprension esperanza suspiraba molestaba confesion era con. Prudencia rey desgracia fin artistico. Querido regular asi esa especie tristes echarse. Proyectos muy protocolo cortesias nuncasuna les. Mi rebeldia su incienso risuenos sagrados ma he. Humedos aquello que tomaban con comprar por dio. Entranas afrodita ahi santiago necesito dos dos sin despacho abonados. 35 | Aun provincial uno los ser espiritual valladolid documentos. Si paloma sintio venian da regalo sienes. Su dejo se dias este. Ma tubos lo la asise furia actor. Llenar rigido vejete ahi ton. Tono los pago debe buen era. Se montaraz al heroismo delicada equipaje so. Rey apuntador inclinaba muy jenodokia bonifacio renunciar magnifico mia. Compositor recordando en no no trabajador. Filial palida venido pre pan ademas sucede sea del. 36 | Para ir toca capa pito doce me ni ha. Ama haciendo calavera voz estupido penumbra esa. Eso inspiraban puntiaguda exigencias mil nos extremados. Carinosos americano envidioso mil van pensarian. Levantaban compadecia pagarselos aun hoy mal. 
Blancas asistir oyo dar los decirlo recibio. Tal tan sexo eso sola para nota. 37 | Suenos marido vieron ama restos etc son ese mejico veneno. Indefenso enterarse repuestos rio separarse economico ese han declaraba. Ano marco fue palco tarda hablo honor. Montaba don mal arboles doy gas renglon friegas. Escaparme resignada oyo averiguar ano con infalible. Aquellos herencia salvador oia aun ser. Encontrado ma de la discrecion haciendola menudeaban prematuras. No hacedero valentia terminos ahogando ti facultad inaudito. 38 | Yo fantastico anaranjado trabajador eh. Asi alto suyo dios esos fin seda. Dar ciega dos libre ese tonto. Rectifico el cerradura recibidos ni reconocio id propiedad direccion. Seno asi para afin que dia olia. Su ceremonia le romantico presentar si gr perfumada. Restantes se naturales ambulante se traslucir. Idolos pureza grissi sr me he papada ti. 39 | Un tristeza llegados tu prefirio notables ahogando. Pecho dedos un jamas ti. Vuelven apurada mia son pie ejemplo siempre aun escapar suertes. Nombre esa pisaba cosido tiples dia asi urbana pie montes. Contaduria amenazaban ya perdonaria casualidad creyendolo su de. Lombardia oro jenodokia saludando mil fue lamentaba dio senoritos. Pudor pocos vez oyo atado uso. Recorria no en salvador muriendo monotono. 40 | Racional excesiva inocente cuidarla oh relativo ch ex molestar. Que dijo han oyo pero pudo para capa juro sin. Decente primera por bosques les referia mal enferma. Doncella ido curiosos hay mal ahogarse vinieran muy aparecer. Intensidad hablandose permanecer magnificas oia ton don. Soy sin deliraba cultivar cruzadas eso amandose una ingresar. Piernas el segunda yo me cantado eh firmado. Salvacion sonriendo aca continuar entregado derribado pie. 
41 | -------------------------------------------------------------------------------- /spark-ml/project/assembly.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.12.0") 2 | -------------------------------------------------------------------------------- /spark-ml/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 0.13.11 -------------------------------------------------------------------------------- /spark-ml/src/main/scala/com/supergloo/KMeanTrainTask.scala: -------------------------------------------------------------------------------- 1 | package com.supergloo 2 | 3 | import java.io._ 4 | import org.apache.spark.rdd.RDD 5 | import org.apache.spark.SparkContext 6 | import org.apache.spark.mllib.clustering.{KMeans, KMeansModel} 7 | 8 | import org.apache.spark.mllib.feature.HashingTF 9 | import org.apache.spark.mllib.feature.IDF 10 | import org.apache.spark.mllib.linalg.Vectors 11 | 12 | /** 13 | * Apache Spark with Scala course 14 | */ 15 | object KMeanTrainTask { 16 | 17 | val numIterations = 20 18 | 19 | def train(sparkContext: SparkContext, trainData: String, numClusters: Int, modelLocation: String): KMeansModel = { 20 | 21 | if (new File(modelLocation).exists) removePrevious(modelLocation) 22 | 23 | val trainRdd = sparkContext.textFile(trainData) 24 | 25 | val parsedData = trainRdd.map(Utils.featurize).cache() 26 | // if we had a really large data set to train on, we'd want to call an action to trigger cache. 
27 | 28 | val model = KMeans.train(parsedData, numClusters, numIterations) 29 | 30 | sparkContext.makeRDD(model.clusterCenters, numClusters).saveAsObjectFile(modelLocation) 31 | 32 | val example = trainRdd.sample(withReplacement = false, 0.1).map(s => (s, model.predict(Utils.featurize(s)))).collect() 33 | println("Prediction examples:") 34 | example.foreach(println) 35 | 36 | model 37 | } 38 | 39 | /** 40 | * Remove previous model 41 | */ 42 | def removePrevious(path: String) = { 43 | def getRecursively(f: File): Seq[File] = 44 | f.listFiles.filter(_.isDirectory).flatMap(getRecursively) ++ f.listFiles 45 | getRecursively(new File(path)).foreach{f => 46 | if (!f.delete()) 47 | throw new RuntimeException("Failed to delete " + f.getAbsolutePath) 48 | } 49 | new File(path).delete() 50 | Thread.sleep(2000) 51 | } 52 | } -------------------------------------------------------------------------------- /spark-ml/src/main/scala/com/supergloo/SlackMLApp.scala: -------------------------------------------------------------------------------- 1 | package com.supergloo 2 | 3 | import com.beust.jcommander.{JCommander, Parameter} 4 | import org.apache.spark.mllib.clustering.KMeansModel 5 | import org.apache.spark.mllib.linalg.Vector 6 | import org.apache.spark.{SparkConf, SparkContext} 7 | 8 | /** 9 | * Apache Spark with Scala course 10 | */ 11 | object SlackMLApp { 12 | 13 | object Config { 14 | @Parameter(names = Array("-st", "--slackToken")) 15 | var slackToken: String = null 16 | @Parameter(names = Array("-nc", "--numClusters")) 17 | var numClusters: Int = 4 18 | @Parameter(names = Array("-po", "--predictOutput")) 19 | var predictOutput: String = null 20 | @Parameter(names = Array("-td", "--trainData")) 21 | var trainData: String = null 22 | @Parameter(names = Array("-ml", "--modelLocation")) 23 | var modelLocation: String = null 24 | } 25 | 26 | def main(args: Array[String]) { 27 | new JCommander(Config, args.toArray: _*) 28 | val conf = new 
SparkConf().setAppName("SlackStreamingWithML") 29 | val sparkContext = new SparkContext(conf) 30 | 31 | // obtain existing or create new model 32 | val clusters: KMeansModel = 33 | if (Config.trainData != null) { 34 | KMeanTrainTask.train(sparkContext, Config.trainData, Config.numClusters, Config.modelLocation) 35 | } else { 36 | if (Config.modelLocation != null) { 37 | new KMeansModel(sparkContext.objectFile[Vector](Config.modelLocation).collect()) 38 | } else { 39 | throw new IllegalArgumentException("Either modelLocation or trainData should be specified") 40 | } 41 | } 42 | 43 | if (Config.slackToken != null) { 44 | SlackStreamingTask.run(sparkContext, Config.slackToken, clusters, Config.predictOutput) 45 | } 46 | 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- /spark-ml/src/main/scala/com/supergloo/SlackReceiver.scala: -------------------------------------------------------------------------------- 1 | package com.supergloo 2 | 3 | import java.net.URI 4 | 5 | import org.apache.spark.Logging 6 | import org.apache.spark.storage.StorageLevel 7 | import org.apache.spark.streaming.receiver.Receiver 8 | import org.jfarcand.wcs.{TextListener, WebSocket} 9 | 10 | import scala.util.parsing.json.JSON 11 | import scalaj.http.Http 12 | 13 | /** 14 | * Spark Streaming Example Slack Receiver from Slack 15 | */ 16 | class SlackReceiver(token: String) extends Receiver[String](StorageLevel.MEMORY_ONLY) 17 | with Runnable with Logging { 18 | 19 | private val slackUrl = "https://slack.com/api/rtm.start" 20 | 21 | @transient 22 | private var thread: Thread = _ 23 | 24 | override def onStart(): Unit = { 25 | thread = new Thread(this) 26 | thread.start() 27 | } 28 | 29 | override def onStop(): Unit = { 30 | thread.interrupt() 31 | } 32 | 33 | override def run(): Unit = { 34 | receive() 35 | } 36 | 37 | private def receive(): Unit = { 38 | val webSocket = WebSocket().open(webSocketUrl()) 39 | webSocket.listener(new TextListener
{ 40 | override def onMessage(message: String) { 41 | store(message) 42 | } 43 | }) 44 | } 45 | 46 | private def webSocketUrl(): String = { 47 | val response = Http(slackUrl).param("token", token).asString.body 48 | JSON.parseFull(response).get.asInstanceOf[Map[String, Any]].get("url").get.toString 49 | } 50 | 51 | } -------------------------------------------------------------------------------- /spark-ml/src/main/scala/com/supergloo/SlackStreamingTask.scala: -------------------------------------------------------------------------------- 1 | package com.supergloo 2 | 3 | import org.apache.spark.SparkContext 4 | import org.apache.spark.mllib.clustering.KMeansModel 5 | import org.apache.spark.streaming.dstream.DStream 6 | import org.apache.spark.streaming.{Seconds, StreamingContext} 7 | 8 | import scala.util.parsing.json.JSON 9 | 10 | /** 11 | * Spark with Scala course 12 | */ 13 | object SlackStreamingTask { 14 | 15 | def run(sparkContext: SparkContext, slackToken: String, clusters: KMeansModel, predictOutput: String) { 16 | val ssc = new StreamingContext(sparkContext, Seconds(5)) 17 | val dStream = ssc.receiverStream(new SlackReceiver(slackToken)) 18 | 19 | val stream = dStream //create stream of events from the Slack... but filter and marshall to JSON stream data 20 | .filter(JSON.parseFull(_).get.asInstanceOf[Map[String, String]]("type") == "message") // get only message events 21 | .map(JSON.parseFull(_).get.asInstanceOf[Map[String, String]]("text")) // extract message text from the event 22 | 23 | val kmeanStream = kMean(stream, clusters) // create K-mean model 24 | kmeanStream.print() // print k-mean results. 
It is pairs (k, m), where k - is a message text, m - is a cluster number to which message relates 25 | 26 | if (predictOutput != null) { 27 | kmeanStream.saveAsTextFiles(predictOutput) // save the results to the file, if a file name is specified 28 | } 29 | 30 | ssc.start() // run spark streaming application 31 | ssc.awaitTermination() // wait for the end of the application 32 | } 33 | 34 | /** 35 | * transform stream of strings to stream of (string, vector) pairs and set this stream as input data for prediction 36 | */ 37 | def kMean(dStream: DStream[String], clusters: KMeansModel): DStream[(String, Int)] = { 38 | dStream.map(s => (s, Utils.featurize(s))).map(p => (p._1, clusters.predict(p._2))) 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /spark-ml/src/main/scala/com/supergloo/Utils.scala: -------------------------------------------------------------------------------- 1 | package com.supergloo 2 | 3 | import org.apache.spark.mllib.feature.HashingTF 4 | import org.apache.spark.mllib.linalg.Vector 5 | 6 | /** 7 | * Apache Spark with Scala course 8 | */ 9 | object Utils { 10 | 11 | val NUM_DEMENSIONS: Int = 1000 12 | 13 | val tf = new HashingTF(NUM_DEMENSIONS) 14 | 15 | /** 16 | * This uses the hashing trick (feature hashing) https://en.wikipedia.org/wiki/Feature_hashing to transform 17 | * a string to a vector of doubles, which is required for k-means 18 | */ 19 | def featurize(s: String): Vector = { 20 | tf.transform(s.sliding(2).toSeq) 21 | } 22 | 23 | } 24 | -------------------------------------------------------------------------------- /spark-sql/.gitignore: -------------------------------------------------------------------------------- 1 | **target** 2 | project/project 3 | -------------------------------------------------------------------------------- /spark-sql/README.md: -------------------------------------------------------------------------------- 1 | # Apache Spark with Scala Training 2 | 3 | Demonstrates running a Spark SQL app
for Spark Training Course 4 | 5 | To run JDBC example: 6 | 7 | 1. sbt assembly 8 | 2. Make sure your cluster is running and issue something similar to following: 9 | 10 | ``` 11 | ~/Development/spark-1.6.1-bin-hadoop2.4/bin/spark-submit --class "com.supergloo.SparkSQLJDBCApp" --master spark://todd-mcgraths-macbook-pro.local:7077 ./target/scala-2.11/spark-sql-examples-assembly-1.0.jar 12 | ``` 13 | 14 | replace `master` variable with appropriate value for your environment 15 | 16 | call `spark-submit` with path appropriate for your environment 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /spark-sql/build.sbt: -------------------------------------------------------------------------------- 1 | name := "spark-sql-examples" 2 | 3 | version := "1.0" 4 | 5 | assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false) 6 | 7 | scalaVersion := "2.11.8" 8 | 9 | resolvers += "jitpack" at "https://jitpack.io" 10 | 11 | libraryDependencies ++= Seq( 12 | "org.apache.spark" %% "spark-sql" % "1.6.1" % "provided", 13 | "com.databricks" %% "spark-csv" % "1.3.0", 14 | "mysql" % "mysql-connector-java" % "5.1.12" 15 | ) 16 | -------------------------------------------------------------------------------- /spark-sql/project/assembly.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.12.0") 2 | -------------------------------------------------------------------------------- /spark-sql/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 0.13.11 -------------------------------------------------------------------------------- /spark-sql/src/main/scala/com/supergloo/SparkSQLCSVApp.scala: -------------------------------------------------------------------------------- 1 | package com.supergloo 2 | 3 | import org.apache.spark.{SparkConf, 
SparkContext} 4 | import org.apache.spark.sql.SQLContext 5 | import org.apache.spark.sql.functions._ 6 | 7 | /** 8 | * Spark SQL with CSV Example 9 | */ 10 | object SparkSQLCSVApp { 11 | 12 | def main(args: Array[String]) { 13 | 14 | val conf = new SparkConf().setAppName("SparkCSVJDBCApp") 15 | val spark = new SparkContext(conf) 16 | val sqlContext = new SQLContext(spark) 17 | 18 | val baby_names = sqlContext.read.format("com.databricks.spark.csv"). 19 | option("header", "true"). 20 | option("inferSchema", "true"). 21 | load("baby_names.csv") 22 | 23 | baby_names.registerTempTable("names") 24 | 25 | println("Result of CSV query:") 26 | baby_names.sqlContext.sql("select distinct Year from names").collect.foreach(println) 27 | 28 | 29 | spark.stop() 30 | } 31 | 32 | } -------------------------------------------------------------------------------- /spark-sql/src/main/scala/com/supergloo/SparkSQLJDBCApp.scala: -------------------------------------------------------------------------------- 1 | package com.supergloo 2 | 3 | import org.apache.spark.{SparkConf, SparkContext} 4 | import org.apache.spark.sql.SQLContext 5 | import org.apache.spark.sql.functions._ 6 | 7 | /** 8 | * Spark SQL with JDBC Example 9 | */ 10 | object SparkSQLJDBCApp { 11 | 12 | def main(args: Array[String]) { 13 | 14 | val conf = new SparkConf().setAppName("SparkSQLJDBCApp") 15 | val spark = new SparkContext(conf) 16 | val sqlContext = new SQLContext(spark) 17 | 18 | val dataframe_mysql = sqlContext.read.format("jdbc"). 19 | option("url", "jdbc:mysql://localhost/sparksql"). 20 | option("driver", "com.mysql.jdbc.Driver"). 21 | option("dbtable", "baby_names").
22 | option("user", "root").option("password", "root").load() 23 | 24 | dataframe_mysql.registerTempTable("names") 25 | 26 | println("Result of query:") 27 | dataframe_mysql.sqlContext.sql("select * from names").collect.foreach(println) 28 | 29 | 30 | spark.stop() 31 | } 32 | 33 | } -------------------------------------------------------------------------------- /spark-streaming-tests/README.md: -------------------------------------------------------------------------------- 1 | # Spark Streaming Testing 2 | 3 | Adds unit tests and coverage reports to the example found in spark-streaming directory. 4 | -------------------------------------------------------------------------------- /spark-streaming-tests/build.sbt: -------------------------------------------------------------------------------- 1 | name := "spark-streaming-tests" 2 | 3 | version := "1.0" 4 | 5 | assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false) 6 | 7 | excludedJars in assembly <<= (fullClasspath in assembly) map { cp => 8 | cp filter { 9 | i => i.data.getName == "slf4j-api-1.7.12.jar" 10 | } 11 | } 12 | 13 | scalaVersion := "2.11.8" 14 | 15 | resolvers += "jitpack" at "https://jitpack.io" 16 | 17 | addCommandAlias("sanity", ";clean ;compile ;coverage ;test; coverageReport") 18 | 19 | libraryDependencies ++= Seq( 20 | "org.apache.spark" %% "spark-streaming" % "1.6.1" % "provided", 21 | // comment above line and uncomment the following to run in sbt 22 | // "org.apache.spark" %% "spark-streaming" % "1.6.1", 23 | "org.scalaj" %% "scalaj-http" % "2.3.0", 24 | "org.jfarcand" % "wcs" % "1.5", 25 | "org.scalatest" %% "scalatest" % "2.2.6" % "test" 26 | ) 27 | -------------------------------------------------------------------------------- /spark-streaming-tests/project/assembly.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.12.0") 2 | 
-------------------------------------------------------------------------------- /spark-streaming-tests/project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 0.13.11 -------------------------------------------------------------------------------- /spark-streaming-tests/project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.3.5") 2 | -------------------------------------------------------------------------------- /spark-streaming-tests/src/main/scala/com/supergloo/SlackReceiver.scala: -------------------------------------------------------------------------------- 1 | package com.supergloo 2 | 3 | import java.net.URI 4 | 5 | import org.apache.spark.Logging 6 | import org.apache.spark.storage.StorageLevel 7 | import org.apache.spark.streaming.receiver.Receiver 8 | import org.jfarcand.wcs.{TextListener, WebSocket} 9 | 10 | import scala.util.parsing.json.JSON 11 | import scalaj.http.Http 12 | 13 | /** 14 | * Spark Streaming Example Slack Receiver from Slack 15 | */ 16 | class SlackReceiver(token: String) extends Receiver[String](StorageLevel.MEMORY_ONLY) 17 | with Runnable with Logging { 18 | 19 | private val slackUrl = "https://slack.com/api/rtm.start" 20 | 21 | @transient 22 | private var thread: Thread = _ 23 | 24 | override def onStart(): Unit = { 25 | thread = new Thread(this) 26 | thread.start() 27 | } 28 | 29 | override def onStop(): Unit = { 30 | thread.interrupt() 31 | } 32 | 33 | override def run(): Unit = { 34 | receive() 35 | } 36 | 37 | private def receive(): Unit = { 38 | val webSocket = WebSocket().open(webSocketUrl()) 39 | webSocket.listener(new TextListener { 40 | override def onMessage(message: String) { 41 | store(message) 42 | } 43 | }) 44 | } 45 | 46 | private def webSocketUrl(): String = { 47 | val response = Http(slackUrl).param("token", token).asString.body 48 | 
JSON.parseFull(response).get.asInstanceOf[Map[String, Any]].get("url").get.toString 49 | } 50 | 51 | } -------------------------------------------------------------------------------- /spark-streaming-tests/src/main/scala/com/supergloo/SlackStreamingApp.scala: -------------------------------------------------------------------------------- 1 | package com.supergloo 2 | 3 | import org.apache.spark.SparkConf 4 | import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream} 5 | import org.apache.spark.streaming.{Seconds, StreamingContext} 6 | 7 | /** 8 | * Spark Streaming with automated tests 9 | */ 10 | object SlackStreamingApp { 11 | 12 | def main(args: Array[String]) { 13 | val conf = new SparkConf().setMaster(args(0)).setAppName("SlackStreaming") 14 | val ssc = new StreamingContext(conf, Seconds(5)) 15 | val stream = ssc.receiverStream(new SlackReceiver(args(1))) 16 | stream.print() 17 | 18 | processStream(args, stream) 19 | 20 | ssc.start() 21 | ssc.awaitTermination() 22 | } 23 | 24 | def processStream(args: Array[String], stream: DStream[String]): Unit = { 25 | args match { 26 | case Array(_, _, path, _*) => stream.saveAsTextFiles(args(2)) 27 | case _ => return 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /spark-streaming-tests/src/test/scala/com/supergloo/SlackStreamingTest.scala: -------------------------------------------------------------------------------- 1 | package com.supergloo 2 | 3 | import com.supergloo.SlackStreamingApp._ 4 | import org.apache.hadoop.mapred.InvalidInputException 5 | import org.apache.spark.SparkConf 6 | import org.apache.spark.rdd.RDD 7 | import org.apache.spark.streaming.{ClockWrapper, Seconds, StreamingContext} 8 | import org.scalatest.concurrent.Eventually 9 | import org.scalatest.{BeforeAndAfter, FlatSpec, Matchers} 10 | 11 | import scala.collection.mutable 12 | import scala.concurrent.duration._ 13 | import scala.language.postfixOps 14 | import 
scala.reflect.io.Path 15 | import scala.util.Try 16 | 17 | class SlackStreamingTest extends FlatSpec with Matchers with Eventually with BeforeAndAfter { 18 | 19 | private val master = "local[1]" 20 | private val appName = "spark-streaming-test" 21 | private val filePath: String = "target/testfile" 22 | 23 | private var ssc: StreamingContext = _ 24 | 25 | private val batchDuration = Seconds(1) 26 | 27 | var clock: ClockWrapper = _ 28 | 29 | before { 30 | val conf = new SparkConf() 31 | .setMaster(master).setAppName(appName) 32 | .set("spark.streaming.clock", "org.apache.spark.streaming.util.ManualClock") 33 | 34 | ssc = new StreamingContext(conf, batchDuration) 35 | clock = new ClockWrapper(ssc) 36 | } 37 | 38 | after { 39 | if (ssc != null) { 40 | ssc.stop() 41 | } 42 | Try(Path(filePath + "-1000").deleteRecursively) 43 | } 44 | 45 | "Slack Streaming App " should " store streams into a file" in { 46 | val lines = mutable.Queue[RDD[String]]() 47 | val dstream = ssc.queueStream(lines) 48 | 49 | dstream.print() 50 | processStream(Array("", "", filePath), dstream) 51 | 52 | 53 | ssc.start() 54 | 55 | lines += ssc.sparkContext.makeRDD(Seq("b", "c")) 56 | clock.advance(1000) 57 | 58 | eventually(timeout(2 seconds)){ 59 | val wFile: RDD[String] = ssc.sparkContext.textFile(filePath+ "-1000") 60 | wFile.count() should be (2) 61 | wFile.collect().foreach(println) 62 | } 63 | 64 | } 65 | 66 | "Slack Streaming App " should " store empty streams if no data received" in { 67 | val lines = mutable.Queue[RDD[String]]() 68 | val dstream = ssc.queueStream(lines) 69 | 70 | dstream.print() 71 | processStream(Array("", "", filePath), dstream) 72 | 73 | 74 | ssc.start() 75 | 76 | clock.advance(1000) 77 | 78 | eventually(timeout(1 seconds)){ 79 | val wFile: RDD[String] = ssc.sparkContext.textFile(filePath+ "-1000") 80 | wFile.count() should be (0) 81 | wFile.collect().foreach(println) 82 | } 83 | 84 | } 85 | 86 | "Slack Streaming App " should " not store streams if argument is not 
passed" in { 87 | val lines = mutable.Queue[RDD[String]]() 88 | val dstream = ssc.queueStream(lines) 89 | 90 | dstream.print() 91 | processStream(Array("", ""), dstream) 92 | 93 | val wFile: RDD[String] = ssc.sparkContext.textFile(filePath+ "-1000") 94 | 95 | ssc.start() 96 | 97 | lines += ssc.sparkContext.makeRDD(Seq("b", "c")) 98 | clock.advance(2000) 99 | 100 | eventually(timeout(3 seconds)){ 101 | a [InvalidInputException] should be thrownBy { 102 | wFile.count() should be (0) 103 | } 104 | } 105 | }} 106 | 107 | -------------------------------------------------------------------------------- /spark-streaming-tests/src/test/scala/org/apache/spark/streaming/ClockWrapper.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.streaming 2 | 3 | import org.apache.spark.util.ManualClock 4 | 5 | /** 6 | * This class is defined in this package as the ManualClock is 7 | * private in the "spark" package 8 | * 9 | * @param ssc streaming context 10 | */ 11 | class ClockWrapper(ssc: StreamingContext) { 12 | 13 | def getTimeMillis(): Long = manualClock().getTimeMillis() 14 | 15 | def setTime(timeToSet: Long) = manualClock().setTime(timeToSet) 16 | 17 | def advance(timeToAdd: Long) = manualClock().advance(timeToAdd) 18 | 19 | def waitTillTime(targetTime: Long): Long = manualClock().waitTillTime(targetTime) 20 | 21 | private def manualClock(): ManualClock = { 22 | ssc.scheduler.clock.asInstanceOf[ManualClock] 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /spark-streaming/README.md: -------------------------------------------------------------------------------- 1 | # Apache Spark with Scala training course 2 | 3 | To run: 4 | SPARK_HOME/bin/spark-submit --conf spark.driver.userClassPathFirst=true --class "com.supergloo.SlackStreamingApp" --master spark://MASTER:7077 ./target/scala-2.11/spark-streaming-example-assembly-1.0.jar local[5] YOUR_SLACK_KEY output 5 | 
-------------------------------------------------------------------------------- /spark-streaming/build.ORIG: -------------------------------------------------------------------------------- 1 | name := "spark-streaming-example" 2 | 3 | version := "1.0" 4 | 5 | assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false) 6 | 7 | scalaVersion := "2.11.8" 8 | 9 | resolvers += "jitpack" at "https://jitpack.io" 10 | 11 | libraryDependencies ++= Seq( 12 | "org.apache.spark" %% "spark-streaming" % "1.6.1" % "provided", 13 | "org.scalaj" %% "scalaj-http" % "2.3.0", 14 | "org.jfarcand" % "wcs" % "1.5" 15 | ) 16 | -------------------------------------------------------------------------------- /spark-streaming/build.sbt: -------------------------------------------------------------------------------- 1 | name := "spark-streaming-example" 2 | 3 | version := "1.0" 4 | 5 | assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false) 6 | 7 | excludedJars in assembly <<= (fullClasspath in assembly) map { cp => 8 | cp filter { 9 | i => i.data.getName == "slf4j-api-1.7.12.jar" 10 | } 11 | } 12 | 13 | scalaVersion := "2.11.8" 14 | 15 | resolvers += "jitpack" at "https://jitpack.io" 16 | 17 | libraryDependencies ++= Seq( 18 | "org.apache.spark" %% "spark-streaming" % "1.6.1" % "provided", 19 | // comment above line and uncomment the following to run in sbt 20 | // "org.apache.spark" %% "spark-streaming" % "1.6.1", 21 | "org.scalaj" %% "scalaj-http" % "2.3.0", 22 | "org.jfarcand" % "wcs" % "1.5" 23 | ) 24 | -------------------------------------------------------------------------------- /spark-streaming/project/assembly.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.12.0") 2 | -------------------------------------------------------------------------------- /spark-streaming/project/build.properties: 
-------------------------------------------------------------------------------- 1 | sbt.version = 0.13.11 -------------------------------------------------------------------------------- /spark-streaming/src/main/scala/com/supergloo/SlackReceiver.scala: -------------------------------------------------------------------------------- 1 | package com.supergloo 2 | 3 | import java.net.URI 4 | 5 | import org.apache.spark.Logging 6 | import org.apache.spark.storage.StorageLevel 7 | import org.apache.spark.streaming.receiver.Receiver 8 | import org.jfarcand.wcs.{TextListener, WebSocket} 9 | 10 | import scala.util.parsing.json.JSON 11 | import scalaj.http.Http 12 | 13 | /** 14 | * Spark Streaming Example Slack Receiver from Slack 15 | */ 16 | class SlackReceiver(token: String) extends Receiver[String](StorageLevel.MEMORY_ONLY) 17 | with Runnable with Logging { 18 | 19 | private val slackUrl = "https://slack.com/api/rtm.start" 20 | 21 | @transient 22 | private var thread: Thread = _ 23 | 24 | override def onStart(): Unit = { 25 | thread = new Thread(this) 26 | thread.start() 27 | } 28 | 29 | override def onStop(): Unit = { 30 | thread.interrupt() 31 | } 32 | 33 | override def run(): Unit = { 34 | receive() 35 | } 36 | 37 | private def receive(): Unit = { 38 | val webSocket = WebSocket().open(webSocketUrl()) 39 | webSocket.listener(new TextListener { 40 | override def onMessage(message: String) { 41 | store(message) 42 | } 43 | }) 44 | } 45 | 46 | private def webSocketUrl(): String = { 47 | val response = Http(slackUrl).param("token", token).asString.body 48 | JSON.parseFull(response).get.asInstanceOf[Map[String, Any]].get("url").get.toString 49 | } 50 | 51 | } -------------------------------------------------------------------------------- /spark-streaming/src/main/scala/com/supergloo/SlackStreamingApp.scala: -------------------------------------------------------------------------------- 1 | package com.supergloo 2 | 3 | import org.apache.spark.SparkConf 4 | import 
org.apache.spark.streaming.{Seconds, StreamingContext} 5 | 6 | /** 7 | * Spark Streaming Example App 8 | */ 9 | object SlackStreamingApp { 10 | 11 | def main(args: Array[String]) { 12 | val conf = new SparkConf().setMaster(args(0)).setAppName("SlackStreaming") 13 | val ssc = new StreamingContext(conf, Seconds(5)) 14 | val stream = ssc.receiverStream(new SlackReceiver(args(1))) 15 | stream.print() 16 | if (args.length > 2) { 17 | stream.saveAsTextFiles(args(2)) 18 | } 19 | ssc.start() 20 | ssc.awaitTermination() 21 | } 22 | 23 | } --------------------------------------------------------------------------------