├── Chapter01
└── Chapter1.scala
├── Chapter02
└── Chapter2.scala
├── Chapter03
└── chapter3.scala
├── Chapter04
└── chapter4.scala
├── Chapter05
├── Chapter5.scala
├── SparkBookChapter5.zip
└── SparkBookChapter5
│ ├── SparkBookChapter5
│ ├── .DS_Store
│ └── Chapter5
│ │ ├── .DS_Store
│ │ └── code
│ │ ├── .DS_Store
│ │ └── p1
│ │ ├── .DS_Store
│ │ ├── project
│ │ ├── .DS_Store
│ │ └── target
│ │ │ ├── .DS_Store
│ │ │ ├── config-classes
│ │ │ ├── $54139fcdd8ccf3cf91c0$$anonfun$$sbtdef$1.class
│ │ │ ├── $54139fcdd8ccf3cf91c0$.class
│ │ │ ├── $54139fcdd8ccf3cf91c0.cache
│ │ │ ├── $54139fcdd8ccf3cf91c0.class
│ │ │ ├── $8f473ff6075c0d114f2b$$anonfun$$sbtdef$1.class
│ │ │ ├── $8f473ff6075c0d114f2b$.class
│ │ │ ├── $8f473ff6075c0d114f2b.cache
│ │ │ ├── $8f473ff6075c0d114f2b.class
│ │ │ ├── $ad7bd08a723d0037f87c$$anonfun$$sbtdef$1.class
│ │ │ ├── $ad7bd08a723d0037f87c$.class
│ │ │ ├── $ad7bd08a723d0037f87c.cache
│ │ │ ├── $ad7bd08a723d0037f87c.class
│ │ │ ├── $af6f748636a17add42c9$$anonfun$$sbtdef$1.class
│ │ │ ├── $af6f748636a17add42c9$.class
│ │ │ ├── $af6f748636a17add42c9.cache
│ │ │ ├── $af6f748636a17add42c9.class
│ │ │ ├── $b82be0445fee798245c6$$anonfun$$sbtdef$1.class
│ │ │ ├── $b82be0445fee798245c6$.class
│ │ │ ├── $b82be0445fee798245c6.cache
│ │ │ └── $b82be0445fee798245c6.class
│ │ │ ├── resolution-cache
│ │ │ ├── .DS_Store
│ │ │ ├── default
│ │ │ │ ├── .DS_Store
│ │ │ │ └── p1-build
│ │ │ │ │ ├── .DS_Store
│ │ │ │ │ └── scala_2.10
│ │ │ │ │ ├── .DS_Store
│ │ │ │ │ └── sbt_0.13
│ │ │ │ │ └── 0.1-SNAPSHOT
│ │ │ │ │ ├── resolved.xml.properties
│ │ │ │ │ └── resolved.xml.xml
│ │ │ └── reports
│ │ │ │ ├── default-p1-build-compile-internal.xml
│ │ │ │ ├── default-p1-build-compile.xml
│ │ │ │ ├── default-p1-build-docs.xml
│ │ │ │ ├── default-p1-build-optional.xml
│ │ │ │ ├── default-p1-build-plugin.xml
│ │ │ │ ├── default-p1-build-pom.xml
│ │ │ │ ├── default-p1-build-provided.xml
│ │ │ │ ├── default-p1-build-runtime-internal.xml
│ │ │ │ ├── default-p1-build-runtime.xml
│ │ │ │ ├── default-p1-build-scala-tool.xml
│ │ │ │ ├── default-p1-build-sources.xml
│ │ │ │ ├── default-p1-build-test-internal.xml
│ │ │ │ ├── default-p1-build-test.xml
│ │ │ │ ├── ivy-report.css
│ │ │ │ └── ivy-report.xsl
│ │ │ └── streams
│ │ │ ├── $global
│ │ │ ├── $global
│ │ │ │ └── $global
│ │ │ │ │ └── streams
│ │ │ │ │ └── out
│ │ │ ├── dependencyPositions
│ │ │ │ └── $global
│ │ │ │ │ └── streams
│ │ │ │ │ └── update_cache_2.10
│ │ │ │ │ ├── input_dsp
│ │ │ │ │ └── output_dsp
│ │ │ ├── ivyConfiguration
│ │ │ │ └── $global
│ │ │ │ │ └── streams
│ │ │ │ │ └── out
│ │ │ ├── ivySbt
│ │ │ │ └── $global
│ │ │ │ │ └── streams
│ │ │ │ │ └── out
│ │ │ ├── projectDescriptors
│ │ │ │ └── $global
│ │ │ │ │ └── streams
│ │ │ │ │ └── out
│ │ │ └── update
│ │ │ │ └── $global
│ │ │ │ └── streams
│ │ │ │ ├── out
│ │ │ │ └── update_cache_2.10
│ │ │ │ ├── inputs
│ │ │ │ └── output
│ │ │ ├── .DS_Store
│ │ │ ├── compile
│ │ │ ├── $global
│ │ │ │ └── $global
│ │ │ │ │ └── discoveredMainClasses
│ │ │ │ │ └── data
│ │ │ ├── compile
│ │ │ │ └── $global
│ │ │ │ │ └── streams
│ │ │ │ │ └── out
│ │ │ ├── compileIncremental
│ │ │ │ └── $global
│ │ │ │ │ └── streams
│ │ │ │ │ ├── export
│ │ │ │ │ └── out
│ │ │ ├── copyResources
│ │ │ │ └── $global
│ │ │ │ │ └── streams
│ │ │ │ │ ├── copy-resources
│ │ │ │ │ └── out
│ │ │ ├── dependencyClasspath
│ │ │ │ └── $global
│ │ │ │ │ └── streams
│ │ │ │ │ └── export
│ │ │ ├── exportedProducts
│ │ │ │ └── $global
│ │ │ │ │ └── streams
│ │ │ │ │ └── export
│ │ │ ├── externalDependencyClasspath
│ │ │ │ └── $global
│ │ │ │ │ └── streams
│ │ │ │ │ └── export
│ │ │ ├── internalDependencyClasspath
│ │ │ │ └── $global
│ │ │ │ │ └── streams
│ │ │ │ │ └── export
│ │ │ ├── managedClasspath
│ │ │ │ └── $global
│ │ │ │ │ └── streams
│ │ │ │ │ └── export
│ │ │ ├── unmanagedClasspath
│ │ │ │ └── $global
│ │ │ │ │ └── streams
│ │ │ │ │ └── export
│ │ │ └── unmanagedJars
│ │ │ │ └── $global
│ │ │ │ └── streams
│ │ │ │ └── export
│ │ │ └── runtime
│ │ │ ├── dependencyClasspath
│ │ │ └── $global
│ │ │ │ └── streams
│ │ │ │ └── export
│ │ │ ├── exportedProducts
│ │ │ └── $global
│ │ │ │ └── streams
│ │ │ │ └── export
│ │ │ ├── externalDependencyClasspath
│ │ │ └── $global
│ │ │ │ └── streams
│ │ │ │ └── export
│ │ │ ├── fullClasspath
│ │ │ └── $global
│ │ │ │ └── streams
│ │ │ │ └── export
│ │ │ ├── internalDependencyClasspath
│ │ │ └── $global
│ │ │ │ └── streams
│ │ │ │ └── export
│ │ │ ├── managedClasspath
│ │ │ └── $global
│ │ │ │ └── streams
│ │ │ │ └── export
│ │ │ ├── unmanagedClasspath
│ │ │ └── $global
│ │ │ │ └── streams
│ │ │ │ └── export
│ │ │ └── unmanagedJars
│ │ │ └── $global
│ │ │ └── streams
│ │ │ └── export
│ │ ├── simple.sbt
│ │ ├── src
│ │ ├── .DS_Store
│ │ └── main
│ │ │ ├── .DS_Store
│ │ │ └── scala
│ │ │ ├── .DS_Store
│ │ │ ├── CustomReceiver.scala
│ │ │ ├── TFLArrivalPredictionsByLine.scala
│ │ │ ├── TFLCustomReceiver.scala
│ │ │ └── TFLStreamingApp.scala
│ │ └── target
│ │ ├── .DS_Store
│ │ ├── .history
│ │ ├── resolution-cache
│ │ ├── reports
│ │ │ ├── ivy-report.css
│ │ │ ├── ivy-report.xsl
│ │ │ ├── simple-project-simple-project_2.11-compile-internal.xml
│ │ │ ├── simple-project-simple-project_2.11-compile.xml
│ │ │ ├── simple-project-simple-project_2.11-docs.xml
│ │ │ ├── simple-project-simple-project_2.11-optional.xml
│ │ │ ├── simple-project-simple-project_2.11-plugin.xml
│ │ │ ├── simple-project-simple-project_2.11-pom.xml
│ │ │ ├── simple-project-simple-project_2.11-provided.xml
│ │ │ ├── simple-project-simple-project_2.11-runtime-internal.xml
│ │ │ ├── simple-project-simple-project_2.11-runtime.xml
│ │ │ ├── simple-project-simple-project_2.11-scala-tool.xml
│ │ │ ├── simple-project-simple-project_2.11-sources.xml
│ │ │ ├── simple-project-simple-project_2.11-test-internal.xml
│ │ │ ├── simple-project-simple-project_2.11-test.xml
│ │ │ ├── spark-streaming-example-spark-streaming-example_2.11-compile-internal.xml
│ │ │ ├── spark-streaming-example-spark-streaming-example_2.11-compile.xml
│ │ │ ├── spark-streaming-example-spark-streaming-example_2.11-docs.xml
│ │ │ ├── spark-streaming-example-spark-streaming-example_2.11-optional.xml
│ │ │ ├── spark-streaming-example-spark-streaming-example_2.11-plugin.xml
│ │ │ ├── spark-streaming-example-spark-streaming-example_2.11-pom.xml
│ │ │ ├── spark-streaming-example-spark-streaming-example_2.11-provided.xml
│ │ │ ├── spark-streaming-example-spark-streaming-example_2.11-runtime-internal.xml
│ │ │ ├── spark-streaming-example-spark-streaming-example_2.11-runtime.xml
│ │ │ ├── spark-streaming-example-spark-streaming-example_2.11-scala-tool.xml
│ │ │ ├── spark-streaming-example-spark-streaming-example_2.11-sources.xml
│ │ │ ├── spark-streaming-example-spark-streaming-example_2.11-test-internal.xml
│ │ │ └── spark-streaming-example-spark-streaming-example_2.11-test.xml
│ │ ├── simple-project
│ │ │ └── simple-project_2.11
│ │ │ │ └── 1.0
│ │ │ │ ├── resolved.xml.properties
│ │ │ │ └── resolved.xml.xml
│ │ └── spark-streaming-example
│ │ │ └── spark-streaming-example_2.11
│ │ │ └── 1.0
│ │ │ ├── resolved.xml.properties
│ │ │ └── resolved.xml.xml
│ │ ├── scala-2.11
│ │ ├── .DS_Store
│ │ ├── classes
│ │ │ ├── CustomReceiver$$anon$1.class
│ │ │ ├── CustomReceiver$$anonfun$1.class
│ │ │ ├── CustomReceiver$$anonfun$2.class
│ │ │ ├── CustomReceiver$$anonfun$3.class
│ │ │ ├── CustomReceiver$.class
│ │ │ ├── CustomReceiver.class
│ │ │ ├── TFLArrivalPredictionsByLine.class
│ │ │ ├── TFLCustomReceiver$$anon$1.class
│ │ │ ├── TFLCustomReceiver$.class
│ │ │ ├── TFLCustomReceiver.class
│ │ │ ├── TFLStreamingApp$.class
│ │ │ └── TFLStreamingApp.class
│ │ └── spark-streaming-example_2.11-1.0.jar
│ │ └── streams
│ │ ├── $global
│ │ ├── $global
│ │ │ └── $global
│ │ │ │ └── streams
│ │ │ │ └── out
│ │ ├── dependencyPositions
│ │ │ └── $global
│ │ │ │ └── streams
│ │ │ │ └── update_cache_2.11
│ │ │ │ ├── input_dsp
│ │ │ │ └── output_dsp
│ │ ├── ivyConfiguration
│ │ │ └── $global
│ │ │ │ └── streams
│ │ │ │ └── out
│ │ ├── ivySbt
│ │ │ └── $global
│ │ │ │ └── streams
│ │ │ │ └── out
│ │ ├── projectDescriptors
│ │ │ └── $global
│ │ │ │ └── streams
│ │ │ │ └── out
│ │ └── update
│ │ │ └── $global
│ │ │ └── streams
│ │ │ ├── out
│ │ │ └── update_cache_2.11
│ │ │ ├── inputs
│ │ │ └── output
│ │ ├── compile
│ │ ├── $global
│ │ │ └── $global
│ │ │ │ └── discoveredMainClasses
│ │ │ │ └── data
│ │ ├── compile
│ │ │ └── $global
│ │ │ │ └── streams
│ │ │ │ └── out
│ │ ├── compileIncremental
│ │ │ └── $global
│ │ │ │ └── streams
│ │ │ │ ├── export
│ │ │ │ └── out
│ │ ├── copyResources
│ │ │ └── $global
│ │ │ │ └── streams
│ │ │ │ ├── copy-resources
│ │ │ │ └── out
│ │ ├── dependencyClasspath
│ │ │ └── $global
│ │ │ │ └── streams
│ │ │ │ └── export
│ │ ├── exportedProducts
│ │ │ └── $global
│ │ │ │ └── streams
│ │ │ │ └── export
│ │ ├── externalDependencyClasspath
│ │ │ └── $global
│ │ │ │ └── streams
│ │ │ │ └── export
│ │ ├── incCompileSetup
│ │ │ └── $global
│ │ │ │ ├── .DS_Store
│ │ │ │ └── streams
│ │ │ │ └── inc_compile_2.11
│ │ ├── internalDependencyClasspath
│ │ │ └── $global
│ │ │ │ └── streams
│ │ │ │ └── export
│ │ ├── mainClass
│ │ │ └── $global
│ │ │ │ └── streams
│ │ │ │ └── out
│ │ ├── managedClasspath
│ │ │ └── $global
│ │ │ │ └── streams
│ │ │ │ └── export
│ │ ├── packageBin
│ │ │ └── $global
│ │ │ │ └── streams
│ │ │ │ ├── inputs
│ │ │ │ ├── out
│ │ │ │ └── output
│ │ ├── run
│ │ │ └── $global
│ │ │ │ └── streams
│ │ │ │ └── out
│ │ ├── unmanagedClasspath
│ │ │ └── $global
│ │ │ │ └── streams
│ │ │ │ └── export
│ │ └── unmanagedJars
│ │ │ └── $global
│ │ │ └── streams
│ │ │ └── export
│ │ └── runtime
│ │ ├── dependencyClasspath
│ │ └── $global
│ │ │ └── streams
│ │ │ └── export
│ │ ├── exportedProducts
│ │ └── $global
│ │ │ └── streams
│ │ │ └── export
│ │ ├── externalDependencyClasspath
│ │ └── $global
│ │ │ └── streams
│ │ │ └── export
│ │ ├── fullClasspath
│ │ └── $global
│ │ │ └── streams
│ │ │ └── export
│ │ ├── internalDependencyClasspath
│ │ └── $global
│ │ │ └── streams
│ │ │ └── export
│ │ ├── managedClasspath
│ │ └── $global
│ │ │ └── streams
│ │ │ └── export
│ │ ├── unmanagedClasspath
│ │ └── $global
│ │ │ └── streams
│ │ │ └── export
│ │ └── unmanagedJars
│ │ └── $global
│ │ └── streams
│ │ └── export
│ └── __MACOSX
│ └── SparkBookChapter5
│ ├── ._.DS_Store
│ └── Chapter5
│ ├── ._.DS_Store
│ └── code
│ ├── ._.DS_Store
│ └── p1
│ ├── ._.DS_Store
│ ├── ._simple.sbt
│ ├── project
│ ├── ._.DS_Store
│ └── target
│ │ ├── ._.DS_Store
│ │ ├── resolution-cache
│ │ ├── ._.DS_Store
│ │ └── default
│ │ │ ├── ._.DS_Store
│ │ │ └── p1-build
│ │ │ ├── ._.DS_Store
│ │ │ └── scala_2.10
│ │ │ └── ._.DS_Store
│ │ └── streams
│ │ └── ._.DS_Store
│ ├── src
│ ├── ._.DS_Store
│ └── main
│ │ ├── ._.DS_Store
│ │ └── scala
│ │ └── ._.DS_Store
│ └── target
│ ├── ._.DS_Store
│ ├── scala-2.11
│ └── ._.DS_Store
│ └── streams
│ └── compile
│ └── incCompileSetup
│ └── $global
│ └── ._.DS_Store
├── Chapter06
└── Chapter6.scala
├── Chapter07
├── Chapter7.scala
├── Chapter7
│ └── Chapter7
│ │ ├── .DS_Store
│ │ ├── .classpath
│ │ ├── .project
│ │ ├── .settings
│ │ └── org.eclipse.jdt.core.prefs
│ │ ├── bin
│ │ ├── Category.class
│ │ ├── Preprocess.class
│ │ ├── Product.class
│ │ ├── Review.class
│ │ └── ReviewMetaData.class
│ │ └── src
│ │ ├── Category.java
│ │ ├── Preprocess.java
│ │ ├── Product.java
│ │ ├── Review.java
│ │ └── ReviewMetaData.java
└── __MACOSX
│ └── Chapter7
│ └── ._.DS_Store
├── Chapter08
└── Chapter8.R
├── Chapter09
├── Chapter9.scala
└── Chapter9.zip
├── Chapter10
└── Chapter10.scala
├── Chapter11
└── Chapter11.scala
├── Chapter12
└── Chapter12.scala
├── LICENSE
└── README.md
/Chapter01/Chapter1.scala:
--------------------------------------------------------------------------------
1 | //Code for Chapter 1
2 | //To be executed in Spark 2.2 shell
3 |
4 | //Code for Introducing SparkSession section
5 | import org.apache.spark.sql.types._
6 | spark.conf.set("spark.executor.cores", "2")
7 | spark.conf.set("spark.executor.memory", "4g")
8 |
9 | val recordSchema = new StructType().add("sample", "long").add("cThick", "integer").add("uCSize", "integer").add("uCShape", "integer").add("mAdhes", "integer").add("sECSize", "integer").add("bNuc", "integer").add("bChrom", "integer").add("nNuc", "integer").add("mitosis", "integer").add("clas", "integer")
10 | //Replace directory for the input file with location of the file on your machine.
11 | val df = spark.read.format("csv").option("header", false).schema(recordSchema).load("file:///Users/aurobindosarkar/Downloads/breast-cancer-wisconsin.data")
12 | df.show()
13 |
14 | df.createOrReplaceTempView("cancerTable")
15 | val sqlDF = spark.sql("SELECT sample, bNuc from cancerTable")
16 | sqlDF.show()
17 |
18 | case class CancerClass(sample: Long, cThick: Int, uCSize: Int, uCShape: Int, mAdhes: Int, sECSize: Int, bNuc: Int, bChrom: Int, nNuc: Int, mitosis: Int, clas: Int)
19 | //Replace directory for the input file with location of the file on your machine.
20 | val cancerDS = spark.sparkContext.textFile("file:///Users/aurobindosarkar/Documents/SparkBook/data/breast-cancer-wisconsin.data").map(_.split(",")).map(attributes => CancerClass(attributes(0).trim.toLong, attributes(1).trim.toInt, attributes(2).trim.toInt, attributes(3).trim.toInt, attributes(4).trim.toInt, attributes(5).trim.toInt, attributes(6).trim.toInt, attributes(7).trim.toInt, attributes(8).trim.toInt, attributes(9).trim.toInt, attributes(10).trim.toInt)).toDS()
21 | def binarize(s: Int): Int = s match {case 2 => 0 case 4 => 1 }
22 | spark.udf.register("udfValueToCategory", (arg: Int) => binarize(arg))
23 | val sqlUDF = spark.sql("SELECT *, udfValueToCategory(clas) from cancerTable")
24 | sqlUDF.show()
25 |
26 | spark.catalog.currentDatabase
27 | spark.catalog.isCached("cancerTable")
28 |
29 | spark.catalog.cacheTable("cancerTable")
30 | spark.catalog.isCached("cancerTable")
31 | spark.catalog.clearCache
32 | spark.catalog.listDatabases.show()
33 |
34 | spark.catalog.listDatabases.take(1)
35 | spark.catalog.listTables.show()
36 | spark.catalog.dropTempView("cancerTable")
37 | spark.catalog.listTables.show()
38 |
39 | //Code for Understanding Resilient Distributed Datasets (RDDs) section
40 | //Replace directory for the input file with location of the file on your machine.
41 | val cancerRDD = sc.textFile("file:///Users/aurobindosarkar/Downloads/breast-cancer-wisconsin.data", 4)
42 | cancerRDD.partitions.size
43 | import spark.implicits._
44 | val cancerDF = cancerRDD.toDF()
45 | import org.apache.spark.sql.Row
46 | def row(line: List[String]): Row = { Row(line(0).toLong, line(1).toInt, line(2).toInt, line(3).toInt, line(4).toInt, line(5).toInt, line(6).toInt, line(7).toInt, line(8).toInt, line(9).toInt, line(10).toInt) }
47 | val data = cancerRDD.map(_.split(",").to[List]).map(row)
48 | val cancerDF = spark.createDataFrame(data, recordSchema)
49 |
50 | //Code for Understanding DataFrames and Datasets section
51 | case class RestClass(name: String, street: String, city: String, phone: String, cuisine: String)
52 | //Replace directory for the input files with location of the files on your machine.
53 | val rest1DS = spark.sparkContext.textFile("file:///Users/aurobindosarkar/Documents/SparkBook/data/zagats.csv").map(_.split(",")).map(attributes => RestClass(attributes(0).trim, attributes(1).trim, attributes(2).trim, attributes(3).trim, attributes(4).trim)).toDS()
54 | val rest2DS = spark.sparkContext.textFile("file:///Users/aurobindosarkar/Documents/SparkBook/data/fodors.csv").map(_.split(",")).map(attributes => RestClass(attributes(0).trim, attributes(1).trim, attributes(2).trim, attributes(3).trim, attributes(4).trim)).toDS()
55 | def formatPhoneNo(s: String): String = s match {case s if s.contains("/") => s.replaceAll("/", "-").replaceAll("- ", "-").replaceAll("--", "-") case _ => s }
56 | val udfStandardizePhoneNos = udf[String, String]( x => formatPhoneNo(x) )
57 | val rest2DSM1 = rest2DS.withColumn("stdphone", udfStandardizePhoneNos(rest2DS.col("phone")))
58 | rest1DS.createOrReplaceTempView("rest1Table")
59 | rest2DSM1.createOrReplaceTempView("rest2Table")
60 | spark.sql("SELECT count(*) from rest1Table, rest2Table where rest1Table.phone = rest2Table.stdphone").show()
61 | val sqlDF = spark.sql("SELECT a.name, b.name, a.phone, b.stdphone from rest1Table a, rest2Table b where a.phone = b.stdphone")
62 | sqlDF.show()
63 |
64 | //Code for Understanding Catalyst transformations section
65 | case class PinTrans(bidid: String, timestamp: String, ipinyouid: String, useragent: String, IP: String, region: String, city: String, adexchange: String, domain: String, url:String, urlid: String, slotid: String, slotwidth: String, slotheight: String, slotvisibility: String, slotformat: String, slotprice: String, creative: String, bidprice: String)
66 | case class PinRegion(region: String, regionName: String)
67 | //Replace directory for the input files with location of the files on your machine.
68 | val pintransDF = spark.sparkContext.textFile("file:///Users/aurobindosarkar/Downloads/make-ipinyou-data-master/original-data/ipinyou.contest.dataset/training1st/bid.20130314.txt").map(_.split("\t")).map(attributes => PinTrans(attributes(0).trim, attributes(1).trim, attributes(2).trim, attributes(3).trim, attributes(4).trim, attributes(5).trim, attributes(6).trim, attributes(7).trim, attributes(8).trim, attributes(9).trim, attributes(10).trim, attributes(11).trim, attributes(12).trim, attributes(13).trim, attributes(14).trim, attributes(15).trim, attributes(16).trim, attributes(17).trim, attributes(18).trim)).toDF()
69 | val pinregionDF = spark.sparkContext.textFile("file:///Users/aurobindosarkar/Downloads/make-ipinyou-data-master/original-data/ipinyou.contest.dataset/region.en.txt").map(_.split("\t")).map(attributes => PinRegion(attributes(0).trim, attributes(1).trim)).toDF()
70 |
71 | def benchmark(name: String)(f: => Unit) {
72 | val startTime = System.nanoTime
73 | f
74 | val endTime = System.nanoTime
75 | println(s"Time taken in $name: " + (endTime - startTime).toDouble / 1000000000 + " seconds")
76 | }
77 |
78 | spark.conf.set("spark.sql.codegen.wholeStage", false)
79 | benchmark("Spark 1.6") {
80 | pintransDF.join(pinregionDF, "region").count()
81 | }
82 | spark.conf.set("spark.sql.codegen.wholeStage", true)
83 | benchmark("Spark 2.2") { pintransDF.join(pinregionDF, "region").count()
84 | }
85 |
86 | pintransDF.join(pinregionDF, "region").selectExpr("count(*)").explain(true)
87 |
88 | //Code for Introducing Project Tungsten section
89 | pintransDF.join(pinregionDF, "region").selectExpr("count(*)").explain()
90 |
91 | //Code for Using Spark SQL for streaming applications section
92 | import org.apache.spark.sql.types._
93 | import org.apache.spark.sql.functions._
94 | import scala.concurrent.duration._
95 | import org.apache.spark.sql.streaming.ProcessingTime
96 | import org.apache.spark.sql.streaming.OutputMode.Complete
97 |
98 | val bidSchema = new StructType().add("bidid", StringType).add("timestamp", StringType).add("ipinyouid", StringType).add("useragent", StringType).add("IP", StringType).add("region", IntegerType).add("city", IntegerType).add("adexchange", StringType).add("domain", StringType).add("url", StringType).add("urlid", StringType).add("slotid", StringType).add("slotwidth", StringType).add("slotheight", StringType).add("slotvisibility", StringType).add("slotformat", StringType).add("slotprice", StringType).add("creative", StringType).add("bidprice", StringType)
99 | val streamingInputDF = spark.readStream.format("csv").schema(bidSchema).option("header", false).option("inferSchema", true).option("sep", "\t").option("maxFilesPerTrigger", 1).load("file:///Users/aurobindosarkar/Downloads/make-ipinyou-data-master/original-data/ipinyou.contest.dataset/bidfiles")
100 | val streamingCountsDF = streamingInputDF.groupBy($"city").count()
101 | val query = streamingCountsDF.writeStream.format("console").trigger(ProcessingTime(20.seconds)).queryName("counts").outputMode(Complete).start()
102 | spark.streams.active.foreach(println)
103 | //Execute the following stop() method after you have executed the code in the next section (otherwise you will not see results of the code in the next section)
104 | query.stop()
105 |
106 | //Code for Understanding Structured Streaming Internals section
107 | spark.streams.active(0).explain
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
--------------------------------------------------------------------------------
/Chapter02/Chapter2.scala:
--------------------------------------------------------------------------------
1 | //Code for Chapter 2
2 | //For some sections, please follow the sequence of execution in the book. For example, in the MySQL section - certain commands need to be executed on MySQL.
3 |
4 | // This file contains Scala code to be executed in Spark shell only.
5 | //Code for Using Spark with relational data section. Please follow the step-wise instructions in the book.
6 | //This section has dependencies on MySQL setup steps; follow the sequence of execution described in the book.
7 | import org.apache.spark.sql.types._
8 | import org.apache.spark.sql.Row
9 | import java.util.Properties
10 | val inFileRDD = sc.textFile("file:///Users/aurobindosarkar/Downloads/UCI Online Retail.txt")
11 | val allRowsRDD = inFileRDD.map(line =>line.split("\t").map(_.trim))
12 | val header = allRowsRDD.first
13 | val data = allRowsRDD.filter(_(0) != header(0))
14 | val fields = Seq(
15 | StructField("invoiceNo", StringType, true),
16 | StructField("stockCode", StringType, true),
17 | StructField("description", StringType, true),
18 | StructField("quantity", IntegerType, true),
19 | StructField("invoiceDate", StringType, true),
20 | StructField("unitPrice", DoubleType, true),
21 | StructField("customerID", StringType, true),
22 | StructField("country", StringType, true)
23 | )
24 | val schema = StructType(fields)
25 | val rowRDD = data.map(attributes => Row(attributes(0), attributes(1), attributes(2), attributes(3).toInt, attributes(4), attributes(5).toDouble, attributes(6), attributes(7)))
26 | val r1DF = spark.createDataFrame(rowRDD, schema)
27 | val ts = unix_timestamp($"invoiceDate","dd/MM/yyHH:mm").cast("timestamp")
28 | val r2DF = r1DF.withColumn("ts", ts)
29 | r2DF.show()
30 | r2DF.createOrReplaceTempView("retailTable")
31 | val r3DF = spark.sql("select * from retailTable where ts< '2011-12-01'")
32 | val r4DF = spark.sql("select * from retailTable where ts>= '2011-12-01'")
33 | val selectData = r4DF.select("invoiceNo", "stockCode", "description", "quantity", "unitPrice", "customerID", "country", "ts")
34 | val writeData = selectData.withColumnRenamed("ts", "invoiceDate")
35 | writeData.show()
36 |
37 | //Dependency on MySQL part.
38 | val dbUrl = "jdbc:mysql://localhost:3306/retailDB"
39 | val prop = new Properties()
40 | prop.setProperty("user", "retaildbuser")
41 | prop.setProperty("password", "mypass")
42 | writeData.write.mode("append").jdbc(dbUrl, "transactions", prop)
43 | //End of MySQL Dependency
44 |
45 | val selectData = r3DF.select("invoiceNo", "stockCode", "description", "quantity", "unitPrice", "customerID", "country", "ts")
46 | val writeData = selectData.withColumnRenamed("ts", "invoiceDate")
47 | writeData.select("*").write.format("json").save("hdfs://localhost:9000/Users/r3DF")
48 |
49 | //Code for Using Spark with MongoDB section
50 | //Download the connector and then start Spark shell as shown below:
51 | //./bin/spark-shell --jars /Users/aurobindosarkar/Downloads/mongo-spark-connector_2.11-2.2.0-assembly.jar
52 | //This section has dependencies on certain steps to be done in MongoDB environment
53 | import org.apache.spark.sql.SQLContext
54 | import org.apache.spark.{SparkConf, SparkContext}
55 | import com.mongodb.spark.MongoSpark
56 | import com.mongodb.spark.config.{ReadConfig, WriteConfig}
57 | val readConfig = ReadConfig(Map("uri" -> "mongodb://localhost:27017/nycschoolsDB.schools?readPreference=primaryPreferred"))
58 | val writeConfig = WriteConfig(Map("uri" -> "mongodb://localhost:27017/nycschoolsDB.outCollection"))
59 | case class School(dbn: String, school_name: String, boro: String, building_code: String, phone_number: String, fax_number: String, grade_span_min: String, grade_span_max: String, expgrade_span_min: String, expgrade_span_max: String, bus: String, subway: String, primary_address_line_1: String, city: String, state_code: String, zip: String, website: String, total_students: String, campus_name: String, school_type: String, overview_paragraph: String, program_highlights: String, language_classes: String, advancedplacement_courses: String, online_ap_courses: String, online_language_courses: String, extracurricular_activities: String, psal_sports_boys: String, psal_sports_girls: String, psal_sports_coed: String, school_sports: String, partner_cbo: String, partner_hospital: String, partner_highered: String, partner_cultural: String, partner_nonprofit: String, partner_corporate: String, partner_financial: String, partner_other: String, addtl_info1: String, addtl_info2: String, start_time: String, end_time: String, se_services: String, ell_programs: String, school_accessibility_description: String, number_programs: String, priority01: String, priority02: String, priority03: String, priority04: String, priority05: String, priority06: String, priority07: String, priority08: String, priority09: String, priority10: String, Location_1: String)
60 | val schoolsDF = MongoSpark.load(sc, readConfig).toDF[School]
61 | schoolsDF.take(1).foreach(println)
62 |
63 | //Code for Using Spark with JSON data section
64 | val reviewsDF = spark.read.json("file:///Users/aurobindosarkar/Downloads/reviews_Electronics_5.json")
65 | reviewsDF.printSchema()
66 | reviewsDF.createOrReplaceTempView("reviewsTable")
67 | val selectedDF = spark.sql("SELECT asin, overall, reviewTime, reviewerID, reviewerName FROM reviewsTable WHERE overall >= 3")
68 | selectedDF.show()
69 | val selectedJSONArrayElementDF = reviewsDF.select($"asin", $"overall", $"helpful").where($"helpful".getItem(0) < 3)
70 | selectedJSONArrayElementDF.show()
71 |
72 | //Code for Using Spark with Avro files. You will need to shift to Spark 2.1 for this section due to a reported bug in the spark-avro connector.
73 | //Start Spark shell as shown below:
74 | //Aurobindos-MacBook-Pro-2:spark-2.1.0-bin-hadoop2.7 aurobindosarkar$ bin/spark-shell --jars /Users/aurobindosarkar/Downloads/spark-avro_2.11-3.2.0.jar
75 | import com.databricks.spark.avro._
76 | val reviewsDF = spark.read.json("file:///Users/aurobindosarkar/Downloads/reviews_Electronics_5.json")
77 | reviewsDF.count()
78 | reviewsDF.filter("overall < 3").coalesce(1).write.avro("file:///Users/aurobindosarkar/Downloads/amazon_reviews/avro")
79 | val reviewsAvroDF = spark.read.avro("file:///Users/aurobindosarkar/Downloads/amazon_reviews/avro/part-00000-c6b6b423-70d6-440f-acbe-0de65a6a7f2e.avro")
80 | reviewsAvroDF.count()
81 | reviewsAvroDF.select("asin", "helpful", "overall", "reviewTime", "reviewerID", "reviewerName").show(5)
82 | spark.conf.set("spark.sql.avro.compression.codec", "deflate")
83 | spark.conf.set("spark.sql.avro.deflate.level", "5")
84 | val reviewsAvroDF = spark.read.avro("file:///Users/aurobindosarkar/Downloads/amazon_reviews/avro/part-00000-c6b6b423-70d6-440f-acbe-0de65a6a7f2e.avro")
85 | reviewsAvroDF.write.partitionBy("overall").avro("file:///Users/aurobindosarkar/Downloads/amazon_reviews/avro/partitioned")
86 |
87 | //Using Spark with Parquet files
88 | reviewsDF.filter("overall < 3").coalesce(1).write.parquet("file:///Users/aurobindosarkar/Downloads/amazon_reviews/parquet")
89 | val reviewsParquetDF = spark.read.parquet("file:///Users/aurobindosarkar/Downloads/amazon_reviews/parquet/part-00000-3b512935-ec11-48fa-8720-e52a6a29416b.snappy.parquet")
90 | reviewsParquetDF.createOrReplaceTempView("reviewsTable")
91 | val reviews1RatingsDF = spark.sql("select asin, overall, reviewerID, reviewerName from reviewsTable where overall < 2")
92 | reviews1RatingsDF.show(5, false)
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
--------------------------------------------------------------------------------
/Chapter03/chapter3.scala:
--------------------------------------------------------------------------------
1 | //Code for Chapter 3: To be executed in Spark shell
2 |
3 | //Code for Using Spark SQL for basic data analysis section
4 | import org.apache.spark.sql.types._
5 | import spark.implicits._
6 |
7 | val age = StructField("age", DataTypes.IntegerType)
8 | val job = StructField("job", DataTypes.StringType)
9 | val marital = StructField("marital", DataTypes.StringType)
10 | val edu = StructField("edu", DataTypes.StringType)
11 | val credit_default = StructField("credit_default", DataTypes.StringType)
12 | val housing = StructField("housing", DataTypes.StringType)
13 | val loan = StructField("loan", DataTypes.StringType)
14 | val contact = StructField("contact", DataTypes.StringType)
15 | val month = StructField("month", DataTypes.StringType)
16 | val day = StructField("day", DataTypes.StringType)
17 | val dur = StructField("dur", DataTypes.DoubleType)
18 | val campaign = StructField("campaign", DataTypes.DoubleType)
19 | val pdays = StructField("pdays", DataTypes.DoubleType)
20 | val prev = StructField("prev", DataTypes.DoubleType)
21 | val pout = StructField("pout", DataTypes.StringType)
22 | val emp_var_rate = StructField("emp_var_rate", DataTypes.DoubleType)
23 | val cons_price_idx = StructField("cons_price_idx", DataTypes.DoubleType)
24 | val cons_conf_idx = StructField("cons_conf_idx", DataTypes.DoubleType)
25 | val euribor3m = StructField("euribor3m", DataTypes.DoubleType)
26 | val nr_employed = StructField("nr_employed", DataTypes.DoubleType)
27 | val deposit = StructField("deposit", DataTypes.StringType)
28 |
29 | val fields = Array(age, job, marital, edu, credit_default, housing, loan, contact, month, day, dur, campaign, pdays, prev, pout, emp_var_rate, cons_price_idx, cons_conf_idx, euribor3m, nr_employed, deposit)
30 | val schema = StructType(fields)
31 | val df = spark.read.schema(schema).option("sep", ";").option("header", true).csv("file:///Users/aurobindosarkar/Downloads/bank-additional/bank-additional-full.csv")
32 | df.count()
33 |
34 | case class Call(age: Int, job: String, marital: String, edu: String, credit_default: String, housing: String, loan: String, contact: String, month: String, day: String, dur: Double, campaign: Double, pdays: Double, prev: Double, pout: String, emp_var_rate: Double, cons_price_idx: Double, cons_conf_idx: Double, euribor3m: Double, nr_employed: Double, deposit: String)
35 | val ds = df.as[Call]
36 | ds.printSchema()
37 |
38 | //Code for Identifying Missing Data section
39 | val dfMissing = spark.read.schema(schema).option("sep", ";").option("header", true).csv("file:///Users/aurobindosarkar/Downloads/bank-additional/bank-additional-full-with-missing.csv")
40 | val dsMissing = dfMissing.as[Call]
41 | dsMissing.groupBy("marital").count().show()
42 | dsMissing.groupBy("job").count().show()
43 |
44 | //Code for Computing Basic Statistics section
// Narrow projection of the numeric call-level columns plus the label column.
45 | case class CallStats(age: Int, dur: Double, campaign: Double, prev: Double, deposit: String)
46 | val dsSubset = ds.select($"age", $"dur", $"campaign", $"prev", $"deposit")
47 | dsSubset.show(5)
48 | val dsCallStats = dsSubset.as[CallStats]
// describe() yields count/mean/stddev/min/max for the numeric columns.
49 | dsSubset.describe().show()
// Pairwise covariance and Pearson correlation between age and call duration.
50 | val cov = dsSubset.stat.cov("age","dur")
51 | println("age to call duration : Covariance = %.4f".format(cov))
52 | val corr = dsSubset.stat.corr("age","dur")
53 | println("age to call duration : Correlation = %.4f".format(corr))
54 |
// Contingency table of age vs marital; freqItems with support 0.3 finds
// education values occurring in >=30% of rows (approximate algorithm).
55 | ds.stat.crosstab("age", "marital").orderBy("age_marital").show(10)
56 | val freq = df.stat.freqItems(Seq("edu"), 0.3)
57 | freq.collect()(0)
// relativeError = 0.0 requests exact quartiles (more expensive than approx).
58 | val quantiles = df.stat.approxQuantile("age", Array(0.25,0.5,0.75),0.0)
59 | dsCallStats.cache()
// Typed aggregators renamed to avoid clashing with the untyped SQL functions.
60 | import org.apache.spark.sql.expressions.scalalang.typed.{count => typedCount, avg => typedAvg, sum => typedSum}
// Per-deposit-outcome summary: customer count plus average calls/duration.
61 | (dsCallStats.groupByKey(callstats => callstats.deposit).agg(typedCount[CallStats](_.age).name("A"),typedAvg[CallStats](_.campaign).name("B"),typedAvg[CallStats](_.dur).name("C"),typedAvg[CallStats](_.prev).name("D")).withColumnRenamed("value", "E")).select($"E".name("TD Subscribed?"), $"A".name("Total Customers"), round($"B", 2).name("Avg calls(curr)"), round($"C", 2).name("Avg dur"), round($"D", 2).name("Avg calls(prev)")).show()
// Same aggregation keyed by customer age instead of deposit outcome.
62 | (dsCallStats.groupByKey(callstats => callstats.age).agg(typedCount[CallStats](_.age).name("A"),typedAvg[CallStats](_.campaign).name("B"),typedAvg[CallStats](_.dur).name("C"),typedAvg[CallStats](_.prev).name("D")).withColumnRenamed("value", "E")).select($"E".name("Age"), $"A".name("Total Customers"), round($"B", 2).name("Avg calls(curr)"), round($"C", 2).name("Avg dur"), round($"D", 2).name("Avg calls(prev)")).orderBy($"age").show(5)
63 |
64 | //Code for Identifying Data Outliers section
// Uses the older RDD-based MLlib API (not org.apache.spark.ml).
65 | import org.apache.spark.mllib.linalg.Vectors
66 | import org.apache.spark.mllib.clustering.KMeans
// Columns 10-13 are the Double fields dur/campaign/pdays/prev per the schema
// order above — TODO confirm indices if the field list ever changes.
67 | val vectors = df.rdd.map(r => Vectors.dense(r.getDouble(10),r.getDouble(11), r.getDouble(12), r.getDouble(13)))
// Cache before iterative training; KMeans makes multiple passes over the data.
68 | vectors.cache()
// k=2 clusters, 20 iterations; points far from both centers are outlier candidates.
69 | val kMeansModel = KMeans.train(vectors, 2, 20)
70 | kMeansModel.clusterCenters.foreach(println)
71 |
72 | //Follow the chapter for code on the Zeppelin section
73 |
74 | //Code for Sampling with Dataset API section
75 | import scala.collection.immutable.Map
// Per-stratum sampling fractions keyed by marital status; sampleBy draws an
// approximate stratified sample (seed 36L makes it reproducible).
76 | val fractions = Map("unknown" -> .10, "divorced" -> .15, "married" -> 0.5, "single" -> .25)
77 | val dsStratifiedSample = ds.stat.sampleBy("marital", fractions, 36L)
78 | dsStratifiedSample.count()
79 | dsStratifiedSample.groupBy("marital").count().orderBy("marital").show()
// Uniform 10% sample WITH replacement, for comparison with the stratified draw.
80 | val dsSampleWithReplacement = ds.sample(true, .10)
81 | dsSampleWithReplacement.groupBy("marital").count().orderBy("marital").show()
82 |
83 | //Code for Sampling with RDD API section
84 | import org.apache.spark.mllib.linalg.Vector
// Key each row by column 2 (marital) and keep the full field list as the value,
// producing a pair RDD suitable for sampleByKey.
85 | val rowsRDD = df.rdd.map(r => (r.getAs[String](2), List(r.getInt(0), r.getString(1), r.getString(2), r.getString(3), r.getString(4), r.getString(5), r.getString(6), r.getString(7), r.getString(8), r.getString(9), r.getDouble(10), r.getDouble(11), r.getDouble(12), r.getDouble(13), r.getString(14), r.getDouble(15), r.getDouble(16), r.getDouble(17), r.getDouble(18), r.getDouble(19), r.getString(20))))
86 | rowsRDD.take(2).foreach(println)
// NOTE(review): redefines `fractions` from the Dataset-API section above —
// legal when pasted line-by-line into spark-shell (REPL shadowing).
87 | val fractions = Map("unknown" -> .10, "divorced" -> .15, "married" -> 0.5, "single" -> .25)
// sampleByKey: one pass, approximate per-key counts; sampleByKeyExact: extra
// passes but hits the requested per-key sample sizes exactly.
88 | val rowsSampleRDD = rowsRDD.sampleByKey(true, fractions, 1)
89 | val rowsSampleRDDExact = rowsRDD.sampleByKeyExact(true, fractions, 1)
90 | println(rowsRDD.countByKey)
91 | println(rowsSampleRDD.countByKey)
92 | println(rowsSampleRDDExact.countByKey)
93 |
94 | //Code for Using Spark SQL for creating pivot tables
95 | val sourceDF = df.select($"job", $"marital", $"edu", $"housing", $"loan", $"contact", $"month", $"day", $"dur", $"campaign", $"pdays", $"prev", $"pout", $"deposit")
// Rows = marital status, columns = housing values, cells = counts.
96 | sourceDF.groupBy("marital").pivot("housing").agg(count("housing")).sort("marital").show()
// Passing the explicit value list to pivot() avoids a distinct-values scan and
// fixes the column order; toDF then renames the generated sum/avg columns.
97 | sourceDF.groupBy("job").pivot("marital", Seq("unknown", "divorced", "married", "single")).agg(round(sum("campaign"), 2), round(avg("campaign"), 2)).sort("job").toDF("Job", "U-Tot", "U-Avg", "D-Tot", "D-Avg", "M-Tot", "M-Avg", "S-Tot", "S-Avg").show()
98 | sourceDF.groupBy("job").pivot("marital", Seq("unknown", "divorced", "married", "single")).agg(round(sum("dur"), 2), round(avg("dur"), 2)).sort("job").toDF("Job", "U-Tot", "U-Avg", "D-Tot", "D-Avg", "M-Tot", "M-Avg", "S-Tot", "S-Avg").show()
99 | sourceDF.groupBy("job").pivot("marital", Seq("divorced", "married")).agg(round(avg("dur"), 2)).sort("job").show()
// Two grouping columns (job, housing) with a two-value pivot on marital.
100 | sourceDF.groupBy("job", "housing").pivot("marital", Seq("divorced", "married")).agg(round(avg("dur"), 2)).sort("job").show
101 |
102 | import org.apache.spark.sql._
// Month-by-deposit pivot; na.fill(0) replaces nulls for months with no rows
// so the CSV written below parses cleanly as numbers.
103 | val saveDF = sourceDF.groupBy("deposit").pivot("month", Seq("jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec")).agg(count("deposit")).sort("deposit").na.fill(0)
104 | val writer: DataFrameWriter[Row] = saveDF.write
105 | writer.format("csv").mode("overwrite").save("file:///Users/aurobindosarkar/Downloads/saveDF")
// Re-read the part files written above as raw text and split into columns.
106 | val dataRDD = sc.textFile("file:///Users/aurobindosarkar/Downloads/saveDF/*.csv").map(_.split(","))
// Column layout of the saved CSV; used to translate month names -> indices.
107 | val labels = List("deposit", "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec")
108 | val labelQ1 = List("jan", "feb", "mar")
109 | val labelQ2 = List("apr", "may", "jun")
110 | val labelQ3 = List("jul", "aug", "sep")
111 | val labelQ4 = List("oct", "nov", "dec")
112 | val indexQ1 = labelQ1.map(x => labels.indexOf(x))
113 | val indexQ2 = labelQ2.map(x => labels.indexOf(x))
114 | val indexQ3 = labelQ3.map(x => labels.indexOf(x))
115 | val indexQ4 = labelQ4.map(x => labels.indexOf(x))
// Per-row quarterly totals: sum the three month columns of each quarter.
116 | dataRDD.map(x => indexQ1.map(i => x(i).toDouble).sum).collect
117 | dataRDD.map(x => indexQ2.map(i => x(i).toDouble).sum).collect
118 | dataRDD.map(x => indexQ3.map(i => x(i).toDouble).sum).collect
119 | dataRDD.map(x => indexQ4.map(i => x(i).toDouble).sum).collect
120 |
121 |
122 |
123 |
124 |
125 |
--------------------------------------------------------------------------------
/Chapter05/Chapter5.scala:
--------------------------------------------------------------------------------
1 | //Code for Chapter 5
2 | //Code for Building Spark streaming applications section
3 | import org.apache.spark.sql.types._
4 | import org.apache.spark.sql.functions._
5 | import scala.concurrent.duration._
6 | import org.apache.spark.sql.streaming.ProcessingTime
7 | import org.apache.spark.sql.streaming.OutputMode.Complete
8 | import spark.implicits._
9 |
// Explicit schema for the iPinYou bid-log TSV files (streaming sources require
// a user-supplied schema; inferSchema has no effect with one provided).
10 | val bidSchema = new StructType().add("bidid", StringType).add("timestamp", StringType).add("ipinyouid", StringType).add("useragent", StringType).add("IP", StringType).add("region", IntegerType).add("cityID", IntegerType).add("adexchange", StringType).add("domain", StringType).add("turl", StringType).add("urlid", StringType).add("slotid", StringType).add("slotwidth", StringType).add("slotheight", StringType).add("slotvisibility", StringType).add("slotformat", StringType).add("slotprice", StringType).add("creative", StringType).add("bidprice", StringType)
// File source: watches the bidfiles directory, at most one new file per trigger
// so output arrives incrementally for demonstration purposes.
11 | val streamingInputDF = spark.readStream.format("csv").schema(bidSchema).option("header", false).option("inferSchema", true).option("sep", "\t").option("maxFilesPerTrigger", 1).load("file:///Users/aurobindosarkar/Downloads/make-ipinyou-data-master/original-data/ipinyou.contest.dataset/bidfiles")
12 | streamingInputDF.printSchema()
13 |
14 | //Code for Implementing sliding window-based functionality section
// Parse the string timestamp column (yyyyMMddHHmmssSSS) into a SQL timestamp.
15 | val ts = unix_timestamp($"timestamp", "yyyyMMddHHmmssSSS").cast("timestamp")
16 | val streamingCityTimeDF = streamingInputDF.withColumn("ts", ts).select($"cityID", $"ts")
17 | //Wait for the output to show on the screen after the next statement
// 10-minute windows sliding every 5 minutes, counted per city; "complete"
// output mode re-emits the full aggregate table to the console each trigger.
18 | val windowedCounts = streamingCityTimeDF.groupBy(window($"ts", "10 minutes", "5 minutes"), $"cityID").count().writeStream.outputMode("complete").format("console").start()
19 |
20 | //Code for Joining a streaming dataset with a static dataset section
// Static lookup table mapping cityID -> human-readable city name.
21 | val citySchema = new StructType().add("cityID", StringType).add("cityName", StringType)
22 | val staticDF = spark.read.format("csv").schema(citySchema).option("header", false).option("inferSchema", true).option("sep", "\t").load("file:///Users/aurobindosarkar/Downloads/make-ipinyou-data-master/original-data/ipinyou.contest.dataset/city.en.txt")
// Stream-static equi-join on cityID enriches each streaming row with cityName.
23 | val joinedDF = streamingCityTimeDF.join(staticDF, "cityID")
24 | //Wait for the output to show on the screen after the next statement
25 | val windowedCityCounts = joinedDF.groupBy(window($"ts", "10 minutes", "5 minutes"), $"cityName").count().writeStream.outputMode("complete").format("console").start()
// Wider enrichment: keep bid id and prices alongside the resolved city name.
26 | val streamingCityNameBidsTimeDF = streamingInputDF.withColumn("ts", ts).select($"ts", $"bidid", $"cityID", $"bidprice", $"slotprice").join(staticDF, "cityID")
27 | //Wait for the output to show on the screen after the next statement
// "append" mode: only newly arrived rows are printed (no aggregation here).
28 | val cityBids = streamingCityNameBidsTimeDF.select($"ts", $"bidid", $"bidprice", $"slotprice", $"cityName").writeStream.outputMode("append").format("console").start()
29 |
30 |
31 | //Code for Using the Dataset API in Structured Streaming section
// Typed view of the stream; field names/types mirror bidSchema above.
32 | case class Bid(bidid: String, timestamp: String, ipinyouid: String, useragent: String, IP: String, region: Integer, cityID: Integer, adexchange: String, domain: String, turl: String, urlid: String, slotid: String, slotwidth: String, slotheight: String, slotvisibility: String, slotformat: String, slotprice: String, creative: String, bidprice: String)
33 | val ds = streamingInputDF.as[Bid]
34 |
35 | //Code for Using the Foreach Sink for arbitrary computations on output section
36 | import org.apache.spark.sql.ForeachWriter
// Minimal ForeachWriter: open always accepts the partition/epoch, process
// prints each value, close ignores errors. Runs on the executors, not the driver.
37 | val writer = new ForeachWriter[String] {
38 | override def open(partitionId: Long, version: Long) = true
39 | override def process(value: String) = println(value)
40 | override def close(errorOrNull: Throwable) = {}
41 | }
// Typed filter/map yields a Dataset[String] (useragent of adexchange-3 bids),
// matching the ForeachWriter[String] element type.
42 | val dsForeach = ds.filter(_.adexchange == "3").map(_.useragent).writeStream.foreach(writer).start()
43 |
44 | //Code for Using the Memory Sink to save output to a table section
45 | val aggAdexchangeDF = streamingInputDF.groupBy($"adexchange").count()
46 | //Wait for the output to show on the screen after the next statement
// Memory sink keeps the complete aggregate in an in-memory table named by
// queryName, queryable with ordinary SQL from the same session.
47 | val aggQuery = aggAdexchangeDF.writeStream.queryName("aggregateTable").outputMode("complete").format("memory").start()
48 | spark.sql("select * from aggregateTable").show()
49 |
50 | //Code for Using the File Sink to save output to a partitioned table section
// File sink requires "append" mode and a checkpoint location (here on HDFS)
// for exactly-once recovery of the output files.
51 | val cityBidsParquet = streamingCityNameBidsTimeDF.select($"bidid", $"bidprice", $"slotprice", $"cityName").writeStream.outputMode("append").format("parquet").option("path", "hdfs://localhost:9000/pout").option("checkpointLocation", "hdfs://localhost:9000/poutcp").start()
52 |
53 | //Code for Monitoring streaming queries section
// Enumerate all queries still running on this SparkSession.
54 | spark.streams.active.foreach(x => println("ID:"+ x.id + " Run ID:"+ x.runId + " Status: "+ x.status))
55 |
56 | // get the unique identifier of the running query that persists across restarts from checkpoint data
57 | windowedCounts.id
58 | // get the unique id of this run of the query, which will be generated at every start/restart
59 | windowedCounts.runId
60 | // the exception if the query has been terminated with error
61 | windowedCounts.exception
62 | // the most recent progress update of this streaming query
63 | windowedCounts.lastProgress
64 |
// Stop the sliding-window query started in the earlier section.
65 | windowedCounts.stop()
66 |
67 | //Code for Using Kafka with Spark Structured Streaming
68 | //Refer book for steps to be executed for Kafka and ZooKeeper
69 | //Proper sequence of steps needs to be followed for execution of code in this section
// Kafka source on topic "test"; key/value arrive as binary and are cast to
// strings, then viewed as a typed Dataset[(String, String)].
70 | val ds1 = spark.readStream.format("kafka").option("kafka.bootstrap.servers", "localhost:9092").option("subscribe", "test").load().selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)").as[(String, String)]
71 |
72 | val query = ds1.writeStream.outputMode("append").format("console").start()
// Second source reads the "connect-test" topic fed by Kafka Connect.
73 | val ds2 = spark.readStream.format("kafka").option("kafka.bootstrap.servers", "localhost:9092").option("subscribe", "connect-test").load().selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)").as[(String, String)]
// NOTE(review): `val query` is redefined here — this only works when entered
// line-by-line in spark-shell (REPL shadowing); in :paste or compiled code it
// would be a duplicate definition. The first query's handle is lost after this.
74 | val query = ds2.writeStream.outputMode("append").format("console").start()
75 |
76 | //Follow the instructions in the book to compile & execute code for Writing a receiver for a custom data source section
77 |
78 |
79 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5.zip
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/.DS_Store
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/.DS_Store
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/.DS_Store
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/.DS_Store
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/.DS_Store
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/.DS_Store
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$54139fcdd8ccf3cf91c0$$anonfun$$sbtdef$1.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$54139fcdd8ccf3cf91c0$$anonfun$$sbtdef$1.class
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$54139fcdd8ccf3cf91c0$.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$54139fcdd8ccf3cf91c0$.class
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$54139fcdd8ccf3cf91c0.cache:
--------------------------------------------------------------------------------
1 | sbt.internals.DslEntry
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$54139fcdd8ccf3cf91c0.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$54139fcdd8ccf3cf91c0.class
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$8f473ff6075c0d114f2b$$anonfun$$sbtdef$1.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$8f473ff6075c0d114f2b$$anonfun$$sbtdef$1.class
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$8f473ff6075c0d114f2b$.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$8f473ff6075c0d114f2b$.class
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$8f473ff6075c0d114f2b.cache:
--------------------------------------------------------------------------------
1 | sbt.internals.DslEntry
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$8f473ff6075c0d114f2b.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$8f473ff6075c0d114f2b.class
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$ad7bd08a723d0037f87c$$anonfun$$sbtdef$1.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$ad7bd08a723d0037f87c$$anonfun$$sbtdef$1.class
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$ad7bd08a723d0037f87c$.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$ad7bd08a723d0037f87c$.class
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$ad7bd08a723d0037f87c.cache:
--------------------------------------------------------------------------------
1 | sbt.internals.DslEntry
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$ad7bd08a723d0037f87c.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$ad7bd08a723d0037f87c.class
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$af6f748636a17add42c9$$anonfun$$sbtdef$1.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$af6f748636a17add42c9$$anonfun$$sbtdef$1.class
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$af6f748636a17add42c9$.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$af6f748636a17add42c9$.class
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$af6f748636a17add42c9.cache:
--------------------------------------------------------------------------------
1 | sbt.internals.DslEntry
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$af6f748636a17add42c9.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$af6f748636a17add42c9.class
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$b82be0445fee798245c6$$anonfun$$sbtdef$1.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$b82be0445fee798245c6$$anonfun$$sbtdef$1.class
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$b82be0445fee798245c6$.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$b82be0445fee798245c6$.class
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$b82be0445fee798245c6.cache:
--------------------------------------------------------------------------------
1 | sbt.internals.DslEntry
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$b82be0445fee798245c6.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/config-classes/$b82be0445fee798245c6.class
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/resolution-cache/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/resolution-cache/.DS_Store
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/resolution-cache/default/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/resolution-cache/default/.DS_Store
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/resolution-cache/default/p1-build/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/resolution-cache/default/p1-build/.DS_Store
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/resolution-cache/default/p1-build/scala_2.10/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/resolution-cache/default/p1-build/scala_2.10/.DS_Store
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/resolution-cache/default/p1-build/scala_2.10/sbt_0.13/0.1-SNAPSHOT/resolved.xml.properties:
--------------------------------------------------------------------------------
1 | #default#p1-build;0.1-SNAPSHOT resolved revisions
2 | #Thu Dec 15 11:32:33 IST 2016
3 | +organisation\:\#@\#\:+org.scala-lang\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:+module\:\#@\#\:+scala-library\:\#@\#\:+revision\:\#@\#\:+2.10.5\:\#@\#\:=2.10.5 ? 2.10.5 null
4 | +organisation\:\#@\#\:+org.scala-sbt\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:+module\:\#@\#\:+sbt\:\#@\#\:+revision\:\#@\#\:+0.13.9\:\#@\#\:=0.13.9 release 0.13.9 null
5 | +organisation\:\#@\#\:+org.scala-lang\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:+module\:\#@\#\:+scala-compiler\:\#@\#\:+revision\:\#@\#\:+2.10.5\:\#@\#\:=2.10.5 release 2.10.5 null
6 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/resolution-cache/default/p1-build/scala_2.10/sbt_0.13/0.1-SNAPSHOT/resolved.xml.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
10 |
11 | p1-build
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/resolution-cache/reports/default-p1-build-compile.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/resolution-cache/reports/default-p1-build-docs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/resolution-cache/reports/default-p1-build-optional.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/resolution-cache/reports/default-p1-build-plugin.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/resolution-cache/reports/default-p1-build-pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/resolution-cache/reports/default-p1-build-runtime-internal.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/resolution-cache/reports/default-p1-build-runtime.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/resolution-cache/reports/default-p1-build-scala-tool.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/resolution-cache/reports/default-p1-build-sources.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/resolution-cache/reports/default-p1-build-test.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/resolution-cache/reports/ivy-report.css:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | *
17 | */
18 |
19 | body {
20 | font-family:"Trebuchet MS",Verdana,Geneva,Arial,Helvetica,sans-serif;
21 | font-size:small;
22 | }
23 |
24 | div#logo {
25 | float: right;
26 | padding-left: 10px;
27 | padding-bottom: 10px;
28 | background: white;
29 | text-align: center;
30 | }
31 |
32 | #logo img {
33 | border: 0;
34 | }
35 |
36 | div#date {
37 | font-style: italic;
38 | padding-left: 60px;
39 | padding-bottom: 40px;
40 | }
41 |
42 |
43 | h1 {
44 | margin-bottom:2px;
45 |
46 | border-color:#7A9437;
47 | border-style:solid;
48 | border-width:0 0 3px 0;
49 | }
50 |
51 | span#module {
52 | color:#7A9437;
53 | text-decoration:none;
54 | }
55 |
56 | span#organisation {
57 | color:black;
58 | text-decoration:none;
59 | }
60 |
61 | #confmenu {
62 | color: #000;
63 | border-bottom: 2px solid black;
64 | margin: 12px 0px 0px 0px;
65 | padding: 0px;
66 | z-index: 1;
67 | padding-left: 10px
68 | }
69 |
70 | #confmenu li {
71 | display: inline;
72 | overflow: hidden;
73 | list-style-type: none;
74 | }
75 |
76 | #confmenu a, a.active {
77 | color: #DEDECF;
78 | background: #898B5E;
79 | font: bold 1em "Trebuchet MS", Arial, sans-serif;
80 | border: 2px solid black;
81 | padding: 2px 5px 0px 5px;
82 | text-decoration: none;
83 | }
84 |
85 | /*
86 | background: #ABAD85 #CED4BD
87 | background: #DEE4CD
88 | */
89 |
90 | #confmenu a.active {
91 | color: #7A9437;
92 | background: #DEE4CD;
93 | border-bottom: 3px solid #DEE4CD;
94 | }
95 |
96 | #confmenu a:hover {
97 | color: #fff;
98 | background: #ADC09F;
99 | }
100 |
101 | #confmenu a:visited {
102 | color: #DEDECF;
103 | }
104 |
105 | #confmenu a.active:visited {
106 | color: #7A9437;
107 | }
108 |
109 | #confmenu a.active:hover {
110 | background: #DEE4CD;
111 | color: #DEDECF;
112 | }
113 |
114 | #content {
115 | background: #DEE4CD;
116 | padding: 20px;
117 | border: 2px solid black;
118 | border-top: none;
119 | z-index: 2;
120 | }
121 |
122 | #content a {
123 | text-decoration: none;
124 | color: #E8E9BE;
125 | }
126 |
127 | #content a:hover {
128 | background: #898B5E;
129 | }
130 |
131 |
132 | h2 {
133 | margin-bottom:2px;
134 | font-size:medium;
135 |
136 | border-color:#7A9437;
137 | border-style:solid;
138 | border-width:0 0 2px 0;
139 | }
140 |
141 | h3 {
142 | margin-top:30px;
143 | margin-bottom:2px;
144 | padding: 5 5 5 0;
145 | font-size: 24px;
146 | border-style:solid;
147 | border-width:0 0 2px 0;
148 | }
149 |
150 | h4 {
151 | margin-bottom:2px;
152 | margin-top:2px;
153 | font-size:medium;
154 |
155 | border-color:#7A9437;
156 | border-style:dashed;
157 | border-width:0 0 1px 0;
158 | }
159 |
160 | h5 {
161 | margin-bottom:2px;
162 | margin-top:2px;
163 | margin-left:20px;
164 | font-size:medium;
165 | }
166 |
167 | span.resolved {
168 | padding-left: 15px;
169 | font-weight: 500;
170 | font-size: small;
171 | }
172 |
173 |
174 | #content table {
175 | border-collapse:collapse;
176 | width:90%;
177 | margin:auto;
178 | margin-top: 5px;
179 | }
180 | #content thead {
181 | background-color:#CED4BD;
182 | border:1px solid #7A9437;
183 | }
184 | #content tbody {
185 | border-collapse:collapse;
186 | background-color:#FFFFFF;
187 | border:1px solid #7A9437;
188 | }
189 |
190 | #content th {
191 | font-family:monospace;
192 | border:1px solid #7A9437;
193 | padding:5px;
194 | }
195 |
196 | #content td {
197 | border:1px dotted #7A9437;
198 | padding:0 3 0 3;
199 | }
200 |
201 | #content table a {
202 | color:#7A9437;
203 | text-decoration:none;
204 | }
205 |
206 | #content table a:hover {
207 | background-color:#CED4BD;
208 | color:#7A9437;
209 | }
210 |
211 |
212 |
213 | table.deps {
214 | border-collapse:collapse;
215 | width:90%;
216 | margin:auto;
217 | margin-top: 5px;
218 | }
219 |
220 | table.deps thead {
221 | background-color:#CED4BD;
222 | border:1px solid #7A9437;
223 | }
224 | table.deps tbody {
225 | border-collapse:collapse;
226 | background-color:#FFFFFF;
227 | border:1px solid #7A9437;
228 | }
229 |
230 | table.deps th {
231 | font-family:monospace;
232 | border:1px solid #7A9437;
233 | padding:2;
234 | }
235 |
236 | table.deps td {
237 | border:1px dotted #7A9437;
238 | padding:0 3 0 3;
239 | }
240 |
241 |
242 |
243 |
244 |
245 | table.header {
246 | border:0;
247 | width:90%;
248 | margin:auto;
249 | margin-top: 5px;
250 | }
251 |
252 | table.header thead {
253 | border:0;
254 | }
255 | table.header tbody {
256 | border:0;
257 | }
258 | table.header tr {
259 | padding:0px;
260 | border:0;
261 | }
262 | table.header td {
263 | padding:0 3 0 3;
264 | border:0;
265 | }
266 |
267 | td.title {
268 | width:150px;
269 | margin-right:15px;
270 |
271 | font-size:small;
272 | font-weight:700;
273 | }
274 |
275 | td.title:first-letter {
276 | color:#7A9437;
277 | background-color:transparent;
278 | }
279 |
280 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/$global/$global/$global/streams/out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/$global/$global/$global/streams/out
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/$global/dependencyPositions/$global/streams/update_cache_2.10/input_dsp:
--------------------------------------------------------------------------------
1 | org.scala-lang
scala-library 2.10.5 provided
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/$global/dependencyPositions/$global/streams/update_cache_2.10/output_dsp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/$global/dependencyPositions/$global/streams/update_cache_2.10/output_dsp
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/$global/ivyConfiguration/$global/streams/out:
--------------------------------------------------------------------------------
1 | [0m[[0mdebug[0m] [0mOther repositories:[0m
2 | [0m[[0mdebug[0m] [0mDefault repositories:[0m
3 | [0m[[0mdebug[0m] [0mUsing inline dependencies specified in Scala.[0m
4 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/$global/ivySbt/$global/streams/out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/$global/ivySbt/$global/streams/out
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/$global/projectDescriptors/$global/streams/out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/$global/projectDescriptors/$global/streams/out
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/$global/update/$global/streams/update_cache_2.10/inputs:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/$global/update/$global/streams/update_cache_2.10/inputs
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/$global/update/$global/streams/update_cache_2.10/output:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/$global/update/$global/streams/update_cache_2.10/output
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/.DS_Store
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/compile/$global/$global/discoveredMainClasses/data:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/compile/compile/$global/streams/out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/compile/compile/$global/streams/out
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/compile/compileIncremental/$global/streams/export:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/compile/compileIncremental/$global/streams/export
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/compile/compileIncremental/$global/streams/out:
--------------------------------------------------------------------------------
1 | [0m[[0mdebug[0m] [0m[0m
2 | [0m[[0mdebug[0m] [0mInitial source changes: [0m
3 | [0m[[0mdebug[0m] [0m removed:Set()[0m
4 | [0m[[0mdebug[0m] [0m added: Set()[0m
5 | [0m[[0mdebug[0m] [0m modified: Set()[0m
6 | [0m[[0mdebug[0m] [0mInvalidated products: Set()[0m
7 | [0m[[0mdebug[0m] [0mExternal API changes: API Changes: Set()[0m
8 | [0m[[0mdebug[0m] [0mModified binary dependencies: Set()[0m
9 | [0m[[0mdebug[0m] [0mInitial directly invalidated sources: Set()[0m
10 | [0m[[0mdebug[0m] [0m[0m
11 | [0m[[0mdebug[0m] [0mSources indirectly invalidated by:[0m
12 | [0m[[0mdebug[0m] [0m product: Set()[0m
13 | [0m[[0mdebug[0m] [0m binary dep: Set()[0m
14 | [0m[[0mdebug[0m] [0m external source: Set()[0m
15 | [0m[[0mdebug[0m] [0mAll initially invalidated sources: Set()[0m
16 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/compile/copyResources/$global/streams/copy-resources:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/compile/copyResources/$global/streams/copy-resources
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/compile/copyResources/$global/streams/out:
--------------------------------------------------------------------------------
1 | [0m[[0mdebug[0m] [0mCopy resource mappings: [0m
2 | [0m[[0mdebug[0m] [0m [0m
3 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/compile/dependencyClasspath/$global/streams/export:
--------------------------------------------------------------------------------
1 | /Users/aurobindosarkar/.sbt/boot/scala-2.10.5/lib/scala-library.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/sbt/jars/sbt-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/main/jars/main-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/actions/jars/actions-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/classpath/jars/classpath-0.13.9.jar:/Users/aurobindosarkar/.sbt/boot/scala-2.10.5/lib/scala-compiler.jar:/Users/aurobindosarkar/.sbt/boot/scala-2.10.5/lib/scala-reflect.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/interface/jars/interface-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/io/jars/io-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/control/jars/control-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/launcher-interface/jars/launcher-interface-1.0.0-M1.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/completion/jars/completion-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/collections/jars/collections-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/jline/jline/jars/jline-2.11.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/api/jars/api-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/compiler-integration/jars/compiler-integration-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/incremental-compiler/jars/incremental-compiler-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/logging/jars/logging-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/process/jars/process-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/relation/jars/relation-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/compile/jars/compile-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/classfile/jars/classfile-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/persist/jars/persist-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-tools.sbinary/sbinary_2.10/jars/sbinary_2.10-0.4.2.jar:/Users/aurobi
ndosarkar/.ivy2/cache/org.scala-sbt/compiler-ivy-integration/jars/compiler-ivy-integration-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/ivy/jars/ivy-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/cross/jars/cross-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt.ivy/ivy/jars/ivy-2.3.0-sbt-c5d1b95fdcc1e1007740ffbecf4eb07abc51ec93.jar:/Users/aurobindosarkar/.ivy2/cache/com.jcraft/jsch/jars/jsch-0.1.46.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/serialization_2.10/jars/serialization_2.10-0.1.1.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-lang.modules/scala-pickling_2.10/jars/scala-pickling_2.10-0.10.0.jar:/Users/aurobindosarkar/.ivy2/cache/org.scalamacros/quasiquotes_2.10/jars/quasiquotes_2.10-2.0.1.jar:/Users/aurobindosarkar/.ivy2/cache/org.json4s/json4s-core_2.10/jars/json4s-core_2.10-3.2.10.jar:/Users/aurobindosarkar/.ivy2/cache/org.json4s/json4s-ast_2.10/jars/json4s-ast_2.10-3.2.10.jar:/Users/aurobindosarkar/.ivy2/cache/com.thoughtworks.paranamer/paranamer/jars/paranamer-2.6.jar:/Users/aurobindosarkar/.ivy2/cache/org.spire-math/jawn-parser_2.10/jars/jawn-parser_2.10-0.6.0.jar:/Users/aurobindosarkar/.ivy2/cache/org.spire-math/json4s-support_2.10/jars/json4s-support_2.10-0.6.0.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/run/jars/run-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/task-system/jars/task-system-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/tasks/jars/tasks-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/tracking/jars/tracking-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/cache/jars/cache-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/testing/jars/testing-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/test-agent/jars/test-agent-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/test-interface/jars/test-interface-1.0.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/main-settings/jars/main-settings-0.13.9.ja
r:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/apply-macro/jars/apply-macro-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/command/jars/command-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/logic/jars/logic-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/compiler-interface/jars/compiler-interface-src-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/compiler-interface/jars/compiler-interface-bin-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/precompiled-2_8_2/jars/compiler-interface-bin-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/precompiled-2_9_2/jars/compiler-interface-bin-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/precompiled-2_9_3/jars/compiler-interface-bin-0.13.9.jar
2 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/compile/exportedProducts/$global/streams/export:
--------------------------------------------------------------------------------
1 | /Users/aurobindosarkar/Downloads/SparkBook/Chapter5/code/p1/project/target/scala-2.10/sbt-0.13/classes
2 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/compile/externalDependencyClasspath/$global/streams/export:
--------------------------------------------------------------------------------
1 | /Users/aurobindosarkar/.sbt/boot/scala-2.10.5/lib/scala-library.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/sbt/jars/sbt-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/main/jars/main-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/actions/jars/actions-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/classpath/jars/classpath-0.13.9.jar:/Users/aurobindosarkar/.sbt/boot/scala-2.10.5/lib/scala-compiler.jar:/Users/aurobindosarkar/.sbt/boot/scala-2.10.5/lib/scala-reflect.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/interface/jars/interface-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/io/jars/io-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/control/jars/control-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/launcher-interface/jars/launcher-interface-1.0.0-M1.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/completion/jars/completion-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/collections/jars/collections-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/jline/jline/jars/jline-2.11.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/api/jars/api-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/compiler-integration/jars/compiler-integration-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/incremental-compiler/jars/incremental-compiler-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/logging/jars/logging-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/process/jars/process-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/relation/jars/relation-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/compile/jars/compile-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/classfile/jars/classfile-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/persist/jars/persist-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-tools.sbinary/sbinary_2.10/jars/sbinary_2.10-0.4.2.jar:/Users/aurobi
ndosarkar/.ivy2/cache/org.scala-sbt/compiler-ivy-integration/jars/compiler-ivy-integration-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/ivy/jars/ivy-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/cross/jars/cross-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt.ivy/ivy/jars/ivy-2.3.0-sbt-c5d1b95fdcc1e1007740ffbecf4eb07abc51ec93.jar:/Users/aurobindosarkar/.ivy2/cache/com.jcraft/jsch/jars/jsch-0.1.46.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/serialization_2.10/jars/serialization_2.10-0.1.1.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-lang.modules/scala-pickling_2.10/jars/scala-pickling_2.10-0.10.0.jar:/Users/aurobindosarkar/.ivy2/cache/org.scalamacros/quasiquotes_2.10/jars/quasiquotes_2.10-2.0.1.jar:/Users/aurobindosarkar/.ivy2/cache/org.json4s/json4s-core_2.10/jars/json4s-core_2.10-3.2.10.jar:/Users/aurobindosarkar/.ivy2/cache/org.json4s/json4s-ast_2.10/jars/json4s-ast_2.10-3.2.10.jar:/Users/aurobindosarkar/.ivy2/cache/com.thoughtworks.paranamer/paranamer/jars/paranamer-2.6.jar:/Users/aurobindosarkar/.ivy2/cache/org.spire-math/jawn-parser_2.10/jars/jawn-parser_2.10-0.6.0.jar:/Users/aurobindosarkar/.ivy2/cache/org.spire-math/json4s-support_2.10/jars/json4s-support_2.10-0.6.0.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/run/jars/run-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/task-system/jars/task-system-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/tasks/jars/tasks-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/tracking/jars/tracking-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/cache/jars/cache-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/testing/jars/testing-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/test-agent/jars/test-agent-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/test-interface/jars/test-interface-1.0.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/main-settings/jars/main-settings-0.13.9.ja
r:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/apply-macro/jars/apply-macro-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/command/jars/command-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/logic/jars/logic-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/compiler-interface/jars/compiler-interface-src-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/compiler-interface/jars/compiler-interface-bin-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/precompiled-2_8_2/jars/compiler-interface-bin-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/precompiled-2_9_2/jars/compiler-interface-bin-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/precompiled-2_9_3/jars/compiler-interface-bin-0.13.9.jar
2 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/compile/internalDependencyClasspath/$global/streams/export:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/compile/managedClasspath/$global/streams/export:
--------------------------------------------------------------------------------
1 | /Users/aurobindosarkar/.sbt/boot/scala-2.10.5/lib/scala-library.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/sbt/jars/sbt-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/main/jars/main-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/actions/jars/actions-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/classpath/jars/classpath-0.13.9.jar:/Users/aurobindosarkar/.sbt/boot/scala-2.10.5/lib/scala-compiler.jar:/Users/aurobindosarkar/.sbt/boot/scala-2.10.5/lib/scala-reflect.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/interface/jars/interface-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/io/jars/io-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/control/jars/control-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/launcher-interface/jars/launcher-interface-1.0.0-M1.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/completion/jars/completion-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/collections/jars/collections-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/jline/jline/jars/jline-2.11.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/api/jars/api-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/compiler-integration/jars/compiler-integration-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/incremental-compiler/jars/incremental-compiler-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/logging/jars/logging-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/process/jars/process-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/relation/jars/relation-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/compile/jars/compile-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/classfile/jars/classfile-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/persist/jars/persist-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-tools.sbinary/sbinary_2.10/jars/sbinary_2.10-0.4.2.jar:/Users/aurobi
ndosarkar/.ivy2/cache/org.scala-sbt/compiler-ivy-integration/jars/compiler-ivy-integration-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/ivy/jars/ivy-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/cross/jars/cross-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt.ivy/ivy/jars/ivy-2.3.0-sbt-c5d1b95fdcc1e1007740ffbecf4eb07abc51ec93.jar:/Users/aurobindosarkar/.ivy2/cache/com.jcraft/jsch/jars/jsch-0.1.46.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/serialization_2.10/jars/serialization_2.10-0.1.1.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-lang.modules/scala-pickling_2.10/jars/scala-pickling_2.10-0.10.0.jar:/Users/aurobindosarkar/.ivy2/cache/org.scalamacros/quasiquotes_2.10/jars/quasiquotes_2.10-2.0.1.jar:/Users/aurobindosarkar/.ivy2/cache/org.json4s/json4s-core_2.10/jars/json4s-core_2.10-3.2.10.jar:/Users/aurobindosarkar/.ivy2/cache/org.json4s/json4s-ast_2.10/jars/json4s-ast_2.10-3.2.10.jar:/Users/aurobindosarkar/.ivy2/cache/com.thoughtworks.paranamer/paranamer/jars/paranamer-2.6.jar:/Users/aurobindosarkar/.ivy2/cache/org.spire-math/jawn-parser_2.10/jars/jawn-parser_2.10-0.6.0.jar:/Users/aurobindosarkar/.ivy2/cache/org.spire-math/json4s-support_2.10/jars/json4s-support_2.10-0.6.0.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/run/jars/run-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/task-system/jars/task-system-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/tasks/jars/tasks-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/tracking/jars/tracking-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/cache/jars/cache-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/testing/jars/testing-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/test-agent/jars/test-agent-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/test-interface/jars/test-interface-1.0.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/main-settings/jars/main-settings-0.13.9.ja
r:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/apply-macro/jars/apply-macro-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/command/jars/command-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/logic/jars/logic-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/compiler-interface/jars/compiler-interface-src-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/compiler-interface/jars/compiler-interface-bin-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/precompiled-2_8_2/jars/compiler-interface-bin-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/precompiled-2_9_2/jars/compiler-interface-bin-0.13.9.jar:/Users/aurobindosarkar/.ivy2/cache/org.scala-sbt/precompiled-2_9_3/jars/compiler-interface-bin-0.13.9.jar
2 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/compile/unmanagedClasspath/$global/streams/export:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/compile/unmanagedJars/$global/streams/export:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/runtime/dependencyClasspath/$global/streams/export:
--------------------------------------------------------------------------------
1 | /Users/aurobindosarkar/Downloads/SparkBook/Chapter5/code/p1/project/target/scala-2.10/sbt-0.13/classes
2 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/runtime/exportedProducts/$global/streams/export:
--------------------------------------------------------------------------------
1 | /Users/aurobindosarkar/Downloads/SparkBook/Chapter5/code/p1/project/target/scala-2.10/sbt-0.13/classes
2 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/runtime/externalDependencyClasspath/$global/streams/export:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/runtime/fullClasspath/$global/streams/export:
--------------------------------------------------------------------------------
1 | /Users/aurobindosarkar/Downloads/SparkBook/Chapter5/code/p1/project/target/scala-2.10/sbt-0.13/classes
2 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/runtime/internalDependencyClasspath/$global/streams/export:
--------------------------------------------------------------------------------
1 | /Users/aurobindosarkar/Downloads/SparkBook/Chapter5/code/p1/project/target/scala-2.10/sbt-0.13/classes
2 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/runtime/managedClasspath/$global/streams/export:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/runtime/unmanagedClasspath/$global/streams/export:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/project/target/streams/runtime/unmanagedJars/$global/streams/export:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/simple.sbt:
--------------------------------------------------------------------------------
// sbt build definition for the Chapter 5 Spark Streaming examples.
name := "spark-streaming-example"

version := "1.0"

// Spark 2.2.0 artifacts are published for Scala 2.11, so the build pins a 2.11.x compiler.
scalaVersion := "2.11.7"

// JitPack resolves GitHub-hosted artifacts (needed for org.jfarcand:wcs below).
resolvers += "jitpack" at "https://jitpack.io"

// %% appends the Scala binary version (_2.11) to the artifact name; plain % does not.
libraryDependencies ++= Seq("org.apache.spark" %% "spark-core" % "2.2.0",
  "org.apache.spark" %% "spark-streaming" % "2.2.0",
  "org.apache.httpcomponents" % "httpclient" % "4.5.2", // blocking HTTP client used by the TfL receivers
  "org.scalaj" %% "scalaj-http" % "2.2.1",
  "org.jfarcand" % "wcs" % "1.5") // WebSocket client
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/src/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/src/.DS_Store
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/src/main/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/src/main/.DS_Store
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/src/main/scala/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/src/main/scala/.DS_Store
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/src/main/scala/CustomReceiver.scala:
--------------------------------------------------------------------------------
1 | import java.io.{BufferedReader, InputStreamReader}
2 | import java.net.Socket
3 | import java.nio.charset.StandardCharsets
4 |
5 | import org.apache.spark.SparkConf
6 | import org.apache.spark.internal.Logging
7 | import org.apache.spark.storage.StorageLevel
8 | import org.apache.spark.streaming.{Seconds, StreamingContext}
9 | import org.apache.spark.streaming.receiver.Receiver
10 |
/**
 * Custom Receiver that receives data over a socket. Received bytes are interpreted as
 * text and \n delimited lines are considered as records. They are then counted and printed.
 *
 * To run this on your local machine, you need to first run a Netcat server
 * `$ nc -lk 9999`
 * and then run the example
 * `$ bin/run-example org.apache.spark.examples.streaming.CustomReceiver localhost 9999`
 */
object CustomReceiver {
  def main(args: Array[String]) {
    // Both the host and the port of the server to connect to are required.
    if (args.length < 2) {
      // Fixed: the original message read "Usage: CustomReceiver " with the
      // <host> <port> placeholders missing, leaving the user no hint.
      System.err.println("Usage: CustomReceiver <host> <port>")
      System.exit(1)
    }

    // Create the context with a 1 second batch size
    val sparkConf = new SparkConf().setAppName("CustomReceiver")
    val ssc = new StreamingContext(sparkConf, Seconds(1))

    // Create an input stream with the custom receiver on target ip:port and count the
    // words in input stream of \n delimited text (eg. generated by 'nc')
    val lines = ssc.receiverStream(new CustomReceiver(args(0), args(1).toInt))
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(x => (x, 1)).reduceByKey(_ + _)
    wordCounts.print()
    ssc.start()
    ssc.awaitTermination()
  }
}
43 |
44 |
/**
 * Receiver that connects to host:port, reads UTF-8 \n-delimited lines and
 * stores each line as one record. Restarts itself when the connection drops.
 */
class CustomReceiver(host: String, port: Int)
  extends Receiver[String](StorageLevel.MEMORY_AND_DISK_2) {

  def onStart() {
    // Receive on a dedicated thread so onStart() returns immediately,
    // as the Receiver contract requires.
    new Thread("Socket Receiver") {
      override def run() { receive() }
    }.start()
  }

  def onStop() {
    // There is nothing much to do as the thread calling receive()
    // is designed to stop by itself once isStopped() returns true.
  }

  /** Create a socket connection and receive data until receiver is stopped */
  private def receive() {
    var socket: Socket = null
    try {
      println("Connecting to " + host + ":" + port)
      socket = new Socket(host, port)
      println("Connected to " + host + ":" + port)
      val reader = new BufferedReader(
        new InputStreamReader(socket.getInputStream(), StandardCharsets.UTF_8))
      var userInput: String = reader.readLine()
      while (!isStopped && userInput != null) {
        store(userInput)
        userInput = reader.readLine()
      }
      reader.close()
      println("Stopped receiving")
      // Restart in an attempt to connect again when the server is active again.
      restart("Trying to connect again")
    } catch {
      case e: java.net.ConnectException =>
        restart("Error connecting to " + host + ":" + port, e)
      case t: Throwable =>
        restart("Error receiving data", t)
    } finally {
      // Always release the socket: the original closed it only on the success
      // path and leaked it whenever readLine()/store() threw.
      if (socket != null) socket.close()
    }
  }
}
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/src/main/scala/TFLArrivalPredictionsByLine.scala:
--------------------------------------------------------------------------------
1 | import org.apache.spark.storage.StorageLevel
2 | import org.apache.spark.streaming.receiver.Receiver
3 | import org.jfarcand.wcs.{TextListener, WebSocket}
4 | import scala.util.parsing.json.JSON
5 | import scalaj.http.Http
6 |
7 | import java.io.BufferedReader;
8 | import java.io.IOException;
9 | import java.io.InputStreamReader;
10 | import org.apache.http.HttpResponse;
11 | import org.apache.http.client.ClientProtocolException;
12 | import org.apache.http.client.methods.HttpGet;
13 | import org.apache.http.impl.client.DefaultHttpClient;
/**
 * Spark Streaming receiver that polls the TfL (Transport for London) circle-line
 * arrivals REST endpoint once a minute and stores each response line as a record.
 */
class TFLArrivalPredictionsByLine() extends Receiver[String](StorageLevel.MEMORY_ONLY) with Runnable {
  private val tflUrl = "https://api.tfl.gov.uk/Line/circle/Arrivals?stopPointId=940GZZLUERC&app_id=a73727f3&app_key=dc8150560a2422afae2b70cf291c4327"
  @transient
  private var thread: Thread = _

  override def onStart(): Unit = {
    thread = new Thread(this)
    thread.start()
  }

  override def onStop(): Unit = {
    thread.interrupt()
  }

  override def run(): Unit = {
    try {
      // Check isStopped so the receiver actually terminates; the original
      // `while (true)` polled forever regardless of shutdown.
      while (!isStopped) {
        receive();
        Thread.sleep(60 * 1000); // poll once a minute
      }
    } catch {
      // onStop() interrupts this thread; treat the interrupt as a normal
      // shutdown instead of letting it kill the thread with a stack trace.
      case _: InterruptedException => Thread.currentThread().interrupt()
    }
  }

  private def receive(): Unit = {
    val httpClient = new DefaultHttpClient();
    try {
      val getRequest = new HttpGet(tflUrl);
      getRequest.addHeader("accept", "application/json");

      val response = httpClient.execute(getRequest);
      if (response.getStatusLine().getStatusCode() != 200) {
        throw new RuntimeException("Failed : HTTP error code : "
          + response.getStatusLine().getStatusCode());
      }

      val br = new BufferedReader(
        new InputStreamReader((response.getEntity().getContent())));
      try {
        var output = br.readLine();
        while (output != null) {
          // Deliver the record to Spark. The original only println'd the
          // line, so the receiver never produced any data for the stream.
          store(output)
          println(output)
          output = br.readLine()
        }
      } finally {
        br.close() // release the response stream even when store()/readLine() throws
      }
    } finally {
      // The original never released the client; shut it down after every poll.
      httpClient.getConnectionManager().shutdown()
    }
  }
}
56 |
57 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/src/main/scala/TFLCustomReceiver.scala:
--------------------------------------------------------------------------------
1 | import java.io.BufferedReader;
2 | import java.io.IOException;
3 | import java.io.InputStreamReader;
4 | import org.apache.http.HttpResponse;
5 | import org.apache.http.client.ClientProtocolException;
6 | import org.apache.http.client.methods.HttpGet;
7 | import org.apache.http.impl.client.DefaultHttpClient;
8 | import org.apache.spark.storage.StorageLevel
9 | import org.apache.spark.streaming.receiver.Receiver
10 |
11 | import org.apache.spark.SparkConf
12 | import org.apache.spark.streaming.{Seconds, StreamingContext}
13 |
/**
 * Driver for the TfL HTTP receiver example: builds a streaming context with a
 * 300-second batch interval and prints each batch fetched from the TfL API.
 */
object TFLCustomReceiver {
  private val url = "https://api.tfl.gov.uk/Line/circle/Arrivals?stopPointId=940GZZLUERC&app_id=a73727f3&app_key=dc8150560a2422afae2b70cf291c4327"

  def main(args: Array[String]) {
    val conf = new SparkConf().setAppName("TFLCustomReceiver")
    val streamingContext = new StreamingContext(conf, Seconds(300))

    // Wire the receiver into the context and print every batch it produces.
    streamingContext.receiverStream(new TFLCustomReceiver(url)).print()

    streamingContext.start()
    streamingContext.awaitTermination()
  }
}
27 |
/**
 * Receiver that polls the given HTTP URL once a minute and stores every line
 * of each JSON response as one record, until the receiver is stopped.
 */
class TFLCustomReceiver(url: String)
  extends Receiver[String](StorageLevel.MEMORY_AND_DISK_2) {

  def onStart() {
    // Fetch on a dedicated thread so onStart() returns immediately,
    // as the Receiver contract requires.
    new Thread("Http Receiver") {
      override def run() { receive() }
    }.start()
  }

  def onStop() {
    // There is nothing much to do as the thread calling receive()
    // is designed to stop by itself if isStopped() returns false
  }

  /** Poll the URL and store response lines until the receiver is stopped. */
  private def receive() {
    var httpClient: DefaultHttpClient = null
    try {
      // Connect to host:port
      httpClient = new DefaultHttpClient();
      val getRequest = new HttpGet(url);
      getRequest.addHeader("accept", "application/json");

      while (!isStopped) {
        val response = httpClient.execute(getRequest);
        if (response.getStatusLine().getStatusCode() != 200) {
          throw new RuntimeException("Failed : HTTP error code : " + response.getStatusLine().getStatusCode());
        }
        val reader = new BufferedReader(new InputStreamReader((response.getEntity().getContent())));
        try {
          var userInput: String = reader.readLine()
          while (userInput != null) {
            store(userInput)
            userInput = reader.readLine()
          }
        } finally {
          // Close on every path: the original leaked the reader (and the
          // underlying connection) whenever store()/readLine() threw.
          reader.close()
        }
        Thread.sleep(60 * 1000) // poll once a minute
      }
      // Restart in an attempt to connect again when server is active again
      //restart("Trying to connect again")
    } catch {
      case e: java.net.ConnectException =>
        // restart if could not connect to server
        restart("Error connecting to " + url, e)
      case t: Throwable =>
        // restart if there is any other error
        restart("Error receiving data", t)
    } finally {
      // The original called close() only after a clean loop exit, leaking the
      // client on any exception path.
      if (httpClient != null) httpClient.close()
    }
  }
}
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/src/main/scala/TFLStreamingApp.scala:
--------------------------------------------------------------------------------
1 | import org.apache.spark.SparkConf
2 | import org.apache.spark.streaming.{Seconds, StreamingContext}
/**
 * Spark Streaming Example App: streams TfL arrival predictions with a
 * 300-second batch interval, printing each batch and optionally saving it.
 */
object TFLStreamingApp {
  def main(args: Array[String]) {
    val sparkConf = new SparkConf().setAppName("TFLStreaming")
    val streamingContext = new StreamingContext(sparkConf, Seconds(300))
    val arrivals = streamingContext.receiverStream(new TFLArrivalPredictionsByLine())
    println("Before")
    arrivals.print()
    println("After")
    // An optional third argument gives a path prefix for persisting batches.
    if (args.length > 2) {
      arrivals.saveAsTextFiles(args(2))
    }
    streamingContext.start()
    streamingContext.awaitTermination()
  }
}
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/.DS_Store
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/.history:
--------------------------------------------------------------------------------
1 | run local[2]
2 | package
3 | run local[2]
4 | run
5 | run TFLCustomReceiver https://api.tfl.gov.uk/Line/circle/Arrivals?stopPointId=940GZZLUERC&app_id=a73727f3&app_key=dc8150560a2422afae2b70cf291c4327
6 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/resolution-cache/reports/ivy-report.css:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | *
17 | */
18 |
19 | body {
20 | font-family:"Trebuchet MS",Verdana,Geneva,Arial,Helvetica,sans-serif;
21 | font-size:small;
22 | }
23 |
24 | div#logo {
25 | float: right;
26 | padding-left: 10px;
27 | padding-bottom: 10px;
28 | background: white;
29 | text-align: center;
30 | }
31 |
32 | #logo img {
33 | border: 0;
34 | }
35 |
36 | div#date {
37 | font-style: italic;
38 | padding-left: 60px;
39 | padding-bottom: 40px;
40 | }
41 |
42 |
43 | h1 {
44 | margin-bottom:2px;
45 |
46 | border-color:#7A9437;
47 | border-style:solid;
48 | border-width:0 0 3px 0;
49 | }
50 |
51 | span#module {
52 | color:#7A9437;
53 | text-decoration:none;
54 | }
55 |
56 | span#organisation {
57 | color:black;
58 | text-decoration:none;
59 | }
60 |
61 | #confmenu {
62 | color: #000;
63 | border-bottom: 2px solid black;
64 | margin: 12px 0px 0px 0px;
65 | padding: 0px;
66 | z-index: 1;
67 | padding-left: 10px
68 | }
69 |
70 | #confmenu li {
71 | display: inline;
72 | overflow: hidden;
73 | list-style-type: none;
74 | }
75 |
76 | #confmenu a, a.active {
77 | color: #DEDECF;
78 | background: #898B5E;
79 | font: bold 1em "Trebuchet MS", Arial, sans-serif;
80 | border: 2px solid black;
81 | padding: 2px 5px 0px 5px;
82 | text-decoration: none;
83 | }
84 |
85 | /*
86 | background: #ABAD85 #CED4BD
87 | background: #DEE4CD
88 | */
89 |
90 | #confmenu a.active {
91 | color: #7A9437;
92 | background: #DEE4CD;
93 | border-bottom: 3px solid #DEE4CD;
94 | }
95 |
96 | #confmenu a:hover {
97 | color: #fff;
98 | background: #ADC09F;
99 | }
100 |
101 | #confmenu a:visited {
102 | color: #DEDECF;
103 | }
104 |
105 | #confmenu a.active:visited {
106 | color: #7A9437;
107 | }
108 |
109 | #confmenu a.active:hover {
110 | background: #DEE4CD;
111 | color: #DEDECF;
112 | }
113 |
114 | #content {
115 | background: #DEE4CD;
116 | padding: 20px;
117 | border: 2px solid black;
118 | border-top: none;
119 | z-index: 2;
120 | }
121 |
122 | #content a {
123 | text-decoration: none;
124 | color: #E8E9BE;
125 | }
126 |
127 | #content a:hover {
128 | background: #898B5E;
129 | }
130 |
131 |
132 | h2 {
133 | margin-bottom:2px;
134 | font-size:medium;
135 |
136 | border-color:#7A9437;
137 | border-style:solid;
138 | border-width:0 0 2px 0;
139 | }
140 |
141 | h3 {
142 | margin-top:30px;
143 | margin-bottom:2px;
144 | padding: 5 5 5 0;
145 | font-size: 24px;
146 | border-style:solid;
147 | border-width:0 0 2px 0;
148 | }
149 |
150 | h4 {
151 | margin-bottom:2px;
152 | margin-top:2px;
153 | font-size:medium;
154 |
155 | border-color:#7A9437;
156 | border-style:dashed;
157 | border-width:0 0 1px 0;
158 | }
159 |
160 | h5 {
161 | margin-bottom:2px;
162 | margin-top:2px;
163 | margin-left:20px;
164 | font-size:medium;
165 | }
166 |
167 | span.resolved {
168 | padding-left: 15px;
169 | font-weight: 500;
170 | font-size: small;
171 | }
172 |
173 |
174 | #content table {
175 | border-collapse:collapse;
176 | width:90%;
177 | margin:auto;
178 | margin-top: 5px;
179 | }
180 | #content thead {
181 | background-color:#CED4BD;
182 | border:1px solid #7A9437;
183 | }
184 | #content tbody {
185 | border-collapse:collapse;
186 | background-color:#FFFFFF;
187 | border:1px solid #7A9437;
188 | }
189 |
190 | #content th {
191 | font-family:monospace;
192 | border:1px solid #7A9437;
193 | padding:5px;
194 | }
195 |
196 | #content td {
197 | border:1px dotted #7A9437;
198 | padding:0 3 0 3;
199 | }
200 |
201 | #content table a {
202 | color:#7A9437;
203 | text-decoration:none;
204 | }
205 |
206 | #content table a:hover {
207 | background-color:#CED4BD;
208 | color:#7A9437;
209 | }
210 |
211 |
212 |
213 | table.deps {
214 | border-collapse:collapse;
215 | width:90%;
216 | margin:auto;
217 | margin-top: 5px;
218 | }
219 |
220 | table.deps thead {
221 | background-color:#CED4BD;
222 | border:1px solid #7A9437;
223 | }
224 | table.deps tbody {
225 | border-collapse:collapse;
226 | background-color:#FFFFFF;
227 | border:1px solid #7A9437;
228 | }
229 |
230 | table.deps th {
231 | font-family:monospace;
232 | border:1px solid #7A9437;
233 | padding:2;
234 | }
235 |
236 | table.deps td {
237 | border:1px dotted #7A9437;
238 | padding:0 3 0 3;
239 | }
240 |
241 |
242 |
243 |
244 |
245 | table.header {
246 | border:0;
247 | width:90%;
248 | margin:auto;
249 | margin-top: 5px;
250 | }
251 |
252 | table.header thead {
253 | border:0;
254 | }
255 | table.header tbody {
256 | border:0;
257 | }
258 | table.header tr {
259 | padding:0px;
260 | border:0;
261 | }
262 | table.header td {
263 | padding:0 3 0 3;
264 | border:0;
265 | }
266 |
267 | td.title {
268 | width:150px;
269 | margin-right:15px;
270 |
271 | font-size:small;
272 | font-weight:700;
273 | }
274 |
275 | td.title:first-letter {
276 | color:#7A9437;
277 | background-color:transparent;
278 | }
279 |
280 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/resolution-cache/reports/simple-project-simple-project_2.11-docs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/resolution-cache/reports/simple-project-simple-project_2.11-optional.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/resolution-cache/reports/simple-project-simple-project_2.11-plugin.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/resolution-cache/reports/simple-project-simple-project_2.11-pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/resolution-cache/reports/simple-project-simple-project_2.11-provided.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/resolution-cache/reports/simple-project-simple-project_2.11-scala-tool.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/resolution-cache/reports/simple-project-simple-project_2.11-sources.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/resolution-cache/reports/spark-streaming-example-spark-streaming-example_2.11-docs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/resolution-cache/reports/spark-streaming-example-spark-streaming-example_2.11-optional.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/resolution-cache/reports/spark-streaming-example-spark-streaming-example_2.11-plugin.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/resolution-cache/reports/spark-streaming-example-spark-streaming-example_2.11-pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/resolution-cache/reports/spark-streaming-example-spark-streaming-example_2.11-provided.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/resolution-cache/reports/spark-streaming-example-spark-streaming-example_2.11-sources.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/resolution-cache/simple-project/simple-project_2.11/1.0/resolved.xml.properties:
--------------------------------------------------------------------------------
1 | #simple-project#simple-project_2.11;1.0 resolved revisions
2 | #Mon Dec 12 23:05:56 IST 2016
3 | +organisation\:\#@\#\:+org.scala-lang\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:+module\:\#@\#\:+scala-library\:\#@\#\:+revision\:\#@\#\:+2.11.7\:\#@\#\:=2.11.7 ? 2.11.8 null
4 | +organisation\:\#@\#\:+org.apache.spark\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:+module\:\#@\#\:+spark-core_2.11\:\#@\#\:+revision\:\#@\#\:+2.0.0\:\#@\#\:=2.0.0 release 2.0.0 null
5 | +organisation\:\#@\#\:+org.apache.spark\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:+module\:\#@\#\:+spark-streaming_2.11\:\#@\#\:+revision\:\#@\#\:+2.0.0\:\#@\#\:=2.0.0 release 2.0.0 null
6 | +organisation\:\#@\#\:+org.scala-lang\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:+module\:\#@\#\:+scala-compiler\:\#@\#\:+revision\:\#@\#\:+2.11.7\:\#@\#\:=2.11.7 release 2.11.7 null
7 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/resolution-cache/simple-project/simple-project_2.11/1.0/resolved.xml.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
9 |
10 | Simple Project
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/resolution-cache/spark-streaming-example/spark-streaming-example_2.11/1.0/resolved.xml.properties:
--------------------------------------------------------------------------------
1 | #spark-streaming-example#spark-streaming-example_2.11;1.0 resolved revisions
2 | #Fri Aug 25 23:30:17 IST 2017
3 | +organisation\:\#@\#\:+org.jfarcand\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:+module\:\#@\#\:+wcs\:\#@\#\:+revision\:\#@\#\:+1.5\:\#@\#\:=1.5 release 1.5 null
4 | +organisation\:\#@\#\:+org.apache.httpcomponents\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:+module\:\#@\#\:+httpclient\:\#@\#\:+revision\:\#@\#\:+4.5.2\:\#@\#\:=4.5.2 release 4.5.2 null
5 | +organisation\:\#@\#\:+org.apache.spark\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:+module\:\#@\#\:+spark-streaming_2.11\:\#@\#\:+revision\:\#@\#\:+2.2.0\:\#@\#\:=2.2.0 release 2.2.0 null
6 | +organisation\:\#@\#\:+org.scala-lang\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:+module\:\#@\#\:+scala-library\:\#@\#\:+revision\:\#@\#\:+2.11.7\:\#@\#\:=2.11.7 ? 2.11.8 null
7 | +organisation\:\#@\#\:+org.apache.spark\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:+module\:\#@\#\:+spark-core_2.11\:\#@\#\:+revision\:\#@\#\:+2.2.0\:\#@\#\:=2.2.0 release 2.2.0 null
8 | +organisation\:\#@\#\:+org.scalaj\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:+module\:\#@\#\:+scalaj-http_2.11\:\#@\#\:+revision\:\#@\#\:+2.2.1\:\#@\#\:=2.2.1 release 2.2.1 null
9 | +organisation\:\#@\#\:+org.scala-lang\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:+module\:\#@\#\:+scala-compiler\:\#@\#\:+revision\:\#@\#\:+2.11.7\:\#@\#\:=2.11.7 release 2.11.7 null
10 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/resolution-cache/spark-streaming-example/spark-streaming-example_2.11/1.0/resolved.xml.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
9 |
10 | spark-streaming-example
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/scala-2.11/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/scala-2.11/.DS_Store
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/scala-2.11/classes/CustomReceiver$$anon$1.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/scala-2.11/classes/CustomReceiver$$anon$1.class
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/scala-2.11/classes/CustomReceiver$$anonfun$1.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/scala-2.11/classes/CustomReceiver$$anonfun$1.class
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/scala-2.11/classes/CustomReceiver$$anonfun$2.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/scala-2.11/classes/CustomReceiver$$anonfun$2.class
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/scala-2.11/classes/CustomReceiver$$anonfun$3.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/scala-2.11/classes/CustomReceiver$$anonfun$3.class
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/scala-2.11/classes/CustomReceiver$.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/scala-2.11/classes/CustomReceiver$.class
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/scala-2.11/classes/CustomReceiver.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/scala-2.11/classes/CustomReceiver.class
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/scala-2.11/classes/TFLArrivalPredictionsByLine.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/scala-2.11/classes/TFLArrivalPredictionsByLine.class
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/scala-2.11/classes/TFLCustomReceiver$$anon$1.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/scala-2.11/classes/TFLCustomReceiver$$anon$1.class
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/scala-2.11/classes/TFLCustomReceiver$.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/scala-2.11/classes/TFLCustomReceiver$.class
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/scala-2.11/classes/TFLCustomReceiver.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/scala-2.11/classes/TFLCustomReceiver.class
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/scala-2.11/classes/TFLStreamingApp$.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/scala-2.11/classes/TFLStreamingApp$.class
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/scala-2.11/classes/TFLStreamingApp.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/scala-2.11/classes/TFLStreamingApp.class
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/scala-2.11/spark-streaming-example_2.11-1.0.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/scala-2.11/spark-streaming-example_2.11-1.0.jar
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/$global/$global/$global/streams/out:
--------------------------------------------------------------------------------
1 | [0m[[0mdebug[0m] [0mOther repositories:[0m
2 | [0m[[0mdebug[0m] [0mDefault repositories:[0m
3 | [0m[[0mdebug[0m] [0mOther repositories:[0m
4 | [0m[[0mdebug[0m] [0mDefault repositories:[0m
5 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/$global/dependencyPositions/$global/streams/update_cache_2.11/input_dsp:
--------------------------------------------------------------------------------
1 | org.scala-lang
scala-library 2.11.7 org.apache.spark
2 | spark-core 2.2.0 org.apache.spark spark-streaming 2.2.0 org.apache.httpcomponents
3 | httpclient 4.5.2
4 | org.scalaj scalaj-http 2.2.1 org.jfarcand wcs 1.5
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/$global/dependencyPositions/$global/streams/update_cache_2.11/output_dsp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/$global/dependencyPositions/$global/streams/update_cache_2.11/output_dsp
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/$global/ivyConfiguration/$global/streams/out:
--------------------------------------------------------------------------------
1 | [0m[[0mdebug[0m] [0mOther repositories:[0m
2 | [0m[[0mdebug[0m] [0mDefault repositories:[0m
3 | [0m[[0mdebug[0m] [0mUsing inline dependencies specified in Scala.[0m
4 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/$global/ivySbt/$global/streams/out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/$global/ivySbt/$global/streams/out
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/$global/projectDescriptors/$global/streams/out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/$global/projectDescriptors/$global/streams/out
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/$global/update/$global/streams/update_cache_2.11/inputs:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/$global/update/$global/streams/update_cache_2.11/inputs
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/$global/update/$global/streams/update_cache_2.11/output:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/$global/update/$global/streams/update_cache_2.11/output
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/compile/$global/$global/discoveredMainClasses/data:
--------------------------------------------------------------------------------
1 | CustomReceiver TFLCustomReceiver TFLStreamingApp
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/compile/compile/$global/streams/out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/compile/compile/$global/streams/out
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/compile/copyResources/$global/streams/copy-resources:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/compile/copyResources/$global/streams/copy-resources
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/compile/copyResources/$global/streams/out:
--------------------------------------------------------------------------------
1 | [0m[[0mdebug[0m] [0mCopy resource mappings: [0m
2 | [0m[[0mdebug[0m] [0m [0m
3 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/compile/exportedProducts/$global/streams/export:
--------------------------------------------------------------------------------
1 | /Users/aurobindosarkar/Downloads/SparkBook/Chapter5/code/p1/target/scala-2.11/classes
2 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/compile/incCompileSetup/$global/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/compile/incCompileSetup/$global/.DS_Store
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/compile/internalDependencyClasspath/$global/streams/export:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/compile/mainClass/$global/streams/out:
--------------------------------------------------------------------------------
1 | [0m[[33mwarn[0m] [0mMultiple main classes detected. Run 'show discoveredMainClasses' to see the list[0m
2 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/compile/packageBin/$global/streams/inputs:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/compile/packageBin/$global/streams/inputs
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/compile/packageBin/$global/streams/out:
--------------------------------------------------------------------------------
1 | [0m[[0minfo[0m] [0mPackaging /Users/aurobindosarkar/Downloads/SparkBook/Chapter5/code/p1/target/scala-2.11/spark-streaming-example_2.11-1.0.jar ...[0m
2 | [0m[[0mdebug[0m] [0mInput file mappings:[0m
3 | [0m[[0mdebug[0m] [0m TFLArrivalPredictionsByLine.class[0m
4 | [0m[[0mdebug[0m] [0m /Users/aurobindosarkar/Downloads/SparkBook/Chapter5/code/p1/target/scala-2.11/classes/TFLArrivalPredictionsByLine.class[0m
5 | [0m[[0mdebug[0m] [0m TFLCustomReceiver$.class[0m
6 | [0m[[0mdebug[0m] [0m /Users/aurobindosarkar/Downloads/SparkBook/Chapter5/code/p1/target/scala-2.11/classes/TFLCustomReceiver$.class[0m
7 | [0m[[0mdebug[0m] [0m TFLCustomReceiver$$anon$1.class[0m
8 | [0m[[0mdebug[0m] [0m /Users/aurobindosarkar/Downloads/SparkBook/Chapter5/code/p1/target/scala-2.11/classes/TFLCustomReceiver$$anon$1.class[0m
9 | [0m[[0mdebug[0m] [0m CustomReceiver$.class[0m
10 | [0m[[0mdebug[0m] [0m /Users/aurobindosarkar/Downloads/SparkBook/Chapter5/code/p1/target/scala-2.11/classes/CustomReceiver$.class[0m
11 | [0m[[0mdebug[0m] [0m CustomReceiver.class[0m
12 | [0m[[0mdebug[0m] [0m /Users/aurobindosarkar/Downloads/SparkBook/Chapter5/code/p1/target/scala-2.11/classes/CustomReceiver.class[0m
13 | [0m[[0mdebug[0m] [0m TFLStreamingApp.class[0m
14 | [0m[[0mdebug[0m] [0m /Users/aurobindosarkar/Downloads/SparkBook/Chapter5/code/p1/target/scala-2.11/classes/TFLStreamingApp.class[0m
15 | [0m[[0mdebug[0m] [0m CustomReceiver$$anonfun$2.class[0m
16 | [0m[[0mdebug[0m] [0m /Users/aurobindosarkar/Downloads/SparkBook/Chapter5/code/p1/target/scala-2.11/classes/CustomReceiver$$anonfun$2.class[0m
17 | [0m[[0mdebug[0m] [0m CustomReceiver$$anon$1.class[0m
18 | [0m[[0mdebug[0m] [0m /Users/aurobindosarkar/Downloads/SparkBook/Chapter5/code/p1/target/scala-2.11/classes/CustomReceiver$$anon$1.class[0m
19 | [0m[[0mdebug[0m] [0m CustomReceiver$$anonfun$3.class[0m
20 | [0m[[0mdebug[0m] [0m /Users/aurobindosarkar/Downloads/SparkBook/Chapter5/code/p1/target/scala-2.11/classes/CustomReceiver$$anonfun$3.class[0m
21 | [0m[[0mdebug[0m] [0m CustomReceiver$$anonfun$1.class[0m
22 | [0m[[0mdebug[0m] [0m /Users/aurobindosarkar/Downloads/SparkBook/Chapter5/code/p1/target/scala-2.11/classes/CustomReceiver$$anonfun$1.class[0m
23 | [0m[[0mdebug[0m] [0m TFLStreamingApp$.class[0m
24 | [0m[[0mdebug[0m] [0m /Users/aurobindosarkar/Downloads/SparkBook/Chapter5/code/p1/target/scala-2.11/classes/TFLStreamingApp$.class[0m
25 | [0m[[0mdebug[0m] [0m TFLCustomReceiver.class[0m
26 | [0m[[0mdebug[0m] [0m /Users/aurobindosarkar/Downloads/SparkBook/Chapter5/code/p1/target/scala-2.11/classes/TFLCustomReceiver.class[0m
27 | [0m[[0minfo[0m] [0mDone packaging.[0m
28 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/compile/packageBin/$global/streams/output:
--------------------------------------------------------------------------------
1 | r/Users/aurobindosarkar/Downloads/SparkBook/Chapter5/code/p1/target/scala-2.11/spark-streaming-example_2.11-1.0.jar
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/compile/run/$global/streams/out:
--------------------------------------------------------------------------------
1 | java.lang.RuntimeException: No main class detected.
2 | at scala.sys.package$.error(package.scala:27)
3 | at sbt.Defaults$$anonfun$runTask$1$$anonfun$apply$36$$anonfun$apply$37$$anonfun$29.apply(Defaults.scala:733)
4 | at sbt.Defaults$$anonfun$runTask$1$$anonfun$apply$36$$anonfun$apply$37$$anonfun$29.apply(Defaults.scala:733)
5 | at scala.Option.getOrElse(Option.scala:120)
6 | at sbt.Defaults$$anonfun$runTask$1$$anonfun$apply$36$$anonfun$apply$37.apply(Defaults.scala:733)
7 | at sbt.Defaults$$anonfun$runTask$1$$anonfun$apply$36$$anonfun$apply$37.apply(Defaults.scala:732)
8 | at scala.Function1$$anonfun$compose$1.apply(Function1.scala:47)
9 | at sbt.$tilde$greater$$anonfun$$u2219$1.apply(TypeFunctions.scala:40)
10 | at sbt.std.Transform$$anon$4.work(System.scala:63)
11 | at sbt.Execute$$anonfun$submit$1$$anonfun$apply$1.apply(Execute.scala:226)
12 | at sbt.Execute$$anonfun$submit$1$$anonfun$apply$1.apply(Execute.scala:226)
13 | at sbt.ErrorHandling$.wideConvert(ErrorHandling.scala:17)
14 | at sbt.Execute.work(Execute.scala:235)
15 | at sbt.Execute$$anonfun$submit$1.apply(Execute.scala:226)
16 | at sbt.Execute$$anonfun$submit$1.apply(Execute.scala:226)
17 | at sbt.ConcurrentRestrictions$$anon$4$$anonfun$1.apply(ConcurrentRestrictions.scala:159)
18 | at sbt.CompletionService$$anon$2.call(CompletionService.scala:28)
19 | at java.util.concurrent.FutureTask.run(FutureTask.java:266)
20 | at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
21 | at java.util.concurrent.FutureTask.run(FutureTask.java:266)
22 | at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
23 | at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
24 | at java.lang.Thread.run(Thread.java:745)
25 | [0m[[31merror[0m] [0m(compile:[31mrun[0m) No main class detected.[0m
26 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/compile/unmanagedClasspath/$global/streams/export:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/compile/unmanagedJars/$global/streams/export:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/runtime/exportedProducts/$global/streams/export:
--------------------------------------------------------------------------------
1 | /Users/aurobindosarkar/Downloads/SparkBook/Chapter5/code/p1/target/scala-2.11/classes
2 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/runtime/internalDependencyClasspath/$global/streams/export:
--------------------------------------------------------------------------------
1 | /Users/aurobindosarkar/Downloads/SparkBook/Chapter5/code/p1/target/scala-2.11/classes
2 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/runtime/unmanagedClasspath/$global/streams/export:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/SparkBookChapter5/Chapter5/code/p1/target/streams/runtime/unmanagedJars/$global/streams/export:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/__MACOSX/SparkBookChapter5/._.DS_Store:
--------------------------------------------------------------------------------
1 | Mac OS X 2 F x ATTR x x
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/__MACOSX/SparkBookChapter5/Chapter5/._.DS_Store:
--------------------------------------------------------------------------------
1 | Mac OS X 2 F x ATTR x x
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/__MACOSX/SparkBookChapter5/Chapter5/code/._.DS_Store:
--------------------------------------------------------------------------------
1 | Mac OS X 2 F x ATTR x x
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/__MACOSX/SparkBookChapter5/Chapter5/code/p1/._.DS_Store:
--------------------------------------------------------------------------------
1 | Mac OS X 2 F x ATTR x x
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/__MACOSX/SparkBookChapter5/Chapter5/code/p1/._simple.sbt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter05/SparkBookChapter5/__MACOSX/SparkBookChapter5/Chapter5/code/p1/._simple.sbt
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/__MACOSX/SparkBookChapter5/Chapter5/code/p1/project/._.DS_Store:
--------------------------------------------------------------------------------
1 | Mac OS X 2 F x ATTR x x
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/__MACOSX/SparkBookChapter5/Chapter5/code/p1/project/target/._.DS_Store:
--------------------------------------------------------------------------------
1 | Mac OS X 2 F x ATTR x x
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/__MACOSX/SparkBookChapter5/Chapter5/code/p1/project/target/resolution-cache/._.DS_Store:
--------------------------------------------------------------------------------
1 | Mac OS X 2 F x ATTR x x
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/__MACOSX/SparkBookChapter5/Chapter5/code/p1/project/target/resolution-cache/default/._.DS_Store:
--------------------------------------------------------------------------------
1 | Mac OS X 2 F x ATTR x x
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/__MACOSX/SparkBookChapter5/Chapter5/code/p1/project/target/resolution-cache/default/p1-build/._.DS_Store:
--------------------------------------------------------------------------------
1 | Mac OS X 2 F x ATTR x x
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/__MACOSX/SparkBookChapter5/Chapter5/code/p1/project/target/resolution-cache/default/p1-build/scala_2.10/._.DS_Store:
--------------------------------------------------------------------------------
1 | Mac OS X 2 F x ATTR x x
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/__MACOSX/SparkBookChapter5/Chapter5/code/p1/project/target/streams/._.DS_Store:
--------------------------------------------------------------------------------
1 | Mac OS X 2 F x ATTR x x
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/__MACOSX/SparkBookChapter5/Chapter5/code/p1/src/._.DS_Store:
--------------------------------------------------------------------------------
1 | Mac OS X 2 F x ATTR x x
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/__MACOSX/SparkBookChapter5/Chapter5/code/p1/src/main/._.DS_Store:
--------------------------------------------------------------------------------
1 | Mac OS X 2 F x ATTR x x
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/__MACOSX/SparkBookChapter5/Chapter5/code/p1/src/main/scala/._.DS_Store:
--------------------------------------------------------------------------------
1 | Mac OS X 2 F x ATTR x x
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/__MACOSX/SparkBookChapter5/Chapter5/code/p1/target/._.DS_Store:
--------------------------------------------------------------------------------
1 | Mac OS X 2 F x ATTR x x
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/__MACOSX/SparkBookChapter5/Chapter5/code/p1/target/scala-2.11/._.DS_Store:
--------------------------------------------------------------------------------
1 | Mac OS X 2 F x ATTR x x
--------------------------------------------------------------------------------
/Chapter05/SparkBookChapter5/__MACOSX/SparkBookChapter5/Chapter5/code/p1/target/streams/compile/incCompileSetup/$global/._.DS_Store:
--------------------------------------------------------------------------------
1 | Mac OS X 2 F x ATTR x x
--------------------------------------------------------------------------------
/Chapter07/Chapter7.scala:
--------------------------------------------------------------------------------
1 | //Code for Chapter 7 to be executed in Spark shell
2 | //./bin/spark-shell --packages graphframes:graphframes:0.3.0-spark2.0-s_2.11
3 |
4 | //Code for Exploring graphs using GraphFrames section
5 | import org.apache.spark.sql.types._
6 | import org.apache.spark.sql.functions._
7 | import spark.implicits._
8 | import org.apache.spark.sql.Row
9 | import org.graphframes._
10 |
11 | //Code for Constructing a GraphFrame section
12 | val edgesRDD = spark.sparkContext.textFile("file:///Users/aurobindosarkar/Downloads/amzncopurchase/amazon0601.txt") //input: one tab-separated "src dst" pair per line (see the split on "\t" below)
13 | val schemaString = "src dst"
14 | val fields = schemaString.split(" ").map(fieldName => StructField(fieldName, StringType, nullable = false)) //both columns kept as strings
15 | val edgesSchema = new StructType(fields)
16 | val rowRDD = edgesRDD.map(_.split("\t")).map(attributes => Row(attributes(0).trim, attributes(1).trim))
17 | val edgesDF = spark.createDataFrame(rowRDD, edgesSchema)
18 | val srcVerticesDF = edgesDF.select($"src").distinct
19 | val destVerticesDF = edgesDF.select($"dst").distinct
20 | val verticesDF = srcVerticesDF.union(destVerticesDF).distinct.select($"src".alias("id")) //vertex set = union of all edge endpoints; single column renamed "id" as GraphFrame requires
21 | edgesDF.count()
22 | verticesDF.count()
23 | val g = GraphFrame(verticesDF, edgesDF) //vertices: ["id"]; edges: ["src", "dst"]
24 |
25 | //Code for Basic graph queries and operations section
26 | g.vertices.show(5)
27 | g.edges.show(5)
28 | g.inDegrees.show(5)
29 | g.outDegrees.show(5)
30 | g.edges.filter("src == 2").count()
31 | g.edges.filter("src == 2").show()
32 | g.edges.filter("dst == 2").show()
33 | g.inDegrees.filter("inDegree >= 10").show()
34 | g.inDegrees.groupBy("inDegree").count().sort(desc("inDegree")).show(5)
35 | g.outDegrees.groupBy("outDegree").count().sort(desc("outDegree")).show(5)
36 |
37 | //Code for Motif analysis using GraphFrames section
38 | val motifs = g.find("(a)-[e]->(b); (b)-[e2]->(a)") //pairs of vertices connected by edges in both directions
39 | motifs.show(5)
40 | motifs.filter("b.id == 2").show()
41 | val motifs3 = g.find("(a)-[e1]->(b); (a)-[e2]->(c)").filter("(b != c)") //one vertex pointing at two distinct vertices
42 | motifs3.show(5)
43 | val motifs3 = g.find("(a)-[]->(b); (a)-[]->(c)").filter("(b != c)") //NOTE: re-binding the same val name is legal only in the spark-shell REPL
44 | motifs3.show()
45 | motifs3.count()
46 | val motifs3 = g.find("(a)-[]->(b); (a)-[]->(c); (b)-[]->(a)").filter("(b != c)")
47 | motifs3.show()
48 | motifs3.count()
49 | val motifs3 = g.find("(a)-[]->(b); (c)-[]->(b)").filter("(a != c)") //two distinct vertices pointing at the same vertex
50 | motifs3.show(5)
51 | motifs3.count()
52 |
53 | val motifs3 = g.find("(a)-[]->(b); (b)-[]->(c); (c)-[]->(b)")
54 | motifs3.show(5)
55 | motifs3.count()
56 |
57 | //Ensure you have sufficient disk (>100 GB) and RAM (>=14 GB) before running the following motifs4 query.
58 | //Alternatively, run the following on smaller subgraphs.
59 | val motifs4 = g.find("(a)-[e1]->(b); (c)-[e2]->(b); (c)-[e3]->(d)").filter("(a != c) AND (d != b) AND (d != a)")
60 | motifs4.show(5)
61 | motifs4.count()
62 |
63 |
64 | //Code for Processing subgraphs using GraphFrames
65 | val v2 = g.vertices.filter("id < 10") //fixed: the vertices DataFrame has a single column "id" (see its construction above); filtering on "src" would fail to resolve
66 | val e2 = g.edges.filter("src < 10")
67 | val g2 = GraphFrame(v2, e2)
68 | g2.edges.groupBy("src").count().show()
69 |
70 | val paths = g.find("(a)-[e]->(b)").filter("e.src < e.dst") //keep only one direction of each mutual pair
71 | val e2 = paths.select("e.*") //REPL re-binding; extracts the edge columns from the motif result
72 |
73 | //************************************
74 | //sc.setCheckpointDir("hdfs://localhost:9000/cp") //uncomment and point at a valid directory before running stronglyConnectedComponents, which requires a checkpoint dir
75 | //***********************************
76 | //Code for Applying graph algorithms using GraphFrames section
77 | val result = g.stronglyConnectedComponents.maxIter(10).run()
78 | result.select("id", "component").groupBy("component").count().sort($"count".desc).show()
79 |
80 | val results = g.triangleCount.run()
81 | results.select("id", "count").show()
82 |
83 | val results = g.pageRank.resetProbability(0.15).tol(0.01).run() //REPL re-binding of "results" here and below
84 | val prank = results.vertices.sort(desc("pagerank"))
85 | prank.show(5)
86 |
87 | val results = g.labelPropagation.maxIter(10).run()
88 | results.select("id", "label").show()
89 |
90 | val results = g.shortestPaths.landmarks(Seq("1110", "352")).run() //landmark ids are strings because vertex ids are StringType
91 | results.select("id", "distances").take(5).foreach(println)
92 |
93 | //Code for Saving and loading GraphFrames section
94 | g.vertices.write.mode("overwrite").parquet("hdfs://localhost:9000/gf/vertices")
95 | g.edges.write.mode("overwrite").parquet("hdfs://localhost:9000/gf/edges")
96 | val v = spark.read.parquet("hdfs://localhost:9000/gf/vertices")
97 | val e = spark.read.parquet("hdfs://localhost:9000/gf/edges")
98 | val g = GraphFrame(v, e) //re-binds g to the reloaded graph
99 |
100 | //Code for Analyzing a JSON dataset modeled as a graph section
101 | //The downloaded input file needs to be processed before executing the following code.
102 | //Use the Preprocess.java class for this purpose.
103 | //Open the source file and change the source and destination file paths.
104 | //And then compile and run the program to create the JSON format input file.
105 | val df1 = spark.read.json("file:///Users/aurobindosarkar/Downloads/input.json")
106 | df1.printSchema()
107 | df1.take(5).foreach(println)
108 | val x1=df1.select(df1.col("similarLines"))
109 | df1.select(df1.col("similarLines.similar")).take(5).foreach(println)
110 |
111 | df1.select(concat_ws(",", $"similarLines.similar")).take(5).foreach(println)
112 | val flattened = df1.select($"ASIN", explode($"reviewLines.review").as("review_flat")) //one row per review
113 | flattened.show()
114 | val flatReview = flattened.select("ASIN", "review_flat.customerId")
115 |
116 | val nodesDF = df1.select($"ASIN".alias("id"), $"Id".alias("productId"), $"title", $"ReviewMetaData", $"categories", $"categoryLines", $"group", $"reviewLines", $"salerank", $"similarLines", $"similars")
117 | val edgesDF = df1.select($"ASIN".alias("src"), explode($"similarLines.similar").as("dst")) //one edge per "similar" product
118 | val g = GraphFrame(nodesDF, edgesDF) //REPL re-binding: g is now the product co-purchase graph
119 | g.edges.filter("salerank < 100").count() //NOTE(review): edgesDF has only src/dst columns while salerank is a vertex attribute — verify this filter resolves as intended
120 | g.vertices.groupBy("group").count().show()
121 | val v2 = g.vertices.filter("group = 'Book'")
122 | v2.count()
123 | val e2 = g.edges
124 | e2.count()
125 | val g2 = GraphFrame(v2, e2)
126 | g2.vertices.count()
127 | g2.edges.count()
128 |
129 | val v2t = v2.select("id")
130 | val e2t = v2t.join(e2, v2t("id") === e2("src")) //keep only edges whose source is a Book vertex
131 | e2t.count()
132 | val e2t1 = v2t.join(e2, v2t("id") === e2("src")).drop("id")
133 | val e2t2 = v2t.join(e2t1, v2t("id") === e2t1("dst")).drop("id") //edges with both endpoints in the Book subset
134 | e2t2.count()
135 | // "paths" contains vertex info. Extract the edges.
136 |
137 | val es = g.edges.filter("salerank < 100") //NOTE(review): same concern as above — salerank is a vertex attribute, not an edge column
138 | es.count()
139 | val e3 = es.select("src", "dst")
140 | val g3 = GraphFrame(g.vertices, e3)
141 | g3.vertices.count()
142 | g3.edges.count()
143 |
144 | val motifs = g3.find("(a)-[e]->(b); (b)-[e2]->(a)") //mutually-similar product pairs
145 | motifs.show()
146 | motifs.filter("b.ReviewMetaData.avg_rating > 4.0").show()
147 | val paths = g3.find("(a)-[e]->(b)").filter("a.group = 'Book' AND b.group = 'Book'").filter("a.salerank < b.salerank")
148 | val e2 = paths.select("e.src", "e.dst")
149 | val g2 = GraphFrame(g.vertices, e2)
150 | g2.vertices.take(5).foreach(println)
151 | g2.edges.take(5).foreach(println)
152 |
153 |
154 | import org.graphframes.lib.AggregateMessages
155 | val AM = AggregateMessages
156 | val msgToSrc = AM.dst("similars") //each edge sends its destination's "similars" value to the source
157 | val msgToDst = AM.src("similars") //and its source's "similars" value to the destination
158 | val agg = g.aggregateMessages.sendToSrc(msgToSrc).sendToDst(msgToDst).agg(sum(AM.msg).as("SummedSimilars")) //per-vertex sum of neighbors' "similars"
159 | agg.show()
160 |
161 | //Code for Processing graphs containing multiple types of relationships section
162 | val joinDF = nodesDF.join(edgesDF).where(nodesDF("id") === edgesDF("src")).withColumn("relationship", when(($"similars" > 4) and ($"categories" <= 3), "highSimilars").otherwise("alsoPurchased")) //label each edge from its source vertex's attributes
163 | val edgesDFR = joinDF.select("src", "dst", "relationship")
164 | val gDFR = GraphFrame(nodesDF, edgesDFR) //same vertices; edges now carry a "relationship" column
165 | gDFR.edges.groupBy("relationship").count().show()
166 | gDFR.edges.show()
167 | val numHS = gDFR.edges.filter("relationship = 'highSimilars'").count()
168 |
169 | val v2 = gDFR.vertices.filter("salerank < 2000000")
170 | val e2 = gDFR.edges.filter("relationship = 'highSimilars'")
171 | val g2 = GraphFrame(v2, e2)
172 | val numEHS = g2.edges.count()
173 | val numVHS = g2.vertices.count()
174 |
175 | val paths = gDFR.find("(a)-[e]->(b)").filter("e.relationship = 'highSimilars'").filter("a.group = b.group") //fixed: use gDFR (only its edges carry "relationship") and SQL "=" — the Column-DSL "===" is invalid inside an expression string
176 |
177 |
178 | val bfsDF = gDFR.bfs.fromExpr("group = 'Book'").toExpr("categories < 3").edgeFilter("relationship != 'alsoPurchased'").maxPathLength(3).run()
179 | bfsDF.take(2).foreach(println)
180 |
181 | val v2 = gDFR.vertices.select("id", "group", "similars").filter("group = 'Book'")
182 | val e2 = gDFR.edges.filter("relationship = 'highSimilars'")
183 | val g2 = GraphFrame(v2, e2)
184 | val numEHS = g2.edges.count()
185 | val numVHS = g2.vertices.count()
186 | val res1 = g2.find("(a)-[]->(b); (b)-[]->(c); !(a)-[]->(c)").filter("(a.group = c.group) and (a.similars = c.similars)") //"!" = negated motif term: a must NOT link directly to c
187 | val res2 = res1.filter("a.id != c.id").select("a.id", "a.group", "a.similars", "c.id", "c.group", "c.similars")
188 | res2.count()
189 | res2.show(5)
190 |
191 | val v2 = gDFR.vertices.select("id", "group", "title").filter("group = 'Book'") //REPL re-binding of v2/e2/g2
192 | val e2 = gDFR.edges.filter("relationship = 'highSimilars'")
193 | val g2 = GraphFrame(v2, e2)
194 | val results = g2.pageRank.resetProbability(0.15).tol(0.01).run()
195 | val prank = results.vertices.sort(desc("pagerank"))
196 | prank.take(10).foreach(println)
197 |
198 | //Code for Viewing GraphFrame physical execution plan section
199 | g.edges.filter("salerank < 100").explain() //NOTE(review): salerank is a vertex attribute; g.edges has only src/dst — verify this filter resolves
200 |
201 | //Code for Understanding partitioning in GraphFrames section
202 | val v1 = g.vertices.select("id", "group").na.fill("unknown") //replace null group with "unknown"
203 | v1.show()
204 | v1.groupBy("group").count().show
205 | val g1 = GraphFrame(v1, g.edges)
206 |
207 | val v2 = g.vertices.select("id", "group").na.fill("unknown")
208 | val g2t1 = GraphFrame(v2, g.edges)
209 | val g2t2 = g2t1.vertices.repartition(11, $"group") //hash-partition vertices by group into 11 partitions
210 | val g2 = GraphFrame(g2t2, g.edges)
211 | g1.vertices.show()
212 | g2.vertices.show()
213 |
214 | g1.vertices.rdd.partitions.size
215 | g2.vertices.rdd.partitions.size
216 |
217 | g1.vertices.write.mode("overwrite").csv("file:///Users/aurobindosarkar/Downloads/g1/partitions")
218 | g2.vertices.write.mode("overwrite").csv("file:///Users/aurobindosarkar/Downloads/g2/partitions")
219 |
220 |
221 | val g2c = g2.vertices.coalesce(5) //reduce partition count without a full shuffle
222 | g2c.rdd.partitions.size
223 |
--------------------------------------------------------------------------------
/Chapter07/Chapter7/Chapter7/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter07/Chapter7/Chapter7/.DS_Store
--------------------------------------------------------------------------------
/Chapter07/Chapter7/Chapter7/.classpath:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/Chapter07/Chapter7/Chapter7/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | Chapter7
4 |
5 |
6 |
7 |
8 |
9 | org.eclipse.jdt.core.javabuilder
10 |
11 |
12 |
13 |
14 |
15 | org.eclipse.jdt.core.javanature
16 |
17 |
18 |
--------------------------------------------------------------------------------
/Chapter07/Chapter7/Chapter7/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
3 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
4 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
5 | org.eclipse.jdt.core.compiler.compliance=1.8
6 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate
7 | org.eclipse.jdt.core.compiler.debug.localVariable=generate
8 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate
9 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
10 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
11 | org.eclipse.jdt.core.compiler.source=1.8
12 |
--------------------------------------------------------------------------------
/Chapter07/Chapter7/Chapter7/bin/Category.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter07/Chapter7/Chapter7/bin/Category.class
--------------------------------------------------------------------------------
/Chapter07/Chapter7/Chapter7/bin/Preprocess.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter07/Chapter7/Chapter7/bin/Preprocess.class
--------------------------------------------------------------------------------
/Chapter07/Chapter7/Chapter7/bin/Product.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter07/Chapter7/Chapter7/bin/Product.class
--------------------------------------------------------------------------------
/Chapter07/Chapter7/Chapter7/bin/Review.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter07/Chapter7/Chapter7/bin/Review.class
--------------------------------------------------------------------------------
/Chapter07/Chapter7/Chapter7/bin/ReviewMetaData.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Learning-Spark-SQL/792438703bb86065b6733fb8eeb88f775f8a50b7/Chapter07/Chapter7/Chapter7/bin/ReviewMetaData.class
--------------------------------------------------------------------------------
/Chapter07/Chapter7/Chapter7/src/Category.java:
--------------------------------------------------------------------------------
1 |
// Mutable holder for one "categories" line of the Amazon metadata dump:
// the raw tokens of a single category path, as split by the Preprocess driver.
// Package-private field with bean-style accessors; no defensive copying is done,
// so callers share the same array instance.
2 | public class Category {
3 | 	String[] fields;
4 |
// Returns the raw category-path tokens (null until setFields is called).
5 | 	public String[] getFields() {
6 | 		return fields;
7 | 	}
8 |
// Stores the tokens as-is (no copy) — the caller's array is aliased.
9 | 	public void setFields(String[] fields) {
10 | 		this.fields = fields;
11 | 	}
12 |
13 | }
14 |
--------------------------------------------------------------------------------
/Chapter07/Chapter7/Chapter7/src/Preprocess.java:
--------------------------------------------------------------------------------
1 | import java.io.BufferedReader;
2 | import java.io.File;
3 | import java.io.FileNotFoundException;
4 | import java.io.FileReader;
5 | import java.io.IOException;
6 | import java.io.PrintWriter;
7 | import java.util.StringTokenizer;
8 |
// Command-line driver that converts the raw amazon-meta.txt dump into one JSON
// record per product (written via Product.convertToJson). Parses the file line
// by line, accumulating fields into a Product until the next "Id:" or a
// "discontinued product" marker flushes the current record.
// NOTE(review): the dump of this file is truncated/garbled from its line 74
// onward (generic type arguments appear stripped) — remainder not visible here.
9 | public class Preprocess {
10 | 	static int readCount=0;
// args[0] = input metadata file, args[1] = output JSON file; exits with a
// usage message when either is missing.
// NOTE(review): "usgae" is a typo in the usage string — fix in a code change,
// not in this comment-only pass.
11 | 	public static void main(String[] args) throws IOException{
12 | 		if (args.length < 2) {
13 | 			System.out.println("Correct usgae: java Preprocess your-input-file-with-full-path your-output-file-with-full-path");
14 | 			System.out.println("For example: java Preprocess /Users/aurobindosarkar/Downloads/amazon-meta.txt /Users/aurobindosarkar/Downloads/delaftertestinput.json");
15 | 			System.exit(0);
16 | 		}
17 | 		String cmdLineInFile = args[0];
18 | 		String cmdLineOutFile = args[1];
19 | 		boolean discontinued = false;
// Neither stream is closed in the visible code — presumably relies on process
// exit; a try-with-resources would be safer. TODO confirm against full file.
20 | 		BufferedReader br = new BufferedReader(new FileReader(cmdLineInFile));
21 | 		PrintWriter inputJson = new PrintWriter(cmdLineOutFile);
22 | 		System.out.println("Processing...");
23 | 		int writeCount=0;
24 | 		String line = "";
25 | 		Category[] categories;
26 | 		int categoryCount = 0;
27 | 		int dropCount=0;
28 | 		Product p = null;
29 | 		while ((line=br.readLine())!=null){
// Discontinued marker: flush the current product flagged as discontinued.
// NOTE(review): if this marker precedes any "Id:" line, p is still null here
// and convertToJson throws NPE — confirm input format guarantees ordering.
30 | 			if (line.contains("discontinued product")){
31 | 				discontinued=true;
32 | 				String record=p.convertToJson(discontinued);
33 | 				inputJson.println(record);
34 | 				discontinued=false;
35 | 				p=null;
36 | 			} else {
// A new "Id:" line starts the next product; first flush the previous one.
// (The "title" guard avoids matching product titles that contain "Id:".)
37 | 				if ((line.contains("Id:")) && (!line.contains("title"))){
38 | 					if (p!=null){
39 | 						String record=p.convertToJson(discontinued);
40 | 						inputJson.println(record);
41 | 						writeCount++;
42 | 					}
43 | 					p = new Product();
44 | 					p.setId(Integer.parseInt(line.substring(line.indexOf("Id:")+3).trim()));
45 | 				} else if (line.contains("ASIN:")) {
46 | 					p.setASIN(line.substring(line.indexOf("ASIN:")+5).trim());
47 | 				} else if (line.contains("title:")) {
48 | 					p.setTitle(line.substring(line.indexOf("title:")+6).trim());
49 | 				} else if (line.contains("group:")) {
50 | 					p.setGroup(line.substring(line.indexOf("group:")+6).trim());
51 | 				} else if (line.contains("salesrank")){
52 | 					p.setSalesrank(Integer.parseInt(line.substring(line.indexOf("salesrank:")+10).trim()));
// "similar:" line format: <count> <ASIN> <ASIN> ... — first token is the
// count, the rest are the similar-product ASINs.
53 | 				} else if (line.contains("similar")){
54 | 					StringTokenizer st = new StringTokenizer(line.substring(line.indexOf("similar:")+8).trim());
55 | 					String[] similar = new String[st.countTokens()-1];
56 | 					int i = 0;
57 | 					int simCount = 0;
58 | 					int j = 0;
59 | 					while (st.hasMoreElements()) {
60 | 						if (i==0) {
61 | 							simCount = Integer.parseInt(st.nextElement().toString());
62 | 						} else {
63 | 							similar[j]=st.nextElement().toString();
64 | 							j++;
65 | 						}
66 | 						i++;
67 | 					}
68 | 					p.setSimCount(simCount);
69 | 					p.setSimilar(similar);
70 | 				} else if (line.contains("categories")){
71 | 					p.setCatCount(Integer.parseInt(line.substring(line.indexOf("categories:")+11).trim()));
72 | 					categories = new Category[p.getCatCount()];
73 | 					String[] lines = new String[p.getCatCount()];
// NOTE(review): dump is corrupted/truncated from here — the loop header lost
// its generic/comparison text and the rest of main is missing from this view.
74 | 					for (int i=0; i categories;
// Interior of Product.java (class header lies outside this view): fields and
// bean-style accessors for one Amazon product record. NOTE(review): the dump
// stripped generic type arguments — "ArrayList reviews" etc. were presumably
// ArrayList<Review> / ArrayList<Category> in the original; confirm upstream.
14 | 	ReviewMetaData rmd;
15 | 	ArrayList reviews = new ArrayList();
16 | 	public int getSimCount() {
17 | 		return simCount;
18 | 	}
19 | 	public void setSimCount(int simCount) {
20 | 		this.simCount = simCount;
21 | 	}
22 | 	public ArrayList getReviews() {
23 | 		return reviews;
24 | 	}
25 | 	public void setReviews(ArrayList reviews) {
26 | 		this.reviews = reviews;
27 | 	}
// Appends one review to the accumulated list.
28 | 	public void addReviews(Review review){
29 | 		reviews.add(review);
30 | 	}
31 | 	public ReviewMetaData getRmd() {
32 | 		return rmd;
33 | 	}
34 | 	public void setRmd(ReviewMetaData rmd) {
35 | 		this.rmd = rmd;
36 | 	}
// NOTE(review): 'review' duplicates 'reviews' above and is never initialised
// here — likely dead or vestigial; verify before removing.
37 | 	ArrayList review;
38 | 	public int getId() {
39 | 		return Id;
40 | 	}
41 | 	public void setId(int id) {
42 | 		Id = id;
43 | 	}
44 | 	public String getASIN() {
45 | 		return ASIN;
46 | 	}
47 | 	public void setASIN(String aSIN) {
48 | 		ASIN = aSIN;
49 | 	}
50 | 	public String getTitle() {
51 | 		return title;
52 | 	}
53 | 	public void setTitle(String title) {
54 | 		this.title = title;
55 | 	}
56 | 	public String getGroup() {
57 | 		return group;
58 | 	}
59 | 	public void setGroup(String group) {
60 | 		this.group = group;
61 | 	}
62 | 	public int getSalesrank() {
63 | 		return salesrank;
64 | 	}
65 | 	public void setSalesrank(int salesrank) {
66 | 		this.salesrank = salesrank;
67 | 	}
68 | 	public String[] getSimilar() {
69 | 		return similar;
70 | 	}
71 | 	public void setSimilar(String[] similar) {
72 | 		this.similar = similar;
73 | 	}
74 | 	public int getCatCount() {
75 | 		return catCount;
76 | 	}
// Side effect: besides storing the count, this RESETS the categories list —
// callers must set the count before adding categories.
77 | 	public void setCatCount(int catCount) {
78 | 		this.catCount = catCount;
79 | 		categories = new ArrayList();
80 | 	}
81 | 	public ArrayList getCategories() {
82 | 		return categories;
83 | 	}
84 | 	public void setCategories(ArrayList categories) {
85 | 		this.categories = categories;
86 | 	}
87 | 	public void addCategories(Category category){
88 | 		categories.add(category);
89 | 	}
90 | 	public ArrayList getReview() {
91 | 		return review;
92 | 	}
93 | 	public void setReview(ArrayList review) {
94 | 		this.review = review;
95 | 	}
96 | public String convertToJson(boolean discontinued){
97 | StringBuffer jsonStringBuffer=new StringBuffer();
98 | jsonStringBuffer.append("{\"Id\":").append(this.getId()).append(",").append("\"ASIN\":").append("\"").append(this.getASIN()).append("\"");
99 | if (!discontinued){
100 | jsonStringBuffer.append(",");
101 | String t = this.getTitle().replaceAll("\"", " ").replaceAll("\\\\", " ").replaceAll("\t", " ");
102 |
103 | jsonStringBuffer.append("\"title\":").append("\"").append(t).append("\"").append(",");
104 | jsonStringBuffer.append("\"group\":").append("\"").append(this.getGroup()).append("\"").append(",").append("\"salerank\":").append(this.getSalesrank()).append(",");
105 | String[] similars=this.getSimilar();
106 | jsonStringBuffer.append("\"similars\":").append(this.getSimCount()).append(",");
107 | if (this.getSimCount()>0){
108 | jsonStringBuffer.append("\"similarLines\":").append("[");
109 | for (int i=0; i 0) {
119 | ArrayList categories = this.getCategories();
120 | jsonStringBuffer.append("\"categoryLines\": ");
121 | jsonStringBuffer.append("[");
122 | for (int i=0; i reviews = this.getReviews();
143 | jsonStringBuffer.append("\"reviewLines\":").append("[");
144 | for (int j=0; j