├── .gitattributes ├── .gitignore ├── 00-HelloSparkSQL ├── HelloSparkSQL.py ├── data │ └── sample.csv ├── lib │ ├── __init__.py │ └── logger.py └── log4j.properties ├── 01-StreamingWC ├── StreamingWC.py ├── lib │ ├── __init__.py │ └── logger.py └── log4j.properties ├── 02-FileStreamDemo ├── FileStreamDemo.py ├── SampleData │ ├── Invoice-set1.json │ ├── Invoice-set1.parquet │ ├── Invoice-set2.json │ ├── Invoice-set2.parquet │ ├── Invoice-set3.json │ └── Invoice-set3.parquet ├── lib │ ├── __init__.py │ └── logger.py └── log4j.properties ├── 03-KafkaStreamDemo ├── KafkaStreamDemo.py ├── datagen │ ├── data │ │ ├── Invoice.json │ │ ├── address.json │ │ └── products.json │ ├── pos-simulator-2.0.0.jar │ ├── run-datagen.cmd │ └── samples.json ├── kafka-scripts │ ├── 01-start-zookeeper.cmd │ ├── 02-start-kafka.cmd │ ├── 03-create-topic.cmd │ └── 04-start-producer.cmd ├── lib │ ├── __init__.py │ └── logger.py └── log4j.properties ├── 04-KafkaSinkDemo ├── KafkaSinkDemo.py ├── data │ └── samples.json ├── kafka-scripts │ ├── 01-start-zookeeper.cmd │ ├── 02-start-kafka.cmd │ ├── 03-create-input-topic.cmd │ ├── 04-create-output-topic.cmd │ ├── 05-start-producer.cmd │ └── 06-start-consumer.cmd ├── lib │ ├── __init__.py │ └── logger.py └── log4j.properties ├── 05-MultiQueryDemo ├── MultiQueryDemo.py ├── data │ └── samples.json ├── kafka-scripts │ ├── 01-start-zookeeper.cmd │ ├── 02-start-kafka.cmd │ ├── 03-create-input-topic.cmd │ ├── 04-create-output-topic.cmd │ ├── 05-start-producer.cmd │ └── 06-start-consumer.cmd ├── lib │ ├── __init__.py │ └── logger.py └── log4j.properties ├── 06-KafkaAvroSinkDemo ├── KafkaAvroSinkDemo.py ├── data │ └── samples.json ├── kafka-scripts │ ├── 01-start-zookeeper.cmd │ ├── 02-start-kafka.cmd │ ├── 03-create-input-topic.cmd │ ├── 04-create-output-topic.cmd │ ├── 05-start-producer.cmd │ └── 06-start-consumer.cmd ├── lib │ ├── __init__.py │ └── logger.py └── log4j.properties ├── 07-KafkaAvroSourceDemo ├── KafkaAvroSourceDemo.py ├── kafka-scripts │ ├── 
04-create-output-topic.cmd │ └── 06-start-consumer.cmd ├── lib │ ├── __init__.py │ └── logger.py ├── log4j.properties └── schema │ └── invoice-items ├── 08-TumblingWindowDemo ├── TumblingWindowDemo.py ├── data │ └── samples.txt ├── kafka-scripts │ ├── 01-start-zookeeper.cmd │ ├── 02-start-kafka.cmd │ ├── 03-create-input-topic.cmd │ └── 05-start-producer.cmd ├── lib │ ├── __init__.py │ └── logger.py └── log4j.properties ├── 09-SlidingWindowDemo ├── SlidingWindowDemo.py ├── data │ └── samples.json ├── kafka-scripts │ ├── 01-start-zookeeper.cmd │ ├── 02-start-kafka.cmd │ ├── 03-create-input-topic.cmd │ └── 05-start-producer.cmd ├── lib │ ├── __init__.py │ └── logger.py └── log4j.properties ├── 10-WatermarkDemo ├── WatermarkDemo.py ├── data │ └── samples.txt ├── kafka-scripts │ ├── 01-start-zookeeper.cmd │ ├── 02-start-kafka.cmd │ ├── 03-create-input-topic.cmd │ └── 05-start-producer.cmd ├── lib │ ├── __init__.py │ └── logger.py └── log4j.properties ├── 11-StreamTableJoinDemo ├── StreamTableJoinDemo.py ├── kafka-scripts │ ├── 01-start-zookeeper.cmd │ ├── 02-start-kafka.cmd │ ├── 03-create-input-topic.cmd │ └── 05-start-producer.cmd ├── lib │ ├── __init__.py │ └── logger.py ├── log4j.properties ├── static-data │ └── user.cql └── streaming-data │ └── logins.txt ├── 12-StreamStreamJoinDemo ├── StreamStreamJoinDemo.py ├── data │ └── events.txt ├── kafka-scripts │ ├── 01-start-zookeeper.cmd │ ├── 02-start-kafka.cmd │ ├── 03-create-impression-topic.cmd │ ├── 04-create-click-topic.cmd │ ├── 05-start-impression-producer.cmd │ └── 06-start-click-producer.cmd ├── lib │ ├── __init__.py │ └── logger.py └── log4j.properties ├── 13-StreamingWatermarkDemo ├── StreamingWatermarkDemo.py ├── data │ └── events.txt ├── kafka-scripts │ ├── 01-start-zookeeper.cmd │ ├── 02-start-kafka.cmd │ ├── 03-create-impression-topic.cmd │ ├── 04-create-click-topic.cmd │ ├── 05-start-impression-producer.cmd │ └── 06-start-click-producer.cmd ├── lib │ ├── __init__.py │ └── logger.py └── log4j.properties 
"""Batch Spark SQL example.

Loads ``data/sample.csv`` (header row, inferred column types), registers it
as the temporary view ``survey_tbl`` and prints, per ``Country``, the number
of rows whose ``Age`` is below 40.
"""
from pyspark.sql import SparkSession

from lib.logger import Log4j

if __name__ == "__main__":
    spark = SparkSession \
        .builder \
        .master("local[3]") \
        .appName("HelloSparkSQL") \
        .getOrCreate()

    try:
        logger = Log4j(spark)
        logger.info("Starting HelloSparkSQL")

        # The first CSV line supplies the column names; types are inferred
        # so that the numeric comparison on Age in the query below works.
        survey_df = spark.read \
            .option("header", "true") \
            .option("inferSchema", "true") \
            .csv("data/sample.csv")

        # Expose the DataFrame to SQL under a view name.
        survey_df.createOrReplaceTempView("survey_tbl")
        count_df = spark.sql("select Country, count(1) as count from survey_tbl where Age<40 group by Country")

        count_df.show()
        logger.info("Finished HelloSparkSQL")
    finally:
        # Always release the session, even when the read or the query fails.
        spark.stop()
"Timestamp","Age","Gender","Country","state","self_employed","family_history","treatment","work_interfere","no_employees","remote_work","tech_company","benefits","care_options","wellness_program","seek_help","anonymity","leave","mental_health_consequence","phys_health_consequence","coworkers","supervisor","mental_health_interview","phys_health_interview","mental_vs_physical","obs_consequence","comments" 2 | 2014-08-27 11:29:31,37,"Female","United States","IL",NA,"No","Yes","Often","6-25","No","Yes","Yes","Not sure","No","Yes","Yes","Somewhat easy","No","No","Some of them","Yes","No","Maybe","Yes","No",NA 3 | 2014-08-27 11:29:37,44,"M","United States","IN",NA,"No","No","Rarely","More than 1000","No","No","Don't know","No","Don't know","Don't know","Don't know","Don't know","Maybe","No","No","No","No","No","Don't know","No",NA 4 | 2014-08-27 11:29:44,32,"Male","Canada",NA,NA,"No","No","Rarely","6-25","No","Yes","No","No","No","No","Don't know","Somewhat difficult","No","No","Yes","Yes","Yes","Yes","No","No",NA 5 | 2014-08-27 11:29:46,31,"Male","United Kingdom",NA,NA,"Yes","Yes","Often","26-100","No","Yes","No","Yes","No","No","No","Somewhat difficult","Yes","Yes","Some of them","No","Maybe","Maybe","No","Yes",NA 6 | 2014-08-27 11:30:22,31,"Male","United States","TX",NA,"No","No","Never","100-500","Yes","Yes","Yes","No","Don't know","Don't know","Don't know","Don't know","No","No","Some of them","Yes","Yes","Yes","Don't know","No",NA 7 | 2014-08-27 11:31:22,33,"Male","United States","TN",NA,"Yes","No","Sometimes","6-25","No","Yes","Yes","Not sure","No","Don't know","Don't know","Don't know","No","No","Yes","Yes","No","Maybe","Don't know","No",NA 8 | 2014-08-27 11:31:50,35,"Female","United States","MI",NA,"Yes","Yes","Sometimes","1-5","Yes","Yes","No","No","No","No","No","Somewhat difficult","Maybe","Maybe","Some of them","No","No","No","Don't know","No",NA 9 | 2014-08-27 
class Log4j:
    """Small wrapper around a JVM log4j logger reached through a SparkSession.

    The wrapped logger is named ``<root_class>.<spark.app.name>``. With the
    default ``root_class`` the name falls under the
    ``guru.learningjournal.spark.examples`` category.
    """

    def __init__(self, spark, root_class="guru.learningjournal.spark.examples"):
        """Look up the log4j logger for the running Spark application.

        Args:
            spark: Session object providing ``_jvm`` (used to reach
                ``org.apache.log4j``) and ``sparkContext.getConf()``.
            root_class: Prefix of the logger name. Defaults to the namespace
                that used to be hard-coded, so existing callers that pass
                only ``spark`` get exactly the same logger as before.
        """
        log4j = spark._jvm.org.apache.log4j

        conf = spark.sparkContext.getConf()
        app_name = conf.get("spark.app.name")

        self.logger = log4j.LogManager.getLogger(root_class + "." + app_name)

    def warn(self, message):
        """Forward ``message`` to the wrapped logger at WARN level."""
        self.logger.warn(message)

    def info(self, message):
        """Forward ``message`` to the wrapped logger at INFO level."""
        self.logger.info(message)

    def error(self, message):
        """Forward ``message`` to the wrapped logger at ERROR level."""
        self.logger.error(message)

    def debug(self, message):
        """Forward ``message`` to the wrapped logger at DEBUG level."""
        self.logger.debug(message)
"""Structured Streaming word count.

Reads text from a socket on localhost:9999, splits every incoming ``value``
on single spaces, counts occurrences per word and writes the counts to the
console sink.
"""
from pyspark.sql import SparkSession
# Import only the function that is used instead of a wildcard import, so the
# script's namespace stays explicit.
from pyspark.sql.functions import expr

from lib.logger import Log4j

if __name__ == "__main__":
    spark = SparkSession \
        .builder \
        .appName("Streaming Word Count") \
        .master("local[3]") \
        .config("spark.streaming.stopGracefullyOnShutdown", "true") \
        .config("spark.sql.shuffle.partitions", 3) \
        .getOrCreate()

    logger = Log4j(spark)

    # Source: lines of text arriving on the local socket.
    lines_df = spark.readStream \
        .format("socket") \
        .option("host", "localhost") \
        .option("port", "9999") \
        .load()

    # One output row per space-separated token of each input line.
    words_df = lines_df.select(expr("explode(split(value,' ')) as word"))
    counts_df = words_df.groupBy("word").count()

    # Sink: console, "complete" output mode, progress tracked in chk-point-dir.
    word_count_query = counts_df.writeStream \
        .format("console") \
        .outputMode("complete") \
        .option("checkpointLocation", "chk-point-dir") \
        .start()

    logger.info("Listening to localhost:9999")
    # Block the driver until the streaming query stops.
    word_count_query.awaitTermination()
class Log4j:
    """Small wrapper around a JVM log4j logger reached through a SparkSession.

    The wrapped logger is named ``<root_class>.<spark.app.name>``. With the
    default ``root_class`` the name falls under the
    ``guru.learningjournal.spark.examples`` category.
    """

    def __init__(self, spark, root_class="guru.learningjournal.spark.examples"):
        """Look up the log4j logger for the running Spark application.

        Args:
            spark: Session object providing ``_jvm`` (used to reach
                ``org.apache.log4j``) and ``sparkContext.getConf()``.
            root_class: Prefix of the logger name. Defaults to the namespace
                that used to be hard-coded, so existing callers that pass
                only ``spark`` get exactly the same logger as before.
        """
        log4j = spark._jvm.org.apache.log4j

        conf = spark.sparkContext.getConf()
        app_name = conf.get("spark.app.name")

        self.logger = log4j.LogManager.getLogger(root_class + "." + app_name)

    def warn(self, message):
        """Forward ``message`` to the wrapped logger at WARN level."""
        self.logger.warn(message)

    def info(self, message):
        """Forward ``message`` to the wrapped logger at INFO level."""
        self.logger.info(message)

    def error(self, message):
        """Forward ``message`` to the wrapped logger at ERROR level."""
        self.logger.error(message)

    def debug(self, message):
        """Forward ``message`` to the wrapped logger at DEBUG level."""
        self.logger.debug(message)
"""File-source streaming demo.

Streams JSON invoices from the ``input`` directory, emits one row per
invoice line item with the line-item fields promoted to top-level columns,
and appends the result as JSON under ``output``.
"""
from pyspark.sql import SparkSession
from pyspark.sql.functions import expr

from lib.logger import Log4j

if __name__ == "__main__":
    builder = (
        SparkSession.builder
        .appName("File Streaming Demo")
        .master("local[3]")
        .config("spark.streaming.stopGracefullyOnShutdown", "true")
        .config("spark.sql.streaming.schemaInference", "true")
    )
    spark = builder.getOrCreate()

    logger = Log4j(spark)

    # Source: JSON files dropped into "input", one file per trigger.
    raw_df = (
        spark.readStream
        .format("json")
        .option("path", "input")
        .option("maxFilesPerTrigger", 1)
        .load()
    )

    # Keep the invoice-level columns and explode the line-item array so that
    # every line item becomes its own row.
    invoice_columns = [
        "InvoiceNumber", "CreatedTime", "StoreID", "PosID",
        "CustomerType", "PaymentMethod", "DeliveryType",
        "DeliveryAddress.City", "DeliveryAddress.State",
        "DeliveryAddress.PinCode",
        "explode(InvoiceLineItems) as LineItem",
    ]
    explode_df = raw_df.selectExpr(*invoice_columns)

    # Promote each LineItem field to a top-level column, in the same order
    # as before, then drop the struct itself.
    flattened_df = explode_df
    for field_name in ("ItemCode", "ItemDescription", "ItemPrice", "ItemQty", "TotalValue"):
        flattened_df = flattened_df.withColumn(field_name, expr("LineItem." + field_name))
    flattened_df = flattened_df.drop("LineItem")

    # Sink: JSON files under "output", written once per minute.
    writer = (
        flattened_df.writeStream
        .format("json")
        .queryName("Flattened Invoice Writer")
        .outputMode("append")
        .option("path", "output")
        .option("checkpointLocation", "chk-point-dir")
        .trigger(processingTime="1 minute")
    )
    invoice_writer_query = writer.start()

    logger.info("Flattened Invoice Writer started")
    invoice_writer_query.awaitTermination()
self.logger = log4j.LogManager.getLogger(root_class + "." + app_name) 10 | 11 | def warn(self, message): 12 | self.logger.warn(message) 13 | 14 | def info(self, message): 15 | self.logger.info(message) 16 | 17 | def error(self, message): 18 | self.logger.error(message) 19 | 20 | def debug(self, message): 21 | self.logger.debug(message) -------------------------------------------------------------------------------- /02-FileStreamDemo/log4j.properties: -------------------------------------------------------------------------------- 1 | # Set everything to be logged to the console 2 | log4j.rootCategory=WARN, console 3 | 4 | # define console appender 5 | log4j.appender.console=org.apache.log4j.ConsoleAppender 6 | log4j.appender.console.target=System.out 7 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 8 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n 9 | 10 | #application log 11 | log4j.logger.guru.learningjournal.spark.examples=INFO, console 12 | log4j.additivity.guru.learningjournal.spark.examples=false 13 | 14 | #define following in Java System 15 | # -Dlog4j.configuration=file:log4j.properties 16 | 17 | # Recommendations from Spark template 18 | log4j.logger.org.apache.spark.repl.Main=WARN 19 | log4j.logger.org.spark_project.jetty=WARN 20 | log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR 21 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO 22 | log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO 23 | log4j.logger.org.apache.parquet=ERROR 24 | log4j.logger.parquet=ERROR 25 | log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL 26 | log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR 27 | 28 | -------------------------------------------------------------------------------- /03-KafkaStreamDemo/KafkaStreamDemo.py: -------------------------------------------------------------------------------- 1 | from pyspark.sql 
"""Kafka-source streaming demo.

Subscribes to the ``invoices`` topic on localhost:9092, parses each message
value as JSON using the invoice schema below, emits one row per invoice line
item and appends the flattened rows as JSON under ``output``.
"""
from pyspark.sql import SparkSession
from pyspark.sql.functions import from_json, col, expr
from pyspark.sql.types import StructType, StructField, StringType, LongType, DoubleType, IntegerType, ArrayType

from lib.logger import Log4j

if __name__ == "__main__":
    # The application name used to be "File Streaming Demo", a leftover from
    # the file-source demo. Log4j builds its logger name from spark.app.name,
    # so the old value mislabelled this job's log output.
    spark = SparkSession \
        .builder \
        .appName("Kafka Stream Demo") \
        .master("local[3]") \
        .config("spark.streaming.stopGracefullyOnShutdown", "true") \
        .getOrCreate()

    logger = Log4j(spark)

    # Schema applied to the JSON text carried in each Kafka message value.
    schema = StructType([
        StructField("InvoiceNumber", StringType()),
        StructField("CreatedTime", LongType()),
        StructField("StoreID", StringType()),
        StructField("PosID", StringType()),
        StructField("CashierID", StringType()),
        StructField("CustomerType", StringType()),
        StructField("CustomerCardNo", StringType()),
        StructField("TotalAmount", DoubleType()),
        StructField("NumberOfItems", IntegerType()),
        StructField("PaymentMethod", StringType()),
        StructField("CGST", DoubleType()),
        StructField("SGST", DoubleType()),
        StructField("CESS", DoubleType()),
        StructField("DeliveryType", StringType()),
        StructField("DeliveryAddress", StructType([
            StructField("AddressLine", StringType()),
            StructField("City", StringType()),
            StructField("State", StringType()),
            StructField("PinCode", StringType()),
            StructField("ContactNumber", StringType())
        ])),
        StructField("InvoiceLineItems", ArrayType(StructType([
            StructField("ItemCode", StringType()),
            StructField("ItemDescription", StringType()),
            StructField("ItemPrice", DoubleType()),
            StructField("ItemQty", IntegerType()),
            StructField("TotalValue", DoubleType())
        ]))),
    ])

    # Source: the "invoices" topic, read from the earliest available offset.
    kafka_df = spark.readStream \
        .format("kafka") \
        .option("kafka.bootstrap.servers", "localhost:9092") \
        .option("subscribe", "invoices") \
        .option("startingOffsets", "earliest") \
        .load()

    # Cast the message value to a string and parse it into a struct column.
    value_df = kafka_df.select(from_json(col("value").cast("string"), schema).alias("value"))

    # Keep the invoice-level fields and explode the line-item array so that
    # every line item becomes its own row.
    explode_df = value_df.selectExpr("value.InvoiceNumber", "value.CreatedTime", "value.StoreID",
                                     "value.PosID", "value.CustomerType", "value.PaymentMethod", "value.DeliveryType",
                                     "value.DeliveryAddress.City",
                                     "value.DeliveryAddress.State", "value.DeliveryAddress.PinCode",
                                     "explode(value.InvoiceLineItems) as LineItem")

    # Promote the LineItem fields to top-level columns and drop the struct.
    flattened_df = explode_df \
        .withColumn("ItemCode", expr("LineItem.ItemCode")) \
        .withColumn("ItemDescription", expr("LineItem.ItemDescription")) \
        .withColumn("ItemPrice", expr("LineItem.ItemPrice")) \
        .withColumn("ItemQty", expr("LineItem.ItemQty")) \
        .withColumn("TotalValue", expr("LineItem.TotalValue")) \
        .drop("LineItem")

    # Sink: JSON files under "output", written once per minute.
    invoice_writer_query = flattened_df.writeStream \
        .format("json") \
        .queryName("Flattened Invoice Writer") \
        .outputMode("append") \
        .option("path", "output") \
        .option("checkpointLocation", "chk-point-dir") \
        .trigger(processingTime="1 minute") \
        .start()

    logger.info("Listening to Kafka")
    # Block the driver until the streaming query stops.
    invoice_writer_query.awaitTermination()
"5582740626", 27 | "TotalAmount": 0, 28 | "NumberOfItems": 0, 29 | "PaymentMethod": "CARD", 30 | "TaxableAmount": 0, 31 | "CGST": 0, 32 | "SGST": 0, 33 | "CESS": 0, 34 | "DeliveryType": "TAKEAWAY" 35 | }, 36 | { 37 | "InvoiceNumber": 0, 38 | "CreatedTime": "0", 39 | "StoreID": "STR1534", 40 | "PosID": "POS934", 41 | "CashierID": "OAS544", 42 | "CustomerType": "PRIME", 43 | "CustomerCardNo": "5525406429", 44 | "TotalAmount": 0, 45 | "NumberOfItems": 0, 46 | "PaymentMethod": "CASH", 47 | "TaxableAmount": 0, 48 | "CGST": 0, 49 | "SGST": 0, 50 | "CESS": 0, 51 | "DeliveryType": "TAKEAWAY" 52 | }, 53 | { 54 | "InvoiceNumber": 0, 55 | "CreatedTime": "0", 56 | "StoreID": "STR1534", 57 | "PosID": "POS944", 58 | "CashierID": "OAS994", 59 | "CustomerType": "NONPRIME", 60 | "CustomerCardNo": "2968880358", 61 | "TotalAmount": 0, 62 | "NumberOfItems": 0, 63 | "PaymentMethod": "CASH", 64 | "TaxableAmount": 0, 65 | "CGST": 0, 66 | "SGST": 0, 67 | "CESS": 0, 68 | "DeliveryType": "HOME-DELIVERY" 69 | }, 70 | { 71 | "InvoiceNumber": 0, 72 | "CreatedTime": "0", 73 | "StoreID": "STR1534", 74 | "PosID": "POS151", 75 | "CashierID": "OAS177", 76 | "CustomerType": "NONPRIME", 77 | "CustomerCardNo": "7061658033", 78 | "TotalAmount": 0, 79 | "NumberOfItems": 0, 80 | "PaymentMethod": "CASH", 81 | "TaxableAmount": 0, 82 | "CGST": 0, 83 | "SGST": 0, 84 | "CESS": 0, 85 | "DeliveryType": "HOME-DELIVERY" 86 | }, 87 | { 88 | "InvoiceNumber": 0, 89 | "CreatedTime": "0", 90 | "StoreID": "STR1955", 91 | "PosID": "POS992", 92 | "CashierID": "OAS521", 93 | "CustomerType": "NONPRIME", 94 | "CustomerCardNo": "1482417862", 95 | "TotalAmount": 0, 96 | "NumberOfItems": 0, 97 | "PaymentMethod": "CASH", 98 | "TaxableAmount": 0, 99 | "CGST": 0, 100 | "SGST": 0, 101 | "CESS": 0, 102 | "DeliveryType": "TAKEAWAY" 103 | }, 104 | { 105 | "InvoiceNumber": 0, 106 | "CreatedTime": "0", 107 | "StoreID": "STR1955", 108 | "PosID": "POS324", 109 | "CashierID": "OAS183", 110 | "CustomerType": "NONPRIME", 111 | 
"CustomerCardNo": "3916555911", 112 | "TotalAmount": 0, 113 | "NumberOfItems": 0, 114 | "PaymentMethod": "CASH", 115 | "TaxableAmount": 0, 116 | "CGST": 0, 117 | "SGST": 0, 118 | "CESS": 0, 119 | "DeliveryType": "HOME-DELIVERY" 120 | }, 121 | { 122 | "InvoiceNumber": 0, 123 | "CreatedTime": "0", 124 | "StoreID": "STR1955", 125 | "PosID": "POS769", 126 | "CashierID": "OAS996", 127 | "CustomerType": "PRIME", 128 | "CustomerCardNo": "8154556937", 129 | "TotalAmount": 0, 130 | "NumberOfItems": 0, 131 | "PaymentMethod": "CARD", 132 | "TaxableAmount": 0, 133 | "CGST": 0, 134 | "SGST": 0, 135 | "CESS": 0, 136 | "DeliveryType": "HOME-DELIVERY" 137 | }, 138 | { 139 | "InvoiceNumber": 0, 140 | "CreatedTime": "0", 141 | "StoreID": "STR1955", 142 | "PosID": "POS837", 143 | "CashierID": "OAS861", 144 | "CustomerType": "NONPRIME", 145 | "CustomerCardNo": "7136581152", 146 | "TotalAmount": 0, 147 | "NumberOfItems": 0, 148 | "PaymentMethod": "CASH", 149 | "TaxableAmount": 0, 150 | "CGST": 0, 151 | "SGST": 0, 152 | "CESS": 0, 153 | "DeliveryType": "HOME-DELIVERY" 154 | }, 155 | { 156 | "InvoiceNumber": 0, 157 | "CreatedTime": "0", 158 | "StoreID": "STR1955", 159 | "PosID": "POS493", 160 | "CashierID": "OAS769", 161 | "CustomerType": "PRIME", 162 | "CustomerCardNo": "4405006483", 163 | "TotalAmount": 0, 164 | "NumberOfItems": 0, 165 | "PaymentMethod": "CARD", 166 | "TaxableAmount": 0, 167 | "CGST": 0, 168 | "SGST": 0, 169 | "CESS": 0, 170 | "DeliveryType": "TAKEAWAY" 171 | }, 172 | { 173 | "InvoiceNumber": 0, 174 | "CreatedTime": "0", 175 | "StoreID": "STR2629", 176 | "PosID": "POS768", 177 | "CashierID": "OAS282", 178 | "CustomerType": "NONPRIME", 179 | "CustomerCardNo": "1360645962", 180 | "TotalAmount": 0, 181 | "NumberOfItems": 0, 182 | "PaymentMethod": "CARD", 183 | "TaxableAmount": 0, 184 | "CGST": 0, 185 | "SGST": 0, 186 | "CESS": 0, 187 | "DeliveryType": "HOME-DELIVERY" 188 | }, 189 | { 190 | "InvoiceNumber": 0, 191 | "CreatedTime": "0", 192 | "StoreID": "STR2629", 193 | 
"PosID": "POS285", 194 | "CashierID": "OAS882", 195 | "CustomerType": "NONPRIME", 196 | "CustomerCardNo": "6419885098", 197 | "TotalAmount": 0, 198 | "NumberOfItems": 0, 199 | "PaymentMethod": "CASH", 200 | "TaxableAmount": 0, 201 | "CGST": 0, 202 | "SGST": 0, 203 | "CESS": 0, 204 | "DeliveryType": "TAKEAWAY" 205 | }, 206 | { 207 | "InvoiceNumber": 0, 208 | "CreatedTime": "0", 209 | "StoreID": "STR2629", 210 | "PosID": "POS172", 211 | "CashierID": "OAS622", 212 | "CustomerType": "NONPRIME", 213 | "CustomerCardNo": "7829975914", 214 | "TotalAmount": 0, 215 | "NumberOfItems": 0, 216 | "PaymentMethod": "CASH", 217 | "TaxableAmount": 0, 218 | "CGST": 0, 219 | "SGST": 0, 220 | "CESS": 0, 221 | "DeliveryType": "HOME-DELIVERY" 222 | }, 223 | { 224 | "InvoiceNumber": 0, 225 | "CreatedTime": "0", 226 | "StoreID": "STR2629", 227 | "PosID": "POS647", 228 | "CashierID": "OAS515", 229 | "CustomerType": "PRIME", 230 | "CustomerCardNo": "1053863739", 231 | "TotalAmount": 0, 232 | "NumberOfItems": 0, 233 | "PaymentMethod": "CASH", 234 | "TaxableAmount": 0, 235 | "CGST": 0, 236 | "SGST": 0, 237 | "CESS": 0, 238 | "DeliveryType": "TAKEAWAY" 239 | }, 240 | { 241 | "InvoiceNumber": 0, 242 | "CreatedTime": "0", 243 | "StoreID": "STR2629", 244 | "PosID": "POS253", 245 | "CashierID": "OAS737", 246 | "CustomerType": "NONPRIME", 247 | "CustomerCardNo": "2461788838", 248 | "TotalAmount": 0, 249 | "NumberOfItems": 0, 250 | "PaymentMethod": "CARD", 251 | "TaxableAmount": 0, 252 | "CGST": 0, 253 | "SGST": 0, 254 | "CESS": 0, 255 | "DeliveryType": "HOME-DELIVERY" 256 | }, 257 | { 258 | "InvoiceNumber": 0, 259 | "CreatedTime": "0", 260 | "StoreID": "STR2699", 261 | "PosID": "POS832", 262 | "CashierID": "OAS496", 263 | "CustomerType": "NONPRIME", 264 | "CustomerCardNo": "4552039054", 265 | "TotalAmount": 0, 266 | "NumberOfItems": 0, 267 | "PaymentMethod": "CASH", 268 | "TaxableAmount": 0, 269 | "CGST": 0, 270 | "SGST": 0, 271 | "CESS": 0, 272 | "DeliveryType": "HOME-DELIVERY" 273 | }, 274 | { 275 
| "InvoiceNumber": 0, 276 | "CreatedTime": "0", 277 | "StoreID": "STR2699", 278 | "PosID": "POS792", 279 | "CashierID": "OAS152", 280 | "CustomerType": "PRIME", 281 | "CustomerCardNo": "7475384243", 282 | "TotalAmount": 0, 283 | "NumberOfItems": 0, 284 | "PaymentMethod": "CASH", 285 | "TaxableAmount": 0, 286 | "CGST": 0, 287 | "SGST": 0, 288 | "CESS": 0, 289 | "DeliveryType": "HOME-DELIVERY" 290 | }, 291 | { 292 | "InvoiceNumber": 0, 293 | "CreatedTime": "0", 294 | "StoreID": "STR2699", 295 | "PosID": "POS773", 296 | "CashierID": "OAS568", 297 | "CustomerType": "NONPRIME", 298 | "CustomerCardNo": "9327106277", 299 | "TotalAmount": 0, 300 | "NumberOfItems": 0, 301 | "PaymentMethod": "CASH", 302 | "TaxableAmount": 0, 303 | "CGST": 0, 304 | "SGST": 0, 305 | "CESS": 0, 306 | "DeliveryType": "TAKEAWAY" 307 | }, 308 | { 309 | "InvoiceNumber": 0, 310 | "CreatedTime": "0", 311 | "StoreID": "STR2699", 312 | "PosID": "POS779", 313 | "CashierID": "OAS161", 314 | "CustomerType": "PRIME", 315 | "CustomerCardNo": "6797767929", 316 | "TotalAmount": 0, 317 | "NumberOfItems": 0, 318 | "PaymentMethod": "CASH", 319 | "TaxableAmount": 0, 320 | "CGST": 0, 321 | "SGST": 0, 322 | "CESS": 0, 323 | "DeliveryType": "HOME-DELIVERY" 324 | }, 325 | { 326 | "InvoiceNumber": 0, 327 | "CreatedTime": "0", 328 | "StoreID": "STR2699", 329 | "PosID": "POS566", 330 | "CashierID": "OAS318", 331 | "CustomerType": "PRIME", 332 | "CustomerCardNo": "7543202868", 333 | "TotalAmount": 0, 334 | "NumberOfItems": 0, 335 | "PaymentMethod": "CARD", 336 | "TaxableAmount": 0, 337 | "CGST": 0, 338 | "SGST": 0, 339 | "CESS": 0, 340 | "DeliveryType": "HOME-DELIVERY" 341 | }, 342 | { 343 | "InvoiceNumber": 0, 344 | "CreatedTime": "0", 345 | "StoreID": "STR2952", 346 | "PosID": "POS152", 347 | "CashierID": "OAS329", 348 | "CustomerType": "PRIME", 349 | "CustomerCardNo": "3027514652", 350 | "TotalAmount": 0, 351 | "NumberOfItems": 0, 352 | "PaymentMethod": "CASH", 353 | "TaxableAmount": 0, 354 | "CGST": 0, 355 | "SGST": 
0, 356 | "CESS": 0, 357 | "DeliveryType": "TAKEAWAY" 358 | }, 359 | { 360 | "InvoiceNumber": 0, 361 | "CreatedTime": "0", 362 | "StoreID": "STR2952", 363 | "PosID": "POS351", 364 | "CashierID": "OAS841", 365 | "CustomerType": "NONPRIME", 366 | "CustomerCardNo": "2024774071", 367 | "TotalAmount": 0, 368 | "NumberOfItems": 0, 369 | "PaymentMethod": "CASH", 370 | "TaxableAmount": 0, 371 | "CGST": 0, 372 | "SGST": 0, 373 | "CESS": 0, 374 | "DeliveryType": "HOME-DELIVERY" 375 | }, 376 | { 377 | "InvoiceNumber": 0, 378 | "CreatedTime": "0", 379 | "StoreID": "STR2952", 380 | "PosID": "POS477", 381 | "CashierID": "OAS259", 382 | "CustomerType": "NONPRIME", 383 | "CustomerCardNo": "3425633108", 384 | "TotalAmount": 0, 385 | "NumberOfItems": 0, 386 | "PaymentMethod": "CASH", 387 | "TaxableAmount": 0, 388 | "CGST": 0, 389 | "SGST": 0, 390 | "CESS": 0, 391 | "DeliveryType": "TAKEAWAY" 392 | }, 393 | { 394 | "InvoiceNumber": 0, 395 | "CreatedTime": "0", 396 | "StoreID": "STR2952", 397 | "PosID": "POS471", 398 | "CashierID": "OAS936", 399 | "CustomerType": "NONPRIME", 400 | "CustomerCardNo": "9741556539", 401 | "TotalAmount": 0, 402 | "NumberOfItems": 0, 403 | "PaymentMethod": "CASH", 404 | "TaxableAmount": 0, 405 | "CGST": 0, 406 | "SGST": 0, 407 | "CESS": 0, 408 | "DeliveryType": "TAKEAWAY" 409 | }, 410 | { 411 | "InvoiceNumber": 0, 412 | "CreatedTime": "0", 413 | "StoreID": "STR2952", 414 | "PosID": "POS663", 415 | "CashierID": "OAS444", 416 | "CustomerType": "NONPRIME", 417 | "CustomerCardNo": "7928716562", 418 | "TotalAmount": 0, 419 | "NumberOfItems": 0, 420 | "PaymentMethod": "CASH", 421 | "TaxableAmount": 0, 422 | "CGST": 0, 423 | "SGST": 0, 424 | "CESS": 0, 425 | "DeliveryType": "HOME-DELIVERY" 426 | }, 427 | { 428 | "InvoiceNumber": 0, 429 | "CreatedTime": "0", 430 | "StoreID": "STR3781", 431 | "PosID": "POS843", 432 | "CashierID": "OAS685", 433 | "CustomerType": "NONPRIME", 434 | "CustomerCardNo": "1191112820", 435 | "TotalAmount": 0, 436 | "NumberOfItems": 0, 437 | 
"PaymentMethod": "CARD", 438 | "TaxableAmount": 0, 439 | "CGST": 0, 440 | "SGST": 0, 441 | "CESS": 0, 442 | "DeliveryType": "HOME-DELIVERY" 443 | }, 444 | { 445 | "InvoiceNumber": 0, 446 | "CreatedTime": "0", 447 | "StoreID": "STR3781", 448 | "PosID": "POS444", 449 | "CashierID": "OAS935", 450 | "CustomerType": "NONPRIME", 451 | "CustomerCardNo": "9223558893", 452 | "TotalAmount": 0, 453 | "NumberOfItems": 0, 454 | "PaymentMethod": "CASH", 455 | "TaxableAmount": 0, 456 | "CGST": 0, 457 | "SGST": 0, 458 | "CESS": 0, 459 | "DeliveryType": "TAKEAWAY" 460 | }, 461 | { 462 | "InvoiceNumber": 0, 463 | "CreatedTime": "0", 464 | "StoreID": "STR3781", 465 | "PosID": "POS129", 466 | "CashierID": "OAS311", 467 | "CustomerType": "PRIME", 468 | "CustomerCardNo": "4692642935", 469 | "TotalAmount": 0, 470 | "NumberOfItems": 0, 471 | "PaymentMethod": "CASH", 472 | "TaxableAmount": 0, 473 | "CGST": 0, 474 | "SGST": 0, 475 | "CESS": 0, 476 | "DeliveryType": "HOME-DELIVERY" 477 | }, 478 | { 479 | "InvoiceNumber": 0, 480 | "CreatedTime": "0", 481 | "StoreID": "STR3781", 482 | "PosID": "POS494", 483 | "CashierID": "OAS377", 484 | "CustomerType": "PRIME", 485 | "CustomerCardNo": "7892154732", 486 | "TotalAmount": 0, 487 | "NumberOfItems": 0, 488 | "PaymentMethod": "CARD", 489 | "TaxableAmount": 0, 490 | "CGST": 0, 491 | "SGST": 0, 492 | "CESS": 0, 493 | "DeliveryType": "HOME-DELIVERY" 494 | }, 495 | { 496 | "InvoiceNumber": 0, 497 | "CreatedTime": "0", 498 | "StoreID": "STR3781", 499 | "PosID": "POS978", 500 | "CashierID": "OAS981", 501 | "CustomerType": "NONPRIME", 502 | "CustomerCardNo": "6244229107", 503 | "TotalAmount": 0, 504 | "NumberOfItems": 0, 505 | "PaymentMethod": "CARD", 506 | "TaxableAmount": 0, 507 | "CGST": 0, 508 | "SGST": 0, 509 | "CESS": 0, 510 | "DeliveryType": "TAKEAWAY" 511 | }, 512 | { 513 | "InvoiceNumber": 0, 514 | "CreatedTime": "0", 515 | "StoreID": "STR4899", 516 | "PosID": "POS584", 517 | "CashierID": "OAS452", 518 | "CustomerType": "NONPRIME", 519 | 
"CustomerCardNo": "2502121621", 520 | "TotalAmount": 0, 521 | "NumberOfItems": 0, 522 | "PaymentMethod": "CASH", 523 | "TaxableAmount": 0, 524 | "CGST": 0, 525 | "SGST": 0, 526 | "CESS": 0, 527 | "DeliveryType": "HOME-DELIVERY" 528 | }, 529 | { 530 | "InvoiceNumber": 0, 531 | "CreatedTime": "0", 532 | "StoreID": "STR4899", 533 | "PosID": "POS447", 534 | "CashierID": "OAS729", 535 | "CustomerType": "NONPRIME", 536 | "CustomerCardNo": "5576072500", 537 | "TotalAmount": 0, 538 | "NumberOfItems": 0, 539 | "PaymentMethod": "CARD", 540 | "TaxableAmount": 0, 541 | "CGST": 0, 542 | "SGST": 0, 543 | "CESS": 0, 544 | "DeliveryType": "TAKEAWAY" 545 | }, 546 | { 547 | "InvoiceNumber": 0, 548 | "CreatedTime": "0", 549 | "StoreID": "STR4899", 550 | "PosID": "POS441", 551 | "CashierID": "OAS487", 552 | "CustomerType": "NONPRIME", 553 | "CustomerCardNo": "4227803954", 554 | "TotalAmount": 0, 555 | "NumberOfItems": 0, 556 | "PaymentMethod": "CASH", 557 | "TaxableAmount": 0, 558 | "CGST": 0, 559 | "SGST": 0, 560 | "CESS": 0, 561 | "DeliveryType": "TAKEAWAY" 562 | }, 563 | { 564 | "InvoiceNumber": 0, 565 | "CreatedTime": "0", 566 | "StoreID": "STR4899", 567 | "PosID": "POS799", 568 | "CashierID": "OAS145", 569 | "CustomerType": "PRIME", 570 | "CustomerCardNo": "2325763742", 571 | "TotalAmount": 0, 572 | "NumberOfItems": 0, 573 | "PaymentMethod": "CASH", 574 | "TaxableAmount": 0, 575 | "CGST": 0, 576 | "SGST": 0, 577 | "CESS": 0, 578 | "DeliveryType": "HOME-DELIVERY" 579 | }, 580 | { 581 | "InvoiceNumber": 0, 582 | "CreatedTime": "0", 583 | "StoreID": "STR4899", 584 | "PosID": "POS954", 585 | "CashierID": "OAS151", 586 | "CustomerType": "NONPRIME", 587 | "CustomerCardNo": "3716602332", 588 | "TotalAmount": 0, 589 | "NumberOfItems": 0, 590 | "PaymentMethod": "CASH", 591 | "TaxableAmount": 0, 592 | "CGST": 0, 593 | "SGST": 0, 594 | "CESS": 0, 595 | "DeliveryType": "HOME-DELIVERY" 596 | }, 597 | { 598 | "InvoiceNumber": 0, 599 | "CreatedTime": "0", 600 | "StoreID": "STR5494", 601 | 
"PosID": "POS384", 602 | "CashierID": "OAS398", 603 | "CustomerType": "NONPRIME", 604 | "CustomerCardNo": "6048272250", 605 | "TotalAmount": 0, 606 | "NumberOfItems": 0, 607 | "PaymentMethod": "CARD", 608 | "TaxableAmount": 0, 609 | "CGST": 0, 610 | "SGST": 0, 611 | "CESS": 0, 612 | "DeliveryType": "HOME-DELIVERY" 613 | }, 614 | { 615 | "InvoiceNumber": 0, 616 | "CreatedTime": "0", 617 | "StoreID": "STR5494", 618 | "PosID": "POS121", 619 | "CashierID": "OAS237", 620 | "CustomerType": "PRIME", 621 | "CustomerCardNo": "9509332022", 622 | "TotalAmount": 0, 623 | "NumberOfItems": 0, 624 | "PaymentMethod": "CARD", 625 | "TaxableAmount": 0, 626 | "CGST": 0, 627 | "SGST": 0, 628 | "CESS": 0, 629 | "DeliveryType": "TAKEAWAY" 630 | }, 631 | { 632 | "InvoiceNumber": 0, 633 | "CreatedTime": "0", 634 | "StoreID": "STR5494", 635 | "PosID": "POS353", 636 | "CashierID": "OAS969", 637 | "CustomerType": "NONPRIME", 638 | "CustomerCardNo": "8189067868", 639 | "TotalAmount": 0, 640 | "NumberOfItems": 0, 641 | "PaymentMethod": "CASH", 642 | "TaxableAmount": 0, 643 | "CGST": 0, 644 | "SGST": 0, 645 | "CESS": 0, 646 | "DeliveryType": "TAKEAWAY" 647 | }, 648 | { 649 | "InvoiceNumber": 0, 650 | "CreatedTime": "0", 651 | "StoreID": "STR5494", 652 | "PosID": "POS759", 653 | "CashierID": "OAS796", 654 | "CustomerType": "NONPRIME", 655 | "CustomerCardNo": "4937932830", 656 | "TotalAmount": 0, 657 | "NumberOfItems": 0, 658 | "PaymentMethod": "CARD", 659 | "TaxableAmount": 0, 660 | "CGST": 0, 661 | "SGST": 0, 662 | "CESS": 0, 663 | "DeliveryType": "HOME-DELIVERY" 664 | }, 665 | { 666 | "InvoiceNumber": 0, 667 | "CreatedTime": "0", 668 | "StoreID": "STR5494", 669 | "PosID": "POS722", 670 | "CashierID": "OAS576", 671 | "CustomerType": "PRIME", 672 | "CustomerCardNo": "9724245321", 673 | "TotalAmount": 0, 674 | "NumberOfItems": 0, 675 | "PaymentMethod": "CASH", 676 | "TaxableAmount": 0, 677 | "CGST": 0, 678 | "SGST": 0, 679 | "CESS": 0, 680 | "DeliveryType": "HOME-DELIVERY" 681 | }, 682 | { 683 | 
"InvoiceNumber": 0, 684 | "CreatedTime": "0", 685 | "StoreID": "STR5646", 686 | "PosID": "POS968", 687 | "CashierID": "OAS689", 688 | "CustomerType": "PRIME", 689 | "CustomerCardNo": "2220404140", 690 | "TotalAmount": 0, 691 | "NumberOfItems": 0, 692 | "PaymentMethod": "CASH", 693 | "TaxableAmount": 0, 694 | "CGST": 0, 695 | "SGST": 0, 696 | "CESS": 0, 697 | "DeliveryType": "HOME-DELIVERY" 698 | }, 699 | { 700 | "InvoiceNumber": 0, 701 | "CreatedTime": "0", 702 | "StoreID": "STR5646", 703 | "PosID": "POS639", 704 | "CashierID": "OAS167", 705 | "CustomerType": "PRIME", 706 | "CustomerCardNo": "9711257112", 707 | "TotalAmount": 0, 708 | "NumberOfItems": 0, 709 | "PaymentMethod": "CARD", 710 | "TaxableAmount": 0, 711 | "CGST": 0, 712 | "SGST": 0, 713 | "CESS": 0, 714 | "DeliveryType": "HOME-DELIVERY" 715 | }, 716 | { 717 | "InvoiceNumber": 0, 718 | "CreatedTime": "0", 719 | "StoreID": "STR5646", 720 | "PosID": "POS624", 721 | "CashierID": "OAS196", 722 | "CustomerType": "PRIME", 723 | "CustomerCardNo": "8109010950", 724 | "TotalAmount": 0, 725 | "NumberOfItems": 0, 726 | "PaymentMethod": "CASH", 727 | "TaxableAmount": 0, 728 | "CGST": 0, 729 | "SGST": 0, 730 | "CESS": 0, 731 | "DeliveryType": "TAKEAWAY" 732 | }, 733 | { 734 | "InvoiceNumber": 0, 735 | "CreatedTime": "0", 736 | "StoreID": "STR5646", 737 | "PosID": "POS212", 738 | "CashierID": "OAS284", 739 | "CustomerType": "NONPRIME", 740 | "CustomerCardNo": "4226427254", 741 | "TotalAmount": 0, 742 | "NumberOfItems": 0, 743 | "PaymentMethod": "CARD", 744 | "TaxableAmount": 0, 745 | "CGST": 0, 746 | "SGST": 0, 747 | "CESS": 0, 748 | "DeliveryType": "HOME-DELIVERY" 749 | }, 750 | { 751 | "InvoiceNumber": 0, 752 | "CreatedTime": "0", 753 | "StoreID": "STR5646", 754 | "PosID": "POS543", 755 | "CashierID": "OAS921", 756 | "CustomerType": "NONPRIME", 757 | "CustomerCardNo": "5947487902", 758 | "TotalAmount": 0, 759 | "NumberOfItems": 0, 760 | "PaymentMethod": "CARD", 761 | "TaxableAmount": 0, 762 | "CGST": 0, 763 | "SGST": 
0, 764 | "CESS": 0, 765 | "DeliveryType": "HOME-DELIVERY" 766 | }, 767 | { 768 | "InvoiceNumber": 0, 769 | "CreatedTime": "0", 770 | "StoreID": "STR5864", 771 | "PosID": "POS132", 772 | "CashierID": "OAS386", 773 | "CustomerType": "NONPRIME", 774 | "CustomerCardNo": "6909538460", 775 | "TotalAmount": 0, 776 | "NumberOfItems": 0, 777 | "PaymentMethod": "CASH", 778 | "TaxableAmount": 0, 779 | "CGST": 0, 780 | "SGST": 0, 781 | "CESS": 0, 782 | "DeliveryType": "HOME-DELIVERY" 783 | }, 784 | { 785 | "InvoiceNumber": 0, 786 | "CreatedTime": "0", 787 | "StoreID": "STR5864", 788 | "PosID": "POS945", 789 | "CashierID": "OAS649", 790 | "CustomerType": "PRIME", 791 | "CustomerCardNo": "5305473526", 792 | "TotalAmount": 0, 793 | "NumberOfItems": 0, 794 | "PaymentMethod": "CARD", 795 | "TaxableAmount": 0, 796 | "CGST": 0, 797 | "SGST": 0, 798 | "CESS": 0, 799 | "DeliveryType": "HOME-DELIVERY" 800 | }, 801 | { 802 | "InvoiceNumber": 0, 803 | "CreatedTime": "0", 804 | "StoreID": "STR5864", 805 | "PosID": "POS465", 806 | "CashierID": "OAS582", 807 | "CustomerType": "NONPRIME", 808 | "CustomerCardNo": "1211843745", 809 | "TotalAmount": 0, 810 | "NumberOfItems": 0, 811 | "PaymentMethod": "CARD", 812 | "TaxableAmount": 0, 813 | "CGST": 0, 814 | "SGST": 0, 815 | "CESS": 0, 816 | "DeliveryType": "TAKEAWAY" 817 | }, 818 | { 819 | "InvoiceNumber": 0, 820 | "CreatedTime": "0", 821 | "StoreID": "STR5864", 822 | "PosID": "POS872", 823 | "CashierID": "OAS287", 824 | "CustomerType": "PRIME", 825 | "CustomerCardNo": "7589671731", 826 | "TotalAmount": 0, 827 | "NumberOfItems": 0, 828 | "PaymentMethod": "CASH", 829 | "TaxableAmount": 0, 830 | "CGST": 0, 831 | "SGST": 0, 832 | "CESS": 0, 833 | "DeliveryType": "HOME-DELIVERY" 834 | }, 835 | { 836 | "InvoiceNumber": 0, 837 | "CreatedTime": "0", 838 | "StoreID": "STR5864", 839 | "PosID": "POS878", 840 | "CashierID": "OAS219", 841 | "CustomerType": "NONPRIME", 842 | "CustomerCardNo": "9728415172", 843 | "TotalAmount": 0, 844 | "NumberOfItems": 0, 845 
| "PaymentMethod": "CARD", 846 | "TaxableAmount": 0, 847 | "CGST": 0, 848 | "SGST": 0, 849 | "CESS": 0, 850 | "DeliveryType": "TAKEAWAY" 851 | }, 852 | { 853 | "InvoiceNumber": 0, 854 | "CreatedTime": "0", 855 | "StoreID": "STR6162", 856 | "PosID": "POS835", 857 | "CashierID": "OAS583", 858 | "CustomerType": "NONPRIME", 859 | "CustomerCardNo": "5611417583", 860 | "TotalAmount": 0, 861 | "NumberOfItems": 0, 862 | "PaymentMethod": "CARD", 863 | "TaxableAmount": 0, 864 | "CGST": 0, 865 | "SGST": 0, 866 | "CESS": 0, 867 | "DeliveryType": "TAKEAWAY" 868 | }, 869 | { 870 | "InvoiceNumber": 0, 871 | "CreatedTime": "0", 872 | "StoreID": "STR6162", 873 | "PosID": "POS279", 874 | "CashierID": "OAS964", 875 | "CustomerType": "PRIME", 876 | "CustomerCardNo": "2262471989", 877 | "TotalAmount": 0, 878 | "NumberOfItems": 0, 879 | "PaymentMethod": "CASH", 880 | "TaxableAmount": 0, 881 | "CGST": 0, 882 | "SGST": 0, 883 | "CESS": 0, 884 | "DeliveryType": "HOME-DELIVERY" 885 | }, 886 | { 887 | "InvoiceNumber": 0, 888 | "CreatedTime": "0", 889 | "StoreID": "STR6162", 890 | "PosID": "POS987", 891 | "CashierID": "OAS795", 892 | "CustomerType": "NONPRIME", 893 | "CustomerCardNo": "8071065242", 894 | "TotalAmount": 0, 895 | "NumberOfItems": 0, 896 | "PaymentMethod": "CASH", 897 | "TaxableAmount": 0, 898 | "CGST": 0, 899 | "SGST": 0, 900 | "CESS": 0, 901 | "DeliveryType": "TAKEAWAY" 902 | }, 903 | { 904 | "InvoiceNumber": 0, 905 | "CreatedTime": "0", 906 | "StoreID": "STR6162", 907 | "PosID": "POS868", 908 | "CashierID": "OAS172", 909 | "CustomerType": "NONPRIME", 910 | "CustomerCardNo": "9282685191", 911 | "TotalAmount": 0, 912 | "NumberOfItems": 0, 913 | "PaymentMethod": "CARD", 914 | "TaxableAmount": 0, 915 | "CGST": 0, 916 | "SGST": 0, 917 | "CESS": 0, 918 | "DeliveryType": "HOME-DELIVERY" 919 | }, 920 | { 921 | "InvoiceNumber": 0, 922 | "CreatedTime": "0", 923 | "StoreID": "STR6162", 924 | "PosID": "POS431", 925 | "CashierID": "OAS139", 926 | "CustomerType": "NONPRIME", 927 | 
"CustomerCardNo": "8651346374", 928 | "TotalAmount": 0, 929 | "NumberOfItems": 0, 930 | "PaymentMethod": "CARD", 931 | "TaxableAmount": 0, 932 | "CGST": 0, 933 | "SGST": 0, 934 | "CESS": 0, 935 | "DeliveryType": "HOME-DELIVERY" 936 | }, 937 | { 938 | "InvoiceNumber": 0, 939 | "CreatedTime": "0", 940 | "StoreID": "STR6347", 941 | "PosID": "POS622", 942 | "CashierID": "OAS793", 943 | "CustomerType": "PRIME", 944 | "CustomerCardNo": "5697125813", 945 | "TotalAmount": 0, 946 | "NumberOfItems": 0, 947 | "PaymentMethod": "CARD", 948 | "TaxableAmount": 0, 949 | "CGST": 0, 950 | "SGST": 0, 951 | "CESS": 0, 952 | "DeliveryType": "HOME-DELIVERY" 953 | }, 954 | { 955 | "InvoiceNumber": 0, 956 | "CreatedTime": "0", 957 | "StoreID": "STR6347", 958 | "PosID": "POS927", 959 | "CashierID": "OAS186", 960 | "CustomerType": "NONPRIME", 961 | "CustomerCardNo": "3996109634", 962 | "TotalAmount": 0, 963 | "NumberOfItems": 0, 964 | "PaymentMethod": "CARD", 965 | "TaxableAmount": 0, 966 | "CGST": 0, 967 | "SGST": 0, 968 | "CESS": 0, 969 | "DeliveryType": "HOME-DELIVERY" 970 | }, 971 | { 972 | "InvoiceNumber": 0, 973 | "CreatedTime": "0", 974 | "StoreID": "STR6347", 975 | "PosID": "POS994", 976 | "CashierID": "OAS274", 977 | "CustomerType": "PRIME", 978 | "CustomerCardNo": "6296964163", 979 | "TotalAmount": 0, 980 | "NumberOfItems": 0, 981 | "PaymentMethod": "CASH", 982 | "TaxableAmount": 0, 983 | "CGST": 0, 984 | "SGST": 0, 985 | "CESS": 0, 986 | "DeliveryType": "TAKEAWAY" 987 | }, 988 | { 989 | "InvoiceNumber": 0, 990 | "CreatedTime": "0", 991 | "StoreID": "STR6347", 992 | "PosID": "POS333", 993 | "CashierID": "OAS697", 994 | "CustomerType": "NONPRIME", 995 | "CustomerCardNo": "9740604930", 996 | "TotalAmount": 0, 997 | "NumberOfItems": 0, 998 | "PaymentMethod": "CARD", 999 | "TaxableAmount": 0, 1000 | "CGST": 0, 1001 | "SGST": 0, 1002 | "CESS": 0, 1003 | "DeliveryType": "HOME-DELIVERY" 1004 | }, 1005 | { 1006 | "InvoiceNumber": 0, 1007 | "CreatedTime": "0", 1008 | "StoreID": "STR6347", 
1009 | "PosID": "POS856", 1010 | "CashierID": "OAS128", 1011 | "CustomerType": "NONPRIME", 1012 | "CustomerCardNo": "5724653917", 1013 | "TotalAmount": 0, 1014 | "NumberOfItems": 0, 1015 | "PaymentMethod": "CASH", 1016 | "TaxableAmount": 0, 1017 | "CGST": 0, 1018 | "SGST": 0, 1019 | "CESS": 0, 1020 | "DeliveryType": "TAKEAWAY" 1021 | }, 1022 | { 1023 | "InvoiceNumber": 0, 1024 | "CreatedTime": "0", 1025 | "StoreID": "STR6382", 1026 | "PosID": "POS661", 1027 | "CashierID": "OAS597", 1028 | "CustomerType": "PRIME", 1029 | "CustomerCardNo": "7115250228", 1030 | "TotalAmount": 0, 1031 | "NumberOfItems": 0, 1032 | "PaymentMethod": "CASH", 1033 | "TaxableAmount": 0, 1034 | "CGST": 0, 1035 | "SGST": 0, 1036 | "CESS": 0, 1037 | "DeliveryType": "TAKEAWAY" 1038 | }, 1039 | { 1040 | "InvoiceNumber": 0, 1041 | "CreatedTime": "0", 1042 | "StoreID": "STR6382", 1043 | "PosID": "POS145", 1044 | "CashierID": "OAS872", 1045 | "CustomerType": "PRIME", 1046 | "CustomerCardNo": "6733153948", 1047 | "TotalAmount": 0, 1048 | "NumberOfItems": 0, 1049 | "PaymentMethod": "CARD", 1050 | "TaxableAmount": 0, 1051 | "CGST": 0, 1052 | "SGST": 0, 1053 | "CESS": 0, 1054 | "DeliveryType": "HOME-DELIVERY" 1055 | }, 1056 | { 1057 | "InvoiceNumber": 0, 1058 | "CreatedTime": "0", 1059 | "StoreID": "STR6382", 1060 | "PosID": "POS265", 1061 | "CashierID": "OAS328", 1062 | "CustomerType": "PRIME", 1063 | "CustomerCardNo": "7908234230", 1064 | "TotalAmount": 0, 1065 | "NumberOfItems": 0, 1066 | "PaymentMethod": "CASH", 1067 | "TaxableAmount": 0, 1068 | "CGST": 0, 1069 | "SGST": 0, 1070 | "CESS": 0, 1071 | "DeliveryType": "HOME-DELIVERY" 1072 | }, 1073 | { 1074 | "InvoiceNumber": 0, 1075 | "CreatedTime": "0", 1076 | "StoreID": "STR6382", 1077 | "PosID": "POS968", 1078 | "CashierID": "OAS687", 1079 | "CustomerType": "PRIME", 1080 | "CustomerCardNo": "4148565252", 1081 | "TotalAmount": 0, 1082 | "NumberOfItems": 0, 1083 | "PaymentMethod": "CASH", 1084 | "TaxableAmount": 0, 1085 | "CGST": 0, 1086 | "SGST": 0, 
1087 | "CESS": 0, 1088 | "DeliveryType": "HOME-DELIVERY" 1089 | }, 1090 | { 1091 | "InvoiceNumber": 0, 1092 | "CreatedTime": "0", 1093 | "StoreID": "STR6382", 1094 | "PosID": "POS851", 1095 | "CashierID": "OAS351", 1096 | "CustomerType": "NONPRIME", 1097 | "CustomerCardNo": "8110266676", 1098 | "TotalAmount": 0, 1099 | "NumberOfItems": 0, 1100 | "PaymentMethod": "CARD", 1101 | "TaxableAmount": 0, 1102 | "CGST": 0, 1103 | "SGST": 0, 1104 | "CESS": 0, 1105 | "DeliveryType": "TAKEAWAY" 1106 | }, 1107 | { 1108 | "InvoiceNumber": 0, 1109 | "CreatedTime": "0", 1110 | "StoreID": "STR7188", 1111 | "PosID": "POS682", 1112 | "CashierID": "OAS394", 1113 | "CustomerType": "NONPRIME", 1114 | "CustomerCardNo": "1489226224", 1115 | "TotalAmount": 0, 1116 | "NumberOfItems": 0, 1117 | "PaymentMethod": "CASH", 1118 | "TaxableAmount": 0, 1119 | "CGST": 0, 1120 | "SGST": 0, 1121 | "CESS": 0, 1122 | "DeliveryType": "TAKEAWAY" 1123 | }, 1124 | { 1125 | "InvoiceNumber": 0, 1126 | "CreatedTime": "0", 1127 | "StoreID": "STR7188", 1128 | "PosID": "POS198", 1129 | "CashierID": "OAS199", 1130 | "CustomerType": "PRIME", 1131 | "CustomerCardNo": "5733357005", 1132 | "TotalAmount": 0, 1133 | "NumberOfItems": 0, 1134 | "PaymentMethod": "CARD", 1135 | "TaxableAmount": 0, 1136 | "CGST": 0, 1137 | "SGST": 0, 1138 | "CESS": 0, 1139 | "DeliveryType": "HOME-DELIVERY" 1140 | }, 1141 | { 1142 | "InvoiceNumber": 0, 1143 | "CreatedTime": "0", 1144 | "StoreID": "STR7188", 1145 | "PosID": "POS956", 1146 | "CashierID": "OAS134", 1147 | "CustomerType": "PRIME", 1148 | "CustomerCardNo": "4629185211", 1149 | "TotalAmount": 0, 1150 | "NumberOfItems": 0, 1151 | "PaymentMethod": "CARD", 1152 | "TaxableAmount": 0, 1153 | "CGST": 0, 1154 | "SGST": 0, 1155 | "CESS": 0, 1156 | "DeliveryType": "TAKEAWAY" 1157 | }, 1158 | { 1159 | "InvoiceNumber": 0, 1160 | "CreatedTime": "0", 1161 | "StoreID": "STR7188", 1162 | "PosID": "POS664", 1163 | "CashierID": "OAS971", 1164 | "CustomerType": "PRIME", 1165 | "CustomerCardNo": 
"9316477281", 1166 | "TotalAmount": 0, 1167 | "NumberOfItems": 0, 1168 | "PaymentMethod": "CARD", 1169 | "TaxableAmount": 0, 1170 | "CGST": 0, 1171 | "SGST": 0, 1172 | "CESS": 0, 1173 | "DeliveryType": "HOME-DELIVERY" 1174 | }, 1175 | { 1176 | "InvoiceNumber": 0, 1177 | "CreatedTime": "0", 1178 | "StoreID": "STR7188", 1179 | "PosID": "POS825", 1180 | "CashierID": "OAS329", 1181 | "CustomerType": "PRIME", 1182 | "CustomerCardNo": "7051101351", 1183 | "TotalAmount": 0, 1184 | "NumberOfItems": 0, 1185 | "PaymentMethod": "CASH", 1186 | "TaxableAmount": 0, 1187 | "CGST": 0, 1188 | "SGST": 0, 1189 | "CESS": 0, 1190 | "DeliveryType": "HOME-DELIVERY" 1191 | }, 1192 | { 1193 | "InvoiceNumber": 0, 1194 | "CreatedTime": "0", 1195 | "StoreID": "STR7255", 1196 | "PosID": "POS626", 1197 | "CashierID": "OAS437", 1198 | "CustomerType": "PRIME", 1199 | "CustomerCardNo": "2602353115", 1200 | "TotalAmount": 0, 1201 | "NumberOfItems": 0, 1202 | "PaymentMethod": "CARD", 1203 | "TaxableAmount": 0, 1204 | "CGST": 0, 1205 | "SGST": 0, 1206 | "CESS": 0, 1207 | "DeliveryType": "TAKEAWAY" 1208 | }, 1209 | { 1210 | "InvoiceNumber": 0, 1211 | "CreatedTime": "0", 1212 | "StoreID": "STR7255", 1213 | "PosID": "POS115", 1214 | "CashierID": "OAS634", 1215 | "CustomerType": "PRIME", 1216 | "CustomerCardNo": "1682244797", 1217 | "TotalAmount": 0, 1218 | "NumberOfItems": 0, 1219 | "PaymentMethod": "CASH", 1220 | "TaxableAmount": 0, 1221 | "CGST": 0, 1222 | "SGST": 0, 1223 | "CESS": 0, 1224 | "DeliveryType": "HOME-DELIVERY" 1225 | }, 1226 | { 1227 | "InvoiceNumber": 0, 1228 | "CreatedTime": "0", 1229 | "StoreID": "STR7255", 1230 | "PosID": "POS866", 1231 | "CashierID": "OAS244", 1232 | "CustomerType": "PRIME", 1233 | "CustomerCardNo": "5432615113", 1234 | "TotalAmount": 0, 1235 | "NumberOfItems": 0, 1236 | "PaymentMethod": "CASH", 1237 | "TaxableAmount": 0, 1238 | "CGST": 0, 1239 | "SGST": 0, 1240 | "CESS": 0, 1241 | "DeliveryType": "TAKEAWAY" 1242 | }, 1243 | { 1244 | "InvoiceNumber": 0, 1245 | 
"CreatedTime": "0", 1246 | "StoreID": "STR7255", 1247 | "PosID": "POS692", 1248 | "CashierID": "OAS812", 1249 | "CustomerType": "NONPRIME", 1250 | "CustomerCardNo": "8891304816", 1251 | "TotalAmount": 0, 1252 | "NumberOfItems": 0, 1253 | "PaymentMethod": "CASH", 1254 | "TaxableAmount": 0, 1255 | "CGST": 0, 1256 | "SGST": 0, 1257 | "CESS": 0, 1258 | "DeliveryType": "TAKEAWAY" 1259 | }, 1260 | { 1261 | "InvoiceNumber": 0, 1262 | "CreatedTime": "0", 1263 | "StoreID": "STR7255", 1264 | "PosID": "POS738", 1265 | "CashierID": "OAS349", 1266 | "CustomerType": "NONPRIME", 1267 | "CustomerCardNo": "8582181693", 1268 | "TotalAmount": 0, 1269 | "NumberOfItems": 0, 1270 | "PaymentMethod": "CASH", 1271 | "TaxableAmount": 0, 1272 | "CGST": 0, 1273 | "SGST": 0, 1274 | "CESS": 0, 1275 | "DeliveryType": "TAKEAWAY" 1276 | }, 1277 | { 1278 | "InvoiceNumber": 0, 1279 | "CreatedTime": "0", 1280 | "StoreID": "STR7262", 1281 | "PosID": "POS581", 1282 | "CashierID": "OAS742", 1283 | "CustomerType": "NONPRIME", 1284 | "CustomerCardNo": "6218924818", 1285 | "TotalAmount": 0, 1286 | "NumberOfItems": 0, 1287 | "PaymentMethod": "CARD", 1288 | "TaxableAmount": 0, 1289 | "CGST": 0, 1290 | "SGST": 0, 1291 | "CESS": 0, 1292 | "DeliveryType": "TAKEAWAY" 1293 | }, 1294 | { 1295 | "InvoiceNumber": 0, 1296 | "CreatedTime": "0", 1297 | "StoreID": "STR7262", 1298 | "PosID": "POS576", 1299 | "CashierID": "OAS819", 1300 | "CustomerType": "NONPRIME", 1301 | "CustomerCardNo": "2016894106", 1302 | "TotalAmount": 0, 1303 | "NumberOfItems": 0, 1304 | "PaymentMethod": "CARD", 1305 | "TaxableAmount": 0, 1306 | "CGST": 0, 1307 | "SGST": 0, 1308 | "CESS": 0, 1309 | "DeliveryType": "TAKEAWAY" 1310 | }, 1311 | { 1312 | "InvoiceNumber": 0, 1313 | "CreatedTime": "0", 1314 | "StoreID": "STR7262", 1315 | "PosID": "POS664", 1316 | "CashierID": "OAS536", 1317 | "CustomerType": "NONPRIME", 1318 | "CustomerCardNo": "2954913332", 1319 | "TotalAmount": 0, 1320 | "NumberOfItems": 0, 1321 | "PaymentMethod": "CARD", 1322 | 
"TaxableAmount": 0, 1323 | "CGST": 0, 1324 | "SGST": 0, 1325 | "CESS": 0, 1326 | "DeliveryType": "HOME-DELIVERY" 1327 | }, 1328 | { 1329 | "InvoiceNumber": 0, 1330 | "CreatedTime": "0", 1331 | "StoreID": "STR7262", 1332 | "PosID": "POS733", 1333 | "CashierID": "OAS959", 1334 | "CustomerType": "PRIME", 1335 | "CustomerCardNo": "1518460232", 1336 | "TotalAmount": 0, 1337 | "NumberOfItems": 0, 1338 | "PaymentMethod": "CASH", 1339 | "TaxableAmount": 0, 1340 | "CGST": 0, 1341 | "SGST": 0, 1342 | "CESS": 0, 1343 | "DeliveryType": "TAKEAWAY" 1344 | }, 1345 | { 1346 | "InvoiceNumber": 0, 1347 | "CreatedTime": "0", 1348 | "StoreID": "STR7262", 1349 | "PosID": "POS182", 1350 | "CashierID": "OAS538", 1351 | "CustomerType": "PRIME", 1352 | "CustomerCardNo": "5378825053", 1353 | "TotalAmount": 0, 1354 | "NumberOfItems": 0, 1355 | "PaymentMethod": "CASH", 1356 | "TaxableAmount": 0, 1357 | "CGST": 0, 1358 | "SGST": 0, 1359 | "CESS": 0, 1360 | "DeliveryType": "HOME-DELIVERY" 1361 | }, 1362 | { 1363 | "InvoiceNumber": 0, 1364 | "CreatedTime": "1570745999", 1365 | "StoreID": "STR7443", 1366 | "PosID": "POS411", 1367 | "CashierID": "OAS936", 1368 | "CustomerType": "NONPRIME", 1369 | "CustomerCardNo": "4369698403", 1370 | "TotalAmount": 0, 1371 | "NumberOfItems": 0, 1372 | "PaymentMethod": "CARD", 1373 | "TaxableAmount": 0, 1374 | "CGST": 0, 1375 | "SGST": 0, 1376 | "CESS": 0, 1377 | "DeliveryType": "HOME-DELIVERY" 1378 | }, 1379 | { 1380 | "InvoiceNumber": 0, 1381 | "CreatedTime": "0", 1382 | "StoreID": "STR7443", 1383 | "PosID": "POS365", 1384 | "CashierID": "OAS845", 1385 | "CustomerType": "PRIME", 1386 | "CustomerCardNo": "8790333340", 1387 | "TotalAmount": 0, 1388 | "NumberOfItems": 0, 1389 | "PaymentMethod": "CASH", 1390 | "TaxableAmount": 0, 1391 | "CGST": 0, 1392 | "SGST": 0, 1393 | "CESS": 0, 1394 | "DeliveryType": "HOME-DELIVERY" 1395 | }, 1396 | { 1397 | "InvoiceNumber": 0, 1398 | "CreatedTime": "0", 1399 | "StoreID": "STR7443", 1400 | "PosID": "POS394", 1401 | "CashierID": 
"OAS813", 1402 | "CustomerType": "NONPRIME", 1403 | "CustomerCardNo": "7587821520", 1404 | "TotalAmount": 0, 1405 | "NumberOfItems": 0, 1406 | "PaymentMethod": "CASH", 1407 | "TaxableAmount": 0, 1408 | "CGST": 0, 1409 | "SGST": 0, 1410 | "CESS": 0, 1411 | "DeliveryType": "TAKEAWAY" 1412 | }, 1413 | { 1414 | "InvoiceNumber": 0, 1415 | "CreatedTime": "0", 1416 | "StoreID": "STR7443", 1417 | "PosID": "POS333", 1418 | "CashierID": "OAS883", 1419 | "CustomerType": "NONPRIME", 1420 | "CustomerCardNo": "5483488802", 1421 | "TotalAmount": 0, 1422 | "NumberOfItems": 0, 1423 | "PaymentMethod": "CASH", 1424 | "TaxableAmount": 0, 1425 | "CGST": 0, 1426 | "SGST": 0, 1427 | "CESS": 0, 1428 | "DeliveryType": "HOME-DELIVERY" 1429 | }, 1430 | { 1431 | "InvoiceNumber": 0, 1432 | "CreatedTime": "0", 1433 | "StoreID": "STR7443", 1434 | "PosID": "POS388", 1435 | "CashierID": "OAS436", 1436 | "CustomerType": "PRIME", 1437 | "CustomerCardNo": "8714158103", 1438 | "TotalAmount": 0, 1439 | "NumberOfItems": 0, 1440 | "PaymentMethod": "CASH", 1441 | "TaxableAmount": 0, 1442 | "CGST": 0, 1443 | "SGST": 0, 1444 | "CESS": 0, 1445 | "DeliveryType": "HOME-DELIVERY" 1446 | }, 1447 | { 1448 | "InvoiceNumber": 0, 1449 | "CreatedTime": "0", 1450 | "StoreID": "STR7449", 1451 | "PosID": "POS351", 1452 | "CashierID": "OAS487", 1453 | "CustomerType": "NONPRIME", 1454 | "CustomerCardNo": "9639928743", 1455 | "TotalAmount": 0, 1456 | "NumberOfItems": 0, 1457 | "PaymentMethod": "CASH", 1458 | "TaxableAmount": 0, 1459 | "CGST": 0, 1460 | "SGST": 0, 1461 | "CESS": 0, 1462 | "DeliveryType": "TAKEAWAY" 1463 | }, 1464 | { 1465 | "InvoiceNumber": 0, 1466 | "CreatedTime": "0", 1467 | "StoreID": "STR7449", 1468 | "PosID": "POS923", 1469 | "CashierID": "OAS748", 1470 | "CustomerType": "NONPRIME", 1471 | "CustomerCardNo": "1422009832", 1472 | "TotalAmount": 0, 1473 | "NumberOfItems": 0, 1474 | "PaymentMethod": "CARD", 1475 | "TaxableAmount": 0, 1476 | "CGST": 0, 1477 | "SGST": 0, 1478 | "CESS": 0, 1479 | 
"DeliveryType": "HOME-DELIVERY" 1480 | }, 1481 | { 1482 | "InvoiceNumber": 0, 1483 | "CreatedTime": "0", 1484 | "StoreID": "STR7449", 1485 | "PosID": "POS423", 1486 | "CashierID": "OAS733", 1487 | "CustomerType": "PRIME", 1488 | "CustomerCardNo": "7691699125", 1489 | "TotalAmount": 0, 1490 | "NumberOfItems": 0, 1491 | "PaymentMethod": "CASH", 1492 | "TaxableAmount": 0, 1493 | "CGST": 0, 1494 | "SGST": 0, 1495 | "CESS": 0, 1496 | "DeliveryType": "HOME-DELIVERY" 1497 | }, 1498 | { 1499 | "InvoiceNumber": 0, 1500 | "CreatedTime": "0", 1501 | "StoreID": "STR7449", 1502 | "PosID": "POS215", 1503 | "CashierID": "OAS663", 1504 | "CustomerType": "PRIME", 1505 | "CustomerCardNo": "3453134392", 1506 | "TotalAmount": 0, 1507 | "NumberOfItems": 0, 1508 | "PaymentMethod": "CARD", 1509 | "TaxableAmount": 0, 1510 | "CGST": 0, 1511 | "SGST": 0, 1512 | "CESS": 0, 1513 | "DeliveryType": "HOME-DELIVERY" 1514 | }, 1515 | { 1516 | "InvoiceNumber": 0, 1517 | "CreatedTime": "0", 1518 | "StoreID": "STR7449", 1519 | "PosID": "POS728", 1520 | "CashierID": "OAS323", 1521 | "CustomerType": "PRIME", 1522 | "CustomerCardNo": "5301477438", 1523 | "TotalAmount": 0, 1524 | "NumberOfItems": 0, 1525 | "PaymentMethod": "CASH", 1526 | "TaxableAmount": 0, 1527 | "CGST": 0, 1528 | "SGST": 0, 1529 | "CESS": 0, 1530 | "DeliveryType": "TAKEAWAY" 1531 | }, 1532 | { 1533 | "InvoiceNumber": 0, 1534 | "CreatedTime": "0", 1535 | "StoreID": "STR8311", 1536 | "PosID": "POS975", 1537 | "CashierID": "OAS881", 1538 | "CustomerType": "NONPRIME", 1539 | "CustomerCardNo": "4680751410", 1540 | "TotalAmount": 0, 1541 | "NumberOfItems": 0, 1542 | "PaymentMethod": "CARD", 1543 | "TaxableAmount": 0, 1544 | "CGST": 0, 1545 | "SGST": 0, 1546 | "CESS": 0, 1547 | "DeliveryType": "TAKEAWAY" 1548 | }, 1549 | { 1550 | "InvoiceNumber": 0, 1551 | "CreatedTime": "0", 1552 | "StoreID": "STR8311", 1553 | "PosID": "POS377", 1554 | "CashierID": "OAS478", 1555 | "CustomerType": "NONPRIME", 1556 | "CustomerCardNo": "8670468851", 1557 | 
"TotalAmount": 0, 1558 | "NumberOfItems": 0, 1559 | "PaymentMethod": "CASH", 1560 | "TaxableAmount": 0, 1561 | "CGST": 0, 1562 | "SGST": 0, 1563 | "CESS": 0, 1564 | "DeliveryType": "TAKEAWAY" 1565 | }, 1566 | { 1567 | "InvoiceNumber": 0, 1568 | "CreatedTime": "0", 1569 | "StoreID": "STR8311", 1570 | "PosID": "POS728", 1571 | "CashierID": "OAS852", 1572 | "CustomerType": "PRIME", 1573 | "CustomerCardNo": "8490473378", 1574 | "TotalAmount": 0, 1575 | "NumberOfItems": 0, 1576 | "PaymentMethod": "CASH", 1577 | "TaxableAmount": 0, 1578 | "CGST": 0, 1579 | "SGST": 0, 1580 | "CESS": 0, 1581 | "DeliveryType": "HOME-DELIVERY" 1582 | }, 1583 | { 1584 | "InvoiceNumber": 0, 1585 | "CreatedTime": "0", 1586 | "StoreID": "STR8311", 1587 | "PosID": "POS935", 1588 | "CashierID": "OAS755", 1589 | "CustomerType": "NONPRIME", 1590 | "CustomerCardNo": "5196911608", 1591 | "TotalAmount": 0, 1592 | "NumberOfItems": 0, 1593 | "PaymentMethod": "CARD", 1594 | "TaxableAmount": 0, 1595 | "CGST": 0, 1596 | "SGST": 0, 1597 | "CESS": 0, 1598 | "DeliveryType": "TAKEAWAY" 1599 | }, 1600 | { 1601 | "InvoiceNumber": 0, 1602 | "CreatedTime": "0", 1603 | "StoreID": "STR8311", 1604 | "PosID": "POS224", 1605 | "CashierID": "OAS399", 1606 | "CustomerType": "NONPRIME", 1607 | "CustomerCardNo": "2989852024", 1608 | "TotalAmount": 0, 1609 | "NumberOfItems": 0, 1610 | "PaymentMethod": "CASH", 1611 | "TaxableAmount": 0, 1612 | "CGST": 0, 1613 | "SGST": 0, 1614 | "CESS": 0, 1615 | "DeliveryType": "TAKEAWAY" 1616 | }, 1617 | { 1618 | "InvoiceNumber": 0, 1619 | "CreatedTime": "0", 1620 | "StoreID": "STR8513", 1621 | "PosID": "POS881", 1622 | "CashierID": "OAS354", 1623 | "CustomerType": "NONPRIME", 1624 | "CustomerCardNo": "2599848717", 1625 | "TotalAmount": 0, 1626 | "NumberOfItems": 0, 1627 | "PaymentMethod": "CARD", 1628 | "TaxableAmount": 0, 1629 | "CGST": 0, 1630 | "SGST": 0, 1631 | "CESS": 0, 1632 | "DeliveryType": "TAKEAWAY" 1633 | }, 1634 | { 1635 | "InvoiceNumber": 0, 1636 | "CreatedTime": "0", 1637 | 
"StoreID": "STR8513", 1638 | "PosID": "POS155", 1639 | "CashierID": "OAS558", 1640 | "CustomerType": "NONPRIME", 1641 | "CustomerCardNo": "2389018842", 1642 | "TotalAmount": 0, 1643 | "NumberOfItems": 0, 1644 | "PaymentMethod": "CASH", 1645 | "TaxableAmount": 0, 1646 | "CGST": 0, 1647 | "SGST": 0, 1648 | "CESS": 0, 1649 | "DeliveryType": "HOME-DELIVERY" 1650 | }, 1651 | { 1652 | "InvoiceNumber": 0, 1653 | "CreatedTime": "0", 1654 | "StoreID": "STR8513", 1655 | "PosID": "POS582", 1656 | "CashierID": "OAS212", 1657 | "CustomerType": "PRIME", 1658 | "CustomerCardNo": "6705218300", 1659 | "TotalAmount": 0, 1660 | "NumberOfItems": 0, 1661 | "PaymentMethod": "CASH", 1662 | "TaxableAmount": 0, 1663 | "CGST": 0, 1664 | "SGST": 0, 1665 | "CESS": 0, 1666 | "DeliveryType": "TAKEAWAY" 1667 | }, 1668 | { 1669 | "InvoiceNumber": 0, 1670 | "CreatedTime": "0", 1671 | "StoreID": "STR8513", 1672 | "PosID": "POS163", 1673 | "CashierID": "OAS961", 1674 | "CustomerType": "PRIME", 1675 | "CustomerCardNo": "2762345282", 1676 | "TotalAmount": 0, 1677 | "NumberOfItems": 0, 1678 | "PaymentMethod": "CARD", 1679 | "TaxableAmount": 0, 1680 | "CGST": 0, 1681 | "SGST": 0, 1682 | "CESS": 0, 1683 | "DeliveryType": "HOME-DELIVERY" 1684 | }, 1685 | { 1686 | "InvoiceNumber": 0, 1687 | "CreatedTime": "0", 1688 | "StoreID": "STR8513", 1689 | "PosID": "POS311", 1690 | "CashierID": "OAS845", 1691 | "CustomerType": "NONPRIME", 1692 | "CustomerCardNo": "8111613552", 1693 | "TotalAmount": 0, 1694 | "NumberOfItems": 0, 1695 | "PaymentMethod": "CASH", 1696 | "TaxableAmount": 0, 1697 | "CGST": 0, 1698 | "SGST": 0, 1699 | "CESS": 0, 1700 | "DeliveryType": "TAKEAWAY" 1701 | } 1702 | ] -------------------------------------------------------------------------------- /03-KafkaStreamDemo/datagen/data/address.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "AddressLine": "HN. 
535, 4472 Eu St.", 4 | "City": "Katihar", 5 | "State": "Bihar", 6 | "PinCode": "867820", 7 | "ContactNumber": "5998971437" 8 | }, 9 | { 10 | "AddressLine": "7418 Dolor St.", 11 | "City": "Nagpur", 12 | "State": "Maharastra", 13 | "PinCode": "710782", 14 | "ContactNumber": "3018429913" 15 | }, 16 | { 17 | "AddressLine": "HN. 821, 8267 Dapibus Av.", 18 | "City": "Kolkata", 19 | "State": "West Bengal", 20 | "PinCode": "701624", 21 | "ContactNumber": "6351552880" 22 | }, 23 | { 24 | "AddressLine": "773 Eu Ave", 25 | "City": "Sitapur", 26 | "State": "Uttar Pradesh", 27 | "PinCode": "431667", 28 | "ContactNumber": "9934440762" 29 | }, 30 | { 31 | "AddressLine": "7409 Laoreet Rd.", 32 | "City": "Jammu", 33 | "State": "Jammu and Kashmir", 34 | "PinCode": "697806", 35 | "ContactNumber": "8335722151" 36 | }, 37 | { 38 | "AddressLine": "7114 Eu, Rd.", 39 | "City": "Ratlam", 40 | "State": "Madhya Pradesh", 41 | "PinCode": "925281", 42 | "ContactNumber": "4057182350" 43 | }, 44 | { 45 | "AddressLine": "Flat No. #658-8929 Arcu Rd.", 46 | "City": "Guna", 47 | "State": "Madhya Pradesh", 48 | "PinCode": "555836", 49 | "ContactNumber": "5015943575" 50 | }, 51 | { 52 | "AddressLine": "291-4367 Aliquam St.", 53 | "City": "Pali", 54 | "State": "Rajasthan", 55 | "PinCode": "906511", 56 | "ContactNumber": "7544884134" 57 | }, 58 | { 59 | "AddressLine": "Flat No. #396-1703 Ullamcorper, Street", 60 | "City": "Hassan", 61 | "State": "Karnataka", 62 | "PinCode": "701126", 63 | "ContactNumber": "1157976639" 64 | }, 65 | { 66 | "AddressLine": "House No 847, 2219 Donec St.", 67 | "City": "Adoni", 68 | "State": "Andhra Pradesh", 69 | "PinCode": "187597", 70 | "ContactNumber": "4515437963" 71 | }, 72 | { 73 | "AddressLine": "Flat No. 
#325-2029 Ac, Ave", 74 | "City": "Agra", 75 | "State": "Uttar Pradesh", 76 | "PinCode": "556412", 77 | "ContactNumber": "2844494619" 78 | }, 79 | { 80 | "AddressLine": "249-7358 Nec St.", 81 | "City": "Jaipur", 82 | "State": "Rajasthan", 83 | "PinCode": "326647", 84 | "ContactNumber": "4455040114" 85 | }, 86 | { 87 | "AddressLine": "6549 Ac, Ave", 88 | "City": "Ramagundam", 89 | "State": "Andhra Pradesh", 90 | "PinCode": "699979", 91 | "ContactNumber": "2758430969" 92 | }, 93 | { 94 | "AddressLine": "House No 727, 9696 Ullamcorper, Road", 95 | "City": "Dabgram", 96 | "State": "West Bengal", 97 | "PinCode": "953658", 98 | "ContactNumber": "4166559042" 99 | }, 100 | { 101 | "AddressLine": "House No 703, 9601 Aliquam Av.", 102 | "City": "Arrah", 103 | "State": "Bihar", 104 | "PinCode": "732227", 105 | "ContactNumber": "6382741027" 106 | }, 107 | { 108 | "AddressLine": "Flat No. #229-5133 Quam. Road", 109 | "City": "Haridwar", 110 | "State": "Uttarakhand", 111 | "PinCode": "478334", 112 | "ContactNumber": "8593433887" 113 | }, 114 | { 115 | "AddressLine": "611-1753 Etiam Avenue", 116 | "City": "Nagaon", 117 | "State": "Assam", 118 | "PinCode": "935500", 119 | "ContactNumber": "3398236143" 120 | }, 121 | { 122 | "AddressLine": "3122 Morbi Ave", 123 | "City": "Palakkad", 124 | "State": "Kerala", 125 | "PinCode": "800459", 126 | "ContactNumber": "8235927974" 127 | }, 128 | { 129 | "AddressLine": "5930 Aliquam Street", 130 | "City": "Chhindwara", 131 | "State": "Madhya Pradesh", 132 | "PinCode": "724661", 133 | "ContactNumber": "2141075027" 134 | }, 135 | { 136 | "AddressLine": "House No 416, 8652 Sodales Rd.", 137 | "City": "Mathura", 138 | "State": "Uttar Pradesh", 139 | "PinCode": "148055", 140 | "ContactNumber": "8645000577" 141 | }, 142 | { 143 | "AddressLine": "8612 Non Rd.", 144 | "City": "Guna", 145 | "State": "Madhya Pradesh", 146 | "PinCode": "210683", 147 | "ContactNumber": "8336980338" 148 | }, 149 | { 150 | "AddressLine": "295-7690 At Street", 151 | "City": 
"Shahjahanpur", 152 | "State": "Uttar Pradesh", 153 | "PinCode": "228410", 154 | "ContactNumber": "4624129756" 155 | }, 156 | { 157 | "AddressLine": "House No 638, 4720 Nisi Road", 158 | "City": "Madurai", 159 | "State": "Tamil Nadu", 160 | "PinCode": "754286", 161 | "ContactNumber": "5741337087" 162 | }, 163 | { 164 | "AddressLine": "706-3622 Pharetra Av.", 165 | "City": "Serampore", 166 | "State": "West Bengal", 167 | "PinCode": "266237", 168 | "ContactNumber": "8034395531" 169 | }, 170 | { 171 | "AddressLine": "6363 Non Rd.", 172 | "City": "Darbhanga", 173 | "State": "Bihar", 174 | "PinCode": "570288", 175 | "ContactNumber": "5656699770" 176 | }, 177 | { 178 | "AddressLine": "853-9772 Enim, Avenue", 179 | "City": "Allappuzha", 180 | "State": "Kerala", 181 | "PinCode": "140312", 182 | "ContactNumber": "5528409181" 183 | }, 184 | { 185 | "AddressLine": "Flat No. #773-5039 Lacinia Road", 186 | "City": "Gudivada", 187 | "State": "Andhra Pradesh", 188 | "PinCode": "553874", 189 | "ContactNumber": "6772227684" 190 | }, 191 | { 192 | "AddressLine": "Flat No. #753-706 Molestie Av.", 193 | "City": "North Barrackpur", 194 | "State": "West Bengal", 195 | "PinCode": "925233", 196 | "ContactNumber": "8615470547" 197 | }, 198 | { 199 | "AddressLine": "Flat No. 
#794-4263 Tempor St.", 200 | "City": "Shivapuri", 201 | "State": "Madhya Pradesh", 202 | "PinCode": "563047", 203 | "ContactNumber": "6403786002" 204 | }, 205 | { 206 | "AddressLine": "House No 786, 3820 Luctus Av.", 207 | "City": "Ambala Sadar", 208 | "State": "Haryana", 209 | "PinCode": "536057", 210 | "ContactNumber": "7336393163" 211 | }, 212 | { 213 | "AddressLine": "1852 Est St.", 214 | "City": "Imphal", 215 | "State": "Manipur", 216 | "PinCode": "120023", 217 | "ContactNumber": "6124913142" 218 | }, 219 | { 220 | "AddressLine": "7697 Sed, Road", 221 | "City": "Tiruvottiyur", 222 | "State": "Tamil Nadu", 223 | "PinCode": "819925", 224 | "ContactNumber": "6282658715" 225 | }, 226 | { 227 | "AddressLine": "House No 735, 8148 Eu St.", 228 | "City": "Bahraich", 229 | "State": "Uttar Pradesh", 230 | "PinCode": "977360", 231 | "ContactNumber": "5692615243" 232 | }, 233 | { 234 | "AddressLine": "9728 Ac Ave", 235 | "City": "Gonda", 236 | "State": "Uttar Pradesh", 237 | "PinCode": "468305", 238 | "ContactNumber": "8482561251" 239 | }, 240 | { 241 | "AddressLine": "Flat No. #570-3269 Adipiscing, Rd.", 242 | "City": "Chapra", 243 | "State": "Bihar", 244 | "PinCode": "257573", 245 | "ContactNumber": "1799428172" 246 | }, 247 | { 248 | "AddressLine": "Flat No. #420-9203 Pretium Street", 249 | "City": "Dehri", 250 | "State": "Bihar", 251 | "PinCode": "392400", 252 | "ContactNumber": "3841876454" 253 | }, 254 | { 255 | "AddressLine": "Flat No. #335-7984 Senectus Rd.", 256 | "City": "Pali", 257 | "State": "Rajasthan", 258 | "PinCode": "900530", 259 | "ContactNumber": "6125618251" 260 | }, 261 | { 262 | "AddressLine": "393-5141 Enim. 
Ave", 263 | "City": "Surendranagar", 264 | "State": "Gujarat", 265 | "PinCode": "209008", 266 | "ContactNumber": "8862122529" 267 | }, 268 | { 269 | "AddressLine": "2465 Laoreet, Street", 270 | "City": "Dehri", 271 | "State": "Bihar", 272 | "PinCode": "637308", 273 | "ContactNumber": "2662305605" 274 | }, 275 | { 276 | "AddressLine": "467-7967 Volutpat Ave", 277 | "City": "Jodhpur", 278 | "State": "Rajasthan", 279 | "PinCode": "983371", 280 | "ContactNumber": "6577421741" 281 | }, 282 | { 283 | "AddressLine": "644-1102 Quam, Road", 284 | "City": "Muzaffarnagar", 285 | "State": "Uttar Pradesh", 286 | "PinCode": "513402", 287 | "ContactNumber": "7979432420" 288 | }, 289 | { 290 | "AddressLine": "House No 712, 4763 Turpis Street", 291 | "City": "Pune", 292 | "State": "Maharastra", 293 | "PinCode": "551272", 294 | "ContactNumber": "3419930245" 295 | }, 296 | { 297 | "AddressLine": "582-5177 Suscipit Rd.", 298 | "City": "Patna", 299 | "State": "Bihar", 300 | "PinCode": "530094", 301 | "ContactNumber": "1158759250" 302 | }, 303 | { 304 | "AddressLine": "House No 424, 5509 Ut Street", 305 | "City": "Ganganagar", 306 | "State": "Rajasthan", 307 | "PinCode": "692405", 308 | "ContactNumber": "8395635840" 309 | }, 310 | { 311 | "AddressLine": "House No 953, 785 Viverra. Street", 312 | "City": "Hathras", 313 | "State": "Uttar Pradesh", 314 | "PinCode": "973613", 315 | "ContactNumber": "3242686167" 316 | }, 317 | { 318 | "AddressLine": "265-1062 Sapien. Road", 319 | "City": "Murwara", 320 | "State": "Madhya Pradesh", 321 | "PinCode": "894885", 322 | "ContactNumber": "2503052325" 323 | }, 324 | { 325 | "AddressLine": "Flat No. #338-5510 Justo. 
Av.", 326 | "City": "Puri", 327 | "State": "Odisha", 328 | "PinCode": "127980", 329 | "ContactNumber": "9686763415" 330 | }, 331 | { 332 | "AddressLine": "House No 992, 8003 In Rd.", 333 | "City": "Dehradun", 334 | "State": "Uttarakhand", 335 | "PinCode": "425528", 336 | "ContactNumber": "9875104847" 337 | }, 338 | { 339 | "AddressLine": "1407 Auctor, St.", 340 | "City": "Ichalkaranji", 341 | "State": "Maharastra", 342 | "PinCode": "115863", 343 | "ContactNumber": "2598303301" 344 | }, 345 | { 346 | "AddressLine": "715-449 Arcu. Rd.", 347 | "City": "Delhi", 348 | "State": "Delhi", 349 | "PinCode": "338019", 350 | "ContactNumber": "5994731583" 351 | }, 352 | { 353 | "AddressLine": "House No 556, 2384 Libero. Rd.", 354 | "City": "Banda", 355 | "State": "Uttar Pradesh", 356 | "PinCode": "767511", 357 | "ContactNumber": "9801318492" 358 | }, 359 | { 360 | "AddressLine": "Flat No. #740-6953 Scelerisque Rd.", 361 | "City": "Mango", 362 | "State": "Jharkhand", 363 | "PinCode": "697196", 364 | "ContactNumber": "7172289826" 365 | }, 366 | { 367 | "AddressLine": "Flat No. #210-902 Neque Street", 368 | "City": "South Dum Dum", 369 | "State": "West Bengal", 370 | "PinCode": "504795", 371 | "ContactNumber": "7508353683" 372 | }, 373 | { 374 | "AddressLine": "Flat No. #741-3157 Metus Av.", 375 | "City": "Delhi", 376 | "State": "Delhi", 377 | "PinCode": "495095", 378 | "ContactNumber": "7097939373" 379 | }, 380 | { 381 | "AddressLine": "7711 Ac Avenue", 382 | "City": "Raigarh", 383 | "State": "Chhattisgarh", 384 | "PinCode": "282126", 385 | "ContactNumber": "1952031107" 386 | }, 387 | { 388 | "AddressLine": "846-8342 Sollicitudin Road", 389 | "City": "Allappuzha", 390 | "State": "Kerala", 391 | "PinCode": "510771", 392 | "ContactNumber": "6773875946" 393 | }, 394 | { 395 | "AddressLine": "Flat No. 
#863-3118 Odio Rd.", 396 | "City": "Mathura", 397 | "State": "Uttar Pradesh", 398 | "PinCode": "587393", 399 | "ContactNumber": "8562646144" 400 | }, 401 | { 402 | "AddressLine": "800-9178 Aliquam Street", 403 | "City": "Karnal", 404 | "State": "Haryana", 405 | "PinCode": "132797", 406 | "ContactNumber": "6883499359" 407 | }, 408 | { 409 | "AddressLine": "Flat No. #131-5960 Varius St.", 410 | "City": "Ongole", 411 | "State": "Andhra Pradesh", 412 | "PinCode": "881860", 413 | "ContactNumber": "6735030289" 414 | }, 415 | { 416 | "AddressLine": "3508 Ligula. Rd.", 417 | "City": "Ahmadnagar", 418 | "State": "Maharastra", 419 | "PinCode": "703396", 420 | "ContactNumber": "9638404068" 421 | }, 422 | { 423 | "AddressLine": "Flat No. #316-5621 Laoreet St.", 424 | "City": "Nellore", 425 | "State": "Andhra Pradesh", 426 | "PinCode": "333433", 427 | "ContactNumber": "5232382321" 428 | }, 429 | { 430 | "AddressLine": "House No 383, 4427 Pellentesque Rd.", 431 | "City": "Bokaro Steel City", 432 | "State": "Jharkhand", 433 | "PinCode": "509723", 434 | "ContactNumber": "1442202063" 435 | }, 436 | { 437 | "AddressLine": "6226 Mauris St.", 438 | "City": "Ingraj Bazar", 439 | "State": "West Bengal", 440 | "PinCode": "592487", 441 | "ContactNumber": "8382307048" 442 | }, 443 | { 444 | "AddressLine": "855-4409 Duis Street", 445 | "City": "Junagadh", 446 | "State": "Gujarat", 447 | "PinCode": "622745", 448 | "ContactNumber": "6984835508" 449 | }, 450 | { 451 | "AddressLine": "735-1235 Pretium Street", 452 | "City": "Unnao", 453 | "State": "Uttar Pradesh", 454 | "PinCode": "103540", 455 | "ContactNumber": "4331966638" 456 | }, 457 | { 458 | "AddressLine": "7319 Nunc Ave", 459 | "City": "Muzaffarpur", 460 | "State": "Bihar", 461 | "PinCode": "760765", 462 | "ContactNumber": "7586450222" 463 | }, 464 | { 465 | "AddressLine": "House No 989, 5810 Nulla Av.", 466 | "City": "Ashoknagar-Kalyangarh", 467 | "State": "West Bengal", 468 | "PinCode": "561119", 469 | "ContactNumber": "9838697111" 
470 | }, 471 | { 472 | "AddressLine": "233-9672 Pharetra. Rd.", 473 | "City": "Kanpur Cantonment", 474 | "State": "Uttar Pradesh", 475 | "PinCode": "832089", 476 | "ContactNumber": "8675203394" 477 | }, 478 | { 479 | "AddressLine": "529-4520 Libero. Ave", 480 | "City": "Raigarh", 481 | "State": "Chhattisgarh", 482 | "PinCode": "183678", 483 | "ContactNumber": "3057906681" 484 | }, 485 | { 486 | "AddressLine": "House No 936, 3362 Vehicula Ave", 487 | "City": "Serampore", 488 | "State": "West Bengal", 489 | "PinCode": "367103", 490 | "ContactNumber": "2614588263" 491 | }, 492 | { 493 | "AddressLine": "House No 695, 9930 A, St.", 494 | "City": "Hassan", 495 | "State": "Karnataka", 496 | "PinCode": "277874", 497 | "ContactNumber": "4013626333" 498 | }, 499 | { 500 | "AddressLine": "Flat No. #735-431 Gravida. St.", 501 | "City": "Chapra", 502 | "State": "Bihar", 503 | "PinCode": "386153", 504 | "ContactNumber": "2004897617" 505 | }, 506 | { 507 | "AddressLine": "488-2166 Class St.", 508 | "City": "Bellary", 509 | "State": "Karnataka", 510 | "PinCode": "831105", 511 | "ContactNumber": "4355640111" 512 | }, 513 | { 514 | "AddressLine": "Flat No. #736-9947 Et Av.", 515 | "City": "Sambalpur", 516 | "State": "Odisha", 517 | "PinCode": "979906", 518 | "ContactNumber": "9196326946" 519 | }, 520 | { 521 | "AddressLine": "444-1842 Dui. Rd.", 522 | "City": "Shivapuri", 523 | "State": "Madhya Pradesh", 524 | "PinCode": "561012", 525 | "ContactNumber": "7243866404" 526 | }, 527 | { 528 | "AddressLine": "2923 Sollicitudin Road", 529 | "City": "Secunderabad", 530 | "State": "Andhra Pradesh", 531 | "PinCode": "382673", 532 | "ContactNumber": "3685997991" 533 | }, 534 | { 535 | "AddressLine": "517-8912 Nulla St.", 536 | "City": "Champdani", 537 | "State": "West Bengal", 538 | "PinCode": "680616", 539 | "ContactNumber": "8183195143" 540 | }, 541 | { 542 | "AddressLine": "5418 Magna. 
Rd.", 543 | "City": "Chennai", 544 | "State": "Tamil Nadu", 545 | "PinCode": "386032", 546 | "ContactNumber": "6557358508" 547 | }, 548 | { 549 | "AddressLine": "548-8023 Vulputate, Rd.", 550 | "City": "Gonda", 551 | "State": "Uttar Pradesh", 552 | "PinCode": "945821", 553 | "ContactNumber": "2509549312" 554 | }, 555 | { 556 | "AddressLine": "House No 235, 5674 Vitae Street", 557 | "City": "Chapra", 558 | "State": "Bihar", 559 | "PinCode": "201938", 560 | "ContactNumber": "4051146489" 561 | }, 562 | { 563 | "AddressLine": "House No 740, 6689 Tempor Av.", 564 | "City": "Bharatpur", 565 | "State": "Rajasthan", 566 | "PinCode": "932264", 567 | "ContactNumber": "8563079826" 568 | }, 569 | { 570 | "AddressLine": "House No 534, 9974 Rutrum Av.", 571 | "City": "Bellary", 572 | "State": "Karnataka", 573 | "PinCode": "761574", 574 | "ContactNumber": "7652091989" 575 | }, 576 | { 577 | "AddressLine": "Flat No. #378-6117 Nulla Rd.", 578 | "City": "Mandasor", 579 | "State": "Madhya Pradesh", 580 | "PinCode": "975340", 581 | "ContactNumber": "9006343802" 582 | }, 583 | { 584 | "AddressLine": "7947 Mauris, Av.", 585 | "City": "Tambaram", 586 | "State": "Tamil Nadu", 587 | "PinCode": "339533", 588 | "ContactNumber": "5898142373" 589 | }, 590 | { 591 | "AddressLine": "Flat No. #296-6151 Neque Road", 592 | "City": "Varanasi", 593 | "State": "Uttar Pradesh", 594 | "PinCode": "340397", 595 | "ContactNumber": "9083194560" 596 | }, 597 | { 598 | "AddressLine": "Flat No. #879-8630 Sed Road", 599 | "City": "Vijayawada", 600 | "State": "Andhra Pradesh", 601 | "PinCode": "824631", 602 | "ContactNumber": "2805327045" 603 | }, 604 | { 605 | "AddressLine": "Flat No. 
#398-2797 A Rd.", 606 | "City": "Sikar", 607 | "State": "Rajasthan", 608 | "PinCode": "660265", 609 | "ContactNumber": "1131794401" 610 | }, 611 | { 612 | "AddressLine": "House No 471, 2701 Purus, Rd.", 613 | "City": "Faridabad", 614 | "State": "Haryana", 615 | "PinCode": "121303", 616 | "ContactNumber": "4452007276" 617 | }, 618 | { 619 | "AddressLine": "House No 639, 1344 Nisl St.", 620 | "City": "Farrukhabad-cum-Fatehgarh", 621 | "State": "Uttar Pradesh", 622 | "PinCode": "481405", 623 | "ContactNumber": "5281218896" 624 | }, 625 | { 626 | "AddressLine": "House No 400, 2541 Lacus Street", 627 | "City": "Alandur", 628 | "State": "Tamil Nadu", 629 | "PinCode": "919471", 630 | "ContactNumber": "3677519188" 631 | }, 632 | { 633 | "AddressLine": "627-5294 Nisi St.", 634 | "City": "Hyderabad", 635 | "State": "Andhra Pradesh", 636 | "PinCode": "827202", 637 | "ContactNumber": "7553553297" 638 | }, 639 | { 640 | "AddressLine": "691-6564 Lacinia Rd.", 641 | "City": "Arrah", 642 | "State": "Bihar", 643 | "PinCode": "380536", 644 | "ContactNumber": "1115912385" 645 | }, 646 | { 647 | "AddressLine": "House No 240, 7563 Vel Rd.", 648 | "City": "Rishra", 649 | "State": "West Bengal", 650 | "PinCode": "201130", 651 | "ContactNumber": "2828547194" 652 | }, 653 | { 654 | "AddressLine": "Flat No. #245-5336 Nibh. Av.", 655 | "City": "Jammu", 656 | "State": "Jammu and Kashmir", 657 | "PinCode": "114809", 658 | "ContactNumber": "9836944618" 659 | }, 660 | { 661 | "AddressLine": "3474 Nam Rd.", 662 | "City": "Mangalore", 663 | "State": "Karnataka", 664 | "PinCode": "164228", 665 | "ContactNumber": "6528245422" 666 | }, 667 | { 668 | "AddressLine": "Flat No. #527-9373 At Rd.", 669 | "City": "South Dum Dum", 670 | "State": "West Bengal", 671 | "PinCode": "138781", 672 | "ContactNumber": "2356382980" 673 | }, 674 | { 675 | "AddressLine": "Flat No. #630-2414 Enim. 
St.", 676 | "City": "Satna", 677 | "State": "Madhya Pradesh", 678 | "PinCode": "991102", 679 | "ContactNumber": "8931080189" 680 | }, 681 | { 682 | "AddressLine": "567-3331 In St.", 683 | "City": "Cuddalore", 684 | "State": "Tamil Nadu", 685 | "PinCode": "813558", 686 | "ContactNumber": "3549212065" 687 | }, 688 | { 689 | "AddressLine": "9606 Integer Ave", 690 | "City": "Multan", 691 | "State": "Punjab", 692 | "PinCode": "491357", 693 | "ContactNumber": "7979421441" 694 | }, 695 | { 696 | "AddressLine": "Flat No. #174-7165 Magna, Street", 697 | "City": "Eluru", 698 | "State": "Andhra Pradesh", 699 | "PinCode": "356811", 700 | "ContactNumber": "4117163673" 701 | } 702 | ] -------------------------------------------------------------------------------- /03-KafkaStreamDemo/datagen/data/products.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "ItemCode": 203, 4 | "ItemDescription": "Bunk bed", 5 | "ItemPrice": "1686", 6 | "ItemQty": 0 7 | }, 8 | { 9 | "ItemCode": 208, 10 | "ItemDescription": "Canopy bed", 11 | "ItemPrice": "508", 12 | "ItemQty": 0 13 | }, 14 | { 15 | "ItemCode": 213, 16 | "ItemDescription": "Infant bed", 17 | "ItemPrice": "1755", 18 | "ItemQty": 0 19 | }, 20 | { 21 | "ItemCode": 218, 22 | "ItemDescription": "Sofa bed", 23 | "ItemPrice": "895", 24 | "ItemQty": 0 25 | }, 26 | { 27 | "ItemCode": 223, 28 | "ItemDescription": "Hammock", 29 | "ItemPrice": "1210", 30 | "ItemQty": 0 31 | }, 32 | { 33 | "ItemCode": 228, 34 | "ItemDescription": "Divan", 35 | "ItemPrice": "1083", 36 | "ItemQty": 0 37 | }, 38 | { 39 | "ItemCode": 233, 40 | "ItemDescription": "Coffee table", 41 | "ItemPrice": "1055", 42 | "ItemQty": 0 43 | }, 44 | { 45 | "ItemCode": 238, 46 | "ItemDescription": "Dining table", 47 | "ItemPrice": "1582", 48 | "ItemQty": 0 49 | }, 50 | { 51 | "ItemCode": 243, 52 | "ItemDescription": "Folding table", 53 | "ItemPrice": "738", 54 | "ItemQty": 0 55 | }, 56 | { 57 | "ItemCode": 248, 58 | 
"ItemDescription": "TV tray table", 59 | "ItemPrice": "1947", 60 | "ItemQty": 0 61 | }, 62 | { 63 | "ItemCode": 253, 64 | "ItemDescription": "Bathroom cabinet", 65 | "ItemPrice": "1513", 66 | "ItemQty": 0 67 | }, 68 | { 69 | "ItemCode": 258, 70 | "ItemDescription": "Closet", 71 | "ItemPrice": "1687", 72 | "ItemQty": 0 73 | }, 74 | { 75 | "ItemCode": 263, 76 | "ItemDescription": "Kitchen cabinet", 77 | "ItemPrice": "1050", 78 | "ItemQty": 0 79 | }, 80 | { 81 | "ItemCode": 268, 82 | "ItemDescription": "Floating shelf", 83 | "ItemPrice": "1894", 84 | "ItemQty": 0 85 | }, 86 | { 87 | "ItemCode": 273, 88 | "ItemDescription": "Bedroom set", 89 | "ItemPrice": "842", 90 | "ItemQty": 0 91 | }, 92 | { 93 | "ItemCode": 278, 94 | "ItemDescription": "Dining set", 95 | "ItemPrice": "1940", 96 | "ItemQty": 0 97 | }, 98 | { 99 | "ItemCode": 283, 100 | "ItemDescription": "Portable Lamps", 101 | "ItemPrice": "2236", 102 | "ItemQty": 0 103 | }, 104 | { 105 | "ItemCode": 288, 106 | "ItemDescription": "Hutch", 107 | "ItemPrice": "1812", 108 | "ItemQty": 0 109 | }, 110 | { 111 | "ItemCode": 293, 112 | "ItemDescription": "Bar stool", 113 | "ItemPrice": "1148", 114 | "ItemQty": 0 115 | }, 116 | { 117 | "ItemCode": 298, 118 | "ItemDescription": "Bardic chair", 119 | "ItemPrice": "1649", 120 | "ItemQty": 0 121 | }, 122 | { 123 | "ItemCode": 303, 124 | "ItemDescription": "Bench", 125 | "ItemPrice": "2251", 126 | "ItemQty": 0 127 | }, 128 | { 129 | "ItemCode": 308, 130 | "ItemDescription": "Butterfly chair", 131 | "ItemPrice": "857", 132 | "ItemQty": 0 133 | }, 134 | { 135 | "ItemCode": 313, 136 | "ItemDescription": "Brewster Chair", 137 | "ItemPrice": "2195", 138 | "ItemQty": 0 139 | }, 140 | { 141 | "ItemCode": 318, 142 | "ItemDescription": "Bofinger chair", 143 | "ItemPrice": "1119", 144 | "ItemQty": 0 145 | }, 146 | { 147 | "ItemCode": 323, 148 | "ItemDescription": "Chiavari chair", 149 | "ItemPrice": "653", 150 | "ItemQty": 0 151 | }, 152 | { 153 | "ItemCode": 328, 154 | 
"ItemDescription": "Dining chair", 155 | "ItemPrice": "1223", 156 | "ItemQty": 0 157 | }, 158 | { 159 | "ItemCode": 333, 160 | "ItemDescription": "Gaming chair", 161 | "ItemPrice": "1902", 162 | "ItemQty": 0 163 | }, 164 | { 165 | "ItemCode": 338, 166 | "ItemDescription": "Kneeling chairs", 167 | "ItemPrice": "442", 168 | "ItemQty": 0 169 | }, 170 | { 171 | "ItemCode": 343, 172 | "ItemDescription": "Massage chair", 173 | "ItemPrice": "1354", 174 | "ItemQty": 0 175 | }, 176 | { 177 | "ItemCode": 348, 178 | "ItemDescription": "Navy chair", 179 | "ItemPrice": "1998", 180 | "ItemQty": 0 181 | }, 182 | { 183 | "ItemCode": 353, 184 | "ItemDescription": "Recliner", 185 | "ItemPrice": "1585", 186 | "ItemQty": 0 187 | }, 188 | { 189 | "ItemCode": 358, 190 | "ItemDescription": "Restraint chair", 191 | "ItemPrice": "1943", 192 | "ItemQty": 0 193 | }, 194 | { 195 | "ItemCode": 363, 196 | "ItemDescription": "Rocking chair", 197 | "ItemPrice": "2021", 198 | "ItemQty": 0 199 | }, 200 | { 201 | "ItemCode": 368, 202 | "ItemDescription": "Stool", 203 | "ItemPrice": "888", 204 | "ItemQty": 0 205 | }, 206 | { 207 | "ItemCode": 373, 208 | "ItemDescription": "Latex Mattress", 209 | "ItemPrice": "1313", 210 | "ItemQty": 0 211 | }, 212 | { 213 | "ItemCode": 378, 214 | "ItemDescription": "Air Bed", 215 | "ItemPrice": "2263", 216 | "ItemQty": 0 217 | }, 218 | { 219 | "ItemCode": 383, 220 | "ItemDescription": "Innerspring Mattress", 221 | "ItemPrice": "655", 222 | "ItemQty": 0 223 | }, 224 | { 225 | "ItemCode": 388, 226 | "ItemDescription": "Memory Foam", 227 | "ItemPrice": "1884", 228 | "ItemQty": 0 229 | }, 230 | { 231 | "ItemCode": 393, 232 | "ItemDescription": "Hybrid Mattress", 233 | "ItemPrice": "2217", 234 | "ItemQty": 0 235 | }, 236 | { 237 | "ItemCode": 398, 238 | "ItemDescription": "Latex Mattress", 239 | "ItemPrice": "773", 240 | "ItemQty": 0 241 | }, 242 | { 243 | "ItemCode": 403, 244 | "ItemDescription": "Adjustable Air Mattress", 245 | "ItemPrice": "1417", 246 | "ItemQty": 0 
247 | }, 248 | { 249 | "ItemCode": 408, 250 | "ItemDescription": "Confidante", 251 | "ItemPrice": "1242", 252 | "ItemQty": 0 253 | }, 254 | { 255 | "ItemCode": 413, 256 | "ItemDescription": "Slipcover", 257 | "ItemPrice": "1896", 258 | "ItemQty": 0 259 | }, 260 | { 261 | "ItemCode": 418, 262 | "ItemDescription": "Wing chair", 263 | "ItemPrice": "1431", 264 | "ItemQty": 0 265 | }, 266 | { 267 | "ItemCode": 423, 268 | "ItemDescription": "Quilt", 269 | "ItemPrice": "1485", 270 | "ItemQty": 0 271 | }, 272 | { 273 | "ItemCode": 428, 274 | "ItemDescription": "Cocktail glass", 275 | "ItemPrice": "1422", 276 | "ItemQty": 0 277 | }, 278 | { 279 | "ItemCode": 433, 280 | "ItemDescription": "Depression glass", 281 | "ItemPrice": "1950", 282 | "ItemQty": 0 283 | }, 284 | { 285 | "ItemCode": 438, 286 | "ItemDescription": "Lead glass", 287 | "ItemPrice": "2069", 288 | "ItemQty": 0 289 | }, 290 | { 291 | "ItemCode": 443, 292 | "ItemDescription": "Margarita glass", 293 | "ItemPrice": "2475", 294 | "ItemQty": 0 295 | }, 296 | { 297 | "ItemCode": 448, 298 | "ItemDescription": "Martini glass", 299 | "ItemPrice": "2263", 300 | "ItemQty": 0 301 | }, 302 | { 303 | "ItemCode": 453, 304 | "ItemDescription": "Shot glass", 305 | "ItemPrice": "674", 306 | "ItemQty": 0 307 | }, 308 | { 309 | "ItemCode": 458, 310 | "ItemDescription": "Wine glass", 311 | "ItemPrice": "1644", 312 | "ItemQty": 0 313 | }, 314 | { 315 | "ItemCode": 463, 316 | "ItemDescription": "Cookie jars", 317 | "ItemPrice": "793", 318 | "ItemQty": 0 319 | }, 320 | { 321 | "ItemCode": 468, 322 | "ItemDescription": "Lunch box", 323 | "ItemPrice": "1467", 324 | "ItemQty": 0 325 | }, 326 | { 327 | "ItemCode": 473, 328 | "ItemDescription": "Mason jars", 329 | "ItemPrice": "1358", 330 | "ItemQty": 0 331 | }, 332 | { 333 | "ItemCode": 478, 334 | "ItemDescription": "Teapot", 335 | "ItemPrice": "885", 336 | "ItemQty": 0 337 | }, 338 | { 339 | "ItemCode": 483, 340 | "ItemDescription": "Teacups and saucers", 341 | "ItemPrice": "1781", 342 
| "ItemQty": 1 343 | }, 344 | { 345 | "ItemCode": 488, 346 | "ItemDescription": "Bread knife", 347 | "ItemPrice": "1077", 348 | "ItemQty": 0 349 | }, 350 | { 351 | "ItemCode": 493, 352 | "ItemDescription": "Boning knife", 353 | "ItemPrice": "513", 354 | "ItemQty": 0 355 | }, 356 | { 357 | "ItemCode": 498, 358 | "ItemDescription": "Carving knifes", 359 | "ItemPrice": "1424", 360 | "ItemQty": 3 361 | }, 362 | { 363 | "ItemCode": 503, 364 | "ItemDescription": "Chef's knife", 365 | "ItemPrice": "1973", 366 | "ItemQty": 0 367 | }, 368 | { 369 | "ItemCode": 508, 370 | "ItemDescription": "Kitchen knife", 371 | "ItemPrice": "2483", 372 | "ItemQty": 0 373 | }, 374 | { 375 | "ItemCode": 513, 376 | "ItemDescription": "Flip clock", 377 | "ItemPrice": "1259", 378 | "ItemQty": 0 379 | }, 380 | { 381 | "ItemCode": 518, 382 | "ItemDescription": "Hourglass", 383 | "ItemPrice": "1598", 384 | "ItemQty": 3 385 | }, 386 | { 387 | "ItemCode": 523, 388 | "ItemDescription": "Oil-lamp clock", 389 | "ItemPrice": "1371", 390 | "ItemQty": 0 391 | }, 392 | { 393 | "ItemCode": 528, 394 | "ItemDescription": "Projection clock", 395 | "ItemPrice": "2365", 396 | "ItemQty": 0 397 | }, 398 | { 399 | "ItemCode": 533, 400 | "ItemDescription": "Rolling ball clock", 401 | "ItemPrice": "1651", 402 | "ItemQty": 0 403 | }, 404 | { 405 | "ItemCode": 538, 406 | "ItemDescription": "Grandmother clock", 407 | "ItemPrice": "1301", 408 | "ItemQty": 1 409 | }, 410 | { 411 | "ItemCode": 543, 412 | "ItemDescription": "Lighthouse clock", 413 | "ItemPrice": "636", 414 | "ItemQty": 0 415 | }, 416 | { 417 | "ItemCode": 548, 418 | "ItemDescription": "Cartel clock", 419 | "ItemPrice": "1500", 420 | "ItemQty": 0 421 | }, 422 | { 423 | "ItemCode": 553, 424 | "ItemDescription": "Banjo clock", 425 | "ItemPrice": "1651", 426 | "ItemQty": 0 427 | }, 428 | { 429 | "ItemCode": 558, 430 | "ItemDescription": "Balloon clock", 431 | "ItemPrice": "1633", 432 | "ItemQty": 0 433 | }, 434 | { 435 | "ItemCode": 563, 436 | 
"ItemDescription": "Chiming clock", 437 | "ItemPrice": "2168", 438 | "ItemQty": 0 439 | }, 440 | { 441 | "ItemCode": 568, 442 | "ItemDescription": "Pinch Pleated Curtains", 443 | "ItemPrice": "1718", 444 | "ItemQty": 0 445 | }, 446 | { 447 | "ItemCode": 573, 448 | "ItemDescription": "Eyelet Curtains", 449 | "ItemPrice": "642", 450 | "ItemQty": 0 451 | }, 452 | { 453 | "ItemCode": 578, 454 | "ItemDescription": "Goblet Pleat Curtains", 455 | "ItemPrice": "1765", 456 | "ItemQty": 2 457 | }, 458 | { 459 | "ItemCode": 583, 460 | "ItemDescription": "Tab Top Curtains", 461 | "ItemPrice": "1329", 462 | "ItemQty": 0 463 | }, 464 | { 465 | "ItemCode": 588, 466 | "ItemDescription": "Sheer Curtains", 467 | "ItemPrice": "801", 468 | "ItemQty": 0 469 | }, 470 | { 471 | "ItemCode": 593, 472 | "ItemDescription": "Hanging curtains", 473 | "ItemPrice": "1896", 474 | "ItemQty": 0 475 | }, 476 | { 477 | "ItemCode": 598, 478 | "ItemDescription": "Grommet Curtains", 479 | "ItemPrice": "914", 480 | "ItemQty": 0 481 | }, 482 | { 483 | "ItemCode": 603, 484 | "ItemDescription": "Sheer Curtains", 485 | "ItemPrice": "1282", 486 | "ItemQty": 0 487 | }, 488 | { 489 | "ItemCode": 608, 490 | "ItemDescription": "Pleated Drapes", 491 | "ItemPrice": "1684", 492 | "ItemQty": 0 493 | }, 494 | { 495 | "ItemCode": 613, 496 | "ItemDescription": "Goblet Pleat Drapes", 497 | "ItemPrice": "1583", 498 | "ItemQty": 0 499 | }, 500 | { 501 | "ItemCode": 618, 502 | "ItemDescription": "Pencil Pleat Drapes", 503 | "ItemPrice": "1140", 504 | "ItemQty": 0 505 | }, 506 | { 507 | "ItemCode": 623, 508 | "ItemDescription": " Box Pleat Drapes", 509 | "ItemPrice": "1683", 510 | "ItemQty": 0 511 | }, 512 | { 513 | "ItemCode": 628, 514 | "ItemDescription": "Window Scarf", 515 | "ItemPrice": "1774", 516 | "ItemQty": 3 517 | }, 518 | { 519 | "ItemCode": 633, 520 | "ItemDescription": "Cafe Curtains", 521 | "ItemPrice": "796", 522 | "ItemQty": 0 523 | }, 524 | { 525 | "ItemCode": 638, 526 | "ItemDescription": "Baster", 527 | 
"ItemPrice": "1214", 528 | "ItemQty": 0 529 | }, 530 | { 531 | "ItemCode": 643, 532 | "ItemDescription": "Blow torch", 533 | "ItemPrice": "1955", 534 | "ItemQty": 0 535 | }, 536 | { 537 | "ItemCode": 648, 538 | "ItemDescription": "Bottle opener", 539 | "ItemPrice": "321", 540 | "ItemQty": 0 541 | }, 542 | { 543 | "ItemCode": 653, 544 | "ItemDescription": "Browning tray", 545 | "ItemPrice": "375", 546 | "ItemQty": 0 547 | }, 548 | { 549 | "ItemCode": 658, 550 | "ItemDescription": "Chinois", 551 | "ItemPrice": "567", 552 | "ItemQty": 0 553 | }, 554 | { 555 | "ItemCode": 663, 556 | "ItemDescription": "Corkscrew", 557 | "ItemPrice": "983", 558 | "ItemQty": 0 559 | }, 560 | { 561 | "ItemCode": 668, 562 | "ItemDescription": "Crab cracker", 563 | "ItemPrice": "785", 564 | "ItemQty": 0 565 | }, 566 | { 567 | "ItemCode": 673, 568 | "ItemDescription": "Dough scraper", 569 | "ItemPrice": "980", 570 | "ItemQty": 0 571 | }, 572 | { 573 | "ItemCode": 678, 574 | "ItemDescription": "Fillet knife", 575 | "ItemPrice": "350", 576 | "ItemQty": 0 577 | }, 578 | { 579 | "ItemCode": 683, 580 | "ItemDescription": "Grater", 581 | "ItemPrice": "370", 582 | "ItemQty": 0 583 | }, 584 | { 585 | "ItemCode": 688, 586 | "ItemDescription": "Ladle", 587 | "ItemPrice": "570", 588 | "ItemQty": 0 589 | }, 590 | { 591 | "ItemCode": 693, 592 | "ItemDescription": "Mandoline", 593 | "ItemPrice": "250", 594 | "ItemQty": 0 595 | }, 596 | { 597 | "ItemCode": 698, 598 | "ItemDescription": "Meat grinder", 599 | "ItemPrice": "1250", 600 | "ItemQty": 0 601 | } 602 | ] -------------------------------------------------------------------------------- /03-KafkaStreamDemo/datagen/pos-simulator-2.0.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LearningJournal/Spark-Streaming-In-Python/13fdefae019848cc83c93aaaa3ba4e17af09bfae/03-KafkaStreamDemo/datagen/pos-simulator-2.0.0.jar 
-------------------------------------------------------------------------------- /03-KafkaStreamDemo/datagen/run-datagen.cmd: -------------------------------------------------------------------------------- 1 | java -cp ./* guru.learningjournal.kafka.examples.PosSimulator invoices 1 1000 -------------------------------------------------------------------------------- /03-KafkaStreamDemo/datagen/samples.json: -------------------------------------------------------------------------------- 1 | {"InvoiceNumber":"51402977","CreatedTime":1595688900348,"StoreID":"STR7188","PosID":"POS956","CashierID":"OAS134","CustomerType":"PRIME","CustomerCardNo":"4629185211","TotalAmount":11114.0,"NumberOfItems":4,"PaymentMethod":"CARD","TaxableAmount":11114.0,"CGST":277.85,"SGST":277.85,"CESS":13.8925,"DeliveryType":"TAKEAWAY","InvoiceLineItems":[{"ItemCode":"458","ItemDescription":"Wine glass","ItemPrice":1644.0,"ItemQty":2,"TotalValue":3288.0},{"ItemCode":"283","ItemDescription":"Portable Lamps","ItemPrice":2236.0,"ItemQty":1,"TotalValue":2236.0},{"ItemCode":"498","ItemDescription":"Carving knifes","ItemPrice":1424.0,"ItemQty":2,"TotalValue":2848.0},{"ItemCode":"523","ItemDescription":"Oil-lamp clock","ItemPrice":1371.0,"ItemQty":2,"TotalValue":2742.0}]} 2 | {"InvoiceNumber":"91372973","CreatedTime":1595688901219,"StoreID":"STR8513","PosID":"POS163","CashierID":"OAS961","CustomerType":"PRIME","CustomerCardNo":"2762345282","TotalAmount":8272.0,"NumberOfItems":4,"PaymentMethod":"CARD","TaxableAmount":8272.0,"CGST":206.8,"SGST":206.8,"CESS":10.34,"DeliveryType":"HOME-DELIVERY","DeliveryAddress":{"AddressLine":"444-1842 Dui. 
Rd.","City":"Shivapuri","State":"Madhya Pradesh","PinCode":"561012","ContactNumber":"7243866404"},"InvoiceLineItems":[{"ItemCode":"413","ItemDescription":"Slipcover","ItemPrice":1896.0,"ItemQty":1,"TotalValue":1896.0},{"ItemCode":"483","ItemDescription":"Teacups and saucers","ItemPrice":1781.0,"ItemQty":1,"TotalValue":1781.0},{"ItemCode":"583","ItemDescription":"Tab Top Curtains","ItemPrice":1329.0,"ItemQty":1,"TotalValue":1329.0},{"ItemCode":"558","ItemDescription":"Balloon clock","ItemPrice":1633.0,"ItemQty":2,"TotalValue":3266.0}]} 3 | {"InvoiceNumber":"93647513","CreatedTime":1595688902254,"StoreID":"STR8513","PosID":"POS881","CashierID":"OAS354","CustomerType":"NONPRIME","CustomerCardNo":"2599848717","TotalAmount":3374.0,"NumberOfItems":1,"PaymentMethod":"CARD","TaxableAmount":3374.0,"CGST":84.35000000000001,"SGST":84.35000000000001,"CESS":4.2175,"DeliveryType":"TAKEAWAY","InvoiceLineItems":[{"ItemCode":"258","ItemDescription":"Closet","ItemPrice":1687.0,"ItemQty":2,"TotalValue":3374.0}]} 4 | {"InvoiceNumber":"8320594","CreatedTime":1595688902254,"StoreID":"STR7188","PosID":"POS825","CashierID":"OAS329","CustomerType":"PRIME","CustomerCardNo":"7051101351","TotalAmount":5824.0,"NumberOfItems":3,"PaymentMethod":"CASH","TaxableAmount":5824.0,"CGST":145.6,"SGST":145.6,"CESS":7.28,"DeliveryType":"HOME-DELIVERY","DeliveryAddress":{"AddressLine":"2465 Laoreet, Street","City":"Dehri","State":"Bihar","PinCode":"637308","ContactNumber":"2662305605"},"InvoiceLineItems":[{"ItemCode":"288","ItemDescription":"Hutch","ItemPrice":1812.0,"ItemQty":2,"TotalValue":3624.0},{"ItemCode":"558","ItemDescription":"Balloon clock","ItemPrice":1633.0,"ItemQty":1,"TotalValue":1633.0},{"ItemCode":"658","ItemDescription":"Chinois","ItemPrice":567.0,"ItemQty":1,"TotalValue":567.0}]} 5 | 
{"InvoiceNumber":"4888607","CreatedTime":1595688903258,"StoreID":"STR2629","PosID":"POS172","CashierID":"OAS622","CustomerType":"NONPRIME","CustomerCardNo":"7829975914","TotalAmount":1750.0,"NumberOfItems":2,"PaymentMethod":"CASH","TaxableAmount":1750.0,"CGST":43.75,"SGST":43.75,"CESS":2.1875,"DeliveryType":"HOME-DELIVERY","DeliveryAddress":{"AddressLine":"7114 Eu, Rd.","City":"Ratlam","State":"Madhya Pradesh","PinCode":"925281","ContactNumber":"4057182350"},"InvoiceLineItems":[{"ItemCode":"208","ItemDescription":"Canopy bed","ItemPrice":508.0,"ItemQty":1,"TotalValue":508.0},{"ItemCode":"408","ItemDescription":"Confidante","ItemPrice":1242.0,"ItemQty":1,"TotalValue":1242.0}]} 6 | {"InvoiceNumber":"99794761","CreatedTime":1595688903258,"StoreID":"STR6347","PosID":"POS333","CashierID":"OAS697","CustomerType":"NONPRIME","CustomerCardNo":"9740604930","TotalAmount":6566.0,"NumberOfItems":3,"PaymentMethod":"CARD","TaxableAmount":6566.0,"CGST":164.15,"SGST":164.15,"CESS":8.2075,"DeliveryType":"HOME-DELIVERY","DeliveryAddress":{"AddressLine":"517-8912 Nulla St.","City":"Champdani","State":"West Bengal","PinCode":"680616","ContactNumber":"8183195143"},"InvoiceLineItems":[{"ItemCode":"353","ItemDescription":"Recliner","ItemPrice":1585.0,"ItemQty":2,"TotalValue":3170.0},{"ItemCode":"488","ItemDescription":"Bread knife","ItemPrice":1077.0,"ItemQty":2,"TotalValue":2154.0},{"ItemCode":"408","ItemDescription":"Confidante","ItemPrice":1242.0,"ItemQty":1,"TotalValue":1242.0}]} 7 | {"InvoiceNumber":"89101510","CreatedTime":1595688904763,"StoreID":"STR6347","PosID":"POS994","CashierID":"OAS274","CustomerType":"PRIME","CustomerCardNo":"6296964163","TotalAmount":5560.0,"NumberOfItems":4,"PaymentMethod":"CASH","TaxableAmount":5560.0,"CGST":139.0,"SGST":139.0,"CESS":6.95,"DeliveryType":"TAKEAWAY","InvoiceLineItems":[{"ItemCode":"493","ItemDescription":"Boning knife","ItemPrice":513.0,"ItemQty":1,"TotalValue":513.0},{"ItemCode":"468","ItemDescription":"Lunch 
box","ItemPrice":1467.0,"ItemQty":1,"TotalValue":1467.0},{"ItemCode":"348","ItemDescription":"Navy chair","ItemPrice":1998.0,"ItemQty":1,"TotalValue":1998.0},{"ItemCode":"238","ItemDescription":"Dining table","ItemPrice":1582.0,"ItemQty":1,"TotalValue":1582.0}]} 8 | {"InvoiceNumber":"26723058","CreatedTime":1595689028262,"StoreID":"STR7188","PosID":"POS664","CashierID":"OAS971","CustomerType":"PRIME","CustomerCardNo":"9316477281","TotalAmount":5235.0,"NumberOfItems":3,"PaymentMethod":"CARD","TaxableAmount":5235.0,"CGST":130.875,"SGST":130.875,"CESS":6.54375,"DeliveryType":"HOME-DELIVERY","DeliveryAddress":{"AddressLine":"5418 Magna. Rd.","City":"Chennai","State":"Tamil Nadu","PinCode":"386032","ContactNumber":"6557358508"},"InvoiceLineItems":[{"ItemCode":"653","ItemDescription":"Browning tray","ItemPrice":375.0,"ItemQty":1,"TotalValue":375.0},{"ItemCode":"568","ItemDescription":"Pinch Pleated Curtains","ItemPrice":1718.0,"ItemQty":2,"TotalValue":3436.0},{"ItemCode":"498","ItemDescription":"Carving knifes","ItemPrice":1424.0,"ItemQty":1,"TotalValue":1424.0}]} 9 | {"InvoiceNumber":"96837834","CreatedTime":1595689029002,"StoreID":"STR6162","PosID":"POS835","CashierID":"OAS583","CustomerType":"NONPRIME","CustomerCardNo":"5611417583","TotalAmount":13481.0,"NumberOfItems":4,"PaymentMethod":"CARD","TaxableAmount":13481.0,"CGST":337.02500000000003,"SGST":337.02500000000003,"CESS":16.85125,"DeliveryType":"TAKEAWAY","InvoiceLineItems":[{"ItemCode":"213","ItemDescription":"Infant bed","ItemPrice":1755.0,"ItemQty":1,"TotalValue":1755.0},{"ItemCode":"528","ItemDescription":"Projection clock","ItemPrice":2365.0,"ItemQty":2,"TotalValue":4730.0},{"ItemCode":"288","ItemDescription":"Hutch","ItemPrice":1812.0,"ItemQty":2,"TotalValue":3624.0},{"ItemCode":"203","ItemDescription":"Bunk bed","ItemPrice":1686.0,"ItemQty":2,"TotalValue":3372.0}]} 10 | 
{"InvoiceNumber":"78302645","CreatedTime":1595689029506,"StoreID":"STR8513","PosID":"POS155","CashierID":"OAS558","CustomerType":"NONPRIME","CustomerCardNo":"2389018842","TotalAmount":6543.0,"NumberOfItems":3,"PaymentMethod":"CASH","TaxableAmount":6543.0,"CGST":163.57500000000002,"SGST":163.57500000000002,"CESS":8.17875,"DeliveryType":"HOME-DELIVERY","DeliveryAddress":{"AddressLine":"Flat No. #210-902 Neque Street","City":"South Dum Dum","State":"West Bengal","PinCode":"504795","ContactNumber":"7508353683"},"InvoiceLineItems":[{"ItemCode":"533","ItemDescription":"Rolling ball clock","ItemPrice":1651.0,"ItemQty":2,"TotalValue":3302.0},{"ItemCode":"538","ItemDescription":"Grandmother clock","ItemPrice":1301.0,"ItemQty":1,"TotalValue":1301.0},{"ItemCode":"278","ItemDescription":"Dining set","ItemPrice":1940.0,"ItemQty":1,"TotalValue":1940.0}]} 11 | -------------------------------------------------------------------------------- /03-KafkaStreamDemo/kafka-scripts/01-start-zookeeper.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\zookeeper-server-start.bat %KAFKA_HOME%\config\zookeeper.properties -------------------------------------------------------------------------------- /03-KafkaStreamDemo/kafka-scripts/02-start-kafka.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-server-start.bat %KAFKA_HOME%\config\server.properties -------------------------------------------------------------------------------- /03-KafkaStreamDemo/kafka-scripts/03-create-topic.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic invoices -------------------------------------------------------------------------------- /03-KafkaStreamDemo/kafka-scripts/04-start-producer.cmd: 
-------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-console-producer.bat --broker-list localhost:9092 --topic invoices -------------------------------------------------------------------------------- /03-KafkaStreamDemo/lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LearningJournal/Spark-Streaming-In-Python/13fdefae019848cc83c93aaaa3ba4e17af09bfae/03-KafkaStreamDemo/lib/__init__.py -------------------------------------------------------------------------------- /03-KafkaStreamDemo/lib/logger.py: -------------------------------------------------------------------------------- 1 | class Log4j: 2 | def __init__(self, spark): 3 | log4j = spark._jvm.org.apache.log4j 4 | 5 | root_class = "guru.learningjournal.spark.examples" 6 | conf = spark.sparkContext.getConf() 7 | app_name = conf.get("spark.app.name") 8 | 9 | self.logger = log4j.LogManager.getLogger(root_class + "." 
+ app_name) 10 | 11 | def warn(self, message): 12 | self.logger.warn(message) 13 | 14 | def info(self, message): 15 | self.logger.info(message) 16 | 17 | def error(self, message): 18 | self.logger.error(message) 19 | 20 | def debug(self, message): 21 | self.logger.debug(message) -------------------------------------------------------------------------------- /03-KafkaStreamDemo/log4j.properties: -------------------------------------------------------------------------------- 1 | # Set everything to be logged to the console 2 | log4j.rootCategory=WARN, console 3 | 4 | # define console appender 5 | log4j.appender.console=org.apache.log4j.ConsoleAppender 6 | log4j.appender.console.target=System.out 7 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 8 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n 9 | 10 | #application log 11 | log4j.logger.guru.learningjournal.spark.examples=INFO, console 12 | log4j.additivity.guru.learningjournal.spark.examples=false 13 | 14 | #define following in Java System 15 | # -Dlog4j.configuration=file:log4j.properties 16 | 17 | # Recommendations from Spark template 18 | log4j.logger.org.apache.spark.repl.Main=WARN 19 | log4j.logger.org.spark_project.jetty=WARN 20 | log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR 21 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO 22 | log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO 23 | log4j.logger.org.apache.parquet=ERROR 24 | log4j.logger.parquet=ERROR 25 | log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL 26 | log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR 27 | 28 | -------------------------------------------------------------------------------- /04-KafkaSinkDemo/KafkaSinkDemo.py: -------------------------------------------------------------------------------- 1 | from pyspark.sql import SparkSession 2 | from pyspark.sql.functions import 
from_json, col, expr 3 | from pyspark.sql.types import StructType, StructField, StringType, LongType, DoubleType, IntegerType, ArrayType 4 | 5 | from lib.logger import Log4j 6 | 7 | if __name__ == "__main__": 8 | spark = SparkSession \ 9 | .builder \ 10 | .appName("File Streaming Demo") \ 11 | .master("local[3]") \ 12 | .config("spark.streaming.stopGracefullyOnShutdown", "true") \ 13 | .getOrCreate() 14 | 15 | logger = Log4j(spark) 16 | 17 | schema = StructType([ 18 | StructField("InvoiceNumber", StringType()), 19 | StructField("CreatedTime", LongType()), 20 | StructField("StoreID", StringType()), 21 | StructField("PosID", StringType()), 22 | StructField("CashierID", StringType()), 23 | StructField("CustomerType", StringType()), 24 | StructField("CustomerCardNo", StringType()), 25 | StructField("TotalAmount", DoubleType()), 26 | StructField("NumberOfItems", IntegerType()), 27 | StructField("PaymentMethod", StringType()), 28 | StructField("CGST", DoubleType()), 29 | StructField("SGST", DoubleType()), 30 | StructField("CESS", DoubleType()), 31 | StructField("DeliveryType", StringType()), 32 | StructField("DeliveryAddress", StructType([ 33 | StructField("AddressLine", StringType()), 34 | StructField("City", StringType()), 35 | StructField("State", StringType()), 36 | StructField("PinCode", StringType()), 37 | StructField("ContactNumber", StringType()) 38 | ])), 39 | StructField("InvoiceLineItems", ArrayType(StructType([ 40 | StructField("ItemCode", StringType()), 41 | StructField("ItemDescription", StringType()), 42 | StructField("ItemPrice", DoubleType()), 43 | StructField("ItemQty", IntegerType()), 44 | StructField("TotalValue", DoubleType()) 45 | ]))), 46 | ]) 47 | 48 | kafka_df = spark.readStream \ 49 | .format("kafka") \ 50 | .option("kafka.bootstrap.servers", "localhost:9092") \ 51 | .option("subscribe", "invoices") \ 52 | .option("startingOffsets", "earliest") \ 53 | .load() 54 | 55 | value_df = kafka_df.select(from_json(col("value").cast("string"), 
schema).alias("value")) 56 | 57 | notification_df = value_df.select("value.InvoiceNumber", "value.CustomerCardNo", "value.TotalAmount") \ 58 | .withColumn("EarnedLoyaltyPoints", expr("TotalAmount * 0.2")) 59 | 60 | # kafka_target_df = notification_df.selectExpr("InvoiceNumber as key", "to_json(struct(*)) as value") 61 | 62 | kafka_target_df = notification_df.selectExpr("InvoiceNumber as key", 63 | """to_json(named_struct( 64 | 'CustomerCardNo', CustomerCardNo, 65 | 'TotalAmount', TotalAmount, 66 | 'EarnedLoyaltyPoints', TotalAmount * 0.2)) as value""") 67 | 68 | ''' 69 | notification_writer_query = kafkaTarget_df.writeStream \ 70 | .format("console") \ 71 | .outputMode("append") \ 72 | .option("truncate", "false") \ 73 | .option("checkpointLocation", "chk-point-dir") \ 74 | .start() 75 | ''' 76 | 77 | notification_writer_query = kafka_target_df \ 78 | .writeStream \ 79 | .queryName("Notification Writer") \ 80 | .format("kafka") \ 81 | .option("kafka.bootstrap.servers", "localhost:9092") \ 82 | .option("topic", "notifications") \ 83 | .outputMode("append") \ 84 | .option("checkpointLocation", "chk-point-dir") \ 85 | .start() 86 | 87 | logger.info("Listening and writing to Kafka") 88 | notification_writer_query.awaitTermination() 89 | -------------------------------------------------------------------------------- /04-KafkaSinkDemo/data/samples.json: -------------------------------------------------------------------------------- 1 | {"InvoiceNumber":"51402977","CreatedTime":1595688900348,"StoreID":"STR7188","PosID":"POS956","CashierID":"OAS134","CustomerType":"PRIME","CustomerCardNo":"4629185211","TotalAmount":11114.0,"NumberOfItems":4,"PaymentMethod":"CARD","TaxableAmount":11114.0,"CGST":277.85,"SGST":277.85,"CESS":13.8925,"DeliveryType":"TAKEAWAY","InvoiceLineItems":[{"ItemCode":"458","ItemDescription":"Wine glass","ItemPrice":1644.0,"ItemQty":2,"TotalValue":3288.0},{"ItemCode":"283","ItemDescription":"Portable 
Lamps","ItemPrice":2236.0,"ItemQty":1,"TotalValue":2236.0},{"ItemCode":"498","ItemDescription":"Carving knifes","ItemPrice":1424.0,"ItemQty":2,"TotalValue":2848.0},{"ItemCode":"523","ItemDescription":"Oil-lamp clock","ItemPrice":1371.0,"ItemQty":2,"TotalValue":2742.0}]} 2 | {"InvoiceNumber":"91372973","CreatedTime":1595688901219,"StoreID":"STR8513","PosID":"POS163","CashierID":"OAS961","CustomerType":"PRIME","CustomerCardNo":"2762345282","TotalAmount":8272.0,"NumberOfItems":4,"PaymentMethod":"CARD","TaxableAmount":8272.0,"CGST":206.8,"SGST":206.8,"CESS":10.34,"DeliveryType":"HOME-DELIVERY","DeliveryAddress":{"AddressLine":"444-1842 Dui. Rd.","City":"Shivapuri","State":"Madhya Pradesh","PinCode":"561012","ContactNumber":"7243866404"},"InvoiceLineItems":[{"ItemCode":"413","ItemDescription":"Slipcover","ItemPrice":1896.0,"ItemQty":1,"TotalValue":1896.0},{"ItemCode":"483","ItemDescription":"Teacups and saucers","ItemPrice":1781.0,"ItemQty":1,"TotalValue":1781.0},{"ItemCode":"583","ItemDescription":"Tab Top Curtains","ItemPrice":1329.0,"ItemQty":1,"TotalValue":1329.0},{"ItemCode":"558","ItemDescription":"Balloon clock","ItemPrice":1633.0,"ItemQty":2,"TotalValue":3266.0}]} 3 | {"InvoiceNumber":"93647513","CreatedTime":1595688902254,"StoreID":"STR8513","PosID":"POS881","CashierID":"OAS354","CustomerType":"NONPRIME","CustomerCardNo":"2599848717","TotalAmount":3374.0,"NumberOfItems":1,"PaymentMethod":"CARD","TaxableAmount":3374.0,"CGST":84.35000000000001,"SGST":84.35000000000001,"CESS":4.2175,"DeliveryType":"TAKEAWAY","InvoiceLineItems":[{"ItemCode":"258","ItemDescription":"Closet","ItemPrice":1687.0,"ItemQty":2,"TotalValue":3374.0}]} 4 | 
{"InvoiceNumber":"8320594","CreatedTime":1595688902254,"StoreID":"STR7188","PosID":"POS825","CashierID":"OAS329","CustomerType":"PRIME","CustomerCardNo":"7051101351","TotalAmount":5824.0,"NumberOfItems":3,"PaymentMethod":"CASH","TaxableAmount":5824.0,"CGST":145.6,"SGST":145.6,"CESS":7.28,"DeliveryType":"HOME-DELIVERY","DeliveryAddress":{"AddressLine":"2465 Laoreet, Street","City":"Dehri","State":"Bihar","PinCode":"637308","ContactNumber":"2662305605"},"InvoiceLineItems":[{"ItemCode":"288","ItemDescription":"Hutch","ItemPrice":1812.0,"ItemQty":2,"TotalValue":3624.0},{"ItemCode":"558","ItemDescription":"Balloon clock","ItemPrice":1633.0,"ItemQty":1,"TotalValue":1633.0},{"ItemCode":"658","ItemDescription":"Chinois","ItemPrice":567.0,"ItemQty":1,"TotalValue":567.0}]} 5 | {"InvoiceNumber":"4888607","CreatedTime":1595688903258,"StoreID":"STR2629","PosID":"POS172","CashierID":"OAS622","CustomerType":"NONPRIME","CustomerCardNo":"7829975914","TotalAmount":1750.0,"NumberOfItems":2,"PaymentMethod":"CASH","TaxableAmount":1750.0,"CGST":43.75,"SGST":43.75,"CESS":2.1875,"DeliveryType":"HOME-DELIVERY","DeliveryAddress":{"AddressLine":"7114 Eu, Rd.","City":"Ratlam","State":"Madhya Pradesh","PinCode":"925281","ContactNumber":"4057182350"},"InvoiceLineItems":[{"ItemCode":"208","ItemDescription":"Canopy bed","ItemPrice":508.0,"ItemQty":1,"TotalValue":508.0},{"ItemCode":"408","ItemDescription":"Confidante","ItemPrice":1242.0,"ItemQty":1,"TotalValue":1242.0}]} 6 | {"InvoiceNumber":"99794761","CreatedTime":1595688903258,"StoreID":"STR6347","PosID":"POS333","CashierID":"OAS697","CustomerType":"NONPRIME","CustomerCardNo":"9740604930","TotalAmount":6566.0,"NumberOfItems":3,"PaymentMethod":"CARD","TaxableAmount":6566.0,"CGST":164.15,"SGST":164.15,"CESS":8.2075,"DeliveryType":"HOME-DELIVERY","DeliveryAddress":{"AddressLine":"517-8912 Nulla St.","City":"Champdani","State":"West 
Bengal","PinCode":"680616","ContactNumber":"8183195143"},"InvoiceLineItems":[{"ItemCode":"353","ItemDescription":"Recliner","ItemPrice":1585.0,"ItemQty":2,"TotalValue":3170.0},{"ItemCode":"488","ItemDescription":"Bread knife","ItemPrice":1077.0,"ItemQty":2,"TotalValue":2154.0},{"ItemCode":"408","ItemDescription":"Confidante","ItemPrice":1242.0,"ItemQty":1,"TotalValue":1242.0}]} 7 | {"InvoiceNumber":"89101510","CreatedTime":1595688904763,"StoreID":"STR6347","PosID":"POS994","CashierID":"OAS274","CustomerType":"PRIME","CustomerCardNo":"6296964163","TotalAmount":5560.0,"NumberOfItems":4,"PaymentMethod":"CASH","TaxableAmount":5560.0,"CGST":139.0,"SGST":139.0,"CESS":6.95,"DeliveryType":"TAKEAWAY","InvoiceLineItems":[{"ItemCode":"493","ItemDescription":"Boning knife","ItemPrice":513.0,"ItemQty":1,"TotalValue":513.0},{"ItemCode":"468","ItemDescription":"Lunch box","ItemPrice":1467.0,"ItemQty":1,"TotalValue":1467.0},{"ItemCode":"348","ItemDescription":"Navy chair","ItemPrice":1998.0,"ItemQty":1,"TotalValue":1998.0},{"ItemCode":"238","ItemDescription":"Dining table","ItemPrice":1582.0,"ItemQty":1,"TotalValue":1582.0}]} 8 | {"InvoiceNumber":"26723058","CreatedTime":1595689028262,"StoreID":"STR7188","PosID":"POS664","CashierID":"OAS971","CustomerType":"PRIME","CustomerCardNo":"9316477281","TotalAmount":5235.0,"NumberOfItems":3,"PaymentMethod":"CARD","TaxableAmount":5235.0,"CGST":130.875,"SGST":130.875,"CESS":6.54375,"DeliveryType":"HOME-DELIVERY","DeliveryAddress":{"AddressLine":"5418 Magna. 
Rd.","City":"Chennai","State":"Tamil Nadu","PinCode":"386032","ContactNumber":"6557358508"},"InvoiceLineItems":[{"ItemCode":"653","ItemDescription":"Browning tray","ItemPrice":375.0,"ItemQty":1,"TotalValue":375.0},{"ItemCode":"568","ItemDescription":"Pinch Pleated Curtains","ItemPrice":1718.0,"ItemQty":2,"TotalValue":3436.0},{"ItemCode":"498","ItemDescription":"Carving knifes","ItemPrice":1424.0,"ItemQty":1,"TotalValue":1424.0}]} 9 | {"InvoiceNumber":"96837834","CreatedTime":1595689029002,"StoreID":"STR6162","PosID":"POS835","CashierID":"OAS583","CustomerType":"NONPRIME","CustomerCardNo":"5611417583","TotalAmount":13481.0,"NumberOfItems":4,"PaymentMethod":"CARD","TaxableAmount":13481.0,"CGST":337.02500000000003,"SGST":337.02500000000003,"CESS":16.85125,"DeliveryType":"TAKEAWAY","InvoiceLineItems":[{"ItemCode":"213","ItemDescription":"Infant bed","ItemPrice":1755.0,"ItemQty":1,"TotalValue":1755.0},{"ItemCode":"528","ItemDescription":"Projection clock","ItemPrice":2365.0,"ItemQty":2,"TotalValue":4730.0},{"ItemCode":"288","ItemDescription":"Hutch","ItemPrice":1812.0,"ItemQty":2,"TotalValue":3624.0},{"ItemCode":"203","ItemDescription":"Bunk bed","ItemPrice":1686.0,"ItemQty":2,"TotalValue":3372.0}]} 10 | {"InvoiceNumber":"78302645","CreatedTime":1595689029506,"StoreID":"STR8513","PosID":"POS155","CashierID":"OAS558","CustomerType":"NONPRIME","CustomerCardNo":"2389018842","TotalAmount":6543.0,"NumberOfItems":3,"PaymentMethod":"CASH","TaxableAmount":6543.0,"CGST":163.57500000000002,"SGST":163.57500000000002,"CESS":8.17875,"DeliveryType":"HOME-DELIVERY","DeliveryAddress":{"AddressLine":"Flat No. 
#210-902 Neque Street","City":"South Dum Dum","State":"West Bengal","PinCode":"504795","ContactNumber":"7508353683"},"InvoiceLineItems":[{"ItemCode":"533","ItemDescription":"Rolling ball clock","ItemPrice":1651.0,"ItemQty":2,"TotalValue":3302.0},{"ItemCode":"538","ItemDescription":"Grandmother clock","ItemPrice":1301.0,"ItemQty":1,"TotalValue":1301.0},{"ItemCode":"278","ItemDescription":"Dining set","ItemPrice":1940.0,"ItemQty":1,"TotalValue":1940.0}]} 11 | -------------------------------------------------------------------------------- /04-KafkaSinkDemo/kafka-scripts/01-start-zookeeper.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\zookeeper-server-start.bat %KAFKA_HOME%\config\zookeeper.properties -------------------------------------------------------------------------------- /04-KafkaSinkDemo/kafka-scripts/02-start-kafka.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-server-start.bat %KAFKA_HOME%\config\server.properties -------------------------------------------------------------------------------- /04-KafkaSinkDemo/kafka-scripts/03-create-input-topic.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic invoices -------------------------------------------------------------------------------- /04-KafkaSinkDemo/kafka-scripts/04-create-output-topic.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic notifications -------------------------------------------------------------------------------- /04-KafkaSinkDemo/kafka-scripts/05-start-producer.cmd: 
-------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-console-producer.bat --broker-list localhost:9092 --topic invoices -------------------------------------------------------------------------------- /04-KafkaSinkDemo/kafka-scripts/06-start-consumer.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-console-consumer.bat --bootstrap-server localhost:9092 --topic notifications --from-beginning -------------------------------------------------------------------------------- /04-KafkaSinkDemo/lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LearningJournal/Spark-Streaming-In-Python/13fdefae019848cc83c93aaaa3ba4e17af09bfae/04-KafkaSinkDemo/lib/__init__.py -------------------------------------------------------------------------------- /04-KafkaSinkDemo/lib/logger.py: -------------------------------------------------------------------------------- 1 | class Log4j: 2 | def __init__(self, spark): 3 | log4j = spark._jvm.org.apache.log4j 4 | 5 | root_class = "guru.learningjournal.spark.examples" 6 | conf = spark.sparkContext.getConf() 7 | app_name = conf.get("spark.app.name") 8 | 9 | self.logger = log4j.LogManager.getLogger(root_class + "." 
+ app_name) 10 | 11 | def warn(self, message): 12 | self.logger.warn(message) 13 | 14 | def info(self, message): 15 | self.logger.info(message) 16 | 17 | def error(self, message): 18 | self.logger.error(message) 19 | 20 | def debug(self, message): 21 | self.logger.debug(message) -------------------------------------------------------------------------------- /04-KafkaSinkDemo/log4j.properties: -------------------------------------------------------------------------------- 1 | # Set everything to be logged to the console 2 | log4j.rootCategory=WARN, console 3 | 4 | # define console appender 5 | log4j.appender.console=org.apache.log4j.ConsoleAppender 6 | log4j.appender.console.target=System.out 7 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 8 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n 9 | 10 | #application log 11 | log4j.logger.guru.learningjournal.spark.examples=INFO, console 12 | log4j.additivity.guru.learningjournal.spark.examples=false 13 | 14 | #define following in Java System 15 | # -Dlog4j.configuration=file:log4j.properties 16 | 17 | # Recommendations from Spark template 18 | log4j.logger.org.apache.spark.repl.Main=WARN 19 | log4j.logger.org.spark_project.jetty=WARN 20 | log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR 21 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO 22 | log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO 23 | log4j.logger.org.apache.parquet=ERROR 24 | log4j.logger.parquet=ERROR 25 | log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL 26 | log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR 27 | 28 | -------------------------------------------------------------------------------- /05-MultiQueryDemo/MultiQueryDemo.py: -------------------------------------------------------------------------------- 1 | from pyspark.sql import SparkSession 2 | from pyspark.sql.functions import 
from_json, col, expr 3 | from pyspark.sql.types import StructType, StructField, StringType, LongType, DoubleType, IntegerType, ArrayType 4 | 5 | from lib.logger import Log4j 6 | 7 | if __name__ == "__main__": 8 | spark = SparkSession \ 9 | .builder \ 10 | .appName("Multi Query Demo") \ 11 | .master("local[3]") \ 12 | .config("spark.streaming.stopGracefullyOnShutdown", "true") \ 13 | .getOrCreate() 14 | 15 | logger = Log4j(spark) 16 | 17 | schema = StructType([ 18 | StructField("InvoiceNumber", StringType()), 19 | StructField("CreatedTime", LongType()), 20 | StructField("StoreID", StringType()), 21 | StructField("PosID", StringType()), 22 | StructField("CashierID", StringType()), 23 | StructField("CustomerType", StringType()), 24 | StructField("CustomerCardNo", StringType()), 25 | StructField("TotalAmount", DoubleType()), 26 | StructField("NumberOfItems", IntegerType()), 27 | StructField("PaymentMethod", StringType()), 28 | StructField("CGST", DoubleType()), 29 | StructField("SGST", DoubleType()), 30 | StructField("CESS", DoubleType()), 31 | StructField("DeliveryType", StringType()), 32 | StructField("DeliveryAddress", StructType([ 33 | StructField("AddressLine", StringType()), 34 | StructField("City", StringType()), 35 | StructField("State", StringType()), 36 | StructField("PinCode", StringType()), 37 | StructField("ContactNumber", StringType()) 38 | ])), 39 | StructField("InvoiceLineItems", ArrayType(StructType([ 40 | StructField("ItemCode", StringType()), 41 | StructField("ItemDescription", StringType()), 42 | StructField("ItemPrice", DoubleType()), 43 | StructField("ItemQty", IntegerType()), 44 | StructField("TotalValue", DoubleType()) 45 | ]))), 46 | ]) 47 | 48 | kafka_df = spark.readStream \ 49 | .format("kafka") \ 50 | .option("kafka.bootstrap.servers", "localhost:9092") \ 51 | .option("subscribe", "invoices") \ 52 | .option("startingOffsets", "earliest") \ 53 | .load() 54 | 55 | value_df = kafka_df.select(from_json(col("value").cast("string"), 
schema).alias("value")) 56 | 57 | notification_df = value_df.select("value.InvoiceNumber", "value.CustomerCardNo", "value.TotalAmount") \ 58 | .withColumn("EarnedLoyaltyPoints", expr("TotalAmount * 0.2")) 59 | 60 | kafka_target_df = notification_df.selectExpr("InvoiceNumber as key", "to_json(struct(*)) as value") 61 | 62 | notification_writer_query = kafka_target_df \ 63 | .writeStream \ 64 | .queryName("Notification Writer") \ 65 | .format("kafka") \ 66 | .option("kafka.bootstrap.servers", "localhost:9092") \ 67 | .option("topic", "notifications") \ 68 | .outputMode("append") \ 69 | .option("checkpointLocation", "chk-point-dir/notify") \ 70 | .start() 71 | 72 | explode_df = value_df.selectExpr("value.InvoiceNumber", "value.CreatedTime", "value.StoreID", 73 | "value.PosID", "value.CustomerType", "value.PaymentMethod", "value.DeliveryType", 74 | "value.DeliveryAddress.City", 75 | "value.DeliveryAddress.State", "value.DeliveryAddress.PinCode", 76 | "explode(value.InvoiceLineItems) as LineItem") 77 | 78 | flattened_df = explode_df \ 79 | .withColumn("ItemCode", expr("LineItem.ItemCode")) \ 80 | .withColumn("ItemDescription", expr("LineItem.ItemDescription")) \ 81 | .withColumn("ItemPrice", expr("LineItem.ItemPrice")) \ 82 | .withColumn("ItemQty", expr("LineItem.ItemQty")) \ 83 | .withColumn("TotalValue", expr("LineItem.TotalValue")) \ 84 | .drop("LineItem") 85 | 86 | invoice_writer_query = flattened_df.writeStream \ 87 | .format("json") \ 88 | .queryName("Flattened Invoice Writer") \ 89 | .outputMode("append") \ 90 | .option("path", "output") \ 91 | .option("checkpointLocation", "chk-point-dir/flatten") \ 92 | .start() 93 | 94 | logger.info("Waiting for Queries") 95 | spark.streams.awaitAnyTermination() 96 | -------------------------------------------------------------------------------- /05-MultiQueryDemo/data/samples.json: -------------------------------------------------------------------------------- 1 | 
{"InvoiceNumber":"51402977","CreatedTime":1595688900348,"StoreID":"STR7188","PosID":"POS956","CashierID":"OAS134","CustomerType":"PRIME","CustomerCardNo":"4629185211","TotalAmount":11114.0,"NumberOfItems":4,"PaymentMethod":"CARD","TaxableAmount":11114.0,"CGST":277.85,"SGST":277.85,"CESS":13.8925,"DeliveryType":"TAKEAWAY","InvoiceLineItems":[{"ItemCode":"458","ItemDescription":"Wine glass","ItemPrice":1644.0,"ItemQty":2,"TotalValue":3288.0},{"ItemCode":"283","ItemDescription":"Portable Lamps","ItemPrice":2236.0,"ItemQty":1,"TotalValue":2236.0},{"ItemCode":"498","ItemDescription":"Carving knifes","ItemPrice":1424.0,"ItemQty":2,"TotalValue":2848.0},{"ItemCode":"523","ItemDescription":"Oil-lamp clock","ItemPrice":1371.0,"ItemQty":2,"TotalValue":2742.0}]} 2 | {"InvoiceNumber":"91372973","CreatedTime":1595688901219,"StoreID":"STR8513","PosID":"POS163","CashierID":"OAS961","CustomerType":"PRIME","CustomerCardNo":"2762345282","TotalAmount":8272.0,"NumberOfItems":4,"PaymentMethod":"CARD","TaxableAmount":8272.0,"CGST":206.8,"SGST":206.8,"CESS":10.34,"DeliveryType":"HOME-DELIVERY","DeliveryAddress":{"AddressLine":"444-1842 Dui. 
Rd.","City":"Shivapuri","State":"Madhya Pradesh","PinCode":"561012","ContactNumber":"7243866404"},"InvoiceLineItems":[{"ItemCode":"413","ItemDescription":"Slipcover","ItemPrice":1896.0,"ItemQty":1,"TotalValue":1896.0},{"ItemCode":"483","ItemDescription":"Teacups and saucers","ItemPrice":1781.0,"ItemQty":1,"TotalValue":1781.0},{"ItemCode":"583","ItemDescription":"Tab Top Curtains","ItemPrice":1329.0,"ItemQty":1,"TotalValue":1329.0},{"ItemCode":"558","ItemDescription":"Balloon clock","ItemPrice":1633.0,"ItemQty":2,"TotalValue":3266.0}]} 3 | {"InvoiceNumber":"93647513","CreatedTime":1595688902254,"StoreID":"STR8513","PosID":"POS881","CashierID":"OAS354","CustomerType":"NONPRIME","CustomerCardNo":"2599848717","TotalAmount":3374.0,"NumberOfItems":1,"PaymentMethod":"CARD","TaxableAmount":3374.0,"CGST":84.35000000000001,"SGST":84.35000000000001,"CESS":4.2175,"DeliveryType":"TAKEAWAY","InvoiceLineItems":[{"ItemCode":"258","ItemDescription":"Closet","ItemPrice":1687.0,"ItemQty":2,"TotalValue":3374.0}]} 4 | {"InvoiceNumber":"8320594","CreatedTime":1595688902254,"StoreID":"STR7188","PosID":"POS825","CashierID":"OAS329","CustomerType":"PRIME","CustomerCardNo":"7051101351","TotalAmount":5824.0,"NumberOfItems":3,"PaymentMethod":"CASH","TaxableAmount":5824.0,"CGST":145.6,"SGST":145.6,"CESS":7.28,"DeliveryType":"HOME-DELIVERY","DeliveryAddress":{"AddressLine":"2465 Laoreet, Street","City":"Dehri","State":"Bihar","PinCode":"637308","ContactNumber":"2662305605"},"InvoiceLineItems":[{"ItemCode":"288","ItemDescription":"Hutch","ItemPrice":1812.0,"ItemQty":2,"TotalValue":3624.0},{"ItemCode":"558","ItemDescription":"Balloon clock","ItemPrice":1633.0,"ItemQty":1,"TotalValue":1633.0},{"ItemCode":"658","ItemDescription":"Chinois","ItemPrice":567.0,"ItemQty":1,"TotalValue":567.0}]} 5 | 
{"InvoiceNumber":"4888607","CreatedTime":1595688903258,"StoreID":"STR2629","PosID":"POS172","CashierID":"OAS622","CustomerType":"NONPRIME","CustomerCardNo":"7829975914","TotalAmount":1750.0,"NumberOfItems":2,"PaymentMethod":"CASH","TaxableAmount":1750.0,"CGST":43.75,"SGST":43.75,"CESS":2.1875,"DeliveryType":"HOME-DELIVERY","DeliveryAddress":{"AddressLine":"7114 Eu, Rd.","City":"Ratlam","State":"Madhya Pradesh","PinCode":"925281","ContactNumber":"4057182350"},"InvoiceLineItems":[{"ItemCode":"208","ItemDescription":"Canopy bed","ItemPrice":508.0,"ItemQty":1,"TotalValue":508.0},{"ItemCode":"408","ItemDescription":"Confidante","ItemPrice":1242.0,"ItemQty":1,"TotalValue":1242.0}]} 6 | {"InvoiceNumber":"99794761","CreatedTime":1595688903258,"StoreID":"STR6347","PosID":"POS333","CashierID":"OAS697","CustomerType":"NONPRIME","CustomerCardNo":"9740604930","TotalAmount":6566.0,"NumberOfItems":3,"PaymentMethod":"CARD","TaxableAmount":6566.0,"CGST":164.15,"SGST":164.15,"CESS":8.2075,"DeliveryType":"HOME-DELIVERY","DeliveryAddress":{"AddressLine":"517-8912 Nulla St.","City":"Champdani","State":"West Bengal","PinCode":"680616","ContactNumber":"8183195143"},"InvoiceLineItems":[{"ItemCode":"353","ItemDescription":"Recliner","ItemPrice":1585.0,"ItemQty":2,"TotalValue":3170.0},{"ItemCode":"488","ItemDescription":"Bread knife","ItemPrice":1077.0,"ItemQty":2,"TotalValue":2154.0},{"ItemCode":"408","ItemDescription":"Confidante","ItemPrice":1242.0,"ItemQty":1,"TotalValue":1242.0}]} 7 | {"InvoiceNumber":"89101510","CreatedTime":1595688904763,"StoreID":"STR6347","PosID":"POS994","CashierID":"OAS274","CustomerType":"PRIME","CustomerCardNo":"6296964163","TotalAmount":5560.0,"NumberOfItems":4,"PaymentMethod":"CASH","TaxableAmount":5560.0,"CGST":139.0,"SGST":139.0,"CESS":6.95,"DeliveryType":"TAKEAWAY","InvoiceLineItems":[{"ItemCode":"493","ItemDescription":"Boning knife","ItemPrice":513.0,"ItemQty":1,"TotalValue":513.0},{"ItemCode":"468","ItemDescription":"Lunch 
box","ItemPrice":1467.0,"ItemQty":1,"TotalValue":1467.0},{"ItemCode":"348","ItemDescription":"Navy chair","ItemPrice":1998.0,"ItemQty":1,"TotalValue":1998.0},{"ItemCode":"238","ItemDescription":"Dining table","ItemPrice":1582.0,"ItemQty":1,"TotalValue":1582.0}]} 8 | {"InvoiceNumber":"26723058","CreatedTime":1595689028262,"StoreID":"STR7188","PosID":"POS664","CashierID":"OAS971","CustomerType":"PRIME","CustomerCardNo":"9316477281","TotalAmount":5235.0,"NumberOfItems":3,"PaymentMethod":"CARD","TaxableAmount":5235.0,"CGST":130.875,"SGST":130.875,"CESS":6.54375,"DeliveryType":"HOME-DELIVERY","DeliveryAddress":{"AddressLine":"5418 Magna. Rd.","City":"Chennai","State":"Tamil Nadu","PinCode":"386032","ContactNumber":"6557358508"},"InvoiceLineItems":[{"ItemCode":"653","ItemDescription":"Browning tray","ItemPrice":375.0,"ItemQty":1,"TotalValue":375.0},{"ItemCode":"568","ItemDescription":"Pinch Pleated Curtains","ItemPrice":1718.0,"ItemQty":2,"TotalValue":3436.0},{"ItemCode":"498","ItemDescription":"Carving knifes","ItemPrice":1424.0,"ItemQty":1,"TotalValue":1424.0}]} 9 | {"InvoiceNumber":"96837834","CreatedTime":1595689029002,"StoreID":"STR6162","PosID":"POS835","CashierID":"OAS583","CustomerType":"NONPRIME","CustomerCardNo":"5611417583","TotalAmount":13481.0,"NumberOfItems":4,"PaymentMethod":"CARD","TaxableAmount":13481.0,"CGST":337.02500000000003,"SGST":337.02500000000003,"CESS":16.85125,"DeliveryType":"TAKEAWAY","InvoiceLineItems":[{"ItemCode":"213","ItemDescription":"Infant bed","ItemPrice":1755.0,"ItemQty":1,"TotalValue":1755.0},{"ItemCode":"528","ItemDescription":"Projection clock","ItemPrice":2365.0,"ItemQty":2,"TotalValue":4730.0},{"ItemCode":"288","ItemDescription":"Hutch","ItemPrice":1812.0,"ItemQty":2,"TotalValue":3624.0},{"ItemCode":"203","ItemDescription":"Bunk bed","ItemPrice":1686.0,"ItemQty":2,"TotalValue":3372.0}]} 10 | 
{"InvoiceNumber":"78302645","CreatedTime":1595689029506,"StoreID":"STR8513","PosID":"POS155","CashierID":"OAS558","CustomerType":"NONPRIME","CustomerCardNo":"2389018842","TotalAmount":6543.0,"NumberOfItems":3,"PaymentMethod":"CASH","TaxableAmount":6543.0,"CGST":163.57500000000002,"SGST":163.57500000000002,"CESS":8.17875,"DeliveryType":"HOME-DELIVERY","DeliveryAddress":{"AddressLine":"Flat No. #210-902 Neque Street","City":"South Dum Dum","State":"West Bengal","PinCode":"504795","ContactNumber":"7508353683"},"InvoiceLineItems":[{"ItemCode":"533","ItemDescription":"Rolling ball clock","ItemPrice":1651.0,"ItemQty":2,"TotalValue":3302.0},{"ItemCode":"538","ItemDescription":"Grandmother clock","ItemPrice":1301.0,"ItemQty":1,"TotalValue":1301.0},{"ItemCode":"278","ItemDescription":"Dining set","ItemPrice":1940.0,"ItemQty":1,"TotalValue":1940.0}]} 11 | -------------------------------------------------------------------------------- /05-MultiQueryDemo/kafka-scripts/01-start-zookeeper.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\zookeeper-server-start.bat %KAFKA_HOME%\config\zookeeper.properties -------------------------------------------------------------------------------- /05-MultiQueryDemo/kafka-scripts/02-start-kafka.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-server-start.bat %KAFKA_HOME%\config\server.properties -------------------------------------------------------------------------------- /05-MultiQueryDemo/kafka-scripts/03-create-input-topic.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic invoices -------------------------------------------------------------------------------- /05-MultiQueryDemo/kafka-scripts/04-create-output-topic.cmd: 
-------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic notifications -------------------------------------------------------------------------------- /05-MultiQueryDemo/kafka-scripts/05-start-producer.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-console-producer.bat --broker-list localhost:9092 --topic invoices -------------------------------------------------------------------------------- /05-MultiQueryDemo/kafka-scripts/06-start-consumer.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-console-consumer.bat --bootstrap-server localhost:9092 --topic notifications --from-beginning -------------------------------------------------------------------------------- /05-MultiQueryDemo/lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LearningJournal/Spark-Streaming-In-Python/13fdefae019848cc83c93aaaa3ba4e17af09bfae/05-MultiQueryDemo/lib/__init__.py -------------------------------------------------------------------------------- /05-MultiQueryDemo/lib/logger.py: -------------------------------------------------------------------------------- 1 | class Log4j: 2 | def __init__(self, spark): 3 | log4j = spark._jvm.org.apache.log4j 4 | 5 | root_class = "guru.learningjournal.spark.examples" 6 | conf = spark.sparkContext.getConf() 7 | app_name = conf.get("spark.app.name") 8 | 9 | self.logger = log4j.LogManager.getLogger(root_class + "." 
"""Kafka Avro sink demo.

Reads JSON invoices from the ``invoices`` Kafka topic, explodes every invoice
into one row per line item, and writes each row to the ``invoice-items`` topic
with the record value serialized as Avro and the invoice number as the key.
"""
from pyspark.sql import SparkSession
from pyspark.sql.avro.functions import to_avro
from pyspark.sql.functions import from_json, col, expr, struct
from pyspark.sql.types import ArrayType, StructType, StructField, StringType, LongType, DoubleType, IntegerType

from lib.logger import Log4j

if __name__ == "__main__":
    # The app name is also the suffix of the log4j logger name built by Log4j
    # (from "spark.app.name"), so it must identify this job and not the
    # multi-query demo this script was derived from.
    spark = SparkSession \
        .builder \
        .appName("Kafka Avro Sink Demo") \
        .master("local[3]") \
        .config("spark.streaming.stopGracefullyOnShutdown", "true") \
        .getOrCreate()

    logger = Log4j(spark)

    # Shape of one invoice as it arrives (JSON) on the input topic.
    schema = StructType([
        StructField("InvoiceNumber", StringType()),
        StructField("CreatedTime", LongType()),
        StructField("StoreID", StringType()),
        StructField("PosID", StringType()),
        StructField("CashierID", StringType()),
        StructField("CustomerType", StringType()),
        StructField("CustomerCardNo", StringType()),
        StructField("TotalAmount", DoubleType()),
        StructField("NumberOfItems", IntegerType()),
        StructField("PaymentMethod", StringType()),
        StructField("CGST", DoubleType()),
        StructField("SGST", DoubleType()),
        StructField("CESS", DoubleType()),
        StructField("DeliveryType", StringType()),
        StructField("DeliveryAddress", StructType([
            StructField("AddressLine", StringType()),
            StructField("City", StringType()),
            StructField("State", StringType()),
            StructField("PinCode", StringType()),
            StructField("ContactNumber", StringType())
        ])),
        StructField("InvoiceLineItems", ArrayType(StructType([
            StructField("ItemCode", StringType()),
            StructField("ItemDescription", StringType()),
            StructField("ItemPrice", DoubleType()),
            StructField("ItemQty", IntegerType()),
            StructField("TotalValue", DoubleType())
        ]))),
    ])

    kafka_df = spark.readStream \
        .format("kafka") \
        .option("kafka.bootstrap.servers", "localhost:9092") \
        .option("subscribe", "invoices") \
        .option("startingOffsets", "earliest") \
        .load()

    # The Kafka value column is binary; cast it to string before parsing the JSON.
    value_df = kafka_df.select(from_json(col("value").cast("string"), schema).alias("value"))

    # One output row per invoice line item, carrying the invoice-level columns along.
    explode_df = value_df.selectExpr("value.InvoiceNumber", "value.CreatedTime", "value.StoreID",
                                     "value.PosID", "value.CustomerType", "value.CustomerCardNo", "value.DeliveryType",
                                     "value.DeliveryAddress.City",
                                     "value.DeliveryAddress.State", "value.DeliveryAddress.PinCode",
                                     "explode(value.InvoiceLineItems) as LineItem")

    # Promote the fields of the exploded struct to top-level columns.
    flattened_df = explode_df \
        .withColumn("ItemCode", expr("LineItem.ItemCode")) \
        .withColumn("ItemDescription", expr("LineItem.ItemDescription")) \
        .withColumn("ItemPrice", expr("LineItem.ItemPrice")) \
        .withColumn("ItemQty", expr("LineItem.ItemQty")) \
        .withColumn("TotalValue", expr("LineItem.TotalValue")) \
        .drop("LineItem")

    # The Kafka sink expects "key"/"value" columns; the value is every
    # flattened column packed into a struct and encoded as Avro.
    kafka_target_df = flattened_df.select(expr("InvoiceNumber as key"),
                                          to_avro(struct("*")).alias("value"))

    invoice_writer_query = kafka_target_df \
        .writeStream \
        .queryName("Flattened Invoice Writer") \
        .format("kafka") \
        .option("kafka.bootstrap.servers", "localhost:9092") \
        .option("topic", "invoice-items") \
        .outputMode("append") \
        .option("checkpointLocation", "chk-point-dir") \
        .start()

    logger.info("Start Writer Query")
    invoice_writer_query.awaitTermination()
glass","ItemPrice":1644.0,"ItemQty":2,"TotalValue":3288.0},{"ItemCode":"283","ItemDescription":"Portable Lamps","ItemPrice":2236.0,"ItemQty":1,"TotalValue":2236.0},{"ItemCode":"498","ItemDescription":"Carving knifes","ItemPrice":1424.0,"ItemQty":2,"TotalValue":2848.0},{"ItemCode":"523","ItemDescription":"Oil-lamp clock","ItemPrice":1371.0,"ItemQty":2,"TotalValue":2742.0}]} 2 | {"InvoiceNumber":"91372973","CreatedTime":1595688901219,"StoreID":"STR8513","PosID":"POS163","CashierID":"OAS961","CustomerType":"PRIME","CustomerCardNo":"2762345282","TotalAmount":8272.0,"NumberOfItems":4,"PaymentMethod":"CARD","TaxableAmount":8272.0,"CGST":206.8,"SGST":206.8,"CESS":10.34,"DeliveryType":"HOME-DELIVERY","DeliveryAddress":{"AddressLine":"444-1842 Dui. Rd.","City":"Shivapuri","State":"Madhya Pradesh","PinCode":"561012","ContactNumber":"7243866404"},"InvoiceLineItems":[{"ItemCode":"413","ItemDescription":"Slipcover","ItemPrice":1896.0,"ItemQty":1,"TotalValue":1896.0},{"ItemCode":"483","ItemDescription":"Teacups and saucers","ItemPrice":1781.0,"ItemQty":1,"TotalValue":1781.0},{"ItemCode":"583","ItemDescription":"Tab Top Curtains","ItemPrice":1329.0,"ItemQty":1,"TotalValue":1329.0},{"ItemCode":"558","ItemDescription":"Balloon clock","ItemPrice":1633.0,"ItemQty":2,"TotalValue":3266.0}]} 3 | {"InvoiceNumber":"93647513","CreatedTime":1595688902254,"StoreID":"STR8513","PosID":"POS881","CashierID":"OAS354","CustomerType":"NONPRIME","CustomerCardNo":"2599848717","TotalAmount":3374.0,"NumberOfItems":1,"PaymentMethod":"CARD","TaxableAmount":3374.0,"CGST":84.35000000000001,"SGST":84.35000000000001,"CESS":4.2175,"DeliveryType":"TAKEAWAY","InvoiceLineItems":[{"ItemCode":"258","ItemDescription":"Closet","ItemPrice":1687.0,"ItemQty":2,"TotalValue":3374.0}]} 4 | 
{"InvoiceNumber":"8320594","CreatedTime":1595688902254,"StoreID":"STR7188","PosID":"POS825","CashierID":"OAS329","CustomerType":"PRIME","CustomerCardNo":"7051101351","TotalAmount":5824.0,"NumberOfItems":3,"PaymentMethod":"CASH","TaxableAmount":5824.0,"CGST":145.6,"SGST":145.6,"CESS":7.28,"DeliveryType":"HOME-DELIVERY","DeliveryAddress":{"AddressLine":"2465 Laoreet, Street","City":"Dehri","State":"Bihar","PinCode":"637308","ContactNumber":"2662305605"},"InvoiceLineItems":[{"ItemCode":"288","ItemDescription":"Hutch","ItemPrice":1812.0,"ItemQty":2,"TotalValue":3624.0},{"ItemCode":"558","ItemDescription":"Balloon clock","ItemPrice":1633.0,"ItemQty":1,"TotalValue":1633.0},{"ItemCode":"658","ItemDescription":"Chinois","ItemPrice":567.0,"ItemQty":1,"TotalValue":567.0}]} 5 | {"InvoiceNumber":"4888607","CreatedTime":1595688903258,"StoreID":"STR2629","PosID":"POS172","CashierID":"OAS622","CustomerType":"NONPRIME","CustomerCardNo":"7829975914","TotalAmount":1750.0,"NumberOfItems":2,"PaymentMethod":"CASH","TaxableAmount":1750.0,"CGST":43.75,"SGST":43.75,"CESS":2.1875,"DeliveryType":"HOME-DELIVERY","DeliveryAddress":{"AddressLine":"7114 Eu, Rd.","City":"Ratlam","State":"Madhya Pradesh","PinCode":"925281","ContactNumber":"4057182350"},"InvoiceLineItems":[{"ItemCode":"208","ItemDescription":"Canopy bed","ItemPrice":508.0,"ItemQty":1,"TotalValue":508.0},{"ItemCode":"408","ItemDescription":"Confidante","ItemPrice":1242.0,"ItemQty":1,"TotalValue":1242.0}]} 6 | {"InvoiceNumber":"99794761","CreatedTime":1595688903258,"StoreID":"STR6347","PosID":"POS333","CashierID":"OAS697","CustomerType":"NONPRIME","CustomerCardNo":"9740604930","TotalAmount":6566.0,"NumberOfItems":3,"PaymentMethod":"CARD","TaxableAmount":6566.0,"CGST":164.15,"SGST":164.15,"CESS":8.2075,"DeliveryType":"HOME-DELIVERY","DeliveryAddress":{"AddressLine":"517-8912 Nulla St.","City":"Champdani","State":"West 
Bengal","PinCode":"680616","ContactNumber":"8183195143"},"InvoiceLineItems":[{"ItemCode":"353","ItemDescription":"Recliner","ItemPrice":1585.0,"ItemQty":2,"TotalValue":3170.0},{"ItemCode":"488","ItemDescription":"Bread knife","ItemPrice":1077.0,"ItemQty":2,"TotalValue":2154.0},{"ItemCode":"408","ItemDescription":"Confidante","ItemPrice":1242.0,"ItemQty":1,"TotalValue":1242.0}]} 7 | {"InvoiceNumber":"89101510","CreatedTime":1595688904763,"StoreID":"STR6347","PosID":"POS994","CashierID":"OAS274","CustomerType":"PRIME","CustomerCardNo":"6296964163","TotalAmount":5560.0,"NumberOfItems":4,"PaymentMethod":"CASH","TaxableAmount":5560.0,"CGST":139.0,"SGST":139.0,"CESS":6.95,"DeliveryType":"TAKEAWAY","InvoiceLineItems":[{"ItemCode":"493","ItemDescription":"Boning knife","ItemPrice":513.0,"ItemQty":1,"TotalValue":513.0},{"ItemCode":"468","ItemDescription":"Lunch box","ItemPrice":1467.0,"ItemQty":1,"TotalValue":1467.0},{"ItemCode":"348","ItemDescription":"Navy chair","ItemPrice":1998.0,"ItemQty":1,"TotalValue":1998.0},{"ItemCode":"238","ItemDescription":"Dining table","ItemPrice":1582.0,"ItemQty":1,"TotalValue":1582.0}]} 8 | {"InvoiceNumber":"26723058","CreatedTime":1595689028262,"StoreID":"STR7188","PosID":"POS664","CashierID":"OAS971","CustomerType":"PRIME","CustomerCardNo":"9316477281","TotalAmount":5235.0,"NumberOfItems":3,"PaymentMethod":"CARD","TaxableAmount":5235.0,"CGST":130.875,"SGST":130.875,"CESS":6.54375,"DeliveryType":"HOME-DELIVERY","DeliveryAddress":{"AddressLine":"5418 Magna. 
Rd.","City":"Chennai","State":"Tamil Nadu","PinCode":"386032","ContactNumber":"6557358508"},"InvoiceLineItems":[{"ItemCode":"653","ItemDescription":"Browning tray","ItemPrice":375.0,"ItemQty":1,"TotalValue":375.0},{"ItemCode":"568","ItemDescription":"Pinch Pleated Curtains","ItemPrice":1718.0,"ItemQty":2,"TotalValue":3436.0},{"ItemCode":"498","ItemDescription":"Carving knifes","ItemPrice":1424.0,"ItemQty":1,"TotalValue":1424.0}]} 9 | {"InvoiceNumber":"96837834","CreatedTime":1595689029002,"StoreID":"STR6162","PosID":"POS835","CashierID":"OAS583","CustomerType":"NONPRIME","CustomerCardNo":"5611417583","TotalAmount":13481.0,"NumberOfItems":4,"PaymentMethod":"CARD","TaxableAmount":13481.0,"CGST":337.02500000000003,"SGST":337.02500000000003,"CESS":16.85125,"DeliveryType":"TAKEAWAY","InvoiceLineItems":[{"ItemCode":"213","ItemDescription":"Infant bed","ItemPrice":1755.0,"ItemQty":1,"TotalValue":1755.0},{"ItemCode":"528","ItemDescription":"Projection clock","ItemPrice":2365.0,"ItemQty":2,"TotalValue":4730.0},{"ItemCode":"288","ItemDescription":"Hutch","ItemPrice":1812.0,"ItemQty":2,"TotalValue":3624.0},{"ItemCode":"203","ItemDescription":"Bunk bed","ItemPrice":1686.0,"ItemQty":2,"TotalValue":3372.0}]} 10 | {"InvoiceNumber":"78302645","CreatedTime":1595689029506,"StoreID":"STR8513","PosID":"POS155","CashierID":"OAS558","CustomerType":"NONPRIME","CustomerCardNo":"2389018842","TotalAmount":6543.0,"NumberOfItems":3,"PaymentMethod":"CASH","TaxableAmount":6543.0,"CGST":163.57500000000002,"SGST":163.57500000000002,"CESS":8.17875,"DeliveryType":"HOME-DELIVERY","DeliveryAddress":{"AddressLine":"Flat No. 
#210-902 Neque Street","City":"South Dum Dum","State":"West Bengal","PinCode":"504795","ContactNumber":"7508353683"},"InvoiceLineItems":[{"ItemCode":"533","ItemDescription":"Rolling ball clock","ItemPrice":1651.0,"ItemQty":2,"TotalValue":3302.0},{"ItemCode":"538","ItemDescription":"Grandmother clock","ItemPrice":1301.0,"ItemQty":1,"TotalValue":1301.0},{"ItemCode":"278","ItemDescription":"Dining set","ItemPrice":1940.0,"ItemQty":1,"TotalValue":1940.0}]} 11 | -------------------------------------------------------------------------------- /06-KafkaAvroSinkDemo/kafka-scripts/01-start-zookeeper.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\zookeeper-server-start.bat %KAFKA_HOME%\config\zookeeper.properties -------------------------------------------------------------------------------- /06-KafkaAvroSinkDemo/kafka-scripts/02-start-kafka.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-server-start.bat %KAFKA_HOME%\config\server.properties -------------------------------------------------------------------------------- /06-KafkaAvroSinkDemo/kafka-scripts/03-create-input-topic.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic invoices -------------------------------------------------------------------------------- /06-KafkaAvroSinkDemo/kafka-scripts/04-create-output-topic.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic invoice-items -------------------------------------------------------------------------------- /06-KafkaAvroSinkDemo/kafka-scripts/05-start-producer.cmd: 
-------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-console-producer.bat --broker-list localhost:9092 --topic invoices -------------------------------------------------------------------------------- /06-KafkaAvroSinkDemo/kafka-scripts/06-start-consumer.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-console-consumer.bat --bootstrap-server localhost:9092 --topic invoice-items --from-beginning -------------------------------------------------------------------------------- /06-KafkaAvroSinkDemo/lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LearningJournal/Spark-Streaming-In-Python/13fdefae019848cc83c93aaaa3ba4e17af09bfae/06-KafkaAvroSinkDemo/lib/__init__.py -------------------------------------------------------------------------------- /06-KafkaAvroSinkDemo/lib/logger.py: -------------------------------------------------------------------------------- 1 | class Log4j: 2 | def __init__(self, spark): 3 | log4j = spark._jvm.org.apache.log4j 4 | 5 | root_class = "guru.learningjournal.spark.examples" 6 | conf = spark.sparkContext.getConf() 7 | app_name = conf.get("spark.app.name") 8 | 9 | self.logger = log4j.LogManager.getLogger(root_class + "." 
"""Kafka Avro source demo.

Reads Avro-encoded invoice line items from the ``invoice-items`` Kafka topic,
aggregates purchases and reward points per PRIME customer card, and writes the
running totals as JSON to the ``customer-rewards`` topic.
"""
from pyspark.sql import SparkSession
from pyspark.sql.avro.functions import from_avro
from pyspark.sql.functions import expr, col, struct, to_json, sum

from lib.logger import Log4j

if __name__ == "__main__":
    # The app name is also the suffix of the log4j logger name built by Log4j
    # (from "spark.app.name"), so it must identify this job and not the
    # multi-query demo this script was derived from.
    spark = SparkSession \
        .builder \
        .appName("Kafka Avro Source Demo") \
        .master("local[3]") \
        .config("spark.streaming.stopGracefullyOnShutdown", "true") \
        .getOrCreate()

    logger = Log4j(spark)

    kafka_source_df = spark \
        .readStream \
        .format("kafka") \
        .option("kafka.bootstrap.servers", "localhost:9092") \
        .option("subscribe", "invoice-items") \
        .option("startingOffsets", "earliest") \
        .load()

    # Read the Avro schema (JSON text) and close the file deterministically
    # instead of leaving the handle to the garbage collector.
    # The path is relative, so the script must be started from its own directory.
    with open("schema/invoice-items", mode="r", encoding="utf-8") as schema_file:
        avro_schema = schema_file.read()

    value_df = kafka_source_df.select(from_avro(col("value"), avro_schema).alias("value"))

    # Rewards are 20% of each line item's value, truncated to an integer
    # per item before being summed per customer card.
    rewards_df = value_df.filter("value.CustomerType == 'PRIME'") \
        .groupBy("value.CustomerCardNo") \
        .agg(sum("value.TotalValue").alias("TotalPurchase"),
             sum(expr("value.TotalValue * 0.2").cast("integer")).alias("AggregatedRewards"))

    # The Kafka sink expects "key"/"value" columns.
    kafka_target_df = rewards_df.select(expr("CustomerCardNo as key"),
                                        to_json(struct("TotalPurchase", "AggregatedRewards")).alias("value"))

    # "update" mode emits only the customer totals that changed in a micro-batch.
    rewards_writer_query = kafka_target_df \
        .writeStream \
        .queryName("Rewards Writer") \
        .format("kafka") \
        .option("kafka.bootstrap.servers", "localhost:9092") \
        .option("topic", "customer-rewards") \
        .outputMode("update") \
        .option("checkpointLocation", "chk-point-dir") \
        .start()

    logger.info("Start Rewards Writer Query")
    rewards_writer_query.awaitTermination()
class Log4j:
    """Thin wrapper around a JVM log4j logger reached through a Spark session.

    The wrapped logger is named
    ``guru.learningjournal.spark.examples.<value of spark.app.name>``.
    """

    def __init__(self, spark):
        """Resolve the log4j logger for the application running on ``spark``.

        :param spark: object exposing ``_jvm`` and ``sparkContext``
            (the demos pass their SparkSession).
        """
        jvm_log4j = spark._jvm.org.apache.log4j
        application = spark.sparkContext.getConf().get("spark.app.name")
        logger_name = ".".join(["guru.learningjournal.spark.examples", application])
        self.logger = jvm_log4j.LogManager.getLogger(logger_name)

    def warn(self, message):
        """Forward ``message`` to the wrapped logger at WARN level."""
        self.logger.warn(message)

    def info(self, message):
        """Forward ``message`` to the wrapped logger at INFO level."""
        self.logger.info(message)

    def error(self, message):
        """Forward ``message`` to the wrapped logger at ERROR level."""
        self.logger.error(message)

    def debug(self, message):
        """Forward ``message`` to the wrapped logger at DEBUG level."""
        self.logger.debug(message)
"""Tumbling window demo.

Reads trade events (JSON) from the ``trades`` Kafka topic and prints the total
BUY and SELL amounts per 15-minute tumbling window of ``CreatedTime`` to the
console.
"""
from pyspark.sql import SparkSession, Window
from pyspark.sql.functions import from_json, col, to_timestamp, window, expr, sum
from pyspark.sql.types import StructType, StructField, StringType, IntegerType

from lib.logger import Log4j

if __name__ == "__main__":
    spark = SparkSession \
        .builder \
        .appName("Tumbling Window Demo") \
        .master("local[3]") \
        .config("spark.streaming.stopGracefullyOnShutdown", "true") \
        .config("spark.sql.shuffle.partitions", 2) \
        .getOrCreate()

    logger = Log4j(spark)

    # CreatedTime arrives as text and is converted to a timestamp below.
    stock_schema = StructType([
        StructField("CreatedTime", StringType()),
        StructField("Type", StringType()),
        StructField("Amount", IntegerType()),
        StructField("BrokerCode", StringType())
    ])

    kafka_df = spark.readStream \
        .format("kafka") \
        .option("kafka.bootstrap.servers", "localhost:9092") \
        .option("subscribe", "trades") \
        .option("startingOffsets", "earliest") \
        .load()

    # The Kafka value column is binary; cast it to string before parsing the JSON.
    value_df = kafka_df.select(from_json(col("value").cast("string"), stock_schema).alias("value"))

    # Split Amount into separate Buy/Sell columns so both can be summed in one pass.
    trade_df = value_df.select("value.*") \
        .withColumn("CreatedTime", to_timestamp(col("CreatedTime"), "yyyy-MM-dd HH:mm:ss")) \
        .withColumn("Buy", expr("case when Type == 'BUY' then Amount else 0 end")) \
        .withColumn("Sell", expr("case when Type == 'SELL' then Amount else 0 end"))

    # Totals are computed across all brokers; add col("BrokerCode") as a
    # further grouping column to get per-broker windows instead.
    window_agg_df = trade_df \
        .groupBy(window(col("CreatedTime"), "15 minute")) \
        .agg(sum("Buy").alias("TotalBuy"),
             sum("Sell").alias("TotalSell"))

    output_df = window_agg_df.select("window.start", "window.end", "TotalBuy", "TotalSell")

    # NOTE: a running total across windows (Window.orderBy("end") with
    # rowsBetween(unboundedPreceding, currentRow)) is intentionally not
    # computed here. That is a non-time-based window function and .show(),
    # which Structured Streaming does not allow on a streaming DataFrame
    # (to be confirmed against the Spark version in use); derive it from the
    # query output in a batch job instead.

    window_query = output_df.writeStream \
        .format("console") \
        .outputMode("update") \
        .option("checkpointLocation", "chk-point-dir") \
        .trigger(processingTime="1 minute") \
        .start()

    logger.info("Waiting for Query")
    window_query.awaitTermination()
"Type": "BUY", "Amount": 800, "BrokerCode": "ABX"} 4 | {"CreatedTime": "2019-02-05 10:40:00", "Type": "BUY", "Amount": 900, "BrokerCode": "ABX"} 5 | {"CreatedTime": "2019-02-05 10:25:00", "Type": "SELL", "Amount": 400, "BrokerCode": "ABX"} 6 | {"CreatedTime": "2019-02-05 10:48:00", "Type": "SELL", "Amount": 600, "BrokerCode": "ABX"} 7 | 8 | 9 | -------------------------------------------------------------------------------- /08-TumblingWindowDemo/kafka-scripts/01-start-zookeeper.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\zookeeper-server-start.bat %KAFKA_HOME%\config\zookeeper.properties -------------------------------------------------------------------------------- /08-TumblingWindowDemo/kafka-scripts/02-start-kafka.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-server-start.bat %KAFKA_HOME%\config\server.properties -------------------------------------------------------------------------------- /08-TumblingWindowDemo/kafka-scripts/03-create-input-topic.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic trades -------------------------------------------------------------------------------- /08-TumblingWindowDemo/kafka-scripts/05-start-producer.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-console-producer.bat --broker-list localhost:9092 --topic trades -------------------------------------------------------------------------------- /08-TumblingWindowDemo/lib/__init__.py: -------------------------------------------------------------------------------- 
class Log4j:
    """Small Python facade over a JVM Log4j logger reached through a Spark session.

    The wrapped logger is named
    ``guru.learningjournal.spark.examples.<value of spark.app.name>``.
    """

    def __init__(self, spark):
        """Resolve the Log4j logger for the application configured on *spark*.

        :param spark: object exposing ``_jvm`` and ``sparkContext``
            (the demo scripts pass their SparkSession).
        """
        jvm_log4j = spark._jvm.org.apache.log4j
        application = spark.sparkContext.getConf().get("spark.app.name")
        package_root = "guru.learningjournal.spark.examples"
        logger_name = package_root + "." + application
        self.logger = jvm_log4j.LogManager.getLogger(logger_name)

    def warn(self, message):
        """Pass *message* to the wrapped logger's ``warn`` method."""
        self.logger.warn(message)

    def info(self, message):
        """Pass *message* to the wrapped logger's ``info`` method."""
        self.logger.info(message)

    def error(self, message):
        """Pass *message* to the wrapped logger's ``error`` method."""
        self.logger.error(message)

    def debug(self, message):
        """Pass *message* to the wrapped logger's ``debug`` method."""
        self.logger.debug(message)
from pyspark.sql import SparkSession
from pyspark.sql.functions import from_json, col, to_timestamp, window, max
from pyspark.sql.types import StructType, StructField, StringType, DoubleType

from lib.logger import Log4j

if __name__ == "__main__":
    # Sliding-window demo: per-sensor maximum reading over 15-minute windows
    # that advance every 5 minutes, read from the Kafka topic "sensor".
    spark = (
        SparkSession.builder
        .master("local[3]")
        .appName("Sliding Window Demo")
        .config("spark.streaming.stopGracefullyOnShutdown", "true")
        .config("spark.sql.shuffle.partitions", 1)
        .getOrCreate()
    )

    logger = Log4j(spark)

    # Layout of the JSON document carried in each Kafka message value.
    reading_fields = [
        StructField("CreatedTime", StringType()),
        StructField("Reading", DoubleType()),
    ]
    schema = StructType(reading_fields)

    source_reader = spark.readStream.format("kafka")
    source_reader = source_reader.option("kafka.bootstrap.servers", "localhost:9092")
    source_reader = source_reader.option("subscribe", "sensor")
    source_reader = source_reader.option("startingOffsets", "earliest")
    kafka_source_df = source_reader.load()

    # The Kafka message key becomes the sensor id; the value is parsed as JSON.
    sensor_id = col("key").cast("string").alias("SensorID")
    payload = from_json(col("value").cast("string"), schema).alias("value")
    value_df = kafka_source_df.select(sensor_id, payload)

    # Replace the string CreatedTime with a real timestamp column.
    flattened_df = value_df.select("SensorID", "value.*")
    sensor_df = flattened_df.withColumn(
        "CreatedTime",
        to_timestamp(col("CreatedTime"), "yyyy-MM-dd HH:mm:ss"),
    )

    sliding_window = window(col("CreatedTime"), "15 minute", "5 minute")
    agg_df = (
        sensor_df
        .withWatermark("CreatedTime", "30 minute")
        .groupBy(col("SensorID"), sliding_window)
        .agg(max("Reading").alias("MaxReading"))
    )

    output_df = agg_df.select("SensorID", "window.start", "window.end", "MaxReading")

    # Print updated aggregates to the console once per minute.
    window_query = (
        output_df.writeStream
        .format("console")
        .outputMode("update")
        .option("checkpointLocation", "chk-point-dir")
        .trigger(processingTime="1 minute")
        .start()
    )

    logger.info("Waiting for Query")
    window_query.awaitTermination()
-------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic sensor -------------------------------------------------------------------------------- /09-SlidingWindowDemo/kafka-scripts/05-start-producer.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-console-producer.bat --broker-list localhost:9092 --topic sensor --property "parse.key=true" --property "key.separator=:" -------------------------------------------------------------------------------- /09-SlidingWindowDemo/lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LearningJournal/Spark-Streaming-In-Python/13fdefae019848cc83c93aaaa3ba4e17af09bfae/09-SlidingWindowDemo/lib/__init__.py -------------------------------------------------------------------------------- /09-SlidingWindowDemo/lib/logger.py: -------------------------------------------------------------------------------- 1 | class Log4j: 2 | def __init__(self, spark): 3 | log4j = spark._jvm.org.apache.log4j 4 | 5 | root_class = "guru.learningjournal.spark.examples" 6 | conf = spark.sparkContext.getConf() 7 | app_name = conf.get("spark.app.name") 8 | 9 | self.logger = log4j.LogManager.getLogger(root_class + "." 
from pyspark.sql import SparkSession, Window
from pyspark.sql.functions import from_json, col, to_timestamp, window, expr, sum
from pyspark.sql.types import StructType, StructField, StringType, IntegerType

from lib.logger import Log4j

if __name__ == "__main__":
    # Watermark demo: 15-minute tumbling-window buy/sell totals over the Kafka
    # topic "trades", with a 30-minute watermark on the event time.
    spark = (
        SparkSession.builder
        .appName("Watermark Demo")
        .master("local[3]")
        .config("spark.streaming.stopGracefullyOnShutdown", "true")
        .config("spark.sql.shuffle.partitions", 2)
        .getOrCreate()
    )

    logger = Log4j(spark)

    # Layout of the JSON trade record carried in each Kafka message value.
    trade_fields = [
        StructField("CreatedTime", StringType()),
        StructField("Type", StringType()),
        StructField("Amount", IntegerType()),
        StructField("BrokerCode", StringType()),
    ]
    stock_schema = StructType(trade_fields)

    trade_reader = spark.readStream.format("kafka")
    trade_reader = trade_reader.option("kafka.bootstrap.servers", "localhost:9092")
    trade_reader = trade_reader.option("subscribe", "trades")
    trade_reader = trade_reader.option("startingOffsets", "earliest")
    kafka_df = trade_reader.load()

    parsed_value = from_json(col("value").cast("string"), stock_schema).alias("value")
    value_df = kafka_df.select(parsed_value)

    # Convert CreatedTime to a timestamp and split Amount into Buy / Sell columns.
    event_time = to_timestamp(col("CreatedTime"), "yyyy-MM-dd HH:mm:ss")
    buy_amount = expr("case when Type == 'BUY' then Amount else 0 end")
    sell_amount = expr("case when Type == 'SELL' then Amount else 0 end")
    trade_df = (
        value_df.select("value.*")
        .withColumn("CreatedTime", event_time)
        .withColumn("Buy", buy_amount)
        .withColumn("Sell", sell_amount)
    )

    tumbling_window = window(col("CreatedTime"), "15 minute")
    window_agg_df = (
        trade_df
        .withWatermark("CreatedTime", "30 minute")
        .groupBy(tumbling_window)
        .agg(sum("Buy").alias("TotalBuy"),
             sum("Sell").alias("TotalSell"))
    )

    output_df = window_agg_df.select("window.start", "window.end", "TotalBuy", "TotalSell")

    # Print updated window totals to the console every 30 seconds.
    window_query = (
        output_df.writeStream
        .format("console")
        .outputMode("update")
        .option("checkpointLocation", "chk-point-dir")
        .trigger(processingTime="30 second")
        .start()
    )

    logger.info("Waiting for Query")
    window_query.awaitTermination()
-------------------------------------------------------------------------------- /10-WatermarkDemo/data/samples.txt: -------------------------------------------------------------------------------- 1 | {"CreatedTime": "2019-02-05 10:05:00", "Type": "BUY", "Amount": 500, "BrokerCode": "ABX"} 2 | {"CreatedTime": "2019-02-05 10:40:00", "Type": "BUY", "Amount": 900, "BrokerCode": "ABX"} 3 | {"CreatedTime": "2019-02-05 10:20:00", "Type": "BUY", "Amount": 800, "BrokerCode": "ABX"} 4 | {"CreatedTime": "2019-02-05 10:48:00", "Type": "SELL", "Amount": 600, "BrokerCode": "ABX"} 5 | 6 | {"CreatedTime": "2019-02-05 10:14:00", "Type": "SELL", "Amount": 300, "BrokerCode": "ABX"} 7 | {"CreatedTime": "2019-02-05 10:16:00", "Type": "SELL", "Amount": 300, "BrokerCode": "ABX"} 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /10-WatermarkDemo/kafka-scripts/01-start-zookeeper.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\zookeeper-server-start.bat %KAFKA_HOME%\config\zookeeper.properties -------------------------------------------------------------------------------- /10-WatermarkDemo/kafka-scripts/02-start-kafka.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-server-start.bat %KAFKA_HOME%\config\server.properties -------------------------------------------------------------------------------- /10-WatermarkDemo/kafka-scripts/03-create-input-topic.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic trades -------------------------------------------------------------------------------- /10-WatermarkDemo/kafka-scripts/05-start-producer.cmd: -------------------------------------------------------------------------------- 1 | 
class Log4j:
    """Small Python facade over a JVM Log4j logger reached through a Spark session.

    The wrapped logger is named
    ``guru.learningjournal.spark.examples.<value of spark.app.name>``.
    """

    def __init__(self, spark):
        """Resolve the Log4j logger for the application configured on *spark*.

        :param spark: object exposing ``_jvm`` and ``sparkContext``
            (the demo scripts pass their SparkSession).
        """
        jvm_log4j = spark._jvm.org.apache.log4j
        application = spark.sparkContext.getConf().get("spark.app.name")
        package_root = "guru.learningjournal.spark.examples"
        logger_name = package_root + "." + application
        self.logger = jvm_log4j.LogManager.getLogger(logger_name)

    def warn(self, message):
        """Pass *message* to the wrapped logger's ``warn`` method."""
        self.logger.warn(message)

    def info(self, message):
        """Pass *message* to the wrapped logger's ``info`` method."""
        self.logger.info(message)

    def error(self, message):
        """Pass *message* to the wrapped logger's ``error`` method."""
        self.logger.error(message)

    def debug(self, message):
        """Pass *message* to the wrapped logger's ``debug`` method."""
        self.logger.debug(message)
from pyspark.sql import SparkSession
from pyspark.sql.functions import from_json, col, to_timestamp
from pyspark.sql.types import StructType, StructField, StringType

from lib.logger import Log4j


def write_to_cassandra(target_df, batch_id):
    """Append one micro-batch to the Cassandra table ``spark_db.users`` and print it.

    Used as the ``foreachBatch`` callback of the streaming query below.

    :param target_df: DataFrame holding the rows of the current micro-batch.
    :param batch_id: micro-batch identifier supplied by ``foreachBatch``; unused.
    """
    # Two actions run on the batch (save + show). Without caching, the second
    # action recomputes the batch, so keep it cached for the duration of the
    # callback and always release it.
    target_df.persist()
    try:
        target_df.write \
            .format("org.apache.spark.sql.cassandra") \
            .option("keyspace", "spark_db") \
            .option("table", "users") \
            .mode("append") \
            .save()
        target_df.show()
    finally:
        target_df.unpersist()


if __name__ == "__main__":
    # Stream-to-table join demo: login events from the Kafka topic "logins" are
    # joined with the Cassandra table spark_db.users, and the result is written
    # back to that table with the login time as last_login.
    spark = SparkSession \
        .builder \
        .master("local[3]") \
        .appName("Stream Table Join Demo") \
        .config("spark.streaming.stopGracefullyOnShutdown", "true") \
        .config("spark.sql.shuffle.partitions", 2) \
        .config("spark.cassandra.connection.host", "localhost") \
        .config("spark.cassandra.connection.port", "9042") \
        .config("spark.sql.extensions", "com.datastax.spark.connector.CassandraSparkExtensions") \
        .config("spark.sql.catalog.lh", "com.datastax.spark.connector.datasource.CassandraCatalog") \
        .getOrCreate()

    logger = Log4j(spark)

    # Layout of the JSON login event carried in each Kafka message value.
    login_schema = StructType([
        StructField("created_time", StringType()),
        StructField("login_id", StringType())
    ])

    kafka_source_df = spark \
        .readStream \
        .format("kafka") \
        .option("kafka.bootstrap.servers", "localhost:9092") \
        .option("subscribe", "logins") \
        .option("startingOffsets", "earliest") \
        .load()

    value_df = kafka_source_df.select(from_json(col("value").cast("string"), login_schema).alias("value"))

    # Flatten the parsed struct and convert created_time to a timestamp.
    login_df = value_df.select("value.*") \
        .withColumn("created_time", to_timestamp(col("created_time"), "yyyy-MM-dd HH:mm:ss"))

    # Batch (non-streaming) read of the users table.
    user_df = spark.read \
        .format("org.apache.spark.sql.cassandra") \
        .option("keyspace", "spark_db") \
        .option("table", "users") \
        .load()

    join_expr = login_df.login_id == user_df.login_id
    join_type = "inner"

    # Drop the stream-side login_id so that only the table-side column remains.
    joined_df = login_df.join(user_df, join_expr, join_type) \
        .drop(login_df.login_id)

    output_df = joined_df.select(col("login_id"), col("user_name"),
                                 col("created_time").alias("last_login"))

    output_query = output_df.writeStream \
        .foreachBatch(write_to_cassandra) \
        .outputMode("update") \
        .option("checkpointLocation", "chk-point-dir") \
        .trigger(processingTime="1 minute") \
        .start()

    logger.info("Waiting for Query")
    output_query.awaitTermination()
class Log4j:
    """Small Python facade over a JVM Log4j logger reached through a Spark session.

    The wrapped logger is named
    ``guru.learningjournal.spark.examples.<value of spark.app.name>``.
    """

    def __init__(self, spark):
        """Resolve the Log4j logger for the application configured on *spark*.

        :param spark: object exposing ``_jvm`` and ``sparkContext``
            (the demo scripts pass their SparkSession).
        """
        jvm_log4j = spark._jvm.org.apache.log4j
        application = spark.sparkContext.getConf().get("spark.app.name")
        package_root = "guru.learningjournal.spark.examples"
        logger_name = package_root + "." + application
        self.logger = jvm_log4j.LogManager.getLogger(logger_name)

    def warn(self, message):
        """Pass *message* to the wrapped logger's ``warn`` method."""
        self.logger.warn(message)

    def info(self, message):
        """Pass *message* to the wrapped logger's ``info`` method."""
        self.logger.info(message)

    def error(self, message):
        """Pass *message* to the wrapped logger's ``error`` method."""
        self.logger.error(message)

    def debug(self, message):
        """Pass *message* to the wrapped logger's ``debug`` method."""
        self.logger.debug(message)
from pyspark.sql import SparkSession
from pyspark.sql.functions import from_json, to_timestamp, col, expr
from pyspark.sql.types import StructType, StructField, StringType

from lib.logger import Log4j


def _read_kafka_topic(spark, topic):
    """Return a streaming DataFrame over *topic* on localhost:9092, starting at the earliest offset."""
    reader = spark.readStream.format("kafka")
    reader = reader.option("kafka.bootstrap.servers", "localhost:9092")
    reader = reader.option("subscribe", topic)
    reader = reader.option("startingOffsets", "earliest")
    return reader.load()


if __name__ == "__main__":
    # Stream-to-stream join demo: ad impressions and clicks arrive on two Kafka
    # topics and are inner-joined on the inventory id.
    spark = (
        SparkSession.builder
        .master("local[3]")
        .appName("Stream Stream Join Demo")
        .config("spark.streaming.stopGracefullyOnShutdown", "true")
        .config("spark.sql.shuffle.partitions", 2)
        .getOrCreate()
    )

    logger = Log4j(spark)

    # Layouts of the JSON documents carried in the Kafka message values.
    impressionSchema = StructType([
        StructField("InventoryID", StringType()),
        StructField("CreatedTime", StringType()),
        StructField("Campaigner", StringType()),
    ])

    clickSchema = StructType([
        StructField("InventoryID", StringType()),
        StructField("CreatedTime", StringType()),
    ])

    # Impressions: rename the id, convert the event time, drop the raw string.
    kafka_impression_df = _read_kafka_topic(spark, "impressions")
    impression_payload = from_json(col("value").cast("string"), impressionSchema).alias("value")
    impressions_df = (
        kafka_impression_df
        .select(impression_payload)
        .selectExpr("value.InventoryID as ImpressionID", "value.CreatedTime", "value.Campaigner")
        .withColumn("ImpressionTime", to_timestamp(col("CreatedTime"), "yyyy-MM-dd HH:mm:ss"))
        .drop("CreatedTime")
    )

    # Clicks: same treatment, with the id exposed as ClickID.
    kafka_click_df = _read_kafka_topic(spark, "clicks")
    click_payload = from_json(col("value").cast("string"), clickSchema).alias("value")
    clicks_df = (
        kafka_click_df
        .select(click_payload)
        .selectExpr("value.InventoryID as ClickID", "value.CreatedTime")
        .withColumn("ClickTime", to_timestamp(col("CreatedTime"), "yyyy-MM-dd HH:mm:ss"))
        .drop("CreatedTime")
    )

    join_expr = "ImpressionID == ClickID"
    join_type = "inner"

    joined_df = impressions_df.join(clicks_df, expr(join_expr), join_type)

    # Append matched rows to the console once per minute.
    output_query = (
        joined_df.writeStream
        .format("console")
        .outputMode("append")
        .option("checkpointLocation", "chk-point-dir")
        .trigger(processingTime="1 minute")
        .start()
    )

    logger.info("Waiting for Query")
    output_query.awaitTermination()
"CreatedTime": "2020-09-09 10:00:00", "Campaigner": "ABC Ltd"} 2 | {"InventoryID": "100002", "CreatedTime": "2020-09-09 10:06:00", "Campaigner": "ABC Ltd"} 3 | {"InventoryID": "100003", "CreatedTime": "2020-09-09 10:02:00", "Campaigner": "XYZ Ltd"} 4 | {"InventoryID": "100004", "CreatedTime": "2020-09-09 10:09:00", "Campaigner": "XYZ Ltd"} 5 | 6 | {"InventoryID": "100001", "CreatedTime": "2020-09-09 10:18:00"} 7 | {"InventoryID": "100002", "CreatedTime": "2020-09-09 10:18:00"} 8 | {"InventoryID": "100003", "CreatedTime": "2020-09-09 10:18:00"} 9 | {"InventoryID": "100004", "CreatedTime": "2020-09-09 10:18:00"} 10 | {"InventoryID": "100001", "CreatedTime": "2020-09-09 10:18:00"} -------------------------------------------------------------------------------- /12-StreamStreamJoinDemo/kafka-scripts/01-start-zookeeper.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\zookeeper-server-start.bat %KAFKA_HOME%\config\zookeeper.properties -------------------------------------------------------------------------------- /12-StreamStreamJoinDemo/kafka-scripts/02-start-kafka.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-server-start.bat %KAFKA_HOME%\config\server.properties -------------------------------------------------------------------------------- /12-StreamStreamJoinDemo/kafka-scripts/03-create-impression-topic.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic impressions -------------------------------------------------------------------------------- /12-StreamStreamJoinDemo/kafka-scripts/04-create-click-topic.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-topics.bat --create --zookeeper 
class Log4j:
    """Small Python facade over a JVM Log4j logger reached through a Spark session.

    The wrapped logger is named
    ``guru.learningjournal.spark.examples.<value of spark.app.name>``.
    """

    def __init__(self, spark):
        """Resolve the Log4j logger for the application configured on *spark*.

        :param spark: object exposing ``_jvm`` and ``sparkContext``
            (the demo scripts pass their SparkSession).
        """
        jvm_log4j = spark._jvm.org.apache.log4j
        application = spark.sparkContext.getConf().get("spark.app.name")
        package_root = "guru.learningjournal.spark.examples"
        logger_name = package_root + "." + application
        self.logger = jvm_log4j.LogManager.getLogger(logger_name)

    def warn(self, message):
        """Pass *message* to the wrapped logger's ``warn`` method."""
        self.logger.warn(message)

    def info(self, message):
        """Pass *message* to the wrapped logger's ``info`` method."""
        self.logger.info(message)

    def error(self, message):
        """Pass *message* to the wrapped logger's ``error`` method."""
        self.logger.error(message)

    def debug(self, message):
        """Pass *message* to the wrapped logger's ``debug`` method."""
        self.logger.debug(message)
if __name__ == "__main__":
    # Local session with three threads and two shuffle partitions.
    session_builder = (
        SparkSession.builder
        .master("local[3]")
        .appName("Streaming Watermark Demo")
        .config("spark.streaming.stopGracefullyOnShutdown", "true")
        .config("spark.sql.shuffle.partitions", 2)
    )
    spark = session_builder.getOrCreate()

    logger = Log4j(spark)

    # Every field arrives as a string; CreatedTime is parsed into a timestamp below.
    impression_schema = StructType(
        [StructField(name, StringType())
         for name in ("ImpressionID", "CreatedTime", "Campaigner")]
    )
    click_schema = StructType(
        [StructField(name, StringType())
         for name in ("ImpressionID", "CreatedTime")]
    )

    def read_topic(topic):
        """Return a streaming DataFrame over Kafka `topic`, starting at the earliest offset."""
        return (
            spark.readStream
            .format("kafka")
            .option("kafka.bootstrap.servers", "localhost:9092")
            .option("subscribe", topic)
            .option("startingOffsets", "earliest")
            .load()
        )

    timestamp_format = "yyyy-MM-dd HH:mm:ss"

    # Impressions: decode the JSON payload, derive ImpressionTime and
    # attach a 30-minute watermark to it.
    impression_source = read_topic("impressions")
    impression_payload = from_json(
        col("value").cast("string"), impression_schema
    ).alias("value")
    impressions = (
        impression_source
        .select(impression_payload)
        .selectExpr("value.ImpressionID", "value.CreatedTime", "value.Campaigner")
        .withColumn("ImpressionTime", to_timestamp(col("CreatedTime"), timestamp_format))
        .drop("CreatedTime")
        .withWatermark("ImpressionTime", "30 minute")
    )

    # Clicks: same treatment; the id is renamed to ClickID so the join
    # condition can tell the two sides apart.
    click_source = read_topic("clicks")
    click_payload = from_json(
        col("value").cast("string"), click_schema
    ).alias("value")
    clicks = (
        click_source
        .select(click_payload)
        .selectExpr("value.ImpressionID as ClickID", "value.CreatedTime")
        .withColumn("ClickTime", to_timestamp(col("CreatedTime"), timestamp_format))
        .drop("CreatedTime")
        .withWatermark("ClickTime", "30 minute")
    )

    # NOTE(review): the join condition has no event-time range constraint;
    # confirm against the Spark guide whether join state can be purged with
    # the watermarks alone.
    joined = impressions.join(clicks, expr("ImpressionID == ClickID"), "inner")

    # Print matched rows to the console once a minute, in append mode.
    console_writer = (
        joined.writeStream
        .format("console")
        .outputMode("append")
        .option("checkpointLocation", "chk-point-dir")
        .trigger(processingTime="1 minute")
    )
    output_query = console_writer.start()

    logger.info("Waiting for Query")
    output_query.awaitTermination()
class Log4j:
    """Expose the JVM-side log4j logger of a Spark application to Python code.

    The wrapped logger is named
    ``guru.learningjournal.spark.examples.<spark.app.name>``.
    """

    def __init__(self, spark):
        """Look up the log4j logger for the application behind ``spark``.

        Args:
            spark: Session object; only its ``_jvm`` and ``sparkContext``
                attributes are used.
        """
        jvm_log4j = spark._jvm.org.apache.log4j

        namespace = "guru.learningjournal.spark.examples"
        # The application name is read back from the Spark configuration.
        application = spark.sparkContext.getConf().get("spark.app.name")
        logger_name = namespace + "." + application

        self.logger = jvm_log4j.LogManager.getLogger(logger_name)

    def debug(self, message):
        """Forward ``message`` to the wrapped logger at DEBUG level."""
        self.logger.debug(message)

    def info(self, message):
        """Forward ``message`` to the wrapped logger at INFO level."""
        self.logger.info(message)

    def warn(self, message):
        """Forward ``message`` to the wrapped logger at WARN level."""
        self.logger.warn(message)

    def error(self, message):
        """Forward ``message`` to the wrapped logger at ERROR level."""
        self.logger.error(message)
if __name__ == "__main__":
    # Local session with three threads and two shuffle partitions.
    session_builder = (
        SparkSession.builder
        .master("local[3]")
        .appName("Streaming Outer Join Demo")
        .config("spark.streaming.stopGracefullyOnShutdown", "true")
        .config("spark.sql.shuffle.partitions", 2)
    )
    spark = session_builder.getOrCreate()

    logger = Log4j(spark)

    # Every field arrives as a string; CreatedTime is parsed into a timestamp below.
    impression_schema = StructType(
        [StructField(name, StringType())
         for name in ("ImpressionID", "CreatedTime", "Campaigner")]
    )
    click_schema = StructType(
        [StructField(name, StringType())
         for name in ("ImpressionID", "CreatedTime")]
    )

    def read_topic(topic):
        """Return a streaming DataFrame over Kafka `topic`, starting at the earliest offset."""
        return (
            spark.readStream
            .format("kafka")
            .option("kafka.bootstrap.servers", "localhost:9092")
            .option("subscribe", topic)
            .option("startingOffsets", "earliest")
            .load()
        )

    timestamp_format = "yyyy-MM-dd HH:mm:ss"

    # Impressions: decode the JSON payload, derive ImpressionTime and
    # attach a 30-minute watermark to it.
    impression_source = read_topic("impressions")
    impression_payload = from_json(
        col("value").cast("string"), impression_schema
    ).alias("value")
    impressions = (
        impression_source
        .select(impression_payload)
        .selectExpr("value.ImpressionID", "value.CreatedTime", "value.Campaigner")
        .withColumn("ImpressionTime", to_timestamp(col("CreatedTime"), timestamp_format))
        .drop("CreatedTime")
        .withWatermark("ImpressionTime", "30 minute")
    )

    # Clicks: same treatment; the id is renamed to ClickID so the join
    # condition can tell the two sides apart.
    click_source = read_topic("clicks")
    click_payload = from_json(
        col("value").cast("string"), click_schema
    ).alias("value")
    clicks = (
        click_source
        .select(click_payload)
        .selectExpr("value.ImpressionID as ClickID", "value.CreatedTime")
        .withColumn("ClickTime", to_timestamp(col("CreatedTime"), timestamp_format))
        .drop("CreatedTime")
        .withWatermark("ClickTime", "30 minute")
    )

    # A click matches an impression with the same id when it occurs no more
    # than 15 minutes after the impression; impressions without such a click
    # are still emitted because the join is a left outer join.
    join_condition = (
        "ImpressionID == ClickID"
        " AND ClickTime BETWEEN ImpressionTime AND ImpressionTime + interval 15 minute"
    )
    joined = impressions.join(clicks, expr(join_condition), "leftOuter")

    # Print joined rows to the console once a minute, in append mode.
    console_writer = (
        joined.writeStream
        .format("console")
        .outputMode("append")
        .option("checkpointLocation", "chk-point-dir")
        .trigger(processingTime="1 minute")
    )
    output_query = console_writer.start()

    logger.info("Waiting for Query")
    output_query.awaitTermination()
-------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\zookeeper-server-start.bat %KAFKA_HOME%\config\zookeeper.properties -------------------------------------------------------------------------------- /14-StreamingOuterJoinDemo/kafka-scripts/02-start-kafka.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-server-start.bat %KAFKA_HOME%\config\server.properties -------------------------------------------------------------------------------- /14-StreamingOuterJoinDemo/kafka-scripts/03-create-impression-topic.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic impressions -------------------------------------------------------------------------------- /14-StreamingOuterJoinDemo/kafka-scripts/04-create-click-topic.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic clicks -------------------------------------------------------------------------------- /14-StreamingOuterJoinDemo/kafka-scripts/05-start-impression-producer.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-console-producer.bat --broker-list localhost:9092 --topic impressions -------------------------------------------------------------------------------- /14-StreamingOuterJoinDemo/kafka-scripts/06-start-click-producer.cmd: -------------------------------------------------------------------------------- 1 | %KAFKA_HOME%\bin\windows\kafka-console-producer.bat --broker-list localhost:9092 --topic clicks -------------------------------------------------------------------------------- 
class Log4j:
    """Expose the JVM-side log4j logger of a Spark application to Python code.

    The wrapped logger is named
    ``guru.learningjournal.spark.examples.<spark.app.name>``.
    """

    def __init__(self, spark):
        """Look up the log4j logger for the application behind ``spark``.

        Args:
            spark: Session object; only its ``_jvm`` and ``sparkContext``
                attributes are used.
        """
        jvm_log4j = spark._jvm.org.apache.log4j

        namespace = "guru.learningjournal.spark.examples"
        # The application name is read back from the Spark configuration.
        application = spark.sparkContext.getConf().get("spark.app.name")
        logger_name = namespace + "." + application

        self.logger = jvm_log4j.LogManager.getLogger(logger_name)

    def debug(self, message):
        """Forward ``message`` to the wrapped logger at DEBUG level."""
        self.logger.debug(message)

    def info(self, message):
        """Forward ``message`` to the wrapped logger at INFO level."""
        self.logger.info(message)

    def warn(self, message):
        """Forward ``message`` to the wrapped logger at WARN level."""
        self.logger.warn(message)

    def error(self, message):
        """Forward ``message`` to the wrapped logger at ERROR level."""
        self.logger.error(message)
19 | log4j.logger.org.spark_project.jetty=WARN 20 | log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR 21 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO 22 | log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO 23 | log4j.logger.org.apache.parquet=ERROR 24 | log4j.logger.parquet=ERROR 25 | log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL 26 | log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR 27 | 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Learning Journal 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Apache Spark 3 - Real-time Stream Processing using Python 2 | This is the central repository for all the materials related to Apache Spark 3 -Real-time Stream Processing using Python
Course by Prashant Pandey. 3 |
You can get the full course at 4 | Apache Spark Course @ Udemy. 5 | 6 | 7 |
8 | 9 | Apache Spark 3 - Real-time Stream Processing using Python 10 | 11 | 12 |

Description

13 |

14 | I am creating the Apache Spark 3 - Real-time Stream Processing using Python course to help you understand stream processing with Apache Spark and apply that knowledge to build stream processing solutions. This course is example-driven and follows a working-session-like approach. We will take a live-coding approach and explain all the needed concepts along the way. 15 |

16 | 17 |

Who should take this Course?

18 |

19 | I designed this course for software engineers who want to develop stream processing pipelines and applications using Apache Spark. I am also creating this course for data architects and data engineers who are responsible for designing and building the organization’s data-centric infrastructure. Another group is the managers and architects who do not work directly on Spark implementation but do work with the people who implement Apache Spark at the ground level. 20 |

21 | 22 |

Spark and source code version

23 |

24 | This course uses Apache Spark 3.x. I have tested all the source code and examples used in this course on the Apache Spark 3.0.0 open-source distribution. 25 |

26 | 27 |
28 | --------------------------------------------------------------------------------