├── LICENSE
├── README.md
├── pom.xml
└── src
    └── main
        ├── java
        │   └── com
        │       └── packt
        │           └── sfjd
        │               ├── ch10
        │               │   ├── BikeRentalPrediction.java
        │               │   ├── Flight.java
        │               │   ├── FlightDelay.java
        │               │   ├── JavaALSExample.java
        │               │   ├── JavaEstimatorTransformerParamExample.java
        │               │   └── Rating.java
        │               ├── ch11
        │               │   ├── AbsFunc1.java
        │               │   ├── AbsFunc2.java
        │               │   ├── AbsFunc3.java
        │               │   ├── AbsFunc4.java
        │               │   ├── AbsFunc5.java
        │               │   ├── AbsFunc6.java
        │               │   ├── AbsFunc7.java
        │               │   ├── AbsFunc8.java
        │               │   ├── PropertyGraphExample.java
        │               │   └── PropertyGraphExampleFromEdges.java
        │               ├── ch2
        │               │   ├── AInnerClassVsLambda.java
        │               │   ├── Car.java
        │               │   ├── ClosureDemo.java
        │               │   ├── ClosureExample.java
        │               │   ├── CollectorsExamples.java
        │               │   ├── CreateStreamExample.java
        │               │   ├── Interface1.java
        │               │   ├── Interface2.java
        │               │   ├── InterfaceImpl.java
        │               │   ├── IntermediateOpExample.java
        │               │   ├── LambdaExamples.java
        │               │   ├── LexicalScoping.java
        │               │   ├── MethodReferenceExample.java
        │               │   ├── MyFileNameFilter.java
        │               │   ├── MyFilterImpl.java
        │               │   ├── MyInterface.java
        │               │   ├── MyInterfaceDemo.java
        │               │   ├── MyInterfaceImpl.java
        │               │   ├── ShortCircuitOperationExample.java
        │               │   ├── TerminalOpExample.java
        │               │   ├── WordCountInJava.java
        │               │   └── generics
        │               │       ├── FirstExample.java
        │               │       ├── MyGeneric.java
        │               │       └── MyGenericsDemo.java
        │               ├── ch4
        │               │   ├── ActionExamples.java
        │               │   ├── ActionsExamplesOld.java
        │               │   ├── AggeregateExample.java
        │               │   ├── JavaWordCount.java
        │               │   ├── PersistExample.java
        │               │   ├── SparkWordCount.java
        │               │   ├── SparkWordCount_1_7.java
        │               │   ├── WordCount.java
        │               │   └── transformations
        │               │       ├── Test.java
        │               │       ├── TestMain.java
        │               │       └── Transformations.java
        │               ├── ch5
        │               │   ├── CSVFileOperations.java
        │               │   ├── CassandraExample.java
        │               │   ├── DelimitedFileOperations.java
        │               │   ├── Employee.java
        │               │   ├── HdfsExample.java
        │               │   ├── JsonFileOperations.java
        │               │   ├── LFSExample.java
        │               │   ├── Movie.java
        │               │   ├── Person.java
        │               │   ├── PersonDetails.java
        │               │   ├── S3Example.java
        │               │   ├── TextFileOperations.java
        │               │   └── XMLFileOperations.java
        │               ├── ch7
        │               │   ├── AdvanceActionExamples.java
        │               │   ├── BroadcastVariable.java
        │               │   ├── CustomPartitioner.java
        │               │   ├── CustomPartitionerExample.java
        │               │   ├── ListAccumulator.java
        │               │   ├── MapSideJoinBroadcast.java
        │               │   ├── PartitionIndexInformation.java
        │               │   ├── Partitioning.java
        │               │   ├── TestAccumulator.java
        │               │   └── Transformations.java
        │               ├── ch8
        │               │   ├── Average.java
        │               │   ├── AverageUDAF.java
        │               │   ├── CalcDaysUDF.java
        │               │   ├── ContextCreation.java
        │               │   ├── DatasetOperations.java
        │               │   ├── DfExample.java
        │               │   ├── DsExample.java
        │               │   ├── Employee.java
        │               │   ├── SparkSessionExample.java
        │               │   ├── SparkSessionHeloWorld.java
        │               │   ├── TypeSafeUDAF.java
        │               │   └── UDFExample.java
        │               └── ch9
        │                   ├── Calculator.java
        │                   ├── FileStreamingEx.java
        │                   ├── FlightDetails.java
        │                   ├── KafkaExample.java
        │                   ├── StateFulProcessingExample.java
        │                   ├── StateLessProcessingExample.java
        │                   ├── StructuredStreamingExample.java
        │                   ├── TweetText.java
        │                   ├── WindowBatchInterval.java
        │                   ├── WordCountRecoverableEx.java
        │                   ├── WordCountSocketEx.java
        │                   ├── WordCountSocketJava8Ex.java
        │                   ├── WordCountSocketStateful.java
        │                   └── WordCountTransformOpEx.java
        └── resources
            ├── Apology_by_Plato.txt
            ├── Employee.txt
            ├── breakfast_menu.xml
            ├── dept.txt
            ├── log4j.properties
            ├── logFileWithException.log
            ├── movies.csv
            ├── numSeries.txt
            ├── people.tsv
            └── pep_json.json
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 Packt
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | # Apache Spark for Java Developers
5 | This is the code repository for [Apache Spark for Java Developers](https://www.packtpub.com/big-data-and-business-intelligence/apache-spark-java-developers?utm_source=github&utm_medium=repository&utm_campaign=9781787126497), published by [Packt](https://www.packtpub.com/?utm_source=github). It contains all the supporting project files necessary to work through the book from start to finish.
6 | ## About the Book
7 | Apache Spark is the buzzword in the big data industry right now, especially with the increasing need for real-time streaming and data processing. While Spark is built on Scala, the Spark Java API exposes all the Spark features available in the Scala version for Java developers. This book will show you how you can implement various functionalities of the Apache Spark framework in Java, without stepping out of your comfort zone.
8 |
9 | The book starts with an introduction to the Apache Spark 2.x ecosystem, followed by explaining how to install and configure Spark, and refreshes the Java concepts that will be useful to you when consuming Apache Spark's APIs. You will explore RDD and its associated common Action and Transformation Java APIs, set up a production-like clustered environment, and work with Spark SQL. Moving on, you will perform near-real-time processing with Spark streaming, Machine Learning analytics with Spark MLlib, and graph processing with GraphX, all using various Java packages.
10 |
11 | By the end of the book, you will have a solid foundation in implementing components in the Spark framework in Java to build fast, real-time applications.
12 |
13 | ## Instructions and Navigation
14 | All of the code is organized into chapter-wise folders, named after the chapter number. For example, the code for Chapter 2 is in the ch2 folder.
15 |
16 | Chapter-wise code files are placed inside the following folder: "\src\main\java\com\packt\sfjd"
17 |
18 | The code will look like the following:
19 | ```
20 | SparkConf conf = new SparkConf().setMaster("local").setAppName("Local File system Example");
21 | JavaSparkContext jsc = new JavaSparkContext(conf);
22 | ```
23 |
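A minimal sketch of how such a context is then used in the examples (the file numSeries.txt from src/main/resources is assumed here purely for illustration; the actual input files vary by chapter):
```
JavaRDD<String> lines = jsc.textFile("src/main/resources/numSeries.txt");
System.out.println("Number of lines read: " + lines.count());
jsc.stop();
```
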
28 | If you want to set up Spark on your local machine, then you can follow the instructions mentioned in Chapter 3, Let Us Spark.
29 |
30 | ## Related Products
31 | * [Apache Spark for Data Science Cookbook](https://www.packtpub.com/big-data-and-business-intelligence/apache-spark-data-science-cookbook?utm_source=github&utm_medium=repository&utm_campaign=9781785880100)
32 |
33 | * [Mastering Apache Spark 2.x - Second Edition](https://www.packtpub.com/big-data-and-business-intelligence/mastering-apache-spark-2x-second-edition?utm_source=github&utm_medium=repository&utm_campaign=9781786462749)
34 |
35 | * [Apache Spark 2.x Cookbook](https://www.packtpub.com/big-data-and-business-intelligence/apache-spark-2x-cookbook?utm_source=github&utm_medium=repository&utm_campaign=9781787127265)
36 | ### Download a free PDF
37 |
38 | If you have already purchased a print or Kindle version of this book, you can get a DRM-free PDF version at no cost. Simply click on the link to claim your free PDF.
39 |
https://packt.link/free-ebook/9781787126497
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.packt.spark.dev</groupId>
  <artifactId>SparkForJavaDevelopers</artifactId>
  <version>0.0.1-SNAPSHOT</version>

  <properties>
    <spark.version>2.1.1</spark.version>
    <scala.binary.version>2.11</scala.binary.version>
    <java-version>1.8</java-version>
    <jackson.version>2.6.5</jackson.version>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-aws</artifactId>
      <version>2.7.1</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_${scala.binary.version}</artifactId>
      <version>${spark.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-sql_${scala.binary.version}</artifactId>
      <version>${spark.version}</version>
    </dependency>
    <dependency>
      <groupId>com.databricks</groupId>
      <artifactId>spark-xml_${scala.binary.version}</artifactId>
      <version>0.3.3</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-streaming_${scala.binary.version}</artifactId>
      <version>${spark.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-mllib_${scala.binary.version}</artifactId>
      <version>${spark.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-streaming-kafka-0-10_${scala.binary.version}</artifactId>
      <version>${spark.version}</version>
    </dependency>
    <dependency>
      <groupId>com.databricks</groupId>
      <artifactId>spark-xml_${scala.binary.version}</artifactId>
      <version>0.4.1</version>
    </dependency>
    <dependency>
      <groupId>net.sf.saxon</groupId>
      <artifactId>Saxon-HE</artifactId>
      <version>9.4</version>
    </dependency>
    <dependency>
      <groupId>com.datastax.spark</groupId>
      <artifactId>spark-cassandra-connector_2.11</artifactId>
      <version>2.0.0-M1</version>
    </dependency>
    <dependency>
      <groupId>com.fasterxml.jackson.core</groupId>
      <artifactId>jackson-annotations</artifactId>
      <version>${jackson.version}</version>
    </dependency>
    <dependency>
      <groupId>com.fasterxml.jackson.core</groupId>
      <artifactId>jackson-core</artifactId>
      <version>${jackson.version}</version>
    </dependency>
    <dependency>
      <groupId>com.fasterxml.jackson.core</groupId>
      <artifactId>jackson-databind</artifactId>
      <version>${jackson.version}</version>
    </dependency>
    <dependency>
      <groupId>com.sun</groupId>
      <artifactId>tools</artifactId>
      <version>${java-version}</version>
      <scope>system</scope>
      <systemPath>C:\\Program Files\\Java\\jdk1.8.0_65\\lib\\tools.jar</systemPath>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.1</version>
        <configuration>
          <source>${java-version}</source>
          <target>${java-version}</target>
        </configuration>
      </plugin>
      <plugin>
        <artifactId>maven-assembly-plugin</artifactId>
        <configuration>
          <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
        </configuration>
        <executions>
          <execution>
            <id>make-assembly</id>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch10/BikeRentalPrediction.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch10;
2 |
3 | import org.apache.log4j.Level;
4 | import org.apache.log4j.LogManager;
5 | import org.apache.log4j.Logger;
6 | import org.apache.spark.ml.Pipeline;
7 | import org.apache.spark.ml.PipelineModel;
8 | import org.apache.spark.ml.PipelineStage;
9 | import org.apache.spark.ml.evaluation.RegressionEvaluator;
10 | import org.apache.spark.ml.feature.VectorAssembler;
11 | import org.apache.spark.ml.feature.VectorIndexer;
12 | import org.apache.spark.ml.param.ParamMap;
13 | import org.apache.spark.ml.regression.GBTRegressor;
14 | import org.apache.spark.ml.tuning.CrossValidator;
15 | import org.apache.spark.ml.tuning.ParamGridBuilder;
16 | import org.apache.spark.sql.Dataset;
17 | import org.apache.spark.sql.Row;
18 | import org.apache.spark.sql.SparkSession;
19 |
20 |
21 | import org.apache.spark.sql.types.DataTypes;
22 |
23 |
24 | import static org.apache.spark.sql.functions.col;
25 |
26 | //https://docs.cloud.databricks.com/docs/latest/sample_applications/index.html#Sample%20ML/MLPipeline%20Bike%20Dataset.html
27 |
28 | public class BikeRentalPrediction {
29 |
30 | public static void main(String[] args) {
31 | System.setProperty("hadoop.home.dir", "E:\\sumitK\\Hadoop");
32 | SparkSession sparkSession = SparkSession
33 | .builder()
34 | .master("local")
35 | .config("spark.sql.warehouse.dir",
36 | "file:///E:/sumitK/Hadoop/warehouse")
37 | .appName("BikeRentalPrediction").getOrCreate();
38 | Logger rootLogger = LogManager.getRootLogger();
39 | rootLogger.setLevel(Level.WARN);
40 | //We use the sqlContext.read method to read the data and set a few options:
41 | // 'format': specifies the Spark CSV data source
42 | // 'header': set to true to indicate that the first line of the CSV data file is a header
43 | // The file is called 'hour.csv'.
44 | Dataset<Row> ds = sparkSession.read()
45 | .format("org.apache.spark.sql.execution.datasources.csv.CSVFileFormat")
46 | .option("header", "true")
47 | .load("E:\\sumitK\\Hadoop\\Bike-Sharing-Dataset\\hour.csv");
48 |
49 | ds.cache();
50 |
51 | ds.select("season").show();
52 |
53 | ds.show();
54 |
55 | System.out.println("Our dataset has rows :: "+ ds.count());
56 |
57 | Dataset<Row> df = ds.drop("instant").drop("dteday").drop("casual").drop("registered");
58 | df.printSchema();
59 | //col("...") is preferable to df.col("...")
60 | Dataset<Row> dformatted = df.select(col("season").cast(DataTypes.IntegerType),
61 | col("yr").cast(DataTypes.IntegerType),
62 | col("mnth").cast(DataTypes.IntegerType),
63 | col("hr").cast(DataTypes.IntegerType),
64 | col("holiday").cast(DataTypes.IntegerType),
65 | col("weekday").cast(DataTypes.IntegerType),
66 | col("workingday").cast(DataTypes.IntegerType),
67 | col("weathersit").cast(DataTypes.IntegerType),
68 | col("temp").cast(DataTypes.IntegerType),
69 | col("atemp").cast(DataTypes.IntegerType),
70 | col("hum").cast(DataTypes.IntegerType),
71 | col("windspeed").cast(DataTypes.IntegerType),
72 | col("cnt").cast(DataTypes.IntegerType));
73 |
74 |
75 | dformatted.printSchema();
76 | Dataset<Row>[] data = dformatted.randomSplit(new double[]{0.7, 0.3});
77 | System.out.println("We have training examples count :: "+ data[0].count()+" and test examples count ::"+data[1].count());
78 |
79 | ///
80 | //removing the 'cnt' label column and then forming the feature-name array
81 | String[] featuresCols = dformatted.drop("cnt").columns();
82 |
83 | for(String str:featuresCols){
84 | System.out.println(str+" :: ");
85 | }
86 |
87 | //This concatenates all feature columns into a single feature vector in a new column "rawFeatures".
88 | VectorAssembler vectorAssembler = new VectorAssembler().setInputCols(featuresCols).setOutputCol("rawFeatures");
89 | //This identifies categorical features and indexes them.
90 | VectorIndexer vectorIndexer= new VectorIndexer().setInputCol("rawFeatures").setOutputCol("features").setMaxCategories(4);
91 |
92 | //Takes the "features" column and learns to predict "cnt"
93 | GBTRegressor gbt = new GBTRegressor().setLabelCol("cnt");
94 |
95 | // Define a grid of hyperparameters to test:
96 | // - maxDepth: max depth of each decision tree in the GBT ensemble
97 | // - maxIter: iterations, i.e., number of trees in each GBT ensemble
98 | // In this example notebook, we keep these values small. In practice, to get the highest accuracy, you would likely want to try deeper trees (10 or higher) and more trees in the ensemble (>100).
99 | ParamMap[] paramGrid = new ParamGridBuilder().addGrid(gbt.maxDepth(),new int[]{2, 5}).addGrid(gbt.maxIter(),new int[] {10, 100}).build();
100 | // We define an evaluation metric. This tells CrossValidator how well we are doing by comparing the true labels with predictions.
101 | RegressionEvaluator evaluator = new RegressionEvaluator().setMetricName("rmse").setLabelCol(gbt.getLabelCol()).setPredictionCol(gbt.getPredictionCol());
102 |
103 | // # Declare the CrossValidator, which runs model tuning for us.
104 | CrossValidator cv = new CrossValidator().setEstimator(gbt).setEvaluator(evaluator).setEstimatorParamMaps(paramGrid);
105 |
106 | Pipeline pipeline = new Pipeline().setStages(new PipelineStage[]{vectorAssembler,vectorIndexer,cv});
107 |
108 | PipelineModel pipelineModel=pipeline.fit(data[0]);
109 |
110 | Dataset<Row> predictions = pipelineModel.transform(data[1]);
111 |
112 | predictions.show();
113 | //predictions.select("cnt", "prediction", *featuresCols);
114 | }
115 |
116 | }
117 |
118 |
119 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch10/Flight.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch10;
2 |
3 | import java.io.Serializable;
4 |
5 | public class Flight implements Serializable {
6 | /**
7 | |-- CRSArrTime: integer (nullable = true)
8 | |-- CRSDepTime: integer (nullable = true)
9 | |-- CRSElapsedTime: integer (nullable = true)
10 | |-- actualElapsedTime: integer (nullable = true)
11 | |-- airTime: integer (nullable = true)
12 | |-- arrDelay: double (nullable = true)
13 | |-- arrTime: integer (nullable = true)
14 | |-- dayOfWeek: string (nullable = true)
15 | |-- dayofMonth: string (nullable = true)
16 | |-- depDelay: integer (nullable = true)
17 | |-- depTime: integer (nullable = true)
18 | |-- distance: integer (nullable = true)
19 | |-- month: string (nullable = true)
20 | |-- origin: string (nullable = true)
21 | |-- uniqueCarrier: string (nullable = true)
22 | */
23 | private static final long serialVersionUID = 1L;
24 | private String Month;
25 | private String DayofMonth;
26 | private String DayOfWeek;
27 | private Integer DepTime;
28 | private Integer CRSDepTime;
29 | private Integer ArrTime;
30 | private Integer CRSArrTime;
31 | private String UniqueCarrier;
32 | private Integer ActualElapsedTime;
33 | private Integer CRSElapsedTime;
34 | private Integer AirTime;
35 | private Double ArrDelay;
36 | private Integer DepDelay;
37 | private String Origin;
38 | private Integer Distance;
39 |
40 |
41 |
42 | public Flight(String month, String dayofMonth, String dayOfWeek,
43 | Integer depTime, Integer cRSDepTime, Integer arrTime,
44 | Integer cRSArrTime, String uniqueCarrier,
45 | Integer actualElapsedTime, Integer cRSElapsedTime, Integer airTime,
46 | Double arrDelay, Integer depDelay, String origin, Integer distance) {
47 | super();
48 | Month = month;
49 | DayofMonth = dayofMonth;
50 | DayOfWeek = dayOfWeek;
51 | DepTime = depTime;
52 | CRSDepTime = cRSDepTime;
53 | ArrTime = arrTime;
54 | CRSArrTime = cRSArrTime;
55 | UniqueCarrier = uniqueCarrier;
56 | ActualElapsedTime = actualElapsedTime;
57 | CRSElapsedTime = cRSElapsedTime;
58 | AirTime = airTime;
59 | ArrDelay = arrDelay;
60 | DepDelay = depDelay;
61 | Origin = origin;
62 | Distance = distance;
63 | }
64 |
65 |
66 | @Override
67 | public String toString() {
68 | return "Flight [Month=" + Month + ", DayofMonth=" + DayofMonth
69 | + ", DayOfWeek=" + DayOfWeek + ", DepTime=" + DepTime
70 | + ", CRSDepTime=" + CRSDepTime + ", ArrTime=" + ArrTime
71 | + ", CRSArrTime=" + CRSArrTime + ", UniqueCarrier="
72 | + UniqueCarrier + ", ActualElapsedTime=" + ActualElapsedTime
73 | + ", CRSElapsedTime=" + CRSElapsedTime + ", AirTime=" + AirTime
74 | + ", ArrDelay=" + ArrDelay + ", DepDelay=" + DepDelay
75 | + ", Origin=" + Origin + ", Distance=" + Distance + "]";
76 | }
77 |
78 |
79 | public String getMonth() {
80 | return Month;
81 | }
82 | public void setMonth(String month) {
83 | Month = month;
84 | }
85 | public String getDayofMonth() {
86 | return DayofMonth;
87 | }
88 | public void setDayofMonth(String dayofMonth) {
89 | DayofMonth = dayofMonth;
90 | }
91 | public String getDayOfWeek() {
92 | return DayOfWeek;
93 | }
94 | public void setDayOfWeek(String dayOfWeek) {
95 | DayOfWeek = dayOfWeek;
96 | }
97 | public Integer getDepTime() {
98 | return DepTime;
99 | }
100 | public void setDepTime(Integer depTime) {
101 | DepTime = depTime;
102 | }
103 | public Integer getCRSDepTime() {
104 | return CRSDepTime;
105 | }
106 | public void setCRSDepTime(Integer cRSDepTime) {
107 | CRSDepTime = cRSDepTime;
108 | }
109 | public Integer getArrTime() {
110 | return ArrTime;
111 | }
112 | public void setArrTime(Integer arrTime) {
113 | ArrTime = arrTime;
114 | }
115 | public Integer getCRSArrTime() {
116 | return CRSArrTime;
117 | }
118 | public void setCRSArrTime(Integer cRSArrTime) {
119 | CRSArrTime = cRSArrTime;
120 | }
121 | public String getUniqueCarrier() {
122 | return UniqueCarrier;
123 | }
124 | public void setUniqueCarrier(String uniqueCarrier) {
125 | UniqueCarrier = uniqueCarrier;
126 | }
127 | public Integer getActualElapsedTime() {
128 | return ActualElapsedTime;
129 | }
130 | public void setActualElapsedTime(Integer actualElapsedTime) {
131 | ActualElapsedTime = actualElapsedTime;
132 | }
133 | public Integer getCRSElapsedTime() {
134 | return CRSElapsedTime;
135 | }
136 | public void setCRSElapsedTime(Integer cRSElapsedTime) {
137 | CRSElapsedTime = cRSElapsedTime;
138 | }
139 | public Integer getAirTime() {
140 | return AirTime;
141 | }
142 | public void setAirTime(Integer airTime) {
143 | AirTime = airTime;
144 | }
145 | public Double getArrDelay() {
146 | return ArrDelay;
147 | }
148 | public void setArrDelay(Double arrDelay) {
149 | ArrDelay = arrDelay;
150 | }
151 | public Integer getDepDelay() {
152 | return DepDelay;
153 | }
154 | public void setDepDelay(Integer depDelay) {
155 | DepDelay = depDelay;
156 | }
157 | public String getOrigin() {
158 | return Origin;
159 | }
160 | public void setOrigin(String origin) {
161 | Origin = origin;
162 | }
163 | public Integer getDistance() {
164 | return Distance;
165 | }
166 | public void setDistance(Integer distance) {
167 | Distance = distance;
168 | }
169 |
170 |
171 | }
172 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch10/JavaALSExample.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch10;
2 |
3 | import org.apache.log4j.Level;
4 | import org.apache.log4j.LogManager;
5 | import org.apache.log4j.Logger;
6 | import org.apache.spark.sql.Dataset;
7 | import org.apache.spark.sql.Row;
8 | import org.apache.spark.sql.SparkSession;
9 |
10 |
11 | import org.apache.spark.api.java.JavaRDD;
12 | import org.apache.spark.api.java.function.Function;
13 | import org.apache.spark.ml.evaluation.RegressionEvaluator;
14 | import org.apache.spark.ml.recommendation.ALS;
15 | import org.apache.spark.ml.recommendation.ALSModel;
16 |
17 |
18 | //examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java
19 | // examples/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala
20 |
21 | public class JavaALSExample {
22 |
23 | public static void main(String[] args) {
24 | System.setProperty("hadoop.home.dir", "E:\\sumitK\\Hadoop");
25 | Logger rootLogger = LogManager.getRootLogger();
26 | rootLogger.setLevel(Level.WARN);
27 | SparkSession spark = SparkSession
28 | .builder()
29 | .master("local")
30 | .config("spark.sql.warehouse.dir","file:///E:/sumitK/Hadoop/warehouse")
31 | .appName("JavaALSExample")
32 | .getOrCreate();
33 |
34 | // $example on$
35 | JavaRDD<Rating> ratingsRDD = spark
36 | .read().textFile("E:\\sumitK\\Hadoop\\movieLens-latest-small\\ratings.csv").javaRDD().filter(str-> !str.contains("userId"))
37 | .map(new Function<String, Rating>() {
38 | public Rating call(String str) {
39 | return Rating.parseRating(str);
40 | }
41 | });
42 |
43 | /* Dataset ratingDS = spark.read()
44 | .format("org.apache.spark.sql.execution.datasources.csv.CSVFileFormat")
45 | .option("header", "true")
46 | .load("E:\\sumitK\\Hadoop\\movieLens-latest-small\\ratings.csv");*/
47 |
48 |
49 |
50 | Dataset<Row> ratings = spark.createDataFrame(ratingsRDD, Rating.class);
51 | ratings.show();
52 | Dataset<Row>[] splits = ratings.randomSplit(new double[]{0.8, 0.2});
53 | Dataset<Row> training = splits[0];
54 | Dataset<Row> test = splits[1];
55 | System.out.println("The no of training rows are :"+training.count()+" and the row count of test are :"+test.count());
56 |
57 | // Build the recommendation model using ALS on the training data
58 | ALS als = new ALS()
59 | .setMaxIter(5)
60 | .setRegParam(0.01)
61 | .setUserCol("userId")
62 | .setItemCol("movieId")
63 | .setRatingCol("rating");
64 | ALSModel model = als.fit(training);
65 |
66 | // Evaluate the model by computing the RMSE on the test data
67 | Dataset<Row> predictions = model.transform(test);
68 | predictions.show();
69 |
70 | RegressionEvaluator evaluator = new RegressionEvaluator()
71 | .setMetricName("rmse")
72 | .setLabelCol("rating")
73 | .setPredictionCol("prediction");
74 | Double rmse = evaluator.evaluate(predictions);
75 | System.out.println("Root-mean-square error = " + rmse);
76 | // $example off$
77 | spark.stop();
78 | }
79 |
80 | }
81 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch10/JavaEstimatorTransformerParamExample.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch10;
2 |
3 | //$example on$
4 | import java.util.Arrays;
5 | import java.util.List;
6 |
7 | import org.apache.log4j.Level;
8 | import org.apache.log4j.LogManager;
9 | import org.apache.log4j.Logger;
10 | import org.apache.spark.ml.classification.LogisticRegression;
11 | import org.apache.spark.ml.classification.LogisticRegressionModel;
12 | import org.apache.spark.ml.linalg.VectorUDT;
13 | import org.apache.spark.ml.linalg.Vectors;
14 | import org.apache.spark.ml.param.ParamMap;
15 | import org.apache.spark.sql.Dataset;
16 | import org.apache.spark.sql.Row;
17 | import org.apache.spark.sql.RowFactory;
18 | import org.apache.spark.sql.types.DataTypes;
19 | import org.apache.spark.sql.types.Metadata;
20 | import org.apache.spark.sql.types.StructField;
21 | import org.apache.spark.sql.types.StructType;
22 | //$example off$
23 | import org.apache.spark.sql.SparkSession;
24 |
25 | public class JavaEstimatorTransformerParamExample {
26 |
27 | public static void main(String[] args) {
28 | SparkSession spark = SparkSession
29 | .builder().master("local").config("spark.sql.warehouse.dir", "file:///C:/Users/sumit.kumar/Downloads/bin/warehouse")
30 | .appName("JavaEstimatorTransformerParamExample")
31 | .getOrCreate();
32 | Logger rootLogger = LogManager.getRootLogger();
33 | rootLogger.setLevel(Level.WARN);
34 | // $example on$
35 | // Prepare training data.
36 | List<Row> dataTraining = Arrays.asList(
37 | RowFactory.create(1.0, Vectors.dense(0.0, 1.1, 0.1)),
38 | RowFactory.create(0.0, Vectors.dense(2.0, 1.0, -1.0)),
39 | RowFactory.create(0.0, Vectors.dense(2.0, 1.3, 1.0)),
40 | RowFactory.create(1.0, Vectors.dense(0.0, 1.2, -0.5))
41 | );
42 | StructType schema = new StructType(new StructField[]{
43 | new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
44 | new StructField("features", new VectorUDT(), false, Metadata.empty())
45 | });
46 | Dataset<Row> training = spark.createDataFrame(dataTraining, schema);
47 |
48 | // Create a LogisticRegression instance. This instance is an Estimator.
49 | LogisticRegression lr = new LogisticRegression();
50 | // Print out the parameters, documentation, and any default values.
51 | System.out.println("LogisticRegression parameters:\n" + lr.explainParams() + "\n");
52 |
53 | // We may set parameters using setter methods.
54 | lr.setMaxIter(10).setRegParam(0.01);
55 |
56 | // Learn a LogisticRegression model. This uses the parameters stored in lr.
57 | LogisticRegressionModel model1 = lr.fit(training);
58 | // Since model1 is a Model (i.e., a Transformer produced by an Estimator),
59 | // we can view the parameters it used during fit().
60 | // This prints the parameter (name: value) pairs, where names are unique IDs for this
61 | // LogisticRegression instance.
62 | System.out.println("Model 1 was fit using parameters: " + model1.parent().extractParamMap());
63 |
64 | // We may alternatively specify parameters using a ParamMap.
65 | ParamMap paramMap = new ParamMap()
66 | .put(lr.maxIter().w(20)) // Specify 1 Param.
67 | .put(lr.maxIter(), 30) // This overwrites the original maxIter.
68 | .put(lr.regParam().w(0.1), lr.threshold().w(0.55)); // Specify multiple Params.
69 |
70 | // One can also combine ParamMaps.
71 | ParamMap paramMap2 = new ParamMap()
72 | .put(lr.probabilityCol().w("myProbability")); // Change output column name
73 | ParamMap paramMapCombined = paramMap.$plus$plus(paramMap2);
74 |
75 | // Now learn a new model using the paramMapCombined parameters.
76 | // paramMapCombined overrides all parameters set earlier via lr.set* methods.
77 | LogisticRegressionModel model2 = lr.fit(training, paramMapCombined);
78 | System.out.println("Model 2 was fit using parameters: " + model2.parent().extractParamMap());
79 |
80 | // Prepare test documents.
81 | List<Row> dataTest = Arrays.asList(
82 | RowFactory.create(1.0, Vectors.dense(-1.0, 1.5, 1.3)),
83 | RowFactory.create(0.0, Vectors.dense(3.0, 2.0, -0.1)),
84 | RowFactory.create(1.0, Vectors.dense(0.0, 2.2, -1.5))
85 | );
86 | Dataset<Row> test = spark.createDataFrame(dataTest, schema);
87 |
88 | // Make predictions on test documents using the Transformer.transform() method.
89 | // LogisticRegression.transform will only use the 'features' column.
90 | // Note that model2.transform() outputs a 'myProbability' column instead of the usual
91 | // 'probability' column since we renamed the lr.probabilityCol parameter previously.
92 | Dataset<Row> results = model2.transform(test);
93 | Dataset<Row> rows = results.select("features", "label", "myProbability", "prediction");
94 | for (Row r: rows.collectAsList()) {
95 | System.out.println("(" + r.get(0) + ", " + r.get(1) + ") -> prob=" + r.get(2)
96 | + ", prediction=" + r.get(3));
97 | }
98 | // $example off$
99 |
100 | spark.stop();
101 | }
102 |
103 | }
104 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch10/Rating.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch10;
2 |
3 | import java.io.Serializable;
4 |
5 | public class Rating implements Serializable{
6 |
7 |
8 | /**
9 | *
10 | */
11 | private static final long serialVersionUID = 1L;
12 | private int userId;
13 | private int movieId;
14 | private float rating;
15 | private long timestamp;
16 |
17 | public Rating() {}
18 |
19 | public Rating(int userId, int movieId, float rating, long timestamp) {
20 | this.userId = userId;
21 | this.movieId = movieId;
22 | this.rating = rating;
23 | this.timestamp = timestamp;
24 | }
25 |
26 | public int getUserId() {
27 | return userId;
28 | }
29 |
30 | public int getMovieId() {
31 | return movieId;
32 | }
33 |
34 | public float getRating() {
35 | return rating;
36 | }
37 |
38 | public long getTimestamp() {
39 | return timestamp;
40 | }
41 |
42 | public static Rating parseRating(String str) {
43 | String[] fields = str.split(",");
44 | if (fields.length != 4) {
45 | throw new IllegalArgumentException("Each line must contain 4 fields");
46 | }
47 | int userId = Integer.parseInt(fields[0]);
48 | int movieId = Integer.parseInt(fields[1]);
49 | float rating = Float.parseFloat(fields[2]);
50 | long timestamp = Long.parseLong(fields[3]);
51 | return new Rating(userId, movieId, rating, timestamp);
52 | }
53 |
54 | }
55 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch11/AbsFunc1.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch11;
2 |
3 | import java.io.Serializable;
4 |
5 | import org.apache.spark.graphx.EdgeTriplet;
6 |
7 | import scala.runtime.AbstractFunction1;
8 |
9 | public class AbsFunc1 extends AbstractFunction1<EdgeTriplet<String, String>, Object> implements Serializable{
10 |
11 |
12 | @Override
13 | public Object apply(EdgeTriplet<String, String> arg0) {
14 | return arg0.attr().equals("Friend");
15 | }
16 |
17 | }
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch11/AbsFunc2.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch11;
2 |
3 | import java.io.Serializable;
4 |
5 | import scala.runtime.AbstractFunction2;
6 |
7 | public class AbsFunc2 extends AbstractFunction2<Object, String, Object> implements Serializable{
8 |
9 | @Override
10 | public Object apply(Object arg0, String arg1) {
11 |
12 | return true;
13 | }
14 |
15 | }
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch11/AbsFunc3.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch11;
2 |
3 | import java.io.Serializable;
4 |
5 |
6 | public class AbsFunc3 extends scala.runtime.AbstractFunction2<Object, String, String> implements Serializable{
7 |
8 | @Override
9 | public String apply(Object arg0, String arg1) {
10 |
11 | return "Vertex:"+arg1;
12 | }
13 |
14 | }
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch11/AbsFunc4.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch11;
2 |
3 | import java.io.Serializable;
4 |
5 | import org.apache.spark.graphx.EdgeContext;
6 | import org.apache.spark.graphx.EdgeTriplet;
7 |
8 | import scala.runtime.AbstractFunction1;
9 | import scala.runtime.BoxedUnit;
10 |
11 | public class AbsFunc4 extends AbstractFunction1<EdgeContext<String, String, Integer>, BoxedUnit> implements Serializable{
12 |
13 | @Override
14 | public BoxedUnit apply(EdgeContext<String, String, Integer> arg0) {
15 |
16 |
17 | arg0.sendToDst(1);
18 | return BoxedUnit.UNIT;
19 | }
20 |
21 | }
22 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch11/AbsFunc5.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch11;
2 |
3 | import java.io.Serializable;
4 |
5 |
6 | public class AbsFunc5 extends scala.runtime.AbstractFunction2<Integer, Integer, Integer> implements Serializable{
7 |
8 | @Override
9 | public Integer apply(Integer i1, Integer i2) {
10 |
11 | return i1+i2;
12 | }
13 |
14 | }
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch11/AbsFunc6.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch11;
2 |
3 | import java.io.Serializable;
4 |
5 | import scala.Option;
6 | import scala.runtime.AbstractFunction3;
7 |
8 | public class AbsFunc6 extends AbstractFunction3<Object, String, Option<String>, String> implements Serializable {
9 |
10 | @Override
11 | public String apply(Object o, String s1, Option<String> s2) {
12 |
13 | if (s2.isEmpty()) {
14 | return s1 ;
15 | } else {
16 | return s1 + " " + s2.get();
17 | }
18 |
19 | }
20 |
21 | }
22 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch11/AbsFunc7.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch11;
2 |
3 | import java.io.Serializable;
4 |
5 | import org.apache.spark.graphx.Edge;
6 | import org.apache.spark.graphx.EdgeTriplet;
7 |
8 | import scala.runtime.AbstractFunction1;
9 |
10 | public class AbsFunc7 extends AbstractFunction1<Edge<String>, Integer> implements Serializable{
11 |
12 | @Override
13 | public Integer apply(Edge<String> edge) {
14 | return edge.attr().length();
15 | }
16 |
17 |
18 |
19 | }
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch11/AbsFunc8.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch11;
2 |
3 | import java.io.Serializable;
4 |
5 | import org.apache.spark.graphx.Edge;
6 | import org.apache.spark.graphx.EdgeTriplet;
7 |
8 | import scala.runtime.AbstractFunction1;
9 |
10 | public class AbsFunc8 extends AbstractFunction1<EdgeTriplet<String, String>, Integer> implements Serializable{
11 |
12 | @Override
13 | public Integer apply(EdgeTriplet<String, String> triplet) {
14 | return triplet.attr().length();
15 | }
16 |
17 |
18 |
19 | }
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch11/PropertyGraphExample.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch11;
2 |
3 | import java.util.ArrayList;
4 | import java.util.List;
5 |
6 | import org.apache.spark.SparkConf;
7 | import org.apache.spark.api.java.JavaRDD;
8 | import org.apache.spark.api.java.JavaSparkContext;
9 | import org.apache.spark.graphx.Edge;
10 | import org.apache.spark.graphx.Graph;
11 | import org.apache.spark.graphx.GraphOps;
12 | import org.apache.spark.graphx.PartitionStrategy;
13 | import org.apache.spark.graphx.TripletFields;
14 | import org.apache.spark.graphx.VertexRDD;
15 | import org.apache.spark.storage.StorageLevel;
16 |
17 | import scala.Predef.$eq$colon$eq;
18 | import scala.Tuple2;
19 | import scala.reflect.ClassTag;
20 |
21 | public class PropertyGraphExample {
22 | public static void main(String[] args) {
23 |
24 | System.setProperty("hadoop.home.dir", "C:\\softwares\\Winutils");
25 | SparkConf conf = new SparkConf().setMaster("local").setAppName("graph");
26 | JavaSparkContext javaSparkContext = new JavaSparkContext(conf);
27 | ClassTag<String> stringTag = scala.reflect.ClassTag$.MODULE$.apply(String.class);
28 | ClassTag<Integer> intTag = scala.reflect.ClassTag$.MODULE$.apply(Integer.class);
29 |
30 |
31 |
32 | //$eq$colon$eq scala$Predef$$singleton_$eq$colon$eq = scala.Predef$.MODULE$.scala$Predef$$singleton_$eq$colon$eq;
33 | $eq$colon$eq<String, String> tpEquals = scala.Predef.$eq$colon$eq$.MODULE$.tpEquals();
34 | List<Tuple2<Object, String>> vertices = new ArrayList<>();
35 |
36 | vertices.add(new Tuple2(1l, "James"));
37 | vertices.add(new Tuple2(2l, "Robert"));
38 | vertices.add(new Tuple2(3l, "Charlie"));
39 | vertices.add(new Tuple2(4l, "Roger"));
40 | vertices.add(new Tuple2(5l, "Tony"));
41 |
42 | List<Edge<String>> edges = new ArrayList<>();
43 |
44 | edges.add(new Edge(2, 1, "Friend"));
45 | edges.add(new Edge(3, 2, "Advisor"));
46 | edges.add(new Edge(3, 1, "Friend"));
47 | /*edges.add(new Edge(1, 2, "Friend"));
48 | edges.add(new Edge(2, 3, "Advisor"));
49 | edges.add(new Edge(1, 3, "Friend"));*/
50 | edges.add(new Edge(4, 3, "colleague"));
51 | edges.add(new Edge(4, 5, "Relative"));
52 | edges.add(new Edge(5, 2, "BusinessPartners"));
53 |
54 | JavaRDD<Tuple2<Object, String>> verticesRDD = javaSparkContext.parallelize(vertices);
55 | JavaRDD<Edge<String>> edgesRDD = javaSparkContext.parallelize(edges);
56 |
57 | Graph<String, String> graph = Graph.apply(verticesRDD.rdd(), edgesRDD.rdd(), "", StorageLevel.MEMORY_ONLY(),
58 | StorageLevel.MEMORY_ONLY(), stringTag, stringTag);
59 |
60 |
61 |
62 |
63 | graph.vertices().toJavaRDD().collect().forEach(System.out::println);
64 | /*System.out.println("-------------------------------");
65 | graph.edges().toJavaRDD().collect().forEach(System.out::println);*/
66 |
67 | //Graph operations
68 |
69 | //mapvertices
70 |
71 | /*Graph mapVertices = graph.mapVertices(new AbsFunc3(), stringTag, tpEquals);
72 | mapVertices.vertices().toJavaRDD().collect().forEach(System.out::println);*/
73 |
74 | //mapEdges
75 |
76 | /*Graph mapEdges = graph.mapEdges(new AbsFunc7(), scala.reflect.ClassTag$.MODULE$.apply(Integer.class));
77 | mapEdges.edges().toJavaRDD().collect().forEach(System.out::println);*/
78 |
79 | //mapTriplets
80 | //Graph mapTriplets = graph.mapTriplets(new AbsFunc8(), scala.reflect.ClassTag$.MODULE$.apply(Integer.class));
81 | //mapTriplets.triplets().toJavaRDD().collect().forEach(System.out::println);
82 |
83 | //Other way - loose indices
84 | //JavaRDD map = graph.vertices().toJavaRDD().map(x->"Vertex:"+x);
85 |
86 | //Triplets
87 |
88 | //Reverse
89 | /* Graph reversedGraph = graph.reverse();
90 | reversedGraph.triplets().toJavaRDD().collect().forEach(System.out::println);*/
91 |
92 |
93 | //Subgraph
94 | /* Graph subgraph = graph.subgraph(new AbsFunc1(), new AbsFunc2());
95 | subgraph.triplets().toJavaRDD().collect().forEach(System.out::println);*/
96 |
97 | //Aggregate Messages
98 |
99 | /*VertexRDD aggregateMessages = graph.aggregateMessages(new AbsFunc4(), new AbsFunc5(), TripletFields.All, intTag);
100 |
101 | aggregateMessages.toJavaRDD().collect().forEach(System.out::println);*/
102 |
103 |
104 |
105 | //Join
106 | // List> dataToJoin = new ArrayList<>();
107 | //
108 | // dataToJoin.add(new Tuple2(1l,"Wilson"));
109 | // dataToJoin.add(new Tuple2(2l,"Harmon"));
110 | // dataToJoin.add(new Tuple2(3l,"Johnson"));
111 | // dataToJoin.add(new Tuple2(4l,"Peterson"));
112 | // dataToJoin.add(new Tuple2(5l,"Adams"));
113 | //
114 | // JavaRDD> dataToJoinRdd = javaSparkContext.parallelize(dataToJoin);
115 | //
116 | // Graph outerJoinVertices = graph.outerJoinVertices(dataToJoinRdd.rdd(), new AbsFunc6(), scala.reflect.ClassTag$.MODULE$.apply(String.class), scala.reflect.ClassTag$.MODULE$.apply(String.class), scala.Predef.$eq$colon$eq$.MODULE$.tpEquals());
117 | // outerJoinVertices.vertices().toJavaRDD().collect().forEach(System.out::println);
118 |
119 |
120 | //Graph-Anaytics
121 |
122 | //PageRank
123 | /*Graph graphWithStaticRanking = graph.ops().staticPageRank(1,0.20);
124 | graphWithStaticRanking.vertices().toJavaRDD().collect().forEach(System.out::println);
125 | */
126 | //graph.ops().pageRank(0.00001,0.20).vertices().toJavaRDD().collect().forEach(System.out::println);;
127 |
128 | //Triangle count
129 | graph.partitionBy(PartitionStrategy.CanonicalRandomVertexCut$.MODULE$);
130 |
131 | Graph<Object, String> triangleCountedGraph = graph.ops().triangleCount();
132 | triangleCountedGraph.vertices().toJavaRDD().collect().forEach(System.out::println);
133 |
134 | //Connected components
135 | /*Graph connectedComponentsGraph = graph.ops().connectedComponents();
136 | connectedComponentsGraph.vertices().toJavaRDD().collect().forEach(System.out::println);;*/
137 |
138 | /*scala.collection.immutable.Set set = new scala.collection.immutable.HashSet();
139 | List list =new ArrayList<>();
140 |
141 | JavaConverters.collectionAsScalaIterableConverter(list).asScala().toSeq();*/
142 | // ShortestPaths.run
143 |
144 |
145 |
146 | }
147 |
148 | }
149 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch11/PropertyGraphExampleFromEdges.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch11;
2 |
3 | import java.util.ArrayList;
4 | import java.util.List;
5 |
6 | import org.apache.spark.SparkConf;
7 | import org.apache.spark.api.java.JavaRDD;
8 | import org.apache.spark.api.java.JavaSparkContext;
9 | import org.apache.spark.graphx.Edge;
10 | import org.apache.spark.graphx.EdgeTriplet;
11 | import org.apache.spark.graphx.Graph;
12 | import org.apache.spark.storage.StorageLevel;
13 |
14 | import scala.Function1;
15 | import scala.reflect.ClassTag;
16 | import scala.runtime.AbstractFunction1;
17 |
18 | public class PropertyGraphExampleFromEdges {
19 | public static void main(String[] args) {
20 | System.setProperty("hadoop.home.dir", "C:\\softwares\\Winutils");
21 | SparkConf conf = new SparkConf().setMaster("local").setAppName("graph");
22 | JavaSparkContext javaSparkContext = new JavaSparkContext(conf);
23 | ClassTag<String> stringTag = scala.reflect.ClassTag$.MODULE$.apply(String.class);
24 |
25 |
26 | List<Edge<String>> edges = new ArrayList<>();
27 |
28 | edges.add(new Edge(1, 2, "Friend"));
29 | edges.add(new Edge(2, 3, "Advisor"));
30 | edges.add(new Edge(1, 3, "Friend"));
31 | edges.add(new Edge(4, 3, "colleague"));
32 | edges.add(new Edge(4, 5, "Relative"));
33 | edges.add(new Edge(2, 5, "BusinessPartners"));
34 |
35 |
36 | JavaRDD<Edge<String>> edgeRDD = javaSparkContext.parallelize(edges);
37 |
38 |
39 | Graph<String, String> graph = Graph.fromEdges(edgeRDD.rdd(), "", StorageLevel.MEMORY_ONLY(), StorageLevel.MEMORY_ONLY(), stringTag, stringTag);
40 |
41 |
42 | graph.vertices().toJavaRDD().collect().forEach(System.out::println);
43 |
44 |
45 |
46 | // graph.aggregateMessages(sendMsg, mergeMsg, tripletFields, evidence$11)
47 |
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch2/AInnerClassVsLambda.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch2;
2 |
3 | import java.io.File;
4 | import java.io.FilenameFilter;
5 |
6 | public class AInnerClassVsLambda {
7 |
8 | public static void main(String[] args) {
9 |
10 | File sourceDir= new File("/home/user");
11 | sourceDir.list(new FilenameFilter() {
12 |
13 | @Override
14 | public boolean accept(File dir, String name) {
15 |
16 | return name.endsWith("txt");
17 | }
18 | });
19 |
20 |
21 | sourceDir.list((dir,name)->name.endsWith("txt"));
22 |
23 | // Lexical scoping-wont work ---System.out.println(dir);
24 |
25 | }
26 |
27 | }
28 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch2/Car.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch2;
2 |
3 | public interface Car {
4 |
5 | void shape();
6 | void price();
7 | void color();
8 | }
9 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch2/ClosureDemo.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch2;
2 |
3 | import java.util.Arrays;
4 | import java.util.List;
5 | import java.util.function.Function;
6 |
7 | public class ClosureDemo {
8 | public static void main(String[] args) {
9 | List<Integer> list = Arrays.asList(1, 2, 3, 4, 5);
10 | Function<Integer, Integer> closure = ClosureExample.closure();
11 | list.stream().map(closure).forEach(n -> System.out.print(n+" "));
12 | }
13 | }
14 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch2/ClosureExample.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch2;
2 |
3 | import java.util.function.Function;
4 |
5 | public class ClosureExample {
6 | public static Function<Integer, Integer> closure() {
7 | int a=3;
8 |
9 | Function<Integer, Integer> function = t->{
10 | //a++;
11 | return t*a;
12 | };
13 |
14 | return function;
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch2/CollectorsExamples.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch2;
2 |
3 | import java.util.ArrayList;
4 | import java.util.List;
5 | import java.util.Map;
6 | import java.util.Set;
7 | import java.util.TreeSet;
8 | import java.util.function.Supplier;
9 | import java.util.stream.Collectors;
10 | import java.util.stream.Stream;
11 |
12 | public class CollectorsExamples {
13 |
14 | public static void main(String[] args) {
15 |
16 | Supplier<Stream<String>> streamSupplier = () -> Stream.of( new String[]{"The","Stream","from","an","array","of","The","Strings"} );
17 |
18 | //String Concatenation using non parameterized joining
19 | String concatedString = streamSupplier.get().collect(Collectors.joining());
20 | System.out.println("The result of String Concatnation using non parameterized joining :: ");
21 | System.out.println(concatedString);
22 |
23 | //String Concatenation using joining with delimiter parameter
24 | String delimitedString = streamSupplier.get().collect(Collectors.joining(","));
25 | System.out.println("The result of String Concatenation using joining with delimeter parameter :: ");
26 | System.out.println(delimitedString);
27 |
28 | //String Concatenation using joining with delimiter parameter
29 | String concatString = streamSupplier.get().collect(Collectors.joining(",","[","]"));
30 | System.out.println("The result of String Concatenation using joining with delimeter parameter :: ");
31 | System.out.println(concatString);
32 |
33 | //Collection Collectors
34 | List<String> listCollected = streamSupplier.get().collect(Collectors.toList());
35 | System.out.println("The list collected value of Stream are :: "+listCollected);
36 |
37 | Set<String> setCollected = streamSupplier.get().collect(Collectors.toSet());
38 | System.out.println("The set collected value of Stream are :: "+setCollected);
39 |
40 | Set<String> orderedSetCollected = streamSupplier.get().collect(Collectors.toCollection(TreeSet::new));
41 | System.out.println("The ordered set collected value of Stream are :: "+orderedSetCollected);
42 |
43 | //Map Collectors
44 | Map<String, Integer> mapCollected = orderedSetCollected.stream().collect(Collectors.toMap(x->x.toString(), x->x.toString().length() ));
45 | System.out.println("The generated Map values are :: "+mapCollected);
46 |
47 | //Map Collectors with duplicate key handling
48 | Map<String, List<Integer>> mapWithDupVals = streamSupplier.get().collect(Collectors.toMap(x->x.toString(), //KeyMapper
49 | x -> {List<Integer> tmp = new ArrayList<>(); tmp.add(x.toString().length()); return tmp;}, //ValueMapper
50 | (L1, L2) -> { L1.addAll(L2); return L1;} //MergeFunction
51 | ));
52 | System.out.println("The generated Map values with duplicate values::"+mapWithDupVals);
53 |
54 | //Grouping Collectors
55 | Map<Integer, List<String>> groupExample = streamSupplier.get().collect(Collectors.groupingBy(x->x.toString().length()));
56 | System.out.println("Grouping stream elements on the basis of its length :: "+groupExample);
57 |
58 | //Partition Collectors
59 | Map<Boolean, List<String>> partitionExample = streamSupplier.get().collect(Collectors.partitioningBy( x->x.toString().length() > 5 ));
60 | System.out.println("Partitioning of elements on the basis of their length :: "+partitionExample);
61 |
62 | }
63 |
64 | }
65 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch2/CreateStreamExample.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch2;
2 |
3 | import java.io.IOException;
4 | import java.math.BigInteger;
5 | import java.nio.charset.Charset;
6 | import java.nio.file.Files;
7 | import java.nio.file.Path;
8 | import java.nio.file.Paths;
9 | import java.util.ArrayList;
10 | import java.util.Arrays;
11 | import java.util.HashMap;
12 | import java.util.List;
13 | import java.util.Map;
14 | import java.util.concurrent.atomic.AtomicInteger;
15 | import java.util.stream.DoubleStream;
16 | import java.util.stream.IntStream;
17 | import java.util.stream.LongStream;
18 | import java.util.stream.Stream;
19 |
20 | public class CreateStreamExample {
21 |
22 | public static void main(String[] args) throws IOException {
23 |
24 | //Creating Streams using user/programmatically specified elements
25 | Stream<String> Userstream = Stream.of("Creating","Streams","from","Specific","elements");
26 | Userstream.forEach(p -> System.out.println(p));
27 |
28 |
29 | //Creating Streams using array of objects
30 | Stream<String> ArrayStream = Stream.of( new String[]{"Stream","from","an","array","of","objects"} );
31 | ArrayStream.forEach(p -> System.out.println(p));
32 |
33 |
34 | //Creating Streams from an array
35 | String[] StringArray=new String[]{"We","can","convert","an","array","to","a","Stream","using","Arrays","as","well"};
36 | Stream<String> StringStream = Arrays.stream(StringArray);
37 | StringStream.forEach(p -> System.out.println(p));
38 |
39 | //Creating Streams from Collection
40 | List<Double> myCollection = new ArrayList<>();
41 | for(int i=0; i<10; i++){
42 | myCollection.add(Math.random());
43 | }
44 | //sequential stream
45 | Stream<Double> sequentialStream = myCollection.stream();
46 | sequentialStream.forEach(p -> System.out.println(p));
47 |
48 | //parallel stream
49 | Stream<Double> parallelStream = myCollection.parallelStream();
50 | parallelStream.forEach(p -> System.out.println(p));
51 |
52 |
53 | //Stream from Hashmap
54 | Map<String, Integer> mapData = new HashMap<>();
55 | mapData.put("This", 1900);
56 | mapData.put("is", 2000);
57 | mapData.put("HashMap", 2100);
58 |
59 | mapData.entrySet()
60 | .stream()
61 | .forEach(p -> System.out.println(p));
62 |
63 | mapData.keySet()
64 | .stream()
65 | .forEach(p-> System.out.println(p));
66 |
67 | //primitive streams
68 | IntStream.range(1, 4)
69 | .forEach(p -> System.out.println(p));
70 |
71 | LongStream.rangeClosed(1, 4)
72 | .forEach(p -> System.out.println(p));
73 |
74 | DoubleStream.of(1.0,2.0,3.0,4.0)
75 | .forEach(p -> System.out.println(p));
76 |
77 | //Infinite Streams using generate()
78 | Stream<Double> sequentialDoubleStream = Stream.generate(Math :: random);
79 |
80 | Stream<Integer> sequentialIntegerStream = Stream.generate(new AtomicInteger () :: getAndIncrement);
81 |
82 | //Infinite Streams using iterate()
83 | Stream<Integer> sequentialIntegerStream1 = Stream.iterate(Integer.MIN_VALUE, i -> i + 1);
84 |
85 | Stream<BigInteger> sequentialBigIntegerStream = Stream.iterate(BigInteger.ZERO, i -> i.add(BigInteger.TEN));
86 |
87 | //Streams from File
88 | Stream<String> streamOfStrings = Files.lines(Paths.get("Apology_by_Plato.txt"));
89 | Stream<String> streamWithCharset = Files.lines(Paths.get("Apology_by_Plato.txt"), Charset.forName("UTF-8"));
90 |
91 |
92 | }
93 |
94 | }
95 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch2/Interface1.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch2;
2 |
3 | //@FunctionalInterface
4 | public interface Interface1 {
5 |
6 | default void hello(){
7 | System.out.println("Hello from Interface1");
8 | }
9 |
10 | //void method1();
11 |
12 | }
13 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch2/Interface2.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch2;
2 |
3 | public interface Interface2 {
4 | default void hello(){
5 | System.out.println("Hello from Interface2");
6 | }
7 | }
8 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch2/InterfaceImpl.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch2;
2 |
3 | public class InterfaceImpl implements Interface1,Interface2{
4 | @Override
5 | public void hello() {
6 | // TODO Auto-generated method stub
7 | Interface1.super.hello();
8 | Interface2.super.hello();
9 | }
10 |
11 | }
12 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch2/IntermediateOpExample.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch2;
2 |
3 | import java.util.Arrays;
4 | import java.util.Comparator;
5 | import java.util.List;
6 | import java.util.function.Supplier;
7 | import java.util.stream.IntStream;
8 | import java.util.stream.Stream;
9 |
10 | public class IntermediateOpExample {
11 |
12 | public static void main(String[] args) {
13 |
14 | //Filter Operation
15 | IntStream.rangeClosed(1, 10)
16 | .filter(s -> s>4)
17 | .forEach(p -> System.out.println(p));
18 |
19 | //Map Operation
20 | Supplier<Stream<String>> streamSupplier = () -> Stream.of( new String[]{"Stream","from","an","array","of","objects"} );
21 | int sumOfLength=streamSupplier.get().map(x -> x.toString().length()).peek(x->System.out.println(Integer.parseInt(x.toString())))
22 | .mapToInt(x->x.intValue()).sum();
23 |
24 | int incrementVal=6;
25 | IntStream.rangeClosed(1, 10)
26 | .filter(s -> s>4)
27 | .map(x -> x+incrementVal)
28 | .forEach(p -> System.out.println(p));
29 |
30 | Stream.of(new String[]{"Let me see what i get this time"}).map(x -> x.split("\\s+")).forEach(System.out::println);
31 |
32 | //Sorted
33 | // Stream ArrayStream = Stream.of( new String[]{"stream","from","an","array","of","objects"} );
34 |
35 | //http://stackoverflow.com/questions/23860533/copy-a-stream-to-avoid-stream-has-already-been-operated-upon-or-closed-java-8
36 | // Supplier> streamSupplier =()-> Stream.of( new String[]{"stream","from","an","array","of","objects"} );
37 |
38 | //Natural Sorting
39 | streamSupplier.get().sorted().forEach(System.out::println);
40 |
41 | //Comparing elements with reverse order
42 | streamSupplier.get().sorted(Comparator.reverseOrder()).forEach(System.out::println);
43 |
44 |
45 | //Sorting the element in reverse order based on their length
46 | streamSupplier.get().sorted(Comparator.comparing(x -> x.toString().length()).reversed()).forEach(System.out::println);
47 |
48 | //Sorting on multiple fields
49 | streamSupplier.get().sorted(Comparator.comparing(x -> x.toString().length()).thenComparing(x->x.toString())).forEach(System.out::println);
50 |
51 |
52 | //Distinct filters all the multiple records having same length
53 | streamSupplier.get().mapToInt(x-> x.toString().length()).distinct().forEach(System.out::println);
54 |
55 | //Limiting the size of the stream
56 | streamSupplier.get().limit(2).forEach(System.out::println);
57 |
58 | //flatMap
59 | Stream<List<String>> streamList = Stream.of(
60 | Arrays.asList("FistList-FirstElement"),
61 | Arrays.asList("SecondList-FirstElement", "SecondList-SecondElement"),
62 | Arrays.asList("ThirdList-FirstElement"));
63 | //The streamList is of the form Stream<List<String>>
64 | Stream<String> flatStream = streamList
65 | .flatMap(strList -> strList.stream());
66 | // But after applying the flatMap operation it translates into Stream<String>
67 | flatStream.forEach(System.out::println);
68 |
69 | //
70 |
71 | // Stream.of(1, 2, 3)
72 | // .flatMap(x -> IntStream.range(0, x))
73 | // .forEach(System.out::println);
74 |
75 | System.out.println( " the count of stream is "+
76 |
77 | //String[] sr=(String[])
78 |
79 | Stream.of(new String[]{"Let,me,see,what,i,get,this,time","ok,now,what"}) //Stream<String>
80 | .peek(x->System.out.println( "the length of the stream element is "+ x.length()))
81 | .map(x -> x.split(",")) //Stream<String[]>
82 | .peek(x->System.out.println(x.length))
83 | .count());
84 |
85 |
86 | // .collect(Collectors.toList())
87 | // .flatMap(x -> Arrays.stream(x)).forEach(System.out::println);
88 |
89 |
90 | /*.forEach(x -> {
91 | for(String sr:x){
92 | System.out.println(x.length);
93 | System.out.println(sr);
94 | }
95 | });
96 | */
97 | // .peek(x-> System.out.println(x))
98 | // .flatMap(Arrays::stream)
99 | // .forEach(System.out::println);
100 |
101 | }
102 |
103 | }
104 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch2/LambdaExamples.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch2;
2 |
3 | import java.util.Arrays;
4 | import java.util.List;
5 |
6 | public class LambdaExamples {
7 | public static void main(String[] args) {
8 | List<Integer> list = Arrays.asList(1,2,3,4,5);
9 |
10 | list.forEach(n-> System.out.println(n));
11 |
12 | list.stream().map(n -> n*2 ).forEach(n-> System.out.println(n));
13 | list.stream().map(n->{
14 | return n*2;
15 | }).forEach(System.out::println);
16 |
17 | }
18 |
19 |
20 |
21 |
22 |
23 | }
24 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch2/LexicalScoping.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch2;
2 |
3 | public class LexicalScoping {
4 | int a = 1;
5 | // a has class-level scope, so it will be accessible
6 | // throughout the class
7 |
8 | public void sumandPrint() {
9 | int b = 1;
10 | int c = a + b;
11 | // b and c are local variables of method. These will be accessible
12 | // inside the method only
13 | }
14 | // b and c are no longer accessible
15 | }
16 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch2/MethodReferenceExample.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch2;
2 |
3 | import java.util.Arrays;
4 | import java.util.HashSet;
5 | import java.util.List;
6 | import java.util.Optional;
7 | import java.util.TreeSet;
8 | import java.util.function.Supplier;
9 | import java.util.stream.Collectors;
10 | import java.util.stream.IntStream;
11 | import java.util.stream.Stream;
12 |
13 |
14 | public class MethodReferenceExample {
15 |
16 | public static boolean isOdd(Integer n) { return n % 2 != 0; }
17 | public static boolean isEven(Integer n) { return n % 2 == 0; }
18 |
19 |
20 | public static void main(String[] args) {
21 | Supplier<Stream<String>> streamSupplier = () -> Stream.of(new String[]{"Stream","from","an","array","of","objects"});
22 |
23 | //1.Static Method Reference
24 | IntStream.range(1, 8).filter(MethodReferenceExample::isOdd).forEach(x->System.out.println(x));
25 |
26 | //2. Instance Method Reference
27 | IntStream.range(1, 8).filter(x-> x%2==0).forEach(System.out::println);
28 |
29 | //3. Constructor Reference
30 | TreeSet<String> hset= streamSupplier.get().collect(Collectors.toCollection(TreeSet::new));
31 |
32 |
33 | //4. Instance method Reference of an arbitrary object of a particular type
34 | System.out.println(" The sum of lengths are ::"+ streamSupplier.get().map(x->x.length()).reduce(Integer::sum));
35 |
36 |
37 | }
38 |
39 |
40 | }
41 |
42 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch2/MyFileNameFilter.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch2;
2 |
3 | import java.io.File;
4 | import java.io.FilenameFilter;
5 |
6 |
7 | public class MyFileNameFilter implements FilenameFilter {
8 | @Override
9 | public boolean accept(File dir, String name) {
10 |
11 | return name.endsWith("java");
12 | }
13 |
14 | }
15 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch2/MyFilterImpl.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch2;
2 |
3 | import java.io.File;
4 |
5 | public class MyFilterImpl {
6 | public static void main(String[] args) {
7 | File dir = new File("src/main/java");
8 | //dir.list(new MyFileNameFilter());
9 | dir.list((dirname,name)->name.endsWith("java"));
10 |
11 | }
12 | }
13 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch2/MyInterface.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch2;
2 |
3 | public interface MyInterface {
4 |
5 | default String hello() {
6 | return "Inside default method in interface";
7 | }
8 |
9 | void absmethod();
10 | }
11 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch2/MyInterfaceDemo.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch2;
2 |
3 | public class MyInterfaceDemo {
4 | public static void main(String[] args) {
5 | System.out.println();
6 | MyInterfaceImpl obj =new MyInterfaceImpl();
7 | obj.hello(); // compiles: hello() is a default method inherited from MyInterface
8 | }
9 | }
10 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch2/MyInterfaceImpl.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch2;
2 |
3 | public class MyInterfaceImpl implements MyInterface{
4 |
5 | @Override
6 | public void absmethod() {
7 | System.out.println("Abstract method implementation in class");
8 | }
9 |
10 | }
11 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch2/ShortCircuitOperationExample.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch2;
2 |
3 | public class ShortCircuitOperationExample {
4 |
5 | public static void main(String[] args) {
6 | // sample member names to demonstrate the short-circuiting terminal operations
7 | java.util.List<String> memberNames = java.util.Arrays.asList("Adam", "Alice", "Bob", "Luke");
8 |
9 | // anyMatch() stops as soon as one element starts with "A"
10 | boolean matched = memberNames.stream()
11 | .anyMatch((s) -> s.startsWith("A"));
12 | System.out.println(matched);
13 |
14 | // findFirst() stops after the first element that passes the filter
15 | String firstMatchedName = memberNames.stream()
16 | .filter((s) -> s.startsWith("L"))
17 | .findFirst().get();
18 | System.out.println(firstMatchedName);
19 | }
20 |
21 | }
22 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch2/TerminalOpExample.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch2;
2 |
3 | import java.util.Arrays;
4 | import java.util.Optional;
5 | import java.util.function.Supplier;
6 | import java.util.stream.Collectors;
7 | import java.util.stream.IntStream;
8 | import java.util.stream.Stream;
9 |
10 | public class TerminalOpExample {
11 |
12 | public static void main(String[] args) {
13 | // forEach
14 | Supplier<Stream<String>> streamSupplier = () -> Stream.of(new String[]{"Stream","from","an","array","of","objects"});
15 | //Sequential For each
16 | streamSupplier.get().sequential().forEach(P->System.out.println("Sequential output :: "+P));
17 | //Parallel For each
18 | streamSupplier.get().parallel().forEach(P->System.out.println("Parallel output :: "+P));
19 |
20 | //sum
21 | // System.out.println(streamSupplier.get().map(x -> x.toString().length()).peek(System.out::println).sum());
22 |
23 | System.out.println("Number of characters present in the stream ::"+streamSupplier.get().mapToInt(x -> x.length()).sum()); //Notice: had we used map() here, we would have needed another step to convert to an IntStream before calling sum().
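// For comparison, a roughly equivalent version using map() needs an explicit unboxing step
// before sum() becomes available, e.g.:
// int charCount = streamSupplier.get().map(String::length).mapToInt(Integer::intValue).sum();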
24 |
25 | //reduce
26 | Optional<Integer> simpleSum= streamSupplier.get().map(x->x.length()).reduce((x,y)-> x+y);
27 |
28 | System.out.println( "The value with simple reduce is ::"+simpleSum.get());
29 |
30 | Integer defaultValSum= streamSupplier.get().map(x->x.length()).reduce(0,(x,y)-> x+y);
31 | System.out.println( "The value with default reduce is ::"+defaultValSum);
32 |
33 | Integer valSum= streamSupplier.get().reduce(0,(x,y)-> x+y.length(),(acc1,acc2)->acc1+acc2);
34 | System.out.println("The value with combine reduce is ::"+valSum);
35 |
36 | //collect
37 |
38 | StringBuilder concat = streamSupplier.get()
39 | .collect(() -> new StringBuilder(),
40 | (sbuilder, str) -> sbuilder.append(str),
41 | (sbuilder1, sbuiler2) -> sbuilder1.append(sbuiler2));
42 |
43 |
44 | StringBuilder concatM = streamSupplier.get()
45 | .collect(StringBuilder::new,
46 | StringBuilder::append,
47 | StringBuilder::append);
48 |
49 | String concatC = streamSupplier.get().collect(Collectors.joining());
50 |
51 | //Match
52 | boolean matchesAll =streamSupplier.get().allMatch(x->x.toString().length() > 1);
53 | System.out.println("All the elements have length greater than 1 ::"+matchesAll);
54 |
55 | boolean noneMatches =streamSupplier.get().noneMatch(x->x.toString().length() > 1);
56 | System.out.println("None of the elements have length greater than 1 ::"+noneMatches);
57 |
58 | boolean anyMatches =streamSupplier.get().peek(x->System.out.println("Element being iterated is :: "+x)).anyMatch(x->x.toString().length() == 2);
59 | System.out.println("The short circuit terminal operation finished with return value :: "+anyMatches);
60 |
61 | //Finding Element
62 | System.out.println("In a parallel stream from 5-100 finding any element :: "+IntStream.range(5, 100).parallel().findAny());
63 |
64 | System.out.println("In a parallel stream from 8-100 finding the first element :: "+IntStream.range(8, 100).parallel().findFirst());
65 |
66 | //Count
67 | long elementCount=streamSupplier.get().count();
68 | System.out.println("The number of elements in the stream are :: "+elementCount);
69 |
70 |
71 |
72 | //System.out.println( joinWithReduce(Stream.of("foo", "bar", "baz")) );
73 |
74 | //System.out.println( joinWithCollect(Stream.of("foo", "bar", "baz")) );
75 |
76 | }
77 |
78 | static String joinWithReduce(Stream<String> stream) { // BAD
79 | return stream.reduce(new StringBuilder(), StringBuilder::append, StringBuilder::append).toString(); }
80 |
81 | static String joinWithCollect(Stream<String> stream) { // OK
82 | return stream.collect(StringBuilder::new, StringBuilder::append, StringBuilder::append).toString(); }
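// Why joinWithReduce is marked BAD: reduce() assumes an immutable identity and an associative
// accumulator, but here a single StringBuilder is mutated and reused, which can interleave or
// duplicate content when the stream runs in parallel. collect() is the mutable-reduction API,
// so joinWithCollect creates one container per thread and merges them safely.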
83 |
84 |
85 | }
86 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch2/WordCountInJava.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch2;
2 |
3 | import java.io.IOException;
4 | import java.nio.file.Files;
5 | import java.nio.file.Paths;
6 | import java.util.ArrayList;
7 | import java.util.Arrays;
8 | import java.util.List;
9 | import java.util.Map;
10 | import java.util.TreeMap;
11 | import java.util.stream.Collectors;
12 | import java.util.stream.Stream;
13 |
14 | import static java.util.function.Function.identity;
15 | import static java.util.stream.Collectors.counting;
16 | import static java.util.stream.Collectors.groupingBy;
17 |
18 | public class WordCountInJava {
19 | public static final String REGEX = "\\s+";
20 | public static final String NEW_LINE_CHAR = "\n";
21 | public static final String imagineLyrics="Imagine there's no heaven \n"
22 | + "It's easy if you try \n"
23 | + "No hell below us \n"
24 | + "Above us only sky \n"
25 | + "Imagine all the people living for today";
26 |
27 | public static void main(String[] args) {
28 |
29 | try {
30 | //TreeMap<String, Long> count = Files.lines(Paths.get(args[0]), StandardCharsets.UTF_8)
31 | TreeMap<String, Long> count = Stream.of( imagineLyrics.split(NEW_LINE_CHAR))
32 | .map(line -> line.split(REGEX))
33 | .flatMap(Arrays::stream)
34 | .collect(groupingBy(identity(), TreeMap::new, counting()));
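// count now maps each distinct word to its number of occurrences, sorted by key,
// e.g. {Above=1, ..., Imagine=2, ...} for the lyrics above.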
35 |
36 | // Using Lambda Expression
37 | Stream.of(count).forEach(x -> System.out.println(x));
38 | //Using Method Reference
39 | Stream.of(count).forEach(System.out::println);
40 |
41 |
42 | Stream<String> mapResult=Stream.of( imagineLyrics.split(NEW_LINE_CHAR))
43 | .map(line -> line.split(REGEX)).flatMap(Arrays::stream);
44 |
45 | //sort and suffle phase
46 | Map<String, List<Integer>> sortedData=mapResult.collect(Collectors.toMap(x->x.toString(), x->{
47 | List<Integer> temp=new ArrayList<>(); temp.add(1); return temp; },
48 | (L1,L2)-> {L1.addAll(L2);return L1;}));
49 | //Reduce Phase
50 | /*Map<String, Integer> wordCount=sortedData.entrySet().stream().collect(Collectors.toMap(
51 | e -> e.getKey(),
52 | e -> e.getValue().size() // each occurrence contributed one entry to the list
53 | ));*/
54 |
55 | } catch (Exception e) {
56 | // TODO Auto-generated catch block
57 | e.printStackTrace();
58 | }
59 | }
60 |
61 | }
62 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch2/generics/FirstExample.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch2.generics;
2 |
3 | import java.util.ArrayList;
4 | import java.util.List;
5 |
6 | public class FirstExample {
7 | public static void main(String[] args) {
8 |
9 | List list1 =new ArrayList();
10 | List list2 =new ArrayList();
11 |
12 | List list =new ArrayList<>();
13 |
14 | list.add(1);
15 | list.add(2);
16 | list.add("hello");
17 |
18 | Integer object = (Integer)list.get(0);
19 |
20 | System.out.println(object);
21 |
22 | List<Integer> listGeneric =new ArrayList<>();
23 |
24 | listGeneric.add(1);
25 | listGeneric.add(2);
26 | //listGeneric.add("hello"); - won't compile
27 | Integer intObject = listGeneric.get(0);
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch2/generics/MyGeneric.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch2.generics;
2 |
3 | public class MyGeneric<T> {
4 |
5 | T input;
6 |
7 | public MyGeneric(T input) {
8 | this.input=input;
9 |
10 | }
11 |
12 | public T getInput()
13 | {
14 | return input;
15 | }
16 |
17 |
18 | }
19 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch2/generics/MyGenericsDemo.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch2.generics;
2 |
3 | public class MyGenericsDemo {
4 |
5 | public static void main(String[] args) {
6 | MyGeneric<Integer> m1 = new MyGeneric<Integer>(1);
7 | System.out.println(m1.getInput());
8 |
9 | MyGeneric<String> m2 = new MyGeneric<String>("hello");
10 | System.out.println(m2.getInput());
11 | }
12 | }
13 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch4/ActionsExamplesOld.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch4;
2 |
3 | import java.io.Serializable;
4 | import java.util.Arrays;
5 | import java.util.Comparator;
6 | import java.util.List;
7 | import java.util.Map;
8 | import java.util.function.BiConsumer;
9 |
10 | import org.apache.log4j.Level;
11 | import org.apache.log4j.LogManager;
12 | import org.apache.log4j.Logger;
13 | import org.apache.spark.SparkConf;
14 | import org.apache.spark.api.java.JavaDoubleRDD;
15 | import org.apache.spark.api.java.JavaPairRDD;
16 | import org.apache.spark.api.java.JavaRDD;
17 | import org.apache.spark.api.java.JavaSparkContext;
18 | import org.apache.spark.api.java.function.Function2;
19 | import org.apache.spark.api.java.function.PairFunction;
20 | import org.apache.spark.sql.SparkSession;
21 | import org.spark_project.guava.collect.Lists;
22 |
23 | import scala.Tuple2;
24 |
25 | public class ActionsExamplesOld implements Serializable{
26 |
27 | /**
28 | *
29 | */
30 | private static final long serialVersionUID = 1L;
31 |
32 | public static void main(String[] args) {
33 | System.setProperty("hadoop.home.dir", "C:\\Users\\sumit.kumar\\Downloads");
34 | String logFile = "src/main/resources/numSeries.txt"; // Should be some file on your system
35 | Logger rootLogger = LogManager.getRootLogger();
36 | rootLogger.setLevel(Level.WARN);
37 | /* SparkSession spark = SparkSession
38 | .builder().master("local")
39 | .appName("JavaPageRank")
40 | .config("spark.sql.warehouse.dir", "file:///C:/Users/sumit.kumar/Downloads/bin/warehouse")
41 | .getOrCreate();
42 | */
43 | SparkConf conf = new SparkConf().setMaster("local").setAppName("ApacheSparkForJavaDevelopers");
44 | // SparkContext context =new SparkContext(conf);
45 | // RDD textFile = context.textFile("abc", 1);
46 |
47 | JavaSparkContext spark = new JavaSparkContext(conf);
48 |
49 |
50 |
51 | JavaRDD<String> lines = spark.textFile(logFile);
52 | //JavaRDD<String> lines = spark.textFile(logFile).toJavaRDD().cache();
53 | JavaDoubleRDD intMap= lines.mapToDouble(a-> Integer.parseInt(a)).cache();
54 | JavaPairRDD<Double, Double> intDivMap= intMap.mapToPair(new PairFunction<Double, Double, Double>() {
55 |
56 | /**
57 | *
58 | */
59 | private static final long serialVersionUID = 1L;
60 |
61 | @Override
62 | public Tuple2<Double, Double> call(Double t) throws Exception {
63 |
64 | return new Tuple2<Double, Double>(t, t%2);
65 | }
66 | });
67 |
68 | // isEmpty
69 | JavaRDD<Integer> intRDD = spark.parallelize(Arrays.asList(1,2,3));
70 | boolean isRDDEmpty= intRDD.filter(a-> a.equals(5)).isEmpty();
71 | System.out.println("The RDD is empty ::"+isRDDEmpty);
72 |
73 | //Collect
74 | List<String> collectedList= lines.collect();
75 |
76 | //count()
77 | long countVal=lines.count();
78 | //CountByKey:
79 | Map<Double, Long> countByKeyMap= intDivMap.countByKey();
80 |
81 | countByKeyMap.forEach(new BiConsumer<Double, Long>() {
82 |
83 | @Override
84 | public void accept( Double L, Long U ) {
85 | System.out.println("The key val is 1 ::"+L);
86 | System.out.println("The Long is 1 ::"+U);
87 | }
88 | });
89 |
90 |
91 | Map<Tuple2<Double, Double>, Long> countByValMap= intDivMap.countByValue();
92 |
93 | countByValMap.forEach(new BiConsumer<Tuple2<Double, Double>, Long>() {
94 |
95 | @Override
96 | public void accept( Tuple2<Double, Double> L, Long U ) {
97 | System.out.println("The tuple val is 1 ::"+L._1());
98 | System.out.println("The tuple val is 2 ::"+L._2());
99 | System.out.println("The Long is 1 ::"+U);
100 | }
101 | });
102 |
103 |
104 | //countByValue()
105 | Map<String, Long> countByVal=lines.countByValue();
106 | // max
107 | intMap.max();
108 |
109 | /* Comparator<Double> comp =new Comparator<Double>() {
110 |
111 | @Override
112 | public int compare(Double a, Double b) {
113 | // TODO Auto-generated method stub
114 | return a.compareTo(b);
115 | }
116 | };*/
117 |
118 | intMap.max(new doubleComparator());
119 |
120 | /* intMap.max(new Comparator<Double>() {
121 |
122 | @Override
123 | public int compare(Double a, Double b) {
124 | // TODO Auto-generated method stub
125 | return a.compareTo(b);
126 | }
127 | });
128 | */
129 | intMap.max(Comparator.naturalOrder());
130 | intMap.max(Comparator.reverseOrder());
131 | //////check this
132 | // intMap.max(Comparator.comparing(a->a));
133 |
134 | //min
135 | intMap.min();
136 | intMap.min(Comparator.naturalOrder());
137 |
138 | // First:
139 | System.out.println("The first element of RDD is"+ intMap.first());
140 |
141 |
142 |
143 | //take()
144 | List<String> takeTwo=lines.take(2);
145 | takeTwo.forEach(x->System.out.println("The take elements are :: "+x));
146 |
147 | // TakeOrdered:
148 | List<String> takeOrderedTwo= lines.takeOrdered(2);
149 | takeOrderedTwo.forEach(x->System.out.println("The takeOrdered elements are :: "+x));
150 |
151 |
152 | // takeOrdered(int num, java.util.Comparator comp)
153 | List<String> takeCustomOrderedTwo= lines.takeOrdered(2, Comparator.reverseOrder());
154 | takeCustomOrderedTwo.forEach(x->System.out.println("The takeOrdered elements with custom Comparator are :: "+x));
155 |
156 |
157 |
158 | //TakeSample:
159 | intRDD.takeSample(true, 3).forEach(x-> System.out.println("The take sample vals for true are :"+x));
160 | intRDD.takeSample(false, 3).forEach(x-> System.out.println("The take sample vals for false are :"+x));
161 | intRDD.takeSample(true, 3,9).forEach(x-> System.out.println("The take sample vals with seed are :"+x));
162 |
163 | //top()
164 | List<String> topFive=lines.top(5);
165 | topFive.forEach(x->System.out.println("The value of top are ::"+x));
166 |
167 | // top(int num, java.util.Comparator comp)
168 | // lines.top(3, Comparator.comparing(x->Integer.parseInt(x)));
169 |
170 | //reduce
171 | Function2<String, String, Integer> reduceSumFunc = (a, b) -> (Integer.parseInt(a) + Integer.parseInt(b));
172 | Double sumInt=intMap.reduce((a,b)->a+b);
173 |
174 |
175 | /* Integer sumInt=lines.reduce(new Function2(
176 | ) {
177 | @Override
178 | public Integer call(String a, String b) throws Exception {
179 | // TODO Auto-generated method stub
180 | return Integer.parseInt(a) + Integer.parseInt(b);
181 | }
182 | });*/
183 |
184 |
185 | //fold()
186 | Double foldInt=intMap.fold((double) 0, (a,b)-> a+b);
187 |
188 | //
189 | //Aggeregate:
190 | // ForEach:
191 | lines.foreach(s->System.out.println(s));
192 |
193 | // saveAsTextFile
194 | // saveAsObjectFile(String path)
195 | JavaRDD<String> rdd = spark.parallelize(Lists.newArrayList("1", "2"));
196 | rdd.mapToPair(p -> new Tuple2<>(p, p)).saveAsObjectFile("objFileDir");
197 | JavaPairRDD<String, String> pairRDD
198 | = JavaPairRDD.fromJavaRDD(spark.objectFile("objFileDir"));
199 | pairRDD.collect().forEach(System.out::println);
200 |
201 | }
202 |
203 | static class doubleComparator implements Comparator<Double>,Serializable{
204 |
205 | /**
206 | *
207 | */
208 | private static final long serialVersionUID = 1L;
209 |
210 | @Override
211 | public int compare(Double a, Double b) {
212 | // TODO Auto-generated method stub
213 | return a.compareTo(b);
214 | }
215 | }
216 |
217 | }
218 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch4/AggeregateExample.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch4;
2 |
3 | import java.util.ArrayList;
4 | import java.util.Arrays;
5 | import java.util.List;
6 |
7 | import org.apache.spark.api.java.JavaPairRDD;
8 | import org.apache.spark.api.java.JavaRDD;
9 | import org.apache.spark.api.java.JavaSparkContext;
10 | import org.apache.spark.api.java.function.Function2;
11 |
12 | import scala.Tuple2;
13 |
14 | public class AggeregateExample {
15 |
16 | public static void main(String[] args) {
17 | String master;
18 | if (args.length > 0) {
19 | master = args[0];
20 | } else {
21 | master = "local";
22 | }
23 |
24 | JavaSparkContext sc = new JavaSparkContext(
25 | master, "AggeregateExample");
26 | JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3,4,5),3);
27 | System.out.println("The no of partitions are ::"+rdd.getNumPartitions());
28 | //TODO print elements per partition using mapPartitionsWithIndex()
29 | Function2<String, Integer, String> agg=new Function2<String, Integer, String>() {
30 | @Override
31 | public String call(String v1, Integer v2) throws Exception {
32 | return v1+v2;
33 | }
34 | } ;
35 |
36 | Function2<String, String, String> combineAgg=new Function2<String, String, String>() {
37 | @Override
38 | public String call(String v1, String v2) throws Exception {
39 | return v1+v2;
40 | }
41 | };
42 |
43 |
44 | //String result= rdd.aggregate("X", agg, combineAgg);
45 | String result= rdd.aggregate("X", (x,y)->x+y, (x,z)->x+z);
46 | System.out.println("The aggerate value is ::"+result);
47 |
48 |
49 | int res= rdd.aggregate(3, (x,y)-> x>y?x:y, (w,z)->w>z?w:z);
50 | System.out.println("the res is ::"+res);
51 |
52 | List<Tuple2<String, Integer>> listS = new ArrayList<Tuple2<String, Integer>>();
53 | listS.add(new Tuple2<String, Integer>("a", 1));
54 | listS.add(new Tuple2<String, Integer>("b", 2));
55 | listS.add(new Tuple2<String, Integer>("c", 3));
56 | listS.add(new Tuple2<String, Integer>("a", 4));
57 |
58 | //
59 | JavaPairRDD<String, Integer> R = sc.parallelizePairs(listS);
60 | List<Tuple2<String, Integer>> es= R.aggregateByKey(1, (x,y)->x+y, (x,y)->x+y).collect();
61 |
62 | for (Tuple2<String, Integer> tuple2 : es) {
63 | System.out.println("the key is"+tuple2._1()+" and the val is ::"+tuple2._2());
64 | }
65 |
66 | }
67 |
68 | }
69 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch4/JavaWordCount.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch4;
2 |
3 | import scala.Tuple2;
4 |
5 | import org.apache.spark.api.java.JavaPairRDD;
6 | import org.apache.spark.api.java.JavaRDD;
7 | import org.apache.spark.api.java.function.FlatMapFunction;
8 | import org.apache.spark.api.java.function.Function2;
9 | import org.apache.spark.api.java.function.PairFunction;
10 | import org.apache.spark.sql.SparkSession;
11 |
12 | import java.util.Arrays;
13 | import java.util.Iterator;
14 | import java.util.List;
15 | import java.util.regex.Pattern;
16 |
17 | public final class JavaWordCount {
18 | private static final Pattern SPACE = Pattern.compile(" ");
19 |
20 | public static void main(String[] args) throws Exception {
21 |
22 | if (args.length < 1) {
23 | System.err.println("Usage: JavaWordCount <file>");
24 | System.exit(1);
25 | }
26 |
27 | SparkSession spark = SparkSession
28 | .builder()
29 | .appName("JavaWordCount")
30 | .getOrCreate();
31 |
32 | JavaRDD<String> lines = spark.read().textFile(args[0]).javaRDD();
33 |
34 | JavaRDD<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
35 | @Override
36 | public Iterator<String> call(String s) {
37 | return Arrays.asList(SPACE.split(s)).iterator();
38 | }
39 | });
40 |
41 | JavaPairRDD<String, Integer> ones = words.mapToPair(
42 | new PairFunction<String, String, Integer>() {
43 | @Override
44 | public Tuple2<String, Integer> call(String s) {
45 | return new Tuple2<>(s, 1);
46 | }
47 | });
48 |
49 | JavaPairRDD<String, Integer> counts = ones.reduceByKey(
50 | new Function2<Integer, Integer, Integer>() {
51 | @Override
52 | public Integer call(Integer i1, Integer i2) {
53 | return i1 + i2;
54 | }
55 | });
56 |
57 | List<Tuple2<String, Integer>> output = counts.collect();
58 | for (Tuple2<?, ?> tuple : output) {
59 | System.out.println(tuple._1() + ": " + tuple._2());
60 | }
61 | spark.stop();
62 | }
63 | }
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch4/PersistExample.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.packt.sfjd.ch4;
5 |
6 | import java.util.Arrays;
7 |
8 | import org.apache.log4j.Level;
9 | import org.apache.log4j.LogManager;
10 | import org.apache.log4j.Logger;
11 | import org.apache.spark.SparkConf;
12 | import org.apache.spark.api.java.JavaRDD;
13 | import org.apache.spark.api.java.JavaSparkContext;
14 | import org.apache.spark.api.java.function.VoidFunction;
15 | import org.apache.spark.sql.SparkSession;
16 | import org.apache.spark.sql.catalog.Function;
17 | import org.apache.spark.storage.StorageLevel;
18 |
19 | /**
20 | * @author sumit.kumar
21 | *
22 | */
23 | public class PersistExample {
24 |
25 | /**
26 | * @param args
27 | */
28 | public static void main(String[] args) {
29 | //C:\Users\sumit.kumar\Downloads\bin\warehouse
30 | //System.setProperty("hadoop.home.dir", "C:\\Users\\sumit.kumar\\Downloads");
31 | String logFile = "src/main/resources/Apology_by_Plato.txt"; // Should be some file on your system
32 | Logger rootLogger = LogManager.getRootLogger();
33 | rootLogger.setLevel(Level.WARN);
34 | SparkConf conf = new SparkConf().setMaster("local").setAppName("ActionExamples").set("spark.hadoop.validateOutputSpecs", "false");
35 | JavaSparkContext sparkContext = new JavaSparkContext(conf);
36 | JavaRDD<Integer> rdd = sparkContext.parallelize(Arrays.asList(1, 2, 3,4,5),3).cache();
37 | JavaRDD<Integer> evenRDD= rdd.filter(new org.apache.spark.api.java.function.Function<Integer, Boolean>() {
38 | @Override
39 | public Boolean call(Integer v1) throws Exception {
40 | return (v1 % 2) == 0;
41 | }
42 | });
43 |
44 | evenRDD.persist(StorageLevel.MEMORY_AND_DISK());
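// MEMORY_AND_DISK caches deserialized partitions in memory and spills the ones that do not
// fit to disk, so they are re-read rather than recomputed on subsequent actions.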
45 | evenRDD.foreach(new VoidFunction<Integer>() {
46 | @Override
47 | public void call(Integer t) throws Exception {
48 | System.out.println("The value of RDD are :"+t);
49 | }
50 | });
51 | //unpersisting the RDD
52 | evenRDD.unpersist();
53 | rdd.unpersist();
54 |
55 | /* JavaRDD lines = spark.read().textFile(logFile).javaRDD().cache();
56 | System.out.println("DEBUG: \n"+ lines.toDebugString());
57 | long word= lines.count();
58 | JavaRDD distinctLines=lines.distinct();
59 | System.out.println("DEBUG: \n"+ distinctLines.toDebugString());
60 | JavaRDD finalRdd=lines.subtract(distinctLines);
61 |
62 |
63 | System.out.println("DEBUG: \n"+ finalRdd.toDebugString());
64 | System.out.println("The count is "+word);
65 | System.out.println("The count is "+distinctLines.count());
66 | System.out.println("The count is "+finalRdd.count());
67 |
68 | finalRdd.foreach(new VoidFunction() {
69 |
70 | @Override
71 | public void call(String t) throws Exception {
72 | // TODO Auto-generated method stub
73 | System.out.println(t);
74 | }
75 | });
76 | */ /*SparkConf conf = new SparkConf().setAppName("Simple Application");
77 | JavaSparkContext sc = new JavaSparkContext(conf);
78 | StorageLevel newLevel;
79 | JavaRDD logData = sc.textFile(logFile).cache();
80 |
81 | long numAs = logData.filter(new Function(logFile, logFile, logFile, logFile, false) {
82 | public Boolean call(String s) { return s.contains("a"); }
83 | }).count();
84 |
85 | long numBs = logData.filter(new Function(logFile, logFile, logFile, logFile, false) {
86 | public Boolean call(String s) { return s.contains("b"); }
87 | }).count();
88 |
89 | System.out.println("Lines with a: " + numAs + ", lines with b: " + numBs);
90 |
91 | sc.stop();*/
92 |
93 | }
94 |
95 | }
96 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch4/SparkWordCount.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch4;
2 |
3 | import java.io.File;
4 | import java.util.Arrays;
5 |
6 | import org.apache.commons.io.FileUtils;
7 | import org.apache.spark.api.java.JavaPairRDD;
8 | import org.apache.spark.api.java.JavaRDD;
9 | import org.apache.spark.api.java.JavaSparkContext;
10 |
11 | import scala.Tuple2;
12 | //http://stackoverflow.com/questions/19620642/failed-to-locate-the-winutils-binary-in-the-hadoop-binary-path
13 |
14 | //http://www.javaworld.com/article/2972863/big-data/open-source-java-projects-apache-spark.html
15 |
16 | public class SparkWordCount {
17 | public static void main(String[] args) throws Exception {
18 | System.out.println(System.getProperty("hadoop.home.dir"));
19 | String inputPath = args[0];
20 | String outputPath = args[1];
21 | FileUtils.deleteQuietly(new File(outputPath));
22 |
23 | JavaSparkContext sc = new JavaSparkContext("local", "sparkwordcount");
24 |
25 | JavaRDD<String> rdd = sc.textFile(inputPath);
26 |
27 | JavaPairRDD<String, Integer> counts = rdd
28 | .flatMap(x -> Arrays.asList(x.split(" ")).iterator())
29 | .mapToPair(x -> new Tuple2<String, Integer>(x, 1))
30 | .reduceByKey((x, y) -> x + y);
31 |
32 | counts.saveAsTextFile(outputPath);
33 | sc.close();
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch4/SparkWordCount_1_7.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch4;
2 |
3 | import java.io.File;
4 | import java.util.Arrays;
5 | import java.util.Iterator;
6 |
7 | import org.apache.commons.io.FileUtils;
8 | import org.apache.spark.api.java.JavaPairRDD;
9 | import org.apache.spark.api.java.JavaRDD;
10 | import org.apache.spark.api.java.JavaSparkContext;
11 | import org.apache.spark.api.java.function.FlatMapFunction;
12 | import org.apache.spark.api.java.function.Function2;
13 | import org.apache.spark.api.java.function.PairFunction;
14 |
15 | import scala.Tuple2;
16 |
17 | public class SparkWordCount_1_7 {
18 | public static void main(String[] args) throws Exception {
19 | System.out.println(System.getProperty("hadoop.home.dir"));
20 | String inputPath = args[0];
21 | String outputPath = args[1];
22 | FileUtils.deleteQuietly(new File(outputPath));
23 |
24 | JavaSparkContext sc = new JavaSparkContext("local", "sparkwordcount1.7");
25 |
26 | JavaRDD<String> rdd = sc.textFile(inputPath);
27 |
28 | JavaPairRDD<String, Integer> counts = rdd
29 | .flatMap(new FlatMapFunction<String, String>() {
30 | public Iterator<String> call(String x) {
31 | return Arrays.asList(x.split(" ")).iterator();
32 | }
33 | }).mapToPair(new PairFunction<String, String, Integer>() {
34 | public Tuple2<String, Integer> call(String x) {
35 | return new Tuple2<String, Integer>(x, 1);
36 | }
37 | }).reduceByKey(new Function2<Integer, Integer, Integer>() {
38 | public Integer call(Integer x, Integer y) {
39 | return x + y;
40 | }
41 | });
42 | counts.saveAsTextFile(outputPath);
43 | sc.close();
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch4/WordCount.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch4;
2 |
3 | import org.apache.spark.SparkConf;
4 | import org.apache.spark.api.java.JavaPairRDD;
5 | import org.apache.spark.api.java.JavaRDD;
6 | import org.apache.spark.api.java.JavaSparkContext;
7 | import org.apache.spark.api.java.function.FlatMapFunction;
8 | import org.apache.spark.api.java.function.Function2;
9 | import org.apache.spark.api.java.function.PairFunction;
10 |
11 | import scala.Tuple2;
12 |
13 | import java.util.Arrays;
14 | import java.util.Iterator;
15 |
16 | /**
17 | * Sample Spark application that counts the words in a text file
18 | */
19 | public class WordCount
20 | {
21 |
22 | public static void wordCountJava7( String filename )
23 | {
24 | // Define a configuration to use to interact with Spark
25 | SparkConf conf = new SparkConf().setMaster("local").setAppName("Word Count App");
26 |
27 | // Create a Java version of the Spark Context from the configuration
28 | JavaSparkContext sc = new JavaSparkContext(conf);
29 |
30 | // Load the input data, which is a text file read from the command line
31 | JavaRDD<String> input = sc.textFile( filename );
32 |
33 | // Java 7 and earlier
34 | JavaRDD<String> words = input.flatMap(
35 | new FlatMapFunction<String, String>() {
36 | public Iterator<String> call(String s) {
37 | return Arrays.asList(s.split(" ")).iterator();
38 | }
39 | } );
40 |
41 | // Java 7 and earlier: transform the collection of words into pairs (word and 1)
42 | JavaPairRDD<String, Integer> counts = words.mapToPair(
43 | new PairFunction<String, String, Integer>(){
44 | public Tuple2<String, Integer> call(String s){
45 | return new Tuple2<String, Integer>(s, 1);
46 | }
47 | } );
48 |
49 | // Java 7 and earlier: count the words
50 | JavaPairRDD<String, Integer> reducedCounts = counts.reduceByKey(
51 | new Function2<Integer, Integer, Integer>(){
52 | public Integer call(Integer x, Integer y){ return x + y; }
53 | } );
54 |
55 | // Save the word count back out to a text file, causing evaluation.
56 | reducedCounts.saveAsTextFile( "output" );
57 | }
58 |
59 | public static void wordCountJava8( String filename )
60 | {
61 | // Define a configuration to use to interact with Spark
62 | SparkConf conf = new SparkConf().setMaster("local").setAppName("Word Count App");
63 |
64 | // Create a Java version of the Spark Context from the configuration
65 | JavaSparkContext sc = new JavaSparkContext(conf);
66 |
67 | // Load the input data, which is a text file read from the command line
68 | JavaRDD<String> input = sc.textFile( filename );
69 |
70 | // Java 8 with lambdas: split the input string into words
71 | // (in Spark 2.x, flatMap expects an Iterator, hence the iterator() call)
72 | JavaRDD<String> words = input.flatMap( s -> Arrays.asList( s.split( " " ) ).iterator() );
73 |
74 | // Java 8 with lambdas: transform the collection of words into pairs (word and 1) and then count them
75 | JavaPairRDD<String, Integer> counts = words.mapToPair( t -> new Tuple2<String, Integer>( t, 1 ) ).reduceByKey( (x, y) -> x + y );
76 |
77 | // Save the word count back out to a text file, causing evaluation.
78 | counts.saveAsTextFile( "output" );
79 | }
80 |
81 | public static void main( String[] args )
82 | {
83 | if( args.length == 0 )
84 | {
85 | System.out.println( "Usage: WordCount <file>" );
86 | System.exit( 0 );
87 | }
88 |
89 | wordCountJava8( args[ 0 ] );
90 | }
91 | }
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch4/transformations/Test.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch4.transformations;
2 |
3 | import java.io.Serializable;
4 |
5 | public class Test implements Serializable{//implements Comparable<Test>,Serializable{
6 |
7 | Test(int age)
8 | {
9 | this.age=age;
10 | }
11 | private int age;
12 |
13 |
14 | public int getAge() {
15 | return age;
16 | }
17 |
18 |
19 | public void setAge(int age) {
20 | this.age = age;
21 | }
22 |
23 |
24 | // @Override
25 | // public int compareTo(Test o) {
26 | //
27 | // return this.getAge()-o.getAge();
28 | // }
29 |
30 |
31 | @Override
32 | public String toString() {
33 | return "Test [age=" + age + "]";
34 | }
35 |
36 |
37 |
38 | }
39 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch4/transformations/TestMain.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch4.transformations;
2 |
3 | import java.util.ArrayList;
4 | import java.util.Collections;
5 | import java.util.List;
6 |
7 | public class TestMain {
8 | public static void main(String[] args) {
9 | List<Test> list =new ArrayList<>();
10 | list.add(new Test(5));
11 | list.add(new Test(3));
12 | list.add(new Test(6));
13 |
14 | //Collections.sort(list);
15 | list.forEach(t -> System.out.println(t.getAge()));
16 |
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/src/main/java/com/packt/sfjd/ch4/transformations/Transformations.java:
--------------------------------------------------------------------------------
1 | package com.packt.sfjd.ch4.transformations;
2 |
3 | import java.util.ArrayList;
4 | import java.util.Arrays;
5 | import java.util.Iterator;
6 | import java.util.List;
7 | import java.util.stream.Collector;
8 | import java.util.stream.Collectors;
9 |
10 | import org.apache.spark.Partitioner;
11 | import org.apache.spark.SparkConf;
12 | import org.apache.spark.SparkContext;
13 | import org.apache.spark.api.java.JavaPairRDD;
14 | import org.apache.spark.api.java.JavaRDD;
15 | import org.apache.spark.api.java.JavaSparkContext;
16 | import org.apache.spark.api.java.function.Function;
17 | import org.apache.spark.api.java.function.Function2;
18 | import org.apache.spark.rdd.RDD;
19 |
20 | import scala.Tuple2;
21 |
22 | public class Transformations {
23 | public static void main(String[] args) {
24 | SparkConf conf = new SparkConf().setMaster("local").setAppName("ApacheSparkForJavaDevelopers");
25 | // SparkContext context =new SparkContext(conf);
26 | // RDD textFile = context.textFile("abc", 1);
27 |
28 | JavaSparkContext javaSparkContext = new JavaSparkContext(conf);
29 |
30 | List<Integer> intList = Arrays.asList(1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
31 |
32 | JavaRDD<Integer> intRDD = javaSparkContext.parallelize(intList, 2);
33 | // intRDD.repartition(2);
34 |
35 | // Map Transformation
36 | JavaRDD<Integer> mappedRDD = intRDD.map(x -> x + 1);
37 |
38 | // Map with partitions
39 | JavaRDD<Integer> mapPartitions = intRDD.mapPartitions(iterator -> {
40 | int sum = 0;
41 | while (iterator.hasNext()) {
42 | sum += iterator.next();
43 | }
44 | return Arrays.asList(sum).iterator();
45 | });
46 |
47 | // map partitions with index
48 | JavaRDD<String> mapPartitionsWithIndex = intRDD
49 | .mapPartitionsWithIndex(new Function2<Integer, Iterator<Integer>, Iterator<String>>() {
50 |
51 | /**
52 | *
53 | */
54 | private static final long serialVersionUID = 739746028261776589L;
55 |
56 | @Override
57 | public Iterator<String> call(Integer index, Iterator<Integer> iterator) throws Exception {
58 | int sum = 0;
59 | while (iterator.hasNext()) {
60 | sum += iterator.next();
61 | }
62 | return Arrays.asList(index + ":" + sum).iterator();
63 | }
64 | }, true);
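// With the 11-element list above split across 2 partitions, this would typically yield entries
// like "0:11" and "1:45" (partition index : sum of that partition's elements), though the exact
// split depends on how parallelize() slices the list.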
65 |
66 | // filter RDD
67 | JavaRDD<Integer> filter = intRDD.filter(x -> (x % 2 == 0));
68 |
69 | JavaRDD<String> stringRDD = javaSparkContext.parallelize(Arrays.asList("Hello Spark", "Hello Java"));
70 |
71 | // flat map
72 |
73 | JavaRDD<String> flatMap = stringRDD.flatMap(t -> Arrays.asList(t.split(" ")).iterator());
74 | // map to pair
75 |
76 | JavaPairRDD<String, Integer> pairRDD = intRDD.mapToPair(
77 | i -> (i % 2 == 0) ? new Tuple2<String, Integer>("even", i) : new Tuple2<String, Integer>("odd", i));
78 |
79 | // flat map to pair
80 |
81 | JavaPairRDD<String, Integer> flatMapToPair = stringRDD.flatMapToPair(s -> Arrays.asList(s.split(" ")).stream()
82 | .map(token -> new Tuple2<String, Integer>(token, token.length())).collect(Collectors.toList())
83 | .iterator());
84 | // List<Tuple2<String, Integer>> list =new ArrayList<>();
85 | // for (String token : s.split(" ")) {
86 | // list.add(new Tuple2<String, Integer>(token, token.length()));
87 | //
88 | // }
89 | // return list.iterator();
90 |
91 | // sample
92 | JavaRDD<Integer> sample = intRDD.sample(true, 2);
93 |
94 | // union
95 | JavaRDD