() {
35 | @Override
36 | public Double map(Integer count) throws Exception {
37 | return count / (double) 10000 * 4;
38 | }
39 | }).print();
40 |
41 |
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/chapter03/flink-batch/src/main/java/com/demo/flink/batch/OlympicsAthletesBatchJob.java:
--------------------------------------------------------------------------------
1 | package com.demo.flink.batch;
2 |
3 |
4 | import org.apache.flink.api.java.DataSet;
5 | import org.apache.flink.api.java.ExecutionEnvironment;
6 | import org.apache.flink.api.common.functions.FlatMapFunction;
7 | import org.apache.flink.api.java.tuple.Tuple2;
8 |
9 | import org.apache.flink.util.Collector;
10 |
11 | /**
12 | * Implements the Oylympics Athletes program that gives insights about games played and medals won.
13 | *
14 | * Sample input file is provided in src/main/resources/data folder
15 | *
16 | * This example shows how to:
17 | *
18 | * - write a simple Flink batch program.
19 | *
- use Tuple data types.
20 | *
- write and use user-defined functions.
21 | *
22 | *
23 | */
24 | public class OlympicsAthletesBatchJob {
25 |
26 |
27 | public static void main(String[] args) throws Exception {
28 |
29 | // set up the execution environment
30 | final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
31 |
32 | DataSet csvInput = env.readCsvFile("olympic-athletes.csv")
33 | .pojoType(Record.class, "playerName", "country", "year", "game", "gold", "silver", "bronze", "total");
34 |
35 | DataSet> groupedByCountry = csvInput
36 | .flatMap(new FlatMapFunction>() {
37 |
38 | private static final long serialVersionUID = 1L;
39 |
40 | @Override
41 | public void flatMap(Record record, Collector> out) throws Exception {
42 |
43 | out.collect(new Tuple2(record.getCountry(), 1));
44 | }
45 | }).groupBy(0).sum(1);
46 | groupedByCountry.print();
47 |
48 | DataSet> groupedByGame = csvInput
49 | .flatMap(new FlatMapFunction>() {
50 |
51 | private static final long serialVersionUID = 1L;
52 |
53 | @Override
54 | public void flatMap(Record record, Collector> out) throws Exception {
55 |
56 | out.collect(new Tuple2(record.getGame(), 1));
57 | }
58 | }).groupBy(0).sum(1);
59 | groupedByGame.print();
60 |
61 | }
62 |
63 | }
64 |
--------------------------------------------------------------------------------
/chapter03/flink-batch/src/main/java/com/demo/flink/batch/Record.java:
--------------------------------------------------------------------------------
1 | package com.demo.flink.batch;
2 |
/**
 * POJO representing one row of the olympic-athletes CSV file: a player, the
 * country and game they competed in, and their medal counts.
 *
 * <p>Kept as a mutable JavaBean (public no-arg constructor plus getters and
 * setters for every field) so Flink's CSV reader can populate it by field name.
 */
public class Record {

	private String playerName;
	private String country;
	private int year;
	private String game;
	private int gold;
	private int silver;
	private int bronze;
	private int total;

	public String getPlayerName() {
		return playerName;
	}

	public void setPlayerName(String playerName) {
		this.playerName = playerName;
	}

	public String getCountry() {
		return country;
	}

	public void setCountry(String country) {
		this.country = country;
	}

	public int getYear() {
		return year;
	}

	public void setYear(int year) {
		this.year = year;
	}

	public String getGame() {
		return game;
	}

	public void setGame(String game) {
		this.game = game;
	}

	public int getGold() {
		return gold;
	}

	public void setGold(int gold) {
		this.gold = gold;
	}

	public int getSilver() {
		return silver;
	}

	public void setSilver(int silver) {
		this.silver = silver;
	}

	public int getBronze() {
		return bronze;
	}

	public void setBronze(int bronze) {
		this.bronze = bronze;
	}

	public int getTotal() {
		return total;
	}

	public void setTotal(int total) {
		this.total = total;
	}

	/** Human-readable dump of every field, mainly for print() output. */
	@Override
	public String toString() {
		return String.format(
				"Record [playerName=%s, country=%s, year=%d, game=%s, gold=%d, silver=%d, bronze=%d, total=%d]",
				playerName, country, year, game, gold, silver, bronze, total);
	}

}
85 |
--------------------------------------------------------------------------------
/chapter03/flink-batch/src/main/resources/data/olympic-athletes.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deshpandetanmay/mastering-flink/397d7b84e46d6e9c6efa1cd4743173ae5f84b0e0/chapter03/flink-batch/src/main/resources/data/olympic-athletes.csv
--------------------------------------------------------------------------------
/chapter03/flink-batch/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | ################################################################################
18 |
19 | log4j.rootLogger=INFO, console
20 |
21 | log4j.appender.console=org.apache.log4j.ConsoleAppender
22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
23 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n
24 |
--------------------------------------------------------------------------------
/chapter04/flink-table/pom.xml:
--------------------------------------------------------------------------------
1 |
11 |
13 | 4.0.0
14 |
15 | com.demo
16 | flink-table
17 | 1.0
18 | jar
19 |
20 | Flink Quickstart Job
21 | http://www.myorganization.org
22 |
23 |
24 | UTF-8
25 | 1.1.4
26 | 1.7.7
27 | 1.2.17
28 |
29 |
30 |
31 |
32 | apache.snapshots
33 | Apache Development Snapshot Repository
34 | https://repository.apache.org/content/repositories/snapshots/
35 |
36 | false
37 |
38 |
39 | true
40 |
41 |
42 |
43 |
44 |
55 |
56 |
57 |
58 |
59 | org.apache.flink
60 | flink-java
61 | ${flink.version}
62 |
63 |
64 | org.apache.flink
65 | flink-streaming-java_2.11
66 | ${flink.version}
67 |
68 |
69 | org.apache.flink
70 | flink-clients_2.11
71 | ${flink.version}
72 |
73 |
74 |
75 | org.apache.flink
76 | flink-table_2.11
77 | ${flink.version}
78 |
79 |
80 |
81 |
83 |
84 | org.slf4j
85 | slf4j-log4j12
86 | ${slf4j.version}
87 |
88 |
89 | log4j
90 | log4j
91 | ${log4j.version}
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 | build-jar
100 |
101 |
102 | false
103 |
104 |
105 |
106 |
107 | org.apache.flink
108 | flink-java
109 | ${flink.version}
110 | provided
111 |
112 |
113 | org.apache.flink
114 | flink-streaming-java_2.11
115 | ${flink.version}
116 | provided
117 |
118 |
119 | org.apache.flink
120 | flink-clients_2.11
121 | ${flink.version}
122 | provided
123 |
124 |
125 | org.slf4j
126 | slf4j-log4j12
127 | ${slf4j.version}
128 | provided
129 |
130 |
131 | log4j
132 | log4j
133 | ${log4j.version}
134 | provided
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 | org.apache.maven.plugins
143 | maven-shade-plugin
144 | 2.4.1
145 |
146 |
147 | package
148 |
149 | shade
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
170 |
171 | org.apache.maven.plugins
172 | maven-shade-plugin
173 | 2.4.1
174 |
175 |
176 |
177 | package
178 |
179 | shade
180 |
181 |
182 |
183 |
184 |
186 | org.apache.flink:flink-annotations
187 | org.apache.flink:flink-shaded-hadoop2
188 | org.apache.flink:flink-shaded-curator-recipes
189 | org.apache.flink:flink-core
190 | org.apache.flink:flink-java
191 | org.apache.flink:flink-scala_2.11
192 | org.apache.flink:flink-runtime_2.11
193 | org.apache.flink:flink-optimizer_2.11
194 | org.apache.flink:flink-clients_2.11
195 | org.apache.flink:flink-avro_2.11
196 | org.apache.flink:flink-examples-batch_2.11
197 | org.apache.flink:flink-examples-streaming_2.11
198 | org.apache.flink:flink-streaming-java_2.11
199 | org.apache.flink:flink-streaming-scala_2.11
200 | org.apache.flink:flink-scala-shell_2.11
201 | org.apache.flink:flink-python
202 | org.apache.flink:flink-metrics-core
203 | org.apache.flink:flink-metrics-jmx
204 | org.apache.flink:flink-statebackend-rocksdb_2.11
205 |
206 |
209 |
210 | log4j:log4j
211 | org.scala-lang:scala-library
212 | org.scala-lang:scala-compiler
213 | org.scala-lang:scala-reflect
214 | com.data-artisans:flakka-actor_*
215 | com.data-artisans:flakka-remote_*
216 | com.data-artisans:flakka-slf4j_*
217 | io.netty:netty-all
218 | io.netty:netty
219 | commons-fileupload:commons-fileupload
220 | org.apache.avro:avro
221 | commons-collections:commons-collections
222 | org.codehaus.jackson:jackson-core-asl
223 | org.codehaus.jackson:jackson-mapper-asl
224 | com.thoughtworks.paranamer:paranamer
225 | org.xerial.snappy:snappy-java
226 | org.apache.commons:commons-compress
227 | org.tukaani:xz
228 | com.esotericsoftware.kryo:kryo
229 | com.esotericsoftware.minlog:minlog
230 | org.objenesis:objenesis
231 | com.twitter:chill_*
232 | com.twitter:chill-java
233 | commons-lang:commons-lang
234 | junit:junit
235 | org.apache.commons:commons-lang3
236 | org.slf4j:slf4j-api
237 | org.slf4j:slf4j-log4j12
238 | log4j:log4j
239 | org.apache.commons:commons-math
240 | org.apache.sling:org.apache.sling.commons.json
241 | commons-logging:commons-logging
242 | commons-codec:commons-codec
243 | com.fasterxml.jackson.core:jackson-core
244 | com.fasterxml.jackson.core:jackson-databind
245 | com.fasterxml.jackson.core:jackson-annotations
246 | stax:stax-api
247 | com.typesafe:config
248 | org.uncommons.maths:uncommons-maths
249 | com.github.scopt:scopt_*
250 | commons-io:commons-io
251 | commons-cli:commons-cli
252 |
253 |
254 |
255 |
256 | org.apache.flink:*
257 |
258 |
259 | org/apache/flink/shaded/com/**
260 | web-docs/**
261 |
262 |
263 |
264 |
266 | *:*
267 |
268 | META-INF/*.SF
269 | META-INF/*.DSA
270 | META-INF/*.RSA
271 |
272 |
273 |
274 |
276 |
278 | false
279 |
280 |
281 |
282 |
283 |
284 |
285 | org.apache.maven.plugins
286 | maven-compiler-plugin
287 | 3.1
288 |
289 | 1.7
290 | 1.7
291 |
292 |
293 |
294 |
295 |
296 |
298 |
312 |
313 |
314 |
315 |
--------------------------------------------------------------------------------
/chapter04/flink-table/src/main/java/com/demo/flink/table/BatchJob.java:
--------------------------------------------------------------------------------
1 | package com.demo.flink.table;
2 |
3 | import org.apache.flink.api.java.DataSet;
4 | import org.apache.flink.api.java.ExecutionEnvironment;
5 | import org.apache.flink.api.java.table.BatchTableEnvironment;
6 | import org.apache.flink.api.table.Table;
7 | import org.apache.flink.api.table.TableEnvironment;
8 |
9 | public class BatchJob {
10 |
11 | public static void main(String[] args) throws Exception {
12 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
13 | BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);
14 |
15 | DataSet csvInput = env
16 | .readCsvFile("D://NOTBACKEDUP//dataflow//flink-table//src//main//resources//data//olympic-athletes.csv")
17 | .pojoType(Record.class, "playerName", "country", "year", "game", "gold", "silver", "bronze", "total");
18 | // register the DataSet athletes as table "athletes" with fields derived
19 | // from the dataset
20 | Table atheltes = tableEnv.fromDataSet(csvInput);
21 | tableEnv.registerTable("athletes", atheltes);
22 | // run a SQL query on the Table and retrieve the result as a new Table
23 | Table groupedByCountry = tableEnv.sql("SELECT country, SUM(total) as frequency FROM athletes group by country");
24 |
25 | DataSet result = tableEnv.toDataSet(groupedByCountry, Result.class);
26 |
27 | result.print();
28 |
29 | Table groupedByGame = atheltes.groupBy("game").select("game, total.sum as frequency");
30 |
31 | DataSet gameResult = tableEnv.toDataSet(groupedByGame, GameResult.class);
32 |
33 | gameResult.print();
34 |
35 | }
36 |
37 | public static class Result {
38 | public String country;
39 | public Integer frequency;
40 |
41 | public Result() {
42 | super();
43 | }
44 |
45 | public Result(String country, Integer total) {
46 | this.country = country;
47 | this.frequency = total;
48 | }
49 |
50 | @Override
51 | public String toString() {
52 | return "Result " + country + " " + frequency;
53 | }
54 | }
55 |
56 | public static class GameResult {
57 | public String game;
58 | public Integer frequency;
59 |
60 | public GameResult(String game, Integer frequency) {
61 | super();
62 | this.game = game;
63 | this.frequency = frequency;
64 | }
65 |
66 | public GameResult() {
67 | super();
68 | }
69 |
70 | @Override
71 | public String toString() {
72 | return "GameResult [game=" + game + ", frequency=" + frequency + "]";
73 | }
74 |
75 | }
76 | }
77 |
--------------------------------------------------------------------------------
/chapter04/flink-table/src/main/java/com/demo/flink/table/Record.java:
--------------------------------------------------------------------------------
1 | package com.demo.flink.table;
2 |
/**
 * Mutable bean describing a single olympic-athletes CSV row (player, country,
 * year, game, and medal counts). The no-arg constructor and accessor pairs are
 * required so the row can be populated reflectively by field name.
 */
public class Record {

	private String playerName;
	private String country;
	private int year;
	private String game;
	private int gold;
	private int silver;
	private int bronze;
	private int total;

	public String getPlayerName() {
		return playerName;
	}

	public void setPlayerName(String playerName) {
		this.playerName = playerName;
	}

	public String getCountry() {
		return country;
	}

	public void setCountry(String country) {
		this.country = country;
	}

	public int getYear() {
		return year;
	}

	public void setYear(int year) {
		this.year = year;
	}

	public String getGame() {
		return game;
	}

	public void setGame(String game) {
		this.game = game;
	}

	public int getGold() {
		return gold;
	}

	public void setGold(int gold) {
		this.gold = gold;
	}

	public int getSilver() {
		return silver;
	}

	public void setSilver(int silver) {
		this.silver = silver;
	}

	public int getBronze() {
		return bronze;
	}

	public void setBronze(int bronze) {
		this.bronze = bronze;
	}

	public int getTotal() {
		return total;
	}

	public void setTotal(int total) {
		this.total = total;
	}

	/** Renders all fields in "Record [name=value, ...]" form for debugging. */
	@Override
	public String toString() {
		StringBuilder sb = new StringBuilder("Record [");
		sb.append("playerName=").append(playerName);
		sb.append(", country=").append(country);
		sb.append(", year=").append(year);
		sb.append(", game=").append(game);
		sb.append(", gold=").append(gold);
		sb.append(", silver=").append(silver);
		sb.append(", bronze=").append(bronze);
		sb.append(", total=").append(total);
		return sb.append("]").toString();
	}

}
85 |
--------------------------------------------------------------------------------
/chapter04/flink-table/src/main/resources/data/olympic-athletes.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deshpandetanmay/mastering-flink/397d7b84e46d6e9c6efa1cd4743173ae5f84b0e0/chapter04/flink-table/src/main/resources/data/olympic-athletes.csv
--------------------------------------------------------------------------------
/chapter04/flink-table/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | ################################################################################
18 |
19 | log4j.rootLogger=INFO, console
20 |
21 | log4j.appender.console=org.apache.log4j.ConsoleAppender
22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
23 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n
24 |
--------------------------------------------------------------------------------
/chapter05/pom.xml:
--------------------------------------------------------------------------------
1 |
3 | 4.0.0
4 |
5 | com.demo
6 | chapter05
7 | 1.0
8 | jar
9 |
10 | chapter05
11 | http://maven.apache.org
12 |
13 |
14 | UTF-8
15 |
16 |
17 |
18 |
19 | junit
20 | junit
21 | 3.8.1
22 | test
23 |
24 |
25 |
26 | org.apache.flink
27 | flink-cep-scala_2.10
28 | 1.1.2
29 |
30 |
31 |
32 | org.apache.flink
33 | flink-streaming-java_2.10
34 | 1.1.2
35 |
36 |
37 |
38 | org.apache.flink
39 | flink-streaming-scala_2.10
40 | 1.1.2
41 |
42 |
43 | org.apache.flink
44 | flink-connector-kafka-0.9_2.10
45 | 1.0.0
46 |
47 |
48 |
49 |
50 |
--------------------------------------------------------------------------------
/chapter05/src/main/java/com/demo/chapter05/Alert.java:
--------------------------------------------------------------------------------
1 | package com.demo.chapter05;
2 |
3 | public class Alert {
4 |
5 | private String message;
6 |
7 | public String getMessage() {
8 | return message;
9 | }
10 |
11 | public void setMessage(String message) {
12 | this.message = message;
13 | }
14 |
15 | public Alert(String message) {
16 | super();
17 | this.message = message;
18 | }
19 |
20 | @Override
21 | public String toString() {
22 | return "Alert [message=" + message + "]";
23 | }
24 |
25 | @Override
26 | public int hashCode() {
27 | final int prime = 31;
28 | int result = 1;
29 | result = prime * result + ((message == null) ? 0 : message.hashCode());
30 | return result;
31 | }
32 |
33 | @Override
34 | public boolean equals(Object obj) {
35 | if (this == obj)
36 | return true;
37 | if (obj == null)
38 | return false;
39 | if (getClass() != obj.getClass())
40 | return false;
41 | Alert other = (Alert) obj;
42 | if (message == null) {
43 | if (other.message != null)
44 | return false;
45 | } else if (!message.equals(other.message))
46 | return false;
47 | return true;
48 | }
49 |
50 |
51 | }
52 |
--------------------------------------------------------------------------------
/chapter05/src/main/java/com/demo/chapter05/App.java:
--------------------------------------------------------------------------------
1 | package com.demo.chapter05;
2 |
3 | import java.util.Map;
4 |
5 | import org.apache.flink.api.common.functions.FilterFunction;
6 | import org.apache.flink.cep.CEP;
7 | import org.apache.flink.cep.PatternSelectFunction;
8 | import org.apache.flink.cep.pattern.Pattern;
9 | import org.apache.flink.streaming.api.datastream.DataStream;
10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
11 | import org.apache.flink.streaming.api.windowing.time.Time;
12 |
13 | public class App {
14 | public static void main(String[] args) throws Exception {
15 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
16 | DataStream inputEventStream = env.fromElements(new TemperatureEvent("xyz", 22.0),
17 | new TemperatureEvent("xyz", 20.1), new TemperatureEvent("xyz", 21.1), new TemperatureEvent("xyz", 22.2),
18 | new TemperatureEvent("xyz", 29.1), new TemperatureEvent("xyz", 22.3), new TemperatureEvent("xyz", 22.1),
19 | new TemperatureEvent("xyz", 22.4), new TemperatureEvent("xyz", 22.7),
20 | new TemperatureEvent("xyz", 27.0));
21 |
22 | Pattern warningPattern = Pattern. begin("first")
23 | .subtype(TemperatureEvent.class).where(new FilterFunction() {
24 | private static final long serialVersionUID = 1L;
25 |
26 | public boolean filter(TemperatureEvent value) {
27 | if (value.getTemperature() >= 26.0) {
28 | return true;
29 | }
30 | return false;
31 | }
32 | }).within(Time.seconds(10));
33 |
34 | DataStream patternStream = CEP.pattern(inputEventStream, warningPattern)
35 | .select(new PatternSelectFunction() {
36 | private static final long serialVersionUID = 1L;
37 |
38 | public Alert select(Map event) throws Exception {
39 |
40 | return new Alert("Temperature Rise Detected");
41 | }
42 |
43 | });
44 |
45 | patternStream.print();
46 | env.execute("CEP on Temperature Sensor");
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/chapter05/src/main/java/com/demo/chapter05/EventDeserializationSchema.java:
--------------------------------------------------------------------------------
1 | package com.demo.chapter05;
2 |
3 | import java.io.IOException;
4 | import java.nio.charset.StandardCharsets;
5 |
6 | import org.apache.flink.api.common.typeinfo.TypeInformation;
7 | import org.apache.flink.api.java.typeutils.TypeExtractor;
8 | import org.apache.flink.streaming.util.serialization.DeserializationSchema;
9 |
10 | public class EventDeserializationSchema implements DeserializationSchema {
11 |
12 | public TypeInformation getProducedType() {
13 | return TypeExtractor.getForClass(TemperatureEvent.class);
14 | }
15 |
16 | public TemperatureEvent deserialize(byte[] arg0) throws IOException {
17 | String str = new String(arg0, StandardCharsets.UTF_8);
18 |
19 | String[] parts = str.split("=");
20 | return new TemperatureEvent(parts[0], Double.parseDouble(parts[1]));
21 | }
22 |
23 | public boolean isEndOfStream(TemperatureEvent arg0) {
24 | return false;
25 | }
26 |
27 | }
28 |
--------------------------------------------------------------------------------
/chapter05/src/main/java/com/demo/chapter05/KafkaApp.java:
--------------------------------------------------------------------------------
1 | package com.demo.chapter05;
2 |
3 | import java.util.Map;
4 | import java.util.Properties;
5 |
6 | import org.apache.flink.api.common.functions.FilterFunction;
7 | import org.apache.flink.cep.CEP;
8 | import org.apache.flink.cep.PatternSelectFunction;
9 | import org.apache.flink.cep.pattern.Pattern;
10 | import org.apache.flink.streaming.api.datastream.DataStream;
11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
12 | import org.apache.flink.streaming.api.windowing.time.Time;
13 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer09;
14 |
15 | public class KafkaApp {
16 | public static void main(String[] args) throws Exception {
17 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
18 |
19 | Properties properties = new Properties();
20 | properties.setProperty("bootstrap.servers", "localhost:9092");
21 | properties.setProperty("group.id", "test");
22 |
23 | DataStream inputEventStream = env.addSource(
24 | new FlinkKafkaConsumer09("test", new EventDeserializationSchema(), properties));
25 |
26 | Pattern warningPattern = Pattern. begin("first")
27 | .subtype(TemperatureEvent.class).where(new FilterFunction() {
28 | private static final long serialVersionUID = 1L;
29 |
30 | public boolean filter(TemperatureEvent value) {
31 | if (value.getTemperature() >= 26.0) {
32 | return true;
33 | }
34 | return false;
35 | }
36 | }).within(Time.seconds(10));
37 |
38 | DataStream patternStream = CEP.pattern(inputEventStream, warningPattern)
39 | .select(new PatternSelectFunction() {
40 | private static final long serialVersionUID = 1L;
41 |
42 | public Alert select(Map event) throws Exception {
43 |
44 | return new Alert("Temperature Rise Detected:" + event.get("first").getTemperature()
45 | + " on machine name:" + event.get("first").getMachineName());
46 | }
47 |
48 | });
49 |
50 | patternStream.print();
51 | env.execute("CEP on Temperature Sensor");
52 | }
53 | }
54 |
--------------------------------------------------------------------------------
/chapter05/src/main/java/com/demo/chapter05/MonitoringEvent.java:
--------------------------------------------------------------------------------
1 | package com.demo.chapter05;
2 |
/**
 * Base class for monitoring events; identifies the machine an event came from.
 * Subclasses must call {@code super.equals}/{@code super.hashCode} when
 * extending equality with their own state.
 */
public abstract class MonitoringEvent {

	private String machineName;

	public MonitoringEvent(String machineName) {
		super();
		this.machineName = machineName;
	}

	public String getMachineName() {
		return machineName;
	}

	public void setMachineName(String machineName) {
		this.machineName = machineName;
	}

	// 31 + hash keeps the exact value the previous hand-rolled version
	// produced (prime * 1 + nullSafeHash), so subclass hashes are unchanged.
	@Override
	public int hashCode() {
		final int prime = 31;
		return prime + java.util.Objects.hashCode(machineName);
	}

	@Override
	public boolean equals(Object obj) {
		if (this == obj) {
			return true;
		}
		if (obj == null || getClass() != obj.getClass()) {
			return false;
		}
		MonitoringEvent other = (MonitoringEvent) obj;
		return java.util.Objects.equals(machineName, other.machineName);
	}

}
46 |
--------------------------------------------------------------------------------
/chapter05/src/main/java/com/demo/chapter05/TemperatureEvent.java:
--------------------------------------------------------------------------------
1 | package com.demo.chapter05;
2 |
3 | public class TemperatureEvent extends MonitoringEvent {
4 |
5 | public TemperatureEvent(String machineName) {
6 | super(machineName);
7 | }
8 |
9 | private double temperature;
10 |
11 | public double getTemperature() {
12 | return temperature;
13 | }
14 |
15 | public void setTemperature(double temperature) {
16 | this.temperature = temperature;
17 | }
18 |
19 | @Override
20 | public int hashCode() {
21 | final int prime = 31;
22 | int result = super.hashCode();
23 | long temp;
24 | temp = Double.doubleToLongBits(temperature);
25 | result = prime * result + (int) (temp ^ (temp >>> 32));
26 | return result;
27 | }
28 |
29 | @Override
30 | public boolean equals(Object obj) {
31 | if (this == obj)
32 | return true;
33 | if (!super.equals(obj))
34 | return false;
35 | if (getClass() != obj.getClass())
36 | return false;
37 | TemperatureEvent other = (TemperatureEvent) obj;
38 | if (Double.doubleToLongBits(temperature) != Double.doubleToLongBits(other.temperature))
39 | return false;
40 | return true;
41 | }
42 |
43 | public TemperatureEvent(String machineName, double temperature) {
44 | super(machineName);
45 | this.temperature = temperature;
46 | }
47 |
48 | @Override
49 | public String toString() {
50 | return "TemperatureEvent [getTemperature()=" + getTemperature() + ", getMachineName()=" + getMachineName()
51 | + "]";
52 | }
53 |
54 | }
55 |
--------------------------------------------------------------------------------
/chapter05/src/test/java/com/demo/chapter05/AppTest.java:
--------------------------------------------------------------------------------
1 | package com.demo.chapter05;
2 |
3 | import junit.framework.Test;
4 | import junit.framework.TestCase;
5 | import junit.framework.TestSuite;
6 |
7 | /**
8 | * Unit test for simple App.
9 | */
10 | public class AppTest
11 | extends TestCase
12 | {
13 | /**
14 | * Create the test case
15 | *
16 | * @param testName name of the test case
17 | */
18 | public AppTest( String testName )
19 | {
20 | super( testName );
21 | }
22 |
23 | /**
24 | * @return the suite of tests being tested
25 | */
26 | public static Test suite()
27 | {
28 | return new TestSuite( AppTest.class );
29 | }
30 |
31 | /**
32 | * Rigourous Test :-)
33 | */
34 | public void testApp()
35 | {
36 | assertTrue( true );
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/chapter06/flink-ml/pom.xml:
--------------------------------------------------------------------------------
1 |
11 |
13 | 4.0.0
14 |
15 | com.demo
16 | flink-ml
17 | 1.0
18 | jar
19 |
20 | Flink Quickstart Job
21 | http://www.myorganization.org
22 |
23 |
24 |
25 | apache.snapshots
26 | Apache Development Snapshot Repository
27 | https://repository.apache.org/content/repositories/snapshots/
28 |
29 | false
30 |
31 |
32 | true
33 |
34 |
35 |
36 |
37 |
38 | UTF-8
39 | 1.1.4
40 |
41 |
42 |
53 |
54 |
55 |
56 | org.apache.flink
57 | flink-scala_2.11
58 | ${flink.version}
59 |
60 |
61 | org.apache.flink
62 | flink-streaming-scala_2.11
63 | ${flink.version}
64 |
65 |
66 | org.apache.flink
67 | flink-clients_2.11
68 | ${flink.version}
69 |
70 |
71 | org.apache.flink
72 | flink-ml_2.11
73 | ${flink.version}
74 |
75 |
76 |
77 |
81 |
82 |
83 |
87 |
88 | org.apache.maven.plugins
89 | maven-shade-plugin
90 | 2.4.1
91 |
92 |
93 |
94 | package
95 |
96 | shade
97 |
98 |
99 |
100 |
101 |
103 | org.apache.flink:flink-shaded-*_2.11
104 | org.apache.flink:flink-core_2.11
105 | org.apache.flink:flink-java_2.11
106 | org.apache.flink:flink-scala_2.11
107 | org.apache.flink:flink-runtime_2.11
108 | org.apache.flink:flink-optimizer_2.11
109 | org.apache.flink:flink-clients_2.11
110 | org.apache.flink:flink-avro_2.11
111 | org.apache.flink:flink-java-examples_2.11
112 | org.apache.flink:flink-scala-examples_2.11
113 | org.apache.flink:flink-streaming-examples_2.11
114 | org.apache.flink:flink-streaming-java_2.11
115 |
116 |
119 |
120 | org.scala-lang:scala-library
121 | org.scala-lang:scala-compiler
122 | org.scala-lang:scala-reflect
123 | com.amazonaws:aws-java-sdk
124 | com.typesafe.akka:akka-actor_*
125 | com.typesafe.akka:akka-remote_*
126 | com.typesafe.akka:akka-slf4j_*
127 | io.netty:netty-all
128 | io.netty:netty
129 | org.eclipse.jetty:jetty-server
130 | org.eclipse.jetty:jetty-continuation
131 | org.eclipse.jetty:jetty-http
132 | org.eclipse.jetty:jetty-io
133 | org.eclipse.jetty:jetty-util
134 | org.eclipse.jetty:jetty-security
135 | org.eclipse.jetty:jetty-servlet
136 | commons-fileupload:commons-fileupload
137 | org.apache.avro:avro
138 | commons-collections:commons-collections
139 | org.codehaus.jackson:jackson-core-asl
140 | org.codehaus.jackson:jackson-mapper-asl
141 | com.thoughtworks.paranamer:paranamer
142 | org.xerial.snappy:snappy-java
143 | org.apache.commons:commons-compress
144 | org.tukaani:xz
145 | com.esotericsoftware.kryo:kryo
146 | com.esotericsoftware.minlog:minlog
147 | org.objenesis:objenesis
148 | com.twitter:chill_*
149 | com.twitter:chill-java
150 | com.twitter:chill-avro_*
151 | com.twitter:chill-bijection_*
152 | com.twitter:bijection-core_*
153 | com.twitter:bijection-avro_*
154 | commons-lang:commons-lang
155 | junit:junit
156 | de.javakaffee:kryo-serializers
157 | joda-time:joda-time
158 | org.apache.commons:commons-lang3
159 | org.slf4j:slf4j-api
160 | org.slf4j:slf4j-log4j12
161 | log4j:log4j
162 | org.apache.commons:commons-math
163 | org.apache.sling:org.apache.sling.commons.json
164 | commons-logging:commons-logging
165 | org.apache.httpcomponents:httpclient
166 | org.apache.httpcomponents:httpcore
167 | commons-codec:commons-codec
168 | com.fasterxml.jackson.core:jackson-core
169 | com.fasterxml.jackson.core:jackson-databind
170 | com.fasterxml.jackson.core:jackson-annotations
171 | org.codehaus.jettison:jettison
172 | stax:stax-api
173 | com.typesafe:config
174 | org.uncommons.maths:uncommons-maths
175 | com.github.scopt:scopt_*
176 | org.mortbay.jetty:servlet-api
177 | commons-io:commons-io
178 | commons-cli:commons-cli
179 |
180 |
181 |
182 |
183 | org.apache.flink:*
184 |
185 | org/apache/flink/shaded/**
186 | web-docs/**
187 |
188 |
189 |
190 |
192 | *:*
193 |
194 | META-INF/*.SF
195 | META-INF/*.DSA
196 | META-INF/*.RSA
197 |
198 |
199 |
200 |
201 |
202 |
204 | com.demo.flink.ml.Job
205 |
206 |
207 | false
208 |
209 |
210 |
211 |
212 |
213 |
214 | org.apache.maven.plugins
215 | maven-compiler-plugin
216 | 3.1
217 |
218 | 1.7
219 | 1.7
220 |
221 |
222 |
223 | net.alchim31.maven
224 | scala-maven-plugin
225 | 3.1.4
226 |
227 |
228 |
229 | compile
230 | testCompile
231 |
232 |
233 |
234 |
235 |
236 |
237 |
238 | org.apache.maven.plugins
239 | maven-eclipse-plugin
240 | 2.8
241 |
242 | true
243 |
244 | org.scala-ide.sdt.core.scalanature
245 | org.eclipse.jdt.core.javanature
246 |
247 |
248 | org.scala-ide.sdt.core.scalabuilder
249 |
250 |
251 | org.scala-ide.sdt.launching.SCALA_CONTAINER
252 |
253 | org.eclipse.jdt.launching.JRE_CONTAINER
254 |
255 |
256 |
257 | org.scala-lang:scala-library
258 | org.scala-lang:scala-compiler
259 |
260 |
261 | **/*.scala
262 | **/*.java
263 |
264 |
265 |
266 |
267 |
268 |
269 | org.codehaus.mojo
270 | build-helper-maven-plugin
271 | 1.7
272 |
273 |
274 |
275 | add-source
276 | generate-sources
277 |
278 | add-source
279 |
280 |
281 |
282 | src/main/scala
283 |
284 |
285 |
286 |
287 |
288 | add-test-source
289 | generate-test-sources
290 |
291 | add-test-source
292 |
293 |
294 |
295 | src/test/scala
296 |
297 |
298 |
299 |
300 |
301 |
302 |
303 |
304 |
305 |
307 | build-jar
308 |
309 | false
310 |
311 |
312 |
313 | org.apache.flink
314 | flink-scala_2.11
315 | ${flink.version}
316 | provided
317 |
318 |
319 | org.apache.flink
320 | flink-streaming-java_2.11
321 | ${flink.version}
322 | provided
323 |
324 |
325 | org.apache.flink
326 | flink-clients_2.11
327 | ${flink.version}
328 | provided
329 |
330 |
331 |
332 |
333 |
334 |
--------------------------------------------------------------------------------
/chapter06/flink-ml/src/main/resources/data/iris-test.txt:
--------------------------------------------------------------------------------
1 | 1 1:5.1 2:3.5 3:1.4 4:0.2
2 | 1 1:4.9 2:3.0 3:1.4 4:0.2
3 | 1 1:4.7 2:3.2 3:1.3 4:0.2
4 | 1 1:4.6 2:3.1 3:1.5 4:0.2
5 | 1 1:5.0 2:3.6 3:1.4 4:0.2
6 | 1 1:5.4 2:3.9 3:1.7 4:0.4
7 | 1 1:4.6 2:3.4 3:1.4 4:0.3
8 | 1 1:5.0 2:3.4 3:1.5 4:0.2
9 | 1 1:4.4 2:2.9 3:1.4 4:0.2
10 | 1 1:4.9 2:3.1 3:1.5 4:0.1
11 | 1 1:5.4 2:3.7 3:1.5 4:0.2
12 | 1 1:4.8 2:3.4 3:1.6 4:0.2
13 | 1 1:4.8 2:3.0 3:1.4 4:0.1
14 | 1 1:4.3 2:3.0 3:1.1 4:0.1
15 | 1 1:5.8 2:4.0 3:1.2 4:0.2
16 | 1 1:5.7 2:4.4 3:1.5 4:0.4
--------------------------------------------------------------------------------
/chapter06/flink-ml/src/main/resources/data/iris-train.txt:
--------------------------------------------------------------------------------
1 | 1 1:5.1 2:3.5 3:1.4 4:0.2
2 | 1 1:4.9 2:3.0 3:1.4 4:0.2
3 | 1 1:4.7 2:3.2 3:1.3 4:0.2
4 | 1 1:4.6 2:3.1 3:1.5 4:0.2
5 | 1 1:5.0 2:3.6 3:1.4 4:0.2
6 | 1 1:5.4 2:3.9 3:1.7 4:0.4
7 | 1 1:4.6 2:3.4 3:1.4 4:0.3
8 | 1 1:5.0 2:3.4 3:1.5 4:0.2
9 | 1 1:4.4 2:2.9 3:1.4 4:0.2
10 | 1 1:4.9 2:3.1 3:1.5 4:0.1
11 | 1 1:5.4 2:3.7 3:1.5 4:0.2
12 | 1 1:4.8 2:3.4 3:1.6 4:0.2
13 | 1 1:4.8 2:3.0 3:1.4 4:0.1
14 | 1 1:4.3 2:3.0 3:1.1 4:0.1
15 | 1 1:5.8 2:4.0 3:1.2 4:0.2
16 | 1 1:5.7 2:4.4 3:1.5 4:0.4
17 | 1 1:5.4 2:3.9 3:1.3 4:0.4
18 | 1 1:5.1 2:3.5 3:1.4 4:0.3
19 | 1 1:5.7 2:3.8 3:1.7 4:0.3
20 | 1 1:5.1 2:3.8 3:1.5 4:0.3
21 | 1 1:5.4 2:3.4 3:1.7 4:0.2
22 | 1 1:5.1 2:3.7 3:1.5 4:0.4
23 | 1 1:4.6 2:3.6 3:1.0 4:0.2
24 | 1 1:5.1 2:3.3 3:1.7 4:0.5
25 | 1 1:4.8 2:3.4 3:1.9 4:0.2
26 | 1 1:5.0 2:3.0 3:1.6 4:0.2
27 | 1 1:5.0 2:3.4 3:1.6 4:0.4
28 | 1 1:5.2 2:3.5 3:1.5 4:0.2
29 | 1 1:5.2 2:3.4 3:1.4 4:0.2
30 | 1 1:4.7 2:3.2 3:1.6 4:0.2
31 | 1 1:4.8 2:3.1 3:1.6 4:0.2
32 | 1 1:5.4 2:3.4 3:1.5 4:0.4
33 | 1 1:5.2 2:4.1 3:1.5 4:0.1
34 | 1 1:5.5 2:4.2 3:1.4 4:0.2
35 | 1 1:4.9 2:3.1 3:1.5 4:0.1
36 | 1 1:5.0 2:3.2 3:1.2 4:0.2
37 | 1 1:5.5 2:3.5 3:1.3 4:0.2
38 | 1 1:4.9 2:3.1 3:1.5 4:0.1
39 | 1 1:4.4 2:3.0 3:1.3 4:0.2
40 | 1 1:5.1 2:3.4 3:1.5 4:0.2
41 | 1 1:5.0 2:3.5 3:1.3 4:0.3
42 | 1 1:4.5 2:2.3 3:1.3 4:0.3
43 | 1 1:4.4 2:3.2 3:1.3 4:0.2
44 | 1 1:5.0 2:3.5 3:1.6 4:0.6
45 | 1 1:5.1 2:3.8 3:1.9 4:0.4
46 | 1 1:4.8 2:3.0 3:1.4 4:0.3
47 | 1 1:5.1 2:3.8 3:1.6 4:0.2
48 | 1 1:4.6 2:3.2 3:1.4 4:0.2
49 | 1 1:5.3 2:3.7 3:1.5 4:0.2
50 | 1 1:5.0 2:3.3 3:1.4 4:0.2
51 | 2 1:7.0 2:3.2 3:4.7 4:1.4
52 | 2 1:6.4 2:3.2 3:4.5 4:1.5
53 | 2 1:6.9 2:3.1 3:4.9 4:1.5
54 | 2 1:5.5 2:2.3 3:4.0 4:1.3
55 | 2 1:6.5 2:2.8 3:4.6 4:1.5
56 | 2 1:5.7 2:2.8 3:4.5 4:1.3
57 | 2 1:6.3 2:3.3 3:4.7 4:1.6
58 | 2 1:4.9 2:2.4 3:3.3 4:1.0
59 | 2 1:6.6 2:2.9 3:4.6 4:1.3
60 | 2 1:5.2 2:2.7 3:3.9 4:1.4
61 | 2 1:5.0 2:2.0 3:3.5 4:1.0
62 | 2 1:5.9 2:3.0 3:4.2 4:1.5
63 | 2 1:6.0 2:2.2 3:4.0 4:1.0
64 | 2 1:6.1 2:2.9 3:4.7 4:1.4
65 | 2 1:5.6 2:2.9 3:3.6 4:1.3
66 | 2 1:6.7 2:3.1 3:4.4 4:1.4
67 | 2 1:5.6 2:3.0 3:4.5 4:1.5
68 | 2 1:5.8 2:2.7 3:4.1 4:1.0
69 | 2 1:6.2 2:2.2 3:4.5 4:1.5
70 | 2 1:5.6 2:2.5 3:3.9 4:1.1
71 | 2 1:5.9 2:3.2 3:4.8 4:1.8
72 | 2 1:6.1 2:2.8 3:4.0 4:1.3
73 | 2 1:6.3 2:2.5 3:4.9 4:1.5
74 | 2 1:6.1 2:2.8 3:4.7 4:1.2
75 | 2 1:6.4 2:2.9 3:4.3 4:1.3
76 | 2 1:6.6 2:3.0 3:4.4 4:1.4
77 | 2 1:6.8 2:2.8 3:4.8 4:1.4
78 | 2 1:6.7 2:3.0 3:5.0 4:1.7
79 | 2 1:6.0 2:2.9 3:4.5 4:1.5
80 | 2 1:5.7 2:2.6 3:3.5 4:1.0
81 | 2 1:5.5 2:2.4 3:3.8 4:1.1
82 | 2 1:5.5 2:2.4 3:3.7 4:1.0
83 | 2 1:5.8 2:2.7 3:3.9 4:1.2
84 | 2 1:6.0 2:2.7 3:5.1 4:1.6
85 | 2 1:5.4 2:3.0 3:4.5 4:1.5
86 | 2 1:6.0 2:3.4 3:4.5 4:1.6
87 | 2 1:6.7 2:3.1 3:4.7 4:1.5
88 | 2 1:6.3 2:2.3 3:4.4 4:1.3
89 | 2 1:5.6 2:3.0 3:4.1 4:1.3
90 | 2 1:5.5 2:2.5 3:4.0 4:1.3
91 | 2 1:5.5 2:2.6 3:4.4 4:1.2
92 | 2 1:6.1 2:3.0 3:4.6 4:1.4
93 | 2 1:5.8 2:2.6 3:4.0 4:1.2
94 | 2 1:5.0 2:2.3 3:3.3 4:1.0
95 | 2 1:5.6 2:2.7 3:4.2 4:1.3
96 | 2 1:5.7 2:3.0 3:4.2 4:1.2
97 | 2 1:5.7 2:2.9 3:4.2 4:1.3
98 | 2 1:6.2 2:2.9 3:4.3 4:1.3
99 | 2 1:5.1 2:2.5 3:3.0 4:1.1
100 | 2 1:5.7 2:2.8 3:4.1 4:1.3
101 | 3 1:6.3 2:3.3 3:6.0 4:2.5
102 | 3 1:5.8 2:2.7 3:5.1 4:1.9
103 | 3 1:7.1 2:3.0 3:5.9 4:2.1
104 | 3 1:6.3 2:2.9 3:5.6 4:1.8
105 | 3 1:6.5 2:3.0 3:5.8 4:2.2
106 | 3 1:7.6 2:3.0 3:6.6 4:2.1
107 | 3 1:4.9 2:2.5 3:4.5 4:1.7
108 | 3 1:7.3 2:2.9 3:6.3 4:1.8
109 | 3 1:6.7 2:2.5 3:5.8 4:1.8
110 | 3 1:7.2 2:3.6 3:6.1 4:2.5
111 | 3 1:6.5 2:3.2 3:5.1 4:2.0
112 | 3 1:6.4 2:2.7 3:5.3 4:1.9
113 | 3 1:6.8 2:3.0 3:5.5 4:2.1
114 | 3 1:5.7 2:2.5 3:5.0 4:2.0
115 | 3 1:5.8 2:2.8 3:5.1 4:2.4
116 | 3 1:6.4 2:3.2 3:5.3 4:2.3
117 | 3 1:6.5 2:3.0 3:5.5 4:1.8
118 | 3 1:7.7 2:3.8 3:6.7 4:2.2
119 | 3 1:7.7 2:2.6 3:6.9 4:2.3
120 | 3 1:6.0 2:2.2 3:5.0 4:1.5
121 | 3 1:6.9 2:3.2 3:5.7 4:2.3
122 | 3 1:5.6 2:2.8 3:4.9 4:2.0
123 | 3 1:7.7 2:2.8 3:6.7 4:2.0
124 | 3 1:6.3 2:2.7 3:4.9 4:1.8
125 | 3 1:6.7 2:3.3 3:5.7 4:2.1
126 | 3 1:7.2 2:3.2 3:6.0 4:1.8
127 | 3 1:6.2 2:2.8 3:4.8 4:1.8
128 | 3 1:6.1 2:3.0 3:4.9 4:1.8
129 | 3 1:6.4 2:2.8 3:5.6 4:2.1
130 | 3 1:7.2 2:3.0 3:5.8 4:1.6
131 | 3 1:7.4 2:2.8 3:6.1 4:1.9
132 | 3 1:7.9 2:3.8 3:6.4 4:2.0
133 | 3 1:6.4 2:2.8 3:5.6 4:2.2
134 | 3 1:6.3 2:2.8 3:5.1 4:1.5
135 | 3 1:6.1 2:2.6 3:5.6 4:1.4
136 | 3 1:7.7 2:3.0 3:6.1 4:2.3
137 | 3 1:6.3 2:3.4 3:5.6 4:2.4
138 | 3 1:6.4 2:3.1 3:5.5 4:1.8
139 | 3 1:6.0 2:3.0 3:4.8 4:1.8
140 | 3 1:6.9 2:3.1 3:5.4 4:2.1
141 | 3 1:6.7 2:3.1 3:5.6 4:2.4
142 | 3 1:6.9 2:3.1 3:5.1 4:2.3
143 | 3 1:5.8 2:2.7 3:5.1 4:1.9
144 | 3 1:6.8 2:3.2 3:5.9 4:2.3
145 | 3 1:6.7 2:3.3 3:5.7 4:2.5
146 | 3 1:6.7 2:3.0 3:5.2 4:2.3
147 | 3 1:6.3 2:2.5 3:5.0 4:1.9
148 | 3 1:6.5 2:3.0 3:5.2 4:2.0
149 | 3 1:6.2 2:3.4 3:5.4 4:2.3
150 | 3 1:5.9 2:3.0 3:5.1 4:1.8
151 |
--------------------------------------------------------------------------------
/chapter06/flink-ml/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | ################################################################################
18 |
19 | log4j.rootLogger=INFO, console
20 |
21 | log4j.appender.console=org.apache.log4j.ConsoleAppender
22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
23 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n
24 |
--------------------------------------------------------------------------------
/chapter06/flink-ml/src/main/scala/com/demo/flink/ml/Job.scala:
--------------------------------------------------------------------------------
package com.demo.flink.ml

import org.apache.flink.api.scala._
import org.apache.flink.ml._
import org.apache.flink.ml.common.LabeledVector
import org.apache.flink.ml.math.DenseVector
import org.apache.flink.ml.math.Vector
import org.apache.flink.ml.preprocessing.Splitter
import org.apache.flink.ml.regression.MultipleLinearRegression

/**
 * Trains a multiple linear regression model on the iris CSV data set.
 *
 * Reads "iris.csv" (five numeric columns per row), converts each row into a
 * LabeledVector (last column = label, first four = features), splits the data
 * into training and test sets, fits the model, and prints the predictions.
 */
object Job {
  def main(args: Array[String]) {
    // Set up the batch execution environment.
    val env = ExecutionEnvironment.getExecutionEnvironment

    // Every CSV field arrives as a String; parse all five columns to doubles.
    val rows = env.readCsvFile[(String, String, String, String, String)]("iris.csv")
    val labeledPoints = rows.map { row =>
      val values = row.productIterator.map(_.asInstanceOf[String].toDouble).toList
      // Column 4 is the class label; columns 0-3 form the feature vector.
      LabeledVector(values(4), DenseVector(values.take(4).toArray))
    }

    // Split 60/40 into training and test data (second argument selects the
    // exact-split mode — confirm against the Splitter API if changing it).
    val split = Splitter.trainTestSplit(labeledPoints, .6, true)
    val trainSet: DataSet[LabeledVector] = split.training

    // The test side only needs the raw feature vectors for prediction.
    val testFeatures: DataSet[Vector] = split.testing.map(_.vector)

    testFeatures.print()

    val regressor = MultipleLinearRegression()
      .setStepsize(1.0)
      .setIterations(5)
      .setConvergenceThreshold(0.001)

    regressor.fit(trainSet)

    // The fitted model can now be used to make predictions.
    val predictions = regressor.predict(testFeatures)

    predictions.print()
  }
}
46 |
--------------------------------------------------------------------------------
/chapter06/flink-ml/src/main/scala/com/demo/flink/ml/MLRJob.scala:
--------------------------------------------------------------------------------
package com.demo.flink.ml

import org.apache.flink.api.scala._
import org.apache.flink.ml._
import org.apache.flink.ml.common.LabeledVector
import org.apache.flink.ml.math.DenseVector
import org.apache.flink.ml.math.Vector
import org.apache.flink.ml.preprocessing.Splitter
import org.apache.flink.ml.regression.MultipleLinearRegression
import org.apache.flink.ml.preprocessing.PolynomialFeatures

/**
 * Multiple linear regression on the LibSVM-formatted iris data set.
 *
 * Fits the model on "iris-train.txt" and prints predictions for the feature
 * vectors in "iris-test.txt".
 */
object MLRJob {
  def main(args: Array[String]) {
    // Set up the batch execution environment.
    val env = ExecutionEnvironment.getExecutionEnvironment

    // LibSVM readers yield LabeledVectors; the test side keeps features only.
    val trainSet = MLUtils.readLibSVM(env, "iris-train.txt")
    val testFeatures = MLUtils.readLibSVM(env, "iris-test.txt").map(_.vector)

    val regressor = MultipleLinearRegression()
      .setStepsize(1.0)
      .setIterations(5)
      .setConvergenceThreshold(0.001)

    regressor.fit(trainSet)

    // The fitted model can now be used to make predictions.
    val predictions = regressor.predict(testFeatures)

    predictions.print()
  }
}
34 |
--------------------------------------------------------------------------------
/chapter06/flink-ml/src/main/scala/com/demo/flink/ml/MLRJobPipelines.scala:
--------------------------------------------------------------------------------
package com.demo.flink.ml

import org.apache.flink.api.scala._
import org.apache.flink.ml._
import org.apache.flink.ml.common.LabeledVector
import org.apache.flink.ml.math.DenseVector
import org.apache.flink.ml.math.Vector
import org.apache.flink.ml.preprocessing.Splitter
import org.apache.flink.ml.regression.MultipleLinearRegression
import org.apache.flink.ml.preprocessing.PolynomialFeatures
import org.apache.flink.ml.preprocessing.StandardScaler
import org.apache.flink.ml.preprocessing.MinMaxScaler

/**
 * This class shows how to solve classification problems using Flink ML.
 *
 * Machine Learning Algorithm - Multiple Linear Regression
 * Data Pre-processing - MinMax scaling plus polynomial feature expansion,
 * chained into a single pipeline.
 */
object MLRJobPipelines {
  def main(args: Array[String]) {
    // Set up the batch execution environment.
    val env = ExecutionEnvironment.getExecutionEnvironment

    // Degree-3 polynomial feature expansion.
    val polyFeatures = PolynomialFeatures()
      .setDegree(3)

    // Alternative scaler (not used below; kept as a reference): rescale to
    // mean 10.0 and standard deviation 2.0.
    val scaler = StandardScaler()
      .setMean(10.0)
      .setStd(2.0)

    // Active scaler: squeeze every feature into the range [1.0, 3.0].
    val rangeScaler = MinMaxScaler()
      .setMin(1.0)
      .setMax(3.0)

    val trainSet = MLUtils.readLibSVM(env, "iris-train.txt")
    val testFeatures = MLUtils.readLibSVM(env, "iris-test.txt").map(_.vector)

    val regressor = MultipleLinearRegression()
      .setStepsize(1.0)
      .setIterations(5)
      .setConvergenceThreshold(0.001)

    // Learn the min/max of the training data, then rescale it.
    rangeScaler.fit(trainSet)
    val scaledTrainSet = rangeScaler.transform(trainSet)

    scaledTrainSet.print()

    // Create pipeline PolynomialFeatures -> MultipleLinearRegression.
    val pipeline = polyFeatures.chainPredictor(regressor)

    // Train the whole pipeline on the scaled training data.
    pipeline.fit(scaledTrainSet)

    // The fitted pipeline can now be used to make predictions.
    val predictions = pipeline.predict(testFeatures)

    predictions.print()
  }
}
66 |
--------------------------------------------------------------------------------
/chapter06/ml-examples/pom.xml:
--------------------------------------------------------------------------------
1 |
11 |
13 | 4.0.0
14 |
15 | com.demo
16 | chapter06
17 | 1.0
18 | jar
19 |
20 | Flink Quickstart Job
21 | http://www.myorganization.org
22 |
23 |
24 |
25 | apache.snapshots
26 | Apache Development Snapshot Repository
27 | https://repository.apache.org/content/repositories/snapshots/
28 |
29 | false
30 |
31 |
32 | true
33 |
34 |
35 |
36 |
37 |
38 | UTF-8
39 | 0.10.2
40 |
41 |
42 |
53 |
54 |
55 |
56 | org.apache.flink
57 | flink-scala_2.11
58 | ${flink.version}
59 |
60 |
61 | org.apache.flink
62 | flink-streaming-scala_2.11
63 | ${flink.version}
64 |
65 |
66 | org.apache.flink
67 | flink-clients_2.11
68 | ${flink.version}
69 |
70 |
71 |
72 | org.apache.flink
73 | flink-ml_2.11
74 | ${flink.version}
75 |
76 |
77 |
78 |
79 |
83 |
84 |
85 |
89 |
90 | org.apache.maven.plugins
91 | maven-shade-plugin
92 | 2.4.1
93 |
94 |
95 |
96 | package
97 |
98 | shade
99 |
100 |
101 |
102 |
103 |
105 | org.apache.flink:flink-shaded-*_2.11
106 | org.apache.flink:flink-core_2.11
107 | org.apache.flink:flink-java_2.11
108 | org.apache.flink:flink-scala_2.11
109 | org.apache.flink:flink-runtime_2.11
110 | org.apache.flink:flink-optimizer_2.11
111 | org.apache.flink:flink-clients_2.11
112 | org.apache.flink:flink-avro_2.11
113 | org.apache.flink:flink-java-examples_2.11
114 | org.apache.flink:flink-scala-examples_2.11
115 | org.apache.flink:flink-streaming-examples_2.11
116 | org.apache.flink:flink-streaming-java_2.11
117 |
118 |
121 |
122 | org.scala-lang:scala-library
123 | org.scala-lang:scala-compiler
124 | org.scala-lang:scala-reflect
125 | com.amazonaws:aws-java-sdk
126 | com.typesafe.akka:akka-actor_*
127 | com.typesafe.akka:akka-remote_*
128 | com.typesafe.akka:akka-slf4j_*
129 | io.netty:netty-all
130 | io.netty:netty
131 | org.eclipse.jetty:jetty-server
132 | org.eclipse.jetty:jetty-continuation
133 | org.eclipse.jetty:jetty-http
134 | org.eclipse.jetty:jetty-io
135 | org.eclipse.jetty:jetty-util
136 | org.eclipse.jetty:jetty-security
137 | org.eclipse.jetty:jetty-servlet
138 | commons-fileupload:commons-fileupload
139 | org.apache.avro:avro
140 | commons-collections:commons-collections
141 | org.codehaus.jackson:jackson-core-asl
142 | org.codehaus.jackson:jackson-mapper-asl
143 | com.thoughtworks.paranamer:paranamer
144 | org.xerial.snappy:snappy-java
145 | org.apache.commons:commons-compress
146 | org.tukaani:xz
147 | com.esotericsoftware.kryo:kryo
148 | com.esotericsoftware.minlog:minlog
149 | org.objenesis:objenesis
150 | com.twitter:chill_*
151 | com.twitter:chill-java
152 | com.twitter:chill-avro_*
153 | com.twitter:chill-bijection_*
154 | com.twitter:bijection-core_*
155 | com.twitter:bijection-avro_*
156 | commons-lang:commons-lang
157 | junit:junit
158 | de.javakaffee:kryo-serializers
159 | joda-time:joda-time
160 | org.apache.commons:commons-lang3
161 | org.slf4j:slf4j-api
162 | org.slf4j:slf4j-log4j12
163 | log4j:log4j
164 | org.apache.commons:commons-math
165 | org.apache.sling:org.apache.sling.commons.json
166 | commons-logging:commons-logging
167 | org.apache.httpcomponents:httpclient
168 | org.apache.httpcomponents:httpcore
169 | commons-codec:commons-codec
170 | com.fasterxml.jackson.core:jackson-core
171 | com.fasterxml.jackson.core:jackson-databind
172 | com.fasterxml.jackson.core:jackson-annotations
173 | org.codehaus.jettison:jettison
174 | stax:stax-api
175 | com.typesafe:config
176 | org.uncommons.maths:uncommons-maths
177 | com.github.scopt:scopt_*
178 | org.mortbay.jetty:servlet-api
179 | commons-io:commons-io
180 | commons-cli:commons-cli
181 |
182 |
183 |
184 |
185 | org.apache.flink:*
186 |
187 | org/apache/flink/shaded/**
188 | web-docs/**
189 |
190 |
191 |
192 |
194 | *:*
195 |
196 | META-INF/*.SF
197 | META-INF/*.DSA
198 | META-INF/*.RSA
199 |
200 |
201 |
202 |
203 |
204 |
206 | com.demo.chapter06.Job
207 |
208 |
209 | false
210 |
211 |
212 |
213 |
214 |
215 |
216 | org.apache.maven.plugins
217 | maven-compiler-plugin
218 | 3.1
219 |
220 | 1.7
221 | 1.7
222 |
223 |
224 |
225 | net.alchim31.maven
226 | scala-maven-plugin
227 | 3.1.4
228 |
229 |
230 |
231 | compile
232 | testCompile
233 |
234 |
235 |
236 |
237 |
238 |
239 |
240 | org.apache.maven.plugins
241 | maven-eclipse-plugin
242 | 2.8
243 |
244 | true
245 |
246 | org.scala-ide.sdt.core.scalanature
247 | org.eclipse.jdt.core.javanature
248 |
249 |
250 | org.scala-ide.sdt.core.scalabuilder
251 |
252 |
253 | org.scala-ide.sdt.launching.SCALA_CONTAINER
254 |
255 | org.eclipse.jdt.launching.JRE_CONTAINER
256 |
257 |
258 |
259 | org.scala-lang:scala-library
260 | org.scala-lang:scala-compiler
261 |
262 |
263 | **/*.scala
264 | **/*.java
265 |
266 |
267 |
268 |
269 |
270 |
271 | org.codehaus.mojo
272 | build-helper-maven-plugin
273 | 1.7
274 |
275 |
276 |
279 |
280 |
283 |
284 |
285 |
286 |
287 |
288 |
289 |
291 | build-jar
292 |
293 | false
294 |
295 |
296 |
297 | org.apache.flink
298 | flink-scala_2.11
299 | ${flink.version}
300 | provided
301 |
302 |
303 | org.apache.flink
304 | flink-streaming-java_2.11
305 | ${flink.version}
306 | provided
307 |
308 |
309 | org.apache.flink
310 | flink-clients_2.11
311 | ${flink.version}
312 | provided
313 |
314 |
315 |
316 |
317 |
318 |
--------------------------------------------------------------------------------
/chapter06/ml-examples/src/main/resources/data/books-test.csv:
--------------------------------------------------------------------------------
1 | 1,10
2 | 1,11
3 | 1,12
4 | 1,13
5 | 1,14
6 | 1,15
7 | 1,16
8 | 1,17
9 | 1,18
10 | 2,10
11 | 2,11
12 | 2,15
13 | 2,16
14 | 2,17
15 | 2,18
16 | 3,11
17 | 3,12
18 | 3,13
19 | 3,14
20 | 3,15
21 | 3,16
22 | 3,17
23 | 3,18
24 | 4,10
25 | 4,11
26 | 4,12
27 | 4,13
28 | 4,14
29 | 4,15
30 | 4,16
31 | 4,17
32 | 4,18
33 |
--------------------------------------------------------------------------------
/chapter06/ml-examples/src/main/resources/data/books.csv:
--------------------------------------------------------------------------------
1 | 1,10,1.0
2 | 1,11,2.0
3 | 1,12,5.0
4 | 1,13,5.0
5 | 1,14,5.0
6 | 1,15,4.0
7 | 1,16,5.0
8 | 1,17,1.0
9 | 1,18,5.0
10 | 2,10,1.0
11 | 2,11,2.0
12 | 2,15,5.0
13 | 2,16,4.5
14 | 2,17,1.0
15 | 2,18,5.0
16 | 3,11,2.5
17 | 3,12,4.5
18 | 3,13,4.0
19 | 3,14,3.0
20 | 3,15,3.5
21 | 3,16,4.5
22 | 3,17,4.0
23 | 3,18,5.0
24 | 4,10,5.0
25 | 4,11,5.0
26 | 4,12,5.0
27 | 4,13,0.0
28 | 4,14,2.0
29 | 4,15,3.0
30 | 4,16,1.0
31 | 4,17,4.0
32 | 4,18,1.0
--------------------------------------------------------------------------------
/chapter06/ml-examples/src/main/resources/data/iris-test.txt:
--------------------------------------------------------------------------------
1 | 1 1:5.1 2:3.5 3:1.4 4:0.2
2 | 1 1:4.9 2:3.0 3:1.4 4:0.2
3 | 1 1:4.7 2:3.2 3:1.3 4:0.2
4 | 1 1:4.6 2:3.1 3:1.5 4:0.2
5 | 1 1:5.0 2:3.6 3:1.4 4:0.2
6 | 1 1:5.4 2:3.9 3:1.7 4:0.4
7 | 1 1:4.6 2:3.4 3:1.4 4:0.3
8 | 1 1:5.0 2:3.4 3:1.5 4:0.2
9 | 1 1:4.4 2:2.9 3:1.4 4:0.2
10 | 1 1:4.9 2:3.1 3:1.5 4:0.1
11 | 1 1:5.4 2:3.7 3:1.5 4:0.2
12 | 1 1:4.8 2:3.4 3:1.6 4:0.2
13 | 1 1:4.8 2:3.0 3:1.4 4:0.1
14 | 1 1:4.3 2:3.0 3:1.1 4:0.1
15 | 1 1:5.8 2:4.0 3:1.2 4:0.2
16 | 1 1:5.7 2:4.4 3:1.5 4:0.4
--------------------------------------------------------------------------------
/chapter06/ml-examples/src/main/resources/data/iris-train.txt:
--------------------------------------------------------------------------------
1 | 1 1:5.1 2:3.5 3:1.4 4:0.2
2 | 1 1:4.9 2:3.0 3:1.4 4:0.2
3 | 1 1:4.7 2:3.2 3:1.3 4:0.2
4 | 1 1:4.6 2:3.1 3:1.5 4:0.2
5 | 1 1:5.0 2:3.6 3:1.4 4:0.2
6 | 1 1:5.4 2:3.9 3:1.7 4:0.4
7 | 1 1:4.6 2:3.4 3:1.4 4:0.3
8 | 1 1:5.0 2:3.4 3:1.5 4:0.2
9 | 1 1:4.4 2:2.9 3:1.4 4:0.2
10 | 1 1:4.9 2:3.1 3:1.5 4:0.1
11 | 1 1:5.4 2:3.7 3:1.5 4:0.2
12 | 1 1:4.8 2:3.4 3:1.6 4:0.2
13 | 1 1:4.8 2:3.0 3:1.4 4:0.1
14 | 1 1:4.3 2:3.0 3:1.1 4:0.1
15 | 1 1:5.8 2:4.0 3:1.2 4:0.2
16 | 1 1:5.7 2:4.4 3:1.5 4:0.4
17 | 1 1:5.4 2:3.9 3:1.3 4:0.4
18 | 1 1:5.1 2:3.5 3:1.4 4:0.3
19 | 1 1:5.7 2:3.8 3:1.7 4:0.3
20 | 1 1:5.1 2:3.8 3:1.5 4:0.3
21 | 1 1:5.4 2:3.4 3:1.7 4:0.2
22 | 1 1:5.1 2:3.7 3:1.5 4:0.4
23 | 1 1:4.6 2:3.6 3:1.0 4:0.2
24 | 1 1:5.1 2:3.3 3:1.7 4:0.5
25 | 1 1:4.8 2:3.4 3:1.9 4:0.2
26 | 1 1:5.0 2:3.0 3:1.6 4:0.2
27 | 1 1:5.0 2:3.4 3:1.6 4:0.4
28 | 1 1:5.2 2:3.5 3:1.5 4:0.2
29 | 1 1:5.2 2:3.4 3:1.4 4:0.2
30 | 1 1:4.7 2:3.2 3:1.6 4:0.2
31 | 1 1:4.8 2:3.1 3:1.6 4:0.2
32 | 1 1:5.4 2:3.4 3:1.5 4:0.4
33 | 1 1:5.2 2:4.1 3:1.5 4:0.1
34 | 1 1:5.5 2:4.2 3:1.4 4:0.2
35 | 1 1:4.9 2:3.1 3:1.5 4:0.1
36 | 1 1:5.0 2:3.2 3:1.2 4:0.2
37 | 1 1:5.5 2:3.5 3:1.3 4:0.2
38 | 1 1:4.9 2:3.1 3:1.5 4:0.1
39 | 1 1:4.4 2:3.0 3:1.3 4:0.2
40 | 1 1:5.1 2:3.4 3:1.5 4:0.2
41 | 1 1:5.0 2:3.5 3:1.3 4:0.3
42 | 1 1:4.5 2:2.3 3:1.3 4:0.3
43 | 1 1:4.4 2:3.2 3:1.3 4:0.2
44 | 1 1:5.0 2:3.5 3:1.6 4:0.6
45 | 1 1:5.1 2:3.8 3:1.9 4:0.4
46 | 1 1:4.8 2:3.0 3:1.4 4:0.3
47 | 1 1:5.1 2:3.8 3:1.6 4:0.2
48 | 1 1:4.6 2:3.2 3:1.4 4:0.2
49 | 1 1:5.3 2:3.7 3:1.5 4:0.2
50 | 1 1:5.0 2:3.3 3:1.4 4:0.2
51 | 2 1:7.0 2:3.2 3:4.7 4:1.4
52 | 2 1:6.4 2:3.2 3:4.5 4:1.5
53 | 2 1:6.9 2:3.1 3:4.9 4:1.5
54 | 2 1:5.5 2:2.3 3:4.0 4:1.3
55 | 2 1:6.5 2:2.8 3:4.6 4:1.5
56 | 2 1:5.7 2:2.8 3:4.5 4:1.3
57 | 2 1:6.3 2:3.3 3:4.7 4:1.6
58 | 2 1:4.9 2:2.4 3:3.3 4:1.0
59 | 2 1:6.6 2:2.9 3:4.6 4:1.3
60 | 2 1:5.2 2:2.7 3:3.9 4:1.4
61 | 2 1:5.0 2:2.0 3:3.5 4:1.0
62 | 2 1:5.9 2:3.0 3:4.2 4:1.5
63 | 2 1:6.0 2:2.2 3:4.0 4:1.0
64 | 2 1:6.1 2:2.9 3:4.7 4:1.4
65 | 2 1:5.6 2:2.9 3:3.6 4:1.3
66 | 2 1:6.7 2:3.1 3:4.4 4:1.4
67 | 2 1:5.6 2:3.0 3:4.5 4:1.5
68 | 2 1:5.8 2:2.7 3:4.1 4:1.0
69 | 2 1:6.2 2:2.2 3:4.5 4:1.5
70 | 2 1:5.6 2:2.5 3:3.9 4:1.1
71 | 2 1:5.9 2:3.2 3:4.8 4:1.8
72 | 2 1:6.1 2:2.8 3:4.0 4:1.3
73 | 2 1:6.3 2:2.5 3:4.9 4:1.5
74 | 2 1:6.1 2:2.8 3:4.7 4:1.2
75 | 2 1:6.4 2:2.9 3:4.3 4:1.3
76 | 2 1:6.6 2:3.0 3:4.4 4:1.4
77 | 2 1:6.8 2:2.8 3:4.8 4:1.4
78 | 2 1:6.7 2:3.0 3:5.0 4:1.7
79 | 2 1:6.0 2:2.9 3:4.5 4:1.5
80 | 2 1:5.7 2:2.6 3:3.5 4:1.0
81 | 2 1:5.5 2:2.4 3:3.8 4:1.1
82 | 2 1:5.5 2:2.4 3:3.7 4:1.0
83 | 2 1:5.8 2:2.7 3:3.9 4:1.2
84 | 2 1:6.0 2:2.7 3:5.1 4:1.6
85 | 2 1:5.4 2:3.0 3:4.5 4:1.5
86 | 2 1:6.0 2:3.4 3:4.5 4:1.6
87 | 2 1:6.7 2:3.1 3:4.7 4:1.5
88 | 2 1:6.3 2:2.3 3:4.4 4:1.3
89 | 2 1:5.6 2:3.0 3:4.1 4:1.3
90 | 2 1:5.5 2:2.5 3:4.0 4:1.3
91 | 2 1:5.5 2:2.6 3:4.4 4:1.2
92 | 2 1:6.1 2:3.0 3:4.6 4:1.4
93 | 2 1:5.8 2:2.6 3:4.0 4:1.2
94 | 2 1:5.0 2:2.3 3:3.3 4:1.0
95 | 2 1:5.6 2:2.7 3:4.2 4:1.3
96 | 2 1:5.7 2:3.0 3:4.2 4:1.2
97 | 2 1:5.7 2:2.9 3:4.2 4:1.3
98 | 2 1:6.2 2:2.9 3:4.3 4:1.3
99 | 2 1:5.1 2:2.5 3:3.0 4:1.1
100 | 2 1:5.7 2:2.8 3:4.1 4:1.3
101 | 3 1:6.3 2:3.3 3:6.0 4:2.5
102 | 3 1:5.8 2:2.7 3:5.1 4:1.9
103 | 3 1:7.1 2:3.0 3:5.9 4:2.1
104 | 3 1:6.3 2:2.9 3:5.6 4:1.8
105 | 3 1:6.5 2:3.0 3:5.8 4:2.2
106 | 3 1:7.6 2:3.0 3:6.6 4:2.1
107 | 3 1:4.9 2:2.5 3:4.5 4:1.7
108 | 3 1:7.3 2:2.9 3:6.3 4:1.8
109 | 3 1:6.7 2:2.5 3:5.8 4:1.8
110 | 3 1:7.2 2:3.6 3:6.1 4:2.5
111 | 3 1:6.5 2:3.2 3:5.1 4:2.0
112 | 3 1:6.4 2:2.7 3:5.3 4:1.9
113 | 3 1:6.8 2:3.0 3:5.5 4:2.1
114 | 3 1:5.7 2:2.5 3:5.0 4:2.0
115 | 3 1:5.8 2:2.8 3:5.1 4:2.4
116 | 3 1:6.4 2:3.2 3:5.3 4:2.3
117 | 3 1:6.5 2:3.0 3:5.5 4:1.8
118 | 3 1:7.7 2:3.8 3:6.7 4:2.2
119 | 3 1:7.7 2:2.6 3:6.9 4:2.3
120 | 3 1:6.0 2:2.2 3:5.0 4:1.5
121 | 3 1:6.9 2:3.2 3:5.7 4:2.3
122 | 3 1:5.6 2:2.8 3:4.9 4:2.0
123 | 3 1:7.7 2:2.8 3:6.7 4:2.0
124 | 3 1:6.3 2:2.7 3:4.9 4:1.8
125 | 3 1:6.7 2:3.3 3:5.7 4:2.1
126 | 3 1:7.2 2:3.2 3:6.0 4:1.8
127 | 3 1:6.2 2:2.8 3:4.8 4:1.8
128 | 3 1:6.1 2:3.0 3:4.9 4:1.8
129 | 3 1:6.4 2:2.8 3:5.6 4:2.1
130 | 3 1:7.2 2:3.0 3:5.8 4:1.6
131 | 3 1:7.4 2:2.8 3:6.1 4:1.9
132 | 3 1:7.9 2:3.8 3:6.4 4:2.0
133 | 3 1:6.4 2:2.8 3:5.6 4:2.2
134 | 3 1:6.3 2:2.8 3:5.1 4:1.5
135 | 3 1:6.1 2:2.6 3:5.6 4:1.4
136 | 3 1:7.7 2:3.0 3:6.1 4:2.3
137 | 3 1:6.3 2:3.4 3:5.6 4:2.4
138 | 3 1:6.4 2:3.1 3:5.5 4:1.8
139 | 3 1:6.0 2:3.0 3:4.8 4:1.8
140 | 3 1:6.9 2:3.1 3:5.4 4:2.1
141 | 3 1:6.7 2:3.1 3:5.6 4:2.4
142 | 3 1:6.9 2:3.1 3:5.1 4:2.3
143 | 3 1:5.8 2:2.7 3:5.1 4:1.9
144 | 3 1:6.8 2:3.2 3:5.9 4:2.3
145 | 3 1:6.7 2:3.3 3:5.7 4:2.5
146 | 3 1:6.7 2:3.0 3:5.2 4:2.3
147 | 3 1:6.3 2:2.5 3:5.0 4:1.9
148 | 3 1:6.5 2:3.0 3:5.2 4:2.0
149 | 3 1:6.2 2:3.4 3:5.4 4:2.3
150 | 3 1:5.9 2:3.0 3:5.1 4:1.8
151 |
--------------------------------------------------------------------------------
/chapter06/ml-examples/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | ################################################################################
18 |
19 | log4j.rootLogger=INFO, console
20 |
21 | log4j.appender.console=org.apache.log4j.ConsoleAppender
22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
23 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n
24 |
--------------------------------------------------------------------------------
/chapter06/ml-examples/src/main/scala/com/demo/chapter06/MyALSApp.scala:
--------------------------------------------------------------------------------
package com.demo.chapter06

import org.apache.flink.api.scala._
import org.apache.flink.ml.recommendation._
import org.apache.flink.ml.common.ParameterMap

/**
 * Book-recommendation example using ALS matrix factorization.
 *
 * Reads (userId, bookId, rating) triples from "books.csv", factorizes the
 * rating matrix, predicts ratings for the (userId, bookId) pairs in
 * "books-test.csv", and writes the results to the "books-output" path.
 */
object MyALSApp {
  def main(args: Array[String]): Unit = {

    val env = ExecutionEnvironment.getExecutionEnvironment

    // Training ratings: (userId, bookId, rating).
    val inputDS: DataSet[(Int, Int, Double)] = env.readCsvFile[(Int, Int, Double)]("books.csv")

    // Setup the ALS learner.
    // Fix: the temporary path was hard-coded to the Windows-only "D:\\tmp";
    // use the platform temp directory so the job also runs on Linux/macOS.
    val als = ALS()
      .setIterations(10)
      .setNumFactors(10)
      .setBlocks(100)
      .setTemporaryPath(System.getProperty("java.io.tmpdir"))

    // Set the remaining hyper-parameters via a parameter map.
    val parameters = ParameterMap()
      .add(ALS.Lambda, 0.9)
      .add(ALS.Seed, 42L)

    // Calculate the factorization.
    als.fit(inputDS, parameters)

    // Read the testing data set from a csv file: (userId, bookId) pairs.
    val testingDS: DataSet[(Int, Int)] = env.readCsvFile[(Int, Int)]("books-test.csv")

    // Calculate the ratings according to the matrix factorization.
    val predictedRatings = als.predict(testingDS)

    predictedRatings.writeAsCsv("books-output")

    env.execute("Flink Recommendation App")
  }
}
--------------------------------------------------------------------------------
/chapter06/ml-examples/src/main/scala/com/demo/chapter06/MyMRLApp.scala:
--------------------------------------------------------------------------------
package com.demo.chapter06

import org.apache.flink.api.scala._
import org.apache.flink.ml.regression.MultipleLinearRegression
import org.apache.flink.ml.common.LabeledVector
import org.apache.flink.ml.math.Vector
import org.apache.flink.ml.math.{ SparseVector, DenseVector }
import org.apache.flink.ml.RichExecutionEnvironment

/**
 * Multiple linear regression example on the LibSVM-formatted iris data.
 *
 * Fix: the original file did not compile — `trainingDS` had no right-hand
 * side (its initializer was commented away, so the expression fell through
 * to the next statement) and `predictions` was referenced while its
 * definition was commented out. The data sets are now read from the bundled
 * LibSVM resources, consistent with MySVMApp in this package.
 */
object MyMRLApp {

  def main(args: Array[String]): Unit = {
    val env = ExecutionEnvironment.getExecutionEnvironment

    // Create multiple linear regression learner.
    val mlr = MultipleLinearRegression()
      .setIterations(10)
      .setStepsize(0.5)
      .setConvergenceThreshold(0.001)

    // Obtain training and testing data sets (LibSVM format).
    val trainingDS: DataSet[LabeledVector] = env.readLibSVM("iris-train.txt")
    val testingDS: DataSet[Vector] = env.readLibSVM("iris-test.txt").map(_.vector)

    // Fit the linear model to the provided data.
    mlr.fit(trainingDS)

    // Calculate the predictions for the test data.
    val predictions = mlr.predict(testingDS)
    predictions.writeAsText("mlr-out")

    env.execute("Flink MLR App")
  }
}
--------------------------------------------------------------------------------
/chapter06/ml-examples/src/main/scala/com/demo/chapter06/MySVMApp.scala:
--------------------------------------------------------------------------------
package com.demo.chapter06

import org.apache.flink.api.scala._
import org.apache.flink.ml.math.Vector
import org.apache.flink.ml.common.LabeledVector
import org.apache.flink.ml.classification.SVM
import org.apache.flink.ml.RichExecutionEnvironment

/**
 * SVM classification example on the LibSVM-formatted iris data.
 *
 * Trains on "iris-train.txt", predicts labels for "iris-test.txt", and
 * writes the (features, predicted label) pairs to the "out" path.
 */
object MySVMApp {
  def main(args: Array[String]) {
    // Set up the execution environment.
    val pathToTrainingFile: String = "iris-train.txt"
    // Fix: this previously pointed at "iris-train.txt" (copy-paste bug), so
    // the model was "evaluated" on its own training data; the dedicated test
    // file iris-test.txt is bundled in src/main/resources/data.
    val pathToTestingFile: String = "iris-test.txt"
    val env = ExecutionEnvironment.getExecutionEnvironment

    // Read the training data set from a LibSVM formatted file.
    val trainingDS: DataSet[LabeledVector] = env.readLibSVM(pathToTrainingFile)

    // Create the SVM learner.
    val svm = SVM()
      .setBlocks(10)

    // Learn the SVM model.
    svm.fit(trainingDS)

    // Read the testing data set (features only).
    val testingDS: DataSet[Vector] = env.readLibSVM(pathToTestingFile).map(_.vector)

    // Calculate the predictions for the testing data set.
    val predictionDS: DataSet[(Vector, Double)] = svm.predict(testingDS)
    predictionDS.writeAsText("out")

    env.execute("Flink Scala API Skeleton")
  }
}
36 |
--------------------------------------------------------------------------------
/chapter07/flink-gelly/pom.xml:
--------------------------------------------------------------------------------
1 |
11 |
13 | 4.0.0
14 |
15 | com.demo
16 | flink-gelly
17 | 0.0.1-SNAPSHOT
18 | jar
19 |
20 | Flink Quickstart Job
21 | http://www.myorganization.org
22 |
23 |
24 | UTF-8
25 | 1.1.4
26 | 1.7.7
27 | 1.2.17
28 |
29 |
30 |
31 |
32 | apache.snapshots
33 | Apache Development Snapshot Repository
34 | https://repository.apache.org/content/repositories/snapshots/
35 |
36 | false
37 |
38 |
39 | true
40 |
41 |
42 |
43 |
44 |
55 |
56 |
57 |
58 |
59 | org.apache.flink
60 | flink-java
61 | ${flink.version}
62 |
63 |
64 | org.apache.flink
65 | flink-streaming-java_2.11
66 | ${flink.version}
67 |
68 |
69 | org.apache.flink
70 | flink-clients_2.11
71 | ${flink.version}
72 |
73 |
74 |
75 | org.apache.flink
76 | flink-gelly_2.11
77 | ${flink.version}
78 |
79 |
81 |
82 | org.slf4j
83 | slf4j-log4j12
84 | ${slf4j.version}
85 |
86 |
87 | log4j
88 | log4j
89 | ${log4j.version}
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 | build-jar
98 |
99 |
100 | false
101 |
102 |
103 |
104 |
105 | org.apache.flink
106 | flink-java
107 | ${flink.version}
108 | provided
109 |
110 |
111 | org.apache.flink
112 | flink-streaming-java_2.11
113 | ${flink.version}
114 | provided
115 |
116 |
117 | org.apache.flink
118 | flink-clients_2.11
119 | ${flink.version}
120 | provided
121 |
122 |
123 | org.slf4j
124 | slf4j-log4j12
125 | ${slf4j.version}
126 | provided
127 |
128 |
129 | log4j
130 | log4j
131 | ${log4j.version}
132 | provided
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 | org.apache.maven.plugins
141 | maven-shade-plugin
142 | 2.4.1
143 |
144 |
145 | package
146 |
147 | shade
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
168 |
169 | org.apache.maven.plugins
170 | maven-shade-plugin
171 | 2.4.1
172 |
173 |
174 |
175 | package
176 |
177 | shade
178 |
179 |
180 |
181 |
182 |
184 | org.apache.flink:flink-annotations
185 | org.apache.flink:flink-shaded-hadoop2
186 | org.apache.flink:flink-shaded-curator-recipes
187 | org.apache.flink:flink-core
188 | org.apache.flink:flink-java
189 | org.apache.flink:flink-scala_2.11
190 | org.apache.flink:flink-runtime_2.11
191 | org.apache.flink:flink-optimizer_2.11
192 | org.apache.flink:flink-clients_2.11
193 | org.apache.flink:flink-avro_2.11
194 | org.apache.flink:flink-examples-batch_2.11
195 | org.apache.flink:flink-examples-streaming_2.11
196 | org.apache.flink:flink-streaming-java_2.11
197 | org.apache.flink:flink-streaming-scala_2.11
198 | org.apache.flink:flink-scala-shell_2.11
199 | org.apache.flink:flink-python
200 | org.apache.flink:flink-metrics-core
201 | org.apache.flink:flink-metrics-jmx
202 | org.apache.flink:flink-statebackend-rocksdb_2.11
203 |
204 |
207 |
208 | log4j:log4j
209 | org.scala-lang:scala-library
210 | org.scala-lang:scala-compiler
211 | org.scala-lang:scala-reflect
212 | com.data-artisans:flakka-actor_*
213 | com.data-artisans:flakka-remote_*
214 | com.data-artisans:flakka-slf4j_*
215 | io.netty:netty-all
216 | io.netty:netty
217 | commons-fileupload:commons-fileupload
218 | org.apache.avro:avro
219 | commons-collections:commons-collections
220 | org.codehaus.jackson:jackson-core-asl
221 | org.codehaus.jackson:jackson-mapper-asl
222 | com.thoughtworks.paranamer:paranamer
223 | org.xerial.snappy:snappy-java
224 | org.apache.commons:commons-compress
225 | org.tukaani:xz
226 | com.esotericsoftware.kryo:kryo
227 | com.esotericsoftware.minlog:minlog
228 | org.objenesis:objenesis
229 | com.twitter:chill_*
230 | com.twitter:chill-java
231 | commons-lang:commons-lang
232 | junit:junit
233 | org.apache.commons:commons-lang3
234 | org.slf4j:slf4j-api
235 | org.slf4j:slf4j-log4j12
236 | log4j:log4j
237 | org.apache.commons:commons-math
238 | org.apache.sling:org.apache.sling.commons.json
239 | commons-logging:commons-logging
240 | commons-codec:commons-codec
241 | com.fasterxml.jackson.core:jackson-core
242 | com.fasterxml.jackson.core:jackson-databind
243 | com.fasterxml.jackson.core:jackson-annotations
244 | stax:stax-api
245 | com.typesafe:config
246 | org.uncommons.maths:uncommons-maths
247 | com.github.scopt:scopt_*
248 | commons-io:commons-io
249 | commons-cli:commons-cli
250 |
251 |
252 |
253 |
254 | org.apache.flink:*
255 |
256 |
257 | org/apache/flink/shaded/com/**
258 | web-docs/**
259 |
260 |
261 |
262 |
264 | *:*
265 |
266 | META-INF/*.SF
267 | META-INF/*.DSA
268 | META-INF/*.RSA
269 |
270 |
271 |
272 |
274 |
276 | false
277 |
278 |
279 |
280 |
281 |
282 |
283 | org.apache.maven.plugins
284 | maven-compiler-plugin
285 | 3.1
286 |
287 | 1.7
288 | 1.7
289 |
290 |
291 |
292 |
293 |
294 |
296 |
310 |
311 |
312 |
313 |
--------------------------------------------------------------------------------
/chapter07/flink-gelly/src/main/java/com/demo/flink/gelly/BatchJob.java:
--------------------------------------------------------------------------------
1 | package com.demo.flink.gelly;
2 |
3 | import org.apache.flink.api.java.DataSet;
4 |
5 | import org.apache.flink.api.java.ExecutionEnvironment;
6 | import org.apache.flink.api.java.tuple.Tuple2;
7 | import org.apache.flink.api.java.tuple.Tuple3;
8 | import org.apache.flink.graph.Edge;
9 | import org.apache.flink.graph.Graph;
10 | import org.apache.flink.graph.Vertex;
11 | import org.apache.flink.graph.pregel.ComputeFunction;
12 | import org.apache.flink.graph.pregel.MessageCombiner;
13 | import org.apache.flink.graph.pregel.MessageIterator;
14 |
15 | public class BatchJob {
16 |
17 | final static String srcId = "s15";
18 |
19 | public static void main(String[] args) throws Exception {
20 | // set up the batch execution environment
21 | final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
22 |
23 | // Create graph by reading from CSV files
24 | DataSet> airportVertices = env
25 | .readCsvFile("D://work//Mastering Flink//Chapter 7//data//nodes.csv").types(String.class, Double.class);
26 |
27 | DataSet> airportEdges = env
28 | .readCsvFile("D://work//Mastering Flink//Chapter 7//data//edges.csv")
29 | .types(String.class, String.class, Double.class);
30 |
31 | Graph graph = Graph.fromTupleDataSet(airportVertices, airportEdges, env);
32 |
33 | // Find out no. of airports and routes
34 | System.out.println("No. of Routes in Graph:" + graph.numberOfEdges());
35 | System.out.println("No. of Airports in Graph:" + graph.numberOfVertices());
36 |
37 | // define the maximum number of iterations
38 | int maxIterations = 10;
39 |
40 | // Execute the vertex-centric iteration
41 | Graph result = graph.runVertexCentricIteration(new SSSPComputeFunction(),
42 | new SSSPCombiner(), maxIterations);
43 |
44 | // Extract the vertices as the result
45 | DataSet> singleSourceShortestPaths = result.getVertices();
46 |
47 | singleSourceShortestPaths.print();
48 |
49 |
50 |
51 | }
52 |
53 | final static class SSSPComputeFunction extends ComputeFunction {
54 |
55 | @Override
56 | public void compute(Vertex vertex, MessageIterator messages) throws Exception {
57 | double minDistance = (vertex.getId().equals(srcId)) ? 0d : Double.POSITIVE_INFINITY;
58 | for (Double msg : messages) {
59 | minDistance = Math.min(minDistance, msg);
60 | }
61 |
62 | if (minDistance < vertex.getValue()) {
63 | setNewVertexValue(minDistance);
64 | for (Edge e : getEdges()) {
65 | sendMessageTo(e.getTarget(), minDistance + e.getValue());
66 | }
67 | }
68 |
69 | }
70 |
71 | }
72 |
73 | final static class SSSPCombiner extends MessageCombiner {
74 |
75 | public void combineMessages(MessageIterator messages) {
76 |
77 | double minMessage = Double.POSITIVE_INFINITY;
78 | for (Double msg : messages) {
79 | minMessage = Math.min(minMessage, msg);
80 | }
81 | sendCombinedMessage(minMessage);
82 | }
83 |
84 | }
85 |
86 | }
87 |
--------------------------------------------------------------------------------
/chapter07/flink-gelly/src/main/resources/data/edges.csv:
--------------------------------------------------------------------------------
1 | s01,s02,10
2 | s01,s02,12
3 | s01,s03,22
4 | s01,s04,21
5 | s04,s11,22
6 | s05,s15,21
7 | s06,s17,21
8 | s08,s09,11
9 | s08,s09,12
10 | s03,s04,22
11 | s04,s03,23
12 | s01,s15,20
13 | s15,s01,11
14 | s15,s01,11
15 | s16,s17,21
16 | s16,s06,23
17 | s06,s16,21
18 | s09,s10,21
19 | s08,s07,21
20 | s07,s08,22
21 | s07,s10,21
22 | s05,s02,21
23 | s02,s03,21
24 | s02,s01,23
25 | s03,s01,21
26 | s12,s13,22
27 | s12,s14,22
28 | s14,s13,21
29 | s13,s12,21
30 | s05,s09,2
31 | s02,s10,5
32 | s03,s12,1
33 | s04,s06,1
34 | s10,s03,2
35 | s03,s10,2
36 | s04,s12,3
37 | s13,s17,1
38 | s06,s06,1
39 | s14,s11,1
40 | s03,s11,1
41 | s12,s06,2
42 | s04,s17,2
43 | s17,s04,4
44 | s08,s03,2
45 | s03,s08,4
46 | s07,s14,4
47 | s15,s06,4
48 | s15,s04,1
49 | s05,s01,1
50 | s02,s09,1
51 | s03,s05,1
52 | s07,s03,1
53 |
--------------------------------------------------------------------------------
/chapter07/flink-gelly/src/main/resources/data/nodes.csv:
--------------------------------------------------------------------------------
1 | s01,1
2 | s02,2
3 | s03,3
4 | s04,4
5 | s05,5
6 | s06,6
7 | s07,7
8 | s08,8
9 | s09,9
10 | s10,10
11 | s11,11
12 | s12,12
13 | s13,13
14 | s14,14
15 | s15,15
16 | s16,16
17 | s17,17
18 |
--------------------------------------------------------------------------------
/chapter07/flink-gelly/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | ################################################################################
18 |
19 | log4j.rootLogger=INFO, console
20 |
21 | log4j.appender.console=org.apache.log4j.ConsoleAppender
22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
23 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n
24 |
--------------------------------------------------------------------------------
/chapter08/readme.txt:
--------------------------------------------------------------------------------
1 | This chapter does not contain any coding examples.
--------------------------------------------------------------------------------
/chapter09/readme.txt:
--------------------------------------------------------------------------------
1 | This chapter does not contain any coding examples.
--------------------------------------------------------------------------------
/chapter10/flink-batch-adv/pom.xml:
--------------------------------------------------------------------------------
1 |
11 |
13 | 4.0.0
14 |
15 | com.demo
16 | flink-batch-adv
17 | 1.0
18 | jar
19 |
20 | Flink Quickstart Job
21 | http://www.myorganization.org
22 |
23 |
24 | UTF-8
25 | 1.1.3
26 | 1.7.7
27 | 1.2.17
28 |
29 |
30 |
31 |
32 | apache.snapshots
33 | Apache Development Snapshot Repository
34 | https://repository.apache.org/content/repositories/snapshots/
35 |
36 | false
37 |
38 |
39 | true
40 |
41 |
42 |
43 |
44 |
55 |
56 |
57 |
58 |
59 | org.apache.flink
60 | flink-java
61 | ${flink.version}
62 |
63 |
64 | org.apache.flink
65 | flink-streaming-java_2.11
66 | ${flink.version}
67 |
68 |
69 | org.apache.flink
70 | flink-clients_2.11
71 | ${flink.version}
72 |
73 |
74 |
75 | com.esotericsoftware
76 | kryo
77 | 4.0.0
78 |
79 |
80 |
81 |
83 |
84 | org.slf4j
85 | slf4j-log4j12
86 | ${slf4j.version}
87 |
88 |
89 | log4j
90 | log4j
91 | ${log4j.version}
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 | build-jar
100 |
101 |
102 | false
103 |
104 |
105 |
106 |
107 | org.apache.flink
108 | flink-java
109 | ${flink.version}
110 | provided
111 |
112 |
113 | org.apache.flink
114 | flink-streaming-java_2.11
115 | ${flink.version}
116 | provided
117 |
118 |
119 | org.apache.flink
120 | flink-clients_2.11
121 | ${flink.version}
122 | provided
123 |
124 |
125 | org.slf4j
126 | slf4j-log4j12
127 | ${slf4j.version}
128 | provided
129 |
130 |
131 | log4j
132 | log4j
133 | ${log4j.version}
134 | provided
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 | org.apache.maven.plugins
143 | maven-shade-plugin
144 | 2.4.1
145 |
146 |
147 | package
148 |
149 | shade
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
170 |
171 | org.apache.maven.plugins
172 | maven-shade-plugin
173 | 2.4.1
174 |
175 |
176 |
177 | package
178 |
179 | shade
180 |
181 |
182 |
183 |
184 |
186 | org.apache.flink:flink-annotations
187 | org.apache.flink:flink-shaded-hadoop2
188 | org.apache.flink:flink-shaded-curator-recipes
189 | org.apache.flink:flink-core
190 | org.apache.flink:flink-java
191 | org.apache.flink:flink-scala_2.11
192 | org.apache.flink:flink-runtime_2.11
193 | org.apache.flink:flink-optimizer_2.11
194 | org.apache.flink:flink-clients_2.11
195 | org.apache.flink:flink-avro_2.11
196 | org.apache.flink:flink-examples-batch_2.11
197 | org.apache.flink:flink-examples-streaming_2.11
198 | org.apache.flink:flink-streaming-java_2.11
199 | org.apache.flink:flink-streaming-scala_2.11
200 | org.apache.flink:flink-scala-shell_2.11
201 | org.apache.flink:flink-python
202 | org.apache.flink:flink-metrics-core
203 | org.apache.flink:flink-metrics-jmx
204 | org.apache.flink:flink-statebackend-rocksdb_2.11
205 |
206 |
209 |
210 | log4j:log4j
211 | org.scala-lang:scala-library
212 | org.scala-lang:scala-compiler
213 | org.scala-lang:scala-reflect
214 | com.data-artisans:flakka-actor_*
215 | com.data-artisans:flakka-remote_*
216 | com.data-artisans:flakka-slf4j_*
217 | io.netty:netty-all
218 | io.netty:netty
219 | commons-fileupload:commons-fileupload
220 | org.apache.avro:avro
221 | commons-collections:commons-collections
222 | org.codehaus.jackson:jackson-core-asl
223 | org.codehaus.jackson:jackson-mapper-asl
224 | com.thoughtworks.paranamer:paranamer
225 | org.xerial.snappy:snappy-java
226 | org.apache.commons:commons-compress
227 | org.tukaani:xz
228 | com.esotericsoftware.kryo:kryo
229 | com.esotericsoftware.minlog:minlog
230 | org.objenesis:objenesis
231 | com.twitter:chill_*
232 | com.twitter:chill-java
233 | commons-lang:commons-lang
234 | junit:junit
235 | org.apache.commons:commons-lang3
236 | org.slf4j:slf4j-api
237 | org.slf4j:slf4j-log4j12
238 | log4j:log4j
239 | org.apache.commons:commons-math
240 | org.apache.sling:org.apache.sling.commons.json
241 | commons-logging:commons-logging
242 | commons-codec:commons-codec
243 | com.fasterxml.jackson.core:jackson-core
244 | com.fasterxml.jackson.core:jackson-databind
245 | com.fasterxml.jackson.core:jackson-annotations
246 | stax:stax-api
247 | com.typesafe:config
248 | org.uncommons.maths:uncommons-maths
249 | com.github.scopt:scopt_*
250 | commons-io:commons-io
251 | commons-cli:commons-cli
252 |
253 |
254 |
255 |
256 | org.apache.flink:*
257 |
258 |
259 | org/apache/flink/shaded/com/**
260 | web-docs/**
261 |
262 |
263 |
264 |
266 | *:*
267 |
268 | META-INF/*.SF
269 | META-INF/*.DSA
270 | META-INF/*.RSA
271 |
272 |
273 |
274 |
276 |
278 | false
279 |
280 |
281 |
282 |
283 |
284 |
285 | org.apache.maven.plugins
286 | maven-compiler-plugin
287 | 3.1
288 |
289 | 1.7
290 | 1.7
291 |
292 |
293 |
294 |
295 |
296 |
298 |
312 |
313 |
314 |
315 |
--------------------------------------------------------------------------------
/chapter10/flink-batch-adv/src/main/java/com/demo/flink/batch/IterativePiExample.java:
--------------------------------------------------------------------------------
1 | package com.demo.flink.batch;
2 |
3 | import org.apache.flink.api.common.functions.MapFunction;
4 | import org.apache.flink.api.java.DataSet;
5 | import org.apache.flink.api.java.ExecutionEnvironment;
6 |
7 | import org.apache.flink.api.java.operators.IterativeDataSet;
8 | /**
9 | * Iterative Pi example, makes use of iteration data set to compute Pi.
10 | * @author TDeshpande
11 | *
12 | */
13 | public class IterativePiExample {
14 |
15 | public static void main(String[] args) throws Exception {
16 | final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
17 |
18 | // Create initial IterativeDataSet
19 | IterativeDataSet initial = env.fromElements(0).iterate(10000);
20 |
21 | DataSet iteration = initial.map(new MapFunction() {
22 | @Override
23 | public Integer map(Integer i) throws Exception {
24 | double x = Math.random();
25 | double y = Math.random();
26 |
27 | return i + ((x * x + y * y < 1) ? 1 : 0);
28 | }
29 | });
30 |
31 | // Iteratively transform the IterativeDataSet
32 | DataSet count = initial.closeWith(iteration);
33 |
34 | count.map(new MapFunction() {
35 | @Override
36 | public Double map(Integer count) throws Exception {
37 | return count / (double) 10000 * 4;
38 | }
39 | }).print();
40 |
41 |
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/chapter10/flink-batch-adv/src/main/java/com/demo/flink/batch/OlympicsAthletesBatchJob.java:
--------------------------------------------------------------------------------
1 | package com.demo.flink.batch;
2 |
3 | import org.apache.flink.api.java.DataSet;
4 | import org.apache.flink.api.java.ExecutionEnvironment;
5 |
6 | import java.util.List;
7 |
8 | import org.apache.flink.api.common.functions.FlatMapFunction;
9 |
10 | import org.apache.flink.api.common.functions.RichMapFunction;
11 | import org.apache.flink.api.java.tuple.Tuple2;
12 | import org.apache.flink.configuration.Configuration;
13 | import org.apache.flink.util.Collector;
14 |
/**
 * Implements the Olympics Athletes program that gives insights about games
 * played and medals won.
 *
 * <p>A sample input file is provided in the src/main/resources/data folder.
 *
 * <p>This example shows how to:
 * <ul>
 *   <li>write a simple Flink batch program,</li>
 *   <li>use Tuple data types,</li>
 *   <li>write and use user-defined functions.</li>
 * </ul>
 */
29 | public class OlympicsAthletesBatchJob {
30 |
31 | public static void main(String[] args) throws Exception {
32 |
33 | // set up the execution environment
34 | final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
35 | env.getConfig().registerTypeWithKryoSerializer(Record.class, RecordSerializer.class);
36 |
37 | DataSet csvInput = env
38 | .readCsvFile("D://NOTBACKEDUP//dataflow//flink-batch//src//main//resources//data//olympic-athletes.csv")
39 | .pojoType(Record.class, "playerName", "country", "year", "game", "gold", "silver", "bronze", "total");
40 |
41 | DataSet> groupedByCountry = csvInput
42 | .flatMap(new FlatMapFunction>() {
43 |
44 | private static final long serialVersionUID = 1L;
45 |
46 | @Override
47 | public void flatMap(Record record, Collector> out) throws Exception {
48 |
49 | out.collect(new Tuple2(record.getCountry(), 1));
50 | }
51 | }).groupBy(0).sum(1);
52 | groupedByCountry.print();
53 |
54 | DataSet> groupedByGame = csvInput
55 | .flatMap(new FlatMapFunction>() {
56 |
57 | private static final long serialVersionUID = 1L;
58 |
59 | @Override
60 | public void flatMap(Record record, Collector> out) throws Exception {
61 |
62 | out.collect(new Tuple2(record.getGame(), 1));
63 | }
64 | }).groupBy(0).sum(1);
65 | groupedByGame.print();
66 |
67 | // Get a data set to be broadcasted
68 | DataSet toBroadcast = env.fromElements(1, 2, 3);
69 | DataSet data = env.fromElements("India", "USA", "UK").map(new RichMapFunction() {
70 | private List toBroadcast;
71 |
72 | // We have to use open method to get broadcast set from the context
73 | @Override
74 | public void open(Configuration parameters) throws Exception {
75 | // Get the broadcast set, available as collection
76 | this.toBroadcast = getRuntimeContext().getBroadcastVariable("country");
77 | }
78 |
79 | @Override
80 | public String map(String input) throws Exception {
81 |
82 | int sum = 0;
83 | for (int a : toBroadcast) {
84 | sum = a + sum;
85 | }
86 | return input.toUpperCase() + sum;
87 | }
88 | }).withBroadcastSet(toBroadcast, "country"); // Broadcast the set with
89 | // name
90 | data.print();
91 |
92 | }
93 |
94 | }
95 |
--------------------------------------------------------------------------------
/chapter10/flink-batch-adv/src/main/java/com/demo/flink/batch/Record.java:
--------------------------------------------------------------------------------
1 | package com.demo.flink.batch;
2 |
/**
 * POJO describing one athlete's medal tally for one Olympic appearance.
 * Kept mutable with a no-arg constructor and getters/setters so Flink's
 * reflection-based POJO handling can instantiate and populate it.
 */
public class Record {

	private String playerName;
	private String country;
	private int year;
	private String game;
	private int gold;
	private int silver;
	private int bronze;
	private int total;

	/** No-arg constructor required for reflection-based instantiation. */
	public Record() {
	}

	/** Creates a fully populated record. */
	public Record(String playerName, String country, int year, String game, int gold, int silver, int bronze,
			int total) {
		this.playerName = playerName;
		this.country = country;
		this.year = year;
		this.game = game;
		this.gold = gold;
		this.silver = silver;
		this.bronze = bronze;
		this.total = total;
	}

	public String getPlayerName() { return playerName; }

	public void setPlayerName(String playerName) { this.playerName = playerName; }

	public String getCountry() { return country; }

	public void setCountry(String country) { this.country = country; }

	public int getYear() { return year; }

	public void setYear(int year) { this.year = year; }

	public String getGame() { return game; }

	public void setGame(String game) { this.game = game; }

	public int getGold() { return gold; }

	public void setGold(int gold) { this.gold = gold; }

	public int getSilver() { return silver; }

	public void setSilver(int silver) { this.silver = silver; }

	public int getBronze() { return bronze; }

	public void setBronze(int bronze) { this.bronze = bronze; }

	public int getTotal() { return total; }

	public void setTotal(int total) { this.total = total; }

	@Override
	public String toString() {
		// Same output format as a field-by-field concatenation.
		return String.format(
				"Record [playerName=%s, country=%s, year=%d, game=%s, gold=%d, silver=%d, bronze=%d, total=%d]",
				playerName, country, year, game, gold, silver, bronze, total);
	}

}
102 |
--------------------------------------------------------------------------------
/chapter10/flink-batch-adv/src/main/java/com/demo/flink/batch/RecordSerializer.java:
--------------------------------------------------------------------------------
1 | package com.demo.flink.batch;
2 |
3 | import com.esotericsoftware.kryo.Kryo;
4 | import com.esotericsoftware.kryo.Serializer;
5 | import com.esotericsoftware.kryo.io.Input;
6 | import com.esotericsoftware.kryo.io.Output;
7 |
8 | public class RecordSerializer extends Serializer {
9 |
10 | @Override
11 | public Record read(Kryo kryo, Input input, Class type) {
12 |
13 | return new Record(input.readString(), input.readString(), input.read(), input.readString(), input.read(),
14 | input.read(), input.read(), input.read());
15 | }
16 |
17 | @Override
18 | public void write(Kryo kryo, Output output, Record object) {
19 | output.writeString(object.getPlayerName());
20 | output.writeString(object.getCountry());
21 | output.writeInt(object.getYear());
22 | output.writeString(object.getGame());
23 | output.writeInt(object.getGold());
24 | output.writeInt(object.getSilver());
25 | output.writeInt(object.getBronze());
26 | output.writeInt(object.getTotal());
27 |
28 | }
29 |
30 | }
31 |
--------------------------------------------------------------------------------
/chapter10/flink-batch-adv/src/main/java/com/demo/flink/batch/RecordTuple.java:
--------------------------------------------------------------------------------
1 | package com.demo.flink.batch;
2 |
3 | import org.apache.flink.api.java.tuple.Tuple8;
4 |
5 | public class RecordTuple extends Tuple8 {
6 |
7 | private static final long serialVersionUID = 1L;
8 |
9 | public RecordTuple() {
10 | super();
11 | }
12 |
13 | public RecordTuple(String value0, String value1, Integer value2, String value3, Integer value4, Integer value5,
14 | Integer value6, Integer value7) {
15 | super(value0, value1, value2, value3, value4, value5, value6, value7);
16 | }
17 |
18 | }
19 |
--------------------------------------------------------------------------------
/chapter10/flink-batch-adv/src/main/resources/data/olympic-athletes.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deshpandetanmay/mastering-flink/397d7b84e46d6e9c6efa1cd4743173ae5f84b0e0/chapter10/flink-batch-adv/src/main/resources/data/olympic-athletes.csv
--------------------------------------------------------------------------------
/chapter10/flink-batch-adv/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | ################################################################################
18 |
19 | log4j.rootLogger=INFO, console
20 |
21 | log4j.appender.console=org.apache.log4j.ConsoleAppender
22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
23 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n
24 |
--------------------------------------------------------------------------------
/images/Tanmay_Books.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deshpandetanmay/mastering-flink/397d7b84e46d6e9c6efa1cd4743173ae5f84b0e0/images/Tanmay_Books.png
--------------------------------------------------------------------------------
/images/flink.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deshpandetanmay/mastering-flink/397d7b84e46d6e9c6efa1cd4743173ae5f84b0e0/images/flink.JPG
--------------------------------------------------------------------------------