├── README.md ├── chapter01 └── readme.txt ├── chapter02 └── flink-streaming │ ├── pom.xml │ └── src │ └── main │ ├── java │ └── com │ │ └── demo │ │ └── flink │ │ └── streaming │ │ ├── CustomWatermarkEmitter.java │ │ ├── Splitter.java │ │ └── StreamingJob.java │ └── resources │ └── log4j.properties ├── chapter03 └── flink-batch │ ├── pom.xml │ └── src │ └── main │ ├── java │ └── com │ │ └── demo │ │ └── flink │ │ └── batch │ │ ├── IterativePiExample.java │ │ ├── OlympicsAthletesBatchJob.java │ │ └── Record.java │ └── resources │ ├── data │ └── olympic-athletes.csv │ └── log4j.properties ├── chapter04 └── flink-table │ ├── pom.xml │ └── src │ └── main │ ├── java │ └── com │ │ └── demo │ │ └── flink │ │ └── table │ │ ├── BatchJob.java │ │ └── Record.java │ └── resources │ ├── data │ └── olympic-athletes.csv │ └── log4j.properties ├── chapter05 ├── pom.xml └── src │ ├── main │ └── java │ │ └── com │ │ └── demo │ │ └── chapter05 │ │ ├── Alert.java │ │ ├── App.java │ │ ├── EventDeserializationSchema.java │ │ ├── KafkaApp.java │ │ ├── MonitoringEvent.java │ │ └── TemperatureEvent.java │ └── test │ └── java │ └── com │ └── demo │ └── chapter05 │ └── AppTest.java ├── chapter06 ├── flink-ml │ ├── pom.xml │ └── src │ │ └── main │ │ ├── resources │ │ ├── data │ │ │ ├── iris-test.txt │ │ │ └── iris-train.txt │ │ └── log4j.properties │ │ └── scala │ │ └── com │ │ └── demo │ │ └── flink │ │ └── ml │ │ ├── Job.scala │ │ ├── MLRJob.scala │ │ └── MLRJobPipelines.scala └── ml-examples │ ├── pom.xml │ └── src │ └── main │ ├── resources │ ├── data │ │ ├── books-test.csv │ │ ├── books.csv │ │ ├── iris-test.txt │ │ └── iris-train.txt │ └── log4j.properties │ └── scala │ └── com │ └── demo │ └── chapter06 │ ├── MyALSApp.scala │ ├── MyMRLApp.scala │ └── MySVMApp.scala ├── chapter07 └── flink-gelly │ ├── pom.xml │ └── src │ └── main │ ├── java │ └── com │ │ └── demo │ │ └── flink │ │ └── gelly │ │ └── BatchJob.java │ └── resources │ ├── data │ ├── edges.csv │ └── nodes.csv │ └── 
log4j.properties ├── chapter08 └── readme.txt ├── chapter09 └── readme.txt ├── chapter10 └── flink-batch-adv │ ├── pom.xml │ └── src │ └── main │ ├── java │ └── com │ │ └── demo │ │ └── flink │ │ └── batch │ │ ├── IterativePiExample.java │ │ ├── OlympicsAthletesBatchJob.java │ │ ├── Record.java │ │ ├── RecordSerializer.java │ │ └── RecordTuple.java │ └── resources │ ├── data │ └── olympic-athletes.csv │ └── log4j.properties └── images ├── Tanmay_Books.png └── flink.JPG /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deshpandetanmay/mastering-flink/397d7b84e46d6e9c6efa1cd4743173ae5f84b0e0/README.md -------------------------------------------------------------------------------- /chapter01/readme.txt: -------------------------------------------------------------------------------- 1 | This chapter does not contain any coding examples. -------------------------------------------------------------------------------- /chapter02/flink-streaming/pom.xml: -------------------------------------------------------------------------------- 1 | 19 | 21 | 4.0.0 22 | 23 | com.demo 24 | flink-streaming 25 | 1.0 26 | jar 27 | 28 | Flink Quickstart Job 29 | http://www.myorganization.org 30 | 31 | 32 | UTF-8 33 | 1.1.4 34 | 1.7.7 35 | 1.2.17 36 | 37 | 38 | 39 | 40 | apache.snapshots 41 | Apache Development Snapshot Repository 42 | https://repository.apache.org/content/repositories/snapshots/ 43 | 44 | false 45 | 46 | 47 | true 48 | 49 | 50 | 51 | 52 | 75 | 76 | 77 | 78 | 79 | org.apache.flink 80 | flink-java 81 | ${flink.version} 82 | 83 | 84 | org.apache.flink 85 | flink-streaming-java_2.11 86 | ${flink.version} 87 | 88 | 89 | org.apache.flink 90 | flink-clients_2.11 91 | ${flink.version} 92 | 93 | 94 | 96 | 97 | org.slf4j 98 | slf4j-log4j12 99 | ${slf4j.version} 100 | 101 | 102 | log4j 103 | log4j 104 | ${log4j.version} 105 | 106 | 107 | 108 | org.apache.flink 109 | flink-connector-kafka-0.9_2.11 110 | 
${flink.version} 111 | 112 | 113 | 114 | 115 | 116 | 117 | build-jar 118 | 119 | 120 | false 121 | 122 | 123 | 124 | 125 | org.apache.flink 126 | flink-java 127 | ${flink.version} 128 | provided 129 | 130 | 131 | org.apache.flink 132 | flink-streaming-java_2.11 133 | ${flink.version} 134 | provided 135 | 136 | 137 | org.apache.flink 138 | flink-clients_2.11 139 | ${flink.version} 140 | provided 141 | 142 | 143 | org.slf4j 144 | slf4j-log4j12 145 | ${slf4j.version} 146 | provided 147 | 148 | 149 | log4j 150 | log4j 151 | ${log4j.version} 152 | provided 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | org.apache.maven.plugins 161 | maven-shade-plugin 162 | 2.4.1 163 | 164 | 165 | package 166 | 167 | shade 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 187 | 188 | org.apache.maven.plugins 189 | maven-shade-plugin 190 | 2.4.1 191 | 192 | 193 | 194 | package 195 | 196 | shade 197 | 198 | 199 | 200 | 201 | 204 | org.apache.flink:flink-annotations 205 | org.apache.flink:flink-shaded-hadoop2 206 | org.apache.flink:flink-shaded-curator-recipes 207 | org.apache.flink:flink-core 208 | org.apache.flink:flink-java 209 | org.apache.flink:flink-scala_2.11 210 | org.apache.flink:flink-runtime_2.11 211 | org.apache.flink:flink-optimizer_2.11 212 | org.apache.flink:flink-clients_2.11 213 | org.apache.flink:flink-avro_2.11 214 | org.apache.flink:flink-examples-batch_2.11 215 | org.apache.flink:flink-examples-streaming_2.11 216 | org.apache.flink:flink-streaming-java_2.11 217 | org.apache.flink:flink-streaming-scala_2.11 218 | org.apache.flink:flink-scala-shell_2.11 219 | org.apache.flink:flink-python 220 | org.apache.flink:flink-metrics-core 221 | org.apache.flink:flink-metrics-jmx 222 | org.apache.flink:flink-statebackend-rocksdb_2.11 223 | 224 | 230 | 231 | log4j:log4j 232 | org.scala-lang:scala-library 233 | org.scala-lang:scala-compiler 234 | org.scala-lang:scala-reflect 235 | com.data-artisans:flakka-actor_* 236 | 
com.data-artisans:flakka-remote_* 237 | com.data-artisans:flakka-slf4j_* 238 | io.netty:netty-all 239 | io.netty:netty 240 | commons-fileupload:commons-fileupload 241 | org.apache.avro:avro 242 | commons-collections:commons-collections 243 | org.codehaus.jackson:jackson-core-asl 244 | org.codehaus.jackson:jackson-mapper-asl 245 | com.thoughtworks.paranamer:paranamer 246 | org.xerial.snappy:snappy-java 247 | org.apache.commons:commons-compress 248 | org.tukaani:xz 249 | com.esotericsoftware.kryo:kryo 250 | com.esotericsoftware.minlog:minlog 251 | org.objenesis:objenesis 252 | com.twitter:chill_* 253 | com.twitter:chill-java 254 | commons-lang:commons-lang 255 | junit:junit 256 | org.apache.commons:commons-lang3 257 | org.slf4j:slf4j-api 258 | org.slf4j:slf4j-log4j12 259 | log4j:log4j 260 | org.apache.commons:commons-math 261 | org.apache.sling:org.apache.sling.commons.json 262 | commons-logging:commons-logging 263 | commons-codec:commons-codec 264 | com.fasterxml.jackson.core:jackson-core 265 | com.fasterxml.jackson.core:jackson-databind 266 | com.fasterxml.jackson.core:jackson-annotations 267 | stax:stax-api 268 | com.typesafe:config 269 | org.uncommons.maths:uncommons-maths 270 | com.github.scopt:scopt_* 271 | commons-io:commons-io 272 | commons-cli:commons-cli 273 | 274 | 275 | 276 | 277 | org.apache.flink:* 278 | 279 | 280 | org/apache/flink/shaded/com/** 281 | web-docs/** 282 | 283 | 284 | 285 | 287 | *:* 288 | 289 | META-INF/*.SF 290 | META-INF/*.DSA 291 | META-INF/*.RSA 292 | 293 | 294 | 295 | 297 | 304 | false 305 | 306 | 307 | 308 | 309 | 310 | 311 | org.apache.maven.plugins 312 | maven-compiler-plugin 313 | 3.1 314 | 315 | 1.7 316 | 1.7 317 | 318 | 319 | 320 | 321 | 322 | 323 | 383 | 384 | 385 | 386 | -------------------------------------------------------------------------------- /chapter02/flink-streaming/src/main/java/com/demo/flink/streaming/CustomWatermarkEmitter.java: -------------------------------------------------------------------------------- 1 
| package com.demo.flink.streaming; 2 | 3 | import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks; 4 | import org.apache.flink.streaming.api.watermark.Watermark; 5 | 6 | /** 7 | * Custom Watermark Emitter. 8 | * @author TDeshpande 9 | * 10 | */ 11 | public class CustomWatermarkEmitter implements AssignerWithPunctuatedWatermarks { 12 | 13 | 14 | private static final long serialVersionUID = 1L; 15 | 16 | @Override 17 | public long extractTimestamp(String arg0, long arg1) { 18 | if (null != arg0 && arg0.contains(",")) { 19 | String parts[] = arg0.split(","); 20 | return Long.parseLong(parts[0]); 21 | } 22 | 23 | return 0; 24 | } 25 | 26 | @Override 27 | public Watermark checkAndGetNextWatermark(String arg0, long arg1) { 28 | if (null != arg0 && arg0.contains(",")) { 29 | String parts[] = arg0.split(","); 30 | return new Watermark(Long.parseLong(parts[0])); 31 | } 32 | return null; 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /chapter02/flink-streaming/src/main/java/com/demo/flink/streaming/Splitter.java: -------------------------------------------------------------------------------- 1 | package com.demo.flink.streaming; 2 | 3 | import org.apache.flink.api.common.functions.FlatMapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | import org.apache.flink.util.Collector; 6 | 7 | public class Splitter implements FlatMapFunction> { 8 | 9 | /** 10 | * 11 | */ 12 | private static final long serialVersionUID = 1L; 13 | 14 | @Override 15 | public void flatMap(String value, Collector> out) throws Exception { 16 | 17 | if (null != value && value.contains(",")) { 18 | String parts[] = value.split(","); 19 | out.collect(new Tuple2(parts[2], Double.parseDouble(parts[1]))); 20 | } 21 | } 22 | 23 | } 24 | -------------------------------------------------------------------------------- /chapter02/flink-streaming/src/main/java/com/demo/flink/streaming/StreamingJob.java: 
-------------------------------------------------------------------------------- 1 | package com.demo.flink.streaming; 2 | 3 | import java.util.Properties; 4 | 5 | 6 | import org.apache.flink.api.java.tuple.Tuple; 7 | import org.apache.flink.api.java.tuple.Tuple2; 8 | import org.apache.flink.streaming.api.TimeCharacteristic; 9 | import org.apache.flink.streaming.api.datastream.DataStream; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.streaming.api.functions.windowing.WindowFunction; 12 | import org.apache.flink.streaming.api.windowing.time.Time; 13 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 14 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer09; 15 | import org.apache.flink.streaming.util.serialization.SimpleStringSchema; 16 | import org.apache.flink.util.Collector; 17 | 18 | public class StreamingJob { 19 | 20 | public static void main(String[] args) throws Exception { 21 | // set up the streaming execution environment 22 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 23 | // env.enableCheckpointing(5000); 24 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 25 | 26 | Properties properties = new Properties(); 27 | properties.setProperty("bootstrap.servers", "localhost:9092"); 28 | 29 | properties.setProperty("zookeeper.connect", "localhost:2181"); 30 | properties.setProperty("group.id", "test"); 31 | 32 | FlinkKafkaConsumer09 myConsumer = new FlinkKafkaConsumer09<>("temp", new SimpleStringSchema(), 33 | properties); 34 | myConsumer.assignTimestampsAndWatermarks(new CustomWatermarkEmitter()); 35 | 36 | 37 | DataStream> keyedStream = env.addSource(myConsumer).flatMap(new Splitter()).keyBy(0) 38 | .timeWindow(Time.seconds(300)) 39 | .apply(new WindowFunction, Tuple2, Tuple, TimeWindow>() { 40 | 41 | @Override 42 | public void apply(Tuple key, TimeWindow window, Iterable> input, 43 | 
Collector> out) throws Exception { 44 | double sum = 0L; 45 | int count = 0; 46 | for (Tuple2 record : input) { 47 | sum += record.f1; 48 | count++; 49 | } 50 | 51 | Tuple2 result = input.iterator().next(); 52 | result.f1 = (sum/count); 53 | out.collect(result); 54 | 55 | } 56 | }); 57 | 58 | keyedStream.print(); 59 | 60 | // execute program 61 | env.execute("Flink Streaming Java API Skeleton"); 62 | } 63 | 64 | } 65 | -------------------------------------------------------------------------------- /chapter02/flink-streaming/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | ################################################################################ 18 | 19 | log4j.rootLogger=INFO, console 20 | 21 | log4j.appender.console=org.apache.log4j.ConsoleAppender 22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 23 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n 24 | -------------------------------------------------------------------------------- /chapter03/flink-batch/pom.xml: -------------------------------------------------------------------------------- 1 | 19 | 21 | 4.0.0 22 | 23 | com.demo 24 | flink-batch 25 | 1.0 26 | jar 27 | 28 | Flink Quickstart Job 29 | http://www.myorganization.org 30 | 31 | 32 | UTF-8 33 | 1.1.3 34 | 1.7.7 35 | 1.2.17 36 | 37 | 38 | 39 | 40 | apache.snapshots 41 | Apache Development Snapshot Repository 42 | https://repository.apache.org/content/repositories/snapshots/ 43 | 44 | false 45 | 46 | 47 | true 48 | 49 | 50 | 51 | 52 | 75 | 76 | 77 | 78 | 79 | org.apache.flink 80 | flink-java 81 | ${flink.version} 82 | 83 | 84 | org.apache.flink 85 | flink-streaming-java_2.11 86 | ${flink.version} 87 | 88 | 89 | org.apache.flink 90 | flink-clients_2.11 91 | ${flink.version} 92 | 93 | 94 | 96 | 97 | org.slf4j 98 | slf4j-log4j12 99 | ${slf4j.version} 100 | 101 | 102 | log4j 103 | log4j 104 | ${log4j.version} 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | build-jar 113 | 114 | 115 | false 116 | 117 | 118 | 119 | 120 | org.apache.flink 121 | flink-java 122 | ${flink.version} 123 | provided 124 | 125 | 126 | org.apache.flink 127 | flink-streaming-java_2.11 128 | ${flink.version} 129 | provided 130 | 131 | 132 | org.apache.flink 133 | flink-clients_2.11 134 | ${flink.version} 135 | provided 136 | 137 | 138 | org.slf4j 139 | slf4j-log4j12 140 | ${slf4j.version} 141 | provided 142 | 143 | 144 | log4j 145 | log4j 146 | ${log4j.version} 147 | provided 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | org.apache.maven.plugins 156 | maven-shade-plugin 157 | 2.4.1 158 | 
159 | 160 | package 161 | 162 | shade 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 182 | 183 | org.apache.maven.plugins 184 | maven-shade-plugin 185 | 2.4.1 186 | 187 | 188 | 189 | package 190 | 191 | shade 192 | 193 | 194 | 195 | 196 | 199 | org.apache.flink:flink-annotations 200 | org.apache.flink:flink-shaded-hadoop2 201 | org.apache.flink:flink-shaded-curator-recipes 202 | org.apache.flink:flink-core 203 | org.apache.flink:flink-java 204 | org.apache.flink:flink-scala_2.11 205 | org.apache.flink:flink-runtime_2.11 206 | org.apache.flink:flink-optimizer_2.11 207 | org.apache.flink:flink-clients_2.11 208 | org.apache.flink:flink-avro_2.11 209 | org.apache.flink:flink-examples-batch_2.11 210 | org.apache.flink:flink-examples-streaming_2.11 211 | org.apache.flink:flink-streaming-java_2.11 212 | org.apache.flink:flink-streaming-scala_2.11 213 | org.apache.flink:flink-scala-shell_2.11 214 | org.apache.flink:flink-python 215 | org.apache.flink:flink-metrics-core 216 | org.apache.flink:flink-metrics-jmx 217 | org.apache.flink:flink-statebackend-rocksdb_2.11 218 | 219 | 225 | 226 | log4j:log4j 227 | org.scala-lang:scala-library 228 | org.scala-lang:scala-compiler 229 | org.scala-lang:scala-reflect 230 | com.data-artisans:flakka-actor_* 231 | com.data-artisans:flakka-remote_* 232 | com.data-artisans:flakka-slf4j_* 233 | io.netty:netty-all 234 | io.netty:netty 235 | commons-fileupload:commons-fileupload 236 | org.apache.avro:avro 237 | commons-collections:commons-collections 238 | org.codehaus.jackson:jackson-core-asl 239 | org.codehaus.jackson:jackson-mapper-asl 240 | com.thoughtworks.paranamer:paranamer 241 | org.xerial.snappy:snappy-java 242 | org.apache.commons:commons-compress 243 | org.tukaani:xz 244 | com.esotericsoftware.kryo:kryo 245 | com.esotericsoftware.minlog:minlog 246 | org.objenesis:objenesis 247 | com.twitter:chill_* 248 | com.twitter:chill-java 249 | commons-lang:commons-lang 250 | junit:junit 251 
| org.apache.commons:commons-lang3 252 | org.slf4j:slf4j-api 253 | org.slf4j:slf4j-log4j12 254 | log4j:log4j 255 | org.apache.commons:commons-math 256 | org.apache.sling:org.apache.sling.commons.json 257 | commons-logging:commons-logging 258 | commons-codec:commons-codec 259 | com.fasterxml.jackson.core:jackson-core 260 | com.fasterxml.jackson.core:jackson-databind 261 | com.fasterxml.jackson.core:jackson-annotations 262 | stax:stax-api 263 | com.typesafe:config 264 | org.uncommons.maths:uncommons-maths 265 | com.github.scopt:scopt_* 266 | commons-io:commons-io 267 | commons-cli:commons-cli 268 | 269 | 270 | 271 | 272 | org.apache.flink:* 273 | 274 | 275 | org/apache/flink/shaded/com/** 276 | web-docs/** 277 | 278 | 279 | 280 | 282 | *:* 283 | 284 | META-INF/*.SF 285 | META-INF/*.DSA 286 | META-INF/*.RSA 287 | 288 | 289 | 290 | 292 | 299 | false 300 | 301 | 302 | 303 | 304 | 305 | 306 | org.apache.maven.plugins 307 | maven-compiler-plugin 308 | 3.1 309 | 310 | 1.7 311 | 1.7 312 | 313 | 314 | 315 | 316 | 317 | 318 | 378 | 379 | 380 | 381 | -------------------------------------------------------------------------------- /chapter03/flink-batch/src/main/java/com/demo/flink/batch/IterativePiExample.java: -------------------------------------------------------------------------------- 1 | package com.demo.flink.batch; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.java.DataSet; 5 | import org.apache.flink.api.java.ExecutionEnvironment; 6 | 7 | import org.apache.flink.api.java.operators.IterativeDataSet; 8 | /** 9 | * Iterative Pi example, makes use of iteration data set to compute Pi. 
10 | * @author TDeshpande 11 | * 12 | */ 13 | public class IterativePiExample { 14 | 15 | public static void main(String[] args) throws Exception { 16 | final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 17 | 18 | // Create initial IterativeDataSet 19 | IterativeDataSet initial = env.fromElements(0).iterate(10000); 20 | 21 | DataSet iteration = initial.map(new MapFunction() { 22 | @Override 23 | public Integer map(Integer i) throws Exception { 24 | double x = Math.random(); 25 | double y = Math.random(); 26 | 27 | return i + ((x * x + y * y < 1) ? 1 : 0); 28 | } 29 | }); 30 | 31 | // Iteratively transform the IterativeDataSet 32 | DataSet count = initial.closeWith(iteration); 33 | 34 | count.map(new MapFunction() { 35 | @Override 36 | public Double map(Integer count) throws Exception { 37 | return count / (double) 10000 * 4; 38 | } 39 | }).print(); 40 | 41 | 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /chapter03/flink-batch/src/main/java/com/demo/flink/batch/OlympicsAthletesBatchJob.java: -------------------------------------------------------------------------------- 1 | package com.demo.flink.batch; 2 | 3 | 4 | import org.apache.flink.api.java.DataSet; 5 | import org.apache.flink.api.java.ExecutionEnvironment; 6 | import org.apache.flink.api.common.functions.FlatMapFunction; 7 | import org.apache.flink.api.java.tuple.Tuple2; 8 | 9 | import org.apache.flink.util.Collector; 10 | 11 | /** 12 | * Implements the Oylympics Athletes program that gives insights about games played and medals won. 13 | * 14 | * Sample input file is provided in src/main/resources/data folder 15 | *

16 | * This example shows how to: 17 | *

    18 | *
  • write a simple Flink batch program. 19 | *
  • use Tuple data types. 20 | *
  • write and use user-defined functions. 21 | *
22 | * 23 | */ 24 | public class OlympicsAthletesBatchJob { 25 | 26 | 27 | public static void main(String[] args) throws Exception { 28 | 29 | // set up the execution environment 30 | final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 31 | 32 | DataSet csvInput = env.readCsvFile("olympic-athletes.csv") 33 | .pojoType(Record.class, "playerName", "country", "year", "game", "gold", "silver", "bronze", "total"); 34 | 35 | DataSet> groupedByCountry = csvInput 36 | .flatMap(new FlatMapFunction>() { 37 | 38 | private static final long serialVersionUID = 1L; 39 | 40 | @Override 41 | public void flatMap(Record record, Collector> out) throws Exception { 42 | 43 | out.collect(new Tuple2(record.getCountry(), 1)); 44 | } 45 | }).groupBy(0).sum(1); 46 | groupedByCountry.print(); 47 | 48 | DataSet> groupedByGame = csvInput 49 | .flatMap(new FlatMapFunction>() { 50 | 51 | private static final long serialVersionUID = 1L; 52 | 53 | @Override 54 | public void flatMap(Record record, Collector> out) throws Exception { 55 | 56 | out.collect(new Tuple2(record.getGame(), 1)); 57 | } 58 | }).groupBy(0).sum(1); 59 | groupedByGame.print(); 60 | 61 | } 62 | 63 | } 64 | -------------------------------------------------------------------------------- /chapter03/flink-batch/src/main/java/com/demo/flink/batch/Record.java: -------------------------------------------------------------------------------- 1 | package com.demo.flink.batch; 2 | 3 | public class Record { 4 | 5 | private String playerName; 6 | private String country; 7 | private int year; 8 | private String game; 9 | private int gold; 10 | private int silver; 11 | private int bronze; 12 | private int total; 13 | 14 | public String getPlayerName() { 15 | return playerName; 16 | } 17 | 18 | public void setPlayerName(String playerName) { 19 | this.playerName = playerName; 20 | } 21 | 22 | public String getCountry() { 23 | return country; 24 | } 25 | 26 | public void setCountry(String country) { 27 | 
this.country = country; 28 | } 29 | 30 | public int getYear() { 31 | return year; 32 | } 33 | 34 | public void setYear(int year) { 35 | this.year = year; 36 | } 37 | 38 | public String getGame() { 39 | return game; 40 | } 41 | 42 | public void setGame(String game) { 43 | this.game = game; 44 | } 45 | 46 | public int getGold() { 47 | return gold; 48 | } 49 | 50 | public void setGold(int gold) { 51 | this.gold = gold; 52 | } 53 | 54 | public int getSilver() { 55 | return silver; 56 | } 57 | 58 | public void setSilver(int silver) { 59 | this.silver = silver; 60 | } 61 | 62 | public int getBronze() { 63 | return bronze; 64 | } 65 | 66 | public void setBronze(int bronze) { 67 | this.bronze = bronze; 68 | } 69 | 70 | public int getTotal() { 71 | return total; 72 | } 73 | 74 | public void setTotal(int total) { 75 | this.total = total; 76 | } 77 | 78 | @Override 79 | public String toString() { 80 | return "Record [playerName=" + playerName + ", country=" + country + ", year=" + year + ", game=" + game 81 | + ", gold=" + gold + ", silver=" + silver + ", bronze=" + bronze + ", total=" + total + "]"; 82 | } 83 | 84 | } 85 | -------------------------------------------------------------------------------- /chapter03/flink-batch/src/main/resources/data/olympic-athletes.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deshpandetanmay/mastering-flink/397d7b84e46d6e9c6efa1cd4743173ae5f84b0e0/chapter03/flink-batch/src/main/resources/data/olympic-athletes.csv -------------------------------------------------------------------------------- /chapter03/flink-batch/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. 
See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | log4j.rootLogger=INFO, console 20 | 21 | log4j.appender.console=org.apache.log4j.ConsoleAppender 22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 23 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n 24 | -------------------------------------------------------------------------------- /chapter04/flink-table/pom.xml: -------------------------------------------------------------------------------- 1 | 11 | 13 | 4.0.0 14 | 15 | com.demo 16 | flink-table 17 | 1.0 18 | jar 19 | 20 | Flink Quickstart Job 21 | http://www.myorganization.org 22 | 23 | 24 | UTF-8 25 | 1.1.4 26 | 1.7.7 27 | 1.2.17 28 | 29 | 30 | 31 | 32 | apache.snapshots 33 | Apache Development Snapshot Repository 34 | https://repository.apache.org/content/repositories/snapshots/ 35 | 36 | false 37 | 38 | 39 | true 40 | 41 | 42 | 43 | 44 | 55 | 56 | 57 | 58 | 59 | org.apache.flink 60 | flink-java 61 | ${flink.version} 62 | 63 | 64 | org.apache.flink 65 | flink-streaming-java_2.11 66 | ${flink.version} 67 | 68 | 69 | org.apache.flink 70 | flink-clients_2.11 71 | ${flink.version} 72 | 73 | 74 | 75 | org.apache.flink 76 | flink-table_2.11 77 | 
${flink.version} 78 | 79 | 80 | 81 | 83 | 84 | org.slf4j 85 | slf4j-log4j12 86 | ${slf4j.version} 87 | 88 | 89 | log4j 90 | log4j 91 | ${log4j.version} 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | build-jar 100 | 101 | 102 | false 103 | 104 | 105 | 106 | 107 | org.apache.flink 108 | flink-java 109 | ${flink.version} 110 | provided 111 | 112 | 113 | org.apache.flink 114 | flink-streaming-java_2.11 115 | ${flink.version} 116 | provided 117 | 118 | 119 | org.apache.flink 120 | flink-clients_2.11 121 | ${flink.version} 122 | provided 123 | 124 | 125 | org.slf4j 126 | slf4j-log4j12 127 | ${slf4j.version} 128 | provided 129 | 130 | 131 | log4j 132 | log4j 133 | ${log4j.version} 134 | provided 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | org.apache.maven.plugins 143 | maven-shade-plugin 144 | 2.4.1 145 | 146 | 147 | package 148 | 149 | shade 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 170 | 171 | org.apache.maven.plugins 172 | maven-shade-plugin 173 | 2.4.1 174 | 175 | 176 | 177 | package 178 | 179 | shade 180 | 181 | 182 | 183 | 184 | 186 | org.apache.flink:flink-annotations 187 | org.apache.flink:flink-shaded-hadoop2 188 | org.apache.flink:flink-shaded-curator-recipes 189 | org.apache.flink:flink-core 190 | org.apache.flink:flink-java 191 | org.apache.flink:flink-scala_2.11 192 | org.apache.flink:flink-runtime_2.11 193 | org.apache.flink:flink-optimizer_2.11 194 | org.apache.flink:flink-clients_2.11 195 | org.apache.flink:flink-avro_2.11 196 | org.apache.flink:flink-examples-batch_2.11 197 | org.apache.flink:flink-examples-streaming_2.11 198 | org.apache.flink:flink-streaming-java_2.11 199 | org.apache.flink:flink-streaming-scala_2.11 200 | org.apache.flink:flink-scala-shell_2.11 201 | org.apache.flink:flink-python 202 | org.apache.flink:flink-metrics-core 203 | org.apache.flink:flink-metrics-jmx 204 | org.apache.flink:flink-statebackend-rocksdb_2.11 205 | 206 | 209 | 210 | log4j:log4j 211 | 
org.scala-lang:scala-library 212 | org.scala-lang:scala-compiler 213 | org.scala-lang:scala-reflect 214 | com.data-artisans:flakka-actor_* 215 | com.data-artisans:flakka-remote_* 216 | com.data-artisans:flakka-slf4j_* 217 | io.netty:netty-all 218 | io.netty:netty 219 | commons-fileupload:commons-fileupload 220 | org.apache.avro:avro 221 | commons-collections:commons-collections 222 | org.codehaus.jackson:jackson-core-asl 223 | org.codehaus.jackson:jackson-mapper-asl 224 | com.thoughtworks.paranamer:paranamer 225 | org.xerial.snappy:snappy-java 226 | org.apache.commons:commons-compress 227 | org.tukaani:xz 228 | com.esotericsoftware.kryo:kryo 229 | com.esotericsoftware.minlog:minlog 230 | org.objenesis:objenesis 231 | com.twitter:chill_* 232 | com.twitter:chill-java 233 | commons-lang:commons-lang 234 | junit:junit 235 | org.apache.commons:commons-lang3 236 | org.slf4j:slf4j-api 237 | org.slf4j:slf4j-log4j12 238 | log4j:log4j 239 | org.apache.commons:commons-math 240 | org.apache.sling:org.apache.sling.commons.json 241 | commons-logging:commons-logging 242 | commons-codec:commons-codec 243 | com.fasterxml.jackson.core:jackson-core 244 | com.fasterxml.jackson.core:jackson-databind 245 | com.fasterxml.jackson.core:jackson-annotations 246 | stax:stax-api 247 | com.typesafe:config 248 | org.uncommons.maths:uncommons-maths 249 | com.github.scopt:scopt_* 250 | commons-io:commons-io 251 | commons-cli:commons-cli 252 | 253 | 254 | 255 | 256 | org.apache.flink:* 257 | 258 | 259 | org/apache/flink/shaded/com/** 260 | web-docs/** 261 | 262 | 263 | 264 | 266 | *:* 267 | 268 | META-INF/*.SF 269 | META-INF/*.DSA 270 | META-INF/*.RSA 271 | 272 | 273 | 274 | 276 | 278 | false 279 | 280 | 281 | 282 | 283 | 284 | 285 | org.apache.maven.plugins 286 | maven-compiler-plugin 287 | 3.1 288 | 289 | 1.7 290 | 1.7 291 | 292 | 293 | 294 | 295 | 296 | 298 | 312 | 313 | 314 | 315 | -------------------------------------------------------------------------------- 
/chapter04/flink-table/src/main/java/com/demo/flink/table/BatchJob.java: -------------------------------------------------------------------------------- 1 | package com.demo.flink.table; 2 | 3 | import org.apache.flink.api.java.DataSet; 4 | import org.apache.flink.api.java.ExecutionEnvironment; 5 | import org.apache.flink.api.java.table.BatchTableEnvironment; 6 | import org.apache.flink.api.table.Table; 7 | import org.apache.flink.api.table.TableEnvironment; 8 | 9 | public class BatchJob { 10 | 11 | public static void main(String[] args) throws Exception { 12 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 13 | BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env); 14 | 15 | DataSet csvInput = env 16 | .readCsvFile("D://NOTBACKEDUP//dataflow//flink-table//src//main//resources//data//olympic-athletes.csv") 17 | .pojoType(Record.class, "playerName", "country", "year", "game", "gold", "silver", "bronze", "total"); 18 | // register the DataSet athletes as table "athletes" with fields derived 19 | // from the dataset 20 | Table atheltes = tableEnv.fromDataSet(csvInput); 21 | tableEnv.registerTable("athletes", atheltes); 22 | // run a SQL query on the Table and retrieve the result as a new Table 23 | Table groupedByCountry = tableEnv.sql("SELECT country, SUM(total) as frequency FROM athletes group by country"); 24 | 25 | DataSet result = tableEnv.toDataSet(groupedByCountry, Result.class); 26 | 27 | result.print(); 28 | 29 | Table groupedByGame = atheltes.groupBy("game").select("game, total.sum as frequency"); 30 | 31 | DataSet gameResult = tableEnv.toDataSet(groupedByGame, GameResult.class); 32 | 33 | gameResult.print(); 34 | 35 | } 36 | 37 | public static class Result { 38 | public String country; 39 | public Integer frequency; 40 | 41 | public Result() { 42 | super(); 43 | } 44 | 45 | public Result(String country, Integer total) { 46 | this.country = country; 47 | this.frequency = total; 48 | } 49 | 50 | @Override 51 | 
public String toString() { 52 | return "Result " + country + " " + frequency; 53 | } 54 | } 55 | 56 | public static class GameResult { 57 | public String game; 58 | public Integer frequency; 59 | 60 | public GameResult(String game, Integer frequency) { 61 | super(); 62 | this.game = game; 63 | this.frequency = frequency; 64 | } 65 | 66 | public GameResult() { 67 | super(); 68 | } 69 | 70 | @Override 71 | public String toString() { 72 | return "GameResult [game=" + game + ", frequency=" + frequency + "]"; 73 | } 74 | 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /chapter04/flink-table/src/main/java/com/demo/flink/table/Record.java: -------------------------------------------------------------------------------- 1 | package com.demo.flink.table; 2 | 3 | public class Record { 4 | 5 | private String playerName; 6 | private String country; 7 | private int year; 8 | private String game; 9 | private int gold; 10 | private int silver; 11 | private int bronze; 12 | private int total; 13 | 14 | public String getPlayerName() { 15 | return playerName; 16 | } 17 | 18 | public void setPlayerName(String playerName) { 19 | this.playerName = playerName; 20 | } 21 | 22 | public String getCountry() { 23 | return country; 24 | } 25 | 26 | public void setCountry(String country) { 27 | this.country = country; 28 | } 29 | 30 | public int getYear() { 31 | return year; 32 | } 33 | 34 | public void setYear(int year) { 35 | this.year = year; 36 | } 37 | 38 | public String getGame() { 39 | return game; 40 | } 41 | 42 | public void setGame(String game) { 43 | this.game = game; 44 | } 45 | 46 | public int getGold() { 47 | return gold; 48 | } 49 | 50 | public void setGold(int gold) { 51 | this.gold = gold; 52 | } 53 | 54 | public int getSilver() { 55 | return silver; 56 | } 57 | 58 | public void setSilver(int silver) { 59 | this.silver = silver; 60 | } 61 | 62 | public int getBronze() { 63 | return bronze; 64 | } 65 | 66 | public void 
setBronze(int bronze) { 67 | this.bronze = bronze; 68 | } 69 | 70 | public int getTotal() { 71 | return total; 72 | } 73 | 74 | public void setTotal(int total) { 75 | this.total = total; 76 | } 77 | 78 | @Override 79 | public String toString() { 80 | return "Record [playerName=" + playerName + ", country=" + country + ", year=" + year + ", game=" + game 81 | + ", gold=" + gold + ", silver=" + silver + ", bronze=" + bronze + ", total=" + total + "]"; 82 | } 83 | 84 | } 85 | -------------------------------------------------------------------------------- /chapter04/flink-table/src/main/resources/data/olympic-athletes.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deshpandetanmay/mastering-flink/397d7b84e46d6e9c6efa1cd4743173ae5f84b0e0/chapter04/flink-table/src/main/resources/data/olympic-athletes.csv -------------------------------------------------------------------------------- /chapter04/flink-table/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | log4j.rootLogger=INFO, console 20 | 21 | log4j.appender.console=org.apache.log4j.ConsoleAppender 22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 23 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n 24 | -------------------------------------------------------------------------------- /chapter05/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.demo 6 | chapter05 7 | 1.0 8 | jar 9 | 10 | chapter05 11 | http://maven.apache.org 12 | 13 | 14 | UTF-8 15 | 16 | 17 | 18 | 19 | junit 20 | junit 21 | 3.8.1 22 | test 23 | 24 | 25 | 26 | org.apache.flink 27 | flink-cep-scala_2.10 28 | 1.1.2 29 | 30 | 31 | 32 | org.apache.flink 33 | flink-streaming-java_2.10 34 | 1.1.2 35 | 36 | 37 | 38 | org.apache.flink 39 | flink-streaming-scala_2.10 40 | 1.1.2 41 | 42 | 43 | org.apache.flink 44 | flink-connector-kafka-0.9_2.10 45 | 1.0.0 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /chapter05/src/main/java/com/demo/chapter05/Alert.java: -------------------------------------------------------------------------------- 1 | package com.demo.chapter05; 2 | 3 | public class Alert { 4 | 5 | private String message; 6 | 7 | public String getMessage() { 8 | return message; 9 | } 10 | 11 | public void setMessage(String message) { 12 | this.message = message; 13 | } 14 | 15 | public Alert(String message) { 16 | super(); 17 | this.message = message; 18 | } 19 | 20 | @Override 21 | public String toString() { 22 | return "Alert [message=" + message + "]"; 23 | } 24 | 25 | @Override 26 | public int hashCode() { 27 | final int prime = 31; 28 | int result = 1; 29 | result = prime * result + ((message == null) ? 
0 : message.hashCode()); 30 | return result; 31 | } 32 | 33 | @Override 34 | public boolean equals(Object obj) { 35 | if (this == obj) 36 | return true; 37 | if (obj == null) 38 | return false; 39 | if (getClass() != obj.getClass()) 40 | return false; 41 | Alert other = (Alert) obj; 42 | if (message == null) { 43 | if (other.message != null) 44 | return false; 45 | } else if (!message.equals(other.message)) 46 | return false; 47 | return true; 48 | } 49 | 50 | 51 | } 52 | -------------------------------------------------------------------------------- /chapter05/src/main/java/com/demo/chapter05/App.java: -------------------------------------------------------------------------------- 1 | package com.demo.chapter05; 2 | 3 | import java.util.Map; 4 | 5 | import org.apache.flink.api.common.functions.FilterFunction; 6 | import org.apache.flink.cep.CEP; 7 | import org.apache.flink.cep.PatternSelectFunction; 8 | import org.apache.flink.cep.pattern.Pattern; 9 | import org.apache.flink.streaming.api.datastream.DataStream; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.streaming.api.windowing.time.Time; 12 | 13 | public class App { 14 | public static void main(String[] args) throws Exception { 15 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 16 | DataStream inputEventStream = env.fromElements(new TemperatureEvent("xyz", 22.0), 17 | new TemperatureEvent("xyz", 20.1), new TemperatureEvent("xyz", 21.1), new TemperatureEvent("xyz", 22.2), 18 | new TemperatureEvent("xyz", 29.1), new TemperatureEvent("xyz", 22.3), new TemperatureEvent("xyz", 22.1), 19 | new TemperatureEvent("xyz", 22.4), new TemperatureEvent("xyz", 22.7), 20 | new TemperatureEvent("xyz", 27.0)); 21 | 22 | Pattern warningPattern = Pattern. 
begin("first") 23 | .subtype(TemperatureEvent.class).where(new FilterFunction() { 24 | private static final long serialVersionUID = 1L; 25 | 26 | public boolean filter(TemperatureEvent value) { 27 | if (value.getTemperature() >= 26.0) { 28 | return true; 29 | } 30 | return false; 31 | } 32 | }).within(Time.seconds(10)); 33 | 34 | DataStream patternStream = CEP.pattern(inputEventStream, warningPattern) 35 | .select(new PatternSelectFunction() { 36 | private static final long serialVersionUID = 1L; 37 | 38 | public Alert select(Map event) throws Exception { 39 | 40 | return new Alert("Temperature Rise Detected"); 41 | } 42 | 43 | }); 44 | 45 | patternStream.print(); 46 | env.execute("CEP on Temperature Sensor"); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /chapter05/src/main/java/com/demo/chapter05/EventDeserializationSchema.java: -------------------------------------------------------------------------------- 1 | package com.demo.chapter05; 2 | 3 | import java.io.IOException; 4 | import java.nio.charset.StandardCharsets; 5 | 6 | import org.apache.flink.api.common.typeinfo.TypeInformation; 7 | import org.apache.flink.api.java.typeutils.TypeExtractor; 8 | import org.apache.flink.streaming.util.serialization.DeserializationSchema; 9 | 10 | public class EventDeserializationSchema implements DeserializationSchema { 11 | 12 | public TypeInformation getProducedType() { 13 | return TypeExtractor.getForClass(TemperatureEvent.class); 14 | } 15 | 16 | public TemperatureEvent deserialize(byte[] arg0) throws IOException { 17 | String str = new String(arg0, StandardCharsets.UTF_8); 18 | 19 | String[] parts = str.split("="); 20 | return new TemperatureEvent(parts[0], Double.parseDouble(parts[1])); 21 | } 22 | 23 | public boolean isEndOfStream(TemperatureEvent arg0) { 24 | return false; 25 | } 26 | 27 | } 28 | -------------------------------------------------------------------------------- 
/chapter05/src/main/java/com/demo/chapter05/KafkaApp.java: -------------------------------------------------------------------------------- 1 | package com.demo.chapter05; 2 | 3 | import java.util.Map; 4 | import java.util.Properties; 5 | 6 | import org.apache.flink.api.common.functions.FilterFunction; 7 | import org.apache.flink.cep.CEP; 8 | import org.apache.flink.cep.PatternSelectFunction; 9 | import org.apache.flink.cep.pattern.Pattern; 10 | import org.apache.flink.streaming.api.datastream.DataStream; 11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 12 | import org.apache.flink.streaming.api.windowing.time.Time; 13 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer09; 14 | 15 | public class KafkaApp { 16 | public static void main(String[] args) throws Exception { 17 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 18 | 19 | Properties properties = new Properties(); 20 | properties.setProperty("bootstrap.servers", "localhost:9092"); 21 | properties.setProperty("group.id", "test"); 22 | 23 | DataStream inputEventStream = env.addSource( 24 | new FlinkKafkaConsumer09("test", new EventDeserializationSchema(), properties)); 25 | 26 | Pattern warningPattern = Pattern. 
begin("first") 27 | .subtype(TemperatureEvent.class).where(new FilterFunction() { 28 | private static final long serialVersionUID = 1L; 29 | 30 | public boolean filter(TemperatureEvent value) { 31 | if (value.getTemperature() >= 26.0) { 32 | return true; 33 | } 34 | return false; 35 | } 36 | }).within(Time.seconds(10)); 37 | 38 | DataStream patternStream = CEP.pattern(inputEventStream, warningPattern) 39 | .select(new PatternSelectFunction() { 40 | private static final long serialVersionUID = 1L; 41 | 42 | public Alert select(Map event) throws Exception { 43 | 44 | return new Alert("Temperature Rise Detected:" + event.get("first").getTemperature() 45 | + " on machine name:" + event.get("first").getMachineName()); 46 | } 47 | 48 | }); 49 | 50 | patternStream.print(); 51 | env.execute("CEP on Temperature Sensor"); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /chapter05/src/main/java/com/demo/chapter05/MonitoringEvent.java: -------------------------------------------------------------------------------- 1 | package com.demo.chapter05; 2 | 3 | public abstract class MonitoringEvent { 4 | 5 | private String machineName; 6 | 7 | public String getMachineName() { 8 | return machineName; 9 | } 10 | 11 | public void setMachineName(String machineName) { 12 | this.machineName = machineName; 13 | } 14 | 15 | @Override 16 | public int hashCode() { 17 | final int prime = 31; 18 | int result = 1; 19 | result = prime * result + ((machineName == null) ? 
0 : machineName.hashCode()); 20 | return result; 21 | } 22 | 23 | @Override 24 | public boolean equals(Object obj) { 25 | if (this == obj) 26 | return true; 27 | if (obj == null) 28 | return false; 29 | if (getClass() != obj.getClass()) 30 | return false; 31 | MonitoringEvent other = (MonitoringEvent) obj; 32 | if (machineName == null) { 33 | if (other.machineName != null) 34 | return false; 35 | } else if (!machineName.equals(other.machineName)) 36 | return false; 37 | return true; 38 | } 39 | 40 | public MonitoringEvent(String machineName) { 41 | super(); 42 | this.machineName = machineName; 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /chapter05/src/main/java/com/demo/chapter05/TemperatureEvent.java: -------------------------------------------------------------------------------- 1 | package com.demo.chapter05; 2 | 3 | public class TemperatureEvent extends MonitoringEvent { 4 | 5 | public TemperatureEvent(String machineName) { 6 | super(machineName); 7 | } 8 | 9 | private double temperature; 10 | 11 | public double getTemperature() { 12 | return temperature; 13 | } 14 | 15 | public void setTemperature(double temperature) { 16 | this.temperature = temperature; 17 | } 18 | 19 | @Override 20 | public int hashCode() { 21 | final int prime = 31; 22 | int result = super.hashCode(); 23 | long temp; 24 | temp = Double.doubleToLongBits(temperature); 25 | result = prime * result + (int) (temp ^ (temp >>> 32)); 26 | return result; 27 | } 28 | 29 | @Override 30 | public boolean equals(Object obj) { 31 | if (this == obj) 32 | return true; 33 | if (!super.equals(obj)) 34 | return false; 35 | if (getClass() != obj.getClass()) 36 | return false; 37 | TemperatureEvent other = (TemperatureEvent) obj; 38 | if (Double.doubleToLongBits(temperature) != Double.doubleToLongBits(other.temperature)) 39 | return false; 40 | return true; 41 | } 42 | 43 | public TemperatureEvent(String machineName, double temperature) { 44 | 
super(machineName); 45 | this.temperature = temperature; 46 | } 47 | 48 | @Override 49 | public String toString() { 50 | return "TemperatureEvent [getTemperature()=" + getTemperature() + ", getMachineName()=" + getMachineName() 51 | + "]"; 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /chapter05/src/test/java/com/demo/chapter05/AppTest.java: -------------------------------------------------------------------------------- 1 | package com.demo.chapter05; 2 | 3 | import junit.framework.Test; 4 | import junit.framework.TestCase; 5 | import junit.framework.TestSuite; 6 | 7 | /** 8 | * Unit test for simple App. 9 | */ 10 | public class AppTest 11 | extends TestCase 12 | { 13 | /** 14 | * Create the test case 15 | * 16 | * @param testName name of the test case 17 | */ 18 | public AppTest( String testName ) 19 | { 20 | super( testName ); 21 | } 22 | 23 | /** 24 | * @return the suite of tests being tested 25 | */ 26 | public static Test suite() 27 | { 28 | return new TestSuite( AppTest.class ); 29 | } 30 | 31 | /** 32 | * Rigourous Test :-) 33 | */ 34 | public void testApp() 35 | { 36 | assertTrue( true ); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /chapter06/flink-ml/pom.xml: -------------------------------------------------------------------------------- 1 | 11 | 13 | 4.0.0 14 | 15 | com.demo 16 | flink-ml 17 | 1.0 18 | jar 19 | 20 | Flink Quickstart Job 21 | http://www.myorganization.org 22 | 23 | 24 | 25 | apache.snapshots 26 | Apache Development Snapshot Repository 27 | https://repository.apache.org/content/repositories/snapshots/ 28 | 29 | false 30 | 31 | 32 | true 33 | 34 | 35 | 36 | 37 | 38 | UTF-8 39 | 1.1.4 40 | 41 | 42 | 53 | 54 | 55 | 56 | org.apache.flink 57 | flink-scala_2.11 58 | ${flink.version} 59 | 60 | 61 | org.apache.flink 62 | flink-streaming-scala_2.11 63 | ${flink.version} 64 | 65 | 66 | org.apache.flink 67 | flink-clients_2.11 68 | 
${flink.version} 69 | 70 | 71 | org.apache.flink 72 | flink-ml_2.11 73 | ${flink.version} 74 | 75 | 76 | 77 | 81 | 82 | 83 | 87 | 88 | org.apache.maven.plugins 89 | maven-shade-plugin 90 | 2.4.1 91 | 92 | 93 | 94 | package 95 | 96 | shade 97 | 98 | 99 | 100 | 101 | 103 | org.apache.flink:flink-shaded-*_2.11 104 | org.apache.flink:flink-core_2.11 105 | org.apache.flink:flink-java_2.11 106 | org.apache.flink:flink-scala_2.11 107 | org.apache.flink:flink-runtime_2.11 108 | org.apache.flink:flink-optimizer_2.11 109 | org.apache.flink:flink-clients_2.11 110 | org.apache.flink:flink-avro_2.11 111 | org.apache.flink:flink-java-examples_2.11 112 | org.apache.flink:flink-scala-examples_2.11 113 | org.apache.flink:flink-streaming-examples_2.11 114 | org.apache.flink:flink-streaming-java_2.11 115 | 116 | 119 | 120 | org.scala-lang:scala-library 121 | org.scala-lang:scala-compiler 122 | org.scala-lang:scala-reflect 123 | com.amazonaws:aws-java-sdk 124 | com.typesafe.akka:akka-actor_* 125 | com.typesafe.akka:akka-remote_* 126 | com.typesafe.akka:akka-slf4j_* 127 | io.netty:netty-all 128 | io.netty:netty 129 | org.eclipse.jetty:jetty-server 130 | org.eclipse.jetty:jetty-continuation 131 | org.eclipse.jetty:jetty-http 132 | org.eclipse.jetty:jetty-io 133 | org.eclipse.jetty:jetty-util 134 | org.eclipse.jetty:jetty-security 135 | org.eclipse.jetty:jetty-servlet 136 | commons-fileupload:commons-fileupload 137 | org.apache.avro:avro 138 | commons-collections:commons-collections 139 | org.codehaus.jackson:jackson-core-asl 140 | org.codehaus.jackson:jackson-mapper-asl 141 | com.thoughtworks.paranamer:paranamer 142 | org.xerial.snappy:snappy-java 143 | org.apache.commons:commons-compress 144 | org.tukaani:xz 145 | com.esotericsoftware.kryo:kryo 146 | com.esotericsoftware.minlog:minlog 147 | org.objenesis:objenesis 148 | com.twitter:chill_* 149 | com.twitter:chill-java 150 | com.twitter:chill-avro_* 151 | com.twitter:chill-bijection_* 152 | com.twitter:bijection-core_* 153 | 
com.twitter:bijection-avro_* 154 | commons-lang:commons-lang 155 | junit:junit 156 | de.javakaffee:kryo-serializers 157 | joda-time:joda-time 158 | org.apache.commons:commons-lang3 159 | org.slf4j:slf4j-api 160 | org.slf4j:slf4j-log4j12 161 | log4j:log4j 162 | org.apache.commons:commons-math 163 | org.apache.sling:org.apache.sling.commons.json 164 | commons-logging:commons-logging 165 | org.apache.httpcomponents:httpclient 166 | org.apache.httpcomponents:httpcore 167 | commons-codec:commons-codec 168 | com.fasterxml.jackson.core:jackson-core 169 | com.fasterxml.jackson.core:jackson-databind 170 | com.fasterxml.jackson.core:jackson-annotations 171 | org.codehaus.jettison:jettison 172 | stax:stax-api 173 | com.typesafe:config 174 | org.uncommons.maths:uncommons-maths 175 | com.github.scopt:scopt_* 176 | org.mortbay.jetty:servlet-api 177 | commons-io:commons-io 178 | commons-cli:commons-cli 179 | 180 | 181 | 182 | 183 | org.apache.flink:* 184 | 185 | org/apache/flink/shaded/** 186 | web-docs/** 187 | 188 | 189 | 190 | 192 | *:* 193 | 194 | META-INF/*.SF 195 | META-INF/*.DSA 196 | META-INF/*.RSA 197 | 198 | 199 | 200 | 201 | 202 | 204 | com.demo.flink.ml.Job 205 | 206 | 207 | false 208 | 209 | 210 | 211 | 212 | 213 | 214 | org.apache.maven.plugins 215 | maven-compiler-plugin 216 | 3.1 217 | 218 | 1.7 219 | 1.7 220 | 221 | 222 | 223 | net.alchim31.maven 224 | scala-maven-plugin 225 | 3.1.4 226 | 227 | 228 | 229 | compile 230 | testCompile 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | org.apache.maven.plugins 239 | maven-eclipse-plugin 240 | 2.8 241 | 242 | true 243 | 244 | org.scala-ide.sdt.core.scalanature 245 | org.eclipse.jdt.core.javanature 246 | 247 | 248 | org.scala-ide.sdt.core.scalabuilder 249 | 250 | 251 | org.scala-ide.sdt.launching.SCALA_CONTAINER 252 | 253 | org.eclipse.jdt.launching.JRE_CONTAINER 254 | 255 | 256 | 257 | org.scala-lang:scala-library 258 | org.scala-lang:scala-compiler 259 | 260 | 261 | **/*.scala 262 | **/*.java 263 | 264 | 265 | 266 | 267 
| 268 | 269 | org.codehaus.mojo 270 | build-helper-maven-plugin 271 | 1.7 272 | 273 | 274 | 275 | add-source 276 | generate-sources 277 | 278 | add-source 279 | 280 | 281 | 282 | src/main/scala 283 | 284 | 285 | 286 | 287 | 288 | add-test-source 289 | generate-test-sources 290 | 291 | add-test-source 292 | 293 | 294 | 295 | src/test/scala 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 307 | build-jar 308 | 309 | false 310 | 311 | 312 | 313 | org.apache.flink 314 | flink-scala_2.11 315 | ${flink.version} 316 | provided 317 | 318 | 319 | org.apache.flink 320 | flink-streaming-java_2.11 321 | ${flink.version} 322 | provided 323 | 324 | 325 | org.apache.flink 326 | flink-clients_2.11 327 | ${flink.version} 328 | provided 329 | 330 | 331 | 332 | 333 | 334 | -------------------------------------------------------------------------------- /chapter06/flink-ml/src/main/resources/data/iris-test.txt: -------------------------------------------------------------------------------- 1 | 1 1:5.1 2:3.5 3:1.4 4:0.2 2 | 1 1:4.9 2:3.0 3:1.4 4:0.2 3 | 1 1:4.7 2:3.2 3:1.3 4:0.2 4 | 1 1:4.6 2:3.1 3:1.5 4:0.2 5 | 1 1:5.0 2:3.6 3:1.4 4:0.2 6 | 1 1:5.4 2:3.9 3:1.7 4:0.4 7 | 1 1:4.6 2:3.4 3:1.4 4:0.3 8 | 1 1:5.0 2:3.4 3:1.5 4:0.2 9 | 1 1:4.4 2:2.9 3:1.4 4:0.2 10 | 1 1:4.9 2:3.1 3:1.5 4:0.1 11 | 1 1:5.4 2:3.7 3:1.5 4:0.2 12 | 1 1:4.8 2:3.4 3:1.6 4:0.2 13 | 1 1:4.8 2:3.0 3:1.4 4:0.1 14 | 1 1:4.3 2:3.0 3:1.1 4:0.1 15 | 1 1:5.8 2:4.0 3:1.2 4:0.2 16 | 1 1:5.7 2:4.4 3:1.5 4:0.4 -------------------------------------------------------------------------------- /chapter06/flink-ml/src/main/resources/data/iris-train.txt: -------------------------------------------------------------------------------- 1 | 1 1:5.1 2:3.5 3:1.4 4:0.2 2 | 1 1:4.9 2:3.0 3:1.4 4:0.2 3 | 1 1:4.7 2:3.2 3:1.3 4:0.2 4 | 1 1:4.6 2:3.1 3:1.5 4:0.2 5 | 1 1:5.0 2:3.6 3:1.4 4:0.2 6 | 1 1:5.4 2:3.9 3:1.7 4:0.4 7 | 1 1:4.6 2:3.4 3:1.4 4:0.3 8 | 1 1:5.0 2:3.4 3:1.5 4:0.2 9 | 1 1:4.4 2:2.9 3:1.4 4:0.2 10 | 1 1:4.9 2:3.1 
3:1.5 4:0.1 11 | 1 1:5.4 2:3.7 3:1.5 4:0.2 12 | 1 1:4.8 2:3.4 3:1.6 4:0.2 13 | 1 1:4.8 2:3.0 3:1.4 4:0.1 14 | 1 1:4.3 2:3.0 3:1.1 4:0.1 15 | 1 1:5.8 2:4.0 3:1.2 4:0.2 16 | 1 1:5.7 2:4.4 3:1.5 4:0.4 17 | 1 1:5.4 2:3.9 3:1.3 4:0.4 18 | 1 1:5.1 2:3.5 3:1.4 4:0.3 19 | 1 1:5.7 2:3.8 3:1.7 4:0.3 20 | 1 1:5.1 2:3.8 3:1.5 4:0.3 21 | 1 1:5.4 2:3.4 3:1.7 4:0.2 22 | 1 1:5.1 2:3.7 3:1.5 4:0.4 23 | 1 1:4.6 2:3.6 3:1.0 4:0.2 24 | 1 1:5.1 2:3.3 3:1.7 4:0.5 25 | 1 1:4.8 2:3.4 3:1.9 4:0.2 26 | 1 1:5.0 2:3.0 3:1.6 4:0.2 27 | 1 1:5.0 2:3.4 3:1.6 4:0.4 28 | 1 1:5.2 2:3.5 3:1.5 4:0.2 29 | 1 1:5.2 2:3.4 3:1.4 4:0.2 30 | 1 1:4.7 2:3.2 3:1.6 4:0.2 31 | 1 1:4.8 2:3.1 3:1.6 4:0.2 32 | 1 1:5.4 2:3.4 3:1.5 4:0.4 33 | 1 1:5.2 2:4.1 3:1.5 4:0.1 34 | 1 1:5.5 2:4.2 3:1.4 4:0.2 35 | 1 1:4.9 2:3.1 3:1.5 4:0.1 36 | 1 1:5.0 2:3.2 3:1.2 4:0.2 37 | 1 1:5.5 2:3.5 3:1.3 4:0.2 38 | 1 1:4.9 2:3.1 3:1.5 4:0.1 39 | 1 1:4.4 2:3.0 3:1.3 4:0.2 40 | 1 1:5.1 2:3.4 3:1.5 4:0.2 41 | 1 1:5.0 2:3.5 3:1.3 4:0.3 42 | 1 1:4.5 2:2.3 3:1.3 4:0.3 43 | 1 1:4.4 2:3.2 3:1.3 4:0.2 44 | 1 1:5.0 2:3.5 3:1.6 4:0.6 45 | 1 1:5.1 2:3.8 3:1.9 4:0.4 46 | 1 1:4.8 2:3.0 3:1.4 4:0.3 47 | 1 1:5.1 2:3.8 3:1.6 4:0.2 48 | 1 1:4.6 2:3.2 3:1.4 4:0.2 49 | 1 1:5.3 2:3.7 3:1.5 4:0.2 50 | 1 1:5.0 2:3.3 3:1.4 4:0.2 51 | 2 1:7.0 2:3.2 3:4.7 4:1.4 52 | 2 1:6.4 2:3.2 3:4.5 4:1.5 53 | 2 1:6.9 2:3.1 3:4.9 4:1.5 54 | 2 1:5.5 2:2.3 3:4.0 4:1.3 55 | 2 1:6.5 2:2.8 3:4.6 4:1.5 56 | 2 1:5.7 2:2.8 3:4.5 4:1.3 57 | 2 1:6.3 2:3.3 3:4.7 4:1.6 58 | 2 1:4.9 2:2.4 3:3.3 4:1.0 59 | 2 1:6.6 2:2.9 3:4.6 4:1.3 60 | 2 1:5.2 2:2.7 3:3.9 4:1.4 61 | 2 1:5.0 2:2.0 3:3.5 4:1.0 62 | 2 1:5.9 2:3.0 3:4.2 4:1.5 63 | 2 1:6.0 2:2.2 3:4.0 4:1.0 64 | 2 1:6.1 2:2.9 3:4.7 4:1.4 65 | 2 1:5.6 2:2.9 3:3.6 4:1.3 66 | 2 1:6.7 2:3.1 3:4.4 4:1.4 67 | 2 1:5.6 2:3.0 3:4.5 4:1.5 68 | 2 1:5.8 2:2.7 3:4.1 4:1.0 69 | 2 1:6.2 2:2.2 3:4.5 4:1.5 70 | 2 1:5.6 2:2.5 3:3.9 4:1.1 71 | 2 1:5.9 2:3.2 3:4.8 4:1.8 72 | 2 1:6.1 2:2.8 3:4.0 4:1.3 73 | 2 1:6.3 2:2.5 3:4.9 4:1.5 74 | 2 1:6.1 2:2.8 3:4.7 4:1.2 75 
| 2 1:6.4 2:2.9 3:4.3 4:1.3 76 | 2 1:6.6 2:3.0 3:4.4 4:1.4 77 | 2 1:6.8 2:2.8 3:4.8 4:1.4 78 | 2 1:6.7 2:3.0 3:5.0 4:1.7 79 | 2 1:6.0 2:2.9 3:4.5 4:1.5 80 | 2 1:5.7 2:2.6 3:3.5 4:1.0 81 | 2 1:5.5 2:2.4 3:3.8 4:1.1 82 | 2 1:5.5 2:2.4 3:3.7 4:1.0 83 | 2 1:5.8 2:2.7 3:3.9 4:1.2 84 | 2 1:6.0 2:2.7 3:5.1 4:1.6 85 | 2 1:5.4 2:3.0 3:4.5 4:1.5 86 | 2 1:6.0 2:3.4 3:4.5 4:1.6 87 | 2 1:6.7 2:3.1 3:4.7 4:1.5 88 | 2 1:6.3 2:2.3 3:4.4 4:1.3 89 | 2 1:5.6 2:3.0 3:4.1 4:1.3 90 | 2 1:5.5 2:2.5 3:4.0 4:1.3 91 | 2 1:5.5 2:2.6 3:4.4 4:1.2 92 | 2 1:6.1 2:3.0 3:4.6 4:1.4 93 | 2 1:5.8 2:2.6 3:4.0 4:1.2 94 | 2 1:5.0 2:2.3 3:3.3 4:1.0 95 | 2 1:5.6 2:2.7 3:4.2 4:1.3 96 | 2 1:5.7 2:3.0 3:4.2 4:1.2 97 | 2 1:5.7 2:2.9 3:4.2 4:1.3 98 | 2 1:6.2 2:2.9 3:4.3 4:1.3 99 | 2 1:5.1 2:2.5 3:3.0 4:1.1 100 | 2 1:5.7 2:2.8 3:4.1 4:1.3 101 | 3 1:6.3 2:3.3 3:6.0 4:2.5 102 | 3 1:5.8 2:2.7 3:5.1 4:1.9 103 | 3 1:7.1 2:3.0 3:5.9 4:2.1 104 | 3 1:6.3 2:2.9 3:5.6 4:1.8 105 | 3 1:6.5 2:3.0 3:5.8 4:2.2 106 | 3 1:7.6 2:3.0 3:6.6 4:2.1 107 | 3 1:4.9 2:2.5 3:4.5 4:1.7 108 | 3 1:7.3 2:2.9 3:6.3 4:1.8 109 | 3 1:6.7 2:2.5 3:5.8 4:1.8 110 | 3 1:7.2 2:3.6 3:6.1 4:2.5 111 | 3 1:6.5 2:3.2 3:5.1 4:2.0 112 | 3 1:6.4 2:2.7 3:5.3 4:1.9 113 | 3 1:6.8 2:3.0 3:5.5 4:2.1 114 | 3 1:5.7 2:2.5 3:5.0 4:2.0 115 | 3 1:5.8 2:2.8 3:5.1 4:2.4 116 | 3 1:6.4 2:3.2 3:5.3 4:2.3 117 | 3 1:6.5 2:3.0 3:5.5 4:1.8 118 | 3 1:7.7 2:3.8 3:6.7 4:2.2 119 | 3 1:7.7 2:2.6 3:6.9 4:2.3 120 | 3 1:6.0 2:2.2 3:5.0 4:1.5 121 | 3 1:6.9 2:3.2 3:5.7 4:2.3 122 | 3 1:5.6 2:2.8 3:4.9 4:2.0 123 | 3 1:7.7 2:2.8 3:6.7 4:2.0 124 | 3 1:6.3 2:2.7 3:4.9 4:1.8 125 | 3 1:6.7 2:3.3 3:5.7 4:2.1 126 | 3 1:7.2 2:3.2 3:6.0 4:1.8 127 | 3 1:6.2 2:2.8 3:4.8 4:1.8 128 | 3 1:6.1 2:3.0 3:4.9 4:1.8 129 | 3 1:6.4 2:2.8 3:5.6 4:2.1 130 | 3 1:7.2 2:3.0 3:5.8 4:1.6 131 | 3 1:7.4 2:2.8 3:6.1 4:1.9 132 | 3 1:7.9 2:3.8 3:6.4 4:2.0 133 | 3 1:6.4 2:2.8 3:5.6 4:2.2 134 | 3 1:6.3 2:2.8 3:5.1 4:1.5 135 | 3 1:6.1 2:2.6 3:5.6 4:1.4 136 | 3 1:7.7 2:3.0 3:6.1 4:2.3 137 | 3 1:6.3 2:3.4 3:5.6 4:2.4 138 | 3 
1:6.4 2:3.1 3:5.5 4:1.8 139 | 3 1:6.0 2:3.0 3:4.8 4:1.8 140 | 3 1:6.9 2:3.1 3:5.4 4:2.1 141 | 3 1:6.7 2:3.1 3:5.6 4:2.4 142 | 3 1:6.9 2:3.1 3:5.1 4:2.3 143 | 3 1:5.8 2:2.7 3:5.1 4:1.9 144 | 3 1:6.8 2:3.2 3:5.9 4:2.3 145 | 3 1:6.7 2:3.3 3:5.7 4:2.5 146 | 3 1:6.7 2:3.0 3:5.2 4:2.3 147 | 3 1:6.3 2:2.5 3:5.0 4:1.9 148 | 3 1:6.5 2:3.0 3:5.2 4:2.0 149 | 3 1:6.2 2:3.4 3:5.4 4:2.3 150 | 3 1:5.9 2:3.0 3:5.1 4:1.8 151 | -------------------------------------------------------------------------------- /chapter06/flink-ml/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | ################################################################################ 18 | 19 | log4j.rootLogger=INFO, console 20 | 21 | log4j.appender.console=org.apache.log4j.ConsoleAppender 22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 23 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n 24 | -------------------------------------------------------------------------------- /chapter06/flink-ml/src/main/scala/com/demo/flink/ml/Job.scala: -------------------------------------------------------------------------------- 1 | package com.demo.flink.ml 2 | 3 | import org.apache.flink.api.scala._ 4 | import org.apache.flink.ml._ 5 | import org.apache.flink.ml.common.LabeledVector 6 | import org.apache.flink.ml.math.DenseVector 7 | import org.apache.flink.ml.math.Vector 8 | import org.apache.flink.ml.preprocessing.Splitter 9 | import org.apache.flink.ml.regression.MultipleLinearRegression 10 | 11 | object Job { 12 | def main(args: Array[String]) { 13 | // set up the execution environment 14 | val env = ExecutionEnvironment.getExecutionEnvironment 15 | 16 | val iriscsv = env.readCsvFile[(String, String, String, String, String)]("iris.csv") 17 | val irisLV = iriscsv 18 | .map { tuple => 19 | val list = tuple.productIterator.toList 20 | val numList = list.map(_.asInstanceOf[String].toDouble) 21 | LabeledVector(numList(4), DenseVector(numList.take(4).toArray)) 22 | } 23 | 24 | // irisLV.print 25 | // val trainTestData = Splitter.trainTestSplit(irisLV) 26 | val trainTestData = Splitter.trainTestSplit(irisLV, .6, true) 27 | val trainingData: DataSet[LabeledVector] = trainTestData.training 28 | 29 | val testingData: DataSet[Vector] = trainTestData.testing.map(lv => lv.vector) 30 | 31 | testingData.print() 32 | 33 | val mlr = MultipleLinearRegression() 34 | .setStepsize(1.0) 35 | .setIterations(5) 36 | .setConvergenceThreshold(0.001) 37 | 38 | mlr.fit(trainingData) 39 | 40 | // The fitted model can now be used to make 
predictions 41 | val predictions = mlr.predict(testingData) 42 | 43 | predictions.print() 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /chapter06/flink-ml/src/main/scala/com/demo/flink/ml/MLRJob.scala: -------------------------------------------------------------------------------- 1 | package com.demo.flink.ml 2 | 3 | import org.apache.flink.api.scala._ 4 | import org.apache.flink.ml._ 5 | import org.apache.flink.ml.common.LabeledVector 6 | import org.apache.flink.ml.math.DenseVector 7 | import org.apache.flink.ml.math.Vector 8 | import org.apache.flink.ml.preprocessing.Splitter 9 | import org.apache.flink.ml.regression.MultipleLinearRegression 10 | import org.apache.flink.ml.preprocessing.PolynomialFeatures 11 | 12 | object MLRJob { 13 | def main(args: Array[String]) { 14 | // set up the execution environment 15 | val env = ExecutionEnvironment.getExecutionEnvironment 16 | 17 | 18 | val trainingDataset = MLUtils.readLibSVM(env, "iris-train.txt") 19 | val testingDataset = MLUtils.readLibSVM(env, "iris-test.txt").map { lv => lv.vector } 20 | val mlr = MultipleLinearRegression() 21 | .setStepsize(1.0) 22 | .setIterations(5) 23 | .setConvergenceThreshold(0.001) 24 | 25 | mlr.fit(trainingDataset) 26 | 27 | // The fitted model can now be used to make predictions 28 | val predictions = mlr.predict(testingDataset) 29 | 30 | predictions.print() 31 | 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /chapter06/flink-ml/src/main/scala/com/demo/flink/ml/MLRJobPipelines.scala: -------------------------------------------------------------------------------- 1 | package com.demo.flink.ml 2 | 3 | import org.apache.flink.api.scala._ 4 | import org.apache.flink.ml._ 5 | import org.apache.flink.ml.common.LabeledVector 6 | import org.apache.flink.ml.math.DenseVector 7 | import org.apache.flink.ml.math.Vector 8 | import org.apache.flink.ml.preprocessing.Splitter 9 | import 
package com.demo.flink.ml

import org.apache.flink.api.scala._
import org.apache.flink.ml._
import org.apache.flink.ml.common.LabeledVector
import org.apache.flink.ml.math.DenseVector
import org.apache.flink.ml.math.Vector
import org.apache.flink.ml.preprocessing.Splitter
import org.apache.flink.ml.regression.MultipleLinearRegression
import org.apache.flink.ml.preprocessing.PolynomialFeatures
import org.apache.flink.ml.preprocessing.StandardScaler
import org.apache.flink.ml.preprocessing.MinMaxScaler

/**
 * Shows how to solve regression problems with Flink ML using a pipeline.
 *
 * Machine learning algorithm: Multiple Linear Regression.
 * Data pre-processing: MinMaxScaler for feature scaling, with
 * PolynomialFeatures chained in front of the predictor.
 */
object MLRJobPipelines {
  def main(args: Array[String]): Unit = {
    // Set up the execution environment.
    val env = ExecutionEnvironment.getExecutionEnvironment

    // Expand each feature vector with polynomial terms up to degree 3.
    val polynomial = PolynomialFeatures()
      .setDegree(3)

    // Constructed for reference but not applied below; MinMax scaling
    // is used instead.
    val standardScaler = StandardScaler()
      .setMean(10.0)
      .setStd(2.0)

    // Rescale every feature into the range [1.0, 3.0].
    val rangeScaler = MinMaxScaler()
      .setMin(1.0)
      .setMax(3.0)

    val trainSet = MLUtils.readLibSVM(env, "iris-train.txt")
    val testFeatures = MLUtils.readLibSVM(env, "iris-test.txt").map(_.vector)

    val regression = MultipleLinearRegression()
      .setStepsize(1.0)
      .setIterations(5)
      .setConvergenceThreshold(0.001)

    // Learn the per-feature min/max from the training data,
    // then scale that same data.
    rangeScaler.fit(trainSet)
    val scaledTrainSet = rangeScaler.transform(trainSet)

    scaledTrainSet.print()

    // Create pipeline: PolynomialFeatures -> MultipleLinearRegression.
    val pipeline = polynomial.chainPredictor(regression)

    // Train the model on the scaled data.
    pipeline.fit(scaledTrainSet)

    // The fitted model can now be used to make predictions.
    val predictions = pipeline.predict(testFeatures)

    predictions.print()
  }
}
-------------------------------------------------------------------------------- /chapter06/ml-examples/pom.xml: -------------------------------------------------------------------------------- 1 | 11 | 13 | 4.0.0 14 | 15 | com.demo 16 | chapter06 17 | 1.0 18 | jar 19 | 20 | Flink Quickstart Job 21 | http://www.myorganization.org 22 | 23 | 24 | 25 | apache.snapshots 26 | Apache Development Snapshot Repository 27 | https://repository.apache.org/content/repositories/snapshots/ 28 | 29 | false 30 | 31 | 32 | true 33 | 34 | 35 | 36 | 37 | 38 | UTF-8 39 | 0.10.2 40 | 41 | 42 | 53 | 54 | 55 | 56 | org.apache.flink 57 | flink-scala_2.11 58 | ${flink.version} 59 | 60 | 61 | org.apache.flink 62 | flink-streaming-scala_2.11 63 | ${flink.version} 64 | 65 | 66 | org.apache.flink 67 | flink-clients_2.11 68 | ${flink.version} 69 | 70 | 71 | 72 | org.apache.flink 73 | flink-ml_2.11 74 | ${flink.version} 75 | 76 | 77 | 78 | 79 | 83 | 84 | 85 | 89 | 90 | org.apache.maven.plugins 91 | maven-shade-plugin 92 | 2.4.1 93 | 94 | 95 | 96 | package 97 | 98 | shade 99 | 100 | 101 | 102 | 103 | 105 | org.apache.flink:flink-shaded-*_2.11 106 | org.apache.flink:flink-core_2.11 107 | org.apache.flink:flink-java_2.11 108 | org.apache.flink:flink-scala_2.11 109 | org.apache.flink:flink-runtime_2.11 110 | org.apache.flink:flink-optimizer_2.11 111 | org.apache.flink:flink-clients_2.11 112 | org.apache.flink:flink-avro_2.11 113 | org.apache.flink:flink-java-examples_2.11 114 | org.apache.flink:flink-scala-examples_2.11 115 | org.apache.flink:flink-streaming-examples_2.11 116 | org.apache.flink:flink-streaming-java_2.11 117 | 118 | 121 | 122 | org.scala-lang:scala-library 123 | org.scala-lang:scala-compiler 124 | org.scala-lang:scala-reflect 125 | com.amazonaws:aws-java-sdk 126 | com.typesafe.akka:akka-actor_* 127 | com.typesafe.akka:akka-remote_* 128 | com.typesafe.akka:akka-slf4j_* 129 | io.netty:netty-all 130 | io.netty:netty 131 | org.eclipse.jetty:jetty-server 132 | 
org.eclipse.jetty:jetty-continuation 133 | org.eclipse.jetty:jetty-http 134 | org.eclipse.jetty:jetty-io 135 | org.eclipse.jetty:jetty-util 136 | org.eclipse.jetty:jetty-security 137 | org.eclipse.jetty:jetty-servlet 138 | commons-fileupload:commons-fileupload 139 | org.apache.avro:avro 140 | commons-collections:commons-collections 141 | org.codehaus.jackson:jackson-core-asl 142 | org.codehaus.jackson:jackson-mapper-asl 143 | com.thoughtworks.paranamer:paranamer 144 | org.xerial.snappy:snappy-java 145 | org.apache.commons:commons-compress 146 | org.tukaani:xz 147 | com.esotericsoftware.kryo:kryo 148 | com.esotericsoftware.minlog:minlog 149 | org.objenesis:objenesis 150 | com.twitter:chill_* 151 | com.twitter:chill-java 152 | com.twitter:chill-avro_* 153 | com.twitter:chill-bijection_* 154 | com.twitter:bijection-core_* 155 | com.twitter:bijection-avro_* 156 | commons-lang:commons-lang 157 | junit:junit 158 | de.javakaffee:kryo-serializers 159 | joda-time:joda-time 160 | org.apache.commons:commons-lang3 161 | org.slf4j:slf4j-api 162 | org.slf4j:slf4j-log4j12 163 | log4j:log4j 164 | org.apache.commons:commons-math 165 | org.apache.sling:org.apache.sling.commons.json 166 | commons-logging:commons-logging 167 | org.apache.httpcomponents:httpclient 168 | org.apache.httpcomponents:httpcore 169 | commons-codec:commons-codec 170 | com.fasterxml.jackson.core:jackson-core 171 | com.fasterxml.jackson.core:jackson-databind 172 | com.fasterxml.jackson.core:jackson-annotations 173 | org.codehaus.jettison:jettison 174 | stax:stax-api 175 | com.typesafe:config 176 | org.uncommons.maths:uncommons-maths 177 | com.github.scopt:scopt_* 178 | org.mortbay.jetty:servlet-api 179 | commons-io:commons-io 180 | commons-cli:commons-cli 181 | 182 | 183 | 184 | 185 | org.apache.flink:* 186 | 187 | org/apache/flink/shaded/** 188 | web-docs/** 189 | 190 | 191 | 192 | 194 | *:* 195 | 196 | META-INF/*.SF 197 | META-INF/*.DSA 198 | META-INF/*.RSA 199 | 200 | 201 | 202 | 203 | 204 | 206 | 
com.demo.chapter06.Job 207 | 208 | 209 | false 210 | 211 | 212 | 213 | 214 | 215 | 216 | org.apache.maven.plugins 217 | maven-compiler-plugin 218 | 3.1 219 | 220 | 1.7 221 | 1.7 222 | 223 | 224 | 225 | net.alchim31.maven 226 | scala-maven-plugin 227 | 3.1.4 228 | 229 | 230 | 231 | compile 232 | testCompile 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | org.apache.maven.plugins 241 | maven-eclipse-plugin 242 | 2.8 243 | 244 | true 245 | 246 | org.scala-ide.sdt.core.scalanature 247 | org.eclipse.jdt.core.javanature 248 | 249 | 250 | org.scala-ide.sdt.core.scalabuilder 251 | 252 | 253 | org.scala-ide.sdt.launching.SCALA_CONTAINER 254 | 255 | org.eclipse.jdt.launching.JRE_CONTAINER 256 | 257 | 258 | 259 | org.scala-lang:scala-library 260 | org.scala-lang:scala-compiler 261 | 262 | 263 | **/*.scala 264 | **/*.java 265 | 266 | 267 | 268 | 269 | 270 | 271 | org.codehaus.mojo 272 | build-helper-maven-plugin 273 | 1.7 274 | 275 | 276 | 279 | 280 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 291 | build-jar 292 | 293 | false 294 | 295 | 296 | 297 | org.apache.flink 298 | flink-scala_2.11 299 | ${flink.version} 300 | provided 301 | 302 | 303 | org.apache.flink 304 | flink-streaming-java_2.11 305 | ${flink.version} 306 | provided 307 | 308 | 309 | org.apache.flink 310 | flink-clients_2.11 311 | ${flink.version} 312 | provided 313 | 314 | 315 | 316 | 317 | 318 | -------------------------------------------------------------------------------- /chapter06/ml-examples/src/main/resources/data/books-test.csv: -------------------------------------------------------------------------------- 1 | 1,10 2 | 1,11 3 | 1,12 4 | 1,13 5 | 1,14 6 | 1,15 7 | 1,16 8 | 1,17 9 | 1,18 10 | 2,10 11 | 2,11 12 | 2,15 13 | 2,16 14 | 2,17 15 | 2,18 16 | 3,11 17 | 3,12 18 | 3,13 19 | 3,14 20 | 3,15 21 | 3,16 22 | 3,17 23 | 3,18 24 | 4,10 25 | 4,11 26 | 4,12 27 | 4,13 28 | 4,14 29 | 4,15 30 | 4,16 31 | 4,17 32 | 4,18 33 | -------------------------------------------------------------------------------- 
/chapter06/ml-examples/src/main/resources/data/books.csv: -------------------------------------------------------------------------------- 1 | 1,10,1.0 2 | 1,11,2.0 3 | 1,12,5.0 4 | 1,13,5.0 5 | 1,14,5.0 6 | 1,15,4.0 7 | 1,16,5.0 8 | 1,17,1.0 9 | 1,18,5.0 10 | 2,10,1.0 11 | 2,11,2.0 12 | 2,15,5.0 13 | 2,16,4.5 14 | 2,17,1.0 15 | 2,18,5.0 16 | 3,11,2.5 17 | 3,12,4.5 18 | 3,13,4.0 19 | 3,14,3.0 20 | 3,15,3.5 21 | 3,16,4.5 22 | 3,17,4.0 23 | 3,18,5.0 24 | 4,10,5.0 25 | 4,11,5.0 26 | 4,12,5.0 27 | 4,13,0.0 28 | 4,14,2.0 29 | 4,15,3.0 30 | 4,16,1.0 31 | 4,17,4.0 32 | 4,18,1.0 -------------------------------------------------------------------------------- /chapter06/ml-examples/src/main/resources/data/iris-test.txt: -------------------------------------------------------------------------------- 1 | 1 1:5.1 2:3.5 3:1.4 4:0.2 2 | 1 1:4.9 2:3.0 3:1.4 4:0.2 3 | 1 1:4.7 2:3.2 3:1.3 4:0.2 4 | 1 1:4.6 2:3.1 3:1.5 4:0.2 5 | 1 1:5.0 2:3.6 3:1.4 4:0.2 6 | 1 1:5.4 2:3.9 3:1.7 4:0.4 7 | 1 1:4.6 2:3.4 3:1.4 4:0.3 8 | 1 1:5.0 2:3.4 3:1.5 4:0.2 9 | 1 1:4.4 2:2.9 3:1.4 4:0.2 10 | 1 1:4.9 2:3.1 3:1.5 4:0.1 11 | 1 1:5.4 2:3.7 3:1.5 4:0.2 12 | 1 1:4.8 2:3.4 3:1.6 4:0.2 13 | 1 1:4.8 2:3.0 3:1.4 4:0.1 14 | 1 1:4.3 2:3.0 3:1.1 4:0.1 15 | 1 1:5.8 2:4.0 3:1.2 4:0.2 16 | 1 1:5.7 2:4.4 3:1.5 4:0.4 -------------------------------------------------------------------------------- /chapter06/ml-examples/src/main/resources/data/iris-train.txt: -------------------------------------------------------------------------------- 1 | 1 1:5.1 2:3.5 3:1.4 4:0.2 2 | 1 1:4.9 2:3.0 3:1.4 4:0.2 3 | 1 1:4.7 2:3.2 3:1.3 4:0.2 4 | 1 1:4.6 2:3.1 3:1.5 4:0.2 5 | 1 1:5.0 2:3.6 3:1.4 4:0.2 6 | 1 1:5.4 2:3.9 3:1.7 4:0.4 7 | 1 1:4.6 2:3.4 3:1.4 4:0.3 8 | 1 1:5.0 2:3.4 3:1.5 4:0.2 9 | 1 1:4.4 2:2.9 3:1.4 4:0.2 10 | 1 1:4.9 2:3.1 3:1.5 4:0.1 11 | 1 1:5.4 2:3.7 3:1.5 4:0.2 12 | 1 1:4.8 2:3.4 3:1.6 4:0.2 13 | 1 1:4.8 2:3.0 3:1.4 4:0.1 14 | 1 1:4.3 2:3.0 3:1.1 4:0.1 15 | 1 1:5.8 2:4.0 3:1.2 4:0.2 16 | 1 1:5.7 2:4.4 3:1.5 
4:0.4 17 | 1 1:5.4 2:3.9 3:1.3 4:0.4 18 | 1 1:5.1 2:3.5 3:1.4 4:0.3 19 | 1 1:5.7 2:3.8 3:1.7 4:0.3 20 | 1 1:5.1 2:3.8 3:1.5 4:0.3 21 | 1 1:5.4 2:3.4 3:1.7 4:0.2 22 | 1 1:5.1 2:3.7 3:1.5 4:0.4 23 | 1 1:4.6 2:3.6 3:1.0 4:0.2 24 | 1 1:5.1 2:3.3 3:1.7 4:0.5 25 | 1 1:4.8 2:3.4 3:1.9 4:0.2 26 | 1 1:5.0 2:3.0 3:1.6 4:0.2 27 | 1 1:5.0 2:3.4 3:1.6 4:0.4 28 | 1 1:5.2 2:3.5 3:1.5 4:0.2 29 | 1 1:5.2 2:3.4 3:1.4 4:0.2 30 | 1 1:4.7 2:3.2 3:1.6 4:0.2 31 | 1 1:4.8 2:3.1 3:1.6 4:0.2 32 | 1 1:5.4 2:3.4 3:1.5 4:0.4 33 | 1 1:5.2 2:4.1 3:1.5 4:0.1 34 | 1 1:5.5 2:4.2 3:1.4 4:0.2 35 | 1 1:4.9 2:3.1 3:1.5 4:0.1 36 | 1 1:5.0 2:3.2 3:1.2 4:0.2 37 | 1 1:5.5 2:3.5 3:1.3 4:0.2 38 | 1 1:4.9 2:3.1 3:1.5 4:0.1 39 | 1 1:4.4 2:3.0 3:1.3 4:0.2 40 | 1 1:5.1 2:3.4 3:1.5 4:0.2 41 | 1 1:5.0 2:3.5 3:1.3 4:0.3 42 | 1 1:4.5 2:2.3 3:1.3 4:0.3 43 | 1 1:4.4 2:3.2 3:1.3 4:0.2 44 | 1 1:5.0 2:3.5 3:1.6 4:0.6 45 | 1 1:5.1 2:3.8 3:1.9 4:0.4 46 | 1 1:4.8 2:3.0 3:1.4 4:0.3 47 | 1 1:5.1 2:3.8 3:1.6 4:0.2 48 | 1 1:4.6 2:3.2 3:1.4 4:0.2 49 | 1 1:5.3 2:3.7 3:1.5 4:0.2 50 | 1 1:5.0 2:3.3 3:1.4 4:0.2 51 | 2 1:7.0 2:3.2 3:4.7 4:1.4 52 | 2 1:6.4 2:3.2 3:4.5 4:1.5 53 | 2 1:6.9 2:3.1 3:4.9 4:1.5 54 | 2 1:5.5 2:2.3 3:4.0 4:1.3 55 | 2 1:6.5 2:2.8 3:4.6 4:1.5 56 | 2 1:5.7 2:2.8 3:4.5 4:1.3 57 | 2 1:6.3 2:3.3 3:4.7 4:1.6 58 | 2 1:4.9 2:2.4 3:3.3 4:1.0 59 | 2 1:6.6 2:2.9 3:4.6 4:1.3 60 | 2 1:5.2 2:2.7 3:3.9 4:1.4 61 | 2 1:5.0 2:2.0 3:3.5 4:1.0 62 | 2 1:5.9 2:3.0 3:4.2 4:1.5 63 | 2 1:6.0 2:2.2 3:4.0 4:1.0 64 | 2 1:6.1 2:2.9 3:4.7 4:1.4 65 | 2 1:5.6 2:2.9 3:3.6 4:1.3 66 | 2 1:6.7 2:3.1 3:4.4 4:1.4 67 | 2 1:5.6 2:3.0 3:4.5 4:1.5 68 | 2 1:5.8 2:2.7 3:4.1 4:1.0 69 | 2 1:6.2 2:2.2 3:4.5 4:1.5 70 | 2 1:5.6 2:2.5 3:3.9 4:1.1 71 | 2 1:5.9 2:3.2 3:4.8 4:1.8 72 | 2 1:6.1 2:2.8 3:4.0 4:1.3 73 | 2 1:6.3 2:2.5 3:4.9 4:1.5 74 | 2 1:6.1 2:2.8 3:4.7 4:1.2 75 | 2 1:6.4 2:2.9 3:4.3 4:1.3 76 | 2 1:6.6 2:3.0 3:4.4 4:1.4 77 | 2 1:6.8 2:2.8 3:4.8 4:1.4 78 | 2 1:6.7 2:3.0 3:5.0 4:1.7 79 | 2 1:6.0 2:2.9 3:4.5 4:1.5 80 | 2 1:5.7 2:2.6 3:3.5 4:1.0 81 | 2 
1:5.5 2:2.4 3:3.8 4:1.1 82 | 2 1:5.5 2:2.4 3:3.7 4:1.0 83 | 2 1:5.8 2:2.7 3:3.9 4:1.2 84 | 2 1:6.0 2:2.7 3:5.1 4:1.6 85 | 2 1:5.4 2:3.0 3:4.5 4:1.5 86 | 2 1:6.0 2:3.4 3:4.5 4:1.6 87 | 2 1:6.7 2:3.1 3:4.7 4:1.5 88 | 2 1:6.3 2:2.3 3:4.4 4:1.3 89 | 2 1:5.6 2:3.0 3:4.1 4:1.3 90 | 2 1:5.5 2:2.5 3:4.0 4:1.3 91 | 2 1:5.5 2:2.6 3:4.4 4:1.2 92 | 2 1:6.1 2:3.0 3:4.6 4:1.4 93 | 2 1:5.8 2:2.6 3:4.0 4:1.2 94 | 2 1:5.0 2:2.3 3:3.3 4:1.0 95 | 2 1:5.6 2:2.7 3:4.2 4:1.3 96 | 2 1:5.7 2:3.0 3:4.2 4:1.2 97 | 2 1:5.7 2:2.9 3:4.2 4:1.3 98 | 2 1:6.2 2:2.9 3:4.3 4:1.3 99 | 2 1:5.1 2:2.5 3:3.0 4:1.1 100 | 2 1:5.7 2:2.8 3:4.1 4:1.3 101 | 3 1:6.3 2:3.3 3:6.0 4:2.5 102 | 3 1:5.8 2:2.7 3:5.1 4:1.9 103 | 3 1:7.1 2:3.0 3:5.9 4:2.1 104 | 3 1:6.3 2:2.9 3:5.6 4:1.8 105 | 3 1:6.5 2:3.0 3:5.8 4:2.2 106 | 3 1:7.6 2:3.0 3:6.6 4:2.1 107 | 3 1:4.9 2:2.5 3:4.5 4:1.7 108 | 3 1:7.3 2:2.9 3:6.3 4:1.8 109 | 3 1:6.7 2:2.5 3:5.8 4:1.8 110 | 3 1:7.2 2:3.6 3:6.1 4:2.5 111 | 3 1:6.5 2:3.2 3:5.1 4:2.0 112 | 3 1:6.4 2:2.7 3:5.3 4:1.9 113 | 3 1:6.8 2:3.0 3:5.5 4:2.1 114 | 3 1:5.7 2:2.5 3:5.0 4:2.0 115 | 3 1:5.8 2:2.8 3:5.1 4:2.4 116 | 3 1:6.4 2:3.2 3:5.3 4:2.3 117 | 3 1:6.5 2:3.0 3:5.5 4:1.8 118 | 3 1:7.7 2:3.8 3:6.7 4:2.2 119 | 3 1:7.7 2:2.6 3:6.9 4:2.3 120 | 3 1:6.0 2:2.2 3:5.0 4:1.5 121 | 3 1:6.9 2:3.2 3:5.7 4:2.3 122 | 3 1:5.6 2:2.8 3:4.9 4:2.0 123 | 3 1:7.7 2:2.8 3:6.7 4:2.0 124 | 3 1:6.3 2:2.7 3:4.9 4:1.8 125 | 3 1:6.7 2:3.3 3:5.7 4:2.1 126 | 3 1:7.2 2:3.2 3:6.0 4:1.8 127 | 3 1:6.2 2:2.8 3:4.8 4:1.8 128 | 3 1:6.1 2:3.0 3:4.9 4:1.8 129 | 3 1:6.4 2:2.8 3:5.6 4:2.1 130 | 3 1:7.2 2:3.0 3:5.8 4:1.6 131 | 3 1:7.4 2:2.8 3:6.1 4:1.9 132 | 3 1:7.9 2:3.8 3:6.4 4:2.0 133 | 3 1:6.4 2:2.8 3:5.6 4:2.2 134 | 3 1:6.3 2:2.8 3:5.1 4:1.5 135 | 3 1:6.1 2:2.6 3:5.6 4:1.4 136 | 3 1:7.7 2:3.0 3:6.1 4:2.3 137 | 3 1:6.3 2:3.4 3:5.6 4:2.4 138 | 3 1:6.4 2:3.1 3:5.5 4:1.8 139 | 3 1:6.0 2:3.0 3:4.8 4:1.8 140 | 3 1:6.9 2:3.1 3:5.4 4:2.1 141 | 3 1:6.7 2:3.1 3:5.6 4:2.4 142 | 3 1:6.9 2:3.1 3:5.1 4:2.3 143 | 3 1:5.8 2:2.7 3:5.1 4:1.9 144 | 3 
1:6.8 2:3.2 3:5.9 4:2.3 145 | 3 1:6.7 2:3.3 3:5.7 4:2.5 146 | 3 1:6.7 2:3.0 3:5.2 4:2.3 147 | 3 1:6.3 2:2.5 3:5.0 4:1.9 148 | 3 1:6.5 2:3.0 3:5.2 4:2.0 149 | 3 1:6.2 2:3.4 3:5.4 4:2.3 150 | 3 1:5.9 2:3.0 3:5.1 4:1.8 151 | -------------------------------------------------------------------------------- /chapter06/ml-examples/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
package com.demo.chapter06

import org.apache.flink.api.scala._
import org.apache.flink.ml.recommendation._
import org.apache.flink.ml.common.ParameterMap

/**
 * Book recommendation example using ALS matrix factorization.
 *
 * Reads (userId, bookId, rating) triples from books.csv, learns latent
 * factors with ALS, then predicts ratings for the (userId, bookId)
 * pairs in books-test.csv and writes them to books-output.
 */
object MyALSApp {
  def main(args: Array[String]): Unit = {

    val env = ExecutionEnvironment.getExecutionEnvironment

    // (userId, bookId, rating) triples.
    val inputDS: DataSet[(Int, Int, Double)] = env.readCsvFile[(Int, Int, Double)]("books.csv")

    // Setup the ALS learner. The temporary path lets ALS spill
    // intermediate results to disk. FIX: was hard-coded to the
    // Windows-only "D:\\tmp"; use the platform temp directory instead.
    val als = ALS()
      .setIterations(10)
      .setNumFactors(10)
      .setBlocks(100)
      .setTemporaryPath(System.getProperty("java.io.tmpdir"))

    // Set the other parameters (regularization and RNG seed) via a
    // parameter map.
    val parameters = ParameterMap()
      .add(ALS.Lambda, 0.9)
      .add(ALS.Seed, 42L)

    // Calculate the factorization.
    als.fit(inputDS, parameters)

    // Read the testing data set (pairs to score) from a csv file.
    val testingDS: DataSet[(Int, Int)] = env.readCsvFile[(Int, Int)]("books-test.csv")

    // Calculate the ratings according to the matrix factorization.
    val predictedRatings = als.predict(testingDS)

    predictedRatings.writeAsCsv("books-output")

    env.execute("Flink Recommendation App")
  }
}
package com.demo.chapter06

import org.apache.flink.api.scala._
import org.apache.flink.ml._
import org.apache.flink.ml.regression.MultipleLinearRegression
import org.apache.flink.ml.common.LabeledVector
import org.apache.flink.ml.math.Vector
import org.apache.flink.ml.math.{ SparseVector, DenseVector }

/**
 * Multiple linear regression example.
 *
 * FIX: the original left the training/testing data sets as placeholder
 * comments (`val trainingDS: DataSet[LabeledVector] = // input data`),
 * which does not compile, and referenced an undefined `predictions`
 * value. This version reads the LibSVM-formatted iris files used by the
 * other chapter06 examples so the job compiles and runs end to end.
 */
object MyMRLApp {

  def main(args: Array[String]): Unit = {
    val env = ExecutionEnvironment.getExecutionEnvironment

    // Create multiple linear regression learner.
    val mlr = MultipleLinearRegression()
      .setIterations(10)
      .setStepsize(0.5)
      .setConvergenceThreshold(0.001)

    // Obtain training data: one label plus feature vector per line.
    val trainingDS: DataSet[LabeledVector] = MLUtils.readLibSVM(env, "iris-train.txt")
    // Testing data: keep only the feature vectors.
    val testingDS: DataSet[Vector] = MLUtils.readLibSVM(env, "iris-test.txt").map(_.vector)

    // Fit the linear model to the provided data.
    mlr.fit(trainingDS)

    // Calculate the predictions for the test data.
    val predictions = mlr.predict(testingDS)
    predictions.writeAsText("mlr-out")

    env.execute("Flink MLR App")
  }
}
package com.demo.chapter06

import org.apache.flink.api.scala._
import org.apache.flink.ml.math.Vector
import org.apache.flink.ml.common.LabeledVector
import org.apache.flink.ml.classification.SVM
import org.apache.flink.ml.RichExecutionEnvironment

/**
 * SVM classification on LibSVM-formatted iris data.
 *
 * Trains an SVM on iris-train.txt, scores the held-out iris-test.txt
 * feature vectors, and writes (vector, prediction) pairs to "out".
 */
object MySVMApp {
  def main(args: Array[String]): Unit = {
    // Set up the input paths.
    val pathToTrainingFile: String = "iris-train.txt"
    // FIX: previously this pointed at "iris-train.txt" again, so the
    // model was evaluated on its own training data even though a
    // dedicated test file exists in resources.
    val pathToTestingFile: String = "iris-test.txt"
    val env = ExecutionEnvironment.getExecutionEnvironment

    // Read the training data set from a LibSVM formatted file.
    val trainingDS: DataSet[LabeledVector] = env.readLibSVM(pathToTrainingFile)

    // Create the SVM learner.
    val svm = SVM()
      .setBlocks(10)

    // Learn the SVM model.
    svm.fit(trainingDS)

    // Read the testing data set, keeping only the feature vectors.
    val testingDS: DataSet[Vector] = env.readLibSVM(pathToTestingFile).map(_.vector)

    // Calculate the predictions for the testing data set.
    val predictionDS: DataSet[(Vector, Double)] = svm.predict(testingDS)
    predictionDS.writeAsText("out")

    env.execute("Flink Scala API Skeleton")
  }
}
${slf4j.version} 126 | provided 127 | 128 | 129 | log4j 130 | log4j 131 | ${log4j.version} 132 | provided 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | org.apache.maven.plugins 141 | maven-shade-plugin 142 | 2.4.1 143 | 144 | 145 | package 146 | 147 | shade 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 168 | 169 | org.apache.maven.plugins 170 | maven-shade-plugin 171 | 2.4.1 172 | 173 | 174 | 175 | package 176 | 177 | shade 178 | 179 | 180 | 181 | 182 | 184 | org.apache.flink:flink-annotations 185 | org.apache.flink:flink-shaded-hadoop2 186 | org.apache.flink:flink-shaded-curator-recipes 187 | org.apache.flink:flink-core 188 | org.apache.flink:flink-java 189 | org.apache.flink:flink-scala_2.11 190 | org.apache.flink:flink-runtime_2.11 191 | org.apache.flink:flink-optimizer_2.11 192 | org.apache.flink:flink-clients_2.11 193 | org.apache.flink:flink-avro_2.11 194 | org.apache.flink:flink-examples-batch_2.11 195 | org.apache.flink:flink-examples-streaming_2.11 196 | org.apache.flink:flink-streaming-java_2.11 197 | org.apache.flink:flink-streaming-scala_2.11 198 | org.apache.flink:flink-scala-shell_2.11 199 | org.apache.flink:flink-python 200 | org.apache.flink:flink-metrics-core 201 | org.apache.flink:flink-metrics-jmx 202 | org.apache.flink:flink-statebackend-rocksdb_2.11 203 | 204 | 207 | 208 | log4j:log4j 209 | org.scala-lang:scala-library 210 | org.scala-lang:scala-compiler 211 | org.scala-lang:scala-reflect 212 | com.data-artisans:flakka-actor_* 213 | com.data-artisans:flakka-remote_* 214 | com.data-artisans:flakka-slf4j_* 215 | io.netty:netty-all 216 | io.netty:netty 217 | commons-fileupload:commons-fileupload 218 | org.apache.avro:avro 219 | commons-collections:commons-collections 220 | org.codehaus.jackson:jackson-core-asl 221 | org.codehaus.jackson:jackson-mapper-asl 222 | com.thoughtworks.paranamer:paranamer 223 | org.xerial.snappy:snappy-java 224 | org.apache.commons:commons-compress 225 | 
org.tukaani:xz 226 | com.esotericsoftware.kryo:kryo 227 | com.esotericsoftware.minlog:minlog 228 | org.objenesis:objenesis 229 | com.twitter:chill_* 230 | com.twitter:chill-java 231 | commons-lang:commons-lang 232 | junit:junit 233 | org.apache.commons:commons-lang3 234 | org.slf4j:slf4j-api 235 | org.slf4j:slf4j-log4j12 236 | log4j:log4j 237 | org.apache.commons:commons-math 238 | org.apache.sling:org.apache.sling.commons.json 239 | commons-logging:commons-logging 240 | commons-codec:commons-codec 241 | com.fasterxml.jackson.core:jackson-core 242 | com.fasterxml.jackson.core:jackson-databind 243 | com.fasterxml.jackson.core:jackson-annotations 244 | stax:stax-api 245 | com.typesafe:config 246 | org.uncommons.maths:uncommons-maths 247 | com.github.scopt:scopt_* 248 | commons-io:commons-io 249 | commons-cli:commons-cli 250 | 251 | 252 | 253 | 254 | org.apache.flink:* 255 | 256 | 257 | org/apache/flink/shaded/com/** 258 | web-docs/** 259 | 260 | 261 | 262 | 264 | *:* 265 | 266 | META-INF/*.SF 267 | META-INF/*.DSA 268 | META-INF/*.RSA 269 | 270 | 271 | 272 | 274 | 276 | false 277 | 278 | 279 | 280 | 281 | 282 | 283 | org.apache.maven.plugins 284 | maven-compiler-plugin 285 | 3.1 286 | 287 | 1.7 288 | 1.7 289 | 290 | 291 | 292 | 293 | 294 | 296 | 310 | 311 | 312 | 313 | -------------------------------------------------------------------------------- /chapter07/flink-gelly/src/main/java/com/demo/flink/gelly/BatchJob.java: -------------------------------------------------------------------------------- 1 | package com.demo.flink.gelly; 2 | 3 | import org.apache.flink.api.java.DataSet; 4 | 5 | import org.apache.flink.api.java.ExecutionEnvironment; 6 | import org.apache.flink.api.java.tuple.Tuple2; 7 | import org.apache.flink.api.java.tuple.Tuple3; 8 | import org.apache.flink.graph.Edge; 9 | import org.apache.flink.graph.Graph; 10 | import org.apache.flink.graph.Vertex; 11 | import org.apache.flink.graph.pregel.ComputeFunction; 12 | import 
org.apache.flink.graph.pregel.MessageCombiner; 13 | import org.apache.flink.graph.pregel.MessageIterator; 14 | 15 | public class BatchJob { 16 | 17 | final static String srcId = "s15"; 18 | 19 | public static void main(String[] args) throws Exception { 20 | // set up the batch execution environment 21 | final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 22 | 23 | // Create graph by reading from CSV files 24 | DataSet> airportVertices = env 25 | .readCsvFile("D://work//Mastering Flink//Chapter 7//data//nodes.csv").types(String.class, Double.class); 26 | 27 | DataSet> airportEdges = env 28 | .readCsvFile("D://work//Mastering Flink//Chapter 7//data//edges.csv") 29 | .types(String.class, String.class, Double.class); 30 | 31 | Graph graph = Graph.fromTupleDataSet(airportVertices, airportEdges, env); 32 | 33 | // Find out no. of airports and routes 34 | System.out.println("No. of Routes in Graph:" + graph.numberOfEdges()); 35 | System.out.println("No. of Airports in Graph:" + graph.numberOfVertices()); 36 | 37 | // define the maximum number of iterations 38 | int maxIterations = 10; 39 | 40 | // Execute the vertex-centric iteration 41 | Graph result = graph.runVertexCentricIteration(new SSSPComputeFunction(), 42 | new SSSPCombiner(), maxIterations); 43 | 44 | // Extract the vertices as the result 45 | DataSet> singleSourceShortestPaths = result.getVertices(); 46 | 47 | singleSourceShortestPaths.print(); 48 | 49 | 50 | 51 | } 52 | 53 | final static class SSSPComputeFunction extends ComputeFunction { 54 | 55 | @Override 56 | public void compute(Vertex vertex, MessageIterator messages) throws Exception { 57 | double minDistance = (vertex.getId().equals(srcId)) ? 
0d : Double.POSITIVE_INFINITY; 58 | for (Double msg : messages) { 59 | minDistance = Math.min(minDistance, msg); 60 | } 61 | 62 | if (minDistance < vertex.getValue()) { 63 | setNewVertexValue(minDistance); 64 | for (Edge e : getEdges()) { 65 | sendMessageTo(e.getTarget(), minDistance + e.getValue()); 66 | } 67 | } 68 | 69 | } 70 | 71 | } 72 | 73 | final static class SSSPCombiner extends MessageCombiner { 74 | 75 | public void combineMessages(MessageIterator messages) { 76 | 77 | double minMessage = Double.POSITIVE_INFINITY; 78 | for (Double msg : messages) { 79 | minMessage = Math.min(minMessage, msg); 80 | } 81 | sendCombinedMessage(minMessage); 82 | } 83 | 84 | } 85 | 86 | } 87 | -------------------------------------------------------------------------------- /chapter07/flink-gelly/src/main/resources/data/edges.csv: -------------------------------------------------------------------------------- 1 | s01,s02,10 2 | s01,s02,12 3 | s01,s03,22 4 | s01,s04,21 5 | s04,s11,22 6 | s05,s15,21 7 | s06,s17,21 8 | s08,s09,11 9 | s08,s09,12 10 | s03,s04,22 11 | s04,s03,23 12 | s01,s15,20 13 | s15,s01,11 14 | s15,s01,11 15 | s16,s17,21 16 | s16,s06,23 17 | s06,s16,21 18 | s09,s10,21 19 | s08,s07,21 20 | s07,s08,22 21 | s07,s10,21 22 | s05,s02,21 23 | s02,s03,21 24 | s02,s01,23 25 | s03,s01,21 26 | s12,s13,22 27 | s12,s14,22 28 | s14,s13,21 29 | s13,s12,21 30 | s05,s09,2 31 | s02,s10,5 32 | s03,s12,1 33 | s04,s06,1 34 | s10,s03,2 35 | s03,s10,2 36 | s04,s12,3 37 | s13,s17,1 38 | s06,s06,1 39 | s14,s11,1 40 | s03,s11,1 41 | s12,s06,2 42 | s04,s17,2 43 | s17,s04,4 44 | s08,s03,2 45 | s03,s08,4 46 | s07,s14,4 47 | s15,s06,4 48 | s15,s04,1 49 | s05,s01,1 50 | s02,s09,1 51 | s03,s05,1 52 | s07,s03,1 53 | -------------------------------------------------------------------------------- /chapter07/flink-gelly/src/main/resources/data/nodes.csv: -------------------------------------------------------------------------------- 1 | s01,1 2 | s02,2 3 | s03,3 4 | s04,4 5 | s05,5 6 | s06,6 7 | 
s07,7 8 | s08,8 9 | s09,9 10 | s10,10 11 | s11,11 12 | s12,12 13 | s13,13 14 | s14,14 15 | s15,15 16 | s16,16 17 | s17,17 18 | -------------------------------------------------------------------------------- /chapter07/flink-gelly/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | log4j.rootLogger=INFO, console 20 | 21 | log4j.appender.console=org.apache.log4j.ConsoleAppender 22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 23 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n 24 | -------------------------------------------------------------------------------- /chapter08/readme.txt: -------------------------------------------------------------------------------- 1 | This chapter does not contain any coding examples. 
-------------------------------------------------------------------------------- /chapter09/readme.txt: -------------------------------------------------------------------------------- 1 | This chapter does not contain any coding examples. -------------------------------------------------------------------------------- /chapter10/flink-batch-adv/pom.xml: -------------------------------------------------------------------------------- 1 | 11 | 13 | 4.0.0 14 | 15 | com.demo 16 | flink-batch-adv 17 | 1.0 18 | jar 19 | 20 | Flink Quickstart Job 21 | http://www.myorganization.org 22 | 23 | 24 | UTF-8 25 | 1.1.3 26 | 1.7.7 27 | 1.2.17 28 | 29 | 30 | 31 | 32 | apache.snapshots 33 | Apache Development Snapshot Repository 34 | https://repository.apache.org/content/repositories/snapshots/ 35 | 36 | false 37 | 38 | 39 | true 40 | 41 | 42 | 43 | 44 | 55 | 56 | 57 | 58 | 59 | org.apache.flink 60 | flink-java 61 | ${flink.version} 62 | 63 | 64 | org.apache.flink 65 | flink-streaming-java_2.11 66 | ${flink.version} 67 | 68 | 69 | org.apache.flink 70 | flink-clients_2.11 71 | ${flink.version} 72 | 73 | 74 | 75 | com.esotericsoftware 76 | kryo 77 | 4.0.0 78 | 79 | 80 | 81 | 83 | 84 | org.slf4j 85 | slf4j-log4j12 86 | ${slf4j.version} 87 | 88 | 89 | log4j 90 | log4j 91 | ${log4j.version} 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | build-jar 100 | 101 | 102 | false 103 | 104 | 105 | 106 | 107 | org.apache.flink 108 | flink-java 109 | ${flink.version} 110 | provided 111 | 112 | 113 | org.apache.flink 114 | flink-streaming-java_2.11 115 | ${flink.version} 116 | provided 117 | 118 | 119 | org.apache.flink 120 | flink-clients_2.11 121 | ${flink.version} 122 | provided 123 | 124 | 125 | org.slf4j 126 | slf4j-log4j12 127 | ${slf4j.version} 128 | provided 129 | 130 | 131 | log4j 132 | log4j 133 | ${log4j.version} 134 | provided 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | org.apache.maven.plugins 143 | maven-shade-plugin 144 | 2.4.1 145 | 146 | 147 | package 148 | 149 | shade 150 | 151 | 152 
| 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 170 | 171 | org.apache.maven.plugins 172 | maven-shade-plugin 173 | 2.4.1 174 | 175 | 176 | 177 | package 178 | 179 | shade 180 | 181 | 182 | 183 | 184 | 186 | org.apache.flink:flink-annotations 187 | org.apache.flink:flink-shaded-hadoop2 188 | org.apache.flink:flink-shaded-curator-recipes 189 | org.apache.flink:flink-core 190 | org.apache.flink:flink-java 191 | org.apache.flink:flink-scala_2.11 192 | org.apache.flink:flink-runtime_2.11 193 | org.apache.flink:flink-optimizer_2.11 194 | org.apache.flink:flink-clients_2.11 195 | org.apache.flink:flink-avro_2.11 196 | org.apache.flink:flink-examples-batch_2.11 197 | org.apache.flink:flink-examples-streaming_2.11 198 | org.apache.flink:flink-streaming-java_2.11 199 | org.apache.flink:flink-streaming-scala_2.11 200 | org.apache.flink:flink-scala-shell_2.11 201 | org.apache.flink:flink-python 202 | org.apache.flink:flink-metrics-core 203 | org.apache.flink:flink-metrics-jmx 204 | org.apache.flink:flink-statebackend-rocksdb_2.11 205 | 206 | 209 | 210 | log4j:log4j 211 | org.scala-lang:scala-library 212 | org.scala-lang:scala-compiler 213 | org.scala-lang:scala-reflect 214 | com.data-artisans:flakka-actor_* 215 | com.data-artisans:flakka-remote_* 216 | com.data-artisans:flakka-slf4j_* 217 | io.netty:netty-all 218 | io.netty:netty 219 | commons-fileupload:commons-fileupload 220 | org.apache.avro:avro 221 | commons-collections:commons-collections 222 | org.codehaus.jackson:jackson-core-asl 223 | org.codehaus.jackson:jackson-mapper-asl 224 | com.thoughtworks.paranamer:paranamer 225 | org.xerial.snappy:snappy-java 226 | org.apache.commons:commons-compress 227 | org.tukaani:xz 228 | com.esotericsoftware.kryo:kryo 229 | com.esotericsoftware.minlog:minlog 230 | org.objenesis:objenesis 231 | com.twitter:chill_* 232 | com.twitter:chill-java 233 | commons-lang:commons-lang 234 | junit:junit 235 | org.apache.commons:commons-lang3 236 | 
org.slf4j:slf4j-api 237 | org.slf4j:slf4j-log4j12 238 | log4j:log4j 239 | org.apache.commons:commons-math 240 | org.apache.sling:org.apache.sling.commons.json 241 | commons-logging:commons-logging 242 | commons-codec:commons-codec 243 | com.fasterxml.jackson.core:jackson-core 244 | com.fasterxml.jackson.core:jackson-databind 245 | com.fasterxml.jackson.core:jackson-annotations 246 | stax:stax-api 247 | com.typesafe:config 248 | org.uncommons.maths:uncommons-maths 249 | com.github.scopt:scopt_* 250 | commons-io:commons-io 251 | commons-cli:commons-cli 252 | 253 | 254 | 255 | 256 | org.apache.flink:* 257 | 258 | 259 | org/apache/flink/shaded/com/** 260 | web-docs/** 261 | 262 | 263 | 264 | 266 | *:* 267 | 268 | META-INF/*.SF 269 | META-INF/*.DSA 270 | META-INF/*.RSA 271 | 272 | 273 | 274 | 276 | 278 | false 279 | 280 | 281 | 282 | 283 | 284 | 285 | org.apache.maven.plugins 286 | maven-compiler-plugin 287 | 3.1 288 | 289 | 1.7 290 | 1.7 291 | 292 | 293 | 294 | 295 | 296 | 298 | 312 | 313 | 314 | 315 | -------------------------------------------------------------------------------- /chapter10/flink-batch-adv/src/main/java/com/demo/flink/batch/IterativePiExample.java: -------------------------------------------------------------------------------- 1 | package com.demo.flink.batch; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.java.DataSet; 5 | import org.apache.flink.api.java.ExecutionEnvironment; 6 | 7 | import org.apache.flink.api.java.operators.IterativeDataSet; 8 | /** 9 | * Iterative Pi example, makes use of iteration data set to compute Pi. 
10 | * @author TDeshpande 11 | * 12 | */ 13 | public class IterativePiExample { 14 | 15 | public static void main(String[] args) throws Exception { 16 | final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 17 | 18 | // Create initial IterativeDataSet 19 | IterativeDataSet initial = env.fromElements(0).iterate(10000); 20 | 21 | DataSet iteration = initial.map(new MapFunction() { 22 | @Override 23 | public Integer map(Integer i) throws Exception { 24 | double x = Math.random(); 25 | double y = Math.random(); 26 | 27 | return i + ((x * x + y * y < 1) ? 1 : 0); 28 | } 29 | }); 30 | 31 | // Iteratively transform the IterativeDataSet 32 | DataSet count = initial.closeWith(iteration); 33 | 34 | count.map(new MapFunction() { 35 | @Override 36 | public Double map(Integer count) throws Exception { 37 | return count / (double) 10000 * 4; 38 | } 39 | }).print(); 40 | 41 | 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /chapter10/flink-batch-adv/src/main/java/com/demo/flink/batch/OlympicsAthletesBatchJob.java: -------------------------------------------------------------------------------- 1 | package com.demo.flink.batch; 2 | 3 | import org.apache.flink.api.java.DataSet; 4 | import org.apache.flink.api.java.ExecutionEnvironment; 5 | 6 | import java.util.List; 7 | 8 | import org.apache.flink.api.common.functions.FlatMapFunction; 9 | 10 | import org.apache.flink.api.common.functions.RichMapFunction; 11 | import org.apache.flink.api.java.tuple.Tuple2; 12 | import org.apache.flink.configuration.Configuration; 13 | import org.apache.flink.util.Collector; 14 | 15 | /** 16 | * Implements the Oylympics Athletes program that gives insights about games 17 | * played and medals won. 18 | * 19 | * Sample input file is provided in src/main/resources/data folder 20 | *

21 | * This example shows how to: 22 | *

    23 | *
  • write a simple Flink batch program. 24 | *
  • use Tuple data types. 25 | *
  • write and use user-defined functions. 26 | *
27 | * 28 | */ 29 | public class OlympicsAthletesBatchJob { 30 | 31 | public static void main(String[] args) throws Exception { 32 | 33 | // set up the execution environment 34 | final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 35 | env.getConfig().registerTypeWithKryoSerializer(Record.class, RecordSerializer.class); 36 | 37 | DataSet csvInput = env 38 | .readCsvFile("D://NOTBACKEDUP//dataflow//flink-batch//src//main//resources//data//olympic-athletes.csv") 39 | .pojoType(Record.class, "playerName", "country", "year", "game", "gold", "silver", "bronze", "total"); 40 | 41 | DataSet> groupedByCountry = csvInput 42 | .flatMap(new FlatMapFunction>() { 43 | 44 | private static final long serialVersionUID = 1L; 45 | 46 | @Override 47 | public void flatMap(Record record, Collector> out) throws Exception { 48 | 49 | out.collect(new Tuple2(record.getCountry(), 1)); 50 | } 51 | }).groupBy(0).sum(1); 52 | groupedByCountry.print(); 53 | 54 | DataSet> groupedByGame = csvInput 55 | .flatMap(new FlatMapFunction>() { 56 | 57 | private static final long serialVersionUID = 1L; 58 | 59 | @Override 60 | public void flatMap(Record record, Collector> out) throws Exception { 61 | 62 | out.collect(new Tuple2(record.getGame(), 1)); 63 | } 64 | }).groupBy(0).sum(1); 65 | groupedByGame.print(); 66 | 67 | // Get a data set to be broadcasted 68 | DataSet toBroadcast = env.fromElements(1, 2, 3); 69 | DataSet data = env.fromElements("India", "USA", "UK").map(new RichMapFunction() { 70 | private List toBroadcast; 71 | 72 | // We have to use open method to get broadcast set from the context 73 | @Override 74 | public void open(Configuration parameters) throws Exception { 75 | // Get the broadcast set, available as collection 76 | this.toBroadcast = getRuntimeContext().getBroadcastVariable("country"); 77 | } 78 | 79 | @Override 80 | public String map(String input) throws Exception { 81 | 82 | int sum = 0; 83 | for (int a : toBroadcast) { 84 | sum = a + sum; 85 | } 86 
| return input.toUpperCase() + sum; 87 | } 88 | }).withBroadcastSet(toBroadcast, "country"); // Broadcast the set with 89 | // name 90 | data.print(); 91 | 92 | } 93 | 94 | } 95 | -------------------------------------------------------------------------------- /chapter10/flink-batch-adv/src/main/java/com/demo/flink/batch/Record.java: -------------------------------------------------------------------------------- 1 | package com.demo.flink.batch; 2 | 3 | public class Record { 4 | 5 | private String playerName; 6 | private String country; 7 | private int year; 8 | private String game; 9 | private int gold; 10 | private int silver; 11 | private int bronze; 12 | private int total; 13 | 14 | public String getPlayerName() { 15 | return playerName; 16 | } 17 | 18 | public void setPlayerName(String playerName) { 19 | this.playerName = playerName; 20 | } 21 | 22 | public String getCountry() { 23 | return country; 24 | } 25 | 26 | public void setCountry(String country) { 27 | this.country = country; 28 | } 29 | 30 | public int getYear() { 31 | return year; 32 | } 33 | 34 | public void setYear(int year) { 35 | this.year = year; 36 | } 37 | 38 | public String getGame() { 39 | return game; 40 | } 41 | 42 | public void setGame(String game) { 43 | this.game = game; 44 | } 45 | 46 | public int getGold() { 47 | return gold; 48 | } 49 | 50 | public void setGold(int gold) { 51 | this.gold = gold; 52 | } 53 | 54 | public int getSilver() { 55 | return silver; 56 | } 57 | 58 | public void setSilver(int silver) { 59 | this.silver = silver; 60 | } 61 | 62 | public int getBronze() { 63 | return bronze; 64 | } 65 | 66 | public void setBronze(int bronze) { 67 | this.bronze = bronze; 68 | } 69 | 70 | public int getTotal() { 71 | return total; 72 | } 73 | 74 | public void setTotal(int total) { 75 | this.total = total; 76 | } 77 | 78 | public Record() { 79 | 80 | } 81 | 82 | public Record(String playerName, String country, int year, String game, int gold, int silver, int bronze, 83 | int 
total) { 84 | super(); 85 | this.playerName = playerName; 86 | this.country = country; 87 | this.year = year; 88 | this.game = game; 89 | this.gold = gold; 90 | this.silver = silver; 91 | this.bronze = bronze; 92 | this.total = total; 93 | } 94 | 95 | @Override 96 | public String toString() { 97 | return "Record [playerName=" + playerName + ", country=" + country + ", year=" + year + ", game=" + game 98 | + ", gold=" + gold + ", silver=" + silver + ", bronze=" + bronze + ", total=" + total + "]"; 99 | } 100 | 101 | } 102 | -------------------------------------------------------------------------------- /chapter10/flink-batch-adv/src/main/java/com/demo/flink/batch/RecordSerializer.java: -------------------------------------------------------------------------------- 1 | package com.demo.flink.batch; 2 | 3 | import com.esotericsoftware.kryo.Kryo; 4 | import com.esotericsoftware.kryo.Serializer; 5 | import com.esotericsoftware.kryo.io.Input; 6 | import com.esotericsoftware.kryo.io.Output; 7 | 8 | public class RecordSerializer extends Serializer { 9 | 10 | @Override 11 | public Record read(Kryo kryo, Input input, Class type) { 12 | 13 | return new Record(input.readString(), input.readString(), input.read(), input.readString(), input.read(), 14 | input.read(), input.read(), input.read()); 15 | } 16 | 17 | @Override 18 | public void write(Kryo kryo, Output output, Record object) { 19 | output.writeString(object.getPlayerName()); 20 | output.writeString(object.getCountry()); 21 | output.writeInt(object.getYear()); 22 | output.writeString(object.getGame()); 23 | output.writeInt(object.getGold()); 24 | output.writeInt(object.getSilver()); 25 | output.writeInt(object.getBronze()); 26 | output.writeInt(object.getTotal()); 27 | 28 | } 29 | 30 | } 31 | -------------------------------------------------------------------------------- /chapter10/flink-batch-adv/src/main/java/com/demo/flink/batch/RecordTuple.java: 
-------------------------------------------------------------------------------- 1 | package com.demo.flink.batch; 2 | 3 | import org.apache.flink.api.java.tuple.Tuple8; 4 | 5 | public class RecordTuple extends Tuple8 { 6 | 7 | private static final long serialVersionUID = 1L; 8 | 9 | public RecordTuple() { 10 | super(); 11 | } 12 | 13 | public RecordTuple(String value0, String value1, Integer value2, String value3, Integer value4, Integer value5, 14 | Integer value6, Integer value7) { 15 | super(value0, value1, value2, value3, value4, value5, value6, value7); 16 | } 17 | 18 | } 19 | -------------------------------------------------------------------------------- /chapter10/flink-batch-adv/src/main/resources/data/olympic-athletes.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deshpandetanmay/mastering-flink/397d7b84e46d6e9c6efa1cd4743173ae5f84b0e0/chapter10/flink-batch-adv/src/main/resources/data/olympic-athletes.csv -------------------------------------------------------------------------------- /chapter10/flink-batch-adv/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. 
You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | log4j.rootLogger=INFO, console 20 | 21 | log4j.appender.console=org.apache.log4j.ConsoleAppender 22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 23 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n 24 | -------------------------------------------------------------------------------- /images/Tanmay_Books.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deshpandetanmay/mastering-flink/397d7b84e46d6e9c6efa1cd4743173ae5f84b0e0/images/Tanmay_Books.png -------------------------------------------------------------------------------- /images/flink.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deshpandetanmay/mastering-flink/397d7b84e46d6e9c6efa1cd4743173ae5f84b0e0/images/flink.JPG --------------------------------------------------------------------------------