├── .gitignore ├── README.md ├── build.sbt ├── project ├── build.properties └── plugins.sbt └── src └── main └── scala └── com └── madhukaraphatak └── flink ├── examples └── WordCount.scala └── streaming └── examples ├── CustomSource.scala ├── EventTimeExample.scala ├── Models.scala ├── StreamingWordCount.scala ├── WindowAnatomy.scala ├── WindowExample.scala ├── WindowedStreamingWordCount.scala └── sessionwindow ├── SessionTrigger.scala └── SessionWindowExample.scala /.gitignore: -------------------------------------------------------------------------------- 1 | *.iml 2 | .idea/ 3 | *.iml 4 | target/ 5 | project/target 6 | dependency-reduced-pom.xml 7 | *.pdf 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This repository has examples for Apache Flink. 2 | 3 | ## Build 4 | 5 | sbt clean package 6 | 7 | ## Running 8 | 9 | Load the project into Eclipse or IntelliJ to run the examples. 10 | 11 | -------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | 2 | name := "flink-examples" 3 | 4 | version := "1.0" 5 | scalaVersion := "2.10.4" 6 | 7 | libraryDependencies ++= Seq("org.apache.flink" %% "flink-scala" % "1.0.0", 8 | "org.apache.flink" %% "flink-clients" % "1.0.0", 9 | "org.apache.flink" %% "flink-streaming-scala" % "1.0.0") 10 | -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 0.13.8 -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | logLevel := Level.Warn -------------------------------------------------------------------------------- 
/src/main/scala/com/madhukaraphatak/flink/examples/WordCount.scala:
--------------------------------------------------------------------------------
package com.madhukaraphatak.flink.examples

import org.apache.flink.api.scala.ExecutionEnvironment

import org.apache.flink.api.scala._

/**
 * Batch word count over a small in-memory collection.
 *
 * Every line is split on whitespace, each word is paired with 1, the pairs
 * are grouped on the word and the counts are summed.
 */
object WordCount {

  /**
   * Builds the word-count pipeline and prints the collected result.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {

    val env = ExecutionEnvironment.getExecutionEnvironment

    val data = List("hi","how are you","hi")

    val dataSet = env.fromCollection(data)

    val words = dataSet.flatMap(value => value.split("\\s+"))

    // tuple field 0 is the word, field 1 is its count
    val mappedWords = words.map(value => (value,1))

    val grouped = mappedWords.groupBy(0)

    val sum = grouped.sum(1)

    println(sum.collect())

  }

}
--------------------------------------------------------------------------------
/src/main/scala/com/madhukaraphatak/flink/streaming/examples/CustomSource.scala:
--------------------------------------------------------------------------------
package com.madhukaraphatak.flink.streaming.examples

import org.apache.flink.streaming.api.functions.source.SourceFunction.SourceContext
import org.apache.flink.streaming.api.scala._

import scala.util.Random

/**
 * Streaming job fed by a hand-written source that emits random sentences.
 */
object CustomSource {

  /**
   * Emits one randomly chosen sentence after every 200 ms pause, forever.
   * This method never returns normally.
   *
   * @param out source context the chosen sentences are collected into
   */
  def generateRandomStringSource(out: SourceContext[String]): Unit = {
    val lines = Array("how are you","you are how", " i am fine")
    while (true) {
      // Bound the index by the array itself so that adding or removing a
      // sentence cannot leave entries unreachable or index past the end.
      val index = Random.nextInt(lines.length)
      Thread.sleep(200)
      out.collect(lines(index))
    }
  }

  /**
   * Registers the random sentence generator as a source, prints every
   * emitted sentence and runs the job.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {

    val env = StreamExecutionEnvironment.getExecutionEnvironment

    val customSource = env.addSource(generateRandomStringSource _)

    customSource.print()

    env.execute()

  }

}
--------------------------------------------------------------------------------
/src/main/scala/com/madhukaraphatak/flink/streaming/examples/EventTimeExample.scala:
--------------------------------------------------------------------------------
package com.madhukaraphatak.flink.streaming.examples

import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.time.Time

/**
 * Event-time windowing over comma-separated stock records read from a socket.
 */
object EventTimeExample {

  /** One stock record; `time` supplies the record's timestamp. */
  case class Stock(time:Long, symbol:String,value:Double)

  /**
   * Reads `time,symbol,value` lines from localhost:50050, keys them by
   * symbol and prints the max of `value` per 10-second time window.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    // column 0 = time, column 1 = symbol, column 2 = value
    val stocks = env
      .socketTextStream("localhost",50050)
      .map(line => {
        val fields = line.split(",")
        Stock(fields(0).toLong, fields(1), fields(2).toDouble)
      })

    val maxPerSymbol = stocks
      .assignAscendingTimestamps(_.time)
      .keyBy(_.symbol)
      .timeWindow(Time.seconds(10))
      .max("value")
      .name("timedwindow")

    maxPerSymbol.print().name("print sink")

    env.execute()
  }

}
--------------------------------------------------------------------------------
/src/main/scala/com/madhukaraphatak/flink/streaming/examples/Models.scala:
--------------------------------------------------------------------------------
package com.madhukaraphatak.flink.streaming.examples

/** Data types shared by the streaming examples. */
object Models {

  /**
   * A single event belonging to a session.
   *
   * @param sessionId identifier of the session the event belongs to
   * @param value     numeric value carried by the event
   * @param endSignal optional marker; SessionTrigger fires when it is defined
   */
  case class Session(sessionId:String, value:Double, endSignal:Option[String])
}
--------------------------------------------------------------------------------
/src/main/scala/com/madhukaraphatak/flink/streaming/examples/StreamingWordCount.scala:
--------------------------------------------------------------------------------
package com.madhukaraphatak.flink.streaming.examples

import org.apache.flink.streaming.api.scala._

/**
 * Running word count over text lines read from a socket.
 */
object StreamingWordCount {

  /**
   * Reads lines from localhost:9000 and prints a running count per word.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {

    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // create a stream using socket

    val socketStream = env.socketTextStream("localhost",9000)

    // implement word count
    // split("\\s+") yields an empty token for blank lines and for lines that
    // start with whitespace; drop it so "" is never counted as a word

    val wordsStream = socketStream
      .flatMap(value => value.split("\\s+").filter(_.nonEmpty))
      .map(value => (value,1))

    val keyValuePair = wordsStream.keyBy(0)

    val countPair = keyValuePair.sum(1)

    // print the results

    countPair.print()

    // execute the program

    env.execute()

  }

}
--------------------------------------------------------------------------------
/src/main/scala/com/madhukaraphatak/flink/streaming/examples/WindowAnatomy.scala:
--------------------------------------------------------------------------------
package com.madhukaraphatak.flink.streaming.examples

import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.assigners.GlobalWindows
import org.apache.flink.streaming.api.windowing.triggers.{CountTrigger, PurgingTrigger}
import org.apache.flink.streaming.api.windowing.windows.GlobalWindow

/**
 * Builds a count window by hand from a global window plus a count trigger,
 * once with the trigger used directly and once wrapped in a PurgingTrigger.
 */
object WindowAnatomy {

  /**
   * Reads lines from localhost:9000, keys the words and prints the summed
   * counts of both hand-built windows.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {

    val env = StreamExecutionEnvironment.getExecutionEnvironment

    val source = env.socketTextStream("localhost",9000)

    // drop the empty token split("\\s+") produces for blank lines and for
    // lines with leading whitespace, so "" never becomes a key
    val values = source
      .flatMap(value => value.split("\\s+").filter(_.nonEmpty))
      .map(value => (value,1))

    val keyValue = values.keyBy(0)

    // define the count window without purge

    val countWindowWithoutPurge = keyValue.window(GlobalWindows.create()).
      trigger(CountTrigger.of(2))

    // define the count window with purge: same count trigger, wrapped in PurgingTrigger

    val countWindowWithPurge = keyValue.window(GlobalWindows.create()).
      trigger(PurgingTrigger.of(CountTrigger.of[GlobalWindow](2)))

    countWindowWithoutPurge.sum(1).print()

    countWindowWithPurge.sum(1).print()

    env.execute()

  }
}
--------------------------------------------------------------------------------
/src/main/scala/com/madhukaraphatak/flink/streaming/examples/WindowExample.scala:
--------------------------------------------------------------------------------
package com.madhukaraphatak.flink.streaming.examples

import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.time.Time

/**
 * Declares tumbling, sliding and count windows over a keyed word stream.
 * Only the count window is wired to an output; the other two are left in
 * place with their outputs commented out.
 */
object WindowExample {

  /**
   * Reads lines from localhost:9000 and prints word counts per count window.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {

    val env = StreamExecutionEnvironment.getExecutionEnvironment

    val source = env.socketTextStream("localhost",9000)

    //word map
    // drop the empty token split("\\s+") produces for blank lines and for
    // lines with leading whitespace, so "" never becomes a key

    val values = source
      .flatMap(value => value.split("\\s+").filter(_.nonEmpty))
      .map(value => (value,1))

    val keyValue = values.keyBy(0)

    //tumbling window : Calculate wordcount for each 15 seconds

    val tumblingWindow = keyValue.timeWindow(Time.seconds(15))
    // sliding window : Calculate wordcount for the last 15 seconds, every 5 seconds
    val slidingWindow = keyValue.timeWindow(Time.seconds(15),Time.seconds(5))
    //count window : Calculate for every 5 records
    val countWindow = keyValue.countWindow(5)


    //tumblingWindow.sum(1).name("tumblingwindow").print()
    //slidingWindow.sum(1).name("slidingwindow").print()
    countWindow.sum(1).name("count window").print()

    env.execute()

  }

}
--------------------------------------------------------------------------------
/src/main/scala/com/madhukaraphatak/flink/streaming/examples/WindowedStreamingWordCount.scala:
--------------------------------------------------------------------------------
package com.madhukaraphatak.flink.streaming.examples

import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.time.Time

/**
 * Word count over 15-second time windows of text lines read from a socket.
 */
object WindowedStreamingWordCount {

  /**
   * Reads lines from localhost:9000 and prints the per-window word counts.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {

    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // create a stream using socket

    val socketStream = env.socketTextStream("localhost",9000)

    // implement word count
    // split("\\s+") yields an empty token for blank lines and for lines that
    // start with whitespace; drop it so "" is never counted as a word

    val wordsStream = socketStream
      .flatMap(value => value.split("\\s+").filter(_.nonEmpty))
      .map(value => (value,1))

    val keyValuePair = wordsStream.keyBy(0).timeWindow(Time.seconds(15))

    val countStream = keyValuePair.sum(1)

    countStream.print()

    env.execute()

  }

}
--------------------------------------------------------------------------------
/src/main/scala/com/madhukaraphatak/flink/streaming/examples/sessionwindow/SessionTrigger.scala:
--------------------------------------------------------------------------------
package com.madhukaraphatak.flink.streaming.examples.sessionwindow

import com.madhukaraphatak.flink.streaming.examples.Models.Session
import org.apache.flink.streaming.api.windowing.triggers.Trigger.TriggerContext
import org.apache.flink.streaming.api.windowing.triggers.{Trigger, TriggerResult}
import org.apache.flink.streaming.api.windowing.windows.Window

/**
 * Trigger that fires when an element carrying an end signal arrives.
 * Both time callbacks always return CONTINUE.
 *
 * @tparam W type of window this trigger is attached to
 */
class SessionTrigger[W <: Window] extends Trigger[Session,W] {

  /** Returns FIRE when the element's `endSignal` is defined, CONTINUE otherwise. */
  override def onElement(element: Session, timestamp: Long, window: W, ctx: TriggerContext): TriggerResult = {
    if(element.endSignal.isDefined) TriggerResult.FIRE
    else TriggerResult.CONTINUE
  }

  /** Always returns CONTINUE. */
  override def onProcessingTime(time: Long, window: W, ctx: TriggerContext): TriggerResult = {
    TriggerResult.CONTINUE
  }

  /** Always returns CONTINUE. */
  override def onEventTime(time: Long, window: W, ctx: TriggerContext): TriggerResult = {
    TriggerResult.CONTINUE
  }
}
--------------------------------------------------------------------------------
/src/main/scala/com/madhukaraphatak/flink/streaming/examples/sessionwindow/SessionWindowExample.scala:
--------------------------------------------------------------------------------
package com.madhukaraphatak.flink.streaming.examples.sessionwindow

import com.madhukaraphatak.flink.streaming.examples.Models.Session
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.assigners.GlobalWindows
import org.apache.flink.streaming.api.windowing.triggers.PurgingTrigger
import org.apache.flink.streaming.api.windowing.windows.GlobalWindow

import scala.util.Try

/**
 * Session-style windowing: events are keyed by session id, collected in a
 * global window and summed when SessionTrigger fires.
 */
object SessionWindowExample {

  /**
   * Reads `sessionId,value[,endSignal]` lines from localhost:9000 and prints
   * the sum of `value` per session window.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {

    val env = StreamExecutionEnvironment.getExecutionEnvironment

    val source = env.socketTextStream("localhost", 9000)

    //session map

    val values = source.map(value => {
      val columns = value.split(",")
      // the third column is optional: lift gives None when it is absent,
      // without going through a thrown-and-caught index exception
      val endSignal = columns.lift(2)
      Session(columns(0), columns(1).toDouble, endSignal)
    })

    val keyValue = values.keyBy(_.sessionId)

    // create global window

    val sessionWindowStream = keyValue.
      window(GlobalWindows.create()).
      trigger(PurgingTrigger.of(new SessionTrigger[GlobalWindow]()))

    sessionWindowStream.sum("value").print()

    env.execute()

  }
}
--------------------------------------------------------------------------------