├── .gitignore ├── README ├── build.sbt ├── idea.sbt ├── project ├── assembly.sbt └── build.properties └── src └── main ├── resources └── log4j.properties └── scala └── org └── example ├── Job.scala ├── SocketTextStreamWordCount.scala └── WordCount.scala /.gitignore: -------------------------------------------------------------------------------- 1 | bin/ 2 | project/ 3 | target/ 4 | .cache 5 | .classpath 6 | .project 7 | .settings 8 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | A Flink application project using Scala and SBT. 2 | 3 | To run and test your application use SBT invoke: 'sbt run' 4 | 5 | In order to run your application from within IntelliJ, you have to select the classpath of the 'mainRunner' module in the run/debug configurations. 6 | Simply open 'Run -> Edit configurations...' and then select 'mainRunner' from the "Use classpath of module" dropbox. 7 | -------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | ThisBuild / resolvers ++= Seq( 2 | "Apache Development Snapshot Repository" at "https://repository.apache.org/content/repositories/snapshots/", 3 | Resolver.mavenLocal 4 | ) 5 | 6 | name := "Flink Project" 7 | 8 | version := "0.1-SNAPSHOT" 9 | 10 | organization := "org.example" 11 | 12 | ThisBuild / scalaVersion := "2.12.7" 13 | 14 | val flinkVersion = "1.7.0" 15 | 16 | val flinkDependencies = Seq( 17 | "org.apache.flink" %% "flink-scala" % flinkVersion % "provided", 18 | "org.apache.flink" %% "flink-streaming-scala" % flinkVersion % "provided") 19 | 20 | lazy val root = (project in file(".")). 
21 | settings( 22 | libraryDependencies ++= flinkDependencies 23 | ) 24 | 25 | assembly / mainClass := Some("org.example.Job") 26 | 27 | // make run command include the provided dependencies 28 | Compile / run := Defaults.runTask(Compile / fullClasspath, 29 | Compile / run / mainClass, 30 | Compile / run / runner 31 | ).evaluated 32 | 33 | // stays inside the sbt console when we press "ctrl-c" while a Flink programme executes with "run" or "runMain" 34 | Compile / run / fork := true 35 | Global / cancelable := true 36 | 37 | // exclude Scala library from assembly 38 | assembly / assemblyOption := (assembly / assemblyOption).value.copy(includeScala = false) 39 | -------------------------------------------------------------------------------- /idea.sbt: -------------------------------------------------------------------------------- 1 | lazy val mainRunner = project.in(file("mainRunner")).dependsOn(RootProject(file("."))).settings( 2 | // we set all provided dependencies to none, so that they are included in the classpath of mainRunner 3 | libraryDependencies := (libraryDependencies in RootProject(file("."))).value.map{ 4 | module => module.configurations match { 5 | case Some("provided") => module.withConfigurations(None) 6 | case _ => module 7 | } 8 | } 9 | ) -------------------------------------------------------------------------------- /project/assembly.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.6") -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=1.1.1 -------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | 
################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | log4j.rootLogger=INFO, console 20 | 21 | log4j.appender.console=org.apache.log4j.ConsoleAppender 22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 23 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n 24 | -------------------------------------------------------------------------------- /src/main/scala/org/example/Job.scala: -------------------------------------------------------------------------------- 1 | package org.example 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. 
You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | import org.apache.flink.api.scala._ 22 | 23 | /** 24 | * Skeleton for a Flink Job. 25 | * 26 | * For a full example of a Flink Job, see the WordCountJob.scala file in the 27 | * same package/directory or have a look at the website. 28 | * 29 | * You can also generate a .jar file that you can submit on your Flink 30 | * cluster. Just type 31 | * {{{ 32 | * sbt clean assembly 33 | * }}} 34 | * in the project's root directory. You will find the jar in 35 | * target/scala-2.12/Flink\ Project-assembly-0.1-SNAPSHOT.jar 36 | * 37 | */ 38 | object Job { 39 | def main(args: Array[String]) { 40 | // set up the execution environment 41 | val env = ExecutionEnvironment.getExecutionEnvironment 42 | 43 | /** 44 | * Here, you can start creating your execution plan for Flink. 45 | * 46 | * Start with getting some data from the environment, like 47 | * env.readTextFile(textPath); 48 | * 49 | * then, transform the resulting DataSet[String] using operations 50 | * like: 51 | * .filter() 52 | * .flatMap() 53 | * .join() 54 | * .group() 55 | * 56 | * and many more. 
57 | * Have a look at the programming guide: 58 | * 59 | * http://flink.apache.org/docs/latest/programming_guide.html 60 | * 61 | * and the examples 62 | * 63 | * http://flink.apache.org/docs/latest/examples.html 64 | * 65 | */ 66 | 67 | 68 | // execute program 69 | env.execute("Flink Scala API Skeleton") 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/main/scala/org/example/SocketTextStreamWordCount.scala: -------------------------------------------------------------------------------- 1 | package org.example 2 | 3 | /* 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | import org.apache.flink.streaming.api.scala._ 22 | 23 | /** 24 | * This example shows an implementation of WordCount with data from a text socket. 25 | * To run the example make sure that the service providing the text data is already up and running. 
26 | * 27 | * To start an example socket text stream on your local machine run netcat from a command line, 28 | * where the parameter specifies the port number: 29 | * 30 | * {{{ 31 | * nc -lk 9999 32 | * }}} 33 | * 34 | * Usage: 35 | * {{{ 36 | * SocketTextStreamWordCount <hostname> <port> 37 | * }}} 38 | * 39 | * This example shows how to: 40 | * 41 | * - use StreamExecutionEnvironment.socketTextStream 42 | * - write a simple Flink Streaming program in scala. 43 | * - write and use user-defined functions. 44 | */ 45 | object SocketTextStreamWordCount { 46 | 47 | def main(args: Array[String]) { 48 | if (args.length != 2) { 49 | System.err.println("USAGE:\nSocketTextStreamWordCount <hostname> <port>") 50 | return 51 | } 52 | 53 | val hostName = args(0) 54 | val port = args(1).toInt 55 | 56 | val env = StreamExecutionEnvironment.getExecutionEnvironment 57 | 58 | //Create streams for names and ages by mapping the inputs to the corresponding objects 59 | val text = env.socketTextStream(hostName, port) 60 | val counts = text.flatMap { _.toLowerCase.split("\\W+") filter { _.nonEmpty } } 61 | .map { (_, 1) } 62 | .keyBy(0) 63 | .sum(1) 64 | 65 | counts print 66 | 67 | env.execute("Scala SocketTextStreamWordCount Example") 68 | } 69 | 70 | } 71 | -------------------------------------------------------------------------------- /src/main/scala/org/example/WordCount.scala: -------------------------------------------------------------------------------- 1 | package org.example 2 | 3 | /** 4 | * Licensed to the Apache Software Foundation (ASF) under one 5 | * or more contributor license agreements. See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. The ASF licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. 
You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | import org.apache.flink.api.scala._ 22 | 23 | /** 24 | * Implements the "WordCount" program that computes a simple word occurrence histogram 25 | * over some sample data 26 | * 27 | * This example shows how to: 28 | * 29 | * - write a simple Flink program. 30 | * - use Tuple data types. 31 | * - write and use user-defined functions. 32 | */ 33 | object WordCount { 34 | def main(args: Array[String]) { 35 | 36 | // set up the execution environment 37 | val env = ExecutionEnvironment.getExecutionEnvironment 38 | 39 | // get input data 40 | val text = env.fromElements("To be, or not to be,--that is the question:--", 41 | "Whether 'tis nobler in the mind to suffer", "The slings and arrows of outrageous fortune", 42 | "Or to take arms against a sea of troubles,") 43 | 44 | val counts = text.flatMap { _.toLowerCase.split("\\W+") } 45 | .map { (_, 1) } 46 | .groupBy(0) 47 | .sum(1) 48 | 49 | // execute and print result 50 | counts.print() 51 | 52 | } 53 | } 54 | --------------------------------------------------------------------------------