├── .gitattributes ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .scalafmt.conf ├── LICENSE.md ├── README.md ├── build.sbt ├── project ├── build.properties └── plugins.sbt └── src ├── main └── scala │ └── io │ └── findify │ └── flink │ └── api │ ├── AllWindowedStream.scala │ ├── AsyncDataStream.scala │ ├── BroadcastConnectedStream.scala │ ├── CloseableIterator.scala │ ├── ClosureCleaner.scala │ ├── CoGroupedStreams.scala │ ├── ConnectedStreams.scala │ ├── DataStream.scala │ ├── DataStreamUtils.scala │ ├── JoinedStreams.scala │ ├── KeyedStream.scala │ ├── OutputTag.scala │ ├── ScalaStreamOps.scala │ ├── StreamExecutionEnvironment.scala │ ├── WindowedStream.scala │ ├── async │ ├── AsyncFunction.scala │ ├── JavaResultFutureWrapper.scala │ ├── ResultFuture.scala │ ├── RichAsyncFunction.scala │ └── ScalaRichAsyncFunctionWrapper.scala │ ├── extensions │ ├── impl │ │ └── acceptPartialFunctions │ │ │ ├── OnConnectedStream.scala │ │ │ ├── OnDataStream.scala │ │ │ ├── OnJoinedStream.scala │ │ │ ├── OnKeyedStream.scala │ │ │ └── OnWindowedStream.scala │ └── package.scala │ └── function │ ├── AllWindowFunction.scala │ ├── ProcessAllWindowFunction.scala │ ├── ProcessWindowFunction.scala │ ├── RichAllWindowFunction.scala │ ├── RichWindowFunction.scala │ ├── StatefulFunction.scala │ ├── WindowFunction.scala │ └── util │ ├── ScalaAllWindowFunction.scala │ ├── ScalaAllWindowFunctionWrapper.scala │ ├── ScalaProcessWindowFunctionWrapper.scala │ ├── ScalaReduceFunction.scala │ ├── ScalaWindowFunction.scala │ └── ScalaWindowFunctionWrapper.scala └── test └── scala └── io └── findify └── flink └── api ├── CoGroupedStreamsTest.scala ├── DataStreamTest.scala ├── JoinedStreamsTest.scala └── StreamExecutionEnvironmentTest.scala /.gitattributes: -------------------------------------------------------------------------------- 1 | *.tsv filter=lfs diff=lfs merge=lfs -text 2 | *.gz filter=lfs diff=lfs merge=lfs -text 3 | *.json filter=lfs diff=lfs merge=lfs -text 4 | *.dat filter=lfs diff=lfs merge=lfs -text 5 | *.jpg filter=lfs diff=lfs merge=lfs -text 6 | *.svg filter=lfs diff=lfs merge=lfs -text 7 | *.png filter=lfs diff=lfs merge=lfs -text 8 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | # This workflow will build a Java project with Maven 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-maven 3 | 4 | name: CI 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | build: 14 | runs-on: ${{ matrix.platform }} 15 | strategy: 16 | matrix: 17 | java: [11, 17] 18 | scala: [2.12.15, 2.13.8, 3.1.2] 19 | platform: [ubuntu-20.04] 20 | steps: 21 | - uses: actions/checkout@v2 22 | with: 23 | lfs: true 24 | - name: Set up JDK 25 | uses: actions/setup-java@v1 26 | with: 27 | java-version: ${{ matrix.java }} 28 | 29 | - name: Cache maven packages 30 | uses: actions/cache@v2 31 | env: 32 | cache-name: cache-sbt 33 | with: 34 | path: ~/.m2 ~/.coursier ~/.cache/coursier ~/.ivy2 ~/.sbt 35 | key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ hashFiles('**/pom.xml') }} 36 | restore-keys: | 37 | ${{ runner.os }}-build-${{ env.cache-name }}- 38 | ${{ runner.os }}-build- 39 | ${{ runner.os }}- 40 | - name: Run tests 41 | run: JAVA_OPTS="--add-opens java.base/java.lang=ALL-UNNAMED" sbt "++ ${{ matrix.scala }} test" 42 | 43 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | project/project 3 | project/target 4 | target 5 | .DS_STORE 6 | .git 7 | .bsp 8 | .run -------------------------------------------------------------------------------- /.scalafmt.conf: -------------------------------------------------------------------------------- 1 | style = defaultWithAlign 2 | maxColumn = 120 3 | version = 3.5.3 4 | assumeStandardLibraryStripMargin = true 5 | align.stripMargin = true 6 | runner.dialect = scala212 -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # Scala 2.12/2.13/3.x API for Apache Flink
2 | 
3 | [![CI Status](https://github.com/findify/flink-scala-api/workflows/CI/badge.svg)](https://github.com/findify/flink-scala-api/actions)
4 | [![Maven Central](https://maven-badges.herokuapp.com/maven-central/io.findify/flink-scala-api_2.12/badge.svg?style=plastic)](https://maven-badges.herokuapp.com/maven-central/io.findify/flink-scala-api_2.12)
5 | [![License: Apache 2](https://img.shields.io/badge/License-Apache2-green.svg)](https://opensource.org/licenses/Apache-2.0)
6 | ![Last commit](https://img.shields.io/github/last-commit/findify/flink-scala-api)
7 | ![Last release](https://img.shields.io/github/release/findify/flink-scala-api)
8 | 
9 | This project is a community-maintained fork of the official Apache Flink 1.15 Scala API, cross-built for Scala 2.12, 2.13 and 3.x.
10 | 
11 | ## Differences
12 | 
13 | ### New [magnolia](https://github.com/softwaremill/magnolia)-based serialization framework
14 | 
15 | Flink's official serialization framework has two important drawbacks that complicate the upgrade to Scala 2.13+:
16 | * it uses a complicated `TypeInformation` derivation macro, which would require a complete rewrite to work on Scala 3.
17 | * to serialize a `Traversable[_]` it serialized the actual Scala code of the corresponding `CanBuildFrom[_]` builder,
18 | which was then compiled and executed on deserialization. `CanBuildFrom[_]` no longer exists on Scala 2.13+, so there is
19 | no easy migration path.
20 | 
21 | This project relies on the [Flink-ADT](https://github.com/findify/flink-adt) library to derive serializers for all
22 | types, with the following perks:
23 | * ADT support: your `sealed trait` members won't fall back to the extremely slow Kryo serializer
24 | * case objects: no more problems with `None`
25 | * uses implicits (and typeclasses in Scala 3) to customize the serialization
26 | 
27 | But there are some drawbacks:
28 | * Savepoints written with Flink's official serialization API are not compatible, so you need to re-bootstrap your job
29 | from scratch.
30 | * As serializer derivation happens at compile time and uses zero runtime reflection, compile times are quite high for
31 | deeply nested rich case classes.
32 | 
33 | See the [Flink-ADT](https://github.com/findify/flink-adt) readme for more details.
34 | 
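To make the ADT support above concrete, here is a minimal sketch of what derivation looks like for a `sealed trait` hierarchy. The `Event` types below are made up for illustration; `deriveTypeInformation` and the `io.findify.flinkadt.api._` import are the same ones used in the snippets further down this README:

```scala
import org.apache.flink.api.common.typeinfo.TypeInformation
import io.findify.flinkadt.api._

// A hypothetical event ADT, purely for illustration
sealed trait Event
case class Click(sessionId: String)  extends Event
case class Purchase(amount: Double)  extends Event
case object Heartbeat                extends Event

object EventTypeInfo {
  // One derived serializer covers the whole hierarchy, including the case object,
  // so none of the members fall back to Kryo
  implicit lazy val eventTypeInfo: TypeInformation[Event] = deriveTypeInformation[Event]
}
```

With `EventTypeInfo._` imported, any operator that needs a `TypeInformation[Event]` picks this instance up implicitly.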
This project uses a more recent version, hopefully with less 56 | compatibility issues. 57 | 58 | ### No Legacy DataSet API 59 | 60 | Sorry, but it's already deprecated and as a community project we have no resources to support it. If you need it, 61 | PRs are welcome. 62 | 63 | ## Migration 64 | 65 | `flink-scala-api` uses a different package name for all api-related classes like `DataStream`, so you can do 66 | gradual migration of a big project and use both upstream and this versions of scala API in the same project. 67 | 68 | The actual migration should be straightforward and simple, replace old import to the new ones: 69 | ```scala 70 | // original api import 71 | import org.apache.flink.streaming.api.scala._ 72 | 73 | // flink-scala-api imports 74 | import io.findify.flink.api._ 75 | import io.findify.flinkadt.api._ 76 | ``` 77 | 78 | ## Usage 79 | 80 | `flink-scala-api` is released to Maven-central for 2.12, 2.13 and 3. For SBT, add this snippet to `build.sbt`: 81 | ```scala 82 | libraryDependencies += "io.findify" %% "flink-scala-api" % "1.15-1" 83 | ``` 84 | 85 | We suggest to remove `flink-scala` and `flink-streaming-scala` dependencies altogether to simplify the migration and 86 | not to mix two flavors of API in the same project. But it's technically possible and not required. 87 | 88 | ## Scala 3 89 | 90 | Scala 3 support is highly experimental and not well-tested in production. Good thing is that most of the issues are compile-time, 91 | so quite easy to reproduce. If you have issues with `flink-adt` not deriving `TypeInformation[T]` for the `T` you want, 92 | submit a bug report! 93 | 94 | ## Compile times 95 | 96 | They may be quite bad for rich nested case classes due to compile-time serializer derivation. 97 | Derivation happens each time `flink-scala-api` needs an instance of the `TypeInformation[T]` implicit/type class: 98 | ```scala 99 | case class Foo(x: Int) { 100 | def inc(a: Int) = copy(x = x + a) 101 | } 102 | 103 | val env = StreamingExecutionEnvironment.createLocalEnvironment() 104 | env 105 | .fromCollection(List(Foo(1),Foo(2),Foo(3))) 106 | .map(x => x.inc(1)) // here the TypeInformation[Foo] is generated 107 | .map(x => x.inc(2)) // generated one more time again 108 | ``` 109 | 110 | If you're using the same instances of data structures in multiple jobs (or in multiple tests), consider caching the 111 | derived serializer in a separate compile unit and just importing it when needed: 112 | 113 | ```scala 114 | // file FooTypeInfo.scala 115 | object FooTypeInfo { 116 | lazy val fooTypeInfo: TypeInformation[Foo] = deriveTypeInformation[Foo] 117 | } 118 | 119 | // file SomeJob.scala 120 | case class Foo(x: Int) { 121 | def inc(a: Int) = copy(x = x + a) 122 | } 123 | 124 | import FooTypeInfo._ 125 | 126 | val env = StreamingExecutionEnvironment.createLocalEnvironment() 127 | env 128 | .fromCollection(List(Foo(1),Foo(2),Foo(3))) 129 | .map(x => x.inc(1)) // taken as an implicit 130 | .map(x => x.inc(2)) // again, no re-derivation 131 | 132 | ``` 133 | 134 | ## License 135 | 136 | This project is using parts of the Apache Flink codebase, so the whole project 137 | is licensed under an [Apache 2.0](LICENSE.md) software license. 
-------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | ThisBuild / version := "1.15-2" 2 | 3 | lazy val root = (project in file(".")) 4 | .settings( 5 | name := "flink-scala-api", 6 | scalaVersion := "3.1.2", 7 | crossScalaVersions := Seq("2.12.15", "2.13.8", "3.1.2"), 8 | libraryDependencies ++= Seq( 9 | "org.apache.flink" % "flink-streaming-java" % "1.15.0", 10 | "org.apache.flink" % "flink-java" % "1.15.0", 11 | "io.findify" %% "flink-adt" % "0.6.1", 12 | "org.scalatest" %% "scalatest" % "3.2.12" % Test, 13 | "org.apache.flink" % "flink-test-utils" % "1.15.0" % Test, 14 | "org.apache.flink" % "flink-test-utils-junit" % "1.15.0" % Test, 15 | "com.github.sbt" % "junit-interface" % "0.13.3" % Test, 16 | "org.scala-lang.modules" %% "scala-collection-compat" % "2.7.0" 17 | ), 18 | libraryDependencies += { 19 | if (scalaBinaryVersion.value.startsWith("2")) { 20 | "org.scala-lang" % "scala-reflect" % scalaVersion.value 21 | } else { 22 | "org.scala-lang" %% "scala3-compiler" % scalaVersion.value 23 | } 24 | }, 25 | organization := "io.findify", 26 | licenses := Seq("APL2" -> url("http://www.apache.org/licenses/LICENSE-2.0.txt")), 27 | homepage := Some(url("https://github.com/findify/flink-scala-api")), 28 | publishMavenStyle := true, 29 | publishTo := sonatypePublishToBundle.value, 30 | scalacOptions ++= Seq( 31 | "-deprecation", 32 | "-feature", 33 | "-language:higherKinds" 34 | ), 35 | scmInfo := Some( 36 | ScmInfo( 37 | url("https://github.com/findify/flink-scala-api"), 38 | "scm:git@github.com:findify/flink-scala-api.git" 39 | ) 40 | ), 41 | developers := List( 42 | Developer( 43 | id = "romangrebennikov", 44 | name = "Roman Grebennikov", 45 | email = "grv@dfdx.me", 46 | url = url("https://dfdx.me/") 47 | ) 48 | ) 49 | ) 50 | -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 1.6.2 2 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.timushev.sbt" % "sbt-updates" % "0.6.2") 2 | addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "3.9.13") 3 | addSbtPlugin("com.github.sbt" % "sbt-pgp" % "2.1.2") 4 | addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.6") 5 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/AllWindowedStream.scala: -------------------------------------------------------------------------------- 1 | package io.findify.flink.api 2 | 3 | import org.apache.flink.annotation.{Public, PublicEvolving} 4 | import org.apache.flink.api.common.functions.{AggregateFunction, ReduceFunction} 5 | import org.apache.flink.api.common.typeinfo.TypeInformation 6 | import org.apache.flink.streaming.api.datastream.{AllWindowedStream => JavaAllWStream} 7 | import org.apache.flink.streaming.api.functions.aggregation.AggregationFunction.AggregationType 8 | import org.apache.flink.streaming.api.functions.aggregation.{ComparableAggregator, SumAggregator} 9 | import io.findify.flink.api.function.util.{ 10 | ScalaAllWindowFunction, 11 | ScalaAllWindowFunctionWrapper, 12 | ScalaProcessAllWindowFunctionWrapper, 13 | ScalaReduceFunction 14 | } 15 | import io.findify.flink.api.function.{AllWindowFunction, 
ProcessAllWindowFunction} 16 | import org.apache.flink.streaming.api.windowing.evictors.Evictor 17 | import org.apache.flink.streaming.api.windowing.time.Time 18 | import org.apache.flink.streaming.api.windowing.triggers.Trigger 19 | import org.apache.flink.streaming.api.windowing.windows.Window 20 | import org.apache.flink.util.Collector 21 | import org.apache.flink.util.Preconditions.checkNotNull 22 | import ScalaStreamOps._ 23 | 24 | /** A [[AllWindowedStream]] represents a data stream where the stream of elements is split into windows based on a 25 | * [[org.apache.flink.streaming.api.windowing.assigners.WindowAssigner]]. Window emission is triggered based on a 26 | * [[Trigger]]. 27 | * 28 | * If an [[Evictor]] is specified it will be used to evict elements from the window after evaluation was triggered by 29 | * the [[Trigger]] but before the actual evaluation of the window. When using an evictor window performance will 30 | * degrade significantly, since pre-aggregation of window results cannot be used. 31 | * 32 | * Note that the [[AllWindowedStream()]] is purely and API construct, during runtime the [[AllWindowedStream()]] will 33 | * be collapsed together with the operation over the window into one single operation. 34 | * 35 | * @tparam T 36 | * The type of elements in the stream. 37 | * @tparam W 38 | * The type of [[Window]] that the [[org.apache.flink.streaming.api.windowing.assigners.WindowAssigner]] assigns the 39 | * elements to. 40 | */ 41 | @Public 42 | class AllWindowedStream[T, W <: Window](javaStream: JavaAllWStream[T, W]) { 43 | 44 | /** Sets the allowed lateness to a user-specified value. If not explicitly set, the allowed lateness is [[0L]]. 45 | * Setting the allowed lateness is only valid for event-time windows. If a value different than 0 is provided with a 46 | * processing-time [[org.apache.flink.streaming.api.windowing.assigners.WindowAssigner]], then an exception is 47 | * thrown. 48 | */ 49 | @PublicEvolving 50 | def allowedLateness(lateness: Time): AllWindowedStream[T, W] = { 51 | javaStream.allowedLateness(lateness) 52 | this 53 | } 54 | 55 | /** Send late arriving data to the side output identified by the given [[OutputTag]]. Data is considered late after 56 | * the watermark has passed the end of the window plus the allowed lateness set using [[allowedLateness(Time)]]. 57 | * 58 | * You can get the stream of late data using [[DataStream.getSideOutput()]] on the [[DataStream]] resulting from the 59 | * windowed operation with the same [[OutputTag]]. 60 | */ 61 | @PublicEvolving 62 | def sideOutputLateData(outputTag: OutputTag[T]): AllWindowedStream[T, W] = { 63 | javaStream.sideOutputLateData(outputTag) 64 | this 65 | } 66 | 67 | /** Sets the [[Trigger]] that should be used to trigger window emission. 68 | */ 69 | @PublicEvolving 70 | def trigger(trigger: Trigger[_ >: T, _ >: W]): AllWindowedStream[T, W] = { 71 | javaStream.trigger(trigger) 72 | this 73 | } 74 | 75 | /** Sets the [[Evictor]] that should be used to evict elements from a window before emission. 76 | * 77 | * Note: When using an evictor window performance will degrade significantly, since pre-aggregation of window results 78 | * cannot be used. 
79 | */ 80 | @PublicEvolving 81 | def evictor(evictor: Evictor[_ >: T, _ >: W]): AllWindowedStream[T, W] = { 82 | javaStream.evictor(evictor) 83 | this 84 | } 85 | 86 | // ------------------------------------------------------------------------ 87 | // Operations on the windows 88 | // ------------------------------------------------------------------------ 89 | 90 | // ---------------------------- reduce() ------------------------------------ 91 | 92 | /** Applies a reduce function to the window. The window function is called for each evaluation of the window for each 93 | * key individually. The output of the reduce function is interpreted as a regular non-windowed stream. 94 | * 95 | * This window will try and pre-aggregate data as much as the window policies permit. For example, tumbling time 96 | * windows can perfectly pre-aggregate the data, meaning that only one element per key is stored. Sliding time 97 | * windows will pre-aggregate on the granularity of the slide interval, so a few elements are stored per key (one per 98 | * slide interval). Custom windows may not be able to pre-aggregate, or may need to store extra values in an 99 | * aggregation tree. 100 | * 101 | * @param function 102 | * The reduce function. 103 | * @return 104 | * The data stream that is the result of applying the reduce function to the window. 105 | */ 106 | def reduce(function: ReduceFunction[T]): DataStream[T] = { 107 | asScalaStream(javaStream.reduce(clean(function))) 108 | } 109 | 110 | /** Applies a reduce function to the window. The window function is called for each evaluation of the window for each 111 | * key individually. The output of the reduce function is interpreted as a regular non-windowed stream. 112 | * 113 | * This window will try and pre-aggregate data as much as the window policies permit. For example, tumbling time 114 | * windows can perfectly pre-aggregate the data, meaning that only one element per key is stored. Sliding time 115 | * windows will pre-aggregate on the granularity of the slide interval, so a few elements are stored per key (one per 116 | * slide interval). Custom windows may not be able to pre-aggregate, or may need to store extra values in an 117 | * aggregation tree. 118 | * 119 | * @param function 120 | * The reduce function. 121 | * @return 122 | * The data stream that is the result of applying the reduce function to the window. 123 | */ 124 | def reduce(function: (T, T) => T): DataStream[T] = { 125 | if (function == null) { 126 | throw new NullPointerException("Reduce function must not be null.") 127 | } 128 | val cleanFun = clean(function) 129 | val reducer = new ScalaReduceFunction[T](cleanFun) 130 | 131 | reduce(reducer) 132 | } 133 | 134 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 135 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 136 | * 137 | * Arriving data is pre-aggregated using the given pre-aggregation reducer. 138 | * 139 | * @param preAggregator 140 | * The reduce function that is used for pre-aggregation 141 | * @param windowFunction 142 | * The window function. 143 | * @return 144 | * The data stream that is the result of applying the window function to the window. 
145 | */ 146 | def reduce[R: TypeInformation]( 147 | preAggregator: ReduceFunction[T], 148 | windowFunction: AllWindowFunction[T, R, W] 149 | ): DataStream[R] = { 150 | 151 | val cleanedReducer = clean(preAggregator) 152 | val cleanedWindowFunction = clean(windowFunction) 153 | 154 | val applyFunction = new ScalaAllWindowFunctionWrapper[T, R, W](cleanedWindowFunction) 155 | 156 | val returnType: TypeInformation[R] = implicitly[TypeInformation[R]] 157 | asScalaStream(javaStream.reduce(cleanedReducer, applyFunction, returnType)) 158 | } 159 | 160 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 161 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 162 | * 163 | * Arriving data is pre-aggregated using the given pre-aggregation reducer. 164 | * 165 | * @param preAggregator 166 | * The reduce function that is used for pre-aggregation 167 | * @param windowFunction 168 | * The window function. 169 | * @return 170 | * The data stream that is the result of applying the window function to the window. 171 | */ 172 | def reduce[R: TypeInformation]( 173 | preAggregator: (T, T) => T, 174 | windowFunction: (W, Iterable[T], Collector[R]) => Unit 175 | ): DataStream[R] = { 176 | 177 | if (preAggregator == null) { 178 | throw new NullPointerException("Reduce function must not be null.") 179 | } 180 | if (windowFunction == null) { 181 | throw new NullPointerException("WindowApply function must not be null.") 182 | } 183 | 184 | val cleanReducer = clean(preAggregator) 185 | val cleanWindowFunction = clean(windowFunction) 186 | 187 | val reducer = new ScalaReduceFunction[T](cleanReducer) 188 | val applyFunction = new ScalaAllWindowFunction[T, R, W](cleanWindowFunction) 189 | 190 | val returnType: TypeInformation[R] = implicitly[TypeInformation[R]] 191 | asScalaStream(javaStream.reduce(reducer, applyFunction, returnType)) 192 | } 193 | 194 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 195 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 196 | * 197 | * Arriving data is pre-aggregated using the given pre-aggregation reducer. 198 | * 199 | * @param preAggregator 200 | * The reduce function that is used for pre-aggregation 201 | * @param windowFunction 202 | * The process window function. 203 | * @return 204 | * The data stream that is the result of applying the window function to the window. 205 | */ 206 | @PublicEvolving 207 | def reduce[R: TypeInformation]( 208 | preAggregator: ReduceFunction[T], 209 | windowFunction: ProcessAllWindowFunction[T, R, W] 210 | ): DataStream[R] = { 211 | 212 | val cleanedReducer = clean(preAggregator) 213 | val cleanedWindowFunction = clean(windowFunction) 214 | 215 | val applyFunction = new ScalaProcessAllWindowFunctionWrapper[T, R, W](cleanedWindowFunction) 216 | 217 | val returnType: TypeInformation[R] = implicitly[TypeInformation[R]] 218 | asScalaStream(javaStream.reduce(cleanedReducer, applyFunction, returnType)) 219 | } 220 | 221 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 222 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 223 | * 224 | * Arriving data is pre-aggregated using the given pre-aggregation reducer. 
225 | * 226 | * @param preAggregator 227 | * The reduce function that is used for pre-aggregation 228 | * @param windowFunction 229 | * The process window function. 230 | * @return 231 | * The data stream that is the result of applying the window function to the window. 232 | */ 233 | @PublicEvolving 234 | def reduce[R: TypeInformation]( 235 | preAggregator: (T, T) => T, 236 | windowFunction: ProcessAllWindowFunction[T, R, W] 237 | ): DataStream[R] = { 238 | 239 | if (preAggregator == null) { 240 | throw new NullPointerException("Reduce function must not be null.") 241 | } 242 | if (windowFunction == null) { 243 | throw new NullPointerException("WindowApply function must not be null.") 244 | } 245 | 246 | val cleanReducer = clean(preAggregator) 247 | val cleanWindowFunction = clean(windowFunction) 248 | 249 | val reducer = new ScalaReduceFunction[T](cleanReducer) 250 | val applyFunction = new ScalaProcessAllWindowFunctionWrapper[T, R, W](cleanWindowFunction) 251 | 252 | val returnType: TypeInformation[R] = implicitly[TypeInformation[R]] 253 | asScalaStream(javaStream.reduce(reducer, applyFunction, returnType)) 254 | } 255 | 256 | // --------------------------- aggregate() ---------------------------------- 257 | 258 | /** Applies the given aggregation function to each window. The aggregation function is called for each element, 259 | * aggregating values incrementally and keeping the state to one accumulator per window. 260 | * 261 | * @param aggregateFunction 262 | * The aggregation function. 263 | * @return 264 | * The data stream that is the result of applying the aggregate function to the window. 265 | */ 266 | @PublicEvolving 267 | def aggregate[ACC: TypeInformation, R: TypeInformation]( 268 | aggregateFunction: AggregateFunction[T, ACC, R] 269 | ): DataStream[R] = { 270 | 271 | checkNotNull(aggregateFunction, "AggregationFunction must not be null") 272 | 273 | val accumulatorType: TypeInformation[ACC] = implicitly[TypeInformation[ACC]] 274 | val resultType: TypeInformation[R] = implicitly[TypeInformation[R]] 275 | 276 | asScalaStream(javaStream.aggregate(clean(aggregateFunction), accumulatorType, resultType)) 277 | } 278 | 279 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 280 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 281 | * 282 | * Arriving data is pre-aggregated using the given aggregation function. 283 | * 284 | * @param preAggregator 285 | * The aggregation function that is used for pre-aggregation 286 | * @param windowFunction 287 | * The window function. 288 | * @return 289 | * The data stream that is the result of applying the window function to the window. 
290 | */ 291 | @PublicEvolving 292 | def aggregate[ACC: TypeInformation, V: TypeInformation, R: TypeInformation]( 293 | preAggregator: AggregateFunction[T, ACC, V], 294 | windowFunction: AllWindowFunction[V, R, W] 295 | ): DataStream[R] = { 296 | 297 | checkNotNull(preAggregator, "AggregationFunction must not be null") 298 | checkNotNull(windowFunction, "Window function must not be null") 299 | 300 | val cleanedPreAggregator = clean(preAggregator) 301 | val cleanedWindowFunction = clean(windowFunction) 302 | 303 | val applyFunction = new ScalaAllWindowFunctionWrapper[V, R, W](cleanedWindowFunction) 304 | 305 | val accumulatorType: TypeInformation[ACC] = implicitly[TypeInformation[ACC]] 306 | val resultType: TypeInformation[R] = implicitly[TypeInformation[R]] 307 | 308 | asScalaStream(javaStream.aggregate(cleanedPreAggregator, applyFunction, accumulatorType, resultType)) 309 | } 310 | 311 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 312 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 313 | * 314 | * Arriving data is pre-aggregated using the given aggregation function. 315 | * 316 | * @param preAggregator 317 | * The aggregation function that is used for pre-aggregation 318 | * @param windowFunction 319 | * The process window function. 320 | * @return 321 | * The data stream that is the result of applying the window function to the window. 322 | */ 323 | @PublicEvolving 324 | def aggregate[ACC: TypeInformation, V: TypeInformation, R: TypeInformation]( 325 | preAggregator: AggregateFunction[T, ACC, V], 326 | windowFunction: ProcessAllWindowFunction[V, R, W] 327 | ): DataStream[R] = { 328 | 329 | checkNotNull(preAggregator, "AggregationFunction must not be null") 330 | checkNotNull(windowFunction, "Window function must not be null") 331 | 332 | val cleanedPreAggregator = clean(preAggregator) 333 | val cleanedWindowFunction = clean(windowFunction) 334 | 335 | val applyFunction = new ScalaProcessAllWindowFunctionWrapper[V, R, W](cleanedWindowFunction) 336 | 337 | val accumulatorType: TypeInformation[ACC] = implicitly[TypeInformation[ACC]] 338 | val aggregationResultType: TypeInformation[V] = implicitly[TypeInformation[V]] 339 | val resultType: TypeInformation[R] = implicitly[TypeInformation[R]] 340 | 341 | asScalaStream( 342 | javaStream.aggregate(cleanedPreAggregator, applyFunction, accumulatorType, aggregationResultType, resultType) 343 | ) 344 | } 345 | 346 | /** Applies the given window function to each window. The window function is called for each evaluation of the window. 347 | * The output of the window function is interpreted as a regular non-windowed stream. 348 | * 349 | * Arriving data is pre-aggregated using the given aggregation function. 350 | * 351 | * @param preAggregator 352 | * The aggregation function that is used for pre-aggregation 353 | * @param windowFunction 354 | * The window function. 355 | * @return 356 | * The data stream that is the result of applying the window function to the window. 
357 | */ 358 | @PublicEvolving 359 | def aggregate[ACC: TypeInformation, V: TypeInformation, R: TypeInformation]( 360 | preAggregator: AggregateFunction[T, ACC, V], 361 | windowFunction: (W, Iterable[V], Collector[R]) => Unit 362 | ): DataStream[R] = { 363 | 364 | checkNotNull(preAggregator, "AggregationFunction must not be null") 365 | checkNotNull(windowFunction, "Window function must not be null") 366 | 367 | val cleanPreAggregator = clean(preAggregator) 368 | val cleanWindowFunction = clean(windowFunction) 369 | 370 | val applyFunction = new ScalaAllWindowFunction[V, R, W](cleanWindowFunction) 371 | 372 | val accumulatorType: TypeInformation[ACC] = implicitly[TypeInformation[ACC]] 373 | val resultType: TypeInformation[R] = implicitly[TypeInformation[R]] 374 | 375 | asScalaStream(javaStream.aggregate(cleanPreAggregator, applyFunction, accumulatorType, resultType)) 376 | } 377 | 378 | // ---------------------------- apply() ------------------------------------- 379 | 380 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 381 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 382 | * 383 | * Note that this function requires that all data in the windows is buffered until the window is evaluated, as the 384 | * function provides no means of pre-aggregation. 385 | * 386 | * @param function 387 | * The process window function. 388 | * @return 389 | * The data stream that is the result of applying the window function to the window. 390 | */ 391 | @PublicEvolving 392 | def process[R: TypeInformation](function: ProcessAllWindowFunction[T, R, W]): DataStream[R] = { 393 | 394 | val cleanedFunction = clean(function) 395 | val javaFunction = new ScalaProcessAllWindowFunctionWrapper[T, R, W](cleanedFunction) 396 | 397 | asScalaStream(javaStream.process(javaFunction, implicitly[TypeInformation[R]])) 398 | } 399 | 400 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 401 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 402 | * 403 | * Note that this function requires that all data in the windows is buffered until the window is evaluated, as the 404 | * function provides no means of pre-aggregation. 405 | * 406 | * @param function 407 | * The window function. 408 | * @return 409 | * The data stream that is the result of applying the window function to the window. 410 | */ 411 | def apply[R: TypeInformation](function: AllWindowFunction[T, R, W]): DataStream[R] = { 412 | 413 | val cleanedFunction = clean(function) 414 | val javaFunction = new ScalaAllWindowFunctionWrapper[T, R, W](cleanedFunction) 415 | 416 | asScalaStream(javaStream.apply(javaFunction, implicitly[TypeInformation[R]])) 417 | } 418 | 419 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 420 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 421 | * 422 | * Note that this function requires that all data in the windows is buffered until the window is evaluated, as the 423 | * function provides no means of pre-aggregation. 424 | * 425 | * @param function 426 | * The window function. 427 | * @return 428 | * The data stream that is the result of applying the window function to the window. 
429 | */ 430 | def apply[R: TypeInformation](function: (W, Iterable[T], Collector[R]) => Unit): DataStream[R] = { 431 | 432 | val cleanedFunction = clean(function) 433 | val applyFunction = new ScalaAllWindowFunction[T, R, W](cleanedFunction) 434 | 435 | asScalaStream(javaStream.apply(applyFunction, implicitly[TypeInformation[R]])) 436 | } 437 | 438 | // ------------------------------------------------------------------------ 439 | // Aggregations on the keyed windows 440 | // ------------------------------------------------------------------------ 441 | 442 | /** Applies an aggregation that that gives the maximum of the elements in the window at the given position. 443 | */ 444 | def max(position: Int): DataStream[T] = aggregate(AggregationType.MAX, position) 445 | 446 | /** Applies an aggregation that that gives the maximum of the elements in the window at the given field. 447 | */ 448 | def max(field: String): DataStream[T] = aggregate(AggregationType.MAX, field) 449 | 450 | /** Applies an aggregation that that gives the minimum of the elements in the window at the given position. 451 | */ 452 | def min(position: Int): DataStream[T] = aggregate(AggregationType.MIN, position) 453 | 454 | /** Applies an aggregation that that gives the minimum of the elements in the window at the given field. 455 | */ 456 | def min(field: String): DataStream[T] = aggregate(AggregationType.MIN, field) 457 | 458 | /** Applies an aggregation that sums the elements in the window at the given position. 459 | */ 460 | def sum(position: Int): DataStream[T] = aggregate(AggregationType.SUM, position) 461 | 462 | /** Applies an aggregation that sums the elements in the window at the given field. 463 | */ 464 | def sum(field: String): DataStream[T] = aggregate(AggregationType.SUM, field) 465 | 466 | /** Applies an aggregation that that gives the maximum element of the window by the given position. When equality, 467 | * returns the first. 468 | */ 469 | def maxBy(position: Int): DataStream[T] = aggregate(AggregationType.MAXBY, position) 470 | 471 | /** Applies an aggregation that that gives the maximum element of the window by the given field. When equality, 472 | * returns the first. 473 | */ 474 | def maxBy(field: String): DataStream[T] = aggregate(AggregationType.MAXBY, field) 475 | 476 | /** Applies an aggregation that that gives the minimum element of the window by the given position. When equality, 477 | * returns the first. 478 | */ 479 | def minBy(position: Int): DataStream[T] = aggregate(AggregationType.MINBY, position) 480 | 481 | /** Applies an aggregation that that gives the minimum element of the window by the given field. When equality, 482 | * returns the first. 
483 | */ 484 | def minBy(field: String): DataStream[T] = aggregate(AggregationType.MINBY, field) 485 | 486 | private def aggregate(aggregationType: AggregationType, field: String): DataStream[T] = { 487 | val position = fieldNames2Indices(getInputType(), Array(field))(0) 488 | aggregate(aggregationType, position) 489 | } 490 | 491 | def aggregate(aggregationType: AggregationType, position: Int): DataStream[T] = { 492 | 493 | val jStream = javaStream.asInstanceOf[JavaAllWStream[Product, W]] 494 | 495 | val reducer = aggregationType match { 496 | case AggregationType.SUM => 497 | new SumAggregator(position, jStream.getInputType, jStream.getExecutionEnvironment.getConfig) 498 | 499 | case _ => 500 | new ComparableAggregator( 501 | position, 502 | jStream.getInputType, 503 | aggregationType, 504 | true, 505 | jStream.getExecutionEnvironment.getConfig 506 | ) 507 | } 508 | 509 | new DataStream[Product](jStream.reduce(reducer)).asInstanceOf[DataStream[T]] 510 | } 511 | 512 | // ------------------------------------------------------------------------ 513 | // Utilities 514 | // ------------------------------------------------------------------------ 515 | 516 | /** Returns a "closure-cleaned" version of the given function. Cleans only if closure cleaning is not disabled in the 517 | * [[org.apache.flink.api.common.ExecutionConfig]]. 518 | */ 519 | private[flink] def clean[F <: AnyRef](f: F): F = { 520 | new StreamExecutionEnvironment(javaStream.getExecutionEnvironment).scalaClean(f) 521 | } 522 | 523 | /** Gets the output type. 524 | */ 525 | private def getInputType(): TypeInformation[T] = javaStream.getInputType 526 | } 527 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/AsyncDataStream.scala: -------------------------------------------------------------------------------- 1 | package io.findify.flink.api 2 | 3 | import io.findify.flink.api.async.{ 4 | AsyncFunction, 5 | JavaResultFutureWrapper, 6 | ResultFuture, 7 | RichAsyncFunction, 8 | ScalaRichAsyncFunctionWrapper 9 | } 10 | import org.apache.flink.annotation.PublicEvolving 11 | import org.apache.flink.api.common.typeinfo.TypeInformation 12 | import org.apache.flink.streaming.api.datastream.{AsyncDataStream => JavaAsyncDataStream} 13 | import org.apache.flink.streaming.api.functions.async.{ 14 | AsyncFunction => JavaAsyncFunction, 15 | ResultFuture => JavaResultFuture 16 | } 17 | import org.apache.flink.util.Preconditions 18 | import ScalaStreamOps._ 19 | import scala.concurrent.duration.TimeUnit 20 | 21 | /** A helper class to apply [[AsyncFunction]] to a data stream. 22 | * 23 | * Example: 24 | * {{{ 25 | * val input: DataStream[String] = ... 26 | * val asyncFunction: (String, ResultFuture[String]) => Unit = ... 27 | * 28 | * AsyncDataStream.orderedWait(input, asyncFunction, timeout, TimeUnit.MILLISECONDS, 100) 29 | * }}} 30 | */ 31 | @PublicEvolving 32 | object AsyncDataStream { 33 | 34 | private val DEFAULT_QUEUE_CAPACITY = 100 35 | 36 | /** Apply an asynchronous function on the input data stream. The output order is only maintained with respect to 37 | * watermarks. Stream records which lie between the same two watermarks, can be re-ordered. 
38 | * 39 | * @param input 40 | * to apply the async function on 41 | * @param asyncFunction 42 | * to use 43 | * @param timeout 44 | * for the asynchronous operation to complete 45 | * @param timeUnit 46 | * of the timeout 47 | * @param capacity 48 | * of the operator which is equivalent to the number of concurrent asynchronous operations 49 | * @tparam IN 50 | * Type of the input record 51 | * @tparam OUT 52 | * Type of the output record 53 | * @return 54 | * the resulting stream containing the asynchronous results 55 | */ 56 | def unorderedWait[IN, OUT: TypeInformation]( 57 | input: DataStream[IN], 58 | asyncFunction: AsyncFunction[IN, OUT], 59 | timeout: Long, 60 | timeUnit: TimeUnit, 61 | capacity: Int 62 | ): DataStream[OUT] = { 63 | 64 | val javaAsyncFunction = wrapAsJavaAsyncFunction(asyncFunction) 65 | 66 | val outType: TypeInformation[OUT] = implicitly[TypeInformation[OUT]] 67 | 68 | asScalaStream( 69 | JavaAsyncDataStream 70 | .unorderedWait[IN, OUT](input.javaStream, javaAsyncFunction, timeout, timeUnit, capacity) 71 | .returns(outType) 72 | ) 73 | } 74 | 75 | /** Apply an asynchronous function on the input data stream. The output order is only maintained with respect to 76 | * watermarks. Stream records which lie between the same two watermarks, can be re-ordered. 77 | * 78 | * @param input 79 | * to apply the async function on 80 | * @param asyncFunction 81 | * to use 82 | * @param timeout 83 | * for the asynchronous operation to complete 84 | * @param timeUnit 85 | * of the timeout 86 | * @tparam IN 87 | * Type of the input record 88 | * @tparam OUT 89 | * Type of the output record 90 | * @return 91 | * the resulting stream containing the asynchronous results 92 | */ 93 | def unorderedWait[IN, OUT: TypeInformation]( 94 | input: DataStream[IN], 95 | asyncFunction: AsyncFunction[IN, OUT], 96 | timeout: Long, 97 | timeUnit: TimeUnit 98 | ): DataStream[OUT] = { 99 | 100 | unorderedWait(input, asyncFunction, timeout, timeUnit, DEFAULT_QUEUE_CAPACITY) 101 | } 102 | 103 | /** Apply an asynchronous function on the input data stream. The output order is only maintained with respect to 104 | * watermarks. Stream records which lie between the same two watermarks, can be re-ordered. 
105 | * 106 | * @param input 107 | * to apply the async function on 108 | * @param timeout 109 | * for the asynchronous operation to complete 110 | * @param timeUnit 111 | * of the timeout 112 | * @param capacity 113 | * of the operator which is equivalent to the number of concurrent asynchronous operations 114 | * @param asyncFunction 115 | * to use 116 | * @tparam IN 117 | * Type of the input record 118 | * @tparam OUT 119 | * Type of the output record 120 | * @return 121 | * the resulting stream containing the asynchronous results 122 | */ 123 | def unorderedWait[IN, OUT: TypeInformation](input: DataStream[IN], timeout: Long, timeUnit: TimeUnit, capacity: Int)( 124 | asyncFunction: (IN, ResultFuture[OUT]) => Unit 125 | ): DataStream[OUT] = { 126 | 127 | Preconditions.checkNotNull(asyncFunction) 128 | 129 | val cleanAsyncFunction = input.executionEnvironment.scalaClean(asyncFunction) 130 | 131 | val func = new JavaAsyncFunction[IN, OUT] { 132 | override def asyncInvoke(input: IN, resultFuture: JavaResultFuture[OUT]): Unit = { 133 | 134 | cleanAsyncFunction(input, new JavaResultFutureWrapper[OUT](resultFuture)) 135 | } 136 | } 137 | 138 | val outType: TypeInformation[OUT] = implicitly[TypeInformation[OUT]] 139 | 140 | asScalaStream( 141 | JavaAsyncDataStream.unorderedWait[IN, OUT](input.javaStream, func, timeout, timeUnit, capacity).returns(outType) 142 | ) 143 | } 144 | 145 | /** Apply an asynchronous function on the input data stream. The output order is only maintained with respect to 146 | * watermarks. Stream records which lie between the same two watermarks, can be re-ordered. 147 | * 148 | * @param input 149 | * to apply the async function on 150 | * @param timeout 151 | * for the asynchronous operation to complete 152 | * @param timeUnit 153 | * of the timeout 154 | * @param asyncFunction 155 | * to use 156 | * @tparam IN 157 | * Type of the input record 158 | * @tparam OUT 159 | * Type of the output record 160 | * @return 161 | * the resulting stream containing the asynchronous results 162 | */ 163 | def unorderedWait[IN, OUT: TypeInformation](input: DataStream[IN], timeout: Long, timeUnit: TimeUnit)( 164 | asyncFunction: (IN, ResultFuture[OUT]) => Unit 165 | ): DataStream[OUT] = { 166 | unorderedWait(input, timeout, timeUnit, DEFAULT_QUEUE_CAPACITY)(asyncFunction) 167 | } 168 | 169 | /** Apply an asynchronous function on the input data stream. The output order is the same as the input order of the 170 | * elements. 
171 | * 172 | * @param input 173 | * to apply the async function on 174 | * @param asyncFunction 175 | * to use 176 | * @param timeout 177 | * for the asynchronous operation to complete 178 | * @param timeUnit 179 | * of the timeout 180 | * @param capacity 181 | * of the operator which is equivalent to the number of concurrent asynchronous operations 182 | * @tparam IN 183 | * Type of the input record 184 | * @tparam OUT 185 | * Type of the output record 186 | * @return 187 | * the resulting stream containing the asynchronous results 188 | */ 189 | def orderedWait[IN, OUT: TypeInformation]( 190 | input: DataStream[IN], 191 | asyncFunction: AsyncFunction[IN, OUT], 192 | timeout: Long, 193 | timeUnit: TimeUnit, 194 | capacity: Int 195 | ): DataStream[OUT] = { 196 | 197 | val javaAsyncFunction = wrapAsJavaAsyncFunction(asyncFunction) 198 | 199 | val outType: TypeInformation[OUT] = implicitly[TypeInformation[OUT]] 200 | 201 | asScalaStream( 202 | JavaAsyncDataStream 203 | .orderedWait[IN, OUT](input.javaStream, javaAsyncFunction, timeout, timeUnit, capacity) 204 | .returns(outType) 205 | ) 206 | } 207 | 208 | /** Apply an asynchronous function on the input data stream. The output order is the same as the input order of the 209 | * elements. 210 | * 211 | * @param input 212 | * to apply the async function on 213 | * @param asyncFunction 214 | * to use 215 | * @param timeout 216 | * for the asynchronous operation to complete 217 | * @param timeUnit 218 | * of the timeout 219 | * @tparam IN 220 | * Type of the input record 221 | * @tparam OUT 222 | * Type of the output record 223 | * @return 224 | * the resulting stream containing the asynchronous results 225 | */ 226 | def orderedWait[IN, OUT: TypeInformation]( 227 | input: DataStream[IN], 228 | asyncFunction: AsyncFunction[IN, OUT], 229 | timeout: Long, 230 | timeUnit: TimeUnit 231 | ): DataStream[OUT] = { 232 | orderedWait(input, asyncFunction, timeout, timeUnit, DEFAULT_QUEUE_CAPACITY) 233 | } 234 | 235 | /** Apply an asynchronous function on the input data stream. The output order is the same as the input order of the 236 | * elements. 
237 | * 238 | * @param input 239 | * to apply the async function on 240 | * @param timeout 241 | * for the asynchronous operation to complete 242 | * @param timeUnit 243 | * of the timeout 244 | * @param capacity 245 | * of the operator which is equivalent to the number of concurrent asynchronous operations 246 | * @param asyncFunction 247 | * to use 248 | * @tparam IN 249 | * Type of the input record 250 | * @tparam OUT 251 | * Type of the output record 252 | * @return 253 | * the resulting stream containing the asynchronous results 254 | */ 255 | def orderedWait[IN, OUT: TypeInformation](input: DataStream[IN], timeout: Long, timeUnit: TimeUnit, capacity: Int)( 256 | asyncFunction: (IN, ResultFuture[OUT]) => Unit 257 | ): DataStream[OUT] = { 258 | 259 | Preconditions.checkNotNull(asyncFunction) 260 | 261 | val cleanAsyncFunction = input.executionEnvironment.scalaClean(asyncFunction) 262 | 263 | val func = new JavaAsyncFunction[IN, OUT] { 264 | override def asyncInvoke(input: IN, resultFuture: JavaResultFuture[OUT]): Unit = { 265 | cleanAsyncFunction(input, new JavaResultFutureWrapper[OUT](resultFuture)) 266 | } 267 | } 268 | 269 | val outType: TypeInformation[OUT] = implicitly[TypeInformation[OUT]] 270 | 271 | asScalaStream( 272 | JavaAsyncDataStream.orderedWait[IN, OUT](input.javaStream, func, timeout, timeUnit, capacity).returns(outType) 273 | ) 274 | } 275 | 276 | /** Apply an asynchronous function on the input data stream. The output order is the same as the input order of the 277 | * elements. 278 | * 279 | * @param input 280 | * to apply the async function on 281 | * @param timeout 282 | * for the asynchronous operation to complete 283 | * @param timeUnit 284 | * of the timeout 285 | * @param asyncFunction 286 | * to use 287 | * @tparam IN 288 | * Type of the input record 289 | * @tparam OUT 290 | * Type of the output record 291 | * @return 292 | * the resulting stream containing the asynchronous results 293 | */ 294 | def orderedWait[IN, OUT: TypeInformation](input: DataStream[IN], timeout: Long, timeUnit: TimeUnit)( 295 | asyncFunction: (IN, ResultFuture[OUT]) => Unit 296 | ): DataStream[OUT] = { 297 | 298 | orderedWait(input, timeout, timeUnit, DEFAULT_QUEUE_CAPACITY)(asyncFunction) 299 | } 300 | 301 | private def wrapAsJavaAsyncFunction[IN, OUT: TypeInformation]( 302 | asyncFunction: AsyncFunction[IN, OUT] 303 | ): JavaAsyncFunction[IN, OUT] = asyncFunction match { 304 | case richAsyncFunction: RichAsyncFunction[IN, OUT] => 305 | new ScalaRichAsyncFunctionWrapper[IN, OUT](richAsyncFunction) 306 | case _ => 307 | new JavaAsyncFunction[IN, OUT] { 308 | override def asyncInvoke(input: IN, resultFuture: JavaResultFuture[OUT]): Unit = { 309 | asyncFunction.asyncInvoke(input, new JavaResultFutureWrapper[OUT](resultFuture)) 310 | } 311 | 312 | override def timeout(input: IN, resultFuture: JavaResultFuture[OUT]): Unit = { 313 | asyncFunction.timeout(input, new JavaResultFutureWrapper[OUT](resultFuture)) 314 | } 315 | } 316 | } 317 | } 318 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/BroadcastConnectedStream.scala: -------------------------------------------------------------------------------- 1 | package io.findify.flink.api 2 | 3 | import org.apache.flink.annotation.PublicEvolving 4 | import org.apache.flink.api.common.typeinfo.TypeInformation 5 | import org.apache.flink.streaming.api.datastream.{BroadcastConnectedStream => JavaBCStream} 6 | import 
org.apache.flink.streaming.api.functions.co.{BroadcastProcessFunction, KeyedBroadcastProcessFunction} 7 | import ScalaStreamOps._ 8 | 9 | class BroadcastConnectedStream[IN1, IN2](javaStream: JavaBCStream[IN1, IN2]) { 10 | 11 | /** Assumes as inputs a [[org.apache.flink.streaming.api.datastream.BroadcastStream]] and a [[KeyedStream]] and 12 | * applies the given [[KeyedBroadcastProcessFunction]] on them, thereby creating a transformed output stream. 13 | * 14 | * @param function 15 | * The [[KeyedBroadcastProcessFunction]] applied to each element in the stream. 16 | * @tparam KS 17 | * The type of the keys in the keyed stream. 18 | * @tparam OUT 19 | * The type of the output elements. 20 | * @return 21 | * The transformed [[DataStream]]. 22 | */ 23 | @PublicEvolving 24 | def process[KS, OUT: TypeInformation](function: KeyedBroadcastProcessFunction[KS, IN1, IN2, OUT]): DataStream[OUT] = { 25 | 26 | if (function == null) { 27 | throw new NullPointerException("KeyedBroadcastProcessFunction function must not be null.") 28 | } 29 | 30 | val outputTypeInfo: TypeInformation[OUT] = implicitly[TypeInformation[OUT]] 31 | asScalaStream(javaStream.process(function, outputTypeInfo)) 32 | } 33 | 34 | /** Assumes as inputs a [[org.apache.flink.streaming.api.datastream.BroadcastStream]] and a non-keyed [[DataStream]] 35 | * and applies the given [[org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction]] on them, thereby 36 | * creating a transformed output stream. 37 | * 38 | * @param function 39 | * The [[BroadcastProcessFunction]] applied to each element in the stream. 40 | * @tparam OUT 41 | * The type of the output elements. 42 | * @return 43 | * The transformed { @link DataStream}. 44 | */ 45 | @PublicEvolving 46 | def process[OUT: TypeInformation](function: BroadcastProcessFunction[IN1, IN2, OUT]): DataStream[OUT] = { 47 | 48 | if (function == null) { 49 | throw new NullPointerException("BroadcastProcessFunction function must not be null.") 50 | } 51 | 52 | val outputTypeInfo: TypeInformation[OUT] = implicitly[TypeInformation[OUT]] 53 | asScalaStream(javaStream.process(function, outputTypeInfo)) 54 | } 55 | 56 | /** Returns a "closure-cleaned" version of the given function. Cleans only if closure cleaning is not disabled in the 57 | * [[org.apache.flink.api.common.ExecutionConfig]] 58 | */ 59 | private[flink] def clean[F <: AnyRef](f: F) = { 60 | new StreamExecutionEnvironment(javaStream.getExecutionEnvironment).scalaClean(f) 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/CloseableIterator.scala: -------------------------------------------------------------------------------- 1 | package io.findify.flink.api 2 | 3 | import org.apache.flink.util.{CloseableIterator => JCloseableIterator} 4 | 5 | /** This interface represents an [[Iterator]] that is also [[AutoCloseable]]. A typical use-case for this interface are 6 | * iterators that are based on native-resources such as files, network, or database connections. Clients must call 7 | * close after using the iterator. 
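* A small usage sketch: wrapping a Java iterator and closing it when done. The `javaResults` value is a
* hypothetical [[org.apache.flink.util.CloseableIterator]] obtained elsewhere (for example from a job result):
* {{{
* val it: CloseableIterator[String] = CloseableIterator.fromJava(javaResults)
* try {
*   it.foreach(println) // behaves like an ordinary Scala Iterator
* } finally {
*   it.close()          // always release the underlying resource
* }
* }}}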
8 | */ 9 | trait CloseableIterator[T] extends Iterator[T] with AutoCloseable {} 10 | 11 | object CloseableIterator { 12 | 13 | def fromJava[T](it: JCloseableIterator[T]): CloseableIterator[T] = 14 | new CloseableIterator[T] { 15 | override def hasNext: Boolean = it.hasNext 16 | 17 | override def next(): T = it.next 18 | 19 | override def close(): Unit = it.close() 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/CoGroupedStreams.scala: -------------------------------------------------------------------------------- 1 | package io.findify.flink.api 2 | 3 | import org.apache.flink.annotation.{PublicEvolving, Public} 4 | import org.apache.flink.api.common.functions.CoGroupFunction 5 | import org.apache.flink.api.common.typeinfo.TypeInformation 6 | import org.apache.flink.api.java.functions.KeySelector 7 | import org.apache.flink.api.java.typeutils.ResultTypeQueryable 8 | import org.apache.flink.streaming.api.datastream.{CoGroupedStreams => JavaCoGroupedStreams} 9 | import org.apache.flink.streaming.api.windowing.assigners.WindowAssigner 10 | import org.apache.flink.streaming.api.windowing.evictors.Evictor 11 | import org.apache.flink.streaming.api.windowing.time.Time 12 | import org.apache.flink.streaming.api.windowing.triggers.Trigger 13 | import org.apache.flink.streaming.api.windowing.windows.Window 14 | import org.apache.flink.util.Collector 15 | import ScalaStreamOps._ 16 | import scala.jdk.CollectionConverters._ 17 | 18 | /** `CoGroupedStreams` represents two [[DataStream]]s that have been co-grouped. A streaming co-group operation is 19 | * evaluated over elements in a window. 20 | * 21 | * To finalize the co-group operation you also need to specify a [[KeySelector]] for both the first and second input 22 | * and a [[WindowAssigner]] 23 | * 24 | * Note: Right now, the groups are being built in memory so you need to ensure that they don't get too big. Otherwise 25 | * the JVM might crash. 26 | * 27 | * Example: 28 | * 29 | * {{{ 30 | * val one: DataStream[(String, Int)] = ... 31 | * val two: DataStream[(String, Int)] = ... 32 | * 33 | * val result = one.coGroup(two) 34 | * .where(new MyFirstKeySelector()) 35 | * .equalTo(new MyFirstKeySelector()) 36 | * .window(TumblingEventTimeWindows.of(Time.of(5, TimeUnit.SECONDS))) 37 | * .apply(new MyCoGroupFunction()) 38 | * } 39 | * }}} 40 | */ 41 | @Public 42 | class CoGroupedStreams[T1, T2](input1: DataStream[T1], input2: DataStream[T2]) { 43 | 44 | /** Specifies a [[KeySelector]] for elements from the first input. 45 | */ 46 | def where[KEY: TypeInformation](keySelector: T1 => KEY): Where[KEY] = { 47 | val cleanFun = clean(keySelector) 48 | val keyType = implicitly[TypeInformation[KEY]] 49 | val javaSelector = new KeySelector[T1, KEY] with ResultTypeQueryable[KEY] { 50 | def getKey(in: T1) = cleanFun(in) 51 | override def getProducedType: TypeInformation[KEY] = keyType 52 | } 53 | new Where[KEY](javaSelector, keyType) 54 | } 55 | 56 | /** A co-group operation that has [[KeySelector]]s defined for the first input. 57 | * 58 | * You need to specify a [[KeySelector]] for the second input using [[equalTo()]] before you can proceed with 59 | * specifying a [[WindowAssigner]] using [[EqualTo.window()]]. 60 | * 61 | * @tparam KEY 62 | * Type of the key. This must be the same for both inputs 63 | */ 64 | class Where[KEY](keySelector1: KeySelector[T1, KEY], keyType: TypeInformation[KEY]) { 65 | 66 | /** Specifies a [[KeySelector]] for elements from the second input. 
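* A hedged sketch of the complete fluent chain with plain Scala functions as key selectors; the stream
* contents and the window size are illustrative placeholders:
* {{{
* val clicks: DataStream[(String, Int)] = ...
* val views: DataStream[(String, Int)] = ...
*
* val counts: DataStream[(Int, Int)] = clicks
*   .coGroup(views)
*   .where(_._1)
*   .equalTo(_._1)
*   .window(TumblingEventTimeWindows.of(Time.seconds(5)))
*   .apply { (clicksInWindow, viewsInWindow) => (clicksInWindow.size, viewsInWindow.size) }
* }}}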
67 | */ 68 | def equalTo(keySelector: T2 => KEY): EqualTo = { 69 | val cleanFun = clean(keySelector) 70 | val localKeyType = keyType 71 | val javaSelector = new KeySelector[T2, KEY] with ResultTypeQueryable[KEY] { 72 | def getKey(in: T2) = cleanFun(in) 73 | override def getProducedType: TypeInformation[KEY] = localKeyType 74 | } 75 | new EqualTo(javaSelector) 76 | } 77 | 78 | /** A co-group operation that a [[KeySelector]] defined for the first and the second input. 79 | * 80 | * A window can now be specified using [[window()]]. 81 | */ 82 | class EqualTo(keySelector2: KeySelector[T2, KEY]) { 83 | 84 | /** Specifies the window on which the co-group operation works. 85 | */ 86 | @PublicEvolving 87 | def window[W <: Window]( 88 | assigner: WindowAssigner[_ >: JavaCoGroupedStreams.TaggedUnion[T1, T2], W] 89 | ): WithWindow[W] = { 90 | if (keySelector1 == null || keySelector2 == null) { 91 | throw new UnsupportedOperationException( 92 | "You first need to specify KeySelectors for both inputs using where() and equalTo()." 93 | ) 94 | } 95 | new WithWindow[W](clean(assigner), null, null, null) 96 | } 97 | 98 | /** A co-group operation that has [[KeySelector]]s defined for both inputs as well as a [[WindowAssigner]]. 99 | * 100 | * @tparam W 101 | * Type of { @link Window} on which the co-group operation works. 102 | */ 103 | @PublicEvolving 104 | class WithWindow[W <: Window]( 105 | windowAssigner: WindowAssigner[_ >: JavaCoGroupedStreams.TaggedUnion[T1, T2], W], 106 | trigger: Trigger[_ >: JavaCoGroupedStreams.TaggedUnion[T1, T2], _ >: W], 107 | evictor: Evictor[_ >: JavaCoGroupedStreams.TaggedUnion[T1, T2], _ >: W], 108 | val allowedLateness: Time 109 | ) { 110 | 111 | /** Sets the [[Trigger]] that should be used to trigger window emission. 112 | */ 113 | @PublicEvolving 114 | def trigger(newTrigger: Trigger[_ >: JavaCoGroupedStreams.TaggedUnion[T1, T2], _ >: W]): WithWindow[W] = { 115 | new WithWindow[W](windowAssigner, newTrigger, evictor, allowedLateness) 116 | } 117 | 118 | /** Sets the [[Evictor]] that should be used to evict elements from a window before emission. 119 | * 120 | * Note: When using an evictor window performance will degrade significantly, since pre-aggregation of window 121 | * results cannot be used. 122 | */ 123 | @PublicEvolving 124 | def evictor(newEvictor: Evictor[_ >: JavaCoGroupedStreams.TaggedUnion[T1, T2], _ >: W]): WithWindow[W] = { 125 | new WithWindow[W](windowAssigner, trigger, newEvictor, allowedLateness) 126 | } 127 | 128 | /** Sets the time by which elements are allowed to be late. Delegates to 129 | * [[WindowedStream#allowedLateness(Time)]] 130 | */ 131 | @PublicEvolving 132 | def allowedLateness(newLateness: Time): WithWindow[W] = { 133 | new WithWindow[W](windowAssigner, trigger, evictor, newLateness) 134 | } 135 | 136 | /** Completes the co-group operation with the user function that is executed for windowed groups. 137 | */ 138 | def apply[O: TypeInformation](fun: (Iterator[T1], Iterator[T2]) => O): DataStream[O] = { 139 | require(fun != null, "CoGroup function must not be null.") 140 | 141 | val coGrouper = new CoGroupFunction[T1, T2, O] { 142 | val cleanFun = clean(fun) 143 | def coGroup(left: java.lang.Iterable[T1], right: java.lang.Iterable[T2], out: Collector[O]) = { 144 | out.collect(cleanFun(left.iterator().asScala, right.iterator().asScala)) 145 | } 146 | } 147 | apply(coGrouper) 148 | } 149 | 150 | /** Completes the co-group operation with the user function that is executed for windowed groups. 
151 | */ 152 | def apply[O: TypeInformation](fun: (Iterator[T1], Iterator[T2], Collector[O]) => Unit): DataStream[O] = { 153 | require(fun != null, "CoGroup function must not be null.") 154 | 155 | val coGrouper = new CoGroupFunction[T1, T2, O] { 156 | val cleanFun = clean(fun) 157 | def coGroup(left: java.lang.Iterable[T1], right: java.lang.Iterable[T2], out: Collector[O]) = { 158 | cleanFun(left.iterator.asScala, right.iterator.asScala, out) 159 | } 160 | } 161 | apply(coGrouper) 162 | } 163 | 164 | /** Completes the co-group operation with the user function that is executed for windowed groups. 165 | */ 166 | def apply[T: TypeInformation](function: CoGroupFunction[T1, T2, T]): DataStream[T] = { 167 | 168 | val coGroup = new JavaCoGroupedStreams[T1, T2](input1.javaStream, input2.javaStream) 169 | 170 | asScalaStream( 171 | coGroup 172 | .where(keySelector1) 173 | .equalTo(keySelector2) 174 | .window(windowAssigner) 175 | .trigger(trigger) 176 | .evictor(evictor) 177 | .allowedLateness(allowedLateness) 178 | .apply(clean(function), implicitly[TypeInformation[T]]) 179 | ) 180 | } 181 | } 182 | 183 | } 184 | } 185 | 186 | /** Returns a "closure-cleaned" version of the given function. Cleans only if closure cleaning is not disabled in the 187 | * [[org.apache.flink.api.common.ExecutionConfig]]. 188 | */ 189 | private[flink] def clean[F <: AnyRef](f: F): F = { 190 | new StreamExecutionEnvironment(input1.javaStream.getExecutionEnvironment).scalaClean(f) 191 | } 192 | } 193 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/ConnectedStreams.scala: -------------------------------------------------------------------------------- 1 | package io.findify.flink.api 2 | 3 | import org.apache.flink.annotation.{Internal, Public, PublicEvolving} 4 | import org.apache.flink.api.common.typeinfo.TypeInformation 5 | import org.apache.flink.api.java.functions.KeySelector 6 | import org.apache.flink.streaming.api.datastream.{ConnectedStreams => JavaCStream, DataStream => JavaStream} 7 | import org.apache.flink.streaming.api.functions.co._ 8 | import org.apache.flink.streaming.api.operators.{TwoInputStreamOperator, TwoInputStreamOperatorFactory} 9 | import org.apache.flink.util.Collector 10 | import ScalaStreamOps._ 11 | 12 | /** [[ConnectedStreams]] represents two connected streams of (possibly) different data types. Connected streams are 13 | * useful for cases where operations on one stream directly affect the operations on the other stream, usually via 14 | * shared state between the streams. 15 | * 16 | * An example for the use of connected streams would be to apply rules that change over time onto another stream. One 17 | * of the connected streams has the rules, the other stream the elements to apply the rules to. The operation on the 18 | * connected stream maintains the current set of rules in the state. It may receive either a rule update and update the 19 | * state or a data element and apply the rules in the state to the element. 20 | * 21 | * The connected stream can be conceptually viewed as a union stream of an Either type, that holds either the first 22 | * stream's type or the second stream's type. 23 | */ 24 | @Public 25 | class ConnectedStreams[IN1, IN2](javaStream: JavaCStream[IN1, IN2]) { 26 | 27 | // ------------------------------------------------------ 28 | // Transformations 29 | // ------------------------------------------------------ 30 | 31 | /** Applies a CoMap transformation on the connected streams. 
32 | * 33 | * The transformation consists of two separate functions, where the first one is called for each element of the first 34 | * connected stream, and the second one is called for each element of the second connected stream. 35 | * 36 | * @param fun1 37 | * Function called per element of the first input. 38 | * @param fun2 39 | * Function called per element of the second input. 40 | * @return 41 | * The resulting data stream. 42 | */ 43 | def map[R: TypeInformation](fun1: IN1 => R, fun2: IN2 => R): DataStream[R] = { 44 | 45 | if (fun1 == null || fun2 == null) { 46 | throw new NullPointerException("Map function must not be null.") 47 | } 48 | val cleanFun1 = clean(fun1) 49 | val cleanFun2 = clean(fun2) 50 | val comapper = new CoMapFunction[IN1, IN2, R] { 51 | def map1(in1: IN1): R = cleanFun1(in1) 52 | def map2(in2: IN2): R = cleanFun2(in2) 53 | } 54 | 55 | map(comapper) 56 | } 57 | 58 | /** Applies a CoMap transformation on these connected streams. 59 | * 60 | * The transformation calls [[CoMapFunction#map1]] for each element in the first stream and [[CoMapFunction#map2]] 61 | * for each element of the second stream. 62 | * 63 | * On can pass a subclass of [[org.apache.flink.streaming.api.functions.co.RichCoMapFunction]] to gain access to the 64 | * [[org.apache.flink.api.common.functions.RuntimeContext]] and to additional life cycle methods. 65 | * 66 | * @param coMapper 67 | * The CoMapFunction used to transform the two connected streams 68 | * @return 69 | * The resulting data stream 70 | */ 71 | def map[R: TypeInformation](coMapper: CoMapFunction[IN1, IN2, R]): DataStream[R] = { 72 | if (coMapper == null) { 73 | throw new NullPointerException("Map function must not be null.") 74 | } 75 | 76 | val outType: TypeInformation[R] = implicitly[TypeInformation[R]] 77 | asScalaStream(javaStream.map(coMapper, outType).asInstanceOf[JavaStream[R]]) 78 | } 79 | 80 | /** Applies the given [[CoProcessFunction]] on the connected input streams, thereby creating a transformed output 81 | * stream. 82 | * 83 | * The function will be called for every element in the input streams and can produce zero or more output elements. 84 | * Contrary to the [[flatMap(CoFlatMapFunction)]] function, this function can also query the time and set timers. 85 | * When reacting to the firing of set timers the function can directly emit elements and/or register yet more timers. 86 | * 87 | * @param coProcessFunction 88 | * The [[CoProcessFunction]] that is called for each element in the stream. 89 | * @return 90 | * The transformed [[DataStream]]. 91 | */ 92 | @PublicEvolving 93 | def process[R: TypeInformation](coProcessFunction: CoProcessFunction[IN1, IN2, R]): DataStream[R] = { 94 | 95 | if (coProcessFunction == null) { 96 | throw new NullPointerException("CoProcessFunction function must not be null.") 97 | } 98 | 99 | val outType: TypeInformation[R] = implicitly[TypeInformation[R]] 100 | 101 | asScalaStream(javaStream.process(coProcessFunction, outType)) 102 | } 103 | 104 | /** Applies the given [[KeyedCoProcessFunction]] on the connected input keyed streams, thereby creating a transformed 105 | * output stream. 106 | * 107 | * The function will be called for every element in the input keyed streams and can produce zero or more output 108 | * elements. Contrary to the [[flatMap(CoFlatMapFunction)]] function, this function can also query the time and set 109 | * timers. When reacting to the firing of set timers the function can directly emit elements and/or register yet more 110 | * timers. 
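* An illustrative sketch; the `rules` and `events` streams, their String element type, and the implicit
* `TypeInformation` instances in scope are assumptions of the example:
* {{{
* val rules: DataStream[String]  = ...
* val events: DataStream[String] = ...
*
* val tagged: DataStream[String] = rules
*   .connect(events)
*   .keyBy(r => r, e => e)
*   .process(new KeyedCoProcessFunction[String, String, String, String] {
*     override def processElement1(
*         rule: String,
*         ctx: KeyedCoProcessFunction[String, String, String, String]#Context,
*         out: Collector[String]
*     ): Unit = out.collect(s"rule: $rule")
*
*     override def processElement2(
*         event: String,
*         ctx: KeyedCoProcessFunction[String, String, String, String]#Context,
*         out: Collector[String]
*     ): Unit = out.collect(s"event: $event")
*   })
* }}}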
111 | * 112 | * @param keyedCoProcessFunction 113 | * The [[KeyedCoProcessFunction]] that is called for each element in the stream. 114 | * @return 115 | * The transformed [[DataStream]]. 116 | */ 117 | @PublicEvolving 118 | def process[K, R: TypeInformation](keyedCoProcessFunction: KeyedCoProcessFunction[K, IN1, IN2, R]): DataStream[R] = { 119 | if (keyedCoProcessFunction == null) { 120 | throw new NullPointerException("KeyedCoProcessFunction function must not be null.") 121 | } 122 | 123 | val outType: TypeInformation[R] = implicitly[TypeInformation[R]] 124 | 125 | asScalaStream(javaStream.process(keyedCoProcessFunction, outType)) 126 | } 127 | 128 | /** Applies a CoFlatMap transformation on these connected streams. 129 | * 130 | * The transformation calls [[CoFlatMapFunction#flatMap1]] for each element in the first stream and 131 | * [[CoFlatMapFunction#flatMap2]] for each element of the second stream. 132 | * 133 | * On can pass a subclass of [[org.apache.flink.streaming.api.functions.co.RichCoFlatMapFunction]] to gain access to 134 | * the [[org.apache.flink.api.common.functions.RuntimeContext]] and to additional life cycle methods. 135 | * 136 | * @param coFlatMapper 137 | * The CoFlatMapFunction used to transform the two connected streams 138 | * @return 139 | * The resulting data stream. 140 | */ 141 | def flatMap[R: TypeInformation](coFlatMapper: CoFlatMapFunction[IN1, IN2, R]): DataStream[R] = { 142 | 143 | if (coFlatMapper == null) { 144 | throw new NullPointerException("FlatMap function must not be null.") 145 | } 146 | 147 | val outType: TypeInformation[R] = implicitly[TypeInformation[R]] 148 | asScalaStream(javaStream.flatMap(coFlatMapper, outType).asInstanceOf[JavaStream[R]]) 149 | } 150 | 151 | /** Applies a CoFlatMap transformation on the connected streams. 152 | * 153 | * The transformation consists of two separate functions, where the first one is called for each element of the first 154 | * connected stream, and the second one is called for each element of the second connected stream. 155 | * 156 | * @param fun1 157 | * Function called per element of the first input. 158 | * @param fun2 159 | * Function called per element of the second input. 160 | * @return 161 | * The resulting data stream. 162 | */ 163 | def flatMap[R: TypeInformation]( 164 | fun1: (IN1, Collector[R]) => Unit, 165 | fun2: (IN2, Collector[R]) => Unit 166 | ): DataStream[R] = { 167 | 168 | if (fun1 == null || fun2 == null) { 169 | throw new NullPointerException("FlatMap functions must not be null.") 170 | } 171 | val cleanFun1 = clean(fun1) 172 | val cleanFun2 = clean(fun2) 173 | val flatMapper = new CoFlatMapFunction[IN1, IN2, R] { 174 | def flatMap1(value: IN1, out: Collector[R]): Unit = cleanFun1(value, out) 175 | def flatMap2(value: IN2, out: Collector[R]): Unit = cleanFun2(value, out) 176 | } 177 | flatMap(flatMapper) 178 | } 179 | 180 | /** Applies a CoFlatMap transformation on the connected streams. 181 | * 182 | * The transformation consists of two separate functions, where the first one is called for each element of the first 183 | * connected stream, and the second one is called for each element of the second connected stream. 184 | * 185 | * @param fun1 186 | * Function called per element of the first input. 187 | * @param fun2 188 | * Function called per element of the second input. 189 | * @return 190 | * The resulting data stream. 
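* A hedged sketch; the control/data streams and the implicit `TypeInformation` instances are assumptions of
* the example:
* {{{
* val control: DataStream[(String, Boolean)] = ...
* val data: DataStream[(String, Long)]       = ...
*
* val values: DataStream[Long] = control
*   .connect(data)
*   .keyBy(_._1, _._1)
*   .flatMap[Long](
*     (switch, out) => (),                    // ignore control messages in this sketch
*     (record, out) => out.collect(record._2) // forward the value of every data record
*   )
* }}}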
191 | */ 192 | def flatMap[R: TypeInformation](fun1: IN1 => TraversableOnce[R], fun2: IN2 => TraversableOnce[R]): DataStream[R] = { 193 | 194 | if (fun1 == null || fun2 == null) { 195 | throw new NullPointerException("FlatMap functions must not be null.") 196 | } 197 | val cleanFun1 = clean(fun1) 198 | val cleanFun2 = clean(fun2) 199 | 200 | val flatMapper = new CoFlatMapFunction[IN1, IN2, R] { 201 | def flatMap1(value: IN1, out: Collector[R]) = { cleanFun1(value) foreach out.collect } 202 | def flatMap2(value: IN2, out: Collector[R]) = { cleanFun2(value) foreach out.collect } 203 | } 204 | 205 | flatMap(flatMapper) 206 | } 207 | 208 | // ------------------------------------------------------ 209 | // grouping and partitioning 210 | // ------------------------------------------------------ 211 | 212 | /** Keys the two connected streams together. After this operation, all elements with the same key from both streams 213 | * will be sent to the same parallel instance of the transformation functions. 214 | * 215 | * @param keyPosition1 216 | * The first stream's key field 217 | * @param keyPosition2 218 | * The second stream's key field 219 | * @return 220 | * The key-grouped connected streams 221 | */ 222 | def keyBy(keyPosition1: Int, keyPosition2: Int): ConnectedStreams[IN1, IN2] = { 223 | asScalaStream(javaStream.keyBy(keyPosition1, keyPosition2)) 224 | } 225 | 226 | /** Keys the two connected streams together. After this operation, all elements with the same key from both streams 227 | * will be sent to the same parallel instance of the transformation functions. 228 | * 229 | * @param keyPositions1 230 | * The first stream's key fields 231 | * @param keyPositions2 232 | * The second stream's key fields 233 | * @return 234 | * The key-grouped connected streams 235 | */ 236 | def keyBy(keyPositions1: Array[Int], keyPositions2: Array[Int]): ConnectedStreams[IN1, IN2] = { 237 | asScalaStream(javaStream.keyBy(keyPositions1, keyPositions2)) 238 | } 239 | 240 | /** Keys the two connected streams together. After this operation, all elements with the same key from both streams 241 | * will be sent to the same parallel instance of the transformation functions. 242 | * 243 | * @param field1 244 | * The first stream's key expression 245 | * @param field2 246 | * The second stream's key expression 247 | * @return 248 | * The key-grouped connected streams 249 | */ 250 | def keyBy(field1: String, field2: String): ConnectedStreams[IN1, IN2] = { 251 | asScalaStream(javaStream.keyBy(field1, field2)) 252 | } 253 | 254 | /** Keys the two connected streams together. After this operation, all elements with the same key from both streams 255 | * will be sent to the same parallel instance of the transformation functions. 256 | * 257 | * @param fields1 258 | * The first stream's key expressions 259 | * @param fields2 260 | * The second stream's key expressions 261 | * @return 262 | * The key-grouped connected streams 263 | */ 264 | def keyBy(fields1: Array[String], fields2: Array[String]): ConnectedStreams[IN1, IN2] = { 265 | asScalaStream(javaStream.keyBy(fields1, fields2)) 266 | } 267 | 268 | /** Keys the two connected streams together. After this operation, all elements with the same key from both streams 269 | * will be sent to the same parallel instance of the transformation functions. 
270 | * 271 | * @param fun1 272 | * The first stream's key function 273 | * @param fun2 274 | * The second stream's key function 275 | * @return 276 | * The key-grouped connected streams 277 | */ 278 | def keyBy[KEY: TypeInformation](fun1: IN1 => KEY, fun2: IN2 => KEY): ConnectedStreams[IN1, IN2] = { 279 | 280 | val keyType = implicitly[TypeInformation[KEY]] 281 | 282 | val cleanFun1 = clean(fun1) 283 | val cleanFun2 = clean(fun2) 284 | 285 | val keyExtractor1 = new JavaKeySelector[IN1, KEY](cleanFun1) 286 | val keyExtractor2 = new JavaKeySelector[IN2, KEY](cleanFun2) 287 | 288 | asScalaStream(javaStream.keyBy(keyExtractor1, keyExtractor2, keyType)) 289 | } 290 | 291 | /** Returns a "closure-cleaned" version of the given function. Cleans only if closure cleaning is not disabled in the 292 | * [[org.apache.flink.api.common.ExecutionConfig]] 293 | */ 294 | private[flink] def clean[F <: AnyRef](f: F): F = { 295 | new StreamExecutionEnvironment(javaStream.getExecutionEnvironment).scalaClean(f) 296 | } 297 | 298 | @PublicEvolving 299 | def transform[R: TypeInformation]( 300 | functionName: String, 301 | operator: TwoInputStreamOperator[IN1, IN2, R] 302 | ): DataStream[R] = { 303 | asScalaStream(javaStream.transform(functionName, implicitly[TypeInformation[R]], operator)) 304 | } 305 | 306 | @PublicEvolving 307 | def transform[R: TypeInformation]( 308 | functionName: String, 309 | factory: TwoInputStreamOperatorFactory[IN1, IN2, R] 310 | ): DataStream[R] = { 311 | asScalaStream(javaStream.transform(functionName, implicitly[TypeInformation[R]], factory)) 312 | } 313 | } 314 | 315 | @Internal 316 | class JavaKeySelector[IN, K](private[this] val fun: IN => K) extends KeySelector[IN, K] { 317 | override def getKey(value: IN): K = fun(value) 318 | } 319 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/DataStreamUtils.scala: -------------------------------------------------------------------------------- 1 | package io.findify.flink.api 2 | 3 | import org.apache.flink.annotation.Experimental 4 | import org.apache.flink.api.common.typeinfo.TypeInformation 5 | import org.apache.flink.api.java.functions.KeySelector 6 | import org.apache.flink.streaming.api.datastream.{DataStreamUtils => JavaStreamUtils} 7 | 8 | import scala.jdk.CollectionConverters._ 9 | import scala.reflect.ClassTag 10 | import ScalaStreamOps._ 11 | 12 | /** This class provides simple utility methods for collecting a [[DataStream]], effectively enriching it with the 13 | * functionality encapsulated by [[DataStreamUtils]]. 14 | * 15 | * This experimental class is relocated from flink-streaming-contrib. 16 | * 17 | * @param self 18 | * DataStream 19 | */ 20 | @Experimental 21 | class DataStreamUtils[T: TypeInformation: ClassTag](val self: DataStream[T]) { 22 | 23 | /** Returns a scala iterator to iterate over the elements of the DataStream. 24 | * @return 25 | * The iterator 26 | * 27 | * @deprecated 28 | * Replaced with [[DataStream#executeAndCollect]]. 29 | */ 30 | def collect(): Iterator[T] = { 31 | JavaStreamUtils.collect(self.javaStream).asScala 32 | } 33 | 34 | /** Reinterprets the given [[DataStream]] as a [[KeyedStream]], which extracts keys with the given [[KeySelector]]. 35 | * 36 | * IMPORTANT: For every partition of the base stream, the keys of events in the base stream must be partitioned 37 | * exactly in the same way as if it was created through a [[DataStream#keyBy(KeySelector)]]. 
38 | * 39 | * @param keySelector 40 | * Function that defines how keys are extracted from the data stream. 41 | * @return 42 | * The reinterpretation of the [[DataStream]] as a [[KeyedStream]]. 43 | */ 44 | def reinterpretAsKeyedStream[K: TypeInformation](keySelector: T => K): KeyedStream[T, K] = { 45 | 46 | val keyTypeInfo = implicitly[TypeInformation[K]] 47 | val cleanSelector = clean(keySelector) 48 | val javaKeySelector = new JavaKeySelector[T, K](cleanSelector) 49 | 50 | asScalaStream(JavaStreamUtils.reinterpretAsKeyedStream(self.javaStream, javaKeySelector, keyTypeInfo)) 51 | } 52 | 53 | private[flink] def clean[F <: AnyRef](f: F): F = { 54 | new StreamExecutionEnvironment(self.javaStream.getExecutionEnvironment).scalaClean(f) 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/JoinedStreams.scala: -------------------------------------------------------------------------------- 1 | package io.findify.flink.api 2 | 3 | import org.apache.flink.annotation.{PublicEvolving, Public} 4 | import org.apache.flink.api.common.functions.{FlatJoinFunction, JoinFunction} 5 | import org.apache.flink.api.common.typeinfo.TypeInformation 6 | import org.apache.flink.api.java.functions.KeySelector 7 | import org.apache.flink.api.java.typeutils.ResultTypeQueryable 8 | import org.apache.flink.streaming.api.datastream.{ 9 | JoinedStreams => JavaJoinedStreams, 10 | CoGroupedStreams => JavaCoGroupedStreams 11 | } 12 | import org.apache.flink.streaming.api.windowing.assigners.WindowAssigner 13 | import org.apache.flink.streaming.api.windowing.evictors.Evictor 14 | import org.apache.flink.streaming.api.windowing.time.Time 15 | import org.apache.flink.streaming.api.windowing.triggers.Trigger 16 | import org.apache.flink.streaming.api.windowing.windows.Window 17 | import org.apache.flink.util.Collector 18 | import ScalaStreamOps._ 19 | 20 | /** `JoinedStreams` represents two [[DataStream]]s that have been joined. A streaming join operation is evaluated over 21 | * elements in a window. 22 | * 23 | * To finalize the join operation you also need to specify a [[KeySelector]] for both the first and second input and a 24 | * [[WindowAssigner]] 25 | * 26 | * Note: Right now, the groups are being built in memory so you need to ensure that they don't get too big. Otherwise 27 | * the JVM might crash. 28 | * 29 | * Example: 30 | * 31 | * {{{ 32 | * val one: DataStream[(String, Int)] = ... 33 | * val two: DataStream[(String, Int)] = ... 34 | * 35 | * val result = one.join(two) 36 | * .where {t => ... } 37 | * .equal {t => ... } 38 | * .window(TumblingEventTimeWindows.of(Time.of(5, TimeUnit.SECONDS))) 39 | * .apply(new MyJoinFunction()) 40 | * } 41 | * }}} 42 | */ 43 | @Public 44 | class JoinedStreams[T1, T2](input1: DataStream[T1], input2: DataStream[T2]) { 45 | 46 | /** Specifies a [[KeySelector]] for elements from the first input. 47 | */ 48 | def where[KEY: TypeInformation](keySelector: T1 => KEY): Where[KEY] = { 49 | val cleanFun = clean(keySelector) 50 | val keyType = implicitly[TypeInformation[KEY]] 51 | val javaSelector = new KeySelector[T1, KEY] with ResultTypeQueryable[KEY] { 52 | def getKey(in: T1) = cleanFun(in) 53 | override def getProducedType: TypeInformation[KEY] = keyType 54 | } 55 | new Where[KEY](javaSelector, keyType) 56 | } 57 | 58 | /** A join operation that has a [[KeySelector]] defined for the first input. 
59 | * 60 | * You need to specify a [[KeySelector]] for the second input using [[equalTo()]] before you can proceed with 61 | * specifying a [[WindowAssigner]] using [[EqualTo.window()]]. 62 | * 63 | * @tparam KEY 64 | * Type of the key. This must be the same for both inputs 65 | */ 66 | class Where[KEY](keySelector1: KeySelector[T1, KEY], keyType: TypeInformation[KEY]) { 67 | 68 | /** Specifies a [[KeySelector]] for elements from the second input. 69 | */ 70 | def equalTo(keySelector: T2 => KEY): EqualTo = { 71 | val cleanFun = clean(keySelector) 72 | val localKeyType = keyType 73 | val javaSelector = new KeySelector[T2, KEY] with ResultTypeQueryable[KEY] { 74 | def getKey(in: T2) = cleanFun(in) 75 | override def getProducedType: TypeInformation[KEY] = localKeyType 76 | } 77 | new EqualTo(javaSelector) 78 | } 79 | 80 | /** A join operation that has a [[KeySelector]] defined for the first and the second input. 81 | * 82 | * A window can now be specified using [[window()]]. 83 | */ 84 | class EqualTo(keySelector2: KeySelector[T2, KEY]) { 85 | 86 | /** Specifies the window on which the join operation works. 87 | */ 88 | @PublicEvolving 89 | def window[W <: Window]( 90 | assigner: WindowAssigner[_ >: JavaCoGroupedStreams.TaggedUnion[T1, T2], W] 91 | ): WithWindow[W] = { 92 | if (keySelector1 == null || keySelector2 == null) { 93 | throw new UnsupportedOperationException( 94 | "You first need to specify KeySelectors for both inputs using where() and equalTo()." 95 | ) 96 | } 97 | 98 | new WithWindow[W](clean(assigner), null, null, null) 99 | } 100 | 101 | /** A join operation that has [[KeySelector]]s defined for both inputs as well as a [[WindowAssigner]]. 102 | * 103 | * @tparam W 104 | * Type of { @link Window} on which the join operation works. 105 | */ 106 | class WithWindow[W <: Window]( 107 | windowAssigner: WindowAssigner[_ >: JavaCoGroupedStreams.TaggedUnion[T1, T2], W], 108 | trigger: Trigger[_ >: JavaCoGroupedStreams.TaggedUnion[T1, T2], _ >: W], 109 | evictor: Evictor[_ >: JavaCoGroupedStreams.TaggedUnion[T1, T2], _ >: W], 110 | val allowedLateness: Time 111 | ) { 112 | 113 | /** Sets the [[Trigger]] that should be used to trigger window emission. 114 | */ 115 | @PublicEvolving 116 | def trigger(newTrigger: Trigger[_ >: JavaCoGroupedStreams.TaggedUnion[T1, T2], _ >: W]): WithWindow[W] = { 117 | new WithWindow[W](windowAssigner, newTrigger, evictor, allowedLateness) 118 | } 119 | 120 | /** Sets the [[Evictor]] that should be used to evict elements from a window before emission. 121 | * 122 | * Note: When using an evictor window performance will degrade significantly, since pre-aggregation of window 123 | * results cannot be used. 124 | */ 125 | @PublicEvolving 126 | def evictor(newEvictor: Evictor[_ >: JavaCoGroupedStreams.TaggedUnion[T1, T2], _ >: W]): WithWindow[W] = { 127 | new WithWindow[W](windowAssigner, trigger, newEvictor, allowedLateness) 128 | } 129 | 130 | /** Sets the time by which elements are allowed to be late. Delegates to 131 | * [[WindowedStream#allowedLateness(Time)]] 132 | */ 133 | @PublicEvolving 134 | def allowedLateness(newLateness: Time): WithWindow[W] = { 135 | new WithWindow[W](windowAssigner, trigger, evictor, newLateness) 136 | } 137 | 138 | /** Completes the join operation with the user function that is executed for windowed groups. 
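* For example, a hedged sketch in which `orders` and `payments` are placeholder streams keyed by their first
* field and an implicit `TypeInformation` for the result type is assumed:
* {{{
* val orders: DataStream[(String, Double)]   = ...
* val payments: DataStream[(String, Double)] = ...
*
* val matched: DataStream[(String, Double, Double)] = orders
*   .join(payments)
*   .where(_._1)
*   .equalTo(_._1)
*   .window(TumblingEventTimeWindows.of(Time.seconds(10)))
*   .apply((order, payment) => (order._1, order._2, payment._2))
* }}}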
139 | */ 140 | def apply[O: TypeInformation](fun: (T1, T2) => O): DataStream[O] = { 141 | require(fun != null, "Join function must not be null.") 142 | 143 | val joiner = new FlatJoinFunction[T1, T2, O] { 144 | val cleanFun = clean(fun) 145 | def join(left: T1, right: T2, out: Collector[O]) = { 146 | out.collect(cleanFun(left, right)) 147 | } 148 | } 149 | apply(joiner) 150 | } 151 | 152 | /** Completes the join operation with the user function that is executed for windowed groups. 153 | */ 154 | def apply[O: TypeInformation](fun: (T1, T2, Collector[O]) => Unit): DataStream[O] = { 155 | require(fun != null, "Join function must not be null.") 156 | 157 | val joiner = new FlatJoinFunction[T1, T2, O] { 158 | val cleanFun = clean(fun) 159 | def join(left: T1, right: T2, out: Collector[O]) = { 160 | cleanFun(left, right, out) 161 | } 162 | } 163 | apply(joiner) 164 | } 165 | 166 | /** Completes the join operation with the user function that is executed for windowed groups. 167 | */ 168 | def apply[T: TypeInformation](function: JoinFunction[T1, T2, T]): DataStream[T] = { 169 | 170 | val join = new JavaJoinedStreams[T1, T2](input1.javaStream, input2.javaStream) 171 | 172 | asScalaStream( 173 | join 174 | .where(keySelector1) 175 | .equalTo(keySelector2) 176 | .window(windowAssigner) 177 | .trigger(trigger) 178 | .evictor(evictor) 179 | .allowedLateness(allowedLateness) 180 | .apply(clean(function), implicitly[TypeInformation[T]]) 181 | ) 182 | } 183 | 184 | /** Completes the join operation with the user function that is executed for windowed groups. 185 | */ 186 | def apply[T: TypeInformation](function: FlatJoinFunction[T1, T2, T]): DataStream[T] = { 187 | 188 | val join = new JavaJoinedStreams[T1, T2](input1.javaStream, input2.javaStream) 189 | 190 | asScalaStream( 191 | join 192 | .where(keySelector1) 193 | .equalTo(keySelector2) 194 | .window(windowAssigner) 195 | .trigger(trigger) 196 | .evictor(evictor) 197 | .allowedLateness(allowedLateness) 198 | .apply(clean(function), implicitly[TypeInformation[T]]) 199 | ) 200 | } 201 | } 202 | } 203 | } 204 | 205 | /** Returns a "closure-cleaned" version of the given function. Cleans only if closure cleaning is not disabled in the 206 | * [[org.apache.flink.api.common.ExecutionConfig]]. 
207 | */ 208 | private[flink] def clean[F <: AnyRef](f: F): F = { 209 | new StreamExecutionEnvironment(input1.javaStream.getExecutionEnvironment).scalaClean(f) 210 | } 211 | } 212 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/KeyedStream.scala: -------------------------------------------------------------------------------- 1 | package io.findify.flink.api 2 | 3 | import io.findify.flink.api.function.StatefulFunction 4 | import org.apache.flink.annotation.{Internal, Public, PublicEvolving} 5 | import org.apache.flink.api.common.functions._ 6 | import org.apache.flink.api.common.state.{ReducingStateDescriptor, ValueStateDescriptor} 7 | import org.apache.flink.api.common.typeinfo.TypeInformation 8 | import org.apache.flink.api.common.typeutils.TypeSerializer 9 | import org.apache.flink.streaming.api.datastream.{ 10 | QueryableStateStream, 11 | KeyedStream => KeyedJavaStream, 12 | WindowedStream => WindowedJavaStream 13 | } 14 | import org.apache.flink.streaming.api.functions.aggregation.AggregationFunction.AggregationType 15 | import org.apache.flink.streaming.api.functions.aggregation.{AggregationFunction, ComparableAggregator, SumAggregator} 16 | import org.apache.flink.streaming.api.functions.co.ProcessJoinFunction 17 | import org.apache.flink.streaming.api.functions.query.{QueryableAppendingStateOperator, QueryableValueStateOperator} 18 | import org.apache.flink.streaming.api.functions.{KeyedProcessFunction, ProcessFunction} 19 | import org.apache.flink.streaming.api.windowing.assigners._ 20 | import org.apache.flink.streaming.api.windowing.time.Time 21 | import org.apache.flink.streaming.api.windowing.windows.{GlobalWindow, TimeWindow, Window} 22 | import org.apache.flink.util.Collector 23 | import ScalaStreamOps._ 24 | 25 | @Public 26 | class KeyedStream[T, K](javaStream: KeyedJavaStream[T, K]) extends DataStream[T](javaStream) { 27 | 28 | // ------------------------------------------------------------------------ 29 | // Properties 30 | // ------------------------------------------------------------------------ 31 | 32 | /** Gets the type of the key by which this stream is keyed. 33 | */ 34 | @Internal 35 | def getKeyType = javaStream.getKeyType() 36 | 37 | // ------------------------------------------------------------------------ 38 | // basic transformations 39 | // ------------------------------------------------------------------------ 40 | 41 | /** Applies the given [[ProcessFunction]] on the input stream, thereby creating a transformed output stream. 42 | * 43 | * The function will be called for every element in the stream and can produce zero or more output. The function can 44 | * also query the time and set timers. When reacting to the firing of set timers the function can emit yet more 45 | * elements. 46 | * 47 | * The function will be called for every element in the input streams and can produce zero or more output elements. 48 | * Contrary to the [[DataStream#flatMap(FlatMapFunction)]] function, this function can also query the time and set 49 | * timers. When reacting to the firing of set timers the function can directly emit elements and/or register yet more 50 | * timers. 51 | * 52 | * @param processFunction 53 | * The [[ProcessFunction]] that is called for each element in the stream. 
54 | * 55 | * @deprecated 56 | * Use [[KeyedStream#process(KeyedProcessFunction)]] 57 | */ 58 | @deprecated("will be removed in a future version") 59 | @PublicEvolving 60 | override def process[R: TypeInformation](processFunction: ProcessFunction[T, R]): DataStream[R] = { 61 | 62 | if (processFunction == null) { 63 | throw new NullPointerException("ProcessFunction must not be null.") 64 | } 65 | 66 | asScalaStream(javaStream.process(processFunction, implicitly[TypeInformation[R]])) 67 | } 68 | 69 | /** Applies the given [[KeyedProcessFunction]] on the input stream, thereby creating a transformed output stream. 70 | * 71 | * The function will be called for every element in the stream and can produce zero or more output. The function can 72 | * also query the time and set timers. When reacting to the firing of set timers the function can emit yet more 73 | * elements. 74 | * 75 | * The function will be called for every element in the input streams and can produce zero or more output elements. 76 | * Contrary to the [[DataStream#flatMap(FlatMapFunction)]] function, this function can also query the time and set 77 | * timers. When reacting to the firing of set timers the function can directly emit elements and/or register yet more 78 | * timers. 79 | * 80 | * @param keyedProcessFunction 81 | * The [[KeyedProcessFunction]] that is called for each element in the stream. 82 | */ 83 | @PublicEvolving 84 | def process[R: TypeInformation](keyedProcessFunction: KeyedProcessFunction[K, T, R]): DataStream[R] = { 85 | 86 | if (keyedProcessFunction == null) { 87 | throw new NullPointerException("KeyedProcessFunction must not be null.") 88 | } 89 | 90 | asScalaStream(javaStream.process(keyedProcessFunction, implicitly[TypeInformation[R]])) 91 | } 92 | 93 | // ------------------------------------------------------------------------ 94 | // Joining 95 | // ------------------------------------------------------------------------ 96 | 97 | /** Join elements of this [[KeyedStream]] with elements of another [[KeyedStream]] over a time interval that can be 98 | * specified with [[IntervalJoin.between]]. 99 | * 100 | * @param otherStream 101 | * The other keyed stream to join this keyed stream with 102 | * @tparam OTHER 103 | * Type parameter of elements in the other stream 104 | * @return 105 | * An instance of [[IntervalJoin]] with this keyed stream and the other keyed stream 106 | */ 107 | @PublicEvolving 108 | def intervalJoin[OTHER](otherStream: KeyedStream[OTHER, K]): IntervalJoin[T, OTHER, K] = { 109 | new IntervalJoin[T, OTHER, K](this, otherStream) 110 | } 111 | 112 | /** Perform a join over a time interval. 113 | * 114 | * @tparam IN1 115 | * The type parameter of the elements in the first streams 116 | * @tparam IN2 117 | * The type parameter of the elements in the second stream 118 | */ 119 | @PublicEvolving 120 | class IntervalJoin[IN1, IN2, KEY](val streamOne: KeyedStream[IN1, KEY], val streamTwo: KeyedStream[IN2, KEY]) { 121 | 122 | /** Specifies the time boundaries over which the join operation works, so that
leftElement.timestamp + lowerBound <= rightElement.timestamp <= leftElement.timestamp + upperBound. 123 | *
By default both the lower and 124 | * the upper bound are inclusive. This can be configured with [[IntervalJoined.lowerBoundExclusive]] and 125 | * [[IntervalJoined.upperBoundExclusive]] 126 | * 127 | * @param lowerBound 128 | * The lower bound. Needs to be smaller than or equal to the upperBound 129 | * @param upperBound 130 | * The upper bound. Needs to be bigger than or equal to the lowerBound 131 | */ 132 | @PublicEvolving 133 | def between(lowerBound: Time, upperBound: Time): IntervalJoined[IN1, IN2, KEY] = { 134 | val lowerMillis = lowerBound.toMilliseconds 135 | val upperMillis = upperBound.toMilliseconds 136 | new IntervalJoined[IN1, IN2, KEY](streamOne, streamTwo, lowerMillis, upperMillis) 137 | } 138 | } 139 | 140 | /** IntervalJoined is a container for two streams that have keys for both sides as well as the time boundaries over 141 | * which elements should be joined. 142 | * 143 | * @tparam IN1 144 | * Input type of elements from the first stream 145 | * @tparam IN2 146 | * Input type of elements from the second stream 147 | * @tparam KEY 148 | * The type of the key 149 | */ 150 | @PublicEvolving 151 | class IntervalJoined[IN1, IN2, KEY]( 152 | private val firstStream: KeyedStream[IN1, KEY], 153 | private val secondStream: KeyedStream[IN2, KEY], 154 | private val lowerBound: Long, 155 | private val upperBound: Long 156 | ) { 157 | 158 | private var lowerBoundInclusive = true 159 | private var upperBoundInclusive = true 160 | 161 | /** Set the lower bound to be exclusive 162 | */ 163 | @PublicEvolving 164 | def lowerBoundExclusive(): IntervalJoined[IN1, IN2, KEY] = { 165 | this.lowerBoundInclusive = false 166 | this 167 | } 168 | 169 | /** Set the upper bound to be exclusive 170 | */ 171 | @PublicEvolving 172 | def upperBoundExclusive(): IntervalJoined[IN1, IN2, KEY] = { 173 | this.upperBoundInclusive = false 174 | this 175 | } 176 | 177 | /** Completes the join operation with the user function that is executed for each joined pair of elements. 178 | * 179 | * @param processJoinFunction 180 | * The user-defined function 181 | * @tparam OUT 182 | * The output type 183 | * @return 184 | * Returns a DataStream 185 | */ 186 | @PublicEvolving 187 | def process[OUT: TypeInformation](processJoinFunction: ProcessJoinFunction[IN1, IN2, OUT]): DataStream[OUT] = { 188 | 189 | val outType: TypeInformation[OUT] = implicitly[TypeInformation[OUT]] 190 | 191 | val javaJoined = new KeyedJavaStream.IntervalJoined[IN1, IN2, KEY]( 192 | firstStream.javaStream.asInstanceOf[KeyedJavaStream[IN1, KEY]], 193 | secondStream.javaStream.asInstanceOf[KeyedJavaStream[IN2, KEY]], 194 | lowerBound, 195 | upperBound, 196 | lowerBoundInclusive, 197 | upperBoundInclusive 198 | ) 199 | asScalaStream(javaJoined.process(processJoinFunction, outType)) 200 | } 201 | } 202 | 203 | // ------------------------------------------------------------------------ 204 | // Windowing 205 | // ------------------------------------------------------------------------ 206 | 207 | /** Windows this [[KeyedStream]] into tumbling time windows. 208 | * 209 | * This is a shortcut for either `.window(TumblingEventTimeWindows.of(size))` or 210 | * `.window(TumblingProcessingTimeWindows.of(size))` depending on the time characteristic set using 211 | * [[StreamExecutionEnvironment.setStreamTimeCharacteristic()]] 212 | * 213 | * @param size 214 | * The size of the window. 215 | * 216 | * @deprecated 217 | * Please use [[window()]] with either [[TumblingEventTimeWindows]] or [[TumblingProcessingTimeWindows]]. 
For more 218 | * information, see the deprecation notice on [[org.apache.flink.streaming.api.TimeCharacteristic]]. 219 | */ 220 | @deprecated 221 | def timeWindow(size: Time): WindowedStream[T, K, TimeWindow] = { 222 | new WindowedStream(javaStream.timeWindow(size)) 223 | } 224 | 225 | /** Windows this [[KeyedStream]] into sliding time windows. 226 | * 227 | * This is a shortcut for either `.window(SlidingEventTimeWindows.of(size))` or 228 | * `.window(SlidingProcessingTimeWindows.of(size))` depending on the time characteristic set using 229 | * [[StreamExecutionEnvironment.setStreamTimeCharacteristic()]] 230 | * 231 | * @param size 232 | * The size of the window. 233 | * 234 | * @deprecated 235 | * Please use [[window()]] with either [[SlidingEventTimeWindows]] or [[SlidingProcessingTimeWindows]]. For more 236 | * information, see the deprecation notice on [[org.apache.flink.streaming.api.TimeCharacteristic]]. 237 | */ 238 | @deprecated 239 | def timeWindow(size: Time, slide: Time): WindowedStream[T, K, TimeWindow] = { 240 | new WindowedStream(javaStream.timeWindow(size, slide)) 241 | } 242 | 243 | /** Windows this [[KeyedStream]] into sliding count windows. 244 | * 245 | * @param size 246 | * The size of the windows in number of elements. 247 | * @param slide 248 | * The slide interval in number of elements. 249 | */ 250 | def countWindow(size: Long, slide: Long): WindowedStream[T, K, GlobalWindow] = { 251 | new WindowedStream(javaStream.countWindow(size, slide)) 252 | } 253 | 254 | /** Windows this [[KeyedStream]] into tumbling count windows. 255 | * 256 | * @param size 257 | * The size of the windows in number of elements. 258 | */ 259 | def countWindow(size: Long): WindowedStream[T, K, GlobalWindow] = { 260 | new WindowedStream(javaStream.countWindow(size)) 261 | } 262 | 263 | /** Windows this data stream to a [[WindowedStream]], which evaluates windows over a key grouped stream. Elements are 264 | * put into windows by a [[WindowAssigner]]. The grouping of elements is done both by key and by window. 265 | * 266 | * A [[org.apache.flink.streaming.api.windowing.triggers.Trigger]] can be defined to specify when windows are 267 | * evaluated. However, `WindowAssigner` have a default `Trigger` that is used if a `Trigger` is not specified. 268 | * 269 | * @param assigner 270 | * The `WindowAssigner` that assigns elements to windows. 271 | * @return 272 | * The trigger windows data stream. 273 | */ 274 | @PublicEvolving 275 | def window[W <: Window](assigner: WindowAssigner[_ >: T, W]): WindowedStream[T, K, W] = { 276 | new WindowedStream(new WindowedJavaStream[T, K, W](javaStream, assigner)) 277 | } 278 | 279 | // ------------------------------------------------------------------------ 280 | // Non-Windowed aggregation operations 281 | // ------------------------------------------------------------------------ 282 | 283 | /** Creates a new [[DataStream]] by reducing the elements of this DataStream using an associative reduce function. An 284 | * independent aggregate is kept per key. 285 | */ 286 | def reduce(reducer: ReduceFunction[T]): DataStream[T] = { 287 | if (reducer == null) { 288 | throw new NullPointerException("Reduce function must not be null.") 289 | } 290 | 291 | asScalaStream(javaStream.reduce(reducer)) 292 | } 293 | 294 | /** Creates a new [[DataStream]] by reducing the elements of this DataStream using an associative reduce function. An 295 | * independent aggregate is kept per key. 
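* For example, a running per-key sum (a sketch that assumes an implicit `TypeInformation` for the tuple type):
* {{{
* val counts: DataStream[(String, Int)] = ...
*
* val runningSum: DataStream[(String, Int)] = counts
*   .keyBy(_._1)
*   .reduce((a, b) => (a._1, a._2 + b._2))
* }}}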
296 | */ 297 | def reduce(fun: (T, T) => T): DataStream[T] = { 298 | if (fun == null) { 299 | throw new NullPointerException("Reduce function must not be null.") 300 | } 301 | val cleanFun = clean(fun) 302 | val reducer = new ReduceFunction[T] { 303 | def reduce(v1: T, v2: T): T = { cleanFun(v1, v2) } 304 | } 305 | reduce(reducer) 306 | } 307 | 308 | /** Applies an aggregation that that gives the current maximum of the data stream at the given position by the given 309 | * key. An independent aggregate is kept per key. 310 | * 311 | * @param position 312 | * The field position in the data points to minimize. This is applicable to Tuple types, Scala case classes, and 313 | * primitive types (which is considered as having one field). 314 | */ 315 | def max(position: Int): DataStream[T] = aggregate(AggregationType.MAX, position) 316 | 317 | /** Applies an aggregation that that gives the current maximum of the data stream at the given field by the given key. 318 | * An independent aggregate is kept per key. 319 | * 320 | * @param field 321 | * In case of a POJO, Scala case class, or Tuple type, the name of the (public) field on which to perform the 322 | * aggregation. Additionally, a dot can be used to drill down into nested objects, as in `"field1.fieldxy"`. 323 | * Furthermore "*" can be specified in case of a basic type (which is considered as having only one field). 324 | */ 325 | def max(field: String): DataStream[T] = aggregate(AggregationType.MAX, field) 326 | 327 | /** Applies an aggregation that that gives the current minimum of the data stream at the given position by the given 328 | * key. An independent aggregate is kept per key. 329 | * 330 | * @param position 331 | * The field position in the data points to minimize. This is applicable to Tuple types, Scala case classes, and 332 | * primitive types (which is considered as having one field). 333 | */ 334 | def min(position: Int): DataStream[T] = aggregate(AggregationType.MIN, position) 335 | 336 | /** Applies an aggregation that that gives the current minimum of the data stream at the given field by the given key. 337 | * An independent aggregate is kept per key. 338 | * 339 | * @param field 340 | * In case of a POJO, Scala case class, or Tuple type, the name of the (public) field on which to perform the 341 | * aggregation. Additionally, a dot can be used to drill down into nested objects, as in `"field1.fieldxy"`. 342 | * Furthermore "*" can be specified in case of a basic type (which is considered as having only one field). 343 | */ 344 | def min(field: String): DataStream[T] = aggregate(AggregationType.MIN, field) 345 | 346 | /** Applies an aggregation that sums the data stream at the given position by the given key. An independent aggregate 347 | * is kept per key. 348 | * 349 | * @param position 350 | * The field position in the data points to minimize. This is applicable to Tuple types, Scala case classes, and 351 | * primitive types (which is considered as having one field). 352 | */ 353 | def sum(position: Int): DataStream[T] = aggregate(AggregationType.SUM, position) 354 | 355 | /** Applies an aggregation that sums the data stream at the given field by the given key. An independent aggregate is 356 | * kept per key. 357 | * 358 | * @param field 359 | * In case of a POJO, Scala case class, or Tuple type, the name of the (public) field on which to perform the 360 | * aggregation. Additionally, a dot can be used to drill down into nested objects, as in `"field1.fieldxy"`. 
361 | * Furthermore "*" can be specified in case of a basic type (which is considered as having only one field). 362 | */ 363 | def sum(field: String): DataStream[T] = aggregate(AggregationType.SUM, field) 364 | 365 | /** Applies an aggregation that that gives the current minimum element of the data stream by the given position by the 366 | * given key. An independent aggregate is kept per key. When equality, the first element is returned with the minimal 367 | * value. 368 | * 369 | * @param position 370 | * The field position in the data points to minimize. This is applicable to Tuple types, Scala case classes, and 371 | * primitive types (which is considered as having one field). 372 | */ 373 | def minBy(position: Int): DataStream[T] = aggregate(AggregationType.MINBY, position) 374 | 375 | /** Applies an aggregation that that gives the current minimum element of the data stream by the given field by the 376 | * given key. An independent aggregate is kept per key. When equality, the first element is returned with the minimal 377 | * value. 378 | * 379 | * @param field 380 | * In case of a POJO, Scala case class, or Tuple type, the name of the (public) field on which to perform the 381 | * aggregation. Additionally, a dot can be used to drill down into nested objects, as in `"field1.fieldxy"`. 382 | * Furthermore "*" can be specified in case of a basic type (which is considered as having only one field). 383 | */ 384 | def minBy(field: String): DataStream[T] = aggregate(AggregationType.MINBY, field) 385 | 386 | /** Applies an aggregation that that gives the current maximum element of the data stream by the given position by the 387 | * given key. An independent aggregate is kept per key. When equality, the first element is returned with the maximal 388 | * value. 389 | * 390 | * @param position 391 | * The field position in the data points to minimize. This is applicable to Tuple types, Scala case classes, and 392 | * primitive types (which is considered as having one field). 393 | */ 394 | def maxBy(position: Int): DataStream[T] = 395 | aggregate(AggregationType.MAXBY, position) 396 | 397 | /** Applies an aggregation that that gives the current maximum element of the data stream by the given field by the 398 | * given key. An independent aggregate is kept per key. When equality, the first element is returned with the maximal 399 | * value. 400 | * 401 | * @param field 402 | * In case of a POJO, Scala case class, or Tuple type, the name of the (public) field on which to perform the 403 | * aggregation. Additionally, a dot can be used to drill down into nested objects, as in `"field1.fieldxy"`. 404 | * Furthermore "*" can be specified in case of a basic type (which is considered as having only one field). 
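As a rough sketch of the positional and named-field aggregations above (the `Reading` case class and streams are hypothetical; aggregating by field name assumes the stream's TypeInformation is a composite type, as with the CaseClassTypeInfo used elsewhere in this project):

case class Reading(sensor: String, temp: Double)

val readings: DataStream[Reading] = ???

// Running maximum temperature per sensor; minBy keeps the whole element
// that carries the minimal value, not just the aggregated field.
val hottestSoFar = readings.keyBy(_.sensor).max("temp")
val coldestSeen  = readings.keyBy(_.sensor).minBy("temp")

// Tuple streams can address fields by position instead of by name.
val totals = readings
  .map(r => (r.sensor, 1L))
  .keyBy(_._1)
  .sum(1)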
405 | */ 406 | def maxBy(field: String): DataStream[T] = 407 | aggregate(AggregationType.MAXBY, field) 408 | 409 | private def aggregate(aggregationType: AggregationType, field: String): DataStream[T] = { 410 | val aggregationFunc = aggregationType match { 411 | case AggregationType.SUM => 412 | new SumAggregator(field, javaStream.getType, javaStream.getExecutionConfig) 413 | case _ => 414 | new ComparableAggregator(field, javaStream.getType, aggregationType, true, javaStream.getExecutionConfig) 415 | } 416 | 417 | aggregate(aggregationFunc) 418 | } 419 | 420 | private def aggregate(aggregationType: AggregationType, position: Int): DataStream[T] = { 421 | val aggregationFunc = aggregationType match { 422 | case AggregationType.SUM => 423 | new SumAggregator(position, javaStream.getType, javaStream.getExecutionConfig) 424 | case _ => 425 | new ComparableAggregator(position, javaStream.getType, aggregationType, true, javaStream.getExecutionConfig) 426 | } 427 | 428 | aggregate(aggregationFunc) 429 | } 430 | 431 | private def aggregate(aggregationFunc: AggregationFunction[T]): DataStream[T] = { 432 | reduce(aggregationFunc).name("Keyed Aggregation") 433 | } 434 | 435 | // ------------------------------------------------------------------------ 436 | // functions with state 437 | // ------------------------------------------------------------------------ 438 | 439 | /** Creates a new DataStream that contains only the elements satisfying the given stateful filter predicate. To use 440 | * state partitioning, a key must be defined using .keyBy(..), in which case an independent state will be kept per 441 | * key. 442 | * 443 | * Note that the user state object needs to be serializable. 444 | */ 445 | def filterWithState[S: TypeInformation](fun: (T, Option[S]) => (Boolean, Option[S])): DataStream[T] = { 446 | if (fun == null) { 447 | throw new NullPointerException("Filter function must not be null.") 448 | } 449 | 450 | val cleanFun = clean(fun) 451 | val stateTypeInfo: TypeInformation[S] = implicitly[TypeInformation[S]] 452 | val serializer: TypeSerializer[S] = stateTypeInfo.createSerializer(javaStream.getExecutionConfig) 453 | 454 | val filterFun = new RichFilterFunction[T] with StatefulFunction[T, Boolean, S] { 455 | 456 | override val stateSerializer: TypeSerializer[S] = serializer 457 | 458 | override def filter(in: T): Boolean = { 459 | applyWithState(in, cleanFun) 460 | } 461 | } 462 | 463 | filter(filterFun) 464 | } 465 | 466 | /** Creates a new DataStream by applying the given stateful function to every element of this DataStream. To use state 467 | * partitioning, a key must be defined using .keyBy(..), in which case an independent state will be kept per key. 468 | * 469 | * Note that the user state object needs to be serializable. 
470 | */ 471 | def mapWithState[R: TypeInformation, S: TypeInformation](fun: (T, Option[S]) => (R, Option[S])): DataStream[R] = { 472 | if (fun == null) { 473 | throw new NullPointerException("Map function must not be null.") 474 | } 475 | 476 | val cleanFun = clean(fun) 477 | val stateTypeInfo: TypeInformation[S] = implicitly[TypeInformation[S]] 478 | val serializer: TypeSerializer[S] = stateTypeInfo.createSerializer(javaStream.getExecutionConfig) 479 | 480 | val mapper = new RichMapFunction[T, R] with StatefulFunction[T, R, S] { 481 | 482 | override val stateSerializer: TypeSerializer[S] = serializer 483 | 484 | override def map(in: T): R = { 485 | applyWithState(in, cleanFun) 486 | } 487 | } 488 | 489 | map(mapper) 490 | } 491 | 492 | /** Creates a new DataStream by applying the given stateful function to every element and flattening the results. To 493 | * use state partitioning, a key must be defined using .keyBy(..), in which case an independent state will be kept 494 | * per key. 495 | * 496 | * Note that the user state object needs to be serializable. 497 | */ 498 | def flatMapWithState[R: TypeInformation, S: TypeInformation]( 499 | fun: (T, Option[S]) => (TraversableOnce[R], Option[S]) 500 | ): DataStream[R] = { 501 | if (fun == null) { 502 | throw new NullPointerException("Flatmap function must not be null.") 503 | } 504 | 505 | val cleanFun = clean(fun) 506 | val stateTypeInfo: TypeInformation[S] = implicitly[TypeInformation[S]] 507 | val serializer: TypeSerializer[S] = stateTypeInfo.createSerializer(javaStream.getExecutionConfig) 508 | 509 | val flatMapper = new RichFlatMapFunction[T, R] with StatefulFunction[T, TraversableOnce[R], S] { 510 | 511 | override val stateSerializer: TypeSerializer[S] = serializer 512 | 513 | override def flatMap(in: T, out: Collector[R]): Unit = { 514 | applyWithState(in, cleanFun) foreach out.collect 515 | } 516 | } 517 | 518 | flatMap(flatMapper) 519 | } 520 | 521 | /** Publishes the keyed stream as a queryable ValueState instance. 522 | * 523 | * @param queryableStateName 524 | * Name under which to the publish the queryable state instance 525 | * @return 526 | * Queryable state instance 527 | */ 528 | @PublicEvolving 529 | def asQueryableState(queryableStateName: String): QueryableStateStream[K, T] = { 530 | val stateDescriptor = new ValueStateDescriptor(queryableStateName, dataType.createSerializer(executionConfig)) 531 | 532 | asQueryableState(queryableStateName, stateDescriptor) 533 | } 534 | 535 | /** Publishes the keyed stream as a queryable ValueState instance. 536 | * 537 | * @param queryableStateName 538 | * Name under which to the publish the queryable state instance 539 | * @param stateDescriptor 540 | * State descriptor to create state instance from 541 | * @return 542 | * Queryable state instance 543 | */ 544 | @PublicEvolving 545 | def asQueryableState( 546 | queryableStateName: String, 547 | stateDescriptor: ValueStateDescriptor[T] 548 | ): QueryableStateStream[K, T] = { 549 | 550 | transform( 551 | s"Queryable state: $queryableStateName", 552 | new QueryableValueStateOperator(queryableStateName, stateDescriptor) 553 | )(dataType) 554 | 555 | stateDescriptor.initializeSerializerUnlessSet(executionConfig) 556 | 557 | new QueryableStateStream(queryableStateName, stateDescriptor, getKeyType.createSerializer(executionConfig)) 558 | } 559 | 560 | /** Publishes the keyed stream as a queryable ReducingState instance. 
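A small sketch of the stateful shortcuts, assuming a hypothetical `clicks: DataStream[(String, Long)]` and implicit TypeInformation for the state and result types:

val clicks: DataStream[(String, Long)] = ???   // hypothetical (userId, amount) stream

// Running count per key; the Option[Long] state starts out empty.
val counted: DataStream[(String, Long)] = clicks
  .keyBy(_._1)
  .mapWithState[(String, Long), Long] { (click, state) =>
    val count = state.getOrElse(0L) + 1
    ((click._1, count), Some(count))
  }

// Keep only the first element seen per key.
val deduped: DataStream[(String, Long)] = clicks
  .keyBy(_._1)
  .filterWithState[Boolean] { (_, seen) => (seen.isEmpty, Some(true)) }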
561 | * 562 | * @param queryableStateName 563 | * Name under which to the publish the queryable state instance 564 | * @param stateDescriptor 565 | * State descriptor to create state instance from 566 | * @return 567 | * Queryable state instance 568 | */ 569 | @PublicEvolving 570 | def asQueryableState( 571 | queryableStateName: String, 572 | stateDescriptor: ReducingStateDescriptor[T] 573 | ): QueryableStateStream[K, T] = { 574 | 575 | transform( 576 | s"Queryable state: $queryableStateName", 577 | new QueryableAppendingStateOperator(queryableStateName, stateDescriptor) 578 | )(dataType) 579 | 580 | stateDescriptor.initializeSerializerUnlessSet(executionConfig) 581 | 582 | new QueryableStateStream(queryableStateName, stateDescriptor, getKeyType.createSerializer(executionConfig)) 583 | } 584 | 585 | } 586 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/OutputTag.scala: -------------------------------------------------------------------------------- 1 | package io.findify.flink.api 2 | 3 | import org.apache.flink.annotation.PublicEvolving 4 | import org.apache.flink.api.common.typeinfo.TypeInformation 5 | import org.apache.flink.util.{OutputTag => JOutputTag} 6 | 7 | /** An [[OutputTag]] is a typed and named tag to use for tagging side outputs of an operator. 8 | * 9 | * Example: 10 | * {{{ 11 | * val outputTag = OutputTag[String]("late-data") 12 | * }}} 13 | * 14 | * @tparam T 15 | * the type of elements in the side-output stream. 16 | */ 17 | @PublicEvolving 18 | class OutputTag[T: TypeInformation](id: String) extends JOutputTag[T](id, implicitly[TypeInformation[T]]) 19 | 20 | object OutputTag { 21 | def apply[T: TypeInformation](id: String): OutputTag[T] = new OutputTag(id) 22 | } 23 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/ScalaStreamOps.scala: -------------------------------------------------------------------------------- 1 | package io.findify.flink.api 2 | 3 | import io.findify.flinkadt.api.typeinfo.CaseClassTypeInfo 4 | import org.apache.flink.api.common.typeinfo.TypeInformation 5 | import org.apache.flink.streaming.api.datastream.{DataStream => JavaStream} 6 | import org.apache.flink.streaming.api.datastream.{ConnectedStreams => ConnectedJavaStreams} 7 | import org.apache.flink.streaming.api.datastream.{BroadcastConnectedStream => BroadcastConnectedJavaStreams} 8 | import org.apache.flink.streaming.api.datastream.{KeyedStream => KeyedJavaStream} 9 | 10 | import language.implicitConversions 11 | import language.experimental.macros 12 | 13 | object ScalaStreamOps { 14 | 15 | /** Converts an [[org.apache.flink.streaming.api.datastream.DataStream]] to a [[io.findify.flink.api.DataStream]]. 16 | */ 17 | def asScalaStream[R](stream: JavaStream[R]) = new DataStream[R](stream) 18 | 19 | /** Converts an [[org.apache.flink.streaming.api.datastream.KeyedStream]] to a [[io.findify.flink.api.KeyedStream]]. 20 | */ 21 | def asScalaStream[R, K](stream: KeyedJavaStream[R, K]) = new KeyedStream[R, K](stream) 22 | 23 | /** Converts an [[org.apache.flink.streaming.api.datastream.ConnectedStreams]] to a 24 | * [[io.findify.flink.api.ConnectedStreams]]. 25 | */ 26 | def asScalaStream[IN1, IN2](stream: ConnectedJavaStreams[IN1, IN2]) = new ConnectedStreams[IN1, IN2](stream) 27 | 28 | /** Converts an [[org.apache.flink.streaming.api.datastream.BroadcastConnectedStream]] to a 29 | * [[io.findify.flink.api.BroadcastConnectedStream]]. 
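For illustration, a hedged sketch of publishing keyed state as queryable state (hypothetical stream and state names; querying additionally requires the queryable-state feature to be enabled on the cluster):

import org.apache.flink.api.common.functions.ReduceFunction
import org.apache.flink.api.common.state.ReducingStateDescriptor
import org.apache.flink.api.common.typeinfo.TypeInformation

val clicks: DataStream[(String, Long)] = ???

// Simplest form: the latest value per key becomes queryable under the given name.
clicks.keyBy(_._1).asQueryableState("latest-click")

// Running per-key sums exposed through a ReducingState.
val sumDescriptor = new ReducingStateDescriptor[(String, Long)](
  "click-sums",
  new ReduceFunction[(String, Long)] {
    override def reduce(a: (String, Long), b: (String, Long)): (String, Long) = (a._1, a._2 + b._2)
  },
  implicitly[TypeInformation[(String, Long)]]
)
clicks.keyBy(_._1).asQueryableState("click-sums", sumDescriptor)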
30 | */ 31 | def asScalaStream[IN1, IN2](stream: BroadcastConnectedJavaStreams[IN1, IN2]) = 32 | new BroadcastConnectedStream[IN1, IN2](stream) 33 | 34 | private[flink] def fieldNames2Indices(typeInfo: TypeInformation[_], fields: Array[String]): Array[Int] = { 35 | typeInfo match { 36 | case ti: CaseClassTypeInfo[_] => 37 | val result = ti.getFieldIndices(fields) 38 | 39 | if (result.contains(-1)) { 40 | throw new IllegalArgumentException( 41 | "Fields '" + fields.mkString(", ") + 42 | "' are not valid for '" + ti.toString + "'." 43 | ) 44 | } 45 | 46 | result 47 | 48 | case _ => 49 | throw new UnsupportedOperationException( 50 | "Specifying fields by name is only" + 51 | "supported on Case Classes (for now)." 52 | ) 53 | } 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/WindowedStream.scala: -------------------------------------------------------------------------------- 1 | package io.findify.flink.api 2 | 3 | import io.findify.flink.api.function.{ProcessWindowFunction, WindowFunction} 4 | import io.findify.flink.api.function.util.{ 5 | ScalaProcessWindowFunctionWrapper, 6 | ScalaReduceFunction, 7 | ScalaWindowFunction, 8 | ScalaWindowFunctionWrapper 9 | } 10 | import org.apache.flink.annotation.{Public, PublicEvolving} 11 | import org.apache.flink.api.common.functions.{AggregateFunction, ReduceFunction} 12 | import org.apache.flink.api.common.typeinfo.TypeInformation 13 | import org.apache.flink.streaming.api.datastream.{WindowedStream => JavaWStream} 14 | import org.apache.flink.streaming.api.functions.aggregation.AggregationFunction.AggregationType 15 | import org.apache.flink.streaming.api.functions.aggregation.{ComparableAggregator, SumAggregator} 16 | import org.apache.flink.streaming.api.windowing.evictors.Evictor 17 | import org.apache.flink.streaming.api.windowing.time.Time 18 | import org.apache.flink.streaming.api.windowing.triggers.Trigger 19 | import org.apache.flink.streaming.api.windowing.windows.Window 20 | import org.apache.flink.util.Collector 21 | import ScalaStreamOps._ 22 | 23 | /** A [[WindowedStream]] represents a data stream where elements are grouped by key, and for each key, the stream of 24 | * elements is split into windows based on a [[org.apache.flink.streaming.api.windowing.assigners.WindowAssigner]]. 25 | * Window emission is triggered based on a [[Trigger]]. 26 | * 27 | * The windows are conceptually evaluated for each key individually, meaning windows can trigger at different points 28 | * for each key. 29 | * 30 | * If an [[org.apache.flink.streaming.api.windowing.evictors.Evictor]] is specified it will be used to evict elements 31 | * from the window after evaluation was triggered by the [[Trigger]] but before the actual evaluation of the window. 32 | * When using an evictor window performance will degrade significantly, since pre-aggregation of window results cannot 33 | * be used. 34 | * 35 | * Note that the [[WindowedStream]] is purely and API construct, during runtime the [[WindowedStream]] will be 36 | * collapsed together with the [[KeyedStream]] and the operation over the window into one single operation. 37 | * 38 | * @tparam T 39 | * The type of elements in the stream. 40 | * @tparam K 41 | * The type of the key by which elements are grouped. 42 | * @tparam W 43 | * The type of [[Window]] that the [[org.apache.flink.streaming.api.windowing.assigners.WindowAssigner]] assigns the 44 | * elements to. 
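These converters are what allow streams produced by Java-API code (sources, connectors) to be used with the Scala facade; a minimal sketch, assuming a hypothetical Java-side stream:

import io.findify.flink.api.ScalaStreamOps._
import org.apache.flink.streaming.api.datastream.{DataStream => JavaStream}

// Wrap a stream handed over by Java-API code and continue with Scala lambdas.
def fromJava(javaSide: JavaStream[String]): DataStream[String] =
  asScalaStream(javaSide).filter(_.nonEmpty)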
45 | */ 46 | @Public 47 | class WindowedStream[T, K, W <: Window](javaStream: JavaWStream[T, K, W]) { 48 | 49 | /** Sets the allowed lateness to a user-specified value. If not explicitly set, the allowed lateness is [[0L]]. 50 | * Setting the allowed lateness is only valid for event-time windows. If a value different than 0 is provided with a 51 | * processing-time [[org.apache.flink.streaming.api.windowing.assigners.WindowAssigner]], then an exception is 52 | * thrown. 53 | */ 54 | @PublicEvolving 55 | def allowedLateness(lateness: Time): WindowedStream[T, K, W] = { 56 | javaStream.allowedLateness(lateness) 57 | this 58 | } 59 | 60 | /** Send late arriving data to the side output identified by the given [[OutputTag]]. Data is considered late after 61 | * the watermark has passed the end of the window plus the allowed lateness set using [[allowedLateness(Time)]]. 62 | * 63 | * You can get the stream of late data using [[DataStream.getSideOutput()]] on the [[DataStream]] resulting from the 64 | * windowed operation with the same [[OutputTag]]. 65 | */ 66 | @PublicEvolving 67 | def sideOutputLateData(outputTag: OutputTag[T]): WindowedStream[T, K, W] = { 68 | javaStream.sideOutputLateData(outputTag) 69 | this 70 | } 71 | 72 | /** Sets the [[Trigger]] that should be used to trigger window emission. 73 | */ 74 | @PublicEvolving 75 | def trigger(trigger: Trigger[_ >: T, _ >: W]): WindowedStream[T, K, W] = { 76 | javaStream.trigger(trigger) 77 | this 78 | } 79 | 80 | /** Sets the [[Evictor]] that should be used to evict elements from a window before emission. 81 | * 82 | * Note: When using an evictor window performance will degrade significantly, since pre-aggregation of window results 83 | * cannot be used. 84 | */ 85 | @PublicEvolving 86 | def evictor(evictor: Evictor[_ >: T, _ >: W]): WindowedStream[T, K, W] = { 87 | javaStream.evictor(evictor) 88 | this 89 | } 90 | 91 | // ------------------------------------------------------------------------ 92 | // Operations on the keyed windows 93 | // ------------------------------------------------------------------------ 94 | 95 | // --------------------------- reduce() ----------------------------------- 96 | 97 | /** Applies a reduce function to the window. The window function is called for each evaluation of the window for each 98 | * key individually. The output of the reduce function is interpreted as a regular non-windowed stream. 99 | * 100 | * This window will try and pre-aggregate data as much as the window policies permit. For example, tumbling time 101 | * windows can perfectly pre-aggregate the data, meaning that only one element per key is stored. Sliding time 102 | * windows will pre-aggregate on the granularity of the slide interval, so a few elements are stored per key (one per 103 | * slide interval). Custom windows may not be able to pre-aggregate, or may need to store extra values in an 104 | * aggregation tree. 105 | * 106 | * @param function 107 | * The reduce function. 108 | * @return 109 | * The data stream that is the result of applying the reduce function to the window. 110 | */ 111 | def reduce(function: ReduceFunction[T]): DataStream[T] = { 112 | asScalaStream(javaStream.reduce(clean(function))) 113 | } 114 | 115 | /** Applies a reduce function to the window. The window function is called for each evaluation of the window for each 116 | * key individually. The output of the reduce function is interpreted as a regular non-windowed stream. 
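A short sketch tying the window configuration methods together (hypothetical `events` stream; implicit TypeInformation assumed in scope):

import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows
import org.apache.flink.streaming.api.windowing.time.Time

val events: DataStream[(String, Long)] = ???
val lateTag = OutputTag[(String, Long)]("late-events")

val windowedSums = events
  .keyBy(_._1)
  .window(TumblingEventTimeWindows.of(Time.minutes(1)))
  .allowedLateness(Time.seconds(30))
  .sideOutputLateData(lateTag)
  .reduce((a, b) => (a._1, a._2 + b._2))

// Elements that arrived too late for their window end up on the side output.
val lateEvents: DataStream[(String, Long)] = windowedSums.getSideOutput(lateTag)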
117 | * 118 | * This window will try and pre-aggregate data as much as the window policies permit. For example, tumbling time 119 | * windows can perfectly pre-aggregate the data, meaning that only one element per key is stored. Sliding time 120 | * windows will pre-aggregate on the granularity of the slide interval, so a few elements are stored per key (one per 121 | * slide interval). Custom windows may not be able to pre-aggregate, or may need to store extra values in an 122 | * aggregation tree. 123 | * 124 | * @param function 125 | * The reduce function. 126 | * @return 127 | * The data stream that is the result of applying the reduce function to the window. 128 | */ 129 | def reduce(function: (T, T) => T): DataStream[T] = { 130 | if (function == null) { 131 | throw new NullPointerException("Reduce function must not be null.") 132 | } 133 | val cleanFun = clean(function) 134 | val reducer = new ScalaReduceFunction[T](cleanFun) 135 | reduce(reducer) 136 | } 137 | 138 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 139 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 140 | * 141 | * Arriving data is pre-aggregated using the given pre-aggregation reducer. 142 | * 143 | * @param preAggregator 144 | * The reduce function that is used for pre-aggregation 145 | * @param function 146 | * The window function. 147 | * @return 148 | * The data stream that is the result of applying the window function to the window. 149 | */ 150 | def reduce[R: TypeInformation]( 151 | preAggregator: ReduceFunction[T], 152 | function: WindowFunction[T, R, K, W] 153 | ): DataStream[R] = { 154 | 155 | val cleanedPreAggregator = clean(preAggregator) 156 | val cleanedWindowFunction = clean(function) 157 | 158 | val applyFunction = new ScalaWindowFunctionWrapper[T, R, K, W](cleanedWindowFunction) 159 | 160 | val resultType: TypeInformation[R] = implicitly[TypeInformation[R]] 161 | asScalaStream(javaStream.reduce(cleanedPreAggregator, applyFunction, resultType)) 162 | } 163 | 164 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 165 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 166 | * 167 | * Arriving data is pre-aggregated using the given pre-aggregation reducer. 168 | * 169 | * @param preAggregator 170 | * The reduce function that is used for pre-aggregation 171 | * @param windowFunction 172 | * The window function. 173 | * @return 174 | * The data stream that is the result of applying the window function to the window. 
175 | */ 176 | def reduce[R: TypeInformation]( 177 | preAggregator: (T, T) => T, 178 | windowFunction: (K, W, Iterable[T], Collector[R]) => Unit 179 | ): DataStream[R] = { 180 | 181 | if (preAggregator == null) { 182 | throw new NullPointerException("Reduce function must not be null.") 183 | } 184 | if (windowFunction == null) { 185 | throw new NullPointerException("WindowApply function must not be null.") 186 | } 187 | 188 | val cleanReducer = clean(preAggregator) 189 | val cleanWindowFunction = clean(windowFunction) 190 | 191 | val reducer = new ScalaReduceFunction[T](cleanReducer) 192 | val applyFunction = new ScalaWindowFunction[T, R, K, W](cleanWindowFunction) 193 | 194 | asScalaStream(javaStream.reduce(reducer, applyFunction, implicitly[TypeInformation[R]])) 195 | } 196 | 197 | /** Applies the given reduce function to each window. The window reduced value is then passed as input of the window 198 | * function. The output of the window function is interpreted as a regular non-windowed stream. 199 | * 200 | * @param preAggregator 201 | * The reduce function that is used for pre-aggregation 202 | * @param function 203 | * The process window function. 204 | * @return 205 | * The data stream that is the result of applying the window function to the window. 206 | */ 207 | @PublicEvolving 208 | def reduce[R: TypeInformation]( 209 | preAggregator: (T, T) => T, 210 | function: ProcessWindowFunction[T, R, K, W] 211 | ): DataStream[R] = { 212 | 213 | val cleanedPreAggregator = clean(preAggregator) 214 | val cleanedWindowFunction = clean(function) 215 | 216 | val reducer = new ScalaReduceFunction[T](cleanedPreAggregator) 217 | val applyFunction = new ScalaProcessWindowFunctionWrapper[T, R, K, W](cleanedWindowFunction) 218 | 219 | val resultType: TypeInformation[R] = implicitly[TypeInformation[R]] 220 | asScalaStream(javaStream.reduce(reducer, applyFunction, resultType)) 221 | } 222 | 223 | /** Applies the given reduce function to each window. The window reduced value is then passed as input of the window 224 | * function. The output of the window function is interpreted as a regular non-windowed stream. 225 | * 226 | * @param preAggregator 227 | * The reduce function that is used for pre-aggregation 228 | * @param function 229 | * The process window function. 230 | * @return 231 | * The data stream that is the result of applying the window function to the window. 232 | */ 233 | @PublicEvolving 234 | def reduce[R: TypeInformation]( 235 | preAggregator: ReduceFunction[T], 236 | function: ProcessWindowFunction[T, R, K, W] 237 | ): DataStream[R] = { 238 | 239 | val cleanedPreAggregator = clean(preAggregator) 240 | val cleanedWindowFunction = clean(function) 241 | 242 | val applyFunction = new ScalaProcessWindowFunctionWrapper[T, R, K, W](cleanedWindowFunction) 243 | 244 | val resultType: TypeInformation[R] = implicitly[TypeInformation[R]] 245 | asScalaStream(javaStream.reduce(cleanedPreAggregator, applyFunction, resultType)) 246 | } 247 | 248 | // -------------------------- aggregate() --------------------------------- 249 | 250 | /** Applies the given aggregation function to each window and key. The aggregation function is called for each 251 | * element, aggregating values incrementally and keeping the state to one accumulator per key and window. 252 | * 253 | * @param aggregateFunction 254 | * The aggregation function. 255 | * @return 256 | * The data stream that is the result of applying the fold function to the window. 
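A hedged sketch of an incremental AggregateFunction (a per-key average over each window); the stream and names are hypothetical, and implicit TypeInformation for the accumulator and result types is assumed to be in scope:

import org.apache.flink.api.common.functions.AggregateFunction
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows
import org.apache.flink.streaming.api.windowing.time.Time

// (sum, count) accumulator maintained element by element, finished into an average.
class AverageAggregate extends AggregateFunction[(String, Double), (Double, Long), Double] {
  override def createAccumulator(): (Double, Long) = (0.0, 0L)
  override def add(value: (String, Double), acc: (Double, Long)): (Double, Long) =
    (acc._1 + value._2, acc._2 + 1)
  override def getResult(acc: (Double, Long)): Double =
    if (acc._2 == 0) 0.0 else acc._1 / acc._2
  override def merge(a: (Double, Long), b: (Double, Long)): (Double, Long) =
    (a._1 + b._1, a._2 + b._2)
}

val measurements: DataStream[(String, Double)] = ???
val averages: DataStream[Double] = measurements
  .keyBy(_._1)
  .window(TumblingEventTimeWindows.of(Time.minutes(1)))
  .aggregate(new AverageAggregate)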
257 | */ 258 | @PublicEvolving 259 | def aggregate[ACC: TypeInformation, R: TypeInformation]( 260 | aggregateFunction: AggregateFunction[T, ACC, R] 261 | ): DataStream[R] = { 262 | 263 | val accumulatorType: TypeInformation[ACC] = implicitly[TypeInformation[ACC]] 264 | val resultType: TypeInformation[R] = implicitly[TypeInformation[R]] 265 | 266 | asScalaStream(javaStream.aggregate(clean(aggregateFunction), accumulatorType, resultType)) 267 | } 268 | 269 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 270 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 271 | * 272 | * Arriving data is pre-aggregated using the given aggregation function. 273 | * 274 | * @param preAggregator 275 | * The aggregation function that is used for pre-aggregation 276 | * @param windowFunction 277 | * The window function. 278 | * @return 279 | * The data stream that is the result of applying the window function to the window. 280 | */ 281 | @PublicEvolving 282 | def aggregate[ACC: TypeInformation, V: TypeInformation, R: TypeInformation]( 283 | preAggregator: AggregateFunction[T, ACC, V], 284 | windowFunction: WindowFunction[V, R, K, W] 285 | ): DataStream[R] = { 286 | 287 | val cleanedPreAggregator = clean(preAggregator) 288 | val cleanedWindowFunction = clean(windowFunction) 289 | 290 | val applyFunction = new ScalaWindowFunctionWrapper[V, R, K, W](cleanedWindowFunction) 291 | 292 | val accumulatorType: TypeInformation[ACC] = implicitly[TypeInformation[ACC]] 293 | val resultType: TypeInformation[R] = implicitly[TypeInformation[R]] 294 | 295 | asScalaStream(javaStream.aggregate(cleanedPreAggregator, applyFunction, accumulatorType, resultType)) 296 | } 297 | 298 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 299 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 300 | * 301 | * Arriving data is pre-aggregated using the given aggregation function. 302 | * 303 | * @param preAggregator 304 | * The aggregation function that is used for pre-aggregation 305 | * @param windowFunction 306 | * The window function. 307 | * @return 308 | * The data stream that is the result of applying the window function to the window. 309 | */ 310 | @PublicEvolving 311 | def aggregate[ACC: TypeInformation, V: TypeInformation, R: TypeInformation]( 312 | preAggregator: AggregateFunction[T, ACC, V], 313 | windowFunction: (K, W, Iterable[V], Collector[R]) => Unit 314 | ): DataStream[R] = { 315 | 316 | val cleanedPreAggregator = clean(preAggregator) 317 | val cleanedWindowFunction = clean(windowFunction) 318 | 319 | val applyFunction = new ScalaWindowFunction[V, R, K, W](cleanedWindowFunction) 320 | 321 | val accumulatorType: TypeInformation[ACC] = implicitly[TypeInformation[ACC]] 322 | val resultType: TypeInformation[R] = implicitly[TypeInformation[R]] 323 | 324 | asScalaStream(javaStream.aggregate(cleanedPreAggregator, applyFunction, accumulatorType, resultType)) 325 | } 326 | 327 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 328 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 329 | * 330 | * Arriving data is pre-aggregated using the given aggregation function. 
331 | * 332 | * @param preAggregator 333 | * The aggregation function that is used for pre-aggregation 334 | * @param windowFunction 335 | * The window function. 336 | * @return 337 | * The data stream that is the result of applying the window function to the window. 338 | */ 339 | @PublicEvolving 340 | def aggregate[ACC: TypeInformation, V: TypeInformation, R: TypeInformation]( 341 | preAggregator: AggregateFunction[T, ACC, V], 342 | windowFunction: ProcessWindowFunction[V, R, K, W] 343 | ): DataStream[R] = { 344 | 345 | val cleanedPreAggregator = clean(preAggregator) 346 | val cleanedWindowFunction = clean(windowFunction) 347 | 348 | val applyFunction = new ScalaProcessWindowFunctionWrapper[V, R, K, W](cleanedWindowFunction) 349 | 350 | val accumulatorType: TypeInformation[ACC] = implicitly[TypeInformation[ACC]] 351 | val aggregationResultType: TypeInformation[V] = implicitly[TypeInformation[V]] 352 | val resultType: TypeInformation[R] = implicitly[TypeInformation[R]] 353 | 354 | asScalaStream( 355 | javaStream.aggregate(cleanedPreAggregator, applyFunction, accumulatorType, aggregationResultType, resultType) 356 | ) 357 | } 358 | 359 | // ---------------------------- apply() ------------------------------------- 360 | 361 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 362 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 363 | * 364 | * Note that this function requires that all data in the windows is buffered until the window is evaluated, as the 365 | * function provides no means of pre-aggregation. 366 | * 367 | * @param function 368 | * The window function. 369 | * @return 370 | * The data stream that is the result of applying the window function to the window. 371 | */ 372 | @PublicEvolving 373 | def process[R: TypeInformation](function: ProcessWindowFunction[T, R, K, W]): DataStream[R] = { 374 | 375 | val cleanFunction = clean(function) 376 | val applyFunction = new ScalaProcessWindowFunctionWrapper[T, R, K, W](cleanFunction) 377 | asScalaStream(javaStream.process(applyFunction, implicitly[TypeInformation[R]])) 378 | } 379 | 380 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 381 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 382 | * 383 | * Note that this function requires that all data in the windows is buffered until the window is evaluated, as the 384 | * function provides no means of pre-aggregation. 385 | * 386 | * @param function 387 | * The window function. 388 | * @return 389 | * The data stream that is the result of applying the window function to the window. 390 | */ 391 | def apply[R: TypeInformation](function: WindowFunction[T, R, K, W]): DataStream[R] = { 392 | 393 | val cleanFunction = clean(function) 394 | val applyFunction = new ScalaWindowFunctionWrapper[T, R, K, W](cleanFunction) 395 | asScalaStream(javaStream.apply(applyFunction, implicitly[TypeInformation[R]])) 396 | } 397 | 398 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 399 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 400 | * 401 | * Note that this function requires that all data in the windows is buffered until the window is evaluated, as the 402 | * function provides no means of pre-aggregation. 
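As a rough sketch of the full-window path, a ProcessWindowFunction that emits one count per evaluated window (hypothetical stream and names; the `context.window` accessor follows the Scala-API convention for this class):

import io.findify.flink.api.function.ProcessWindowFunction
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.util.Collector

// Buffers the window contents and emits (key, window end, element count) once per window.
class CountPerWindow extends ProcessWindowFunction[(String, Long), (String, Long, Int), String, TimeWindow] {
  override def process(
      key: String,
      context: Context,
      elements: Iterable[(String, Long)],
      out: Collector[(String, Long, Int)]
  ): Unit =
    out.collect((key, context.window.getEnd, elements.size))
}

val events: DataStream[(String, Long)] = ???
val counts = events
  .keyBy(_._1)
  .window(TumblingEventTimeWindows.of(Time.minutes(1)))
  .process(new CountPerWindow)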
403 | * 404 | * @param function 405 | * The window function. 406 | * @return 407 | * The data stream that is the result of applying the window function to the window. 408 | */ 409 | def apply[R: TypeInformation](function: (K, W, Iterable[T], Collector[R]) => Unit): DataStream[R] = { 410 | if (function == null) { 411 | throw new NullPointerException("WindowApply function must not be null.") 412 | } 413 | 414 | val cleanedFunction = clean(function) 415 | val applyFunction = new ScalaWindowFunction[T, R, K, W](cleanedFunction) 416 | 417 | asScalaStream(javaStream.apply(applyFunction, implicitly[TypeInformation[R]])) 418 | } 419 | 420 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 421 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 422 | * 423 | * Arriving data is pre-aggregated using the given pre-aggregation reducer. 424 | * 425 | * @param preAggregator 426 | * The reduce function that is used for pre-aggregation 427 | * @param function 428 | * The window function. 429 | * @return 430 | * The data stream that is the result of applying the window function to the window. 431 | * @deprecated 432 | * Use [[reduce(ReduceFunction, WindowFunction)]] instead. 433 | */ 434 | @deprecated 435 | def apply[R: TypeInformation]( 436 | preAggregator: ReduceFunction[T], 437 | function: WindowFunction[T, R, K, W] 438 | ): DataStream[R] = { 439 | 440 | val cleanedPreAggregator = clean(preAggregator) 441 | val cleanedWindowFunction = clean(function) 442 | 443 | val applyFunction = new ScalaWindowFunctionWrapper[T, R, K, W](cleanedWindowFunction) 444 | 445 | val resultType: TypeInformation[R] = implicitly[TypeInformation[R]] 446 | asScalaStream(javaStream.apply(cleanedPreAggregator, applyFunction, resultType)) 447 | } 448 | 449 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 450 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 451 | * 452 | * Arriving data is pre-aggregated using the given pre-aggregation reducer. 453 | * 454 | * @param preAggregator 455 | * The reduce function that is used for pre-aggregation 456 | * @param windowFunction 457 | * The window function. 458 | * @return 459 | * The data stream that is the result of applying the window function to the window. 460 | * @deprecated 461 | * Use [[reduce(ReduceFunction, WindowFunction)]] instead. 
462 | */ 463 | @deprecated 464 | def apply[R: TypeInformation]( 465 | preAggregator: (T, T) => T, 466 | windowFunction: (K, W, Iterable[T], Collector[R]) => Unit 467 | ): DataStream[R] = { 468 | 469 | if (preAggregator == null) { 470 | throw new NullPointerException("Reduce function must not be null.") 471 | } 472 | if (windowFunction == null) { 473 | throw new NullPointerException("WindowApply function must not be null.") 474 | } 475 | 476 | val cleanReducer = clean(preAggregator) 477 | val cleanWindowFunction = clean(windowFunction) 478 | 479 | val reducer = new ScalaReduceFunction[T](cleanReducer) 480 | val applyFunction = new ScalaWindowFunction[T, R, K, W](cleanWindowFunction) 481 | 482 | asScalaStream(javaStream.apply(reducer, applyFunction, implicitly[TypeInformation[R]])) 483 | } 484 | 485 | // ------------------------------------------------------------------------ 486 | // Aggregations on the keyed windows 487 | // ------------------------------------------------------------------------ 488 | 489 | /** Applies an aggregation that that gives the maximum of the elements in the window at the given position. 490 | */ 491 | def max(position: Int): DataStream[T] = aggregate(AggregationType.MAX, position) 492 | 493 | /** Applies an aggregation that that gives the maximum of the elements in the window at the given field. 494 | */ 495 | def max(field: String): DataStream[T] = aggregate(AggregationType.MAX, field) 496 | 497 | /** Applies an aggregation that that gives the minimum of the elements in the window at the given position. 498 | */ 499 | def min(position: Int): DataStream[T] = aggregate(AggregationType.MIN, position) 500 | 501 | /** Applies an aggregation that that gives the minimum of the elements in the window at the given field. 502 | */ 503 | def min(field: String): DataStream[T] = aggregate(AggregationType.MIN, field) 504 | 505 | /** Applies an aggregation that sums the elements in the window at the given position. 506 | */ 507 | def sum(position: Int): DataStream[T] = aggregate(AggregationType.SUM, position) 508 | 509 | /** Applies an aggregation that sums the elements in the window at the given field. 510 | */ 511 | def sum(field: String): DataStream[T] = aggregate(AggregationType.SUM, field) 512 | 513 | /** Applies an aggregation that that gives the maximum element of the window by the given position. When equality, 514 | * returns the first. 515 | */ 516 | def maxBy(position: Int): DataStream[T] = aggregate(AggregationType.MAXBY, position) 517 | 518 | /** Applies an aggregation that that gives the maximum element of the window by the given field. When equality, 519 | * returns the first. 520 | */ 521 | def maxBy(field: String): DataStream[T] = aggregate(AggregationType.MAXBY, field) 522 | 523 | /** Applies an aggregation that that gives the minimum element of the window by the given position. When equality, 524 | * returns the first. 525 | */ 526 | def minBy(position: Int): DataStream[T] = aggregate(AggregationType.MINBY, position) 527 | 528 | /** Applies an aggregation that that gives the minimum element of the window by the given field. When equality, 529 | * returns the first. 
530 | */ 531 | def minBy(field: String): DataStream[T] = aggregate(AggregationType.MINBY, field) 532 | 533 | private def aggregate(aggregationType: AggregationType, field: String): DataStream[T] = { 534 | val position = fieldNames2Indices(getInputType(), Array(field))(0) 535 | aggregate(aggregationType, position) 536 | } 537 | 538 | def aggregate(aggregationType: AggregationType, position: Int): DataStream[T] = { 539 | 540 | val jStream = javaStream.asInstanceOf[JavaWStream[Product, K, W]] 541 | 542 | val reducer = aggregationType match { 543 | case AggregationType.SUM => 544 | new SumAggregator(position, jStream.getInputType, jStream.getExecutionEnvironment.getConfig) 545 | 546 | case _ => 547 | new ComparableAggregator( 548 | position, 549 | jStream.getInputType, 550 | aggregationType, 551 | true, 552 | jStream.getExecutionEnvironment.getConfig 553 | ) 554 | } 555 | 556 | new DataStream[Product](jStream.reduce(reducer)).asInstanceOf[DataStream[T]] 557 | } 558 | 559 | // ------------------------------------------------------------------------ 560 | // Utilities 561 | // ------------------------------------------------------------------------ 562 | 563 | /** Returns a "closure-cleaned" version of the given function. Cleans only if closure cleaning is not disabled in the 564 | * [[org.apache.flink.api.common.ExecutionConfig]]. 565 | */ 566 | private[flink] def clean[F <: AnyRef](f: F): F = { 567 | new StreamExecutionEnvironment(javaStream.getExecutionEnvironment).scalaClean(f) 568 | } 569 | 570 | /** Gets the output type. 571 | */ 572 | private def getInputType(): TypeInformation[T] = javaStream.getInputType 573 | } 574 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/async/AsyncFunction.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package io.findify.flink.api.async 20 | 21 | import org.apache.flink.annotation.PublicEvolving 22 | import org.apache.flink.api.common.functions.Function 23 | 24 | import java.util.concurrent.TimeoutException 25 | 26 | /** A function to trigger async I/O operations. 27 | * 28 | * For each asyncInvoke an async io operation can be triggered, and once it has been done, the result can be collected 29 | * by calling ResultFuture.complete. For each async operation, its context is stored in the operator immediately after 30 | * invoking asyncInvoke, avoiding blocking for each stream input as long as the internal buffer is not full. 31 | * 32 | * [[ResultFuture]] can be passed into callbacks or futures to collect the result data. 
An error can also be propagate 33 | * to the async IO operator by [[ResultFuture.completeExceptionally(Throwable)]]. 34 | * 35 | * @tparam IN 36 | * The type of the input element 37 | * @tparam OUT 38 | * The type of the output elements 39 | */ 40 | @PublicEvolving 41 | trait AsyncFunction[IN, OUT] extends Function { 42 | 43 | /** Trigger the async operation for each stream input 44 | * 45 | * @param input 46 | * element coming from an upstream task 47 | * @param resultFuture 48 | * to be completed with the result data 49 | */ 50 | def asyncInvoke(input: IN, resultFuture: ResultFuture[OUT]): Unit 51 | 52 | /** [[AsyncFunction.asyncInvoke]] timeout occurred. By default, the result future is exceptionally completed with a 53 | * timeout exception. 54 | * 55 | * @param input 56 | * element coming from an upstream task 57 | * @param resultFuture 58 | * to be completed with the result data 59 | */ 60 | def timeout(input: IN, resultFuture: ResultFuture[OUT]): Unit = { 61 | resultFuture.completeExceptionally(new TimeoutException("Async function call has timed out.")) 62 | } 63 | 64 | } 65 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/async/JavaResultFutureWrapper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package io.findify.flink.api.async 20 | 21 | import org.apache.flink.annotation.Internal 22 | import org.apache.flink.streaming.api.functions.async 23 | 24 | import scala.jdk.CollectionConverters._ 25 | 26 | /** Internal wrapper class to map a Flink's Java API [[org.apache.flink.streaming.api.functions.async.ResultFuture]] to 27 | * a Scala [[io.findify.flink.api.async.ResultFuture]]. 28 | * 29 | * @param javaResultFuture 30 | * to forward the calls to 31 | * @tparam OUT 32 | * type of the output elements 33 | */ 34 | @Internal 35 | class JavaResultFutureWrapper[OUT](val javaResultFuture: async.ResultFuture[OUT]) extends ResultFuture[OUT] { 36 | override def complete(result: Iterable[OUT]): Unit = { 37 | javaResultFuture.complete(result.asJavaCollection) 38 | } 39 | 40 | override def completeExceptionally(throwable: Throwable): Unit = { 41 | javaResultFuture.completeExceptionally(throwable) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/async/ResultFuture.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
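A minimal sketch of implementing the trait with a Scala Future, assuming a hypothetical non-blocking `UserClient`; the callback completes the ResultFuture instead of blocking the operator thread:

import io.findify.flink.api.async.{AsyncFunction, ResultFuture}
import scala.concurrent.{ExecutionContext, Future}
import scala.util.{Failure, Success}

trait UserClient {                                  // hypothetical async lookup client
  def nameOf(userId: Long): Future[String]
}

class UserLookup(client: UserClient)(implicit ec: ExecutionContext)
    extends AsyncFunction[Long, (Long, String)] {

  override def asyncInvoke(userId: Long, resultFuture: ResultFuture[(Long, String)]): Unit =
    client.nameOf(userId).onComplete {
      case Success(name) => resultFuture.complete(Iterable(userId -> name))
      case Failure(err)  => resultFuture.completeExceptionally(err)
    }
}

Such a function is then attached to a stream through the helpers in AsyncDataStream.scala; timeouts fall back to the default `timeout` implementation above unless overridden.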
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package io.findify.flink.api.async 20 | 21 | import org.apache.flink.annotation.PublicEvolving 22 | 23 | /** The result future collects data/errors from the user code while processing asynchronous I/O operations. 24 | * 25 | * @tparam OUT 26 | * type of the output element 27 | */ 28 | @PublicEvolving 29 | trait ResultFuture[OUT] { 30 | 31 | /** Complete the ResultFuture with a set of result elements. 32 | * 33 | * Note that it should be called for exactly one time in the user code. Calling this function for multiple times will 34 | * cause data lose. 35 | * 36 | * Put all results in a [[Iterable]] and then issue ResultFuture.complete(Iterable). 37 | * 38 | * @param result 39 | * to complete the async collector with 40 | */ 41 | def complete(result: Iterable[OUT]): Unit 42 | 43 | /** Complete this ResultFuture with an error. 44 | * 45 | * @param throwable 46 | * to complete the async collector with 47 | */ 48 | def completeExceptionally(throwable: Throwable): Unit 49 | } 50 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/async/RichAsyncFunction.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package io.findify.flink.api.async 20 | 21 | import org.apache.flink.api.common.functions.AbstractRichFunction 22 | 23 | /** Rich variant of [[AsyncFunction]]. As a [[org.apache.flink.api.common.functions.RichFunction]], it gives access to 24 | * the [[org.apache.flink.api.common.functions.RuntimeContext]] and provides setup and teardown methods. 25 | * 26 | * State related apis in [[org.apache.flink.api.common.functions.RuntimeContext]] are not supported yet because the key 27 | * may get changed while accessing states in the working thread. 
28 | * 29 | * [[org.apache.flink.api.common.functions.IterationRuntimeContext#getIterationAggregator(String)]] is not supported 30 | * since the aggregator may be modified by multiple threads. 31 | * 32 | * @tparam IN 33 | * The type of the input value. 34 | * @tparam OUT 35 | * The type of the output value. 36 | */ 37 | abstract class RichAsyncFunction[IN, OUT] extends AbstractRichFunction with AsyncFunction[IN, OUT] {} 38 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/async/ScalaRichAsyncFunctionWrapper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package io.findify.flink.api.async 20 | 21 | import org.apache.flink.api.common.functions.RuntimeContext 22 | import org.apache.flink.configuration.Configuration 23 | import org.apache.flink.streaming.api.functions.async.{ 24 | ResultFuture => JResultFuture, 25 | RichAsyncFunction => JRichAsyncFunction 26 | } 27 | 28 | /** A wrapper function that exposes a Scala RichAsyncFunction as a Java Rich Async Function. 29 | * 30 | * The Scala and Java RichAsyncFunctions differ in their type of "ResultFuture" 31 | * - Scala RichAsyncFunction: [[io.findify.flink.api.async.ResultFuture]] 32 | * - Java RichAsyncFunction: [[org.apache.flink.streaming.api.functions.async.ResultFuture]] 33 | */ 34 | final class ScalaRichAsyncFunctionWrapper[IN, OUT](func: RichAsyncFunction[IN, OUT]) 35 | extends JRichAsyncFunction[IN, OUT] { 36 | 37 | override def asyncInvoke(input: IN, resultFuture: JResultFuture[OUT]): Unit = { 38 | func.asyncInvoke(input, new JavaResultFutureWrapper[OUT](resultFuture)) 39 | } 40 | 41 | override def timeout(input: IN, resultFuture: JResultFuture[OUT]): Unit = { 42 | func.timeout(input, new JavaResultFutureWrapper[OUT](resultFuture)) 43 | } 44 | 45 | override def open(parameters: Configuration): Unit = { 46 | func.open(parameters) 47 | } 48 | 49 | override def close(): Unit = { 50 | func.close() 51 | } 52 | 53 | override def setRuntimeContext(runtimeContext: RuntimeContext): Unit = { 54 | super.setRuntimeContext(runtimeContext) 55 | func.setRuntimeContext(super.getRuntimeContext) 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/extensions/impl/acceptPartialFunctions/OnConnectedStream.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package io.findify.flink.api.extensions.impl.acceptPartialFunctions 19 | 20 | import org.apache.flink.annotation.PublicEvolving 21 | import org.apache.flink.api.common.typeinfo.TypeInformation 22 | import io.findify.flink.api.{ConnectedStreams, DataStream} 23 | 24 | /** Wraps a connected data stream, allowing to use anonymous partial functions to perform extraction of items in a 25 | * tuple, case class instance or collection 26 | * 27 | * @param stream 28 | * The wrapped data stream 29 | * @tparam IN1 30 | * The type of the data stream items coming from the first connection 31 | * @tparam IN2 32 | * The type of the data stream items coming from the second connection 33 | */ 34 | class OnConnectedStream[IN1, IN2](stream: ConnectedStreams[IN1, IN2]) { 35 | 36 | /** Applies a CoMap transformation on the connected streams. 37 | * 38 | * The transformation consists of two separate functions, where the first one is called for each element of the first 39 | * connected stream, and the second one is called for each element of the second connected stream. 40 | * 41 | * @param map1 42 | * Function called per element of the first input. 43 | * @param map2 44 | * Function called per element of the second input. 45 | * @return 46 | * The resulting data stream. 47 | */ 48 | @PublicEvolving 49 | def mapWith[R: TypeInformation](map1: IN1 => R, map2: IN2 => R): DataStream[R] = 50 | stream.map(map1, map2) 51 | 52 | /** Applies a CoFlatMap transformation on the connected streams. 53 | * 54 | * The transformation consists of two separate functions, where the first one is called for each element of the first 55 | * connected stream, and the second one is called for each element of the second connected stream. 56 | * 57 | * @param flatMap1 58 | * Function called per element of the first input. 59 | * @param flatMap2 60 | * Function called per element of the second input. 61 | * @return 62 | * The resulting data stream. 63 | */ 64 | @PublicEvolving 65 | def flatMapWith[R: TypeInformation]( 66 | flatMap1: IN1 => TraversableOnce[R], 67 | flatMap2: IN2 => TraversableOnce[R] 68 | ): DataStream[R] = 69 | stream.flatMap(flatMap1, flatMap2) 70 | 71 | /** Keys the two connected streams together. After this operation, all elements with the same key from both streams 72 | * will be sent to the same parallel instance of the transformation functions. 
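A hedged sketch of the connected-stream variant, assuming the implicit conversions from this project's extensions package (extensions/package.scala) are in scope and that TypeInformation for the result type is available:

import io.findify.flink.api.extensions._

val control: DataStream[String] = ???               // hypothetical control messages
val data: DataStream[(String, Long)] = ???          // hypothetical measurements

// The tuple side can be destructured directly in a partial function.
val merged: DataStream[String] = data
  .connect(control)
  .mapWith(
    map1 = { case (id, value) => s"$id=$value" },
    map2 = msg => s"control: $msg"
  )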
73 | * 74 | * @param key1 75 | * The first stream's key function 76 | * @param key2 77 | * The second stream's key function 78 | * @return 79 | * The key-grouped connected streams 80 | */ 81 | @PublicEvolving 82 | def keyingBy[KEY: TypeInformation](key1: IN1 => KEY, key2: IN2 => KEY): ConnectedStreams[IN1, IN2] = 83 | stream.keyBy(key1, key2) 84 | 85 | } 86 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/extensions/impl/acceptPartialFunctions/OnDataStream.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package io.findify.flink.api.extensions.impl.acceptPartialFunctions 19 | 20 | import org.apache.flink.annotation.PublicEvolving 21 | import org.apache.flink.api.common.typeinfo.TypeInformation 22 | import io.findify.flink.api.{DataStream, KeyedStream} 23 | 24 | /** Wraps a data stream, allowing to use anonymous partial functions to perform extraction of items in a tuple, case 25 | * class instance or collection 26 | * 27 | * @param stream 28 | * The wrapped data stream 29 | * @tparam T 30 | * The type of the data stream items 31 | */ 32 | class OnDataStream[T](stream: DataStream[T]) { 33 | 34 | /** Applies a function `fun` to each item of the stream 35 | * 36 | * @param fun 37 | * The function to be applied to each item 38 | * @tparam R 39 | * The type of the items in the returned stream 40 | * @return 41 | * A dataset of R 42 | */ 43 | @PublicEvolving 44 | def mapWith[R: TypeInformation](fun: T => R): DataStream[R] = 45 | stream.map(fun) 46 | 47 | /** Applies a function `fun` to each item of the stream, producing a collection of items that will be flattened in the 48 | * resulting stream 49 | * 50 | * @param fun 51 | * The function to be applied to each item 52 | * @tparam R 53 | * The type of the items in the returned stream 54 | * @return 55 | * A dataset of R 56 | */ 57 | @PublicEvolving 58 | def flatMapWith[R: TypeInformation](fun: T => TraversableOnce[R]): DataStream[R] = 59 | stream.flatMap(fun) 60 | 61 | /** Applies a predicate `fun` to each item of the stream, keeping only those for which the predicate holds 62 | * 63 | * @param fun 64 | * The predicate to be tested on each item 65 | * @return 66 | * A dataset of R 67 | */ 68 | @PublicEvolving 69 | def filterWith(fun: T => Boolean): DataStream[T] = 70 | stream.filter(fun) 71 | 72 | /** Keys the items according to a keying function `fun` 73 | * 74 | * @param fun 75 | * The keying function 76 | * @tparam K 77 | * The type of the key, for which type information must be known 78 | * @return 79 | * A stream of Ts keyed by Ks 80 | */ 81 | @PublicEvolving 82 | def keyingBy[K: 
TypeInformation](fun: T => K): KeyedStream[T, K] = 83 | stream.keyBy(fun) 84 | 85 | } 86 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/extensions/impl/acceptPartialFunctions/OnJoinedStream.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package io.findify.flink.api.extensions.impl.acceptPartialFunctions 19 | 20 | import org.apache.flink.annotation.PublicEvolving 21 | import org.apache.flink.api.common.typeinfo.TypeInformation 22 | import io.findify.flink.api.{DataStream, JoinedStreams} 23 | import org.apache.flink.streaming.api.windowing.windows.Window 24 | 25 | /** Wraps a joined data stream, allowing to use anonymous partial functions to perform extraction of items in a tuple, 26 | * case class instance or collection 27 | * 28 | * @param stream 29 | * The wrapped data stream 30 | * @tparam L 31 | * The type of the data stream items from the left side of the join 32 | * @tparam R 33 | * The type of the data stream items from the right input of the join 34 | * @tparam K 35 | * The type of key 36 | * @tparam W 37 | * The type of the window 38 | */ 39 | class OnJoinedStream[L, R, K, W <: Window](stream: JoinedStreams[L, R]#Where[K]#EqualTo#WithWindow[W]) { 40 | 41 | /** Completes the join operation with the user function that is executed for windowed groups. 42 | * 43 | * @param fun 44 | * The function that defines the projection of the join 45 | * @tparam O 46 | * The return type of the projection, for which type information must be known 47 | * @return 48 | * A fully joined data set of Os 49 | */ 50 | @PublicEvolving 51 | def projecting[O: TypeInformation](fun: (L, R) => O): DataStream[O] = 52 | stream.apply(fun) 53 | 54 | } 55 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/extensions/impl/acceptPartialFunctions/OnKeyedStream.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package io.findify.flink.api.extensions.impl.acceptPartialFunctions 19 | 20 | import org.apache.flink.annotation.PublicEvolving 21 | import org.apache.flink.api.common.typeinfo.TypeInformation 22 | import io.findify.flink.api.{DataStream, KeyedStream} 23 | 24 | /** Wraps a keyed data stream, allowing to use anonymous partial functions to perform extraction of items in a tuple, 25 | * case class instance or collection 26 | * 27 | * @param stream 28 | * The wrapped data stream 29 | * @tparam T 30 | * The type of the data stream items 31 | * @tparam K 32 | * The type of key 33 | */ 34 | class OnKeyedStream[T, K](stream: KeyedStream[T, K]) { 35 | 36 | /** Applies a reducer `fun` to the stream 37 | * 38 | * @param fun 39 | * The reducing function to be applied on the keyed stream 40 | * @return 41 | * A data set of Ts 42 | */ 43 | @PublicEvolving 44 | def reduceWith(fun: (T, T) => T): DataStream[T] = 45 | stream.reduce(fun) 46 | } 47 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/extensions/impl/acceptPartialFunctions/OnWindowedStream.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package io.findify.flink.api.extensions.impl.acceptPartialFunctions 19 | 20 | import org.apache.flink.annotation.PublicEvolving 21 | import org.apache.flink.api.common.typeinfo.TypeInformation 22 | import io.findify.flink.api.{DataStream, WindowedStream} 23 | import org.apache.flink.streaming.api.windowing.windows.Window 24 | import org.apache.flink.util.Collector 25 | 26 | /** Wraps a windowed data stream, allowing to use anonymous partial functions to perform extraction of items in a tuple, 27 | * case class instance or collection 28 | * 29 | * @param stream 30 | * The wrapped data stream 31 | * @tparam T 32 | * The type of the data stream items 33 | * @tparam K 34 | * The type of key 35 | * @tparam W 36 | * The type of the window 37 | */ 38 | class OnWindowedStream[T, K, W <: Window](stream: WindowedStream[T, K, W]) { 39 | 40 | /** Applies a reduce function to the window. The window function is called for each evaluation of the window for each 41 | * key individually.
The output of the reduce function is interpreted as a regular non-windowed stream. 42 | * 43 | * This window will try and pre-aggregate data as much as the window policies permit. For example,tumbling time 44 | * windows can perfectly pre-aggregate the data, meaning that only one element per key is stored. Sliding time 45 | * windows will pre-aggregate on the granularity of the slide interval, so a few elements are stored per key (one per 46 | * slide interval). Custom windows may not be able to pre-aggregate, or may need to store extra values in an 47 | * aggregation tree. 48 | * 49 | * @param function 50 | * The reduce function. 51 | * @return 52 | * The data stream that is the result of applying the reduce function to the window. 53 | */ 54 | @PublicEvolving 55 | def reduceWith(function: (T, T) => T): DataStream[T] = 56 | stream.reduce(function) 57 | 58 | } 59 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/extensions/package.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package io.findify.flink.api 19 | 20 | import org.apache.flink.annotation.PublicEvolving 21 | import io.findify.flink.api.extensions.impl.acceptPartialFunctions._ 22 | import org.apache.flink.streaming.api.windowing.windows.Window 23 | 24 | /** acceptPartialFunctions extends the original DataStream with methods with unique names that delegate to core 25 | * higher-order functions (e.g. `map`) so that we can work around the fact that overloaded methods taking functions as 26 | * parameters can't accept partial functions as well. This enables the possibility to directly apply pattern matching 27 | * to decompose inputs such as tuples, case classes and collections. 
28 | * 29 | * The following is a small example that showcases how these extensions would work on a Flink data stream: 30 | * 31 | * {{{ 32 | * object Main { 33 | * import io.findify.flink.api.extensions._ 34 | * case class Point(x: Double, y: Double) 35 | * def main(args: Array[String]): Unit = { 36 | * val env = StreamExecutionEnvironment.getExecutionEnvironment 37 | * val ds = env.fromElements(Point(1, 2), Point(3, 4), Point(5, 6)) 38 | * ds.filterWith { 39 | * case Point(x, _) => x > 1 40 | * }.reduceWith { 41 | * case (Point(x1, y1), Point(x2, y2)) => Point(x1 + x2, y1 + y2) 42 | * }.mapWith { 43 | * case Point(x, y) => (x, y) 44 | * }.flatMapWith { 45 | * case (x, y) => Seq('x' -> x, 'y' -> y) 46 | * }.keyingBy { 47 | * case (id, value) => id 48 | * } 49 | * } 50 | * } 51 | * }}} 52 | * 53 | * The extension consists of several implicit conversions over all the data stream representations that could gain from 54 | * this feature. To use this set of extension methods the user has to explicitly opt-in by importing 55 | * `io.findify.flink.api.extensions.acceptPartialFunctions`. 56 | * 57 | * For more information and usage examples please consult the Apache Flink official documentation. 58 | */ 59 | package object extensions { 60 | 61 | @PublicEvolving 62 | implicit def acceptPartialFunctions[T](ds: DataStream[T]): OnDataStream[T] = 63 | new OnDataStream[T](ds) 64 | 65 | @PublicEvolving 66 | implicit def acceptPartialFunctions[T, K](ds: KeyedStream[T, K]): OnKeyedStream[T, K] = 67 | new OnKeyedStream[T, K](ds) 68 | 69 | @PublicEvolving 70 | implicit def acceptPartialFunctions[L, R, K, W <: Window]( 71 | ds: JoinedStreams[L, R]#Where[K]#EqualTo#WithWindow[W] 72 | ): OnJoinedStream[L, R, K, W] = 73 | new OnJoinedStream[L, R, K, W](ds) 74 | 75 | @PublicEvolving 76 | implicit def acceptPartialFunctions[IN1, IN2](ds: ConnectedStreams[IN1, IN2]): OnConnectedStream[IN1, IN2] = 77 | new OnConnectedStream[IN1, IN2](ds) 78 | 79 | @PublicEvolving 80 | implicit def acceptPartialFunctions[T, K, W <: Window](ds: WindowedStream[T, K, W]): OnWindowedStream[T, K, W] = 81 | new OnWindowedStream[T, K, W](ds) 82 | 83 | } 84 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/function/AllWindowFunction.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License.
17 | */ 18 | package io.findify.flink.api.function 19 | 20 | import org.apache.flink.annotation.Public 21 | import org.apache.flink.api.common.functions.Function 22 | import org.apache.flink.streaming.api.windowing.windows.Window 23 | import org.apache.flink.util.Collector 24 | 25 | import java.io.Serializable 26 | 27 | /** Base interface for functions that are evaluated over non-grouped windows, 28 | * i.e., windows over all stream partitions. 29 | * 30 | * @tparam IN 31 | * The type of the input value. 32 | * @tparam OUT 33 | * The type of the output value. 34 | */ 35 | @Public 36 | trait AllWindowFunction[IN, OUT, W <: Window] extends Function with Serializable { 37 | 38 | /** Evaluates the window and outputs none or several elements. 39 | * 40 | * @param window 41 | * The window that is being evaluated. 42 | * @param input 43 | * The elements in the window being evaluated. 44 | * @param out 45 | * A collector for emitting elements. 46 | * @throws Exception 47 | * The function may throw exceptions to fail the program and trigger recovery. 48 | */ 49 | def apply(window: W, input: Iterable[IN], out: Collector[OUT]): Unit 50 | } 51 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/function/ProcessAllWindowFunction.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package io.findify.flink.api.function 20 | 21 | import io.findify.flink.api.OutputTag 22 | import org.apache.flink.annotation.PublicEvolving 23 | import org.apache.flink.api.common.functions.AbstractRichFunction 24 | import org.apache.flink.api.common.state.KeyedStateStore 25 | import org.apache.flink.streaming.api.windowing.windows.Window 26 | import org.apache.flink.util.Collector 27 | 28 | /** Base abstract class for functions that are evaluated over non-keyed windows, i.e. windows over all stream 29 | * partitions, using a context for retrieving extra information. 30 | * 31 | * @tparam IN 32 | * The type of the input value. 33 | * @tparam OUT 34 | * The type of the output value. 35 | * @tparam W 36 | * The type of the window. 37 | */ 38 | @PublicEvolving 39 | abstract class ProcessAllWindowFunction[IN, OUT, W <: Window] extends AbstractRichFunction { 40 | 41 | /** Evaluates the window and outputs none or several elements. 42 | * 43 | * @param context 44 | * The context in which the window is being evaluated. 45 | * @param elements 46 | * The elements in the window being evaluated. 47 | * @param out 48 | * A collector for emitting elements. 49 | * @throws Exception 50 | * The function may throw exceptions to fail the program and trigger recovery.
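 *
 *   For illustration only, a minimal hypothetical subclass (the class name and element types are assumptions;
 *   `TimeWindow` comes from `org.apache.flink.streaming.api.windowing.windows`):
 *   {{{
 *     class CountAllWindowFunction extends ProcessAllWindowFunction[String, Long, TimeWindow] {
 *       override def process(context: Context, elements: Iterable[String], out: Collector[Long]): Unit =
 *         out.collect(elements.size.toLong) // one count per window, over all stream partitions
 *     }
 *   }}}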
51 | */ 52 | @throws[Exception] 53 | def process(context: Context, elements: Iterable[IN], out: Collector[OUT]): Unit 54 | 55 | /** Deletes any state in the [[Context]] when the Window expires (the watermark passes its `maxTimestamp` + 56 | * `allowedLateness`). 57 | * 58 | * @param context 59 | * The context to which the window is being evaluated 60 | * @throws Exception 61 | * The function may throw exceptions to fail the program and trigger recovery. 62 | */ 63 | @throws[Exception] 64 | def clear(context: Context): Unit = {} 65 | 66 | /** The context holding window metadata 67 | */ 68 | abstract class Context { 69 | 70 | /** @return 71 | * The window that is being evaluated. 72 | */ 73 | def window: W 74 | 75 | /** State accessor for per-key and per-window state. 76 | */ 77 | def windowState: KeyedStateStore 78 | 79 | /** State accessor for per-key global state. 80 | */ 81 | def globalState: KeyedStateStore 82 | 83 | /** Emits a record to the side output identified by the [[OutputTag]]. 84 | */ 85 | def output[X](outputTag: OutputTag[X], value: X): Unit 86 | } 87 | 88 | } 89 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/function/ProcessWindowFunction.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package io.findify.flink.api.function 20 | 21 | import io.findify.flink.api.OutputTag 22 | import org.apache.flink.annotation.PublicEvolving 23 | import org.apache.flink.api.common.functions.AbstractRichFunction 24 | import org.apache.flink.api.common.state.KeyedStateStore 25 | import org.apache.flink.streaming.api.windowing.windows.Window 26 | import org.apache.flink.util.Collector 27 | 28 | /** Base abstract class for functions that are evaluated over keyed (grouped) windows using a context for retrieving 29 | * extra information. 30 | * 31 | * @tparam IN 32 | * The type of the input value. 33 | * @tparam OUT 34 | * The type of the output value. 35 | * @tparam KEY 36 | * The type of the key. 37 | * @tparam W 38 | * The type of the window. 39 | */ 40 | @PublicEvolving 41 | abstract class ProcessWindowFunction[IN, OUT, KEY, W <: Window] extends AbstractRichFunction { 42 | 43 | /** Evaluates the window and outputs none or several elements. 44 | * 45 | * @param key 46 | * The key for which this window is evaluated. 47 | * @param context 48 | * The context in which the window is being evaluated. 49 | * @param elements 50 | * The elements in the window being evaluated. 51 | * @param out 52 | * A collector for emitting elements. 53 | * @throws Exception 54 | * The function may throw exceptions to fail the program and trigger recovery. 
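 *
 *   For illustration only, a minimal hypothetical subclass (the class name and element types are assumptions;
 *   `TimeWindow` comes from `org.apache.flink.streaming.api.windowing.windows`):
 *   {{{
 *     class CountPerKeyWindowFunction extends ProcessWindowFunction[String, (String, Long), String, TimeWindow] {
 *       override def process(key: String, context: Context, elements: Iterable[String], out: Collector[(String, Long)]): Unit =
 *         out.collect((key, elements.size.toLong)) // one count per key and window; context.window exposes window metadata
 *     }
 *   }}}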
55 | */ 56 | @throws[Exception] 57 | def process(key: KEY, context: Context, elements: Iterable[IN], out: Collector[OUT]): Unit 58 | 59 | /** Deletes any state in the [[Context]] when the Window expires (the watermark passes its `maxTimestamp` + 60 | * `allowedLateness`). 61 | * 62 | * @param context 63 | * The context to which the window is being evaluated 64 | * @throws Exception 65 | * The function may throw exceptions to fail the program and trigger recovery. 66 | */ 67 | @throws[Exception] 68 | def clear(context: Context): Unit = {} 69 | 70 | /** The context holding window metadata 71 | */ 72 | abstract class Context { 73 | 74 | /** Returns the window that is being evaluated. 75 | */ 76 | def window: W 77 | 78 | /** Returns the current processing time. 79 | */ 80 | def currentProcessingTime: Long 81 | 82 | /** Returns the current event-time watermark. 83 | */ 84 | def currentWatermark: Long 85 | 86 | /** State accessor for per-key and per-window state. 87 | */ 88 | def windowState: KeyedStateStore 89 | 90 | /** State accessor for per-key global state. 91 | */ 92 | def globalState: KeyedStateStore 93 | 94 | /** Emits a record to the side output identified by the [[OutputTag]]. 95 | */ 96 | def output[X](outputTag: OutputTag[X], value: X): Unit 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/function/RichAllWindowFunction.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package io.findify.flink.api.function 20 | 21 | import org.apache.flink.api.common.functions.AbstractRichFunction 22 | import org.apache.flink.streaming.api.windowing.windows.Window 23 | 24 | /** Rich variant of the [[io.findify.flink.api.function.AllWindowFunction]]. 25 | * 26 | * As a [[org.apache.flink.api.common.functions.RichFunction]], it gives access to the 27 | * [[org.apache.flink.api.common.functions.RuntimeContext]] and provides setup and tear-down methods. 28 | * 29 | * @tparam IN 30 | * The type of the input value. 31 | * @tparam OUT 32 | * The type of the output value. 33 | * @tparam W 34 | * The type of Window that this window function can be applied on. 
35 | */ 36 | abstract class RichAllWindowFunction[IN, OUT, W <: Window] 37 | extends AbstractRichFunction 38 | with AllWindowFunction[IN, OUT, W] {} 39 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/function/RichWindowFunction.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package io.findify.flink.api.function 20 | 21 | import org.apache.flink.api.common.functions.AbstractRichFunction 22 | import org.apache.flink.streaming.api.windowing.windows.Window 23 | 24 | /** Rich variant of the [[io.findify.flink.api.function.WindowFunction]]. 25 | * 26 | * As a [[org.apache.flink.api.common.functions.RichFunction]], it gives access to the 27 | * [[org.apache.flink.api.common.functions.RuntimeContext]] and provides setup and tear-down methods. 28 | * 29 | * @tparam IN 30 | * The type of the input value. 31 | * @tparam OUT 32 | * The type of the output value. 33 | * @tparam KEY 34 | * The type of the key. 35 | * @tparam W 36 | * The type of Window that this window function can be applied on. 37 | */ 38 | abstract class RichWindowFunction[IN, OUT, KEY, W <: Window] 39 | extends AbstractRichFunction 40 | with WindowFunction[IN, OUT, KEY, W] {} 41 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/function/StatefulFunction.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package io.findify.flink.api.function 20 | 21 | import org.apache.flink.annotation.Public 22 | import org.apache.flink.api.common.functions.RichFunction 23 | import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor} 24 | import org.apache.flink.api.common.typeutils.TypeSerializer 25 | import org.apache.flink.configuration.Configuration 26 | 27 | /** Trait implementing the functionality necessary to apply stateful functions in RichFunctions without exposing the 28 | * OperatorStates to the user. The user should call the applyWithState method in his own RichFunction implementation. 29 | */ 30 | @Public 31 | trait StatefulFunction[I, O, S] extends RichFunction { 32 | 33 | protected val stateSerializer: TypeSerializer[S] 34 | 35 | private[this] var state: ValueState[S] = _ 36 | 37 | def applyWithState(in: I, fun: (I, Option[S]) => (O, Option[S])): O = { 38 | val (o, s: Option[S]) = fun(in, Option(state.value())) 39 | s match { 40 | case Some(v) => state.update(v) 41 | case None => state.update(null.asInstanceOf[S]) 42 | } 43 | o 44 | } 45 | 46 | override def open(c: Configuration) = { 47 | val info = new ValueStateDescriptor[S]("state", stateSerializer) 48 | state = getRuntimeContext().getState(info) 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/function/WindowFunction.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package io.findify.flink.api.function 19 | 20 | import org.apache.flink.annotation.Public 21 | import org.apache.flink.api.common.functions.Function 22 | import org.apache.flink.streaming.api.windowing.windows.Window 23 | import org.apache.flink.util.Collector 24 | 25 | import java.io.Serializable 26 | 27 | /** Base interface for functions that are evaluated over keyed (grouped) windows. 28 | * 29 | * @tparam IN 30 | * The type of the input value. 31 | * @tparam OUT 32 | * The type of the output value. 33 | * @tparam KEY 34 | * The type of the key. 35 | */ 36 | @Public 37 | trait WindowFunction[IN, OUT, KEY, W <: Window] extends Function with Serializable { 38 | 39 | /** Evaluates the window and outputs none or several elements. 40 | * 41 | * @param key 42 | * The key for which this window is evaluated. 43 | * @param window 44 | * The window that is being evaluated. 45 | * @param input 46 | * The elements in the window being evaluated. 47 | * @param out 48 | * A collector for emitting elements. 49 | * @throws Exception 50 | * The function may throw exceptions to fail the program and trigger recovery. 
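 *
 *   For illustration only, a minimal hypothetical implementation (the class name and element types are assumptions;
 *   `TimeWindow` comes from `org.apache.flink.streaming.api.windowing.windows`):
 *   {{{
 *     class ConcatWindowFunction extends WindowFunction[String, (String, String), String, TimeWindow] {
 *       override def apply(key: String, window: TimeWindow, input: Iterable[String], out: Collector[(String, String)]): Unit =
 *         out.collect((key, input.mkString("|"))) // join all elements seen for this key in this window
 *     }
 *   }}}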
51 | */ 52 | def apply(key: KEY, window: W, input: Iterable[IN], out: Collector[OUT]): Unit 53 | } 54 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/function/util/ScalaAllWindowFunction.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package io.findify.flink.api.function.util 20 | 21 | import org.apache.flink.streaming.api.functions.windowing.{AllWindowFunction => JAllWindowFunction} 22 | import org.apache.flink.streaming.api.windowing.windows.Window 23 | import org.apache.flink.util.Collector 24 | 25 | import scala.collection.JavaConverters._ 26 | 27 | /** A wrapper function that exposes a Scala Function3 as a Java AllWindowFunction. 28 | */ 29 | final class ScalaAllWindowFunction[IN, OUT, W <: Window]( 30 | private[this] val function: (W, Iterable[IN], Collector[OUT]) => Unit 31 | ) extends JAllWindowFunction[IN, OUT, W] { 32 | 33 | @throws(classOf[Exception]) 34 | override def apply(window: W, input: java.lang.Iterable[IN], out: Collector[OUT]) = { 35 | function.apply(window, input.asScala, out) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/function/util/ScalaAllWindowFunctionWrapper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package io.findify.flink.api.function.util 20 | 21 | import org.apache.flink.api.common.functions.{IterationRuntimeContext, RichFunction, RuntimeContext} 22 | import org.apache.flink.api.java.operators.translation.WrappingFunction 23 | import org.apache.flink.streaming.api.functions.windowing.{AllWindowFunction => JAllWindowFunction} 24 | import io.findify.flink.api.function.AllWindowFunction 25 | import org.apache.flink.streaming.api.windowing.windows.Window 26 | import org.apache.flink.util.Collector 27 | 28 | import scala.jdk.CollectionConverters._ 29 | 30 | /** A wrapper function that exposes a Scala WindowFunction as a JavaWindow function. 31 | * 32 | * The Scala and Java Window functions differ in their type of "Iterable": 33 | * - Scala WindowFunction: scala.Iterable 34 | * - Java WindowFunction: java.lang.Iterable 35 | */ 36 | final class ScalaAllWindowFunctionWrapper[IN, OUT, W <: Window](func: AllWindowFunction[IN, OUT, W]) 37 | extends WrappingFunction[AllWindowFunction[IN, OUT, W]](func) 38 | with JAllWindowFunction[IN, OUT, W] 39 | with RichFunction { 40 | 41 | @throws(classOf[Exception]) 42 | override def apply(window: W, input: java.lang.Iterable[IN], out: Collector[OUT]) = { 43 | wrappedFunction.apply(window, input.asScala, out) 44 | } 45 | 46 | override def getRuntimeContext: RuntimeContext = { 47 | throw new RuntimeException("This should never be called") 48 | } 49 | 50 | override def getIterationRuntimeContext: IterationRuntimeContext = { 51 | throw new RuntimeException("This should never be called") 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/function/util/ScalaProcessWindowFunctionWrapper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package io.findify.flink.api.function.util 20 | 21 | import io.findify.flink.api.OutputTag 22 | import io.findify.flink.api.function.{ProcessAllWindowFunction, ProcessWindowFunction} 23 | import org.apache.flink.api.common.functions.RuntimeContext 24 | import org.apache.flink.configuration.Configuration 25 | import org.apache.flink.streaming.api.functions.windowing.{ 26 | ProcessAllWindowFunction => JProcessAllWindowFunction, 27 | ProcessWindowFunction => JProcessWindowFunction 28 | } 29 | import org.apache.flink.streaming.api.windowing.windows.Window 30 | import org.apache.flink.util.Collector 31 | 32 | import scala.collection.JavaConverters._ 33 | 34 | /** A wrapper function that exposes a Scala ProcessWindowFunction as a ProcessWindowFunction function. 
35 | * 36 | * The Scala and Java Window functions differ in their type of "Iterable": 37 | * - Scala WindowFunction: scala.Iterable 38 | * - Java WindowFunction: java.lang.Iterable 39 | */ 40 | final class ScalaProcessWindowFunctionWrapper[IN, OUT, KEY, W <: Window]( 41 | private[this] val func: ProcessWindowFunction[IN, OUT, KEY, W] 42 | ) extends JProcessWindowFunction[IN, OUT, KEY, W] { 43 | 44 | override def process( 45 | key: KEY, 46 | context: JProcessWindowFunction[IN, OUT, KEY, W]#Context, 47 | elements: java.lang.Iterable[IN], 48 | out: Collector[OUT] 49 | ): Unit = { 50 | val ctx = new func.Context { 51 | override def window = context.window 52 | 53 | override def currentProcessingTime = context.currentProcessingTime 54 | 55 | override def currentWatermark = context.currentWatermark 56 | 57 | override def windowState = context.windowState() 58 | 59 | override def globalState = context.globalState() 60 | 61 | override def output[X](outputTag: OutputTag[X], value: X) = context.output(outputTag, value) 62 | } 63 | func.process(key, ctx, elements.asScala, out) 64 | } 65 | 66 | override def clear(context: JProcessWindowFunction[IN, OUT, KEY, W]#Context): Unit = { 67 | val ctx = new func.Context { 68 | override def window = context.window 69 | 70 | override def currentProcessingTime = context.currentProcessingTime 71 | 72 | override def currentWatermark = context.currentWatermark 73 | 74 | override def windowState = context.windowState() 75 | 76 | override def globalState = context.globalState() 77 | 78 | override def output[X](outputTag: OutputTag[X], value: X) = context.output(outputTag, value) 79 | } 80 | func.clear(ctx) 81 | } 82 | 83 | override def setRuntimeContext(t: RuntimeContext): Unit = { 84 | super.setRuntimeContext(t) 85 | func match { 86 | case rfunc: ProcessWindowFunction[IN, OUT, KEY, W] => rfunc.setRuntimeContext(t) 87 | case null => 88 | } 89 | } 90 | 91 | override def open(parameters: Configuration): Unit = { 92 | super.open(parameters) 93 | func match { 94 | case rfunc: ProcessWindowFunction[IN, OUT, KEY, W] => rfunc.open(parameters) 95 | case null => 96 | } 97 | } 98 | 99 | override def close(): Unit = { 100 | super.close() 101 | func match { 102 | case rfunc: ProcessWindowFunction[IN, OUT, KEY, W] => rfunc.close() 103 | case null => 104 | } 105 | } 106 | } 107 | 108 | /** A wrapper function that exposes a Scala ProcessWindowFunction as a ProcessWindowFunction function. 
109 | * 110 | * The Scala and Java Window functions differ in their type of "Iterable": 111 | * - Scala WindowFunction: scala.Iterable 112 | * - Java WindowFunction: java.lang.Iterable 113 | */ 114 | final class ScalaProcessAllWindowFunctionWrapper[IN, OUT, W <: Window]( 115 | private[this] val func: ProcessAllWindowFunction[IN, OUT, W] 116 | ) extends JProcessAllWindowFunction[IN, OUT, W] { 117 | 118 | override def process( 119 | context: JProcessAllWindowFunction[IN, OUT, W]#Context, 120 | elements: java.lang.Iterable[IN], 121 | out: Collector[OUT] 122 | ): Unit = { 123 | val ctx = new func.Context { 124 | override def window = context.window 125 | 126 | override def windowState = context.windowState() 127 | 128 | override def globalState = context.globalState() 129 | 130 | override def output[X](outputTag: OutputTag[X], value: X) = context.output(outputTag, value) 131 | } 132 | func.process(ctx, elements.asScala, out) 133 | } 134 | 135 | override def clear(context: JProcessAllWindowFunction[IN, OUT, W]#Context): Unit = { 136 | val ctx = new func.Context { 137 | override def window = context.window 138 | 139 | override def windowState = context.windowState() 140 | 141 | override def globalState = context.globalState() 142 | 143 | override def output[X](outputTag: OutputTag[X], value: X) = context.output(outputTag, value) 144 | } 145 | func.clear(ctx) 146 | } 147 | 148 | override def setRuntimeContext(t: RuntimeContext): Unit = { 149 | super.setRuntimeContext(t) 150 | func match { 151 | case rfunc: ProcessAllWindowFunction[IN, OUT, W] => rfunc.setRuntimeContext(t) 152 | case null => 153 | } 154 | } 155 | 156 | override def open(parameters: Configuration): Unit = { 157 | super.open(parameters) 158 | func match { 159 | case rfunc: ProcessAllWindowFunction[IN, OUT, W] => rfunc.open(parameters) 160 | case null => 161 | } 162 | } 163 | 164 | override def close(): Unit = { 165 | super.close() 166 | func match { 167 | case rfunc: ProcessAllWindowFunction[IN, OUT, W] => rfunc.close() 168 | case null => 169 | } 170 | } 171 | } 172 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/function/util/ScalaReduceFunction.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package io.findify.flink.api.function.util 20 | 21 | import org.apache.flink.api.common.functions.ReduceFunction 22 | 23 | /** A wrapper function that exposes a Scala Function2 as a [[ReduceFunction]]. 
24 | */ 25 | final class ScalaReduceFunction[T](private[this] val function: (T, T) => T) extends ReduceFunction[T] { 26 | 27 | @throws(classOf[Exception]) 28 | override def reduce(a: T, b: T): T = { 29 | function(a, b) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/function/util/ScalaWindowFunction.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package io.findify.flink.api.function.util 20 | 21 | import org.apache.flink.streaming.api.functions.windowing.{WindowFunction => JWindowFunction} 22 | import org.apache.flink.streaming.api.windowing.windows.Window 23 | import org.apache.flink.util.Collector 24 | 25 | import scala.collection.JavaConverters._ 26 | 27 | /** A wrapper function that exposes a Scala Function4 as a Java WindowFunction. 28 | */ 29 | final class ScalaWindowFunction[IN, OUT, KEY, W <: Window]( 30 | private[this] val function: (KEY, W, Iterable[IN], Collector[OUT]) => Unit 31 | ) extends JWindowFunction[IN, OUT, KEY, W] { 32 | 33 | @throws(classOf[Exception]) 34 | override def apply(key: KEY, window: W, input: java.lang.Iterable[IN], out: Collector[OUT]) = { 35 | function.apply(key, window, input.asScala, out) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/function/util/ScalaWindowFunctionWrapper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package io.findify.flink.api.function.util 20 | 21 | import io.findify.flink.api.function.WindowFunction 22 | import org.apache.flink.api.common.functions.{IterationRuntimeContext, RichFunction, RuntimeContext} 23 | import org.apache.flink.api.java.operators.translation.WrappingFunction 24 | import org.apache.flink.streaming.api.functions.windowing.{WindowFunction => JWindowFunction} 25 | import org.apache.flink.streaming.api.windowing.windows.Window 26 | import org.apache.flink.util.Collector 27 | 28 | import scala.jdk.CollectionConverters._ 29 | 30 | /** A wrapper function that exposes a Scala WindowFunction as a JavaWindow function. 31 | * 32 | * The Scala and Java Window functions differ in their type of "Iterable": 33 | * - Scala WindowFunction: scala.Iterable 34 | * - Java WindowFunction: java.lang.Iterable 35 | */ 36 | final class ScalaWindowFunctionWrapper[IN, OUT, KEY, W <: Window](func: WindowFunction[IN, OUT, KEY, W]) 37 | extends WrappingFunction[WindowFunction[IN, OUT, KEY, W]](func) 38 | with JWindowFunction[IN, OUT, KEY, W] 39 | with RichFunction { 40 | 41 | @throws(classOf[Exception]) 42 | override def apply(key: KEY, window: W, input: java.lang.Iterable[IN], out: Collector[OUT]) = { 43 | wrappedFunction.apply(key, window, input.asScala, out) 44 | } 45 | 46 | override def getRuntimeContext: RuntimeContext = { 47 | throw new RuntimeException("This should never be called") 48 | } 49 | 50 | override def getIterationRuntimeContext: IterationRuntimeContext = { 51 | throw new RuntimeException("This should never be called") 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/test/scala/io/findify/flink/api/CoGroupedStreamsTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package io.findify.flink.api 20 | 21 | import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows 22 | import org.apache.flink.streaming.api.windowing.time.Time 23 | import org.junit.{Assert, Test} 24 | import io.findify.flinkadt.api._ 25 | 26 | /** Unit test for [[org.apache.flink.streaming.api.scala.CoGroupedStreams]] 27 | */ 28 | class CoGroupedStreamsTest { 29 | private val env = StreamExecutionEnvironment.getExecutionEnvironment 30 | 31 | private val dataStream1 = env.fromElements("a1", "a2", "a3") 32 | private val dataStream2 = env.fromElements("a1", "a2") 33 | private val keySelector = (s: String) => s 34 | private val tsAssigner = TumblingEventTimeWindows.of(Time.milliseconds(1)) 35 | 36 | @Test 37 | def testSetAllowedLateness(): Unit = { 38 | val lateness = Time.milliseconds(42) 39 | val withLateness = dataStream1 40 | .coGroup(dataStream2) 41 | .where(keySelector) 42 | .equalTo(keySelector) 43 | .window(tsAssigner) 44 | .allowedLateness(lateness) 45 | Assert.assertEquals(lateness.toMilliseconds, withLateness.allowedLateness.toMilliseconds) 46 | } 47 | 48 | } 49 | -------------------------------------------------------------------------------- /src/test/scala/io/findify/flink/api/JoinedStreamsTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package io.findify.flink.api 20 | 21 | import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows 22 | import org.apache.flink.streaming.api.windowing.time.Time 23 | import org.junit.{Assert, Test} 24 | import io.findify.flinkadt.api._ 25 | 26 | /** Unit test for [[org.apache.flink.streaming.api.scala.JoinedStreams]] 27 | */ 28 | class JoinedStreamsTest { 29 | private val env = StreamExecutionEnvironment.getExecutionEnvironment 30 | 31 | private val dataStream1 = env.fromElements("a1", "a2", "a3") 32 | private val dataStream2 = env.fromElements("a1", "a2") 33 | private val keySelector = (s: String) => s 34 | private val tsAssigner = TumblingEventTimeWindows.of(Time.milliseconds(1)) 35 | 36 | @Test 37 | def testSetAllowedLateness(): Unit = { 38 | val lateness = Time.milliseconds(42) 39 | val withLateness = dataStream1 40 | .join(dataStream2) 41 | .where(keySelector) 42 | .equalTo(keySelector) 43 | .window(tsAssigner) 44 | .allowedLateness(lateness) 45 | Assert.assertEquals(lateness.toMilliseconds, withLateness.allowedLateness.toMilliseconds) 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/test/scala/io/findify/flink/api/StreamExecutionEnvironmentTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package io.findify.flink.api 20 | 21 | import org.apache.flink.api.common.eventtime.WatermarkStrategy 22 | import org.apache.flink.api.common.typeinfo.TypeInformation 23 | import org.apache.flink.api.connector.source.Boundedness 24 | import org.apache.flink.api.connector.source.mocks.MockSource 25 | import org.apache.flink.api.java.typeutils.GenericTypeInfo 26 | import org.junit.Assert.assertEquals 27 | import org.junit.Test 28 | 29 | /** Tests for the [[StreamExecutionEnvironment]]. 30 | */ 31 | class StreamExecutionEnvironmentTest { 32 | 33 | /** Verifies that calls to fromSource() don't throw and create a stream of the expected type. 34 | */ 35 | @Test 36 | def testFromSource(): Unit = { 37 | implicit val typeInfo: TypeInformation[Integer] = new MockTypeInfo() 38 | val env = StreamExecutionEnvironment.getExecutionEnvironment 39 | 40 | val stream = env.fromSource( 41 | new MockSource(Boundedness.CONTINUOUS_UNBOUNDED, 1), 42 | WatermarkStrategy.noWatermarks(), 43 | "test source" 44 | ) 45 | 46 | assertEquals(typeInfo, stream.dataType) 47 | } 48 | 49 | /** Verifies that calls to fromSequence() instantiate a new DataStream that contains a sequence of numbers. 
50 | */ 51 | @Test 52 | def testFromSequence(): Unit = { 53 | import io.findify.flinkadt.api._ 54 | val typeInfo = implicitly[TypeInformation[Long]] 55 | val env = StreamExecutionEnvironment.getExecutionEnvironment 56 | 57 | val stream = env.fromSequence(1, 100) 58 | 59 | assertEquals(typeInfo, stream.dataType) 60 | } 61 | 62 | // -------------------------------------------------------------------------- 63 | // mocks 64 | // -------------------------------------------------------------------------- 65 | 66 | private class MockTypeInfo extends GenericTypeInfo[Integer](classOf[Integer]) {} 67 | } 68 | --------------------------------------------------------------------------------