├── .gitattributes ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .scalafmt.conf ├── LICENSE.md ├── README.md ├── build.sbt ├── project ├── build.properties └── plugins.sbt └── src ├── main └── scala │ └── io │ └── findify │ └── flink │ └── api │ ├── AllWindowedStream.scala │ ├── AsyncDataStream.scala │ ├── BroadcastConnectedStream.scala │ ├── CloseableIterator.scala │ ├── ClosureCleaner.scala │ ├── CoGroupedStreams.scala │ ├── ConnectedStreams.scala │ ├── DataStream.scala │ ├── DataStreamUtils.scala │ ├── JoinedStreams.scala │ ├── KeyedStream.scala │ ├── OutputTag.scala │ ├── ScalaStreamOps.scala │ ├── StreamExecutionEnvironment.scala │ ├── WindowedStream.scala │ ├── async │ ├── AsyncFunction.scala │ ├── JavaResultFutureWrapper.scala │ ├── ResultFuture.scala │ ├── RichAsyncFunction.scala │ └── ScalaRichAsyncFunctionWrapper.scala │ ├── extensions │ ├── impl │ │ └── acceptPartialFunctions │ │ │ ├── OnConnectedStream.scala │ │ │ ├── OnDataStream.scala │ │ │ ├── OnJoinedStream.scala │ │ │ ├── OnKeyedStream.scala │ │ │ └── OnWindowedStream.scala │ └── package.scala │ └── function │ ├── AllWindowFunction.scala │ ├── ProcessAllWindowFunction.scala │ ├── ProcessWindowFunction.scala │ ├── RichAllWindowFunction.scala │ ├── RichWindowFunction.scala │ ├── StatefulFunction.scala │ ├── WindowFunction.scala │ └── util │ ├── ScalaAllWindowFunction.scala │ ├── ScalaAllWindowFunctionWrapper.scala │ ├── ScalaProcessWindowFunctionWrapper.scala │ ├── ScalaReduceFunction.scala │ ├── ScalaWindowFunction.scala │ └── ScalaWindowFunctionWrapper.scala └── test └── scala └── io └── findify └── flink └── api ├── CoGroupedStreamsTest.scala ├── DataStreamTest.scala ├── JoinedStreamsTest.scala └── StreamExecutionEnvironmentTest.scala /.gitattributes: -------------------------------------------------------------------------------- 1 | *.tsv filter=lfs diff=lfs merge=lfs -text 2 | *.gz filter=lfs diff=lfs merge=lfs -text 3 | *.json filter=lfs diff=lfs merge=lfs -text 4 | *.dat filter=lfs diff=lfs merge=lfs -text 5 | *.jpg filter=lfs diff=lfs merge=lfs -text 6 | *.svg filter=lfs diff=lfs merge=lfs -text 7 | *.png filter=lfs diff=lfs merge=lfs -text 8 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | # This workflow will build a Java project with Maven 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-maven 3 | 4 | name: CI 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | build: 14 | runs-on: ${{ matrix.platform }} 15 | strategy: 16 | matrix: 17 | java: [11, 17] 18 | scala: [2.12.15, 2.13.8, 3.1.2] 19 | platform: [ubuntu-20.04] 20 | steps: 21 | - uses: actions/checkout@v2 22 | with: 23 | lfs: true 24 | - name: Set up JDK 25 | uses: actions/setup-java@v1 26 | with: 27 | java-version: ${{ matrix.java }} 28 | 29 | - name: Cache maven packages 30 | uses: actions/cache@v2 31 | env: 32 | cache-name: cache-sbt 33 | with: 34 | path: ~/.m2 ~/.coursier ~/.cache/coursier ~/.ivy2 ~/.sbt 35 | key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ hashFiles('**/pom.xml') }} 36 | restore-keys: | 37 | ${{ runner.os }}-build-${{ env.cache-name }}- 38 | ${{ runner.os }}-build- 39 | ${{ runner.os }}- 40 | - name: Run tests 41 | run: JAVA_OPTS="--add-opens java.base/java.lang=ALL-UNNAMED" sbt "++ ${{ matrix.scala }} test" 42 | 43 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | project/project 3 | project/target 4 | target 5 | .DS_STORE 6 | .git 7 | .bsp 8 | .run -------------------------------------------------------------------------------- /.scalafmt.conf: -------------------------------------------------------------------------------- 1 | style = defaultWithAlign 2 | maxColumn = 120 3 | version = 3.5.3 4 | assumeStandardLibraryStripMargin = true 5 | align.stripMargin = true 6 | runner.dialect = scala212 -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # Scala 2.12/2.13/3.x API for Apache Flink
2 | 
3 | [![CI Status](https://github.com/findify/flink-scala-api/workflows/CI/badge.svg)](https://github.com/findify/flink-scala-api/actions)
4 | [![Maven Central](https://maven-badges.herokuapp.com/maven-central/io.findify/flink-scala-api_2.12/badge.svg?style=plastic)](https://maven-badges.herokuapp.com/maven-central/io.findify/flink-scala-api_2.12)
5 | [![License: Apache 2](https://img.shields.io/badge/License-Apache2-green.svg)](https://opensource.org/licenses/Apache-2.0)
6 | ![Last commit](https://img.shields.io/github/last-commit/findify/flink-scala-api)
7 | ![Last release](https://img.shields.io/github/release/findify/flink-scala-api)
8 | 
9 | This project is a community-maintained fork of the official Apache Flink 1.15 Scala API, cross-built for Scala 2.12, 2.13 and 3.x.
10 | 
11 | ## Differences
12 | 
13 | ### New [magnolia](https://github.com/softwaremill/magnolia)-based serialization framework
14 | 
15 | Flink's official serialization framework has two important drawbacks that complicate the upgrade to Scala 2.13+:
16 | * it uses a complicated `TypeInformation` derivation macro, which would require a complete rewrite to work on Scala 3.
17 | * to serialize a `Traversable[_]` it serialized the actual Scala code of the corresponding `CanBuildFrom[_]` builder,
18 | which was then compiled and executed on deserialization. `CanBuildFrom[_]` no longer exists on Scala 2.13+, so there is
19 | no easy migration path.
20 | 
21 | This project relies on the [Flink-ADT](https://github.com/findify/flink-adt) library to derive serializers for all
22 | types, with the following perks:
23 | * ADT support: your `sealed trait` members won't fall back to the extremely slow Kryo serializer
24 | * case objects: no more problems with `None`
25 | * uses implicits (and typeclasses in Scala 3) to customize the serialization
26 | 
27 | But there are some drawbacks:
28 | * Savepoints written with Flink's official serialization API are not compatible, so you need to re-bootstrap your job
29 | from scratch.
30 | * As serializer derivation happens at compile time and uses zero runtime reflection, compile times are quite high for
31 | deeply nested rich case classes.
32 | 
33 | See the [Flink-ADT](https://github.com/findify/flink-adt) readme for more details.
34 | 
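To make the ADT support above concrete, here is a minimal sketch of what derivation looks like for a `sealed trait` hierarchy. The `Event` types below are made up for illustration; `deriveTypeInformation` and the `io.findify.flinkadt.api._` import are the same ones used in the snippets further down this README:

```scala
import org.apache.flink.api.common.typeinfo.TypeInformation
import io.findify.flinkadt.api._

// A hypothetical event ADT, purely for illustration
sealed trait Event
case class Click(sessionId: String)  extends Event
case class Purchase(amount: Double)  extends Event
case object Heartbeat                extends Event

object EventTypeInfo {
  // One derived serializer covers the whole hierarchy, including the case object,
  // so none of the members fall back to Kryo
  implicit lazy val eventTypeInfo: TypeInformation[Event] = deriveTypeInformation[Event]
}
```

With `EventTypeInfo._` imported, any operator that needs a `TypeInformation[Event]` picks this instance up implicitly.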
This project uses a more recent version, hopefully with less 56 | compatibility issues. 57 | 58 | ### No Legacy DataSet API 59 | 60 | Sorry, but it's already deprecated and as a community project we have no resources to support it. If you need it, 61 | PRs are welcome. 62 | 63 | ## Migration 64 | 65 | `flink-scala-api` uses a different package name for all api-related classes like `DataStream`, so you can do 66 | gradual migration of a big project and use both upstream and this versions of scala API in the same project. 67 | 68 | The actual migration should be straightforward and simple, replace old import to the new ones: 69 | ```scala 70 | // original api import 71 | import org.apache.flink.streaming.api.scala._ 72 | 73 | // flink-scala-api imports 74 | import io.findify.flink.api._ 75 | import io.findify.flinkadt.api._ 76 | ``` 77 | 78 | ## Usage 79 | 80 | `flink-scala-api` is released to Maven-central for 2.12, 2.13 and 3. For SBT, add this snippet to `build.sbt`: 81 | ```scala 82 | libraryDependencies += "io.findify" %% "flink-scala-api" % "1.15-1" 83 | ``` 84 | 85 | We suggest to remove `flink-scala` and `flink-streaming-scala` dependencies altogether to simplify the migration and 86 | not to mix two flavors of API in the same project. But it's technically possible and not required. 87 | 88 | ## Scala 3 89 | 90 | Scala 3 support is highly experimental and not well-tested in production. Good thing is that most of the issues are compile-time, 91 | so quite easy to reproduce. If you have issues with `flink-adt` not deriving `TypeInformation[T]` for the `T` you want, 92 | submit a bug report! 93 | 94 | ## Compile times 95 | 96 | They may be quite bad for rich nested case classes due to compile-time serializer derivation. 97 | Derivation happens each time `flink-scala-api` needs an instance of the `TypeInformation[T]` implicit/type class: 98 | ```scala 99 | case class Foo(x: Int) { 100 | def inc(a: Int) = copy(x = x + a) 101 | } 102 | 103 | val env = StreamingExecutionEnvironment.createLocalEnvironment() 104 | env 105 | .fromCollection(List(Foo(1),Foo(2),Foo(3))) 106 | .map(x => x.inc(1)) // here the TypeInformation[Foo] is generated 107 | .map(x => x.inc(2)) // generated one more time again 108 | ``` 109 | 110 | If you're using the same instances of data structures in multiple jobs (or in multiple tests), consider caching the 111 | derived serializer in a separate compile unit and just importing it when needed: 112 | 113 | ```scala 114 | // file FooTypeInfo.scala 115 | object FooTypeInfo { 116 | lazy val fooTypeInfo: TypeInformation[Foo] = deriveTypeInformation[Foo] 117 | } 118 | 119 | // file SomeJob.scala 120 | case class Foo(x: Int) { 121 | def inc(a: Int) = copy(x = x + a) 122 | } 123 | 124 | import FooTypeInfo._ 125 | 126 | val env = StreamingExecutionEnvironment.createLocalEnvironment() 127 | env 128 | .fromCollection(List(Foo(1),Foo(2),Foo(3))) 129 | .map(x => x.inc(1)) // taken as an implicit 130 | .map(x => x.inc(2)) // again, no re-derivation 131 | 132 | ``` 133 | 134 | ## License 135 | 136 | This project is using parts of the Apache Flink codebase, so the whole project 137 | is licensed under an [Apache 2.0](LICENSE.md) software license. 
-------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | ThisBuild / version := "1.15-2" 2 | 3 | lazy val root = (project in file(".")) 4 | .settings( 5 | name := "flink-scala-api", 6 | scalaVersion := "3.1.2", 7 | crossScalaVersions := Seq("2.12.15", "2.13.8", "3.1.2"), 8 | libraryDependencies ++= Seq( 9 | "org.apache.flink" % "flink-streaming-java" % "1.15.0", 10 | "org.apache.flink" % "flink-java" % "1.15.0", 11 | "io.findify" %% "flink-adt" % "0.6.1", 12 | "org.scalatest" %% "scalatest" % "3.2.12" % Test, 13 | "org.apache.flink" % "flink-test-utils" % "1.15.0" % Test, 14 | "org.apache.flink" % "flink-test-utils-junit" % "1.15.0" % Test, 15 | "com.github.sbt" % "junit-interface" % "0.13.3" % Test, 16 | "org.scala-lang.modules" %% "scala-collection-compat" % "2.7.0" 17 | ), 18 | libraryDependencies += { 19 | if (scalaBinaryVersion.value.startsWith("2")) { 20 | "org.scala-lang" % "scala-reflect" % scalaVersion.value 21 | } else { 22 | "org.scala-lang" %% "scala3-compiler" % scalaVersion.value 23 | } 24 | }, 25 | organization := "io.findify", 26 | licenses := Seq("APL2" -> url("http://www.apache.org/licenses/LICENSE-2.0.txt")), 27 | homepage := Some(url("https://github.com/findify/flink-scala-api")), 28 | publishMavenStyle := true, 29 | publishTo := sonatypePublishToBundle.value, 30 | scalacOptions ++= Seq( 31 | "-deprecation", 32 | "-feature", 33 | "-language:higherKinds" 34 | ), 35 | scmInfo := Some( 36 | ScmInfo( 37 | url("https://github.com/findify/flink-scala-api"), 38 | "scm:git@github.com:findify/flink-scala-api.git" 39 | ) 40 | ), 41 | developers := List( 42 | Developer( 43 | id = "romangrebennikov", 44 | name = "Roman Grebennikov", 45 | email = "grv@dfdx.me", 46 | url = url("https://dfdx.me/") 47 | ) 48 | ) 49 | ) 50 | -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 1.6.2 2 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.timushev.sbt" % "sbt-updates" % "0.6.2") 2 | addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "3.9.13") 3 | addSbtPlugin("com.github.sbt" % "sbt-pgp" % "2.1.2") 4 | addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.6") 5 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/AllWindowedStream.scala: -------------------------------------------------------------------------------- 1 | package io.findify.flink.api 2 | 3 | import org.apache.flink.annotation.{Public, PublicEvolving} 4 | import org.apache.flink.api.common.functions.{AggregateFunction, ReduceFunction} 5 | import org.apache.flink.api.common.typeinfo.TypeInformation 6 | import org.apache.flink.streaming.api.datastream.{AllWindowedStream => JavaAllWStream} 7 | import org.apache.flink.streaming.api.functions.aggregation.AggregationFunction.AggregationType 8 | import org.apache.flink.streaming.api.functions.aggregation.{ComparableAggregator, SumAggregator} 9 | import io.findify.flink.api.function.util.{ 10 | ScalaAllWindowFunction, 11 | ScalaAllWindowFunctionWrapper, 12 | ScalaProcessAllWindowFunctionWrapper, 13 | ScalaReduceFunction 14 | } 15 | import io.findify.flink.api.function.{AllWindowFunction, 
ProcessAllWindowFunction} 16 | import org.apache.flink.streaming.api.windowing.evictors.Evictor 17 | import org.apache.flink.streaming.api.windowing.time.Time 18 | import org.apache.flink.streaming.api.windowing.triggers.Trigger 19 | import org.apache.flink.streaming.api.windowing.windows.Window 20 | import org.apache.flink.util.Collector 21 | import org.apache.flink.util.Preconditions.checkNotNull 22 | import ScalaStreamOps._ 23 | 24 | /** A [[AllWindowedStream]] represents a data stream where the stream of elements is split into windows based on a 25 | * [[org.apache.flink.streaming.api.windowing.assigners.WindowAssigner]]. Window emission is triggered based on a 26 | * [[Trigger]]. 27 | * 28 | * If an [[Evictor]] is specified it will be used to evict elements from the window after evaluation was triggered by 29 | * the [[Trigger]] but before the actual evaluation of the window. When using an evictor window performance will 30 | * degrade significantly, since pre-aggregation of window results cannot be used. 31 | * 32 | * Note that the [[AllWindowedStream()]] is purely and API construct, during runtime the [[AllWindowedStream()]] will 33 | * be collapsed together with the operation over the window into one single operation. 34 | * 35 | * @tparam T 36 | * The type of elements in the stream. 37 | * @tparam W 38 | * The type of [[Window]] that the [[org.apache.flink.streaming.api.windowing.assigners.WindowAssigner]] assigns the 39 | * elements to. 40 | */ 41 | @Public 42 | class AllWindowedStream[T, W <: Window](javaStream: JavaAllWStream[T, W]) { 43 | 44 | /** Sets the allowed lateness to a user-specified value. If not explicitly set, the allowed lateness is [[0L]]. 45 | * Setting the allowed lateness is only valid for event-time windows. If a value different than 0 is provided with a 46 | * processing-time [[org.apache.flink.streaming.api.windowing.assigners.WindowAssigner]], then an exception is 47 | * thrown. 48 | */ 49 | @PublicEvolving 50 | def allowedLateness(lateness: Time): AllWindowedStream[T, W] = { 51 | javaStream.allowedLateness(lateness) 52 | this 53 | } 54 | 55 | /** Send late arriving data to the side output identified by the given [[OutputTag]]. Data is considered late after 56 | * the watermark has passed the end of the window plus the allowed lateness set using [[allowedLateness(Time)]]. 57 | * 58 | * You can get the stream of late data using [[DataStream.getSideOutput()]] on the [[DataStream]] resulting from the 59 | * windowed operation with the same [[OutputTag]]. 60 | */ 61 | @PublicEvolving 62 | def sideOutputLateData(outputTag: OutputTag[T]): AllWindowedStream[T, W] = { 63 | javaStream.sideOutputLateData(outputTag) 64 | this 65 | } 66 | 67 | /** Sets the [[Trigger]] that should be used to trigger window emission. 68 | */ 69 | @PublicEvolving 70 | def trigger(trigger: Trigger[_ >: T, _ >: W]): AllWindowedStream[T, W] = { 71 | javaStream.trigger(trigger) 72 | this 73 | } 74 | 75 | /** Sets the [[Evictor]] that should be used to evict elements from a window before emission. 76 | * 77 | * Note: When using an evictor window performance will degrade significantly, since pre-aggregation of window results 78 | * cannot be used. 
79 | */ 80 | @PublicEvolving 81 | def evictor(evictor: Evictor[_ >: T, _ >: W]): AllWindowedStream[T, W] = { 82 | javaStream.evictor(evictor) 83 | this 84 | } 85 | 86 | // ------------------------------------------------------------------------ 87 | // Operations on the windows 88 | // ------------------------------------------------------------------------ 89 | 90 | // ---------------------------- reduce() ------------------------------------ 91 | 92 | /** Applies a reduce function to the window. The window function is called for each evaluation of the window for each 93 | * key individually. The output of the reduce function is interpreted as a regular non-windowed stream. 94 | * 95 | * This window will try and pre-aggregate data as much as the window policies permit. For example, tumbling time 96 | * windows can perfectly pre-aggregate the data, meaning that only one element per key is stored. Sliding time 97 | * windows will pre-aggregate on the granularity of the slide interval, so a few elements are stored per key (one per 98 | * slide interval). Custom windows may not be able to pre-aggregate, or may need to store extra values in an 99 | * aggregation tree. 100 | * 101 | * @param function 102 | * The reduce function. 103 | * @return 104 | * The data stream that is the result of applying the reduce function to the window. 105 | */ 106 | def reduce(function: ReduceFunction[T]): DataStream[T] = { 107 | asScalaStream(javaStream.reduce(clean(function))) 108 | } 109 | 110 | /** Applies a reduce function to the window. The window function is called for each evaluation of the window for each 111 | * key individually. The output of the reduce function is interpreted as a regular non-windowed stream. 112 | * 113 | * This window will try and pre-aggregate data as much as the window policies permit. For example, tumbling time 114 | * windows can perfectly pre-aggregate the data, meaning that only one element per key is stored. Sliding time 115 | * windows will pre-aggregate on the granularity of the slide interval, so a few elements are stored per key (one per 116 | * slide interval). Custom windows may not be able to pre-aggregate, or may need to store extra values in an 117 | * aggregation tree. 118 | * 119 | * @param function 120 | * The reduce function. 121 | * @return 122 | * The data stream that is the result of applying the reduce function to the window. 123 | */ 124 | def reduce(function: (T, T) => T): DataStream[T] = { 125 | if (function == null) { 126 | throw new NullPointerException("Reduce function must not be null.") 127 | } 128 | val cleanFun = clean(function) 129 | val reducer = new ScalaReduceFunction[T](cleanFun) 130 | 131 | reduce(reducer) 132 | } 133 | 134 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 135 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 136 | * 137 | * Arriving data is pre-aggregated using the given pre-aggregation reducer. 138 | * 139 | * @param preAggregator 140 | * The reduce function that is used for pre-aggregation 141 | * @param windowFunction 142 | * The window function. 143 | * @return 144 | * The data stream that is the result of applying the window function to the window. 
145 | */ 146 | def reduce[R: TypeInformation]( 147 | preAggregator: ReduceFunction[T], 148 | windowFunction: AllWindowFunction[T, R, W] 149 | ): DataStream[R] = { 150 | 151 | val cleanedReducer = clean(preAggregator) 152 | val cleanedWindowFunction = clean(windowFunction) 153 | 154 | val applyFunction = new ScalaAllWindowFunctionWrapper[T, R, W](cleanedWindowFunction) 155 | 156 | val returnType: TypeInformation[R] = implicitly[TypeInformation[R]] 157 | asScalaStream(javaStream.reduce(cleanedReducer, applyFunction, returnType)) 158 | } 159 | 160 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 161 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 162 | * 163 | * Arriving data is pre-aggregated using the given pre-aggregation reducer. 164 | * 165 | * @param preAggregator 166 | * The reduce function that is used for pre-aggregation 167 | * @param windowFunction 168 | * The window function. 169 | * @return 170 | * The data stream that is the result of applying the window function to the window. 171 | */ 172 | def reduce[R: TypeInformation]( 173 | preAggregator: (T, T) => T, 174 | windowFunction: (W, Iterable[T], Collector[R]) => Unit 175 | ): DataStream[R] = { 176 | 177 | if (preAggregator == null) { 178 | throw new NullPointerException("Reduce function must not be null.") 179 | } 180 | if (windowFunction == null) { 181 | throw new NullPointerException("WindowApply function must not be null.") 182 | } 183 | 184 | val cleanReducer = clean(preAggregator) 185 | val cleanWindowFunction = clean(windowFunction) 186 | 187 | val reducer = new ScalaReduceFunction[T](cleanReducer) 188 | val applyFunction = new ScalaAllWindowFunction[T, R, W](cleanWindowFunction) 189 | 190 | val returnType: TypeInformation[R] = implicitly[TypeInformation[R]] 191 | asScalaStream(javaStream.reduce(reducer, applyFunction, returnType)) 192 | } 193 | 194 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 195 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 196 | * 197 | * Arriving data is pre-aggregated using the given pre-aggregation reducer. 198 | * 199 | * @param preAggregator 200 | * The reduce function that is used for pre-aggregation 201 | * @param windowFunction 202 | * The process window function. 203 | * @return 204 | * The data stream that is the result of applying the window function to the window. 205 | */ 206 | @PublicEvolving 207 | def reduce[R: TypeInformation]( 208 | preAggregator: ReduceFunction[T], 209 | windowFunction: ProcessAllWindowFunction[T, R, W] 210 | ): DataStream[R] = { 211 | 212 | val cleanedReducer = clean(preAggregator) 213 | val cleanedWindowFunction = clean(windowFunction) 214 | 215 | val applyFunction = new ScalaProcessAllWindowFunctionWrapper[T, R, W](cleanedWindowFunction) 216 | 217 | val returnType: TypeInformation[R] = implicitly[TypeInformation[R]] 218 | asScalaStream(javaStream.reduce(cleanedReducer, applyFunction, returnType)) 219 | } 220 | 221 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 222 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 223 | * 224 | * Arriving data is pre-aggregated using the given pre-aggregation reducer. 
225 | * 226 | * @param preAggregator 227 | * The reduce function that is used for pre-aggregation 228 | * @param windowFunction 229 | * The process window function. 230 | * @return 231 | * The data stream that is the result of applying the window function to the window. 232 | */ 233 | @PublicEvolving 234 | def reduce[R: TypeInformation]( 235 | preAggregator: (T, T) => T, 236 | windowFunction: ProcessAllWindowFunction[T, R, W] 237 | ): DataStream[R] = { 238 | 239 | if (preAggregator == null) { 240 | throw new NullPointerException("Reduce function must not be null.") 241 | } 242 | if (windowFunction == null) { 243 | throw new NullPointerException("WindowApply function must not be null.") 244 | } 245 | 246 | val cleanReducer = clean(preAggregator) 247 | val cleanWindowFunction = clean(windowFunction) 248 | 249 | val reducer = new ScalaReduceFunction[T](cleanReducer) 250 | val applyFunction = new ScalaProcessAllWindowFunctionWrapper[T, R, W](cleanWindowFunction) 251 | 252 | val returnType: TypeInformation[R] = implicitly[TypeInformation[R]] 253 | asScalaStream(javaStream.reduce(reducer, applyFunction, returnType)) 254 | } 255 | 256 | // --------------------------- aggregate() ---------------------------------- 257 | 258 | /** Applies the given aggregation function to each window. The aggregation function is called for each element, 259 | * aggregating values incrementally and keeping the state to one accumulator per window. 260 | * 261 | * @param aggregateFunction 262 | * The aggregation function. 263 | * @return 264 | * The data stream that is the result of applying the aggregate function to the window. 265 | */ 266 | @PublicEvolving 267 | def aggregate[ACC: TypeInformation, R: TypeInformation]( 268 | aggregateFunction: AggregateFunction[T, ACC, R] 269 | ): DataStream[R] = { 270 | 271 | checkNotNull(aggregateFunction, "AggregationFunction must not be null") 272 | 273 | val accumulatorType: TypeInformation[ACC] = implicitly[TypeInformation[ACC]] 274 | val resultType: TypeInformation[R] = implicitly[TypeInformation[R]] 275 | 276 | asScalaStream(javaStream.aggregate(clean(aggregateFunction), accumulatorType, resultType)) 277 | } 278 | 279 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 280 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 281 | * 282 | * Arriving data is pre-aggregated using the given aggregation function. 283 | * 284 | * @param preAggregator 285 | * The aggregation function that is used for pre-aggregation 286 | * @param windowFunction 287 | * The window function. 288 | * @return 289 | * The data stream that is the result of applying the window function to the window. 
290 | */ 291 | @PublicEvolving 292 | def aggregate[ACC: TypeInformation, V: TypeInformation, R: TypeInformation]( 293 | preAggregator: AggregateFunction[T, ACC, V], 294 | windowFunction: AllWindowFunction[V, R, W] 295 | ): DataStream[R] = { 296 | 297 | checkNotNull(preAggregator, "AggregationFunction must not be null") 298 | checkNotNull(windowFunction, "Window function must not be null") 299 | 300 | val cleanedPreAggregator = clean(preAggregator) 301 | val cleanedWindowFunction = clean(windowFunction) 302 | 303 | val applyFunction = new ScalaAllWindowFunctionWrapper[V, R, W](cleanedWindowFunction) 304 | 305 | val accumulatorType: TypeInformation[ACC] = implicitly[TypeInformation[ACC]] 306 | val resultType: TypeInformation[R] = implicitly[TypeInformation[R]] 307 | 308 | asScalaStream(javaStream.aggregate(cleanedPreAggregator, applyFunction, accumulatorType, resultType)) 309 | } 310 | 311 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 312 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 313 | * 314 | * Arriving data is pre-aggregated using the given aggregation function. 315 | * 316 | * @param preAggregator 317 | * The aggregation function that is used for pre-aggregation 318 | * @param windowFunction 319 | * The process window function. 320 | * @return 321 | * The data stream that is the result of applying the window function to the window. 322 | */ 323 | @PublicEvolving 324 | def aggregate[ACC: TypeInformation, V: TypeInformation, R: TypeInformation]( 325 | preAggregator: AggregateFunction[T, ACC, V], 326 | windowFunction: ProcessAllWindowFunction[V, R, W] 327 | ): DataStream[R] = { 328 | 329 | checkNotNull(preAggregator, "AggregationFunction must not be null") 330 | checkNotNull(windowFunction, "Window function must not be null") 331 | 332 | val cleanedPreAggregator = clean(preAggregator) 333 | val cleanedWindowFunction = clean(windowFunction) 334 | 335 | val applyFunction = new ScalaProcessAllWindowFunctionWrapper[V, R, W](cleanedWindowFunction) 336 | 337 | val accumulatorType: TypeInformation[ACC] = implicitly[TypeInformation[ACC]] 338 | val aggregationResultType: TypeInformation[V] = implicitly[TypeInformation[V]] 339 | val resultType: TypeInformation[R] = implicitly[TypeInformation[R]] 340 | 341 | asScalaStream( 342 | javaStream.aggregate(cleanedPreAggregator, applyFunction, accumulatorType, aggregationResultType, resultType) 343 | ) 344 | } 345 | 346 | /** Applies the given window function to each window. The window function is called for each evaluation of the window. 347 | * The output of the window function is interpreted as a regular non-windowed stream. 348 | * 349 | * Arriving data is pre-aggregated using the given aggregation function. 350 | * 351 | * @param preAggregator 352 | * The aggregation function that is used for pre-aggregation 353 | * @param windowFunction 354 | * The window function. 355 | * @return 356 | * The data stream that is the result of applying the window function to the window. 
357 | */ 358 | @PublicEvolving 359 | def aggregate[ACC: TypeInformation, V: TypeInformation, R: TypeInformation]( 360 | preAggregator: AggregateFunction[T, ACC, V], 361 | windowFunction: (W, Iterable[V], Collector[R]) => Unit 362 | ): DataStream[R] = { 363 | 364 | checkNotNull(preAggregator, "AggregationFunction must not be null") 365 | checkNotNull(windowFunction, "Window function must not be null") 366 | 367 | val cleanPreAggregator = clean(preAggregator) 368 | val cleanWindowFunction = clean(windowFunction) 369 | 370 | val applyFunction = new ScalaAllWindowFunction[V, R, W](cleanWindowFunction) 371 | 372 | val accumulatorType: TypeInformation[ACC] = implicitly[TypeInformation[ACC]] 373 | val resultType: TypeInformation[R] = implicitly[TypeInformation[R]] 374 | 375 | asScalaStream(javaStream.aggregate(cleanPreAggregator, applyFunction, accumulatorType, resultType)) 376 | } 377 | 378 | // ---------------------------- apply() ------------------------------------- 379 | 380 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 381 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 382 | * 383 | * Note that this function requires that all data in the windows is buffered until the window is evaluated, as the 384 | * function provides no means of pre-aggregation. 385 | * 386 | * @param function 387 | * The process window function. 388 | * @return 389 | * The data stream that is the result of applying the window function to the window. 390 | */ 391 | @PublicEvolving 392 | def process[R: TypeInformation](function: ProcessAllWindowFunction[T, R, W]): DataStream[R] = { 393 | 394 | val cleanedFunction = clean(function) 395 | val javaFunction = new ScalaProcessAllWindowFunctionWrapper[T, R, W](cleanedFunction) 396 | 397 | asScalaStream(javaStream.process(javaFunction, implicitly[TypeInformation[R]])) 398 | } 399 | 400 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 401 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 402 | * 403 | * Note that this function requires that all data in the windows is buffered until the window is evaluated, as the 404 | * function provides no means of pre-aggregation. 405 | * 406 | * @param function 407 | * The window function. 408 | * @return 409 | * The data stream that is the result of applying the window function to the window. 410 | */ 411 | def apply[R: TypeInformation](function: AllWindowFunction[T, R, W]): DataStream[R] = { 412 | 413 | val cleanedFunction = clean(function) 414 | val javaFunction = new ScalaAllWindowFunctionWrapper[T, R, W](cleanedFunction) 415 | 416 | asScalaStream(javaStream.apply(javaFunction, implicitly[TypeInformation[R]])) 417 | } 418 | 419 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 420 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 421 | * 422 | * Note that this function requires that all data in the windows is buffered until the window is evaluated, as the 423 | * function provides no means of pre-aggregation. 424 | * 425 | * @param function 426 | * The window function. 427 | * @return 428 | * The data stream that is the result of applying the window function to the window. 
429 | */ 430 | def apply[R: TypeInformation](function: (W, Iterable[T], Collector[R]) => Unit): DataStream[R] = { 431 | 432 | val cleanedFunction = clean(function) 433 | val applyFunction = new ScalaAllWindowFunction[T, R, W](cleanedFunction) 434 | 435 | asScalaStream(javaStream.apply(applyFunction, implicitly[TypeInformation[R]])) 436 | } 437 | 438 | // ------------------------------------------------------------------------ 439 | // Aggregations on the keyed windows 440 | // ------------------------------------------------------------------------ 441 | 442 | /** Applies an aggregation that that gives the maximum of the elements in the window at the given position. 443 | */ 444 | def max(position: Int): DataStream[T] = aggregate(AggregationType.MAX, position) 445 | 446 | /** Applies an aggregation that that gives the maximum of the elements in the window at the given field. 447 | */ 448 | def max(field: String): DataStream[T] = aggregate(AggregationType.MAX, field) 449 | 450 | /** Applies an aggregation that that gives the minimum of the elements in the window at the given position. 451 | */ 452 | def min(position: Int): DataStream[T] = aggregate(AggregationType.MIN, position) 453 | 454 | /** Applies an aggregation that that gives the minimum of the elements in the window at the given field. 455 | */ 456 | def min(field: String): DataStream[T] = aggregate(AggregationType.MIN, field) 457 | 458 | /** Applies an aggregation that sums the elements in the window at the given position. 459 | */ 460 | def sum(position: Int): DataStream[T] = aggregate(AggregationType.SUM, position) 461 | 462 | /** Applies an aggregation that sums the elements in the window at the given field. 463 | */ 464 | def sum(field: String): DataStream[T] = aggregate(AggregationType.SUM, field) 465 | 466 | /** Applies an aggregation that that gives the maximum element of the window by the given position. When equality, 467 | * returns the first. 468 | */ 469 | def maxBy(position: Int): DataStream[T] = aggregate(AggregationType.MAXBY, position) 470 | 471 | /** Applies an aggregation that that gives the maximum element of the window by the given field. When equality, 472 | * returns the first. 473 | */ 474 | def maxBy(field: String): DataStream[T] = aggregate(AggregationType.MAXBY, field) 475 | 476 | /** Applies an aggregation that that gives the minimum element of the window by the given position. When equality, 477 | * returns the first. 478 | */ 479 | def minBy(position: Int): DataStream[T] = aggregate(AggregationType.MINBY, position) 480 | 481 | /** Applies an aggregation that that gives the minimum element of the window by the given field. When equality, 482 | * returns the first. 
483 | */ 484 | def minBy(field: String): DataStream[T] = aggregate(AggregationType.MINBY, field) 485 | 486 | private def aggregate(aggregationType: AggregationType, field: String): DataStream[T] = { 487 | val position = fieldNames2Indices(getInputType(), Array(field))(0) 488 | aggregate(aggregationType, position) 489 | } 490 | 491 | def aggregate(aggregationType: AggregationType, position: Int): DataStream[T] = { 492 | 493 | val jStream = javaStream.asInstanceOf[JavaAllWStream[Product, W]] 494 | 495 | val reducer = aggregationType match { 496 | case AggregationType.SUM => 497 | new SumAggregator(position, jStream.getInputType, jStream.getExecutionEnvironment.getConfig) 498 | 499 | case _ => 500 | new ComparableAggregator( 501 | position, 502 | jStream.getInputType, 503 | aggregationType, 504 | true, 505 | jStream.getExecutionEnvironment.getConfig 506 | ) 507 | } 508 | 509 | new DataStream[Product](jStream.reduce(reducer)).asInstanceOf[DataStream[T]] 510 | } 511 | 512 | // ------------------------------------------------------------------------ 513 | // Utilities 514 | // ------------------------------------------------------------------------ 515 | 516 | /** Returns a "closure-cleaned" version of the given function. Cleans only if closure cleaning is not disabled in the 517 | * [[org.apache.flink.api.common.ExecutionConfig]]. 518 | */ 519 | private[flink] def clean[F <: AnyRef](f: F): F = { 520 | new StreamExecutionEnvironment(javaStream.getExecutionEnvironment).scalaClean(f) 521 | } 522 | 523 | /** Gets the output type. 524 | */ 525 | private def getInputType(): TypeInformation[T] = javaStream.getInputType 526 | } 527 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/AsyncDataStream.scala: -------------------------------------------------------------------------------- 1 | package io.findify.flink.api 2 | 3 | import io.findify.flink.api.async.{ 4 | AsyncFunction, 5 | JavaResultFutureWrapper, 6 | ResultFuture, 7 | RichAsyncFunction, 8 | ScalaRichAsyncFunctionWrapper 9 | } 10 | import org.apache.flink.annotation.PublicEvolving 11 | import org.apache.flink.api.common.typeinfo.TypeInformation 12 | import org.apache.flink.streaming.api.datastream.{AsyncDataStream => JavaAsyncDataStream} 13 | import org.apache.flink.streaming.api.functions.async.{ 14 | AsyncFunction => JavaAsyncFunction, 15 | ResultFuture => JavaResultFuture 16 | } 17 | import org.apache.flink.util.Preconditions 18 | import ScalaStreamOps._ 19 | import scala.concurrent.duration.TimeUnit 20 | 21 | /** A helper class to apply [[AsyncFunction]] to a data stream. 22 | * 23 | * Example: 24 | * {{{ 25 | * val input: DataStream[String] = ... 26 | * val asyncFunction: (String, ResultFuture[String]) => Unit = ... 27 | * 28 | * AsyncDataStream.orderedWait(input, asyncFunction, timeout, TimeUnit.MILLISECONDS, 100) 29 | * }}} 30 | */ 31 | @PublicEvolving 32 | object AsyncDataStream { 33 | 34 | private val DEFAULT_QUEUE_CAPACITY = 100 35 | 36 | /** Apply an asynchronous function on the input data stream. The output order is only maintained with respect to 37 | * watermarks. Stream records which lie between the same two watermarks, can be re-ordered. 
38 | * 39 | * @param input 40 | * to apply the async function on 41 | * @param asyncFunction 42 | * to use 43 | * @param timeout 44 | * for the asynchronous operation to complete 45 | * @param timeUnit 46 | * of the timeout 47 | * @param capacity 48 | * of the operator which is equivalent to the number of concurrent asynchronous operations 49 | * @tparam IN 50 | * Type of the input record 51 | * @tparam OUT 52 | * Type of the output record 53 | * @return 54 | * the resulting stream containing the asynchronous results 55 | */ 56 | def unorderedWait[IN, OUT: TypeInformation]( 57 | input: DataStream[IN], 58 | asyncFunction: AsyncFunction[IN, OUT], 59 | timeout: Long, 60 | timeUnit: TimeUnit, 61 | capacity: Int 62 | ): DataStream[OUT] = { 63 | 64 | val javaAsyncFunction = wrapAsJavaAsyncFunction(asyncFunction) 65 | 66 | val outType: TypeInformation[OUT] = implicitly[TypeInformation[OUT]] 67 | 68 | asScalaStream( 69 | JavaAsyncDataStream 70 | .unorderedWait[IN, OUT](input.javaStream, javaAsyncFunction, timeout, timeUnit, capacity) 71 | .returns(outType) 72 | ) 73 | } 74 | 75 | /** Apply an asynchronous function on the input data stream. The output order is only maintained with respect to 76 | * watermarks. Stream records which lie between the same two watermarks, can be re-ordered. 77 | * 78 | * @param input 79 | * to apply the async function on 80 | * @param asyncFunction 81 | * to use 82 | * @param timeout 83 | * for the asynchronous operation to complete 84 | * @param timeUnit 85 | * of the timeout 86 | * @tparam IN 87 | * Type of the input record 88 | * @tparam OUT 89 | * Type of the output record 90 | * @return 91 | * the resulting stream containing the asynchronous results 92 | */ 93 | def unorderedWait[IN, OUT: TypeInformation]( 94 | input: DataStream[IN], 95 | asyncFunction: AsyncFunction[IN, OUT], 96 | timeout: Long, 97 | timeUnit: TimeUnit 98 | ): DataStream[OUT] = { 99 | 100 | unorderedWait(input, asyncFunction, timeout, timeUnit, DEFAULT_QUEUE_CAPACITY) 101 | } 102 | 103 | /** Apply an asynchronous function on the input data stream. The output order is only maintained with respect to 104 | * watermarks. Stream records which lie between the same two watermarks, can be re-ordered. 
105 | * 106 | * @param input 107 | * to apply the async function on 108 | * @param timeout 109 | * for the asynchronous operation to complete 110 | * @param timeUnit 111 | * of the timeout 112 | * @param capacity 113 | * of the operator which is equivalent to the number of concurrent asynchronous operations 114 | * @param asyncFunction 115 | * to use 116 | * @tparam IN 117 | * Type of the input record 118 | * @tparam OUT 119 | * Type of the output record 120 | * @return 121 | * the resulting stream containing the asynchronous results 122 | */ 123 | def unorderedWait[IN, OUT: TypeInformation](input: DataStream[IN], timeout: Long, timeUnit: TimeUnit, capacity: Int)( 124 | asyncFunction: (IN, ResultFuture[OUT]) => Unit 125 | ): DataStream[OUT] = { 126 | 127 | Preconditions.checkNotNull(asyncFunction) 128 | 129 | val cleanAsyncFunction = input.executionEnvironment.scalaClean(asyncFunction) 130 | 131 | val func = new JavaAsyncFunction[IN, OUT] { 132 | override def asyncInvoke(input: IN, resultFuture: JavaResultFuture[OUT]): Unit = { 133 | 134 | cleanAsyncFunction(input, new JavaResultFutureWrapper[OUT](resultFuture)) 135 | } 136 | } 137 | 138 | val outType: TypeInformation[OUT] = implicitly[TypeInformation[OUT]] 139 | 140 | asScalaStream( 141 | JavaAsyncDataStream.unorderedWait[IN, OUT](input.javaStream, func, timeout, timeUnit, capacity).returns(outType) 142 | ) 143 | } 144 | 145 | /** Apply an asynchronous function on the input data stream. The output order is only maintained with respect to 146 | * watermarks. Stream records which lie between the same two watermarks, can be re-ordered. 147 | * 148 | * @param input 149 | * to apply the async function on 150 | * @param timeout 151 | * for the asynchronous operation to complete 152 | * @param timeUnit 153 | * of the timeout 154 | * @param asyncFunction 155 | * to use 156 | * @tparam IN 157 | * Type of the input record 158 | * @tparam OUT 159 | * Type of the output record 160 | * @return 161 | * the resulting stream containing the asynchronous results 162 | */ 163 | def unorderedWait[IN, OUT: TypeInformation](input: DataStream[IN], timeout: Long, timeUnit: TimeUnit)( 164 | asyncFunction: (IN, ResultFuture[OUT]) => Unit 165 | ): DataStream[OUT] = { 166 | unorderedWait(input, timeout, timeUnit, DEFAULT_QUEUE_CAPACITY)(asyncFunction) 167 | } 168 | 169 | /** Apply an asynchronous function on the input data stream. The output order is the same as the input order of the 170 | * elements. 
171 | * 172 | * @param input 173 | * to apply the async function on 174 | * @param asyncFunction 175 | * to use 176 | * @param timeout 177 | * for the asynchronous operation to complete 178 | * @param timeUnit 179 | * of the timeout 180 | * @param capacity 181 | * of the operator which is equivalent to the number of concurrent asynchronous operations 182 | * @tparam IN 183 | * Type of the input record 184 | * @tparam OUT 185 | * Type of the output record 186 | * @return 187 | * the resulting stream containing the asynchronous results 188 | */ 189 | def orderedWait[IN, OUT: TypeInformation]( 190 | input: DataStream[IN], 191 | asyncFunction: AsyncFunction[IN, OUT], 192 | timeout: Long, 193 | timeUnit: TimeUnit, 194 | capacity: Int 195 | ): DataStream[OUT] = { 196 | 197 | val javaAsyncFunction = wrapAsJavaAsyncFunction(asyncFunction) 198 | 199 | val outType: TypeInformation[OUT] = implicitly[TypeInformation[OUT]] 200 | 201 | asScalaStream( 202 | JavaAsyncDataStream 203 | .orderedWait[IN, OUT](input.javaStream, javaAsyncFunction, timeout, timeUnit, capacity) 204 | .returns(outType) 205 | ) 206 | } 207 | 208 | /** Apply an asynchronous function on the input data stream. The output order is the same as the input order of the 209 | * elements. 210 | * 211 | * @param input 212 | * to apply the async function on 213 | * @param asyncFunction 214 | * to use 215 | * @param timeout 216 | * for the asynchronous operation to complete 217 | * @param timeUnit 218 | * of the timeout 219 | * @tparam IN 220 | * Type of the input record 221 | * @tparam OUT 222 | * Type of the output record 223 | * @return 224 | * the resulting stream containing the asynchronous results 225 | */ 226 | def orderedWait[IN, OUT: TypeInformation]( 227 | input: DataStream[IN], 228 | asyncFunction: AsyncFunction[IN, OUT], 229 | timeout: Long, 230 | timeUnit: TimeUnit 231 | ): DataStream[OUT] = { 232 | orderedWait(input, asyncFunction, timeout, timeUnit, DEFAULT_QUEUE_CAPACITY) 233 | } 234 | 235 | /** Apply an asynchronous function on the input data stream. The output order is the same as the input order of the 236 | * elements. 
237 | * 238 | * @param input 239 | * to apply the async function on 240 | * @param timeout 241 | * for the asynchronous operation to complete 242 | * @param timeUnit 243 | * of the timeout 244 | * @param capacity 245 | * of the operator which is equivalent to the number of concurrent asynchronous operations 246 | * @param asyncFunction 247 | * to use 248 | * @tparam IN 249 | * Type of the input record 250 | * @tparam OUT 251 | * Type of the output record 252 | * @return 253 | * the resulting stream containing the asynchronous results 254 | */ 255 | def orderedWait[IN, OUT: TypeInformation](input: DataStream[IN], timeout: Long, timeUnit: TimeUnit, capacity: Int)( 256 | asyncFunction: (IN, ResultFuture[OUT]) => Unit 257 | ): DataStream[OUT] = { 258 | 259 | Preconditions.checkNotNull(asyncFunction) 260 | 261 | val cleanAsyncFunction = input.executionEnvironment.scalaClean(asyncFunction) 262 | 263 | val func = new JavaAsyncFunction[IN, OUT] { 264 | override def asyncInvoke(input: IN, resultFuture: JavaResultFuture[OUT]): Unit = { 265 | cleanAsyncFunction(input, new JavaResultFutureWrapper[OUT](resultFuture)) 266 | } 267 | } 268 | 269 | val outType: TypeInformation[OUT] = implicitly[TypeInformation[OUT]] 270 | 271 | asScalaStream( 272 | JavaAsyncDataStream.orderedWait[IN, OUT](input.javaStream, func, timeout, timeUnit, capacity).returns(outType) 273 | ) 274 | } 275 | 276 | /** Apply an asynchronous function on the input data stream. The output order is the same as the input order of the 277 | * elements. 278 | * 279 | * @param input 280 | * to apply the async function on 281 | * @param timeout 282 | * for the asynchronous operation to complete 283 | * @param timeUnit 284 | * of the timeout 285 | * @param asyncFunction 286 | * to use 287 | * @tparam IN 288 | * Type of the input record 289 | * @tparam OUT 290 | * Type of the output record 291 | * @return 292 | * the resulting stream containing the asynchronous results 293 | */ 294 | def orderedWait[IN, OUT: TypeInformation](input: DataStream[IN], timeout: Long, timeUnit: TimeUnit)( 295 | asyncFunction: (IN, ResultFuture[OUT]) => Unit 296 | ): DataStream[OUT] = { 297 | 298 | orderedWait(input, timeout, timeUnit, DEFAULT_QUEUE_CAPACITY)(asyncFunction) 299 | } 300 | 301 | private def wrapAsJavaAsyncFunction[IN, OUT: TypeInformation]( 302 | asyncFunction: AsyncFunction[IN, OUT] 303 | ): JavaAsyncFunction[IN, OUT] = asyncFunction match { 304 | case richAsyncFunction: RichAsyncFunction[IN, OUT] => 305 | new ScalaRichAsyncFunctionWrapper[IN, OUT](richAsyncFunction) 306 | case _ => 307 | new JavaAsyncFunction[IN, OUT] { 308 | override def asyncInvoke(input: IN, resultFuture: JavaResultFuture[OUT]): Unit = { 309 | asyncFunction.asyncInvoke(input, new JavaResultFutureWrapper[OUT](resultFuture)) 310 | } 311 | 312 | override def timeout(input: IN, resultFuture: JavaResultFuture[OUT]): Unit = { 313 | asyncFunction.timeout(input, new JavaResultFutureWrapper[OUT](resultFuture)) 314 | } 315 | } 316 | } 317 | } 318 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/BroadcastConnectedStream.scala: -------------------------------------------------------------------------------- 1 | package io.findify.flink.api 2 | 3 | import org.apache.flink.annotation.PublicEvolving 4 | import org.apache.flink.api.common.typeinfo.TypeInformation 5 | import org.apache.flink.streaming.api.datastream.{BroadcastConnectedStream => JavaBCStream} 6 | import 
org.apache.flink.streaming.api.functions.co.{BroadcastProcessFunction, KeyedBroadcastProcessFunction} 7 | import ScalaStreamOps._ 8 | 9 | class BroadcastConnectedStream[IN1, IN2](javaStream: JavaBCStream[IN1, IN2]) { 10 | 11 | /** Assumes as inputs a [[org.apache.flink.streaming.api.datastream.BroadcastStream]] and a [[KeyedStream]] and 12 | * applies the given [[KeyedBroadcastProcessFunction]] on them, thereby creating a transformed output stream. 13 | * 14 | * @param function 15 | * The [[KeyedBroadcastProcessFunction]] applied to each element in the stream. 16 | * @tparam KS 17 | * The type of the keys in the keyed stream. 18 | * @tparam OUT 19 | * The type of the output elements. 20 | * @return 21 | * The transformed [[DataStream]]. 22 | */ 23 | @PublicEvolving 24 | def process[KS, OUT: TypeInformation](function: KeyedBroadcastProcessFunction[KS, IN1, IN2, OUT]): DataStream[OUT] = { 25 | 26 | if (function == null) { 27 | throw new NullPointerException("KeyedBroadcastProcessFunction function must not be null.") 28 | } 29 | 30 | val outputTypeInfo: TypeInformation[OUT] = implicitly[TypeInformation[OUT]] 31 | asScalaStream(javaStream.process(function, outputTypeInfo)) 32 | } 33 | 34 | /** Assumes as inputs a [[org.apache.flink.streaming.api.datastream.BroadcastStream]] and a non-keyed [[DataStream]] 35 | * and applies the given [[org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction]] on them, thereby 36 | * creating a transformed output stream. 37 | * 38 | * @param function 39 | * The [[BroadcastProcessFunction]] applied to each element in the stream. 40 | * @tparam OUT 41 | * The type of the output elements. 42 | * @return 43 | * The transformed { @link DataStream}. 44 | */ 45 | @PublicEvolving 46 | def process[OUT: TypeInformation](function: BroadcastProcessFunction[IN1, IN2, OUT]): DataStream[OUT] = { 47 | 48 | if (function == null) { 49 | throw new NullPointerException("BroadcastProcessFunction function must not be null.") 50 | } 51 | 52 | val outputTypeInfo: TypeInformation[OUT] = implicitly[TypeInformation[OUT]] 53 | asScalaStream(javaStream.process(function, outputTypeInfo)) 54 | } 55 | 56 | /** Returns a "closure-cleaned" version of the given function. Cleans only if closure cleaning is not disabled in the 57 | * [[org.apache.flink.api.common.ExecutionConfig]] 58 | */ 59 | private[flink] def clean[F <: AnyRef](f: F) = { 60 | new StreamExecutionEnvironment(javaStream.getExecutionEnvironment).scalaClean(f) 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/CloseableIterator.scala: -------------------------------------------------------------------------------- 1 | package io.findify.flink.api 2 | 3 | import org.apache.flink.util.{CloseableIterator => JCloseableIterator} 4 | 5 | /** This interface represents an [[Iterator]] that is also [[AutoCloseable]]. A typical use-case for this interface are 6 | * iterators that are based on native-resources such as files, network, or database connections. Clients must call 7 | * close after using the iterator. 
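* A small usage sketch: wrapping a Java iterator and closing it when done. The `javaResults` value is a
* hypothetical [[org.apache.flink.util.CloseableIterator]] obtained elsewhere (for example from a job result):
* {{{
* val it: CloseableIterator[String] = CloseableIterator.fromJava(javaResults)
* try {
*   it.foreach(println) // behaves like an ordinary Scala Iterator
* } finally {
*   it.close()          // always release the underlying resource
* }
* }}}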
8 | */ 9 | trait CloseableIterator[T] extends Iterator[T] with AutoCloseable {} 10 | 11 | object CloseableIterator { 12 | 13 | def fromJava[T](it: JCloseableIterator[T]): CloseableIterator[T] = 14 | new CloseableIterator[T] { 15 | override def hasNext: Boolean = it.hasNext 16 | 17 | override def next(): T = it.next 18 | 19 | override def close(): Unit = it.close() 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/CoGroupedStreams.scala: -------------------------------------------------------------------------------- 1 | package io.findify.flink.api 2 | 3 | import org.apache.flink.annotation.{PublicEvolving, Public} 4 | import org.apache.flink.api.common.functions.CoGroupFunction 5 | import org.apache.flink.api.common.typeinfo.TypeInformation 6 | import org.apache.flink.api.java.functions.KeySelector 7 | import org.apache.flink.api.java.typeutils.ResultTypeQueryable 8 | import org.apache.flink.streaming.api.datastream.{CoGroupedStreams => JavaCoGroupedStreams} 9 | import org.apache.flink.streaming.api.windowing.assigners.WindowAssigner 10 | import org.apache.flink.streaming.api.windowing.evictors.Evictor 11 | import org.apache.flink.streaming.api.windowing.time.Time 12 | import org.apache.flink.streaming.api.windowing.triggers.Trigger 13 | import org.apache.flink.streaming.api.windowing.windows.Window 14 | import org.apache.flink.util.Collector 15 | import ScalaStreamOps._ 16 | import scala.jdk.CollectionConverters._ 17 | 18 | /** `CoGroupedStreams` represents two [[DataStream]]s that have been co-grouped. A streaming co-group operation is 19 | * evaluated over elements in a window. 20 | * 21 | * To finalize the co-group operation you also need to specify a [[KeySelector]] for both the first and second input 22 | * and a [[WindowAssigner]] 23 | * 24 | * Note: Right now, the groups are being built in memory so you need to ensure that they don't get too big. Otherwise 25 | * the JVM might crash. 26 | * 27 | * Example: 28 | * 29 | * {{{ 30 | * val one: DataStream[(String, Int)] = ... 31 | * val two: DataStream[(String, Int)] = ... 32 | * 33 | * val result = one.coGroup(two) 34 | * .where(new MyFirstKeySelector()) 35 | * .equalTo(new MyFirstKeySelector()) 36 | * .window(TumblingEventTimeWindows.of(Time.of(5, TimeUnit.SECONDS))) 37 | * .apply(new MyCoGroupFunction()) 38 | * } 39 | * }}} 40 | */ 41 | @Public 42 | class CoGroupedStreams[T1, T2](input1: DataStream[T1], input2: DataStream[T2]) { 43 | 44 | /** Specifies a [[KeySelector]] for elements from the first input. 45 | */ 46 | def where[KEY: TypeInformation](keySelector: T1 => KEY): Where[KEY] = { 47 | val cleanFun = clean(keySelector) 48 | val keyType = implicitly[TypeInformation[KEY]] 49 | val javaSelector = new KeySelector[T1, KEY] with ResultTypeQueryable[KEY] { 50 | def getKey(in: T1) = cleanFun(in) 51 | override def getProducedType: TypeInformation[KEY] = keyType 52 | } 53 | new Where[KEY](javaSelector, keyType) 54 | } 55 | 56 | /** A co-group operation that has [[KeySelector]]s defined for the first input. 57 | * 58 | * You need to specify a [[KeySelector]] for the second input using [[equalTo()]] before you can proceed with 59 | * specifying a [[WindowAssigner]] using [[EqualTo.window()]]. 60 | * 61 | * @tparam KEY 62 | * Type of the key. This must be the same for both inputs 63 | */ 64 | class Where[KEY](keySelector1: KeySelector[T1, KEY], keyType: TypeInformation[KEY]) { 65 | 66 | /** Specifies a [[KeySelector]] for elements from the second input. 
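* A hedged sketch of the complete fluent chain with plain Scala functions as key selectors; the stream
* contents and the window size are illustrative placeholders:
* {{{
* val clicks: DataStream[(String, Int)] = ...
* val views: DataStream[(String, Int)] = ...
*
* val counts: DataStream[(Int, Int)] = clicks
*   .coGroup(views)
*   .where(_._1)
*   .equalTo(_._1)
*   .window(TumblingEventTimeWindows.of(Time.seconds(5)))
*   .apply { (clicksInWindow, viewsInWindow) => (clicksInWindow.size, viewsInWindow.size) }
* }}}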
67 | */ 68 | def equalTo(keySelector: T2 => KEY): EqualTo = { 69 | val cleanFun = clean(keySelector) 70 | val localKeyType = keyType 71 | val javaSelector = new KeySelector[T2, KEY] with ResultTypeQueryable[KEY] { 72 | def getKey(in: T2) = cleanFun(in) 73 | override def getProducedType: TypeInformation[KEY] = localKeyType 74 | } 75 | new EqualTo(javaSelector) 76 | } 77 | 78 | /** A co-group operation that a [[KeySelector]] defined for the first and the second input. 79 | * 80 | * A window can now be specified using [[window()]]. 81 | */ 82 | class EqualTo(keySelector2: KeySelector[T2, KEY]) { 83 | 84 | /** Specifies the window on which the co-group operation works. 85 | */ 86 | @PublicEvolving 87 | def window[W <: Window]( 88 | assigner: WindowAssigner[_ >: JavaCoGroupedStreams.TaggedUnion[T1, T2], W] 89 | ): WithWindow[W] = { 90 | if (keySelector1 == null || keySelector2 == null) { 91 | throw new UnsupportedOperationException( 92 | "You first need to specify KeySelectors for both inputs using where() and equalTo()." 93 | ) 94 | } 95 | new WithWindow[W](clean(assigner), null, null, null) 96 | } 97 | 98 | /** A co-group operation that has [[KeySelector]]s defined for both inputs as well as a [[WindowAssigner]]. 99 | * 100 | * @tparam W 101 | * Type of { @link Window} on which the co-group operation works. 102 | */ 103 | @PublicEvolving 104 | class WithWindow[W <: Window]( 105 | windowAssigner: WindowAssigner[_ >: JavaCoGroupedStreams.TaggedUnion[T1, T2], W], 106 | trigger: Trigger[_ >: JavaCoGroupedStreams.TaggedUnion[T1, T2], _ >: W], 107 | evictor: Evictor[_ >: JavaCoGroupedStreams.TaggedUnion[T1, T2], _ >: W], 108 | val allowedLateness: Time 109 | ) { 110 | 111 | /** Sets the [[Trigger]] that should be used to trigger window emission. 112 | */ 113 | @PublicEvolving 114 | def trigger(newTrigger: Trigger[_ >: JavaCoGroupedStreams.TaggedUnion[T1, T2], _ >: W]): WithWindow[W] = { 115 | new WithWindow[W](windowAssigner, newTrigger, evictor, allowedLateness) 116 | } 117 | 118 | /** Sets the [[Evictor]] that should be used to evict elements from a window before emission. 119 | * 120 | * Note: When using an evictor window performance will degrade significantly, since pre-aggregation of window 121 | * results cannot be used. 122 | */ 123 | @PublicEvolving 124 | def evictor(newEvictor: Evictor[_ >: JavaCoGroupedStreams.TaggedUnion[T1, T2], _ >: W]): WithWindow[W] = { 125 | new WithWindow[W](windowAssigner, trigger, newEvictor, allowedLateness) 126 | } 127 | 128 | /** Sets the time by which elements are allowed to be late. Delegates to 129 | * [[WindowedStream#allowedLateness(Time)]] 130 | */ 131 | @PublicEvolving 132 | def allowedLateness(newLateness: Time): WithWindow[W] = { 133 | new WithWindow[W](windowAssigner, trigger, evictor, newLateness) 134 | } 135 | 136 | /** Completes the co-group operation with the user function that is executed for windowed groups. 137 | */ 138 | def apply[O: TypeInformation](fun: (Iterator[T1], Iterator[T2]) => O): DataStream[O] = { 139 | require(fun != null, "CoGroup function must not be null.") 140 | 141 | val coGrouper = new CoGroupFunction[T1, T2, O] { 142 | val cleanFun = clean(fun) 143 | def coGroup(left: java.lang.Iterable[T1], right: java.lang.Iterable[T2], out: Collector[O]) = { 144 | out.collect(cleanFun(left.iterator().asScala, right.iterator().asScala)) 145 | } 146 | } 147 | apply(coGrouper) 148 | } 149 | 150 | /** Completes the co-group operation with the user function that is executed for windowed groups. 
151 | */ 152 | def apply[O: TypeInformation](fun: (Iterator[T1], Iterator[T2], Collector[O]) => Unit): DataStream[O] = { 153 | require(fun != null, "CoGroup function must not be null.") 154 | 155 | val coGrouper = new CoGroupFunction[T1, T2, O] { 156 | val cleanFun = clean(fun) 157 | def coGroup(left: java.lang.Iterable[T1], right: java.lang.Iterable[T2], out: Collector[O]) = { 158 | cleanFun(left.iterator.asScala, right.iterator.asScala, out) 159 | } 160 | } 161 | apply(coGrouper) 162 | } 163 | 164 | /** Completes the co-group operation with the user function that is executed for windowed groups. 165 | */ 166 | def apply[T: TypeInformation](function: CoGroupFunction[T1, T2, T]): DataStream[T] = { 167 | 168 | val coGroup = new JavaCoGroupedStreams[T1, T2](input1.javaStream, input2.javaStream) 169 | 170 | asScalaStream( 171 | coGroup 172 | .where(keySelector1) 173 | .equalTo(keySelector2) 174 | .window(windowAssigner) 175 | .trigger(trigger) 176 | .evictor(evictor) 177 | .allowedLateness(allowedLateness) 178 | .apply(clean(function), implicitly[TypeInformation[T]]) 179 | ) 180 | } 181 | } 182 | 183 | } 184 | } 185 | 186 | /** Returns a "closure-cleaned" version of the given function. Cleans only if closure cleaning is not disabled in the 187 | * [[org.apache.flink.api.common.ExecutionConfig]]. 188 | */ 189 | private[flink] def clean[F <: AnyRef](f: F): F = { 190 | new StreamExecutionEnvironment(input1.javaStream.getExecutionEnvironment).scalaClean(f) 191 | } 192 | } 193 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/ConnectedStreams.scala: -------------------------------------------------------------------------------- 1 | package io.findify.flink.api 2 | 3 | import org.apache.flink.annotation.{Internal, Public, PublicEvolving} 4 | import org.apache.flink.api.common.typeinfo.TypeInformation 5 | import org.apache.flink.api.java.functions.KeySelector 6 | import org.apache.flink.streaming.api.datastream.{ConnectedStreams => JavaCStream, DataStream => JavaStream} 7 | import org.apache.flink.streaming.api.functions.co._ 8 | import org.apache.flink.streaming.api.operators.{TwoInputStreamOperator, TwoInputStreamOperatorFactory} 9 | import org.apache.flink.util.Collector 10 | import ScalaStreamOps._ 11 | 12 | /** [[ConnectedStreams]] represents two connected streams of (possibly) different data types. Connected streams are 13 | * useful for cases where operations on one stream directly affect the operations on the other stream, usually via 14 | * shared state between the streams. 15 | * 16 | * An example for the use of connected streams would be to apply rules that change over time onto another stream. One 17 | * of the connected streams has the rules, the other stream the elements to apply the rules to. The operation on the 18 | * connected stream maintains the current set of rules in the state. It may receive either a rule update and update the 19 | * state or a data element and apply the rules in the state to the element. 20 | * 21 | * The connected stream can be conceptually viewed as a union stream of an Either type, that holds either the first 22 | * stream's type or the second stream's type. 23 | */ 24 | @Public 25 | class ConnectedStreams[IN1, IN2](javaStream: JavaCStream[IN1, IN2]) { 26 | 27 | // ------------------------------------------------------ 28 | // Transformations 29 | // ------------------------------------------------------ 30 | 31 | /** Applies a CoMap transformation on the connected streams. 
32 | * 33 | * The transformation consists of two separate functions, where the first one is called for each element of the first 34 | * connected stream, and the second one is called for each element of the second connected stream. 35 | * 36 | * @param fun1 37 | * Function called per element of the first input. 38 | * @param fun2 39 | * Function called per element of the second input. 40 | * @return 41 | * The resulting data stream. 42 | */ 43 | def map[R: TypeInformation](fun1: IN1 => R, fun2: IN2 => R): DataStream[R] = { 44 | 45 | if (fun1 == null || fun2 == null) { 46 | throw new NullPointerException("Map function must not be null.") 47 | } 48 | val cleanFun1 = clean(fun1) 49 | val cleanFun2 = clean(fun2) 50 | val comapper = new CoMapFunction[IN1, IN2, R] { 51 | def map1(in1: IN1): R = cleanFun1(in1) 52 | def map2(in2: IN2): R = cleanFun2(in2) 53 | } 54 | 55 | map(comapper) 56 | } 57 | 58 | /** Applies a CoMap transformation on these connected streams. 59 | * 60 | * The transformation calls [[CoMapFunction#map1]] for each element in the first stream and [[CoMapFunction#map2]] 61 | * for each element of the second stream. 62 | * 63 | * On can pass a subclass of [[org.apache.flink.streaming.api.functions.co.RichCoMapFunction]] to gain access to the 64 | * [[org.apache.flink.api.common.functions.RuntimeContext]] and to additional life cycle methods. 65 | * 66 | * @param coMapper 67 | * The CoMapFunction used to transform the two connected streams 68 | * @return 69 | * The resulting data stream 70 | */ 71 | def map[R: TypeInformation](coMapper: CoMapFunction[IN1, IN2, R]): DataStream[R] = { 72 | if (coMapper == null) { 73 | throw new NullPointerException("Map function must not be null.") 74 | } 75 | 76 | val outType: TypeInformation[R] = implicitly[TypeInformation[R]] 77 | asScalaStream(javaStream.map(coMapper, outType).asInstanceOf[JavaStream[R]]) 78 | } 79 | 80 | /** Applies the given [[CoProcessFunction]] on the connected input streams, thereby creating a transformed output 81 | * stream. 82 | * 83 | * The function will be called for every element in the input streams and can produce zero or more output elements. 84 | * Contrary to the [[flatMap(CoFlatMapFunction)]] function, this function can also query the time and set timers. 85 | * When reacting to the firing of set timers the function can directly emit elements and/or register yet more timers. 86 | * 87 | * @param coProcessFunction 88 | * The [[CoProcessFunction]] that is called for each element in the stream. 89 | * @return 90 | * The transformed [[DataStream]]. 91 | */ 92 | @PublicEvolving 93 | def process[R: TypeInformation](coProcessFunction: CoProcessFunction[IN1, IN2, R]): DataStream[R] = { 94 | 95 | if (coProcessFunction == null) { 96 | throw new NullPointerException("CoProcessFunction function must not be null.") 97 | } 98 | 99 | val outType: TypeInformation[R] = implicitly[TypeInformation[R]] 100 | 101 | asScalaStream(javaStream.process(coProcessFunction, outType)) 102 | } 103 | 104 | /** Applies the given [[KeyedCoProcessFunction]] on the connected input keyed streams, thereby creating a transformed 105 | * output stream. 106 | * 107 | * The function will be called for every element in the input keyed streams and can produce zero or more output 108 | * elements. Contrary to the [[flatMap(CoFlatMapFunction)]] function, this function can also query the time and set 109 | * timers. When reacting to the firing of set timers the function can directly emit elements and/or register yet more 110 | * timers. 
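* An illustrative sketch; the `rules` and `events` streams, their String element type, and the implicit
* `TypeInformation` instances in scope are assumptions of the example:
* {{{
* val rules: DataStream[String]  = ...
* val events: DataStream[String] = ...
*
* val tagged: DataStream[String] = rules
*   .connect(events)
*   .keyBy(r => r, e => e)
*   .process(new KeyedCoProcessFunction[String, String, String, String] {
*     override def processElement1(
*         rule: String,
*         ctx: KeyedCoProcessFunction[String, String, String, String]#Context,
*         out: Collector[String]
*     ): Unit = out.collect(s"rule: $rule")
*
*     override def processElement2(
*         event: String,
*         ctx: KeyedCoProcessFunction[String, String, String, String]#Context,
*         out: Collector[String]
*     ): Unit = out.collect(s"event: $event")
*   })
* }}}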
111 | * 112 | * @param keyedCoProcessFunction 113 | * The [[KeyedCoProcessFunction]] that is called for each element in the stream. 114 | * @return 115 | * The transformed [[DataStream]]. 116 | */ 117 | @PublicEvolving 118 | def process[K, R: TypeInformation](keyedCoProcessFunction: KeyedCoProcessFunction[K, IN1, IN2, R]): DataStream[R] = { 119 | if (keyedCoProcessFunction == null) { 120 | throw new NullPointerException("KeyedCoProcessFunction function must not be null.") 121 | } 122 | 123 | val outType: TypeInformation[R] = implicitly[TypeInformation[R]] 124 | 125 | asScalaStream(javaStream.process(keyedCoProcessFunction, outType)) 126 | } 127 | 128 | /** Applies a CoFlatMap transformation on these connected streams. 129 | * 130 | * The transformation calls [[CoFlatMapFunction#flatMap1]] for each element in the first stream and 131 | * [[CoFlatMapFunction#flatMap2]] for each element of the second stream. 132 | * 133 | * On can pass a subclass of [[org.apache.flink.streaming.api.functions.co.RichCoFlatMapFunction]] to gain access to 134 | * the [[org.apache.flink.api.common.functions.RuntimeContext]] and to additional life cycle methods. 135 | * 136 | * @param coFlatMapper 137 | * The CoFlatMapFunction used to transform the two connected streams 138 | * @return 139 | * The resulting data stream. 140 | */ 141 | def flatMap[R: TypeInformation](coFlatMapper: CoFlatMapFunction[IN1, IN2, R]): DataStream[R] = { 142 | 143 | if (coFlatMapper == null) { 144 | throw new NullPointerException("FlatMap function must not be null.") 145 | } 146 | 147 | val outType: TypeInformation[R] = implicitly[TypeInformation[R]] 148 | asScalaStream(javaStream.flatMap(coFlatMapper, outType).asInstanceOf[JavaStream[R]]) 149 | } 150 | 151 | /** Applies a CoFlatMap transformation on the connected streams. 152 | * 153 | * The transformation consists of two separate functions, where the first one is called for each element of the first 154 | * connected stream, and the second one is called for each element of the second connected stream. 155 | * 156 | * @param fun1 157 | * Function called per element of the first input. 158 | * @param fun2 159 | * Function called per element of the second input. 160 | * @return 161 | * The resulting data stream. 162 | */ 163 | def flatMap[R: TypeInformation]( 164 | fun1: (IN1, Collector[R]) => Unit, 165 | fun2: (IN2, Collector[R]) => Unit 166 | ): DataStream[R] = { 167 | 168 | if (fun1 == null || fun2 == null) { 169 | throw new NullPointerException("FlatMap functions must not be null.") 170 | } 171 | val cleanFun1 = clean(fun1) 172 | val cleanFun2 = clean(fun2) 173 | val flatMapper = new CoFlatMapFunction[IN1, IN2, R] { 174 | def flatMap1(value: IN1, out: Collector[R]): Unit = cleanFun1(value, out) 175 | def flatMap2(value: IN2, out: Collector[R]): Unit = cleanFun2(value, out) 176 | } 177 | flatMap(flatMapper) 178 | } 179 | 180 | /** Applies a CoFlatMap transformation on the connected streams. 181 | * 182 | * The transformation consists of two separate functions, where the first one is called for each element of the first 183 | * connected stream, and the second one is called for each element of the second connected stream. 184 | * 185 | * @param fun1 186 | * Function called per element of the first input. 187 | * @param fun2 188 | * Function called per element of the second input. 189 | * @return 190 | * The resulting data stream. 
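* A hedged sketch; the control/data streams and the implicit `TypeInformation` instances are assumptions of
* the example:
* {{{
* val control: DataStream[(String, Boolean)] = ...
* val data: DataStream[(String, Long)]       = ...
*
* val values: DataStream[Long] = control
*   .connect(data)
*   .keyBy(_._1, _._1)
*   .flatMap[Long](
*     (switch, out) => (),                    // ignore control messages in this sketch
*     (record, out) => out.collect(record._2) // forward the value of every data record
*   )
* }}}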
191 | */ 192 | def flatMap[R: TypeInformation](fun1: IN1 => TraversableOnce[R], fun2: IN2 => TraversableOnce[R]): DataStream[R] = { 193 | 194 | if (fun1 == null || fun2 == null) { 195 | throw new NullPointerException("FlatMap functions must not be null.") 196 | } 197 | val cleanFun1 = clean(fun1) 198 | val cleanFun2 = clean(fun2) 199 | 200 | val flatMapper = new CoFlatMapFunction[IN1, IN2, R] { 201 | def flatMap1(value: IN1, out: Collector[R]) = { cleanFun1(value) foreach out.collect } 202 | def flatMap2(value: IN2, out: Collector[R]) = { cleanFun2(value) foreach out.collect } 203 | } 204 | 205 | flatMap(flatMapper) 206 | } 207 | 208 | // ------------------------------------------------------ 209 | // grouping and partitioning 210 | // ------------------------------------------------------ 211 | 212 | /** Keys the two connected streams together. After this operation, all elements with the same key from both streams 213 | * will be sent to the same parallel instance of the transformation functions. 214 | * 215 | * @param keyPosition1 216 | * The first stream's key field 217 | * @param keyPosition2 218 | * The second stream's key field 219 | * @return 220 | * The key-grouped connected streams 221 | */ 222 | def keyBy(keyPosition1: Int, keyPosition2: Int): ConnectedStreams[IN1, IN2] = { 223 | asScalaStream(javaStream.keyBy(keyPosition1, keyPosition2)) 224 | } 225 | 226 | /** Keys the two connected streams together. After this operation, all elements with the same key from both streams 227 | * will be sent to the same parallel instance of the transformation functions. 228 | * 229 | * @param keyPositions1 230 | * The first stream's key fields 231 | * @param keyPositions2 232 | * The second stream's key fields 233 | * @return 234 | * The key-grouped connected streams 235 | */ 236 | def keyBy(keyPositions1: Array[Int], keyPositions2: Array[Int]): ConnectedStreams[IN1, IN2] = { 237 | asScalaStream(javaStream.keyBy(keyPositions1, keyPositions2)) 238 | } 239 | 240 | /** Keys the two connected streams together. After this operation, all elements with the same key from both streams 241 | * will be sent to the same parallel instance of the transformation functions. 242 | * 243 | * @param field1 244 | * The first stream's key expression 245 | * @param field2 246 | * The second stream's key expression 247 | * @return 248 | * The key-grouped connected streams 249 | */ 250 | def keyBy(field1: String, field2: String): ConnectedStreams[IN1, IN2] = { 251 | asScalaStream(javaStream.keyBy(field1, field2)) 252 | } 253 | 254 | /** Keys the two connected streams together. After this operation, all elements with the same key from both streams 255 | * will be sent to the same parallel instance of the transformation functions. 256 | * 257 | * @param fields1 258 | * The first stream's key expressions 259 | * @param fields2 260 | * The second stream's key expressions 261 | * @return 262 | * The key-grouped connected streams 263 | */ 264 | def keyBy(fields1: Array[String], fields2: Array[String]): ConnectedStreams[IN1, IN2] = { 265 | asScalaStream(javaStream.keyBy(fields1, fields2)) 266 | } 267 | 268 | /** Keys the two connected streams together. After this operation, all elements with the same key from both streams 269 | * will be sent to the same parallel instance of the transformation functions. 
270 | * 271 | * @param fun1 272 | * The first stream's key function 273 | * @param fun2 274 | * The second stream's key function 275 | * @return 276 | * The key-grouped connected streams 277 | */ 278 | def keyBy[KEY: TypeInformation](fun1: IN1 => KEY, fun2: IN2 => KEY): ConnectedStreams[IN1, IN2] = { 279 | 280 | val keyType = implicitly[TypeInformation[KEY]] 281 | 282 | val cleanFun1 = clean(fun1) 283 | val cleanFun2 = clean(fun2) 284 | 285 | val keyExtractor1 = new JavaKeySelector[IN1, KEY](cleanFun1) 286 | val keyExtractor2 = new JavaKeySelector[IN2, KEY](cleanFun2) 287 | 288 | asScalaStream(javaStream.keyBy(keyExtractor1, keyExtractor2, keyType)) 289 | } 290 | 291 | /** Returns a "closure-cleaned" version of the given function. Cleans only if closure cleaning is not disabled in the 292 | * [[org.apache.flink.api.common.ExecutionConfig]] 293 | */ 294 | private[flink] def clean[F <: AnyRef](f: F): F = { 295 | new StreamExecutionEnvironment(javaStream.getExecutionEnvironment).scalaClean(f) 296 | } 297 | 298 | @PublicEvolving 299 | def transform[R: TypeInformation]( 300 | functionName: String, 301 | operator: TwoInputStreamOperator[IN1, IN2, R] 302 | ): DataStream[R] = { 303 | asScalaStream(javaStream.transform(functionName, implicitly[TypeInformation[R]], operator)) 304 | } 305 | 306 | @PublicEvolving 307 | def transform[R: TypeInformation]( 308 | functionName: String, 309 | factory: TwoInputStreamOperatorFactory[IN1, IN2, R] 310 | ): DataStream[R] = { 311 | asScalaStream(javaStream.transform(functionName, implicitly[TypeInformation[R]], factory)) 312 | } 313 | } 314 | 315 | @Internal 316 | class JavaKeySelector[IN, K](private[this] val fun: IN => K) extends KeySelector[IN, K] { 317 | override def getKey(value: IN): K = fun(value) 318 | } 319 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/DataStreamUtils.scala: -------------------------------------------------------------------------------- 1 | package io.findify.flink.api 2 | 3 | import org.apache.flink.annotation.Experimental 4 | import org.apache.flink.api.common.typeinfo.TypeInformation 5 | import org.apache.flink.api.java.functions.KeySelector 6 | import org.apache.flink.streaming.api.datastream.{DataStreamUtils => JavaStreamUtils} 7 | 8 | import scala.jdk.CollectionConverters._ 9 | import scala.reflect.ClassTag 10 | import ScalaStreamOps._ 11 | 12 | /** This class provides simple utility methods for collecting a [[DataStream]], effectively enriching it with the 13 | * functionality encapsulated by [[DataStreamUtils]]. 14 | * 15 | * This experimental class is relocated from flink-streaming-contrib. 16 | * 17 | * @param self 18 | * DataStream 19 | */ 20 | @Experimental 21 | class DataStreamUtils[T: TypeInformation: ClassTag](val self: DataStream[T]) { 22 | 23 | /** Returns a scala iterator to iterate over the elements of the DataStream. 24 | * @return 25 | * The iterator 26 | * 27 | * @deprecated 28 | * Replaced with [[DataStream#executeAndCollect]]. 29 | */ 30 | def collect(): Iterator[T] = { 31 | JavaStreamUtils.collect(self.javaStream).asScala 32 | } 33 | 34 | /** Reinterprets the given [[DataStream]] as a [[KeyedStream]], which extracts keys with the given [[KeySelector]]. 35 | * 36 | * IMPORTANT: For every partition of the base stream, the keys of events in the base stream must be partitioned 37 | * exactly in the same way as if it was created through a [[DataStream#keyBy(KeySelector)]]. 
38 | * 39 | * @param keySelector 40 | * Function that defines how keys are extracted from the data stream. 41 | * @return 42 | * The reinterpretation of the [[DataStream]] as a [[KeyedStream]]. 43 | */ 44 | def reinterpretAsKeyedStream[K: TypeInformation](keySelector: T => K): KeyedStream[T, K] = { 45 | 46 | val keyTypeInfo = implicitly[TypeInformation[K]] 47 | val cleanSelector = clean(keySelector) 48 | val javaKeySelector = new JavaKeySelector[T, K](cleanSelector) 49 | 50 | asScalaStream(JavaStreamUtils.reinterpretAsKeyedStream(self.javaStream, javaKeySelector, keyTypeInfo)) 51 | } 52 | 53 | private[flink] def clean[F <: AnyRef](f: F): F = { 54 | new StreamExecutionEnvironment(self.javaStream.getExecutionEnvironment).scalaClean(f) 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/JoinedStreams.scala: -------------------------------------------------------------------------------- 1 | package io.findify.flink.api 2 | 3 | import org.apache.flink.annotation.{PublicEvolving, Public} 4 | import org.apache.flink.api.common.functions.{FlatJoinFunction, JoinFunction} 5 | import org.apache.flink.api.common.typeinfo.TypeInformation 6 | import org.apache.flink.api.java.functions.KeySelector 7 | import org.apache.flink.api.java.typeutils.ResultTypeQueryable 8 | import org.apache.flink.streaming.api.datastream.{ 9 | JoinedStreams => JavaJoinedStreams, 10 | CoGroupedStreams => JavaCoGroupedStreams 11 | } 12 | import org.apache.flink.streaming.api.windowing.assigners.WindowAssigner 13 | import org.apache.flink.streaming.api.windowing.evictors.Evictor 14 | import org.apache.flink.streaming.api.windowing.time.Time 15 | import org.apache.flink.streaming.api.windowing.triggers.Trigger 16 | import org.apache.flink.streaming.api.windowing.windows.Window 17 | import org.apache.flink.util.Collector 18 | import ScalaStreamOps._ 19 | 20 | /** `JoinedStreams` represents two [[DataStream]]s that have been joined. A streaming join operation is evaluated over 21 | * elements in a window. 22 | * 23 | * To finalize the join operation you also need to specify a [[KeySelector]] for both the first and second input and a 24 | * [[WindowAssigner]] 25 | * 26 | * Note: Right now, the groups are being built in memory so you need to ensure that they don't get too big. Otherwise 27 | * the JVM might crash. 28 | * 29 | * Example: 30 | * 31 | * {{{ 32 | * val one: DataStream[(String, Int)] = ... 33 | * val two: DataStream[(String, Int)] = ... 34 | * 35 | * val result = one.join(two) 36 | * .where {t => ... } 37 | * .equal {t => ... } 38 | * .window(TumblingEventTimeWindows.of(Time.of(5, TimeUnit.SECONDS))) 39 | * .apply(new MyJoinFunction()) 40 | * } 41 | * }}} 42 | */ 43 | @Public 44 | class JoinedStreams[T1, T2](input1: DataStream[T1], input2: DataStream[T2]) { 45 | 46 | /** Specifies a [[KeySelector]] for elements from the first input. 47 | */ 48 | def where[KEY: TypeInformation](keySelector: T1 => KEY): Where[KEY] = { 49 | val cleanFun = clean(keySelector) 50 | val keyType = implicitly[TypeInformation[KEY]] 51 | val javaSelector = new KeySelector[T1, KEY] with ResultTypeQueryable[KEY] { 52 | def getKey(in: T1) = cleanFun(in) 53 | override def getProducedType: TypeInformation[KEY] = keyType 54 | } 55 | new Where[KEY](javaSelector, keyType) 56 | } 57 | 58 | /** A join operation that has a [[KeySelector]] defined for the first input. 
59 | * 60 | * You need to specify a [[KeySelector]] for the second input using [[equalTo()]] before you can proceed with 61 | * specifying a [[WindowAssigner]] using [[EqualTo.window()]]. 62 | * 63 | * @tparam KEY 64 | * Type of the key. This must be the same for both inputs 65 | */ 66 | class Where[KEY](keySelector1: KeySelector[T1, KEY], keyType: TypeInformation[KEY]) { 67 | 68 | /** Specifies a [[KeySelector]] for elements from the second input. 69 | */ 70 | def equalTo(keySelector: T2 => KEY): EqualTo = { 71 | val cleanFun = clean(keySelector) 72 | val localKeyType = keyType 73 | val javaSelector = new KeySelector[T2, KEY] with ResultTypeQueryable[KEY] { 74 | def getKey(in: T2) = cleanFun(in) 75 | override def getProducedType: TypeInformation[KEY] = localKeyType 76 | } 77 | new EqualTo(javaSelector) 78 | } 79 | 80 | /** A join operation that has a [[KeySelector]] defined for the first and the second input. 81 | * 82 | * A window can now be specified using [[window()]]. 83 | */ 84 | class EqualTo(keySelector2: KeySelector[T2, KEY]) { 85 | 86 | /** Specifies the window on which the join operation works. 87 | */ 88 | @PublicEvolving 89 | def window[W <: Window]( 90 | assigner: WindowAssigner[_ >: JavaCoGroupedStreams.TaggedUnion[T1, T2], W] 91 | ): WithWindow[W] = { 92 | if (keySelector1 == null || keySelector2 == null) { 93 | throw new UnsupportedOperationException( 94 | "You first need to specify KeySelectors for both inputs using where() and equalTo()." 95 | ) 96 | } 97 | 98 | new WithWindow[W](clean(assigner), null, null, null) 99 | } 100 | 101 | /** A join operation that has [[KeySelector]]s defined for both inputs as well as a [[WindowAssigner]]. 102 | * 103 | * @tparam W 104 | * Type of { @link Window} on which the join operation works. 105 | */ 106 | class WithWindow[W <: Window]( 107 | windowAssigner: WindowAssigner[_ >: JavaCoGroupedStreams.TaggedUnion[T1, T2], W], 108 | trigger: Trigger[_ >: JavaCoGroupedStreams.TaggedUnion[T1, T2], _ >: W], 109 | evictor: Evictor[_ >: JavaCoGroupedStreams.TaggedUnion[T1, T2], _ >: W], 110 | val allowedLateness: Time 111 | ) { 112 | 113 | /** Sets the [[Trigger]] that should be used to trigger window emission. 114 | */ 115 | @PublicEvolving 116 | def trigger(newTrigger: Trigger[_ >: JavaCoGroupedStreams.TaggedUnion[T1, T2], _ >: W]): WithWindow[W] = { 117 | new WithWindow[W](windowAssigner, newTrigger, evictor, allowedLateness) 118 | } 119 | 120 | /** Sets the [[Evictor]] that should be used to evict elements from a window before emission. 121 | * 122 | * Note: When using an evictor window performance will degrade significantly, since pre-aggregation of window 123 | * results cannot be used. 124 | */ 125 | @PublicEvolving 126 | def evictor(newEvictor: Evictor[_ >: JavaCoGroupedStreams.TaggedUnion[T1, T2], _ >: W]): WithWindow[W] = { 127 | new WithWindow[W](windowAssigner, trigger, newEvictor, allowedLateness) 128 | } 129 | 130 | /** Sets the time by which elements are allowed to be late. Delegates to 131 | * [[WindowedStream#allowedLateness(Time)]] 132 | */ 133 | @PublicEvolving 134 | def allowedLateness(newLateness: Time): WithWindow[W] = { 135 | new WithWindow[W](windowAssigner, trigger, evictor, newLateness) 136 | } 137 | 138 | /** Completes the join operation with the user function that is executed for windowed groups. 
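* For example, a hedged sketch in which `orders` and `payments` are placeholder streams keyed by their first
* field and an implicit `TypeInformation` for the result type is assumed:
* {{{
* val orders: DataStream[(String, Double)]   = ...
* val payments: DataStream[(String, Double)] = ...
*
* val matched: DataStream[(String, Double, Double)] = orders
*   .join(payments)
*   .where(_._1)
*   .equalTo(_._1)
*   .window(TumblingEventTimeWindows.of(Time.seconds(10)))
*   .apply((order, payment) => (order._1, order._2, payment._2))
* }}}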
139 | */ 140 | def apply[O: TypeInformation](fun: (T1, T2) => O): DataStream[O] = { 141 | require(fun != null, "Join function must not be null.") 142 | 143 | val joiner = new FlatJoinFunction[T1, T2, O] { 144 | val cleanFun = clean(fun) 145 | def join(left: T1, right: T2, out: Collector[O]) = { 146 | out.collect(cleanFun(left, right)) 147 | } 148 | } 149 | apply(joiner) 150 | } 151 | 152 | /** Completes the join operation with the user function that is executed for windowed groups. 153 | */ 154 | def apply[O: TypeInformation](fun: (T1, T2, Collector[O]) => Unit): DataStream[O] = { 155 | require(fun != null, "Join function must not be null.") 156 | 157 | val joiner = new FlatJoinFunction[T1, T2, O] { 158 | val cleanFun = clean(fun) 159 | def join(left: T1, right: T2, out: Collector[O]) = { 160 | cleanFun(left, right, out) 161 | } 162 | } 163 | apply(joiner) 164 | } 165 | 166 | /** Completes the join operation with the user function that is executed for windowed groups. 167 | */ 168 | def apply[T: TypeInformation](function: JoinFunction[T1, T2, T]): DataStream[T] = { 169 | 170 | val join = new JavaJoinedStreams[T1, T2](input1.javaStream, input2.javaStream) 171 | 172 | asScalaStream( 173 | join 174 | .where(keySelector1) 175 | .equalTo(keySelector2) 176 | .window(windowAssigner) 177 | .trigger(trigger) 178 | .evictor(evictor) 179 | .allowedLateness(allowedLateness) 180 | .apply(clean(function), implicitly[TypeInformation[T]]) 181 | ) 182 | } 183 | 184 | /** Completes the join operation with the user function that is executed for windowed groups. 185 | */ 186 | def apply[T: TypeInformation](function: FlatJoinFunction[T1, T2, T]): DataStream[T] = { 187 | 188 | val join = new JavaJoinedStreams[T1, T2](input1.javaStream, input2.javaStream) 189 | 190 | asScalaStream( 191 | join 192 | .where(keySelector1) 193 | .equalTo(keySelector2) 194 | .window(windowAssigner) 195 | .trigger(trigger) 196 | .evictor(evictor) 197 | .allowedLateness(allowedLateness) 198 | .apply(clean(function), implicitly[TypeInformation[T]]) 199 | ) 200 | } 201 | } 202 | } 203 | } 204 | 205 | /** Returns a "closure-cleaned" version of the given function. Cleans only if closure cleaning is not disabled in the 206 | * [[org.apache.flink.api.common.ExecutionConfig]]. 
207 | */ 208 | private[flink] def clean[F <: AnyRef](f: F): F = { 209 | new StreamExecutionEnvironment(input1.javaStream.getExecutionEnvironment).scalaClean(f) 210 | } 211 | } 212 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/KeyedStream.scala: -------------------------------------------------------------------------------- 1 | package io.findify.flink.api 2 | 3 | import io.findify.flink.api.function.StatefulFunction 4 | import org.apache.flink.annotation.{Internal, Public, PublicEvolving} 5 | import org.apache.flink.api.common.functions._ 6 | import org.apache.flink.api.common.state.{ReducingStateDescriptor, ValueStateDescriptor} 7 | import org.apache.flink.api.common.typeinfo.TypeInformation 8 | import org.apache.flink.api.common.typeutils.TypeSerializer 9 | import org.apache.flink.streaming.api.datastream.{ 10 | QueryableStateStream, 11 | KeyedStream => KeyedJavaStream, 12 | WindowedStream => WindowedJavaStream 13 | } 14 | import org.apache.flink.streaming.api.functions.aggregation.AggregationFunction.AggregationType 15 | import org.apache.flink.streaming.api.functions.aggregation.{AggregationFunction, ComparableAggregator, SumAggregator} 16 | import org.apache.flink.streaming.api.functions.co.ProcessJoinFunction 17 | import org.apache.flink.streaming.api.functions.query.{QueryableAppendingStateOperator, QueryableValueStateOperator} 18 | import org.apache.flink.streaming.api.functions.{KeyedProcessFunction, ProcessFunction} 19 | import org.apache.flink.streaming.api.windowing.assigners._ 20 | import org.apache.flink.streaming.api.windowing.time.Time 21 | import org.apache.flink.streaming.api.windowing.windows.{GlobalWindow, TimeWindow, Window} 22 | import org.apache.flink.util.Collector 23 | import ScalaStreamOps._ 24 | 25 | @Public 26 | class KeyedStream[T, K](javaStream: KeyedJavaStream[T, K]) extends DataStream[T](javaStream) { 27 | 28 | // ------------------------------------------------------------------------ 29 | // Properties 30 | // ------------------------------------------------------------------------ 31 | 32 | /** Gets the type of the key by which this stream is keyed. 33 | */ 34 | @Internal 35 | def getKeyType = javaStream.getKeyType() 36 | 37 | // ------------------------------------------------------------------------ 38 | // basic transformations 39 | // ------------------------------------------------------------------------ 40 | 41 | /** Applies the given [[ProcessFunction]] on the input stream, thereby creating a transformed output stream. 42 | * 43 | * The function will be called for every element in the stream and can produce zero or more output. The function can 44 | * also query the time and set timers. When reacting to the firing of set timers the function can emit yet more 45 | * elements. 46 | * 47 | * The function will be called for every element in the input streams and can produce zero or more output elements. 48 | * Contrary to the [[DataStream#flatMap(FlatMapFunction)]] function, this function can also query the time and set 49 | * timers. When reacting to the firing of set timers the function can directly emit elements and/or register yet more 50 | * timers. 51 | * 52 | * @param processFunction 53 | * The [[ProcessFunction]] that is called for each element in the stream. 
54 | * 55 | * @deprecated 56 | * Use [[KeyedStream#process(KeyedProcessFunction)]] 57 | */ 58 | @deprecated("will be removed in a future version") 59 | @PublicEvolving 60 | override def process[R: TypeInformation](processFunction: ProcessFunction[T, R]): DataStream[R] = { 61 | 62 | if (processFunction == null) { 63 | throw new NullPointerException("ProcessFunction must not be null.") 64 | } 65 | 66 | asScalaStream(javaStream.process(processFunction, implicitly[TypeInformation[R]])) 67 | } 68 | 69 | /** Applies the given [[KeyedProcessFunction]] on the input stream, thereby creating a transformed output stream. 70 | * 71 | * The function will be called for every element in the stream and can produce zero or more output. The function can 72 | * also query the time and set timers. When reacting to the firing of set timers the function can emit yet more 73 | * elements. 74 | * 75 | * The function will be called for every element in the input streams and can produce zero or more output elements. 76 | * Contrary to the [[DataStream#flatMap(FlatMapFunction)]] function, this function can also query the time and set 77 | * timers. When reacting to the firing of set timers the function can directly emit elements and/or register yet more 78 | * timers. 79 | * 80 | * @param keyedProcessFunction 81 | * The [[KeyedProcessFunction]] that is called for each element in the stream. 82 | */ 83 | @PublicEvolving 84 | def process[R: TypeInformation](keyedProcessFunction: KeyedProcessFunction[K, T, R]): DataStream[R] = { 85 | 86 | if (keyedProcessFunction == null) { 87 | throw new NullPointerException("KeyedProcessFunction must not be null.") 88 | } 89 | 90 | asScalaStream(javaStream.process(keyedProcessFunction, implicitly[TypeInformation[R]])) 91 | } 92 | 93 | // ------------------------------------------------------------------------ 94 | // Joining 95 | // ------------------------------------------------------------------------ 96 | 97 | /** Join elements of this [[KeyedStream]] with elements of another [[KeyedStream]] over a time interval that can be 98 | * specified with [[IntervalJoin.between]]. 99 | * 100 | * @param otherStream 101 | * The other keyed stream to join this keyed stream with 102 | * @tparam OTHER 103 | * Type parameter of elements in the other stream 104 | * @return 105 | * An instance of [[IntervalJoin]] with this keyed stream and the other keyed stream 106 | */ 107 | @PublicEvolving 108 | def intervalJoin[OTHER](otherStream: KeyedStream[OTHER, K]): IntervalJoin[T, OTHER, K] = { 109 | new IntervalJoin[T, OTHER, K](this, otherStream) 110 | } 111 | 112 | /** Perform a join over a time interval. 113 | * 114 | * @tparam IN1 115 | * The type parameter of the elements in the first streams 116 | * @tparam IN2 117 | * The type parameter of the elements in the second stream 118 | */ 119 | @PublicEvolving 120 | class IntervalJoin[IN1, IN2, KEY](val streamOne: KeyedStream[IN1, KEY], val streamTwo: KeyedStream[IN2, KEY]) { 121 | 122 | /** Specifies the time boundaries over which the join operation works, so that
leftElement.timestamp + lowerBound <= rightElement.timestamp <= leftElement.timestamp + upperBound. 123 | *
By default both the lower and 124 | * the upper bound are inclusive. This can be configured with [[IntervalJoined.lowerBoundExclusive]] and 125 | * [[IntervalJoined.upperBoundExclusive]] 126 | * 127 | * @param lowerBound 128 | * The lower bound. Needs to be smaller than or equal to the upperBound 129 | * @param upperBound 130 | * The upper bound. Needs to be bigger than or equal to the lowerBound 131 | */ 132 | @PublicEvolving 133 | def between(lowerBound: Time, upperBound: Time): IntervalJoined[IN1, IN2, KEY] = { 134 | val lowerMillis = lowerBound.toMilliseconds 135 | val upperMillis = upperBound.toMilliseconds 136 | new IntervalJoined[IN1, IN2, KEY](streamOne, streamTwo, lowerMillis, upperMillis) 137 | } 138 | } 139 | 140 | /** IntervalJoined is a container for two streams that have keys for both sides as well as the time boundaries over 141 | * which elements should be joined. 142 | * 143 | * @tparam IN1 144 | * Input type of elements from the first stream 145 | * @tparam IN2 146 | * Input type of elements from the second stream 147 | * @tparam KEY 148 | * The type of the key 149 | */ 150 | @PublicEvolving 151 | class IntervalJoined[IN1, IN2, KEY]( 152 | private val firstStream: KeyedStream[IN1, KEY], 153 | private val secondStream: KeyedStream[IN2, KEY], 154 | private val lowerBound: Long, 155 | private val upperBound: Long 156 | ) { 157 | 158 | private var lowerBoundInclusive = true 159 | private var upperBoundInclusive = true 160 | 161 | /** Set the lower bound to be exclusive 162 | */ 163 | @PublicEvolving 164 | def lowerBoundExclusive(): IntervalJoined[IN1, IN2, KEY] = { 165 | this.lowerBoundInclusive = false 166 | this 167 | } 168 | 169 | /** Set the upper bound to be exclusive 170 | */ 171 | @PublicEvolving 172 | def upperBoundExclusive(): IntervalJoined[IN1, IN2, KEY] = { 173 | this.upperBoundInclusive = false 174 | this 175 | } 176 | 177 | /** Completes the join operation with the user function that is executed for each joined pair of elements. 178 | * 179 | * @param processJoinFunction 180 | * The user-defined function 181 | * @tparam OUT 182 | * The output type 183 | * @return 184 | * Returns a DataStream 185 | */ 186 | @PublicEvolving 187 | def process[OUT: TypeInformation](processJoinFunction: ProcessJoinFunction[IN1, IN2, OUT]): DataStream[OUT] = { 188 | 189 | val outType: TypeInformation[OUT] = implicitly[TypeInformation[OUT]] 190 | 191 | val javaJoined = new KeyedJavaStream.IntervalJoined[IN1, IN2, KEY]( 192 | firstStream.javaStream.asInstanceOf[KeyedJavaStream[IN1, KEY]], 193 | secondStream.javaStream.asInstanceOf[KeyedJavaStream[IN2, KEY]], 194 | lowerBound, 195 | upperBound, 196 | lowerBoundInclusive, 197 | upperBoundInclusive 198 | ) 199 | asScalaStream(javaJoined.process(processJoinFunction, outType)) 200 | } 201 | } 202 | 203 | // ------------------------------------------------------------------------ 204 | // Windowing 205 | // ------------------------------------------------------------------------ 206 | 207 | /** Windows this [[KeyedStream]] into tumbling time windows. 208 | * 209 | * This is a shortcut for either `.window(TumblingEventTimeWindows.of(size))` or 210 | * `.window(TumblingProcessingTimeWindows.of(size))` depending on the time characteristic set using 211 | * [[StreamExecutionEnvironment.setStreamTimeCharacteristic()]] 212 | * 213 | * @param size 214 | * The size of the window. 215 | * 216 | * @deprecated 217 | * Please use [[window()]] with either [[TumblingEventTimeWindows]] or [[TumblingProcessingTimeWindows]]. 
For more 218 | * information, see the deprecation notice on [[org.apache.flink.streaming.api.TimeCharacteristic]]. 219 | */ 220 | @deprecated 221 | def timeWindow(size: Time): WindowedStream[T, K, TimeWindow] = { 222 | new WindowedStream(javaStream.timeWindow(size)) 223 | } 224 | 225 | /** Windows this [[KeyedStream]] into sliding time windows. 226 | * 227 | * This is a shortcut for either `.window(SlidingEventTimeWindows.of(size))` or 228 | * `.window(SlidingProcessingTimeWindows.of(size))` depending on the time characteristic set using 229 | * [[StreamExecutionEnvironment.setStreamTimeCharacteristic()]] 230 | * 231 | * @param size 232 | * The size of the window. 233 | * 234 | * @deprecated 235 | * Please use [[window()]] with either [[SlidingEventTimeWindows]] or [[SlidingProcessingTimeWindows]]. For more 236 | * information, see the deprecation notice on [[org.apache.flink.streaming.api.TimeCharacteristic]]. 237 | */ 238 | @deprecated 239 | def timeWindow(size: Time, slide: Time): WindowedStream[T, K, TimeWindow] = { 240 | new WindowedStream(javaStream.timeWindow(size, slide)) 241 | } 242 | 243 | /** Windows this [[KeyedStream]] into sliding count windows. 244 | * 245 | * @param size 246 | * The size of the windows in number of elements. 247 | * @param slide 248 | * The slide interval in number of elements. 249 | */ 250 | def countWindow(size: Long, slide: Long): WindowedStream[T, K, GlobalWindow] = { 251 | new WindowedStream(javaStream.countWindow(size, slide)) 252 | } 253 | 254 | /** Windows this [[KeyedStream]] into tumbling count windows. 255 | * 256 | * @param size 257 | * The size of the windows in number of elements. 258 | */ 259 | def countWindow(size: Long): WindowedStream[T, K, GlobalWindow] = { 260 | new WindowedStream(javaStream.countWindow(size)) 261 | } 262 | 263 | /** Windows this data stream to a [[WindowedStream]], which evaluates windows over a key grouped stream. Elements are 264 | * put into windows by a [[WindowAssigner]]. The grouping of elements is done both by key and by window. 265 | * 266 | * A [[org.apache.flink.streaming.api.windowing.triggers.Trigger]] can be defined to specify when windows are 267 | * evaluated. However, `WindowAssigner` have a default `Trigger` that is used if a `Trigger` is not specified. 268 | * 269 | * @param assigner 270 | * The `WindowAssigner` that assigns elements to windows. 271 | * @return 272 | * The trigger windows data stream. 273 | */ 274 | @PublicEvolving 275 | def window[W <: Window](assigner: WindowAssigner[_ >: T, W]): WindowedStream[T, K, W] = { 276 | new WindowedStream(new WindowedJavaStream[T, K, W](javaStream, assigner)) 277 | } 278 | 279 | // ------------------------------------------------------------------------ 280 | // Non-Windowed aggregation operations 281 | // ------------------------------------------------------------------------ 282 | 283 | /** Creates a new [[DataStream]] by reducing the elements of this DataStream using an associative reduce function. An 284 | * independent aggregate is kept per key. 285 | */ 286 | def reduce(reducer: ReduceFunction[T]): DataStream[T] = { 287 | if (reducer == null) { 288 | throw new NullPointerException("Reduce function must not be null.") 289 | } 290 | 291 | asScalaStream(javaStream.reduce(reducer)) 292 | } 293 | 294 | /** Creates a new [[DataStream]] by reducing the elements of this DataStream using an associative reduce function. An 295 | * independent aggregate is kept per key. 
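* For example, a running per-key sum (a sketch that assumes an implicit `TypeInformation` for the tuple type):
* {{{
* val counts: DataStream[(String, Int)] = ...
*
* val runningSum: DataStream[(String, Int)] = counts
*   .keyBy(_._1)
*   .reduce((a, b) => (a._1, a._2 + b._2))
* }}}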
296 | */ 297 | def reduce(fun: (T, T) => T): DataStream[T] = { 298 | if (fun == null) { 299 | throw new NullPointerException("Reduce function must not be null.") 300 | } 301 | val cleanFun = clean(fun) 302 | val reducer = new ReduceFunction[T] { 303 | def reduce(v1: T, v2: T): T = { cleanFun(v1, v2) } 304 | } 305 | reduce(reducer) 306 | } 307 | 308 | /** Applies an aggregation that that gives the current maximum of the data stream at the given position by the given 309 | * key. An independent aggregate is kept per key. 310 | * 311 | * @param position 312 | * The field position in the data points to minimize. This is applicable to Tuple types, Scala case classes, and 313 | * primitive types (which is considered as having one field). 314 | */ 315 | def max(position: Int): DataStream[T] = aggregate(AggregationType.MAX, position) 316 | 317 | /** Applies an aggregation that that gives the current maximum of the data stream at the given field by the given key. 318 | * An independent aggregate is kept per key. 319 | * 320 | * @param field 321 | * In case of a POJO, Scala case class, or Tuple type, the name of the (public) field on which to perform the 322 | * aggregation. Additionally, a dot can be used to drill down into nested objects, as in `"field1.fieldxy"`. 323 | * Furthermore "*" can be specified in case of a basic type (which is considered as having only one field). 324 | */ 325 | def max(field: String): DataStream[T] = aggregate(AggregationType.MAX, field) 326 | 327 | /** Applies an aggregation that that gives the current minimum of the data stream at the given position by the given 328 | * key. An independent aggregate is kept per key. 329 | * 330 | * @param position 331 | * The field position in the data points to minimize. This is applicable to Tuple types, Scala case classes, and 332 | * primitive types (which is considered as having one field). 333 | */ 334 | def min(position: Int): DataStream[T] = aggregate(AggregationType.MIN, position) 335 | 336 | /** Applies an aggregation that that gives the current minimum of the data stream at the given field by the given key. 337 | * An independent aggregate is kept per key. 338 | * 339 | * @param field 340 | * In case of a POJO, Scala case class, or Tuple type, the name of the (public) field on which to perform the 341 | * aggregation. Additionally, a dot can be used to drill down into nested objects, as in `"field1.fieldxy"`. 342 | * Furthermore "*" can be specified in case of a basic type (which is considered as having only one field). 343 | */ 344 | def min(field: String): DataStream[T] = aggregate(AggregationType.MIN, field) 345 | 346 | /** Applies an aggregation that sums the data stream at the given position by the given key. An independent aggregate 347 | * is kept per key. 348 | * 349 | * @param position 350 | * The field position in the data points to minimize. This is applicable to Tuple types, Scala case classes, and 351 | * primitive types (which is considered as having one field). 352 | */ 353 | def sum(position: Int): DataStream[T] = aggregate(AggregationType.SUM, position) 354 | 355 | /** Applies an aggregation that sums the data stream at the given field by the given key. An independent aggregate is 356 | * kept per key. 357 | * 358 | * @param field 359 | * In case of a POJO, Scala case class, or Tuple type, the name of the (public) field on which to perform the 360 | * aggregation. Additionally, a dot can be used to drill down into nested objects, as in `"field1.fieldxy"`. 
361 | * Furthermore "*" can be specified in case of a basic type (which is considered as having only one field). 362 | */ 363 | def sum(field: String): DataStream[T] = aggregate(AggregationType.SUM, field) 364 | 365 | /** Applies an aggregation that that gives the current minimum element of the data stream by the given position by the 366 | * given key. An independent aggregate is kept per key. When equality, the first element is returned with the minimal 367 | * value. 368 | * 369 | * @param position 370 | * The field position in the data points to minimize. This is applicable to Tuple types, Scala case classes, and 371 | * primitive types (which is considered as having one field). 372 | */ 373 | def minBy(position: Int): DataStream[T] = aggregate(AggregationType.MINBY, position) 374 | 375 | /** Applies an aggregation that that gives the current minimum element of the data stream by the given field by the 376 | * given key. An independent aggregate is kept per key. When equality, the first element is returned with the minimal 377 | * value. 378 | * 379 | * @param field 380 | * In case of a POJO, Scala case class, or Tuple type, the name of the (public) field on which to perform the 381 | * aggregation. Additionally, a dot can be used to drill down into nested objects, as in `"field1.fieldxy"`. 382 | * Furthermore "*" can be specified in case of a basic type (which is considered as having only one field). 383 | */ 384 | def minBy(field: String): DataStream[T] = aggregate(AggregationType.MINBY, field) 385 | 386 | /** Applies an aggregation that that gives the current maximum element of the data stream by the given position by the 387 | * given key. An independent aggregate is kept per key. When equality, the first element is returned with the maximal 388 | * value. 389 | * 390 | * @param position 391 | * The field position in the data points to minimize. This is applicable to Tuple types, Scala case classes, and 392 | * primitive types (which is considered as having one field). 393 | */ 394 | def maxBy(position: Int): DataStream[T] = 395 | aggregate(AggregationType.MAXBY, position) 396 | 397 | /** Applies an aggregation that that gives the current maximum element of the data stream by the given field by the 398 | * given key. An independent aggregate is kept per key. When equality, the first element is returned with the maximal 399 | * value. 400 | * 401 | * @param field 402 | * In case of a POJO, Scala case class, or Tuple type, the name of the (public) field on which to perform the 403 | * aggregation. Additionally, a dot can be used to drill down into nested objects, as in `"field1.fieldxy"`. 404 | * Furthermore "*" can be specified in case of a basic type (which is considered as having only one field). 
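As a rough sketch of the positional and named-field aggregations above (the `Reading` case class and streams are hypothetical; aggregating by field name assumes the stream's TypeInformation is a composite type, as with the CaseClassTypeInfo used elsewhere in this project):

case class Reading(sensor: String, temp: Double)

val readings: DataStream[Reading] = ???

// Running maximum temperature per sensor; minBy keeps the whole element
// that carries the minimal value, not just the aggregated field.
val hottestSoFar = readings.keyBy(_.sensor).max("temp")
val coldestSeen  = readings.keyBy(_.sensor).minBy("temp")

// Tuple streams can address fields by position instead of by name.
val totals = readings
  .map(r => (r.sensor, 1L))
  .keyBy(_._1)
  .sum(1)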
405 | */ 406 | def maxBy(field: String): DataStream[T] = 407 | aggregate(AggregationType.MAXBY, field) 408 | 409 | private def aggregate(aggregationType: AggregationType, field: String): DataStream[T] = { 410 | val aggregationFunc = aggregationType match { 411 | case AggregationType.SUM => 412 | new SumAggregator(field, javaStream.getType, javaStream.getExecutionConfig) 413 | case _ => 414 | new ComparableAggregator(field, javaStream.getType, aggregationType, true, javaStream.getExecutionConfig) 415 | } 416 | 417 | aggregate(aggregationFunc) 418 | } 419 | 420 | private def aggregate(aggregationType: AggregationType, position: Int): DataStream[T] = { 421 | val aggregationFunc = aggregationType match { 422 | case AggregationType.SUM => 423 | new SumAggregator(position, javaStream.getType, javaStream.getExecutionConfig) 424 | case _ => 425 | new ComparableAggregator(position, javaStream.getType, aggregationType, true, javaStream.getExecutionConfig) 426 | } 427 | 428 | aggregate(aggregationFunc) 429 | } 430 | 431 | private def aggregate(aggregationFunc: AggregationFunction[T]): DataStream[T] = { 432 | reduce(aggregationFunc).name("Keyed Aggregation") 433 | } 434 | 435 | // ------------------------------------------------------------------------ 436 | // functions with state 437 | // ------------------------------------------------------------------------ 438 | 439 | /** Creates a new DataStream that contains only the elements satisfying the given stateful filter predicate. To use 440 | * state partitioning, a key must be defined using .keyBy(..), in which case an independent state will be kept per 441 | * key. 442 | * 443 | * Note that the user state object needs to be serializable. 444 | */ 445 | def filterWithState[S: TypeInformation](fun: (T, Option[S]) => (Boolean, Option[S])): DataStream[T] = { 446 | if (fun == null) { 447 | throw new NullPointerException("Filter function must not be null.") 448 | } 449 | 450 | val cleanFun = clean(fun) 451 | val stateTypeInfo: TypeInformation[S] = implicitly[TypeInformation[S]] 452 | val serializer: TypeSerializer[S] = stateTypeInfo.createSerializer(javaStream.getExecutionConfig) 453 | 454 | val filterFun = new RichFilterFunction[T] with StatefulFunction[T, Boolean, S] { 455 | 456 | override val stateSerializer: TypeSerializer[S] = serializer 457 | 458 | override def filter(in: T): Boolean = { 459 | applyWithState(in, cleanFun) 460 | } 461 | } 462 | 463 | filter(filterFun) 464 | } 465 | 466 | /** Creates a new DataStream by applying the given stateful function to every element of this DataStream. To use state 467 | * partitioning, a key must be defined using .keyBy(..), in which case an independent state will be kept per key. 468 | * 469 | * Note that the user state object needs to be serializable. 
470 | */ 471 | def mapWithState[R: TypeInformation, S: TypeInformation](fun: (T, Option[S]) => (R, Option[S])): DataStream[R] = { 472 | if (fun == null) { 473 | throw new NullPointerException("Map function must not be null.") 474 | } 475 | 476 | val cleanFun = clean(fun) 477 | val stateTypeInfo: TypeInformation[S] = implicitly[TypeInformation[S]] 478 | val serializer: TypeSerializer[S] = stateTypeInfo.createSerializer(javaStream.getExecutionConfig) 479 | 480 | val mapper = new RichMapFunction[T, R] with StatefulFunction[T, R, S] { 481 | 482 | override val stateSerializer: TypeSerializer[S] = serializer 483 | 484 | override def map(in: T): R = { 485 | applyWithState(in, cleanFun) 486 | } 487 | } 488 | 489 | map(mapper) 490 | } 491 | 492 | /** Creates a new DataStream by applying the given stateful function to every element and flattening the results. To 493 | * use state partitioning, a key must be defined using .keyBy(..), in which case an independent state will be kept 494 | * per key. 495 | * 496 | * Note that the user state object needs to be serializable. 497 | */ 498 | def flatMapWithState[R: TypeInformation, S: TypeInformation]( 499 | fun: (T, Option[S]) => (TraversableOnce[R], Option[S]) 500 | ): DataStream[R] = { 501 | if (fun == null) { 502 | throw new NullPointerException("Flatmap function must not be null.") 503 | } 504 | 505 | val cleanFun = clean(fun) 506 | val stateTypeInfo: TypeInformation[S] = implicitly[TypeInformation[S]] 507 | val serializer: TypeSerializer[S] = stateTypeInfo.createSerializer(javaStream.getExecutionConfig) 508 | 509 | val flatMapper = new RichFlatMapFunction[T, R] with StatefulFunction[T, TraversableOnce[R], S] { 510 | 511 | override val stateSerializer: TypeSerializer[S] = serializer 512 | 513 | override def flatMap(in: T, out: Collector[R]): Unit = { 514 | applyWithState(in, cleanFun) foreach out.collect 515 | } 516 | } 517 | 518 | flatMap(flatMapper) 519 | } 520 | 521 | /** Publishes the keyed stream as a queryable ValueState instance. 522 | * 523 | * @param queryableStateName 524 | * Name under which to the publish the queryable state instance 525 | * @return 526 | * Queryable state instance 527 | */ 528 | @PublicEvolving 529 | def asQueryableState(queryableStateName: String): QueryableStateStream[K, T] = { 530 | val stateDescriptor = new ValueStateDescriptor(queryableStateName, dataType.createSerializer(executionConfig)) 531 | 532 | asQueryableState(queryableStateName, stateDescriptor) 533 | } 534 | 535 | /** Publishes the keyed stream as a queryable ValueState instance. 536 | * 537 | * @param queryableStateName 538 | * Name under which to the publish the queryable state instance 539 | * @param stateDescriptor 540 | * State descriptor to create state instance from 541 | * @return 542 | * Queryable state instance 543 | */ 544 | @PublicEvolving 545 | def asQueryableState( 546 | queryableStateName: String, 547 | stateDescriptor: ValueStateDescriptor[T] 548 | ): QueryableStateStream[K, T] = { 549 | 550 | transform( 551 | s"Queryable state: $queryableStateName", 552 | new QueryableValueStateOperator(queryableStateName, stateDescriptor) 553 | )(dataType) 554 | 555 | stateDescriptor.initializeSerializerUnlessSet(executionConfig) 556 | 557 | new QueryableStateStream(queryableStateName, stateDescriptor, getKeyType.createSerializer(executionConfig)) 558 | } 559 | 560 | /** Publishes the keyed stream as a queryable ReducingState instance. 
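A small sketch of the stateful shortcuts, assuming a hypothetical `clicks: DataStream[(String, Long)]` and implicit TypeInformation for the state and result types:

val clicks: DataStream[(String, Long)] = ???   // hypothetical (userId, amount) stream

// Running count per key; the Option[Long] state starts out empty.
val counted: DataStream[(String, Long)] = clicks
  .keyBy(_._1)
  .mapWithState[(String, Long), Long] { (click, state) =>
    val count = state.getOrElse(0L) + 1
    ((click._1, count), Some(count))
  }

// Keep only the first element seen per key.
val deduped: DataStream[(String, Long)] = clicks
  .keyBy(_._1)
  .filterWithState[Boolean] { (_, seen) => (seen.isEmpty, Some(true)) }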
561 | * 562 | * @param queryableStateName 563 | * Name under which to the publish the queryable state instance 564 | * @param stateDescriptor 565 | * State descriptor to create state instance from 566 | * @return 567 | * Queryable state instance 568 | */ 569 | @PublicEvolving 570 | def asQueryableState( 571 | queryableStateName: String, 572 | stateDescriptor: ReducingStateDescriptor[T] 573 | ): QueryableStateStream[K, T] = { 574 | 575 | transform( 576 | s"Queryable state: $queryableStateName", 577 | new QueryableAppendingStateOperator(queryableStateName, stateDescriptor) 578 | )(dataType) 579 | 580 | stateDescriptor.initializeSerializerUnlessSet(executionConfig) 581 | 582 | new QueryableStateStream(queryableStateName, stateDescriptor, getKeyType.createSerializer(executionConfig)) 583 | } 584 | 585 | } 586 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/OutputTag.scala: -------------------------------------------------------------------------------- 1 | package io.findify.flink.api 2 | 3 | import org.apache.flink.annotation.PublicEvolving 4 | import org.apache.flink.api.common.typeinfo.TypeInformation 5 | import org.apache.flink.util.{OutputTag => JOutputTag} 6 | 7 | /** An [[OutputTag]] is a typed and named tag to use for tagging side outputs of an operator. 8 | * 9 | * Example: 10 | * {{{ 11 | * val outputTag = OutputTag[String]("late-data") 12 | * }}} 13 | * 14 | * @tparam T 15 | * the type of elements in the side-output stream. 16 | */ 17 | @PublicEvolving 18 | class OutputTag[T: TypeInformation](id: String) extends JOutputTag[T](id, implicitly[TypeInformation[T]]) 19 | 20 | object OutputTag { 21 | def apply[T: TypeInformation](id: String): OutputTag[T] = new OutputTag(id) 22 | } 23 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/ScalaStreamOps.scala: -------------------------------------------------------------------------------- 1 | package io.findify.flink.api 2 | 3 | import io.findify.flinkadt.api.typeinfo.CaseClassTypeInfo 4 | import org.apache.flink.api.common.typeinfo.TypeInformation 5 | import org.apache.flink.streaming.api.datastream.{DataStream => JavaStream} 6 | import org.apache.flink.streaming.api.datastream.{ConnectedStreams => ConnectedJavaStreams} 7 | import org.apache.flink.streaming.api.datastream.{BroadcastConnectedStream => BroadcastConnectedJavaStreams} 8 | import org.apache.flink.streaming.api.datastream.{KeyedStream => KeyedJavaStream} 9 | 10 | import language.implicitConversions 11 | import language.experimental.macros 12 | 13 | object ScalaStreamOps { 14 | 15 | /** Converts an [[org.apache.flink.streaming.api.datastream.DataStream]] to a [[io.findify.flink.api.DataStream]]. 16 | */ 17 | def asScalaStream[R](stream: JavaStream[R]) = new DataStream[R](stream) 18 | 19 | /** Converts an [[org.apache.flink.streaming.api.datastream.KeyedStream]] to a [[io.findify.flink.api.KeyedStream]]. 20 | */ 21 | def asScalaStream[R, K](stream: KeyedJavaStream[R, K]) = new KeyedStream[R, K](stream) 22 | 23 | /** Converts an [[org.apache.flink.streaming.api.datastream.ConnectedStreams]] to a 24 | * [[io.findify.flink.api.ConnectedStreams]]. 25 | */ 26 | def asScalaStream[IN1, IN2](stream: ConnectedJavaStreams[IN1, IN2]) = new ConnectedStreams[IN1, IN2](stream) 27 | 28 | /** Converts an [[org.apache.flink.streaming.api.datastream.BroadcastConnectedStream]] to a 29 | * [[io.findify.flink.api.BroadcastConnectedStream]]. 
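For illustration, a hedged sketch of publishing keyed state as queryable state (hypothetical stream and state names; querying additionally requires the queryable-state feature to be enabled on the cluster):

import org.apache.flink.api.common.functions.ReduceFunction
import org.apache.flink.api.common.state.ReducingStateDescriptor
import org.apache.flink.api.common.typeinfo.TypeInformation

val clicks: DataStream[(String, Long)] = ???

// Simplest form: the latest value per key becomes queryable under the given name.
clicks.keyBy(_._1).asQueryableState("latest-click")

// Running per-key sums exposed through a ReducingState.
val sumDescriptor = new ReducingStateDescriptor[(String, Long)](
  "click-sums",
  new ReduceFunction[(String, Long)] {
    override def reduce(a: (String, Long), b: (String, Long)): (String, Long) = (a._1, a._2 + b._2)
  },
  implicitly[TypeInformation[(String, Long)]]
)
clicks.keyBy(_._1).asQueryableState("click-sums", sumDescriptor)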
30 | */ 31 | def asScalaStream[IN1, IN2](stream: BroadcastConnectedJavaStreams[IN1, IN2]) = 32 | new BroadcastConnectedStream[IN1, IN2](stream) 33 | 34 | private[flink] def fieldNames2Indices(typeInfo: TypeInformation[_], fields: Array[String]): Array[Int] = { 35 | typeInfo match { 36 | case ti: CaseClassTypeInfo[_] => 37 | val result = ti.getFieldIndices(fields) 38 | 39 | if (result.contains(-1)) { 40 | throw new IllegalArgumentException( 41 | "Fields '" + fields.mkString(", ") + 42 | "' are not valid for '" + ti.toString + "'." 43 | ) 44 | } 45 | 46 | result 47 | 48 | case _ => 49 | throw new UnsupportedOperationException( 50 | "Specifying fields by name is only" + 51 | "supported on Case Classes (for now)." 52 | ) 53 | } 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/WindowedStream.scala: -------------------------------------------------------------------------------- 1 | package io.findify.flink.api 2 | 3 | import io.findify.flink.api.function.{ProcessWindowFunction, WindowFunction} 4 | import io.findify.flink.api.function.util.{ 5 | ScalaProcessWindowFunctionWrapper, 6 | ScalaReduceFunction, 7 | ScalaWindowFunction, 8 | ScalaWindowFunctionWrapper 9 | } 10 | import org.apache.flink.annotation.{Public, PublicEvolving} 11 | import org.apache.flink.api.common.functions.{AggregateFunction, ReduceFunction} 12 | import org.apache.flink.api.common.typeinfo.TypeInformation 13 | import org.apache.flink.streaming.api.datastream.{WindowedStream => JavaWStream} 14 | import org.apache.flink.streaming.api.functions.aggregation.AggregationFunction.AggregationType 15 | import org.apache.flink.streaming.api.functions.aggregation.{ComparableAggregator, SumAggregator} 16 | import org.apache.flink.streaming.api.windowing.evictors.Evictor 17 | import org.apache.flink.streaming.api.windowing.time.Time 18 | import org.apache.flink.streaming.api.windowing.triggers.Trigger 19 | import org.apache.flink.streaming.api.windowing.windows.Window 20 | import org.apache.flink.util.Collector 21 | import ScalaStreamOps._ 22 | 23 | /** A [[WindowedStream]] represents a data stream where elements are grouped by key, and for each key, the stream of 24 | * elements is split into windows based on a [[org.apache.flink.streaming.api.windowing.assigners.WindowAssigner]]. 25 | * Window emission is triggered based on a [[Trigger]]. 26 | * 27 | * The windows are conceptually evaluated for each key individually, meaning windows can trigger at different points 28 | * for each key. 29 | * 30 | * If an [[org.apache.flink.streaming.api.windowing.evictors.Evictor]] is specified it will be used to evict elements 31 | * from the window after evaluation was triggered by the [[Trigger]] but before the actual evaluation of the window. 32 | * When using an evictor window performance will degrade significantly, since pre-aggregation of window results cannot 33 | * be used. 34 | * 35 | * Note that the [[WindowedStream]] is purely and API construct, during runtime the [[WindowedStream]] will be 36 | * collapsed together with the [[KeyedStream]] and the operation over the window into one single operation. 37 | * 38 | * @tparam T 39 | * The type of elements in the stream. 40 | * @tparam K 41 | * The type of the key by which elements are grouped. 42 | * @tparam W 43 | * The type of [[Window]] that the [[org.apache.flink.streaming.api.windowing.assigners.WindowAssigner]] assigns the 44 | * elements to. 
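These converters are what allow streams produced by Java-API code (sources, connectors) to be used with the Scala facade; a minimal sketch, assuming a hypothetical Java-side stream:

import io.findify.flink.api.ScalaStreamOps._
import org.apache.flink.streaming.api.datastream.{DataStream => JavaStream}

// Wrap a stream handed over by Java-API code and continue with Scala lambdas.
def fromJava(javaSide: JavaStream[String]): DataStream[String] =
  asScalaStream(javaSide).filter(_.nonEmpty)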
45 | */ 46 | @Public 47 | class WindowedStream[T, K, W <: Window](javaStream: JavaWStream[T, K, W]) { 48 | 49 | /** Sets the allowed lateness to a user-specified value. If not explicitly set, the allowed lateness is [[0L]]. 50 | * Setting the allowed lateness is only valid for event-time windows. If a value different than 0 is provided with a 51 | * processing-time [[org.apache.flink.streaming.api.windowing.assigners.WindowAssigner]], then an exception is 52 | * thrown. 53 | */ 54 | @PublicEvolving 55 | def allowedLateness(lateness: Time): WindowedStream[T, K, W] = { 56 | javaStream.allowedLateness(lateness) 57 | this 58 | } 59 | 60 | /** Send late arriving data to the side output identified by the given [[OutputTag]]. Data is considered late after 61 | * the watermark has passed the end of the window plus the allowed lateness set using [[allowedLateness(Time)]]. 62 | * 63 | * You can get the stream of late data using [[DataStream.getSideOutput()]] on the [[DataStream]] resulting from the 64 | * windowed operation with the same [[OutputTag]]. 65 | */ 66 | @PublicEvolving 67 | def sideOutputLateData(outputTag: OutputTag[T]): WindowedStream[T, K, W] = { 68 | javaStream.sideOutputLateData(outputTag) 69 | this 70 | } 71 | 72 | /** Sets the [[Trigger]] that should be used to trigger window emission. 73 | */ 74 | @PublicEvolving 75 | def trigger(trigger: Trigger[_ >: T, _ >: W]): WindowedStream[T, K, W] = { 76 | javaStream.trigger(trigger) 77 | this 78 | } 79 | 80 | /** Sets the [[Evictor]] that should be used to evict elements from a window before emission. 81 | * 82 | * Note: When using an evictor window performance will degrade significantly, since pre-aggregation of window results 83 | * cannot be used. 84 | */ 85 | @PublicEvolving 86 | def evictor(evictor: Evictor[_ >: T, _ >: W]): WindowedStream[T, K, W] = { 87 | javaStream.evictor(evictor) 88 | this 89 | } 90 | 91 | // ------------------------------------------------------------------------ 92 | // Operations on the keyed windows 93 | // ------------------------------------------------------------------------ 94 | 95 | // --------------------------- reduce() ----------------------------------- 96 | 97 | /** Applies a reduce function to the window. The window function is called for each evaluation of the window for each 98 | * key individually. The output of the reduce function is interpreted as a regular non-windowed stream. 99 | * 100 | * This window will try and pre-aggregate data as much as the window policies permit. For example, tumbling time 101 | * windows can perfectly pre-aggregate the data, meaning that only one element per key is stored. Sliding time 102 | * windows will pre-aggregate on the granularity of the slide interval, so a few elements are stored per key (one per 103 | * slide interval). Custom windows may not be able to pre-aggregate, or may need to store extra values in an 104 | * aggregation tree. 105 | * 106 | * @param function 107 | * The reduce function. 108 | * @return 109 | * The data stream that is the result of applying the reduce function to the window. 110 | */ 111 | def reduce(function: ReduceFunction[T]): DataStream[T] = { 112 | asScalaStream(javaStream.reduce(clean(function))) 113 | } 114 | 115 | /** Applies a reduce function to the window. The window function is called for each evaluation of the window for each 116 | * key individually. The output of the reduce function is interpreted as a regular non-windowed stream. 
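A short sketch tying the window configuration methods together (hypothetical `events` stream; implicit TypeInformation assumed in scope):

import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows
import org.apache.flink.streaming.api.windowing.time.Time

val events: DataStream[(String, Long)] = ???
val lateTag = OutputTag[(String, Long)]("late-events")

val windowedSums = events
  .keyBy(_._1)
  .window(TumblingEventTimeWindows.of(Time.minutes(1)))
  .allowedLateness(Time.seconds(30))
  .sideOutputLateData(lateTag)
  .reduce((a, b) => (a._1, a._2 + b._2))

// Elements that arrived too late for their window end up on the side output.
val lateEvents: DataStream[(String, Long)] = windowedSums.getSideOutput(lateTag)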
117 | * 118 | * This window will try and pre-aggregate data as much as the window policies permit. For example, tumbling time 119 | * windows can perfectly pre-aggregate the data, meaning that only one element per key is stored. Sliding time 120 | * windows will pre-aggregate on the granularity of the slide interval, so a few elements are stored per key (one per 121 | * slide interval). Custom windows may not be able to pre-aggregate, or may need to store extra values in an 122 | * aggregation tree. 123 | * 124 | * @param function 125 | * The reduce function. 126 | * @return 127 | * The data stream that is the result of applying the reduce function to the window. 128 | */ 129 | def reduce(function: (T, T) => T): DataStream[T] = { 130 | if (function == null) { 131 | throw new NullPointerException("Reduce function must not be null.") 132 | } 133 | val cleanFun = clean(function) 134 | val reducer = new ScalaReduceFunction[T](cleanFun) 135 | reduce(reducer) 136 | } 137 | 138 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 139 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 140 | * 141 | * Arriving data is pre-aggregated using the given pre-aggregation reducer. 142 | * 143 | * @param preAggregator 144 | * The reduce function that is used for pre-aggregation 145 | * @param function 146 | * The window function. 147 | * @return 148 | * The data stream that is the result of applying the window function to the window. 149 | */ 150 | def reduce[R: TypeInformation]( 151 | preAggregator: ReduceFunction[T], 152 | function: WindowFunction[T, R, K, W] 153 | ): DataStream[R] = { 154 | 155 | val cleanedPreAggregator = clean(preAggregator) 156 | val cleanedWindowFunction = clean(function) 157 | 158 | val applyFunction = new ScalaWindowFunctionWrapper[T, R, K, W](cleanedWindowFunction) 159 | 160 | val resultType: TypeInformation[R] = implicitly[TypeInformation[R]] 161 | asScalaStream(javaStream.reduce(cleanedPreAggregator, applyFunction, resultType)) 162 | } 163 | 164 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 165 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 166 | * 167 | * Arriving data is pre-aggregated using the given pre-aggregation reducer. 168 | * 169 | * @param preAggregator 170 | * The reduce function that is used for pre-aggregation 171 | * @param windowFunction 172 | * The window function. 173 | * @return 174 | * The data stream that is the result of applying the window function to the window. 
175 | */ 176 | def reduce[R: TypeInformation]( 177 | preAggregator: (T, T) => T, 178 | windowFunction: (K, W, Iterable[T], Collector[R]) => Unit 179 | ): DataStream[R] = { 180 | 181 | if (preAggregator == null) { 182 | throw new NullPointerException("Reduce function must not be null.") 183 | } 184 | if (windowFunction == null) { 185 | throw new NullPointerException("WindowApply function must not be null.") 186 | } 187 | 188 | val cleanReducer = clean(preAggregator) 189 | val cleanWindowFunction = clean(windowFunction) 190 | 191 | val reducer = new ScalaReduceFunction[T](cleanReducer) 192 | val applyFunction = new ScalaWindowFunction[T, R, K, W](cleanWindowFunction) 193 | 194 | asScalaStream(javaStream.reduce(reducer, applyFunction, implicitly[TypeInformation[R]])) 195 | } 196 | 197 | /** Applies the given reduce function to each window. The window reduced value is then passed as input of the window 198 | * function. The output of the window function is interpreted as a regular non-windowed stream. 199 | * 200 | * @param preAggregator 201 | * The reduce function that is used for pre-aggregation 202 | * @param function 203 | * The process window function. 204 | * @return 205 | * The data stream that is the result of applying the window function to the window. 206 | */ 207 | @PublicEvolving 208 | def reduce[R: TypeInformation]( 209 | preAggregator: (T, T) => T, 210 | function: ProcessWindowFunction[T, R, K, W] 211 | ): DataStream[R] = { 212 | 213 | val cleanedPreAggregator = clean(preAggregator) 214 | val cleanedWindowFunction = clean(function) 215 | 216 | val reducer = new ScalaReduceFunction[T](cleanedPreAggregator) 217 | val applyFunction = new ScalaProcessWindowFunctionWrapper[T, R, K, W](cleanedWindowFunction) 218 | 219 | val resultType: TypeInformation[R] = implicitly[TypeInformation[R]] 220 | asScalaStream(javaStream.reduce(reducer, applyFunction, resultType)) 221 | } 222 | 223 | /** Applies the given reduce function to each window. The window reduced value is then passed as input of the window 224 | * function. The output of the window function is interpreted as a regular non-windowed stream. 225 | * 226 | * @param preAggregator 227 | * The reduce function that is used for pre-aggregation 228 | * @param function 229 | * The process window function. 230 | * @return 231 | * The data stream that is the result of applying the window function to the window. 232 | */ 233 | @PublicEvolving 234 | def reduce[R: TypeInformation]( 235 | preAggregator: ReduceFunction[T], 236 | function: ProcessWindowFunction[T, R, K, W] 237 | ): DataStream[R] = { 238 | 239 | val cleanedPreAggregator = clean(preAggregator) 240 | val cleanedWindowFunction = clean(function) 241 | 242 | val applyFunction = new ScalaProcessWindowFunctionWrapper[T, R, K, W](cleanedWindowFunction) 243 | 244 | val resultType: TypeInformation[R] = implicitly[TypeInformation[R]] 245 | asScalaStream(javaStream.reduce(cleanedPreAggregator, applyFunction, resultType)) 246 | } 247 | 248 | // -------------------------- aggregate() --------------------------------- 249 | 250 | /** Applies the given aggregation function to each window and key. The aggregation function is called for each 251 | * element, aggregating values incrementally and keeping the state to one accumulator per key and window. 252 | * 253 | * @param aggregateFunction 254 | * The aggregation function. 255 | * @return 256 | * The data stream that is the result of applying the fold function to the window. 
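A hedged sketch of an incremental AggregateFunction (a per-key average over each window); the stream and names are hypothetical, and implicit TypeInformation for the accumulator and result types is assumed to be in scope:

import org.apache.flink.api.common.functions.AggregateFunction
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows
import org.apache.flink.streaming.api.windowing.time.Time

// (sum, count) accumulator maintained element by element, finished into an average.
class AverageAggregate extends AggregateFunction[(String, Double), (Double, Long), Double] {
  override def createAccumulator(): (Double, Long) = (0.0, 0L)
  override def add(value: (String, Double), acc: (Double, Long)): (Double, Long) =
    (acc._1 + value._2, acc._2 + 1)
  override def getResult(acc: (Double, Long)): Double =
    if (acc._2 == 0) 0.0 else acc._1 / acc._2
  override def merge(a: (Double, Long), b: (Double, Long)): (Double, Long) =
    (a._1 + b._1, a._2 + b._2)
}

val measurements: DataStream[(String, Double)] = ???
val averages: DataStream[Double] = measurements
  .keyBy(_._1)
  .window(TumblingEventTimeWindows.of(Time.minutes(1)))
  .aggregate(new AverageAggregate)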
257 | */ 258 | @PublicEvolving 259 | def aggregate[ACC: TypeInformation, R: TypeInformation]( 260 | aggregateFunction: AggregateFunction[T, ACC, R] 261 | ): DataStream[R] = { 262 | 263 | val accumulatorType: TypeInformation[ACC] = implicitly[TypeInformation[ACC]] 264 | val resultType: TypeInformation[R] = implicitly[TypeInformation[R]] 265 | 266 | asScalaStream(javaStream.aggregate(clean(aggregateFunction), accumulatorType, resultType)) 267 | } 268 | 269 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 270 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 271 | * 272 | * Arriving data is pre-aggregated using the given aggregation function. 273 | * 274 | * @param preAggregator 275 | * The aggregation function that is used for pre-aggregation 276 | * @param windowFunction 277 | * The window function. 278 | * @return 279 | * The data stream that is the result of applying the window function to the window. 280 | */ 281 | @PublicEvolving 282 | def aggregate[ACC: TypeInformation, V: TypeInformation, R: TypeInformation]( 283 | preAggregator: AggregateFunction[T, ACC, V], 284 | windowFunction: WindowFunction[V, R, K, W] 285 | ): DataStream[R] = { 286 | 287 | val cleanedPreAggregator = clean(preAggregator) 288 | val cleanedWindowFunction = clean(windowFunction) 289 | 290 | val applyFunction = new ScalaWindowFunctionWrapper[V, R, K, W](cleanedWindowFunction) 291 | 292 | val accumulatorType: TypeInformation[ACC] = implicitly[TypeInformation[ACC]] 293 | val resultType: TypeInformation[R] = implicitly[TypeInformation[R]] 294 | 295 | asScalaStream(javaStream.aggregate(cleanedPreAggregator, applyFunction, accumulatorType, resultType)) 296 | } 297 | 298 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 299 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 300 | * 301 | * Arriving data is pre-aggregated using the given aggregation function. 302 | * 303 | * @param preAggregator 304 | * The aggregation function that is used for pre-aggregation 305 | * @param windowFunction 306 | * The window function. 307 | * @return 308 | * The data stream that is the result of applying the window function to the window. 309 | */ 310 | @PublicEvolving 311 | def aggregate[ACC: TypeInformation, V: TypeInformation, R: TypeInformation]( 312 | preAggregator: AggregateFunction[T, ACC, V], 313 | windowFunction: (K, W, Iterable[V], Collector[R]) => Unit 314 | ): DataStream[R] = { 315 | 316 | val cleanedPreAggregator = clean(preAggregator) 317 | val cleanedWindowFunction = clean(windowFunction) 318 | 319 | val applyFunction = new ScalaWindowFunction[V, R, K, W](cleanedWindowFunction) 320 | 321 | val accumulatorType: TypeInformation[ACC] = implicitly[TypeInformation[ACC]] 322 | val resultType: TypeInformation[R] = implicitly[TypeInformation[R]] 323 | 324 | asScalaStream(javaStream.aggregate(cleanedPreAggregator, applyFunction, accumulatorType, resultType)) 325 | } 326 | 327 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 328 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 329 | * 330 | * Arriving data is pre-aggregated using the given aggregation function. 
331 | * 332 | * @param preAggregator 333 | * The aggregation function that is used for pre-aggregation 334 | * @param windowFunction 335 | * The window function. 336 | * @return 337 | * The data stream that is the result of applying the window function to the window. 338 | */ 339 | @PublicEvolving 340 | def aggregate[ACC: TypeInformation, V: TypeInformation, R: TypeInformation]( 341 | preAggregator: AggregateFunction[T, ACC, V], 342 | windowFunction: ProcessWindowFunction[V, R, K, W] 343 | ): DataStream[R] = { 344 | 345 | val cleanedPreAggregator = clean(preAggregator) 346 | val cleanedWindowFunction = clean(windowFunction) 347 | 348 | val applyFunction = new ScalaProcessWindowFunctionWrapper[V, R, K, W](cleanedWindowFunction) 349 | 350 | val accumulatorType: TypeInformation[ACC] = implicitly[TypeInformation[ACC]] 351 | val aggregationResultType: TypeInformation[V] = implicitly[TypeInformation[V]] 352 | val resultType: TypeInformation[R] = implicitly[TypeInformation[R]] 353 | 354 | asScalaStream( 355 | javaStream.aggregate(cleanedPreAggregator, applyFunction, accumulatorType, aggregationResultType, resultType) 356 | ) 357 | } 358 | 359 | // ---------------------------- apply() ------------------------------------- 360 | 361 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 362 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 363 | * 364 | * Note that this function requires that all data in the windows is buffered until the window is evaluated, as the 365 | * function provides no means of pre-aggregation. 366 | * 367 | * @param function 368 | * The window function. 369 | * @return 370 | * The data stream that is the result of applying the window function to the window. 371 | */ 372 | @PublicEvolving 373 | def process[R: TypeInformation](function: ProcessWindowFunction[T, R, K, W]): DataStream[R] = { 374 | 375 | val cleanFunction = clean(function) 376 | val applyFunction = new ScalaProcessWindowFunctionWrapper[T, R, K, W](cleanFunction) 377 | asScalaStream(javaStream.process(applyFunction, implicitly[TypeInformation[R]])) 378 | } 379 | 380 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 381 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 382 | * 383 | * Note that this function requires that all data in the windows is buffered until the window is evaluated, as the 384 | * function provides no means of pre-aggregation. 385 | * 386 | * @param function 387 | * The window function. 388 | * @return 389 | * The data stream that is the result of applying the window function to the window. 390 | */ 391 | def apply[R: TypeInformation](function: WindowFunction[T, R, K, W]): DataStream[R] = { 392 | 393 | val cleanFunction = clean(function) 394 | val applyFunction = new ScalaWindowFunctionWrapper[T, R, K, W](cleanFunction) 395 | asScalaStream(javaStream.apply(applyFunction, implicitly[TypeInformation[R]])) 396 | } 397 | 398 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 399 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 400 | * 401 | * Note that this function requires that all data in the windows is buffered until the window is evaluated, as the 402 | * function provides no means of pre-aggregation. 
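As a rough sketch of the full-window path, a ProcessWindowFunction that emits one count per evaluated window (hypothetical stream and names; the `context.window` accessor follows the Scala-API convention for this class):

import io.findify.flink.api.function.ProcessWindowFunction
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.util.Collector

// Buffers the window contents and emits (key, window end, element count) once per window.
class CountPerWindow extends ProcessWindowFunction[(String, Long), (String, Long, Int), String, TimeWindow] {
  override def process(
      key: String,
      context: Context,
      elements: Iterable[(String, Long)],
      out: Collector[(String, Long, Int)]
  ): Unit =
    out.collect((key, context.window.getEnd, elements.size))
}

val events: DataStream[(String, Long)] = ???
val counts = events
  .keyBy(_._1)
  .window(TumblingEventTimeWindows.of(Time.minutes(1)))
  .process(new CountPerWindow)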
403 | * 404 | * @param function 405 | * The window function. 406 | * @return 407 | * The data stream that is the result of applying the window function to the window. 408 | */ 409 | def apply[R: TypeInformation](function: (K, W, Iterable[T], Collector[R]) => Unit): DataStream[R] = { 410 | if (function == null) { 411 | throw new NullPointerException("WindowApply function must not be null.") 412 | } 413 | 414 | val cleanedFunction = clean(function) 415 | val applyFunction = new ScalaWindowFunction[T, R, K, W](cleanedFunction) 416 | 417 | asScalaStream(javaStream.apply(applyFunction, implicitly[TypeInformation[R]])) 418 | } 419 | 420 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 421 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 422 | * 423 | * Arriving data is pre-aggregated using the given pre-aggregation reducer. 424 | * 425 | * @param preAggregator 426 | * The reduce function that is used for pre-aggregation 427 | * @param function 428 | * The window function. 429 | * @return 430 | * The data stream that is the result of applying the window function to the window. 431 | * @deprecated 432 | * Use [[reduce(ReduceFunction, WindowFunction)]] instead. 433 | */ 434 | @deprecated 435 | def apply[R: TypeInformation]( 436 | preAggregator: ReduceFunction[T], 437 | function: WindowFunction[T, R, K, W] 438 | ): DataStream[R] = { 439 | 440 | val cleanedPreAggregator = clean(preAggregator) 441 | val cleanedWindowFunction = clean(function) 442 | 443 | val applyFunction = new ScalaWindowFunctionWrapper[T, R, K, W](cleanedWindowFunction) 444 | 445 | val resultType: TypeInformation[R] = implicitly[TypeInformation[R]] 446 | asScalaStream(javaStream.apply(cleanedPreAggregator, applyFunction, resultType)) 447 | } 448 | 449 | /** Applies the given window function to each window. The window function is called for each evaluation of the window 450 | * for each key individually. The output of the window function is interpreted as a regular non-windowed stream. 451 | * 452 | * Arriving data is pre-aggregated using the given pre-aggregation reducer. 453 | * 454 | * @param preAggregator 455 | * The reduce function that is used for pre-aggregation 456 | * @param windowFunction 457 | * The window function. 458 | * @return 459 | * The data stream that is the result of applying the window function to the window. 460 | * @deprecated 461 | * Use [[reduce(ReduceFunction, WindowFunction)]] instead. 
462 | */ 463 | @deprecated 464 | def apply[R: TypeInformation]( 465 | preAggregator: (T, T) => T, 466 | windowFunction: (K, W, Iterable[T], Collector[R]) => Unit 467 | ): DataStream[R] = { 468 | 469 | if (preAggregator == null) { 470 | throw new NullPointerException("Reduce function must not be null.") 471 | } 472 | if (windowFunction == null) { 473 | throw new NullPointerException("WindowApply function must not be null.") 474 | } 475 | 476 | val cleanReducer = clean(preAggregator) 477 | val cleanWindowFunction = clean(windowFunction) 478 | 479 | val reducer = new ScalaReduceFunction[T](cleanReducer) 480 | val applyFunction = new ScalaWindowFunction[T, R, K, W](cleanWindowFunction) 481 | 482 | asScalaStream(javaStream.apply(reducer, applyFunction, implicitly[TypeInformation[R]])) 483 | } 484 | 485 | // ------------------------------------------------------------------------ 486 | // Aggregations on the keyed windows 487 | // ------------------------------------------------------------------------ 488 | 489 | /** Applies an aggregation that that gives the maximum of the elements in the window at the given position. 490 | */ 491 | def max(position: Int): DataStream[T] = aggregate(AggregationType.MAX, position) 492 | 493 | /** Applies an aggregation that that gives the maximum of the elements in the window at the given field. 494 | */ 495 | def max(field: String): DataStream[T] = aggregate(AggregationType.MAX, field) 496 | 497 | /** Applies an aggregation that that gives the minimum of the elements in the window at the given position. 498 | */ 499 | def min(position: Int): DataStream[T] = aggregate(AggregationType.MIN, position) 500 | 501 | /** Applies an aggregation that that gives the minimum of the elements in the window at the given field. 502 | */ 503 | def min(field: String): DataStream[T] = aggregate(AggregationType.MIN, field) 504 | 505 | /** Applies an aggregation that sums the elements in the window at the given position. 506 | */ 507 | def sum(position: Int): DataStream[T] = aggregate(AggregationType.SUM, position) 508 | 509 | /** Applies an aggregation that sums the elements in the window at the given field. 510 | */ 511 | def sum(field: String): DataStream[T] = aggregate(AggregationType.SUM, field) 512 | 513 | /** Applies an aggregation that that gives the maximum element of the window by the given position. When equality, 514 | * returns the first. 515 | */ 516 | def maxBy(position: Int): DataStream[T] = aggregate(AggregationType.MAXBY, position) 517 | 518 | /** Applies an aggregation that that gives the maximum element of the window by the given field. When equality, 519 | * returns the first. 520 | */ 521 | def maxBy(field: String): DataStream[T] = aggregate(AggregationType.MAXBY, field) 522 | 523 | /** Applies an aggregation that that gives the minimum element of the window by the given position. When equality, 524 | * returns the first. 525 | */ 526 | def minBy(position: Int): DataStream[T] = aggregate(AggregationType.MINBY, position) 527 | 528 | /** Applies an aggregation that that gives the minimum element of the window by the given field. When equality, 529 | * returns the first. 
530 | */ 531 | def minBy(field: String): DataStream[T] = aggregate(AggregationType.MINBY, field) 532 | 533 | private def aggregate(aggregationType: AggregationType, field: String): DataStream[T] = { 534 | val position = fieldNames2Indices(getInputType(), Array(field))(0) 535 | aggregate(aggregationType, position) 536 | } 537 | 538 | def aggregate(aggregationType: AggregationType, position: Int): DataStream[T] = { 539 | 540 | val jStream = javaStream.asInstanceOf[JavaWStream[Product, K, W]] 541 | 542 | val reducer = aggregationType match { 543 | case AggregationType.SUM => 544 | new SumAggregator(position, jStream.getInputType, jStream.getExecutionEnvironment.getConfig) 545 | 546 | case _ => 547 | new ComparableAggregator( 548 | position, 549 | jStream.getInputType, 550 | aggregationType, 551 | true, 552 | jStream.getExecutionEnvironment.getConfig 553 | ) 554 | } 555 | 556 | new DataStream[Product](jStream.reduce(reducer)).asInstanceOf[DataStream[T]] 557 | } 558 | 559 | // ------------------------------------------------------------------------ 560 | // Utilities 561 | // ------------------------------------------------------------------------ 562 | 563 | /** Returns a "closure-cleaned" version of the given function. Cleans only if closure cleaning is not disabled in the 564 | * [[org.apache.flink.api.common.ExecutionConfig]]. 565 | */ 566 | private[flink] def clean[F <: AnyRef](f: F): F = { 567 | new StreamExecutionEnvironment(javaStream.getExecutionEnvironment).scalaClean(f) 568 | } 569 | 570 | /** Gets the output type. 571 | */ 572 | private def getInputType(): TypeInformation[T] = javaStream.getInputType 573 | } 574 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/async/AsyncFunction.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package io.findify.flink.api.async 20 | 21 | import org.apache.flink.annotation.PublicEvolving 22 | import org.apache.flink.api.common.functions.Function 23 | 24 | import java.util.concurrent.TimeoutException 25 | 26 | /** A function to trigger async I/O operations. 27 | * 28 | * For each asyncInvoke an async io operation can be triggered, and once it has been done, the result can be collected 29 | * by calling ResultFuture.complete. For each async operation, its context is stored in the operator immediately after 30 | * invoking asyncInvoke, avoiding blocking for each stream input as long as the internal buffer is not full. 31 | * 32 | * [[ResultFuture]] can be passed into callbacks or futures to collect the result data. 
An error can also be propagate 33 | * to the async IO operator by [[ResultFuture.completeExceptionally(Throwable)]]. 34 | * 35 | * @tparam IN 36 | * The type of the input element 37 | * @tparam OUT 38 | * The type of the output elements 39 | */ 40 | @PublicEvolving 41 | trait AsyncFunction[IN, OUT] extends Function { 42 | 43 | /** Trigger the async operation for each stream input 44 | * 45 | * @param input 46 | * element coming from an upstream task 47 | * @param resultFuture 48 | * to be completed with the result data 49 | */ 50 | def asyncInvoke(input: IN, resultFuture: ResultFuture[OUT]): Unit 51 | 52 | /** [[AsyncFunction.asyncInvoke]] timeout occurred. By default, the result future is exceptionally completed with a 53 | * timeout exception. 54 | * 55 | * @param input 56 | * element coming from an upstream task 57 | * @param resultFuture 58 | * to be completed with the result data 59 | */ 60 | def timeout(input: IN, resultFuture: ResultFuture[OUT]): Unit = { 61 | resultFuture.completeExceptionally(new TimeoutException("Async function call has timed out.")) 62 | } 63 | 64 | } 65 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/async/JavaResultFutureWrapper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package io.findify.flink.api.async 20 | 21 | import org.apache.flink.annotation.Internal 22 | import org.apache.flink.streaming.api.functions.async 23 | 24 | import scala.jdk.CollectionConverters._ 25 | 26 | /** Internal wrapper class to map a Flink's Java API [[org.apache.flink.streaming.api.functions.async.ResultFuture]] to 27 | * a Scala [[io.findify.flink.api.async.ResultFuture]]. 28 | * 29 | * @param javaResultFuture 30 | * to forward the calls to 31 | * @tparam OUT 32 | * type of the output elements 33 | */ 34 | @Internal 35 | class JavaResultFutureWrapper[OUT](val javaResultFuture: async.ResultFuture[OUT]) extends ResultFuture[OUT] { 36 | override def complete(result: Iterable[OUT]): Unit = { 37 | javaResultFuture.complete(result.asJavaCollection) 38 | } 39 | 40 | override def completeExceptionally(throwable: Throwable): Unit = { 41 | javaResultFuture.completeExceptionally(throwable) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/async/ResultFuture.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
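A minimal sketch of implementing the trait with a Scala Future, assuming a hypothetical non-blocking `UserClient`; the callback completes the ResultFuture instead of blocking the operator thread:

import io.findify.flink.api.async.{AsyncFunction, ResultFuture}
import scala.concurrent.{ExecutionContext, Future}
import scala.util.{Failure, Success}

trait UserClient {                                  // hypothetical async lookup client
  def nameOf(userId: Long): Future[String]
}

class UserLookup(client: UserClient)(implicit ec: ExecutionContext)
    extends AsyncFunction[Long, (Long, String)] {

  override def asyncInvoke(userId: Long, resultFuture: ResultFuture[(Long, String)]): Unit =
    client.nameOf(userId).onComplete {
      case Success(name) => resultFuture.complete(Iterable(userId -> name))
      case Failure(err)  => resultFuture.completeExceptionally(err)
    }
}

Such a function is then attached to a stream through the helpers in AsyncDataStream.scala; timeouts fall back to the default `timeout` implementation above unless overridden.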
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package io.findify.flink.api.async 20 | 21 | import org.apache.flink.annotation.PublicEvolving 22 | 23 | /** The result future collects data/errors from the user code while processing asynchronous I/O operations. 24 | * 25 | * @tparam OUT 26 | * type of the output element 27 | */ 28 | @PublicEvolving 29 | trait ResultFuture[OUT] { 30 | 31 | /** Complete the ResultFuture with a set of result elements. 32 | * 33 | * Note that it should be called for exactly one time in the user code. Calling this function for multiple times will 34 | * cause data lose. 35 | * 36 | * Put all results in a [[Iterable]] and then issue ResultFuture.complete(Iterable). 37 | * 38 | * @param result 39 | * to complete the async collector with 40 | */ 41 | def complete(result: Iterable[OUT]): Unit 42 | 43 | /** Complete this ResultFuture with an error. 44 | * 45 | * @param throwable 46 | * to complete the async collector with 47 | */ 48 | def completeExceptionally(throwable: Throwable): Unit 49 | } 50 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/async/RichAsyncFunction.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package io.findify.flink.api.async 20 | 21 | import org.apache.flink.api.common.functions.AbstractRichFunction 22 | 23 | /** Rich variant of [[AsyncFunction]]. As a [[org.apache.flink.api.common.functions.RichFunction]], it gives access to 24 | * the [[org.apache.flink.api.common.functions.RuntimeContext]] and provides setup and teardown methods. 25 | * 26 | * State related apis in [[org.apache.flink.api.common.functions.RuntimeContext]] are not supported yet because the key 27 | * may get changed while accessing states in the working thread. 
28 | * 29 | * [[org.apache.flink.api.common.functions.IterationRuntimeContext#getIterationAggregator(String)]] is not supported 30 | * since the aggregator may be modified by multiple threads. 31 | * 32 | * @tparam IN 33 | * The type of the input value. 34 | * @tparam OUT 35 | * The type of the output value. 36 | */ 37 | abstract class RichAsyncFunction[IN, OUT] extends AbstractRichFunction with AsyncFunction[IN, OUT] {} 38 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/async/ScalaRichAsyncFunctionWrapper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package io.findify.flink.api.async 20 | 21 | import org.apache.flink.api.common.functions.RuntimeContext 22 | import org.apache.flink.configuration.Configuration 23 | import org.apache.flink.streaming.api.functions.async.{ 24 | ResultFuture => JResultFuture, 25 | RichAsyncFunction => JRichAsyncFunction 26 | } 27 | 28 | /** A wrapper function that exposes a Scala RichAsyncFunction as a Java Rich Async Function. 29 | * 30 | * The Scala and Java RichAsyncFunctions differ in their type of "ResultFuture" 31 | * - Scala RichAsyncFunction: [[io.findify.flink.api.async.ResultFuture]] 32 | * - Java RichAsyncFunction: [[org.apache.flink.streaming.api.functions.async.ResultFuture]] 33 | */ 34 | final class ScalaRichAsyncFunctionWrapper[IN, OUT](func: RichAsyncFunction[IN, OUT]) 35 | extends JRichAsyncFunction[IN, OUT] { 36 | 37 | override def asyncInvoke(input: IN, resultFuture: JResultFuture[OUT]): Unit = { 38 | func.asyncInvoke(input, new JavaResultFutureWrapper[OUT](resultFuture)) 39 | } 40 | 41 | override def timeout(input: IN, resultFuture: JResultFuture[OUT]): Unit = { 42 | func.timeout(input, new JavaResultFutureWrapper[OUT](resultFuture)) 43 | } 44 | 45 | override def open(parameters: Configuration): Unit = { 46 | func.open(parameters) 47 | } 48 | 49 | override def close(): Unit = { 50 | func.close() 51 | } 52 | 53 | override def setRuntimeContext(runtimeContext: RuntimeContext): Unit = { 54 | super.setRuntimeContext(runtimeContext) 55 | func.setRuntimeContext(super.getRuntimeContext) 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/extensions/impl/acceptPartialFunctions/OnConnectedStream.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package io.findify.flink.api.extensions.impl.acceptPartialFunctions 19 | 20 | import org.apache.flink.annotation.PublicEvolving 21 | import org.apache.flink.api.common.typeinfo.TypeInformation 22 | import io.findify.flink.api.{ConnectedStreams, DataStream} 23 | 24 | /** Wraps a connected data stream, allowing to use anonymous partial functions to perform extraction of items in a 25 | * tuple, case class instance or collection 26 | * 27 | * @param stream 28 | * The wrapped data stream 29 | * @tparam IN1 30 | * The type of the data stream items coming from the first connection 31 | * @tparam IN2 32 | * The type of the data stream items coming from the second connection 33 | */ 34 | class OnConnectedStream[IN1, IN2](stream: ConnectedStreams[IN1, IN2]) { 35 | 36 | /** Applies a CoMap transformation on the connected streams. 37 | * 38 | * The transformation consists of two separate functions, where the first one is called for each element of the first 39 | * connected stream, and the second one is called for each element of the second connected stream. 40 | * 41 | * @param map1 42 | * Function called per element of the first input. 43 | * @param map2 44 | * Function called per element of the second input. 45 | * @return 46 | * The resulting data stream. 47 | */ 48 | @PublicEvolving 49 | def mapWith[R: TypeInformation](map1: IN1 => R, map2: IN2 => R): DataStream[R] = 50 | stream.map(map1, map2) 51 | 52 | /** Applies a CoFlatMap transformation on the connected streams. 53 | * 54 | * The transformation consists of two separate functions, where the first one is called for each element of the first 55 | * connected stream, and the second one is called for each element of the second connected stream. 56 | * 57 | * @param flatMap1 58 | * Function called per element of the first input. 59 | * @param flatMap2 60 | * Function called per element of the second input. 61 | * @return 62 | * The resulting data stream. 63 | */ 64 | @PublicEvolving 65 | def flatMapWith[R: TypeInformation]( 66 | flatMap1: IN1 => TraversableOnce[R], 67 | flatMap2: IN2 => TraversableOnce[R] 68 | ): DataStream[R] = 69 | stream.flatMap(flatMap1, flatMap2) 70 | 71 | /** Keys the two connected streams together. After this operation, all elements with the same key from both streams 72 | * will be sent to the same parallel instance of the transformation functions. 
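A hedged sketch of the connected-stream variant, assuming the implicit conversions from this project's extensions package (extensions/package.scala) are in scope and that TypeInformation for the result type is available:

import io.findify.flink.api.extensions._

val control: DataStream[String] = ???               // hypothetical control messages
val data: DataStream[(String, Long)] = ???          // hypothetical measurements

// The tuple side can be destructured directly in a partial function.
val merged: DataStream[String] = data
  .connect(control)
  .mapWith(
    map1 = { case (id, value) => s"$id=$value" },
    map2 = msg => s"control: $msg"
  )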
73 | * 74 | * @param key1 75 | * The first stream's key function 76 | * @param key2 77 | * The second stream's key function 78 | * @return 79 | * The key-grouped connected streams 80 | */ 81 | @PublicEvolving 82 | def keyingBy[KEY: TypeInformation](key1: IN1 => KEY, key2: IN2 => KEY): ConnectedStreams[IN1, IN2] = 83 | stream.keyBy(key1, key2) 84 | 85 | } 86 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/extensions/impl/acceptPartialFunctions/OnDataStream.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package io.findify.flink.api.extensions.impl.acceptPartialFunctions 19 | 20 | import org.apache.flink.annotation.PublicEvolving 21 | import org.apache.flink.api.common.typeinfo.TypeInformation 22 | import io.findify.flink.api.{DataStream, KeyedStream} 23 | 24 | /** Wraps a data stream, allowing to use anonymous partial functions to perform extraction of items in a tuple, case 25 | * class instance or collection 26 | * 27 | * @param stream 28 | * The wrapped data stream 29 | * @tparam T 30 | * The type of the data stream items 31 | */ 32 | class OnDataStream[T](stream: DataStream[T]) { 33 | 34 | /** Applies a function `fun` to each item of the stream 35 | * 36 | * @param fun 37 | * The function to be applied to each item 38 | * @tparam R 39 | * The type of the items in the returned stream 40 | * @return 41 | * A dataset of R 42 | */ 43 | @PublicEvolving 44 | def mapWith[R: TypeInformation](fun: T => R): DataStream[R] = 45 | stream.map(fun) 46 | 47 | /** Applies a function `fun` to each item of the stream, producing a collection of items that will be flattened in the 48 | * resulting stream 49 | * 50 | * @param fun 51 | * The function to be applied to each item 52 | * @tparam R 53 | * The type of the items in the returned stream 54 | * @return 55 | * A dataset of R 56 | */ 57 | @PublicEvolving 58 | def flatMapWith[R: TypeInformation](fun: T => TraversableOnce[R]): DataStream[R] = 59 | stream.flatMap(fun) 60 | 61 | /** Applies a predicate `fun` to each item of the stream, keeping only those for which the predicate holds 62 | * 63 | * @param fun 64 | * The predicate to be tested on each item 65 | * @return 66 | * A dataset of R 67 | */ 68 | @PublicEvolving 69 | def filterWith(fun: T => Boolean): DataStream[T] = 70 | stream.filter(fun) 71 | 72 | /** Keys the items according to a keying function `fun` 73 | * 74 | * @param fun 75 | * The keying function 76 | * @tparam K 77 | * The type of the key, for which type information must be known 78 | * @return 79 | * A stream of Ts keyed by Ks 80 | */ 81 | @PublicEvolving 82 | def keyingBy[K: 
TypeInformation](fun: T => K): KeyedStream[T, K] = 83 | stream.keyBy(fun) 84 | 85 | } 86 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/extensions/impl/acceptPartialFunctions/OnJoinedStream.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package io.findify.flink.api.extensions.impl.acceptPartialFunctions 19 | 20 | import org.apache.flink.annotation.PublicEvolving 21 | import org.apache.flink.api.common.typeinfo.TypeInformation 22 | import io.findify.flink.api.{DataStream, JoinedStreams} 23 | import org.apache.flink.streaming.api.windowing.windows.Window 24 | 25 | /** Wraps a joined data stream, allowing to use anonymous partial functions to perform extraction of items in a tuple, 26 | * case class instance or collection 27 | * 28 | * @param stream 29 | * The wrapped data stream 30 | * @tparam L 31 | * The type of the data stream items from the left side of the join 32 | * @tparam R 33 | * The type of the data stream items from the right input of the join 34 | * @tparam K 35 | * The type of key 36 | * @tparam W 37 | * The type of the window 38 | */ 39 | class OnJoinedStream[L, R, K, W <: Window](stream: JoinedStreams[L, R]#Where[K]#EqualTo#WithWindow[W]) { 40 | 41 | /** Completes the join operation with the user function that is executed for windowed groups. 42 | * 43 | * @param fun 44 | * The function that defines the projection of the join 45 | * @tparam O 46 | * The return type of the projection, for which type information must be known 47 | * @return 48 | * A fully joined data set of Os 49 | */ 50 | @PublicEvolving 51 | def projecting[O: TypeInformation](fun: (L, R) => O): DataStream[O] = 52 | stream.apply(fun) 53 | 54 | } 55 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/extensions/impl/acceptPartialFunctions/OnKeyedStream.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package io.findify.flink.api.extensions.impl.acceptPartialFunctions 19 | 20 | import org.apache.flink.annotation.PublicEvolving 21 | import org.apache.flink.api.common.typeinfo.TypeInformation 22 | import io.findify.flink.api.{DataStream, KeyedStream} 23 | 24 | /** Wraps a keyed data stream, allowing to use anonymous partial functions to perform extraction of items in a tuple, 25 | * case class instance or collection 26 | * 27 | * @param stream 28 | * The wrapped data stream 29 | * @tparam T 30 | * The type of the data stream items 31 | * @tparam K 32 | * The type of key 33 | */ 34 | class OnKeyedStream[T, K](stream: KeyedStream[T, K]) { 35 | 36 | /** Applies a reducer `fun` to the stream 37 | * 38 | * @param fun 39 | * The reducing function to be applied on the keyed stream 40 | * @return 41 | * A data set of Ts 42 | */ 43 | @PublicEvolving 44 | def reduceWith(fun: (T, T) => T): DataStream[T] = 45 | stream.reduce(fun) 46 | } 47 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/extensions/impl/acceptPartialFunctions/OnWindowedStream.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package io.findify.flink.api.extensions.impl.acceptPartialFunctions 19 | 20 | import org.apache.flink.annotation.PublicEvolving 21 | import org.apache.flink.api.common.typeinfo.TypeInformation 22 | import io.findify.flink.api.{DataStream, WindowedStream} 23 | import org.apache.flink.streaming.api.windowing.windows.Window 24 | import org.apache.flink.util.Collector 25 | 26 | /** Wraps a windowed data stream, allowing to use anonymous partial functions to perform extraction of items in a tuple, 27 | * case class instance or collection 28 | * 29 | * @param stream 30 | * The wrapped data stream 31 | * @tparam T 32 | * The type of the data stream items 33 | * @tparam K 34 | * The type of key 35 | * @tparam W 36 | * The type of the window 37 | */ 38 | class OnWindowedStream[T, K, W <: Window](stream: WindowedStream[T, K, W]) { 39 | 40 | /** Applies a reduce function to the window. The window function is called for each evaluation of the window for each 41 | * key individually.
The output of the reduce function is interpreted as a regular non-windowed stream. 42 | * 43 | * This window will try and pre-aggregate data as much as the window policies permit. For example,tumbling time 44 | * windows can perfectly pre-aggregate the data, meaning that only one element per key is stored. Sliding time 45 | * windows will pre-aggregate on the granularity of the slide interval, so a few elements are stored per key (one per 46 | * slide interval). Custom windows may not be able to pre-aggregate, or may need to store extra values in an 47 | * aggregation tree. 48 | * 49 | * @param function 50 | * The reduce function. 51 | * @return 52 | * The data stream that is the result of applying the reduce function to the window. 53 | */ 54 | @PublicEvolving 55 | def reduceWith(function: (T, T) => T): DataStream[T] = 56 | stream.reduce(function) 57 | 58 | } 59 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/extensions/package.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package io.findify.flink.api 19 | 20 | import org.apache.flink.annotation.PublicEvolving 21 | import io.findify.flink.api.extensions.impl.acceptPartialFunctions._ 22 | import org.apache.flink.streaming.api.windowing.windows.Window 23 | 24 | /** acceptPartialFunctions extends the original DataStream with methods with unique names that delegate to core 25 | * higher-order functions (e.g. `map`) so that we can work around the fact that overloaded methods taking functions as 26 | * parameters can't accept partial functions as well. This enables the possibility to directly apply pattern matching 27 | * to decompose inputs such as tuples, case classes and collections. 
28 | * 29 | * The following is a small example that showcases how these extensions would work on a Flink data stream: 30 | * 31 | * {{{ 32 | * object Main { 33 | * import io.findify.flink.api.extensions._ 34 | * case class Point(x: Double, y: Double) 35 | * def main(args: Array[String]): Unit = { 36 | * val env = StreamExecutionEnvironment.getExecutionEnvironment 37 | * val ds = env.fromElements(Point(1, 2), Point(3, 4), Point(5, 6)) 38 | * ds.filterWith { 39 | * case Point(x, _) => x > 1 40 | * }.reduceWith { 41 | * case (Point(x1, y1), Point(x2, y2)) => Point(x1 + x2, y1 + y2) 42 | * }.mapWith { 43 | * case Point(x, y) => (x, y) 44 | * }.flatMapWith { 45 | * case (x, y) => Seq('x' -> x, 'y' -> y) 46 | * }.keyingBy { 47 | * case (id, value) => id 48 | * } 49 | * } 50 | * } 51 | * }}} 52 | * 53 | * The extension consists of several implicit conversions over all the data stream representations that could gain from 54 | * this feature. To use this set of extension methods the user has to explicitly opt-in by importing 55 | * `io.findify.flink.api.extensions.acceptPartialFunctions`. 56 | * 57 | * For more information and usage examples please consult the Apache Flink official documentation. 58 | */ 59 | package object extensions { 60 | 61 | @PublicEvolving 62 | implicit def acceptPartialFunctions[T](ds: DataStream[T]): OnDataStream[T] = 63 | new OnDataStream[T](ds) 64 | 65 | @PublicEvolving 66 | implicit def acceptPartialFunctions[T, K](ds: KeyedStream[T, K]): OnKeyedStream[T, K] = 67 | new OnKeyedStream[T, K](ds) 68 | 69 | @PublicEvolving 70 | implicit def acceptPartialFunctions[L, R, K, W <: Window]( 71 | ds: JoinedStreams[L, R]#Where[K]#EqualTo#WithWindow[W] 72 | ): OnJoinedStream[L, R, K, W] = 73 | new OnJoinedStream[L, R, K, W](ds) 74 | 75 | @PublicEvolving 76 | implicit def acceptPartialFunctions[IN1, IN2](ds: ConnectedStreams[IN1, IN2]): OnConnectedStream[IN1, IN2] = 77 | new OnConnectedStream[IN1, IN2](ds) 78 | 79 | @PublicEvolving 80 | implicit def acceptPartialFunctions[T, K, W <: Window](ds: WindowedStream[T, K, W]): OnWindowedStream[T, K, W] = 81 | new OnWindowedStream[T, K, W](ds) 82 | 83 | } 84 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/function/AllWindowFunction.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License.
17 | */ 18 | package io.findify.flink.api.function 19 | 20 | import org.apache.flink.annotation.Public 21 | import org.apache.flink.api.common.functions.Function 22 | import org.apache.flink.streaming.api.windowing.windows.Window 23 | import org.apache.flink.util.Collector 24 | 25 | import java.io.Serializable 26 | 27 | /** Base interface for functions that are evaluated over non-grouped windows, 28 | * i.e., windows over all stream partitions. 29 | * 30 | * @tparam IN 31 | * The type of the input value. 32 | * @tparam OUT 33 | * The type of the output value. 34 | */ 35 | @Public 36 | trait AllWindowFunction[IN, OUT, W <: Window] extends Function with Serializable { 37 | 38 | /** Evaluates the window and outputs none or several elements. 39 | * 40 | * @param window 41 | * The window that is being evaluated. 42 | * @param input 43 | * The elements in the window being evaluated. 44 | * @param out 45 | * A collector for emitting elements. 46 | * @throws Exception 47 | * The function may throw exceptions to fail the program and trigger recovery. 48 | */ 49 | def apply(window: W, input: Iterable[IN], out: Collector[OUT]): Unit 50 | } 51 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/function/ProcessAllWindowFunction.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package io.findify.flink.api.function 20 | 21 | import io.findify.flink.api.OutputTag 22 | import org.apache.flink.annotation.PublicEvolving 23 | import org.apache.flink.api.common.functions.AbstractRichFunction 24 | import org.apache.flink.api.common.state.KeyedStateStore 25 | import org.apache.flink.streaming.api.windowing.windows.Window 26 | import org.apache.flink.util.Collector 27 | 28 | /** Base abstract class for functions that are evaluated over non-keyed windows, i.e. windows over all stream 29 | * partitions, using a context for retrieving extra information. 30 | * 31 | * @tparam IN 32 | * The type of the input value. 33 | * @tparam OUT 34 | * The type of the output value. 35 | * @tparam W 36 | * The type of the window. 37 | */ 38 | @PublicEvolving 39 | abstract class ProcessAllWindowFunction[IN, OUT, W <: Window] extends AbstractRichFunction { 40 | 41 | /** Evaluates the window and outputs none or several elements. 42 | * 43 | * @param context 44 | * The context in which the window is being evaluated. 45 | * @param elements 46 | * The elements in the window being evaluated. 47 | * @param out 48 | * A collector for emitting elements. 49 | * @throws Exception 50 | * The function may throw exceptions to fail the program and trigger recovery.
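 *
 *   For illustration only, a minimal hypothetical subclass (the class name and element types are assumptions;
 *   `TimeWindow` comes from `org.apache.flink.streaming.api.windowing.windows`):
 *   {{{
 *     class CountAllWindowFunction extends ProcessAllWindowFunction[String, Long, TimeWindow] {
 *       override def process(context: Context, elements: Iterable[String], out: Collector[Long]): Unit =
 *         out.collect(elements.size.toLong) // one count per window, over all stream partitions
 *     }
 *   }}}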
51 | */ 52 | @throws[Exception] 53 | def process(context: Context, elements: Iterable[IN], out: Collector[OUT]): Unit 54 | 55 | /** Deletes any state in the [[Context]] when the Window expires (the watermark passes its `maxTimestamp` + 56 | * `allowedLateness`). 57 | * 58 | * @param context 59 | * The context to which the window is being evaluated 60 | * @throws Exception 61 | * The function may throw exceptions to fail the program and trigger recovery. 62 | */ 63 | @throws[Exception] 64 | def clear(context: Context): Unit = {} 65 | 66 | /** The context holding window metadata 67 | */ 68 | abstract class Context { 69 | 70 | /** @return 71 | * The window that is being evaluated. 72 | */ 73 | def window: W 74 | 75 | /** State accessor for per-key and per-window state. 76 | */ 77 | def windowState: KeyedStateStore 78 | 79 | /** State accessor for per-key global state. 80 | */ 81 | def globalState: KeyedStateStore 82 | 83 | /** Emits a record to the side output identified by the [[OutputTag]]. 84 | */ 85 | def output[X](outputTag: OutputTag[X], value: X): Unit 86 | } 87 | 88 | } 89 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/function/ProcessWindowFunction.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package io.findify.flink.api.function 20 | 21 | import io.findify.flink.api.OutputTag 22 | import org.apache.flink.annotation.PublicEvolving 23 | import org.apache.flink.api.common.functions.AbstractRichFunction 24 | import org.apache.flink.api.common.state.KeyedStateStore 25 | import org.apache.flink.streaming.api.windowing.windows.Window 26 | import org.apache.flink.util.Collector 27 | 28 | /** Base abstract class for functions that are evaluated over keyed (grouped) windows using a context for retrieving 29 | * extra information. 30 | * 31 | * @tparam IN 32 | * The type of the input value. 33 | * @tparam OUT 34 | * The type of the output value. 35 | * @tparam KEY 36 | * The type of the key. 37 | * @tparam W 38 | * The type of the window. 39 | */ 40 | @PublicEvolving 41 | abstract class ProcessWindowFunction[IN, OUT, KEY, W <: Window] extends AbstractRichFunction { 42 | 43 | /** Evaluates the window and outputs none or several elements. 44 | * 45 | * @param key 46 | * The key for which this window is evaluated. 47 | * @param context 48 | * The context in which the window is being evaluated. 49 | * @param elements 50 | * The elements in the window being evaluated. 51 | * @param out 52 | * A collector for emitting elements. 53 | * @throws Exception 54 | * The function may throw exceptions to fail the program and trigger recovery. 
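 *
 *   For illustration only, a minimal hypothetical subclass (the class name and element types are assumptions;
 *   `TimeWindow` comes from `org.apache.flink.streaming.api.windowing.windows`):
 *   {{{
 *     class CountPerKeyWindowFunction extends ProcessWindowFunction[String, (String, Long), String, TimeWindow] {
 *       override def process(key: String, context: Context, elements: Iterable[String], out: Collector[(String, Long)]): Unit =
 *         out.collect((key, elements.size.toLong)) // one count per key and window; context.window exposes window metadata
 *     }
 *   }}}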
55 | */ 56 | @throws[Exception] 57 | def process(key: KEY, context: Context, elements: Iterable[IN], out: Collector[OUT]): Unit 58 | 59 | /** Deletes any state in the [[Context]] when the Window expires (the watermark passes its `maxTimestamp` + 60 | * `allowedLateness`). 61 | * 62 | * @param context 63 | * The context to which the window is being evaluated 64 | * @throws Exception 65 | * The function may throw exceptions to fail the program and trigger recovery. 66 | */ 67 | @throws[Exception] 68 | def clear(context: Context): Unit = {} 69 | 70 | /** The context holding window metadata 71 | */ 72 | abstract class Context { 73 | 74 | /** Returns the window that is being evaluated. 75 | */ 76 | def window: W 77 | 78 | /** Returns the current processing time. 79 | */ 80 | def currentProcessingTime: Long 81 | 82 | /** Returns the current event-time watermark. 83 | */ 84 | def currentWatermark: Long 85 | 86 | /** State accessor for per-key and per-window state. 87 | */ 88 | def windowState: KeyedStateStore 89 | 90 | /** State accessor for per-key global state. 91 | */ 92 | def globalState: KeyedStateStore 93 | 94 | /** Emits a record to the side output identified by the [[OutputTag]]. 95 | */ 96 | def output[X](outputTag: OutputTag[X], value: X): Unit 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/function/RichAllWindowFunction.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package io.findify.flink.api.function 20 | 21 | import org.apache.flink.api.common.functions.AbstractRichFunction 22 | import org.apache.flink.streaming.api.windowing.windows.Window 23 | 24 | /** Rich variant of the [[io.findify.flink.api.function.AllWindowFunction]]. 25 | * 26 | * As a [[org.apache.flink.api.common.functions.RichFunction]], it gives access to the 27 | * [[org.apache.flink.api.common.functions.RuntimeContext]] and provides setup and tear-down methods. 28 | * 29 | * @tparam IN 30 | * The type of the input value. 31 | * @tparam OUT 32 | * The type of the output value. 33 | * @tparam W 34 | * The type of Window that this window function can be applied on. 
35 | */ 36 | abstract class RichAllWindowFunction[IN, OUT, W <: Window] 37 | extends AbstractRichFunction 38 | with AllWindowFunction[IN, OUT, W] {} 39 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/function/RichWindowFunction.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package io.findify.flink.api.function 20 | 21 | import org.apache.flink.api.common.functions.AbstractRichFunction 22 | import org.apache.flink.streaming.api.windowing.windows.Window 23 | 24 | /** Rich variant of the [[io.findify.flink.api.function.WindowFunction]]. 25 | * 26 | * As a [[org.apache.flink.api.common.functions.RichFunction]], it gives access to the 27 | * [[org.apache.flink.api.common.functions.RuntimeContext]] and provides setup and tear-down methods. 28 | * 29 | * @tparam IN 30 | * The type of the input value. 31 | * @tparam OUT 32 | * The type of the output value. 33 | * @tparam KEY 34 | * The type of the key. 35 | * @tparam W 36 | * The type of Window that this window function can be applied on. 37 | */ 38 | abstract class RichWindowFunction[IN, OUT, KEY, W <: Window] 39 | extends AbstractRichFunction 40 | with WindowFunction[IN, OUT, KEY, W] {} 41 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/function/StatefulFunction.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package io.findify.flink.api.function 20 | 21 | import org.apache.flink.annotation.Public 22 | import org.apache.flink.api.common.functions.RichFunction 23 | import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor} 24 | import org.apache.flink.api.common.typeutils.TypeSerializer 25 | import org.apache.flink.configuration.Configuration 26 | 27 | /** Trait implementing the functionality necessary to apply stateful functions in RichFunctions without exposing the 28 | * OperatorStates to the user. The user should call the applyWithState method in his own RichFunction implementation. 29 | */ 30 | @Public 31 | trait StatefulFunction[I, O, S] extends RichFunction { 32 | 33 | protected val stateSerializer: TypeSerializer[S] 34 | 35 | private[this] var state: ValueState[S] = _ 36 | 37 | def applyWithState(in: I, fun: (I, Option[S]) => (O, Option[S])): O = { 38 | val (o, s: Option[S]) = fun(in, Option(state.value())) 39 | s match { 40 | case Some(v) => state.update(v) 41 | case None => state.update(null.asInstanceOf[S]) 42 | } 43 | o 44 | } 45 | 46 | override def open(c: Configuration) = { 47 | val info = new ValueStateDescriptor[S]("state", stateSerializer) 48 | state = getRuntimeContext().getState(info) 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/function/WindowFunction.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package io.findify.flink.api.function 19 | 20 | import org.apache.flink.annotation.Public 21 | import org.apache.flink.api.common.functions.Function 22 | import org.apache.flink.streaming.api.windowing.windows.Window 23 | import org.apache.flink.util.Collector 24 | 25 | import java.io.Serializable 26 | 27 | /** Base interface for functions that are evaluated over keyed (grouped) windows. 28 | * 29 | * @tparam IN 30 | * The type of the input value. 31 | * @tparam OUT 32 | * The type of the output value. 33 | * @tparam KEY 34 | * The type of the key. 35 | */ 36 | @Public 37 | trait WindowFunction[IN, OUT, KEY, W <: Window] extends Function with Serializable { 38 | 39 | /** Evaluates the window and outputs none or several elements. 40 | * 41 | * @param key 42 | * The key for which this window is evaluated. 43 | * @param window 44 | * The window that is being evaluated. 45 | * @param input 46 | * The elements in the window being evaluated. 47 | * @param out 48 | * A collector for emitting elements. 49 | * @throws Exception 50 | * The function may throw exceptions to fail the program and trigger recovery. 
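 *
 *   For illustration only, a minimal hypothetical implementation (the class name and element types are assumptions;
 *   `TimeWindow` comes from `org.apache.flink.streaming.api.windowing.windows`):
 *   {{{
 *     class ConcatWindowFunction extends WindowFunction[String, (String, String), String, TimeWindow] {
 *       override def apply(key: String, window: TimeWindow, input: Iterable[String], out: Collector[(String, String)]): Unit =
 *         out.collect((key, input.mkString("|"))) // join all elements seen for this key in this window
 *     }
 *   }}}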
51 | */ 52 | def apply(key: KEY, window: W, input: Iterable[IN], out: Collector[OUT]): Unit 53 | } 54 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/function/util/ScalaAllWindowFunction.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package io.findify.flink.api.function.util 20 | 21 | import org.apache.flink.streaming.api.functions.windowing.{AllWindowFunction => JAllWindowFunction} 22 | import org.apache.flink.streaming.api.windowing.windows.Window 23 | import org.apache.flink.util.Collector 24 | 25 | import scala.collection.JavaConverters._ 26 | 27 | /** A wrapper function that exposes a Scala Function3 as a Java AllWindowFunction. 28 | */ 29 | final class ScalaAllWindowFunction[IN, OUT, W <: Window]( 30 | private[this] val function: (W, Iterable[IN], Collector[OUT]) => Unit 31 | ) extends JAllWindowFunction[IN, OUT, W] { 32 | 33 | @throws(classOf[Exception]) 34 | override def apply(window: W, input: java.lang.Iterable[IN], out: Collector[OUT]) = { 35 | function.apply(window, input.asScala, out) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/function/util/ScalaAllWindowFunctionWrapper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package io.findify.flink.api.function.util 20 | 21 | import org.apache.flink.api.common.functions.{IterationRuntimeContext, RichFunction, RuntimeContext} 22 | import org.apache.flink.api.java.operators.translation.WrappingFunction 23 | import org.apache.flink.streaming.api.functions.windowing.{AllWindowFunction => JAllWindowFunction} 24 | import io.findify.flink.api.function.AllWindowFunction 25 | import org.apache.flink.streaming.api.windowing.windows.Window 26 | import org.apache.flink.util.Collector 27 | 28 | import scala.jdk.CollectionConverters._ 29 | 30 | /** A wrapper function that exposes a Scala WindowFunction as a JavaWindow function. 31 | * 32 | * The Scala and Java Window functions differ in their type of "Iterable": 33 | * - Scala WindowFunction: scala.Iterable 34 | * - Java WindowFunction: java.lang.Iterable 35 | */ 36 | final class ScalaAllWindowFunctionWrapper[IN, OUT, W <: Window](func: AllWindowFunction[IN, OUT, W]) 37 | extends WrappingFunction[AllWindowFunction[IN, OUT, W]](func) 38 | with JAllWindowFunction[IN, OUT, W] 39 | with RichFunction { 40 | 41 | @throws(classOf[Exception]) 42 | override def apply(window: W, input: java.lang.Iterable[IN], out: Collector[OUT]) = { 43 | wrappedFunction.apply(window, input.asScala, out) 44 | } 45 | 46 | override def getRuntimeContext: RuntimeContext = { 47 | throw new RuntimeException("This should never be called") 48 | } 49 | 50 | override def getIterationRuntimeContext: IterationRuntimeContext = { 51 | throw new RuntimeException("This should never be called") 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/function/util/ScalaProcessWindowFunctionWrapper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package io.findify.flink.api.function.util 20 | 21 | import io.findify.flink.api.OutputTag 22 | import io.findify.flink.api.function.{ProcessAllWindowFunction, ProcessWindowFunction} 23 | import org.apache.flink.api.common.functions.RuntimeContext 24 | import org.apache.flink.configuration.Configuration 25 | import org.apache.flink.streaming.api.functions.windowing.{ 26 | ProcessAllWindowFunction => JProcessAllWindowFunction, 27 | ProcessWindowFunction => JProcessWindowFunction 28 | } 29 | import org.apache.flink.streaming.api.windowing.windows.Window 30 | import org.apache.flink.util.Collector 31 | 32 | import scala.collection.JavaConverters._ 33 | 34 | /** A wrapper function that exposes a Scala ProcessWindowFunction as a ProcessWindowFunction function. 
35 | * 36 | * The Scala and Java Window functions differ in their type of "Iterable": 37 | * - Scala WindowFunction: scala.Iterable 38 | * - Java WindowFunction: java.lang.Iterable 39 | */ 40 | final class ScalaProcessWindowFunctionWrapper[IN, OUT, KEY, W <: Window]( 41 | private[this] val func: ProcessWindowFunction[IN, OUT, KEY, W] 42 | ) extends JProcessWindowFunction[IN, OUT, KEY, W] { 43 | 44 | override def process( 45 | key: KEY, 46 | context: JProcessWindowFunction[IN, OUT, KEY, W]#Context, 47 | elements: java.lang.Iterable[IN], 48 | out: Collector[OUT] 49 | ): Unit = { 50 | val ctx = new func.Context { 51 | override def window = context.window 52 | 53 | override def currentProcessingTime = context.currentProcessingTime 54 | 55 | override def currentWatermark = context.currentWatermark 56 | 57 | override def windowState = context.windowState() 58 | 59 | override def globalState = context.globalState() 60 | 61 | override def output[X](outputTag: OutputTag[X], value: X) = context.output(outputTag, value) 62 | } 63 | func.process(key, ctx, elements.asScala, out) 64 | } 65 | 66 | override def clear(context: JProcessWindowFunction[IN, OUT, KEY, W]#Context): Unit = { 67 | val ctx = new func.Context { 68 | override def window = context.window 69 | 70 | override def currentProcessingTime = context.currentProcessingTime 71 | 72 | override def currentWatermark = context.currentWatermark 73 | 74 | override def windowState = context.windowState() 75 | 76 | override def globalState = context.globalState() 77 | 78 | override def output[X](outputTag: OutputTag[X], value: X) = context.output(outputTag, value) 79 | } 80 | func.clear(ctx) 81 | } 82 | 83 | override def setRuntimeContext(t: RuntimeContext): Unit = { 84 | super.setRuntimeContext(t) 85 | func match { 86 | case rfunc: ProcessWindowFunction[IN, OUT, KEY, W] => rfunc.setRuntimeContext(t) 87 | case null => 88 | } 89 | } 90 | 91 | override def open(parameters: Configuration): Unit = { 92 | super.open(parameters) 93 | func match { 94 | case rfunc: ProcessWindowFunction[IN, OUT, KEY, W] => rfunc.open(parameters) 95 | case null => 96 | } 97 | } 98 | 99 | override def close(): Unit = { 100 | super.close() 101 | func match { 102 | case rfunc: ProcessWindowFunction[IN, OUT, KEY, W] => rfunc.close() 103 | case null => 104 | } 105 | } 106 | } 107 | 108 | /** A wrapper function that exposes a Scala ProcessWindowFunction as a ProcessWindowFunction function. 
109 | * 110 | * The Scala and Java Window functions differ in their type of "Iterable": 111 | * - Scala WindowFunction: scala.Iterable 112 | * - Java WindowFunction: java.lang.Iterable 113 | */ 114 | final class ScalaProcessAllWindowFunctionWrapper[IN, OUT, W <: Window]( 115 | private[this] val func: ProcessAllWindowFunction[IN, OUT, W] 116 | ) extends JProcessAllWindowFunction[IN, OUT, W] { 117 | 118 | override def process( 119 | context: JProcessAllWindowFunction[IN, OUT, W]#Context, 120 | elements: java.lang.Iterable[IN], 121 | out: Collector[OUT] 122 | ): Unit = { 123 | val ctx = new func.Context { 124 | override def window = context.window 125 | 126 | override def windowState = context.windowState() 127 | 128 | override def globalState = context.globalState() 129 | 130 | override def output[X](outputTag: OutputTag[X], value: X) = context.output(outputTag, value) 131 | } 132 | func.process(ctx, elements.asScala, out) 133 | } 134 | 135 | override def clear(context: JProcessAllWindowFunction[IN, OUT, W]#Context): Unit = { 136 | val ctx = new func.Context { 137 | override def window = context.window 138 | 139 | override def windowState = context.windowState() 140 | 141 | override def globalState = context.globalState() 142 | 143 | override def output[X](outputTag: OutputTag[X], value: X) = context.output(outputTag, value) 144 | } 145 | func.clear(ctx) 146 | } 147 | 148 | override def setRuntimeContext(t: RuntimeContext): Unit = { 149 | super.setRuntimeContext(t) 150 | func match { 151 | case rfunc: ProcessAllWindowFunction[IN, OUT, W] => rfunc.setRuntimeContext(t) 152 | case null => 153 | } 154 | } 155 | 156 | override def open(parameters: Configuration): Unit = { 157 | super.open(parameters) 158 | func match { 159 | case rfunc: ProcessAllWindowFunction[IN, OUT, W] => rfunc.open(parameters) 160 | case null => 161 | } 162 | } 163 | 164 | override def close(): Unit = { 165 | super.close() 166 | func match { 167 | case rfunc: ProcessAllWindowFunction[IN, OUT, W] => rfunc.close() 168 | case null => 169 | } 170 | } 171 | } 172 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/function/util/ScalaReduceFunction.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package io.findify.flink.api.function.util 20 | 21 | import org.apache.flink.api.common.functions.ReduceFunction 22 | 23 | /** A wrapper function that exposes a Scala Function2 as a [[ReduceFunction]]. 
24 | */ 25 | final class ScalaReduceFunction[T](private[this] val function: (T, T) => T) extends ReduceFunction[T] { 26 | 27 | @throws(classOf[Exception]) 28 | override def reduce(a: T, b: T): T = { 29 | function(a, b) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/function/util/ScalaWindowFunction.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package io.findify.flink.api.function.util 20 | 21 | import org.apache.flink.streaming.api.functions.windowing.{WindowFunction => JWindowFunction} 22 | import org.apache.flink.streaming.api.windowing.windows.Window 23 | import org.apache.flink.util.Collector 24 | 25 | import scala.collection.JavaConverters._ 26 | 27 | /** A wrapper function that exposes a Scala Function4 as a Java WindowFunction. 28 | */ 29 | final class ScalaWindowFunction[IN, OUT, KEY, W <: Window]( 30 | private[this] val function: (KEY, W, Iterable[IN], Collector[OUT]) => Unit 31 | ) extends JWindowFunction[IN, OUT, KEY, W] { 32 | 33 | @throws(classOf[Exception]) 34 | override def apply(key: KEY, window: W, input: java.lang.Iterable[IN], out: Collector[OUT]) = { 35 | function.apply(key, window, input.asScala, out) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/main/scala/io/findify/flink/api/function/util/ScalaWindowFunctionWrapper.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package io.findify.flink.api.function.util 20 | 21 | import io.findify.flink.api.function.WindowFunction 22 | import org.apache.flink.api.common.functions.{IterationRuntimeContext, RichFunction, RuntimeContext} 23 | import org.apache.flink.api.java.operators.translation.WrappingFunction 24 | import org.apache.flink.streaming.api.functions.windowing.{WindowFunction => JWindowFunction} 25 | import org.apache.flink.streaming.api.windowing.windows.Window 26 | import org.apache.flink.util.Collector 27 | 28 | import scala.jdk.CollectionConverters._ 29 | 30 | /** A wrapper function that exposes a Scala WindowFunction as a JavaWindow function. 31 | * 32 | * The Scala and Java Window functions differ in their type of "Iterable": 33 | * - Scala WindowFunction: scala.Iterable 34 | * - Java WindowFunction: java.lang.Iterable 35 | */ 36 | final class ScalaWindowFunctionWrapper[IN, OUT, KEY, W <: Window](func: WindowFunction[IN, OUT, KEY, W]) 37 | extends WrappingFunction[WindowFunction[IN, OUT, KEY, W]](func) 38 | with JWindowFunction[IN, OUT, KEY, W] 39 | with RichFunction { 40 | 41 | @throws(classOf[Exception]) 42 | override def apply(key: KEY, window: W, input: java.lang.Iterable[IN], out: Collector[OUT]) = { 43 | wrappedFunction.apply(key, window, input.asScala, out) 44 | } 45 | 46 | override def getRuntimeContext: RuntimeContext = { 47 | throw new RuntimeException("This should never be called") 48 | } 49 | 50 | override def getIterationRuntimeContext: IterationRuntimeContext = { 51 | throw new RuntimeException("This should never be called") 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/test/scala/io/findify/flink/api/CoGroupedStreamsTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package io.findify.flink.api 20 | 21 | import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows 22 | import org.apache.flink.streaming.api.windowing.time.Time 23 | import org.junit.{Assert, Test} 24 | import io.findify.flinkadt.api._ 25 | 26 | /** Unit test for [[org.apache.flink.streaming.api.scala.CoGroupedStreams]] 27 | */ 28 | class CoGroupedStreamsTest { 29 | private val env = StreamExecutionEnvironment.getExecutionEnvironment 30 | 31 | private val dataStream1 = env.fromElements("a1", "a2", "a3") 32 | private val dataStream2 = env.fromElements("a1", "a2") 33 | private val keySelector = (s: String) => s 34 | private val tsAssigner = TumblingEventTimeWindows.of(Time.milliseconds(1)) 35 | 36 | @Test 37 | def testSetAllowedLateness(): Unit = { 38 | val lateness = Time.milliseconds(42) 39 | val withLateness = dataStream1 40 | .coGroup(dataStream2) 41 | .where(keySelector) 42 | .equalTo(keySelector) 43 | .window(tsAssigner) 44 | .allowedLateness(lateness) 45 | Assert.assertEquals(lateness.toMilliseconds, withLateness.allowedLateness.toMilliseconds) 46 | } 47 | 48 | } 49 | -------------------------------------------------------------------------------- /src/test/scala/io/findify/flink/api/JoinedStreamsTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package io.findify.flink.api 20 | 21 | import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows 22 | import org.apache.flink.streaming.api.windowing.time.Time 23 | import org.junit.{Assert, Test} 24 | import io.findify.flinkadt.api._ 25 | 26 | /** Unit test for [[org.apache.flink.streaming.api.scala.JoinedStreams]] 27 | */ 28 | class JoinedStreamsTest { 29 | private val env = StreamExecutionEnvironment.getExecutionEnvironment 30 | 31 | private val dataStream1 = env.fromElements("a1", "a2", "a3") 32 | private val dataStream2 = env.fromElements("a1", "a2") 33 | private val keySelector = (s: String) => s 34 | private val tsAssigner = TumblingEventTimeWindows.of(Time.milliseconds(1)) 35 | 36 | @Test 37 | def testSetAllowedLateness(): Unit = { 38 | val lateness = Time.milliseconds(42) 39 | val withLateness = dataStream1 40 | .join(dataStream2) 41 | .where(keySelector) 42 | .equalTo(keySelector) 43 | .window(tsAssigner) 44 | .allowedLateness(lateness) 45 | Assert.assertEquals(lateness.toMilliseconds, withLateness.allowedLateness.toMilliseconds) 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/test/scala/io/findify/flink/api/StreamExecutionEnvironmentTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package io.findify.flink.api 20 | 21 | import org.apache.flink.api.common.eventtime.WatermarkStrategy 22 | import org.apache.flink.api.common.typeinfo.TypeInformation 23 | import org.apache.flink.api.connector.source.Boundedness 24 | import org.apache.flink.api.connector.source.mocks.MockSource 25 | import org.apache.flink.api.java.typeutils.GenericTypeInfo 26 | import org.junit.Assert.assertEquals 27 | import org.junit.Test 28 | 29 | /** Tests for the [[StreamExecutionEnvironment]]. 30 | */ 31 | class StreamExecutionEnvironmentTest { 32 | 33 | /** Verifies that calls to fromSource() don't throw and create a stream of the expected type. 34 | */ 35 | @Test 36 | def testFromSource(): Unit = { 37 | implicit val typeInfo: TypeInformation[Integer] = new MockTypeInfo() 38 | val env = StreamExecutionEnvironment.getExecutionEnvironment 39 | 40 | val stream = env.fromSource( 41 | new MockSource(Boundedness.CONTINUOUS_UNBOUNDED, 1), 42 | WatermarkStrategy.noWatermarks(), 43 | "test source" 44 | ) 45 | 46 | assertEquals(typeInfo, stream.dataType) 47 | } 48 | 49 | /** Verifies that calls to fromSequence() instantiate a new DataStream that contains a sequence of numbers. 
50 | */ 51 | @Test 52 | def testFromSequence(): Unit = { 53 | import io.findify.flinkadt.api._ 54 | val typeInfo = implicitly[TypeInformation[Long]] 55 | val env = StreamExecutionEnvironment.getExecutionEnvironment 56 | 57 | val stream = env.fromSequence(1, 100) 58 | 59 | assertEquals(typeInfo, stream.dataType) 60 | } 61 | 62 | // -------------------------------------------------------------------------- 63 | // mocks 64 | // -------------------------------------------------------------------------- 65 | 66 | private class MockTypeInfo extends GenericTypeInfo[Integer](classOf[Integer]) {} 67 | } 68 | --------------------------------------------------------------------------------