├── .github └── workflows │ ├── publish.yaml │ └── test.yaml ├── .gitignore ├── .zuul.yaml ├── LICENSE.txt ├── README.md ├── checkstyle └── suppressions.xml ├── flink-quickstart-archetype ├── .gitignore ├── README.md ├── pom.xml └── src │ ├── main │ └── resources │ │ ├── META-INF │ │ └── maven │ │ │ └── archetype-metadata.xml │ │ └── archetype-resources │ │ ├── pom.xml │ │ └── src │ │ └── main │ │ ├── java │ │ └── StreamingJob.java │ │ └── resources │ │ └── log4j2.properties │ └── test │ └── resources │ └── projects │ └── basic │ ├── archetype.properties │ └── goal.txt ├── flink-quickstart-skeleton ├── README.md ├── pom.xml └── src │ └── main │ ├── java │ └── com │ │ └── cloudera │ │ └── streaming │ │ └── flink │ │ └── StreamingJob.java │ └── resources │ └── log4j2.properties ├── flink-secure-tutorial ├── .gitignore ├── LICENSE.txt ├── README.md ├── config │ └── job.properties ├── images │ └── RangerSettings.png ├── pom.xml └── src │ └── main │ ├── java │ └── com │ │ └── cloudera │ │ └── streaming │ │ └── examples │ │ └── flink │ │ ├── AvroDataGeneratorJob.java │ │ ├── Constants.java │ │ ├── KafkaToHDFSAvroJob.java │ │ ├── KafkaToHDFSSimpleJob.java │ │ ├── RandomKafkaDataGeneratorJob.java │ │ └── Utils.java │ └── resources │ └── avro │ └── message.avsc ├── flink-simple-tutorial ├── .gitignore ├── LICENSE.txt ├── README.md ├── images │ ├── TaskLogs.png │ └── YarnApp.png ├── kafka-appender │ └── log4j2.xml ├── pom.xml └── src │ ├── main │ ├── java │ │ └── com │ │ │ └── cloudera │ │ │ └── streaming │ │ │ └── examples │ │ │ └── flink │ │ │ ├── AlertingFunction.java │ │ │ ├── HeapMonitorPipeline.java │ │ │ ├── HeapMonitorSource.java │ │ │ ├── LogSink.java │ │ │ └── types │ │ │ ├── HeapAlert.java │ │ │ └── HeapMetrics.java │ └── resources │ │ └── log4j2.xml │ └── test │ ├── java │ └── com │ │ └── cloudera │ │ └── streaming │ │ └── examples │ │ └── flink │ │ └── HeapMonitorPipelineTest.java │ └── resources │ └── log4j2-test.properties ├── flink-stateful-tutorial ├── .gitignore ├── LICENSE.txt ├── README.md ├── config │ └── job.properties ├── images │ ├── InventoryLogic.png │ ├── ItemManager.png │ ├── bp_ok.png │ └── cp.png ├── pom.xml └── src │ ├── main │ └── java │ │ └── com │ │ └── cloudera │ │ └── streaming │ │ └── examples │ │ └── flink │ │ ├── ItemTransactionJob.java │ │ ├── KafkaDataGeneratorJob.java │ │ ├── KafkaItemTransactionJob.java │ │ ├── operators │ │ ├── ItemInfoEnrichment.java │ │ ├── ItemTransactionGeneratorSource.java │ │ ├── MaxWatermarkGeneratorSupplier.java │ │ ├── QueryGeneratorSource.java │ │ ├── SummaryAlertingCondition.java │ │ ├── TransactionProcessor.java │ │ └── TransactionSummaryAggregator.java │ │ ├── types │ │ ├── ItemInfo.java │ │ ├── ItemTransaction.java │ │ ├── JsonKafkaSerializationSchema.java │ │ ├── Query.java │ │ ├── QueryResult.java │ │ ├── QueryResultSchema.java │ │ ├── QuerySchema.java │ │ ├── TransactionResult.java │ │ ├── TransactionSchema.java │ │ └── TransactionSummary.java │ │ └── utils │ │ ├── ExponentialHistogram.java │ │ └── Utils.java │ └── test │ ├── java │ └── com │ │ └── cloudera │ │ └── streaming │ │ └── examples │ │ └── flink │ │ ├── SocketTransactionProcessorJob.java │ │ └── TransactionProcessorTest.java │ └── resources │ └── log4j2-test.properties └── pom.xml /.github/workflows/publish.yaml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. 
See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | name: Publish 19 | 20 | on: 21 | push: 22 | branches: 23 | - CSA* 24 | 25 | env: 26 | MVN_CMD: /grid/0/jenkins/tools/maven/apache-maven-3.5.2/bin/mvn 27 | JVM_ARGS: -Xms2g -Xmx4g -XX:MaxPermSize=1024m 28 | JAVA_OPTS: -Xms2g -Xmx4g -XX:MaxPermSize=1024m -verbose:gc -XX:+UseConcMarkSweepGC -XX:-UseGCOverheadLimit 29 | MAVEN_OPTS: -Xmx6g -XX:MaxPermSize=1024m -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:ReservedCodeCacheSize=1024m -Djava.net.preferIPv4Stack=true 30 | 31 | jobs: 32 | test: 33 | uses: ./.github/workflows/test.yaml 34 | 35 | publish: 36 | needs: test 37 | runs-on: ci-builds 38 | 39 | env: 40 | MVN_CMD: /grid/0/jenkins/tools/maven/apache-maven-3.5.2/bin/mvn 41 | 42 | steps: 43 | - uses: actions/checkout@v3 44 | with: 45 | fetch-depth: 0 46 | 47 | - uses: actions/setup-java@v3 48 | with: 49 | java-version: 8 50 | distribution: 'adopt' 51 | 52 | - name: Download settings.xml 53 | run: wget https://github.infra.cloudera.com/raw/CDH/flink-tools/master/csa-build/cloudera-mirrors-releng-settings.xml -q -O /tmp/settings.xml 54 | 55 | - name: Build flink-tutorials 56 | run: ${MVN_CMD} clean deploy -DskipTests -B -s /tmp/settings.xml 57 | 58 | - name: Slack Notification Build Success 59 | uses: CDH/flink-tools/github-action/action-slack@master 60 | if: success() 61 | with: 62 | status: ${{ job.status }} 63 | fields: repo,message,commit,ref,workflow,took 64 | github_base_url: https://github.infra.cloudera.com/ 65 | icon_emoji: ':thumbsup:' 66 | author_name: 'Github Actions' 67 | text: 'GitHub Actions' 68 | env: 69 | SLACK_WEBHOOK_URL: ${{ secrets.ENG_FLINK_BUILDS }} 70 | 71 | - name: Slack Notification Build Failed 72 | uses: CDH/flink-tools/github-action/action-slack@master 73 | if: failure() 74 | with: 75 | status: ${{ job.status }} 76 | fields: repo,message,commit,ref,workflow,took,mention_author,mention_committer 77 | github_base_url: https://github.infra.cloudera.com/ 78 | icon_emoji: ':fire:' 79 | author_name: 'Github Actions' 80 | text: 'GitHub Actions' 81 | env: 82 | SLACK_WEBHOOK_URL: ${{ secrets.ENG_FLINK_BUILD_FAILURES }} 83 | -------------------------------------------------------------------------------- /.github/workflows/test.yaml: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | name: Test 19 | 20 | on: 21 | pull_request: 22 | workflow_call: 23 | 24 | env: 25 | MVN_CMD: /grid/0/jenkins/tools/maven/apache-maven-3.5.2/bin/mvn 26 | JVM_ARGS: -Xms2g -Xmx4g -XX:MaxPermSize=1024m 27 | JAVA_OPTS: -Xms2g -Xmx4g -XX:MaxPermSize=1024m -verbose:gc -XX:+UseConcMarkSweepGC -XX:-UseGCOverheadLimit 28 | MAVEN_OPTS: -Xmx6g -XX:MaxPermSize=1024m -XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:ReservedCodeCacheSize=1024m -Djava.net.preferIPv4Stack=true 29 | 30 | jobs: 31 | test: 32 | runs-on: ci-builds 33 | 34 | env: 35 | MVN_CMD: /grid/0/jenkins/tools/maven/apache-maven-3.5.2/bin/mvn 36 | 37 | steps: 38 | - uses: actions/checkout@v3 39 | with: 40 | fetch-depth: 0 41 | 42 | - uses: actions/setup-java@v3 43 | with: 44 | java-version: 8 45 | distribution: 'adopt' 46 | 47 | - name: Download settings.xml 48 | run: wget https://github.infra.cloudera.com/raw/CDH/flink-tools/master/csa-build/cloudera-mirrors-releng-settings.xml -q -O /tmp/settings.xml 49 | 50 | - name: Test flink-tutorials 51 | run: ${MVN_CMD} clean install -B -s /tmp/settings.xml 52 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | */.DS_Store 3 | .idea 4 | *iml 5 | -------------------------------------------------------------------------------- /.zuul.yaml: -------------------------------------------------------------------------------- 1 | - project: 2 | post-github-infra: 3 | jobs: 4 | - common-snapshot-build-flink-tutorials: 5 | vars: 6 | slack_notify: true 7 | secrets: 8 | - name: failure_slack_url 9 | secret: slack_channel_flink_build_failures_tutorial 10 | pass-to-parent: true 11 | - name: success_slack_url 12 | secret: slack_channel_flink_builds_tutorial 13 | pass-to-parent: true 14 | required-projects: 15 | - name: CDH/flink-tools 16 | override-checkout: master 17 | 18 | - secret: 19 | name: slack_channel_flink_build_failures_tutorial 20 | data: 21 | api_url: !encrypted/pkcs1-oaep 22 | - TKi+EHOMb4dnjFFBzWzzgl7pORteTgzQGOySW0Lva2QMr5jR4mdW4R+1iIlJ/Eiwe5fvc 23 | /lz6xiGvSKTLRXR4lks/YLd9wqtMGfEkVN2P7j3F2lK4ma8enzP577A0jUifdXTtXbsKM 24 | ByyuyPaH1akoBjhFQ427dAERFzVL3d+k/n0vIj/lKl9GzaifgBZM7oNN4++UbiHxonvdy 25 | vXC8cxfDRomQjsD1cJuXtCaHHcwiWsOPU+VV1KJn2KHu98iMtRN/ZuN6r3qeWTFNp3gKL 26 | HdcgaADWObxClPj209hIq+QImifAezIezmwUM5Edu5i0zOp2YztLWEriZZw5gzzKIVf+5 27 | Wh5g8l/v7UuIcQg7ZjzJsNoNdrsd8e55r/aLsXhXqJwYdB63MHrK/+hykUCj83XTwoFp0 28 | F8Q9+HHzZQ0N7E9+06uVbAJkdC7Mn5IVll0AthoG1ZonzxGPGucJ6ZBFfUJMmaHAylz1s 29 | swvuaLpML52EqouZIw/NEubnBd2N+GWsiDHrmFCyBkmt+ldGnHkwcsfCGqqbnweNEEocR 30 | g+XHmARFvT/34sP3xvnd8MMPx8sqrlSWb0hJPCfK+MojGPRR8zaBsZMPmHukzPek7qp+j 31 | LD3HjdS4L2yv5iMN7Tt+c4lK/9ho93/uq9fLwgHovloRIWU4pRKczsS8KyaBtA= 32 | 33 | - secret: 34 | name: slack_channel_flink_builds_tutorial 35 | data: 36 | api_url: !encrypted/pkcs1-oaep 37 | - Pp0n4w4wdBKRlHkS0j++FJRZYEK5pNBSFLDZ5TZjwVcPmkB3mDbWjBAyYIJnA9D50LcP3 38 | Z/HyrvhqRGwl4Hd22poXTNYbNqJvfJVfeQrU3dJ1e98k+3XQ6iuYvWmyR0ukvMcvvFm3V 39 | 
mTBm4or514bg04J4oP5DJ+MWQX079u2JQ9KqAAFddKXvZxc3KbRkwIggQ9o+gMySX3p5r 40 | dLMs0TO7jBy+fyX+5jcqq43iUJGeJMo2PYO6k2XxbpgguejV30A02nlnoYRJ62LW1lD9A 41 | hB3PL1+0ORpOmvqFUDzxIJzAYbGDdbRyUpxhOUtO4vxZS2Da2hL9sUG6za5LEDvKYwAIh 42 | A3ptgzcHV3NTbNPPihRe+TEK9yn/V8TiTw/lzsQHAGK/x8Q9p3jGuQG0JGGFqDw3aITjU 43 | +s5QzaJxW9fYbXInXpYuxYqq9f4BQHRO7h8fzRqrwm+NBKRonSWgMoj8JB51c5UN61ca+ 44 | DOqqWc3pDoGOAejc/nQMNInWXe3jafg9DJFpI+ko+8Lv8QgeNkEso3kEY1sR9h93dhZ5r 45 | 5iYT0Z5ciEPVsqMz+s1Q45PJGsYnSKhW7NgmjeywMG1vgU0AqgUX1WYD5H9od4LMRvRpX 46 | pOe7HXDmOAty3BJ1l8ScbuM4TE2ahcwWLv4cTJo3wTqpOYyhUJOWs3MYIeBTz4= 47 | 48 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2009-2019 Cloudera Inc. 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Tutorials for Flink on Cloudera 2 | 3 | This repo contains reference Flink Streaming applications for a few example use-cases. These examples should serve as solid starting points when building production grade streaming applications as they include detailed development, configuration and deployment guidelines. 
4 | 5 | Although each tutorial is designed to be a standalone reference, we suggest going through them in the following order: 6 | 7 | 8 | ## Stateless Monitoring Application 9 | 10 | The [flink-simple-tutorial](flink-simple-tutorial) application demonstrates some basic capabilities of the DataStream API to build a simple monitoring application with alerting capabilities. It displays the essentials of Flink applications alongside best practices for setting up a robust logging configuration using Kafka. 11 | 12 | 13 | ## Stateful Transaction and Query Processor Service 14 | 15 | The [flink-stateful-tutorial](flink-stateful-tutorial) application implements a production grade stateful service for handling incoming item transactions, while also exposing query capabilities. 16 | 17 | We dive deeper into structuring streaming application code, state handling and resource configuration. We also present a detailed setup of Kafka data sources and sinks for scalability and discuss validation of our pipeline before deployment. 18 | 19 | 20 | ## Flink Security Showcase Application 21 | 22 | The [flink-secure-tutorial](flink-secure-tutorial) application demonstrates Flink's security features for applications intended to run in secured CDP environments. It covers Kerberos authentication and TLS encryption for HDFS and Kafka connectors. 23 | 24 | 25 | ## Flink Quickstart Archetype 26 | 27 | The [flink-quickstart-archetype](flink-quickstart-archetype) is a maven archetype for generating application skeletons specifically for Flink on Cloudera. 28 | 29 | 30 | ## Flink Quickstart Skeleton 31 | 32 | The [flink-quickstart-skeleton](flink-quickstart-skeleton) is a maven project with the same content that the archetype generates. It is intended for users who cannot, or prefer not to, use the archetype. 33 | -------------------------------------------------------------------------------- /checkstyle/suppressions.xml: -------------------------------------------------------------------------------- 1 | 2 | 20 | 21 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /flink-quickstart-archetype/.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | target/ 3 | dependency-reduced-pom.xml 4 | *.iml 5 | .DS_Store 6 | 7 | # eclipse 8 | /bin/ 9 | .classpath 10 | .project 11 | .settings 12 | -------------------------------------------------------------------------------- /flink-quickstart-archetype/README.md: -------------------------------------------------------------------------------- 1 | # Flink Quickstart Archetype 2 | This is a maven archetype for generating Java projects for Flink on Cloudera. 3 | 4 | ## Prerequisites 5 | Cloudera does not release maven archetypes to the Maven Central Repository, so the archetype must be installed locally on your host before use. 6 | 7 | ```shell 8 | git clone https://github.com/cloudera/flink-tutorials 9 | cd flink-quickstart-archetype 10 | mvn install 11 | cd .. 12 | ``` 13 | 14 | When the installation completes, an entry is added to your local archetype catalog: 15 | ``` 16 | cat ~/.m2/repository/archetype-catalog.xml 17 | ... 18 | <archetypes> 19 | <archetype> 20 | <groupId>com.cloudera.flink</groupId> 21 | <artifactId>flink-quickstart-archetype</artifactId> 22 | <version>1.20.1-csa1.15.0.0</version> 23 | <description>flink-quickstart-archetype</description> 24 | </archetype> 25 | </archetypes> 26 | ...
27 | ``` 28 | 29 | ## Usage 30 | Once the archetype is installed you can generate project skeletons by running: 31 | ```shell 32 | mvn archetype:generate \ 33 | -DarchetypeGroupId=com.cloudera.flink \ 34 | -DarchetypeArtifactId=flink-quickstart-archetype \ 35 | -DarchetypeVersion=1.20.1-csa1.15.0.0 36 | ``` 37 | 38 | You must provide some basic information about your new project when prompted. 39 | ``` 40 | Define value for property 'groupId': com.cloudera.flink 41 | Define value for property 'artifactId': sample-project 42 | Define value for property 'version' 1.0-SNAPSHOT: : 43 | Define value for property 'package' com.cloudera.flink: : 44 | Confirm properties configuration: 45 | groupId: com.cloudera.flink 46 | artifactId: sample-project 47 | version: 1.0-SNAPSHOT 48 | package: com.cloudera.flink 49 | Y: : 50 | ``` 51 | 52 | > **Note:** By hitting 'Enter' you can accept default values. 53 | 54 | The generated project structure will look like the following: 55 | ``` 56 | sample-project 57 | ├── pom.xml 58 | └── src 59 | └── main 60 | ├── java 61 | │ └── com 62 | │ └── cloudera 63 | │ └── flink 64 | │ └── StreamingJob.java 65 | └── resources 66 | └── log4j2.properties 67 | ``` 68 | -------------------------------------------------------------------------------- /flink-quickstart-archetype/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 20 | 23 | 4.0.0 24 | 25 | com.cloudera.flink 26 | flink-quickstart-archetype 27 | 1.20.1-csa1.15.0.0 28 | maven-archetype 29 | Flink Quickstart Archetype 30 | 31 | 32 | 3.1.2 33 | 34 | 35 | 36 | 37 | 38 | org.apache.maven.archetype 39 | archetype-packaging 40 | ${archetype.packaging.version} 41 | 42 | 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /flink-quickstart-archetype/src/main/resources/META-INF/maven/archetype-metadata.xml: -------------------------------------------------------------------------------- 1 | 2 | 6 | 7 | 8 | src/main/java 9 | 10 | **/*.java 11 | 12 | 13 | 14 | 15 | src/main/resources 16 | 17 | **/*.properties 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /flink-quickstart-archetype/src/main/resources/archetype-resources/pom.xml: -------------------------------------------------------------------------------- 1 | 19 | 22 | 4.0.0 23 | 24 | ${groupId} 25 | ${artifactId} 26 | ${version} 27 | 28 | Flink Quickstart Job 29 | 30 | 31 | UTF-8 32 | 1.8 33 | 2.12 34 | ${java.version} 35 | ${java.version} 36 | 37 | 1.20.1-csa1.15.0.0 38 | 39 | 5.10.1 40 | 2.22.0 41 | 42 | 43 | 44 | 45 | org.apache.flink 46 | flink-clients 47 | ${flink.version} 48 | provided 49 | 50 | 51 | 52 | org.apache.flink 53 | flink-streaming-java 54 | ${flink.version} 55 | provided 56 | 57 | 58 | 59 | org.apache.flink 60 | flink-java 61 | ${flink.version} 62 | provided 63 | 64 | 65 | 66 | org.apache.logging.log4j 67 | log4j-slf4j-impl 68 | ${log4j.version} 69 | provided 70 | 71 | 72 | 73 | org.apache.logging.log4j 74 | log4j-api 75 | ${log4j.version} 76 | runtime 77 | 78 | 79 | 80 | org.apache.logging.log4j 81 | log4j-core 82 | ${log4j.version} 83 | runtime 84 | 85 | 86 | 87 | org.junit.jupiter 88 | junit-jupiter-engine 89 | ${junit-jupiter.version} 90 | test 91 | 92 | 93 | 94 | 95 | 96 | 97 | org.apache.maven.plugins 98 | maven-shade-plugin 99 | 3.4.1 100 | 101 | 102 | package 103 | 104 | shade 105 | 106 | 107 | 108 | 109 | org.apache.flink:force-shading 110 | com.google.code.findbugs:jsr305 111 | org.slf4j:* 
112 | log4j:* 113 | 114 | 115 | 116 | 117 | *:* 118 | 119 | META-INF/*.SF 120 | META-INF/*.DSA 121 | META-INF/*.RSA 122 | 123 | 124 | 125 | 126 | 127 | 128 | ${package}.StreamingJob 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | org.eclipse.m2e 141 | lifecycle-mapping 142 | 1.0.0 143 | 144 | 145 | 146 | 147 | 148 | org.apache.maven.plugins 149 | maven-shade-plugin 150 | [3.1.1,) 151 | 152 | shade 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | org.apache.maven.plugins 162 | maven-compiler-plugin 163 | [3.1,) 164 | 165 | testCompile 166 | compile 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | cloudera 184 | https://repository.cloudera.com/artifactory/libs-release-local/ 185 | 186 | 187 | 188 | hortonworks 189 | https://repo.hortonworks.com/content/repositories/releases/ 190 | 191 | 192 | 193 | 194 | -------------------------------------------------------------------------------- /flink-quickstart-archetype/src/main/resources/archetype-resources/src/main/java/StreamingJob.java: -------------------------------------------------------------------------------- 1 | #set( $symbol_pound = '#' ) 2 | #set( $symbol_dollar = '$' ) 3 | #set( $symbol_escape = '\' ) 4 | /* 5 | * Licensed to the Apache Software Foundation (ASF) under one 6 | * or more contributor license agreements. See the NOTICE file 7 | * distributed with this work for additional information 8 | * regarding copyright ownership. The ASF licenses this file 9 | * to you under the Apache License, Version 2.0 (the 10 | * "License"); you may not use this file except in compliance 11 | * with the License. You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | */ 21 | 22 | package ${package}; 23 | 24 | import org.apache.flink.streaming.api.datastream.DataStream; 25 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 26 | 27 | public class StreamingJob { 28 | 29 | public static void main(String[] args) throws Exception { 30 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 31 | 32 | DataStream ds = env.fromElements(1,2,3,4); 33 | ds.printToErr(); 34 | 35 | env.execute("Flink Streaming Java API Skeleton"); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /flink-quickstart-archetype/src/main/resources/archetype-resources/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. 
You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | rootLogger.level = INFO 20 | rootLogger.appenderRef.console.ref = ConsoleAppender 21 | 22 | appender.console.name = ConsoleAppender 23 | appender.console.type = CONSOLE 24 | appender.console.layout.type = PatternLayout 25 | appender.console.layout.pattern = %d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n 26 | -------------------------------------------------------------------------------- /flink-quickstart-archetype/src/test/resources/projects/basic/archetype.properties: -------------------------------------------------------------------------------- 1 | #Mon Mar 30 09:41:43 CEST 2020 2 | package=it.pkg 3 | groupId=archetype.it 4 | artifactId=basic 5 | version=0.1-SNAPSHOT 6 | -------------------------------------------------------------------------------- /flink-quickstart-archetype/src/test/resources/projects/basic/goal.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudera/flink-tutorials/04560e95090d21686d0485331d64adce63abab87/flink-quickstart-archetype/src/test/resources/projects/basic/goal.txt -------------------------------------------------------------------------------- /flink-quickstart-skeleton/README.md: -------------------------------------------------------------------------------- 1 | # Flink Quickstart Skeleton 2 | 3 | This is a maven skeleton project for Flink on Cloudera. 4 | Its content is identical to what the [flink-quickstart-archetype](../flink-quickstart-archetype) generates.
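To use the skeleton directly, a minimal build-and-submit sequence looks like the sketch below; it assumes a host with a configured Flink client and relies only on the main class and artifact coordinates declared in this project's pom.xml, while any YARN, Kerberos or TLS options required by your cluster are omitted.

```shell
# Build the fat jar with the shade plugin; the artifact is written to target/
mvn clean package

# Submit the job; -c is optional because the shade plugin already records the
# main class (com.cloudera.streaming.flink.StreamingJob) in the jar manifest
flink run -c com.cloudera.streaming.flink.StreamingJob target/flink-quickstart-skeleton-1.0-SNAPSHOT.jar
```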
5 | -------------------------------------------------------------------------------- /flink-quickstart-skeleton/pom.xml: -------------------------------------------------------------------------------- 1 | 19 | 22 | 4.0.0 23 | 24 | com.cloudera.streaming.flink 25 | flink-quickstart-skeleton 26 | 1.0-SNAPSHOT 27 | 28 | Flink Quickstart Job 29 | 30 | 31 | UTF-8 32 | 1.8 33 | 2.12 34 | ${java.version} 35 | ${java.version} 36 | 37 | 1.20.1-csa1.15.0.0 38 | 39 | 5.10.1 40 | 2.22.0 41 | 42 | 43 | 44 | 45 | org.apache.flink 46 | flink-clients 47 | ${flink.version} 48 | provided 49 | 50 | 51 | 52 | org.apache.flink 53 | flink-streaming-java 54 | ${flink.version} 55 | provided 56 | 57 | 58 | 59 | org.apache.flink 60 | flink-java 61 | ${flink.version} 62 | provided 63 | 64 | 65 | 66 | org.apache.logging.log4j 67 | log4j-slf4j-impl 68 | ${log4j.version} 69 | provided 70 | 71 | 72 | 73 | org.apache.logging.log4j 74 | log4j-api 75 | ${log4j.version} 76 | runtime 77 | 78 | 79 | 80 | org.apache.logging.log4j 81 | log4j-core 82 | ${log4j.version} 83 | runtime 84 | 85 | 86 | 87 | org.junit.jupiter 88 | junit-jupiter-engine 89 | ${junit-jupiter.version} 90 | test 91 | 92 | 93 | 94 | 95 | 96 | 97 | org.apache.maven.plugins 98 | maven-shade-plugin 99 | 3.4.1 100 | 101 | 102 | package 103 | 104 | shade 105 | 106 | 107 | 108 | 109 | org.apache.flink:force-shading 110 | com.google.code.findbugs:jsr305 111 | org.slf4j:* 112 | log4j:* 113 | 114 | 115 | 116 | 117 | *:* 118 | 119 | META-INF/*.SF 120 | META-INF/*.DSA 121 | META-INF/*.RSA 122 | 123 | 124 | 125 | 126 | 127 | 128 | com.cloudera.streaming.flink.StreamingJob 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | org.eclipse.m2e 141 | lifecycle-mapping 142 | 1.0.0 143 | 144 | 145 | 146 | 147 | 148 | org.apache.maven.plugins 149 | maven-shade-plugin 150 | [3.1.1,) 151 | 152 | shade 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | org.apache.maven.plugins 162 | maven-compiler-plugin 163 | [3.1,) 164 | 165 | testCompile 166 | compile 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | cloudera 184 | https://repository.cloudera.com/artifactory/libs-release-local/ 185 | 186 | 187 | 188 | hortonworks 189 | https://repo.hortonworks.com/content/repositories/releases/ 190 | 191 | 192 | 193 | 194 | -------------------------------------------------------------------------------- /flink-quickstart-skeleton/src/main/java/com/cloudera/streaming/flink/StreamingJob.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package com.cloudera.streaming.flink; 20 | 21 | import org.apache.flink.streaming.api.datastream.DataStream; 22 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 23 | 24 | public class StreamingJob { 25 | 26 | public static void main(String[] args) throws Exception { 27 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 28 | 29 | DataStream ds = env.fromElements(1,2,3,4); 30 | ds.printToErr(); 31 | 32 | env.execute("Flink Streaming Java API Skeleton"); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /flink-quickstart-skeleton/src/main/resources/log4j2.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | rootLogger.level = INFO 20 | rootLogger.appenderRef.console.ref = ConsoleAppender 21 | 22 | appender.console.name = ConsoleAppender 23 | appender.console.type = CONSOLE 24 | appender.console.layout.type = PatternLayout 25 | appender.console.layout.pattern = %d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n 26 | -------------------------------------------------------------------------------- /flink-secure-tutorial/.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | target 3 | build.sh 4 | run.sh 5 | dependency-reduced-pom.xml 6 | *.iml 7 | src/main/java/com/cloudera/streaming/examples/flink/data/ 8 | 9 | # eclipse 10 | /bin/ 11 | .classpath 12 | .project 13 | .settings 14 | -------------------------------------------------------------------------------- /flink-secure-tutorial/LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 
135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2009-2019 Cloudera Inc. 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 
194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /flink-secure-tutorial/config/job.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to Cloudera, Inc. under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | kafkaTopic=flink 20 | hdfsOutput=hdfs:///tmp/flink-sec-tutorial 21 | kafka.bootstrap.servers=:9093 22 | kafka.security.protocol=SASL_SSL 23 | kafka.sasl.kerberos.service.name=kafka 24 | kafka.ssl.truststore.location=/var/lib/cloudera-scm-agent/agent-cert/cm-auto-global_truststore.jks 25 | schema.registry.url=https://:7790/api/v1 26 | schema.registry.client.ssl.trustStorePath=/var/lib/cloudera-scm-agent/agent-cert/cm-auto-global_truststore.jks 27 | schema.registry.client.ssl.trustStorePassword= 28 | -------------------------------------------------------------------------------- /flink-secure-tutorial/images/RangerSettings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudera/flink-tutorials/04560e95090d21686d0485331d64adce63abab87/flink-secure-tutorial/images/RangerSettings.png -------------------------------------------------------------------------------- /flink-secure-tutorial/pom.xml: -------------------------------------------------------------------------------- 1 | 19 | 22 | 4.0.0 23 | 24 | 25 | com.cloudera.flink 26 | flink-tutorials 27 | 1.20.1-csa1.15.0.0 28 | 29 | 30 | flink-secure-tutorial 31 | Flink Tutorials :: Flink Secure Tutorial 32 | 33 | 34 | UTF-8 35 | 1.8 36 | 2.12 37 | ${java.version} 38 | ${java.version} 39 | 40 | 41 | 42 | 43 | 44 | org.apache.flink 45 | flink-clients 46 | provided 47 | 48 | 49 | 50 | org.apache.flink 51 | flink-core 52 | provided 53 | 54 | 55 | 56 | org.apache.flink 57 | flink-java 58 | provided 59 | 60 | 61 | 62 | org.apache.flink 63 | flink-streaming-java 64 | provided 65 | 66 | 67 | 68 | org.apache.logging.log4j 69 | log4j-slf4j-impl 70 | provided 71 | 72 | 73 | 74 | 75 | org.apache.flink 76 | flink-connector-cloudera-registry 77 | 78 | 79 | org.everit.json 80 | org.everit.json.schema 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | org.apache.flink 89 
| flink-connector-kafka 90 | 91 | 92 | 93 | org.apache.flink 94 | flink-connector-files 95 | 96 | 97 | 98 | 99 | org.apache.logging.log4j 100 | log4j-api 101 | runtime 102 | 103 | 104 | 105 | org.apache.logging.log4j 106 | log4j-core 107 | runtime 108 | 109 | 110 | 111 | 112 | org.junit.jupiter 113 | junit-jupiter-engine 114 | test 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | org.apache.maven.plugins 124 | maven-shade-plugin 125 | 126 | 127 | 128 | package 129 | 130 | shade 131 | 132 | 133 | 134 | 135 | org.apache.flink:force-shading 136 | com.google.code.findbugs:jsr305 137 | org.slf4j:* 138 | log4j:* 139 | 140 | 141 | 142 | 143 | 145 | *:* 146 | 147 | META-INF/*.SF 148 | META-INF/*.DSA 149 | META-INF/*.RSA 150 | 151 | 152 | 153 | 154 | 155 | com.cloudera.streaming.examples.flink.KafkaToHDFSSimpleJob 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | org.apache.avro 165 | avro-maven-plugin 166 | 167 | 168 | schemas 169 | generate-sources 170 | 171 | schema 172 | protocol 173 | idl-protocol 174 | 175 | 176 | ${project.basedir}/src/main/resources/avro 177 | ${project.basedir}/target/generated-sources/avro/ 178 | String 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | -------------------------------------------------------------------------------- /flink-secure-tutorial/src/main/java/com/cloudera/streaming/examples/flink/AvroDataGeneratorJob.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink; 20 | 21 | import org.apache.flink.api.java.utils.ParameterTool; 22 | import org.apache.flink.connector.base.DeliveryGuarantee; 23 | import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema; 24 | import org.apache.flink.connector.kafka.sink.KafkaSink; 25 | import org.apache.flink.formats.registry.cloudera.avro.ClouderaRegistryAvroKafkaRecordSerializationSchema; 26 | import org.apache.flink.streaming.api.datastream.DataStream; 27 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 28 | import org.apache.flink.streaming.api.functions.source.ParallelSourceFunction; 29 | 30 | import com.cloudera.streaming.examples.flink.data.Message; 31 | import org.apache.commons.lang3.RandomStringUtils; 32 | 33 | import java.util.concurrent.ThreadLocalRandom; 34 | 35 | import static com.cloudera.streaming.examples.flink.Constants.K_BOOTSTRAP_SERVERS; 36 | import static com.cloudera.streaming.examples.flink.Constants.K_KAFKA_TOPIC; 37 | 38 | /** 39 | * Generates random Messages to a Kafka topic. 
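 * <p>Messages are serialized with the Cloudera Schema Registry Avro serialization schema, keyed by the message id, and written through a {@code KafkaSink} configured for at-least-once delivery; generated records are also printed to the standard output.</p>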
40 | */ 41 | public class AvroDataGeneratorJob { 42 | 43 | public static void main(String[] args) throws Exception { 44 | ParameterTool params = Utils.parseArgs(args); 45 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 46 | 47 | String topic = params.getRequired(K_KAFKA_TOPIC); 48 | KafkaRecordSerializationSchema schema = ClouderaRegistryAvroKafkaRecordSerializationSchema 49 | .builder(topic) 50 | .setConfig(Utils.readSchemaRegistryProperties(params)) 51 | .setKey(Message::getId) 52 | .build(); 53 | 54 | KafkaSink kafkaSink = KafkaSink.builder() 55 | .setBootstrapServers(params.get(K_BOOTSTRAP_SERVERS)) 56 | .setRecordSerializer(schema) 57 | .setKafkaProducerConfig(Utils.readKafkaProperties(params)) 58 | .setDeliveryGuarantee(DeliveryGuarantee.AT_LEAST_ONCE) 59 | .build(); 60 | 61 | DataStream input = env.addSource(new DataGeneratorSource()) 62 | .name("Data Generator Source"); 63 | 64 | input.sinkTo(kafkaSink) 65 | .name("Kafka Sink") 66 | .uid("kafka-sink"); 67 | 68 | input.print(); 69 | 70 | env.execute("Avro Data Generator Job"); 71 | } 72 | 73 | /** 74 | * Generates Message objects with random content at random interval. 75 | */ 76 | public static class DataGeneratorSource implements ParallelSourceFunction { 77 | 78 | private volatile boolean isRunning = true; 79 | 80 | @Override 81 | public void run(SourceContext ctx) throws Exception { 82 | ThreadLocalRandom rnd = ThreadLocalRandom.current(); 83 | while (isRunning) { 84 | synchronized (ctx.getCheckpointLock()) { 85 | ctx.collect(new Message(System.currentTimeMillis(), 86 | RandomStringUtils.randomAlphabetic(10), 87 | RandomStringUtils.randomAlphanumeric(20))); 88 | } 89 | 90 | Thread.sleep(Math.abs(rnd.nextInt()) % 1000); 91 | } 92 | } 93 | 94 | @Override 95 | public void cancel() { 96 | isRunning = false; 97 | } 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /flink-secure-tutorial/src/main/java/com/cloudera/streaming/examples/flink/Constants.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink; 20 | 21 | /** 22 | * Property names and other constants used in this tutorial. 
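 *
 * <p>Values for these keys are normally supplied on the command line or through
 * {@code --properties.file}; for example, the sample {@code config/job.properties} in this module
 * sets (host names left blank in the sample file):
 * <pre>{@code
 * kafkaTopic=flink
 * hdfsOutput=hdfs:///tmp/flink-sec-tutorial
 * kafka.bootstrap.servers=:9093
 * kafka.security.protocol=SASL_SSL
 * schema.registry.url=https://:7790/api/v1
 * }</pre>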
23 | */ 24 | public class Constants { 25 | 26 | public static final String SENSITIVE_KEYS_KEY = "sensitive.keys"; 27 | public static final String MASK = "********"; 28 | public static final String KAFKA_PREFIX = "kafka."; 29 | public static final String K_BOOTSTRAP_SERVERS = "kafka.bootstrap.servers"; 30 | public static final String K_KAFKA_TOPIC = "kafkaTopic"; 31 | public static final String K_HDFS_OUTPUT = "hdfsOutput"; 32 | 33 | public static final String K_SCHEMA_REG_URL = "schema.registry.url"; 34 | public static final String K_SCHEMA_REG_SSL_CLIENT_KEY = "schema.registry.client.ssl"; 35 | public static final String K_TRUSTSTORE_PATH = "trustStorePath"; 36 | public static final String K_TRUSTSTORE_PASSWORD = "trustStorePassword"; 37 | public static final String K_KEYSTORE_PASSWORD = "keyStorePassword"; 38 | 39 | public static final String K_PROPERTIES_FILE = "properties.file"; 40 | 41 | } 42 | -------------------------------------------------------------------------------- /flink-secure-tutorial/src/main/java/com/cloudera/streaming/examples/flink/KafkaToHDFSAvroJob.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink; 20 | 21 | import org.apache.flink.api.common.eventtime.WatermarkStrategy; 22 | import org.apache.flink.api.common.serialization.SimpleStringEncoder; 23 | import org.apache.flink.api.java.utils.ParameterTool; 24 | import org.apache.flink.connector.file.sink.FileSink; 25 | import org.apache.flink.connector.kafka.source.KafkaSource; 26 | import org.apache.flink.connector.kafka.source.reader.deserializer.KafkaRecordDeserializationSchema; 27 | import org.apache.flink.core.fs.Path; 28 | import org.apache.flink.formats.registry.cloudera.avro.ClouderaRegistryAvroKafkaDeserializationSchema; 29 | import org.apache.flink.streaming.api.datastream.DataStream; 30 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 31 | import org.apache.flink.streaming.connectors.kafka.KafkaDeserializationSchema; 32 | 33 | import com.cloudera.streaming.examples.flink.data.Message; 34 | 35 | import static com.cloudera.streaming.examples.flink.Constants.K_BOOTSTRAP_SERVERS; 36 | import static com.cloudera.streaming.examples.flink.Constants.K_HDFS_OUTPUT; 37 | import static com.cloudera.streaming.examples.flink.Constants.K_KAFKA_TOPIC; 38 | 39 | /** 40 | * Channels a Kafka topic to an HDFS after converting it to a string. 
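 *
 * <p>Avro {@code Message} records are read through the Schema Registry deserializer and flattened
 * to comma-separated lines before the {@code FileSink} writes them out, so each output row has the
 * form:
 * <pre>{@code
 * id,name,description
 * }</pre>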
41 | */ 42 | public class KafkaToHDFSAvroJob { 43 | 44 | public static void main(String[] args) throws Exception { 45 | ParameterTool params = Utils.parseArgs(args); 46 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 47 | 48 | KafkaDeserializationSchema schema = ClouderaRegistryAvroKafkaDeserializationSchema 49 | .builder(Message.class) 50 | .setConfig(Utils.readSchemaRegistryProperties(params)) 51 | .build(); 52 | 53 | KafkaSource kafkaSource = KafkaSource.builder() 54 | .setBootstrapServers(params.get(K_BOOTSTRAP_SERVERS)) 55 | .setTopics(params.get(K_KAFKA_TOPIC)) 56 | .setDeserializer(KafkaRecordDeserializationSchema.of(schema)) 57 | .setProperties(Utils.readKafkaProperties(params)) 58 | .build(); 59 | 60 | DataStream source = env.fromSource(kafkaSource, WatermarkStrategy.noWatermarks(), "Kafka Source") 61 | .uid("kafka-source") 62 | .map(record -> record.getId() + "," + record.getName() + "," + record.getDescription()) 63 | .name("To Output String") 64 | .uid("to-output-string"); 65 | 66 | FileSink sink = FileSink 67 | .forRowFormat(new Path(params.getRequired(K_HDFS_OUTPUT)), new SimpleStringEncoder("UTF-8")) 68 | .build(); 69 | 70 | source.sinkTo(sink) 71 | .name("FS Sink") 72 | .uid("fs-sink"); 73 | 74 | source.print(); 75 | 76 | env.execute("Secured Avro Flink Streaming Job"); 77 | } 78 | 79 | } 80 | -------------------------------------------------------------------------------- /flink-secure-tutorial/src/main/java/com/cloudera/streaming/examples/flink/KafkaToHDFSSimpleJob.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink; 20 | 21 | import org.apache.flink.api.common.eventtime.WatermarkStrategy; 22 | import org.apache.flink.api.common.serialization.SimpleStringEncoder; 23 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 24 | import org.apache.flink.api.java.utils.ParameterTool; 25 | import org.apache.flink.connector.file.sink.FileSink; 26 | import org.apache.flink.connector.kafka.source.KafkaSource; 27 | import org.apache.flink.core.fs.Path; 28 | import org.apache.flink.streaming.api.datastream.DataStream; 29 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 30 | 31 | import static com.cloudera.streaming.examples.flink.Constants.K_BOOTSTRAP_SERVERS; 32 | import static com.cloudera.streaming.examples.flink.Constants.K_HDFS_OUTPUT; 33 | import static com.cloudera.streaming.examples.flink.Constants.K_KAFKA_TOPIC; 34 | 35 | /** 36 | * Channels a Kafka topic to an HDFS file. 
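 *
 * <p>Values are consumed as plain strings with {@code SimpleStringSchema} and written unchanged to
 * the path given by {@code hdfsOutput}; {@code RandomKafkaDataGeneratorJob} can be used to produce
 * matching string records for this job.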
37 | */ 38 | public class KafkaToHDFSSimpleJob { 39 | 40 | public static void main(String[] args) throws Exception { 41 | ParameterTool params = Utils.parseArgs(args); 42 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 43 | 44 | KafkaSource consumer = KafkaSource.builder() 45 | .setBootstrapServers(params.get(K_BOOTSTRAP_SERVERS)) 46 | .setTopics(params.get(K_KAFKA_TOPIC)) 47 | .setValueOnlyDeserializer(new SimpleStringSchema()) 48 | .setProperties(Utils.readKafkaProperties(params)) 49 | .build(); 50 | DataStream source = env.fromSource(consumer, WatermarkStrategy.noWatermarks(), "Kafka Source").uid("kafka-source"); 51 | 52 | FileSink sink = FileSink 53 | .forRowFormat(new Path(params.getRequired(K_HDFS_OUTPUT)), new SimpleStringEncoder("UTF-8")) 54 | .build(); 55 | 56 | source.sinkTo(sink) 57 | .name("FS Sink") 58 | .uid("fs-sink"); 59 | source.print(); 60 | 61 | env.execute("Secured Flink Streaming Job"); 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /flink-secure-tutorial/src/main/java/com/cloudera/streaming/examples/flink/RandomKafkaDataGeneratorJob.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink; 20 | 21 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 22 | import org.apache.flink.api.java.utils.ParameterTool; 23 | import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema; 24 | import org.apache.flink.connector.kafka.sink.KafkaSink; 25 | import org.apache.flink.streaming.api.datastream.DataStream; 26 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 27 | import org.apache.flink.streaming.api.functions.source.ParallelSourceFunction; 28 | 29 | import java.util.UUID; 30 | import java.util.concurrent.ThreadLocalRandom; 31 | 32 | import static com.cloudera.streaming.examples.flink.Constants.K_BOOTSTRAP_SERVERS; 33 | import static com.cloudera.streaming.examples.flink.Constants.K_KAFKA_TOPIC; 34 | 35 | /** 36 | * Generates random UUID strings to a Kafka topic. 
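 *
 * <p>Each parallel source instance emits one random UUID string and then sleeps for a random
 * interval of up to one second, keeping the generated load light.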
37 | */ 38 | public class RandomKafkaDataGeneratorJob { 39 | 40 | public static void main(String[] args) throws Exception { 41 | ParameterTool params = Utils.parseArgs(args); 42 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 43 | 44 | KafkaSink kafkaSink = KafkaSink.builder() 45 | .setBootstrapServers(params.get(K_BOOTSTRAP_SERVERS)) 46 | .setRecordSerializer(KafkaRecordSerializationSchema.builder() 47 | .setTopic(params.get(K_KAFKA_TOPIC)) 48 | .setValueSerializationSchema(new SimpleStringSchema()) 49 | .build() 50 | ) 51 | .setKafkaProducerConfig(Utils.readKafkaProperties(params)) 52 | .build(); 53 | 54 | DataStream input = env.addSource(new UUIDGeneratorSource()) 55 | .name("Data Generator Source") 56 | .uid("data-generator-source"); 57 | 58 | input.sinkTo(kafkaSink) 59 | .name("Kafka Sink") 60 | .uid("kafka-sink"); 61 | 62 | input.print(); 63 | 64 | env.execute("String Data Generator Job"); 65 | } 66 | 67 | /** 68 | * Source generating random UUID strings. 69 | */ 70 | public static class UUIDGeneratorSource implements ParallelSourceFunction { 71 | 72 | private volatile boolean isRunning = true; 73 | 74 | @Override 75 | public void run(SourceContext ctx) throws Exception { 76 | while (isRunning) { 77 | ctx.collect(UUID.randomUUID().toString()); 78 | Thread.sleep(Math.abs(ThreadLocalRandom.current().nextInt()) % 1000); 79 | } 80 | } 81 | 82 | @Override 83 | public void cancel() { 84 | isRunning = false; 85 | } 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /flink-secure-tutorial/src/main/java/com/cloudera/streaming/examples/flink/Utils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink; 20 | 21 | import org.apache.flink.api.java.utils.ParameterTool; 22 | import org.apache.flink.client.cli.CliFrontend; 23 | import org.apache.flink.configuration.Configuration; 24 | import org.apache.flink.configuration.GlobalConfiguration; 25 | import org.apache.flink.util.Preconditions; 26 | import org.apache.flink.util.encrypttool.EncryptTool; 27 | 28 | import org.slf4j.Logger; 29 | import org.slf4j.LoggerFactory; 30 | 31 | import java.io.IOException; 32 | import java.util.HashMap; 33 | import java.util.Map; 34 | import java.util.Properties; 35 | 36 | import static com.cloudera.streaming.examples.flink.Constants.KAFKA_PREFIX; 37 | import static com.cloudera.streaming.examples.flink.Constants.K_KEYSTORE_PASSWORD; 38 | import static com.cloudera.streaming.examples.flink.Constants.K_PROPERTIES_FILE; 39 | import static com.cloudera.streaming.examples.flink.Constants.K_SCHEMA_REG_SSL_CLIENT_KEY; 40 | import static com.cloudera.streaming.examples.flink.Constants.K_SCHEMA_REG_URL; 41 | import static com.cloudera.streaming.examples.flink.Constants.K_TRUSTSTORE_PASSWORD; 42 | import static com.cloudera.streaming.examples.flink.Constants.K_TRUSTSTORE_PATH; 43 | import static com.cloudera.streaming.examples.flink.Constants.MASK; 44 | import static com.cloudera.streaming.examples.flink.Constants.SENSITIVE_KEYS_KEY; 45 | 46 | /** 47 | * Utility functions for the security tutorial. 48 | */ 49 | public final class Utils { 50 | 51 | private Utils() { 52 | throw new UnsupportedOperationException("Utils should not be instantiated"); 53 | } 54 | 55 | private static final Logger LOG = LoggerFactory.getLogger(Utils.class); 56 | 57 | public static ParameterTool parseArgs(String[] args) throws IOException { 58 | 59 | // Processing job properties 60 | ParameterTool params = ParameterTool.fromArgs(args); 61 | if (params.has(K_PROPERTIES_FILE)) { 62 | params = ParameterTool.fromPropertiesFile(params.getRequired(K_PROPERTIES_FILE)).mergeWith(params); 63 | } 64 | 65 | LOG.info("### Job parameters:"); 66 | for (String key : params.getProperties().stringPropertyNames()) { 67 | LOG.info("Job Param: {}={}", key, isSensitive(key, params) ? MASK : params.get(key)); 68 | } 69 | return params; 70 | } 71 | 72 | public static Properties readKafkaProperties(ParameterTool params) { 73 | Properties properties = new Properties(); 74 | for (String key : params.getProperties().stringPropertyNames()) { 75 | if (key.startsWith(KAFKA_PREFIX)) { 76 | String keyWithoutPrefix = key.substring(KAFKA_PREFIX.length()); 77 | String value = isSensitive(key, params) ? decrypt(params.get(key)) : params.get(key); 78 | 79 | properties.setProperty(keyWithoutPrefix, value); 80 | } 81 | } 82 | 83 | LOG.info("### Kafka parameters:"); 84 | for (String key : properties.stringPropertyNames()) { 85 | LOG.info("Loading configuration property: {}, {}", key, isSensitive(key, params) ? MASK : properties.get(key)); 86 | } 87 | return properties; 88 | } 89 | 90 | public static Map readSchemaRegistryProperties(ParameterTool params) { 91 | 92 | //Setting up schema registry client 93 | 94 | Map schemaRegistryConf = new HashMap<>(); 95 | schemaRegistryConf.put(K_SCHEMA_REG_URL, params.getRequired(K_SCHEMA_REG_URL)); 96 | 97 | if (params.getRequired(K_SCHEMA_REG_URL).startsWith("https")) { 98 | Map sslClientConfig = new HashMap<>(); 99 | String sslKey = K_SCHEMA_REG_SSL_CLIENT_KEY + "." + K_TRUSTSTORE_PATH; 100 | sslClientConfig.put(K_TRUSTSTORE_PATH, isSensitive(sslKey, params) ? 
decrypt(params.getRequired(sslKey)) : params.getRequired(sslKey)); 101 | sslKey = K_SCHEMA_REG_SSL_CLIENT_KEY + "." + K_TRUSTSTORE_PASSWORD; 102 | sslClientConfig.put(K_TRUSTSTORE_PASSWORD, isSensitive(sslKey, params) ? decrypt(params.getRequired(sslKey)) : params.getRequired(sslKey)); 103 | sslClientConfig.put(K_KEYSTORE_PASSWORD, ""); //ugly hack needed for SchemaRegistryClient 104 | 105 | schemaRegistryConf.put(K_SCHEMA_REG_SSL_CLIENT_KEY, sslClientConfig); 106 | } 107 | 108 | LOG.info("### Schema Registry parameters:"); 109 | for (String key : schemaRegistryConf.keySet()) { 110 | LOG.info("Schema Registry param: {}={}", key, isSensitive(key, params) ? MASK : schemaRegistryConf.get(key)); 111 | } 112 | return schemaRegistryConf; 113 | } 114 | 115 | public static boolean isSensitive(String key, ParameterTool params) { 116 | Preconditions.checkNotNull(key, "key is null"); 117 | final String value = params.get(SENSITIVE_KEYS_KEY); 118 | if (value == null) { 119 | return false; 120 | } 121 | String keyInLower = key.toLowerCase(); 122 | String[] sensitiveKeys = value.split(","); 123 | 124 | for (String hideKey : sensitiveKeys) { 125 | if (keyInLower.length() >= hideKey.length() && keyInLower.contains(hideKey)) { 126 | return true; 127 | } 128 | } 129 | return false; 130 | } 131 | 132 | public static String decrypt(String input) { 133 | Preconditions.checkNotNull(input, "key is null"); 134 | return EncryptTool.getInstance(getConfiguration()).decrypt(input); 135 | } 136 | 137 | public static Configuration getConfiguration() { 138 | return ConfigHolder.INSTANCE; 139 | } 140 | 141 | private static class ConfigHolder { 142 | static final Configuration INSTANCE = GlobalConfiguration.loadConfiguration(CliFrontend.getConfigurationDirectoryFromEnv()); 143 | } 144 | 145 | } 146 | -------------------------------------------------------------------------------- /flink-secure-tutorial/src/main/resources/avro/message.avsc: -------------------------------------------------------------------------------- 1 | {"namespace": "com.cloudera.streaming.examples.flink.data", 2 | "type": "record", 3 | "name": "Message", 4 | "fields": [ 5 | {"name": "id", "type": "long"}, 6 | {"name": "name", "type": "string"}, 7 | {"name": "description", "type": "string"} 8 | ] 9 | } -------------------------------------------------------------------------------- /flink-simple-tutorial/.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | target/ 3 | dependency-reduced-pom.xml 4 | .DS_Store 5 | 6 | # eclipse 7 | /bin/ 8 | .classpath 9 | .project 10 | .settings 11 | 12 | -------------------------------------------------------------------------------- /flink-simple-tutorial/LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 
135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2009-2019 Cloudera Inc. 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 
194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /flink-simple-tutorial/images/TaskLogs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudera/flink-tutorials/04560e95090d21686d0485331d64adce63abab87/flink-simple-tutorial/images/TaskLogs.png -------------------------------------------------------------------------------- /flink-simple-tutorial/images/YarnApp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudera/flink-tutorials/04560e95090d21686d0485331d64adce63abab87/flink-simple-tutorial/images/YarnApp.png -------------------------------------------------------------------------------- /flink-simple-tutorial/kafka-appender/log4j2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | your_broker_1:9092,your_broker_2:9092,your_broker_3:9092 7 | 8 | 9 | %d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n 10 | 11 | 12 | 13 | 14 | 15 | %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /flink-simple-tutorial/pom.xml: -------------------------------------------------------------------------------- 1 | 19 | 22 | 4.0.0 23 | 24 | 25 | com.cloudera.flink 26 | flink-tutorials 27 | 1.20.1-csa1.15.0.0 28 | 29 | 30 | flink-simple-tutorial 31 | Flink Tutorials :: Flink Simple Tutorial 32 | 33 | 34 | UTF-8 35 | 1.8 36 | 2.12 37 | ${java.version} 38 | ${java.version} 39 | 40 | 41 | 42 | 43 | 44 | org.apache.flink 45 | flink-clients 46 | provided 47 | 48 | 49 | 50 | org.apache.flink 51 | flink-java 52 | provided 53 | 54 | 55 | 56 | org.apache.flink 57 | flink-streaming-java 58 | provided 59 | 60 | 61 | 62 | org.apache.flink 63 | flink-connector-files 64 | provided 65 | 66 | 67 | 68 | org.apache.logging.log4j 69 | log4j-slf4j-impl 70 | provided 71 | 72 | 73 | 74 | 75 | org.apache.logging.log4j 76 | log4j-api 77 | runtime 78 | 79 | 80 | 81 | org.apache.logging.log4j 82 | log4j-core 83 | runtime 84 | 85 | 86 | 87 | 88 | org.junit.jupiter 89 | junit-jupiter-engine 90 | test 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | org.apache.maven.plugins 100 | maven-shade-plugin 101 | 102 | 103 | 104 | package 105 | 106 | shade 107 | 108 | 109 | 110 | 111 | org.apache.flink:force-shading 112 | com.google.code.findbugs:jsr305 113 | org.slf4j:* 114 | log4j:* 115 | 116 | 117 | 118 | 119 | 121 | *:* 122 | 123 | META-INF/*.SF 124 | META-INF/*.DSA 125 | META-INF/*.RSA 126 | 127 | 128 | 129 | 130 | 131 | com.cloudera.streaming.examples.flink.HeapMonitorPipeline 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | -------------------------------------------------------------------------------- /flink-simple-tutorial/src/main/java/com/cloudera/streaming/examples/flink/AlertingFunction.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. 
under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink; 20 | 21 | import org.apache.flink.api.common.functions.FlatMapFunction; 22 | import org.apache.flink.api.java.utils.ParameterTool; 23 | import org.apache.flink.util.Collector; 24 | 25 | import com.cloudera.streaming.examples.flink.types.HeapAlert; 26 | import com.cloudera.streaming.examples.flink.types.HeapMetrics; 27 | 28 | /** 29 | * Example filtering function for finding mask in the heap data ratio. 30 | */ 31 | public class AlertingFunction implements FlatMapFunction { 32 | 33 | public static final String ALERT_MASK = "alertMask"; 34 | private final String alertMask; 35 | 36 | public AlertingFunction(ParameterTool params) { 37 | alertMask = params.get(ALERT_MASK, "42"); 38 | } 39 | 40 | @Override 41 | public void flatMap(HeapMetrics stats, Collector out) { 42 | if (Double.toString(stats.ratio).contains(alertMask)) { 43 | out.collect(HeapAlert.maskRatioMatch(alertMask, stats)); 44 | } 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /flink-simple-tutorial/src/main/java/com/cloudera/streaming/examples/flink/HeapMonitorPipeline.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink; 20 | 21 | import org.apache.flink.api.common.serialization.SimpleStringEncoder; 22 | import org.apache.flink.api.java.utils.ParameterTool; 23 | import org.apache.flink.connector.file.sink.FileSink; 24 | import org.apache.flink.core.fs.Path; 25 | import org.apache.flink.streaming.api.datastream.DataStream; 26 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 27 | 28 | import com.cloudera.streaming.examples.flink.types.HeapAlert; 29 | import com.cloudera.streaming.examples.flink.types.HeapMetrics; 30 | 31 | /** 32 | * Simple Flink example program for the tutorial that identifies patterns in the stream of java heap metrics, and 33 | * creates dummy alerts for them if they contain a certain mask. 34 | */ 35 | public class HeapMonitorPipeline { 36 | 37 | public static void main(String[] args) throws Exception { 38 | 39 | // Read the parameters from the commandline 40 | ParameterTool params = ParameterTool.fromArgs(args); 41 | final boolean clusterExec = params.getBoolean("cluster", false); 42 | final String output = params.get("output", "hdfs:///tmp/flink-heap-stats"); 43 | 44 | // Create and configure the execution environment 45 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 46 | env.enableCheckpointing(10_000); 47 | 48 | // Define our source 49 | DataStream heapStats = env.addSource(new HeapMonitorSource(100)) 50 | .name("Heap Monitor Source"); 51 | 52 | // Define the sink for the whole statistics stream 53 | if (!clusterExec) { 54 | // In local execution mode print the stats to stdout 55 | heapStats.print(); 56 | } else { 57 | // In cluster execution mode write the stats to HDFS 58 | final FileSink sfs = FileSink 59 | .forRowFormat(new Path(output), new SimpleStringEncoder("UTF-8")) 60 | .build(); 61 | 62 | heapStats.map(HeapMetrics::toString).sinkTo(sfs).name("HDFS Sink"); 63 | } 64 | 65 | // Detect suspicious events in the statistics stream, defining this as a separate function enables testing 66 | DataStream alertStream = computeHeapAlerts(heapStats, params); 67 | 68 | // Write the output to the log stream, we can direct this to stderr or to Kafka via the log4j configuration 69 | alertStream.addSink(new LogSink<>()).name("Logger Sink"); 70 | 71 | env.execute("HeapMonitor"); 72 | } 73 | 74 | public static DataStream computeHeapAlerts(DataStream statsInput, ParameterTool params) { 75 | return statsInput.flatMap(new AlertingFunction(params)).name("Create Alerts"); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /flink-simple-tutorial/src/main/java/com/cloudera/streaming/examples/flink/HeapMonitorSource.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink; 20 | 21 | import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; 22 | import org.apache.flink.streaming.api.functions.source.SourceFunction; 23 | 24 | import com.cloudera.streaming.examples.flink.types.HeapMetrics; 25 | import org.slf4j.Logger; 26 | import org.slf4j.LoggerFactory; 27 | 28 | import java.lang.management.ManagementFactory; 29 | import java.lang.management.MemoryPoolMXBean; 30 | import java.lang.management.MemoryType; 31 | import java.lang.management.MemoryUsage; 32 | import java.net.InetAddress; 33 | 34 | /** 35 | * Source for the example, provides a stream of heap usage data. 36 | */ 37 | public class HeapMonitorSource extends RichParallelSourceFunction { 38 | 39 | private static final Logger LOG = LoggerFactory.getLogger(HeapMonitorSource.class); 40 | 41 | private final long sleepMillis; 42 | private volatile boolean running = true; 43 | 44 | public HeapMonitorSource(long sleepMillis) { 45 | this.sleepMillis = sleepMillis; 46 | } 47 | 48 | @Override 49 | public void run(SourceFunction.SourceContext sourceContext) throws Exception { 50 | LOG.info("starting HeapMonitorSource"); 51 | 52 | int subtaskIndex = this.getRuntimeContext().getIndexOfThisSubtask(); 53 | String hostname = InetAddress.getLocalHost().getHostName(); 54 | 55 | while (running) { 56 | Thread.sleep(sleepMillis); 57 | 58 | for (MemoryPoolMXBean mpBean : ManagementFactory.getMemoryPoolMXBeans()) { 59 | if (mpBean.getType() == MemoryType.HEAP) { 60 | MemoryUsage memoryUsage = mpBean.getUsage(); 61 | long used = memoryUsage.getUsed(); 62 | long max = memoryUsage.getMax(); 63 | 64 | synchronized (sourceContext.getCheckpointLock()) { 65 | sourceContext.collect(new HeapMetrics(mpBean.getName(), used, max, (double) used / max, subtaskIndex, hostname)); 66 | } 67 | } 68 | } 69 | } 70 | } 71 | 72 | @Override 73 | public void cancel() { 74 | this.running = false; 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /flink-simple-tutorial/src/main/java/com/cloudera/streaming/examples/flink/LogSink.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink; 20 | 21 | import org.apache.flink.streaming.api.functions.sink.SinkFunction; 22 | 23 | import org.slf4j.Logger; 24 | import org.slf4j.LoggerFactory; 25 | 26 | /** 27 | * Dummy sink that sends every value to the logging framework. 
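 *
 * <p>Because records are only handed to the logging framework, their destination is controlled by
 * the Log4j configuration: they can be directed to stderr or forwarded to a Kafka topic via the
 * {@code kafka-appender/log4j2.xml} configuration shipped with this tutorial. Typical usage, as in
 * {@code HeapMonitorPipeline}:
 * <pre>{@code
 * alertStream.addSink(new LogSink<>()).name("Logger Sink");
 * }</pre>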
28 | */ 29 | public class LogSink implements SinkFunction { 30 | 31 | private static final Logger LOG = LoggerFactory.getLogger(LogSink.class); 32 | 33 | @Override 34 | public void invoke(T value, Context context) { 35 | if (value != null) { 36 | LOG.info(value.toString()); 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /flink-simple-tutorial/src/main/java/com/cloudera/streaming/examples/flink/types/HeapAlert.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink.types; 20 | 21 | import java.util.Objects; 22 | 23 | /** 24 | * Dummy alert for founding a mask in the heap usage ratios. 25 | */ 26 | public class HeapAlert { 27 | 28 | private static final String MASK_RATIO_MATCH_MSG = " was found in the HeapMetrics ratio."; 29 | 30 | public String message; 31 | public HeapMetrics triggeringMetrics; 32 | 33 | public HeapAlert() {} 34 | 35 | public HeapAlert(String message, HeapMetrics triggeringMetrics) { 36 | this.message = message; 37 | this.triggeringMetrics = triggeringMetrics; 38 | } 39 | 40 | public static HeapAlert maskRatioMatch(String alertMask, HeapMetrics heapMetrics){ 41 | return new HeapAlert(alertMask + MASK_RATIO_MATCH_MSG, heapMetrics); 42 | } 43 | 44 | @Override 45 | public String toString() { 46 | return "HeapAlert{" + 47 | "message='" + message + '\'' + 48 | ", triggeringStats=" + triggeringMetrics + 49 | '}'; 50 | } 51 | 52 | @Override 53 | public boolean equals(Object o) { 54 | if (this == o) { 55 | return true; 56 | } 57 | if (o == null || getClass() != o.getClass()) { 58 | return false; 59 | } 60 | 61 | HeapAlert heapAlert = (HeapAlert) o; 62 | return Objects.equals(message, heapAlert.message) && 63 | Objects.equals(triggeringMetrics, heapAlert.triggeringMetrics); 64 | } 65 | 66 | @Override 67 | public int hashCode() { 68 | return Objects.hash(message, triggeringMetrics); 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /flink-simple-tutorial/src/main/java/com/cloudera/streaming/examples/flink/types/HeapMetrics.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink.types; 20 | 21 | import java.util.Objects; 22 | 23 | /** 24 | * Represents the stream of heap metrics data used as the source for the example. 25 | */ 26 | public class HeapMetrics { 27 | 28 | public static final String OLD_GEN = "PS Old Gen"; 29 | public static final String EDEN = "PS Eden Space"; 30 | public static final String SURVIVOR = "PS Survivor Space"; 31 | 32 | public String area; 33 | /** 34 | * Bytes used for the old generation of the heap. 35 | */ 36 | public long used; 37 | /** 38 | * Maximum bytes allocated for the old generation of the heap. 39 | */ 40 | public long max; 41 | /** 42 | * Ratio of used out of the maximum old generation heap. 43 | */ 44 | public double ratio; 45 | 46 | /** 47 | * ID of the Flink job. 48 | */ 49 | public Integer jobId; 50 | 51 | /** 52 | * Host the Flink job is running on. 53 | */ 54 | public String hostname; 55 | 56 | public HeapMetrics() { 57 | } 58 | 59 | public HeapMetrics(String area, long used, long max, double ratio, Integer jobId, String hostname) { 60 | this.area = area; 61 | this.used = used; 62 | this.max = max; 63 | this.ratio = ratio; 64 | this.jobId = jobId; 65 | this.hostname = hostname; 66 | } 67 | 68 | @Override 69 | public String toString() { 70 | return "HeapMetrics{" + 71 | "area=" + area + 72 | ", used=" + used + 73 | ", max=" + max + 74 | ", ratio=" + ratio + 75 | ", jobId=" + jobId + 76 | ", hostname='" + hostname + '\'' + 77 | '}'; 78 | } 79 | 80 | @Override 81 | public boolean equals(Object o) { 82 | if (this == o) { 83 | return true; 84 | } 85 | if (o == null || getClass() != o.getClass()) { 86 | return false; 87 | } 88 | 89 | HeapMetrics heapMetrics = (HeapMetrics) o; 90 | return used == heapMetrics.used && 91 | max == heapMetrics.max && 92 | Double.compare(heapMetrics.ratio, ratio) == 0 && 93 | Objects.equals(area, heapMetrics.area) && 94 | Objects.equals(jobId, heapMetrics.jobId) && 95 | Objects.equals(hostname, heapMetrics.hostname); 96 | } 97 | 98 | @Override 99 | public int hashCode() { 100 | return Objects.hash(area, used, max, ratio, jobId, hostname); 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /flink-simple-tutorial/src/main/resources/log4j2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | %d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n 7 | 8 | 9 | 10 | 11 | 12 | %d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /flink-simple-tutorial/src/test/java/com/cloudera/streaming/examples/flink/HeapMonitorPipelineTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink; 20 | 21 | import org.apache.flink.api.java.utils.ParameterTool; 22 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 23 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 24 | import org.apache.flink.streaming.api.functions.sink.SinkFunction; 25 | 26 | import com.cloudera.streaming.examples.flink.types.HeapAlert; 27 | import com.cloudera.streaming.examples.flink.types.HeapMetrics; 28 | import org.apache.commons.compress.utils.Sets; 29 | import org.junit.jupiter.api.Test; 30 | 31 | import java.util.HashSet; 32 | import java.util.Set; 33 | 34 | import static org.junit.jupiter.api.Assertions.assertEquals; 35 | 36 | /** 37 | * Simple unit test for the simple example. 38 | */ 39 | class HeapMonitorPipelineTest { 40 | 41 | private static Set testOutput = new HashSet<>(); 42 | 43 | @Test 44 | void testPipeline() throws Exception { 45 | 46 | final String alertMask = "42"; 47 | 48 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 49 | 50 | HeapMetrics alert1 = testStats(0.42); 51 | HeapMetrics regular1 = testStats(0.452); 52 | HeapMetrics regular2 = testStats(0.245); 53 | HeapMetrics alert2 = testStats(0.9423); 54 | 55 | DataStreamSource testInput = env.fromElements(alert1, alert2, regular1, regular2); 56 | HeapMonitorPipeline.computeHeapAlerts(testInput, ParameterTool.fromArgs(new String[]{"--alertMask", alertMask})) 57 | .addSink(new SinkFunction() { 58 | @Override 59 | public void invoke(HeapAlert value, Context context) { 60 | testOutput.add(value); 61 | } 62 | }) 63 | .setParallelism(1); 64 | 65 | env.execute(); 66 | 67 | assertEquals(Sets.newHashSet(HeapAlert.maskRatioMatch(alertMask, alert1), 68 | HeapAlert.maskRatioMatch(alertMask, alert2)), testOutput); 69 | } 70 | 71 | private HeapMetrics testStats(double ratio) { 72 | return new HeapMetrics(HeapMetrics.OLD_GEN, 0, 0, ratio, 0, "testhost"); 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /flink-simple-tutorial/src/test/resources/log4j2-test.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to Cloudera, Inc. under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. 
You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | rootLogger.level = INFO 20 | rootLogger.appenderRef.test.ref = TestLogger 21 | 22 | appender.testlogger.name = TestLogger 23 | appender.testlogger.type = CONSOLE 24 | appender.testlogger.target = SYSTEM_OUT 25 | appender.testlogger.layout.type = PatternLayout 26 | appender.testlogger.layout.pattern = %d %p %C{1.} [%t] %m%n 27 | -------------------------------------------------------------------------------- /flink-stateful-tutorial/.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | dependency-reduced-pom.xml 3 | .DS_Store 4 | 5 | # idea 6 | .idea/ 7 | *.iml 8 | 9 | # eclipse 10 | /bin/ 11 | .classpath 12 | .project 13 | .settings 14 | -------------------------------------------------------------------------------- /flink-stateful-tutorial/config/job.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to Cloudera, Inc. under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | ################################################################################ 18 | 19 | kafka.bootstrap.servers=:9092,:9092,:9092 20 | kafka.group.id=flink 21 | kafka.partition.discovery.interval.ms=60000 22 | kafka.commit.offsets.on.checkpoint=true 23 | kafka.retries=3 24 | transaction.input.topic=transaction.log.1 25 | generate.queries=false 26 | query.input.topic=query.input.log.1 27 | query.output.topic=query.output.log.1 28 | num.items=1000000 29 | sleep=100 30 | -------------------------------------------------------------------------------- /flink-stateful-tutorial/images/InventoryLogic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudera/flink-tutorials/04560e95090d21686d0485331d64adce63abab87/flink-stateful-tutorial/images/InventoryLogic.png -------------------------------------------------------------------------------- /flink-stateful-tutorial/images/ItemManager.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudera/flink-tutorials/04560e95090d21686d0485331d64adce63abab87/flink-stateful-tutorial/images/ItemManager.png -------------------------------------------------------------------------------- /flink-stateful-tutorial/images/bp_ok.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudera/flink-tutorials/04560e95090d21686d0485331d64adce63abab87/flink-stateful-tutorial/images/bp_ok.png -------------------------------------------------------------------------------- /flink-stateful-tutorial/images/cp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudera/flink-tutorials/04560e95090d21686d0485331d64adce63abab87/flink-stateful-tutorial/images/cp.png -------------------------------------------------------------------------------- /flink-stateful-tutorial/pom.xml: -------------------------------------------------------------------------------- 1 | 19 | 22 | 4.0.0 23 | 24 | 25 | com.cloudera.flink 26 | flink-tutorials 27 | 1.20.1-csa1.15.0.0 28 | 29 | 30 | flink-stateful-tutorial 31 | Flink Tutorials :: Flink Stateful Tutorial 32 | 33 | 34 | UTF-8 35 | 1.8 36 | 2.12 37 | ${java.version} 38 | ${java.version} 39 | 40 | 41 | 42 | 43 | 44 | org.apache.flink 45 | flink-clients 46 | provided 47 | 48 | 49 | 50 | org.apache.flink 51 | flink-connector-files 52 | provided 53 | 54 | 55 | 56 | org.apache.flink 57 | flink-java 58 | provided 59 | 60 | 61 | 62 | org.apache.flink 63 | flink-statebackend-rocksdb 64 | provided 65 | 66 | 67 | 68 | org.apache.flink 69 | flink-streaming-java 70 | provided 71 | 72 | 73 | 74 | org.apache.logging.log4j 75 | log4j-slf4j-impl 76 | provided 77 | 78 | 79 | 80 | 81 | org.apache.flink 82 | flink-connector-kafka 83 | 84 | 85 | 86 | com.fasterxml.jackson.core 87 | jackson-databind 88 | 89 | 90 | 91 | io.dropwizard.metrics 92 | metrics-core 93 | 94 | 95 | 96 | 97 | org.apache.logging.log4j 98 | log4j-api 99 | runtime 100 | 101 | 102 | 103 | org.apache.logging.log4j 104 | log4j-core 105 | runtime 106 | 107 | 108 | 109 | mysql 110 | mysql-connector-java 111 | runtime 112 | 113 | 114 | 115 | 116 | org.apache.flink 117 | flink-test-utils 118 | test 119 | 120 | 121 | 122 | org.junit.jupiter 123 | junit-jupiter-engine 124 | test 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | org.apache.maven.plugins 134 | maven-shade-plugin 135 | 136 | 137 | 138 | package 139 | 140 | 
shade 141 | 142 | 143 | 144 | 145 | org.apache.flink:force-shading 146 | com.google.code.findbugs:jsr305 147 | org.slf4j:* 148 | log4j:* 149 | 150 | 151 | 152 | 153 | 155 | *:* 156 | 157 | META-INF/*.SF 158 | META-INF/*.DSA 159 | META-INF/*.RSA 160 | 161 | 162 | 163 | 164 | 165 | com.cloudera.streaming.examples.flink.KafkaItemTransactionJob 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | -------------------------------------------------------------------------------- /flink-stateful-tutorial/src/main/java/com/cloudera/streaming/examples/flink/ItemTransactionJob.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink; 20 | 21 | import org.apache.flink.api.common.eventtime.WatermarkStrategy; 22 | import org.apache.flink.api.common.typeinfo.TypeInformation; 23 | import org.apache.flink.api.java.utils.ParameterTool; 24 | import org.apache.flink.streaming.api.datastream.AsyncDataStream; 25 | import org.apache.flink.streaming.api.datastream.DataStream; 26 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 27 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 28 | import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows; 29 | import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows; 30 | import org.apache.flink.streaming.api.windowing.assigners.WindowAssigner; 31 | import org.apache.flink.streaming.api.windowing.time.Time; 32 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 33 | import org.apache.flink.util.OutputTag; 34 | 35 | import com.cloudera.streaming.examples.flink.operators.ItemInfoEnrichment; 36 | import com.cloudera.streaming.examples.flink.operators.MaxWatermarkGeneratorSupplier; 37 | import com.cloudera.streaming.examples.flink.operators.SummaryAlertingCondition; 38 | import com.cloudera.streaming.examples.flink.operators.TransactionProcessor; 39 | import com.cloudera.streaming.examples.flink.operators.TransactionSummaryAggregator; 40 | import com.cloudera.streaming.examples.flink.types.ItemTransaction; 41 | import com.cloudera.streaming.examples.flink.types.Query; 42 | import com.cloudera.streaming.examples.flink.types.QueryResult; 43 | import com.cloudera.streaming.examples.flink.types.TransactionResult; 44 | import com.cloudera.streaming.examples.flink.types.TransactionSummary; 45 | 46 | import java.util.concurrent.TimeUnit; 47 | 48 | /** 49 | * Base class for out item transaction and query processor pipeline. 
The core processing functionality is encapsulated here while 50 | * subclasses have to implement input and output methods. Check the {@link KafkaItemTransactionJob} for a Kafka input/output based 51 | * implementation of the pipeline. 52 | */ 53 | public abstract class ItemTransactionJob { 54 | 55 | public static final String EVENT_TIME_KEY = "event.time"; 56 | public static final String ENABLE_SUMMARIES_KEY = "enable.summaries"; 57 | public static final String TIME_INTERVALS_IN_MINUTES = "time.intervals.in.minutes"; 58 | 59 | public static final String ENABLE_DB_ENRICHMENT = "enable.db.enrichment"; 60 | public static final String DB_CONN_STRING = "db.connection.string"; 61 | public static final String ASYNC_TP_SIZE = "async.threadpool.size"; 62 | 63 | public static final OutputTag QUERY_RESULT = new OutputTag<>("query-result", TypeInformation.of(QueryResult.class)); 64 | 65 | public final StreamExecutionEnvironment createApplicationPipeline(ParameterTool params) { 66 | 67 | // Create the StreamExecutionEnvironment 68 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 69 | 70 | // Read transaction stream 71 | DataStream transactionStream = readTransactionStream(params, env); 72 | 73 | // We read the query stream and exclude it from watermark tracking by assigning Watermark.MAX_WATERMARK 74 | DataStream queryStream = readQueryStream(params, env) 75 | .assignTimestampsAndWatermarks(WatermarkStrategy.forGenerator(new MaxWatermarkGeneratorSupplier<>())) 76 | .name("MaxWatermark") 77 | .uid("max-watermark"); 78 | 79 | // Connect transactions with queries using the same itemId key and apply our transaction processor 80 | // The main output is the transaction result, query results are accessed as a side output. 81 | SingleOutputStreamOperator processedTransactions = transactionStream.keyBy(t -> t.itemId) 82 | .connect(queryStream.keyBy(q -> q.itemId)) 83 | .process(new TransactionProcessor()) 84 | .name("Transaction Processor") 85 | .uid("transaction-processor"); 86 | 87 | // Query results are accessed as a side output of the transaction processor 88 | DataStream queryResultStream = processedTransactions.getSideOutput(QUERY_RESULT); 89 | 90 | // If needed we enrich each query result by implementing an asynchronous enrichment operator (ItemInfoEnrichment) 91 | if (params.getBoolean(ENABLE_DB_ENRICHMENT, false)) { 92 | queryResultStream = AsyncDataStream.unorderedWait( 93 | queryResultStream, 94 | new ItemInfoEnrichment(params.getInt(ASYNC_TP_SIZE, 5), params.getRequired(DB_CONN_STRING)), 95 | 10, TimeUnit.SECONDS) 96 | .name("Query Result Enrichment") 97 | .uid("query-result-enrichment"); 98 | } 99 | 100 | // Handle the output of transaction and query results separately 101 | writeTransactionResults(params, processedTransactions); 102 | writeQueryOutput(params, queryResultStream); 103 | 104 | // If needed we create a window computation of the transaction summaries by item and time window 105 | if (params.getBoolean(ENABLE_SUMMARIES_KEY, false)) { 106 | DataStream transactionSummaryStream = processedTransactions 107 | .keyBy(res -> res.transaction.itemId) 108 | .window(createTimeWindow(params)) 109 | .aggregate(new TransactionSummaryAggregator()) 110 | .name("Create Transaction Summary") 111 | .uid("create-transaction-summary") 112 | .filter(new SummaryAlertingCondition(params)) 113 | .name("Filter High failure rate") 114 | .uid("filter-high-failure-rate"); 115 | 116 | writeTransactionSummaries(params, transactionSummaryStream); 117 | } 118 | 119 | return env; 
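// Note: the environment is only assembled here and returned to the caller; concrete jobs such as
// KafkaItemTransactionJob finish with createApplicationPipeline(params).execute(...).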
120 | } 121 | 122 | private WindowAssigner createTimeWindow(ParameterTool params) { 123 | if (params.getBoolean(EVENT_TIME_KEY, false)) { 124 | return TumblingEventTimeWindows.of(Time.minutes(params.getInt(TIME_INTERVALS_IN_MINUTES, 10))); 125 | } 126 | return TumblingProcessingTimeWindows.of(Time.minutes(params.getInt(TIME_INTERVALS_IN_MINUTES, 10))); 127 | } 128 | 129 | protected abstract DataStream readQueryStream(ParameterTool params, StreamExecutionEnvironment env); 130 | 131 | protected abstract DataStream readTransactionStream(ParameterTool params, StreamExecutionEnvironment env); 132 | 133 | protected abstract void writeQueryOutput(ParameterTool params, DataStream queryResultStream); 134 | 135 | protected abstract void writeTransactionResults(ParameterTool params, DataStream transactionResults); 136 | 137 | protected abstract void writeTransactionSummaries(ParameterTool params, DataStream transactionSummaryStream); 138 | 139 | } 140 | -------------------------------------------------------------------------------- /flink-stateful-tutorial/src/main/java/com/cloudera/streaming/examples/flink/KafkaDataGeneratorJob.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink; 20 | 21 | import org.apache.flink.api.java.utils.ParameterTool; 22 | import org.apache.flink.connector.base.DeliveryGuarantee; 23 | import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema; 24 | import org.apache.flink.connector.kafka.sink.KafkaSink; 25 | import org.apache.flink.streaming.api.datastream.DataStream; 26 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 27 | 28 | import com.cloudera.streaming.examples.flink.operators.ItemTransactionGeneratorSource; 29 | import com.cloudera.streaming.examples.flink.operators.QueryGeneratorSource; 30 | import com.cloudera.streaming.examples.flink.types.ItemTransaction; 31 | import com.cloudera.streaming.examples.flink.types.Query; 32 | import com.cloudera.streaming.examples.flink.types.QuerySchema; 33 | import com.cloudera.streaming.examples.flink.types.TransactionSchema; 34 | import com.cloudera.streaming.examples.flink.utils.Utils; 35 | 36 | /** 37 | * Simple Flink job that generates {@link ItemTransaction} data to Kafka. 
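* <p>A minimal sketch of the properties file the job expects as its single argument (keys mirror
* config/job.properties in this tutorial; the broker addresses below are placeholders, not real hosts):
* <pre>{@code
* kafka.bootstrap.servers=broker-1:9092,broker-2:9092
* transaction.input.topic=transaction.log.1
* generate.queries=false
* query.input.topic=query.input.log.1
* num.items=1000000
* sleep=100
* }</pre>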
38 | */ 39 | public class KafkaDataGeneratorJob { 40 | 41 | private static final String GENERATE_QUERIES = "generate.queries"; 42 | public static final String KAFKA_BOOTSTRAP_SERVERS = "kafka.bootstrap.servers"; 43 | 44 | public static void main(String[] args) throws Exception { 45 | if (args.length != 1) { 46 | throw new RuntimeException("Path to the properties file is expected as the only argument."); 47 | } 48 | ParameterTool params = ParameterTool.fromPropertiesFile(args[0]); 49 | 50 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 51 | DataStream generatedInput = 52 | env.addSource(new ItemTransactionGeneratorSource(params)) 53 | .name("Item Transaction Generator") 54 | .uid("item-transaction-generator"); 55 | 56 | String transactionTopic = params.getRequired(KafkaItemTransactionJob.TRANSACTION_INPUT_TOPIC_KEY); 57 | KafkaSink kafkaSink = KafkaSink.builder() 58 | .setBootstrapServers(params.get(KAFKA_BOOTSTRAP_SERVERS)) 59 | .setRecordSerializer(KafkaRecordSerializationSchema.builder() 60 | .setTopic(transactionTopic) 61 | .setValueSerializationSchema(new TransactionSchema(transactionTopic)) 62 | .build()) 63 | .setKafkaProducerConfig(Utils.readKafkaProperties(params)) 64 | .setDeliveryGuarantee(DeliveryGuarantee.AT_LEAST_ONCE) 65 | .build(); 66 | 67 | generatedInput.keyBy(t -> t.itemId) 68 | .sinkTo(kafkaSink) 69 | .name("Transaction Kafka Sink") 70 | .uid("transaction-kafka-sink"); 71 | 72 | if (params.getBoolean(GENERATE_QUERIES, false)) { 73 | DataStream queries = env.addSource(new QueryGeneratorSource(params)) 74 | .name("Query Generator") 75 | .uid("query-generator"); 76 | 77 | String queryTopic = params.getRequired(KafkaItemTransactionJob.QUERY_INPUT_TOPIC_KEY); 78 | KafkaSink querySink = KafkaSink.builder() 79 | .setBootstrapServers(params.get(KAFKA_BOOTSTRAP_SERVERS)) 80 | .setRecordSerializer(KafkaRecordSerializationSchema.builder() 81 | .setTopic(queryTopic) 82 | .setValueSerializationSchema(new QuerySchema(queryTopic)) 83 | .build()) 84 | .setKafkaProducerConfig(Utils.readKafkaProperties(params)) 85 | .setDeliveryGuarantee(DeliveryGuarantee.AT_LEAST_ONCE) 86 | .build(); 87 | 88 | queries.keyBy(q -> q.itemId) 89 | .sinkTo(querySink) 90 | .name("Query Kafka Sink") 91 | .uid("query-kafka-sink"); 92 | } 93 | 94 | env.execute("Kafka Data generator"); 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /flink-stateful-tutorial/src/main/java/com/cloudera/streaming/examples/flink/KafkaItemTransactionJob.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink; 20 | 21 | import org.apache.flink.api.common.eventtime.WatermarkStrategy; 22 | import org.apache.flink.api.java.utils.ParameterTool; 23 | import org.apache.flink.connector.base.DeliveryGuarantee; 24 | import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema; 25 | import org.apache.flink.connector.kafka.sink.KafkaSink; 26 | import org.apache.flink.connector.kafka.source.KafkaSource; 27 | import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer; 28 | import org.apache.flink.streaming.api.datastream.DataStream; 29 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 30 | 31 | import com.cloudera.streaming.examples.flink.types.ItemTransaction; 32 | import com.cloudera.streaming.examples.flink.types.Query; 33 | import com.cloudera.streaming.examples.flink.types.QueryResult; 34 | import com.cloudera.streaming.examples.flink.types.QueryResultSchema; 35 | import com.cloudera.streaming.examples.flink.types.QuerySchema; 36 | import com.cloudera.streaming.examples.flink.types.TransactionResult; 37 | import com.cloudera.streaming.examples.flink.types.TransactionSchema; 38 | import com.cloudera.streaming.examples.flink.types.TransactionSummary; 39 | import com.cloudera.streaming.examples.flink.utils.Utils; 40 | 41 | import java.time.Duration; 42 | 43 | /** 44 | * {@link ItemTransactionJob} implementation that reads and writes data using Kafka. 45 | */ 46 | public class KafkaItemTransactionJob extends ItemTransactionJob { 47 | 48 | public static final String TRANSACTION_INPUT_TOPIC_KEY = "transaction.input.topic"; 49 | public static final String QUERY_INPUT_TOPIC_KEY = "query.input.topic"; 50 | public static final String QUERY_OUTPUT_TOPIC_KEY = "query.output.topic"; 51 | public static final String KAFKA_BOOTSTRAP_SERVERS = "kafka.bootstrap.servers"; 52 | 53 | public static void main(String[] args) throws Exception { 54 | if (args.length != 1) { 55 | throw new RuntimeException("Path to the properties file is expected as the only argument."); 56 | } 57 | ParameterTool params = ParameterTool.fromPropertiesFile(args[0]); 58 | new KafkaItemTransactionJob() 59 | .createApplicationPipeline(params) 60 | .execute("Kafka Transaction Processor Job"); 61 | } 62 | 63 | public DataStream readQueryStream(ParameterTool params, StreamExecutionEnvironment env) { 64 | // We read queries in a simple String format and parse it to our Query object 65 | String topic = params.getRequired(QUERY_INPUT_TOPIC_KEY); 66 | KafkaSource rawQuerySource = KafkaSource.builder() 67 | .setBootstrapServers(params.get(KAFKA_BOOTSTRAP_SERVERS)) 68 | .setTopics(topic) 69 | .setValueOnlyDeserializer(new QuerySchema(topic)) 70 | // The first time the job is started we start from the end of the queue, ignoring earlier queries 71 | .setStartingOffsets(OffsetsInitializer.latest()) 72 | .setProperties(Utils.readKafkaProperties(params)) 73 | .build(); 74 | 75 | return env.fromSource(rawQuerySource, WatermarkStrategy.noWatermarks(), "Kafka Query Source") 76 | .uid("kafka-query-source"); 77 | } 78 | 79 | public DataStream readTransactionStream(ParameterTool params, StreamExecutionEnvironment env) { 80 | // We read the ItemTransaction objects directly using the schema 81 | String topic = params.getRequired(TRANSACTION_INPUT_TOPIC_KEY); 82 | KafkaSource transactionSource = KafkaSource.builder() 83 | .setBootstrapServers(params.get(KAFKA_BOOTSTRAP_SERVERS)) 84 | .setTopics(topic) 85 | .setValueOnlyDeserializer(new 
TransactionSchema(topic)) 86 | .setStartingOffsets(OffsetsInitializer.earliest()) 87 | .setProperties(Utils.readKafkaProperties(params)) 88 | .build(); 89 | 90 | // In case event time processing is enabled we assign trailing watermarks for each partition 91 | return env.fromSource(transactionSource, WatermarkStrategy.forBoundedOutOfOrderness(Duration.ofMinutes(1)), "Kafka Transaction Source") 92 | .uid("kafka-transaction-source"); 93 | } 94 | 95 | public void writeQueryOutput(ParameterTool params, DataStream queryResultStream) { 96 | // Query output is written back to kafka in a tab delimited format for readability 97 | String topic = params.getRequired(QUERY_OUTPUT_TOPIC_KEY); 98 | KafkaSink queryOutputSink = KafkaSink.builder() 99 | .setBootstrapServers(params.get(KAFKA_BOOTSTRAP_SERVERS)) 100 | .setRecordSerializer(KafkaRecordSerializationSchema.builder() 101 | .setTopic(topic) 102 | .setValueSerializationSchema(new QueryResultSchema(topic)) 103 | .build()) 104 | .setKafkaProducerConfig(Utils.readKafkaProperties(params)) 105 | .setDeliveryGuarantee(DeliveryGuarantee.AT_LEAST_ONCE) 106 | .build(); 107 | 108 | queryResultStream.sinkTo(queryOutputSink) 109 | .name("Kafka Query Result Sink") 110 | .uid("kafka-query-result-sink"); 111 | } 112 | 113 | @Override 114 | protected void writeTransactionResults(ParameterTool params, DataStream transactionResults) { 115 | // Ignore for now 116 | } 117 | 118 | @Override 119 | protected void writeTransactionSummaries(ParameterTool params, DataStream transactionSummaryStream) { 120 | // Ignore for now 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /flink-stateful-tutorial/src/main/java/com/cloudera/streaming/examples/flink/operators/ItemInfoEnrichment.java: -------------------------------------------------------------------------------- 1 | package com.cloudera.streaming.examples.flink.operators; 2 | 3 | import org.apache.flink.configuration.Configuration; 4 | import org.apache.flink.streaming.api.functions.async.ResultFuture; 5 | import org.apache.flink.streaming.api.functions.async.RichAsyncFunction; 6 | 7 | import com.cloudera.streaming.examples.flink.types.QueryResult; 8 | import org.slf4j.Logger; 9 | import org.slf4j.LoggerFactory; 10 | 11 | import java.sql.Connection; 12 | import java.sql.DriverManager; 13 | import java.sql.PreparedStatement; 14 | import java.sql.ResultSet; 15 | import java.sql.SQLException; 16 | import java.util.Collections; 17 | import java.util.concurrent.ExecutorService; 18 | import java.util.concurrent.Executors; 19 | 20 | /** 21 | * Enriches the item info by fetching the item name by item id. 
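* <p>Intended to be wired in through Flink's async I/O, as {@code ItemTransactionJob} does when
* {@code enable.db.enrichment} is set. A rough sketch (the pool size of 5 and the 10 second timeout
* are the defaults used there; the JDBC URL is just a placeholder):
* <pre>{@code
* AsyncDataStream.unorderedWait(
*         queryResultStream,
*         new ItemInfoEnrichment(5, "jdbc:mysql://..."),
*         10, TimeUnit.SECONDS);
* }</pre>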
22 | */ 23 | public class ItemInfoEnrichment extends RichAsyncFunction { 24 | 25 | private static final Logger LOG = LoggerFactory.getLogger(ItemInfoEnrichment.class); 26 | 27 | private static final String ITEM_QUERY = "SELECT name FROM items WHERE itemId = ?;"; 28 | 29 | private final int threadPoolSize; 30 | private final String dbConnectionString; 31 | 32 | private transient Connection dbConnection; 33 | private transient ExecutorService executor; 34 | private transient PreparedStatement itemQuery; 35 | 36 | public ItemInfoEnrichment(int threadPoolSize, String dbConnectionString) { 37 | this.threadPoolSize = threadPoolSize; 38 | this.dbConnectionString = dbConnectionString; 39 | } 40 | 41 | @Override 42 | public void asyncInvoke(QueryResult queryResult, ResultFuture resultFuture) { 43 | executor.submit(() -> { 44 | try { 45 | itemQuery.setString(1, queryResult.itemInfo.itemId); 46 | ResultSet rs = itemQuery.executeQuery(); 47 | if (rs.next()) { 48 | queryResult.itemInfo.setItemName(rs.getString("name")); 49 | } 50 | 51 | resultFuture.complete(Collections.singletonList(queryResult)); 52 | } catch (SQLException t) { 53 | resultFuture.completeExceptionally(t); 54 | } 55 | }); 56 | } 57 | 58 | @Override 59 | public void open(Configuration configuration) throws Exception { 60 | executor = Executors.newFixedThreadPool(threadPoolSize); 61 | Class.forName("com.mysql.jdbc.Driver"); 62 | dbConnection = DriverManager.getConnection(dbConnectionString); 63 | itemQuery = dbConnection.prepareStatement(ITEM_QUERY); 64 | } 65 | 66 | @Override 67 | public void close() { 68 | try { 69 | executor.shutdownNow(); 70 | } catch (Throwable t) { 71 | LOG.error("Error while shutting down executor service.", t); 72 | } 73 | 74 | try { 75 | itemQuery.close(); 76 | } catch (Throwable t) { 77 | LOG.error("Error while closing connection.", t); 78 | } 79 | 80 | try { 81 | dbConnection.close(); 82 | } catch (Throwable t) { 83 | LOG.error("Error while closing connection.", t); 84 | } 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /flink-stateful-tutorial/src/main/java/com/cloudera/streaming/examples/flink/operators/ItemTransactionGeneratorSource.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink.operators; 20 | 21 | import org.apache.flink.api.java.utils.ParameterTool; 22 | import org.apache.flink.streaming.api.functions.source.ParallelSourceFunction; 23 | 24 | import com.cloudera.streaming.examples.flink.types.ItemTransaction; 25 | import org.apache.commons.math3.distribution.ParetoDistribution; 26 | import org.slf4j.Logger; 27 | import org.slf4j.LoggerFactory; 28 | 29 | import java.util.concurrent.ThreadLocalRandom; 30 | 31 | /** 32 | * A simple data generator that generates {@link ItemTransaction} data with a skewed itemId distribution 33 | * to better simulate real world access patterns with hot items. 34 | */ 35 | public class ItemTransactionGeneratorSource implements ParallelSourceFunction { 36 | 37 | public static final String NUM_ITEMS_KEY = "num.items"; 38 | public static final String SLEEP_KEY = "sleep"; 39 | public static final String PARETO_SHAPE_KEY = "pareto.shape"; 40 | public static final int DEFAULT_NUM_ITEMS = 1_000; 41 | public static final int DEFAULT_SHAPE = 15; 42 | private static final Logger LOG = LoggerFactory.getLogger(ItemTransactionGeneratorSource.class); 43 | private final int numItems; 44 | private final long sleep; 45 | private final int shape; 46 | private volatile boolean isRunning = true; 47 | 48 | public ItemTransactionGeneratorSource(ParameterTool params) { 49 | this.numItems = params.getInt(NUM_ITEMS_KEY, DEFAULT_NUM_ITEMS); 50 | this.sleep = params.getLong(SLEEP_KEY, 0); 51 | this.shape = params.getInt(PARETO_SHAPE_KEY, DEFAULT_SHAPE); 52 | } 53 | 54 | @Override 55 | public void run(SourceContext ctx) throws Exception { 56 | ThreadLocalRandom rnd = ThreadLocalRandom.current(); 57 | ParetoDistribution paretoDistribution = new ParetoDistribution(numItems, shape); 58 | 59 | LOG.info("Starting transaction generator for {} items and {} sleep", numItems, sleep); 60 | 61 | while (isRunning) { 62 | long nextItemId; 63 | do { 64 | nextItemId = sample(paretoDistribution); 65 | } while (nextItemId > numItems); 66 | String itemId = "item_" + nextItemId; 67 | 68 | int quantity = (int) (Math.round(rnd.nextGaussian() / 2 * 10) * 10) + 5; 69 | if (quantity == 0) { 70 | continue; 71 | } 72 | long transactionId = rnd.nextLong(Long.MAX_VALUE); 73 | synchronized (ctx.getCheckpointLock()) { 74 | ctx.collect(new ItemTransaction(transactionId, System.currentTimeMillis(), itemId, quantity)); 75 | } 76 | if (sleep > 0) { 77 | Thread.sleep(sleep); 78 | } 79 | } 80 | 81 | } 82 | 83 | private long sample(ParetoDistribution paretoDistribution) { 84 | return (Math.round(paretoDistribution.sample() - paretoDistribution.getScale()) + 1); 85 | } 86 | 87 | @Override 88 | public void cancel() { 89 | isRunning = false; 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /flink-stateful-tutorial/src/main/java/com/cloudera/streaming/examples/flink/operators/MaxWatermarkGeneratorSupplier.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink.operators; 20 | 21 | import org.apache.flink.api.common.eventtime.Watermark; 22 | import org.apache.flink.api.common.eventtime.WatermarkGenerator; 23 | import org.apache.flink.api.common.eventtime.WatermarkGeneratorSupplier; 24 | import org.apache.flink.api.common.eventtime.WatermarkOutput; 25 | 26 | /** 27 | * Watermark implementation that emits {@link Watermark#MAX_WATERMARK}, basically removing 28 | * this stream from watermark computation. 29 | * 30 | *
<p>
Should only be used on streams that won't be aggregated to the window. 31 | */ 32 | public class MaxWatermarkGeneratorSupplier<T> implements WatermarkGeneratorSupplier<T> { 33 | 34 | @Override 35 | public WatermarkGenerator<T> createWatermarkGenerator(Context context) { 36 | return new WatermarkGenerator<T>() { 37 | @Override 38 | public void onEvent(T t, long l, WatermarkOutput watermarkOutput) { 39 | watermarkOutput.emitWatermark(Watermark.MAX_WATERMARK); 40 | } 41 | 42 | @Override 43 | public void onPeriodicEmit(WatermarkOutput watermarkOutput) { 44 | watermarkOutput.emitWatermark(Watermark.MAX_WATERMARK); 45 | } 46 | }; 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /flink-stateful-tutorial/src/main/java/com/cloudera/streaming/examples/flink/operators/QueryGeneratorSource.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink.operators; 20 | 21 | import org.apache.flink.api.java.utils.ParameterTool; 22 | import org.apache.flink.streaming.api.functions.source.ParallelSourceFunction; 23 | 24 | import com.cloudera.streaming.examples.flink.types.ItemTransaction; 25 | import com.cloudera.streaming.examples.flink.types.Query; 26 | import org.slf4j.Logger; 27 | import org.slf4j.LoggerFactory; 28 | 29 | import java.util.concurrent.ThreadLocalRandom; 30 | 31 | /** 32 | * A simple data generator that generates {@link Query} data for randomly selected items, complementing 33 | * the {@link ItemTransaction} stream produced by {@code ItemTransactionGeneratorSource}.
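* <p>Throttling note: the configured {@code sleep} value is multiplied by 100 and capped at 1000 ms, so
* with {@code sleep=100} from config/job.properties each source subtask emits roughly one query per second.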
34 | */ 35 | public class QueryGeneratorSource implements ParallelSourceFunction { 36 | 37 | public static final String NUM_ITEMS_KEY = "num.items"; 38 | public static final String SLEEP_KEY = "sleep"; 39 | public static final int DEFAULT_NUM_ITEMS = 1_000; 40 | private static final Logger LOG = LoggerFactory.getLogger(QueryGeneratorSource.class); 41 | private final int numItems; 42 | private final long sleep; 43 | private volatile boolean isRunning = true; 44 | 45 | public QueryGeneratorSource(ParameterTool params) { 46 | this.numItems = params.getInt(NUM_ITEMS_KEY, DEFAULT_NUM_ITEMS); 47 | this.sleep = Math.min(1000, params.getLong(SLEEP_KEY, 1) * 100); 48 | } 49 | 50 | @Override 51 | public void run(SourceContext ctx) throws Exception { 52 | ThreadLocalRandom rnd = ThreadLocalRandom.current(); 53 | 54 | LOG.info("Starting query generator for {} items and {} sleep", numItems, sleep); 55 | 56 | while (isRunning) { 57 | String itemId = "item_" + (rnd.nextInt(numItems) + 1); 58 | synchronized (ctx.getCheckpointLock()) { 59 | ctx.collect(new Query(rnd.nextLong(Long.MAX_VALUE), System.currentTimeMillis(), itemId)); 60 | } 61 | if (sleep > 0) { 62 | Thread.sleep(sleep); 63 | } 64 | } 65 | 66 | } 67 | 68 | @Override 69 | public void cancel() { 70 | isRunning = false; 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /flink-stateful-tutorial/src/main/java/com/cloudera/streaming/examples/flink/operators/SummaryAlertingCondition.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink.operators; 20 | 21 | import org.apache.flink.api.common.functions.FilterFunction; 22 | import org.apache.flink.api.java.utils.ParameterTool; 23 | 24 | import com.cloudera.streaming.examples.flink.types.TransactionSummary; 25 | 26 | /** 27 | * Raises an alert if the ratio of failed transactions is over a ratio, in a large enough sample. 
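* <p>With the defaults ({@code transaction.num.min=100}, {@code transaction.failure.rate.min=0.5}) a
* window summary is kept only if failed + successful > 100 and failed / (failed + successful) > 0.5.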
28 | */ 29 | public class SummaryAlertingCondition implements FilterFunction { 30 | 31 | public static final String REPORTING_NUMBER_KEY = "transaction.num.min"; 32 | public static final String REPORTING_FAILURE_RATE_KEY = "transaction.failure.rate.min"; 33 | 34 | private final int minNum; 35 | private final double minFailureRate; 36 | 37 | public SummaryAlertingCondition(ParameterTool params) { 38 | minNum = params.getInt(REPORTING_NUMBER_KEY, 100); 39 | minFailureRate = params.getDouble(REPORTING_FAILURE_RATE_KEY, 0.5); 40 | } 41 | 42 | @Override 43 | public boolean filter(TransactionSummary transactionSummary) { 44 | int total = transactionSummary.numSuccessfulTransactions + transactionSummary.numFailedTransactions; 45 | return total > minNum && (((double) transactionSummary.numFailedTransactions) / total) > minFailureRate; 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /flink-stateful-tutorial/src/main/java/com/cloudera/streaming/examples/flink/operators/TransactionProcessor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink.operators; 20 | 21 | import org.apache.flink.api.common.state.ValueState; 22 | import org.apache.flink.api.common.state.ValueStateDescriptor; 23 | import org.apache.flink.configuration.Configuration; 24 | import org.apache.flink.metrics.Histogram; 25 | import org.apache.flink.streaming.api.functions.co.KeyedCoProcessFunction; 26 | import org.apache.flink.util.Collector; 27 | 28 | import com.cloudera.streaming.examples.flink.KafkaItemTransactionJob; 29 | import com.cloudera.streaming.examples.flink.types.ItemInfo; 30 | import com.cloudera.streaming.examples.flink.types.ItemTransaction; 31 | import com.cloudera.streaming.examples.flink.types.Query; 32 | import com.cloudera.streaming.examples.flink.types.QueryResult; 33 | import com.cloudera.streaming.examples.flink.types.TransactionResult; 34 | import com.cloudera.streaming.examples.flink.utils.ExponentialHistogram; 35 | 36 | /** 37 | * Core transaction and query processing logic. {@link #processElement1(ItemTransaction, Context, Collector)} receives 38 | * transactions and executes them if there is sufficient quantity already stored in the state. 39 | * 40 | *
<p>
{@link #processElement2(Query, Context, Collector)} receives item queries and simply returns the current info for the 41 | * queried item. 42 | * 43 | *
<p>
Both processing functions are keyed by the itemId field. 44 | * 45 | *
<p>
In addition to the core logic we added custom histogram metrics to track state access time for future optimizations. 46 | */ 47 | public class TransactionProcessor extends KeyedCoProcessFunction { 48 | 49 | private transient ValueState itemState; 50 | private transient Histogram itemRead; 51 | private transient Histogram itemWrite; 52 | 53 | @Override 54 | public void processElement1(ItemTransaction transaction, Context ctx, Collector out) throws Exception { 55 | long startTime = System.nanoTime(); 56 | ItemInfo info = itemState.value(); 57 | itemRead.update(System.nanoTime() - startTime); 58 | 59 | if (info == null) { 60 | info = new ItemInfo(transaction.itemId, 0); 61 | } 62 | int newQuantity = info.quantity + transaction.quantity; 63 | 64 | boolean success = newQuantity >= 0; 65 | if (success) { 66 | info.quantity = newQuantity; 67 | startTime = System.nanoTime(); 68 | itemState.update(info); 69 | itemWrite.update(System.nanoTime() - startTime); 70 | } 71 | out.collect(new TransactionResult(transaction, success)); 72 | } 73 | 74 | @Override 75 | public void processElement2(Query query, Context ctx, Collector out) throws Exception { 76 | ItemInfo info = itemState.value(); 77 | ctx.output(KafkaItemTransactionJob.QUERY_RESULT, new QueryResult(query.queryId, info != null ? info : new ItemInfo(query.itemId, 0))); 78 | } 79 | 80 | @Override 81 | public void open(Configuration parameters) { 82 | // We create state read/write time metrics for later performance tuning 83 | itemRead = getRuntimeContext().getMetricGroup().histogram("ItemRead", new ExponentialHistogram()); 84 | itemWrite = getRuntimeContext().getMetricGroup().histogram("ItemWrite", new ExponentialHistogram()); 85 | 86 | itemState = getRuntimeContext().getState(new ValueStateDescriptor<>("itemInfo", ItemInfo.class)); 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /flink-stateful-tutorial/src/main/java/com/cloudera/streaming/examples/flink/operators/TransactionSummaryAggregator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink.operators; 20 | 21 | import org.apache.flink.api.common.functions.AggregateFunction; 22 | 23 | import com.cloudera.streaming.examples.flink.types.TransactionResult; 24 | import com.cloudera.streaming.examples.flink.types.TransactionSummary; 25 | 26 | /** 27 | * Aggregate function for summarizing the transaction successfulness. 
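* <p>Applied per item over a time window in {@code ItemTransactionJob}; roughly (10 minute tumbling
* processing-time windows are the defaults there):
* <pre>{@code
* processedTransactions
*     .keyBy(res -> res.transaction.itemId)
*     .window(TumblingProcessingTimeWindows.of(Time.minutes(10)))
*     .aggregate(new TransactionSummaryAggregator());
* }</pre>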
28 | */ 29 | public class TransactionSummaryAggregator implements AggregateFunction { 30 | 31 | @Override 32 | public TransactionSummary createAccumulator() { 33 | return new TransactionSummary(); 34 | } 35 | 36 | @Override 37 | public TransactionSummary add(TransactionResult tr, TransactionSummary acc) { 38 | acc.itemId = tr.transaction.itemId; 39 | if (tr.success) { 40 | acc.numSuccessfulTransactions++; 41 | } else { 42 | acc.numFailedTransactions++; 43 | } 44 | acc.totalVolume += Math.abs(tr.transaction.quantity); 45 | return acc; 46 | } 47 | 48 | @Override 49 | public TransactionSummary getResult(TransactionSummary acc) { 50 | return acc; 51 | } 52 | 53 | @Override 54 | public TransactionSummary merge(TransactionSummary ts1, TransactionSummary ts2) { 55 | return new TransactionSummary(ts2.itemId != null ? ts2.itemId : ts1.itemId, 56 | ts1.numFailedTransactions + ts2.numFailedTransactions, 57 | ts1.numSuccessfulTransactions + ts2.numSuccessfulTransactions, 58 | ts1.totalVolume + ts2.totalVolume); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /flink-stateful-tutorial/src/main/java/com/cloudera/streaming/examples/flink/types/ItemInfo.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink.types; 20 | 21 | import java.util.Objects; 22 | 23 | /** 24 | * Item stock information. 
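* <p>Stored as the per-item value state of {@code TransactionProcessor} and returned to callers inside {@link QueryResult}.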
25 | */ 26 | public class ItemInfo { 27 | 28 | public String itemId; 29 | 30 | public int quantity; 31 | 32 | public String itemName = "UNKNOWN"; 33 | 34 | public ItemInfo() { 35 | } 36 | 37 | public ItemInfo(String itemId, int quantity) { 38 | this.itemId = itemId; 39 | this.quantity = quantity; 40 | } 41 | 42 | public ItemInfo(String itemId, int quantity, String itemName) { 43 | this.itemId = itemId; 44 | this.quantity = quantity; 45 | this.itemName = itemName; 46 | } 47 | 48 | public void setItemName(String name) { 49 | this.itemName = name; 50 | } 51 | 52 | @Override 53 | public boolean equals(Object o) { 54 | if (this == o) { 55 | return true; 56 | } 57 | if (o == null || getClass() != o.getClass()) { 58 | return false; 59 | } 60 | 61 | ItemInfo itemInfo = (ItemInfo) o; 62 | return quantity == itemInfo.quantity && 63 | Objects.equals(itemId, itemInfo.itemId) && 64 | Objects.equals(itemName, itemInfo.itemName); 65 | } 66 | 67 | @Override 68 | public int hashCode() { 69 | return Objects.hash(itemId, quantity, itemName); 70 | } 71 | 72 | @Override 73 | public String toString() { 74 | return "ItemInfo{" + 75 | "itemId='" + itemId + '\'' + 76 | ", quantity=" + quantity + 77 | ", itemName='" + itemName + '\'' + 78 | '}'; 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /flink-stateful-tutorial/src/main/java/com/cloudera/streaming/examples/flink/types/ItemTransaction.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink.types; 20 | 21 | import java.util.Objects; 22 | 23 | /** 24 | * Transaction request for an item. 
25 | */ 26 | public class ItemTransaction { 27 | 28 | public long transactionId; 29 | 30 | public long ts; 31 | 32 | public String itemId; 33 | 34 | public int quantity; 35 | 36 | public ItemTransaction() {} 37 | 38 | public ItemTransaction(long transactionId, long ts, String itemId, int quantity) { 39 | this.transactionId = transactionId; 40 | this.ts = ts; 41 | this.itemId = itemId; 42 | this.quantity = quantity; 43 | } 44 | 45 | @Override 46 | public boolean equals(Object o) { 47 | if (this == o) { 48 | return true; 49 | } 50 | if (o == null || getClass() != o.getClass()) { 51 | return false; 52 | } 53 | 54 | ItemTransaction that = (ItemTransaction) o; 55 | return transactionId == that.transactionId && 56 | ts == that.ts && 57 | quantity == that.quantity && 58 | Objects.equals(itemId, that.itemId); 59 | } 60 | 61 | @Override 62 | public int hashCode() { 63 | return Objects.hash(transactionId, ts, itemId, quantity); 64 | } 65 | 66 | @Override 67 | public String toString() { 68 | return "ItemTransaction{" + 69 | "transactionId=" + transactionId + 70 | ", ts=" + ts + 71 | ", itemId='" + itemId + '\'' + 72 | ", quantity=" + quantity + 73 | '}'; 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /flink-stateful-tutorial/src/main/java/com/cloudera/streaming/examples/flink/types/JsonKafkaSerializationSchema.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink.types; 20 | 21 | import org.apache.flink.api.common.serialization.DeserializationSchema; 22 | import org.apache.flink.api.common.serialization.SerializationSchema; 23 | 24 | import com.fasterxml.jackson.core.JsonProcessingException; 25 | import com.fasterxml.jackson.databind.ObjectMapper; 26 | 27 | /** 28 | * Common serialization logic for JSON schemas. 
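* <p>Concrete schemas such as {@code TransactionSchema} or {@code QuerySchema} only add the Jackson-based
* deserialization and the produced type information. A sketch for a hypothetical {@code MyEvent} payload:
* <pre>{@code
* public class MyEventSchema extends JsonKafkaSerializationSchema<MyEvent> {
*     public MyEventSchema(String topic) { super(topic); }
*     public MyEvent deserialize(byte[] message) throws IOException {
*         return OBJECT_MAPPER.readValue(message, MyEvent.class);
*     }
*     public TypeInformation<MyEvent> getProducedType() {
*         return TypeInformation.of(MyEvent.class);
*     }
* }
* }</pre>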
29 | */ 30 | public abstract class JsonKafkaSerializationSchema implements SerializationSchema, DeserializationSchema { 31 | 32 | protected static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); 33 | 34 | protected final String topic; 35 | 36 | protected JsonKafkaSerializationSchema(String topic) { 37 | this.topic = topic; 38 | } 39 | 40 | @Override 41 | public boolean isEndOfStream(T nextElement) { 42 | return false; 43 | } 44 | 45 | @Override 46 | public byte[] serialize(T obj) { 47 | try { 48 | return OBJECT_MAPPER.writeValueAsBytes(obj); 49 | } catch (JsonProcessingException e) { 50 | throw new RuntimeException(e); 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /flink-stateful-tutorial/src/main/java/com/cloudera/streaming/examples/flink/types/Query.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink.types; 20 | 21 | import java.util.Objects; 22 | 23 | /** 24 | * Query for the actually available quantity of an item. 25 | */ 26 | public class Query { 27 | 28 | public long queryId; 29 | 30 | public long ts = System.currentTimeMillis(); 31 | 32 | public String itemId; 33 | 34 | public Query() {} 35 | 36 | public Query(long queryId, String itemId) { 37 | this(queryId, null, itemId); 38 | } 39 | 40 | public Query(long queryId, Long ts, String itemId) { 41 | this.queryId = queryId; 42 | this.itemId = itemId; 43 | if (ts != null) { 44 | this.ts = ts; 45 | } 46 | } 47 | 48 | @Override 49 | public boolean equals(Object o) { 50 | if (this == o) { 51 | return true; 52 | } 53 | if (o == null || getClass() != o.getClass()) { 54 | return false; 55 | } 56 | 57 | Query query = (Query) o; 58 | return queryId == query.queryId && 59 | ts == query.ts && 60 | Objects.equals(itemId, query.itemId); 61 | } 62 | 63 | @Override 64 | public int hashCode() { 65 | return Objects.hash(queryId, ts, itemId); 66 | } 67 | 68 | @Override 69 | public String toString() { 70 | return "Query{" + 71 | "queryId=" + queryId + 72 | ", ts=" + ts + 73 | ", itemId='" + itemId + '\'' + 74 | '}'; 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /flink-stateful-tutorial/src/main/java/com/cloudera/streaming/examples/flink/types/QueryResult.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink.types; 20 | 21 | import java.util.Objects; 22 | 23 | /** 24 | * Results for an item quantity query. 25 | */ 26 | public class QueryResult { 27 | 28 | public long queryId; 29 | 30 | public ItemInfo itemInfo; 31 | 32 | public QueryResult() { 33 | } 34 | 35 | public QueryResult(long queryId, ItemInfo itemInfo) { 36 | this.queryId = queryId; 37 | this.itemInfo = itemInfo; 38 | } 39 | 40 | @Override 41 | public boolean equals(Object o) { 42 | if (this == o) { 43 | return true; 44 | } 45 | if (o == null || getClass() != o.getClass()) { 46 | return false; 47 | } 48 | 49 | QueryResult that = (QueryResult) o; 50 | return queryId == that.queryId && 51 | Objects.equals(itemInfo, that.itemInfo); 52 | } 53 | 54 | @Override 55 | public int hashCode() { 56 | return Objects.hash(queryId, itemInfo); 57 | } 58 | 59 | @Override 60 | public String toString() { 61 | return "QueryResult{" + 62 | "queryId=" + queryId + 63 | ", itemInfo=" + itemInfo + 64 | '}'; 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /flink-stateful-tutorial/src/main/java/com/cloudera/streaming/examples/flink/types/QueryResultSchema.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink.types; 20 | 21 | import org.apache.flink.api.common.typeinfo.TypeHint; 22 | import org.apache.flink.api.common.typeinfo.TypeInformation; 23 | 24 | import java.io.IOException; 25 | 26 | /** 27 | * Query result serialization schema for running the example with kafka. 
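* <p>Plugged into the Kafka sink in {@code KafkaItemTransactionJob#writeQueryOutput} roughly as:
* <pre>{@code
* KafkaRecordSerializationSchema.builder()
*         .setTopic(topic)
*         .setValueSerializationSchema(new QueryResultSchema(topic))
*         .build();
* }</pre>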
28 | */ 29 | public class QueryResultSchema extends JsonKafkaSerializationSchema { 30 | 31 | public QueryResultSchema(String topic) { 32 | super(topic); 33 | } 34 | 35 | @Override 36 | public QueryResult deserialize(byte[] message) { 37 | try { 38 | return OBJECT_MAPPER.readValue(message, QueryResult.class); 39 | } catch (IOException e) { 40 | return null; 41 | } 42 | } 43 | 44 | @Override 45 | public TypeInformation getProducedType() { 46 | return new TypeHint() { 47 | }.getTypeInfo(); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /flink-stateful-tutorial/src/main/java/com/cloudera/streaming/examples/flink/types/QuerySchema.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink.types; 20 | 21 | import org.apache.flink.api.common.typeinfo.TypeHint; 22 | import org.apache.flink.api.common.typeinfo.TypeInformation; 23 | 24 | import java.io.IOException; 25 | 26 | /** 27 | * Query serialization schema for running the example with kafka. 28 | */ 29 | public class QuerySchema extends JsonKafkaSerializationSchema { 30 | 31 | public QuerySchema(String topic) { 32 | super(topic); 33 | } 34 | 35 | @Override 36 | public Query deserialize(byte[] message) { 37 | try { 38 | return OBJECT_MAPPER.readValue(message, Query.class); 39 | } catch (IOException e) { 40 | return null; 41 | } 42 | } 43 | 44 | @Override 45 | public TypeInformation getProducedType() { 46 | return new TypeHint() { 47 | }.getTypeInfo(); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /flink-stateful-tutorial/src/main/java/com/cloudera/streaming/examples/flink/types/TransactionResult.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
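Because its base class implements both SerializationSchema and DeserializationSchema, QuerySchema can be plugged straight into the Kafka connector as a value deserializer. The wiring sketch below is illustrative only: the topic name, broker address, group id and class name are placeholders rather than values from the tutorial's configuration, and the builder calls come from the standard flink-connector-kafka API, not from this repository.

import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.connector.kafka.source.KafkaSource;
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import com.cloudera.streaming.examples.flink.types.Query;
import com.cloudera.streaming.examples.flink.types.QuerySchema;

public final class QueryKafkaSourceSketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        KafkaSource<Query> querySource = KafkaSource.<Query>builder()
                .setBootstrapServers("broker-1:9092")             // placeholder broker address
                .setTopics("query.input")                         // placeholder topic name
                .setGroupId("flink-query-reader")                 // placeholder group id
                .setStartingOffsets(OffsetsInitializer.latest())
                // QuerySchema doubles as the record value deserializer
                .setValueOnlyDeserializer(new QuerySchema("query.input"))
                .build();

        DataStream<Query> queries =
                env.fromSource(querySource, WatermarkStrategy.noWatermarks(), "Query Source");
        queries.print();

        env.execute("Query source sketch");
    }
}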
17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink.types; 20 | 21 | import java.util.Objects; 22 | 23 | /** 24 | * The outcome of an item transaction request. 25 | */ 26 | public class TransactionResult { 27 | 28 | public ItemTransaction transaction; 29 | 30 | public boolean success; 31 | 32 | public TransactionResult() {} 33 | 34 | public TransactionResult(ItemTransaction transaction, boolean success) { 35 | this.transaction = transaction; 36 | this.success = success; 37 | } 38 | 39 | @Override 40 | public String toString() { 41 | return "ItemTransactionResult{" + 42 | "transaction=" + transaction + 43 | ", success=" + success + 44 | '}'; 45 | } 46 | 47 | @Override 48 | public boolean equals(Object o) { 49 | if (this == o) { 50 | return true; 51 | } 52 | if (o == null || getClass() != o.getClass()) { 53 | return false; 54 | } 55 | 56 | TransactionResult that = (TransactionResult) o; 57 | return success == that.success && 58 | Objects.equals(transaction, that.transaction); 59 | } 60 | 61 | @Override 62 | public int hashCode() { 63 | return Objects.hash(transaction, success); 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /flink-stateful-tutorial/src/main/java/com/cloudera/streaming/examples/flink/types/TransactionSchema.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink.types; 20 | 21 | import org.apache.flink.api.common.typeinfo.TypeHint; 22 | import org.apache.flink.api.common.typeinfo.TypeInformation; 23 | 24 | import java.io.IOException; 25 | 26 | /** 27 | * Transaction serialization schema for running the example with kafka. 28 | */ 29 | public class TransactionSchema extends JsonKafkaSerializationSchema { 30 | 31 | public TransactionSchema(String topic) { 32 | super(topic); 33 | } 34 | 35 | @Override 36 | public ItemTransaction deserialize(byte[] message) { 37 | try { 38 | return OBJECT_MAPPER.readValue(message, ItemTransaction.class); 39 | } catch (IOException e) { 40 | return null; 41 | } 42 | } 43 | 44 | @Override 45 | public TypeInformation getProducedType() { 46 | return new TypeHint() { 47 | }.getTypeInfo(); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /flink-stateful-tutorial/src/main/java/com/cloudera/streaming/examples/flink/types/TransactionSummary.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
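The transaction side is symmetric: TransactionSchema can act as the value serializer of a Kafka sink, which is roughly what a Kafka-based data generator needs. The sketch below is illustrative only; the topic and broker names are placeholders, and the KafkaSink and KafkaRecordSerializationSchema calls come from the standard Kafka connector rather than from this repository.

import org.apache.flink.connector.base.DeliveryGuarantee;
import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema;
import org.apache.flink.connector.kafka.sink.KafkaSink;
import org.apache.flink.streaming.api.datastream.DataStream;

import com.cloudera.streaming.examples.flink.types.ItemTransaction;
import com.cloudera.streaming.examples.flink.types.TransactionSchema;

public final class TransactionKafkaSinkSketch {

    public static KafkaSink<ItemTransaction> buildSink() {
        String topic = "transaction.input";                       // placeholder topic name
        return KafkaSink.<ItemTransaction>builder()
                .setBootstrapServers("broker-1:9092")             // placeholder broker address
                .setRecordSerializer(KafkaRecordSerializationSchema.builder()
                        .setTopic(topic)
                        // TransactionSchema#serialize delegates to the shared ObjectMapper
                        .setValueSerializationSchema(new TransactionSchema(topic))
                        .build())
                .setDeliveryGuarantee(DeliveryGuarantee.AT_LEAST_ONCE)
                .build();
    }

    public static void attach(DataStream<ItemTransaction> transactions) {
        transactions.sinkTo(buildSink());
    }
}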
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink.types; 20 | 21 | import java.util.Objects; 22 | 23 | /** 24 | * Transaction successfulness summary. 25 | */ 26 | public class TransactionSummary { 27 | 28 | public String itemId; 29 | 30 | public int numSuccessfulTransactions = 0; 31 | 32 | public int numFailedTransactions = 0; 33 | 34 | public long totalVolume = 0; 35 | 36 | public TransactionSummary() { 37 | } 38 | 39 | public TransactionSummary(String itemId, int numSuccessfulTransactions, int numFailedTransactions, long totalVolume) { 40 | this.itemId = itemId; 41 | this.numSuccessfulTransactions = numSuccessfulTransactions; 42 | this.numFailedTransactions = numFailedTransactions; 43 | this.totalVolume = totalVolume; 44 | } 45 | 46 | @Override 47 | public boolean equals(Object o) { 48 | if (this == o) { 49 | return true; 50 | } 51 | if (o == null || getClass() != o.getClass()) { 52 | return false; 53 | } 54 | 55 | TransactionSummary that = (TransactionSummary) o; 56 | return numSuccessfulTransactions == that.numSuccessfulTransactions && 57 | numFailedTransactions == that.numFailedTransactions && 58 | totalVolume == that.totalVolume && 59 | Objects.equals(itemId, that.itemId); 60 | } 61 | 62 | @Override 63 | public int hashCode() { 64 | return Objects.hash(itemId, numSuccessfulTransactions, numFailedTransactions, totalVolume); 65 | } 66 | 67 | @Override 68 | public String toString() { 69 | return "TransactionSummary{" + 70 | "itemId='" + itemId + '\'' + 71 | ", numSuccessfulTransactions=" + numSuccessfulTransactions + 72 | ", numFailedTransactions=" + numFailedTransactions + 73 | ", totalVolume=" + totalVolume + 74 | '}'; 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /flink-stateful-tutorial/src/main/java/com/cloudera/streaming/examples/flink/utils/ExponentialHistogram.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
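TransactionSummary is a plain accumulator POJO; to show how its counters are meant to be filled, here is an illustrative Flink AggregateFunction. It is not the tutorial's TransactionSummaryAggregator, the class name is hypothetical, and it assumes ItemTransaction exposes itemId and quantity fields, which are not part of this listing.

import org.apache.flink.api.common.functions.AggregateFunction;

import com.cloudera.streaming.examples.flink.types.TransactionResult;
import com.cloudera.streaming.examples.flink.types.TransactionSummary;

public class TransactionSummarySketch
        implements AggregateFunction<TransactionResult, TransactionSummary, TransactionSummary> {

    @Override
    public TransactionSummary createAccumulator() {
        return new TransactionSummary();
    }

    @Override
    public TransactionSummary add(TransactionResult result, TransactionSummary acc) {
        acc.itemId = result.transaction.itemId;                    // assumed field name
        acc.totalVolume += Math.abs(result.transaction.quantity);  // assumed field name
        if (result.success) {
            acc.numSuccessfulTransactions++;
        } else {
            acc.numFailedTransactions++;
        }
        return acc;
    }

    @Override
    public TransactionSummary getResult(TransactionSummary acc) {
        return acc;
    }

    @Override
    public TransactionSummary merge(TransactionSummary a, TransactionSummary b) {
        return new TransactionSummary(
                a.itemId,
                a.numSuccessfulTransactions + b.numSuccessfulTransactions,
                a.numFailedTransactions + b.numFailedTransactions,
                a.totalVolume + b.totalVolume);
    }
}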
17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink.utils; 20 | 21 | import org.apache.flink.metrics.Histogram; 22 | import org.apache.flink.metrics.HistogramStatistics; 23 | 24 | import com.codahale.metrics.ExponentiallyDecayingReservoir; 25 | 26 | /** 27 | * Wrapper for exponential histogram at com.codahale.metrics. 28 | */ 29 | public class ExponentialHistogram implements Histogram { 30 | 31 | private final com.codahale.metrics.Histogram dropwizardHistogram; 32 | 33 | public ExponentialHistogram(int size, double alpha) { 34 | this.dropwizardHistogram = new com.codahale.metrics.Histogram( 35 | new ExponentiallyDecayingReservoir(size, alpha)); 36 | } 37 | 38 | public ExponentialHistogram() { 39 | this.dropwizardHistogram = new com.codahale.metrics.Histogram( 40 | new ExponentiallyDecayingReservoir()); 41 | } 42 | 43 | @Override 44 | public void update(long value) { 45 | dropwizardHistogram.update(value); 46 | 47 | } 48 | 49 | @Override 50 | public long getCount() { 51 | return dropwizardHistogram.getCount(); 52 | } 53 | 54 | @Override 55 | public HistogramStatistics getStatistics() { 56 | return new SlidingHistogramStatistics(dropwizardHistogram.getSnapshot()); 57 | } 58 | 59 | /** 60 | * Wrapper for Snapshot at com.codahale.metrics. 61 | */ 62 | public static final class SlidingHistogramStatistics extends HistogramStatistics { 63 | 64 | private final com.codahale.metrics.Snapshot snapshot; 65 | 66 | SlidingHistogramStatistics(com.codahale.metrics.Snapshot snapshot) { 67 | this.snapshot = snapshot; 68 | } 69 | 70 | @Override 71 | public double getQuantile(double quantile) { 72 | return snapshot.getValue(quantile); 73 | } 74 | 75 | @Override 76 | public long[] getValues() { 77 | return snapshot.getValues(); 78 | } 79 | 80 | @Override 81 | public int size() { 82 | return snapshot.size(); 83 | } 84 | 85 | @Override 86 | public double getMean() { 87 | return snapshot.getMean(); 88 | } 89 | 90 | @Override 91 | public double getStdDev() { 92 | return snapshot.getStdDev(); 93 | } 94 | 95 | @Override 96 | public long getMax() { 97 | return snapshot.getMax(); 98 | } 99 | 100 | @Override 101 | public long getMin() { 102 | return snapshot.getMin(); 103 | } 104 | } 105 | 106 | } 107 | -------------------------------------------------------------------------------- /flink-stateful-tutorial/src/main/java/com/cloudera/streaming/examples/flink/utils/Utils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
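ExponentialHistogram implements Flink's Histogram interface, so it can be registered directly on an operator's metric group. The sketch below assumes a hypothetical latency-tracking map operator; only the Query.ts field and the standard Flink metrics API are taken from the sources shown here, and the metric and class names are placeholders.

import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.metrics.Histogram;

import com.cloudera.streaming.examples.flink.types.Query;
import com.cloudera.streaming.examples.flink.utils.ExponentialHistogram;

public class QueryLatencyTrackingMap extends RichMapFunction<Query, Query> {

    private transient Histogram queryAgeHistogram;

    @Override
    public void open(Configuration parameters) {
        // Register the Dropwizard-backed histogram on the operator metric group;
        // the no-argument constructor keeps the reservoir's default size and decay factor
        queryAgeHistogram = getRuntimeContext().getMetricGroup()
                .histogram("queryAgeMs", new ExponentialHistogram());
    }

    @Override
    public Query map(Query query) {
        // Query.ts is set at creation time, so this tracks how old queries are when processed
        queryAgeHistogram.update(System.currentTimeMillis() - query.ts);
        return query;
    }
}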
17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink.utils; 20 | 21 | import org.apache.flink.api.java.utils.ParameterTool; 22 | 23 | import org.slf4j.Logger; 24 | import org.slf4j.LoggerFactory; 25 | 26 | import java.util.Properties; 27 | 28 | /** 29 | * Utility functions. 30 | */ 31 | public class Utils { 32 | 33 | private static final Logger LOG = LoggerFactory.getLogger(Utils.class); 34 | 35 | public static final String KAFKA_PREFIX = "kafka."; 36 | 37 | public static Properties readKafkaProperties(ParameterTool params) { 38 | Properties properties = new Properties(); 39 | for (String key : params.getProperties().stringPropertyNames()) { 40 | if (key.startsWith(KAFKA_PREFIX)) { 41 | properties.setProperty(key.substring(KAFKA_PREFIX.length()), params.get(key)); 42 | } 43 | } 44 | 45 | LOG.info("### Kafka parameters:"); 46 | for (String key : properties.stringPropertyNames()) { 47 | LOG.info("Kafka param: {}={}", key, properties.get(key)); 48 | } 49 | return properties; 50 | } 51 | 52 | private Utils() { 53 | throw new UnsupportedOperationException("Utils should not be instantiated!"); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /flink-stateful-tutorial/src/test/java/com/cloudera/streaming/examples/flink/SocketTransactionProcessorJob.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink; 20 | 21 | import org.apache.flink.api.common.functions.FlatMapFunction; 22 | import org.apache.flink.api.java.utils.ParameterTool; 23 | import org.apache.flink.streaming.api.datastream.DataStream; 24 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 25 | import org.apache.flink.util.Collector; 26 | 27 | import com.cloudera.streaming.examples.flink.operators.ItemTransactionGeneratorSource; 28 | import com.cloudera.streaming.examples.flink.types.ItemTransaction; 29 | import com.cloudera.streaming.examples.flink.types.Query; 30 | import com.cloudera.streaming.examples.flink.types.QueryResult; 31 | import com.cloudera.streaming.examples.flink.types.TransactionResult; 32 | import com.cloudera.streaming.examples.flink.types.TransactionSummary; 33 | import com.fasterxml.jackson.databind.ObjectMapper; 34 | 35 | /** 36 | * Simple socket based pipeline for testing the application locally. Before running start a socket connection: 37 | * 38 | *
nc -lk 9999 39 | * 40 | * Once the job has started you can send queries in the form: 41 | * 42 | * {"queryId":$queryId, "itemId":"$itemId"} 43 | * 44 | * For example: 45 | * 46 | *
{"queryId":123, "itemId":"item_2"} 47 | */ 48 | public class SocketTransactionProcessorJob extends ItemTransactionJob { 49 | 50 | public static void main(String[] args) throws Exception { 51 | new SocketTransactionProcessorJob() 52 | .createApplicationPipeline(ParameterTool.fromArgs(new String[]{"--minimum.summary.vol", "850", "--sleep", "1"})) 53 | .execute(); 54 | } 55 | 56 | @Override 57 | public void writeQueryOutput(ParameterTool params, DataStream queryResultStream) { 58 | queryResultStream.printToErr(); 59 | } 60 | 61 | @Override 62 | protected void writeTransactionResults(ParameterTool params, DataStream transactionResults) { 63 | // Ignore them for now 64 | } 65 | 66 | @Override 67 | protected void writeTransactionSummaries(ParameterTool params, DataStream transactionSummaryStream) { 68 | // transactionSummaryStream.print(); 69 | } 70 | 71 | @Override 72 | public DataStream readQueryStream(ParameterTool params, StreamExecutionEnvironment env) { 73 | return env.socketTextStream("localhost", 9999).flatMap(new FlatMapFunction() { 74 | private final ObjectMapper om = new ObjectMapper(); 75 | 76 | @Override 77 | public void flatMap(String s, Collector out) { 78 | try { 79 | out.collect(om.readValue(s, Query.class)); 80 | } catch (Throwable ignored) {} 81 | } 82 | }); 83 | } 84 | 85 | @Override 86 | public DataStream readTransactionStream(ParameterTool params, StreamExecutionEnvironment env) { 87 | return env.addSource(new ItemTransactionGeneratorSource(params)); 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /flink-stateful-tutorial/src/test/java/com/cloudera/streaming/examples/flink/TransactionProcessorTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to Cloudera, Inc. under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
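For completeness, a small usage sketch for the Utils.readKafkaProperties helper shown earlier, which strips the "kafka." prefix and turns job parameters into plain Kafka client properties. The parameter values and class name below are placeholders, not values from the tutorial's job.properties.

import org.apache.flink.api.java.utils.ParameterTool;

import com.cloudera.streaming.examples.flink.utils.Utils;

import java.util.Properties;

public final class KafkaPropertiesExample {

    public static void main(String[] args) {
        ParameterTool params = ParameterTool.fromArgs(new String[]{
                "--kafka.bootstrap.servers", "broker-1:9092",     // placeholder broker address
                "--kafka.group.id", "flink-stateful-tutorial",    // placeholder group id
                "--checkpoint.interval", "60000"                  // keys without the kafka. prefix are skipped
        });

        Properties kafkaProps = Utils.readKafkaProperties(params);
        // Prints bootstrap.servers=broker-1:9092 and group.id=flink-stateful-tutorial
        kafkaProps.forEach((k, v) -> System.out.println(k + "=" + v));
    }
}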
17 | */ 18 | 19 | package com.cloudera.streaming.examples.flink; 20 | 21 | import org.apache.flink.api.common.typeinfo.TypeInformation; 22 | import org.apache.flink.api.java.utils.ParameterTool; 23 | import org.apache.flink.streaming.api.datastream.DataStream; 24 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 25 | import org.apache.flink.test.util.CollectingSink; 26 | import org.apache.flink.test.util.JobTester; 27 | import org.apache.flink.test.util.ManualSource; 28 | 29 | import com.cloudera.streaming.examples.flink.types.ItemInfo; 30 | import com.cloudera.streaming.examples.flink.types.ItemTransaction; 31 | import com.cloudera.streaming.examples.flink.types.Query; 32 | import com.cloudera.streaming.examples.flink.types.QueryResult; 33 | import com.cloudera.streaming.examples.flink.types.TransactionResult; 34 | import com.cloudera.streaming.examples.flink.types.TransactionSummary; 35 | import org.junit.jupiter.api.Test; 36 | 37 | import static org.junit.jupiter.api.Assertions.assertEquals; 38 | import static org.junit.jupiter.api.Assertions.assertTrue; 39 | 40 | /** 41 | * Simple unit test covering the core functions of the stateful example. 42 | */ 43 | class TransactionProcessorTest extends ItemTransactionJob { 44 | 45 | private final CollectingSink transactionResultSink = new CollectingSink<>(); 46 | private final CollectingSink queryResultSink = new CollectingSink<>(); 47 | 48 | private ManualSource transactionSource; 49 | private ManualSource querySource; 50 | 51 | @Test 52 | void runTest() throws Exception { 53 | JobTester.startTest(createApplicationPipeline(ParameterTool.fromArgs(new String[]{}))); 54 | 55 | ItemTransaction it1 = new ItemTransaction(1, 2, "item_1", 100); 56 | transactionSource.sendRecord(it1); 57 | assertEquals(new TransactionResult(it1, true), transactionResultSink.poll()); 58 | 59 | querySource.sendRecord(new Query(0, "item_1")); 60 | assertEquals(new QueryResult(0, new ItemInfo("item_1", 100)), queryResultSink.poll()); 61 | 62 | querySource.sendRecord(new Query(3, "item_2")); 63 | assertEquals(new QueryResult(3, new ItemInfo("item_2", 0)), queryResultSink.poll()); 64 | 65 | JobTester.stopTest(); 66 | 67 | assertTrue(transactionResultSink.isEmpty()); 68 | assertTrue(queryResultSink.isEmpty()); 69 | } 70 | 71 | @Override 72 | public void writeQueryOutput(ParameterTool params, DataStream queryResultStream) { 73 | queryResultStream.addSink(queryResultSink); 74 | } 75 | 76 | @Override 77 | protected void writeTransactionResults(ParameterTool params, DataStream transactionResults) { 78 | transactionResults.addSink(transactionResultSink); 79 | } 80 | 81 | @Override 82 | protected void writeTransactionSummaries(ParameterTool params, DataStream transactionSummaryStream) { 83 | //ignore 84 | } 85 | 86 | @Override 87 | public DataStream readQueryStream(ParameterTool params, StreamExecutionEnvironment env) { 88 | querySource = JobTester.createManualSource(env, TypeInformation.of(Query.class)); 89 | return querySource.getDataStream(); 90 | } 91 | 92 | @Override 93 | public DataStream readTransactionStream(ParameterTool params, StreamExecutionEnvironment env) { 94 | transactionSource = JobTester.createManualSource(env, TypeInformation.of(ItemTransaction.class)); 95 | return transactionSource.getDataStream(); 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /flink-stateful-tutorial/src/test/resources/log4j2-test.properties: 
-------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to Cloudera, Inc. under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | rootLogger.level = INFO 20 | rootLogger.appenderRef.test.ref = TestLogger 21 | 22 | appender.testlogger.name = TestLogger 23 | appender.testlogger.type = CONSOLE 24 | appender.testlogger.target = SYSTEM_OUT 25 | appender.testlogger.layout.type = PatternLayout 26 | appender.testlogger.layout.pattern = %d %p %C{1.} [%t] %m%n 27 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 19 | 22 | 4.0.0 23 | 24 | com.cloudera.flink 25 | flink-tutorials 26 | 1.20.1-csa1.15.0.0 27 | pom 28 | Flink Tutorials 29 | 30 | 31 | flink-simple-tutorial 32 | flink-stateful-tutorial 33 | flink-secure-tutorial 34 | 35 | 36 | 37 | UTF-8 38 | 2.12 39 | 40 | 1.20.1-csa1.15.0.0 41 | 1.2.0-csa1.15.0.0 42 | 3.4.0-csa1.15.0.0 43 | 44 | 45 | 2.14.3 46 | 5.10.1 47 | 2.22.0 48 | 4.2.22 49 | 8.0.33 50 | 51 | 52 | 1.9.2 53 | 1.0.0 54 | 2.17 55 | 3.4.1 56 | 3.2.2 57 | 58 | 59 | 60 | 61 | 62 | 63 | org.apache.flink 64 | flink-clients 65 | ${flink.version} 66 | 67 | 68 | 69 | org.apache.flink 70 | flink-connector-cloudera-registry 71 | ${flink.cloudera.connector.version} 72 | 73 | 74 | junit 75 | junit 76 | 77 | 78 | 79 | 80 | 81 | org.apache.flink 82 | flink-connector-base 83 | ${flink.version} 84 | 85 | 86 | 87 | org.apache.flink 88 | flink-connector-kafka 89 | ${flink.kafka.connector.version} 90 | 91 | 92 | 93 | org.apache.flink 94 | flink-connector-files 95 | ${flink.version} 96 | 97 | 98 | 99 | org.apache.flink 100 | flink-core 101 | ${flink.version} 102 | 103 | 104 | 105 | org.apache.flink 106 | flink-java 107 | ${flink.version} 108 | 109 | 110 | 111 | org.apache.flink 112 | flink-statebackend-rocksdb 113 | ${flink.version} 114 | 115 | 116 | 117 | org.apache.flink 118 | flink-streaming-java 119 | ${flink.version} 120 | 121 | 122 | 123 | org.apache.flink 124 | flink-test-utils 125 | ${flink.version} 126 | 127 | 128 | junit 129 | junit 130 | 131 | 132 | org.junit.vintage 133 | junit-vintage-engine 134 | 135 | 136 | 137 | 138 | 139 | 140 | org.apache.logging.log4j 141 | log4j-api 142 | ${log4j.version} 143 | 144 | 145 | 146 | org.apache.logging.log4j 147 | log4j-core 148 | ${log4j.version} 149 | 150 | 151 | 152 | org.apache.logging.log4j 153 | log4j-slf4j-impl 154 | ${log4j.version} 155 | 156 | 157 | 158 | 159 | org.junit.jupiter 160 | junit-jupiter-engine 161 | ${junit-jupiter.version} 162 | test 163 | 164 | 165 | 166 | 167 | 
com.fasterxml.jackson.core 168 | jackson-databind 169 | ${jackson.version} 170 | 171 | 172 | 173 | io.dropwizard.metrics 174 | metrics-core 175 | ${metrics.version} 176 | 177 | 178 | 179 | mysql 180 | mysql-connector-java 181 | ${mysql.version} 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | org.apache.maven.plugins 190 | maven-surefire-plugin 191 | 192 | 193 | 194 | 195 | 196 | 197 | org.apache.avro 198 | avro-maven-plugin 199 | ${avro.plugin.version} 200 | 201 | 202 | 203 | org.apache.maven.plugins 204 | maven-shade-plugin 205 | ${maven.shade.plugin.version} 206 | 207 | 208 | 209 | org.apache.maven.plugins 210 | maven-surefire-plugin 211 | ${maven.surefire.plugin.version} 212 | 213 | 214 | 215 | org.apache.maven.plugins 216 | maven-checkstyle-plugin 217 | ${maven.checkstyle.plugin.version} 218 | 219 | 220 | com.puppycrawl.tools 221 | checkstyle 222 | 223 | 8.14 224 | 225 | 226 | 227 | 228 | validate 229 | validate 230 | 231 | check 232 | 233 | 234 | 235 | 236 | checkstyle/suppressions.xml 237 | true 238 | 239 | https://raw.githubusercontent.com/apache/flink/master/tools/maven/checkstyle.xml 240 | 241 | true 242 | true 243 | 244 | 245 | 246 | 247 | 248 | org.eclipse.m2e 249 | lifecycle-mapping 250 | ${lifecycle.mapping.version} 251 | 252 | 253 | 254 | 255 | 256 | org.apache.maven.plugins 257 | maven-shade-plugin 258 | [3.0.0,) 259 | 260 | shade 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | org.apache.maven.plugins 270 | maven-compiler-plugin 271 | [3.1,) 272 | 273 | testCompile 274 | compile 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | cloudera 292 | https://repository.cloudera.com/artifactory/libs-release-local/ 293 | 294 | 295 | 296 | nexus-public-snapshot 297 | https://nexus-private.hortonworks.com/nexus/repository/PUBLIC-SNAPSHOT 298 | 299 | false 300 | 301 | 302 | true 303 | 304 | 305 | 306 | 307 | hortonworks 308 | https://repo.hortonworks.com/content/repositories/releases/ 309 | 310 | 311 | 312 | 313 | 314 | in-qa-snapshot 315 | Cloudera Snapshot Artifactory 316 | https://nexus-private.hortonworks.com/nexus/repository/IN-QA-SNAPSHOT 317 | 318 | 319 | 320 | 321 | --------------------------------------------------------------------------------