├── .gitignore ├── LICENSE ├── README.md ├── flinkDemo.iml ├── pom.xml └── src └── main ├── java └── com │ └── z │ └── flinkStreamOptimizatiion │ ├── broadcast │ └── BroadcastDemo.java │ ├── datesetOp │ ├── WordCountData.java │ ├── WordCountDemo.java │ └── WordCountExample.java │ ├── hbase │ ├── Flink2HBase.java │ ├── FlinkGHBaseByDataSet.java │ └── loader │ │ ├── HBaseLoader.java │ │ ├── HBaseRetryingUtils.java │ │ ├── HBaseUtils.java │ │ └── ILoader.java │ ├── kafka │ ├── ReadFromKafka.java │ └── WriteIntoKafka.java │ ├── metricsOp │ └── gaugesOp.java │ ├── rpc │ ├── client │ │ ├── MessageCollector.java │ │ ├── RPCClient.java │ │ ├── RPCException.java │ │ └── RpcFuture.java │ ├── common │ │ ├── Charsets.java │ │ ├── IMessageHandler.java │ │ ├── MessageDecoder.java │ │ ├── MessageEncoder.java │ │ ├── MessageHandlers.java │ │ ├── MessageInput.java │ │ ├── MessageOutput.java │ │ ├── MessageRegistry.java │ │ └── RequestId.java │ ├── demo │ │ ├── DemoClient.java │ │ └── DemoServer.java │ └── server │ │ ├── DefaultHandler.java │ │ ├── MessageCollector.java │ │ └── RPCServer.java │ ├── state │ ├── CountWindowAverage.java │ └── KeyStateMsgDemo.java │ ├── stream │ ├── MyNoParalleSource.java │ ├── MyNoParalleStrSource.java │ ├── MyParalleSource.java │ ├── MyPartition.java │ ├── MyRichParalleSource.java │ ├── NumberStreamDemo.java │ ├── StreamJoinDataSource1.java │ ├── StreamJoinDataSource2.java │ ├── StreamJoinDemo.java │ ├── TimeWindowDemo.java │ └── WindowComputeUtil.java │ └── test │ ├── SliceActVV.java │ └── test1.java └── resources ├── consumer.properties ├── hbase-site.xml ├── log4j.properties └── producer.properties /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled class file 2 | *.class 3 | 4 | # Log file 5 | *.log 6 | 7 | # BlueJ files 8 | *.ctxt 9 | 10 | # Mobile Tools for Java (J2ME) 11 | .mtj.tmp/ 12 | 13 | # Package Files # 14 | *.jar 15 | *.war 16 | *.nar 17 | *.ear 18 | *.zip 19 | *.tar.gz 20 | *.rar 21 | 22 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 23 | hs_err_pid* 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 基于Flink多流Join优化的研究与实现 2 | 1 伪代码 3 | 2 单流场景下的TimeWindow滚动窗口边界与数据延迟问题 4 | 3 多流Join场景下的窗口计算触发时机、延时数据丢失问题 5 | 4 针对flink流算子中rpc调用场景,利用netty自研rpc工具 6 | ## 1 伪代码: 7 | Flink stream join的形式为Windows join 8 | ```$xslt 9 | stream.join(otherStream) 10 | .where() 11 | .equalTo() 12 | .window() 13 | .apply() 14 | ``` 15 | ## 2 单流场景下的TimeWindow滚动窗口边界与数据延迟问题 16 | ### 2.1 问题陈述 17 | 多流Join的思路是在同一窗口对多流进行Join,针对每条单流: 18 | 每条流都是使用Flink的timeWindow api中的window size、delay、timestamp,计算触发窗口计算的时机, 19 | 每条流的延时数据,Flink根据window size、delay、延时数据的timestamp,判断是否丢弃, 20 | 本节通过调节windows size、delay,分析触发窗口计算的条件,以及触发延时数据丢失的条件。 21 | ### 2.2 数据所属窗口计算逻辑 22 | Flink源码中,数据所属窗口的计算逻辑: 23 | ```$xslt 24 | //Flink源码的窗口计算函数,该函数根据每条数据的timestamp、window size计算该条数据所属的[窗口开始时间,窗口结束时间] 25 | public static long getWindowStartWithOffset(long timestamp, long offset, long windowSize) { 26 | return timestamp - (timestamp - offset + windowSize) % windowSize; 27 | } 28 | ``` 29 | 测试:根据event time和窗口时间大小,计算数据所属的窗口的开始时间和结束时间 30 | 代码位置:https://github.com/zengxiaosen/flinkMultiStreamOptimization/blob/master/src/main/java/com/z/flinkStreamOptimizatiion/stream/TimeWindowDemo.java 31 | 文件中的test1() 32 | ```$xslt 33 | //结果展示: 34 | 1000000050000 -> 2001-09-09 09:47:30.000 所属窗口的开始时间是:1000000050000 -> 2001-09-09 09:47:30.000 35 | 1000000054000 -> 2001-09-09 09:47:34.000 所属窗口的起始时间是: 1000000050000 -> 2001-09-09 09:47:30.000 36 | 1000000079900 -> 2001-09-09 09:47:59.900 所属窗口的起始时间是: 1000000070000 -> 2001-09-09 09:47:50.000 37 | 1000000120000 -> 2001-09-09 09:48:40.000 所属窗口的起始时间是: 1000000120000 -> 2001-09-09 09:48:40.000 38 | 1000000111000 -> 2001-09-09 09:48:31.000 所属窗口的起始时间是: 1000000110000 -> 2001-09-09 09:48:30.000 39 | 1000000089000 -> 2001-09-09 09:48:09.000 所属窗口的起始时间是: 1000000080000 -> 2001-09-09 09:48:00.000 40 | ``` 41 | ### 2.3 单流的窗口计算触发时机 42 | 代码位置:https://github.com/zengxiaosen/flinkMultiStreamOptimization/blob/master/src/main/java/com/z/flinkStreamOptimizatiion/stream/TimeWindowDemo.java 43 | 文件中的test2() 44 | 数据源: 45 | ```$xslt 46 | Tuple3[] elements = new Tuple3[]{ 47 | Tuple3.of("a", "1", 1000000050000L), 48 | Tuple3.of("a", "2", 1000000054000L), 49 | Tuple3.of("a", "3", 1000000079900L), 50 | Tuple3.of("a", "4", 1000000120000L), 51 | Tuple3.of("b", "5", 1000000111000L), 52 | Tuple3.of("b", "6", 1000000089000L) 53 | }; 54 | ``` 55 | 窗口属性设置: 56 | ```$xslt 57 | long delay = 5000L; 58 | int windowSize = 10; 59 | ``` 60 | 水位线计算逻辑: 61 | ```$xslt 62 | //水位线的目标是使水位线以下的record触发窗口计算 63 | private final long maxOutOfOrderness = delay; 64 | private long currentMaxTimestamp = 0L; 65 | 66 | @Nullable 67 | @Override 68 | public Watermark getCurrentWatermark() { 69 | return new Watermark(currentMaxTimestamp - maxOutOfOrderness); 70 | } 71 | ``` 72 | 针对流的每条record,跟踪水位线,窗口开始时间,窗口结束时间,时间戳等日志 73 | ```$xslt 74 | #### 第 1 个record #### 75 | currentMaxTimestamp: 1000000050000 76 | 水位线(watermark): 1000000045000 -> 2001-09-09 09:47:25.000 77 | 窗口开始时间:1000000050000 -> 2001-09-09 09:47:30.000 78 | 窗口结束时间:1000000060000 -> 2001-09-09 09:47:40.000 79 | 1 -> 1000000050000 -> 2001-09-09 09:47:30.000 80 | #### 第 2 个record #### 81 | currentMaxTimestamp: 1000000054000 82 | 水位线(watermark): 1000000049000 -> 2001-09-09 09:47:29.000 83 | 窗口开始时间:1000000050000 -> 2001-09-09 09:47:30.000 84 | 窗口结束时间:1000000060000 -> 2001-09-09 09:47:40.000 85 | 2 -> 1000000054000 -> 2001-09-09 09:47:34.000 86 | #### 第 3 个record #### 87 | 
currentMaxTimestamp: 1000000079900 88 | 水位线(watermark): 1000000074900 -> 2001-09-09 09:47:54.900 89 | 窗口开始时间:1000000070000 -> 2001-09-09 09:47:50.000 90 | 窗口结束时间:1000000080000 -> 2001-09-09 09:48:00.000 91 | 3 -> 1000000079900 -> 2001-09-09 09:47:59.900 92 | (a,[1,2],1) 93 | #### 第 4 个record #### 94 | currentMaxTimestamp: 1000000120000 95 | 水位线(watermark): 1000000115000 -> 2001-09-09 09:48:35.000 96 | 窗口开始时间:1000000120000 -> 2001-09-09 09:48:40.000 97 | 窗口结束时间:1000000130000 -> 2001-09-09 09:48:50.000 98 | 4 -> 1000000120000 -> 2001-09-09 09:48:40.000 99 | (a,3,1000000079900) 100 | #### 第 5 个record #### 101 | currentMaxTimestamp: 1000000120000 102 | 水位线(watermark): 1000000115000 -> 2001-09-09 09:48:35.000 103 | 窗口开始时间:1000000110000 -> 2001-09-09 09:48:30.000 104 | 窗口结束时间:1000000120000 -> 2001-09-09 09:48:40.000 105 | 5 -> 1000000111000 -> 2001-09-09 09:48:31.000 106 | #### 第 6 个record #### 107 | currentMaxTimestamp: 1000000120000 108 | 水位线(watermark): 1000000115000 -> 2001-09-09 09:48:35.000 109 | 窗口开始时间:1000000080000 -> 2001-09-09 09:48:00.000 110 | 窗口结束时间:1000000090000 -> 2001-09-09 09:48:10.000 111 | 6 -> 1000000089000 -> 2001-09-09 09:48:09.000 112 | (b,5,1000000111000) 113 | (a,4,1000000120000) 114 | ``` 115 | 日志分析: 116 | ```$xslt 117 | /** 118 | * 触发窗口运算时机: 119 | * 当一条数据过来, 120 | * 1)水位线 > 上一批次的记录的窗口结束时间,之前的数据要进行窗口运算 121 | * 2)水位线 > 上一批次的记录的timestamp,之前的数据要进行窗口计算 122 | * 123 | * 关于是否丢数据: 124 | * 1)如果当前数据的EventTime在WaterMark之上,也就是EventTime > WaterMark。由于数据所属窗口 125 | * 的WindowEndTime,一定是大于EventTime的。这时有WindowEndTime > EventTime > WaterMark 126 | * 这种情况是一定不会丢数据的。 127 | * 2)如果当前数据的EventTime在WaterMark之下,也就是WaterMark > EventTime,这时要分两种情况: 128 | * 2.1)如果该数据所属窗口的WindowEndTime > WaterMark,表示窗口还没被触发,例如第5个record的情况, 129 | * 即WindowEndTime > WaterMark > EventTime,这种情况数据也是不会丢失的。 130 | * 2.2)如果该数据所属窗口的WaterMark > WindowEndTime, 则表示窗口已经无法被触发, 131 | * 即WaterMark > WindowEndTime > EventTime, 这种情况数据也就丢失了。 132 | * 133 | * 特殊record: 134 | * 第5条record,元素在水位以下,但windows还没被触发计算,参照record 5 135 | * 第6条record,由于watermark > windows end time ,第6条数据所属的窗口就永远不会被触发计算了。 136 | */ 137 | ``` 138 | ### 2.4 单流的窗口计算数据丢失场景 139 | 窗口延时数据丢失情况:元素在水位以下,但windows已经无法被触发计算了 140 | 代码位置:https://github.com/zengxiaosen/flinkMultiStreamOptimization/blob/master/src/main/java/com/z/flinkStreamOptimizatiion/stream/TimeWindowDemo.java 141 | 文件中的test3() 142 | 数据源: 143 | ```$xslt 144 | Tuple3[] elements = new Tuple3[]{ 145 | Tuple3.of("a", "1", 1000000050000L), 146 | Tuple3.of("a", "2", 1000000054000L), 147 | Tuple3.of("a", "3", 1000000079900L), 148 | Tuple3.of("a", "4", 1000000120000L), 149 | Tuple3.of("b", "5", 1000000100001L), 150 | Tuple3.of("b", "6", 1000000109000L) 151 | }; 152 | ``` 153 | 窗口属性设置: 154 | ```$xslt 155 | long delay = 5000L; 156 | int windowSize = 10; 157 | ``` 158 | 针对流的每条record,跟踪水位线,窗口开始时间,窗口结束时间,时间戳等日志: 159 | ```$xslt 160 | #### 第 1 个record #### 161 | currentMaxTimestamp: 1000000050000 162 | 水位线(watermark): 1000000045000 -> 2001-09-09 09:47:25.000 163 | 窗口开始时间:1000000050000 -> 2001-09-09 09:47:30.000 164 | 窗口结束时间:1000000060000 -> 2001-09-09 09:47:40.000 165 | 1 -> 1000000050000 -> 2001-09-09 09:47:30.000 166 | #### 第 2 个record #### 167 | currentMaxTimestamp: 1000000054000 168 | 水位线(watermark): 1000000049000 -> 2001-09-09 09:47:29.000 169 | 窗口开始时间:1000000050000 -> 2001-09-09 09:47:30.000 170 | 窗口结束时间:1000000060000 -> 2001-09-09 09:47:40.000 171 | 2 -> 1000000054000 -> 2001-09-09 09:47:34.000 172 | #### 第 3 个record #### 173 | currentMaxTimestamp: 1000000079900 174 | 水位线(watermark): 1000000074900 -> 2001-09-09 09:47:54.900 
175 | 窗口开始时间:1000000070000 -> 2001-09-09 09:47:50.000 176 | 窗口结束时间:1000000080000 -> 2001-09-09 09:48:00.000 177 | 3 -> 1000000079900 -> 2001-09-09 09:47:59.900 178 | (a,[1,2],1) 179 | #### 第 4 个record #### 180 | currentMaxTimestamp: 1000000120000 181 | 水位线(watermark): 1000000115000 -> 2001-09-09 09:48:35.000 182 | 窗口开始时间:1000000120000 -> 2001-09-09 09:48:40.000 183 | 窗口结束时间:1000000130000 -> 2001-09-09 09:48:50.000 184 | 4 -> 1000000120000 -> 2001-09-09 09:48:40.000 185 | (a,3,1000000079900) 186 | #### 第 5 个record #### 187 | currentMaxTimestamp: 1000000120000 188 | 水位线(watermark): 1000000115000 -> 2001-09-09 09:48:35.000 189 | 窗口开始时间:1000000100000 -> 2001-09-09 09:48:20.000 190 | 窗口结束时间:1000000110000 -> 2001-09-09 09:48:30.000 191 | 5 -> 1000000100001 -> 2001-09-09 09:48:20.001 192 | #### 第 6 个record #### 193 | currentMaxTimestamp: 1000000120000 194 | 水位线(watermark): 1000000115000 -> 2001-09-09 09:48:35.000 195 | 窗口开始时间:1000000100000 -> 2001-09-09 09:48:20.000 196 | 窗口结束时间:1000000110000 -> 2001-09-09 09:48:30.000 197 | 6 -> 1000000109000 -> 2001-09-09 09:48:29.000 198 | (a,4,1000000120000) 199 | ``` 200 | 日志分析: 201 | ```$xslt 202 | /** 203 | * 观察record 5 和 record 6,它们的窗口属性如下: 204 | * 窗口开始时间:1000000100000 -> 2001-09-09 09:48:20.000 205 | * 窗口结束时间:1000000110000 -> 2001-09-09 09:48:30.000 206 | * windows end time < watermark, 这个窗口已经无法被触发计算了。 207 | * 也就是说,这个窗口创建时,已经 windows end time < watermark,相当于第5第6条记录都丢失了。 208 | */ 209 | ``` 210 | ### 2.5 针对单流延时数据的丢失问题,提出增大delay的解决方案 211 | 解决思路:通过增大delay,来增大失序的容忍程度,确保不丢数据 212 | 代码位置:https://github.com/zengxiaosen/flinkMultiStreamOptimization/blob/master/src/main/java/com/z/flinkStreamOptimizatiion/stream/TimeWindowDemo.java 213 | 文件中的test4() 214 | 数据源: 215 | ```$xslt 216 | Tuple3[] elements = new Tuple3[]{ 217 | Tuple3.of("a", "1", 1000000050000L), 218 | Tuple3.of("a", "2", 1000000054000L), 219 | Tuple3.of("a", "3", 1000000079900L), 220 | Tuple3.of("a", "4", 1000000115000L), 221 | Tuple3.of("b", "5", 1000000100000L), 222 | Tuple3.of("b", "6", 1000000108000L) 223 | }; 224 | ``` 225 | 窗口属性设置: 226 | ```$xslt 227 | long delay = 5100L; 228 | int windowSize = 10; 229 | ``` 230 | 针对流的每条record,跟踪水位线,窗口开始时间,窗口结束时间,时间戳等日志: 231 | ```$xslt 232 | #### 第 1 个record #### 233 | currentMaxTimestamp: 1000000050000 234 | 水位线(watermark): 1000000044900 -> 2001-09-09 09:47:24.900 235 | 窗口开始时间:1000000050000 -> 2001-09-09 09:47:30.000 236 | 窗口结束时间:1000000060000 -> 2001-09-09 09:47:40.000 237 | 1 -> 1000000050000 -> 2001-09-09 09:47:30.000 238 | #### 第 2 个record #### 239 | currentMaxTimestamp: 1000000054000 240 | 水位线(watermark): 1000000048900 -> 2001-09-09 09:47:28.900 241 | 窗口开始时间:1000000050000 -> 2001-09-09 09:47:30.000 242 | 窗口结束时间:1000000060000 -> 2001-09-09 09:47:40.000 243 | 2 -> 1000000054000 -> 2001-09-09 09:47:34.000 244 | #### 第 3 个record #### 245 | currentMaxTimestamp: 1000000079900 246 | 水位线(watermark): 1000000074800 -> 2001-09-09 09:47:54.800 247 | 窗口开始时间:1000000070000 -> 2001-09-09 09:47:50.000 248 | 窗口结束时间:1000000080000 -> 2001-09-09 09:48:00.000 249 | 3 -> 1000000079900 -> 2001-09-09 09:47:59.900 250 | (a,[1,2],1) 251 | #### 第 4 个record #### 252 | currentMaxTimestamp: 1000000115000 253 | 水位线(watermark): 1000000109900 -> 2001-09-09 09:48:29.900 254 | 窗口开始时间:1000000110000 -> 2001-09-09 09:48:30.000 255 | 窗口结束时间:1000000120000 -> 2001-09-09 09:48:40.000 256 | 4 -> 1000000115000 -> 2001-09-09 09:48:35.000 257 | (a,3,1000000079900) 258 | #### 第 5 个record #### 259 | currentMaxTimestamp: 1000000115000 260 | 水位线(watermark): 1000000109900 -> 2001-09-09 09:48:29.900 261 | 
窗口开始时间:1000000100000 -> 2001-09-09 09:48:20.000 262 | 窗口结束时间:1000000110000 -> 2001-09-09 09:48:30.000 263 | 5 -> 1000000100000 -> 2001-09-09 09:48:20.000 264 | #### 第 6 个record #### 265 | currentMaxTimestamp: 1000000115000 266 | 水位线(watermark): 1000000109900 -> 2001-09-09 09:48:29.900 267 | 窗口开始时间:1000000100000 -> 2001-09-09 09:48:20.000 268 | 窗口结束时间:1000000110000 -> 2001-09-09 09:48:30.000 269 | 6 -> 1000000108000 -> 2001-09-09 09:48:28.000 270 | (b,[5,6],1) 271 | (a,4,1000000115000) 272 | ``` 273 | 日志分析: 274 | ```$xslt 275 | /** 276 | * 观察 record 5 和 record 6, 它们的时间窗口如下: 277 | * 窗口开始时间:1000000100000 -> 2001-09-09 09:48:20.000 278 | * 窗口结束时间:1000000110000 -> 2001-09-09 09:48:30.000 279 | * 它们进来的时候水位线如下: 280 | * 水位线(watermark): 1000000109900 -> 2001-09-09 09:48:29.900 281 | * 也就是说,它们进来的时候,watermark < windows end time 282 | * 这种情况下,就算数据的 eventtime < watermark,数据还是被保留下来,没有丢失。 283 | */ 284 | ``` 285 | ## 3 多流Join场景下的窗口计算触发时机、延时数据丢失问题 286 | 代码位置:https://github.com/zengxiaosen/flinkMultiStreamOptimization/blob/master/src/main/java/com/z/flinkStreamOptimizatiion/stream/StreamJoinDemo.java 287 | 数据源:两条流 288 | ```$xslt 289 | //StreamJoinDataSource1: 290 | Tuple3[] elements = new Tuple3[]{ 291 | Tuple3.of("a", "1", 1000000050000L), 292 | Tuple3.of("a", "2", 1000000054000L), 293 | Tuple3.of("a", "3", 1000000079900L), 294 | Tuple3.of("a", "4", 1000000115000L), 295 | Tuple3.of("b", "5", 1000000100000L), 296 | Tuple3.of("b", "6", 1000000108000L) 297 | }; 298 | //StreamJoinDataSource2: 299 | Tuple3[] elements = new Tuple3[]{ 300 | Tuple3.of("a", "hangzhou", 1000000059000L), 301 | Tuple3.of("b", "beijing", 1000000105000L), 302 | }; 303 | 304 | ``` 305 | 窗口属性设置: 306 | ```$xslt 307 | //毫秒为单位 308 | int windowSize = 10; 309 | long delay = 5100L; 310 | ``` 311 | 针对流的每条record,跟踪水位线,窗口开始时间,窗口结束时间,时间戳等日志: 312 | ```$xslt 313 | #################################### 314 | element.f1: hangzhou 315 | 水位线(watermark): 1000000053900 -> 2001-09-09 09:47:33.900 316 | 窗口开始时间:1000000050000 -> 2001-09-09 09:47:30.000 317 | 窗口结束时间:1000000060000 -> 2001-09-09 09:47:40.000 318 | hangzhou -> 1000000059000 -> 2001-09-09 09:47:39.000 319 | #################################### 320 | element.f1: 1 321 | 水位线(watermark): 1000000044900 -> 2001-09-09 09:47:24.900 322 | 窗口开始时间:1000000050000 -> 2001-09-09 09:47:30.000 323 | 窗口结束时间:1000000060000 -> 2001-09-09 09:47:40.000 324 | 1 -> 1000000050000 -> 2001-09-09 09:47:30.000 325 | #################################### 326 | element.f1: 2 327 | 水位线(watermark): 1000000048900 -> 2001-09-09 09:47:28.900 328 | 窗口开始时间:1000000050000 -> 2001-09-09 09:47:30.000 329 | 窗口结束时间:1000000060000 -> 2001-09-09 09:47:40.000 330 | 2 -> 1000000054000 -> 2001-09-09 09:47:34.000 331 | #################################### 332 | element.f1: beijing 333 | 水位线(watermark): 1000000099900 -> 2001-09-09 09:48:19.900 334 | 窗口开始时间:1000000100000 -> 2001-09-09 09:48:20.000 335 | 窗口结束时间:1000000110000 -> 2001-09-09 09:48:30.000 336 | beijing -> 1000000105000 -> 2001-09-09 09:48:25.000 337 | #################################### 338 | element.f1: 3 339 | 水位线(watermark): 1000000074800 -> 2001-09-09 09:47:54.800 340 | 窗口开始时间:1000000070000 -> 2001-09-09 09:47:50.000 341 | 窗口结束时间:1000000080000 -> 2001-09-09 09:48:00.000 342 | 3 -> 1000000079900 -> 2001-09-09 09:47:59.900 343 | 触发双流join窗口运算 344 | (a,1,hangzhou,1000000050000,1000000059000) 345 | 触发双流join窗口运算 346 | (a,2,hangzhou,1000000054000,1000000059000) 347 | #################################### 348 | element.f1: 4 349 | 水位线(watermark): 1000000109900 -> 2001-09-09 09:48:29.900 350 | 
窗口开始时间:1000000110000 -> 2001-09-09 09:48:30.000 351 | 窗口结束时间:1000000120000 -> 2001-09-09 09:48:40.000 352 | 4 -> 1000000115000 -> 2001-09-09 09:48:35.000 353 | #################################### 354 | element.f1: 5 355 | 水位线(watermark): 1000000109900 -> 2001-09-09 09:48:29.900 356 | 窗口开始时间:1000000100000 -> 2001-09-09 09:48:20.000 357 | 窗口结束时间:1000000110000 -> 2001-09-09 09:48:30.000 358 | 5 -> 1000000100000 -> 2001-09-09 09:48:20.000 359 | #################################### 360 | element.f1: 6 361 | 水位线(watermark): 1000000109900 -> 2001-09-09 09:48:29.900 362 | 窗口开始时间:1000000100000 -> 2001-09-09 09:48:20.000 363 | 窗口结束时间:1000000110000 -> 2001-09-09 09:48:30.000 364 | 6 -> 1000000108000 -> 2001-09-09 09:48:28.000 365 | 触发双流join窗口运算 366 | (b,5,beijing,1000000100000,1000000105000) 367 | 触发双流join窗口运算 368 | (b,6,beijing,1000000108000,1000000105000) 369 | ``` 370 | 日志分析: 371 | ```$xslt 372 | 结论1:如果source1触发窗口计算的时候,source2还没有触发窗口计算,也就是说,source2在窗口中没有数据, 373 | 需要等待source2触发窗口计算,把数据放置到窗口中,才能进行基于多流的join操作。 374 | 结论2:假设缩小delay,也就是提升水位线,有可能导致watermark > window end time,导致丢数据,例子: 375 | /** 376 | * 当设置参数int windowSize = 10; long delay = 5000L;时 377 | * 输出为: 378 | * (a,1,hangzhou,1000000050000,1000000059000) 379 | * (a,2,hangzhou,1000000054000,1000000059000) 380 | * 原因: 381 | * window_end_time < watermark, 导致数据丢失了。 382 | */ 383 | ``` 384 | ## 4 针对flink流算子中rpc调用场景,利用netty自研rpc工具 385 | server端启动: 386 | https://github.com/zengxiaosen/flinkMultiStreamOptimization/blob/master/src/main/java/com/z/flinkStreamOptimizatiion/rpc/demo/DemoServer.java 387 | flink的算子flatmap中初始化client端,调用rpc服务: 388 | https://github.com/zengxiaosen/flinkMultiStreamOptimization/blob/master/src/main/java/com/z/flinkStreamOptimizatiion/stream/TimeWindowDemo.java 389 | 中的test6() 390 | ```$xslt 391 | 调用流程: 392 | 1)一般flink算子中调用rpc都是在每个task上去建立连接,调用,销毁连接。 393 | 2)在flatmap之上统一new rpc client不可行,因为此rpc使用netty实现,而netty的bootstrap是final类型,也不能序列化。 394 | 3)所以综上所述,需要在每个task上去调用rpc服务。 395 | ``` 396 | 397 | 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | -------------------------------------------------------------------------------- /flinkDemo.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | flinkDemo 8 | flinkDemo 9 | 1.0-SNAPSHOT 10 | 11 | 4.1.1.Final 12 | UTF-8 13 | 14 | 15 | 16 | 17 | org.apache.flink 18 | flink-java 19 | 1.5.0 20 | 21 | 22 | 23 | org.apache.flink 24 | flink-streaming-java_2.11 25 | 1.5.0 26 | 27 | 28 | 29 | 30 | org.apache.flink 31 | flink-connector-kafka-0.10_2.11 32 | 1.5.0 33 | 34 | 35 | 36 | org.apache.flink 37 | flink-hbase_2.11 38 | 1.5.0 39 | 40 | 41 | 42 | 
org.apache.kafka 43 | kafka-clients 44 | 0.10.1.1 45 | 46 | 47 | 48 | org.apache.hbase 49 | hbase-client 50 | 1.1.2 51 | 52 | 53 | 54 | org.projectlombok 55 | lombok 56 | 1.16.10 57 | compile 58 | 59 | 60 | com.google.code.gson 61 | gson 62 | 2.8.2 63 | 64 | 65 | com.github.rholder 66 | guava-retrying 67 | 2.0.0 68 | 69 | 70 | com.alibaba 71 | fastjson 72 | 1.2.5 73 | 74 | 75 | io.netty 76 | netty-common 77 | ${netty.version} 78 | 79 | 80 | io.netty 81 | netty-buffer 82 | ${netty.version} 83 | 84 | 85 | io.netty 86 | netty-transport 87 | ${netty.version} 88 | 89 | 90 | io.netty 91 | netty-handler 92 | ${netty.version} 93 | 94 | 95 | io.netty 96 | netty-codec 97 | ${netty.version} 98 | 99 | 100 | 101 | 102 | 103 | 104 | org.apache.maven.plugins 105 | maven-compiler-plugin 106 | 3.5.1 107 | 108 | 1.8 109 | 1.8 110 | 111 | 112 | 113 | 114 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/broadcast/BroadcastDemo.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.broadcast; 2 | 3 | import com.z.flinkStreamOptimizatiion.stream.MyNoParalleSource; 4 | import org.apache.flink.api.common.functions.MapFunction; 5 | import org.apache.flink.api.common.functions.RichMapFunction; 6 | import org.apache.flink.api.java.DataSet; 7 | import org.apache.flink.api.java.ExecutionEnvironment; 8 | import org.apache.flink.api.java.operators.DataSource; 9 | import org.apache.flink.api.java.tuple.Tuple2; 10 | import org.apache.flink.configuration.Configuration; 11 | import org.apache.flink.streaming.api.datastream.DataStream; 12 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 13 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 14 | import org.apache.flink.streaming.api.windowing.time.Time; 15 | 16 | import java.util.ArrayList; 17 | import java.util.Collection; 18 | import java.util.HashMap; 19 | import java.util.List; 20 | 21 | public class BroadcastDemo { 22 | 23 | public static void main(String[] args) throws Exception { 24 | 25 | // broadcast 26 | //test1(); 27 | 28 | // StreamSource Broadcast 流的广播 29 | //test2(); 30 | 31 | // batch broadcast 广播变量 32 | test3(); 33 | 34 | } 35 | 36 | private static void test3() throws Exception { 37 | /** 38 | * 1, 封装dataset,调用withbroadcastSet 39 | * 2, getRuntimeContext().getBroadcastVariable, 获取广播变量 40 | * 3, RichMapFunction中执行获得广播变量的逻辑 41 | */ 42 | 43 | //获取运行环境 44 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 45 | 46 | //1:准备需要广播的数据 47 | ArrayList> broadData = new ArrayList<>(); 48 | broadData.add(new Tuple2<>("zs", 18)); 49 | broadData.add(new Tuple2<>("ls",20)); 50 | broadData.add(new Tuple2<>("ww",17)); 51 | DataSet> tupleData = env.fromCollection(broadData); 52 | 53 | //1.1:处理需要广播的数据,把数据集转换成map类型,map中的key就是用户姓名,value就是用户年龄 54 | DataSet> toBroadcast = tupleData.map(new MapFunction, HashMap>() { 55 | @Override 56 | public HashMap map(Tuple2 value) throws Exception { 57 | HashMap res = new HashMap<>(); 58 | res.put(value.f0, value.f1); 59 | return res; 60 | } 61 | }); 62 | 63 | //源数据 64 | DataSource data = env.fromElements("zs", "ls", "ww"); 65 | 66 | //注意:在这里需要使用到RichMapFunction获取广播变量 67 | DataSet result = data.map(new RichMapFunction() { 68 | 69 | List> broadCastMap = new ArrayList>(); 70 | HashMap allMap = new HashMap<>(); 71 | 72 | 73 | /** 74 | * 这个方法只会执行一次 75 | * 可以在这里实现一些初始化的功能 76 | * 77 | * 所以,就可以在open方法中获取广播变量数据 78 | * 79 | */ 80 | 
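// getRuntimeContext().getBroadcastVariable("broadCastMapName") below looks up the DataSet that is
// registered further down via .withBroadcastSet(toBroadcast, "broadCastMapName"); the two names must
// match exactly, and open() runs once per parallel task before the first map() call, so the broadcast
// data is fetched a single time instead of once per record.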
81 | @Override 82 | public void open(Configuration parameters) throws Exception { 83 | super.open(parameters); 84 | 85 | //3:获取广播数据 86 | this.broadCastMap = getRuntimeContext().getBroadcastVariable("broadCastMapName"); 87 | for (HashMap map : broadCastMap) { 88 | allMap.putAll(map); 89 | } 90 | 91 | } 92 | 93 | @Override 94 | public String map(String value) throws Exception { 95 | Integer age = allMap.get(value); 96 | return value + "," + age; 97 | } 98 | }).withBroadcastSet(toBroadcast, "broadCastMapName");//2:执行广播数据的操作 99 | 100 | result.print(); 101 | 102 | } 103 | 104 | private static void test2() throws Exception { 105 | 106 | //实现元素的重复广播 107 | 108 | //获取Flink的运行环境 109 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 110 | //4个并行 111 | env.setParallelism(4); 112 | 113 | //获取数据源 114 | DataStreamSource text = env.addSource(new MyNoParalleSource()).setParallelism(1);//注意:针对此source,并行度只能设置为1 115 | //整个map元素分别处理了4次 116 | DataStream num = text.broadcast().map(new MapFunction() { 117 | @Override 118 | public Long map(Long value) throws Exception { 119 | long id = Thread.currentThread().getId(); 120 | System.out.println("线程id:"+id+",接收到数据:" + value); 121 | return value; 122 | } 123 | }); 124 | 125 | //每2秒钟处理一次数据 126 | DataStream sum = num.timeWindowAll(Time.seconds(2)).sum(0); 127 | 128 | //打印结果 129 | sum.print().setParallelism(1); 130 | 131 | String jobName = BroadcastDemo.class.getSimpleName(); 132 | env.execute(jobName); 133 | 134 | 135 | } 136 | 137 | private static void test1() { 138 | //获取运行环境 139 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 140 | 141 | //1 准备等待广播的DataSet数据 142 | DataSet toBroadcast = env.fromElements(1, 2, 3); 143 | DataSet data = env.fromElements("a", "b", "c"); 144 | 145 | data.map(new RichMapFunction() { 146 | 147 | @Override 148 | public String map(String s) throws Exception { 149 | return null; 150 | } 151 | 152 | @Override 153 | public void open(Configuration parameters) throws Exception { 154 | 155 | //3 获取广播的DataSet数据 作为一个Collection 156 | Collection broadcastSet = getRuntimeContext().getBroadcastVariable("broadcastSetName"); 157 | 158 | } 159 | }).withBroadcastSet(toBroadcast, "broadcastSetName"); //2 广播DataSset 160 | 161 | 162 | } 163 | 164 | } 165 | 166 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/datesetOp/WordCountData.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.datesetOp; 2 | 3 | import org.apache.flink.api.java.DataSet; 4 | import org.apache.flink.api.java.ExecutionEnvironment; 5 | 6 | /** 7 | *

8 | *

@author: jyj019 9 | * Date: 2018/9/3 10:46 10 | * @version: 2.0.0 11 | *
  • @since JDK 1.8
  • 12 | */ 13 | 14 | public class WordCountData { 15 | 16 | public static final String[] WORDS = new String[] { 17 | "To be, or not to be,--that is the question:--", 18 | "Whether 'tis nobler in the mind to suffer", 19 | "The slings and arrows of outrageous fortune", 20 | "Or to take arms against a sea of troubles,", 21 | "And by opposing end them?--To die,--to sleep,--", 22 | "No more; and by a sleep to say we end", 23 | "The heartache, and the thousand natural shocks", 24 | "That flesh is heir to,--'tis a consummation", 25 | "Devoutly to be wish'd. To die,--to sleep;--", 26 | "To sleep! perchance to dream:--ay, there's the rub;", 27 | "For in that sleep of death what dreams may come,", 28 | "When we have shuffled off this mortal coil,", 29 | "Must give us pause: there's the respect", 30 | "That makes calamity of so long life;", 31 | "For who would bear the whips and scorns of time,", 32 | "The oppressor's wrong, the proud man's contumely,", 33 | "The pangs of despis'd love, the law's delay,", 34 | "The insolence of office, and the spurns", 35 | "That patient merit of the unworthy takes,", 36 | "When he himself might his quietus make", 37 | "With a bare bodkin? who would these fardels bear,", 38 | "To grunt and sweat under a weary life,", 39 | "But that the dread of something after death,--", 40 | "The undiscover'd country, from whose bourn", 41 | "No traveller returns,--puzzles the will,", 42 | "And makes us rather bear those ills we have", 43 | "Than fly to others that we know not of?", 44 | "Thus conscience does make cowards of us all;", 45 | "And thus the native hue of resolution", 46 | "Is sicklied o'er with the pale cast of thought;", 47 | "And enterprises of great pith and moment,", 48 | "With this regard, their currents turn awry,", 49 | "And lose the name of action.--Soft you now!", 50 | "The fair Ophelia!--Nymph, in thy orisons", 51 | "Be all my sins remember'd." 
52 | }; 53 | 54 | public static DataSet getDefaultTextLineDataSet(ExecutionEnvironment env) { 55 | return env.fromElements(WORDS); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/datesetOp/WordCountDemo.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.datesetOp; 2 | 3 | import org.apache.flink.api.common.functions.FlatMapFunction; 4 | import org.apache.flink.api.common.functions.JoinFunction; 5 | import org.apache.flink.api.common.functions.MapPartitionFunction; 6 | import org.apache.flink.api.common.operators.Order; 7 | import org.apache.flink.api.java.DataSet; 8 | import org.apache.flink.api.java.ExecutionEnvironment; 9 | import org.apache.flink.api.java.operators.CrossOperator; 10 | import org.apache.flink.api.java.operators.DataSource; 11 | import org.apache.flink.api.java.operators.FlatMapOperator; 12 | import org.apache.flink.api.java.tuple.Tuple2; 13 | import org.apache.flink.api.java.tuple.Tuple3; 14 | import org.apache.flink.api.java.utils.ParameterTool; 15 | 16 | import org.apache.flink.util.Collector; 17 | 18 | import java.util.ArrayList; 19 | import java.util.Iterator; 20 | import java.util.List; 21 | 22 | // dataset 的一些通用操作 23 | public class WordCountDemo { 24 | 25 | public static void main(String[] args) throws Exception { 26 | // get input data 27 | DataSet text = getDataSet(args); 28 | 29 | 30 | // Map:输入一个元素,然后返回一个元素,中间可以做一些清洗转换等操作 31 | // FlatMap:输入一个元素,可以返回零个,一个或者多个元素 32 | // MapPartition:类似map,一次处理一个分区的数据【如果在进行map处理的时候需要获取第三方资源链接,建议使用MapPartition】 33 | 34 | //map 35 | // test1(text); 36 | 37 | //map partition - batch 38 | //test2(); 39 | 40 | //distinct 41 | //test3(); 42 | 43 | //join 内连接 44 | //test4(); 45 | 46 | //outer join 外连接 47 | //test5(); 48 | 49 | //cross 笛卡尔积 50 | //test6(); 51 | 52 | //sort partition 在本地对数据集的所有分区进行排序,通过sortPartition()的链接调用来完成对多个字段的排序 53 | // test7(); 54 | 55 | 56 | } 57 | 58 | 59 | 60 | private static void test7() throws Exception { 61 | //获取运行环境 62 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 63 | ArrayList> data = new ArrayList<>(); 64 | data.add(new Tuple2<>(2,"zs")); 65 | data.add(new Tuple2<>(4,"ls")); 66 | data.add(new Tuple2<>(3,"ww")); 67 | data.add(new Tuple2<>(1,"xw")); 68 | data.add(new Tuple2<>(1,"aw")); 69 | data.add(new Tuple2<>(1,"mw")); 70 | 71 | DataSource> text = env.fromCollection(data); 72 | //获取前3条数据,按照数据插入的顺序 73 | text.first(3).print(); 74 | System.out.println("=============================="); 75 | 76 | //根据数据中的第一列进行分组,获取每组的前2个元素 77 | text.groupBy(0).first(2).print(); 78 | 79 | //根据数据中的第一列分组,再根据第二列进行组内排序[升序],获取每组的前2个元素 80 | text.groupBy(0).sortGroup(1, Order.ASCENDING).first(2).print(); 81 | System.out.println("=============================="); 82 | 83 | //不分组,全局排序获取集合中的前3个元素,针对第一个元素升序,第二个元素倒序 84 | text.sortPartition(0, Order.ASCENDING).sortPartition(1, Order.DESCENDING).first(3).print(); 85 | text.sortPartition(0, Order.ASCENDING).sortPartition(1, Order.DESCENDING).first(3).print(); 86 | 87 | 88 | 89 | } 90 | 91 | private static void test6() throws Exception { 92 | //获取运行环境 93 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 94 | 95 | //tuple2<用户id,用户姓名> 96 | ArrayList data1 = new ArrayList<>(); 97 | data1.add("zs"); 98 | data1.add("ww"); 99 | //tuple2<用户id,用户所在城市> 100 | ArrayList data2 = new ArrayList<>(); 101 | data2.add(1); 102 | data2.add(2); 103 | DataSource text1 = 
env.fromCollection(data1); 104 | DataSource text2 = env.fromCollection(data2); 105 | CrossOperator.DefaultCross cross = text1.cross(text2); 106 | cross.print(); 107 | 108 | } 109 | 110 | private static void test5() throws Exception { 111 | //获取运行环境 112 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 113 | 114 | //tuple2<用户id,用户姓名> 115 | ArrayList> data1 = new ArrayList<>(); 116 | data1.add(new Tuple2<>(1,"zs")); 117 | data1.add(new Tuple2<>(2,"ls")); 118 | data1.add(new Tuple2<>(3,"ww")); 119 | 120 | 121 | //tuple2<用户id,用户所在城市> 122 | ArrayList> data2 = new ArrayList<>(); 123 | data2.add(new Tuple2<>(1,"beijing")); 124 | data2.add(new Tuple2<>(2,"shanghai")); 125 | data2.add(new Tuple2<>(4,"guangzhou")); 126 | 127 | 128 | DataSource> text1 = env.fromCollection(data1); 129 | DataSource> text2 = env.fromCollection(data2); 130 | 131 | /** 132 | * 左外连接 133 | * 134 | * 注意:second这个tuple中的元素可能为null 135 | * 136 | */ 137 | 138 | text1.leftOuterJoin(text2) 139 | .where(0) 140 | .equalTo(0) 141 | .with(new JoinFunction, Tuple2, Tuple3>() { 142 | 143 | @Override 144 | public Tuple3 join(Tuple2 first, Tuple2 second) throws Exception { 145 | if (second == null) { 146 | return new Tuple3<>(first.f0, first.f1, "null"); 147 | } else { 148 | return new Tuple3<>(first.f0, first.f1, second.f1); 149 | } 150 | } 151 | }).print(); 152 | 153 | /** 154 | * 右外连接 155 | * 156 | * 注意:first这个tuple中的数据可能为null 157 | * 158 | */ 159 | text1.rightOuterJoin(text2) 160 | .where(0) 161 | .equalTo(0) 162 | .with(new JoinFunction, Tuple2, Tuple3>() { 163 | @Override 164 | public Tuple3 join(Tuple2 first, Tuple2 second) throws Exception { 165 | if(first==null){ 166 | return new Tuple3<>(second.f0,"null",second.f1); 167 | } 168 | return new Tuple3<>(first.f0,first.f1,second.f1); 169 | } 170 | }).print(); 171 | /** 172 | * 全外连接 173 | * 174 | * 注意:first和second这两个tuple都有可能为null 175 | * 176 | */ 177 | text1.fullOuterJoin(text2) 178 | .where(0) 179 | .equalTo(0) 180 | .with(new JoinFunction, Tuple2, Tuple3>() { 181 | @Override 182 | public Tuple3 join(Tuple2 first, Tuple2 second) throws Exception { 183 | if(first==null){ 184 | return new Tuple3<>(second.f0,"null",second.f1); 185 | }else if(second == null){ 186 | return new Tuple3<>(first.f0,first.f1,"null"); 187 | }else{ 188 | return new Tuple3<>(first.f0,first.f1,second.f1); 189 | } 190 | } 191 | }).print(); 192 | 193 | 194 | } 195 | 196 | private static void test4() throws Exception { 197 | //获取运行环境 198 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 199 | //tuple2<用户id,用户姓名> 200 | List> data1 = new ArrayList<>(); 201 | data1.add(new Tuple2<>(1, "zs")); 202 | data1.add(new Tuple2<>(2, "ls")); 203 | data1.add(new Tuple2<>(3, "ww")); 204 | 205 | //tuple2<用户id,用户所在城市> 206 | List> data2 = new ArrayList<>(); 207 | data2.add(new Tuple2<>(1, "beijing")); 208 | data2.add(new Tuple2<>(2, "shanghai")); 209 | data2.add(new Tuple2<>(3, "guangzhou")); 210 | 211 | DataSource> text1 = env.fromCollection(data1); 212 | DataSource> text2 = env.fromCollection(data2); 213 | 214 | text1.join(text2).where(0)//指定第一个数据集中需要进行比较的元素的角标 215 | .equalTo(0)//指定第二个数据集中需要进行比较的元素的角标 216 | .with(new JoinFunction, Tuple2, Object>() { 217 | 218 | @Override 219 | public Object join(Tuple2 first, Tuple2 second) throws Exception { 220 | return new Tuple3<>(first.f0, first.f1, second.f1); 221 | } 222 | }).print(); 223 | //注意,这里用map和上面使用的with最终效果是一致的。 224 | /*text1.join(text2).where(0)//指定第一个数据集中需要进行比较的元素角标 225 | .equalTo(0)//指定第二个数据集中需要进行比较的元素角标 226 | .map(new 
MapFunction,Tuple2>, Tuple3>() { 227 | @Override 228 | public Tuple3 map(Tuple2, Tuple2> value) throws Exception { 229 | return new Tuple3<>(value.f0.f0,value.f0.f1,value.f1.f1); 230 | } 231 | }).print();*/ 232 | 233 | } 234 | 235 | private static void test3() throws Exception { 236 | //获取运行环境 237 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 238 | ArrayList data = new ArrayList<>(); 239 | data.add("hello you"); 240 | data.add("hello me"); 241 | 242 | DataSource text = env.fromCollection(data); 243 | FlatMapOperator flatMapData = text.flatMap(new FlatMapFunction() { 244 | @Override 245 | public void flatMap(String value, Collector out) throws Exception { 246 | String[] split = value.toLowerCase().split("\\W+"); 247 | for (String word : split) { 248 | System.out.println("单词: " + word); 249 | out.collect(word); 250 | } 251 | } 252 | }); 253 | flatMapData.distinct().print(); 254 | } 255 | 256 | private static void test2() throws Exception { 257 | //获取运行环境 258 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 259 | 260 | ArrayList data = new ArrayList<>(); 261 | data.add("hello you"); 262 | data.add("hello me"); 263 | DataSource text = env.fromCollection(data); 264 | /*text.map(new MapFunction() { 265 | @Override 266 | public String map(String value) throws Exception { 267 | //获取数据库连接--注意,此时是每过来一条数据就获取一次链接 268 | //处理数据 269 | //关闭连接 270 | return value; 271 | } 272 | });*/ 273 | DataSet mapPartitionData = text.mapPartition(new MapPartitionFunction() { 274 | @Override 275 | public void mapPartition(Iterable values, Collector out) throws Exception { 276 | //获取数据库连接--注意,此时是一个分区的数据获取一次连接【优点,每个分区获取一次链接】 277 | //values中保存了一个分区的数据 278 | //处理数据 279 | Iterator it = values.iterator(); 280 | while (it.hasNext()) { 281 | String next = it.next(); 282 | String[] split = next.split("\\W+"); 283 | for (String word : split) { 284 | out.collect(word); 285 | } 286 | } 287 | //关闭连接 288 | } 289 | }); 290 | mapPartitionData.print(); 291 | } 292 | 293 | private static void test1(DataSet text) throws Exception { 294 | DataSet> counts = 295 | // split up the lines in pairs (2-tuples) containing: (word,1) 296 | text.flatMap(new Tokenizer()) 297 | // group by the tuple field "0" and sum up tuple field "1" 298 | .groupBy(0) 299 | .sum(1); 300 | 301 | counts.print(); 302 | } 303 | 304 | public static DataSet getDataSet(String[] args) { 305 | final ParameterTool params = ParameterTool.fromArgs(args); 306 | DataSet text; 307 | // create execution environment 308 | final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 309 | //env.getConfig().setGlobalJobParameters(params); 310 | if (params.has("input")) { 311 | // read the text file from given input path 312 | text = env.readTextFile(params.get("input")); 313 | } else { 314 | // get default test text data 315 | System.out.println("Executing WordCount example with default input data set."); 316 | System.out.println("Use --input to specify file input."); 317 | text = WordCountData.getDefaultTextLineDataSet(env); 318 | } 319 | return text; 320 | } 321 | 322 | // ************************************************************************* 323 | // USER FUNCTIONS 324 | // ************************************************************************* 325 | 326 | 327 | /** 328 | * Implements the string tokenizer that splits sentences into words as a user-defined 329 | * FlatMapFunction. The function takes a line (String) and splits it into 330 | * multiple pairs in the form of "(word,1)" ({@code Tuple2}). 
331 | */ 332 | public static final class Tokenizer implements FlatMapFunction> { 333 | 334 | @Override 335 | public void flatMap(String value, Collector> out) { 336 | // normalize and split the line 337 | String[] tokens = value.toLowerCase().split("\\W+"); 338 | 339 | // emit the pairs 340 | for (String token : tokens) { 341 | if (token.length() > 0) { 342 | out.collect(new Tuple2<>(token, 1)); 343 | } 344 | } 345 | } 346 | } 347 | } 348 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/datesetOp/WordCountExample.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.datesetOp; 2 | 3 | import org.apache.flink.api.common.functions.FlatMapFunction; 4 | import org.apache.flink.api.java.DataSet; 5 | import org.apache.flink.api.java.ExecutionEnvironment; 6 | import org.apache.flink.api.java.tuple.Tuple2; 7 | import org.apache.flink.util.Collector; 8 | 9 | /** 10 | *

    11 | *

  • @author: jyj019
  • 12 | *
  • Date: 2018/9/3 11:07
  • 13 | *
  • @version: 2.0.0
  • 14 | *
  • @since JDK 1.8
  • 15 | */ 16 | public class WordCountExample { 17 | public static void main(String[] args) throws Exception { 18 | final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 19 | env.setParallelism(100); 20 | 21 | DataSet text = env.fromElements( 22 | "Who's there?", 23 | "I think I hear them. Stand, ho! Who's there?", 24 | "I think I hear them. Stand, ho! Who's there?"); 25 | 26 | DataSet> wordCounts = text 27 | .flatMap(new LineSplitter()) 28 | .groupBy(0) 29 | .sum(1); 30 | 31 | wordCounts.print(); 32 | } 33 | 34 | public static class LineSplitter implements FlatMapFunction> { 35 | @Override 36 | public void flatMap(String line, Collector> out) { 37 | for (String word : line.split(" ")) { 38 | // for (String word2 : line.split(",")) { 39 | out.collect(new Tuple2<>(word, 1)); 40 | // } 41 | } 42 | } 43 | } 44 | } -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/hbase/Flink2HBase.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.hbase; 2 | 3 | 4 | import com.z.flinkStreamOptimizatiion.hbase.loader.HBaseLoader; 5 | import com.z.flinkStreamOptimizatiion.hbase.loader.HBaseUtils; 6 | import org.apache.flink.api.common.functions.MapFunction; 7 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 8 | import org.apache.flink.api.java.utils.ParameterTool; 9 | import org.apache.flink.streaming.api.TimeCharacteristic; 10 | import org.apache.flink.streaming.api.datastream.DataStream; 11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 12 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010; 13 | import org.apache.hadoop.hbase.TableName; 14 | import org.apache.hadoop.hbase.client.*; 15 | import org.slf4j.Logger; 16 | import org.slf4j.LoggerFactory; 17 | 18 | import java.io.IOException; 19 | import java.util.*; 20 | 21 | 22 | public class Flink2HBase { 23 | 24 | private static final Logger LOGGER = LoggerFactory.getLogger(Flink2HBase.class); 25 | 26 | public static void main(String[] args) { 27 | 28 | System.setProperty("hadoop.home.dir", "C:\\hbase-1.1.2"); 29 | System.setProperty("HADOOP_USER_NAME", "hdfs"); 30 | 31 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 32 | env.enableCheckpointing(1000); // 非常关键,一定要设置启动检查点!! 
33 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 34 | 35 | Map properties= new HashMap(); 36 | properties.put("bootstrap.servers", "192.168.10.63:6667,192.168.10.64:6667,192.168.10.65:6667"); 37 | properties.put("group.id", "dec-esc-group-vib-calc"); 38 | properties.put("enable.auto.commit", "true"); 39 | properties.put("auto.commit.interval.ms", "1000"); 40 | properties.put("auto.offset.reset", "earliest"); 41 | properties.put("session.timeout.ms", "30000"); 42 | properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 43 | properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 44 | properties.put("topic", "dec-vibration-test"); 45 | //KafkaConsumer kafkaConsumer = new KafkaConsumer(properties); 46 | // parse user parameters 47 | //ParameterTool parameterTool = ParameterTool.fromArgs(args); 48 | ParameterTool parameterTool = ParameterTool.fromMap(properties); 49 | 50 | DataStream transction = env.addSource(new FlinkKafkaConsumer010<>(parameterTool.getRequired("topic"), new SimpleStringSchema(), parameterTool.getProperties())); 51 | //DataStream transction1 = env.addSource(new FlinkKafkaConsumer010("test3",new SimpleStringSchema(), props)); 52 | 53 | //DataStream eventDataStream=transction.map((line)->parse(line)); 54 | 55 | transction.rebalance().map(new MapFunction() { 56 | 57 | public String map(String value)throws IOException { 58 | 59 | writeIntoHBase(value); 60 | return value; 61 | } 62 | 63 | }); 64 | 65 | 66 | transction.rebalance().map(new MapFunction() { 67 | 68 | @Override 69 | public String map(String value)throws IOException { 70 | 71 | writeIntoHBase(value); 72 | return value; 73 | } 74 | 75 | }); 76 | 77 | //transction.writeAsText("/home/admin/log2"); 78 | // transction.addSink(new HBaseOutputFormat(); 79 | try { 80 | env.execute(); 81 | } catch (Exception ex) { 82 | ex.printStackTrace(); 83 | } 84 | } 85 | 86 | 87 | 88 | 89 | public static void writeIntoHBase(String value)throws IOException { 90 | HBaseLoader hBaseLoader= new HBaseLoader(); 91 | String hBaseTable="dfdq_rhm_aly:f_turbine_event_data"; 92 | String hBaseTableCF="f"; 93 | Table table = null; 94 | // 常量 95 | 96 | // org.apache.hadoop.conf.Configuration config = HBaseConfiguration.create(); 97 | // config.set("hbase.zookeeper.property.clientPort", "2181"); 98 | // 99 | // config.set("hbase.zookeeper.quorum", "bigdata-master2.phmcluster.calabar,bigdata-master1.phmcluster.calabar,bigdata-slave1.phmcluster.calabar,bigdata-slave2.phmcluster.calabar,bigdata-slave3.phmcluster.calabar"); 100 | // config.set("zookeeper.znode.parent", "/hbase-unsecure"); 101 | 102 | //config.set(TableOutputFormat.OUTPUT_TABLE, hbasetable); 103 | 104 | try { 105 | table = HBaseUtils.getConnection().getTable(TableName.valueOf(hBaseTable)); 106 | } catch (Exception e) { 107 | LOGGER.error("HBase连接建立出错",e); 108 | e.printStackTrace(); 109 | } 110 | 111 | hBaseLoader.loadSpeed(table, hBaseTableCF,String.valueOf(value)); 112 | 113 | // Connection c = ConnectionFactory.createConnection(config); 114 | // 115 | // Admin admin = c.getAdmin(); 116 | // if(!admin.tableExists(tableName)){ 117 | // admin.createTable(new HTableDescriptor(tableName).addFamily(new HColumnDescriptor(columnFamily))); 118 | // } 119 | // Table t = c.getTable(tableName); 120 | // 121 | // TimeStamp ts = new TimeStamp(new Date()); 122 | // 123 | // Date date = ts.getDate(); 124 | // 125 | // Put put = new Put(org.apache.hadoop.hbase.util.Bytes.toBytes(date.toString())); 126 | // 
127 | // put.addColumn(org.apache.hadoop.hbase.util.Bytes.toBytes(columnFamily), org.apache.hadoop.hbase.util.Bytes.toBytes("test"), 128 | // org.apache.hadoop.hbase.util.Bytes.toBytes(m)); 129 | // t.put(put); 130 | // 131 | // t.close(); 132 | // c.close(); 133 | } 134 | 135 | 136 | 137 | 138 | } 139 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/hbase/FlinkGHBaseByDataSet.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.hbase; 2 | 3 | import org.apache.flink.addons.hbase.TableInputFormat; 4 | import org.apache.flink.api.java.DataSet; 5 | import org.apache.flink.api.java.ExecutionEnvironment; 6 | import org.apache.flink.api.java.tuple.Tuple2; 7 | import org.apache.hadoop.hbase.client.Result; 8 | import org.apache.hadoop.hbase.client.Scan; 9 | import org.apache.hadoop.hbase.util.Bytes; 10 | 11 | 12 | public class FlinkGHBaseByDataSet { 13 | 14 | public static void main(String[] args) { 15 | final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 16 | 17 | DataSet> hbaseInput = env.createInput(new TableInputFormat>(){ 18 | @Override 19 | protected Scan getScanner() { 20 | Scan scan = new Scan(); 21 | scan.setStartRow(Bytes.toBytes("lastSpeed1")); 22 | scan.setStopRow(Bytes.toBytes("lastSpeed4")); 23 | return scan; 24 | } 25 | @Override 26 | protected String getTableName() { 27 | return "dfdq_rhm_aly:f_turbine_event_data"; 28 | } 29 | @Override 30 | protected Tuple2 mapResultToTuple(Result result) { 31 | 32 | Tuple2 tup = new Tuple2(); 33 | tup.setField(Bytes.toString(result.getRow()),0); 34 | tup.setField(Bytes.toString(result.getValue("f".getBytes(), "slv".getBytes())), 1); 35 | return tup; 36 | } 37 | }); 38 | 39 | try { 40 | hbaseInput.print(); 41 | } catch (Exception e) { 42 | e.printStackTrace(); 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/hbase/loader/HBaseLoader.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.hbase.loader; 2 | 3 | import org.apache.commons.collections.CollectionUtils; 4 | import org.apache.hadoop.conf.Configuration; 5 | import org.apache.hadoop.hbase.HBaseConfiguration; 6 | import org.apache.hadoop.hbase.TableName; 7 | import org.apache.hadoop.hbase.client.Connection; 8 | import org.apache.hadoop.hbase.client.ConnectionFactory; 9 | import org.apache.hadoop.hbase.client.Put; 10 | import org.apache.hadoop.hbase.client.Table; 11 | import org.apache.hadoop.hbase.util.Bytes; 12 | import org.slf4j.Logger; 13 | import org.slf4j.LoggerFactory; 14 | 15 | import java.util.LinkedList; 16 | import java.util.List; 17 | 18 | 19 | public class HBaseLoader implements ILoader { 20 | 21 | private static final Logger LOGGER = LoggerFactory.getLogger(HBaseLoader.class); 22 | @Override 23 | public void loader() throws Exception { 24 | 25 | Table table = null; 26 | try { 27 | Configuration conf = HBaseConfiguration.create(); 28 | Connection conn = ConnectionFactory.createConnection(conf); 29 | table = conn.getTable(TableName.valueOf("dfdq_rhm_aly:f_aly_point_data_test")); 30 | Put put = new Put("kkk".getBytes()); 31 | put.addColumn(Bytes.toBytes("f"),Bytes.toBytes("t"),Bytes.toBytes(System.currentTimeMillis())); 32 | table.put(put); 33 | } catch (Exception e) { 34 | throw new Exception("批量存储数据失败!", e); 35 | } 
finally { 36 | // table.close(); 37 | } 38 | } 39 | 40 | public static void main(String[] args) throws Exception { 41 | ILoader loader = new HBaseLoader(); 42 | loader.loader(); 43 | } 44 | 45 | 46 | public void loadSpeed(Table table, String family, String value) { 47 | long start = System.currentTimeMillis(); 48 | byte[] fam_b = Bytes.toBytes(family); 49 | byte[] slv_b = Bytes.toBytes("slv"); 50 | 51 | // 装入多行数据 52 | List puts = new LinkedList<>(); 53 | Put put; 54 | //for (VibSaveEntry aData : data) { 55 | put = new Put(Bytes.toBytes("lastSpeed"+value)); 56 | put.addColumn(fam_b, slv_b, Bytes.toBytes(value)); 57 | puts.add(put); 58 | //} 59 | if (CollectionUtils.isNotEmpty(puts)) { 60 | try { 61 | HBaseRetryingUtils.retrying(table, puts); 62 | } catch (Exception e) { 63 | e.printStackTrace(); 64 | } 65 | } 66 | long end = System.currentTimeMillis(); 67 | LOGGER.debug("数据存储耗时:"+(end-start)); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/hbase/loader/HBaseRetryingUtils.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.hbase.loader; 2 | 3 | import com.github.rholder.retry.Retryer; 4 | import com.github.rholder.retry.RetryerBuilder; 5 | import com.github.rholder.retry.StopStrategies; 6 | import com.github.rholder.retry.WaitStrategies; 7 | import org.apache.hadoop.hbase.client.Put; 8 | import org.apache.hadoop.hbase.client.Table; 9 | import org.slf4j.Logger; 10 | import org.slf4j.LoggerFactory; 11 | 12 | import java.io.Serializable; 13 | import java.util.List; 14 | import java.util.concurrent.TimeUnit; 15 | 16 | /** 17 | *

18 |  * @author: jinyujie 19 |  * Date: 2018/6/12 9:35 20 |  * @version: 2.0.0 21 |  * @since JDK 1.8
  • 22 | */ 23 | public class HBaseRetryingUtils implements Serializable { 24 | /** 25 | * 日志记录 26 | */ 27 | private static final Logger LOGGER = LoggerFactory.getLogger(HBaseRetryingUtils.class); 28 | 29 | /** 30 | * 重试发送数据到hbase 31 | * 32 | * @param table 33 | * @param puts List 34 | * @throws Exception 连接异常 35 | */ 36 | public static void retrying(Table table, List puts) throws Exception { 37 | // 异常或者返回null都继续重试、每3秒重试一次、最多重试5次 38 | Retryer retryer = RetryerBuilder.newBuilder() 39 | .retryIfException() 40 | .withWaitStrategy(WaitStrategies.fixedWait(500, TimeUnit.MILLISECONDS)) 41 | .withStopStrategy(StopStrategies.stopAfterAttempt(6)) 42 | .build(); 43 | 44 | try { 45 | retryer.call(() -> HBaseUtils.batchPuts(table, puts)); 46 | } catch (Exception e) { 47 | LOGGER.error("多次重试发送数据到hbase失败!", e); 48 | throw new Exception("多次重试发送数据到hbase失败!", e); 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/hbase/loader/HBaseUtils.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.hbase.loader; 2 | 3 | import org.apache.hadoop.conf.Configuration; 4 | import org.apache.hadoop.hbase.*; 5 | import org.apache.hadoop.hbase.client.*; 6 | import org.apache.hadoop.hbase.io.compress.Compression; 7 | import org.slf4j.Logger; 8 | import org.slf4j.LoggerFactory; 9 | 10 | import java.io.IOException; 11 | import java.io.Serializable; 12 | import java.util.Arrays; 13 | import java.util.LinkedList; 14 | import java.util.List; 15 | import java.util.concurrent.ExecutorService; 16 | import java.util.concurrent.Executors; 17 | 18 | 19 | public class HBaseUtils implements Serializable { 20 | /** 21 | * 日志记录 22 | */ 23 | private static final Logger LOGGER = LoggerFactory.getLogger(HBaseUtils.class); 24 | 25 | /** 26 | * Hbase 连接对象 27 | */ 28 | private static Connection CONN; 29 | 30 | /** 31 | * 获取Hbase的连接 32 | * 33 | * @return Hbase connection 34 | * @throws Exception the exception 35 | */ 36 | public synchronized static Connection getConnection() throws Exception { 37 | if (null == CONN || CONN.isClosed()) { 38 | try { 39 | Configuration conf = HBaseConfiguration.create(); 40 | CONN = ConnectionFactory.createConnection(conf); 41 | } catch (IOException e) { 42 | LOGGER.error("can not establish hbase connection.", e); 43 | throw new Exception("can not establish hbase connection.", e); 44 | } 45 | } 46 | return CONN; 47 | } 48 | 49 | /** 50 | * 创建命名空间 51 | * 52 | * @param namespace 命名空间 53 | * @throws Exception Exception 54 | */ 55 | public static void createNamespace(String namespace) throws Exception { 56 | Admin admin = null; 57 | try { 58 | admin = HBaseUtils.getConnection().getAdmin(); 59 | if (HBaseUtils.namespaceIsExists(admin, namespace)) { 60 | LOGGER.warn("The namespace " + namespace + " already exists !"); 61 | return; 62 | } 63 | admin.createNamespace(NamespaceDescriptor.create(namespace).build()); 64 | LOGGER.info("create namespace " + namespace + " seccuss."); 65 | } finally { 66 | HBaseUtils.closeAdmin(admin); 67 | } 68 | } 69 | 70 | /** 71 | * 判断表是否存在 72 | * 73 | * @param tableName tableName 74 | * @return true:存在, false:不存在 75 | * @throws Exception Exception 76 | */ 77 | public static boolean tableExists(String tableName) throws Exception { 78 | Admin admin = null; 79 | try { 80 | admin = HBaseUtils.getConnection().getAdmin(); 81 | return admin.tableExists(TableName.valueOf(tableName)); 82 | } finally { 83 | 
HBaseUtils.closeAdmin(admin); 84 | } 85 | } 86 | 87 | /** 88 | * 创建一个表,这个表没有任何region 89 | * 90 | * @param tableName 表名 91 | * @param cfs 列族 92 | * @throws Exception Exception 93 | */ 94 | public static void createTable(String tableName, String... cfs) throws Exception { 95 | Admin admin = null; 96 | try { 97 | admin = HBaseUtils.getConnection().getAdmin(); 98 | HTableDescriptor hTableDescriptor = new HTableDescriptor(TableName.valueOf(tableName)); 99 | for (String family : cfs) { 100 | HColumnDescriptor hColumnDescriptor = new HColumnDescriptor(family); 101 | hColumnDescriptor.setCompressionType(Compression.Algorithm.SNAPPY); 102 | hTableDescriptor.addFamily(hColumnDescriptor); 103 | hColumnDescriptor.setMaxVersions(3); 104 | } 105 | admin.createTable(hTableDescriptor); 106 | LOGGER.info("create table " + tableName + " seccuss."); 107 | } finally { 108 | HBaseUtils.closeAdmin(admin); 109 | } 110 | } 111 | 112 | /** 113 | * 清空表数据, 保留分区 114 | * 115 | * @param tableName 表名 116 | * @throws Exception Exception 117 | */ 118 | public static void truncateTable(String tableName) throws Exception { 119 | Admin admin = null; 120 | TableName tableNameObj = TableName.valueOf(tableName); 121 | try { 122 | admin = HBaseUtils.getConnection().getAdmin(); 123 | if (!admin.tableExists(tableNameObj)) { 124 | LOGGER.error("The table " + tableName + " does not exists!"); 125 | return; 126 | } 127 | admin.disableTable(tableNameObj); 128 | admin.truncateTable(tableNameObj, true); 129 | } finally { 130 | HBaseUtils.closeAdmin(admin); 131 | } 132 | } 133 | 134 | /** 135 | * 获取hbase表中的列族字段 136 | * 137 | * @param tableName 表名 138 | * @return 列族字段集合 family fields 139 | * @throws Exception Exception 140 | */ 141 | public static List getFamilyFields(String tableName) throws Exception { 142 | Admin admin = null; 143 | List families = new LinkedList<>(); 144 | try { 145 | admin = HBaseUtils.getConnection().getAdmin(); 146 | HTableDescriptor hTableDesc = admin.getTableDescriptor(TableName.valueOf(tableName)); 147 | hTableDesc.getFamilies().forEach(desc -> families.add(desc.getNameAsString())); 148 | return families; 149 | } finally { 150 | HBaseUtils.closeAdmin(admin); 151 | } 152 | } 153 | 154 | /** 155 | * 追加新的列族 156 | * 157 | * @param tableName tableName 158 | * @param families families 159 | * @throws Exception Exception 160 | */ 161 | public static void addColumnFamily(String tableName, String... 
families) throws Exception { 162 | Admin admin = null; 163 | try { 164 | admin = HBaseUtils.getConnection().getAdmin(); 165 | for (String family : families) { 166 | HColumnDescriptor columnDescriptor = new HColumnDescriptor(family); 167 | admin.addColumn(TableName.valueOf(tableName), columnDescriptor); 168 | } 169 | } finally { 170 | HBaseUtils.closeAdmin(admin); 171 | } 172 | } 173 | 174 | 175 | /* 176 | * 查询hbase表 177 | * 178 | * @tableName 表名 179 | */ 180 | public static ResultScanner getResult(Table table, Scan scan) throws Exception { 181 | //Table table = null; 182 | ResultScanner rs = null; 183 | try { 184 | //table = HbaseUtils.getConnection().getTable(TableName.valueOf(tableName)); 185 | rs = table.getScanner(scan); 186 | } catch (Exception e) { 187 | LOGGER.error("批量读取数据失败!", e); 188 | throw new Exception("批量读取数据失败!", e); 189 | } finally { 190 | closeTable(table); 191 | } 192 | return rs; 193 | } 194 | 195 | /** 196 | * 批量插入数据 197 | * 198 | * @param table 199 | * @param puts List 200 | * @throws Exception Exception 201 | */ 202 | public static boolean batchPuts(Table table, List puts) throws Exception { 203 | //Table table = null; 204 | try { 205 | //table = HBaseUtils.getConnection().getTable(TableName.valueOf(tableName)); 206 | table.put(puts); 207 | } catch (Exception e) { 208 | LOGGER.error("批量存储数据失败!", e); 209 | throw new Exception("批量存储数据失败!", e); 210 | } finally { 211 | closeTable(table); 212 | } 213 | 214 | return true; 215 | } 216 | 217 | /** 218 | * 多线程批量插入hbase 219 | * 220 | * @param tableName 表名 221 | * @param puts List 222 | */ 223 | public static void batchPut(final String tableName, List puts) { 224 | ExecutorService pool = Executors.newFixedThreadPool(5); 225 | pool.submit(() -> { 226 | BufferedMutator mutator = null; 227 | try { 228 | Connection conn = HBaseUtils.getConnection(); 229 | //HBaseUtils.enableTable(tableName); 230 | BufferedMutatorParams params = new BufferedMutatorParams(TableName.valueOf(tableName)); 231 | params.writeBufferSize(5 * 1024 * 1024); 232 | mutator = conn.getBufferedMutator(params); 233 | mutator.mutate(puts); 234 | mutator.flush(); 235 | } catch (Exception e) { 236 | LOGGER.error("write data to hbase failed!", e); 237 | } finally { 238 | try { 239 | assert null != mutator; 240 | mutator.close(); 241 | } catch (IOException e) { 242 | LOGGER.error("close mutator failed", e); 243 | } 244 | } 245 | }); 246 | } 247 | 248 | /** 249 | * 判断命名空间是否存在 250 | * 251 | * @param admin Admin 252 | * @param namespace 命名空间 253 | * @return true:存在、false:不存在 254 | * @throws Exception Exception 255 | */ 256 | private static boolean namespaceIsExists(Admin admin, String namespace) throws Exception { 257 | NamespaceDescriptor[] namespaceDescs = admin.listNamespaceDescriptors(); 258 | List ns = new LinkedList<>(); 259 | Arrays.stream(namespaceDescs).forEach(namespaceDesc -> ns.add(namespaceDesc.getName())); 260 | 261 | return ns.contains(namespace); 262 | } 263 | 264 | /** 265 | * 启用表, 若表状态为disable使其状态变为enable 266 | * 267 | * @param tableName 表名 268 | * @throws Exception Exception 269 | */ 270 | private static void enableTable(String tableName) throws Exception { 271 | // 若表是disable状态, 则启用表 272 | Admin admin = HBaseUtils.getConnection().getAdmin(); 273 | if (admin.isTableAvailable(TableName.valueOf(tableName))) { 274 | LOGGER.info("The table " + tableName + " is available !"); 275 | return; 276 | } 277 | admin.enableTable(TableName.valueOf(tableName)); 278 | LOGGER.info("enable talbe " + tableName + " seccuss."); 279 | } 280 | 281 | /** 282 | * 刷新表空间 283 | * 
284 | * @param tableName tableName 285 | * @throws Exception Exception 286 | */ 287 | public static void flushTable(String tableName) throws Exception { 288 | Admin admin = null; 289 | try { 290 | admin = HBaseUtils.getConnection().getAdmin(); 291 | admin.flush(TableName.valueOf(tableName)); 292 | } catch (Exception e) { 293 | throw new Exception(e); 294 | } finally { 295 | HBaseUtils.closeAdmin(admin); 296 | } 297 | } 298 | 299 | /** 300 | * 关闭hbase表管理对象(DDL)的Admin 301 | * 302 | * @param admin hbase表管理对象 303 | */ 304 | public static void closeAdmin(Admin admin) { 305 | if (null != admin) { 306 | try { 307 | admin.close(); 308 | } catch (IOException e) { 309 | LOGGER.error("close connection failure !", e); 310 | } 311 | } 312 | } 313 | 314 | /** 315 | * 关闭table 316 | * 317 | * @param table 表对象 318 | */ 319 | public static void closeTable(Table table) { 320 | if (null != table) { 321 | try { 322 | table.close(); 323 | } catch (IOException e) { 324 | LOGGER.error("close table failure !", e); 325 | } 326 | } 327 | } 328 | 329 | /** 330 | * 关闭hbase连接 331 | */ 332 | public static void closeConn() { 333 | if (null != CONN) { 334 | try { 335 | CONN.close(); 336 | } catch (IOException e) { 337 | LOGGER.error("close connection failure !", e); 338 | } 339 | } 340 | } 341 | } 342 | 343 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/hbase/loader/ILoader.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.hbase.loader; 2 | 3 | public interface ILoader { 4 | 5 | void loader() throws Exception; 6 | } 7 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/kafka/ReadFromKafka.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.kafka; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | 5 | import org.apache.flink.api.common.functions.MapFunction; 6 | import org.apache.flink.api.java.utils.ParameterTool; 7 | import org.apache.flink.streaming.api.datastream.DataStream; 8 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010; 9 | import org.apache.flink.streaming.util.serialization.SimpleStringSchema; 10 | 11 | import java.util.HashMap; 12 | import java.util.Map; 13 | 14 | 15 | /** 16 | *

17 |  * @author:jyj019 18 |  * Date: 2018/9/17 14:50 19 |  * @version: 2.0.0 20 |  * @since JDK 1.8
  • 21 | */ 22 | 23 | 24 | public class ReadFromKafka { 25 | 26 | public static void main(String[] args) throws Exception { 27 | // create execution environment 28 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 29 | 30 | Map properties= new HashMap(); 31 | properties.put("bootstrap.servers", "192.168.10.63:6667,192.168.10.64:6667,192.168.10.65:6667"); 32 | properties.put("group.id", "dec-esc-group-vib-calc"); 33 | properties.put("enable.auto.commit", "true"); 34 | properties.put("auto.commit.interval.ms", "1000"); 35 | properties.put("auto.offset.reset", "earliest"); 36 | properties.put("session.timeout.ms", "30000"); 37 | properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 38 | properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 39 | properties.put("topic", "dec-vibration-test"); 40 | //KafkaConsumer kafkaConsumer = new KafkaConsumer(properties); 41 | // parse user parameters 42 | //ParameterTool parameterTool = ParameterTool.fromArgs(args); 43 | ParameterTool parameterTool = ParameterTool.fromMap(properties); 44 | 45 | FlinkKafkaConsumer010 consumer010 = new FlinkKafkaConsumer010( 46 | parameterTool.getRequired("topic"), new SimpleStringSchema(), parameterTool.getProperties()); 47 | 48 | // consumer010.setStartFromEarliest(); 49 | 50 | DataStream messageStream = env 51 | .addSource(consumer010); 52 | 53 | // print() will write the contents of the stream to the TaskManager's standard out stream 54 | // the rebelance call is causing a repartitioning of the data so that all machines 55 | // see the messages (for example in cases when "num kafka partitions" < "num flink operators" 56 | messageStream.rebalance().map(new MapFunction() { 57 | private static final long serialVersionUID = 1L; 58 | 59 | @Override 60 | public String map(String value) throws Exception { 61 | return value; 62 | 63 | } 64 | }); 65 | 66 | 67 | messageStream.print(); 68 | 69 | env.execute(); 70 | } 71 | } -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/kafka/WriteIntoKafka.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.kafka; 2 | 3 | import org.apache.commons.lang3.RandomUtils; 4 | import org.apache.flink.api.common.functions.MapFunction; 5 | import org.apache.flink.api.java.utils.ParameterTool; 6 | import org.apache.flink.streaming.api.datastream.DataStream; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.streaming.api.functions.source.SourceFunction; 9 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer010; 10 | 11 | import org.apache.flink.streaming.util.serialization.SimpleStringSchema; 12 | 13 | import java.util.HashMap; 14 | import java.util.Map; 15 | 16 | /** 17 | *

18 |  * @author: jyj019 19 |  * Date: 2018/9/17 15:38 20 |  * @version: 2.0.0 21 |  * @since JDK 1.8
  • 22 | */ 23 | public class WriteIntoKafka { 24 | public static void main(String[] args) throws Exception { 25 | // create execution environment 26 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 27 | 28 | Map properties= new HashMap(); 29 | properties.put("bootstrap.servers", "192.168.10.63:6667,192.168.10.64:6667,192.168.10.65:6667"); 30 | properties.put("group.id", "t10"); 31 | properties.put("enable.auto.commit", "false"); 32 | properties.put("auto.commit.interval.ms", "1000"); 33 | properties.put("auto.offset.reset", "earliest"); 34 | properties.put("session.timeout.ms", "30000"); 35 | properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 36 | properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 37 | properties.put("topic", "kks-topic-FFT"); 38 | //KafkaConsumer kafkaConsumer = new KafkaConsumer(properties); 39 | // parse user parameters 40 | //ParameterTool parameterTool = ParameterTool.fromArgs(args); 41 | ParameterTool parameterTool = ParameterTool.fromMap(properties); 42 | 43 | // add a simple source which is writing some strings 44 | DataStream messageStream = env.addSource(new SimpleStringGenerator()); 45 | 46 | // write stream to Kafka 47 | messageStream.addSink(new FlinkKafkaProducer010<>(parameterTool.getRequired("bootstrap.servers"), 48 | parameterTool.getRequired("topic"), 49 | new SimpleStringSchema())); 50 | 51 | messageStream.rebalance().map(new MapFunction() { 52 | private static final long serialVersionUID = 1L; 53 | 54 | @Override 55 | public String map(String value) throws Exception { 56 | return value; 57 | } 58 | }); 59 | 60 | messageStream.print(); 61 | 62 | env.execute(); 63 | } 64 | 65 | public static class SimpleStringGenerator implements SourceFunction { 66 | private static final long serialVersionUID = 2174904787118597072L; 67 | boolean running = true; 68 | 69 | @Override 70 | public void run(SourceContext ctx) throws Exception { 71 | //int i=0; 72 | while(running) { 73 | //i++; 74 | ctx.collect(prouderJson()); 75 | //System.out.println(prouderJson()); 76 | 77 | } 78 | } 79 | 80 | @Override 81 | public void cancel() { 82 | running = false; 83 | } 84 | } 85 | 86 | public static String prouderJson() throws Exception { 87 | // long start = System.currentTimeMillis(); 88 | Integer value; 89 | String[] channels = new String[]{"000000007946", "000000007947","000000007948","000000007949","000000007950","000000007951","000000007952","000000007953", 90 | "000000007954","000000007955","000000007956","000000007957","000000007958","000000007959","000000007960","000000007961","000000007966", 91 | "000000007967","000000007968","000000007969","000000007970","000000007971","000000007986","000000007987"}; 92 | StringBuffer json = new StringBuffer(); 93 | json.append("{\n" + " \"header\": {\n" + " \"head\": \"EB90EB90EB90\",\n" + " \"plant_code\": 1,\n" + " " + 94 | "\"set_code\": 1,\n" + " \"device_type\": 1,\n" + " \"time\": "+System.currentTimeMillis() +",\n" 95 | + " \"data_length\": 4999\n" + " },\n" + " \"base_info\": {\n" + " \"work_identity\": 1,\n" + 96 | " \"sample_points_per_cycle\": 1024,\n" + " \"sampling_period\": 8,\n" + " \"sampling_number\": 1024,\n" + 97 | " \"rotate_speed\": "+randmomUtils1(RandomUtils.nextInt(0, 3)) +",\n" + " \"fast_variable_channels\": 24\n" + " },\n \"channel\":{"); 98 | for (int i=0;i<23;i ++) { 99 | json.append("\"" + channels[i] + "\":{\"peak\":" + randmomUtils2(RandomUtils.nextInt(0, 10)) + ",\n" 100 | + 
"\"phase_1x\":" + RandomUtils.nextFloat(0, 500) + ",\n" 101 | + "\"amplitude_1x\":" + (RandomUtils.nextFloat( 0, (float) 6.28)-3.14) + ",\n" 102 | + "\"phase_2x\":" + RandomUtils.nextFloat(0, 50) + ",\n" 103 | + "\"amplitude_2x\":" + (RandomUtils.nextFloat(0, (float) 6.28)-3.14) + ",\n" 104 | + "\"half_amplitud\":" + RandomUtils.nextFloat(0, 50) + ",\n" 105 | + "\"voltage\":" + RandomUtils.nextFloat(0, 5) + ",\n" 106 | +"\"waveform_data\":["); 107 | for(int j=1;j<1024;j ++){ 108 | value = (int) (5 * Math.sin(360 / 32 * j) + (8 * (Math.sin((360 / 64) * j)))); 109 | json.append(value+","); 110 | } 111 | value=(int)(5*(Math.sin((360/32)*1024))+8*(Math.sin((360/64)*1024))); 112 | json.append(value+"]},\n"); 113 | } 114 | json.append("\""+channels[23] +"\":{\"peak\":" + randmomUtils1(RandomUtils.nextInt(0, 10)) + ",\n" 115 | + "\"phase_1x\":" + RandomUtils.nextFloat(0, 500) + ",\n" 116 | + "\"amplitude_1x\":" + (RandomUtils.nextFloat( 0, (float) 6.28)-3.14) + ",\n" 117 | + "\"phase_2x\":" + RandomUtils.nextFloat(0, 50) + ",\n" 118 | + "\"amplitude_2x\":" + (RandomUtils.nextFloat(0, (float) 6.28)-3.14) + ",\n" 119 | + "\"half_amplitud\":" + RandomUtils.nextFloat(0, 50) + ",\n" 120 | + "\"voltage\":" + RandomUtils.nextFloat(0, 5) + ",\n" 121 | +"\"waveform_data\":["); 122 | for(int j=1;j<1024;j ++){ 123 | value=(int)(5*(Math.sin((360/32)*j))+8*(Math.sin((360/64)*j))); 124 | json.append(value+","); 125 | } 126 | value=(int)(5*(Math.sin((360/32)*1024))+8*(Math.sin((360/64)*1024))); 127 | json.append(value+"]}}}\n"); 128 | // long end = System.currentTimeMillis(); 129 | // LOGGER.info("制造数据,耗时:-->"+(start-end) ); 130 | return String.valueOf(json); 131 | } 132 | 133 | public static Float randmomUtils1(int i) throws Exception{ 134 | Float value=RandomUtils.nextFloat(2950, 3080); 135 | switch (i){ 136 | case 1: 137 | value=RandomUtils.nextFloat(10, 90); 138 | break; 139 | case 2: 140 | value=RandomUtils.nextFloat(0, 80); 141 | break; 142 | } 143 | return value; 144 | } 145 | 146 | public static Float randmomUtils2(int i) throws Exception{ 147 | Float value=RandomUtils.nextFloat(290, 300); 148 | switch (i){ 149 | case 1: 150 | value=RandomUtils.nextFloat(0, 200); 151 | break; 152 | } 153 | return value; 154 | } 155 | 156 | // public static class SimpleStringSchema implements DeserializationSchema, SerializationSchema { 157 | // private static final long serialVersionUID = 1L; 158 | // 159 | // public SimpleStringSchema() { 160 | // } 161 | // 162 | // public String deserialize(byte[] message) { 163 | // return new String(message); 164 | // } 165 | // 166 | // public boolean isEndOfStream(String nextElement) { 167 | // return false; 168 | // } 169 | // 170 | // public byte[] serialize(String element) { 171 | // return element.getBytes(); 172 | // } 173 | // 174 | // public TypeInformation getProducedType() { 175 | // return TypeExtractor.getForClass(String.class); 176 | // } 177 | // } 178 | } 179 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/metricsOp/gaugesOp.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.metricsOp; 2 | 3 | import com.codahale.metrics.ConsoleReporter; 4 | import com.codahale.metrics.Gauge; 5 | import com.codahale.metrics.JmxReporter; 6 | import com.codahale.metrics.MetricRegistry; 7 | 8 | import java.util.Queue; 9 | import java.util.concurrent.LinkedBlockingDeque; 10 | import java.util.concurrent.TimeUnit; 11 | 12 | /** 13 
| * Gauges是一个简单的计量,一般用来统计瞬间状态的数量信息,比如系统中处于pending状态的job 14 | * 这里是测试Gauges,实时统计pending状态的job个数 15 | */ 16 | public class gaugesOp { 17 | /** 18 | * 实例化一个registry,最核心的一个模块,相当于应用程序的metrics系统的容器,维护一个Map 19 | */ 20 | private static final MetricRegistry metrics = new MetricRegistry(); 21 | private static Queue queue = new LinkedBlockingDeque(); 22 | /** 23 | * 在控制台上打印输出 24 | */ 25 | private static ConsoleReporter reporter = ConsoleReporter.forRegistry(metrics).build(); 26 | public static void main(String[] args) throws InterruptedException { 27 | reporter.start(3, TimeUnit.SECONDS); 28 | 29 | //实例化一个Gauge 30 | Gauge gauge = new Gauge() { 31 | @Override 32 | public Integer getValue() { 33 | return queue.size(); 34 | } 35 | }; 36 | 37 | //注册到容器中 38 | metrics.register(MetricRegistry.name(gaugesOp.class, "pending-job", "size"), gauge); 39 | 40 | //测试JMX 41 | JmxReporter jmxReporter = JmxReporter.forRegistry(metrics).build(); 42 | jmxReporter.start(); 43 | 44 | //模拟数据 45 | for (int i=0; i< 20; i++) { 46 | queue.add("a"); 47 | Thread.sleep(1000); 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/client/MessageCollector.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.client; 2 | 3 | import com.z.flinkStreamOptimizatiion.rpc.common.MessageInput; 4 | import com.z.flinkStreamOptimizatiion.rpc.common.MessageOutput; 5 | import com.z.flinkStreamOptimizatiion.rpc.common.MessageRegistry; 6 | import io.netty.channel.ChannelHandler; 7 | import io.netty.channel.ChannelHandlerContext; 8 | import io.netty.channel.ChannelInboundHandlerAdapter; 9 | import org.slf4j.Logger; 10 | import org.slf4j.LoggerFactory; 11 | 12 | import java.util.concurrent.ConcurrentHashMap; 13 | import java.util.concurrent.ConcurrentMap; 14 | import java.util.concurrent.TimeUnit; 15 | 16 | @ChannelHandler.Sharable 17 | public class MessageCollector extends ChannelInboundHandlerAdapter { 18 | 19 | private final static Logger LOG = LoggerFactory.getLogger(MessageCollector.class); 20 | private MessageRegistry registry; 21 | private RPCClient client; 22 | private ChannelHandlerContext context; 23 | private Throwable ConnectionClosed = new Exception("rpc connection not active error"); 24 | private ConcurrentMap> pendingTasks = new ConcurrentHashMap<>(); 25 | 26 | 27 | public MessageCollector(MessageRegistry registry, RPCClient client) { 28 | this.registry = registry; 29 | this.client = client; 30 | } 31 | 32 | @Override 33 | public void channelActive(ChannelHandlerContext ctx) throws Exception { 34 | this.context = ctx; 35 | } 36 | 37 | @Override 38 | public void channelInactive(ChannelHandlerContext ctx) throws Exception { 39 | this.context = null; 40 | pendingTasks.forEach((__, future) -> { 41 | future.fail(ConnectionClosed); 42 | }); 43 | pendingTasks.clear(); 44 | // 尝试重连 45 | ctx.channel().eventLoop().schedule(() -> { 46 | client.reconnect(); 47 | }, 1, TimeUnit.SECONDS); 48 | } 49 | 50 | public RpcFuture send(MessageOutput output) { 51 | ChannelHandlerContext ctx = context; 52 | RpcFuture future = new RpcFuture(); 53 | if (ctx != null) { 54 | ctx.channel().eventLoop().execute(() -> { 55 | pendingTasks.put(output.getRequestId(), future); 56 | ctx.writeAndFlush(output); 57 | }); 58 | } else { 59 | future.fail(ConnectionClosed); 60 | } 61 | return future; 62 | } 63 | 64 | //客户端拿到服务端返回的数据 65 | @Override 66 | public void channelRead(ChannelHandlerContext 
ctx, Object msg) throws Exception { 67 | if (!(msg instanceof MessageInput)) { 68 | return; 69 | } 70 | MessageInput input = (MessageInput) msg; 71 | // 业务逻辑在这里 72 | Class clazz = registry.get(input.getType()); 73 | if (clazz == null) { 74 | LOG.error("unrecognized msg type {}", input.getType()); 75 | return; 76 | } 77 | Object o = input.getPayload(clazz); 78 | @SuppressWarnings("unchecked") 79 | RpcFuture future = (RpcFuture) pendingTasks.remove(input.getRequestId()); 80 | if (future == null) { 81 | LOG.error("future not found with type {}", input.getType()); 82 | return; 83 | } 84 | System.out.println("客户端拿到服务端返回的数据: " + o); 85 | future.success(o); 86 | } 87 | 88 | 89 | @Override 90 | public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception { 91 | 92 | } 93 | 94 | public void close() { 95 | ChannelHandlerContext ctx = context; 96 | if (ctx != null) { 97 | ctx.close(); 98 | } 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/client/RPCClient.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.client; 2 | 3 | import com.z.flinkStreamOptimizatiion.rpc.common.*; 4 | import io.netty.bootstrap.Bootstrap; 5 | import io.netty.channel.ChannelInitializer; 6 | import io.netty.channel.ChannelOption; 7 | import io.netty.channel.ChannelPipeline; 8 | import io.netty.channel.EventLoopGroup; 9 | import io.netty.channel.nio.NioEventLoopGroup; 10 | import io.netty.channel.socket.SocketChannel; 11 | import io.netty.channel.socket.nio.NioSocketChannel; 12 | import io.netty.handler.timeout.ReadTimeoutHandler; 13 | import org.slf4j.Logger; 14 | import org.slf4j.LoggerFactory; 15 | 16 | import java.io.Serializable; 17 | import java.util.concurrent.ExecutionException; 18 | import java.util.concurrent.TimeUnit; 19 | 20 | //连接管理 读写消息 链接重连 21 | public class RPCClient implements Serializable { 22 | private final static Logger LOG = LoggerFactory.getLogger(RPCClient.class); 23 | 24 | private String ip; 25 | private int port; 26 | private Bootstrap bootstrap; 27 | private EventLoopGroup group; 28 | private MessageCollector collector; 29 | private boolean started; 30 | private boolean stopped; 31 | private MessageRegistry registry = new MessageRegistry(); 32 | 33 | public RPCClient(String ip, int port) { 34 | this.ip = ip; 35 | this.port = port; 36 | this.init(); 37 | } 38 | 39 | /** 40 | * 41 | * @param type 42 | * @param rspClass 服务端返回结果的类型 43 | * @return 44 | */ 45 | public RPCClient rpc(String type, Class rspClass) { 46 | //rpc响应类型的注册快速入口 47 | registry.register(type, rspClass); 48 | return this; 49 | } 50 | 51 | public RpcFuture sendAsync(String type, Object payload) { 52 | if (!started) { 53 | connect(); 54 | started = true; 55 | } 56 | String requestId = RequestId.next(); 57 | MessageOutput output = new MessageOutput(requestId, type, payload); 58 | return collector.send(output); 59 | } 60 | 61 | public T send(String type, Object payload) { 62 | //普通rpc请求,正常获取相应 63 | RpcFuture future = sendAsync(type, payload); 64 | try { 65 | return future.get(); 66 | } catch (InterruptedException | ExecutionException e) { 67 | throw new RPCException(e); 68 | } 69 | } 70 | 71 | public void init() { 72 | bootstrap = new Bootstrap(); 73 | group = new NioEventLoopGroup(1); 74 | bootstrap.group(group); 75 | MessageEncoder encoder = new MessageEncoder(); 76 | collector = new MessageCollector(registry, this); 77 | 
bootstrap.channel(NioSocketChannel.class).handler(new ChannelInitializer() { 78 | 79 | @Override 80 | protected void initChannel(SocketChannel ch) throws Exception { 81 | ChannelPipeline pipe = ch.pipeline(); 82 | pipe.addLast(new ReadTimeoutHandler(60)); 83 | pipe.addLast(new MessageDecoder()); 84 | pipe.addLast(encoder); 85 | pipe.addLast(collector); 86 | } 87 | 88 | }); 89 | bootstrap.option(ChannelOption.TCP_NODELAY, true).option(ChannelOption.SO_KEEPALIVE, true); 90 | } 91 | 92 | public void connect() { 93 | bootstrap.connect(ip, port).syncUninterruptibly(); 94 | } 95 | 96 | public void reconnect() { 97 | if (stopped) { 98 | return; 99 | } 100 | bootstrap.connect(ip, port).addListener(future -> { 101 | if (future.isSuccess()) { 102 | return; 103 | } 104 | if (!stopped) { 105 | group.schedule(() -> { 106 | reconnect(); 107 | }, 1, TimeUnit.SECONDS); 108 | } 109 | LOG.error("connect {}:{} failure", ip, port, future.cause()); 110 | }); 111 | } 112 | 113 | public void close() { 114 | stopped = true; 115 | collector.close(); 116 | group.shutdownGracefully(0, 5000, TimeUnit.SECONDS); 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/client/RPCException.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.client; 2 | //定义客户端异常,用于同一抛出RPC错误 3 | public class RPCException extends RuntimeException { 4 | 5 | private static final long serialVersionUID = 1L; 6 | 7 | public RPCException(String message, Throwable cause) { 8 | super(message, cause); 9 | } 10 | 11 | public RPCException(String message) { 12 | super(message); 13 | } 14 | 15 | public RPCException(Throwable cause) { 16 | super(cause); 17 | } 18 | 19 | } 20 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/client/RpcFuture.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.client; 2 | 3 | import org.apache.flink.runtime.executiongraph.Execution; 4 | 5 | import java.util.concurrent.*; 6 | 7 | public class RpcFuture implements Future { 8 | 9 | private T result; 10 | private Throwable error; 11 | private CountDownLatch latch = new CountDownLatch(1); 12 | 13 | @Override 14 | public boolean cancel(boolean mayInterruptIfRunning) { 15 | return false; 16 | } 17 | 18 | @Override 19 | public boolean isCancelled() { 20 | return false; 21 | } 22 | 23 | @Override 24 | public boolean isDone() { 25 | return result != null || error != null; 26 | } 27 | 28 | @Override 29 | public T get() throws InterruptedException, ExecutionException { 30 | latch.await(); 31 | if (error != null) { 32 | throw new ExecutionException(error); 33 | } 34 | return result; 35 | } 36 | 37 | public void success(T result) { 38 | this.result = result; 39 | latch.countDown(); 40 | } 41 | 42 | public void fail(Throwable error) { 43 | this.error = error; 44 | latch.countDown(); 45 | } 46 | 47 | @Override 48 | public T get(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException, TimeoutException { 49 | latch.await(timeout, unit); 50 | if (error != null) { 51 | throw new ExecutionException(error); 52 | } 53 | return result; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/common/Charsets.java: 
-------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.common; 2 | 3 | import java.nio.charset.Charset; 4 | 5 | public class Charsets { 6 | 7 | public static Charset UTF8 = Charset.forName("utf8"); 8 | 9 | } 10 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/common/IMessageHandler.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.common; 2 | 3 | 4 | import io.netty.channel.ChannelHandlerContext; 5 | 6 | //消息处理器接口,每个自定义服务必须实现handle方法 7 | @FunctionalInterface 8 | public interface IMessageHandler { 9 | void handle(ChannelHandlerContext ctx, String requestId, T message); 10 | } 11 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/common/MessageDecoder.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.common; 2 | 3 | import io.netty.buffer.ByteBuf; 4 | import io.netty.channel.ChannelHandlerContext; 5 | import io.netty.handler.codec.DecoderException; 6 | import io.netty.handler.codec.ReplayingDecoder; 7 | 8 | import java.util.List; 9 | 10 | //消息解码器 11 | //使用Netty的ReplayingDecoder实现。简单起见,这里没有使用checkpoint去优化性能了 12 | public class MessageDecoder extends ReplayingDecoder { 13 | 14 | @Override 15 | protected void decode(ChannelHandlerContext ctx, ByteBuf in, List out) throws Exception { 16 | String requestId = readStr(in); 17 | String type = readStr(in); 18 | String content = readStr(in); 19 | out.add(new MessageInput(type, requestId, content)); 20 | } 21 | 22 | private String readStr(ByteBuf in) { 23 | int len = in.readInt(); 24 | if (len < 0 || len > (1 << 20)) { 25 | throw new DecoderException("string too long len=" + len); 26 | } 27 | byte[] bytes = new byte[len]; 28 | in.readBytes(bytes); 29 | return new String(bytes, Charsets.UTF8); 30 | } 31 | 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/common/MessageEncoder.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.common; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import io.netty.buffer.ByteBuf; 5 | import io.netty.buffer.PooledByteBufAllocator; 6 | import io.netty.channel.ChannelHandler; 7 | import io.netty.channel.ChannelHandlerContext; 8 | import io.netty.handler.codec.MessageToMessageEncoder; 9 | 10 | import java.util.List; 11 | 12 | //消息编码器 13 | @ChannelHandler.Sharable 14 | public class MessageEncoder extends MessageToMessageEncoder { 15 | @Override 16 | protected void encode(ChannelHandlerContext ctx, MessageOutput msg, List out) throws Exception { 17 | ByteBuf buf = PooledByteBufAllocator.DEFAULT.directBuffer(); 18 | writeStr(buf, msg.getRequestId()); 19 | writeStr(buf, msg.getType()); 20 | writeStr(buf, JSON.toJSONString(msg.getPayload())); 21 | out.add(buf); 22 | } 23 | private void writeStr(ByteBuf buf, String s) { 24 | buf.writeInt(s.length()); 25 | buf.writeBytes(s.getBytes(Charsets.UTF8)); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/common/MessageHandlers.java: -------------------------------------------------------------------------------- 1 | 
package com.z.flinkStreamOptimizatiion.rpc.common; 2 | 3 | import com.google.common.collect.Maps; 4 | 5 | import java.util.Map; 6 | 7 | public class MessageHandlers { 8 | private Map> handlers = Maps.newHashMap(); 9 | private IMessageHandler defaultHandler; 10 | 11 | public void register(String type, IMessageHandler handler) { 12 | handlers.put(type, handler); 13 | } 14 | 15 | public MessageHandlers defaultHandler(IMessageHandler defaultHandler) { 16 | this.defaultHandler = defaultHandler; 17 | return this; 18 | } 19 | 20 | public IMessageHandler defaultHandler() { 21 | return defaultHandler; 22 | } 23 | 24 | public IMessageHandler get(String type) { 25 | IMessageHandler handler = handlers.get(type); 26 | return handler; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/common/MessageInput.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.common; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | 5 | //定义消息输入输出格式,消息类型、消息唯一ID和消息的json序列化字符串内容。 6 | // 消息唯一ID是用来客户端验证服务器请求和响应是否匹配。 7 | public class MessageInput { 8 | private String type; 9 | private String requestId; 10 | private String payload; 11 | 12 | public MessageInput(String type, String requestId, String payload) { 13 | this.type = type; 14 | this.requestId = requestId; 15 | this.payload = payload; 16 | } 17 | 18 | public String getType() { 19 | return type; 20 | } 21 | 22 | public String getRequestId() { 23 | return requestId; 24 | } 25 | //因为我们想直接拿到对象,所以要提供对象的类型参数 26 | public T getPayload(Class clazz) { 27 | if (payload == null) { 28 | return null; 29 | } 30 | return JSON.parseObject(payload, clazz); 31 | } 32 | 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/common/MessageOutput.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.common; 2 | 3 | public class MessageOutput { 4 | private String requestId; 5 | private String type; 6 | private Object payload; 7 | 8 | public MessageOutput(String requestId, String type, Object payload) { 9 | this.requestId = requestId; 10 | this.type = type; 11 | this.payload = payload; 12 | } 13 | 14 | public String getType() { 15 | return this.type; 16 | } 17 | 18 | public String getRequestId() { 19 | return requestId; 20 | } 21 | 22 | public Object getPayload() { 23 | return payload; 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/common/MessageRegistry.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.common; 2 | 3 | 4 | import com.google.common.collect.Maps; 5 | 6 | import java.util.Map; 7 | 8 | public class MessageRegistry { 9 | private Map> clazzes = Maps.newHashMap(); 10 | //type是命令字,clazz是服务端返回数据的类型 11 | public void register(String type, Class clazz) { 12 | clazzes.put(type, clazz); 13 | } 14 | public Class get(String type) { 15 | return clazzes.get(type); 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/common/RequestId.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.common; 2 | 3 | 
import java.util.UUID; 4 | 5 | public class RequestId { 6 | //简单UUID 64 7 | public static String next() { 8 | return UUID.randomUUID().toString(); 9 | } 10 | 11 | } 12 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/demo/DemoClient.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.demo; 2 | 3 | import com.z.flinkStreamOptimizatiion.rpc.client.RPCClient; 4 | import com.z.flinkStreamOptimizatiion.rpc.client.RPCException; 5 | 6 | import java.io.Serializable; 7 | 8 | //RPC客户端 9 | public class DemoClient implements Serializable { 10 | 11 | private RPCClient client; 12 | 13 | public DemoClient(RPCClient client) { 14 | this.client = client; 15 | this.client.rpc("fib_res", Long.class); 16 | } 17 | 18 | public long fib(int n) { 19 | return (Long) client.send("fib", n); 20 | } 21 | 22 | //RPC客户端要链接远程IP端口,并注册服务输出类(RPC响应类), 23 | // 然后分别调用20次斐波那契服务和指数服务,输出结果 24 | 25 | public static void main(String[] args) throws InterruptedException { 26 | RPCClient client = new RPCClient("localhost", 8888); 27 | DemoClient demo = new DemoClient(client); 28 | for (int i = 0; i < 30; i++) { 29 | try { 30 | System.out.printf("fib(%d) = %d\n", i, demo.fib(i)); 31 | Thread.sleep(100); 32 | } catch (RPCException e) { 33 | i--; // retry 34 | } 35 | } 36 | Thread.sleep(3000); 37 | 38 | 39 | client.close(); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/demo/DemoServer.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.demo; 2 | 3 | import com.z.flinkStreamOptimizatiion.rpc.common.IMessageHandler; 4 | import com.z.flinkStreamOptimizatiion.rpc.common.MessageOutput; 5 | import com.z.flinkStreamOptimizatiion.rpc.server.RPCServer; 6 | import io.netty.channel.ChannelHandlerContext; 7 | 8 | import java.util.ArrayList; 9 | import java.util.List; 10 | 11 | 12 | //斐波那契和指数计算处理 13 | class FibRequestHandler implements IMessageHandler { 14 | 15 | private List fibs = new ArrayList<>(); 16 | 17 | { 18 | fibs.add(1L); // fib(0) = 1 19 | fibs.add(1L); // fib(1) = 1 20 | } 21 | 22 | @Override 23 | public void handle(ChannelHandlerContext ctx, String requestId, Integer n) { 24 | for (int i = fibs.size(); i < n + 1; i++) { 25 | long value = fibs.get(i - 2) + fibs.get(i - 1); 26 | fibs.add(value); 27 | } 28 | //响应输出 29 | ctx.writeAndFlush(new MessageOutput(requestId, "fib_res", fibs.get(n))); 30 | } 31 | 32 | } 33 | 34 | //构建RPC服务器 35 | //RPC服务器要监听指定IP端口,设定IO线程数和业务线程数 36 | //然后注册斐波那契服务输入类,还有响应的计算处理器 37 | public class DemoServer { 38 | 39 | 40 | public static void main(String[] args) { 41 | //RPCServer server = new RPCServer("localhost", 8888, 2, 16); 42 | RPCServer server = new RPCServer("localhost", 8888, 2, 16); 43 | server.service("fib", Integer.class, new FibRequestHandler()); 44 | server.start(); 45 | 46 | 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/server/DefaultHandler.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.server; 2 | 3 | import com.z.flinkStreamOptimizatiion.rpc.common.IMessageHandler; 4 | import com.z.flinkStreamOptimizatiion.rpc.common.MessageInput; 5 | import 
io.netty.channel.ChannelHandlerContext; 6 | import org.slf4j.Logger; 7 | import org.slf4j.LoggerFactory; 8 | 9 | public class DefaultHandler implements IMessageHandler { 10 | 11 | private final static Logger LOG = LoggerFactory.getLogger(DefaultHandler.class); 12 | @Override 13 | public void handle(ChannelHandlerContext ctx, String requestId, MessageInput input) { 14 | LOG.error("unrecognized message type {} comes", input.getType()); 15 | ctx.close(); 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/server/MessageCollector.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.server; 2 | 3 | import com.google.common.collect.Queues; 4 | import com.z.flinkStreamOptimizatiion.rpc.common.IMessageHandler; 5 | import com.z.flinkStreamOptimizatiion.rpc.common.MessageHandlers; 6 | import com.z.flinkStreamOptimizatiion.rpc.common.MessageInput; 7 | import com.z.flinkStreamOptimizatiion.rpc.common.MessageRegistry; 8 | import io.netty.channel.ChannelHandler; 9 | import io.netty.channel.ChannelHandlerContext; 10 | import io.netty.channel.ChannelInboundHandlerAdapter; 11 | import org.slf4j.Logger; 12 | import org.slf4j.LoggerFactory; 13 | 14 | import java.util.concurrent.ArrayBlockingQueue; 15 | import java.util.concurrent.BlockingQueue; 16 | import java.util.concurrent.ThreadFactory; 17 | import java.util.concurrent.ThreadPoolExecutor; 18 | import java.util.concurrent.ThreadPoolExecutor.CallerRunsPolicy; 19 | import java.util.concurrent.TimeUnit; 20 | import java.util.concurrent.atomic.AtomicInteger; 21 | 22 | //Netty事件回调类 23 | //标注一个channel handler可以被多个channel安全地共享 24 | @ChannelHandler.Sharable 25 | public class MessageCollector extends ChannelInboundHandlerAdapter { 26 | 27 | private final static Logger LOG = LoggerFactory.getLogger(MessageCollector.class); 28 | //业务线程池 29 | private ThreadPoolExecutor executor; 30 | private MessageHandlers handlers; 31 | private MessageRegistry registry; 32 | 33 | public MessageCollector(MessageHandlers handlers, MessageRegistry registry, int workerThreads) { 34 | System.out.println("[2] MessageCollector 构造"); 35 | //业务队列最大1000,避免堆积 36 | //如果子线程处理不过来,io线程也会加入业务逻辑(callerRunsPolicy) 37 | BlockingQueue queue = new ArrayBlockingQueue<>(1000); 38 | //给业务线程命名 39 | ThreadFactory factory = new ThreadFactory() { 40 | 41 | AtomicInteger seq = new AtomicInteger(); 42 | 43 | @Override 44 | public Thread newThread(Runnable r) { 45 | Thread t = new Thread(r); 46 | t.setName("rpc-" + seq.getAndIncrement()); 47 | return t; 48 | } 49 | 50 | }; 51 | //闲置时间超过30秒的线程就自动销毁 52 | this.executor = new ThreadPoolExecutor(1, workerThreads, 30, TimeUnit.SECONDS, queue, factory, 53 | new CallerRunsPolicy()); 54 | this.handlers = handlers; 55 | this.registry = registry; 56 | } 57 | 58 | public void closeGracefully() { 59 | //优雅一点关闭,先通知,再等待,最后强制关闭 60 | this.executor.shutdown(); 61 | try { 62 | this.executor.awaitTermination(10, TimeUnit.SECONDS); 63 | } catch (InterruptedException e) { 64 | } 65 | this.executor.shutdownNow(); 66 | } 67 | 68 | @Override 69 | public void channelActive(ChannelHandlerContext ctx) throws Exception { 70 | //客户端来了一个新的连接 71 | LOG.debug("connection comes"); 72 | } 73 | 74 | @Override 75 | public void channelInactive(ChannelHandlerContext ctx) throws Exception { 76 | //客户端走了一个 77 | LOG.debug("connection leaves"); 78 | } 79 | 80 | @Override 81 | public void channelRead(ChannelHandlerContext ctx, 
Object msg) throws Exception { 82 | if (msg instanceof MessageInput) { 83 | //用业务线程处理消息 84 | this.executor.execute(() -> { 85 | this.handleMessage(ctx, (MessageInput) msg); 86 | }); 87 | } 88 | } 89 | 90 | //业务逻辑处理 91 | private void handleMessage(ChannelHandlerContext ctx, MessageInput input) { 92 | Class clazz = registry.get(input.getType()); 93 | if (clazz == null) { 94 | //没注册的消息用默认的处理器处理 95 | handlers.defaultHandler().handle(ctx, input.getRequestId(), input); 96 | return; 97 | } 98 | 99 | Object o = input.getPayload(clazz); 100 | //这里有问题 101 | @SuppressWarnings("unchecked") 102 | IMessageHandler handler = (IMessageHandler) handlers.get(input.getType()); 103 | if (handler != null) { 104 | handler.handle(ctx, input.getRequestId(), o); 105 | } else { 106 | handlers.defaultHandler().handle(ctx, input.getRequestId(), input); 107 | } 108 | 109 | } 110 | 111 | @Override 112 | public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception { 113 | //此处可能因为客户机器突发重启 114 | //也可能客户端连接时间超时,后面的REadTimeoutHandle抛出异常 115 | //也可能消息协议错误,序列化异常 116 | LOG.warn("connection error", cause); 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/server/RPCServer.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.server; 2 | 3 | import com.z.flinkStreamOptimizatiion.rpc.common.*; 4 | import io.netty.bootstrap.ServerBootstrap; 5 | import io.netty.channel.*; 6 | import io.netty.channel.nio.NioEventLoopGroup; 7 | import io.netty.channel.socket.SocketChannel; 8 | import io.netty.channel.socket.nio.NioServerSocketChannel; 9 | import io.netty.handler.timeout.ReadTimeoutHandler; 10 | import org.slf4j.Logger; 11 | import org.slf4j.LoggerFactory; 12 | 13 | public class RPCServer { 14 | 15 | private final static Logger LOG = LoggerFactory.getLogger(RPCServer.class); 16 | private String ip; 17 | private int port; 18 | private int ioThreads; //用来处理网络流的读写线程 19 | private int workerThreads; //用来业务处理的计算线程 20 | private MessageHandlers handlers = new MessageHandlers(); 21 | private MessageRegistry registry = new MessageRegistry(); 22 | 23 | // 在构造函数执行之前执行 24 | { 25 | handlers.defaultHandler(new DefaultHandler()); 26 | } 27 | 28 | public RPCServer(String ip, int port, int ioThreads, int workerThreads) { 29 | this.ip = ip; 30 | this.port = port; 31 | this.ioThreads = ioThreads; 32 | this.workerThreads = workerThreads; 33 | } 34 | 35 | private ServerBootstrap bootstrap; 36 | private EventLoopGroup group; 37 | private MessageCollector collector; 38 | private Channel serverChannel; 39 | 40 | // 注册服务的快捷方式 41 | 42 | /** 43 | * 44 | * @param type 45 | * @param reqClass 客户端请求数据的类型 46 | * @param handler 47 | * @return 48 | */ 49 | public RPCServer service(String type, Class reqClass, IMessageHandler handler) { 50 | registry.register(type, reqClass); 51 | handlers.register(type, handler); 52 | return this; 53 | } 54 | 55 | // 启动RPC服务 56 | public void start() { 57 | System.out.println("[1] RPCServer start"); 58 | bootstrap = new ServerBootstrap(); 59 | //用来接收进来的连接 60 | group = new NioEventLoopGroup(ioThreads); 61 | bootstrap.group(group); 62 | collector = new MessageCollector(handlers, registry, workerThreads); 63 | MessageEncoder encoder = new MessageEncoder(); 64 | 65 | //配置Channel 66 | bootstrap.channel(NioServerSocketChannel.class).childHandler(new ChannelInitializer() { 67 | 68 | @Override 69 | protected void 
initChannel(SocketChannel ch) throws Exception { 70 | //注册handler 71 | ChannelPipeline pipe = ch.pipeline(); 72 | //如果客户端60秒没有任何请求,就关闭客户端连接 73 | pipe.addLast(new ReadTimeoutHandler(60)); 74 | //解码器 75 | pipe.addLast(new MessageDecoder()); 76 | //编码器 77 | pipe.addLast(encoder); 78 | //让业务处理器放在最后 79 | pipe.addLast(collector); 80 | 81 | } 82 | }); 83 | 84 | bootstrap.option(ChannelOption.SO_BACKLOG, 100) //客户端套接字默认接受队列的大小 85 | .option(ChannelOption.SO_REUSEADDR, true) //reuse addr 避免端口冲突 86 | .option(ChannelOption.TCP_NODELAY, true) //关闭小烈合并,保证消息的及时性 87 | .childOption(ChannelOption.SO_KEEPALIVE, true); //长时间没动静的连接自动关闭 88 | 89 | //绑定端口,开始接收进来的连接 90 | serverChannel = bootstrap.bind(this.ip, this.port).channel(); 91 | 92 | LOG.warn("server started @ {}:{}\n", ip, port); 93 | } 94 | 95 | public void stop() { 96 | // 先关闭服务端套件字 97 | serverChannel.close(); 98 | // 再斩断消息来源,停止io线程池 99 | group.shutdownGracefully(); 100 | // 最后停止业务线程 101 | collector.closeGracefully(); 102 | } 103 | 104 | } 105 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/state/CountWindowAverage.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.state; 2 | 3 | import org.apache.flink.api.common.functions.RichFlatMapFunction; 4 | import org.apache.flink.api.common.state.ValueState; 5 | import org.apache.flink.api.common.state.ValueStateDescriptor; 6 | import org.apache.flink.api.common.typeinfo.TypeHint; 7 | import org.apache.flink.api.common.typeinfo.TypeInformation; 8 | import org.apache.flink.api.java.tuple.Tuple2; 9 | import org.apache.flink.configuration.Configuration; 10 | import org.apache.flink.util.Collector; 11 | 12 | public class CountWindowAverage extends RichFlatMapFunction, Tuple2> { 13 | 14 | // trasient 不参加序列化过程,不存储 15 | private transient ValueState> sum; 16 | 17 | @Override 18 | public void flatMap(Tuple2 input, Collector> out) throws Exception { 19 | // access the state value 20 | Tuple2 currentSum = sum.value(); 21 | 22 | // update the count 23 | currentSum.f0 += 1; 24 | 25 | // add the second field of the input value 26 | currentSum.f1 += input.f1; 27 | 28 | // update the state 29 | sum.update(currentSum); 30 | 31 | // if the count reaches 2, emit the average and clear the state 32 | 33 | if (currentSum.f0 >= 2) { 34 | out.collect(new Tuple2<>(input.f0, currentSum.f1 / currentSum.f0)); 35 | sum.clear(); 36 | } 37 | } 38 | 39 | @Override 40 | public void open(Configuration config) { 41 | ValueStateDescriptor> descriptor = new ValueStateDescriptor>( 42 | "average", 43 | TypeInformation.of(new TypeHint>() {}), 44 | Tuple2.of(0L, 0L) 45 | ); // default value of the state, if nothing was set 46 | sum = getRuntimeContext().getState(descriptor); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/state/KeyStateMsgDemo.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.state; 2 | 3 | import org.apache.flink.api.java.tuple.Tuple2; 4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 5 | 6 | /** 7 | * 与key相关的状态管理(以key分组进行状态管理) 8 | * 9 | * 补充: 10 | * 与key无关的state,就是与operator绑定的state,整个operator只对应一个state 11 | * 保存operator state的数据结构为ListState 12 | * 举例来说,Flink中的Kafka Connector,就是用来operator state,它会在每个connector实例中,保存该实例中消费 13 | * topic的所有(partition, offset)映射 14 | * 
继承CheckpointedFunction, 实现snapshotState和restoreState 15 | * 16 | */ 17 | 18 | public class KeyStateMsgDemo { 19 | 20 | /** 21 | * if the count reaches 2, emit the average and clear the state 22 | * 所以Tuple2.of(1L, 3L), Tuple2.of(1L, 5L) 一组 23 | * 所以Tuple2.of(1L, 7L),Tuple2.of(1L, 4L)一组 24 | * @param args 25 | * @throws Exception 26 | */ 27 | 28 | public static void main(String[] args) throws Exception { 29 | //获取Flink的运行环境 30 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 31 | env.fromElements(Tuple2.of(1L, 3L), Tuple2.of(1L, 5L), Tuple2.of(1L, 7L), Tuple2.of(1L, 4L), Tuple2.of(1L, 2L)) 32 | .keyBy(0) 33 | .flatMap(new CountWindowAverage()) 34 | .print(); 35 | env.execute("StafulOperator"); 36 | System.out.println("**********************"); 37 | 38 | } 39 | } 40 | 41 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/stream/MyNoParalleSource.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.stream; 2 | 3 | import org.apache.flink.streaming.api.functions.source.SourceFunction; 4 | 5 | public class MyNoParalleSource implements SourceFunction { 6 | 7 | private long count = 1L; 8 | 9 | private boolean isRunning = true; 10 | 11 | @Override 12 | public void run(SourceContext sourceContext) throws Exception { 13 | while(isRunning) { 14 | sourceContext.collect(count); 15 | count ++; 16 | Thread.sleep(1000); 17 | } 18 | } 19 | 20 | @Override 21 | public void cancel() { 22 | isRunning = false; 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/stream/MyNoParalleStrSource.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.stream; 2 | 3 | import org.apache.flink.streaming.api.functions.source.SourceFunction; 4 | 5 | public class MyNoParalleStrSource implements SourceFunction { 6 | 7 | private long count = 1L; 8 | private String str = "test1,test2,"; 9 | private boolean isRunning = true; 10 | 11 | @Override 12 | public void run(SourceContext sourceContext) throws Exception { 13 | while(isRunning) { 14 | sourceContext.collect(str+count); 15 | count ++; 16 | Thread.sleep(1000); 17 | } 18 | } 19 | 20 | @Override 21 | public void cancel() { 22 | isRunning = false; 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/stream/MyParalleSource.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.stream; 2 | 3 | import org.apache.flink.streaming.api.functions.source.ParallelSourceFunction; 4 | 5 | public class MyParalleSource implements ParallelSourceFunction { 6 | private long count = 1L; 7 | private boolean isRunning = true; 8 | 9 | @Override 10 | public void run(SourceContext sourceContext) throws Exception { 11 | while(isRunning) { 12 | sourceContext.collect(count); 13 | count ++; 14 | Thread.sleep(1000); 15 | } 16 | } 17 | 18 | @Override 19 | public void cancel() { 20 | isRunning = false; 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/stream/MyPartition.java: -------------------------------------------------------------------------------- 1 | package 
com.z.flinkStreamOptimizatiion.stream; 2 | 3 | 4 | import org.apache.flink.api.common.functions.Partitioner; 5 | 6 | public class MyPartition implements Partitioner { 7 | 8 | 9 | @Override 10 | public int partition(Long key, int numPartitions) { 11 | System.out.println("分区总数:"+numPartitions); 12 | if (key % 2 == 0) { 13 | return 0; 14 | } else { 15 | return 1; 16 | } 17 | } 18 | 19 | 20 | } 21 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/stream/MyRichParalleSource.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.stream; 2 | 3 | import org.apache.flink.configuration.Configuration; 4 | import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; 5 | 6 | public class MyRichParalleSource extends RichParallelSourceFunction { 7 | private long count = 1L; 8 | private boolean isRunning = true; 9 | /** 10 | * 主要的方法 11 | * 启动一个source 12 | * 大部分情况下,都需要在这个run方法中实现一个循环,这样就可以循环产生数据了 13 | * @param ctx 14 | * @throws Exception 15 | */ 16 | @Override 17 | public void run(SourceContext ctx) throws Exception { 18 | while(isRunning){ 19 | ctx.collect(count); 20 | count++; 21 | //每秒产生一条数据 22 | Thread.sleep(1000); 23 | } 24 | } 25 | /** 26 | * 取消一个cancel的时候会调用的方法 27 | * 28 | */ 29 | @Override 30 | public void cancel() { 31 | isRunning = false; 32 | } 33 | 34 | /** 35 | * 这个方法只会在最开始的时候被调用一次 36 | * 实现获取链接的代码 37 | * @param parameters 38 | * @throws Exception 39 | */ 40 | @Override 41 | public void open(Configuration parameters) throws Exception { 42 | System.out.println("open ..............."); 43 | super.open(parameters); 44 | } 45 | 46 | /** 47 | * 实现关闭链接的代码 48 | * @throws Exception 49 | */ 50 | @Override 51 | public void close() throws Exception { 52 | super.close(); 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/stream/NumberStreamDemo.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.stream; 2 | 3 | import com.google.common.collect.Lists; 4 | import org.apache.flink.api.common.functions.FilterFunction; 5 | import org.apache.flink.api.common.functions.MapFunction; 6 | import org.apache.flink.api.common.functions.ReduceFunction; 7 | import org.apache.flink.api.java.functions.KeySelector; 8 | import org.apache.flink.api.java.tuple.Tuple1; 9 | import org.apache.flink.api.java.tuple.Tuple2; 10 | import org.apache.flink.api.java.tuple.Tuple3; 11 | import org.apache.flink.streaming.api.collector.selector.OutputSelector; 12 | import org.apache.flink.streaming.api.datastream.*; 13 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 14 | import org.apache.flink.streaming.api.functions.co.CoMapFunction; 15 | import org.apache.flink.streaming.api.functions.windowing.AggregateApplyAllWindowFunction; 16 | import org.apache.flink.streaming.api.windowing.time.Time; 17 | 18 | import java.util.ArrayList; 19 | import java.util.HashMap; 20 | import java.util.List; 21 | import java.util.Map; 22 | 23 | //这个类是测试source产生流数据,然后做一些通用操作 24 | public class NumberStreamDemo { 25 | public static void main(String[] args) throws Exception { 26 | 27 | //no paralleSource 流中的wordcount窗口 28 | test1(); 29 | 30 | //paralleSource 31 | //test2(); 32 | 33 | //richParalleSource 34 | // test3(); 35 | 36 | //from Collection 37 | //test4(); 38 | 39 | //filter 40 
| //test5(); 41 | 42 | //multi stream source union 43 | //test6(); 44 | 45 | //two stream source connect 46 | //test7(); 47 | 48 | // split 根据规则把一个数据流切分为多个流,select和split配合使用,选择切分后的流 49 | // test8(); 50 | 51 | // 自定义分区需要实现Partitioner接口 52 | // test9(); 53 | 54 | 55 | } 56 | 57 | private static void test9() throws Exception { 58 | 59 | //dataStream.partitionCustom(partitioner, “someKey”) 针对对象 60 | //dataStream.partitionCustom(partitioner, 0) 针对Tuple 61 | 62 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 63 | env.setParallelism(2); 64 | DataStreamSource text = env.addSource(new MyNoParalleSource()); 65 | 66 | //对数据进行转换,把long类型转成tuple1类型 67 | DataStream> tupleData = text.map(new MapFunction>() { 68 | @Override 69 | 70 | public Tuple1 map(Long value) throws Exception { 71 | return new Tuple1<>(value); 72 | } 73 | }); 74 | 75 | //分区之后的数据 76 | //一条线程一个task,分别处理奇数,偶数 77 | DataStream> partitionData = tupleData.partitionCustom(new MyPartition(), 0); 78 | DataStream result = partitionData.map(new MapFunction, Long>() { 79 | @Override 80 | public Long map(Tuple1 value) throws Exception { 81 | System.out.println("当前线程id:" + Thread.currentThread().getId() + ",value: " + value); 82 | return value.getField(0); 83 | } 84 | }); 85 | 86 | result.print().setParallelism(1); 87 | env.execute("NumberStreamDemo"); 88 | } 89 | 90 | private static void test8() throws Exception { 91 | 92 | // split 根据规则把一个数据流切分为多个流,select和split配合使用,选择切分后的流 93 | 94 | //获取Flink的运行环境 95 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 96 | 97 | //获取数据源 98 | DataStreamSource text = env.addSource(new MyNoParalleSource()).setParallelism(1);//注意:针对此source,并行度只能设置为1 99 | 100 | //对流进行切分,按照数据的奇偶性进行区分 101 | SplitStream splitStream = text.split(new OutputSelector() { 102 | @Override 103 | public Iterable select(Long value) { 104 | List outPut = Lists.newArrayList(); 105 | if (value % 2 == 0) { 106 | outPut.add("even"); //偶数 107 | } else { 108 | outPut.add("odd"); //奇数 109 | } 110 | return outPut; 111 | } 112 | }); 113 | 114 | //选择一个或者多个切分后的流 115 | DataStream evenStream = splitStream.select("even"); 116 | DataStream oddStream = splitStream.select("odd"); 117 | 118 | DataStream moreStream = splitStream.select("odd", "even"); 119 | 120 | //打印结果 121 | evenStream.print().setParallelism(1); 122 | String jobName = NumberStreamDemo.class.getSimpleName(); 123 | env.execute(jobName); 124 | } 125 | 126 | private static void test7() throws Exception { 127 | //Connect:和union类似,但是只能连接两个流,两个流的数据类型可以不同,会对两个流中的数据应用不同的处理方法。 128 | //获取Flink的运行环境 129 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 130 | //获取数据源 131 | DataStreamSource text1 = env.addSource(new MyNoParalleSource()).setParallelism(1);//注意:针对此source,并行度只能设置为1 132 | DataStreamSource text2 = env.addSource(new MyNoParalleSource()).setParallelism(1); 133 | 134 | SingleOutputStreamOperator text2_str = text2.map(new MapFunction() { 135 | @Override 136 | public String map(Long value) throws Exception { 137 | return "str_" + value; 138 | } 139 | }); 140 | 141 | ConnectedStreams connectedStreams = text1.connect(text2_str); 142 | SingleOutputStreamOperator result = connectedStreams.map(new CoMapFunction() { 143 | 144 | @Override 145 | public Object map1(Long value) throws Exception { 146 | return value; 147 | } 148 | 149 | @Override 150 | public Object map2(String value) throws Exception { 151 | return value; 152 | } 153 | }); 154 | 155 | //打印结果 156 | result.print().setParallelism(1); 157 | 
String jobName = NumberStreamDemo.class.getSimpleName(); 158 | env.execute(jobName); 159 | } 160 | 161 | private static void test6() throws Exception { 162 | // Union:合并多个流,新的流会包含所有流中的数据,但是union是一个限制,就是所有合并的流类型必须是一致的。 163 | //获取Flink的运行环境 164 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 165 | 166 | //获取数据源 167 | DataStreamSource text1 = env.addSource(new MyNoParalleSource()).setParallelism(1);//注意:针对此source,并行度只能设置为1 168 | 169 | DataStreamSource text2 = env.addSource(new MyNoParalleSource()).setParallelism(1); 170 | 171 | //把text1和text2组装到一起 172 | DataStream text = text1.union(text2); 173 | 174 | DataStream num = text.map(new MapFunction() { 175 | @Override 176 | public Long map(Long value) throws Exception { 177 | System.out.println("原始接收到数据:" + value); 178 | return value; 179 | } 180 | }); 181 | 182 | //每2秒钟处理一次数据 183 | DataStream sum = num.timeWindowAll(Time.seconds(2)).sum(0); 184 | //打印结果 185 | sum.print().setParallelism(1); 186 | String jobName = NumberStreamDemo.class.getSimpleName(); 187 | env.execute(jobName); 188 | } 189 | 190 | private static void test5() throws Exception { 191 | //获取Flink的运行环境 192 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 193 | //获取数据源 194 | DataStreamSource text = env.addSource(new MyNoParalleSource()).setParallelism(1);//注意:针对此source,并行度只能设置为1 195 | DataStream num = text.map(new MapFunction() { 196 | @Override 197 | public Long map(Long value) throws Exception { 198 | System.out.println("原始接收到数据:" + value); 199 | return value; 200 | } 201 | }); 202 | //执行filter过滤,满足条件的数据会被留下 203 | DataStream filterData = num.filter(new FilterFunction() { 204 | //把所有的奇数过滤掉 205 | @Override 206 | public boolean filter(Long value) throws Exception { 207 | return value % 2 == 0; 208 | } 209 | }); 210 | 211 | DataStream resultData = filterData.map(new MapFunction() { 212 | @Override 213 | public Long map(Long value) throws Exception { 214 | System.out.println("过滤之后的数据:" + value); 215 | return value; 216 | } 217 | }); 218 | 219 | //每2秒钟处理一次数据 220 | DataStream sum = resultData.timeWindowAll(Time.seconds(2)).sum(0); 221 | 222 | //打印结果 223 | sum.print().setParallelism(1); 224 | 225 | String jobName = NumberStreamDemo.class.getSimpleName(); 226 | env.execute(jobName); 227 | 228 | } 229 | 230 | 231 | private static void test4() throws Exception { 232 | //获取Flink的运行环境 233 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 234 | ArrayList data = new ArrayList<>(); 235 | data.add(10); 236 | data.add(15); 237 | data.add(20); 238 | 239 | //指定数据源 240 | DataStreamSource collectionData = env.fromCollection(data); 241 | //通map对数据进行处理 242 | DataStream num = collectionData.map(new MapFunction() { 243 | @Override 244 | public Integer map(Integer value) throws Exception { 245 | return value + 1; 246 | } 247 | }); 248 | 249 | num.print().setParallelism(1); 250 | env.execute("Streaming From Collection"); 251 | } 252 | 253 | private static void test3() throws Exception { 254 | //获取Flink的运行环境 255 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 256 | 257 | //获取数据源 258 | DataStreamSource text = env.addSource(new MyRichParalleSource()).setParallelism(1); 259 | 260 | DataStream num = text.map(new MapFunction() { 261 | @Override 262 | public Long map(Long value) throws Exception { 263 | System.out.println("接收到数据:" + value); 264 | return value; 265 | } 266 | }); 267 | 268 | //每2秒钟处理一次数据 269 | DataStream sum = 
num.timeWindowAll(Time.seconds(2)).sum(0); 270 | 271 | //打印结果 272 | sum.print().setParallelism(1); 273 | 274 | String jobName = NumberStreamDemo.class.getSimpleName(); 275 | env.execute(jobName); 276 | 277 | } 278 | 279 | private static void test2() throws Exception { 280 | //获取Flink的运行环境 281 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 282 | //获取数据源 283 | DataStreamSource text = env.addSource(new MyNoParalleSource()).setParallelism(1); //注意:针对此source,并行度只能设置为1 284 | DataStream num = text.map(new MapFunction() { 285 | @Override 286 | public Long map(Long value) throws Exception { 287 | System.out.println("接收到数据:" + value); 288 | return value; 289 | } 290 | }); 291 | //每2秒钟处理一次数据 292 | DataStream sum = num.timeWindowAll(Time.seconds(2)).sum(0); 293 | //打印结果 294 | sum.print().setParallelism(1); 295 | String jobName = NumberStreamDemo.class.getSimpleName(); 296 | env.execute(jobName); 297 | } 298 | 299 | private static void test1() throws Exception { 300 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 301 | //DataStream someIntegers = env.generateSequence(0, 1000); 302 | DataStreamSource text = env.addSource(new MyNoParalleSource()).setParallelism(1); 303 | DataStream num = text.map(new MapFunction() { 304 | @Override 305 | public Long map(Long value) throws Exception { 306 | return value; 307 | } 308 | }); 309 | 310 | DataStream testBeanStream = num.filter(new FilterFunction() { 311 | @Override 312 | public boolean filter(Long value) throws Exception { 313 | if (value % 2 == 0) { 314 | return true; 315 | } 316 | return false; 317 | } 318 | }).map(new MapFunction() { 319 | @Override 320 | public TestBean map(Long value) throws Exception { 321 | System.out.println("接受到数据:" + new TestBean("a", value, 1L).toString()); 322 | return new TestBean("a", value, 1L); 323 | } 324 | }); 325 | DataStream testBeanStream2 = num.filter(new FilterFunction() { 326 | @Override 327 | public boolean filter(Long value) throws Exception { 328 | if (value % 2 != 0) { 329 | return true; 330 | } 331 | return false; 332 | } 333 | }).map(new MapFunction() { 334 | @Override 335 | public TestBean map(Long value) throws Exception { 336 | System.out.println("接受到数据:" + new TestBean("b", value, 1L).toString()); 337 | return new TestBean("b", value, 1L); 338 | } 339 | }); 340 | 341 | testBeanStream.union(testBeanStream2).keyBy("word").timeWindow(Time.seconds(2), Time.seconds(10)) 342 | .reduce(new ReduceFunction() { 343 | @Override 344 | public TestBean reduce(TestBean first, TestBean second) throws Exception { 345 | return new TestBean(first.word, (first.value + second.value) / (first.count + second.count), first.count + second.count); 346 | } 347 | }) 348 | // .sum("value") 349 | .print(); 350 | 351 | String jobName = NumberStreamDemo.class.getSimpleName(); 352 | env.execute(jobName); 353 | } 354 | 355 | public static class TestBean { 356 | 357 | public String word; 358 | public long value; 359 | public long count;//1 360 | 361 | public TestBean() { 362 | } 363 | 364 | public TestBean(String word, long value, long count) { 365 | this.word = word; 366 | this.count = count; 367 | this.value = value; 368 | } 369 | 370 | @Override 371 | public String toString() { 372 | return "TestBean{" + 373 | "word='" + word + '\'' + 374 | ", value=" + value + 375 | ", count=" + count + 376 | '}'; 377 | } 378 | 379 | } 380 | 381 | } 382 | -------------------------------------------------------------------------------- 
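A note on the windowed average in NumberStreamDemo.test1 above: its reduce combines TestBean records pairwise and divides value by count at every step, so the intermediate value field already holds an average rather than a sum, and with more than two elements per key the result drifts toward an "average of averages". The more common pattern is to accumulate a (sum, count) pair inside the reduce and divide exactly once when the window result is emitted. The following is a minimal editorial sketch of that pattern, not part of this repository; the names AvgSketch, SumReduce and ToAvg are made up for illustration.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple3;

// Hypothetical helper: accumulate (key, sum, count) in the reduce,
// then turn the final accumulator into an average once per window result.
public class AvgSketch {

    // f0 = key, f1 = running sum of values, f2 = running count
    public static class SumReduce implements ReduceFunction<Tuple3<String, Long, Long>> {
        @Override
        public Tuple3<String, Long, Long> reduce(Tuple3<String, Long, Long> a, Tuple3<String, Long, Long> b) {
            return Tuple3.of(a.f0, a.f1 + b.f1, a.f2 + b.f2);
        }
    }

    // applied after the window fires: one division per emitted result
    public static class ToAvg implements MapFunction<Tuple3<String, Long, Long>, Tuple3<String, Double, Long>> {
        @Override
        public Tuple3<String, Double, Long> map(Tuple3<String, Long, Long> acc) {
            return Tuple3.of(acc.f0, acc.f1 / (double) acc.f2, acc.f2);
        }
    }
}

Assuming the elements were first mapped to (word, value, 1L) tuples, the pipeline in test1 would then read roughly stream.keyBy(0).timeWindow(...).reduce(new SumReduce()).map(new ToAvg()).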
/src/main/java/com/z/flinkStreamOptimizatiion/stream/StreamJoinDataSource1.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.stream; 2 | 3 | import org.apache.flink.api.java.tuple.Tuple3; 4 | import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; 5 | 6 | public class StreamJoinDataSource1 extends RichParallelSourceFunction> { 7 | 8 | private volatile boolean running = true; 9 | 10 | @Override 11 | public void run(SourceContext> ctx) throws Exception { 12 | Tuple3[] elements = new Tuple3[]{ 13 | Tuple3.of("a", "1", 1000000050000L), 14 | Tuple3.of("a", "2", 1000000054000L), 15 | Tuple3.of("a", "3", 1000000079900L), 16 | Tuple3.of("a", "4", 1000000115000L), 17 | Tuple3.of("b", "5", 1000000100000L), 18 | Tuple3.of("b", "6", 1000000108000L) 19 | }; 20 | 21 | int count = 0; 22 | while (running && count < elements.length) { 23 | ctx.collect(new Tuple3<>( 24 | (String)elements[count].f0, 25 | (String)elements[count].f1, 26 | (Long) elements[count].f2 27 | )); 28 | 29 | count ++; 30 | Thread.sleep(1000); 31 | } 32 | } 33 | 34 | 35 | 36 | @Override 37 | public void cancel() { 38 | running = false; 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/stream/StreamJoinDataSource2.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.stream; 2 | 3 | import org.apache.flink.api.java.tuple.Tuple3; 4 | import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; 5 | 6 | public class StreamJoinDataSource2 extends RichParallelSourceFunction> { 7 | 8 | private volatile boolean running = true; 9 | 10 | 11 | @Override 12 | public void run(SourceContext> ctx) throws Exception { 13 | Tuple3[] elements = new Tuple3[]{ 14 | Tuple3.of("a", "hangzhou", 1000000059000L), 15 | Tuple3.of("b", "beijing", 1000000105000L), 16 | }; 17 | 18 | int count = 0; 19 | while(running && count < elements.length) { 20 | ctx.collect(new Tuple3<>( 21 | (String)elements[count].f0, 22 | (String)elements[count].f1, 23 | (Long)elements[count].f2 24 | )); 25 | 26 | count ++; 27 | Thread.sleep(1000); 28 | } 29 | } 30 | 31 | @Override 32 | public void cancel() { 33 | running = false; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/stream/StreamJoinDemo.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.stream; 2 | 3 | import org.apache.flink.api.common.functions.JoinFunction; 4 | import org.apache.flink.api.java.functions.KeySelector; 5 | import org.apache.flink.api.java.tuple.Tuple3; 6 | import org.apache.flink.api.java.tuple.Tuple5; 7 | import org.apache.flink.streaming.api.TimeCharacteristic; 8 | import org.apache.flink.streaming.api.datastream.DataStream; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor; 11 | import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows; 12 | import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows; 13 | import org.apache.flink.streaming.api.windowing.time.Time; 14 | import org.slf4j.Logger; 15 | import org.slf4j.LoggerFactory; 16 | 17 | import 
java.text.SimpleDateFormat; 18 | import java.util.concurrent.TimeUnit; 19 | 20 | import static com.z.flinkStreamOptimizatiion.stream.WindowComputeUtil.myGetWindowStartWithOffset; 21 | 22 | public class StreamJoinDemo { 23 | 24 | private static final Logger LOGGER = LoggerFactory.getLogger(StreamJoinDemo.class); 25 | 26 | /** 27 | * 只有在一个窗口内的数据才能join 28 | * @param args 29 | * @throws Exception 30 | */ 31 | public static void main(String[] args) throws Exception { 32 | 33 | // 双流join 34 | //test1(); 35 | 36 | // default join 37 | test2(); 38 | 39 | 40 | 41 | 42 | } 43 | 44 | private static void test2() throws Exception { 45 | //毫秒为单位 46 | int windowSize = 10; 47 | long delay = 5100L; 48 | 49 | 50 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 51 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 52 | env.setParallelism(1); 53 | 54 | // 设置数据源 55 | DataStream> leftSource = env.addSource(new StreamJoinDataSource1()).name("Demo Source"); 56 | DataStream> rightSource = env.addSource(new StreamJoinDataSource2()).name("Demo Source"); 57 | 58 | 59 | // join 操作 60 | leftSource.join(rightSource) 61 | .where(new LeftSelectKey()) 62 | .equalTo(new RightSelectKey()) 63 | .window(TumblingProcessingTimeWindows.of(Time.of(5, TimeUnit.SECONDS))) 64 | .apply(new JoinFunction, Tuple3, Tuple5>() { 65 | @Override 66 | public Tuple5 join(Tuple3 first, Tuple3 second) { 67 | LOGGER.info("触发双流join窗口运算"); 68 | return new Tuple5<>(first.f0, first.f1, second.f1, first.f2, second.f2); 69 | } 70 | }).print(); 71 | 72 | 73 | env.execute("TimeWindowDemo"); 74 | } 75 | 76 | /** 77 | * 普通双流join处理方式: 78 | * 缺陷:join窗口的双流数据都是被缓存在内存中的,也就是说,如果某个key上的窗口数据太多就会导致JVM OOM。 79 | * 双流join的难点也正是在这里。 80 | * @throws Exception 81 | */ 82 | private static void test1() throws Exception { 83 | /** 84 | * 当设置参数int windowSize = 10; long delay = 5000L;时 85 | * 输出为: 86 | * (a,1,hangzhou,1000000050000,1000000059000) 87 | * (a,2,hangzhou,1000000054000,1000000059000) 88 | * 原因: 89 | * window_end_time < watermark, 导致数据丢失了。 90 | */ 91 | 92 | //毫秒为单位 93 | int windowSize = 10; 94 | long delay = 5100L; 95 | 96 | 97 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 98 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 99 | env.setParallelism(1); 100 | 101 | // 设置数据源 102 | DataStream> leftSource = env.addSource(new StreamJoinDataSource1()).name("Demo Source"); 103 | DataStream> rightSource = env.addSource(new StreamJoinDataSource2()).name("Demo Source"); 104 | 105 | // 设置水位线 106 | DataStream> leftStream = leftSource.assignTimestampsAndWatermarks( 107 | new BoundedOutOfOrdernessTimestampExtractor>(Time.milliseconds(delay)) { 108 | private final long maxOutOfOrderness = delay; 109 | private long currentMaxTimestamp = 0L; 110 | @Override 111 | public long extractTimestamp(Tuple3 element) { 112 | long timestamp = element.f2; 113 | SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); 114 | currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp); 115 | System.out.println("####################################"); 116 | System.out.println("element.f1: " + element.f1 ); 117 | //System.out.println("currentMaxTimestamp: " + currentMaxTimestamp); 118 | System.out.println("水位线(watermark): " + (currentMaxTimestamp - maxOutOfOrderness) + " -> " + format.format(currentMaxTimestamp - maxOutOfOrderness)); 119 | System.out.println("窗口开始时间:" + myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000) + " -> " + 
format.format(myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000))); 120 | System.out.println("窗口结束时间:" + (myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000) + windowSize * 1000) + " -> " + format.format((myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000) + windowSize * 1000))); 121 | System.out.println(element.f1 + " -> " + timestamp + " -> " + format.format(timestamp)); 122 | return timestamp; 123 | } 124 | } 125 | ); 126 | 127 | DataStream> rightStream = rightSource.assignTimestampsAndWatermarks( 128 | new BoundedOutOfOrdernessTimestampExtractor>(Time.milliseconds(delay)) { 129 | private final long maxOutOfOrderness = delay; 130 | private long currentMaxTimestamp = 0L; 131 | @Override 132 | public long extractTimestamp(Tuple3 element) { 133 | long timestamp = element.f2; 134 | SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); 135 | currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp); 136 | System.out.println("####################################"); 137 | System.out.println("element.f1: " + element.f1 ); 138 | //System.out.println("currentMaxTimestamp: " + currentMaxTimestamp); 139 | System.out.println("水位线(watermark): " + (currentMaxTimestamp - maxOutOfOrderness) + " -> " + format.format(currentMaxTimestamp - maxOutOfOrderness)); 140 | System.out.println("窗口开始时间:" + myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000) + " -> " + format.format(myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000))); 141 | System.out.println("窗口结束时间:" + (myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000) + windowSize * 1000) + " -> " + format.format((myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000) + windowSize * 1000))); 142 | System.out.println(element.f1 + " -> " + timestamp + " -> " + format.format(timestamp)); 143 | return timestamp; 144 | } 145 | } 146 | ); 147 | 148 | // join 操作 149 | leftStream.join(rightStream) 150 | .where(new LeftSelectKey()) 151 | .equalTo(new RightSelectKey()) 152 | .window(TumblingEventTimeWindows.of(Time.seconds(windowSize))) 153 | .apply(new JoinFunction, Tuple3, Tuple5>() { 154 | @Override 155 | public Tuple5 join(Tuple3 first, Tuple3 second) { 156 | System.out.println("触发双流join窗口运算"); 157 | return new Tuple5<>(first.f0, first.f1, second.f1, first.f2, second.f2); 158 | } 159 | }).print(); 160 | 161 | 162 | env.execute("TimeWindowDemo"); 163 | } 164 | 165 | private static class LeftSelectKey implements KeySelector, String> { 166 | @Override 167 | public String getKey(Tuple3 w) throws Exception { 168 | return w.f0; 169 | } 170 | } 171 | 172 | 173 | private static class RightSelectKey implements KeySelector, String> { 174 | @Override 175 | public String getKey(Tuple3 w) throws Exception { 176 | return w.f0; 177 | } 178 | } 179 | } 180 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/stream/TimeWindowDemo.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.stream; 2 | 3 | import com.z.flinkStreamOptimizatiion.rpc.client.RPCClient; 4 | import com.z.flinkStreamOptimizatiion.rpc.client.RPCException; 5 | import com.z.flinkStreamOptimizatiion.rpc.demo.DemoClient; 6 | import org.apache.flink.api.common.functions.FlatMapFunction; 7 | import org.apache.flink.api.common.functions.MapFunction; 8 | import org.apache.flink.api.common.functions.ReduceFunction; 9 | import org.apache.flink.api.java.tuple.Tuple3; 10 | import 
org.apache.flink.streaming.api.TimeCharacteristic; 11 | import org.apache.flink.streaming.api.datastream.DataStream; 12 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 13 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; 14 | import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; 15 | import org.apache.flink.streaming.api.watermark.Watermark; 16 | import org.apache.flink.streaming.api.windowing.time.Time; 17 | import org.apache.flink.util.Collector; 18 | 19 | import javax.annotation.Nullable; 20 | import java.text.SimpleDateFormat; 21 | 22 | 23 | /** 24 | * 单流场景下: 25 | * Flink中timeWindow滚动窗口边界和数据延迟问题 26 | * delay代表了能够容忍的时序程度 27 | * 水位 = 目前最大的时间戳 - delay 28 | */ 29 | public class TimeWindowDemo { 30 | 31 | public static void main(String[] args) throws Exception { 32 | // 根据event time和窗口时间大小,计算event time所属的窗口开始时间和结束时间 33 | // test1(); 34 | 35 | // 参考因素:delay + windowSize, 情况一,元素在水位以下,但windows还没被触发计算,参照record 5 36 | // test2(); 37 | 38 | // 参考因素:delay + windowSize, 情况二,元素在水位以下,但windows已经无法被触发计算了 39 | // test3(); 40 | 41 | // 参考因素:delay + windowSize,通过增大delay,来增大失序的容忍程度,确保不丢数据 42 | // test4(); 43 | 44 | // 测试 parallism 45 | // test5(); 46 | 47 | // 测试 flink中的rpc调用(比如flatmap) 48 | // test6(); 49 | 50 | 51 | } 52 | 53 | private static void test6() throws Exception { 54 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 55 | 56 | // 设置数据源 57 | //env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 58 | DataStream> dataStream = env.addSource(new DataSourceForTest4()).name("Demo Source").setParallelism(1); 59 | 60 | DataStream windowCount = dataStream.flatMap(new FlatMapFunction, WordWithCount>() { 61 | @Override 62 | public void flatMap(Tuple3 value, Collector collector) throws Exception { 63 | 64 | // 在flink的map算子中,加入rpc调用,作为中间结果获取的模拟 65 | int testValue = Integer.valueOf(value.f1); 66 | { 67 | RPCClient client = new RPCClient("localhost", 8888); 68 | DemoClient demo = new DemoClient(client); 69 | for (int i = 0; i < 2; i++) { 70 | try { 71 | System.out.printf("fib(%d) = %d\n", i, demo.fib(testValue)); 72 | Thread.sleep(100); 73 | } catch (RPCException e) { 74 | System.out.println(e.getMessage()); 75 | } 76 | } 77 | Thread.sleep(3000); 78 | client.close(); 79 | } 80 | 81 | 82 | collector.collect(new WordWithCount(value.f0, 1L)); 83 | } 84 | }).keyBy("word") 85 | .sum("count"); 86 | 87 | windowCount.print(); 88 | env.execute("streaming word count"); 89 | //Thread.sleep(3000); 90 | 91 | } 92 | 93 | /** 94 | * 主要为了存储单词以及单词出现的次数 95 | */ 96 | public static class WordWithCount{ 97 | public String word; 98 | public long count; 99 | public WordWithCount(){} 100 | public WordWithCount(String word, long count) { 101 | this.word = word; 102 | this.count = count; 103 | } 104 | 105 | @Override 106 | public String toString() { 107 | return "WordWithCount{" + 108 | "word='" + word + '\'' + 109 | ", count=" + count + 110 | '}'; 111 | } 112 | } 113 | 114 | private static void test5() throws Exception { 115 | 116 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 117 | 118 | // 设置数据源 119 | //env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 120 | DataStream> dataStream = env.addSource(new DataSourceForTest4()).name("Demo Source").setParallelism(2); 121 | 122 | DataStream windowCount = dataStream.flatMap(new FlatMapFunction, WordWithCount>() { 123 | @Override 124 | public void flatMap(Tuple3 value, Collector 
collector) throws Exception { 125 | collector.collect(new WordWithCount(value.f0, 1L)); 126 | } 127 | }).keyBy("word") 128 | .sum("count"); 129 | 130 | windowCount.print(); 131 | env.execute("streaming word count"); 132 | } 133 | 134 | 135 | /** 136 | * 观察 record 5 和 record 6, 它们的时间窗口如下: 137 | * 窗口开始时间:1000000100000 -> 2001-09-09 09:48:20.000 138 | * 窗口结束时间:1000000110000 -> 2001-09-09 09:48:30.000 139 | * 它们进来的时候水位线如下: 140 | * 水位线(watermark): 1000000109900 -> 2001-09-09 09:48:29.900 141 | * 也就是说,它们进来的时候,watermark < windows end time 142 | * 这种情况下,就算数据的 eventtime < watermark,数据还是被保留下来,没有丢失。 143 | * @throws Exception 144 | */ 145 | private static void test4() throws Exception { 146 | long delay = 5100L; 147 | int windowSize = 10; 148 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 149 | 150 | // 设置数据源 151 | env.setParallelism(1); 152 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 153 | DataStream> dataStream = env.addSource(new DataSourceForTest4()).name("Demo Source"); 154 | 155 | // 设置水位线 156 | DataStream> watermark = dataStream.assignTimestampsAndWatermarks( 157 | new AssignerWithPeriodicWatermarks>() { 158 | private final long maxOutOfOrderness = delay; 159 | private long currentMaxTimestamp = 0L; 160 | 161 | @Nullable 162 | @Override 163 | public Watermark getCurrentWatermark() { 164 | return new Watermark(currentMaxTimestamp - maxOutOfOrderness); 165 | } 166 | 167 | @Override 168 | public long extractTimestamp(Tuple3 element, long previousElementTimestamp) { 169 | long timestamp = element.f2; 170 | SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); 171 | currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp); 172 | System.out.println("#### 第 " + element.f1 + " 个record ####"); 173 | System.out.println("currentMaxTimestamp: " + currentMaxTimestamp); 174 | System.out.println("水位线(watermark): " + (currentMaxTimestamp - maxOutOfOrderness) + " -> " + format.format(currentMaxTimestamp - maxOutOfOrderness)); 175 | System.out.println("窗口开始时间:" + WindowComputeUtil.myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000) + " -> " + format.format(WindowComputeUtil.myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000))); 176 | System.out.println("窗口结束时间:" + (WindowComputeUtil.myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000) + windowSize * 1000) + " -> " + format.format((WindowComputeUtil.myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000) + windowSize * 1000))); 177 | System.out.println(element.f1 + " -> " + timestamp + " -> " + format.format(timestamp)); 178 | 179 | return timestamp; 180 | } 181 | } 182 | ); 183 | 184 | // 窗口函数进行处理 185 | DataStream> resStream = watermark.keyBy(0).timeWindow(Time.seconds(windowSize)) 186 | .reduce(new ReduceFunction>() { 187 | @Override 188 | public Tuple3 reduce(Tuple3 value1, Tuple3 value2) throws Exception { 189 | return Tuple3.of(value1.f0, "[" + value1.f1 + "," + value2.f1 + "]", 1L); 190 | } 191 | }); 192 | 193 | resStream.print(); 194 | env.execute("event time demo"); 195 | } 196 | 197 | /** 198 | * 观察record 5 和 record 6,它们的窗口属性如下: 199 | * 窗口开始时间:1000000100000 -> 2001-09-09 09:48:20.000 200 | * 窗口结束时间:1000000110000 -> 2001-09-09 09:48:30.000 201 | * windows end time < watermark, 这个窗口已经无法被触发计算了。 202 | * 也就是说,这个窗口创建时,已经 windows end time < watermark,相当于第5第6条记录都丢失了。 203 | * @throws Exception 204 | */ 205 | private static void test3() throws Exception { 206 | long delay = 5000L; 207 | int windowSize = 10; 208 | final 
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 209 | 210 | // 设置数据源 211 | env.setParallelism(1); 212 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 213 | DataStream> dataStream = env.addSource(new DataSourceForTest3()).name("Demo Source"); 214 | 215 | // 设置水位线 216 | DataStream> watermark = dataStream.assignTimestampsAndWatermarks( 217 | new AssignerWithPeriodicWatermarks>() { 218 | private final long maxOutOfOrderness = delay; 219 | private long currentMaxTimestamp = 0L; 220 | 221 | @Nullable 222 | @Override 223 | public Watermark getCurrentWatermark() { 224 | return new Watermark(currentMaxTimestamp - maxOutOfOrderness); 225 | } 226 | 227 | @Override 228 | public long extractTimestamp(Tuple3 element, long previousElementTimestamp) { 229 | long timestamp = element.f2; 230 | SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); 231 | currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp); 232 | System.out.println("#### 第 " + element.f1 + " 个record ####"); 233 | System.out.println("currentMaxTimestamp: " + currentMaxTimestamp); 234 | System.out.println("水位线(watermark): " + (currentMaxTimestamp - maxOutOfOrderness) + " -> " + format.format(currentMaxTimestamp - maxOutOfOrderness)); 235 | System.out.println("窗口开始时间:" + WindowComputeUtil.myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000) + " -> " + format.format(WindowComputeUtil.myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000))); 236 | System.out.println("窗口结束时间:" + (WindowComputeUtil.myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000) + windowSize * 1000) + " -> " + format.format((WindowComputeUtil.myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000) + windowSize * 1000))); 237 | System.out.println(element.f1 + " -> " + timestamp + " -> " + format.format(timestamp)); 238 | 239 | return timestamp; 240 | } 241 | } 242 | ); 243 | 244 | // 窗口函数进行处理 245 | DataStream> resStream = watermark.keyBy(0).timeWindow(Time.seconds(windowSize)) 246 | .reduce(new ReduceFunction>() { 247 | @Override 248 | public Tuple3 reduce(Tuple3 value1, Tuple3 value2) throws Exception { 249 | return Tuple3.of(value1.f0, "[" + value1.f1 + "," + value2.f1 + "]", 1L); 250 | } 251 | }); 252 | 253 | resStream.print(); 254 | env.execute("event time demo"); 255 | } 256 | 257 | /** 258 | * 观察record 5,对于此条记录,元素在水位以下,但windows还没被触发计算 259 | * 到了record 6,水位线在record 5 之上,windows被触发计算 260 | * @throws Exception 261 | */ 262 | private static void test2() throws Exception { 263 | 264 | long delay = 5000L; 265 | int windowSize = 10; 266 | 267 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 268 | 269 | // 设置数据源 270 | env.setParallelism(1); 271 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 272 | DataStream> dataStream = env.addSource(new DataSource()).name("Demo Source"); 273 | 274 | // 设置水位线 275 | DataStream> watermark = dataStream.assignTimestampsAndWatermarks( 276 | new AssignerWithPeriodicWatermarks>() { 277 | private final long maxOutOfOrderness = delay; 278 | private long currentMaxTimestamp = 0L; 279 | 280 | @Nullable 281 | @Override 282 | public Watermark getCurrentWatermark() { 283 | return new Watermark(currentMaxTimestamp - maxOutOfOrderness); 284 | } 285 | 286 | /** 287 | * 触发窗口运算时机: 288 | * 当一条数据过来, 289 | * 1)水位线 > 上一批次的记录的窗口结束时间,之前的数据要进行窗口运算 290 | * 2)水位线 > 上一批次的记录的timestamp,之前的数据要进行窗口计算 291 | * 292 | * 关于是否丢数据: 293 | * 1)如果当前数据的EventTime在WaterMark之上,也就是EventTime > WaterMark。由于数据所属窗口 294 | * 
的WindowEndTime,一定是大于EventTime的。这时有WindowEndTime > EventTime > WaterMark 295 | * 这种情况是一定不会丢数据的。 296 | * 2)如果当前数据的EventTime在WaterMark之下,也就是WaterMark > EventTime,这时要分两种情况: 297 | * 2.1)如果该数据所属窗口的WindowEndTime > WaterMark,表示窗口还没被触发,例如第5个record的情况, 298 | * 即WindowEndTime > WaterMark > EventTime,这种情况数据也是不会丢失的。 299 | * 2.2)如果该数据所属窗口的WaterMark > WindowEndTime, 则表示窗口已经无法被触发, 300 | * 即WaterMark > WindowEndTime > EventTime, 这种情况数据也就丢失了。 301 | * 302 | * 如果第6条record,由于watermark > windows end time ,第6条数据所属的窗口就永远不会被触发计算了。 303 | * @param element 304 | * @param previousElementTimestamp 305 | * @return 306 | */ 307 | @Override 308 | public long extractTimestamp(Tuple3 element, long previousElementTimestamp) { 309 | long timestamp = element.f2; 310 | SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); 311 | currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp); 312 | System.out.println("#### 第 " + element.f1 + " 个record ####"); 313 | System.out.println("currentMaxTimestamp: " + currentMaxTimestamp); 314 | System.out.println("水位线(watermark): " + (currentMaxTimestamp - maxOutOfOrderness) + " -> " + format.format(currentMaxTimestamp - maxOutOfOrderness)); 315 | System.out.println("窗口开始时间:" + WindowComputeUtil.myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000) + " -> " + format.format(WindowComputeUtil.myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000))); 316 | System.out.println("窗口结束时间:" + (WindowComputeUtil.myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000) + windowSize * 1000) + " -> " + format.format((WindowComputeUtil.myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000) + windowSize * 1000))); 317 | System.out.println(element.f1 + " -> " + timestamp + " -> " + format.format(timestamp)); 318 | 319 | return timestamp; 320 | } 321 | } 322 | ); 323 | 324 | // 窗口函数进行处理 325 | DataStream> resStream = watermark.keyBy(0).timeWindow(Time.seconds(windowSize)) 326 | .reduce(new ReduceFunction>() { 327 | @Override 328 | public Tuple3 reduce(Tuple3 value1, Tuple3 value2) throws Exception { 329 | return Tuple3.of(value1.f0, "[" + value1.f1 + "," + value2.f1 + "]", 1L); 330 | } 331 | }); 332 | 333 | resStream.print(); 334 | env.execute("event time demo"); 335 | 336 | 337 | 338 | } 339 | 340 | private static class DataSourceForTest4 extends RichParallelSourceFunction> { 341 | private volatile boolean running = true; 342 | 343 | @Override 344 | public void run(SourceContext> ctx) throws InterruptedException { 345 | Tuple3[] elements = new Tuple3[]{ 346 | Tuple3.of("a", "1", 1000000050000L), 347 | Tuple3.of("a", "2", 1000000054000L), 348 | Tuple3.of("a", "3", 1000000079900L), 349 | Tuple3.of("a", "4", 1000000115000L), 350 | Tuple3.of("b", "5", 1000000100000L), 351 | Tuple3.of("b", "6", 1000000108000L) 352 | }; 353 | 354 | int count = 0; 355 | while (running && count < elements.length) { 356 | ctx.collect(new Tuple3<>((String) elements[count].f0, (String) elements[count].f1, (Long) elements[count].f2)); 357 | count++; 358 | Thread.sleep(1000); 359 | } 360 | } 361 | 362 | @Override 363 | public void cancel() { 364 | running = false; 365 | } 366 | } 367 | 368 | private static class DataSourceForTest3 extends RichParallelSourceFunction> { 369 | private volatile boolean running = true; 370 | 371 | @Override 372 | public void run(SourceContext> ctx) throws InterruptedException { 373 | Tuple3[] elements = new Tuple3[]{ 374 | Tuple3.of("a", "1", 1000000050000L), 375 | Tuple3.of("a", "2", 1000000054000L), 376 | Tuple3.of("a", "3", 1000000079900L), 377 | Tuple3.of("a", 
"4", 1000000120000L), 378 | Tuple3.of("b", "5", 1000000100001L), 379 | Tuple3.of("b", "6", 1000000109000L) 380 | }; 381 | 382 | int count = 0; 383 | while (running && count < elements.length) { 384 | ctx.collect(new Tuple3<>((String) elements[count].f0, (String) elements[count].f1, (Long) elements[count].f2)); 385 | count++; 386 | Thread.sleep(1000); 387 | } 388 | } 389 | 390 | @Override 391 | public void cancel() { 392 | running = false; 393 | } 394 | } 395 | 396 | private static class DataSource extends RichParallelSourceFunction> { 397 | private volatile boolean running = true; 398 | 399 | @Override 400 | public void run(SourceContext> ctx) throws InterruptedException { 401 | Tuple3[] elements = new Tuple3[]{ 402 | Tuple3.of("a", "1", 1000000050000L), 403 | Tuple3.of("a", "2", 1000000054000L), 404 | Tuple3.of("a", "3", 1000000079900L), 405 | Tuple3.of("a", "4", 1000000120000L), 406 | Tuple3.of("b", "5", 1000000111000L), 407 | Tuple3.of("b", "6", 1000000089000L) 408 | }; 409 | 410 | int count = 0; 411 | while (running && count < elements.length) { 412 | ctx.collect(new Tuple3<>((String) elements[count].f0, (String) elements[count].f1, (Long) elements[count].f2)); 413 | count++; 414 | Thread.sleep(1000); 415 | } 416 | } 417 | 418 | @Override 419 | public void cancel() { 420 | running = false; 421 | } 422 | } 423 | 424 | 425 | private static void test1() { 426 | // 毫秒为单位 427 | long windowsize = 10000L; 428 | 429 | // 毫秒为单位, 滚动窗口 offset = 0L 430 | long offset = 0L; 431 | 432 | SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); 433 | long a1 = 1000000050000L; 434 | long a2 = 1000000054000L; 435 | long a3 = 1000000079900L; 436 | long a4 = 1000000120000L; 437 | long b5 = 1000000111000L; 438 | long b6 = 1000000089000L; 439 | 440 | System.out.println(a1 + " -> " + format.format(a1) + "\t所属窗口的开始时间是:" + 441 | WindowComputeUtil.myGetWindowStartWithOffset(a1, offset, windowsize) + " -> " + 442 | format.format( WindowComputeUtil.myGetWindowStartWithOffset(a1, offset, windowsize))); 443 | 444 | System.out.println(a2 + " -> " + format.format(a2) + "\t所属窗口的起始时间是: " + WindowComputeUtil.myGetWindowStartWithOffset(a2, offset, windowsize) + " -> " + format.format(WindowComputeUtil.myGetWindowStartWithOffset(a2, offset, windowsize))); 445 | System.out.println(a3 + " -> " + format.format(a3) + "\t所属窗口的起始时间是: " + WindowComputeUtil.myGetWindowStartWithOffset(a3, offset, windowsize) + " -> " + format.format(WindowComputeUtil.myGetWindowStartWithOffset(a3, offset, windowsize))); 446 | System.out.println(a4 + " -> " + format.format(a4) + "\t所属窗口的起始时间是: " + WindowComputeUtil.myGetWindowStartWithOffset(a4, offset, windowsize) + " -> " + format.format(WindowComputeUtil.myGetWindowStartWithOffset(a4, offset, windowsize))); 447 | System.out.println(b5 + " -> " + format.format(b5) + "\t所属窗口的起始时间是: " + WindowComputeUtil.myGetWindowStartWithOffset(b5, offset, windowsize) + " -> " + format.format(WindowComputeUtil.myGetWindowStartWithOffset(b5, offset, windowsize))); 448 | System.out.println(b6 + " -> " + format.format(b6) + "\t所属窗口的起始时间是: " + WindowComputeUtil.myGetWindowStartWithOffset(b6, offset, windowsize) + " -> " + format.format(WindowComputeUtil.myGetWindowStartWithOffset(b6, offset, windowsize))); 449 | 450 | 451 | System.out.println("-----------------------------------------"); 452 | 453 | } 454 | 455 | 456 | } 457 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/stream/WindowComputeUtil.java: 
-------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.stream; 2 | 3 | public class WindowComputeUtil { 4 | public static long myGetWindowStartWithOffset(long timestamp, long offset, long windowSize) { 5 | return timestamp - (timestamp - offset + windowSize) % windowSize; 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/test/test1.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.test; 2 | 3 | import org.codehaus.jackson.JsonEncoding; 4 | import org.codehaus.jackson.JsonGenerator; 5 | import org.codehaus.jackson.map.ObjectMapper; 6 | 7 | import java.io.IOException; 8 | import java.text.SimpleDateFormat; 9 | import java.util.Date; 10 | import java.util.HashMap; 11 | import java.util.Map; 12 | class A { 13 | void test(int i) { 14 | System.out.println("A " + i); 15 | } 16 | } 17 | 18 | class B extends A { 19 | @Override 20 | void test(int i) { 21 | System.out.println("B " + i); 22 | System.out.println("bbbbb"); 23 | } 24 | } 25 | 26 | class C extends B { 27 | 28 | } 29 | public class test1 { 30 | public static void main(String[] args) throws Exception { 31 | // tm='1908.0', duration='22000.0', count=0.08672727272727272} 32 | // tm='55041.0', duration='55000.0', count=0.0 33 | // tm='47097.0', duration='46000.0', count=1.0238478260869566 34 | // double exp = testExp(); 35 | // testTimestamp(); 36 | // testMap2Json(); 37 | // testpb(); 38 | // testTypeHandler(); 39 | testObj(); 40 | 41 | } 42 | 43 | private static void testObj() { 44 | C c = new C(); 45 | c.test(1); 46 | } 47 | 48 | private static void testTypeHandler() { 49 | Map typeHandler = new HashMap<>(); 50 | typeHandler.put(AllType.a.name(), new AHandler()); 51 | typeHandler.put(AllType.b.name(), new BHandler()); 52 | // typeHandler.put("a", new AHandler()); 53 | // typeHandler.put("b", new BHandler()); 54 | 55 | typeHandler.get("b").handleSink(1); 56 | typeHandler.get("a").handleSink(1); 57 | 58 | } 59 | 60 | public enum AllType { 61 | a, 62 | b; 63 | } 64 | 65 | interface Handler { 66 | void handleSink(int data); 67 | } 68 | 69 | static class AHandler implements Handler { 70 | @Override 71 | public void handleSink(int data) { 72 | System.out.println("type: a, value: " + data); 73 | } 74 | } 75 | 76 | static class BHandler implements Handler { 77 | 78 | @Override 79 | public void handleSink(int data) { 80 | System.out.println("type: b, value: " + data); 81 | } 82 | } 83 | 84 | private static void testpb() { 85 | SliceActVV.userInfo.Builder usrInfo = SliceActVV.userInfo.newBuilder(); 86 | usrInfo.setRtUClick(1L); 87 | usrInfo.setRtUReveal(2L); 88 | usrInfo.setRtURpt(0.5); 89 | SliceActVV.userInfo userInfo2 = usrInfo.build(); 90 | userInfo2 = userInfo2.toBuilder().setRtURpt(0.6).build(); 91 | System.out.println(userInfo2); 92 | } 93 | 94 | private static void testMap2Json() throws IOException { 95 | Map map = new HashMap(); 96 | map.put("users", 1); 97 | map.put("u", 1); 98 | ObjectMapper objectMapper = new ObjectMapper(); 99 | byte[] ob = objectMapper.writeValueAsBytes(map); 100 | Map map2 = (HashMap)objectMapper.readValue(ob, Map.class); 101 | System.out.println(map2); 102 | 103 | } 104 | 105 | private static void testTimestamp() { 106 | long timestamp = System.currentTimeMillis(); 107 | System.out.println("timestamp: " + timestamp); 108 | SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd 
HH:mm:ss");//设置日期格式 109 | String date = df.format(new Date());// new Date()为获取当前系统时间,也可使用当前时间戳 110 | System.out.println("date: " + date); 111 | System.out.println("timestamp date: " + df.format(timestamp)); 112 | } 113 | 114 | private static double testExp() throws Exception{ 115 | double res = 47097.0 / 46000.0; 116 | System.out.println(res); 117 | throw new RuntimeException("cao"); 118 | 119 | } 120 | 121 | } 122 | -------------------------------------------------------------------------------- /src/main/resources/consumer.properties: -------------------------------------------------------------------------------- 1 | ############################# 2 | # kafka consumer配置 3 | ############################# 4 | #kafka common 5 | bootstrap.servers=localhost:9092 6 | # Kafka Consumer 7 | group.id=consumer-1 8 | key.deserializer=org.apache.kafka.common.serialization.StringDeserializer 9 | value.deserializer=org.apache.kafka.common.serialization.StringDeserializer 10 | heartbeat.interval.ms=5000 11 | session.timeout.ms=10000 12 | enable.auto.commit=true 13 | auto.commit.interval.ms=10000 14 | #auto.offset.reset=earliest 15 | auto.offset.reset=latest 16 | connections.max.idle.ms=540000 17 | max.poll.records=10 18 | #client.id=id1 19 | -------------------------------------------------------------------------------- /src/main/resources/hbase-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | hbase.zookeeper.property.clientPort 6 | 2181 7 | 8 | 9 | 10 | hbase.zookeeper.quorum 11 | localhost:9092 12 | 13 | 14 | 15 | 16 | zookeeper.znode.parent 17 | /hbase-unsecure 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengxiaosen/flinkMultiStreamOptimization/4f1df37f8a5053e4f2bac9b06b59e1027faf158f/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /src/main/resources/producer.properties: -------------------------------------------------------------------------------- 1 | ############################# 2 | # kafka producer配置 3 | ############################# 4 | bootstrap.servers=localhost:9092 5 | # ack方式,all,会等所有的commit最慢的方式 6 | acks=1 7 | # 客户端如果发送失败则会重新发送 8 | retries=5 9 | # 默认立即发送,这里这是延时毫秒数 10 | linger.ms=10 11 | # 生产者用来缓存等待发送到服务器的消息的内存总字节数,不宜过大 12 | key.serializer=org.apache.kafka.common.serialization.StringSerializer 13 | value.serializer=org.apache.kafka.common.serialization.StringSerializer 14 | # producer会阻塞max.block.ms,超时则抛出异常,此处设为3m 15 | max.block.ms=3 16 | #Producer可以用来缓存数据的内存大小。该值实际为RecordAccumulator类中的BufferPool, 17 | #即Producer所管理的最大内存。如果数据产生速度大于向broker发送的速度, 18 | buffer.memory=3145728 19 | #Producer用于压缩数据的压缩类型,取值:none, gzip, snappy, or lz4 20 | compression.type=snappy 21 | # 当多个消息要发送到相同分区的时,生产者尝试将消息批量打包在一起,以减少请求交互 22 | #Producer可以将发往同一个Partition的数据做成一个Produce Request发送请求, 23 | # 即Batch批处理,以减少请求次数,该值即为每次批处理的大小。 24 | #另外每个Request请求包含多个Batch,每个Batch对应一个Partition, 25 | #且一个Request发送的目的Broker均为这些partition的leader副本。 26 | #若将该值设为0,则不会进行批处理,此处设为1m 27 | batch.size=1048576 28 | maxRatePerPartition=10 29 | send.buffer.bytes=131072 30 | #请求的最大字节数。这也是对最大消息大小的有效限制。注意:server具有自己对消息大小的限制, 31 | #这些大小和这个设置不同。此项设置将会限制producer每次批量发送请求的数目,以防发出巨量的请求。 32 | #此处设置为3m 33 | max.request.size=3145728 --------------------------------------------------------------------------------