├── .gitignore ├── LICENSE ├── README.md ├── flinkDemo.iml ├── pom.xml └── src └── main ├── java └── com │ └── z │ └── flinkStreamOptimizatiion │ ├── broadcast │ └── BroadcastDemo.java │ ├── datesetOp │ ├── WordCountData.java │ ├── WordCountDemo.java │ └── WordCountExample.java │ ├── hbase │ ├── Flink2HBase.java │ ├── FlinkGHBaseByDataSet.java │ └── loader │ │ ├── HBaseLoader.java │ │ ├── HBaseRetryingUtils.java │ │ ├── HBaseUtils.java │ │ └── ILoader.java │ ├── kafka │ ├── ReadFromKafka.java │ └── WriteIntoKafka.java │ ├── metricsOp │ └── gaugesOp.java │ ├── rpc │ ├── client │ │ ├── MessageCollector.java │ │ ├── RPCClient.java │ │ ├── RPCException.java │ │ └── RpcFuture.java │ ├── common │ │ ├── Charsets.java │ │ ├── IMessageHandler.java │ │ ├── MessageDecoder.java │ │ ├── MessageEncoder.java │ │ ├── MessageHandlers.java │ │ ├── MessageInput.java │ │ ├── MessageOutput.java │ │ ├── MessageRegistry.java │ │ └── RequestId.java │ ├── demo │ │ ├── DemoClient.java │ │ └── DemoServer.java │ └── server │ │ ├── DefaultHandler.java │ │ ├── MessageCollector.java │ │ └── RPCServer.java │ ├── state │ ├── CountWindowAverage.java │ └── KeyStateMsgDemo.java │ ├── stream │ ├── MyNoParalleSource.java │ ├── MyNoParalleStrSource.java │ ├── MyParalleSource.java │ ├── MyPartition.java │ ├── MyRichParalleSource.java │ ├── NumberStreamDemo.java │ ├── StreamJoinDataSource1.java │ ├── StreamJoinDataSource2.java │ ├── StreamJoinDemo.java │ ├── TimeWindowDemo.java │ └── WindowComputeUtil.java │ └── test │ ├── SliceActVV.java │ └── test1.java └── resources ├── consumer.properties ├── hbase-site.xml ├── log4j.properties └── producer.properties /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled class file 2 | *.class 3 | 4 | # Log file 5 | *.log 6 | 7 | # BlueJ files 8 | *.ctxt 9 | 10 | # Mobile Tools for Java (J2ME) 11 | .mtj.tmp/ 12 | 13 | # Package Files # 14 | *.jar 15 | *.war 16 | *.nar 17 | *.ear 18 | *.zip 19 | *.tar.gz 20 | *.rar 21 | 22 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 23 | hs_err_pid* 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 基于Flink多流Join优化的研究与实现 2 | 1 伪代码 3 | 2 单流场景下的TimeWindow滚动窗口边界与数据延迟问题 4 | 3 多流Join场景下的窗口计算触发时机、延时数据丢失问题 5 | 4 针对flink流算子中rpc调用场景,利用netty自研rpc工具 6 | ## 1 伪代码: 7 | Flink stream join的形式为Windows join 8 | ```$xslt 9 | stream.join(otherStream) 10 | .where() 11 | .equalTo() 12 | .window() 13 | .apply() 14 | ``` 15 | ## 2 单流场景下的TimeWindow滚动窗口边界与数据延迟问题 16 | ### 2.1 问题陈述 17 | 多流Join的思路是在同一窗口对多流进行Join,针对每条单流: 18 | 每条流都是使用Flink的timeWindow api中的window size、delay、timestamp,计算触发窗口计算的时机, 19 | 每条流的延时数据,Flink根据window size、delay、延时数据的timestamp,判断是否丢弃, 20 | 本节通过调节windows size、delay,分析触发窗口计算的条件,以及触发延时数据丢失的条件。 21 | ### 2.2 数据所属窗口计算逻辑 22 | Flink源码中,数据所属窗口的计算逻辑: 23 | ```$xslt 24 | //Flink源码的窗口计算函数,该函数根据每条数据的timestamp、window size计算该条数据所属的[窗口开始时间,窗口结束时间] 25 | public static long getWindowStartWithOffset(long timestamp, long offset, long windowSize) { 26 | return timestamp - (timestamp - offset + windowSize) % windowSize; 27 | } 28 | ``` 29 | 测试:根据event time和窗口时间大小,计算数据所属的窗口的开始时间和结束时间 30 | 代码位置:https://github.com/zengxiaosen/flinkMultiStreamOptimization/blob/master/src/main/java/com/z/flinkStreamOptimizatiion/stream/TimeWindowDemo.java 31 | 文件中的test1() 32 | ```$xslt 33 | //结果展示: 34 | 1000000050000 -> 2001-09-09 09:47:30.000 所属窗口的开始时间是:1000000050000 -> 2001-09-09 09:47:30.000 35 | 1000000054000 -> 2001-09-09 09:47:34.000 所属窗口的起始时间是: 1000000050000 -> 2001-09-09 09:47:30.000 36 | 1000000079900 -> 2001-09-09 09:47:59.900 所属窗口的起始时间是: 1000000070000 -> 2001-09-09 09:47:50.000 37 | 1000000120000 -> 2001-09-09 09:48:40.000 所属窗口的起始时间是: 1000000120000 -> 2001-09-09 09:48:40.000 38 | 1000000111000 -> 2001-09-09 09:48:31.000 所属窗口的起始时间是: 1000000110000 -> 2001-09-09 09:48:30.000 39 | 1000000089000 -> 2001-09-09 09:48:09.000 所属窗口的起始时间是: 1000000080000 -> 2001-09-09 09:48:00.000 40 | ``` 41 | ### 2.3 单流的窗口计算触发时机 42 | 代码位置:https://github.com/zengxiaosen/flinkMultiStreamOptimization/blob/master/src/main/java/com/z/flinkStreamOptimizatiion/stream/TimeWindowDemo.java 43 | 文件中的test2() 44 | 数据源: 45 | ```$xslt 46 | Tuple3[] elements = new Tuple3[]{ 47 | Tuple3.of("a", "1", 1000000050000L), 48 | Tuple3.of("a", "2", 1000000054000L), 49 | Tuple3.of("a", "3", 1000000079900L), 50 | Tuple3.of("a", "4", 1000000120000L), 51 | Tuple3.of("b", "5", 1000000111000L), 52 | Tuple3.of("b", "6", 1000000089000L) 53 | }; 54 | ``` 55 | 窗口属性设置: 56 | ```$xslt 57 | long delay = 5000L; 58 | int windowSize = 10; 59 | ``` 60 | 水位线计算逻辑: 61 | ```$xslt 62 | //水位线的目标是使水位线以下的record触发窗口计算 63 | private final long maxOutOfOrderness = delay; 64 | private long currentMaxTimestamp = 0L; 65 | 66 | @Nullable 67 | @Override 68 | public Watermark getCurrentWatermark() { 69 | return new Watermark(currentMaxTimestamp - maxOutOfOrderness); 70 | } 71 | ``` 72 | 针对流的每条record,跟踪水位线,窗口开始时间,窗口结束时间,时间戳等日志 73 | ```$xslt 74 | #### 第 1 个record #### 75 | currentMaxTimestamp: 1000000050000 76 | 水位线(watermark): 1000000045000 -> 2001-09-09 09:47:25.000 77 | 窗口开始时间:1000000050000 -> 2001-09-09 09:47:30.000 78 | 窗口结束时间:1000000060000 -> 2001-09-09 09:47:40.000 79 | 1 -> 1000000050000 -> 2001-09-09 09:47:30.000 80 | #### 第 2 个record #### 81 | currentMaxTimestamp: 1000000054000 82 | 水位线(watermark): 1000000049000 -> 2001-09-09 09:47:29.000 83 | 窗口开始时间:1000000050000 -> 2001-09-09 09:47:30.000 84 | 窗口结束时间:1000000060000 -> 2001-09-09 09:47:40.000 85 | 2 -> 1000000054000 -> 2001-09-09 09:47:34.000 86 | #### 第 3 个record #### 87 | 
currentMaxTimestamp: 1000000079900 88 | 水位线(watermark): 1000000074900 -> 2001-09-09 09:47:54.900 89 | 窗口开始时间:1000000070000 -> 2001-09-09 09:47:50.000 90 | 窗口结束时间:1000000080000 -> 2001-09-09 09:48:00.000 91 | 3 -> 1000000079900 -> 2001-09-09 09:47:59.900 92 | (a,[1,2],1) 93 | #### 第 4 个record #### 94 | currentMaxTimestamp: 1000000120000 95 | 水位线(watermark): 1000000115000 -> 2001-09-09 09:48:35.000 96 | 窗口开始时间:1000000120000 -> 2001-09-09 09:48:40.000 97 | 窗口结束时间:1000000130000 -> 2001-09-09 09:48:50.000 98 | 4 -> 1000000120000 -> 2001-09-09 09:48:40.000 99 | (a,3,1000000079900) 100 | #### 第 5 个record #### 101 | currentMaxTimestamp: 1000000120000 102 | 水位线(watermark): 1000000115000 -> 2001-09-09 09:48:35.000 103 | 窗口开始时间:1000000110000 -> 2001-09-09 09:48:30.000 104 | 窗口结束时间:1000000120000 -> 2001-09-09 09:48:40.000 105 | 5 -> 1000000111000 -> 2001-09-09 09:48:31.000 106 | #### 第 6 个record #### 107 | currentMaxTimestamp: 1000000120000 108 | 水位线(watermark): 1000000115000 -> 2001-09-09 09:48:35.000 109 | 窗口开始时间:1000000080000 -> 2001-09-09 09:48:00.000 110 | 窗口结束时间:1000000090000 -> 2001-09-09 09:48:10.000 111 | 6 -> 1000000089000 -> 2001-09-09 09:48:09.000 112 | (b,5,1000000111000) 113 | (a,4,1000000120000) 114 | ``` 115 | 日志分析: 116 | ```$xslt 117 | /** 118 | * 触发窗口运算时机: 119 | * 当一条数据过来, 120 | * 1)水位线 > 上一批次的记录的窗口结束时间,之前的数据要进行窗口运算 121 | * 2)水位线 > 上一批次的记录的timestamp,之前的数据要进行窗口计算 122 | * 123 | * 关于是否丢数据: 124 | * 1)如果当前数据的EventTime在WaterMark之上,也就是EventTime > WaterMark。由于数据所属窗口 125 | * 的WindowEndTime,一定是大于EventTime的。这时有WindowEndTime > EventTime > WaterMark 126 | * 这种情况是一定不会丢数据的。 127 | * 2)如果当前数据的EventTime在WaterMark之下,也就是WaterMark > EventTime,这时要分两种情况: 128 | * 2.1)如果该数据所属窗口的WindowEndTime > WaterMark,表示窗口还没被触发,例如第5个record的情况, 129 | * 即WindowEndTime > WaterMark > EventTime,这种情况数据也是不会丢失的。 130 | * 2.2)如果该数据所属窗口的WaterMark > WindowEndTime, 则表示窗口已经无法被触发, 131 | * 即WaterMark > WindowEndTime > EventTime, 这种情况数据也就丢失了。 132 | * 133 | * 特殊record: 134 | * 第5条record,元素在水位以下,但windows还没被触发计算,参照record 5 135 | * 第6条record,由于watermark > windows end time ,第6条数据所属的窗口就永远不会被触发计算了。 136 | */ 137 | ``` 138 | ### 2.4 单流的窗口计算数据丢失场景 139 | 窗口延时数据丢失情况:元素在水位以下,但windows已经无法被触发计算了 140 | 代码位置:https://github.com/zengxiaosen/flinkMultiStreamOptimization/blob/master/src/main/java/com/z/flinkStreamOptimizatiion/stream/TimeWindowDemo.java 141 | 文件中的test3() 142 | 数据源: 143 | ```$xslt 144 | Tuple3[] elements = new Tuple3[]{ 145 | Tuple3.of("a", "1", 1000000050000L), 146 | Tuple3.of("a", "2", 1000000054000L), 147 | Tuple3.of("a", "3", 1000000079900L), 148 | Tuple3.of("a", "4", 1000000120000L), 149 | Tuple3.of("b", "5", 1000000100001L), 150 | Tuple3.of("b", "6", 1000000109000L) 151 | }; 152 | ``` 153 | 窗口属性设置: 154 | ```$xslt 155 | long delay = 5000L; 156 | int windowSize = 10; 157 | ``` 158 | 针对流的每条record,跟踪水位线,窗口开始时间,窗口结束时间,时间戳等日志: 159 | ```$xslt 160 | #### 第 1 个record #### 161 | currentMaxTimestamp: 1000000050000 162 | 水位线(watermark): 1000000045000 -> 2001-09-09 09:47:25.000 163 | 窗口开始时间:1000000050000 -> 2001-09-09 09:47:30.000 164 | 窗口结束时间:1000000060000 -> 2001-09-09 09:47:40.000 165 | 1 -> 1000000050000 -> 2001-09-09 09:47:30.000 166 | #### 第 2 个record #### 167 | currentMaxTimestamp: 1000000054000 168 | 水位线(watermark): 1000000049000 -> 2001-09-09 09:47:29.000 169 | 窗口开始时间:1000000050000 -> 2001-09-09 09:47:30.000 170 | 窗口结束时间:1000000060000 -> 2001-09-09 09:47:40.000 171 | 2 -> 1000000054000 -> 2001-09-09 09:47:34.000 172 | #### 第 3 个record #### 173 | currentMaxTimestamp: 1000000079900 174 | 水位线(watermark): 1000000074900 -> 2001-09-09 09:47:54.900 
175 | 窗口开始时间:1000000070000 -> 2001-09-09 09:47:50.000 176 | 窗口结束时间:1000000080000 -> 2001-09-09 09:48:00.000 177 | 3 -> 1000000079900 -> 2001-09-09 09:47:59.900 178 | (a,[1,2],1) 179 | #### 第 4 个record #### 180 | currentMaxTimestamp: 1000000120000 181 | 水位线(watermark): 1000000115000 -> 2001-09-09 09:48:35.000 182 | 窗口开始时间:1000000120000 -> 2001-09-09 09:48:40.000 183 | 窗口结束时间:1000000130000 -> 2001-09-09 09:48:50.000 184 | 4 -> 1000000120000 -> 2001-09-09 09:48:40.000 185 | (a,3,1000000079900) 186 | #### 第 5 个record #### 187 | currentMaxTimestamp: 1000000120000 188 | 水位线(watermark): 1000000115000 -> 2001-09-09 09:48:35.000 189 | 窗口开始时间:1000000100000 -> 2001-09-09 09:48:20.000 190 | 窗口结束时间:1000000110000 -> 2001-09-09 09:48:30.000 191 | 5 -> 1000000100001 -> 2001-09-09 09:48:20.001 192 | #### 第 6 个record #### 193 | currentMaxTimestamp: 1000000120000 194 | 水位线(watermark): 1000000115000 -> 2001-09-09 09:48:35.000 195 | 窗口开始时间:1000000100000 -> 2001-09-09 09:48:20.000 196 | 窗口结束时间:1000000110000 -> 2001-09-09 09:48:30.000 197 | 6 -> 1000000109000 -> 2001-09-09 09:48:29.000 198 | (a,4,1000000120000) 199 | ``` 200 | 日志分析: 201 | ```$xslt 202 | /** 203 | * 观察record 5 和 record 6,它们的窗口属性如下: 204 | * 窗口开始时间:1000000100000 -> 2001-09-09 09:48:20.000 205 | * 窗口结束时间:1000000110000 -> 2001-09-09 09:48:30.000 206 | * windows end time < watermark, 这个窗口已经无法被触发计算了。 207 | * 也就是说,这个窗口创建时,已经 windows end time < watermark,相当于第5第6条记录都丢失了。 208 | */ 209 | ``` 210 | ### 2.5 针对单流延时数据的丢失问题,提出增大delay的解决方案 211 | 解决思路:通过增大delay,来增大失序的容忍程度,确保不丢数据 212 | 代码位置:https://github.com/zengxiaosen/flinkMultiStreamOptimization/blob/master/src/main/java/com/z/flinkStreamOptimizatiion/stream/TimeWindowDemo.java 213 | 文件中的test4() 214 | 数据源: 215 | ```$xslt 216 | Tuple3[] elements = new Tuple3[]{ 217 | Tuple3.of("a", "1", 1000000050000L), 218 | Tuple3.of("a", "2", 1000000054000L), 219 | Tuple3.of("a", "3", 1000000079900L), 220 | Tuple3.of("a", "4", 1000000115000L), 221 | Tuple3.of("b", "5", 1000000100000L), 222 | Tuple3.of("b", "6", 1000000108000L) 223 | }; 224 | ``` 225 | 窗口属性设置: 226 | ```$xslt 227 | long delay = 5100L; 228 | int windowSize = 10; 229 | ``` 230 | 针对流的每条record,跟踪水位线,窗口开始时间,窗口结束时间,时间戳等日志: 231 | ```$xslt 232 | #### 第 1 个record #### 233 | currentMaxTimestamp: 1000000050000 234 | 水位线(watermark): 1000000044900 -> 2001-09-09 09:47:24.900 235 | 窗口开始时间:1000000050000 -> 2001-09-09 09:47:30.000 236 | 窗口结束时间:1000000060000 -> 2001-09-09 09:47:40.000 237 | 1 -> 1000000050000 -> 2001-09-09 09:47:30.000 238 | #### 第 2 个record #### 239 | currentMaxTimestamp: 1000000054000 240 | 水位线(watermark): 1000000048900 -> 2001-09-09 09:47:28.900 241 | 窗口开始时间:1000000050000 -> 2001-09-09 09:47:30.000 242 | 窗口结束时间:1000000060000 -> 2001-09-09 09:47:40.000 243 | 2 -> 1000000054000 -> 2001-09-09 09:47:34.000 244 | #### 第 3 个record #### 245 | currentMaxTimestamp: 1000000079900 246 | 水位线(watermark): 1000000074800 -> 2001-09-09 09:47:54.800 247 | 窗口开始时间:1000000070000 -> 2001-09-09 09:47:50.000 248 | 窗口结束时间:1000000080000 -> 2001-09-09 09:48:00.000 249 | 3 -> 1000000079900 -> 2001-09-09 09:47:59.900 250 | (a,[1,2],1) 251 | #### 第 4 个record #### 252 | currentMaxTimestamp: 1000000115000 253 | 水位线(watermark): 1000000109900 -> 2001-09-09 09:48:29.900 254 | 窗口开始时间:1000000110000 -> 2001-09-09 09:48:30.000 255 | 窗口结束时间:1000000120000 -> 2001-09-09 09:48:40.000 256 | 4 -> 1000000115000 -> 2001-09-09 09:48:35.000 257 | (a,3,1000000079900) 258 | #### 第 5 个record #### 259 | currentMaxTimestamp: 1000000115000 260 | 水位线(watermark): 1000000109900 -> 2001-09-09 09:48:29.900 261 | 
窗口开始时间:1000000100000 -> 2001-09-09 09:48:20.000 262 | 窗口结束时间:1000000110000 -> 2001-09-09 09:48:30.000 263 | 5 -> 1000000100000 -> 2001-09-09 09:48:20.000 264 | #### 第 6 个record #### 265 | currentMaxTimestamp: 1000000115000 266 | 水位线(watermark): 1000000109900 -> 2001-09-09 09:48:29.900 267 | 窗口开始时间:1000000100000 -> 2001-09-09 09:48:20.000 268 | 窗口结束时间:1000000110000 -> 2001-09-09 09:48:30.000 269 | 6 -> 1000000108000 -> 2001-09-09 09:48:28.000 270 | (b,[5,6],1) 271 | (a,4,1000000115000) 272 | ``` 273 | 日志分析: 274 | ```$xslt 275 | /** 276 | * 观察 record 5 和 record 6, 它们的时间窗口如下: 277 | * 窗口开始时间:1000000100000 -> 2001-09-09 09:48:20.000 278 | * 窗口结束时间:1000000110000 -> 2001-09-09 09:48:30.000 279 | * 它们进来的时候水位线如下: 280 | * 水位线(watermark): 1000000109900 -> 2001-09-09 09:48:29.900 281 | * 也就是说,它们进来的时候,watermark < windows end time 282 | * 这种情况下,就算数据的 eventtime < watermark,数据还是被保留下来,没有丢失。 283 | */ 284 | ``` 285 | ## 3 多流Join场景下的窗口计算触发时机、延时数据丢失问题 286 | 代码位置:https://github.com/zengxiaosen/flinkMultiStreamOptimization/blob/master/src/main/java/com/z/flinkStreamOptimizatiion/stream/StreamJoinDemo.java 287 | 数据源:两条流 288 | ```$xslt 289 | //StreamJoinDataSource1: 290 | Tuple3[] elements = new Tuple3[]{ 291 | Tuple3.of("a", "1", 1000000050000L), 292 | Tuple3.of("a", "2", 1000000054000L), 293 | Tuple3.of("a", "3", 1000000079900L), 294 | Tuple3.of("a", "4", 1000000115000L), 295 | Tuple3.of("b", "5", 1000000100000L), 296 | Tuple3.of("b", "6", 1000000108000L) 297 | }; 298 | //StreamJoinDataSource2: 299 | Tuple3[] elements = new Tuple3[]{ 300 | Tuple3.of("a", "hangzhou", 1000000059000L), 301 | Tuple3.of("b", "beijing", 1000000105000L), 302 | }; 303 | 304 | ``` 305 | 窗口属性设置: 306 | ```$xslt 307 | //毫秒为单位 308 | int windowSize = 10; 309 | long delay = 5100L; 310 | ``` 311 | 针对流的每条record,跟踪水位线,窗口开始时间,窗口结束时间,时间戳等日志: 312 | ```$xslt 313 | #################################### 314 | element.f1: hangzhou 315 | 水位线(watermark): 1000000053900 -> 2001-09-09 09:47:33.900 316 | 窗口开始时间:1000000050000 -> 2001-09-09 09:47:30.000 317 | 窗口结束时间:1000000060000 -> 2001-09-09 09:47:40.000 318 | hangzhou -> 1000000059000 -> 2001-09-09 09:47:39.000 319 | #################################### 320 | element.f1: 1 321 | 水位线(watermark): 1000000044900 -> 2001-09-09 09:47:24.900 322 | 窗口开始时间:1000000050000 -> 2001-09-09 09:47:30.000 323 | 窗口结束时间:1000000060000 -> 2001-09-09 09:47:40.000 324 | 1 -> 1000000050000 -> 2001-09-09 09:47:30.000 325 | #################################### 326 | element.f1: 2 327 | 水位线(watermark): 1000000048900 -> 2001-09-09 09:47:28.900 328 | 窗口开始时间:1000000050000 -> 2001-09-09 09:47:30.000 329 | 窗口结束时间:1000000060000 -> 2001-09-09 09:47:40.000 330 | 2 -> 1000000054000 -> 2001-09-09 09:47:34.000 331 | #################################### 332 | element.f1: beijing 333 | 水位线(watermark): 1000000099900 -> 2001-09-09 09:48:19.900 334 | 窗口开始时间:1000000100000 -> 2001-09-09 09:48:20.000 335 | 窗口结束时间:1000000110000 -> 2001-09-09 09:48:30.000 336 | beijing -> 1000000105000 -> 2001-09-09 09:48:25.000 337 | #################################### 338 | element.f1: 3 339 | 水位线(watermark): 1000000074800 -> 2001-09-09 09:47:54.800 340 | 窗口开始时间:1000000070000 -> 2001-09-09 09:47:50.000 341 | 窗口结束时间:1000000080000 -> 2001-09-09 09:48:00.000 342 | 3 -> 1000000079900 -> 2001-09-09 09:47:59.900 343 | 触发双流join窗口运算 344 | (a,1,hangzhou,1000000050000,1000000059000) 345 | 触发双流join窗口运算 346 | (a,2,hangzhou,1000000054000,1000000059000) 347 | #################################### 348 | element.f1: 4 349 | 水位线(watermark): 1000000109900 -> 2001-09-09 09:48:29.900 350 | 
窗口开始时间:1000000110000 -> 2001-09-09 09:48:30.000 351 | 窗口结束时间:1000000120000 -> 2001-09-09 09:48:40.000 352 | 4 -> 1000000115000 -> 2001-09-09 09:48:35.000 353 | #################################### 354 | element.f1: 5 355 | 水位线(watermark): 1000000109900 -> 2001-09-09 09:48:29.900 356 | 窗口开始时间:1000000100000 -> 2001-09-09 09:48:20.000 357 | 窗口结束时间:1000000110000 -> 2001-09-09 09:48:30.000 358 | 5 -> 1000000100000 -> 2001-09-09 09:48:20.000 359 | #################################### 360 | element.f1: 6 361 | 水位线(watermark): 1000000109900 -> 2001-09-09 09:48:29.900 362 | 窗口开始时间:1000000100000 -> 2001-09-09 09:48:20.000 363 | 窗口结束时间:1000000110000 -> 2001-09-09 09:48:30.000 364 | 6 -> 1000000108000 -> 2001-09-09 09:48:28.000 365 | 触发双流join窗口运算 366 | (b,5,beijing,1000000100000,1000000105000) 367 | 触发双流join窗口运算 368 | (b,6,beijing,1000000108000,1000000105000) 369 | ``` 370 | 日志分析: 371 | ```$xslt 372 | 结论1:如果source1触发窗口计算的时候,source2还没有触发窗口计算,也就是说,source2在窗口中没有数据, 373 | 需要等待source2触发窗口计算,把数据放置到窗口中,才能进行基于多流的join操作。 374 | 结论2:假设缩小delay,也就是提升水位线,有可能导致watermark > window end time,导致丢数据,例子: 375 | /** 376 | * 当设置参数int windowSize = 10; long delay = 5000L;时 377 | * 输出为: 378 | * (a,1,hangzhou,1000000050000,1000000059000) 379 | * (a,2,hangzhou,1000000054000,1000000059000) 380 | * 原因: 381 | * window_end_time < watermark, 导致数据丢失了。 382 | */ 383 | ``` 384 | ## 4 针对flink流算子中rpc调用场景,利用netty自研rpc工具 385 | server端启动: 386 | https://github.com/zengxiaosen/flinkMultiStreamOptimization/blob/master/src/main/java/com/z/flinkStreamOptimizatiion/rpc/demo/DemoServer.java 387 | flink的算子flatmap中初始化client端,调用rpc服务: 388 | https://github.com/zengxiaosen/flinkMultiStreamOptimization/blob/master/src/main/java/com/z/flinkStreamOptimizatiion/stream/TimeWindowDemo.java 389 | 中的test6() 390 | ```$xslt 391 | 调用流程: 392 | 1)一般flink算子中调用rpc都是在每个task上去建立连接,调用,销毁连接。 393 | 2)在flatmap之上统一new rpc client不可行,因为此rpc使用netty实现,而netty的bootstrap是final类型,也不能序列化。 394 | 3)所以综上所述,需要在每个task上去调用rpc服务。 395 | ``` 396 | 397 | 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | -------------------------------------------------------------------------------- /flinkDemo.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | flinkDemo 8 | flinkDemo 9 | 1.0-SNAPSHOT 10 | 11 | 4.1.1.Final 12 | UTF-8 13 | 14 | 15 | 16 | 17 | org.apache.flink 18 | flink-java 19 | 1.5.0 20 | 21 | 22 | 23 | org.apache.flink 24 | flink-streaming-java_2.11 25 | 1.5.0 26 | 27 | 28 | 29 | 30 | org.apache.flink 31 | flink-connector-kafka-0.10_2.11 32 | 1.5.0 33 | 34 | 35 | 36 | org.apache.flink 37 | flink-hbase_2.11 38 | 1.5.0 39 | 40 | 41 | 42 | 
org.apache.kafka 43 | kafka-clients 44 | 0.10.1.1 45 | 46 | 47 | 48 | org.apache.hbase 49 | hbase-client 50 | 1.1.2 51 | 52 | 53 | 54 | org.projectlombok 55 | lombok 56 | 1.16.10 57 | compile 58 | 59 | 60 | com.google.code.gson 61 | gson 62 | 2.8.2 63 | 64 | 65 | com.github.rholder 66 | guava-retrying 67 | 2.0.0 68 | 69 | 70 | com.alibaba 71 | fastjson 72 | 1.2.5 73 | 74 | 75 | io.netty 76 | netty-common 77 | ${netty.version} 78 | 79 | 80 | io.netty 81 | netty-buffer 82 | ${netty.version} 83 | 84 | 85 | io.netty 86 | netty-transport 87 | ${netty.version} 88 | 89 | 90 | io.netty 91 | netty-handler 92 | ${netty.version} 93 | 94 | 95 | io.netty 96 | netty-codec 97 | ${netty.version} 98 | 99 | 100 | 101 | 102 | 103 | 104 | org.apache.maven.plugins 105 | maven-compiler-plugin 106 | 3.5.1 107 | 108 | 1.8 109 | 1.8 110 | 111 | 112 | 113 | 114 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/broadcast/BroadcastDemo.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.broadcast; 2 | 3 | import com.z.flinkStreamOptimizatiion.stream.MyNoParalleSource; 4 | import org.apache.flink.api.common.functions.MapFunction; 5 | import org.apache.flink.api.common.functions.RichMapFunction; 6 | import org.apache.flink.api.java.DataSet; 7 | import org.apache.flink.api.java.ExecutionEnvironment; 8 | import org.apache.flink.api.java.operators.DataSource; 9 | import org.apache.flink.api.java.tuple.Tuple2; 10 | import org.apache.flink.configuration.Configuration; 11 | import org.apache.flink.streaming.api.datastream.DataStream; 12 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 13 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 14 | import org.apache.flink.streaming.api.windowing.time.Time; 15 | 16 | import java.util.ArrayList; 17 | import java.util.Collection; 18 | import java.util.HashMap; 19 | import java.util.List; 20 | 21 | public class BroadcastDemo { 22 | 23 | public static void main(String[] args) throws Exception { 24 | 25 | // broadcast 26 | //test1(); 27 | 28 | // StreamSource Broadcast 流的广播 29 | //test2(); 30 | 31 | // batch broadcast 广播变量 32 | test3(); 33 | 34 | } 35 | 36 | private static void test3() throws Exception { 37 | /** 38 | * 1, 封装dataset,调用withbroadcastSet 39 | * 2, getRuntimeContext().getBroadcastVariable, 获取广播变量 40 | * 3, RichMapFunction中执行获得广播变量的逻辑 41 | */ 42 | 43 | //获取运行环境 44 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 45 | 46 | //1:准备需要广播的数据 47 | ArrayList> broadData = new ArrayList<>(); 48 | broadData.add(new Tuple2<>("zs", 18)); 49 | broadData.add(new Tuple2<>("ls",20)); 50 | broadData.add(new Tuple2<>("ww",17)); 51 | DataSet> tupleData = env.fromCollection(broadData); 52 | 53 | //1.1:处理需要广播的数据,把数据集转换成map类型,map中的key就是用户姓名,value就是用户年龄 54 | DataSet> toBroadcast = tupleData.map(new MapFunction, HashMap>() { 55 | @Override 56 | public HashMap map(Tuple2 value) throws Exception { 57 | HashMap res = new HashMap<>(); 58 | res.put(value.f0, value.f1); 59 | return res; 60 | } 61 | }); 62 | 63 | //源数据 64 | DataSource data = env.fromElements("zs", "ls", "ww"); 65 | 66 | //注意:在这里需要使用到RichMapFunction获取广播变量 67 | DataSet result = data.map(new RichMapFunction() { 68 | 69 | List> broadCastMap = new ArrayList>(); 70 | HashMap allMap = new HashMap<>(); 71 | 72 | 73 | /** 74 | * 这个方法只会执行一次 75 | * 可以在这里实现一些初始化的功能 76 | * 77 | * 所以,就可以在open方法中获取广播变量数据 78 | * 79 | */ 80 | 
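// getRuntimeContext().getBroadcastVariable("broadCastMapName") below looks up the DataSet that is
// registered further down via .withBroadcastSet(toBroadcast, "broadCastMapName"); the two names must
// match exactly, and open() runs once per parallel task before the first map() call, so the broadcast
// data is fetched a single time instead of once per record.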
81 | @Override 82 | public void open(Configuration parameters) throws Exception { 83 | super.open(parameters); 84 | 85 | //3:获取广播数据 86 | this.broadCastMap = getRuntimeContext().getBroadcastVariable("broadCastMapName"); 87 | for (HashMap map : broadCastMap) { 88 | allMap.putAll(map); 89 | } 90 | 91 | } 92 | 93 | @Override 94 | public String map(String value) throws Exception { 95 | Integer age = allMap.get(value); 96 | return value + "," + age; 97 | } 98 | }).withBroadcastSet(toBroadcast, "broadCastMapName");//2:执行广播数据的操作 99 | 100 | result.print(); 101 | 102 | } 103 | 104 | private static void test2() throws Exception { 105 | 106 | //实现元素的重复广播 107 | 108 | //获取Flink的运行环境 109 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 110 | //4个并行 111 | env.setParallelism(4); 112 | 113 | //获取数据源 114 | DataStreamSource text = env.addSource(new MyNoParalleSource()).setParallelism(1);//注意:针对此source,并行度只能设置为1 115 | //整个map元素分别处理了4次 116 | DataStream num = text.broadcast().map(new MapFunction() { 117 | @Override 118 | public Long map(Long value) throws Exception { 119 | long id = Thread.currentThread().getId(); 120 | System.out.println("线程id:"+id+",接收到数据:" + value); 121 | return value; 122 | } 123 | }); 124 | 125 | //每2秒钟处理一次数据 126 | DataStream sum = num.timeWindowAll(Time.seconds(2)).sum(0); 127 | 128 | //打印结果 129 | sum.print().setParallelism(1); 130 | 131 | String jobName = BroadcastDemo.class.getSimpleName(); 132 | env.execute(jobName); 133 | 134 | 135 | } 136 | 137 | private static void test1() { 138 | //获取运行环境 139 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 140 | 141 | //1 准备等待广播的DataSet数据 142 | DataSet toBroadcast = env.fromElements(1, 2, 3); 143 | DataSet data = env.fromElements("a", "b", "c"); 144 | 145 | data.map(new RichMapFunction() { 146 | 147 | @Override 148 | public String map(String s) throws Exception { 149 | return null; 150 | } 151 | 152 | @Override 153 | public void open(Configuration parameters) throws Exception { 154 | 155 | //3 获取广播的DataSet数据 作为一个Collection 156 | Collection broadcastSet = getRuntimeContext().getBroadcastVariable("broadcastSetName"); 157 | 158 | } 159 | }).withBroadcastSet(toBroadcast, "broadcastSetName"); //2 广播DataSset 160 | 161 | 162 | } 163 | 164 | } 165 | 166 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/datesetOp/WordCountData.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.datesetOp; 2 | 3 | import org.apache.flink.api.java.DataSet; 4 | import org.apache.flink.api.java.ExecutionEnvironment; 5 | 6 | /** 7 | *

8 | *

@author: jyj019 9 | * Date: 2018/9/3 10:46 10 | * @version: 2.0.0 11 | *
  • @since JDK 1.8
  • 12 | */ 13 | 14 | public class WordCountData { 15 | 16 | public static final String[] WORDS = new String[] { 17 | "To be, or not to be,--that is the question:--", 18 | "Whether 'tis nobler in the mind to suffer", 19 | "The slings and arrows of outrageous fortune", 20 | "Or to take arms against a sea of troubles,", 21 | "And by opposing end them?--To die,--to sleep,--", 22 | "No more; and by a sleep to say we end", 23 | "The heartache, and the thousand natural shocks", 24 | "That flesh is heir to,--'tis a consummation", 25 | "Devoutly to be wish'd. To die,--to sleep;--", 26 | "To sleep! perchance to dream:--ay, there's the rub;", 27 | "For in that sleep of death what dreams may come,", 28 | "When we have shuffled off this mortal coil,", 29 | "Must give us pause: there's the respect", 30 | "That makes calamity of so long life;", 31 | "For who would bear the whips and scorns of time,", 32 | "The oppressor's wrong, the proud man's contumely,", 33 | "The pangs of despis'd love, the law's delay,", 34 | "The insolence of office, and the spurns", 35 | "That patient merit of the unworthy takes,", 36 | "When he himself might his quietus make", 37 | "With a bare bodkin? who would these fardels bear,", 38 | "To grunt and sweat under a weary life,", 39 | "But that the dread of something after death,--", 40 | "The undiscover'd country, from whose bourn", 41 | "No traveller returns,--puzzles the will,", 42 | "And makes us rather bear those ills we have", 43 | "Than fly to others that we know not of?", 44 | "Thus conscience does make cowards of us all;", 45 | "And thus the native hue of resolution", 46 | "Is sicklied o'er with the pale cast of thought;", 47 | "And enterprises of great pith and moment,", 48 | "With this regard, their currents turn awry,", 49 | "And lose the name of action.--Soft you now!", 50 | "The fair Ophelia!--Nymph, in thy orisons", 51 | "Be all my sins remember'd." 
52 | }; 53 | 54 | public static DataSet getDefaultTextLineDataSet(ExecutionEnvironment env) { 55 | return env.fromElements(WORDS); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/datesetOp/WordCountDemo.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.datesetOp; 2 | 3 | import org.apache.flink.api.common.functions.FlatMapFunction; 4 | import org.apache.flink.api.common.functions.JoinFunction; 5 | import org.apache.flink.api.common.functions.MapPartitionFunction; 6 | import org.apache.flink.api.common.operators.Order; 7 | import org.apache.flink.api.java.DataSet; 8 | import org.apache.flink.api.java.ExecutionEnvironment; 9 | import org.apache.flink.api.java.operators.CrossOperator; 10 | import org.apache.flink.api.java.operators.DataSource; 11 | import org.apache.flink.api.java.operators.FlatMapOperator; 12 | import org.apache.flink.api.java.tuple.Tuple2; 13 | import org.apache.flink.api.java.tuple.Tuple3; 14 | import org.apache.flink.api.java.utils.ParameterTool; 15 | 16 | import org.apache.flink.util.Collector; 17 | 18 | import java.util.ArrayList; 19 | import java.util.Iterator; 20 | import java.util.List; 21 | 22 | // dataset 的一些通用操作 23 | public class WordCountDemo { 24 | 25 | public static void main(String[] args) throws Exception { 26 | // get input data 27 | DataSet text = getDataSet(args); 28 | 29 | 30 | // Map:输入一个元素,然后返回一个元素,中间可以做一些清洗转换等操作 31 | // FlatMap:输入一个元素,可以返回零个,一个或者多个元素 32 | // MapPartition:类似map,一次处理一个分区的数据【如果在进行map处理的时候需要获取第三方资源链接,建议使用MapPartition】 33 | 34 | //map 35 | // test1(text); 36 | 37 | //map partition - batch 38 | //test2(); 39 | 40 | //distinct 41 | //test3(); 42 | 43 | //join 内连接 44 | //test4(); 45 | 46 | //outer join 外连接 47 | //test5(); 48 | 49 | //cross 笛卡尔积 50 | //test6(); 51 | 52 | //sort partition 在本地对数据集的所有分区进行排序,通过sortPartition()的链接调用来完成对多个字段的排序 53 | // test7(); 54 | 55 | 56 | } 57 | 58 | 59 | 60 | private static void test7() throws Exception { 61 | //获取运行环境 62 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 63 | ArrayList> data = new ArrayList<>(); 64 | data.add(new Tuple2<>(2,"zs")); 65 | data.add(new Tuple2<>(4,"ls")); 66 | data.add(new Tuple2<>(3,"ww")); 67 | data.add(new Tuple2<>(1,"xw")); 68 | data.add(new Tuple2<>(1,"aw")); 69 | data.add(new Tuple2<>(1,"mw")); 70 | 71 | DataSource> text = env.fromCollection(data); 72 | //获取前3条数据,按照数据插入的顺序 73 | text.first(3).print(); 74 | System.out.println("=============================="); 75 | 76 | //根据数据中的第一列进行分组,获取每组的前2个元素 77 | text.groupBy(0).first(2).print(); 78 | 79 | //根据数据中的第一列分组,再根据第二列进行组内排序[升序],获取每组的前2个元素 80 | text.groupBy(0).sortGroup(1, Order.ASCENDING).first(2).print(); 81 | System.out.println("=============================="); 82 | 83 | //不分组,全局排序获取集合中的前3个元素,针对第一个元素升序,第二个元素倒序 84 | text.sortPartition(0, Order.ASCENDING).sortPartition(1, Order.DESCENDING).first(3).print(); 85 | text.sortPartition(0, Order.ASCENDING).sortPartition(1, Order.DESCENDING).first(3).print(); 86 | 87 | 88 | 89 | } 90 | 91 | private static void test6() throws Exception { 92 | //获取运行环境 93 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 94 | 95 | //tuple2<用户id,用户姓名> 96 | ArrayList data1 = new ArrayList<>(); 97 | data1.add("zs"); 98 | data1.add("ww"); 99 | //tuple2<用户id,用户所在城市> 100 | ArrayList data2 = new ArrayList<>(); 101 | data2.add(1); 102 | data2.add(2); 103 | DataSource text1 = 
env.fromCollection(data1); 104 | DataSource text2 = env.fromCollection(data2); 105 | CrossOperator.DefaultCross cross = text1.cross(text2); 106 | cross.print(); 107 | 108 | } 109 | 110 | private static void test5() throws Exception { 111 | //获取运行环境 112 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 113 | 114 | //tuple2<用户id,用户姓名> 115 | ArrayList> data1 = new ArrayList<>(); 116 | data1.add(new Tuple2<>(1,"zs")); 117 | data1.add(new Tuple2<>(2,"ls")); 118 | data1.add(new Tuple2<>(3,"ww")); 119 | 120 | 121 | //tuple2<用户id,用户所在城市> 122 | ArrayList> data2 = new ArrayList<>(); 123 | data2.add(new Tuple2<>(1,"beijing")); 124 | data2.add(new Tuple2<>(2,"shanghai")); 125 | data2.add(new Tuple2<>(4,"guangzhou")); 126 | 127 | 128 | DataSource> text1 = env.fromCollection(data1); 129 | DataSource> text2 = env.fromCollection(data2); 130 | 131 | /** 132 | * 左外连接 133 | * 134 | * 注意:second这个tuple中的元素可能为null 135 | * 136 | */ 137 | 138 | text1.leftOuterJoin(text2) 139 | .where(0) 140 | .equalTo(0) 141 | .with(new JoinFunction, Tuple2, Tuple3>() { 142 | 143 | @Override 144 | public Tuple3 join(Tuple2 first, Tuple2 second) throws Exception { 145 | if (second == null) { 146 | return new Tuple3<>(first.f0, first.f1, "null"); 147 | } else { 148 | return new Tuple3<>(first.f0, first.f1, second.f1); 149 | } 150 | } 151 | }).print(); 152 | 153 | /** 154 | * 右外连接 155 | * 156 | * 注意:first这个tuple中的数据可能为null 157 | * 158 | */ 159 | text1.rightOuterJoin(text2) 160 | .where(0) 161 | .equalTo(0) 162 | .with(new JoinFunction, Tuple2, Tuple3>() { 163 | @Override 164 | public Tuple3 join(Tuple2 first, Tuple2 second) throws Exception { 165 | if(first==null){ 166 | return new Tuple3<>(second.f0,"null",second.f1); 167 | } 168 | return new Tuple3<>(first.f0,first.f1,second.f1); 169 | } 170 | }).print(); 171 | /** 172 | * 全外连接 173 | * 174 | * 注意:first和second这两个tuple都有可能为null 175 | * 176 | */ 177 | text1.fullOuterJoin(text2) 178 | .where(0) 179 | .equalTo(0) 180 | .with(new JoinFunction, Tuple2, Tuple3>() { 181 | @Override 182 | public Tuple3 join(Tuple2 first, Tuple2 second) throws Exception { 183 | if(first==null){ 184 | return new Tuple3<>(second.f0,"null",second.f1); 185 | }else if(second == null){ 186 | return new Tuple3<>(first.f0,first.f1,"null"); 187 | }else{ 188 | return new Tuple3<>(first.f0,first.f1,second.f1); 189 | } 190 | } 191 | }).print(); 192 | 193 | 194 | } 195 | 196 | private static void test4() throws Exception { 197 | //获取运行环境 198 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 199 | //tuple2<用户id,用户姓名> 200 | List> data1 = new ArrayList<>(); 201 | data1.add(new Tuple2<>(1, "zs")); 202 | data1.add(new Tuple2<>(2, "ls")); 203 | data1.add(new Tuple2<>(3, "ww")); 204 | 205 | //tuple2<用户id,用户所在城市> 206 | List> data2 = new ArrayList<>(); 207 | data2.add(new Tuple2<>(1, "beijing")); 208 | data2.add(new Tuple2<>(2, "shanghai")); 209 | data2.add(new Tuple2<>(3, "guangzhou")); 210 | 211 | DataSource> text1 = env.fromCollection(data1); 212 | DataSource> text2 = env.fromCollection(data2); 213 | 214 | text1.join(text2).where(0)//指定第一个数据集中需要进行比较的元素的角标 215 | .equalTo(0)//指定第二个数据集中需要进行比较的元素的角标 216 | .with(new JoinFunction, Tuple2, Object>() { 217 | 218 | @Override 219 | public Object join(Tuple2 first, Tuple2 second) throws Exception { 220 | return new Tuple3<>(first.f0, first.f1, second.f1); 221 | } 222 | }).print(); 223 | //注意,这里用map和上面使用的with最终效果是一致的。 224 | /*text1.join(text2).where(0)//指定第一个数据集中需要进行比较的元素角标 225 | .equalTo(0)//指定第二个数据集中需要进行比较的元素角标 226 | .map(new 
MapFunction,Tuple2>, Tuple3>() { 227 | @Override 228 | public Tuple3 map(Tuple2, Tuple2> value) throws Exception { 229 | return new Tuple3<>(value.f0.f0,value.f0.f1,value.f1.f1); 230 | } 231 | }).print();*/ 232 | 233 | } 234 | 235 | private static void test3() throws Exception { 236 | //获取运行环境 237 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 238 | ArrayList data = new ArrayList<>(); 239 | data.add("hello you"); 240 | data.add("hello me"); 241 | 242 | DataSource text = env.fromCollection(data); 243 | FlatMapOperator flatMapData = text.flatMap(new FlatMapFunction() { 244 | @Override 245 | public void flatMap(String value, Collector out) throws Exception { 246 | String[] split = value.toLowerCase().split("\\W+"); 247 | for (String word : split) { 248 | System.out.println("单词: " + word); 249 | out.collect(word); 250 | } 251 | } 252 | }); 253 | flatMapData.distinct().print(); 254 | } 255 | 256 | private static void test2() throws Exception { 257 | //获取运行环境 258 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 259 | 260 | ArrayList data = new ArrayList<>(); 261 | data.add("hello you"); 262 | data.add("hello me"); 263 | DataSource text = env.fromCollection(data); 264 | /*text.map(new MapFunction() { 265 | @Override 266 | public String map(String value) throws Exception { 267 | //获取数据库连接--注意,此时是每过来一条数据就获取一次链接 268 | //处理数据 269 | //关闭连接 270 | return value; 271 | } 272 | });*/ 273 | DataSet mapPartitionData = text.mapPartition(new MapPartitionFunction() { 274 | @Override 275 | public void mapPartition(Iterable values, Collector out) throws Exception { 276 | //获取数据库连接--注意,此时是一个分区的数据获取一次连接【优点,每个分区获取一次链接】 277 | //values中保存了一个分区的数据 278 | //处理数据 279 | Iterator it = values.iterator(); 280 | while (it.hasNext()) { 281 | String next = it.next(); 282 | String[] split = next.split("\\W+"); 283 | for (String word : split) { 284 | out.collect(word); 285 | } 286 | } 287 | //关闭连接 288 | } 289 | }); 290 | mapPartitionData.print(); 291 | } 292 | 293 | private static void test1(DataSet text) throws Exception { 294 | DataSet> counts = 295 | // split up the lines in pairs (2-tuples) containing: (word,1) 296 | text.flatMap(new Tokenizer()) 297 | // group by the tuple field "0" and sum up tuple field "1" 298 | .groupBy(0) 299 | .sum(1); 300 | 301 | counts.print(); 302 | } 303 | 304 | public static DataSet getDataSet(String[] args) { 305 | final ParameterTool params = ParameterTool.fromArgs(args); 306 | DataSet text; 307 | // create execution environment 308 | final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 309 | //env.getConfig().setGlobalJobParameters(params); 310 | if (params.has("input")) { 311 | // read the text file from given input path 312 | text = env.readTextFile(params.get("input")); 313 | } else { 314 | // get default test text data 315 | System.out.println("Executing WordCount example with default input data set."); 316 | System.out.println("Use --input to specify file input."); 317 | text = WordCountData.getDefaultTextLineDataSet(env); 318 | } 319 | return text; 320 | } 321 | 322 | // ************************************************************************* 323 | // USER FUNCTIONS 324 | // ************************************************************************* 325 | 326 | 327 | /** 328 | * Implements the string tokenizer that splits sentences into words as a user-defined 329 | * FlatMapFunction. The function takes a line (String) and splits it into 330 | * multiple pairs in the form of "(word,1)" ({@code Tuple2}). 
331 | */ 332 | public static final class Tokenizer implements FlatMapFunction> { 333 | 334 | @Override 335 | public void flatMap(String value, Collector> out) { 336 | // normalize and split the line 337 | String[] tokens = value.toLowerCase().split("\\W+"); 338 | 339 | // emit the pairs 340 | for (String token : tokens) { 341 | if (token.length() > 0) { 342 | out.collect(new Tuple2<>(token, 1)); 343 | } 344 | } 345 | } 346 | } 347 | } 348 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/datesetOp/WordCountExample.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.datesetOp; 2 | 3 | import org.apache.flink.api.common.functions.FlatMapFunction; 4 | import org.apache.flink.api.java.DataSet; 5 | import org.apache.flink.api.java.ExecutionEnvironment; 6 | import org.apache.flink.api.java.tuple.Tuple2; 7 | import org.apache.flink.util.Collector; 8 | 9 | /** 10 | *

    11 | *

  • @author: jyj019
  • 12 | *
  • Date: 2018/9/3 11:07
  • 13 | *
  • @version: 2.0.0
  • 14 | *
  • @since JDK 1.8
  • 15 | */ 16 | public class WordCountExample { 17 | public static void main(String[] args) throws Exception { 18 | final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 19 | env.setParallelism(100); 20 | 21 | DataSet text = env.fromElements( 22 | "Who's there?", 23 | "I think I hear them. Stand, ho! Who's there?", 24 | "I think I hear them. Stand, ho! Who's there?"); 25 | 26 | DataSet> wordCounts = text 27 | .flatMap(new LineSplitter()) 28 | .groupBy(0) 29 | .sum(1); 30 | 31 | wordCounts.print(); 32 | } 33 | 34 | public static class LineSplitter implements FlatMapFunction> { 35 | @Override 36 | public void flatMap(String line, Collector> out) { 37 | for (String word : line.split(" ")) { 38 | // for (String word2 : line.split(",")) { 39 | out.collect(new Tuple2<>(word, 1)); 40 | // } 41 | } 42 | } 43 | } 44 | } -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/hbase/Flink2HBase.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.hbase; 2 | 3 | 4 | import com.z.flinkStreamOptimizatiion.hbase.loader.HBaseLoader; 5 | import com.z.flinkStreamOptimizatiion.hbase.loader.HBaseUtils; 6 | import org.apache.flink.api.common.functions.MapFunction; 7 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 8 | import org.apache.flink.api.java.utils.ParameterTool; 9 | import org.apache.flink.streaming.api.TimeCharacteristic; 10 | import org.apache.flink.streaming.api.datastream.DataStream; 11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 12 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010; 13 | import org.apache.hadoop.hbase.TableName; 14 | import org.apache.hadoop.hbase.client.*; 15 | import org.slf4j.Logger; 16 | import org.slf4j.LoggerFactory; 17 | 18 | import java.io.IOException; 19 | import java.util.*; 20 | 21 | 22 | public class Flink2HBase { 23 | 24 | private static final Logger LOGGER = LoggerFactory.getLogger(Flink2HBase.class); 25 | 26 | public static void main(String[] args) { 27 | 28 | System.setProperty("hadoop.home.dir", "C:\\hbase-1.1.2"); 29 | System.setProperty("HADOOP_USER_NAME", "hdfs"); 30 | 31 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 32 | env.enableCheckpointing(1000); // 非常关键,一定要设置启动检查点!! 
33 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 34 | 35 | Map properties= new HashMap(); 36 | properties.put("bootstrap.servers", "192.168.10.63:6667,192.168.10.64:6667,192.168.10.65:6667"); 37 | properties.put("group.id", "dec-esc-group-vib-calc"); 38 | properties.put("enable.auto.commit", "true"); 39 | properties.put("auto.commit.interval.ms", "1000"); 40 | properties.put("auto.offset.reset", "earliest"); 41 | properties.put("session.timeout.ms", "30000"); 42 | properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 43 | properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 44 | properties.put("topic", "dec-vibration-test"); 45 | //KafkaConsumer kafkaConsumer = new KafkaConsumer(properties); 46 | // parse user parameters 47 | //ParameterTool parameterTool = ParameterTool.fromArgs(args); 48 | ParameterTool parameterTool = ParameterTool.fromMap(properties); 49 | 50 | DataStream transction = env.addSource(new FlinkKafkaConsumer010<>(parameterTool.getRequired("topic"), new SimpleStringSchema(), parameterTool.getProperties())); 51 | //DataStream transction1 = env.addSource(new FlinkKafkaConsumer010("test3",new SimpleStringSchema(), props)); 52 | 53 | //DataStream eventDataStream=transction.map((line)->parse(line)); 54 | 55 | transction.rebalance().map(new MapFunction() { 56 | 57 | public String map(String value)throws IOException { 58 | 59 | writeIntoHBase(value); 60 | return value; 61 | } 62 | 63 | }); 64 | 65 | 66 | transction.rebalance().map(new MapFunction() { 67 | 68 | @Override 69 | public String map(String value)throws IOException { 70 | 71 | writeIntoHBase(value); 72 | return value; 73 | } 74 | 75 | }); 76 | 77 | //transction.writeAsText("/home/admin/log2"); 78 | // transction.addSink(new HBaseOutputFormat(); 79 | try { 80 | env.execute(); 81 | } catch (Exception ex) { 82 | ex.printStackTrace(); 83 | } 84 | } 85 | 86 | 87 | 88 | 89 | public static void writeIntoHBase(String value)throws IOException { 90 | HBaseLoader hBaseLoader= new HBaseLoader(); 91 | String hBaseTable="dfdq_rhm_aly:f_turbine_event_data"; 92 | String hBaseTableCF="f"; 93 | Table table = null; 94 | // 常量 95 | 96 | // org.apache.hadoop.conf.Configuration config = HBaseConfiguration.create(); 97 | // config.set("hbase.zookeeper.property.clientPort", "2181"); 98 | // 99 | // config.set("hbase.zookeeper.quorum", "bigdata-master2.phmcluster.calabar,bigdata-master1.phmcluster.calabar,bigdata-slave1.phmcluster.calabar,bigdata-slave2.phmcluster.calabar,bigdata-slave3.phmcluster.calabar"); 100 | // config.set("zookeeper.znode.parent", "/hbase-unsecure"); 101 | 102 | //config.set(TableOutputFormat.OUTPUT_TABLE, hbasetable); 103 | 104 | try { 105 | table = HBaseUtils.getConnection().getTable(TableName.valueOf(hBaseTable)); 106 | } catch (Exception e) { 107 | LOGGER.error("HBase连接建立出错",e); 108 | e.printStackTrace(); 109 | } 110 | 111 | hBaseLoader.loadSpeed(table, hBaseTableCF,String.valueOf(value)); 112 | 113 | // Connection c = ConnectionFactory.createConnection(config); 114 | // 115 | // Admin admin = c.getAdmin(); 116 | // if(!admin.tableExists(tableName)){ 117 | // admin.createTable(new HTableDescriptor(tableName).addFamily(new HColumnDescriptor(columnFamily))); 118 | // } 119 | // Table t = c.getTable(tableName); 120 | // 121 | // TimeStamp ts = new TimeStamp(new Date()); 122 | // 123 | // Date date = ts.getDate(); 124 | // 125 | // Put put = new Put(org.apache.hadoop.hbase.util.Bytes.toBytes(date.toString())); 126 | // 
127 | // put.addColumn(org.apache.hadoop.hbase.util.Bytes.toBytes(columnFamily), org.apache.hadoop.hbase.util.Bytes.toBytes("test"), 128 | // org.apache.hadoop.hbase.util.Bytes.toBytes(m)); 129 | // t.put(put); 130 | // 131 | // t.close(); 132 | // c.close(); 133 | } 134 | 135 | 136 | 137 | 138 | } 139 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/hbase/FlinkGHBaseByDataSet.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.hbase; 2 | 3 | import org.apache.flink.addons.hbase.TableInputFormat; 4 | import org.apache.flink.api.java.DataSet; 5 | import org.apache.flink.api.java.ExecutionEnvironment; 6 | import org.apache.flink.api.java.tuple.Tuple2; 7 | import org.apache.hadoop.hbase.client.Result; 8 | import org.apache.hadoop.hbase.client.Scan; 9 | import org.apache.hadoop.hbase.util.Bytes; 10 | 11 | 12 | public class FlinkGHBaseByDataSet { 13 | 14 | public static void main(String[] args) { 15 | final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 16 | 17 | DataSet> hbaseInput = env.createInput(new TableInputFormat>(){ 18 | @Override 19 | protected Scan getScanner() { 20 | Scan scan = new Scan(); 21 | scan.setStartRow(Bytes.toBytes("lastSpeed1")); 22 | scan.setStopRow(Bytes.toBytes("lastSpeed4")); 23 | return scan; 24 | } 25 | @Override 26 | protected String getTableName() { 27 | return "dfdq_rhm_aly:f_turbine_event_data"; 28 | } 29 | @Override 30 | protected Tuple2 mapResultToTuple(Result result) { 31 | 32 | Tuple2 tup = new Tuple2(); 33 | tup.setField(Bytes.toString(result.getRow()),0); 34 | tup.setField(Bytes.toString(result.getValue("f".getBytes(), "slv".getBytes())), 1); 35 | return tup; 36 | } 37 | }); 38 | 39 | try { 40 | hbaseInput.print(); 41 | } catch (Exception e) { 42 | e.printStackTrace(); 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/hbase/loader/HBaseLoader.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.hbase.loader; 2 | 3 | import org.apache.commons.collections.CollectionUtils; 4 | import org.apache.hadoop.conf.Configuration; 5 | import org.apache.hadoop.hbase.HBaseConfiguration; 6 | import org.apache.hadoop.hbase.TableName; 7 | import org.apache.hadoop.hbase.client.Connection; 8 | import org.apache.hadoop.hbase.client.ConnectionFactory; 9 | import org.apache.hadoop.hbase.client.Put; 10 | import org.apache.hadoop.hbase.client.Table; 11 | import org.apache.hadoop.hbase.util.Bytes; 12 | import org.slf4j.Logger; 13 | import org.slf4j.LoggerFactory; 14 | 15 | import java.util.LinkedList; 16 | import java.util.List; 17 | 18 | 19 | public class HBaseLoader implements ILoader { 20 | 21 | private static final Logger LOGGER = LoggerFactory.getLogger(HBaseLoader.class); 22 | @Override 23 | public void loader() throws Exception { 24 | 25 | Table table = null; 26 | try { 27 | Configuration conf = HBaseConfiguration.create(); 28 | Connection conn = ConnectionFactory.createConnection(conf); 29 | table = conn.getTable(TableName.valueOf("dfdq_rhm_aly:f_aly_point_data_test")); 30 | Put put = new Put("kkk".getBytes()); 31 | put.addColumn(Bytes.toBytes("f"),Bytes.toBytes("t"),Bytes.toBytes(System.currentTimeMillis())); 32 | table.put(put); 33 | } catch (Exception e) { 34 | throw new Exception("批量存储数据失败!", e); 35 | } 
finally { 36 | // table.close(); 37 | } 38 | } 39 | 40 | public static void main(String[] args) throws Exception { 41 | ILoader loader = new HBaseLoader(); 42 | loader.loader(); 43 | } 44 | 45 | 46 | public void loadSpeed(Table table, String family, String value) { 47 | long start = System.currentTimeMillis(); 48 | byte[] fam_b = Bytes.toBytes(family); 49 | byte[] slv_b = Bytes.toBytes("slv"); 50 | 51 | // 装入多行数据 52 | List puts = new LinkedList<>(); 53 | Put put; 54 | //for (VibSaveEntry aData : data) { 55 | put = new Put(Bytes.toBytes("lastSpeed"+value)); 56 | put.addColumn(fam_b, slv_b, Bytes.toBytes(value)); 57 | puts.add(put); 58 | //} 59 | if (CollectionUtils.isNotEmpty(puts)) { 60 | try { 61 | HBaseRetryingUtils.retrying(table, puts); 62 | } catch (Exception e) { 63 | e.printStackTrace(); 64 | } 65 | } 66 | long end = System.currentTimeMillis(); 67 | LOGGER.debug("数据存储耗时:"+(end-start)); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/hbase/loader/HBaseRetryingUtils.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.hbase.loader; 2 | 3 | import com.github.rholder.retry.Retryer; 4 | import com.github.rholder.retry.RetryerBuilder; 5 | import com.github.rholder.retry.StopStrategies; 6 | import com.github.rholder.retry.WaitStrategies; 7 | import org.apache.hadoop.hbase.client.Put; 8 | import org.apache.hadoop.hbase.client.Table; 9 | import org.slf4j.Logger; 10 | import org.slf4j.LoggerFactory; 11 | 12 | import java.io.Serializable; 13 | import java.util.List; 14 | import java.util.concurrent.TimeUnit; 15 | 16 | /** 17 | *

18 |  * @author: jinyujie 19 |  * Date: 2018/6/12 9:35 20 |  * @version: 2.0.0 21 |  * @since JDK 1.8
  • 22 | */ 23 | public class HBaseRetryingUtils implements Serializable { 24 | /** 25 | * 日志记录 26 | */ 27 | private static final Logger LOGGER = LoggerFactory.getLogger(HBaseRetryingUtils.class); 28 | 29 | /** 30 | * 重试发送数据到hbase 31 | * 32 | * @param table 33 | * @param puts List 34 | * @throws Exception 连接异常 35 | */ 36 | public static void retrying(Table table, List puts) throws Exception { 37 | // 异常或者返回null都继续重试、每3秒重试一次、最多重试5次 38 | Retryer retryer = RetryerBuilder.newBuilder() 39 | .retryIfException() 40 | .withWaitStrategy(WaitStrategies.fixedWait(500, TimeUnit.MILLISECONDS)) 41 | .withStopStrategy(StopStrategies.stopAfterAttempt(6)) 42 | .build(); 43 | 44 | try { 45 | retryer.call(() -> HBaseUtils.batchPuts(table, puts)); 46 | } catch (Exception e) { 47 | LOGGER.error("多次重试发送数据到hbase失败!", e); 48 | throw new Exception("多次重试发送数据到hbase失败!", e); 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/hbase/loader/HBaseUtils.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.hbase.loader; 2 | 3 | import org.apache.hadoop.conf.Configuration; 4 | import org.apache.hadoop.hbase.*; 5 | import org.apache.hadoop.hbase.client.*; 6 | import org.apache.hadoop.hbase.io.compress.Compression; 7 | import org.slf4j.Logger; 8 | import org.slf4j.LoggerFactory; 9 | 10 | import java.io.IOException; 11 | import java.io.Serializable; 12 | import java.util.Arrays; 13 | import java.util.LinkedList; 14 | import java.util.List; 15 | import java.util.concurrent.ExecutorService; 16 | import java.util.concurrent.Executors; 17 | 18 | 19 | public class HBaseUtils implements Serializable { 20 | /** 21 | * 日志记录 22 | */ 23 | private static final Logger LOGGER = LoggerFactory.getLogger(HBaseUtils.class); 24 | 25 | /** 26 | * Hbase 连接对象 27 | */ 28 | private static Connection CONN; 29 | 30 | /** 31 | * 获取Hbase的连接 32 | * 33 | * @return Hbase connection 34 | * @throws Exception the exception 35 | */ 36 | public synchronized static Connection getConnection() throws Exception { 37 | if (null == CONN || CONN.isClosed()) { 38 | try { 39 | Configuration conf = HBaseConfiguration.create(); 40 | CONN = ConnectionFactory.createConnection(conf); 41 | } catch (IOException e) { 42 | LOGGER.error("can not establish hbase connection.", e); 43 | throw new Exception("can not establish hbase connection.", e); 44 | } 45 | } 46 | return CONN; 47 | } 48 | 49 | /** 50 | * 创建命名空间 51 | * 52 | * @param namespace 命名空间 53 | * @throws Exception Exception 54 | */ 55 | public static void createNamespace(String namespace) throws Exception { 56 | Admin admin = null; 57 | try { 58 | admin = HBaseUtils.getConnection().getAdmin(); 59 | if (HBaseUtils.namespaceIsExists(admin, namespace)) { 60 | LOGGER.warn("The namespace " + namespace + " already exists !"); 61 | return; 62 | } 63 | admin.createNamespace(NamespaceDescriptor.create(namespace).build()); 64 | LOGGER.info("create namespace " + namespace + " seccuss."); 65 | } finally { 66 | HBaseUtils.closeAdmin(admin); 67 | } 68 | } 69 | 70 | /** 71 | * 判断表是否存在 72 | * 73 | * @param tableName tableName 74 | * @return true:存在, false:不存在 75 | * @throws Exception Exception 76 | */ 77 | public static boolean tableExists(String tableName) throws Exception { 78 | Admin admin = null; 79 | try { 80 | admin = HBaseUtils.getConnection().getAdmin(); 81 | return admin.tableExists(TableName.valueOf(tableName)); 82 | } finally { 83 | 
HBaseUtils.closeAdmin(admin); 84 | } 85 | } 86 | 87 | /** 88 | * 创建一个表,这个表没有任何region 89 | * 90 | * @param tableName 表名 91 | * @param cfs 列族 92 | * @throws Exception Exception 93 | */ 94 | public static void createTable(String tableName, String... cfs) throws Exception { 95 | Admin admin = null; 96 | try { 97 | admin = HBaseUtils.getConnection().getAdmin(); 98 | HTableDescriptor hTableDescriptor = new HTableDescriptor(TableName.valueOf(tableName)); 99 | for (String family : cfs) { 100 | HColumnDescriptor hColumnDescriptor = new HColumnDescriptor(family); 101 | hColumnDescriptor.setCompressionType(Compression.Algorithm.SNAPPY); 102 | hTableDescriptor.addFamily(hColumnDescriptor); 103 | hColumnDescriptor.setMaxVersions(3); 104 | } 105 | admin.createTable(hTableDescriptor); 106 | LOGGER.info("create table " + tableName + " seccuss."); 107 | } finally { 108 | HBaseUtils.closeAdmin(admin); 109 | } 110 | } 111 | 112 | /** 113 | * 清空表数据, 保留分区 114 | * 115 | * @param tableName 表名 116 | * @throws Exception Exception 117 | */ 118 | public static void truncateTable(String tableName) throws Exception { 119 | Admin admin = null; 120 | TableName tableNameObj = TableName.valueOf(tableName); 121 | try { 122 | admin = HBaseUtils.getConnection().getAdmin(); 123 | if (!admin.tableExists(tableNameObj)) { 124 | LOGGER.error("The table " + tableName + " does not exists!"); 125 | return; 126 | } 127 | admin.disableTable(tableNameObj); 128 | admin.truncateTable(tableNameObj, true); 129 | } finally { 130 | HBaseUtils.closeAdmin(admin); 131 | } 132 | } 133 | 134 | /** 135 | * 获取hbase表中的列族字段 136 | * 137 | * @param tableName 表名 138 | * @return 列族字段集合 family fields 139 | * @throws Exception Exception 140 | */ 141 | public static List getFamilyFields(String tableName) throws Exception { 142 | Admin admin = null; 143 | List families = new LinkedList<>(); 144 | try { 145 | admin = HBaseUtils.getConnection().getAdmin(); 146 | HTableDescriptor hTableDesc = admin.getTableDescriptor(TableName.valueOf(tableName)); 147 | hTableDesc.getFamilies().forEach(desc -> families.add(desc.getNameAsString())); 148 | return families; 149 | } finally { 150 | HBaseUtils.closeAdmin(admin); 151 | } 152 | } 153 | 154 | /** 155 | * 追加新的列族 156 | * 157 | * @param tableName tableName 158 | * @param families families 159 | * @throws Exception Exception 160 | */ 161 | public static void addColumnFamily(String tableName, String... 
families) throws Exception { 162 | Admin admin = null; 163 | try { 164 | admin = HBaseUtils.getConnection().getAdmin(); 165 | for (String family : families) { 166 | HColumnDescriptor columnDescriptor = new HColumnDescriptor(family); 167 | admin.addColumn(TableName.valueOf(tableName), columnDescriptor); 168 | } 169 | } finally { 170 | HBaseUtils.closeAdmin(admin); 171 | } 172 | } 173 | 174 | 175 | /* 176 | * 查询hbase表 177 | * 178 | * @tableName 表名 179 | */ 180 | public static ResultScanner getResult(Table table, Scan scan) throws Exception { 181 | //Table table = null; 182 | ResultScanner rs = null; 183 | try { 184 | //table = HbaseUtils.getConnection().getTable(TableName.valueOf(tableName)); 185 | rs = table.getScanner(scan); 186 | } catch (Exception e) { 187 | LOGGER.error("批量读取数据失败!", e); 188 | throw new Exception("批量读取数据失败!", e); 189 | } finally { 190 | closeTable(table); 191 | } 192 | return rs; 193 | } 194 | 195 | /** 196 | * 批量插入数据 197 | * 198 | * @param table 199 | * @param puts List 200 | * @throws Exception Exception 201 | */ 202 | public static boolean batchPuts(Table table, List puts) throws Exception { 203 | //Table table = null; 204 | try { 205 | //table = HBaseUtils.getConnection().getTable(TableName.valueOf(tableName)); 206 | table.put(puts); 207 | } catch (Exception e) { 208 | LOGGER.error("批量存储数据失败!", e); 209 | throw new Exception("批量存储数据失败!", e); 210 | } finally { 211 | closeTable(table); 212 | } 213 | 214 | return true; 215 | } 216 | 217 | /** 218 | * 多线程批量插入hbase 219 | * 220 | * @param tableName 表名 221 | * @param puts List 222 | */ 223 | public static void batchPut(final String tableName, List puts) { 224 | ExecutorService pool = Executors.newFixedThreadPool(5); 225 | pool.submit(() -> { 226 | BufferedMutator mutator = null; 227 | try { 228 | Connection conn = HBaseUtils.getConnection(); 229 | //HBaseUtils.enableTable(tableName); 230 | BufferedMutatorParams params = new BufferedMutatorParams(TableName.valueOf(tableName)); 231 | params.writeBufferSize(5 * 1024 * 1024); 232 | mutator = conn.getBufferedMutator(params); 233 | mutator.mutate(puts); 234 | mutator.flush(); 235 | } catch (Exception e) { 236 | LOGGER.error("write data to hbase failed!", e); 237 | } finally { 238 | try { 239 | assert null != mutator; 240 | mutator.close(); 241 | } catch (IOException e) { 242 | LOGGER.error("close mutator failed", e); 243 | } 244 | } 245 | }); 246 | } 247 | 248 | /** 249 | * 判断命名空间是否存在 250 | * 251 | * @param admin Admin 252 | * @param namespace 命名空间 253 | * @return true:存在、false:不存在 254 | * @throws Exception Exception 255 | */ 256 | private static boolean namespaceIsExists(Admin admin, String namespace) throws Exception { 257 | NamespaceDescriptor[] namespaceDescs = admin.listNamespaceDescriptors(); 258 | List ns = new LinkedList<>(); 259 | Arrays.stream(namespaceDescs).forEach(namespaceDesc -> ns.add(namespaceDesc.getName())); 260 | 261 | return ns.contains(namespace); 262 | } 263 | 264 | /** 265 | * 启用表, 若表状态为disable使其状态变为enable 266 | * 267 | * @param tableName 表名 268 | * @throws Exception Exception 269 | */ 270 | private static void enableTable(String tableName) throws Exception { 271 | // 若表是disable状态, 则启用表 272 | Admin admin = HBaseUtils.getConnection().getAdmin(); 273 | if (admin.isTableAvailable(TableName.valueOf(tableName))) { 274 | LOGGER.info("The table " + tableName + " is available !"); 275 | return; 276 | } 277 | admin.enableTable(TableName.valueOf(tableName)); 278 | LOGGER.info("enable talbe " + tableName + " seccuss."); 279 | } 280 | 281 | /** 282 | * 刷新表空间 283 | * 
284 | * @param tableName tableName 285 | * @throws Exception Exception 286 | */ 287 | public static void flushTable(String tableName) throws Exception { 288 | Admin admin = null; 289 | try { 290 | admin = HBaseUtils.getConnection().getAdmin(); 291 | admin.flush(TableName.valueOf(tableName)); 292 | } catch (Exception e) { 293 | throw new Exception(e); 294 | } finally { 295 | HBaseUtils.closeAdmin(admin); 296 | } 297 | } 298 | 299 | /** 300 | * 关闭hbase表管理对象(DDL)的Admin 301 | * 302 | * @param admin hbase表管理对象 303 | */ 304 | public static void closeAdmin(Admin admin) { 305 | if (null != admin) { 306 | try { 307 | admin.close(); 308 | } catch (IOException e) { 309 | LOGGER.error("close connection failure !", e); 310 | } 311 | } 312 | } 313 | 314 | /** 315 | * 关闭table 316 | * 317 | * @param table 表对象 318 | */ 319 | public static void closeTable(Table table) { 320 | if (null != table) { 321 | try { 322 | table.close(); 323 | } catch (IOException e) { 324 | LOGGER.error("close table failure !", e); 325 | } 326 | } 327 | } 328 | 329 | /** 330 | * 关闭hbase连接 331 | */ 332 | public static void closeConn() { 333 | if (null != CONN) { 334 | try { 335 | CONN.close(); 336 | } catch (IOException e) { 337 | LOGGER.error("close connection failure !", e); 338 | } 339 | } 340 | } 341 | } 342 | 343 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/hbase/loader/ILoader.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.hbase.loader; 2 | 3 | public interface ILoader { 4 | 5 | void loader() throws Exception; 6 | } 7 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/kafka/ReadFromKafka.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.kafka; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | 5 | import org.apache.flink.api.common.functions.MapFunction; 6 | import org.apache.flink.api.java.utils.ParameterTool; 7 | import org.apache.flink.streaming.api.datastream.DataStream; 8 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010; 9 | import org.apache.flink.streaming.util.serialization.SimpleStringSchema; 10 | 11 | import java.util.HashMap; 12 | import java.util.Map; 13 | 14 | 15 | /** 16 | *

17 |  * @author:jyj019 18 |  * Date: 2018/9/17 14:50 19 |  * @version: 2.0.0 20 |  * @since JDK 1.8
  • 21 | */ 22 | 23 | 24 | public class ReadFromKafka { 25 | 26 | public static void main(String[] args) throws Exception { 27 | // create execution environment 28 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 29 | 30 | Map properties= new HashMap(); 31 | properties.put("bootstrap.servers", "192.168.10.63:6667,192.168.10.64:6667,192.168.10.65:6667"); 32 | properties.put("group.id", "dec-esc-group-vib-calc"); 33 | properties.put("enable.auto.commit", "true"); 34 | properties.put("auto.commit.interval.ms", "1000"); 35 | properties.put("auto.offset.reset", "earliest"); 36 | properties.put("session.timeout.ms", "30000"); 37 | properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 38 | properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 39 | properties.put("topic", "dec-vibration-test"); 40 | //KafkaConsumer kafkaConsumer = new KafkaConsumer(properties); 41 | // parse user parameters 42 | //ParameterTool parameterTool = ParameterTool.fromArgs(args); 43 | ParameterTool parameterTool = ParameterTool.fromMap(properties); 44 | 45 | FlinkKafkaConsumer010 consumer010 = new FlinkKafkaConsumer010( 46 | parameterTool.getRequired("topic"), new SimpleStringSchema(), parameterTool.getProperties()); 47 | 48 | // consumer010.setStartFromEarliest(); 49 | 50 | DataStream messageStream = env 51 | .addSource(consumer010); 52 | 53 | // print() will write the contents of the stream to the TaskManager's standard out stream 54 | // the rebelance call is causing a repartitioning of the data so that all machines 55 | // see the messages (for example in cases when "num kafka partitions" < "num flink operators" 56 | messageStream.rebalance().map(new MapFunction() { 57 | private static final long serialVersionUID = 1L; 58 | 59 | @Override 60 | public String map(String value) throws Exception { 61 | return value; 62 | 63 | } 64 | }); 65 | 66 | 67 | messageStream.print(); 68 | 69 | env.execute(); 70 | } 71 | } -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/kafka/WriteIntoKafka.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.kafka; 2 | 3 | import org.apache.commons.lang3.RandomUtils; 4 | import org.apache.flink.api.common.functions.MapFunction; 5 | import org.apache.flink.api.java.utils.ParameterTool; 6 | import org.apache.flink.streaming.api.datastream.DataStream; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.streaming.api.functions.source.SourceFunction; 9 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer010; 10 | 11 | import org.apache.flink.streaming.util.serialization.SimpleStringSchema; 12 | 13 | import java.util.HashMap; 14 | import java.util.Map; 15 | 16 | /** 17 | *

18 |  * @author: jyj019 19 |  * Date: 2018/9/17 15:38 20 |  * @version: 2.0.0 21 |  * @since JDK 1.8
  • 22 | */ 23 | public class WriteIntoKafka { 24 | public static void main(String[] args) throws Exception { 25 | // create execution environment 26 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 27 | 28 | Map properties= new HashMap(); 29 | properties.put("bootstrap.servers", "192.168.10.63:6667,192.168.10.64:6667,192.168.10.65:6667"); 30 | properties.put("group.id", "t10"); 31 | properties.put("enable.auto.commit", "false"); 32 | properties.put("auto.commit.interval.ms", "1000"); 33 | properties.put("auto.offset.reset", "earliest"); 34 | properties.put("session.timeout.ms", "30000"); 35 | properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 36 | properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 37 | properties.put("topic", "kks-topic-FFT"); 38 | //KafkaConsumer kafkaConsumer = new KafkaConsumer(properties); 39 | // parse user parameters 40 | //ParameterTool parameterTool = ParameterTool.fromArgs(args); 41 | ParameterTool parameterTool = ParameterTool.fromMap(properties); 42 | 43 | // add a simple source which is writing some strings 44 | DataStream messageStream = env.addSource(new SimpleStringGenerator()); 45 | 46 | // write stream to Kafka 47 | messageStream.addSink(new FlinkKafkaProducer010<>(parameterTool.getRequired("bootstrap.servers"), 48 | parameterTool.getRequired("topic"), 49 | new SimpleStringSchema())); 50 | 51 | messageStream.rebalance().map(new MapFunction() { 52 | private static final long serialVersionUID = 1L; 53 | 54 | @Override 55 | public String map(String value) throws Exception { 56 | return value; 57 | } 58 | }); 59 | 60 | messageStream.print(); 61 | 62 | env.execute(); 63 | } 64 | 65 | public static class SimpleStringGenerator implements SourceFunction { 66 | private static final long serialVersionUID = 2174904787118597072L; 67 | boolean running = true; 68 | 69 | @Override 70 | public void run(SourceContext ctx) throws Exception { 71 | //int i=0; 72 | while(running) { 73 | //i++; 74 | ctx.collect(prouderJson()); 75 | //System.out.println(prouderJson()); 76 | 77 | } 78 | } 79 | 80 | @Override 81 | public void cancel() { 82 | running = false; 83 | } 84 | } 85 | 86 | public static String prouderJson() throws Exception { 87 | // long start = System.currentTimeMillis(); 88 | Integer value; 89 | String[] channels = new String[]{"000000007946", "000000007947","000000007948","000000007949","000000007950","000000007951","000000007952","000000007953", 90 | "000000007954","000000007955","000000007956","000000007957","000000007958","000000007959","000000007960","000000007961","000000007966", 91 | "000000007967","000000007968","000000007969","000000007970","000000007971","000000007986","000000007987"}; 92 | StringBuffer json = new StringBuffer(); 93 | json.append("{\n" + " \"header\": {\n" + " \"head\": \"EB90EB90EB90\",\n" + " \"plant_code\": 1,\n" + " " + 94 | "\"set_code\": 1,\n" + " \"device_type\": 1,\n" + " \"time\": "+System.currentTimeMillis() +",\n" 95 | + " \"data_length\": 4999\n" + " },\n" + " \"base_info\": {\n" + " \"work_identity\": 1,\n" + 96 | " \"sample_points_per_cycle\": 1024,\n" + " \"sampling_period\": 8,\n" + " \"sampling_number\": 1024,\n" + 97 | " \"rotate_speed\": "+randmomUtils1(RandomUtils.nextInt(0, 3)) +",\n" + " \"fast_variable_channels\": 24\n" + " },\n \"channel\":{"); 98 | for (int i=0;i<23;i ++) { 99 | json.append("\"" + channels[i] + "\":{\"peak\":" + randmomUtils2(RandomUtils.nextInt(0, 10)) + ",\n" 100 | + 
"\"phase_1x\":" + RandomUtils.nextFloat(0, 500) + ",\n" 101 | + "\"amplitude_1x\":" + (RandomUtils.nextFloat( 0, (float) 6.28)-3.14) + ",\n" 102 | + "\"phase_2x\":" + RandomUtils.nextFloat(0, 50) + ",\n" 103 | + "\"amplitude_2x\":" + (RandomUtils.nextFloat(0, (float) 6.28)-3.14) + ",\n" 104 | + "\"half_amplitud\":" + RandomUtils.nextFloat(0, 50) + ",\n" 105 | + "\"voltage\":" + RandomUtils.nextFloat(0, 5) + ",\n" 106 | +"\"waveform_data\":["); 107 | for(int j=1;j<1024;j ++){ 108 | value = (int) (5 * Math.sin(360 / 32 * j) + (8 * (Math.sin((360 / 64) * j)))); 109 | json.append(value+","); 110 | } 111 | value=(int)(5*(Math.sin((360/32)*1024))+8*(Math.sin((360/64)*1024))); 112 | json.append(value+"]},\n"); 113 | } 114 | json.append("\""+channels[23] +"\":{\"peak\":" + randmomUtils1(RandomUtils.nextInt(0, 10)) + ",\n" 115 | + "\"phase_1x\":" + RandomUtils.nextFloat(0, 500) + ",\n" 116 | + "\"amplitude_1x\":" + (RandomUtils.nextFloat( 0, (float) 6.28)-3.14) + ",\n" 117 | + "\"phase_2x\":" + RandomUtils.nextFloat(0, 50) + ",\n" 118 | + "\"amplitude_2x\":" + (RandomUtils.nextFloat(0, (float) 6.28)-3.14) + ",\n" 119 | + "\"half_amplitud\":" + RandomUtils.nextFloat(0, 50) + ",\n" 120 | + "\"voltage\":" + RandomUtils.nextFloat(0, 5) + ",\n" 121 | +"\"waveform_data\":["); 122 | for(int j=1;j<1024;j ++){ 123 | value=(int)(5*(Math.sin((360/32)*j))+8*(Math.sin((360/64)*j))); 124 | json.append(value+","); 125 | } 126 | value=(int)(5*(Math.sin((360/32)*1024))+8*(Math.sin((360/64)*1024))); 127 | json.append(value+"]}}}\n"); 128 | // long end = System.currentTimeMillis(); 129 | // LOGGER.info("制造数据,耗时:-->"+(start-end) ); 130 | return String.valueOf(json); 131 | } 132 | 133 | public static Float randmomUtils1(int i) throws Exception{ 134 | Float value=RandomUtils.nextFloat(2950, 3080); 135 | switch (i){ 136 | case 1: 137 | value=RandomUtils.nextFloat(10, 90); 138 | break; 139 | case 2: 140 | value=RandomUtils.nextFloat(0, 80); 141 | break; 142 | } 143 | return value; 144 | } 145 | 146 | public static Float randmomUtils2(int i) throws Exception{ 147 | Float value=RandomUtils.nextFloat(290, 300); 148 | switch (i){ 149 | case 1: 150 | value=RandomUtils.nextFloat(0, 200); 151 | break; 152 | } 153 | return value; 154 | } 155 | 156 | // public static class SimpleStringSchema implements DeserializationSchema, SerializationSchema { 157 | // private static final long serialVersionUID = 1L; 158 | // 159 | // public SimpleStringSchema() { 160 | // } 161 | // 162 | // public String deserialize(byte[] message) { 163 | // return new String(message); 164 | // } 165 | // 166 | // public boolean isEndOfStream(String nextElement) { 167 | // return false; 168 | // } 169 | // 170 | // public byte[] serialize(String element) { 171 | // return element.getBytes(); 172 | // } 173 | // 174 | // public TypeInformation getProducedType() { 175 | // return TypeExtractor.getForClass(String.class); 176 | // } 177 | // } 178 | } 179 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/metricsOp/gaugesOp.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.metricsOp; 2 | 3 | import com.codahale.metrics.ConsoleReporter; 4 | import com.codahale.metrics.Gauge; 5 | import com.codahale.metrics.JmxReporter; 6 | import com.codahale.metrics.MetricRegistry; 7 | 8 | import java.util.Queue; 9 | import java.util.concurrent.LinkedBlockingDeque; 10 | import java.util.concurrent.TimeUnit; 11 | 12 | /** 13 
| * Gauges是一个简单的计量,一般用来统计瞬间状态的数量信息,比如系统中处于pending状态的job 14 | * 这里是测试Gauges,实时统计pending状态的job个数 15 | */ 16 | public class gaugesOp { 17 | /** 18 | * 实例化一个registry,最核心的一个模块,相当于应用程序的metrics系统的容器,维护一个Map 19 | */ 20 | private static final MetricRegistry metrics = new MetricRegistry(); 21 | private static Queue queue = new LinkedBlockingDeque(); 22 | /** 23 | * 在控制台上打印输出 24 | */ 25 | private static ConsoleReporter reporter = ConsoleReporter.forRegistry(metrics).build(); 26 | public static void main(String[] args) throws InterruptedException { 27 | reporter.start(3, TimeUnit.SECONDS); 28 | 29 | //实例化一个Gauge 30 | Gauge gauge = new Gauge() { 31 | @Override 32 | public Integer getValue() { 33 | return queue.size(); 34 | } 35 | }; 36 | 37 | //注册到容器中 38 | metrics.register(MetricRegistry.name(gaugesOp.class, "pending-job", "size"), gauge); 39 | 40 | //测试JMX 41 | JmxReporter jmxReporter = JmxReporter.forRegistry(metrics).build(); 42 | jmxReporter.start(); 43 | 44 | //模拟数据 45 | for (int i=0; i< 20; i++) { 46 | queue.add("a"); 47 | Thread.sleep(1000); 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/client/MessageCollector.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.client; 2 | 3 | import com.z.flinkStreamOptimizatiion.rpc.common.MessageInput; 4 | import com.z.flinkStreamOptimizatiion.rpc.common.MessageOutput; 5 | import com.z.flinkStreamOptimizatiion.rpc.common.MessageRegistry; 6 | import io.netty.channel.ChannelHandler; 7 | import io.netty.channel.ChannelHandlerContext; 8 | import io.netty.channel.ChannelInboundHandlerAdapter; 9 | import org.slf4j.Logger; 10 | import org.slf4j.LoggerFactory; 11 | 12 | import java.util.concurrent.ConcurrentHashMap; 13 | import java.util.concurrent.ConcurrentMap; 14 | import java.util.concurrent.TimeUnit; 15 | 16 | @ChannelHandler.Sharable 17 | public class MessageCollector extends ChannelInboundHandlerAdapter { 18 | 19 | private final static Logger LOG = LoggerFactory.getLogger(MessageCollector.class); 20 | private MessageRegistry registry; 21 | private RPCClient client; 22 | private ChannelHandlerContext context; 23 | private Throwable ConnectionClosed = new Exception("rpc connection not active error"); 24 | private ConcurrentMap> pendingTasks = new ConcurrentHashMap<>(); 25 | 26 | 27 | public MessageCollector(MessageRegistry registry, RPCClient client) { 28 | this.registry = registry; 29 | this.client = client; 30 | } 31 | 32 | @Override 33 | public void channelActive(ChannelHandlerContext ctx) throws Exception { 34 | this.context = ctx; 35 | } 36 | 37 | @Override 38 | public void channelInactive(ChannelHandlerContext ctx) throws Exception { 39 | this.context = null; 40 | pendingTasks.forEach((__, future) -> { 41 | future.fail(ConnectionClosed); 42 | }); 43 | pendingTasks.clear(); 44 | // 尝试重连 45 | ctx.channel().eventLoop().schedule(() -> { 46 | client.reconnect(); 47 | }, 1, TimeUnit.SECONDS); 48 | } 49 | 50 | public RpcFuture send(MessageOutput output) { 51 | ChannelHandlerContext ctx = context; 52 | RpcFuture future = new RpcFuture(); 53 | if (ctx != null) { 54 | ctx.channel().eventLoop().execute(() -> { 55 | pendingTasks.put(output.getRequestId(), future); 56 | ctx.writeAndFlush(output); 57 | }); 58 | } else { 59 | future.fail(ConnectionClosed); 60 | } 61 | return future; 62 | } 63 | 64 | //客户端拿到服务端返回的数据 65 | @Override 66 | public void channelRead(ChannelHandlerContext 
ctx, Object msg) throws Exception { 67 | if (!(msg instanceof MessageInput)) { 68 | return; 69 | } 70 | MessageInput input = (MessageInput) msg; 71 | // 业务逻辑在这里 72 | Class clazz = registry.get(input.getType()); 73 | if (clazz == null) { 74 | LOG.error("unrecognized msg type {}", input.getType()); 75 | return; 76 | } 77 | Object o = input.getPayload(clazz); 78 | @SuppressWarnings("unchecked") 79 | RpcFuture future = (RpcFuture) pendingTasks.remove(input.getRequestId()); 80 | if (future == null) { 81 | LOG.error("future not found with type {}", input.getType()); 82 | return; 83 | } 84 | System.out.println("客户端拿到服务端返回的数据: " + o); 85 | future.success(o); 86 | } 87 | 88 | 89 | @Override 90 | public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception { 91 | 92 | } 93 | 94 | public void close() { 95 | ChannelHandlerContext ctx = context; 96 | if (ctx != null) { 97 | ctx.close(); 98 | } 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/client/RPCClient.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.client; 2 | 3 | import com.z.flinkStreamOptimizatiion.rpc.common.*; 4 | import io.netty.bootstrap.Bootstrap; 5 | import io.netty.channel.ChannelInitializer; 6 | import io.netty.channel.ChannelOption; 7 | import io.netty.channel.ChannelPipeline; 8 | import io.netty.channel.EventLoopGroup; 9 | import io.netty.channel.nio.NioEventLoopGroup; 10 | import io.netty.channel.socket.SocketChannel; 11 | import io.netty.channel.socket.nio.NioSocketChannel; 12 | import io.netty.handler.timeout.ReadTimeoutHandler; 13 | import org.slf4j.Logger; 14 | import org.slf4j.LoggerFactory; 15 | 16 | import java.io.Serializable; 17 | import java.util.concurrent.ExecutionException; 18 | import java.util.concurrent.TimeUnit; 19 | 20 | //连接管理 读写消息 链接重连 21 | public class RPCClient implements Serializable { 22 | private final static Logger LOG = LoggerFactory.getLogger(RPCClient.class); 23 | 24 | private String ip; 25 | private int port; 26 | private Bootstrap bootstrap; 27 | private EventLoopGroup group; 28 | private MessageCollector collector; 29 | private boolean started; 30 | private boolean stopped; 31 | private MessageRegistry registry = new MessageRegistry(); 32 | 33 | public RPCClient(String ip, int port) { 34 | this.ip = ip; 35 | this.port = port; 36 | this.init(); 37 | } 38 | 39 | /** 40 | * 41 | * @param type 42 | * @param rspClass 服务端返回结果的类型 43 | * @return 44 | */ 45 | public RPCClient rpc(String type, Class rspClass) { 46 | //rpc响应类型的注册快速入口 47 | registry.register(type, rspClass); 48 | return this; 49 | } 50 | 51 | public RpcFuture sendAsync(String type, Object payload) { 52 | if (!started) { 53 | connect(); 54 | started = true; 55 | } 56 | String requestId = RequestId.next(); 57 | MessageOutput output = new MessageOutput(requestId, type, payload); 58 | return collector.send(output); 59 | } 60 | 61 | public T send(String type, Object payload) { 62 | //普通rpc请求,正常获取相应 63 | RpcFuture future = sendAsync(type, payload); 64 | try { 65 | return future.get(); 66 | } catch (InterruptedException | ExecutionException e) { 67 | throw new RPCException(e); 68 | } 69 | } 70 | 71 | public void init() { 72 | bootstrap = new Bootstrap(); 73 | group = new NioEventLoopGroup(1); 74 | bootstrap.group(group); 75 | MessageEncoder encoder = new MessageEncoder(); 76 | collector = new MessageCollector(registry, this); 77 | 
bootstrap.channel(NioSocketChannel.class).handler(new ChannelInitializer() { 78 | 79 | @Override 80 | protected void initChannel(SocketChannel ch) throws Exception { 81 | ChannelPipeline pipe = ch.pipeline(); 82 | pipe.addLast(new ReadTimeoutHandler(60)); 83 | pipe.addLast(new MessageDecoder()); 84 | pipe.addLast(encoder); 85 | pipe.addLast(collector); 86 | } 87 | 88 | }); 89 | bootstrap.option(ChannelOption.TCP_NODELAY, true).option(ChannelOption.SO_KEEPALIVE, true); 90 | } 91 | 92 | public void connect() { 93 | bootstrap.connect(ip, port).syncUninterruptibly(); 94 | } 95 | 96 | public void reconnect() { 97 | if (stopped) { 98 | return; 99 | } 100 | bootstrap.connect(ip, port).addListener(future -> { 101 | if (future.isSuccess()) { 102 | return; 103 | } 104 | if (!stopped) { 105 | group.schedule(() -> { 106 | reconnect(); 107 | }, 1, TimeUnit.SECONDS); 108 | } 109 | LOG.error("connect {}:{} failure", ip, port, future.cause()); 110 | }); 111 | } 112 | 113 | public void close() { 114 | stopped = true; 115 | collector.close(); 116 | group.shutdownGracefully(0, 5000, TimeUnit.SECONDS); 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/client/RPCException.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.client; 2 | //定义客户端异常,用于同一抛出RPC错误 3 | public class RPCException extends RuntimeException { 4 | 5 | private static final long serialVersionUID = 1L; 6 | 7 | public RPCException(String message, Throwable cause) { 8 | super(message, cause); 9 | } 10 | 11 | public RPCException(String message) { 12 | super(message); 13 | } 14 | 15 | public RPCException(Throwable cause) { 16 | super(cause); 17 | } 18 | 19 | } 20 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/client/RpcFuture.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.client; 2 | 3 | import org.apache.flink.runtime.executiongraph.Execution; 4 | 5 | import java.util.concurrent.*; 6 | 7 | public class RpcFuture implements Future { 8 | 9 | private T result; 10 | private Throwable error; 11 | private CountDownLatch latch = new CountDownLatch(1); 12 | 13 | @Override 14 | public boolean cancel(boolean mayInterruptIfRunning) { 15 | return false; 16 | } 17 | 18 | @Override 19 | public boolean isCancelled() { 20 | return false; 21 | } 22 | 23 | @Override 24 | public boolean isDone() { 25 | return result != null || error != null; 26 | } 27 | 28 | @Override 29 | public T get() throws InterruptedException, ExecutionException { 30 | latch.await(); 31 | if (error != null) { 32 | throw new ExecutionException(error); 33 | } 34 | return result; 35 | } 36 | 37 | public void success(T result) { 38 | this.result = result; 39 | latch.countDown(); 40 | } 41 | 42 | public void fail(Throwable error) { 43 | this.error = error; 44 | latch.countDown(); 45 | } 46 | 47 | @Override 48 | public T get(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException, TimeoutException { 49 | latch.await(timeout, unit); 50 | if (error != null) { 51 | throw new ExecutionException(error); 52 | } 53 | return result; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/common/Charsets.java: 
-------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.common; 2 | 3 | import java.nio.charset.Charset; 4 | 5 | public class Charsets { 6 | 7 | public static Charset UTF8 = Charset.forName("utf8"); 8 | 9 | } 10 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/common/IMessageHandler.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.common; 2 | 3 | 4 | import io.netty.channel.ChannelHandlerContext; 5 | 6 | //消息处理器接口,每个自定义服务必须实现handle方法 7 | @FunctionalInterface 8 | public interface IMessageHandler { 9 | void handle(ChannelHandlerContext ctx, String requestId, T message); 10 | } 11 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/common/MessageDecoder.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.common; 2 | 3 | import io.netty.buffer.ByteBuf; 4 | import io.netty.channel.ChannelHandlerContext; 5 | import io.netty.handler.codec.DecoderException; 6 | import io.netty.handler.codec.ReplayingDecoder; 7 | 8 | import java.util.List; 9 | 10 | //消息解码器 11 | //使用Netty的ReplayingDecoder实现。简单起见,这里没有使用checkpoint去优化性能了 12 | public class MessageDecoder extends ReplayingDecoder { 13 | 14 | @Override 15 | protected void decode(ChannelHandlerContext ctx, ByteBuf in, List out) throws Exception { 16 | String requestId = readStr(in); 17 | String type = readStr(in); 18 | String content = readStr(in); 19 | out.add(new MessageInput(type, requestId, content)); 20 | } 21 | 22 | private String readStr(ByteBuf in) { 23 | int len = in.readInt(); 24 | if (len < 0 || len > (1 << 20)) { 25 | throw new DecoderException("string too long len=" + len); 26 | } 27 | byte[] bytes = new byte[len]; 28 | in.readBytes(bytes); 29 | return new String(bytes, Charsets.UTF8); 30 | } 31 | 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/common/MessageEncoder.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.common; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import io.netty.buffer.ByteBuf; 5 | import io.netty.buffer.PooledByteBufAllocator; 6 | import io.netty.channel.ChannelHandler; 7 | import io.netty.channel.ChannelHandlerContext; 8 | import io.netty.handler.codec.MessageToMessageEncoder; 9 | 10 | import java.util.List; 11 | 12 | //消息编码器 13 | @ChannelHandler.Sharable 14 | public class MessageEncoder extends MessageToMessageEncoder { 15 | @Override 16 | protected void encode(ChannelHandlerContext ctx, MessageOutput msg, List out) throws Exception { 17 | ByteBuf buf = PooledByteBufAllocator.DEFAULT.directBuffer(); 18 | writeStr(buf, msg.getRequestId()); 19 | writeStr(buf, msg.getType()); 20 | writeStr(buf, JSON.toJSONString(msg.getPayload())); 21 | out.add(buf); 22 | } 23 | private void writeStr(ByteBuf buf, String s) { 24 | buf.writeInt(s.length()); 25 | buf.writeBytes(s.getBytes(Charsets.UTF8)); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/common/MessageHandlers.java: -------------------------------------------------------------------------------- 1 | 
package com.z.flinkStreamOptimizatiion.rpc.common; 2 | 3 | import com.google.common.collect.Maps; 4 | 5 | import java.util.Map; 6 | 7 | public class MessageHandlers { 8 | private Map> handlers = Maps.newHashMap(); 9 | private IMessageHandler defaultHandler; 10 | 11 | public void register(String type, IMessageHandler handler) { 12 | handlers.put(type, handler); 13 | } 14 | 15 | public MessageHandlers defaultHandler(IMessageHandler defaultHandler) { 16 | this.defaultHandler = defaultHandler; 17 | return this; 18 | } 19 | 20 | public IMessageHandler defaultHandler() { 21 | return defaultHandler; 22 | } 23 | 24 | public IMessageHandler get(String type) { 25 | IMessageHandler handler = handlers.get(type); 26 | return handler; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/common/MessageInput.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.common; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | 5 | //定义消息输入输出格式,消息类型、消息唯一ID和消息的json序列化字符串内容。 6 | // 消息唯一ID是用来客户端验证服务器请求和响应是否匹配。 7 | public class MessageInput { 8 | private String type; 9 | private String requestId; 10 | private String payload; 11 | 12 | public MessageInput(String type, String requestId, String payload) { 13 | this.type = type; 14 | this.requestId = requestId; 15 | this.payload = payload; 16 | } 17 | 18 | public String getType() { 19 | return type; 20 | } 21 | 22 | public String getRequestId() { 23 | return requestId; 24 | } 25 | //因为我们想直接拿到对象,所以要提供对象的类型参数 26 | public T getPayload(Class clazz) { 27 | if (payload == null) { 28 | return null; 29 | } 30 | return JSON.parseObject(payload, clazz); 31 | } 32 | 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/common/MessageOutput.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.common; 2 | 3 | public class MessageOutput { 4 | private String requestId; 5 | private String type; 6 | private Object payload; 7 | 8 | public MessageOutput(String requestId, String type, Object payload) { 9 | this.requestId = requestId; 10 | this.type = type; 11 | this.payload = payload; 12 | } 13 | 14 | public String getType() { 15 | return this.type; 16 | } 17 | 18 | public String getRequestId() { 19 | return requestId; 20 | } 21 | 22 | public Object getPayload() { 23 | return payload; 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/common/MessageRegistry.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.common; 2 | 3 | 4 | import com.google.common.collect.Maps; 5 | 6 | import java.util.Map; 7 | 8 | public class MessageRegistry { 9 | private Map> clazzes = Maps.newHashMap(); 10 | //type是命令字,clazz是服务端返回数据的类型 11 | public void register(String type, Class clazz) { 12 | clazzes.put(type, clazz); 13 | } 14 | public Class get(String type) { 15 | return clazzes.get(type); 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/common/RequestId.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.common; 2 | 3 | 
import java.util.UUID; 4 | 5 | public class RequestId { 6 | //简单UUID 64 7 | public static String next() { 8 | return UUID.randomUUID().toString(); 9 | } 10 | 11 | } 12 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/demo/DemoClient.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.demo; 2 | 3 | import com.z.flinkStreamOptimizatiion.rpc.client.RPCClient; 4 | import com.z.flinkStreamOptimizatiion.rpc.client.RPCException; 5 | 6 | import java.io.Serializable; 7 | 8 | //RPC客户端 9 | public class DemoClient implements Serializable { 10 | 11 | private RPCClient client; 12 | 13 | public DemoClient(RPCClient client) { 14 | this.client = client; 15 | this.client.rpc("fib_res", Long.class); 16 | } 17 | 18 | public long fib(int n) { 19 | return (Long) client.send("fib", n); 20 | } 21 | 22 | //RPC客户端要链接远程IP端口,并注册服务输出类(RPC响应类), 23 | // 然后分别调用20次斐波那契服务和指数服务,输出结果 24 | 25 | public static void main(String[] args) throws InterruptedException { 26 | RPCClient client = new RPCClient("localhost", 8888); 27 | DemoClient demo = new DemoClient(client); 28 | for (int i = 0; i < 30; i++) { 29 | try { 30 | System.out.printf("fib(%d) = %d\n", i, demo.fib(i)); 31 | Thread.sleep(100); 32 | } catch (RPCException e) { 33 | i--; // retry 34 | } 35 | } 36 | Thread.sleep(3000); 37 | 38 | 39 | client.close(); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/demo/DemoServer.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.demo; 2 | 3 | import com.z.flinkStreamOptimizatiion.rpc.common.IMessageHandler; 4 | import com.z.flinkStreamOptimizatiion.rpc.common.MessageOutput; 5 | import com.z.flinkStreamOptimizatiion.rpc.server.RPCServer; 6 | import io.netty.channel.ChannelHandlerContext; 7 | 8 | import java.util.ArrayList; 9 | import java.util.List; 10 | 11 | 12 | //斐波那契和指数计算处理 13 | class FibRequestHandler implements IMessageHandler { 14 | 15 | private List fibs = new ArrayList<>(); 16 | 17 | { 18 | fibs.add(1L); // fib(0) = 1 19 | fibs.add(1L); // fib(1) = 1 20 | } 21 | 22 | @Override 23 | public void handle(ChannelHandlerContext ctx, String requestId, Integer n) { 24 | for (int i = fibs.size(); i < n + 1; i++) { 25 | long value = fibs.get(i - 2) + fibs.get(i - 1); 26 | fibs.add(value); 27 | } 28 | //响应输出 29 | ctx.writeAndFlush(new MessageOutput(requestId, "fib_res", fibs.get(n))); 30 | } 31 | 32 | } 33 | 34 | //构建RPC服务器 35 | //RPC服务器要监听指定IP端口,设定IO线程数和业务线程数 36 | //然后注册斐波那契服务输入类,还有响应的计算处理器 37 | public class DemoServer { 38 | 39 | 40 | public static void main(String[] args) { 41 | //RPCServer server = new RPCServer("localhost", 8888, 2, 16); 42 | RPCServer server = new RPCServer("localhost", 8888, 2, 16); 43 | server.service("fib", Integer.class, new FibRequestHandler()); 44 | server.start(); 45 | 46 | 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/server/DefaultHandler.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.server; 2 | 3 | import com.z.flinkStreamOptimizatiion.rpc.common.IMessageHandler; 4 | import com.z.flinkStreamOptimizatiion.rpc.common.MessageInput; 5 | import 
io.netty.channel.ChannelHandlerContext; 6 | import org.slf4j.Logger; 7 | import org.slf4j.LoggerFactory; 8 | 9 | public class DefaultHandler implements IMessageHandler { 10 | 11 | private final static Logger LOG = LoggerFactory.getLogger(DefaultHandler.class); 12 | @Override 13 | public void handle(ChannelHandlerContext ctx, String requestId, MessageInput input) { 14 | LOG.error("unrecognized message type {} comes", input.getType()); 15 | ctx.close(); 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/server/MessageCollector.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.server; 2 | 3 | import com.google.common.collect.Queues; 4 | import com.z.flinkStreamOptimizatiion.rpc.common.IMessageHandler; 5 | import com.z.flinkStreamOptimizatiion.rpc.common.MessageHandlers; 6 | import com.z.flinkStreamOptimizatiion.rpc.common.MessageInput; 7 | import com.z.flinkStreamOptimizatiion.rpc.common.MessageRegistry; 8 | import io.netty.channel.ChannelHandler; 9 | import io.netty.channel.ChannelHandlerContext; 10 | import io.netty.channel.ChannelInboundHandlerAdapter; 11 | import org.slf4j.Logger; 12 | import org.slf4j.LoggerFactory; 13 | 14 | import java.util.concurrent.ArrayBlockingQueue; 15 | import java.util.concurrent.BlockingQueue; 16 | import java.util.concurrent.ThreadFactory; 17 | import java.util.concurrent.ThreadPoolExecutor; 18 | import java.util.concurrent.ThreadPoolExecutor.CallerRunsPolicy; 19 | import java.util.concurrent.TimeUnit; 20 | import java.util.concurrent.atomic.AtomicInteger; 21 | 22 | //Netty事件回调类 23 | //标注一个channel handler可以被多个channel安全地共享 24 | @ChannelHandler.Sharable 25 | public class MessageCollector extends ChannelInboundHandlerAdapter { 26 | 27 | private final static Logger LOG = LoggerFactory.getLogger(MessageCollector.class); 28 | //业务线程池 29 | private ThreadPoolExecutor executor; 30 | private MessageHandlers handlers; 31 | private MessageRegistry registry; 32 | 33 | public MessageCollector(MessageHandlers handlers, MessageRegistry registry, int workerThreads) { 34 | System.out.println("[2] MessageCollector 构造"); 35 | //业务队列最大1000,避免堆积 36 | //如果子线程处理不过来,io线程也会加入业务逻辑(callerRunsPolicy) 37 | BlockingQueue queue = new ArrayBlockingQueue<>(1000); 38 | //给业务线程命名 39 | ThreadFactory factory = new ThreadFactory() { 40 | 41 | AtomicInteger seq = new AtomicInteger(); 42 | 43 | @Override 44 | public Thread newThread(Runnable r) { 45 | Thread t = new Thread(r); 46 | t.setName("rpc-" + seq.getAndIncrement()); 47 | return t; 48 | } 49 | 50 | }; 51 | //闲置时间超过30秒的线程就自动销毁 52 | this.executor = new ThreadPoolExecutor(1, workerThreads, 30, TimeUnit.SECONDS, queue, factory, 53 | new CallerRunsPolicy()); 54 | this.handlers = handlers; 55 | this.registry = registry; 56 | } 57 | 58 | public void closeGracefully() { 59 | //优雅一点关闭,先通知,再等待,最后强制关闭 60 | this.executor.shutdown(); 61 | try { 62 | this.executor.awaitTermination(10, TimeUnit.SECONDS); 63 | } catch (InterruptedException e) { 64 | } 65 | this.executor.shutdownNow(); 66 | } 67 | 68 | @Override 69 | public void channelActive(ChannelHandlerContext ctx) throws Exception { 70 | //客户端来了一个新的连接 71 | LOG.debug("connection comes"); 72 | } 73 | 74 | @Override 75 | public void channelInactive(ChannelHandlerContext ctx) throws Exception { 76 | //客户端走了一个 77 | LOG.debug("connection leaves"); 78 | } 79 | 80 | @Override 81 | public void channelRead(ChannelHandlerContext ctx, 
Object msg) throws Exception { 82 | if (msg instanceof MessageInput) { 83 | //用业务线程处理消息 84 | this.executor.execute(() -> { 85 | this.handleMessage(ctx, (MessageInput) msg); 86 | }); 87 | } 88 | } 89 | 90 | //业务逻辑处理 91 | private void handleMessage(ChannelHandlerContext ctx, MessageInput input) { 92 | Class clazz = registry.get(input.getType()); 93 | if (clazz == null) { 94 | //没注册的消息用默认的处理器处理 95 | handlers.defaultHandler().handle(ctx, input.getRequestId(), input); 96 | return; 97 | } 98 | 99 | Object o = input.getPayload(clazz); 100 | //这里有问题 101 | @SuppressWarnings("unchecked") 102 | IMessageHandler handler = (IMessageHandler) handlers.get(input.getType()); 103 | if (handler != null) { 104 | handler.handle(ctx, input.getRequestId(), o); 105 | } else { 106 | handlers.defaultHandler().handle(ctx, input.getRequestId(), input); 107 | } 108 | 109 | } 110 | 111 | @Override 112 | public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception { 113 | //此处可能因为客户机器突发重启 114 | //也可能客户端连接时间超时,后面的REadTimeoutHandle抛出异常 115 | //也可能消息协议错误,序列化异常 116 | LOG.warn("connection error", cause); 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/rpc/server/RPCServer.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.rpc.server; 2 | 3 | import com.z.flinkStreamOptimizatiion.rpc.common.*; 4 | import io.netty.bootstrap.ServerBootstrap; 5 | import io.netty.channel.*; 6 | import io.netty.channel.nio.NioEventLoopGroup; 7 | import io.netty.channel.socket.SocketChannel; 8 | import io.netty.channel.socket.nio.NioServerSocketChannel; 9 | import io.netty.handler.timeout.ReadTimeoutHandler; 10 | import org.slf4j.Logger; 11 | import org.slf4j.LoggerFactory; 12 | 13 | public class RPCServer { 14 | 15 | private final static Logger LOG = LoggerFactory.getLogger(RPCServer.class); 16 | private String ip; 17 | private int port; 18 | private int ioThreads; //用来处理网络流的读写线程 19 | private int workerThreads; //用来业务处理的计算线程 20 | private MessageHandlers handlers = new MessageHandlers(); 21 | private MessageRegistry registry = new MessageRegistry(); 22 | 23 | // 在构造函数执行之前执行 24 | { 25 | handlers.defaultHandler(new DefaultHandler()); 26 | } 27 | 28 | public RPCServer(String ip, int port, int ioThreads, int workerThreads) { 29 | this.ip = ip; 30 | this.port = port; 31 | this.ioThreads = ioThreads; 32 | this.workerThreads = workerThreads; 33 | } 34 | 35 | private ServerBootstrap bootstrap; 36 | private EventLoopGroup group; 37 | private MessageCollector collector; 38 | private Channel serverChannel; 39 | 40 | // 注册服务的快捷方式 41 | 42 | /** 43 | * 44 | * @param type 45 | * @param reqClass 客户端请求数据的类型 46 | * @param handler 47 | * @return 48 | */ 49 | public RPCServer service(String type, Class reqClass, IMessageHandler handler) { 50 | registry.register(type, reqClass); 51 | handlers.register(type, handler); 52 | return this; 53 | } 54 | 55 | // 启动RPC服务 56 | public void start() { 57 | System.out.println("[1] RPCServer start"); 58 | bootstrap = new ServerBootstrap(); 59 | //用来接收进来的连接 60 | group = new NioEventLoopGroup(ioThreads); 61 | bootstrap.group(group); 62 | collector = new MessageCollector(handlers, registry, workerThreads); 63 | MessageEncoder encoder = new MessageEncoder(); 64 | 65 | //配置Channel 66 | bootstrap.channel(NioServerSocketChannel.class).childHandler(new ChannelInitializer() { 67 | 68 | @Override 69 | protected void 
initChannel(SocketChannel ch) throws Exception { 70 | //注册handler 71 | ChannelPipeline pipe = ch.pipeline(); 72 | //如果客户端60秒没有任何请求,就关闭客户端连接 73 | pipe.addLast(new ReadTimeoutHandler(60)); 74 | //解码器 75 | pipe.addLast(new MessageDecoder()); 76 | //编码器 77 | pipe.addLast(encoder); 78 | //让业务处理器放在最后 79 | pipe.addLast(collector); 80 | 81 | } 82 | }); 83 | 84 | bootstrap.option(ChannelOption.SO_BACKLOG, 100) //客户端套接字默认接受队列的大小 85 | .option(ChannelOption.SO_REUSEADDR, true) //reuse addr 避免端口冲突 86 | .option(ChannelOption.TCP_NODELAY, true) //关闭小烈合并,保证消息的及时性 87 | .childOption(ChannelOption.SO_KEEPALIVE, true); //长时间没动静的连接自动关闭 88 | 89 | //绑定端口,开始接收进来的连接 90 | serverChannel = bootstrap.bind(this.ip, this.port).channel(); 91 | 92 | LOG.warn("server started @ {}:{}\n", ip, port); 93 | } 94 | 95 | public void stop() { 96 | // 先关闭服务端套件字 97 | serverChannel.close(); 98 | // 再斩断消息来源,停止io线程池 99 | group.shutdownGracefully(); 100 | // 最后停止业务线程 101 | collector.closeGracefully(); 102 | } 103 | 104 | } 105 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/state/CountWindowAverage.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.state; 2 | 3 | import org.apache.flink.api.common.functions.RichFlatMapFunction; 4 | import org.apache.flink.api.common.state.ValueState; 5 | import org.apache.flink.api.common.state.ValueStateDescriptor; 6 | import org.apache.flink.api.common.typeinfo.TypeHint; 7 | import org.apache.flink.api.common.typeinfo.TypeInformation; 8 | import org.apache.flink.api.java.tuple.Tuple2; 9 | import org.apache.flink.configuration.Configuration; 10 | import org.apache.flink.util.Collector; 11 | 12 | public class CountWindowAverage extends RichFlatMapFunction, Tuple2> { 13 | 14 | // trasient 不参加序列化过程,不存储 15 | private transient ValueState> sum; 16 | 17 | @Override 18 | public void flatMap(Tuple2 input, Collector> out) throws Exception { 19 | // access the state value 20 | Tuple2 currentSum = sum.value(); 21 | 22 | // update the count 23 | currentSum.f0 += 1; 24 | 25 | // add the second field of the input value 26 | currentSum.f1 += input.f1; 27 | 28 | // update the state 29 | sum.update(currentSum); 30 | 31 | // if the count reaches 2, emit the average and clear the state 32 | 33 | if (currentSum.f0 >= 2) { 34 | out.collect(new Tuple2<>(input.f0, currentSum.f1 / currentSum.f0)); 35 | sum.clear(); 36 | } 37 | } 38 | 39 | @Override 40 | public void open(Configuration config) { 41 | ValueStateDescriptor> descriptor = new ValueStateDescriptor>( 42 | "average", 43 | TypeInformation.of(new TypeHint>() {}), 44 | Tuple2.of(0L, 0L) 45 | ); // default value of the state, if nothing was set 46 | sum = getRuntimeContext().getState(descriptor); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/state/KeyStateMsgDemo.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.state; 2 | 3 | import org.apache.flink.api.java.tuple.Tuple2; 4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 5 | 6 | /** 7 | * 与key相关的状态管理(以key分组进行状态管理) 8 | * 9 | * 补充: 10 | * 与key无关的state,就是与operator绑定的state,整个operator只对应一个state 11 | * 保存operator state的数据结构为ListState 12 | * 举例来说,Flink中的Kafka Connector,就是用来operator state,它会在每个connector实例中,保存该实例中消费 13 | * topic的所有(partition, offset)映射 14 | * 
继承CheckpointedFunction, 实现snapshotState和restoreState 15 | * 16 | */ 17 | 18 | public class KeyStateMsgDemo { 19 | 20 | /** 21 | * if the count reaches 2, emit the average and clear the state 22 | * 所以Tuple2.of(1L, 3L), Tuple2.of(1L, 5L) 一组 23 | * 所以Tuple2.of(1L, 7L),Tuple2.of(1L, 4L)一组 24 | * @param args 25 | * @throws Exception 26 | */ 27 | 28 | public static void main(String[] args) throws Exception { 29 | //获取Flink的运行环境 30 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 31 | env.fromElements(Tuple2.of(1L, 3L), Tuple2.of(1L, 5L), Tuple2.of(1L, 7L), Tuple2.of(1L, 4L), Tuple2.of(1L, 2L)) 32 | .keyBy(0) 33 | .flatMap(new CountWindowAverage()) 34 | .print(); 35 | env.execute("StafulOperator"); 36 | System.out.println("**********************"); 37 | 38 | } 39 | } 40 | 41 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/stream/MyNoParalleSource.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.stream; 2 | 3 | import org.apache.flink.streaming.api.functions.source.SourceFunction; 4 | 5 | public class MyNoParalleSource implements SourceFunction { 6 | 7 | private long count = 1L; 8 | 9 | private boolean isRunning = true; 10 | 11 | @Override 12 | public void run(SourceContext sourceContext) throws Exception { 13 | while(isRunning) { 14 | sourceContext.collect(count); 15 | count ++; 16 | Thread.sleep(1000); 17 | } 18 | } 19 | 20 | @Override 21 | public void cancel() { 22 | isRunning = false; 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/stream/MyNoParalleStrSource.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.stream; 2 | 3 | import org.apache.flink.streaming.api.functions.source.SourceFunction; 4 | 5 | public class MyNoParalleStrSource implements SourceFunction { 6 | 7 | private long count = 1L; 8 | private String str = "test1,test2,"; 9 | private boolean isRunning = true; 10 | 11 | @Override 12 | public void run(SourceContext sourceContext) throws Exception { 13 | while(isRunning) { 14 | sourceContext.collect(str+count); 15 | count ++; 16 | Thread.sleep(1000); 17 | } 18 | } 19 | 20 | @Override 21 | public void cancel() { 22 | isRunning = false; 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/stream/MyParalleSource.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.stream; 2 | 3 | import org.apache.flink.streaming.api.functions.source.ParallelSourceFunction; 4 | 5 | public class MyParalleSource implements ParallelSourceFunction { 6 | private long count = 1L; 7 | private boolean isRunning = true; 8 | 9 | @Override 10 | public void run(SourceContext sourceContext) throws Exception { 11 | while(isRunning) { 12 | sourceContext.collect(count); 13 | count ++; 14 | Thread.sleep(1000); 15 | } 16 | } 17 | 18 | @Override 19 | public void cancel() { 20 | isRunning = false; 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/stream/MyPartition.java: -------------------------------------------------------------------------------- 1 | package 
com.z.flinkStreamOptimizatiion.stream; 2 | 3 | 4 | import org.apache.flink.api.common.functions.Partitioner; 5 | 6 | public class MyPartition implements Partitioner { 7 | 8 | 9 | @Override 10 | public int partition(Long key, int numPartitions) { 11 | System.out.println("分区总数:"+numPartitions); 12 | if (key % 2 == 0) { 13 | return 0; 14 | } else { 15 | return 1; 16 | } 17 | } 18 | 19 | 20 | } 21 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/stream/MyRichParalleSource.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.stream; 2 | 3 | import org.apache.flink.configuration.Configuration; 4 | import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; 5 | 6 | public class MyRichParalleSource extends RichParallelSourceFunction { 7 | private long count = 1L; 8 | private boolean isRunning = true; 9 | /** 10 | * 主要的方法 11 | * 启动一个source 12 | * 大部分情况下,都需要在这个run方法中实现一个循环,这样就可以循环产生数据了 13 | * @param ctx 14 | * @throws Exception 15 | */ 16 | @Override 17 | public void run(SourceContext ctx) throws Exception { 18 | while(isRunning){ 19 | ctx.collect(count); 20 | count++; 21 | //每秒产生一条数据 22 | Thread.sleep(1000); 23 | } 24 | } 25 | /** 26 | * 取消一个cancel的时候会调用的方法 27 | * 28 | */ 29 | @Override 30 | public void cancel() { 31 | isRunning = false; 32 | } 33 | 34 | /** 35 | * 这个方法只会在最开始的时候被调用一次 36 | * 实现获取链接的代码 37 | * @param parameters 38 | * @throws Exception 39 | */ 40 | @Override 41 | public void open(Configuration parameters) throws Exception { 42 | System.out.println("open ..............."); 43 | super.open(parameters); 44 | } 45 | 46 | /** 47 | * 实现关闭链接的代码 48 | * @throws Exception 49 | */ 50 | @Override 51 | public void close() throws Exception { 52 | super.close(); 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/stream/NumberStreamDemo.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.stream; 2 | 3 | import com.google.common.collect.Lists; 4 | import org.apache.flink.api.common.functions.FilterFunction; 5 | import org.apache.flink.api.common.functions.MapFunction; 6 | import org.apache.flink.api.common.functions.ReduceFunction; 7 | import org.apache.flink.api.java.functions.KeySelector; 8 | import org.apache.flink.api.java.tuple.Tuple1; 9 | import org.apache.flink.api.java.tuple.Tuple2; 10 | import org.apache.flink.api.java.tuple.Tuple3; 11 | import org.apache.flink.streaming.api.collector.selector.OutputSelector; 12 | import org.apache.flink.streaming.api.datastream.*; 13 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 14 | import org.apache.flink.streaming.api.functions.co.CoMapFunction; 15 | import org.apache.flink.streaming.api.functions.windowing.AggregateApplyAllWindowFunction; 16 | import org.apache.flink.streaming.api.windowing.time.Time; 17 | 18 | import java.util.ArrayList; 19 | import java.util.HashMap; 20 | import java.util.List; 21 | import java.util.Map; 22 | 23 | //这个类是测试source产生流数据,然后做一些通用操作 24 | public class NumberStreamDemo { 25 | public static void main(String[] args) throws Exception { 26 | 27 | //no paralleSource 流中的wordcount窗口 28 | test1(); 29 | 30 | //paralleSource 31 | //test2(); 32 | 33 | //richParalleSource 34 | // test3(); 35 | 36 | //from Collection 37 | //test4(); 38 | 39 | //filter 40 
| //test5(); 41 | 42 | //multi stream source union 43 | //test6(); 44 | 45 | //two stream source connect 46 | //test7(); 47 | 48 | // split 根据规则把一个数据流切分为多个流,select和split配合使用,选择切分后的流 49 | // test8(); 50 | 51 | // 自定义分区需要实现Partitioner接口 52 | // test9(); 53 | 54 | 55 | } 56 | 57 | private static void test9() throws Exception { 58 | 59 | //dataStream.partitionCustom(partitioner, “someKey”) 针对对象 60 | //dataStream.partitionCustom(partitioner, 0) 针对Tuple 61 | 62 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 63 | env.setParallelism(2); 64 | DataStreamSource text = env.addSource(new MyNoParalleSource()); 65 | 66 | //对数据进行转换,把long类型转成tuple1类型 67 | DataStream> tupleData = text.map(new MapFunction>() { 68 | @Override 69 | 70 | public Tuple1 map(Long value) throws Exception { 71 | return new Tuple1<>(value); 72 | } 73 | }); 74 | 75 | //分区之后的数据 76 | //一条线程一个task,分别处理奇数,偶数 77 | DataStream> partitionData = tupleData.partitionCustom(new MyPartition(), 0); 78 | DataStream result = partitionData.map(new MapFunction, Long>() { 79 | @Override 80 | public Long map(Tuple1 value) throws Exception { 81 | System.out.println("当前线程id:" + Thread.currentThread().getId() + ",value: " + value); 82 | return value.getField(0); 83 | } 84 | }); 85 | 86 | result.print().setParallelism(1); 87 | env.execute("NumberStreamDemo"); 88 | } 89 | 90 | private static void test8() throws Exception { 91 | 92 | // split 根据规则把一个数据流切分为多个流,select和split配合使用,选择切分后的流 93 | 94 | //获取Flink的运行环境 95 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 96 | 97 | //获取数据源 98 | DataStreamSource text = env.addSource(new MyNoParalleSource()).setParallelism(1);//注意:针对此source,并行度只能设置为1 99 | 100 | //对流进行切分,按照数据的奇偶性进行区分 101 | SplitStream splitStream = text.split(new OutputSelector() { 102 | @Override 103 | public Iterable select(Long value) { 104 | List outPut = Lists.newArrayList(); 105 | if (value % 2 == 0) { 106 | outPut.add("even"); //偶数 107 | } else { 108 | outPut.add("odd"); //奇数 109 | } 110 | return outPut; 111 | } 112 | }); 113 | 114 | //选择一个或者多个切分后的流 115 | DataStream evenStream = splitStream.select("even"); 116 | DataStream oddStream = splitStream.select("odd"); 117 | 118 | DataStream moreStream = splitStream.select("odd", "even"); 119 | 120 | //打印结果 121 | evenStream.print().setParallelism(1); 122 | String jobName = NumberStreamDemo.class.getSimpleName(); 123 | env.execute(jobName); 124 | } 125 | 126 | private static void test7() throws Exception { 127 | //Connect:和union类似,但是只能连接两个流,两个流的数据类型可以不同,会对两个流中的数据应用不同的处理方法。 128 | //获取Flink的运行环境 129 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 130 | //获取数据源 131 | DataStreamSource text1 = env.addSource(new MyNoParalleSource()).setParallelism(1);//注意:针对此source,并行度只能设置为1 132 | DataStreamSource text2 = env.addSource(new MyNoParalleSource()).setParallelism(1); 133 | 134 | SingleOutputStreamOperator text2_str = text2.map(new MapFunction() { 135 | @Override 136 | public String map(Long value) throws Exception { 137 | return "str_" + value; 138 | } 139 | }); 140 | 141 | ConnectedStreams connectedStreams = text1.connect(text2_str); 142 | SingleOutputStreamOperator result = connectedStreams.map(new CoMapFunction() { 143 | 144 | @Override 145 | public Object map1(Long value) throws Exception { 146 | return value; 147 | } 148 | 149 | @Override 150 | public Object map2(String value) throws Exception { 151 | return value; 152 | } 153 | }); 154 | 155 | //打印结果 156 | result.print().setParallelism(1); 157 | 
String jobName = NumberStreamDemo.class.getSimpleName(); 158 | env.execute(jobName); 159 | } 160 | 161 | private static void test6() throws Exception { 162 | // Union:合并多个流,新的流会包含所有流中的数据,但是union是一个限制,就是所有合并的流类型必须是一致的。 163 | //获取Flink的运行环境 164 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 165 | 166 | //获取数据源 167 | DataStreamSource text1 = env.addSource(new MyNoParalleSource()).setParallelism(1);//注意:针对此source,并行度只能设置为1 168 | 169 | DataStreamSource text2 = env.addSource(new MyNoParalleSource()).setParallelism(1); 170 | 171 | //把text1和text2组装到一起 172 | DataStream text = text1.union(text2); 173 | 174 | DataStream num = text.map(new MapFunction() { 175 | @Override 176 | public Long map(Long value) throws Exception { 177 | System.out.println("原始接收到数据:" + value); 178 | return value; 179 | } 180 | }); 181 | 182 | //每2秒钟处理一次数据 183 | DataStream sum = num.timeWindowAll(Time.seconds(2)).sum(0); 184 | //打印结果 185 | sum.print().setParallelism(1); 186 | String jobName = NumberStreamDemo.class.getSimpleName(); 187 | env.execute(jobName); 188 | } 189 | 190 | private static void test5() throws Exception { 191 | //获取Flink的运行环境 192 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 193 | //获取数据源 194 | DataStreamSource text = env.addSource(new MyNoParalleSource()).setParallelism(1);//注意:针对此source,并行度只能设置为1 195 | DataStream num = text.map(new MapFunction() { 196 | @Override 197 | public Long map(Long value) throws Exception { 198 | System.out.println("原始接收到数据:" + value); 199 | return value; 200 | } 201 | }); 202 | //执行filter过滤,满足条件的数据会被留下 203 | DataStream filterData = num.filter(new FilterFunction() { 204 | //把所有的奇数过滤掉 205 | @Override 206 | public boolean filter(Long value) throws Exception { 207 | return value % 2 == 0; 208 | } 209 | }); 210 | 211 | DataStream resultData = filterData.map(new MapFunction() { 212 | @Override 213 | public Long map(Long value) throws Exception { 214 | System.out.println("过滤之后的数据:" + value); 215 | return value; 216 | } 217 | }); 218 | 219 | //每2秒钟处理一次数据 220 | DataStream sum = resultData.timeWindowAll(Time.seconds(2)).sum(0); 221 | 222 | //打印结果 223 | sum.print().setParallelism(1); 224 | 225 | String jobName = NumberStreamDemo.class.getSimpleName(); 226 | env.execute(jobName); 227 | 228 | } 229 | 230 | 231 | private static void test4() throws Exception { 232 | //获取Flink的运行环境 233 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 234 | ArrayList data = new ArrayList<>(); 235 | data.add(10); 236 | data.add(15); 237 | data.add(20); 238 | 239 | //指定数据源 240 | DataStreamSource collectionData = env.fromCollection(data); 241 | //通map对数据进行处理 242 | DataStream num = collectionData.map(new MapFunction() { 243 | @Override 244 | public Integer map(Integer value) throws Exception { 245 | return value + 1; 246 | } 247 | }); 248 | 249 | num.print().setParallelism(1); 250 | env.execute("Streaming From Collection"); 251 | } 252 | 253 | private static void test3() throws Exception { 254 | //获取Flink的运行环境 255 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 256 | 257 | //获取数据源 258 | DataStreamSource text = env.addSource(new MyRichParalleSource()).setParallelism(1); 259 | 260 | DataStream num = text.map(new MapFunction() { 261 | @Override 262 | public Long map(Long value) throws Exception { 263 | System.out.println("接收到数据:" + value); 264 | return value; 265 | } 266 | }); 267 | 268 | //每2秒钟处理一次数据 269 | DataStream sum = 
num.timeWindowAll(Time.seconds(2)).sum(0); 270 | 271 | //打印结果 272 | sum.print().setParallelism(1); 273 | 274 | String jobName = NumberStreamDemo.class.getSimpleName(); 275 | env.execute(jobName); 276 | 277 | } 278 | 279 | private static void test2() throws Exception { 280 | //获取Flink的运行环境 281 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 282 | //获取数据源 283 | DataStreamSource text = env.addSource(new MyNoParalleSource()).setParallelism(1); //注意:针对此source,并行度只能设置为1 284 | DataStream num = text.map(new MapFunction() { 285 | @Override 286 | public Long map(Long value) throws Exception { 287 | System.out.println("接收到数据:" + value); 288 | return value; 289 | } 290 | }); 291 | //每2秒钟处理一次数据 292 | DataStream sum = num.timeWindowAll(Time.seconds(2)).sum(0); 293 | //打印结果 294 | sum.print().setParallelism(1); 295 | String jobName = NumberStreamDemo.class.getSimpleName(); 296 | env.execute(jobName); 297 | } 298 | 299 | private static void test1() throws Exception { 300 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 301 | //DataStream someIntegers = env.generateSequence(0, 1000); 302 | DataStreamSource text = env.addSource(new MyNoParalleSource()).setParallelism(1); 303 | DataStream num = text.map(new MapFunction() { 304 | @Override 305 | public Long map(Long value) throws Exception { 306 | return value; 307 | } 308 | }); 309 | 310 | DataStream testBeanStream = num.filter(new FilterFunction() { 311 | @Override 312 | public boolean filter(Long value) throws Exception { 313 | if (value % 2 == 0) { 314 | return true; 315 | } 316 | return false; 317 | } 318 | }).map(new MapFunction() { 319 | @Override 320 | public TestBean map(Long value) throws Exception { 321 | System.out.println("接受到数据:" + new TestBean("a", value, 1L).toString()); 322 | return new TestBean("a", value, 1L); 323 | } 324 | }); 325 | DataStream testBeanStream2 = num.filter(new FilterFunction() { 326 | @Override 327 | public boolean filter(Long value) throws Exception { 328 | if (value % 2 != 0) { 329 | return true; 330 | } 331 | return false; 332 | } 333 | }).map(new MapFunction() { 334 | @Override 335 | public TestBean map(Long value) throws Exception { 336 | System.out.println("接受到数据:" + new TestBean("b", value, 1L).toString()); 337 | return new TestBean("b", value, 1L); 338 | } 339 | }); 340 | 341 | testBeanStream.union(testBeanStream2).keyBy("word").timeWindow(Time.seconds(2), Time.seconds(10)) 342 | .reduce(new ReduceFunction() { 343 | @Override 344 | public TestBean reduce(TestBean first, TestBean second) throws Exception { 345 | return new TestBean(first.word, (first.value + second.value) / (first.count + second.count), first.count + second.count); 346 | } 347 | }) 348 | // .sum("value") 349 | .print(); 350 | 351 | String jobName = NumberStreamDemo.class.getSimpleName(); 352 | env.execute(jobName); 353 | } 354 | 355 | public static class TestBean { 356 | 357 | public String word; 358 | public long value; 359 | public long count;//1 360 | 361 | public TestBean() { 362 | } 363 | 364 | public TestBean(String word, long value, long count) { 365 | this.word = word; 366 | this.count = count; 367 | this.value = value; 368 | } 369 | 370 | @Override 371 | public String toString() { 372 | return "TestBean{" + 373 | "word='" + word + '\'' + 374 | ", value=" + value + 375 | ", count=" + count + 376 | '}'; 377 | } 378 | 379 | } 380 | 381 | } 382 | -------------------------------------------------------------------------------- 
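A note on the windowed average in NumberStreamDemo.test1 above: its reduce combines TestBean records pairwise and divides value by count at every step, so the intermediate value field already holds an average rather than a sum, and with more than two elements per key the result drifts toward an "average of averages". The more common pattern is to accumulate a (sum, count) pair inside the reduce and divide exactly once when the window result is emitted. The following is a minimal editorial sketch of that pattern, not part of this repository; the names AvgSketch, SumReduce and ToAvg are made up for illustration.

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple3;

// Hypothetical helper: accumulate (key, sum, count) in the reduce,
// then turn the final accumulator into an average once per window result.
public class AvgSketch {

    // f0 = key, f1 = running sum of values, f2 = running count
    public static class SumReduce implements ReduceFunction<Tuple3<String, Long, Long>> {
        @Override
        public Tuple3<String, Long, Long> reduce(Tuple3<String, Long, Long> a, Tuple3<String, Long, Long> b) {
            return Tuple3.of(a.f0, a.f1 + b.f1, a.f2 + b.f2);
        }
    }

    // applied after the window fires: one division per emitted result
    public static class ToAvg implements MapFunction<Tuple3<String, Long, Long>, Tuple3<String, Double, Long>> {
        @Override
        public Tuple3<String, Double, Long> map(Tuple3<String, Long, Long> acc) {
            return Tuple3.of(acc.f0, acc.f1 / (double) acc.f2, acc.f2);
        }
    }
}

Assuming the elements were first mapped to (word, value, 1L) tuples, the pipeline in test1 would then read roughly stream.keyBy(0).timeWindow(...).reduce(new SumReduce()).map(new ToAvg()).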
/src/main/java/com/z/flinkStreamOptimizatiion/stream/StreamJoinDataSource1.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.stream; 2 | 3 | import org.apache.flink.api.java.tuple.Tuple3; 4 | import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; 5 | 6 | public class StreamJoinDataSource1 extends RichParallelSourceFunction> { 7 | 8 | private volatile boolean running = true; 9 | 10 | @Override 11 | public void run(SourceContext> ctx) throws Exception { 12 | Tuple3[] elements = new Tuple3[]{ 13 | Tuple3.of("a", "1", 1000000050000L), 14 | Tuple3.of("a", "2", 1000000054000L), 15 | Tuple3.of("a", "3", 1000000079900L), 16 | Tuple3.of("a", "4", 1000000115000L), 17 | Tuple3.of("b", "5", 1000000100000L), 18 | Tuple3.of("b", "6", 1000000108000L) 19 | }; 20 | 21 | int count = 0; 22 | while (running && count < elements.length) { 23 | ctx.collect(new Tuple3<>( 24 | (String)elements[count].f0, 25 | (String)elements[count].f1, 26 | (Long) elements[count].f2 27 | )); 28 | 29 | count ++; 30 | Thread.sleep(1000); 31 | } 32 | } 33 | 34 | 35 | 36 | @Override 37 | public void cancel() { 38 | running = false; 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/stream/StreamJoinDataSource2.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.stream; 2 | 3 | import org.apache.flink.api.java.tuple.Tuple3; 4 | import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; 5 | 6 | public class StreamJoinDataSource2 extends RichParallelSourceFunction> { 7 | 8 | private volatile boolean running = true; 9 | 10 | 11 | @Override 12 | public void run(SourceContext> ctx) throws Exception { 13 | Tuple3[] elements = new Tuple3[]{ 14 | Tuple3.of("a", "hangzhou", 1000000059000L), 15 | Tuple3.of("b", "beijing", 1000000105000L), 16 | }; 17 | 18 | int count = 0; 19 | while(running && count < elements.length) { 20 | ctx.collect(new Tuple3<>( 21 | (String)elements[count].f0, 22 | (String)elements[count].f1, 23 | (Long)elements[count].f2 24 | )); 25 | 26 | count ++; 27 | Thread.sleep(1000); 28 | } 29 | } 30 | 31 | @Override 32 | public void cancel() { 33 | running = false; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/stream/StreamJoinDemo.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.stream; 2 | 3 | import org.apache.flink.api.common.functions.JoinFunction; 4 | import org.apache.flink.api.java.functions.KeySelector; 5 | import org.apache.flink.api.java.tuple.Tuple3; 6 | import org.apache.flink.api.java.tuple.Tuple5; 7 | import org.apache.flink.streaming.api.TimeCharacteristic; 8 | import org.apache.flink.streaming.api.datastream.DataStream; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor; 11 | import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows; 12 | import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows; 13 | import org.apache.flink.streaming.api.windowing.time.Time; 14 | import org.slf4j.Logger; 15 | import org.slf4j.LoggerFactory; 16 | 17 | import 
java.text.SimpleDateFormat; 18 | import java.util.concurrent.TimeUnit; 19 | 20 | import static com.z.flinkStreamOptimizatiion.stream.WindowComputeUtil.myGetWindowStartWithOffset; 21 | 22 | public class StreamJoinDemo { 23 | 24 | private static final Logger LOGGER = LoggerFactory.getLogger(StreamJoinDemo.class); 25 | 26 | /** 27 | * 只有在一个窗口内的数据才能join 28 | * @param args 29 | * @throws Exception 30 | */ 31 | public static void main(String[] args) throws Exception { 32 | 33 | // 双流join 34 | //test1(); 35 | 36 | // default join 37 | test2(); 38 | 39 | 40 | 41 | 42 | } 43 | 44 | private static void test2() throws Exception { 45 | //毫秒为单位 46 | int windowSize = 10; 47 | long delay = 5100L; 48 | 49 | 50 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 51 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 52 | env.setParallelism(1); 53 | 54 | // 设置数据源 55 | DataStream> leftSource = env.addSource(new StreamJoinDataSource1()).name("Demo Source"); 56 | DataStream> rightSource = env.addSource(new StreamJoinDataSource2()).name("Demo Source"); 57 | 58 | 59 | // join 操作 60 | leftSource.join(rightSource) 61 | .where(new LeftSelectKey()) 62 | .equalTo(new RightSelectKey()) 63 | .window(TumblingProcessingTimeWindows.of(Time.of(5, TimeUnit.SECONDS))) 64 | .apply(new JoinFunction, Tuple3, Tuple5>() { 65 | @Override 66 | public Tuple5 join(Tuple3 first, Tuple3 second) { 67 | LOGGER.info("触发双流join窗口运算"); 68 | return new Tuple5<>(first.f0, first.f1, second.f1, first.f2, second.f2); 69 | } 70 | }).print(); 71 | 72 | 73 | env.execute("TimeWindowDemo"); 74 | } 75 | 76 | /** 77 | * 普通双流join处理方式: 78 | * 缺陷:join窗口的双流数据都是被缓存在内存中的,也就是说,如果某个key上的窗口数据太多就会导致JVM OOM。 79 | * 双流join的难点也正是在这里。 80 | * @throws Exception 81 | */ 82 | private static void test1() throws Exception { 83 | /** 84 | * 当设置参数int windowSize = 10; long delay = 5000L;时 85 | * 输出为: 86 | * (a,1,hangzhou,1000000050000,1000000059000) 87 | * (a,2,hangzhou,1000000054000,1000000059000) 88 | * 原因: 89 | * window_end_time < watermark, 导致数据丢失了。 90 | */ 91 | 92 | //毫秒为单位 93 | int windowSize = 10; 94 | long delay = 5100L; 95 | 96 | 97 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 98 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 99 | env.setParallelism(1); 100 | 101 | // 设置数据源 102 | DataStream> leftSource = env.addSource(new StreamJoinDataSource1()).name("Demo Source"); 103 | DataStream> rightSource = env.addSource(new StreamJoinDataSource2()).name("Demo Source"); 104 | 105 | // 设置水位线 106 | DataStream> leftStream = leftSource.assignTimestampsAndWatermarks( 107 | new BoundedOutOfOrdernessTimestampExtractor>(Time.milliseconds(delay)) { 108 | private final long maxOutOfOrderness = delay; 109 | private long currentMaxTimestamp = 0L; 110 | @Override 111 | public long extractTimestamp(Tuple3 element) { 112 | long timestamp = element.f2; 113 | SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); 114 | currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp); 115 | System.out.println("####################################"); 116 | System.out.println("element.f1: " + element.f1 ); 117 | //System.out.println("currentMaxTimestamp: " + currentMaxTimestamp); 118 | System.out.println("水位线(watermark): " + (currentMaxTimestamp - maxOutOfOrderness) + " -> " + format.format(currentMaxTimestamp - maxOutOfOrderness)); 119 | System.out.println("窗口开始时间:" + myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000) + " -> " + 
format.format(myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000))); 120 | System.out.println("窗口结束时间:" + (myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000) + windowSize * 1000) + " -> " + format.format((myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000) + windowSize * 1000))); 121 | System.out.println(element.f1 + " -> " + timestamp + " -> " + format.format(timestamp)); 122 | return timestamp; 123 | } 124 | } 125 | ); 126 | 127 | DataStream> rightStream = rightSource.assignTimestampsAndWatermarks( 128 | new BoundedOutOfOrdernessTimestampExtractor>(Time.milliseconds(delay)) { 129 | private final long maxOutOfOrderness = delay; 130 | private long currentMaxTimestamp = 0L; 131 | @Override 132 | public long extractTimestamp(Tuple3 element) { 133 | long timestamp = element.f2; 134 | SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); 135 | currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp); 136 | System.out.println("####################################"); 137 | System.out.println("element.f1: " + element.f1 ); 138 | //System.out.println("currentMaxTimestamp: " + currentMaxTimestamp); 139 | System.out.println("水位线(watermark): " + (currentMaxTimestamp - maxOutOfOrderness) + " -> " + format.format(currentMaxTimestamp - maxOutOfOrderness)); 140 | System.out.println("窗口开始时间:" + myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000) + " -> " + format.format(myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000))); 141 | System.out.println("窗口结束时间:" + (myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000) + windowSize * 1000) + " -> " + format.format((myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000) + windowSize * 1000))); 142 | System.out.println(element.f1 + " -> " + timestamp + " -> " + format.format(timestamp)); 143 | return timestamp; 144 | } 145 | } 146 | ); 147 | 148 | // join 操作 149 | leftStream.join(rightStream) 150 | .where(new LeftSelectKey()) 151 | .equalTo(new RightSelectKey()) 152 | .window(TumblingEventTimeWindows.of(Time.seconds(windowSize))) 153 | .apply(new JoinFunction, Tuple3, Tuple5>() { 154 | @Override 155 | public Tuple5 join(Tuple3 first, Tuple3 second) { 156 | System.out.println("触发双流join窗口运算"); 157 | return new Tuple5<>(first.f0, first.f1, second.f1, first.f2, second.f2); 158 | } 159 | }).print(); 160 | 161 | 162 | env.execute("TimeWindowDemo"); 163 | } 164 | 165 | private static class LeftSelectKey implements KeySelector, String> { 166 | @Override 167 | public String getKey(Tuple3 w) throws Exception { 168 | return w.f0; 169 | } 170 | } 171 | 172 | 173 | private static class RightSelectKey implements KeySelector, String> { 174 | @Override 175 | public String getKey(Tuple3 w) throws Exception { 176 | return w.f0; 177 | } 178 | } 179 | } 180 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/stream/TimeWindowDemo.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.stream; 2 | 3 | import com.z.flinkStreamOptimizatiion.rpc.client.RPCClient; 4 | import com.z.flinkStreamOptimizatiion.rpc.client.RPCException; 5 | import com.z.flinkStreamOptimizatiion.rpc.demo.DemoClient; 6 | import org.apache.flink.api.common.functions.FlatMapFunction; 7 | import org.apache.flink.api.common.functions.MapFunction; 8 | import org.apache.flink.api.common.functions.ReduceFunction; 9 | import org.apache.flink.api.java.tuple.Tuple3; 10 | import 
org.apache.flink.streaming.api.TimeCharacteristic; 11 | import org.apache.flink.streaming.api.datastream.DataStream; 12 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 13 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; 14 | import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; 15 | import org.apache.flink.streaming.api.watermark.Watermark; 16 | import org.apache.flink.streaming.api.windowing.time.Time; 17 | import org.apache.flink.util.Collector; 18 | 19 | import javax.annotation.Nullable; 20 | import java.text.SimpleDateFormat; 21 | 22 | 23 | /** 24 | * 单流场景下: 25 | * Flink中timeWindow滚动窗口边界和数据延迟问题 26 | * delay代表了能够容忍的时序程度 27 | * 水位 = 目前最大的时间戳 - delay 28 | */ 29 | public class TimeWindowDemo { 30 | 31 | public static void main(String[] args) throws Exception { 32 | // 根据event time和窗口时间大小,计算event time所属的窗口开始时间和结束时间 33 | // test1(); 34 | 35 | // 参考因素:delay + windowSize, 情况一,元素在水位以下,但windows还没被触发计算,参照record 5 36 | // test2(); 37 | 38 | // 参考因素:delay + windowSize, 情况二,元素在水位以下,但windows已经无法被触发计算了 39 | // test3(); 40 | 41 | // 参考因素:delay + windowSize,通过增大delay,来增大失序的容忍程度,确保不丢数据 42 | // test4(); 43 | 44 | // 测试 parallism 45 | // test5(); 46 | 47 | // 测试 flink中的rpc调用(比如flatmap) 48 | // test6(); 49 | 50 | 51 | } 52 | 53 | private static void test6() throws Exception { 54 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 55 | 56 | // 设置数据源 57 | //env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 58 | DataStream> dataStream = env.addSource(new DataSourceForTest4()).name("Demo Source").setParallelism(1); 59 | 60 | DataStream windowCount = dataStream.flatMap(new FlatMapFunction, WordWithCount>() { 61 | @Override 62 | public void flatMap(Tuple3 value, Collector collector) throws Exception { 63 | 64 | // 在flink的map算子中,加入rpc调用,作为中间结果获取的模拟 65 | int testValue = Integer.valueOf(value.f1); 66 | { 67 | RPCClient client = new RPCClient("localhost", 8888); 68 | DemoClient demo = new DemoClient(client); 69 | for (int i = 0; i < 2; i++) { 70 | try { 71 | System.out.printf("fib(%d) = %d\n", i, demo.fib(testValue)); 72 | Thread.sleep(100); 73 | } catch (RPCException e) { 74 | System.out.println(e.getMessage()); 75 | } 76 | } 77 | Thread.sleep(3000); 78 | client.close(); 79 | } 80 | 81 | 82 | collector.collect(new WordWithCount(value.f0, 1L)); 83 | } 84 | }).keyBy("word") 85 | .sum("count"); 86 | 87 | windowCount.print(); 88 | env.execute("streaming word count"); 89 | //Thread.sleep(3000); 90 | 91 | } 92 | 93 | /** 94 | * 主要为了存储单词以及单词出现的次数 95 | */ 96 | public static class WordWithCount{ 97 | public String word; 98 | public long count; 99 | public WordWithCount(){} 100 | public WordWithCount(String word, long count) { 101 | this.word = word; 102 | this.count = count; 103 | } 104 | 105 | @Override 106 | public String toString() { 107 | return "WordWithCount{" + 108 | "word='" + word + '\'' + 109 | ", count=" + count + 110 | '}'; 111 | } 112 | } 113 | 114 | private static void test5() throws Exception { 115 | 116 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 117 | 118 | // 设置数据源 119 | //env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 120 | DataStream> dataStream = env.addSource(new DataSourceForTest4()).name("Demo Source").setParallelism(2); 121 | 122 | DataStream windowCount = dataStream.flatMap(new FlatMapFunction, WordWithCount>() { 123 | @Override 124 | public void flatMap(Tuple3 value, Collector 
collector) throws Exception { 125 | collector.collect(new WordWithCount(value.f0, 1L)); 126 | } 127 | }).keyBy("word") 128 | .sum("count"); 129 | 130 | windowCount.print(); 131 | env.execute("streaming word count"); 132 | } 133 | 134 | 135 | /** 136 | * 观察 record 5 和 record 6, 它们的时间窗口如下: 137 | * 窗口开始时间:1000000100000 -> 2001-09-09 09:48:20.000 138 | * 窗口结束时间:1000000110000 -> 2001-09-09 09:48:30.000 139 | * 它们进来的时候水位线如下: 140 | * 水位线(watermark): 1000000109900 -> 2001-09-09 09:48:29.900 141 | * 也就是说,它们进来的时候,watermark < windows end time 142 | * 这种情况下,就算数据的 eventtime < watermark,数据还是被保留下来,没有丢失。 143 | * @throws Exception 144 | */ 145 | private static void test4() throws Exception { 146 | long delay = 5100L; 147 | int windowSize = 10; 148 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 149 | 150 | // 设置数据源 151 | env.setParallelism(1); 152 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 153 | DataStream> dataStream = env.addSource(new DataSourceForTest4()).name("Demo Source"); 154 | 155 | // 设置水位线 156 | DataStream> watermark = dataStream.assignTimestampsAndWatermarks( 157 | new AssignerWithPeriodicWatermarks>() { 158 | private final long maxOutOfOrderness = delay; 159 | private long currentMaxTimestamp = 0L; 160 | 161 | @Nullable 162 | @Override 163 | public Watermark getCurrentWatermark() { 164 | return new Watermark(currentMaxTimestamp - maxOutOfOrderness); 165 | } 166 | 167 | @Override 168 | public long extractTimestamp(Tuple3 element, long previousElementTimestamp) { 169 | long timestamp = element.f2; 170 | SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); 171 | currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp); 172 | System.out.println("#### 第 " + element.f1 + " 个record ####"); 173 | System.out.println("currentMaxTimestamp: " + currentMaxTimestamp); 174 | System.out.println("水位线(watermark): " + (currentMaxTimestamp - maxOutOfOrderness) + " -> " + format.format(currentMaxTimestamp - maxOutOfOrderness)); 175 | System.out.println("窗口开始时间:" + WindowComputeUtil.myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000) + " -> " + format.format(WindowComputeUtil.myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000))); 176 | System.out.println("窗口结束时间:" + (WindowComputeUtil.myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000) + windowSize * 1000) + " -> " + format.format((WindowComputeUtil.myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000) + windowSize * 1000))); 177 | System.out.println(element.f1 + " -> " + timestamp + " -> " + format.format(timestamp)); 178 | 179 | return timestamp; 180 | } 181 | } 182 | ); 183 | 184 | // 窗口函数进行处理 185 | DataStream> resStream = watermark.keyBy(0).timeWindow(Time.seconds(windowSize)) 186 | .reduce(new ReduceFunction>() { 187 | @Override 188 | public Tuple3 reduce(Tuple3 value1, Tuple3 value2) throws Exception { 189 | return Tuple3.of(value1.f0, "[" + value1.f1 + "," + value2.f1 + "]", 1L); 190 | } 191 | }); 192 | 193 | resStream.print(); 194 | env.execute("event time demo"); 195 | } 196 | 197 | /** 198 | * 观察record 5 和 record 6,它们的窗口属性如下: 199 | * 窗口开始时间:1000000100000 -> 2001-09-09 09:48:20.000 200 | * 窗口结束时间:1000000110000 -> 2001-09-09 09:48:30.000 201 | * windows end time < watermark, 这个窗口已经无法被触发计算了。 202 | * 也就是说,这个窗口创建时,已经 windows end time < watermark,相当于第5第6条记录都丢失了。 203 | * @throws Exception 204 | */ 205 | private static void test3() throws Exception { 206 | long delay = 5000L; 207 | int windowSize = 10; 208 | final 
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 209 | 210 | // 设置数据源 211 | env.setParallelism(1); 212 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 213 | DataStream> dataStream = env.addSource(new DataSourceForTest3()).name("Demo Source"); 214 | 215 | // 设置水位线 216 | DataStream> watermark = dataStream.assignTimestampsAndWatermarks( 217 | new AssignerWithPeriodicWatermarks>() { 218 | private final long maxOutOfOrderness = delay; 219 | private long currentMaxTimestamp = 0L; 220 | 221 | @Nullable 222 | @Override 223 | public Watermark getCurrentWatermark() { 224 | return new Watermark(currentMaxTimestamp - maxOutOfOrderness); 225 | } 226 | 227 | @Override 228 | public long extractTimestamp(Tuple3 element, long previousElementTimestamp) { 229 | long timestamp = element.f2; 230 | SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); 231 | currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp); 232 | System.out.println("#### 第 " + element.f1 + " 个record ####"); 233 | System.out.println("currentMaxTimestamp: " + currentMaxTimestamp); 234 | System.out.println("水位线(watermark): " + (currentMaxTimestamp - maxOutOfOrderness) + " -> " + format.format(currentMaxTimestamp - maxOutOfOrderness)); 235 | System.out.println("窗口开始时间:" + WindowComputeUtil.myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000) + " -> " + format.format(WindowComputeUtil.myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000))); 236 | System.out.println("窗口结束时间:" + (WindowComputeUtil.myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000) + windowSize * 1000) + " -> " + format.format((WindowComputeUtil.myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000) + windowSize * 1000))); 237 | System.out.println(element.f1 + " -> " + timestamp + " -> " + format.format(timestamp)); 238 | 239 | return timestamp; 240 | } 241 | } 242 | ); 243 | 244 | // 窗口函数进行处理 245 | DataStream> resStream = watermark.keyBy(0).timeWindow(Time.seconds(windowSize)) 246 | .reduce(new ReduceFunction>() { 247 | @Override 248 | public Tuple3 reduce(Tuple3 value1, Tuple3 value2) throws Exception { 249 | return Tuple3.of(value1.f0, "[" + value1.f1 + "," + value2.f1 + "]", 1L); 250 | } 251 | }); 252 | 253 | resStream.print(); 254 | env.execute("event time demo"); 255 | } 256 | 257 | /** 258 | * 观察record 5,对于此条记录,元素在水位以下,但windows还没被触发计算 259 | * 到了record 6,水位线在record 5 之上,windows被触发计算 260 | * @throws Exception 261 | */ 262 | private static void test2() throws Exception { 263 | 264 | long delay = 5000L; 265 | int windowSize = 10; 266 | 267 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 268 | 269 | // 设置数据源 270 | env.setParallelism(1); 271 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 272 | DataStream> dataStream = env.addSource(new DataSource()).name("Demo Source"); 273 | 274 | // 设置水位线 275 | DataStream> watermark = dataStream.assignTimestampsAndWatermarks( 276 | new AssignerWithPeriodicWatermarks>() { 277 | private final long maxOutOfOrderness = delay; 278 | private long currentMaxTimestamp = 0L; 279 | 280 | @Nullable 281 | @Override 282 | public Watermark getCurrentWatermark() { 283 | return new Watermark(currentMaxTimestamp - maxOutOfOrderness); 284 | } 285 | 286 | /** 287 | * 触发窗口运算时机: 288 | * 当一条数据过来, 289 | * 1)水位线 > 上一批次的记录的窗口结束时间,之前的数据要进行窗口运算 290 | * 2)水位线 > 上一批次的记录的timestamp,之前的数据要进行窗口计算 291 | * 292 | * 关于是否丢数据: 293 | * 1)如果当前数据的EventTime在WaterMark之上,也就是EventTime > WaterMark。由于数据所属窗口 294 | * 
的WindowEndTime,一定是大于EventTime的。这时有WindowEndTime > EventTime > WaterMark 295 | * 这种情况是一定不会丢数据的。 296 | * 2)如果当前数据的EventTime在WaterMark之下,也就是WaterMark > EventTime,这时要分两种情况: 297 | * 2.1)如果该数据所属窗口的WindowEndTime > WaterMark,表示窗口还没被触发,例如第5个record的情况, 298 | * 即WindowEndTime > WaterMark > EventTime,这种情况数据也是不会丢失的。 299 | * 2.2)如果该数据所属窗口的WaterMark > WindowEndTime, 则表示窗口已经无法被触发, 300 | * 即WaterMark > WindowEndTime > EventTime, 这种情况数据也就丢失了。 301 | * 302 | * 如果第6条record,由于watermark > windows end time ,第6条数据所属的窗口就永远不会被触发计算了。 303 | * @param element 304 | * @param previousElementTimestamp 305 | * @return 306 | */ 307 | @Override 308 | public long extractTimestamp(Tuple3 element, long previousElementTimestamp) { 309 | long timestamp = element.f2; 310 | SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); 311 | currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp); 312 | System.out.println("#### 第 " + element.f1 + " 个record ####"); 313 | System.out.println("currentMaxTimestamp: " + currentMaxTimestamp); 314 | System.out.println("水位线(watermark): " + (currentMaxTimestamp - maxOutOfOrderness) + " -> " + format.format(currentMaxTimestamp - maxOutOfOrderness)); 315 | System.out.println("窗口开始时间:" + WindowComputeUtil.myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000) + " -> " + format.format(WindowComputeUtil.myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000))); 316 | System.out.println("窗口结束时间:" + (WindowComputeUtil.myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000) + windowSize * 1000) + " -> " + format.format((WindowComputeUtil.myGetWindowStartWithOffset(timestamp, 0, windowSize * 1000) + windowSize * 1000))); 317 | System.out.println(element.f1 + " -> " + timestamp + " -> " + format.format(timestamp)); 318 | 319 | return timestamp; 320 | } 321 | } 322 | ); 323 | 324 | // 窗口函数进行处理 325 | DataStream> resStream = watermark.keyBy(0).timeWindow(Time.seconds(windowSize)) 326 | .reduce(new ReduceFunction>() { 327 | @Override 328 | public Tuple3 reduce(Tuple3 value1, Tuple3 value2) throws Exception { 329 | return Tuple3.of(value1.f0, "[" + value1.f1 + "," + value2.f1 + "]", 1L); 330 | } 331 | }); 332 | 333 | resStream.print(); 334 | env.execute("event time demo"); 335 | 336 | 337 | 338 | } 339 | 340 | private static class DataSourceForTest4 extends RichParallelSourceFunction> { 341 | private volatile boolean running = true; 342 | 343 | @Override 344 | public void run(SourceContext> ctx) throws InterruptedException { 345 | Tuple3[] elements = new Tuple3[]{ 346 | Tuple3.of("a", "1", 1000000050000L), 347 | Tuple3.of("a", "2", 1000000054000L), 348 | Tuple3.of("a", "3", 1000000079900L), 349 | Tuple3.of("a", "4", 1000000115000L), 350 | Tuple3.of("b", "5", 1000000100000L), 351 | Tuple3.of("b", "6", 1000000108000L) 352 | }; 353 | 354 | int count = 0; 355 | while (running && count < elements.length) { 356 | ctx.collect(new Tuple3<>((String) elements[count].f0, (String) elements[count].f1, (Long) elements[count].f2)); 357 | count++; 358 | Thread.sleep(1000); 359 | } 360 | } 361 | 362 | @Override 363 | public void cancel() { 364 | running = false; 365 | } 366 | } 367 | 368 | private static class DataSourceForTest3 extends RichParallelSourceFunction> { 369 | private volatile boolean running = true; 370 | 371 | @Override 372 | public void run(SourceContext> ctx) throws InterruptedException { 373 | Tuple3[] elements = new Tuple3[]{ 374 | Tuple3.of("a", "1", 1000000050000L), 375 | Tuple3.of("a", "2", 1000000054000L), 376 | Tuple3.of("a", "3", 1000000079900L), 377 | Tuple3.of("a", 
"4", 1000000120000L), 378 | Tuple3.of("b", "5", 1000000100001L), 379 | Tuple3.of("b", "6", 1000000109000L) 380 | }; 381 | 382 | int count = 0; 383 | while (running && count < elements.length) { 384 | ctx.collect(new Tuple3<>((String) elements[count].f0, (String) elements[count].f1, (Long) elements[count].f2)); 385 | count++; 386 | Thread.sleep(1000); 387 | } 388 | } 389 | 390 | @Override 391 | public void cancel() { 392 | running = false; 393 | } 394 | } 395 | 396 | private static class DataSource extends RichParallelSourceFunction> { 397 | private volatile boolean running = true; 398 | 399 | @Override 400 | public void run(SourceContext> ctx) throws InterruptedException { 401 | Tuple3[] elements = new Tuple3[]{ 402 | Tuple3.of("a", "1", 1000000050000L), 403 | Tuple3.of("a", "2", 1000000054000L), 404 | Tuple3.of("a", "3", 1000000079900L), 405 | Tuple3.of("a", "4", 1000000120000L), 406 | Tuple3.of("b", "5", 1000000111000L), 407 | Tuple3.of("b", "6", 1000000089000L) 408 | }; 409 | 410 | int count = 0; 411 | while (running && count < elements.length) { 412 | ctx.collect(new Tuple3<>((String) elements[count].f0, (String) elements[count].f1, (Long) elements[count].f2)); 413 | count++; 414 | Thread.sleep(1000); 415 | } 416 | } 417 | 418 | @Override 419 | public void cancel() { 420 | running = false; 421 | } 422 | } 423 | 424 | 425 | private static void test1() { 426 | // 毫秒为单位 427 | long windowsize = 10000L; 428 | 429 | // 毫秒为单位, 滚动窗口 offset = 0L 430 | long offset = 0L; 431 | 432 | SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); 433 | long a1 = 1000000050000L; 434 | long a2 = 1000000054000L; 435 | long a3 = 1000000079900L; 436 | long a4 = 1000000120000L; 437 | long b5 = 1000000111000L; 438 | long b6 = 1000000089000L; 439 | 440 | System.out.println(a1 + " -> " + format.format(a1) + "\t所属窗口的开始时间是:" + 441 | WindowComputeUtil.myGetWindowStartWithOffset(a1, offset, windowsize) + " -> " + 442 | format.format( WindowComputeUtil.myGetWindowStartWithOffset(a1, offset, windowsize))); 443 | 444 | System.out.println(a2 + " -> " + format.format(a2) + "\t所属窗口的起始时间是: " + WindowComputeUtil.myGetWindowStartWithOffset(a2, offset, windowsize) + " -> " + format.format(WindowComputeUtil.myGetWindowStartWithOffset(a2, offset, windowsize))); 445 | System.out.println(a3 + " -> " + format.format(a3) + "\t所属窗口的起始时间是: " + WindowComputeUtil.myGetWindowStartWithOffset(a3, offset, windowsize) + " -> " + format.format(WindowComputeUtil.myGetWindowStartWithOffset(a3, offset, windowsize))); 446 | System.out.println(a4 + " -> " + format.format(a4) + "\t所属窗口的起始时间是: " + WindowComputeUtil.myGetWindowStartWithOffset(a4, offset, windowsize) + " -> " + format.format(WindowComputeUtil.myGetWindowStartWithOffset(a4, offset, windowsize))); 447 | System.out.println(b5 + " -> " + format.format(b5) + "\t所属窗口的起始时间是: " + WindowComputeUtil.myGetWindowStartWithOffset(b5, offset, windowsize) + " -> " + format.format(WindowComputeUtil.myGetWindowStartWithOffset(b5, offset, windowsize))); 448 | System.out.println(b6 + " -> " + format.format(b6) + "\t所属窗口的起始时间是: " + WindowComputeUtil.myGetWindowStartWithOffset(b6, offset, windowsize) + " -> " + format.format(WindowComputeUtil.myGetWindowStartWithOffset(b6, offset, windowsize))); 449 | 450 | 451 | System.out.println("-----------------------------------------"); 452 | 453 | } 454 | 455 | 456 | } 457 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/stream/WindowComputeUtil.java: 
-------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.stream; 2 | 3 | public class WindowComputeUtil { 4 | public static long myGetWindowStartWithOffset(long timestamp, long offset, long windowSize) { 5 | return timestamp - (timestamp - offset + windowSize) % windowSize; 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /src/main/java/com/z/flinkStreamOptimizatiion/test/test1.java: -------------------------------------------------------------------------------- 1 | package com.z.flinkStreamOptimizatiion.test; 2 | 3 | import org.codehaus.jackson.JsonEncoding; 4 | import org.codehaus.jackson.JsonGenerator; 5 | import org.codehaus.jackson.map.ObjectMapper; 6 | 7 | import java.io.IOException; 8 | import java.text.SimpleDateFormat; 9 | import java.util.Date; 10 | import java.util.HashMap; 11 | import java.util.Map; 12 | class A { 13 | void test(int i) { 14 | System.out.println("A " + i); 15 | } 16 | } 17 | 18 | class B extends A { 19 | @Override 20 | void test(int i) { 21 | System.out.println("B " + i); 22 | System.out.println("bbbbb"); 23 | } 24 | } 25 | 26 | class C extends B { 27 | 28 | } 29 | public class test1 { 30 | public static void main(String[] args) throws Exception { 31 | // tm='1908.0', duration='22000.0', count=0.08672727272727272} 32 | // tm='55041.0', duration='55000.0', count=0.0 33 | // tm='47097.0', duration='46000.0', count=1.0238478260869566 34 | // double exp = testExp(); 35 | // testTimestamp(); 36 | // testMap2Json(); 37 | // testpb(); 38 | // testTypeHandler(); 39 | testObj(); 40 | 41 | } 42 | 43 | private static void testObj() { 44 | C c = new C(); 45 | c.test(1); 46 | } 47 | 48 | private static void testTypeHandler() { 49 | Map typeHandler = new HashMap<>(); 50 | typeHandler.put(AllType.a.name(), new AHandler()); 51 | typeHandler.put(AllType.b.name(), new BHandler()); 52 | // typeHandler.put("a", new AHandler()); 53 | // typeHandler.put("b", new BHandler()); 54 | 55 | typeHandler.get("b").handleSink(1); 56 | typeHandler.get("a").handleSink(1); 57 | 58 | } 59 | 60 | public enum AllType { 61 | a, 62 | b; 63 | } 64 | 65 | interface Handler { 66 | void handleSink(int data); 67 | } 68 | 69 | static class AHandler implements Handler { 70 | @Override 71 | public void handleSink(int data) { 72 | System.out.println("type: a, value: " + data); 73 | } 74 | } 75 | 76 | static class BHandler implements Handler { 77 | 78 | @Override 79 | public void handleSink(int data) { 80 | System.out.println("type: b, value: " + data); 81 | } 82 | } 83 | 84 | private static void testpb() { 85 | SliceActVV.userInfo.Builder usrInfo = SliceActVV.userInfo.newBuilder(); 86 | usrInfo.setRtUClick(1L); 87 | usrInfo.setRtUReveal(2L); 88 | usrInfo.setRtURpt(0.5); 89 | SliceActVV.userInfo userInfo2 = usrInfo.build(); 90 | userInfo2 = userInfo2.toBuilder().setRtURpt(0.6).build(); 91 | System.out.println(userInfo2); 92 | } 93 | 94 | private static void testMap2Json() throws IOException { 95 | Map map = new HashMap(); 96 | map.put("users", 1); 97 | map.put("u", 1); 98 | ObjectMapper objectMapper = new ObjectMapper(); 99 | byte[] ob = objectMapper.writeValueAsBytes(map); 100 | Map map2 = (HashMap)objectMapper.readValue(ob, Map.class); 101 | System.out.println(map2); 102 | 103 | } 104 | 105 | private static void testTimestamp() { 106 | long timestamp = System.currentTimeMillis(); 107 | System.out.println("timestamp: " + timestamp); 108 | SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd 
HH:mm:ss");//设置日期格式 109 | String date = df.format(new Date());// new Date()为获取当前系统时间,也可使用当前时间戳 110 | System.out.println("date: " + date); 111 | System.out.println("timestamp date: " + df.format(timestamp)); 112 | } 113 | 114 | private static double testExp() throws Exception{ 115 | double res = 47097.0 / 46000.0; 116 | System.out.println(res); 117 | throw new RuntimeException("cao"); 118 | 119 | } 120 | 121 | } 122 | -------------------------------------------------------------------------------- /src/main/resources/consumer.properties: -------------------------------------------------------------------------------- 1 | ############################# 2 | # kafka consumer配置 3 | ############################# 4 | #kafka common 5 | bootstrap.servers=localhost:9092 6 | # Kafka Consumer 7 | group.id=consumer-1 8 | key.deserializer=org.apache.kafka.common.serialization.StringDeserializer 9 | value.deserializer=org.apache.kafka.common.serialization.StringDeserializer 10 | heartbeat.interval.ms=5000 11 | session.timeout.ms=10000 12 | enable.auto.commit=true 13 | auto.commit.interval.ms=10000 14 | #auto.offset.reset=earliest 15 | auto.offset.reset=latest 16 | connections.max.idle.ms=540000 17 | max.poll.records=10 18 | #client.id=id1 19 | -------------------------------------------------------------------------------- /src/main/resources/hbase-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | hbase.zookeeper.property.clientPort 6 | 2181 7 | 8 | 9 | 10 | hbase.zookeeper.quorum 11 | localhost:9092 12 | 13 | 14 | 15 | 16 | zookeeper.znode.parent 17 | /hbase-unsecure 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengxiaosen/flinkMultiStreamOptimization/4f1df37f8a5053e4f2bac9b06b59e1027faf158f/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /src/main/resources/producer.properties: -------------------------------------------------------------------------------- 1 | ############################# 2 | # kafka producer配置 3 | ############################# 4 | bootstrap.servers=localhost:9092 5 | # ack方式,all,会等所有的commit最慢的方式 6 | acks=1 7 | # 客户端如果发送失败则会重新发送 8 | retries=5 9 | # 默认立即发送,这里这是延时毫秒数 10 | linger.ms=10 11 | # 生产者用来缓存等待发送到服务器的消息的内存总字节数,不宜过大 12 | key.serializer=org.apache.kafka.common.serialization.StringSerializer 13 | value.serializer=org.apache.kafka.common.serialization.StringSerializer 14 | # producer会阻塞max.block.ms,超时则抛出异常,此处设为3m 15 | max.block.ms=3 16 | #Producer可以用来缓存数据的内存大小。该值实际为RecordAccumulator类中的BufferPool, 17 | #即Producer所管理的最大内存。如果数据产生速度大于向broker发送的速度, 18 | buffer.memory=3145728 19 | #Producer用于压缩数据的压缩类型,取值:none, gzip, snappy, or lz4 20 | compression.type=snappy 21 | # 当多个消息要发送到相同分区的时,生产者尝试将消息批量打包在一起,以减少请求交互 22 | #Producer可以将发往同一个Partition的数据做成一个Produce Request发送请求, 23 | # 即Batch批处理,以减少请求次数,该值即为每次批处理的大小。 24 | #另外每个Request请求包含多个Batch,每个Batch对应一个Partition, 25 | #且一个Request发送的目的Broker均为这些partition的leader副本。 26 | #若将该值设为0,则不会进行批处理,此处设为1m 27 | batch.size=1048576 28 | maxRatePerPartition=10 29 | send.buffer.bytes=131072 30 | #请求的最大字节数。这也是对最大消息大小的有效限制。注意:server具有自己对消息大小的限制, 31 | #这些大小和这个设置不同。此项设置将会限制producer每次批量发送请求的数目,以防发出巨量的请求。 32 | #此处设置为3m 33 | max.request.size=3145728 --------------------------------------------------------------------------------