├── pom.xml
└── src
└── main
├── java
└── org
│ └── myorg
│ └── quickstart
│ ├── BatchJob.java
│ ├── CEP11
│ ├── AlertEvent.java
│ ├── LogInEvent.java
│ ├── LoginStreamingCEP.java
│ ├── PayEvent.java
│ ├── PayStreamingCEP.java
│ ├── ResultPayEvent.java
│ ├── StreamingCep.java
│ ├── TransactionEvent.java
│ └── TransactionStreamingCEP.java
│ ├── CoreConcepts07
│ └── BatchJob.java
│ ├── DataSkew
│ ├── CountAggregate.java
│ ├── CountProcessFunction.java
│ ├── CountRecord.java
│ └── Record.java
│ ├── DataStreamAPI04
│ ├── MyStreamingSource.java
│ └── StreamingDemo2.java
│ ├── Dim19
│ ├── DimSync.java
│ ├── LRU.java
│ ├── Order.java
│ └── WholeLoad.java
│ ├── Distinct20
│ ├── BitMapDistinct.java
│ ├── BloomFilterDistinct.java
│ ├── HyperLogLogDistinct.java
│ ├── MapStateDistinctFunction.java
│ └── RedisSinkDistinct.java
│ ├── RedisSink27
│ ├── RedisConnector.java
│ ├── RedisSink01.java
│ ├── RedisSink02.java
│ └── SelfRedisSink.java
│ ├── SideOutPut10
│ ├── StreamingDemoFilter.java
│ ├── StreamingDemoSideOutPut.java
│ └── StreamingDemoSplit.java
│ ├── State09
│ └── BatchJob1.java
│ ├── StreamingJob.java
│ ├── Table05
│ ├── Item.java
│ ├── MyStreamingSource.java
│ └── ResultItem.java
│ ├── WordCountSQL.java
│ ├── shizhan01
│ ├── CustomDeSerializationSchema.java
│ ├── KafkaConsumer.java
│ ├── KafkaProducer.java
│ └── MyNoParalleSource.java
│ ├── shizhan02
│ ├── DateUtil.java
│ ├── KafkaConsumer.java
│ ├── KafkaProducer.java
│ ├── MyFlatMapFunction.java
│ ├── MyHbaseSink.java
│ ├── MyProcessAllWindowFunction.java
│ ├── MyProcessWindowFunction.java
│ ├── MyProcessWindowFunctionBitMap.java
│ ├── MyRedisSink.java
│ ├── PVUVCount.java
│ ├── PVUVCountBitMap.java
│ ├── PVUVCountKeyById.java
│ ├── PVUVCountKeyByIdMysqlSink.java
│ ├── UserActionFilter.java
│ ├── UserActionProcessFunction.java
│ └── UserClick.java
│ ├── topn28
│ ├── OrderDetail.java
│ ├── TopN.java
│ └── TopNAllWindowFunction.java
│ ├── watermark08
│ └── WindowWaterMark.java
│ └── windowfunction26
│ ├── CounterTest.java
│ ├── MyAggregateFunction.java
│ └── MyReduceFunction.java
└── resources
└── log4j.properties
/pom.xml:
--------------------------------------------------------------------------------
1 |
19 |
21 | 4.0.0
22 |
23 | org.myorg.quickstart
24 | quickstart
25 | 0.1
26 | jar
27 |
28 | Flink Quickstart Job
29 | http://www.myorganization.org
30 |
31 |
32 | UTF-8
33 | 1.10.0
34 | 1.8
35 | 2.11
36 | ${java.version}
37 | ${java.version}
38 |
39 |
40 |
41 |
42 | apache.snapshots
43 | Apache Development Snapshot Repository
44 | https://repository.apache.org/content/repositories/snapshots/
45 |
46 | false
47 |
48 |
49 | true
50 |
51 |
52 |
53 |
54 |
55 |
56 | org.apache.flink
57 | flink-java
58 | ${flink.version}
59 |
60 |
61 | org.apache.flink
62 | flink-streaming-java_${scala.binary.version}
63 | ${flink.version}
64 |
65 |
66 | org.apache.flink
67 | flink-table-api-java-bridge_2.11
68 | 1.10.0
69 |
70 |
71 |
72 | org.apache.flink
73 | flink-table-planner-blink_2.11
74 | 1.10.0
75 |
76 |
77 | org.apache.flink
78 | flink-jdbc_2.11
79 | 1.10.0
80 |
81 |
82 | mysql
83 | mysql-connector-java
84 | 5.1.46
85 |
86 |
87 | org.apache.flink
88 | flink-table-planner_2.11
89 | 1.10.0
90 |
91 |
92 | org.apache.flink
93 | flink-table-api-scala-bridge_2.11
94 | 1.10.0
95 |
96 |
97 | org.slf4j
98 | slf4j-log4j12
99 | 1.7.7
100 |
101 |
102 | log4j
103 | log4j
104 | 1.2.17
105 |
106 |
107 | org.apache.flink
108 | flink-cep_2.11
109 | 1.10.0
110 |
111 |
112 |
113 | com.alibaba
114 | fastjson
115 | 1.2.71
116 |
117 |
118 | org.hbase
119 | asynchbase
120 | 1.8.2
121 |
122 |
123 |
124 | net.agkn
125 | hll
126 | 1.6.0
127 |
128 |
129 | org.roaringbitmap
130 | RoaringBitmap
131 | 0.8.0
132 |
133 |
134 | org.apache.flink
135 | flink-connector-kafka_2.11
136 | 1.10.0
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 | org.apache.flink
145 | flink-connector-redis_2.11
146 | 1.1.5
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 | org.apache.flink
156 | flink-connector-jdbc_2.11
157 | 1.11.0
158 |
159 |
160 | //habse 依赖
161 |
162 | org.apache.hbase
163 | hbase-client
164 | 1.2.6.1
165 |
166 |
167 |
168 | org.apache.hadoop
169 | hadoop-common
170 | 2.7.5
171 |
172 |
173 | com.google.code.gson
174 | gson
175 | 2.8.5
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 | org.apache.maven.plugins
185 | maven-compiler-plugin
186 | 3.1
187 |
188 | ${java.version}
189 | ${java.version}
190 |
191 |
192 |
193 |
194 |
195 |
196 | org.apache.maven.plugins
197 | maven-shade-plugin
198 | 3.1.1
199 |
200 |
201 |
202 | package
203 |
204 | shade
205 |
206 |
207 |
208 |
209 | org.apache.flink:force-shading
210 | com.google.code.findbugs:jsr305
211 | org.slf4j:*
212 | log4j:*
213 |
214 |
215 |
216 |
217 |
219 | *:*
220 |
221 | META-INF/*.SF
222 | META-INF/*.DSA
223 | META-INF/*.RSA
224 |
225 |
226 |
227 |
228 |
229 | org.myorg.quickstart.StreamingJob
230 |
231 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
239 |
240 |
241 |
242 |
243 | org.eclipse.m2e
244 | lifecycle-mapping
245 | 1.0.0
246 |
247 |
248 |
249 |
250 |
251 | org.apache.maven.plugins
252 | maven-shade-plugin
253 | [3.1.1,)
254 |
255 | shade
256 |
257 |
258 |
259 |
260 |
261 |
262 |
263 |
264 | org.apache.maven.plugins
265 | maven-compiler-plugin
266 | [3.1,)
267 |
268 | testCompile
269 | compile
270 |
271 |
272 |
273 |
274 |
275 |
276 |
277 |
278 |
279 |
280 |
281 |
282 |
283 |
284 |
--------------------------------------------------------------------------------
/src/main/java/org/myorg/quickstart/BatchJob.java:
--------------------------------------------------------------------------------
1 | package org.myorg.quickstart;/*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 |
19 | import org.apache.flink.api.common.functions.FlatMapFunction;
20 | import org.apache.flink.api.java.DataSet;
21 | import org.apache.flink.api.java.ExecutionEnvironment;
22 | import org.apache.flink.api.java.tuple.Tuple2;
23 | import org.apache.flink.util.Collector;
24 |
25 | public class BatchJob {
26 |
27 |
28 | public static void main(String[] args) throws Exception {
29 |
30 | final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
31 | env.setParallelism(5);
32 | // get input data
33 | DataSet text = env.fromElements(
34 | "Flink Spark Storm",
35 | "Flink Flink Flink",
36 | "Spark Spark Spark",
37 | "Storm Storm Storm"
38 | );
39 |
40 |
41 | DataSet> counts =
42 | text.flatMap(new LineSplitter())
43 | .groupBy(0)
44 | .sum(1).setParallelism(1);
45 |
46 | counts.printToErr();
47 |
48 | }
49 |
50 |
51 | public static final class LineSplitter implements FlatMapFunction> {
52 |
53 | @Override
54 | public void flatMap(String value, Collector> out) {
55 | // normalize and split the line
56 | String[] tokens = value.toLowerCase().split("\\W+");
57 |
58 | for (String token : tokens) {
59 | if (token.length() > 0) {
60 | out.collect(new Tuple2(token, 1));
61 | }
62 | }
63 | }
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/src/main/java/org/myorg/quickstart/CEP11/AlertEvent.java:
--------------------------------------------------------------------------------
1 | package org.myorg.quickstart.CEP11;
2 |
3 |
/**
 * Alert emitted by the CEP examples when a pattern matches.
 *
 * <p>The class follows Flink's POJO rules (public class, public no-argument constructor,
 * getters and setters for every field) so that it does not have to be handled as a generic type.
 */
public class AlertEvent {
    private String id;
    private String message;

    /** Creates an empty alert; required for Flink to recognise the class as a POJO. */
    public AlertEvent() {
    }

    /**
     * @param id      identifier of the entity the alert is about
     * @param message human-readable alert text
     */
    public AlertEvent(String id, String message) {
        this.id = id;
        this.message = message;
    }

    /** @return identifier of the entity the alert is about */
    public String getId() {
        return id;
    }

    /** @param id identifier of the entity the alert is about */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the alert text */
    public String getMessage() {
        return message;
    }

    /** @param message the alert text */
    public void setMessage(String message) {
        this.message = message;
    }

    @Override
    public String toString() {
        return "AlertEvent{"
                + "id='" + id + '\''
                + ", message='" + message + '\''
                + '}';
    }
}
37 |
--------------------------------------------------------------------------------
/src/main/java/org/myorg/quickstart/CEP11/LogInEvent.java:
--------------------------------------------------------------------------------
1 | package org.myorg.quickstart.CEP11;
2 |
3 |
/**
 * A single login attempt used as input of the login CEP example.
 *
 * <p>The class follows Flink's POJO rules (public class, public no-argument constructor,
 * getters and setters for every field) so that it does not have to be handled as a generic type.
 */
public class LogInEvent {

    private Long userId;
    private String isSuccess;
    private Long timeStamp;

    /** Creates an empty event; required for Flink to recognise the class as a POJO. */
    public LogInEvent() {
    }

    /**
     * @param userId    id of the user who tried to log in
     * @param isSuccess outcome of the attempt; the examples in this package use
     *                  {@code "success"} and {@code "fail"}
     * @param timeStamp event time of the attempt (epoch milliseconds in the examples)
     */
    public LogInEvent(Long userId, String isSuccess, Long timeStamp) {
        this.userId = userId;
        this.isSuccess = isSuccess;
        this.timeStamp = timeStamp;
    }

    /** @return id of the user who tried to log in */
    public Long getUserId() {
        return userId;
    }

    /** @param userId id of the user who tried to log in */
    public void setUserId(Long userId) {
        this.userId = userId;
    }

    /** @return outcome of the attempt */
    public String getIsSuccess() {
        return isSuccess;
    }

    /** @param isSuccess outcome of the attempt */
    public void setIsSuccess(String isSuccess) {
        this.isSuccess = isSuccess;
    }

    /** @return event time of the attempt */
    public Long getTimeStamp() {
        return timeStamp;
    }

    /** @param timeStamp event time of the attempt */
    public void setTimeStamp(Long timeStamp) {
        this.timeStamp = timeStamp;
    }

    @Override
    public String toString() {
        return "LogInEvent{"
                + "userId=" + userId
                + ", isSuccess='" + isSuccess + '\''
                + ", timeStamp=" + timeStamp
                + '}';
    }
}
50 |
--------------------------------------------------------------------------------
/src/main/java/org/myorg/quickstart/CEP11/LoginStreamingCEP.java:
--------------------------------------------------------------------------------
1 | package org.myorg.quickstart.CEP11;
2 |
3 | import org.apache.flink.api.java.functions.KeySelector;
4 | import org.apache.flink.cep.CEP;
5 | import org.apache.flink.cep.PatternStream;
6 | import org.apache.flink.cep.functions.PatternProcessFunction;
7 | import org.apache.flink.cep.pattern.Pattern;
8 | import org.apache.flink.cep.pattern.conditions.IterativeCondition;
9 | import org.apache.flink.streaming.api.TimeCharacteristic;
10 | import org.apache.flink.streaming.api.datastream.DataStream;
11 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
12 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
13 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
14 | import org.apache.flink.streaming.api.watermark.Watermark;
15 | import org.apache.flink.streaming.api.windowing.time.Time;
16 | import org.apache.flink.util.Collector;
17 |
18 | import javax.annotation.Nullable;
19 | import java.util.List;
20 | import java.util.Map;
21 |
22 | public class LoginStreamingCEP {
23 |
24 | public static void main(String[] args) throws Exception{
25 |
26 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
27 | env.setParallelism(1);
28 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
29 |
30 | DataStream source = env.fromElements(
31 | new LogInEvent(1L, "fail", 1597905234000L),
32 | new LogInEvent(1L, "success", 1597905235000L),
33 | new LogInEvent(2L, "fail", 1597905236000L),
34 | new LogInEvent(2L, "fail", 1597905237000L),
35 | new LogInEvent(2L, "fail", 1597905238000L),
36 | new LogInEvent(3L, "fail", 1597905239000L),
37 | new LogInEvent(3L, "success", 1597905240000L)
38 |
39 | ).assignTimestampsAndWatermarks(new BoundedOutOfOrdernessGenerator()).keyBy(new KeySelector() {
40 | @Override
41 | public Object getKey(LogInEvent value) throws Exception {
42 | return value.getUserId();
43 | }
44 | });
45 |
46 | Pattern pattern = Pattern.begin("start").where(new IterativeCondition() {
47 | @Override
48 | public boolean filter(LogInEvent value, Context ctx) throws Exception {
49 | return value.getIsSuccess().equals("fail");
50 | }
51 | }).next("next").where(new IterativeCondition() {
52 | @Override
53 | public boolean filter(LogInEvent value, Context ctx) throws Exception {
54 | return value.getIsSuccess().equals("fail");
55 | }
56 | }).within(Time.seconds(5));
57 |
58 | PatternStream patternStream = CEP.pattern(source, pattern);
59 |
60 | SingleOutputStreamOperator process = patternStream.process(new PatternProcessFunction() {
61 | @Override
62 | public void processMatch(Map> match, Context ctx, Collector out) throws Exception {
63 |
64 | List start = match.get("start");
65 | List next = match.get("next");
66 | System.err.println("start:" + start + ",next:" + next);
67 |
68 |
69 | out.collect(new AlertEvent(String.valueOf(start.get(0).getUserId()), "出现连续登陆失败"));
70 | }
71 | });
72 |
73 | process.printToErr();
74 | env.execute("execute cep");
75 |
76 | }
77 |
78 | private static class BoundedOutOfOrdernessGenerator implements AssignerWithPeriodicWatermarks{
79 |
80 | private final long maxOutOfOrderness = 5000L;
81 | private long currentTimeStamp;
82 |
83 | @Nullable
84 | @Override
85 | public Watermark getCurrentWatermark() {
86 | return new Watermark(currentTimeStamp - maxOutOfOrderness);
87 | }
88 |
89 | @Override
90 | public long extractTimestamp(LogInEvent element, long previousElementTimestamp) {
91 |
92 | Long timeStamp = element.getTimeStamp();
93 | currentTimeStamp = Math.max(timeStamp, currentTimeStamp);
94 | // System.err.println(element.toString() + ",EventTime:" + timeStamp + ",watermark:" + (currentTimeStamp - maxOutOfOrderness));
95 | return timeStamp;
96 | }
97 | }
98 |
99 |
100 | }
101 |
--------------------------------------------------------------------------------
/src/main/java/org/myorg/quickstart/CEP11/PayEvent.java:
--------------------------------------------------------------------------------
1 | package org.myorg.quickstart.CEP11;
2 |
3 |
/**
 * An order-related user action used as input of the payment CEP example.
 *
 * <p>The class follows Flink's POJO rules (public class, public no-argument constructor,
 * getters and setters for every field) so that it does not have to be handled as a generic type.
 */
public class PayEvent {

    private Long userId;
    private String action;
    private Long timeStamp;

    /** Creates an empty event; required for Flink to recognise the class as a POJO. */
    public PayEvent() {
    }

    /**
     * @param userId    id of the acting user
     * @param action    what the user did; the examples in this package use
     *                  {@code "create"} and {@code "pay"}
     * @param timeStamp event time of the action (epoch milliseconds in the examples)
     */
    public PayEvent(Long userId, String action, Long timeStamp) {
        this.userId = userId;
        this.action = action;
        this.timeStamp = timeStamp;
    }

    /** @return id of the acting user */
    public Long getUserId() {
        return userId;
    }

    /** @param userId id of the acting user */
    public void setUserId(Long userId) {
        this.userId = userId;
    }

    /** @return the action name */
    public String getAction() {
        return action;
    }

    /** @param action the action name */
    public void setAction(String action) {
        this.action = action;
    }

    /** @return event time of the action */
    public Long getTimeStamp() {
        return timeStamp;
    }

    /** @param timeStamp event time of the action */
    public void setTimeStamp(Long timeStamp) {
        this.timeStamp = timeStamp;
    }

    @Override
    public String toString() {
        return "PayEvent{"
                + "userId=" + userId
                + ", action='" + action + '\''
                + ", timeStamp=" + timeStamp
                + '}';
    }
}
49 |
--------------------------------------------------------------------------------
/src/main/java/org/myorg/quickstart/CEP11/PayStreamingCEP.java:
--------------------------------------------------------------------------------
1 | package org.myorg.quickstart.CEP11;
2 |
3 | import org.apache.flink.api.java.functions.KeySelector;
4 | import org.apache.flink.cep.CEP;
5 | import org.apache.flink.cep.PatternSelectFunction;
6 | import org.apache.flink.cep.PatternStream;
7 | import org.apache.flink.cep.PatternTimeoutFunction;
8 | import org.apache.flink.cep.pattern.Pattern;
9 | import org.apache.flink.cep.pattern.conditions.IterativeCondition;
10 | import org.apache.flink.streaming.api.TimeCharacteristic;
11 | import org.apache.flink.streaming.api.datastream.DataStream;
12 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
13 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
14 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
15 | import org.apache.flink.streaming.api.watermark.Watermark;
16 | import org.apache.flink.streaming.api.windowing.time.Time;
17 | import org.apache.flink.util.OutputTag;
18 |
19 | import javax.annotation.Nullable;
20 | import java.util.List;
21 | import java.util.Map;
22 |
23 | public class PayStreamingCEP {
24 |
25 | public static void main(String[] args) throws Exception{
26 |
27 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
28 | env.setParallelism(1);
29 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
30 |
31 | DataStream source = env.fromElements(
32 | new PayEvent(1L, "create", 1597905234000L),
33 | new PayEvent(1L, "pay", 1597905235000L),
34 | new PayEvent(2L, "create", 1597905236000L),
35 | new PayEvent(2L, "pay", 1597905237000L),
36 | new PayEvent(3L, "create", 1597905239000L)
37 |
38 |
39 | ).assignTimestampsAndWatermarks(new BoundedOutOfOrdernessGenerator()).keyBy(new KeySelector() {
40 | @Override
41 | public Object getKey(PayEvent value) throws Exception {
42 | return value.getUserId();
43 | }
44 | });
45 |
46 | OutputTag orderTimeoutOutput = new OutputTag("orderTimeout") {};
47 |
48 | Pattern pattern = Pattern.
49 | begin("begin")
50 | .where(new IterativeCondition() {
51 | @Override
52 | public boolean filter(PayEvent payEvent, Context context) throws Exception {
53 | return payEvent.getAction().equals("create");
54 | }
55 | })
56 | .next("next")
57 | .where(new IterativeCondition() {
58 | @Override
59 | public boolean filter(PayEvent payEvent, Context context) throws Exception {
60 | return payEvent.getAction().equals("pay");
61 | }
62 | })
63 | .within(Time.seconds(600));
64 |
65 | PatternStream patternStream = CEP.pattern(source, pattern);
66 |
67 | SingleOutputStreamOperator result = patternStream.select(orderTimeoutOutput, new PatternTimeoutFunction() {
68 | @Override
69 | public PayEvent timeout(Map> map, long l) throws Exception {
70 | return map.get("begin").get(0);
71 | }
72 | }, new PatternSelectFunction() {
73 | @Override
74 | public PayEvent select(Map> map) throws Exception {
75 | return map.get("next").get(0);
76 | }
77 | });
78 |
79 |
80 | DataStream sideOutput = result.getSideOutput(orderTimeoutOutput);
81 | sideOutput.printToErr();
82 |
83 | env.execute("execute cep");
84 |
85 | }
86 |
87 |
88 | private static class BoundedOutOfOrdernessGenerator implements AssignerWithPeriodicWatermarks{
89 |
90 | private final long maxOutOfOrderness = 5000L;
91 | private long currentTimeStamp;
92 |
93 | @Nullable
94 | @Override
95 | public Watermark getCurrentWatermark() {
96 | return new Watermark(currentTimeStamp - maxOutOfOrderness);
97 | }
98 |
99 | @Override
100 | public long extractTimestamp(PayEvent element, long previousElementTimestamp) {
101 |
102 | Long timeStamp = element.getTimeStamp();
103 | currentTimeStamp = Math.max(timeStamp, currentTimeStamp);
104 | // System.err.println(element.toString() + ",EventTime:" + timeStamp + ",watermark:" + (currentTimeStamp - maxOutOfOrderness));
105 | return timeStamp;
106 | }
107 | }
108 |
109 |
110 | }
111 |
--------------------------------------------------------------------------------
/src/main/java/org/myorg/quickstart/CEP11/ResultPayEvent.java:
--------------------------------------------------------------------------------
1 | package org.myorg.quickstart.CEP11;
2 |
/**
 * Outcome of a payment pattern for one user.
 *
 * <p>The class follows Flink's POJO rules (public class, public no-argument constructor,
 * getters and setters for every field) and offers a {@link #toString()} so that results can
 * actually be read and printed.
 */
public class ResultPayEvent {

    private Long userId;
    private String type;

    /** Creates an empty result; required for Flink to recognise the class as a POJO. */
    public ResultPayEvent() {
    }

    /**
     * @param userId id of the user the result belongs to
     * @param type   kind of result (free-form label chosen by the caller)
     */
    public ResultPayEvent(Long userId, String type) {
        this.userId = userId;
        this.type = type;
    }

    /** @return id of the user the result belongs to */
    public Long getUserId() {
        return userId;
    }

    /** @param userId id of the user the result belongs to */
    public void setUserId(Long userId) {
        this.userId = userId;
    }

    /** @return kind of result */
    public String getType() {
        return type;
    }

    /** @param type kind of result */
    public void setType(String type) {
        this.type = type;
    }

    @Override
    public String toString() {
        return "ResultPayEvent{"
                + "userId=" + userId
                + ", type='" + type + '\''
                + '}';
    }
}
13 |
--------------------------------------------------------------------------------
/src/main/java/org/myorg/quickstart/CEP11/StreamingCep.java:
--------------------------------------------------------------------------------
1 | package org.myorg.quickstart.CEP11;
2 |
3 | import org.apache.flink.api.java.tuple.Tuple3;
4 | import org.apache.flink.cep.CEP;
5 | import org.apache.flink.cep.PatternSelectFunction;
6 | import org.apache.flink.cep.PatternStream;
7 | import org.apache.flink.cep.PatternTimeoutFunction;
8 | import org.apache.flink.cep.pattern.conditions.IterativeCondition;
9 | import org.apache.flink.cep.pattern.conditions.SimpleCondition;
10 | import org.apache.flink.streaming.api.datastream.DataStream;
11 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
12 | import org.apache.flink.streaming.api.datastream.KeyedStream;
13 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
14 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
15 | import org.apache.flink.cep.pattern.Pattern;
16 | import org.apache.flink.streaming.api.windowing.time.Time;
17 | import org.apache.flink.util.OutputTag;
18 |
19 | import java.util.List;
20 | import java.util.Map;
21 |
22 | public class StreamingCep {
23 |
24 | public static void main(String[] args) throws Exception{
25 |
26 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
27 | env.setParallelism(1);
28 |
29 | DataStreamSource source = env.fromElements(
30 | //浏览记录
31 | Tuple3.of("Marry", "外套", 1L),
32 |
33 | Tuple3.of("Marry", "帽子",1L),
34 | Tuple3.of("Marry", "帽子",2L),
35 | Tuple3.of("Marry", "帽子",3L),
36 |
37 | Tuple3.of("Ming", "衣服",1L),
38 |
39 | Tuple3.of("Marry", "鞋子",1L),
40 | Tuple3.of("Marry", "鞋子",2L),
41 |
42 | Tuple3.of("LiLei", "帽子",1L),
43 | Tuple3.of("LiLei", "帽子",2L),
44 | Tuple3.of("LiLei", "帽子",3L)
45 | );
46 | //定义Pattern,寻找连续搜索帽子的用户
47 | Pattern, Tuple3> pattern = Pattern
48 | .>begin("start")
49 | .where(new SimpleCondition>() {
50 | @Override
51 | public boolean filter(Tuple3 value) throws Exception {
52 | return value.f1.equals("帽子");
53 | }
54 | }) //.timesOrMore(3);
55 | .next("middle")
56 | .where(new SimpleCondition>() {
57 | @Override
58 | public boolean filter(Tuple3 value) throws Exception {
59 | return value.f1.equals("帽子");
60 | }
61 | });
62 |
63 | // Pattern.begin("start").where(new IterativeCondition() {
64 | // @Override
65 | // public boolean filter(LogInEvent value, Context ctx) throws Exception {
66 | // return value.getIsSuccess().equals("fail");
67 | // }
68 | // }).next("next").where(new IterativeCondition() {
69 | // @Override
70 | // public boolean filter(LogInEvent value, Context ctx) throws Exception {
71 | // return value.getIsSuccess().equals("fail");
72 | // }
73 | // }).within(Time.seconds(5));
74 |
75 |
76 |
77 | KeyedStream keyedStream = source.keyBy(0);
78 | PatternStream patternStream = CEP.pattern(keyedStream, pattern);
79 |
80 | SingleOutputStreamOperator matchStream = patternStream.select(new PatternSelectFunction, String>() {
81 | @Override
82 | public String select(Map>> pattern) throws Exception {
83 | List> middle = pattern.get("middle");
84 | return middle.get(0).f0 + ":" + middle.get(0).f2 + ":" + "连续搜索两次帽子!";
85 | }
86 | });
87 |
88 | //////////////
89 |
90 | // Pattern.
91 | // begin("begin")
92 | // .where(new IterativeCondition() {
93 | // @Override
94 | // public boolean filter(PayEvent payEvent, Context context) throws Exception {
95 | // return payEvent.getAction().equals("create");
96 | // }
97 | // })
98 | // .next("next")
99 | // .where(new IterativeCondition() {
100 | // @Override
101 | // public boolean filter(PayEvent payEvent, Context context) throws Exception {
102 | // return payEvent.getAction().equals("pay");
103 | // }
104 | // })
105 | // .within(Time.seconds(600));
106 | // OutputTag orderTiemoutOutput = new OutputTag("orderTimeout") {};
107 | //
108 | // SingleOutputStreamOperator selectResult = patternStream.select(orderTiemoutOutput,
109 | // (PatternTimeoutFunction) (map, l) -> new ResultPayEvent(map.get("begin").get(0).getUserId(), "timeout"),
110 | // (PatternSelectFunction) map -> new ResultPayEvent(map.get("next").get(0).getUserId(), "success")
111 | // );
112 | // DataStream timeOutSideOutputStream = selectResult.getSideOutput(orderTiemoutOutput);
113 |
114 | ///////////
115 |
116 | Pattern.begin("start").where(
117 | new SimpleCondition() {
118 | @Override
119 | public boolean filter(TransactionEvent transactionEvent) {
120 | return transactionEvent.getAmount() > 0;
121 | }
122 | }
123 | ).timesOrMore(5)
124 | .within(Time.hours(24));
125 |
126 |
127 |
128 |
129 |
130 |
131 | ////////////
132 |
133 |
134 |
135 | matchStream.printToErr();
136 | env.execute("execute cep");
137 |
138 | }
139 |
140 |
141 | // class ResultPayEvent{
142 | // private Long userId;
143 | // private String type;
144 | //
145 | // public ResultPayEvent(Long userId, String type) {
146 | // this.userId = userId;
147 | // this.type = type;
148 | // }
149 | // }
150 |
151 | }//
152 |
--------------------------------------------------------------------------------
/src/main/java/org/myorg/quickstart/CEP11/TransactionEvent.java:
--------------------------------------------------------------------------------
1 | package org.myorg.quickstart.CEP11;
2 |
3 |
/**
 * A single account transaction used as input of the transaction CEP example.
 *
 * <p>The class follows Flink's POJO rules (public class, public no-argument constructor,
 * getters and setters for every field) so that it does not have to be handled as a generic type.
 *
 * <p>The property name {@code accout} (sic) is kept as is: it is part of the public accessors
 * and of the {@link #toString()} output.
 */
public class TransactionEvent {

    private String accout;
    private Double amount;
    private Long timeStamp;

    /** Creates an empty event; required for Flink to recognise the class as a POJO. */
    public TransactionEvent() {
    }

    /**
     * @param accout    account identifier
     * @param amount    transaction amount
     * @param timeStamp event time of the transaction (epoch milliseconds in the examples)
     */
    public TransactionEvent(String accout, Double amount, Long timeStamp) {
        this.accout = accout;
        this.amount = amount;
        this.timeStamp = timeStamp;
    }

    /** @return account identifier */
    public String getAccout() {
        return accout;
    }

    /** @param accout account identifier */
    public void setAccout(String accout) {
        this.accout = accout;
    }

    /** @return transaction amount */
    public Double getAmount() {
        return amount;
    }

    /** @param amount transaction amount */
    public void setAmount(Double amount) {
        this.amount = amount;
    }

    /** @return event time of the transaction */
    public Long getTimeStamp() {
        return timeStamp;
    }

    /** @param timeStamp event time of the transaction */
    public void setTimeStamp(Long timeStamp) {
        this.timeStamp = timeStamp;
    }

    @Override
    public String toString() {
        return "TransactionEvent{"
                + "accout='" + accout + '\''
                + ", amount=" + amount
                + ", timeStamp=" + timeStamp
                + '}';
    }
}
50 |
--------------------------------------------------------------------------------
/src/main/java/org/myorg/quickstart/CEP11/TransactionStreamingCEP.java:
--------------------------------------------------------------------------------
1 | package org.myorg.quickstart.CEP11;
2 |
3 | import org.apache.flink.api.java.functions.KeySelector;
4 | import org.apache.flink.cep.CEP;
5 | import org.apache.flink.cep.PatternStream;
6 | import org.apache.flink.cep.functions.PatternProcessFunction;
7 | import org.apache.flink.cep.pattern.Pattern;
8 | import org.apache.flink.cep.pattern.conditions.IterativeCondition;
9 | import org.apache.flink.cep.pattern.conditions.SimpleCondition;
10 | import org.apache.flink.streaming.api.TimeCharacteristic;
11 | import org.apache.flink.streaming.api.datastream.DataStream;
12 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
13 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
14 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
15 | import org.apache.flink.streaming.api.watermark.Watermark;
16 | import org.apache.flink.streaming.api.windowing.time.Time;
17 | import org.apache.flink.util.Collector;
18 |
19 | import javax.annotation.Nullable;
20 | import java.util.List;
21 | import java.util.Map;
22 |
23 | public class TransactionStreamingCEP {
24 |
25 | public static void main(String[] args) throws Exception{
26 |
27 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
28 | env.setParallelism(1);
29 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
30 |
31 | DataStream source = env.fromElements(
32 | new TransactionEvent("100XX", 0.0D, 1597905234000L),
33 | new TransactionEvent("100XX", 100.0D, 1597905235000L),
34 | new TransactionEvent("100XX", 200.0D, 1597905236000L),
35 | new TransactionEvent("100XX", 300.0D, 1597905237000L),
36 | new TransactionEvent("100XX", 400.0D, 1597905238000L),
37 | new TransactionEvent("100XX", 500.0D, 1597905239000L),
38 | new TransactionEvent("101XX", 0.0D, 1597905240000L),
39 | new TransactionEvent("101XX", 100.0D, 1597905241000L)
40 |
41 |
42 | ).assignTimestampsAndWatermarks(new BoundedOutOfOrdernessGenerator()).keyBy(new KeySelector() {
43 | @Override
44 | public Object getKey(TransactionEvent value) throws Exception {
45 | return value.getAccout();
46 | }
47 | });
48 |
49 | Pattern pattern = Pattern.begin("start").where(
50 | new SimpleCondition() {
51 | @Override
52 | public boolean filter(TransactionEvent transactionEvent) {
53 | return transactionEvent.getAmount() > 0;
54 | }
55 | }
56 | ).timesOrMore(5)
57 | .within(Time.hours(24));
58 |
59 | PatternStream patternStream = CEP.pattern(source, pattern);
60 |
61 | SingleOutputStreamOperator process = patternStream.process(new PatternProcessFunction() {
62 | @Override
63 | public void processMatch(Map> match, Context ctx, Collector out) throws Exception {
64 |
65 | List start = match.get("start");
66 | List next = match.get("next");
67 | System.err.println("start:" + start + ",next:" + next);
68 |
69 | out.collect(new AlertEvent(start.get(0).getAccout(), "连续有效交易!"));
70 | }
71 | });
72 |
73 | process.printToErr();
74 | env.execute("execute cep");
75 |
76 | }
77 |
78 | private static class BoundedOutOfOrdernessGenerator implements AssignerWithPeriodicWatermarks{
79 |
80 | private final long maxOutOfOrderness = 5000L;
81 | private long currentTimeStamp;
82 |
83 | @Nullable
84 | @Override
85 | public Watermark getCurrentWatermark() {
86 | return new Watermark(currentTimeStamp - maxOutOfOrderness);
87 | }
88 |
89 | @Override
90 | public long extractTimestamp(TransactionEvent element, long previousElementTimestamp) {
91 |
92 | Long timeStamp = element.getTimeStamp();
93 | currentTimeStamp = Math.max(timeStamp, currentTimeStamp);
94 | // System.err.println(element.toString() + ",EventTime:" + timeStamp + ",watermark:" + (currentTimeStamp - maxOutOfOrderness));
95 | return timeStamp;
96 | }
97 | }
98 |
99 |
100 | }
101 |
--------------------------------------------------------------------------------
/src/main/java/org/myorg/quickstart/CoreConcepts07/BatchJob.java:
--------------------------------------------------------------------------------
1 |
2 | package org.myorg.quickstart.CoreConcepts07;
3 |
4 | import org.apache.commons.io.FileUtils;
5 | import org.apache.flink.api.common.functions.RichMapFunction;
6 | import org.apache.flink.api.common.restartstrategy.RestartStrategies;
7 | import org.apache.flink.api.common.time.Time;
8 | import org.apache.flink.api.java.DataSet;
9 | import org.apache.flink.api.java.ExecutionEnvironment;
10 | import org.apache.flink.api.java.operators.DataSource;
11 | import org.apache.flink.configuration.Configuration;
12 | import org.apache.flink.configuration.RestartStrategyOptions;
13 | import org.apache.flink.runtime.executiongraph.restart.RestartStrategy;
14 |
15 | import java.io.File;
16 | import java.util.ArrayList;
17 | import java.util.List;
18 | import java.util.concurrent.TimeUnit;
19 |
20 | public class BatchJob {
21 |
22 |
23 | public static void main(String[] args) throws Exception {
24 |
25 | // set up the execution environment
26 | final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
27 | // env.setRestartStrategy(RestartStrategies.noRestart());
28 | // env.setRestartStrategy(RestartStrategies.fixedDelayRestart(
29 | // 3, // 尝试重启的次数
30 | // Time.of(10, TimeUnit.SECONDS) // 延时
31 | // ));
32 |
33 | env.setRestartStrategy(RestartStrategies.failureRateRestart(
34 | 3, // 每个时间间隔的最大故障次数
35 | Time.of(5, TimeUnit.MINUTES), // 测量故障率的时间间隔
36 | Time.of(5, TimeUnit.SECONDS) // 延时
37 | ));
38 |
39 |
40 |
41 |
42 |
43 |
44 | env.registerCachedFile("/Users/wangchangye/WorkSpace/quickstart/distributedcache.txt", "distributedCache");
45 | //1:注册一个文件,可以使用hdfs上的文件 也可以是本地文件进行测试
46 | DataSource data = env.fromElements("Linea", "Lineb", "Linec", "Lined");
47 |
48 | DataSet result = data.map(new RichMapFunction() {
49 | private ArrayList dataList = new ArrayList();
50 |
51 | @Override
52 | public void open(Configuration parameters) throws Exception {
53 | super.open(parameters);
54 | //2:使用文件
55 | File myFile = getRuntimeContext().getDistributedCache().getFile("distributedCache");
56 | List lines = FileUtils.readLines(myFile);
57 | for (String line : lines) {
58 | this.dataList.add(line);
59 | System.err.println("分布式缓存为:" + line);
60 | }
61 | }
62 |
63 | @Override
64 | public String map(String value) throws Exception {
65 | //在这里就可以使用dataList
66 | System.err.println("使用datalist:" + dataList + "------------" +value);
67 | //业务逻辑
68 | return dataList +":" + value;
69 | }
70 | });
71 |
72 | result.printToErr();
73 | }
74 | }
75 |
76 |
--------------------------------------------------------------------------------
/src/main/java/org/myorg/quickstart/DataSkew/CountAggregate.java:
--------------------------------------------------------------------------------
1 | package org.myorg.quickstart.DataSkew;
2 |
3 | import org.apache.flink.api.common.functions.AggregateFunction;
4 |
5 |
6 |
7 | public class CountAggregate implements AggregateFunction {
8 |
9 |
10 | @Override
11 | public CountRecord createAccumulator() {
12 | return new CountRecord(null, 0L);
13 | }
14 |
15 | @Override
16 | public CountRecord add(Record value, CountRecord accumulator) {
17 |
18 | if(accumulator.getKey() == null){
19 | accumulator.setKey(value.key);
20 | }
21 | accumulator.setCount(value.count);
22 | return accumulator;
23 | }
24 |
25 | @Override
26 | public CountRecord getResult(CountRecord accumulator) {
27 | return accumulator;
28 | }
29 |
30 | @Override
31 | public CountRecord merge(CountRecord a, CountRecord b) {
32 | return new CountRecord(a.getKey(),a.getCount()+b.getCount()) ;
33 | }
34 | }//
35 |
--------------------------------------------------------------------------------
/src/main/java/org/myorg/quickstart/DataSkew/CountProcessFunction.java:
--------------------------------------------------------------------------------
1 | package org.myorg.quickstart.DataSkew;
2 |
3 | import org.apache.flink.api.common.state.ValueState;
4 | import org.apache.flink.api.common.state.ValueStateDescriptor;
5 | import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
6 | import org.apache.flink.util.Collector;
7 |
8 |
9 | public class CountProcessFunction extends KeyedProcessFunction {
10 |
11 | private ValueState state = this.getRuntimeContext().getState(new ValueStateDescriptor("count",Long.class));
12 | @Override
13 | public void processElement(CountRecord value, Context ctx, Collector out) throws Exception {
14 |
15 | if(state.value()==0){
16 | state.update(value.count);
17 | ctx.timerService().registerProcessingTimeTimer(ctx.timerService().currentProcessingTime() + 1000L * 5);
18 | }else{
19 | state.update(state.value() + value.count);
20 | }
21 | }
22 |
23 | @Override
24 | public void onTimer(long timestamp, OnTimerContext ctx, Collector out) throws Exception {
25 |
26 | //这里可以做业务操作,例如每5分钟将统计结果发送出去
27 | //out.collect(...);
28 | //清除状态
29 | state.clear();
30 |
31 | //注册新的定时器
32 | ctx.timerService().registerProcessingTimeTimer(ctx.timerService().currentProcessingTime() + 1000L * 5);
33 |
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/src/main/java/org/myorg/quickstart/DataSkew/CountRecord.java:
--------------------------------------------------------------------------------
1 | package org.myorg.quickstart.DataSkew;
2 |
3 |
/**
 * Mutable key/count pair.
 *
 * <p>Has a public no-argument constructor plus getters and setters so that it satisfies
 * Flink's POJO rules. The fields stay package-private because other classes in this
 * package read them directly.
 */
public class CountRecord {
    String key;
    Long count;

    /** Creates an empty record; both fields are {@code null} until set. */
    public CountRecord() {
    }

    /**
     * @param key   grouping key, may be {@code null}
     * @param count count associated with the key
     */
    public CountRecord(String key, Long count) {
        this.key = key;
        this.count = count;
    }

    public String getKey() {
        return key;
    }

    public void setKey(String key) {
        this.key = key;
    }

    public Long getCount() {
        return count;
    }

    public void setCount(Long count) {
        this.count = count;
    }

    @Override
    public String toString() {
        return "CountRecord{key='" + key + "', count=" + count + '}';
    }
}
29 |
--------------------------------------------------------------------------------
/src/main/java/org/myorg/quickstart/DataSkew/Record.java:
--------------------------------------------------------------------------------
1 | package org.myorg.quickstart.DataSkew;
2 |
3 |
/**
 * Input record: a key together with a count.
 *
 * <p>Mirrors {@code CountRecord}: a public no-argument constructor, a convenience
 * constructor and getters/setters. The fields stay package-private because other classes
 * in this package read them directly.
 */
public class Record {
    String key;
    Long count;

    /** Creates an empty record; both fields are {@code null} until set. */
    public Record() {
    }

    /**
     * @param key   grouping key
     * @param count count carried by this record
     */
    public Record(String key, Long count) {
        this.key = key;
        this.count = count;
    }

    public String getKey() {
        return key;
    }

    public void setKey(String key) {
        this.key = key;
    }

    public Long getCount() {
        return count;
    }

    public void setCount(Long count) {
        this.count = count;
    }

    @Override
    public String toString() {
        return "Record{key='" + key + "', count=" + count + '}';
    }
}
8 |
--------------------------------------------------------------------------------
/src/main/java/org/myorg/quickstart/DataStreamAPI04/MyStreamingSource.java:
--------------------------------------------------------------------------------
1 | package org.myorg.quickstart.DataStreamAPI04;
2 |
3 |
4 | import org.apache.flink.api.common.functions.FilterFunction;
5 | import org.apache.flink.api.common.functions.FlatMapFunction;
6 | import org.apache.flink.api.common.functions.MapFunction;
7 | import org.apache.flink.api.common.functions.RichMapFunction;
8 | import org.apache.flink.streaming.api.datastream.DataStream;
9 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
10 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
12 | import org.apache.flink.streaming.api.functions.source.SourceFunction;
13 | import org.apache.flink.util.Collector;
14 |
15 | import java.util.Random;
16 |
17 | public class MyStreamingSource implements SourceFunction {
18 |
19 | private boolean isRunning = true;
20 |
21 | /**
22 | * 重写run方法产生一个源源不断的数据发送源
23 | * @param ctx
24 | * @throws Exception
25 | */
26 | @Override
27 | public void run(SourceContext- ctx) throws Exception {
28 | while(isRunning){
29 | Item item = generateItem();
30 | ctx.collect(item);
31 |
32 | //每秒产生一条数据
33 | Thread.sleep(1000);
34 | }
35 | }
36 | @Override
37 | public void cancel() {
38 | isRunning = false;
39 | }
40 |
41 | //随机产生一条商品数据
42 | private Item generateItem(){
43 | int i = new Random().nextInt(100);
44 |
45 | Item item = new Item();
46 | item.setName("name" + i);
47 | item.setId(i);
48 | return item;
49 | }
50 |
51 | class Item{
52 | private String name;
53 | private Integer id;
54 |
55 | Item() {
56 | }
57 |
58 | public String getName() {
59 | return name;
60 | }
61 |
62 | public void setName(String name) {
63 | this.name = name;
64 | }
65 |
66 | public Integer getId() {
67 | return id;
68 | }
69 |
70 | public void setId(Integer id) {
71 | this.id = id;
72 | }
73 |
74 | @Override
75 | public String toString() {
76 | return "Item{" +
77 | "name='" + name + '\'' +
78 | ", id=" + id +
79 | '}';
80 | }
81 | }
82 | }
83 |
84 |
85 | class StreamingDemo {
86 | public static void main(String[] args) throws Exception {
87 |
88 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
89 | //获取数据源
90 | DataStreamSource items = env.addSource(new MyStreamingSource()).setParallelism(1);
91 | //Map
92 | //SingleOutputStreamOperator