├── pom.xml
└── src
    └── main
        ├── java
        │   └── org
        │       └── myorg
        │           └── quickstart
        │               ├── BatchJob.java
        │               ├── CEP11
        │               │   ├── AlertEvent.java
        │               │   ├── LogInEvent.java
        │               │   ├── LoginStreamingCEP.java
        │               │   ├── PayEvent.java
        │               │   ├── PayStreamingCEP.java
        │               │   ├── ResultPayEvent.java
        │               │   ├── StreamingCep.java
        │               │   ├── TransactionEvent.java
        │               │   └── TransactionStreamingCEP.java
        │               ├── CoreConcepts07
        │               │   └── BatchJob.java
        │               ├── DataSkew
        │               │   ├── CountAggregate.java
        │               │   ├── CountProcessFunction.java
        │               │   ├── CountRecord.java
        │               │   └── Record.java
        │               ├── DataStreamAPI04
        │               │   ├── MyStreamingSource.java
        │               │   └── StreamingDemo2.java
        │               ├── Dim19
        │               │   ├── DimSync.java
        │               │   ├── LRU.java
        │               │   ├── Order.java
        │               │   └── WholeLoad.java
        │               ├── Distinct20
        │               │   ├── BitMapDistinct.java
        │               │   ├── BloomFilterDistinct.java
        │               │   ├── HyperLogLogDistinct.java
        │               │   ├── MapStateDistinctFunction.java
        │               │   └── RedisSinkDistinct.java
        │               ├── RedisSink27
        │               │   ├── RedisConnector.java
        │               │   ├── RedisSink01.java
        │               │   ├── RedisSink02.java
        │               │   └── SelfRedisSink.java
        │               ├── SideOutPut10
        │               │   ├── StreamingDemoFilter.java
        │               │   ├── StreamingDemoSideOutPut.java
        │               │   └── StreamingDemoSplit.java
        │               ├── State09
        │               │   └── BatchJob1.java
        │               ├── StreamingJob.java
        │               ├── Table05
        │               │   ├── Item.java
        │               │   ├── MyStreamingSource.java
        │               │   └── ResultItem.java
        │               ├── WordCountSQL.java
        │               ├── shizhan01
        │               │   ├── CustomDeSerializationSchema.java
        │               │   ├── KafkaConsumer.java
        │               │   ├── KafkaProducer.java
        │               │   └── MyNoParalleSource.java
        │               ├── shizhan02
        │               │   ├── DateUtil.java
        │               │   ├── KafkaConsumer.java
        │               │   ├── KafkaProducer.java
        │               │   ├── MyFlatMapFunction.java
        │               │   ├── MyHbaseSink.java
        │               │   ├── MyProcessAllWindowFunction.java
        │               │   ├── MyProcessWindowFunction.java
        │               │   ├── MyProcessWindowFunctionBitMap.java
        │               │   ├── MyRedisSink.java
        │               │   ├── PVUVCount.java
        │               │   ├── PVUVCountBitMap.java
        │               │   ├── PVUVCountKeyById.java
        │               │   ├── PVUVCountKeyByIdMysqlSink.java
        │               │   ├── UserActionFilter.java
        │               │   ├── UserActionProcessFunction.java
        │               │   └── UserClick.java
        │               ├── topn28
        │               │   ├── OrderDetail.java
        │               │   ├── TopN.java
        │               │   └── TopNAllWindowFunction.java
        │               ├── watermark08
        │               │   └── WindowWaterMark.java
        │               └── windowfunction26
        │                   ├── CounterTest.java
        │                   ├── MyAggregateFunction.java
        │                   └── MyReduceFunction.java
        └── resources
            └── log4j.properties

/pom.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.myorg.quickstart</groupId>
    <artifactId>quickstart</artifactId>
    <version>0.1</version>
    <packaging>jar</packaging>

    <name>Flink Quickstart Job</name>
    <url>http://www.myorganization.org</url>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <flink.version>1.10.0</flink.version>
        <java.version>1.8</java.version>
        <scala.binary.version>2.11</scala.binary.version>
        <maven.compiler.source>${java.version}</maven.compiler.source>
        <maven.compiler.target>${java.version}</maven.compiler.target>
    </properties>

    <repositories>
        <repository>
            <id>apache.snapshots</id>
            <name>Apache Development Snapshot Repository</name>
            <url>https://repository.apache.org/content/repositories/snapshots/</url>
            <releases>
                <enabled>false</enabled>
            </releases>
            <snapshots>
                <enabled>true</enabled>
            </snapshots>
        </repository>
    </repositories>

    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-java-bridge_2.11</artifactId>
            <version>1.10.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-blink_2.11</artifactId>
            <version>1.10.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-jdbc_2.11</artifactId>
            <version>1.10.0</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.46</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner_2.11</artifactId>
            <version>1.10.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-scala-bridge_2.11</artifactId>
            <version>1.10.0</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.7</version>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-cep_2.11</artifactId>
            <version>1.10.0</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.71</version>
        </dependency>
        <dependency>
            <groupId>org.hbase</groupId>
            <artifactId>asynchbase</artifactId>
            <version>1.8.2</version>
        </dependency>
        <dependency>
            <groupId>net.agkn</groupId>
            <artifactId>hll</artifactId>
            <version>1.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.roaringbitmap</groupId>
            <artifactId>RoaringBitmap</artifactId>
            <version>0.8.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_2.11</artifactId>
            <version>1.10.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-redis_2.11</artifactId>
            <version>1.1.5</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-jdbc_2.11</artifactId>
            <version>1.11.0</version>
        </dependency>
        <!-- hbase dependencies -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.2.6.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.7.5</version>
        </dependency>
        <dependency>
            <groupId>com.google.code.gson</groupId>
            <artifactId>gson</artifactId>
            <version>2.8.5</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.1</version>
                <configuration>
                    <source>${java.version}</source>
                    <target>${java.version}</target>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.1.1</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <artifactSet>
                                <excludes>
                                    <exclude>org.apache.flink:force-shading</exclude>
                                    <exclude>com.google.code.findbugs:jsr305</exclude>
                                    <exclude>org.slf4j:*</exclude>
                                    <exclude>log4j:*</exclude>
                                </excludes>
                            </artifactSet>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>org.myorg.quickstart.StreamingJob</mainClass>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>

        <pluginManagement>
            <plugins>
                <!-- Eclipse m2e configuration: ignore the shade and compiler executions it cannot handle -->
                <plugin>
                    <groupId>org.eclipse.m2e</groupId>
                    <artifactId>lifecycle-mapping</artifactId>
                    <version>1.0.0</version>
                    <configuration>
                        <lifecycleMappingMetadata>
                            <pluginExecutions>
                                <pluginExecution>
                                    <pluginExecutionFilter>
                                        <groupId>org.apache.maven.plugins</groupId>
                                        <artifactId>maven-shade-plugin</artifactId>
                                        <versionRange>[3.1.1,)</versionRange>
                                        <goals>
                                            <goal>shade</goal>
                                        </goals>
                                    </pluginExecutionFilter>
                                    <action>
                                        <ignore/>
                                    </action>
                                </pluginExecution>
                                <pluginExecution>
                                    <pluginExecutionFilter>
                                        <groupId>org.apache.maven.plugins</groupId>
                                        <artifactId>maven-compiler-plugin</artifactId>
                                        <versionRange>[3.1,)</versionRange>
                                        <goals>
                                            <goal>testCompile</goal>
                                            <goal>compile</goal>
                                        </goals>
                                    </pluginExecutionFilter>
                                    <action>
                                        <ignore/>
                                    </action>
                                </pluginExecution>
                            </pluginExecutions>
                        </lifecycleMappingMetadata>
                    </configuration>
                </plugin>
            </plugins>
        </pluginManagement>
    </build>
</project>
--------------------------------------------------------------------------------
/src/main/java/org/myorg/quickstart/BatchJob.java:
--------------------------------------------------------------------------------
1 | package org.myorg.quickstart;/*
2 |  * Licensed to the Apache Software Foundation (ASF) under one
3 |  * or more contributor license agreements.  See the NOTICE file
4 |  * distributed with this work for additional information
5 |  * regarding copyright ownership.  The ASF licenses this file
6 |  * to you under the Apache License, Version 2.0 (the
7 |  * "License"); you may not use this file except in compliance
8 |  * with the License.  You may obtain a copy of the License at
9 |  *
10 |  *     http://www.apache.org/licenses/LICENSE-2.0
11 |  *
12 |  * Unless required by applicable law or agreed to in writing, software
13 |  * distributed under the License is distributed on an "AS IS" BASIS,
14 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 |  * See the License for the specific language governing permissions and
16 |  * limitations under the License.
17 | */ 18 | 19 | import org.apache.flink.api.common.functions.FlatMapFunction; 20 | import org.apache.flink.api.java.DataSet; 21 | import org.apache.flink.api.java.ExecutionEnvironment; 22 | import org.apache.flink.api.java.tuple.Tuple2; 23 | import org.apache.flink.util.Collector; 24 | 25 | public class BatchJob { 26 | 27 | 28 | public static void main(String[] args) throws Exception { 29 | 30 | final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 31 | env.setParallelism(5); 32 | // get input data 33 | DataSet text = env.fromElements( 34 | "Flink Spark Storm", 35 | "Flink Flink Flink", 36 | "Spark Spark Spark", 37 | "Storm Storm Storm" 38 | ); 39 | 40 | 41 | DataSet> counts = 42 | text.flatMap(new LineSplitter()) 43 | .groupBy(0) 44 | .sum(1).setParallelism(1); 45 | 46 | counts.printToErr(); 47 | 48 | } 49 | 50 | 51 | public static final class LineSplitter implements FlatMapFunction> { 52 | 53 | @Override 54 | public void flatMap(String value, Collector> out) { 55 | // normalize and split the line 56 | String[] tokens = value.toLowerCase().split("\\W+"); 57 | 58 | for (String token : tokens) { 59 | if (token.length() > 0) { 60 | out.collect(new Tuple2(token, 1)); 61 | } 62 | } 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/CEP11/AlertEvent.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.CEP11; 2 | 3 | 4 | public class AlertEvent { 5 | private String id; 6 | private String message; 7 | 8 | public String getId() { 9 | return id; 10 | } 11 | 12 | public void setId(String id) { 13 | this.id = id; 14 | } 15 | 16 | public String getMessage() { 17 | return message; 18 | } 19 | 20 | public void setMessage(String message) { 21 | this.message = message; 22 | } 23 | 24 | public AlertEvent(String id, String message) { 25 | this.id = id; 26 | this.message = message; 27 | } 28 | 29 | @Override 30 | public String toString() { 31 | return "AlertEvent{" + 32 | "id='" + id + '\'' + 33 | ", message='" + message + '\'' + 34 | '}'; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/CEP11/LogInEvent.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.CEP11; 2 | 3 | 4 | public class LogInEvent { 5 | 6 | private Long userId; 7 | private String isSuccess; 8 | private Long timeStamp; 9 | 10 | public Long getUserId() { 11 | return userId; 12 | } 13 | 14 | public void setUserId(Long userId) { 15 | this.userId = userId; 16 | } 17 | 18 | public String getIsSuccess() { 19 | return isSuccess; 20 | } 21 | 22 | public void setIsSuccess(String isSuccess) { 23 | this.isSuccess = isSuccess; 24 | } 25 | 26 | public Long getTimeStamp() { 27 | return timeStamp; 28 | } 29 | 30 | public void setTimeStamp(Long timeStamp) { 31 | this.timeStamp = timeStamp; 32 | } 33 | 34 | 35 | public LogInEvent(Long userId, String isSuccess, Long timeStamp) { 36 | this.userId = userId; 37 | this.isSuccess = isSuccess; 38 | this.timeStamp = timeStamp; 39 | } 40 | 41 | @Override 42 | public String toString() { 43 | return "LogInEvent{" + 44 | "userId=" + userId + 45 | ", isSuccess='" + isSuccess + '\'' + 46 | ", timeStamp=" + timeStamp + 47 | '}'; 48 | } 49 | } 50 | -------------------------------------------------------------------------------- 
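For quick reference, here is a minimal, self-contained sketch of the batch WordCount above with every generic type parameter written out, which is easier to follow than the raw-typed listing. It assumes the DataSet API as shipped with Flink 1.10; the class name WordCountSketch is illustrative only.

    import org.apache.flink.api.common.functions.FlatMapFunction;
    import org.apache.flink.api.java.DataSet;
    import org.apache.flink.api.java.ExecutionEnvironment;
    import org.apache.flink.api.java.tuple.Tuple2;
    import org.apache.flink.util.Collector;

    // hypothetical class name, for illustration only
    public class WordCountSketch {
        public static void main(String[] args) throws Exception {
            final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
            DataSet<String> text = env.fromElements(
                    "Flink Spark Storm",
                    "Flink Flink Flink");
            DataSet<Tuple2<String, Integer>> counts = text
                    .flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                        @Override
                        public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
                            // normalize and split the line into tokens
                            for (String token : value.toLowerCase().split("\\W+")) {
                                if (!token.isEmpty()) {
                                    out.collect(new Tuple2<>(token, 1));
                                }
                            }
                        }
                    })
                    .groupBy(0)   // group on the word (field 0)
                    .sum(1);      // sum the per-word counts (field 1)
            counts.printToErr();
        }
    }

As in the original, printToErr() on a DataSet both triggers execution and prints the result, so no explicit env.execute() call is needed.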
/src/main/java/org/myorg/quickstart/CEP11/LoginStreamingCEP.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.CEP11; 2 | 3 | import org.apache.flink.api.java.functions.KeySelector; 4 | import org.apache.flink.cep.CEP; 5 | import org.apache.flink.cep.PatternStream; 6 | import org.apache.flink.cep.functions.PatternProcessFunction; 7 | import org.apache.flink.cep.pattern.Pattern; 8 | import org.apache.flink.cep.pattern.conditions.IterativeCondition; 9 | import org.apache.flink.streaming.api.TimeCharacteristic; 10 | import org.apache.flink.streaming.api.datastream.DataStream; 11 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 12 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 13 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; 14 | import org.apache.flink.streaming.api.watermark.Watermark; 15 | import org.apache.flink.streaming.api.windowing.time.Time; 16 | import org.apache.flink.util.Collector; 17 | 18 | import javax.annotation.Nullable; 19 | import java.util.List; 20 | import java.util.Map; 21 | 22 | public class LoginStreamingCEP { 23 | 24 | public static void main(String[] args) throws Exception{ 25 | 26 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 27 | env.setParallelism(1); 28 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 29 | 30 | DataStream source = env.fromElements( 31 | new LogInEvent(1L, "fail", 1597905234000L), 32 | new LogInEvent(1L, "success", 1597905235000L), 33 | new LogInEvent(2L, "fail", 1597905236000L), 34 | new LogInEvent(2L, "fail", 1597905237000L), 35 | new LogInEvent(2L, "fail", 1597905238000L), 36 | new LogInEvent(3L, "fail", 1597905239000L), 37 | new LogInEvent(3L, "success", 1597905240000L) 38 | 39 | ).assignTimestampsAndWatermarks(new BoundedOutOfOrdernessGenerator()).keyBy(new KeySelector() { 40 | @Override 41 | public Object getKey(LogInEvent value) throws Exception { 42 | return value.getUserId(); 43 | } 44 | }); 45 | 46 | Pattern pattern = Pattern.begin("start").where(new IterativeCondition() { 47 | @Override 48 | public boolean filter(LogInEvent value, Context ctx) throws Exception { 49 | return value.getIsSuccess().equals("fail"); 50 | } 51 | }).next("next").where(new IterativeCondition() { 52 | @Override 53 | public boolean filter(LogInEvent value, Context ctx) throws Exception { 54 | return value.getIsSuccess().equals("fail"); 55 | } 56 | }).within(Time.seconds(5)); 57 | 58 | PatternStream patternStream = CEP.pattern(source, pattern); 59 | 60 | SingleOutputStreamOperator process = patternStream.process(new PatternProcessFunction() { 61 | @Override 62 | public void processMatch(Map> match, Context ctx, Collector out) throws Exception { 63 | 64 | List start = match.get("start"); 65 | List next = match.get("next"); 66 | System.err.println("start:" + start + ",next:" + next); 67 | 68 | 69 | out.collect(new AlertEvent(String.valueOf(start.get(0).getUserId()), "出现连续登陆失败")); 70 | } 71 | }); 72 | 73 | process.printToErr(); 74 | env.execute("execute cep"); 75 | 76 | } 77 | 78 | private static class BoundedOutOfOrdernessGenerator implements AssignerWithPeriodicWatermarks{ 79 | 80 | private final long maxOutOfOrderness = 5000L; 81 | private long currentTimeStamp; 82 | 83 | @Nullable 84 | @Override 85 | public Watermark getCurrentWatermark() { 86 | return new Watermark(currentTimeStamp - maxOutOfOrderness); 87 | } 88 | 89 | @Override 90 | 
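// extractTimestamp below is called once per element; tracking the largest
// timestamp seen so far keeps the periodic watermark above (max event time
// minus the 5s out-of-orderness bound) from ever moving backwards.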
public long extractTimestamp(LogInEvent element, long previousElementTimestamp) { 91 | 92 | Long timeStamp = element.getTimeStamp(); 93 | currentTimeStamp = Math.max(timeStamp, currentTimeStamp); 94 | // System.err.println(element.toString() + ",EventTime:" + timeStamp + ",watermark:" + (currentTimeStamp - maxOutOfOrderness)); 95 | return timeStamp; 96 | } 97 | } 98 | 99 | 100 | } 101 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/CEP11/PayEvent.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.CEP11; 2 | 3 | 4 | public class PayEvent { 5 | 6 | private Long userId; 7 | private String action; 8 | private Long timeStamp; 9 | 10 | public Long getUserId() { 11 | return userId; 12 | } 13 | 14 | public void setUserId(Long userId) { 15 | this.userId = userId; 16 | } 17 | 18 | public String getAction() { 19 | return action; 20 | } 21 | 22 | public void setAction(String action) { 23 | this.action = action; 24 | } 25 | 26 | public Long getTimeStamp() { 27 | return timeStamp; 28 | } 29 | 30 | public void setTimeStamp(Long timeStamp) { 31 | this.timeStamp = timeStamp; 32 | } 33 | 34 | public PayEvent(Long userId, String action, Long timeStamp) { 35 | this.userId = userId; 36 | this.action = action; 37 | this.timeStamp = timeStamp; 38 | } 39 | 40 | @Override 41 | public String toString() { 42 | return "PayEvent{" + 43 | "userId=" + userId + 44 | ", action='" + action + '\'' + 45 | ", timeStamp=" + timeStamp + 46 | '}'; 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/CEP11/PayStreamingCEP.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.CEP11; 2 | 3 | import org.apache.flink.api.java.functions.KeySelector; 4 | import org.apache.flink.cep.CEP; 5 | import org.apache.flink.cep.PatternSelectFunction; 6 | import org.apache.flink.cep.PatternStream; 7 | import org.apache.flink.cep.PatternTimeoutFunction; 8 | import org.apache.flink.cep.pattern.Pattern; 9 | import org.apache.flink.cep.pattern.conditions.IterativeCondition; 10 | import org.apache.flink.streaming.api.TimeCharacteristic; 11 | import org.apache.flink.streaming.api.datastream.DataStream; 12 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 13 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 14 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; 15 | import org.apache.flink.streaming.api.watermark.Watermark; 16 | import org.apache.flink.streaming.api.windowing.time.Time; 17 | import org.apache.flink.util.OutputTag; 18 | 19 | import javax.annotation.Nullable; 20 | import java.util.List; 21 | import java.util.Map; 22 | 23 | public class PayStreamingCEP { 24 | 25 | public static void main(String[] args) throws Exception{ 26 | 27 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 28 | env.setParallelism(1); 29 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 30 | 31 | DataStream source = env.fromElements( 32 | new PayEvent(1L, "create", 1597905234000L), 33 | new PayEvent(1L, "pay", 1597905235000L), 34 | new PayEvent(2L, "create", 1597905236000L), 35 | new PayEvent(2L, "pay", 1597905237000L), 36 | new PayEvent(3L, "create", 1597905239000L) 37 | 38 | 39 | ).assignTimestampsAndWatermarks(new 
BoundedOutOfOrdernessGenerator()).keyBy(new KeySelector() { 40 | @Override 41 | public Object getKey(PayEvent value) throws Exception { 42 | return value.getUserId(); 43 | } 44 | }); 45 | 46 | OutputTag orderTimeoutOutput = new OutputTag("orderTimeout") {}; 47 | 48 | Pattern pattern = Pattern. 49 | begin("begin") 50 | .where(new IterativeCondition() { 51 | @Override 52 | public boolean filter(PayEvent payEvent, Context context) throws Exception { 53 | return payEvent.getAction().equals("create"); 54 | } 55 | }) 56 | .next("next") 57 | .where(new IterativeCondition() { 58 | @Override 59 | public boolean filter(PayEvent payEvent, Context context) throws Exception { 60 | return payEvent.getAction().equals("pay"); 61 | } 62 | }) 63 | .within(Time.seconds(600)); 64 | 65 | PatternStream patternStream = CEP.pattern(source, pattern); 66 | 67 | SingleOutputStreamOperator result = patternStream.select(orderTimeoutOutput, new PatternTimeoutFunction() { 68 | @Override 69 | public PayEvent timeout(Map> map, long l) throws Exception { 70 | return map.get("begin").get(0); 71 | } 72 | }, new PatternSelectFunction() { 73 | @Override 74 | public PayEvent select(Map> map) throws Exception { 75 | return map.get("next").get(0); 76 | } 77 | }); 78 | 79 | 80 | DataStream sideOutput = result.getSideOutput(orderTimeoutOutput); 81 | sideOutput.printToErr(); 82 | 83 | env.execute("execute cep"); 84 | 85 | } 86 | 87 | 88 | private static class BoundedOutOfOrdernessGenerator implements AssignerWithPeriodicWatermarks{ 89 | 90 | private final long maxOutOfOrderness = 5000L; 91 | private long currentTimeStamp; 92 | 93 | @Nullable 94 | @Override 95 | public Watermark getCurrentWatermark() { 96 | return new Watermark(currentTimeStamp - maxOutOfOrderness); 97 | } 98 | 99 | @Override 100 | public long extractTimestamp(PayEvent element, long previousElementTimestamp) { 101 | 102 | Long timeStamp = element.getTimeStamp(); 103 | currentTimeStamp = Math.max(timeStamp, currentTimeStamp); 104 | // System.err.println(element.toString() + ",EventTime:" + timeStamp + ",watermark:" + (currentTimeStamp - maxOutOfOrderness)); 105 | return timeStamp; 106 | } 107 | } 108 | 109 | 110 | } 111 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/CEP11/ResultPayEvent.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.CEP11; 2 | 3 | public class ResultPayEvent { 4 | 5 | private Long userId; 6 | private String type; 7 | 8 | public ResultPayEvent(Long userId, String type) { 9 | this.userId = userId; 10 | this.type = type; 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/CEP11/StreamingCep.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.CEP11; 2 | 3 | import org.apache.flink.api.java.tuple.Tuple3; 4 | import org.apache.flink.cep.CEP; 5 | import org.apache.flink.cep.PatternSelectFunction; 6 | import org.apache.flink.cep.PatternStream; 7 | import org.apache.flink.cep.PatternTimeoutFunction; 8 | import org.apache.flink.cep.pattern.conditions.IterativeCondition; 9 | import org.apache.flink.cep.pattern.conditions.SimpleCondition; 10 | import org.apache.flink.streaming.api.datastream.DataStream; 11 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 12 | import org.apache.flink.streaming.api.datastream.KeyedStream; 13 | import 
org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 14 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 15 | import org.apache.flink.cep.pattern.Pattern; 16 | import org.apache.flink.streaming.api.windowing.time.Time; 17 | import org.apache.flink.util.OutputTag; 18 | 19 | import java.util.List; 20 | import java.util.Map; 21 | 22 | public class StreamingCep { 23 | 24 | public static void main(String[] args) throws Exception{ 25 | 26 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 27 | env.setParallelism(1); 28 | 29 | DataStreamSource source = env.fromElements( 30 | //浏览记录 31 | Tuple3.of("Marry", "外套", 1L), 32 | 33 | Tuple3.of("Marry", "帽子",1L), 34 | Tuple3.of("Marry", "帽子",2L), 35 | Tuple3.of("Marry", "帽子",3L), 36 | 37 | Tuple3.of("Ming", "衣服",1L), 38 | 39 | Tuple3.of("Marry", "鞋子",1L), 40 | Tuple3.of("Marry", "鞋子",2L), 41 | 42 | Tuple3.of("LiLei", "帽子",1L), 43 | Tuple3.of("LiLei", "帽子",2L), 44 | Tuple3.of("LiLei", "帽子",3L) 45 | ); 46 | //定义Pattern,寻找连续搜索帽子的用户 47 | Pattern, Tuple3> pattern = Pattern 48 | .>begin("start") 49 | .where(new SimpleCondition>() { 50 | @Override 51 | public boolean filter(Tuple3 value) throws Exception { 52 | return value.f1.equals("帽子"); 53 | } 54 | }) //.timesOrMore(3); 55 | .next("middle") 56 | .where(new SimpleCondition>() { 57 | @Override 58 | public boolean filter(Tuple3 value) throws Exception { 59 | return value.f1.equals("帽子"); 60 | } 61 | }); 62 | 63 | // Pattern.begin("start").where(new IterativeCondition() { 64 | // @Override 65 | // public boolean filter(LogInEvent value, Context ctx) throws Exception { 66 | // return value.getIsSuccess().equals("fail"); 67 | // } 68 | // }).next("next").where(new IterativeCondition() { 69 | // @Override 70 | // public boolean filter(LogInEvent value, Context ctx) throws Exception { 71 | // return value.getIsSuccess().equals("fail"); 72 | // } 73 | // }).within(Time.seconds(5)); 74 | 75 | 76 | 77 | KeyedStream keyedStream = source.keyBy(0); 78 | PatternStream patternStream = CEP.pattern(keyedStream, pattern); 79 | 80 | SingleOutputStreamOperator matchStream = patternStream.select(new PatternSelectFunction, String>() { 81 | @Override 82 | public String select(Map>> pattern) throws Exception { 83 | List> middle = pattern.get("middle"); 84 | return middle.get(0).f0 + ":" + middle.get(0).f2 + ":" + "连续搜索两次帽子!"; 85 | } 86 | }); 87 | 88 | ////////////// 89 | 90 | // Pattern. 
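// (the commented-out block below is a reference copy of the create -> pay
// timeout pattern from PayStreamingCEP, with timed-out matches routed to a
// side output via PatternTimeoutFunction)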
91 | // begin("begin") 92 | // .where(new IterativeCondition() { 93 | // @Override 94 | // public boolean filter(PayEvent payEvent, Context context) throws Exception { 95 | // return payEvent.getAction().equals("create"); 96 | // } 97 | // }) 98 | // .next("next") 99 | // .where(new IterativeCondition() { 100 | // @Override 101 | // public boolean filter(PayEvent payEvent, Context context) throws Exception { 102 | // return payEvent.getAction().equals("pay"); 103 | // } 104 | // }) 105 | // .within(Time.seconds(600)); 106 | // OutputTag orderTiemoutOutput = new OutputTag("orderTimeout") {}; 107 | // 108 | // SingleOutputStreamOperator selectResult = patternStream.select(orderTiemoutOutput, 109 | // (PatternTimeoutFunction) (map, l) -> new ResultPayEvent(map.get("begin").get(0).getUserId(), "timeout"), 110 | // (PatternSelectFunction) map -> new ResultPayEvent(map.get("next").get(0).getUserId(), "success") 111 | // ); 112 | // DataStream timeOutSideOutputStream = selectResult.getSideOutput(orderTiemoutOutput); 113 | 114 | /////////// 115 | 116 | Pattern.begin("start").where( 117 | new SimpleCondition() { 118 | @Override 119 | public boolean filter(TransactionEvent transactionEvent) { 120 | return transactionEvent.getAmount() > 0; 121 | } 122 | } 123 | ).timesOrMore(5) 124 | .within(Time.hours(24)); 125 | 126 | 127 | 128 | 129 | 130 | 131 | //////////// 132 | 133 | 134 | 135 | matchStream.printToErr(); 136 | env.execute("execute cep"); 137 | 138 | } 139 | 140 | 141 | // class ResultPayEvent{ 142 | // private Long userId; 143 | // private String type; 144 | // 145 | // public ResultPayEvent(Long userId, String type) { 146 | // this.userId = userId; 147 | // this.type = type; 148 | // } 149 | // } 150 | 151 | }// 152 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/CEP11/TransactionEvent.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.CEP11; 2 | 3 | 4 | public class TransactionEvent { 5 | 6 | 7 | private String accout; 8 | private Double amount; 9 | private Long timeStamp; 10 | 11 | public String getAccout() { 12 | return accout; 13 | } 14 | 15 | public void setAccout(String accout) { 16 | this.accout = accout; 17 | } 18 | 19 | public Double getAmount() { 20 | return amount; 21 | } 22 | 23 | public void setAmount(Double amount) { 24 | this.amount = amount; 25 | } 26 | 27 | public Long getTimeStamp() { 28 | return timeStamp; 29 | } 30 | 31 | public void setTimeStamp(Long timeStamp) { 32 | this.timeStamp = timeStamp; 33 | } 34 | 35 | public TransactionEvent(String accout, Double amount, Long timeStamp) { 36 | this.accout = accout; 37 | this.amount = amount; 38 | this.timeStamp = timeStamp; 39 | } 40 | 41 | @Override 42 | public String toString() { 43 | return "TransactionEvent{" + 44 | "accout='" + accout + '\'' + 45 | ", amount=" + amount + 46 | ", timeStamp=" + timeStamp + 47 | '}'; 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/CEP11/TransactionStreamingCEP.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.CEP11; 2 | 3 | import org.apache.flink.api.java.functions.KeySelector; 4 | import org.apache.flink.cep.CEP; 5 | import org.apache.flink.cep.PatternStream; 6 | import org.apache.flink.cep.functions.PatternProcessFunction; 7 | import org.apache.flink.cep.pattern.Pattern; 8 | import 
org.apache.flink.cep.pattern.conditions.IterativeCondition; 9 | import org.apache.flink.cep.pattern.conditions.SimpleCondition; 10 | import org.apache.flink.streaming.api.TimeCharacteristic; 11 | import org.apache.flink.streaming.api.datastream.DataStream; 12 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 13 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 14 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; 15 | import org.apache.flink.streaming.api.watermark.Watermark; 16 | import org.apache.flink.streaming.api.windowing.time.Time; 17 | import org.apache.flink.util.Collector; 18 | 19 | import javax.annotation.Nullable; 20 | import java.util.List; 21 | import java.util.Map; 22 | 23 | public class TransactionStreamingCEP { 24 | 25 | public static void main(String[] args) throws Exception{ 26 | 27 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 28 | env.setParallelism(1); 29 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 30 | 31 | DataStream source = env.fromElements( 32 | new TransactionEvent("100XX", 0.0D, 1597905234000L), 33 | new TransactionEvent("100XX", 100.0D, 1597905235000L), 34 | new TransactionEvent("100XX", 200.0D, 1597905236000L), 35 | new TransactionEvent("100XX", 300.0D, 1597905237000L), 36 | new TransactionEvent("100XX", 400.0D, 1597905238000L), 37 | new TransactionEvent("100XX", 500.0D, 1597905239000L), 38 | new TransactionEvent("101XX", 0.0D, 1597905240000L), 39 | new TransactionEvent("101XX", 100.0D, 1597905241000L) 40 | 41 | 42 | ).assignTimestampsAndWatermarks(new BoundedOutOfOrdernessGenerator()).keyBy(new KeySelector() { 43 | @Override 44 | public Object getKey(TransactionEvent value) throws Exception { 45 | return value.getAccout(); 46 | } 47 | }); 48 | 49 | Pattern pattern = Pattern.begin("start").where( 50 | new SimpleCondition() { 51 | @Override 52 | public boolean filter(TransactionEvent transactionEvent) { 53 | return transactionEvent.getAmount() > 0; 54 | } 55 | } 56 | ).timesOrMore(5) 57 | .within(Time.hours(24)); 58 | 59 | PatternStream patternStream = CEP.pattern(source, pattern); 60 | 61 | SingleOutputStreamOperator process = patternStream.process(new PatternProcessFunction() { 62 | @Override 63 | public void processMatch(Map> match, Context ctx, Collector out) throws Exception { 64 | 65 | List start = match.get("start"); 66 | List next = match.get("next"); 67 | System.err.println("start:" + start + ",next:" + next); 68 | 69 | out.collect(new AlertEvent(start.get(0).getAccout(), "连续有效交易!")); 70 | } 71 | }); 72 | 73 | process.printToErr(); 74 | env.execute("execute cep"); 75 | 76 | } 77 | 78 | private static class BoundedOutOfOrdernessGenerator implements AssignerWithPeriodicWatermarks{ 79 | 80 | private final long maxOutOfOrderness = 5000L; 81 | private long currentTimeStamp; 82 | 83 | @Nullable 84 | @Override 85 | public Watermark getCurrentWatermark() { 86 | return new Watermark(currentTimeStamp - maxOutOfOrderness); 87 | } 88 | 89 | @Override 90 | public long extractTimestamp(TransactionEvent element, long previousElementTimestamp) { 91 | 92 | Long timeStamp = element.getTimeStamp(); 93 | currentTimeStamp = Math.max(timeStamp, currentTimeStamp); 94 | // System.err.println(element.toString() + ",EventTime:" + timeStamp + ",watermark:" + (currentTimeStamp - maxOutOfOrderness)); 95 | return timeStamp; 96 | } 97 | } 98 | 99 | 100 | } 101 | 
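Note that the pattern in TransactionStreamingCEP defines only a single stage named "start" (with timesOrMore), so match.get("next") inside processMatch above returns null and the alert is effectively built from the "start" events alone. A minimal sketch of the match handler reading only the stage the pattern actually defines, reusing the event and alert types from this file:

    patternStream.process(new PatternProcessFunction<TransactionEvent, AlertEvent>() {
        @Override
        public void processMatch(Map<String, List<TransactionEvent>> match, Context ctx,
                                 Collector<AlertEvent> out) {
            // "start" holds the 5-or-more positive-amount transactions that matched
            List<TransactionEvent> start = match.get("start");
            out.collect(new AlertEvent(start.get(0).getAccout(), "连续有效交易!"));
        }
    }).printToErr();

If a multi-stage pattern is intended, each stage must first be declared with begin()/next() before its name can be read from the match map.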
-------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/CoreConcepts07/BatchJob.java: -------------------------------------------------------------------------------- 1 | 2 | package org.myorg.quickstart.CoreConcepts07; 3 | 4 | import org.apache.commons.io.FileUtils; 5 | import org.apache.flink.api.common.functions.RichMapFunction; 6 | import org.apache.flink.api.common.restartstrategy.RestartStrategies; 7 | import org.apache.flink.api.common.time.Time; 8 | import org.apache.flink.api.java.DataSet; 9 | import org.apache.flink.api.java.ExecutionEnvironment; 10 | import org.apache.flink.api.java.operators.DataSource; 11 | import org.apache.flink.configuration.Configuration; 12 | import org.apache.flink.configuration.RestartStrategyOptions; 13 | import org.apache.flink.runtime.executiongraph.restart.RestartStrategy; 14 | 15 | import java.io.File; 16 | import java.util.ArrayList; 17 | import java.util.List; 18 | import java.util.concurrent.TimeUnit; 19 | 20 | public class BatchJob { 21 | 22 | 23 | public static void main(String[] args) throws Exception { 24 | 25 | // set up the execution environment 26 | final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 27 | // env.setRestartStrategy(RestartStrategies.noRestart()); 28 | // env.setRestartStrategy(RestartStrategies.fixedDelayRestart( 29 | // 3, // 尝试重启的次数 30 | // Time.of(10, TimeUnit.SECONDS) // 延时 31 | // )); 32 | 33 | env.setRestartStrategy(RestartStrategies.failureRateRestart( 34 | 3, // 每个时间间隔的最大故障次数 35 | Time.of(5, TimeUnit.MINUTES), // 测量故障率的时间间隔 36 | Time.of(5, TimeUnit.SECONDS) // 延时 37 | )); 38 | 39 | 40 | 41 | 42 | 43 | 44 | env.registerCachedFile("/Users/wangchangye/WorkSpace/quickstart/distributedcache.txt", "distributedCache"); 45 | //1:注册一个文件,可以使用hdfs上的文件 也可以是本地文件进行测试 46 | DataSource data = env.fromElements("Linea", "Lineb", "Linec", "Lined"); 47 | 48 | DataSet result = data.map(new RichMapFunction() { 49 | private ArrayList dataList = new ArrayList(); 50 | 51 | @Override 52 | public void open(Configuration parameters) throws Exception { 53 | super.open(parameters); 54 | //2:使用文件 55 | File myFile = getRuntimeContext().getDistributedCache().getFile("distributedCache"); 56 | List lines = FileUtils.readLines(myFile); 57 | for (String line : lines) { 58 | this.dataList.add(line); 59 | System.err.println("分布式缓存为:" + line); 60 | } 61 | } 62 | 63 | @Override 64 | public String map(String value) throws Exception { 65 | //在这里就可以使用dataList 66 | System.err.println("使用datalist:" + dataList + "------------" +value); 67 | //业务逻辑 68 | return dataList +":" + value; 69 | } 70 | }); 71 | 72 | result.printToErr(); 73 | } 74 | } 75 | 76 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/DataSkew/CountAggregate.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.DataSkew; 2 | 3 | import org.apache.flink.api.common.functions.AggregateFunction; 4 | 5 | 6 | 7 | public class CountAggregate implements AggregateFunction { 8 | 9 | 10 | @Override 11 | public CountRecord createAccumulator() { 12 | return new CountRecord(null, 0L); 13 | } 14 | 15 | @Override 16 | public CountRecord add(Record value, CountRecord accumulator) { 17 | 18 | if(accumulator.getKey() == null){ 19 | accumulator.setKey(value.key); 20 | } 21 | accumulator.setCount(value.count); 22 | return accumulator; 23 | } 24 | 25 | @Override 26 | public CountRecord 
getResult(CountRecord accumulator) {
        return accumulator;
    }

    @Override
    public CountRecord merge(CountRecord a, CountRecord b) {
        return new CountRecord(a.getKey(), a.getCount() + b.getCount());
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/myorg/quickstart/DataSkew/CountProcessFunction.java:
--------------------------------------------------------------------------------
package org.myorg.quickstart.DataSkew;

import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;


public class CountProcessFunction extends KeyedProcessFunction<String, CountRecord, CountRecord> {

    private transient ValueState<Long> state;

    @Override
    public void open(Configuration parameters) throws Exception {
        // the state must be initialized here: the runtime context is not yet
        // available when instance fields are initialized
        state = getRuntimeContext().getState(new ValueStateDescriptor<>("count", Long.class));
    }

    @Override
    public void processElement(CountRecord value, Context ctx, Collector<CountRecord> out) throws Exception {

        if (state.value() == null) {
            state.update(value.count);
            ctx.timerService().registerProcessingTimeTimer(ctx.timerService().currentProcessingTime() + 1000L * 5);
        } else {
            state.update(state.value() + value.count);
        }
    }

    @Override
    public void onTimer(long timestamp, OnTimerContext ctx, Collector<CountRecord> out) throws Exception {

        // business logic goes here, e.g. emit the accumulated count for this key
        //out.collect(...);
        // clear the state
        state.clear();

        // register a new timer for the next interval
        ctx.timerService().registerProcessingTimeTimer(ctx.timerService().currentProcessingTime() + 1000L * 5);

    }
}
--------------------------------------------------------------------------------
/src/main/java/org/myorg/quickstart/DataSkew/CountRecord.java:
--------------------------------------------------------------------------------
package org.myorg.quickstart.DataSkew;


public class CountRecord {
    String key;
    Long count;

    public CountRecord(String key, Long count) {
        this.key = key;
        this.count = count;
    }

    public String getKey() {
        return key;
    }

    public void setKey(String key) {
        this.key = key;
    }

    public Long getCount() {
        return count;
    }

    public void setCount(Long count) {
        this.count = count;
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/myorg/quickstart/DataSkew/Record.java:
--------------------------------------------------------------------------------
package org.myorg.quickstart.DataSkew;


public class Record {
    String key;
    Long count;
}
--------------------------------------------------------------------------------
/src/main/java/org/myorg/quickstart/DataStreamAPI04/MyStreamingSource.java:
--------------------------------------------------------------------------------
package org.myorg.quickstart.DataStreamAPI04;


import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import
org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 12 | import org.apache.flink.streaming.api.functions.source.SourceFunction; 13 | import org.apache.flink.util.Collector; 14 | 15 | import java.util.Random; 16 | 17 | public class MyStreamingSource implements SourceFunction { 18 | 19 | private boolean isRunning = true; 20 | 21 | /** 22 | * 重写run方法产生一个源源不断的数据发送源 23 | * @param ctx 24 | * @throws Exception 25 | */ 26 | @Override 27 | public void run(SourceContext ctx) throws Exception { 28 | while(isRunning){ 29 | Item item = generateItem(); 30 | ctx.collect(item); 31 | 32 | //每秒产生一条数据 33 | Thread.sleep(1000); 34 | } 35 | } 36 | @Override 37 | public void cancel() { 38 | isRunning = false; 39 | } 40 | 41 | //随机产生一条商品数据 42 | private Item generateItem(){ 43 | int i = new Random().nextInt(100); 44 | 45 | Item item = new Item(); 46 | item.setName("name" + i); 47 | item.setId(i); 48 | return item; 49 | } 50 | 51 | class Item{ 52 | private String name; 53 | private Integer id; 54 | 55 | Item() { 56 | } 57 | 58 | public String getName() { 59 | return name; 60 | } 61 | 62 | public void setName(String name) { 63 | this.name = name; 64 | } 65 | 66 | public Integer getId() { 67 | return id; 68 | } 69 | 70 | public void setId(Integer id) { 71 | this.id = id; 72 | } 73 | 74 | @Override 75 | public String toString() { 76 | return "Item{" + 77 | "name='" + name + '\'' + 78 | ", id=" + id + 79 | '}'; 80 | } 81 | } 82 | } 83 | 84 | 85 | class StreamingDemo { 86 | public static void main(String[] args) throws Exception { 87 | 88 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 89 | //获取数据源 90 | DataStreamSource items = env.addSource(new MyStreamingSource()).setParallelism(1); 91 | //Map 92 | //SingleOutputStreamOperator mapItems = items.map(item -> item.getName()); 93 | //SingleOutputStreamOperator mapItems = items.map(new MyMapFunction()); 94 | 95 | //flatmap 96 | // SingleOutputStreamOperator flatMapItems = items.flatMap(new FlatMapFunction() { 97 | // @Override 98 | // public void flatMap(MyStreamingSource.Item item, Collector collector) throws Exception { 99 | // String name = item.getName(); 100 | // collector.collect(name); 101 | // } 102 | // }); 103 | 104 | //filter 105 | SingleOutputStreamOperator filterItems = items.filter( item -> item.getId() % 2 == 0); 106 | 107 | //打印结果 108 | filterItems.print().setParallelism(1); 109 | String jobName = "user defined streaming source"; 110 | env.execute(jobName); 111 | } 112 | 113 | static class MyMapFunction extends RichMapFunction { 114 | 115 | @Override 116 | public String map(MyStreamingSource.Item item) throws Exception { 117 | return item.getName(); 118 | } 119 | } 120 | 121 | } 122 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/DataStreamAPI04/StreamingDemo2.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.DataStreamAPI04; 2 | 3 | 4 | import org.apache.flink.api.common.functions.ReduceFunction; 5 | import org.apache.flink.api.java.tuple.Tuple3; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 9 | 10 | import java.util.ArrayList; 11 | import java.util.List; 12 | 13 | 14 | class StreamingDemo2 { 15 | public static void main(String[] args) throws Exception { 16 | 17 | 
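// Demo: key the (group, subkey, value) tuples by field 0, then keep a rolling
// sum of field 2 per key with reduce(); every input element emits an updated total.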
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 18 | //获取数据源 19 | List data = new ArrayList>(); 20 | data.add(new Tuple3<>(0,1,0)); 21 | data.add(new Tuple3<>(0,1,1)); 22 | data.add(new Tuple3<>(0,2,2)); 23 | data.add(new Tuple3<>(0,1,3)); 24 | data.add(new Tuple3<>(1,2,5)); 25 | data.add(new Tuple3<>(1,2,9)); 26 | data.add(new Tuple3<>(1,2,11)); 27 | data.add(new Tuple3<>(1,2,13)); 28 | 29 | 30 | DataStreamSource> items = env.fromCollection(data); 31 | //items.keyBy(0).max(2).printToErr(); 32 | 33 | SingleOutputStreamOperator> reduce = items.keyBy(0).reduce(new ReduceFunction>() { 34 | @Override 35 | public Tuple3 reduce(Tuple3 t1, Tuple3 t2) throws Exception { 36 | Tuple3 newTuple = new Tuple3<>(); 37 | 38 | newTuple.setFields(0,0,(Integer)t1.getField(2) + (Integer) t2.getField(2)); 39 | return newTuple; 40 | } 41 | }); 42 | 43 | reduce.printToErr().setParallelism(1); 44 | 45 | //打印结果 46 | String jobName = "user defined streaming source"; 47 | env.execute(jobName); 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/Dim19/DimSync.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.Dim19; 2 | 3 | import com.alibaba.fastjson.JSONObject; 4 | import org.apache.flink.api.common.functions.RichMapFunction; 5 | import org.apache.flink.configuration.Configuration; 6 | import org.slf4j.Logger; 7 | import org.slf4j.LoggerFactory; 8 | 9 | import java.sql.Connection; 10 | import java.sql.DriverManager; 11 | import java.sql.PreparedStatement; 12 | import java.sql.ResultSet; 13 | 14 | public class DimSync extends RichMapFunction { 15 | 16 | private static final Logger LOGGER = LoggerFactory.getLogger(DimSync.class); 17 | 18 | private Connection conn = null; 19 | public void open(Configuration parameters) throws Exception { 20 | super.open(parameters); 21 | conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/dim?characterEncoding=UTF-8", "admin", "admin"); 22 | } 23 | 24 | public Order map(String in) throws Exception { 25 | 26 | JSONObject jsonObject = JSONObject.parseObject(in); 27 | Integer cityId = jsonObject.getInteger("city_id"); 28 | String userName = jsonObject.getString("user_name"); 29 | String items = jsonObject.getString("items"); 30 | 31 | //根据city_id 查询 city_name 32 | PreparedStatement pst = conn.prepareStatement("select city_name from info where city_id = ?"); 33 | pst.setInt(1,cityId); 34 | ResultSet resultSet = pst.executeQuery(); 35 | String cityName = null; 36 | while (resultSet.next()){ 37 | cityName = resultSet.getString(1); 38 | } 39 | pst.close(); 40 | return new Order(cityId,userName,items,cityName); 41 | } 42 | 43 | public void close() throws Exception { 44 | super.close(); 45 | conn.close(); 46 | } 47 | 48 | }// 49 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/Dim19/LRU.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.Dim19; 2 | 3 | import com.alibaba.fastjson.JSONObject; 4 | import com.stumbleupon.async.Callback; 5 | import org.apache.flink.configuration.Configuration; 6 | import org.apache.flink.shaded.guava18.com.google.common.cache.Cache; 7 | import org.apache.flink.shaded.guava18.com.google.common.cache.CacheBuilder; 8 | import org.apache.flink.streaming.api.functions.async.ResultFuture; 9 | import 
org.apache.flink.streaming.api.functions.async.RichAsyncFunction; 10 | import org.hbase.async.GetRequest; 11 | import org.hbase.async.HBaseClient; 12 | import org.hbase.async.KeyValue; 13 | import org.slf4j.Logger; 14 | import org.slf4j.LoggerFactory; 15 | 16 | import java.util.ArrayList; 17 | import java.util.Collections; 18 | import java.util.concurrent.TimeUnit; 19 | 20 | public class LRU extends RichAsyncFunction { 21 | 22 | private static final Logger LOGGER = LoggerFactory.getLogger(LRU.class); 23 | String table = "info"; 24 | Cache cache = null; 25 | private HBaseClient client = null; 26 | @Override 27 | public void open(Configuration parameters) throws Exception { 28 | super.open(parameters); 29 | //创建hbase客户端 30 | client = new HBaseClient("127.0.0.1","7071"); 31 | cache = CacheBuilder.newBuilder() 32 | //最多存储10000条 33 | .maximumSize(10000) 34 | //过期时间为1分钟 35 | .expireAfterWrite(60, TimeUnit.SECONDS) 36 | .build(); 37 | } 38 | 39 | @Override 40 | public void asyncInvoke(String input, ResultFuture resultFuture) throws Exception { 41 | 42 | JSONObject jsonObject = JSONObject.parseObject(input); 43 | Integer cityId = jsonObject.getInteger("city_id"); 44 | String userName = jsonObject.getString("user_name"); 45 | String items = jsonObject.getString("items"); 46 | //读缓存 47 | String cacheCityName = cache.getIfPresent(cityId); 48 | //如果缓存获取失败再从hbase获取维度数据 49 | if(cacheCityName != null){ 50 | Order order = new Order(); 51 | order.setCityId(cityId); 52 | order.setItems(items); 53 | order.setUserName(userName); 54 | order.setCityName(cacheCityName); 55 | resultFuture.complete(Collections.singleton(order)); 56 | }else { 57 | 58 | client.get(new GetRequest(table,String.valueOf(cityId))).addCallback((Callback>) arg -> { 59 | for (KeyValue kv : arg) { 60 | String value = new String(kv.value()); 61 | Order order = new Order(); 62 | order.setCityId(cityId); 63 | order.setItems(items); 64 | order.setUserName(userName); 65 | order.setCityName(value); 66 | resultFuture.complete(Collections.singleton(order)); 67 | cache.put(String.valueOf(cityId), value); 68 | } 69 | return null; 70 | }); 71 | 72 | } 73 | } 74 | 75 | } 76 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/Dim19/Order.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.Dim19; 2 | 3 | public class Order { 4 | private Integer cityId; 5 | private String userName; 6 | private String items; 7 | private String cityName; 8 | 9 | public Order(Integer cityId, String userName, String items, String cityName) { 10 | this.cityId = cityId; 11 | this.userName = userName; 12 | this.items = items; 13 | this.cityName = cityName; 14 | } 15 | 16 | public Order() { 17 | } 18 | 19 | public Integer getCityId() { 20 | return cityId; 21 | } 22 | 23 | public void setCityId(Integer cityId) { 24 | this.cityId = cityId; 25 | } 26 | 27 | public String getUserName() { 28 | return userName; 29 | } 30 | 31 | public void setUserName(String userName) { 32 | this.userName = userName; 33 | } 34 | 35 | public String getItems() { 36 | return items; 37 | } 38 | 39 | public void setItems(String items) { 40 | this.items = items; 41 | } 42 | 43 | public String getCityName() { 44 | return cityName; 45 | } 46 | 47 | public void setCityName(String cityName) { 48 | this.cityName = cityName; 49 | } 50 | 51 | @Override 52 | public String toString() { 53 | return "Order{" + 54 | "cityId=" + cityId + 55 | ", userName='" + userName + '\'' + 56 | ", 
items='" + items + '\'' + 57 | ", cityName='" + cityName + '\'' + 58 | '}'; 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/Dim19/WholeLoad.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.Dim19; 2 | 3 | import com.alibaba.fastjson.JSONObject; 4 | import org.apache.flink.api.common.functions.RichMapFunction; 5 | import org.apache.flink.configuration.Configuration; 6 | import org.slf4j.Logger; 7 | import org.slf4j.LoggerFactory; 8 | 9 | import java.sql.*; 10 | import java.util.Map; 11 | import java.util.concurrent.ScheduledExecutorService; 12 | import java.util.concurrent.TimeUnit; 13 | 14 | public class WholeLoad extends RichMapFunction { 15 | 16 | 17 | private static final Logger LOGGER = LoggerFactory.getLogger(WholeLoad.class); 18 | ScheduledExecutorService executor = null; 19 | private Map cache; 20 | 21 | @Override 22 | public void open(Configuration parameters) throws Exception { 23 | super.open(parameters); 24 | executor.scheduleAtFixedRate(new Runnable() { 25 | @Override 26 | public void run() { 27 | try { 28 | load(); 29 | } catch (Exception e) { 30 | e.printStackTrace(); 31 | } 32 | } 33 | },5,5, TimeUnit.MINUTES); 34 | } 35 | 36 | @Override 37 | public Order map(String value) throws Exception { 38 | JSONObject jsonObject = JSONObject.parseObject(value); 39 | Integer cityId = jsonObject.getInteger("city_id"); 40 | String userName = jsonObject.getString("user_name"); 41 | String items = jsonObject.getString("items"); 42 | String cityName = cache.get(cityId); 43 | return new Order(cityId,userName,items,cityName); 44 | } 45 | 46 | public void load() throws Exception { 47 | Class.forName("com.mysql.jdbc.Driver"); 48 | Connection con = DriverManager.getConnection("jdbc:mysql://localhost:3306/dim?characterEncoding=UTF-8", "admin", "admin"); 49 | PreparedStatement statement = con.prepareStatement("select city_id,city_name from info"); 50 | ResultSet rs = statement.executeQuery(); 51 | //全量更新维度数据到内存 52 | while (rs.next()) { 53 | String cityId = rs.getString("city_id"); 54 | String cityName = rs.getString("city_name"); 55 | cache.put(cityId, cityName); 56 | } 57 | con.close(); 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/Distinct20/BitMapDistinct.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.Distinct20; 2 | 3 | import org.apache.flink.api.common.functions.AggregateFunction; 4 | import org.roaringbitmap.longlong.Roaring64NavigableMap; 5 | 6 | public class BitMapDistinct implements AggregateFunction { 7 | 8 | 9 | @Override 10 | public Roaring64NavigableMap createAccumulator() { 11 | return new Roaring64NavigableMap(); 12 | } 13 | 14 | @Override 15 | public Roaring64NavigableMap add(Long value, Roaring64NavigableMap accumulator) { 16 | accumulator.add(value); 17 | return accumulator; 18 | } 19 | 20 | 21 | @Override 22 | public Long getResult(Roaring64NavigableMap accumulator) { 23 | return accumulator.getLongCardinality(); 24 | } 25 | 26 | @Override 27 | public Roaring64NavigableMap merge(Roaring64NavigableMap a, Roaring64NavigableMap b) { 28 | return null; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/Distinct20/BloomFilterDistinct.java: 
--------------------------------------------------------------------------------
package org.myorg.quickstart.Distinct20;

import com.google.common.hash.BloomFilter;
import com.google.common.hash.Funnels;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;

public class BloomFilterDistinct extends KeyedProcessFunction<String, String, Long> {

    private transient ValueState<BloomFilter<CharSequence>> bloomState;
    private transient ValueState<Long> countState;

    @Override
    public void open(Configuration parameters) throws Exception {
        // both states must be obtained from the runtime context before first use
        bloomState = getRuntimeContext().getState(new ValueStateDescriptor<>(
                "bloom", TypeInformation.of(new TypeHint<BloomFilter<CharSequence>>() {})));
        countState = getRuntimeContext().getState(new ValueStateDescriptor<>("count", Long.class));
    }

    @Override
    public void processElement(String value, Context ctx, Collector<Long> out) throws Exception {

        BloomFilter<CharSequence> bloomFilter = bloomState.value();
        Long skuCount = countState.value();

        if (bloomFilter == null) {
            // the freshly created filter must be assigned, or it is lost
            bloomFilter = BloomFilter.create(Funnels.unencodedCharsFunnel(), 10000000);
        }

        if (skuCount == null) {
            skuCount = 0L;
        }

        if (!bloomFilter.mightContain(value)) {
            bloomFilter.put(value);
            skuCount = skuCount + 1;
        }

        bloomState.update(bloomFilter);
        countState.update(skuCount);
        out.collect(countState.value());
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/myorg/quickstart/Distinct20/HyperLogLogDistinct.java:
--------------------------------------------------------------------------------
package org.myorg.quickstart.Distinct20;

import net.agkn.hll.HLL;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.java.tuple.Tuple2;

public class HyperLogLogDistinct implements AggregateFunction<Tuple2<String, Long>, HLL, Long> {


    @Override
    public HLL createAccumulator() {

        return new HLL(14, 5);
    }

    @Override
    public HLL add(Tuple2<String, Long> value, HLL accumulator) {

        // each record is a purchase: <sku, user id>
        accumulator.addRaw(value.f1);
        return accumulator;
    }

    @Override
    public Long getResult(HLL accumulator) {
        long cardinality = accumulator.cardinality();
        return cardinality;
    }


    @Override
    public HLL merge(HLL a, HLL b) {
        a.union(b);
        return a;
    }
}
--------------------------------------------------------------------------------
/src/main/java/org/myorg/quickstart/Distinct20/MapStateDistinctFunction.java:
--------------------------------------------------------------------------------
package org.myorg.quickstart.Distinct20;


import org.apache.flink.api.common.state.StateTtlConfig;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;

public class MapStateDistinctFunction extends KeyedProcessFunction<String, Tuple2<String, Integer>, Tuple2<String, Integer>> {

    private transient ValueState<Integer> counts;

    @Override
    public void open(Configuration parameters) throws Exception {

        // give the ValueState a TTL of 24 hours; expired state is cleared automatically
        StateTtlConfig ttlConfig = StateTtlConfig
                .newBuilder(org.apache.flink.api.common.time.Time.minutes(24 * 60))
                .setUpdateType(StateTtlConfig.UpdateType.OnCreateAndWrite)
                .setStateVisibility(StateTtlConfig.StateVisibility.NeverReturnExpired)
                .build();

        // create the state descriptor and attach the TTL config
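        // OnCreateAndWrite refreshes the TTL each time the state is created or written;
        // NeverReturnExpired hides values that have expired but are not yet cleaned up.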
ValueStateDescriptor descriptor = new ValueStateDescriptor("skuNum", Integer.class); 28 | descriptor.enableTimeToLive(ttlConfig); 29 | counts = getRuntimeContext().getState(descriptor); 30 | super.open(parameters); 31 | } 32 | 33 | 34 | @Override 35 | public void processElement(Tuple2 value, Context ctx, Collector> out) throws Exception { 36 | 37 | String f0 = value.f0; 38 | 39 | //如果不存在则新增 40 | if(counts.value() == null){ 41 | counts.update(1); 42 | }else{ 43 | //如果存在则加1 44 | counts.update(counts.value()+1); 45 | } 46 | 47 | out.collect(Tuple2.of(f0, counts.value())); 48 | 49 | } 50 | 51 | }// 52 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/Distinct20/RedisSinkDistinct.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.Distinct20; 2 | 3 | import org.apache.flink.api.common.functions.FlatMapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | import org.apache.flink.util.Collector; 6 | 7 | 8 | public class RedisSinkDistinct implements FlatMapFunction, Tuple2> { 9 | @Override 10 | public void flatMap(Tuple2 value, Collector> out) throws Exception { 11 | 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/RedisSink27/RedisConnector.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.RedisSink27; 2 | 3 | 4 | import org.apache.flink.api.common.functions.MapFunction; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.datastream.DataStream; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.streaming.connectors.redis.RedisSink; 9 | import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig; 10 | 11 | import java.net.InetSocketAddress; 12 | import java.util.Arrays; 13 | import java.util.HashSet; 14 | 15 | public class RedisConnector { 16 | 17 | public static void main(String[] args) throws Exception{ 18 | 19 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 20 | 21 | DataStream> stream = env.fromElements("Flink","Spark","Storm").map(new MapFunction>() { 22 | @Override 23 | public Tuple2 map(String s) throws Exception { 24 | return new Tuple2<>(s, s+"_sink2"); 25 | } 26 | }); 27 | 28 | FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig.Builder().setHost("localhost").setPort(6379).build(); 29 | stream.addSink(new RedisSink<>(conf, new RedisSink02())); 30 | env.execute("redis sink01"); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/RedisSink27/RedisSink01.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.RedisSink27; 2 | 3 | 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommand; 6 | import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommandDescription; 7 | import org.apache.flink.streaming.connectors.redis.common.mapper.RedisMapper; 8 | 9 | public class RedisSink01 implements RedisMapper>{ 10 | 11 | /** 12 | * 设置redis数据类型 13 | */ 14 | @Override 15 | public RedisCommandDescription getCommandDescription() { 16 | return new RedisCommandDescription(RedisCommand.SET); 17 | } 18 | 19 | 
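    // RedisCommand.SET stores each record as a plain Redis string (SET key value),
    // overwriting any previous value for the key; other RedisCommand constants
    // (e.g. HSET, LPUSH, SADD) target hashes, lists and sets instead.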
/** 20 | * 设置Key 21 | */ 22 | @Override 23 | public String getKeyFromData(Tuple2 data) { 24 | return data.f0; 25 | } 26 | 27 | /** 28 | * 设置value 29 | */ 30 | @Override 31 | public String getValueFromData(Tuple2 data) { 32 | return data.f1; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/RedisSink27/RedisSink02.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.RedisSink27; 2 | 3 | 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommand; 6 | import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommandDescription; 7 | import org.apache.flink.streaming.connectors.redis.common.mapper.RedisMapper; 8 | 9 | public class RedisSink02 implements RedisMapper> { 10 | 11 | /** 12 | * 设置redis数据类型 13 | */ 14 | @Override 15 | public RedisCommandDescription getCommandDescription() { 16 | return new RedisCommandDescription(RedisCommand.SET); 17 | } 18 | 19 | /** 20 | * 设置Key 21 | */ 22 | @Override 23 | public String getKeyFromData(Tuple2 data) { 24 | return data.f0; 25 | } 26 | 27 | /** 28 | * 设置value 29 | */ 30 | @Override 31 | public String getValueFromData(Tuple2 data) { 32 | return data.f1; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/RedisSink27/SelfRedisSink.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.RedisSink27; 2 | 3 | 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | import org.apache.flink.configuration.Configuration; 6 | import org.apache.flink.streaming.api.functions.sink.RichSinkFunction; 7 | import redis.clients.jedis.Jedis; 8 | 9 | public class SelfRedisSink extends RichSinkFunction { 10 | 11 | 12 | private transient Jedis jedis; 13 | 14 | public void open(Configuration config) { 15 | jedis = new Jedis("localhost", 6379); 16 | } 17 | 18 | public void invoke(Tuple2 value, Context context) throws Exception { 19 | if (!jedis.isConnected()) { 20 | jedis.connect(); 21 | } 22 | jedis.set(value.f0, value.f1); 23 | } 24 | 25 | @Override 26 | public void close() throws Exception { 27 | jedis.close(); 28 | } 29 | 30 | } 31 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/SideOutPut10/StreamingDemoFilter.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.SideOutPut10; 2 | 3 | 4 | import org.apache.flink.api.common.functions.FilterFunction; 5 | import org.apache.flink.api.common.functions.ReduceFunction; 6 | import org.apache.flink.api.java.tuple.Tuple3; 7 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | 11 | import java.util.ArrayList; 12 | import java.util.List; 13 | 14 | 15 | class StreamingDemoFilter { 16 | public static void main(String[] args) throws Exception { 17 | 18 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 19 | //获取数据源 20 | List data = new ArrayList>(); 21 | data.add(new Tuple3<>(0,1,0)); 22 | data.add(new Tuple3<>(0,1,1)); 23 | data.add(new Tuple3<>(0,2,2)); 24 | data.add(new Tuple3<>(0,1,3)); 25 | 
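// the four rows above carry f0 == 0 and the four below carry f0 == 1; f0 is the
// field every routing demo in this package (filter, split, side output) keys on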
data.add(new Tuple3<>(1,2,5)); 26 | data.add(new Tuple3<>(1,2,9)); 27 | data.add(new Tuple3<>(1,2,11)); 28 | data.add(new Tuple3<>(1,2,13)); 29 | 30 | 31 | DataStreamSource> items = env.fromCollection(data); 32 | 33 | 34 | SingleOutputStreamOperator> zeroStream = items.filter((FilterFunction>) value -> value.f0 == 0); 35 | SingleOutputStreamOperator> oneStream = items.filter((FilterFunction>) value -> value.f0 == 1); 36 | 37 | zeroStream.print(); 38 | oneStream.printToErr(); 39 | 40 | 41 | //打印结果 42 | String jobName = "user defined streaming source"; 43 | env.execute(jobName); 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/SideOutPut10/StreamingDemoSideOutPut.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.SideOutPut10; 2 | 3 | 4 | import org.apache.flink.api.java.tuple.Tuple3; 5 | import org.apache.flink.streaming.api.collector.selector.OutputSelector; 6 | import org.apache.flink.streaming.api.datastream.DataStream; 7 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.datastream.SplitStream; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.streaming.api.functions.ProcessFunction; 12 | import org.apache.flink.util.Collector; 13 | import org.apache.flink.util.OutputTag; 14 | 15 | import java.util.ArrayList; 16 | import java.util.List; 17 | 18 | 19 | class StreamingDemoSideOutPut { 20 | public static void main(String[] args) throws Exception { 21 | 22 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 23 | //获取数据源 24 | List data = new ArrayList>(); 25 | data.add(new Tuple3<>(0,1,0)); 26 | data.add(new Tuple3<>(0,1,1)); 27 | data.add(new Tuple3<>(0,2,2)); 28 | data.add(new Tuple3<>(0,1,3)); 29 | data.add(new Tuple3<>(1,2,5)); 30 | data.add(new Tuple3<>(1,2,9)); 31 | data.add(new Tuple3<>(1,2,11)); 32 | data.add(new Tuple3<>(1,2,13)); 33 | 34 | 35 | DataStreamSource> items = env.fromCollection(data); 36 | 37 | OutputTag> zeroStream = new OutputTag>("zeroStream") {}; 38 | OutputTag> oneStream = new OutputTag>("oneStream") {}; 39 | 40 | 41 | SingleOutputStreamOperator> processStream= items.process(new ProcessFunction, Tuple3>() { 42 | @Override 43 | public void processElement(Tuple3 value, Context ctx, Collector> out) throws Exception { 44 | 45 | if (value.f0 == 0) { 46 | ctx.output(zeroStream, value); 47 | } else if (value.f0 == 1) { 48 | ctx.output(oneStream, value); 49 | } 50 | } 51 | }); 52 | 53 | DataStream> zeroSideOutput = processStream.getSideOutput(zeroStream); 54 | DataStream> oneSideOutput = processStream.getSideOutput(oneStream); 55 | 56 | zeroSideOutput.print(); 57 | oneSideOutput.printToErr(); 58 | 59 | 60 | //打印结果 61 | String jobName = "user defined streaming source"; 62 | env.execute(jobName); 63 | } 64 | 65 | } 66 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/SideOutPut10/StreamingDemoSplit.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.SideOutPut10; 2 | 3 | 4 | import org.apache.flink.api.java.tuple.Tuple3; 5 | import org.apache.flink.streaming.api.collector.selector.OutputSelector; 6 | import 
org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.SplitStream; 8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 9 | 10 | import java.util.ArrayList; 11 | import java.util.List; 12 | 13 | 14 | class StreamingDemoSplit { 15 | public static void main(String[] args) throws Exception { 16 | 17 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 18 | //获取数据源 19 | List data = new ArrayList>(); 20 | data.add(new Tuple3<>(0,1,0)); 21 | data.add(new Tuple3<>(0,1,1)); 22 | data.add(new Tuple3<>(0,2,2)); 23 | data.add(new Tuple3<>(0,1,3)); 24 | data.add(new Tuple3<>(1,2,5)); 25 | data.add(new Tuple3<>(1,2,9)); 26 | data.add(new Tuple3<>(1,2,11)); 27 | data.add(new Tuple3<>(1,2,13)); 28 | 29 | 30 | DataStreamSource> items = env.fromCollection(data); 31 | 32 | 33 | SplitStream> splitStream = items.split(new OutputSelector>() { 34 | @Override 35 | public Iterable select(Tuple3 value) { 36 | List tags = new ArrayList<>(); 37 | if (value.f0 == 0) { 38 | tags.add("zeroStream"); 39 | } else if (value.f0 == 1) { 40 | tags.add("oneStream"); 41 | } 42 | return tags; 43 | } 44 | }); 45 | 46 | splitStream.select("zeroStream").print(); 47 | splitStream.select("oneStream").printToErr(); 48 | 49 | //打印结果 50 | String jobName = "user defined streaming source"; 51 | env.execute(jobName); 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/State09/BatchJob1.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package org.myorg.quickstart.State09; 20 | 21 | import org.apache.flink.api.common.functions.RichFlatMapFunction; 22 | import org.apache.flink.api.common.state.StateTtlConfig; 23 | import org.apache.flink.api.common.state.ValueState; 24 | import org.apache.flink.api.common.state.ValueStateDescriptor; 25 | import org.apache.flink.api.common.time.Time; 26 | import org.apache.flink.api.common.typeinfo.TypeHint; 27 | import org.apache.flink.api.common.typeinfo.TypeInformation; 28 | import org.apache.flink.api.java.tuple.Tuple2; 29 | import org.apache.flink.configuration.Configuration; 30 | import org.apache.flink.runtime.state.filesystem.FsStateBackend; 31 | import org.apache.flink.runtime.state.memory.MemoryStateBackend; 32 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 33 | import org.apache.flink.util.Collector; 34 | 35 | import static org.apache.flink.runtime.state.memory.MemoryStateBackend.DEFAULT_MAX_STATE_SIZE; 36 | 37 | public class BatchJob1 { 38 | 39 | 40 | public static void main(String[] args) throws Exception { 41 | 42 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 43 | 44 | env.setStateBackend(new FsStateBackend("hdfs://namenode:40010/flink/checkpoints", false)); 45 | 46 | env.fromElements(Tuple2.of(1L, 3L), Tuple2.of(1L, 5L), Tuple2.of(1L, 7L), Tuple2.of(1L, 5L), Tuple2.of(1L, 2L)) 47 | .keyBy(0) 48 | .flatMap(new CountWindowAverage()) 49 | .printToErr(); 50 | 51 | env.execute("submit job"); 52 | 53 | } 54 | 55 | 56 | public static class CountWindowAverage extends RichFlatMapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>> { 57 | 58 | private transient ValueState<Tuple2<Long, Long>> sum; 59 | @Override 60 | public void flatMap(Tuple2<Long, Long> input, Collector<Tuple2<Long, Long>> out) throws Exception { 61 | 62 | Tuple2<Long, Long> currentSum; 63 | // read the ValueState, falling back to (0, 0) on first access 64 | if (sum.value() == null) { 65 | currentSum = Tuple2.of(0L, 0L); 66 | } else { 67 | currentSum = sum.value(); 68 | } 69 | 70 | // the first field counts the records seen 71 | currentSum.f0 += 1; 72 | 73 | // the second field accumulates the input values 74 | currentSum.f1 += input.f1; 75 | 76 | // write the state back 77 | sum.update(currentSum); 78 | 79 | // once at least 2 records have been counted, emit the average and clear the state 80 | if (currentSum.f0 >= 2) { 81 | out.collect(new Tuple2<>(input.f0, currentSum.f1 / currentSum.f0)); 82 | sum.clear(); 83 | } 84 | } 85 | 86 | 87 | public void open(Configuration config) { 88 | ValueStateDescriptor<Tuple2<Long, Long>> descriptor = 89 | new ValueStateDescriptor<>( 90 | "average", // the state's name 91 | TypeInformation.of(new TypeHint<Tuple2<Long, Long>>() {}) // type of the stored value 92 | ); 93 | 94 | 95 | StateTtlConfig ttlConfig = StateTtlConfig 96 | .newBuilder(Time.seconds(10)) 97 | .setUpdateType(StateTtlConfig.UpdateType.OnCreateAndWrite) 98 | .setStateVisibility(StateTtlConfig.StateVisibility.NeverReturnExpired) 99 | .build(); 100 | 101 | descriptor.enableTimeToLive(ttlConfig); 102 | 103 | sum = getRuntimeContext().getState(descriptor); 104 | } 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/StreamingJob.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License.
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package org.myorg.quickstart; 20 | 21 | import org.apache.flink.api.common.functions.FlatMapFunction; 22 | import org.apache.flink.api.common.functions.ReduceFunction; 23 | import org.apache.flink.streaming.api.datastream.DataStream; 24 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 25 | import org.apache.flink.streaming.api.windowing.time.Time; 26 | import org.apache.flink.util.Collector; 27 | 28 | public class StreamingJob { 29 | 30 | public static void main(String[] args) throws Exception { 31 | 32 | // get the execution environment 33 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 34 | 35 | // get input data by connecting to the socket 36 | DataStream text = env.socketTextStream("127.0.0.1", 9000, "\n"); 37 | 38 | // parse the data, group it, window it, and aggregate the counts 39 | DataStream windowCounts = text 40 | .flatMap(new FlatMapFunction() { 41 | @Override 42 | public void flatMap(String value, Collector out) { 43 | for (String word : value.split("\\s")) { 44 | out.collect(new WordWithCount(word, 1L)); 45 | } 46 | } 47 | }) 48 | .keyBy("word") 49 | .timeWindow(Time.seconds(5), Time.seconds(1)) 50 | .reduce(new ReduceFunction() { 51 | @Override 52 | public WordWithCount reduce(WordWithCount a, WordWithCount b) { 53 | return new WordWithCount(a.word, a.count + b.count); 54 | } 55 | }); 56 | 57 | // print the results with a single thread, rather than in parallel 58 | windowCounts.print().setParallelism(1); 59 | 60 | env.execute("Socket Window WordCount"); 61 | } 62 | 63 | // Data type for words with count 64 | public static class WordWithCount { 65 | 66 | public String word; 67 | public long count; 68 | 69 | public WordWithCount() {} 70 | 71 | public WordWithCount(String word, long count) { 72 | this.word = word; 73 | this.count = count; 74 | } 75 | 76 | @Override 77 | public String toString() { 78 | return word + " : " + count; 79 | } 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/Table05/Item.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.Table05; 2 | 3 | import java.io.Serializable; 4 | 5 | /** 6 | * Created by wangchangye on 2020/4/23. 
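 * A plain POJO for the Table demos. Flink can only map a class onto table fields
 * if it is public, has a public no-argument constructor, and exposes its fields
 * publicly or through getters and setters, all of which Item satisfies.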
7 | */ 8 | public class Item implements Serializable{ 9 | private String name; 10 | private Integer id; 11 | 12 | public Item() { 13 | } 14 | 15 | public Item(String name, Integer id) { 16 | this.name = name; 17 | this.id = id; 18 | } 19 | 20 | public String getName() { 21 | return name; 22 | } 23 | 24 | public void setName(String name) { 25 | this.name = name; 26 | } 27 | 28 | public Integer getId() { 29 | return id; 30 | } 31 | 32 | public void setId(Integer id) { 33 | this.id = id; 34 | } 35 | 36 | @Override 37 | public String toString() { 38 | return "Item{" + 39 | "name='" + name + '\'' + 40 | ", id=" + id + 41 | '}'; 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/Table05/MyStreamingSource.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.Table05; 2 | 3 | 4 | import org.apache.flink.api.common.functions.MapFunction; 5 | import org.apache.flink.api.common.functions.RichMapFunction; 6 | import org.apache.flink.api.common.typeinfo.TypeHint; 7 | import org.apache.flink.api.common.typeinfo.TypeInfo; 8 | import org.apache.flink.api.common.typeinfo.TypeInformation; 9 | import org.apache.flink.api.java.tuple.Tuple4; 10 | import org.apache.flink.streaming.api.collector.selector.OutputSelector; 11 | import org.apache.flink.streaming.api.datastream.DataStream; 12 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 13 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 14 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 15 | import org.apache.flink.streaming.api.functions.source.SourceFunction; 16 | import org.apache.flink.table.api.EnvironmentSettings; 17 | import org.apache.flink.table.api.Table; 18 | import org.apache.flink.table.api.java.StreamTableEnvironment; 19 | 20 | import java.util.ArrayList; 21 | import java.util.List; 22 | import java.util.Random; 23 | 24 | public class MyStreamingSource implements SourceFunction { 25 | 26 | private boolean isRunning = true; 27 | 28 | /** 29 | * 重写run方法产生一个源源不断的数据发送源 30 | * @param ctx 31 | * @throws Exception 32 | */ 33 | public void run(SourceContext ctx) throws Exception { 34 | while(isRunning){ 35 | Item item = generateItem(); 36 | ctx.collect(item); 37 | 38 | //每秒产生一条数据 39 | Thread.sleep(1000); 40 | } 41 | } 42 | @Override 43 | public void cancel() { 44 | isRunning = false; 45 | } 46 | 47 | //随机产生一条商品数据 48 | private Item generateItem(){ 49 | int i = new Random().nextInt(100); 50 | ArrayList list = new ArrayList(); 51 | list.add("HAT"); 52 | list.add("TIE"); 53 | list.add("SHOE"); 54 | Item item = new Item(); 55 | item.setName(list.get(new Random().nextInt(3))); 56 | item.setId(i); 57 | return item; 58 | } 59 | } 60 | 61 | 62 | class StreamingDemo { 63 | public static void main(String[] args) throws Exception { 64 | 65 | EnvironmentSettings bsSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build(); 66 | StreamExecutionEnvironment bsEnv = StreamExecutionEnvironment.getExecutionEnvironment(); 67 | StreamTableEnvironment bsTableEnv = StreamTableEnvironment.create(bsEnv, bsSettings); 68 | 69 | SingleOutputStreamOperator source = bsEnv.addSource(new MyStreamingSource()).map(new MapFunction() { 70 | @Override 71 | public Item map(Item item) throws Exception { 72 | return item; 73 | } 74 | }); 75 | 76 | DataStream evenSelect = source.split(new OutputSelector() { 77 | @Override 78 | public 
Iterable select(Item value) { 79 | List output = new ArrayList<>(); 80 | if (value.getId() % 2 == 0) { 81 | output.add("even"); 82 | } else { 83 | output.add("odd"); 84 | } 85 | return output; 86 | } 87 | }).select("even"); 88 | 89 | DataStream oddSelect = source.split(new OutputSelector() { 90 | @Override 91 | public Iterable select(Item value) { 92 | List output = new ArrayList<>(); 93 | if (value.getId() % 2 == 0) { 94 | output.add("even"); 95 | } else { 96 | output.add("odd"); 97 | } 98 | return output; 99 | } 100 | }).select("odd"); 101 | 102 | 103 | bsTableEnv.createTemporaryView("evenTable", evenSelect, "name,id"); 104 | bsTableEnv.createTemporaryView("oddTable", oddSelect, "name,id"); 105 | 106 | Table queryTable = bsTableEnv.sqlQuery("select a.id,a.name,b.id,b.name from evenTable as a join oddTable as b on a.name = b.name"); 107 | 108 | queryTable.printSchema(); 109 | 110 | bsTableEnv.toRetractStream(queryTable, TypeInformation.of(new TypeHint>(){})).print(); 111 | 112 | bsEnv.execute("streaming sql job"); 113 | } 114 | 115 | } 116 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/Table05/ResultItem.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.Table05; 2 | 3 | /** 4 | * Created by wangchangye on 2020/4/23. 5 | */ 6 | public class ResultItem { 7 | private String aname; 8 | private Integer aid; 9 | private String bname; 10 | private Integer bid; 11 | 12 | public String getAname() { 13 | return aname; 14 | } 15 | 16 | public void setAname(String aname) { 17 | this.aname = aname; 18 | } 19 | 20 | public Integer getAid() { 21 | return aid; 22 | } 23 | 24 | public void setAid(Integer aid) { 25 | this.aid = aid; 26 | } 27 | 28 | public String getBname() { 29 | return bname; 30 | } 31 | 32 | public void setBname(String bname) { 33 | this.bname = bname; 34 | } 35 | 36 | public Integer getBid() { 37 | return bid; 38 | } 39 | 40 | public void setBid(Integer bid) { 41 | this.bid = bid; 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/WordCountSQL.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart; 2 | 3 | import org.apache.flink.api.java.DataSet; 4 | import org.apache.flink.api.java.ExecutionEnvironment; 5 | import org.apache.flink.table.api.Table; 6 | import org.apache.flink.table.api.java.BatchTableEnvironment; 7 | 8 | import java.util.ArrayList; 9 | 10 | 11 | public class WordCountSQL { 12 | 13 | public static void main(String[] args) throws Exception{ 14 | 15 | //获取运行环境 16 | ExecutionEnvironment fbEnv = ExecutionEnvironment.getExecutionEnvironment(); 17 | //创建一个tableEnvironment 18 | BatchTableEnvironment fbTableEnv = BatchTableEnvironment.create(fbEnv); 19 | 20 | String words = "hello flink hello lagou"; 21 | 22 | String[] split = words.split("\\W+"); 23 | ArrayList list = new ArrayList<>(); 24 | 25 | for(String word : split){ 26 | 27 | WC wc = new WC(word,1); 28 | list.add(wc); 29 | } 30 | 31 | DataSet input = fbEnv.fromCollection(list); 32 | 33 | //DataSet 转sql, 指定字段名 34 | Table table = fbTableEnv.fromDataSet(input, "word,frequency"); 35 | table.printSchema(); 36 | 37 | //注册为一个表 38 | fbTableEnv.createTemporaryView("WordCount", table); 39 | 40 | Table table02 = fbTableEnv.sqlQuery("select word as word, sum(frequency) as frequency from WordCount GROUP BY word"); 41 | 42 | //将表转换DataSet 43 | 
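//(toDataSet maps the result rows back onto the WC POJO by field name, so the
// aliases in the query above, "word" and "frequency", must match WC's public fields)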
DataSet ds3 = fbTableEnv.toDataSet(table02, WC.class); 44 | ds3.printToErr(); 45 | } 46 | 47 | public static class WC { 48 | public String word; 49 | public long frequency; 50 | 51 | public WC() {} 52 | 53 | public WC(String word, long frequency) { 54 | this.word = word; 55 | this.frequency = frequency; 56 | } 57 | 58 | @Override 59 | public String toString() { 60 | return word + ", " + frequency; 61 | } 62 | } 63 | 64 | } 65 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/shizhan01/CustomDeSerializationSchema.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.shizhan01; 2 | 3 | import org.apache.flink.api.common.typeinfo.TypeHint; 4 | import org.apache.flink.api.common.typeinfo.TypeInformation; 5 | import org.apache.flink.streaming.connectors.kafka.KafkaDeserializationSchema; 6 | import org.apache.kafka.clients.consumer.ConsumerRecord; 7 | 8 | public class CustomDeSerializationSchema implements KafkaDeserializationSchema> { 9 | 10 | //是否表示流的最后一条元素,设置为false,表示数据会源源不断的到来 11 | @Override 12 | public boolean isEndOfStream(ConsumerRecord nextElement) { 13 | return false; 14 | } 15 | 16 | //这里返回一个ConsumerRecord类型的数据,除了原数据还包括topic,offset,partition等信息 17 | @Override 18 | public ConsumerRecord deserialize(ConsumerRecord record) throws Exception { 19 | 20 | return new ConsumerRecord( 21 | record.topic(), 22 | record.partition(), 23 | record.offset(), 24 | new String(record.key()), 25 | new String(record.value()) 26 | ); 27 | } 28 | 29 | //指定数据的输入类型 30 | @Override 31 | public TypeInformation> getProducedType() { 32 | return TypeInformation.of(new TypeHint>(){}); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/shizhan01/KafkaConsumer.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.shizhan01; 2 | 3 | import org.apache.flink.api.common.functions.FlatMapFunction; 4 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 5 | import org.apache.flink.streaming.api.CheckpointingMode; 6 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 7 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; 8 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumerBase; 9 | import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; 10 | import org.apache.flink.util.Collector; 11 | 12 | import java.util.HashMap; 13 | import java.util.Properties; 14 | import java.util.regex.Pattern; 15 | 16 | public class KafkaConsumer { 17 | 18 | public static void main(String[] args) throws Exception { 19 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 20 | env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE); 21 | env.enableCheckpointing(5000); 22 | 23 | Properties properties = new Properties(); 24 | properties.setProperty("bootstrap.servers", "127.0.0.1:9092"); 25 | //设置消费组 26 | properties.setProperty("group.id", "group_test"); 27 | 28 | properties.setProperty(FlinkKafkaConsumerBase.KEY_PARTITION_DISCOVERY_INTERVAL_MILLIS, "10"); 29 | FlinkKafkaConsumer consumer = new FlinkKafkaConsumer<>("test", new SimpleStringSchema(), properties); 30 | 31 | //设置从最早的ffset消费 32 | consumer.setStartFromEarliest(); 33 | 34 | env.addSource(consumer).flatMap(new FlatMapFunction() { 35 | @Override 36 
| public void flatMap(String value, Collector out) throws Exception { 37 | System.out.println(value); 38 | } 39 | }); 40 | 41 | env.execute("start consumer..."); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/shizhan01/KafkaProducer.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.shizhan01; 2 | 3 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 4 | import org.apache.flink.streaming.api.CheckpointingMode; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 7 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer; 8 | 9 | import java.util.Properties; 10 | 11 | public class KafkaProducer { 12 | 13 | 14 | public static void main(String[] args) throws Exception{ 15 | 16 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 17 | env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE); 18 | env.enableCheckpointing(5000); 19 | 20 | DataStreamSource text = env.addSource(new MyNoParalleSource()).setParallelism(1); 21 | 22 | Properties properties = new Properties(); 23 | properties.setProperty("bootstrap.servers", "127.0.0.1:9092"); 24 | 25 | // 2.0 配置 kafkaProducer 26 | FlinkKafkaProducer producer = new FlinkKafkaProducer( 27 | "127.0.0.1:9092", //broker列表 28 | "test", //topic 29 | new SimpleStringSchema()); // 消息序列化 30 | 31 | //写入Kafka时附加记录的事件时间戳 32 | producer.setWriteTimestampToKafka(true); 33 | 34 | text.addSink(producer); 35 | env.execute(); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/shizhan01/MyNoParalleSource.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.shizhan01; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.streaming.api.datastream.DataStream; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 7 | import org.apache.flink.streaming.api.functions.source.SourceFunction; 8 | import org.apache.flink.streaming.api.windowing.time.Time; 9 | 10 | import java.util.ArrayList; 11 | import java.util.List; 12 | import java.util.Random; 13 | 14 | 15 | //并行度为1的source 16 | public class MyNoParalleSource implements SourceFunction { 17 | 18 | //private long count = 1L; 19 | private boolean isRunning = true; 20 | 21 | /** 22 | * 主要的方法 23 | * 启动一个source 24 | * 大部分情况下,都需要在这个run方法中实现一个循环,这样就可以循环产生数据了 25 | * 26 | * @param ctx 27 | * @throws Exception 28 | */ 29 | @Override 30 | public void run(SourceContext ctx) throws Exception { 31 | while(isRunning){ 32 | //图书的排行榜 33 | List books = new ArrayList<>(); 34 | books.add("Pyhton从入门到放弃");//10 35 | books.add("Java从入门到放弃");//8 36 | books.add("Php从入门到放弃");//5 37 | books.add("C++从入门到放弃");//3 38 | books.add("Scala从入门到放弃"); 39 | int i = new Random().nextInt(5); 40 | ctx.collect(books.get(i)); 41 | 42 | //每2秒产生一条数据 43 | Thread.sleep(2000); 44 | } 45 | } 46 | //取消一个cancel的时候会调用的方法 47 | @Override 48 | public void cancel() { 49 | isRunning = false; 50 | } 51 | } 52 | 53 | 54 | class StreamingDemoWithMyNoPralalleSource { 55 | public static void main(String[] args) throws Exception { 56 | //获取Flink的运行环境 57 | 
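//(getExecutionEnvironment() returns a local environment when run from the IDE and
// the cluster environment when the job is submitted, so the same code runs in both)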
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 58 | //获取数据源 59 | DataStreamSource text = env.addSource(new MyNoParalleSource()).setParallelism(1); //注意:针对此source,并行度只能设置为1 60 | DataStream num = text.map(new MapFunction() { 61 | @Override 62 | public String map(String value) throws Exception { 63 | //System.out.println("接收到数据:" + value); 64 | return value; 65 | } 66 | }); 67 | 68 | //每2秒钟处理一次数据 1 2 3 4 5 6 7 8 9 ... 69 | DataStream sum = num.timeWindowAll(Time.seconds(2)).sum(0); 70 | //打印结果 71 | sum.print().setParallelism(1); 72 | String jobName = StreamingDemoWithMyNoPralalleSource.class.getSimpleName(); 73 | env.execute(jobName); 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/shizhan02/DateUtil.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.shizhan02; 2 | 3 | import java.text.SimpleDateFormat; 4 | import java.util.Date; 5 | 6 | public class DateUtil { 7 | 8 | 9 | public static String timeStampToDate(Long timestamp){ 10 | 11 | ThreadLocal threadLocal 12 | = ThreadLocal.withInitial(() -> new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")); 13 | String format = threadLocal.get().format(new Date(timestamp)); 14 | return format.substring(0,10); 15 | } 16 | 17 | public static void main(String[] args) { 18 | System.out.println(timeStampToDate(System.currentTimeMillis())); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/shizhan02/KafkaConsumer.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.shizhan02; 2 | 3 | import org.apache.flink.api.common.functions.FilterFunction; 4 | import org.apache.flink.api.common.functions.FlatMapFunction; 5 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 6 | import org.apache.flink.api.common.typeinfo.TypeInformation; 7 | import org.apache.flink.streaming.api.CheckpointingMode; 8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 9 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; 10 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumerBase; 11 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer; 12 | import org.apache.flink.util.Collector; 13 | import scala.tools.nsc.doc.model.Val; 14 | 15 | import java.util.Properties; 16 | 17 | public class KafkaConsumer { 18 | 19 | public static void main(String[] args) throws Exception { 20 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 21 | env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE); 22 | env.enableCheckpointing(5000); 23 | 24 | Properties properties = new Properties(); 25 | properties.setProperty("bootstrap.servers", "127.0.0.1:9092"); 26 | //设置消费组 27 | properties.setProperty("group.id", "group_test"); 28 | 29 | properties.setProperty(FlinkKafkaConsumerBase.KEY_PARTITION_DISCOVERY_INTERVAL_MILLIS, "10"); 30 | FlinkKafkaConsumer consumer = new FlinkKafkaConsumer<>("test", new SimpleStringSchema(), properties); 31 | 32 | //设置从最早的ffset消费 33 | consumer.setStartFromEarliest(); 34 | 35 | env.addSource(consumer) 36 | .filter(new UserActionFilter()) 37 | .flatMap(new MyFlatMapFunction()) 38 | .returns(TypeInformation.of(String.class)) 39 | .addSink(new FlinkKafkaProducer( 40 | "127.0.0.1:9092", 41 | 
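// sink side: the three-argument constructor takes the broker list, the target
// topic, and the serialization schema, mirroring KafkaProducer in this package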
"log_user_action", 42 | new SimpleStringSchema() 43 | )); 44 | 45 | 46 | 47 | env.addSource(consumer).flatMap(new FlatMapFunction() { 48 | @Override 49 | public void flatMap(String value, Collector out) throws Exception { 50 | System.out.println(value); 51 | } 52 | }); 53 | 54 | env.execute("start consumer..."); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/shizhan02/KafkaProducer.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.shizhan02; 2 | 3 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 4 | import org.apache.flink.streaming.api.CheckpointingMode; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 7 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer; 8 | import org.myorg.quickstart.shizhan01.MyNoParalleSource; 9 | 10 | import java.util.Properties; 11 | 12 | public class KafkaProducer { 13 | 14 | 15 | public static void main(String[] args) throws Exception{ 16 | 17 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 18 | env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE); 19 | env.enableCheckpointing(5000); 20 | 21 | DataStreamSource text = env.addSource(new MyNoParalleSource()).setParallelism(1); 22 | 23 | Properties properties = new Properties(); 24 | properties.setProperty("bootstrap.servers", "127.0.0.1:9092"); 25 | 26 | // 2.0 配置 kafkaProducer 27 | FlinkKafkaProducer producer = new FlinkKafkaProducer( 28 | "127.0.0.1:9092", //broker列表 29 | "log_user_action", //topic 30 | new SimpleStringSchema()); // 消息序列化 31 | 32 | //写入Kafka时附加记录的事件时间戳 33 | producer.setWriteTimestampToKafka(true); 34 | 35 | text.addSink(producer); 36 | env.execute(); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/shizhan02/MyFlatMapFunction.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.shizhan02; 2 | 3 | 4 | import com.alibaba.fastjson.JSON; 5 | import com.alibaba.fastjson.JSONObject; 6 | import org.apache.commons.lang3.StringUtils; 7 | import org.apache.flink.api.common.functions.FlatMapFunction; 8 | import org.apache.flink.util.Collector; 9 | 10 | public class MyFlatMapFunction implements FlatMapFunction { 11 | @Override 12 | public void flatMap(String input, Collector out) throws Exception { 13 | 14 | JSONObject jsonObject = JSON.parseObject(input); 15 | String user_id = jsonObject.getString("user_id"); 16 | String action = jsonObject.getString("action"); 17 | Long timestamp = jsonObject.getLong("timestamp"); 18 | 19 | if(!StringUtils.isEmpty(user_id) || !StringUtils.isEmpty(action)){ 20 | UserClick userClick = new UserClick(); 21 | userClick.setUserId(user_id); 22 | userClick.setTimestamp(timestamp); 23 | userClick.setAction(action); 24 | 25 | out.collect(JSON.toJSONString(userClick)); 26 | } 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/shizhan02/MyHbaseSink.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.shizhan02; 2 | 3 | 4 | import org.apache.flink.api.java.tuple.Tuple3; 5 | import 
org.apache.flink.configuration.Configuration; 6 | import org.apache.flink.streaming.api.functions.sink.RichSinkFunction; 7 | import org.apache.hadoop.hbase.HBaseConfiguration; 8 | import org.apache.hadoop.hbase.HConstants; 9 | import org.apache.hadoop.hbase.TableName; 10 | import org.apache.hadoop.hbase.client.Connection; 11 | import org.apache.hadoop.hbase.client.ConnectionFactory; 12 | import org.apache.hadoop.hbase.client.Put; 13 | import org.apache.hadoop.hbase.client.Table; 14 | import org.apache.hadoop.hbase.util.Bytes; 15 | 16 | import java.util.ArrayList; 17 | import java.util.List; 18 | 19 | public class MyHbaseSink extends RichSinkFunction<Tuple3<String, String, Integer>> { 20 | 21 | 22 | private transient Connection connection; 23 | //transient fields are not restored when the function is deserialized on the TaskManager, so the buffer is created in open(), not here 24 | private transient List<Put> puts; 25 | 26 | 27 | 28 | @Override 29 | public void open(Configuration parameters) throws Exception { 30 | super.open(parameters); 31 | org.apache.hadoop.conf.Configuration conf = HBaseConfiguration.create(); 32 | conf.set(HConstants.ZOOKEEPER_QUORUM, "localhost:2181"); 33 | connection = ConnectionFactory.createConnection(conf); 34 | puts = new ArrayList<>(100); 35 | } 36 | 37 | @Override 38 | public void invoke(Tuple3<String, String, Integer> value, Context context) throws Exception { 39 | 40 | String family = "f"; 41 | Put put = new Put(Bytes.toBytes(value.f0)); 42 | put.addColumn(Bytes.toBytes(family), Bytes.toBytes(value.f1), Bytes.toBytes(value.f2)); 43 | puts.add(put); 44 | 45 | //write in batches of 100 to reduce round trips 46 | if (puts.size() == 100) { 47 | flush(); 48 | } 49 | } 50 | 51 | private void flush() throws Exception { 52 | Table table = connection.getTable(TableName.valueOf("database:pvuv_result")); 53 | table.put(puts); 54 | puts.clear(); 55 | table.close(); 56 | } 57 | 58 | @Override 59 | public void close() throws Exception { 60 | //flush whatever is still buffered before shutting down 61 | if (puts != null && !puts.isEmpty()) { 62 | flush(); 63 | } 64 | super.close(); 65 | connection.close(); 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/shizhan02/MyProcessAllWindowFunction.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.shizhan02; 2 | 3 | 4 | import com.google.common.collect.Sets; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.functions.windowing.ProcessAllWindowFunction; 7 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 8 | import org.apache.flink.util.Collector; 9 | 10 | import java.util.HashSet; 11 | import java.util.Iterator; 12 | 13 | public class MyProcessAllWindowFunction extends ProcessAllWindowFunction<UserClick, Tuple2<String, Integer>, TimeWindow> { 14 | @Override 15 | public void process(Context context, Iterable<UserClick> elements, Collector<Tuple2<String, Integer>> out) throws Exception { 16 | 17 | HashSet<String> uv = Sets.newHashSet(); 18 | Integer pv = 0; 19 | Iterator<UserClick> iterator = elements.iterator(); 20 | while (iterator.hasNext()) { 21 | String userId = iterator.next().getUserId(); 22 | uv.add(userId); 23 | pv = pv + 1; 24 | } 25 | out.collect(Tuple2.of("pv", pv)); 26 | out.collect(Tuple2.of("uv", uv.size())); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/shizhan02/MyProcessWindowFunction.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.shizhan02; 2 | 3 | import org.apache.flink.api.common.state.MapState; 4 | import org.apache.flink.api.common.state.MapStateDescriptor; 5 | import org.apache.flink.api.common.state.ValueState; 6 | import org.apache.flink.api.common.state.ValueStateDescriptor; 7 | import org.apache.flink.api.java.tuple.Tuple3; 8 | import org.apache.flink.configuration.Configuration; 9 | import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction; 10 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 11 | import org.apache.flink.util.Collector; 12 | 13 | import java.util.Iterator; 14 | 15 | public class MyProcessWindowFunction extends ProcessWindowFunction<UserClick, Tuple3<String, String, Integer>, String, TimeWindow> { 16 | 17 | private transient MapState<String, String> uvState; 18 | private transient ValueState<Integer> pvState; 19 | 20 | @Override 21 | public void open(Configuration parameters) throws Exception { 22 | 23 | super.open(parameters); 24 | uvState = this.getRuntimeContext().getMapState(new MapStateDescriptor<>("uv", String.class, String.class)); 25 | pvState = this.getRuntimeContext().getState(new ValueStateDescriptor<>("pv", Integer.class)); 26 | } 27 | 28 | @Override 29 | public void process(String s, Context context, Iterable<UserClick> elements, Collector<Tuple3<String, String, Integer>> out) throws Exception { 30 | 31 | Integer pv = 0; 32 | Iterator<UserClick> iterator = elements.iterator(); 33 | while (iterator.hasNext()) { 34 | pv = pv + 1; 35 | String userId = iterator.next().getUserId(); 36 | //the MapState keys act as a distinct set of user ids 37 | uvState.put(userId, null); 38 | } 39 | 40 | //add this firing's count to the running total, initializing it on the first firing 41 | Integer value = pvState.value(); 42 | if (null == value) { 43 | pvState.update(pv); 44 | } else { 45 | pvState.update(value + pv); 46 | } 47 | 48 | //uv is the number of distinct user ids accumulated so far 49 | Integer uv = 0; 50 | Iterator<String> uvIterator = uvState.keys().iterator(); 51 | while (uvIterator.hasNext()) { 52 | uvIterator.next(); 53 | uv = uv + 1; 54 | } 55 | 56 | out.collect(Tuple3.of(s, "uv", uv)); 57 | out.collect(Tuple3.of(s, "pv", pvState.value())); 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/shizhan02/MyProcessWindowFunctionBitMap.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.shizhan02; 2 | 3 | import org.apache.flink.api.common.state.MapState; 4 | import org.apache.flink.api.common.state.MapStateDescriptor; 5 | import org.apache.flink.api.common.state.ValueState; 6 | import org.apache.flink.api.common.state.ValueStateDescriptor; 7 | import org.apache.flink.api.common.typeinfo.TypeHint; 8 | import org.apache.flink.api.common.typeinfo.TypeInformation; 9 | import org.apache.flink.api.java.tuple.Tuple3; 10 | import org.apache.flink.configuration.Configuration; 11 | import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction; 12 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 13 | import org.apache.flink.util.Collector; 14 | import org.roaringbitmap.longlong.Roaring64NavigableMap; 15 | 16 | import java.util.Iterator; 17 | 18 | public class MyProcessWindowFunctionBitMap extends ProcessWindowFunction<UserClick, Tuple3<String, String, Integer>, String, TimeWindow> { 19 | 20 | private transient ValueState<Integer> uvState; 21 | private transient ValueState<Integer> pvState; 22 | private transient ValueState<Roaring64NavigableMap> bitMapState; 23 | 24 | 25 | @Override 26 | public void open(Configuration parameters) throws Exception { 27 | 28 | super.open(parameters); 29 | uvState = this.getRuntimeContext().getState(new ValueStateDescriptor<>("uv", Integer.class)); 30 | pvState = this.getRuntimeContext().getState(new ValueStateDescriptor<>("pv", Integer.class)); 31 | bitMapState = this.getRuntimeContext().getState(new ValueStateDescriptor<>("bitMap", TypeInformation.of(new TypeHint<Roaring64NavigableMap>() { 32 | }))); 33 | } 34 | 35 | @Override 36 | public void process(String s, Context context, Iterable<UserClick> elements, Collector<Tuple3<String, String, Integer>> out) throws Exception { 37 | 38 | Integer pv = pvState.value(); 39 | Roaring64NavigableMap bitMap = bitMapState.value(); 40 | 41 | //first firing for this key: start from an empty bitmap and a zero count 42 | if (bitMap == null) { 43 | bitMap = new Roaring64NavigableMap(); 44 | pv = 0; 45 | } 46 | 47 | Iterator<UserClick> iterator = elements.iterator(); 48 | while (iterator.hasNext()) { 49 | pv = pv + 1; 50 | String userId = iterator.next().getUserId(); 51 | //assumes userId parses as a long 52 | bitMap.add(Long.valueOf(userId)); 53 | } 54 | 55 | //persist the running totals so the next firing continues from them 56 | pvState.update(pv); 57 | bitMapState.update(bitMap); 58 | uvState.update(bitMap.getIntCardinality()); 59 | 60 | out.collect(Tuple3.of(s, "uv", uvState.value())); 61 | out.collect(Tuple3.of(s, "pv", pv)); 62 | 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/shizhan02/MyRedisSink.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.shizhan02; 2 | 3 | 4 | import org.apache.flink.api.java.tuple.Tuple3; 5 | import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommand; 6 | import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommandDescription; 7 | import org.apache.flink.streaming.connectors.redis.common.mapper.RedisMapper; 8 | 9 | public class MyRedisSink implements RedisMapper<Tuple3<String, String, Integer>> { 10 | 11 | /** 12 | * Declare the Redis data type: HSET into the hash named flink_pv_uv 13 | */ 14 | @Override 15 | public RedisCommandDescription getCommandDescription() { 16 | return new RedisCommandDescription(RedisCommand.HSET, "flink_pv_uv"); 17 | } 18 | 19 | @Override 20 | public String getKeyFromData(Tuple3<String, String, Integer> data) { 21 | return data.f1; 22 | } 23 | 24 | @Override 25 | public String getValueFromData(Tuple3<String, String, Integer> data) { 26 | return data.f2.toString(); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/shizhan02/PVUVCount.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.shizhan02; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import com.alibaba.fastjson.JSONObject; 5 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 6 | import org.apache.flink.api.common.typeinfo.TypeInformation; 7 | import org.apache.flink.runtime.state.memory.MemoryStateBackend; 8 | import org.apache.flink.streaming.api.TimeCharacteristic; 9 | import org.apache.flink.streaming.api.datastream.DataStream; 10 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 12 | import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor; 13 | import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows; 14 | import org.apache.flink.streaming.api.windowing.evictors.TimeEvictor; 15 | import org.apache.flink.streaming.api.windowing.time.Time; 16 | import org.apache.flink.streaming.api.windowing.triggers.ContinuousProcessingTimeTrigger; 17 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; 18 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumerBase; 19 | 20 | import java.util.Properties; 21 | 22 | public class PVUVCount { 23 | 24 | public static void main(String[] args) throws Exception { 25 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 26 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 27 | // Checkpoint settings are omitted here, but state is used below, so a state backend must be configured up front, either in flink-conf.yaml or, as here, in code 28 | env.setStateBackend(new MemoryStateBackend(true)); 29 | 30 | Properties properties = new Properties(); 31 |
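// Note: no "group.id" is set here, unlike the consumers in shizhan01; committing
// offsets back to Kafka requires a consumer group, so a line like the following
// is assumed if that behavior is wanted (the group name is a placeholder):
// properties.setProperty("group.id", "group_test");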
properties.setProperty("bootstrap.servers", "127.0.0.1:9092"); 32 | 33 | properties.setProperty(FlinkKafkaConsumerBase.KEY_PARTITION_DISCOVERY_INTERVAL_MILLIS, "10"); 34 | FlinkKafkaConsumer consumer = new FlinkKafkaConsumer<>("log_user_action", new SimpleStringSchema(), properties); 35 | //设置从最早的offset消费 36 | consumer.setStartFromEarliest(); 37 | 38 | 39 | DataStream dataStream = env 40 | .addSource(consumer) 41 | .name("log_user_action") 42 | .map(message -> { 43 | JSONObject record = JSON.parseObject(message); 44 | 45 | return new UserClick( 46 | record.getString("user_id"), 47 | record.getLong("timestamp"), 48 | record.getString("action") 49 | ); 50 | }) 51 | .returns(TypeInformation.of(UserClick.class)); 52 | 53 | 54 | SingleOutputStreamOperator userClickSingleOutputStreamOperator = dataStream.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor(Time.seconds(30)) { 55 | @Override 56 | public long extractTimestamp(UserClick element) { 57 | return element.getTimestamp(); 58 | } 59 | }); 60 | 61 | userClickSingleOutputStreamOperator 62 | .windowAll(TumblingProcessingTimeWindows.of(Time.days(1), Time.hours(-8))) 63 | .trigger(ContinuousProcessingTimeTrigger.of(Time.seconds(20))) 64 | .evictor(TimeEvictor.of(Time.seconds(0), true)); 65 | 66 | 67 | 68 | 69 | 70 | }// 71 | 72 | 73 | 74 | 75 | 76 | 77 | }// 78 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/shizhan02/PVUVCountBitMap.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.shizhan02; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import com.alibaba.fastjson.JSONObject; 5 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 6 | import org.apache.flink.api.common.typeinfo.TypeInformation; 7 | import org.apache.flink.api.java.functions.KeySelector; 8 | import org.apache.flink.runtime.state.memory.MemoryStateBackend; 9 | import org.apache.flink.streaming.api.TimeCharacteristic; 10 | import org.apache.flink.streaming.api.datastream.DataStream; 11 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 12 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 13 | import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor; 14 | import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows; 15 | import org.apache.flink.streaming.api.windowing.evictors.TimeEvictor; 16 | import org.apache.flink.streaming.api.windowing.time.Time; 17 | import org.apache.flink.streaming.api.windowing.triggers.ContinuousProcessingTimeTrigger; 18 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; 19 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumerBase; 20 | 21 | import java.util.Properties; 22 | 23 | public class PVUVCountBitMap { 24 | 25 | public static void main(String[] args) { 26 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 27 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 28 | // 检查点配置略。但后面要用到状态,所以状态后端必须预先配置,在flink-conf.yaml或者这里均可 29 | env.setStateBackend(new MemoryStateBackend(true)); 30 | 31 | Properties properties = new Properties(); 32 | properties.setProperty("bootstrap.servers", "127.0.0.1:9092"); 33 | 34 | properties.setProperty(FlinkKafkaConsumerBase.KEY_PARTITION_DISCOVERY_INTERVAL_MILLIS, "10"); 35 | FlinkKafkaConsumer consumer = new 
FlinkKafkaConsumer<>("log_user_action", new SimpleStringSchema(), properties); 36 | //设置从最早的offset消费 37 | consumer.setStartFromEarliest(); 38 | 39 | 40 | DataStream dataStream = env 41 | .addSource(consumer) 42 | .name("log_user_action") 43 | .map(message -> { 44 | JSONObject record = JSON.parseObject(message); 45 | 46 | return new UserClick( 47 | record.getString("user_id"), 48 | record.getLong("timestamp"), 49 | record.getString("action") 50 | ); 51 | }) 52 | .returns(TypeInformation.of(UserClick.class)); 53 | 54 | 55 | SingleOutputStreamOperator userClickSingleOutputStreamOperator = dataStream.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor(Time.seconds(30)) { 56 | @Override 57 | public long extractTimestamp(UserClick element) { 58 | return element.getTimestamp(); 59 | } 60 | }); 61 | 62 | userClickSingleOutputStreamOperator 63 | .keyBy(new KeySelector() { 64 | @Override 65 | public String getKey(UserClick value) throws Exception { 66 | return DateUtil.timeStampToDate(value.getTimestamp()); 67 | } 68 | }) 69 | .window(TumblingProcessingTimeWindows.of(Time.days(1), Time.hours(-8))) 70 | .trigger(ContinuousProcessingTimeTrigger.of(Time.seconds(20))) 71 | .evictor(TimeEvictor.of(Time.seconds(0), true)) 72 | .process(new MyProcessWindowFunction()); 73 | 74 | 75 | 76 | 77 | 78 | }// 79 | 80 | 81 | 82 | 83 | 84 | 85 | }// 86 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/shizhan02/PVUVCountKeyById.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.shizhan02; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import com.alibaba.fastjson.JSONObject; 5 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 6 | import org.apache.flink.api.common.typeinfo.TypeInformation; 7 | import org.apache.flink.api.java.functions.KeySelector; 8 | import org.apache.flink.api.java.tuple.Tuple3; 9 | import org.apache.flink.runtime.state.memory.MemoryStateBackend; 10 | import org.apache.flink.streaming.api.TimeCharacteristic; 11 | import org.apache.flink.streaming.api.datastream.DataStream; 12 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 13 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 14 | import org.apache.flink.streaming.api.functions.sink.SinkFunction; 15 | import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor; 16 | import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows; 17 | import org.apache.flink.streaming.api.windowing.evictors.TimeEvictor; 18 | import org.apache.flink.streaming.api.windowing.time.Time; 19 | import org.apache.flink.streaming.api.windowing.triggers.ContinuousProcessingTimeTrigger; 20 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; 21 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumerBase; 22 | import org.apache.flink.streaming.connectors.redis.RedisSink; 23 | import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig; 24 | 25 | import java.util.Properties; 26 | 27 | public class PVUVCountKeyById { 28 | 29 | public static void main(String[] args) { 30 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 31 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 32 | // 检查点配置略。但后面要用到状态,所以状态后端必须预先配置,在flink-conf.yaml或者这里均可 33 | env.setStateBackend(new 
MemoryStateBackend(true)); 34 | 35 | Properties properties = new Properties(); 36 | properties.setProperty("bootstrap.servers", "127.0.0.1:9092"); 37 | 38 | properties.setProperty(FlinkKafkaConsumerBase.KEY_PARTITION_DISCOVERY_INTERVAL_MILLIS, "10"); 39 | FlinkKafkaConsumer consumer = new FlinkKafkaConsumer<>("log_user_action", new SimpleStringSchema(), properties); 40 | //设置从最早的offset消费 41 | consumer.setStartFromEarliest(); 42 | 43 | 44 | DataStream dataStream = env 45 | .addSource(consumer) 46 | .name("log_user_action") 47 | .map(message -> { 48 | JSONObject record = JSON.parseObject(message); 49 | 50 | return new UserClick( 51 | record.getString("user_id"), 52 | record.getLong("timestamp"), 53 | record.getString("action") 54 | ); 55 | }) 56 | .returns(TypeInformation.of(UserClick.class)); 57 | 58 | 59 | SingleOutputStreamOperator userClickSingleOutputStreamOperator = dataStream.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor(Time.seconds(30)) { 60 | @Override 61 | public long extractTimestamp(UserClick element) { 62 | return element.getTimestamp(); 63 | } 64 | }); 65 | 66 | FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig.Builder().setHost("localhost").setPort(6379).build(); 67 | 68 | userClickSingleOutputStreamOperator 69 | .keyBy(new KeySelector() { 70 | @Override 71 | public String getKey(UserClick value) throws Exception { 72 | return DateUtil.timeStampToDate(value.getTimestamp()); 73 | } 74 | }) 75 | .window(TumblingProcessingTimeWindows.of(Time.days(1), Time.hours(-8))) 76 | .trigger(ContinuousProcessingTimeTrigger.of(Time.seconds(20))) 77 | .evictor(TimeEvictor.of(Time.seconds(0), true)) 78 | .process(new MyProcessWindowFunction()) 79 | .addSink(new RedisSink<>(conf,new MyRedisSink())); 80 | }// 81 | 82 | 83 | 84 | 85 | 86 | 87 | }// 88 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/shizhan02/PVUVCountKeyByIdMysqlSink.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.shizhan02; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import com.alibaba.fastjson.JSONObject; 5 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 6 | import org.apache.flink.api.common.typeinfo.TypeInformation; 7 | import org.apache.flink.api.java.functions.KeySelector; 8 | import org.apache.flink.connector.jdbc.JdbcConnectionOptions; 9 | import org.apache.flink.connector.jdbc.JdbcSink; 10 | import org.apache.flink.runtime.state.memory.MemoryStateBackend; 11 | import org.apache.flink.streaming.api.TimeCharacteristic; 12 | import org.apache.flink.streaming.api.datastream.DataStream; 13 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 14 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 15 | import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor; 16 | import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows; 17 | import org.apache.flink.streaming.api.windowing.evictors.TimeEvictor; 18 | import org.apache.flink.streaming.api.windowing.time.Time; 19 | import org.apache.flink.streaming.api.windowing.triggers.ContinuousProcessingTimeTrigger; 20 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; 21 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumerBase; 22 | import org.apache.flink.streaming.connectors.redis.RedisSink; 23 | import 
org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig; 24 | 25 | import java.util.Properties; 26 | 27 | public class PVUVCountKeyByIdMysqlSink { 28 | 29 | public static void main(String[] args) throws Exception { 30 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 31 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 32 | // checkpoint configuration is omitted here, but state is used below, so a state backend must be configured up front, either in flink-conf.yaml or right here 33 | env.setStateBackend(new MemoryStateBackend(true)); 34 | 35 | Properties properties = new Properties(); 36 | properties.setProperty("bootstrap.servers", "127.0.0.1:9092"); 37 | 38 | properties.setProperty(FlinkKafkaConsumerBase.KEY_PARTITION_DISCOVERY_INTERVAL_MILLIS, "10"); 39 | FlinkKafkaConsumer<String> consumer = new FlinkKafkaConsumer<>("log_user_action", new SimpleStringSchema(), properties); 40 | // consume from the earliest offset 41 | consumer.setStartFromEarliest(); 42 | 43 | 44 | DataStream<UserClick> dataStream = env 45 | .addSource(consumer) 46 | .name("log_user_action") 47 | .map(message -> { 48 | JSONObject record = JSON.parseObject(message); 49 | 50 | return new UserClick( 51 | record.getString("user_id"), 52 | record.getLong("timestamp"), 53 | record.getString("action") 54 | ); 55 | }) 56 | .returns(TypeInformation.of(UserClick.class)); 57 | 58 | 59 | SingleOutputStreamOperator<UserClick> userClickSingleOutputStreamOperator = dataStream.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<UserClick>(Time.seconds(30)) { 60 | @Override 61 | public long extractTimestamp(UserClick element) { 62 | return element.getTimestamp(); 63 | } 64 | }); 65 | 66 | String driverClass = "com.mysql.jdbc.Driver"; 67 | String dbUrl = "jdbc:mysql://127.0.0.1:3306/test"; 68 | String userName = "root"; 69 | String passWord = "123456"; 70 | 71 | 72 | userClickSingleOutputStreamOperator 73 | .keyBy(new KeySelector<UserClick, String>() { 74 | @Override 75 | public String getKey(UserClick value) throws Exception { 76 | return DateUtil.timeStampToDate(value.getTimestamp()); 77 | } 78 | }) 79 | .window(TumblingProcessingTimeWindows.of(Time.days(1), Time.hours(-8))) 80 | .trigger(ContinuousProcessingTimeTrigger.of(Time.seconds(20))) 81 | .evictor(TimeEvictor.of(Time.seconds(0), true)) 82 | .process(new MyProcessWindowFunction()) 83 | .addSink( 84 | JdbcSink.sink( 85 | "replace into pvuv_result (type,value) values (?,?)", 86 | (ps, value) -> { 87 | ps.setString(1, value.f1); 88 | ps.setInt(2, value.f2); 89 | }, 90 | new JdbcConnectionOptions.JdbcConnectionOptionsBuilder() 91 | .withUrl(dbUrl) 92 | .withDriverName(driverClass) 93 | .withUsername(userName) 94 | .withPassword(passWord) 95 | .build()) 96 | ); 97 | 98 | env.execute("PVUVCountKeyByIdMysqlSink"); 99 | } 100 | 101 | 102 | 103 | 104 | 105 | 106 | }// 107 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/shizhan02/UserActionFilter.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.shizhan02; 2 | 3 | 4 | import org.apache.flink.api.common.functions.FilterFunction; 5 | 6 | // keep only CLICK events that look like complete JSON objects 7 | public class UserActionFilter implements FilterFunction<String> { 8 | @Override 9 | public boolean filter(String input) throws Exception { 10 | return input.contains("CLICK") && input.startsWith("{") && input.endsWith("}"); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/shizhan02/UserActionProcessFunction.java:
-------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.shizhan02; 2 | 3 | 4 | import com.alibaba.fastjson.JSON; 5 | import com.alibaba.fastjson.JSONObject; 6 | import org.apache.commons.lang3.StringUtils; 7 | import org.apache.flink.streaming.api.functions.ProcessFunction; 8 | import org.apache.flink.util.Collector; 9 | 10 | public class UserActionProcessFunction extends ProcessFunction<String, String> { 11 | @Override 12 | public void processElement(String input, Context ctx, Collector<String> out) throws Exception { 13 | 14 | // drop everything that is not a CLICK event in complete JSON form (same rule as UserActionFilter) 15 | if (!input.contains("CLICK") || !input.startsWith("{") || !input.endsWith("}")) { 16 | return; 17 | } 18 | 19 | JSONObject jsonObject = JSON.parseObject(input); 20 | String user_id = jsonObject.getString("user_id"); 21 | String action = jsonObject.getString("action"); 22 | Long timestamp = jsonObject.getLong("timestamp"); 23 | 24 | // emit only records that carry both a user id and an action 25 | if (!StringUtils.isEmpty(user_id) && !StringUtils.isEmpty(action)) { 26 | UserClick userClick = new UserClick(); 27 | userClick.setUserId(user_id); 28 | userClick.setTimestamp(timestamp); 29 | userClick.setAction(action); 30 | 31 | out.collect(JSON.toJSONString(userClick)); 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/shizhan02/UserClick.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.shizhan02; 2 | 3 | 4 | public class UserClick { 5 | 6 | private String userId; 7 | private Long timestamp; 8 | private String action; 9 | 10 | public UserClick() { 11 | 12 | } 13 | 14 | public String getUserId() { 15 | return userId; 16 | } 17 | 18 | public void setUserId(String userId) { 19 | this.userId = userId; 20 | } 21 | 22 | public Long getTimestamp() { 23 | return timestamp; 24 | } 25 | 26 | public void setTimestamp(Long timestamp) { 27 | this.timestamp = timestamp; 28 | } 29 | 30 | public String getAction() { 31 | return action; 32 | } 33 | 34 | public void setAction(String action) { 35 | this.action = action; 36 | } 37 | 38 | public UserClick(String userId, Long timestamp, String action) { 39 | this.userId = userId; 40 | this.timestamp = timestamp; 41 | this.action = action; 42 | } 43 | } 44 | 45 | enum UserAction{ 46 | // click 47 | CLICK("CLICK"), 48 | // purchase 49 | PURCHASE("PURCHASE"), 50 | // other 51 | OTHER("OTHER"); 52 | 53 | private String action; 54 | UserAction(String action) { 55 | this.action = action; 56 | } 57 | } 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/topn28/OrderDetail.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.topn28; 2 | 3 | 4 | public class OrderDetail { 5 | 6 | private Long userId; // id of the user who placed the order 7 | private Long itemId; // item id 8 | private String cityName; // city of the user 9 | private Double price; // order amount 10 | private Long timeStamp; // order time 11 | 12 | public Long getUserId() { 13 | return userId; 14 | } 15 | 16 | public void setUserId(Long userId) { 17 | this.userId = userId; 18 | } 19 | 20 | public Long getItemId() { 21 | return itemId; 22 | } 23 | 24 | public void setItemId(Long itemId) { 25 | this.itemId = itemId; 26 | } 27 | 28 | public String getCityName() { 29 | return cityName; 30 | } 31 | 32 | public void setCityName(String cityName) { 33 | this.cityName = cityName; 34 | } 35 | 36 | public Double getPrice() { 37 | return price; 38 | } 39 | 40 | public void setPrice(Double price) {
41 | this.price = price; 42 | } 43 | 44 | public Long getTimeStamp() { 45 | return timeStamp; 46 | } 47 | 48 | public void setTimeStamp(Long timeStamp) { 49 | this.timeStamp = timeStamp; 50 | } 51 | 52 | @Override 53 | public String toString() { 54 | return "OrderDetail{" + 55 | "userId=" + userId + 56 | ", itemId=" + itemId + 57 | ", cityName='" + cityName + '\'' + 58 | ", price=" + price + 59 | ", timeStamp=" + timeStamp + 60 | '}'; 61 | } 62 | 63 | // no-arg constructor so fastjson can instantiate OrderDetail during deserialization 64 | public OrderDetail() { 65 | } 66 | 67 | public OrderDetail(Long userId, Long itemId, String cityName, Double price, Long timeStamp) { 68 | this.userId = userId; 69 | this.itemId = itemId; 70 | this.cityName = cityName; 71 | this.price = price; 72 | this.timeStamp = timeStamp; 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/topn28/TopN.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.topn28; 2 | 3 | 4 | import com.alibaba.fastjson.JSON; 5 | import org.apache.flink.api.common.functions.ReduceFunction; 6 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 7 | import org.apache.flink.api.java.functions.KeySelector; 8 | import org.apache.flink.api.java.tuple.Tuple2; 9 | import org.apache.flink.streaming.api.CheckpointingMode; 10 | import org.apache.flink.streaming.api.TimeCharacteristic; 11 | import org.apache.flink.streaming.api.datastream.DataStream; 12 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 13 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 14 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; 15 | import org.apache.flink.streaming.api.functions.windowing.ProcessAllWindowFunction; 16 | import org.apache.flink.streaming.api.watermark.Watermark; 17 | import org.apache.flink.streaming.api.windowing.assigners.SlidingProcessingTimeWindows; 18 | import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows; 19 | import org.apache.flink.streaming.api.windowing.time.Time; 20 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 21 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; 22 | import org.apache.flink.streaming.connectors.redis.RedisSink; 23 | import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig; 24 | import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommand; 25 | import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommandDescription; 26 | import org.apache.flink.streaming.connectors.redis.common.mapper.RedisMapper; 27 | import org.apache.flink.util.Collector; 28 | import org.myorg.quickstart.RedisSink27.RedisSink02; 29 | 30 | import java.util.*; 31 | 32 | public class TopN { 33 | 34 | public static void main(String[] args) throws Exception { 35 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 36 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 37 | env.enableCheckpointing(60 * 1000, CheckpointingMode.EXACTLY_ONCE); 38 | env.getCheckpointConfig().setCheckpointTimeout(30 * 1000); 39 | 40 | Properties properties = new Properties(); 41 | properties.setProperty("bootstrap.servers", "localhost:9092"); 42 | 43 | FlinkKafkaConsumer<String> consumer = new FlinkKafkaConsumer<>("test", new SimpleStringSchema(), properties); 44 | // consume from the earliest offset 45 | consumer.setStartFromEarliest(); 46 | 47 | DataStream<String> stream = env 48 | .addSource(consumer); 49 | 50
| DataStream<OrderDetail> orderStream = stream.map(message -> JSON.parseObject(message, OrderDetail.class)); 51 | 52 | DataStream<OrderDetail> dataStream = orderStream.assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks<OrderDetail>() { 53 | 54 | private Long currentTimeStamp = 0L; 55 | // allowed out-of-orderness 56 | private Long maxOutOfOrderness = 3000L; 57 | @Override 58 | public Watermark getCurrentWatermark() { 59 | 60 | return new Watermark(currentTimeStamp - maxOutOfOrderness); 61 | } 62 | @Override 63 | public long extractTimestamp(OrderDetail element, long previousElementTimestamp) { 64 | // advance the clock; otherwise the watermark never moves forward 65 | currentTimeStamp = Math.max(element.getTimeStamp(), currentTimeStamp); 66 | return element.getTimeStamp(); 67 | } 68 | }); 69 | 70 | DataStream<OrderDetail> reduce = dataStream 71 | .keyBy((KeySelector<OrderDetail, Long>) value -> value.getUserId()) 72 | // keyed window, so per-user sums are computed in parallel; windowAll here would funnel every user through a single window 73 | .window(SlidingProcessingTimeWindows.of(Time.seconds(600), Time.seconds(20))) 74 | .reduce(new ReduceFunction<OrderDetail>() { 75 | @Override 76 | public OrderDetail reduce(OrderDetail value1, OrderDetail value2) throws Exception { 77 | return new OrderDetail( 78 | value1.getUserId(), value1.getItemId(), value1.getCityName(), value1.getPrice() + value2.getPrice(), value1.getTimeStamp() 79 | ); 80 | } 81 | }); 82 | 83 | 84 | // compute a result every 20 seconds 85 | DataStream<Tuple2<Double, OrderDetail>> process = reduce.windowAll(TumblingEventTimeWindows.of(Time.seconds(20))) 86 | .process(new ProcessAllWindowFunction<OrderDetail, Tuple2<Double, OrderDetail>, TimeWindow>() { 87 | @Override 88 | public void process(Context context, Iterable<OrderDetail> elements, Collector<Tuple2<Double, OrderDetail>> out) throws Exception { 89 | TreeMap<Double, OrderDetail> treeMap = new TreeMap<>(new Comparator<Double>() { 90 | @Override 91 | public int compare(Double x, Double y) { 92 | return (x < y) ? -1 : 1; 93 | } 94 | }); 95 | 96 | Iterator<OrderDetail> iterator = elements.iterator(); 97 | while (iterator.hasNext()) { 98 | // consume each element exactly once; calling next() twice per pass would skip records 99 | OrderDetail detail = iterator.next(); 100 | treeMap.put(detail.getPrice(), detail); 101 | if (treeMap.size() > 10) { 102 | // evict the smallest price so only the 10 largest remain 103 | treeMap.pollFirstEntry(); 104 | } 105 | } 106 | 107 | for (Map.Entry<Double, OrderDetail> entry : treeMap.entrySet()) { 108 | out.collect(Tuple2.of(entry.getKey(), entry.getValue())); 109 | } 110 | } 111 | } 112 | ); 113 | FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig.Builder().setHost("localhost").setPort(6379).build(); 114 | process.addSink(new RedisSink<>(conf, new RedisMapper<Tuple2<Double, OrderDetail>>() { 115 | 116 | private final String TOPN_PREFIX = "TOPN:"; 117 | @Override 118 | public RedisCommandDescription getCommandDescription() { 119 | return new RedisCommandDescription(RedisCommand.HSET, TOPN_PREFIX); 120 | } 121 | 122 | @Override 123 | public String getKeyFromData(Tuple2<Double, OrderDetail> data) { 124 | return String.valueOf(data.f0); 125 | } 126 | 127 | @Override 128 | public String getValueFromData(Tuple2<Double, OrderDetail> data) { 129 | return data.f1.toString(); 130 | } 131 | })); 132 | 133 | env.execute("execute topn"); 134 | 135 | 136 | } 137 | }// 138 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/topn28/TopNAllWindowFunction.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.topn28; 2 | 3 | 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | import org.apache.flink.streaming.api.functions.windowing.ProcessAllWindowFunction; 6 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 7 | import org.apache.flink.util.Collector; 8 | 9 | import java.util.Comparator; 10 | import java.util.Iterator; 11 | import java.util.Map; 12 | import java.util.TreeMap; 13 | 14 | // reusable variant of the inline top-N function in TopN.java; emits the `size` highest-priced orders per window 15 | public class TopNAllWindowFunction extends ProcessAllWindowFunction<OrderDetail, Tuple2<Double, OrderDetail>, TimeWindow> { 16 | 17 | private int size = 10; 18 | 19 | public TopNAllWindowFunction(int size) { 20 |
this.size = size; 21 | } 22 | 23 | 24 | @Override 25 | public void process(Context context, Iterable<OrderDetail> elements, Collector<Tuple2<Double, OrderDetail>> out) throws Exception { 26 | 27 | TreeMap<Double, OrderDetail> treeMap = new TreeMap<>(new Comparator<Double>() { 28 | @Override 29 | public int compare(Double x, Double y) { 30 | return (x < y) ? -1 : 1; 31 | } 32 | }); 33 | 34 | Iterator<OrderDetail> iterator = elements.iterator(); 35 | while (iterator.hasNext()) { 36 | OrderDetail detail = iterator.next(); 37 | treeMap.put(detail.getPrice(), detail); 38 | if (treeMap.size() > size) { 39 | // evict the smallest price so only the `size` largest remain 40 | treeMap.pollFirstEntry(); 41 | } 42 | } 43 | 44 | for (Map.Entry<Double, OrderDetail> entry : treeMap.entrySet()) { 45 | out.collect(Tuple2.of(entry.getKey(), entry.getValue())); 46 | } 47 | 48 | 49 | } 50 | }// 51 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/watermark08/WindowWaterMark.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.watermark08; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | import org.apache.flink.streaming.api.TimeCharacteristic; 6 | import org.apache.flink.streaming.api.datastream.DataStream; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; 9 | import org.apache.flink.streaming.api.watermark.Watermark; 10 | import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows; 11 | import org.apache.flink.streaming.api.windowing.time.Time; 12 | 13 | public class WindowWaterMark { 14 | 15 | 16 | public static void main(String[] args) throws Exception { 17 | 18 | StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment(); 19 | 20 | // use event time 21 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 22 | // generate watermarks every 100 ms 23 | env.getConfig().setAutoWatermarkInterval(100); 24 | 25 | DataStream<String> dataStream = env 26 | .socketTextStream("127.0.0.1", 9000) 27 | .assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks<String>() { 28 | private Long currentTimeStamp = 0L; 29 | // allowed out-of-orderness 30 | private Long maxOutOfOrderness = 5000L; 31 | 32 | @Override 33 | public Watermark getCurrentWatermark() { 34 | 35 | return new Watermark(currentTimeStamp - maxOutOfOrderness); 36 | } 37 | 38 | @Override 39 | public long extractTimestamp(String s, long l) { 40 | String[] arr = s.split(","); 41 | long timeStamp = Long.parseLong(arr[1]); 42 | currentTimeStamp = Math.max(timeStamp, currentTimeStamp); 43 | System.err.println(s + ",EventTime:" + timeStamp + ",watermark:" + (currentTimeStamp - maxOutOfOrderness)); 44 | return timeStamp; 45 | } 46 | }); 47 | 48 | dataStream.map(new MapFunction<String, Tuple2<String, Long>>() { 49 | @Override 50 | public Tuple2<String, Long> map(String s) throws Exception { 51 | 52 | String[] split = s.split(","); 53 | return new Tuple2<>(split[0], Long.parseLong(split[1])); 54 | } 55 | }) 56 | .keyBy(0) 57 | .window(TumblingEventTimeWindows.of(Time.seconds(5))) 58 | .minBy(1) 59 | .print(); 60 | 61 | env.execute("WaterMark Test Demo"); 62 | 63 | }// 64 | 65 | } 66 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/windowfunction26/CounterTest.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.windowfunction26; 2 | 3 | 4 | import
org.apache.flink.api.common.JobExecutionResult; 5 | import org.apache.flink.api.common.accumulators.IntCounter; 6 | import org.apache.flink.api.common.functions.RichMapFunction; 7 | import org.apache.flink.configuration.Configuration; 8 | import org.apache.flink.streaming.api.datastream.DataStream; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | 11 | public class CounterTest { 12 | 13 | public static void main(String[] args) throws Exception { 14 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 15 | 16 | DataStream<String> dataStream = env.socketTextStream("127.0.0.1", 9000, "\n"); 17 | 18 | dataStream.map(new RichMapFunction<String, String>() { 19 | 20 | // declare the accumulator 21 | private IntCounter numLines = new IntCounter(); 22 | 23 | @Override 24 | public void open(Configuration parameters) throws Exception { 25 | super.open(parameters); 26 | 27 | // register the accumulator 28 | getRuntimeContext().addAccumulator("counter", this.numLines); 29 | } 30 | 31 | @Override 32 | public String map(String s) throws Exception { 33 | // add one per record 34 | this.numLines.add(1); 35 | return s; 36 | } 37 | }); 38 | 39 | dataStream.print(); 40 | JobExecutionResult result = env.execute("counter"); 41 | // step 4: read the total after the job finishes; this can be omitted if the value is not needed afterwards 42 | Object counter = result.getAccumulatorResult("counter"); 43 | System.out.println("accumulator result: " + counter); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/windowfunction26/MyAggregateFunction.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.windowfunction26; 2 | 3 | import org.apache.flink.api.common.functions.AggregateFunction; 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | 6 | public class MyAggregateFunction implements AggregateFunction<Tuple2<String, Integer>, Integer, Integer> { 7 | 8 | @Override 9 | public Integer createAccumulator() { 10 | return 0; 11 | } 12 | 13 | @Override 14 | public Integer add(Tuple2<String, Integer> value, Integer accumulator) { 15 | return accumulator + value.f1; 16 | } 17 | 18 | @Override 19 | public Integer getResult(Integer accumulator) { 20 | return accumulator; 21 | } 22 | 23 | @Override 24 | public Integer merge(Integer a, Integer b) { 25 | return a + b; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/main/java/org/myorg/quickstart/windowfunction26/MyReduceFunction.java: -------------------------------------------------------------------------------- 1 | package org.myorg.quickstart.windowfunction26; 2 | 3 | 4 | import org.apache.flink.api.common.functions.ReduceFunction; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.datastream.DataStream; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | 9 | public class MyReduceFunction { 10 | 11 | public static void main(String[] args) throws Exception { 12 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 13 | 14 | DataStream<Tuple2<String, Integer>> input = env.fromElements(courses); 15 | DataStream<Tuple2<String, Integer>> total = input.keyBy(0).reduce(new ReduceFunction<Tuple2<String, Integer>>() { 16 | @Override 17 | public Tuple2<String, Integer> reduce(Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) throws Exception { 18 | return new Tuple2<>(value1.f0, value1.f1 + value2.f1); 19 | } 20 | }); 21 | total.printToErr(); 22 | env.execute("ReduceFunction"); 23 | } 24 | 25 | public static final Tuple2<String, Integer>[] courses = new Tuple2[]{ 26 | Tuple2.of("张三",100), 27 | Tuple2.of("李四",80), 28 |
Tuple2.of("张三",80), 29 | Tuple2.of("李四",95), 30 | Tuple2.of("张三",90), 31 | Tuple2.of("李四",100), 32 | }; 33 | }// 34 | -------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | log4j.rootLogger=INFO, console 20 | 21 | log4j.appender.console=org.apache.log4j.ConsoleAppender 22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 23 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n 24 | --------------------------------------------------------------------------------