├── .gitignore
├── README.md
├── conf
│   └── hiveconf
│       └── hive-site.xml
├── data
│   ├── csv
│   │   └── a.csv
│   ├── json
│   │   ├── qiantao
│   │   │   └── a.txt
│   │   ├── qiantao2
│   │   │   └── a.txt
│   │   └── qiantao3
│   │       └── a.txt
│   └── sqldemo
│       └── a.txt
├── datagen
│   ├── pom.xml
│   └── src
│       └── main
│           └── java
│               └── cn
│                   └── doitedu
│                       ├── ActionLogAutoGen.java
│                       ├── ActionLogGenOne.java
│                       └── module
│                           ├── Collector.java
│                           ├── CollectorConsoleImpl.java
│                           ├── CollectorKafkaImpl.java
│                           ├── LogBean.java
│                           ├── LogBeanWrapper.java
│                           ├── LogRunnable.java
│                           ├── UserProfileDataGen.java
│                           └── UserUtils.java
├── flink_course
│   ├── data
│   │   ├── transformation_input
│   │   │   └── userinfo.txt
│   │   └── wc
│   │       └── input
│   │           └── wc.txt
│   ├── pom.xml
│   └── src
│       ├── main
│       │   ├── java
│       │   │   ├── cn
│       │   │   │   └── doitedu
│       │   │   │       ├── flink
│       │   │   │       │   ├── TaskTest.java
│       │   │   │       │   ├── TestWindow.java
│       │   │   │       │   ├── avro
│       │   │   │       │   │   └── schema
│       │   │   │       │   │       ├── AvroEventLog.java
│       │   │   │       │   │       └── AvroEventLogBean.java
│       │   │   │       │   ├── exercise
│       │   │   │       │   │   ├── EventCount.java
│       │   │   │       │   │   ├── EventUserInfo.java
│       │   │   │       │   │   ├── Exercise_1.java
│       │   │   │       │   │   └── UserInfo.java
│       │   │   │       │   ├── java
│       │   │   │       │   │   └── demos
│       │   │   │       │   │       ├── EventBean2.java
│       │   │   │       │   │       ├── EventLog.java
│       │   │   │       │   │       ├── ParallelismDe.java
│       │   │   │       │   │       ├── _01_StreamWordCount.java
│       │   │   │       │   │       ├── _02_BatchWordCount.java
│       │   │   │       │   │       ├── _03_StreamBatchWordCount.java
│       │   │   │       │   │       ├── _04_WordCount_LambdaTest.java
│       │   │   │       │   │       ├── _05_SourceOperator_Demos.java
│       │   │   │       │   │       ├── _06_CustomSourceFunction.java
│       │   │   │       │   │       ├── _07_Transformation_Demos.java
│       │   │   │       │   │       ├── _08_SinkOperator_Demos.java
│       │   │   │       │   │       ├── _09_StreamFileSinkOperator_Demo1.java
│       │   │   │       │   │       ├── _09_StreamFileSinkOperator_Demo2.java
│       │   │   │       │   │       ├── _09_StreamFileSinkOperator_Demo3.java
│       │   │   │       │   │       ├── _10_KafkaSinkOperator_Demo1.java
│       │   │   │       │   │       ├── _11_JdbcSinkOperator_Demo1.java
│       │   │   │       │   │       ├── _12_RedisSinkOperator_Demo1.java
│       │   │   │       │   │       ├── _13_SideOutput_Demo.java
│       │   │   │       │   │       ├── _14_StreamConnect_Union_Demo.java
│       │   │   │       │   │       ├── _15_StreamCoGroup_Join_Demo.java
│       │   │   │       │   │       ├── _16_BroadCast_Demo.java
│       │   │   │       │   │       ├── _17_ProcessFunctions_Demo.java
│       │   │   │       │   │       ├── _18_ChannalSelector_Partitioner_Demo.java
│       │   │   │       │   │       ├── _19_WaterMark_Api_Demo.java
│       │   │   │       │   │       ├── _19_WaterMark_Api_Demo2.java
│       │   │   │       │   │       ├── _20_Window_Api_Demo1.java
│       │   │   │       │   │       ├── _21_Window_Api_Demo2.java
│       │   │   │       │   │       ├── _21_Window_Api_Demo3.java
│       │   │   │       │   │       ├── _21_Window_Api_Demo4.java
│       │   │   │       │   │       ├── _22_StateBasic_Demo.java
│       │   │   │       │   │       ├── _23_State_OperatorState_Demo.java
│       │   │   │       │   │       ├── _24_State_KeyedState_Demo.java
│       │   │   │       │   │       ├── _25_State_DataStructure_Demo.java
│       │   │   │       │   │       ├── _26_State_TTL_Demo.java
│       │   │   │       │   │       ├── _27_ToleranceConfig_Demo.java
│       │   │   │       │   │       └── _28_ToleranceSideToSideTest.java
│       │   │   │       │   ├── scala
│       │   │   │       │   │   └── demos
│       │   │   │       │   │       └── _01_入门程序WordCount.scala
│       │   │   │       │   └── task
│       │   │   │       │       ├── Mapper1.java
│       │   │   │       │       ├── Mapper2.java
│       │   │   │       │       ├── Task1.java
│       │   │   │       │       ├── Task2.java
│       │   │   │       │       ├── Task3.java
│       │   │   │       │       └── TaskRunner.java
│       │   │   │       └── flinksql
│       │   │   │           ├── demos
│       │   │   │           │   ├── Demo10_KafkaConnectorDetail.java
│       │   │   │           │   ├── Demo11_UpsertKafkaConnectorTest.java
│       │   │   │           │   ├── Demo11_UpsertKafkaConnectorTest2.java
│       │   │   │           │   ├── Demo12_JdbcConnectorTest1.java
│       │   │   │           │   ├── Demo12_JdbcConnectorTest2.java
│       │   │   │           │   ├── Demo13_FileSystemConnectorTest.java
│       │   │   │           │   ├── Demo14_MysqlCdcConnector.java
│       │   │   │           │   ├── Demo14_StreamFromToTable.java
│       │   │   │           │   ├── Demo16_TimeWindowDemo.java
│       │   │   │           │   ├── Demo17_TimeWindowJoin.java
│       │   │   │           │   ├── Demo18_IntervalJoin.java
│       │   │   │           │   ├── Demo18_RegularJoin.java
│       │   │   │           │   ├── Demo19_ArrayJoin.java
│       │   │   │           │   ├── Demo19_LookupJoin.java
│       │   │   │           │   ├── Demo1_TableSql.java
│       │   │   │           │   ├── Demo20_Temporal_Join.java
│       │   │   │           │   ├── Demo21_CustomScalarFunction.java
│       │   │   │           │   ├── Demo22_CustomAggregateFunction.java
│       │   │   │           │   ├── Demo23_TableFunction.java
│       │   │   │           │   ├── Demo24_TableAggregateFunction.java
│       │   │   │           │   ├── Demo24_TableAggregateFunction2.java
│       │   │   │           │   ├── Demo25_MetricDemos.java
│       │   │   │           │   ├── Demo2_TableApi.java
│       │   │   │           │   ├── Demo3_TableObjectCreate.java
│       │   │   │           │   ├── Demo4_SqlTableCreate.java
│       │   │   │           │   ├── Demo5_CatalogDemo.java
│       │   │   │           │   ├── Demo6_Exercise.java
│       │   │   │           │   ├── Demo7_ColumnDetail1_Sql.java
│       │   │   │           │   ├── Demo7_ColumnDetail2_TableApi.java
│       │   │   │           │   ├── Demo8_CsvFormat.java
│       │   │   │           │   ├── Demo8_JsonFormat.java
│       │   │   │           │   ├── Demo9_EventTimeAndWatermark.java
│       │   │   │           │   ├── Demo9_EventTimeAndWatermark2.java
│       │   │   │           │   └── Demo9_EventTimeAndWatermark3.java
│       │   │   │           └── fuxi
│       │   │   │               ├── EventBean.java
│       │   │   │               ├── Exercise.java
│       │   │   │               ├── KeyedStateDemo.java
│       │   │   │               └── TimerDemo.java
│       │   │   ├── org
│       │   │   │   └── apache
│       │   │   │       └── flink
│       │   │   │           ├── api
│       │   │   │           │   └── common
│       │   │   │           │       └── eventtime
│       │   │   │           │           └── BoundedOutOfOrdernessWatermarks.java
│       │   │   │           ├── runtime
│       │   │   │           │   └── state
│       │   │   │           │       └── ttl
│       │   │   │           │           └── CXTtlIncrementalCleanup.java
│       │   │   │           └── streaming
│       │   │   │               ├── api
│       │   │   │               │   └── operators
│       │   │   │               │       └── AbstractStreamOperator.java
│       │   │   │               └── runtime
│       │   │   │                   └── operators
│       │   │   │                       ├── TimestampsAndWatermarksOperator.java
│       │   │   │                       └── windowing
│       │   │   │                           └── WindowOperator.java
│       │   │   └── tmp
│       │   │       ├── FlinkKafkaDemo.java
│       │   │       ├── pojos
│       │   │       │   ├── MysqlUser.java
│       │   │       │   └── UserSlotGame.java
│       │   │       ├── sqls.sql
│       │   │       └── utils
│       │   │           └── SqlHolder.java
│       │   └── resources
│       │       ├── log4j.properties
│       │       └── prts.avsc.bak
│       └── test
│           └── java
│               └── cn
│                   └── doitedu
│                       └── flink
│                           └── TestChangelog.java
├── kafka_course
│   ├── pom.xml
│   └── src
│       ├── main
│       │   ├── java
│       │   │   └── cn
│       │   │       └── doitedu
│       │   │           └── kafka
│       │   │               ├── AdminClientDemo.java
│       │   │               ├── ConsumerDemo.java
│       │   │               ├── ConsumerDemo2.java
│       │   │               ├── ConsumerDemo3.java
│       │   │               ├── Consumer实现ExactlyOnce手段1.java
│       │   │               ├── Kafka编程练习.java
│       │   │               ├── Kafka编程练习_消费者.java
│       │   │               ├── Kafka编程练习_消费者_Bitmap.java
│       │   │               ├── Kafka编程练习_消费者_判重.java
│       │   │               ├── Kafka自身事务机制.java
│       │   │               ├── MyPartitioner.java
│       │   │               └── ProducerDemo.java
│       │   └── resources
│       │       ├── bitmap示意图.png
│       │       └── consumer.properties
│       └── test
│           └── java
│               └── RoaringBitmapTest.java
└── pom.xml
/.gitignore:
--------------------------------------------------------------------------------
1 | # Project exclude paths
2 | /kafka_course/target/
3 | # Project exclude paths
4 | /flink_course/target/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # doit30_flink
2 |
3 | # Companion source code for the 多易教育 [涛哥] DOE session-30 FLINK course
4 |
5 | # For more learning materials and courses, join a QQ group and stay tuned:
6 | ① Group: 1071917730 (full, cannot join)
7 | ② Group: 813383827 (full, cannot join)
8 | ③ Group: 955021790 (open to join)
9 | ④ Group: 1108285618 (open to join)
10 |
--------------------------------------------------------------------------------
/conf/hiveconf/hive-site.xml:
--------------------------------------------------------------------------------
1 | <configuration>
2 |     <property>
3 |         <name>hive.metastore.uris</name>
4 |         <value>thrift://doitedu:9083</value>
5 |     </property>
6 | </configuration>
--------------------------------------------------------------------------------
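
Note: the flinksql demos in the tree (presumably Demo5_CatalogDemo among them) would reach this metastore through a HiveCatalog pointed at this conf directory. A minimal, hedged sketch (the catalog and database names here are assumptions, not taken from the repo):

    import org.apache.flink.table.api.EnvironmentSettings;
    import org.apache.flink.table.api.TableEnvironment;
    import org.apache.flink.table.catalog.hive.HiveCatalog;

    public class HiveCatalogSketch {
        public static void main(String[] args) {
            TableEnvironment tenv =
                    TableEnvironment.create(EnvironmentSettings.newInstance().inStreamingMode().build());

            // "conf/hiveconf" is the directory holding the hive-site.xml above;
            // its hive.metastore.uris entry (thrift://doitedu:9083) must be reachable
            HiveCatalog hiveCatalog = new HiveCatalog("hive", "default", "conf/hiveconf");
            tenv.registerCatalog("hive", hiveCatalog);
            tenv.useCatalog("hive");
        }
    }
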
/data/csv/a.csv:
--------------------------------------------------------------------------------
1 | |1|,|zs|,|18|
2 | # 哈哈哈哈
3 | |2|,|ls|,|20|
4 | |3|,|ww|,\N
--------------------------------------------------------------------------------
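
Note: the pipe-wrapped fields, the # comment line, and the \N marker above line up with options of Flink SQL's csv format. A hedged sketch of reading this file through the filesystem connector (the table and column names are illustrative assumptions):

    import org.apache.flink.table.api.EnvironmentSettings;
    import org.apache.flink.table.api.TableEnvironment;

    public class CsvFormatSketch {
        public static void main(String[] args) {
            TableEnvironment tenv =
                    TableEnvironment.create(EnvironmentSettings.newInstance().inStreamingMode().build());
            tenv.executeSql(
                    "CREATE TABLE csv_demo (id INT, name STRING, age INT) WITH ("
                            + " 'connector' = 'filesystem',"
                            + " 'path' = 'data/csv/',"
                            + " 'format' = 'csv',"
                            + " 'csv.quote-character' = '|',"   // fields arrive wrapped as |...|
                            + " 'csv.allow-comments' = 'true'," // lines starting with # are skipped
                            + " 'csv.null-literal' = '\\N'"     // \N is read back as NULL
                            + ")");
            tenv.executeSql("SELECT * FROM csv_demo").print();
        }
    }
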
/data/json/qiantao/a.txt:
--------------------------------------------------------------------------------
1 | {"id":10,"name":{"nick":"doe1","formal":"doit edu1"}}
2 | {"id":11,"name":{"nick":"doe2","formal":"doit edu2"}}
3 | {"id":12,"name":{"nick":"doe3","formal":"doit edu3"}}
--------------------------------------------------------------------------------
/data/json/qiantao2/a.txt:
--------------------------------------------------------------------------------
1 | {"id":10,"name":{"nick":"doe1","formal":"doit edu1","height":180}}
2 | {"id":11,"name":{"nick":"doe2","formal":"doit edu2","height":170}}
3 | {"id":12,"name":{"nick":"doe3","formal":"doit edu3","height":160}}
--------------------------------------------------------------------------------
/data/json/qiantao3/a.txt:
--------------------------------------------------------------------------------
1 | {"id":1,"friends":[{"name":"a","info":{"addr":"bj","gender":"male"}},{"name":"b","info":{"addr":"sh","gender":"female"}}]}
2 | {"id":2,"friends":[{"name":"b","info":{"addr":"sh","gender":"male"}},{"name":"c","info":{"addr":"bj","gender":"female"}}]}
--------------------------------------------------------------------------------
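
Note: these nested-object and nested-array samples are presumably inputs for the JSON-format demos. As a plain-Java illustration of the nesting (the class name is hypothetical), fastjson, which is already used elsewhere in this project, can walk a qiantao3 line like this:

    import com.alibaba.fastjson.JSON;
    import com.alibaba.fastjson.JSONArray;
    import com.alibaba.fastjson.JSONObject;

    public class NestedJsonSketch {
        public static void main(String[] args) {
            String line = "{\"id\":1,\"friends\":[{\"name\":\"a\",\"info\":{\"addr\":\"bj\",\"gender\":\"male\"}}]}";

            JSONObject obj = JSON.parseObject(line);
            int id = obj.getIntValue("id");

            // walk the friends array and each friend's nested info object
            JSONArray friends = obj.getJSONArray("friends");
            for (int i = 0; i < friends.size(); i++) {
                JSONObject friend = friends.getJSONObject(i);
                JSONObject info = friend.getJSONObject("info");
                System.out.println(id + " -> " + friend.getString("name") + " @ " + info.getString("addr"));
            }
        }
    }
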
/data/sqldemo/a.txt:
--------------------------------------------------------------------------------
1 | 1,zs,18,male
2 | 2,ls,28,fe,male
3 | 3,ww,38,male
4 |
--------------------------------------------------------------------------------
/datagen/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <parent>
6 |         <artifactId>doit30_flink</artifactId>
7 |         <groupId>cn.doitedu</groupId>
8 |         <version>1.0</version>
9 |     </parent>
10 |     <modelVersion>4.0.0</modelVersion>
11 |
12 |     <artifactId>datagen</artifactId>
13 |
14 |     <properties>
15 |         <maven.compiler.source>8</maven.compiler.source>
16 |         <maven.compiler.target>8</maven.compiler.target>
17 |     </properties>
18 |
19 |     <dependencies>
20 |         <dependency>
21 |             <groupId>org.apache.kafka</groupId>
22 |             <artifactId>kafka-clients</artifactId>
23 |             <version>${kafka.version}</version>
24 |         </dependency>
25 |
26 |         <dependency>
27 |             <groupId>commons-lang</groupId>
28 |             <artifactId>commons-lang</artifactId>
29 |             <version>2.6</version>
30 |         </dependency>
31 |
32 |         <dependency>
33 |             <groupId>org.apache.commons</groupId>
34 |             <artifactId>commons-lang3</artifactId>
35 |             <version>3.12.0</version>
36 |         </dependency>
37 |     </dependencies>
38 |
39 | </project>
--------------------------------------------------------------------------------
/datagen/src/main/java/cn/doitedu/ActionLogAutoGen.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu;
2 |
3 | import cn.doitedu.module.*;
4 |
5 | import java.util.ArrayList;
6 | import java.util.HashMap;
7 | import java.util.List;
8 |
9 | /**
10 | * @author 涛哥
11 | * @nick_name "deep as the sea"
12 | * @contact qq:657270652 wx:doit_edu
13 | * @site www.doitedu.cn
14 | * @date 2021-03-27
15 |  * @desc Action-log simulator (generates logs continuously and automatically)
16 | *
17 | * {
18 | * "account": "Vz54E9Ya",
19 | * "appId": "cn.doitedu.app1",
20 | * "appVersion": "3.4",
21 | * "carrier": "中国移动",
22 | * "deviceId": "WEISLD0235S0934OL",
23 | * "deviceType": "MI-6",
24 | * "ip": "24.93.136.175",
25 | * "latitude": 42.09287620431088,
26 | * "longitude": 79.42106825764643,
27 | * "netType": "WIFI",
28 | * "osName": "android",
29 | * "osVersion": "6.5",
30 | * "releaseChannel": "豌豆荚",
31 | * "resolution": "1024*768",
32 | * "sessionId": "SE18329583458",
33 | * "timeStamp": 1594534406220
34 | * "eventId": "productView",
35 | * "properties": {
36 | * "pageId": "646",
37 | * "productId": "157",
38 | * "refType": "4",
39 | * "refUrl": "805",
40 | * "title": "爱得堡 男靴中高帮马丁靴秋冬雪地靴 H1878 复古黄 40码",
41 | * "url": "https://item.jd.com/36506691363.html",
42 | * "utm_campain": "4",
43 | * "utm_loctype": "1",
44 | * "utm_source": "10"
45 | * }
46 | * }
47 | *
48 | *
49 |  * Create the topic in Kafka ahead of time:
50 |  * [root@hdp01 kafka_2.11-2.0.0]# bin/kafka-topics.sh --create --topic yinew_applog --partitions 2 --replication-factor 1 --zookeeper hdp01:2181,hdp02:2181,hdp03:2181
51 |  *
52 |  * After creating it, verify that it exists:
53 |  * [root@hdp01 kafka_2.11-2.0.0]# bin/kafka-topics.sh --list --zookeeper hdp01:2181
54 | */
55 | public class ActionLogAutoGen {
56 | public static void main(String[] args) throws Exception {
57 |
58 |         // Load historical users
59 |         // String filePath = "data/users/hisu-1654943006977.txt";
60 |         // HashMap<String, LogBean> hisUsers = UserUtils.loadHisUsers(filePath);
61 |
62 |         // Add new users
63 |         HashMap<String, LogBean> hisUsers = new HashMap<>();
64 |         UserUtils.addNewUsers(hisUsers, 1000, true);
65 |
66 |         UserUtils.saveUsers(hisUsers);
67 |
68 |         // Convert to user data that carries per-user session state
69 |         List<LogBeanWrapper> wrapperedUsers = UserUtils.userToWrapper(hisUsers);
70 |
71 |         System.out.println("Total daily active users: " + wrapperedUsers.size() + "-------");
72 |
73 |         // Generate logs in parallel on multiple threads
74 |         // CollectorConsoleImpl collector = new CollectorConsoleImpl();
75 |         CollectorKafkaImpl collector = new CollectorKafkaImpl("doit-events");
76 |         genBatchToConsole(wrapperedUsers, 3, collector);
77 |
78 |
79 | }
80 |
81 |     private static void genBatchToConsole(List<LogBeanWrapper> wrapperedUsers, int threads, Collector collector) {
82 |         int partSize = wrapperedUsers.size() / threads;
83 |
84 |         ArrayList<List<LogBeanWrapper>> partList = new ArrayList<>();
85 |
86 |         for (int i = 0; i < threads; i++) {
87 |             List<LogBeanWrapper> userPart = new ArrayList<>();
88 |
89 |             for (int j = i * partSize; j < (i != threads - 1 ? (i + 1) * partSize : wrapperedUsers.size()); j++) {
90 |                 userPart.add(wrapperedUsers.get(j));
91 |             }
92 |             new Thread(new LogRunnable(userPart, collector, 10)).start();
93 | }
94 | }
95 |
96 | }
97 |
--------------------------------------------------------------------------------
/datagen/src/main/java/cn/doitedu/ActionLogGenOne.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu;
2 |
3 | import cn.doitedu.module.LogBean;
4 | import com.alibaba.fastjson.JSON;
5 | import org.apache.kafka.clients.producer.KafkaProducer;
6 | import org.apache.kafka.clients.producer.ProducerRecord;
7 |
8 | import java.util.HashMap;
9 | import java.util.Map;
10 | import java.util.Properties;
11 |
12 | /***
13 | * @author hunter.d
14 | * @qq 657270652
15 | * @wx haitao-duan
16 | * @date 2021/4/5
17 | *
18 |  * Each run generates one action-log record
19 | *
20 | **/
21 | public class ActionLogGenOne {
22 | public static void main(String[] args) {
23 | Properties props = new Properties();
24 | props.setProperty("bootstrap.servers", "hdp01:9092,hdp02:9092,hdp03:9092");
25 | props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
26 | props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
27 |         KafkaProducer<String, String> kafkaProducer = new KafkaProducer<>(props);
28 |
29 | LogBean logBean = new LogBean();
30 | logBean.setDeviceId("000053");
31 | logBean.setEventId("E");
32 |         Map<String, String> ps = new HashMap<>();
33 |         ps.put("p1", "v1");
34 |         logBean.setProperties(ps);
35 | logBean.setTimeStamp(System.currentTimeMillis());
36 |
37 | String log = JSON.toJSONString(logBean);
38 |         ProducerRecord<String, String> record = new ProducerRecord<>("zenniu_applog", log);
39 | kafkaProducer.send(record);
40 | kafkaProducer.flush();
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
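
Note: to verify the record landed, a standard Kafka console consumer against the same brokers works (assuming the zenniu_applog topic already exists), in the same style as the commands embedded in the javadocs of this repo:

    [root@hdp01 kafka_2.11-2.0.0]# bin/kafka-console-consumer.sh --bootstrap-server hdp01:9092 --topic zenniu_applog --from-beginning
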
/datagen/src/main/java/cn/doitedu/module/Collector.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.module;
2 |
3 | public interface Collector {
4 | public void collect(String logdata);
5 | }
6 |
--------------------------------------------------------------------------------
/datagen/src/main/java/cn/doitedu/module/CollectorConsoleImpl.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.module;
2 |
3 | public class CollectorConsoleImpl implements Collector {
4 | @Override
5 | public void collect(String logdata) {
6 | System.out.println(logdata);
7 | }
8 | }
9 |
--------------------------------------------------------------------------------
/datagen/src/main/java/cn/doitedu/module/CollectorKafkaImpl.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.module;
2 |
3 | import org.apache.kafka.clients.producer.KafkaProducer;
4 | import org.apache.kafka.clients.producer.ProducerRecord;
5 |
6 | import java.util.Properties;
7 |
8 | public class CollectorKafkaImpl implements Collector {
9 |
10 |     private KafkaProducer<Integer, String> kafkaProducer;
11 | private String topicName;
12 |
13 | int messageSeq = 0;
14 |
15 | public CollectorKafkaImpl(String topicName){
16 | Properties props = new Properties();
17 | props.setProperty("bootstrap.servers", "doitedu:9092");
18 | props.put("key.serializer", "org.apache.kafka.common.serialization.IntegerSerializer");
19 | props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
20 |
21 |
22 |         // build a Kafka producer client
23 | this.kafkaProducer = new KafkaProducer<>(props);
24 |
25 | this.topicName = topicName;
26 | }
27 | @Override
28 | public void collect(String logdata) {
29 | this.messageSeq ++;
30 |
31 |         ProducerRecord<Integer, String> record = new ProducerRecord<>(topicName, this.messageSeq, logdata);
32 | kafkaProducer.send(record);
33 |
34 | kafkaProducer.flush();
35 |
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/datagen/src/main/java/cn/doitedu/module/LogBean.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.module;
2 |
3 | import lombok.Data;
4 |
5 | import java.util.Map;
6 |
7 | @Data
8 | public class LogBean {
9 | private String account ;
10 | private String appId ;
11 | private String appVersion ;
12 | private String carrier ;
13 | private String deviceId ;
14 | private String deviceType ;
15 | private String ip ;
16 | private double latitude ;
17 | private double longitude ;
18 | private String netType ;
19 | private String osName ;
20 | private String osVersion ;
21 | private String releaseChannel ;
22 | private String resolution ;
23 | private String sessionId ;
24 | private long timeStamp ;
25 | private String eventId ;
26 |     private Map<String, String> properties;
27 |
28 |
29 | }
30 |
--------------------------------------------------------------------------------
/datagen/src/main/java/cn/doitedu/module/LogBeanWrapper.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.module;
2 |
3 | import lombok.AllArgsConstructor;
4 | import lombok.Data;
5 | import lombok.NoArgsConstructor;
6 |
7 | @Data
8 | @AllArgsConstructor
9 | @NoArgsConstructor
10 | public class LogBeanWrapper {
11 | private LogBean logBean;
12 | private String sessionId;
13 | private long lastTime;
14 |
15 | private boolean isExists = true;
16 | private boolean isPushback = false;
17 |
18 | //private String currPage;
19 |
20 | private int sessionMax = 0;
21 |
22 | public LogBeanWrapper(LogBean logBean,String sessionId,long lastTime){
23 | this.logBean = logBean;
24 | this.sessionId = sessionId;
25 | this.lastTime = lastTime;
26 |
27 | }
28 |
29 |
30 |
31 | }
32 |
--------------------------------------------------------------------------------
/datagen/src/main/java/cn/doitedu/module/UserProfileDataGen.java:
--------------------------------------------------------------------------------
1 | //package cn.doitedu;
2 | //
3 | //import org.apache.commons.lang3.RandomUtils;
4 | //import org.apache.commons.lang3.StringUtils;
5 | //import org.apache.hadoop.conf.Configuration;
6 | //import org.apache.hadoop.hbase.TableName;
7 | //import org.apache.hadoop.hbase.client.Connection;
8 | //import org.apache.hadoop.hbase.client.ConnectionFactory;
9 | //import org.apache.hadoop.hbase.client.Put;
10 | //import org.apache.hadoop.hbase.client.Table;
11 | //import org.apache.hadoop.hbase.util.Bytes;
12 | //
13 | //import java.io.IOException;
14 | //import java.util.ArrayList;
15 | //
16 | ///**
17 | // * @author 涛哥
18 | // * @nick_name "deep as the sea"
19 | // * @contact qq:657270652 wx:doit_edu
20 | // * @site www.doitedu.cn
21 | // * @date 2021-03-27
22 | // * @desc User-profile data simulator
23 | // *
24 | // * deviceid,k1=v1
25 | // *
26 | // * Create the profile tag table in HBase first:
27 | // * [root@hdp01 ~]# hbase shell
28 | // * hbase> create 'yinew_profile','f'
29 | // */
30 | //public class UserProfileDataGen {
31 | // public static void main(String[] args) throws IOException {
32 | //
33 | // Configuration conf = new Configuration();
34 | // conf.set("hbase.zookeeper.quorum", "hdp01:2181,hdp02:2181,hdp03:2181");
35 | //
36 | // Connection conn = ConnectionFactory.createConnection(conf);
37 | // Table table = conn.getTable(TableName.valueOf("zenniu_profile"));
38 | //
39 | //        ArrayList<Put> puts = new ArrayList<>();
40 | // for (int i = 0; i < 100000; i++) {
41 | //
42 | //            // generate the profile tag data for one user
43 | // String deviceId = StringUtils.leftPad(i + "", 6, "0");
44 | // Put put = new Put(Bytes.toBytes(deviceId));
45 | // for (int k = 1; k <= 100; k++) {
46 | // String key = "tag" + k;
47 | // String value = "v" + RandomUtils.nextInt(1, 3);
48 | // put.addColumn(Bytes.toBytes("f"), Bytes.toBytes(key), Bytes.toBytes(value));
49 | // }
50 | //
51 | //            // add this profile record to the list
52 | // puts.add(put);
53 | //
54 | //            // flush once a batch of 100 has accumulated
55 | // if(puts.size()==100) {
56 | // table.put(puts);
57 | // puts.clear();
58 | // }
59 | //
60 | // }
61 | //
62 | //        // submit the final batch
63 | // if(puts.size()>0) table.put(puts);
64 | //
65 | // conn.close();
66 | // }
67 | //}
68 |
--------------------------------------------------------------------------------
/flink_course/data/transformation_input/userinfo.txt:
--------------------------------------------------------------------------------
1 | {"uid":1,"gender":"male","name":"ua","friends":[{"fid":1,"name":"cc"},{"fid":3,"name":"bb"}]}
2 | {"uid":2,"gender":"male","name":"ub","friends":[{"fid":2,"name":"aa"},{"fid":3,"name":"bb"}]}
3 | {"uid":3,"gender":"female","name":"uc","friends":[{"fid":2,"name":"aa"}]}
4 | {"uid":4,"gender":"female","name":"ud","friends":[{"fid":3,"name":"bb"}]}
5 | {"uid":5,"gender":"male","name":"ue","friends":[{"fid":1,"name":"cc"},{"fid":3,"name":"bb"}]}
6 | {"uid":6,"gender":"male","name":"uf","friends":[{"fid":2,"name":"aa"},{"fid":3,"name":"bb"},{"fid":1,"name":"cc"}]}
7 | {"uid":7,"gender":"male","name":"uf","friends":[{"fid":2,"name":"aa"},{"fid":3,"name":"bb"},{"fid":1,"name":"cc"},{"fid":4,"name":"dd"}]}
8 | {"uid":8,"gender":"male","name":"xx","friends":[{"fid":2,"name":"aa"},{"fid":3,"name":"bb"},{"fid":1,"name":"cc"},{"fid":4,"name":"dd"}]}
--------------------------------------------------------------------------------
/flink_course/data/wc/input/wc.txt:
--------------------------------------------------------------------------------
1 | a a a a b b b c
2 | d e d d f
3 | c d
4 | c c c
5 | d d d
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/TaskTest.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink;
2 |
3 | import org.apache.flink.api.common.typeinfo.TypeHint;
4 | import org.apache.flink.api.java.tuple.Tuple2;
5 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
8 | import org.apache.flink.streaming.api.functions.ProcessFunction;
9 | import org.apache.flink.util.Collector;
10 |
11 | public class TaskTest {
12 | public static void main(String[] args) throws Exception {
13 |
14 |         StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // unified stream/batch entry environment
15 |         env.setParallelism(1);
16 |
17 |         SingleOutputStreamOperator<String> st = env.socketTextStream("localhost", 9999)
18 |                 .process(new ProcessFunction<String, String>() {
19 |                     @Override
20 |                     public void processElement(String value, ProcessFunction<String, String>.Context ctx, Collector<String> out) throws Exception {
21 |                         System.out.println("level-1 map received: " + value + ", thread id: " + Thread.currentThread().getId());
22 |                         System.out.println("level-1 subtask: " + getRuntimeContext().getTaskNameWithSubtasks());
23 |
24 |                         // System.out.println("1- aaa");
25 |                         // System.out.println("1- bbb");
26 |                         out.collect(value);
27 |                     }
28 |                 });
29 |
30 |         SingleOutputStreamOperator<String> map = st.process(
31 |                 new ProcessFunction<String, String>() {
32 |                     @Override
33 |                     public void processElement(String value, ProcessFunction<String, String>.Context ctx, Collector<String> out) throws Exception {
34 |                         System.out.println("level-2 map operator received: " + value + ", thread id: " + Thread.currentThread().getId());
35 |                         System.out.println("level-2 subtask: " + getRuntimeContext().getTaskNameWithSubtasks());
36 |                         out.collect(value);
37 |                     }
38 |                 }
39 |         ).setParallelism(2);
40 |
41 |
42 |         map.print();
43 |         env.execute();
44 |
45 |
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/TestWindow.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink;
2 |
3 | import org.apache.commons.lang3.RandomUtils;
4 | import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
5 | import org.apache.flink.api.common.eventtime.WatermarkStrategy;
6 | import org.apache.flink.api.common.typeinfo.TypeHint;
7 | import org.apache.flink.api.java.ExecutionEnvironment;
8 | import org.apache.flink.api.java.tuple.Tuple2;
9 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
10 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
12 | import org.apache.flink.streaming.api.functions.ProcessFunction;
13 | import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
14 | import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
15 | import org.apache.flink.streaming.api.windowing.time.Time;
16 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
17 | import org.apache.flink.util.Collector;
18 |
19 | public class TestWindow {
20 | public static void main(String[] args) throws Exception {
21 |         StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // unified stream/batch entry environment
22 |         env.setParallelism(1);
23 |
24 |         DataStreamSource<String> st = env.socketTextStream("localhost", 9999);
25 |         SingleOutputStreamOperator<Tuple2<String, Long>> map = st.map(s -> {
26 |             String[] split = s.split(",");
27 |             return Tuple2.of(split[0], Long.parseLong(split[1]));
28 |         }).returns(new TypeHint<Tuple2<String, Long>>() {
29 |         });
30 |
31 |         SingleOutputStreamOperator<Tuple2<String, Long>> wmed = map.assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Long>>forMonotonousTimestamps().withTimestampAssigner(new SerializableTimestampAssigner<Tuple2<String, Long>>() {
32 |             @Override
33 |             public long extractTimestamp(Tuple2<String, Long> element, long recordTimestamp) {
34 |                 return element.f1;
35 |             }
36 |         }));
37 |         SingleOutputStreamOperator<String> wind = wmed.keyBy(tp -> tp.f0)
38 |                 .window(TumblingEventTimeWindows.of(Time.seconds(5)))
39 |                 .process(new ProcessWindowFunction<Tuple2<String, Long>, String, String, TimeWindow>() {
40 |                     @Override
41 |                     public void process(String s, ProcessWindowFunction<Tuple2<String, Long>, String, String, TimeWindow>.Context context, Iterable<Tuple2<String, Long>> elements, Collector<String> out) throws Exception {
42 |                         System.out.println(s + "==== window user function fired, thread id: " + Thread.currentThread().getId());
43 |                         // System.out.println(s + " thread id inside window: " + Thread.currentThread().getId());
44 |                         // System.out.println(s + " watermark inside window: " + context.currentWatermark());
45 |                         Thread.sleep(10000);
46 |                         //System.out.println(s + " done sleeping");
47 |                         int i = RandomUtils.nextInt(1, 100);
48 |                         //System.out.println(s + " about to emit: " + i);
49 |                         out.collect(s + "," + i);
50 |                         System.out.println(s + "==== window user function finished, thread id: " + Thread.currentThread().getId());
51 |                     }
52 |                 });
53 |
54 |
55 |         wind.process(new ProcessFunction<String, String>() {
56 |             @Override
57 |             public void processElement(String value, ProcessFunction<String, String>.Context ctx, Collector<String> out) throws Exception {
58 |                 System.out.println("%%%% downstream processElement start: " + value + ", thread id: " + Thread.currentThread().getId());
59 |                 System.out.println("%%%% downstream current watermark: " + ctx.timerService().currentWatermark());
60 |                 System.out.println("%%%% downstream processElement end, thread id: " + Thread.currentThread().getId());
61 |             }
62 |         }).startNewChain();
63 |
64 |
65 | env.execute();
66 | }
67 | }
68 |
--------------------------------------------------------------------------------
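
Note on driving this demo (my reading of the map function above, not documented in the repo): each socket line must be key,eventTimeMillis, typed into nc -lk 9999, for example:

    a,1000
    a,3000
    a,5000

With forMonotonousTimestamps, the a,5000 line advances the watermark to 4999, which is enough to fire the [0, 5000) tumbling window for key a; the 10-second Thread.sleep inside the window function then makes it easy to see, from the printed thread ids, that the window operator and the startNewChain()-separated downstream process run as separate tasks.
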
/flink_course/src/main/java/cn/doitedu/flink/exercise/EventCount.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.exercise;
2 |
3 | import lombok.*;
4 |
5 | @Data
6 | @NoArgsConstructor
7 | @AllArgsConstructor
8 | public class EventCount {
9 | private int id;
10 | private String eventId;
11 | private int cnt;
12 |
13 | }
14 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/exercise/EventUserInfo.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.exercise;
2 |
3 | import lombok.*;
4 |
5 | @Data
6 | @AllArgsConstructor
7 | @NoArgsConstructor
8 | public class EventUserInfo {
9 |
10 | private int id;
11 | private String eventId;
12 | private int cnt;
13 | private String gender;
14 | private String city;
15 |
16 | }
17 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/exercise/UserInfo.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.exercise;
2 |
3 | import lombok.*;
4 |
5 | import java.io.ByteArrayOutputStream;
6 | import java.io.IOException;
7 | import java.io.ObjectOutput;
8 | import java.io.ObjectOutputStream;
9 |
10 | @Data
11 | @NoArgsConstructor
12 | @AllArgsConstructor
13 | public class UserInfo {
14 | private int id;
15 | private String gender;
16 | private String city;
17 | }
18 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/EventBean2.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import lombok.AllArgsConstructor;
4 | import lombok.Data;
5 | import lombok.NoArgsConstructor;
6 |
7 | @Data
8 | @NoArgsConstructor
9 | @AllArgsConstructor
10 | public class EventBean2 {
11 | private long guid;
12 | private String eventId;
13 | private long timeStamp;
14 | private String pageId;
15 |     private int actTimelong; // action duration
16 | }
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/EventLog.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import lombok.*;
4 | import org.apache.flink.streaming.connectors.redis.RedisSink;
5 |
6 | import java.util.Map;
7 |
8 | @NoArgsConstructor
9 | @AllArgsConstructor
10 | @Getter
11 | @Setter
12 | @ToString
13 | public class EventLog{
14 | private long guid;
15 | private String sessionId;
16 | private String eventId;
17 | private long timeStamp;
18 |     private Map<String, String> eventInfo;
19 | }
20 |
21 |
22 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/ParallelismDe.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import org.apache.flink.streaming.api.datastream.DataStream;
4 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
5 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
6 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
7 |
8 | public class ParallelismDe {
9 |
10 | public static void main(String[] args) {
11 |
12 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
13 |         SingleOutputStreamOperator<Integer> s1 = env.fromElements(1, 2, 3, 4, 5, 6).map(s -> s).setParallelism(3);
14 |         SingleOutputStreamOperator<Integer> s2 = env.fromElements(11, 12, 13, 14, 15, 16).map(s -> s).setParallelism(5);
15 |         DataStream<Integer> s3 = s2.union(s1);
16 | System.out.println(s3.getParallelism());
17 |
18 |
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_01_StreamWordCount.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import org.apache.flink.api.common.functions.FlatMapFunction;
4 | import org.apache.flink.api.java.ExecutionEnvironment;
5 | import org.apache.flink.api.java.functions.KeySelector;
6 | import org.apache.flink.api.java.tuple.Tuple2;
7 | import org.apache.flink.configuration.Configuration;
8 | import org.apache.flink.streaming.api.datastream.DataStream;
9 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
10 | import org.apache.flink.streaming.api.datastream.KeyedStream;
11 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
12 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
13 | import org.apache.flink.util.Collector;
14 |
15 | /**
16 |  * Requests a socket service (doit01:9999) through the socket source to obtain a data stream,
17 |  * then counts the words appearing in the stream and their occurrence counts
18 | */
19 | public class _01_StreamWordCount {
20 |
21 | public static void main(String[] args) throws Exception {
22 |
23 |
24 |         // Create a programming entry environment
25 |         // ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // batch-processing entry environment
26 |         StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // unified stream/batch entry environment
27 |
28 |
29 |         // Explicitly declare a local environment with a web UI
30 |         //Configuration configuration = new Configuration();
31 |         //configuration.setInteger("rest.port", 8081);
32 |         //StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);
33 |
34 |
35 |         /**
36 |          * In local mode, the program's default parallelism is the number of logical CPU cores
37 |          */
38 |         env.setParallelism(1); // the default parallelism can be overridden on the env
39 |
40 |
41 |         // Load the socket source into a DataStream via a source operator
42 |         // [root@doit01 ~]# nc -lk 9999
43 |         SingleOutputStreamOperator<String> source = env.socketTextStream("localhost", 9999)
44 |                 .setParallelism(1)
45 |                 .slotSharingGroup("g1");
46 |
47 |         // Transform the data stream with operators (the computation logic)
48 |         DataStream<Tuple2<String, Integer>> words = source.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
49 |             @Override
50 |             public void flatMap(String s, Collector<Tuple2<String, Integer>> collector) throws Exception {
51 |                 // split into words
52 |                 String[] split = s.split("\\s+");
53 |                 for (String word : split) {
54 |                     // emit a (word, 1) pair for each word
55 |                     collector.collect(Tuple2.of(word, 1));
56 |                 }
57 |             }
58 |         })
59 |         /*.setParallelism(10)
60 |         .slotSharingGroup("g2")
61 |         .shuffle()*/;
62 |
63 |         //SingleOutputStreamOperator<Tuple2<String, Integer>> words2 = words.map(tp -> Tuple2.of(tp.f0, tp.f1 * 10));
64 |
65 |
66 |         KeyedStream<Tuple2<String, Integer>, String> keyed = words.keyBy(new KeySelector<Tuple2<String, Integer>, String>() {
67 |             @Override
68 |             public String getKey(Tuple2<String, Integer> tuple2) throws Exception {
69 |
70 |                 return tuple2.f0;
71 |             }
72 |         });
73 |
74 |
75 |         SingleOutputStreamOperator<Tuple2<String, Integer>> resultStream = keyed.sum("f1")/*.slotSharingGroup("g1")*/;
76 |
77 |         // Emit the result via a sink operator
78 |         resultStream.print("wcSink");
79 |
80 |         // Trigger job submission and execution
81 |         env.execute();
82 |
83 |
84 | }
85 | }
86 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_02_BatchWordCount.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import org.apache.flink.api.common.functions.FlatMapFunction;
4 | import org.apache.flink.api.java.ExecutionEnvironment;
5 | import org.apache.flink.api.java.operators.DataSource;
6 | import org.apache.flink.api.java.tuple.Tuple2;
7 | import org.apache.flink.util.Collector;
8 |
9 | /**
10 | * @Author: deep as the sea
11 | * @Site: 多易教育
12 | * @QQ: 657270652
13 | * @Date: 2022/4/30
14 |  * @Desc: WordCount example in batch execution mode
15 | **/
16 | public class _02_BatchWordCount {
17 |
18 | public static void main(String[] args) throws Exception {
19 |
20 |         // batch entry environment
21 |         ExecutionEnvironment batchEnv = ExecutionEnvironment.getExecutionEnvironment();
22 |
23 |         // read data -- in the batch API, the data abstraction is a DataSet
24 |         DataSource<String> stringDataSource = batchEnv.readTextFile("flink_course/data/wc/input/");
25 |
26 |         // call the various DataSet operators on the dataset
27 |         stringDataSource
28 |                 .flatMap(new MyFlatMapFunction())
29 |                 .groupBy(0)
30 |                 .sum(1)
31 |                 .print();
32 | }
33 | }
34 |
35 | class MyFlatMapFunction implements FlatMapFunction<String, Tuple2<String, Integer>> {
36 |
37 |     @Override
38 |     public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
39 | String[] words = value.split("\\s+");
40 | for (String word : words) {
41 | out.collect(Tuple2.of(word,1));
42 | }
43 | }
44 | }
45 |
46 |
47 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_03_StreamBatchWordCount.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import org.apache.flink.api.common.RuntimeExecutionMode;
4 | import org.apache.flink.api.common.functions.FlatMapFunction;
5 | import org.apache.flink.api.java.functions.KeySelector;
6 | import org.apache.flink.api.java.tuple.Tuple2;
7 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
9 | import org.apache.flink.util.Collector;
10 |
11 | public class _03_StreamBatchWordCount {
12 |
13 | public static void main(String[] args) throws Exception {
14 |
15 |         // entry environment for stream processing
16 |         StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
17 |         streamEnv.setParallelism(1);
18 |
19 |         // execute in batch mode
20 |         streamEnv.setRuntimeMode(RuntimeExecutionMode.BATCH);
21 |
22 |         // execute in streaming mode
23 |         // streamEnv.setRuntimeMode(RuntimeExecutionMode.STREAMING);
24 |
25 |         // let Flink decide on its own
26 |         // streamEnv.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);
27 |
28 |         // read a file into a DataStream
29 |         DataStreamSource<String> streamSource = streamEnv.readTextFile("flink_course/data/wc/input/wc.txt");
30 |
31 |
32 |         // run the computation with DataStream operators
33 |         streamSource.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
34 |                     @Override
35 |                     public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
36 |                         String[] words = value.split("\\s+");
37 |                         for (String word : words) {
38 |                             out.collect(Tuple2.of(word, 1));
39 |                         }
40 |                     }
41 |                 })
42 |                 .keyBy(new KeySelector<Tuple2<String, Integer>, String>() {
43 |                     @Override
44 |                     public String getKey(Tuple2<String, Integer> value) throws Exception {
45 |                         return value.f0;
46 |                     }
47 |                 })
48 |                 .sum(1)
49 |                 .print();
50 |
51 |
52 | streamEnv.execute();
53 |
54 | }
55 |
56 | }
57 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_04_WordCount_LambdaTest.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import org.apache.flink.api.common.functions.FlatMapFunction;
4 | import org.apache.flink.api.common.functions.MapFunction;
5 | import org.apache.flink.api.common.typeinfo.TypeHint;
6 | import org.apache.flink.api.common.typeinfo.TypeInformation;
7 | import org.apache.flink.api.common.typeinfo.Types;
8 | import org.apache.flink.api.java.ExecutionEnvironment;
9 | import org.apache.flink.api.java.functions.KeySelector;
10 | import org.apache.flink.api.java.tuple.Tuple2;
11 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
12 | import org.apache.flink.streaming.api.datastream.KeyedStream;
13 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
14 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
15 | import org.apache.flink.util.Collector;
16 |
17 | public class _04_WordCount_LambdaTest {
18 |
19 | public static void main(String[] args) throws Exception {
20 |
21 |         // Create a programming entry point (execution environment)
22 |
23 |         // stream-processing entry environment
24 |         StreamExecutionEnvironment envStream = StreamExecutionEnvironment.getExecutionEnvironment();
25 |
26 |         DataStreamSource<String> streamSource = envStream.readTextFile("flink_course/data/wc/input/wc.txt");
27 |
28 |         // First, upper-case each sentence
29 |         /* Looking at the MapFunction interface that the map operator accepts, it is a single-abstract-method interface,
30 |            so the core behavior of any implementation lives in that one method,
31 |            which means it can be implemented concisely with a lambda expression:
32 |         streamSource.map(new MapFunction<String, String>() {
33 |             @Override
34 |             public String map(String value) throws Exception {
35 |                 return null;
36 |             }
37 |         });*/
38 |
39 |         /**
40 |          * How to write the lambda: look at what the target interface method takes as parameters and returns
41 |          */
42 |         // Then express it with lambda syntax: (param1, param2, ...) -> { body }
43 |         // streamSource.map( (value) -> { return value.toUpperCase();});
44 |
45 |         // Since this lambda has a single parameter and a one-line body, it can be simplified
46 |         // streamSource.map( value -> value.toUpperCase() ) ;
47 |
48 |         // And since the body is one line that uses the parameter exactly once, the call can become a method reference
49 |         SingleOutputStreamOperator<String> upperCased = streamSource.map(String::toUpperCase);
50 |
51 |         // Then split into words, map to (word, 1), and flatten
52 |         /*upperCased.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
53 |             @Override
54 |             public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
55 |
56 |             }
57 |         });*/
58 |         // This interface is again a single-abstract-method interface, so its method can also be implemented as a lambda
59 |         SingleOutputStreamOperator<Tuple2<String, Integer>> wordAndOne = upperCased.flatMap((String s, Collector<Tuple2<String, Integer>> collector) -> {
60 |                     String[] words = s.split("\\s+");
61 |                     for (String word : words) {
62 |                         collector.collect(Tuple2.of(word, 1));
63 |                     }
64 |                 })
65 |                 // .returns(new TypeHint<Tuple2<String, Integer>>() {}); // declare the produced type via a TypeHint
66 |                 // .returns(TypeInformation.of(new TypeHint<Tuple2<String, Integer>>() {})); // more generally, pass a TypeInformation; the TypeHint above wraps one
67 |                 .returns(Types.TUPLE(Types.STRING, Types.INT)); // use the static factory methods on Types to build the TypeInformation
68 |
69 |
70 |         // Group by word
71 |         /*wordAndOne.keyBy(new KeySelector<Tuple2<String, Integer>, String>() {
72 |             @Override
73 |             public String getKey(Tuple2<String, Integer> value) throws Exception {
74 |                 return null;
75 |             }
76 |         })*/
77 |         // The KeySelector interface is once more single-abstract-method, so a lambda works here too
78 |         KeyedStream<Tuple2<String, Integer>, String> keyedStream = wordAndOne.keyBy((value) -> value.f0);
79 |
80 |
81 |         // Count occurrences per word
82 |         keyedStream.sum(1)
83 |                 .print();
84 |
85 |
86 | envStream.execute();
87 |
88 |
89 | }
90 | }
91 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_09_StreamFileSinkOperator_Demo1.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import org.apache.avro.Schema;
4 | import org.apache.avro.SchemaBuilder;
5 | import org.apache.avro.generic.GenericData;
6 | import org.apache.avro.generic.GenericRecord;
7 | import org.apache.flink.api.common.functions.MapFunction;
8 | import org.apache.flink.connector.file.sink.FileSink;
9 | import org.apache.flink.core.fs.Path;
10 | import org.apache.flink.formats.avro.typeutils.GenericRecordAvroTypeInfo;
11 | import org.apache.flink.formats.parquet.ParquetWriterFactory;
12 | import org.apache.flink.formats.parquet.avro.ParquetAvroWriters;
13 | import org.apache.flink.streaming.api.CheckpointingMode;
14 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
15 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
16 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
17 | import org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig;
18 | import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.DateTimeBucketAssigner;
19 | import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy;
20 |
21 | /**
22 | *
23 | * @Author: deep as the sea
24 | * @Site: www.51doit.com
25 | * @QQ: 657270652
26 | * @Date: 2022/4/26
27 |  * @Desc: Write the processed data stream to a file system (HDFS).
28 |  *        The sink operator used is the StreamFileSink from the connector extension package
29 | **/
30 | public class _09_StreamFileSinkOperator_Demo1 {
31 |
32 | public static void main(String[] args) throws Exception {
33 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
34 |         // enable checkpointing
35 | env.enableCheckpointing(5000, CheckpointingMode.EXACTLY_ONCE);
36 | env.getCheckpointConfig().setCheckpointStorage("file:///d:/ckpt");
37 |
38 |         // build a source data stream
39 |         DataStreamSource<EventLog> streamSource = env.addSource(new MySourceFunction());
40 |
41 |         // write the stream above to the file system (pretending it is a result stream produced by complex computation)
42 |
43 |
44 |         /**
45 |          * Approach 1:
46 |          * Core logic:
47 |          *  - build a Schema
48 |          *  - use the schema to build a ParquetWriterFactory
49 |          *  - use the ParquetWriterFactory to build a FileSink operator
50 |          *  - convert the original data into a GenericRecord stream and write it to the FileSink operator
51 |          */
52 |         // 1. First define the data schema for GenericRecord
53 | Schema schema = SchemaBuilder.builder()
54 | .record("DataRecord")
55 | .namespace("cn.doitedu.flink.avro.schema")
56 |                 .doc("user action event data schema")
57 | .fields()
58 | .requiredInt("gid")
59 | .requiredLong("ts")
60 | .requiredString("eventId")
61 | .requiredString("sessionId")
62 | .name("eventInfo")
63 | .type()
64 | .map()
65 | .values()
66 | .type("string")
67 | .noDefault()
68 | .endRecord();
69 |
70 |
71 |         // 2. Use the schema defined above to obtain a parquet writer
72 |         ParquetWriterFactory<GenericRecord> writerFactory = ParquetAvroWriters.forGenericRecord(schema);
73 |
74 |         // 3. Use the generated parquet writer to build a sink operator that writes columnar parquet files
75 |         FileSink<GenericRecord> sink1 = FileSink.forBulkFormat(new Path("d:/datasink/"), writerFactory)
76 |                 .withBucketAssigner(new DateTimeBucketAssigner<>("yyyy-MM-dd--HH"))
77 | .withRollingPolicy(OnCheckpointRollingPolicy.build())
78 | .withOutputFileConfig(OutputFileConfig.builder().withPartPrefix("doit_edu").withPartSuffix(".parquet").build())
79 | .build();
80 |
81 |
82 |         // 4. Convert the custom JavaBean stream into the GenericRecord stream that the sink's parquet writer expects
83 |         SingleOutputStreamOperator<GenericRecord> recordStream = streamSource
84 |                 .map((MapFunction<EventLog, GenericRecord>) eventLog -> {
85 |                     // build a Record object
86 |                     GenericData.Record record = new GenericData.Record(schema);
87 |
88 |                     // fill the record with data
89 |                     record.put("gid", (int) eventLog.getGuid());
90 |                     record.put("eventId", eventLog.getEventId());
91 |                     record.put("ts", eventLog.getTimeStamp());
92 |                     record.put("sessionId", eventLog.getSessionId());
93 |                     record.put("eventInfo", eventLog.getEventInfo());
94 |
95 |                     return record;
96 |                 }).returns(new GenericRecordAvroTypeInfo(schema)); // avro classes/objects need avro serialization, so explicitly supply the AvroTypeInfo that provides the AvroSerializer
97 |
98 |         // 5. emit the data
99 | recordStream.sinkTo(sink1);
100 |
101 | env.execute();
102 |
103 |
104 | }
105 | }
106 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_09_StreamFileSinkOperator_Demo2.java:
--------------------------------------------------------------------------------
1 | //package cn.doitedu.flink.java.demos;
2 | //
3 | //import cn.doitedu.flink.avro.schema.AvroEventLog;
4 | //import org.apache.avro.Schema;
5 | //import org.apache.avro.SchemaBuilder;
6 | //import org.apache.avro.generic.GenericData;
7 | //import org.apache.avro.generic.GenericRecord;
8 | //import org.apache.flink.api.common.functions.MapFunction;
9 | //import org.apache.flink.connector.file.sink.FileSink;
10 | //import org.apache.flink.core.fs.Path;
11 | //import org.apache.flink.formats.parquet.ParquetWriterFactory;
12 | //import org.apache.flink.formats.parquet.avro.ParquetAvroWriters;
13 | //import org.apache.flink.streaming.api.CheckpointingMode;
14 | //import org.apache.flink.streaming.api.datastream.DataStreamSource;
15 | //import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
16 | //import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
17 | //import org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig;
18 | //import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.DateTimeBucketAssigner;
19 | //import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy;
20 | //
21 | //import java.util.HashMap;
22 | //import java.util.Map;
23 | //import java.util.Set;
24 | //
25 | ///**
26 | // *
27 | // * @Author: deep as the sea
28 | // * @Site: www.51doit.com
29 | // * @QQ: 657270652
30 | // * @Date: 2022/4/26
31 | // * @Desc: Write the processed data stream to a file system (HDFS).
32 | // *        The sink operator used is the StreamFileSink from the connector extension package
33 | // **/
34 | //public class _09_StreamFileSinkOperator_Demo2 {
35 | //
36 | // public static void main(String[] args) throws Exception {
37 | // StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
38 | //        // enable checkpointing
39 | // env.enableCheckpointing(5000, CheckpointingMode.EXACTLY_ONCE);
40 | // env.getCheckpointConfig().setCheckpointStorage("file:///d:/ckpt");
41 | //
42 | //        // build a source data stream
43 | //        DataStreamSource<EventLog> streamSource = env.addSource(new MySourceFunction());
44 | //
45 | //        // write the stream above to the file system (pretending it is a result stream produced by complex computation)
46 | //
47 | //
48 | //        /**
49 | //         * Approach 2:
50 | //         * Core logic:
51 | //         *  - write an avsc text file (JSON) describing the data schema
52 | //         *  - add the Maven code-generator plugin to generate the Avro-specific JavaBean class from that avsc
53 | //         *  - use the generated JavaBean to build a ParquetWriterFactory
54 | //         *  - use the ParquetWriterFactory to build a FileSink operator
55 | //         *  - convert the original data stream into a stream of the generated JavaBean and write it to the FileSink operator
56 | //         */
57 | //
58 | //        // 1. Put the avsc file under resources and compile it with the Maven plugin to generate the JavaBean: AvroEventLog
59 | //        // A JavaBean generated from an avsc carries its own Schema object
60 | // // AvroEventLog avroEventLog = new AvroEventLog();
61 | // // Schema schema = avroEventLog.getSchema();
62 | //
63 | //        // 2. Use the auto-generated AvroEventLog class to obtain a parquet writer
64 | //        ParquetWriterFactory<AvroEventLog> parquetWriterFactory = ParquetAvroWriters.forSpecificRecord(AvroEventLog.class);
65 | //
66 | //        // 3. Use the generated parquet writer to build a sink operator that writes columnar parquet files
67 | //        FileSink<AvroEventLog> bulkSink = FileSink.forBulkFormat(new Path("d:/datasink2/"), parquetWriterFactory)
68 | //                .withBucketAssigner(new DateTimeBucketAssigner<>("yyyy-MM-dd--HH"))
69 | // .withRollingPolicy(OnCheckpointRollingPolicy.build())
70 | // .withOutputFileConfig(OutputFileConfig.builder().withPartPrefix("doit_edu").withPartSuffix(".parquet").build())
71 | // .build();
72 | //
73 | //
74 | //        // 4. Convert the custom EventLog bean stream into the AvroEventLog stream that the sink's parquet writer expects
75 | //        SingleOutputStreamOperator<AvroEventLog> avroEventLogStream = streamSource.map(new MapFunction<EventLog, AvroEventLog>() {
76 | //            @Override
77 | //            public AvroEventLog map(EventLog eventLog) throws Exception {
78 | //                HashMap<CharSequence, CharSequence> eventInfo1 = new HashMap<>();
79 | //
80 | //                // copy the hashmap entries over (the generated bean uses CharSequence keys/values)
81 | //                Map<String, String> eventInfo2 = eventLog.getEventInfo();
82 | //                Set<Map.Entry<String, String>> entries = eventInfo2.entrySet();
83 | //                for (Map.Entry<String, String> entry : entries) {
84 | //                    eventInfo1.put(entry.getKey(), entry.getValue());
85 | //                }
86 | //
87 | // return new AvroEventLog(eventLog.getGuid(), eventLog.getSessionId(), eventLog.getEventId(), eventLog.getTimeStamp(), eventInfo1);
88 | // }
89 | // });
90 | //
91 | //
92 | //        // 5. emit the data
93 | // avroEventLogStream.sinkTo(bulkSink);
94 | //
95 | // env.execute();
96 | //
97 | // }
98 | //}
99 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_09_StreamFileSinkOperator_Demo3.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import org.apache.flink.api.common.functions.MapFunction;
4 | import org.apache.flink.connector.file.sink.FileSink;
5 | import org.apache.flink.core.fs.Path;
6 | import org.apache.flink.formats.parquet.ParquetWriterFactory;
7 | import org.apache.flink.formats.parquet.avro.ParquetAvroWriters;
8 | import org.apache.flink.streaming.api.CheckpointingMode;
9 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
10 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
12 | import org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig;
13 | import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.DateTimeBucketAssigner;
14 | import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy;
15 |
16 | import java.util.HashMap;
17 | import java.util.Map;
18 | import java.util.Set;
19 |
20 | /**
21 | *
22 | * @Author: deep as the sea
23 | * @Site: www.51doit.com
24 | * @QQ: 657270652
25 | * @Date: 2022/4/26
26 |  * @Desc: Write the processed data stream to a file system (HDFS).
27 |  *        The sink operator used is the StreamFileSink from the connector extension package
28 | **/
29 | public class _09_StreamFileSinkOperator_Demo3 {
30 |
31 | public static void main(String[] args) throws Exception {
32 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
33 |         // enable checkpointing
34 | env.enableCheckpointing(5000, CheckpointingMode.EXACTLY_ONCE);
35 | env.getCheckpointConfig().setCheckpointStorage("file:///d:/ckpt");
36 | env.setParallelism(1);
37 |
38 |         // build a source data stream
39 |         DataStreamSource<EventLog> streamSource = env.addSource(new MySourceFunction());
40 |
41 |         // write the stream above to the file system (pretending it is a result stream produced by complex computation)
42 |
43 |
44 |         /**
45 |          * Approach 3:
46 |          * Core logic:
47 |          *  - use your own JavaBean class (via reflection) to build a ParquetWriterFactory
48 |          *  - use the ParquetWriterFactory to build a FileSink operator
49 |          *  - write the original data stream to the FileSink operator
50 |          */
51 |
52 |         // 2. Use your own JavaBean class to obtain a parquet writer
53 |         ParquetWriterFactory<EventLog> parquetWriterFactory = ParquetAvroWriters.forReflectRecord(EventLog.class);
54 |
55 |         // 3. Use the generated parquet writer to build a sink operator that writes columnar parquet files
56 |         FileSink<EventLog> bulkSink = FileSink.forBulkFormat(new Path("d:/datasink3/"), parquetWriterFactory)
57 |                 .withBucketAssigner(new DateTimeBucketAssigner<>("yyyy-MM-dd--HH"))
58 | .withRollingPolicy(OnCheckpointRollingPolicy.build())
59 | .withOutputFileConfig(OutputFileConfig.builder().withPartPrefix("doit_edu").withPartSuffix(".parquet").build())
60 | .build();
61 |
62 |         // 5. emit the data
63 | streamSource.sinkTo(bulkSink);
64 |
65 |
66 | env.execute();
67 | }
68 | }
69 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_10_KafkaSinkOperator_Demo1.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import com.alibaba.fastjson.JSON;
4 | import org.apache.flink.api.common.serialization.SimpleStringSchema;
5 | import org.apache.flink.configuration.Configuration;
6 | import org.apache.flink.connector.base.DeliveryGuarantee;
7 | import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema;
8 | import org.apache.flink.connector.kafka.sink.KafkaSink;
9 | import org.apache.flink.streaming.api.CheckpointingMode;
10 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
12 |
13 | /**
14 | *
15 | * @Author: deep as the sea
16 | * @Site: www.51doit.com
17 | * @QQ: 657270652
18 | * @Date: 2022/4/26
19 | * @Desc:
20 |  *   Write a data stream to Kafka with the KafkaSink.
21 |  *   Test prep -- create the target topic first:
22 |  *   [root@doit01 ~]# kafka-topics.sh --create --topic event-log --partitions 3 --replication-factor 2 --zookeeper doit01:2181
23 | **/
24 | public class _10_KafkaSinkOperator_Demo1 {
25 | public static void main(String[] args) throws Exception {
26 |
27 | Configuration configuration = new Configuration();
28 | configuration.setInteger("rest.port",8822);
29 | StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);
30 |
31 |
32 |         // enable checkpointing
33 | env.enableCheckpointing(5000, CheckpointingMode.EXACTLY_ONCE);
34 | env.getCheckpointConfig().setCheckpointStorage("file:///d:/ckpt");
35 |
36 |         // build a source data stream
37 |         DataStreamSource<EventLog> streamSource = env.addSource(new MySourceFunction());
38 |
39 |
40 |         // write the data to Kafka
41 |         // 1. build a Kafka sink operator
42 |         KafkaSink<String> kafkaSink = KafkaSink.<String>builder()
43 |                 .setBootstrapServers("doit01:9092,doit02:9092")
44 |                 .setRecordSerializer(KafkaRecordSerializationSchema.<String>builder()
45 | .setTopic("event-log")
46 | .setValueSerializationSchema(new SimpleStringSchema())
47 | .build()
48 | )
49 | .setDeliverGuarantee(DeliveryGuarantee.AT_LEAST_ONCE)
50 | .setTransactionalIdPrefix("doitedu-")
51 | .build();
52 |
53 |         // 2. write the data stream to the sink operator built above
54 | streamSource
55 | .map(JSON::toJSONString).disableChaining()
56 | .sinkTo(kafkaSink);
57 |
58 | env.execute();
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
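
A side note, not from the course code: the sink above sets a transactional-id prefix yet runs AT_LEAST_ONCE. A hedged sketch of the EXACTLY_ONCE variant, assuming the same KafkaSink builder API as the demo (plus java.util.Properties) and a broker whose transaction.max.timeout.ms permits the chosen timeout:

    // Sketch only: exactly-once rides on Kafka transactions, whose timeout usually has to be
    // raised, since checkpoint intervals can outlive the producer's default transaction timeout.
    Properties txnProps = new Properties();
    txnProps.setProperty("transaction.timeout.ms", "600000"); // must be <= broker transaction.max.timeout.ms

    KafkaSink<String> eosSink = KafkaSink.<String>builder()
            .setBootstrapServers("doit01:9092,doit02:9092")
            .setRecordSerializer(KafkaRecordSerializationSchema.<String>builder()
                    .setTopic("event-log")
                    .setValueSerializationSchema(new SimpleStringSchema())
                    .build())
            .setDeliverGuarantee(DeliveryGuarantee.EXACTLY_ONCE) // two-phase commit tied to checkpoints
            .setTransactionalIdPrefix("doitedu-")
            .setKafkaProducerConfig(txnProps)
            .build();
    // Downstream consumers must read with isolation.level=read_committed for the guarantee to hold.
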
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_12_RedisSinkOperator_Demo1.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import com.alibaba.fastjson.JSON;
4 | import org.apache.flink.configuration.Configuration;
5 | import org.apache.flink.streaming.api.CheckpointingMode;
6 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
8 | import org.apache.flink.streaming.connectors.redis.RedisSink;
9 | import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig;
10 | import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommand;
11 | import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommandDescription;
12 | import org.apache.flink.streaming.connectors.redis.common.mapper.RedisMapper;
13 |
14 | import java.util.Optional;
15 |
16 | /**
17 | *
18 | * @Author: deep as the sea
19 | * @Site: www.51doit.com
20 | * @QQ: 657270652
21 | * @Date: 2022/4/26
22 | * @Desc:
23 |  *   Write a data stream to Redis with the RedisSink operator
24 | *
25 | **/
26 | public class _12_RedisSinkOperator_Demo1 {
27 |
28 |
29 | public static void main(String[] args) throws Exception {
30 |
31 | Configuration configuration = new Configuration();
32 | configuration.setInteger("rest.port",8822);
33 | StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);
34 |
35 |
36 |         // enable checkpointing
37 | env.enableCheckpointing(5000, CheckpointingMode.EXACTLY_ONCE);
38 | env.getCheckpointConfig().setCheckpointStorage("file:///d:/ckpt");
39 |
40 | // 构造好一个数据流
41 | DataStreamSource streamSource = env.addSource(new MySourceFunction());
42 |
43 | // eventLog数据插入redis,你想用什么结构来存储?
44 | FlinkJedisPoolConfig config = new FlinkJedisPoolConfig.Builder().setHost("doit01").build();
45 |
46 | RedisSink<EventLog> redisSink = new RedisSink<>(config, new StringInsertMapper());
47 |
48 | streamSource.addSink(redisSink);
49 |
50 | env.execute();
51 |
52 | }
53 |
54 |
55 | static class StringInsertMapper implements RedisMapper<EventLog> {
56 |
57 | @Override
58 | public RedisCommandDescription getCommandDescription() {
59 | return new RedisCommandDescription(RedisCommand.SET);
60 | }
61 |
62 | /**
63 | * If the chosen Redis structure has no inner key, this method returns the top-level (big) key
64 | * If the structure has inner keys (e.g. hset), this method returns the inner (small) key, and the value passed to the Description above becomes the top-level key
65 | * @param data
66 | * @return
67 | */
68 | @Override
69 | public String getKeyFromData(EventLog data) {
70 | return data.getGuid()+"-"+data.getSessionId()+"-"+data.getTimeStamp(); // this is the key of the string record
71 | }
72 |
73 | @Override
74 | public String getValueFromData(EventLog data) {
75 | return JSON.toJSONString(data); // this is the value of the string record
76 | }
77 | }
78 |
79 |
80 | /**
81 | * Insert data as a HASH structure
82 | */
83 | static class HsetInsertMapper implements RedisMapper<EventLog> {
84 | // The additional key can be chosen per record (structures like hash carry an additional outer (big) key)
85 | @Override
86 | public Optional<String> getAdditionalKey(EventLog data) {
87 | return RedisMapper.super.getAdditionalKey(data);
88 | }
89 |
90 | // A different TTL (time to live) can be set per record
91 | @Override
92 | public Optional<Integer> getAdditionalTTL(EventLog data) {
93 | return RedisMapper.super.getAdditionalTTL(data);
94 | }
95 |
96 | @Override
97 | public RedisCommandDescription getCommandDescription() {
98 | return new RedisCommandDescription(RedisCommand.HSET,"event-logs");
99 | }
100 |
101 | /**
102 | * If the chosen Redis structure has no inner key, this method returns the top-level (big) key
103 | * If the structure has inner keys (e.g. hset), this method returns the inner (small) key, and the value passed to the Description above becomes the top-level key
104 | * @param data
105 | * @return
106 | */
107 | @Override
108 | public String getKeyFromData(EventLog data) {
109 | return data.getGuid()+"-"+data.getSessionId()+"-"+data.getTimeStamp(); // this is the field (inner key) of the hset
110 | }
111 |
112 | @Override
113 | public String getValueFromData(EventLog data) {
114 | return data.getEventId(); // this is the value of the hset field
115 | }
116 |
117 |
118 | }
119 |
120 |
121 |
122 |
123 |
124 |
125 | }
126 |
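HsetInsertMapper is defined above but never wired in; switching the sink to the HASH layout is a one-line change (a sketch, reusing the config built in main()):

    RedisSink<EventLog> hashSink = new RedisSink<>(config, new HsetInsertMapper());
    streamSource.addSink(hashSink);   // writes each event as one field of the "event-logs" hash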
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_13_SideOutput_Demo.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import com.alibaba.fastjson.JSON;
4 | import org.apache.flink.api.common.typeinfo.TypeInformation;
5 | import org.apache.flink.configuration.Configuration;
6 | import org.apache.flink.streaming.api.CheckpointingMode;
7 | import org.apache.flink.streaming.api.datastream.DataStream;
8 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
9 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
11 | import org.apache.flink.streaming.api.functions.ProcessFunction;
12 | import org.apache.flink.util.Collector;
13 | import org.apache.flink.util.OutputTag;
14 |
15 | /**
16 | * @Author: deep as the sea
17 | * @Site: 多易教育
18 | * @QQ: 657270652
19 | * @Date: 2022/4/26
20 | * @Desc: side-output demo (the process operator)
21 | **/
22 | public class _13_SideOutput_Demo {
23 |
24 |
25 | public static void main(String[] args) throws Exception {
26 |
27 | Configuration configuration = new Configuration();
28 | configuration.setInteger("rest.port", 8822);
29 | StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);
30 | env.setParallelism(1);
31 |
32 |
33 | // Enable checkpointing
34 | env.enableCheckpointing(5000, CheckpointingMode.EXACTLY_ONCE);
35 | env.getCheckpointConfig().setCheckpointStorage("file:///d:/ckpt");
36 |
37 | // Build a source data stream
38 | DataStreamSource<EventLog> streamSource = env.addSource(new MySourceFunction());
39 |
40 |
41 | // Requirement: split the behavior-event stream
42 | //   appLaunch events go to one side stream
43 | //   putBack events go to another side stream
44 | //   the other events stay in the main stream
45 | SingleOutputStreamOperator<EventLog> processed = streamSource.process(new ProcessFunction<EventLog, EventLog>() {
46 | /**
47 | *
48 | * @param eventLog the input record
49 | * @param ctx context, which provides the side-output facility
50 | * @param out collector for the main output
51 | * @throws Exception
52 | */
53 | @Override
54 | public void processElement(EventLog eventLog, ProcessFunction<EventLog, EventLog>.Context ctx, Collector<EventLog> out) throws Exception {
55 | String eventId = eventLog.getEventId();
56 |
57 | if ("appLaunch".equals(eventId)) {
58 |
59 | ctx.output(new OutputTag("launch", TypeInformation.of(EventLog.class)), eventLog);
60 |
61 | } else if ("putBack".equals(eventId)) {
62 |
63 | ctx.output(new OutputTag("back",TypeInformation.of(String.class)), JSON.toJSONString(eventLog));
64 | }
65 |
66 | out.collect(eventLog);  // note: every event, including the two side-output cases above, is also emitted to the main stream here
67 |
68 | }
69 | });
70 |
71 | // get the launch side-output stream
72 | DataStream<EventLog> launchStream = processed.getSideOutput(new OutputTag<EventLog>("launch", TypeInformation.of(EventLog.class)));
73 |
74 | // get the back side-output stream
75 | DataStream<String> backStream = processed.getSideOutput(new OutputTag<String>("back", TypeInformation.of(String.class)));
76 |
77 | launchStream.print("launch");
78 |
79 | backStream.print("back");
80 |
81 |
82 | env.execute();
83 |
84 | }
85 |
86 |
87 | }
88 |
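The tag passed to getSideOutput must match the one used in ctx.output (same id and type). A common way to keep the two in sync is to declare each tag once as a constant; the anonymous-subclass form ("{}" at the end) lets Flink capture the element type without an explicit TypeInformation (a sketch):

    static final OutputTag<EventLog> LAUNCH_TAG = new OutputTag<EventLog>("launch") {};
    static final OutputTag<String> BACK_TAG = new OutputTag<String>("back") {};
    // then use LAUNCH_TAG / BACK_TAG both in ctx.output(...) and in processed.getSideOutput(...)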
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_14_StreamConnect_Union_Demo.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import com.alibaba.fastjson.JSON;
4 | import org.apache.flink.api.common.typeinfo.TypeInformation;
5 | import org.apache.flink.configuration.Configuration;
6 | import org.apache.flink.streaming.api.CheckpointingMode;
7 | import org.apache.flink.streaming.api.datastream.ConnectedStreams;
8 | import org.apache.flink.streaming.api.datastream.DataStream;
9 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
10 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
12 | import org.apache.flink.streaming.api.functions.ProcessFunction;
13 | import org.apache.flink.streaming.api.functions.co.CoMapFunction;
14 | import org.apache.flink.util.Collector;
15 | import org.apache.flink.util.OutputTag;
16 |
17 | /**
18 | * @Author: deep as the sea
19 | * @Site: www.51doit.com
20 | * @QQ: 657270652
21 | * @Date: 2022/4/26
22 | * @Desc: demo of the stream connect and union operators
23 | **/
24 | public class _14_StreamConnect_Union_Demo {
25 |
26 |
27 | public static void main(String[] args) throws Exception {
28 |
29 | Configuration configuration = new Configuration();
30 | configuration.setInteger("rest.port", 8822);
31 | StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);
32 | env.setParallelism(1);
33 |
34 | // stream of numeric strings
35 | DataStreamSource<String> stream1 = env.socketTextStream("localhost", 9998);
36 |
37 | // stream of alphabetic strings
38 | DataStreamSource<String> stream2 = env.socketTextStream("localhost", 9999);
39 |
40 | /**
41 | * connect two streams
42 | */
43 | ConnectedStreams<String, String> connectedStreams = stream1.connect(stream2);
44 |
45 | SingleOutputStreamOperator<String> resultStream = connectedStreams.map(new CoMapFunction<String, String, String>() {
46 | // state shared by both map methods
47 |
48 | String prefix = "doitedu_";
49 |
50 | /**
51 | * processing logic for the left stream
52 | * @param value
53 | * @return
54 | * @throws Exception
55 | */
56 | @Override
57 | public String map1(String value) throws Exception {
58 | // multiply the number by 10 and return it as a string
59 | return prefix + (Integer.parseInt(value) * 10);
60 | }
61 |
62 | /**
63 | * processing logic for the right stream
64 | * @param value
65 | * @return
66 | * @throws Exception
67 | */
68 | @Override
69 | public String map2(String value) throws Exception {
70 |
71 | return prefix + value.toUpperCase();
72 | }
73 | });
74 | /*resultStream.print();*/
75 |
76 |
77 | /**
78 | * union of streams
79 | * all streams participating in a union must have the same data type
80 | */
81 | // stream1.map(Integer::parseInt).union(stream2); // the two sides have different element types, so this does not compile
82 | DataStream<String> unioned = stream1.union(stream2);
83 | unioned.map(s-> "doitedu_"+s).print();
84 |
85 |
86 | env.execute();
87 |
88 | }
89 |
90 |
91 | }
92 |
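union is variadic, so any number of same-typed streams can be merged in one call (a sketch, with a hypothetical third socket stream):

    DataStreamSource<String> stream3 = env.socketTextStream("localhost", 10000);
    DataStream<String> all = stream1.union(stream2, stream3);   // all three must carry the same element type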
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_17_ProcessFunctions_Demo.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import org.apache.flink.api.common.functions.CoGroupFunction;
4 | import org.apache.flink.api.common.functions.JoinFunction;
5 | import org.apache.flink.api.common.functions.RuntimeContext;
6 | import org.apache.flink.api.common.typeinfo.TypeHint;
7 | import org.apache.flink.api.common.typeinfo.Types;
8 | import org.apache.flink.api.java.tuple.Tuple2;
9 | import org.apache.flink.api.java.tuple.Tuple3;
10 | import org.apache.flink.configuration.Configuration;
11 | import org.apache.flink.streaming.api.datastream.DataStream;
12 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
13 | import org.apache.flink.streaming.api.datastream.KeyedStream;
14 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
15 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
16 | import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
17 | import org.apache.flink.streaming.api.functions.ProcessFunction;
18 | import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
19 | import org.apache.flink.streaming.api.windowing.time.Time;
20 | import org.apache.flink.util.Collector;
21 | import org.apache.flink.util.OutputTag;
22 |
23 | /**
24 | * @Author: deep as the sea
25 | * @Site: www.51doit.com
26 | * @QQ: 657270652
27 | * @Date: 2022/4/26
28 | * @Desc: demo of the process operator and ProcessFunction
29 | *
30 | * The ProcessFunction to pass in differs depending on the type of stream the process operator is called on
31 | **/
32 | public class _17_ProcessFunctions_Demo {
33 |
34 |
35 | public static void main(String[] args) throws Exception {
36 |
37 | Configuration configuration = new Configuration();
38 | configuration.setInteger("rest.port", 8822);
39 | StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);
40 | env.setParallelism(1);
41 |
42 | // id,eventId
43 | DataStreamSource<String> stream1 = env.socketTextStream("localhost", 9998);
44 |
45 | /**
46 | * On a plain DataStream, process takes a "ProcessFunction"
47 | */
48 | SingleOutputStreamOperator<Tuple2<String, String>> s1 = stream1.process(new ProcessFunction<String, Tuple2<String, String>>() {
49 | // the open() lifecycle method is available
50 | @Override
51 | public void open(Configuration parameters) throws Exception {
52 | // getRuntimeContext() exposes all kinds of runtime context information
53 | RuntimeContext runtimeContext = getRuntimeContext();
54 | runtimeContext.getTaskName();
55 |
56 | super.open(parameters);
57 | }
58 |
59 | @Override
60 | public void processElement(String value, ProcessFunction<String, Tuple2<String, String>>.Context ctx, Collector<Tuple2<String, String>> out) throws Exception {
61 |
62 | // side output is available here
63 | ctx.output(new OutputTag<String>("s1", Types.STRING), value);
64 |
65 | // main output is available here
66 | String[] arr = value.split(",");
67 | out.collect(Tuple2.of(arr[0], arr[1]));
68 | }
69 |
70 | // the close() lifecycle method is available
71 | @Override
72 | public void close() throws Exception {
73 | super.close();
74 | }
75 | });
76 |
77 |
78 |
79 | /**
80 | * On a KeyedStream, process takes a "KeyedProcessFunction"
81 | * KeyedProcessFunction generics -- 1: type of the key; 2: type of the input records; 3: type of the output records
82 | */
83 | // key the s1 stream by the first tuple field
84 | KeyedStream<Tuple2<String, String>, String> keyedStream = s1.keyBy(tp2 -> tp2.f0);
85 | // then call process on the keyed stream
86 | SingleOutputStreamOperator<Tuple2<Integer, String>> s2 = keyedStream.process(new KeyedProcessFunction<String, Tuple2<String, String>, Tuple2<Integer, String>>() {
87 | @Override
88 | public void processElement(Tuple2<String, String> value, KeyedProcessFunction<String, Tuple2<String, String>, Tuple2<Integer, String>>.Context ctx, Collector<Tuple2<Integer, String>> out) throws Exception {
89 | // parse the id to an integer, upper-case the eventId
90 | out.collect(Tuple2.of(Integer.parseInt(value.f0), value.f1.toUpperCase()));
91 | }
92 | });
93 |
94 |
95 | s2.print();
96 |
97 |
98 | env.execute();
99 |
100 | }
101 |
102 |
103 | }
104 |
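Beyond keyed state, what KeyedProcessFunction adds over a plain ProcessFunction is timers. A minimal sketch of registering a processing-time timer per element and reacting in onTimer (the 30-second delay is illustrative):

    keyedStream.process(new KeyedProcessFunction<String, Tuple2<String, String>, String>() {
        @Override
        public void processElement(Tuple2<String, String> value, Context ctx, Collector<String> out) throws Exception {
            // fire 30 s after this element's processing time
            ctx.timerService().registerProcessingTimeTimer(ctx.timerService().currentProcessingTime() + 30_000);
        }

        @Override
        public void onTimer(long timestamp, OnTimerContext ctx, Collector<String> out) throws Exception {
            out.collect("timer fired for key " + ctx.getCurrentKey() + " at " + timestamp);
        }
    });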
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_18_ChannalSelector_Partitioner_Demo.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import org.apache.flink.api.common.functions.FlatMapFunction;
4 | import org.apache.flink.configuration.Configuration;
5 | import org.apache.flink.streaming.api.datastream.DataStream;
6 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
7 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
9 | import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
10 | import org.apache.flink.util.Collector;
11 |
12 | public class _18_ChannalSelector_Partitioner_Demo {
13 |
14 | public static void main(String[] args) throws Exception {
15 |
16 | Configuration conf = new Configuration();
17 | conf.setInteger("rest.port", 8081);
18 | StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(conf);
19 |
20 |
21 | DataStreamSource<String> s1 = env.socketTextStream("localhost", 9999);
22 |
23 | DataStream<String> s2 = s1
24 | .map(s -> s.toUpperCase())
25 | .setParallelism(4)
26 | .flatMap(new FlatMapFunction<String, String>() {
27 | @Override
28 | public void flatMap(String value, Collector<String> out) throws Exception {
29 | String[] arr = value.split(",");
30 | for (String s : arr) {
31 | out.collect(s);
32 | }
33 | }
34 | })
35 | .setParallelism(4)
36 | .forward();
37 |
38 | SingleOutputStreamOperator<String> s3 = s2.map(s -> s.toLowerCase()).setParallelism(4);
39 |
40 |
41 | SingleOutputStreamOperator<String> s4 = s3.keyBy(s -> s.substring(0, 2))
42 | .process(new KeyedProcessFunction<String, String, String>() {
43 | @Override
44 | public void processElement(String value, KeyedProcessFunction<String, String, String>.Context ctx, Collector<String> out) throws Exception {
45 | out.collect(value + ">");
46 | }
47 | }).setParallelism(4);
48 |
49 | DataStream<String> s5 = s4.filter(s -> s.startsWith("b")).setParallelism(4);
50 |
51 | s5.print().setParallelism(4);
52 |
53 | env.execute();
54 | }
55 |
56 | }
57 |
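forward() requires the upstream and downstream operators to have the same parallelism (which is why every operator in this pipeline is set to 4). The other built-in channel selectors can be swapped in at the same position (a sketch; all of these exist on DataStream):

    s1.rebalance();   // round-robin across all downstream subtasks
    s1.rescale();     // round-robin within a local group of subtasks
    s1.shuffle();     // random target subtask
    s1.broadcast();   // every record to every downstream subtask
    s1.global();      // every record to subtask 0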
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_19_WaterMark_Api_Demo2.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 |
4 | import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
5 | import org.apache.flink.api.common.eventtime.WatermarkStrategy;
6 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
7 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
9 | import org.apache.flink.streaming.api.functions.ProcessFunction;
10 | import org.apache.flink.util.Collector;
11 |
12 | /**
13 | * @Author: deep as the sea
14 | * @Site: 多易教育
15 | * @QQ: 657270652
16 | * @Date: 2022/5/1
17 | * @Desc: demo of watermark generation settings,
18 | *        and observing watermark progress with a single parallelism
19 | *
20 | * ==> type the following two records into the socket, one after the other:
21 | * 1,e05,2000,page01
22 | * 1,e06,3000,page02
23 | *
24 | * ==> the console then prints, in order:
25 | * Received: EventBean(guid=1, eventId=e05, timeStamp=2000, pageId=page01)
26 | * Current watermark: -9223372036854775808
27 | * Current processing time: 1651396210778
28 | * ----------------------
29 | * Received: EventBean(guid=1, eventId=e06, timeStamp=3000, pageId=page02)
30 | * Current watermark: 1999
31 | * Current processing time: 1651396273755
32 | *
33 | **/
34 | public class _19_WaterMark_Api_Demo2 {
35 |
36 | public static void main(String[] args) throws Exception {
37 |
38 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
39 | env.setParallelism(1);
40 |
41 |
42 | // 1,e01,168673487846,pg01
43 | DataStreamSource<String> s1 = env.socketTextStream("localhost", 9999);
44 |
45 |
46 | SingleOutputStreamOperator<EventBean> s2 = s1.map(s -> {
47 | String[] split = s.split(",");
48 | return new EventBean(Long.parseLong(split[0]), split[1], Long.parseLong(split[2]), split[3]);
49 | }).returns(EventBean.class)
50 | .assignTimestampsAndWatermarks(
51 | WatermarkStrategy
52 | .forMonotonousTimestamps()
53 | .withTimestampAssigner(new SerializableTimestampAssigner<EventBean>() {
54 | @Override
55 | public long extractTimestamp(EventBean eventBean, long recordTimestamp) {
56 | return eventBean.getTimeStamp();
57 | }
58 | })
59 | ).setParallelism(2);
60 |
61 | s2.process(new ProcessFunction<EventBean, EventBean>() {
62 | @Override
63 | public void processElement(EventBean eventBean, ProcessFunction<EventBean, EventBean>.Context ctx, Collector<EventBean> out) throws Exception {
64 |
65 | Thread.sleep(1000);
66 | System.out.println("woke up, about to print");
67 |
68 | // print the current watermark
69 | long processTime = ctx.timerService().currentProcessingTime();
70 | long watermark = ctx.timerService().currentWatermark();
71 |
72 | System.out.println("本次收到的数据" + eventBean);
73 | System.out.println("此刻的watermark: " + watermark);
74 | System.out.println("此刻的处理时间(processing time): " + processTime );
75 |
76 | out.collect(eventBean);
77 | }
78 | }).setParallelism(1).print();
79 |
80 |
81 | env.execute();
82 |
83 |
84 | }
85 | }
86 |
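With several source partitions, a quiet partition holds the combined watermark back indefinitely. Marking idle sources is the usual remedy; a sketch against the same stream (the 2 s out-of-orderness and 1 min idleness are illustrative values; requires java.time.Duration):

    WatermarkStrategy<EventBean> strategy = WatermarkStrategy
            .<EventBean>forBoundedOutOfOrderness(Duration.ofSeconds(2))
            .withIdleness(Duration.ofMinutes(1))   // a subtask with no data for 1 min stops holding back the watermark
            .withTimestampAssigner((bean, ts) -> bean.getTimeStamp());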
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_21_Window_Api_Demo3.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import org.apache.flink.api.common.RuntimeExecutionMode;
4 | import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
5 | import org.apache.flink.api.common.eventtime.WatermarkStrategy;
6 | import org.apache.flink.api.common.typeinfo.TypeHint;
7 | import org.apache.flink.api.common.typeinfo.TypeInformation;
8 | import org.apache.flink.api.java.tuple.Tuple2;
9 | import org.apache.flink.streaming.api.datastream.DataStream;
10 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
11 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
12 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
13 | import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
14 | import org.apache.flink.streaming.api.windowing.assigners.*;
15 | import org.apache.flink.streaming.api.windowing.time.Time;
16 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
17 | import org.apache.flink.util.Collector;
18 | import org.apache.flink.util.OutputTag;
19 |
20 | import java.time.Duration;
21 |
22 | public class _21_Window_Api_Demo3 {
23 |
24 | public static void main(String[] args) throws Exception {
25 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
26 | env.setParallelism(1);
27 | env.setRuntimeMode(RuntimeExecutionMode.STREAMING);
28 |
29 | // 1,e01,3000,pg02,1
30 | DataStreamSource source = env.socketTextStream("localhost", 9999);
31 |
32 | SingleOutputStreamOperator> beanStream = source.map(s -> {
33 | String[] split = s.split(",");
34 | EventBean2 bean = new EventBean2(Long.parseLong(split[0]), split[1], Long.parseLong(split[2]), split[3], Integer.parseInt(split[4]));
35 | return Tuple2.of(bean,1);
36 | }).returns(new TypeHint>() {})
37 | .assignTimestampsAndWatermarks(WatermarkStrategy.>forBoundedOutOfOrderness(Duration.ofMillis(0))
38 | .withTimestampAssigner(new SerializableTimestampAssigner>() {
39 | @Override
40 | public long extractTimestamp(Tuple2<EventBean2, Integer> element, long recordTimestamp) {
41 | return element.f0.getTimeStamp();
42 | }
43 | }));
44 |
45 |
46 | OutputTag> lateDataOutputTag = new OutputTag<>("late_data", TypeInformation.of(new TypeHint>() {}));
47 |
48 | SingleOutputStreamOperator<String> sumResult = beanStream.keyBy(tp -> tp.f0.getGuid())
49 | .window(TumblingEventTimeWindows.of(Time.seconds(10))) // event-time tumbling window, 10 s long
50 | .allowedLateness(Time.seconds(2)) // allow 2 s of lateness
51 | .sideOutputLateData(lateDataOutputTag) // data later than the allowed lateness goes to the side stream marked by this OutputTag
52 | /*.sum("f1")*/
53 | .apply(new WindowFunction<Tuple2<EventBean2, Integer>, String, Long, TimeWindow>() {
54 | @Override
55 | public void apply(Long aLong, TimeWindow window, Iterable<Tuple2<EventBean2, Integer>> input, Collector<String> out) throws Exception {
56 | int count = 0;
57 | for (Tuple2<EventBean2, Integer> tuple : input) {
58 | count++;
59 | }
60 | out.collect(window.getStart() + ":" + window.getEnd() + "," + count);
61 | }
62 | });
63 |
64 |
65 | DataStream> lateDataSideStream = sumResult.getSideOutput(lateDataOutputTag);
66 |
67 |
68 | sumResult.print("主流结果");
69 |
70 | lateDataSideStream.print("迟到数据");
71 |
72 | env.execute();
73 |
74 | }
75 |
76 |
77 | }
78 |
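apply() buffers the whole window contents in state before the window fires. The same per-window count can be computed incrementally with an AggregateFunction, which keeps only a single accumulator per key and window (a sketch; requires org.apache.flink.api.common.functions.AggregateFunction):

    beanStream.keyBy(tp -> tp.f0.getGuid())
            .window(TumblingEventTimeWindows.of(Time.seconds(10)))
            .aggregate(new AggregateFunction<Tuple2<EventBean2, Integer>, Long, Long>() {
                @Override public Long createAccumulator() { return 0L; }
                @Override public Long add(Tuple2<EventBean2, Integer> value, Long acc) { return acc + 1; }
                @Override public Long getResult(Long acc) { return acc; }
                @Override public Long merge(Long a, Long b) { return a + b; }
            });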
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_22_StateBasic_Demo.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import org.apache.flink.api.common.RuntimeExecutionMode;
4 | import org.apache.flink.api.common.functions.MapFunction;
5 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
6 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
7 |
8 | public class _22_StateBasic_Demo {
9 |
10 | public static void main(String[] args) throws Exception {
11 |
12 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
13 | env.setParallelism(1);
14 | env.setRuntimeMode(RuntimeExecutionMode.STREAMING);
15 |
16 | // a
17 | DataStreamSource<String> source = env.socketTextStream("localhost", 9999);
18 |
19 | // Use a map operator to achieve this effect:
20 | // for every arriving string, output that string concatenated with all strings that arrived before it
21 | source.map(new MapFunction<String, String>() {
22 |
23 | // State we define and manage ourselves -- persistence and fault tolerance are hard
24 | // this kind of (self-managed) state is called: raw state
25 | String acc = "";
26 |
27 | /**
28 | * To have Flink manage the state data,
29 | * don't define a plain member variable;
30 | * instead obtain a state handle from flink's api and do all reads, writes and updates through it
31 | *
32 | * this kind of state is called: managed state! (flink state)
33 | */
34 |
35 | @Override
36 | public String map(String value) throws Exception {
37 | acc = acc + value;
38 | return acc;
39 | }
40 | }).print();
41 |
42 | env.execute();
43 |
44 | }
45 |
46 | }
47 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_24_State_KeyedState_Demo.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import org.apache.commons.lang3.RandomUtils;
4 | import org.apache.flink.api.common.RuntimeExecutionMode;
5 | import org.apache.flink.api.common.functions.AggregateFunction;
6 | import org.apache.flink.api.common.functions.MapFunction;
7 | import org.apache.flink.api.common.functions.RichMapFunction;
8 | import org.apache.flink.api.common.functions.RuntimeContext;
9 | import org.apache.flink.api.common.restartstrategy.RestartStrategies;
10 | import org.apache.flink.api.common.state.*;
11 | import org.apache.flink.configuration.Configuration;
12 | import org.apache.flink.runtime.state.FunctionInitializationContext;
13 | import org.apache.flink.runtime.state.FunctionSnapshotContext;
14 | import org.apache.flink.streaming.api.CheckpointingMode;
15 | import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
16 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
17 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
18 |
19 | /**
20 | * @Author: deep as the sea
21 | * @Site: 多易教育
22 | * @QQ: 657270652
23 | * @Date: 2022/5/5
24 | * @Desc: keyed state usage demo
25 | **/
26 | public class _24_State_KeyedState_Demo {
27 |
28 | public static void main(String[] args) throws Exception {
29 |
30 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
31 | env.setParallelism(1);
32 | env.setRuntimeMode(RuntimeExecutionMode.STREAMING);
33 |
34 | // Enable checkpointing of state data (snapshot interval, snapshot mode)
35 | env.enableCheckpointing(1000, CheckpointingMode.EXACTLY_ONCE);
36 |
37 | // With snapshots enabled, a persistent storage location for them must be specified
38 | env.getCheckpointConfig().setCheckpointStorage("file:///d:/checkpoint/");
39 |
40 |
41 | // Enable automatic task-level failover
42 | env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3,1000));
43 |
44 |
45 | DataStreamSource<String> source = env.socketTextStream("localhost", 9999);
46 |
47 | // Use a map operator to achieve this effect:
48 | // for every arriving string, output that string concatenated with all strings that arrived before it
49 | source
50 | .keyBy(s->"0")
51 | .map(new RichMapFunction<String, String>() {
52 |
53 | ListState<String> lstState;
54 | @Override
55 | public void open(Configuration parameters) throws Exception {
56 | RuntimeContext runtimeContext = getRuntimeContext();
57 | // obtain a List-structured state handle
58 | lstState = runtimeContext.getListState(new ListStateDescriptor<String>("lst", String.class));
59 |
60 | // obtain a single-value state handle
61 | // TODO explore the ValueState operations yourself
62 |
63 | // obtain a Map-structured state handle
64 | MapState<String, String> mapState = runtimeContext.getMapState(new MapStateDescriptor<String, String>("xx", String.class, String.class));
65 | // TODO explore the MapState operations yourself
66 | }
67 |
68 | @Override
69 | public String map(String value) throws Exception {
70 |
71 | // add this record to the state
72 | lstState.add(value);
73 |
74 | // iterate over all historical strings and concatenate the result
75 | StringBuilder sb = new StringBuilder();
76 | for (String s : lstState.get()) {
77 | sb.append(s);
78 | }
79 |
80 | return sb.toString();
81 | }
82 | }).setParallelism(2)
83 | .print().setParallelism(2);
84 |
85 | // submit the job
86 | env.execute();
87 |
88 | }
89 |
90 | }
91 |
92 |
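For the ValueState TODO above, a minimal sketch of the single-value state API (same open()/map() structure as the ListState version):

    ValueState<String> accState;   // member of the RichMapFunction

    // in open():
    accState = getRuntimeContext().getState(new ValueStateDescriptor<String>("acc", String.class));

    // in map():
    String prev = accState.value();                       // null on first access for the current key
    accState.update((prev == null ? "" : prev) + value);  // write back the concatenated result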
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_27_ToleranceConfig_Demo.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 |
4 | import org.apache.flink.api.common.restartstrategy.RestartStrategies;
5 | import org.apache.flink.api.common.time.Time;
6 | import org.apache.flink.configuration.Configuration;
7 | import org.apache.flink.streaming.api.CheckpointingMode;
8 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
9 | import org.apache.flink.streaming.api.environment.CheckpointConfig;
10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
11 |
12 | import java.time.Duration;
13 | /**
14 | * @Author: deep as the sea
15 | * @Site: 多易教育
16 | * @QQ: 657270652
17 | * @Date: 2022/5/8
18 | * @Desc: configuration examples for flink's fault-tolerance machinery
19 | *        checkpoint-related settings
20 | *        restartStrategy-related settings
21 | **/
22 | public class _27_ToleranceConfig_Demo {
23 |
24 | public static void main(String[] args) throws Exception {
25 |
26 | /**
27 | * When testing in the IDE, a savepoint to restore state from can be specified here
28 | */
29 | Configuration conf = new Configuration();
30 | //conf.setString("execution.savepoint.path", "file:///D:/checkpoint/7ecbd4f9106957c42109bcde/chk-544");
31 |
32 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(conf);
33 |
34 | /* *
35 | * checkpoint-related settings
36 | */
37 | env.enableCheckpointing(2000, CheckpointingMode.EXACTLY_ONCE); // the two most basic checkpoint parameters: interval and mode
38 | CheckpointConfig checkpointConfig = env.getCheckpointConfig();
39 | checkpointConfig.setCheckpointStorage("hdfs://doit01:8020/ckpt");
40 | checkpointConfig.setAlignedCheckpointTimeout(Duration.ofMinutes(10000)); // timeout for checkpoint alignment
41 | checkpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE); // checkpoint algorithm mode
42 | checkpointConfig.setCheckpointInterval(2000); // checkpoint interval
43 | //checkpointConfig.setCheckpointIdOfIgnoredInFlightData(5); // for the unaligned mode: on job recovery, operators discard the in-flight data of checkpoint 5
44 | checkpointConfig.setExternalizedCheckpointCleanup(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION); // keep the last checkpoint when the job is cancelled
45 | checkpointConfig.setForceUnalignedCheckpoints(false); // whether to force the unaligned checkpoint mode
46 | checkpointConfig.setMaxConcurrentCheckpoints(5); // max number of in-flight (incomplete) checkpoints allowed at the same time
47 | checkpointConfig.setMinPauseBetweenCheckpoints(2000); // minimum pause between two checkpoints, so checkpointing doesn't eat too much operator processing time
48 | checkpointConfig.setCheckpointTimeout(3000); // upper bound on the total time one checkpoint execution may take
49 | checkpointConfig.setTolerableCheckpointFailureNumber(10); // max number of checkpoint failures to tolerate
50 |
51 |
52 |
53 |
54 |
55 | /* *
56 | * restart-strategy settings for automatic task-failure recovery
57 | */
58 | RestartStrategies.RestartStrategyConfiguration restartStrategy = null;
59 |
60 | // fixed-delay restart (param 1: max restart attempts; param 2: delay between two restarts)
61 | restartStrategy = RestartStrategies.fixedDelayRestart(5, 2000);
62 |
63 | // the default strategy: no restart (any task failure fails the whole job)
64 | restartStrategy = RestartStrategies.noRestart();
65 |
66 |
67 | /* *
68 | * This strategy: the more frequent the failures, the longer the penalty interval between restarts
69 | *
70 | * initialBackoff         initial penalty interval between restarts: 1s
71 | * maxBackoff             maximum penalty interval: 60s
72 | * backoffMultiplier      penalty multiplier: 2 (each further failure multiplies the previous penalty interval by this factor)
73 | * resetBackoffThreshold  smooth-running time after which the penalty resets (the next failure then restarts the delay at the initial 1s)
74 | * jitterFactor           a random offset added to each restart time, so restarts don't all line up
75 | * job1: 9.51 9.53+2*0.1 9.57 ......
76 | * job2: 9.51 9.53+2*0.15 9.57 ......
77 | * job3: 9.51 9.53+2*0.8 9.57 ......
78 | */
79 | restartStrategy = RestartStrategies.exponentialDelayRestart(Time.seconds(1),Time.seconds(60),2.0,Time.hours(1),1.0);
80 |
81 | /* *
82 | * failureRate      max number of failures within the measured window
83 | * failureInterval  the measured window
84 | * delayInterval    delay between two restarts
85 | */
86 | restartStrategy = RestartStrategies.failureRateRestart(5,Time.hours(1),Time.seconds(5));
87 |
88 | /* *
89 | * This strategy falls back to whatever the config file configures
90 | * It is commonly used with a custom RestartStrategy:
91 | * the user-defined strategy class is typically configured in flink-conf.yaml
92 | */
93 | restartStrategy = RestartStrategies.fallBackRestart();
94 |
95 |
96 | // apply the chosen restart strategy
97 | env.setRestartStrategy(restartStrategy);
98 |
99 |
100 | // data processing
101 | DataStreamSource<String> source = env.socketTextStream("localhost", 9999);
102 |
103 |
104 | // submit for execution
105 | env.execute();
106 | }
107 |
108 |
109 | }
110 |
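For fallBackRestart(), the cluster-level default comes from flink-conf.yaml; the fixed-delay equivalent of the code above looks roughly like this (key names per the Flink configuration docs, values illustrative):

    restart-strategy: fixed-delay
    restart-strategy.fixed-delay.attempts: 5
    restart-strategy.fixed-delay.delay: 2 s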
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/scala/demos/_01_入门程序WordCount.scala:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.scala.demos
2 |
3 | import org.apache.flink.streaming.api.scala.{StreamExecutionEnvironment, createTypeInformation}
4 |
5 | object _01_入门程序WordCount {
6 | def main(args: Array[String]): Unit = {
7 |
8 | val env = StreamExecutionEnvironment.getExecutionEnvironment
9 |
10 | val sourceStream = env.socketTextStream("doit01", 9999)
11 |
12 | // sourceStream.flatMap(s=>s.split("\\s+")).map(w=>(w,1))
13 |
14 | sourceStream
15 | .flatMap(s => {
16 | s.split("\\s+").map(w => (w, 1))
17 | })
18 | .keyBy(tp => tp._1)
19 | .sum(1)
20 | .print("我爱你")
21 |
22 | env.execute("我的job"); // 提交job
23 |
24 | }
25 |
26 | }
27 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/task/Mapper1.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.task;
2 |
3 | public class Mapper1 {
4 |
5 | public String map(String s){
6 | return s.toUpperCase();
7 | }
8 |
9 | }
10 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/task/Mapper2.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.task;
2 |
3 | public class Mapper2 {
4 | public String map(String s){
5 | return s+".txt";
6 | }
7 |
8 | }
9 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/task/Task1.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.task;
2 |
3 | public class Task1 implements Runnable{
4 |
5 | @Override
6 | public void run() {
7 |
8 | // receive data from upstream
9 | //String data = receive();
10 |
11 | Mapper1 mapper1 = new Mapper1();
12 | //String res = mapper1.map(data);
13 |
14 | // send the result downstream
15 | // channel.send(res);
16 | }
17 |
18 | }
19 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/task/Task2.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.task;
2 |
3 | public class Task2 implements Runnable{
4 |
5 | @Override
6 | public void run() {
7 |
8 | //String data = receive();
9 |
10 | Mapper2 mapper2 = new Mapper2();
11 | // String res = mapper2.map(data);
12 |
13 | // send(res);
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/task/Task3.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.task;
2 |
3 | public class Task3 implements Runnable{
4 | @Override
5 | public void run() {
6 | Mapper1 mapper1 = new Mapper1();
7 | Mapper2 mapper2 = new Mapper2();
8 |
9 |
10 | String res1 = mapper1.map("aaaa");
11 | String res2 = mapper2.map(res1);
12 | }
13 | }
14 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/task/TaskRunner.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.task;
2 |
3 | public class TaskRunner {
4 |
5 | public static void main(String[] args) {
6 |
7 | // 6 parallel instances of Task1 -- each parallel instance is what flink calls a subTask
8 | new Thread(new Task1()).start();
9 | new Thread(new Task1()).start();
10 | new Thread(new Task1()).start();
11 | new Thread(new Task1()).start();
12 | new Thread(new Task1()).start();
13 | new Thread(new Task1()).start();
14 |
15 |
16 | // 6 parallel instances of Task2 -- each parallel instance is what flink calls a subTask
17 | new Thread(new Task2()).start();
18 | new Thread(new Task2()).start();
19 | new Thread(new Task2()).start();
20 | new Thread(new Task2()).start();
21 | new Thread(new Task2()).start();
22 | new Thread(new Task2()).start();
23 |
24 |
25 | // 6 parallel instances of Task3 -- each parallel instance is what flink calls a subTask
26 | new Thread(new Task3()).start();
27 | new Thread(new Task3()).start();
28 | new Thread(new Task3()).start();
29 | new Thread(new Task3()).start();
30 | new Thread(new Task3()).start();
31 | new Thread(new Task3()).start();
32 |
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo10_KafkaConnectorDetail.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.streaming.api.datastream.DataStream;
4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
5 | import org.apache.flink.streaming.api.functions.ProcessFunction;
6 | import org.apache.flink.table.api.EnvironmentSettings;
7 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
8 | import org.apache.flink.types.Row;
9 | import org.apache.flink.util.Collector;
10 |
11 | /**
12 | * @Author: deep as the sea
13 | * @Site: 多易教育
14 | * @QQ: 657270652
15 | * @Date: 2022/6/12
16 | * @Desc: Learn big data at 多易教育
17 | * stream ===> table: how event time and watermarks carry over in the conversion
18 | **/
19 | public class Demo10_KafkaConnectorDetail {
20 |
21 | public static void main(String[] args) throws Exception {
22 |
23 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
24 | env.setParallelism(1);
25 |
26 | EnvironmentSettings settings = EnvironmentSettings.inStreamingMode();
27 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env, settings);
28 |
29 |
30 | /**
31 | * the corresponding records in kafka:
32 | * key: {"k1":100,"k2":200}
33 | * value: {"guid":1,"eventId":"e02","eventTime":1655017433000,"pageId":"p001"}
34 | * headers:
35 | * h1 -> vvvv
36 | * h2 -> tttt
37 | */
38 | tenv.executeSql(
39 | " CREATE TABLE t_kafka_connector ( "
40 | + " guid int, "
41 | + " eventId string, "
42 | + " eventTime bigint, "
43 | + " pageId string, "
44 | + " k1 int, "
45 | + " k2 int, "
46 | + " rec_ts timestamp(3) metadata from 'timestamp' , "
47 | + " `offset` bigint metadata , "
48 | + " headers map metadata, "
49 | + " rt as to_timestamp_ltz(eventTime,3) , "
50 | + " watermark for rt as rt - interval '0.001' second "
51 | + " ) WITH ( "
52 | + " 'connector' = 'kafka', "
53 | + " 'topic' = 'doit30-kafka', "
54 | + " 'properties.bootstrap.servers' = 'doitedu:9092', "
55 | + " 'properties.group.id' = 'testGroup', "
56 | + " 'scan.startup.mode' = 'earliest-offset', "
57 | + " 'key.format'='json', "
58 | + " 'key.json.ignore-parse-errors' = 'true', "
59 | + " 'key.fields'='k1;k2', "
60 | /* + " 'key.fields-prefix'='', " */
61 | + " 'value.format'='json', "
62 | + " 'value.json.fail-on-missing-field'='false', "
63 | + " 'value.fields-include' = 'EXCEPT_KEY' "
64 | + " ) "
65 |
66 | );
67 |
68 | tenv.executeSql("select * from t_kafka_connector ")/*.print()*/;
69 | tenv.executeSql("select guid,eventId,cast(headers['h1'] as string) as h1, cast(headers['h2'] as string) as h2 from t_kafka_connector ").print();
70 |
71 |
72 | env.execute();
73 |
74 |
75 | }
76 |
77 | }
78 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo11_UpsertKafkaConnectorTest.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 |
4 | import lombok.AllArgsConstructor;
5 | import lombok.Data;
6 | import lombok.NoArgsConstructor;
7 | import org.apache.flink.configuration.Configuration;
8 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
9 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
11 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
12 |
13 |
14 | public class Demo11_UpsertKafkaConnectorTest {
15 | public static void main(String[] args) throws Exception {
16 |
17 | Configuration conf = new Configuration();
18 | /*conf.setInteger("rest.port",9091);*/
19 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
20 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env);
21 |
22 |
23 | // 1,male
24 | DataStreamSource<String> s1 = env.socketTextStream("doitedu", 9999);
25 |
26 | SingleOutputStreamOperator<Bean1> bean1 = s1.map(s -> {
27 | String[] arr = s.split(",");
28 | return new Bean1(Integer.parseInt(arr[0]), arr[1]);
29 | });
30 |
31 | // turn the stream into a table
32 | tenv.createTemporaryView("bean1",bean1);
33 |
34 | //tenv.executeSql("select gender,count(1) as cnt from bean1 group by gender").print();
35 |
36 |
37 | // create the target kafka mapping table
38 | tenv.executeSql(
39 | " create table t_upsert_kafka( "
40 | + " gender string primary key not enforced, "
41 | + " cnt bigint "
42 | + " ) with ( "
43 | + " 'connector' = 'upsert-kafka', "
44 | + " 'topic' = 'doit30-upsert', "
45 | + " 'properties.bootstrap.servers' = 'doitedu:9092', "
46 | + " 'key.format' = 'csv', "
47 | + " 'value.format' = 'csv' "
48 | + " ) "
49 |
50 | );
51 | // count rows per gender and insert the result into the target table
52 | tenv.executeSql(
53 | "insert into t_upsert_kafka " +
54 | "select gender,count(1) as cnt from bean1 group by gender"
55 | );
56 |
57 | tenv.executeSql("select * from t_upsert_kafka").print();
58 |
59 |
60 | env.execute();
61 |
62 | }
63 |
64 | @Data
65 | @NoArgsConstructor
66 | @AllArgsConstructor
67 | public static class Bean1{
68 | public int id;
69 | public String gender;
70 | }
71 |
72 |
73 | }
74 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo11_UpsertKafkaConnectorTest2.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 |
4 | import lombok.AllArgsConstructor;
5 | import lombok.Data;
6 | import lombok.NoArgsConstructor;
7 | import org.apache.flink.configuration.Configuration;
8 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
9 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
11 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
12 |
13 |
14 | public class Demo11_UpsertKafkaConnectorTest2 {
15 | public static void main(String[] args) throws Exception {
16 |
17 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
18 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env);
19 |
20 |
21 | // 1,male
22 | DataStreamSource<String> s1 = env.socketTextStream("doitedu", 9998);
23 | // 1,zs
24 | DataStreamSource<String> s2 = env.socketTextStream("doitedu", 9999);
25 |
26 | SingleOutputStreamOperator<Bean1> bean1 = s1.map(s -> {
27 | String[] arr = s.split(",");
28 | return new Bean1(Integer.parseInt(arr[0]), arr[1]);
29 | });
30 |
31 | SingleOutputStreamOperator<Bean2> bean2 = s2.map(s -> {
32 | String[] arr = s.split(",");
33 | return new Bean2(Integer.parseInt(arr[0]), arr[1]);
34 | });
35 |
36 |
37 | // turn the streams into tables
38 | tenv.createTemporaryView("bean1",bean1);
39 | tenv.createTemporaryView("bean2",bean2);
40 |
41 | //tenv.executeSql("select gender,count(1) as cnt from bean1 group by gender").print();
42 |
43 |
44 | // create the target kafka mapping table
45 | tenv.executeSql(
46 | " create table t_upsert_kafka2( "
47 | + " id int primary key not enforced, "
48 | + " gender string, "
49 | + " name string "
50 | + " ) with ( "
51 | + " 'connector' = 'upsert-kafka', "
52 | + " 'topic' = 'doit30-upsert2', "
53 | + " 'properties.bootstrap.servers' = 'doitedu:9092', "
54 | + " 'key.format' = 'csv', "
55 | + " 'value.format' = 'csv' "
56 | + " ) "
57 |
58 | );
59 | // join the two streams and insert the result into the target table
60 | tenv.executeSql(
61 | "insert into t_upsert_kafka2 " +
62 | "select bean1.id,bean1.gender,bean2.name from bean1 left join bean2 on bean1.id=bean2.id"
63 | );
64 |
65 | tenv.executeSql("select * from t_upsert_kafka2").print();
66 |
67 |
68 | env.execute();
69 |
70 | }
71 |
72 | @Data
73 | @NoArgsConstructor
74 | @AllArgsConstructor
75 | public static class Bean1{
76 | public int id;
77 | public String gender;
78 | }
79 |
80 |
81 | @Data
82 | @NoArgsConstructor
83 | @AllArgsConstructor
84 | public static class Bean2{
85 | public int id;
86 | public String name;
87 | }
88 |
89 | }
90 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo12_JdbcConnectorTest1.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.api.common.RuntimeExecutionMode;
4 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
5 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
6 | import org.apache.flink.table.api.EnvironmentSettings;
7 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
8 |
9 | public class Demo12_JdbcConnectorTest1 {
10 | public static void main(String[] args) throws Exception {
11 |
12 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
13 | env.setRuntimeMode(RuntimeExecutionMode.STREAMING);
14 |
15 | EnvironmentSettings environmentSettings = EnvironmentSettings.inStreamingMode();
16 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env,environmentSettings);
17 |
18 |
19 | // create a table mapping mysql's flinktest.stu
20 | tenv.executeSql(
21 | "create table flink_stu(\n" +
22 | " id int primary key,\n" +
23 | " name string,\n" +
24 | " age int,\n" +
25 | " gender string\n" +
26 | ") with (\n" +
27 | " 'connector' = 'jdbc',\n" +
28 | " 'url' = 'jdbc:mysql://doitedu:3306/flinktest',\n" +
29 | " 'table-name' = 'stu',\n" +
30 | " 'username' = 'root',\n" +
31 | " 'password' = 'root' \n" +
32 | ")"
33 | );
34 |
35 | DataStreamSource<String> doitedu = env.socketTextStream("doitedu", 9999);
36 |
37 | tenv.executeSql("select * from flink_stu").print();
38 |
39 | doitedu.print();
40 |
41 |
42 | env.execute();
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo12_JdbcConnectorTest2.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import lombok.AllArgsConstructor;
4 | import lombok.Data;
5 | import lombok.NoArgsConstructor;
6 | import org.apache.flink.api.common.RuntimeExecutionMode;
7 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
10 | import org.apache.flink.table.api.EnvironmentSettings;
11 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
12 |
13 | public class Demo12_JdbcConnectorTest2 {
14 | public static void main(String[] args) throws Exception {
15 |
16 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
17 | env.setRuntimeMode(RuntimeExecutionMode.STREAMING);
18 |
19 | EnvironmentSettings environmentSettings = EnvironmentSettings.inStreamingMode();
20 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env, environmentSettings);
21 |
22 |
23 | // create a table mapping mysql's flinktest.stu2
24 | tenv.executeSql(
25 | "create table flink_stu(\n" +
26 | " id int primary key, \n" +
27 | " gender string, \n" +
28 | " name string \n" +
29 | ") with (\n" +
30 | " 'connector' = 'jdbc',\n" +
31 | " 'url' = 'jdbc:mysql://doitedu:3306/flinktest',\n" +
32 | " 'table-name' = 'stu2',\n" +
33 | " 'username' = 'root',\n" +
34 | " 'password' = 'root' \n" +
35 | ")"
36 | );
37 |
38 |
39 | // 1,male
40 | SingleOutputStreamOperator<Bean1> bean1 = env
41 | .socketTextStream("doitedu", 9998)
42 | .map(s -> {
43 | String[] arr = s.split(",");
44 | return new Bean1(Integer.parseInt(arr[0]), arr[1]);
45 | });
46 | // 1,zs
47 | SingleOutputStreamOperator<Bean2> bean2 = env.socketTextStream("doitedu", 9999).map(s -> {
48 | String[] arr = s.split(",");
49 | return new Bean2(Integer.parseInt(arr[0]), arr[1]);
50 | });
51 |
52 |
53 | // turn the streams into tables
54 | tenv.createTemporaryView("bean1", bean1);
55 | tenv.createTemporaryView("bean2", bean2);
56 |
57 | tenv.executeSql("insert into flink_stu " +
58 | "select bean1.id,bean1.gender,bean2.name from bean1 left join bean2 on bean1.id=bean2.id");
59 |
60 |
61 | env.execute();
62 | }
63 |
64 |
65 | @Data
66 | @NoArgsConstructor
67 | @AllArgsConstructor
68 | public static class Bean1 {
69 | public int id;
70 | public String gender;
71 | }
72 |
73 |
74 | @Data
75 | @NoArgsConstructor
76 | @AllArgsConstructor
77 | public static class Bean2 {
78 | public int id;
79 | public String name;
80 | }
81 | }
82 |
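Because flink_stu declares a primary key, the JDBC connector writes in upsert mode (on MySQL: INSERT ... ON DUPLICATE KEY UPDATE), so retractions from the left join overwrite earlier rows instead of appending. A hypothetical MySQL DDL matching the mapping above (column widths are illustrative):

    CREATE TABLE stu2 (
      id     INT PRIMARY KEY,
      gender VARCHAR(20),
      name   VARCHAR(20)
    );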
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo13_FileSystemConnectorTest.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import lombok.AllArgsConstructor;
4 | import lombok.Data;
5 | import lombok.NoArgsConstructor;
6 | import org.apache.flink.api.common.RuntimeExecutionMode;
7 | import org.apache.flink.api.common.typeinfo.TypeHint;
8 | import org.apache.flink.api.java.tuple.Tuple4;
9 | import org.apache.flink.streaming.api.CheckpointingMode;
10 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
12 | import org.apache.flink.table.api.EnvironmentSettings;
13 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
14 |
15 | public class Demo13_FileSystemConnectorTest {
16 | public static void main(String[] args) throws Exception {
17 |
18 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
19 | env.enableCheckpointing(1000, CheckpointingMode.EXACTLY_ONCE);
20 | env.getCheckpointConfig().setCheckpointStorage("file:///d:/checkpoint");
21 | env.setRuntimeMode(RuntimeExecutionMode.STREAMING);
22 |
23 | EnvironmentSettings environmentSettings = EnvironmentSettings.inStreamingMode();
24 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env, environmentSettings);
25 |
26 |
27 | // create the filesystem mapping table fs_table
28 | tenv.executeSql(
29 | "CREATE TABLE fs_table (\n" +
30 | " user_id STRING,\n" +
31 | " order_amount DOUBLE,\n" +
32 | " dt STRING,\n" +
33 | " `hour` STRING\n" +
34 | ") PARTITIONED BY (dt, `hour`) WITH (\n" +
35 | " 'connector'='filesystem',\n" +
36 | " 'path'='file:///d:/filetable/',\n" +
37 | " 'format'='json',\n" +
38 | " 'sink.partition-commit.delay'='1 h',\n" +
39 | " 'sink.partition-commit.policy.kind'='success-file',\n" +
40 | " 'sink.rolling-policy.file-size' = '8M',\n" +
41 | " 'sink.rolling-policy.rollover-interval'='30 min',\n" +
42 | " 'sink.rolling-policy.check-interval'='10 second'\n" +
43 | ")"
44 | );
45 |
46 |
47 | // u01,88.8,2022-06-13,14
48 | SingleOutputStreamOperator> stream = env
49 | .socketTextStream("doitedu", 9999)
50 | .map(s -> {
51 | String[] split = s.split(",");
52 | return Tuple4.of(split[0], Double.parseDouble(split[1]), split[2], split[3]);
53 | }).returns(new TypeHint>() {
54 | });
55 |
56 | tenv.createTemporaryView("orders",stream);
57 |
58 | tenv.executeSql("insert into fs_table select * from orders");
59 |
60 |
61 |
62 | env.execute();
63 | }
64 |
65 |
66 | @Data
67 | @NoArgsConstructor
68 | @AllArgsConstructor
69 | public static class Bean1 {
70 | public int id;
71 | public String gender;
72 | }
73 |
74 |
75 | @Data
76 | @NoArgsConstructor
77 | @AllArgsConstructor
78 | public static class Bean2 {
79 | public int id;
80 | public String name;
81 | }
82 | }
83 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo14_MysqlCdcConnector.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.streaming.api.CheckpointingMode;
4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
6 |
7 | /**
8 | * @Author: deep as the sea
9 | * @Site: 多易教育
10 | * @QQ: 657270652
11 | * @Date: 2022/6/13
12 | * @Desc: Learn big data at 多易教育
13 | * test of the mysql cdc connector
14 | **/
15 | public class Demo14_MysqlCdcConnector {
16 |
17 | public static void main(String[] args) {
18 |
19 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
20 | env.enableCheckpointing(1000, CheckpointingMode.EXACTLY_ONCE);
21 | env.getCheckpointConfig().setCheckpointStorage("file:///d:/checkpoint");
22 |
23 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env);
24 |
25 | // create the mapped tables
26 | tenv.executeSql("CREATE TABLE flink_score (\n" +
27 | " id INT,\n" +
28 | " name string,\n" +
29 | " gender string,\n" +
30 | " score double,\n" +
31 | " PRIMARY KEY(id) NOT ENFORCED\n" +
32 | " ) WITH (\n" +
33 | " 'connector' = 'mysql-cdc',\n" +
34 | " 'hostname' = 'doitedu',\n" +
35 | " 'port' = '3306',\n" +
36 | " 'username' = 'root',\n" +
37 | " 'password' = 'root',\n" +
38 | " 'database-name' = 'flinktest',\n" +
39 | " 'table-name' = 'score'\n" +
40 | ")");
41 |
42 | tenv.executeSql("CREATE TABLE t1 (\n" +
43 | " id INT,\n" +
44 | " name string,\n" +
45 | " PRIMARY KEY(id) NOT ENFORCED\n" +
46 | " ) WITH (\n" +
47 | " 'connector' = 'mysql-cdc',\n" +
48 | " 'hostname' = 'doitedu',\n" +
49 | " 'port' = '3306',\n" +
50 | " 'username' = 'root',\n" +
51 | " 'password' = 'root',\n" +
52 | " 'database-name' = 'doitedu',\n" +
53 | " 'table-name' = 't1'\n" +
54 | ")");
55 |
56 | tenv.executeSql("select * from t1").print();
57 | System.exit(1); // exit early while testing t1 -- everything below is unreachable in this run
58 |
59 | // queries
60 | tenv.executeSql("select * from flink_score")/*.print()*/;
61 |
62 |
63 | tenv.executeSql("select gender,avg(score) as avg_score from flink_score group by gender")/*.print()*/;
64 |
65 | // create a target table to hold the result: the top 2 people by total score within each gender
66 | tenv.executeSql(
67 | "create table flink_rank(\n" +
68 | " gender string , \n" +
69 | " name string, \n" +
70 | " score_amt double, \n" +
71 | " rn bigint , \n" +
72 | " primary key(gender,rn) not enforced \n" +
73 | ") with (\n" +
74 | " 'connector' = 'jdbc',\n" +
75 | " 'url' = 'jdbc:mysql://doitedu:3306/flinktest',\n" +
76 | " 'table-name' = 'score_rank',\n" +
77 | " 'username' = 'root',\n" +
78 | " 'password' = 'root' \n" +
79 | ")"
80 | );
81 |
82 |
83 | tenv.executeSql("insert into flink_rank \n" +
84 | "SELECT\n" +
85 | " gender,\n" +
86 | " name,\n" +
87 | " score_amt,\n" +
88 | " rn\n" +
89 | "from(\n" +
90 | "SELECT\n" +
91 | " gender,\n" +
92 | " name,\n" +
93 | " score_amt,\n" +
94 | " row_number() over(partition by gender order by score_amt desc) as rn\n" +
95 | "from \n" +
96 | "(\n" +
97 | "SELECT\n" +
98 | "gender,\n" +
99 | "name,\n" +
100 | "sum(score) as score_amt\n" +
101 | "from flink_score\n" +
102 | "group by gender,name\n" +
103 | ") o1\n" +
104 | ") o2\n" +
105 | "where rn<=2");
106 | }
107 | }
108 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo18_IntervalJoin.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.api.common.typeinfo.TypeHint;
4 | import org.apache.flink.api.java.tuple.Tuple3;
5 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
8 | import org.apache.flink.table.api.DataTypes;
9 | import org.apache.flink.table.api.Schema;
10 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
11 |
12 | /**
13 | * @Author: deep as the sea
14 | * @Site: 多易教育
15 | * @QQ: 657270652
16 | * @Date: 2022/6/16
17 | * @Desc: Learn big data at 多易教育
18 | * interval join example
19 | * an interval join constrains each match to a time range around the probing row,
20 | * so its state is cleaned up automatically as watermarks advance instead of growing without bound
21 | *
22 | **/
23 | public class Demo18_IntervalJoin {
24 | public static void main(String[] args) {
25 |
26 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
27 | env.setParallelism(1);
28 |
29 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env);
30 | // set the state ttl (ms) for the table environment
31 | tenv.getConfig().getConfiguration().setLong("table.exec.state.ttl",60*60*1000L);
32 |
33 |
34 |
35 | /**
36 | * 1,a,1000
37 | * 2,b,2000
38 | * 3,c,2500
39 | * 4,d,3000
40 | * 5,e,12000
41 | */
42 | DataStreamSource<String> s1 = env.socketTextStream("doitedu", 9998);
43 | SingleOutputStreamOperator<Tuple3<String, String, Long>> ss1 = s1.map(s -> {
44 | String[] arr = s.split(",");
45 | return Tuple3.of(arr[0], arr[1], Long.parseLong(arr[2]));
46 | }).returns(new TypeHint<Tuple3<String, String, Long>>() {
47 | });
48 |
49 | /**
50 | * 1,bj,1000
51 | * 2,sh,2000
52 | * 4,xa,2600
53 | * 5,yn,12000
54 | */
55 | DataStreamSource<String> s2 = env.socketTextStream("doitedu", 9999);
56 | SingleOutputStreamOperator<Tuple3<String, String, Long>> ss2 = s2.map(s -> {
57 | String[] arr = s.split(",");
58 | return Tuple3.of(arr[0], arr[1], Long.parseLong(arr[2]));
59 | }).returns(new TypeHint<Tuple3<String, String, Long>>() {
60 | });
61 |
62 |
63 | // create the two tables
64 | tenv.createTemporaryView("t_left",ss1, Schema.newBuilder()
65 | .column("f0", DataTypes.STRING())
66 | .column("f1", DataTypes.STRING())
67 | .column("f2", DataTypes.BIGINT())
68 | .columnByExpression("rt","to_timestamp_ltz(f2,3)")
69 | .watermark("rt","rt - interval '0' second")
70 | .build());
71 |
72 | tenv.createTemporaryView("t_right",ss2, Schema.newBuilder()
73 | .column("f0", DataTypes.STRING())
74 | .column("f1", DataTypes.STRING())
75 | .column("f2", DataTypes.BIGINT())
76 | .columnByExpression("rt","to_timestamp_ltz(f2,3)")
77 | .watermark("rt","rt - interval '0' second")
78 | .build());
79 |
80 |
81 |
82 | // interval join
83 | tenv.executeSql("select a.f0,a.f1,a.f2,b.f0,b.f1 from t_left a join t_right b " +
84 | "on a.f0=b.f0 " +
85 | "and a.rt between b.rt - interval '2' second and b.rt").print();
86 |
87 |
88 |
89 | }
90 | }
91 |
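The same join can be written with the DataStream API's intervalJoin; a sketch over the two keyed tuple streams, assuming event-time timestamps and watermarks have already been assigned on ss1 and ss2 (requires the ProcessJoinFunction, Time and Collector imports):

    ss1.keyBy(t -> t.f0)
       .intervalJoin(ss2.keyBy(t -> t.f0))
       .between(Time.seconds(0), Time.seconds(2))   // right.ts in [left.ts, left.ts + 2s], the same bound as the SQL above
       .process(new ProcessJoinFunction<Tuple3<String, String, Long>, Tuple3<String, String, Long>, String>() {
           @Override
           public void processElement(Tuple3<String, String, Long> left, Tuple3<String, String, Long> right, Context ctx, Collector<String> out) {
               out.collect(left + " <-> " + right);
           }
       });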
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo18_RegularJoin.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.api.common.typeinfo.TypeHint;
4 | import org.apache.flink.api.java.tuple.Tuple3;
5 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
8 | import org.apache.flink.table.api.DataTypes;
9 | import org.apache.flink.table.api.Schema;
10 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
11 |
12 | /**
13 |  * @Author: deep as the sea
14 |  * @Site: 多易教育
15 |  * @QQ: 657270652
16 |  * @Date: 2022/6/16
17 |  * @Desc: Learn big data at 多易教育
18 |  *   Regular join example.
19 |  *   Regular joins are implemented by caching both tables' rows in state,
20 |  *   so the state can keep growing; as a safeguard, a state TTL can be set
21 |  *   to cap its size.
22 |  **/
23 | public class Demo18_RegularJoin {
24 | public static void main(String[] args) {
25 |
26 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
27 | env.setParallelism(1);
28 |
29 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env);
30 |         // set the state TTL for stateful operators in the table environment
31 | tenv.getConfig().getConfiguration().setLong("table.exec.state.ttl",60*60*1000L);
32 |
33 |
34 |
35 | /**
36 | * 1,a,1000
37 | * 2,b,2000
38 | * 3,c,2500
39 | * 4,d,3000
40 | * 5,e,12000
41 | */
42 |         DataStreamSource<String> s1 = env.socketTextStream("doitedu", 9998);
43 |         SingleOutputStreamOperator<Tuple3<String, String, Long>> ss1 = s1.map(s -> {
44 |             String[] arr = s.split(",");
45 |             return Tuple3.of(arr[0], arr[1], Long.parseLong(arr[2]));
46 |         }).returns(new TypeHint<Tuple3<String, String, Long>>() {
47 |         });
48 |
49 | /**
50 | * 1,bj,1000
51 | * 2,sh,2000
52 | * 4,xa,2600
53 | * 5,yn,12000
54 | */
55 |         DataStreamSource<String> s2 = env.socketTextStream("doitedu", 9999);
56 |         SingleOutputStreamOperator<Tuple3<String, String, Long>> ss2 = s2.map(s -> {
57 |             String[] arr = s.split(",");
58 |             return Tuple3.of(arr[0], arr[1], Long.parseLong(arr[2]));
59 |         }).returns(new TypeHint<Tuple3<String, String, Long>>() {
60 |         });
61 |
62 |
63 |         // register the two streams as tables
64 | tenv.createTemporaryView("t_left",ss1, Schema.newBuilder()
65 | .column("f0", DataTypes.STRING())
66 | .column("f1", DataTypes.STRING())
67 | .column("f2", DataTypes.BIGINT())
68 | .columnByExpression("rt","to_timestamp_ltz(f2,3)")
69 | .watermark("rt","rt - interval '0' second")
70 | .build());
71 |
72 | tenv.createTemporaryView("t_right",ss2, Schema.newBuilder()
73 | .column("f0", DataTypes.STRING())
74 | .column("f1", DataTypes.STRING())
75 | .column("f2", DataTypes.BIGINT())
76 | .columnByExpression("rt","to_timestamp_ltz(f2,3)")
77 | .watermark("rt","rt - interval '0' second")
78 | .build());
79 |
80 |
81 | // left join
82 | tenv.executeSql("select a.f0,a.f1,a.f2,b.f0,b.f1 from t_left a left join t_right b on a.f0=b.f0")/*.print()*/;
83 |
84 | // inner join
85 | tenv.executeSql("select a.f0,a.f1,a.f2,b.f0,b.f1 from t_left a join t_right b on a.f0=b.f0").print();
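        // Note: the left join produces an updating stream: if a matching right-side row
        // arrives after a NULL-padded result was emitted, that result is retracted and
        // replaced. The inner join, with these append-only inputs, only ever inserts.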
86 |
87 |
88 |
89 | }
90 | }
91 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo19_ArrayJoin.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.table.annotation.DataTypeHint;
4 | import org.apache.flink.table.annotation.FunctionHint;
5 | import org.apache.flink.table.api.*;
6 | import org.apache.flink.table.functions.TableFunction;
7 | import org.apache.flink.types.Row;
8 |
9 | import static org.apache.flink.table.api.Expressions.array;
10 | import static org.apache.flink.table.api.Expressions.row;
11 |
12 | public class Demo19_ArrayJoin {
13 | public static void main(String[] args) {
14 | TableEnvironment tenv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());
15 |
16 | Table table = tenv.fromValues(DataTypes.ROW(
17 | DataTypes.FIELD("id", DataTypes.INT()),
18 | DataTypes.FIELD("name", DataTypes.STRING()),
19 | DataTypes.FIELD("tags", DataTypes.ARRAY(DataTypes.STRING())))
20 | , row("1", "zs", array("stu", "child"))
21 | , row("2", "bb", array("miss"))
22 | );
23 |
24 | tenv.createTemporaryView("t",table);
25 |
26 |
27 | tenv.executeSql("select t.id,t.name,x.tag from t cross join unnest(tags) as x(tag)")/*.print()*/;
28 |
29 |
30 | tenv.createTemporarySystemFunction("mysplit",MySplit.class);
31 | tenv.executeSql("select t.id,t.name,tag from t, lateral table(mysplit(tags)) ")/*.print()*/;
32 | tenv.executeSql("select t.id,t.name,x.tag2 from t, lateral table(mysplit(tags)) x(tag2)")/*.print()*/;
33 | tenv.executeSql("select t.id,t.name,tag from t left join lateral table(mysplit(tags)) on true")/*.print()*/;
34 | tenv.executeSql("select t.id,t.name,x.tag2 from t left join lateral table(mysplit(tags)) x(tag2) on true").print();
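        // "cross join unnest" and the comma/lateral-table syntax are equivalent here;
        // the "left join ... on true" variants additionally keep input rows for which
        // the array (or table function) yields no elements.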
35 | }
36 |
37 |     @FunctionHint(output = @DataTypeHint("ROW<tag STRING>"))
38 |     public static class MySplit extends TableFunction<Row> {
39 |
40 | public void eval(String[] arr){
41 | for (String s : arr) {
42 | collect(Row.of(s));
43 | }
44 | }
45 |
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo19_LookupJoin.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.api.common.typeinfo.TypeHint;
4 | import org.apache.flink.api.java.tuple.Tuple2;
5 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
8 | import org.apache.flink.table.api.DataTypes;
9 | import org.apache.flink.table.api.Schema;
10 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
11 |
12 | /**
13 |  * @Author: deep as the sea
14 |  * @Site: 多易教育
15 |  * @QQ: 657270652
16 |  * @Date: 2022/6/16
17 |  * @Desc: Learn big data at 多易教育
18 |  *   Lookup join example: enriching a stream against a JDBC dimension table.
19 |  *   (The state-TTL setting below is the general safeguard for joins whose
20 |  *   state could otherwise keep growing.)
21 |  **/
22 | public class Demo19_LookupJoin {
23 | public static void main(String[] args) throws Exception {
24 |
25 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
26 | env.setParallelism(1);
27 |
28 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env);
30 |         // set the state TTL for stateful operators in the table environment
30 | tenv.getConfig().getConfiguration().setLong("table.exec.state.ttl", 60 * 60 * 1000L);
31 |
32 |
33 | /**
34 | * 1,a
35 | * 2,b
36 | * 3,c
37 | * 4,d
38 | * 5,e
39 | */
40 |         DataStreamSource<String> s1 = env.socketTextStream("doitedu", 9998);
41 |         SingleOutputStreamOperator<Tuple2<Integer, String>> ss1 = s1.map(s -> {
42 |             String[] arr = s.split(",");
43 |             return Tuple2.of(Integer.parseInt(arr[0]), arr[1]);
44 |         }).returns(new TypeHint<Tuple2<Integer, String>>() {
45 |         });
46 |
47 |
48 |         // register the probe-side table (a processing-time attribute column must be declared)
49 | tenv.createTemporaryView("a", ss1, Schema.newBuilder()
50 | .column("f0", DataTypes.INT())
51 | .column("f1", DataTypes.STRING())
52 |                 .columnByExpression("pt", "proctime()") // declare the processing-time attribute
53 | .build());
54 |
55 |         // create the lookup dimension table (jdbc connector)
56 | tenv.executeSql(
57 | "create table b( \n" +
58 | " id int , \n" +
59 | " name string, \n" +
60 | " gender STRING, \n" +
61 | " primary key(id) not enforced \n" +
62 | ") with (\n" +
63 | " 'connector' = 'jdbc',\n" +
64 | " 'url' = 'jdbc:mysql://doitedu:3306/flinktest',\n" +
65 | " 'table-name' = 'stu2',\n" +
66 | " 'username' = 'root',\n" +
67 | " 'password' = 'root' \n" +
68 | ")"
69 | );
70 |
71 |         // lookup join query
72 | tenv.executeSql("select a.*,c.* from a JOIN b FOR SYSTEM_TIME AS OF a.pt AS c \n" +
73 | " ON a.f0 = c.id").print();
74 |
75 |
76 | env.execute();
77 | }
78 | }
79 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo1_TableSql.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.table.api.EnvironmentSettings;
4 | import org.apache.flink.table.api.Table;
5 | import org.apache.flink.table.api.TableEnvironment;
6 | import org.apache.flink.table.catalog.CatalogDatabaseImpl;
7 | import org.apache.flink.table.catalog.ConnectorCatalogTable;
8 | import org.apache.flink.table.catalog.ObjectPath;
9 | import org.apache.flink.table.catalog.exceptions.DatabaseAlreadyExistException;
10 | import org.apache.flink.table.catalog.hive.HiveCatalog;
11 |
12 | import java.util.HashMap;
13 |
14 | import static org.apache.flink.table.api.Expressions.$;
15 |
16 | public class Demo1_TableSql {
17 |
18 | public static void main(String[] args) throws DatabaseAlreadyExistException {
19 |
20 | EnvironmentSettings envSettings = EnvironmentSettings.inStreamingMode(); // 流计算模式
21 | TableEnvironment tableEnv = TableEnvironment.create(envSettings);
22 |
23 |         // map the data of a kafka topic to a flinkSql table
24 |         // json : {"id":1,"name":"zs","age":28,"gender":"male"}
25 |         // create table_x (id int,name string,age int,gender string)
26 | tableEnv.executeSql(
27 | "create table t_kafka "
28 | + " ( "
29 | + " id int, "
30 | + " name string, "
31 | + " age int, "
32 | + " gender string "
33 | + " ) "
34 | + " WITH ( "
35 | + " 'connector' = 'kafka', "
36 | + " 'topic' = 'doit30-3', "
37 | + " 'properties.bootstrap.servers' = 'doitedu:9092', "
38 | + " 'properties.group.id' = 'g1', "
39 | + " 'scan.startup.mode' = 'earliest-offset', "
40 | + " 'format' = 'json', "
41 | + " 'json.fail-on-missing-field' = 'false', "
42 | + " 'json.ignore-parse-errors' = 'true' "
43 | + " ) "
44 | );
45 |
46 |
47 |         /**
48 |          * Turn a registered SQL table name into a Table object
49 |          */
50 |         Table table = tableEnv.from("t_kafka");
51 |         // run queries with the Table API
52 | table.groupBy($("gender"))
53 | .select($("gender"), $("age").avg())
54 | .execute()
55 | .print();
56 |
57 |
58 | tableEnv.executeSql("select gender,avg(age) as avg_age from t_kafka group by gender").print();
59 |
60 |
61 | }
62 | }
63 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo20_Temporal_Join.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import lombok.AllArgsConstructor;
4 | import lombok.Data;
5 | import lombok.NoArgsConstructor;
6 | import org.apache.flink.api.common.typeinfo.TypeHint;
7 | import org.apache.flink.api.java.tuple.Tuple2;
8 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
9 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
11 | import org.apache.flink.table.api.DataTypes;
12 | import org.apache.flink.table.api.Schema;
13 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
14 |
15 | /**
16 |  * @Author: deep as the sea
17 |  * @Site: 多易教育
18 |  * @QQ: 657270652
19 |  * @Date: 2022/6/16
20 |  * @Desc: Learn big data at 多易教育
21 |  *   Temporal join example
22 |  **/
23 | public class Demo20_Temporal_Join {
24 | public static void main(String[] args) throws Exception {
25 |
26 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
27 | env.setParallelism(1);
28 |
29 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env);
30 |
31 |
32 |         /**
33 |          * order id, currency, amount, order time; e.g.:
34 |          * 1,a,100,167438436400
35 |          */
36 |         DataStreamSource<String> s1 = env.socketTextStream("doitedu", 9998);
37 |
38 |         SingleOutputStreamOperator<Order> ss1 = s1.map(s -> {
39 |             String[] arr = s.split(",");
40 |             return new Order(Integer.parseInt(arr[0]), arr[1], Double.parseDouble(arr[2]), Long.parseLong(arr[3]));
41 |         });
42 |
43 |
44 |         // register the main (probe) table, declaring an event-time attribute and watermark
45 | tenv.createTemporaryView("orders", ss1, Schema.newBuilder()
46 | .column("orderId", DataTypes.INT())
47 | .column("currency", DataTypes.STRING())
48 | .column("price", DataTypes.DOUBLE())
49 | .column("orderTime", DataTypes.BIGINT())
50 |                 .columnByExpression("rt", "to_timestamp_ltz(orderTime,3)") // derive the event-time column from the epoch-millis field
51 | .watermark("rt","rt")
52 | .build());
53 |
54 |
55 | //tenv.executeSql("select orderId,currency,price,orderTime,rt from orders").print();
56 |
57 |         // create the temporal (versioned) table, fed by mysql-cdc
58 | tenv.executeSql("CREATE TABLE currency_rate (\n" +
59 | " currency STRING, \n" +
60 | " rate double , \n" +
61 | " update_time bigint , \n" +
62 | " rt as to_timestamp_ltz(update_time,3) ," +
63 | " watermark for rt as rt - interval '0' second ," +
64 | " PRIMARY KEY(currency) NOT ENFORCED\n" +
65 | " ) WITH ( \n" +
66 | " 'connector' = 'mysql-cdc',\n" +
67 | " 'hostname' = 'doitedu',\n" +
68 | " 'port' = '3306',\n" +
69 | " 'username' = 'root',\n" +
70 | " 'password' = 'root',\n" +
71 | " 'database-name' = 'flinktest',\n" +
72 | " 'table-name' = 'currency_rate'\n" +
73 | ")");
74 |
75 | //tenv.executeSql("select * from currency_rate").print();
76 |
77 |
78 |
79 |
80 |         // temporal join query
81 | tenv.executeSql(
82 | "SELECT \n" +
83 | " orders.orderId, \n" +
84 | " orders.currency, \n" +
85 | " orders.price, \n" +
86 | " orders.orderTime, \n" +
87 | " rate \n" +
88 | "FROM orders \n" +
89 | "LEFT JOIN currency_rate FOR SYSTEM_TIME AS OF orders.rt \n" +
90 | "ON orders.currency = currency_rate.currency"
91 | ).print();
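        // Semantics: each order is joined against the version of currency_rate that was
        // valid as of the order's event time (orders.rt), as reconstructed from the CDC
        // changelog; results are emitted once the versioned table's watermark passes rt.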
92 |
93 |
94 | env.execute();
95 | }
96 |
97 |
98 | @Data
99 | @NoArgsConstructor
100 | @AllArgsConstructor
101 | public static class Order {
102 |         // order id, currency, amount, order time
103 | public int orderId;
104 | public String currency;
105 | public double price;
106 | public long orderTime;
107 |
108 | }
109 | }
110 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo21_CustomScalarFunction.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.table.api.DataTypes;
4 | import org.apache.flink.table.api.EnvironmentSettings;
5 | import org.apache.flink.table.api.Table;
6 | import org.apache.flink.table.api.TableEnvironment;
7 | import org.apache.flink.table.functions.ScalarFunction;
8 | import org.apache.flink.types.Row;
9 |
10 |
11 | public class Demo21_CustomScalarFunction {
12 | public static void main(String[] args) {
13 |
14 | TableEnvironment tenv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());
15 |
16 | Table table = tenv.fromValues(
17 | DataTypes.ROW(
18 | DataTypes.FIELD("name", DataTypes.STRING())),
19 | Row.of("aaa"),
20 | Row.of("bbb"),
21 | Row.of("ccc")
22 | );
23 |
24 | tenv.createTemporaryView("t",table);
25 |
26 |         // register the user-defined function
27 | tenv.createTemporarySystemFunction("myupper",MyUpper.class);
28 |
29 |         // once registered, it can be used in sql
30 | tenv.executeSql("select myupper(name) from t").print();
31 |
32 | }
33 |
34 |
35 | public static class MyUpper extends ScalarFunction{
36 |
37 | public String eval(String str){
38 | return str.toUpperCase();
39 | }
40 | }
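    // The eval() signature determines the function's SQL signature: myupper(STRING) -> STRING.
    // Additional eval() overloads would expose further overloads in SQL.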
41 |
42 |
43 |
44 |
45 | }
46 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo22_CustomAggregateFunction.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.table.api.DataTypes;
4 | import org.apache.flink.table.api.EnvironmentSettings;
5 | import org.apache.flink.table.api.Table;
6 | import org.apache.flink.table.api.TableEnvironment;
7 | import org.apache.flink.table.functions.AggregateFunction;
8 | import org.apache.flink.types.Row;
9 |
10 | /**
11 |  * @Author: deep as the sea
12 |  * @Site: 多易教育
13 |  * @QQ: 657270652
14 |  * @Date: 2022/6/16
15 |  * @Desc: Learn big data at 多易教育
16 |  *   Custom aggregate function example
17 |  **/
18 | public class Demo22_CustomAggregateFunction {
19 | public static void main(String[] args) {
20 |
21 | TableEnvironment tenv = TableEnvironment.create(EnvironmentSettings.inBatchMode());
22 |
23 | Table table = tenv.fromValues(
24 | DataTypes.ROW(
25 | DataTypes.FIELD("uid", DataTypes.INT()),
26 | DataTypes.FIELD("gender", DataTypes.STRING()),
27 | DataTypes.FIELD("score", DataTypes.DOUBLE())
28 | ),
29 | Row.of(1,"male",80),
30 | Row.of(2,"male",100),
31 | Row.of(3,"female",90)
32 | );
33 |
34 | tenv.createTemporaryView("t",table);
35 |
36 |         // register the user-defined function
37 | tenv.createTemporarySystemFunction("myavg",MyAvg.class);
38 |
39 |         // once registered, it can be used in sql
40 | tenv.executeSql("select gender,myavg(score) as avg_score from t group by gender ").print();
41 |
42 | }
43 |
44 |
45 | public static class MyAccumulator{
46 | public int count;
47 | public double sum;
48 | }
49 |
50 |     public static class MyAvg extends AggregateFunction<Double, MyAccumulator> {
51 |
52 |         /**
53 |          * Get the final result from the accumulator
54 |          * @param accumulator the accumulator which contains the current intermediate results
55 |          * @return the average
56 |          */
57 | @Override
58 | public Double getValue(MyAccumulator accumulator) {
59 | return accumulator.sum/ accumulator.count;
60 | }
61 |
62 |         /**
63 |          * Create the accumulator
64 |          * @return a fresh accumulator
65 |          */
66 | @Override
67 | public MyAccumulator createAccumulator() {
68 | MyAccumulator myAccumulator = new MyAccumulator();
69 | myAccumulator.count = 0;
70 | myAccumulator.sum = 0;
71 |
72 |
73 | return myAccumulator;
74 | }
75 |
76 |
77 |         /**
78 |          * How to update the accumulator for each input row
79 |          * @param accumulator
80 |          * @param score
81 |          */
82 | public void accumulate(MyAccumulator accumulator,Double score){
83 |
84 | accumulator.count = accumulator.count + 1;
85 | accumulator.sum = accumulator.sum + score;
86 |
87 | }
88 |
89 |
90 | }
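    // Note (not in the original): depending on where the function is used (e.g. in
    // session-window aggregations), Flink may additionally require a
    // merge(MyAccumulator, Iterable<MyAccumulator>) method, and a retract(...) method
    // for queries that update previously emitted results.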
91 |
92 | }
93 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo23_TableFunction.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.table.annotation.DataTypeHint;
4 | import org.apache.flink.table.annotation.FunctionHint;
5 | import org.apache.flink.table.api.*;
6 | import org.apache.flink.table.functions.TableFunction;
7 | import org.apache.flink.types.Row;
8 |
9 |
10 | public class Demo23_TableFunction {
11 |
12 | public static void main(String[] args) {
13 |
14 | TableEnvironment tenv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());
15 |
16 | /*
17 | Table table = tenv.fromValues(DataTypes.ROW(
18 | DataTypes.FIELD("id", DataTypes.INT()),
19 | DataTypes.FIELD("name", DataTypes.STRING()),
20 | DataTypes.FIELD("phone_numbers", DataTypes.ARRAY(DataTypes.STRING()))),
21 | Row.of(1, "zs", Expressions.array("138","139","135")),
22 | Row.of(2, "bb", Expressions.array("135","136"))
23 | );
24 |
25 | tenv.createTemporaryView("t",table);
26 | tenv.executeSql("select t.id,t.name,t2.phone_number from t cross join unnest(phone_numbers) as t2(phone_number)").print();
27 | */
28 |
29 | Table table = tenv.fromValues(DataTypes.ROW(
30 | DataTypes.FIELD("id", DataTypes.INT()),
31 | DataTypes.FIELD("name", DataTypes.STRING()),
32 | DataTypes.FIELD("phone_numbers", DataTypes.STRING())),
33 | Row.of(1, "zs", "13888,137,1354455"),
34 | Row.of(2, "bb", "1366688,1374,132224455")
35 | );
36 | tenv.createTemporaryView("t",table);
37 |
38 |
39 |         // register the function
40 | tenv.createTemporarySystemFunction("mysplit",MySplit.class);
41 |
42 |         // explode the phone-number string
43 | tenv.executeSql("select * from t , lateral table(mysplit(phone_numbers,',')) as t1(p,l) ")/*.print()*/;
44 | tenv.executeSql("select * from t left join lateral table(mysplit(phone_numbers,',')) as t1(p,l) on true ").print();
45 |
46 |
47 |
48 | }
49 |
50 |     @FunctionHint(output = @DataTypeHint("ROW<word STRING, len INT>"))  // field names are illustrative; the queries re-alias them as t1(p, l)
51 |     public static class MySplit extends TableFunction<Row> {
52 |
53 | public void eval(String str,String delimiter){
54 | for (String s : str.split(delimiter)) {
55 | collect(Row.of(s,s.length()));
56 | }
57 | }
58 | }
59 |
60 | }
61 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo24_TableAggregateFunction.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.api.java.tuple.Tuple2;
4 | import org.apache.flink.table.annotation.DataTypeHint;
5 | import org.apache.flink.table.annotation.FunctionHint;
6 | import org.apache.flink.table.api.DataTypes;
7 | import org.apache.flink.table.api.EnvironmentSettings;
8 | import org.apache.flink.table.api.Table;
9 | import org.apache.flink.table.api.TableEnvironment;
10 | import org.apache.flink.table.functions.TableAggregateFunction;
11 | import org.apache.flink.types.Row;
12 | import org.apache.flink.util.Collector;
13 |
14 | import static org.apache.flink.table.api.Expressions.$;
15 | import static org.apache.flink.table.api.Expressions.call;
16 |
17 | /**
18 |  * @Author: deep as the sea
19 |  * @Site: 多易教育
20 |  * @QQ: 657270652
21 |  * @Date: 2022/6/17
22 |  * @Desc: Learn big data at 多易教育
23 |  *   Custom table aggregate function example.
24 |  *   What a table aggregate function is, given the data:
25 |  *     1,male,zs,88
26 |  *     2,male,bb,99
27 |  *     3,male,cc,76
28 |  *     4,female,dd,78
29 |  *     5,female,ee,92
30 |  *     6,female,ff,86
31 |  *
32 |  * -- find the two highest scores per gender
33 |  * -- conventional approach:
34 |  * SELECT
35 |  *   *
36 |  * FROM
37 |  * (
38 |  *   SELECT
39 |  *     gender,
40 |  *     score,
41 |  *     row_number() over(partition by gender order by score desc) as rn
42 |  *   FROM t
43 |  * )
44 |  * where rn<=2
45 |  *
46 |  *
47 |  * -- with an aggregate function that, in group-by mode, can emit multiple
48 |  * -- rows and columns per group, the same result could be written as:
49 |  * SELECT
50 |  *   gender,
51 |  *   top2(score)
52 |  * from t
53 |  * group by gender
54 |  *
55 |  * male,88
56 |  * male,99
57 |  * female,92
58 |  * female,86
59 |  **/
59 | public class Demo24_TableAggregateFunction {
60 |
61 | public static void main(String[] args) {
62 | TableEnvironment tenv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());
63 | Table table = tenv.fromValues(DataTypes.ROW(
64 | DataTypes.FIELD("id", DataTypes.INT()),
65 | DataTypes.FIELD("gender", DataTypes.STRING()),
66 | DataTypes.FIELD("score", DataTypes.DOUBLE())),
67 | Row.of(1, "male", 67),
68 | Row.of(2, "male", 88),
69 | Row.of(3, "male", 98),
70 | Row.of(4, "female", 99),
71 | Row.of(5, "female", 84),
72 | Row.of(6, "female", 89)
73 | );
74 | tenv.createTemporaryView("t", table);
75 |
76 |         // one flatAggregate call yields the top-2 scores per gender
77 | table
78 | .groupBy($("gender"))
79 | .flatAggregate(call(MyTop2.class, $("score")))
80 | .select($("gender"), $("score_top"), $("rank_no"))
81 | .execute().print();
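        // With the sample rows above, the final per-gender results are:
        //   male   -> (98.0, 1), (88.0, 2)
        //   female -> (99.0, 1), (89.0, 2)
        // (in streaming mode the printed changelog also shows the intermediate
        // retractions that led there)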
82 |
83 |
84 | }
85 |
86 | public static class MyAccumulator {
87 |
88 | public double first;
89 | public double second;
90 |
91 | }
92 |
93 |     @FunctionHint(output = @DataTypeHint("ROW<score_top DOUBLE, rank_no INT>"))
94 |     public static class MyTop2 extends TableAggregateFunction<Row, MyAccumulator> {
95 |
96 | @Override
97 | public MyAccumulator createAccumulator() {
98 |
99 |             MyAccumulator acc = new MyAccumulator();
100 |             acc.first = Double.MIN_VALUE;   // note: Double.MIN_VALUE is the smallest positive double;
101 |             acc.second = Double.MIN_VALUE;  // it only works as an "empty" sentinel because scores are non-negative
102 |
103 | return acc;
104 | }
105 |
106 |
107 |         /**
108 |          * Accumulate / update logic
109 |          *
110 |          * @param acc
111 |          * @param score
112 |          */
113 | public void accumulate(MyAccumulator acc, Double score) {
114 | if (score > acc.first) {
115 | acc.second = acc.first;
116 | acc.first = score;
117 | } else if (score > acc.second) {
118 | acc.second = score;
119 | }
120 | }
121 |
122 | public void merge(MyAccumulator acc, Iterable it) {
123 | for (MyAccumulator otherAcc : it) {
124 | accumulate(acc, otherAcc.first);
125 | accumulate(acc, otherAcc.second);
126 | }
127 | }
128 |
129 |         /**
130 |          * Emit the result: may output multiple rows and columns
131 |          *
132 |          * @param acc
133 |          * @param out
134 |          */
135 | public void emitValue(MyAccumulator acc, Collector out) {
136 | if (acc.first != Double.MIN_VALUE) {
137 | out.collect(Row.of(acc.first, 1));
138 | }
139 | if (acc.second != Double.MIN_VALUE) {
140 | out.collect(Row.of(acc.second, 2));
141 | }
142 | }
143 | }
144 |
145 |
146 | }
147 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo25_MetricDemos.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.api.common.accumulators.LongCounter;
4 | import org.apache.flink.configuration.Configuration;
5 | import org.apache.flink.metrics.Gauge;
6 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
8 | import org.apache.flink.streaming.api.functions.ProcessFunction;
9 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
10 | import org.apache.flink.util.Collector;
11 |
12 | public class Demo25_MetricDemos {
13 |
14 | public static void main(String[] args) throws Exception {
15 |
16 | StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());
17 |
18 |         DataStreamSource<String> ds = env.socketTextStream("doitedu", 9999);
19 |
20 |         ds.process(new ProcessFunction<String, String>() {
21 |             LongCounter longCounter;
22 |
23 |             MyGuage gauge;
24 | @Override
25 | public void open(Configuration parameters) throws Exception {
26 |
27 | longCounter = getRuntimeContext().getLongCounter("doitedu-counter1");
28 |
29 |
30 | gauge = getRuntimeContext().getMetricGroup().gauge("doitedu-gauge", new MyGuage());
31 | }
32 |
33 | @Override
34 |             public void processElement(String value, Context ctx, Collector<String> out) throws Exception {
35 |
36 |                 // metric code outside the business logic: measures how many records this task has received
37 | longCounter.add(1);
38 |
39 | gauge.add(1);
40 |
41 |
42 | out.collect(value.toUpperCase());
43 | }
44 | }).print();
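        // The gauge appears under the task's metrics in the local web UI started by
        // createLocalEnvironmentWithWebUI (http://localhost:8081 by default, assuming
        // the port is free); the LongCounter is an accumulator, shown on the job's
        // accumulators tab and in the final job result.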
45 |
46 | env.execute();
47 |
48 |
49 | }
50 |
51 |
52 |     public static class MyGuage implements Gauge<Integer>{
53 |
54 | int recordCount = 0;
55 |
56 | public void add(int i){
57 | recordCount += i;
58 | }
59 |
60 | @Override
61 | public Integer getValue() {
62 | return recordCount;
63 | }
64 | }
65 |
66 | }
67 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo2_TableApi.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.api.common.RuntimeExecutionMode;
4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
5 | import org.apache.flink.table.api.*;
6 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
7 |
8 | import static org.apache.flink.table.api.Expressions.$;
9 |
10 | public class Demo2_TableApi {
11 |
12 | public static void main(String[] args) {
13 |
14 |         // a pure Table environment
15 | // TableEnvironment tenv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());
16 |
17 |         // a mixed (DataStream + Table) environment
18 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
19 | env.setRuntimeMode(RuntimeExecutionMode.STREAMING);
20 | StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
21 |
22 |
23 |         // create the table
24 |         Table table = tableEnv.from(TableDescriptor
25 |                 .forConnector("kafka") // specify the connector
26 |                 .schema(Schema.newBuilder() // specify the schema
27 |                         .column("id", DataTypes.INT())
28 |                         .column("name", DataTypes.STRING())
29 |                         .column("age", DataTypes.INT())
30 |                         .column("gender", DataTypes.STRING())
31 |                         .build())
32 |                 .format("json") // specify the source data format
33 |                 .option("topic", "doit30-3") // connector- and format-specific options
34 | .option("properties.bootstrap.servers", "doit01:9092")
35 | .option("properties.group.id", "g2")
36 | .option("scan.startup.mode", "earliest-offset")
37 | .option("json.fail-on-missing-field", "false")
38 | .option("json.ignore-parse-errors", "true")
39 | .build());
40 |
41 |
42 |         // query
43 | Table table2 = table.groupBy($("gender"))
44 | .select($("gender"),$("age").avg().as("avg_age"));
45 |
46 |
47 |         /**
48 |          * Register an existing Table object as a SQL view name
49 |          */
50 |         tableEnv.createTemporaryView("kafka_table",table);
51 |         // after that, it can be queried with sql
52 | tableEnv.executeSql("select gender,avg(age) as avg_age from kafka_table group by gender").print();
53 |
54 |
55 |
56 |
57 |         // output
58 | table2.execute().print();
59 |
60 |
61 | }
62 |
63 |
64 | }
65 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo4_SqlTableCreate.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.api.common.RuntimeExecutionMode;
4 | import org.apache.flink.api.common.eventtime.WatermarkStrategy;
5 | import org.apache.flink.api.common.serialization.SimpleStringSchema;
6 | import org.apache.flink.connector.kafka.source.KafkaSource;
7 | import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
8 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
9 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
11 | import org.apache.flink.table.api.*;
12 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
13 | import org.apache.kafka.clients.consumer.OffsetResetStrategy;
14 |
15 | /**
16 |  * Creating tables that have SQL names,
17 |  * in various ways
18 |  */
19 | /**
20 |  * @Author: deep as the sea
21 |  * @Site: 多易教育
22 |  * @QQ: 657270652
23 |  * @Date: 2022/6/12
24 |  * @Desc: Learn big data at 多易教育
25 |  *   Table-creation examples: tables with SQL names
26 |  **/
27 | public class Demo4_SqlTableCreate {
28 |
29 | public static void main(String[] args) {
30 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
31 | env.setRuntimeMode(RuntimeExecutionMode.STREAMING);
32 |
33 | EnvironmentSettings environmentSettings = EnvironmentSettings.inStreamingMode();
34 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env, environmentSettings);
35 |
36 |         /**
37 |          * 1. Create a "named" (SQL) table by building a TableDescriptor
38 |          */
39 |         tenv.createTable("table_a", // table name
40 | TableDescriptor.forConnector("filesystem")
41 | .schema(Schema.newBuilder()
42 | .column("id", DataTypes.INT())
43 | .column("name", DataTypes.STRING())
44 | .column("age", DataTypes.INT())
45 | .column("gender", DataTypes.STRING())
46 | .build())
47 | .format("csv")
48 | .option("path", "data/sqldemo/a.txt")
49 | .option("csv.ignore-parse-errors", "true")
50 | .build());
51 |
52 |
53 | tenv.executeSql("select * from table_a").print();
54 |         System.exit(1);  // exits early for a quick test; comment this out to run sections 2 and 3 below
55 | tenv.executeSql("select gender,max(age) as max_age from table_a group by gender")/*.print()*/;
56 |
57 |
58 |         /**
59 |          * 2. Create a "named" view from a DataStream
60 |          */
61 |         DataStreamSource<String> stream1 = env.socketTextStream("doit01", 9999);
62 |         SingleOutputStreamOperator<Demo3_TableObjectCreate.Person> javaBeanStream = stream1.map(s -> {
63 | String[] split = s.split(",");
64 | return new Demo3_TableObjectCreate.Person(Integer.parseInt(split[0]), split[1], Integer.parseInt(split[2]), split[3]);
65 | });
66 | tenv.createTemporaryView("t_person", javaBeanStream);
67 | tenv.executeSql("select gender,max(age) as max_age from t_person group by gender")/*.print()*/;
68 |
69 |
70 |         /**
71 |          * 3. Create a "named" view from an existing Table object
72 |          */
73 | Table table_a = tenv.from("table_a");
74 | tenv.createTemporaryView("table_x",table_a);
75 | tenv.executeSql("select * from table_x").print();
76 | }
77 |
78 | }
79 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo6_Exercise.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 |
4 | import org.apache.flink.table.api.EnvironmentSettings;
5 | import org.apache.flink.table.api.TableEnvironment;
6 | import org.apache.flink.table.api.TableResult;
7 |
13 | /**
14 |  * @Author: deep as the sea
15 |  * @Site: 多易教育
16 |  * @QQ: 657270652
17 |  * @Date: 2022/6/12
18 |  * @Desc: Learn big data at 多易教育
19 |  * >>>>> Exercise requirements >>>>>>>
20 |  * Basic: kafka contains data like:
21 |  * {"id":1,"name":"zs","nick":"tiedan","age":18,"gender":"male"}
22 |  *
23 |  * Advanced: kafka contains data like:
24 |  * {"id":1,"name":{"formal":"zs","nick":"tiedan"},"age":18,"gender":"male"}
25 |  *
26 |  * Use flinkSql to compute, over the stream so far:
27 |  *   - per nick, how many distinct users
28 |  *   - per gender, the maximum age
29 |  **/
30 | public class Demo6_Exercise {
31 | public static void main(String[] args) {
32 | TableEnvironment tenv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());
33 |
34 |         // create the source table
35 | tenv.executeSql(
36 | "create table t_person "
37 | + " ( "
38 | + " id int, "
39 | + " name string, "
40 | + " nick string, "
41 | + " age int, "
42 | + " gender string "
43 | + " ) "
44 | + " WITH ( "
45 | + " 'connector' = 'kafka', "
46 | + " 'topic' = 'doit30-4', "
47 | + " 'properties.bootstrap.servers' = 'doitedu:9092', "
48 | + " 'properties.group.id' = 'g1', "
49 | + " 'scan.startup.mode' = 'earliest-offset', "
50 | + " 'format' = 'json', "
51 | + " 'json.fail-on-missing-field' = 'false', "
52 | + " 'json.ignore-parse-errors' = 'true' "
53 | + " ) "
54 | );
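        // Sketch for the "advanced" nested-JSON case (an illustration, not implemented here):
        // the nested name object can be declared as a ROW type and queried with dot syntax,
        // e.g. (t_person2 is a hypothetical name):
        //   create table t_person2 (
        //     id int,
        //     name row<formal string, nick string>,
        //     age int,
        //     gender string
        //   ) with ( ...same kafka/json options as above... )
        //   select name.nick, count(distinct id) as user_cnt from t_person2 group by name.nick;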
55 |
56 |
57 |         // create the sink table
58 |         // the kafka connector cannot accept UPDATE (changelog) rows, only INSERT rows;
59 |         // since our query produces updates, an upsert-kafka connector table is needed to receive them
60 | tenv.executeSql(
61 | "create table t_nick_cnt "
62 | + " ( "
63 | + " nick string primary key not enforced, "
64 | + " user_cnt bigint "
65 | + " ) "
66 | + " WITH ( "
67 | + " 'connector' = 'upsert-kafka', "
68 | + " 'topic' = 'doit30-nick', "
69 | + " 'properties.bootstrap.servers' = 'doitedu:9092', "
70 | + " 'key.format' = 'json' , "
71 | + " 'value.format' = 'json' "
72 | + " ) "
73 | );
74 |
75 |
76 |         // run the query and write the result into the sink table
77 | //TableResult tableResult = tenv.executeSql("select nick,count(distinct id) as user_cnt from t_person group by nick");
78 | tenv.executeSql(
79 | "insert into t_nick_cnt " +
80 | "select nick,count(distinct id) as user_cnt from t_person group by nick");
81 |
82 | }
83 |
84 | }
85 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo7_ColumnDetail1_Sql.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.table.api.EnvironmentSettings;
4 | import org.apache.flink.table.api.TableEnvironment;
5 |
6 | /**
7 | * @Author: deep as the sea
8 | * @Site: 多易教育
9 | * @QQ: 657270652
10 | * @Date: 2022/6/11
11 |  * @Desc: detailed schema-definition example (table structure defined via SQL DDL)
12 | **/
13 | public class Demo7_ColumnDetail1_Sql {
14 | public static void main(String[] args) {
15 |
16 | TableEnvironment tenv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());
17 |
18 |         // create the source table
19 |         // {"id":4,"name":"zs","nick":"tiedan","age":18,"gender":"male"}
20 | tenv.executeSql(
21 | "create table t_person "
22 | + " ( "
23 | + " id int , " // -- 物理字段
24 | + " name string, " // -- 物理字段
25 | + " nick string, "
26 | + " age int, "
27 | + " gender string , "
28 | + " guid as id, " // -- 表达式字段(逻辑字段)
29 | + " big_age as age + 10 , " // -- 表达式字段(逻辑字段)
30 | + " offs bigint metadata from 'offset' , " // -- 元数据字段
31 | + " ts TIMESTAMP_LTZ(3) metadata from 'timestamp', " // -- 元数据字段
32 | /*+ " PRIMARY KEY(id,name) NOT ENFORCED "*/ // -- 主键约束
33 | + " ) "
34 | + " WITH ( "
35 | + " 'connector' = 'kafka', "
36 | + " 'topic' = 'doit30-4', "
37 | + " 'properties.bootstrap.servers' = 'doitedu:9092', "
38 | + " 'properties.group.id' = 'g1', "
39 | + " 'scan.startup.mode' = 'earliest-offset', "
40 | + " 'format' = 'json', "
41 | + " 'json.fail-on-missing-field' = 'false', "
42 | + " 'json.ignore-parse-errors' = 'true' "
43 | + " ) "
44 | );
45 |
46 | tenv.executeSql("desc t_person").print();
47 | tenv.executeSql("select * from t_person where id>2").print();
48 |
49 |
50 | }
51 | }
52 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo7_ColumnDetail2_TableApi.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.table.api.*;
4 |
5 | import static org.apache.flink.table.api.Expressions.$;
6 |
7 | /**
8 | * @Author: deep as the sea
9 | * @Site: 多易教育
10 | * @QQ: 657270652
11 | * @Date: 2022/6/11
12 |  * @Desc: detailed schema-definition example (table structure defined via the Table API)
13 | **/
14 | public class Demo7_ColumnDetail2_TableApi {
15 | public static void main(String[] args) {
16 |
17 | TableEnvironment tenv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());
18 |
19 |         // create the source table
20 |         // {"id":4,"name":"zs","nick":"tiedan","age":18,"gender":"male"}
21 |         tenv.createTable("t_person",
22 |                 TableDescriptor
23 |                         .forConnector("kafka")
24 |                         .schema(Schema.newBuilder()
25 |                                 .column("id", DataTypes.INT())       // column() declares a physical column
26 |                                 .column("name", DataTypes.STRING())
27 |                                 .column("nick", DataTypes.STRING())
28 |                                 .column("age", DataTypes.INT())
29 |                                 .column("gender", DataTypes.STRING())
30 |                                 .columnByExpression("guid","id")     // declares a computed (expression) column
31 |                                 /*.columnByExpression("big_age",$("age").plus(10))*/
32 |                                 .columnByExpression("big_age","age + 10") // declares a computed (expression) column
33 |                                 // isVirtual: whether the column appears in the schema when this table is used as a sink
34 |                                 .columnByMetadata("offs",DataTypes.BIGINT(),"offset",true)          // declares a metadata column
35 |                                 .columnByMetadata("ts",DataTypes.TIMESTAMP_LTZ(3),"timestamp",true) // declares a metadata column
36 |                                 /*.primaryKey("id","name")*/
37 | .build())
38 | .format("json")
39 | .option("topic","doit30-4")
40 | .option("properties.bootstrap.servers","doitedu:9092")
41 | .option("properties.group.id","g1")
42 | .option("scan.startup.mode","earliest-offset")
43 | .option("json.fail-on-missing-field","false")
44 | .option("json.ignore-parse-errors","true")
45 | .build()
46 | );
47 |
48 | tenv.executeSql("select * from t_person").print();
49 |
50 |
51 | }
52 | }
53 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo8_CsvFormat.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
4 | import org.apache.flink.table.api.DataTypes;
5 | import org.apache.flink.table.api.EnvironmentSettings;
6 | import org.apache.flink.table.api.Schema;
7 | import org.apache.flink.table.api.TableDescriptor;
8 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
9 |
10 | /**
11 |  * @Author: deep as the sea
12 |  * @Site: 多易教育
13 |  * @QQ: 657270652
14 |  * @Date: 2022/6/12
15 |  * @Desc: Learn big data at 多易教育
16 |  *   csv format in detail
17 |  **/
18 | public class Demo8_CsvFormat {
19 |
20 | public static void main(String[] args) {
21 |
22 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
23 |
24 | EnvironmentSettings settings = EnvironmentSettings.inBatchMode();
25 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env, settings);
26 |
27 |
28 | tenv.executeSql(
29 | "create table t_csv( "
30 | + " id int, "
31 | + " name string, "
32 | + " age string "
33 | + ") with ( "
34 | + " 'connector' = 'filesystem', "
35 | + " 'path' = 'data/csv/', "
36 | + " 'format'='csv', "
37 | + " 'csv.disable-quote-character' = 'false', "
38 | + " 'csv.quote-character' = '|', "
39 | + " 'csv.ignore-parse-errors' = 'true' , "
40 | + " 'csv.null-literal' = '\\N' , "
41 | + " 'csv.allow-comments' = 'true' "
42 | + ") "
43 | );
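        // Hypothetical file contents under data/csv/ that these options would parse:
        //   # lines starting with '#' are skipped (csv.allow-comments)
        //   1,|zs|,28       <-- '|' is the quote character
        //   2,ls,\N         <-- '\N' is read as NULL (csv.null-literal)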
44 |
45 | tenv.executeSql("desc t_csv").print();
46 | tenv.executeSql("select * from t_csv").print();
47 |
48 | }
49 |
50 | }
51 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo9_EventTimeAndWatermark.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
4 | import org.apache.flink.table.api.DataTypes;
5 | import org.apache.flink.table.api.EnvironmentSettings;
6 | import org.apache.flink.table.api.Schema;
7 | import org.apache.flink.table.api.TableDescriptor;
8 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
9 |
10 | /**
11 |  * @Author: deep as the sea
12 |  * @Site: 多易教育
13 |  * @QQ: 657270652
14 |  * @Date: 2022/6/12
15 |  * @Desc: Learn big data at 多易教育
16 |  *   Example of declaring a watermark in DDL
17 |  *
18 |  * test data:
19 | * {"guid":1,"eventId":"e02","eventTime":1655017433000,"pageId":"p001"}
20 | * {"guid":1,"eventId":"e03","eventTime":1655017434000,"pageId":"p001"}
21 | * {"guid":1,"eventId":"e04","eventTime":1655017435000,"pageId":"p001"}
22 | * {"guid":1,"eventId":"e05","eventTime":1655017436000,"pageId":"p001"}
23 | * {"guid":1,"eventId":"e06","eventTime":1655017437000,"pageId":"p001"}
24 | * {"guid":1,"eventId":"e07","eventTime":1655017438000,"pageId":"p001"}
25 | * {"guid":1,"eventId":"e08","eventTime":1655017439000,"pageId":"p001"}
26 | *
27 | *
28 | *
29 | **/
30 | public class Demo9_EventTimeAndWatermark {
31 |
32 | public static void main(String[] args) {
33 |
34 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
35 |
36 | EnvironmentSettings settings = EnvironmentSettings.inStreamingMode();
37 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env, settings);
38 |
39 |
40 |         /**
41 |          * Only TIMESTAMP or TIMESTAMP_LTZ columns can be declared as rowtime (the event-time attribute)
42 |          */
43 | tenv.executeSql(
44 | " create table t_events( "
45 | + " guid int, "
46 | + " eventId string, "
47 | /*+ " eventTime timestamp(3), "*/
48 | + " eventTime bigint, "
49 | + " pageId string, "
50 | + " pt AS proctime(), " // 利用一个表达式字段,来声明 processing time属性
51 | + " rt as to_timestamp_ltz(eventTime,3), "
52 | + " watermark for rt as rt - interval '0.001' second " // 用watermark for xxx,来将一个已定义的TIMESTAMP/TIMESTAMP_LTZ字段声明成 eventTime属性及指定watermark策略
53 | + " ) "
54 | + " with ( "
55 | + " 'connector' = 'kafka', "
56 | + " 'topic' = 'doit30-events2', "
57 | + " 'properties.bootstrap.servers' = 'doitedu:9092', "
58 | + " 'properties.group.id' = 'g1', "
59 | + " 'scan.startup.mode' = 'earliest-offset', "
60 | + " 'format' = 'json', "
61 | + " 'json.fail-on-missing-field' = 'false', "
62 | + " 'json.ignore-parse-errors' = 'true' "
63 | + " ) "
64 | );
65 |
66 | tenv.executeSql("desc t_events")/*.print()*/;
67 | tenv.executeSql("select guid,eventId,eventTime,pageId,pt,rt,CURRENT_WATERMARK(rt) as wm from t_events").print();
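        // Note: CURRENT_WATERMARK(rt) is NULL before the first watermark is emitted;
        // afterwards it trails the largest rt seen so far by the declared 1 ms bound
        // (watermarks are emitted periodically, so the column updates with a short lag).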
68 |
69 |
70 | }
71 |
72 | }
73 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo9_EventTimeAndWatermark3.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import com.alibaba.fastjson.JSON;
4 | import lombok.AllArgsConstructor;
5 | import lombok.Data;
6 | import lombok.NoArgsConstructor;
7 | import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
8 | import org.apache.flink.api.common.eventtime.WatermarkStrategy;
9 | import org.apache.flink.streaming.api.datastream.DataStream;
10 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
11 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
12 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
13 | import org.apache.flink.streaming.api.functions.ProcessFunction;
14 | import org.apache.flink.table.api.DataTypes;
15 | import org.apache.flink.table.api.EnvironmentSettings;
16 | import org.apache.flink.table.api.Schema;
17 | import org.apache.flink.table.api.Table;
18 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
19 | import org.apache.flink.types.Row;
20 | import org.apache.flink.util.Collector;
21 |
22 | /**
23 |  * @Author: deep as the sea
24 |  * @Site: 多易教育
25 |  * @QQ: 657270652
26 |  * @Date: 2022/6/12
27 |  * @Desc: Learn big data at 多易教育
28 |  *   Stream ===> table: how event time and watermarks are carried across the conversion
29 |  **/
30 | public class Demo9_EventTimeAndWatermark3 {
31 |
32 | public static void main(String[] args) throws Exception {
33 |
34 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
35 | env.setParallelism(1);
36 |
37 | EnvironmentSettings settings = EnvironmentSettings.inStreamingMode();
38 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env, settings);
39 |
40 |
41 | tenv.executeSql(
42 | " create table t_events( "
43 | + " guid int, "
44 | + " eventId string, "
45 | + " eventTime bigint, "
46 | + " pageId string, "
47 | /*+ " pt AS proctime(), "*/ // 利用一个表达式字段,来声明 processing time属性
48 | + " rt as to_timestamp_ltz(eventTime,3), "
49 | + " watermark for rt as rt - interval '1' second " // 用watermark for xxx,来将一个已定义的TIMESTAMP/TIMESTAMP_LTZ字段声明成 eventTime属性及指定watermark策略
50 | + " ) "
51 | + " with ( "
52 | + " 'connector' = 'kafka', "
53 | + " 'topic' = 'doit30-events2', "
54 | + " 'properties.bootstrap.servers' = 'doitedu:9092', "
55 | + " 'properties.group.id' = 'g1', "
56 | + " 'scan.startup.mode' = 'earliest-offset', "
57 | + " 'format' = 'json', "
58 | + " 'json.fail-on-missing-field' = 'false', "
59 | + " 'json.ignore-parse-errors' = 'true' "
60 | + " ) "
61 | );
62 |
63 |
64 | // tenv.executeSql("select guid,eventId,rt,current_watermark(rt) as wm from t_events").print();
65 |
66 |
67 |
68 |         DataStream<Row> ds = tenv.toDataStream(tenv.from("t_events"));
69 |
70 |         ds.process(new ProcessFunction<Row, String>() {
71 |             @Override
72 |             public void processElement(Row value, Context ctx, Collector<String> out) throws Exception {
73 |                 out.collect(value + " => " + ctx.timerService().currentWatermark());
74 |             }
75 |         }).print();
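        // Because the table declares rt as its rowtime attribute, toDataStream() carries
        // the row timestamps and watermarks over into the DataStream, which is what
        // ctx.timerService().currentWatermark() prints here.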
76 |
77 |
78 | env.execute();
79 |
80 |
81 | }
82 |
83 | }
84 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/fuxi/EventBean.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.fuxi;
2 |
3 | import lombok.AllArgsConstructor;
4 | import lombok.Data;
5 | import lombok.NoArgsConstructor;
6 |
7 | import java.util.Map;
8 |
9 | @Data
10 | @NoArgsConstructor
11 | @AllArgsConstructor
12 | public class EventBean {
13 |
14 | private long guid;
15 | private String sessionId;
16 | private String eventId;
17 | private long eventTs;
18 |     private Map<String, String> properties;  // String values assumed; the type parameters were lost in extraction
19 | }
20 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/fuxi/TimerDemo.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.fuxi;
2 |
3 |
4 | import com.alibaba.fastjson.JSON;
5 | import org.apache.flink.api.common.state.ValueState;
6 | import org.apache.flink.api.common.state.ValueStateDescriptor;
7 | import org.apache.flink.api.java.functions.KeySelector;
8 | import org.apache.flink.configuration.Configuration;
9 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
10 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
12 | import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
13 | import org.apache.flink.streaming.api.functions.ProcessFunction;
14 | import org.apache.flink.util.Collector;
15 |
16 | /**
17 |  * Scenario:
18 |  * Monitor the user event stream in real time. When a user places an order (submitOrder),
19 |  * check whether that user pays (payOrder) within 30 minutes of ordering
20 |  * (shortened to 30 seconds in this demo); if not, emit a payment reminder.
21 |  *
22 |  * Key technique: timers (a timer is simply an alarm clock)
23 |  */
23 | public class TimerDemo {
24 |
25 | public static void main(String[] args) throws Exception {
26 |
27 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
28 |
29 |         DataStreamSource<String> stream = env.socketTextStream("doitedu", 9999);
30 |
31 |         SingleOutputStreamOperator<EventBean> stream2 = stream.map(s -> JSON.parseObject(s, EventBean.class));
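        // Hypothetical socket input (one JSON object per line, matching EventBean's fields):
        //   {"guid":1,"sessionId":"s01","eventId":"submitOrder","eventTs":1655017433000}
        //   {"guid":1,"sessionId":"s01","eventId":"payOrder","eventTs":1655017450000}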
32 |
33 | stream2
34 |                 .keyBy(new KeySelector<EventBean, Long>() {
35 |                     @Override
36 |                     public Long getKey(EventBean value) throws Exception {
37 |                         return value.getGuid();
38 |                     }
39 |                 })
40 |                 .process(new KeyedProcessFunction<Long, EventBean, String>() {
41 |
42 |                     ValueState<Long> timerTimeState;
43 |
44 | @Override
45 | public void open(Configuration parameters) throws Exception {
46 |
47 |                         timerTimeState = getRuntimeContext().getState(new ValueStateDescriptor<>("timerTimeState", Long.class));
48 | }
49 |
50 | @Override
51 |                     public void processElement(EventBean eventBean, Context ctx, Collector<String> out) throws Exception {
52 |
53 | if (eventBean.getEventId().equals("submitOrder")) {
54 | // 注册一个定时器,所定的时间在 : 当前处理时间+30S
55 | long timerTime = ctx.timerService().currentProcessingTime() + 30 * 1000L;
56 | ctx.timerService().registerProcessingTimeTimer(timerTime);
57 | // 将定时器时间,放入状态管理器中
58 | timerTimeState.update(timerTime);
59 |
60 | out.collect("检测到用户:" + ctx.getCurrentKey() + ",下单了,注册了一个定时器: " + timerTimeState.value());
61 | }
62 |
63 | if (eventBean.getEventId().equals("payOrder")) {
64 | ctx.timerService().deleteProcessingTimeTimer(timerTimeState.value());
65 | out.collect("检测到用户:" + ctx.getCurrentKey() + ",在下单后的30s内已经支付,取消定时器 " + timerTimeState.value());
66 | }
67 |
68 | }
69 |
70 |                     /**
71 |                      * Called when a registered timer fires
72 |                      */
73 | @Override
74 |                     public void onTimer(long timestamp, OnTimerContext ctx, Collector<String> out) throws Exception {
75 |                         Long guid = ctx.getCurrentKey();
76 |                         out.collect("user " + guid + ": your order is about to time out, please pay soon!");
77 | }
78 | })
79 | .print();
80 |
81 |
82 | env.execute();
83 | }
84 | }
85 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/org/apache/flink/api/common/eventtime/BoundedOutOfOrdernessWatermarks.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 |
19 | package org.apache.flink.api.common.eventtime;
20 |
21 | import org.apache.flink.annotation.Public;
22 |
23 | import java.time.Duration;
24 |
25 | import static org.apache.flink.util.Preconditions.checkArgument;
26 | import static org.apache.flink.util.Preconditions.checkNotNull;
27 |
28 | /**
29 | * A WatermarkGenerator for situations where records are out of order, but you can place an upper
30 | * bound on how far the events are out of order. An out-of-order bound B means that once an event
31 | * with timestamp T was encountered, no events older than {@code T - B} will follow any more.
32 | *
33 | * The watermarks are generated periodically. The delay introduced by this watermark strategy is
34 | * the periodic interval length, plus the out-of-orderness bound.
35 | */
36 | @Public
37 | public class BoundedOutOfOrdernessWatermarks<T> implements WatermarkGenerator<T> {
38 |
39 | /** The maximum timestamp encountered so far. */
40 | private long maxTimestamp;
41 |
42 | /** The maximum out-of-orderness that this watermark generator assumes. */
43 | private final long outOfOrdernessMillis;
44 |
45 | /**
46 | * Creates a new watermark generator with the given out-of-orderness bound.
47 | *
48 | * @param maxOutOfOrderness The bound for the out-of-orderness of the event timestamps.
49 | */
50 | public BoundedOutOfOrdernessWatermarks(Duration maxOutOfOrderness) {
51 | checkNotNull(maxOutOfOrderness, "maxOutOfOrderness");
52 | checkArgument(!maxOutOfOrderness.isNegative(), "maxOutOfOrderness cannot be negative");
53 |
54 | this.outOfOrdernessMillis = maxOutOfOrderness.toMillis();
55 |
56 | // start so that our lowest watermark would be Long.MIN_VALUE.
57 | this.maxTimestamp = Long.MIN_VALUE + outOfOrdernessMillis + 1;
58 | }
59 |
60 | // ------------------------------------------------------------------------
61 |
62 | @Override
63 | public void onEvent(T event, long eventTimestamp, WatermarkOutput output) {
64 | maxTimestamp = Math.max(maxTimestamp, eventTimestamp);
65 | }
66 |
67 | @Override
68 | public void onPeriodicEmit(WatermarkOutput output) {
69 |         // TODO
70 |         // System.out.printf("source periodically emits watermark: %d \n", maxTimestamp - outOfOrdernessMillis - 1);
71 | output.emitWatermark(new Watermark(maxTimestamp - outOfOrdernessMillis - 1));
72 | }
73 | }
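// Typical usage (a sketch; MyEvent and getTimestamp() are placeholder names):
// this generator is what backs
//   WatermarkStrategy.<MyEvent>forBoundedOutOfOrderness(Duration.ofSeconds(2))
//                    .withTimestampAssigner((event, ts) -> event.getTimestamp());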
74 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/tmp/utils/SqlHolder.java:
--------------------------------------------------------------------------------
1 | package tmp.utils;
2 |
3 | public class SqlHolder {
4 |
5 | }
6 |
--------------------------------------------------------------------------------
/flink_course/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | ################################################################################
18 |
19 | log4j.rootLogger = INFO, console, debugFile, errorFile
20 |
21 | log4j.appender.console=org.apache.log4j.ConsoleAppender
22 | log4j.appender.console.layout = org.apache.log4j.PatternLayout
23 | log4j.appender.console.layout.ConversionPattern = [%-5p] %d(%r) --> [%t] %l: %m %x %n
24 |
25 | log4j.appender.debugFile = org.apache.log4j.DailyRollingFileAppender
26 | log4j.appender.debugFile.File = src/logs/debug.log
27 | log4j.appender.debugFile.Append = true
28 | log4j.appender.debugFile.Threshold = debug
29 | log4j.appender.debugFile.layout = org.apache.log4j.PatternLayout
30 | log4j.appender.debugFile.layout.ConversionPattern = %-d{yyyy-MM-dd HH:mm:ss} [ %t:%r ] - [ %p ] %m%n
31 |
32 | log4j.appender.errorFile = org.apache.log4j.DailyRollingFileAppender
33 | log4j.appender.errorFile.File = src/logs/error.log
34 | log4j.appender.errorFile.Append = true
35 | log4j.appender.errorFile.Threshold = error
36 | log4j.appender.errorFile.layout = org.apache.log4j.PatternLayout
37 | log4j.appender.errorFile.layout.ConversionPattern = %-d{yyyy-MM-dd HH:mm:ss} [ %t:%r ] - [ %p ] %m%n
38 |
39 |
40 |
--------------------------------------------------------------------------------
/flink_course/src/main/resources/prts.avsc.bak:
--------------------------------------------------------------------------------
1 | {"namespace": "cn.doitedu.flink.avro.schema",
2 | "type": "record",
3 | "name": "AvroEventLog",
4 | "fields": [
5 | {"name": "guid", "type": "long"},
6 | {"name": "sessionId", "type": "string"},
7 | {"name": "eventId", "type": "string"},
8 | {"name": "timeStamp", "type": "long"},
9 | {"name": "eventInfo", "type": { "type":"map","values": "string"} }
10 | ]
11 | }
--------------------------------------------------------------------------------
/kafka_course/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <parent>
6 |         <artifactId>doit30_flink</artifactId>
7 |         <groupId>cn.doitedu</groupId>
8 |         <version>1.0</version>
9 |     </parent>
10 |     <modelVersion>4.0.0</modelVersion>
11 |
12 |     <artifactId>kafka_course</artifactId>
13 |
14 |     <properties>
15 |         <maven.compiler.source>8</maven.compiler.source>
16 |         <maven.compiler.target>8</maven.compiler.target>
17 |     </properties>
18 |
19 |     <dependencies>
20 |         <dependency>
21 |             <groupId>org.apache.kafka</groupId>
22 |             <artifactId>kafka-clients</artifactId>
23 |             <version>${kafka.version}</version>
24 |         </dependency>
25 |
26 |         <dependency>
27 |             <groupId>org.apache.commons</groupId>
28 |             <artifactId>commons-lang3</artifactId>
29 |             <version>3.12.0</version>
30 |         </dependency>
31 |
32 |         <dependency>
33 |             <groupId>com.google.guava</groupId>
34 |             <artifactId>guava</artifactId>
35 |             <version>30.0-jre</version>
36 |         </dependency>
37 |
38 |         <dependency>
39 |             <groupId>org.roaringbitmap</groupId>
40 |             <artifactId>RoaringBitmap</artifactId>
41 |             <version>0.9.25</version>
42 |         </dependency>
43 |
44 |         <dependency>
45 |             <groupId>mysql</groupId>
46 |             <artifactId>mysql-connector-java</artifactId>
47 |             <version>8.0.27</version>
48 |         </dependency>
49 |
50 |         <dependency>
51 |             <groupId>org.apache.flink</groupId>
52 |             <artifactId>flink-connector-files</artifactId>
53 |             <version>1.14.4</version>
54 |         </dependency>
55 |     </dependencies>
56 |
57 | </project>
--------------------------------------------------------------------------------
/kafka_course/src/main/java/cn/doitedu/kafka/AdminClientDemo.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.kafka;
2 |
3 | import org.apache.kafka.clients.admin.*;
4 | import org.apache.kafka.common.KafkaFuture;
5 | import org.apache.kafka.common.Node;
6 | import org.apache.kafka.common.TopicPartitionInfo;
7 |
8 | import java.util.*;
9 | import java.util.concurrent.ExecutionException;
10 |
11 | public class AdminClientDemo {
12 | public static void main(String[] args) throws ExecutionException, InterruptedException {
13 |
14 | Properties props = new Properties();
15 | props.setProperty(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG,"doit01:9092,doit02:9092");
16 |
17 |         // the admin client
18 | AdminClient adminClient = KafkaAdminClient.create(props);
19 |
20 |         // create a topic
21 | /*NewTopic zzuzz = new NewTopic("zzuzz", 3, (short) 2);
22 | adminClient.createTopics(Arrays.asList(zzuzz));*/
23 |
24 |         // describe a topic in detail
25 | DescribeTopicsResult topicDescriptions = adminClient.describeTopics(Arrays.asList("zzuzz"));
26 |
27 | KafkaFuture