├── .gitignore
├── README.md
├── conf
│   └── hiveconf
│       └── hive-site.xml
├── data
│   ├── csv
│   │   └── a.csv
│   ├── json
│   │   ├── qiantao
│   │   │   └── a.txt
│   │   ├── qiantao2
│   │   │   └── a.txt
│   │   └── qiantao3
│   │       └── a.txt
│   └── sqldemo
│       └── a.txt
├── datagen
│   ├── pom.xml
│   └── src
│       └── main
│           └── java
│               └── cn
│                   └── doitedu
│                       ├── ActionLogAutoGen.java
│                       ├── ActionLogGenOne.java
│                       └── module
│                           ├── Collector.java
│                           ├── CollectorConsoleImpl.java
│                           ├── CollectorKafkaImpl.java
│                           ├── LogBean.java
│                           ├── LogBeanWrapper.java
│                           ├── LogRunnable.java
│                           ├── UserProfileDataGen.java
│                           └── UserUtils.java
├── flink_course
│   ├── data
│   │   ├── transformation_input
│   │   │   └── userinfo.txt
│   │   └── wc
│   │       └── input
│   │           └── wc.txt
│   ├── pom.xml
│   └── src
│       ├── main
│       │   ├── java
│       │   │   ├── cn
│       │   │   │   └── doitedu
│       │   │   │       ├── flink
│       │   │   │       │   ├── TaskTest.java
│       │   │   │       │   ├── TestWindow.java
│       │   │   │       │   ├── avro
│       │   │   │       │   │   └── schema
│       │   │   │       │   │       ├── AvroEventLog.java
│       │   │   │       │   │       └── AvroEventLogBean.java
│       │   │   │       │   ├── exercise
│       │   │   │       │   │   ├── EventCount.java
│       │   │   │       │   │   ├── EventUserInfo.java
│       │   │   │       │   │   ├── Exercise_1.java
│       │   │   │       │   │   └── UserInfo.java
│       │   │   │       │   ├── java
│       │   │   │       │   │   └── demos
│       │   │   │       │   │       ├── EventBean2.java
│       │   │   │       │   │       ├── EventLog.java
│       │   │   │       │   │       ├── ParallelismDe.java
│       │   │   │       │   │       ├── _01_StreamWordCount.java
│       │   │   │       │   │       ├── _02_BatchWordCount.java
│       │   │   │       │   │       ├── _03_StreamBatchWordCount.java
│       │   │   │       │   │       ├── _04_WordCount_LambdaTest.java
│       │   │   │       │   │       ├── _05_SourceOperator_Demos.java
│       │   │   │       │   │       ├── _06_CustomSourceFunction.java
│       │   │   │       │   │       ├── _07_Transformation_Demos.java
│       │   │   │       │   │       ├── _08_SinkOperator_Demos.java
│       │   │   │       │   │       ├── _09_StreamFileSinkOperator_Demo1.java
│       │   │   │       │   │       ├── _09_StreamFileSinkOperator_Demo2.java
│       │   │   │       │   │       ├── _09_StreamFileSinkOperator_Demo3.java
│       │   │   │       │   │       ├── _10_KafkaSinkOperator_Demo1.java
│       │   │   │       │   │       ├── _11_JdbcSinkOperator_Demo1.java
│       │   │   │       │   │       ├── _12_RedisSinkOperator_Demo1.java
│       │   │   │       │   │       ├── _13_SideOutput_Demo.java
│       │   │   │       │   │       ├── _14_StreamConnect_Union_Demo.java
│       │   │   │       │   │       ├── _15_StreamCoGroup_Join_Demo.java
│       │   │   │       │   │       ├── _16_BroadCast_Demo.java
│       │   │   │       │   │       ├── _17_ProcessFunctions_Demo.java
│       │   │   │       │   │       ├── _18_ChannalSelector_Partitioner_Demo.java
│       │   │   │       │   │       ├── _19_WaterMark_Api_Demo.java
│       │   │   │       │   │       ├── _19_WaterMark_Api_Demo2.java
│       │   │   │       │   │       ├── _20_Window_Api_Demo1.java
│       │   │   │       │   │       ├── _21_Window_Api_Demo2.java
│       │   │   │       │   │       ├── _21_Window_Api_Demo3.java
│       │   │   │       │   │       ├── _21_Window_Api_Demo4.java
│       │   │   │       │   │       ├── _22_StateBasic_Demo.java
│       │   │   │       │   │       ├── _23_State_OperatorState_Demo.java
│       │   │   │       │   │       ├── _24_State_KeyedState_Demo.java
│       │   │   │       │   │       ├── _25_State_DataStructure_Demo.java
│       │   │   │       │   │       ├── _26_State_TTL_Demo.java
│       │   │   │       │   │       ├── _27_ToleranceConfig_Demo.java
│       │   │   │       │   │       └── _28_ToleranceSideToSideTest.java
│       │   │   │       │   ├── scala
│       │   │   │       │   │   └── demos
│       │   │   │       │   │       └── _01_入门程序WordCount.scala
│       │   │   │       │   └── task
│       │   │   │       │       ├── Mapper1.java
│       │   │   │       │       ├── Mapper2.java
│       │   │   │       │       ├── Task1.java
│       │   │   │       │       ├── Task2.java
│       │   │   │       │       ├── Task3.java
│       │   │   │       │       └── TaskRunner.java
│       │   │   │       └── flinksql
│       │   │   │           ├── demos
│       │   │   │           │   ├── Demo10_KafkaConnectorDetail.java
│       │   │   │           │   ├── Demo11_UpsertKafkaConnectorTest.java
│       │   │   │           │   ├── Demo11_UpsertKafkaConnectorTest2.java
│       │   │   │           │   ├── Demo12_JdbcConnectorTest1.java
│       │   │   │           │   ├── Demo12_JdbcConnectorTest2.java
│       │   │   │           │   ├── Demo13_FileSystemConnectorTest.java
│       │   │   │           │   ├── Demo14_MysqlCdcConnector.java
│       │   │   │           │   ├── Demo14_StreamFromToTable.java
│       │   │   │           │   ├── Demo16_TimeWindowDemo.java
│       │   │   │           │   ├── Demo17_TimeWindowJoin.java
│       │   │   │           │   ├── Demo18_IntervalJoin.java
│       │   │   │           │   ├── Demo18_RegularJoin.java
│       │   │   │           │   ├── Demo19_ArrayJoin.java
│       │   │   │           │   ├── Demo19_LookupJoin.java
│       │   │   │           │   ├── Demo1_TableSql.java
│       │   │   │           │   ├── Demo20_Temporal_Join.java
│       │   │   │           │   ├── Demo21_CustomScalarFunction.java
│       │   │   │           │   ├── Demo22_CustomAggregateFunction.java
│       │   │   │           │   ├── Demo23_TableFunction.java
│       │   │   │           │   ├── Demo24_TableAggregateFunction.java
│       │   │   │           │   ├── Demo24_TableAggregateFunction2.java
│       │   │   │           │   ├── Demo25_MetricDemos.java
│       │   │   │           │   ├── Demo2_TableApi.java
│       │   │   │           │   ├── Demo3_TableObjectCreate.java
│       │   │   │           │   ├── Demo4_SqlTableCreate.java
│       │   │   │           │   ├── Demo5_CatalogDemo.java
│       │   │   │           │   ├── Demo6_Exercise.java
│       │   │   │           │   ├── Demo7_ColumnDetail1_Sql.java
│       │   │   │           │   ├── Demo7_ColumnDetail2_TableApi.java
│       │   │   │           │   ├── Demo8_CsvFormat.java
│       │   │   │           │   ├── Demo8_JsonFormat.java
│       │   │   │           │   ├── Demo9_EventTimeAndWatermark.java
│       │   │   │           │   ├── Demo9_EventTimeAndWatermark2.java
│       │   │   │           │   └── Demo9_EventTimeAndWatermark3.java
│       │   │   │           └── fuxi
│       │   │   │               ├── EventBean.java
│       │   │   │               ├── Exercise.java
│       │   │   │               ├── KeyedStateDemo.java
│       │   │   │               └── TimerDemo.java
│       │   │   ├── org
│       │   │   │   └── apache
│       │   │   │       └── flink
│       │   │   │           ├── api
│       │   │   │           │   └── common
│       │   │   │           │       └── eventtime
│       │   │   │           │           └── BoundedOutOfOrdernessWatermarks.java
│       │   │   │           ├── runtime
│       │   │   │           │   └── state
│       │   │   │           │       └── ttl
│       │   │   │           │           └── CXTtlIncrementalCleanup.java
│       │   │   │           └── streaming
│       │   │   │               ├── api
│       │   │   │               │   └── operators
│       │   │   │               │       └── AbstractStreamOperator.java
│       │   │   │               └── runtime
│       │   │   │                   └── operators
│       │   │   │                       ├── TimestampsAndWatermarksOperator.java
│       │   │   │                       └── windowing
│       │   │   │                           └── WindowOperator.java
│       │   │   └── tmp
│       │   │       ├── FlinkKafkaDemo.java
│       │   │       ├── pojos
│       │   │       │   ├── MysqlUser.java
│       │   │       │   └── UserSlotGame.java
│       │   │       ├── sqls.sql
│       │   │       └── utils
│       │   │           └── SqlHolder.java
│       │   └── resources
│       │       ├── log4j.properties
│       │       └── prts.avsc.bak
│       └── test
│           └── java
│               └── cn
│                   └── doitedu
│                       └── flink
│                           └── TestChangelog.java
├── kafka_course
│   ├── pom.xml
│   └── src
│       ├── main
│       │   ├── java
│       │   │   └── cn
│       │   │       └── doitedu
│       │   │           └── kafka
│       │   │               ├── AdminClientDemo.java
│       │   │               ├── ConsumerDemo.java
│       │   │               ├── ConsumerDemo2.java
│       │   │               ├── ConsumerDemo3.java
│       │   │               ├── Consumer实现ExactlyOnce手段1.java
│       │   │               ├── Kafka编程练习.java
│       │   │               ├── Kafka编程练习_消费者.java
│       │   │               ├── Kafka编程练习_消费者_Bitmap.java
│       │   │               ├── Kafka编程练习_消费者_判重.java
│       │   │               ├── Kafka自身事务机制.java
│       │   │               ├── MyPartitioner.java
│       │   │               └── ProducerDemo.java
│       │   └── resources
│       │       ├── bitmap示意图.png
│       │       └── consumer.properties
│       └── test
│           └── java
│               └── RoaringBitmapTest.java
└── pom.xml
/.gitignore:
--------------------------------------------------------------------------------
1 | # Project exclude paths
2 | /kafka_course/target/
3 | # Project exclude paths
4 | /flink_course/target/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # doit30_flink
2 |
3 | # Companion source code for the 多易教育 [涛哥] DOE session-30 FLINK course
4 |
5 | # For more learning materials and courses, join a QQ group and stay tuned:
6 | ① Group: 1071917730 (full, cannot join)
7 | ② Group: 813383827 (full, cannot join)
8 | ③ Group: 955021790 (open to join)
9 | ④ Group: 1108285618 (open to join)
10 |
--------------------------------------------------------------------------------
/conf/hiveconf/hive-site.xml:
--------------------------------------------------------------------------------
1 | <configuration>
2 |     <property>
3 |         <name>hive.metastore.uris</name>
4 |         <value>thrift://doitedu:9083</value>
5 |     </property>
6 | </configuration>
--------------------------------------------------------------------------------
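
Note: the flinksql demos in the tree (presumably Demo5_CatalogDemo among them) would reach this metastore through a HiveCatalog pointed at this conf directory. A minimal, hedged sketch (the catalog and database names here are assumptions, not taken from the repo):

    import org.apache.flink.table.api.EnvironmentSettings;
    import org.apache.flink.table.api.TableEnvironment;
    import org.apache.flink.table.catalog.hive.HiveCatalog;

    public class HiveCatalogSketch {
        public static void main(String[] args) {
            TableEnvironment tenv =
                    TableEnvironment.create(EnvironmentSettings.newInstance().inStreamingMode().build());

            // "conf/hiveconf" is the directory holding the hive-site.xml above;
            // its hive.metastore.uris entry (thrift://doitedu:9083) must be reachable
            HiveCatalog hiveCatalog = new HiveCatalog("hive", "default", "conf/hiveconf");
            tenv.registerCatalog("hive", hiveCatalog);
            tenv.useCatalog("hive");
        }
    }
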
/data/csv/a.csv:
--------------------------------------------------------------------------------
1 | |1|,|zs|,|18|
2 | # 哈哈哈哈
3 | |2|,|ls|,|20|
4 | |3|,|ww|,\N
--------------------------------------------------------------------------------
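
Note: the pipe-wrapped fields, the # comment line, and the \N marker above line up with options of Flink SQL's csv format. A hedged sketch of reading this file through the filesystem connector (the table and column names are illustrative assumptions):

    import org.apache.flink.table.api.EnvironmentSettings;
    import org.apache.flink.table.api.TableEnvironment;

    public class CsvFormatSketch {
        public static void main(String[] args) {
            TableEnvironment tenv =
                    TableEnvironment.create(EnvironmentSettings.newInstance().inStreamingMode().build());
            tenv.executeSql(
                    "CREATE TABLE csv_demo (id INT, name STRING, age INT) WITH ("
                            + " 'connector' = 'filesystem',"
                            + " 'path' = 'data/csv/',"
                            + " 'format' = 'csv',"
                            + " 'csv.quote-character' = '|',"   // fields arrive wrapped as |...|
                            + " 'csv.allow-comments' = 'true'," // lines starting with # are skipped
                            + " 'csv.null-literal' = '\\N'"     // \N is read back as NULL
                            + ")");
            tenv.executeSql("SELECT * FROM csv_demo").print();
        }
    }
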
/data/json/qiantao/a.txt:
--------------------------------------------------------------------------------
1 | {"id":10,"name":{"nick":"doe1","formal":"doit edu1"}}
2 | {"id":11,"name":{"nick":"doe2","formal":"doit edu2"}}
3 | {"id":12,"name":{"nick":"doe3","formal":"doit edu3"}}
--------------------------------------------------------------------------------
/data/json/qiantao2/a.txt:
--------------------------------------------------------------------------------
1 | {"id":10,"name":{"nick":"doe1","formal":"doit edu1","height":180}}
2 | {"id":11,"name":{"nick":"doe2","formal":"doit edu2","height":170}}
3 | {"id":12,"name":{"nick":"doe3","formal":"doit edu3","height":160}}
--------------------------------------------------------------------------------
/data/json/qiantao3/a.txt:
--------------------------------------------------------------------------------
1 | {"id":1,"friends":[{"name":"a","info":{"addr":"bj","gender":"male"}},{"name":"b","info":{"addr":"sh","gender":"female"}}]}
2 | {"id":2,"friends":[{"name":"b","info":{"addr":"sh","gender":"male"}},{"name":"c","info":{"addr":"bj","gender":"female"}}]}
--------------------------------------------------------------------------------
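
Note: these nested-object and nested-array samples are presumably inputs for the JSON-format demos. As a plain-Java illustration of the nesting (the class name is hypothetical), fastjson, which is already used elsewhere in this project, can walk a qiantao3 line like this:

    import com.alibaba.fastjson.JSON;
    import com.alibaba.fastjson.JSONArray;
    import com.alibaba.fastjson.JSONObject;

    public class NestedJsonSketch {
        public static void main(String[] args) {
            String line = "{\"id\":1,\"friends\":[{\"name\":\"a\",\"info\":{\"addr\":\"bj\",\"gender\":\"male\"}}]}";

            JSONObject obj = JSON.parseObject(line);
            int id = obj.getIntValue("id");

            // walk the friends array and each friend's nested info object
            JSONArray friends = obj.getJSONArray("friends");
            for (int i = 0; i < friends.size(); i++) {
                JSONObject friend = friends.getJSONObject(i);
                JSONObject info = friend.getJSONObject("info");
                System.out.println(id + " -> " + friend.getString("name") + " @ " + info.getString("addr"));
            }
        }
    }
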
/data/sqldemo/a.txt:
--------------------------------------------------------------------------------
1 | 1,zs,18,male
2 | 2,ls,28,fe,male
3 | 3,ww,38,male
4 |
--------------------------------------------------------------------------------
/datagen/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <parent>
6 |         <artifactId>doit30_flink</artifactId>
7 |         <groupId>cn.doitedu</groupId>
8 |         <version>1.0</version>
9 |     </parent>
10 |     <modelVersion>4.0.0</modelVersion>
11 |
12 |     <artifactId>datagen</artifactId>
13 |
14 |     <properties>
15 |         <maven.compiler.source>8</maven.compiler.source>
16 |         <maven.compiler.target>8</maven.compiler.target>
17 |     </properties>
18 |
19 |     <dependencies>
20 |         <dependency>
21 |             <groupId>org.apache.kafka</groupId>
22 |             <artifactId>kafka-clients</artifactId>
23 |             <version>${kafka.version}</version>
24 |         </dependency>
25 |
26 |         <dependency>
27 |             <groupId>commons-lang</groupId>
28 |             <artifactId>commons-lang</artifactId>
29 |             <version>2.6</version>
30 |         </dependency>
31 |
32 |         <dependency>
33 |             <groupId>org.apache.commons</groupId>
34 |             <artifactId>commons-lang3</artifactId>
35 |             <version>3.12.0</version>
36 |         </dependency>
37 |     </dependencies>
38 |
39 | </project>
--------------------------------------------------------------------------------
/datagen/src/main/java/cn/doitedu/ActionLogAutoGen.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu;
2 |
3 | import cn.doitedu.module.*;
4 |
5 | import java.util.ArrayList;
6 | import java.util.HashMap;
7 | import java.util.List;
8 |
9 | /**
10 | * @author 涛哥
11 | * @nick_name "deep as the sea"
12 | * @contact qq:657270652 wx:doit_edu
13 | * @site www.doitedu.cn
14 | * @date 2021-03-27
15 |  * @desc Action-log simulator (generates logs continuously and automatically)
16 | *
17 | * {
18 | * "account": "Vz54E9Ya",
19 | * "appId": "cn.doitedu.app1",
20 | * "appVersion": "3.4",
21 | * "carrier": "中国移动",
22 | * "deviceId": "WEISLD0235S0934OL",
23 | * "deviceType": "MI-6",
24 | * "ip": "24.93.136.175",
25 | * "latitude": 42.09287620431088,
26 | * "longitude": 79.42106825764643,
27 | * "netType": "WIFI",
28 | * "osName": "android",
29 | * "osVersion": "6.5",
30 | * "releaseChannel": "豌豆荚",
31 | * "resolution": "1024*768",
32 | * "sessionId": "SE18329583458",
33 | * "timeStamp": 1594534406220
34 | * "eventId": "productView",
35 | * "properties": {
36 | * "pageId": "646",
37 | * "productId": "157",
38 | * "refType": "4",
39 | * "refUrl": "805",
40 | * "title": "爱得堡 男靴中高帮马丁靴秋冬雪地靴 H1878 复古黄 40码",
41 | * "url": "https://item.jd.com/36506691363.html",
42 | * "utm_campain": "4",
43 | * "utm_loctype": "1",
44 | * "utm_source": "10"
45 | * }
46 | * }
47 | *
48 | *
49 |  * Create the topic in Kafka ahead of time:
50 |  * [root@hdp01 kafka_2.11-2.0.0]# bin/kafka-topics.sh --create --topic yinew_applog --partitions 2 --replication-factor 1 --zookeeper hdp01:2181,hdp02:2181,hdp03:2181
51 |  *
52 |  * After creating it, verify that it exists:
53 |  * [root@hdp01 kafka_2.11-2.0.0]# bin/kafka-topics.sh --list --zookeeper hdp01:2181
54 | */
55 | public class ActionLogAutoGen {
56 | public static void main(String[] args) throws Exception {
57 |
58 |         // Load historical users
59 |         // String filePath = "data/users/hisu-1654943006977.txt";
60 |         // HashMap<String, LogBean> hisUsers = UserUtils.loadHisUsers(filePath);
61 |
62 |         // Add new users
63 |         HashMap<String, LogBean> hisUsers = new HashMap<>();
64 |         UserUtils.addNewUsers(hisUsers, 1000, true);
65 |
66 |         UserUtils.saveUsers(hisUsers);
67 |
68 |         // Convert to user data that carries per-user session state
69 |         List<LogBeanWrapper> wrapperedUsers = UserUtils.userToWrapper(hisUsers);
70 |
71 |         System.out.println("Total daily active users: " + wrapperedUsers.size() + "-------");
72 |
73 |         // Generate logs in parallel on multiple threads
74 |         // CollectorConsoleImpl collector = new CollectorConsoleImpl();
75 |         CollectorKafkaImpl collector = new CollectorKafkaImpl("doit-events");
76 |         genBatchToConsole(wrapperedUsers, 3, collector);
77 |
78 |
79 | }
80 |
81 |     private static void genBatchToConsole(List<LogBeanWrapper> wrapperedUsers, int threads, Collector collector) {
82 |         int partSize = wrapperedUsers.size() / threads;
83 |
84 |         ArrayList<List<LogBeanWrapper>> partList = new ArrayList<>();
85 |
86 |         for (int i = 0; i < threads; i++) {
87 |             List<LogBeanWrapper> userPart = new ArrayList<>();
88 |
89 |             for (int j = i * partSize; j < (i != threads - 1 ? (i + 1) * partSize : wrapperedUsers.size()); j++) {
90 |                 userPart.add(wrapperedUsers.get(j));
91 |             }
92 |             new Thread(new LogRunnable(userPart, collector, 10)).start();
93 | }
94 | }
95 |
96 | }
97 |
--------------------------------------------------------------------------------
/datagen/src/main/java/cn/doitedu/ActionLogGenOne.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu;
2 |
3 | import cn.doitedu.module.LogBean;
4 | import com.alibaba.fastjson.JSON;
5 | import org.apache.kafka.clients.producer.KafkaProducer;
6 | import org.apache.kafka.clients.producer.ProducerRecord;
7 |
8 | import java.util.HashMap;
9 | import java.util.Map;
10 | import java.util.Properties;
11 |
12 | /***
13 | * @author hunter.d
14 | * @qq 657270652
15 | * @wx haitao-duan
16 | * @date 2021/4/5
17 | *
18 |  * Each run generates one action-log record
19 | *
20 | **/
21 | public class ActionLogGenOne {
22 | public static void main(String[] args) {
23 | Properties props = new Properties();
24 | props.setProperty("bootstrap.servers", "hdp01:9092,hdp02:9092,hdp03:9092");
25 | props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
26 | props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
27 |         KafkaProducer<String, String> kafkaProducer = new KafkaProducer<>(props);
28 |
29 | LogBean logBean = new LogBean();
30 | logBean.setDeviceId("000053");
31 | logBean.setEventId("E");
32 |         Map<String, String> ps = new HashMap<>();
33 |         ps.put("p1", "v1");
34 |         logBean.setProperties(ps);
35 | logBean.setTimeStamp(System.currentTimeMillis());
36 |
37 | String log = JSON.toJSONString(logBean);
38 |         ProducerRecord<String, String> record = new ProducerRecord<>("zenniu_applog", log);
39 | kafkaProducer.send(record);
40 | kafkaProducer.flush();
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
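
Note: to verify the record landed, a standard Kafka console consumer against the same brokers works (assuming the zenniu_applog topic already exists), in the same style as the commands embedded in the javadocs of this repo:

    [root@hdp01 kafka_2.11-2.0.0]# bin/kafka-console-consumer.sh --bootstrap-server hdp01:9092 --topic zenniu_applog --from-beginning
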
/datagen/src/main/java/cn/doitedu/module/Collector.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.module;
2 |
3 | public interface Collector {
4 | public void collect(String logdata);
5 | }
6 |
--------------------------------------------------------------------------------
/datagen/src/main/java/cn/doitedu/module/CollectorConsoleImpl.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.module;
2 |
3 | public class CollectorConsoleImpl implements Collector {
4 | @Override
5 | public void collect(String logdata) {
6 | System.out.println(logdata);
7 | }
8 | }
9 |
--------------------------------------------------------------------------------
/datagen/src/main/java/cn/doitedu/module/CollectorKafkaImpl.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.module;
2 |
3 | import org.apache.kafka.clients.producer.KafkaProducer;
4 | import org.apache.kafka.clients.producer.ProducerRecord;
5 |
6 | import java.util.Properties;
7 |
8 | public class CollectorKafkaImpl implements Collector {
9 |
10 |     private KafkaProducer<Integer, String> kafkaProducer;
11 | private String topicName;
12 |
13 | int messageSeq = 0;
14 |
15 | public CollectorKafkaImpl(String topicName){
16 | Properties props = new Properties();
17 | props.setProperty("bootstrap.servers", "doitedu:9092");
18 | props.put("key.serializer", "org.apache.kafka.common.serialization.IntegerSerializer");
19 | props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
20 |
21 |
22 |         // build a Kafka producer client
23 | this.kafkaProducer = new KafkaProducer<>(props);
24 |
25 | this.topicName = topicName;
26 | }
27 | @Override
28 | public void collect(String logdata) {
29 | this.messageSeq ++;
30 |
31 |         ProducerRecord<Integer, String> record = new ProducerRecord<>(topicName, this.messageSeq, logdata);
32 | kafkaProducer.send(record);
33 |
34 | kafkaProducer.flush();
35 |
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/datagen/src/main/java/cn/doitedu/module/LogBean.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.module;
2 |
3 | import lombok.Data;
4 |
5 | import java.util.Map;
6 |
7 | @Data
8 | public class LogBean {
9 | private String account ;
10 | private String appId ;
11 | private String appVersion ;
12 | private String carrier ;
13 | private String deviceId ;
14 | private String deviceType ;
15 | private String ip ;
16 | private double latitude ;
17 | private double longitude ;
18 | private String netType ;
19 | private String osName ;
20 | private String osVersion ;
21 | private String releaseChannel ;
22 | private String resolution ;
23 | private String sessionId ;
24 | private long timeStamp ;
25 | private String eventId ;
26 |     private Map<String, String> properties;
27 |
28 |
29 | }
30 |
--------------------------------------------------------------------------------
/datagen/src/main/java/cn/doitedu/module/LogBeanWrapper.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.module;
2 |
3 | import lombok.AllArgsConstructor;
4 | import lombok.Data;
5 | import lombok.NoArgsConstructor;
6 |
7 | @Data
8 | @AllArgsConstructor
9 | @NoArgsConstructor
10 | public class LogBeanWrapper {
11 | private LogBean logBean;
12 | private String sessionId;
13 | private long lastTime;
14 |
15 | private boolean isExists = true;
16 | private boolean isPushback = false;
17 |
18 | //private String currPage;
19 |
20 | private int sessionMax = 0;
21 |
22 | public LogBeanWrapper(LogBean logBean,String sessionId,long lastTime){
23 | this.logBean = logBean;
24 | this.sessionId = sessionId;
25 | this.lastTime = lastTime;
26 |
27 | }
28 |
29 |
30 |
31 | }
32 |
--------------------------------------------------------------------------------
/datagen/src/main/java/cn/doitedu/module/UserProfileDataGen.java:
--------------------------------------------------------------------------------
1 | //package cn.doitedu;
2 | //
3 | //import org.apache.commons.lang3.RandomUtils;
4 | //import org.apache.commons.lang3.StringUtils;
5 | //import org.apache.hadoop.conf.Configuration;
6 | //import org.apache.hadoop.hbase.TableName;
7 | //import org.apache.hadoop.hbase.client.Connection;
8 | //import org.apache.hadoop.hbase.client.ConnectionFactory;
9 | //import org.apache.hadoop.hbase.client.Put;
10 | //import org.apache.hadoop.hbase.client.Table;
11 | //import org.apache.hadoop.hbase.util.Bytes;
12 | //
13 | //import java.io.IOException;
14 | //import java.util.ArrayList;
15 | //
16 | ///**
17 | // * @author 涛哥
18 | // * @nick_name "deep as the sea"
19 | // * @contact qq:657270652 wx:doit_edu
20 | // * @site www.doitedu.cn
21 | // * @date 2021-03-27
22 | // * @desc User-profile data simulator
23 | // *
24 | // * deviceid,k1=v1
25 | // *
26 | // * Create the profile tag table in HBase first:
27 | // * [root@hdp01 ~]# hbase shell
28 | // * hbase> create 'yinew_profile','f'
29 | // */
30 | //public class UserProfileDataGen {
31 | // public static void main(String[] args) throws IOException {
32 | //
33 | // Configuration conf = new Configuration();
34 | // conf.set("hbase.zookeeper.quorum", "hdp01:2181,hdp02:2181,hdp03:2181");
35 | //
36 | // Connection conn = ConnectionFactory.createConnection(conf);
37 | // Table table = conn.getTable(TableName.valueOf("zenniu_profile"));
38 | //
39 | //        ArrayList<Put> puts = new ArrayList<>();
40 | // for (int i = 0; i < 100000; i++) {
41 | //
42 | //            // generate the profile tag data for one user
43 | // String deviceId = StringUtils.leftPad(i + "", 6, "0");
44 | // Put put = new Put(Bytes.toBytes(deviceId));
45 | // for (int k = 1; k <= 100; k++) {
46 | // String key = "tag" + k;
47 | // String value = "v" + RandomUtils.nextInt(1, 3);
48 | // put.addColumn(Bytes.toBytes("f"), Bytes.toBytes(key), Bytes.toBytes(value));
49 | // }
50 | //
51 | //            // add this profile record to the list
52 | // puts.add(put);
53 | //
54 | //            // flush once a batch of 100 has accumulated
55 | // if(puts.size()==100) {
56 | // table.put(puts);
57 | // puts.clear();
58 | // }
59 | //
60 | // }
61 | //
62 | //        // submit the final batch
63 | // if(puts.size()>0) table.put(puts);
64 | //
65 | // conn.close();
66 | // }
67 | //}
68 |
--------------------------------------------------------------------------------
/flink_course/data/transformation_input/userinfo.txt:
--------------------------------------------------------------------------------
1 | {"uid":1,"gender":"male","name":"ua","friends":[{"fid":1,"name":"cc"},{"fid":3,"name":"bb"}]}
2 | {"uid":2,"gender":"male","name":"ub","friends":[{"fid":2,"name":"aa"},{"fid":3,"name":"bb"}]}
3 | {"uid":3,"gender":"female","name":"uc","friends":[{"fid":2,"name":"aa"}]}
4 | {"uid":4,"gender":"female","name":"ud","friends":[{"fid":3,"name":"bb"}]}
5 | {"uid":5,"gender":"male","name":"ue","friends":[{"fid":1,"name":"cc"},{"fid":3,"name":"bb"}]}
6 | {"uid":6,"gender":"male","name":"uf","friends":[{"fid":2,"name":"aa"},{"fid":3,"name":"bb"},{"fid":1,"name":"cc"}]}
7 | {"uid":7,"gender":"male","name":"uf","friends":[{"fid":2,"name":"aa"},{"fid":3,"name":"bb"},{"fid":1,"name":"cc"},{"fid":4,"name":"dd"}]}
8 | {"uid":8,"gender":"male","name":"xx","friends":[{"fid":2,"name":"aa"},{"fid":3,"name":"bb"},{"fid":1,"name":"cc"},{"fid":4,"name":"dd"}]}
--------------------------------------------------------------------------------
/flink_course/data/wc/input/wc.txt:
--------------------------------------------------------------------------------
1 | a a a a b b b c
2 | d e d d f
3 | c d
4 | c c c
5 | d d d
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/TaskTest.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink;
2 |
3 | import org.apache.flink.api.common.typeinfo.TypeHint;
4 | import org.apache.flink.api.java.tuple.Tuple2;
5 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
8 | import org.apache.flink.streaming.api.functions.ProcessFunction;
9 | import org.apache.flink.util.Collector;
10 |
11 | public class TaskTest {
12 | public static void main(String[] args) throws Exception {
13 |
14 |         StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // unified stream/batch entry environment
15 |         env.setParallelism(1);
16 |
17 |         SingleOutputStreamOperator<String> st = env.socketTextStream("localhost", 9999)
18 |                 .process(new ProcessFunction<String, String>() {
19 |                     @Override
20 |                     public void processElement(String value, ProcessFunction<String, String>.Context ctx, Collector<String> out) throws Exception {
21 |                         System.out.println("level-1 map received: " + value + ", thread id: " + Thread.currentThread().getId());
22 |                         System.out.println("level-1 subtask: " + getRuntimeContext().getTaskNameWithSubtasks());
23 |
24 |                         // System.out.println("1- aaa");
25 |                         // System.out.println("1- bbb");
26 |                         out.collect(value);
27 |                     }
28 |                 });
29 |
30 |         SingleOutputStreamOperator<String> map = st.process(
31 |                 new ProcessFunction<String, String>() {
32 |                     @Override
33 |                     public void processElement(String value, ProcessFunction<String, String>.Context ctx, Collector<String> out) throws Exception {
34 |                         System.out.println("level-2 map operator received: " + value + ", thread id: " + Thread.currentThread().getId());
35 |                         System.out.println("level-2 subtask: " + getRuntimeContext().getTaskNameWithSubtasks());
36 |                         out.collect(value);
37 |                     }
38 |                 }
39 |         ).setParallelism(2);
40 |
41 |
42 |         map.print();
43 |         env.execute();
44 |
45 |
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/TestWindow.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink;
2 |
3 | import org.apache.commons.lang3.RandomUtils;
4 | import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
5 | import org.apache.flink.api.common.eventtime.WatermarkStrategy;
6 | import org.apache.flink.api.common.typeinfo.TypeHint;
7 | import org.apache.flink.api.java.ExecutionEnvironment;
8 | import org.apache.flink.api.java.tuple.Tuple2;
9 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
10 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
12 | import org.apache.flink.streaming.api.functions.ProcessFunction;
13 | import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
14 | import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
15 | import org.apache.flink.streaming.api.windowing.time.Time;
16 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
17 | import org.apache.flink.util.Collector;
18 |
19 | public class TestWindow {
20 | public static void main(String[] args) throws Exception {
21 |         StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // unified stream/batch entry environment
22 |         env.setParallelism(1);
23 |
24 |         DataStreamSource<String> st = env.socketTextStream("localhost", 9999);
25 |         SingleOutputStreamOperator<Tuple2<String, Long>> map = st.map(s -> {
26 |             String[] split = s.split(",");
27 |             return Tuple2.of(split[0], Long.parseLong(split[1]));
28 |         }).returns(new TypeHint<Tuple2<String, Long>>() {
29 |         });
30 |
31 |         SingleOutputStreamOperator<Tuple2<String, Long>> wmed = map.assignTimestampsAndWatermarks(WatermarkStrategy.<Tuple2<String, Long>>forMonotonousTimestamps().withTimestampAssigner(new SerializableTimestampAssigner<Tuple2<String, Long>>() {
32 |             @Override
33 |             public long extractTimestamp(Tuple2<String, Long> element, long recordTimestamp) {
34 |                 return element.f1;
35 |             }
36 |         }));
37 |         SingleOutputStreamOperator<String> wind = wmed.keyBy(tp -> tp.f0)
38 |                 .window(TumblingEventTimeWindows.of(Time.seconds(5)))
39 |                 .process(new ProcessWindowFunction<Tuple2<String, Long>, String, String, TimeWindow>() {
40 |                     @Override
41 |                     public void process(String s, ProcessWindowFunction<Tuple2<String, Long>, String, String, TimeWindow>.Context context, Iterable<Tuple2<String, Long>> elements, Collector<String> out) throws Exception {
42 |                         System.out.println(s + "==== window user function fired, thread id: " + Thread.currentThread().getId());
43 |                         // System.out.println(s + " thread id inside window: " + Thread.currentThread().getId());
44 |                         // System.out.println(s + " watermark inside window: " + context.currentWatermark());
45 |                         Thread.sleep(10000);
46 |                         //System.out.println(s + " done sleeping");
47 |                         int i = RandomUtils.nextInt(1, 100);
48 |                         //System.out.println(s + " about to emit: " + i);
49 |                         out.collect(s + "," + i);
50 |                         System.out.println(s + "==== window user function finished, thread id: " + Thread.currentThread().getId());
51 |                     }
52 |                 });
53 |
54 |
55 |         wind.process(new ProcessFunction<String, String>() {
56 |             @Override
57 |             public void processElement(String value, ProcessFunction<String, String>.Context ctx, Collector<String> out) throws Exception {
58 |                 System.out.println("%%%% downstream processElement start: " + value + ", thread id: " + Thread.currentThread().getId());
59 |                 System.out.println("%%%% downstream current watermark: " + ctx.timerService().currentWatermark());
60 |                 System.out.println("%%%% downstream processElement end, thread id: " + Thread.currentThread().getId());
61 |             }
62 |         }).startNewChain();
63 |
64 |
65 | env.execute();
66 | }
67 | }
68 |
--------------------------------------------------------------------------------
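
Note on driving this demo (my reading of the map function above, not documented in the repo): each socket line must be key,eventTimeMillis, typed into nc -lk 9999, for example:

    a,1000
    a,3000
    a,5000

With forMonotonousTimestamps, the a,5000 line advances the watermark to 4999, which is enough to fire the [0, 5000) tumbling window for key a; the 10-second Thread.sleep inside the window function then makes it easy to see, from the printed thread ids, that the window operator and the startNewChain()-separated downstream process run as separate tasks.
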
/flink_course/src/main/java/cn/doitedu/flink/exercise/EventCount.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.exercise;
2 |
3 | import lombok.*;
4 |
5 | @Data
6 | @NoArgsConstructor
7 | @AllArgsConstructor
8 | public class EventCount {
9 | private int id;
10 | private String eventId;
11 | private int cnt;
12 |
13 | }
14 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/exercise/EventUserInfo.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.exercise;
2 |
3 | import lombok.*;
4 |
5 | @Data
6 | @AllArgsConstructor
7 | @NoArgsConstructor
8 | public class EventUserInfo {
9 |
10 | private int id;
11 | private String eventId;
12 | private int cnt;
13 | private String gender;
14 | private String city;
15 |
16 | }
17 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/exercise/UserInfo.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.exercise;
2 |
3 | import lombok.*;
4 |
5 | import java.io.ByteArrayOutputStream;
6 | import java.io.IOException;
7 | import java.io.ObjectOutput;
8 | import java.io.ObjectOutputStream;
9 |
10 | @Data
11 | @NoArgsConstructor
12 | @AllArgsConstructor
13 | public class UserInfo {
14 | private int id;
15 | private String gender;
16 | private String city;
17 | }
18 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/EventBean2.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import lombok.AllArgsConstructor;
4 | import lombok.Data;
5 | import lombok.NoArgsConstructor;
6 |
7 | @Data
8 | @NoArgsConstructor
9 | @AllArgsConstructor
10 | public class EventBean2 {
11 | private long guid;
12 | private String eventId;
13 | private long timeStamp;
14 | private String pageId;
15 |     private int actTimelong; // action duration
16 | }
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/EventLog.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import lombok.*;
4 | import org.apache.flink.streaming.connectors.redis.RedisSink;
5 |
6 | import java.util.Map;
7 |
8 | @NoArgsConstructor
9 | @AllArgsConstructor
10 | @Getter
11 | @Setter
12 | @ToString
13 | public class EventLog{
14 | private long guid;
15 | private String sessionId;
16 | private String eventId;
17 | private long timeStamp;
18 |     private Map<String, String> eventInfo;
19 | }
20 |
21 |
22 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/ParallelismDe.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import org.apache.flink.streaming.api.datastream.DataStream;
4 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
5 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
6 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
7 |
8 | public class ParallelismDe {
9 |
10 | public static void main(String[] args) {
11 |
12 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
13 |         SingleOutputStreamOperator<Integer> s1 = env.fromElements(1, 2, 3, 4, 5, 6).map(s -> s).setParallelism(3);
14 |         SingleOutputStreamOperator<Integer> s2 = env.fromElements(11, 12, 13, 14, 15, 16).map(s -> s).setParallelism(5);
15 |         DataStream<Integer> s3 = s2.union(s1);
16 | System.out.println(s3.getParallelism());
17 |
18 |
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_01_StreamWordCount.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import org.apache.flink.api.common.functions.FlatMapFunction;
4 | import org.apache.flink.api.java.ExecutionEnvironment;
5 | import org.apache.flink.api.java.functions.KeySelector;
6 | import org.apache.flink.api.java.tuple.Tuple2;
7 | import org.apache.flink.configuration.Configuration;
8 | import org.apache.flink.streaming.api.datastream.DataStream;
9 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
10 | import org.apache.flink.streaming.api.datastream.KeyedStream;
11 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
12 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
13 | import org.apache.flink.util.Collector;
14 |
15 | /**
16 |  * Requests a socket service (doit01:9999) through the socket source to obtain a data stream,
17 |  * then counts the words appearing in the stream and their occurrence counts
18 | */
19 | public class _01_StreamWordCount {
20 |
21 | public static void main(String[] args) throws Exception {
22 |
23 |
24 |         // Create a programming entry environment
25 |         // ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // batch-processing entry environment
26 |         StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // unified stream/batch entry environment
27 |
28 |
29 |         // Explicitly declare a local environment with a web UI
30 |         //Configuration configuration = new Configuration();
31 |         //configuration.setInteger("rest.port", 8081);
32 |         //StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);
33 |
34 |
35 |         /**
36 |          * In local mode, the program's default parallelism is the number of logical CPU cores
37 |          */
38 |         env.setParallelism(1); // the default parallelism can be overridden on the env
39 |
40 |
41 |         // Load the socket source into a DataStream via a source operator
42 |         // [root@doit01 ~]# nc -lk 9999
43 |         SingleOutputStreamOperator<String> source = env.socketTextStream("localhost", 9999)
44 |                 .setParallelism(1)
45 |                 .slotSharingGroup("g1");
46 |
47 |         // Transform the data stream with operators (the computation logic)
48 |         DataStream<Tuple2<String, Integer>> words = source.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
49 |             @Override
50 |             public void flatMap(String s, Collector<Tuple2<String, Integer>> collector) throws Exception {
51 |                 // split into words
52 |                 String[] split = s.split("\\s+");
53 |                 for (String word : split) {
54 |                     // emit a (word, 1) pair for each word
55 |                     collector.collect(Tuple2.of(word, 1));
56 |                 }
57 |             }
58 |         })
59 |         /*.setParallelism(10)
60 |         .slotSharingGroup("g2")
61 |         .shuffle()*/;
62 |
63 |         //SingleOutputStreamOperator<Tuple2<String, Integer>> words2 = words.map(tp -> Tuple2.of(tp.f0, tp.f1 * 10));
64 |
65 |
66 |         KeyedStream<Tuple2<String, Integer>, String> keyed = words.keyBy(new KeySelector<Tuple2<String, Integer>, String>() {
67 |             @Override
68 |             public String getKey(Tuple2<String, Integer> tuple2) throws Exception {
69 |
70 |                 return tuple2.f0;
71 |             }
72 |         });
73 |
74 |
75 |         SingleOutputStreamOperator<Tuple2<String, Integer>> resultStream = keyed.sum("f1")/*.slotSharingGroup("g1")*/;
76 |
77 |         // Emit the result via a sink operator
78 |         resultStream.print("wcSink");
79 |
80 |         // Trigger job submission and execution
81 |         env.execute();
82 |
83 |
84 | }
85 | }
86 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_02_BatchWordCount.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import org.apache.flink.api.common.functions.FlatMapFunction;
4 | import org.apache.flink.api.java.ExecutionEnvironment;
5 | import org.apache.flink.api.java.operators.DataSource;
6 | import org.apache.flink.api.java.tuple.Tuple2;
7 | import org.apache.flink.util.Collector;
8 |
9 | /**
10 | * @Author: deep as the sea
11 | * @Site: 多易教育
12 | * @QQ: 657270652
13 | * @Date: 2022/4/30
14 |  * @Desc: WordCount example in batch execution mode
15 | **/
16 | public class _02_BatchWordCount {
17 |
18 | public static void main(String[] args) throws Exception {
19 |
20 |         // batch entry environment
21 |         ExecutionEnvironment batchEnv = ExecutionEnvironment.getExecutionEnvironment();
22 |
23 |         // read data -- in the batch API, the data abstraction is a DataSet
24 |         DataSource<String> stringDataSource = batchEnv.readTextFile("flink_course/data/wc/input/");
25 |
26 |         // call the various DataSet operators on the dataset
27 |         stringDataSource
28 |                 .flatMap(new MyFlatMapFunction())
29 |                 .groupBy(0)
30 |                 .sum(1)
31 |                 .print();
32 | }
33 | }
34 |
35 | class MyFlatMapFunction implements FlatMapFunction<String, Tuple2<String, Integer>> {
36 |
37 |     @Override
38 |     public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
39 | String[] words = value.split("\\s+");
40 | for (String word : words) {
41 | out.collect(Tuple2.of(word,1));
42 | }
43 | }
44 | }
45 |
46 |
47 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_03_StreamBatchWordCount.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import org.apache.flink.api.common.RuntimeExecutionMode;
4 | import org.apache.flink.api.common.functions.FlatMapFunction;
5 | import org.apache.flink.api.java.functions.KeySelector;
6 | import org.apache.flink.api.java.tuple.Tuple2;
7 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
9 | import org.apache.flink.util.Collector;
10 |
11 | public class _03_StreamBatchWordCount {
12 |
13 | public static void main(String[] args) throws Exception {
14 |
15 |         // entry environment for stream processing
16 |         StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
17 |         streamEnv.setParallelism(1);
18 |
19 |         // execute in batch mode
20 |         streamEnv.setRuntimeMode(RuntimeExecutionMode.BATCH);
21 |
22 |         // execute in streaming mode
23 |         // streamEnv.setRuntimeMode(RuntimeExecutionMode.STREAMING);
24 |
25 |         // let Flink decide on its own
26 |         // streamEnv.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);
27 |
28 |         // read a file into a DataStream
29 |         DataStreamSource<String> streamSource = streamEnv.readTextFile("flink_course/data/wc/input/wc.txt");
30 |
31 |
32 |         // run the computation with DataStream operators
33 |         streamSource.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
34 |                     @Override
35 |                     public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
36 |                         String[] words = value.split("\\s+");
37 |                         for (String word : words) {
38 |                             out.collect(Tuple2.of(word, 1));
39 |                         }
40 |                     }
41 |                 })
42 |                 .keyBy(new KeySelector<Tuple2<String, Integer>, String>() {
43 |                     @Override
44 |                     public String getKey(Tuple2<String, Integer> value) throws Exception {
45 |                         return value.f0;
46 |                     }
47 |                 })
48 |                 .sum(1)
49 |                 .print();
50 |
51 |
52 | streamEnv.execute();
53 |
54 | }
55 |
56 | }
57 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_04_WordCount_LambdaTest.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import org.apache.flink.api.common.functions.FlatMapFunction;
4 | import org.apache.flink.api.common.functions.MapFunction;
5 | import org.apache.flink.api.common.typeinfo.TypeHint;
6 | import org.apache.flink.api.common.typeinfo.TypeInformation;
7 | import org.apache.flink.api.common.typeinfo.Types;
8 | import org.apache.flink.api.java.ExecutionEnvironment;
9 | import org.apache.flink.api.java.functions.KeySelector;
10 | import org.apache.flink.api.java.tuple.Tuple2;
11 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
12 | import org.apache.flink.streaming.api.datastream.KeyedStream;
13 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
14 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
15 | import org.apache.flink.util.Collector;
16 |
17 | public class _04_WordCount_LambdaTest {
18 |
19 | public static void main(String[] args) throws Exception {
20 |
21 |         // Create a programming entry point (execution environment)
22 |
23 |         // stream-processing entry environment
24 |         StreamExecutionEnvironment envStream = StreamExecutionEnvironment.getExecutionEnvironment();
25 |
26 |         DataStreamSource<String> streamSource = envStream.readTextFile("flink_course/data/wc/input/wc.txt");
27 |
28 |         // First, upper-case each sentence
29 |         /* Looking at the MapFunction interface that the map operator accepts, it is a single-abstract-method interface,
30 |            so the core behavior of any implementation lives in that one method,
31 |            which means it can be implemented concisely with a lambda expression:
32 |         streamSource.map(new MapFunction<String, String>() {
33 |             @Override
34 |             public String map(String value) throws Exception {
35 |                 return null;
36 |             }
37 |         });*/
38 |
39 |         /**
40 |          * How to write the lambda: look at what the target interface method takes as parameters and returns
41 |          */
42 |         // Then express it with lambda syntax: (param1, param2, ...) -> { body }
43 |         // streamSource.map( (value) -> { return value.toUpperCase();});
44 |
45 |         // Since this lambda has a single parameter and a one-line body, it can be simplified
46 |         // streamSource.map( value -> value.toUpperCase() ) ;
47 |
48 |         // And since the body is one line that uses the parameter exactly once, the call can become a method reference
49 |         SingleOutputStreamOperator<String> upperCased = streamSource.map(String::toUpperCase);
50 |
51 |         // Then split into words, map to (word, 1), and flatten
52 |         /*upperCased.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
53 |             @Override
54 |             public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
55 |
56 |             }
57 |         });*/
58 |         // This interface is again a single-abstract-method interface, so its method can also be implemented as a lambda
59 |         SingleOutputStreamOperator<Tuple2<String, Integer>> wordAndOne = upperCased.flatMap((String s, Collector<Tuple2<String, Integer>> collector) -> {
60 |                     String[] words = s.split("\\s+");
61 |                     for (String word : words) {
62 |                         collector.collect(Tuple2.of(word, 1));
63 |                     }
64 |                 })
65 |                 // .returns(new TypeHint<Tuple2<String, Integer>>() {}); // declare the produced type via a TypeHint
66 |                 // .returns(TypeInformation.of(new TypeHint<Tuple2<String, Integer>>() {})); // more generally, pass a TypeInformation; the TypeHint above wraps one
67 |                 .returns(Types.TUPLE(Types.STRING, Types.INT)); // use the static factory methods on Types to build the TypeInformation
68 |
69 |
70 |         // Group by word
71 |         /*wordAndOne.keyBy(new KeySelector<Tuple2<String, Integer>, String>() {
72 |             @Override
73 |             public String getKey(Tuple2<String, Integer> value) throws Exception {
74 |                 return null;
75 |             }
76 |         })*/
77 |         // The KeySelector interface is once more single-abstract-method, so a lambda works here too
78 |         KeyedStream<Tuple2<String, Integer>, String> keyedStream = wordAndOne.keyBy((value) -> value.f0);
79 |
80 |
81 |         // Count occurrences per word
82 |         keyedStream.sum(1)
83 |                 .print();
84 |
85 |
86 | envStream.execute();
87 |
88 |
89 | }
90 | }
91 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_09_StreamFileSinkOperator_Demo1.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import org.apache.avro.Schema;
4 | import org.apache.avro.SchemaBuilder;
5 | import org.apache.avro.generic.GenericData;
6 | import org.apache.avro.generic.GenericRecord;
7 | import org.apache.flink.api.common.functions.MapFunction;
8 | import org.apache.flink.connector.file.sink.FileSink;
9 | import org.apache.flink.core.fs.Path;
10 | import org.apache.flink.formats.avro.typeutils.GenericRecordAvroTypeInfo;
11 | import org.apache.flink.formats.parquet.ParquetWriterFactory;
12 | import org.apache.flink.formats.parquet.avro.ParquetAvroWriters;
13 | import org.apache.flink.streaming.api.CheckpointingMode;
14 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
15 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
16 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
17 | import org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig;
18 | import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.DateTimeBucketAssigner;
19 | import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy;
20 |
21 | /**
22 | *
23 | * @Author: deep as the sea
24 | * @Site: www.51doit.com
25 | * @QQ: 657270652
26 | * @Date: 2022/4/26
27 |  * @Desc: Write the processed data stream to a file system (HDFS).
28 |  *        The sink operator used is the StreamFileSink from the connector extension package
29 | **/
30 | public class _09_StreamFileSinkOperator_Demo1 {
31 |
32 | public static void main(String[] args) throws Exception {
33 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
34 |         // enable checkpointing
35 | env.enableCheckpointing(5000, CheckpointingMode.EXACTLY_ONCE);
36 | env.getCheckpointConfig().setCheckpointStorage("file:///d:/ckpt");
37 |
38 |         // build a source data stream
39 |         DataStreamSource<EventLog> streamSource = env.addSource(new MySourceFunction());
40 |
41 |         // write the stream above to the file system (pretending it is a result stream produced by complex computation)
42 |
43 |
44 |         /**
45 |          * Approach 1:
46 |          * Core logic:
47 |          *  - build a Schema
48 |          *  - use the schema to build a ParquetWriterFactory
49 |          *  - use the ParquetWriterFactory to build a FileSink operator
50 |          *  - convert the original data into a GenericRecord stream and write it to the FileSink operator
51 |          */
52 |         // 1. First define the data schema for GenericRecord
53 | Schema schema = SchemaBuilder.builder()
54 | .record("DataRecord")
55 | .namespace("cn.doitedu.flink.avro.schema")
56 |                 .doc("user action event data schema")
57 | .fields()
58 | .requiredInt("gid")
59 | .requiredLong("ts")
60 | .requiredString("eventId")
61 | .requiredString("sessionId")
62 | .name("eventInfo")
63 | .type()
64 | .map()
65 | .values()
66 | .type("string")
67 | .noDefault()
68 | .endRecord();
69 |
70 |
71 |         // 2. Use the schema defined above to obtain a parquet writer
72 |         ParquetWriterFactory<GenericRecord> writerFactory = ParquetAvroWriters.forGenericRecord(schema);
73 |
74 |         // 3. Use the generated parquet writer to build a sink operator that writes columnar parquet files
75 |         FileSink<GenericRecord> sink1 = FileSink.forBulkFormat(new Path("d:/datasink/"), writerFactory)
76 |                 .withBucketAssigner(new DateTimeBucketAssigner<>("yyyy-MM-dd--HH"))
77 | .withRollingPolicy(OnCheckpointRollingPolicy.build())
78 | .withOutputFileConfig(OutputFileConfig.builder().withPartPrefix("doit_edu").withPartSuffix(".parquet").build())
79 | .build();
80 |
81 |
82 |         // 4. Convert the custom JavaBean stream into the GenericRecord stream that the sink's parquet writer expects
83 |         SingleOutputStreamOperator<GenericRecord> recordStream = streamSource
84 |                 .map((MapFunction<EventLog, GenericRecord>) eventLog -> {
85 |                     // build a Record object
86 |                     GenericData.Record record = new GenericData.Record(schema);
87 |
88 |                     // fill the record with data
89 |                     record.put("gid", (int) eventLog.getGuid());
90 |                     record.put("eventId", eventLog.getEventId());
91 |                     record.put("ts", eventLog.getTimeStamp());
92 |                     record.put("sessionId", eventLog.getSessionId());
93 |                     record.put("eventInfo", eventLog.getEventInfo());
94 |
95 |                     return record;
96 |                 }).returns(new GenericRecordAvroTypeInfo(schema)); // avro classes/objects need avro serialization, so explicitly supply the AvroTypeInfo that provides the AvroSerializer
97 |
98 |         // 5. emit the data
99 | recordStream.sinkTo(sink1);
100 |
101 | env.execute();
102 |
103 |
104 | }
105 | }
106 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_09_StreamFileSinkOperator_Demo2.java:
--------------------------------------------------------------------------------
1 | //package cn.doitedu.flink.java.demos;
2 | //
3 | //import cn.doitedu.flink.avro.schema.AvroEventLog;
4 | //import org.apache.avro.Schema;
5 | //import org.apache.avro.SchemaBuilder;
6 | //import org.apache.avro.generic.GenericData;
7 | //import org.apache.avro.generic.GenericRecord;
8 | //import org.apache.flink.api.common.functions.MapFunction;
9 | //import org.apache.flink.connector.file.sink.FileSink;
10 | //import org.apache.flink.core.fs.Path;
11 | //import org.apache.flink.formats.parquet.ParquetWriterFactory;
12 | //import org.apache.flink.formats.parquet.avro.ParquetAvroWriters;
13 | //import org.apache.flink.streaming.api.CheckpointingMode;
14 | //import org.apache.flink.streaming.api.datastream.DataStreamSource;
15 | //import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
16 | //import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
17 | //import org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig;
18 | //import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.DateTimeBucketAssigner;
19 | //import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy;
20 | //
21 | //import java.util.HashMap;
22 | //import java.util.Map;
23 | //import java.util.Set;
24 | //
25 | ///**
26 | // *
27 | // * @Author: deep as the sea
28 | // * @Site: www.51doit.com
29 | // * @QQ: 657270652
30 | // * @Date: 2022/4/26
31 | // * @Desc: Write the processed data stream to a file system (HDFS).
32 | // *        The sink operator used is the StreamFileSink from the connector extension package
33 | // **/
34 | //public class _09_StreamFileSinkOperator_Demo2 {
35 | //
36 | // public static void main(String[] args) throws Exception {
37 | // StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
38 | //        // enable checkpointing
39 | // env.enableCheckpointing(5000, CheckpointingMode.EXACTLY_ONCE);
40 | // env.getCheckpointConfig().setCheckpointStorage("file:///d:/ckpt");
41 | //
42 | //        // build a source data stream
43 | //        DataStreamSource<EventLog> streamSource = env.addSource(new MySourceFunction());
44 | //
45 | //        // write the stream above to the file system (pretending it is a result stream produced by complex computation)
46 | //
47 | //
48 | //        /**
49 | //         * Approach 2:
50 | //         * Core logic:
51 | //         *  - write an avsc text file (JSON) describing the data schema
52 | //         *  - add the Maven code-generator plugin to generate the Avro-specific JavaBean class from that avsc
53 | //         *  - use the generated JavaBean to build a ParquetWriterFactory
54 | //         *  - use the ParquetWriterFactory to build a FileSink operator
55 | //         *  - convert the original data stream into a stream of the generated JavaBean and write it to the FileSink operator
56 | //         */
57 | //
58 | //        // 1. Put the avsc file under resources and compile it with the Maven plugin to generate the JavaBean: AvroEventLog
59 | //        // A JavaBean generated from an avsc carries its own Schema object
60 | // // AvroEventLog avroEventLog = new AvroEventLog();
61 | // // Schema schema = avroEventLog.getSchema();
62 | //
63 | //        // 2. Use the auto-generated AvroEventLog class to obtain a parquet writer
64 | //        ParquetWriterFactory<AvroEventLog> parquetWriterFactory = ParquetAvroWriters.forSpecificRecord(AvroEventLog.class);
65 | //
66 | //        // 3. Use the generated parquet writer to build a sink operator that writes columnar parquet files
67 | //        FileSink<AvroEventLog> bulkSink = FileSink.forBulkFormat(new Path("d:/datasink2/"), parquetWriterFactory)
68 | //                .withBucketAssigner(new DateTimeBucketAssigner<>("yyyy-MM-dd--HH"))
69 | // .withRollingPolicy(OnCheckpointRollingPolicy.build())
70 | // .withOutputFileConfig(OutputFileConfig.builder().withPartPrefix("doit_edu").withPartSuffix(".parquet").build())
71 | // .build();
72 | //
73 | //
74 | //        // 4. Convert the custom EventLog bean stream into the AvroEventLog stream that the sink's parquet writer expects
75 | //        SingleOutputStreamOperator<AvroEventLog> avroEventLogStream = streamSource.map(new MapFunction<EventLog, AvroEventLog>() {
76 | //            @Override
77 | //            public AvroEventLog map(EventLog eventLog) throws Exception {
78 | //                HashMap<CharSequence, CharSequence> eventInfo1 = new HashMap<>();
79 | //
80 | //                // copy the hashmap entries over (the generated bean uses CharSequence keys/values)
81 | //                Map<String, String> eventInfo2 = eventLog.getEventInfo();
82 | //                Set<Map.Entry<String, String>> entries = eventInfo2.entrySet();
83 | //                for (Map.Entry<String, String> entry : entries) {
84 | //                    eventInfo1.put(entry.getKey(), entry.getValue());
85 | //                }
86 | //
87 | // return new AvroEventLog(eventLog.getGuid(), eventLog.getSessionId(), eventLog.getEventId(), eventLog.getTimeStamp(), eventInfo1);
88 | // }
89 | // });
90 | //
91 | //
92 | //        // 5. emit the data
93 | // avroEventLogStream.sinkTo(bulkSink);
94 | //
95 | // env.execute();
96 | //
97 | // }
98 | //}
99 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_09_StreamFileSinkOperator_Demo3.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import org.apache.flink.api.common.functions.MapFunction;
4 | import org.apache.flink.connector.file.sink.FileSink;
5 | import org.apache.flink.core.fs.Path;
6 | import org.apache.flink.formats.parquet.ParquetWriterFactory;
7 | import org.apache.flink.formats.parquet.avro.ParquetAvroWriters;
8 | import org.apache.flink.streaming.api.CheckpointingMode;
9 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
10 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
12 | import org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig;
13 | import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.DateTimeBucketAssigner;
14 | import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy;
15 |
16 | import java.util.HashMap;
17 | import java.util.Map;
18 | import java.util.Set;
19 |
20 | /**
21 | *
22 | * @Author: deep as the sea
23 | * @Site: www.51doit.com
24 | * @QQ: 657270652
25 | * @Date: 2022/4/26
26 |  * @Desc: Write the processed data stream to a file system (HDFS).
27 |  *        The sink operator used is the StreamFileSink from the connector extension package
28 | **/
29 | public class _09_StreamFileSinkOperator_Demo3 {
30 |
31 | public static void main(String[] args) throws Exception {
32 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
33 |         // enable checkpointing
34 | env.enableCheckpointing(5000, CheckpointingMode.EXACTLY_ONCE);
35 | env.getCheckpointConfig().setCheckpointStorage("file:///d:/ckpt");
36 | env.setParallelism(1);
37 |
38 |         // build a source data stream
39 |         DataStreamSource<EventLog> streamSource = env.addSource(new MySourceFunction());
40 |
41 |         // write the stream above to the file system (pretending it is a result stream produced by complex computation)
42 |
43 |
44 |         /**
45 |          * Approach 3:
46 |          * Core logic:
47 |          *  - use your own JavaBean class (via reflection) to build a ParquetWriterFactory
48 |          *  - use the ParquetWriterFactory to build a FileSink operator
49 |          *  - write the original data stream to the FileSink operator
50 |          */
51 |
52 |         // 2. Use your own JavaBean class to obtain a parquet writer
53 |         ParquetWriterFactory<EventLog> parquetWriterFactory = ParquetAvroWriters.forReflectRecord(EventLog.class);
54 |
55 |         // 3. Use the generated parquet writer to build a sink operator that writes columnar parquet files
56 |         FileSink<EventLog> bulkSink = FileSink.forBulkFormat(new Path("d:/datasink3/"), parquetWriterFactory)
57 |                 .withBucketAssigner(new DateTimeBucketAssigner<>("yyyy-MM-dd--HH"))
58 | .withRollingPolicy(OnCheckpointRollingPolicy.build())
59 | .withOutputFileConfig(OutputFileConfig.builder().withPartPrefix("doit_edu").withPartSuffix(".parquet").build())
60 | .build();
61 |
62 |         // 5. emit the data
63 | streamSource.sinkTo(bulkSink);
64 |
65 |
66 | env.execute();
67 | }
68 | }
69 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_10_KafkaSinkOperator_Demo1.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import com.alibaba.fastjson.JSON;
4 | import org.apache.flink.api.common.serialization.SimpleStringSchema;
5 | import org.apache.flink.configuration.Configuration;
6 | import org.apache.flink.connector.base.DeliveryGuarantee;
7 | import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema;
8 | import org.apache.flink.connector.kafka.sink.KafkaSink;
9 | import org.apache.flink.streaming.api.CheckpointingMode;
10 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
12 |
13 | /**
14 | *
15 | * @Author: deep as the sea
16 | * @Site: www.51doit.com
17 | * @QQ: 657270652
18 | * @Date: 2022/4/26
19 | * @Desc:
20 |  *   Write a data stream to Kafka with the KafkaSink.
21 |  *   Test prep -- create the target topic first:
22 |  *   [root@doit01 ~]# kafka-topics.sh --create --topic event-log --partitions 3 --replication-factor 2 --zookeeper doit01:2181
23 | **/
24 | public class _10_KafkaSinkOperator_Demo1 {
25 | public static void main(String[] args) throws Exception {
26 |
27 | Configuration configuration = new Configuration();
28 | configuration.setInteger("rest.port",8822);
29 | StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);
30 |
31 |
32 |         // enable checkpointing
33 | env.enableCheckpointing(5000, CheckpointingMode.EXACTLY_ONCE);
34 | env.getCheckpointConfig().setCheckpointStorage("file:///d:/ckpt");
35 |
36 |         // build a source data stream
37 |         DataStreamSource<EventLog> streamSource = env.addSource(new MySourceFunction());
38 |
39 |
40 |         // write the data to Kafka
41 |         // 1. build a Kafka sink operator
42 |         KafkaSink<String> kafkaSink = KafkaSink.<String>builder()
43 |                 .setBootstrapServers("doit01:9092,doit02:9092")
44 |                 .setRecordSerializer(KafkaRecordSerializationSchema.<String>builder()
45 | .setTopic("event-log")
46 | .setValueSerializationSchema(new SimpleStringSchema())
47 | .build()
48 | )
49 | .setDeliverGuarantee(DeliveryGuarantee.AT_LEAST_ONCE)
50 | .setTransactionalIdPrefix("doitedu-")
51 | .build();
52 |
53 |         // 2. write the data stream to the sink operator built above
54 | streamSource
55 | .map(JSON::toJSONString).disableChaining()
56 | .sinkTo(kafkaSink);
57 |
58 | env.execute();
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
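
A side note, not from the course code: the sink above sets a transactional-id prefix yet runs AT_LEAST_ONCE. A hedged sketch of the EXACTLY_ONCE variant, assuming the same KafkaSink builder API as the demo (plus java.util.Properties) and a broker whose transaction.max.timeout.ms permits the chosen timeout:

    // Sketch only: exactly-once rides on Kafka transactions, whose timeout usually has to be
    // raised, since checkpoint intervals can outlive the producer's default transaction timeout.
    Properties txnProps = new Properties();
    txnProps.setProperty("transaction.timeout.ms", "600000"); // must be <= broker transaction.max.timeout.ms

    KafkaSink<String> eosSink = KafkaSink.<String>builder()
            .setBootstrapServers("doit01:9092,doit02:9092")
            .setRecordSerializer(KafkaRecordSerializationSchema.<String>builder()
                    .setTopic("event-log")
                    .setValueSerializationSchema(new SimpleStringSchema())
                    .build())
            .setDeliverGuarantee(DeliveryGuarantee.EXACTLY_ONCE) // two-phase commit tied to checkpoints
            .setTransactionalIdPrefix("doitedu-")
            .setKafkaProducerConfig(txnProps)
            .build();
    // Downstream consumers must read with isolation.level=read_committed for the guarantee to hold.
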
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_12_RedisSinkOperator_Demo1.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import com.alibaba.fastjson.JSON;
4 | import org.apache.flink.configuration.Configuration;
5 | import org.apache.flink.streaming.api.CheckpointingMode;
6 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
8 | import org.apache.flink.streaming.connectors.redis.RedisSink;
9 | import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig;
10 | import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommand;
11 | import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommandDescription;
12 | import org.apache.flink.streaming.connectors.redis.common.mapper.RedisMapper;
13 |
14 | import java.util.Optional;
15 |
16 | /**
17 | *
18 | * @Author: deep as the sea
19 | * @Site: www.51doit.com
20 | * @QQ: 657270652
21 | * @Date: 2022/4/26
22 | * @Desc:
23 |  *   Write a data stream to Redis with the RedisSink operator
24 | *
25 | **/
26 | public class _12_RedisSinkOperator_Demo1 {
27 |
28 |
29 | public static void main(String[] args) throws Exception {
30 |
31 | Configuration configuration = new Configuration();
32 | configuration.setInteger("rest.port",8822);
33 | StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);
34 |
35 |
36 |         // enable checkpointing
37 | env.enableCheckpointing(5000, CheckpointingMode.EXACTLY_ONCE);
38 | env.getCheckpointConfig().setCheckpointStorage("file:///d:/ckpt");
39 |
40 | // 构造好一个数据流
41 | DataStreamSource streamSource = env.addSource(new MySourceFunction());
42 |
43 | // eventLog数据插入redis,你想用什么结构来存储?
44 | FlinkJedisPoolConfig config = new FlinkJedisPoolConfig.Builder().setHost("doit01").build();
45 |
46 | RedisSink<EventLog> redisSink = new RedisSink<>(config, new StringInsertMapper());
47 |
48 | streamSource.addSink(redisSink);
49 |
50 | env.execute();
51 |
52 | }
53 |
54 |
55 | static class StringInsertMapper implements RedisMapper<EventLog> {
56 |
57 | @Override
58 | public RedisCommandDescription getCommandDescription() {
59 | return new RedisCommandDescription(RedisCommand.SET);
60 | }
61 |
62 | /**
63 | * If the chosen Redis structure has no inner key, this method returns the top-level (big) key
64 | * If the structure has inner keys (e.g. hset), this method returns the inner (small) key, and the value passed to the Description above becomes the top-level key
65 | * @param data
66 | * @return
67 | */
68 | @Override
69 | public String getKeyFromData(EventLog data) {
70 | return data.getGuid()+"-"+data.getSessionId()+"-"+data.getTimeStamp(); // this is the key of the string record
71 | }
72 |
73 | @Override
74 | public String getValueFromData(EventLog data) {
75 | return JSON.toJSONString(data); // this is the value of the string record
76 | }
77 | }
78 |
79 |
80 | /**
81 | * Insert data as a HASH structure
82 | */
83 | static class HsetInsertMapper implements RedisMapper<EventLog> {
84 | // The additional key can be chosen per record (structures like hash carry an additional outer (big) key)
85 | @Override
86 | public Optional<String> getAdditionalKey(EventLog data) {
87 | return RedisMapper.super.getAdditionalKey(data);
88 | }
89 |
90 | // A different TTL (time to live) can be set per record
91 | @Override
92 | public Optional<Integer> getAdditionalTTL(EventLog data) {
93 | return RedisMapper.super.getAdditionalTTL(data);
94 | }
95 |
96 | @Override
97 | public RedisCommandDescription getCommandDescription() {
98 | return new RedisCommandDescription(RedisCommand.HSET,"event-logs");
99 | }
100 |
101 | /**
102 | * If the chosen Redis structure has no inner key, this method returns the top-level (big) key
103 | * If the structure has inner keys (e.g. hset), this method returns the inner (small) key, and the value passed to the Description above becomes the top-level key
104 | * @param data
105 | * @return
106 | */
107 | @Override
108 | public String getKeyFromData(EventLog data) {
109 | return data.getGuid()+"-"+data.getSessionId()+"-"+data.getTimeStamp(); // this is the field (inner key) of the hset
110 | }
111 |
112 | @Override
113 | public String getValueFromData(EventLog data) {
114 | return data.getEventId(); // this is the value of the hset field
115 | }
116 |
117 |
118 | }
119 |
120 |
121 |
122 |
123 |
124 |
125 | }
126 |
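HsetInsertMapper is defined above but never wired in; switching the sink to the HASH layout is a one-line change (a sketch, reusing the config built in main()):

    RedisSink<EventLog> hashSink = new RedisSink<>(config, new HsetInsertMapper());
    streamSource.addSink(hashSink);   // writes each event as one field of the "event-logs" hash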
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_13_SideOutput_Demo.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import com.alibaba.fastjson.JSON;
4 | import org.apache.flink.api.common.typeinfo.TypeInformation;
5 | import org.apache.flink.configuration.Configuration;
6 | import org.apache.flink.streaming.api.CheckpointingMode;
7 | import org.apache.flink.streaming.api.datastream.DataStream;
8 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
9 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
11 | import org.apache.flink.streaming.api.functions.ProcessFunction;
12 | import org.apache.flink.util.Collector;
13 | import org.apache.flink.util.OutputTag;
14 |
15 | /**
16 | * @Author: deep as the sea
17 | * @Site: 多易教育
18 | * @QQ: 657270652
19 | * @Date: 2022/4/26
20 | * @Desc: side-output demo (the process operator)
21 | **/
22 | public class _13_SideOutput_Demo {
23 |
24 |
25 | public static void main(String[] args) throws Exception {
26 |
27 | Configuration configuration = new Configuration();
28 | configuration.setInteger("rest.port", 8822);
29 | StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);
30 | env.setParallelism(1);
31 |
32 |
33 | // Enable checkpointing
34 | env.enableCheckpointing(5000, CheckpointingMode.EXACTLY_ONCE);
35 | env.getCheckpointConfig().setCheckpointStorage("file:///d:/ckpt");
36 |
37 | // Build a source data stream
38 | DataStreamSource<EventLog> streamSource = env.addSource(new MySourceFunction());
39 |
40 |
41 | // Requirement: split the behavior-event stream
42 | //   appLaunch events go to one side stream
43 | //   putBack events go to another side stream
44 | //   the other events stay in the main stream
45 | SingleOutputStreamOperator<EventLog> processed = streamSource.process(new ProcessFunction<EventLog, EventLog>() {
46 | /**
47 | *
48 | * @param eventLog the input record
49 | * @param ctx context, which provides the side-output facility
50 | * @param out collector for the main output
51 | * @throws Exception
52 | */
53 | @Override
54 | public void processElement(EventLog eventLog, ProcessFunction<EventLog, EventLog>.Context ctx, Collector<EventLog> out) throws Exception {
55 | String eventId = eventLog.getEventId();
56 |
57 | if ("appLaunch".equals(eventId)) {
58 |
59 | ctx.output(new OutputTag("launch", TypeInformation.of(EventLog.class)), eventLog);
60 |
61 | } else if ("putBack".equals(eventId)) {
62 |
63 | ctx.output(new OutputTag("back",TypeInformation.of(String.class)), JSON.toJSONString(eventLog));
64 | }
65 |
66 | out.collect(eventLog);  // note: every event, including the two side-output cases above, is also emitted to the main stream here
67 |
68 | }
69 | });
70 |
71 | // get the launch side-output stream
72 | DataStream<EventLog> launchStream = processed.getSideOutput(new OutputTag<EventLog>("launch", TypeInformation.of(EventLog.class)));
73 |
74 | // get the back side-output stream
75 | DataStream<String> backStream = processed.getSideOutput(new OutputTag<String>("back", TypeInformation.of(String.class)));
76 |
77 | launchStream.print("launch");
78 |
79 | backStream.print("back");
80 |
81 |
82 | env.execute();
83 |
84 | }
85 |
86 |
87 | }
88 |
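The tag passed to getSideOutput must match the one used in ctx.output (same id and type). A common way to keep the two in sync is to declare each tag once as a constant; the anonymous-subclass form ("{}" at the end) lets Flink capture the element type without an explicit TypeInformation (a sketch):

    static final OutputTag<EventLog> LAUNCH_TAG = new OutputTag<EventLog>("launch") {};
    static final OutputTag<String> BACK_TAG = new OutputTag<String>("back") {};
    // then use LAUNCH_TAG / BACK_TAG both in ctx.output(...) and in processed.getSideOutput(...)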
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_14_StreamConnect_Union_Demo.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import com.alibaba.fastjson.JSON;
4 | import org.apache.flink.api.common.typeinfo.TypeInformation;
5 | import org.apache.flink.configuration.Configuration;
6 | import org.apache.flink.streaming.api.CheckpointingMode;
7 | import org.apache.flink.streaming.api.datastream.ConnectedStreams;
8 | import org.apache.flink.streaming.api.datastream.DataStream;
9 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
10 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
12 | import org.apache.flink.streaming.api.functions.ProcessFunction;
13 | import org.apache.flink.streaming.api.functions.co.CoMapFunction;
14 | import org.apache.flink.util.Collector;
15 | import org.apache.flink.util.OutputTag;
16 |
17 | /**
18 | * @Author: deep as the sea
19 | * @Site: www.51doit.com
20 | * @QQ: 657270652
21 | * @Date: 2022/4/26
22 | * @Desc: demo of the stream connect and union operators
23 | **/
24 | public class _14_StreamConnect_Union_Demo {
25 |
26 |
27 | public static void main(String[] args) throws Exception {
28 |
29 | Configuration configuration = new Configuration();
30 | configuration.setInteger("rest.port", 8822);
31 | StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);
32 | env.setParallelism(1);
33 |
34 | // stream of numeric strings
35 | DataStreamSource<String> stream1 = env.socketTextStream("localhost", 9998);
36 |
37 | // stream of alphabetic strings
38 | DataStreamSource<String> stream2 = env.socketTextStream("localhost", 9999);
39 |
40 | /**
41 | * connect two streams
42 | */
43 | ConnectedStreams<String, String> connectedStreams = stream1.connect(stream2);
44 |
45 | SingleOutputStreamOperator<String> resultStream = connectedStreams.map(new CoMapFunction<String, String, String>() {
46 | // state shared by both map methods
47 |
48 | String prefix = "doitedu_";
49 |
50 | /**
51 | * processing logic for the left stream
52 | * @param value
53 | * @return
54 | * @throws Exception
55 | */
56 | @Override
57 | public String map1(String value) throws Exception {
58 | // multiply the number by 10 and return it as a string
59 | return prefix + (Integer.parseInt(value) * 10);
60 | }
61 |
62 | /**
63 | * processing logic for the right stream
64 | * @param value
65 | * @return
66 | * @throws Exception
67 | */
68 | @Override
69 | public String map2(String value) throws Exception {
70 |
71 | return prefix + value.toUpperCase();
72 | }
73 | });
74 | /*resultStream.print();*/
75 |
76 |
77 | /**
78 | * union of streams
79 | * all streams participating in a union must have the same data type
80 | */
81 | // stream1.map(Integer::parseInt).union(stream2); // the two sides have different element types, so this does not compile
82 | DataStream<String> unioned = stream1.union(stream2);
83 | unioned.map(s-> "doitedu_"+s).print();
84 |
85 |
86 | env.execute();
87 |
88 | }
89 |
90 |
91 | }
92 |
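union is variadic, so any number of same-typed streams can be merged in one call (a sketch, with a hypothetical third socket stream):

    DataStreamSource<String> stream3 = env.socketTextStream("localhost", 10000);
    DataStream<String> all = stream1.union(stream2, stream3);   // all three must carry the same element type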
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_17_ProcessFunctions_Demo.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import org.apache.flink.api.common.functions.CoGroupFunction;
4 | import org.apache.flink.api.common.functions.JoinFunction;
5 | import org.apache.flink.api.common.functions.RuntimeContext;
6 | import org.apache.flink.api.common.typeinfo.TypeHint;
7 | import org.apache.flink.api.common.typeinfo.Types;
8 | import org.apache.flink.api.java.tuple.Tuple2;
9 | import org.apache.flink.api.java.tuple.Tuple3;
10 | import org.apache.flink.configuration.Configuration;
11 | import org.apache.flink.streaming.api.datastream.DataStream;
12 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
13 | import org.apache.flink.streaming.api.datastream.KeyedStream;
14 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
15 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
16 | import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
17 | import org.apache.flink.streaming.api.functions.ProcessFunction;
18 | import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
19 | import org.apache.flink.streaming.api.windowing.time.Time;
20 | import org.apache.flink.util.Collector;
21 | import org.apache.flink.util.OutputTag;
22 |
23 | /**
24 | * @Author: deep as the sea
25 | * @Site: www.51doit.com
26 | * @QQ: 657270652
27 | * @Date: 2022/4/26
28 | * @Desc: demo of the process operator and ProcessFunction
29 | *
30 | * The ProcessFunction to pass in differs depending on the type of stream the process operator is called on
31 | **/
32 | public class _17_ProcessFunctions_Demo {
33 |
34 |
35 | public static void main(String[] args) throws Exception {
36 |
37 | Configuration configuration = new Configuration();
38 | configuration.setInteger("rest.port", 8822);
39 | StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);
40 | env.setParallelism(1);
41 |
42 | // id,eventId
43 | DataStreamSource<String> stream1 = env.socketTextStream("localhost", 9998);
44 |
45 | /**
46 | * On a plain DataStream, process takes a "ProcessFunction"
47 | */
48 | SingleOutputStreamOperator<Tuple2<String, String>> s1 = stream1.process(new ProcessFunction<String, Tuple2<String, String>>() {
49 | // the open() lifecycle method is available
50 | @Override
51 | public void open(Configuration parameters) throws Exception {
52 | // getRuntimeContext() exposes all kinds of runtime context information
53 | RuntimeContext runtimeContext = getRuntimeContext();
54 | runtimeContext.getTaskName();
55 |
56 | super.open(parameters);
57 | }
58 |
59 | @Override
60 | public void processElement(String value, ProcessFunction<String, Tuple2<String, String>>.Context ctx, Collector<Tuple2<String, String>> out) throws Exception {
61 |
62 | // side output is available here
63 | ctx.output(new OutputTag<String>("s1", Types.STRING), value);
64 |
65 | // main output is available here
66 | String[] arr = value.split(",");
67 | out.collect(Tuple2.of(arr[0], arr[1]));
68 | }
69 |
70 | // the close() lifecycle method is available
71 | @Override
72 | public void close() throws Exception {
73 | super.close();
74 | }
75 | });
76 |
77 |
78 |
79 | /**
80 | * On a KeyedStream, process takes a "KeyedProcessFunction"
81 | * KeyedProcessFunction generics -- 1: type of the key; 2: type of the input records; 3: type of the output records
82 | */
83 | // key the s1 stream by the first tuple field
84 | KeyedStream<Tuple2<String, String>, String> keyedStream = s1.keyBy(tp2 -> tp2.f0);
85 | // then call process on the keyed stream
86 | SingleOutputStreamOperator<Tuple2<Integer, String>> s2 = keyedStream.process(new KeyedProcessFunction<String, Tuple2<String, String>, Tuple2<Integer, String>>() {
87 | @Override
88 | public void processElement(Tuple2<String, String> value, KeyedProcessFunction<String, Tuple2<String, String>, Tuple2<Integer, String>>.Context ctx, Collector<Tuple2<Integer, String>> out) throws Exception {
89 | // parse the id to an integer, upper-case the eventId
90 | out.collect(Tuple2.of(Integer.parseInt(value.f0), value.f1.toUpperCase()));
91 | }
92 | });
93 |
94 |
95 | s2.print();
96 |
97 |
98 | env.execute();
99 |
100 | }
101 |
102 |
103 | }
104 |
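Beyond keyed state, what KeyedProcessFunction adds over a plain ProcessFunction is timers. A minimal sketch of registering a processing-time timer per element and reacting in onTimer (the 30-second delay is illustrative):

    keyedStream.process(new KeyedProcessFunction<String, Tuple2<String, String>, String>() {
        @Override
        public void processElement(Tuple2<String, String> value, Context ctx, Collector<String> out) throws Exception {
            // fire 30 s after this element's processing time
            ctx.timerService().registerProcessingTimeTimer(ctx.timerService().currentProcessingTime() + 30_000);
        }

        @Override
        public void onTimer(long timestamp, OnTimerContext ctx, Collector<String> out) throws Exception {
            out.collect("timer fired for key " + ctx.getCurrentKey() + " at " + timestamp);
        }
    });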
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_18_ChannalSelector_Partitioner_Demo.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import org.apache.flink.api.common.functions.FlatMapFunction;
4 | import org.apache.flink.configuration.Configuration;
5 | import org.apache.flink.streaming.api.datastream.DataStream;
6 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
7 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
9 | import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
10 | import org.apache.flink.util.Collector;
11 |
12 | public class _18_ChannalSelector_Partitioner_Demo {
13 |
14 | public static void main(String[] args) throws Exception {
15 |
16 | Configuration conf = new Configuration();
17 | conf.setInteger("rest.port", 8081);
18 | StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(conf);
19 |
20 |
21 | DataStreamSource<String> s1 = env.socketTextStream("localhost", 9999);
22 |
23 | DataStream<String> s2 = s1
24 | .map(s -> s.toUpperCase())
25 | .setParallelism(4)
26 | .flatMap(new FlatMapFunction<String, String>() {
27 | @Override
28 | public void flatMap(String value, Collector<String> out) throws Exception {
29 | String[] arr = value.split(",");
30 | for (String s : arr) {
31 | out.collect(s);
32 | }
33 | }
34 | })
35 | .setParallelism(4)
36 | .forward();
37 |
38 | SingleOutputStreamOperator<String> s3 = s2.map(s -> s.toLowerCase()).setParallelism(4);
39 |
40 |
41 | SingleOutputStreamOperator<String> s4 = s3.keyBy(s -> s.substring(0, 2))
42 | .process(new KeyedProcessFunction<String, String, String>() {
43 | @Override
44 | public void processElement(String value, KeyedProcessFunction<String, String, String>.Context ctx, Collector<String> out) throws Exception {
45 | out.collect(value + ">");
46 | }
47 | }).setParallelism(4);
48 |
49 | DataStream<String> s5 = s4.filter(s -> s.startsWith("b")).setParallelism(4);
50 |
51 | s5.print().setParallelism(4);
52 |
53 | env.execute();
54 | }
55 |
56 | }
57 |
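forward() requires the upstream and downstream operators to have the same parallelism (which is why every operator in this pipeline is set to 4). The other built-in channel selectors can be swapped in at the same position (a sketch; all of these exist on DataStream):

    s1.rebalance();   // round-robin across all downstream subtasks
    s1.rescale();     // round-robin within a local group of subtasks
    s1.shuffle();     // random target subtask
    s1.broadcast();   // every record to every downstream subtask
    s1.global();      // every record to subtask 0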
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_19_WaterMark_Api_Demo2.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 |
4 | import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
5 | import org.apache.flink.api.common.eventtime.WatermarkStrategy;
6 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
7 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
9 | import org.apache.flink.streaming.api.functions.ProcessFunction;
10 | import org.apache.flink.util.Collector;
11 |
12 | /**
13 | * @Author: deep as the sea
14 | * @Site: 多易教育
15 | * @QQ: 657270652
16 | * @Date: 2022/5/1
17 | * @Desc: demo of watermark generation settings,
18 | *        and observing watermark progress with a single parallelism
19 | *
20 | * ==> type the following two records into the socket, one after the other:
21 | * 1,e05,2000,page01
22 | * 1,e06,3000,page02
23 | *
24 | * ==> the console then prints, in order:
25 | * Received: EventBean(guid=1, eventId=e05, timeStamp=2000, pageId=page01)
26 | * Current watermark: -9223372036854775808
27 | * Current processing time: 1651396210778
28 | * ----------------------
29 | * Received: EventBean(guid=1, eventId=e06, timeStamp=3000, pageId=page02)
30 | * Current watermark: 1999
31 | * Current processing time: 1651396273755
32 | *
33 | **/
34 | public class _19_WaterMark_Api_Demo2 {
35 |
36 | public static void main(String[] args) throws Exception {
37 |
38 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
39 | env.setParallelism(1);
40 |
41 |
42 | // 1,e01,168673487846,pg01
43 | DataStreamSource<String> s1 = env.socketTextStream("localhost", 9999);
44 |
45 |
46 | SingleOutputStreamOperator<EventBean> s2 = s1.map(s -> {
47 | String[] split = s.split(",");
48 | return new EventBean(Long.parseLong(split[0]), split[1], Long.parseLong(split[2]), split[3]);
49 | }).returns(EventBean.class)
50 | .assignTimestampsAndWatermarks(
51 | WatermarkStrategy
52 | .forMonotonousTimestamps()
53 | .withTimestampAssigner(new SerializableTimestampAssigner<EventBean>() {
54 | @Override
55 | public long extractTimestamp(EventBean eventBean, long recordTimestamp) {
56 | return eventBean.getTimeStamp();
57 | }
58 | })
59 | ).setParallelism(2);
60 |
61 | s2.process(new ProcessFunction<EventBean, EventBean>() {
62 | @Override
63 | public void processElement(EventBean eventBean, ProcessFunction<EventBean, EventBean>.Context ctx, Collector<EventBean> out) throws Exception {
64 |
65 | Thread.sleep(1000);
66 | System.out.println("woke up, about to print");
67 |
68 | // print the current watermark
69 | long processTime = ctx.timerService().currentProcessingTime();
70 | long watermark = ctx.timerService().currentWatermark();
71 |
72 | System.out.println("本次收到的数据" + eventBean);
73 | System.out.println("此刻的watermark: " + watermark);
74 | System.out.println("此刻的处理时间(processing time): " + processTime );
75 |
76 | out.collect(eventBean);
77 | }
78 | }).setParallelism(1).print();
79 |
80 |
81 | env.execute();
82 |
83 |
84 | }
85 | }
86 |
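With several source partitions, a quiet partition holds the combined watermark back indefinitely. Marking idle sources is the usual remedy; a sketch against the same stream (the 2 s out-of-orderness and 1 min idleness are illustrative values; requires java.time.Duration):

    WatermarkStrategy<EventBean> strategy = WatermarkStrategy
            .<EventBean>forBoundedOutOfOrderness(Duration.ofSeconds(2))
            .withIdleness(Duration.ofMinutes(1))   // a subtask with no data for 1 min stops holding back the watermark
            .withTimestampAssigner((bean, ts) -> bean.getTimeStamp());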
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_21_Window_Api_Demo3.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import org.apache.flink.api.common.RuntimeExecutionMode;
4 | import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
5 | import org.apache.flink.api.common.eventtime.WatermarkStrategy;
6 | import org.apache.flink.api.common.typeinfo.TypeHint;
7 | import org.apache.flink.api.common.typeinfo.TypeInformation;
8 | import org.apache.flink.api.java.tuple.Tuple2;
9 | import org.apache.flink.streaming.api.datastream.DataStream;
10 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
11 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
12 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
13 | import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
14 | import org.apache.flink.streaming.api.windowing.assigners.*;
15 | import org.apache.flink.streaming.api.windowing.time.Time;
16 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
17 | import org.apache.flink.util.Collector;
18 | import org.apache.flink.util.OutputTag;
19 |
20 | import java.time.Duration;
21 |
22 | public class _21_Window_Api_Demo3 {
23 |
24 | public static void main(String[] args) throws Exception {
25 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
26 | env.setParallelism(1);
27 | env.setRuntimeMode(RuntimeExecutionMode.STREAMING);
28 |
29 | // 1,e01,3000,pg02,1
30 | DataStreamSource source = env.socketTextStream("localhost", 9999);
31 |
32 | SingleOutputStreamOperator> beanStream = source.map(s -> {
33 | String[] split = s.split(",");
34 | EventBean2 bean = new EventBean2(Long.parseLong(split[0]), split[1], Long.parseLong(split[2]), split[3], Integer.parseInt(split[4]));
35 | return Tuple2.of(bean,1);
36 | }).returns(new TypeHint>() {})
37 | .assignTimestampsAndWatermarks(WatermarkStrategy.>forBoundedOutOfOrderness(Duration.ofMillis(0))
38 | .withTimestampAssigner(new SerializableTimestampAssigner>() {
39 | @Override
40 | public long extractTimestamp(Tuple2<EventBean2, Integer> element, long recordTimestamp) {
41 | return element.f0.getTimeStamp();
42 | }
43 | }));
44 |
45 |
46 | OutputTag> lateDataOutputTag = new OutputTag<>("late_data", TypeInformation.of(new TypeHint>() {}));
47 |
48 | SingleOutputStreamOperator<String> sumResult = beanStream.keyBy(tp -> tp.f0.getGuid())
49 | .window(TumblingEventTimeWindows.of(Time.seconds(10))) // event-time tumbling window, 10 s long
50 | .allowedLateness(Time.seconds(2)) // allow 2 s of lateness
51 | .sideOutputLateData(lateDataOutputTag) // data later than the allowed lateness goes to the side stream marked by this OutputTag
52 | /*.sum("f1")*/
53 | .apply(new WindowFunction<Tuple2<EventBean2, Integer>, String, Long, TimeWindow>() {
54 | @Override
55 | public void apply(Long aLong, TimeWindow window, Iterable<Tuple2<EventBean2, Integer>> input, Collector<String> out) throws Exception {
56 | int count = 0;
57 | for (Tuple2<EventBean2, Integer> tuple : input) {
58 | count++;
59 | }
60 | out.collect(window.getStart() + ":" + window.getEnd() + "," + count);
61 | }
62 | });
63 |
64 |
65 | DataStream> lateDataSideStream = sumResult.getSideOutput(lateDataOutputTag);
66 |
67 |
68 | sumResult.print("主流结果");
69 |
70 | lateDataSideStream.print("迟到数据");
71 |
72 | env.execute();
73 |
74 | }
75 |
76 |
77 | }
78 |
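apply() buffers the whole window contents in state before the window fires. The same per-window count can be computed incrementally with an AggregateFunction, which keeps only a single accumulator per key and window (a sketch; requires org.apache.flink.api.common.functions.AggregateFunction):

    beanStream.keyBy(tp -> tp.f0.getGuid())
            .window(TumblingEventTimeWindows.of(Time.seconds(10)))
            .aggregate(new AggregateFunction<Tuple2<EventBean2, Integer>, Long, Long>() {
                @Override public Long createAccumulator() { return 0L; }
                @Override public Long add(Tuple2<EventBean2, Integer> value, Long acc) { return acc + 1; }
                @Override public Long getResult(Long acc) { return acc; }
                @Override public Long merge(Long a, Long b) { return a + b; }
            });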
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_22_StateBasic_Demo.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import org.apache.flink.api.common.RuntimeExecutionMode;
4 | import org.apache.flink.api.common.functions.MapFunction;
5 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
6 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
7 |
8 | public class _22_StateBasic_Demo {
9 |
10 | public static void main(String[] args) throws Exception {
11 |
12 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
13 | env.setParallelism(1);
14 | env.setRuntimeMode(RuntimeExecutionMode.STREAMING);
15 |
16 | // a
17 | DataStreamSource<String> source = env.socketTextStream("localhost", 9999);
18 |
19 | // Use a map operator to achieve this effect:
20 | // for every arriving string, output that string concatenated with all strings that arrived before it
21 | source.map(new MapFunction<String, String>() {
22 |
23 | // State we define and manage ourselves -- persistence and fault tolerance are hard
24 | // this kind of (self-managed) state is called: raw state
25 | String acc = "";
26 |
27 | /**
28 | * To have Flink manage the state data,
29 | * don't define a plain member variable;
30 | * instead obtain a state handle from flink's api and do all reads, writes and updates through it
31 | *
32 | * this kind of state is called: managed state! (flink state)
33 | */
34 |
35 | @Override
36 | public String map(String value) throws Exception {
37 | acc = acc + value;
38 | return acc;
39 | }
40 | }).print();
41 |
42 | env.execute();
43 |
44 | }
45 |
46 | }
47 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_24_State_KeyedState_Demo.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 | import org.apache.commons.lang3.RandomUtils;
4 | import org.apache.flink.api.common.RuntimeExecutionMode;
5 | import org.apache.flink.api.common.functions.AggregateFunction;
6 | import org.apache.flink.api.common.functions.MapFunction;
7 | import org.apache.flink.api.common.functions.RichMapFunction;
8 | import org.apache.flink.api.common.functions.RuntimeContext;
9 | import org.apache.flink.api.common.restartstrategy.RestartStrategies;
10 | import org.apache.flink.api.common.state.*;
11 | import org.apache.flink.configuration.Configuration;
12 | import org.apache.flink.runtime.state.FunctionInitializationContext;
13 | import org.apache.flink.runtime.state.FunctionSnapshotContext;
14 | import org.apache.flink.streaming.api.CheckpointingMode;
15 | import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
16 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
17 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
18 |
19 | /**
20 | * @Author: deep as the sea
21 | * @Site: 多易教育
22 | * @QQ: 657270652
23 | * @Date: 2022/5/5
24 | * @Desc: keyed state usage demo
25 | **/
26 | public class _24_State_KeyedState_Demo {
27 |
28 | public static void main(String[] args) throws Exception {
29 |
30 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
31 | env.setParallelism(1);
32 | env.setRuntimeMode(RuntimeExecutionMode.STREAMING);
33 |
34 | // Enable checkpointing of state data (snapshot interval, snapshot mode)
35 | env.enableCheckpointing(1000, CheckpointingMode.EXACTLY_ONCE);
36 |
37 | // With snapshots enabled, a persistent storage location for them must be specified
38 | env.getCheckpointConfig().setCheckpointStorage("file:///d:/checkpoint/");
39 |
40 |
41 | // Enable automatic task-level failover
42 | env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3,1000));
43 |
44 |
45 | DataStreamSource<String> source = env.socketTextStream("localhost", 9999);
46 |
47 | // Use a map operator to achieve this effect:
48 | // for every arriving string, output that string concatenated with all strings that arrived before it
49 | source
50 | .keyBy(s->"0")
51 | .map(new RichMapFunction<String, String>() {
52 |
53 | ListState<String> lstState;
54 | @Override
55 | public void open(Configuration parameters) throws Exception {
56 | RuntimeContext runtimeContext = getRuntimeContext();
57 | // obtain a List-structured state handle
58 | lstState = runtimeContext.getListState(new ListStateDescriptor<String>("lst", String.class));
59 |
60 | // obtain a single-value state handle
61 | // TODO explore the ValueState operations yourself
62 |
63 | // obtain a Map-structured state handle
64 | MapState<String, String> mapState = runtimeContext.getMapState(new MapStateDescriptor<String, String>("xx", String.class, String.class));
65 | // TODO explore the MapState operations yourself
66 | }
67 |
68 | @Override
69 | public String map(String value) throws Exception {
70 |
71 | // add this record to the state
72 | lstState.add(value);
73 |
74 | // iterate over all historical strings and concatenate the result
75 | StringBuilder sb = new StringBuilder();
76 | for (String s : lstState.get()) {
77 | sb.append(s);
78 | }
79 |
80 | return sb.toString();
81 | }
82 | }).setParallelism(2)
83 | .print().setParallelism(2);
84 |
85 | // submit the job
86 | env.execute();
87 |
88 | }
89 |
90 | }
91 |
92 |
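For the ValueState TODO above, a minimal sketch of the single-value state API (same open()/map() structure as the ListState version):

    ValueState<String> accState;   // member of the RichMapFunction

    // in open():
    accState = getRuntimeContext().getState(new ValueStateDescriptor<String>("acc", String.class));

    // in map():
    String prev = accState.value();                       // null on first access for the current key
    accState.update((prev == null ? "" : prev) + value);  // write back the concatenated result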
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/java/demos/_27_ToleranceConfig_Demo.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.java.demos;
2 |
3 |
4 | import org.apache.flink.api.common.restartstrategy.RestartStrategies;
5 | import org.apache.flink.api.common.time.Time;
6 | import org.apache.flink.configuration.Configuration;
7 | import org.apache.flink.streaming.api.CheckpointingMode;
8 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
9 | import org.apache.flink.streaming.api.environment.CheckpointConfig;
10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
11 |
12 | import java.time.Duration;
13 | /**
14 | * @Author: deep as the sea
15 | * @Site: 多易教育
16 | * @QQ: 657270652
17 | * @Date: 2022/5/8
18 | * @Desc: configuration examples for flink's fault-tolerance machinery
19 | *        checkpoint-related settings
20 | *        restartStrategy-related settings
21 | **/
22 | public class _27_ToleranceConfig_Demo {
23 |
24 | public static void main(String[] args) throws Exception {
25 |
26 | /**
27 | * When testing in the IDE, a savepoint to restore state from can be specified here
28 | */
29 | Configuration conf = new Configuration();
30 | //conf.setString("execution.savepoint.path", "file:///D:/checkpoint/7ecbd4f9106957c42109bcde/chk-544");
31 |
32 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(conf);
33 |
34 | /* *
35 | * checkpoint-related settings
36 | */
37 | env.enableCheckpointing(2000, CheckpointingMode.EXACTLY_ONCE); // the two most basic checkpoint parameters: interval and mode
38 | CheckpointConfig checkpointConfig = env.getCheckpointConfig();
39 | checkpointConfig.setCheckpointStorage("hdfs://doit01:8020/ckpt");
40 | checkpointConfig.setAlignedCheckpointTimeout(Duration.ofMinutes(10000)); // timeout for checkpoint alignment
41 | checkpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE); // checkpoint algorithm mode
42 | checkpointConfig.setCheckpointInterval(2000); // checkpoint interval
43 | //checkpointConfig.setCheckpointIdOfIgnoredInFlightData(5); // for the unaligned mode: on job recovery, operators discard the in-flight data of checkpoint 5
44 | checkpointConfig.setExternalizedCheckpointCleanup(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION); // keep the last checkpoint when the job is cancelled
45 | checkpointConfig.setForceUnalignedCheckpoints(false); // whether to force the unaligned checkpoint mode
46 | checkpointConfig.setMaxConcurrentCheckpoints(5); // max number of in-flight (incomplete) checkpoints allowed at the same time
47 | checkpointConfig.setMinPauseBetweenCheckpoints(2000); // minimum pause between two checkpoints, so checkpointing doesn't eat too much operator processing time
48 | checkpointConfig.setCheckpointTimeout(3000); // upper bound on the total time one checkpoint execution may take
49 | checkpointConfig.setTolerableCheckpointFailureNumber(10); // max number of checkpoint failures to tolerate
50 |
51 |
52 |
53 |
54 |
55 | /* *
56 | * restart-strategy settings for automatic task-failure recovery
57 | */
58 | RestartStrategies.RestartStrategyConfiguration restartStrategy = null;
59 |
60 | // fixed-delay restart (param 1: max restart attempts; param 2: delay between two restarts)
61 | restartStrategy = RestartStrategies.fixedDelayRestart(5, 2000);
62 |
63 | // the default strategy: no restart (any task failure fails the whole job)
64 | restartStrategy = RestartStrategies.noRestart();
65 |
66 |
67 | /* *
68 | * This strategy: the more frequent the failures, the longer the penalty interval between restarts
69 | *
70 | * initialBackoff         initial penalty interval between restarts: 1s
71 | * maxBackoff             maximum penalty interval: 60s
72 | * backoffMultiplier      penalty multiplier: 2 (each further failure multiplies the previous penalty interval by this factor)
73 | * resetBackoffThreshold  smooth-running time after which the penalty resets (the next failure then restarts the delay at the initial 1s)
74 | * jitterFactor           a random offset added to each restart time, so restarts don't all line up
75 | * job1: 9.51 9.53+2*0.1 9.57 ......
76 | * job2: 9.51 9.53+2*0.15 9.57 ......
77 | * job3: 9.51 9.53+2*0.8 9.57 ......
78 | */
79 | restartStrategy = RestartStrategies.exponentialDelayRestart(Time.seconds(1),Time.seconds(60),2.0,Time.hours(1),1.0);
80 |
81 | /* *
82 | * failureRate      max number of failures within the measured window
83 | * failureInterval  the measured window
84 | * delayInterval    delay between two restarts
85 | */
86 | restartStrategy = RestartStrategies.failureRateRestart(5,Time.hours(1),Time.seconds(5));
87 |
88 | /* *
89 | * This strategy falls back to whatever the config file configures
90 | * It is commonly used with a custom RestartStrategy:
91 | * the user-defined strategy class is typically configured in flink-conf.yaml
92 | */
93 | restartStrategy = RestartStrategies.fallBackRestart();
94 |
95 |
96 | // apply the chosen restart strategy
97 | env.setRestartStrategy(restartStrategy);
98 |
99 |
100 | // data processing
101 | DataStreamSource<String> source = env.socketTextStream("localhost", 9999);
102 |
103 |
104 | // submit for execution
105 | env.execute();
106 | }
107 |
108 |
109 | }
110 |
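For fallBackRestart(), the cluster-level default comes from flink-conf.yaml; the fixed-delay equivalent of the code above looks roughly like this (key names per the Flink configuration docs, values illustrative):

    restart-strategy: fixed-delay
    restart-strategy.fixed-delay.attempts: 5
    restart-strategy.fixed-delay.delay: 2 s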
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/scala/demos/_01_入门程序WordCount.scala:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.scala.demos
2 |
3 | import org.apache.flink.streaming.api.scala.{StreamExecutionEnvironment, createTypeInformation}
4 |
5 | object _01_入门程序WordCount {
6 | def main(args: Array[String]): Unit = {
7 |
8 | val env = StreamExecutionEnvironment.getExecutionEnvironment
9 |
10 | val sourceStream = env.socketTextStream("doit01", 9999)
11 |
12 | // sourceStream.flatMap(s=>s.split("\\s+")).map(w=>(w,1))
13 |
14 | sourceStream
15 | .flatMap(s => {
16 | s.split("\\s+").map(w => (w, 1))
17 | })
18 | .keyBy(tp => tp._1)
19 | .sum(1)
20 | .print("我爱你")
21 |
22 | env.execute("我的job"); // 提交job
23 |
24 | }
25 |
26 | }
27 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/task/Mapper1.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.task;
2 |
3 | public class Mapper1 {
4 |
5 | public String map(String s){
6 | return s.toUpperCase();
7 | }
8 |
9 | }
10 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/task/Mapper2.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.task;
2 |
3 | public class Mapper2 {
4 | public String map(String s){
5 | return s+".txt";
6 | }
7 |
8 | }
9 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/task/Task1.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.task;
2 |
3 | public class Task1 implements Runnable{
4 |
5 | @Override
6 | public void run() {
7 |
8 | // receive data from upstream
9 | //String data = receive();
10 |
11 | Mapper1 mapper1 = new Mapper1();
12 | //String res = mapper1.map(data);
13 |
14 | // send the result downstream
15 | // channel.send(res);
16 | }
17 |
18 | }
19 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/task/Task2.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.task;
2 |
3 | public class Task2 implements Runnable{
4 |
5 | @Override
6 | public void run() {
7 |
8 | //String data = receive();
9 |
10 | Mapper2 mapper2 = new Mapper2();
11 | // String res = mapper2.map(data);
12 |
13 | // send(res);
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/task/Task3.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.task;
2 |
3 | public class Task3 implements Runnable{
4 | @Override
5 | public void run() {
6 | Mapper1 mapper1 = new Mapper1();
7 | Mapper2 mapper2 = new Mapper2();
8 |
9 |
10 | String res1 = mapper1.map("aaaa");
11 | String res2 = mapper2.map(res1);
12 | }
13 | }
14 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flink/task/TaskRunner.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flink.task;
2 |
3 | public class TaskRunner {
4 |
5 | public static void main(String[] args) {
6 |
7 | // 6 parallel instances of Task1 -- each parallel instance is what flink calls a subTask
8 | new Thread(new Task1()).start();
9 | new Thread(new Task1()).start();
10 | new Thread(new Task1()).start();
11 | new Thread(new Task1()).start();
12 | new Thread(new Task1()).start();
13 | new Thread(new Task1()).start();
14 |
15 |
16 | // 6 parallel instances of Task2 -- each parallel instance is what flink calls a subTask
17 | new Thread(new Task2()).start();
18 | new Thread(new Task2()).start();
19 | new Thread(new Task2()).start();
20 | new Thread(new Task2()).start();
21 | new Thread(new Task2()).start();
22 | new Thread(new Task2()).start();
23 |
24 |
25 | // 6 parallel instances of Task3 -- each parallel instance is what flink calls a subTask
26 | new Thread(new Task3()).start();
27 | new Thread(new Task3()).start();
28 | new Thread(new Task3()).start();
29 | new Thread(new Task3()).start();
30 | new Thread(new Task3()).start();
31 | new Thread(new Task3()).start();
32 |
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo10_KafkaConnectorDetail.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.streaming.api.datastream.DataStream;
4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
5 | import org.apache.flink.streaming.api.functions.ProcessFunction;
6 | import org.apache.flink.table.api.EnvironmentSettings;
7 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
8 | import org.apache.flink.types.Row;
9 | import org.apache.flink.util.Collector;
10 |
11 | /**
12 | * @Author: deep as the sea
13 | * @Site: 多易教育
14 | * @QQ: 657270652
15 | * @Date: 2022/6/12
16 | * @Desc: Learn big data at 多易教育
17 | * stream ===> table: how event time and watermarks carry over in the conversion
18 | **/
19 | public class Demo10_KafkaConnectorDetail {
20 |
21 | public static void main(String[] args) throws Exception {
22 |
23 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
24 | env.setParallelism(1);
25 |
26 | EnvironmentSettings settings = EnvironmentSettings.inStreamingMode();
27 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env, settings);
28 |
29 |
30 | /**
31 | * the corresponding records in kafka:
32 | * key: {"k1":100,"k2":200}
33 | * value: {"guid":1,"eventId":"e02","eventTime":1655017433000,"pageId":"p001"}
34 | * headers:
35 | * h1 -> vvvv
36 | * h2 -> tttt
37 | */
38 | tenv.executeSql(
39 | " CREATE TABLE t_kafka_connector ( "
40 | + " guid int, "
41 | + " eventId string, "
42 | + " eventTime bigint, "
43 | + " pageId string, "
44 | + " k1 int, "
45 | + " k2 int, "
46 | + " rec_ts timestamp(3) metadata from 'timestamp' , "
47 | + " `offset` bigint metadata , "
48 | + " headers map metadata, "
49 | + " rt as to_timestamp_ltz(eventTime,3) , "
50 | + " watermark for rt as rt - interval '0.001' second "
51 | + " ) WITH ( "
52 | + " 'connector' = 'kafka', "
53 | + " 'topic' = 'doit30-kafka', "
54 | + " 'properties.bootstrap.servers' = 'doitedu:9092', "
55 | + " 'properties.group.id' = 'testGroup', "
56 | + " 'scan.startup.mode' = 'earliest-offset', "
57 | + " 'key.format'='json', "
58 | + " 'key.json.ignore-parse-errors' = 'true', "
59 | + " 'key.fields'='k1;k2', "
60 | /* + " 'key.fields-prefix'='', " */
61 | + " 'value.format'='json', "
62 | + " 'value.json.fail-on-missing-field'='false', "
63 | + " 'value.fields-include' = 'EXCEPT_KEY' "
64 | + " ) "
65 |
66 | );
67 |
68 | tenv.executeSql("select * from t_kafka_connector ")/*.print()*/;
69 | tenv.executeSql("select guid,eventId,cast(headers['h1'] as string) as h1, cast(headers['h2'] as string) as h2 from t_kafka_connector ").print();
70 |
71 |
72 | env.execute();
73 |
74 |
75 | }
76 |
77 | }
78 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo11_UpsertKafkaConnectorTest.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 |
4 | import lombok.AllArgsConstructor;
5 | import lombok.Data;
6 | import lombok.NoArgsConstructor;
7 | import org.apache.flink.configuration.Configuration;
8 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
9 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
11 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
12 |
13 |
14 | public class Demo11_UpsertKafkaConnectorTest {
15 | public static void main(String[] args) throws Exception {
16 |
17 | Configuration conf = new Configuration();
18 | /*conf.setInteger("rest.port",9091);*/
19 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
20 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env);
21 |
22 |
23 | // 1,male
24 | DataStreamSource<String> s1 = env.socketTextStream("doitedu", 9999);
25 |
26 | SingleOutputStreamOperator<Bean1> bean1 = s1.map(s -> {
27 | String[] arr = s.split(",");
28 | return new Bean1(Integer.parseInt(arr[0]), arr[1]);
29 | });
30 |
31 | // turn the stream into a table
32 | tenv.createTemporaryView("bean1",bean1);
33 |
34 | //tenv.executeSql("select gender,count(1) as cnt from bean1 group by gender").print();
35 |
36 |
37 | // create the target kafka mapping table
38 | tenv.executeSql(
39 | " create table t_upsert_kafka( "
40 | + " gender string primary key not enforced, "
41 | + " cnt bigint "
42 | + " ) with ( "
43 | + " 'connector' = 'upsert-kafka', "
44 | + " 'topic' = 'doit30-upsert', "
45 | + " 'properties.bootstrap.servers' = 'doitedu:9092', "
46 | + " 'key.format' = 'csv', "
47 | + " 'value.format' = 'csv' "
48 | + " ) "
49 |
50 | );
51 | // count rows per gender and insert the result into the target table
52 | tenv.executeSql(
53 | "insert into t_upsert_kafka " +
54 | "select gender,count(1) as cnt from bean1 group by gender"
55 | );
56 |
57 | tenv.executeSql("select * from t_upsert_kafka").print();
58 |
59 |
60 | env.execute();
61 |
62 | }
63 |
64 | @Data
65 | @NoArgsConstructor
66 | @AllArgsConstructor
67 | public static class Bean1{
68 | public int id;
69 | public String gender;
70 | }
71 |
72 |
73 | }
74 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo11_UpsertKafkaConnectorTest2.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 |
4 | import lombok.AllArgsConstructor;
5 | import lombok.Data;
6 | import lombok.NoArgsConstructor;
7 | import org.apache.flink.configuration.Configuration;
8 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
9 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
11 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
12 |
13 |
14 | public class Demo11_UpsertKafkaConnectorTest2 {
15 | public static void main(String[] args) throws Exception {
16 |
17 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
18 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env);
19 |
20 |
21 | // 1,male
22 | DataStreamSource<String> s1 = env.socketTextStream("doitedu", 9998);
23 | // 1,zs
24 | DataStreamSource<String> s2 = env.socketTextStream("doitedu", 9999);
25 |
26 | SingleOutputStreamOperator<Bean1> bean1 = s1.map(s -> {
27 | String[] arr = s.split(",");
28 | return new Bean1(Integer.parseInt(arr[0]), arr[1]);
29 | });
30 |
31 | SingleOutputStreamOperator<Bean2> bean2 = s2.map(s -> {
32 | String[] arr = s.split(",");
33 | return new Bean2(Integer.parseInt(arr[0]), arr[1]);
34 | });
35 |
36 |
37 | // turn the streams into tables
38 | tenv.createTemporaryView("bean1",bean1);
39 | tenv.createTemporaryView("bean2",bean2);
40 |
41 | //tenv.executeSql("select gender,count(1) as cnt from bean1 group by gender").print();
42 |
43 |
44 | // create the target kafka mapping table
45 | tenv.executeSql(
46 | " create table t_upsert_kafka2( "
47 | + " id int primary key not enforced, "
48 | + " gender string, "
49 | + " name string "
50 | + " ) with ( "
51 | + " 'connector' = 'upsert-kafka', "
52 | + " 'topic' = 'doit30-upsert2', "
53 | + " 'properties.bootstrap.servers' = 'doitedu:9092', "
54 | + " 'key.format' = 'csv', "
55 | + " 'value.format' = 'csv' "
56 | + " ) "
57 |
58 | );
59 | // join the two streams and insert the result into the target table
60 | tenv.executeSql(
61 | "insert into t_upsert_kafka2 " +
62 | "select bean1.id,bean1.gender,bean2.name from bean1 left join bean2 on bean1.id=bean2.id"
63 | );
64 |
65 | tenv.executeSql("select * from t_upsert_kafka2").print();
66 |
67 |
68 | env.execute();
69 |
70 | }
71 |
72 | @Data
73 | @NoArgsConstructor
74 | @AllArgsConstructor
75 | public static class Bean1{
76 | public int id;
77 | public String gender;
78 | }
79 |
80 |
81 | @Data
82 | @NoArgsConstructor
83 | @AllArgsConstructor
84 | public static class Bean2{
85 | public int id;
86 | public String name;
87 | }
88 |
89 | }
90 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo12_JdbcConnectorTest1.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.api.common.RuntimeExecutionMode;
4 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
5 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
6 | import org.apache.flink.table.api.EnvironmentSettings;
7 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
8 |
9 | public class Demo12_JdbcConnectorTest1 {
10 | public static void main(String[] args) throws Exception {
11 |
12 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
13 | env.setRuntimeMode(RuntimeExecutionMode.STREAMING);
14 |
15 | EnvironmentSettings environmentSettings = EnvironmentSettings.inStreamingMode();
16 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env,environmentSettings);
17 |
18 |
19 | // create a table mapping mysql's flinktest.stu
20 | tenv.executeSql(
21 | "create table flink_stu(\n" +
22 | " id int primary key,\n" +
23 | " name string,\n" +
24 | " age int,\n" +
25 | " gender string\n" +
26 | ") with (\n" +
27 | " 'connector' = 'jdbc',\n" +
28 | " 'url' = 'jdbc:mysql://doitedu:3306/flinktest',\n" +
29 | " 'table-name' = 'stu',\n" +
30 | " 'username' = 'root',\n" +
31 | " 'password' = 'root' \n" +
32 | ")"
33 | );
34 |
35 | DataStreamSource<String> doitedu = env.socketTextStream("doitedu", 9999);
36 |
37 | tenv.executeSql("select * from flink_stu").print();
38 |
39 | doitedu.print();
40 |
41 |
42 | env.execute();
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo12_JdbcConnectorTest2.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import lombok.AllArgsConstructor;
4 | import lombok.Data;
5 | import lombok.NoArgsConstructor;
6 | import org.apache.flink.api.common.RuntimeExecutionMode;
7 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
10 | import org.apache.flink.table.api.EnvironmentSettings;
11 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
12 |
13 | public class Demo12_JdbcConnectorTest2 {
14 | public static void main(String[] args) throws Exception {
15 |
16 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
17 | env.setRuntimeMode(RuntimeExecutionMode.STREAMING);
18 |
19 | EnvironmentSettings environmentSettings = EnvironmentSettings.inStreamingMode();
20 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env, environmentSettings);
21 |
22 |
23 | // create a table mapping mysql's flinktest.stu2
24 | tenv.executeSql(
25 | "create table flink_stu(\n" +
26 | " id int primary key, \n" +
27 | " gender string, \n" +
28 | " name string \n" +
29 | ") with (\n" +
30 | " 'connector' = 'jdbc',\n" +
31 | " 'url' = 'jdbc:mysql://doitedu:3306/flinktest',\n" +
32 | " 'table-name' = 'stu2',\n" +
33 | " 'username' = 'root',\n" +
34 | " 'password' = 'root' \n" +
35 | ")"
36 | );
37 |
38 |
39 | // 1,male
40 | SingleOutputStreamOperator<Bean1> bean1 = env
41 | .socketTextStream("doitedu", 9998)
42 | .map(s -> {
43 | String[] arr = s.split(",");
44 | return new Bean1(Integer.parseInt(arr[0]), arr[1]);
45 | });
46 | // 1,zs
47 | SingleOutputStreamOperator<Bean2> bean2 = env.socketTextStream("doitedu", 9999).map(s -> {
48 | String[] arr = s.split(",");
49 | return new Bean2(Integer.parseInt(arr[0]), arr[1]);
50 | });
51 |
52 |
53 | // turn the streams into tables
54 | tenv.createTemporaryView("bean1", bean1);
55 | tenv.createTemporaryView("bean2", bean2);
56 |
57 | tenv.executeSql("insert into flink_stu " +
58 | "select bean1.id,bean1.gender,bean2.name from bean1 left join bean2 on bean1.id=bean2.id");
59 |
60 |
61 | env.execute();
62 | }
63 |
64 |
65 | @Data
66 | @NoArgsConstructor
67 | @AllArgsConstructor
68 | public static class Bean1 {
69 | public int id;
70 | public String gender;
71 | }
72 |
73 |
74 | @Data
75 | @NoArgsConstructor
76 | @AllArgsConstructor
77 | public static class Bean2 {
78 | public int id;
79 | public String name;
80 | }
81 | }
82 |
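Because flink_stu declares a primary key, the JDBC connector writes in upsert mode (on MySQL: INSERT ... ON DUPLICATE KEY UPDATE), so retractions from the left join overwrite earlier rows instead of appending. A hypothetical MySQL DDL matching the mapping above (column widths are illustrative):

    CREATE TABLE stu2 (
      id     INT PRIMARY KEY,
      gender VARCHAR(20),
      name   VARCHAR(20)
    );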
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo13_FileSystemConnectorTest.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import lombok.AllArgsConstructor;
4 | import lombok.Data;
5 | import lombok.NoArgsConstructor;
6 | import org.apache.flink.api.common.RuntimeExecutionMode;
7 | import org.apache.flink.api.common.typeinfo.TypeHint;
8 | import org.apache.flink.api.java.tuple.Tuple4;
9 | import org.apache.flink.streaming.api.CheckpointingMode;
10 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
12 | import org.apache.flink.table.api.EnvironmentSettings;
13 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
14 |
15 | public class Demo13_FileSystemConnectorTest {
16 | public static void main(String[] args) throws Exception {
17 |
18 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
19 | env.enableCheckpointing(1000, CheckpointingMode.EXACTLY_ONCE);
20 | env.getCheckpointConfig().setCheckpointStorage("file:///d:/checkpoint");
21 | env.setRuntimeMode(RuntimeExecutionMode.STREAMING);
22 |
23 | EnvironmentSettings environmentSettings = EnvironmentSettings.inStreamingMode();
24 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env, environmentSettings);
25 |
26 |
27 | // create the filesystem mapping table fs_table
28 | tenv.executeSql(
29 | "CREATE TABLE fs_table (\n" +
30 | " user_id STRING,\n" +
31 | " order_amount DOUBLE,\n" +
32 | " dt STRING,\n" +
33 | " `hour` STRING\n" +
34 | ") PARTITIONED BY (dt, `hour`) WITH (\n" +
35 | " 'connector'='filesystem',\n" +
36 | " 'path'='file:///d:/filetable/',\n" +
37 | " 'format'='json',\n" +
38 | " 'sink.partition-commit.delay'='1 h',\n" +
39 | " 'sink.partition-commit.policy.kind'='success-file',\n" +
40 | " 'sink.rolling-policy.file-size' = '8M',\n" +
41 | " 'sink.rolling-policy.rollover-interval'='30 min',\n" +
42 | " 'sink.rolling-policy.check-interval'='10 second'\n" +
43 | ")"
44 | );
45 |
46 |
47 | // u01,88.8,2022-06-13,14
48 | SingleOutputStreamOperator> stream = env
49 | .socketTextStream("doitedu", 9999)
50 | .map(s -> {
51 | String[] split = s.split(",");
52 | return Tuple4.of(split[0], Double.parseDouble(split[1]), split[2], split[3]);
53 | }).returns(new TypeHint>() {
54 | });
55 |
56 | tenv.createTemporaryView("orders",stream);
57 |
58 | tenv.executeSql("insert into fs_table select * from orders");
59 |
60 |
61 |
62 | env.execute();
63 | }
64 |
65 |
66 | @Data
67 | @NoArgsConstructor
68 | @AllArgsConstructor
69 | public static class Bean1 {
70 | public int id;
71 | public String gender;
72 | }
73 |
74 |
75 | @Data
76 | @NoArgsConstructor
77 | @AllArgsConstructor
78 | public static class Bean2 {
79 | public int id;
80 | public String name;
81 | }
82 | }
83 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo14_MysqlCdcConnector.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.streaming.api.CheckpointingMode;
4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
6 |
7 | /**
8 | * @Author: deep as the sea
9 | * @Site: 多易教育
10 | * @QQ: 657270652
11 | * @Date: 2022/6/13
12 | * @Desc: Learn big data at 多易教育
13 | * test of the mysql cdc connector
14 | **/
15 | public class Demo14_MysqlCdcConnector {
16 |
17 | public static void main(String[] args) {
18 |
19 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
20 | env.enableCheckpointing(1000, CheckpointingMode.EXACTLY_ONCE);
21 | env.getCheckpointConfig().setCheckpointStorage("file:///d:/checkpoint");
22 |
23 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env);
24 |
25 | // create the mapped tables
26 | tenv.executeSql("CREATE TABLE flink_score (\n" +
27 | " id INT,\n" +
28 | " name string,\n" +
29 | " gender string,\n" +
30 | " score double,\n" +
31 | " PRIMARY KEY(id) NOT ENFORCED\n" +
32 | " ) WITH (\n" +
33 | " 'connector' = 'mysql-cdc',\n" +
34 | " 'hostname' = 'doitedu',\n" +
35 | " 'port' = '3306',\n" +
36 | " 'username' = 'root',\n" +
37 | " 'password' = 'root',\n" +
38 | " 'database-name' = 'flinktest',\n" +
39 | " 'table-name' = 'score'\n" +
40 | ")");
41 |
42 | tenv.executeSql("CREATE TABLE t1 (\n" +
43 | " id INT,\n" +
44 | " name string,\n" +
45 | " PRIMARY KEY(id) NOT ENFORCED\n" +
46 | " ) WITH (\n" +
47 | " 'connector' = 'mysql-cdc',\n" +
48 | " 'hostname' = 'doitedu',\n" +
49 | " 'port' = '3306',\n" +
50 | " 'username' = 'root',\n" +
51 | " 'password' = 'root',\n" +
52 | " 'database-name' = 'doitedu',\n" +
53 | " 'table-name' = 't1'\n" +
54 | ")");
55 |
56 | tenv.executeSql("select * from t1").print();
57 | System.exit(1); // exit early while testing t1 -- everything below is unreachable in this run
58 |
59 | // queries
60 | tenv.executeSql("select * from flink_score")/*.print()*/;
61 |
62 |
63 | tenv.executeSql("select gender,avg(score) as avg_score from flink_score group by gender")/*.print()*/;
64 |
65 | // create a target table to hold the result: the top 2 people by total score within each gender
66 | tenv.executeSql(
67 | "create table flink_rank(\n" +
68 | " gender string , \n" +
69 | " name string, \n" +
70 | " score_amt double, \n" +
71 | " rn bigint , \n" +
72 | " primary key(gender,rn) not enforced \n" +
73 | ") with (\n" +
74 | " 'connector' = 'jdbc',\n" +
75 | " 'url' = 'jdbc:mysql://doitedu:3306/flinktest',\n" +
76 | " 'table-name' = 'score_rank',\n" +
77 | " 'username' = 'root',\n" +
78 | " 'password' = 'root' \n" +
79 | ")"
80 | );
81 |
82 |
83 | tenv.executeSql("insert into flink_rank \n" +
84 | "SELECT\n" +
85 | " gender,\n" +
86 | " name,\n" +
87 | " score_amt,\n" +
88 | " rn\n" +
89 | "from(\n" +
90 | "SELECT\n" +
91 | " gender,\n" +
92 | " name,\n" +
93 | " score_amt,\n" +
94 | " row_number() over(partition by gender order by score_amt desc) as rn\n" +
95 | "from \n" +
96 | "(\n" +
97 | "SELECT\n" +
98 | "gender,\n" +
99 | "name,\n" +
100 | "sum(score) as score_amt\n" +
101 | "from flink_score\n" +
102 | "group by gender,name\n" +
103 | ") o1\n" +
104 | ") o2\n" +
105 | "where rn<=2");
106 | }
107 | }
108 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo18_IntervalJoin.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.api.common.typeinfo.TypeHint;
4 | import org.apache.flink.api.java.tuple.Tuple3;
5 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
8 | import org.apache.flink.table.api.DataTypes;
9 | import org.apache.flink.table.api.Schema;
10 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
11 |
12 | /**
13 | * @Author: deep as the sea
14 | * @Site: 多易教育
15 | * @QQ: 657270652
16 | * @Date: 2022/6/16
17 | * @Desc: Learn big data at 多易教育
18 | * interval join example
19 | * an interval join constrains each match to a time range around the probing row,
20 | * so its state is cleaned up automatically as watermarks advance instead of growing without bound
21 | *
22 | **/
23 | public class Demo18_IntervalJoin {
24 | public static void main(String[] args) {
25 |
26 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
27 | env.setParallelism(1);
28 |
29 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env);
30 | // set the state ttl (ms) for the table environment
31 | tenv.getConfig().getConfiguration().setLong("table.exec.state.ttl",60*60*1000L);
32 |
33 |
34 |
35 | /**
36 | * 1,a,1000
37 | * 2,b,2000
38 | * 3,c,2500
39 | * 4,d,3000
40 | * 5,e,12000
41 | */
42 | DataStreamSource<String> s1 = env.socketTextStream("doitedu", 9998);
43 | SingleOutputStreamOperator<Tuple3<String, String, Long>> ss1 = s1.map(s -> {
44 | String[] arr = s.split(",");
45 | return Tuple3.of(arr[0], arr[1], Long.parseLong(arr[2]));
46 | }).returns(new TypeHint<Tuple3<String, String, Long>>() {
47 | });
48 |
49 | /**
50 | * 1,bj,1000
51 | * 2,sh,2000
52 | * 4,xa,2600
53 | * 5,yn,12000
54 | */
55 | DataStreamSource<String> s2 = env.socketTextStream("doitedu", 9999);
56 | SingleOutputStreamOperator<Tuple3<String, String, Long>> ss2 = s2.map(s -> {
57 | String[] arr = s.split(",");
58 | return Tuple3.of(arr[0], arr[1], Long.parseLong(arr[2]));
59 | }).returns(new TypeHint<Tuple3<String, String, Long>>() {
60 | });
61 |
62 |
63 | // create the two tables
64 | tenv.createTemporaryView("t_left",ss1, Schema.newBuilder()
65 | .column("f0", DataTypes.STRING())
66 | .column("f1", DataTypes.STRING())
67 | .column("f2", DataTypes.BIGINT())
68 | .columnByExpression("rt","to_timestamp_ltz(f2,3)")
69 | .watermark("rt","rt - interval '0' second")
70 | .build());
71 |
72 | tenv.createTemporaryView("t_right",ss2, Schema.newBuilder()
73 | .column("f0", DataTypes.STRING())
74 | .column("f1", DataTypes.STRING())
75 | .column("f2", DataTypes.BIGINT())
76 | .columnByExpression("rt","to_timestamp_ltz(f2,3)")
77 | .watermark("rt","rt - interval '0' second")
78 | .build());
79 |
80 |
81 |
82 | // interval join
83 | tenv.executeSql("select a.f0,a.f1,a.f2,b.f0,b.f1 from t_left a join t_right b " +
84 | "on a.f0=b.f0 " +
85 | "and a.rt between b.rt - interval '2' second and b.rt").print();
86 |
87 |
88 |
89 | }
90 | }
91 |
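The same join can be written with the DataStream API's intervalJoin; a sketch over the two keyed tuple streams, assuming event-time timestamps and watermarks have already been assigned on ss1 and ss2 (requires the ProcessJoinFunction, Time and Collector imports):

    ss1.keyBy(t -> t.f0)
       .intervalJoin(ss2.keyBy(t -> t.f0))
       .between(Time.seconds(0), Time.seconds(2))   // right.ts in [left.ts, left.ts + 2s], the same bound as the SQL above
       .process(new ProcessJoinFunction<Tuple3<String, String, Long>, Tuple3<String, String, Long>, String>() {
           @Override
           public void processElement(Tuple3<String, String, Long> left, Tuple3<String, String, Long> right, Context ctx, Collector<String> out) {
               out.collect(left + " <-> " + right);
           }
       });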
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo18_RegularJoin.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.api.common.typeinfo.TypeHint;
4 | import org.apache.flink.api.java.tuple.Tuple3;
5 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
8 | import org.apache.flink.table.api.DataTypes;
9 | import org.apache.flink.table.api.Schema;
10 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
11 |
12 | /**
13 |  * @Author: deep as the sea
14 |  * @Site: 多易教育
15 |  * @QQ: 657270652
16 |  * @Date: 2022/6/16
17 |  * @Desc: Learn big data at 多易教育
18 |  *   Regular join example.
19 |  *   Regular joins are implemented by caching both tables' rows in state,
20 |  *   so the state can keep growing; as a safeguard, a state TTL can be set
21 |  *   to cap its size.
22 |  **/
23 | public class Demo18_RegularJoin {
24 | public static void main(String[] args) {
25 |
26 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
27 | env.setParallelism(1);
28 |
29 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env);
30 |         // set the state TTL for stateful operators in the table environment
31 | tenv.getConfig().getConfiguration().setLong("table.exec.state.ttl",60*60*1000L);
32 |
33 |
34 |
35 | /**
36 | * 1,a,1000
37 | * 2,b,2000
38 | * 3,c,2500
39 | * 4,d,3000
40 | * 5,e,12000
41 | */
42 |         DataStreamSource<String> s1 = env.socketTextStream("doitedu", 9998);
43 |         SingleOutputStreamOperator<Tuple3<String, String, Long>> ss1 = s1.map(s -> {
44 |             String[] arr = s.split(",");
45 |             return Tuple3.of(arr[0], arr[1], Long.parseLong(arr[2]));
46 |         }).returns(new TypeHint<Tuple3<String, String, Long>>() {
47 |         });
48 |
49 | /**
50 | * 1,bj,1000
51 | * 2,sh,2000
52 | * 4,xa,2600
53 | * 5,yn,12000
54 | */
55 |         DataStreamSource<String> s2 = env.socketTextStream("doitedu", 9999);
56 |         SingleOutputStreamOperator<Tuple3<String, String, Long>> ss2 = s2.map(s -> {
57 |             String[] arr = s.split(",");
58 |             return Tuple3.of(arr[0], arr[1], Long.parseLong(arr[2]));
59 |         }).returns(new TypeHint<Tuple3<String, String, Long>>() {
60 |         });
61 |
62 |
63 |         // register the two streams as tables
64 | tenv.createTemporaryView("t_left",ss1, Schema.newBuilder()
65 | .column("f0", DataTypes.STRING())
66 | .column("f1", DataTypes.STRING())
67 | .column("f2", DataTypes.BIGINT())
68 | .columnByExpression("rt","to_timestamp_ltz(f2,3)")
69 | .watermark("rt","rt - interval '0' second")
70 | .build());
71 |
72 | tenv.createTemporaryView("t_right",ss2, Schema.newBuilder()
73 | .column("f0", DataTypes.STRING())
74 | .column("f1", DataTypes.STRING())
75 | .column("f2", DataTypes.BIGINT())
76 | .columnByExpression("rt","to_timestamp_ltz(f2,3)")
77 | .watermark("rt","rt - interval '0' second")
78 | .build());
79 |
80 |
81 | // left join
82 | tenv.executeSql("select a.f0,a.f1,a.f2,b.f0,b.f1 from t_left a left join t_right b on a.f0=b.f0")/*.print()*/;
83 |
84 | // inner join
85 | tenv.executeSql("select a.f0,a.f1,a.f2,b.f0,b.f1 from t_left a join t_right b on a.f0=b.f0").print();
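        // Note: the left join produces an updating stream: if a matching right-side row
        // arrives after a NULL-padded result was emitted, that result is retracted and
        // replaced. The inner join, with these append-only inputs, only ever inserts.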
86 |
87 |
88 |
89 | }
90 | }
91 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo19_ArrayJoin.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.table.annotation.DataTypeHint;
4 | import org.apache.flink.table.annotation.FunctionHint;
5 | import org.apache.flink.table.api.*;
6 | import org.apache.flink.table.functions.TableFunction;
7 | import org.apache.flink.types.Row;
8 |
9 | import static org.apache.flink.table.api.Expressions.array;
10 | import static org.apache.flink.table.api.Expressions.row;
11 |
12 | public class Demo19_ArrayJoin {
13 | public static void main(String[] args) {
14 | TableEnvironment tenv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());
15 |
16 | Table table = tenv.fromValues(DataTypes.ROW(
17 | DataTypes.FIELD("id", DataTypes.INT()),
18 | DataTypes.FIELD("name", DataTypes.STRING()),
19 | DataTypes.FIELD("tags", DataTypes.ARRAY(DataTypes.STRING())))
20 | , row("1", "zs", array("stu", "child"))
21 | , row("2", "bb", array("miss"))
22 | );
23 |
24 | tenv.createTemporaryView("t",table);
25 |
26 |
27 | tenv.executeSql("select t.id,t.name,x.tag from t cross join unnest(tags) as x(tag)")/*.print()*/;
28 |
29 |
30 | tenv.createTemporarySystemFunction("mysplit",MySplit.class);
31 | tenv.executeSql("select t.id,t.name,tag from t, lateral table(mysplit(tags)) ")/*.print()*/;
32 | tenv.executeSql("select t.id,t.name,x.tag2 from t, lateral table(mysplit(tags)) x(tag2)")/*.print()*/;
33 | tenv.executeSql("select t.id,t.name,tag from t left join lateral table(mysplit(tags)) on true")/*.print()*/;
34 | tenv.executeSql("select t.id,t.name,x.tag2 from t left join lateral table(mysplit(tags)) x(tag2) on true").print();
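        // "cross join unnest" and the comma/lateral-table syntax are equivalent here;
        // the "left join ... on true" variants additionally keep input rows for which
        // the array (or table function) yields no elements.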
35 | }
36 |
37 |     @FunctionHint(output = @DataTypeHint("ROW<tag STRING>"))
38 |     public static class MySplit extends TableFunction<Row> {
39 |
40 | public void eval(String[] arr){
41 | for (String s : arr) {
42 | collect(Row.of(s));
43 | }
44 | }
45 |
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo19_LookupJoin.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.api.common.typeinfo.TypeHint;
4 | import org.apache.flink.api.java.tuple.Tuple2;
5 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
8 | import org.apache.flink.table.api.DataTypes;
9 | import org.apache.flink.table.api.Schema;
10 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
11 |
12 | /**
13 |  * @Author: deep as the sea
14 |  * @Site: 多易教育
15 |  * @QQ: 657270652
16 |  * @Date: 2022/6/16
17 |  * @Desc: Learn big data at 多易教育
18 |  *   Lookup join example: enriching a stream against a JDBC dimension table.
19 |  *   (The state-TTL setting below is the general safeguard for joins whose
20 |  *   state could otherwise keep growing.)
21 |  **/
22 | public class Demo19_LookupJoin {
23 | public static void main(String[] args) throws Exception {
24 |
25 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
26 | env.setParallelism(1);
27 |
28 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env);
30 |         // set the state TTL for stateful operators in the table environment
30 | tenv.getConfig().getConfiguration().setLong("table.exec.state.ttl", 60 * 60 * 1000L);
31 |
32 |
33 | /**
34 | * 1,a
35 | * 2,b
36 | * 3,c
37 | * 4,d
38 | * 5,e
39 | */
40 |         DataStreamSource<String> s1 = env.socketTextStream("doitedu", 9998);
41 |         SingleOutputStreamOperator<Tuple2<Integer, String>> ss1 = s1.map(s -> {
42 |             String[] arr = s.split(",");
43 |             return Tuple2.of(Integer.parseInt(arr[0]), arr[1]);
44 |         }).returns(new TypeHint<Tuple2<Integer, String>>() {
45 |         });
46 |
47 |
48 |         // register the probe-side table (a processing-time attribute column must be declared)
49 | tenv.createTemporaryView("a", ss1, Schema.newBuilder()
50 | .column("f0", DataTypes.INT())
51 | .column("f1", DataTypes.STRING())
52 |                 .columnByExpression("pt", "proctime()") // declare the processing-time attribute
53 | .build());
54 |
55 |         // create the lookup dimension table (jdbc connector)
56 | tenv.executeSql(
57 | "create table b( \n" +
58 | " id int , \n" +
59 | " name string, \n" +
60 | " gender STRING, \n" +
61 | " primary key(id) not enforced \n" +
62 | ") with (\n" +
63 | " 'connector' = 'jdbc',\n" +
64 | " 'url' = 'jdbc:mysql://doitedu:3306/flinktest',\n" +
65 | " 'table-name' = 'stu2',\n" +
66 | " 'username' = 'root',\n" +
67 | " 'password' = 'root' \n" +
68 | ")"
69 | );
70 |
71 |         // lookup join query
72 | tenv.executeSql("select a.*,c.* from a JOIN b FOR SYSTEM_TIME AS OF a.pt AS c \n" +
73 | " ON a.f0 = c.id").print();
74 |
75 |
76 | env.execute();
77 | }
78 | }
79 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo1_TableSql.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.table.api.EnvironmentSettings;
4 | import org.apache.flink.table.api.Table;
5 | import org.apache.flink.table.api.TableEnvironment;
6 | import org.apache.flink.table.catalog.CatalogDatabaseImpl;
7 | import org.apache.flink.table.catalog.ConnectorCatalogTable;
8 | import org.apache.flink.table.catalog.ObjectPath;
9 | import org.apache.flink.table.catalog.exceptions.DatabaseAlreadyExistException;
10 | import org.apache.flink.table.catalog.hive.HiveCatalog;
11 |
12 | import java.util.HashMap;
13 |
14 | import static org.apache.flink.table.api.Expressions.$;
15 |
16 | public class Demo1_TableSql {
17 |
18 | public static void main(String[] args) throws DatabaseAlreadyExistException {
19 |
20 | EnvironmentSettings envSettings = EnvironmentSettings.inStreamingMode(); // 流计算模式
21 | TableEnvironment tableEnv = TableEnvironment.create(envSettings);
22 |
23 |         // map the data of a kafka topic to a flinkSql table
24 |         // json : {"id":1,"name":"zs","age":28,"gender":"male"}
25 |         // create table_x (id int,name string,age int,gender string)
26 | tableEnv.executeSql(
27 | "create table t_kafka "
28 | + " ( "
29 | + " id int, "
30 | + " name string, "
31 | + " age int, "
32 | + " gender string "
33 | + " ) "
34 | + " WITH ( "
35 | + " 'connector' = 'kafka', "
36 | + " 'topic' = 'doit30-3', "
37 | + " 'properties.bootstrap.servers' = 'doitedu:9092', "
38 | + " 'properties.group.id' = 'g1', "
39 | + " 'scan.startup.mode' = 'earliest-offset', "
40 | + " 'format' = 'json', "
41 | + " 'json.fail-on-missing-field' = 'false', "
42 | + " 'json.ignore-parse-errors' = 'true' "
43 | + " ) "
44 | );
45 |
46 |
47 |         /**
48 |          * Turn a registered SQL table name into a Table object
49 |          */
50 |         Table table = tableEnv.from("t_kafka");
51 |         // run queries with the Table API
52 | table.groupBy($("gender"))
53 | .select($("gender"), $("age").avg())
54 | .execute()
55 | .print();
56 |
57 |
58 | tableEnv.executeSql("select gender,avg(age) as avg_age from t_kafka group by gender").print();
59 |
60 |
61 | }
62 | }
63 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo20_Temporal_Join.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import lombok.AllArgsConstructor;
4 | import lombok.Data;
5 | import lombok.NoArgsConstructor;
6 | import org.apache.flink.api.common.typeinfo.TypeHint;
7 | import org.apache.flink.api.java.tuple.Tuple2;
8 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
9 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
11 | import org.apache.flink.table.api.DataTypes;
12 | import org.apache.flink.table.api.Schema;
13 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
14 |
15 | /**
16 |  * @Author: deep as the sea
17 |  * @Site: 多易教育
18 |  * @QQ: 657270652
19 |  * @Date: 2022/6/16
20 |  * @Desc: Learn big data at 多易教育
21 |  *   Temporal join example
22 |  **/
23 | public class Demo20_Temporal_Join {
24 | public static void main(String[] args) throws Exception {
25 |
26 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
27 | env.setParallelism(1);
28 |
29 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env);
30 |
31 |
32 |         /**
33 |          * order id, currency, amount, order time; e.g.:
34 |          * 1,a,100,167438436400
35 |          */
36 |         DataStreamSource<String> s1 = env.socketTextStream("doitedu", 9998);
37 |
38 |         SingleOutputStreamOperator<Order> ss1 = s1.map(s -> {
39 |             String[] arr = s.split(",");
40 |             return new Order(Integer.parseInt(arr[0]), arr[1], Double.parseDouble(arr[2]), Long.parseLong(arr[3]));
41 |         });
42 |
43 |
44 |         // register the main (probe) table, declaring an event-time attribute and watermark
45 | tenv.createTemporaryView("orders", ss1, Schema.newBuilder()
46 | .column("orderId", DataTypes.INT())
47 | .column("currency", DataTypes.STRING())
48 | .column("price", DataTypes.DOUBLE())
49 | .column("orderTime", DataTypes.BIGINT())
50 |                 .columnByExpression("rt", "to_timestamp_ltz(orderTime,3)") // derive the event-time column from the epoch-millis field
51 | .watermark("rt","rt")
52 | .build());
53 |
54 |
55 | //tenv.executeSql("select orderId,currency,price,orderTime,rt from orders").print();
56 |
57 |         // create the temporal (versioned) table, fed by mysql-cdc
58 | tenv.executeSql("CREATE TABLE currency_rate (\n" +
59 | " currency STRING, \n" +
60 | " rate double , \n" +
61 | " update_time bigint , \n" +
62 | " rt as to_timestamp_ltz(update_time,3) ," +
63 | " watermark for rt as rt - interval '0' second ," +
64 | " PRIMARY KEY(currency) NOT ENFORCED\n" +
65 | " ) WITH ( \n" +
66 | " 'connector' = 'mysql-cdc',\n" +
67 | " 'hostname' = 'doitedu',\n" +
68 | " 'port' = '3306',\n" +
69 | " 'username' = 'root',\n" +
70 | " 'password' = 'root',\n" +
71 | " 'database-name' = 'flinktest',\n" +
72 | " 'table-name' = 'currency_rate'\n" +
73 | ")");
74 |
75 | //tenv.executeSql("select * from currency_rate").print();
76 |
77 |
78 |
79 |
80 |         // temporal join query
81 | tenv.executeSql(
82 | "SELECT \n" +
83 | " orders.orderId, \n" +
84 | " orders.currency, \n" +
85 | " orders.price, \n" +
86 | " orders.orderTime, \n" +
87 | " rate \n" +
88 | "FROM orders \n" +
89 | "LEFT JOIN currency_rate FOR SYSTEM_TIME AS OF orders.rt \n" +
90 | "ON orders.currency = currency_rate.currency"
91 | ).print();
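        // Semantics: each order is joined against the version of currency_rate that was
        // valid as of the order's event time (orders.rt), as reconstructed from the CDC
        // changelog; results are emitted once the versioned table's watermark passes rt.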
92 |
93 |
94 | env.execute();
95 | }
96 |
97 |
98 | @Data
99 | @NoArgsConstructor
100 | @AllArgsConstructor
101 | public static class Order {
102 |         // order id, currency, amount, order time
103 | public int orderId;
104 | public String currency;
105 | public double price;
106 | public long orderTime;
107 |
108 | }
109 | }
110 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo21_CustomScalarFunction.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.table.api.DataTypes;
4 | import org.apache.flink.table.api.EnvironmentSettings;
5 | import org.apache.flink.table.api.Table;
6 | import org.apache.flink.table.api.TableEnvironment;
7 | import org.apache.flink.table.functions.ScalarFunction;
8 | import org.apache.flink.types.Row;
9 |
10 |
11 | public class Demo21_CustomScalarFunction {
12 | public static void main(String[] args) {
13 |
14 | TableEnvironment tenv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());
15 |
16 | Table table = tenv.fromValues(
17 | DataTypes.ROW(
18 | DataTypes.FIELD("name", DataTypes.STRING())),
19 | Row.of("aaa"),
20 | Row.of("bbb"),
21 | Row.of("ccc")
22 | );
23 |
24 | tenv.createTemporaryView("t",table);
25 |
26 |         // register the user-defined function
27 | tenv.createTemporarySystemFunction("myupper",MyUpper.class);
28 |
29 |         // once registered, it can be used in sql
30 | tenv.executeSql("select myupper(name) from t").print();
31 |
32 | }
33 |
34 |
35 | public static class MyUpper extends ScalarFunction{
36 |
37 | public String eval(String str){
38 | return str.toUpperCase();
39 | }
40 | }
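    // The eval() signature determines the function's SQL signature: myupper(STRING) -> STRING.
    // Additional eval() overloads would expose further overloads in SQL.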
41 |
42 |
43 |
44 |
45 | }
46 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo22_CustomAggregateFunction.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.table.api.DataTypes;
4 | import org.apache.flink.table.api.EnvironmentSettings;
5 | import org.apache.flink.table.api.Table;
6 | import org.apache.flink.table.api.TableEnvironment;
7 | import org.apache.flink.table.functions.AggregateFunction;
8 | import org.apache.flink.types.Row;
9 |
10 | /**
11 |  * @Author: deep as the sea
12 |  * @Site: 多易教育
13 |  * @QQ: 657270652
14 |  * @Date: 2022/6/16
15 |  * @Desc: Learn big data at 多易教育
16 |  *   Custom aggregate function example
17 |  **/
18 | public class Demo22_CustomAggregateFunction {
19 | public static void main(String[] args) {
20 |
21 | TableEnvironment tenv = TableEnvironment.create(EnvironmentSettings.inBatchMode());
22 |
23 | Table table = tenv.fromValues(
24 | DataTypes.ROW(
25 | DataTypes.FIELD("uid", DataTypes.INT()),
26 | DataTypes.FIELD("gender", DataTypes.STRING()),
27 | DataTypes.FIELD("score", DataTypes.DOUBLE())
28 | ),
29 | Row.of(1,"male",80),
30 | Row.of(2,"male",100),
31 | Row.of(3,"female",90)
32 | );
33 |
34 | tenv.createTemporaryView("t",table);
35 |
36 |         // register the user-defined function
37 | tenv.createTemporarySystemFunction("myavg",MyAvg.class);
38 |
39 |         // once registered, it can be used in sql
40 | tenv.executeSql("select gender,myavg(score) as avg_score from t group by gender ").print();
41 |
42 | }
43 |
44 |
45 | public static class MyAccumulator{
46 | public int count;
47 | public double sum;
48 | }
49 |
50 |     public static class MyAvg extends AggregateFunction<Double, MyAccumulator> {
51 |
52 |         /**
53 |          * Get the final result from the accumulator
54 |          * @param accumulator the accumulator which contains the current intermediate results
55 |          * @return the average
56 |          */
57 | @Override
58 | public Double getValue(MyAccumulator accumulator) {
59 | return accumulator.sum/ accumulator.count;
60 | }
61 |
62 |         /**
63 |          * Create the accumulator
64 |          * @return a fresh accumulator
65 |          */
66 | @Override
67 | public MyAccumulator createAccumulator() {
68 | MyAccumulator myAccumulator = new MyAccumulator();
69 | myAccumulator.count = 0;
70 | myAccumulator.sum = 0;
71 |
72 |
73 | return myAccumulator;
74 | }
75 |
76 |
77 |         /**
78 |          * How to update the accumulator for each input row
79 |          * @param accumulator
80 |          * @param score
81 |          */
82 | public void accumulate(MyAccumulator accumulator,Double score){
83 |
84 | accumulator.count = accumulator.count + 1;
85 | accumulator.sum = accumulator.sum + score;
86 |
87 | }
88 |
89 |
90 | }
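    // Note (not in the original): depending on where the function is used (e.g. in
    // session-window aggregations), Flink may additionally require a
    // merge(MyAccumulator, Iterable<MyAccumulator>) method, and a retract(...) method
    // for queries that update previously emitted results.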
91 |
92 | }
93 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo23_TableFunction.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.table.annotation.DataTypeHint;
4 | import org.apache.flink.table.annotation.FunctionHint;
5 | import org.apache.flink.table.api.*;
6 | import org.apache.flink.table.functions.TableFunction;
7 | import org.apache.flink.types.Row;
8 |
9 |
10 | public class Demo23_TableFunction {
11 |
12 | public static void main(String[] args) {
13 |
14 | TableEnvironment tenv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());
15 |
16 | /*
17 | Table table = tenv.fromValues(DataTypes.ROW(
18 | DataTypes.FIELD("id", DataTypes.INT()),
19 | DataTypes.FIELD("name", DataTypes.STRING()),
20 | DataTypes.FIELD("phone_numbers", DataTypes.ARRAY(DataTypes.STRING()))),
21 | Row.of(1, "zs", Expressions.array("138","139","135")),
22 | Row.of(2, "bb", Expressions.array("135","136"))
23 | );
24 |
25 | tenv.createTemporaryView("t",table);
26 | tenv.executeSql("select t.id,t.name,t2.phone_number from t cross join unnest(phone_numbers) as t2(phone_number)").print();
27 | */
28 |
29 | Table table = tenv.fromValues(DataTypes.ROW(
30 | DataTypes.FIELD("id", DataTypes.INT()),
31 | DataTypes.FIELD("name", DataTypes.STRING()),
32 | DataTypes.FIELD("phone_numbers", DataTypes.STRING())),
33 | Row.of(1, "zs", "13888,137,1354455"),
34 | Row.of(2, "bb", "1366688,1374,132224455")
35 | );
36 | tenv.createTemporaryView("t",table);
37 |
38 |
39 |         // register the function
40 | tenv.createTemporarySystemFunction("mysplit",MySplit.class);
41 |
42 |         // explode the phone-number string
43 | tenv.executeSql("select * from t , lateral table(mysplit(phone_numbers,',')) as t1(p,l) ")/*.print()*/;
44 | tenv.executeSql("select * from t left join lateral table(mysplit(phone_numbers,',')) as t1(p,l) on true ").print();
45 |
46 |
47 |
48 | }
49 |
50 |     @FunctionHint(output = @DataTypeHint("ROW<word STRING, len INT>"))  // field names are illustrative; the queries re-alias them as t1(p, l)
51 |     public static class MySplit extends TableFunction<Row> {
52 |
53 | public void eval(String str,String delimiter){
54 | for (String s : str.split(delimiter)) {
55 | collect(Row.of(s,s.length()));
56 | }
57 | }
58 | }
59 |
60 | }
61 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo24_TableAggregateFunction.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.api.java.tuple.Tuple2;
4 | import org.apache.flink.table.annotation.DataTypeHint;
5 | import org.apache.flink.table.annotation.FunctionHint;
6 | import org.apache.flink.table.api.DataTypes;
7 | import org.apache.flink.table.api.EnvironmentSettings;
8 | import org.apache.flink.table.api.Table;
9 | import org.apache.flink.table.api.TableEnvironment;
10 | import org.apache.flink.table.functions.TableAggregateFunction;
11 | import org.apache.flink.types.Row;
12 | import org.apache.flink.util.Collector;
13 |
14 | import static org.apache.flink.table.api.Expressions.$;
15 | import static org.apache.flink.table.api.Expressions.call;
16 |
17 | /**
18 |  * @Author: deep as the sea
19 |  * @Site: 多易教育
20 |  * @QQ: 657270652
21 |  * @Date: 2022/6/17
22 |  * @Desc: Learn big data at 多易教育
23 |  *   Custom table aggregate function example.
24 |  *   What a table aggregate function is, given the data:
25 |  *     1,male,zs,88
26 |  *     2,male,bb,99
27 |  *     3,male,cc,76
28 |  *     4,female,dd,78
29 |  *     5,female,ee,92
30 |  *     6,female,ff,86
31 |  *
32 |  * -- find the two highest scores per gender
33 |  * -- conventional approach:
34 |  * SELECT
35 |  *   *
36 |  * FROM
37 |  * (
38 |  *   SELECT
39 |  *     gender,
40 |  *     score,
41 |  *     row_number() over(partition by gender order by score desc) as rn
42 |  *   FROM t
43 |  * )
44 |  * where rn<=2
45 |  *
46 |  *
47 |  * -- with an aggregate function that, in group-by mode, can emit multiple
48 |  * -- rows and columns per group, the same result could be written as:
49 |  * SELECT
50 |  *   gender,
51 |  *   top2(score)
52 |  * from t
53 |  * group by gender
54 |  *
55 |  * male,88
56 |  * male,99
57 |  * female,92
58 |  * female,86
59 |  **/
59 | public class Demo24_TableAggregateFunction {
60 |
61 | public static void main(String[] args) {
62 | TableEnvironment tenv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());
63 | Table table = tenv.fromValues(DataTypes.ROW(
64 | DataTypes.FIELD("id", DataTypes.INT()),
65 | DataTypes.FIELD("gender", DataTypes.STRING()),
66 | DataTypes.FIELD("score", DataTypes.DOUBLE())),
67 | Row.of(1, "male", 67),
68 | Row.of(2, "male", 88),
69 | Row.of(3, "male", 98),
70 | Row.of(4, "female", 99),
71 | Row.of(5, "female", 84),
72 | Row.of(6, "female", 89)
73 | );
74 | tenv.createTemporaryView("t", table);
75 |
76 |         // one flatAggregate call yields the top-2 scores per gender
77 | table
78 | .groupBy($("gender"))
79 | .flatAggregate(call(MyTop2.class, $("score")))
80 | .select($("gender"), $("score_top"), $("rank_no"))
81 | .execute().print();
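        // With the sample rows above, the final per-gender results are:
        //   male   -> (98.0, 1), (88.0, 2)
        //   female -> (99.0, 1), (89.0, 2)
        // (in streaming mode the printed changelog also shows the intermediate
        // retractions that led there)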
82 |
83 |
84 | }
85 |
86 | public static class MyAccumulator {
87 |
88 | public double first;
89 | public double second;
90 |
91 | }
92 |
93 |     @FunctionHint(output = @DataTypeHint("ROW<score_top DOUBLE, rank_no INT>"))
94 |     public static class MyTop2 extends TableAggregateFunction<Row, MyAccumulator> {
95 |
96 | @Override
97 | public MyAccumulator createAccumulator() {
98 |
99 |             MyAccumulator acc = new MyAccumulator();
100 |             acc.first = Double.MIN_VALUE;   // note: Double.MIN_VALUE is the smallest positive double;
101 |             acc.second = Double.MIN_VALUE;  // it only works as an "empty" sentinel because scores are non-negative
102 |
103 | return acc;
104 | }
105 |
106 |
107 |         /**
108 |          * Accumulate / update logic
109 |          *
110 |          * @param acc
111 |          * @param score
112 |          */
113 | public void accumulate(MyAccumulator acc, Double score) {
114 | if (score > acc.first) {
115 | acc.second = acc.first;
116 | acc.first = score;
117 | } else if (score > acc.second) {
118 | acc.second = score;
119 | }
120 | }
121 |
122 | public void merge(MyAccumulator acc, Iterable it) {
123 | for (MyAccumulator otherAcc : it) {
124 | accumulate(acc, otherAcc.first);
125 | accumulate(acc, otherAcc.second);
126 | }
127 | }
128 |
129 |         /**
130 |          * Emit the result: may output multiple rows and columns
131 |          *
132 |          * @param acc
133 |          * @param out
134 |          */
135 | public void emitValue(MyAccumulator acc, Collector out) {
136 | if (acc.first != Double.MIN_VALUE) {
137 | out.collect(Row.of(acc.first, 1));
138 | }
139 | if (acc.second != Double.MIN_VALUE) {
140 | out.collect(Row.of(acc.second, 2));
141 | }
142 | }
143 | }
144 |
145 |
146 | }
147 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo25_MetricDemos.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.api.common.accumulators.LongCounter;
4 | import org.apache.flink.configuration.Configuration;
5 | import org.apache.flink.metrics.Gauge;
6 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
8 | import org.apache.flink.streaming.api.functions.ProcessFunction;
9 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
10 | import org.apache.flink.util.Collector;
11 |
12 | public class Demo25_MetricDemos {
13 |
14 | public static void main(String[] args) throws Exception {
15 |
16 | StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());
17 |
18 |         DataStreamSource<String> ds = env.socketTextStream("doitedu", 9999);
19 |
20 |         ds.process(new ProcessFunction<String, String>() {
21 |             LongCounter longCounter;
22 |
23 |             MyGuage gauge;
24 | @Override
25 | public void open(Configuration parameters) throws Exception {
26 |
27 | longCounter = getRuntimeContext().getLongCounter("doitedu-counter1");
28 |
29 |
30 | gauge = getRuntimeContext().getMetricGroup().gauge("doitedu-gauge", new MyGuage());
31 | }
32 |
33 | @Override
34 |             public void processElement(String value, Context ctx, Collector<String> out) throws Exception {
35 |
36 |                 // metric code outside the business logic: measures how many records this task has received
37 | longCounter.add(1);
38 |
39 | gauge.add(1);
40 |
41 |
42 | out.collect(value.toUpperCase());
43 | }
44 | }).print();
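        // The gauge appears under the task's metrics in the local web UI started by
        // createLocalEnvironmentWithWebUI (http://localhost:8081 by default, assuming
        // the port is free); the LongCounter is an accumulator, shown on the job's
        // accumulators tab and in the final job result.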
45 |
46 | env.execute();
47 |
48 |
49 | }
50 |
51 |
52 |     public static class MyGuage implements Gauge<Integer>{
53 |
54 | int recordCount = 0;
55 |
56 | public void add(int i){
57 | recordCount += i;
58 | }
59 |
60 | @Override
61 | public Integer getValue() {
62 | return recordCount;
63 | }
64 | }
65 |
66 | }
67 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo2_TableApi.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.api.common.RuntimeExecutionMode;
4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
5 | import org.apache.flink.table.api.*;
6 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
7 |
8 | import static org.apache.flink.table.api.Expressions.$;
9 |
10 | public class Demo2_TableApi {
11 |
12 | public static void main(String[] args) {
13 |
14 |         // a pure Table environment
15 | // TableEnvironment tenv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());
16 |
17 |         // a mixed (DataStream + Table) environment
18 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
19 | env.setRuntimeMode(RuntimeExecutionMode.STREAMING);
20 | StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
21 |
22 |
23 |         // create the table
24 |         Table table = tableEnv.from(TableDescriptor
25 |                 .forConnector("kafka") // specify the connector
26 |                 .schema(Schema.newBuilder() // specify the schema
27 |                         .column("id", DataTypes.INT())
28 |                         .column("name", DataTypes.STRING())
29 |                         .column("age", DataTypes.INT())
30 |                         .column("gender", DataTypes.STRING())
31 |                         .build())
32 |                 .format("json") // specify the source data format
33 |                 .option("topic", "doit30-3") // connector- and format-specific options
34 | .option("properties.bootstrap.servers", "doit01:9092")
35 | .option("properties.group.id", "g2")
36 | .option("scan.startup.mode", "earliest-offset")
37 | .option("json.fail-on-missing-field", "false")
38 | .option("json.ignore-parse-errors", "true")
39 | .build());
40 |
41 |
42 |         // query
43 | Table table2 = table.groupBy($("gender"))
44 | .select($("gender"),$("age").avg().as("avg_age"));
45 |
46 |
47 |         /**
48 |          * Register an existing Table object as a SQL view name
49 |          */
50 |         tableEnv.createTemporaryView("kafka_table",table);
51 |         // after that, it can be queried with sql
52 | tableEnv.executeSql("select gender,avg(age) as avg_age from kafka_table group by gender").print();
53 |
54 |
55 |
56 |
57 |         // output
58 | table2.execute().print();
59 |
60 |
61 | }
62 |
63 |
64 | }
65 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo4_SqlTableCreate.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.api.common.RuntimeExecutionMode;
4 | import org.apache.flink.api.common.eventtime.WatermarkStrategy;
5 | import org.apache.flink.api.common.serialization.SimpleStringSchema;
6 | import org.apache.flink.connector.kafka.source.KafkaSource;
7 | import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
8 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
9 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
11 | import org.apache.flink.table.api.*;
12 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
13 | import org.apache.kafka.clients.consumer.OffsetResetStrategy;
14 |
15 | /**
16 |  * Creating tables that have SQL names,
17 |  * in various ways
18 |  */
19 | /**
20 |  * @Author: deep as the sea
21 |  * @Site: 多易教育
22 |  * @QQ: 657270652
23 |  * @Date: 2022/6/12
24 |  * @Desc: Learn big data at 多易教育
25 |  *   Table-creation examples: tables with SQL names
26 |  **/
27 | public class Demo4_SqlTableCreate {
28 |
29 | public static void main(String[] args) {
30 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
31 | env.setRuntimeMode(RuntimeExecutionMode.STREAMING);
32 |
33 | EnvironmentSettings environmentSettings = EnvironmentSettings.inStreamingMode();
34 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env, environmentSettings);
35 |
36 |         /**
37 |          * 1. Create a "named" (SQL) table by building a TableDescriptor
38 |          */
39 |         tenv.createTable("table_a", // table name
40 | TableDescriptor.forConnector("filesystem")
41 | .schema(Schema.newBuilder()
42 | .column("id", DataTypes.INT())
43 | .column("name", DataTypes.STRING())
44 | .column("age", DataTypes.INT())
45 | .column("gender", DataTypes.STRING())
46 | .build())
47 | .format("csv")
48 | .option("path", "data/sqldemo/a.txt")
49 | .option("csv.ignore-parse-errors", "true")
50 | .build());
51 |
52 |
53 | tenv.executeSql("select * from table_a").print();
54 |         System.exit(1);  // exits early for a quick test; comment this out to run sections 2 and 3 below
55 | tenv.executeSql("select gender,max(age) as max_age from table_a group by gender")/*.print()*/;
56 |
57 |
58 |         /**
59 |          * 2. Create a "named" view from a DataStream
60 |          */
61 |         DataStreamSource<String> stream1 = env.socketTextStream("doit01", 9999);
62 |         SingleOutputStreamOperator<Demo3_TableObjectCreate.Person> javaBeanStream = stream1.map(s -> {
63 | String[] split = s.split(",");
64 | return new Demo3_TableObjectCreate.Person(Integer.parseInt(split[0]), split[1], Integer.parseInt(split[2]), split[3]);
65 | });
66 | tenv.createTemporaryView("t_person", javaBeanStream);
67 | tenv.executeSql("select gender,max(age) as max_age from t_person group by gender")/*.print()*/;
68 |
69 |
70 |         /**
71 |          * 3. Create a "named" view from an existing Table object
72 |          */
73 | Table table_a = tenv.from("table_a");
74 | tenv.createTemporaryView("table_x",table_a);
75 | tenv.executeSql("select * from table_x").print();
76 | }
77 |
78 | }
79 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo6_Exercise.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 |
4 | import org.apache.flink.table.api.EnvironmentSettings;
5 | import org.apache.flink.table.api.TableEnvironment;
6 | import org.apache.flink.table.api.TableResult;
7 |
13 | /**
14 |  * @Author: deep as the sea
15 |  * @Site: 多易教育
16 |  * @QQ: 657270652
17 |  * @Date: 2022/6/12
18 |  * @Desc: Learn big data at 多易教育
19 |  * >>>>> Exercise requirements >>>>>>>
20 |  * Basic: kafka contains data like:
21 |  * {"id":1,"name":"zs","nick":"tiedan","age":18,"gender":"male"}
22 |  *
23 |  * Advanced: kafka contains data like:
24 |  * {"id":1,"name":{"formal":"zs","nick":"tiedan"},"age":18,"gender":"male"}
25 |  *
26 |  * Use flinkSql to compute, over the stream so far:
27 |  *   - per nick, how many distinct users
28 |  *   - per gender, the maximum age
29 |  **/
30 | public class Demo6_Exercise {
31 | public static void main(String[] args) {
32 | TableEnvironment tenv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());
33 |
34 |         // create the source table
35 | tenv.executeSql(
36 | "create table t_person "
37 | + " ( "
38 | + " id int, "
39 | + " name string, "
40 | + " nick string, "
41 | + " age int, "
42 | + " gender string "
43 | + " ) "
44 | + " WITH ( "
45 | + " 'connector' = 'kafka', "
46 | + " 'topic' = 'doit30-4', "
47 | + " 'properties.bootstrap.servers' = 'doitedu:9092', "
48 | + " 'properties.group.id' = 'g1', "
49 | + " 'scan.startup.mode' = 'earliest-offset', "
50 | + " 'format' = 'json', "
51 | + " 'json.fail-on-missing-field' = 'false', "
52 | + " 'json.ignore-parse-errors' = 'true' "
53 | + " ) "
54 | );
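        // Sketch for the "advanced" nested-JSON case (an illustration, not implemented here):
        // the nested name object can be declared as a ROW type and queried with dot syntax,
        // e.g. (t_person2 is a hypothetical name):
        //   create table t_person2 (
        //     id int,
        //     name row<formal string, nick string>,
        //     age int,
        //     gender string
        //   ) with ( ...same kafka/json options as above... )
        //   select name.nick, count(distinct id) as user_cnt from t_person2 group by name.nick;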
55 |
56 |
57 |         // create the sink table
58 |         // the kafka connector cannot accept UPDATE (changelog) rows, only INSERT rows;
59 |         // since our query produces updates, an upsert-kafka connector table is needed to receive them
60 | tenv.executeSql(
61 | "create table t_nick_cnt "
62 | + " ( "
63 | + " nick string primary key not enforced, "
64 | + " user_cnt bigint "
65 | + " ) "
66 | + " WITH ( "
67 | + " 'connector' = 'upsert-kafka', "
68 | + " 'topic' = 'doit30-nick', "
69 | + " 'properties.bootstrap.servers' = 'doitedu:9092', "
70 | + " 'key.format' = 'json' , "
71 | + " 'value.format' = 'json' "
72 | + " ) "
73 | );
74 |
75 |
76 |         // run the query and write the result into the sink table
77 | //TableResult tableResult = tenv.executeSql("select nick,count(distinct id) as user_cnt from t_person group by nick");
78 | tenv.executeSql(
79 | "insert into t_nick_cnt " +
80 | "select nick,count(distinct id) as user_cnt from t_person group by nick");
81 |
82 | }
83 |
84 | }
85 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo7_ColumnDetail1_Sql.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.table.api.EnvironmentSettings;
4 | import org.apache.flink.table.api.TableEnvironment;
5 |
6 | /**
7 | * @Author: deep as the sea
8 | * @Site: 多易教育
9 | * @QQ: 657270652
10 | * @Date: 2022/6/11
11 |  * @Desc: detailed schema-definition example (table structure defined via SQL DDL)
12 | **/
13 | public class Demo7_ColumnDetail1_Sql {
14 | public static void main(String[] args) {
15 |
16 | TableEnvironment tenv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());
17 |
18 |         // create the source table
19 |         // {"id":4,"name":"zs","nick":"tiedan","age":18,"gender":"male"}
20 | tenv.executeSql(
21 | "create table t_person "
22 | + " ( "
23 | + " id int , " // -- 物理字段
24 | + " name string, " // -- 物理字段
25 | + " nick string, "
26 | + " age int, "
27 | + " gender string , "
28 | + " guid as id, " // -- 表达式字段(逻辑字段)
29 | + " big_age as age + 10 , " // -- 表达式字段(逻辑字段)
30 | + " offs bigint metadata from 'offset' , " // -- 元数据字段
31 | + " ts TIMESTAMP_LTZ(3) metadata from 'timestamp', " // -- 元数据字段
32 | /*+ " PRIMARY KEY(id,name) NOT ENFORCED "*/ // -- 主键约束
33 | + " ) "
34 | + " WITH ( "
35 | + " 'connector' = 'kafka', "
36 | + " 'topic' = 'doit30-4', "
37 | + " 'properties.bootstrap.servers' = 'doitedu:9092', "
38 | + " 'properties.group.id' = 'g1', "
39 | + " 'scan.startup.mode' = 'earliest-offset', "
40 | + " 'format' = 'json', "
41 | + " 'json.fail-on-missing-field' = 'false', "
42 | + " 'json.ignore-parse-errors' = 'true' "
43 | + " ) "
44 | );
45 |
46 | tenv.executeSql("desc t_person").print();
47 | tenv.executeSql("select * from t_person where id>2").print();
48 |
49 |
50 | }
51 | }
52 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo7_ColumnDetail2_TableApi.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.table.api.*;
4 |
5 | import static org.apache.flink.table.api.Expressions.$;
6 |
7 | /**
8 | * @Author: deep as the sea
9 | * @Site: 多易教育
10 | * @QQ: 657270652
11 | * @Date: 2022/6/11
12 |  * @Desc: detailed schema-definition example (table structure defined via the Table API)
13 | **/
14 | public class Demo7_ColumnDetail2_TableApi {
15 | public static void main(String[] args) {
16 |
17 | TableEnvironment tenv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());
18 |
19 |         // create the source table
20 |         // {"id":4,"name":"zs","nick":"tiedan","age":18,"gender":"male"}
21 |         tenv.createTable("t_person",
22 |                 TableDescriptor
23 |                         .forConnector("kafka")
24 |                         .schema(Schema.newBuilder()
25 |                                 .column("id", DataTypes.INT())       // column() declares a physical column
26 |                                 .column("name", DataTypes.STRING())
27 |                                 .column("nick", DataTypes.STRING())
28 |                                 .column("age", DataTypes.INT())
29 |                                 .column("gender", DataTypes.STRING())
30 |                                 .columnByExpression("guid","id")     // declares a computed (expression) column
31 |                                 /*.columnByExpression("big_age",$("age").plus(10))*/
32 |                                 .columnByExpression("big_age","age + 10") // declares a computed (expression) column
33 |                                 // isVirtual: whether the column appears in the schema when this table is used as a sink
34 |                                 .columnByMetadata("offs",DataTypes.BIGINT(),"offset",true)          // declares a metadata column
35 |                                 .columnByMetadata("ts",DataTypes.TIMESTAMP_LTZ(3),"timestamp",true) // declares a metadata column
36 |                                 /*.primaryKey("id","name")*/
37 | .build())
38 | .format("json")
39 | .option("topic","doit30-4")
40 | .option("properties.bootstrap.servers","doitedu:9092")
41 | .option("properties.group.id","g1")
42 | .option("scan.startup.mode","earliest-offset")
43 | .option("json.fail-on-missing-field","false")
44 | .option("json.ignore-parse-errors","true")
45 | .build()
46 | );
47 |
48 | tenv.executeSql("select * from t_person").print();
49 |
50 |
51 | }
52 | }
53 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo8_CsvFormat.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
4 | import org.apache.flink.table.api.DataTypes;
5 | import org.apache.flink.table.api.EnvironmentSettings;
6 | import org.apache.flink.table.api.Schema;
7 | import org.apache.flink.table.api.TableDescriptor;
8 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
9 |
10 | /**
11 |  * @Author: deep as the sea
12 |  * @Site: 多易教育
13 |  * @QQ: 657270652
14 |  * @Date: 2022/6/12
15 |  * @Desc: Learn big data at 多易教育
16 |  *   csv format in detail
17 |  **/
18 | public class Demo8_CsvFormat {
19 |
20 | public static void main(String[] args) {
21 |
22 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
23 |
24 | EnvironmentSettings settings = EnvironmentSettings.inBatchMode();
25 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env, settings);
26 |
27 |
28 | tenv.executeSql(
29 | "create table t_csv( "
30 | + " id int, "
31 | + " name string, "
32 | + " age string "
33 | + ") with ( "
34 | + " 'connector' = 'filesystem', "
35 | + " 'path' = 'data/csv/', "
36 | + " 'format'='csv', "
37 | + " 'csv.disable-quote-character' = 'false', "
38 | + " 'csv.quote-character' = '|', "
39 | + " 'csv.ignore-parse-errors' = 'true' , "
40 | + " 'csv.null-literal' = '\\N' , "
41 | + " 'csv.allow-comments' = 'true' "
42 | + ") "
43 | );
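        // Hypothetical file contents under data/csv/ that these options would parse:
        //   # lines starting with '#' are skipped (csv.allow-comments)
        //   1,|zs|,28       <-- '|' is the quote character
        //   2,ls,\N         <-- '\N' is read as NULL (csv.null-literal)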
44 |
45 | tenv.executeSql("desc t_csv").print();
46 | tenv.executeSql("select * from t_csv").print();
47 |
48 | }
49 |
50 | }
51 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo9_EventTimeAndWatermark.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
4 | import org.apache.flink.table.api.DataTypes;
5 | import org.apache.flink.table.api.EnvironmentSettings;
6 | import org.apache.flink.table.api.Schema;
7 | import org.apache.flink.table.api.TableDescriptor;
8 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
9 |
10 | /**
11 |  * @Author: deep as the sea
12 |  * @Site: 多易教育
13 |  * @QQ: 657270652
14 |  * @Date: 2022/6/12
15 |  * @Desc: Learn big data at 多易教育
16 |  *   Example of declaring a watermark in DDL
17 |  *
18 |  * test data:
19 | * {"guid":1,"eventId":"e02","eventTime":1655017433000,"pageId":"p001"}
20 | * {"guid":1,"eventId":"e03","eventTime":1655017434000,"pageId":"p001"}
21 | * {"guid":1,"eventId":"e04","eventTime":1655017435000,"pageId":"p001"}
22 | * {"guid":1,"eventId":"e05","eventTime":1655017436000,"pageId":"p001"}
23 | * {"guid":1,"eventId":"e06","eventTime":1655017437000,"pageId":"p001"}
24 | * {"guid":1,"eventId":"e07","eventTime":1655017438000,"pageId":"p001"}
25 | * {"guid":1,"eventId":"e08","eventTime":1655017439000,"pageId":"p001"}
26 | *
27 | *
28 | *
29 | **/
30 | public class Demo9_EventTimeAndWatermark {
31 |
32 | public static void main(String[] args) {
33 |
34 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
35 |
36 | EnvironmentSettings settings = EnvironmentSettings.inStreamingMode();
37 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env, settings);
38 |
39 |
40 |         /**
41 |          * Only TIMESTAMP or TIMESTAMP_LTZ columns can be declared as rowtime (the event-time attribute)
42 |          */
43 | tenv.executeSql(
44 | " create table t_events( "
45 | + " guid int, "
46 | + " eventId string, "
47 | /*+ " eventTime timestamp(3), "*/
48 | + " eventTime bigint, "
49 | + " pageId string, "
50 | + " pt AS proctime(), " // 利用一个表达式字段,来声明 processing time属性
51 | + " rt as to_timestamp_ltz(eventTime,3), "
52 | + " watermark for rt as rt - interval '0.001' second " // 用watermark for xxx,来将一个已定义的TIMESTAMP/TIMESTAMP_LTZ字段声明成 eventTime属性及指定watermark策略
53 | + " ) "
54 | + " with ( "
55 | + " 'connector' = 'kafka', "
56 | + " 'topic' = 'doit30-events2', "
57 | + " 'properties.bootstrap.servers' = 'doitedu:9092', "
58 | + " 'properties.group.id' = 'g1', "
59 | + " 'scan.startup.mode' = 'earliest-offset', "
60 | + " 'format' = 'json', "
61 | + " 'json.fail-on-missing-field' = 'false', "
62 | + " 'json.ignore-parse-errors' = 'true' "
63 | + " ) "
64 | );
65 |
66 | tenv.executeSql("desc t_events")/*.print()*/;
67 | tenv.executeSql("select guid,eventId,eventTime,pageId,pt,rt,CURRENT_WATERMARK(rt) as wm from t_events").print();
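        // Note: CURRENT_WATERMARK(rt) is NULL before the first watermark is emitted;
        // afterwards it trails the largest rt seen so far by the declared 1 ms bound
        // (watermarks are emitted periodically, so the column updates with a short lag).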
68 |
69 |
70 | }
71 |
72 | }
73 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo9_EventTimeAndWatermark3.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.demos;
2 |
3 | import com.alibaba.fastjson.JSON;
4 | import lombok.AllArgsConstructor;
5 | import lombok.Data;
6 | import lombok.NoArgsConstructor;
7 | import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
8 | import org.apache.flink.api.common.eventtime.WatermarkStrategy;
9 | import org.apache.flink.streaming.api.datastream.DataStream;
10 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
11 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
12 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
13 | import org.apache.flink.streaming.api.functions.ProcessFunction;
14 | import org.apache.flink.table.api.DataTypes;
15 | import org.apache.flink.table.api.EnvironmentSettings;
16 | import org.apache.flink.table.api.Schema;
17 | import org.apache.flink.table.api.Table;
18 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
19 | import org.apache.flink.types.Row;
20 | import org.apache.flink.util.Collector;
21 |
22 | /**
23 |  * @Author: deep as the sea
24 |  * @Site: 多易教育
25 |  * @QQ: 657270652
26 |  * @Date: 2022/6/12
27 |  * @Desc: Learn big data at 多易教育
28 |  *   Stream ===> table: how event time and watermarks are carried across the conversion
29 |  **/
30 | public class Demo9_EventTimeAndWatermark3 {
31 |
32 | public static void main(String[] args) throws Exception {
33 |
34 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
35 | env.setParallelism(1);
36 |
37 | EnvironmentSettings settings = EnvironmentSettings.inStreamingMode();
38 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env, settings);
39 |
40 |
41 | tenv.executeSql(
42 | " create table t_events( "
43 | + " guid int, "
44 | + " eventId string, "
45 | + " eventTime bigint, "
46 | + " pageId string, "
47 | /*+ " pt AS proctime(), "*/ // 利用一个表达式字段,来声明 processing time属性
48 | + " rt as to_timestamp_ltz(eventTime,3), "
49 | + " watermark for rt as rt - interval '1' second " // 用watermark for xxx,来将一个已定义的TIMESTAMP/TIMESTAMP_LTZ字段声明成 eventTime属性及指定watermark策略
50 | + " ) "
51 | + " with ( "
52 | + " 'connector' = 'kafka', "
53 | + " 'topic' = 'doit30-events2', "
54 | + " 'properties.bootstrap.servers' = 'doitedu:9092', "
55 | + " 'properties.group.id' = 'g1', "
56 | + " 'scan.startup.mode' = 'earliest-offset', "
57 | + " 'format' = 'json', "
58 | + " 'json.fail-on-missing-field' = 'false', "
59 | + " 'json.ignore-parse-errors' = 'true' "
60 | + " ) "
61 | );
62 |
63 |
64 | // tenv.executeSql("select guid,eventId,rt,current_watermark(rt) as wm from t_events").print();
65 |
66 |
67 |
68 |         DataStream<Row> ds = tenv.toDataStream(tenv.from("t_events"));
69 |
70 |         ds.process(new ProcessFunction<Row, String>() {
71 |             @Override
72 |             public void processElement(Row value, Context ctx, Collector<String> out) throws Exception {
73 |                 out.collect(value + " => " + ctx.timerService().currentWatermark());
74 |             }
75 |         }).print();
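        // Because the table declares rt as its rowtime attribute, toDataStream() carries
        // the row timestamps and watermarks over into the DataStream, which is what
        // ctx.timerService().currentWatermark() prints here.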
76 |
77 |
78 | env.execute();
79 |
80 |
81 | }
82 |
83 | }
84 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/fuxi/EventBean.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.fuxi;
2 |
3 | import lombok.AllArgsConstructor;
4 | import lombok.Data;
5 | import lombok.NoArgsConstructor;
6 |
7 | import java.util.Map;
8 |
9 | @Data
10 | @NoArgsConstructor
11 | @AllArgsConstructor
12 | public class EventBean {
13 |
14 | private long guid;
15 | private String sessionId;
16 | private String eventId;
17 | private long eventTs;
18 |     private Map<String, String> properties;  // String values assumed; the type parameters were lost in extraction
19 | }
20 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/cn/doitedu/flinksql/fuxi/TimerDemo.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.flinksql.fuxi;
2 |
3 |
4 | import com.alibaba.fastjson.JSON;
5 | import org.apache.flink.api.common.state.ValueState;
6 | import org.apache.flink.api.common.state.ValueStateDescriptor;
7 | import org.apache.flink.api.java.functions.KeySelector;
8 | import org.apache.flink.configuration.Configuration;
9 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
10 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
12 | import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
13 | import org.apache.flink.streaming.api.functions.ProcessFunction;
14 | import org.apache.flink.util.Collector;
15 |
16 | /**
17 |  * Scenario:
18 |  * Monitor the user event stream in real time. When a user places an order (submitOrder),
19 |  * check whether that user pays (payOrder) within 30 minutes of ordering
20 |  * (shortened to 30 seconds in this demo); if not, emit a payment reminder.
21 |  *
22 |  * Key technique: timers (a timer is simply an alarm clock)
23 |  */
23 | public class TimerDemo {
24 |
25 | public static void main(String[] args) throws Exception {
26 |
27 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
28 |
29 |         DataStreamSource<String> stream = env.socketTextStream("doitedu", 9999);
30 |
31 |         SingleOutputStreamOperator<EventBean> stream2 = stream.map(s -> JSON.parseObject(s, EventBean.class));
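        // Hypothetical socket input (one JSON object per line, matching EventBean's fields):
        //   {"guid":1,"sessionId":"s01","eventId":"submitOrder","eventTs":1655017433000}
        //   {"guid":1,"sessionId":"s01","eventId":"payOrder","eventTs":1655017450000}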
32 |
33 | stream2
34 |                 .keyBy(new KeySelector<EventBean, Long>() {
35 |                     @Override
36 |                     public Long getKey(EventBean value) throws Exception {
37 |                         return value.getGuid();
38 |                     }
39 |                 })
40 |                 .process(new KeyedProcessFunction<Long, EventBean, String>() {
41 |
42 |                     ValueState<Long> timerTimeState;
43 |
44 | @Override
45 | public void open(Configuration parameters) throws Exception {
46 |
47 |                         timerTimeState = getRuntimeContext().getState(new ValueStateDescriptor<>("timerTimeState", Long.class));
48 | }
49 |
50 | @Override
51 |                     public void processElement(EventBean eventBean, Context ctx, Collector<String> out) throws Exception {
52 |
53 | if (eventBean.getEventId().equals("submitOrder")) {
54 | // 注册一个定时器,所定的时间在 : 当前处理时间+30S
55 | long timerTime = ctx.timerService().currentProcessingTime() + 30 * 1000L;
56 | ctx.timerService().registerProcessingTimeTimer(timerTime);
57 | // 将定时器时间,放入状态管理器中
58 | timerTimeState.update(timerTime);
59 |
60 | out.collect("检测到用户:" + ctx.getCurrentKey() + ",下单了,注册了一个定时器: " + timerTimeState.value());
61 | }
62 |
63 | if (eventBean.getEventId().equals("payOrder")) {
64 | ctx.timerService().deleteProcessingTimeTimer(timerTimeState.value());
65 | out.collect("检测到用户:" + ctx.getCurrentKey() + ",在下单后的30s内已经支付,取消定时器 " + timerTimeState.value());
66 | }
67 |
68 | }
69 |
70 |                     /**
71 |                      * Called when a registered timer fires
72 |                      */
73 | @Override
74 |                     public void onTimer(long timestamp, OnTimerContext ctx, Collector<String> out) throws Exception {
75 |                         Long guid = ctx.getCurrentKey();
76 |                         out.collect("user " + guid + ": your order is about to time out, please pay soon!");
77 | }
78 | })
79 | .print();
80 |
81 |
82 | env.execute();
83 | }
84 | }
85 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/org/apache/flink/api/common/eventtime/BoundedOutOfOrdernessWatermarks.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 |
19 | package org.apache.flink.api.common.eventtime;
20 |
21 | import org.apache.flink.annotation.Public;
22 |
23 | import java.time.Duration;
24 |
25 | import static org.apache.flink.util.Preconditions.checkArgument;
26 | import static org.apache.flink.util.Preconditions.checkNotNull;
27 |
28 | /**
29 | * A WatermarkGenerator for situations where records are out of order, but you can place an upper
30 | * bound on how far the events are out of order. An out-of-order bound B means that once an event
31 | * with timestamp T was encountered, no events older than {@code T - B} will follow any more.
32 | *
33 | * The watermarks are generated periodically. The delay introduced by this watermark strategy is
34 | * the periodic interval length, plus the out-of-orderness bound.
35 | */
36 | @Public
37 | public class BoundedOutOfOrdernessWatermarks<T> implements WatermarkGenerator<T> {
38 |
39 | /** The maximum timestamp encountered so far. */
40 | private long maxTimestamp;
41 |
42 | /** The maximum out-of-orderness that this watermark generator assumes. */
43 | private final long outOfOrdernessMillis;
44 |
45 | /**
46 | * Creates a new watermark generator with the given out-of-orderness bound.
47 | *
48 | * @param maxOutOfOrderness The bound for the out-of-orderness of the event timestamps.
49 | */
50 | public BoundedOutOfOrdernessWatermarks(Duration maxOutOfOrderness) {
51 | checkNotNull(maxOutOfOrderness, "maxOutOfOrderness");
52 | checkArgument(!maxOutOfOrderness.isNegative(), "maxOutOfOrderness cannot be negative");
53 |
54 | this.outOfOrdernessMillis = maxOutOfOrderness.toMillis();
55 |
56 | // start so that our lowest watermark would be Long.MIN_VALUE.
57 | this.maxTimestamp = Long.MIN_VALUE + outOfOrdernessMillis + 1;
58 | }
59 |
60 | // ------------------------------------------------------------------------
61 |
62 | @Override
63 | public void onEvent(T event, long eventTimestamp, WatermarkOutput output) {
64 | maxTimestamp = Math.max(maxTimestamp, eventTimestamp);
65 | }
66 |
67 | @Override
68 | public void onPeriodicEmit(WatermarkOutput output) {
69 |         // TODO
70 |         // System.out.printf("source periodically emits watermark: %d \n", maxTimestamp - outOfOrdernessMillis - 1);
71 | output.emitWatermark(new Watermark(maxTimestamp - outOfOrdernessMillis - 1));
72 | }
73 | }
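// Typical usage (a sketch; MyEvent and getTimestamp() are placeholder names):
// this generator is what backs
//   WatermarkStrategy.<MyEvent>forBoundedOutOfOrderness(Duration.ofSeconds(2))
//                    .withTimestampAssigner((event, ts) -> event.getTimestamp());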
74 |
--------------------------------------------------------------------------------
/flink_course/src/main/java/tmp/utils/SqlHolder.java:
--------------------------------------------------------------------------------
1 | package tmp.utils;
2 |
3 | public class SqlHolder {
4 |
5 | }
6 |
--------------------------------------------------------------------------------
/flink_course/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | ################################################################################
2 | # Licensed to the Apache Software Foundation (ASF) under one
3 | # or more contributor license agreements. See the NOTICE file
4 | # distributed with this work for additional information
5 | # regarding copyright ownership. The ASF licenses this file
6 | # to you under the Apache License, Version 2.0 (the
7 | # "License"); you may not use this file except in compliance
8 | # with the License. You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | ################################################################################
18 |
19 | log4j.rootLogger = INFO, console, debugFile, errorFile
20 |
21 | log4j.appender.console=org.apache.log4j.ConsoleAppender
22 | log4j.appender.console.layout = org.apache.log4j.PatternLayout
23 | log4j.appender.console.layout.ConversionPattern = [%-5p] %d(%r) --> [%t] %l: %m %x %n
24 |
25 | log4j.appender.debugFile = org.apache.log4j.DailyRollingFileAppender
26 | log4j.appender.debugFile.File = src/logs/debug.log
27 | log4j.appender.debugFile.Append = true
28 | log4j.appender.debugFile.Threshold = debug
29 | log4j.appender.debugFile.layout = org.apache.log4j.PatternLayout
30 | log4j.appender.debugFile.layout.ConversionPattern = %-d{yyyy-MM-dd HH:mm:ss} [ %t:%r ] - [ %p ] %m%n
31 |
32 | log4j.appender.errorFile = org.apache.log4j.DailyRollingFileAppender
33 | log4j.appender.errorFile.File = src/logs/error.log
34 | log4j.appender.errorFile.Append = true
35 | log4j.appender.errorFile.Threshold = error
36 | log4j.appender.errorFile.layout = org.apache.log4j.PatternLayout
37 | log4j.appender.errorFile.layout.ConversionPattern = %-d{yyyy-MM-dd HH:mm:ss} [ %t:%r ] - [ %p ] %m%n
38 |
39 |
40 |
--------------------------------------------------------------------------------
/flink_course/src/main/resources/prts.avsc.bak:
--------------------------------------------------------------------------------
1 | {"namespace": "cn.doitedu.flink.avro.schema",
2 | "type": "record",
3 | "name": "AvroEventLog",
4 | "fields": [
5 | {"name": "guid", "type": "long"},
6 | {"name": "sessionId", "type": "string"},
7 | {"name": "eventId", "type": "string"},
8 | {"name": "timeStamp", "type": "long"},
9 | {"name": "eventInfo", "type": { "type":"map","values": "string"} }
10 | ]
11 | }
--------------------------------------------------------------------------------
/kafka_course/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <parent>
6 |         <artifactId>doit30_flink</artifactId>
7 |         <groupId>cn.doitedu</groupId>
8 |         <version>1.0</version>
9 |     </parent>
10 |     <modelVersion>4.0.0</modelVersion>
11 |
12 |     <artifactId>kafka_course</artifactId>
13 |
14 |     <properties>
15 |         <maven.compiler.source>8</maven.compiler.source>
16 |         <maven.compiler.target>8</maven.compiler.target>
17 |     </properties>
18 |
19 |     <dependencies>
20 |         <dependency>
21 |             <groupId>org.apache.kafka</groupId>
22 |             <artifactId>kafka-clients</artifactId>
23 |             <version>${kafka.version}</version>
24 |         </dependency>
25 |
26 |         <dependency>
27 |             <groupId>org.apache.commons</groupId>
28 |             <artifactId>commons-lang3</artifactId>
29 |             <version>3.12.0</version>
30 |         </dependency>
31 |
32 |         <dependency>
33 |             <groupId>com.google.guava</groupId>
34 |             <artifactId>guava</artifactId>
35 |             <version>30.0-jre</version>
36 |         </dependency>
37 |
38 |         <dependency>
39 |             <groupId>org.roaringbitmap</groupId>
40 |             <artifactId>RoaringBitmap</artifactId>
41 |             <version>0.9.25</version>
42 |         </dependency>
43 |
44 |         <dependency>
45 |             <groupId>mysql</groupId>
46 |             <artifactId>mysql-connector-java</artifactId>
47 |             <version>8.0.27</version>
48 |         </dependency>
49 |
50 |         <dependency>
51 |             <groupId>org.apache.flink</groupId>
52 |             <artifactId>flink-connector-files</artifactId>
53 |             <version>1.14.4</version>
54 |         </dependency>
55 |     </dependencies>
56 |
57 | </project>
--------------------------------------------------------------------------------
/kafka_course/src/main/java/cn/doitedu/kafka/AdminClientDemo.java:
--------------------------------------------------------------------------------
1 | package cn.doitedu.kafka;
2 |
3 | import org.apache.kafka.clients.admin.*;
4 | import org.apache.kafka.common.KafkaFuture;
5 | import org.apache.kafka.common.Node;
6 | import org.apache.kafka.common.TopicPartitionInfo;
7 |
8 | import java.util.*;
9 | import java.util.concurrent.ExecutionException;
10 |
11 | public class AdminClientDemo {
12 | public static void main(String[] args) throws ExecutionException, InterruptedException {
13 |
14 | Properties props = new Properties();
15 | props.setProperty(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG,"doit01:9092,doit02:9092");
16 |
17 |         // the admin client
18 | AdminClient adminClient = KafkaAdminClient.create(props);
19 |
20 |         // create a topic
21 | /*NewTopic zzuzz = new NewTopic("zzuzz", 3, (short) 2);
22 | adminClient.createTopics(Arrays.asList(zzuzz));*/
23 |
24 |         // describe a topic in detail
25 | DescribeTopicsResult topicDescriptions = adminClient.describeTopics(Arrays.asList("zzuzz"));
26 |
27 | KafkaFuture