├── .gitignore ├── README.md ├── conf └── hiveconf │ └── hive-site.xml ├── data ├── csv │ └── a.csv ├── json │ ├── qiantao │ │ └── a.txt │ ├── qiantao2 │ │ └── a.txt │ └── qiantao3 │ │ └── a.txt └── sqldemo │ └── a.txt ├── datagen ├── pom.xml └── src │ └── main │ └── java │ └── cn │ └── doitedu │ ├── ActionLogAutoGen.java │ ├── ActionLogGenOne.java │ └── module │ ├── Collector.java │ ├── CollectorConsoleImpl.java │ ├── CollectorKafkaImpl.java │ ├── LogBean.java │ ├── LogBeanWrapper.java │ ├── LogRunnable.java │ ├── UserProfileDataGen.java │ └── UserUtils.java ├── flink_course ├── data │ ├── transformation_input │ │ └── userinfo.txt │ └── wc │ │ └── input │ │ └── wc.txt ├── pom.xml └── src │ ├── main │ ├── java │ │ ├── cn │ │ │ └── doitedu │ │ │ │ ├── flink │ │ │ │ ├── TaskTest.java │ │ │ │ ├── TestWindow.java │ │ │ │ ├── avro │ │ │ │ │ └── schema │ │ │ │ │ │ ├── AvroEventLog.java │ │ │ │ │ │ └── AvroEventLogBean.java │ │ │ │ ├── exercise │ │ │ │ │ ├── EventCount.java │ │ │ │ │ ├── EventUserInfo.java │ │ │ │ │ ├── Exercise_1.java │ │ │ │ │ └── UserInfo.java │ │ │ │ ├── java │ │ │ │ │ └── demos │ │ │ │ │ │ ├── EventBean2.java │ │ │ │ │ │ ├── EventLog.java │ │ │ │ │ │ ├── ParallelismDe.java │ │ │ │ │ │ ├── _01_StreamWordCount.java │ │ │ │ │ │ ├── _02_BatchWordCount.java │ │ │ │ │ │ ├── _03_StreamBatchWordCount.java │ │ │ │ │ │ ├── _04_WordCount_LambdaTest.java │ │ │ │ │ │ ├── _05_SourceOperator_Demos.java │ │ │ │ │ │ ├── _06_CustomSourceFunction.java │ │ │ │ │ │ ├── _07_Transformation_Demos.java │ │ │ │ │ │ ├── _08_SinkOperator_Demos.java │ │ │ │ │ │ ├── _09_StreamFileSinkOperator_Demo1.java │ │ │ │ │ │ ├── _09_StreamFileSinkOperator_Demo2.java │ │ │ │ │ │ ├── _09_StreamFileSinkOperator_Demo3.java │ │ │ │ │ │ ├── _10_KafkaSinkOperator_Demo1.java │ │ │ │ │ │ ├── _11_JdbcSinkOperator_Demo1.java │ │ │ │ │ │ ├── _12_RedisSinkOperator_Demo1.java │ │ │ │ │ │ ├── _13_SideOutput_Demo.java │ │ │ │ │ │ ├── _14_StreamConnect_Union_Demo.java │ │ │ │ │ │ ├── _15_StreamCoGroup_Join_Demo.java │ │ │ │ │ │ ├── _16_BroadCast_Demo.java │ │ │ │ │ │ ├── _17_ProcessFunctions_Demo.java │ │ │ │ │ │ ├── _18_ChannalSelector_Partitioner_Demo.java │ │ │ │ │ │ ├── _19_WaterMark_Api_Demo.java │ │ │ │ │ │ ├── _19_WaterMark_Api_Demo2.java │ │ │ │ │ │ ├── _20_Window_Api_Demo1.java │ │ │ │ │ │ ├── _21_Window_Api_Demo2.java │ │ │ │ │ │ ├── _21_Window_Api_Demo3.java │ │ │ │ │ │ ├── _21_Window_Api_Demo4.java │ │ │ │ │ │ ├── _22_StateBasic_Demo.java │ │ │ │ │ │ ├── _23_State_OperatorState_Demo.java │ │ │ │ │ │ ├── _24_State_KeyedState_Demo.java │ │ │ │ │ │ ├── _25_State_DataStructure_Demo.java │ │ │ │ │ │ ├── _26_State_TTL_Demo.java │ │ │ │ │ │ ├── _27_ToleranceConfig_Demo.java │ │ │ │ │ │ └── _28_ToleranceSideToSideTest.java │ │ │ │ ├── scala │ │ │ │ │ └── demos │ │ │ │ │ │ └── _01_入门程序WordCount.scala │ │ │ │ └── task │ │ │ │ │ ├── Mapper1.java │ │ │ │ │ ├── Mapper2.java │ │ │ │ │ ├── Task1.java │ │ │ │ │ ├── Task2.java │ │ │ │ │ ├── Task3.java │ │ │ │ │ └── TaskRunner.java │ │ │ │ └── flinksql │ │ │ │ ├── demos │ │ │ │ ├── Demo10_KafkaConnectorDetail.java │ │ │ │ ├── Demo11_UpsertKafkaConnectorTest.java │ │ │ │ ├── Demo11_UpsertKafkaConnectorTest2.java │ │ │ │ ├── Demo12_JdbcConnectorTest1.java │ │ │ │ ├── Demo12_JdbcConnectorTest2.java │ │ │ │ ├── Demo13_FileSystemConnectorTest.java │ │ │ │ ├── Demo14_MysqlCdcConnector.java │ │ │ │ ├── Demo14_StreamFromToTable.java │ │ │ │ ├── Demo16_TimeWindowDemo.java │ │ │ │ ├── Demo17_TimeWindowJoin.java │ │ │ │ ├── Demo18_IntervalJoin.java │ │ │ │ ├── Demo18_RegularJoin.java │ │ │ │ ├── 
Demo19_ArrayJoin.java │ │ │ │ ├── Demo19_LookupJoin.java │ │ │ │ ├── Demo1_TableSql.java │ │ │ │ ├── Demo20_Temporal_Join.java │ │ │ │ ├── Demo21_CustomScalarFunction.java │ │ │ │ ├── Demo22_CustomAggregateFunction.java │ │ │ │ ├── Demo23_TableFunction.java │ │ │ │ ├── Demo24_TableAggregateFunction.java │ │ │ │ ├── Demo24_TableAggregateFunction2.java │ │ │ │ ├── Demo25_MetricDemos.java │ │ │ │ ├── Demo2_TableApi.java │ │ │ │ ├── Demo3_TableObjectCreate.java │ │ │ │ ├── Demo4_SqlTableCreate.java │ │ │ │ ├── Demo5_CatalogDemo.java │ │ │ │ ├── Demo6_Exercise.java │ │ │ │ ├── Demo7_ColumnDetail1_Sql.java │ │ │ │ ├── Demo7_ColumnDetail2_TableApi.java │ │ │ │ ├── Demo8_CsvFormat.java │ │ │ │ ├── Demo8_JsonFormat.java │ │ │ │ ├── Demo9_EventTimeAndWatermark.java │ │ │ │ ├── Demo9_EventTimeAndWatermark2.java │ │ │ │ └── Demo9_EventTimeAndWatermark3.java │ │ │ │ └── fuxi │ │ │ │ ├── EventBean.java │ │ │ │ ├── Exercise.java │ │ │ │ ├── KeyedStateDemo.java │ │ │ │ └── TimerDemo.java │ │ ├── org │ │ │ └── apache │ │ │ │ └── flink │ │ │ │ ├── api │ │ │ │ └── common │ │ │ │ │ └── eventtime │ │ │ │ │ └── BoundedOutOfOrdernessWatermarks.java │ │ │ │ ├── runtime │ │ │ │ └── state │ │ │ │ │ └── ttl │ │ │ │ │ └── CXTtlIncrementalCleanup.java │ │ │ │ └── streaming │ │ │ │ ├── api │ │ │ │ └── operators │ │ │ │ │ └── AbstractStreamOperator.java │ │ │ │ └── runtime │ │ │ │ └── operators │ │ │ │ ├── TimestampsAndWatermarksOperator.java │ │ │ │ └── windowing │ │ │ │ └── WindowOperator.java │ │ └── tmp │ │ │ ├── FlinkKafkaDemo.java │ │ │ ├── pojos │ │ │ ├── MysqlUser.java │ │ │ └── UserSlotGame.java │ │ │ ├── sqls.sql │ │ │ └── utils │ │ │ └── SqlHolder.java │ └── resources │ │ ├── log4j.properties │ │ └── prts.avsc.bak │ └── test │ └── java │ └── cn │ └── doitedu │ └── flink │ └── TestChangelog.java ├── kafka_course ├── pom.xml └── src │ ├── main │ ├── java │ │ └── cn │ │ │ └── doitedu │ │ │ └── kafka │ │ │ ├── AdminClientDemo.java │ │ │ ├── ConsumerDemo.java │ │ │ ├── ConsumerDemo2.java │ │ │ ├── ConsumerDemo3.java │ │ │ ├── Consumer实现ExactlyOnce手段1.java │ │ │ ├── Kafka编程练习.java │ │ │ ├── Kafka编程练习_消费者.java │ │ │ ├── Kafka编程练习_消费者_Bitmap.java │ │ │ ├── Kafka编程练习_消费者_判重.java │ │ │ ├── Kafka自身事务机制.java │ │ │ ├── MyPartitioner.java │ │ │ └── ProducerDemo.java │ └── resources │ │ ├── bitmap示意图.png │ │ └── consumer.properties │ └── test │ └── java │ └── RoaringBitmapTest.java └── pom.xml /.gitignore: -------------------------------------------------------------------------------- 1 | # Project exclude paths 2 | /kafka_course/target/ 3 | # 项目排除路径 4 | /flink_course/target/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # doit30_flink 2 | 3 | # 多易教育[涛哥] DOE30期 FLINK 课程配套源码 4 | 5 | # 更多给力资料和课程,可入群,长期蹲守: 6 | ①群: 1071917730(已满,不可加) 7 | ②群: 813383827(已满,不可加) 8 | ③群: 955021790(可加) 9 | ④群: 1108285618(可加) 10 | -------------------------------------------------------------------------------- /conf/hiveconf/hive-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | hive.metastore.uris 4 | thrift://doitedu:9083 5 | 6 | -------------------------------------------------------------------------------- /data/csv/a.csv: -------------------------------------------------------------------------------- 1 | |1|,|zs|,|18| 2 | # 哈哈哈哈 3 | |2|,|ls|,|20| 4 | |3|,|ww|,\N -------------------------------------------------------------------------------- 
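The pipe-quoted fields, the '#' comment line, and the \N null marker in data/csv/a.csv correspond to options of Flink SQL's csv format. A minimal sketch of how this file could be mapped to a table follows; the class name, table name and column schema are assumptions inferred from the sample rows, while the path and format options come from the file itself:

package cn.doitedu.flinksql.demos;

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

// Hypothetical helper class, not part of the original sources.
public class CsvFileTableSketch {

    public static void main(String[] args) {
        TableEnvironment tenv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());

        tenv.executeSql(
                "CREATE TABLE csv_demo (                         "
              + "   id   INT,                                    "
              + "   name STRING,                                 "
              + "   age  INT                                     "
              + ") WITH (                                        "
              + "   'connector' = 'filesystem',                  "
              + "   'path' = 'data/csv/',                        "
              + "   'format' = 'csv',                            "
              + "   'csv.quote-character' = '|',                 "   // fields are wrapped in |...|
              + "   'csv.allow-comments' = 'true',               "   // lines starting with # are skipped
              + "   'csv.null-literal' = '\\N',                  "   // \N is read back as NULL
              + "   'csv.ignore-parse-errors' = 'true'           "
              + ")");

        tenv.executeSql("SELECT * FROM csv_demo").print();
    }
}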
/data/json/qiantao/a.txt: -------------------------------------------------------------------------------- 1 | {"id":10,"name":{"nick":"doe1","formal":"doit edu1"}} 2 | {"id":11,"name":{"nick":"doe2","formal":"doit edu2"}} 3 | {"id":12,"name":{"nick":"doe3","formal":"doit edu3"}} -------------------------------------------------------------------------------- /data/json/qiantao2/a.txt: -------------------------------------------------------------------------------- 1 | {"id":10,"name":{"nick":"doe1","formal":"doit edu1","height":180}} 2 | {"id":11,"name":{"nick":"doe2","formal":"doit edu2","height":170}} 3 | {"id":12,"name":{"nick":"doe3","formal":"doit edu3","height":160}} -------------------------------------------------------------------------------- /data/json/qiantao3/a.txt: -------------------------------------------------------------------------------- 1 | {"id":1,"friends":[{"name":"a","info":{"addr":"bj","gender":"male"}},{"name":"b","info":{"addr":"sh","gender":"female"}}]} 2 | {"id":2,"friends":[{"name":"b","info":{"addr":"sh","gender":"male"}},{"name":"c","info":{"addr":"bj","gender":"female"}}]} -------------------------------------------------------------------------------- /data/sqldemo/a.txt: -------------------------------------------------------------------------------- 1 | 1,zs,18,male 2 | 2,ls,28,fe,male 3 | 3,ww,38,male 4 | -------------------------------------------------------------------------------- /datagen/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | doit30_flink 7 | cn.doitedu 8 | 1.0 9 | 10 | 4.0.0 11 | 12 | datagen 13 | 14 | 15 | 8 16 | 8 17 | 18 | 19 | 20 | 21 | org.apache.kafka 22 | kafka-clients 23 | ${kafka.version} 24 | 25 | 26 | 27 | commons-lang 28 | commons-lang 29 | 2.6 30 | 31 | 32 | 33 | org.apache.commons 34 | commons-lang3 35 | 3.12.0 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /datagen/src/main/java/cn/doitedu/ActionLogAutoGen.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu; 2 | 3 | import cn.doitedu.module.*; 4 | 5 | import java.util.ArrayList; 6 | import java.util.HashMap; 7 | import java.util.List; 8 | 9 | /** 10 | * @author 涛哥 11 | * @nick_name "deep as the sea" 12 | * @contact qq:657270652 wx:doit_edu 13 | * @site www.doitedu.cn 14 | * @date 2021-03-27 15 | * @desc 行为日志生成模拟器(自动连续生成) 16 | *

17 | * { 18 | * "account": "Vz54E9Ya", 19 | * "appId": "cn.doitedu.app1", 20 | * "appVersion": "3.4", 21 | * "carrier": "中国移动", 22 | * "deviceId": "WEISLD0235S0934OL", 23 | * "deviceType": "MI-6", 24 | * "ip": "24.93.136.175", 25 | * "latitude": 42.09287620431088, 26 | * "longitude": 79.42106825764643, 27 | * "netType": "WIFI", 28 | * "osName": "android", 29 | * "osVersion": "6.5", 30 | * "releaseChannel": "豌豆荚", 31 | * "resolution": "1024*768", 32 | * "sessionId": "SE18329583458", 33 | * "timeStamp": 1594534406220 34 | * "eventId": "productView", 35 | * "properties": { 36 | * "pageId": "646", 37 | * "productId": "157", 38 | * "refType": "4", 39 | * "refUrl": "805", 40 | * "title": "爱得堡 男靴中高帮马丁靴秋冬雪地靴 H1878 复古黄 40码", 41 | * "url": "https://item.jd.com/36506691363.html", 42 | * "utm_campain": "4", 43 | * "utm_loctype": "1", 44 | * "utm_source": "10" 45 | * } 46 | * } 47 | *

48 | *

49 | * kafka中要先创建好topic 50 | * [root@hdp01 kafka_2.11-2.0.0]# bin/kafka-topics.sh --create --topic yinew_applog --partitions 2 --replication-factor 1 --zookeeper hdp01:2181,hdp02:2181,hdp03:2181 51 | *
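 * (the command above targets Kafka 2.0 via ZooKeeper; on Kafka 2.2+ the topic can also be created through a broker,
 * and on Kafka 3.x the --zookeeper option no longer exists -- the equivalent command, with the broker address assumed
 * from the hosts above, would be:
 * bin/kafka-topics.sh --create --topic yinew_applog --partitions 2 --replication-factor 1 --bootstrap-server hdp01:9092 )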

52 | * 创建完后,检查一下是否创建成功: 53 | * [root@hdp01 kafka_2.11-2.0.0]# bin/kafka-topics.sh --list --zookeeper hdp01:2181 54 | */ 55 | public class ActionLogAutoGen { 56 | public static void main(String[] args) throws Exception { 57 | 58 | // 加载历史用户 59 | // String filePath = "data/users/hisu-1654943006977.txt"; 60 | // HashMap hisUsers = UserUtils.loadHisUsers(filePath); 61 | 62 | // 添加新用户 63 | HashMap hisUsers = new HashMap<>(); 64 | UserUtils.addNewUsers(hisUsers, 1000, true); 65 | 66 | UserUtils.saveUsers(hisUsers); 67 | 68 | // 转成带状态用户数据 69 | List wrapperedUsers = UserUtils.userToWrapper(hisUsers); 70 | 71 | System.out.println("日活用户总数:" + wrapperedUsers.size() + "-------"); 72 | 73 | // 多线程并行生成日志 74 | // CollectorConsoleImpl collector = new CollectorConsoleImpl(); 75 | CollectorKafkaImpl collector = new CollectorKafkaImpl("doit-events"); 76 | genBatchToConsole(wrapperedUsers, 3,collector); 77 | 78 | 79 | } 80 | 81 | private static void genBatchToConsole(List wrapperedUsers, int threads , Collector collector) { 82 | int partSize = wrapperedUsers.size() / threads; 83 | 84 | ArrayList> partList = new ArrayList<>(); 85 | 86 | for (int i = 0; i < threads; i++) { 87 | List userPart = new ArrayList<>(); 88 | 89 | for (int j = i * partSize; j < (i != threads - 1 ? (i + 1) * partSize : wrapperedUsers.size()); j++) { 90 | userPart.add(wrapperedUsers.get(j)); 91 | } 92 | new Thread(new LogRunnable(userPart,collector,10)).start(); 93 | } 94 | } 95 | 96 | } 97 | -------------------------------------------------------------------------------- /datagen/src/main/java/cn/doitedu/ActionLogGenOne.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu; 2 | 3 | import cn.doitedu.module.LogBean; 4 | import com.alibaba.fastjson.JSON; 5 | import org.apache.kafka.clients.producer.KafkaProducer; 6 | import org.apache.kafka.clients.producer.ProducerRecord; 7 | 8 | import java.util.HashMap; 9 | import java.util.Map; 10 | import java.util.Properties; 11 | 12 | /*** 13 | * @author hunter.d 14 | * @qq 657270652 15 | * @wx haitao-duan 16 | * @date 2021/4/5 17 | * 18 | * 运行一次,生成一条行为日志 19 | * 20 | **/ 21 | public class ActionLogGenOne { 22 | public static void main(String[] args) { 23 | Properties props = new Properties(); 24 | props.setProperty("bootstrap.servers", "hdp01:9092,hdp02:9092,hdp03:9092"); 25 | props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer"); 26 | props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer"); 27 | KafkaProducer kafkaProducer = new KafkaProducer<>(props); 28 | 29 | LogBean logBean = new LogBean(); 30 | logBean.setDeviceId("000053"); 31 | logBean.setEventId("E"); 32 | Map ps = new HashMap(); 33 | props.put("p1", "v1"); 34 | logBean.setProperties(ps); 35 | logBean.setTimeStamp(System.currentTimeMillis()); 36 | 37 | String log = JSON.toJSONString(logBean); 38 | ProducerRecord record = new ProducerRecord<>("zenniu_applog", log); 39 | kafkaProducer.send(record); 40 | kafkaProducer.flush(); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /datagen/src/main/java/cn/doitedu/module/Collector.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.module; 2 | 3 | public interface Collector { 4 | public void collect(String logdata); 5 | } 6 | -------------------------------------------------------------------------------- /datagen/src/main/java/cn/doitedu/module/CollectorConsoleImpl.java: 
-------------------------------------------------------------------------------- 1 | package cn.doitedu.module; 2 | 3 | public class CollectorConsoleImpl implements Collector { 4 | @Override 5 | public void collect(String logdata) { 6 | System.out.println(logdata); 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /datagen/src/main/java/cn/doitedu/module/CollectorKafkaImpl.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.module; 2 | 3 | import org.apache.kafka.clients.producer.KafkaProducer; 4 | import org.apache.kafka.clients.producer.ProducerRecord; 5 | 6 | import java.util.Properties; 7 | 8 | public class CollectorKafkaImpl implements Collector { 9 | 10 | private KafkaProducer kafkaProducer; 11 | private String topicName; 12 | 13 | int messageSeq = 0; 14 | 15 | public CollectorKafkaImpl(String topicName){ 16 | Properties props = new Properties(); 17 | props.setProperty("bootstrap.servers", "doitedu:9092"); 18 | props.put("key.serializer", "org.apache.kafka.common.serialization.IntegerSerializer"); 19 | props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer"); 20 | 21 | 22 | // 构造一个kafka生产者客户端 23 | this.kafkaProducer = new KafkaProducer<>(props); 24 | 25 | this.topicName = topicName; 26 | } 27 | @Override 28 | public void collect(String logdata) { 29 | this.messageSeq ++; 30 | 31 | ProducerRecord record = new ProducerRecord<>(topicName, this.messageSeq, logdata); 32 | kafkaProducer.send(record); 33 | 34 | kafkaProducer.flush(); 35 | 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /datagen/src/main/java/cn/doitedu/module/LogBean.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.module; 2 | 3 | import lombok.Data; 4 | 5 | import java.util.Map; 6 | 7 | @Data 8 | public class LogBean { 9 | private String account ; 10 | private String appId ; 11 | private String appVersion ; 12 | private String carrier ; 13 | private String deviceId ; 14 | private String deviceType ; 15 | private String ip ; 16 | private double latitude ; 17 | private double longitude ; 18 | private String netType ; 19 | private String osName ; 20 | private String osVersion ; 21 | private String releaseChannel ; 22 | private String resolution ; 23 | private String sessionId ; 24 | private long timeStamp ; 25 | private String eventId ; 26 | private Map properties; 27 | 28 | 29 | } 30 | -------------------------------------------------------------------------------- /datagen/src/main/java/cn/doitedu/module/LogBeanWrapper.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.module; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | import lombok.NoArgsConstructor; 6 | 7 | @Data 8 | @AllArgsConstructor 9 | @NoArgsConstructor 10 | public class LogBeanWrapper { 11 | private LogBean logBean; 12 | private String sessionId; 13 | private long lastTime; 14 | 15 | private boolean isExists = true; 16 | private boolean isPushback = false; 17 | 18 | //private String currPage; 19 | 20 | private int sessionMax = 0; 21 | 22 | public LogBeanWrapper(LogBean logBean,String sessionId,long lastTime){ 23 | this.logBean = logBean; 24 | this.sessionId = sessionId; 25 | this.lastTime = lastTime; 26 | 27 | } 28 | 29 | 30 | 31 | } 32 | -------------------------------------------------------------------------------- 
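The Collector interface above is the seam that both CollectorConsoleImpl and CollectorKafkaImpl plug into. A minimal sketch of a third, purely hypothetical implementation (not present in the repo) that appends each generated log line to a local file, to illustrate how the abstraction extends; the class name and output path are assumptions:

package cn.doitedu.module;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;

// Hypothetical file-based Collector, for illustration only; not part of the project.
public class CollectorFileImpl implements Collector {

    private final BufferedWriter writer;

    public CollectorFileImpl(String path) throws IOException {
        // append mode, so repeated generator runs keep adding to the same file
        this.writer = new BufferedWriter(new FileWriter(path, true));
    }

    @Override
    public void collect(String logdata) {
        try {
            writer.write(logdata);
            writer.newLine();
            writer.flush();
        } catch (IOException e) {
            throw new RuntimeException("failed to write generated log line", e);
        }
    }
}

It would be wired in exactly like CollectorKafkaImpl in ActionLogAutoGen, e.g. new CollectorFileImpl("data/genlog.txt") handed to genBatchToConsole (the path is an arbitrary assumption).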
/datagen/src/main/java/cn/doitedu/module/UserProfileDataGen.java: -------------------------------------------------------------------------------- 1 | //package cn.doitedu; 2 | // 3 | //import org.apache.commons.lang3.RandomUtils; 4 | //import org.apache.commons.lang3.StringUtils; 5 | //import org.apache.hadoop.conf.Configuration; 6 | //import org.apache.hadoop.hbase.TableName; 7 | //import org.apache.hadoop.hbase.client.Connection; 8 | //import org.apache.hadoop.hbase.client.ConnectionFactory; 9 | //import org.apache.hadoop.hbase.client.Put; 10 | //import org.apache.hadoop.hbase.client.Table; 11 | //import org.apache.hadoop.hbase.util.Bytes; 12 | // 13 | //import java.io.IOException; 14 | //import java.util.ArrayList; 15 | // 16 | ///** 17 | // * @author 涛哥 18 | // * @nick_name "deep as the sea" 19 | // * @contact qq:657270652 wx:doit_edu 20 | // * @site www.doitedu.cn 21 | // * @date 2021-03-27 22 | // * @desc 用户画像数据模拟器 23 | // *

24 | // * deviceid,k1=v1 25 | // *

26 | // * hbase中需要先创建好画像标签表 27 | // * [root@hdp01 ~]# hbase shell 28 | // * hbase> create 'yinew_profile','f' 29 | // */ 30 | //public class UserProfileDataGen { 31 | // public static void main(String[] args) throws IOException { 32 | // 33 | // Configuration conf = new Configuration(); 34 | // conf.set("hbase.zookeeper.quorum", "hdp01:2181,hdp02:2181,hdp03:2181"); 35 | // 36 | // Connection conn = ConnectionFactory.createConnection(conf); 37 | // Table table = conn.getTable(TableName.valueOf("zenniu_profile")); 38 | // 39 | // ArrayList puts = new ArrayList<>(); 40 | // for (int i = 0; i < 100000; i++) { 41 | // 42 | // // 生成一个用户的画像标签数据 43 | // String deviceId = StringUtils.leftPad(i + "", 6, "0"); 44 | // Put put = new Put(Bytes.toBytes(deviceId)); 45 | // for (int k = 1; k <= 100; k++) { 46 | // String key = "tag" + k; 47 | // String value = "v" + RandomUtils.nextInt(1, 3); 48 | // put.addColumn(Bytes.toBytes("f"), Bytes.toBytes(key), Bytes.toBytes(value)); 49 | // } 50 | // 51 | // // 将这一条画像数据,添加到list中 52 | // puts.add(put); 53 | // 54 | // // 攒满100条一批 55 | // if(puts.size()==100) { 56 | // table.put(puts); 57 | // puts.clear(); 58 | // } 59 | // 60 | // } 61 | // 62 | // // 提交最后一批 63 | // if(puts.size()>0) table.put(puts); 64 | // 65 | // conn.close(); 66 | // } 67 | //} 68 | -------------------------------------------------------------------------------- /flink_course/data/transformation_input/userinfo.txt: -------------------------------------------------------------------------------- 1 | {"uid":1,"gender":"male","name":"ua","friends":[{"fid":1,"name":"cc"},{"fid":3,"name":"bb"}]} 2 | {"uid":2,"gender":"male","name":"ub","friends":[{"fid":2,"name":"aa"},{"fid":3,"name":"bb"}]} 3 | {"uid":3,"gender":"female","name":"uc","friends":[{"fid":2,"name":"aa"}]} 4 | {"uid":4,"gender":"female","name":"ud","friends":[{"fid":3,"name":"bb"}]} 5 | {"uid":5,"gender":"male","name":"ue","friends":[{"fid":1,"name":"cc"},{"fid":3,"name":"bb"}]} 6 | {"uid":6,"gender":"male","name":"uf","friends":[{"fid":2,"name":"aa"},{"fid":3,"name":"bb"},{"fid":1,"name":"cc"}]} 7 | {"uid":7,"gender":"male","name":"uf","friends":[{"fid":2,"name":"aa"},{"fid":3,"name":"bb"},{"fid":1,"name":"cc"},{"fid":4,"name":"dd"}]} 8 | {"uid":8,"gender":"male","name":"xx","friends":[{"fid":2,"name":"aa"},{"fid":3,"name":"bb"},{"fid":1,"name":"cc"},{"fid":4,"name":"dd"}]} -------------------------------------------------------------------------------- /flink_course/data/wc/input/wc.txt: -------------------------------------------------------------------------------- 1 | a a a a b b b c 2 | d e d d f 3 | c d 4 | c c c 5 | d d d -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/TaskTest.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flink; 2 | 3 | import org.apache.flink.api.common.typeinfo.TypeHint; 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.streaming.api.functions.ProcessFunction; 9 | import org.apache.flink.util.Collector; 10 | 11 | public class TaskTest { 12 | public static void main(String[] args) throws Exception { 13 | 14 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // 流批一体的入口环境 15 
| env.setParallelism(1); 16 | 17 | SingleOutputStreamOperator st = env.socketTextStream("localhost", 9999) 18 | .process(new ProcessFunction() { 19 | @Override 20 | public void processElement(String value, ProcessFunction.Context ctx, Collector out) throws Exception { 21 | System.out.println("第1级map收到数据: " + value + "线程号:" + Thread.currentThread().getId()); 22 | System.out.println("第1级subtask:" + getRuntimeContext().getTaskNameWithSubtasks()); 23 | 24 | // System.out.println("1- aaa"); 25 | // System.out.println("1- bbb"); 26 | out.collect(value); 27 | } 28 | }); 29 | 30 | SingleOutputStreamOperator map = st.process( 31 | new ProcessFunction() { 32 | @Override 33 | public void processElement(String value, ProcessFunction.Context ctx, Collector out) throws Exception { 34 | System.out.println("第2级map算子,收到数据 "+ value + ", 线程" + Thread.currentThread().getId()); 35 | System.out.println("第2级subtask:" + getRuntimeContext().getTaskNameWithSubtasks()); 36 | out.collect(value); 37 | } 38 | } 39 | ).setParallelism(2); 40 | 41 | 42 | map.print(); 43 | env.execute(); 44 | 45 | 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/TestWindow.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flink; 2 | 3 | import org.apache.commons.lang3.RandomUtils; 4 | import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner; 5 | import org.apache.flink.api.common.eventtime.WatermarkStrategy; 6 | import org.apache.flink.api.common.typeinfo.TypeHint; 7 | import org.apache.flink.api.java.ExecutionEnvironment; 8 | import org.apache.flink.api.java.tuple.Tuple2; 9 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 10 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 12 | import org.apache.flink.streaming.api.functions.ProcessFunction; 13 | import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction; 14 | import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows; 15 | import org.apache.flink.streaming.api.windowing.time.Time; 16 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 17 | import org.apache.flink.util.Collector; 18 | 19 | public class TestWindow { 20 | public static void main(String[] args) throws Exception { 21 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // 流批一体的入口环境 22 | env.setParallelism(1); 23 | 24 | DataStreamSource st = env.socketTextStream("localhost", 9999); 25 | SingleOutputStreamOperator> map = st.map(s -> { 26 | String[] split = s.split(","); 27 | return Tuple2.of(split[0], Long.parseLong(split[1])); 28 | }).returns(new TypeHint>() { 29 | }); 30 | 31 | SingleOutputStreamOperator> wmed = map.assignTimestampsAndWatermarks(WatermarkStrategy.>forMonotonousTimestamps().withTimestampAssigner(new SerializableTimestampAssigner>() { 32 | @Override 33 | public long extractTimestamp(Tuple2 element, long recordTimestamp) { 34 | return element.f1; 35 | } 36 | })); 37 | SingleOutputStreamOperator wind = wmed.keyBy(tp -> tp.f0) 38 | .window(TumblingEventTimeWindows.of(Time.seconds(5))) 39 | .process(new ProcessWindowFunction, String, String, TimeWindow>() { 40 | @Override 41 | public void process(String s, ProcessWindowFunction, String, String, TimeWindow>.Context context, Iterable> elements, Collector out) throws 
Exception { 42 | System.out.println(s + "==== window中用户函数触发开始:" + Thread.currentThread().getId()); 43 | // System.out.println(s + " window中的线程号: " + Thread.currentThread().getId()); 44 | // System.out.println(s + " window中的watermark : " + context.currentWatermark()); 45 | Thread.sleep(10000); 46 | //System.out.println(s + "睡眠完毕"); 47 | int i = RandomUtils.nextInt(1, 100); 48 | //System.out.println(s+ " 准备返回数据: " + i); 49 | out.collect(s+ "," + i); 50 | System.out.println(s + "==== window中用户函数触发结束: " + Thread.currentThread().getId()); 51 | } 52 | }); 53 | 54 | 55 | wind.process(new ProcessFunction() { 56 | @Override 57 | public void processElement(String value, ProcessFunction.Context ctx, Collector out) throws Exception { 58 | System.out.println("%%%% 下游processElement开始:" +value + "线程号: " + Thread.currentThread().getId()); 59 | System.out.println("%%%% 下游当前watermark: " + ctx.timerService().currentWatermark()); 60 | System.out.println("%%%% 下游processElement结束:" + "线程号: " + Thread.currentThread().getId()); 61 | } 62 | }).startNewChain(); 63 | 64 | 65 | env.execute(); 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/exercise/EventCount.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flink.exercise; 2 | 3 | import lombok.*; 4 | 5 | @Data 6 | @NoArgsConstructor 7 | @AllArgsConstructor 8 | public class EventCount { 9 | private int id; 10 | private String eventId; 11 | private int cnt; 12 | 13 | } 14 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/exercise/EventUserInfo.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flink.exercise; 2 | 3 | import lombok.*; 4 | 5 | @Data 6 | @AllArgsConstructor 7 | @NoArgsConstructor 8 | public class EventUserInfo { 9 | 10 | private int id; 11 | private String eventId; 12 | private int cnt; 13 | private String gender; 14 | private String city; 15 | 16 | } 17 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/exercise/UserInfo.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flink.exercise; 2 | 3 | import lombok.*; 4 | 5 | import java.io.ByteArrayOutputStream; 6 | import java.io.IOException; 7 | import java.io.ObjectOutput; 8 | import java.io.ObjectOutputStream; 9 | 10 | @Data 11 | @NoArgsConstructor 12 | @AllArgsConstructor 13 | public class UserInfo { 14 | private int id; 15 | private String gender; 16 | private String city; 17 | } 18 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/java/demos/EventBean2.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flink.java.demos; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | import lombok.NoArgsConstructor; 6 | 7 | @Data 8 | @NoArgsConstructor 9 | @AllArgsConstructor 10 | public class EventBean2 { 11 | private long guid; 12 | private String eventId; 13 | private long timeStamp; 14 | private String pageId; 15 | private int actTimelong; // 行为时长 16 | } -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/java/demos/EventLog.java: 
-------------------------------------------------------------------------------- 1 | package cn.doitedu.flink.java.demos; 2 | 3 | import lombok.*; 4 | import org.apache.flink.streaming.connectors.redis.RedisSink; 5 | 6 | import java.util.Map; 7 | 8 | @NoArgsConstructor 9 | @AllArgsConstructor 10 | @Getter 11 | @Setter 12 | @ToString 13 | public class EventLog{ 14 | private long guid; 15 | private String sessionId; 16 | private String eventId; 17 | private long timeStamp; 18 | private Map eventInfo; 19 | } 20 | 21 | 22 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/java/demos/ParallelismDe.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flink.java.demos; 2 | 3 | import org.apache.flink.streaming.api.datastream.DataStream; 4 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 5 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 6 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 7 | 8 | public class ParallelismDe { 9 | 10 | public static void main(String[] args) { 11 | 12 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 13 | SingleOutputStreamOperator s1 = env.fromElements(1, 2, 3, 4, 5, 6).map(s -> s).setParallelism(3); 14 | SingleOutputStreamOperator s2 = env.fromElements(11, 12, 13, 14, 15, 16).map(s -> s).setParallelism(5); 15 | DataStream s3 = s2.union(s1); 16 | System.out.println(s3.getParallelism()); 17 | 18 | 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/java/demos/_01_StreamWordCount.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flink.java.demos; 2 | 3 | import org.apache.flink.api.common.functions.FlatMapFunction; 4 | import org.apache.flink.api.java.ExecutionEnvironment; 5 | import org.apache.flink.api.java.functions.KeySelector; 6 | import org.apache.flink.api.java.tuple.Tuple2; 7 | import org.apache.flink.configuration.Configuration; 8 | import org.apache.flink.streaming.api.datastream.DataStream; 9 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 10 | import org.apache.flink.streaming.api.datastream.KeyedStream; 11 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 12 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 13 | import org.apache.flink.util.Collector; 14 | 15 | /** 16 | * 通过socket数据源,去请求一个socket服务(doit01:9999)得到数据流 17 | * 然后统计数据流中出现的单词及其个数 18 | */ 19 | public class _01_StreamWordCount { 20 | 21 | public static void main(String[] args) throws Exception { 22 | 23 | 24 | // 创建一个编程入口环境 25 | // ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // 批处理的入口环境 26 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // 流批一体的入口环境 27 | 28 | 29 | // 显式声明为本地运行环境,且带webUI 30 | //Configuration configuration = new Configuration(); 31 | //configuration.setInteger("rest.port", 8081); 32 | //StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration); 33 | 34 | 35 | /** 36 | * 本地运行模式时,程序的默认并行度为 ,你的cpu的逻辑核数 37 | */ 38 | env.setParallelism(1); // 默认并行度可以通过env人为指定 39 | 40 | 41 | // 通过source算子,把socket数据源加载为一个dataStream(数据流) 42 | // [root@doit01 ~]# nc -lk 9999 43 | SingleOutputStreamOperator source 
= env.socketTextStream("localhost", 9999) 44 | .setParallelism(1) 45 | .slotSharingGroup("g1"); 46 | 47 | // 然后通过算子对数据流进行各种转换(计算逻辑) 48 | DataStream> words = source.flatMap(new FlatMapFunction>() { 49 | @Override 50 | public void flatMap(String s, Collector> collector) throws Exception { 51 | // 切单词 52 | String[] split = s.split("\\s+"); 53 | for (String word : split) { 54 | // 返回每一对 (单词,1) 55 | collector.collect(Tuple2.of(word, 1)); 56 | } 57 | } 58 | }) 59 | /*.setParallelism(10) 60 | .slotSharingGroup("g2") 61 | .shuffle()*/; 62 | 63 | //SingleOutputStreamOperator> words2 = words.map(tp -> Tuple2.of(tp.f0, tp.f1 * 10)); 64 | 65 | 66 | KeyedStream, String> keyed = words.keyBy(new KeySelector, String>() { 67 | @Override 68 | public String getKey(Tuple2 tuple2) throws Exception { 69 | 70 | return tuple2.f0; 71 | } 72 | }); 73 | 74 | 75 | SingleOutputStreamOperator> resultStream = keyed.sum("f1")/*.slotSharingGroup("g1")*/; 76 | 77 | // 通过sink算子,将结果输出 78 | resultStream.print("wcSink"); 79 | 80 | // 触发程序的提交运行 81 | env.execute(); 82 | 83 | 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/java/demos/_02_BatchWordCount.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flink.java.demos; 2 | 3 | import org.apache.flink.api.common.functions.FlatMapFunction; 4 | import org.apache.flink.api.java.ExecutionEnvironment; 5 | import org.apache.flink.api.java.operators.DataSource; 6 | import org.apache.flink.api.java.tuple.Tuple2; 7 | import org.apache.flink.util.Collector; 8 | 9 | /** 10 | * @Author: deep as the sea 11 | * @Site: 多易教育 12 | * @QQ: 657270652 13 | * @Date: 2022/4/30 14 | * @Desc: 批处理计算模式的wordcount示例 15 | **/ 16 | public class _02_BatchWordCount { 17 | 18 | public static void main(String[] args) throws Exception { 19 | 20 | // 批计算入口环境 21 | ExecutionEnvironment batchEnv = ExecutionEnvironment.getExecutionEnvironment(); 22 | 23 | // 读数据 -- : 批计算中得到的数据抽象,是一个 DataSet 24 | DataSource stringDataSource = batchEnv.readTextFile("flink_course/data/wc/input/"); 25 | 26 | // 在dataset上调用各种dataset的算子 27 | stringDataSource 28 | .flatMap(new MyFlatMapFunction()) 29 | .groupBy(0) 30 | .sum(1) 31 | .print(); 32 | } 33 | } 34 | 35 | class MyFlatMapFunction implements FlatMapFunction>{ 36 | 37 | @Override 38 | public void flatMap(String value, Collector> out) throws Exception { 39 | String[] words = value.split("\\s+"); 40 | for (String word : words) { 41 | out.collect(Tuple2.of(word,1)); 42 | } 43 | } 44 | } 45 | 46 | 47 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/java/demos/_03_StreamBatchWordCount.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flink.java.demos; 2 | 3 | import org.apache.flink.api.common.RuntimeExecutionMode; 4 | import org.apache.flink.api.common.functions.FlatMapFunction; 5 | import org.apache.flink.api.java.functions.KeySelector; 6 | import org.apache.flink.api.java.tuple.Tuple2; 7 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 9 | import org.apache.flink.util.Collector; 10 | 11 | public class _03_StreamBatchWordCount { 12 | 13 | public static void main(String[] args) throws Exception { 14 | 15 | // 流处理的编程环境入口 16 | StreamExecutionEnvironment streamEnv = 
StreamExecutionEnvironment.getExecutionEnvironment(); 17 | streamEnv.setParallelism(1); 18 | 19 | // 按批计算模式去执行 20 | streamEnv.setRuntimeMode(RuntimeExecutionMode.BATCH); 21 | 22 | // 按流计算模式去执行 23 | // streamEnv.setRuntimeMode(RuntimeExecutionMode.STREAMING); 24 | 25 | // flink自己判断决定 26 | // streamEnv.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC); 27 | 28 | // 读文件 得到 dataStream 29 | DataStreamSource streamSource = streamEnv.readTextFile("flink_course/data/wc/input/wc.txt"); 30 | 31 | 32 | // 调用dataStream的算子做计算 33 | streamSource.flatMap(new FlatMapFunction>() { 34 | @Override 35 | public void flatMap(String value, Collector> out) throws Exception { 36 | String[] words = value.split("\\s+"); 37 | for (String word : words) { 38 | out.collect(Tuple2.of(word, 1)); 39 | } 40 | } 41 | }) 42 | .keyBy(new KeySelector, String>() { 43 | @Override 44 | public String getKey(Tuple2 value) throws Exception { 45 | return value.f0; 46 | } 47 | }) 48 | .sum(1) 49 | .print(); 50 | 51 | 52 | streamEnv.execute(); 53 | 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/java/demos/_04_WordCount_LambdaTest.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flink.java.demos; 2 | 3 | import org.apache.flink.api.common.functions.FlatMapFunction; 4 | import org.apache.flink.api.common.functions.MapFunction; 5 | import org.apache.flink.api.common.typeinfo.TypeHint; 6 | import org.apache.flink.api.common.typeinfo.TypeInformation; 7 | import org.apache.flink.api.common.typeinfo.Types; 8 | import org.apache.flink.api.java.ExecutionEnvironment; 9 | import org.apache.flink.api.java.functions.KeySelector; 10 | import org.apache.flink.api.java.tuple.Tuple2; 11 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 12 | import org.apache.flink.streaming.api.datastream.KeyedStream; 13 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 14 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 15 | import org.apache.flink.util.Collector; 16 | 17 | public class _04_WordCount_LambdaTest { 18 | 19 | public static void main(String[] args) throws Exception { 20 | 21 | // 创建一个编程入口(执行环境) 22 | 23 | // 流式处理入口环境 24 | StreamExecutionEnvironment envStream = StreamExecutionEnvironment.getExecutionEnvironment(); 25 | 26 | DataStreamSource streamSource = envStream.readTextFile("flink_course/data/wc/input/wc.txt"); 27 | 28 | // 先把句子变大写 29 | /* 从map算子接收的MapFunction接口实现来看,它是一个单抽象方法的接口 30 | 所以这个接口的实现类的核心功能,就在它的方法上 31 | 那就可以用lambda表达式来简洁实现 32 | streamSource.map(new MapFunction() { 33 | @Override 34 | public String map(String value) throws Exception { 35 | return null; 36 | } 37 | });*/ 38 | 39 | /** 40 | * lambda表达式怎么写,看你要实现的那个接口的方法接收什么参数,返回什么结果 41 | */ 42 | // 然后就按lambda语法来表达: (参数1,参数2,...) 
-> { 函数体 } 43 | // streamSource.map( (value) -> { return value.toUpperCase();}); 44 | 45 | // 由于上面的lambda表达式,参数列表只有一个,且函数体只有一行代码,则可以简化 46 | // streamSource.map( value -> value.toUpperCase() ) ; 47 | 48 | // 由于上面的lambda表达式, 函数体只有一行代码,且参数只使用了一次,可以把函数调用转成 “方法引用” 49 | SingleOutputStreamOperator upperCased = streamSource.map(String::toUpperCase); 50 | 51 | // 然后切成单词,并转成(单词,1),并压平 52 | /*upperCased.flatMap(new FlatMapFunction>() { 53 | @Override 54 | public void flatMap(String value, Collector> out) throws Exception { 55 | 56 | } 57 | });*/ 58 | // 从上面的接口来看,它依然是一个 单抽象方法的 接口,所以它的方法实现,依然可以用lambda表达式来实现 59 | SingleOutputStreamOperator> wordAndOne = upperCased.flatMap((String s, Collector> collector) -> { 60 | String[] words = s.split("\\s+"); 61 | for (String word : words) { 62 | collector.collect(Tuple2.of(word, 1)); 63 | } 64 | }) 65 | // .returns(new TypeHint>() {}); // 通过 TypeHint 传达返回数据类型 66 | // .returns(TypeInformation.of(new TypeHint>() {})); // 更通用的,是传入TypeInformation,上面的TypeHint也是封装了TypeInformation 67 | .returns(Types.TUPLE(Types.STRING, Types.INT)); // 利用工具类Types的各种静态方法,来生成TypeInformation 68 | 69 | 70 | // 按单词分组 71 | /*wordAndOne.keyBy(new KeySelector, String>() { 72 | @Override 73 | public String getKey(Tuple2 value) throws Exception { 74 | return null; 75 | } 76 | })*/ 77 | // 从上面的KeySelector接口来看,它依然是一个 单抽象方法的 接口,所以它的方法实现,依然可以用lambda表达式来实现 78 | KeyedStream, String> keyedStream = wordAndOne.keyBy((value) -> value.f0); 79 | 80 | 81 | // 统计单词个数 82 | keyedStream.sum(1) 83 | .print(); 84 | 85 | 86 | envStream.execute(); 87 | 88 | 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/java/demos/_09_StreamFileSinkOperator_Demo1.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flink.java.demos; 2 | 3 | import org.apache.avro.Schema; 4 | import org.apache.avro.SchemaBuilder; 5 | import org.apache.avro.generic.GenericData; 6 | import org.apache.avro.generic.GenericRecord; 7 | import org.apache.flink.api.common.functions.MapFunction; 8 | import org.apache.flink.connector.file.sink.FileSink; 9 | import org.apache.flink.core.fs.Path; 10 | import org.apache.flink.formats.avro.typeutils.GenericRecordAvroTypeInfo; 11 | import org.apache.flink.formats.parquet.ParquetWriterFactory; 12 | import org.apache.flink.formats.parquet.avro.ParquetAvroWriters; 13 | import org.apache.flink.streaming.api.CheckpointingMode; 14 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 15 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 16 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 17 | import org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig; 18 | import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.DateTimeBucketAssigner; 19 | import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy; 20 | 21 | /** 22 | * 23 | * @Author: deep as the sea 24 | * @Site: www.51doit.com 25 | * @QQ: 657270652 26 | * @Date: 2022/4/26 27 | * @Desc: 要把处理好的数据流,输出到文件系统(hdfs) 28 | * 使用的sink算子,是扩展包中的 StreamFileSink 29 | **/ 30 | public class _09_StreamFileSinkOperator_Demo1 { 31 | 32 | public static void main(String[] args) throws Exception { 33 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 34 | // 开启checkpoint 35 | env.enableCheckpointing(5000, CheckpointingMode.EXACTLY_ONCE); 36 
| env.getCheckpointConfig().setCheckpointStorage("file:///d:/ckpt"); 37 | 38 | // 构造好一个数据流 39 | DataStreamSource streamSource = env.addSource(new MySourceFunction()); 40 | 41 | // 将上面的数据流输出到文件系统(假装成一个经过了各种复杂计算后的结果数据流) 42 | 43 | 44 | /** 45 | * 方式一: 46 | * 核心逻辑: 47 | * - 构造一个schema 48 | * - 利用schema构造一个parquetWriterFactory 49 | * - 利用parquetWriterFactory构造一个FileSink算子 50 | * - 将原始数据转成GenericRecord流,输出到FileSink算子 51 | */ 52 | // 1. 先定义GenericRecord的数据模式 53 | Schema schema = SchemaBuilder.builder() 54 | .record("DataRecord") 55 | .namespace("cn.doitedu.flink.avro.schema") 56 | .doc("用户行为事件数据模式") 57 | .fields() 58 | .requiredInt("gid") 59 | .requiredLong("ts") 60 | .requiredString("eventId") 61 | .requiredString("sessionId") 62 | .name("eventInfo") 63 | .type() 64 | .map() 65 | .values() 66 | .type("string") 67 | .noDefault() 68 | .endRecord(); 69 | 70 | 71 | // 2. 通过定义好的schema模式,来得到一个parquetWriter 72 | ParquetWriterFactory writerFactory = ParquetAvroWriters.forGenericRecord(schema); 73 | 74 | // 3. 利用生成好的parquetWriter,来构造一个 支持列式输出parquet文件的 sink算子 75 | FileSink sink1 = FileSink.forBulkFormat(new Path("d:/datasink/"), writerFactory) 76 | .withBucketAssigner(new DateTimeBucketAssigner("yyyy-MM-dd--HH")) 77 | .withRollingPolicy(OnCheckpointRollingPolicy.build()) 78 | .withOutputFileConfig(OutputFileConfig.builder().withPartPrefix("doit_edu").withPartSuffix(".parquet").build()) 79 | .build(); 80 | 81 | 82 | // 4. 将自定义javabean的流,转成 上述sink算子中parquetWriter所需要的 GenericRecord流 83 | SingleOutputStreamOperator recordStream = streamSource 84 | .map((MapFunction) eventLog -> { 85 | // 构造一个Record对象 86 | GenericData.Record record = new GenericData.Record(schema); 87 | 88 | // 将数据填入record 89 | record.put("gid", (int) eventLog.getGuid()); 90 | record.put("eventId", eventLog.getEventId()); 91 | record.put("ts", eventLog.getTimeStamp()); 92 | record.put("sessionId", eventLog.getSessionId()); 93 | record.put("eventInfo", eventLog.getEventInfo()); 94 | 95 | return record; 96 | }).returns(new GenericRecordAvroTypeInfo(schema)); // 由于avro的相关类、对象需要用avro的序列化器,所以需要显式指定AvroTypeInfo来提供AvroSerializer 97 | 98 | // 5. 
输出数据 99 | recordStream.sinkTo(sink1); 100 | 101 | env.execute(); 102 | 103 | 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/java/demos/_09_StreamFileSinkOperator_Demo2.java: -------------------------------------------------------------------------------- 1 | //package cn.doitedu.flink.java.demos; 2 | // 3 | //import cn.doitedu.flink.avro.schema.AvroEventLog; 4 | //import org.apache.avro.Schema; 5 | //import org.apache.avro.SchemaBuilder; 6 | //import org.apache.avro.generic.GenericData; 7 | //import org.apache.avro.generic.GenericRecord; 8 | //import org.apache.flink.api.common.functions.MapFunction; 9 | //import org.apache.flink.connector.file.sink.FileSink; 10 | //import org.apache.flink.core.fs.Path; 11 | //import org.apache.flink.formats.parquet.ParquetWriterFactory; 12 | //import org.apache.flink.formats.parquet.avro.ParquetAvroWriters; 13 | //import org.apache.flink.streaming.api.CheckpointingMode; 14 | //import org.apache.flink.streaming.api.datastream.DataStreamSource; 15 | //import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 16 | //import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 17 | //import org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig; 18 | //import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.DateTimeBucketAssigner; 19 | //import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy; 20 | // 21 | //import java.util.HashMap; 22 | //import java.util.Map; 23 | //import java.util.Set; 24 | // 25 | ///** 26 | // * 27 | // * @Author: deep as the sea 28 | // * @Site: www.51doit.com 29 | // * @QQ: 657270652 30 | // * @Date: 2022/4/26 31 | // * @Desc: 要把处理好的数据流,输出到文件系统(hdfs) 32 | // * 使用的sink算子,是扩展包中的 StreamFileSink 33 | // **/ 34 | //public class _09_StreamFileSinkOperator_Demo2 { 35 | // 36 | // public static void main(String[] args) throws Exception { 37 | // StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 38 | // // 开启checkpoint 39 | // env.enableCheckpointing(5000, CheckpointingMode.EXACTLY_ONCE); 40 | // env.getCheckpointConfig().setCheckpointStorage("file:///d:/ckpt"); 41 | // 42 | // // 构造好一个数据流 43 | // DataStreamSource streamSource = env.addSource(new MySourceFunction()); 44 | // 45 | // // 将上面的数据流输出到文件系统(假装成一个经过了各种复杂计算后的结果数据流) 46 | // 47 | // 48 | // /** 49 | // * 方式二: 50 | // * 核心逻辑: 51 | // * - 编写一个avsc文本文件(json),来描述数据模式 52 | // * - 添加 maven代码生成器插件,来针对上述的avsc生成avro特定格式的JavaBean类 53 | // * - 利用代码生成器生成的 JavaBean,来构造一个 parquetWriterFactory 54 | // * - 利用parquetWriterFactory构造一个FileSink算子 55 | // * - 将原始数据流 转成 特定格式JavaBean流,输出到 FileSink算子 56 | // */ 57 | // 58 | // // 1. 先定义avsc文件放在resources文件夹中,并用maven的插件,来编译一下,生成特定格式的JavaBean : AvroEventLog 59 | // // 这种根据avsc生成的JavaBean类,自身就已经带有了Schema对象 60 | // // AvroEventLog avroEventLog = new AvroEventLog(); 61 | // // Schema schema = avroEventLog.getSchema(); 62 | // 63 | // // 2. 通过自动生成 AvroEventLog类,来得到一个parquetWriter 64 | // ParquetWriterFactory parquetWriterFactory = ParquetAvroWriters.forSpecificRecord(AvroEventLog.class); 65 | // 66 | // // 3. 
利用生成好的parquetWriter,来构造一个 支持列式输出parquet文件的 sink算子 67 | // FileSink bulkSink = FileSink.forBulkFormat(new Path("d:/datasink2/"), parquetWriterFactory) 68 | // .withBucketAssigner(new DateTimeBucketAssigner("yyyy-MM-dd--HH")) 69 | // .withRollingPolicy(OnCheckpointRollingPolicy.build()) 70 | // .withOutputFileConfig(OutputFileConfig.builder().withPartPrefix("doit_edu").withPartSuffix(".parquet").build()) 71 | // .build(); 72 | // 73 | // 74 | // // 4. 将自定义javabean的 EventLog 流,转成 上述sink算子中parquetWriter所需要的 AvroEventLog 流 75 | // SingleOutputStreamOperator avroEventLogStream = streamSource.map(new MapFunction() { 76 | // @Override 77 | // public AvroEventLog map(EventLog eventLog) throws Exception { 78 | // HashMap eventInfo1 = new HashMap<>(); 79 | // 80 | // // 进行hashmap类型的数据转移 81 | // Map eventInfo2 = eventLog.getEventInfo(); 82 | // Set> entries = eventInfo2.entrySet(); 83 | // for (Map.Entry entry : entries) { 84 | // eventInfo1.put(entry.getKey(), entry.getValue()); 85 | // } 86 | // 87 | // return new AvroEventLog(eventLog.getGuid(), eventLog.getSessionId(), eventLog.getEventId(), eventLog.getTimeStamp(), eventInfo1); 88 | // } 89 | // }); 90 | // 91 | // 92 | // // 5. 输出数据 93 | // avroEventLogStream.sinkTo(bulkSink); 94 | // 95 | // env.execute(); 96 | // 97 | // } 98 | //} 99 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/java/demos/_09_StreamFileSinkOperator_Demo3.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flink.java.demos; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.connector.file.sink.FileSink; 5 | import org.apache.flink.core.fs.Path; 6 | import org.apache.flink.formats.parquet.ParquetWriterFactory; 7 | import org.apache.flink.formats.parquet.avro.ParquetAvroWriters; 8 | import org.apache.flink.streaming.api.CheckpointingMode; 9 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 10 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 12 | import org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig; 13 | import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.DateTimeBucketAssigner; 14 | import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy; 15 | 16 | import java.util.HashMap; 17 | import java.util.Map; 18 | import java.util.Set; 19 | 20 | /** 21 | * 22 | * @Author: deep as the sea 23 | * @Site: www.51doit.com 24 | * @QQ: 657270652 25 | * @Date: 2022/4/26 26 | * @Desc: 要把处理好的数据流,输出到文件系统(hdfs) 27 | * 使用的sink算子,是扩展包中的 StreamFileSink 28 | **/ 29 | public class _09_StreamFileSinkOperator_Demo3 { 30 | 31 | public static void main(String[] args) throws Exception { 32 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 33 | // 开启checkpoint 34 | env.enableCheckpointing(5000, CheckpointingMode.EXACTLY_ONCE); 35 | env.getCheckpointConfig().setCheckpointStorage("file:///d:/ckpt"); 36 | env.setParallelism(1); 37 | 38 | // 构造好一个数据流 39 | DataStreamSource streamSource = env.addSource(new MySourceFunction()); 40 | 41 | // 将上面的数据流输出到文件系统(假装成一个经过了各种复杂计算后的结果数据流) 42 | 43 | 44 | /** 45 | * 方式三: 46 | * 核心逻辑: 47 | * - 利用自己的JavaBean类,来构造一个 parquetWriterFactory 48 | * - 利用parquetWriterFactory构造一个FileSink算子 49 | * - 将原始数据流,输出到 FileSink算子 50 | */ 51 | 52 | 
// 2. 通过自己的JavaBean类,来得到一个parquetWriter 53 | ParquetWriterFactory parquetWriterFactory = ParquetAvroWriters.forReflectRecord(EventLog.class); 54 | 55 | // 3. 利用生成好的parquetWriter,来构造一个 支持列式输出parquet文件的 sink算子 56 | FileSink bulkSink = FileSink.forBulkFormat(new Path("d:/datasink3/"), parquetWriterFactory) 57 | .withBucketAssigner(new DateTimeBucketAssigner("yyyy-MM-dd--HH")) 58 | .withRollingPolicy(OnCheckpointRollingPolicy.build()) 59 | .withOutputFileConfig(OutputFileConfig.builder().withPartPrefix("doit_edu").withPartSuffix(".parquet").build()) 60 | .build(); 61 | 62 | // 5. 输出数据 63 | streamSource.sinkTo(bulkSink); 64 | 65 | 66 | env.execute(); 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/java/demos/_10_KafkaSinkOperator_Demo1.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flink.java.demos; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 5 | import org.apache.flink.configuration.Configuration; 6 | import org.apache.flink.connector.base.DeliveryGuarantee; 7 | import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema; 8 | import org.apache.flink.connector.kafka.sink.KafkaSink; 9 | import org.apache.flink.streaming.api.CheckpointingMode; 10 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 12 | 13 | /** 14 | * 15 | * @Author: deep as the sea 16 | * @Site: www.51doit.com 17 | * @QQ: 657270652 18 | * @Date: 2022/4/26 19 | * @Desc: 20 | * 利用KafkaSink将数据流写入kafka 21 | * 测试准备,创建目标topic: 22 | * [root@doit01 ~]# kafka-topics.sh --create --topic event-log --partitions 3 --replication-factor 2 --zookeeper doit01:2181 23 | **/ 24 | public class _10_KafkaSinkOperator_Demo1 { 25 | public static void main(String[] args) throws Exception { 26 | 27 | Configuration configuration = new Configuration(); 28 | configuration.setInteger("rest.port",8822); 29 | StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration); 30 | 31 | 32 | // 开启checkpoint 33 | env.enableCheckpointing(5000, CheckpointingMode.EXACTLY_ONCE); 34 | env.getCheckpointConfig().setCheckpointStorage("file:///d:/ckpt"); 35 | 36 | // 构造好一个数据流 37 | DataStreamSource streamSource = env.addSource(new MySourceFunction()); 38 | 39 | 40 | // 把数据写入kafka 41 | // 1. 构造一个kafka的sink算子 42 | KafkaSink kafkaSink = KafkaSink.builder() 43 | .setBootstrapServers("doit01:9092,doit02:9092") 44 | .setRecordSerializer(KafkaRecordSerializationSchema.builder() 45 | .setTopic("event-log") 46 | .setValueSerializationSchema(new SimpleStringSchema()) 47 | .build() 48 | ) 49 | .setDeliverGuarantee(DeliveryGuarantee.AT_LEAST_ONCE) 50 | .setTransactionalIdPrefix("doitedu-") 51 | .build(); 52 | 53 | // 2. 
把数据流输出到构造好的sink算子 54 | streamSource 55 | .map(JSON::toJSONString).disableChaining() 56 | .sinkTo(kafkaSink); 57 | 58 | env.execute(); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/java/demos/_12_RedisSinkOperator_Demo1.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flink.java.demos; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import org.apache.flink.configuration.Configuration; 5 | import org.apache.flink.streaming.api.CheckpointingMode; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.streaming.connectors.redis.RedisSink; 9 | import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig; 10 | import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommand; 11 | import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommandDescription; 12 | import org.apache.flink.streaming.connectors.redis.common.mapper.RedisMapper; 13 | 14 | import java.util.Optional; 15 | 16 | /** 17 | * 18 | * @Author: deep as the sea 19 | * @Site: www.51doit.com 20 | * @QQ: 657270652 21 | * @Date: 2022/4/26 22 | * @Desc: 23 | * 将数据流写入redis,利用RedisSink算子 24 | * 25 | **/ 26 | public class _12_RedisSinkOperator_Demo1 { 27 | 28 | 29 | public static void main(String[] args) throws Exception { 30 | 31 | Configuration configuration = new Configuration(); 32 | configuration.setInteger("rest.port",8822); 33 | StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration); 34 | 35 | 36 | // 开启checkpoint 37 | env.enableCheckpointing(5000, CheckpointingMode.EXACTLY_ONCE); 38 | env.getCheckpointConfig().setCheckpointStorage("file:///d:/ckpt"); 39 | 40 | // 构造好一个数据流 41 | DataStreamSource streamSource = env.addSource(new MySourceFunction()); 42 | 43 | // eventLog数据插入redis,你想用什么结构来存储? 
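 // A note on the two RedisMapper choices defined further down in this class:
 //  - StringInsertMapper (the one actually used below) issues a Redis SET per event:
 //    the key is guid-sessionId-timeStamp and the value is the event serialized as JSON;
 //  - HsetInsertMapper would instead issue HSET into a single hash named "event-logs",
 //    with the same composite key as the hash field and the eventId as the value.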
44 | FlinkJedisPoolConfig config = new FlinkJedisPoolConfig.Builder().setHost("doit01").build(); 45 | 46 | RedisSink redisSink = new RedisSink<>(config, new StringInsertMapper()); 47 | 48 | streamSource.addSink(redisSink); 49 | 50 | env.execute(); 51 | 52 | } 53 | 54 | 55 | static class StringInsertMapper implements RedisMapper{ 56 | 57 | @Override 58 | public RedisCommandDescription getCommandDescription() { 59 | return new RedisCommandDescription(RedisCommand.SET); 60 | } 61 | 62 | /** 63 | * 如果选择的是没有内部key的redis数据结构,则此方法返回的就是大 key 64 | * 如果选择的是有内部key的redis数据结构(hset),则此方法返回的是hset内部的小key,二把上面Description中传入的值作为大key 65 | * @param data 66 | * @return 67 | */ 68 | @Override 69 | public String getKeyFromData(EventLog data) { 70 | return data.getGuid()+"-"+data.getSessionId()+"-"+data.getTimeStamp(); // 这里就是string数据的大key 71 | } 72 | 73 | @Override 74 | public String getValueFromData(EventLog data) { 75 | return JSON.toJSONString(data); // 这里就是string数据的value 76 | } 77 | } 78 | 79 | 80 | /** 81 | * HASH结构数据插入 82 | */ 83 | static class HsetInsertMapper implements RedisMapper{ 84 | // 可以根据具体数据, 选择额外key(就是hash这种结构,它有额外key(大key) 85 | @Override 86 | public Optional getAdditionalKey(EventLog data) { 87 | return RedisMapper.super.getAdditionalKey(data); 88 | } 89 | 90 | // 可以根据具体数据,设置不同的TTL(time to live,数据的存活时长) 91 | @Override 92 | public Optional getAdditionalTTL(EventLog data) { 93 | return RedisMapper.super.getAdditionalTTL(data); 94 | } 95 | 96 | @Override 97 | public RedisCommandDescription getCommandDescription() { 98 | return new RedisCommandDescription(RedisCommand.HSET,"event-logs"); 99 | } 100 | 101 | /** 102 | * 如果选择的是没有内部key的redis数据结构,则此方法返回的就是大 key 103 | * 如果选择的是有内部key的redis数据结构(hset),则此方法返回的是hset内部的小key,二把上面Description中传入的值作为大key 104 | * @param data 105 | * @return 106 | */ 107 | @Override 108 | public String getKeyFromData(EventLog data) { 109 | return data.getGuid()+"-"+data.getSessionId()+"-"+data.getTimeStamp(); // 这里就是hset中的field(小key) 110 | } 111 | 112 | @Override 113 | public String getValueFromData(EventLog data) { 114 | return data.getEventId(); // 这里就是hset中的value 115 | } 116 | 117 | 118 | } 119 | 120 | 121 | 122 | 123 | 124 | 125 | } 126 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/java/demos/_13_SideOutput_Demo.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flink.java.demos; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import org.apache.flink.api.common.typeinfo.TypeInformation; 5 | import org.apache.flink.configuration.Configuration; 6 | import org.apache.flink.streaming.api.CheckpointingMode; 7 | import org.apache.flink.streaming.api.datastream.DataStream; 8 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 9 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.streaming.api.functions.ProcessFunction; 12 | import org.apache.flink.util.Collector; 13 | import org.apache.flink.util.OutputTag; 14 | 15 | /** 16 | * @Author: deep as the sea 17 | * @Site: 多易教育 18 | * @QQ: 657270652 19 | * @Date: 2022/4/26 20 | * @Desc: 测流输出 代码示例(process算子) 21 | **/ 22 | public class _13_SideOutput_Demo { 23 | 24 | 25 | public static void main(String[] args) throws Exception { 26 | 27 | Configuration configuration = new Configuration(); 28 | configuration.setInteger("rest.port", 8822); 29 | 
StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration); 30 | env.setParallelism(1); 31 | 32 | 33 | // 开启checkpoint 34 | env.enableCheckpointing(5000, CheckpointingMode.EXACTLY_ONCE); 35 | env.getCheckpointConfig().setCheckpointStorage("file:///d:/ckpt"); 36 | 37 | // 构造好一个数据流 38 | DataStreamSource streamSource = env.addSource(new MySourceFunction()); 39 | 40 | 41 | // 需求: 将行为事件流,进行分流 42 | // appLaunch 事件 ,分到一个流 43 | // putBack 事件,分到一个流 44 | // 其他事件保留在主流 45 | SingleOutputStreamOperator processed = streamSource.process(new ProcessFunction() { 46 | /** 47 | * 48 | * @param eventLog 输入数据 49 | * @param ctx 上下文,它能提供“测输出“功能 50 | * @param out 主流输出收集器 51 | * @throws Exception 52 | */ 53 | @Override 54 | public void processElement(EventLog eventLog, ProcessFunction.Context ctx, Collector out) throws Exception { 55 | String eventId = eventLog.getEventId(); 56 | 57 | if ("appLaunch".equals(eventId)) { 58 | 59 | ctx.output(new OutputTag("launch", TypeInformation.of(EventLog.class)), eventLog); 60 | 61 | } else if ("putBack".equals(eventId)) { 62 | 63 | ctx.output(new OutputTag("back",TypeInformation.of(String.class)), JSON.toJSONString(eventLog)); 64 | } 65 | 66 | out.collect(eventLog); 67 | 68 | } 69 | }); 70 | 71 | // 获取 launch 测流数据 72 | DataStream launchStream = processed.getSideOutput(new OutputTag("launch", TypeInformation.of(EventLog.class))); 73 | 74 | // 获取back 测流数据 75 | DataStream backStream = processed.getSideOutput(new OutputTag("back",TypeInformation.of(String.class))); 76 | 77 | launchStream.print("launch"); 78 | 79 | backStream.print("back"); 80 | 81 | 82 | env.execute(); 83 | 84 | } 85 | 86 | 87 | } 88 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/java/demos/_14_StreamConnect_Union_Demo.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flink.java.demos; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import org.apache.flink.api.common.typeinfo.TypeInformation; 5 | import org.apache.flink.configuration.Configuration; 6 | import org.apache.flink.streaming.api.CheckpointingMode; 7 | import org.apache.flink.streaming.api.datastream.ConnectedStreams; 8 | import org.apache.flink.streaming.api.datastream.DataStream; 9 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 10 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 12 | import org.apache.flink.streaming.api.functions.ProcessFunction; 13 | import org.apache.flink.streaming.api.functions.co.CoMapFunction; 14 | import org.apache.flink.util.Collector; 15 | import org.apache.flink.util.OutputTag; 16 | 17 | /** 18 | * @Author: deep as the sea 19 | * @Site: www.51doit.com 20 | * @QQ: 657270652 21 | * @Date: 2022/4/26 22 | * @Desc: 流的连接connect算子 及 流的关联join算子 代码示例 23 | **/ 24 | public class _14_StreamConnect_Union_Demo { 25 | 26 | 27 | public static void main(String[] args) throws Exception { 28 | 29 | Configuration configuration = new Configuration(); 30 | configuration.setInteger("rest.port", 8822); 31 | StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration); 32 | env.setParallelism(1); 33 | 34 | // 数字字符流 35 | DataStreamSource stream1 = env.socketTextStream("localhost", 9998); 36 | 37 | // 字母字符流 38 | DataStreamSource stream2 = env.socketTextStream("localhost", 9999); 39 | 40 | 
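// --- Added note (hedged worked example): if stream1 receives "3" and stream2 receives "ab",
// the connect + CoMapFunction below emits "doitedu_30" (map1: parse and *10) and
// "doitedu_AB" (map2: upper-case). The union further down instead requires both sides to
// share one element type (String here) and simply merges them into a single stream before
// the map adds the "doitedu_" prefix.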
/** 41 | * 流的 connect 42 | */ 43 | ConnectedStreams connectedStreams = stream1.connect(stream2); 44 | 45 | SingleOutputStreamOperator resultStream = connectedStreams.map(new CoMapFunction() { 46 | // 共同的状态数据 47 | 48 | String prefix = "doitedu_"; 49 | 50 | /** 51 | * 对 左流 处理的逻辑 52 | * @param value 53 | * @return 54 | * @throws Exception 55 | */ 56 | @Override 57 | public String map1(String value) throws Exception { 58 | // 把数字*10,再返回字符串 59 | return prefix + (Integer.parseInt(value)*10) + ""; 60 | } 61 | 62 | /** 63 | * 对 右流 处理的逻辑 64 | * @param value 65 | * @return 66 | * @throws Exception 67 | */ 68 | @Override 69 | public String map2(String value) throws Exception { 70 | 71 | return prefix + value.toUpperCase(); 72 | } 73 | }); 74 | /*resultStream.print();*/ 75 | 76 | 77 | /** 78 | * 流的 union 79 | * 参与 union的流,必须数据类型一致 80 | */ 81 | // stream1.map(Integer::parseInt).union(stream2); // union左右两边的流类型不一致,不通过 82 | DataStream unioned = stream1.union(stream2); 83 | unioned.map(s-> "doitedu_"+s).print(); 84 | 85 | 86 | env.execute(); 87 | 88 | } 89 | 90 | 91 | } 92 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/java/demos/_17_ProcessFunctions_Demo.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flink.java.demos; 2 | 3 | import org.apache.flink.api.common.functions.CoGroupFunction; 4 | import org.apache.flink.api.common.functions.JoinFunction; 5 | import org.apache.flink.api.common.functions.RuntimeContext; 6 | import org.apache.flink.api.common.typeinfo.TypeHint; 7 | import org.apache.flink.api.common.typeinfo.Types; 8 | import org.apache.flink.api.java.tuple.Tuple2; 9 | import org.apache.flink.api.java.tuple.Tuple3; 10 | import org.apache.flink.configuration.Configuration; 11 | import org.apache.flink.streaming.api.datastream.DataStream; 12 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 13 | import org.apache.flink.streaming.api.datastream.KeyedStream; 14 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 15 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 16 | import org.apache.flink.streaming.api.functions.KeyedProcessFunction; 17 | import org.apache.flink.streaming.api.functions.ProcessFunction; 18 | import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows; 19 | import org.apache.flink.streaming.api.windowing.time.Time; 20 | import org.apache.flink.util.Collector; 21 | import org.apache.flink.util.OutputTag; 22 | 23 | /** 24 | * @Author: deep as the sea 25 | * @Site: www.51doit.com 26 | * @QQ: 657270652 27 | * @Date: 2022/4/26 28 | * @Desc: process算子及ProcessFunction示例 29 | * 30 | * 在不同类型的 数据流上,调用process算子时,所需要传入的ProcessFunction也会有不同 31 | **/ 32 | public class _17_ProcessFunctions_Demo { 33 | 34 | 35 | public static void main(String[] args) throws Exception { 36 | 37 | Configuration configuration = new Configuration(); 38 | configuration.setInteger("rest.port", 8822); 39 | StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration); 40 | env.setParallelism(1); 41 | 42 | // id,eventId 43 | DataStreamSource stream1 = env.socketTextStream("localhost", 9998); 44 | 45 | /** 46 | * 在普通的datastream上调用process算子,传入的是 "ProcessFunction" 47 | */ 48 | SingleOutputStreamOperator> s1 = stream1.process(new ProcessFunction>() { 49 | // 可以使用 生命周期 open 方法 50 | @Override 51 | public void open(Configuration parameters) 
throws Exception { 52 | // 可以调用 getRuntimeContext 方法拿到各种运行时上下文信息 53 | RuntimeContext runtimeContext = getRuntimeContext(); 54 | runtimeContext.getTaskName(); 55 | 56 | super.open(parameters); 57 | } 58 | 59 | @Override 60 | public void processElement(String value, ProcessFunction>.Context ctx, Collector> out) throws Exception { 61 | 62 | // 可以做测流输出 63 | ctx.output(new OutputTag("s1", Types.STRING),value); 64 | 65 | // 可以做主流输出 66 | String[] arr = value.split(","); 67 | out.collect(Tuple2.of(arr[0], arr[1])); 68 | } 69 | 70 | // 可以使用 生命周期close方法 71 | @Override 72 | public void close() throws Exception { 73 | super.close(); 74 | } 75 | }); 76 | 77 | 78 | 79 | /** 80 | * 在 keyedStream上调用 process算子,传入的是 "KeyedProcessFunction" 81 | * KeyedProcessFunction 中的 ,泛型1: 流中的 key 的类型; 泛型2: 流中的数据的类型 ; 泛型3: 处理后的输出结果的类型 82 | */ 83 | // 对s1流进行keyby分组 84 | KeyedStream, String> keyedStream = s1.keyBy(tp2 -> tp2.f0); 85 | // 然后在keyby后的数据流上调用process算子 86 | SingleOutputStreamOperator> s2 = keyedStream.process(new KeyedProcessFunction, Tuple2>() { 87 | @Override 88 | public void processElement(Tuple2 value, KeyedProcessFunction, Tuple2>.Context ctx, Collector> out) throws Exception { 89 | // 把id变整数,把eventId变大写 90 | out.collect(Tuple2.of(Integer.parseInt(value.f0), value.f1.toUpperCase())); 91 | } 92 | }); 93 | 94 | 95 | s2.print(); 96 | 97 | 98 | env.execute(); 99 | 100 | } 101 | 102 | 103 | } 104 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/java/demos/_18_ChannalSelector_Partitioner_Demo.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flink.java.demos; 2 | 3 | import org.apache.flink.api.common.functions.FlatMapFunction; 4 | import org.apache.flink.configuration.Configuration; 5 | import org.apache.flink.streaming.api.datastream.DataStream; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 9 | import org.apache.flink.streaming.api.functions.KeyedProcessFunction; 10 | import org.apache.flink.util.Collector; 11 | 12 | public class _18_ChannalSelector_Partitioner_Demo { 13 | 14 | public static void main(String[] args) throws Exception { 15 | 16 | Configuration conf = new Configuration(); 17 | conf.setInteger("rest.port", 8081); 18 | StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(conf); 19 | 20 | 21 | DataStreamSource s1 = env.socketTextStream("localhost", 9999); 22 | 23 | DataStream s2 = s1 24 | .map(s -> s.toUpperCase()) 25 | .setParallelism(4) 26 | .flatMap(new FlatMapFunction() { 27 | @Override 28 | public void flatMap(String value, Collector out) throws Exception { 29 | String[] arr = value.split(","); 30 | for (String s : arr) { 31 | out.collect(s); 32 | } 33 | } 34 | }) 35 | .setParallelism(4) 36 | .forward(); 37 | 38 | SingleOutputStreamOperator s3 = s2.map(s -> s.toLowerCase()).setParallelism(4); 39 | 40 | 41 | SingleOutputStreamOperator s4 = s3.keyBy(s -> s.substring(0, 2)) 42 | .process(new KeyedProcessFunction() { 43 | @Override 44 | public void processElement(String value, KeyedProcessFunction.Context ctx, Collector out) throws Exception { 45 | out.collect(value + ">"); 46 | } 47 | }).setParallelism(4); 48 | 49 | DataStream s5 = s4.filter(s -> s.startsWith("b")).setParallelism(4); 50 | 51 | s5.print().setParallelism(4); 52 | 53 | 
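// --- Added sketch (not part of the original demo): besides forward() and keyBy(),
// DataStream offers several other repartitioning operators. The streams created below are
// illustrative only and are not printed or consumed by this job.
DataStream<String> rebalanced  = s5.rebalance();  // round-robin over all downstream channels
DataStream<String> shuffled    = s5.shuffle();    // random channel per record
DataStream<String> rescaled    = s5.rescale();    // round-robin within local subtask groups
DataStream<String> broadcasted = s5.broadcast();  // every record to every downstream channel
DataStream<String> globalized  = s5.global();     // every record to the first downstream channel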
env.execute(); 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/java/demos/_19_WaterMark_Api_Demo2.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flink.java.demos; 2 | 3 | 4 | import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner; 5 | import org.apache.flink.api.common.eventtime.WatermarkStrategy; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 9 | import org.apache.flink.streaming.api.functions.ProcessFunction; 10 | import org.apache.flink.util.Collector; 11 | 12 | /** 13 | * @Author: deep as the sea 14 | * @Site: 多易教育 15 | * @QQ: 657270652 16 | * @Date: 2022/5/1 17 | * @Desc: watermark生成设置相关代码演示 18 | * 及单并行度情况下的watermark推进观察 19 | * 20 | * ==> 在socket端口依次输入如下两条数据: 21 | * 1,e06,3000,page02 22 | * 1,e06,3000,page02 23 | * 24 | * ==> 程序的控制台上会依次输出如下信息: 25 | * 本次收到的数据EventBean(guid=1, eventId=e05, timeStamp=2000, pageId=page01) 26 | * 此刻的watermark: -9223372036854775808 27 | * 此刻的处理时间(processing time): 1651396210778 28 | * ---------------------- 29 | * 本次收到的数据EventBean(guid=1, eventId=e06, timeStamp=3000, pageId=page02) 30 | * 此刻的watermark: 1999 31 | * 此刻的处理时间(processing time): 1651396273755 32 | * 33 | **/ 34 | public class _19_WaterMark_Api_Demo2 { 35 | 36 | public static void main(String[] args) throws Exception { 37 | 38 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 39 | env.setParallelism(1); 40 | 41 | 42 | // 1,e01,168673487846,pg01 43 | DataStreamSource s1 = env.socketTextStream("localhost", 9999); 44 | 45 | 46 | SingleOutputStreamOperator s2 = s1.map(s -> { 47 | String[] split = s.split(","); 48 | return new EventBean(Long.parseLong(split[0]), split[1], Long.parseLong(split[2]), split[3]); 49 | }).returns(EventBean.class) 50 | .assignTimestampsAndWatermarks( 51 | WatermarkStrategy 52 | .forMonotonousTimestamps() 53 | .withTimestampAssigner(new SerializableTimestampAssigner() { 54 | @Override 55 | public long extractTimestamp(EventBean eventBean, long recordTimestamp) { 56 | return eventBean.getTimeStamp(); 57 | } 58 | }) 59 | ).setParallelism(2); 60 | 61 | s2.process(new ProcessFunction() { 62 | @Override 63 | public void processElement(EventBean eventBean, ProcessFunction.Context ctx, Collector out) throws Exception { 64 | 65 | Thread.sleep(1000); 66 | System.out.println("睡醒了,准备打印"); 67 | 68 | // 打印此刻的 watermark 69 | long processTime = ctx.timerService().currentProcessingTime(); 70 | long watermark = ctx.timerService().currentWatermark(); 71 | 72 | System.out.println("本次收到的数据" + eventBean); 73 | System.out.println("此刻的watermark: " + watermark); 74 | System.out.println("此刻的处理时间(processing time): " + processTime ); 75 | 76 | out.collect(eventBean); 77 | } 78 | }).setParallelism(1).print(); 79 | 80 | 81 | env.execute(); 82 | 83 | 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/java/demos/_21_Window_Api_Demo3.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flink.java.demos; 2 | 3 | import org.apache.flink.api.common.RuntimeExecutionMode; 4 | import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner; 5 | import 
org.apache.flink.api.common.eventtime.WatermarkStrategy; 6 | import org.apache.flink.api.common.typeinfo.TypeHint; 7 | import org.apache.flink.api.common.typeinfo.TypeInformation; 8 | import org.apache.flink.api.java.tuple.Tuple2; 9 | import org.apache.flink.streaming.api.datastream.DataStream; 10 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 11 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 12 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 13 | import org.apache.flink.streaming.api.functions.windowing.WindowFunction; 14 | import org.apache.flink.streaming.api.windowing.assigners.*; 15 | import org.apache.flink.streaming.api.windowing.time.Time; 16 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 17 | import org.apache.flink.util.Collector; 18 | import org.apache.flink.util.OutputTag; 19 | 20 | import java.time.Duration; 21 | 22 | public class _21_Window_Api_Demo3 { 23 | 24 | public static void main(String[] args) throws Exception { 25 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 26 | env.setParallelism(1); 27 | env.setRuntimeMode(RuntimeExecutionMode.STREAMING); 28 | 29 | // 1,e01,3000,pg02 30 | DataStreamSource source = env.socketTextStream("localhost", 9999); 31 | 32 | SingleOutputStreamOperator> beanStream = source.map(s -> { 33 | String[] split = s.split(","); 34 | EventBean2 bean = new EventBean2(Long.parseLong(split[0]), split[1], Long.parseLong(split[2]), split[3], Integer.parseInt(split[4])); 35 | return Tuple2.of(bean,1); 36 | }).returns(new TypeHint>() {}) 37 | .assignTimestampsAndWatermarks(WatermarkStrategy.>forBoundedOutOfOrderness(Duration.ofMillis(0)) 38 | .withTimestampAssigner(new SerializableTimestampAssigner>() { 39 | @Override 40 | public long extractTimestamp(Tuple2 element, long recordTimestamp) { 41 | return element.f0.getTimeStamp(); 42 | } 43 | })); 44 | 45 | 46 | OutputTag> lateDataOutputTag = new OutputTag<>("late_data", TypeInformation.of(new TypeHint>() {})); 47 | 48 | SingleOutputStreamOperator sumResult = beanStream.keyBy(tp -> tp.f0.getGuid()) 49 | .window(TumblingEventTimeWindows.of(Time.seconds(10))) // 事件时间滚动窗口,窗口长度为10 50 | .allowedLateness(Time.seconds(2)) // 允许迟到2s 51 | .sideOutputLateData(lateDataOutputTag) // 迟到超过允许时限的数据,输出到该“outputtag”所标记的测流 52 | /*.sum("f1")*/ 53 | .apply(new WindowFunction, String, Long, TimeWindow>() { 54 | @Override 55 | public void apply(Long aLong, TimeWindow window, Iterable> input, Collector out) throws Exception { 56 | int count = 0; 57 | for (Tuple2 eventBean2IntegerTuple2 : input) { 58 | count ++; 59 | } 60 | out.collect(window.getStart()+":"+ window.getEnd()+","+count); 61 | } 62 | }); 63 | 64 | 65 | DataStream> lateDataSideStream = sumResult.getSideOutput(lateDataOutputTag); 66 | 67 | 68 | sumResult.print("主流结果"); 69 | 70 | lateDataSideStream.print("迟到数据"); 71 | 72 | env.execute(); 73 | 74 | } 75 | 76 | 77 | } 78 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/java/demos/_22_StateBasic_Demo.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flink.java.demos; 2 | 3 | import org.apache.flink.api.common.RuntimeExecutionMode; 4 | import org.apache.flink.api.common.functions.MapFunction; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 
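/*
 * Added note (worked example, not part of the original file): with the raw-state
 * MapFunction below, socket inputs "a", "b", "c" print "a", "ab", "abc" in turn.
 * Because the accumulator is a plain member variable, Flink cannot checkpoint it,
 * so the accumulated string is lost whenever the task restarts -- which is exactly
 * the limitation the managed-state demos that follow are meant to address.
 */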
7 | 8 | public class _22_StateBasic_Demo { 9 | 10 | public static void main(String[] args) throws Exception { 11 | 12 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 13 | env.setParallelism(1); 14 | env.setRuntimeMode(RuntimeExecutionMode.STREAMING); 15 | 16 | // a 17 | DataStreamSource source = env.socketTextStream("localhost", 9999); 18 | 19 | // 需要使用map算子来达到一个效果: 20 | // 没来一条数据(字符串),输出 该条字符串拼接此前到达过的所有字符串 21 | source.map(new MapFunction() { 22 | 23 | // 自己定义、自己管理的状态,持久化和容错都很困难 24 | // 这种状态(state) 叫做: (自管理状态)raw状态 => raw state 25 | String acc = ""; 26 | 27 | /** 28 | * 要让flink来帮助管理的状态数据 29 | * ,那就不要自己定义一个变量 30 | * 而是要从flink的api中去获取一个状态管理器,用这个状态管理器来进行数据的增删改查等操作 31 | * 32 | * 这种状态: 叫做 托管状态 ! (flink state) 33 | */ 34 | 35 | @Override 36 | public String map(String value) throws Exception { 37 | acc = acc + value; 38 | return acc; 39 | } 40 | }).print(); 41 | 42 | env.execute(); 43 | 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/java/demos/_24_State_KeyedState_Demo.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flink.java.demos; 2 | 3 | import org.apache.commons.lang3.RandomUtils; 4 | import org.apache.flink.api.common.RuntimeExecutionMode; 5 | import org.apache.flink.api.common.functions.AggregateFunction; 6 | import org.apache.flink.api.common.functions.MapFunction; 7 | import org.apache.flink.api.common.functions.RichMapFunction; 8 | import org.apache.flink.api.common.functions.RuntimeContext; 9 | import org.apache.flink.api.common.restartstrategy.RestartStrategies; 10 | import org.apache.flink.api.common.state.*; 11 | import org.apache.flink.configuration.Configuration; 12 | import org.apache.flink.runtime.state.FunctionInitializationContext; 13 | import org.apache.flink.runtime.state.FunctionSnapshotContext; 14 | import org.apache.flink.streaming.api.CheckpointingMode; 15 | import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction; 16 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 17 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 18 | 19 | /** 20 | * @Author: deep as the sea 21 | * @Site: 多易教育 22 | * @QQ: 657270652 23 | * @Date: 2022/5/5 24 | * @Desc: 键控状态使用演示 25 | **/ 26 | public class _24_State_KeyedState_Demo { 27 | 28 | public static void main(String[] args) throws Exception { 29 | 30 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 31 | env.setParallelism(1); 32 | env.setRuntimeMode(RuntimeExecutionMode.STREAMING); 33 | 34 | // 开启状态数据的checkpoint机制(快照的周期,快照的模式) 35 | env.enableCheckpointing(1000, CheckpointingMode.EXACTLY_ONCE); 36 | 37 | // 开启快照后,就需要指定快照数据的持久化存储位置 38 | env.getCheckpointConfig().setCheckpointStorage("file:///d:/checkpoint/"); 39 | 40 | 41 | // 开启 task级别故障自动 failover 42 | env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3,1000)); 43 | 44 | 45 | DataStreamSource source = env.socketTextStream("localhost", 9999); 46 | 47 | // 需要使用map算子来达到一个效果: 48 | // 没来一条数据(字符串),输出 该条字符串拼接此前到达过的所有字符串 49 | source 50 | .keyBy(s->"0") 51 | .map(new RichMapFunction() { 52 | 53 | ListState lstState; 54 | @Override 55 | public void open(Configuration parameters) throws Exception { 56 | RuntimeContext runtimeContext = getRuntimeContext(); 57 | // 获取一个List结构的状态存储器 58 | lstState = runtimeContext.getListState(new ListStateDescriptor("lst", String.class)); 59 | 60 | // 获取一个 单值 
结构的状态存储器 61 | // TODO 自己去点一点 ValueState 的 各种操作方法 62 | 63 | // 获取一个 Map 结构的状态存储器 64 | MapState mapState = runtimeContext.getMapState(new MapStateDescriptor("xx", String.class, String.class)); 65 | // TODO 自己去点一点 MapState的各种操作方法 66 | } 67 | 68 | @Override 69 | public String map(String value) throws Exception { 70 | 71 | // 将本条数据,装入状态存储器 72 | lstState.add(value); 73 | 74 | // 遍历所有的历史字符串,拼接结果 75 | StringBuilder sb = new StringBuilder(); 76 | for (String s : lstState.get()) { 77 | sb.append(s); 78 | } 79 | 80 | return sb.toString(); 81 | } 82 | }).setParallelism(2) 83 | .print().setParallelism(2); 84 | 85 | // 提交一个job 86 | env.execute(); 87 | 88 | } 89 | 90 | } 91 | 92 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/java/demos/_27_ToleranceConfig_Demo.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flink.java.demos; 2 | 3 | 4 | import org.apache.flink.api.common.restartstrategy.RestartStrategies; 5 | import org.apache.flink.api.common.time.Time; 6 | import org.apache.flink.configuration.Configuration; 7 | import org.apache.flink.streaming.api.CheckpointingMode; 8 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 9 | import org.apache.flink.streaming.api.environment.CheckpointConfig; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | 12 | import java.time.Duration; 13 | /** 14 | * @Author: deep as the sea 15 | * @Site: 多易教育 16 | * @QQ: 657270652 17 | * @Date: 2022/5/8 18 | * @Desc: flink容错机制相关参数配置示例 19 | * checkpoint 相关配置 20 | * restartStrategy 相关配置 21 | **/ 22 | public class _27_ToleranceConfig_Demo { 23 | 24 | public static void main(String[] args) throws Exception { 25 | 26 | /** 27 | * 在idea中做测试时,指定从某个保存点来恢复状态 28 | */ 29 | Configuration conf = new Configuration(); 30 | //conf.setString("execution.savepoint.path", "file:///D:/checkpoint/7ecbd4f9106957c42109bcde/chk-544"); 31 | 32 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(conf); 33 | 34 | /* * 35 | * checkpoint相关配置 36 | */ 37 | env.enableCheckpointing(2000, CheckpointingMode.EXACTLY_ONCE); // 传入两个最基本ck参数:间隔时长,ck模式 38 | CheckpointConfig checkpointConfig = env.getCheckpointConfig(); 39 | checkpointConfig.setCheckpointStorage("hdfs://doit01:8020/ckpt"); 40 | checkpointConfig.setAlignedCheckpointTimeout(Duration.ofMinutes(10000)); // 设置ck对齐的超时时长 41 | checkpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE); // 设置ck算法模式 42 | checkpointConfig.setCheckpointInterval(2000); // ck的间隔时长 43 | //checkpointConfig.setCheckpointIdOfIgnoredInFlightData(5); // 用于非对齐算法模式下,在job恢复时让各个算子自动抛弃掉ck-5中飞行数据 44 | checkpointConfig.setExternalizedCheckpointCleanup(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION); // job cancel调时,保留最后一次ck数据 45 | checkpointConfig.setForceUnalignedCheckpoints(false); // 是否强制使用 非对齐的checkpoint模式 46 | checkpointConfig.setMaxConcurrentCheckpoints(5); // 允许在系统中同时存在的飞行中(未完成的)的ck数 47 | checkpointConfig.setMinPauseBetweenCheckpoints(2000); // 设置两次ck之间的最小时间间隔,用于防止checkpoint过多地占用算子的处理时间 48 | checkpointConfig.setCheckpointTimeout(3000); // 一个算子在一次checkpoint执行过程中的总耗费时长超时上限 49 | checkpointConfig.setTolerableCheckpointFailureNumber(10); // 允许的checkpoint失败最大次数 50 | 51 | 52 | 53 | 54 | 55 | /* * 56 | * task失败自动重启策略配置 57 | */ 58 | RestartStrategies.RestartStrategyConfiguration restartStrategy = null; 59 | 60 | // 固定、延迟重启(参数 1: 故障重启最大次数;参数2: 两次重启之间的延迟间隔) 61 | restartStrategy = 
RestartStrategies.fixedDelayRestart(5, 2000); 62 | 63 | // 默认的故障重启策略:不重启(只要有task失败,整个job就失败) 64 | restartStrategy = RestartStrategies.noRestart(); 65 | 66 | 67 | /* * 68 | * 本策略:故障越频繁,两次重启间的惩罚间隔就越长 69 | * 70 | * initialBackoff 重启间隔惩罚时长的初始值 : 1s 71 | * maxBackoff 重启间隔最大惩罚时长 : 60s 72 | * backoffMultiplier 重启间隔时长的惩罚倍数: 2( 每多故障一次,重启延迟惩罚就在 上一次的惩罚时长上 * 倍数) 73 | * resetBackoffThreshold 重置惩罚时长的平稳运行时长阈值(平稳运行达到这个阈值后,如果再故障,则故障重启延迟时间重置为了初始值:1s) 74 | * jitterFactor 取一个随机数来加在重启时间点上,让每次重启的时间点呈现一定随机性 75 | * job1: 9.51 9.53+2*0.1 9.57 ...... 76 | * job2: 9.51 9.53+2*0.15 9.57 ...... 77 | * job3: 9.51 9.53+2*0.8 9.57 ...... 78 | */ 79 | restartStrategy = RestartStrategies.exponentialDelayRestart(Time.seconds(1),Time.seconds(60),2.0,Time.hours(1),1.0); 80 | 81 | /* * 82 | * failureRate : 在指定时长内的最大失败次数 83 | * failureInterval 指定的衡量时长 84 | * delayInterval 两次重启之间的时间间隔 85 | */ 86 | restartStrategy = RestartStrategies.failureRateRestart(5,Time.hours(1),Time.seconds(5)); 87 | 88 | /* * 89 | * 本策略就是退回到配置文件所配置的策略 90 | * 常用于自定义 RestartStrategy 91 | * 用户自定义了重启策略类,常常配置在 flink-conf.yaml 文件中 92 | */ 93 | restartStrategy = RestartStrategies.fallBackRestart(); 94 | 95 | 96 | // 设置指定的重启策略 97 | env.setRestartStrategy(restartStrategy); 98 | 99 | 100 | // 数据处理 101 | DataStreamSource source = env.socketTextStream("localhost", 9999); 102 | 103 | 104 | // 提交执行 105 | env.execute(); 106 | } 107 | 108 | 109 | } 110 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/scala/demos/_01_入门程序WordCount.scala: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flink.scala.demos 2 | 3 | import org.apache.flink.streaming.api.scala.{StreamExecutionEnvironment, createTypeInformation} 4 | 5 | object _01_入门程序WordCount { 6 | def main(args: Array[String]): Unit = { 7 | 8 | val env = StreamExecutionEnvironment.getExecutionEnvironment 9 | 10 | val sourceStream = env.socketTextStream("doit01", 9999) 11 | 12 | // sourceStream.flatMap(s=>s.split("\\s+")).map(w=>(w,1)) 13 | 14 | sourceStream 15 | .flatMap(s => { 16 | s.split("\\s+").map(w => (w, 1)) 17 | }) 18 | .keyBy(tp => tp._1) 19 | .sum(1) 20 | .print("我爱你") 21 | 22 | env.execute("我的job"); // 提交job 23 | 24 | } 25 | 26 | } 27 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/task/Mapper1.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flink.task; 2 | 3 | public class Mapper1 { 4 | 5 | public String map(String s){ 6 | return s.toUpperCase(); 7 | } 8 | 9 | } 10 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/task/Mapper2.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flink.task; 2 | 3 | public class Mapper2 { 4 | public String map(String s){ 5 | return s+".txt"; 6 | } 7 | 8 | } 9 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/task/Task1.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flink.task; 2 | 3 | public class Task1 implements Runnable{ 4 | 5 | @Override 6 | public void run() { 7 | 8 | // 从上游接收数据 9 | //String data = receive(); 10 | 11 | Mapper1 mapper1 = new Mapper1(); 12 | //String res = mapper1.map(data); 13 | 14 | // 把结果发往下游 15 | // 
channel.send(res); 16 | } 17 | 18 | } 19 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/task/Task2.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flink.task; 2 | 3 | public class Task2 implements Runnable{ 4 | 5 | @Override 6 | public void run() { 7 | 8 | //String data = receive(); 9 | 10 | Mapper2 mapper2 = new Mapper2(); 11 | // String res = mapper2.map(data); 12 | 13 | // send(res); 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/task/Task3.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flink.task; 2 | 3 | public class Task3 implements Runnable{ 4 | @Override 5 | public void run() { 6 | Mapper1 mapper1 = new Mapper1(); 7 | Mapper2 mapper2 = new Mapper2(); 8 | 9 | 10 | String res1 = mapper1.map("aaaa"); 11 | String res2 = mapper2.map(res1); 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flink/task/TaskRunner.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flink.task; 2 | 3 | public class TaskRunner { 4 | 5 | public static void main(String[] args) { 6 | 7 | // Task1的6个并行实例,每一个并行实例在flink中叫什么 subTask 8 | new Thread(new Task1()).start(); 9 | new Thread(new Task1()).start(); 10 | new Thread(new Task1()).start(); 11 | new Thread(new Task1()).start(); 12 | new Thread(new Task1()).start(); 13 | new Thread(new Task1()).start(); 14 | 15 | 16 | // Task2的6个并行实例,每一个并行实例在flink中叫什么 subTask 17 | new Thread(new Task2()).start(); 18 | new Thread(new Task2()).start(); 19 | new Thread(new Task2()).start(); 20 | new Thread(new Task2()).start(); 21 | new Thread(new Task2()).start(); 22 | new Thread(new Task2()).start(); 23 | 24 | 25 | // Task3的6个并行实例,每一个并行实例在flink中叫什么 subTask 26 | new Thread(new Task3()).start(); 27 | new Thread(new Task3()).start(); 28 | new Thread(new Task3()).start(); 29 | new Thread(new Task3()).start(); 30 | new Thread(new Task3()).start(); 31 | new Thread(new Task3()).start(); 32 | 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo10_KafkaConnectorDetail.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flinksql.demos; 2 | 3 | import org.apache.flink.streaming.api.datastream.DataStream; 4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 5 | import org.apache.flink.streaming.api.functions.ProcessFunction; 6 | import org.apache.flink.table.api.EnvironmentSettings; 7 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 8 | import org.apache.flink.types.Row; 9 | import org.apache.flink.util.Collector; 10 | 11 | /** 12 | * @Author: deep as the sea 13 | * @Site: 多易教育 14 | * @QQ: 657270652 15 | * @Date: 2022/6/12 16 | * @Desc: 学大数据,到多易教育 17 | * 流 ===> 表 ,过程中如何传承 事件时间 和 watermark 18 | **/ 19 | public class Demo10_KafkaConnectorDetail { 20 | 21 | public static void main(String[] args) throws Exception { 22 | 23 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 24 | env.setParallelism(1); 25 | 26 | EnvironmentSettings settings = EnvironmentSettings.inStreamingMode(); 27 | StreamTableEnvironment 
tenv = StreamTableEnvironment.create(env, settings); 28 | 29 | 30 | /** 31 | * 对应的kafka中的数据: 32 | * key: {"k1":100,"k2":200} 33 | * value: {"guid":1,"eventId":"e02","eventTime":1655017433000,"pageId":"p001"} 34 | * headers: 35 | * h1 -> vvvv 36 | * h2 -> tttt 37 | */ 38 | tenv.executeSql( 39 | " CREATE TABLE t_kafka_connector ( " 40 | + " guid int, " 41 | + " eventId string, " 42 | + " eventTime bigint, " 43 | + " pageId string, " 44 | + " k1 int, " 45 | + " k2 int, " 46 | + " rec_ts timestamp(3) metadata from 'timestamp' , " 47 | + " `offset` bigint metadata , " 48 | + " headers map metadata, " 49 | + " rt as to_timestamp_ltz(eventTime,3) , " 50 | + " watermark for rt as rt - interval '0.001' second " 51 | + " ) WITH ( " 52 | + " 'connector' = 'kafka', " 53 | + " 'topic' = 'doit30-kafka', " 54 | + " 'properties.bootstrap.servers' = 'doitedu:9092', " 55 | + " 'properties.group.id' = 'testGroup', " 56 | + " 'scan.startup.mode' = 'earliest-offset', " 57 | + " 'key.format'='json', " 58 | + " 'key.json.ignore-parse-errors' = 'true', " 59 | + " 'key.fields'='k1;k2', " 60 | /* + " 'key.fields-prefix'='', " */ 61 | + " 'value.format'='json', " 62 | + " 'value.json.fail-on-missing-field'='false', " 63 | + " 'value.fields-include' = 'EXCEPT_KEY' " 64 | + " ) " 65 | 66 | ); 67 | 68 | tenv.executeSql("select * from t_kafka_connector ")/*.print()*/; 69 | tenv.executeSql("select guid,eventId,cast(headers['h1'] as string) as h1, cast(headers['h2'] as string) as h2 from t_kafka_connector ").print(); 70 | 71 | 72 | env.execute(); 73 | 74 | 75 | } 76 | 77 | } 78 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo11_UpsertKafkaConnectorTest.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flinksql.demos; 2 | 3 | 4 | import lombok.AllArgsConstructor; 5 | import lombok.Data; 6 | import lombok.NoArgsConstructor; 7 | import org.apache.flink.configuration.Configuration; 8 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 9 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 12 | 13 | 14 | public class Demo11_UpsertKafkaConnectorTest { 15 | public static void main(String[] args) throws Exception { 16 | 17 | Configuration conf = new Configuration(); 18 | /*conf.setInteger("rest.port",9091);*/ 19 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 20 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env); 21 | 22 | 23 | // 1,male 24 | DataStreamSource s1 = env.socketTextStream("doitedu", 9999); 25 | 26 | SingleOutputStreamOperator bean1 = s1.map(s -> { 27 | String[] arr = s.split(","); 28 | return new Bean1(Integer.parseInt(arr[0]), arr[1]); 29 | }); 30 | 31 | // 流转表 32 | tenv.createTemporaryView("bean1",bean1); 33 | 34 | //tenv.executeSql("select gender,count(1) as cnt from bean1 group by gender").print(); 35 | 36 | 37 | // 创建目标 kafka映射表 38 | tenv.executeSql( 39 | " create table t_upsert_kafka( " 40 | + " gender string primary key not enforced, " 41 | + " cnt bigint " 42 | + " ) with ( " 43 | + " 'connector' = 'upsert-kafka', " 44 | + " 'topic' = 'doit30-upsert', " 45 | + " 'properties.bootstrap.servers' = 'doitedu:9092', " 46 | + " 'key.format' = 'csv', " 47 | + " 'value.format' = 'csv' " 48 | + " ) " 49 | 50 | ); 51 
| // 查询每种性别的数据行数,并将结果插入到目标表 52 | tenv.executeSql( 53 | "insert into t_upsert_kafka " + 54 | "select gender,count(1) as cnt from bean1 group by gender" 55 | ); 56 | 57 | tenv.executeSql("select * from t_upsert_kafka").print(); 58 | 59 | 60 | env.execute(); 61 | 62 | } 63 | 64 | @Data 65 | @NoArgsConstructor 66 | @AllArgsConstructor 67 | public static class Bean1{ 68 | public int id; 69 | public String gender; 70 | } 71 | 72 | 73 | } 74 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo11_UpsertKafkaConnectorTest2.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flinksql.demos; 2 | 3 | 4 | import lombok.AllArgsConstructor; 5 | import lombok.Data; 6 | import lombok.NoArgsConstructor; 7 | import org.apache.flink.configuration.Configuration; 8 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 9 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 12 | 13 | 14 | public class Demo11_UpsertKafkaConnectorTest2 { 15 | public static void main(String[] args) throws Exception { 16 | 17 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 18 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env); 19 | 20 | 21 | // 1,male 22 | DataStreamSource s1 = env.socketTextStream("doitedu", 9998); 23 | // 1,zs 24 | DataStreamSource s2 = env.socketTextStream("doitedu", 9999); 25 | 26 | SingleOutputStreamOperator bean1 = s1.map(s -> { 27 | String[] arr = s.split(","); 28 | return new Bean1(Integer.parseInt(arr[0]), arr[1]); 29 | }); 30 | 31 | SingleOutputStreamOperator bean2 = s2.map(s -> { 32 | String[] arr = s.split(","); 33 | return new Bean2(Integer.parseInt(arr[0]), arr[1]); 34 | }); 35 | 36 | 37 | // 流转表 38 | tenv.createTemporaryView("bean1",bean1); 39 | tenv.createTemporaryView("bean2",bean2); 40 | 41 | //tenv.executeSql("select gender,count(1) as cnt from bean1 group by gender").print(); 42 | 43 | 44 | // 创建目标 kafka映射表 45 | tenv.executeSql( 46 | " create table t_upsert_kafka2( " 47 | + " id int primary key not enforced, " 48 | + " gender string, " 49 | + " name string " 50 | + " ) with ( " 51 | + " 'connector' = 'upsert-kafka', " 52 | + " 'topic' = 'doit30-upsert2', " 53 | + " 'properties.bootstrap.servers' = 'doitedu:9092', " 54 | + " 'key.format' = 'csv', " 55 | + " 'value.format' = 'csv' " 56 | + " ) " 57 | 58 | ); 59 | // 查询每种性别的数据行数,并将结果插入到目标表 60 | tenv.executeSql( 61 | "insert into t_upsert_kafka2 " + 62 | "select bean1.id,bean1.gender,bean2.name from bean1 left join bean2 on bean1.id=bean2.id" 63 | ); 64 | 65 | tenv.executeSql("select * from t_upsert_kafka2").print(); 66 | 67 | 68 | env.execute(); 69 | 70 | } 71 | 72 | @Data 73 | @NoArgsConstructor 74 | @AllArgsConstructor 75 | public static class Bean1{ 76 | public int id; 77 | public String gender; 78 | } 79 | 80 | 81 | @Data 82 | @NoArgsConstructor 83 | @AllArgsConstructor 84 | public static class Bean2{ 85 | public int id; 86 | public String name; 87 | } 88 | 89 | } 90 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo12_JdbcConnectorTest1.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flinksql.demos; 
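/*
 * Added note (hedged, based on the Flink JDBC connector docs rather than on this repo):
 * mapping a MySQL table through 'connector' = 'jdbc' gives a bounded scan source -- the
 * SELECT below finishes once the table has been read and does not watch MySQL for changes
 * (contrast with the mysql-cdc connector demo later in this package). The
 * flink-connector-jdbc artifact and a MySQL driver jar are assumed to be on the classpath.
 */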
2 | 3 | import org.apache.flink.api.common.RuntimeExecutionMode; 4 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 5 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 6 | import org.apache.flink.table.api.EnvironmentSettings; 7 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 8 | 9 | public class Demo12_JdbcConnectorTest1 { 10 | public static void main(String[] args) throws Exception { 11 | 12 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 13 | env.setRuntimeMode(RuntimeExecutionMode.STREAMING); 14 | 15 | EnvironmentSettings environmentSettings = EnvironmentSettings.inStreamingMode(); 16 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env,environmentSettings); 17 | 18 | 19 | // 建表来映射 mysql中的 flinktest.stu 20 | tenv.executeSql( 21 | "create table flink_stu(\n" + 22 | " id int primary key,\n" + 23 | " name string,\n" + 24 | " age int,\n" + 25 | " gender string\n" + 26 | ") with (\n" + 27 | " 'connector' = 'jdbc',\n" + 28 | " 'url' = 'jdbc:mysql://doitedu:3306/flinktest',\n" + 29 | " 'table-name' = 'stu',\n" + 30 | " 'username' = 'root',\n" + 31 | " 'password' = 'root' \n" + 32 | ")" 33 | ); 34 | 35 | DataStreamSource doitedu = env.socketTextStream("doitedu", 9999); 36 | 37 | tenv.executeSql("select * from flink_stu").print(); 38 | 39 | doitedu.print(); 40 | 41 | 42 | env.execute(); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo12_JdbcConnectorTest2.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flinksql.demos; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | import lombok.NoArgsConstructor; 6 | import org.apache.flink.api.common.RuntimeExecutionMode; 7 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | import org.apache.flink.table.api.EnvironmentSettings; 11 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 12 | 13 | public class Demo12_JdbcConnectorTest2 { 14 | public static void main(String[] args) throws Exception { 15 | 16 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 17 | env.setRuntimeMode(RuntimeExecutionMode.STREAMING); 18 | 19 | EnvironmentSettings environmentSettings = EnvironmentSettings.inStreamingMode(); 20 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env, environmentSettings); 21 | 22 | 23 | // 建表来映射 mysql中的 flinktest.stu 24 | tenv.executeSql( 25 | "create table flink_stu(\n" + 26 | " id int primary key, \n" + 27 | " gender string, \n" + 28 | " name string \n" + 29 | ") with (\n" + 30 | " 'connector' = 'jdbc',\n" + 31 | " 'url' = 'jdbc:mysql://doitedu:3306/flinktest',\n" + 32 | " 'table-name' = 'stu2',\n" + 33 | " 'username' = 'root',\n" + 34 | " 'password' = 'root' \n" + 35 | ")" 36 | ); 37 | 38 | 39 | // 1,male 40 | SingleOutputStreamOperator bean1 = env 41 | .socketTextStream("doitedu", 9998) 42 | .map(s -> { 43 | String[] arr = s.split(","); 44 | return new Bean1(Integer.parseInt(arr[0]), arr[1]); 45 | }); 46 | // 1,zs 47 | SingleOutputStreamOperator bean2 = env.socketTextStream("doitedu", 9999).map(s -> { 48 | String[] arr = s.split(","); 49 | return new Bean2(Integer.parseInt(arr[0]), 
arr[1]); 50 | }); 51 | 52 | 53 | // 流转表 54 | tenv.createTemporaryView("bean1", bean1); 55 | tenv.createTemporaryView("bean2", bean2); 56 | 57 | tenv.executeSql("insert into flink_stu " + 58 | "select bean1.id,bean1.gender,bean2.name from bean1 left join bean2 on bean1.id=bean2.id"); 59 | 60 | 61 | env.execute(); 62 | } 63 | 64 | 65 | @Data 66 | @NoArgsConstructor 67 | @AllArgsConstructor 68 | public static class Bean1 { 69 | public int id; 70 | public String gender; 71 | } 72 | 73 | 74 | @Data 75 | @NoArgsConstructor 76 | @AllArgsConstructor 77 | public static class Bean2 { 78 | public int id; 79 | public String name; 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo13_FileSystemConnectorTest.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flinksql.demos; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | import lombok.NoArgsConstructor; 6 | import org.apache.flink.api.common.RuntimeExecutionMode; 7 | import org.apache.flink.api.common.typeinfo.TypeHint; 8 | import org.apache.flink.api.java.tuple.Tuple4; 9 | import org.apache.flink.streaming.api.CheckpointingMode; 10 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 12 | import org.apache.flink.table.api.EnvironmentSettings; 13 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 14 | 15 | public class Demo13_FileSystemConnectorTest { 16 | public static void main(String[] args) throws Exception { 17 | 18 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 19 | env.enableCheckpointing(1000, CheckpointingMode.EXACTLY_ONCE); 20 | env.getCheckpointConfig().setCheckpointStorage("file:///d:/checkpoint"); 21 | env.setRuntimeMode(RuntimeExecutionMode.STREAMING); 22 | 23 | EnvironmentSettings environmentSettings = EnvironmentSettings.inStreamingMode(); 24 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env, environmentSettings); 25 | 26 | 27 | // 建表 fs_table 来映射 mysql中的flinktest.stu 28 | tenv.executeSql( 29 | "CREATE TABLE fs_table (\n" + 30 | " user_id STRING,\n" + 31 | " order_amount DOUBLE,\n" + 32 | " dt STRING,\n" + 33 | " `hour` STRING\n" + 34 | ") PARTITIONED BY (dt, `hour`) WITH (\n" + 35 | " 'connector'='filesystem',\n" + 36 | " 'path'='file:///d:/filetable/',\n" + 37 | " 'format'='json',\n" + 38 | " 'sink.partition-commit.delay'='1 h',\n" + 39 | " 'sink.partition-commit.policy.kind'='success-file',\n" + 40 | " 'sink.rolling-policy.file-size' = '8M',\n" + 41 | " 'sink.rolling-policy.rollover-interval'='30 min',\n" + 42 | " 'sink.rolling-policy.check-interval'='10 second'\n" + 43 | ")" 44 | ); 45 | 46 | 47 | // u01,88.8,2022-06-13,14 48 | SingleOutputStreamOperator> stream = env 49 | .socketTextStream("doitedu", 9999) 50 | .map(s -> { 51 | String[] split = s.split(","); 52 | return Tuple4.of(split[0], Double.parseDouble(split[1]), split[2], split[3]); 53 | }).returns(new TypeHint>() { 54 | }); 55 | 56 | tenv.createTemporaryView("orders",stream); 57 | 58 | tenv.executeSql("insert into fs_table select * from orders"); 59 | 60 | 61 | 62 | env.execute(); 63 | } 64 | 65 | 66 | @Data 67 | @NoArgsConstructor 68 | @AllArgsConstructor 69 | public static class Bean1 { 70 | public int id; 71 | public String gender; 72 | } 73 | 74 | 75 | @Data 76 | @NoArgsConstructor 77 | 
@AllArgsConstructor 78 | public static class Bean2 { 79 | public int id; 80 | public String name; 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo14_MysqlCdcConnector.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flinksql.demos; 2 | 3 | import org.apache.flink.streaming.api.CheckpointingMode; 4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | 7 | /** 8 | * @Author: deep as the sea 9 | * @Site: 多易教育 10 | * @QQ: 657270652 11 | * @Date: 2022/6/13 12 | * @Desc: 学大数据,到多易教育 13 | * mysql的cdc连接器使用测试 14 | **/ 15 | public class Demo14_MysqlCdcConnector { 16 | 17 | public static void main(String[] args) { 18 | 19 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 20 | env.enableCheckpointing(1000, CheckpointingMode.EXACTLY_ONCE); 21 | env.getCheckpointConfig().setCheckpointStorage("file:///d:/checkpoint"); 22 | 23 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env); 24 | 25 | // 建表 26 | tenv.executeSql("CREATE TABLE flink_score (\n" + 27 | " id INT,\n" + 28 | " name string,\n" + 29 | " gender string,\n" + 30 | " score double,\n" + 31 | " PRIMARY KEY(id) NOT ENFORCED\n" + 32 | " ) WITH (\n" + 33 | " 'connector' = 'mysql-cdc',\n" + 34 | " 'hostname' = 'doitedu',\n" + 35 | " 'port' = '3306',\n" + 36 | " 'username' = 'root',\n" + 37 | " 'password' = 'root',\n" + 38 | " 'database-name' = 'flinktest',\n" + 39 | " 'table-name' = 'score'\n" + 40 | ")"); 41 | 42 | tenv.executeSql("CREATE TABLE t1 (\n" + 43 | " id INT,\n" + 44 | " name string,\n" + 45 | " PRIMARY KEY(id) NOT ENFORCED\n" + 46 | " ) WITH (\n" + 47 | " 'connector' = 'mysql-cdc',\n" + 48 | " 'hostname' = 'doitedu',\n" + 49 | " 'port' = '3306',\n" + 50 | " 'username' = 'root',\n" + 51 | " 'password' = 'root',\n" + 52 | " 'database-name' = 'doitedu',\n" + 53 | " 'table-name' = 't1'\n" + 54 | ")"); 55 | 56 | tenv.executeSql("select * from t1").print(); 57 | System.exit(1); 58 | 59 | // 查询 60 | tenv.executeSql("select * from flink_score")/*.print()*/; 61 | 62 | 63 | tenv.executeSql("select gender,avg(score) as avg_score from flink_score group by gender")/*.print()*/; 64 | 65 | // 建一个目标表,用来存放查询结果: 每种性别中,总分最高的前2个人 66 | tenv.executeSql( 67 | "create table flink_rank(\n" + 68 | " gender string , \n" + 69 | " name string, \n" + 70 | " score_amt double, \n" + 71 | " rn bigint , \n" + 72 | " primary key(gender,rn) not enforced \n" + 73 | ") with (\n" + 74 | " 'connector' = 'jdbc',\n" + 75 | " 'url' = 'jdbc:mysql://doitedu:3306/flinktest',\n" + 76 | " 'table-name' = 'score_rank',\n" + 77 | " 'username' = 'root',\n" + 78 | " 'password' = 'root' \n" + 79 | ")" 80 | ); 81 | 82 | 83 | tenv.executeSql("insert into flink_rank \n" + 84 | "SELECT\n" + 85 | " gender,\n" + 86 | " name,\n" + 87 | " score_amt,\n" + 88 | " rn\n" + 89 | "from(\n" + 90 | "SELECT\n" + 91 | " gender,\n" + 92 | " name,\n" + 93 | " score_amt,\n" + 94 | " row_number() over(partition by gender order by score_amt desc) as rn\n" + 95 | "from \n" + 96 | "(\n" + 97 | "SELECT\n" + 98 | "gender,\n" + 99 | "name,\n" + 100 | "sum(score) as score_amt\n" + 101 | "from flink_score\n" + 102 | "group by gender,name\n" + 103 | ") o1\n" + 104 | ") o2\n" + 105 | "where rn<=2"); 106 | } 107 | } 108 | -------------------------------------------------------------------------------- 
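Added note (hedged worked example for the Top-N query that ends Demo14_MysqlCdcConnector):
suppose flink_score accumulates male totals zs=170, ls=160, ww=150. The row_number() ranking
keeps only rn <= 2, so score_rank ends up holding (male, zs, 170, 1) and (male, ls, 160, 2).
Because the source is a CDC changelog, a later score change that lifts ww above ls is expected
to retract the old rank-2 row and upsert ww in its place -- which is why the JDBC sink table
declares primary key (gender, rn).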
/flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo18_IntervalJoin.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flinksql.demos; 2 | 3 | import org.apache.flink.api.common.typeinfo.TypeHint; 4 | import org.apache.flink.api.java.tuple.Tuple3; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.table.api.DataTypes; 9 | import org.apache.flink.table.api.Schema; 10 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 11 | 12 | /** 13 | * @Author: deep as the sea 14 | * @Site: 多易教育 15 | * @QQ: 657270652 16 | * @Date: 2022/6/16 17 | * @Desc: 学大数据,到多易教育 18 | * 常规join示例 19 | * 常规join的底层实现,是通过在用状态来缓存两表数据实现的 20 | * 所以,状态体积可能持续膨胀,为了安全起见,可以设置状态的 ttl 时长,来控制状态的体积上限 21 | * 22 | **/ 23 | public class Demo18_IntervalJoin { 24 | public static void main(String[] args) { 25 | 26 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 27 | env.setParallelism(1); 28 | 29 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env); 30 | // 设置table环境中的状态ttl时长 31 | tenv.getConfig().getConfiguration().setLong("table.exec.state.ttl",60*60*1000L); 32 | 33 | 34 | 35 | /** 36 | * 1,a,1000 37 | * 2,b,2000 38 | * 3,c,2500 39 | * 4,d,3000 40 | * 5,e,12000 41 | */ 42 | DataStreamSource s1 = env.socketTextStream("doitedu", 9998); 43 | SingleOutputStreamOperator> ss1 = s1.map(s -> { 44 | String[] arr = s.split(","); 45 | return Tuple3.of(arr[0], arr[1],Long.parseLong(arr[2])); 46 | }).returns(new TypeHint>() { 47 | }); 48 | 49 | /** 50 | * 1,bj,1000 51 | * 2,sh,2000 52 | * 4,xa,2600 53 | * 5,yn,12000 54 | */ 55 | DataStreamSource s2 = env.socketTextStream("doitedu", 9999); 56 | SingleOutputStreamOperator> ss2 = s2.map(s -> { 57 | String[] arr = s.split(","); 58 | return Tuple3.of(arr[0], arr[1],Long.parseLong(arr[2])); 59 | }).returns(new TypeHint>() { 60 | }); 61 | 62 | 63 | // 创建两个表 64 | tenv.createTemporaryView("t_left",ss1, Schema.newBuilder() 65 | .column("f0", DataTypes.STRING()) 66 | .column("f1", DataTypes.STRING()) 67 | .column("f2", DataTypes.BIGINT()) 68 | .columnByExpression("rt","to_timestamp_ltz(f2,3)") 69 | .watermark("rt","rt - interval '0' second") 70 | .build()); 71 | 72 | tenv.createTemporaryView("t_right",ss2, Schema.newBuilder() 73 | .column("f0", DataTypes.STRING()) 74 | .column("f1", DataTypes.STRING()) 75 | .column("f2", DataTypes.BIGINT()) 76 | .columnByExpression("rt","to_timestamp_ltz(f2,3)") 77 | .watermark("rt","rt - interval '0' second") 78 | .build()); 79 | 80 | 81 | 82 | // interval join 83 | tenv.executeSql("select a.f0,a.f1,a.f2,b.f0,b.f1 from t_left a join t_right b " + 84 | "on a.f0=b.f0 " + 85 | "and a.rt between b.rt - interval '2' second and b.rt").print(); 86 | 87 | 88 | 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo18_RegularJoin.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flinksql.demos; 2 | 3 | import org.apache.flink.api.common.typeinfo.TypeHint; 4 | import org.apache.flink.api.java.tuple.Tuple3; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 7 | import 
org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.table.api.DataTypes; 9 | import org.apache.flink.table.api.Schema; 10 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 11 | 12 | /** 13 | * @Author: deep as the sea 14 | * @Site: 多易教育 15 | * @QQ: 657270652 16 | * @Date: 2022/6/16 17 | * @Desc: 学大数据,到多易教育 18 | * 常规join示例 19 | * 常规join的底层实现,是通过在用状态来缓存两表数据实现的 20 | * 所以,状态体积可能持续膨胀,为了安全起见,可以设置状态的 ttl 时长,来控制状态的体积上限 21 | * 22 | **/ 23 | public class Demo18_RegularJoin { 24 | public static void main(String[] args) { 25 | 26 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 27 | env.setParallelism(1); 28 | 29 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env); 30 | // 设置table环境中的状态ttl时长 31 | tenv.getConfig().getConfiguration().setLong("table.exec.state.ttl",60*60*1000L); 32 | 33 | 34 | 35 | /** 36 | * 1,a,1000 37 | * 2,b,2000 38 | * 3,c,2500 39 | * 4,d,3000 40 | * 5,e,12000 41 | */ 42 | DataStreamSource s1 = env.socketTextStream("doitedu", 9998); 43 | SingleOutputStreamOperator> ss1 = s1.map(s -> { 44 | String[] arr = s.split(","); 45 | return Tuple3.of(arr[0], arr[1],Long.parseLong(arr[2])); 46 | }).returns(new TypeHint>() { 47 | }); 48 | 49 | /** 50 | * 1,bj,1000 51 | * 2,sh,2000 52 | * 4,xa,2600 53 | * 5,yn,12000 54 | */ 55 | DataStreamSource s2 = env.socketTextStream("doitedu", 9999); 56 | SingleOutputStreamOperator> ss2 = s2.map(s -> { 57 | String[] arr = s.split(","); 58 | return Tuple3.of(arr[0], arr[1],Long.parseLong(arr[2])); 59 | }).returns(new TypeHint>() { 60 | }); 61 | 62 | 63 | // 创建两个表 64 | tenv.createTemporaryView("t_left",ss1, Schema.newBuilder() 65 | .column("f0", DataTypes.STRING()) 66 | .column("f1", DataTypes.STRING()) 67 | .column("f2", DataTypes.BIGINT()) 68 | .columnByExpression("rt","to_timestamp_ltz(f2,3)") 69 | .watermark("rt","rt - interval '0' second") 70 | .build()); 71 | 72 | tenv.createTemporaryView("t_right",ss2, Schema.newBuilder() 73 | .column("f0", DataTypes.STRING()) 74 | .column("f1", DataTypes.STRING()) 75 | .column("f2", DataTypes.BIGINT()) 76 | .columnByExpression("rt","to_timestamp_ltz(f2,3)") 77 | .watermark("rt","rt - interval '0' second") 78 | .build()); 79 | 80 | 81 | // left join 82 | tenv.executeSql("select a.f0,a.f1,a.f2,b.f0,b.f1 from t_left a left join t_right b on a.f0=b.f0")/*.print()*/; 83 | 84 | // inner join 85 | tenv.executeSql("select a.f0,a.f1,a.f2,b.f0,b.f1 from t_left a join t_right b on a.f0=b.f0").print(); 86 | 87 | 88 | 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo19_ArrayJoin.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flinksql.demos; 2 | 3 | import org.apache.flink.table.annotation.DataTypeHint; 4 | import org.apache.flink.table.annotation.FunctionHint; 5 | import org.apache.flink.table.api.*; 6 | import org.apache.flink.table.functions.TableFunction; 7 | import org.apache.flink.types.Row; 8 | 9 | import static org.apache.flink.table.api.Expressions.array; 10 | import static org.apache.flink.table.api.Expressions.row; 11 | 12 | public class Demo19_ArrayJoin { 13 | public static void main(String[] args) { 14 | TableEnvironment tenv = TableEnvironment.create(EnvironmentSettings.inStreamingMode()); 15 | 16 | Table table = tenv.fromValues(DataTypes.ROW( 17 | DataTypes.FIELD("id", DataTypes.INT()), 18 | DataTypes.FIELD("name", 
DataTypes.STRING()), 19 | DataTypes.FIELD("tags", DataTypes.ARRAY(DataTypes.STRING()))) 20 | , row("1", "zs", array("stu", "child")) 21 | , row("2", "bb", array("miss")) 22 | ); 23 | 24 | tenv.createTemporaryView("t",table); 25 | 26 | 27 | tenv.executeSql("select t.id,t.name,x.tag from t cross join unnest(tags) as x(tag)")/*.print()*/; 28 | 29 | 30 | tenv.createTemporarySystemFunction("mysplit",MySplit.class); 31 | tenv.executeSql("select t.id,t.name,tag from t, lateral table(mysplit(tags)) ")/*.print()*/; 32 | tenv.executeSql("select t.id,t.name,x.tag2 from t, lateral table(mysplit(tags)) x(tag2)")/*.print()*/; 33 | tenv.executeSql("select t.id,t.name,tag from t left join lateral table(mysplit(tags)) on true")/*.print()*/; 34 | tenv.executeSql("select t.id,t.name,x.tag2 from t left join lateral table(mysplit(tags)) x(tag2) on true").print(); 35 | } 36 | 37 | @FunctionHint(output = @DataTypeHint("ROW")) 38 | public static class MySplit extends TableFunction { 39 | 40 | public void eval(String[] arr){ 41 | for (String s : arr) { 42 | collect(Row.of(s)); 43 | } 44 | } 45 | 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo19_LookupJoin.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flinksql.demos; 2 | 3 | import org.apache.flink.api.common.typeinfo.TypeHint; 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.table.api.DataTypes; 9 | import org.apache.flink.table.api.Schema; 10 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 11 | 12 | /** 13 | * @Author: deep as the sea 14 | * @Site: 多易教育 15 | * @QQ: 657270652 16 | * @Date: 2022/6/16 17 | * @Desc: 学大数据,到多易教育 18 | * 常规join示例 19 | * 常规join的底层实现,是通过在用状态来缓存两表数据实现的 20 | * 所以,状态体积可能持续膨胀,为了安全起见,可以设置状态的 ttl 时长,来控制状态的体积上限 21 | **/ 22 | public class Demo19_LookupJoin { 23 | public static void main(String[] args) throws Exception { 24 | 25 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 26 | env.setParallelism(1); 27 | 28 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env); 29 | // 设置table环境中的状态ttl时长 30 | tenv.getConfig().getConfiguration().setLong("table.exec.state.ttl", 60 * 60 * 1000L); 31 | 32 | 33 | /** 34 | * 1,a 35 | * 2,b 36 | * 3,c 37 | * 4,d 38 | * 5,e 39 | */ 40 | DataStreamSource s1 = env.socketTextStream("doitedu", 9998); 41 | SingleOutputStreamOperator> ss1 = s1.map(s -> { 42 | String[] arr = s.split(","); 43 | return Tuple2.of(Integer.parseInt(arr[0]), arr[1]); 44 | }).returns(new TypeHint>() { 45 | }); 46 | 47 | 48 | // 创建主表(需要声明处理时间属性字段) 49 | tenv.createTemporaryView("a", ss1, Schema.newBuilder() 50 | .column("f0", DataTypes.INT()) 51 | .column("f1", DataTypes.STRING()) 52 | .columnByExpression("pt", "proctime()") // 定义处理时间属性字段 53 | .build()); 54 | 55 | // 创建lookup维表(jdbc connector表) 56 | tenv.executeSql( 57 | "create table b( \n" + 58 | " id int , \n" + 59 | " name string, \n" + 60 | " gender STRING, \n" + 61 | " primary key(id) not enforced \n" + 62 | ") with (\n" + 63 | " 'connector' = 'jdbc',\n" + 64 | " 'url' = 'jdbc:mysql://doitedu:3306/flinktest',\n" + 65 | " 'table-name' = 'stu2',\n" + 66 | " 'username' = 'root',\n" + 67 | " 
'password' = 'root' \n" + 68 | ")" 69 | ); 70 | 71 | // lookup join 查询 72 | tenv.executeSql("select a.*,c.* from a JOIN b FOR SYSTEM_TIME AS OF a.pt AS c \n" + 73 | " ON a.f0 = c.id").print(); 74 | 75 | 76 | env.execute(); 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo1_TableSql.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flinksql.demos; 2 | 3 | import org.apache.flink.table.api.EnvironmentSettings; 4 | import org.apache.flink.table.api.Table; 5 | import org.apache.flink.table.api.TableEnvironment; 6 | import org.apache.flink.table.catalog.CatalogDatabaseImpl; 7 | import org.apache.flink.table.catalog.ConnectorCatalogTable; 8 | import org.apache.flink.table.catalog.ObjectPath; 9 | import org.apache.flink.table.catalog.exceptions.DatabaseAlreadyExistException; 10 | import org.apache.flink.table.catalog.hive.HiveCatalog; 11 | 12 | import java.util.HashMap; 13 | 14 | import static org.apache.flink.table.api.Expressions.$; 15 | 16 | public class Demo1_TableSql { 17 | 18 | public static void main(String[] args) throws DatabaseAlreadyExistException { 19 | 20 | EnvironmentSettings envSettings = EnvironmentSettings.inStreamingMode(); // 流计算模式 21 | TableEnvironment tableEnv = TableEnvironment.create(envSettings); 22 | 23 | // 把kafka中的一个topic: doit30-2 数据,映射成一张flinkSql表 24 | // json : {"id":1,"name":"zs","age":28,"gender":"male"} 25 | // create table_x (id int,name string,age int,gender string) 26 | tableEnv.executeSql( 27 | "create table t_kafka " 28 | + " ( " 29 | + " id int, " 30 | + " name string, " 31 | + " age int, " 32 | + " gender string " 33 | + " ) " 34 | + " WITH ( " 35 | + " 'connector' = 'kafka', " 36 | + " 'topic' = 'doit30-3', " 37 | + " 'properties.bootstrap.servers' = 'doitedu:9092', " 38 | + " 'properties.group.id' = 'g1', " 39 | + " 'scan.startup.mode' = 'earliest-offset', " 40 | + " 'format' = 'json', " 41 | + " 'json.fail-on-missing-field' = 'false', " 42 | + " 'json.ignore-parse-errors' = 'true' " 43 | + " ) " 44 | ); 45 | 46 | 47 | /** 48 | * 把sql表名, 转成 table对象 49 | */ 50 | Table table = tableEnv.from("t_kafka"); 51 | // 利用table api进行查询计算 52 | table.groupBy($("gender")) 53 | .select($("gender"), $("age").avg()) 54 | .execute() 55 | .print(); 56 | 57 | 58 | tableEnv.executeSql("select gender,avg(age) as avg_age from t_kafka group by gender").print(); 59 | 60 | 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo20_Temporal_Join.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flinksql.demos; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | import lombok.NoArgsConstructor; 6 | import org.apache.flink.api.common.typeinfo.TypeHint; 7 | import org.apache.flink.api.java.tuple.Tuple2; 8 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 9 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.table.api.DataTypes; 12 | import org.apache.flink.table.api.Schema; 13 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 14 | 15 | /** 16 | * @Author: deep as the sea 17 | * @Site: 多易教育 18 | * @QQ: 657270652 19 | * @Date: 2022/6/16 20 | * @Desc: 学大数据,到多易教育 21 | * 
时态join代码示例 22 | **/ 23 | public class Demo20_Temporal_Join { 24 | public static void main(String[] args) throws Exception { 25 | 26 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 27 | env.setParallelism(1); 28 | 29 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env); 30 | 31 | 32 | /** 33 | * 订单Id,币种,金额,订单时间 34 | * 1,a,100,167438436400 35 | */ 36 | DataStreamSource s1 = env.socketTextStream("doitedu", 9998); 37 | 38 | SingleOutputStreamOperator ss1 = s1.map(s -> { 39 | String[] arr = s.split(","); 40 | return new Order(Integer.parseInt(arr[0]), arr[1], Double.parseDouble(arr[2]), Long.parseLong(arr[3])); 41 | }); 42 | 43 | 44 | // 创建主表(需要声明处理时间属性字段) 45 | tenv.createTemporaryView("orders", ss1, Schema.newBuilder() 46 | .column("orderId", DataTypes.INT()) 47 | .column("currency", DataTypes.STRING()) 48 | .column("price", DataTypes.DOUBLE()) 49 | .column("orderTime", DataTypes.BIGINT()) 50 | .columnByExpression("rt", "to_timestamp_ltz(orderTime,3)") // 定义处理时间属性字段 51 | .watermark("rt","rt") 52 | .build()); 53 | 54 | 55 | //tenv.executeSql("select orderId,currency,price,orderTime,rt from orders").print(); 56 | 57 | // 创建 temporal 表 58 | tenv.executeSql("CREATE TABLE currency_rate (\n" + 59 | " currency STRING, \n" + 60 | " rate double , \n" + 61 | " update_time bigint , \n" + 62 | " rt as to_timestamp_ltz(update_time,3) ," + 63 | " watermark for rt as rt - interval '0' second ," + 64 | " PRIMARY KEY(currency) NOT ENFORCED\n" + 65 | " ) WITH ( \n" + 66 | " 'connector' = 'mysql-cdc',\n" + 67 | " 'hostname' = 'doitedu',\n" + 68 | " 'port' = '3306',\n" + 69 | " 'username' = 'root',\n" + 70 | " 'password' = 'root',\n" + 71 | " 'database-name' = 'flinktest',\n" + 72 | " 'table-name' = 'currency_rate'\n" + 73 | ")"); 74 | 75 | //tenv.executeSql("select * from currency_rate").print(); 76 | 77 | 78 | 79 | 80 | // temporal 关联查询 81 | tenv.executeSql( 82 | "SELECT \n" + 83 | " orders.orderId, \n" + 84 | " orders.currency, \n" + 85 | " orders.price, \n" + 86 | " orders.orderTime, \n" + 87 | " rate \n" + 88 | "FROM orders \n" + 89 | "LEFT JOIN currency_rate FOR SYSTEM_TIME AS OF orders.rt \n" + 90 | "ON orders.currency = currency_rate.currency" 91 | ).print(); 92 | 93 | 94 | env.execute(); 95 | } 96 | 97 | 98 | @Data 99 | @NoArgsConstructor 100 | @AllArgsConstructor 101 | public static class Order { 102 | // 订单Id,币种,金额,订单时间 103 | public int orderId; 104 | public String currency; 105 | public double price; 106 | public long orderTime; 107 | 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo21_CustomScalarFunction.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flinksql.demos; 2 | 3 | import org.apache.flink.table.api.DataTypes; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.Table; 6 | import org.apache.flink.table.api.TableEnvironment; 7 | import org.apache.flink.table.functions.ScalarFunction; 8 | import org.apache.flink.types.Row; 9 | 10 | 11 | public class Demo21_CustomScalarFunction { 12 | public static void main(String[] args) { 13 | 14 | TableEnvironment tenv = TableEnvironment.create(EnvironmentSettings.inStreamingMode()); 15 | 16 | Table table = tenv.fromValues( 17 | DataTypes.ROW( 18 | DataTypes.FIELD("name", DataTypes.STRING())), 19 | Row.of("aaa"), 20 | Row.of("bbb"), 21 | Row.of("ccc") 22 | ); 23 | 24 | 
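// A minimal supplementary sketch (illustrative only, using the `table` object built above):
// besides registering the function and calling it from SQL as done just below, the Table API
// can usually invoke a scalar function inline via Expressions.call, roughly:
//
//     table.select(call(MyUpper.class, $("name"))).execute().print();
//
// with $ and call statically imported from org.apache.flink.table.api.Expressions; the
// eval(String) method is resolved from the argument types, so no prior
// createTemporarySystemFunction registration is needed for this style of call.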
tenv.createTemporaryView("t",table); 25 | 26 | // 注册自定义的函数 27 | tenv.createTemporarySystemFunction("myupper",MyUpper.class); 28 | 29 | // 注册后,就能在sql中使用了 30 | tenv.executeSql("select myupper(name) from t").print(); 31 | 32 | } 33 | 34 | 35 | public static class MyUpper extends ScalarFunction{ 36 | 37 | public String eval(String str){ 38 | return str.toUpperCase(); 39 | } 40 | } 41 | 42 | 43 | 44 | 45 | } 46 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo22_CustomAggregateFunction.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flinksql.demos; 2 | 3 | import org.apache.flink.table.api.DataTypes; 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.Table; 6 | import org.apache.flink.table.api.TableEnvironment; 7 | import org.apache.flink.table.functions.AggregateFunction; 8 | import org.apache.flink.types.Row; 9 | 10 | /** 11 | * @Author: deep as the sea 12 | * @Site: 多易教育 13 | * @QQ: 657270652 14 | * @Date: 2022/6/16 15 | * @Desc: 学大数据,到多易教育 16 | * 自定义聚合函数 17 | **/ 18 | public class Demo22_CustomAggregateFunction { 19 | public static void main(String[] args) { 20 | 21 | TableEnvironment tenv = TableEnvironment.create(EnvironmentSettings.inBatchMode()); 22 | 23 | Table table = tenv.fromValues( 24 | DataTypes.ROW( 25 | DataTypes.FIELD("uid", DataTypes.INT()), 26 | DataTypes.FIELD("gender", DataTypes.STRING()), 27 | DataTypes.FIELD("score", DataTypes.DOUBLE()) 28 | ), 29 | Row.of(1,"male",80), 30 | Row.of(2,"male",100), 31 | Row.of(3,"female",90) 32 | ); 33 | 34 | tenv.createTemporaryView("t",table); 35 | 36 | // 注册自定义的函数 37 | tenv.createTemporarySystemFunction("myavg",MyAvg.class); 38 | 39 | // 注册后,就能在sql中使用了 40 | tenv.executeSql("select gender,myavg(score) as avg_score from t group by gender ").print(); 41 | 42 | } 43 | 44 | 45 | public static class MyAccumulator{ 46 | public int count; 47 | public double sum; 48 | } 49 | 50 | public static class MyAvg extends AggregateFunction { 51 | 52 | /** 53 | * 获取累加器的值 54 | * @param accumulator the accumulator which contains the current intermediate results 55 | * @return 56 | */ 57 | @Override 58 | public Double getValue(MyAccumulator accumulator) { 59 | return accumulator.sum/ accumulator.count; 60 | } 61 | 62 | /** 63 | * 创建累加器 64 | * @return 65 | */ 66 | @Override 67 | public MyAccumulator createAccumulator() { 68 | MyAccumulator myAccumulator = new MyAccumulator(); 69 | myAccumulator.count = 0; 70 | myAccumulator.sum = 0; 71 | 72 | 73 | return myAccumulator; 74 | } 75 | 76 | 77 | /** 78 | * 进来输入数据后,如何更新累加器 79 | * @param accumulator 80 | * @param score 81 | */ 82 | public void accumulate(MyAccumulator accumulator,Double score){ 83 | 84 | accumulator.count = accumulator.count + 1; 85 | accumulator.sum = accumulator.sum + score; 86 | 87 | } 88 | 89 | 90 | } 91 | 92 | } 93 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo23_TableFunction.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flinksql.demos; 2 | 3 | import org.apache.flink.table.annotation.DataTypeHint; 4 | import org.apache.flink.table.annotation.FunctionHint; 5 | import org.apache.flink.table.api.*; 6 | import org.apache.flink.table.functions.TableFunction; 7 | import org.apache.flink.types.Row; 8 | 9 | 10 | public class Demo23_TableFunction { 
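    // A minimal supplementary sketch (illustrative only): a TableFunction emits zero or more
    // rows per input row through collect(), and besides the SQL `lateral table(...)` syntax used
    // in main() below, it can usually also be applied through the Table API, roughly:
    //
    //     table.joinLateral(call(MySplit.class, $("phone_numbers"), ",")).execute().print();
    //     table.leftOuterJoinLateral(call(MySplit.class, $("phone_numbers"), ",")).execute().print();
    //
    // with $ and call statically imported from org.apache.flink.table.api.Expressions; the output
    // column names follow the ROW type declared in the @FunctionHint on MySplit.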
11 | 12 | public static void main(String[] args) { 13 | 14 | TableEnvironment tenv = TableEnvironment.create(EnvironmentSettings.inStreamingMode()); 15 | 16 | /* 17 | Table table = tenv.fromValues(DataTypes.ROW( 18 | DataTypes.FIELD("id", DataTypes.INT()), 19 | DataTypes.FIELD("name", DataTypes.STRING()), 20 | DataTypes.FIELD("phone_numbers", DataTypes.ARRAY(DataTypes.STRING()))), 21 | Row.of(1, "zs", Expressions.array("138","139","135")), 22 | Row.of(2, "bb", Expressions.array("135","136")) 23 | ); 24 | 25 | tenv.createTemporaryView("t",table); 26 | tenv.executeSql("select t.id,t.name,t2.phone_number from t cross join unnest(phone_numbers) as t2(phone_number)").print(); 27 | */ 28 | 29 | Table table = tenv.fromValues(DataTypes.ROW( 30 | DataTypes.FIELD("id", DataTypes.INT()), 31 | DataTypes.FIELD("name", DataTypes.STRING()), 32 | DataTypes.FIELD("phone_numbers", DataTypes.STRING())), 33 | Row.of(1, "zs", "13888,137,1354455"), 34 | Row.of(2, "bb", "1366688,1374,132224455") 35 | ); 36 | tenv.createTemporaryView("t",table); 37 | 38 | 39 | // 注册函数 40 | tenv.createTemporarySystemFunction("mysplit",MySplit.class); 41 | 42 | // 展开手机号字符串 43 | tenv.executeSql("select * from t , lateral table(mysplit(phone_numbers,',')) as t1(p,l) ")/*.print()*/; 44 | tenv.executeSql("select * from t left join lateral table(mysplit(phone_numbers,',')) as t1(p,l) on true ").print(); 45 | 46 | 47 | 48 | } 49 | 50 | @FunctionHint(output = @DataTypeHint("ROW")) 51 | public static class MySplit extends TableFunction{ 52 | 53 | public void eval(String str,String delimiter){ 54 | for (String s : str.split(delimiter)) { 55 | collect(Row.of(s,s.length())); 56 | } 57 | } 58 | } 59 | 60 | } 61 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo24_TableAggregateFunction.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flinksql.demos; 2 | 3 | import org.apache.flink.api.java.tuple.Tuple2; 4 | import org.apache.flink.table.annotation.DataTypeHint; 5 | import org.apache.flink.table.annotation.FunctionHint; 6 | import org.apache.flink.table.api.DataTypes; 7 | import org.apache.flink.table.api.EnvironmentSettings; 8 | import org.apache.flink.table.api.Table; 9 | import org.apache.flink.table.api.TableEnvironment; 10 | import org.apache.flink.table.functions.TableAggregateFunction; 11 | import org.apache.flink.types.Row; 12 | import org.apache.flink.util.Collector; 13 | 14 | import static org.apache.flink.table.api.Expressions.$; 15 | import static org.apache.flink.table.api.Expressions.call; 16 | 17 | /** 18 | * @Author: deep as the sea 19 | * @Site: 多易教育 20 | * @QQ: 657270652 21 | * @Date: 2022/6/17 22 | * @Desc: 学大数据,到多易教育 23 | * 自定义表聚合函数示例 24 | * 什么叫做表聚合函数: 25 | * 1,male,zs,88 26 | * 2,male,bb,99 27 | * 3,male,cc,76 28 | * 4,female,dd,78 29 | * 5,female,ee,92 30 | * 6,female,ff,86 31 | *

32 | * -- 求每种性别中,分数最高的两个成绩 33 | * -- 常规写法 34 | * SELECT 35 | * * 36 | * FROM 37 | * ( 38 | * SELECT 39 | * gender, 40 | * score, 41 | * row_number() over(partition by gender order by score desc) as rn 42 | * FROM t 43 | * ) 44 | * where rn<=2 45 | *

46 | *

47 | * -- 如果有一种聚合函数,能在分组聚合的模式中,对每组数据输出多行多列聚合结果 48 | * SELECT 49 | * gender, 50 | * top2(score) 51 | * from t 52 | * group by gender 53 | *

54 | * male,88 55 | * male,99 56 | * female,92 57 | * female,86 58 | **/ 59 | public class Demo24_TableAggregateFunction { 60 | 61 | public static void main(String[] args) { 62 | TableEnvironment tenv = TableEnvironment.create(EnvironmentSettings.inStreamingMode()); 63 | Table table = tenv.fromValues(DataTypes.ROW( 64 | DataTypes.FIELD("id", DataTypes.INT()), 65 | DataTypes.FIELD("gender", DataTypes.STRING()), 66 | DataTypes.FIELD("score", DataTypes.DOUBLE())), 67 | Row.of(1, "male", 67), 68 | Row.of(2, "male", 88), 69 | Row.of(3, "male", 98), 70 | Row.of(4, "female", 99), 71 | Row.of(5, "female", 84), 72 | Row.of(6, "female", 89) 73 | ); 74 | tenv.createTemporaryView("t", table); 75 | 76 | // 用一个聚合函数直接求出每种性别中最高的两个成绩 77 | table 78 | .groupBy($("gender")) 79 | .flatAggregate(call(MyTop2.class, $("score"))) 80 | .select($("gender"), $("score_top"), $("rank_no")) 81 | .execute().print(); 82 | 83 | 84 | } 85 | 86 | public static class MyAccumulator { 87 | 88 | public double first; 89 | public double second; 90 | 91 | } 92 | 93 | @FunctionHint(output = @DataTypeHint("ROW")) 94 | public static class MyTop2 extends TableAggregateFunction { 95 | 96 | @Override 97 | public MyAccumulator createAccumulator() { 98 | 99 | MyAccumulator acc = new MyAccumulator(); 100 | acc.first = Double.MIN_VALUE; 101 | acc.second = Double.MIN_VALUE; 102 | 103 | return acc; 104 | } 105 | 106 | 107 | /** 108 | * 累加更新逻辑 109 | * 110 | * @param acc 111 | * @param value 112 | */ 113 | public void accumulate(MyAccumulator acc, Double score) { 114 | if (score > acc.first) { 115 | acc.second = acc.first; 116 | acc.first = score; 117 | } else if (score > acc.second) { 118 | acc.second = score; 119 | } 120 | } 121 | 122 | public void merge(MyAccumulator acc, Iterable it) { 123 | for (MyAccumulator otherAcc : it) { 124 | accumulate(acc, otherAcc.first); 125 | accumulate(acc, otherAcc.second); 126 | } 127 | } 128 | 129 | /** 130 | * 输出结果: 可以输出多行,多列 131 | * 132 | * @param acc 133 | * @param out 134 | */ 135 | public void emitValue(MyAccumulator acc, Collector out) { 136 | if (acc.first != Double.MIN_VALUE) { 137 | out.collect(Row.of(acc.first, 1)); 138 | } 139 | if (acc.second != Double.MIN_VALUE) { 140 | out.collect(Row.of(acc.second, 2)); 141 | } 142 | } 143 | } 144 | 145 | 146 | } 147 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo25_MetricDemos.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flinksql.demos; 2 | 3 | import org.apache.flink.api.common.accumulators.LongCounter; 4 | import org.apache.flink.configuration.Configuration; 5 | import org.apache.flink.metrics.Gauge; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.streaming.api.functions.ProcessFunction; 9 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 10 | import org.apache.flink.util.Collector; 11 | 12 | public class Demo25_MetricDemos { 13 | 14 | public static void main(String[] args) throws Exception { 15 | 16 | StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration()); 17 | 18 | DataStreamSource ds = env.socketTextStream("doitedu", 9999); 19 | 20 | ds.process(new ProcessFunction() { 21 | LongCounter longCounter; 22 | 23 | MyGuage gauge ; 24 | @Override 25 | public void open(Configuration parameters) 
throws Exception { 26 | 27 | longCounter = getRuntimeContext().getLongCounter("doitedu-counter1"); 28 | 29 | 30 | gauge = getRuntimeContext().getMetricGroup().gauge("doitedu-gauge", new MyGuage()); 31 | } 32 | 33 | @Override 34 | public void processElement(String value, ProcessFunction.Context ctx, Collector out) throws Exception { 35 | 36 | // 业务逻辑之外的 metric代码,度量task所输入的数据条数 37 | longCounter.add(1); 38 | 39 | gauge.add(1); 40 | 41 | 42 | out.collect(value.toUpperCase()); 43 | } 44 | }).print(); 45 | 46 | env.execute(); 47 | 48 | 49 | } 50 | 51 | 52 | public static class MyGuage implements Gauge{ 53 | 54 | int recordCount = 0; 55 | 56 | public void add(int i){ 57 | recordCount += i; 58 | } 59 | 60 | @Override 61 | public Integer getValue() { 62 | return recordCount; 63 | } 64 | } 65 | 66 | } 67 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo2_TableApi.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flinksql.demos; 2 | 3 | import org.apache.flink.api.common.RuntimeExecutionMode; 4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 5 | import org.apache.flink.table.api.*; 6 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 7 | 8 | import static org.apache.flink.table.api.Expressions.$; 9 | 10 | public class Demo2_TableApi { 11 | 12 | public static void main(String[] args) { 13 | 14 | // 纯粹表环境 15 | // TableEnvironment tenv = TableEnvironment.create(EnvironmentSettings.inStreamingMode()); 16 | 17 | // 混合环境创建 18 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 19 | env.setRuntimeMode(RuntimeExecutionMode.STREAMING); 20 | StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env); 21 | 22 | 23 | // 建表 24 | Table table = tableEnv.from(TableDescriptor 25 | .forConnector("kafka") // 指定连接器 26 | .schema(Schema.newBuilder() // 指定表结构 27 | .column("id", DataTypes.INT()) 28 | .column("name", DataTypes.STRING()) 29 | .column("age", DataTypes.INT()) 30 | .column("gender", DataTypes.STRING()) 31 | .build()) 32 | .format("json") // 指定数据源的数据格式 33 | .option("topic", "doit30-3") // 连接器及format格式的相关参数 34 | .option("properties.bootstrap.servers", "doit01:9092") 35 | .option("properties.group.id", "g2") 36 | .option("scan.startup.mode", "earliest-offset") 37 | .option("json.fail-on-missing-field", "false") 38 | .option("json.ignore-parse-errors", "true") 39 | .build()); 40 | 41 | 42 | // 查询 43 | Table table2 = table.groupBy($("gender")) 44 | .select($("gender"),$("age").avg().as("avg_age")); 45 | 46 | 47 | /** 48 | * 将一个已创建好的 table对象,注册成sql中的视图名 49 | */ 50 | tableEnv.createTemporaryView("kafka_table",table); 51 | // 然后就可以写sql语句来进行查询了 52 | tableEnv.executeSql("select gender,avg(age) as avg_age from kafka_table group by gender").print(); 53 | 54 | 55 | 56 | 57 | // 输出 58 | table2.execute().print(); 59 | 60 | 61 | } 62 | 63 | 64 | } 65 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo4_SqlTableCreate.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flinksql.demos; 2 | 3 | import org.apache.flink.api.common.RuntimeExecutionMode; 4 | import org.apache.flink.api.common.eventtime.WatermarkStrategy; 5 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 6 | import 
org.apache.flink.connector.kafka.source.KafkaSource; 7 | import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer; 8 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 9 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.table.api.*; 12 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 13 | import org.apache.kafka.clients.consumer.OffsetResetStrategy; 14 | 15 | /** 16 | * 带sql表名的 表创建 17 | * 各种方式 18 | */ 19 | /** 20 | * @Author: deep as the sea 21 | * @Site: 多易教育 22 | * @QQ: 657270652 23 | * @Date: 2022/6/12 24 | * @Desc: 学大数据,到多易教育 25 | * 表创建方式示例: 带sql表名称的 26 | **/ 27 | public class Demo4_SqlTableCreate { 28 | 29 | public static void main(String[] args) { 30 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 31 | env.setRuntimeMode(RuntimeExecutionMode.STREAMING); 32 | 33 | EnvironmentSettings environmentSettings = EnvironmentSettings.inStreamingMode(); 34 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env, environmentSettings); 35 | 36 | /** 37 | * 一、 通过构建一个 TableDescriptor 来创建一个 “有名” 表(sql表) 38 | */ 39 | tenv.createTable("table_a", // 表名 40 | TableDescriptor.forConnector("filesystem") 41 | .schema(Schema.newBuilder() 42 | .column("id", DataTypes.INT()) 43 | .column("name", DataTypes.STRING()) 44 | .column("age", DataTypes.INT()) 45 | .column("gender", DataTypes.STRING()) 46 | .build()) 47 | .format("csv") 48 | .option("path", "data/sqldemo/a.txt") 49 | .option("csv.ignore-parse-errors", "true") 50 | .build()); 51 | 52 | 53 | tenv.executeSql("select * from table_a").print(); 54 | System.exit(1); 55 | tenv.executeSql("select gender,max(age) as max_age from table_a group by gender")/*.print()*/; 56 | 57 | 58 | /** 59 | * 二、 从一个dataStream 上创建“有名”的 视图 60 | */ 61 | DataStreamSource stream1 = env.socketTextStream("doit01", 9999); 62 | SingleOutputStreamOperator javaBeanStream = stream1.map(s -> { 63 | String[] split = s.split(","); 64 | return new Demo3_TableObjectCreate.Person(Integer.parseInt(split[0]), split[1], Integer.parseInt(split[2]), split[3]); 65 | }); 66 | tenv.createTemporaryView("t_person", javaBeanStream); 67 | tenv.executeSql("select gender,max(age) as max_age from t_person group by gender")/*.print()*/; 68 | 69 | 70 | /** 71 | * 三、 从一个已存在Table对象,得到一个 “有名”的视图 72 | */ 73 | Table table_a = tenv.from("table_a"); 74 | tenv.createTemporaryView("table_x",table_a); 75 | tenv.executeSql("select * from table_x").print(); 76 | } 77 | 78 | } 79 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo6_Exercise.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flinksql.demos; 2 | 3 | 4 | import org.apache.flink.table.api.EnvironmentSettings; 5 | import org.apache.flink.table.api.TableEnvironment; 6 | import org.apache.flink.table.api.TableResult; 7 | 8 | /** 9 | * 10 | 11 | * 12 | */ 13 | /** 14 | * @Author: deep as the sea 15 | * @Site: 多易教育 16 | * @QQ: 657270652 17 | * @Date: 2022/6/12 18 | * @Desc: 学大数据,到多易教育 19 | * >>>>> 练习题需求 >>>>>>> 20 | * 基本: kafka中有如下数据: 21 | * {"id":1,"name":"zs","nick":"tiedan","age":18,"gender":"male"} 22 | * 23 | * 高级:kafka中有如下数据: 24 | * {"id":1,"name":{"formal":"zs","nick":"tiedan"},"age":18,"gender":"male"} 25 | * 26 | * 现在需要用flinkSql来对上述数据进行查询统计: 27 | * 
截止到当前,每个昵称,都有多少个用户 28 | * 截止到当前,每个性别,年龄最大值 29 | **/ 30 | public class Demo6_Exercise { 31 | public static void main(String[] args) { 32 | TableEnvironment tenv = TableEnvironment.create(EnvironmentSettings.inStreamingMode()); 33 | 34 | // 建表(数据源表) 35 | tenv.executeSql( 36 | "create table t_person " 37 | + " ( " 38 | + " id int, " 39 | + " name string, " 40 | + " nick string, " 41 | + " age int, " 42 | + " gender string " 43 | + " ) " 44 | + " WITH ( " 45 | + " 'connector' = 'kafka', " 46 | + " 'topic' = 'doit30-4', " 47 | + " 'properties.bootstrap.servers' = 'doitedu:9092', " 48 | + " 'properties.group.id' = 'g1', " 49 | + " 'scan.startup.mode' = 'earliest-offset', " 50 | + " 'format' = 'json', " 51 | + " 'json.fail-on-missing-field' = 'false', " 52 | + " 'json.ignore-parse-errors' = 'true' " 53 | + " ) " 54 | ); 55 | 56 | 57 | // 建表(目标表) 58 | // kafka 连接器,不能接受 UPDATE 修正模式的数据,只能接受INSERT模式的数据 59 | // 而我们的查询语句产生的结果,存在UPDATE模式,就需要另一种 连接器表(upsert-kafka)来接收 60 | tenv.executeSql( 61 | "create table t_nick_cnt " 62 | + " ( " 63 | + " nick string primary key not enforced, " 64 | + " user_cnt bigint " 65 | + " ) " 66 | + " WITH ( " 67 | + " 'connector' = 'upsert-kafka', " 68 | + " 'topic' = 'doit30-nick', " 69 | + " 'properties.bootstrap.servers' = 'doitedu:9092', " 70 | + " 'key.format' = 'json' , " 71 | + " 'value.format' = 'json' " 72 | + " ) " 73 | ); 74 | 75 | 76 | // 查询 并 打印 77 | //TableResult tableResult = tenv.executeSql("select nick,count(distinct id) as user_cnt from t_person group by nick"); 78 | tenv.executeSql( 79 | "insert into t_nick_cnt " + 80 | "select nick,count(distinct id) as user_cnt from t_person group by nick"); 81 | 82 | } 83 | 84 | } 85 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo7_ColumnDetail1_Sql.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flinksql.demos; 2 | 3 | import org.apache.flink.table.api.EnvironmentSettings; 4 | import org.apache.flink.table.api.TableEnvironment; 5 | 6 | /** 7 | * @Author: deep as the sea 8 | * @Site: 多易教育 9 | * @QQ: 657270652 10 | * @Date: 2022/6/11 11 | * @Desc: schema定义详细示例 (sql DDL语句定义表结构) 12 | **/ 13 | public class Demo7_ColumnDetail1_Sql { 14 | public static void main(String[] args) { 15 | 16 | TableEnvironment tenv = TableEnvironment.create(EnvironmentSettings.inStreamingMode()); 17 | 18 | // 建表(数据源表) 19 | // {"id":4,"name":"zs","nick":"tiedan","age":18,"gender":"male"} 20 | tenv.executeSql( 21 | "create table t_person " 22 | + " ( " 23 | + " id int , " // -- 物理字段 24 | + " name string, " // -- 物理字段 25 | + " nick string, " 26 | + " age int, " 27 | + " gender string , " 28 | + " guid as id, " // -- 表达式字段(逻辑字段) 29 | + " big_age as age + 10 , " // -- 表达式字段(逻辑字段) 30 | + " offs bigint metadata from 'offset' , " // -- 元数据字段 31 | + " ts TIMESTAMP_LTZ(3) metadata from 'timestamp', " // -- 元数据字段 32 | /*+ " PRIMARY KEY(id,name) NOT ENFORCED "*/ // -- 主键约束 33 | + " ) " 34 | + " WITH ( " 35 | + " 'connector' = 'kafka', " 36 | + " 'topic' = 'doit30-4', " 37 | + " 'properties.bootstrap.servers' = 'doitedu:9092', " 38 | + " 'properties.group.id' = 'g1', " 39 | + " 'scan.startup.mode' = 'earliest-offset', " 40 | + " 'format' = 'json', " 41 | + " 'json.fail-on-missing-field' = 'false', " 42 | + " 'json.ignore-parse-errors' = 'true' " 43 | + " ) " 44 | ); 45 | 46 | tenv.executeSql("desc t_person").print(); 47 | tenv.executeSql("select * from t_person where id>2").print(); 48 | 49 | 50 | } 51 | } 
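A short supplementary sketch to go with the DDL above, assuming the same TableEnvironment `tenv` and the same Kafka connector options; the 'partition' key used below comes from the Kafka connector's documented metadata columns and is not something defined in this project:

// Illustrative only: metadata columns may reference any key the connector exposes
// ('topic', 'partition', 'offset', 'timestamp', ...), declared the same way as above.
tenv.executeSql(
        "create table t_person_meta "
      + " ( "
      + "   id int, "
      + "   name string, "
      + "   rec_partition int metadata from 'partition', "   // which Kafka partition the record came from
      + "   offs bigint metadata from 'offset' "
      + " ) WITH ( "
      + "   'connector' = 'kafka', "
      + "   'topic' = 'doit30-4', "
      + "   'properties.bootstrap.servers' = 'doitedu:9092', "
      + "   'properties.group.id' = 'g1', "
      + "   'scan.startup.mode' = 'earliest-offset', "
      + "   'format' = 'json', "
      + "   'json.ignore-parse-errors' = 'true' "
      + " ) "
);
tenv.executeSql("select * from t_person_meta").print();

Declaring a metadata column as virtual (as the Table API variant in the next file does with isVirtual = true) keeps it read-only, so it is excluded from the schema when the table is used as a sink.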
52 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo7_ColumnDetail2_TableApi.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flinksql.demos; 2 | 3 | import org.apache.flink.table.api.*; 4 | 5 | import static org.apache.flink.table.api.Expressions.$; 6 | 7 | /** 8 | * @Author: deep as the sea 9 | * @Site: 多易教育 10 | * @QQ: 657270652 11 | * @Date: 2022/6/11 12 | * @Desc: schema定义详细示例(tableApi方式定义表结构) 13 | **/ 14 | public class Demo7_ColumnDetail2_TableApi { 15 | public static void main(String[] args) { 16 | 17 | TableEnvironment tenv = TableEnvironment.create(EnvironmentSettings.inStreamingMode()); 18 | 19 | // 建表(数据源表) 20 | // {"id":4,"name":"zs","nick":"tiedan","age":18,"gender":"male"} 21 | tenv.createTable("t_person", 22 | TableDescriptor 23 | .forConnector("kafka") 24 | .schema(Schema.newBuilder() 25 | .column("id", DataTypes.INT()) // column是声明物理字段到表结构中来 26 | .column("name", DataTypes.STRING()) // column是声明物理字段到表结构中来 27 | .column("nick", DataTypes.STRING()) // column是声明物理字段到表结构中来 28 | .column("age", DataTypes.INT()) // column是声明物理字段到表结构中来 29 | .column("gender", DataTypes.STRING()) // column是声明物理字段到表结构中来 30 | .columnByExpression("guid","id") // 声明表达式字段 31 | /*.columnByExpression("big_age",$("age").plus(10))*/ // 声明表达式字段 32 | .columnByExpression("big_age","age + 10") // 声明表达式字段 33 | // isVirtual 是表示: 当这个表被sink表时,该字段是否出现在schema中 34 | .columnByMetadata("offs",DataTypes.BIGINT(),"offset",true) // 声明元数据字段 35 | .columnByMetadata("ts",DataTypes.TIMESTAMP_LTZ(3),"timestamp",true) // 声明元数据字段 36 | /*.primaryKey("id","name")*/ 37 | .build()) 38 | .format("json") 39 | .option("topic","doit30-4") 40 | .option("properties.bootstrap.servers","doitedu:9092") 41 | .option("properties.group.id","g1") 42 | .option("scan.startup.mode","earliest-offset") 43 | .option("json.fail-on-missing-field","false") 44 | .option("json.ignore-parse-errors","true") 45 | .build() 46 | ); 47 | 48 | tenv.executeSql("select * from t_person").print(); 49 | 50 | 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo8_CsvFormat.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flinksql.demos; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.DataTypes; 5 | import org.apache.flink.table.api.EnvironmentSettings; 6 | import org.apache.flink.table.api.Schema; 7 | import org.apache.flink.table.api.TableDescriptor; 8 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 9 | 10 | /** 11 | * @Author: deep as the sea 12 | * @Site: 多易教育 13 | * @QQ: 657270652 14 | * @Date: 2022/6/12 15 | * @Desc: 学大数据,到多易教育 16 | * csv format详解 17 | **/ 18 | public class Demo8_CsvFormat { 19 | 20 | public static void main(String[] args) { 21 | 22 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 23 | 24 | EnvironmentSettings settings = EnvironmentSettings.inBatchMode(); 25 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env, settings); 26 | 27 | 28 | tenv.executeSql( 29 | "create table t_csv( " 30 | + " id int, " 31 | + " name string, " 32 | + " age string " 33 | + ") with ( " 34 | + " 'connector' = 'filesystem', " 35 | + " 'path' = 'data/csv/', " 36 | + " 'format'='csv', " 37 | + " 
'csv.disable-quote-character' = 'false', " 38 | + " 'csv.quote-character' = '|', " 39 | + " 'csv.ignore-parse-errors' = 'true' , " 40 | + " 'csv.null-literal' = '\\N' , " 41 | + " 'csv.allow-comments' = 'true' " 42 | + ") " 43 | ); 44 | 45 | tenv.executeSql("desc t_csv").print(); 46 | tenv.executeSql("select * from t_csv").print(); 47 | 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo9_EventTimeAndWatermark.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flinksql.demos; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | import org.apache.flink.table.api.DataTypes; 5 | import org.apache.flink.table.api.EnvironmentSettings; 6 | import org.apache.flink.table.api.Schema; 7 | import org.apache.flink.table.api.TableDescriptor; 8 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 9 | 10 | /** 11 | * @Author: deep as the sea 12 | * @Site: 多易教育 13 | * @QQ: 657270652 14 | * @Date: 2022/6/12 15 | * @Desc: 学大数据,到多易教育 16 | * watermark 在DDL中的定义示例代码 17 | * 18 | * 测试数据: 19 | * {"guid":1,"eventId":"e02","eventTime":1655017433000,"pageId":"p001"} 20 | * {"guid":1,"eventId":"e03","eventTime":1655017434000,"pageId":"p001"} 21 | * {"guid":1,"eventId":"e04","eventTime":1655017435000,"pageId":"p001"} 22 | * {"guid":1,"eventId":"e05","eventTime":1655017436000,"pageId":"p001"} 23 | * {"guid":1,"eventId":"e06","eventTime":1655017437000,"pageId":"p001"} 24 | * {"guid":1,"eventId":"e07","eventTime":1655017438000,"pageId":"p001"} 25 | * {"guid":1,"eventId":"e08","eventTime":1655017439000,"pageId":"p001"} 26 | * 27 | * 28 | * 29 | **/ 30 | public class Demo9_EventTimeAndWatermark { 31 | 32 | public static void main(String[] args) { 33 | 34 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 35 | 36 | EnvironmentSettings settings = EnvironmentSettings.inStreamingMode(); 37 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env, settings); 38 | 39 | 40 | /** 41 | * 只有 TIMESTAMP 或 TIMESTAMP_LTZ 类型的字段可以被声明为rowtime(事件时间属性) 42 | */ 43 | tenv.executeSql( 44 | " create table t_events( " 45 | + " guid int, " 46 | + " eventId string, " 47 | /*+ " eventTime timestamp(3), "*/ 48 | + " eventTime bigint, " 49 | + " pageId string, " 50 | + " pt AS proctime(), " // 利用一个表达式字段,来声明 processing time属性 51 | + " rt as to_timestamp_ltz(eventTime,3), " 52 | + " watermark for rt as rt - interval '0.001' second " // 用watermark for xxx,来将一个已定义的TIMESTAMP/TIMESTAMP_LTZ字段声明成 eventTime属性及指定watermark策略 53 | + " ) " 54 | + " with ( " 55 | + " 'connector' = 'kafka', " 56 | + " 'topic' = 'doit30-events2', " 57 | + " 'properties.bootstrap.servers' = 'doitedu:9092', " 58 | + " 'properties.group.id' = 'g1', " 59 | + " 'scan.startup.mode' = 'earliest-offset', " 60 | + " 'format' = 'json', " 61 | + " 'json.fail-on-missing-field' = 'false', " 62 | + " 'json.ignore-parse-errors' = 'true' " 63 | + " ) " 64 | ); 65 | 66 | tenv.executeSql("desc t_events")/*.print()*/; 67 | tenv.executeSql("select guid,eventId,eventTime,pageId,pt,rt,CURRENT_WATERMARK(rt) as wm from t_events").print(); 68 | 69 | 70 | } 71 | 72 | } 73 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flinksql/demos/Demo9_EventTimeAndWatermark3.java: -------------------------------------------------------------------------------- 1 | package 
cn.doitedu.flinksql.demos; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import lombok.AllArgsConstructor; 5 | import lombok.Data; 6 | import lombok.NoArgsConstructor; 7 | import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner; 8 | import org.apache.flink.api.common.eventtime.WatermarkStrategy; 9 | import org.apache.flink.streaming.api.datastream.DataStream; 10 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 11 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 12 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 13 | import org.apache.flink.streaming.api.functions.ProcessFunction; 14 | import org.apache.flink.table.api.DataTypes; 15 | import org.apache.flink.table.api.EnvironmentSettings; 16 | import org.apache.flink.table.api.Schema; 17 | import org.apache.flink.table.api.Table; 18 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 19 | import org.apache.flink.types.Row; 20 | import org.apache.flink.util.Collector; 21 | 22 | /** 23 | * @Author: deep as the sea 24 | * @Site: 多易教育 25 | * @QQ: 657270652 26 | * @Date: 2022/6/12 27 | * @Desc: 学大数据,到多易教育 28 | * 流 ===> 表 ,过程中如何传承 事件时间 和 watermark 29 | **/ 30 | public class Demo9_EventTimeAndWatermark3 { 31 | 32 | public static void main(String[] args) throws Exception { 33 | 34 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 35 | env.setParallelism(1); 36 | 37 | EnvironmentSettings settings = EnvironmentSettings.inStreamingMode(); 38 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env, settings); 39 | 40 | 41 | tenv.executeSql( 42 | " create table t_events( " 43 | + " guid int, " 44 | + " eventId string, " 45 | + " eventTime bigint, " 46 | + " pageId string, " 47 | /*+ " pt AS proctime(), "*/ // 利用一个表达式字段,来声明 processing time属性 48 | + " rt as to_timestamp_ltz(eventTime,3), " 49 | + " watermark for rt as rt - interval '1' second " // 用watermark for xxx,来将一个已定义的TIMESTAMP/TIMESTAMP_LTZ字段声明成 eventTime属性及指定watermark策略 50 | + " ) " 51 | + " with ( " 52 | + " 'connector' = 'kafka', " 53 | + " 'topic' = 'doit30-events2', " 54 | + " 'properties.bootstrap.servers' = 'doitedu:9092', " 55 | + " 'properties.group.id' = 'g1', " 56 | + " 'scan.startup.mode' = 'earliest-offset', " 57 | + " 'format' = 'json', " 58 | + " 'json.fail-on-missing-field' = 'false', " 59 | + " 'json.ignore-parse-errors' = 'true' " 60 | + " ) " 61 | ); 62 | 63 | 64 | // tenv.executeSql("select guid,eventId,rt,current_watermark(rt) as wm from t_events").print(); 65 | 66 | 67 | 68 | DataStream ds = tenv.toDataStream(tenv.from("t_events")); 69 | 70 | ds.process(new ProcessFunction() { 71 | @Override 72 | public void processElement(Row value, ProcessFunction.Context ctx, Collector out) throws Exception { 73 | out.collect(value + " => " + ctx.timerService().currentWatermark()); 74 | } 75 | }).print(); 76 | 77 | 78 | env.execute(); 79 | 80 | 81 | } 82 | 83 | } 84 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flinksql/fuxi/EventBean.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flinksql.fuxi; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | import lombok.NoArgsConstructor; 6 | 7 | import java.util.Map; 8 | 9 | @Data 10 | @NoArgsConstructor 11 | @AllArgsConstructor 12 | public class EventBean { 13 | 14 | private long guid; 15 | private String sessionId; 16 | 
private String eventId; 17 | private long eventTs; 18 | private Map properties; 19 | } 20 | -------------------------------------------------------------------------------- /flink_course/src/main/java/cn/doitedu/flinksql/fuxi/TimerDemo.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.flinksql.fuxi; 2 | 3 | 4 | import com.alibaba.fastjson.JSON; 5 | import org.apache.flink.api.common.state.ValueState; 6 | import org.apache.flink.api.common.state.ValueStateDescriptor; 7 | import org.apache.flink.api.java.functions.KeySelector; 8 | import org.apache.flink.configuration.Configuration; 9 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 10 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 12 | import org.apache.flink.streaming.api.functions.KeyedProcessFunction; 13 | import org.apache.flink.streaming.api.functions.ProcessFunction; 14 | import org.apache.flink.util.Collector; 15 | 16 | /** 17 | * 需求场景: 18 | * 实时监测 用户的行为事件流,如果发现有用户下单事件,则检查下单后30分钟内,该用户是否有订单支付 19 | * 如果没有支付,则输出一条催支付的信息 20 | *

21 | * 关键技术: 定时器功能(定时器就是一个闹钟) 22 | */ 23 | public class TimerDemo { 24 | 25 | public static void main(String[] args) throws Exception { 26 | 27 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 28 | 29 | DataStreamSource stream = env.socketTextStream("doitedu", 9999); 30 | 31 | SingleOutputStreamOperator stream2 = stream.map(s -> JSON.parseObject(s, EventBean.class)); 32 | 33 | stream2 34 | .keyBy(new KeySelector() { 35 | @Override 36 | public Long getKey(EventBean value) throws Exception { 37 | return value.getGuid(); 38 | } 39 | }) 40 | .process(new KeyedProcessFunction() { 41 | 42 | ValueState timerTimeState; 43 | 44 | @Override 45 | public void open(Configuration parameters) throws Exception { 46 | 47 | timerTimeState = getRuntimeContext().getState(new ValueStateDescriptor("timerTimeState", Long.class)); 48 | } 49 | 50 | @Override 51 | public void processElement(EventBean eventBean, KeyedProcessFunction.Context ctx, Collector out) throws Exception { 52 | 53 | if (eventBean.getEventId().equals("submitOrder")) { 54 | // 注册一个定时器,所定的时间在 : 当前处理时间+30S 55 | long timerTime = ctx.timerService().currentProcessingTime() + 30 * 1000L; 56 | ctx.timerService().registerProcessingTimeTimer(timerTime); 57 | // 将定时器时间,放入状态管理器中 58 | timerTimeState.update(timerTime); 59 | 60 | out.collect("检测到用户:" + ctx.getCurrentKey() + ",下单了,注册了一个定时器: " + timerTimeState.value()); 61 | } 62 | 63 | if (eventBean.getEventId().equals("payOrder")) { 64 | ctx.timerService().deleteProcessingTimeTimer(timerTimeState.value()); 65 | out.collect("检测到用户:" + ctx.getCurrentKey() + ",在下单后的30s内已经支付,取消定时器 " + timerTimeState.value()); 66 | } 67 | 68 | } 69 | 70 | /** 71 | * 定期器被触发时,会调用的方法 72 | */ 73 | @Override 74 | public void onTimer(long timestamp, KeyedProcessFunction.OnTimerContext ctx, Collector out) throws Exception { 75 | Long guid = ctx.getCurrentKey(); 76 | out.collect("用户: " + guid + ", 您的订单快超时了,赶紧支付!"); 77 | } 78 | }) 79 | .print(); 80 | 81 | 82 | env.execute(); 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /flink_course/src/main/java/org/apache/flink/api/common/eventtime/BoundedOutOfOrdernessWatermarks.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package org.apache.flink.api.common.eventtime; 20 | 21 | import org.apache.flink.annotation.Public; 22 | 23 | import java.time.Duration; 24 | 25 | import static org.apache.flink.util.Preconditions.checkArgument; 26 | import static org.apache.flink.util.Preconditions.checkNotNull; 27 | 28 | /** 29 | * A WatermarkGenerator for situations where records are out of order, but you can place an upper 30 | * bound on how far the events are out of order. An out-of-order bound B means that once an event 31 | * with timestamp T was encountered, no events older than {@code T - B} will follow any more. 32 | * 33 | *

The watermarks are generated periodically. The delay introduced by this watermark strategy is 34 | * the periodic interval length, plus the out-of-orderness bound. 35 | */ 36 | @Public 37 | public class BoundedOutOfOrdernessWatermarks implements WatermarkGenerator { 38 | 39 | /** The maximum timestamp encountered so far. */ 40 | private long maxTimestamp; 41 | 42 | /** The maximum out-of-orderness that this watermark generator assumes. */ 43 | private final long outOfOrdernessMillis; 44 | 45 | /** 46 | * Creates a new watermark generator with the given out-of-orderness bound. 47 | * 48 | * @param maxOutOfOrderness The bound for the out-of-orderness of the event timestamps. 49 | */ 50 | public BoundedOutOfOrdernessWatermarks(Duration maxOutOfOrderness) { 51 | checkNotNull(maxOutOfOrderness, "maxOutOfOrderness"); 52 | checkArgument(!maxOutOfOrderness.isNegative(), "maxOutOfOrderness cannot be negative"); 53 | 54 | this.outOfOrdernessMillis = maxOutOfOrderness.toMillis(); 55 | 56 | // start so that our lowest watermark would be Long.MIN_VALUE. 57 | this.maxTimestamp = Long.MIN_VALUE + outOfOrdernessMillis + 1; 58 | } 59 | 60 | // ------------------------------------------------------------------------ 61 | 62 | @Override 63 | public void onEvent(T event, long eventTimestamp, WatermarkOutput output) { 64 | maxTimestamp = Math.max(maxTimestamp, eventTimestamp); 65 | } 66 | 67 | @Override 68 | public void onPeriodicEmit(WatermarkOutput output) { 69 | // TODO 70 | // System.out.printf("源头周期输出watermark:%d \n", maxTimestamp - outOfOrdernessMillis - 1); 71 | output.emitWatermark(new Watermark(maxTimestamp - outOfOrdernessMillis - 1)); 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /flink_course/src/main/java/tmp/utils/SqlHolder.java: -------------------------------------------------------------------------------- 1 | package tmp.utils; 2 | 3 | public class SqlHolder { 4 | 5 | } 6 | -------------------------------------------------------------------------------- /flink_course/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | ################################################################################ 18 | 19 | log4j.rootLogger = INFO, console, debugFile, errorFile 20 | 21 | log4j.appender.console=org.apache.log4j.ConsoleAppender 22 | log4j.appender.console.layout = org.apache.log4j.PatternLayout 23 | log4j.appender.console.layout.ConversionPattern = [%-5p] %d(%r) --> [%t] %l: %m %x %n 24 | 25 | log4j.appender.debugFile = org.apache.log4j.DailyRollingFileAppender 26 | log4j.appender.debugFile.File = src/logs/debug.log 27 | log4j.appender.debugFile.Append = true 28 | log4j.appender.debugFile.Threshold = debug 29 | log4j.appender.debugFile.layout = org.apache.log4j.PatternLayout 30 | log4j.appender.debugFile.layout.ConversionPattern = %-d{yyyy-MM-dd HH:mm:ss} [ %t:%r ] - [ %p ] %m%n 31 | 32 | log4j.appender.errorFile = org.apache.log4j.DailyRollingFileAppender 33 | log4j.appender.errorFile.File = src/logs/error.log 34 | log4j.appender.errorFile.Append = true 35 | log4j.appender.errorFile.Threshold = error 36 | log4j.appender.errorFile.layout = org.apache.log4j.PatternLayout 37 | log4j.appender.errorFile.layout.ConversionPattern = %-d{yyyy-MM-dd HH:mm:ss} [ %t:%r ] - [ %p ] %m%n 38 | 39 | 40 | -------------------------------------------------------------------------------- /flink_course/src/main/resources/prts.avsc.bak: -------------------------------------------------------------------------------- 1 | {"namespace": "cn.doitedu.flink.avro.schema", 2 | "type": "record", 3 | "name": "AvroEventLog", 4 | "fields": [ 5 | {"name": "guid", "type": "long"}, 6 | {"name": "sessionId", "type": "string"}, 7 | {"name": "eventId", "type": "string"}, 8 | {"name": "timeStamp", "type": "long"}, 9 | {"name": "eventInfo", "type": { "type":"map","values": "string"} } 10 | ] 11 | } -------------------------------------------------------------------------------- /kafka_course/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | doit30_flink 7 | cn.doitedu 8 | 1.0 9 | 10 | 4.0.0 11 | 12 | kafka_course 13 | 14 | 15 | 8 16 | 8 17 | 18 | 19 | 20 | 21 | 22 | org.apache.kafka 23 | kafka-clients 24 | ${kafka.version} 25 | 26 | 27 | 28 | 29 | 30 | 31 | org.apache.commons 32 | commons-lang3 33 | 3.12.0 34 | 35 | 36 | 37 | com.google.guava 38 | guava 39 | 30.0-jre 40 | 41 | 42 | 43 | 44 | org.roaringbitmap 45 | RoaringBitmap 46 | 0.9.25 47 | 48 | 49 | 50 | mysql 51 | mysql-connector-java 52 | 8.0.27 53 | 54 | 55 | 56 | org.apache.flink 57 | flink-connector-files 58 | 1.14.4 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /kafka_course/src/main/java/cn/doitedu/kafka/AdminClientDemo.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.kafka; 2 | 3 | import org.apache.kafka.clients.admin.*; 4 | import org.apache.kafka.common.KafkaFuture; 5 | import org.apache.kafka.common.Node; 6 | import org.apache.kafka.common.TopicPartitionInfo; 7 | 8 | import java.util.*; 9 | import java.util.concurrent.ExecutionException; 10 | 11 | public class AdminClientDemo { 12 | public static void main(String[] args) throws ExecutionException, InterruptedException { 13 | 14 | Properties props = new Properties(); 15 | props.setProperty(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG,"doit01:9092,doit02:9092"); 16 | 17 | // 管理客户端 18 | AdminClient adminClient = KafkaAdminClient.create(props); 19 | 20 | // 创建一个topic 21 | /*NewTopic zzuzz = new NewTopic("zzuzz", 3, (short) 2); 22 | 
adminClient.createTopics(Arrays.asList(zzuzz));*/ 23 | 24 | // 查看一个topic的详细信息 25 | DescribeTopicsResult topicDescriptions = adminClient.describeTopics(Arrays.asList("zzuzz")); 26 | 27 | KafkaFuture> descriptions = topicDescriptions.all(); 28 | Map infos = descriptions.get(); 29 | Set> entries = infos.entrySet(); 30 | for (Map.Entry entry : entries) { 31 | String topicName = entry.getKey(); 32 | TopicDescription td = entry.getValue(); 33 | List partitions = td.partitions(); 34 | for (TopicPartitionInfo partition : partitions) { 35 | int partitionIndex = partition.partition(); 36 | List replicas = partition.replicas(); 37 | List isr = partition.isr(); 38 | Node leader = partition.leader(); 39 | System.out.println(topicName+ "\t" +partitionIndex + "\t" + replicas + "\t" + isr + "\t" + leader); 40 | } 41 | } 42 | 43 | 44 | adminClient.close(); 45 | 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /kafka_course/src/main/java/cn/doitedu/kafka/ConsumerDemo.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.kafka; 2 | 3 | import org.apache.kafka.clients.consumer.ConsumerConfig; 4 | import org.apache.kafka.clients.consumer.ConsumerRecord; 5 | import org.apache.kafka.clients.consumer.ConsumerRecords; 6 | import org.apache.kafka.clients.consumer.KafkaConsumer; 7 | import org.apache.kafka.common.TopicPartition; 8 | import org.apache.kafka.common.header.Headers; 9 | import org.apache.kafka.common.record.TimestampType; 10 | import org.apache.kafka.common.serialization.StringDeserializer; 11 | 12 | import java.time.Duration; 13 | import java.util.*; 14 | 15 | public class ConsumerDemo { 16 | public static void main(String[] args) { 17 | 18 | // 构造一个properties来存放消费者客户端参数 19 | Properties props = new Properties(); 20 | props.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG,"doit01:9092"); 21 | props.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName()); 22 | props.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName()); 23 | 24 | // kafka的消费者,默认是从所属组之前所记录的偏移量开始消费,如果找不到之前记录的便宜量,则从如下参数配置的策略来确定消费起始偏移量 25 | // 可以选择:earliest(自动重置到每个分区的最前一条消息)/latest(自动重置到每个分区的最新一条消息/none(没有重置策略) 26 | props.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,"latest"); 27 | 28 | // 设置消费者所属的组id 29 | props.setProperty(ConsumerConfig.GROUP_ID_CONFIG,"d30-1"); 30 | 31 | // 设置自动提交最新的消费位移 32 | props.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG,"true"); // 默认就是开启的 33 | 34 | // 自动提交最新消费位移的时间间隔 35 | props.setProperty(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG,"5000"); // 默认值就是5000ms 36 | 37 | // 构造一个消费者客户端 38 | KafkaConsumer consumer = new KafkaConsumer<>(props); 39 | 40 | // 订阅主题(可以是多个) 41 | consumer.subscribe(Collections.singletonList("abcx")); 42 | 43 | // 显式指定消费起始偏移量 44 | /*TopicPartition abcxP0 = new TopicPartition("abcx", 0); 45 | TopicPartition abcxP1 = new TopicPartition("abcx", 1); 46 | consumer.seek(abcxP0,10); 47 | consumer.seek(abcxP1,15);*/ 48 | 49 | 50 | // 循环往复拉取数据 51 | boolean condition = true; 52 | while(condition){ 53 | 54 | // 客户端去拉取数据的时候,如果服务端没有数据响应,会保持连接等待服务端响应 55 | // poll中传入的超时时长参数,是指等待的最大时长 56 | ConsumerRecords records = consumer.poll(Duration.ofMillis(Long.MAX_VALUE)); 57 | 58 | // 对数据进行业务逻辑处理 59 | // Iterator> iterator = records.iterator(); 60 | // 直接用for循环来迭代本次取到的一批数据 61 | for (ConsumerRecord record : records) { 62 | // ConsumerRecord中,不光有用户的业务数据,还有kafka塞入的元数据 63 | String key = record.key(); 64 | String value = 
record.value(); 65 | 66 | // 本条数据所属的topic 67 | String topic = record.topic(); 68 | // 本条数据所属的分区 69 | int partition = record.partition(); 70 | 71 | // 本条数据的offset 72 | long offset = record.offset(); 73 | 74 | // 当前这条数据所在分区的leader的朝代纪年 75 | Optional leaderEpoch = record.leaderEpoch(); 76 | 77 | // 在kafka的数据底层存储中,不光有用户的业务数据,还有大量元数据 78 | // timestamp就是其中之一: 记录本条数据的时间戳 79 | // 但是时间戳有两种类型: 本条数据的创建时间(生产者); 本条数据的追加时间(broker写入log文件的时间) 80 | // log.message.timestamp.type ==> topic的参数,控制timestamp元数据记录的时间戳的类型 81 | TimestampType timestampType = record.timestampType(); 82 | long timestamp = record.timestamp(); 83 | 84 | // 数据头; 85 | // 数据头是生产者在写入数据时附加进去的(相当于用户自己自定义的元数据) 86 | Headers headers = record.headers(); 87 | 88 | System.out.println(String.format("数据key: %s , 数据value:%s ,数据所属的topic: %s ,数据所属的partition: %d, 数据的offset: %d , 数据所属leader的纪元: %s , 数据的时间戳类型: %s , 数据的时间戳: %d ", 89 | key,value,topic,partition,offset,leaderEpoch.get(),timestampType.name,timestamp 90 | )); 91 | 92 | } 93 | } 94 | 95 | // 关闭客户端 96 | consumer.close(); 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /kafka_course/src/main/java/cn/doitedu/kafka/ConsumerDemo2.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.kafka; 2 | 3 | import org.apache.kafka.clients.consumer.ConsumerConfig; 4 | import org.apache.kafka.clients.consumer.ConsumerRecord; 5 | import org.apache.kafka.clients.consumer.ConsumerRecords; 6 | import org.apache.kafka.clients.consumer.KafkaConsumer; 7 | import org.apache.kafka.common.TopicPartition; 8 | 9 | import java.io.IOException; 10 | import java.time.Duration; 11 | import java.util.Arrays; 12 | import java.util.Collections; 13 | import java.util.Properties; 14 | 15 | /** 16 | * 手动指定消费起始偏移量位置 17 | */ 18 | public class ConsumerDemo2 { 19 | 20 | public static void main(String[] args) throws IOException { 21 | 22 | Properties props = new Properties(); 23 | // 从配置文件中加载写好的参数 24 | props.load(ConsumerDemo2.class.getClassLoader().getResourceAsStream("consumer.properties")); 25 | // 手动再set一些参数进去 26 | props.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,"earliest"); 27 | props.setProperty(ConsumerConfig.GROUP_ID_CONFIG,"d30-2"); 28 | 29 | KafkaConsumer consumer = new KafkaConsumer<>(props); 30 | 31 | /* 32 | // subscribe订阅 会参与消费组的自动再均衡机制才能真正获得自己要消费的topic及其分区的 33 | consumer.subscribe(Collections.singletonList("ddd")); 34 | // 这里无意义地去拉一次数据,主要就是为了确保 分区分配动作已完成 35 | consumer.poll(Duration.ofMillis(Long.MAX_VALUE)); 36 | // 然后再定位到指定的偏移量,开始正式消费 37 | consumer.seek(new TopicPartition("ddd",0),2); 38 | */ 39 | 40 | 41 | // 既然要自己指定一个确定的起始消费位置,那通常隐含之意是不需要去参与消费组自动再均衡机制 42 | // 那么,就不要使用 subscribe 来订阅主题 43 | consumer.assign(Arrays.asList(new TopicPartition("ddd",0))); 44 | consumer.seek(new TopicPartition("ddd",0),4); 45 | 46 | 47 | while(true){ 48 | ConsumerRecords records = consumer.poll(Duration.ofMillis(Long.MAX_VALUE)); 49 | for (ConsumerRecord record : records) { 50 | System.out.println(String.format("数据key: %s , 数据value:%s ,数据所属的topic: %s ,数据所属的partition: %d, 数据的offset: %d , 数据所属leader的纪元: %s , 数据的时间戳类型: %s , 数据的时间戳: %d ", 51 | record.key(),record.value(),record.topic(),record.partition(),record.offset(),record.leaderEpoch().get(),record.timestampType().name,record.timestamp() 52 | )); 53 | } 54 | } 55 | 56 | 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /kafka_course/src/main/java/cn/doitedu/kafka/ConsumerDemo3.java: 
-------------------------------------------------------------------------------- 1 | package cn.doitedu.kafka; 2 | 3 | import org.apache.kafka.clients.consumer.*; 4 | import org.apache.kafka.common.TopicPartition; 5 | 6 | import java.io.IOException; 7 | import java.time.Duration; 8 | import java.util.Arrays; 9 | import java.util.Collection; 10 | import java.util.Collections; 11 | import java.util.Properties; 12 | 13 | /** 14 | * 消费组再均衡观察 15 | */ 16 | public class ConsumerDemo3 { 17 | 18 | public static void main(String[] args) throws IOException { 19 | 20 | Properties props = new Properties(); 21 | // 从配置文件中加载写好的参数 22 | props.load(ConsumerDemo3.class.getClassLoader().getResourceAsStream("consumer.properties")); 23 | // 手动再set一些参数进去 24 | props.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,"earliest"); 25 | props.setProperty(ConsumerConfig.GROUP_ID_CONFIG,"d30-2"); 26 | // 指定消费者再均衡策略 27 | props.setProperty(ConsumerConfig.PARTITION_ASSIGNMENT_STRATEGY_CONFIG,RangeAssignor.class.getName()); 28 | 29 | KafkaConsumer consumer = new KafkaConsumer<>(props); 30 | 31 | 32 | // reb-1 主题: 3个分区 33 | // reb-2 主题: 2个分区 34 | consumer.subscribe(Arrays.asList("reb-1", "reb-2"), new ConsumerRebalanceListener() { 35 | // 再均衡过程中,消费者会被取消先前所分配的主题,分区 36 | // 取消了之后,consumer底层就会调用下面的方法 37 | @Override 38 | public void onPartitionsRevoked(Collection partitions) { 39 | System.out.println("我被取消了如下主题,分区:" + partitions); 40 | 41 | } 42 | 43 | // 再均衡过程中,消费者会被重新分配到新的主题,分区 44 | // 分配好了新的主题,分区后,consumer底层调用下面的方法 45 | @Override 46 | public void onPartitionsAssigned(Collection partitions) { 47 | System.out.println("我又被分配了如下主题,分区:" + partitions); 48 | } 49 | }); 50 | 51 | while(true){ 52 | consumer.poll(Duration.ofMillis(Long.MAX_VALUE)); 53 | } 54 | 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /kafka_course/src/main/java/cn/doitedu/kafka/Kafka编程练习.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.kafka; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import lombok.AllArgsConstructor; 5 | import lombok.Getter; 6 | import lombok.NoArgsConstructor; 7 | import lombok.Setter; 8 | import org.apache.commons.lang3.RandomUtils; 9 | import org.apache.commons.lang3.RandomStringUtils; 10 | import org.apache.kafka.clients.producer.KafkaProducer; 11 | import org.apache.kafka.clients.producer.ProducerConfig; 12 | import org.apache.kafka.clients.producer.ProducerRecord; 13 | import org.apache.kafka.common.serialization.StringSerializer; 14 | 15 | import java.util.Properties; 16 | 17 | /** 18 | * 19 | * 创建一个topic 20 | * [root@doit01 ~]# kafka-topics.sh --create --topic doit30-events --partitions 3 --replication-factor 2 --zookeeper doit01:2181 21 | * 22 | * 可以用命令去监视这个topic是否有数据到达: 23 | * [root@doit01 ~]# kafka-console-consumer.sh --topic doit30-events --bootstrap-server doit01:9092 24 | * 25 | * 26 | * 需求: 27 | * 写一个生产者,不断去生成 “用户行为事件”数据 并写入kafka 28 | * {"guid":1,"eventId":"pageview","timeStamp":1637868346789} 29 | * {"guid":1,"eventId":"addcart","timeStamp":1637868346966} 30 | * {"guid":2,"eventId":"applaunch","timeStamp":1637868346967} 31 | * ..... 
32 | * 33 | * 需求1: 写一个消费者,不断地从kafka中取消费如上“用户行为事件”数据,并做统计计算: 34 | * 每 5分钟,输出一次截止到当时的数据中出现过的用户总数 35 | * 36 | * 需求2: 写一个消费者,不断地从kafka中取消费如上“用户行为事件”数据,并做如下加工处理: 37 | * 给每一条数据,添加一个字段,来标识,该条数据所属的用户的id在今天是否是第一次出现,如是,则标注1 ;否则,标注0 38 | * {"guid":1,"eventId":"pageview","timeStamp":1637868346789,"flag":1} 39 | * {"guid":1,"eventId":"addcart","timeStamp":1637868346966,"flag":0} 40 | * {"guid":2,"eventId":"applaunch","timeStamp":1637868346967,"flag":1} 41 | * ....... 42 | * 43 | * TODO 需求3: 写一个消费者,不断地从kafka中取消费如上“用户行为事件”数据,并做统计计算: 44 | * 每 5分钟,统计最近 10分钟内的用户总数并输出 45 | * 46 | * 47 | */ 48 | public class Kafka编程练习 { 49 | 50 | public static void main(String[] args) throws InterruptedException { 51 | 52 | MyDataGen myDataGen = new MyDataGen(); 53 | myDataGen.genData(); 54 | } 55 | } 56 | 57 | /** 58 | * 业务数据生成器 59 | */ 60 | class MyDataGen{ 61 | 62 | KafkaProducer producer; 63 | 64 | public MyDataGen(){ 65 | 66 | Properties props = new Properties(); 67 | props.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG,"doit01:9092"); 68 | props.setProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName()); 69 | props.setProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName()); 70 | 71 | producer = new KafkaProducer<>(props); 72 | 73 | } 74 | 75 | 76 | public void genData() throws InterruptedException { 77 | UserEvent userEvent = new UserEvent(); 78 | while(true){ 79 | // 造一条随机的用户行为事件数据对象 80 | userEvent.setGuid(RandomUtils.nextInt(1,10000)); 81 | userEvent.setEventId(RandomStringUtils.randomAlphabetic(5,8)); 82 | userEvent.setTimeStamp(System.currentTimeMillis()); 83 | 84 | // 转成json串 85 | String json = JSON.toJSONString(userEvent); 86 | 87 | // 将业务数据封装成ProducerRecord对象 88 | ProducerRecord record = new ProducerRecord<>("doit30-events", json); 89 | 90 | // 用producer写入kafka 91 | producer.send(record); 92 | 93 | // 控制发送的速度 94 | Thread.sleep(RandomUtils.nextInt(200,1500)); 95 | } 96 | } 97 | 98 | } 99 | 100 | @NoArgsConstructor 101 | @AllArgsConstructor 102 | @Getter 103 | @Setter 104 | class UserEvent{ 105 | private long guid; 106 | private String eventId; 107 | private long timeStamp; 108 | private Integer flag; 109 | } 110 | -------------------------------------------------------------------------------- /kafka_course/src/main/java/cn/doitedu/kafka/Kafka编程练习_消费者.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.kafka; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import org.apache.commons.lang3.time.DateFormatUtils; 5 | import org.apache.commons.lang3.time.DateUtils; 6 | import org.apache.kafka.clients.consumer.ConsumerConfig; 7 | import org.apache.kafka.clients.consumer.ConsumerRecord; 8 | import org.apache.kafka.clients.consumer.ConsumerRecords; 9 | import org.apache.kafka.clients.consumer.KafkaConsumer; 10 | import org.apache.kafka.common.serialization.StringDeserializer; 11 | 12 | import java.time.Duration; 13 | import java.util.*; 14 | import java.util.concurrent.ConcurrentHashMap; 15 | import org.roaringbitmap.RoaringBitmap; 16 | 17 | public class Kafka编程练习_消费者 { 18 | 19 | public static void main(String[] args) { 20 | 21 | // 用一个hashmap来记录去重guid的map缓存 22 | ConcurrentHashMap guidMap = new ConcurrentHashMap<>(); 23 | 24 | // 启动数据消费线程 25 | new Thread(new ConsumeRunnable(guidMap)).start(); 26 | 27 | 28 | // 启动一个统计及输出结果的线程(每5秒输出一次结果) 29 | // 优雅一点来实现定时调度,可以用各种定时调度器(有第三方的,也可以用jdk自己的:Timer) 30 | Timer timer = new Timer(); 31 | timer.scheduleAtFixedRate(new StatisticTask(guidMap),5000,10000); 32 | 33 
| 34 | } 35 | } 36 | 37 | /** 38 | * 消费拉取数据的线程runnable 39 | */ 40 | class ConsumeRunnable implements Runnable{ 41 | 42 | ConcurrentHashMap guidMap; 43 | 44 | public ConsumeRunnable(ConcurrentHashMap guidMap) { 45 | this.guidMap = guidMap; 46 | } 47 | 48 | @Override 49 | public void run() { 50 | Properties props = new Properties(); 51 | props.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG,"doit01:9092"); 52 | props.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName()); 53 | props.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName()); 54 | props.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,"latest"); 55 | props.setProperty(ConsumerConfig.GROUP_ID_CONFIG,"event-01"); 56 | 57 | KafkaConsumer consumer = new KafkaConsumer<>(props); 58 | consumer.subscribe(Arrays.asList("doit30-events")); 59 | 60 | while(true){ 61 | ConsumerRecords records = consumer.poll(Duration.ofMillis(5000)); 62 | for (ConsumerRecord record : records) { 63 | String eventJson = record.value(); 64 | // 解析json, 拿到 guid 65 | try { 66 | UserEvent userEvent = JSON.parseObject(eventJson, UserEvent.class); 67 | guidMap.put(userEvent.getGuid(), ""); 68 | }catch (Exception e){ 69 | System.out.println("出异常了: " + eventJson); 70 | } 71 | } 72 | } 73 | } 74 | } 75 | 76 | class StatisticTask extends TimerTask{ 77 | 78 | ConcurrentHashMap guidMap; 79 | 80 | public StatisticTask(ConcurrentHashMap guidMap) { 81 | this.guidMap = guidMap; 82 | } 83 | 84 | @Override 85 | public void run() { 86 | System.out.println(DateFormatUtils.format(new Date(),"yyyy-MM-dd HH:mm:ss") + " ,截止到当前的用户总数为: " + guidMap.size()); 87 | } 88 | } -------------------------------------------------------------------------------- /kafka_course/src/main/java/cn/doitedu/kafka/Kafka编程练习_消费者_Bitmap.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.kafka; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import org.apache.commons.lang3.time.DateFormatUtils; 5 | import org.apache.kafka.clients.consumer.ConsumerConfig; 6 | import org.apache.kafka.clients.consumer.ConsumerRecord; 7 | import org.apache.kafka.clients.consumer.ConsumerRecords; 8 | import org.apache.kafka.clients.consumer.KafkaConsumer; 9 | import org.apache.kafka.common.serialization.StringDeserializer; 10 | import org.roaringbitmap.RoaringBitmap; 11 | 12 | import java.time.Duration; 13 | import java.util.*; 14 | import java.util.concurrent.ConcurrentHashMap; 15 | 16 | public class Kafka编程练习_消费者_Bitmap { 17 | 18 | public static void main(String[] args) { 19 | 20 | // 用一个bitmap来记录去重guid 21 | RoaringBitmap bitmap = RoaringBitmap.bitmapOf(); 22 | 23 | 24 | // 启动数据消费线程 25 | new Thread(new ConsumeRunnableBitmap(bitmap)).start(); 26 | 27 | 28 | // 启动一个统计及输出结果的线程(每5秒输出一次结果) 29 | // 优雅一点来实现定时调度,可以用各种定时调度器(有第三方的,也可以用jdk自己的:Timer) 30 | Timer timer = new Timer(); 31 | timer.scheduleAtFixedRate(new StatisticBitmapTask(bitmap),5000,10000); 32 | 33 | 34 | } 35 | } 36 | 37 | /** 38 | * 消费拉取数据的线程runnable 39 | */ 40 | class ConsumeRunnableBitmap implements Runnable{ 41 | 42 | RoaringBitmap bitmap; 43 | 44 | public ConsumeRunnableBitmap(RoaringBitmap bitmap) { 45 | this.bitmap = bitmap; 46 | } 47 | 48 | @Override 49 | public void run() { 50 | Properties props = new Properties(); 51 | props.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG,"doit01:9092"); 52 | props.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName()); 53 | 
props.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName()); 54 | props.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,"latest"); 55 | props.setProperty(ConsumerConfig.GROUP_ID_CONFIG,"event-01"); 56 | 57 | KafkaConsumer consumer = new KafkaConsumer<>(props); 58 | consumer.subscribe(Arrays.asList("doit30-events")); 59 | 60 | while(true){ 61 | ConsumerRecords records = consumer.poll(Duration.ofMillis(5000)); 62 | for (ConsumerRecord record : records) { 63 | String eventJson = record.value(); 64 | // 解析json, 拿到 guid 65 | try { 66 | UserEvent userEvent = JSON.parseObject(eventJson, UserEvent.class); 67 | 68 | // 向bitmap中添加元素 69 | bitmap.add((int) userEvent.getGuid()); 70 | 71 | 72 | }catch (Exception e){ 73 | System.out.println("出异常了: " + eventJson); 74 | } 75 | } 76 | } 77 | } 78 | } 79 | 80 | class StatisticBitmapTask extends TimerTask{ 81 | 82 | RoaringBitmap bitmap; 83 | 84 | public StatisticBitmapTask(RoaringBitmap bitmap) { 85 | this.bitmap = bitmap; 86 | } 87 | 88 | @Override 89 | public void run() { 90 | System.out.println(DateFormatUtils.format(new Date(),"yyyy-MM-dd HH:mm:ss") + " ,截止到当前的用户总数为: " + bitmap.getCardinality()); 91 | } 92 | } -------------------------------------------------------------------------------- /kafka_course/src/main/java/cn/doitedu/kafka/Kafka编程练习_消费者_判重.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.kafka; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import com.google.common.hash.BloomFilter; 5 | import com.google.common.hash.Funnels; 6 | import org.apache.commons.lang3.time.DateFormatUtils; 7 | import org.apache.kafka.clients.consumer.ConsumerConfig; 8 | import org.apache.kafka.clients.consumer.ConsumerRecord; 9 | import org.apache.kafka.clients.consumer.ConsumerRecords; 10 | import org.apache.kafka.clients.consumer.KafkaConsumer; 11 | import org.apache.kafka.common.serialization.StringDeserializer; 12 | import org.roaringbitmap.RoaringBitmap; 13 | 14 | import java.time.Duration; 15 | import java.util.*; 16 | 17 | public class Kafka编程练习_消费者_判重 { 18 | 19 | public static void main(String[] args) { 20 | 21 | // 启动数据消费线程 22 | new Thread(new ConsumeRunnableBloomFilter()).start(); 23 | } 24 | 25 | 26 | /** 27 | * 消费拉取数据的线程runnable 28 | */ 29 | static class ConsumeRunnableBloomFilter implements Runnable { 30 | 31 | BloomFilter bloomFilter; 32 | 33 | KafkaConsumer consumer; 34 | 35 | public ConsumeRunnableBloomFilter() { 36 | bloomFilter = BloomFilter.create(Funnels.longFunnel(), 1000000000, 0.01); 37 | 38 | Properties props = new Properties(); 39 | props.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "doit01:9092"); 40 | props.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName()); 41 | props.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName()); 42 | props.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest"); 43 | props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "event-01"); 44 | 45 | consumer = new KafkaConsumer<>(props); 46 | 47 | } 48 | 49 | @Override 50 | public void run() { 51 | consumer.subscribe(Arrays.asList("doit30-events")); 52 | while (true) { 53 | ConsumerRecords records = consumer.poll(Duration.ofMillis(5000)); 54 | for (ConsumerRecord record : records) { 55 | String eventJson = record.value(); 56 | // 解析json, 拿到 guid 57 | try { 58 | UserEvent userEvent = JSON.parseObject(eventJson, UserEvent.class); 59 | 60 | // 去布隆过滤器中判断一下,本次出现guid,是否曾经已经记录过 
61 | boolean mightContain = bloomFilter.mightContain(userEvent.getGuid()); 62 | 63 | // 如果本次出现的guid已存在,则将flag设置为0 64 | if(mightContain){ 65 | userEvent.setFlag(0); 66 | } 67 | // 如果本次出现的guid不存在,则将flag设置为1,并将本次出现的guid映射到布隆过滤 68 | else{ 69 | userEvent.setFlag(1); 70 | // 向布隆过滤器中映射新的元素 71 | bloomFilter.put(userEvent.getGuid()); 72 | } 73 | 74 | // 输出结果 75 | System.out.println(JSON.toJSONString(userEvent)); 76 | 77 | } catch (Exception e) { 78 | System.out.println("出异常了: " + eventJson); 79 | } 80 | } 81 | } 82 | } 83 | } 84 | } -------------------------------------------------------------------------------- /kafka_course/src/main/java/cn/doitedu/kafka/Kafka自身事务机制.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.kafka; 2 | 3 | import org.apache.kafka.clients.consumer.*; 4 | import org.apache.kafka.clients.producer.KafkaProducer; 5 | import org.apache.kafka.clients.producer.ProducerConfig; 6 | import org.apache.kafka.clients.producer.ProducerRecord; 7 | import org.apache.kafka.common.TopicPartition; 8 | import org.apache.kafka.common.serialization.StringDeserializer; 9 | import org.apache.kafka.common.serialization.StringSerializer; 10 | 11 | import java.time.Duration; 12 | import java.util.*; 13 | 14 | /** 15 | * 从kafka的topic-a中读数据,处理(把读到的数据转大写),处理结果写回kafka的topic-b 16 | * 利用kafka自身的事务机制,来实现 端到端的eos语义 17 | * 核心点: 让 消费端的偏移量记录更新 和 生产端的数据落地 ,绑定在一个事务中 18 | */ 19 | public class Kafka自身事务机制 { 20 | 21 | public static void main(String[] args) { 22 | 23 | Properties props = new Properties(); 24 | /** 25 | * 消费者参数 26 | */ 27 | props.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "doit01:9092"); 28 | props.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName()); 29 | props.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName()); 30 | props.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "trans-001"); 31 | // 关闭消费者的消费位移自动提交机制 32 | props.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); 33 | 34 | /** 35 | * 生产者参数 36 | */ 37 | props.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "doit01:9092"); 38 | props.setProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName()); 39 | props.setProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName()); 40 | props.setProperty(ProducerConfig.TRANSACTIONAL_ID_CONFIG, "x001"); 41 | props.setProperty(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, "true"); 42 | props.setProperty(ProducerConfig.RETRIES_CONFIG, "3"); 43 | props.setProperty(ProducerConfig.ACKS_CONFIG, "all"); 44 | props.setProperty(ProducerConfig.MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION, "4"); 45 | 46 | 47 | KafkaConsumer consumer = new KafkaConsumer<>(props); 48 | KafkaProducer producer = new KafkaProducer<>(props); 49 | // 初始化事务 50 | producer.initTransactions(); 51 | // 创建一个自己记录最大消费位移的hashmap 52 | HashMap offsetsMap = new HashMap<>(); 53 | 54 | consumer.subscribe(Arrays.asList("topic-a")); 55 | 56 | // 开始消费数据,做业务处理 57 | boolean flag = true; 58 | while (flag) { 59 | ConsumerRecords records = consumer.poll(Duration.ofMillis(5000)); 60 | 61 | // 如果要对本次拉取的所有数据的处理绑定在一个事务中,则在此处开启事务 62 | producer.beginTransaction(); 63 | 64 | try { 65 | // 从拉取到的数据中,获得本批数据包含哪些分区 66 | Set topicPartitionSet = records.partitions(); 67 | 68 | // 遍历每一个分区 69 | for (TopicPartition topicPartition : topicPartitionSet) { 70 | 71 | // 从拉取到的数据中取到本分区的所有数据 72 | List> partitionRecords = records.records(topicPartition); 73 | for (ConsumerRecord 
record : partitionRecords) { 74 | // 业务处理逻辑 75 | String result = record.value().toUpperCase(); 76 | // 把处理的结果写出去 77 | ProducerRecord resultRecord = new ProducerRecord<>("topic-b", result); 78 | producer.send(resultRecord); 79 | 80 | // 记录本分区本条消息的offset 到 offsetMap中 81 | long offset = record.offset(); 82 | offsetsMap.put(topicPartition, new OffsetAndMetadata(offset + 1)); 83 | } 84 | } 85 | 86 | // 提交消费位移 87 | consumer.commitSync(); // 它会自动计算出本批拉取到的数据中的每个分区的最大消息offset,来得到每个分区要提交的消费位移 88 | // consumer.commitSync(offsetsMap); // 或者按照自己想要的各分区消费位移值来提交 89 | 90 | // 提交事务 91 | producer.commitTransaction(); 92 | } catch (Exception e) { 93 | // 如果上面那一批数据处理过程中任意时刻发生了异常,则放弃本次事务 94 | // 下游就可以通过设置 isolation_level=read_committed 来避开本次产生的“脏”数据 95 | producer.abortTransaction(); 96 | } 97 | } 98 | 99 | consumer.close(); 100 | producer.close(); 101 | 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /kafka_course/src/main/java/cn/doitedu/kafka/MyPartitioner.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.kafka; 2 | 3 | import org.apache.kafka.clients.producer.Partitioner; 4 | import org.apache.kafka.common.Cluster; 5 | 6 | import java.util.Map; 7 | 8 | public class MyPartitioner implements Partitioner { 9 | @Override 10 | public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) { 11 | 12 | 13 | return 0; 14 | } 15 | 16 | @Override 17 | public void close() { 18 | 19 | } 20 | 21 | @Override 22 | public void configure(Map configs) { 23 | 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /kafka_course/src/main/java/cn/doitedu/kafka/ProducerDemo.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.kafka; 2 | 3 | import org.apache.kafka.clients.producer.KafkaProducer; 4 | import org.apache.kafka.clients.producer.ProducerConfig; 5 | import org.apache.kafka.clients.producer.ProducerRecord; 6 | import org.apache.kafka.common.serialization.StringSerializer; 7 | 8 | import java.util.Properties; 9 | 10 | /** 11 | * kafka生产者api代码示例 12 | */ 13 | public class ProducerDemo { 14 | 15 | public static void main(String[] args) throws InterruptedException { 16 | 17 | 18 | // 泛型 K: 要发送的数据中的key 19 | // 泛型 V: 要发送的数据中的value 20 | // 隐含之意: kafka中的 message,是 Key-value结构的 (可以没有key) 21 | Properties props = new Properties(); 22 | props.setProperty("bootstrap.servers", "doit01:9092,doit02:9092"); 23 | 24 | // 因为kafka底层的存储是没有类型维护机制的,用户所发的所有数据类型,都必须变成 序列化后的byte[] 25 | // 所以,kafka的producer需要一个针对用户要发送的数据类型的序列化工具类 26 | // 且这个序列化工具类,需要实现kafka所提供的序列工具接口: org.apache.kafka.common.serialization.Serializer 27 | props.setProperty("key.serializer", "org.apache.kafka.common.serialization.StringSerializer"); 28 | props.setProperty("value.serializer", "org.apache.kafka.common.serialization.StringSerializer"); 29 | 30 | /** 31 | * 代码中进行客户端参数配置的另一种写法 32 | */ 33 | props.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "doit01:9092,doit02:9092"); 34 | props.setProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName()); 35 | props.setProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName()); 36 | props.setProperty(ProducerConfig.ACKS_CONFIG, "all"); // 消息发送应答级别 37 | props.setProperty(ProducerConfig.PARTITIONER_CLASS_CONFIG,MyPartitioner.class.getName()); // 指定自定义的分区器 38 | 39 | 40 | // 构造一个生产者客户端 41 | KafkaProducer producer = 
new KafkaProducer<>(props); 42 | 43 | 44 | // 检查是否发送成功的消费者命令: 45 | // kafka-console-consumer.sh --bootstrap-server doit01:9092 --topic abcx 46 | // kafka-console-consumer.sh --bootstrap-server doit01:9092 --topic abcy 47 | for(int i=0;i<100;i++){ 48 | // 将业务数据封装成客户端所能发送的封装格式 49 | // 0->abc0 50 | // 1->abc1 51 | 52 | // TODO 请修改此处逻辑: i奇数的业务数据,发到 abcx ; i为偶数的业务数据,发到 abcy 53 | ProducerRecord message = new ProducerRecord<>("abcx", "user_id:"+i, "doit_edu_" + i); 54 | 55 | // 调用客户端去发送 56 | // 数据的发送动作在producer的底层是异步线程去异步发送的 57 | producer.send(message); 58 | 59 | Thread.sleep(100); 60 | } 61 | 62 | 63 | // 关闭客户端 64 | // producer.flush(); 65 | producer.close(); 66 | 67 | 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /kafka_course/src/main/resources/bitmap示意图.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderblack/doit30_flink/d693145232e4b3c40a1efbe06e2408ebffcfc3d0/kafka_course/src/main/resources/bitmap示意图.png -------------------------------------------------------------------------------- /kafka_course/src/main/resources/consumer.properties: -------------------------------------------------------------------------------- 1 | bootstrap.servers=doit01:9092,doit02:9092,doit03:9092 2 | key.deserializer=org.apache.kafka.common.serialization.StringDeserializer 3 | value.deserializer=org.apache.kafka.common.serialization.StringDeserializer -------------------------------------------------------------------------------- /kafka_course/src/test/java/RoaringBitmapTest.java: -------------------------------------------------------------------------------- 1 | import com.google.common.hash.BloomFilter; 2 | import com.google.common.hash.Funnels; 3 | import org.roaringbitmap.RoaringBitmap; 4 | 5 | import java.nio.charset.Charset; 6 | 7 | public class RoaringBitmapTest { 8 | 9 | public static void main(String[] args) { 10 | 11 | 12 | RoaringBitmap bitmap1 = RoaringBitmap.bitmapOf(1, 3, 5); 13 | // 添加元素 14 | bitmap1.add(8); 15 | 16 | // 输出bitmap中的1的个数(元素个数) 17 | System.out.println(bitmap1.getCardinality()); 18 | 19 | // 判断一个元素是否已存在 20 | bitmap1.contains(5); // true 21 | 22 | 23 | 24 | RoaringBitmap bitmap2 = RoaringBitmap.bitmapOf(); 25 | // 添加元素 26 | bitmap2.add(1); 27 | bitmap2.add(2); 28 | bitmap2.add(6); 29 | bitmap2.add(3); 30 | 31 | 32 | // 两个bitmap进行或运算 33 | // bitmap1.or(bitmap2); 34 | // System.out.println(bitmap1.getCardinality()); // 6 35 | 36 | // 两个bitmap进行与运算 37 | bitmap1.and(bitmap2); 38 | System.out.println(bitmap1.getCardinality()); // 2 39 | 40 | 41 | 42 | } 43 | 44 | 45 | } 46 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | cn.doitedu 8 | doit30_flink 9 | pom 10 | 1.0 11 | 12 | kafka_course 13 | flink_course 14 | datagen 15 | 16 | 17 | 18 | 8 19 | 8 20 | 2.3.1 21 | 1.14.4 22 | 2.12.12 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | org.projectlombok 31 | lombok 32 | 1.18.24 33 | 34 | 35 | 36 | 37 | com.alibaba 38 | fastjson 39 | 2.0.7 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | --------------------------------------------------------------------------------
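The commented-out createTopics call near the top of AdminClientDemo.java depends on a NewTopic object built earlier in that file, which is not reproduced here. As a hedged, stand-alone sketch of the same admin-client call (the class name, broker address, topic name, partition count and replication factor are illustrative assumptions, not values taken from the project), topic creation can look like this:

import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.AdminClientConfig;
import org.apache.kafka.clients.admin.CreateTopicsResult;
import org.apache.kafka.clients.admin.NewTopic;

import java.util.Collections;
import java.util.Properties;
import java.util.concurrent.ExecutionException;

public class CreateTopicSketch {
    public static void main(String[] args) throws ExecutionException, InterruptedException {
        Properties props = new Properties();
        props.setProperty(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "doit01:9092"); // assumed broker address
        AdminClient adminClient = AdminClient.create(props);

        // topic name, 2 partitions, replication factor 2 are illustrative values
        NewTopic topic = new NewTopic("zzuzz", 2, (short) 2);
        CreateTopicsResult result = adminClient.createTopics(Collections.singletonList(topic));

        // createTopics is asynchronous; block here until the broker has answered
        result.all().get();

        adminClient.close();
    }
}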
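需求3 in Kafka编程练习.java (every 5 minutes, report the number of distinct users seen in the last 10 minutes) is marked TODO and has no consumer class. One possible sketch that stays with the RoaringBitmap approach of Kafka编程练习_消费者_Bitmap: keep one bitmap per minute, and let a TimerTask union the buckets of the last 10 minutes. The one-minute bucketing, the use of processing time, and the class names are assumptions; like the existing demos, it also glosses over the fact that RoaringBitmap mutation is not thread-safe.

import org.roaringbitmap.RoaringBitmap;

import java.util.Map;
import java.util.TimerTask;
import java.util.concurrent.ConcurrentHashMap;

/** Shared between the kafka consumer thread (add) and the timer thread (run). */
class MinuteBuckets {
    // minute index (epoch millis / 60000) -> bitmap of guids seen during that minute
    final Map<Long, RoaringBitmap> buckets = new ConcurrentHashMap<>();

    void add(long guid) {
        long minute = System.currentTimeMillis() / 60_000L;
        buckets.computeIfAbsent(minute, k -> RoaringBitmap.bitmapOf())
               .add((int) guid);  // same int narrowing as the existing bitmap consumer
    }
}

/** Schedule with: new Timer().scheduleAtFixedRate(new SlidingWindowTask(buckets), 0, 5 * 60_000L); */
class SlidingWindowTask extends TimerTask {
    private final MinuteBuckets minuteBuckets;

    SlidingWindowTask(MinuteBuckets minuteBuckets) {
        this.minuteBuckets = minuteBuckets;
    }

    @Override
    public void run() {
        long nowMinute = System.currentTimeMillis() / 60_000L;
        RoaringBitmap union = RoaringBitmap.bitmapOf();
        for (Map.Entry<Long, RoaringBitmap> e : minuteBuckets.buckets.entrySet()) {
            if (e.getKey() > nowMinute - 10) {
                union.or(e.getValue());                    // in-place union of the recent buckets
            } else {
                minuteBuckets.buckets.remove(e.getKey());  // evict buckets older than the window
            }
        }
        System.out.println("distinct users in the last 10 minutes: " + union.getCardinality());
    }
}

In the consumer loop, the call bitmap.add((int) userEvent.getGuid()) from the bitmap demo would then simply become minuteBuckets.add(userEvent.getGuid()).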
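In Kafka自身事务机制.java the header comment says the offset update and the produced results should be bound into one transaction, but the code then commits offsets with consumer.commitSync(), which runs outside the producer transaction; if the transaction later aborts, the offsets are still advanced. The producer API provides sendOffsetsToTransaction for exactly this case. A hedged sketch of how the demo's try-block could use it follows (the wrapper method and its name are invented for the sketch; offsets corresponds to the demo's offsetsMap and groupId to its "trans-001"):

import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.common.TopicPartition;

import java.util.Map;

class TransactionalCommitSketch {

    /**
     * Commits one processed batch atomically: the records sent on this producer since
     * beginTransaction() and the consumer offsets in {@code offsets} either all become
     * visible or are all aborted together.
     */
    static void commitBatch(KafkaProducer<String, String> producer,
                            Map<TopicPartition, OffsetAndMetadata> offsets,
                            String groupId) {
        try {
            // bind the consumed offsets to the open transaction instead of consumer.commitSync()
            producer.sendOffsetsToTransaction(offsets, groupId);
            producer.commitTransaction();
        } catch (Exception e) {
            // any failure aborts both the produced records and the offset update
            producer.abortTransaction();
        }
    }
}

This uses the String-group-id overload of sendOffsetsToTransaction; newer client versions also offer an overload that takes the consumer's ConsumerGroupMetadata.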
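MyPartitioner.partition() is a stub that returns 0, so with the PARTITIONER_CLASS_CONFIG line in ProducerDemo.java enabled, every record would land in partition 0. A minimal key-hashing sketch is shown below; the class name is invented, and the murmur2 hashing only mirrors what Kafka's default partitioner does for keyed records, it is not necessarily the logic the course intends students to write.

import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;
import org.apache.kafka.common.utils.Utils;

import java.util.Map;

public class HashKeyPartitioner implements Partitioner {

    @Override
    public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
        int numPartitions = cluster.partitionCountForTopic(topic);
        if (keyBytes == null) {
            return 0; // keyless records all go to partition 0 in this sketch
        }
        // positive murmur2 hash of the key bytes, modulo the topic's partition count
        return Utils.toPositive(Utils.murmur2(keyBytes)) % numPartitions;
    }

    @Override
    public void close() { }

    @Override
    public void configure(Map<String, ?> configs) { }
}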
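The TODO inside the send loop of ProducerDemo.java asks for records with an odd i to go to topic abcx and an even i to abcy. One way to write that loop is sketched below; it assumes the producer built in ProducerDemo.main() and keeps the demo's key/value format and pacing.

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

class OddEvenRoutingSketch {

    // Uses the producer constructed in ProducerDemo.main(); only the topic choice differs from the demo loop.
    static void sendOddEven(KafkaProducer<String, String> producer) throws InterruptedException {
        for (int i = 0; i < 100; i++) {
            // odd i -> abcx, even i -> abcy, as the TODO requests
            String topic = (i % 2 == 1) ? "abcx" : "abcy";
            ProducerRecord<String, String> message = new ProducerRecord<>(topic, "user_id:" + i, "doit_edu_" + i);
            producer.send(message);
            Thread.sleep(100); // keep the demo's sending rate
        }
    }
}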