├── README.md ├── src └── main │ ├── scala │ └── com │ │ └── venn │ │ ├── stream │ │ └── api │ │ │ ├── broadcast │ │ │ ├── readmd.md │ │ │ └── BroadCastDemo.scala │ │ │ ├── intervalJoin │ │ │ ├── IntervalUser.scala │ │ │ ├── IntervalJoinProcessFunctionDemo.scala │ │ │ ├── IntervalJoinKafkaKeyMaker.scala │ │ │ └── IntervalJoinDemo.scala │ │ │ ├── sideoutput │ │ │ └── lateDataProcess │ │ │ │ └── readme.md │ │ │ ├── dayWindow │ │ │ ├── CurrentDayMaker.scala │ │ │ └── CurrentDayPvCount.scala │ │ │ ├── trigger │ │ │ └── ProcessWindowForTrigger.scala │ │ │ ├── checkpoint │ │ │ └── CheckpointDebug.scala │ │ │ ├── tableJoin │ │ │ └── CacheFile.scala │ │ │ └── timer │ │ │ └── CustomerTimerDemo.scala │ │ ├── connector │ │ ├── jdbcOutput │ │ │ ├── User.scala │ │ │ ├── MysqlOutputMaker.scala │ │ │ ├── MysqlOutputDemo.scala │ │ │ ├── MysqlSink1.scala │ │ │ └── MysqlSink.scala │ │ ├── filesink │ │ │ ├── filesink.md │ │ │ ├── DayBasePathBucketer.scala │ │ │ ├── FileSinkMaker.scala │ │ │ ├── DayBucketAssigner.scala │ │ │ ├── DayBulkWriter.scala │ │ │ ├── StreamingFileSinkDemo.scala │ │ │ └── RollingFileSinkDemo.scala │ │ ├── starrocks │ │ │ ├── Column.java │ │ │ ├── TableSchema.java │ │ │ ├── CustJdbcSource.java │ │ │ ├── StreamLoadTestV2.scala │ │ │ └── StreamLoadTest.scala │ │ ├── cdc │ │ │ └── CdcDdlTest.scala │ │ ├── pulsar │ │ │ └── PulsarDemo.scala │ │ └── kafka │ │ │ └── KafkaSinkTest.scala │ │ ├── question │ │ ├── retention │ │ │ └── UserLog.scala │ │ ├── stock │ │ │ ├── entry │ │ │ │ ├── Stock.java │ │ │ │ ├── OverStockDetail.java │ │ │ │ ├── OverStock.java │ │ │ │ ├── StockList.java │ │ │ │ └── StockListDetail.java │ │ │ ├── util │ │ │ │ ├── OverStockFlatMapFunction.scala │ │ │ │ └── StockCommon.scala │ │ │ └── README.md │ │ ├── processAndEvent │ │ │ └── SimpleProcessFunction.scala │ │ ├── dynamicWindow │ │ │ ├── DataSourceFunction.scala │ │ │ ├── DyTumblingWindow.java │ │ │ ├── DyProcessWindowFunction.scala │ │ │ └── readme.md │ │ ├── cdcStarrocks │ │ │ ├── CdcRecord.java │ │ │ ├── CdcStarMapFunction.java │ │ │ ├── CdcToStarRocks.java │ │ │ └── CdcStarProcessFunction.java │ │ ├── UserClue │ │ │ ├── UserClue.scala │ │ │ └── question.md │ │ ├── late1mtps │ │ │ ├── LateTpsProcessWindowFunction.scala │ │ │ └── LateTps.scala │ │ ├── dataFluctuation │ │ │ └── DataFluctuation.scala │ │ └── tryFlink │ │ │ └── FraudDetection.scala │ │ ├── table │ │ └── TableApiDemo.java │ │ ├── util │ │ ├── TwoStringSource.scala │ │ ├── MathUtil.java │ │ ├── StringUtil.java │ │ ├── CheckpointUtil.scala │ │ └── HttpClientUtil.java │ │ ├── demo │ │ ├── CustomerSource.scala │ │ ├── SlotPartitionDemo.scala │ │ ├── FilterTest.scala │ │ ├── SlotPartitionMaker.scala │ │ └── relationCntA.scala │ │ ├── source │ │ ├── cust │ │ │ ├── ReadHttpWordCount.java │ │ │ ├── CustHttpSource.java │ │ │ └── HttpServer.java │ │ ├── kafka │ │ │ ├── kafkaToKafkaGroup.sql │ │ │ ├── KafkaUpsertTableSink.java │ │ │ └── KafkaUpsertTableSourceSinkFactory.java │ │ ├── RichAsyncFunction.scala │ │ └── mysql │ │ │ └── cdc │ │ │ ├── CommonKafkaSink.java │ │ │ ├── Binlog.java │ │ │ └── MySqlBinlogSourceExample.java │ │ ├── cep │ │ ├── cep.md │ │ ├── ContinueRising.scala │ │ └── AfterMatchStrategyDemo.scala │ │ └── common │ │ ├── Common.java │ │ └── MySqlDateTimeConverter.java │ ├── resources │ ├── data │ │ └── tablejoin.txt │ ├── cdc_demo.properties │ ├── sql │ │ ├── kafkaJsonSourceSinkDemo.sql │ │ └── sqlDemo.sql │ └── log4j.properties │ ├── java │ └── com │ │ └── venn │ │ ├── entity │ │ ├── EntityObject.java │ │ ├── Behavior.java │ │ ├── 
StreamElement.java │ │ ├── KafkaSimpleStringRecord.java │ │ └── UserLog.java │ │ ├── question │ │ └── LateTps.java │ │ ├── flink │ │ └── asyncio │ │ │ ├── MysqlData.java │ │ │ ├── AsyncUser.java │ │ │ ├── AsyncHbaseRequest.java │ │ │ ├── AsyncFunctionForMysqlJava.java │ │ │ ├── AsyncMysqlRequest.java │ │ │ ├── AsyncFunctionForHbaseJava.java │ │ │ └── MysqlClient.java │ │ ├── util │ │ └── SimpleKafkaRecordDeserializationSchema.java │ │ └── demo │ │ ├── KafkaJoinRedisDemo.java │ │ ├── AsyncRedisFunction.java │ │ └── TypeTest.java │ └── test │ └── com │ └── venn │ └── connector │ └── kafka │ └── KafkaOffsetRevertTest.scala ├── doc ├── Flink Table Api & SQL.pdf └── Flink Table Api & SQL.docx ├── .gitignore └── git.sh /README.md: -------------------------------------------------------------------------------- 1 | # flink-rookie 2 | Flink 菜鸟公众号代码地址 3 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/stream/api/broadcast/readmd.md: -------------------------------------------------------------------------------- 1 | ## 读取广播变量 -------------------------------------------------------------------------------- /src/main/resources/data/tablejoin.txt: -------------------------------------------------------------------------------- 1 | 1,venn 2 | 2,mary 3 | 3,tom 4 | 4,join -------------------------------------------------------------------------------- /doc/Flink Table Api & SQL.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/springMoon/flink-rookie/HEAD/doc/Flink Table Api & SQL.pdf -------------------------------------------------------------------------------- /doc/Flink Table Api & SQL.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/springMoon/flink-rookie/HEAD/doc/Flink Table Api & SQL.docx -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | target/ 3 | /.idea 4 | .idea/ 5 | .idea/workspace.xml 6 | .idea/compiler.xml 7 | .idea/misc.xml 8 | *.iml -------------------------------------------------------------------------------- /src/main/java/com/venn/entity/EntityObject.java: -------------------------------------------------------------------------------- 1 | package com.venn.entity; 2 | 3 | public abstract class EntityObject { 4 | 5 | } 6 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/connector/jdbcOutput/User.scala: -------------------------------------------------------------------------------- 1 | package com.venn.connector.jdbcOutput 2 | 3 | case class User(username: String, password: String, sex: Int, phone: String) -------------------------------------------------------------------------------- /src/main/scala/com/venn/stream/api/intervalJoin/IntervalUser.scala: -------------------------------------------------------------------------------- 1 | package com.venn.stream.api.intervalJoin 2 | 3 | case class IntervalUser(id: String, name: String, phone:String, date: String) 4 | -------------------------------------------------------------------------------- /git.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | message="update today" 4 | if [ -n "$1" ]; then 5 | message=$1 6 | fi 7 | 8 | git pull 9 | git add * 10 | git commit -m "$message ` date 
-d now +"%F %T"`"
git push
--------------------------------------------------------------------------------
/src/main/scala/com/venn/question/retention/UserLog.scala:
--------------------------------------------------------------------------------
package com.venn.question.retention

case class UserLog(userId: String, categoryId: Int, itemId: Int, behavior: String, ts: String, tsLong: Long)
--------------------------------------------------------------------------------
/src/main/scala/com/venn/connector/filesink/filesink.md:
--------------------------------------------------------------------------------
## file sink for user-defined file names
* BucketingSink
* StreamingFileSink

```txt
With BucketingSink, override BasePathBucketer.
With StreamingFileSink, implement a custom BucketAssigner.
```
--------------------------------------------------------------------------------
/src/main/scala/com/venn/table/TableApiDemo.java:
--------------------------------------------------------------------------------
package com.venn.table;

public class TableApiDemo {
    public static void main(String[] args) {
        // TableEnvironment tableEnv = TableEnvironment.create(/*…*/);
    }
}
--------------------------------------------------------------------------------
/src/main/scala/com/venn/question/stock/entry/Stock.java:
--------------------------------------------------------------------------------
package com.venn.question.stock.entry;

/**
 * @Classname Stock
 * @Description TODO
 * @Date 2023/6/12
 * @Created by venn
 */
public interface Stock {

}
--------------------------------------------------------------------------------
/src/main/scala/com/venn/stream/api/sideoutput/lateDataProcess/readme.md:
--------------------------------------------------------------------------------
## Late-data handling and the number of keyBy keys
Question from 木三: for event-time data that arrives late, Flink receives it via a side output — how is it collected internally? Could someone who has read the source code explain?

Plan: study this part of the source code.

Steps:
* 1. Write a Flink job based on event time
* 2. Add a side output for late data
* 3. Debug the relevant source code to trace how late data is processed and where that logic lives
--------------------------------------------------------------------------------
/src/main/java/com/venn/question/LateTps.java:
--------------------------------------------------------------------------------
package com.venn.question;

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class LateTps {

    public static void main(String[] args) {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
        env.setParallelism(1);

    }
}
--------------------------------------------------------------------------------
/src/main/scala/com/venn/question/stock/util/OverStockFlatMapFunction.scala:
--------------------------------------------------------------------------------
package com.venn.question.stock.util

import org.apache.flink.api.common.functions.FlatMapFunction
import org.apache.flink.util.Collector

/**
 * @Classname OverStockFlatMapFunction
 * @Description TODO
 * @Date 2023/6/13
 * @Created by venn
 */
class OverStockFlatMapFunction extends FlatMapFunction[String, String] {
  override def flatMap(t: String, collector: Collector[String]): Unit = {

  }
}
--------------------------------------------------------------------------------
/src/main/scala/com/venn/util/TwoStringSource.scala:
-------------------------------------------------------------------------------- 1 | package com.venn.util 2 | 3 | import org.apache.flink.streaming.api.functions.source.SourceFunction 4 | 5 | class TwoStringSource extends SourceFunction[String] { 6 | 7 | var flag = true 8 | 9 | override def cancel(): Unit = { 10 | 11 | flag = false 12 | } 13 | 14 | override def run(ctx: SourceFunction.SourceContext[String]): Unit = { 15 | 16 | while (flag) { 17 | val str = MathUtil.getRadomNum(1) 18 | ctx.collect(str + "," + StringUtil.getRandomString(1).toUpperCase) 19 | Thread.sleep(1000) 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/demo/CustomerSource.scala: -------------------------------------------------------------------------------- 1 | package com.venn.demo 2 | 3 | import org.apache.flink.streaming.api.functions.source.SourceFunction 4 | 5 | 6 | class CustomerSource extends SourceFunction[Tuple2[Long,Long]]{ 7 | 8 | var count=1625048255867L 9 | var isRunning=true 10 | override def run(ctx: SourceFunction.SourceContext[Tuple2[Long,Long]]): Unit = { 11 | while(isRunning) { 12 | ctx.collect(new Tuple2(count,count)) 13 | count += 1000 14 | Thread.sleep(1000) 15 | } 16 | } 17 | 18 | override def cancel(): Unit = { 19 | 20 | isRunning=false 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/main/resources/cdc_demo.properties: -------------------------------------------------------------------------------- 1 | job_name=guest_task_result 2 | ## source mysql 3 | source.host:localhost 4 | source.port:3306 5 | source.user:root 6 | source.pass:123456 7 | source.database:operation 8 | source.table_list:operation.guest_task_result 9 | source.time_zone:Asia/Shanghai 10 | # init latest 11 | source.startup_option:latest 12 | source.startup_option_time:2024-03-07 00:00:00 13 | ## sink starrocks 14 | sink.jdbc-url=jdbc:mysql://localhost:9030 15 | sink.load-url=localhost:18030 16 | sink.jdbcPort = 9030 17 | sink.httpPort=18030 18 | sink.username=root 19 | sink.password=123456 20 | sink.database-name=test 21 | sink.table-name=guest_task_result 22 | sink.batch=64000 23 | sink.interval=5000 -------------------------------------------------------------------------------- /src/main/scala/com/venn/question/stock/util/StockCommon.scala: -------------------------------------------------------------------------------- 1 | package com.venn.question.stock.util 2 | 3 | /** 4 | * @Classname MoveSaleCommon 5 | * @Description TODO 6 | * @Date 2023/6/8 7 | * @Created by venn 8 | */ 9 | object StockCommon { 10 | 11 | val MYSQL_HOST = "10.201.0.30" 12 | val MYSQL_PORT = 3316 13 | val MYSQL_USER = "root" 14 | val MYSQL_PASS = "R59JUZJ&dG" 15 | 16 | val KAFKA_BOOTSTRAT_SERVER = "localhost:9092" 17 | 18 | val OVERSTOCK = "ods_poc_k3_sal_outstock" 19 | val OVERSTOCK_DETAIL = "ods_poc_k3_sal_outstockentry" 20 | val STOCK_LIST = "ods_poc_sfa_distributoroutstocklist" 21 | val STOCK_LIST_DETAIL = "ods_poc_sfa_distributoroutstocklist_detail" 22 | 23 | } 24 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/question/stock/README.md: -------------------------------------------------------------------------------- 1 | # 库存场景计算 2 | 3 | ## 源表 4 | 5 | | 表名 | 内容 | 6 | | --- | --- | 7 | | ods_poc_dim_product_doc_api | dim_产品档案 | 8 | | ods_poc_dim_agency_doc_api | dim_经销商档案 | 9 | | ods_poc_dim_dpt_doc_api | dim_部门档案 | 10 | | ods_poc_k3_customer | k3_客户档案 | 
11 | | ods_poc_k3_material | k3_物料档案 | 12 | | ods_poc_k3_sal_outstock | k3_销售出库单主表 | 13 | | ods_poc_k3_sal_outstockentry | k3_销售出库单子表 | 14 | | ods_poc_sfa_in_out | sfa_出入库类型 | 15 | | ods_poc_sfa_stocking | sfa_期初库存 | 16 | | ods_poc_sfa_distributoroutstocklist | sfa_经销商进出库扫码主表 | 17 | | ods_poc_sfa_distributoroutstocklist_detail | sfa_经销商进出库扫码子表 | 18 | | ods_poc_sfa_distributor_department | sfa_经销商部门对照关系 | 19 | 20 | 21 | 22 | ## 动销 23 | 业务逻辑: 非核心产品采购 + 核心产品销售 C002/C004 24 | 25 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/source/cust/ReadHttpWordCount.java: -------------------------------------------------------------------------------- 1 | package com.venn.source.cust; 2 | 3 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 5 | 6 | public class ReadHttpWordCount { 7 | public static void main(String[] args) throws Exception { 8 | 9 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 10 | env.setParallelism(1); 11 | 12 | DataStreamSource source = env.addSource(new CustHttpSource("http://localhost:8888", 10)); 13 | 14 | source.map(item -> item) 15 | .keyBy(item -> "0") 16 | .max(0) 17 | .print(); 18 | env.execute("ReadHttpWordCount"); 19 | 20 | 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/connector/filesink/DayBasePathBucketer.scala: -------------------------------------------------------------------------------- 1 | //package com.venn.connector.filesink 2 | // 3 | //import java.io.File 4 | //import org.apache.flink.streaming.connectors.fs.Clock 5 | //import org.apache.flink.streaming.connectors.fs.bucketing.BasePathBucketer 6 | //import org.apache.hadoop.fs.Path 7 | // 8 | // 9 | ///** 10 | // * 根据实际数据返回数据输出的路径 11 | // */ 12 | //class DayBasePathBucketer extends BasePathBucketer[String]{ 13 | // 14 | // /** 15 | // * 返回路径 16 | // * @param clock 17 | // * @param basePath 18 | // * @param element 19 | // * @return 20 | // */ 21 | // override def getBucketPath(clock: Clock, basePath: Path, element: String): Path = { 22 | // // yyyyMMdd 23 | // val day = element.substring(1, 9) 24 | // new Path(basePath + File.separator + day) 25 | // } 26 | //} 27 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/util/MathUtil.java: -------------------------------------------------------------------------------- 1 | package com.venn.util; 2 | 3 | import java.util.Random; 4 | 5 | /** 6 | * Created by venn on 19-2-13. 
7 | */ 8 | public class MathUtil { 9 | 10 | public static Random random = new Random(); 11 | public static int index =1; 12 | 13 | public static String getMediaCode(int i){ 14 | String mediacode = fitNum(i); 15 | 16 | return mediacode; 17 | } 18 | 19 | private static String fitNum(int num){ 20 | String str = String.valueOf(num); 21 | 22 | while (str.length() < 10){ 23 | str = "0"+str; 24 | } 25 | return str; 26 | } 27 | 28 | public static String getRadomNum(int num){ 29 | String tmp = ""; 30 | for (int i =0; i< num; i++){ 31 | tmp += random.nextInt(10); 32 | } 33 | 34 | return tmp; 35 | 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/question/processAndEvent/SimpleProcessFunction.scala: -------------------------------------------------------------------------------- 1 | package com.venn.question.processAndEvent 2 | 3 | import com.venn.question.retention.UserLog 4 | import com.venn.util.DateTimeUtil 5 | import org.apache.flink.streaming.api.scala.function.ProcessWindowFunction 6 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow 7 | import org.apache.flink.util.Collector 8 | 9 | /** 10 | * user day retention analyze process function 11 | */ 12 | class SimpleProcessFunction(time: String) extends ProcessWindowFunction[UserLog, String, String, TimeWindow] { 13 | override def process(key: String, context: Context, elements: Iterable[UserLog], out: Collector[String]): Unit = { 14 | 15 | val current = DateTimeUtil.formatMillis(System.currentTimeMillis(), DateTimeUtil.YYYY_MM_DD_HH_MM_SS) 16 | out.collect(current + "\t time trigger calc: " + time) 17 | 18 | } 19 | } -------------------------------------------------------------------------------- /src/main/scala/com/venn/demo/SlotPartitionDemo.scala: -------------------------------------------------------------------------------- 1 | package com.venn.demo 2 | 3 | import com.venn.common.Common 4 | import org.apache.flink.api.common.serialization.SimpleStringSchema 5 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 6 | import org.apache.flink.streaming.connectors.kafka.{FlinkKafkaConsumer, FlinkKafkaProducer} 7 | import org.apache.flink.api.scala._ 8 | 9 | object SlotPartitionDemo { 10 | 11 | def main(args: Array[String]): Unit = { 12 | 13 | val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment 14 | val topic = "slot_partition" 15 | val source = new FlinkKafkaConsumer[String](topic, new SimpleStringSchema(), Common.getProp) 16 | val sink = new FlinkKafkaProducer[String](topic+"_out", new SimpleStringSchema(), Common.getProp) 17 | 18 | env.setParallelism(2) 19 | env.addSource(source) 20 | .addSink(sink) 21 | 22 | 23 | env.execute(this.getClass.getName) 24 | } 25 | 26 | } 27 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/question/dynamicWindow/DataSourceFunction.scala: -------------------------------------------------------------------------------- 1 | package com.venn.question.dynamicWindow 2 | 3 | import java.util 4 | 5 | import com.google.gson.Gson 6 | import org.apache.flink.streaming.api.functions.source.SourceFunction 7 | 8 | import scala.util.Random 9 | 10 | /** 11 | * data source 12 | */ 13 | class DataSourceFunction extends SourceFunction[String] { 14 | 15 | var flag = true 16 | 17 | override def run(ctx: SourceFunction.SourceContext[String]): Unit = { 18 | 19 | var map = new util.HashMap[String, String] 20 | while (flag) { 21 | 22 
      val random = new Random()
      val gson = new Gson()
      for (i <- 1 to 4) {

        map.put("attr", "attr" + i)
        map.put("value", "" + random.nextInt(1000))
        map.put("time", "" + System.currentTimeMillis())

        val json = gson.toJson(map)

        ctx.collect(json)
      }

      Thread.sleep(1000)

    }

  }

  override def cancel(): Unit = {
    flag = false

  }
}
--------------------------------------------------------------------------------
/src/main/scala/com/venn/cep/cep.md:
--------------------------------------------------------------------------------
# CEP Demo
```text
this package is for CEP demos
```

## References
```text
Official docs: https://ci.apache.org/projects/flink/flink-docs-release-1.9/dev/libs/cep.html
Translation of the official docs: https://www.cnblogs.com/Springmoon-venn/p/11993468.html
刘博, Flink CEP in practice:
PPT: https://files.alicdn.com/tpsservice/94d409d9679d1b46034f7d00161d99a7.pdf
Video: https://www.bilibili.com/video/av66073054/
刘博, Apache Flink CEP in practice: https://mp.weixin.qq.com/s/4dQYr-RXKBRdrhu6Y5dZdw
Flink-CEPplus project: https://github.com/ljygz/Flink-CEPplus (the author and 末日布孤单 appear to be the same person)
CEP source-code walkthrough by 末日布孤单: https://www.cnblogs.com/ljygz/p/11978386.html
```

## After-match skip strategies:
```text
pattern: b+ c
input  : b1 b2 b3 c
NO_SKIP              : b1 b2 b3 c / b2 b3 c / b3 c  # skip only one event at a time, then start matching again
SKIP_TO_NEXT         : b1 b2 b3 c / b2 b3 c / b3 c  # skip to the next start event (i.e. the next b)
SKIP_PAST_LAST_EVENT : b1 b2 b3 c                   # skip past all events that were already matched
SKIP_TO_FIRST[b]     : b1 b2 b3 c / b2 b3 c / b3 c  # skip to the first b (if the first event is already b, start from the first b after it)?
SKIP_TO_LAST[b]      : b1 b2 b3 c / b3 c            # skip to the last b; if the pattern has no consecutive b, this probably skips to the event after c?
```
--------------------------------------------------------------------------------
/src/main/java/com/venn/entity/Behavior.java:
--------------------------------------------------------------------------------
package com.venn.entity;

/**
 * Clickstream entity object
 */
public class Behavior {
    private String userId;
    private String url;
    private long ts;

    public Behavior(String userId, String url, long ts) {
        this.userId = userId;
        this.url = url;
        this.ts = ts;
    }

    public String getUserId() {
        return userId;
    }

    public void setUserId(String userId) {
        this.userId = userId;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public long getTs() {
        return ts;
    }

    public void setTs(long ts) {
        this.ts = ts;
    }

    @Override
    public String toString() {
        return "Behavior{" +
                "userId='" + userId + '\'' +
                ", url='" + url + '\'' +
                ", ts=" + ts +
                '}';
    }
}
--------------------------------------------------------------------------------
/src/main/scala/com/venn/util/StringUtil.java:
--------------------------------------------------------------------------------
package com.venn.util;

/**
 * Created by venn on 19-2-13.
5 | */ 6 | public class StringUtil { 7 | 8 | public static String getRandomString(int len){ 9 | StringBuilder sb = new StringBuilder(); 10 | char tmp; 11 | for(int i=0; i< len; i++){ 12 | if(MathUtil.random.nextBoolean()){ 13 | tmp = (char)(MathUtil.random.nextInt(26) + 65); 14 | }else{ 15 | tmp = (char)(MathUtil.random.nextInt(26) + 97); 16 | } 17 | sb.append(tmp); 18 | } 19 | return sb.toString(); 20 | } 21 | 22 | public static String getRandomString(){ 23 | StringBuilder sb = new StringBuilder(); 24 | char tmp; 25 | for(int i=0; i<= 10; i++){ 26 | if(MathUtil.random.nextBoolean()){ 27 | tmp = (char)(MathUtil.random.nextInt(26) + 65); 28 | }else{ 29 | tmp = (char)(MathUtil.random.nextInt(26) + 97); 30 | } 31 | sb.append(tmp); 32 | } 33 | return sb.toString(); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/stream/api/intervalJoin/IntervalJoinProcessFunctionDemo.scala: -------------------------------------------------------------------------------- 1 | package com.venn.stream.api.intervalJoin 2 | 3 | import org.apache.flink.configuration.Configuration 4 | import org.apache.flink.streaming.api.functions.co.ProcessJoinFunction 5 | import org.apache.flink.util.Collector 6 | 7 | /** 8 | * 9 | */ 10 | class IntervalJoinProcessFunctionDemo extends ProcessJoinFunction[IntervalUser, IntervalUser, IntervalUser] { 11 | 12 | override def open(parameters: Configuration): Unit = { 13 | 14 | } 15 | 16 | 17 | override def processElement(left: IntervalUser, 18 | right: IntervalUser, 19 | ctx: ProcessJoinFunction[IntervalUser, IntervalUser, IntervalUser]#Context, 20 | out: Collector[IntervalUser]): Unit = { 21 | 22 | // println("left timestamp : " + ctx.getLeftTimestamp) 23 | // println("right timestamp : " + ctx.getRightTimestamp) 24 | 25 | out.collect(IntervalUser(left.id , left.name, right.phone, (left.date + "-" + right.date))) 26 | 27 | } 28 | 29 | 30 | 31 | override def close(): Unit = { 32 | 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/com/venn/flink/asyncio/MysqlData.java: -------------------------------------------------------------------------------- 1 | package com.venn.flink.asyncio; 2 | 3 | import java.sql.DriverManager; 4 | import java.sql.PreparedStatement; 5 | import java.sql.SQLException; 6 | 7 | public class MysqlData { 8 | 9 | private static String jdbcUrl = "jdbc:mysql://192.168.229.128:3306?useSSL=false&allowPublicKeyRetrieval=true"; 10 | private static String username = "root"; 11 | private static String password = "123456"; 12 | private static String driverName = "com.mysql.jdbc.Driver"; 13 | 14 | 15 | public static void main(String[] args) throws ClassNotFoundException, SQLException { 16 | 17 | java.sql.Connection conn; 18 | PreparedStatement ps; 19 | 20 | Class.forName(driverName); 21 | conn = DriverManager.getConnection(jdbcUrl, username, password); 22 | ps = conn.prepareStatement("insert into async.async_test(id, phone) values (?, ?)"); 23 | 24 | for (int i = 100000; i < 1000000; i++){ 25 | ps.setString(1, "" + i); 26 | ps.setString(2, "" + System.currentTimeMillis()); 27 | 28 | ps.execute(); 29 | // conn.commit(); 30 | } 31 | 32 | 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/connector/jdbcOutput/MysqlOutputMaker.scala: -------------------------------------------------------------------------------- 1 | package com.venn.connector.jdbcOutput 2 | 3 | 
import java.text.SimpleDateFormat 4 | 5 | import com.venn.common.Common 6 | import com.venn.util.{MathUtil, StringUtil} 7 | import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} 8 | 9 | /** 10 | * test data maker 11 | */ 12 | 13 | object MysqlOutputMaker { 14 | val topic = "async" 15 | 16 | def main(args: Array[String]): Unit = { 17 | 18 | while (true) { 19 | 20 | left("mysql_output") 21 | Thread.sleep(100) 22 | } 23 | } 24 | 25 | val sdf = new SimpleDateFormat("yyyyMMddHHmmss") 26 | 27 | var id = 0 28 | 29 | def left(topic: String) = { 30 | val producer = new KafkaProducer[String, String](Common.getProp) 31 | id = id + 1 32 | val username = StringUtil.getRandomString(5) 33 | val password = StringUtil.getRandomString(10) 34 | val sex = MathUtil.random.nextInt(2) 35 | val phone = MathUtil.getRadomNum(11) 36 | 37 | val message = username + "," + password + "," + sex + "," + phone 38 | 39 | val msg = new ProducerRecord[String, String](topic, message) 40 | producer.send(msg) 41 | producer.flush() 42 | println("send : " + message) 43 | } 44 | 45 | 46 | } 47 | 48 | -------------------------------------------------------------------------------- /src/main/resources/sql/kafkaJsonSourceSinkDemo.sql: -------------------------------------------------------------------------------- 1 | --sourceTable 2 | CREATE TABLE user_log( 3 | user_id VARCHAR, 4 | item_id VARCHAR, 5 | category_id VARCHAR, 6 | behavior VARCHAR, 7 | ts TIMESTAMP(3) 8 | ) WITH ( 9 | 'connector.type' = 'kafka', 10 | 'connector.version' = 'universal', 11 | 'connector.topic' = 'user_behavior', 12 | 'connector.properties.zookeeper.connect' = 'venn:2181', 13 | 'connector.properties.bootstrap.servers' = 'venn:9092', 14 | 'connector.startup-mode' = 'earliest-offset', 15 | 'format.type' = 'json' 16 | # 'format.type' = 'csv' 17 | ); 18 | 19 | --sinkTable 20 | CREATE TABLE user_log_sink ( 21 | user_id VARCHAR, 22 | item_id VARCHAR, 23 | category_id VARCHAR, 24 | behavior VARCHAR, 25 | ts TIMESTAMP(3) 26 | ) WITH ( 27 | 'connector.type' = 'kafka', 28 | 'connector.version' = 'universal', 29 | 'connector.topic' = 'user_behavior_sink', 30 | 'connector.properties.zookeeper.connect' = 'venn:2181', 31 | 'connector.properties.bootstrap.servers' = 'venn:9092', 32 | 'update-mode' = 'append', 33 | # 'format.type' = 'json' 34 | 'format.type' = 'csv' 35 | ); 36 | 37 | --insert 38 | INSERT INTO user_log_sink 39 | SELECT user_id, item_id, category_id, behavior, ts 40 | FROM user_log; 41 | -------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | log4j.rootLogger=info, console 20 | 21 | log4j.appender.console=org.apache.log4j.ConsoleAppender 22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 23 | log4j.appender.console.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n 24 | 25 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/connector/starrocks/Column.java: -------------------------------------------------------------------------------- 1 | package com.venn.connector.starrocks; 2 | 3 | /** 4 | * @Classname Column 5 | * @Description TODO 6 | * @Date 2024/3/8 7 | * @Created by venn 8 | */ 9 | public class Column { 10 | 11 | private String name; 12 | private String type; 13 | private String comment; 14 | 15 | public Column() { 16 | } 17 | 18 | public Column(String name, String type, String comment) { 19 | this.name = name; 20 | this.type = type; 21 | this.comment = comment; 22 | } 23 | 24 | public String getName() { 25 | return name; 26 | } 27 | 28 | public void setName(String name) { 29 | this.name = name; 30 | } 31 | 32 | public String getType() { 33 | return type; 34 | } 35 | 36 | public void setType(String type) { 37 | this.type = type; 38 | } 39 | 40 | public String getComment() { 41 | return comment; 42 | } 43 | 44 | public void setComment(String comment) { 45 | this.comment = comment; 46 | } 47 | 48 | @Override 49 | public String toString() { 50 | return "Column{" + 51 | "name='" + name + '\'' + 52 | ", type='" + type + '\'' + 53 | ", comment='" + comment + '\'' + 54 | '}'; 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/resources/sql/sqlDemo.sql: -------------------------------------------------------------------------------- 1 | --sourceTable 2 | CREATE TABLE user_log ( 3 | user_id VARCHAR, 4 | item_id VARCHAR, 5 | category_id VARCHAR, 6 | behavior VARCHAR, 7 | ts TIMESTAMP(3) 8 | ) WITH ( 9 | 'connector.type' = 'kafka', 10 | 'connector.version' = 'universal', 11 | 'connector.topic' = 'user_behavior', 12 | 'connector.startup-mode' = 'earliest-offset', 13 | 'connector.properties.0.key' = 'zookeeper.connect', 14 | 'connector.properties.0.value' = 'venn:2181', 15 | 'connector.properties.1.key' = 'bootstrap.servers', 16 | 'connector.properties.1.value' = 'venn:9092', 17 | 'update-mode' = 'append', 18 | 'format.type' = 'json', 19 | 'format.derive-schema' = 'true' 20 | ); 21 | 22 | --sinkTable 23 | CREATE TABLE pvuv_sink ( 24 | dt VARCHAR, 25 | pv BIGINT, 26 | uv BIGINT 27 | ) WITH ( 28 | 'connector.type' = 'jdbc', 29 | 'connector.url' = 'jdbc:mysql://venn:3306/venn', 30 | 'connector.table' = 'pvuv_sink', 31 | 'connector.username' = 'root', 32 | 'connector.password' = '123456', 33 | 'connector.write.flush.max-rows' = '1' 34 | ); 35 | 36 | --insert 37 | INSERT INTO pvuv_sink(dt, pv, uv) 38 | SELECT 39 | DATE_FORMAT(ts, 'yyyy-MM-dd HH:00') dt, 40 | COUNT(*) AS pv, 41 | COUNT(DISTINCT user_id) AS uv 42 | FROM user_log 43 | GROUP BY DATE_FORMAT(ts, 'yyyy-MM-dd HH:00'); 44 | -------------------------------------------------------------------------------- /src/main/test/com/venn/connector/kafka/KafkaOffsetRevertTest.scala: -------------------------------------------------------------------------------- 1 | package com.venn.kafka 2 | 3 | import java.text.SimpleDateFormat 4 | 
import java.util.{Calendar, Date} 5 | 6 | import com.venn.common.Common 7 | import com.venn.util.MathUtil 8 | import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} 9 | 10 | import scala.util.parsing.json.JSONObject 11 | 12 | /** 13 | * test data maker 14 | */ 15 | 16 | object CurrentDayMaker { 17 | 18 | 19 | /** 20 | * kafka offset revert test 21 | * kafka offset 回退测试 22 | * 23 | * @return 24 | */ 25 | val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS") 26 | 27 | def main(args: Array[String]): Unit = { 28 | val producer = new KafkaProducer[String, String](Common.getProp()) 29 | var i = 0; 30 | while (true) { 31 | 32 | // val map = Map("id"-> i, "createTime"-> sdf.format(System.currentTimeMillis())) 33 | val map = Map("id" -> i, "createTime" -> sdf.format(System.currentTimeMillis()), "amt" -> (MathUtil.random.nextInt(10) + "." + MathUtil.random.nextInt(10))) 34 | val jsonObject: JSONObject = new JSONObject(map) 35 | println(jsonObject.toString()) 36 | // topic current_day 37 | val msg = new ProducerRecord[String, String]("kafka_offset", jsonObject.toString()) 38 | producer.send(msg) 39 | producer.flush() 40 | Thread.sleep(1000) 41 | i = i + 1 42 | // System.exit(-1) 43 | } 44 | } 45 | 46 | } 47 | 48 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/connector/filesink/FileSinkMaker.scala: -------------------------------------------------------------------------------- 1 | //package com.venn.connector.filesink 2 | // 3 | //import java.text.SimpleDateFormat 4 | //import java.util.Calendar 5 | //import com.venn.common.Common 6 | //import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} 7 | // 8 | // 9 | ///** 10 | // * test data maker 11 | // */ 12 | // 13 | //object FileSinkMaker { 14 | // val topic = "async" 15 | // 16 | // def main(args: Array[String]): Unit = { 17 | // 18 | // while (true) { 19 | // 20 | // left("roll_file_sink") 21 | // Thread.sleep(100) 22 | // } 23 | // } 24 | // 25 | // val sdf = new SimpleDateFormat("yyyyMMddHHmmss") 26 | // 27 | // var idLeft = 0 28 | // 29 | // def left(topic: String) = { 30 | // val producer = new KafkaProducer[String, String](Common.getProp) 31 | // idLeft = idLeft + 1 32 | // val map = Map("id" -> idLeft, "name" -> ("venn" + System.currentTimeMillis()), "date" -> getCreateTime) 33 | // val jsonObject: JSONObject = new JSONObject(map) 34 | // println("left : " + jsonObject.toString()) 35 | // val msg = new ProducerRecord[String, String](topic, jsonObject.toString()) 36 | //// producer.send(msg) 37 | //// producer.flush() 38 | // } 39 | // 40 | // var minute : Int = 1 41 | // val calendar: Calendar = Calendar.getInstance() 42 | // def getCreateTime(): String = { 43 | // // minute = minute + 1 44 | // calendar.add(Calendar.MINUTE, 10) 45 | // sdf.format(calendar.getTime) 46 | // } 47 | // 48 | //} 49 | // 50 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/source/cust/CustHttpSource.java: -------------------------------------------------------------------------------- 1 | package com.venn.source.cust; 2 | 3 | import com.venn.util.HttpClientUtil; 4 | import org.apache.flink.configuration.Configuration; 5 | import org.apache.flink.metrics.Counter; 6 | import org.apache.flink.metrics.SimpleCounter; 7 | import org.apache.flink.streaming.api.functions.source.RichSourceFunction; 8 | 9 | public class CustHttpSource extends RichSourceFunction { 10 | 11 | private String url; 12 | private long 
requestInterval; 13 | private boolean flag = false; 14 | private transient Counter counter; 15 | 16 | public CustHttpSource(String url, long requestInterval) { 17 | this.url = url; 18 | this.requestInterval = requestInterval; 19 | } 20 | 21 | @Override 22 | public void open(Configuration parameters) throws Exception { 23 | flag = true; 24 | 25 | counter = new SimpleCounter(); 26 | this.counter = getRuntimeContext() 27 | .getMetricGroup() 28 | .counter("myCounter"); 29 | 30 | } 31 | 32 | @Override 33 | public void run(SourceContext ctx) throws Exception { 34 | 35 | 36 | while (true) { 37 | String result = HttpClientUtil.doGet(url); 38 | 39 | ctx.collect(result); 40 | this.counter.inc(); 41 | 42 | Thread.sleep(requestInterval); 43 | } 44 | 45 | } 46 | 47 | @Override 48 | public void cancel() { 49 | flag = false; 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/com/venn/flink/asyncio/AsyncUser.java: -------------------------------------------------------------------------------- 1 | package com.venn.flink.asyncio; 2 | 3 | public class AsyncUser { 4 | 5 | private String id; 6 | private String username; 7 | private String password; 8 | private String phone; 9 | 10 | public AsyncUser() { 11 | } 12 | 13 | public AsyncUser(String id, String username, String password) { 14 | this.id = id; 15 | this.username = username; 16 | this.password = password; 17 | } 18 | 19 | public String getPhone() { 20 | return phone; 21 | } 22 | 23 | public void setPhone(String phone) { 24 | this.phone = phone; 25 | } 26 | 27 | public String getId() { 28 | return id; 29 | } 30 | 31 | public void setId(String id) { 32 | this.id = id; 33 | } 34 | 35 | public String getUsername() { 36 | return username; 37 | } 38 | 39 | public void setUsername(String username) { 40 | this.username = username; 41 | } 42 | 43 | public String getPassword() { 44 | return password; 45 | } 46 | 47 | public void setPassword(String password) { 48 | this.password = password; 49 | } 50 | 51 | @Override 52 | public String toString() { 53 | return "AsyncUser{" + 54 | "id='" + id + '\'' + 55 | ", username='" + username + '\'' + 56 | ", password='" + password + '\'' + 57 | ", phone='" + phone + '\'' + 58 | '}'; 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/question/cdcStarrocks/CdcRecord.java: -------------------------------------------------------------------------------- 1 | package com.venn.question.cdcStarrocks; 2 | 3 | import java.util.LinkedHashMap; 4 | import java.util.Map; 5 | 6 | /** 7 | * cdcRecord save 8 | */ 9 | public class CdcRecord { 10 | 11 | private String db; 12 | private String table; 13 | private String op; 14 | private Map data = new LinkedHashMap<>(); 15 | 16 | public CdcRecord(String db, String table, String op) { 17 | this.db = db; 18 | this.table = table; 19 | this.op = op; 20 | } 21 | 22 | public String getDb() { 23 | return db; 24 | } 25 | 26 | public void setDb(String db) { 27 | this.db = db; 28 | } 29 | 30 | public String getTable() { 31 | return table; 32 | } 33 | 34 | public void setTable(String table) { 35 | this.table = table; 36 | } 37 | 38 | public String getOp() { 39 | return op; 40 | } 41 | 42 | public void setOp(String op) { 43 | this.op = op; 44 | } 45 | 46 | public Map getData() { 47 | return data; 48 | } 49 | 50 | public void setData(Map data) { 51 | this.data = data; 52 | } 53 | 54 | @Override 55 | public String toString() { 56 | return "CdcRecord{" + 57 | "db='" + db + 
'\'' + 58 | ", table='" + table + '\'' + 59 | ", op='" + op + '\'' + 60 | ", data=" + data + 61 | '}'; 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/demo/FilterTest.scala: -------------------------------------------------------------------------------- 1 | package com.venn.demo 2 | 3 | import org.apache.flink.api.common.eventtime.WatermarkStrategy 4 | import org.apache.flink.api.common.serialization.SimpleStringSchema 5 | import org.apache.flink.connector.kafka.source.KafkaSource 6 | import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer 7 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 8 | import org.apache.flink.api.scala._ 9 | 10 | import java.util.regex.Pattern 11 | import scala.util.Random 12 | 13 | object FilterTest { 14 | 15 | 16 | def main(args: Array[String]): Unit = { 17 | 18 | 19 | val env = StreamExecutionEnvironment.getExecutionEnvironment 20 | env.setParallelism(1) 21 | val topic = "filter_test" 22 | 23 | val random = new Random(); 24 | print(random.nextString(16)) 25 | 26 | val bootstrapServer = "localhost:9092" 27 | val kafkaSource = KafkaSource.builder[String]() 28 | .setBootstrapServers(bootstrapServer) 29 | .setTopicPattern(Pattern.compile(topic)) 30 | .setGroupId("day_window") 31 | // .setStartingOffsets(OffsetsInitializer.committedOffsets()) 32 | .setStartingOffsets(OffsetsInitializer.latest()) 33 | .setValueOnlyDeserializer(new SimpleStringSchema()) 34 | .build() 35 | 36 | 37 | env.fromSource(kafkaSource, WatermarkStrategy.noWatermarks(), "kafkaSoruce1") 38 | .filter(str => str.equals("abc")) 39 | .print(">>") 40 | 41 | env.execute("exec") 42 | 43 | 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/com/venn/entity/StreamElement.java: -------------------------------------------------------------------------------- 1 | package com.venn.entity; 2 | 3 | /** 4 | * 流实例基类 5 | */ 6 | public final class StreamElement { 7 | 8 | public String source; 9 | public long ingestionTime; 10 | public String db; 11 | public String table; 12 | public T data; 13 | 14 | public StreamElement(T data, long ingestionTime) { 15 | this.data = data; 16 | this.ingestionTime = ingestionTime; 17 | } 18 | 19 | public String getSource() { 20 | return source; 21 | } 22 | 23 | public void setSource(String source) { 24 | this.source = source; 25 | } 26 | 27 | public Long getIngestionTime() { 28 | return ingestionTime; 29 | } 30 | 31 | public String getDb() { 32 | return db; 33 | } 34 | 35 | public void setDb(String db) { 36 | this.db = db; 37 | } 38 | 39 | public String getTable() { 40 | return table; 41 | } 42 | 43 | public void setTable(String table) { 44 | this.table = table; 45 | } 46 | 47 | public T getData() { 48 | return data; 49 | } 50 | 51 | public void setData(T data) { 52 | this.data = data; 53 | } 54 | 55 | @Override 56 | public String toString() { 57 | return "StreamElement{" + 58 | "source='" + source + '\'' + 59 | ", ingestionTime=" + ingestionTime + 60 | ", db='" + db + '\'' + 61 | ", table='" + table + '\'' + 62 | ", data=" + data + 63 | '}'; 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/connector/cdc/CdcDdlTest.scala: -------------------------------------------------------------------------------- 1 | package com.venn.connector.cdc 2 | 3 | import 
com.venn.source.mysql.cdc.CommonStringDebeziumDeserializationSchema 4 | import com.ververica.cdc.connectors.mysql.source.MySqlSource 5 | import com.ververica.cdc.connectors.mysql.table.StartupOptions 6 | import org.apache.flink.api.common.eventtime.WatermarkStrategy 7 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 8 | import org.apache.flink.api.scala._ 9 | 10 | import java.util.TimeZone 11 | 12 | /** 13 | * @Classname CdcDdlTest 14 | * @Description TODO 15 | * @Date 2023/8/31 16 | * @Created by venn 17 | */ 18 | object CdcDdlTest { 19 | 20 | def main(args: Array[String]): Unit = { 21 | 22 | val env = StreamExecutionEnvironment.getExecutionEnvironment 23 | 24 | // cdc source 25 | val source = MySqlSource.builder[String]() 26 | .hostname("rm-2ze0qoq964s4nnodi.mysql.rds.aliyuncs.com") 27 | .port(3306) 28 | .username("daas") 29 | .password("Dass@2021") 30 | .databaseList("dct3_0") 31 | .tableList("dct3_0.*") 32 | .serverTimeZone("Asia/Shanghai") 33 | // 包含 schema change 34 | .includeSchemaChanges(true) 35 | .startupOptions(StartupOptions.latest()) 36 | .deserializer(new DdlDebeziumDeserializationSchema("", 3306)) 37 | .build() 38 | 39 | 40 | 41 | env.setParallelism(1) 42 | env.fromSource(source, WatermarkStrategy.noWatermarks[String](), "cdc") 43 | .map((str: String) => str) 44 | .print() 45 | 46 | 47 | env.execute("CdcDdlTest") 48 | 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/connector/starrocks/TableSchema.java: -------------------------------------------------------------------------------- 1 | package com.venn.connector.starrocks; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | /** 7 | * @Classname TableSchema 8 | * @Description TODO 9 | * @Date 2024/3/8 10 | * @Created by venn 11 | */ 12 | public class TableSchema { 13 | 14 | private String tableName; 15 | private String tableComment; 16 | private List column = new ArrayList<>(); 17 | 18 | public TableSchema() { 19 | } 20 | 21 | public TableSchema(String tableName, String tableComment, List column) { 22 | this.tableName = tableName; 23 | this.tableComment = tableComment; 24 | this.column = column; 25 | } 26 | 27 | public String getTableName() { 28 | return tableName; 29 | } 30 | 31 | public void setTableName(String tableName) { 32 | this.tableName = tableName; 33 | } 34 | 35 | public String getTableComment() { 36 | return tableComment; 37 | } 38 | 39 | public void setTableComment(String tableComment) { 40 | this.tableComment = tableComment; 41 | } 42 | 43 | public List getColumn() { 44 | return column; 45 | } 46 | 47 | public void setColumn(List column) { 48 | this.column = column; 49 | } 50 | 51 | @Override 52 | public String toString() { 53 | return "TableSchema{" + 54 | "tableName='" + tableName + '\'' + 55 | ", tableComment='" + tableComment + '\'' + 56 | ", column=" + column + 57 | '}'; 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/source/cust/HttpServer.java: -------------------------------------------------------------------------------- 1 | package com.venn.source.cust; 2 | 3 | import com.sun.net.httpserver.HttpExchange; 4 | import com.sun.net.httpserver.HttpHandler; 5 | import org.apache.commons.io.IOUtils; 6 | 7 | import java.io.IOException; 8 | import java.io.OutputStream; 9 | import java.net.InetSocketAddress; 10 | import java.util.UUID; 11 | 12 | /** 13 | * 创建 http server 监控端口请求 14 | */ 15 | public class HttpServer 
{ 16 | 17 | public static void main(String[] arg) throws Exception { 18 | 19 | com.sun.net.httpserver.HttpServer server = com.sun.net.httpserver.HttpServer.create(new InetSocketAddress(8888), 10); 20 | server.createContext("/", new TestHandler()); 21 | server.start(); 22 | } 23 | 24 | static class TestHandler implements HttpHandler { 25 | public void handle(HttpExchange exchange) throws IOException { 26 | String response = "hello world"; 27 | 28 | try { 29 | //获得表单提交数据(post) 30 | String postString = IOUtils.toString(exchange.getRequestBody()); 31 | 32 | exchange.sendResponseHeaders(200, 0); 33 | OutputStream os = exchange.getResponseBody(); 34 | String result = UUID.randomUUID().toString(); 35 | result = System.currentTimeMillis() + ",name," + result; 36 | os.write(result.getBytes()); 37 | os.close(); 38 | } catch (IOException ie) { 39 | ie.printStackTrace(); 40 | } catch (Exception e) { 41 | e.printStackTrace(); 42 | } 43 | } 44 | } 45 | 46 | } 47 | 48 | 49 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/source/kafka/kafkaToKafkaGroup.sql: -------------------------------------------------------------------------------- 1 | -- 读 json,写csv 2 | ---sourceTable 3 | CREATE TABLE user_log( 4 | user_id VARCHAR, 5 | item_id VARCHAR, 6 | category_id VARCHAR, 7 | behavior VARCHAR, 8 | ts TIMESTAMP(3), 9 | proctime as PROCTIME() 10 | ) WITH ( 11 | 'connector.type' = 'kafka', 12 | 'connector.version' = 'universal', 13 | 'connector.topic' = 'user_behavior', 14 | 'connector.properties.zookeeper.connect' = 'venn:2181', 15 | 'connector.properties.bootstrap.servers' = 'venn:9092', 16 | 'connector.startup-mode' = 'earliest-offset', 17 | 'update-mode' = 'upsert', 18 | 'format.type' = 'json' 19 | ); 20 | 21 | ---sinkTable 22 | CREATE TABLE user_log_sink ( 23 | item_id VARCHAR , 24 | category_id VARCHAR , 25 | behavior VARCHAR , 26 | max_tx TIMESTAMP(3), 27 | min_prc TIMESTAMP(3), 28 | max_prc TIMESTAMP(3), 29 | coun BIGINT 30 | ) WITH ( 31 | 'connector.type' = 'myKafka', 32 | 'connector.version' = 'universal', 33 | 'connector.topic' = 'user_behavior_sink', 34 | 'connector.properties.zookeeper.connect' = 'venn:2181', 35 | 'connector.properties.bootstrap.servers' = 'venn:9092', 36 | 'update-mode' = 'upsert', 37 | 'format.type' = 'json' 38 | ); 39 | 40 | ---insert 41 | INSERT INTO user_log_sink 42 | SELECT item_id, category_id, behavior, max(ts), min(proctime), max(proctime), count(user_id) 43 | FROM user_log 44 | group by item_id, category_id, behavior; 45 | -- SELECT item_id, category_id, behavior, max(ts), min(proctime), max(proctime), count(user_id) 46 | -- from user_log 47 | -- group by TUMBLE(proctime, INTERVAL '1' MINUTE ), item_id,category_id,behavior; -------------------------------------------------------------------------------- /src/main/scala/com/venn/connector/filesink/DayBucketAssigner.scala: -------------------------------------------------------------------------------- 1 | package com.venn.connector.filesink 2 | 3 | import java.io.IOException 4 | import java.nio.charset.StandardCharsets 5 | import org.apache.flink.core.io.SimpleVersionedSerializer 6 | import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode 7 | import org.apache.flink.streaming.api.functions.sink.filesystem.BucketAssigner 8 | 9 | class DayBucketAssigner extends BucketAssigner[ObjectNode, String] { 10 | 11 | /** 12 | * bucketId is the output path 13 | * @param element 14 | * @param context 15 | * @return 16 | */ 17 | override def 
getBucketId(element: ObjectNode, context: BucketAssigner.Context): String = { 18 | //context.currentProcessingTime() 19 | val day = element.get("date").asText("19790101000000").substring(0, 8) 20 | // wrap can day + "/" + xxx 21 | day 22 | } 23 | 24 | override def getSerializer: SimpleVersionedSerializer[String] = { 25 | 26 | StringSerializer 27 | } 28 | 29 | /** 30 | * 实现参考 : org.apache.flink.runtime.checkpoint.StringSerializer 31 | */ 32 | object StringSerializer extends SimpleVersionedSerializer[String] { 33 | val VERSION = 77 34 | 35 | override def getVersion = 77 36 | 37 | @throws[IOException] 38 | override def serialize(checkpointData: String): Array[Byte] = checkpointData.getBytes(StandardCharsets.UTF_8) 39 | 40 | @throws[IOException] 41 | override def deserialize(version: Int, serialized: Array[Byte]): String = if (version != 77) throw new IOException("version mismatch") 42 | else new String(serialized, StandardCharsets.UTF_8) 43 | } 44 | 45 | 46 | } 47 | 48 | 49 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/stream/api/dayWindow/CurrentDayMaker.scala: -------------------------------------------------------------------------------- 1 | package com.venn.stream.api.dayWindow 2 | 3 | import com.google.gson.GsonBuilder 4 | 5 | import java.text.SimpleDateFormat 6 | import java.util.{Calendar, Date} 7 | import com.venn.common.Common 8 | import com.venn.util.MathUtil 9 | import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} 10 | 11 | 12 | /** 13 | * test data maker 14 | */ 15 | 16 | object CurrentDayMaker { 17 | 18 | 19 | var minute : Int = 1 20 | val calendar: Calendar = Calendar.getInstance() 21 | 22 | /** 23 | * 一天时间比较长,不方便观察,将时间改为当前时间, 24 | * 每次累加10分钟,这样一天只需要144次循环,也就是144秒 25 | * @return 26 | */ 27 | def getCreateTime(): String = { 28 | // minute = minute + 1 29 | calendar.add(Calendar.MINUTE, 10) 30 | sdf.format(calendar.getTime) 31 | } 32 | val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS") 33 | 34 | def main(args: Array[String]): Unit = { 35 | val producer = new KafkaProducer[String, String](Common.getProp) 36 | calendar.setTime(new Date()) 37 | println(sdf.format(calendar.getTime)) 38 | var i =0; 39 | while (true) { 40 | 41 | // val map = Map("id"-> i, "createTime"-> sdf.format(System.currentTimeMillis())) 42 | val map = Map("id"-> i, "createTime"-> getCreateTime(), "amt"-> (MathUtil.random.nextInt(10) +"." 
+ MathUtil.random.nextInt(10)))
      val gson = new GsonBuilder().create()
      val json = gson.toJson(map)
      // topic current_day
      val msg = new ProducerRecord[String, String]("current_day", json)
      producer.send(msg)
      producer.flush()
      Thread.sleep(1000)
      i = i + 1
      // System.exit(-1)
    }
  }

}
--------------------------------------------------------------------------------
/src/main/scala/com/venn/connector/filesink/DayBulkWriter.scala:
--------------------------------------------------------------------------------
package com.venn.connector.filesink

import java.io.File
import java.nio.charset.StandardCharsets
import org.apache.flink.api.common.serialization.BulkWriter
import org.apache.flink.core.fs.FSDataOutputStream
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode
import org.apache.flink.util.Preconditions

/**
 * Implementation reference: org.apache.flink.streaming.api.functions.sink.filesystem.BulkWriterTest
 */
class DayBulkWriter extends BulkWriter[ObjectNode] {

  val charset = StandardCharsets.UTF_8
  var stream: FSDataOutputStream = _

  def DayBulkWriter(inputStream: FSDataOutputStream): DayBulkWriter = {
    stream = Preconditions.checkNotNull(inputStream)
    this
  }

  /**
   * write element
   *
   * @param element
   */
  override def addElement(element: ObjectNode): Unit = {
    this.stream.write(element.toString.getBytes(charset))
    // wrap
    this.stream.write('\n')

  }

  override def flush(): Unit = {
    this.stream.flush()
  }

  /**
   * the output stream is an input parameter, so just flush; closing it is the factory's job
   */
  override def finish(): Unit = {
    this.flush()
  }

}

/**
 * Implementation reference: org.apache.flink.streaming.api.functions.sink.filesystem.BulkWriterTest.TestBulkWriterFactory
 */
class DayBulkWriterFactory extends BulkWriter.Factory[ObjectNode] {
  override def create(out: FSDataOutputStream): BulkWriter[ObjectNode] = {
    val dayBulkWriter = new DayBulkWriter
    dayBulkWriter.DayBulkWriter(out)

  }
}
--------------------------------------------------------------------------------
/src/main/java/com/venn/flink/asyncio/AsyncHbaseRequest.java:
--------------------------------------------------------------------------------
package com.venn.flink.asyncio;

import com.google.gson.Gson;
import com.venn.common.Common;
import org.apache.flink.formats.json.JsonNodeDeserializationSchema;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode;
import org.apache.flink.streaming.api.datastream.AsyncDataStream;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;

import java.util.concurrent.TimeUnit;


public class AsyncHbaseRequest {

    public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        FlinkKafkaConsumer<ObjectNode> source = new FlinkKafkaConsumer<>("async", new JsonNodeDeserializationSchema(), Common.getProp());

        // receive Kafka data and convert it to AsyncUser objects
        DataStream<AsyncUser> input = env.addSource(source).map(value -> {
            String id = value.get("id").asText();
String username = value.get("username").asText(); 26 | String password = value.get("password").asText(); 27 | 28 | return new AsyncUser(id, username, password); 29 | }); 30 | // 异步IO 获取hbase, timeout 时间 1s,容量 100(超过100个请求,会反压上游节点) 31 | DataStream async = AsyncDataStream.unorderedWait(input, new AsyncFunctionForHbaseJava(), 1000, TimeUnit.MICROSECONDS, 100); 32 | 33 | async.map(user -> { 34 | 35 | return new Gson().toJson(user).toString(); 36 | }) 37 | .print(); 38 | 39 | env.execute("asyncForHbase"); 40 | 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/connector/jdbcOutput/MysqlOutputDemo.scala: -------------------------------------------------------------------------------- 1 | package com.venn.connector.jdbcOutput 2 | 3 | import java.io.File 4 | 5 | import com.venn.common.Common 6 | import org.apache.flink.api.common.serialization.SimpleStringSchema 7 | import org.apache.flink.api.scala._ 8 | import org.apache.flink.runtime.state.filesystem.FsStateBackend 9 | import org.apache.flink.streaming.api.functions.ProcessFunction 10 | import org.apache.flink.streaming.api.scala.{OutputTag, StreamExecutionEnvironment} 11 | import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic} 12 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer 13 | import org.apache.flink.util.Collector 14 | 15 | /** 16 | * 侧边输出:This operation can be useful when you want to split a stream of data 17 | */ 18 | object MysqlOutputDemo { 19 | 20 | def main(args: Array[String]): Unit = { 21 | val env = StreamExecutionEnvironment.getExecutionEnvironment 22 | env.setParallelism(1) 23 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime) 24 | if ("/".equals(File.separator)) { 25 | val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true) 26 | env.setStateBackend(backend) 27 | env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE) 28 | } else { 29 | env.setMaxParallelism(1) 30 | env.setParallelism(1) 31 | } 32 | 33 | val source = new FlinkKafkaConsumer[String]("mysql_output", new SimpleStringSchema, Common.getProp) 34 | source.setStartFromLatest() 35 | env.addSource(source) 36 | .map(li => { 37 | val tmp = li.split(",") 38 | new User(tmp(0), tmp(1), tmp(2) toInt, tmp(3)) 39 | }) 40 | // .addSink(new MysqlSink1) 41 | .writeUsingOutputFormat(new MysqlSink1) 42 | 43 | env.execute("msqlOutput") 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/stream/api/intervalJoin/IntervalJoinKafkaKeyMaker.scala: -------------------------------------------------------------------------------- 1 | package com.venn.stream.api.intervalJoin 2 | 3 | import com.google.gson.{Gson, GsonBuilder, JsonObject} 4 | 5 | import java.text.SimpleDateFormat 6 | import com.venn.common.Common 7 | import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} 8 | 9 | 10 | /** 11 | * test data maker 12 | */ 13 | 14 | object IntervalJoinKafkaKeyMaker { 15 | val topic = "async" 16 | 17 | def main(args: Array[String]): Unit = { 18 | 19 | while (true) { 20 | 21 | left("topic_left") 22 | right("topic_right") 23 | Thread.sleep(500) 24 | } 25 | } 26 | 27 | val sdf = new SimpleDateFormat("yyyyMMddHHmmss") 28 | 29 | var idLeft = 0 30 | 31 | def left(topic: String) = { 32 | val producer = new KafkaProducer[String, String](Common.getProp) 33 | idLeft = idLeft + 1 34 | val map = Map("id" -> idLeft, "name" -> ("venn" + System.currentTimeMillis()), 
"date" -> sdf.format(System.currentTimeMillis())) 35 | val gson = new GsonBuilder().create(); 36 | gson.toJson(map); 37 | 38 | println("left : " + gson.toString()) 39 | val msg = new ProducerRecord[String, String](topic, gson.toString()) 40 | producer.send(msg) 41 | producer.flush() 42 | } 43 | 44 | var idRight = 0 45 | 46 | def right(topic: String) = { 47 | val producer = new KafkaProducer[String, String](Common.getProp) 48 | idRight = idRight + 1 49 | val map = Map("id" -> idRight, "phone" -> ("17713333333" + idRight), "date" -> sdf.format(System.currentTimeMillis())) 50 | val gson = new GsonBuilder().create(); 51 | gson.toJson(map); 52 | println("right : \t\t\t\t\t\t\t\t" + gson.toString()) 53 | val msg = new ProducerRecord[String, String](topic, gson.toString()) 54 | producer.send(msg) 55 | producer.flush() 56 | } 57 | 58 | } 59 | 60 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/connector/jdbcOutput/MysqlSink1.scala: -------------------------------------------------------------------------------- 1 | package com.venn.connector.jdbcOutput 2 | 3 | import java.sql.{Connection, DriverManager, PreparedStatement, SQLException} 4 | import org.apache.flink.api.common.io.OutputFormat 5 | import org.apache.flink.configuration.Configuration 6 | import org.slf4j.{Logger, LoggerFactory} 7 | 8 | class MysqlSink1 extends OutputFormat[User]{ 9 | 10 | val logger: Logger = LoggerFactory.getLogger("MysqlSink1") 11 | var conn: Connection = _ 12 | var ps: PreparedStatement = _ 13 | val jdbcUrl = "jdbc:mysql://192.168.229.128:3306?useSSL=false&allowPublicKeyRetrieval=true" 14 | val username = "root" 15 | val password = "123456" 16 | val driverName = "com.mysql.jdbc.Driver" 17 | 18 | override def configure(parameters: Configuration): Unit = { 19 | // not need 20 | } 21 | 22 | override def open(taskNumber: Int, numTasks: Int): Unit = { 23 | Class.forName(driverName) 24 | try { 25 | Class.forName(driverName) 26 | conn = DriverManager.getConnection(jdbcUrl, username, password) 27 | 28 | // close auto commit 29 | conn.setAutoCommit(false) 30 | } catch { 31 | case e@(_: ClassNotFoundException | _: SQLException) => 32 | logger.error("init mysql error") 33 | e.printStackTrace() 34 | System.exit(-1); 35 | } 36 | } 37 | 38 | override def writeRecord(user: User): Unit = { 39 | 40 | println("get user : " + user.toString) 41 | ps = conn.prepareStatement("insert into async.user(username, password, sex, phone) values(?,?,?,?)") 42 | ps.setString(1, user.username) 43 | ps.setString(2, user.password) 44 | ps.setInt(3, user.sex) 45 | ps.setString(4, user.phone) 46 | 47 | ps.execute() 48 | conn.commit() 49 | } 50 | 51 | override def close(): Unit = { 52 | 53 | if (conn != null){ 54 | conn.commit() 55 | conn.close() 56 | } 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/common/Common.java: -------------------------------------------------------------------------------- 1 | package com.venn.common; 2 | 3 | 4 | import java.util.Properties; 5 | 6 | /** 7 | * Created by venn on 19-3-5. 
8 | */ 9 | public class Common { 10 | 11 | public final static String BROKER_LIST = "venn:9092"; 12 | public final static String ZOOKEEPER_QUORUM = "venn"; 13 | public final static String ZOOKEEPER_PORT = "2180"; 14 | public final static String ZOOKEEPER_ZNODE_PARENT = "venn:9092"; 15 | public final static String PULSAR_SERVER = "pulsar://localhost:6650"; 16 | public final static String PULSAR_ADMIN = "http://localhost:8080"; 17 | public final static String CHECK_POINT_DATA_DIR = "hdfs:///home/wuxu/tmp/checkpoint"; 18 | // public final static String CHECK_POINT_DATA_DIR = "file:///out/checkpoint"; 19 | // public final static String CHECK_POINT_DATA_DIR = "hdfs:///venn/checkpoint"; 20 | 21 | public static Properties prop = null; 22 | 23 | public static Properties getProp(String brokerList) { 24 | 25 | if (prop == null) { 26 | prop = new Properties(); 27 | prop.put("bootstrap.servers", brokerList); 28 | prop.put("request.required.acks", "-1"); 29 | prop.put("auto.offset.reset", "latest"); 30 | prop.put("key.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer"); 31 | prop.put("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer"); 32 | prop.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 33 | prop.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 34 | prop.put("group.id", "venn"); 35 | prop.put("client.id", "venn"); 36 | } 37 | return prop; 38 | } 39 | 40 | public static Properties getProp() { 41 | 42 | return getProp(BROKER_LIST); 43 | } 44 | 45 | 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/com/venn/entity/KafkaSimpleStringRecord.java: -------------------------------------------------------------------------------- 1 | package com.venn.entity; 2 | 3 | import org.apache.kafka.common.TopicPartition; 4 | 5 | import java.io.Serializable; 6 | 7 | /** 8 | * generic string kafka record, ues by @MyKafkaRecordDeserializationSchema 9 | */ 10 | public class KafkaSimpleStringRecord implements Serializable { 11 | private static final long serialVersionUID = 4813439951036021779L; 12 | // kafka topic partition 13 | private final TopicPartition tp; 14 | // record kafka offset 15 | private final long offset; 16 | // record key 17 | private final String key; 18 | // record timestamp 19 | private final long timestamp; 20 | // record value 21 | private final String value; 22 | 23 | 24 | public KafkaSimpleStringRecord(TopicPartition tp, long offset, String key, long timestamp, String value) { 25 | this.tp = tp; 26 | this.offset = offset; 27 | this.key = key; 28 | this.timestamp = timestamp; 29 | this.value = value; 30 | } 31 | 32 | public static long getSerialVersionUID() { 33 | return serialVersionUID; 34 | } 35 | 36 | public TopicPartition getTp() { 37 | return tp; 38 | } 39 | 40 | public long getOffset() { 41 | return offset; 42 | } 43 | 44 | public String getKey() { 45 | return key; 46 | } 47 | 48 | public long getTimestamp() { 49 | return timestamp; 50 | } 51 | 52 | public String getValue() { 53 | return value; 54 | } 55 | 56 | @Override 57 | public String toString() { 58 | return "MyStringKafkaRecord{" + 59 | "tp=topic:" + tp.topic() + ", partition: " + tp.partition() + 60 | ", offset=" + offset + 61 | ", key='" + key + '\'' + 62 | ", timestamp=" + timestamp + 63 | ", value='" + value + '\'' + 64 | '}'; 65 | } 66 | } -------------------------------------------------------------------------------- 
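A minimal usage sketch for the record type above (not part of the repository): it wires KafkaSimpleStringRecord to the SimpleKafkaRecordDeserializationSchema defined later in this repo and keeps only the payload, mirroring com.venn.demo.KafkaJoinRedisDemo; the broker address and topic name below are placeholders.

import com.venn.entity.KafkaSimpleStringRecord;
import com.venn.util.SimpleKafkaRecordDeserializationSchema;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.connector.kafka.source.KafkaSource;
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class KafkaSimpleStringRecordUsageSketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // deserialize each Kafka record into the wrapper entity shown above
        KafkaSource<KafkaSimpleStringRecord> kafkaSource = KafkaSource
                .<KafkaSimpleStringRecord>builder()
                .setBootstrapServers("localhost:9092")            // placeholder broker
                .setTopics("user_log")                            // placeholder topic
                .setDeserializer(new SimpleKafkaRecordDeserializationSchema())
                .setStartingOffsets(OffsetsInitializer.latest())
                .build();

        env.fromSource(kafkaSource, WatermarkStrategy.noWatermarks(), "kafkaSource")
                // drop the Kafka metadata and keep only the record value
                .map((MapFunction<KafkaSimpleStringRecord, String>) value -> value.getValue())
                .print();

        env.execute("kafkaSimpleStringRecordUsage");
    }
}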
/src/main/scala/com/venn/source/RichAsyncFunction.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.venn.source 20 | 21 | import org.apache.flink.api.common.functions.AbstractRichFunction 22 | import org.apache.flink.streaming.api.scala.async.AsyncFunction 23 | 24 | /** 25 | * Rich variant of [[AsyncFunction]]. As a [[org.apache.flink.api.common.functions.RichFunction]], 26 | * it gives access to the [[org.apache.flink.api.common.functions.RuntimeContext]] and provides 27 | * setup and teardown methods. 28 | * 29 | * State related apis in [[org.apache.flink.api.common.functions.RuntimeContext]] are not supported 30 | * yet because the key may get changed while accessing states in the working thread. 31 | * 32 | * [[org.apache.flink.api.common.functions.IterationRuntimeContext#getIterationAggregator(String)]] 33 | * is not supported since the aggregator may be modified by multiple threads. 34 | * 35 | * @tparam IN The type of the input value. 36 | * @tparam OUT The type of the output value. 
37 | */ 38 | abstract class RichAsyncFunction[IN, OUT] 39 | extends AbstractRichFunction 40 | with AsyncFunction [IN, OUT] {} 41 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/demo/SlotPartitionMaker.scala: -------------------------------------------------------------------------------- 1 | package com.venn.demo 2 | 3 | import com.google.gson.GsonBuilder 4 | 5 | import java.text.SimpleDateFormat 6 | import java.util.{Calendar, Date} 7 | import com.venn.common.Common 8 | import com.venn.util.MathUtil 9 | import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} 10 | 11 | 12 | /** 13 | * test data maker 14 | */ 15 | 16 | object SlotPartitionMaker { 17 | 18 | var minute: Int = 1 19 | val calendar: Calendar = Calendar.getInstance() 20 | /** 21 | * 一天时间比较长,不方便观察,将时间改为当前时间, 22 | * 每次累加10分钟,这样一天只需要144次循环,也就是144秒 23 | * 24 | * @return 25 | */ 26 | def getCreateTime(): String = { 27 | // minute = minute + 1 28 | calendar.add(Calendar.MILLISECOND, 10) 29 | sdf.format(calendar.getTime) 30 | } 31 | 32 | val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS") 33 | 34 | def main(args: Array[String]): Unit = { 35 | 36 | val prop = Common.getProp 37 | prop.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer") 38 | prop.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer") 39 | 40 | val producer = new KafkaProducer[String, String](Common.getProp) 41 | calendar.setTime(new Date()) 42 | println(sdf.format(calendar.getTime)) 43 | var i = 0; 44 | while (true) { 45 | val map = Map("id" -> i, "createTime" -> getCreateTime(), "amt" -> (MathUtil.random.nextInt(10) + "." + MathUtil.random.nextInt(10))) 46 | val gson = new GsonBuilder().create(); 47 | gson.toJson(map); 48 | println(gson.toString()) 49 | // topic current_day 50 | val msg = new ProducerRecord[String, String]("slot_partition", gson.toString()) 51 | producer.send(msg) 52 | producer.flush() 53 | if (MathUtil.random.nextBoolean()) { 54 | Thread.sleep(1500) 55 | } else { 56 | Thread.sleep(500) 57 | 58 | } 59 | i = i + 1 60 | // System.exit(-1) 61 | } 62 | } 63 | 64 | } 65 | 66 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/question/stock/entry/OverStockDetail.java: -------------------------------------------------------------------------------- 1 | package com.venn.question.stock.entry; 2 | 3 | import java.math.BigDecimal; 4 | 5 | /** 6 | * @Classname OverStockDetail 7 | * @Description TODO 8 | * @Date 2023/6/8 9 | * @Created by venn 10 | */ 11 | public class OverStockDetail implements Stock{ 12 | 13 | private int id; 14 | private String fid; 15 | private String fentryId; 16 | private String fmaterialId; 17 | private BigDecimal frealQty; 18 | 19 | public OverStockDetail() { 20 | } 21 | 22 | public OverStockDetail(int id, String fid, String fentryid, String fmaterialid, BigDecimal frealqty) { 23 | this.id = id; 24 | this.fid = fid; 25 | this.fentryId = fentryid; 26 | this.fmaterialId = fmaterialid; 27 | this.frealQty = frealqty; 28 | } 29 | 30 | public int getId() { 31 | return id; 32 | } 33 | 34 | public void setId(int id) { 35 | this.id = id; 36 | } 37 | 38 | public String getFid() { 39 | return fid; 40 | } 41 | 42 | public void setFid(String fid) { 43 | this.fid = fid; 44 | } 45 | 46 | public String getFentryId() { 47 | return fentryId; 48 | } 49 | 50 | public void setFentryId(String fentryId) { 51 | this.fentryId = fentryId; 52 | } 53 | 54 | public String 
getFmaterialId() { 55 | return fmaterialId; 56 | } 57 | 58 | public void setFmaterialId(String fmaterialId) { 59 | this.fmaterialId = fmaterialId; 60 | } 61 | 62 | public BigDecimal getFrealQty() { 63 | return frealQty; 64 | } 65 | 66 | public void setFrealQty(BigDecimal frealQty) { 67 | this.frealQty = frealQty; 68 | } 69 | 70 | @Override 71 | public String toString() { 72 | return "OverStockDetail{" + 73 | "id=" + id + 74 | ", fid='" + fid + '\'' + 75 | ", fentryId='" + fentryId + '\'' + 76 | ", fmaterialId='" + fmaterialId + '\'' + 77 | ", frealQty=" + frealQty + 78 | '}'; 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/question/cdcStarrocks/CdcStarMapFunction.java: -------------------------------------------------------------------------------- 1 | package com.venn.question.cdcStarrocks; 2 | 3 | import com.google.gson.JsonElement; 4 | import com.google.gson.JsonObject; 5 | import com.google.gson.JsonParser; 6 | import org.apache.flink.api.common.functions.RichMapFunction; 7 | import org.apache.flink.configuration.Configuration; 8 | import org.slf4j.Logger; 9 | import org.slf4j.LoggerFactory; 10 | 11 | import java.util.Map; 12 | 13 | public class CdcStarMapFunction extends RichMapFunction { 14 | 15 | private final static Logger LOG = LoggerFactory.getLogger(CdcStarMapFunction.class); 16 | private JsonParser parser; 17 | 18 | @Override 19 | public void open(Configuration parameters) throws Exception { 20 | parser = new JsonParser(); 21 | } 22 | 23 | @Override 24 | public CdcRecord map(String element) throws Exception { 25 | 26 | LOG.info("data : {}", element); 27 | JsonObject object = parser.parse(element).getAsJsonObject(); 28 | String db = object.get("db").getAsString(); 29 | String table = object.get("table").getAsString(); 30 | String op = object.get("operator_type").getAsString(); 31 | 32 | CdcRecord record = new CdcRecord(db, table, op); 33 | 34 | // insert/update 35 | String dataLocation = "after"; 36 | if ("d".equals(op)) { 37 | // if op is delete, get before 38 | dataLocation = "before"; 39 | } 40 | 41 | JsonObject data = object.get(dataLocation).getAsJsonObject(); 42 | 43 | for (Map.Entry entry : data.entrySet()) { 44 | 45 | String columnName = entry.getKey(); 46 | String columnValue; 47 | JsonElement value = entry.getValue(); 48 | if (!value.isJsonNull()) { 49 | // if column value is not null, get as string 50 | columnValue = value.getAsString(); 51 | // put column name/value to record.data 52 | record.getData().put(columnName, columnValue); 53 | } 54 | 55 | } 56 | 57 | return record; 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/source/mysql/cdc/CommonKafkaSink.java: -------------------------------------------------------------------------------- 1 | package com.venn.source.mysql.cdc; 2 | 3 | import com.google.gson.JsonObject; 4 | import com.google.gson.JsonParser; 5 | import org.apache.flink.configuration.Configuration; 6 | import org.apache.flink.streaming.api.functions.sink.RichSinkFunction; 7 | import org.apache.kafka.clients.producer.KafkaProducer; 8 | import org.apache.kafka.clients.producer.ProducerRecord; 9 | import org.slf4j.Logger; 10 | import org.slf4j.LoggerFactory; 11 | import java.util.Properties; 12 | 13 | public class CommonKafkaSink extends RichSinkFunction { 14 | 15 | protected static final Logger LOG = LoggerFactory.getLogger(CommonKafkaSink.class); 16 | private transient KafkaProducer kafkaProducer; 17 
| private transient JsonParser parser; 18 | private final String bootstrapServer; 19 | 20 | public CommonKafkaSink(String bootstrapServer) { 21 | this.bootstrapServer = bootstrapServer; 22 | } 23 | 24 | 25 | @Override 26 | public void open(Configuration parameters) { 27 | Properties prop = new Properties(); 28 | prop.put("bootstrap.servers", bootstrapServer); 29 | prop.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer"); 30 | prop.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer"); 31 | prop.put("request.timeout.ms", "10"); 32 | kafkaProducer = new KafkaProducer<>(prop); 33 | parser = new JsonParser(); 34 | 35 | } 36 | 37 | @Override 38 | public void invoke(String element, Context context) { 39 | 40 | JsonObject jsonObject = parser.parse(element).getAsJsonObject(); 41 | String db = jsonObject.get("db").getAsString(); 42 | String table = jsonObject.get("table").getAsString(); 43 | // topic 不存在就自动创建 44 | String topic = db + "_" + table; 45 | topic = db; 46 | ProducerRecord record = new ProducerRecord<>(topic, element); 47 | kafkaProducer.send(record); 48 | } 49 | 50 | @Override 51 | public void close() { 52 | kafkaProducer.close(); 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/com/venn/util/SimpleKafkaRecordDeserializationSchema.java: -------------------------------------------------------------------------------- 1 | package com.venn.util; 2 | 3 | import com.venn.entity.KafkaSimpleStringRecord; 4 | import org.apache.flink.api.common.serialization.DeserializationSchema; 5 | import org.apache.flink.api.common.typeinfo.TypeInformation; 6 | import org.apache.flink.connector.kafka.source.reader.deserializer.KafkaRecordDeserializationSchema; 7 | import org.apache.flink.util.Collector; 8 | import org.apache.kafka.clients.consumer.ConsumerRecord; 9 | import org.apache.kafka.common.TopicPartition; 10 | import org.apache.kafka.common.serialization.Deserializer; 11 | import org.apache.kafka.common.serialization.StringDeserializer; 12 | 13 | import java.io.IOException; 14 | 15 | public class SimpleKafkaRecordDeserializationSchema 16 | implements KafkaRecordDeserializationSchema { 17 | private static final long serialVersionUID = -3765473065594331694L; 18 | private transient Deserializer deserializer; 19 | 20 | @Override 21 | public void open(DeserializationSchema.InitializationContext context) throws Exception { 22 | 23 | } 24 | 25 | @Override 26 | public void deserialize( 27 | ConsumerRecord record, Collector collector) 28 | throws IOException { 29 | if (deserializer == null) { 30 | deserializer = new StringDeserializer(); 31 | } 32 | long offset = record.offset(); 33 | String key = null; 34 | if (record.key() != null) { 35 | key = new String(record.key()); 36 | } 37 | long timestamp = record.timestamp(); 38 | 39 | 40 | // makeup MyStringKafkaRecord 41 | KafkaSimpleStringRecord myRecord = new KafkaSimpleStringRecord( 42 | new TopicPartition(record.topic(), record.partition()), offset, key, timestamp, deserializer.deserialize(record.topic(), record.value())); 43 | 44 | collector.collect(myRecord); 45 | } 46 | 47 | @Override 48 | public TypeInformation getProducedType() { 49 | return TypeInformation.of(KafkaSimpleStringRecord.class); 50 | } 51 | } 52 | 53 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/question/stock/entry/OverStock.java: 
-------------------------------------------------------------------------------- 1 | package com.venn.question.stock.entry; 2 | 3 | /** 4 | * @Classname OverStock 5 | * @Description TODO 6 | * @Date 2023/6/8 7 | * @Created by venn 8 | */ 9 | public class OverStock implements Stock{ 10 | 11 | private int id; 12 | private long fdate; 13 | private String fid; 14 | private String fbillNo; 15 | private String fcustomerId; 16 | private String fGjzh; 17 | 18 | public OverStock() { 19 | } 20 | 21 | public OverStock(int id, long fdate, String fid, String fbillno, String fcustomerid, String fGjzh) { 22 | this.id = id; 23 | this.fdate = fdate; 24 | this.fid = fid; 25 | this.fbillNo = fbillno; 26 | this.fcustomerId = fcustomerid; 27 | this.fGjzh = fGjzh; 28 | } 29 | 30 | public int getId() { 31 | return id; 32 | } 33 | 34 | public void setId(int id) { 35 | this.id = id; 36 | } 37 | 38 | public long getFdate() { 39 | return fdate; 40 | } 41 | 42 | public void setFdate(long fdate) { 43 | this.fdate = fdate; 44 | } 45 | 46 | public String getFid() { 47 | return fid; 48 | } 49 | 50 | public void setFid(String fid) { 51 | this.fid = fid; 52 | } 53 | 54 | public String getFbillNo() { 55 | return fbillNo; 56 | } 57 | 58 | public void setFbillNo(String fbillNo) { 59 | this.fbillNo = fbillNo; 60 | } 61 | 62 | public String getFcustomerId() { 63 | return fcustomerId; 64 | } 65 | 66 | public void setFcustomerId(String fcustomerId) { 67 | this.fcustomerId = fcustomerId; 68 | } 69 | 70 | public String getfGjzh() { 71 | return fGjzh; 72 | } 73 | 74 | public void setfGjzh(String fGjzh) { 75 | this.fGjzh = fGjzh; 76 | } 77 | 78 | @Override 79 | public String toString() { 80 | return "OverStock{" + 81 | "id=" + id + 82 | ", fdate=" + fdate + 83 | ", fid='" + fid + '\'' + 84 | ", fbillNo='" + fbillNo + '\'' + 85 | ", fcustomerId='" + fcustomerId + '\'' + 86 | ", fGjzh='" + fGjzh + '\'' + 87 | '}'; 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/main/java/com/venn/entity/UserLog.java: -------------------------------------------------------------------------------- 1 | package com.venn.entity; 2 | 3 | import com.google.gson.annotations.SerializedName; 4 | 5 | public class UserLog { 6 | 7 | @SerializedName("user_id") 8 | private String userId; 9 | @SerializedName("item_id") 10 | private String itemId; 11 | @SerializedName("category_id") 12 | private String categoryId; 13 | private String behavior; 14 | private String ts; 15 | private Long timestamp; 16 | 17 | public UserLog() { 18 | } 19 | 20 | public UserLog(String userId, String itemId, String categoryId, String behavior, String ts) { 21 | this.userId = userId; 22 | this.itemId = itemId; 23 | this.categoryId = categoryId; 24 | this.behavior = behavior; 25 | this.ts = ts; 26 | } 27 | 28 | public Long getTimestamp() { 29 | return timestamp; 30 | } 31 | 32 | public void setTimestamp(Long timestamp) { 33 | this.timestamp = timestamp; 34 | } 35 | 36 | public String getUserId() { 37 | return userId; 38 | } 39 | 40 | public void setUserId(String userId) { 41 | this.userId = userId; 42 | } 43 | 44 | public String getItemId() { 45 | return itemId; 46 | } 47 | 48 | public void setItemId(String itemId) { 49 | this.itemId = itemId; 50 | } 51 | 52 | public String getCategoryId() { 53 | return categoryId; 54 | } 55 | 56 | public void setCategoryId(String categoryId) { 57 | this.categoryId = categoryId; 58 | } 59 | 60 | public String getBehavior() { 61 | return behavior; 62 | } 63 | 64 | public void setBehavior(String behavior) 
{ 65 | this.behavior = behavior; 66 | } 67 | 68 | public String getTs() { 69 | return ts; 70 | } 71 | 72 | public void setTs(String ts) { 73 | this.ts = ts; 74 | } 75 | 76 | @Override 77 | public String toString() { 78 | return "UserLog{" + 79 | "userId='" + userId + '\'' + 80 | ", itemId='" + itemId + '\'' + 81 | ", categoryId='" + categoryId + '\'' + 82 | ", behavior='" + behavior + '\'' + 83 | ", ts='" + ts + '\'' + 84 | '}'; 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/connector/pulsar/PulsarDemo.scala: -------------------------------------------------------------------------------- 1 | //package com.venn.connector.pulsar 2 | // 3 | //import java.time.Duration 4 | // 5 | //import org.apache.flink.api.common.serialization.SimpleStringSchema 6 | //import org.apache.flink.connector.pulsar.source.PulsarSource 7 | //import org.apache.flink.connector.pulsar.source.enumerator.cursor.StartCursor 8 | //import org.apache.flink.connector.pulsar.source.reader.deserializer.PulsarDeserializationSchema 9 | //import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 10 | //import org.apache.pulsar.client.api.SubscriptionType 11 | //import org.apache.flink.api.scala._ 12 | //import com.venn.common.Common.PULSAR_SERVER 13 | //import com.venn.common.Common.PULSAR_ADMIN 14 | //import org.apache.flink.api.common.eventtime.WatermarkStrategy 15 | //import org.apache.flink.streaming.api.functions.ProcessFunction 16 | //import org.apache.flink.util.Collector 17 | // 18 | //object PulsarDemo { 19 | // 20 | // def main(args: Array[String]): Unit = { 21 | // val env = StreamExecutionEnvironment.getExecutionEnvironment 22 | // env.setParallelism(1) 23 | // 24 | // val pulsarSource = PulsarSource.builder() 25 | // .setServiceUrl(PULSAR_SERVER) 26 | // .setAdminUrl(PULSAR_ADMIN) 27 | // .setStartCursor(StartCursor.earliest()) 28 | // .setTopics("user_log") 29 | // .setDeserializationSchema(PulsarDeserializationSchema.flinkSchema(new SimpleStringSchema())) 30 | // .setSubscriptionName("my-subscription") 31 | // .setSubscriptionType(SubscriptionType.Exclusive) 32 | // .build() 33 | // 34 | // //env.fromSource(pulsarSource, WatermarkStrategy.forBoundedOutOfOrderness(Duration.ofSeconds(5))) 35 | // env.fromSource(pulsarSource, WatermarkStrategy.noWatermarks(), "pulsar") 36 | // .map(str => str) 37 | // .process(new ProcessFunction[String, String] { 38 | // var count: Long = 0 39 | // 40 | // override def processElement(element: String, ctx: ProcessFunction[String, String]#Context, out: Collector[String]): Unit = { 41 | // count += 1 42 | // if (count % 1000 == 0) { 43 | // println("count: ", count) 44 | // } 45 | // } 46 | // }) 47 | // 48 | // 49 | // env.execute("pulsar demo") 50 | // 51 | // 52 | // } 53 | // 54 | //} 55 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/util/CheckpointUtil.scala: -------------------------------------------------------------------------------- 1 | package com.venn.util 2 | 3 | import org.apache.flink.runtime.state.StateBackend 4 | import org.apache.flink.runtime.state.hashmap.HashMapStateBackend 5 | import org.apache.flink.streaming.api.CheckpointingMode 6 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 7 | import org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend 8 | 9 | object CheckpointUtil { 10 | 11 | def setCheckpoint(env: StreamExecutionEnvironment, stateBackendStr: String, 
checkpointPath: String, interval: Long, timeOut: Long) = { 12 | var stateBackend: StateBackend = null 13 | if ("rocksdb".equals(stateBackendStr)) { 14 | stateBackend = new EmbeddedRocksDBStateBackend(true) 15 | } else { 16 | stateBackend = new HashMapStateBackend() 17 | } 18 | env.setStateBackend(stateBackend) 19 | // checkpoint 20 | env.enableCheckpointing(interval * 1000, CheckpointingMode.EXACTLY_ONCE) 21 | env.getCheckpointConfig.setCheckpointTimeout(timeOut * 1000) 22 | // Flink 1.11.0 new feature: Enables unaligned checkpoints 23 | env.getCheckpointConfig.enableUnalignedCheckpoints() 24 | // checkpoint dir 25 | env.getCheckpointConfig.setCheckpointStorage(checkpointPath) 26 | 27 | } 28 | 29 | /** 30 | * 31 | * @param env 32 | * @param stateBackendStr state backend: rocksdb, other 33 | * @param checkpointPath checkpoint path 34 | * @param interval second 35 | */ 36 | def setCheckpoint(env: StreamExecutionEnvironment, stateBackendStr: String, checkpointPath: String, interval: Long) = { 37 | var stateBackend: StateBackend = null 38 | if ("rocksdb".equals(stateBackendStr)) { 39 | stateBackend = new EmbeddedRocksDBStateBackend(true) 40 | } else { 41 | stateBackend = new HashMapStateBackend() 42 | } 43 | env.setStateBackend(stateBackend) 44 | // checkpoint 45 | env.enableCheckpointing(interval * 1000, CheckpointingMode.EXACTLY_ONCE) 46 | // env.getCheckpointConfig.setCheckpointTimeout(timeOut * 1000) 47 | // Flink 1.11.0 new feature: Enables unaligned checkpoints 48 | env.getCheckpointConfig.enableUnalignedCheckpoints() 49 | // checkpoint dir 50 | env.getCheckpointConfig.setCheckpointStorage(checkpointPath) 51 | 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/question/dynamicWindow/DyTumblingWindow.java: -------------------------------------------------------------------------------- 1 | package com.venn.question.dynamicWindow; 2 | 3 | import org.apache.flink.api.common.ExecutionConfig; 4 | import org.apache.flink.api.common.typeutils.TypeSerializer; 5 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 6 | import org.apache.flink.streaming.api.windowing.assigners.WindowAssigner; 7 | import org.apache.flink.streaming.api.windowing.triggers.EventTimeTrigger; 8 | import org.apache.flink.streaming.api.windowing.triggers.Trigger; 9 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 10 | 11 | import java.util.Collection; 12 | import java.util.Collections; 13 | 14 | /** 15 | * flink dynamic window 16 | */ 17 | public class DyTumblingWindow extends WindowAssigner { 18 | 19 | private final long size; 20 | 21 | private final long offset; 22 | 23 | protected DyTumblingWindow(long size, long offset) { 24 | if (Math.abs(offset) >= size) { 25 | throw new IllegalArgumentException("TumblingEventTimeWindows parameters must satisfy abs(offset) < size"); 26 | } 27 | 28 | this.size = size; 29 | this.offset = offset; 30 | } 31 | 32 | @Override 33 | public Collection assignWindows(Object element, long timestamp, WindowAssignerContext context) { 34 | 35 | if (timestamp > Long.MIN_VALUE) { 36 | long start = TimeWindow.getWindowStartWithOffset(timestamp, offset, size); 37 | return Collections.singletonList(new TimeWindow(start, start + size)); 38 | } else { 39 | throw new RuntimeException("Record has Long.MIN_VALUE timestamp (= no timestamp marker). 
" + 40 | "Is the time characteristic set to 'ProcessingTime', or did you forget to call " + 41 | "'DataStream.assignTimestampsAndWatermarks(...)'?"); 42 | } 43 | } 44 | 45 | @Override 46 | public Trigger getDefaultTrigger(StreamExecutionEnvironment env) { 47 | return EventTimeTrigger.create(); 48 | } 49 | 50 | @Override 51 | public TypeSerializer getWindowSerializer(ExecutionConfig executionConfig) { 52 | return null; 53 | } 54 | 55 | @Override 56 | public boolean isEventTime() { 57 | return false; 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/main/java/com/venn/demo/KafkaJoinRedisDemo.java: -------------------------------------------------------------------------------- 1 | package com.venn.demo; 2 | 3 | import com.venn.entity.KafkaSimpleStringRecord; 4 | import com.venn.util.SimpleKafkaRecordDeserializationSchema; 5 | import org.apache.flink.api.common.eventtime.WatermarkStrategy; 6 | import org.apache.flink.api.common.functions.MapFunction; 7 | import org.apache.flink.connector.kafka.source.KafkaSource; 8 | import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer; 9 | import org.apache.flink.streaming.api.datastream.AsyncDataStream; 10 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 12 | 13 | import java.util.concurrent.TimeUnit; 14 | 15 | public class KafkaJoinRedisDemo { 16 | 17 | private static final String uri = "redis://localhost"; 18 | private static final String bootstrapServer = "localhost:9092"; 19 | private static final String topic = "user_log"; 20 | 21 | public static void main(String[] args) throws Exception { 22 | 23 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 24 | env.setParallelism(1); 25 | 26 | // kafka source 27 | KafkaSource kafkaSource = KafkaSource 28 | .builder() 29 | .setBootstrapServers(bootstrapServer) 30 | .setDeserializer(new SimpleKafkaRecordDeserializationSchema()) 31 | .setStartingOffsets(OffsetsInitializer.latest()) 32 | .setTopics(topic) 33 | .build(); 34 | 35 | 36 | // get value 37 | SingleOutputStreamOperator source = env 38 | .fromSource(kafkaSource, WatermarkStrategy.noWatermarks(), "kafkaSource") 39 | .map((MapFunction) value -> value.getValue()); 40 | 41 | // async redis 42 | AsyncRedisFunction asyncRedisFunction = new AsyncRedisFunction(uri); 43 | SingleOutputStreamOperator asyncStream = AsyncDataStream 44 | .unorderedWait(source, asyncRedisFunction, 5L, TimeUnit.SECONDS); 45 | 46 | // print result 47 | asyncStream 48 | .print("match redis"); 49 | 50 | env.execute("kafkaJoinRedis"); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/source/mysql/cdc/Binlog.java: -------------------------------------------------------------------------------- 1 | package com.venn.source.mysql.cdc; 2 | 3 | public class Binlog { 4 | private String host; 5 | private int port; 6 | private String db; 7 | private String table; 8 | private String file; 9 | private Long pos; 10 | private Long tsSec; 11 | private String operatorType; 12 | private String data; 13 | private String source; 14 | 15 | public Binlog() { 16 | } 17 | 18 | public Binlog(String host, int port) { 19 | this.host = host; 20 | this.port = port; 21 | } 22 | 23 | public Long getTsSec() { 24 | return tsSec; 25 | } 26 | 27 | public void setTsSec(Long tsSec) { 28 | this.tsSec = tsSec; 29 | } 
30 | 31 | public String getHost() { 32 | return host; 33 | } 34 | 35 | public void setHost(String host) { 36 | this.host = host; 37 | } 38 | 39 | public int getPort() { 40 | return port; 41 | } 42 | 43 | public void setPort(int port) { 44 | this.port = port; 45 | } 46 | 47 | public String getSource() { 48 | return source; 49 | } 50 | 51 | public void setSource(String source) { 52 | this.source = source; 53 | } 54 | 55 | public String getDb() { 56 | return db; 57 | } 58 | 59 | public void setDb(String db) { 60 | this.db = db; 61 | } 62 | 63 | public String getTable() { 64 | return table; 65 | } 66 | 67 | public void setTable(String table) { 68 | this.table = table; 69 | } 70 | 71 | public String getFile() { 72 | return file; 73 | } 74 | 75 | public void setFile(String file) { 76 | this.file = file; 77 | } 78 | 79 | public Long getPos() { 80 | return pos; 81 | } 82 | 83 | public void setPos(Long pos) { 84 | this.pos = pos; 85 | } 86 | 87 | public String getOperatorType() { 88 | return operatorType; 89 | } 90 | 91 | public void setOperatorType(String operatorType) { 92 | this.operatorType = operatorType; 93 | } 94 | 95 | public String getData() { 96 | return data; 97 | } 98 | 99 | public void setData(String data) { 100 | this.data = data; 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/question/UserClue/UserClue.scala: -------------------------------------------------------------------------------- 1 | //package com.venn.question.UserClue 2 | // 3 | //import com.venn.entity.KafkaSimpleStringRecord 4 | //import com.venn.util.SimpleKafkaRecordDeserializationSchema 5 | //import org.apache.flink.api.common.eventtime.WatermarkStrategy 6 | //import org.apache.flink.api.common.functions.RichMapFunction 7 | //import org.apache.flink.connector.kafka.source.KafkaSource 8 | //import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer 9 | //import org.apache.flink.connector.kafka.source.reader.deserializer.KafkaRecordDeserializationSchema 10 | //import org.apache.flink.formats.json.JsonNodeDeserializationSchema 11 | //import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode 12 | //import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 13 | //import org.apache.flink.streaming.connectors.kafka.internals.KafkaDeserializationSchemaWrapper 14 | //import org.apache.flink.streaming.util.serialization.JSONKeyValueDeserializationSchema 15 | //import org.apache.flink.api.scala._ 16 | // 17 | //import java.util 18 | // 19 | //object UserClue { 20 | // val bootstrapServer = "localhost:9092" 21 | // val topic = "user_log" 22 | // val sinkTopic = "user_log_sink" 23 | // 24 | // def main(args: Array[String]): Unit = { 25 | // 26 | // val env = StreamExecutionEnvironment.getExecutionEnvironment 27 | // env.setParallelism(1) 28 | // 29 | // val source = KafkaSource 30 | // .builder()[ObjectNode] 31 | // .setBootstrapServers(bootstrapServer) 32 | // .setGroupId("MyGroup") 33 | // .setClientIdPrefix("aa") 34 | // .setTopics(util.Arrays.asList("user_log")) 35 | // .setDeserializer(KafkaRecordDeserializationSchema.of(new JSONKeyValueDeserializationSchema(true))) 36 | //// .setDeserializer(new KafkaDeserializationSchemaWrapper()) 37 | // // .setStartingOffsets(OffsetsInitializer.earliest()) 38 | // .setStartingOffsets(OffsetsInitializer.latest()) 39 | // .build() 40 | // 41 | // env.fromSource(source, WatermarkStrategy.noWatermarks(), "source") 42 | // .map(new 
RichMapFunction[ObjectNode, ObjectNode] { 43 | // override def map(node: ObjectNode): ObjectNode = { 44 | // 45 | // val userId = node.get("user_id") 46 | // 47 | // node 48 | // } 49 | // }) 50 | // 51 | // 52 | // 53 | // } 54 | // 55 | //} 56 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/connector/jdbcOutput/MysqlSink.scala: -------------------------------------------------------------------------------- 1 | package com.venn.connector.jdbcOutput 2 | 3 | import java.sql.{Connection, DriverManager, PreparedStatement, SQLException} 4 | import org.apache.flink.configuration.Configuration 5 | import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction} 6 | import org.slf4j.{Logger, LoggerFactory} 7 | 8 | class MysqlSink extends RichSinkFunction[User] { 9 | 10 | val logger: Logger = LoggerFactory.getLogger("MysqlSink") 11 | var conn: Connection = _ 12 | var ps: PreparedStatement = _ 13 | val jdbcUrl = "jdbc:mysql://192.168.229.128:3306?useSSL=false&allowPublicKeyRetrieval=true" 14 | val username = "root" 15 | val password = "123456" 16 | val driverName = "com.mysql.jdbc.Driver" 17 | 18 | override def open(parameters: Configuration): Unit = { 19 | 20 | Class.forName(driverName) 21 | try { 22 | Class.forName(driverName) 23 | conn = DriverManager.getConnection(jdbcUrl, username, password) 24 | 25 | // close auto commit 26 | conn.setAutoCommit(false) 27 | } catch { 28 | case e@(_: ClassNotFoundException | _: SQLException) => 29 | logger.error("init mysql error") 30 | e.printStackTrace() 31 | System.exit(-1); 32 | } 33 | } 34 | 35 | override def invoke(user: User, context: SinkFunction.Context): Unit = { 36 | println("get user : " + user.toString) 37 | ps = conn.prepareStatement("insert into async.user(username, password, sex, phone) values(?,?,?,?)") 38 | ps.setString(1, user.username) 39 | ps.setString(2, user.password) 40 | ps.setInt(3, user.sex) 41 | ps.setString(4, user.phone) 42 | 43 | ps.execute() 44 | conn.commit() 45 | } 46 | 47 | /** 48 | * 吞吐量不够话,可以将数据暂存在状态中,批量提交的方式提高吞吐量(如果oom,可能就是数据量太大,资源没有及时释放导致的) 49 | * 50 | */ 51 | // override def invoke(user: User, context: SinkFunction.Context[_]): Unit = { 52 | // println("get user : " + user.toString) 53 | // ps = conn.prepareStatement("insert into async.user(username, password, sex, phone) values(?,?,?,?)") 54 | // ps.setString(1, user.username) 55 | // ps.setString(2, user.password) 56 | // ps.setInt(3, user.sex) 57 | // ps.setString(4, user.phone) 58 | // 59 | // ps.execute() 60 | // conn.commit() 61 | // } 62 | 63 | 64 | override def close(): Unit = { 65 | if (conn != null) { 66 | conn.commit() 67 | conn.close() 68 | } 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/question/stock/entry/StockList.java: -------------------------------------------------------------------------------- 1 | package com.venn.question.stock.entry; 2 | 3 | /** 4 | * @Classname StockList 5 | * @Description TODO 6 | * @Date 2023/6/8 7 | * @Created by venn 8 | */ 9 | public class StockList { 10 | 11 | private int id; 12 | private long createTime; 13 | private String outStockCode; 14 | private String createOp; 15 | private String distributorCode; 16 | private String inoutType; 17 | 18 | public StockList() { 19 | } 20 | 21 | public StockList(int id, long fdate, String fid, String fbillno, String fcustomerid, String fGjzh) { 22 | this.id = id; 23 | this.createTime = fdate; 24 | this.outStockCode = fid; 25 | 
this.createOp = fbillno; 26 | this.distributorCode = fcustomerid; 27 | this.inoutType = fGjzh; 28 | } 29 | 30 | public int getId() { 31 | return id; 32 | } 33 | 34 | public void setId(int id) { 35 | this.id = id; 36 | } 37 | 38 | public long getCreateTime() { 39 | return createTime; 40 | } 41 | 42 | public void setCreateTime(long createTime) { 43 | this.createTime = createTime; 44 | } 45 | 46 | public String getOutStockCode() { 47 | return outStockCode; 48 | } 49 | 50 | public void setOutStockCode(String outStockCode) { 51 | this.outStockCode = outStockCode; 52 | } 53 | 54 | public String getCreateOp() { 55 | return createOp; 56 | } 57 | 58 | public void setCreateOp(String createOp) { 59 | this.createOp = createOp; 60 | } 61 | 62 | public String getDistributorCode() { 63 | return distributorCode; 64 | } 65 | 66 | public void setDistributorCode(String distributorCode) { 67 | this.distributorCode = distributorCode; 68 | } 69 | 70 | public String getInoutType() { 71 | return inoutType; 72 | } 73 | 74 | public void setInoutType(String inoutType) { 75 | this.inoutType = inoutType; 76 | } 77 | 78 | @Override 79 | public String toString() { 80 | return "StockList{" + 81 | "id=" + id + 82 | ", createTime=" + createTime + 83 | ", outStockCode='" + outStockCode + '\'' + 84 | ", createOp='" + createOp + '\'' + 85 | ", distributorCode='" + distributorCode + '\'' + 86 | ", inoutType='" + inoutType + '\'' + 87 | '}'; 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/util/HttpClientUtil.java: -------------------------------------------------------------------------------- 1 | package com.venn.util; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.IOException; 5 | import java.io.InputStream; 6 | import java.io.InputStreamReader; 7 | import java.net.HttpURLConnection; 8 | import java.net.MalformedURLException; 9 | import java.net.URL; 10 | 11 | public class HttpClientUtil { 12 | 13 | public static String doGet(String httpurl) throws IOException { 14 | HttpURLConnection connection = null; 15 | InputStream is = null; 16 | BufferedReader br = null; 17 | String result = null;// 返回结果字符串 18 | try { 19 | // 创建远程url连接对象 20 | URL url = new URL(httpurl); 21 | // 通过远程url连接对象打开一个连接,强转成httpURLConnection类 22 | connection = (HttpURLConnection) url.openConnection(); 23 | // 设置连接方式:get 24 | connection.setRequestMethod("GET"); 25 | // 设置连接主机服务器的超时时间:15000毫秒 26 | connection.setConnectTimeout(15000); 27 | // 设置读取远程返回的数据时间:60000毫秒 28 | connection.setReadTimeout(60000); 29 | // 发送请求 30 | connection.connect(); 31 | // 通过connection连接,获取输入流 32 | if (connection.getResponseCode() == 200) { 33 | is = connection.getInputStream(); 34 | // 封装输入流is,并指定字符集 35 | br = new BufferedReader(new InputStreamReader(is, "UTF-8")); 36 | // 存放数据 37 | StringBuffer sbf = new StringBuffer(); 38 | String temp = null; 39 | while ((temp = br.readLine()) != null) { 40 | sbf.append(temp); 41 | sbf.append("\r\n"); 42 | } 43 | result = sbf.toString(); 44 | } 45 | } catch (MalformedURLException e) { 46 | e.printStackTrace(); 47 | } catch (IOException e) { 48 | e.printStackTrace(); 49 | } finally { 50 | // 关闭资源 51 | if (null != br) { 52 | try { 53 | br.close(); 54 | } catch (IOException e) { 55 | e.printStackTrace(); 56 | } 57 | } 58 | 59 | if (null != is) { 60 | try { 61 | is.close(); 62 | } catch (IOException e) { 63 | e.printStackTrace(); 64 | } 65 | } 66 | 67 | connection.disconnect();// 关闭远程连接 68 | } 69 | 70 | return result; 71 | } 72 | } 73 | 
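A quick usage sketch for the HTTP helper above (not in the repository); the endpoint URL is a placeholder, and doGet returns the response body only when the server answers with HTTP 200, otherwise null.

import com.venn.util.HttpClientUtil;

import java.io.IOException;

public class HttpClientUtilUsageSketch {

    public static void main(String[] args) throws IOException {
        // placeholder endpoint; doGet blocks until the body is read or the 15s/60s timeouts are hit
        String body = HttpClientUtil.doGet("http://localhost:8888/hello");
        System.out.println(body);
    }
}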
-------------------------------------------------------------------------------- /src/main/scala/com/venn/question/stock/entry/StockListDetail.java: -------------------------------------------------------------------------------- 1 | package com.venn.question.stock.entry; 2 | 3 | import java.math.BigDecimal; 4 | 5 | /** 6 | * @Classname StockListDetail 7 | * @Description TODO 8 | * @Date 2023/6/8 9 | * @Created by venn 10 | */ 11 | public class StockListDetail { 12 | 13 | private int id; 14 | private long createTime; 15 | private String outStockCode; 16 | private String productCode; 17 | private String createOp; 18 | private BigDecimal outStockNum; 19 | 20 | public StockListDetail() { 21 | } 22 | 23 | public StockListDetail(int id, long createTime, String outStockCode, String productCode, String createOp, BigDecimal outStockNum) { 24 | this.id = id; 25 | this.createTime = createTime; 26 | this.outStockCode = outStockCode; 27 | this.productCode = productCode; 28 | this.createOp = createOp; 29 | this.outStockNum = outStockNum; 30 | } 31 | 32 | public int getId() { 33 | return id; 34 | } 35 | 36 | public void setId(int id) { 37 | this.id = id; 38 | } 39 | 40 | public long getCreateTime() { 41 | return createTime; 42 | } 43 | 44 | public void setCreateTime(long createTime) { 45 | this.createTime = createTime; 46 | } 47 | 48 | public String getOutStockCode() { 49 | return outStockCode; 50 | } 51 | 52 | public void setOutStockCode(String outStockCode) { 53 | this.outStockCode = outStockCode; 54 | } 55 | 56 | public String getProductCode() { 57 | return productCode; 58 | } 59 | 60 | public void setProductCode(String productCode) { 61 | this.productCode = productCode; 62 | } 63 | 64 | public String getCreateOp() { 65 | return createOp; 66 | } 67 | 68 | public void setCreateOp(String createOp) { 69 | this.createOp = createOp; 70 | } 71 | 72 | public BigDecimal getOutStockNum() { 73 | return outStockNum; 74 | } 75 | 76 | public void setOutStockNum(BigDecimal outStockNum) { 77 | this.outStockNum = outStockNum; 78 | } 79 | 80 | @Override 81 | public String toString() { 82 | return "StockListDetail{" + 83 | "id=" + id + 84 | ", createTime=" + createTime + 85 | ", outStockCode='" + outStockCode + '\'' + 86 | ", productCode='" + productCode + '\'' + 87 | ", createOp='" + createOp + '\'' + 88 | ", outStockNum=" + outStockNum + 89 | '}'; 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/stream/api/trigger/ProcessWindowForTrigger.scala: -------------------------------------------------------------------------------- 1 | package com.venn.stream.api.trigger 2 | 3 | import java.io.File 4 | import java.text.SimpleDateFormat 5 | import com.venn.common.Common 6 | import com.venn.util.CheckpointUtil 7 | import org.apache.flink.api.common.serialization.SimpleStringSchema 8 | import org.apache.flink.api.scala._ 9 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 10 | import org.apache.flink.streaming.api.scala.function.ProcessAllWindowFunction 11 | import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows 12 | import org.apache.flink.streaming.api.windowing.time.Time 13 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow 14 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer 15 | import org.apache.flink.util.Collector 16 | import org.slf4j.LoggerFactory 17 | 18 | /** 19 | * for test CountAndContinuousProcessTimeTrigger 20 | * 21 | */ 22 | object 
ProcessWindowDemoForTrigger { 23 | val logger = LoggerFactory.getLogger(this.getClass) 24 | 25 | def main(args: Array[String]): Unit = { 26 | // environment 27 | val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment 28 | env.setParallelism(1) 29 | if ("\\".equals(File.pathSeparator)) { 30 | // val rock = new RocksDBStateBackend(Common.CHECK_POINT_DATA_DIR) 31 | // env.setStateBackend(rock) 32 | // checkpoint interval 33 | // env.enableCheckpointing(10000) 34 | CheckpointUtil.setCheckpoint(env, "rocksdb", Common.CHECK_POINT_DATA_DIR, 10) 35 | } 36 | 37 | val topic = "current_day" 38 | val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS") 39 | 40 | val kafkaSource = new FlinkKafkaConsumer[String](topic, new SimpleStringSchema(), Common.getProp) 41 | val stream = env.addSource(kafkaSource) 42 | .map(s => { 43 | s 44 | }) 45 | .windowAll(TumblingProcessingTimeWindows.of(Time.seconds(60))) 46 | .trigger(CountAndTimeTrigger.of(10, Time.seconds(10))) 47 | .process(new ProcessAllWindowFunction[String, String, TimeWindow] { 48 | 49 | override def process(context: Context, elements: Iterable[String], out: Collector[String]): Unit = { 50 | 51 | var count = 0 52 | 53 | elements.iterator.foreach(s => { 54 | count += 1 55 | }) 56 | logger.info("this trigger have : {} item", count) 57 | } 58 | 59 | }) 60 | 61 | // execute job 62 | env.execute(this.getClass.getName) 63 | } 64 | 65 | } 66 | 67 | 68 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/question/dynamicWindow/DyProcessWindowFunction.scala: -------------------------------------------------------------------------------- 1 | package com.venn.question.dynamicWindow 2 | 3 | import java.text.SimpleDateFormat 4 | import java.util 5 | 6 | import com.google.gson.Gson 7 | import org.apache.flink.configuration.Configuration 8 | import org.apache.flink.streaming.api.scala.function.ProcessWindowFunction 9 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow 10 | import org.apache.flink.util.Collector 11 | import org.slf4j.LoggerFactory 12 | 13 | class DyProcessWindowFunction() extends ProcessWindowFunction[(DataEntity, Command), String, String, TimeWindow] { 14 | 15 | val logger = LoggerFactory.getLogger("DyProcessWindowFunction") 16 | var gson: Gson = _ 17 | 18 | 19 | override def open(parameters: Configuration): Unit = { 20 | gson = new Gson() 21 | } 22 | 23 | override def process(key: String, context: Context, elements: Iterable[(DataEntity, Command)], out: Collector[String]): Unit = { 24 | // start-end 25 | val taskId = elements.head._2.taskId 26 | val method = elements.head._2.method 27 | val targetAttr = elements.head._2.targetAttr 28 | val periodStartTime = context.window.getStart 29 | val periodEndTime = context.window.getEnd 30 | 31 | var value: Double = 0d 32 | method match { 33 | case "sum" => 34 | value = 0d 35 | case "min" => 36 | value = Double.MaxValue 37 | case "max" => 38 | value = Double.MinValue 39 | case _ => 40 | logger.warn("input method exception") 41 | return 42 | } 43 | 44 | val it = elements.toIterator 45 | while (it.hasNext) { 46 | val currentValue = it.next()._1.value 47 | method match { 48 | case "sum" => 49 | value += currentValue 50 | case "count" => 51 | value += 1 52 | case "min" => 53 | if (currentValue < value) { 54 | value = currentValue 55 | } 56 | case "max" => 57 | if (currentValue > value) { 58 | value = currentValue 59 | } 60 | case _ => 61 | } 62 | } 63 | 64 | val sdf = new SimpleDateFormat("HH:mm:ss") 65 
| val resultMap = new util.HashMap[String, String] 66 | resultMap.put("taskId", taskId) 67 | resultMap.put("method", method) 68 | resultMap.put("targetAttr", targetAttr) 69 | resultMap.put("periodStartTime", sdf.format(periodStartTime)) 70 | resultMap.put("periodEndTime", sdf.format(periodEndTime)) 71 | resultMap.put("value", value.toString) 72 | 73 | out.collect(gson.toJson(resultMap)) 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/source/mysql/cdc/MySqlBinlogSourceExample.java: -------------------------------------------------------------------------------- 1 | package com.venn.source.mysql.cdc; 2 | 3 | import com.ververica.cdc.connectors.mysql.source.MySqlSource; 4 | import com.ververica.cdc.connectors.mysql.table.StartupOptions; 5 | import org.apache.flink.api.common.eventtime.WatermarkStrategy; 6 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 7 | 8 | import java.util.Properties; 9 | 10 | /** 11 | * mysql cdc demo 12 | */ 13 | public class MySqlBinlogSourceExample { 14 | public static void main(String[] args) throws Exception { 15 | 16 | String ip = "10.201.0.166"; 17 | int port = 3306; 18 | String dbReg = "deepexi.*"; 19 | String tableReg = "[deepexi|dolphinscheduler].*"; 20 | String user = "root"; 21 | String pass = "daas2020"; 22 | 23 | // caev 24 | ip = "10.1.8.43"; 25 | dbReg = "order_pro"; 26 | tableReg = "order_opay_info"; 27 | pass = "enc(1C0F4C32D822B87CB4D8AC91246BFD64)"; 28 | 29 | 30 | 31 | 32 | String bootstrapServer = "dcmp12:9092"; 33 | 34 | if (args.length > 6) { 35 | ip = args[0]; 36 | port = Integer.parseInt(args[1]); 37 | dbReg = args[2]; 38 | // tableReg = args[3]; 39 | user = args[4]; 40 | pass = args[5]; 41 | } 42 | 43 | 44 | Properties prop = new Properties(); 45 | MySqlSource sourceFunction = MySqlSource.builder() 46 | .hostname(ip) 47 | .port(port) 48 | // 获取两个数据库的所有表 49 | .databaseList(dbReg) 50 | .tableList(tableReg) 51 | .username(user) 52 | .password(pass) 53 | .startupOptions(StartupOptions.latest()) 54 | // 自定义 解析器,讲数据解析成 json 55 | .deserializer(new CommonStringDebeziumDeserializationSchema(ip, port)) 56 | .debeziumProperties(prop) 57 | .build(); 58 | 59 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 60 | env.setParallelism(1); 61 | env 62 | .fromSource(sourceFunction, WatermarkStrategy.noWatermarks(), "cdc") 63 | .map(str -> str) 64 | .filter(str -> str.contains("DD012209160741922731")) 65 | .print(); 66 | // 将数据发送到不同的 topic 67 | // .addSink(new CommonKafkaSink(bootstrapServer)) 68 | // .setParallelism(1); 69 | 70 | env.execute(); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/main/java/com/venn/flink/asyncio/AsyncFunctionForMysqlJava.java: -------------------------------------------------------------------------------- 1 | package com.venn.flink.asyncio; 2 | 3 | import org.apache.flink.configuration.Configuration; 4 | import org.apache.flink.streaming.api.functions.async.ResultFuture; 5 | import org.apache.flink.streaming.api.functions.async.RichAsyncFunction; 6 | import org.slf4j.Logger; 7 | import org.slf4j.LoggerFactory; 8 | 9 | import java.util.ArrayList; 10 | import java.util.Collections; 11 | import java.util.List; 12 | import java.util.concurrent.ExecutorService; 13 | import java.util.concurrent.Executors; 14 | 15 | public class AsyncFunctionForMysqlJava extends RichAsyncFunction { 16 | 17 | 18 | Logger logger = 
LoggerFactory.getLogger(AsyncFunctionForMysqlJava.class); 19 | private transient MysqlClient client; 20 | private transient ExecutorService executorService; 21 | 22 | /** 23 | * open 方法中初始化链接 24 | * 25 | * @param parameters 26 | * @throws Exception 27 | */ 28 | @Override 29 | public void open(Configuration parameters) throws Exception { 30 | logger.info("async function for mysql java open ..."); 31 | super.open(parameters); 32 | 33 | client = new MysqlClient(); 34 | executorService = Executors.newFixedThreadPool(30); 35 | } 36 | 37 | /** 38 | * use asyncUser.getId async get asyncUser phone 39 | * 40 | * @param asyncUser 41 | * @param resultFuture 42 | * @throws Exception 43 | */ 44 | @Override 45 | public void asyncInvoke(AsyncUser asyncUser, ResultFuture resultFuture) throws Exception { 46 | 47 | executorService.submit(() -> { 48 | // submit query 49 | System.out.println("submit query : " + asyncUser.getId() + "-1-" + System.currentTimeMillis()); 50 | AsyncUser tmp = client.query1(asyncUser); 51 | // 一定要记得放回 resultFuture,不然数据全部是timeout 的 52 | resultFuture.complete(Collections.singletonList(tmp)); 53 | }); 54 | } 55 | 56 | @Override 57 | public void timeout(AsyncUser input, ResultFuture resultFuture) throws Exception { 58 | logger.warn("Async function for hbase timeout"); 59 | List list = new ArrayList(); 60 | input.setPhone("timeout"); 61 | list.add(input); 62 | resultFuture.complete(list); 63 | } 64 | 65 | /** 66 | * close function 67 | * 68 | * @throws Exception 69 | */ 70 | @Override 71 | public void close() throws Exception { 72 | logger.info("async function for mysql java close ..."); 73 | super.close(); 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/question/late1mtps/LateTpsProcessWindowFunction.scala: -------------------------------------------------------------------------------- 1 | package com.venn.question.late1mtps 2 | 3 | import com.venn.util.DateTimeUtil 4 | import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor} 5 | import org.apache.flink.configuration.Configuration 6 | import org.apache.flink.streaming.api.scala.function.ProcessAllWindowFunction 7 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow 8 | import org.apache.flink.util.Collector 9 | 10 | import java.util 11 | 12 | /** 13 | * 整分钟输出间隔的窗口 14 | * @param windowSize 15 | * @param intervalSize 16 | */ 17 | class FixedLateTpsProcessAllWindowFunction(windowSize: Int, intervalSize: Int) extends ProcessAllWindowFunction[(String, Long), (String, String, Int, Double), TimeWindow] { 18 | 19 | // for last window, last senond 20 | var lastWindow: ValueState[Double] = _ 21 | var interval: Int = _ 22 | 23 | override def open(parameters: Configuration): Unit = { 24 | 25 | // windowState = getRuntimeContext.getMapState(new MapStateDescriptor[Int, Long]("window", classOf[Int], classOf[Long])) 26 | lastWindow = getRuntimeContext.getState(new ValueStateDescriptor[Double]("last", classOf[Double])) 27 | 28 | interval = windowSize / intervalSize 29 | } 30 | 31 | override def process(context: Context, elements: Iterable[(String, Long)], out: Collector[(String, String, Int, Double)]): Unit = { 32 | 33 | // get window 34 | val windowStart = DateTimeUtil.formatMillis(context.window.getStart, DateTimeUtil.YYYY_MM_DD_HH_MM_SS) 35 | val windowEnd = DateTimeUtil.formatMillis(context.window.getEnd, DateTimeUtil.YYYY_MM_DD_HH_MM_SS) 36 | var lastWindowCount = lastWindow.value() 37 | if (lastWindowCount == null) { 38 | 
lastWindowCount = 0 39 | } 40 | 41 | // init tps map 42 | val map = new util.HashMap[Int, Long]() 43 | for (i <- 0 until interval) { 44 | map.put(i, 0) 45 | } 46 | 47 | // bucket each element by its second offset within the window 48 | elements.foreach((e: (String, Long)) => { 49 | val current: Int = (e._2 / 1000 % interval).toInt 50 | map.put(current, map.get(current) + 1) 51 | }) 52 | 53 | // the zero slot of this window reports the previous window's last slot 54 | out.collect(windowStart, windowEnd, 0, lastWindowCount) 55 | for (i <- 0 until interval - 1) { 56 | out.collect(windowStart, windowEnd, i + 1, map.get(i + 1) / 60.0) 57 | } 58 | 59 | // keep this window's last slot as the next window's zero-slot value 60 | lastWindow.update(map.get(interval - 1) / 60.0) 61 | 62 | } 63 | 64 | override def close(): Unit = { 65 | lastWindow.clear() 66 | } 67 | 68 | } 69 | -------------------------------------------------------------------------------- /src/main/java/com/venn/flink/asyncio/AsyncMysqlRequest.java: -------------------------------------------------------------------------------- 1 | package com.venn.flink.asyncio; 2 | 3 | import com.google.gson.Gson; 4 | import com.venn.common.Common; 5 | import org.apache.flink.formats.json.JsonNodeDeserializationSchema; 6 | import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode; 7 | import org.apache.flink.streaming.api.TimeCharacteristic; 8 | import org.apache.flink.streaming.api.datastream.AsyncDataStream; 9 | import org.apache.flink.streaming.api.datastream.DataStream; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor; 12 | import org.apache.flink.streaming.api.windowing.time.Time; 13 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; 14 | 15 | import java.util.concurrent.TimeUnit; 16 | 17 | 18 | public class AsyncMysqlRequest { 19 | 20 | 21 | public static void main(String[] args) throws Exception { 22 | 23 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 24 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 25 | FlinkKafkaConsumer<ObjectNode> source = new FlinkKafkaConsumer<>("async", new JsonNodeDeserializationSchema(), Common.getProp()); 26 | source.setStartFromLatest(); 27 | 28 | // consume the kafka records and convert them to AsyncUser objects 29 | DataStream<AsyncUser> input = env.addSource(source) 30 | .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<ObjectNode>(Time.seconds(60)) { 31 | @Override 32 | public long extractTimestamp(ObjectNode element) { 33 | return element.get("id").asLong(0) + 1000; 34 | } 35 | }) 36 | .map(value -> { 37 | String id = value.get("id").asText(); 38 | String username = value.get("username").asText(); 39 | String password = value.get("password").asText(); 40 | 41 | return new AsyncUser(id, username, password); 42 | }); 43 | // async IO lookup against mysql, timeout 1s, capacity 10 (once 10 requests are in flight, the upstream operator is backpressured) 44 | DataStream<AsyncUser> async = AsyncDataStream 45 | .unorderedWait(input, 46 | new AsyncFunctionForMysqlJava(), 47 | 1000, 48 | TimeUnit.MILLISECONDS, 49 | 10); 50 | 51 | async.map(user -> { 52 | return new Gson().toJson(user).toString(); 53 | }) 54 | .print(); 55 | 56 | env.execute("asyncForMysql"); 57 | 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/connector/filesink/StreamingFileSinkDemo.scala: -------------------------------------------------------------------------------- 1 | //package
com.venn.connector.filesink 2 | // 3 | //import java.io.File 4 | //import java.text.SimpleDateFormat 5 | // 6 | //import com.venn.common.Common 7 | //import org.apache.flink.api.common.serialization.{BulkWriter, SimpleStringEncoder} 8 | //import org.apache.flink.api.scala._ 9 | //import org.apache.flink.core.fs.Path 10 | //import org.apache.flink.formats.json.JsonNodeDeserializationSchema 11 | //import org.apache.flink.runtime.state.filesystem.FsStateBackend 12 | //import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode 13 | //import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink 14 | //import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 15 | //import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic} 16 | //import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer 17 | // 18 | //object StreamingFileSinkDemo { 19 | // 20 | // def main(args: Array[String]): Unit = { 21 | // 22 | // val env = StreamExecutionEnvironment.getExecutionEnvironment 23 | // env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime) 24 | // if ("/".equals(File.separator)) { 25 | // val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true) 26 | // env.setStateBackend(backend) 27 | // env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE) 28 | // } else { 29 | // env.setMaxParallelism(1) 30 | // env.setParallelism(1) 31 | // } 32 | // 33 | // val sdf = new SimpleDateFormat("yyyyMMddHHmmss") 34 | // val source = new FlinkKafkaConsumer[ObjectNode]("roll_file_sink", new JsonNodeDeserializationSchema, Common.getProp) 35 | // // row format 36 | // val sinkRow = StreamingFileSink 37 | // .forRowFormat(new Path("D:\\idea_out\\rollfilesink"), new SimpleStringEncoder[ObjectNode]("UTF-8")) 38 | // .withBucketAssigner(new DayBucketAssigner) 39 | // .withBucketCheckInterval(60 * 60 * 1000l) // 1 hour 40 | // .build() 41 | // 42 | // // use define BulkWriterFactory and DayBucketAssinger 43 | // val sinkBuck = StreamingFileSink 44 | // .forBulkFormat(new Path("D:\\idea_out\\rollfilesink"), new DayBulkWriterFactory) 45 | // .withBucketAssigner(new DayBucketAssigner()) 46 | // .withBucketCheckInterval(60 * 60 * 1000l) // 1 hour 47 | // .build() 48 | // 49 | // 50 | // env.addSource(source) 51 | // .assignAscendingTimestamps(json => { 52 | // sdf.parse(json.get("date").asText()).getTime 53 | // }) 54 | // .map(json => { 55 | //// json.get("date") + "-" + json.toString 56 | // json 57 | // }) 58 | // .addSink(sinkBuck) 59 | // 60 | // env.execute("StreamingFileSink") 61 | // } 62 | // 63 | //} 64 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/question/dynamicWindow/readme.md: -------------------------------------------------------------------------------- 1 | # Dynamic Window Staticstics 2 | 3 | 4 | 用 Flink 实现一个动态窗口统计的功能,使用 flink 1.10.0。实现的功能包括: 5 | 6 | 7 | ## 1. 
定义命令流Source,格式 8 | { 9 | 'taskId': '任务id', 10 | 'targetAttr': '要统计的属性', 11 | 'method': '统计方法,有 SUM 求和,MAX 最大值, MIN 最小值三种' 12 | 'periodUnit': '统计周期任务,有 SECOND 和 MINUTE 两个值', 13 | 'periodLength': '周期的长度,数值', 14 | 'startTime': '任务开始的UNIX时间戳,单位毫秒' 15 | } 16 | 17 | 18 | 如: 19 | { 20 | 'taskId': 'task1', 21 | 'targetAttr': 'attr1', 22 | 'method': 'SUM' 23 | 'periodUnit': 'MINUTE', 24 | 'periodLength': '3', 25 | 'startTime': '1598596980000' 26 | } 27 | 表示 从 2020/8/28 14:43:00 开始统计属性attr1每三分钟的和 28 | 29 | 30 | 题目要求命令流发送4条数据,固定为下: 31 | { 32 | 'taskId': 'task1', 33 | 'targetAttr': 'attr1', 34 | 'method': 'SUM' 35 | 'periodUnit': 'SECOND', 36 | 'periodLength': '30', 37 | 'startTime': '1598596980000' 38 | } 39 | 40 | 41 | { 42 | 'taskId': 'task2', 43 | 'targetAttr': 'attr1', 44 | 'method': 'SUM' 45 | 'periodUnit': 'MINUTE', 46 | 'periodLength': '1', 47 | 'startTime': '1598596980000' 48 | } 49 | 50 | 51 | { 52 | 'taskId': 'task3', 53 | 'targetAttr': 'attr2', 54 | 'method': 'MAX' 55 | 'periodUnit': 'SECOND', 56 | 'periodLength': '30', 57 | 'startTime': '1598596980000' 58 | } 59 | 60 | 61 | { 62 | 'taskId': 'task4', 63 | 'targetAttr': 'attr3', 64 | 'method': 'MAX' 65 | 'periodUnit': 'MINUTE', 66 | 'periodLength': '2', 67 | 'startTime': '1598596980000' 68 | } 69 | 70 | ```text 71 | {"taskId":"task1","targetAttr":"attr2","method":"sum","periodUnit":"SECOND","periodLength":"20","startTime":"1598596980000"} 72 | {"taskId":"task2","targetAttr":"attr1","method":"sum","periodUnit":"MINUTE","periodLength":"1","startTime":"1598596980000"} 73 | {"taskId":"task3","targetAttr":"attr2","method":"max","periodUnit":"SECOND","periodLength":"30","startTime":"1598596980000"} 74 | {"taskId":"task4","targetAttr":"attr3","method":"min","periodUnit":"MINUTE","periodLength":"1","startTime":"1599640669628"} 75 | 76 | ``` 77 | 78 | 79 | ## 2. 定义数据流Source,格式如下: 80 | { 81 | "attr": '属性名', 82 | "value": double数值, 83 | "time": 'UNIX时间戳,单位毫秒' 84 | } 85 | 86 | 如: 87 | { 88 | 'attr': 'attr1', 89 | 'value': 35.0, 90 | 'time': '1598596980000' 91 | } 92 | 93 | 94 | 数据流需要每一秒发送4条数据,属性分别是 attr1、attr2、attr3 和 attr4,time使用当前unix毫秒时间戳,value使用 0~100的随机整数 95 | 96 | 97 | ## 3. 需要将命令流进行广播,然后和数据流进行connect,根据命令流指定的命令进行统计 98 | 99 | 统计参考涉及到的部分类或方法: DataStream.assignTimestampsAndWatermarks、keyBy、WindowAssigner、reduce、ProcessWindowFunction、addSink 100 | 101 | 102 | ## 4. 
实现一个输出到终端的 sink,将统计结果打印出来,每一条记录包括 taskId, targetAttr, periodStartTime(周期开始时间), value (统计后的值,double类型) -------------------------------------------------------------------------------- /src/main/scala/com/venn/question/UserClue/question.md: -------------------------------------------------------------------------------- 1 | ## 跟进记录 2 | ```csv 3 | 1、用户跟进记录表,分析用户跟进次数,取出每次跟进的 激活时间(状态2,3 中小的一条)和跟进失败时间(状态9) 4 | 2、每个用户跟进状态一定从 1 开始,直到 9 结束一次跟进 5 | 3、跟进状态可以重复和累加,不能递减 6 | 4、状态变化可以跳跃 7 | 5、激活时间为一次跟进过程中状态为 2 或 3 中时间小的一条记录 8 | 9 | 表结构、数据如下: 10 | 11 | create table clue_log( 12 | user_id bigint, 13 | create_time datetime, 14 | status int 15 | ) 16 | ENGINE=OLAP 17 | DUPLICATE KEY(user_id) 18 | DISTRIBUTED BY HASH(user_id) BUCKETS 8; 19 | 20 | insert into clue_log values 21 | (1,'2022-08-15 16:20:00', 1), 22 | (1,'2022-08-15 16:20:30', 2), 23 | (1,'2022-08-15 16:21:00', 3), 24 | (1,'2022-08-15 16:22:00', 4), 25 | (1,'2022-08-15 16:23:00', 5), 26 | (1,'2022-08-15 16:24:00', 9), 27 | 28 | (1,'2022-08-15 16:25:10', 1), 29 | (1,'2022-08-15 16:25:11', 1), 30 | (1,'2022-08-15 16:25:12', 1), 31 | (1,'2022-08-15 16:25:20', 3), 32 | (1,'2022-08-15 16:25:30', 4), 33 | (1,'2022-08-15 16:25:40', 5), 34 | (1,'2022-08-15 16:25:50', 9), 35 | 36 | (1,'2022-08-15 16:26:10', 1), 37 | (1,'2022-08-15 16:26:11', 2), 38 | (1,'2022-08-15 16:26:12', 2), 39 | (1,'2022-08-15 16:26:13', 2), 40 | (1,'2022-08-15 16:26:14', 3), 41 | (1,'2022-08-15 16:26:15', 4), 42 | (1,'2022-08-15 16:26:16', 5), 43 | (1,'2022-08-15 16:26:19', 9), 44 | 45 | (1,'2022-08-15 16:27:10', 1), 46 | (1,'2022-08-15 16:27:12', 3), 47 | (1,'2022-08-15 16:27:13', 3), 48 | (1,'2022-08-15 16:27:14', 3), 49 | (1,'2022-08-15 16:27:15', 4), 50 | (1,'2022-08-15 16:27:16', 5), 51 | 52 | (1,'2022-08-15 16:27:19', 9); 53 | 54 | 55 | insert into clue_log values 56 | (2,'2022-08-15 16:20:00', 1), 57 | (2,'2022-08-15 16:20:30', 2), 58 | (2,'2022-08-15 16:21:00', 3), 59 | (2,'2022-08-15 16:22:00', 4), 60 | (2,'2022-08-15 16:23:00', 5), 61 | (2,'2022-08-15 16:24:00', 9), 62 | (2,'2022-08-15 16:25:10', 1), 63 | (2,'2022-08-15 16:25:20', 3), 64 | (2,'2022-08-15 16:25:30', 4), 65 | (2,'2022-08-15 16:25:40', 5), 66 | (2,'2022-08-15 16:25:50', 9), 67 | (2,'2022-08-15 16:26:10', 1), 68 | (2,'2022-08-15 16:26:11', 2), 69 | (2,'2022-08-15 16:26:12', 2), 70 | (2,'2022-08-15 16:26:13', 2), 71 | (2,'2022-08-15 16:26:14', 3), 72 | (2,'2022-08-15 16:26:15', 4), 73 | (2,'2022-08-15 16:26:16', 5), 74 | (2,'2022-08-15 16:26:19', 9), 75 | (2,'2022-08-15 16:27:10', 1), 76 | (2,'2022-08-15 16:27:12', 3), 77 | (2,'2022-08-15 16:27:13', 3), 78 | (2,'2022-08-15 16:27:14', 3), 79 | (2,'2022-08-15 16:27:15', 4), 80 | (2,'2022-08-15 16:27:16', 5), 81 | (2,'2022-08-15 16:27:19', 9); 82 | 83 | select user_id, active_time,fail_time 84 | from ( 85 | select a.user_id, min(if(b.status in (2,3) , b.create_time, null)) active_time, min(if(b.status in (9) , b.create_time, null)) fail_time 86 | from clue_log a 87 | left join clue_log b on a.user_id = b.user_id and b.create_time >= a.create_time 88 | where a.status = 1 89 | group by a.user_id, a.create_time 90 | )a 91 | where fail_time is not null 92 | group by user_id, active_time,fail_time 93 | ; 94 | 95 | ``` -------------------------------------------------------------------------------- /src/main/scala/com/venn/source/kafka/KafkaUpsertTableSink.java: -------------------------------------------------------------------------------- 1 | ///* 2 | // * Licensed to the Apache Software Foundation (ASF) under one or more 3 | // * contributor license agreements. 
See the NOTICE file distributed with 4 | // * this work for additional information regarding copyright ownership. 5 | // * The ASF licenses this file to You under the Apache License, Version 2.0 6 | // * (the "License"); you may not use this file except in compliance with 7 | // * the License. You may obtain a copy of the License at 8 | // * 9 | // * http://www.apache.org/licenses/LICENSE-2.0 10 | // * 11 | // * Unless required by applicable law or agreed to in writing, software 12 | // * distributed under the License is distributed on an "AS IS" BASIS, 13 | // * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // * See the License for the specific language governing permissions and 15 | // * limitations under the License. 16 | // */ 17 | // 18 | //package com.venn.source.kafka; 19 | // 20 | //import org.apache.flink.annotation.Internal; 21 | //import org.apache.flink.api.common.serialization.SerializationSchema; 22 | //import org.apache.flink.api.common.typeinfo.TypeInformation; 23 | //import org.apache.flink.streaming.api.functions.sink.SinkFunction; 24 | //import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer; 25 | //import org.apache.flink.streaming.connectors.kafka.internals.KeyedSerializationSchemaWrapper; 26 | //import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; 27 | //import org.apache.flink.table.api.TableSchema; 28 | //import org.apache.flink.types.Row; 29 | // 30 | //import java.util.Optional; 31 | //import java.util.Properties; 32 | // 33 | ///** 34 | // * Kafka table sink for writing data into Kafka. 35 | // */ 36 | //@Internal 37 | //public class KafkaUpsertTableSink extends KafkaUpsertTableSinkBase{ 38 | // 39 | // public KafkaUpsertTableSink( 40 | // TableSchema schema, 41 | // String topic, 42 | // Properties properties, 43 | // Optional> partitioner, 44 | // SerializationSchema serializationSchema) { 45 | // 46 | // super(schema, topic, properties, partitioner, serializationSchema); 47 | // } 48 | // 49 | // @Override 50 | // protected SinkFunction createKafkaProducer( 51 | // String topic, 52 | // Properties properties, 53 | // SerializationSchema serializationSchema, 54 | // Optional> partitioner) { 55 | // // 很难理解 ,为什么内部版本用标记过期的构造器,明明有不过期的 56 | // return new FlinkKafkaProducer<>( 57 | // topic, 58 | // new KeyedSerializationSchemaWrapper<>(serializationSchema), 59 | // properties, 60 | // partitioner); 61 | // } 62 | // 63 | // @Override 64 | // public void setKeyFields(String[] keys) { 65 | // 66 | // } 67 | // 68 | // @Override 69 | // public void setIsAppendOnly(Boolean isAppendOnly) { 70 | // // todo just follow HBaseUpsertTableSink 71 | // } 72 | // 73 | // @Override 74 | // public TypeInformation getRecordType() { 75 | // return TypeInformation.of(Row.class); 76 | // } 77 | //} 78 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/demo/relationCntA.scala: -------------------------------------------------------------------------------- 1 | package com.venn.demo 2 | 3 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks 4 | import org.apache.flink.streaming.api.scala.{StreamExecutionEnvironment, _} 5 | import org.apache.flink.streaming.api.scala.function.ProcessWindowFunction 6 | import org.apache.flink.streaming.api.watermark.Watermark 7 | import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows 8 | import org.apache.flink.streaming.api.windowing.time.Time 9 | import 
org.apache.flink.streaming.api.windowing.triggers.ContinuousEventTimeTrigger 10 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow 11 | import org.apache.flink.util.Collector 12 | 13 | import scala.collection.mutable.ListBuffer 14 | 15 | 16 | /** 17 | * 球哥 18 | */ 19 | object relationCntA { 20 | def main(args: Array[String]): Unit = { 21 | 22 | val windowTime = TumblingEventTimeWindows.of(Time.days(1), Time.hours(-8)) 23 | val triggerInterval = 40 24 | var backendFilePath = "" 25 | val parallelism = 1 26 | val evictorTime = 40 27 | backendFilePath = "hdfs:/tmp/relation" //存储checkpoint数据,//fs状态后端配置,如为file:///,则在taskmanager的本地 28 | val env=StreamExecutionEnvironment.getExecutionEnvironment 29 | val endStream=env.addSource(new CustomerSource) 30 | 31 | //先做条件过滤 32 | 33 | val outStream = endStream 34 | .assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks[Tuple2[Long,Long]] { 35 | var currentMaxTimestamp = 0L 36 | val maxOutOfOrderness = 2000L //2秒 37 | var lastEmittedWatermark: Long = Long.MinValue 38 | 39 | override def extractTimestamp(t: Tuple2[Long,Long], l: Long): Long = { 40 | val timestamp = t._1 41 | println("---------2---timestamp--------" + timestamp) 42 | if (timestamp > currentMaxTimestamp) { 43 | currentMaxTimestamp = timestamp 44 | } 45 | timestamp 46 | } 47 | override def getCurrentWatermark: Watermark = { 48 | 49 | //允许延迟2秒 50 | val potentialWM = currentMaxTimestamp - maxOutOfOrderness 51 | if (potentialWM >= lastEmittedWatermark) { 52 | lastEmittedWatermark = potentialWM -1 53 | } 54 | new Watermark(lastEmittedWatermark) 55 | } 56 | 57 | }) 58 | .keyBy(data => data._2) 59 | .window(TumblingEventTimeWindows.of(Time.days(1), Time.hours(-8))) //统计今天内的数据量 60 | .trigger(ContinuousEventTimeTrigger.of(Time.seconds(10))) 61 | //.evictor(TimeEvictor.of(Time.seconds(evictorTime), true)) 62 | .process(new MyProcessWindowFunction) 63 | 64 | 65 | env.execute("kafka test") 66 | } 67 | } 68 | class MyProcessWindowFunction extends ProcessWindowFunction[(Long, Long), (String, Long), Long, TimeWindow] { 69 | 70 | // 一个窗口结束的时候调用一次(一个分组执行一次),不适合大量数据,全量数据保存在内存中,会造成内存溢出 71 | override def process(key: Long, context: Context, elements: Iterable[(Long, Long)], out: Collector[(String, Long)]): Unit = { 72 | // 聚合,注意:整个窗口的数据保存到Iterable,里面有很多行数据, Iterable的size就是日志的总行数 73 | println("dddddddddddddddddd") 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/main/java/com/venn/flink/asyncio/AsyncFunctionForHbaseJava.java: -------------------------------------------------------------------------------- 1 | package com.venn.flink.asyncio; 2 | 3 | 4 | import org.apache.flink.configuration.Configuration; 5 | import org.apache.flink.streaming.api.functions.async.ResultFuture; 6 | import org.apache.flink.streaming.api.functions.async.RichAsyncFunction; 7 | import org.apache.hadoop.hbase.HBaseConfiguration; 8 | import org.apache.hadoop.hbase.HConstants; 9 | import org.apache.hadoop.hbase.TableName; 10 | import org.apache.hadoop.hbase.client.*; 11 | import org.apache.hadoop.hbase.util.Bytes; 12 | import org.slf4j.Logger; 13 | import org.slf4j.LoggerFactory; 14 | 15 | import java.util.ArrayList; 16 | import java.util.List; 17 | 18 | public class AsyncFunctionForHbaseJava extends RichAsyncFunction { 19 | 20 | Table table = null; 21 | Logger logger = LoggerFactory.getLogger(AsyncFunctionForHbaseJava.class); 22 | @Override 23 | public void open(Configuration parameters) throws Exception { 24 | logger.info("async function for hbase java 
open ..."); 25 | super.open(parameters); 26 | org.apache.hadoop.conf.Configuration config = HBaseConfiguration.create(); 27 | 28 | config.set(HConstants.ZOOKEEPER_QUORUM, "venn"); 29 | config.set(HConstants.ZOOKEEPER_CLIENT_PORT, "2181"); 30 | config.setInt(HConstants.HBASE_CLIENT_OPERATION_TIMEOUT, 30000); 31 | config.setInt(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, 30000); 32 | 33 | TableName tableName = TableName.valueOf("async"); 34 | Connection conn = ConnectionFactory.createConnection(config); 35 | table = conn.getTable(tableName); 36 | } 37 | 38 | 39 | /** 40 | * use asyncUser.getId get asyncUser phone 41 | * @param asyncUser 42 | * @param resultFuture 43 | * @throws Exception 44 | */ 45 | @Override 46 | public void asyncInvoke(AsyncUser asyncUser, ResultFuture resultFuture) throws Exception { 47 | 48 | Get get = new Get(asyncUser.getId().getBytes()); 49 | get.addColumn("cf".getBytes(), "phone".getBytes()); 50 | 51 | Result result = table.get(get); 52 | 53 | String phone = Bytes.toString(result.getValue("cf".getBytes(), "phone".getBytes())); 54 | 55 | if ( phone ==null || phone.length() != 11){ 56 | phone = "00000000000"; 57 | } 58 | asyncUser.setPhone(phone); 59 | List list = new ArrayList(); 60 | list.add(asyncUser); 61 | resultFuture.complete(list); 62 | } 63 | 64 | @Override 65 | public void timeout(AsyncUser input, ResultFuture resultFuture) throws Exception { 66 | logger.info("Async function for hbase timeout"); 67 | List list = new ArrayList(); 68 | input.setPhone("00000000001"); 69 | list.add(input); 70 | resultFuture.complete(list); 71 | 72 | } 73 | 74 | /** 75 | * close function 76 | * @throws Exception 77 | */ 78 | @Override 79 | public void close() throws Exception { 80 | logger.info("async function for hbase java close ..."); 81 | super.close(); 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/connector/filesink/RollingFileSinkDemo.scala: -------------------------------------------------------------------------------- 1 | //package com.venn.connector.filesink 2 | // 3 | //import java.io.File 4 | //import java.text.SimpleDateFormat 5 | // 6 | //import com.venn.common.Common 7 | //import org.apache.flink.formats.json.JsonNodeDeserializationSchema 8 | //import org.apache.flink.runtime.state.filesystem.FsStateBackend 9 | //import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode 10 | //import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic} 11 | //import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 12 | //import org.apache.flink.streaming.connectors.fs.StringWriter 13 | //import org.apache.flink.streaming.connectors.fs.bucketing.BucketingSink 14 | //import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer 15 | //import org.apache.flink.api.scala._ 16 | // 17 | ///** 18 | // * 使用BucketingSink 实现 根据‘数据’自定义输出目录 19 | // */ 20 | //object RollingFileSinkDemo { 21 | // 22 | // def main(args: Array[String]): Unit = { 23 | // 24 | // val env = StreamExecutionEnvironment.getExecutionEnvironment 25 | // env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime) 26 | // if ("/".equals(File.separator)) { 27 | // val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true) 28 | // env.setStateBackend(backend) 29 | // env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE) 30 | // } else { 31 | // env.setMaxParallelism(1) 32 | // env.setParallelism(1) 33 | // } 34 | // 35 | // val sdf = new 
SimpleDateFormat("yyyyMMddHHmmss") 36 | // val source = new FlinkKafkaConsumer[ObjectNode]("roll_file_sink", new JsonNodeDeserializationSchema, Common.getProp) 37 | // 38 | // /** 39 | // * 这里有个问题,因为重写了BasePathBucketer,自定义了输出文件, 40 | // * 所有会同时打开多个输出文件,带来文件刷新的问题,在当前文件写完后(这里的表现是:当天 41 | // * 的数据以及全部流过,下一天的文件以及开始写了),会发现 42 | // * 当天的文件中的数据不全,因为数据还没有全部刷到文件,这个时候下一个文件 43 | // * 又开始写了,会发现上一个文件还没刷完。 44 | // * 45 | // * 猜想:每个文件都有个输出缓冲,上一个文件最后一点数据还在缓冲区,下一个文件 46 | // * 又使用新的缓冲区,没办法刷到上一个文件的数据,只有等缓冲区数据满、超时一类的操作触发刷写 ?? 47 | // * 48 | // * 源码BucketingSink.closePartFilesByTime 49 | // * 默认每60秒或大于滚动时间间隔(batchRolloverInterval)(系统时间) 将当前park文件, 50 | // * 将状态从 in-process 修改为 pending,随后 51 | // * 关闭当前的part 文件,数据刷到磁盘 52 | // * 53 | // */ 54 | // val sink = new BucketingSink[String]("D:\\idea_out\\rollfilesink") 55 | // sink.setBucketer(new DayBasePathBucketer) 56 | // sink.setWriter(new StringWriter[String]) 57 | // sink.setBatchSize(1024 * 1024 * 400) // this is 400 MB, 58 | // // sink.setBatchRolloverInterval(24 * 60 * 60 * 1000) // this is 24 hour 59 | //// sink.setInProgressPrefix("inProcessPre") 60 | //// sink.setPendingPrefix("pendingpre") 61 | //// sink.setPartPrefix("partPre") 62 | // 63 | // env.addSource(source) 64 | // .assignAscendingTimestamps(json => { 65 | // sdf.parse(json.get("date").asText()).getTime 66 | // }) 67 | // .map(json => { 68 | // json.get("date") + "-" + json.toString 69 | // }) 70 | // .addSink(sink) 71 | // 72 | // env.execute("rollingFileSink") 73 | // } 74 | // 75 | //} 76 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/stream/api/checkpoint/CheckpointDebug.scala: -------------------------------------------------------------------------------- 1 | package com.venn.stream.api.checkpoint 2 | 3 | import com.venn.common.Common 4 | import com.venn.demo.CustomerSource 5 | import com.venn.source.TumblingEventTimeWindows 6 | import com.venn.util.CheckpointUtil 7 | import org.apache.flink.api.common.functions.RichFlatJoinFunction 8 | import org.apache.flink.api.common.serialization.SimpleStringSchema 9 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 10 | import org.apache.flink.api.scala._ 11 | import org.apache.flink.configuration.Configuration 12 | import org.apache.flink.streaming.api.windowing.time.Time 13 | import org.apache.flink.streaming.connectors.kafka.{FlinkKafkaConsumer, FlinkKafkaProducer} 14 | import org.apache.flink.util.Collector 15 | import org.slf4j.LoggerFactory 16 | 17 | /** 18 | * for debug checkpoint 19 | */ 20 | object CheckpointDebug { 21 | val LOG = LoggerFactory.getLogger("CheckpointDebug") 22 | 23 | def main(args: Array[String]): Unit = { 24 | 25 | val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment 26 | env.setParallelism(1) 27 | CheckpointUtil.setCheckpoint(env, "rocksdb", Common.CHECK_POINT_DATA_DIR, 60) 28 | 29 | val prop = Common.getProp() 30 | val kafkaSource1 = new FlinkKafkaConsumer[String]("source_1", new SimpleStringSchema(), prop) 31 | val kafkaSource2 = new FlinkKafkaConsumer[String]("source_2", new SimpleStringSchema(), prop) 32 | val source1 = env.addSource(kafkaSource1) 33 | .name("source1") 34 | 35 | val source2 = env.addSource(kafkaSource2) 36 | .name("source2") 37 | val map1 = source1.map(item => { 38 | val arr = item.split(",") 39 | ("map_1", arr(0).toLong, arr(1).toLong) 40 | }) 41 | .name("map1") 42 | 43 | val map2 = source2.map(item => { 44 | val arr = item.split(",") 45 | ("map_1", arr(0).toLong, arr(1).toLong) 46 | 
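// note: this branch also tags records as "map_1"; given .name("map2") below, the tag was presumably meant to be "map_2" so the joined output can tell the two inputs apart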
}) 47 | .name("map2") 48 | 49 | val join = map1.join(map2) 50 | .where(_._2) 51 | .equalTo(_._2) 52 | .window(TumblingEventTimeWindows.of(Time.minutes(1))) 53 | .apply(new RichFlatJoinFunction[(String, Long, Long), (String, Long, Long), (String, String, Long, Long)] { 54 | 55 | override def open(parameters: Configuration): Unit = { 56 | LOG.info("RichFlatJoinFunction open") 57 | 58 | } 59 | 60 | // join 61 | override def join(first: (String, Long, Long), second: (String, Long, Long), out: Collector[(String, String, Long, Long)]): Unit = { 62 | 63 | 64 | out.collect((first._1, second._1, first._2, first._3)) 65 | 66 | } 67 | 68 | override def close(): Unit = { 69 | LOG.info("RichFlatJoinFunction close") 70 | 71 | } 72 | }) 73 | .name("join") 74 | 75 | 76 | val kafkaSink = new FlinkKafkaProducer[String]("localhost:9092", "checkpoint_debug", new SimpleStringSchema()) 77 | val sink = join.map(item => { 78 | item._1 + "," + item._2 + "," + item._3 + "," + item._4 79 | }) 80 | .name("joinFormat") 81 | .addSink(kafkaSink) 82 | .name("sink") 83 | 84 | 85 | env.execute("checkpointDebug") 86 | 87 | } 88 | 89 | } 90 | -------------------------------------------------------------------------------- /src/main/java/com/venn/flink/asyncio/MysqlClient.java: -------------------------------------------------------------------------------- 1 | package com.venn.flink.asyncio; 2 | 3 | 4 | import org.apache.flink.shaded.netty4.io.netty.channel.DefaultEventLoop; 5 | import org.apache.flink.shaded.netty4.io.netty.util.concurrent.Future; 6 | import org.apache.flink.shaded.netty4.io.netty.util.concurrent.SucceededFuture; 7 | 8 | import java.sql.DriverManager; 9 | import java.sql.PreparedStatement; 10 | import java.sql.ResultSet; 11 | import java.sql.SQLException; 12 | 13 | public class MysqlClient { 14 | 15 | private static String jdbcUrl = "jdbc:mysql://192.168.229.128:3306?useSSL=false&allowPublicKeyRetrieval=true"; 16 | private static String username = "root"; 17 | private static String password = "123456"; 18 | private static String driverName = "com.mysql.jdbc.Driver"; 19 | private static java.sql.Connection conn; 20 | private static PreparedStatement ps; 21 | 22 | static { 23 | try { 24 | Class.forName(driverName); 25 | conn = DriverManager.getConnection(jdbcUrl, username, password); 26 | ps = conn.prepareStatement("select phone from async.async_test where id = ?"); 27 | } catch (ClassNotFoundException | SQLException e) { 28 | e.printStackTrace(); 29 | } 30 | } 31 | 32 | /** 33 | * execute query 34 | * @param user 35 | * @return 36 | */ 37 | public AsyncUser query1(AsyncUser user) { 38 | 39 | try { 40 | Thread.sleep(10); 41 | } catch (InterruptedException e) { 42 | e.printStackTrace(); 43 | } 44 | 45 | String phone = "0000"; 46 | try { 47 | ps.setString(1, user.getId()); 48 | ResultSet rs = ps.executeQuery(); 49 | if (!rs.isClosed() && rs.next()) { 50 | phone = rs.getString(1); 51 | } 52 | System.out.println("execute query : " + user.getId() + "-2-" + "phone : " + phone +"-"+ System.currentTimeMillis()); 53 | } catch (SQLException e) { 54 | e.printStackTrace(); 55 | } 56 | user.setPhone(phone); 57 | return user; 58 | 59 | } 60 | 61 | public Future query2(AsyncUser user) { 62 | 63 | String phone = "0000"; 64 | try { 65 | ps.setString(1, user.getId()); 66 | ResultSet rs = ps.executeQuery(); 67 | System.out.println(user.getId() + "-3-" + System.currentTimeMillis()); 68 | if (rs.next()) { 69 | phone = rs.getString(1); 70 | } 71 | } catch ( 72 | SQLException e) { 73 | e.printStackTrace(); 74 | } 75 | 
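// the JDBC lookup above ran synchronously; the result is only wrapped in an already-completed SucceededFuture, so any real asynchrony has to come from the caller's thread pool (as AsyncFunctionForMysqlJava does with query1)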
user.setPhone(phone); 76 | return new SucceededFuture(new DefaultEventLoop(), user); 77 | 78 | } 79 | 80 | public static void main(String[] args) { 81 | MysqlClient mysqlClient = new MysqlClient(); 82 | 83 | AsyncUser asyncUser = new AsyncUser(); 84 | asyncUser.setId("526"); 85 | long start = System.currentTimeMillis(); 86 | asyncUser = mysqlClient.query1(asyncUser); 87 | 88 | System.out.println("end : " + (System.currentTimeMillis() - start)); 89 | System.out.println(asyncUser.toString()); 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/question/cdcStarrocks/CdcToStarRocks.java: -------------------------------------------------------------------------------- 1 | package com.venn.question.cdcStarrocks; 2 | 3 | import com.venn.source.mysql.cdc.CommonStringDebeziumDeserializationSchema; 4 | import com.ververica.cdc.connectors.mysql.source.MySqlSource; 5 | import com.ververica.cdc.connectors.mysql.table.StartupOptions; 6 | import org.apache.flink.api.common.eventtime.WatermarkStrategy; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | 9 | import java.util.Properties; 10 | 11 | /** 12 | * mysql cdc demo 13 | *

14 | * cdc 整库同步数据到 starrocks 15 | *

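 * pipeline: MySQL CDC source -> CdcStarMapFunction (json string -> CdcRecord) -> keyBy db_table -> CdcStarProcessFunction batches records (up to batchSize rows or batchInterval ms) -> StarRocks stream load sink (print() stands in while the sink line is commented out)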
16 | * 局限: 17 | * 1. 还未实现 starrocks 端表结构跟随 源端表结构同步变更 18 | * 2. 为了保证效率,仅会在每一个表第一次来的时候判断目标段是否存在该表,如果已经判定该表不存在,后续直接忽略该表的数据变更 19 | * 3. 部分不导入的表,只在sink 的时候做了过滤,前面的操作还是要继续,可以考虑在 反序列化和map中过滤掉目标库中不存在的表数据 20 | */ 21 | public class CdcToStarRocks { 22 | 23 | // 每个批次最大条数和等待时间 24 | private static int batchSize = 10000; 25 | private static long batchInterval = 10 * 1000; 26 | 27 | public static void main(String[] args) throws Exception { 28 | 29 | String ip = "localhost"; 30 | int port = 3306; 31 | String db = "hive_3"; 32 | // String table = "venn.user_log,venn.user_log_1"; 33 | String table = "hive_3.*"; 34 | String user = "root"; 35 | String pass = "123456"; 36 | 37 | String starrocksIp = "10.201.0.230"; 38 | String starrocksPort = "29030"; 39 | String starrocksLoadPort = "28030"; 40 | String starrocksUser = "root"; 41 | String starrocksPass = "123456"; 42 | String starrocksDb = "test"; 43 | 44 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 45 | env.setParallelism(1); 46 | 47 | MySqlSource sourceFunction = MySqlSource.builder() 48 | .hostname(ip) 49 | .port(port) 50 | // 获取两个数据库的所有表 51 | .databaseList(db) 52 | .tableList(table) 53 | .username(user) 54 | .password(pass) 55 | .startupOptions(StartupOptions.latest()) 56 | // .startupOptions(StartupOptions.initial()) 57 | // do not cache schema change 58 | // .includeSchemaChanges(true) 59 | // 自定义 解析器,讲数据解析成 json 60 | .deserializer(new CommonStringDebeziumDeserializationSchema(ip, port)) 61 | .build(); 62 | 63 | env 64 | .fromSource(sourceFunction, WatermarkStrategy.noWatermarks(), "cdc") 65 | .name("source") 66 | .uid("source") 67 | // json 字符串转 CdcRecord 68 | .map(new CdcStarMapFunction()) 69 | .name("map") 70 | .keyBy(record -> record.getDb() + "_" + record.getTable()) 71 | .process(new CdcStarProcessFunction(batchSize, batchInterval)) 72 | .name("process") 73 | .uid("process") 74 | .print(); 75 | // .addSink(new StarRocksSink(starrocksIp, starrocksPort, starrocksLoadPort, starrocksUser, starrocksPass, starrocksDb)) 76 | // .name("sink"); 77 | 78 | env.execute("cdcToStarRocks"); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/stream/api/tableJoin/CacheFile.scala: -------------------------------------------------------------------------------- 1 | package com.venn.stream.api.tableJoin 2 | 3 | import java.io.File 4 | import java.text.SimpleDateFormat 5 | 6 | import com.venn.common.Common 7 | import com.venn.util.CheckpointUtil 8 | import org.apache.flink.api.scala._ 9 | import org.apache.flink.api.common.functions.RichMapFunction 10 | import org.apache.flink.configuration.Configuration 11 | import org.apache.flink.formats.json.JsonNodeDeserializationSchema 12 | import org.apache.flink.runtime.state.filesystem.FsStateBackend 13 | import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode 14 | import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic} 15 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 16 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer 17 | 18 | import scala.io.Source 19 | 20 | /** 21 | * stream join read config from cache file 22 | * register at job start, never change again 23 | */ 24 | object CacheFile { 25 | 26 | def main(args: Array[String]): Unit = { 27 | 28 | val env = StreamExecutionEnvironment.getExecutionEnvironment 29 | // env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime) 30 | if 
("/".equals(File.separator)) { 31 | // val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true) 32 | // env.setStateBackend(backend) 33 | // env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE) 34 | CheckpointUtil.setCheckpoint(env, "rocksdb", Common.CHECK_POINT_DATA_DIR, 10) 35 | env.registerCachedFile("/opt/flink1.7/data/tablejoin.txt", "tablejoin.txt") 36 | } else { 37 | env.setMaxParallelism(1) 38 | env.setParallelism(1) 39 | // file and register name 40 | env.registerCachedFile("C:\\Users\\venn\\git\\venn\\flinkDemo\\src\\main\\resources\\data\\tablejoin.txt", "tablejoin.txt") 41 | } 42 | // cache table 43 | 44 | 45 | val sdf = new SimpleDateFormat("yyyyMMddHHmmss") 46 | val source = new FlinkKafkaConsumer[ObjectNode]("table_join", new JsonNodeDeserializationSchema, Common.getProp) 47 | 48 | 49 | env.addSource(source) 50 | .map(json => { 51 | 52 | val id = json.get("id").asText() 53 | val phone = json.get("phone").asText() 54 | 55 | Tuple2(id, phone) 56 | }) 57 | .map(new RichMapFunction[(String, String), String] { 58 | 59 | var cache = Map("" -> "") 60 | 61 | override def open(parameters: Configuration): Unit = { 62 | 63 | // read cache file 64 | val file = getRuntimeContext.getDistributedCache.getFile("tablejoin.txt") 65 | if (file.canRead) { 66 | val context = Source.fromFile(file, "utf-8").getLines().toArray 67 | 68 | context.foreach(line => { 69 | val tmp = line.split(",") 70 | cache += (tmp(0) -> tmp(1)) 71 | }) 72 | } 73 | } 74 | 75 | override def map(value: (String, String)): String = { 76 | val name = cache.get(value._1) 77 | 78 | value._1 + "," + value._2 + "," + cache.get(value._1) 79 | } 80 | 81 | }) 82 | .print() 83 | 84 | env.execute("cacheFile") 85 | 86 | } 87 | 88 | } 89 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/connector/starrocks/CustJdbcSource.java: -------------------------------------------------------------------------------- 1 | package com.venn.connector.starrocks; 2 | 3 | import org.apache.flink.configuration.Configuration; 4 | import org.apache.flink.metrics.Counter; 5 | import org.apache.flink.metrics.SimpleCounter; 6 | import org.apache.flink.streaming.api.functions.source.RichSourceFunction; 7 | 8 | import java.sql.Connection; 9 | import java.sql.DriverManager; 10 | import java.sql.PreparedStatement; 11 | import java.sql.ResultSet; 12 | import java.util.ArrayList; 13 | import java.util.List; 14 | import java.util.Random; 15 | 16 | public class CustJdbcSource extends RichSourceFunction { 17 | 18 | private String ip; 19 | private String port; 20 | private String user; 21 | private String pass; 22 | private String sql; 23 | private String colSep; 24 | private int batch; 25 | private int interval; 26 | private boolean flag = false; 27 | private transient Counter counter; 28 | private Random random = new Random(); 29 | 30 | private List cache = new ArrayList<>(); 31 | 32 | public CustJdbcSource(String ip, String port, String user, String pass, String sql, String colSep, int batch, int interval) { 33 | this.ip = ip; 34 | this.port = port; 35 | this.user = user; 36 | this.pass = pass; 37 | this.sql = sql; 38 | this.colSep = colSep; 39 | this.batch = batch; 40 | this.interval = interval * 1000; 41 | } 42 | 43 | 44 | @Override 45 | public void open(Configuration parameters) throws Exception { 46 | flag = true; 47 | 48 | counter = new SimpleCounter(); 49 | this.counter = getRuntimeContext() 50 | .getMetricGroup() 51 | .counter("myCounter"); 52 | // load data 53 | 54 | String 
url = "jdbc:mysql://" + ip + ":" + port; 55 | 56 | 57 | Connection connection = DriverManager.getConnection(url, this.user, this.pass); 58 | 59 | PreparedStatement ps = connection.prepareStatement(sql); 60 | 61 | ResultSet rs = ps.executeQuery(); 62 | 63 | int columnCount = rs.getMetaData().getColumnCount(); 64 | 65 | while (rs.next()) { 66 | 67 | StringBuilder builder = new StringBuilder(); 68 | for (int j = 1; j <= columnCount; j++) { 69 | if (j == columnCount) { 70 | builder.append(rs.getString(j)); 71 | } else { 72 | builder.append(rs.getString(j)).append(this.colSep); 73 | } 74 | } 75 | 76 | cache.add(builder.toString()); 77 | } 78 | 79 | System.out.println("load cache size : " + cache.size()); 80 | 81 | } 82 | 83 | @Override 84 | public void run(SourceContext ctx) throws Exception { 85 | 86 | int dataSize = cache.size(); 87 | while (flag) { 88 | int select = random.nextInt(dataSize); 89 | 90 | String data = cache.get(select); 91 | 92 | counter.inc(); 93 | ctx.collect(data); 94 | 95 | if (counter.getCount() % batch == 0) { 96 | Thread.sleep(interval); 97 | } 98 | } 99 | 100 | } 101 | 102 | @Override 103 | public void cancel() { 104 | flag = false; 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/question/dataFluctuation/DataFluctuation.scala: -------------------------------------------------------------------------------- 1 | package com.venn.question.dataFluctuation 2 | 3 | import com.google.gson.JsonParser 4 | import com.venn.entity.KafkaSimpleStringRecord 5 | import com.venn.util.{CheckpointUtil, DateTimeUtil, SimpleKafkaRecordDeserializationSchema} 6 | import org.apache.commons.lang.time.DateFormatUtils 7 | import org.apache.flink.api.common.eventtime.{Watermark, WatermarkGenerator, WatermarkGeneratorSupplier, WatermarkOutput, WatermarkStrategy} 8 | import org.apache.flink.api.common.functions.RichMapFunction 9 | import org.apache.flink.connector.kafka.source.KafkaSource 10 | import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer 11 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 12 | import org.apache.flink.api.scala._ 13 | import org.apache.flink.configuration.Configuration 14 | 15 | /* 16 | 计算数据波动 17 | */ 18 | object DataFluctuation { 19 | 20 | def main(args: Array[String]): Unit = { 21 | 22 | val env = StreamExecutionEnvironment.getExecutionEnvironment 23 | env.setParallelism(1) 24 | 25 | val checkpointInterval = 60 * 1000 26 | val checkpointTimeOut = 2 * checkpointInterval 27 | val checkPointPath = "hdfs:///tmp/flink/checkpoint" 28 | val bootstrapServer = "localhost:9092" 29 | val topic = "user_log" 30 | 31 | // set checkpoint 32 | CheckpointUtil.setCheckpoint(env, "FileSystem", checkPointPath, checkpointInterval, checkpointTimeOut) 33 | 34 | val kafkaSource = KafkaSource 35 | .builder[KafkaSimpleStringRecord]() 36 | .setBootstrapServers(bootstrapServer) 37 | .setTopics(topic) 38 | .setDeserializer(new SimpleKafkaRecordDeserializationSchema) 39 | .setStartingOffsets(OffsetsInitializer.latest()) 40 | .build() 41 | 42 | val source = env.fromSource(kafkaSource, WatermarkStrategy.noWatermarks(), "kafkaSource") 43 | 44 | val stream = source.map(new RichMapFunction[KafkaSimpleStringRecord, (String, Double, Long)] { 45 | var jsonParser: JsonParser = _ 46 | 47 | override def open(parameters: Configuration): Unit = { 48 | jsonParser = new JsonParser 49 | } 50 | 51 | override def map(element: KafkaSimpleStringRecord): (String, Double, Long) = { 52 | 
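// parse the raw Kafka value as JSON and emit (item, price, event-time millis); DateTimeUtil.parse is assumed to return a java.util.Date for the "ts" string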
53 | val json = jsonParser.parse(element.getValue).getAsJsonObject 54 | 55 | val item = json.get("item").getAsString 56 | val price = json.get("price").getAsDouble 57 | val tsStr = json.get("ts").getAsString 58 | val ts = DateTimeUtil.parse(tsStr).getTime 59 | 60 | (item, price, ts) 61 | } 62 | }) 63 | .name("map") 64 | .uid("map") 65 | 66 | stream 67 | .assignTimestampsAndWatermarks(WatermarkStrategy 68 | .forGenerator((_: WatermarkGeneratorSupplier.Context) => { 69 | new WatermarkGenerator[(String,Double, Long)] { 70 | var current = 0l 71 | override def onEvent(t: (String, Double, Long), l: Long, watermarkOutput: WatermarkOutput): Unit = { 72 | if(t._3 > current){ 73 | current = t._3 74 | watermarkOutput.emitWatermark(new Watermark(current)) 75 | } 76 | 77 | } 78 | 79 | override def onPeriodicEmit(watermarkOutput: WatermarkOutput): Unit = { 80 | // 81 | } 82 | } 83 | 84 | })) 85 | 86 | 87 | 88 | } 89 | 90 | } 91 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/connector/kafka/KafkaSinkTest.scala: -------------------------------------------------------------------------------- 1 | package com.venn.connector.kafka 2 | 3 | import com.venn.common.Common 4 | import com.venn.question.retention.RetentionAnalyze.bootstrapServer 5 | import org.apache.flink.api.common.eventtime.WatermarkStrategy 6 | import org.apache.flink.api.common.functions.RichFlatMapFunction 7 | import org.apache.flink.api.common.serialization.SimpleStringSchema 8 | import org.apache.flink.api.scala._ 9 | import org.apache.flink.connector.kafka.sink.{KafkaRecordSerializationSchema, KafkaSink} 10 | import org.apache.flink.connector.kafka.source.KafkaSource 11 | import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer 12 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 13 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer 14 | import org.apache.flink.util.Collector 15 | import org.slf4j.LoggerFactory 16 | 17 | /** 18 | * 19 | * 请教个问题哈,sink 到 kafka,采用默认的分区器,是不是每个并行度都会与kafka的partition维护一个连接 20 | 21 | 比如 10 个并行度,3个 partition,那么维护的连接数总共为 10*3 个

? 是的 22 | 23 | 还是一个taskManager建立一个生产者 一个生产者对应多个分区 24 | 25 | 一个taskManager里面多个slot共享一个生产者? no 26 | */ 27 | object KafkaSinkTest { 28 | 29 | val LOG = LoggerFactory.getLogger("KafkaSinkTest") 30 | 31 | def main(args: Array[String]): Unit = { 32 | 33 | val topic = "user_log" 34 | val sinkTopic = "user_log_sink_1" 35 | 36 | // env 37 | val env = StreamExecutionEnvironment.getExecutionEnvironment 38 | // global parllelism 39 | val parallelism = 4 40 | env.setParallelism(parallelism) 41 | 42 | // kafka source 43 | val kafkaSource = KafkaSource.builder[String]() 44 | .setBootstrapServers(Common.BROKER_LIST) 45 | .setTopics(topic) 46 | .setGroupId("KafkaSinkTest") 47 | .setStartingOffsets(OffsetsInitializer.latest()) 48 | .setValueOnlyDeserializer(new SimpleStringSchema()) 49 | .build(); 50 | 51 | // kafka sink 52 | val kafkaSink = KafkaSink 53 | .builder[String]() 54 | .setBootstrapServers(bootstrapServer) 55 | .setKafkaProducerConfig(Common.getProp) 56 | .setRecordSerializer(KafkaRecordSerializationSchema.builder[String]() 57 | .setTopic(sinkTopic) 58 | // 不指定 key 的序列号器,key 会为 空 59 | // .setKeySerializationSchema(new SimpleStringSchema()) 60 | .setValueSerializationSchema(new SimpleStringSchema()) 61 | .build() 62 | ) 63 | .build() 64 | 65 | 66 | // add source,读取数据 67 | val sourceStream = env.fromSource(kafkaSource, WatermarkStrategy.noWatermarks(), "kafkaSource") 68 | 69 | // map, add current subtask index 70 | val mapStream = sourceStream 71 | // rebalance data to all parallelisn 72 | .rebalance 73 | .flatMap(new RichFlatMapFunction[String, String] { 74 | override def flatMap(element: String, out: Collector[String]): Unit = { 75 | val parallelism = getRuntimeContext.getIndexOfThisSubtask 76 | out.collect(parallelism + "," + element) 77 | 78 | } 79 | }) 80 | .name("flatMap") 81 | .uid("flatMap") 82 | 83 | // sink to kafka, new api 84 | // mapStream.sinkTo(kafkaSink) 85 | 86 | // sink to kafka, old api 87 | val kafkaProducer = new FlinkKafkaProducer[String](bootstrapServer,sinkTopic, new SimpleStringSchema()) 88 | mapStream.addSink(kafkaProducer) 89 | .setParallelism(parallelism) 90 | 91 | env.execute("KafkaSinkTest") 92 | } 93 | 94 | } 95 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/cep/ContinueRising.scala: -------------------------------------------------------------------------------- 1 | //package com.venn.cep 2 | // 3 | //import java.util 4 | // 5 | //import org.apache.flink.api.scala._ 6 | //import org.apache.flink.cep.functions.PatternProcessFunction 7 | //import org.apache.flink.cep.pattern.conditions.IterativeCondition 8 | //import org.apache.flink.cep.scala.CEP 9 | //import org.apache.flink.cep.scala.pattern.Pattern 10 | //import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment} 11 | //import org.apache.flink.streaming.api.windowing.time.Time 12 | //import org.apache.flink.util.Collector 13 | //import org.slf4j.LoggerFactory 14 | // 15 | ///** 16 | // * Cep for price continue rising 17 | // * CEP : 匹配价格连续上涨(keyby 可以匹配同一个商品价格连续上涨) 18 | // * 19 | // */ 20 | //object ContinueRising { 21 | // val logger = LoggerFactory.getLogger(this.getClass) 22 | // 23 | // def main(args: Array[String]): Unit = { 24 | // 25 | // val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment 26 | // 27 | // // 输入 id, volumn, name 三个字段的数据 28 | // val input = env.addSource(new CepDemoSourceFunction) 29 | // .map(str => { 30 | // // logger.info(str) 31 | // val arr = str.split(",") 32 | // 
val id = arr(0) 33 | // val volume = arr(1).toInt 34 | // val name = arr(2) 35 | // CepDemoEvent(id, volume, name, arr(3).toInt) 36 | // }) 37 | // // Applying your pattern on a non-keyed stream will result in a job with parallelism equal to 1 38 | // // .keyBy(_.id) 39 | // 40 | // /** 41 | // * 模式说明: 42 | // * 匹配价格连续上涨 43 | // * 44 | // * 匹配后跳过策略: 默认从上次的开始事件后的下一个事件开始 45 | // * 46 | // */ 47 | // val pattern = Pattern.begin[CepDemoEvent]("first") 48 | // .next("second").where(new IterativeCondition[CepDemoEvent] { 49 | // override def filter(currentEvent: CepDemoEvent, context: IterativeCondition.Context[CepDemoEvent]): Boolean = { 50 | // // get last event 51 | // val firstList = context.getEventsForPattern("first").iterator() 52 | // var lastStart: CepDemoEvent = null 53 | // // get last from firstList, and get the last one 54 | // while (firstList.hasNext) { 55 | // lastStart = firstList.next() 56 | // } 57 | // if (currentEvent.volume > lastStart.volume) { 58 | // true 59 | // } else { 60 | // false 61 | // } 62 | // } 63 | // }) 64 | // // always remember add within, it will reduce the state usage 65 | // .within(Time.minutes(5 * 60 * 1000)) 66 | // 67 | // val patternStream = CEP.pattern(input, pattern) 68 | // 69 | // val result: DataStream[String] = patternStream.process( 70 | // new PatternProcessFunction[CepDemoEvent, String]() { 71 | // override def processMatch( 72 | // events: util.Map[String, util.List[CepDemoEvent]], 73 | // ctx: PatternProcessFunction.Context, 74 | // out: Collector[String]): Unit = { 75 | // // get the change 76 | // val first = events.get("first").get(0) 77 | // val second = events.get("second").get(0) 78 | // val change = second.volume - first.volume 79 | // out.collect("from : " + first.id + ", to " + second.id + ", change : " + change) 80 | // } 81 | // 82 | // }) 83 | // 84 | // // for convenient, just print 85 | // result.print() 86 | // env.execute(this.getClass.getName) 87 | // } 88 | // 89 | // 90 | //} 91 | // 92 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/source/kafka/KafkaUpsertTableSourceSinkFactory.java: -------------------------------------------------------------------------------- 1 | ///* 2 | // * Licensed to the Apache Software Foundation (ASF) under one or more 3 | // * contributor license agreements. See the NOTICE file distributed with 4 | // * this work for additional information regarding copyright ownership. 5 | // * The ASF licenses this file to You under the Apache License, Version 2.0 6 | // * (the "License"); you may not use this file except in compliance with 7 | // * the License. You may obtain a copy of the License at 8 | // * 9 | // * http://www.apache.org/licenses/LICENSE-2.0 10 | // * 11 | // * Unless required by applicable law or agreed to in writing, software 12 | // * distributed under the License is distributed on an "AS IS" BASIS, 13 | // * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // * See the License for the specific language governing permissions and 15 | // * limitations under the License. 
16 | // */ 17 | // 18 | //package com.venn.source.kafka; 19 | // 20 | //import org.apache.flink.api.common.serialization.DeserializationSchema; 21 | //import org.apache.flink.api.common.serialization.SerializationSchema; 22 | //import org.apache.flink.streaming.connectors.kafka.KafkaTableSource; 23 | //import org.apache.flink.streaming.connectors.kafka.KafkaTableSourceBase; 24 | //import org.apache.flink.streaming.connectors.kafka.config.StartupMode; 25 | //import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; 26 | //import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; 27 | //import org.apache.flink.table.api.TableSchema; 28 | //import org.apache.flink.table.sources.RowtimeAttributeDescriptor; 29 | //import org.apache.flink.types.Row; 30 | // 31 | //import java.util.List; 32 | //import java.util.Map; 33 | //import java.util.Optional; 34 | //import java.util.Properties; 35 | // 36 | ///** 37 | // * Factory for creating configured instances of {@link KafkaTableSource}. 38 | // */ 39 | //public class KafkaUpsertTableSourceSinkFactory extends KafkaUpsertTableSourceSinkFactoryBase { 40 | // 41 | // @Override 42 | // protected String kafkaVersion() { 43 | // return MyKafkaValidator.CONNECTOR_VERSION_VALUE_UNIVERSAL; 44 | // } 45 | // 46 | // @Override 47 | // protected boolean supportsKafkaTimestamps() { 48 | // return true; 49 | // } 50 | // 51 | // @Override 52 | // protected KafkaTableSourceBase createKafkaTableSource( 53 | // TableSchema schema, 54 | // Optional proctimeAttribute, 55 | // List rowtimeAttributeDescriptors, 56 | // Map fieldMapping, 57 | // String topic, 58 | // Properties properties, 59 | // DeserializationSchema deserializationSchema, 60 | // StartupMode startupMode, 61 | // Map specificStartupOffsets) { 62 | // 63 | // return new KafkaTableSource( 64 | // schema, 65 | // proctimeAttribute, 66 | // rowtimeAttributeDescriptors, 67 | // Optional.of(fieldMapping), 68 | // topic, 69 | // properties, 70 | // deserializationSchema, 71 | // startupMode, 72 | // specificStartupOffsets); 73 | // } 74 | // 75 | // @Override 76 | // protected KafkaUpsertTableSink createKafkaTableSink( 77 | // TableSchema schema, 78 | // String topic, 79 | // Properties properties, 80 | // Optional> partitioner, 81 | // SerializationSchema serializationSchema) { 82 | // 83 | // return new KafkaUpsertTableSink( 84 | // schema, 85 | // topic, 86 | // properties, 87 | // partitioner, 88 | // serializationSchema); 89 | // } 90 | //} 91 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/question/tryFlink/FraudDetection.scala: -------------------------------------------------------------------------------- 1 | package com.venn.question.tryFlink 2 | 3 | import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor} 4 | import org.apache.flink.api.scala._ 5 | import org.apache.flink.api.scala.typeutils.Types 6 | import org.apache.flink.configuration.Configuration 7 | import org.apache.flink.streaming.api.functions.KeyedProcessFunction 8 | import org.apache.flink.streaming.api.functions.sink.SinkFunction 9 | import org.apache.flink.streaming.api.functions.source.SourceFunction 10 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 11 | import org.apache.flink.util.Collector 12 | import org.slf4j.LoggerFactory 13 | 14 | import scala.util.Random 15 | 16 | /** 17 | * source from : flink official website : 基于 DataStream API 实现欺诈检测 18 | */ 19 | object FraudDetection { 
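// detection rule (implemented in FuaudDetectionProcessFunction below): a small transaction (< 2) arms a per-account flag in keyed ValueState; if a large transaction (> 95) follows while the flag is set, the account id is emitted as an alert, and a processing-time timer clears the flag 10 seconds after the small transaction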
20 | 21 | private val LOG = LoggerFactory.getLogger("FraudDetection") 22 | 23 | def main(args: Array[String]): Unit = { 24 | 25 | val env = StreamExecutionEnvironment.getExecutionEnvironment 26 | env.setParallelism(1) 27 | 28 | val source = env.addSource(new FraudDetectionSource) 29 | .name("source") 30 | 31 | val process = source 32 | .keyBy(_._1) 33 | .process(new FraudDetectionProcessFunction) 34 | 35 | process.addSink(new SinkFunction[String]{ 36 | override def invoke(element: String, context: SinkFunction.Context): Unit = { 37 | println("fraud detection alert : " + element) 38 | } 39 | } ) 40 | 41 | env.execute("FraudDetection") 42 | 43 | } 44 | 45 | } 46 | 47 | class FraudDetectionSource extends SourceFunction[(String, Double)] { 48 | val LOG = LoggerFactory.getLogger("FraudDetectionSource") 49 | var isRunning = true 50 | val random = new Random() 51 | 52 | override def run(sourceContext: SourceFunction.SourceContext[(String, Double)]): Unit = { 53 | 54 | while (isRunning) { 55 | val accountId = "" + random.nextInt(1000) 56 | val amt = random.nextDouble() * 100 57 | 58 | sourceContext.collect((accountId, amt)) 59 | 60 | Thread.sleep(1) 61 | } 62 | LOG.info("source finish") 63 | } 64 | 65 | override def cancel(): Unit = { 66 | 67 | LOG.info("source canceled...") 68 | isRunning = false 69 | } 70 | } 71 | 72 | class FraudDetectionProcessFunction extends KeyedProcessFunction[String, (String, Double), String] { 73 | 74 | var smallFlag: ValueState[java.lang.Boolean] = _ 75 | 76 | override def open(parameters: Configuration): Unit = { 77 | smallFlag = getRuntimeContext.getState(new ValueStateDescriptor("smallTransaction", Types.BOOLEAN)) 78 | } 79 | 80 | override def processElement(element: (String, Double), context: KeyedProcessFunction[String, (String, Double), String]#Context, collector: Collector[String]): Unit = { 81 | 82 | if (smallFlag.value() != null && smallFlag.value() && element._2 > 95) { 83 | collector.collect(element._1) 84 | } 85 | 86 | if (element._2 < 2) { 87 | smallFlag.update(true) 88 | context.timerService().registerProcessingTimeTimer(System.currentTimeMillis() + 10 * 1000) 89 | } 90 | 91 | } 92 | 93 | 94 | override def onTimer(timestamp: Long, ctx: KeyedProcessFunction[String, (String, Double), String]#OnTimerContext, out: Collector[String]): Unit = { 95 | println("clear key : " + ctx.getCurrentKey) 96 | smallFlag.clear() 97 | } 98 | 99 | override def close(): Unit = { 100 | smallFlag.clear() 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/connector/starrocks/StreamLoadTestV2.scala: -------------------------------------------------------------------------------- 1 | package com.venn.connector.starrocks 2 | 3 | import com.starrocks.connector.flink.StarRocksSink 4 | import com.starrocks.connector.flink.table.sink.StarRocksSinkOptions 5 | import org.apache.flink.api.common.functions.RichMapFunction 6 | import org.apache.flink.api.scala._ 7 | import org.apache.flink.configuration.Configuration 8 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 9 | import org.slf4j.LoggerFactory 10 | 11 | import scala.util.Random 12 | 13 | object StreamLoadTestV2 { 14 | 15 | val LOG = LoggerFactory.getLogger("StreamLoadTestV2") 16 | val COL_SEP = "\\\\x01" 17 | val ROW_SEP = "\\\\x02" 18 | val ip = "10.201.0.230" 19 | val jdbcPort = "29030" 20 | val httpPort = "28030" 21 | val user = "root" 22 | val pass = "123456" 23 | val sql = "select * from test.t_starrocks_load_error limit 2000" 24 | 
var batch = 1000 25 | var interval = 5 26 | 27 | def main(args: Array[String]): Unit = { 28 | 29 | if (args.length >= 2) { 30 | batch = Integer.parseInt(args(0)) 31 | interval = Integer.parseInt(args(1)) 32 | } 33 | 34 | 35 | val env = StreamExecutionEnvironment.getExecutionEnvironment 36 | env.setParallelism(1) 37 | 38 | 39 | 40 | val source = env.addSource(new CustJdbcSource(ip, jdbcPort, user, pass, sql, COL_SEP, batch, interval)) 41 | 42 | val stream = source.map(new RichMapFunction[String, String] { 43 | 44 | var random: Random = _ 45 | 46 | override def open(parameters: Configuration): Unit = { 47 | random = new Random(); 48 | 49 | } 50 | 51 | override def map(element: String): String = { 52 | 53 | val index = element.indexOf(COL_SEP) 54 | 55 | val prex = element.substring(0, index) 56 | val subx = element.substring(index) 57 | 58 | var newPrex = 0l 59 | try { 60 | newPrex = prex.toLong / (random.nextInt(10000) + 1) 61 | } catch { 62 | case ex: java.lang.ArithmeticException => 63 | newPrex = random.nextLong() 64 | ex.printStackTrace() 65 | LOG.info("prex : {}", prex) 66 | 67 | case _ => 68 | 69 | } 70 | 71 | newPrex + subx 72 | } 73 | }) 74 | 75 | val sink = StarRocksSink.sink( 76 | // the sink options 77 | StarRocksSinkOptions.builder() 78 | .withProperty("jdbc-url", "jdbc:mysql://" + ip + ":" + jdbcPort) 79 | .withProperty("load-url", ip + ":" + httpPort) 80 | .withProperty("username", user) 81 | .withProperty("password", pass) 82 | .withProperty("database-name", "test") 83 | .withProperty("table-name", "t_starrocks_load_error_3") 84 | // 自 2.4 版本,支持更新主键模型中的部分列。您可以通过以下两个属性指定需要更新的列。 85 | // .withProperty("sink.properties.partial_update", "true") 86 | // .withProperty("sink.properties.columns", "k1,k2,k3") 87 | // .withProperty("sink.properties.format", "json") 88 | // .withProperty("sink.properties.strip_outer_array", "true") 89 | .withProperty("sink.properties.row_delimiter", ROW_SEP) 90 | .withProperty("sink.properties.column_separator", COL_SEP) 91 | // 设置并行度,多并行度情况下需要考虑如何保证数据有序性 92 | .withProperty("sink.parallelism", "1") 93 | .withProperty("sink.buffer-flush.max-rows", "" + batch) 94 | .build()) 95 | 96 | stream.addSink(sink) 97 | .uid("sink") 98 | .name("sink") 99 | 100 | env.execute("StreamLoadTest") 101 | 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /src/main/java/com/venn/demo/AsyncRedisFunction.java: -------------------------------------------------------------------------------- 1 | package com.venn.demo; 2 | 3 | import com.google.gson.JsonParser; 4 | import io.lettuce.core.RedisClient; 5 | import io.lettuce.core.RedisFuture; 6 | import io.lettuce.core.api.StatefulRedisConnection; 7 | import io.lettuce.core.api.async.RedisAsyncCommands; 8 | import org.apache.flink.configuration.Configuration; 9 | import org.apache.flink.streaming.api.functions.async.ResultFuture; 10 | import org.apache.flink.streaming.api.functions.async.RichAsyncFunction; 11 | 12 | import java.util.Collections; 13 | import java.util.concurrent.CompletableFuture; 14 | import java.util.concurrent.ExecutionException; 15 | import java.util.function.Consumer; 16 | 17 | /** 18 | * async redis function 19 | */ 20 | public class AsyncRedisFunction extends RichAsyncFunction { 21 | private RedisAsyncCommands async; 22 | private String url; 23 | private StatefulRedisConnection connection; 24 | private RedisClient redisClient; 25 | private JsonParser jsonParser; 26 | 27 | public AsyncRedisFunction(String url) { 28 | this.url = url; 29 | } 30 | 31 | @Override 
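    // Usage sketch (illustrative only): a RichAsyncFunction like this one is applied to a
    // stream via AsyncDataStream, e.g.
    //   AsyncDataStream.unorderedWait(stream,
    //       new AsyncRedisFunction("redis://localhost:6379"), 30, TimeUnit.SECONDS, 100);
    // The Redis URL, the 30-second timeout and the capacity of 100 are placeholder values.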
32 | public void open(Configuration parameters) throws Exception { 33 | // redis standalone 34 | redisClient = RedisClient.create(url); 35 | connection = redisClient.connect(); 36 | 37 | // redis cluster 38 | // List uriList = new ArrayList<>(); 39 | // for (String tmp : url.split(",")) { 40 | // String[] str = tmp.split(":"); 41 | // String host = str[0]; 42 | // int port = Integer.parseInt(str[1]); 43 | // RedisURI redisUri = RedisURI.Builder.redis(host).withPort(port).build(); 44 | // uriList.add(redisUri); 45 | // } 46 | // RedisClusterClient redisClient = redisClusterClient.create(uriList); 47 | // connection = redisClient.connect(); 48 | 49 | // async 50 | async = connection.async(); 51 | 52 | jsonParser = new JsonParser(); 53 | } 54 | 55 | 56 | //数据处理的方法 57 | @Override 58 | public void asyncInvoke(String input, ResultFuture resultFuture) throws Exception { 59 | 60 | String userId = jsonParser.parse(input).getAsJsonObject().get("user_id").getAsString(); 61 | // query string 62 | RedisFuture redisFuture = async.get(userId); 63 | // query hash 64 | // RedisFuture redisFuture = async.hget("key", input); 65 | // get all 66 | // async.hgetall(input); 67 | 68 | // async query and get result 69 | CompletableFuture.supplyAsync(() -> { 70 | try { 71 | return redisFuture.get(); 72 | } catch (InterruptedException e) { 73 | e.printStackTrace(); 74 | } catch (ExecutionException e) { 75 | e.printStackTrace(); 76 | } 77 | // if get exception 78 | return "exception"; 79 | }).thenAccept(new Consumer() { 80 | @Override 81 | public void accept(String result) { 82 | if (result == null) { 83 | result = "nothing"; 84 | } 85 | // return result 86 | resultFuture.complete(Collections.singleton(input + " - " + result)); 87 | } 88 | }); 89 | } 90 | 91 | @Override 92 | public void close() throws Exception { 93 | super.close(); 94 | if (connection != null) { 95 | connection.close(); 96 | } 97 | if (redisClient != null) { 98 | redisClient.shutdown(); 99 | } 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/stream/api/intervalJoin/IntervalJoinDemo.scala: -------------------------------------------------------------------------------- 1 | package com.venn.stream.api.intervalJoin 2 | 3 | import java.io.File 4 | import java.text.SimpleDateFormat 5 | 6 | import com.venn.common.Common 7 | import com.venn.source.TumblingEventTimeWindows 8 | import com.venn.util.CheckpointUtil 9 | import org.apache.flink.api.common.functions.ReduceFunction 10 | import org.apache.flink.formats.json.JsonNodeDeserializationSchema 11 | import org.apache.flink.runtime.state.filesystem.FsStateBackend 12 | import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode 13 | import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic} 14 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 15 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer 16 | import org.apache.flink.api.scala._ 17 | import org.apache.flink.streaming.api.windowing.time.Time 18 | 19 | /** 20 | * interval join demo 21 | */ 22 | object IntervalJoinDemo { 23 | 24 | def main(args: Array[String]): Unit = { 25 | 26 | val env = StreamExecutionEnvironment.getExecutionEnvironment 27 | // env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime) 28 | // if ("/".equals(File.separator)) { 29 | // val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true) 30 | // env.setStateBackend(backend) 31 | // 
env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE) 32 | // } else { 33 | // env.setMaxParallelism(1) 34 | // env.setParallelism(1) 35 | // } 36 | // env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime) 37 | CheckpointUtil.setCheckpoint(env, "rocksdb", Common.CHECK_POINT_DATA_DIR, 10) 38 | 39 | val sdf = new SimpleDateFormat("yyyyMMddHHmmss") 40 | val sourceLeft = new FlinkKafkaConsumer[ObjectNode]("topic_left", new JsonNodeDeserializationSchema, Common.getProp) 41 | val sourceRight = new FlinkKafkaConsumer[ObjectNode]("topic_right", new JsonNodeDeserializationSchema, Common.getProp) 42 | 43 | sourceLeft.setStartFromLatest() 44 | sourceRight.setStartFromLatest() 45 | 46 | // transfer left stream json to AsyncUser 47 | val leftStream = env.addSource(sourceLeft) 48 | .map(json => { 49 | val id = json.get("id").asText() 50 | val name = json.get("name").asText() 51 | val date = json.get("date").asText() 52 | IntervalUser(id, name, null, date) 53 | }) 54 | .assignAscendingTimestamps(u => sdf.parse(u.date).getTime) 55 | .keyBy(0) 56 | // transfer right stream json to AsyncUser 57 | val rightStream = env.addSource(sourceRight) 58 | .map(json => { 59 | val id = json.get("id").asText() 60 | val phone = json.get("phone").asText() 61 | val date = json.get("date").asText() 62 | IntervalUser(id, null, phone, date) 63 | }) 64 | .assignAscendingTimestamps(u => sdf.parse(u.date).getTime) 65 | .keyBy(0) 66 | 67 | // join it 68 | /* 69 | 左边为主,两边都可以触发,触发范围: 70 | a.timestamp + lowerBound <= b.timestamp <= a.timestamp + upperBound 71 | 72 | */ 73 | leftStream 74 | .intervalJoin(rightStream) 75 | .between(Time.seconds(-2), Time.seconds(7)) 76 | //.lowerBoundExclusive() // 排除下界 77 | // .upperBoundExclusive() // 排除上界 78 | .process(new IntervalJoinProcessFunctionDemo) 79 | /*.assignAscendingTimestamps(_.phone.toLong) 80 | .keyBy("id") 81 | .window(TumblingEventTimeWindows.of(Time.milliseconds(10))) 82 | .min("id")*/ 83 | /*.reduce(new ReduceFunction[IntervalUser] { 84 | override def reduce(value1: IntervalUser, value2: IntervalUser): IntervalUser = { 85 | println("xx -> " + value2) 86 | value2 87 | } 88 | })*/ 89 | 90 | .print("result -> ") 91 | 92 | env.execute("IntervalJoinDemo") 93 | } 94 | 95 | } 96 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/cep/AfterMatchStrategyDemo.scala: -------------------------------------------------------------------------------- 1 | //package com.venn.cep 2 | // 3 | //import java.util 4 | // 5 | //import com.venn.common.Common 6 | //import org.apache.flink.api.common.serialization.SimpleStringSchema 7 | //import org.apache.flink.api.scala._ 8 | //import org.apache.flink.cep.functions.PatternProcessFunction 9 | //import org.apache.flink.cep.nfa.aftermatch.AfterMatchSkipStrategy 10 | //import org.apache.flink.cep.pattern.conditions.IterativeCondition 11 | //import org.apache.flink.cep.scala.CEP 12 | //import org.apache.flink.cep.scala.pattern.Pattern 13 | //import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment} 14 | //import org.apache.flink.streaming.api.windowing.time.Time 15 | //import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer 16 | //import org.apache.flink.util.Collector 17 | //import org.slf4j.LoggerFactory 18 | // 19 | ///** 20 | // * Cep for after match strategy 21 | // * CEP : 模式匹配后的跳过策略测试: 22 | // * 23 | // * NO_SKIP: 24 | // * SKIP_TO_NEXT: 25 | // * SKIP_PAST_LAST_EVENT: 26 | // * SKIP_TO_FIRST[b]: 27 | // * SKIP_TO_LAST[b]: 28 | // * 29 
| // * Command : 30 | // * 31 | // */ 32 | //object AfterMatchStrategyDemo { 33 | // val logger = LoggerFactory.getLogger(this.getClass) 34 | // 35 | // def main(args: Array[String]): Unit = { 36 | // 37 | // val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment 38 | // 39 | // env.setParallelism(1) 40 | // val topic = "match_strategy" 41 | // val source = new FlinkKafkaConsumer[String](topic, new SimpleStringSchema(), Common.getProp) 42 | // 43 | // val input = env.addSource(source) 44 | // .map(str => { 45 | // // logger.info(str) 46 | // val arr = str.split(",") 47 | // val id = arr(0) 48 | // val name = arr(1) 49 | // CepDemoEvent(id, 0, name, 0) 50 | // }).setParallelism(1) 51 | // // Applying your pattern on a non-keyed stream will result in a job with parallelism equal to 1 52 | // // .keyBy(_.id) 53 | // 54 | // /** 55 | // * 模式说明: 56 | // * 匹配价格连续上涨 57 | // * 58 | // * 匹配后跳过策略: 默认从上次的开始事件后的下一个事件开始 59 | // * 60 | // * NO_SKIP:default 61 | // * SKIP_TO_NEXT: 62 | // * SKIP_PAST_LAST_EVENT: 63 | // * SKIP_TO_FIRST[b]: 64 | // * SKIP_TO_LAST[b]: 65 | // * 66 | // */ 67 | // val noSkit = AfterMatchSkipStrategy.noSkip() 68 | // val pattern = Pattern.begin[CepDemoEvent]("first").where(event => { 69 | // event.name.equals("a") 70 | // }) 71 | // // .timesOrMore(1) 72 | // .next("second").where(event => { 73 | // event.name.equals("a") 74 | // }) 75 | // .next("third").where(event => { 76 | // event.name.equals("b") 77 | // }) 78 | //// .notNext() 79 | // 80 | // // always remember add within, it will reduce the state usage 81 | // // .within(Time.minutes(5 * 60 * 1000)) 82 | // 83 | // val patternStream = CEP.pattern(input, pattern) 84 | // 85 | // val result: DataStream[String] = patternStream.process( 86 | // new PatternProcessFunction[CepDemoEvent, String]() { 87 | // override def processMatch( 88 | // events: util.Map[String, util.List[CepDemoEvent]], 89 | // ctx: PatternProcessFunction.Context, 90 | // out: Collector[String]): Unit = { 91 | // // get the change 92 | // val first = events.get("first").get(0) 93 | // val second = events.get("second").get(0) 94 | // val third = events.get("third").get(0) 95 | // out.collect("first : " + first + ", first " + second + ", third : " + third) 96 | // } 97 | // 98 | // }) 99 | // 100 | // // for convenient, just print 101 | // result.print() 102 | // env.execute(this.getClass.getName) 103 | // } 104 | // 105 | // 106 | //} 107 | // 108 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/connector/starrocks/StreamLoadTest.scala: -------------------------------------------------------------------------------- 1 | package com.venn.connector.starrocks 2 | 3 | import com.starrocks.connector.flink.StarRocksSink 4 | import com.starrocks.connector.flink.table.sink.StarRocksSinkOptions 5 | import org.apache.flink.api.common.functions.RichMapFunction 6 | import org.apache.flink.api.common.restartstrategy.RestartStrategies 7 | import org.apache.flink.api.common.restartstrategy.RestartStrategies.FixedDelayRestartStrategyConfiguration 8 | import org.apache.flink.api.common.time.Time 9 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 10 | import org.apache.flink.api.scala._ 11 | import org.apache.flink.configuration.{Configuration, RestartStrategyOptions} 12 | import org.slf4j.LoggerFactory 13 | 14 | import scala.util.Random 15 | 16 | object StreamLoadTest { 17 | 18 | val LOG = LoggerFactory.getLogger("StreamLoadTest") 19 | // val COL_SEP = 
"\\\\x01"; 20 | // val ROW_SEP = "\\\\x02"; 21 | val COL_SEP = "," 22 | val ROW_SEP = "\\n" 23 | val ip = "10.201.0.230" 24 | val jdbcPort = "29030" 25 | val httpPort = "28030" 26 | val user = "root" 27 | val pass = "123456" 28 | val sql = "select * from test.t_starrocks_load_error limit 1000" 29 | var batch = 1000 30 | var interval = 5 31 | 32 | def main(args: Array[String]): Unit = { 33 | 34 | if (args.length >= 2) { 35 | batch = Integer.parseInt(args(0)) 36 | interval = Integer.parseInt(args(1)) 37 | } 38 | 39 | 40 | val env = StreamExecutionEnvironment.getExecutionEnvironment 41 | env.setParallelism(1) 42 | env.setRestartStrategy(RestartStrategies.fixedDelayRestart(10, Time.seconds(20))) 43 | 44 | val source = env.addSource(new CustJdbcSource(ip, jdbcPort, user, pass, sql, COL_SEP, batch, interval)) 45 | 46 | val stream = source.map(new RichMapFunction[String, String] { 47 | 48 | var random: Random = _ 49 | 50 | override def open(parameters: Configuration): Unit = { 51 | random = new Random(); 52 | 53 | } 54 | 55 | override def map(element: String): String = { 56 | 57 | val index = element.indexOf(COL_SEP) 58 | 59 | val prex = element.substring(0, index) 60 | val subx = element.substring(index) 61 | 62 | var newPrex = 0l 63 | try { 64 | newPrex = prex.toLong / (random.nextInt(10000) + 1) 65 | } catch { 66 | case ex: java.lang.ArithmeticException => 67 | newPrex = random.nextLong() 68 | ex.printStackTrace() 69 | LOG.info("prex : {}", prex) 70 | 71 | case _ => 72 | 73 | } 74 | 75 | newPrex + subx 76 | } 77 | }) 78 | 79 | val sink = StarRocksSink.sink( 80 | // the sink options 81 | StarRocksSinkOptions.builder() 82 | .withProperty("jdbc-url", "jdbc:mysql://" + ip + ":" + jdbcPort) 83 | .withProperty("load-url", ip + ":" + httpPort) 84 | .withProperty("username", user) 85 | .withProperty("password", pass) 86 | .withProperty("database-name", "test") 87 | .withProperty("table-name", "t_starrocks_load_error_3") 88 | // 自 2.4 版本,支持更新主键模型中的部分列。您可以通过以下两个属性指定需要更新的列。 89 | // .withProperty("sink.properties.partial_update", "true") 90 | // .withProperty("sink.properties.columns", "k1,k2,k3") 91 | // .withProperty("sink.properties.format", "json") 92 | // .withProperty("sink.properties.strip_outer_array", "true") 93 | .withProperty("sink.properties.row_delimiter", ROW_SEP) 94 | .withProperty("sink.properties.column_separator", COL_SEP) 95 | // 设置并行度,多并行度情况下需要考虑如何保证数据有序性 96 | .withProperty("sink.parallelism", "1") 97 | .withProperty("sink.version", "v1") 98 | .withProperty("sink.buffer-flush.max-rows", "" + batch) 99 | .build()) 100 | 101 | stream.addSink(sink) 102 | .uid("sink") 103 | .name("sink") 104 | 105 | env.execute("StreamLoadTest") 106 | 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /src/main/java/com/venn/demo/TypeTest.java: -------------------------------------------------------------------------------- 1 | package com.venn.demo; 2 | 3 | import com.venn.common.Common; 4 | import com.venn.util.DateTimeUtil; 5 | import org.apache.flink.api.common.RuntimeExecutionMode; 6 | import org.apache.flink.api.common.eventtime.WatermarkStrategy; 7 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 8 | import org.apache.flink.api.common.typeinfo.TypeInformation; 9 | import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema; 10 | import org.apache.flink.connector.kafka.sink.KafkaSink; 11 | import org.apache.flink.connector.kafka.source.KafkaSource; 12 | import 
org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer; 13 | import org.apache.flink.connector.kafka.source.reader.deserializer.KafkaRecordDeserializationSchema; 14 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 15 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 16 | import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows; 17 | import org.apache.flink.streaming.api.windowing.time.Time; 18 | import org.apache.flink.util.Collector; 19 | import org.apache.kafka.clients.consumer.ConsumerRecord; 20 | 21 | import java.io.IOException; 22 | 23 | public class TypeTest { 24 | 25 | public static void main(String[] args) throws Exception { 26 | 27 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 28 | 29 | // env.setRuntimeMode(RuntimeExecutionMode.BATCH); 30 | 31 | env.setParallelism(1); 32 | String bootstrapServer = "localhost:9092"; 33 | String topic = "user_log"; 34 | // source 35 | KafkaSource kafkaSource = KafkaSource 36 | .builder() 37 | .setBootstrapServers(bootstrapServer) 38 | .setGroupId("ra") 39 | .setTopics(topic) 40 | .setBounded(OffsetsInitializer.timestamp(DateTimeUtil.parse("2022-04-29 12:00:00").getTime())) 41 | // .setUnbounded(OffsetsInitializer.latest()) 42 | .setStartingOffsets(OffsetsInitializer.earliest()) 43 | .setDeserializer(new KafkaRecordDeserializationSchema() { 44 | @Override 45 | public TypeInformation getProducedType() { 46 | return null; 47 | } 48 | @Override 49 | public void deserialize(ConsumerRecord record, Collector out) throws IOException { 50 | byte[] value = (byte[])record.value(); 51 | 52 | out.collect(new String(value)); 53 | } 54 | }) 55 | .build(); 56 | 57 | 58 | SingleOutputStreamOperator source = env.fromSource(kafkaSource, WatermarkStrategy.noWatermarks(), "kafkaSource") 59 | .returns(String.class); 60 | 61 | SingleOutputStreamOperator stream = source.map(aa -> aa) 62 | .returns(String.class) 63 | .map(aa -> 1) 64 | .returns(Integer.class) 65 | .windowAll(TumblingProcessingTimeWindows.of(Time.seconds(10))) 66 | .sum(0) 67 | .map(aa -> "" + aa) 68 | .returns(String.class); 69 | 70 | KafkaSink sink = KafkaSink 71 | .builder() 72 | .setBootstrapServers(bootstrapServer) 73 | .setKafkaProducerConfig(Common.getProp()) 74 | .setRecordSerializer(KafkaRecordSerializationSchema.builder() 75 | .setTopic(topic + "_sink") 76 | .setKeySerializationSchema(new SimpleStringSchema()) 77 | .setValueSerializationSchema(new SimpleStringSchema()) 78 | .build() 79 | ) 80 | .setTransactionalIdPrefix("xxx" + System.currentTimeMillis()) 81 | .build(); 82 | 83 | stream.sinkTo(sink); 84 | 85 | 86 | env.execute("typeTest"); 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/stream/api/timer/CustomerTimerDemo.scala: -------------------------------------------------------------------------------- 1 | package com.venn.stream.api.timer 2 | 3 | import java.io.File 4 | import java.sql.{Connection, DriverManager, PreparedStatement, SQLException} 5 | import java.util 6 | import java.util.{Timer, TimerTask} 7 | 8 | import org.apache.flink.api.scala._ 9 | import com.venn.common.Common 10 | import com.venn.util.{CheckpointUtil, TwoStringSource} 11 | import org.apache.flink.api.common.functions.RichMapFunction 12 | import org.apache.flink.api.common.serialization.SimpleStringSchema 13 | import org.apache.flink.configuration.Configuration 14 | import 
org.apache.flink.runtime.state.filesystem.FsStateBackend 15 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 16 | import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic} 17 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer 18 | import org.slf4j.LoggerFactory 19 | 20 | /** 21 | * Use a Timer inside the open method to periodically load external data, e.g. from MySQL. 22 | * Business assumption: during ETL, incoming records must be enriched with data from an external system; that external data is updated over time, so it cannot be loaded once and then ignored. 23 | * At the same time most of it rarely changes (updates are occasional), so async IO feels wasteful here. 24 | * In that case, consider a timer that reloads the data periodically. 25 | * 26 | * 27 | * Connect in map, add a timer, and reload the data from MySQL on a schedule. 28 | */ 29 | object CustomerTimerDemo { 30 | private final val logger = LoggerFactory.getLogger(CustomerTimerDemo.getClass) 31 | 32 | def main(args: Array[String]): Unit = { 33 | 34 | 35 | val env = StreamExecutionEnvironment.getExecutionEnvironment 36 | // env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime) 37 | if ("/".equals(File.separator)) { 38 | // val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true) 39 | // env.setStateBackend(backend) 40 | // env.enableCheckpointing(30 * 60 * 1000, CheckpointingMode.EXACTLY_ONCE) 41 | CheckpointUtil.setCheckpoint(env, "rocksdb", Common.CHECK_POINT_DATA_DIR, 10) 42 | } else { 43 | env.setMaxParallelism(1) 44 | env.setParallelism(1) 45 | } 46 | 47 | // custom source that emits random strings in the format x,xxx 48 | val input = env.addSource(new TwoStringSource) 49 | val stream = input.map(new RichMapFunction[String, String] { 50 | 51 | val jdbcUrl = "jdbc:mysql://venn:3306?useSSL=false&allowPublicKeyRetrieval=true" 52 | val username = "root" 53 | val password = "123456" 54 | val driverName = "com.mysql.jdbc.Driver" 55 | var conn: Connection = null 56 | var ps: PreparedStatement = null 57 | val map = new util.HashMap[String, String]() 58 | 59 | override def open(parameters: Configuration): Unit = { 60 | logger.info("init....") 61 | query() 62 | // new Timer 63 | val timer = new Timer(true) 64 | // initial delay of 10 seconds, then reload every 10 seconds 65 | timer.schedule(new TimerTask { 66 | override def run(): Unit = { 67 | query() 68 | } 69 | }, 10000, 10000) 70 | 71 | } 72 | 73 | override def map(value: String): String = { 74 | // concat input and mysql data 75 | value + "-" + map.get(value.split(",")(0)) 76 | } 77 | 78 | /** 79 | * query mysql for get new config data 80 | */ 81 | def query() = { 82 | logger.info("query mysql") 83 | try { 84 | Class.forName(driverName) 85 | conn = DriverManager.getConnection(jdbcUrl, username, password) 86 | ps = conn.prepareStatement("select id,name from venn.timer") 87 | val rs = ps.executeQuery 88 | 89 | while (!rs.isClosed && rs.next) { 90 | val id = rs.getString(1) 91 | val name = rs.getString(2) 92 | map.put(id, name) 93 | } 94 | logger.info("get config from db size : {}", map.size()) 95 | 96 | } catch { 97 | case e@(_: ClassNotFoundException | _: SQLException) => 98 | e.printStackTrace() 99 | } finally { 100 | if (conn != null) { 101 | conn.close() 102 | } 103 | } 104 | } 105 | }) 106 | // .print() 107 | 108 | 109 | val sink = new FlinkKafkaProducer[String]("timer_out" 110 | , new SimpleStringSchema() 111 | , Common.getProp) 112 | stream.addSink(sink) 113 | env.execute(this.getClass.getName) 114 | 115 | } 116 | 117 | } 118 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/stream/api/broadcast/BroadCastDemo.scala: -------------------------------------------------------------------------------- 1 | package com.venn.stream.api.broadcast 2 | 3 | import java.io.File 4 | 5 | import 
com.venn.common.Common 6 | import com.venn.util.{CheckpointUtil, StringUtil} 7 | import org.apache.flink.api.common.serialization.SimpleStringSchema 8 | import org.apache.flink.api.common.state.MapStateDescriptor 9 | import org.apache.flink.api.common.typeinfo.BasicTypeInfo 10 | import org.apache.flink.api.scala._ 11 | import org.apache.flink.runtime.state.filesystem.FsStateBackend 12 | import org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction 13 | import org.apache.flink.streaming.api.functions.source.SourceFunction 14 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 15 | import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic} 16 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer 17 | import org.apache.flink.util.Collector 18 | 19 | /** 20 | * broadcast 21 | */ 22 | object BroadCastDemo { 23 | 24 | def main(args: Array[String]): Unit = { 25 | val env = StreamExecutionEnvironment.getExecutionEnvironment 26 | // env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime) 27 | // if ("/".equals(File.separator)) { 28 | // val backend = new FsStateBackend(Common.CHECK_POINT_DATA_DIR, true) 29 | // env.setStateBackend(backend) 30 | // env.enableCheckpointing(10 * 1000, CheckpointingMode.EXACTLY_ONCE) 31 | // } else { 32 | // env.setMaxParallelism(1) 33 | // env.setParallelism(1) 34 | // } 35 | CheckpointUtil.setCheckpoint(env, "rocksdb", Common.CHECK_POINT_DATA_DIR, 10) 36 | 37 | // 配置更新流 38 | val configSource = new FlinkKafkaConsumer[String]("broad_cast_demo", new SimpleStringSchema, Common.getProp) 39 | // 配置流的初始化,可以通过读取配置文件实现 40 | var initFilePath = "" 41 | if ("/".equals(File.separator)) { 42 | initFilePath = "hdfs:///venn/init_file.txt" 43 | } else { 44 | initFilePath = "D:\\idea_out\\broad_cast.txt" 45 | } 46 | val init = env.readTextFile(initFilePath) 47 | val descriptor = new MapStateDescriptor[String, String]("dynamicConfig", BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO) 48 | val configStream = env.addSource(configSource).union(init).broadcast(descriptor) 49 | 50 | 51 | val input = env.addSource(new RadomFunction) 52 | .connect(configStream) 53 | .process(new BroadcastProcessFunction[String, String, String] { 54 | override def processBroadcastElement(value: String, ctx: BroadcastProcessFunction[String, String, String]#Context, out: Collector[String]): Unit = { 55 | 56 | println("new config : " + value) 57 | val configMap = ctx.getBroadcastState(descriptor) 58 | // process update configMap,读取配置数据,写入广播状态中 59 | val line = value.split(",") 60 | configMap.put(line(0), line(1)) 61 | } 62 | 63 | override def processElement(value: String, ctx: BroadcastProcessFunction[String, String, String]#ReadOnlyContext, out: Collector[String]): Unit = { 64 | // use give key, return value 65 | val configMap = ctx.getBroadcastState(descriptor) 66 | // 解析三位城市编码,根据广播状态对应的map,转码为城市对应中文 67 | // println(value) 68 | val line = value.split(",") 69 | val code = line(0) 70 | var va = configMap.get(code) 71 | // 不能转码的数据默认输出 中国(code=xxx) 72 | if (va == null) { 73 | va = "中国(code=" + code + ")"; 74 | } else { 75 | va = va + "(code=" + code + ")" 76 | } 77 | out.collect(va + "," + line(1)) 78 | } 79 | }) 80 | input.print() 81 | 82 | env.execute("BroadCastDemo") 83 | } 84 | } 85 | 86 | class RadomFunction extends SourceFunction[String] { 87 | var flag = true 88 | 89 | override def cancel(): Unit = { 90 | flag = false 91 | } 92 | 93 | override def run(ctx: SourceFunction.SourceContext[String]): Unit = { 94 | while 
(flag) { 95 | for (i <- 0 to 300) { 96 | var nu = i.toString 97 | while (nu.length < 3) { 98 | nu = "0" + nu 99 | } 100 | ctx.collect(nu + "," + StringUtil.getRandomString(5)) 101 | Thread.sleep(2000) 102 | } 103 | } 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/question/cdcStarrocks/CdcStarProcessFunction.java: -------------------------------------------------------------------------------- 1 | package com.venn.question.cdcStarrocks; 2 | 3 | import org.apache.flink.api.common.state.ListState; 4 | import org.apache.flink.api.common.state.ListStateDescriptor; 5 | import org.apache.flink.api.common.state.ValueState; 6 | import org.apache.flink.api.common.state.ValueStateDescriptor; 7 | import org.apache.flink.api.common.typeinfo.TypeInformation; 8 | import org.apache.flink.configuration.Configuration; 9 | import org.apache.flink.streaming.api.functions.KeyedProcessFunction; 10 | import org.apache.flink.util.Collector; 11 | import org.slf4j.Logger; 12 | import org.slf4j.LoggerFactory; 13 | 14 | import java.util.ArrayList; 15 | import java.util.Iterator; 16 | import java.util.List; 17 | 18 | public class CdcStarProcessFunction extends KeyedProcessFunction> { 19 | 20 | private final static Logger LOG = LoggerFactory.getLogger(CdcStarProcessFunction.class); 21 | private int batchSize; 22 | private long batchInterval; 23 | // next timer time 24 | private ValueState cacheTimer; 25 | // current cache size 26 | private ValueState cacheSize; 27 | // cache data 28 | private ListState cache; 29 | 30 | public CdcStarProcessFunction(int batchSize, long batchInterval) { 31 | this.batchSize = batchSize; 32 | this.batchInterval = batchInterval; 33 | } 34 | 35 | @Override 36 | public void open(Configuration parameters) throws Exception { 37 | 38 | ListStateDescriptor cacheDescriptor = new ListStateDescriptor("cache", TypeInformation.of(CdcRecord.class)); 39 | cache = getRuntimeContext().getListState(cacheDescriptor); 40 | 41 | ValueStateDescriptor cacheSizeDescriptor = new ValueStateDescriptor("cacheSize", Integer.class); 42 | cacheSize = getRuntimeContext().getState(cacheSizeDescriptor); 43 | 44 | ValueStateDescriptor cacheTimerDescriptor = new ValueStateDescriptor("cacheTimer", Long.class); 45 | cacheTimer = getRuntimeContext().getState(cacheTimerDescriptor); 46 | } 47 | 48 | @Override 49 | public void processElement(CdcRecord element, KeyedProcessFunction>.Context ctx, Collector> out) throws Exception { 50 | 51 | // cache size + 1 52 | if (cacheSize.value() != null) { 53 | cacheSize.update(cacheSize.value() + 1); 54 | } else { 55 | cacheSize.update(1); 56 | // add timer for interval flush 57 | long nextTimer = System.currentTimeMillis() + batchInterval; 58 | LOG.debug("register timer : {} , key : {}", nextTimer, ctx.getCurrentKey()); 59 | cacheTimer.update(nextTimer); 60 | ctx.timerService().registerProcessingTimeTimer(nextTimer); 61 | } 62 | // add data to cache state 63 | cache.add(element); 64 | // cache size max than batch Size 65 | if (cacheSize.value() >= batchSize) { 66 | // remove next timer 67 | long nextTimer = cacheTimer.value(); 68 | LOG.debug("{} remove timer, key : {}", nextTimer, ctx.getCurrentKey()); 69 | ctx.timerService().deleteProcessingTimeTimer(nextTimer); 70 | // flush data to down stream 71 | flushData(out); 72 | } 73 | } 74 | 75 | /** 76 | * flush data to down stream 77 | */ 78 | private void flushData(Collector> out) throws Exception { 79 | List tmpCache = new ArrayList<>(); 80 | Iterator it = 
cache.get().iterator(); 81 | while (it.hasNext()) { 82 | tmpCache.add(it.next()); 83 | } 84 | if (tmpCache.size() > 0) { 85 | out.collect(tmpCache); 86 | 87 | // finish flush all cache data, clear state 88 | cache.clear(); 89 | cacheSize.clear(); 90 | cacheTimer.clear(); 91 | } 92 | } 93 | 94 | @Override 95 | public void onTimer(long timestamp, KeyedProcessFunction>.OnTimerContext ctx, Collector> out) throws Exception { 96 | LOG.info("{} trigger timer to flush data", ctx.getCurrentKey(), timestamp); 97 | // batch interval trigger flush data 98 | flushData(out); 99 | } 100 | 101 | @Override 102 | public void close() throws Exception { 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/question/late1mtps/LateTps.scala: -------------------------------------------------------------------------------- 1 | package com.venn.question.late1mtps 2 | 3 | import com.google.gson.JsonParser 4 | import com.venn.entity.KafkaSimpleStringRecord 5 | import com.venn.source.TumblingEventTimeWindows 6 | import com.venn.util.{DateTimeUtil, SimpleKafkaRecordDeserializationSchema} 7 | import org.apache.flink.api.common.eventtime.{SerializableTimestampAssigner, WatermarkStrategy} 8 | import org.apache.flink.api.common.functions.RichMapFunction 9 | import org.apache.flink.api.common.serialization.SimpleStringSchema 10 | import org.apache.flink.api.scala._ 11 | import org.apache.flink.configuration.Configuration 12 | import org.apache.flink.connector.kafka.sink.{KafkaRecordSerializationSchema, KafkaSink} 13 | import org.apache.flink.connector.kafka.source.KafkaSource 14 | import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer 15 | import org.apache.flink.streaming.api.scala.{OutputTag, StreamExecutionEnvironment} 16 | import org.apache.flink.streaming.api.windowing.time.Time 17 | 18 | import java.time.Duration 19 | 20 | object LateTps { 21 | 22 | def main(args: Array[String]): Unit = { 23 | 24 | val env = StreamExecutionEnvironment.getExecutionEnvironment 25 | env.setParallelism(1) 26 | 27 | val topic = "user_log" 28 | val bootstrapServer = "localhost:9092" 29 | // window size second 30 | val windowSize: Int = 10 * 60 31 | // calculate tps interval 32 | val intervalSize: Int = 10 33 | 34 | // kafka source for read data 35 | val kafkaSource = KafkaSource 36 | .builder[KafkaSimpleStringRecord]() 37 | .setTopics(topic) 38 | .setBootstrapServers(bootstrapServer) 39 | .setGroupId("late_tps") 40 | .setStartingOffsets(OffsetsInitializer.latest()) 41 | .setDeserializer(new SimpleKafkaRecordDeserializationSchema()) 42 | .build() 43 | 44 | // add source 45 | val source = env 46 | .fromSource(kafkaSource, WatermarkStrategy.forBoundedOutOfOrderness(Duration.ofSeconds(5)), "kafkaSource") 47 | 48 | // parse data, only get (user_id, ts) 49 | val stream = source 50 | .map(new RichMapFunction[KafkaSimpleStringRecord, (String, Long)] { 51 | var jsonParse: JsonParser = _ 52 | override def open(parameters: Configuration): Unit = { 53 | jsonParse = new JsonParser 54 | } 55 | override def map(element: KafkaSimpleStringRecord): (String, Long) = { 56 | 57 | val json = jsonParse.parse(element.getValue).getAsJsonObject 58 | val tsStr = json.get("ts").getAsString 59 | val ts = DateTimeUtil.parse(tsStr).getTime 60 | val userId = json.get("user_id").getAsString 61 | 62 | (userId, ts) 63 | } 64 | override def close(): Unit = { 65 | jsonParse = null 66 | 67 | } 68 | }) 69 | // set timestamp and watermark 70 | 
.assignTimestampsAndWatermarks(WatermarkStrategy 71 | .forBoundedOutOfOrderness[(String, Long)](Duration.ofSeconds(5)) 72 | .withTimestampAssigner(new SerializableTimestampAssigner[(String, Long)] { 73 | override def extractTimestamp(t: (String, Long), l: Long): Long = { 74 | t._2 75 | } 76 | }) 77 | // idle 1 minute 78 | .withIdleness(Duration.ofMinutes(1)) 79 | ) 80 | 81 | 82 | // windowSize 10 minute, export every 1 minute tps 83 | val process10m = stream 84 | .windowAll(TumblingEventTimeWindows.of(Time.seconds(windowSize))) 85 | .process(new FixedLateTpsProcessAllWindowFunction(windowSize, 60)) 86 | .print("10m") 87 | 88 | // // windowSize minute, export every 1 minute tps 89 | val process10s = stream 90 | .windowAll(TumblingEventTimeWindows.of(Time.seconds(windowSize))) 91 | .process(new AdjustLateTpsProcessAllWindowFunction(windowSize , intervalSize)) 92 | 93 | process10s.print("10s") 94 | 95 | val tag = new OutputTag[String]("size") 96 | val side = process10s.getSideOutput(tag) 97 | 98 | // side tmp result to kafka 99 | val kafkaSink = KafkaSink.builder[String]() 100 | .setBootstrapServers(bootstrapServer) 101 | .setRecordSerializer(KafkaRecordSerializationSchema.builder[String]() 102 | .setTopic(topic +"_side_sink") 103 | .setValueSerializationSchema(new SimpleStringSchema()) 104 | .build() 105 | ) 106 | .build() 107 | 108 | // add sink 109 | side.sinkTo(kafkaSink) 110 | 111 | // execute task 112 | env.execute("LateTps") 113 | } 114 | 115 | } 116 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/common/MySqlDateTimeConverter.java: -------------------------------------------------------------------------------- 1 | package com.venn.common; 2 | 3 | import io.debezium.spi.converter.CustomConverter; 4 | import io.debezium.spi.converter.RelationalColumn; 5 | import org.apache.kafka.connect.data.SchemaBuilder; 6 | 7 | import java.time.*; 8 | import java.time.format.DateTimeFormatter; 9 | import java.util.Properties; 10 | /** 11 | * @Classname MySqlDateTimeConverter 12 | * @Description TODO 13 | * @Date 2024/3/7 14 | * @Created by venn 15 | */ 16 | public class MySqlDateTimeConverter implements CustomConverter{ 17 | 18 | 19 | private DateTimeFormatter dateFormatter = DateTimeFormatter.ISO_DATE; 20 | 21 | private DateTimeFormatter timeFormatter = DateTimeFormatter.ISO_TIME; 22 | 23 | private DateTimeFormatter datetimeFormatter = DateTimeFormatter.ISO_DATE_TIME; 24 | 25 | private DateTimeFormatter timestampFormatter = DateTimeFormatter.ISO_DATE_TIME; 26 | 27 | private ZoneId timestampZoneId = ZoneId.systemDefault(); 28 | 29 | @Override 30 | public void configure(Properties props) { 31 | 32 | } 33 | 34 | @Override 35 | public void converterFor(RelationalColumn column, ConverterRegistration registration) { 36 | 37 | String sqlType = column.typeName().toUpperCase(); 38 | 39 | SchemaBuilder schemaBuilder = null; 40 | 41 | Converter converter = null; 42 | 43 | if ("DATE".equals(sqlType)) { 44 | 45 | schemaBuilder = SchemaBuilder.string().optional().name("com.darcytech.debezium.date.string"); 46 | 47 | converter = this::convertDate; 48 | 49 | } 50 | 51 | if ("TIME".equals(sqlType)) { 52 | 53 | schemaBuilder = SchemaBuilder.string().optional().name("com.darcytech.debezium.time.string"); 54 | 55 | converter = this::convertTime; 56 | 57 | } 58 | 59 | if ("DATETIME".equals(sqlType)) { 60 | 61 | schemaBuilder = SchemaBuilder.string().optional().name("com.darcytech.debezium.datetime.string"); 62 | 63 | converter = this::convertDateTime; 64 | 65 
| 66 | } 67 | 68 | if ("TIMESTAMP".equals(sqlType)) { 69 | 70 | schemaBuilder = SchemaBuilder.string().optional().name("com.darcytech.debezium.timestamp.string"); 71 | 72 | converter = this::convertTimestamp; 73 | 74 | } 75 | 76 | if (schemaBuilder != null) { 77 | 78 | registration.register(schemaBuilder, converter); 79 | 80 | } 81 | 82 | } 83 | 84 | 85 | private String convertDate(Object input) { 86 | 87 | if (input == null) return null; 88 | 89 | if (input instanceof LocalDate) { 90 | 91 | return dateFormatter.format((LocalDate) input); 92 | 93 | } 94 | 95 | if (input instanceof Integer) { 96 | 97 | LocalDate date = LocalDate.ofEpochDay((Integer) input); 98 | 99 | return dateFormatter.format(date); 100 | 101 | } 102 | 103 | return String.valueOf(input); 104 | 105 | } 106 | 107 | 108 | private String convertTime(Object input) { 109 | 110 | if (input == null) return null; 111 | 112 | if (input instanceof Duration) { 113 | 114 | Duration duration = (Duration) input; 115 | 116 | long seconds = duration.getSeconds(); 117 | 118 | int nano = duration.getNano(); 119 | 120 | LocalTime time = LocalTime.ofSecondOfDay(seconds).withNano(nano); 121 | 122 | return timeFormatter.format(time); 123 | 124 | } 125 | 126 | return String.valueOf(input); 127 | 128 | } 129 | 130 | 131 | private String convertDateTime(Object input) { 132 | 133 | if (input == null) return null; 134 | 135 | if (input instanceof LocalDateTime) { 136 | 137 | return datetimeFormatter.format((LocalDateTime) input).replaceAll("T", " "); 138 | 139 | } 140 | 141 | return String.valueOf(input); 142 | 143 | } 144 | 145 | 146 | private String convertTimestamp(Object input) { 147 | 148 | if (input == null) return null; 149 | 150 | if (input instanceof ZonedDateTime) { 151 | 152 | // mysql的timestamp会转成UTC存储,这里的zonedDatetime都是UTC时间 153 | 154 | ZonedDateTime zonedDateTime = (ZonedDateTime) input; 155 | 156 | LocalDateTime localDateTime = zonedDateTime.withZoneSameInstant(timestampZoneId).toLocalDateTime(); 157 | 158 | return timestampFormatter.format(localDateTime).replaceAll("T", " "); 159 | 160 | } 161 | return String.valueOf(input); 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /src/main/scala/com/venn/stream/api/dayWindow/CurrentDayPvCount.scala: -------------------------------------------------------------------------------- 1 | package com.venn.stream.api.dayWindow 2 | 3 | import java.io.File 4 | import java.text.SimpleDateFormat 5 | 6 | import com.venn.common.Common 7 | import com.venn.source.TumblingEventTimeWindows 8 | import com.venn.util.CheckpointUtil 9 | import org.apache.flink.api.common.functions.ReduceFunction 10 | import org.apache.flink.api.common.serialization.SimpleStringSchema 11 | import org.apache.flink.api.scala._ 12 | import org.apache.flink.contrib.streaming.state.RocksDBStateBackend 13 | import org.apache.flink.formats.json.JsonNodeDeserializationSchema 14 | import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode 15 | import org.apache.flink.streaming.api.TimeCharacteristic 16 | import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor 17 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 18 | import org.apache.flink.streaming.api.windowing.time.Time 19 | import org.apache.flink.streaming.api.windowing.triggers.{ContinuousEventTimeTrigger, ContinuousProcessingTimeTrigger} 20 | import org.apache.flink.streaming.connectors.kafka.{FlinkKafkaConsumer, FlinkKafkaProducer} 21 
| 22 | /** 23 | * Created by venn on 19-5-23. 24 | * 25 | * use TumblingEventTimeWindows count current day pv 26 | * for test, update day window to minute window 27 | * 28 | * .windowAll(TumblingEventTimeWindows.of(Time.minutes(1), Time.seconds(0))) 29 | * TumblingEventTimeWindows can ensure count o minute event, 30 | * and time start at 0 second (like : 00:00:00 to 00:00:59) 31 | * 32 | */ 33 | object CurrentDayPvCount { 34 | 35 | def main(args: Array[String]): Unit = { 36 | // environment 37 | val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment 38 | // env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime) 39 | env.setParallelism(1) 40 | // if ("\\".equals(File.pathSeparator)) { 41 | // val rock = new RocksDBStateBackend(Common.CHECK_POINT_DATA_DIR) 42 | // env.setStateBackend(rock) 43 | // // checkpoint interval 44 | // env.enableCheckpointing(10000) 45 | // } 46 | CheckpointUtil.setCheckpoint(env, "rocksdb", Common.CHECK_POINT_DATA_DIR, 10) 47 | 48 | val topic = "current_day" 49 | val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS") 50 | val kafkaSource = new FlinkKafkaConsumer[ObjectNode](topic, new JsonNodeDeserializationSchema(), Common.getProp) 51 | val sink = new FlinkKafkaProducer[String](topic + "_out", new SimpleStringSchema(), Common.getProp) 52 | sink.setWriteTimestampToKafka(true) 53 | 54 | val stream = env.addSource(kafkaSource) 55 | .map(node => { 56 | Eventx(node.get("id").asText(), node.get("createTime").asText()) 57 | }) 58 | .assignAscendingTimestamps(event => sdf.parse(event.createTime).getTime) 59 | .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor[Eventx](Time.seconds(60)) { 60 | override def extractTimestamp(element: Eventx): Long = { 61 | sdf.parse(element.createTime).getTime 62 | } 63 | }) 64 | // window is one minute, start at 0 second 65 | //.windowAll(TumblingEventTimeWindows.of(Time.minutes(1), Time.seconds(0))) 66 | // window is one hour, start at 0 second 67 | // .windowAll(TumblingEventTimeWindows.of(Time.hours(1), Time.seconds(0))) 68 | // window is one day, start at 0 second, todo there have a bug(FLINK-11326), can't use negative number, 1.8 修复 69 | // .windowAll(TumblingEventTimeWindows.of(Time.days(1))) 70 | .windowAll(TumblingEventTimeWindows.of(Time.days(1), Time.hours(-8))) 71 | // every event one minute 如果使用了trigger,窗口函数每次执行,窗口中的所有元素都会参与计算 72 | // .trigger(ContinuousEventTimeTrigger.of(Time.seconds(3800))) 73 | // every process one minute 74 | .trigger(ContinuousProcessingTimeTrigger.of(Time.seconds(10))) 75 | // every event, export current value, 76 | // .trigger(CountTrigger.of(1)) 77 | .reduce(new ReduceFunction[Eventx] { 78 | 79 | 80 | override def reduce(event1: Eventx, event2: Eventx): Eventx = { 81 | print(event2.toString) 82 | 83 | // 将结果中,id的最小值和最大值输出 84 | new Eventx(event1.id, event2.id, event1.amt + event2.amt) 85 | } 86 | }) 87 | // format output even, connect min max id, add current timestamp 88 | // .map(event => Event(event.id + "-" + event.createTime, sdf.format(System.currentTimeMillis()), event.count)) 89 | stream.print("result : ") 90 | 91 | // execute job 92 | env.execute("CurrentDayCount") 93 | } 94 | 95 | } 96 | 97 | case class Event(id: String, createTime: String, count: Int = 1) {} 98 | 99 | --------------------------------------------------------------------------------