├── .gitignore ├── Flink 学习链接 —— 都是干货.md ├── MyParFile ├── 0.txt └── 1.txt ├── conf ├── config.properties └── config3.properties ├── doc ├── 1.taskManager和Slots │ ├── TaskManager和Slots_1.png │ ├── TaskManager和Slots_2.png │ ├── TaskManager和Slots_3.png │ └── TaskManager和Slots_4.png ├── 2.flink并行度 │ ├── 0.flink并行度.textClipping │ ├── flink并行度_1.png │ ├── flink并行度_2.png │ └── flink并行度_3.png ├── 3.flink任务链与数据流 │ ├── Flink数据流source+transformation+sink_1.png │ ├── Flink数据流source+transformation+sink_2.png │ ├── flink任务链_1.png │ ├── flink任务链_2.png │ └── flink图生成流程.png ├── 4.flink_window操作 │ ├── flink_window操作_1.png │ ├── flink_window操作_10-其他可选API.png │ ├── flink_window操作_11-API总览.png │ ├── flink_window操作_2.png │ ├── flink_window操作_3-滚动窗口.png │ ├── flink_window操作_4-滑动窗口.png │ ├── flink_window操作_5-会话窗口.png │ ├── flink_window操作_6-windowAPI.png │ ├── flink_window操作_7-窗口分配器.png │ ├── flink_window操作_8-创建不同类型窗口.png │ ├── flink_window操作_8-创建不同类型窗口2.png │ └── flink_window操作_9-窗口函数.png ├── 5.时间语义和watermark │ ├── 时间语义和watermark__01_主要内容.png │ ├── 时间语义和watermark__02_时间(Time)语义.png │ ├── 时间语义和watermark__03_哪种时间语义更重要.png │ ├── 时间语义和watermark__03_哪种时间语义更重要2.png │ ├── 时间语义和watermark__04_在代码中设置Event Time.png │ ├── 时间语义和watermark__05_乱序数据的影响.png │ ├── 时间语义和watermark__06_水位线(WaterMark).png │ ├── 时间语义和watermark__07_水位线(WaterMark)特点.png │ ├── 时间语义和watermark__08_WaterMark的传递.png │ ├── 时间语义和watermark__09_WaterMark在代码引入.png │ ├── 时间语义和watermark__09_WaterMark在代码引入2.png │ ├── 时间语义和watermark__09_WaterMark在代码引入3.png │ ├── 时间语义和watermark__09_WaterMark在代码引入4—TimestampAssigner.png │ └── 时间语义和watermark__10_watermark设定.png ├── 6.状态管理 │ ├── 状态管理-1-主要内容.png │ ├── 状态管理-2-Flink 中的状态.png │ ├── 状态管理-2-Flink 中的状态2.png │ ├── 状态管理-3-算子状态.png │ ├── 状态管理-4-算子状态数据结构.png │ ├── 状态管理-5-键控状态Keyed State.png │ ├── 状态管理-6-键控状态数据结构.png │ ├── 状态管理-7-键控状态的使用.png │ ├── 状态管理-8-状态后端.png │ └── 状态管理-9-选择一个状态后端.png ├── 7.容错机制 │ ├── flink-容错机制-1-主要内容.png │ ├── flink-容错机制-2-一致性检查点checkpoint.png │ ├── flink-容错机制-3-一从检查点恢复状态.png │ ├── flink-容错机制-3-一从检查点恢复状态2.png │ ├── flink-容错机制-3-一从检查点恢复状态3.png │ ├── flink-容错机制-3-一从检查点恢复状态4.png │ ├── flink-容错机制-3-一从检查点恢复状态5.png │ ├── flink-容错机制-4-一检查点的实现算法.png │ ├── flink-容错机制-5-Flink检查点算法.png │ ├── flink-容错机制-5-Flink检查点算法2.png │ ├── flink-容错机制-5-Flink检查点算法3.png │ ├── flink-容错机制-5-Flink检查点算法4.png │ ├── flink-容错机制-5-Flink检查点算法5.png │ ├── flink-容错机制-5-Flink检查点算法6.png │ ├── flink-容错机制-5-Flink检查点算法7.png │ ├── flink-容错机制-5-Flink检查点算法8.png │ └── flink-容错机制-6-保存点(Savepoints).png ├── 8.状态一致性 │ ├── flink状态一致性-1-主要内容.png │ ├── flink状态一致性-10-两阶段提交(Two-Phase-Commit, 2PC).png │ ├── flink状态一致性-11-2PC对外部sink系统的要求.png │ ├── flink状态一致性-12-不同Source和Sink的一致性保证.png │ ├── flink状态一致性-13-端到端状态一致性的保证.png │ ├── flink状态一致性-14-Exactly-once两阶段提交.png │ ├── flink状态一致性-14-Exactly-once两阶段提交2.png │ ├── flink状态一致性-14-Exactly-once两阶段提交3.png │ ├── flink状态一致性-15-Exactly-once两阶段提交步骤.png │ ├── flink状态一致性-2-什么是状态一致性.png │ ├── flink状态一致性-3-状态一致性分类.png │ ├── flink状态一致性-4-一致性检查点CheckPoints.png │ ├── flink状态一致性-4-一致性检查点CheckPoints2.png │ ├── flink状态一致性-5-端到端(end-to-end)状态一致性.png │ ├── flink状态一致性-6-端到端exactly-once.png │ ├── flink状态一致性-7-幂等写入(Idempotent Writes).png │ ├── flink状态一致性-8-事务写入(Transactional Writes).png │ └── flink状态一致性-9-预写日志(Write-Ahead-Log, WAL).png ├── Flink_思维导图.xmind ├── Flink基础知识.txt ├── flink.docx ├── flink任务提交流程图.png ├── flink作业调度原理.png ├── flink在yarn任务提交流程.png ├── mysql_redis_hdfs_hbase_kafka存储选型.jpg └── 实时数仓架构-new.png ├── input_dir ├── dianshang_3c.txt ├── hello.txt ├── richmap_data.txt └── sensor.txt ├── need_learn ├── Flink当中的CEP.zip └── 
Flink当中的状态编程.zip ├── pom.xml ├── readme.md └── src └── main ├── java └── com │ ├── es │ ├── constant │ │ └── ConfigConstant.java │ ├── test │ │ ├── ClusterDemo.java │ │ ├── EsDemo.java │ │ └── EsDemoWithESUtil.java │ └── util │ │ ├── ESUtil.java │ │ └── test │ │ ├── CreateDemo.java │ │ ├── ESUtilTest.java │ │ └── InsertDemo.java │ ├── flink_demo │ ├── T01_WordCount.java │ ├── T02_WordCountWithError.java │ ├── T03_KeyedState和OperatorState介绍.txt │ ├── T03_WordCountWithCheckPoint.java │ ├── T04_WordCountWithKafka.java │ ├── T05_WordCountWithKafkaExactlyOnce.java │ ├── T05_config.properties │ ├── T06_WordCountKafkaToEsExactlyOnce.java │ ├── T06_WordCountKafkaToHBaseExactlyOnce.java │ ├── T06_WordCountKafkaToKafkaExactlyOnce.java │ ├── T06_WordCountKafkaToMySQLExactlyOnce.java │ ├── T06_WordCountKafkaToRedisExactlyOnce.java │ └── Z_KafkaProducer两阶段提交重要设计.txt │ ├── flink_official │ ├── T01_WindowWordCount.java │ ├── T02_CountWindowAverageWithState.java │ ├── T03_1_FraudDetector.java │ └── T03_2_FraudDetectionJob.java │ └── lei │ ├── JavaRunFor.java │ ├── apitest │ ├── c00_source │ │ ├── C01_SourceDemo1.java │ │ ├── C02_SourceDemo2.java │ │ ├── C03_TestFileSource.java │ │ └── C04_KafkaSource.java │ ├── c02_transformation │ │ ├── C01_Map_TransformationDemo1.java │ │ ├── C01_RichMap_TransformationDemo1.java │ │ ├── C02_FlatMap_TransformationDemo1.java │ │ ├── C03_Filter_TransformationDemo1.java │ │ ├── C04_KeyByDemo1.java │ │ ├── C05_KeyByDemo2.java │ │ ├── C05_WordCounts.java │ │ ├── C06_KeyByDemo3.java │ │ ├── C06_OrderBean.java │ │ ├── C06_transformation-keyBy方法的使用.png │ │ ├── C07_ReduceDemo.java │ │ ├── C08_MaxDemo.java │ │ ├── C09_FoldDemo.java │ │ └── C09_FoldDemo2.java │ ├── c03_sink │ │ ├── C01_PrintSink.java │ │ ├── C02_AddSinkDemo.java │ │ ├── C03_WriteAsTextSink.java │ │ ├── C04_WriteAsCsvSink.java │ │ ├── C05_Task_SubTask_ChainTest.java │ │ ├── C06_SharingGroupTest.java │ │ ├── C06_深入理解Flink的Task和SubTask-共享资源槽.png │ │ └── C06_深入理解Flink的Task和SubTask-共享资源槽_2.png │ ├── c04_window │ │ ├── C01_CountWindowAll.java │ │ ├── C02_CountWindow.java │ │ ├── C03_TimeWindowAll.java │ │ ├── C04_TimeWindow.java │ │ ├── C05_SlidingWindowAll.java │ │ ├── C06_SlidingWindow.java │ │ ├── C07_SessionWindow.java │ │ ├── C08_EventTimeSessionWindow.java │ │ ├── C09_KafkaSourceEventTimeTumblingWindow.java │ │ ├── C09_SocketSourceEventTimeTumblingWindow.java │ │ ├── C09_【重要】数据源多分区WaterMark,需要所有分区时间达到窗口触发时间.png │ │ ├── C09_【重要】滚动窗口结合EventTime&WaterMark延迟触发任务机制.png │ │ └── C10_SocketSourceEventTimeSlidingWindow.java │ ├── c05_project │ │ ├── C01_DataToActivityBeanFunction.java │ │ ├── C01_QueryActivityName.java │ │ ├── C02_A_HttpClientTest.java │ │ ├── C02_AsynHttpClientV2.java │ │ ├── C02_AsyncGeoToActivityBeanFunction.java │ │ ├── C02_AsyncQueryActivityLocation.java │ │ ├── C02_CloseableHttpAsyncTest.java │ │ ├── C02_SerialHttpClientV1.java │ │ ├── C03_AsyncEsRequest.java │ │ ├── C03_AsyncEsRequest_Test.java │ │ ├── C03_AsyncHttpRequest.java │ │ ├── C03_AsyncHttpRequest_Test.java │ │ ├── C03_AsyncMySQLRequest.java │ │ ├── C03_AsyncMySQLRequest_Test.java │ │ ├── C04_ActivityCount.java │ │ ├── C04_MysqlSink.java │ │ ├── C05_RestartStrategiesDemo.java │ │ ├── C06_StateBackendDemo.java │ │ ├── C06_StateBackendDemo2.java │ │ ├── C06_StateBackendForRocksDB.java │ │ ├── C07_KeyedState和OperatorState介绍.txt │ │ ├── C07_KeyedState和OperatorState介绍演示.png │ │ ├── C07_OperatorStateAndKeyedStateDemo.java │ │ ├── C08_MapWithState.java │ │ ├── C08_MapWithStateV2.java │ │ ├── C09_0_使用OperatorState记录文件偏移量需求.png │ │ ├── 
C09_1_MyParFileSource.java │ │ ├── C09_2_OperatorStateDemo.java │ │ ├── C09_3_MyExactlyOnceParFileSource.java │ │ ├── C09_4_OperatorStateDemoV2.java │ │ ├── C10_1_KafkaSourceV2.java │ │ ├── C10_2_FlinkKafkaToRedis.java │ │ ├── C10_3_FlinkKafkaToRedis_Stock.java │ │ ├── C10_z_checkpoint_实现ExactlyOnce.png │ │ ├── C11_KafkaProducer两阶段提交重要设计.txt │ │ ├── async │ │ │ ├── C02_AsynHttpClientV2.java │ │ │ └── C02_SerialHttpClientV1.java │ │ ├── domain │ │ │ └── ActivityBean.java │ │ └── mysql.sql │ ├── c06_apps │ │ ├── C01_OrderJoin.java │ │ ├── C02_ActBean.java │ │ ├── C02_ActivityCount.java │ │ ├── C02_ActivityCountAdvBloomFilter.java │ │ ├── C02_conf.properties │ │ ├── C03_BroadcastStateDemo.java │ │ ├── C03_conf.properties │ │ └── pojo │ │ │ ├── OrderDetail.java │ │ │ └── OrderMain.java │ ├── c07_sql_api │ │ ├── C01_StreamSqlWordCount.java │ │ ├── C01_WordCount.java │ │ ├── C02_WordCountSQL.java │ │ ├── C03_TumblingEventTimeWindowsSQL.java │ │ ├── C04_KafkaWordCountSQL.java │ │ ├── C05_IpLocation.java │ │ ├── C05_UDFSQL.java │ │ ├── C06_Split.java │ │ └── C06_UDTFSQL.java │ ├── c08_table_api │ │ ├── C01_StreamWordCountTable.java │ │ ├── C02_WordCountTable.java │ │ └── C03_TumblingEventTimeWindowsTable.java │ ├── util │ │ ├── FlinkUtils.java │ │ ├── FlinkUtilsV1.java │ │ └── MyRedisSink.java │ └── z_other_learn │ │ ├── J01_SourceTest.java │ │ ├── J02_TransformTest.java │ │ ├── J03_NcClient.java │ │ ├── J03_WindowTest.java │ │ ├── J04_ProcessFunctionTest.java │ │ ├── J05_SideOutputTest.java │ │ ├── J06_StateTest.java │ │ ├── J07_CheckPointTest.java │ │ ├── c01_value_state │ │ ├── J01_ValueStateOperate.java │ │ ├── J02_ListStateOperate.java │ │ ├── J03_MapStateOperate.java │ │ ├── J04_ReduceingStateOperate.java │ │ ├── J05_AggregrateStateOperate.java │ │ └── J06_OperatorListState.java │ │ └── c02_cep │ │ ├── J01_CheckIPChangeWithState.java │ │ ├── J02_CheckIpChangeWithCEP.java │ │ ├── J03_FlinkTemperatureCEP.java │ │ └── J04_OrderTimeOutCheckCEP.java │ ├── domain │ ├── J_SensorReading.java │ └── J_User.java │ ├── sinktest │ ├── J01_KafkaSinkTest.java │ ├── J01_KafkaSinkTest_DianShang3C.java │ ├── J02_RedisSinkTest.java │ ├── J03_EsSinkTest.java │ ├── J04_JdbcSinkTest.java │ ├── J05_ClickHouseSinkTest.java │ └── J05_ClickHouseSinkTestByLib.java │ ├── util │ ├── ClickHouseUtil.java │ ├── J_ConfigurationManager.java │ ├── J_MyClickHouseUtil.java │ ├── J_MyEsUtil.java │ ├── J_MyJdbcUtil.java │ ├── J_MyKafkaUtil.java │ └── J_MyRedisUtil.java │ └── wc │ ├── J01_WordCount.java │ └── J02_StreamWordCount.java ├── resources ├── application.conf ├── druid.properties └── log4j.properties └── scala └── com └── lei ├── ScalaRunFor.scala ├── apitest ├── C01_SourceTest.scala ├── C02_TransformTest.scala ├── C03_NcClient.java ├── C03_WindowTest.scala ├── C04_ProcessFunctionTest.scala ├── C05_SideOutputTest.scala ├── C06_StateTest.scala ├── C07_CheckPointTest.scala ├── C08_TableAPI_SQL.scala ├── doit │ └── C01_MapWithState.scala └── z_other_learn │ ├── c01_value_state │ ├── C01_ValueStateOperate.scala │ ├── C02_ListStateOperate.scala │ ├── C03_MapStateOperate.scala │ ├── C04_ReduceingStateOperate.scala │ ├── C05_AggregrateStateOperate.scala │ └── C06_OperatorListState.scala │ └── c02_cep │ ├── C01_CheckIPChangeWithState.scala │ ├── C02_CheckIpChangeWithCEP.scala │ ├── C03_FlinkTemperatureCEP.scala │ └── C04_OrderTimeOutCheckCEP.scala ├── sinktest ├── C01_KafkaSinkTest.scala ├── C02_RedisSinkTest.scala ├── C03_EsSinkTest.scala └── C04_JdbcSinkTest.scala ├── util ├── ConfigurationManager.java ├── MyEsUtil.scala ├── 
MyJdbcUtil.scala ├── MyKafkaUtil.scala └── MyRedisUtil.scala └── wc ├── C01_WordCount.scala └── C02_StreamWordCount.scala /.gitignore: -------------------------------------------------------------------------------- 1 | /check_point_dir/ 2 | -------------------------------------------------------------------------------- /MyParFile/0.txt: -------------------------------------------------------------------------------- 1 | hello 2 | flink 3 | spark 4 | hadoop 5 | hbase 6 | hive 7 | kafka 8 | newWord 9 | tt8888 10 | 8888 11 | 8888 12 | zzzzzz 13 | 14 | -------------------------------------------------------------------------------- /MyParFile/1.txt: -------------------------------------------------------------------------------- 1 | 1 2 | 2 3 | 3 4 | 4 5 | 5 6 | 68888 7 | zzzzzz 8 | zzzzzz 9 | zzzzzz 10 | zzzzzz 11 | zzzzzz 12 | zzzzzz 13 | zzzzzz 14 | zzzzzz 15 | zzzzzz 16 | zzzzzz 17 | zzzzzz 18 | zzzzzz 19 | zzzzzzn 20 | zzzzzzrn 21 | zzzzzz/r/n 22 | zzzzzz 23 | zzzzzz//r//n 24 | zzzzzz 25 | 26 | zzzzzz 27 | zzzzzz 28 | 29 | zzzzzzn 30 | zzzzzz 31 | 32 | -------------------------------------------------------------------------------- /conf/config.properties: -------------------------------------------------------------------------------- 1 | topics=activity10 2 | group.id=group_id_flink 3 | bootstrap.servers=node-01:9092,node-02:9092,node-03:9092 4 | auto.offset.reset=earliest 5 | enable.auto.commit=false 6 | checkpoint-interval=10000 7 | 8 | redis.host=localhost 9 | redis.pwd= 10 | redis.db=8 -------------------------------------------------------------------------------- /conf/config3.properties: -------------------------------------------------------------------------------- 1 | order.detail.topics=orderdetail 2 | order.main.topics=ordermain 3 | order.detail.group.id=order10 4 | order.main.group.id=order10 5 | 6 | bootstrap.servers=node-01:9092,node-02:9092,node-03:9092 7 | auto.offset.reset=earliest 8 | enable.auto.commit=false 9 | checkpoint-interval=10000 10 | 11 | redis.host=localhost 12 | redis.pwd= 13 | redis.db=5 -------------------------------------------------------------------------------- /doc/1.taskManager和Slots/TaskManager和Slots_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/1.taskManager和Slots/TaskManager和Slots_1.png -------------------------------------------------------------------------------- /doc/1.taskManager和Slots/TaskManager和Slots_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/1.taskManager和Slots/TaskManager和Slots_2.png -------------------------------------------------------------------------------- /doc/1.taskManager和Slots/TaskManager和Slots_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/1.taskManager和Slots/TaskManager和Slots_3.png -------------------------------------------------------------------------------- /doc/1.taskManager和Slots/TaskManager和Slots_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/1.taskManager和Slots/TaskManager和Slots_4.png 
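The two properties files above (conf/config.properties and conf/config3.properties) carry the Kafka consumer settings (topics, group.id, bootstrap.servers, auto.offset.reset, enable.auto.commit, checkpoint-interval) and the Redis connection settings that the demo jobs read at startup. As a minimal, hedged sketch — not the repository's own FlinkUtils implementation — a file like conf/config.properties could be loaded with Flink's ParameterTool and handed to a FlinkKafkaConsumer roughly as follows (the class name is illustrative):

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;

import java.util.Arrays;
import java.util.List;
import java.util.Properties;

// Hedged sketch: loads a properties file such as conf/config.properties and builds
// a Kafka source; this is NOT the repository's FlinkUtils.createKafkaStream.
public class ConfigDrivenKafkaSourceSketch {
    public static void main(String[] args) throws Exception {
        // args[0] would be the path to conf/config.properties
        ParameterTool params = ParameterTool.fromPropertiesFile(args[0]);

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // checkpoint-interval=10000 in the file -> checkpoint every 10 seconds
        env.enableCheckpointing(params.getLong("checkpoint-interval", 10000L));

        // topics=activity10 (a comma-separated list is also possible)
        List<String> topics = Arrays.asList(params.getRequired("topics").split(","));

        // Pass the remaining keys (bootstrap.servers, group.id, auto.offset.reset,
        // enable.auto.commit) straight through; the Kafka client only warns about
        // unrelated keys such as redis.host.
        Properties kafkaProps = params.getProperties();

        FlinkKafkaConsumer<String> consumer =
                new FlinkKafkaConsumer<>(topics, new SimpleStringSchema(), kafkaProps);
        // With checkpointing enabled, offsets are committed on checkpoint completion,
        // which matches enable.auto.commit=false in the config file.
        consumer.setCommitOffsetsOnCheckpoints(true);

        DataStream<String> lines = env.addSource(consumer);
        lines.print();

        env.execute("ConfigDrivenKafkaSourceSketch");
    }
}

Keeping every setting in an external file is why T05_WordCountWithKafkaExactlyOnce (further down in this dump) takes the properties file path as args[0] via ParameterTool.fromPropertiesFile instead of bundling the config into the jar.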
-------------------------------------------------------------------------------- /doc/2.flink并行度/0.flink并行度.textClipping: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/2.flink并行度/0.flink并行度.textClipping -------------------------------------------------------------------------------- /doc/2.flink并行度/flink并行度_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/2.flink并行度/flink并行度_1.png -------------------------------------------------------------------------------- /doc/2.flink并行度/flink并行度_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/2.flink并行度/flink并行度_2.png -------------------------------------------------------------------------------- /doc/2.flink并行度/flink并行度_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/2.flink并行度/flink并行度_3.png -------------------------------------------------------------------------------- /doc/3.flink任务链与数据流/Flink数据流source+transformation+sink_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/3.flink任务链与数据流/Flink数据流source+transformation+sink_1.png -------------------------------------------------------------------------------- /doc/3.flink任务链与数据流/Flink数据流source+transformation+sink_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/3.flink任务链与数据流/Flink数据流source+transformation+sink_2.png -------------------------------------------------------------------------------- /doc/3.flink任务链与数据流/flink任务链_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/3.flink任务链与数据流/flink任务链_1.png -------------------------------------------------------------------------------- /doc/3.flink任务链与数据流/flink任务链_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/3.flink任务链与数据流/flink任务链_2.png -------------------------------------------------------------------------------- /doc/3.flink任务链与数据流/flink图生成流程.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/3.flink任务链与数据流/flink图生成流程.png -------------------------------------------------------------------------------- /doc/4.flink_window操作/flink_window操作_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/4.flink_window操作/flink_window操作_1.png -------------------------------------------------------------------------------- 
/doc/4.flink_window操作/flink_window操作_10-其他可选API.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/4.flink_window操作/flink_window操作_10-其他可选API.png -------------------------------------------------------------------------------- /doc/4.flink_window操作/flink_window操作_11-API总览.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/4.flink_window操作/flink_window操作_11-API总览.png -------------------------------------------------------------------------------- /doc/4.flink_window操作/flink_window操作_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/4.flink_window操作/flink_window操作_2.png -------------------------------------------------------------------------------- /doc/4.flink_window操作/flink_window操作_3-滚动窗口.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/4.flink_window操作/flink_window操作_3-滚动窗口.png -------------------------------------------------------------------------------- /doc/4.flink_window操作/flink_window操作_4-滑动窗口.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/4.flink_window操作/flink_window操作_4-滑动窗口.png -------------------------------------------------------------------------------- /doc/4.flink_window操作/flink_window操作_5-会话窗口.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/4.flink_window操作/flink_window操作_5-会话窗口.png -------------------------------------------------------------------------------- /doc/4.flink_window操作/flink_window操作_6-windowAPI.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/4.flink_window操作/flink_window操作_6-windowAPI.png -------------------------------------------------------------------------------- /doc/4.flink_window操作/flink_window操作_7-窗口分配器.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/4.flink_window操作/flink_window操作_7-窗口分配器.png -------------------------------------------------------------------------------- /doc/4.flink_window操作/flink_window操作_8-创建不同类型窗口.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/4.flink_window操作/flink_window操作_8-创建不同类型窗口.png -------------------------------------------------------------------------------- /doc/4.flink_window操作/flink_window操作_8-创建不同类型窗口2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/4.flink_window操作/flink_window操作_8-创建不同类型窗口2.png 
-------------------------------------------------------------------------------- /doc/4.flink_window操作/flink_window操作_9-窗口函数.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/4.flink_window操作/flink_window操作_9-窗口函数.png -------------------------------------------------------------------------------- /doc/5.时间语义和watermark/时间语义和watermark__01_主要内容.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/5.时间语义和watermark/时间语义和watermark__01_主要内容.png -------------------------------------------------------------------------------- /doc/5.时间语义和watermark/时间语义和watermark__02_时间(Time)语义.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/5.时间语义和watermark/时间语义和watermark__02_时间(Time)语义.png -------------------------------------------------------------------------------- /doc/5.时间语义和watermark/时间语义和watermark__03_哪种时间语义更重要.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/5.时间语义和watermark/时间语义和watermark__03_哪种时间语义更重要.png -------------------------------------------------------------------------------- /doc/5.时间语义和watermark/时间语义和watermark__03_哪种时间语义更重要2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/5.时间语义和watermark/时间语义和watermark__03_哪种时间语义更重要2.png -------------------------------------------------------------------------------- /doc/5.时间语义和watermark/时间语义和watermark__04_在代码中设置Event Time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/5.时间语义和watermark/时间语义和watermark__04_在代码中设置Event Time.png -------------------------------------------------------------------------------- /doc/5.时间语义和watermark/时间语义和watermark__05_乱序数据的影响.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/5.时间语义和watermark/时间语义和watermark__05_乱序数据的影响.png -------------------------------------------------------------------------------- /doc/5.时间语义和watermark/时间语义和watermark__06_水位线(WaterMark).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/5.时间语义和watermark/时间语义和watermark__06_水位线(WaterMark).png -------------------------------------------------------------------------------- /doc/5.时间语义和watermark/时间语义和watermark__07_水位线(WaterMark)特点.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/5.时间语义和watermark/时间语义和watermark__07_水位线(WaterMark)特点.png -------------------------------------------------------------------------------- /doc/5.时间语义和watermark/时间语义和watermark__08_WaterMark的传递.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/5.时间语义和watermark/时间语义和watermark__08_WaterMark的传递.png -------------------------------------------------------------------------------- /doc/5.时间语义和watermark/时间语义和watermark__09_WaterMark在代码引入.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/5.时间语义和watermark/时间语义和watermark__09_WaterMark在代码引入.png -------------------------------------------------------------------------------- /doc/5.时间语义和watermark/时间语义和watermark__09_WaterMark在代码引入2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/5.时间语义和watermark/时间语义和watermark__09_WaterMark在代码引入2.png -------------------------------------------------------------------------------- /doc/5.时间语义和watermark/时间语义和watermark__09_WaterMark在代码引入3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/5.时间语义和watermark/时间语义和watermark__09_WaterMark在代码引入3.png -------------------------------------------------------------------------------- /doc/5.时间语义和watermark/时间语义和watermark__09_WaterMark在代码引入4—TimestampAssigner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/5.时间语义和watermark/时间语义和watermark__09_WaterMark在代码引入4—TimestampAssigner.png -------------------------------------------------------------------------------- /doc/5.时间语义和watermark/时间语义和watermark__10_watermark设定.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/5.时间语义和watermark/时间语义和watermark__10_watermark设定.png -------------------------------------------------------------------------------- /doc/6.状态管理/状态管理-1-主要内容.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/6.状态管理/状态管理-1-主要内容.png -------------------------------------------------------------------------------- /doc/6.状态管理/状态管理-2-Flink 中的状态.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/6.状态管理/状态管理-2-Flink 中的状态.png -------------------------------------------------------------------------------- /doc/6.状态管理/状态管理-2-Flink 中的状态2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/6.状态管理/状态管理-2-Flink 中的状态2.png -------------------------------------------------------------------------------- /doc/6.状态管理/状态管理-3-算子状态.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/6.状态管理/状态管理-3-算子状态.png 
-------------------------------------------------------------------------------- /doc/6.状态管理/状态管理-4-算子状态数据结构.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/6.状态管理/状态管理-4-算子状态数据结构.png -------------------------------------------------------------------------------- /doc/6.状态管理/状态管理-5-键控状态Keyed State.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/6.状态管理/状态管理-5-键控状态Keyed State.png -------------------------------------------------------------------------------- /doc/6.状态管理/状态管理-6-键控状态数据结构.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/6.状态管理/状态管理-6-键控状态数据结构.png -------------------------------------------------------------------------------- /doc/6.状态管理/状态管理-7-键控状态的使用.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/6.状态管理/状态管理-7-键控状态的使用.png -------------------------------------------------------------------------------- /doc/6.状态管理/状态管理-8-状态后端.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/6.状态管理/状态管理-8-状态后端.png -------------------------------------------------------------------------------- /doc/6.状态管理/状态管理-9-选择一个状态后端.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/6.状态管理/状态管理-9-选择一个状态后端.png -------------------------------------------------------------------------------- /doc/7.容错机制/flink-容错机制-1-主要内容.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/7.容错机制/flink-容错机制-1-主要内容.png -------------------------------------------------------------------------------- /doc/7.容错机制/flink-容错机制-2-一致性检查点checkpoint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/7.容错机制/flink-容错机制-2-一致性检查点checkpoint.png -------------------------------------------------------------------------------- /doc/7.容错机制/flink-容错机制-3-一从检查点恢复状态.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/7.容错机制/flink-容错机制-3-一从检查点恢复状态.png -------------------------------------------------------------------------------- /doc/7.容错机制/flink-容错机制-3-一从检查点恢复状态2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/7.容错机制/flink-容错机制-3-一从检查点恢复状态2.png -------------------------------------------------------------------------------- /doc/7.容错机制/flink-容错机制-3-一从检查点恢复状态3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/7.容错机制/flink-容错机制-3-一从检查点恢复状态3.png -------------------------------------------------------------------------------- /doc/7.容错机制/flink-容错机制-3-一从检查点恢复状态4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/7.容错机制/flink-容错机制-3-一从检查点恢复状态4.png -------------------------------------------------------------------------------- /doc/7.容错机制/flink-容错机制-3-一从检查点恢复状态5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/7.容错机制/flink-容错机制-3-一从检查点恢复状态5.png -------------------------------------------------------------------------------- /doc/7.容错机制/flink-容错机制-4-一检查点的实现算法.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/7.容错机制/flink-容错机制-4-一检查点的实现算法.png -------------------------------------------------------------------------------- /doc/7.容错机制/flink-容错机制-5-Flink检查点算法.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/7.容错机制/flink-容错机制-5-Flink检查点算法.png -------------------------------------------------------------------------------- /doc/7.容错机制/flink-容错机制-5-Flink检查点算法2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/7.容错机制/flink-容错机制-5-Flink检查点算法2.png -------------------------------------------------------------------------------- /doc/7.容错机制/flink-容错机制-5-Flink检查点算法3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/7.容错机制/flink-容错机制-5-Flink检查点算法3.png -------------------------------------------------------------------------------- /doc/7.容错机制/flink-容错机制-5-Flink检查点算法4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/7.容错机制/flink-容错机制-5-Flink检查点算法4.png -------------------------------------------------------------------------------- /doc/7.容错机制/flink-容错机制-5-Flink检查点算法5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/7.容错机制/flink-容错机制-5-Flink检查点算法5.png -------------------------------------------------------------------------------- /doc/7.容错机制/flink-容错机制-5-Flink检查点算法6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/7.容错机制/flink-容错机制-5-Flink检查点算法6.png -------------------------------------------------------------------------------- /doc/7.容错机制/flink-容错机制-5-Flink检查点算法7.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/7.容错机制/flink-容错机制-5-Flink检查点算法7.png -------------------------------------------------------------------------------- /doc/7.容错机制/flink-容错机制-5-Flink检查点算法8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/7.容错机制/flink-容错机制-5-Flink检查点算法8.png -------------------------------------------------------------------------------- /doc/7.容错机制/flink-容错机制-6-保存点(Savepoints).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/7.容错机制/flink-容错机制-6-保存点(Savepoints).png -------------------------------------------------------------------------------- /doc/8.状态一致性/flink状态一致性-1-主要内容.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/8.状态一致性/flink状态一致性-1-主要内容.png -------------------------------------------------------------------------------- /doc/8.状态一致性/flink状态一致性-10-两阶段提交(Two-Phase-Commit, 2PC).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/8.状态一致性/flink状态一致性-10-两阶段提交(Two-Phase-Commit, 2PC).png -------------------------------------------------------------------------------- /doc/8.状态一致性/flink状态一致性-11-2PC对外部sink系统的要求.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/8.状态一致性/flink状态一致性-11-2PC对外部sink系统的要求.png -------------------------------------------------------------------------------- /doc/8.状态一致性/flink状态一致性-12-不同Source和Sink的一致性保证.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/8.状态一致性/flink状态一致性-12-不同Source和Sink的一致性保证.png -------------------------------------------------------------------------------- /doc/8.状态一致性/flink状态一致性-13-端到端状态一致性的保证.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/8.状态一致性/flink状态一致性-13-端到端状态一致性的保证.png -------------------------------------------------------------------------------- /doc/8.状态一致性/flink状态一致性-14-Exactly-once两阶段提交.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/8.状态一致性/flink状态一致性-14-Exactly-once两阶段提交.png -------------------------------------------------------------------------------- /doc/8.状态一致性/flink状态一致性-14-Exactly-once两阶段提交2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/8.状态一致性/flink状态一致性-14-Exactly-once两阶段提交2.png -------------------------------------------------------------------------------- /doc/8.状态一致性/flink状态一致性-14-Exactly-once两阶段提交3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/8.状态一致性/flink状态一致性-14-Exactly-once两阶段提交3.png -------------------------------------------------------------------------------- /doc/8.状态一致性/flink状态一致性-15-Exactly-once两阶段提交步骤.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/8.状态一致性/flink状态一致性-15-Exactly-once两阶段提交步骤.png -------------------------------------------------------------------------------- /doc/8.状态一致性/flink状态一致性-2-什么是状态一致性.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/8.状态一致性/flink状态一致性-2-什么是状态一致性.png -------------------------------------------------------------------------------- /doc/8.状态一致性/flink状态一致性-3-状态一致性分类.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/8.状态一致性/flink状态一致性-3-状态一致性分类.png -------------------------------------------------------------------------------- /doc/8.状态一致性/flink状态一致性-4-一致性检查点CheckPoints.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/8.状态一致性/flink状态一致性-4-一致性检查点CheckPoints.png -------------------------------------------------------------------------------- /doc/8.状态一致性/flink状态一致性-4-一致性检查点CheckPoints2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/8.状态一致性/flink状态一致性-4-一致性检查点CheckPoints2.png -------------------------------------------------------------------------------- /doc/8.状态一致性/flink状态一致性-5-端到端(end-to-end)状态一致性.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/8.状态一致性/flink状态一致性-5-端到端(end-to-end)状态一致性.png -------------------------------------------------------------------------------- /doc/8.状态一致性/flink状态一致性-6-端到端exactly-once.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/8.状态一致性/flink状态一致性-6-端到端exactly-once.png -------------------------------------------------------------------------------- /doc/8.状态一致性/flink状态一致性-7-幂等写入(Idempotent Writes).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/8.状态一致性/flink状态一致性-7-幂等写入(Idempotent Writes).png -------------------------------------------------------------------------------- /doc/8.状态一致性/flink状态一致性-8-事务写入(Transactional Writes).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/8.状态一致性/flink状态一致性-8-事务写入(Transactional Writes).png -------------------------------------------------------------------------------- /doc/8.状态一致性/flink状态一致性-9-预写日志(Write-Ahead-Log, WAL).png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/8.状态一致性/flink状态一致性-9-预写日志(Write-Ahead-Log, WAL).png -------------------------------------------------------------------------------- /doc/Flink_思维导图.xmind: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/Flink_思维导图.xmind -------------------------------------------------------------------------------- /doc/Flink基础知识.txt: -------------------------------------------------------------------------------- 1 | JobManager,相当于Spark 中的ApplicationMaster中的Driver 2 | 作业管理器,是一个JVM进程,接收作业 3 | 分发执行图到taskManager 4 | 5 | TaskManager,相当于Spark 中的Executor 6 | 任务管理器,是一个JVM进程,执行作业,它可能会在独立的线程上执行一个或多个subtask 7 | Flink中会有多个TaskManager 8 | 每个TaskManager都包含一定数量的插槽slots 9 | 10 | Slots,相当于Spark Executor中的cpu core,相当于工作线程。但不同的是Slots是静态的,Spark中的Executor是动态的 11 | 插槽,推荐值是就当前节点的CPU CORES核心数 12 | 主要是隔离内存 13 | 为了控制一个TaskManager能接收多少个task,TaskManager通过task slot来进行控制(一个TaskManager至少有一个slot) 14 | 15 | 最大并行度 16 | Spark: Executor数 * 每个Executor 中的cpu core 17 | Flink: TaskManager数 * 每个TaskManager 中的Task Slot 18 | 19 | ResourceManger 20 | 资源管理器 21 | 22 | 23 | Dispatcher,在yarn模式下不会创建生成 24 | 分发器,为应用提交提供了REST接口 25 | 提供一个Web UI,展示作业流程 26 | 27 | 28 | Flink中的执行图可以分成四层: 29 | StreamGraph -> JobGraph -> ExecutionGraph -> 物理执行图 30 | StreamGraph: 是根据用户通过Stream API编写的代码生成的最初的图。用来表示程序的拓扑结构。 31 | JobGraph: 【Client上生成】StreamGraph经过优化生成了JobGraph,提交给JobManager的数据结构。主要的优化为,将多个符合条件的节点chain在一起作为一个节点 32 | ExecutionGraph: 【JobManager上生成】JobManager根据JobGraph生成ExecutionGraph。ExecutionGraph是JobGraph的并行化版本,是调度最核心的数据结构。 33 | 物理执行图:【TaskManager运行】 34 | JobManager根据ExecutionGraph对Job进行调度后,在各个TaskManager上部署Task后形成的 "图",并不是一个具体的数据结构 35 | 36 | Flink的Task和SubTask-划分的依据(叫redistribute,相当于spark中的shuffle(宽依赖)): 37 | 1.并行度发生变化;比如:之前并行度为4,现在并行度为2,叫:rebalance 38 | 2.发生keyBy算子,叫HASH 39 | 3.广播 brodcast 40 | 4.算子后面调用了 startNewChain/disableChaining 41 | 42 | 43 | Flink支持数据类型: 44 | 1、基础数据类型,Java和Scala基础数据类型 45 | 2、Java和Scala元组(Tuples) 46 | 3、Scala样例类(case classes) 47 | 4、Java简单对象(POJOs) 48 | 5、其它(Arrays, Lists, Maps, Enums,等等 49 | 50 | Flink支持时间语义: 51 | 1、Event Time: 事件创建的时间 52 | 2、Ingestion Time: 数据进入Flink的时间 53 | 3、Processing Time: 执行操作算子的本地系统时间,与机器无关 54 | 55 | 56 | Spark Streaming Flink 57 | DStream DataStream 58 | Transformation Transformation 59 | Action Sink 60 | Stage Task 61 | Task SubTask 62 | Pipline Oprator chains 63 | DAG DataFlow Graph 64 | Master+Driver JobManager 65 | Worker+Executor TaskManager 66 | 67 | 68 | mvn archetype:generate \ 69 | -DarchetypeGroupId=org.apache.flink \ 70 | -DarchetypeArtifactId=flink-quickstart-java \ 71 | -DarchetypeVersion=1.10.0 72 | 73 | 74 | -------------------------------------------------------------------------------- /doc/flink.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/flink.docx -------------------------------------------------------------------------------- /doc/flink任务提交流程图.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/flink任务提交流程图.png -------------------------------------------------------------------------------- 
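doc/Flink基础知识.txt above lists the four conditions that break an operator chain and force a redistribute (Flink's counterpart to a Spark shuffle): a parallelism change (rebalance), keyBy (hash), broadcast, and an explicit startNewChain/disableChaining call. A short, hedged sketch of how those knobs look in the DataStream API — illustrative only, not a file from this repository:

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

// Hedged sketch of the chain-breaking rules described in Flink基础知识.txt.
public class ChainingSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(4);

        DataStreamSource<String> lines = env.socketTextStream("localhost", 7777);

        SingleOutputStreamOperator<Tuple2<String, Integer>> wordAndOne = lines
                .map(new MapFunction<String, Tuple2<String, Integer>>() {
                    @Override
                    public Tuple2<String, Integer> map(String word) {
                        return Tuple2.of(word, 1);
                    }
                })
                .disableChaining(); // explicit chain break, even though parallelism is unchanged

        SingleOutputStreamOperator<Tuple2<String, Integer>> summed = wordAndOne
                .keyBy(0)           // keyBy always redistributes by hash
                .sum(1)
                .setParallelism(2); // sum runs at 2 while print stays at 4 -> rebalance in between

        summed.print()
                .slotSharingGroup("sink-group"); // give the sink its own slot-sharing group

        env.execute("ChainingSketch");
    }
}

With these breaks in place the job shows up in the web UI as separate tasks and subtasks, which is the territory covered by C05_Task_SubTask_ChainTest and C06_SharingGroupTest under c03_sink in the tree above.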
/doc/flink作业调度原理.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/flink作业调度原理.png -------------------------------------------------------------------------------- /doc/flink在yarn任务提交流程.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/flink在yarn任务提交流程.png -------------------------------------------------------------------------------- /doc/mysql_redis_hdfs_hbase_kafka存储选型.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/mysql_redis_hdfs_hbase_kafka存储选型.jpg -------------------------------------------------------------------------------- /doc/实时数仓架构-new.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/doc/实时数仓架构-new.png -------------------------------------------------------------------------------- /input_dir/dianshang_3c.txt: -------------------------------------------------------------------------------- 1 | 1628413821038,上海市,上海市,APP,XiaoMi11,1,3799 2 | 1628413821038,北京市,北京市,APP,iPhone 12,1,6599 3 | 1628413821038,杭州市,浙江省,小程序,HuaWei P50,2,14176 4 | 1628413821038,广州市,广东省,网站,OPPO Find X3,1,5999 5 | 1628413821038,广州市,广东省,网站,OPPO Find X3,1,5999 -------------------------------------------------------------------------------- /input_dir/hello.txt: -------------------------------------------------------------------------------- 1 | hello flink 2 | hello world 3 | hello scala 4 | how are you 5 | fine thank you 6 | and you -------------------------------------------------------------------------------- /input_dir/richmap_data.txt: -------------------------------------------------------------------------------- 1 | 20210101082805 -------------------------------------------------------------------------------- /input_dir/sensor.txt: -------------------------------------------------------------------------------- 1 | sensor_1, 1547718199, 35.8 2 | sensor_2, 1547718201, 18.4 3 | sensor_3, 1547718202, 66.7 4 | sensor_4, 1547718205, 27.1 -------------------------------------------------------------------------------- /need_learn/Flink当中的CEP.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/need_learn/Flink当中的CEP.zip -------------------------------------------------------------------------------- /need_learn/Flink当中的状态编程.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/need_learn/Flink当中的状态编程.zip -------------------------------------------------------------------------------- /src/main/java/com/es/test/ClusterDemo.java: -------------------------------------------------------------------------------- 1 | package com.es.test; 2 | 3 | import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse; 4 | import org.elasticsearch.client.transport.TransportClient; 5 | import org.elasticsearch.cluster.health.ClusterHealthStatus; 6 | import 
org.elasticsearch.cluster.health.ClusterIndexHealth; 7 | import org.elasticsearch.common.settings.Settings; 8 | import org.elasticsearch.common.transport.TransportAddress; 9 | import org.elasticsearch.transport.client.PreBuiltTransportClient; 10 | import org.junit.Test; 11 | 12 | import java.net.InetAddress; 13 | 14 | public class ClusterDemo { 15 | 16 | @Test 17 | public void test1() throws Exception { 18 | 19 | // 指定ES集群 20 | // 在配置文件vi /opt/elasticsearch-6.2.4/config/elasticsearch.yml 21 | // cluster.name: my-application (需要打开) 22 | // node.name: node-1 (需要打开) 23 | Settings settings = Settings.builder().put("cluster.name", "my-application").build(); 24 | // 创建访问es服务器的客户端 25 | TransportClient client = new PreBuiltTransportClient(settings) 26 | .addTransportAddress(new TransportAddress(InetAddress.getByName("172.19.125.190"), 9300)); 27 | 28 | 29 | ClusterHealthResponse healths = client.admin().cluster().prepareHealth().get(); 30 | String clusterName = healths.getClusterName(); 31 | System.out.println("clusterName=" + clusterName); 32 | 33 | int numberOfDataNodes = healths.getNumberOfDataNodes(); 34 | System.out.println("numberOfDataNodes=" + numberOfDataNodes); 35 | 36 | int numberOfNodes = healths.getNumberOfNodes(); 37 | System.out.println("numberOfNodes=" + numberOfNodes); 38 | 39 | for (ClusterIndexHealth health : healths.getIndices().values()) { 40 | String index = health.getIndex(); 41 | int numberOfShards = health.getNumberOfShards(); 42 | int numberOfReplicas = health.getNumberOfReplicas(); 43 | System.out.printf("index=%s, numberOfShards=%d, numberOfReplicas=%d\n", index, numberOfShards, numberOfReplicas); 44 | 45 | ClusterHealthStatus status = health.getStatus(); 46 | System.out.println(status.toString()); 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/com/es/util/test/InsertDemo.java: -------------------------------------------------------------------------------- 1 | package com.es.util.test; 2 | 3 | import com.es.util.ESUtil; 4 | import org.elasticsearch.action.get.GetResponse; 5 | import org.elasticsearch.common.xcontent.XContentBuilder; 6 | 7 | import java.io.IOException; 8 | 9 | import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; 10 | 11 | 12 | /** 13 | * @Author: Lei 14 | * @E-mail: 843291011@qq.com 15 | * @Date: 2020-03-23 10:44 16 | * @Version: 1.0 17 | * @Modified By: 18 | * @Description: 19 | */ 20 | 21 | // 3.接下来,以两种方式插入文档到blog: 22 | 23 | public class InsertDemo { 24 | public static void main(String[] args) throws IOException { 25 | // 方式一 26 | // {"id":"1","title":"Java連接ES","content":"abcdefg。","postdate":"2019-03-24 14:38:00","url":"bas.com"} 27 | String json = "{" + 28 | "\"id\":\"1\"," + 29 | "\"title\":\"Java連接ES\"," + 30 | "\"content\":\"abcdefg。\"," + 31 | "\"postdate\":\"2019-03-24 14:38:00\"," + 32 | "\"url\":\"bas.com\"" + 33 | "}"; 34 | System.out.println(json); 35 | 36 | ESUtil.insertDocument("app_account", "blog", json); 37 | 38 | // 方式二 39 | XContentBuilder doc = jsonBuilder() 40 | .startObject() 41 | .field("id","2") 42 | .field("title","Java插入数据到ES") 43 | .field("content","abcedfasdasd") 44 | .field("postdate","2019-03-24 14:38:00") 45 | .field("url","bas.com") 46 | .endObject(); 47 | ESUtil.insertDocument("app_account", "blog", doc); 48 | 49 | /** 50 | * 上述是插入数据,接下来进行数据的查询 51 | * GET /app_account/blog/_search 52 | */ 53 | 54 | GetResponse response = ESUtil.selectDocument("app_account", "blog", "1"); 55 | } 56 | } 
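ClusterDemo.java and InsertDemo.java above show cluster health, two ways of indexing a document (a raw JSON string and an XContentBuilder), and a single-document GET via ESUtil.selectDocument. To round that off, a hedged sketch of a full-text search against the same app_account/blog index, using the same 6.x TransportClient setup as ClusterDemo — this class is illustrative and not part of the repository:

import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.transport.client.PreBuiltTransportClient;

import java.net.InetAddress;

// Hedged sketch: search over the documents written by InsertDemo.
public class SearchDemoSketch {
    public static void main(String[] args) throws Exception {
        Settings settings = Settings.builder().put("cluster.name", "my-application").build();
        TransportClient client = new PreBuiltTransportClient(settings)
                .addTransportAddress(new TransportAddress(InetAddress.getByName("172.19.125.190"), 9300));

        // Roughly the Java form of: GET /app_account/blog/_search with a match query on title
        SearchResponse response = client.prepareSearch("app_account")
                .setTypes("blog")
                .setQuery(QueryBuilders.matchQuery("title", "ES"))
                .setSize(10)
                .get();

        for (SearchHit hit : response.getHits().getHits()) {
            System.out.println(hit.getId() + " -> " + hit.getSourceAsString());
        }

        client.close();
    }
}

Dropping the setQuery call would give the plain GET /app_account/blog/_search mentioned in InsertDemo's closing comment.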
-------------------------------------------------------------------------------- /src/main/java/com/flink_demo/T01_WordCount.java: -------------------------------------------------------------------------------- 1 | package com.flink_demo; 2 | 3 | import org.apache.flink.api.common.functions.FlatMapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.util.Collector; 9 | 10 | import java.util.Arrays; 11 | import java.util.stream.Stream; 12 | 13 | /** 14 | * @Author: Lei 15 | * @E-mail: 843291011@qq.com 16 | * @Date: Created in 5:04 上午 2020/7/6 17 | * @Version: 1.0 18 | * @Modified By: 19 | * @Description: 20 | */ 21 | 22 | /* 23 | Flink 普通版WordCount 24 | */ 25 | public class T01_WordCount { 26 | public static void main(String[] args) throws Exception { 27 | // 获取flink执行环境 28 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 29 | 30 | // 加载数据源 31 | DataStreamSource lines = env.socketTextStream("localhost", 7777); 32 | 33 | // 数据分析、转换、计算 34 | SingleOutputStreamOperator> sum = lines.flatMap(new FlatMapFunction>() { 35 | @Override 36 | public void flatMap(String line, Collector> out) throws Exception { 37 | String[] words = line.split(" "); 38 | for (String word : words) { 39 | out.collect(Tuple2.of(word, 1)); 40 | } 41 | } 42 | }).keyBy(0).sum(1); 43 | 44 | // 数据输出 45 | sum.print(); 46 | 47 | // 任务执行 48 | env.execute("T01_WordCount"); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/com/flink_demo/T02_WordCountWithError.java: -------------------------------------------------------------------------------- 1 | package com.flink_demo; 2 | 3 | import org.apache.flink.api.common.functions.FlatMapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.util.Collector; 9 | 10 | /** 11 | * @Author: Lei 12 | * @E-mail: 843291011@qq.com 13 | * @Date: Created in 5:04 上午 2020/7/6 14 | * @Version: 1.0 15 | * @Modified By: 16 | * @Description: 17 | */ 18 | 19 | /* 20 | Flink 带异常版WordCount 21 | */ 22 | public class T02_WordCountWithError { 23 | public static void main(String[] args) throws Exception { 24 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 25 | 26 | // source 27 | DataStreamSource lines = env.socketTextStream("localhost", 7777); 28 | 29 | // transformation 30 | SingleOutputStreamOperator> summed = lines.flatMap(new FlatMapFunction>() { 31 | @Override 32 | public void flatMap(String line, Collector> collector) throws Exception { 33 | if (line.startsWith("null") ) { 34 | throw new RuntimeException("输入为null,发生异常"); 35 | } 36 | 37 | String[] words = line.split(" "); 38 | for (String word : words) { 39 | collector.collect(Tuple2.of(word, 1)); 40 | } 41 | } 42 | }).keyBy(0).sum(1); 43 | 44 | // sink 45 | summed.print(); 46 | 47 | env.execute("T02_WordCountWithError"); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/com/flink_demo/T03_KeyedState和OperatorState介绍.txt: 
-------------------------------------------------------------------------------- 1 | Flink的State 2 | ---------------------- 3 | Flink架构体系的一大特性是:有状态计算 4 | 1.有状态计算:在程序内部存储计算产生的中间结果,并提供给后续的Function或算子计算结果使用。 5 | 2.状态:与时间相关的任务内部数据(计算数据和元数据属性)的快照,在计算过程中会进行持久化 6 | 7 | 作用: 8 | 每次计算需要基于上一次计算结果,所以需要通过State将每次计算的中间结果进行持久化 9 | 出现错误需要从成功的检查点进行State的恢复 10 | 增量计算,Failover这些机制都需要state的支撑 11 | 12 | 存储实现 13 | 基于内存的HeapStateBackend - 在debug模式使用,不建议在生产模式下应用; 14 | 基于HDFS的FsStateBackend - 分布式文件持久化,每次读写都产生网络IO,整体性能不佳; 15 | 基于RocksDB的RocksDBStateBackend - 本地文件 + 异步HDFS持久化; 16 | 还有一个是基于Niagara(Alibaba内部实现)NiagaraStateBackend - 分布式持久化 - 在Alibaba生产环境应用: 17 | 18 | State的分类【按照是否根据key进行分区分为:KeyedState和OperatorState】 19 | 1.KeyedState 20 | 表示与key相关的一种state,只能用于KeyedStream类型数据集对应的Functions和Operators之上 21 | 基中的key是我们在SQL语句中对应的GroupBy、PartitionBy里面的字段 22 | KeyedState 是Operator 的特例,区别是KeyedState 事先按照key对数据集进行分区。每个key state 23 | 仅对应一个Operator和Key的组合 24 | 25 | OperatorState 26 | 只与算子实例绑定,每个算子实例中持有所有数据元素中的一部分状态数据 27 | 28 | (3)两者都支持并行度发生变化时,进行状态数据的重新分配 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /src/main/java/com/flink_demo/T03_WordCountWithCheckPoint.java: -------------------------------------------------------------------------------- 1 | package com.flink_demo; 2 | 3 | import org.apache.flink.api.common.functions.FlatMapFunction; 4 | import org.apache.flink.api.common.restartstrategy.RestartStrategies; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.runtime.state.filesystem.FsStateBackend; 7 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | import org.apache.flink.util.Collector; 11 | 12 | /** 13 | * @Author: Lei 14 | * @E-mail: 843291011@qq.com 15 | * @Date: Created in 5:04 上午 2020/7/6 16 | * @Version: 1.0 17 | * @Modified By: 18 | * @Description: 19 | */ 20 | 21 | /* 22 | Flink 使用状态后端保存异常时业务状态信息,处理异常版WordCount 23 | */ 24 | public class T03_WordCountWithCheckPoint { 25 | public static void main(String[] args) throws Exception { 26 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 27 | 28 | // 只有开启了checkpointing 才会有重启策略,默认保存到JobManager中的内存中 29 | env.enableCheckpointing(5000); // 开启,检查点周期,单位毫秒;默认是-1,不开启 30 | 31 | // 默认的重启策略是固定延迟无限重启 32 | //env.getConfig().setRestartStrategy(RestartStrategies.fallBackRestart()); 33 | // 设置固定延迟固定次数重启 34 | env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 2000)); 35 | // 设置checkpoint策略,为本地文件存储;默认内存存储; 生产环境建议使用hdfs分布式文件存储且配置在flink-conf.yaml文件中 36 | env.setStateBackend(new FsStateBackend("file:///Users/leizuquan/IdeaProjects/FlinkTutorial/check_point_dir")); 37 | 38 | // source 39 | DataStreamSource lines = env.socketTextStream("localhost", 7777); 40 | 41 | // transformation 42 | SingleOutputStreamOperator> summed = lines.flatMap(new FlatMapFunction>() { 43 | @Override 44 | public void flatMap(String line, Collector> collector) throws Exception { 45 | if (line.startsWith("null") ) { 46 | throw new RuntimeException("输入为null,发生异常"); 47 | } 48 | 49 | String[] words = line.split(" "); 50 | for (String word : words) { 51 | collector.collect(Tuple2.of(word, 1)); 52 | } 53 | } 54 | }).keyBy(0).sum(1); 55 | 56 | // sink 57 | summed.print(); 58 | 59 | 
env.execute("T03_WordCountWithCheckPoint"); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/main/java/com/flink_demo/T05_WordCountWithKafkaExactlyOnce.java: -------------------------------------------------------------------------------- 1 | package com.flink_demo; 2 | 3 | import com.lei.apitest.util.FlinkUtils; 4 | import org.apache.flink.api.common.functions.MapFunction; 5 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 6 | import org.apache.flink.api.java.tuple.Tuple2; 7 | import org.apache.flink.api.java.utils.ParameterTool; 8 | import org.apache.flink.streaming.api.datastream.DataStream; 9 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 10 | 11 | /** 12 | * @Author: Lei 13 | * @E-mail: 843291011@qq.com 14 | * @Date: 2020-06-10 14:43 15 | * @Version: 1.0 16 | * @Modified By: 17 | * @Description: 18 | */ 19 | /* 20 | 21 | 22 | */ 23 | 24 | public class T05_WordCountWithKafkaExactlyOnce { 25 | public static void main(String[] args) throws Exception { 26 | 27 | /*ParameterTool parameters = ParameterTool.fromArgs(args); 28 | */ 29 | 30 | // 上述方式可以将传入的配置参数读取,但是在实际生产环境最佳放在指定的文件中 31 | // 而不是放在maven 项目的resources中,因为打完包后,想要修改就不太方便了 32 | // Flink 官方最佳实践 33 | ParameterTool parameters = ParameterTool.fromPropertiesFile(args[0]); 34 | // # topic:activity10 分区3,副本2 35 | // # 创建topic 36 | // kafka-topics --create --zookeeper node-01:2181,node-02:2181,node-03:2181 --replication-factor 2 --partitions 3 --topic activity10 37 | // 38 | // # 创建生产者 39 | // kafka-console-producer --broker-list node-01:9092,node-02:9092,node-03:9092 --topic activity10 40 | 41 | DataStream lines = FlinkUtils.createKafkaStream(parameters, SimpleStringSchema.class); 42 | 43 | SingleOutputStreamOperator> wordAndOne = lines.map(new MapFunction>() { 44 | @Override 45 | public Tuple2 map(String word) throws Exception { 46 | if (word.startsWith("null")) { 47 | throw new RuntimeException("输入为null,发生异常"); 48 | } 49 | return Tuple2.of(word, 1); 50 | } 51 | }); 52 | 53 | SingleOutputStreamOperator> summed = wordAndOne.keyBy(0).sum(1); 54 | 55 | summed.print(); 56 | 57 | FlinkUtils.getEnv().execute("T04_WordCountPro"); 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/main/java/com/flink_demo/T05_config.properties: -------------------------------------------------------------------------------- 1 | topics=activity10 2 | group.id=group_id_flink 3 | bootstrap.servers=node-01:9092,node-02:9092,node-03:9092 4 | auto.offset.reset=earliest 5 | enable.auto.commit=false 6 | checkpoint-interval=10000 7 | 8 | redis.host=localhost 9 | redis.pwd= 10 | redis.db=5 -------------------------------------------------------------------------------- /src/main/java/com/flink_demo/Z_KafkaProducer两阶段提交重要设计.txt: -------------------------------------------------------------------------------- 1 | 2 | 首先Kafka在0.11+版本后,支持事务,就是可以预先提交;如果后续成功操作,再次提交确认信息即可,以达到避免脏数据问题 3 | 4 | 保证事务提交和checkpoint同时成功 5 | 6 | 1.在checkpoint前预先提交 7 | 2.如果checkpoint成功,再次提交;如果失败,回滚 8 | 9 | 两阶段提交与checkpoint联动;实现CheckpointedFunction接口snapshotState 10 | 在snapshotState方法中,调用preCommit方法 11 | 12 | 等所有subTask都成功了,TaskManager通知JobManager,JobManager发送notifyCheckpointComplete指令 13 | 在TwoPhaseCommitSinkFunction类的notifyCheckpointComplete方法中,调用了最终的commit提交 14 | 15 | 在FlinkKafkaProducer类中commit方法,最终调用producer.commitTransaction 16 | 17 | =============================================================== 18 | 19 | FlinkKafkaProducer 
正是借鉴了此设计,采用两阶段提交达到精确Exactly-Once 20 | 21 | public class FlinkKafkaProducer 22 | extends TwoPhaseCommitSinkFunction { 23 | protected abstract void invoke(TXN transaction, IN value, Context context) throws Exception; 24 | 25 | protected abstract void preCommit(TXN transaction) throws Exception; 26 | 27 | // 5.KafkaProducer被两阶段调用commit完成二次提交 28 | protected abstract void commit(TXN transaction); 29 | } 30 | 31 | 32 | TwoPhaseCommitSinkFunction与CheckpointedFunction联动 33 | 34 | public abstract class TwoPhaseCommitSinkFunction 35 | extends RichSinkFunction 36 | implements CheckpointedFunction, CheckpointListener{ 37 | 38 | protected abstract void invoke(TXN transaction, IN value, Context context) throws Exception; 39 | 40 | protected abstract void preCommit(TXN transaction) throws Exception; 41 | 42 | protected abstract void commit(TXN transaction); 43 | 44 | // 4.KafkaProducer 的两阶段提交就会收到:notifyCheckpointComplete,并调用commit 45 | @Override 46 | public final void notifyCheckpointComplete(long checkpointId) throws Exception { 47 | commit(pendingTransaction.handle); 48 | } 49 | } 50 | 51 | 52 | public interface CheckpointListener { 53 | // 1.所有subTask向JobManager发送已经进行预提交 54 | // 2.JobManager收集所有subTask上送过来的信息,如果已经收到所有subTask都已经完成的预提交 55 | // 3.JobManager就会向所以subTask发送:通知CheckPoint完成指令 56 | 57 | void notifyCheckpointComplete(long var1) throws Exception; 58 | } 59 | 60 | 61 | public interface CheckpointedFunction { 62 | // 定期将指定的状态数据保存到StateBackend中;由JobManager触发 63 | void snapshotState(FunctionSnapshotContext context) throws Exception; 64 | 65 | // 初始化OperatorState,生命周期方法,构造方法执行后执行一次; 初始化状态或获取历史状态 66 | void initializeState(FunctionInitializationContext context) throws Exception; 67 | 68 | } -------------------------------------------------------------------------------- /src/main/java/com/flink_official/T01_WindowWordCount.java: -------------------------------------------------------------------------------- 1 | package com.flink_official; 2 | 3 | import org.apache.flink.api.common.functions.FlatMapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | import org.apache.flink.streaming.api.datastream.DataStream; 6 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 7 | import org.apache.flink.streaming.api.windowing.time.Time; 8 | import org.apache.flink.util.Collector; 9 | 10 | /** 11 | * @Author: Lei 12 | * @E-mail: 843291011@qq.com 13 | * @Date: 2020-07-01 14:41 14 | * @Version: 1.0 15 | * @Modified By: 16 | * @Description: 17 | */ 18 | 19 | // 官方示例1 20 | // 以下程序是流式窗口字数统计应用程序的一个完整的工作示例,该程序在5秒的窗口中统计来自Web套接字的字数。 21 | 22 | public class T01_WindowWordCount { 23 | public static void main(String[] args) throws Exception { 24 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 25 | 26 | DataStream> dataStream = env.socketTextStream("localhost", 9999) 27 | .flatMap(new Splitter()) 28 | .keyBy(0) 29 | .timeWindow(Time.seconds(5)) 30 | .sum(1); 31 | 32 | dataStream.print(); 33 | 34 | env.execute("T01_WindowWordCount"); 35 | } 36 | 37 | private static class Splitter implements FlatMapFunction> { 38 | @Override 39 | public void flatMap(String sentence, Collector> out) throws Exception { 40 | for (String word : sentence.split(" ")) { 41 | out.collect(new Tuple2<>(word, 1)); 42 | } 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/com/flink_official/T03_2_FraudDetectionJob.java: -------------------------------------------------------------------------------- 1 | 
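/*
  T03_1_FraudDetector, used by the job below, is not shown here. As a rough orientation only --
  based on the official Flink fraud-detection walkthrough, not on this project's own source --
  such a detector is a KeyedProcessFunction that keeps one piece of keyed state per account,
  along the lines of:

      public class FraudDetector extends KeyedProcessFunction<Long, Transaction, Alert> {
          private transient ValueState<Boolean> flagState;

          @Override
          public void open(Configuration parameters) {
              flagState = getRuntimeContext().getState(
                      new ValueStateDescriptor<>("flag", Types.BOOLEAN));
          }

          @Override
          public void processElement(Transaction tx, Context ctx, Collector<Alert> out) throws Exception {
              // read flagState.value(), emit an Alert when a small amount is followed by a large one,
              // then flagState.clear() or flagState.update(true) as appropriate
          }
      }

  The class name, the state name "flag" and the small/large-amount rule are assumptions taken from
  the walkthrough; the real T03_1_FraudDetector in this repository may differ.
*/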
package com.flink_official; 2 | 3 | import org.apache.flink.streaming.api.datastream.DataStream; 4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 5 | import org.apache.flink.walkthrough.common.sink.AlertSink; 6 | import org.apache.flink.walkthrough.common.entity.Alert; 7 | import org.apache.flink.walkthrough.common.entity.Transaction; 8 | import org.apache.flink.walkthrough.common.source.TransactionSource; 9 | 10 | public class T03_2_FraudDetectionJob { 11 | 12 | public static void main(String[] args) throws Exception { 13 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 14 | 15 | DataStream transactions = env 16 | .addSource(new TransactionSource()) 17 | .name("transactions"); 18 | 19 | DataStream alerts = transactions 20 | .keyBy(Transaction::getAccountId) 21 | .process(new T03_1_FraudDetector()) 22 | .name("fraud-detector"); 23 | 24 | alerts 25 | .addSink(new AlertSink()) 26 | .name("send-alerts"); 27 | alerts.print(); 28 | 29 | env.execute("Fraud Detection"); 30 | } 31 | } -------------------------------------------------------------------------------- /src/main/java/com/lei/JavaRunFor.java: -------------------------------------------------------------------------------- 1 | package com.lei; 2 | 3 | /** 4 | * @Author: Lei 5 | * @E-mail: 843291011@qq.com 6 | * @Date: 2020-06-03 9:46 7 | * @Version: 1.0 8 | * @Modified By: 9 | * @Description: 10 | */ 11 | public class JavaRunFor { 12 | public static void main(String[] args) { 13 | long start = System.currentTimeMillis(); 14 | 15 | int t = 0; 16 | for (int i= 0; i<= 100000;i++) { 17 | t += i; 18 | } 19 | long end = System.currentTimeMillis(); 20 | System.out.println(end-start); 21 | System.out.println(t); 22 | // 1 23 | // 705082704 24 | //scala中的for比while循环慢很多。在代码优化中可以想到在此优化。 25 | 26 | //还有其他的测试总结: 用java代码和scala代码,对比同一个算法,发现java比scala快很多。执行的快慢应该主要看scala编译成字节码的质量了。 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c00_source/C01_SourceDemo1.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c00_source; 2 | 3 | import org.apache.flink.api.common.functions.FilterFunction; 4 | import org.apache.flink.streaming.api.datastream.DataStream; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | 9 | import java.util.Arrays; 10 | 11 | /** 12 | * @Author: Lei 13 | * @E-mail: 843291011@qq.com 14 | * @Date: Created in 10:45 上午 2020/6/6 15 | * @Version: 1.0 16 | * @Modified By: 17 | * @Description: 18 | */ 19 | 20 | /* 21 | 并行度为1的source 22 | */ 23 | public class C01_SourceDemo1 { 24 | public static void main(String[] args) throws Exception { 25 | // 实时计算,创建一个实时的执行环境 26 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 27 | 28 | // 创建抽象的数据集【创建原始的抽象数据集的方法:Source】 29 | // DataStream是一个抽象的数据集 30 | DataStream socketTextStream = env.socketTextStream("localhost", 7777); 31 | 32 | int parallelism2 = socketTextStream.getParallelism(); 33 | System.out.println("+++++>" + parallelism2); 34 | 35 | // 将客户端的集后并行化成一个抽象的数据集,通常是用来做测试和实验 36 | // fromElements是一个有界的数据量,虽然是一个实时计算程序,但是数据处理完,程序就会退出 37 | //DataStream nums = env.fromElements(1, 2, 3, 4, 5, 6, 7, 8, 9); 38 | 39 | // 并行度为1的source 40 | DataStream nums = 
env.fromCollection(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9)); 41 | 42 | // 获取这个DataStream的并行度 43 | int parallelism = nums.getParallelism(); 44 | System.out.println("=====>" + parallelism); 45 | 46 | SingleOutputStreamOperator filtered = nums.filter(new FilterFunction() { 47 | @Override 48 | public boolean filter(Integer integer) throws Exception { 49 | return integer % 2 == 0; 50 | } 51 | }).setParallelism(8); 52 | 53 | int parallelism1 = filtered.getParallelism(); 54 | System.out.println("&&&&&>" + parallelism1); 55 | 56 | filtered.print(); 57 | 58 | env.execute("SourceDemo1"); 59 | 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c00_source/C02_SourceDemo2.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c00_source; 2 | 3 | import org.apache.flink.api.common.functions.FilterFunction; 4 | import org.apache.flink.api.common.typeinfo.TypeInformation; 5 | import org.apache.flink.streaming.api.datastream.DataStream; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 9 | import org.apache.flink.util.NumberSequenceIterator; 10 | 11 | import java.util.Arrays; 12 | 13 | /** 14 | * @Author: Lei 15 | * @E-mail: 843291011@qq.com 16 | * @Date: Created in 10:45 上午 2020/6/6 17 | * @Version: 1.0 18 | * @Modified By: 19 | * @Description: 20 | */ 21 | 22 | /* 23 | 可以并行的source,即并行度大于1的Source 24 | */ 25 | public class C02_SourceDemo2 { 26 | public static void main(String[] args) throws Exception { 27 | // 实时计算,创建一个实时的执行环境 28 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 29 | 30 | //DataStreamSource nums = env.fromParallelCollection(new NumberSequenceIterator(1, 10), Long.class); 31 | //DataStreamSource nums = env.fromParallelCollection(new NumberSequenceIterator(1, 10), TypeInformation.of(Long.TYPE)); 32 | DataStreamSource nums = env.generateSequence(1, 10); 33 | 34 | // 如果没有设置的话,并行度就是当前机器的逻辑核数 35 | int parallelism = nums.getParallelism(); 36 | System.out.println("+++++>" + parallelism); 37 | 38 | SingleOutputStreamOperator filtered = nums.filter(new FilterFunction() { 39 | @Override 40 | public boolean filter(Long integer) throws Exception { 41 | return integer % 2 == 0; 42 | } 43 | }).setParallelism(3); 44 | 45 | int parallelism1 = filtered.getParallelism(); 46 | System.out.println("&&&&&>" + parallelism1); 47 | 48 | filtered.print(); 49 | 50 | env.execute("SourceDemo1"); 51 | 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c00_source/C03_TestFileSource.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c00_source; 2 | 3 | import org.apache.flink.api.common.functions.FlatMapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.util.Collector; 9 | 10 | /** 11 | * @Author: Lei 12 | * @E-mail: 843291011@qq.com 13 | * @Date: Created in 11:31 上午 2020/6/6 14 | * @Version: 1.0 15 | * @Modified By: 16 | * @Description: 17 | */ 18 | /* 19 
| 从文件夹加载数据,运行完后程序就会停止 20 | 数据源多并行度 21 | */ 22 | public class C03_TestFileSource { 23 | public static void main(String[] args) throws Exception { 24 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 25 | 26 | DataStreamSource lines = env.readTextFile("input_dir"); 27 | 28 | int parallelism = lines.getParallelism(); 29 | System.out.println("+++++>" + parallelism); 30 | 31 | SingleOutputStreamOperator> words = lines.flatMap(new FlatMapFunction>() { 32 | @Override 33 | public void flatMap(String line, Collector> out) throws Exception { 34 | String[] words = line.split(" "); 35 | for (String word : words) { 36 | out.collect(Tuple2.of(word, 1)); 37 | } 38 | } 39 | }); 40 | 41 | int parallelism1 = words.getParallelism(); 42 | System.out.println("+++++>" + parallelism1); 43 | 44 | SingleOutputStreamOperator> summed = words.keyBy(0).sum(1); 45 | summed.print(); 46 | 47 | env.execute("C03_TestFileSource"); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c00_source/C04_KafkaSource.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c00_source; 2 | 3 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 4 | import org.apache.flink.streaming.api.datastream.DataStream; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 7 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; 8 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011; 9 | 10 | import java.util.Properties; 11 | 12 | /** 13 | * @Author: Lei 14 | * @E-mail: 843291011@qq.com 15 | * @Date: Created in 11:46 上午 2020/6/6 16 | * @Version: 1.0 17 | * @Modified By: 18 | * @Description: 19 | */ 20 | 21 | /* 22 | 从kafka中读取数据的Source,可以并行的Source,并且可以实现ExactlyOnce 23 | */ 24 | public class C04_KafkaSource { 25 | public static void main(String[] args) throws Exception { 26 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 27 | 28 | Properties props = new Properties(); 29 | 30 | // activity10 group_id_flink node-01:9092,node-02:9092,node-03:9092 31 | // 指定Kafka的Broker地址 32 | props.setProperty("bootstrap.servers", "node-01:9092,node-02:9092,node-03:9092"); 33 | // 提定组ID 34 | props.setProperty("group.id", "group_id_flink"); 35 | // 如果没有记录偏移量,第一次从开始消费 36 | props.setProperty("auto.offset.reset", "earliest"); 37 | props.setProperty("key,deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 38 | props.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 39 | // kafka的消费者不自动提交偏移量,默认kafka自动提交offset,且保存在__consumer_offsets 40 | // props.setProperty("enable.auto.commit", "false"); 41 | 42 | // kafkaSource 43 | FlinkKafkaConsumer011 kafkaSource = new FlinkKafkaConsumer011<>( 44 | "activity10", 45 | new SimpleStringSchema(), // 序列化与反序列化方式 46 | props); 47 | 48 | // Source 49 | DataStream lines = env.addSource(kafkaSource); 50 | 51 | 52 | // Sink 53 | lines.print(); 54 | 55 | env.execute("C03_KafkaSource"); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c02_transformation/C01_RichMap_TransformationDemo1.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c02_transformation; 2 | 3 | import 
org.apache.flink.api.common.functions.RichMapFunction; 4 | import org.apache.flink.api.common.typeinfo.Types; 5 | import org.apache.flink.configuration.Configuration; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 9 | 10 | import java.time.LocalDateTime; 11 | import java.time.ZoneOffset; 12 | import java.time.format.DateTimeFormatter; 13 | 14 | /* 15 | 对DataStream进行操作,返回一个新的DataStream 16 | 17 | */ 18 | public class C01_RichMap_TransformationDemo1 { 19 | public static void main(String[] args) throws Exception { 20 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 21 | 22 | DataStreamSource lines = env.readTextFile("input_dir/richmap_data.txt"); 23 | 24 | 25 | // 方式三:传入功能更加强大的RichMapFunction 26 | // 使用RichXXX_Function,里面含有open,close方法,比如后续读取数据库的前后操作就可以使用open,close 27 | SingleOutputStreamOperator map = lines.map( 28 | new RichMapFunction() { 29 | // open,在构造方法之后,map方法执行之前,执行一次,Configuration可以拿到全局配置 30 | // 用来初始化一下连接,或者初始化或恢复state 31 | private transient DateTimeFormatter dtf = null; 32 | 33 | @Override 34 | public void open(Configuration parameters) throws Exception { 35 | super.open(parameters); 36 | dtf = DateTimeFormatter.ofPattern("yyyyMMddHHmmss"); 37 | } 38 | 39 | @Override 40 | public String map(String value) throws Exception { 41 | LocalDateTime ldt = LocalDateTime.parse(value, dtf); 42 | long toEpochSecond = ldt.toEpochSecond(ZoneOffset.of("+8")); 43 | 44 | System.out.println(toEpochSecond); 45 | long changeSecond = toEpochSecond + 8 * 60 * 60; 46 | System.out.println(changeSecond); 47 | return changeSecond + ""; 48 | } 49 | // 销毁之前,执行一次,通常是做资源释放 50 | @Override 51 | public void close() throws Exception { 52 | super.close(); 53 | } 54 | }); 55 | 56 | System.out.println("=================================="); 57 | map.print(); 58 | 59 | env.execute("C01_TransformationDemo1"); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c02_transformation/C02_FlatMap_TransformationDemo1.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c02_transformation; 2 | 3 | import org.apache.flink.api.common.functions.FlatMapFunction; 4 | import org.apache.flink.api.common.functions.RichMapFunction; 5 | import org.apache.flink.api.common.typeinfo.Types; 6 | import org.apache.flink.configuration.Configuration; 7 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | import org.apache.flink.util.Collector; 11 | 12 | import java.util.Arrays; 13 | 14 | /** 15 | * @Author: Lei 16 | * @E-mail: 843291011@qq.com 17 | * @Date: Created in 4:56 下午 2020/6/6 18 | * @Version: 1.0 19 | * @Modified By: 20 | * @Description: 21 | */ 22 | 23 | /* 24 | 对DataStream进行操作,返回一个新的DataStream 25 | 26 | */ 27 | public class C02_FlatMap_TransformationDemo1 { 28 | public static void main(String[] args) throws Exception { 29 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 30 | 31 | DataStreamSource lines = env.fromElements("spark flink hadoop", "spark flink hbase"); 32 | 33 | SingleOutputStreamOperator words = lines.flatMap(new FlatMapFunction() { 34 | 
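            // This anonymous class is a FlatMapFunction<String, String>: each input line is split and
            // every word is emitted individually through a Collector<String>. The lambda variant further
            // down has to add .returns(Types.STRING) because type erasure hides the lambda's output type
            // from Flink's type extraction.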
@Override 35 | public void flatMap(String line, Collector collector) throws Exception { 36 | /*String[] words = line.split(" "); 37 | for (String word : words) { 38 | collector.collect(word); 39 | }*/ 40 | 41 | //Arrays.asList(line.split(" ")).forEach(w -> collector.collect(w)); 42 | Arrays.stream(line.split(" ")).forEach(collector::collect); // 推荐使用这种方式编写代码,简洁 43 | } 44 | }); 45 | 46 | SingleOutputStreamOperator words2 = lines.flatMap((String line, Collector out) -> 47 | Arrays.stream(line.split(" ")).forEach(out::collect)).returns(Types.STRING); 48 | 49 | // flatMap方法还可以传入RichFlatMapFunction 50 | 51 | // Sink 52 | words2.print(); 53 | 54 | env.execute("C02_FlatMap_TransformationDemo1"); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c02_transformation/C03_Filter_TransformationDemo1.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c02_transformation; 2 | 3 | import org.apache.flink.api.common.functions.FilterFunction; 4 | import org.apache.flink.api.common.functions.FlatMapFunction; 5 | import org.apache.flink.api.common.typeinfo.Types; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 9 | import org.apache.flink.util.Collector; 10 | 11 | import java.util.Arrays; 12 | 13 | /** 14 | * @Author: Lei 15 | * @E-mail: 843291011@qq.com 16 | * @Date: Created in 4:56 下午 2020/6/6 17 | * @Version: 1.0 18 | * @Modified By: 19 | * @Description: 20 | */ 21 | 22 | /* 23 | 对DataStream进行操作,返回一个新的DataStream 24 | 25 | */ 26 | public class C03_Filter_TransformationDemo1 { 27 | public static void main(String[] args) throws Exception { 28 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 29 | 30 | DataStreamSource nums = env.fromElements(1,2,3,4,5,6,7,8,9); 31 | 32 | SingleOutputStreamOperator odd = nums.filter(new FilterFunction() { 33 | @Override 34 | public boolean filter(Integer integer) throws Exception { 35 | return integer % 2 != 0; 36 | } 37 | }); 38 | 39 | // lambda表达式 40 | // SingleOutputStreamOperator filtered = nums.filter(i -> i >= 5); 41 | // 如果lambda表达式比较复杂,需要添加{},同时,添加return 42 | SingleOutputStreamOperator filtered = nums.filter(i -> { 43 | return i >= 5; 44 | }); 45 | 46 | // Sink 47 | filtered.print(); 48 | 49 | 50 | env.execute("C02_FlatMap_TransformationDemo1"); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c02_transformation/C04_KeyByDemo1.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c02_transformation; 2 | 3 | import org.apache.flink.api.common.typeinfo.Types; 4 | import org.apache.flink.api.java.tuple.Tuple; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.KeyedStream; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | import org.slf4j.LoggerFactory; 11 | import org.slf4j.Logger; 12 | /** 13 | * @Author: Lei 14 | * @E-mail: 843291011@qq.com 15 | * @Date: Created in 5:51 下午 2020/6/6 16 | * @Version: 1.0 17 | * @Modified By: 18 | * 
@Description: 19 | */ 20 | 21 | /* 22 | keyBy是shuffle算子 23 | 在Flink中叫redistrute 24 | */ 25 | public class C04_KeyByDemo1 { 26 | 27 | private static Logger LOG = LoggerFactory.getLogger(C04_KeyByDemo1.class); 28 | 29 | public static void main(String[] args) throws Exception { 30 | LOG.error("This message contains {} placeholders. {}", 1, "error"); 31 | LOG.warn("This message contains {} placeholders. {}", 2, "warn"); 32 | LOG.debug("This message contains {} placeholders. {}", 3, "debug"); 33 | LOG.info("This message contains {} placeholders. {}", 4, "info"); 34 | LOG.trace("This message contains {} placeholders. {}", 5, "trace"); 35 | 36 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 37 | 38 | // 直接输入的就是单词 39 | DataStreamSource words = env.socketTextStream("localhost", 7777); 40 | 41 | SingleOutputStreamOperator> wordAndOne = words.map(w -> Tuple2.of(w, 1)).returns(Types.TUPLE(Types.STRING, Types.INT)); 42 | 43 | // 在java,认为元素是一个特殊的集合,脚标是从0开始;因为Flink底层源码是java编写的 44 | KeyedStream, Tuple> keyed = wordAndOne.keyBy(0); 45 | 46 | keyed.print(); 47 | 48 | env.execute("C04_KeyByDemo1"); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c02_transformation/C05_KeyByDemo2.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c02_transformation; 2 | 3 | import org.apache.flink.api.common.functions.FlatMapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.datastream.KeyedStream; 7 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 9 | import org.apache.flink.util.Collector; 10 | 11 | /** 12 | * @Author: Lei 13 | * @E-mail: 843291011@qq.com 14 | * @Date: Created in 5:51 下午 2020/6/6 15 | * @Version: 1.0 16 | * @Modified By: 17 | * @Description: 18 | */ 19 | 20 | /* 21 | keyBy是shuffle算子 22 | 在Flink中叫redistrute 23 | */ 24 | public class C05_KeyByDemo2 { 25 | public static void main(String[] args) throws Exception { 26 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 27 | 28 | // 直接输入的就是单词 29 | DataStreamSource words = env.socketTextStream("localhost", 7777); 30 | 31 | SingleOutputStreamOperator wordAndOne = words.flatMap(new FlatMapFunction() { 32 | @Override 33 | public void flatMap(String value, Collector collector) throws Exception { 34 | collector.collect(new C05_WordCounts(value, 1L)); 35 | } 36 | }); 37 | 38 | // 在java,认为元素是一个特殊的集合,脚标是从0开始;因为Flink底层源码是java编写的 39 | //KeyedStream keyed = wordAndOne.keyBy(t -> t.getWord()); 40 | KeyedStream keyed = wordAndOne.keyBy("word"); 41 | 42 | // 聚合 43 | SingleOutputStreamOperator sumed = keyed.sum("counts"); 44 | 45 | sumed.print(); 46 | 47 | env.execute("C04_KeyByDemo1"); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c02_transformation/C05_WordCounts.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c02_transformation; 2 | 3 | /** 4 | * @Author: Lei 5 | * @E-mail: 843291011@qq.com 6 | * @Date: Created in 6:53 上午 2020/6/7 7 | * @Version: 1.0 8 | * @Modified By: 9 | * @Description: 10 | */ 11 | 12 | // 封装数据的Bean 13 | 14 | public class C05_WordCounts { 15 | public 
String word; 16 | public Long counts; 17 | 18 | public C05_WordCounts() { 19 | } 20 | 21 | public C05_WordCounts(String word, Long counts) { 22 | this.word = word; 23 | this.counts = counts; 24 | } 25 | 26 | public static C05_WordCounts of(String word, Long counts){ 27 | return new C05_WordCounts(word, counts); 28 | } 29 | 30 | @Override 31 | public String toString() { 32 | return "C05_WordCounts{" + 33 | "word='" + word + '\'' + 34 | ", counts=" + counts + 35 | '}'; 36 | } 37 | 38 | /*private String word; 39 | private Long counts; 40 | 41 | // 如果提供了有参构造器,一定要提供一个无参构造器,要不以后反射会出问题 42 | public C05_WordCounts() { 43 | } 44 | 45 | public C05_WordCounts(String word, Long counts) { 46 | this.word = word; 47 | this.counts = counts; 48 | } 49 | 50 | public String getWord() { 51 | return word; 52 | } 53 | 54 | public void setWord(String word) { 55 | this.word = word; 56 | } 57 | 58 | public Long getCounts() { 59 | return counts; 60 | } 61 | 62 | public void setCounts(Long counts) { 63 | this.counts = counts; 64 | } 65 | 66 | @Override 67 | public String toString() { 68 | return "C05_WordCounts{" + 69 | "word='" + word + '\'' + 70 | ", counts=" + counts + 71 | '}'; 72 | }*/ 73 | } 74 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c02_transformation/C06_OrderBean.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c02_transformation; 2 | 3 | /** 4 | * @Author: Lei 5 | * @E-mail: 843291011@qq.com 6 | * @Date: Created in 7:23 上午 2020/6/7 7 | * @Version: 1.0 8 | * @Modified By: 9 | * @Description: 10 | */ 11 | public class C06_OrderBean { 12 | 13 | public String province; 14 | public String city; 15 | public Double money; 16 | 17 | public C06_OrderBean() { 18 | } 19 | 20 | public C06_OrderBean(String province, String city, Double money) { 21 | this.province = province; 22 | this.city = city; 23 | this.money = money; 24 | } 25 | 26 | public static C06_OrderBean of(String province, String city, Double money) { 27 | return new C06_OrderBean(province, city, money); 28 | } 29 | 30 | @Override 31 | public String toString() { 32 | return "C06_OrderBean{" + 33 | "province='" + province + '\'' + 34 | ", city='" + city + '\'' + 35 | ", money=" + money + 36 | '}'; 37 | } 38 | 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c02_transformation/C06_transformation-keyBy方法的使用.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/src/main/java/com/lei/apitest/c02_transformation/C06_transformation-keyBy方法的使用.png -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c02_transformation/C07_ReduceDemo.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c02_transformation; 2 | 3 | import org.apache.flink.api.common.functions.ReduceFunction; 4 | import org.apache.flink.api.common.typeinfo.Types; 5 | import org.apache.flink.api.java.tuple.Tuple; 6 | import org.apache.flink.api.java.tuple.Tuple2; 7 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 8 | import org.apache.flink.streaming.api.datastream.KeyedStream; 9 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 10 | import 
org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | 12 | /** 13 | * @Author: Lei 14 | * @E-mail: 843291011@qq.com 15 | * @Date: Created in 5:51 下午 2020/6/6 16 | * @Version: 1.0 17 | * @Modified By: 18 | * @Description: 19 | */ 20 | 21 | /* 22 | keyBy是shuffle算子 23 | 在Flink中叫redistrute 24 | */ 25 | public class C07_ReduceDemo { 26 | public static void main(String[] args) throws Exception { 27 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 28 | 29 | // 直接输入的就是单词 30 | DataStreamSource words = env.socketTextStream("localhost", 7777); 31 | 32 | SingleOutputStreamOperator> wordAndOne = words.map(w -> Tuple2.of(w, 1)).returns(Types.TUPLE(Types.STRING, Types.INT)); 33 | 34 | // 在java,认为元素是一个特殊的集合,脚标是从0开始;因为Flink底层源码是java编写的 35 | KeyedStream, Tuple> keyed = wordAndOne.keyBy(0); 36 | 37 | SingleOutputStreamOperator> reduced = keyed.reduce(new ReduceFunction>() { 38 | @Override 39 | public Tuple2 reduce(Tuple2 v1, Tuple2 v2) throws Exception { 40 | //return Tuple2.of(v1.f0, v1.f1 + v2.f1); 41 | v1.f1 = v1.f1 + v2.f1; 42 | return v1; 43 | } 44 | }); 45 | 46 | reduced.print(); 47 | 48 | env.execute("C07_ReduceDemo"); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c02_transformation/C08_MaxDemo.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c02_transformation; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.KeyedStream; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | 11 | /** 12 | * @Author: Lei 13 | * @E-mail: 843291011@qq.com 14 | * @Date: Created in 7:54 上午 2020/6/7 15 | * @Version: 1.0 16 | * @Modified By: 17 | * @Description: 18 | */ 19 | // 取当最当前key最大值 20 | public class C08_MaxDemo { 21 | public static void main(String[] args) throws Exception { 22 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 23 | 24 | // spark,10 25 | DataStreamSource lines = env.socketTextStream("localhost", 7777); 26 | 27 | SingleOutputStreamOperator> wordAndNum = lines.map(new MapFunction>() { 28 | @Override 29 | public Tuple2 map(String line) throws Exception { 30 | String[] fields = line.split(","); 31 | String word = fields[0]; 32 | int num = Integer.parseInt(fields[1]); 33 | return Tuple2.of(word, num); 34 | } 35 | }); 36 | 37 | KeyedStream, Tuple> keyed = wordAndNum.keyBy(0); 38 | SingleOutputStreamOperator> res = keyed.max(1); 39 | 40 | res.print(); 41 | 42 | env.execute("C08_MaxDemo"); 43 | 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c02_transformation/C09_FoldDemo2.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c02_transformation; 2 | 3 | import org.apache.flink.api.common.functions.FilterFunction; 4 | import org.apache.flink.api.common.functions.FoldFunction; 5 | import org.apache.flink.api.common.typeinfo.Types; 6 | import org.apache.flink.api.java.tuple.Tuple; 7 | import org.apache.flink.api.java.tuple.Tuple2; 8 | import 
org.apache.flink.streaming.api.datastream.DataStreamSource; 9 | import org.apache.flink.streaming.api.datastream.KeyedStream; 10 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 12 | 13 | /** 14 | * @Author: Lei 15 | * @E-mail: 843291011@qq.com 16 | * @Date: Created in 11:02 上午 2020/6/7 17 | * @Version: 1.0 18 | * @Modified By: 19 | * @Description: 20 | */ 21 | // Fold 算子适合需要从指定数据开始累计的场景;本示例是实现计数,且计数从1000开始 22 | public class C09_FoldDemo2 { 23 | public static void main(String[] args) throws Exception { 24 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 25 | 26 | // 直接输入的就是单词 27 | DataStreamSource words = env.socketTextStream("localhost", 7777); 28 | 29 | SingleOutputStreamOperator> wordAndOne = words.map(w -> Tuple2.of(w, 1)) 30 | .returns(Types.TUPLE(Types.STRING, Types.INT)); 31 | 32 | // 在java,认为元组是一个特殊的集合,脚标是从0开始 33 | KeyedStream, Tuple> keyed = wordAndOne.keyBy(0); 34 | 35 | SingleOutputStreamOperator> result = keyed.fold(new Tuple2("", 1000), 36 | new FoldFunction, Tuple2>() { 37 | @Override 38 | public Tuple2 fold(Tuple2 accumulator, Tuple2 value) throws Exception { 39 | String key = value.f0; 40 | Integer count = value.f1; 41 | accumulator.f0 = key; 42 | accumulator.f1 += count; 43 | return accumulator; 44 | } 45 | }); 46 | 47 | result.print(); 48 | 49 | 50 | env.execute("C09_FoldDemo2"); 51 | 52 | 53 | } 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | } 76 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c03_sink/C01_PrintSink.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c03_sink; 2 | 3 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 5 | 6 | /** 7 | * @Author: Lei 8 | * @E-mail: 843291011@qq.com 9 | * @Date: Created in 11:51 上午 2020/6/7 10 | * @Version: 1.0 11 | * @Modified By: 12 | * @Description: 13 | */ 14 | // 在 flink 中 print 属于一种sink 15 | public class C01_PrintSink { 16 | public static void main(String[] args) throws Exception { 17 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 18 | DataStreamSource lines = env.socketTextStream("localhost", 7777); 19 | 20 | // print时显示的数字哪里来的,代表什么含义:PrintSinkOutputWriter.java 21 | // completedPrefix += (subtaskIndex + 1); 22 | // completedPrefix += "> "; 23 | 24 | // print是测试时使用,生产环境与其他存储介质结合使用 25 | lines.print("res").setParallelism(2); 26 | 27 | env.execute("C01_PrintSink"); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c03_sink/C02_AddSinkDemo.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c03_sink; 2 | 3 | import org.apache.flink.api.common.functions.FlatMapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.KeyedStream; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | import 
org.apache.flink.streaming.api.functions.sink.RichSinkFunction; 11 | import org.apache.flink.streaming.api.functions.sink.SinkFunction; 12 | import org.apache.flink.util.Collector; 13 | 14 | /** 15 | * @Author: Lei 16 | * @E-mail: 843291011@qq.com 17 | * @Date: Created in 12:07 下午 2020/6/7 18 | * @Version: 1.0 19 | * @Modified By: 20 | * @Description: 21 | */ 22 | public class C02_AddSinkDemo { 23 | public static void main(String[] args) throws Exception { 24 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 25 | 26 | DataStreamSource lines = env.socketTextStream("localhost", 7777); 27 | 28 | SingleOutputStreamOperator> wordAndOne = lines.flatMap(new FlatMapFunction>() { 29 | @Override 30 | public void flatMap(String line, Collector> out) throws Exception { 31 | String[] words = line.split(" "); 32 | for (String word : words) { 33 | out.collect(Tuple2.of(word, 1)); 34 | } 35 | } 36 | }); 37 | 38 | SingleOutputStreamOperator> summed = wordAndOne.keyBy(0).sum(1); 39 | 40 | summed.addSink(new RichSinkFunction>() { 41 | @Override 42 | public void invoke(Tuple2 value, Context context) throws Exception { 43 | // 通过功能更加丰富的RichSinkFunction,可以通过getRuntimeContext可以拿到subTaskIndex 44 | int indexOfThisSubtask = getRuntimeContext().getIndexOfThisSubtask(); 45 | System.out.println(indexOfThisSubtask + " > " + value); 46 | } 47 | }); 48 | 49 | env.execute("C02_AddSinkDemo"); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c03_sink/C03_WriteAsTextSink.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c03_sink; 2 | 3 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 5 | 6 | /** 7 | * @Author: Lei 8 | * @E-mail: 843291011@qq.com 9 | * @Date: Created in 12:17 下午 2020/6/7 10 | * @Version: 1.0 11 | * @Modified By: 12 | * @Description: 13 | */ 14 | public class C03_WriteAsTextSink { 15 | public static void main(String[] args) throws Exception { 16 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 17 | 18 | DataStreamSource lines = env.socketTextStream("localhost", 7777); 19 | 20 | lines.writeAsText("out_dir"); 21 | 22 | env.execute("C03_WriteAsTextSink"); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c03_sink/C04_WriteAsCsvSink.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c03_sink; 2 | 3 | import org.apache.flink.api.common.functions.FlatMapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | import org.apache.flink.core.fs.FileSystem; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 9 | import org.apache.flink.streaming.api.functions.sink.RichSinkFunction; 10 | import org.apache.flink.util.Collector; 11 | 12 | /** 13 | * @Author: Lei 14 | * @E-mail: 843291011@qq.com 15 | * @Date: Created in 12:07 下午 2020/6/7 16 | * @Version: 1.0 17 | * @Modified By: 18 | * @Description: 19 | */ 20 | public class C04_WriteAsCsvSink { 21 | public static void main(String[] args) throws Exception { 22 | StreamExecutionEnvironment env = 
StreamExecutionEnvironment.getExecutionEnvironment(); 23 | 24 | DataStreamSource lines = env.socketTextStream("localhost", 7777); 25 | 26 | SingleOutputStreamOperator> wordAndOne = lines.flatMap(new FlatMapFunction>() { 27 | @Override 28 | public void flatMap(String line, Collector> out) throws Exception { 29 | String[] words = line.split(" "); 30 | for (String word : words) { 31 | out.collect(Tuple2.of(word, 1)); 32 | } 33 | } 34 | }); 35 | 36 | SingleOutputStreamOperator> summed = wordAndOne.keyBy(0).sum(1); 37 | 38 | 39 | // 如果数据不是Tuple类型,writeAsCsv是无法正常保存 40 | summed.writeAsCsv("out_dir", FileSystem.WriteMode.NO_OVERWRITE); 41 | 42 | env.execute("C02_AddSinkDemo"); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c03_sink/C05_Task_SubTask_ChainTest.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c03_sink; 2 | 3 | import org.apache.flink.api.common.functions.FilterFunction; 4 | import org.apache.flink.api.common.functions.FlatMapFunction; 5 | import org.apache.flink.api.common.functions.MapFunction; 6 | import org.apache.flink.api.java.tuple.Tuple2; 7 | import org.apache.flink.core.fs.FileSystem; 8 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 9 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.util.Collector; 12 | 13 | /** 14 | * @Author: Lei 15 | * @E-mail: 843291011@qq.com 16 | * @Date: Created in 12:07 下午 2020/6/7 17 | * @Version: 1.0 18 | * @Modified By: 19 | * @Description: 20 | */ 21 | public class C05_Task_SubTask_ChainTest { 22 | public static void main(String[] args) throws Exception { 23 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 24 | 25 | DataStreamSource lines = env.socketTextStream("localhost", 7777); 26 | 27 | SingleOutputStreamOperator word = lines.flatMap(new FlatMapFunction() { 28 | @Override 29 | public void flatMap(String line, Collector out) throws Exception { 30 | String[] words = line.split(" "); 31 | for (String word : words) { 32 | out.collect(word); 33 | } 34 | } 35 | }); 36 | 37 | SingleOutputStreamOperator> wordAndOne = word.map(new MapFunction>() { 38 | @Override 39 | public Tuple2 map(String value) throws Exception { 40 | return Tuple2.of(value, 1); 41 | } 42 | }); 43 | 44 | SingleOutputStreamOperator> filtered = wordAndOne.filter(new FilterFunction>() { 45 | @Override 46 | public boolean filter(Tuple2 value) throws Exception { 47 | return value.f0.startsWith("h"); 48 | } 49 | }); 50 | //.disableChaining(); // 将这个算子单独划分处理,生成一个Task,跟其他的算子不再有Operator Chain; 比如:CPU、内存密集型,算法逻辑复杂的操作单独划分成Task,独享硬件资源 51 | //.startNewChain(); 52 | // 从该算子开始,开启一个新的链;从这个算子之前,发生redistributing 53 | // 需要使用Flink web 查看Show Plan 54 | 55 | filtered.print(); 56 | 57 | env.execute("C05_Task_SubTask_ChainTest"); 58 | } 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | } 77 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c03_sink/C06_SharingGroupTest.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c03_sink; 2 | 3 | import org.apache.flink.api.common.functions.FilterFunction; 4 | import org.apache.flink.api.common.functions.FlatMapFunction; 5 | import 
org.apache.flink.api.common.functions.MapFunction; 6 | import org.apache.flink.api.java.tuple.Tuple2; 7 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | import org.apache.flink.util.Collector; 11 | 12 | /** 13 | * @Author: Lei 14 | * @E-mail: 843291011@qq.com 15 | * @Date: Created in 12:07 下午 2020/6/7 16 | * @Version: 1.0 17 | * @Modified By: 18 | * @Description: 19 | */ 20 | 21 | /* 22 | Flink 的资源槽,默认的名字都是default 23 | 24 | */ 25 | public class C06_SharingGroupTest { 26 | public static void main(String[] args) throws Exception { 27 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 28 | 29 | DataStreamSource lines = env.socketTextStream("localhost", 7777); 30 | 31 | SingleOutputStreamOperator word = lines.flatMap(new FlatMapFunction() { 32 | @Override 33 | public void flatMap(String line, Collector out) throws Exception { 34 | String[] words = line.split(" "); 35 | for (String word : words) { 36 | out.collect(word); 37 | } 38 | } 39 | }).slotSharingGroup("lei_group"); 40 | 41 | SingleOutputStreamOperator filtered = word.filter(new FilterFunction() { 42 | @Override 43 | public boolean filter(String value) throws Exception { 44 | return value.startsWith("h"); 45 | } 46 | }); 47 | //.disableChaining(); // 将这个算子单独划分处理,生成一个Task,跟其他的算子不再有Operator Chain; 比如:CPU、内存密集型,算法逻辑复杂的操作单独划分成Task,独享硬件资源 48 | //.startNewChain(); 49 | // 从该算子开始,开启一个新的链;从这个算子之前,发生redistributing 50 | // 需要使用Flink web 查看Show Plan 51 | 52 | SingleOutputStreamOperator> wordAndOne = filtered.map(new MapFunction>() { 53 | @Override 54 | public Tuple2 map(String value) throws Exception { 55 | return Tuple2.of(value, 1); 56 | } 57 | }); 58 | 59 | 60 | SingleOutputStreamOperator> summed = wordAndOne.keyBy(0).sum(1); 61 | 62 | summed.print(); 63 | 64 | env.execute("C05_Task_SubTask_ChainTest"); 65 | } 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | } 84 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c03_sink/C06_深入理解Flink的Task和SubTask-共享资源槽.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/src/main/java/com/lei/apitest/c03_sink/C06_深入理解Flink的Task和SubTask-共享资源槽.png -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c03_sink/C06_深入理解Flink的Task和SubTask-共享资源槽_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/src/main/java/com/lei/apitest/c03_sink/C06_深入理解Flink的Task和SubTask-共享资源槽_2.png -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c04_window/C01_CountWindowAll.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c04_window; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.streaming.api.datastream.AllWindowedStream; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 7 | import 
org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.streaming.api.windowing.windows.GlobalWindow; 9 | 10 | /** 11 | * @Author: Lei 12 | * @E-mail: 843291011@qq.com 13 | * @Date: Created in 3:18 下午 2020/6/7 14 | * @Version: 1.0 15 | * @Modified By: 16 | * @Description: 17 | */ 18 | 19 | // countWindowAll 全局,每 N条数据触发一个窗口 20 | public class C01_CountWindowAll { 21 | public static void main(String[] args) throws Exception { 22 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 23 | 24 | DataStreamSource lines = env.socketTextStream("localhost", 7777); 25 | 26 | SingleOutputStreamOperator nums = lines.map(new MapFunction() { 27 | @Override 28 | public Integer map(String value) throws Exception { 29 | return Integer.parseInt(value); 30 | } 31 | }); 32 | 33 | // 不分组,将整体当成一个组 34 | // 每5条数据收集成一个组 35 | AllWindowedStream window = nums.countWindowAll(5); 36 | 37 | // 在窗口中聚合 38 | SingleOutputStreamOperator summed = window.sum(0); 39 | 40 | summed.print(); 41 | 42 | env.execute("CountWindowAll"); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c04_window/C02_CountWindow.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c04_window; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.datastream.*; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.streaming.api.windowing.windows.GlobalWindow; 9 | 10 | /** 11 | * @Author: Lei 12 | * @E-mail: 843291011@qq.com 13 | * @Date: Created in 3:18 下午 2020/6/7 14 | * @Version: 1.0 15 | * @Modified By: 16 | * @Description: 17 | */ 18 | 19 | /* 20 | 分组后再调用CountWindow,每一个组达到一定的条数才会触发窗口执行 21 | */ 22 | public class C02_CountWindow { 23 | public static void main(String[] args) throws Exception { 24 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 25 | 26 | // spark,3 27 | // hadoop,2 28 | DataStreamSource lines = env.socketTextStream("localhost", 7777); 29 | 30 | SingleOutputStreamOperator> wordAndCount = lines.map(new MapFunction>() { 31 | @Override 32 | public Tuple2 map(String value) throws Exception { 33 | String[] fields = value.split(","); 34 | String word = fields[0]; 35 | Integer count = Integer.parseInt(fields[1]); 36 | return Tuple2.of(word, count); 37 | } 38 | }); 39 | 40 | // 先分组,再划分窗口 41 | KeyedStream, Tuple> keyed = wordAndCount.keyBy(0); 42 | 43 | // 划分窗口 44 | WindowedStream, Tuple, GlobalWindow> window = keyed.countWindow(5); 45 | 46 | SingleOutputStreamOperator> summed = window.sum(1); 47 | 48 | summed.print(); 49 | 50 | env.execute("C02_CountWindow"); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c04_window/C03_TimeWindowAll.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c04_window; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.streaming.api.datastream.AllWindowedStream; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 7 | import 
org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.streaming.api.windowing.time.Time; 9 | import org.apache.flink.streaming.api.windowing.windows.GlobalWindow; 10 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 11 | 12 | /** 13 | * @Author: Lei 14 | * @E-mail: 843291011@qq.com 15 | * @Date: Created in 3:18 下午 2020/6/7 16 | * @Version: 1.0 17 | * @Modified By: 18 | * @Description: 19 | */ 20 | // timeWindowAll 全局 每N间隔将数据收集成一个组 21 | public class C03_TimeWindowAll { 22 | public static void main(String[] args) throws Exception { 23 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 24 | 25 | DataStreamSource lines = env.socketTextStream("localhost", 7777); 26 | 27 | SingleOutputStreamOperator nums = lines.map(new MapFunction() { 28 | @Override 29 | public Integer map(String value) throws Exception { 30 | return Integer.parseInt(value); 31 | } 32 | }); 33 | 34 | // 不分组,将整体当成一个组 35 | // 每5秒钟将数据收集成一个组 36 | AllWindowedStream window = nums.timeWindowAll(Time.seconds(5)); 37 | 38 | // 在窗口中聚合 39 | SingleOutputStreamOperator summed = window.sum(0); 40 | 41 | summed.print(); 42 | 43 | env.execute("C03_TumblingWindowAll"); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c04_window/C04_TimeWindow.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c04_window; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.KeyedStream; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.datastream.WindowedStream; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows; 12 | import org.apache.flink.streaming.api.windowing.time.Time; 13 | import org.apache.flink.streaming.api.windowing.windows.GlobalWindow; 14 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 15 | 16 | import java.util.concurrent.TimeUnit; 17 | 18 | /** 19 | * @Author: Lei 20 | * @E-mail: 843291011@qq.com 21 | * @Date: Created in 3:18 下午 2020/6/7 22 | * @Version: 1.0 23 | * @Modified By: 24 | * @Description: 25 | */ 26 | 27 | /* 28 | timeWindow 分组后,每一个组5秒收集数据才会触发窗口执行 29 | */ 30 | public class C04_TimeWindow { 31 | public static void main(String[] args) throws Exception { 32 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 33 | 34 | // spark,3 35 | // hadoop,2 36 | DataStreamSource lines = env.socketTextStream("localhost", 7777); 37 | 38 | SingleOutputStreamOperator> wordAndCount = lines.map(new MapFunction>() { 39 | @Override 40 | public Tuple2 map(String value) throws Exception { 41 | String[] fields = value.split(","); 42 | String word = fields[0]; 43 | Integer count = Integer.parseInt(fields[1]); 44 | return Tuple2.of(word, count); 45 | } 46 | }); 47 | 48 | // 先分组,再划分窗口 49 | KeyedStream, Tuple> keyed = wordAndCount.keyBy(0); 50 | 51 | // 划分滚动窗口timeWindow,只传入一个参数 52 | WindowedStream, Tuple, TimeWindow> window = keyed.timeWindow(Time.seconds(5)); 53 | //WindowedStream, Tuple, TimeWindow> window = 
keyed.window(TumblingProcessingTimeWindows.of(Time.of(5, TimeUnit.SECONDS))); 54 | SingleOutputStreamOperator> summed = window.sum(1); 55 | 56 | summed.print(); 57 | 58 | env.execute("C04_TimeWindow"); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c04_window/C05_SlidingWindowAll.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c04_window; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.streaming.api.datastream.AllWindowedStream; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.streaming.api.windowing.time.Time; 9 | import org.apache.flink.streaming.api.windowing.windows.GlobalWindow; 10 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 11 | 12 | /** 13 | * @Author: Lei 14 | * @E-mail: 843291011@qq.com 15 | * @Date: Created in 4:22 下午 2020/6/7 16 | * @Version: 1.0 17 | * @Modified By: 18 | * @Description: 19 | */ 20 | public class C05_SlidingWindowAll { 21 | public static void main(String[] args) throws Exception { 22 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 23 | 24 | DataStreamSource lines = env.socketTextStream("localhost", 7777); 25 | 26 | SingleOutputStreamOperator nums = lines.map(new MapFunction() { 27 | @Override 28 | public Integer map(String value) throws Exception { 29 | return Integer.parseInt(value); 30 | } 31 | }); 32 | 33 | // 不分组,将整体当成一个组 34 | AllWindowedStream window = nums.timeWindowAll(Time.seconds(10), Time.seconds(5)); 35 | 36 | // 在窗口中聚合 37 | SingleOutputStreamOperator summed = window.sum(0); 38 | 39 | summed.print(); 40 | 41 | env.execute("C05_SlidingWindowAll"); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c04_window/C06_SlidingWindow.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c04_window; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.KeyedStream; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.datastream.WindowedStream; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.streaming.api.windowing.time.Time; 12 | import org.apache.flink.streaming.api.windowing.windows.GlobalWindow; 13 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 14 | 15 | /** 16 | * @Author: Lei 17 | * @E-mail: 843291011@qq.com 18 | * @Date: Created in 3:18 下午 2020/6/7 19 | * @Version: 1.0 20 | * @Modified By: 21 | * @Description: 22 | */ 23 | 24 | // 分组后再调用SlidingWindow,通常来算趋势 25 | 26 | public class C06_SlidingWindow { 27 | public static void main(String[] args) throws Exception { 28 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 29 | 30 | // spark,3 31 | // hadoop,2 32 | DataStreamSource lines = env.socketTextStream("localhost", 7777); 33 | 
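        // Sliding-window recap for the keyed stream built below: timeWindow(Time.seconds(10), Time.seconds(5))
        // emits a result every 5 seconds over the last 10 seconds of data, so each element is counted in two
        // overlapping windows. With the default processing-time characteristic this shorthand should be
        // equivalent to the explicit assigner (sketch only; it would need an extra import of
        // org.apache.flink.streaming.api.windowing.assigners.SlidingProcessingTimeWindows):
        //   keyed.window(SlidingProcessingTimeWindows.of(Time.seconds(10), Time.seconds(5)))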
34 | SingleOutputStreamOperator> wordAndCount = lines.map(new MapFunction>() { 35 | @Override 36 | public Tuple2 map(String value) throws Exception { 37 | String[] fields = value.split(","); 38 | String word = fields[0]; 39 | Integer count = Integer.parseInt(fields[1]); 40 | return Tuple2.of(word, count); 41 | } 42 | }); 43 | 44 | // 先分组,再划分窗口 45 | KeyedStream, Tuple> keyed = wordAndCount.keyBy(0); 46 | 47 | // 划分窗口 48 | WindowedStream, Tuple, TimeWindow> window = keyed.timeWindow(Time.seconds(10), Time.seconds(5)); 49 | 50 | SingleOutputStreamOperator> summed = window.sum(1); 51 | 52 | summed.print(); 53 | 54 | env.execute("C06_SlidingWindow"); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c04_window/C07_SessionWindow.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c04_window; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.KeyedStream; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.datastream.WindowedStream; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.streaming.api.windowing.assigners.ProcessingTimeSessionWindows; 12 | import org.apache.flink.streaming.api.windowing.time.Time; 13 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 14 | 15 | /** 16 | * @Author: Lei 17 | * @E-mail: 843291011@qq.com 18 | * @Date: Created in 3:18 下午 2020/6/7 19 | * @Version: 1.0 20 | * @Modified By: 21 | * @Description: 22 | */ 23 | 24 | // 距离上一次会话时间超过时间,触发 25 | 26 | public class C07_SessionWindow { 27 | public static void main(String[] args) throws Exception { 28 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 29 | 30 | // spark,3 31 | // hadoop,2 32 | DataStreamSource lines = env.socketTextStream("localhost", 7777); 33 | 34 | SingleOutputStreamOperator> wordAndCount = lines.map(new MapFunction>() { 35 | @Override 36 | public Tuple2 map(String value) throws Exception { 37 | String[] fields = value.split(","); 38 | String word = fields[0]; 39 | Integer count = Integer.parseInt(fields[1]); 40 | return Tuple2.of(word, count); 41 | } 42 | }); 43 | 44 | // 先分组,再划分窗口 45 | KeyedStream, Tuple> keyed = wordAndCount.keyBy(0); 46 | 47 | // 划分窗口 48 | WindowedStream, Tuple, TimeWindow> window = keyed.window(ProcessingTimeSessionWindows.withGap(Time.seconds(5))); 49 | 50 | SingleOutputStreamOperator> summed = window.sum(1); 51 | 52 | summed.print(); 53 | 54 | env.execute("C07_SessionWindow"); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c04_window/C09_【重要】数据源多分区WaterMark,需要所有分区时间达到窗口触发时间.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/src/main/java/com/lei/apitest/c04_window/C09_【重要】数据源多分区WaterMark,需要所有分区时间达到窗口触发时间.png -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c04_window/C09_【重要】滚动窗口结合EventTime&WaterMark延迟触发任务机制.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/src/main/java/com/lei/apitest/c04_window/C09_【重要】滚动窗口结合EventTime&WaterMark延迟触发任务机制.png -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c05_project/C01_DataToActivityBeanFunction.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c05_project; 2 | 3 | import com.lei.apitest.c05_project.domain.ActivityBean; 4 | import org.apache.flink.api.common.functions.RichMapFunction; 5 | import org.apache.flink.configuration.Configuration; 6 | 7 | import java.sql.Connection; 8 | import java.sql.DriverManager; 9 | import java.sql.PreparedStatement; 10 | import java.sql.ResultSet; 11 | 12 | /** 13 | * @Author: 14 | * @Date: Created in 10:20 上午 2020/6/8 15 | * @Version: 1.0 16 | * @Modified By: 17 | * @Description: 18 | */ 19 | public class C01_DataToActivityBeanFunction extends RichMapFunction { 20 | 21 | private Connection connection = null; 22 | 23 | @Override 24 | public void open(Configuration parameters) throws Exception { 25 | super.open(parameters); 26 | 27 | // 创建MySQL连接 28 | // 这里不应该对异常进行捕获,让Flink自行处理,比如重启之类的 29 | // 如果捕获异常了,则Flink无法捕获到该异常 30 | String url = "jdbc:mysql://mysql-01:3306/flink_big_data?useUnicode=true&characterEncoding=UTF-8&autoReconnect=true&failOverReadOnly=false"; 31 | String user = "root"; 32 | String password = "1234"; 33 | connection = DriverManager.getConnection(url, user, password); 34 | } 35 | 36 | @Override 37 | public ActivityBean map(String line) throws Exception { 38 | String[] fields = line.split(","); 39 | 40 | String uid = fields[0]; 41 | String aid = fields[1]; 42 | 43 | // 根据aid作为查询条件查询出name 44 | // 最好使用简单的关联查询,MySQL也可以进行关联查询 45 | PreparedStatement preparedStatement = connection.prepareStatement("SELECT name FROM t_activities WHERE a_id = ?"); 46 | preparedStatement.setString(1, aid); 47 | ResultSet resultSet = preparedStatement.executeQuery(); 48 | String name = null; 49 | while (resultSet.next()) { 50 | name = resultSet.getString(1); 51 | } 52 | 53 | String time = fields[2]; 54 | int eventType = Integer.parseInt(fields[3]); 55 | String province = fields[4]; 56 | 57 | return ActivityBean.of(uid, aid, name, time, eventType, province); 58 | } 59 | 60 | @Override 61 | public void close() throws Exception { 62 | super.close(); 63 | if (connection != null) { 64 | connection.close(); 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c05_project/C02_A_HttpClientTest.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c05_project; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import com.alibaba.fastjson.JSONObject; 5 | import org.apache.http.client.HttpClient; 6 | import org.apache.http.client.methods.CloseableHttpResponse; 7 | import org.apache.http.client.methods.HttpGet; 8 | import org.apache.http.impl.client.CloseableHttpClient; 9 | import org.apache.http.impl.client.HttpClients; 10 | import org.apache.http.util.EntityUtils; 11 | 12 | /** 13 | * @Author: 14 | * @Date: 2020-06-09 11:29 15 | * @Version: 1.0 16 | * @Modified By: 17 | * @Description: 18 | */ 19 | 20 | // 需求:根据经纬度查询高德API关联位置信息,就是一个普通的 Java程序 21 | public class C02_A_HttpClientTest { 22 | public static void main(String[] args) throws Exception { 23 | double 
longitude = 116.311805; 24 | double latitude = 40.028572; 25 | 26 | String url = "https://restapi.amap.com/v3/geocode/regeo?key=4924f7ef5c86a278f5500851541cdcff&location=" + longitude +"," + latitude; 27 | CloseableHttpClient httpClient = HttpClients.createDefault(); 28 | HttpGet httpGet = new HttpGet(url); 29 | CloseableHttpResponse response = httpClient.execute(httpGet); 30 | 31 | try { 32 | int status = response.getStatusLine().getStatusCode(); 33 | String province = null; 34 | if (status == 200) { 35 | // 获取请求的json字符串 36 | String result = EntityUtils.toString(response.getEntity()); 37 | // 转成json对象 38 | JSONObject josnObj = JSON.parseObject(result); 39 | // 获取位置信息 40 | JSONObject regeocode = josnObj.getJSONObject("regeocode"); 41 | if (regeocode != null && !regeocode.isEmpty()) { 42 | JSONObject address = regeocode.getJSONObject("addressComponent"); 43 | // 获取省市 44 | province = address.getString("province"); 45 | String city = address.getString("city"); 46 | String businessAreas = address.getString("businessAreas"); 47 | } 48 | } 49 | 50 | System.out.println(province); 51 | } finally { 52 | response.close(); 53 | httpClient.close(); 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c05_project/C03_AsyncEsRequest_Test.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c05_project; 2 | 3 | import com.lei.apitest.c05_project.domain.ActivityBean; 4 | import com.lei.apitest.util.FlinkUtilsV1; 5 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 6 | import org.apache.flink.api.java.tuple.Tuple2; 7 | import org.apache.flink.streaming.api.datastream.AsyncDataStream; 8 | import org.apache.flink.streaming.api.datastream.DataStream; 9 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 10 | 11 | import java.util.concurrent.TimeUnit; 12 | 13 | /** 14 | * @Author: 15 | * @Date: 2020-06-09 14:30 16 | * @Version: 1.0 17 | * @Modified By: 18 | * @Description: 19 | */ 20 | // 通过 id 查询ES 对应的文档,id 则来自于kafka 21 | public class C03_AsyncEsRequest_Test { 22 | public static void main(String[] args) throws Exception { 23 | // 输入参数:activity10 group_id_flink node-01:9092,node-02:9092,node-03:9092 24 | DataStream lines = FlinkUtilsV1.createKafkaStream(args, new SimpleStringSchema()); 25 | 26 | //SingleOutputStreamOperator beans = lines.map(new C01_DataToActivityBeanFunction()); 27 | SingleOutputStreamOperator> result = AsyncDataStream.unorderedWait( 28 | // 这里的队列不能超过最大队列大小 29 | lines, new C03_AsyncEsRequest(), 0, TimeUnit.MILLISECONDS, 10); 30 | 31 | result.print(); 32 | 33 | FlinkUtilsV1.getEnv().execute("C03_AsyncEsRequest_Test"); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c05_project/C03_AsyncHttpRequest_Test.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c05_project; 2 | 3 | import com.lei.apitest.util.FlinkUtilsV1; 4 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.datastream.AsyncDataStream; 7 | import org.apache.flink.streaming.api.datastream.DataStream; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | 10 | import java.util.concurrent.TimeUnit; 11 | 12 | /** 13 | * @Author: 14 | * @Date: 2020-06-09 15:06 15 | 
* @Version: 1.0 16 | * @Modified By: 17 | * @Description: 18 | */ 19 | // flink 通过异步方式从resfulAPI接口中获取数据,查询到详细的地理位置信息 20 | public class C03_AsyncHttpRequest_Test { 21 | public static void main(String[] args) throws Exception { 22 | // 输入参数:activity10 group_id_flink node-01:9092,node-02:9092,node-03:9092 23 | DataStream lines = FlinkUtilsV1.createKafkaStream(args, new SimpleStringSchema()); 24 | 25 | //SingleOutputStreamOperator beans = lines.map(new C01_DataToActivityBeanFunction()); 26 | SingleOutputStreamOperator result = AsyncDataStream.unorderedWait( 27 | // 这里的队列不能超过最大队列大小 28 | lines, new C03_AsyncHttpRequest(), 0, TimeUnit.MILLISECONDS, 10); 29 | 30 | result.print(); 31 | 32 | FlinkUtilsV1.getEnv().execute("C03_AsyncHttpRequest_Test"); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c05_project/C03_AsyncMySQLRequest_Test.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c05_project; 2 | 3 | import com.lei.apitest.util.FlinkUtilsV1; 4 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 5 | import org.apache.flink.streaming.api.datastream.AsyncDataStream; 6 | import org.apache.flink.streaming.api.datastream.DataStream; 7 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 8 | 9 | import java.util.concurrent.TimeUnit; 10 | 11 | /** 12 | * @Author: 13 | * @Date: 2020-06-09 15:11 14 | * @Version: 1.0 15 | * @Modified By: 16 | * @Description: 17 | */ 18 | // flink 通过活动id,从mysql查询活动名称 19 | public class C03_AsyncMySQLRequest_Test { 20 | public static void main(String[] args) throws Exception { 21 | // 输入参数:activity10 group_id_flink node-01:9092,node-02:9092,node-03:9092 22 | DataStream lines = FlinkUtilsV1.createKafkaStream(args, new SimpleStringSchema()); 23 | 24 | //SingleOutputStreamOperator beans = lines.map(new C01_DataToActivityBeanFunction()); 25 | SingleOutputStreamOperator result = AsyncDataStream.unorderedWait( 26 | // 这里的队列不能超过最大队列大小 27 | lines, new C03_AsyncMySQLRequest(), 0, TimeUnit.MILLISECONDS, 10); 28 | 29 | result.print(); 30 | 31 | FlinkUtilsV1.getEnv().execute("C03_AsyncMySQLRequest_Test"); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c05_project/C04_MysqlSink.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c05_project; 2 | 3 | import com.lei.apitest.c05_project.domain.ActivityBean; 4 | import org.apache.flink.configuration.Configuration; 5 | import org.apache.flink.streaming.api.functions.sink.RichSinkFunction; 6 | 7 | import java.sql.Connection; 8 | import java.sql.DriverManager; 9 | import java.sql.PreparedStatement; 10 | 11 | /** 12 | * @Author: 13 | * @Date: Created in 10:10 下午 2020/6/9 14 | * @Version: 1.0 15 | * @Modified By: 16 | * @Description: 17 | */ 18 | 19 | /* 20 | 21 | create table t_activity_counts( 22 | aid varchar(10), 23 | event_type int, 24 | counts int, 25 | INDEX MultiIdx(aid,event_type) 26 | ) 27 | 28 | */ 29 | public class C04_MysqlSink extends RichSinkFunction { 30 | 31 | private transient Connection connection = null; 32 | 33 | @Override 34 | public void open(Configuration parameters) throws Exception { 35 | super.open(parameters); 36 | 37 | // 创建MySQL连接 38 | String url = "jdbc:mysql://mysql-01:3306/flink_big_data?useUnicode=true&characterEncoding=UTF-8&autoReconnect=true&failOverReadOnly=false"; 39 | 
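// Editor's note: the C03_Async*_Test drivers above hand AsyncDataStream.unorderedWait async functions
// (C03_AsyncEsRequest / C03_AsyncHttpRequest / C03_AsyncMySQLRequest) that are not reproduced in this
// excerpt. A minimal, hypothetical sketch of such a function is shown here for reference; the class name,
// SQL and thread-pool workaround are illustrative only, and a production job would use a truly
// non-blocking client or a connection pool rather than one shared JDBC connection.
//
// Assumed imports: org.apache.flink.configuration.Configuration,
// org.apache.flink.streaming.api.functions.async.RichAsyncFunction and ResultFuture,
// java.sql.*, java.util.Collections, java.util.concurrent.ExecutorService and Executors.
class AsyncMySQLLookupSketch extends RichAsyncFunction<String, String> {

    private transient Connection connection;
    private transient ExecutorService executor;

    @Override
    public void open(Configuration parameters) throws Exception {
        connection = DriverManager.getConnection(
                "jdbc:mysql://mysql-01:3306/flink_big_data", "root", "1234");
        // single worker thread, so the shared Connection is never used concurrently
        executor = Executors.newSingleThreadExecutor();
    }

    @Override
    public void asyncInvoke(String line, ResultFuture<String> resultFuture) {
        executor.submit(() -> {
            try {
                String aid = line.split(",")[1];
                PreparedStatement ps = connection.prepareStatement(
                        "SELECT name FROM t_activities WHERE a_id = ?");
                ps.setString(1, aid);
                ResultSet rs = ps.executeQuery();
                String name = rs.next() ? rs.getString(1) : null;
                rs.close();
                ps.close();
                // complete the future with the enriched record
                resultFuture.complete(Collections.singletonList(line + "," + name));
            } catch (Exception e) {
                resultFuture.completeExceptionally(e);
            }
        });
    }

    @Override
    public void close() throws Exception {
        if (executor != null) { executor.shutdown(); }
        if (connection != null) { connection.close(); }
    }
}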
String user = "root"; 40 | String password = "1234"; 41 | 42 | connection = DriverManager.getConnection(url, user, password); 43 | } 44 | 45 | @Override 46 | public void invoke(ActivityBean value, Context context) throws Exception { 47 | String sql = "INSERT INTO t_activity_counts (aid, event_type, counts) VALUES (?,?,?) ON DUPLICATE KEY UPDATE counts = ?"; 48 | PreparedStatement pstm = connection.prepareStatement(sql); 49 | 50 | try { 51 | pstm.setString(1, value.aid); 52 | pstm.setInt(2, value.eventType); 53 | pstm.setInt(3, value.count); 54 | pstm.setInt(4, value.count); 55 | 56 | pstm.executeUpdate(); 57 | } finally { 58 | if (pstm != null) { 59 | pstm.close(); 60 | } 61 | } 62 | } 63 | 64 | @Override 65 | public void close() throws Exception { 66 | super.close(); 67 | // 关闭连接 68 | if (connection != null) { 69 | connection.close(); 70 | } 71 | } 72 | 73 | } 74 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c05_project/C07_KeyedState和OperatorState介绍.txt: -------------------------------------------------------------------------------- 1 | Flink的State 2 | ---------------------- 3 | Flink 架构体系的一大特性是:有状态计算 4 | 1.有状态计算:在程序内部存储计算产生的中间结果,并提供给后续的 Function或算子计算结果使用。 5 | 2.状态:与时间相关的任务内部数据(计算数据和元数据属性)的快照,在计算过程中会进行持久化 6 | 7 | 作用: 8 | 每次计算需要基于上一次计算结果,所以需要通过 State将每次计算的中间结果进行持久化 9 | 出现错误需要从成功的检查点进行 State的恢复 10 | 增量计算,Failover这些机制都需要 State的支撑 11 | 12 | 存储实现 13 | 基于内存的 MemoryStateBackend - 在debug模式使用,不建议在生产模式下应用; 14 | 基于HDFS的 FsStateBackend - 分布式文件持久化,每次读写都产生网络IO,整体性能不佳; 15 | 基于RocksDB的RocksDBStateBackend - 本地文件 + 异步HDFS持久化; 16 | 还有一个是基于Niagara(Alibaba 内部实现) NiagaraStateBackend - 分布式持久化 - 在Alibaba生产环境应用: 17 | 18 | State的分类【按照是否根据key进行分区分为:KeyedState和OperatorState】 19 | 1.KeyedState 20 | 表示与key相关的一种state,只能用于KeyedStream类型数据集对应的Functions和Operators之上 21 | 基中的key是我们在SQL语句中对应的GroupBy、PartitionBy里面的字段 22 | KeyedState 是Operator 的特例,区别是KeyedState 事先按照key对数据集进行分区。每个key state 23 | 仅对应一个Operator和Key的组合 24 | 25 | OperatorState 26 | 只与算子实例绑定,每个算子实例中持有所有数据元素中的一部分状态数据 27 | 28 | (3)两者都支持并行度发生变化时,进行状态数据的重新分配 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c05_project/C07_KeyedState和OperatorState介绍演示.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/src/main/java/com/lei/apitest/c05_project/C07_KeyedState和OperatorState介绍演示.png -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c05_project/C09_0_使用OperatorState记录文件偏移量需求.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/src/main/java/com/lei/apitest/c05_project/C09_0_使用OperatorState记录文件偏移量需求.png -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c05_project/C09_1_MyParFileSource.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c05_project; 2 | 3 | import org.apache.flink.api.java.tuple.Tuple2; 4 | import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; 5 | 6 | import java.io.RandomAccessFile; 7 | import 
java.util.concurrent.TimeUnit; 8 | 9 | /** 10 | * @Author: 11 | * @Date: 2020-06-12 8:44 12 | * @Version: 1.0 13 | * @Modified By: 14 | * @Description: 15 | */ 16 | 17 | /* 18 | 自定义可以并行的source 19 | 需要继承RichParallelSourceFunction,重写run与cancel方法 20 | */ 21 | public class C09_1_MyParFileSource extends RichParallelSourceFunction> { 22 | private String path; 23 | private boolean flag = true; 24 | 25 | public C09_1_MyParFileSource() { 26 | } 27 | 28 | public C09_1_MyParFileSource(String path) { 29 | this.path = path; 30 | } 31 | 32 | @Override 33 | public void run(SourceContext> ctx) throws Exception { 34 | int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask(); 35 | 36 | // /var/data/0.txt 37 | RandomAccessFile randomAccessFile = new RandomAccessFile(path + "/" + subtaskIndex + ".txt", "r"); 38 | 39 | while (flag) { 40 | String line = randomAccessFile.readLine(); 41 | 42 | if (line != null) { 43 | line = new String(line.getBytes("ISO-8859-1"), "UTF-8"); 44 | // 将数据发送出去 45 | ctx.collect(Tuple2.of(subtaskIndex + "", line)); 46 | } else { 47 | TimeUnit.SECONDS.sleep(1000); 48 | } 49 | } 50 | } 51 | 52 | @Override 53 | public void cancel() { 54 | flag = false; 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c05_project/C09_2_OperatorStateDemo.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c05_project; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.common.restartstrategy.RestartStrategies; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.runtime.state.StateBackend; 7 | import org.apache.flink.runtime.state.filesystem.FsStateBackend; 8 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 9 | import org.apache.flink.streaming.api.environment.CheckpointConfig; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | 12 | /** 13 | * @Author: 14 | * @Date: 2020-06-12 8:55 15 | * @Version: 1.0 16 | * @Modified By: 17 | * @Description: 18 | */ 19 | 20 | // 测试自定义并行数据源 21 | // file:///Users/leizuquan/IdeaProjects/FlinkTutorial/check_point_dir 22 | // echo zzzzzz >> 1.txt 向文件中添加数据 23 | public class C09_2_OperatorStateDemo { 24 | public static void main(String[] args) throws Exception { 25 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 26 | 27 | env.setParallelism(2); 28 | 29 | // 开启checkpoint,并设置checkpoint间隔;默认不开启checkPoint 30 | env.enableCheckpointing(5000); 31 | // 设置故障重启次数,重启2次,重启间隔2秒;默认无限重启 32 | env.setRestartStrategy(RestartStrategies.fixedDelayRestart(2, 2000)); 33 | // 设置checkpoint策略,为本地文件存储;默认内存存储; 生产环境建议使用hdfs分布式文件存储且配置在flink-conf.yaml文件中 34 | StateBackend fsStateBackend = new FsStateBackend("file:///Users/leizuquan/IdeaProjects/FlinkTutorial/check_point_dir"); 35 | env.setStateBackend(fsStateBackend); 36 | // 设置Job被cancel掉后或故障下线后,checkpoint不删除;默认checkpoint在Job下线后会删除 37 | env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);; 38 | 39 | DataStreamSource lines = env.socketTextStream("node-01", 7777); 40 | lines.map(new MapFunction() { 41 | @Override 42 | public String map(String line) throws Exception { 43 | if (line.equals("null")) { 44 | System.out.println(1 / 0); 45 | } 46 | return line; 47 | } 48 | }).print(); 49 | 50 | DataStreamSource> tp = env.addSource(new C09_1_MyParFileSource("MyParFile")); 51 | 
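// Editor's note: C09_4_OperatorStateDemoV2 below swaps in C09_3_MyExactlyOnceParFileSource, which is not
// included in this excerpt. A minimal sketch of such a source is given here, assuming it extends the
// C09_1 logic with operator state (ListState) that records the read offset per subtask; class and state
// names are illustrative. (Also note that TimeUnit.SECONDS.sleep(1000) in C09_1 above sleeps roughly
// 16 minutes; 1 second, or MILLISECONDS, was presumably intended.)
//
// Assumed imports: org.apache.flink.api.common.state.ListState and ListStateDescriptor,
// org.apache.flink.runtime.state.FunctionInitializationContext and FunctionSnapshotContext,
// org.apache.flink.streaming.api.checkpoint.CheckpointedFunction, plus those already used by C09_1.
class MyExactlyOnceParFileSourceSketch extends RichParallelSourceFunction<Tuple2<String, String>>
        implements CheckpointedFunction {

    private final String path;
    private volatile boolean flag = true;
    private long offset = 0;                        // current read position of this subtask's file
    private transient ListState<Long> offsetState;  // operator state, snapshotted with each checkpoint

    MyExactlyOnceParFileSourceSketch(String path) { this.path = path; }

    @Override
    public void initializeState(FunctionInitializationContext context) throws Exception {
        offsetState = context.getOperatorStateStore()
                .getListState(new ListStateDescriptor<>("file-offset", Long.class));
        if (context.isRestored()) {
            for (Long restored : offsetState.get()) {
                offset = restored;                  // each subtask holds at most one entry
            }
        }
    }

    @Override
    public void snapshotState(FunctionSnapshotContext context) throws Exception {
        offsetState.clear();
        offsetState.add(offset);
    }

    @Override
    public void run(SourceContext<Tuple2<String, String>> ctx) throws Exception {
        int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
        RandomAccessFile raf = new RandomAccessFile(path + "/" + subtaskIndex + ".txt", "r");
        raf.seek(offset);                           // resume where the last checkpoint left off
        while (flag) {
            String line = raf.readLine();
            if (line == null) {
                TimeUnit.SECONDS.sleep(1);
                continue;
            }
            line = new String(line.getBytes("ISO-8859-1"), "UTF-8");
            // keep "emit record" and "advance offset" atomic with respect to snapshotState
            synchronized (ctx.getCheckpointLock()) {
                ctx.collect(Tuple2.of(subtaskIndex + "", line));
                offset = raf.getFilePointer();
            }
        }
    }

    @Override
    public void cancel() { flag = false; }
}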
52 | tp.print(); 53 | 54 | 55 | env.execute("C09_OperatorStateDemo"); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c05_project/C09_4_OperatorStateDemoV2.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c05_project; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.api.common.restartstrategy.RestartStrategies; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.runtime.state.StateBackend; 7 | import org.apache.flink.runtime.state.filesystem.FsStateBackend; 8 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 9 | import org.apache.flink.streaming.api.environment.CheckpointConfig; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | 12 | /** 13 | * @Author: 14 | * @Date: 2020-06-12 8:55 15 | * @Version: 1.0 16 | * @Modified By: 17 | * @Description: 18 | */ 19 | public class C09_4_OperatorStateDemoV2 { 20 | public static void main(String[] args) throws Exception { 21 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 22 | 23 | env.setParallelism(2); 24 | 25 | // 开启checkpoint,并设置checkpoint间隔;默认不开启checkPoint 26 | env.enableCheckpointing(5000); 27 | // 设置故障重启次数,重启2次,重启间隔2秒;默认无限重启 28 | env.setRestartStrategy(RestartStrategies.fixedDelayRestart(2, 2000)); 29 | // 设置checkpoint策略,为本地文件存储;默认内存存储; 生产环境建议使用hdfs分布式文件存储且配置在flink-conf.yaml文件中 30 | StateBackend fsStateBackend = new FsStateBackend("file:///Users/leizuquan/IdeaProjects/FlinkTutorial/check_point_dir"); 31 | env.setStateBackend(fsStateBackend); 32 | // 设置Job被cancel掉后或故障下线后,checkpoint不删除;默认checkpoint在Job下线后会删除 33 | env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);; 34 | 35 | DataStreamSource lines = env.socketTextStream("localhost", 7777); 36 | lines.map(new MapFunction() { 37 | @Override 38 | public String map(String line) throws Exception { 39 | if (line.equals("null")) { 40 | System.out.println(1 / 0); 41 | } 42 | return line; 43 | } 44 | }).print(); 45 | 46 | DataStreamSource> tp = env.addSource(new C09_3_MyExactlyOnceParFileSource("MyParFile")); 47 | 48 | tp.print(); 49 | 50 | 51 | env.execute("C09_OperatorStateDemo"); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c05_project/C10_z_checkpoint_实现ExactlyOnce.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lei-zuquan/FlinkTutorial/486ad5e943ace44d2a47d74e055a1d0d236f44cf/src/main/java/com/lei/apitest/c05_project/C10_z_checkpoint_实现ExactlyOnce.png -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c05_project/C11_KafkaProducer两阶段提交重要设计.txt: -------------------------------------------------------------------------------- 1 | 2 | 首先Kafka在0.11+版本后,支持事务,就是可以预先提交;如果后续成功操作,再次提交确认信息即可,以达到避免脏数据问题 3 | 4 | 保证事务提交和checkpoint同时成功 5 | 6 | 1.在checkpoint前预先提交 7 | 2.如果checkpoint成功,再次提交;如果失败,回滚 8 | 9 | 两阶段提交与checkpoint联动;实现CheckpointedFunction接口snapshotState 10 | 在snapshotState方法中,调用preCommit方法 11 | 12 | 等所有subTask都成功了,TaskManager通知JobManager,JobManager发送notifyCheckpointComplete指令 13 | 在TwoPhaseCommitSinkFunction类的notifyCheckpointComplete方法中,调用了最终的commit提交 14 | 15 | 
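Editor's sketch: minimal wiring for the above, assuming the 0.11 connector used elsewhere in this project
(constructor overloads vary across Flink versions; topic name and stream variable are placeholders):

    Properties props = new Properties();
    props.setProperty("bootstrap.servers", "node-01:9092,node-02:9092,node-03:9092");
    // must not exceed the broker-side transaction.max.timeout.ms (15 minutes by default)
    props.setProperty("transaction.timeout.ms", "60000");

    FlinkKafkaProducer011<String> sink = new FlinkKafkaProducer011<>(
            "topic_out",
            new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()),
            props,
            FlinkKafkaProducer011.Semantic.EXACTLY_ONCE);

    stream.addSink(sink);

Checkpointing must be enabled via env.enableCheckpointing(...), otherwise the pre-commit / commit cycle
described above never runs.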
在FlinkKafkaProducer类中commit方法,最终调用producer.commitTransaction 16 | 17 | =============================================================== 18 | 19 | FlinkKafkaProducer 正是借鉴了此设计,采用两阶段提交达到精确Exactly-Once 20 | 21 | public class FlinkKafkaProducer 22 | extends TwoPhaseCommitSinkFunction { 23 | protected abstract void invoke(TXN transaction, IN value, Context context) throws Exception; 24 | 25 | protected abstract void preCommit(TXN transaction) throws Exception; 26 | 27 | // 5.KafkaProducer被两阶段调用commit完成二次提交 28 | protected abstract void commit(TXN transaction); 29 | } 30 | 31 | 32 | TwoPhaseCommitSinkFunction与CheckpointedFunction联动 33 | 34 | public abstract class TwoPhaseCommitSinkFunction 35 | extends RichSinkFunction 36 | implements CheckpointedFunction, CheckpointListener{ 37 | 38 | protected abstract void invoke(TXN transaction, IN value, Context context) throws Exception; 39 | 40 | protected abstract void preCommit(TXN transaction) throws Exception; 41 | 42 | protected abstract void commit(TXN transaction); 43 | 44 | // 4.KafkaProducer 的两阶段提交就会收到:notifyCheckpointComplete,并调用commit 45 | @Override 46 | public final void notifyCheckpointComplete(long checkpointId) throws Exception { 47 | commit(pendingTransaction.handle); 48 | } 49 | } 50 | 51 | 52 | public interface CheckpointListener { 53 | // 1.所有subTask向JobManager发送已经进行预提交 54 | // 2.JobManager收集所有subTask上送过来的信息,如果已经收到所有subTask都已经完成的预提交 55 | // 3.JobManager就会向所以subTask发送:通知CheckPoint完成指令 56 | 57 | void notifyCheckpointComplete(long var1) throws Exception; 58 | } 59 | 60 | 61 | public interface CheckpointedFunction { 62 | // 定期将指定的状态数据保存到StateBackend中;由JobManager触发 63 | void snapshotState(FunctionSnapshotContext context) throws Exception; 64 | 65 | // 初始化OperatorState,生命周期方法,构造方法执行后执行一次; 初始化状态或获取历史状态 66 | void initializeState(FunctionInitializationContext context) throws Exception; 67 | 68 | } -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c05_project/async/C02_SerialHttpClientV1.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c05_project.async; 2 | 3 | import org.apache.http.HttpEntity; 4 | import org.apache.http.client.methods.CloseableHttpResponse; 5 | import org.apache.http.client.methods.HttpPost; 6 | import org.apache.http.entity.ContentType; 7 | import org.apache.http.entity.StringEntity; 8 | import org.apache.http.impl.client.CloseableHttpClient; 9 | import org.apache.http.impl.client.HttpClients; 10 | import org.apache.http.util.EntityUtils; 11 | 12 | 13 | /** 14 | * @Author: 15 | * @Date: 2020-09-15 15:51 16 | * @Version: 1.0 17 | * @Modified By: 18 | * @Description: 19 | */ 20 | public class C02_SerialHttpClientV1 { 21 | 22 | public static void main(String[] args) throws Exception { 23 | // 构造请求 24 | // String url = "http://www.baidu.com/"; 25 | String url = "https://www.cnblogs.com/"; 26 | // String url = "https://study.163.com/"; 27 | HttpPost httpPost = new HttpPost(url); 28 | // httpPost.addHeader("Connection", "keep-alive"); 29 | 30 | httpPost.setEntity(null); 31 | 32 | // 异步请求 33 | long start = System.currentTimeMillis(); 34 | CloseableHttpClient httpClient = HttpClients.createDefault(); 35 | 36 | for (int i = 0; i < 90; i++) { 37 | CloseableHttpResponse httpResponse = httpClient.execute(httpPost); 38 | try { 39 | if (httpResponse.getStatusLine().getStatusCode() == 200) { 40 | System.out.println("ok: " + i); 41 | /*HttpEntity revEntity = httpResponse.getEntity(); 42 | String res = 
EntityUtils.toString(revEntity); 43 | System.out.println("cost is:"+(System.currentTimeMillis()-start)+":"+ res + " finishedCnt:" + i);*/ 44 | // System.out.println(httpResponse.getEntity().getContent().toString()); 45 | } 46 | } finally { 47 | httpResponse.close(); 48 | } 49 | } 50 | 51 | long end = System.currentTimeMillis(); 52 | long spend = end - start; 53 | System.out.println("spend:" + spend); 54 | httpClient.close(); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c05_project/domain/ActivityBean.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c05_project.domain; 2 | 3 | /** 4 | * @Author: 5 | * @Date: Created in 10:15 上午 2020/6/8 6 | * @Version: 1.0 7 | * @Modified By: 8 | * @Description: 9 | */ 10 | public class ActivityBean { 11 | 12 | public String uid; // userId 13 | public String aid; // activityId 14 | public String activityName; 15 | public String time; 16 | public int eventType; 17 | public double longitude; 18 | public double latitude; 19 | public String province; 20 | public int count = 1; 21 | 22 | public ActivityBean() { 23 | } 24 | 25 | public ActivityBean(String uid, String aid, String activityName, String time, int eventType, String province) { 26 | this.uid = uid; 27 | this.aid = aid; 28 | this.activityName = activityName; 29 | this.time = time; 30 | this.eventType = eventType; 31 | this.province = province; 32 | } 33 | 34 | public ActivityBean(String uid, String aid, String activityName, String time, int eventType, double longitude, double latitude, String province) { 35 | this.uid = uid; 36 | this.aid = aid; 37 | this.activityName = activityName; 38 | this.time = time; 39 | this.eventType = eventType; 40 | this.longitude = longitude; 41 | this.latitude = latitude; 42 | this.province = province; 43 | } 44 | 45 | @Override 46 | public String toString() { 47 | return "ActivityBean{" + 48 | "uid='" + uid + '\'' + 49 | ", aid='" + aid + '\'' + 50 | ", activityName='" + activityName + '\'' + 51 | ", time='" + time + '\'' + 52 | ", eventType=" + eventType + 53 | ", longitude=" + longitude + 54 | ", latitude=" + latitude + 55 | ", province='" + province + '\'' + 56 | ", count=" + count + 57 | '}'; 58 | } 59 | 60 | public static ActivityBean of(String uid, String aid, String activityName, String time, int eventType, String province) { 61 | return new ActivityBean(uid, aid, activityName, time, eventType, province); 62 | } 63 | 64 | public static ActivityBean of(String uid, String aid, String activityName, String time, int eventType, double longitude, double latitude, String province) { 65 | return new ActivityBean(uid, aid, activityName, time, eventType, longitude, latitude, province); 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c05_project/mysql.sql: -------------------------------------------------------------------------------- 1 | DROP DATABASE IF EXISTS `flink_big_data`; -- 库名与项目名保持一致 2 | CREATE DATABASE IF NOT EXISTS `flink_big_data` DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci; 3 | 4 | USE `flink_big_data`; 5 | 6 | -- 活动列表 7 | DROP TABLE IF EXISTS `t_activities`; 8 | CREATE TABLE `t_activities` ( 9 | `id` BIGINT UNSIGNED NOT NULL AUTO_INCREMENT COMMENT '主键id, 必备字段', 10 | `gmt_create` DATETIME DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间, 必备字段', 11 | `gmt_modified` DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 
'更新时间, 必备字段', 12 | 13 | `a_id` VARCHAR(100) NOT NULL COMMENT '活动id', 14 | `name` VARCHAR(100) NOT NULL COMMENT '舆情词对应的hashcode', 15 | `last_update` DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间, 必备字段', 16 | PRIMARY KEY (`id`) 17 | ) ENGINE=INNODB DEFAULT CHARSET=utf8; 18 | 19 | -- 插入数据 20 | INSERT INTO `t_activities` (`a_id`, `name`) VALUES ('A1', '新人礼包'); 21 | INSERT INTO `t_activities` (`a_id`, `name`) VALUES ('A2', '月末活动'); 22 | INSERT INTO `t_activities` (`a_id`, `name`) VALUES ('A3', '周末促销'); 23 | INSERT INTO `t_activities` (`a_id`, `name`) VALUES ('A4', '年度促销'); 24 | 25 | 26 | -- 活动数据统计 27 | DROP TABLE IF EXISTS `t_activity_counts`; 28 | CREATE TABLE t_activity_counts( 29 | aid VARCHAR(10), 30 | event_type INT, 31 | counts INT, 32 | PRIMARY KEY (`aid`, `event_type`) 33 | ) ENGINE=INNODB DEFAULT CHARSET=utf8; 34 | 35 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c06_apps/C02_ActBean.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c06_apps; 2 | 3 | /** 4 | * @Author: Lei 5 | * @E-mail: 843291011@qq.com 6 | * @Date: Created in 3:42 下午 2020/6/14 7 | * @Version: 1.0 8 | * @Modified By: 9 | * @Description: 10 | */ 11 | public class C02_ActBean { 12 | 13 | public String uid; 14 | 15 | // 活动ID 16 | public String aid; 17 | 18 | public String time; 19 | 20 | // 事件类型 21 | public Integer type; 22 | 23 | public String province; 24 | 25 | public Integer count; 26 | 27 | public C02_ActBean() { 28 | 29 | } 30 | 31 | public C02_ActBean(String uid, String aid, String time, Integer type, String province) { 32 | this.uid = uid; 33 | this.aid = aid; 34 | this.time = time; 35 | this.type = type; 36 | this.province = province; 37 | } 38 | 39 | public static C02_ActBean of(String uid, String aid, String time, Integer type, String province){ 40 | return new C02_ActBean(uid, aid, time, type, province); 41 | } 42 | 43 | public C02_ActBean(String uid, String aid, String time, Integer type, String province, Integer count) { 44 | this.uid = uid; 45 | this.aid = aid; 46 | this.time = time; 47 | this.type = type; 48 | this.province = province; 49 | this.count = count; 50 | } 51 | 52 | public static C02_ActBean of(String uid, String aid, String time, Integer type, String province, Integer count){ 53 | return new C02_ActBean(uid, aid, time, type, province, count); 54 | } 55 | 56 | @Override 57 | public String toString() { 58 | return "ActBean{" + 59 | "uid='" + uid + '\'' + 60 | ", aid='" + aid + '\'' + 61 | ", time='" + time + '\'' + 62 | ", type=" + type + 63 | ", province='" + province + '\'' + 64 | ", count=" + count + 65 | '}'; 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c06_apps/C02_conf.properties: -------------------------------------------------------------------------------- 1 | topics=activity11 2 | group.id=group_id_flink 3 | bootstrap.servers=node-01:9092,node-02:9092,node-03:9092 4 | auto.offset.reset=earliest 5 | enable.auto.commit=false 6 | checkpoint-interval=10000 7 | restart.times=5 8 | state.backend.path=file:///Users/leizuquan/IdeaProjects/FlinkTutorial/check_point_dir/chk_act -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c06_apps/C03_conf.properties: -------------------------------------------------------------------------------- 1 | topics=activity11 2 | group.id=group_id_flink 3 | 
bootstrap.servers=node-01:9092,node-02:9092,node-03:9092 4 | auto.offset.reset=earliest 5 | enable.auto.commit=false 6 | checkpoint-interval=10000 7 | restart.times=5 8 | state.backend.path=file:///Users/leizuquan/IdeaProjects/FlinkTutorial/check_point_dir/chk_act 9 | 10 | redis.host=node-01 11 | redis.password=123456 12 | redis.db=2 -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c07_sql_api/C01_StreamSqlWordCount.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c07_sql_api; 2 | 3 | import org.apache.flink.api.common.functions.FilterFunction; 4 | import org.apache.flink.api.common.functions.FlatMapFunction; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.datastream.DataStream; 7 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | import org.apache.flink.table.api.Table; 11 | import org.apache.flink.table.api.java.StreamTableEnvironment; 12 | import org.apache.flink.types.Row; 13 | import org.apache.flink.util.Collector; 14 | 15 | import java.util.Arrays; 16 | 17 | /** 18 | * @Author: Lei 19 | * @E-mail: 843291011@qq.com 20 | * @Date: Created in 10:25 下午 2020/6/15 21 | * @Version: 1.0 22 | * @Modified By: 23 | * @Description: 24 | */ 25 | 26 | // spark hadoop flink spark 27 | public class C01_StreamSqlWordCount { 28 | 29 | public static void main(String[] args) throws Exception { 30 | // 实时DataStreamAPI 31 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 32 | 33 | // 创建一个实时的Table执行上下文环境 34 | StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env); 35 | 36 | // word count spark hadoop 37 | DataStreamSource lines = env.socketTextStream("localhost", 7777); 38 | SingleOutputStreamOperator words = lines.flatMap(new FlatMapFunction() { 39 | @Override 40 | public void flatMap(String line, Collector out) throws Exception { 41 | Arrays.stream(line.split(" ")).forEach(out::collect); 42 | 43 | } 44 | }); 45 | 46 | // 注册成表 47 | tableEnv.registerDataStream("t_wordcount", words, "word"); 48 | 49 | // 写SQL 50 | Table table = tableEnv.sqlQuery("SELECT word, COUNT(1) counts FROM t_wordcount GROUP BY word"); 51 | 52 | // 53 | //DataStream> dataStream = tableEnv.toRetractStream(table, C01_WordCount.class); 54 | DataStream> dataStream = tableEnv.toRetractStream(table, Row.class); 55 | 56 | dataStream.filter(new FilterFunction>() { 57 | @Override 58 | public boolean filter(Tuple2 value) throws Exception { 59 | return value.f0; 60 | } 61 | }).print(); 62 | 63 | 64 | env.execute("C01_StreamSqlWordCount"); 65 | } 66 | } 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c07_sql_api/C01_WordCount.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c07_sql_api; 2 | 3 | /** 4 | * @Author: Lei 5 | * @E-mail: 843291011@qq.com 6 | * @Date: Created in 10:50 下午 2020/6/15 7 | * @Version: 1.0 8 | * @Modified By: 9 | * @Description: 10 | */ 11 | public class C01_WordCount { 12 | public String word; 13 | public Long counts; 14 | 15 | public C01_WordCount() { 16 | } 17 | 18 | public C01_WordCount(String word, Long counts) { 19 | this.word = word; 20 | this.counts = counts; 21 | } 
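    // Editor's note: in C01_StreamSqlWordCount above, toRetractStream wraps each result in a
    // Tuple2<Boolean, Row>: f0 == true marks an insert/update of the aggregate, f0 == false marks the
    // retraction of a previously emitted value, which is why that job filters on value.f0. Mapping the
    // result onto this POJO instead of Row would follow the commented-out line there (sketch):
    //
    //     DataStream<Tuple2<Boolean, C01_WordCount>> dataStream =
    //             tableEnv.toRetractStream(table, C01_WordCount.class);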
22 | 23 | @Override 24 | public String toString() { 25 | return "WordCount{" + 26 | "word='" + word + '\'' + 27 | ", counts='" + counts + '\'' + 28 | '}'; 29 | } 30 | 31 | } 32 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c07_sql_api/C02_WordCountSQL.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c07_sql_api; 2 | 3 | import org.apache.flink.api.java.DataSet; 4 | import org.apache.flink.api.java.ExecutionEnvironment; 5 | import org.apache.flink.api.java.operators.DataSource; 6 | import org.apache.flink.table.api.Table; 7 | import org.apache.flink.table.api.java.BatchTableEnvironment; 8 | 9 | /** 10 | * @Author: Lei 11 | * @E-mail: 843291011@qq.com 12 | * @Date: 2020-06-16 9:19 13 | * @Version: 1.0 14 | * @Modified By: 15 | * @Description: 16 | */ 17 | public class C02_WordCountSQL { 18 | public static void main(String[] args) throws Exception { 19 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 20 | // 创建BatchTable上下文环境 21 | BatchTableEnvironment tEnv = BatchTableEnvironment.create(env); 22 | // 模拟测试数据 23 | DataSource input = env.fromElements( 24 | new C01_WordCount("Storm", 1L), 25 | new C01_WordCount("Spark", 1L), 26 | new C01_WordCount("Flink", 1L), 27 | new C01_WordCount("Spark", 1L), 28 | new C01_WordCount("Flink", 1L), 29 | new C01_WordCount("Flink", 1L) ); 30 | 31 | // 注册表名并指定字段名称 32 | tEnv.registerDataSet("WordCount", input, "word, counts"); 33 | 34 | // 按照单词分组并统计单词次数,然后过虑排序 35 | String sql = "SELECT word, SUM(counts) as counts FROM WordCount GROUP BY word" + 36 | " HAVING SUM(counts) >= 2 ORDER BY counts DESC"; 37 | 38 | Table table = tEnv.sqlQuery(sql); 39 | 40 | // 将Table表转成DataSet 41 | DataSet result = tEnv.toDataSet(table, C01_WordCount.class); 42 | 43 | // 输出数据 44 | result.print(); 45 | 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c07_sql_api/C04_KafkaWordCountSQL.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c07_sql_api; 2 | 3 | 4 | import org.apache.flink.api.common.typeinfo.TypeInformation; 5 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 6 | import org.apache.flink.table.api.DataTypes; 7 | import org.apache.flink.table.api.Table; 8 | import org.apache.flink.table.api.java.StreamTableEnvironment; 9 | import org.apache.flink.table.descriptors.Kafka; 10 | import org.apache.flink.table.descriptors.Schema; 11 | import org.apache.flink.table.descriptors.Json; 12 | import org.apache.flink.table.types.DataType; 13 | import org.apache.flink.table.types.logical.DateType; 14 | import org.apache.flink.types.Row; 15 | 16 | /** 17 | * @Author: Lei 18 | * @E-mail: 843291011@qq.com 19 | * @Date: Created in 10:17 下午 2020/6/16 20 | * @Version: 1.0 21 | * @Modified By: 22 | * @Description: 23 | */ 24 | public class C04_KafkaWordCountSQL { 25 | public static void main(String[] args) throws Exception { 26 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 27 | StreamTableEnvironment tEnv = StreamTableEnvironment.create(env); 28 | 29 | tEnv.connect(new Kafka() 30 | .version("universal") 31 | .topic("json-input") 32 | .startFromEarliest() 33 | .property("bootstrap.servers", "node-01:9092,node-02:9092:node-03:9092") 34 | ).withFormat(new Json().deriveSchema()).withSchema(new Schema() 35 | .field("name", 
DataTypes.STRING()) 36 | .field("gender", DataTypes.STRING()) 37 | ).inAppendMode().registerTableSource("kafkaSource"); 38 | 39 | Table result = tEnv.scan("kafkaSource") 40 | .groupBy("gender") 41 | .select("gender, count(1) as counts"); 42 | 43 | tEnv.toRetractStream(result, Row.class).print(); 44 | 45 | env.execute("C04_KafkaWordCountSQL"); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c07_sql_api/C05_UDFSQL.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c07_sql_api; 2 | 3 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 5 | import org.apache.flink.table.api.Table; 6 | import org.apache.flink.table.api.java.StreamTableEnvironment; 7 | import org.apache.flink.types.Row; 8 | 9 | /** 10 | * @Author: Lei 11 | * @E-mail: 843291011@qq.com 12 | * @Date: Created in 10:34 下午 2020/6/16 13 | * @Version: 1.0 14 | * @Modified By: 15 | * @Description: 16 | */ 17 | public class C05_UDFSQL { 18 | public static void main(String[] args) throws Exception{ 19 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 20 | 21 | // 注册一个可以Cache的文件,通过网络发送给TaskManager 22 | env.registerCachedFile("ip.txt", "ip-rules"); 23 | 24 | StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env); 25 | 26 | // 106.121.4.252 27 | // 42.57.88.186 28 | DataStreamSource socketTextStream = env.socketTextStream("localhost", 7777); 29 | 30 | tableEnv.registerDataStream("t_lines", socketTextStream, "ip"); 31 | 32 | // 注册自定义函数,是一个UDF,输入一个IP地址,返回Row<省、市> 33 | tableEnv.registerFunction("ipLocation", new C05_IpLocation()); 34 | 35 | // tableEnv.registerFunction("split", new Split("\\W+")); 36 | Table table = tableEnv.sqlQuery( 37 | "SELECT ip, ipLocation(ip) FROM t_lines"); 38 | 39 | tableEnv.toAppendStream(table, Row.class).print(); 40 | 41 | env.execute("C05_UDFSQL"); 42 | 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c07_sql_api/C06_Split.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c07_sql_api; 2 | 3 | import org.apache.flink.api.common.typeinfo.TypeInformation; 4 | import org.apache.flink.api.common.typeinfo.Types; 5 | import org.apache.flink.table.functions.ScalarFunction; 6 | import org.apache.flink.table.functions.TableFunction; 7 | import org.apache.flink.types.Row; 8 | 9 | /** 10 | * @Author: Lei 11 | * @E-mail: 843291011@qq.com 12 | * @Date: Created in 11:22 下午 2020/6/16 13 | * @Version: 1.0 14 | * @Modified By: 15 | * @Description: 16 | */ 17 | 18 | /* 19 | UDTF 要继承 TableFunction 20 | */ 21 | public class C06_Split extends TableFunction { 22 | 23 | private String separator = ","; 24 | 25 | public C06_Split(String separator) { 26 | this.separator = separator; 27 | } 28 | 29 | public void eval(String line) { 30 | for (String s : line.split(separator)) { 31 | collect(Row.of(s)); 32 | } 33 | } 34 | 35 | @Override 36 | public TypeInformation getResultType() { 37 | return Types.ROW(Types.STRING); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c07_sql_api/C06_UDTFSQL.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c07_sql_api; 2 | 3 | 
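// Editor's note: C05_UDFSQL above registers "ipLocation" as new C05_IpLocation(), a class that is not
// included in this excerpt. A hypothetical sketch of such a scalar UDF is shown below; the actual
// ip-range lookup against the cached "ip-rules" file is stubbed out, and the class name is illustrative.
//
//     import org.apache.flink.api.common.typeinfo.TypeInformation;
//     import org.apache.flink.api.common.typeinfo.Types;
//     import org.apache.flink.table.functions.FunctionContext;
//     import org.apache.flink.table.functions.ScalarFunction;
//     import org.apache.flink.types.Row;
//
//     public class IpLocationSketch extends ScalarFunction {
//
//         private java.io.File ipRules;
//
//         @Override
//         public void open(FunctionContext context) throws Exception {
//             // the file registered with env.registerCachedFile("ip.txt", "ip-rules") in C05_UDFSQL
//             ipRules = context.getCachedFile("ip-rules");
//         }
//
//         // one call per input row: ip -> Row(province, city)
//         public Row eval(String ip) {
//             // a real implementation would search the ranges loaded from ipRules
//             return Row.of("unknown-province", "unknown-city");
//         }
//
//         @Override
//         public TypeInformation<?> getResultType(Class<?>[] signature) {
//             return Types.ROW(Types.STRING, Types.STRING);
//         }
//     }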
import org.apache.flink.streaming.api.datastream.DataStreamSource; 4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 5 | import org.apache.flink.table.api.Table; 6 | import org.apache.flink.table.api.java.StreamTableEnvironment; 7 | import org.apache.flink.types.Row; 8 | 9 | /** 10 | * @Author: Lei 11 | * @E-mail: 843291011@qq.com 12 | * @Date: Created in 11:17 下午 2020/6/16 13 | * @Version: 1.0 14 | * @Modified By: 15 | * @Description: 16 | */ 17 | public class C06_UDTFSQL { 18 | public static void main(String[] args) throws Exception { 19 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 20 | 21 | StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env); 22 | 23 | DataStreamSource socketTextStream = env.socketTextStream("localhost", 7777); 24 | 25 | // hello tom jerry tom 26 | tableEnv.registerDataStream("t_lines", socketTextStream, "line"); 27 | 28 | tableEnv.registerFunction("split", new C06_Split("\\W+")); 29 | 30 | //Table table = tableEnv.sqlQuery( 31 | // "SELECT word, line FROM t_lines, LATERAL TABLE(split(line)) as T(word)"); 32 | 33 | // 左表关联右表 34 | Table table = tableEnv.sqlQuery( 35 | "SELECT word FROM t_lines, LATERAL TABLE(split(line)) as T(word)"); 36 | 37 | tableEnv.toAppendStream(table, Row.class).print(); 38 | 39 | env.execute("C06_UDTFSQL"); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c08_table_api/C01_StreamWordCountTable.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c08_table_api; 2 | 3 | import org.apache.flink.api.common.functions.FilterFunction; 4 | import org.apache.flink.api.common.functions.FlatMapFunction; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | import org.apache.flink.streaming.api.datastream.DataStream; 7 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | import org.apache.flink.table.api.Table; 11 | import org.apache.flink.table.api.java.StreamTableEnvironment; 12 | import org.apache.flink.types.Row; 13 | import org.apache.flink.util.Collector; 14 | 15 | import java.util.Arrays; 16 | 17 | /** 18 | * @Author: Lei 19 | * @E-mail: 843291011@qq.com 20 | * @Date: Created in 10:25 下午 2020/6/15 21 | * @Version: 1.0 22 | * @Modified By: 23 | * @Description: 24 | */ 25 | 26 | // spark hadoop flink spark 27 | public class C01_StreamWordCountTable { 28 | 29 | public static void main(String[] args) throws Exception { 30 | // 实时DataStreamAPI 31 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 32 | 33 | // 创建一个实时的Table执行上下文环境 34 | StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env); 35 | 36 | // word count spark hadoop 37 | DataStreamSource lines = env.socketTextStream("localhost", 7777); 38 | SingleOutputStreamOperator words = lines.flatMap(new FlatMapFunction() { 39 | @Override 40 | public void flatMap(String line, Collector out) throws Exception { 41 | Arrays.stream(line.split(" ")).forEach(out::collect); 42 | 43 | } 44 | }); 45 | 46 | // 注册成表 47 | Table table = tableEnv.fromDataStream(words, "word"); 48 | 49 | // 写SQL 50 | Table result = table.groupBy("word") // 分组 51 | .select("word, count(1) as counts");// 聚合 52 | 53 | // 54 | //DataStream> dataStream = 
tableEnv.toRetractStream(table, C01_WordCount.class); 55 | DataStream> dataStream = tableEnv.toRetractStream(result, Row.class); 56 | 57 | dataStream.filter(new FilterFunction>() { 58 | @Override 59 | public boolean filter(Tuple2 value) throws Exception { 60 | return value.f0; 61 | } 62 | }).print(); 63 | 64 | 65 | env.execute("C02_StreamWordCountTable"); 66 | } 67 | } 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/c08_table_api/C02_WordCountTable.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.c08_table_api; 2 | 3 | import com.lei.apitest.c07_sql_api.C01_WordCount; 4 | import org.apache.flink.api.java.DataSet; 5 | import org.apache.flink.api.java.ExecutionEnvironment; 6 | import org.apache.flink.api.java.operators.DataSource; 7 | import org.apache.flink.table.api.Table; 8 | import org.apache.flink.table.api.java.BatchTableEnvironment; 9 | 10 | /** 11 | * @Author: Lei 12 | * @E-mail: 843291011@qq.com 13 | * @Date: 2020-06-16 9:19 14 | * @Version: 1.0 15 | * @Modified By: 16 | * @Description: 17 | */ 18 | public class C02_WordCountTable { 19 | public static void main(String[] args) throws Exception { 20 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 21 | // 创建BatchTable上下文环境 22 | BatchTableEnvironment tEnv = BatchTableEnvironment.create(env); 23 | // 模拟测试数据 24 | DataSource input = env.fromElements( 25 | new C01_WordCount("Storm", 1L), 26 | new C01_WordCount("Spark", 1L), 27 | new C01_WordCount("Flink", 1L), 28 | new C01_WordCount("Spark", 1L), 29 | new C01_WordCount("Flink", 1L), 30 | new C01_WordCount("Flink", 1L) ); 31 | 32 | // 通过DataSet创建表 33 | Table table = tEnv.fromDataSet(input); 34 | // 调用Table的API进行操作 35 | Table filtered = table.groupBy("word") // 分组 36 | .select("word, counts.sum as counts") // sum 37 | .filter("counts >= 2") // 过虑 38 | .orderBy("counts.desc"); // 排序 39 | 40 | // 将表转成DataSet 41 | DataSet result = tEnv.toDataSet(filtered, C01_WordCount.class); 42 | 43 | // 输出数据 44 | result.print(); 45 | 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/util/FlinkUtilsV1.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.util; 2 | 3 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 4 | import org.apache.flink.streaming.api.datastream.DataStream; 5 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 6 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; 7 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011; 8 | 9 | import java.util.Properties; 10 | 11 | /** 12 | * @Author: Lei 13 | * @E-mail: 843291011@qq.com 14 | * @Date: Created in 10:02 上午 2020/6/8 15 | * @Version: 1.0 16 | * @Modified By: 17 | * @Description: 18 | */ 19 | public class FlinkUtilsV1 { 20 | 21 | private static final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 22 | 23 | public static DataStream createKafkaStream(String[] args, SimpleStringSchema simpleStringSchema) { 24 | String topic = args[0]; 25 | String groupId = args[1]; 26 | String brokerList = args[2]; 27 | 28 | Properties props = new Properties(); 29 | 30 | // 指定Kafka的Broker地址 31 | props.setProperty("bootstrap.servers", brokerList); 32 | // 提定组ID 33 | props.setProperty("group.id", groupId); 34 | // 
如果没有记录偏移量,第一次从开始消费 35 | props.setProperty("auto.offset.reset", "earliest"); 36 | // kafka的消费者不自动提交偏移量,默认kafka自动提交offset,且保存在__consumer_offsets 37 | // props.setProperty("enable.auto.commit", "false"); 38 | 39 | // kafkaSource 40 | FlinkKafkaConsumer011 kafkaSource = new FlinkKafkaConsumer011<>( 41 | topic, 42 | new SimpleStringSchema(), // 序列化与反序列化方式 43 | props); 44 | 45 | // Source 46 | DataStream lines = env.addSource(kafkaSource); 47 | return lines; 48 | } 49 | 50 | public static StreamExecutionEnvironment getEnv() { 51 | return env; 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/util/MyRedisSink.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.util; 2 | 3 | import org.apache.flink.api.common.ExecutionConfig; 4 | import org.apache.flink.api.java.tuple.Tuple3; 5 | import org.apache.flink.api.java.utils.ParameterTool; 6 | import org.apache.flink.configuration.Configuration; 7 | import org.apache.flink.streaming.api.functions.sink.RichSinkFunction; 8 | import redis.clients.jedis.Jedis; 9 | 10 | /** 11 | * @Author: Lei 12 | * @E-mail: 843291011@qq.com 13 | * @Date: Created in 10:49 上午 2020/6/13 14 | * @Version: 1.0 15 | * @Modified By: 16 | * @Description: 17 | */ 18 | // 自定义高级的RedisSink 19 | public class MyRedisSink extends RichSinkFunction> { 20 | 21 | private transient Jedis jedis; 22 | 23 | @Override 24 | public void open(Configuration parameters) throws Exception { 25 | super.open(parameters); 26 | 27 | // 获取全局的配置参数 28 | ParameterTool params = (ParameterTool) getRuntimeContext().getExecutionConfig().getGlobalJobParameters(); 29 | 30 | String host = params.getRequired("redis.host"); 31 | String passWord = params.getRequired("redis.pwd"); 32 | int db = params.getInt("redis.db", 0); 33 | 34 | // 获取redis超时连接时间 35 | jedis = new Jedis(host, 6379, 5000); 36 | //jedis.auth(passWord); 37 | jedis.select(db); 38 | } 39 | 40 | @Override 41 | public void invoke(Tuple3 value, Context context) throws Exception { 42 | if (!jedis.isConnected()) { 43 | jedis.connect(); 44 | } 45 | 46 | jedis.hset(value.f0, value.f1, value.f2); 47 | } 48 | 49 | @Override 50 | public void close() throws Exception { 51 | super.close(); 52 | if (jedis != null) { 53 | jedis.close(); 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/z_other_learn/J03_NcClient.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.z_other_learn; 2 | 3 | import java.io.IOException; 4 | import java.io.OutputStream; 5 | import java.net.ServerSocket; 6 | import java.net.Socket; 7 | import java.text.SimpleDateFormat; 8 | import java.util.Date; 9 | import java.util.concurrent.TimeUnit; 10 | 11 | /** 12 | * @Author: Lei 13 | * @E-mail: 843291011@qq.com 14 | * @Date: 2020-05-21 11:15 15 | * @Version: 1.0 16 | * @Modified By: 17 | * @Description: 18 | */ 19 | public class J03_NcClient { 20 | 21 | // 定义端口号 22 | private static final int PORT = 7777; 23 | 24 | public static void main(String[] args) throws IOException, InterruptedException { 25 | 26 | ServerSocket server = new ServerSocket(PORT); 27 | Socket socket = server.accept(); 28 | 29 | SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); 30 | System.out.println("[" + simpleDateFormat.format(new Date()) + "]" + socket.getInetAddress() + "已建立连接!"); 31 | 32 | //监控连接是否断开线程 33 | 
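        // Editor's note (sketch): MyRedisSink above reads its connection settings from the global job
        // parameters, so a job combining it with C03_conf.properties would be wired roughly as below.
        // Note a naming mismatch in this repo: the properties file defines "redis.password" while
        // MyRedisSink calls params.getRequired("redis.pwd"), so one of the two keys needs aligning.
        //
        //     ParameterTool params = ParameterTool.fromPropertiesFile(
        //             "src/main/java/com/lei/apitest/c06_apps/C03_conf.properties");
        //     StreamExecutionEnvironment env = FlinkUtilsV1.getEnv();
        //     env.getConfig().setGlobalJobParameters(params);
        //
        //     DataStream<Tuple3<String, String, String>> tuples = ...; // (key, field, value) for hset
        //     tuples.addSink(new MyRedisSink());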
//new Thread(new C03_NcClient.CheckClientThread(socket)).start(); 34 | 35 | //输出流 36 | OutputStream outputStream = socket.getOutputStream(); 37 | 38 | //控制台输入 39 | //Scanner in = new Scanner(System.in); 40 | 41 | // while (true) { 42 | // String str = in.nextLine() + "\n"; 43 | // 44 | // outputStream.write(str.getBytes()); 45 | // outputStream.flush(); 46 | // } 47 | for (int i = 11; i <= 50; i++) { 48 | String value = "sensor_1, 15477181" + i + ", " + (i) + "\n"; 49 | outputStream.write(value.getBytes()); 50 | TimeUnit.MILLISECONDS.sleep(1000); 51 | outputStream.flush(); 52 | } 53 | } 54 | 55 | // 监控连接程序是否断开 线程类 56 | static class CheckClientThread implements Runnable { 57 | 58 | private Socket socketClient; 59 | 60 | public CheckClientThread(Socket socketClient) { 61 | this.socketClient = socketClient; 62 | } 63 | 64 | @Override 65 | public void run() { 66 | while (true) { 67 | try { 68 | socketClient.sendUrgentData(0xFF); 69 | } catch (IOException e) { 70 | SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); 71 | System.out.println("[" + simpleDateFormat.format(new Date()) + "]" + socketClient.getInetAddress() + "连接已关闭!"); 72 | // 断开后退出程序 73 | System.exit(0); 74 | } 75 | } 76 | } 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/z_other_learn/c01_value_state/J04_ReduceingStateOperate.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.z_other_learn.c01_value_state; 2 | 3 | import org.apache.flink.api.common.functions.ReduceFunction; 4 | import org.apache.flink.api.common.functions.RichFlatMapFunction; 5 | import org.apache.flink.api.common.state.ReducingState; 6 | import org.apache.flink.api.common.state.ReducingStateDescriptor; 7 | import org.apache.flink.configuration.Configuration; 8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 9 | import org.apache.flink.util.Collector; 10 | 11 | import java.util.Arrays; 12 | 13 | /** 14 | * @Author: Lei 15 | * @E-mail: 843291011@qq.com 16 | * @Date: 2020-06-03 15:53 17 | * @Version: 1.0 18 | * @Modified By: 19 | * @Description: 20 | */ 21 | public class J04_ReduceingStateOperate { 22 | public static void main(String[] args) throws Exception { 23 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 24 | 25 | env.fromCollection(Arrays.asList( 26 | new Element(1L, 3d), 27 | new Element(1L, 5d), 28 | new Element(1L, 7d), 29 | new Element(2L, 4d), 30 | new Element(2L, 2d), 31 | new Element(2L, 6d) 32 | )).keyBy(value -> value.key) 33 | .flatMap(new J_CountWithReduceingAverageStage()) 34 | .print(); 35 | 36 | env.execute("J04_ReduceingStateOperate"); 37 | } 38 | } 39 | 40 | 41 | class J_CountWithReduceingAverageStage extends RichFlatMapFunction { 42 | 43 | // 定义ReducingState 44 | private ReducingState reducingState = null; 45 | 46 | // 求取平均值 ==》需要知道每个相同key的数据出现了多少次 47 | // 定义一个计数器 48 | Long counter = 0L; 49 | 50 | @Override 51 | public void open(Configuration parameters) throws Exception { 52 | ReducingStateDescriptor reduceSum = new ReducingStateDescriptor("reduceSum", new ReduceFunction() { 53 | 54 | @Override 55 | public Double reduce(Double t1, Double t2) throws Exception { 56 | return t1 + t2; 57 | } 58 | }, Double.class); 59 | 60 | //初始化获取mapState对象 61 | this.reducingState = getRuntimeContext().getReducingState(reduceSum); 62 | } 63 | 64 | @Override 65 | public void flatMap(Element element, Collector out) 
throws Exception { 66 | //计数器+1 67 | counter += 1; 68 | //添加数据到reducingState 69 | reducingState.add(element.value); 70 | 71 | out.collect(new Element(element.key,reducingState.get()/counter)); 72 | } 73 | } -------------------------------------------------------------------------------- /src/main/java/com/lei/apitest/z_other_learn/c01_value_state/J06_OperatorListState.java: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.z_other_learn.c01_value_state; 2 | 3 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 5 | import org.apache.flink.streaming.api.functions.sink.SinkFunction; 6 | 7 | import java.util.ArrayList; 8 | import java.util.Arrays; 9 | import java.util.List; 10 | 11 | 12 | /** 13 | * @Author: Lei 14 | * @E-mail: 843291011@qq.com 15 | * @Date: 2020-06-03 16:21 16 | * @Version: 1.0 17 | * @Modified By: 18 | * @Description: 19 | */ 20 | 21 | /* 22 | 需求 23 | 实现每两条数据进行输出打印一次,不用区分数据的key 24 | 这里使用ListState实现 25 | */ 26 | /** 27 | * 实现每两条数据进行输出打印一次,不用区分数据的key 28 | */ 29 | public class J06_OperatorListState { 30 | public static void main(String[] args) throws Exception { 31 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 32 | 33 | DataStreamSource sourceStream = env.fromCollection(Arrays.asList( 34 | new MyKeyValue("spark", 3), 35 | new MyKeyValue("hadoop", 5), 36 | new MyKeyValue("hive", 7), 37 | new MyKeyValue("flume", 9) 38 | )); 39 | 40 | sourceStream.addSink(new J_OperateTaskState()).setParallelism(1); 41 | 42 | 43 | env.execute("J06_OperatorListState"); 44 | } 45 | } 46 | 47 | class MyKeyValue { 48 | public String key; 49 | public Integer value; 50 | 51 | public MyKeyValue(String key, Integer value) { 52 | this.key = key; 53 | this.value = value; 54 | } 55 | 56 | @Override 57 | public String toString() { 58 | return "MyKeyValue{" + 59 | "key='" + key + '\'' + 60 | ", value=" + value + 61 | '}'; 62 | } 63 | } 64 | 65 | 66 | class J_OperateTaskState implements SinkFunction { 67 | // 定义一个list 用于我们每两条数据打印一下 68 | private List listBuffer = new ArrayList<>(); 69 | 70 | @Override 71 | public void invoke(MyKeyValue value, Context context) throws Exception { 72 | listBuffer.add(value); 73 | 74 | if(listBuffer.size() ==2){ 75 | System.out.println(listBuffer); 76 | 77 | // 清空state状态 每隔两条数据,打印一下之后,清空状态 78 | listBuffer.clear(); 79 | } 80 | } 81 | } -------------------------------------------------------------------------------- /src/main/java/com/lei/domain/J_SensorReading.java: -------------------------------------------------------------------------------- 1 | package com.lei.domain; 2 | 3 | /** 4 | * @Author: Lei 5 | * @E-mail: 843291011@qq.com 6 | * @Date: 2020-05-25 15:15 7 | * @Version: 1.0 8 | * @Modified By: 9 | * @Description: 10 | */ 11 | public class J_SensorReading { 12 | public String id; 13 | public Long timestamp; 14 | public Double temperature; 15 | 16 | public J_SensorReading(String id, Long timestamp, Double temperature) { 17 | this.id = id; 18 | this.timestamp = timestamp; 19 | this.temperature = temperature; 20 | } 21 | 22 | 23 | @Override 24 | public String toString() { 25 | return "J_SensorReading{" + 26 | "id='" + id + '\'' + 27 | ", timestamp=" + timestamp + 28 | ", temperature=" + temperature + 29 | '}'; 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/java/com/lei/domain/J_User.java: 
-------------------------------------------------------------------------------- 1 | package com.lei.domain; 2 | 3 | /** 4 | * @Author: 5 | * @Date: 2021-01-03 12:52 6 | * @Version: 1.0 7 | * @Modified By: 8 | * @Description: 9 | */ 10 | public class J_User { 11 | public int id; 12 | public String name; 13 | public int age; 14 | 15 | public J_User(int id, String name, int age) { 16 | this.id = id; 17 | this.name = name; 18 | this.age = age; 19 | } 20 | 21 | public static J_User of(int id, String name, int age) { 22 | return new J_User(id, name, age); 23 | } 24 | 25 | // Java Bean 必须实现的方法,信息通过字符串进行拼接 26 | public static String convertToCsv(J_User user) { 27 | StringBuilder builder = new StringBuilder(); 28 | builder.append("("); 29 | 30 | // add user.id 31 | builder.append(user.id); 32 | builder.append(", "); 33 | 34 | // add user.name 35 | builder.append("'"); 36 | builder.append(String.valueOf(user.name)); 37 | builder.append("', "); 38 | 39 | // add user.age 40 | builder.append(user.age); 41 | 42 | builder.append(" )"); 43 | return builder.toString(); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/com/lei/sinktest/J02_RedisSinkTest.java: -------------------------------------------------------------------------------- 1 | package com.lei.sinktest; 2 | 3 | import com.lei.domain.J_SensorReading; 4 | import com.lei.util.J_MyRedisUtil; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | 9 | /** 10 | * @Author: Lei 11 | * @E-mail: 843291011@qq.com 12 | * @Date: 2020-05-25 14:52 13 | * @Version: 1.0 14 | * @Modified By: 15 | * @Description: 16 | */ 17 | public class J02_RedisSinkTest { 18 | public static void main(String[] args) throws Exception { 19 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 20 | env.setParallelism(1); 21 | 22 | // source 23 | DataStreamSource inputStream = env.readTextFile("input_dir/sensor.txt"); 24 | 25 | /* 26 | 数据源示例:sensor_1, 1547718205, 27.1 27 | 数据源示例:sensor_2, 1547718206, 28.1 28 | 数据源示例:sensor_3, 1547718207, 29.1 29 | */ 30 | SingleOutputStreamOperator dataStream = inputStream.map( 31 | (String data) -> { 32 | String[] dataArray = data.split(","); 33 | // 转成String 方便序列化输出 34 | return new J_SensorReading(dataArray[0].trim(), 35 | Long.valueOf(dataArray[1].trim()), 36 | Double.valueOf(dataArray[2].trim())); 37 | } 38 | ).returns(J_SensorReading.class); 39 | 40 | // sink 41 | dataStream.addSink(J_MyRedisUtil.getRedisSink()); 42 | /* 43 | centos redis: redis-cli 44 | hset channal_sum xiaomi 100 45 | hset channal_sum huawei 100 46 | keys * 47 | get channal_sum 48 | hgetall channal_sum 49 | 删除所有Key,可以使用Redis的flushdb和flushall命令 50 | */ 51 | // 把结果存入redis hset key:channel_sum field: channel value: count 52 | 53 | env.execute("redis sink test"); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/com/lei/sinktest/J03_EsSinkTest.java: -------------------------------------------------------------------------------- 1 | package com.lei.sinktest; 2 | 3 | import com.lei.domain.J_SensorReading; 4 | import com.lei.util.J_MyEsUtil; 5 | import org.apache.flink.streaming.api.datastream.DataStream; 6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 7 | import 
org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | 9 | /** 10 | * @Author: Lei 11 | * @E-mail: 843291011@qq.com 12 | * @Date: 2020-05-25 16:01 13 | * @Version: 1.0 14 | * @Modified By: 15 | * @Description: 16 | */ 17 | public class J03_EsSinkTest { 18 | public static void main(String[] args) throws Exception { 19 | 20 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 21 | env.setParallelism(1); 22 | 23 | // source 24 | DataStream inputStream = env.readTextFile("input_dir/sensor.txt"); 25 | 26 | SingleOutputStreamOperator dataStream = inputStream.map( 27 | (String data) -> { 28 | String[] dataArray = data.split(","); 29 | // 转成String 方便序列化输出 30 | return new J_SensorReading(dataArray[0].trim(), 31 | Long.valueOf(dataArray[1].trim()), 32 | Double.valueOf(dataArray[2].trim())); 33 | } 34 | ).returns(J_SensorReading.class); 35 | 36 | // sink 37 | dataStream.addSink(J_MyEsUtil.getElasticSearchSink("sensor")); 38 | dataStream.print(); 39 | 40 | env.execute("es sink test"); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/com/lei/sinktest/J04_JdbcSinkTest.java: -------------------------------------------------------------------------------- 1 | package com.lei.sinktest; 2 | 3 | import com.lei.domain.J_SensorReading; 4 | import com.lei.util.J_MyJdbcUtil; 5 | import org.apache.flink.streaming.api.datastream.DataStream; 6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | 9 | import java.util.concurrent.TimeUnit; 10 | 11 | /** 12 | * @Author: Lei 13 | * @E-mail: 843291011@qq.com 14 | * @Date: 2020-05-25 17:21 15 | * @Version: 1.0 16 | * @Modified By: 17 | * @Description: 18 | */ 19 | public class J04_JdbcSinkTest { 20 | public static void main(String[] args) throws Exception { 21 | 22 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 23 | env.setParallelism(1); 24 | 25 | // source 26 | DataStream inputStream = env.readTextFile("input_dir/sensor.txt"); 27 | 28 | // Transform操作 29 | SingleOutputStreamOperator dataStream = inputStream.map( 30 | (String data) -> { 31 | String[] dataArray = data.split(","); 32 | // 转成String 方便序列化输出 33 | return new J_SensorReading(dataArray[0].trim(), 34 | Long.valueOf(dataArray[1].trim()), 35 | Double.valueOf(dataArray[2].trim())); 36 | } 37 | ).returns(J_SensorReading.class); 38 | 39 | // sink 40 | // dataStream.addSink(new MyJdbcSink()) 41 | 42 | 43 | J_MyJdbcUtil jdbcSink = new J_MyJdbcUtil("insert into temperatures values(?,?,?)"); 44 | dataStream.addSink(jdbcSink); 45 | 46 | TimeUnit.SECONDS.sleep(60); 47 | env.execute("jdbc sink test"); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/com/lei/sinktest/J05_ClickHouseSinkTest.java: -------------------------------------------------------------------------------- 1 | package com.lei.sinktest; 2 | 3 | import com.lei.domain.J_User; 4 | import com.lei.util.J_MyClickHouseUtil; 5 | import org.apache.flink.api.common.functions.MapFunction; 6 | import org.apache.flink.streaming.api.datastream.DataStream; 7 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 8 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 9 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 10 | 11 | /** 12 | * @Author: 13 | * @Date: 
2021-01-03 13:06 14 | * @Version: 1.0 15 | * @Modified By: 16 | * @Description: 17 | */ 18 | 19 | /* 20 | 进入clickhouse-client 21 | use default; 22 | drop table if exists user_table; 23 | 24 | CREATE TABLE default.user_table(id UInt16, name String, age UInt16 ) ENGINE = TinyLog(); 25 | */ 26 | public class J05_ClickHouseSinkTest { 27 | public static void main(String[] args) throws Exception { 28 | StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment(); 29 | env.setParallelism(1); 30 | 31 | // source 32 | DataStream inputStream = env.socketTextStream("localhost", 7777); 33 | 34 | // Transform 操作 35 | SingleOutputStreamOperator dataStream = inputStream.map(new MapFunction() { 36 | @Override 37 | public J_User map(String data) throws Exception { 38 | String[] split = data.split(","); 39 | return J_User.of(Integer.parseInt(split[0]), 40 | split[1], 41 | Integer.parseInt(split[2])); 42 | } 43 | }); 44 | 45 | // sink 46 | String sql = "INSERT INTO default.user_table (id, name, age) VALUES (?,?,?)"; 47 | J_MyClickHouseUtil jdbcSink = new J_MyClickHouseUtil(sql); 48 | dataStream.addSink(jdbcSink); 49 | dataStream.print(); 50 | 51 | env.execute("clickhouse sink test"); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/com/lei/util/ClickHouseUtil.java: -------------------------------------------------------------------------------- 1 | package com.lei.util; 2 | 3 | import java.sql.Connection; 4 | import java.sql.DriverManager; 5 | import java.sql.SQLException; 6 | 7 | /** 8 | * @Author: 9 | * @Date: 2021-01-03 12:54 10 | * @Version: 1.0 11 | * @Modified By: 12 | * @Description: 13 | */ 14 | public class ClickHouseUtil { 15 | private static Connection connection; 16 | 17 | public static Connection getConn(String host, int port, String database) throws SQLException, ClassNotFoundException { 18 | Class.forName("ru.yandex.clickhouse.ClickHouseDriver"); 19 | String address = "jdbc:clickhouse://" + host + ":" + port + "/" + database; 20 | connection = DriverManager.getConnection(address); 21 | return connection; 22 | } 23 | 24 | public static Connection getConn(String host, int port) throws SQLException, ClassNotFoundException { 25 | return getConn(host,port,"default"); 26 | } 27 | public static Connection getConn() throws SQLException, ClassNotFoundException { 28 | return getConn("node-01",8123); 29 | } 30 | public void close() throws SQLException { 31 | connection.close(); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/com/lei/util/J_MyClickHouseUtil.java: -------------------------------------------------------------------------------- 1 | package com.lei.util; 2 | 3 | import com.lei.domain.J_User; 4 | import org.apache.flink.configuration.Configuration; 5 | import org.apache.flink.streaming.api.functions.sink.RichSinkFunction; 6 | 7 | import java.sql.Connection; 8 | import java.sql.PreparedStatement; 9 | 10 | /** 11 | * @Author: 12 | * @Date: 2021-01-03 12:59 13 | * @Version: 1.0 14 | * @Modified By: 15 | * @Description: 16 | */ 17 | public class J_MyClickHouseUtil extends RichSinkFunction { 18 | Connection connection = null; 19 | 20 | String sql; 21 | 22 | public J_MyClickHouseUtil(String sql) { 23 | this.sql = sql; 24 | } 25 | 26 | @Override 27 | public void open(Configuration parameters) throws Exception { 28 | super.open(parameters); 29 | connection = ClickHouseUtil.getConn("node-01", 8123, "default"); 30 | } 31 | 32 | @Override 33 | 
public void close() throws Exception { 34 | super.close(); 35 | if (connection != null) { 36 | connection.close(); 37 | } 38 | } 39 | 40 | @Override 41 | public void invoke(J_User user, Context context) throws Exception { 42 | PreparedStatement preparedStatement = connection.prepareStatement(sql); 43 | preparedStatement.setLong(1, user.id); 44 | preparedStatement.setString(2, user.name); 45 | preparedStatement.setLong(3, user.age); 46 | preparedStatement.addBatch(); 47 | 48 | long startTime = System.currentTimeMillis(); 49 | int[] ints = preparedStatement.executeBatch(); 50 | connection.commit(); 51 | long endTime = System.currentTimeMillis(); 52 | System.out.println("批量插入完毕用时:" + (endTime - startTime) + " -- 插入数据 = " + ints.length); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/main/java/com/lei/util/J_MyEsUtil.java: -------------------------------------------------------------------------------- 1 | package com.lei.util; 2 | 3 | import com.lei.domain.J_SensorReading; 4 | import org.apache.flink.api.common.functions.RuntimeContext; 5 | import org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkFunction; 6 | import org.apache.flink.streaming.connectors.elasticsearch.RequestIndexer; 7 | import org.apache.flink.streaming.connectors.elasticsearch6.ElasticsearchSink; 8 | import org.apache.http.HttpHost; 9 | import org.elasticsearch.action.index.IndexRequest; 10 | import org.elasticsearch.client.Requests; 11 | 12 | import java.util.ArrayList; 13 | import java.util.HashMap; 14 | import java.util.List; 15 | 16 | /** 17 | * @Author: Lei 18 | * @E-mail: 843291011@qq.com 19 | * @Date: 2020-05-25 15:47 20 | * @Version: 1.0 21 | * @Modified By: 22 | * @Description: 23 | */ 24 | public class J_MyEsUtil { 25 | 26 | private static List httpHosts = new ArrayList(); 27 | 28 | static { 29 | httpHosts.add(new HttpHost("elasticsearch01",9200,"http")); 30 | httpHosts.add(new HttpHost("elasticsearch02",9200,"http")); 31 | httpHosts.add(new HttpHost("elasticsearch03",9200,"http")); 32 | } 33 | 34 | public static ElasticsearchSink getElasticSearchSink(String indexName) { 35 | ElasticsearchSinkFunction esFunc = new ElasticsearchSinkFunction() { 36 | 37 | @Override 38 | public void process(J_SensorReading element, RuntimeContext ctx, RequestIndexer indexer) { 39 | System.out.println("saving data:" + element); 40 | // 包装成一个Map或者JsonObject 41 | HashMap json = new HashMap(); 42 | json.put("sensor_id", element.id); 43 | json.put("temperature", element.temperature.toString()); 44 | json.put("ts", element.timestamp.toString()); 45 | 46 | // 创建index request, 准备发送数据 47 | IndexRequest indexRequest = Requests.indexRequest() 48 | .index(indexName) 49 | .type("_doc") 50 | .source(json); 51 | 52 | // 利用index发送请求,写入数据 53 | indexer.add(indexRequest); 54 | System.out.println("data saved..."); 55 | } 56 | }; 57 | 58 | ElasticsearchSink.Builder sinkBuilder = new ElasticsearchSink.Builder<>(httpHosts, esFunc); 59 | 60 | //刷新前缓冲的最大动作量 61 | sinkBuilder.setBulkFlushMaxActions(10); 62 | 63 | 64 | return sinkBuilder.build(); 65 | } 66 | 67 | } -------------------------------------------------------------------------------- /src/main/java/com/lei/util/J_MyJdbcUtil.java: -------------------------------------------------------------------------------- 1 | package com.lei.util; 2 | 3 | import com.alibaba.druid.pool.DruidDataSourceFactory; 4 | import com.lei.domain.J_SensorReading; 5 | import org.apache.flink.configuration.Configuration; 6 | import 
org.apache.flink.streaming.api.functions.sink.RichSinkFunction; 7 | 8 | import javax.sql.DataSource; 9 | import java.sql.Connection; 10 | import java.sql.PreparedStatement; 11 | 12 | /** 13 | * @Author: Lei 14 | * @E-mail: 843291011@qq.com 15 | * @Date: 2020-05-25 17:10 16 | * @Version: 1.0 17 | * @Modified By: 18 | * @Description: 19 | */ 20 | public class J_MyJdbcUtil extends RichSinkFunction<J_SensorReading> { 21 | 22 | Connection connection = null; 23 | String sql; 24 | 25 | public J_MyJdbcUtil(String sql) { 26 | this.sql = sql; 27 | } 28 | 29 | // 创建连接 30 | @Override 31 | public void open(Configuration parameters) throws Exception { 32 | // super.open(parameters); 33 | // 获取连接池对象 34 | DataSource dataSource = DruidDataSourceFactory.createDataSource(J_ConfigurationManager.getProp()); 35 | connection = dataSource.getConnection(); 36 | // 注意:druid.properties配置文件中的参数名必须和Druid要求的名称一致,如连接数据库的用户名为username,否则会报错。 37 | } 38 | 39 | @Override 40 | public void close() throws Exception { 41 | //super.close(); 42 | if(connection!=null){ 43 | connection.close(); 44 | } 45 | } 46 | 47 | // // 反复调用 48 | // @Override 49 | // public void invoke(String[] value, Context context) throws Exception { 50 | // PreparedStatement ps = connection.prepareStatement(sql); 51 | // System.out.println(Arrays.toString(value)); 52 | // for (int i = 0; i < value.length; i++) { 53 | // ps.setObject(i + 1, value[i]); 54 | // } 55 | // 56 | // ps.executeUpdate(); 57 | // } 58 | 59 | 60 | @Override 61 | public void invoke(J_SensorReading value, Context context) throws Exception { 62 | PreparedStatement ps = connection.prepareStatement(sql); 63 | System.out.println(value.toString()); 64 | /*for (int i = 0; i < value.length; i++) { 65 | ps.setObject(i + 1, value[i]); 66 | }*/ 67 | ps.setObject(1, value.id); 68 | ps.setObject(2, value.timestamp); 69 | ps.setObject(3, value.temperature); 70 | 71 | ps.executeUpdate(); 72 | } 73 | } -------------------------------------------------------------------------------- /src/main/java/com/lei/util/J_MyKafkaUtil.java: -------------------------------------------------------------------------------- 1 | package com.lei.util; 2 | 3 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 4 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011; 5 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011; 6 | 7 | import java.util.Properties; 8 | 9 | /** 10 | * @Author: Lei 11 | * @E-mail: 843291011@qq.com 12 | * @Date: 2020-05-20 17:06 13 | * @Version: 1.0 14 | * @Modified By: 15 | * @Description: 16 | */ 17 | 18 | // flink通过有状态支持,将kafka消费的offset自动进行状态保存,自动维护偏移量 19 | public class J_MyKafkaUtil { 20 | 21 | private static Properties prop = new Properties(); 22 | private static String zk_servers = "node-01:9092,node-02:9092,node-03:9092"; // 实际为Kafka broker地址列表,变量名沿用zk_servers 23 | 24 | static { 25 | prop.setProperty("bootstrap.servers", zk_servers); 26 | prop.setProperty("group.id", "flink_topic_test_g1"); 27 | prop.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 28 | prop.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 29 | prop.setProperty("auto.offset.reset", "latest"); 30 | } 31 | 32 | 33 | public static FlinkKafkaConsumer011<String> getConsumer(String topic) { 34 | FlinkKafkaConsumer011<String> myKafkaConsumer = new FlinkKafkaConsumer011<> (topic, new SimpleStringSchema(), prop); 35 | return myKafkaConsumer; 36 | } 37 | 38 | public static FlinkKafkaProducer011<String> getProducer(String topic) { 39 | return new
FlinkKafkaProducer011<> (zk_servers, topic, new SimpleStringSchema()); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/com/lei/util/J_MyRedisUtil.java: -------------------------------------------------------------------------------- 1 | package com.lei.util; 2 | 3 | import com.lei.domain.J_SensorReading; 4 | import org.apache.flink.streaming.connectors.redis.RedisSink; 5 | import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig; 6 | import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommand; 7 | import org.apache.flink.streaming.connectors.redis.common.mapper.RedisCommandDescription; 8 | import org.apache.flink.streaming.connectors.redis.common.mapper.RedisMapper; 9 | 10 | /** 11 | * @Author: Lei 12 | * @E-mail: 843291011@qq.com 13 | * @Date: 2020-05-25 14:54 14 | * @Version: 1.0 15 | * @Modified By: 16 | * @Description: 17 | */ 18 | public class J_MyRedisUtil { 19 | 20 | private static FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig.Builder().setHost("localhost").setPort(6379).build(); 21 | 22 | public static RedisSink getRedisSink() { 23 | return new RedisSink(conf, new J_MyRedisMapper()); 24 | } 25 | } 26 | 27 | class J_MyRedisMapper implements RedisMapper { 28 | 29 | // 定义保存数据到redis的命令 30 | @Override 31 | public RedisCommandDescription getCommandDescription() { 32 | // 把传感器id和温度值保存成哈希表 HSET key field value 33 | return new RedisCommandDescription(RedisCommand.HSET, "sensor_temperature"); 34 | // new RedisCommandDescription(RedisCommand.SET ) 35 | } 36 | 37 | @Override 38 | public String getKeyFromData(J_SensorReading sensorReading) { 39 | return sensorReading.id; 40 | } 41 | 42 | @Override 43 | public String getValueFromData(J_SensorReading sensorReading) { 44 | return sensorReading.temperature + ""; 45 | } 46 | 47 | } -------------------------------------------------------------------------------- /src/main/java/com/lei/wc/J01_WordCount.java: -------------------------------------------------------------------------------- 1 | package com.lei.wc; 2 | 3 | import org.apache.flink.api.common.typeinfo.Types; 4 | import org.apache.flink.api.java.DataSet; 5 | import org.apache.flink.api.java.ExecutionEnvironment; 6 | import org.apache.flink.api.java.operators.AggregateOperator; 7 | import org.apache.flink.api.java.tuple.Tuple2; 8 | import org.apache.flink.api.java.utils.ParameterTool; 9 | import org.apache.flink.util.Collector; 10 | 11 | /** 12 | * @Author: Lei 13 | * @E-mail: 843291011@qq.com 14 | * @Date: Created in 10:36 下午 2020/5/13 15 | * @Version: 1.0 16 | * @Modified By: 17 | * @Description: 18 | */ 19 | 20 | public class J01_WordCount { 21 | public static void main(String[] args) throws Exception { 22 | final ParameterTool params = ParameterTool.fromArgs(args); 23 | 24 | ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); 25 | // 从文件中读取数据 26 | String inputPath = "input_dir/hello.txt"; 27 | DataSet inputDataSet = env.readTextFile(inputPath); 28 | // 创建DataSet,这里我们的输入是一行一行的文本 29 | /*DataSet inputDataSet = env.fromElements( 30 | "Flink Spark Storm", 31 | "Flink Flink Flink", 32 | "Spark Spark Spark", 33 | "Storm Storm Storm" 34 | );*/ 35 | 36 | AggregateOperator> counts = inputDataSet.flatMap( 37 | (String line, Collector> collector) -> { 38 | String[] words = line.split(" "); 39 | for (String word : words) { 40 | collector.collect(new Tuple2<>(word, 1)); 41 | } 42 | } 43 | ).returns(Types.TUPLE(Types.STRING, Types.INT)).groupBy(0).sum(1); 44 | 
45 | counts.print(); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/main/resources/application.conf: -------------------------------------------------------------------------------- 1 | ################################################################### 2 | # test 3 | run.on.test_mode = "dev123" 4 | 5 | // ES集群名,默认值elasticsearch 6 | // es.cluster.name = "my-application" 7 | es.cluster.name = "cluster-elasticsearch-prod" 8 | 9 | // ES集群中某个节点 10 | // es.host.name = "172.19.125.190" 11 | es.host.name = "elasticsearch01,elasticsearch02,elasticsearch03" 12 | 13 | // ES连接端口号 14 | es.host.port = 9300 -------------------------------------------------------------------------------- /src/main/scala/com/lei/ScalaRunFor.scala: -------------------------------------------------------------------------------- 1 | package com.lei 2 | 3 | /** 4 | * @Author: Lei 5 | * @E-mail: 843291011@qq.com 6 | * @Date: 2020-06-03 9:47 7 | * @Version: 1.0 8 | * @Modified By: 9 | * @Description: 10 | */ 11 | object ScalaRunFor { 12 | def main(args: Array[String]): Unit = { 13 | // forCry 14 | // 177 15 | // 705082704 16 | 17 | whileCry 18 | // 2 19 | // 705082704 20 | //scala中的for比while循环慢很多。在代码优化中可以想到在此优化。 21 | 22 | //还有其他的测试总结: 用java代码和scala代码,对比同一个算法,发现java比scala快很多。执行的快慢应该主要看scala编译成字节码的质量了。 23 | } 24 | 25 | def forCry(): Unit = { 26 | var start = System.currentTimeMillis 27 | var t = 0 28 | for (i <- 0 to 100000) { 29 | t += i 30 | } 31 | val end = System.currentTimeMillis 32 | println(end-start) 33 | println(t) 34 | } 35 | 36 | def whileCry(): Unit ={ 37 | val start = System.currentTimeMillis 38 | var total = 0 39 | var i = 0 40 | while ( { 41 | i < 100000 42 | }) { 43 | i = i + 1 44 | total += i 45 | } 46 | val end = System.currentTimeMillis 47 | println(end - start) 48 | println(total) 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/scala/com/lei/apitest/C08_TableAPI_SQL.scala: -------------------------------------------------------------------------------- 1 | package com.lei.apitest 2 | 3 | /** 4 | * @Author: Lei 5 | * @E-mail: 843291011@qq.com 6 | * @Date: Created in 4:36 下午 2020/4/22 7 | * @Version: 1.0 8 | * @Modified By: 9 | * @Description: 10 | */ 11 | 12 | /** 13 | Table API是流处理和批处理通用的关系型API,Table API可以基于流输入或者 14 | 指输入来运行而不需要进行任何修改。Table API是SQL语言的超集并专门为Apache 15 | Flink设计的,Table API是Scala和Java语言集成式的API。与常规SQL语言中将 16 | 查询指定为字符串不同, Table API查询是以Java或Scala中的语言潜入样式来定义 17 | 的,具有IDE支持如:自动完成和语法检测。 18 | 19 | 需要导入以下包: 20 | 21 | org.apache.flink 22 | flink-table_2.11 23 | 1.7.2 24 | 25 | */ 26 | object C08_TableAPI_SQL { 27 | 28 | } 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /src/main/scala/com/lei/apitest/doit/C01_MapWithState.scala: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.doit 2 | 3 | import org.apache.flink.streaming.api.scala._ 4 | 5 | /** 6 | * @Author: Lei 7 | * @E-mail: 843291011@qq.com 8 | * @Date: 2020-06-11 14:17 9 | * @Version: 1.0 10 | * @Modified By: 11 | * @Description: 12 | */ 13 | 14 | // scala版本的State状态存储计算 15 | 16 | object C01_MapWithState { 17 | 18 | def main(args: Array[String]): Unit = { 19 | val env = StreamExecutionEnvironment.getExecutionEnvironment 20 | 21 | val lines = env.socketTextStream("node-01", 7777) 22 | 23 | val keyed = lines.flatMap(_.split(" ")).map(word => (word, 
1)).keyBy(0) 24 | 25 | val summed = keyed.mapWithState((input: (String, Int), state: Option[Int]) => { 26 | state match { 27 | case Some(count) => { 28 | val key = input._1 29 | val value = input._2 30 | val total = count + value 31 | ((key, total), Some(total)) 32 | } 33 | case None => { 34 | (input, Some(input._2)) 35 | } 36 | } 37 | }) 38 | 39 | summed.print() 40 | 41 | env.execute() 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /src/main/scala/com/lei/apitest/z_other_learn/c01_value_state/C03_MapStateOperate.scala: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.z_other_learn.c01_value_state 2 | 3 | 4 | import java.util.UUID 5 | 6 | import org.apache.flink.api.common.functions.RichFlatMapFunction 7 | import org.apache.flink.api.common.state.{MapState, MapStateDescriptor} 8 | import org.apache.flink.configuration.Configuration 9 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 10 | import org.apache.flink.util.Collector 11 | 12 | 13 | /** 14 | * @Author: Lei 15 | * @E-mail: 843291011@qq.com 16 | * @Date: 2020-05-20 12:41 17 | * @Version: 1.0 18 | * @Modified By: 19 | * @Description: 20 | */ 21 | 22 | /* 23 | 作用 24 | 用于将每个key对应的数据都保存成一个map集合 25 | 26 | 需求 27 | 使用MapState求取每个key对应的平均值 28 | 29 | */ 30 | object MapStateOperate { 31 | 32 | def main(args: Array[String]): Unit = { 33 | 34 | val env = StreamExecutionEnvironment.getExecutionEnvironment 35 | import org.apache.flink.api.scala._ 36 | env.fromCollection(List( 37 | (1L, 3d), 38 | (1L, 5d), 39 | (1L, 7d), 40 | (2L, 4d), 41 | (2L, 2d), 42 | (2L, 6d) 43 | )).keyBy(_._1) 44 | .flatMap(new CountWithAverageMapState) 45 | .print() 46 | env.execute() 47 | } 48 | } 49 | 50 | class CountWithAverageMapState extends RichFlatMapFunction[(Long,Double),(Long,Double)]{ 51 | private var mapState:MapState[String,Double] = _ 52 | 53 | //初始化获取mapState对象 54 | override def open(parameters: Configuration): Unit = { 55 | val mapStateOperate = new MapStateDescriptor[String,Double]("mapStateOperate",classOf[String],classOf[Double]) 56 | mapState = getRuntimeContext.getMapState(mapStateOperate) 57 | } 58 | override def flatMap(input: (Long, Double), out: Collector[(Long, Double)]): Unit = { 59 | //将相同的key对应的数据放到一个map集合当中去,就是这种对应 1 -> List[Map,Map,Map] 60 | //每次都构建一个map集合 61 | //每个相同key的数据,都是对应一个map集合 ==》 hello => Map(hello -> 1,abc -> 2 , ddd -> 3) 62 | mapState.put(UUID.randomUUID().toString,input._2) 63 | import scala.collection.JavaConverters._ 64 | 65 | //获取map集合当中所有的value,我们每次将数据的value给放到map的value里面去 66 | val listState: List[Double] = mapState.values().iterator().asScala.toList 67 | if(listState.size >=3){ 68 | var count = 0L 69 | var sum = 0d 70 | for(eachState <- listState){ 71 | count +=1 72 | sum += eachState 73 | } 74 | println("average"+ sum/count) 75 | out.collect(input._1,sum/count) 76 | } 77 | } 78 | } 79 | 80 | -------------------------------------------------------------------------------- /src/main/scala/com/lei/apitest/z_other_learn/c01_value_state/C04_ReduceingStateOperate.scala: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.z_other_learn.c01_value_state 2 | 3 | 4 | import org.apache.flink.api.common.functions.{ReduceFunction, RichFlatMapFunction} 5 | import org.apache.flink.api.common.state.{ReducingState, ReducingStateDescriptor} 6 | import org.apache.flink.configuration.Configuration 7 | import 
org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 8 | import org.apache.flink.util.Collector 9 | 10 | 11 | /** 12 | * @Author: Lei 13 | * @E-mail: 843291011@qq.com 14 | * @Date: 2020-05-20 12:42 15 | * @Version: 1.0 16 | * @Modified By: 17 | * @Description: 18 | */ 19 | 20 | /* 21 | 作用 22 | 用于数据的聚合 23 | 需求 24 | 使用ReducingState求取每个key对应的平均值 25 | */ 26 | /** 27 | * ReducingState :这个状态为每一个 key 保存一个聚合之后的值 28 | * get() 获取状态值 29 | * add() 更新状态值,将数据放到状态中 30 | * clear() 清除状态 31 | */ 32 | 33 | object ReduceingStateOperate { 34 | def main(args: Array[String]): Unit = { 35 | val env = StreamExecutionEnvironment.getExecutionEnvironment 36 | import org.apache.flink.api.scala._ 37 | env.fromCollection(List( 38 | (1L, 3d), 39 | (1L, 5d), 40 | (1L, 7d), 41 | (2L, 4d), 42 | (2L, 2d), 43 | (2L, 6d) 44 | )).keyBy(_._1) 45 | .flatMap(new CountWithReduceingAverageStage) 46 | .print() 47 | env.execute() 48 | } 49 | } 50 | 51 | 52 | 53 | class CountWithReduceingAverageStage extends RichFlatMapFunction[(Long,Double),(Long,Double)]{ 54 | 55 | //定义ReducingState 56 | private var reducingState:ReducingState[Double] = _ 57 | 58 | //求取平均值 ==》需要知道每个相同key的数据出现了多少次 59 | //定义一个计数器 60 | var counter=0L 61 | 62 | override def open(parameters: Configuration): Unit = { 63 | val reduceSum = new ReducingStateDescriptor[Double]("reduceSum", new ReduceFunction[Double] { 64 | override def reduce(value1: Double, value2: Double): Double = { 65 | value1+ value2 66 | } 67 | }, classOf[Double]) 68 | 69 | //初始化获取mapState对象 70 | reducingState = getRuntimeContext.getReducingState[Double](reduceSum) 71 | 72 | } 73 | override def flatMap(input: (Long, Double), out: Collector[(Long, Double)]): Unit = { 74 | //计数器+1 75 | counter += 1 76 | //添加数据到reducingState 77 | reducingState.add(input._2) 78 | 79 | out.collect(input._1,reducingState.get()/counter) 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/main/scala/com/lei/apitest/z_other_learn/c01_value_state/C06_OperatorListState.scala: -------------------------------------------------------------------------------- 1 | package com.lei.apitest.z_other_learn.c01_value_state 2 | 3 | 4 | import org.apache.flink.streaming.api.functions.sink.SinkFunction 5 | import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment} 6 | 7 | import scala.collection.mutable.ListBuffer 8 | 9 | 10 | /** 11 | * @Author: Lei 12 | * @E-mail: 843291011@qq.com 13 | * @Date: 2020-05-20 12:41 14 | * @Version: 1.0 15 | * @Modified By: 16 | * @Description: 17 | */ 18 | 19 | /* 20 | 需求 21 | 实现每两条数据进行输出打印一次,不用区分数据的key 22 | 这里使用ListState实现 23 | */ 24 | /** 25 | * 实现每两条数据进行输出打印一次,不用区分数据的key 26 | */ 27 | object OperatorListState { 28 | def main(args: Array[String]): Unit = { 29 | val env = StreamExecutionEnvironment.getExecutionEnvironment 30 | import org.apache.flink.api.scala._ 31 | //构造数据的输入流 32 | val sourceStream: DataStream[(String, Int)] = env.fromCollection(List( 33 | ("spark", 3), 34 | ("hadoop", 5), 35 | ("hive", 7), 36 | ("flume", 9) 37 | )) 38 | 39 | // 40 | //每隔两条数据 ==》 打印一下 41 | sourceStream.addSink(new OperateTaskState).setParallelism(1) 42 | env.execute() 43 | } 44 | 45 | } 46 | 47 | class OperateTaskState extends SinkFunction[(String,Int)]{ 48 | //定义一个list 用于我们每两条数据打印一下 49 | private var listBuffer:ListBuffer[(String,Int)] = new ListBuffer[(String, Int)] 50 | 51 | override def invoke(value: (String, Int), context: SinkFunction.Context[_]): Unit = { 52 | listBuffer.+=(value) 53 | 54 | if(listBuffer.size ==2){ 55 | 
println(listBuffer) 56 | 57 | //清空state状态 每隔两条数据,打印一下之后,清空状态 58 | listBuffer.clear() 59 | } 60 | } 61 | 62 | } 63 | -------------------------------------------------------------------------------- /src/main/scala/com/lei/sinktest/C01_KafkaSinkTest.scala: -------------------------------------------------------------------------------- 1 | package com.lei.sinktest 2 | 3 | import com.lei.apitest.SensorReading 4 | import com.lei.util.J_MyKafkaUtil 5 | import org.apache.flink.api.common.serialization.SimpleStringSchema 6 | import org.apache.flink.streaming.api.scala._ 7 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011 8 | 9 | /** 10 | * @Author: Lei 11 | * @E-mail: 843291011@qq.com 12 | * @Date: Created in 8:57 下午 2020/4/20 13 | * @Version: 1.0 14 | * @Modified By: 15 | * @Description: 16 | */ 17 | object C01_KafkaSinkTest { 18 | def main(args: Array[String]): Unit = { 19 | 20 | val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment 21 | env.setParallelism(1) 22 | 23 | // source 24 | //val inputStream: DataStream[String] = env.readTextFile("input_dir/sensor.txt") 25 | 26 | val inputStream: DataStream[String] = env.addSource(J_MyKafkaUtil.getConsumer("sensor")) 27 | 28 | // Transform操作 29 | val dataStream: DataStream[String] = inputStream.map(data => { 30 | val dataArray: Array[String] = data.split(",") 31 | // 转成String 方便序列化输出 32 | SensorReading(dataArray(0).trim, dataArray(1).trim.toLong, dataArray(2).trim.toDouble).toString 33 | }) 34 | 35 | // sink 36 | // dataStream.addSink(new FlinkKafkaProducer011[String]("node-01:9092", "gmall", new SimpleStringSchema())) 37 | dataStream.addSink(J_MyKafkaUtil.getProducer("GMALL_STARTUP")) 38 | dataStream.print() 39 | 40 | env.execute("kafka sink test") 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/scala/com/lei/sinktest/C02_RedisSinkTest.scala: -------------------------------------------------------------------------------- 1 | package com.lei.sinktest 2 | 3 | import com.lei.apitest.SensorReading 4 | import com.lei.util.MyRedisUtil 5 | import org.apache.flink.streaming.api.scala._ 6 | import org.apache.flink.streaming.connectors.redis.RedisSink 7 | import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig 8 | import org.apache.flink.streaming.connectors.redis.common.mapper.{RedisCommand, RedisCommandDescription, RedisMapper} 9 | 10 | /** 11 | * @Author: Lei 12 | * @E-mail: 843291011@qq.com 13 | * @Date: Created in 10:45 下午 2020/4/20 14 | * @Version: 1.0 15 | * @Modified By: 16 | * @Description: 17 | */ 18 | object C02_RedisSinkTest { 19 | 20 | def main(args: Array[String]): Unit = { 21 | 22 | 23 | val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment 24 | env.setParallelism(1) 25 | 26 | // source 27 | val inputStream: DataStream[String] = env.readTextFile("input_dir/sensor.txt") 28 | 29 | // Transform操作 30 | val dataStream: DataStream[SensorReading] = inputStream.map(data => { 31 | val dataArray: Array[String] = data.split(",") 32 | // 封装成SensorReading样例类 33 | SensorReading(dataArray(0).trim, dataArray(1).trim.toLong, dataArray(2).trim.toDouble) 34 | }) 35 | 36 | // sink 37 | dataStream.addSink(MyRedisUtil.getRedisSink()) 38 | /* 39 | centos redis: redis-cli 40 | hset channal_sum xiaomi 100 41 | hset channal_sum huawei 100 42 | keys * 43 | hgetall channal_sum 44 | */ 45 | // 把结果存入redis hset key:channel_sum field: channel value: count 46 | 47 | env.execute("redis sink test")
48 | } 49 | } 50 | 51 | -------------------------------------------------------------------------------- /src/main/scala/com/lei/sinktest/C03_EsSinkTest.scala: -------------------------------------------------------------------------------- 1 | package com.lei.sinktest 2 | 3 | import com.lei.apitest.SensorReading 4 | import com.lei.util.MyEsUtil 5 | import org.apache.flink.streaming.api.scala._ 6 | 7 | /** 8 | * @Author: Lei 9 | * @E-mail: 843291011@qq.com 10 | * @Date: Created in 11:08 下午 2020/4/20 11 | * @Version: 1.0 12 | * @Modified By: 13 | * @Description: 14 | */ 15 | object C03_EsSinkTest { 16 | def main(args: Array[String]): Unit = { 17 | 18 | val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment 19 | env.setParallelism(1) 20 | 21 | // source 22 | val inputStream: DataStream[String] = env.readTextFile("input_dir/sensor.txt") 23 | 24 | // Transform操作 25 | val dataStream: DataStream[SensorReading] = inputStream.map(data => { 26 | val dataArray: Array[String] = data.split(",") 27 | // 转成String 方便序列化输出 28 | SensorReading(dataArray(0).trim, dataArray(1).trim.toLong, dataArray(2).trim.toDouble) 29 | }) 30 | 31 | // sink 32 | dataStream.addSink(MyEsUtil.getElasticSearchSink("sensor")) 33 | dataStream.print() 34 | 35 | env.execute("es sink test") 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/main/scala/com/lei/util/MyEsUtil.scala: -------------------------------------------------------------------------------- 1 | package com.lei.util 2 | 3 | import java.util 4 | 5 | import com.lei.apitest.SensorReading 6 | import org.apache.flink.api.common.functions.RuntimeContext 7 | import org.apache.flink.streaming.connectors.elasticsearch.{ElasticsearchSinkFunction, RequestIndexer} 8 | import org.apache.flink.streaming.connectors.elasticsearch6.ElasticsearchSink 9 | import org.apache.http.HttpHost 10 | import org.elasticsearch.action.index.IndexRequest 11 | import org.elasticsearch.client.Requests 12 | 13 | //import scala.util.parsing.json.JSONObject 14 | 15 | 16 | /** 17 | * @Author: Lei 18 | * @E-mail: 843291011@qq.com 19 | * @Date: Created in 11:57 上午 2020/4/18 20 | * @Version: 1.0 21 | * @Modified By: 22 | * @Description: 23 | */ 24 | 25 | object MyEsUtil { 26 | 27 | 28 | val httpHosts = new util.ArrayList[HttpHost] 29 | httpHosts.add(new HttpHost("elasticsearch01",9200,"http")) 30 | httpHosts.add(new HttpHost("elasticsearch02",9200,"http")) 31 | httpHosts.add(new HttpHost("elasticsearch03",9200,"http")) 32 | 33 | 34 | def getElasticSearchSink(indexName: String): ElasticsearchSink[SensorReading] ={ 35 | val esFunc = new ElasticsearchSinkFunction[SensorReading] { 36 | override def process(element: SensorReading, ctx: RuntimeContext, indexer: RequestIndexer): Unit = { 37 | println("saving data:" + element) 38 | // 包装成一个Map或者JsonObject 39 | val json = new util.HashMap[String, String]() 40 | json.put("sensor_id", element.id) 41 | json.put("temperature", element.temperature.toString) 42 | json.put("ts", element.timestamp.toString) 43 | 44 | // 创建index request, 准备发送数据 45 | val indexRequest: IndexRequest = Requests.indexRequest() 46 | .index(indexName) 47 | .`type`("_doc") 48 | .source(json) 49 | 50 | // 利用index发送请求,写入数据 51 | indexer.add(indexRequest) 52 | println("data saved...") 53 | } 54 | } 55 | 56 | val sinkBuilder = new ElasticsearchSink.Builder[SensorReading](httpHosts, esFunc) 57 | 58 | //刷新前缓冲的最大动作量 59 | sinkBuilder.setBulkFlushMaxActions(10) 60 | 61 | 62 | sinkBuilder.build() 63 | } 64 | 65 | } 66 | 
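A short note on the MyEsUtil.scala listing above: besides setBulkFlushMaxActions, the ElasticsearchSink.Builder in the flink-connector-elasticsearch6 module can also bound the bulk buffer by size and by time, which helps on low-traffic streams where 10 buffered actions may take a long time to accumulate. A minimal sketch, reusing the httpHosts and esFunc values defined in getElasticSearchSink above; the 5 MB and 5000 ms values are illustrative, not taken from this repository:

val sinkBuilder = new ElasticsearchSink.Builder[SensorReading](httpHosts, esFunc)
sinkBuilder.setBulkFlushMaxActions(10)   // flush after 10 buffered requests
sinkBuilder.setBulkFlushMaxSizeMb(5)     // or after roughly 5 MB of buffered data (example value)
sinkBuilder.setBulkFlushInterval(5000)   // or at least every 5 seconds (example value)
val esSink: ElasticsearchSink[SensorReading] = sinkBuilder.build()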
-------------------------------------------------------------------------------- /src/main/scala/com/lei/util/MyJdbcUtil.scala: -------------------------------------------------------------------------------- 1 | package com.lei.util 2 | 3 | import java.sql.{Connection, PreparedStatement} 4 | 5 | import com.alibaba.druid.pool.DruidDataSourceFactory 6 | import javax.sql.DataSource 7 | import org.apache.flink.configuration.Configuration 8 | import org.apache.flink.streaming.api.functions.sink.RichSinkFunction 9 | 10 | /** 11 | * @Author: Lei 12 | * @E-mail: 843291011@qq.com 13 | * @Date: Created in 7:45 上午 2020/4/21 14 | * @Version: 1.0 15 | * @Modified By: 16 | * @Description: 17 | */ 18 | class MyJdbcUtil(sql:String ) extends RichSinkFunction[Array[Any]] { 19 | 20 | var connection: Connection = null; 21 | 22 | //创建连接 23 | override def open(parameters: Configuration): Unit = { 24 | //获取连接池对象 25 | val dataSource: DataSource = DruidDataSourceFactory.createDataSource(ConfigurationManager.getProp()) 26 | connection = dataSource.getConnection() 27 | // 一定要注意druid.properties配置文件中的参数名一定要和上表中的名称相一致,如连接数据库的用户名为username,否则会报错。 28 | } 29 | 30 | //反复调用 31 | override def invoke(values: Array[Any]): Unit = { 32 | val ps: PreparedStatement = connection.prepareStatement(sql ) 33 | println(values.mkString(",")) 34 | for (i <- 0 until values.length) { 35 | ps.setObject(i + 1, values(i)) 36 | } 37 | ps.executeUpdate() 38 | 39 | 40 | } 41 | 42 | override def close(): Unit = { 43 | 44 | if(connection!=null){ 45 | connection.close() 46 | } 47 | 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /src/main/scala/com/lei/util/MyKafkaUtil.scala: -------------------------------------------------------------------------------- 1 | package com.lei.util 2 | 3 | import java.util.Properties 4 | 5 | import org.apache.flink.api.common.serialization.SimpleStringSchema 6 | import org.apache.flink.streaming.connectors.kafka.{FlinkKafkaConsumer011, FlinkKafkaProducer011} 7 | 8 | /** 9 | * @Author: Lei 10 | * @E-mail: 843291011@qq.com 11 | * @Date: Created in 7:53 上午 2020/4/20 12 | * @Version: 1.0 13 | * @Modified By: 14 | * @Description: 15 | */ 16 | 17 | // flink通过有状态支持,将kafka消费的offset自动进行状态保存,自动维护偏移量 18 | object MyKafkaUtil { 19 | 20 | val prop = new Properties() 21 | 22 | val zk_servers = "node-01:9092,node-02:9092,node-03:9092" 23 | prop.setProperty("bootstrap.servers", zk_servers) 24 | prop.setProperty("group.id", "flink_topic_test_g1") 25 | prop.setProperty("key,deserializer", "org.apache.kafka.common.serialization.StringDeserializer") 26 | prop.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer") 27 | prop.setProperty("auto.offset.reset", "latest") 28 | 29 | def getConsumer(topic:String ):FlinkKafkaConsumer011[String]= { 30 | val myKafkaConsumer:FlinkKafkaConsumer011[String] = new FlinkKafkaConsumer011[String](topic, new SimpleStringSchema(), prop) 31 | myKafkaConsumer 32 | } 33 | 34 | def getProducer(topic: String): FlinkKafkaProducer011[String] = { 35 | new FlinkKafkaProducer011[String](zk_servers, topic, new SimpleStringSchema()) 36 | } 37 | 38 | 39 | } 40 | -------------------------------------------------------------------------------- /src/main/scala/com/lei/util/MyRedisUtil.scala: -------------------------------------------------------------------------------- 1 | package com.lei.util 2 | 3 | import com.lei.apitest.SensorReading 4 | import org.apache.flink.streaming.connectors.redis.RedisSink 5 | import 
org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisPoolConfig 6 | import org.apache.flink.streaming.connectors.redis.common.mapper.{RedisCommand, RedisCommandDescription, RedisMapper} 7 | 8 | /** 9 | * @Author: Lei 10 | * @E-mail: 843291011@qq.com 11 | * @Date: Created in 11:09 上午 2020/4/18 12 | * @Version: 1.0 13 | * @Modified By: 14 | * @Description: 15 | */ 16 | object MyRedisUtil { 17 | 18 | val conf = new FlinkJedisPoolConfig.Builder().setHost("hadoop1").setPort(6379).build() 19 | 20 | def getRedisSink(): RedisSink[SensorReading] = { 21 | new RedisSink(conf, new MyRedisMapper) 22 | } 23 | } 24 | 25 | class MyRedisMapper extends RedisMapper[SensorReading]{ 26 | // 定义保存数据到redis的命令 27 | override def getCommandDescription: RedisCommandDescription = { 28 | // 把传感器id和温度值保存成哈希表 HSET key field value 29 | new RedisCommandDescription(RedisCommand.HSET, "sensor_temperature") 30 | // new RedisCommandDescription(RedisCommand.SET ) 31 | } 32 | 33 | // 定义保存到redis的value 34 | override def getValueFromData(t: SensorReading): String = t.temperature.toString 35 | 36 | // 定义保到到redis的key 37 | override def getKeyFromData(t: SensorReading): String = t.id 38 | } 39 | -------------------------------------------------------------------------------- /src/main/scala/com/lei/wc/C01_WordCount.scala: -------------------------------------------------------------------------------- 1 | package com.lei.wc 2 | 3 | import org.apache.flink.api.scala._ 4 | //import org.apache.flink.api.scala.{AggregateDataSet, DataSet, ExecutionEnvironment} 5 | 6 | /** 7 | * @Author: Lei 8 | * @E-mail: 843291011@qq.com 9 | * @Date: Created in 11:11 上午 2020/4/19 10 | * @Version: 1.0 11 | * @Modified By: 12 | * @Description: 13 | */ 14 | 15 | /** 16 | * Flink 入门程序 WordCount(离线) 17 | * 18 | * 对word状态进行实时统计,包含状态监控 19 | * 20 | */ 21 | 22 | 23 | // 批处理代码 24 | object C01_WordCount { 25 | def main(args: Array[String]): Unit = { 26 | // 创建一个批处理的执行环境 27 | val env: ExecutionEnvironment = ExecutionEnvironment.getExecutionEnvironment 28 | 29 | // 从文件中读取数据 30 | val inputPath = "input_dir/hello.txt" 31 | val inputDataSet: DataSet[String] = env.readTextFile(inputPath) 32 | 33 | // 分词之后做count 34 | val wordCountDataSet: AggregateDataSet[(String, Int)] = inputDataSet.flatMap(_.split(" ")) 35 | .map((_, 1)) 36 | .groupBy(0) 37 | .sum(1) 38 | 39 | // 打印输出 40 | wordCountDataSet.print() 41 | 42 | 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /src/main/scala/com/lei/wc/C02_StreamWordCount.scala: -------------------------------------------------------------------------------- 1 | package com.lei.wc 2 | 3 | 4 | import org.apache.flink.api.java.utils.ParameterTool 5 | import org.apache.flink.api.scala._ 6 | import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment} 7 | /** 8 | * @Author: Lei 9 | * @E-mail: 843291011@qq.com 10 | * @Date: Created in 11:25 上午 2020/4/19 11 | * @Version: 1.0 12 | * @Modified By: 13 | * @Description: 14 | */ 15 | 16 | /** 17 | * Flink 入门程序 WordCount(实时) 18 | * 19 | * 对word状态进行实时统计,包含状态监控 20 | * 21 | */ 22 | 23 | object C02_StreamWordCount { 24 | def main(args: Array[String]): Unit = { 25 | /* 26 | 注意:需要在服务器对环境变量新增HADOOP_CONF_DIR路径,具体如下: 27 | 1. vi /etc/profile 28 | 2. 
添加:export HADOOP_CONF_DIR=/etc/hadoop/conf 29 | */ 30 | 31 | // 启动Flink集群:/usr/local/flink_learn/flink-1.7.2/bin/start-cluster.sh 32 | // 使用WebUI查看Flink集群启动情况:http://node-01:8081/#/overview 33 | 34 | // --host localhost --port 7777 35 | // standalone提交方式:(含后台运行) 36 | // ./bin/flink run -c com.lei.wc.C02_StreamWordCount -p 2 /usr/local/spark-study/FlinkTutorial-1.0.jar --host localhost --port 7777 37 | // ./bin/flink run -c com.lei.wc.C02_StreamWordCount -p 2 /usr/local/spark-study/FlinkTutorial-1.0-jar-with-dependencies.jar --host localhost --port 7777 38 | 39 | // 列出正在运行的flink作业: 40 | // ./bin/flink list 41 | // ./bin/flink cancel xxxx_id 42 | 43 | // 查看所有flink作业 44 | // ./bin/flink list --all 45 | val params: ParameterTool = ParameterTool.fromArgs(args) 46 | val host: String = params.get("host") 47 | val port: Int = params.getInt("port") 48 | 49 | 50 | // 创建一个流处理的执行环境 51 | val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment 52 | //env.setParallelism(1) 53 | //env.disableOperatorChaining() // 禁用任务链划分 54 | 55 | // 接收socket数据流 56 | val textDataStream: DataStream[String] = env.socketTextStream(host, port) 57 | 58 | // 逐一读取数据,分词之后进行wordcount 59 | val wordCountDataStream: DataStream[(String, Int)] = textDataStream.flatMap(_.split("\\s")) 60 | .filter(_.nonEmpty) 61 | //.filter(_.nonEmpty).disableChaining() // 禁用任务链划分 62 | //.filter(_.nonEmpty).startNewChain() // 开始新的任务链 63 | .map((_, 1)) 64 | .keyBy(0) 65 | .sum(1) 66 | 67 | // 打印输出,流处理到这里才只是定义了流处理流程 68 | //wordCountDataStream.print() 69 | wordCountDataStream.print().setParallelism(1) // 设置并行度,如果没有指定默认是电脑CPU核心数 70 | 71 | // 打印输出,传入job名称 72 | env.execute("stream word count job") 73 | 74 | // 启动一个socket输入 75 | // nc -lk 7777 76 | } 77 | } 78 | --------------------------------------------------------------------------------
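One last note on C02_StreamWordCount.scala above: params.getInt("port") throws an exception when --port is not supplied, and params.get("host") returns null, so the job cannot start without both program arguments. When running from an IDE it can be convenient to fall back on defaults via ParameterTool's two-argument overloads; a minimal sketch, where the localhost/7777 defaults are only examples chosen to match the nc -lk 7777 comment:

val params: ParameterTool = ParameterTool.fromArgs(args)
val host: String = params.get("host", "localhost") // fall back to localhost when --host is absent
val port: Int = params.getInt("port", 7777)        // fall back to 7777 when --port is absent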