├── .gitignore ├── README.md ├── 专题篇 ├── 数据同步 │ └── 基于Flume的数据同步CDC技术实现 │ │ ├── flume_conf_product_detail.properties │ │ └── 基于Flume的数据同步CDC数据同步技术 ├── 第三代大数据技术 │ ├── Flink上线常见问题整理 │ │ └── Flink常见异常和错误信息小结.txt │ ├── Flink常见功能 │ │ └── Flink双流实时对账 │ │ │ └── README.md │ ├── Flink常见知识点整理 │ │ ├── Flink共享变量-广播变量 │ │ │ └── 广播变量 │ │ ├── Flink实现状态数据互相访问 │ │ ├── Flink窗口全解析 │ │ │ └── Flink窗口全解析 │ │ ├── OldPlanner&BlinkPlanner.md │ │ ├── SparkStreaming和Flink对比 │ │ ├── WaterMark │ │ │ └── Flink的waterMark的通俗理解 │ │ ├── 实时维表join │ │ │ └── Flink实时维表join方法总结 │ │ └── 旁路输出 │ │ │ └── 使用旁路输出(side output)来拆分和复制流.md │ ├── Flink版本解读 │ │ └── Flink1.13 │ │ │ └── Flink1.13新特性解读 │ └── README.md └── 第二代大数据技术 │ ├── Spark常见知识点整理 │ ├── Catalyst │ │ └── Spark的优化器系统Catalyst │ ├── Java版Spark读取Kafka数据 │ ├── SparkListener监听使用方式及自定义的事件处理动作 │ ├── SparkSQL │ │ └── Shark与SparkSQL │ ├── SparkSQL中的hint │ ├── Spark函数传递 │ ├── Spark累加器 │ ├── Spark资源参数.md │ └── 压测 │ │ └── Spark Bucket Table优化改造后测试.md │ └── Spark调优实践 │ ├── SparkSQL调优案例 │ ├── SparkSql 控制输出文件数量且大小均匀 │ └── spark sql 查看分区_Spark-sql读取hive分区表限制分区过滤条件及限制分区数量 │ └── Spark小文件调优 │ └── Spark小文件过多 ├── 分布式存储篇 └── Hive │ └── 常见知识总结 │ └── Hive小知识之分桶抽样.md ├── 大数据运维篇 └── Lunix服务器运维 │ └── 服务器巡检 │ └── lunixserver-check-script.sh ├── 常见大数据项目 ├── Spark3-Learning │ ├── .mvn │ │ └── wrapper │ │ │ ├── MavenWrapperDownloader.java │ │ │ ├── maven-wrapper.jar │ │ │ └── maven-wrapper.properties │ ├── README.md │ ├── configs │ │ ├── aurora │ │ │ ├── core-site.xml │ │ │ ├── hdfs-site.xml │ │ │ └── hive-site.xml │ │ └── tower │ │ │ ├── core-site.xml │ │ │ ├── hdfs-site.xml │ │ │ └── hive-site.xml │ ├── dataset │ │ └── iris.csv │ ├── derby.log │ ├── metastore_db │ │ ├── README_DO_NOT_TOUCH_FILES.txt │ │ ├── db.lck │ │ ├── log │ │ │ ├── README_DO_NOT_TOUCH_FILES.txt │ │ │ ├── log.ctrl │ │ │ ├── log1.dat │ │ │ └── logmirror.ctrl │ │ ├── seg0 │ │ │ ├── README_DO_NOT_TOUCH_FILES.txt │ │ │ ├── c10.dat │ │ │ ├── c101.dat │ │ │ ├── c111.dat │ │ │ ├── c121.dat │ │ │ ├── c130.dat │ │ │ ├── c141.dat │ │ │ ├── c150.dat │ │ │ ├── c161.dat │ │ │ ├── c171.dat │ │ │ ├── c180.dat │ │ │ ├── c191.dat │ │ │ ├── c1a1.dat │ │ │ ├── c1b1.dat │ │ │ ├── c1c0.dat │ │ │ ├── c1d1.dat │ │ │ ├── c1e0.dat │ │ │ ├── c1f1.dat │ │ │ ├── c20.dat │ │ │ ├── c200.dat │ │ │ ├── c211.dat │ │ │ ├── c221.dat │ │ │ ├── c230.dat │ │ │ ├── c241.dat │ │ │ ├── c251.dat │ │ │ ├── c260.dat │ │ │ ├── c271.dat │ │ │ ├── c281.dat │ │ │ ├── c290.dat │ │ │ ├── c2a1.dat │ │ │ ├── c2b1.dat │ │ │ ├── c2c1.dat │ │ │ ├── c2d0.dat │ │ │ ├── c2e1.dat │ │ │ ├── c2f0.dat │ │ │ ├── c300.dat │ │ │ ├── c31.dat │ │ │ ├── c311.dat │ │ │ ├── c321.dat │ │ │ ├── c331.dat │ │ │ ├── c340.dat │ │ │ ├── c351.dat │ │ │ ├── c361.dat │ │ │ ├── c371.dat │ │ │ ├── c380.dat │ │ │ ├── c391.dat │ │ │ ├── c3a1.dat │ │ │ ├── c3b1.dat │ │ │ ├── c3c0.dat │ │ │ ├── c3d1.dat │ │ │ ├── c3e1.dat │ │ │ ├── c3f1.dat │ │ │ ├── c400.dat │ │ │ ├── c41.dat │ │ │ ├── c411.dat │ │ │ ├── c421.dat │ │ │ ├── c430.dat │ │ │ ├── c441.dat │ │ │ ├── c451.dat │ │ │ ├── c461.dat │ │ │ ├── c470.dat │ │ │ ├── c481.dat │ │ │ ├── c490.dat │ │ │ ├── c4a1.dat │ │ │ ├── c4b0.dat │ │ │ ├── c4c1.dat │ │ │ ├── c4d0.dat │ │ │ ├── c4e1.dat │ │ │ ├── c4f0.dat │ │ │ ├── c501.dat │ │ │ ├── c51.dat │ │ │ ├── c511.dat │ │ │ ├── c521.dat │ │ │ ├── c530.dat │ │ │ ├── c541.dat │ │ │ ├── c550.dat │ │ │ ├── c561.dat │ │ │ ├── c571.dat │ │ │ ├── c580.dat │ │ │ ├── c591.dat │ │ │ ├── c5a1.dat │ │ │ ├── c5b0.dat │ │ │ ├── c5c1.dat │ │ │ ├── c5d0.dat │ │ │ ├── c5e1.dat │ │ │ ├── c5f1.dat │ │ │ ├── c60.dat │ │ │ ├── c601.dat │ │ │ ├── 
c611.dat │ │ │ ├── c71.dat │ │ │ ├── c81.dat │ │ │ ├── c90.dat │ │ │ ├── ca1.dat │ │ │ ├── cb1.dat │ │ │ ├── cc0.dat │ │ │ ├── cd1.dat │ │ │ ├── ce1.dat │ │ │ └── cf0.dat │ │ └── service.properties │ ├── model │ │ ├── metadata │ │ │ ├── ._SUCCESS.crc │ │ │ ├── .part-00000.crc │ │ │ ├── _SUCCESS │ │ │ └── part-00000 │ │ └── stages │ │ │ ├── 0_tok_286ea6d49f44 │ │ │ └── metadata │ │ │ │ ├── ._SUCCESS.crc │ │ │ │ ├── .part-00000.crc │ │ │ │ ├── _SUCCESS │ │ │ │ └── part-00000 │ │ │ ├── 1_hashingTF_484172a6ce8b │ │ │ └── metadata │ │ │ │ ├── ._SUCCESS.crc │ │ │ │ ├── .part-00000.crc │ │ │ │ ├── _SUCCESS │ │ │ │ └── part-00000 │ │ │ └── 2_logreg_0f17b0e587fd │ │ │ ├── data │ │ │ ├── ._SUCCESS.crc │ │ │ ├── .part-00000-9a6fc196-e410-4d1a-b0e8-925461d1849b-c000.snappy.parquet.crc │ │ │ ├── _SUCCESS │ │ │ └── part-00000-9a6fc196-e410-4d1a-b0e8-925461d1849b-c000.snappy.parquet │ │ │ └── metadata │ │ │ ├── ._SUCCESS.crc │ │ │ ├── .part-00000.crc │ │ │ ├── _SUCCESS │ │ │ └── part-00000 │ ├── mvnw │ ├── mvnw.cmd │ ├── pipeline │ │ ├── metadata │ │ │ ├── ._SUCCESS.crc │ │ │ ├── .part-00000.crc │ │ │ ├── _SUCCESS │ │ │ └── part-00000 │ │ └── stages │ │ │ ├── 0_tok_286ea6d49f44 │ │ │ └── metadata │ │ │ │ ├── ._SUCCESS.crc │ │ │ │ ├── .part-00000.crc │ │ │ │ ├── _SUCCESS │ │ │ │ └── part-00000 │ │ │ ├── 1_hashingTF_484172a6ce8b │ │ │ └── metadata │ │ │ │ ├── ._SUCCESS.crc │ │ │ │ ├── .part-00000.crc │ │ │ │ ├── _SUCCESS │ │ │ │ └── part-00000 │ │ │ └── 2_logreg_0f17b0e587fd │ │ │ └── metadata │ │ │ ├── ._SUCCESS.crc │ │ │ ├── .part-00000.crc │ │ │ ├── _SUCCESS │ │ │ └── part-00000 │ ├── pom.xml │ └── src │ │ └── main │ │ ├── java │ │ └── com │ │ │ └── turing │ │ │ └── common │ │ │ └── PropertiesUtils.java │ │ ├── resources │ │ ├── common-prod.properties │ │ ├── common-test.properties │ │ ├── core-site.xml │ │ ├── hdfs-site.xml │ │ ├── hive-site.xml │ │ └── log4j.properties │ │ └── scala │ │ └── com │ │ └── bigdata │ │ ├── catalog │ │ └── SparkCatalogExample.scala │ │ ├── connector │ │ ├── hologres │ │ │ ├── SparkToHologresExamples.scala │ │ │ └── SparkToHologresExamples2.scala │ │ ├── kafka │ │ │ └── ExactlyOnceExample.scala │ │ ├── mysql │ │ │ ├── JdbcExampleV1.scala │ │ │ └── JdbcExampleV2.scala │ │ └── oracle │ │ │ └── Grade.scala │ │ ├── core │ │ ├── AbstractTuringBaseStreamApp.scala │ │ └── BaseApp.scala │ │ ├── doris │ │ └── SparkDorisConnetorExample.scala │ │ ├── hive │ │ ├── ConnectHiveByThrift.scala │ │ └── SmallFileMerger.scala │ │ ├── hudi │ │ └── SparkHudi.scala │ │ ├── kafka │ │ ├── KafkaOffsetManagerUtils.scala │ │ ├── SparkKafkaSinkUtils.scala │ │ └── SparkKafkaUtils.scala │ │ ├── ml │ │ ├── IRISKmeans.scala │ │ └── PipelineExample.scala │ │ ├── mllib │ │ ├── SparkTraining.scala │ │ ├── StreamingKMeansDriver.scala │ │ ├── StreamingRegressionCompare.scala │ │ ├── StreamingRegressionExample001.scala │ │ └── recommend │ │ │ ├── AlsRecommender.scala │ │ │ ├── MainClass.scala │ │ │ ├── Recommender.scala │ │ │ └── SimilarityRecommender.scala │ │ ├── redis │ │ └── RedisUtils.scala │ │ ├── runSparkPipeline.scala │ │ ├── sql │ │ ├── SparkSQLExample01.scala │ │ └── udf │ │ │ └── AuroaUDF.scala │ │ ├── stream │ │ ├── DauApp.scala │ │ ├── example001 │ │ │ └── BaseDBCanalApp.scala │ │ └── example002 │ │ │ └── TuringStreamDataEtlApp.scala │ │ └── utils │ │ └── HiveUtil.scala ├── flink-learning │ ├── README.md │ ├── datasets │ │ └── AdClickLog.csv │ ├── documents │ │ ├── DDL.sql │ │ ├── ElasticsearchSink │ │ ├── Flink 从1.7 到1.12版本升级汇总.txt │ │ ├── Flink系列文章整理.md │ │ ├── auto_start_push_task.sh │ │ ├── 
blink.txt │ │ ├── centos7安装CDH6.pdf │ │ ├── flink-api │ │ │ └── 对于Flink’s API相关知识的整理与理解 │ │ ├── flink-connector-hive_2.11-1.10.0.jar │ │ ├── flink_auto_check_and_start.sh │ │ ├── flink_kafka_connector调用关系.png │ │ ├── flink二进制安装.md │ │ ├── pyflink │ │ │ ├── demo.py │ │ │ └── flink_kafka_demo.py │ │ └── 使用RedisCommand设置数据结构类型时和redis结构对应关系.png │ ├── pom.xml │ └── src │ │ └── main │ │ ├── java │ │ └── com │ │ │ └── luoj │ │ │ ├── common │ │ │ └── Logging.scala │ │ │ ├── runJavaApp.java │ │ │ └── task │ │ │ ├── connector │ │ │ └── clickhouse │ │ │ │ ├── AbstractClickHouseJDBCOutputFormat.java │ │ │ │ ├── ClickHouseJDBCOutputFormat.java │ │ │ │ ├── ClickHouseJDBCSinkFunction.java │ │ │ │ ├── FlinkJob3.scala │ │ │ │ ├── MyClickHouseSink3.java │ │ │ │ ├── StreamingJob.scala │ │ │ │ └── tableColums.scala │ │ │ ├── example │ │ │ ├── example001 │ │ │ │ ├── OrderAccumulator.java │ │ │ │ ├── OrderIndexRealtimeReport.java │ │ │ │ └── SubOrderDetail.java │ │ │ ├── example002 │ │ │ │ ├── AdClickAnalysis.scala │ │ │ │ ├── AppMarketByChannel.scala │ │ │ │ ├── HotItemsWithSql.scala │ │ │ │ ├── HotPagesNetworkFlow.scala │ │ │ │ ├── Hotitems.scala │ │ │ │ ├── KafkaProducerUtil.scala │ │ │ │ ├── LoginFail.scala │ │ │ │ ├── LoginFailAdvance.scala │ │ │ │ ├── LoginFailCEP.scala │ │ │ │ ├── OrderTimeout.scala │ │ │ │ ├── OrderTimeoutUseProcess.scala │ │ │ │ ├── PageView.scala │ │ │ │ ├── TxMatch.scala │ │ │ │ ├── TxMatchUseJoin.scala │ │ │ │ └── UniqueView.scala │ │ │ ├── example003 │ │ │ │ └── AsyncIOSideTableJoinRedis.java │ │ │ ├── example004 │ │ │ │ └── StreamingJob.scala │ │ │ └── example005 │ │ │ │ └── StreamingJob.java │ │ │ └── learn │ │ │ ├── accumulator │ │ │ └── OtherAPI_Accumulator.java │ │ │ ├── api │ │ │ ├── DataStreamAndDataSetApiExampleByJava001.java │ │ │ ├── DataStreamAndDataSetApiExampleByScala001.scala │ │ │ ├── SQLExampleByJava001.java │ │ │ ├── SQLExampleByScala001.scala │ │ │ ├── StatefulStreamProcessingExampleByJava001.java │ │ │ ├── StatefulStreamProcessingExampleByScala001.scala │ │ │ ├── TableApiExampleByJava001.java │ │ │ ├── TableApiExampleByScala001.scala │ │ │ └── processfunction │ │ │ │ ├── ProcessAllWindowFunctionDemo.java │ │ │ │ ├── ProcessWindowFunctionDemo.java │ │ │ │ ├── TestBroadcastProcessFunction.scala │ │ │ │ ├── TestCoProcessFunction.scala │ │ │ │ ├── TestKeyedBroadcastProcessFunction.scala │ │ │ │ ├── TestProcessFunction.scala │ │ │ │ ├── TestProcessFunctionByWindow.scala │ │ │ │ ├── TestProcessJoinFunction.scala │ │ │ │ ├── TestProcessWinFunctionOnWindow.java │ │ │ │ └── TestProcessWindowFunction.scala │ │ │ ├── broadcast │ │ │ └── OtherAPI_Broadcast.java │ │ │ ├── catalog │ │ │ └── HiveCatalogStreamExample.java │ │ │ ├── checkpoint │ │ │ ├── CheckpointDemo01.java │ │ │ └── CheckpointDemo02_RestartStrategy.java │ │ │ ├── dataset │ │ │ └── WordCount.java │ │ │ ├── datasetapi │ │ │ └── WordCountExampleByJava.java │ │ │ ├── datastream │ │ │ ├── WordCount2.java │ │ │ └── WordCountByLambda.java │ │ │ ├── datastreamapi │ │ │ ├── DataStreamAPIByJava.java │ │ │ ├── DataStreamAPIByScala.scala │ │ │ └── WatermarkStrategy.java │ │ │ ├── distributedCache │ │ │ └── OtherAPI_DistributedCache.java │ │ │ ├── kafka │ │ │ ├── Kafka_Comsumer_Demo.java │ │ │ └── Kafka_Sink_Demo.java │ │ │ ├── outputtag │ │ │ └── OutPutTagExample002.java │ │ │ ├── partition │ │ │ ├── MyParalleSource.java │ │ │ ├── MyPartition.java │ │ │ └── StreamingWithMyPartition.java │ │ │ ├── redis │ │ │ └── Redis_Demo.java │ │ │ ├── sideoutputs │ │ │ └── Flink_side_output_filter.java │ │ │ ├── sink │ │ │ ├── 
Connectors_JDBC_Demo01.java │ │ │ ├── MultiThreadConsumerClient.java │ │ │ ├── MultiThreadConsumerSink.java │ │ │ ├── SinkToRedisByScala.scala │ │ │ ├── SinkToRedisLearn.java │ │ │ └── Sink_Demo02.java │ │ │ ├── source │ │ │ ├── Source_Demo01_Collection.java │ │ │ ├── Source_Demo02_File.java │ │ │ ├── Source_Demo03_Socket.java │ │ │ ├── Source_Demo04_Customer.java │ │ │ └── Source_Demo05_Customer_MySQL.java │ │ │ ├── state │ │ │ ├── SourceDemo03.java │ │ │ ├── StateDemo01_KeyedState.java │ │ │ └── StateDemo02_OperatorState.java │ │ │ ├── tableapi │ │ │ ├── StreamSQLExample.java │ │ │ ├── TableApi.java │ │ │ └── TableApiByScala.scala │ │ │ ├── tablesql │ │ │ ├── CustomTableFunction.java │ │ │ ├── DuplicatorFunction.java │ │ │ ├── FlinkTableSQL.java │ │ │ ├── FlinkTableSQLByBatchMode.java │ │ │ ├── FlinkTableSQLByBatchModeFromMySQL.java │ │ │ ├── FlinkTableSQLByStreamingMode.java │ │ │ ├── ScalaFlinkTableSQLByBatchModeFromMySQL.scala │ │ │ ├── ScalaFlinkTableSQLByStreamingModeFromHive.scala │ │ │ ├── Sensor.java │ │ │ ├── StreamSql.java │ │ │ ├── StreamTableApi.java │ │ │ ├── StreamingJob.scala │ │ │ ├── TableFunctionExample001.java │ │ │ ├── TmpTableExample.java │ │ │ └── customfunc │ │ │ │ └── ScalarFunctionExample.java │ │ │ ├── test │ │ │ ├── ExampleIntegrationTest.java │ │ │ ├── IncrementMapFunction.java │ │ │ └── IncrementMapFunctionTest.java │ │ │ ├── udf │ │ │ ├── FromUnixTimeUDF.java │ │ │ ├── MedianUdaf.scala │ │ │ ├── UdafSource.scala │ │ │ └── UdafTest.scala │ │ │ ├── watermark │ │ │ ├── StreamingWindowWatermark.java │ │ │ ├── WaterMaker_Demo01.java │ │ │ ├── WatermakerDemo02_Check.java │ │ │ └── WatermakerDemo03_AllowedLateness.java │ │ │ └── window │ │ │ ├── Window_Demo.java │ │ │ ├── Window_Demo02.java │ │ │ └── Window_Demo03.java │ │ ├── resources │ │ ├── core-site.xml │ │ ├── hdfs-site.xml │ │ ├── hive-site.xml │ │ └── log4j.properties │ │ └── scala │ │ └── com │ │ └── bigdata │ │ ├── common │ │ ├── ClickHouseUtil.scala │ │ └── KafkaProducerUtil.java │ │ ├── runScalaApp.scala │ │ └── task │ │ ├── example │ │ └── example001 │ │ │ ├── AddSearchPlanBean.java │ │ │ ├── TableApiByJavaEaxmple001.java │ │ │ ├── TableApiEaxmple001.scala │ │ │ └── WordCount.scala │ │ ├── learn │ │ └── flinksql │ │ │ └── TableFunctionExample001.scala │ │ └── planner │ │ ├── BlinkPlannerExample.scala │ │ └── MyDataSource.scala ├── flink1.11-learning │ ├── README.md │ ├── doc │ │ ├── core-site.xml │ │ ├── hdfs-site.xml │ │ └── hive-site.xml │ ├── pom.xml │ └── src │ │ └── main │ │ ├── java │ │ └── com │ │ │ └── luoj │ │ │ ├── common │ │ │ ├── ExampleConstant.java │ │ │ ├── GenerateRandomDataUtils.java │ │ │ ├── Logging.scala │ │ │ └── PropertiesConstants.java │ │ │ ├── runJavaApp.java │ │ │ └── task │ │ │ ├── example │ │ │ ├── JavaTableApp.java │ │ │ └── PikaTest.java │ │ │ └── learn │ │ │ ├── catalog │ │ │ ├── HiveCatalogsExample.java │ │ │ └── PostgresCatalogTest.java │ │ │ ├── cep │ │ │ ├── WebMonitorAlert.java │ │ │ ├── WebMonitorAlertDy.java │ │ │ ├── WebMonitorAlertDynamicConf.java │ │ │ └── WebMonitorAlertDynamicConf1.java │ │ │ ├── connector │ │ │ ├── file │ │ │ │ ├── StreamingWriteFile.java │ │ │ │ └── StreamingWriteFileOrc.java │ │ │ └── hive │ │ │ │ └── StreamingWriteHive.java │ │ │ ├── deploy │ │ │ ├── StopYarnJob.java │ │ │ └── SubmitJobApplicationMode.java │ │ │ ├── flinksql │ │ │ ├── CustomScalarFunction.java │ │ │ ├── CustomTableFunction.java │ │ │ ├── SQLFirst.java │ │ │ └── example001 │ │ │ │ ├── GeneUISource.java │ │ │ │ ├── MySQLTableFunction.java │ │ │ │ └── 
TestMySQLTableFunction.java │ │ │ ├── function │ │ │ ├── CustomAggregateFunctionTCase.java │ │ │ └── UdafTP.java │ │ │ ├── iceberg │ │ │ └── Flink2Iceberg.java │ │ │ ├── sink │ │ │ ├── CacheCloudHelperFactory.java │ │ │ ├── SinkToCacheCloud.java │ │ │ ├── SinkToCacheCloudByExactlyOnce.java │ │ │ └── example001 │ │ │ │ ├── JCacheExecutionOptions.java │ │ │ │ ├── JCacheSinkExample001.java │ │ │ │ ├── JcacheSink.java │ │ │ │ └── SinkToJCache.java │ │ │ ├── tableapi │ │ │ └── PV2mysql.java │ │ │ ├── timer │ │ │ └── AutoEvaluation.java │ │ │ ├── watermark │ │ │ └── WatermarkTest.java │ │ │ └── windows │ │ │ ├── BigScreem.java │ │ │ ├── RealTimePvUv_BitMap.java │ │ │ └── RealTimePvUv_Set.java │ │ ├── resources │ │ ├── application.properties │ │ ├── core-site.xml │ │ ├── hdfs-site.xml │ │ ├── hive-site.xml │ │ ├── jcache_ice_client.properties │ │ └── log4j.properties │ │ └── scala │ │ └── com │ │ └── bigdata │ │ ├── common │ │ ├── ClickHouseUtil.scala │ │ └── KafkaProducerUtil.java │ │ ├── runScalaApp.scala │ │ └── task │ │ └── learn │ │ ├── DataAnalysisExample001.scala │ │ └── catalog │ │ └── DataAnalysisByHive001.scala ├── flink1.12-learning │ ├── README.md │ ├── doc │ │ ├── core-site.xml │ │ ├── hdfs-site.xml │ │ └── hive-site.xml │ ├── pom.xml │ └── src │ │ └── main │ │ ├── java │ │ └── com │ │ │ └── luoj │ │ │ ├── example │ │ │ └── example1 │ │ │ │ ├── BaseDBApp.java │ │ │ │ ├── BaseLogApp.java │ │ │ │ ├── DimSink.java │ │ │ │ ├── GmallConfig.java │ │ │ │ ├── MyDeserializationSchema.java │ │ │ │ ├── MyKafkaUtil.java │ │ │ │ ├── TableProcess.java │ │ │ │ └── TableProcessFunction.java │ │ │ └── runJavaApp.java │ │ ├── resources │ │ ├── application.properties │ │ ├── core-site.xml │ │ ├── hdfs-site.xml │ │ ├── hive-site.xml │ │ ├── jcache_ice_client.properties │ │ └── log4j.properties │ │ └── scala │ │ └── com │ │ └── bigdata │ │ └── runScalaApp.scala ├── flink1.13-learning │ ├── README.md │ ├── doc │ │ └── LogEventDataExample.json │ ├── output │ │ └── data │ ├── pom.xml │ └── src │ │ ├── main │ │ ├── java │ │ │ └── com │ │ │ │ └── luoj │ │ │ │ ├── alarm │ │ │ │ └── AlarmClient.java │ │ │ │ ├── bean │ │ │ │ ├── Customer.java │ │ │ │ ├── MarketingUserBehavior.java │ │ │ │ ├── Order.java │ │ │ │ ├── OrderItem.java │ │ │ │ ├── Product.java │ │ │ │ ├── StudentViewCount.java │ │ │ │ └── TrafficData.java │ │ │ │ ├── common │ │ │ │ ├── DBConnectUtil.java │ │ │ │ ├── DruidConnectionUtils.java │ │ │ │ ├── ESSinkUtil.java │ │ │ │ ├── ExampleConstant.java │ │ │ │ ├── ExecutionEnvUtil.java │ │ │ │ ├── FieldDescrib.java │ │ │ │ ├── GenerateRandomDataUtils.java │ │ │ │ ├── HdfsUtil.java │ │ │ │ ├── KafkaUtil.java │ │ │ │ ├── MyKafkaUtil.java │ │ │ │ ├── MySQLGlobalConfig.java │ │ │ │ ├── MySQLJDBCUtil.java │ │ │ │ ├── PropConfigUtil.java │ │ │ │ ├── PropertiesConstants.java │ │ │ │ └── ReflectUtils.java │ │ │ │ ├── runJavaApp.java │ │ │ │ ├── source │ │ │ │ └── GenerateCustomOrderSource.java │ │ │ │ └── task │ │ │ │ ├── example │ │ │ │ ├── cdc │ │ │ │ │ ├── Binlog.java │ │ │ │ │ ├── CommonKafkaSink.java │ │ │ │ │ ├── CommonStringDebeziumDeserializationSchema.java │ │ │ │ │ └── MySqlBinlogSourceExample.java │ │ │ │ ├── datareport │ │ │ │ │ ├── DataReport.java │ │ │ │ │ ├── MyAggFunction.java │ │ │ │ │ ├── MyWatermark.java │ │ │ │ │ └── kafkaProducerDataReport.java │ │ │ │ ├── finance │ │ │ │ │ ├── FinancialTransactionJob.java │ │ │ │ │ ├── MyWaterMark.java │ │ │ │ │ ├── Transaction.java │ │ │ │ │ └── TransactionAggregate.java │ │ │ │ ├── jcache │ │ │ │ │ ├── JCacheSinkExample002.java │ │ │ │ │ ├── 
JCacheSinkUtil.java │ │ │ │ │ ├── JCacheSourceExample002.java │ │ │ │ │ ├── JCacheTestExample001.java │ │ │ │ │ ├── SinkToJCache.java │ │ │ │ │ └── SourceFromJCache.java │ │ │ │ ├── order │ │ │ │ │ └── OrderDataAnalysis.java │ │ │ │ ├── others │ │ │ │ │ ├── FinancialTransactionJob.java │ │ │ │ │ ├── JavaTableApp.java │ │ │ │ │ └── example001 │ │ │ │ │ │ ├── SourceData.java │ │ │ │ │ │ └── TraceSourceData.java │ │ │ │ ├── pv │ │ │ │ │ ├── MySensorSource.scala │ │ │ │ │ ├── PvUv2.scala │ │ │ │ │ └── SensorReading.scala │ │ │ │ ├── traffic │ │ │ │ │ ├── TrafficAnalysisMain.java │ │ │ │ │ └── TrafficFlow.java │ │ │ │ └── userbehavior │ │ │ │ │ ├── AsynGetUserInfoJon.java │ │ │ │ │ ├── AsyncInsertUserBehavior2Mysql.java │ │ │ │ │ ├── AsyncInsertUserBehaviorToMysql.java │ │ │ │ │ ├── GetUserInfoByUserIdAsyncFunction.java │ │ │ │ │ ├── User.java │ │ │ │ │ ├── UserBehavingInfo.java │ │ │ │ │ ├── UserBehavingInfoKeyByGood.java │ │ │ │ │ ├── UserBehavingInfoTask.java │ │ │ │ │ ├── UserBehaviorRedisMapper.java │ │ │ │ │ ├── UserBehavorCountAggregateUtils.java │ │ │ │ │ └── UserBehavorCountWindowFunction.java │ │ │ │ ├── help │ │ │ │ └── example001 │ │ │ │ │ ├── CustomerSource.scala │ │ │ │ │ └── relationCntA.scala │ │ │ │ ├── jira │ │ │ │ ├── FLINK_19038.java │ │ │ │ ├── FLINK_19281.java │ │ │ │ └── FLINK_20937.java │ │ │ │ └── learn │ │ │ │ ├── OutOfOrderCase.scala │ │ │ │ ├── accumulator │ │ │ │ ├── AccumulatorTest.java │ │ │ │ ├── Accumulators.java │ │ │ │ └── CounterTest.java │ │ │ │ ├── alibaba │ │ │ │ ├── batch │ │ │ │ │ └── OutOfOrderCase.scala │ │ │ │ ├── checkpoint │ │ │ │ │ ├── CheckpointedSourceTestCase.java │ │ │ │ │ ├── EnableCheckpointRestartJob.java │ │ │ │ │ └── No25SourceWithoutCheckpointed.java │ │ │ │ ├── matric │ │ │ │ │ ├── FlinkMatricByPrometheus.java │ │ │ │ │ ├── FraudDetectionJob.java │ │ │ │ │ └── MyUDFWithMetric.java │ │ │ │ ├── state │ │ │ │ │ ├── EnableCheckpointForFailover.java │ │ │ │ │ ├── KeepCheckpointForRestore.java │ │ │ │ │ ├── SavepointForRestore.java │ │ │ │ │ └── SavepointForRestoreWithoutException.java │ │ │ │ └── table │ │ │ │ │ ├── App.java │ │ │ │ │ └── Kafka2Mysql.java │ │ │ │ ├── async │ │ │ │ ├── AsyncDatabaseRequest.java │ │ │ │ ├── ElasticsearchAsyncFunction.java │ │ │ │ ├── FlinkAsyncIO.java │ │ │ │ ├── example001 │ │ │ │ │ ├── AsyncIODemo.java │ │ │ │ │ └── SampleAsyncFunction.java │ │ │ │ └── example002 │ │ │ │ │ └── WeiboProfileAsynFunction.java │ │ │ │ ├── broadcast │ │ │ │ └── BroadCastWordCountExample.java │ │ │ │ ├── catalog │ │ │ │ ├── FlinkDDLHiveCatalog.scala │ │ │ │ └── HiveCatalogsExample.java │ │ │ │ ├── connector │ │ │ │ ├── elasticsearch │ │ │ │ │ ├── ElasticSearchSinkUtil.java │ │ │ │ │ ├── ElasticsearchSinkExample001.java │ │ │ │ │ └── RetryRequestFailureHandler.java │ │ │ │ ├── jcache │ │ │ │ │ ├── JCacheSinkExample001.java │ │ │ │ │ └── PikaTest.java │ │ │ │ └── mysql │ │ │ │ │ └── MySqlTwoPhaseCommitSink.java │ │ │ │ ├── datagenerator │ │ │ │ └── TestSource.java │ │ │ │ ├── dataset │ │ │ │ ├── DataSetDataAnalysisExample001.java │ │ │ │ ├── IterationOperatorExample.java │ │ │ │ ├── WordCountExample.java │ │ │ │ └── ZippingElementExample.java │ │ │ │ ├── datastream │ │ │ │ ├── BatchExecutionBehaviorExample.java │ │ │ │ ├── DataStreamAPIIntegrationExample001.java │ │ │ │ ├── DataStreamUserDefinedFunctionExample.java │ │ │ │ └── WindowWordCount.java │ │ │ │ ├── debug │ │ │ │ └── FlinkWebUIExample001.java │ │ │ │ ├── exactlyonce │ │ │ │ ├── OracleTwoPhaseCommitSink.java │ │ │ │ └── StreamDemoKafka2Mysql.java │ │ │ │ ├── extensions │ │ │ 
│ └── ScalaExtensionExample.scala │ │ │ │ ├── func │ │ │ │ ├── LiteralFunction2.java │ │ │ │ ├── MapFunctionWithException.java │ │ │ │ ├── NonParallelCheckpointedSource.java │ │ │ │ ├── ParallelCheckpointedSource.java │ │ │ │ ├── ParallelCheckpointedSourceRestoreFromTaskIndex.java │ │ │ │ ├── SideOutputProcessFunction.java │ │ │ │ ├── SimpleSourceFunction.java │ │ │ │ ├── StateProcessFunction.java │ │ │ │ ├── TopologyChanges.java │ │ │ │ └── Tuple3KeySelector.java │ │ │ │ ├── hbase │ │ │ │ └── fullPullApp.java │ │ │ │ ├── hive │ │ │ │ └── DataSetDataAnalysisExample001.java │ │ │ │ ├── hudi │ │ │ │ └── example001 │ │ │ │ │ ├── Frog.java │ │ │ │ │ ├── SocketProducer.java │ │ │ │ │ ├── User.java │ │ │ │ │ └── UserFrog.java │ │ │ │ ├── join │ │ │ │ ├── JoinDemo1.java │ │ │ │ ├── JoinDemo2.java │ │ │ │ ├── JoinDemo3.java │ │ │ │ ├── JoinDemo4.java │ │ │ │ ├── JoinDemo5.java │ │ │ │ ├── JoinDemo9.java │ │ │ │ └── example001 │ │ │ │ │ ├── CityInfo.java │ │ │ │ │ ├── CityInfoSchema.java │ │ │ │ │ ├── JoinDemo10.java │ │ │ │ │ ├── UserInfo.java │ │ │ │ │ └── UserInfoSchema.java │ │ │ │ ├── kafka │ │ │ │ ├── KafkaElement.java │ │ │ │ ├── KafkaFlinkTest1.java │ │ │ │ ├── KafkaFlinkTest12.java │ │ │ │ ├── KafkaOut.java │ │ │ │ └── KafkaSource.java │ │ │ │ ├── matric │ │ │ │ ├── counter │ │ │ │ │ └── MyMapper.java │ │ │ │ ├── example001 │ │ │ │ │ ├── AbsDeserialization.java │ │ │ │ │ ├── CustomerKafkaConsumer.java │ │ │ │ │ ├── ParseDeserialization.scala │ │ │ │ │ ├── RawData.scala │ │ │ │ │ └── UdefineMatricsByFlinkExample001.scala │ │ │ │ ├── gauge │ │ │ │ │ └── MyMapper.java │ │ │ │ ├── histogram │ │ │ │ │ ├── MyHistogram.java │ │ │ │ │ └── MyMapper.java │ │ │ │ └── meter │ │ │ │ │ ├── MyMapper.java │ │ │ │ │ └── MyMeter.java │ │ │ │ ├── operator │ │ │ │ ├── aggregate │ │ │ │ │ └── example001 │ │ │ │ │ │ ├── AggregateFunctionMain.java │ │ │ │ │ │ ├── MyCountAggregate.java │ │ │ │ │ │ ├── MyCountWindowFunction.java │ │ │ │ │ │ ├── ProductViewData.java │ │ │ │ │ │ └── RecordSeclectId.java │ │ │ │ ├── example001 │ │ │ │ │ ├── AggregationStateOperator.java │ │ │ │ │ ├── Constants.java │ │ │ │ │ ├── ListStateOperator.java │ │ │ │ │ ├── MapStateOperator.java │ │ │ │ │ ├── MyKey.java │ │ │ │ │ ├── MyValue.java │ │ │ │ │ ├── ReduceStateOperator.java │ │ │ │ │ ├── Source.java │ │ │ │ │ ├── UnifiedSavepointGeneratorJob.java │ │ │ │ │ └── ValueStateOperator.java │ │ │ │ ├── example003 │ │ │ │ │ ├── AggregationStateOperator.java │ │ │ │ │ ├── CollectResultSink.java │ │ │ │ │ ├── Constants.java │ │ │ │ │ ├── ListStateOperator.java │ │ │ │ │ ├── MapStateOperator.java │ │ │ │ │ ├── ReduceStateOperator.java │ │ │ │ │ ├── Source.java │ │ │ │ │ ├── UnifiedSavepointGeneratorJob.java │ │ │ │ │ ├── UnifiedSavepointRestartAndCheckJob.java │ │ │ │ │ ├── ValueStateOperator.java │ │ │ │ │ └── v2 │ │ │ │ │ │ ├── AggregationStateOperator.java │ │ │ │ │ │ ├── CollectResultSink.java │ │ │ │ │ │ ├── ListStateOperator.java │ │ │ │ │ │ ├── MapStateOperator.java │ │ │ │ │ │ ├── MyKey.java │ │ │ │ │ │ ├── MyValue.java │ │ │ │ │ │ ├── ReduceStateOperator.java │ │ │ │ │ │ ├── Source.java │ │ │ │ │ │ ├── UnifiedSavepointGeneratorJob.java │ │ │ │ │ │ ├── UnifiedSavepointRestartAndCheckJob.java │ │ │ │ │ │ └── ValueStateOperator.java │ │ │ │ └── join │ │ │ │ │ ├── IntervalJoinExample001.java │ │ │ │ │ ├── IntervalJoinOperator.java │ │ │ │ │ ├── JoinAndCoGroupOperator.java │ │ │ │ │ ├── JoinOperator.java │ │ │ │ │ ├── Order.java │ │ │ │ │ ├── SessionWindowJoinExample001.java │ │ │ │ │ ├── SlidingWindowJoinExample001.java │ │ │ │ │ ├── 
TumblingWindowJoinExample001.java │ │ │ │ │ ├── User.java │ │ │ │ │ ├── UserBrowseLog.java │ │ │ │ │ └── UserClickLog.java │ │ │ │ ├── partition │ │ │ │ ├── MyNoParallelSourceScala.scala │ │ │ │ ├── MyPartitionerScala.scala │ │ │ │ └── StreamingDemoMyPartitionerScala.scala │ │ │ │ ├── plusar │ │ │ │ ├── FlinkToPulsar.java │ │ │ │ └── PulsarToFlink.java │ │ │ │ ├── processfunction │ │ │ │ ├── KeyedProcessFunctionExample.java │ │ │ │ ├── NOKeyedProcessFunctionExample.java │ │ │ │ └── ProcessFunctionExample.java │ │ │ │ ├── sink │ │ │ │ └── CacheCloudSink.java │ │ │ │ ├── source │ │ │ │ ├── MySelfSourceTest01.java │ │ │ │ ├── SensorReading.scala │ │ │ │ ├── SensorSource.scala │ │ │ │ └── ThresholdUpdate.scala │ │ │ │ ├── state │ │ │ │ ├── CountAverageWithListState.java │ │ │ │ ├── CountAverageWithValueState.java │ │ │ │ ├── example001 │ │ │ │ │ └── FlinkStateExample001.scala │ │ │ │ ├── example002 │ │ │ │ │ └── UserDefineFunctionWithSateExample.scala │ │ │ │ ├── example003 │ │ │ │ │ └── ConfigRealTimeChangeByBroadcastState.java │ │ │ │ ├── keyedstate │ │ │ │ │ └── MapStateExample.java │ │ │ │ └── operatorstate │ │ │ │ │ ├── CheckpointFunctionExample.java │ │ │ │ │ └── ListCheckpointedExample.java │ │ │ │ ├── statebackends │ │ │ │ └── MyOptionsFactory.java │ │ │ │ ├── table │ │ │ │ ├── func │ │ │ │ │ ├── DuplicatorFunction.java │ │ │ │ │ ├── SplitFunc.java │ │ │ │ │ └── TableFuncExample001.java │ │ │ │ └── sql │ │ │ │ │ ├── TableSQLExample.java │ │ │ │ │ ├── TableSQLExample002.java │ │ │ │ │ ├── TemporalTablesDemo001.java │ │ │ │ │ └── UseWatermarkGenerator.java │ │ │ │ ├── time │ │ │ │ └── example001 │ │ │ │ │ ├── EventTimeExample001.java │ │ │ │ │ └── WordCountWindow.java │ │ │ │ ├── udf │ │ │ │ ├── FirstNonNull.java │ │ │ │ ├── RandomFunction.java │ │ │ │ ├── SentenceToWordsUDF.java │ │ │ │ └── WordToWordCountUDF.java │ │ │ │ ├── unit │ │ │ │ └── IncrementMapFunction.java │ │ │ │ ├── watermark │ │ │ │ ├── DataStreamDemo.java │ │ │ │ ├── example001 │ │ │ │ │ ├── BoundedOutOfOrdernessGenerator.java │ │ │ │ │ ├── GeneratingWatermarksExample001.java │ │ │ │ │ ├── PunctuatedAssigner.java │ │ │ │ │ └── TimeLagWatermarkGenerator.java │ │ │ │ ├── example002 │ │ │ │ │ └── WatermarkTest.java │ │ │ │ └── example03 │ │ │ │ │ └── Demo1.java │ │ │ │ └── window │ │ │ │ ├── DataStreamToWindowsCount.java │ │ │ │ ├── DyWindowDemo.java │ │ │ │ ├── DynamicTumblingEventTimeWindows.java │ │ │ │ ├── WindowApiExample001.java │ │ │ │ ├── WindowApiFunctionExample001.java │ │ │ │ └── function │ │ │ │ └── WindowFunctionExample001.java │ │ ├── resources │ │ │ ├── application.properties │ │ │ ├── core-site.xml │ │ │ ├── hdfs-site.xml │ │ │ ├── hive-site.xml │ │ │ └── log4j.properties │ │ └── scala │ │ │ └── com │ │ │ └── bigdata │ │ │ ├── bean │ │ │ ├── RawData.scala │ │ │ └── Student.java │ │ │ ├── common │ │ │ ├── FlinkEvnBuilder.scala │ │ │ ├── Logging.scala │ │ │ ├── MyDruidUtils.java │ │ │ ├── StringUtils.scala │ │ │ └── source │ │ │ │ ├── CustomAddressSource.scala │ │ │ │ ├── CustomOrderSource.scala │ │ │ │ ├── CustomProductSource.scala │ │ │ │ └── CustomUserSource.scala │ │ │ ├── runScalaApp.scala │ │ │ └── task │ │ │ ├── example │ │ │ ├── JavaTableApp.scala │ │ │ ├── OrderAnalysisTask.scala │ │ │ └── tableAggFunc.scala │ │ │ ├── learn │ │ │ ├── analysis │ │ │ │ ├── DataAnalysisExample001.scala │ │ │ │ └── Demo02Mysql.scala │ │ │ ├── cep │ │ │ │ ├── AfterMatchStrategyDemo.scala │ │ │ │ └── FlinkCEPDemo01.scala │ │ │ ├── datastream │ │ │ │ ├── DataStreamAPIIntegrationExample001.scala │ │ │ │ ├── 
DataStreamAPIIntegrationExample002.scala │ │ │ │ ├── DataStreamAPIIntegrationExample003.scala │ │ │ │ ├── DataStreamAPIIntegrationExample004.scala │ │ │ │ ├── DataStreamAPIIntegrationExample005.scala │ │ │ │ ├── HandlingOfChangelogStreamsExample001.scala │ │ │ │ ├── ImplicitConversionsInScala.scala │ │ │ │ ├── ImplicitConversionsInScalaExample001.scala │ │ │ │ ├── ToChangelogStreamExample001.scala │ │ │ │ ├── TransformationsMap.scala │ │ │ │ └── WindowWordCount.scala │ │ │ ├── exactlyonce │ │ │ │ └── TransactionalFileSink.scala │ │ │ ├── extensions │ │ │ │ └── Main.scala │ │ │ ├── flinksql │ │ │ │ ├── FlinkKafkaDDLDemo.scala │ │ │ │ ├── FlinkSQLExample0002.scala │ │ │ │ ├── FlinkSQLExample0003.scala │ │ │ │ ├── FlinkSQLExample0004.scala │ │ │ │ ├── FlinkSQLExample0005.scala │ │ │ │ ├── FlinkSQLExample0006.scala │ │ │ │ └── FlinkSQLExample001.scala │ │ │ ├── func │ │ │ │ ├── FlinkCustomerFuncExample01.scala │ │ │ │ ├── FlinkCustomerFuncExample02.scala │ │ │ │ ├── HashCodeFunction.scala │ │ │ │ ├── OverloadedFunction.scala │ │ │ │ ├── OverloadedFunction2.scala │ │ │ │ ├── SplitFunction.scala │ │ │ │ ├── Top2.scala │ │ │ │ ├── WeightedAvg.scala │ │ │ │ └── WeightedAvgAccum.scala │ │ │ ├── sink │ │ │ │ ├── es │ │ │ │ │ └── DataReportScala.scala │ │ │ │ └── mysql │ │ │ │ │ └── SinkToMySQL.java │ │ │ ├── source │ │ │ │ └── redis │ │ │ │ │ ├── DataCleanScala.scala │ │ │ │ │ └── MyRedisSourceScala.scala │ │ │ ├── tableapi │ │ │ │ ├── TableAPIExample001.scala │ │ │ │ ├── Top2.scala │ │ │ │ └── Top2Accum.scala │ │ │ ├── tablesql │ │ │ │ ├── LateTest.scala │ │ │ │ ├── TableAndDataStreamConversionDemo.scala │ │ │ │ ├── TableSQLExample0001.scala │ │ │ │ └── TabletoChangelogStream.scala │ │ │ ├── watermark │ │ │ │ ├── StreamingWindowWatermarkScala.scala │ │ │ │ └── StreamingWindowWatermarkScala2.scala │ │ │ └── window │ │ │ │ ├── DyWindowDemo.scala │ │ │ │ ├── WindowComputeExample.scala │ │ │ │ ├── evictor │ │ │ │ └── MyEvictor.scala │ │ │ │ ├── func │ │ │ │ ├── CountStudentAgg.java │ │ │ │ ├── WindowFuncDemo01.scala │ │ │ │ ├── WindowFunctionDemo02.java │ │ │ │ └── WindowStudentResultFunction.java │ │ │ │ ├── trigger │ │ │ │ └── CustomTrigger.scala │ │ │ │ └── watermark │ │ │ │ └── WatermarkExample01.scala │ │ │ └── theme │ │ │ └── broadcast │ │ │ ├── BroadcastDemo001.java │ │ │ └── FlinkBroadcastDemo002.java │ │ └── test │ │ ├── java │ │ └── com │ │ │ └── luoj │ │ │ ├── IncrementMapFunctionTest.java │ │ │ ├── kudu │ │ │ └── Test.java │ │ │ ├── plusar │ │ │ └── PlusarTest.java │ │ │ └── testAsyncWaitOperator.java │ │ └── scala │ │ └── org │ │ └── aurora │ │ └── FlinkSQLExample0001.scala ├── flink1.14-learning │ ├── README.md │ ├── doc │ │ └── pom.xml │ ├── mvnw │ ├── mvnw.cmd │ ├── pom.xml │ └── src │ │ ├── main │ │ ├── java │ │ │ └── com │ │ │ │ └── aurora │ │ │ │ ├── bean │ │ │ │ ├── LngLat.java │ │ │ │ └── Message.java │ │ │ │ ├── cdc │ │ │ │ ├── CustomerDeserializationSchema.java │ │ │ │ ├── FlinkCDC001.java │ │ │ │ ├── MongoDemo.java │ │ │ │ ├── MysqlDemo.java │ │ │ │ ├── PGSourceDemo.java │ │ │ │ ├── Product.java │ │ │ │ └── User.java │ │ │ │ ├── common │ │ │ │ ├── Constants.java │ │ │ │ ├── DataUtil.java │ │ │ │ ├── HBaseUtil.java │ │ │ │ ├── LoadResourcesUtils.java │ │ │ │ └── RandomUtil.java │ │ │ │ ├── generate │ │ │ │ └── GenerateCustomerOrderSource.java │ │ │ │ ├── mq │ │ │ │ ├── AbsDeserialization.java │ │ │ │ └── kafka │ │ │ │ │ └── CustomerKafkaConsumer.java │ │ │ │ ├── sink │ │ │ │ └── HbaseSink.java │ │ │ │ ├── source │ │ │ │ ├── GenerateCustomOrderSource.java │ │ │ │ ├── Order.java │ 
│ │ │ ├── OrderSource.java │ │ │ │ ├── TypeStat.java │ │ │ │ └── TypeStatSource.java │ │ │ │ ├── sql │ │ │ │ └── ReadHive.java │ │ │ │ └── stream │ │ │ │ ├── BlockedDataStreamSourcesJob.java │ │ │ │ ├── BlockedSQLSourcesJob.java │ │ │ │ ├── FlinkStreamExample001.java │ │ │ │ ├── MixBlockedSourcesJob.java │ │ │ │ └── OrderAnalysisTask.java │ │ ├── resources │ │ │ ├── core-site.xml │ │ │ ├── hdfs-site.xml │ │ │ ├── hive-site.xml │ │ │ └── log4j.properties │ │ └── scala │ │ │ └── com │ │ │ └── bigdata │ │ │ ├── bean │ │ │ ├── Log.scala │ │ │ ├── OrderObj.scala │ │ │ └── RawData.scala │ │ │ ├── common │ │ │ └── ParseDeserialization.scala │ │ │ ├── examples │ │ │ ├── DataStreamAPIIntegration001.scala │ │ │ └── ex002 │ │ │ │ ├── FraudDetectionJob.java │ │ │ │ └── FraudDetector.java │ │ │ ├── metric │ │ │ └── FlinkMetricDemo001.scala │ │ │ ├── runScalaApp.scala │ │ │ ├── sink │ │ │ └── HBaseSinkFunction.scala │ │ │ ├── sql │ │ │ └── FlinkCustomerFuncExample02.scala │ │ │ ├── theme │ │ │ ├── OrderAnalysisTask2.scala │ │ │ └── hive │ │ │ │ └── HiveConnectDemo.scala │ │ │ └── window │ │ │ ├── evictor │ │ │ └── MyEvictor.scala │ │ │ └── trigger │ │ │ └── CustomTrigger.scala │ │ └── test │ │ └── java │ │ └── com │ │ └── aurora │ │ └── AnalysisExampleUnit.java ├── flink1.15-learning │ ├── README.md │ ├── data │ │ ├── SensorReading.txt │ │ └── output │ │ │ └── output1.txt │ ├── docs │ │ └── ddl.sql │ ├── mvnw │ ├── mvnw.cmd │ ├── pom.xml │ └── src │ │ ├── main │ │ ├── java │ │ │ └── com │ │ │ │ └── aurora │ │ │ │ ├── bean │ │ │ │ ├── Jason.java │ │ │ │ ├── Order.java │ │ │ │ └── Student.java │ │ │ │ ├── common │ │ │ │ ├── SnowFlake.java │ │ │ │ └── enums │ │ │ │ │ └── FlinkPipelineEnum.java │ │ │ │ ├── example │ │ │ │ ├── example001 │ │ │ │ │ ├── FlinkByKafkaExample2.java │ │ │ │ │ ├── FlinkWithKafka.java │ │ │ │ │ ├── MyDeSerializer.java │ │ │ │ │ └── MyKafkaDeserialization.java │ │ │ │ ├── example002 │ │ │ │ │ ├── CustomWatermarkStrategy.java │ │ │ │ │ └── FlinkByKafkaExample01.java │ │ │ │ └── example03 │ │ │ │ │ └── MyEventTimeWindowByFlink.java │ │ │ │ ├── feature │ │ │ │ ├── accumulator │ │ │ │ │ └── PreciseAccumulator.java │ │ │ │ ├── datastream │ │ │ │ │ ├── BatchFlinkTask.java │ │ │ │ │ ├── FlinkDataStreamDemo1.java │ │ │ │ │ ├── FlinkDatasetDemo1.java │ │ │ │ │ ├── FlinkSqlApiDemo1.java │ │ │ │ │ └── WordCountTask.java │ │ │ │ ├── func │ │ │ │ │ ├── function │ │ │ │ │ │ └── WordCountReduceFunction.java │ │ │ │ │ ├── process │ │ │ │ │ │ └── TimerProcessFunction.java │ │ │ │ │ ├── stateful │ │ │ │ │ │ └── MyStatefulFlatMap.java │ │ │ │ │ ├── stateless │ │ │ │ │ │ ├── MyStatelessFlatMap.java │ │ │ │ │ │ └── MyStatelessMap.java │ │ │ │ │ └── udf │ │ │ │ │ │ ├── ReduceWordsStateWindowUDF.java │ │ │ │ │ │ ├── SentenceToWordsUDF.java │ │ │ │ │ │ └── WordToWordCountUDF.java │ │ │ │ ├── hdfs │ │ │ │ │ └── FlinkTableAPIFromHDFS.java │ │ │ │ ├── hive │ │ │ │ │ └── FlinkTableAPIFromHive.java │ │ │ │ ├── kafka │ │ │ │ │ └── MyKafkaDeserialization.java │ │ │ │ ├── state │ │ │ │ │ ├── FlinkReadAndUpdateState.java │ │ │ │ │ ├── FlinkStreamingDemo.java │ │ │ │ │ ├── Jason.java │ │ │ │ │ └── UserDefinedSource.java │ │ │ │ ├── table │ │ │ │ │ ├── DataStreamConvertTableByFlink001.java │ │ │ │ │ ├── DataStreamConvertTableByFlink002.java │ │ │ │ │ ├── DataStreamConvertTableByFlink003.java │ │ │ │ │ ├── DataStreamConvertTableByFlink004.java │ │ │ │ │ ├── FlinkSqlApiDemo1.java │ │ │ │ │ ├── FlinkSqlApiDemo2.java │ │ │ │ │ ├── FlinkTableApiDemo1.java │ │ │ │ │ ├── FlinkTableApiDemo2.java │ │ │ │ │ └── 
FlinkTableApiDemo3.java │ │ │ │ └── window │ │ │ │ │ └── MyEventTimeWindow.java │ │ │ │ ├── generate │ │ │ │ ├── FakeTrafficRecordSource.java │ │ │ │ ├── RandomOrderSource.java │ │ │ │ ├── SentenceUDSource.java │ │ │ │ └── WordCountSource1ps.java │ │ │ │ └── source │ │ │ │ └── MySqlStudentSource.java │ │ ├── resources │ │ │ ├── core-site.xml │ │ │ ├── hdfs-site.xml │ │ │ ├── hive-site.xml │ │ │ └── logback.xml │ │ └── scala │ │ │ └── com │ │ │ └── bigdata │ │ │ ├── bean │ │ │ └── DataSchema.scala │ │ │ ├── example │ │ │ └── example001 │ │ │ │ └── AppMarketingByChannel.scala │ │ │ └── feature │ │ │ ├── sideoutput │ │ │ └── SideOutputTest.scala │ │ │ ├── sql │ │ │ ├── TableSqlTest.scala │ │ │ ├── TableSqlTest2.scala │ │ │ ├── batch │ │ │ │ ├── SqlBatchDemo.scala │ │ │ │ └── TableSQLJdbcAnalysisPipeline.scala │ │ │ └── stream │ │ │ │ └── SqlStreamDemo.scala │ │ │ └── table │ │ │ ├── FlinkTableApiExample.scala │ │ │ ├── FlinkTableApiExample2.scala │ │ │ ├── FlinkTableApiExample3.scala │ │ │ ├── FlinkTableApiExample4.scala │ │ │ ├── batch │ │ │ └── BatchTableTest.scala │ │ │ └── stream │ │ │ ├── RandomWordSource.java │ │ │ └── StreamTableTest.scala │ │ └── test │ │ └── java │ │ └── com │ │ └── aurora │ │ ├── ExampleCountSource.java │ │ ├── FakeTrafficRecordSourceUnitTest.java │ │ ├── MyStatefullFlatMapUnitTest.java │ │ ├── MyStatelessMapUnitTest.java │ │ ├── PhoenixUtil.scala │ │ ├── StatefulFlatMapTest.java │ │ ├── func │ │ ├── ReduceWordsStateWindowUDFTest.java │ │ └── SentenceToWordsUDFTest.java │ │ └── window │ │ ├── FlinkTriggerUnit.java │ │ ├── FlinkWindowOperatorUnit.java │ │ └── TriggerResult.java ├── flink1.16-learning │ ├── README.md │ ├── data │ │ └── example_001.json │ ├── docs │ │ └── ddl.sql │ ├── logs │ │ └── strutslog1.log │ ├── mvnw │ ├── mvnw.cmd │ ├── pom.xml │ └── src │ │ └── main │ │ ├── java │ │ └── com │ │ │ └── aurora │ │ │ ├── common │ │ │ ├── enums │ │ │ │ └── FlinkPipelineEnum.java │ │ │ └── utils │ │ │ │ ├── DateTimeUtil.java │ │ │ │ └── RedisUtils.java │ │ │ ├── example │ │ │ ├── FlinkTableApiDemo1.java │ │ │ ├── OrderDataAnalysis.java │ │ │ └── WordCountSource1ps.java │ │ │ ├── feature │ │ │ ├── kafka │ │ │ │ └── FlinkToKafka.java │ │ │ ├── ml │ │ │ │ └── QuickStart.java │ │ │ └── table │ │ │ │ └── FlinkTableSQLExample.java │ │ │ ├── generate │ │ │ ├── PrintSqlExample.java │ │ │ └── PrintSqlExample2.java │ │ │ ├── generator │ │ │ ├── Order.java │ │ │ └── OrderItem.java │ │ │ ├── runTuringPipeline.java │ │ │ └── source │ │ │ └── hive │ │ │ └── FlinkToHiveExample.java │ │ ├── resources │ │ ├── common-prod.properties │ │ ├── common-test.properties │ │ ├── core-site.xml │ │ ├── hdfs-site.xml │ │ ├── hive-site.xml │ │ └── log4j.properties │ │ └── scala │ │ └── com │ │ └── bigdata │ │ ├── bean │ │ └── DataSchema.scala │ │ └── feature │ │ └── table │ │ └── FlinkSqlExample.scala ├── spark-learning │ ├── README.md │ ├── data │ │ ├── employees.json │ │ ├── invoices.csv │ │ ├── iris.data │ │ ├── people.json │ │ ├── user.txt │ │ └── userInfo.json │ ├── doc │ │ ├── CleanOdsToDwd.scala │ │ ├── avro_data.json │ │ ├── binlog.json │ │ ├── data.json │ │ └── dwd_ad_push_device_info_df.py │ ├── pom.xml │ └── src │ │ ├── main │ │ ├── java │ │ │ └── com │ │ │ │ └── jiguang │ │ │ │ ├── SparkComputeApp.java │ │ │ │ └── common │ │ │ │ ├── Kudu.java │ │ │ │ ├── KuduUtils.java │ │ │ │ ├── Spark.java │ │ │ │ └── SpringContextUtil.java │ │ ├── resources │ │ │ ├── application.properties │ │ │ ├── core-site.xml │ │ │ ├── hdfs-site.xml │ │ │ ├── hive-site.xml │ │ │ ├── log4j.properties │ │ │ └── 
spring-bean.xml │ │ └── scala │ │ │ └── com │ │ │ └── bidata │ │ │ ├── bean │ │ │ └── ParamsList.scala │ │ │ ├── common │ │ │ ├── C3p0Utils.scala │ │ │ ├── ConfigParser.scala │ │ │ ├── ConfigReader.scala │ │ │ ├── JDBCWrapper.scala │ │ │ ├── MySQLSink.scala │ │ │ ├── MySQLSource.scala │ │ │ ├── MySQLSourceProvider.scala │ │ │ └── SparkSchemaUtil.scala │ │ │ ├── example │ │ │ ├── accumulator │ │ │ │ ├── AccumulatorExample.scala │ │ │ │ ├── FieldAccumulator.scala │ │ │ │ ├── Spark2RDDAccumulator.scala │ │ │ │ ├── Spark3RDDAccumulator.scala │ │ │ │ ├── Spark4RDDAccumulator.scala │ │ │ │ ├── Spark5RDDAccumulator.scala │ │ │ │ ├── SparkRDDAccumulator.scala │ │ │ │ └── SumAandB.scala │ │ │ ├── analysis │ │ │ │ └── top │ │ │ │ │ ├── Spark2ReqTop10.scala │ │ │ │ │ ├── Spark3ReqTop10.scala │ │ │ │ │ ├── Spark4ReqTop10.scala │ │ │ │ │ ├── Spark5ReqTop10.scala │ │ │ │ │ ├── Spark6ReqTop10.scala │ │ │ │ │ └── SparkReqTop10.scala │ │ │ ├── avro │ │ │ │ ├── AvroCompression.scala │ │ │ │ ├── ByDatabricksSparkAvro.scala │ │ │ │ ├── ReadAvro.scala │ │ │ │ ├── TextTest.scala │ │ │ │ └── learning1.scala │ │ │ ├── core │ │ │ │ ├── SparkWordCount.scala │ │ │ │ ├── SparkWordCount2.scala │ │ │ │ ├── SparkWordCount3.scala │ │ │ │ └── SparkWordCount4.scala │ │ │ ├── dataframe │ │ │ │ ├── SparkDataFrameExample001.scala │ │ │ │ └── SparkDataFrameExample002.scala │ │ │ ├── dataset │ │ │ │ └── SparkDataSetExample001.scala │ │ │ ├── demos │ │ │ │ ├── DataFrameKudu.scala │ │ │ │ ├── Kafka010Demo05.scala │ │ │ │ ├── Kafka010Demo06.scala │ │ │ │ ├── MyKafkaUtils.scala │ │ │ │ ├── PolicyCreditApp.scala │ │ │ │ ├── PropertiesUtil.scala │ │ │ │ ├── RealtimeEtl.scala │ │ │ │ ├── SparkKuDuDemo.scala │ │ │ │ ├── SparkKuduTest.scala │ │ │ │ ├── SparkSQLDemo.scala │ │ │ │ ├── StreamingAssignOffset.scala │ │ │ │ ├── WordCountWithMonitor.scala │ │ │ │ └── jira │ │ │ │ │ └── AmountOfAppsUsingInappStatistic.scala │ │ │ ├── factory │ │ │ │ ├── Dept.scala │ │ │ │ └── Factory.scala │ │ │ ├── graphx │ │ │ │ ├── GraphStu.scala │ │ │ │ ├── Pregel_SSSP.scala │ │ │ │ ├── Pregeloperator.scala │ │ │ │ └── ShortPaths.scala │ │ │ ├── hive │ │ │ │ ├── HiveSinkBySparkSQL.java │ │ │ │ └── KafkaToHive.scala │ │ │ ├── hudi │ │ │ │ ├── SparkOnHudiExample.scala │ │ │ │ ├── example │ │ │ │ │ ├── DayPartitionValueExtractor.java │ │ │ │ │ ├── SmallFilesTestApp.scala │ │ │ │ │ └── User.scala │ │ │ │ ├── example002 │ │ │ │ │ └── SparkHudi.scala │ │ │ │ └── example003 │ │ │ │ │ └── StructStreamingOnHudiExample.scala │ │ │ ├── iceberg │ │ │ │ ├── example001 │ │ │ │ │ ├── IcebergApi.java │ │ │ │ │ └── IcebergExample001.scala │ │ │ │ ├── example002 │ │ │ │ │ ├── IcebergExample002.scala │ │ │ │ │ └── IcebergExample002_02.scala │ │ │ │ └── example003 │ │ │ │ │ ├── ReadFromIcebergeByStructedStreaming.scala │ │ │ │ │ └── WriteToIcebergeByStructedStreaming.scala │ │ │ ├── kafka │ │ │ │ ├── CustomStreamingQueryListener.java │ │ │ │ ├── ReadKafkaBySparkStreaming.scala │ │ │ │ └── SparkKafkaManager.java │ │ │ ├── kyro │ │ │ │ ├── KryoExample.scala │ │ │ │ ├── YourClass.java │ │ │ │ └── YourKryoRegistrator.java │ │ │ ├── matric │ │ │ │ └── SparkMetricsUtils.scala │ │ │ ├── ml │ │ │ │ ├── MLPipelineApp.scala │ │ │ │ └── xgboost │ │ │ │ │ └── SparkTraining.scala │ │ │ ├── olap │ │ │ │ └── CubeOLAPDataAnalysisExample001.scala │ │ │ ├── operator │ │ │ │ └── BasicOperatorExample.scala │ │ │ ├── partitioner │ │ │ │ └── SparkRDDPartitioner.scala │ │ │ ├── rdd │ │ │ │ ├── README.txt │ │ │ │ └── SparkRDDExample001.scala │ │ │ ├── sparkcore │ │ │ │ ├── FancyApp.scala │ │ │ │ ├── 
FancyAppAfterYouhua.scala │ │ │ │ └── MuiltSparkJob.scala │ │ │ ├── sparksql │ │ │ │ ├── AppInfo.java │ │ │ │ ├── Emmm.scala │ │ │ │ ├── LogInfo.java │ │ │ │ ├── SQLIPLocation.scala │ │ │ │ ├── SchemaTsvSpark.scala │ │ │ │ └── SparkSchemaTest.java │ │ │ ├── sparkstreaming │ │ │ │ ├── KafkaOffsetManager.scala │ │ │ │ ├── MonitorStop.scala │ │ │ │ ├── SparkDirectStreaming.scala │ │ │ │ ├── SparkTest.scala │ │ │ │ └── nc2.scala │ │ │ ├── structuredstreaming │ │ │ │ ├── HandleKafkaJSONExample.scala │ │ │ │ ├── MultiQueryCache.scala │ │ │ │ ├── OutputModeExample.scala │ │ │ │ ├── StreamingQueryListenerExample.scala │ │ │ │ ├── WindowMockFunctionExample.scala │ │ │ │ ├── WindowOptionExample.scala │ │ │ │ ├── WindowOptionWithWatermarkExample.scala │ │ │ │ ├── WindowSourceFunctionExample.scala │ │ │ │ ├── custom │ │ │ │ │ ├── CustomDataSink.scala │ │ │ │ │ ├── CustomDataSource.scala │ │ │ │ │ └── CustomDataSourceProvider.scala │ │ │ │ └── example │ │ │ │ │ ├── ConsoleSinkExample.scala │ │ │ │ │ ├── FileSinkExample.scala │ │ │ │ │ ├── FileSourceExample.scala │ │ │ │ │ ├── ForeachSinkExample.scala │ │ │ │ │ ├── KafkaSinkExample.scala │ │ │ │ │ ├── KafkaSourceExample.scala │ │ │ │ │ ├── MemorySinkExample.scala │ │ │ │ │ ├── RateSourceExample.scala │ │ │ │ │ └── SocketSourceExample.scala │ │ │ ├── udf │ │ │ │ └── example001 │ │ │ │ │ ├── DufTest01.scala │ │ │ │ │ ├── MyAverage.scala │ │ │ │ │ ├── MyUdtf.scala │ │ │ │ │ ├── SparkSQLUDF1.scala │ │ │ │ │ ├── SparkSQLUDF2.scala │ │ │ │ │ ├── SparkSQLUDF3.scala │ │ │ │ │ ├── SparkSQLUDF4.scala │ │ │ │ │ ├── UdfTest.scala │ │ │ │ │ └── UdtfTestTest.scala │ │ │ └── unittest │ │ │ │ ├── CardDataGenerator.scala │ │ │ │ ├── CardDataNester.scala │ │ │ │ ├── CoreUnitTest.scala │ │ │ │ ├── MakingNestedTableTest.scala │ │ │ │ ├── SqlUnitTest.scala │ │ │ │ ├── StreamingUnitTest.scala │ │ │ │ └── TestableQueueInputDStream.scala │ │ │ ├── runSparkApp.scala │ │ │ └── theme │ │ │ └── idmapping │ │ │ ├── IdMapFirst.scala │ │ │ └── IdMapSecond.scala │ │ └── test │ │ └── scala │ │ └── com │ │ └── bidata │ │ ├── SimpleCanalClientExample.java │ │ ├── app │ │ └── CustomerCrm.scala │ │ ├── bean │ │ └── Customer.scala │ │ ├── service │ │ └── CustomerService.scala │ │ └── view │ │ └── CustomerView.scala └── spark3.2-learning │ ├── dataset │ ├── a.txt │ ├── app.log │ ├── test.log │ ├── u.data │ ├── u.item │ └── users.txt │ ├── mvnw │ ├── mvnw.cmd │ ├── pom.xml │ └── src │ ├── main │ ├── java │ │ └── com │ │ │ └── turing │ │ │ ├── TuringSparkPipeline.java │ │ │ ├── bean │ │ │ └── Student.java │ │ │ ├── common │ │ │ └── PropertiesUtils.java │ │ │ └── pipeline │ │ │ └── ml │ │ │ └── MovieRecommendByALS.java │ ├── resources │ │ ├── spark-dev.properties │ │ └── spark-prod.properties │ └── scala │ │ └── com │ │ └── bigdata │ │ ├── common │ │ ├── config │ │ │ └── GlobalConfig.scala │ │ ├── constant │ │ │ └── GlobalConstant.scala │ │ └── utils │ │ │ ├── DataTimeUtils.scala │ │ │ ├── JdbcUtils.scala │ │ │ ├── KafkaOffsetManagerUtils.scala │ │ │ ├── KafkaSinkUtils.scala │ │ │ ├── KafkaSourceUtils.scala │ │ │ ├── RedisUtils.scala │ │ │ └── SparkPipelineUtils.scala │ │ ├── pipeline │ │ ├── batch │ │ │ └── Retention.scala │ │ ├── etl │ │ │ ├── EtlDemo.scala │ │ │ ├── LogWork.scala │ │ │ └── SparkEtlExample002.scala │ │ └── ml │ │ │ ├── LSHAlgorithmsExample001.scala │ │ │ ├── LSHAlgorithmsExample002.scala │ │ │ ├── SparkMLBucketizerExample001.scala │ │ │ └── UnivariateFeatureSelectorExample002.scala │ │ └── runSparkPipeline.scala │ └── test │ └── java │ └── com │ └── turing │ └── 
SparkMLlibUnit.java ├── 常见组件整理 ├── 分布式存储系统 │ └── Hive │ │ └── README.md ├── 数据CDC │ └── Debezium │ │ └── README.md ├── 数据湖 │ └── Hudi │ │ └── README.md ├── 数据计算引擎 │ └── Flink │ │ └── README.md └── 消息系统 │ └── Pulsar │ └── README.md ├── 开源社区文献整理 ├── 1、邸星星—基于Iceberg的湖仓一体架构实践(已美化).pdf ├── 2、孙伟—iceberg和对象存储构建数据湖方案(已美化).pdf ├── 3、陈俊杰-百亿数据入湖实战(已美化).pdf └── 4、胡争-Flink和Iceberg如何解决数据入湖面临的挑战(已美化).pdf └── 环境搭建篇 └── 大数据领域常见概念与术语

/README.md:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------

/专题篇/数据同步/基于Flume的数据同步CDC技术实现/基于Flume的数据同步CDC数据同步技术:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------

/专题篇/第三代大数据技术/Flink常见功能/Flink双流实时对账/README.md:
--------------------------------------------------------------------------------
https://app.yinxiang.com/fx/8c65027b-d9dc-41eb-90c0-2ab0d1e4a7b4
--------------------------------------------------------------------------------

/专题篇/第三代大数据技术/Flink常见知识点整理/Flink共享变量-广播变量/广播变量:
--------------------------------------------------------------------------------
https://app.yinxiang.com/fx/a2c5707b-c6b1-47aa-94d2-c868fd776d84
--------------------------------------------------------------------------------

/专题篇/第三代大数据技术/Flink常见知识点整理/Flink实现状态数据互相访问:
--------------------------------------------------------------------------------
https://app.yinxiang.com/fx/bea9c362-63da-427d-a96f-ee39b14e6be3
--------------------------------------------------------------------------------

/专题篇/第三代大数据技术/Flink常见知识点整理/Flink窗口全解析/Flink窗口全解析:
--------------------------------------------------------------------------------
https://app.yinxiang.com/fx/44d6af62-ccbc-495c-b099-5248fcff8e39
--------------------------------------------------------------------------------

/专题篇/第三代大数据技术/Flink常见知识点整理/OldPlanner&BlinkPlanner.md:
--------------------------------------------------------------------------------
https://app.yinxiang.com/fx/d00b73c2-9db5-441b-a4a3-7d09dae51b81
--------------------------------------------------------------------------------

/专题篇/第三代大数据技术/Flink常见知识点整理/SparkStreaming和Flink对比:
--------------------------------------------------------------------------------
https://app.yinxiang.com/fx/9902c606-216b-40ce-abcf-41112f672c09
--------------------------------------------------------------------------------

/专题篇/第三代大数据技术/Flink常见知识点整理/WaterMark/Flink的waterMark的通俗理解:
--------------------------------------------------------------------------------
https://app.yinxiang.com/fx/3dc454e9-cff4-4846-9447-2f7b087687a4
--------------------------------------------------------------------------------

/专题篇/第三代大数据技术/Flink常见知识点整理/实时维表join/Flink实时维表join方法总结:
--------------------------------------------------------------------------------
https://app.yinxiang.com/fx/9225ed33-a1e3-444c-abd3-cf649046a98a
--------------------------------------------------------------------------------

/专题篇/第三代大数据技术/Flink常见知识点整理/旁路输出/使用旁路输出(side output)来拆分和复制流.md:
--------------------------------------------------------------------------------
https://app.yinxiang.com/fx/14e05fb9-a47d-491f-b0d5-39694344bfae
--------------------------------------------------------------------------------
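The side-output note above is link-only. As a minimal self-contained sketch of the pattern it names — splitting one stream into a main output and a side output via Flink's OutputTag — assuming a plain local environment (class name, tag name, and element values here are illustrative, not taken from this repo):

// Hypothetical sketch, not a file from this repo: route even numbers to the
// main output and odd numbers to a side output using Flink's side-output API.
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;

public class SideOutputSketch {
    // Anonymous subclass so the OutputTag keeps its element type information.
    private static final OutputTag<Integer> ODD = new OutputTag<Integer>("odd") {};

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        SingleOutputStreamOperator<Integer> main = env
                .fromElements(1, 2, 3, 4, 5)
                .process(new ProcessFunction<Integer, Integer>() {
                    @Override
                    public void processElement(Integer value, Context ctx, Collector<Integer> out) {
                        if (value % 2 == 0) {
                            out.collect(value);     // even numbers -> main output
                        } else {
                            ctx.output(ODD, value); // odd numbers  -> side output
                        }
                    }
                });
        main.print("even");
        main.getSideOutput(ODD).print("odd");
        env.execute("side-output-sketch");
    }
}

The repo's own worked versions of this pattern live under flink-learning (learn/outputtag/OutPutTagExample002.java and learn/sideoutputs/Flink_side_output_filter.java).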
/专题篇/第三代大数据技术/Flink版本解读/Flink1.13/Flink1.13新特性解读:
--------------------------------------------------------------------------------
https://app.yinxiang.com/fx/cd5dffc2-ab67-4161-833a-a501d5101d51
--------------------------------------------------------------------------------

/专题篇/第三代大数据技术/README.md:
--------------------------------------------------------------------------------
Flink documentation:
https://ci.apache.org/projects/flink/flink-docs-release-1.12/zh/dev/table/connectors/hbase.html
--------------------------------------------------------------------------------

/专题篇/第二代大数据技术/Spark常见知识点整理/Catalyst/Spark的优化器系统Catalyst:
--------------------------------------------------------------------------------
https://app.yinxiang.com/fx/1c30850a-4a92-49c2-9c91-919b90d0ff09
--------------------------------------------------------------------------------

/专题篇/第二代大数据技术/Spark常见知识点整理/Java版Spark读取Kafka数据:
--------------------------------------------------------------------------------
https://app.yinxiang.com/fx/67a658d3-fddb-41c6-9aba-a3793872bf1c
--------------------------------------------------------------------------------

/专题篇/第二代大数据技术/Spark常见知识点整理/SparkListener监听使用方式及自定义的事件处理动作:
--------------------------------------------------------------------------------
https://app.yinxiang.com/fx/502b3645-6d67-434c-be7f-e6af55825e73
--------------------------------------------------------------------------------

/专题篇/第二代大数据技术/Spark常见知识点整理/SparkSQL/Shark与SparkSQL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/专题篇/第二代大数据技术/Spark常见知识点整理/SparkSQL/Shark与SparkSQL
--------------------------------------------------------------------------------

/专题篇/第二代大数据技术/Spark常见知识点整理/SparkSQL中的hint:
--------------------------------------------------------------------------------
https://app.yinxiang.com/fx/3d51d147-2478-4eba-9348-5dcc51a0b75a
--------------------------------------------------------------------------------

/专题篇/第二代大数据技术/Spark常见知识点整理/Spark函数传递:
--------------------------------------------------------------------------------
https://app.yinxiang.com/fx/45ccdc16-3e13-4dd1-860d-d64dad1e5018
--------------------------------------------------------------------------------

/专题篇/第二代大数据技术/Spark常见知识点整理/Spark累加器:
--------------------------------------------------------------------------------
https://app.yinxiang.com/fx/1b81aa71-e471-413a-9760-32cb2e6b472a
--------------------------------------------------------------------------------

/专题篇/第二代大数据技术/Spark常见知识点整理/Spark资源参数.md:
--------------------------------------------------------------------------------
https://app.yinxiang.com/fx/90c4d68f-c30d-4082-ab24-995cf305614d
--------------------------------------------------------------------------------

/专题篇/第二代大数据技术/Spark常见知识点整理/压测/Spark Bucket Table优化改造后测试.md:
--------------------------------------------------------------------------------
https://app.yinxiang.com/fx/6b6fe443-e3f6-4e59-ae60-38c88921c480
--------------------------------------------------------------------------------

/专题篇/第二代大数据技术/Spark调优实践/Spark小文件调优/Spark小文件过多:
--------------------------------------------------------------------------------
https://blog.csdn.net/a13705510005/article/details/102295768?utm_medium=distribute.pc_relevant.none-task-blog-baidujs_title-8&spm=1001.2101.3001.4242
https://blog.csdn.net/Sampson_Hugo/article/details/106908852
https://issues.apache.org/jira/browse/SPARK-24940
--------------------------------------------------------------------------------
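The small-file note above only links out to blog posts and SPARK-24940. The usual remedy in this area is to compact output by cutting the number of partitions before the write, so each partition produces one reasonably sized file. A hedged sketch of that idea (paths and the target file count are placeholders; the repo's Spark3-Learning hive/SmallFileMerger.scala presumably implements a fuller version):

// Hypothetical compaction sketch: read a directory full of small Parquet
// files and rewrite it with fewer, larger files.
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

public class SmallFileCompaction {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("small-file-compaction")
                .getOrCreate();

        Dataset<Row> df = spark.read().parquet("/data/input_with_small_files"); // placeholder path

        // coalesce() narrows to fewer partitions without a full shuffle; each
        // output partition becomes one file, so 8 partitions -> roughly 8 files.
        df.coalesce(8)
          .write()
          .mode(SaveMode.Overwrite)
          .parquet("/data/compacted_output"); // placeholder path

        spark.stop();
    }
}

coalesce() is preferred here over repartition() when shrinking the file count, since it avoids a shuffle; repartition() is the choice when the input partitions are skewed and need rebalancing.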
/分布式存储篇/Hive/常见知识总结/Hive小知识之分桶抽样.md:
--------------------------------------------------------------------------------
https://app.yinxiang.com/fx/37923073-6a01-4c94-9c78-1f03c55c5657
--------------------------------------------------------------------------------

/常见大数据项目/Spark3-Learning/.mvn/wrapper/maven-wrapper.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/.mvn/wrapper/maven-wrapper.jar
--------------------------------------------------------------------------------

/常见大数据项目/Spark3-Learning/.mvn/wrapper/maven-wrapper.properties:
--------------------------------------------------------------------------------
distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.8.1/apache-maven-3.8.1-bin.zip
wrapperUrl=https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar
--------------------------------------------------------------------------------

/常见大数据项目/Spark3-Learning/derby.log:
--------------------------------------------------------------------------------
----------------------------------------------------------------
Fri Aug 06 15:55:02 CST 2021:
Booting Derby version The Apache Software Foundation - Apache Derby - 10.14.1.0 - (1808820): instance a816c00e-017b-1a75-55e7-0000098fee88
on database directory E:\OpenSource\GitHub\bigdata-learning\常见大数据项目\Spark3-Learning\metastore_db with class loader org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@7d66e544
Loaded from file:/D:/software/ServerTool/maven/jiguang-repository/org/apache/derby/derby/10.14.1.0/derby-10.14.1.0.jar
java.vendor=Oracle Corporation
java.runtime.version=1.8.0_77-b03
user.dir=E:\OpenSource\GitHub\bigdata-learning\常见大数据项目\Spark3-Learning
os.name=Windows 10
os.arch=amd64
os.version=10.0
derby.system.home=null
Database Class Loader started - derby.database.classpath=''
--------------------------------------------------------------------------------
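The derby.log above records Spark itself booting an embedded Derby instance for the metastore_db directory dumped below: when Hive support is enabled and no external metastore is configured, Spark falls back to a local Derby-backed metastore in the working directory. A minimal sketch that reproduces this behavior (table name is illustrative; requires the spark-hive module on the classpath):

// Hypothetical sketch: a Hive-enabled SparkSession with no hive-site.xml or
// external metastore configured creates exactly this kind of local
// metastore_db directory (and derby.log) in the working directory.
import org.apache.spark.sql.SparkSession;

public class LocalMetastoreSketch {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("local-derby-metastore")
                .master("local[*]")
                .enableHiveSupport() // falls back to embedded Derby when no metastore is configured
                .getOrCreate();

        spark.sql("CREATE TABLE IF NOT EXISTS demo (id INT)"); // persisted via the Derby-backed metastore
        spark.sql("SHOW TABLES").show();
        spark.stop();
    }
}

This is also why metastore_db/ and derby.log are usually listed in .gitignore rather than committed alongside the sources.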
/常见大数据项目/Spark3-Learning/metastore_db/README_DO_NOT_TOUCH_FILES.txt:
--------------------------------------------------------------------------------
# *************************************************************************
# *** DO NOT TOUCH FILES IN THIS DIRECTORY! ***
# *** FILES IN THIS DIRECTORY AND SUBDIRECTORIES CONSTITUTE A DERBY ***
# *** DATABASE, WHICH INCLUDES THE DATA (USER AND SYSTEM) AND THE ***
# *** FILES NECESSARY FOR DATABASE RECOVERY. ***
# *** EDITING, ADDING, OR DELETING ANY OF THESE FILES MAY CAUSE DATA ***
# *** CORRUPTION AND LEAVE THE DATABASE IN A NON-RECOVERABLE STATE. ***
# *************************************************************************
--------------------------------------------------------------------------------

/常见大数据项目/Spark3-Learning/metastore_db/db.lck:
--------------------------------------------------------------------------------
$a816c00e-017b-1a75-55e7-0000098fee88
--------------------------------------------------------------------------------

/常见大数据项目/Spark3-Learning/metastore_db/log/README_DO_NOT_TOUCH_FILES.txt:
--------------------------------------------------------------------------------
# *************************************************************************
# *** DO NOT TOUCH FILES IN THIS DIRECTORY! ***
# *** FILES IN THIS DIRECTORY ARE USED BY THE DERBY DATABASE RECOVERY ***
# *** SYSTEM. EDITING, ADDING, OR DELETING FILES IN THIS DIRECTORY ***
# *** WILL CAUSE THE DERBY RECOVERY SYSTEM TO FAIL, LEADING TO ***
# *** NON-RECOVERABLE CORRUPT DATABASES. ***
# *************************************************************************
--------------------------------------------------------------------------------

/常见大数据项目/Spark3-Learning/metastore_db/log/log.ctrl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/log/log.ctrl
--------------------------------------------------------------------------------

/常见大数据项目/Spark3-Learning/metastore_db/log/log1.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/log/log1.dat
--------------------------------------------------------------------------------

/常见大数据项目/Spark3-Learning/metastore_db/log/logmirror.ctrl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/log/logmirror.ctrl
--------------------------------------------------------------------------------
*** 8 | # ************************************************************************* -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c10.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c10.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c101.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c101.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c111.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c111.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c121.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c121.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c130.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c130.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c141.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c141.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c150.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c150.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c161.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c161.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c171.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c171.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c180.dat: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c180.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c191.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c191.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c1a1.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c1a1.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c1b1.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c1b1.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c1c0.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c1c0.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c1d1.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c1d1.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c1e0.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c1e0.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c1f1.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c1f1.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c20.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c20.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c200.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c200.dat 
-------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c211.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c211.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c221.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c221.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c230.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c230.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c241.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c241.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c251.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c251.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c260.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c260.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c271.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c271.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c281.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c281.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c290.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c290.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c2a1.dat: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c2a1.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c2b1.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c2b1.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c2c1.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c2c1.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c2d0.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c2d0.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c2e1.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c2e1.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c2f0.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c2f0.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c300.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c300.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c31.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c31.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c311.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c311.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c321.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c321.dat -------------------------------------------------------------------------------- 
/常见大数据项目/Spark3-Learning/metastore_db/seg0/c331.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c331.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c340.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c340.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c351.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c351.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c361.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c361.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c371.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c371.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c380.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c380.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c391.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c391.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c3a1.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c3a1.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c3b1.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c3b1.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c3c0.dat: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c3c0.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c3d1.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c3d1.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c3e1.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c3e1.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c3f1.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c3f1.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c400.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c400.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c41.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c41.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c411.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c411.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c421.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c421.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c430.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c430.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c441.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c441.dat -------------------------------------------------------------------------------- 
/常见大数据项目/Spark3-Learning/metastore_db/seg0/c451.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c451.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c461.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c461.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c470.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c470.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c481.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c481.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c490.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c490.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c4a1.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c4a1.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c4b0.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c4b0.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c4c1.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c4c1.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c4d0.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c4d0.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c4e1.dat: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c4e1.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c4f0.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c4f0.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c501.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c501.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c51.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c51.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c511.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c511.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c521.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c521.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c530.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c530.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c541.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c541.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c550.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c550.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c561.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c561.dat -------------------------------------------------------------------------------- 
/常见大数据项目/Spark3-Learning/metastore_db/seg0/c571.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c571.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c580.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c580.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c591.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c591.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c5a1.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c5a1.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c5b0.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c5b0.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c5c1.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c5c1.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c5d0.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c5d0.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c5e1.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c5e1.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c5f1.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c5f1.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c60.dat: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c60.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c601.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c601.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c611.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c611.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c71.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c71.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c81.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c81.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/c90.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/c90.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/ca1.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/ca1.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/cb1.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/cb1.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/cc0.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/cc0.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/cd1.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/cd1.dat -------------------------------------------------------------------------------- 
/常见大数据项目/Spark3-Learning/metastore_db/seg0/ce1.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/ce1.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/metastore_db/seg0/cf0.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/metastore_db/seg0/cf0.dat -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/model/metadata/._SUCCESS.crc: -------------------------------------------------------------------------------- 1 | crc -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/model/metadata/.part-00000.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/model/metadata/.part-00000.crc -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/model/metadata/_SUCCESS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/model/metadata/_SUCCESS -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/model/metadata/part-00000: -------------------------------------------------------------------------------- 1 | {"class":"org.apache.spark.ml.PipelineModel","timestamp":1629448977982,"sparkVersion":"2.4.0-cdh6.3.4","uid":"pipeline_6d259bef7616","paramMap":{"stageUids":["tok_286ea6d49f44","hashingTF_484172a6ce8b","logreg_0f17b0e587fd"]},"defaultParamMap":{}} 2 | -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/model/stages/0_tok_286ea6d49f44/metadata/._SUCCESS.crc: -------------------------------------------------------------------------------- 1 | crc -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/model/stages/0_tok_286ea6d49f44/metadata/.part-00000.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/model/stages/0_tok_286ea6d49f44/metadata/.part-00000.crc -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/model/stages/0_tok_286ea6d49f44/metadata/_SUCCESS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/model/stages/0_tok_286ea6d49f44/metadata/_SUCCESS -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/model/stages/0_tok_286ea6d49f44/metadata/part-00000: -------------------------------------------------------------------------------- 1 | 
{"class":"org.apache.spark.ml.feature.Tokenizer","timestamp":1629448978978,"sparkVersion":"2.4.0-cdh6.3.4","uid":"tok_286ea6d49f44","paramMap":{"outputCol":"words","inputCol":"text"},"defaultParamMap":{"outputCol":"tok_286ea6d49f44__output"}} 2 | -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/model/stages/1_hashingTF_484172a6ce8b/metadata/._SUCCESS.crc: -------------------------------------------------------------------------------- 1 | crc -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/model/stages/1_hashingTF_484172a6ce8b/metadata/.part-00000.crc: -------------------------------------------------------------------------------- 1 | crcOpM -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/model/stages/1_hashingTF_484172a6ce8b/metadata/_SUCCESS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/model/stages/1_hashingTF_484172a6ce8b/metadata/_SUCCESS -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/model/stages/1_hashingTF_484172a6ce8b/metadata/part-00000: -------------------------------------------------------------------------------- 1 | {"class":"org.apache.spark.ml.feature.HashingTF","timestamp":1629448979060,"sparkVersion":"2.4.0-cdh6.3.4","uid":"hashingTF_484172a6ce8b","paramMap":{"outputCol":"features","numFeatures":1000,"inputCol":"words"},"defaultParamMap":{"numFeatures":262144,"outputCol":"hashingTF_484172a6ce8b__output","binary":false}} 2 | -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/model/stages/2_logreg_0f17b0e587fd/data/._SUCCESS.crc: -------------------------------------------------------------------------------- 1 | crc -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/model/stages/2_logreg_0f17b0e587fd/data/.part-00000-9a6fc196-e410-4d1a-b0e8-925461d1849b-c000.snappy.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/model/stages/2_logreg_0f17b0e587fd/data/.part-00000-9a6fc196-e410-4d1a-b0e8-925461d1849b-c000.snappy.parquet.crc -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/model/stages/2_logreg_0f17b0e587fd/data/_SUCCESS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/model/stages/2_logreg_0f17b0e587fd/data/_SUCCESS -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/model/stages/2_logreg_0f17b0e587fd/data/part-00000-9a6fc196-e410-4d1a-b0e8-925461d1849b-c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/model/stages/2_logreg_0f17b0e587fd/data/part-00000-9a6fc196-e410-4d1a-b0e8-925461d1849b-c000.snappy.parquet 
-------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/model/stages/2_logreg_0f17b0e587fd/metadata/._SUCCESS.crc: -------------------------------------------------------------------------------- 1 | crc -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/model/stages/2_logreg_0f17b0e587fd/metadata/.part-00000.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/model/stages/2_logreg_0f17b0e587fd/metadata/.part-00000.crc -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/model/stages/2_logreg_0f17b0e587fd/metadata/_SUCCESS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/model/stages/2_logreg_0f17b0e587fd/metadata/_SUCCESS -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/model/stages/2_logreg_0f17b0e587fd/metadata/part-00000: -------------------------------------------------------------------------------- 1 | {"class":"org.apache.spark.ml.classification.LogisticRegressionModel","timestamp":1629448979144,"sparkVersion":"2.4.0-cdh6.3.4","uid":"logreg_0f17b0e587fd","paramMap":{"regParam":0.001,"maxIter":10},"defaultParamMap":{"tol":1.0E-6,"rawPredictionCol":"rawPrediction","family":"auto","aggregationDepth":2,"labelCol":"label","probabilityCol":"probability","standardization":true,"fitIntercept":true,"threshold":0.5,"regParam":0.0,"featuresCol":"features","elasticNetParam":0.0,"maxIter":100,"predictionCol":"prediction"}} 2 | -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/pipeline/metadata/._SUCCESS.crc: -------------------------------------------------------------------------------- 1 | crc -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/pipeline/metadata/.part-00000.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/pipeline/metadata/.part-00000.crc -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/pipeline/metadata/_SUCCESS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/pipeline/metadata/_SUCCESS -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/pipeline/metadata/part-00000: -------------------------------------------------------------------------------- 1 | {"class":"org.apache.spark.ml.Pipeline","timestamp":1629448980757,"sparkVersion":"2.4.0-cdh6.3.4","uid":"pipeline_6d259bef7616","paramMap":{"stageUids":["tok_286ea6d49f44","hashingTF_484172a6ce8b","logreg_0f17b0e587fd"]},"defaultParamMap":{}} 2 | -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/pipeline/stages/0_tok_286ea6d49f44/metadata/._SUCCESS.crc: 
-------------------------------------------------------------------------------- 1 | crc -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/pipeline/stages/0_tok_286ea6d49f44/metadata/.part-00000.crc: -------------------------------------------------------------------------------- 1 | crc~Vdv -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/pipeline/stages/0_tok_286ea6d49f44/metadata/_SUCCESS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/pipeline/stages/0_tok_286ea6d49f44/metadata/_SUCCESS -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/pipeline/stages/0_tok_286ea6d49f44/metadata/part-00000: -------------------------------------------------------------------------------- 1 | {"class":"org.apache.spark.ml.feature.Tokenizer","timestamp":1629448980819,"sparkVersion":"2.4.0-cdh6.3.4","uid":"tok_286ea6d49f44","paramMap":{"outputCol":"words","inputCol":"text"},"defaultParamMap":{"outputCol":"tok_286ea6d49f44__output"}} 2 | -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/pipeline/stages/1_hashingTF_484172a6ce8b/metadata/._SUCCESS.crc: -------------------------------------------------------------------------------- 1 | crc -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/pipeline/stages/1_hashingTF_484172a6ce8b/metadata/.part-00000.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/pipeline/stages/1_hashingTF_484172a6ce8b/metadata/.part-00000.crc -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/pipeline/stages/1_hashingTF_484172a6ce8b/metadata/_SUCCESS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/pipeline/stages/1_hashingTF_484172a6ce8b/metadata/_SUCCESS -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/pipeline/stages/1_hashingTF_484172a6ce8b/metadata/part-00000: -------------------------------------------------------------------------------- 1 | {"class":"org.apache.spark.ml.feature.HashingTF","timestamp":1629448980887,"sparkVersion":"2.4.0-cdh6.3.4","uid":"hashingTF_484172a6ce8b","paramMap":{"outputCol":"features","numFeatures":1000,"inputCol":"words"},"defaultParamMap":{"numFeatures":262144,"outputCol":"hashingTF_484172a6ce8b__output","binary":false}} 2 | -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/pipeline/stages/2_logreg_0f17b0e587fd/metadata/._SUCCESS.crc: -------------------------------------------------------------------------------- 1 | crc -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/pipeline/stages/2_logreg_0f17b0e587fd/metadata/.part-00000.crc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/pipeline/stages/2_logreg_0f17b0e587fd/metadata/.part-00000.crc -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/pipeline/stages/2_logreg_0f17b0e587fd/metadata/_SUCCESS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/pipeline/stages/2_logreg_0f17b0e587fd/metadata/_SUCCESS -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/pipeline/stages/2_logreg_0f17b0e587fd/metadata/part-00000: -------------------------------------------------------------------------------- 1 | {"class":"org.apache.spark.ml.classification.LogisticRegression","timestamp":1629448980969,"sparkVersion":"2.4.0-cdh6.3.4","uid":"logreg_0f17b0e587fd","paramMap":{"regParam":0.001,"maxIter":10},"defaultParamMap":{"tol":1.0E-6,"rawPredictionCol":"rawPrediction","family":"auto","aggregationDepth":2,"labelCol":"label","probabilityCol":"probability","standardization":true,"fitIntercept":true,"threshold":0.5,"regParam":0.0,"featuresCol":"features","elasticNetParam":0.0,"maxIter":100,"predictionCol":"prediction"}} 2 | -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/src/main/java/com/turing/common/PropertiesUtils.java: -------------------------------------------------------------------------------- 1 | package com.turing.common; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | 5 | import java.io.IOException; 6 | import java.io.InputStream; 7 | import java.util.Properties; 8 | 9 | /** 10 | * @descri Utility for reading .properties files from the classpath. 11 | * 12 | * @author lj.michale 13 | * @date 2022-04-05 14 | */ 15 | @Slf4j 16 | public class PropertiesUtils { 17 | 18 | /** 19 | * @descri Load the Properties object of a given .properties file, for convenient access to its values. 20 | * 21 | * @param filePath path of the .properties file on the classpath 22 | * @return the loaded Properties object (empty when loading fails) 23 | */ 24 | public static Properties getProperties(String filePath) { 25 | final Properties properties = new Properties(); 26 | // try-with-resources closes the stream; getResourceAsStream returns null when the file is missing 27 | try (InputStream in = PropertiesUtils.class.getClassLoader().getResourceAsStream(filePath)) { 28 | if (in == null) { 29 | log.error("Properties file not found on classpath: {}", filePath); 30 | } else { 31 | properties.load(in); 32 | } 33 | } catch (IOException e) { 34 | log.error("Failed to load properties file: {}", filePath, e); 35 | } 36 | return properties; 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/src/main/resources/common-prod.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/src/main/resources/common-prod.properties -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/src/main/resources/common-test.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/src/main/resources/common-test.properties
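
A quick usage sketch for the `PropertiesUtils` helper above (in Scala, to match the rest of the repo's examples); `common-test.properties` is one of the resource files listed here, but the property key is a hypothetical placeholder:

```scala
import com.turing.common.PropertiesUtils

object PropertiesUtilsDemo {
  def main(args: Array[String]): Unit = {
    // Resolved from the classpath, e.g. src/main/resources/common-test.properties.
    val props = PropertiesUtils.getProperties("common-test.properties")
    // "hive.metastore.uris" is a made-up key, for illustration only.
    println(props.getProperty("hive.metastore.uris", "<not set>"))
  }
}
```
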
-------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/Spark3-Learning/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/src/main/scala/com/bigdata/hive/ConnectHiveByThrift.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.hive 2 | 3 | import com.bigdata.utils.HiveUtil 4 | import org.apache.log4j.Logger 5 | import org.apache.spark.sql.SparkSession 6 | 7 | 8 | object ConnectHiveByThrift { 9 | 10 | val logger = Logger.getLogger(ConnectHiveByThrift.getClass) 11 | 12 | def main(args: Array[String]): Unit = { 13 | 14 | val spark = SparkSession.builder() 15 | .appName("ConnectHiveByThrift") 16 | .master("local[2]") 17 | .enableHiveSupport() 18 | .getOrCreate() 19 | 20 | // Enable dynamic partitioning 21 | HiveUtil.openDynamicPartition(spark) 22 | // Enable compression 23 | HiveUtil.openCompression(spark) 24 | 25 | spark.sql("show databases").show() 26 | 27 | spark.close() 28 | 29 | } 30 | 31 | } 32 | -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/src/main/scala/com/bigdata/mllib/StreamingKMeansDriver.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.mllib 2 | 3 | import org.apache.log4j.Logger 4 | 5 | /** 6 | * @descr Streaming clustering (k-means) driver 7 | * @author lj.michale 8 | * @date 2021-06 9 | */ 10 | object StreamingKMeansDriver { 11 | 12 | val logger = Logger.getLogger(StreamingKMeansDriver.getClass) 13 | 14 | def main(args: Array[String]): Unit = { 15 | 16 | 17 | 18 | 19 | } 20 | 21 | } 22 | -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/src/main/scala/com/bigdata/mllib/recommend/Recommender.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.mllib.recommend 2 | 3 | import org.apache.spark.rdd.RDD 4 | import org.apache.spark.mllib.recommendation.Rating 5 | 6 | trait Recommender { 7 | 8 | implicit class AugmentParams(val params: Map[String, Any]) { 9 | def getInt(key: String) = params(key).asInstanceOf[Number].intValue 10 | def getDouble(key: String) = params(key).asInstanceOf[Number].doubleValue 11 | def getBoolean(key: String) = params(key).asInstanceOf[Boolean] 12 | def getString(key: String) = params(key).toString 13 | } 14 | 15 | def recommend(trainingSet: RDD[Rating], params: Map[String, Any]): RDD[(Int, Seq[Rating])] 16 | 17 | }
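
The `Recommender` trait above pairs an abstract `recommend` method with an implicit `AugmentParams` wrapper for reading typed hyper-parameters out of a plain `Map`. A hedged sketch of a concrete implementation (this `AlsRecommender` object is illustrative, not a file in the repo; it assumes spark-mllib's `ALS`, and the parameter key names are assumptions):

```scala
package com.bigdata.mllib.recommend

import org.apache.spark.mllib.recommendation.{ALS, Rating}
import org.apache.spark.rdd.RDD

object AlsRecommender extends Recommender {

  // Train ALS with hyper-parameters pulled from the Map via the trait's implicit
  // AugmentParams helpers, then return the top-K product recommendations per user.
  override def recommend(trainingSet: RDD[Rating], params: Map[String, Any]): RDD[(Int, Seq[Rating])] = {
    val model = ALS.train(trainingSet, params.getInt("rank"), params.getInt("iterations"), params.getDouble("lambda"))
    model.recommendProductsForUsers(params.getInt("topK"))
      .map { case (user, recs) => (user, recs.toSeq) }
  }
}
```
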
-------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/src/main/scala/com/bigdata/runSparkPipeline.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata 2 | 3 | import org.apache.log4j.Logger 4 | 5 | /** 6 | * @descr runSparkPipeline 7 | * @author lj.michale 8 | * @date 2021-06 9 | */ 10 | object runSparkPipeline { 11 | 12 | val logger = Logger.getLogger(runSparkPipeline.getClass) 13 | 14 | def main(args: Array[String]): Unit = { 15 | 16 | 17 | } 18 | 19 | 20 | } 21 | -------------------------------------------------------------------------------- /常见大数据项目/Spark3-Learning/src/main/scala/com/bigdata/sql/udf/AuroaUDF.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.sql.udf 2 | 3 | object AuroaUDF { 4 | 5 | /** 6 | * @descr getChannel 7 | * @param step 8 | * @param platform 9 | */ 10 | def getChannel(step: String, platform: String): String = { 11 | val channel = step match { 12 | case _ => "" 13 | } 14 | channel 15 | } 16 | 17 | } 18 | -------------------------------------------------------------------------------- /常见大数据项目/flink-learning/documents/DDL.sql: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -- Create the BEIMU student table 5 | CREATE TABLE IF NOT EXISTS student( 6 | `id` INT NOT NULL AUTO_INCREMENT, 7 | `name` VARCHAR(20) NULL, 8 | `gender` VARCHAR(10) NULL, 9 | `age` INT, 10 | PRIMARY KEY (`id`) 11 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 12 | 13 | -- Insert sample data (optional at this point; this chapter focuses on creating tables) 14 | INSERT INTO student(`id`,`name`,`gender`,`age`) VALUES (1,'张山', '男', 20); 15 | 16 | -------------------------------------------------------------------------------- /常见大数据项目/flink-learning/documents/ElasticsearchSink: -------------------------------------------------------------------------------- 1 | // Imports for the Flink Elasticsearch 6 connector (assumes the flink-connector-elasticsearch6 dependency) 2 | import java.util 3 | import org.apache.flink.api.common.functions.RuntimeContext 4 | import org.apache.flink.streaming.connectors.elasticsearch.{ElasticsearchSinkFunction, RequestIndexer} 5 | import org.apache.flink.streaming.connectors.elasticsearch6.ElasticsearchSink 6 | import org.apache.http.HttpHost 7 | import org.elasticsearch.client.Requests 8 | 9 | // Snippet: assumes a surrounding job that defines dataStream: DataStream[SensorReading] 10 | val httpHosts = new util.ArrayList[HttpHost]() 11 | httpHosts.add(new HttpHost("localhost", 9200)) 12 | 13 | val esSinkBuilder = new ElasticsearchSink.Builder[SensorReading](httpHosts, new ElasticsearchSinkFunction[SensorReading] { 14 | override def process(t: SensorReading, runtimeContext: RuntimeContext, requestIndexer: RequestIndexer): Unit = { 15 | println("saving data: " + t) 16 | val json = new util.HashMap[String, String]() 17 | json.put("data", t.toString) 18 | val indexRequest = Requests.indexRequest().index("sensor").`type`("readingData").source(json) 19 | requestIndexer.add(indexRequest) 20 | println("saved successfully") 21 | } 22 | }) 23 | 24 | dataStream.addSink(esSinkBuilder.build()) 25 | -------------------------------------------------------------------------------- /常见大数据项目/flink-learning/documents/centos7安装CDH6.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/flink-learning/documents/centos7安装CDH6.pdf -------------------------------------------------------------------------------- /常见大数据项目/flink-learning/documents/flink-api/对于Flink’s API相关知识的整理与理解: -------------------------------------------------------------------------------- 1 | https://app.yinxiang.com/fx/04ef4f01-fd13-4c43-a8b1-886dfb7bb95f -------------------------------------------------------------------------------- /常见大数据项目/flink-learning/documents/flink-connector-hive_2.11-1.10.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/flink-learning/documents/flink-connector-hive_2.11-1.10.0.jar -------------------------------------------------------------------------------- /常见大数据项目/flink-learning/documents/flink_kafka_connector调用关系.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/flink-learning/documents/flink_kafka_connector调用关系.png -------------------------------------------------------------------------------- /常见大数据项目/flink-learning/documents/使用RedisCommand设置数据结构类型时和redis结构对应关系.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/flink-learning/documents/使用RedisCommand设置数据结构类型时和redis结构对应关系.png
-------------------------------------------------------------------------------- /常见大数据项目/flink-learning/src/main/java/com/luoj/runJavaApp.java: -------------------------------------------------------------------------------- 1 | package com.luoj; 2 | 3 | 4 | public class runJavaApp { 5 | 6 | public static void main(String[] args) { 7 | 8 | } 9 | 10 | } 11 | -------------------------------------------------------------------------------- /常见大数据项目/flink-learning/src/main/java/com/luoj/task/connector/clickhouse/tableColums.scala: -------------------------------------------------------------------------------- 1 | package com.luoj.task.connector.clickhouse 2 | 3 | object tableColums { 4 | 5 | } 6 | -------------------------------------------------------------------------------- /常见大数据项目/flink-learning/src/main/java/com/luoj/task/example/example003/AsyncIOSideTableJoinRedis.java: -------------------------------------------------------------------------------- 1 | package com.luoj.task.example.example003; 2 | 3 | 4 | 5 | public class AsyncIOSideTableJoinRedis { 6 | 7 | 8 | 9 | } 10 | -------------------------------------------------------------------------------- /常见大数据项目/flink-learning/src/main/java/com/luoj/task/example/example005/StreamingJob.java: -------------------------------------------------------------------------------- 1 | package com.luoj.task.example.example005; 2 | 3 | /** 4 | * @author lj.michale 5 | * @description 6 | * @date 2021-05-11 7 | */ 8 | public class StreamingJob { 9 | 10 | 11 | } 12 | -------------------------------------------------------------------------------- /常见大数据项目/flink-learning/src/main/java/com/luoj/task/learn/api/DataStreamAndDataSetApiExampleByJava001.java: -------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.api; 2 | 3 | /** 4 | * @author lj.michale 5 | * @description 6 | * @date 2021-04-26 7 | */ 8 | public class DataStreamAndDataSetApiExampleByJava001 { 9 | } 10 | -------------------------------------------------------------------------------- /常见大数据项目/flink-learning/src/main/java/com/luoj/task/learn/api/DataStreamAndDataSetApiExampleByScala001.scala: -------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.api 2 | 3 | object DataStreamAndDataSetApiExampleByScala001 { 4 | 5 | } 6 | -------------------------------------------------------------------------------- /常见大数据项目/flink-learning/src/main/java/com/luoj/task/learn/api/SQLExampleByJava001.java: -------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.api; 2 | 3 | /** 4 | * @author lj.michale 5 | * @description 6 | * @date 2021-04-26 7 | */ 8 | public class SQLExampleByJava001 { 9 | } 10 | -------------------------------------------------------------------------------- /常见大数据项目/flink-learning/src/main/java/com/luoj/task/learn/api/SQLExampleByScala001.scala: -------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.api 2 | 3 | object SQLExampleByScala001 { 4 | 5 | } 6 | -------------------------------------------------------------------------------- /常见大数据项目/flink-learning/src/main/java/com/luoj/task/learn/api/StatefulStreamProcessingExampleByJava001.java: -------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.api; 2 | 3 | /** 4 | * @author lj.michale 5 | * @description 6 | * @date 2021-04-26 7 | */ 8 | public class 
StatefulStreamProcessingExampleByJava001 {
9 | }
10 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink-learning/src/main/java/com/luoj/task/learn/api/StatefulStreamProcessingExampleByScala001.scala:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.api
2 | 
3 | object StatefulStreamProcessingExampleByScala001 {
4 | 
5 | }
6 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink-learning/src/main/java/com/luoj/task/learn/api/TableApiExampleByJava001.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.api;
2 | 
3 | /**
4 |  * @author lj.michale
5 |  * @description
6 |  * @date 2021-04-26
7 |  */
8 | public class TableApiExampleByJava001 {
9 | }
10 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink-learning/src/main/java/com/luoj/task/learn/api/TableApiExampleByScala001.scala:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.api
2 | 
3 | object TableApiExampleByScala001 {
4 | 
5 | }
6 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink-learning/src/main/java/com/luoj/task/learn/datastreamapi/DataStreamAPIByScala.scala:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.datastreamapi
2 | 
3 | object DataStreamAPIByScala {
4 | 
5 | }
6 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink-learning/src/main/java/com/luoj/task/learn/partition/MyParalleSource.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.partition;
2 | 
3 | import org.apache.flink.streaming.api.functions.source.ParallelSourceFunction;
4 | 
5 | /**
6 |  * @author lj.michale
7 |  * @description
8 |  * @date 2021-04-10
9 |  */
10 | public class MyParalleSource implements ParallelSourceFunction<Long> {
11 | 
12 |     private long count = 1L;
13 |     private volatile boolean isRunning = true;
14 | 
15 |     @Override
16 |     public void run(SourceContext<Long> sourceContext) throws Exception {
17 |         while (isRunning) {
18 |             sourceContext.collect(count);
19 |             count++;
20 |             // Emit one element per second
21 |             Thread.sleep(1000);
22 |         }
23 |     }
24 | 
25 |     @Override
26 |     public void cancel() {
27 |         isRunning = false;
28 |     }
29 | }
30 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink-learning/src/main/java/com/luoj/task/learn/partition/MyPartition.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.partition;
2 | 
3 | import lombok.extern.slf4j.Slf4j;
4 | import org.apache.flink.api.common.functions.Partitioner;
5 | 
6 | /**
7 |  * @author lj.michale
8 |  * @description Custom partitioner: even keys go to partition 0, odd keys to partition 1
9 |  * @date 2021-04-10
10 |  */
11 | @Slf4j
12 | public class MyPartition implements Partitioner<Long> {
13 | 
14 |     @Override
15 |     public int partition(Long key, int numPartitions) {
16 |         log.info("total partitions: {}", numPartitions);
17 |         if (key % 2 == 0) {
18 |             return 0;
19 |         } else {
20 |             return 1;
21 |         }
22 |     }
23 | }
24 | 
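Neither MyParalleSource nor MyPartition is actually wired into a job in this module (StreamingWithMyPartition below stops short of it), so here is a minimal wiring sketch; it assumes the same package as the two classes above, and the job name is arbitrary:

package com.luoj.task.learn.partition;

import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class StreamingWithMyPartitionSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(2);

        env.addSource(new MyParalleSource())
           // Route even values to partition 0, odd values to partition 1.
           .partitionCustom(new MyPartition(), (KeySelector<Long, Long>) value -> value)
           .map(value -> "thread " + Thread.currentThread().getId() + ", value: " + value)
           .print()
           .setParallelism(1);

        env.execute("StreamingWithMyPartition");
    }
}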
--------------------------------------------------------------------------------
/常见大数据项目/flink-learning/src/main/java/com/luoj/task/learn/partition/StreamingWithMyPartition.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.partition;
2 | 
3 | import lombok.extern.slf4j.Slf4j;
4 | import org.apache.flink.streaming.api.datastream.DataStreamSource;
5 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
6 | 
7 | /**
8 |  * @author lj.michale
9 |  * @description
10 |  * @date 2021-04-10
11 |  */
12 | @Slf4j
13 | public class StreamingWithMyPartition {
14 | 
15 |     public static void main(String[] args) {
16 | 
17 |         // Use the Java API environment; the original imports pulled in the
18 |         // Scala API classes, which are not meant to be used from Java code.
19 |         StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
20 |         env.setParallelism(2);
21 |         DataStreamSource<Long> streamSource = env.addSource(new MyParalleSource());
22 | 
23 |     }
24 | 
25 | }
26 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink-learning/src/main/java/com/luoj/task/learn/sink/MultiThreadConsumerClient.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.sink;
2 | 
3 | import java.util.concurrent.CyclicBarrier;
4 | import java.util.concurrent.LinkedBlockingQueue;
5 | 
6 | /**
7 |  * @author lj.michale
8 |  * @description
9 |  * @date 2021-04-27
10 |  */
11 | public class MultiThreadConsumerClient implements Runnable {
12 | 
13 |     private LinkedBlockingQueue<String> bufferQueue;
14 |     private CyclicBarrier clientBarrier;
15 | 
16 |     public MultiThreadConsumerClient(LinkedBlockingQueue<String> bufferQueue, CyclicBarrier clientBarrier) {
17 |         this.bufferQueue = bufferQueue;
18 |         this.clientBarrier = clientBarrier;
19 |     }
20 | 
21 |     @Override
22 |     public void run() {
23 |         while (true) {
24 |             try {
25 |                 // Consume from the head of bufferQueue; take() blocks on an empty
26 |                 // queue instead of busy-spinning on poll(), which returns null.
27 |                 String entity = bufferQueue.take();
28 |                 // Client-side consumption logic.
29 |                 doSomething(entity);
30 |             } catch (InterruptedException e) {
31 |                 Thread.currentThread().interrupt();
32 |                 break;
33 |             }
34 |         }
35 |     }
36 | 
37 |     // The client accumulates a batch and calls the third-party API.
38 |     private void doSomething(String entity) {
39 |     }
40 | }
41 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink-learning/src/main/java/com/luoj/task/learn/source/Source_Demo02_File.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.source;
2 | 
3 | 
4 | public class Source_Demo02_File {
5 | 
6 | }
7 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink-learning/src/main/java/com/luoj/task/learn/tablesql/DuplicatorFunction.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.tablesql;
2 | 
3 | import org.apache.flink.table.annotation.DataTypeHint;
4 | import org.apache.flink.table.annotation.FunctionHint;
5 | import org.apache.flink.table.functions.TableFunction;
6 | import org.apache.flink.types.Row;
7 | 
8 | /**
9 |  * @descr Custom table function whose result type is declared through the
10 |  *        annotation; annotation-based type hints are supported since Flink 1.11.
11 |  */
12 | @FunctionHint(output = @DataTypeHint("ROW< i INT, s STRING >"))
13 | public class DuplicatorFunction extends TableFunction<Row> {
14 |     public void eval(Integer i, String s) {
15 |         // Emit every input row twice.
16 |         collect(Row.of(i, s));
17 |         collect(Row.of(i, s));
18 |     }
19 | }
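A hedged usage sketch for DuplicatorFunction; the registered name and the src(a INT, b STRING) table are assumptions, not part of this repo:

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class DuplicatorFunctionUsageSketch {
    public static void main(String[] args) {
        TableEnvironment tEnv = TableEnvironment.create(
                EnvironmentSettings.newInstance().inStreamingMode().build());

        // Register under an arbitrary name.
        tEnv.createTemporarySystemFunction("duplicator", DuplicatorFunction.class);

        // Assumes a table src(a INT, b STRING) registered elsewhere; every
        // input row comes out twice because eval() calls collect() twice.
        tEnv.executeSql("SELECT i, s FROM src, LATERAL TABLE(duplicator(a, b))").print();
    }
}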
--------------------------------------------------------------------------------
/常见大数据项目/flink-learning/src/main/java/com/luoj/task/learn/tablesql/Sensor.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.tablesql;
2 | 
3 | /**
4 |  * @author lj.michale
5 |  * @description
6 |  * @date 2021-04-10
7 |  */
8 | public class Sensor {
9 | }
10 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink-learning/src/main/java/com/luoj/task/learn/test/IncrementMapFunction.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.test;
2 | 
3 | import org.apache.flink.api.common.functions.MapFunction;
4 | 
5 | /**
6 |  * @author lj.michale
7 |  * @description
8 |  * @date 2021-04-12
9 |  */
10 | public class IncrementMapFunction implements MapFunction<Long, Long> {
11 | 
12 |     @Override
13 |     public Long map(Long record) throws Exception {
14 |         return record + 1;
15 |     }
16 | }
--------------------------------------------------------------------------------
/常见大数据项目/flink-learning/src/main/java/com/luoj/task/learn/test/IncrementMapFunctionTest.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.test;
2 | 
3 | import org.junit.Test;
4 | 
5 | import static org.junit.Assert.assertEquals;
6 | 
7 | /**
8 |  * @author lj.michale
9 |  * @description
10 |  * @date 2021-04-12
11 |  */
12 | public class IncrementMapFunctionTest {
13 |     @Test
14 |     public void testIncrement() throws Exception {
15 |         // instantiate your function
16 |         IncrementMapFunction incrementer = new IncrementMapFunction();
17 | 
18 |         // call the methods that you have implemented
19 |         assertEquals(3L, incrementer.map(2L).longValue());
20 |     }
21 | }
--------------------------------------------------------------------------------
/常见大数据项目/flink-learning/src/main/java/com/luoj/task/learn/udf/FromUnixTimeUDF.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.udf;
2 | 
3 | import org.apache.flink.table.functions.ScalarFunction;
4 | 
5 | import java.text.SimpleDateFormat;
6 | import java.util.Date;
7 | 
8 | /**
9 |  * @author lj.michale
10 |  * @description Converts a string holding epoch seconds into a formatted timestamp
11 |  * @date 2021-04-12
12 |  */
13 | public class FromUnixTimeUDF extends ScalarFunction {
14 | 
15 |     public String DATE_FORMAT;
16 | 
17 |     public FromUnixTimeUDF() {
18 |         this.DATE_FORMAT = "yyyy-MM-dd HH:mm:ss";
19 |     }
20 | 
21 |     public FromUnixTimeUDF(String dateFormat) {
22 |         this.DATE_FORMAT = dateFormat;
23 |     }
24 | 
25 |     public String eval(String longTime) {
26 |         try {
27 |             SimpleDateFormat sdf = new SimpleDateFormat(DATE_FORMAT);
28 |             // Input is epoch seconds, Date expects milliseconds.
29 |             Date date = new Date(Long.parseLong(longTime) * 1000);
30 |             return sdf.format(date);
31 |         } catch (Exception e) {
32 |             return null;
33 |         }
34 |     }
35 | }
36 | 
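A hedged registration sketch for FromUnixTimeUDF; the function name and the logs(ts_sec STRING) table are assumptions:

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class FromUnixTimeUDFUsageSketch {
    public static void main(String[] args) {
        TableEnvironment tEnv = TableEnvironment.create(
                EnvironmentSettings.newInstance().inStreamingMode().build());

        // Default pattern is yyyy-MM-dd HH:mm:ss; a custom pattern can be
        // supplied through the one-argument constructor instead.
        tEnv.createTemporarySystemFunction("from_unix_time", new FromUnixTimeUDF());

        // Assumes a table logs(ts_sec STRING) registered elsewhere; ts_sec holds
        // epoch seconds as a string, which eval() scales up to milliseconds.
        tEnv.executeSql("SELECT from_unix_time(ts_sec) FROM logs").print();
    }
}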
--------------------------------------------------------------------------------
/常见大数据项目/flink-learning/src/main/java/com/luoj/task/learn/udf/UdafSource.scala:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.udf
2 | 
3 | import org.apache.flink.streaming.api.functions.source.{RichSourceFunction, SourceFunction}
4 | 
5 | class UdafSource extends RichSourceFunction[Double] {
6 | 
7 |   @volatile private var isRunning = true
8 | 
9 |   override def run(ctx: SourceFunction.SourceContext[Double]): Unit = {
10 |     while (isRunning) {
11 |       val d = scala.math.random
12 |       ctx.collect(d)
13 |       // Log every generated element when debugging:
14 |       // val logger = Logger(this.getClass)
15 |       // logger.error(s"current value: $d")
16 |       Thread.sleep(12000)
17 |     }
18 |   }
19 | 
20 |   // Leaving cancel() as ??? would throw NotImplementedError on job
21 |   // cancellation; flip the running flag instead.
22 |   override def cancel(): Unit = isRunning = false
23 | 
24 | }
25 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink-learning/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/flink-learning/src/main/resources/log4j.properties
--------------------------------------------------------------------------------
/常见大数据项目/flink-learning/src/main/scala/com/bigdata/common/ClickHouseUtil.scala:
--------------------------------------------------------------------------------
1 | package com.bigdata.common
2 | 
3 | import java.sql.{Connection, DriverManager}
4 | 
5 | object ClickHouseUtil {
6 | 
7 |   // Note: the third argument ends up as the URL path segment, i.e. the
8 |   // target database, despite being named tableName.
9 |   def getConnection(ip: String, port: Int, tableName: String): Connection = {
10 |     Class.forName("ru.yandex.clickhouse.ClickHouseDriver")
11 |     DriverManager.getConnection("jdbc:clickhouse://" + ip + ":" + port + "/" + tableName)
12 |   }
13 | 
14 | }
15 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink-learning/src/main/scala/com/bigdata/runScalaApp.scala:
--------------------------------------------------------------------------------
1 | package com.bigdata
2 | 
3 | object runScalaApp {
4 | 
5 |   def main(args: Array[String]): Unit = {
6 | 
7 |   }
8 | 
9 | }
10 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink-learning/src/main/scala/com/bigdata/task/example/example001/AddSearchPlanBean.java:
--------------------------------------------------------------------------------
1 | package com.bigdata.task.example.example001;
2 | 
3 | import lombok.*;
4 | 
5 | @Setter
6 | @Getter
7 | @ToString
8 | @NoArgsConstructor
9 | @AllArgsConstructor
10 | public class AddSearchPlanBean {
11 | 
12 |     // Name of the saved search plan
13 |     private String name;
14 | 
15 |     // ID of the user who owns the plan
16 |     private Long userId;
17 | 
18 |     // Business scenario / source type: 100 = central-warehouse plan;
19 |     // 101 = sub-warehouse plan; 102 = inbound/outbound detail plan
20 |     private int sourceType;
21 | 
22 |     // Plan details
23 |     private String plan;
24 | 
25 |     // Default flag: 1 = default, 0 = not default
26 |     private Byte defaultFlag;
27 | 
28 |     // System-initialized plan: 1 = yes, 0 = no
29 |     private Byte systemFlag;
30 | 
31 | }
32 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink-learning/src/main/scala/com/bigdata/task/learn/flinksql/TableFunctionExample001.scala:
--------------------------------------------------------------------------------
1 | package com.bigdata.task.learn.flinksql
2 | 
3 | import org.apache.flink.table.functions.TableFunction
4 | 
5 | object TableFunctionExample001 {
6 | 
7 |   def main(args: Array[String]): Unit = {
8 | 
9 |   }
10 | 
11 |   // Custom function
12 | 
13 | }
14 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink-learning/src/main/scala/com/bigdata/task/planner/MyDataSource.scala:
--------------------------------------------------------------------------------
1 | package com.bigdata.task.planner
2 | 
3 | object MyDataSource {
4 | 
5 | }
6 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.11-learning/src/main/java/com/luoj/common/PropertiesConstants.java:
--------------------------------------------------------------------------------
1 | package com.luoj.common;
2 | 
3 | /**
4 |  * @author lj.michale
5 |  * @description
6 | * @date 2021-07-02 7 | */ 8 | public class PropertiesConstants { 9 | 10 | public static final String PROPERTIES_FILE_NAME = "/application.properties"; 11 | public static final String STREAM_PARALLELISM = "stream.parallelism"; 12 | public static final String STREAM_CHECKPOINT_ENABLE = "stream.checkpoint.enable"; 13 | public static final String STREAM_CHECKPOINT_INTERVAL = "stream.checkpoint.interval"; 14 | 15 | //es config 16 | public static final String ELASTICSEARCH_BULK_FLUSH_MAX_ACTIONS = "40"; 17 | public static final String ELASTICSEARCH_HOSTS = "elasticsearch.hosts"; 18 | public static final String STREAM_SINK_PARALLELISM = "1"; 19 | 20 | 21 | } -------------------------------------------------------------------------------- /常见大数据项目/flink1.11-learning/src/main/java/com/luoj/runJavaApp.java: -------------------------------------------------------------------------------- 1 | package com.luoj; 2 | 3 | 4 | public class runJavaApp { 5 | 6 | public static void main(String[] args) { 7 | 8 | } 9 | 10 | } 11 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.11-learning/src/main/java/com/luoj/task/learn/sink/example001/JCacheExecutionOptions.java: -------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.sink.example001; 2 | 3 | /** 4 | * @author lj.michale 5 | * @description 6 | * @date 2021-07-02 7 | */ 8 | public class JCacheExecutionOptions { 9 | } 10 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.11-learning/src/main/resources/application.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/flink1.11-learning/src/main/resources/application.properties -------------------------------------------------------------------------------- /常见大数据项目/flink1.11-learning/src/main/resources/jcache_ice_client.properties: -------------------------------------------------------------------------------- 1 | JcacheProxy=jCache 2 | JcacheProxy.Locator=jCacheIceGrid/Locator:tcp -h 172.16.105.4 -p 4061 3 | JcacheProxy.LocatorCacheTimeout=60 4 | JcacheProxy.InvocationTimeout=30000 5 | JcacheProxy.ConnectionCached=0 -------------------------------------------------------------------------------- /常见大数据项目/flink1.11-learning/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/flink1.11-learning/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /常见大数据项目/flink1.11-learning/src/main/scala/com/bigdata/common/ClickHouseUtil.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.common 2 | 3 | import java.sql.{Connection, DriverManager} 4 | 5 | object ClickHouseUtil { 6 | 7 | def getConnection(ip:String, port:Int, tableName:String): Connection ={ 8 | var connection: Connection = null 9 | Class.forName("ru.yandex.clickhouse.ClickHouseDriver") 10 | connection = DriverManager.getConnection("jdbc:clickhouse://" + ip + ":" + port + "/" + tableName) 11 | connection 12 | } 13 | 14 | 15 | } 16 | -------------------------------------------------------------------------------- 
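Both copies of ClickHouseUtil can be called from Java through the Scala object's static forwarder. A small usage sketch; host, port, and database are placeholders, and note that the third argument lands in the URL path, so it is effectively the database rather than a table:

import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.Statement;

import com.bigdata.common.ClickHouseUtil;

public class ClickHouseUtilUsageSketch {
    public static void main(String[] args) throws Exception {
        // The third argument becomes the path segment of
        // jdbc:clickhouse://host:port/<segment>, i.e. the target database.
        Connection conn = ClickHouseUtil.getConnection("127.0.0.1", 8123, "default");
        try (Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery("SELECT version()")) {
            while (rs.next()) {
                System.out.println("ClickHouse version: " + rs.getString(1));
            }
        } finally {
            conn.close();
        }
    }
}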
/常见大数据项目/flink1.11-learning/src/main/scala/com/bigdata/runScalaApp.scala:
--------------------------------------------------------------------------------
1 | package com.bigdata
2 | 
3 | object runScalaApp {
4 | 
5 |   def main(args: Array[String]): Unit = {
6 | 
7 |   }
8 | 
9 | }
10 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.12-learning/src/main/java/com/luoj/example/example1/GmallConfig.java:
--------------------------------------------------------------------------------
1 | package com.luoj.example.example1;
2 | 
3 | /**
4 |  * Project configuration class
5 |  */
6 | public class GmallConfig {
7 |     // HBase namespace
8 |     public static final String HBASE_SCHEMA = "GMALL2022_REALTIME";
9 |     // Phoenix JDBC URL
10 |     public static final String PHOENIX_SERVER = "jdbc:phoenix:hadoop102,hadoop103,hadoop104:2181";
11 | }
--------------------------------------------------------------------------------
/常见大数据项目/flink1.12-learning/src/main/java/com/luoj/example/example1/TableProcess.java:
--------------------------------------------------------------------------------
1 | package com.luoj.example.example1;
2 | 
3 | import lombok.Data;
4 | 
5 | /**
6 |  * Entity class backing the dynamic-routing config table
7 |  */
8 | @Data
9 | public class TableProcess {
10 |     // Dynamic-routing sink constants, lower-cased to stay consistent with the scripts
11 |     public static final String SINK_TYPE_HBASE = "hbase";
12 |     public static final String SINK_TYPE_KAFKA = "kafka";
13 |     public static final String SINK_TYPE_CK = "clickhouse";
14 |     // Source table
15 |     String sourceTable;
16 |     // Operation type: insert, update, delete
17 |     String operateType;
18 |     // Sink type: hbase or kafka
19 |     String sinkType;
20 |     // Sink table (or topic)
21 |     String sinkTable;
22 |     // Sink columns
23 |     String sinkColumns;
24 |     // Primary-key column
25 |     String sinkPk;
26 |     // Table-creation extension clause
27 |     String sinkExtend;
28 | }
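GmallConfig is meant to be consumed through the Phoenix JDBC driver. A minimal connection sketch, assuming the Phoenix client jar is on the classpath; the UPSERT in the comment is illustrative only:

import java.sql.Connection;
import java.sql.DriverManager;

public class PhoenixConnectSketch {
    public static void main(String[] args) throws Exception {
        // Thick-client Phoenix driver; the zookeeper quorum URL comes from GmallConfig.
        Class.forName("org.apache.phoenix.jdbc.PhoenixDriver");
        Connection conn = DriverManager.getConnection(GmallConfig.PHOENIX_SERVER);
        // Dimension writes would target tables under HBASE_SCHEMA, e.g.
        // UPSERT INTO GMALL2022_REALTIME.DIM_USER_INFO (ID, NAME) VALUES (?, ?)
        // (that table name is hypothetical).
        conn.close();
    }
}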
--------------------------------------------------------------------------------
/常见大数据项目/flink1.12-learning/src/main/java/com/luoj/runJavaApp.java:
--------------------------------------------------------------------------------
1 | package com.luoj;
2 | 
3 | 
4 | public class runJavaApp {
5 | 
6 |     public static void main(String[] args) {
7 | 
8 |     }
9 | 
10 | }
11 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.12-learning/src/main/resources/application.properties:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/flink1.12-learning/src/main/resources/application.properties
--------------------------------------------------------------------------------
/常见大数据项目/flink1.12-learning/src/main/resources/jcache_ice_client.properties:
--------------------------------------------------------------------------------
1 | JcacheProxy=jCache
2 | JcacheProxy.Locator=jCacheIceGrid/Locator:tcp -h 172.16.105.4 -p 4061
3 | JcacheProxy.LocatorCacheTimeout=60
4 | JcacheProxy.InvocationTimeout=30000
5 | JcacheProxy.ConnectionCached=0
--------------------------------------------------------------------------------
/常见大数据项目/flink1.12-learning/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/flink1.12-learning/src/main/resources/log4j.properties
--------------------------------------------------------------------------------
/常见大数据项目/flink1.12-learning/src/main/scala/com/bigdata/runScalaApp.scala:
--------------------------------------------------------------------------------
1 | package com.bigdata
2 | 
3 | object runScalaApp {
4 | 
5 |   def main(args: Array[String]): Unit = {
6 | 
7 |   }
8 | 
9 | }
10 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/doc/LogEventDataExample.json:
--------------------------------------------------------------------------------
1 | {
2 |   "type": "app",
3 |   "id": "121",
4 |   "timestamp": 1570941591229,
5 |   "level": "error",
6 |   "offset": 32313131,
7 |   "content": "Exception in thread \"main\" java.lang.NoClassDefFoundError: org/apache/flink/api/common/ExecutionConfig$GlobalJobParameters",
8 |   "tags": {
9 |     "cluster_name": "lj",
10 |     "app_name": "lj",
11 |     "host_ip": "127.0.0.1",
12 |     "app_id": "21"
13 |   }
14 | }
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/output/data:
--------------------------------------------------------------------------------
1 | 0,A
2 | 1,B
3 | 2,C
4 | 3,D
5 | 4,E
6 | 5,F
7 | 6,G
8 | 7,H
9 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/alarm/AlarmClient.java:
--------------------------------------------------------------------------------
1 | package com.luoj.alarm;
2 | 
3 | /**
4 |  * @author lj.michale
5 |  * @description
6 |  * @date 2021-07-08
7 |  */
8 | public class AlarmClient {
9 | }
10 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/bean/Customer.java:
--------------------------------------------------------------------------------
1 | package com.luoj.bean;
2 | 
3 | /**
4 |  * @author lj.michale
5 |  * @description
6 |  * @date 2021-08-04
7 |  */
8 | public class Customer {
9 | 
10 | }
11 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/bean/MarketingUserBehavior.java:
--------------------------------------------------------------------------------
1 | package com.luoj.bean;
2 | 
3 | import lombok.Data;
4 | 
5 | /**
6 |  * @author lj.michale
7 |  * @description
8 |  * @date 2021-08-09
9 |  */
10 | @Data
11 | public class MarketingUserBehavior {
12 | 
13 |     private String channel;
14 | 
15 |     private String behavior;
16 | 
17 | }
18 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/bean/Product.java:
--------------------------------------------------------------------------------
1 | package com.luoj.bean;
2 | 
3 | /**
4 |  * @author lj.michale
5 |  * @description
6 |  * @date 2021-08-04
7 |  */
8 | public class Product {
9 | 
10 | }
11 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/bean/StudentViewCount.java:
--------------------------------------------------------------------------------
1 | package com.luoj.bean;
2 | 
3 | import lombok.Data;
4 | 
5 | /**
6 |  * @author lj.michale
7 |  * @description Base statistics bean for per-student counts
8 |  * @date 2021-08-13
9 |  */
10 | @Data
11 | public class StudentViewCount {
12 | 
13 |     private int id;
14 | 
15 |     /**
16 |      * End timestamp of the window
17 |      */
18 |     private long windowEnd;
19 | 
20 |     /**
21 |      * Count of records for one id
22 |      */
23 |     private long viewCount;
24 | 
25 |     public static StudentViewCount of(int id, long windowEnd, long count) {
26 |         StudentViewCount result = new StudentViewCount();
27 |         result.setId(id);
28 |         result.setWindowEnd(windowEnd);
29 |         result.setViewCount(count);
30 |         return result;
31 |     }
32 | }
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/common/FieldDescrib.java:
--------------------------------------------------------------------------------
1 | package com.luoj.common;
2 | 
3 | import java.lang.annotation.ElementType;
4 | import java.lang.annotation.Retention;
5 | import java.lang.annotation.RetentionPolicy;
6 | import java.lang.annotation.Target;
7 | 
8 | /**
9 |  * @descr Field-description annotation
10 |  * @author orange
11 |  * @date 2021/8/4 23:42
12 |  */
13 | @Retention(RetentionPolicy.RUNTIME)
14 | @Target(ElementType.FIELD)
15 | public @interface FieldDescrib {
16 | 
17 |     /** Field name */
18 |     String name();
19 | 
20 |     /** Field description */
21 |     String desc() default "";
22 | }
23 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/common/HdfsUtil.java:
--------------------------------------------------------------------------------
1 | package com.luoj.common;
2 | 
3 | import org.apache.hadoop.conf.Configuration;
4 | 
5 | // The Hadoop filesystem types are the ones needed here; the original
6 | // imports pointed at java.nio.file by mistake.
7 | import org.apache.hadoop.fs.FileSystem;
8 | import org.apache.hadoop.fs.Path;
9 | 
10 | /**
11 |  * @author lj.michale
12 |  * @description
13 |  * @date 2021-07-11
14 |  */
15 | public class HdfsUtil {
16 | 
17 |     // private static FileSystem getFs(Path path, Configuration conf) {
18 |     //
19 |     //     try {
20 |     //         String proxyUser = conf.get("");
21 |     //         if (proxyUser != null) {
22 |     //
23 |     //         }
24 |     //     } catch (Exception e) {
25 |     //         throw new RuntimeException("", e);
26 |     //     }
27 |     //
28 |     // }
29 | 
30 | }
31 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/common/KafkaUtil.java:
--------------------------------------------------------------------------------
1 | package com.luoj.common;
2 | 
3 | import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode;
4 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
5 | 
6 | /**
7 |  * @author lj.michale
8 |  * @description
9 |  * @date 2021-05-25
10 |  */
11 | public class KafkaUtil {
12 | 
13 |     public static FlinkKafkaConsumer getKafkaSource(String newsTopic, String groupId) {
14 |         // TODO: build the consumer Properties (bootstrap servers, group id)
15 |         // and return a configured FlinkKafkaConsumer; still a stub for now.
16 |         return null;
17 |     }
18 | 
19 | }
20 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/common/MySQLGlobalConfig.java:
--------------------------------------------------------------------------------
1 | package com.luoj.common;
2 | 
3 | /**
4 |  * @author lj.michale
5 |  * @description
6 |  * @date 2021-07-27
7 |  */
8 | public class MySQLGlobalConfig {
9 | 
10 |     // Note: MySQL_DB is still empty and must be filled in before the URL is usable.
11 |     public static final String MySQL_DB = "";
12 | 
13 |     public static final String MySQL_URL = "jdbc:mysql://localhost:3306/" + MySQL_DB + "?useSSL=false&useUnicode=true&characterEncoding=UTF-8&characterSetResults=UTF-8&zeroDateTimeBehavior=CONVERT_TO_NULL&serverTimezone=UTC";
14 | 
15 |     public static final String MySQL_NAME = "root";
16 | 
17 |     public static final String MySQL_PASSWORD = "abc1314520";
18 | 
19 |     public static final String MySQL_DRIVER_CLASS = "com.mysql.cj.jdbc.Driver";
20 | 
21 | }
22 | 
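A small sketch of opening a connection from MySQLGlobalConfig; plain JDBC, nothing beyond the constants above is assumed:

import java.sql.Connection;
import java.sql.DriverManager;

public class MySQLConnectSketch {
    public static void main(String[] args) throws Exception {
        // Driver class and credentials come from MySQLGlobalConfig; remember
        // that MySQL_DB is empty until a database name is filled in.
        Class.forName(MySQLGlobalConfig.MySQL_DRIVER_CLASS);
        Connection conn = DriverManager.getConnection(
                MySQLGlobalConfig.MySQL_URL,
                MySQLGlobalConfig.MySQL_NAME,
                MySQLGlobalConfig.MySQL_PASSWORD);
        System.out.println("connected: " + !conn.isClosed());
        conn.close();
    }
}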
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/runJavaApp.java:
--------------------------------------------------------------------------------
1 | package com.luoj;
2 | 
3 | 
4 | import lombok.extern.slf4j.Slf4j;
5 | 
6 | @Slf4j
7 | public class runJavaApp {
8 | 
9 |     public static void main(String[] args) {
10 | 
11 |     }
12 | 
13 | }
14 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/example/jcache/JCacheSinkUtil.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.example.jcache;
2 | 
3 | import org.apache.flink.api.java.utils.ParameterTool;
4 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
5 | import org.apache.flink.util.Preconditions;
6 | 
7 | /**
8 |  * @author lj.michale
9 |  * @description
10 |  * @date 2021-07-02
11 |  */
12 | public class JCacheSinkUtil {
13 | 
14 |     /**
15 |      * @descr Sink to JCache
16 |      * @param jcacheLocator
17 |      * @param jcacheProxy
18 |      * @param jcacheResourceName
19 |      * @param dataStream
20 |      */
21 |     public static void addSink(String jcacheLocator,
22 |                                String jcacheProxy,
23 |                                String jcacheResourceName,
24 |                                SingleOutputStreamOperator dataStream) {
25 | 
26 |     }
27 | 
28 | }
29 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/example/pv/SensorReading.scala:
--------------------------------------------------------------------------------
1 | package com.luoj.task.example.pv
2 | 
3 | // Source records of the shape (sensor_1, timestamp, temperature)
4 | case class SensorReading(id: String, timestamp: Long, temperature: Double)
5 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/example/userbehavior/User.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.example.userbehavior;
2 | 
3 | import lombok.Data;
4 | 
5 | /**
6 |  * @author lj.michale
7 |  * @description
8 |  * @date 2021-05-27
9 |  */
10 | @Data
11 | public class User {
12 |     /**
13 |      * userId
14 |      */
15 |     private Long userId;
16 |     /**
17 |      * Registration time
18 |      */
19 |     private Long registerTime;
20 |     /**
21 |      * Last login time
22 |      */
23 |     private Long lastLoadTime;
24 | }
25 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/example/userbehavior/UserBehavingInfo.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.example.userbehavior;
2 | 
3 | import lombok.*;
4 | 
5 | /**
6 |  * @author lj.michale
7 |  * @description
8 |  * @date 2021-05-27
9 |  */
10 | @Setter
11 | @Getter
12 | @ToString
13 | @AllArgsConstructor
14 | @NoArgsConstructor
15 | public class UserBehavingInfo {
16 | 
17 |     private String userNo;
18 | 
19 |     /**
20 |      * User behavior
21 |      */
22 |     private String behavior;
23 | 
24 |     /**
25 |      * Product the behavior acted on
26 |      */
27 |     private String operatedGoods;
28 | 
29 |     /**
30 |      * Time the behavior occurred
31 |      */
32 |     private Long time;
33 | 
34 | }
35 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/example/userbehavior/UserBehavingInfoKeyByGood.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.example.userbehavior;
2 | 
3 | import org.apache.flink.api.java.functions.KeySelector;
4 | 
5 | /**
6 |  * @author lj.michale
7 |  * @description
8 |  * @date 2021-05-27
9 |  */
10 | public class UserBehavingInfoKeyByGood implements KeySelector<UserBehavingInfo, String> {
11 | 
12 |     private static final long serialVersionUID = 4780234853172462378L;
13 | 
14 |     @Override
15 |     public String getKey(UserBehavingInfo value) throws Exception {
16 |         return value.getOperatedGoods();
17 |     }
18 | }
19 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/example/userbehavior/UserBehavorCountAggregateUtils.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.example.userbehavior;
2 | 
3 | import org.apache.flink.api.common.functions.AggregateFunction;
4 | 
5 | /**
6 |  * @author lj.michale
7 |  * @description Incrementally counts user click behaviors
8 |  * @date 2021-05-27
9 |  */
10 | public class UserBehavorCountAggregateUtils implements AggregateFunction<UserBehavingInfo, Integer, Integer> {
11 | 
12 |     @Override
13 |     public Integer createAccumulator() {
14 |         return 0;
15 |     }
16 | 
17 |     // Invoked once per element
18 |     @Override
19 |     public Integer add(UserBehavingInfo userBehavingInfo, Integer integer) {
20 |         return integer + 1;
21 |     }
22 | 
23 |     // Invoked once when the window fires
24 |     @Override
25 |     public Integer getResult(Integer integer) {
26 |         return integer;
27 |     }
28 | 
29 |     @Override
30 |     public Integer merge(Integer integer, Integer acc1) {
31 |         return integer + acc1;
32 |     }
33 | 
34 | }
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/example/userbehavior/UserBehavorCountWindowFunction.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.example.userbehavior;
2 | 
3 | import org.apache.flink.api.java.tuple.Tuple2;
4 | import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
5 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
6 | import org.apache.flink.util.Collector;
7 | 
8 | /**
9 |  * @author lj.michale
10 |  * @description Attaches the window key to the pre-aggregated count
11 |  * @date 2021-05-27
12 |  */
13 | public class UserBehavorCountWindowFunction extends ProcessWindowFunction<Integer, Tuple2<String, Integer>, String, TimeWindow> {
14 | 
15 |     @Override
16 |     public void process(String key, Context context, Iterable<Integer> iterable, Collector<Tuple2<String, Integer>> collector) throws Exception {
17 |         // The iterable holds exactly one element: the aggregate's result.
18 |         collector.collect(new Tuple2<>(key, iterable.iterator().next()));
19 |     }
20 | }
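The three classes above form the usual incremental-aggregation trio. A hedged wiring sketch, assuming UserBehavingInfo.time carries event-time milliseconds and arrives roughly in order:

import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;

public class UserBehaviorCountWiringSketch {

    public static void wire(DataStream<UserBehavingInfo> behaviors) {
        behaviors
            // Event time from UserBehavingInfo.time; monotonous timestamps are
            // an assumption made purely for brevity here.
            .assignTimestampsAndWatermarks(
                WatermarkStrategy.<UserBehavingInfo>forMonotonousTimestamps()
                    .withTimestampAssigner((info, ts) -> info.getTime()))
            // Key by the product the behavior touched.
            .keyBy(new UserBehavingInfoKeyByGood())
            .window(TumblingEventTimeWindows.of(Time.seconds(10)))
            // Incremental count, then the window function attaches the key.
            .aggregate(new UserBehavorCountAggregateUtils(), new UserBehavorCountWindowFunction())
            .print();
    }
}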
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/help/example001/CustomerSource.scala:
--------------------------------------------------------------------------------
1 | package com.luoj.task.help.example001
2 | 
3 | import org.apache.flink.streaming.api.functions.source.SourceFunction
4 | 
5 | class CustomerSource extends SourceFunction[Tuple2[Long, Long]] {
6 | 
7 |   var count = 1625048255867L
8 |   var isRunning = true
9 | 
10 |   override def run(ctx: SourceFunction.SourceContext[Tuple2[Long, Long]]): Unit = {
11 |     while (isRunning) {
12 |       ctx.collect(new Tuple2(count, count))
13 |       count += 1
14 |       Thread.sleep(1000)
15 |     }
16 |   }
17 | 
18 |   override def cancel(): Unit = {
19 |     isRunning = false
20 |   }
21 | }
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/jira/FLINK_19038.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.jira;
2 | 
3 | import org.apache.flink.table.api.EnvironmentSettings;
4 | import org.apache.flink.table.api.Table;
5 | import org.apache.flink.table.api.TableEnvironment;
6 | 
7 | /**
8 |  * @author lj.michale
9 |  * @description Reproduction for https://issues.apache.org/jira/browse/FLINK-19038
10 |  * @date 2021-06-15
11 |  */
12 | public class FLINK_19038 {
13 |     public static void main(String[] args) throws Exception {
14 |         EnvironmentSettings settings = EnvironmentSettings.newInstance().useBlinkPlanner().inBatchMode().build();
15 |         TableEnvironment tbEnv = TableEnvironment.create(settings);
16 |         Table t1 = tbEnv.fromValues("1", "2", "3", "4", "5", "6", "7", "8", "9");
17 |         Table t2 = t1.orderBy("0").fetch(5);
18 |         tbEnv.executeSql("create table print(c1 String) with ('connector' = 'print')");
19 |         tbEnv.insertInto("print", t2);
20 |         tbEnv.execute("");
21 |     }
22 | }
23 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/accumulator/AccumulatorTest.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.accumulator;
2 | 
3 | /**
4 |  * @author lj.michale
5 |  * @description
6 |  * @date 2021-07-01
7 |  */
8 | public class AccumulatorTest {
9 | }
10 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/alibaba/batch/OutOfOrderCase.scala:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.alibaba.batch
2 | 
3 | object OutOfOrderCase {
4 | 
5 | }
6 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/extensions/ScalaExtensionExample.scala:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.extensions
2 | 
3 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
4 | import org.apache.flink.streaming.api.scala.extensions._
5 | 
6 | object ScalaExtensionExample {
7 | 
8 |   case class Point(x: Double, y: Double)
9 | 
10 |   def main(args: Array[String]): Unit = {
11 | 
12 |     val env = StreamExecutionEnvironment.getExecutionEnvironment
13 |     import org.apache.flink.api.scala._
14 |     val ds = env.fromElements(Point(1, 2), Point(3, 4), Point(5, 6))
15 | 
16 |     ds.filterWith {
17 |       case Point(x, _) => x > 1
18 |     }.mapWith {
19 |       case Point(x, y) => (x, y)
20 |     }.flatMapWith {
21 |       case (x, y) => Seq("x" -> x, "y" -> y)
22 |     }.keyingBy {
23 |       case (id, value) => id
24 |     }
25 | 
26 |   }
27 | }
28 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/func/SimpleSourceFunction.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.func;
2 | 
3 | import org.apache.flink.api.java.tuple.Tuple3;
4 | import org.apache.flink.streaming.api.functions.source.SourceFunction;
5 | 
6 | /**
7 |  * Emits Tuple3 records; for testing only.
8 |  */
9 | public class SimpleSourceFunction implements SourceFunction<Tuple3<String, Integer, Long>> {
10 | 
11 |     @Override
12 |     public void run(SourceContext<Tuple3<String, Integer, Long>> ctx) throws Exception {
13 |         int index = 1;
14 |         while (true) {
15 |             ctx.collect(new Tuple3<>("key", ++index, System.currentTimeMillis()));
16 |             // ctx.collect(new Tuple3<>("key2", index, System.currentTimeMillis()));
17 |             // ctx.collect(new Tuple3<>("key3", index, System.currentTimeMillis()));
18 |             Thread.sleep(500);
19 |         }
20 |     }
21 | 
22 |     @Override
23 |     public void cancel() { }
24 | 
25 | }
26 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/func/Tuple3KeySelector.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.func;
2 | 
3 | import org.apache.flink.api.java.functions.KeySelector;
4 | import org.apache.flink.api.java.tuple.Tuple3;
5 | 
6 | /**
7 |  * Project: Apache Flink 知其然,知其所以然 - khkw.correctness.functions
8 |  * Author: 孙金城
9 |  * Date: 2020/7/13
10 |  */
11 | public class Tuple3KeySelector implements KeySelector<Tuple3<String, Integer, Long>, String> {
12 |     @Override
13 |     public String getKey(Tuple3<String, Integer, Long> event) throws Exception {
14 |         return event.f0;
15 |     }
16 | }
17 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/hive/DataSetDataAnalysisExample001.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.hive;
2 | 
3 | import lombok.extern.slf4j.Slf4j;
4 | import org.apache.flink.api.common.restartstrategy.RestartStrategies;
5 | import org.apache.flink.api.common.time.Time;
6 | import org.apache.flink.api.java.ExecutionEnvironment;
7 | 
8 | import java.util.concurrent.TimeUnit;
9 | 
10 | /**
11 |  * @author lj.michale
12 |  * @description Hive batch data analysis
13 |  * @date 2021-07-27
14 |  */
15 | @Slf4j
16 | public class DataSetDataAnalysisExample001 {
17 | 
18 |     public static void main(String[] args) throws Exception {
19 | 
20 |         final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
21 |         env.setParallelism(1);
22 |         env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, Time.of(10, TimeUnit.SECONDS)));
23 | 
24 |         env.execute("DataSetDataAnalysisExample001");
25 | 
26 |     }
27 | 
28 | }
29 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/hudi/example001/Frog.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.hudi.example001;
2 | 
3 | /**
4 |  * Entity (data) generator
5 |  */
6 | public interface Frog<T> {
7 |     T getOne();
8 | }
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/join/example001/CityInfo.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.join.example001;
2 | 
3 | import lombok.Data;
4 | 
5 | import java.io.Serializable;
6 | 
7 | /**
8 |  * @author lj.michale
9 |  * @description
10 |  * @date 2021-06-23
11 |  */
12 | @Data
13 | public class CityInfo implements Serializable {
14 |     private Integer cityId;
15 |     private String cityName;
16 |     private Long ts;
17 | }
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/join/example001/UserInfo.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.join.example001;
2 | 
3 | import lombok.Data;
4 | 
5 | import java.io.Serializable;
6 | 
7 | /**
8 |  * @author lj.michale
9 |  * @description
10 |  * @date 2021-06-23
11 |  */
12 | @Data
13 | public class UserInfo implements Serializable {
14 |     private String userName;
15 |     private Integer cityId;
16 |     private Long ts;
17 | }
18 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/kafka/KafkaElement.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.kafka;
2 | 
3 | import lombok.AllArgsConstructor;
4 | import lombok.Data;
5 | import lombok.NoArgsConstructor;
6 | 
7 | /**
8 |  * @author lj.michale
9 |  * @description
10 |  * @date 2021-05-26
11 |  */
12 | @NoArgsConstructor
13 | @AllArgsConstructor
14 | @Data
15 | public class KafkaElement {
16 | 
17 |     private String elem1;
18 |     private String name;
19 |     private Integer age;
20 | }
21 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/kafka/KafkaOut.java:
--------------------------------------------------------------------------------
1 | //package com.luoj.task.learn.kafka;
2 | //
3 | //import lombok.AllArgsConstructor;
4 | //import lombok.Data;
5 | //import lombok.NoArgsConstructor;
6 | //
7 | ///**
8 | // * @author lj.michale
9 | // * @description
10 | // * @date 2021-05-26
11 | // */
12 | //@NoArgsConstructor
13 | //@AllArgsConstructor
14 | //@Data
15 | //public class KafkaOut {
16 | //// public KafkaOut(Integer valueOf, String s) {
17 | //// }
18 | //}
19 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/matric/example001/CustomerKafkaConsumer.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.matric.example001;
2 | 
3 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;
4 | import java.util.Properties;
5 | 
6 | /**
7 |  * @author lj.michale
8 |  * @description Kafka consumer that registers the deserializer's metrics
9 |  * @date 2021-05-26
10 |  */
11 | public class CustomerKafkaConsumer<T> extends FlinkKafkaConsumer010<T> {
12 | 
13 |     private AbsDeserialization<T> valueDeserializer;
14 | 
15 |     public CustomerKafkaConsumer(String topic, AbsDeserialization<T> valueDeserializer, Properties props) {
16 |         super(topic, valueDeserializer, props);
17 |         this.valueDeserializer = valueDeserializer;
18 |     }
19 | 
20 |     @Override
21 |     public void run(SourceContext<T> sourceContext) throws Exception {
22 |         // Hand the runtime context to the deserializer so it can register
23 |         // its normal/dirty-data counters before consumption starts.
24 |         valueDeserializer.setRuntimeContext(getRuntimeContext());
25 |         valueDeserializer.initMetric();
26 |         super.run(sourceContext);
27 |     }
28 | }
29 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/matric/example001/ParseDeserialization.scala:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.matric.example001
2 | 
3 | import com.alibaba.fastjson.JSON
4 | 
5 | class ParseDeserialization extends AbsDeserialization[RawData] {
6 | 
7 |   override def deserialize(message: Array[Byte]): RawData = {
8 |     try {
9 |       val msg = new String(message)
10 |       val rawData = JSON.parseObject(msg, classOf[RawData])
11 |       normalDataNum.inc() // normal-data counter
12 |       rawData
13 |     } catch {
14 |       case e: Exception =>
15 |         dirtyDataNum.inc() // dirty-data counter
16 |         null
17 |     }
18 |   }
19 | }
20 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/matric/example001/RawData.scala:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.matric.example001
2 | 
3 | case class RawData()
4 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/matric/histogram/MyHistogram.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.matric.histogram;
2 | 
3 | /**
4 |  * @author lj.michale
5 |  * @description
6 |  * @date 2021-05-27
7 |  */
8 | public class MyHistogram {
9 | }
10 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/matric/histogram/MyMapper.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.matric.histogram;
2 | 
3 | import com.codahale.metrics.Histogram;
4 | import org.apache.flink.api.common.functions.RichMapFunction;
5 | import org.apache.flink.configuration.Configuration;
6 | 
7 | /**
8 |  * @author lj.michale
9 |  * @description
10 |  * @date 2021-05-27
11 |  */
12 | public class MyMapper extends RichMapFunction<Long, Long> {
13 |     private transient Histogram histogram;
14 | 
15 |     @Override
16 |     public void open(Configuration config) {
17 |         // Registration is still commented out (MyHistogram is an empty stub),
18 |         // so map() would NPE as written; see the registration sketch below.
19 |         // this.histogram = getRuntimeContext()
20 |         //         .getMetricGroup()
21 |         //         .histogram("myHistogram", new MyHistogram());
22 |     }
23 | 
24 |     @Override
25 |     public Long map(Long value) throws Exception {
26 |         this.histogram.update(value);
27 |         return value;
28 |     }
29 | }
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/matric/meter/MyMapper.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.matric.meter;
2 | 
3 | import com.codahale.metrics.Meter;
4 | import org.apache.flink.api.common.functions.RichMapFunction;
5 | import org.apache.flink.configuration.Configuration;
6 | 
7 | /**
8 |  * @author lj.michale
9 |  * @description
10 |  * @date 2021-05-27
11 |  */
12 | public class MyMapper extends RichMapFunction<Long, Long> {
13 |     private transient Meter meter;
14 | 
15 |     @Override
16 |     public void open(Configuration config) {
17 |         // this.meter = getRuntimeContext()
18 |         //         .getMetricGroup()
19 |         //         .meter("myMeter", new MyMeter());
20 |     }
21 | 
22 |     @Override
23 |     public Long map(Long value) throws Exception {
24 |         // this.meter.markEvent();
25 |         return value;
26 |     }
27 | }
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/matric/meter/MyMeter.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.matric.meter;
2 | 
3 | /**
4 |  * @author lj.michale
5 |  * @description
6 |  * @date 2021-05-27
7 |  */
8 | public class MyMeter {
9 | }
10 | 
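MyHistogram and MyMeter are empty stubs, which is why the registrations above stay commented out. One working alternative is wrapping a Dropwizard histogram, sketched below; it assumes the flink-metrics-dropwizard dependency, and the metric name and reservoir size are arbitrary:

import com.codahale.metrics.SlidingWindowReservoir;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.dropwizard.metrics.DropwizardHistogramWrapper;

public class HistogramMapperSketch extends RichMapFunction<Long, Long> {

    private transient com.codahale.metrics.Histogram histogram;

    @Override
    public void open(Configuration config) {
        // Wrap a Dropwizard histogram so Flink's metric group accepts it;
        // registering in open() is what prevents the NPE in map().
        this.histogram = new com.codahale.metrics.Histogram(new SlidingWindowReservoir(500));
        getRuntimeContext().getMetricGroup()
                .histogram("myHistogram", new DropwizardHistogramWrapper(histogram));
    }

    @Override
    public Long map(Long value) {
        histogram.update(value);
        return value;
    }
}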
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/operator/aggregate/example001/MyCountAggregate.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.operator.aggregate.example001;
2 | 
3 | import org.apache.flink.api.common.functions.AggregateFunction;
4 | 
5 | /**
6 |  * @author lj.michale
7 |  * @description Custom aggregate function MyCountAggregate with input (IN),
8 |  *              accumulator (ACC) and output (OUT) types
9 |  * @date 2021-05-27
10 |  */
11 | public class MyCountAggregate implements AggregateFunction<ProductViewData, Long, Long> {
12 | 
13 |     /* The view count starts at 0 */
14 |     @Override
15 |     public Long createAccumulator() {
16 |         return 0L;
17 |     }
18 | 
19 |     /* Each record simply adds 1 */
20 |     @Override
21 |     public Long add(ProductViewData productViewData, Long accumulator) {
22 |         return accumulator + 1;
23 |     }
24 | 
25 |     /* Merge two partial counts */
26 |     @Override
27 |     public Long merge(Long a, Long b) {
28 |         return a + b;
29 |     }
30 | 
31 |     @Override
32 |     public Long getResult(Long accumulator) {
33 |         return accumulator;
34 |     }
35 | 
36 | }
37 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/operator/aggregate/example001/ProductViewData.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.operator.aggregate.example001;
2 | 
3 | import lombok.AllArgsConstructor;
4 | import lombok.Data;
5 | import lombok.NoArgsConstructor;
6 | 
7 | /**
8 |  * @author lj.michale
9 |  * @description
10 |  * @date 2021-05-27
11 |  */
12 | @AllArgsConstructor
13 | @NoArgsConstructor
14 | @Data
15 | public class ProductViewData {
16 | 
17 |     private String productId;
18 | 
19 |     private String userId;
20 | 
21 |     private Long operationType;
22 | 
23 |     private Long timestamp;
24 | 
25 | }
26 | 
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/operator/aggregate/example001/RecordSeclectId.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.operator.aggregate.example001;
2 | 
3 | import org.apache.flink.api.java.functions.KeySelector;
4 | 
5 | /**
6 |  * @author lj.michale
7 |  * @description
8 |  * @date 2021-05-27
9 |  */
10 | public class RecordSeclectId implements KeySelector<ProductViewData, String> {
11 | 
12 |     private static final long serialVersionUID = 4780234853172462378L;
13 | 
14 |     @Override
15 |     public String getKey(ProductViewData value) throws Exception {
16 |         return value.getUserId();
17 |     }
18 | }
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/operator/example001/Constants.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.operator.example001;
2 | 
3 | public interface Constants {
4 |     String OUTPUT_TAG_NAME = "result";
5 | }
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/operator/example001/MyKey.java:
--------------------------------------------------------------------------------
1 | package com.luoj.task.learn.operator.example001;
2 | 
3 | 
4 | import java.io.Serializable;
5 | import java.util.Objects;
6 | 
7 | /**
8 |  * @author lj.michale
9 |  * @description
10 |  * @date 2021-05-20
11 |  */
12 | public class MyKey implements Serializable {
13 | 
14 |     private int value;
15 | 
16 |     public MyKey(int value) {
17 |         this.value = value;
18 |     }
19 | 
20 |     public int getValue() {
21 |         return value;
22 |     }
23 | 
24 |     @Override
25 |     public boolean equals(Object o) {
26 |         if (this == o) return true;
27 |         if (o == null || getClass() != o.getClass()) return false;
28 |         MyKey myKey = (MyKey) o;
29 |         return value == myKey.value;
30 |     }
31 | 
32 |     @Override
33 |     public int hashCode() {
34 |         return Objects.hash(value);
35 |     }
36 | }
--------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/operator/example001/MyValue.java:
-------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.operator.example001; 2 | 3 | 4 | import java.util.Objects; 5 | /** 6 | * @author lj.michale 7 | * @description 8 | * @date 2021-05-20 9 | */ 10 | public class MyValue { 11 | private int value; 12 | 13 | public MyValue() {} 14 | 15 | public MyValue(int value) { 16 | this.value = value; 17 | } 18 | 19 | public int getValue() { 20 | return value; 21 | } 22 | 23 | public void setValue(int value) { 24 | this.value = value; 25 | } 26 | 27 | @Override 28 | public boolean equals(Object o) { 29 | if (this == o) return true; 30 | if (o == null || getClass() != o.getClass()) return false; 31 | MyValue myValue = (MyValue) o; 32 | return value == myValue.value; 33 | } 34 | 35 | @Override 36 | public int hashCode() { 37 | return Objects.hash(value); 38 | } 39 | } -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/operator/example003/Constants.java: -------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.operator.example003; 2 | 3 | public interface Constants { 4 | 5 | String OUTPUT_TAG_NAME = "result"; 6 | 7 | } -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/operator/example003/v2/MyKey.java: -------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.operator.example003.v2; 2 | 3 | import java.io.Serializable; 4 | import java.util.Objects; 5 | 6 | /** 7 | * @author lj.michale 8 | * @description 9 | * @date 2021-05-29 10 | */ 11 | public class MyKey implements Serializable { 12 | private int value; 13 | 14 | public MyKey(int value) { 15 | this.value = value; 16 | } 17 | 18 | public int getValue() { 19 | return value; 20 | } 21 | 22 | @Override 23 | public boolean equals(Object o) { 24 | if (this == o) return true; 25 | if (o == null || getClass() != o.getClass()) return false; 26 | MyKey myKey = (MyKey) o; 27 | return value == myKey.value; 28 | } 29 | 30 | @Override 31 | public int hashCode() { 32 | return Objects.hash(value); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/operator/example003/v2/MyValue.java: -------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.operator.example003.v2; 2 | 3 | import java.util.Objects; 4 | 5 | /** 6 | * @author lj.michale 7 | * @description 8 | * @date 2021-05-29 9 | */ 10 | public class MyValue { 11 | private int value; 12 | 13 | public MyValue() {} 14 | 15 | public MyValue(int value) { 16 | this.value = value; 17 | } 18 | 19 | public int getValue() { 20 | return value; 21 | } 22 | 23 | public void setValue(int value) { 24 | this.value = value; 25 | } 26 | 27 | @Override 28 | public boolean equals(Object o) { 29 | if (this == o) return true; 30 | if (o == null || getClass() != o.getClass()) return false; 31 | MyValue myValue = (MyValue) o; 32 | return value == myValue.value; 33 | } 34 | 35 | @Override 36 | public int hashCode() { 37 | return Objects.hash(value); 38 | } 39 | } -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/operator/join/Order.java: 
-------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.operator.join; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | import lombok.NoArgsConstructor; 6 | 7 | /** 8 | * @author lj.michale 9 | * @description 10 | * @date 2021-05-26 11 | */ 12 | 13 | @NoArgsConstructor 14 | @AllArgsConstructor 15 | @Data 16 | public class Order { 17 | 18 | public String userId; 19 | public String price; 20 | public Long timestamp; 21 | public String orderId; 22 | 23 | } 24 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/operator/join/SessionWindowJoinExample001.java: -------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.operator.join; 2 | 3 | /** 4 | * @author lj.michale 5 | * @description Session Window Join 6 | * @date 2021-05-26 7 | */ 8 | public class SessionWindowJoinExample001 { 9 | } 10 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/operator/join/SlidingWindowJoinExample001.java: -------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.operator.join; 2 | 3 | /** 4 | * @author lj.michale 5 | * @description Sliding Window Join 6 | * @date 2021-05-26 7 | */ 8 | public class SlidingWindowJoinExample001 { 9 | 10 | 11 | } 12 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/operator/join/TumblingWindowJoinExample001.java: -------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.operator.join; 2 | 3 | /** 4 | * @author lj.michale 5 | * @description Tumbling Window Join 6 | * @date 2021-05-26 7 | */ 8 | public class TumblingWindowJoinExample001 { 9 | 10 | 11 | 12 | } 13 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/operator/join/User.java: -------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.operator.join; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | import lombok.NoArgsConstructor; 6 | 7 | /** 8 | * @author lj.michale 9 | * @description 10 | * @date 2021-05-26 11 | */ 12 | @NoArgsConstructor 13 | @AllArgsConstructor 14 | @Data 15 | public class User { 16 | public long createTime; 17 | public String userId; 18 | public String name; 19 | public String age; 20 | } 21 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/operator/join/UserBrowseLog.java: -------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.operator.join; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | import lombok.NoArgsConstructor; 6 | 7 | /** 8 | * @author lj.michale 9 | * @description 10 | * @date 2021-05-26 11 | */ 12 | @NoArgsConstructor 13 | @AllArgsConstructor 14 | @Data 15 | public class UserBrowseLog { 16 | 17 | private String userID; 18 | 19 | private String eventTime; 20 | 21 | private String eventType; 22 | 23 | private String productID; 24 | 25 | private Integer productPrice; 26 | 27 | } 28 | 
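The three window-join classes above are empty placeholders. A minimal sketch of a tumbling window join over the Order and User beans defined in this package could look like the following; the class name, toy inputs, and the 10-second window size are illustrative assumptions, not part of the original project:

package com.luoj.task.learn.operator.join;

import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;

public class TumblingWindowJoinSketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Toy bounded inputs; a real job would read from Kafka or another unbounded source.
        DataStream<Order> orders = env.fromElements(
                new Order("u1", "9.99", 1000L, "o1"),
                new Order("u2", "19.99", 2000L, "o2"));
        DataStream<User> users = env.fromElements(
                new User(1000L, "u1", "jack", "18"),
                new User(2000L, "u2", "rose", "20"));

        // Join the two streams on userId inside 10-second tumbling windows.
        orders.join(users)
              .where(Order::getUserId)
              .equalTo(User::getUserId)
              .window(TumblingProcessingTimeWindows.of(Time.seconds(10)))
              .apply(new JoinFunction<Order, User, String>() {
                  @Override
                  public String join(Order order, User user) {
                      return user.name + " placed order " + order.orderId;
                  }
              })
              .print();

        env.execute("TumblingWindowJoinSketch");
    }
}

A sliding or session window join differs only in the assigner passed to window(), e.g. SlidingProcessingTimeWindows.of(Time.seconds(10), Time.seconds(5)) or ProcessingTimeSessionWindows.withGap(Time.seconds(5)).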
-------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/operator/join/UserClickLog.java: -------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.operator.join; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | import lombok.NoArgsConstructor; 6 | 7 | /** 8 | * @author lj.michale 9 | * @description 10 | * @date 2021-05-26 11 | */ 12 | @NoArgsConstructor 13 | @AllArgsConstructor 14 | @Data 15 | public class UserClickLog { 16 | 17 | private String userID; 18 | 19 | private String eventTime; 20 | 21 | private String eventType; 22 | 23 | private String pageID; 24 | 25 | } 26 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/partition/MyNoParallelSourceScala.scala: -------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.partition 2 | 3 | import org.apache.flink.streaming.api.functions.source.SourceFunction 4 | import org.apache.flink.streaming.api.functions.source.SourceFunction.SourceContext 5 | 6 | /** 7 | * A custom source with parallelism 1 8 | * that emits an increasing sequence of numbers starting from 1 9 | */ 10 | class MyNoParallelSourceScala extends SourceFunction[Long]{ 11 | 12 | var count = 1L 13 | var isRunning = true 14 | 15 | override def run(ctx: SourceContext[Long]) = { 16 | while(isRunning){ 17 | ctx.collect(count) 18 | count+=1 19 | Thread.sleep(1000) 20 | } 21 | 22 | } 23 | 24 | override def cancel() = { 25 | isRunning = false 26 | } 27 | } -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/partition/MyPartitionerScala.scala: -------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.partition 2 | 3 | import org.apache.flink.api.common.functions.Partitioner 4 | 5 | class MyPartitionerScala extends Partitioner[Long]{ 6 | override def partition(key: Long, numPartitions: Int) = { 7 | println("total partitions: "+numPartitions) 8 | if(key % 2 ==0){ 9 | 0 10 | }else{ 11 | 1 12 | } 13 | } 14 | } -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/partition/StreamingDemoMyPartitionerScala.scala: -------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.partition 2 | 3 | import java.util 4 | 5 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 6 | 7 | object StreamingDemoMyPartitionerScala { 8 | 9 | def main(args: Array[String]): Unit = { 10 | 11 | val env = StreamExecutionEnvironment.getExecutionEnvironment 12 | env.setParallelism(2) 13 | 14 | //implicit conversions for the Scala DataStream API 15 | import org.apache.flink.api.scala._ 16 | val text = env.addSource(new MyNoParallelSourceScala) 17 | 18 | //wrap the Long values in a tuple 19 | val tupleData = text.map(line=>{ 20 | Tuple1(line)// note how Tuple1 is constructed 21 | }) 22 | 23 | val partitionData = tupleData.partitionCustom(new MyPartitionerScala,0) 24 | val result = partitionData.map(line=>{ 25 | println("current thread id: "+Thread.currentThread().getId+", value: "+line) 26 | line._1 27 | }) 28 | result.print().setParallelism(1) 29 | env.execute("StreamingDemoWithMyNoParallelSourceScala") 30 | 31 | } 32 | 33 | } --------------------------------------------------------------------------------
/常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/processfunction/KeyedProcessFunctionExample.java: -------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.processfunction; 2 | 3 | /** 4 | * @author lj.michale 5 | * @description 6 | * @date 2021-07-26 7 | */ 8 | public class KeyedProcessFunctionExample { 9 | 10 | 11 | 12 | } 13 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/processfunction/NOKeyedProcessFunctionExample.java: -------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.processfunction; 2 | 3 | /** 4 | * @author lj.michale 5 | * @description 6 | * @date 2021-07-26 7 | */ 8 | public class NOKeyedProcessFunctionExample { 9 | 10 | 11 | } 12 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/processfunction/ProcessFunctionExample.java: -------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.processfunction; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | 5 | /** 6 | * @author lj.michale 7 | * @description 8 | * @date 2021-07-26 9 | */ 10 | @Slf4j 11 | public class ProcessFunctionExample { 12 | 13 | public static void main(String[] args) { 14 | 15 | 16 | 17 | } 18 | 19 | } 20 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/sink/CacheCloudSink.java: -------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.sink; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | 5 | /** 6 | * @author lj.michale 7 | * @description 8 | * @date 2021-06-29 9 | */ 10 | @Slf4j 11 | public class CacheCloudSink { 12 | 13 | 14 | 15 | 16 | 17 | } 18 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/source/SensorReading.scala: -------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.source 2 | 3 | case class SensorReading( id: String, timestamp: Long, timepreture: Double) 4 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/source/ThresholdUpdate.scala: -------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.source 2 | 3 | case class ThresholdUpdate(id: String, threshold: Double, timepreture: Double) 4 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/table/func/DuplicatorFunction.java: -------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.table.func; 2 | 3 | import org.apache.flink.table.annotation.DataTypeHint; 4 | import org.apache.flink.table.annotation.FunctionHint; 5 | import org.apache.flink.table.functions.TableFunction; 6 | import org.apache.flink.types.Row; 7 | 8 | /** 9 | * @descr Declares the result row type via annotation registration; supported since Flink 1.11 10 | * @author 11 | */ 12 | @FunctionHint(output = @DataTypeHint("ROW< i INT, s STRING >")) 13 | public class DuplicatorFunction extends TableFunction<Row> { 14 | public void eval(Integer i, String s) { 15 | collect(Row.of(i, s)); 16 | 
collect(Row.of(i, s)); 17 | } 18 | } -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/table/sql/TableSQLExample002.java: -------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.table.sql; 2 | 3 | /** 4 | * @author lj.michale 5 | * @description 6 | * @date 2021-08-10 7 | */ 8 | public class TableSQLExample002 { 9 | 10 | 11 | } 12 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/time/example001/EventTimeExample001.java: -------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.time.example001; 2 | 3 | import org.apache.flink.api.common.RuntimeExecutionMode; 4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 5 | 6 | /** 7 | * @author lj.michale 8 | * @description 9 | * @date 2021-07-01 10 | */ 11 | public class EventTimeExample001 { 12 | 13 | public static void main(String[] args) { 14 | 15 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 16 | env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC); 17 | 18 | 19 | 20 | } 21 | 22 | } 23 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/udf/WordToWordCountUDF.java: -------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.udf; 2 | 3 | import org.apache.flink.api.common.functions.RichMapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | 6 | /** 7 | * @author lj.michale 8 | * @description A UDF that maps a word to a (word, 1) tuple 9 | * @date 2021-07-26 10 | */ 11 | public class WordToWordCountUDF extends RichMapFunction<String, Tuple2<String, Integer>> { 12 | 13 | @Override 14 | public Tuple2<String, Integer> map(String word) { 15 | Tuple2<String, Integer> wordCount = Tuple2.of(word, 1); 16 | return wordCount; 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/unit/IncrementMapFunction.java: -------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.unit; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | 5 | /** 6 | * @author lj.michale 7 | * @description 8 | * @date 2021-07-26 9 | */ 10 | public class IncrementMapFunction implements MapFunction<Long, Long> { 11 | 12 | @Override 13 | public Long map(Long record) throws Exception { 14 | return record + 1; 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/java/com/luoj/task/learn/window/WindowApiFunctionExample001.java: -------------------------------------------------------------------------------- 1 | package com.luoj.task.learn.window; 2 | 3 | /** 4 | * @author lj.michale 5 | * @description 6 | * @date 2021-08-09 7 | */ 8 | public class WindowApiFunctionExample001 { 9 | } 10 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | jcacheLocator segClusterRes 2 | jcacheProxy jCacheTest 3 | jcacheResourceName jCacheIceGrid/Locator:tcp -h 172.17.8.17 -p 4061 4 | LocatorCacheTimeout 60 5 | InvocationTimeout 30000 6 | ConnectionCached
0 7 | 8 | ykc.profile=dev 9 | #ykc.profile=test 10 | #ykc.profile=prod 11 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/flink1.13-learning/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/scala/com/bigdata/bean/RawData.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.bean 2 | 3 | case class RawData(oredrId:String, customerId:String, productId:String, productName:String, price:String, buyMoney:Double, buyCount:Int, buyTime:String) 4 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/scala/com/bigdata/bean/Student.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.bean; 2 | 3 | import lombok.Data; 4 | import java.util.Date; 5 | 6 | /** 7 | * @author lj.michale 8 | * @description 9 | * @date 2021-08-13 10 | */ 11 | @Data 12 | public class Student { 13 | 14 | private int id; 15 | 16 | private String name; 17 | 18 | private int age; 19 | 20 | private String address; 21 | 22 | private Date checkInTime; 23 | 24 | private long successTimeStamp; 25 | 26 | public Student() { 27 | } 28 | 29 | public Student(int id, String name, int age, String address) { 30 | this.id = id; 31 | this.name = name; 32 | this.age = age; 33 | this.address = address; 34 | } 35 | 36 | public static Student of(int id, String name, int age, String address, long timeStamp) { 37 | Student student = new Student(id, name, age, address); 38 | student.setSuccessTimeStamp(timeStamp); 39 | return student; 40 | } 41 | } -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/scala/com/bigdata/common/MyDruidUtils.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.common; 2 | 3 | import com.alibaba.druid.pool.DruidDataSource; 4 | import java.sql.Connection; 5 | 6 | /** 7 | * @author lj.michale 8 | * @description Database connection pool utility 9 | * @date 2021-08-13 10 | */ 11 | public class MyDruidUtils { 12 | 13 | private static DruidDataSource dataSource; 14 | 15 | public static Connection getConnection() throws Exception { 16 | // Use Druid to manage connections 17 | dataSource = new DruidDataSource(); 18 | dataSource.setDriverClassName("com.mysql.jdbc.Driver"); 19 | dataSource.setUrl("jdbc:mysql://localhost:3306/test"); 20 | dataSource.setUsername("root"); 21 | dataSource.setPassword("12345678"); 22 | // Initial pool size, max active connections, min idle connections 23 | dataSource.setInitialSize(10); 24 | dataSource.setMaxActive(50); 25 | dataSource.setMinIdle(2); 26 | // Return a pooled connection 27 | return dataSource.getConnection(); 28 | } 29 | } -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/scala/com/bigdata/common/StringUtils.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.common 2 | 3 | object StringUtils { 4 | 5 | /** 6 | * MD5 hash 7 | * 8 | * @param s input string 9 | * @return 32-character MD5 hex string 10 | */ 11 | def encryptMd5_32(s: String): String = { 12 | val m = java.security.MessageDigest.getInstance("MD5") 13 | 
val b = s.getBytes("UTF-8") 14 | m.update(b, 0, b.length) 15 | val r = new java.math.BigInteger(1, m.digest()).toString(16) 16 | val sb = new StringBuffer() 17 | 18 | if (r.length == 32) { 19 | r 20 | } 21 | else { 22 | for (_ <- 0 until 32 - r.length) { 23 | sb.append("0") 24 | } 25 | sb.append(r) 26 | sb.toString 27 | } 28 | 29 | } 30 | } -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/scala/com/bigdata/common/source/CustomAddressSource.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.common.source 2 | 3 | /** 4 | * @author lj.michale 5 | * @description Custom source for geo-location data 6 | * @date 2021-07-01 7 | */ 8 | class CustomAddressSource { 9 | 10 | } 11 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/scala/com/bigdata/common/source/CustomOrderSource.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.common.source 2 | 3 | import org.apache.flink.streaming.api.functions.source.SourceFunction 4 | 5 | /** 6 | * @author lj.michale 7 | * @description Custom source for order data 8 | * @date 2021-07-01 9 | */ 10 | 11 | case class CustomOrder() 12 | 13 | class CustomOrderSource extends SourceFunction[CustomOrder]{ 14 | 15 | //flag marking whether the source should keep emitting data 16 | var running = true 17 | var currentTime:Long = System.currentTimeMillis() 18 | 19 | override def run(sc: SourceFunction.SourceContext[CustomOrder]): Unit = { 20 | 21 | 22 | 23 | } 24 | 25 | override def cancel(): Unit = { 26 | 27 | } 28 | 29 | } 30 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/scala/com/bigdata/common/source/CustomProductSource.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.common.source 2 | 3 | /** 4 | * @author lj.michale 5 | * @description Custom source for product data 6 | * @date 2021-07-01 7 | */ 8 | class CustomProductSource { 9 | 10 | } 11 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/scala/com/bigdata/common/source/CustomUserSource.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.common.source 2 | 3 | import org.apache.flink.streaming.api.functions.source.SourceFunction 4 | 5 | /** 6 | * @author lj.michale 7 | * @description Custom source for user data 8 | * @date 2021-07-01 9 | */ 10 | case class CustomUser() 11 | 12 | class CustomUserSource extends SourceFunction[CustomUser]{ 13 | 14 | override def run(sc: SourceFunction.SourceContext[CustomUser]): Unit = { 15 | 16 | } 17 | 18 | override def cancel(): Unit = { 19 | 20 | } 21 | 22 | 23 | 24 | } 25 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/scala/com/bigdata/runScalaApp.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata 2 | 3 | object runScalaApp { 4 | 5 | def main(args: Array[String]): Unit = { 6 | 7 | 8 | 9 | } 10 | 11 | } 12 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/scala/com/bigdata/task/learn/datastream/TransformationsMap.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.task.learn.datastream 2 | 3 | /** 4 | * @author lj.michale 5
| * @description Map 6 | * DataStream → DataStream 7 | * Takes one element and produces one transformed element. A map function that doubles the values of the input stream: 8 | * dataStream.map { x => x * 2 } 9 | * @date 2021-05-20 10 | */ 11 | object TransformationsMap { 12 | 13 | 14 | 15 | } 16 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/scala/com/bigdata/task/learn/datastream/WindowWordCount.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.task.learn.datastream 2 | 3 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 4 | import org.apache.flink.streaming.api.windowing.time.Time 5 | 6 | object WindowWordCount { 7 | 8 | def main(args: Array[String]) { 9 | 10 | val env = StreamExecutionEnvironment.getExecutionEnvironment 11 | val text = env.socketTextStream("localhost", 9999) 12 | import org.apache.flink.api.scala._ 13 | val counts = text.flatMap { _.toLowerCase.split("\\W+") filter { _.nonEmpty } } 14 | .map { (_, 1) } 15 | .keyBy(0) 16 | .timeWindow(Time.seconds(5)) 17 | .sum(1) 18 | 19 | counts.print 20 | 21 | env.execute("Window Stream WordCount") 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/scala/com/bigdata/task/learn/func/HashCodeFunction.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.task.learn.func 2 | 3 | import org.apache.flink.table.api._ 4 | import org.apache.flink.table.functions.FunctionContext 5 | import org.apache.flink.table.functions.ScalarFunction 6 | 7 | class HashCodeFunction extends ScalarFunction { 8 | 9 | private var factor: Int = 0 10 | 11 | override def open(context: FunctionContext): Unit = { 12 | // Read the job parameter "hashcode_factor", 13 | // falling back to the default value "12" if it is not set 14 | factor = context.getJobParameter("hashcode_factor", "12").toInt 15 | } 16 | 17 | def eval(s: String): Int = { 18 | s.hashCode * factor 19 | } 20 | } 21 | 22 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/scala/com/bigdata/task/learn/func/OverloadedFunction.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.task.learn.func 2 | 3 | import org.apache.flink.table.annotation.{DataTypeHint, FunctionHint} 4 | import org.apache.flink.table.functions.TableFunction 5 | import org.apache.flink.types.Row 6 | 7 | // Declare one common output type for all eval methods of this function class 8 | @FunctionHint(output = new DataTypeHint("ROW<s STRING, i INT>")) 9 | class OverloadedFunction extends TableFunction[Row] { 10 | 11 | def eval(a: Int, b: Int): Unit = { 12 | collect(Row.of("Sum", Int.box(a + b))) 13 | } 14 | 15 | // overloading of arguments is still possible 16 | def eval(): Unit = { 17 | collect(Row.of("Empty args", Int.box(-1))) 18 | } 19 | 20 | } 21 | 22 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/scala/com/bigdata/task/learn/func/OverloadedFunction2.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.task.learn.func 2 | 3 | import org.apache.flink.table.annotation.DataTypeHint 4 | import org.apache.flink.table.annotation.FunctionHint 5 | import org.apache.flink.table.functions.TableFunction 6 | import org.apache.flink.types.Row 7 | import scala.annotation.varargs 8 | 9 | class OverloadedFunction2 extends TableFunction[AnyRef] { 10 | 11 | // an implementer just needs to make
sure that a method exists 12 | // that can be called by the JVM 13 | @varargs 14 | def eval(o: AnyRef*) = { 15 | if (o.length == 0) { 16 | collect(Boolean.box(false)) 17 | } 18 | collect(o(0)) 19 | } 20 | } -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/scala/com/bigdata/task/learn/func/SplitFunction.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.task.learn.func 2 | 3 | import org.apache.flink.table.annotation.DataTypeHint 4 | import org.apache.flink.table.annotation.FunctionHint 5 | import org.apache.flink.table.functions.TableFunction 6 | import org.apache.flink.types.Row 7 | 8 | @FunctionHint(output = new DataTypeHint("ROW<word STRING, length INT>")) 9 | class SplitFunction extends TableFunction[Row] { 10 | def eval(str: String): Unit = { 11 | // use collect(...) to emit a row 12 | str.split(" ").foreach(s => collect(Row.of(s, Int.box(s.length)))) 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/scala/com/bigdata/task/learn/func/WeightedAvgAccum.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.task.learn.func 2 | 3 | import java.lang.{Integer => JInteger, Long => JLong} 4 | 5 | import org.apache.flink.api.java.tuple.{Tuple, Tuple1 => JTuple1} 6 | import org.apache.flink.api.java.typeutils.TupleTypeInfo 7 | import org.apache.flink.table.api.Types 8 | import org.apache.flink.table.functions.AggregateFunction 9 | 10 | /** 11 | * Accumulator for WeightedAvg. 12 | */ 13 | class WeightedAvgAccum { 14 | var sum: JLong = 0L 15 | var count: JInteger = 0 16 | } 17 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/scala/com/bigdata/task/learn/tableapi/Top2Accum.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.task.learn.tableapi 2 | 3 | import java.lang.{Integer => JInteger} 4 | import org.apache.flink.table.api.Types 5 | import org.apache.flink.table.functions.TableAggregateFunction 6 | 7 | /** 8 | * Top2 accumulator. 9 | */ 10 | class Top2Accum { 11 | var first: JInteger = _ 12 | var second: JInteger = _ 13 | } 14 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/scala/com/bigdata/task/learn/window/func/CountStudentAgg.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.task.learn.window.func; 2 | 3 | 4 | import com.bigdata.bean.Student; 5 | import org.apache.flink.api.common.functions.AggregateFunction; 6 | 7 | 8 | /** 9 | * @author lj.michale 10 | * @description AggregateFunction implementing COUNT: accumulates a running total, adding one for each record seen 11 | * @date 2021-08-13 12 | */ 13 | public class CountStudentAgg implements AggregateFunction<Student, Long, Long> { 14 | 15 | @Override 16 | public Long createAccumulator() { 17 | return 0L; 18 | } 19 | 20 | @Override 21 | public Long add(Student value, Long accumulator) { 22 | return accumulator + 1; 23 | } 24 | 25 | @Override 26 | public Long getResult(Long accumulator) { 27 | return accumulator; 28 | } 29 | 30 | @Override 31 | public Long merge(Long a, Long b) { 32 | return a + b; 33 | } 34 | } -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/main/scala/com/bigdata/task/learn/window/func/WindowStudentResultFunction.java: 
-------------------------------------------------------------------------------- 1 | package com.bigdata.task.learn.window.func; 2 | 3 | 4 | import com.luoj.bean.StudentViewCount; 5 | import org.apache.flink.api.java.tuple.Tuple; 6 | import org.apache.flink.api.java.tuple.Tuple1; 7 | import org.apache.flink.streaming.api.functions.windowing.WindowFunction; 8 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow; 9 | import org.apache.flink.util.Collector; 10 | 11 | /** 12 | * @author lj.michale 13 | * @description Emits the aggregated count per student for each window 14 | * @date 2021-08-13 15 | */ 16 | public class WindowStudentResultFunction implements WindowFunction<Long, StudentViewCount, Tuple, TimeWindow> { 17 | 18 | @Override 19 | public void apply(Tuple tuple, TimeWindow window, Iterable<Long> input, Collector<StudentViewCount> out) throws Exception { 20 | int id = ((Tuple1<Integer>) tuple).f0; 21 | long count = input.iterator().next(); 22 | out.collect(StudentViewCount.of(id, window.getEnd(), count)); 23 | } 24 | 25 | } -------------------------------------------------------------------------------- /常见大数据项目/flink1.13-learning/src/test/java/com/luoj/IncrementMapFunctionTest.java: -------------------------------------------------------------------------------- 1 | package com.luoj; 2 | 3 | import com.luoj.task.learn.unit.IncrementMapFunction; 4 | import org.junit.Test; 5 | import static org.junit.Assert.assertEquals; 6 | 7 | /** 8 | * @author lj.michale 9 | * @description 10 | * @date 2021-07-26 11 | */ 12 | public class IncrementMapFunctionTest { 13 | 14 | @Test 15 | public void testIncrement() throws Exception { 16 | IncrementMapFunction incrementer = new IncrementMapFunction(); 17 | assertEquals(3L, (long) incrementer.map(2L)); 18 | } 19 | } -------------------------------------------------------------------------------- /常见大数据项目/flink1.14-learning/README.md: -------------------------------------------------------------------------------- 1 | 

Hi This is LJ.Michale 2 | A bigdata developer from China 3 |

4 | 5 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.14-learning/src/main/java/com/aurora/bean/LngLat.java: -------------------------------------------------------------------------------- 1 | package com.aurora.bean; 2 | 3 | import lombok.Data; 4 | 5 | /** 6 | * @author lj.michale 7 | * @description 8 | * @date 2021-08-22 9 | */ 10 | @Data 11 | public class LngLat { 12 | 13 | public double latitude; 14 | 15 | 16 | public double longitude; 17 | 18 | } 19 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.14-learning/src/main/java/com/aurora/bean/Message.java: -------------------------------------------------------------------------------- 1 | //package com.aurora.bean; 2 | // 3 | //import groovy.lang.GString; 4 | //import lombok.Data; 5 | // 6 | ///** 7 | // * @author lj.michale 8 | // * @description 9 | // * @date 2021-08-22 10 | // */ 11 | //@Data 12 | //public class Message { 13 | // 14 | // public String id; 15 | // 16 | // public String ts; 17 | // 18 | // public GString vals; 19 | // 20 | // public String p; 21 | // 22 | //} 23 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.14-learning/src/main/java/com/aurora/cdc/FlinkCDC001.java: -------------------------------------------------------------------------------- 1 | package com.aurora.cdc; 2 | 3 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 4 | /** 5 | * @author lj.michale 6 | * @date 2022-01-19 7 | */ 8 | public class FlinkCDC001 { 9 | 10 | public static void main(String[] args) { 11 | 12 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 13 | env.setParallelism(1); 14 | 15 | 16 | } 17 | 18 | } 19 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.14-learning/src/main/java/com/aurora/cdc/Product.java: -------------------------------------------------------------------------------- 1 | package com.aurora.cdc; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import lombok.Data; 5 | import lombok.extern.slf4j.Slf4j; 6 | 7 | /** 8 | * @desc : 9 | */ 10 | @Slf4j 11 | @Data 12 | public class Product { 13 | private String name; 14 | private String description; 15 | 16 | public static Product of(String json) { 17 | return JSON.parseObject(json, Product.class); 18 | } 19 | } -------------------------------------------------------------------------------- /常见大数据项目/flink1.14-learning/src/main/java/com/aurora/common/Constants.java: -------------------------------------------------------------------------------- 1 | package com.aurora.common; 2 | 3 | /** 4 | * @author lj.michale 5 | * @description 6 | * @date 2021-08-22 7 | */ 8 | public class Constants { 9 | 10 | private final static String ZOOKEEPER_QUORUM = ""; 11 | 12 | private final static String ZOOKEEPER_PORT = ""; 13 | } 14 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.14-learning/src/main/java/com/aurora/common/LoadResourcesUtils.java: -------------------------------------------------------------------------------- 1 | package com.aurora.common; 2 | 3 | import java.io.IOException; 4 | import java.io.InputStream; 5 | import java.util.Properties; 6 | 7 | /** 8 | * @author lj.michale 9 | * @description Loads a .properties file from the classpath 10 | * @date 2021-08-22 11 | */ 12 | public class LoadResourcesUtils { 13 | public static Properties getProperties(String s) { 14 | Properties properties = new Properties(); 15 | try (InputStream in = LoadResourcesUtils.class.getClassLoader().getResourceAsStream(s)) { 16 | if (in != null) { 17 | properties.load(in); 18 | } 19 | } catch (IOException e) { 20 | throw new RuntimeException("Failed to load properties: " + s, e); 21 | } 22 | return properties; 23 | } 24 | } 25 | 
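FlinkCDC001 above stops after creating the execution environment. Assuming the flink-cdc-connectors 2.x dependency is on the classpath, a minimal MySQL CDC pipeline could look like the sketch below; the class name, hostname, and credentials are placeholders, and the flink database with its t_student table is borrowed from docs/ddl.sql elsewhere in this repository:

package com.aurora.cdc;

import com.ververica.cdc.connectors.mysql.source.MySqlSource;
import com.ververica.cdc.debezium.JsonDebeziumDeserializationSchema;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class FlinkCdcSketch {

    public static void main(String[] args) throws Exception {
        // Placeholder connection settings; point these at the real database.
        MySqlSource<String> source = MySqlSource.<String>builder()
                .hostname("localhost")
                .port(3306)
                .databaseList("flink")
                .tableList("flink.t_student")
                .username("root")
                .password("12345678")
                // Emit each change event (insert/update/delete) as a JSON string.
                .deserializer(new JsonDebeziumDeserializationSchema())
                .build();

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        env.fromSource(source, WatermarkStrategy.noWatermarks(), "mysql-cdc-source").print();
        env.execute("FlinkCdcSketch");
    }
}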
-------------------------------------------------------------------------------- /常见大数据项目/flink1.14-learning/src/main/java/com/aurora/sql/ReadHive.java: -------------------------------------------------------------------------------- 1 | package com.aurora.sql; 2 | 3 | /** 4 | * @author lj.michale 5 | * @description 6 | * @date 2021-08-12 7 | */ 8 | 9 | public class ReadHive { 10 | 11 | 12 | } 13 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.14-learning/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/flink1.14-learning/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /常见大数据项目/flink1.14-learning/src/main/scala/com/bigdata/bean/Log.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.bean 2 | 3 | case class Log(sid:String,var callOut:String, var callIn:String, callType:String, callTime:Long, duration:Long) -------------------------------------------------------------------------------- /常见大数据项目/flink1.14-learning/src/main/scala/com/bigdata/bean/OrderObj.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.bean 2 | 3 | case class OrderObj(database:String,table:String,`type`:String,data:String) extends Serializable 4 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.14-learning/src/main/scala/com/bigdata/bean/RawData.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.bean 2 | 3 | case class RawData(oredrId:String, customerId:String, productId:String, productName:String, price:String, buyMoney:Double, buyCount:Int, buyTime:String) 4 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.14-learning/src/main/scala/com/bigdata/runScalaApp.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata 2 | 3 | object runScalaApp { 4 | 5 | def main(args: Array[String]): Unit = { 6 | 7 | 8 | 9 | } 10 | 11 | } 12 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.15-learning/README.md: -------------------------------------------------------------------------------- 1 |

Hi This is LJ.Michale 2 | A bigdata developer from China 3 |

4 | 5 | 6 | - flink 1.14.4 documentation (Chinese) 7 | https://nightlies.apache.org/flink/flink-docs-release-1.14/zh/docs/dev/table/sql/overview/ 8 | https://nightlies.apache.org/flink/flink-docs-release-1.14/docs/dev/table/tableapi/ 9 | https://nightlies.apache.org/flink/flink-docs-release-1.14/docs/dev/table/overview/ -------------------------------------------------------------------------------- /常见大数据项目/flink1.15-learning/data/SensorReading.txt: -------------------------------------------------------------------------------- 1 | sensor_1,1547718199,55.9 2 | sensor_6,1547718201,15.4 3 | sensor_7,1547718202,6.7 4 | sensor_10,1547718205,38.1 5 | sensor_1,1547718199,32.2 6 | sensor_1,1547718199,38.0 7 | sensor_10,1547718205,32.2 8 | sensor_10,1547718205,35.4 9 | sensor_7,1547718202,31 -------------------------------------------------------------------------------- /常见大数据项目/flink1.15-learning/data/output/output1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/flink1.15-learning/data/output/output1.txt -------------------------------------------------------------------------------- /常见大数据项目/flink1.15-learning/docs/ddl.sql: -------------------------------------------------------------------------------- 1 | 2 | create database flink; 3 | 4 | use flink; 5 | 6 | CREATE TABLE `t_student` ( 7 | `id` int(11) NOT NULL AUTO_INCREMENT, 8 | `name` varchar(255) DEFAULT NULL, 9 | `age` int(11) DEFAULT NULL, 10 | PRIMARY KEY (`id`) 11 | ) ENGINE=InnoDB AUTO_INCREMENT=7 DEFAULT CHARSET=utf8; 12 | 13 | INSERT INTO `t_student` VALUES ('1', 'jack', '18'); 14 | INSERT INTO `t_student` VALUES ('2', 'tom', '19'); 15 | INSERT INTO `t_student` VALUES ('3', 'rose', '20'); 16 | INSERT INTO `t_student` VALUES ('4', 'tom', '19'); 17 | INSERT INTO `t_student` VALUES ('5', 'jack', '18'); 18 | INSERT INTO `t_student` VALUES ('6', 'rose', '20'); 19 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.15-learning/src/main/java/com/aurora/bean/Jason.java: -------------------------------------------------------------------------------- 1 | package com.aurora.bean; 2 | 3 | import lombok.Getter; 4 | import lombok.Setter; 5 | 6 | /** 7 | * @descri 8 | * 9 | * @author lj.michale 10 | * @date 2022-04-02 11 | */ 12 | @Getter 13 | @Setter 14 | public class Jason { 15 | 16 | public String topic; 17 | 18 | public long timestamp; 19 | 20 | } 21 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.15-learning/src/main/java/com/aurora/bean/Order.java: -------------------------------------------------------------------------------- 1 | package com.aurora.bean; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | import lombok.NoArgsConstructor; 6 | 7 | /** 8 | * @author lj.michale 9 | * @description 10 | * @date 2022-04-01 11 | */ 12 | @Data 13 | @AllArgsConstructor 14 | @NoArgsConstructor 15 | public class Order { 16 | 17 | private String id; 18 | private Integer userId; 19 | private Integer money; 20 | private Long createTime; 21 | 22 | } 23 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.15-learning/src/main/java/com/aurora/bean/Student.java: -------------------------------------------------------------------------------- 1 | package com.aurora.bean; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | import 
lombok.NoArgsConstructor; 6 | 7 | /** 8 | * @author lj.michale 9 | * @description 10 | * @date 2022-04-01 11 | */ 12 | @Data 13 | @AllArgsConstructor 14 | @NoArgsConstructor 15 | public class Student { 16 | 17 | private Integer id; 18 | private String name; 19 | private Integer age; 20 | 21 | } 22 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.15-learning/src/main/java/com/aurora/example/example001/MyDeSerializer.java: -------------------------------------------------------------------------------- 1 | package com.aurora.example.example001; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import com.aurora.bean.Jason; 5 | import org.apache.flink.api.common.typeinfo.TypeInformation; 6 | import org.apache.flink.streaming.connectors.kafka.KafkaDeserializationSchema; 7 | import org.apache.flink.kafka.clients.consumer.ConsumerRecord; 8 | 9 | import java.nio.charset.StandardCharsets; 10 | 11 | /** 12 | * @author lj.michale 13 | * @description Deserializes Kafka records into Jason objects (a JSON payload is assumed) 14 | * @date 2022-04-02 15 | */ 16 | public class MyDeSerializer implements KafkaDeserializationSchema<Jason> { 17 | 18 | @Override 19 | public boolean isEndOfStream(Jason jason) { 20 | return false; 21 | } 22 | 23 | @Override 24 | public Jason deserialize(ConsumerRecord<byte[], byte[]> consumerRecord) throws Exception { 25 | // Parse the record value as a JSON-encoded Jason object. 26 | return JSON.parseObject(new String(consumerRecord.value(), StandardCharsets.UTF_8), Jason.class); 27 | } 28 | 29 | @Override 30 | public TypeInformation<Jason> getProducedType() { 31 | return TypeInformation.of(Jason.class); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.15-learning/src/main/java/com/aurora/example/example002/CustomWatermarkStrategy.java: -------------------------------------------------------------------------------- 1 | package com.aurora.example.example002; 2 | 3 | /** 4 | * @author lj.michale 5 | * @description 6 | * @date 2022-04-02 7 | */ 8 | public class CustomWatermarkStrategy { 9 | } 10 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.15-learning/src/main/java/com/aurora/feature/accumulator/PreciseAccumulator.java: -------------------------------------------------------------------------------- 1 | package com.aurora.feature.accumulator; 2 | 3 | import org.roaringbitmap.longlong.Roaring64NavigableMap; 4 | 5 | /** 6 | * @descri Exact distinct-count accumulator backed by a RoaringBitmap 7 | * 8 | * @author lj.michale 9 | * @date 2022-04-30 10 | */ 11 | public class PreciseAccumulator{ 12 | 13 | private Roaring64NavigableMap bitmap; 14 | 15 | public PreciseAccumulator(){ 16 | bitmap=new Roaring64NavigableMap(); 17 | } 18 | 19 | public void add(long id){ 20 | bitmap.addLong(id); 21 | } 22 | 23 | public long getCardinality(){ 24 | return bitmap.getLongCardinality(); 25 | } 26 | } -------------------------------------------------------------------------------- /常见大数据项目/flink1.15-learning/src/main/java/com/aurora/feature/datastream/BatchFlinkTask.java: -------------------------------------------------------------------------------- 1 | package com.aurora.feature.datastream; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | import org.apache.flink.api.common.RuntimeExecutionMode; 5 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 6 | 7 | /** 8 | * @descri Batch processing on the DataStream API 9 | * 10 | * @author lj.michale 11 | * @date 2022-03-31 12 | */ 13 | @Slf4j 14 | public class BatchFlinkTask { 15 | 16 | public static void main(String[] args) throws Exception { 17 | 18 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 19 | env.setRuntimeMode(RuntimeExecutionMode.BATCH); 20 | 21 | 22 | env.execute("BatchFlinkTask"); 23 | 24 | } 25 | 26 | } 27 | 
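PreciseAccumulator above keeps one bit per id in a Roaring64NavigableMap, so duplicate ids are absorbed and getCardinality() returns an exact distinct count, unlike approximate sketches such as HyperLogLog. A small standalone usage sketch (the demo class name is illustrative; it only needs the RoaringBitmap dependency):

package com.aurora.feature.accumulator;

public class PreciseAccumulatorDemo {

    public static void main(String[] args) {
        PreciseAccumulator acc = new PreciseAccumulator();
        // The duplicate id 2 is absorbed by the bitmap, so the count stays exact.
        acc.add(1L);
        acc.add(2L);
        acc.add(2L);
        acc.add(3L);
        System.out.println(acc.getCardinality()); // prints 3
    }
}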
-------------------------------------------------------------------------------- /常见大数据项目/flink1.15-learning/src/main/java/com/aurora/feature/func/function/WordCountReduceFunction.java: -------------------------------------------------------------------------------- 1 | package com.aurora.feature.func.function; 2 | 3 | 4 | import org.apache.flink.api.common.functions.ReduceFunction; 5 | import org.apache.flink.api.java.tuple.Tuple2; 6 | 7 | /** 8 | * @descri ReduceFunction that merges word counts (used with ReducingState) 9 | * 10 | * @author lj.michale 11 | * @date 2022-03-31 12 | */ 13 | public class WordCountReduceFunction implements ReduceFunction<Tuple2<String, Integer>> { 14 | @Override 15 | public Tuple2<String, Integer> reduce(Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) { 16 | return Tuple2.of(value1.f0, value1.f1 + value2.f1); 17 | } 18 | } -------------------------------------------------------------------------------- /常见大数据项目/flink1.15-learning/src/main/java/com/aurora/feature/func/process/TimerProcessFunction.java: -------------------------------------------------------------------------------- 1 | package com.aurora.feature.func.process; 2 | 3 | import org.apache.flink.streaming.api.functions.KeyedProcessFunction; 4 | import org.apache.flink.util.Collector; 5 | 6 | /** 7 | * @descri Keyed process function that registers a processing-time timer 8 | * 9 | * @author lj.michale 10 | * @date 2022-03-31 11 | */ 12 | public class TimerProcessFunction extends KeyedProcessFunction<String, String, String> { 13 | 14 | @Override 15 | public void processElement(String s, Context context, Collector<String> collector) throws Exception { 16 | // Register a timer 50 ms from now (an absolute timestamp is expected). 17 | context.timerService().registerProcessingTimeTimer(context.timerService().currentProcessingTime() + 50); 18 | String out = "hello " + s; 19 | collector.collect(out); 20 | } 21 | 22 | @Override 23 | public void onTimer(long timestamp, OnTimerContext ctx, Collector<String> out) throws Exception { 24 | // Fires when the registered processing time is reached 25 | out.collect(String.format("Timer triggered at timestamp %d", timestamp)); 26 | } 27 | 28 | } -------------------------------------------------------------------------------- /常见大数据项目/flink1.15-learning/src/main/java/com/aurora/feature/func/stateless/MyStatelessFlatMap.java: -------------------------------------------------------------------------------- 1 | package com.aurora.feature.func.stateless; 2 | 3 | import org.apache.flink.api.common.functions.FlatMapFunction; 4 | import org.apache.flink.util.Collector; 5 | /** 6 | * @descri Stateless operator 7 | * 8 | * @author lj.michale 9 | * @date 2022-03-31 10 | */ 11 | public class MyStatelessFlatMap implements FlatMapFunction<String, String> { 12 | @Override 13 | public void flatMap(String in, Collector<String> collector) throws Exception { 14 | String out = "hello " + in; 15 | collector.collect(out); 16 | } 17 | } -------------------------------------------------------------------------------- /常见大数据项目/flink1.15-learning/src/main/java/com/aurora/feature/func/stateless/MyStatelessMap.java: -------------------------------------------------------------------------------- 1 | package com.aurora.feature.func.stateless; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | 5 | /** 6 | * @descri Stateless operator 7 | * 8 | * @author lj.michale 9 | * @date 2022-03-31 10 | */ 11 | public class MyStatelessMap implements MapFunction<String, String> { 12 | @Override 13 | public String map(String in) throws Exception { 14 | String out = "hello " + in; 15 | return out; 16 | } 17 | } -------------------------------------------------------------------------------- /常见大数据项目/flink1.15-learning/src/main/java/com/aurora/feature/func/udf/WordToWordCountUDF.java: -------------------------------------------------------------------------------- 1 | package com.aurora.feature.func.udf; 2 | 3 | 
import org.apache.flink.api.common.functions.RichMapFunction; 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | 6 | /** 7 | * @descri UDF that maps a word to a (word, 1) tuple 8 | * 9 | * @author lj.michale 10 | * @date 2022-03-31 11 | */ 12 | public class WordToWordCountUDF extends RichMapFunction<String, Tuple2<String, Integer>> { 13 | 14 | @Override 15 | public Tuple2<String, Integer> map(String word) { 16 | Tuple2<String, Integer> wordCount = Tuple2.of(word, 1); 17 | 18 | return wordCount; 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.15-learning/src/main/java/com/aurora/feature/state/Jason.java: -------------------------------------------------------------------------------- 1 | package com.aurora.feature.state; 2 | 3 | import lombok.Data; 4 | 5 | /** 6 | * @author lj.michale 7 | * @description 8 | * @date 2022-04-10 9 | */ 10 | @Data 11 | public class Jason { 12 | 13 | private String name; 14 | 15 | private int age; 16 | 17 | } 18 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.15-learning/src/main/java/com/aurora/feature/state/UserDefinedSource.java: -------------------------------------------------------------------------------- 1 | package com.aurora.feature.state; 2 | 3 | import org.apache.flink.streaming.api.functions.source.SourceFunction; 4 | 5 | import java.math.BigDecimal; 6 | import java.util.Random; 7 | 8 | /** 9 | * @descri 10 | * 11 | * @author lj.michale 12 | * @date 2022-04-10 13 | */ 14 | public class UserDefinedSource implements SourceFunction<Jason> { 15 | 16 | @Override 17 | public void run(SourceContext<Jason> sourceContext) throws Exception { 18 | for (int i = 1; i <= 5000; ++i) { 19 | Jason jason = new Jason(); 20 | jason.setName("li.michale"); 21 | jason.setAge(100); 22 | sourceContext.collect(jason); 23 | } 24 | } 25 | 26 | @Override 27 | public void cancel() { 28 | 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.15-learning/src/main/scala/com/bigdata/bean/DataSchema.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.bean 2 | 3 | object DataSchema { 4 | 5 | // Case classes: sensor id, timestamp, temperature 6 | case class SensorReading(id: String, timestamp: Long, temperature: Double) 7 | 8 | case class MarketingUserBehavior(id:String, behavior:String, channel:String, timestamp: Long) 9 | 10 | case class MarketingViewCount(startTs:String, endTs:String, channel:String, behavior:String, count:Int) 11 | } 12 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.15-learning/src/main/scala/com/bigdata/feature/sql/batch/TableSQLJdbcAnalysisPipeline.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.feature.sql.batch 2 | 3 | import org.apache.flink.table.api.{EnvironmentSettings, TableEnvironment} 4 | 5 | object TableSQLJdbcAnalysisPipeline { 6 | 7 | def main(args: Array[String]): Unit = { 8 | 9 | // Create the Table environment 10 | val settings = EnvironmentSettings 11 | .newInstance() 12 | .inBatchMode() 13 | .build() 14 | val tEnv = TableEnvironment.create(settings) 15 | 16 | 17 | 18 | 19 | } 20 | 21 | } 22 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.15-learning/src/main/scala/com/bigdata/feature/table/FlinkTableApiExample4.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.feature.table 2 | 3 | import 
org.apache.flink.table.api.bridge.scala.StreamTableEnvironment 4 | import org.apache.flink.table.api.{EnvironmentSettings, TableEnvironment} 5 | 6 | /** 7 | * @descri Table API operations 8 | * 9 | * @author lj.michale 10 | * @date 2022-04-28 11 | */ 12 | object FlinkTableApiExample4 { 13 | 14 | def main(args: Array[String]): Unit = { 15 | 16 | // setup the environment 17 | val settings:EnvironmentSettings = EnvironmentSettings.newInstance.inStreamingMode.build 18 | val tEnv:TableEnvironment = TableEnvironment.create(settings) 19 | 20 | 21 | } 22 | 23 | } 24 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.15-learning/src/test/java/com/aurora/FakeTrafficRecordSourceUnitTest.java: -------------------------------------------------------------------------------- 1 | package com.aurora; 2 | 3 | import com.aurora.generate.FakeTrafficRecordSource; 4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 5 | 6 | /** 7 | * @descri FakeTrafficRecordSourceUnitTest 8 | * 9 | * @author lj.michale 10 | * @date 2022-03-31 11 | */ 12 | public class FakeTrafficRecordSourceUnitTest { 13 | 14 | public static void main(String[] args) throws Exception { 15 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 16 | FakeTrafficRecordSource fakeTrafficRecordSource = new FakeTrafficRecordSource(); 17 | 18 | env.addSource(fakeTrafficRecordSource).print(); 19 | 20 | env.execute(); 21 | } 22 | 23 | 24 | } 25 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.15-learning/src/test/java/com/aurora/MyStatelessMapUnitTest.java: -------------------------------------------------------------------------------- 1 | package com.aurora; 2 | 3 | import com.aurora.feature.func.stateless.MyStatelessMap; 4 | import lombok.extern.slf4j.Slf4j; 5 | import junit.framework.TestCase; 6 | import org.junit.Assert; 7 | import org.junit.Test; 8 | 9 | /** 10 | * @descri Flink unit test 11 | * 12 | * @author lj.michale 13 | * @date 2022-03-31 14 | */ 15 | @Slf4j 16 | public class MyStatelessMapUnitTest extends TestCase { 17 | 18 | @Test 19 | public void testMap() throws Exception { 20 | MyStatelessMap statelessMap = new MyStatelessMap(); 21 | String out = statelessMap.map("world"); 22 | System.out.println(out); 23 | Assert.assertEquals("hello world", out); 24 | } 25 | 26 | } 27 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.15-learning/src/test/java/com/aurora/StatefulFlatMapTest.java: -------------------------------------------------------------------------------- 1 | package com.aurora; 2 | 3 | import org.junit.Test; 4 | import com.aurora.feature.func.stateless.MyStatelessFlatMap; 5 | /** 6 | * @descri 7 | * 8 | * @author lj.michale 9 | * @date 2022-03-31 10 | */ 11 | public class StatefulFlatMapTest { 12 | 13 | @Test 14 | public void testFlatMap() throws Exception{ 15 | MyStatelessFlatMap statefulFlatMap = new MyStatelessFlatMap(); 16 | //mock the runtime context here 17 | } 18 | } -------------------------------------------------------------------------------- /常见大数据项目/flink1.15-learning/src/test/java/com/aurora/func/SentenceToWordsUDFTest.java: -------------------------------------------------------------------------------- 1 | package com.aurora.func; 2 | 3 | import com.aurora.feature.func.udf.SentenceToWordsUDF; 4 | import org.apache.flink.util.Collector; 5 | import org.junit.Test; 6 | import org.mockito.Mockito; 7 | 8 | /** 9 | * @descri 10 | * 11 | * @author 
lj.michale 12 | * @date 2022-04-01 13 | */ 14 | public class SentenceToWordsUDFTest { 15 | 16 | @Test 17 | public void flatMap() { 18 | SentenceToWordsUDF sentenceToWordsUDF = new SentenceToWordsUDF(); 19 | 20 | // Mock a Collector with Mockito 21 | Collector<String> collector = Mockito.mock(Collector.class); 22 | sentenceToWordsUDF.flatMap("hadoop spark hello", collector); 23 | 24 | // The flatMap UDF emits three words for this sentence, so verify with Mockito that each word was collected once 25 | Mockito.verify(collector, Mockito.times(1)).collect("hadoop"); 26 | Mockito.verify(collector, Mockito.times(1)).collect("spark"); 27 | Mockito.verify(collector, Mockito.times(1)).collect("hello"); 28 | 29 | } 30 | 31 | } 32 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.15-learning/src/test/java/com/aurora/window/TriggerResult.java: -------------------------------------------------------------------------------- 1 | package com.aurora.window; 2 | 3 | //public enum TriggerResult { 4 | // 5 | // /** Take no action on the window. */ 6 | // CONTINUE(false, false), 7 | // 8 | // /** {@code FIRE_AND_PURGE} evaluates the window function, emits the result, and purges the window. */ 9 | // FIRE_AND_PURGE(true, true), 10 | // 11 | // /** 12 | // * On {@code FIRE}, the window is evaluated and the result is emitted. 13 | // * The window is not purged, though: all elements are retained. 14 | // */ 15 | // FIRE(true, false), 16 | // 17 | // /** 18 | // * On {@code PURGE}, all elements in the window are cleared and discarded, 19 | // * without evaluating the window function or emitting any elements. 20 | // */ 21 | // PURGE(false, true); 22 | //} -------------------------------------------------------------------------------- /常见大数据项目/flink1.16-learning/README.md: -------------------------------------------------------------------------------- 1 | 

Hi This is LJ.Michale 2 | A bigdata developer from China 3 |

4 | 5 | 6 | - flink 1.15.0 documentation 7 | * https://nightlies.apache.org/flink/flink-docs-release-1.15/docs/dev/table/common/ 8 | * https://cwiki.apache.org/confluence/display/FLINK/1.15+Release -------------------------------------------------------------------------------- /常见大数据项目/flink1.16-learning/docs/ddl.sql: -------------------------------------------------------------------------------- 1 | 2 | create database flink; 3 | 4 | use flink; 5 | 6 | CREATE TABLE `t_student` ( 7 | `id` int(11) NOT NULL AUTO_INCREMENT, 8 | `name` varchar(255) DEFAULT NULL, 9 | `age` int(11) DEFAULT NULL, 10 | PRIMARY KEY (`id`) 11 | ) ENGINE=InnoDB AUTO_INCREMENT=7 DEFAULT CHARSET=utf8; 12 | 13 | INSERT INTO `t_student` VALUES ('1', 'jack', '18'); 14 | INSERT INTO `t_student` VALUES ('2', 'tom', '19'); 15 | INSERT INTO `t_student` VALUES ('3', 'rose', '20'); 16 | INSERT INTO `t_student` VALUES ('4', 'tom', '19'); 17 | INSERT INTO `t_student` VALUES ('5', 'jack', '18'); 18 | INSERT INTO `t_student` VALUES ('6', 'rose', '20'); 19 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.16-learning/logs/strutslog1.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/flink1.16-learning/logs/strutslog1.log -------------------------------------------------------------------------------- /常见大数据项目/flink1.16-learning/src/main/java/com/aurora/runTuringPipeline.java: -------------------------------------------------------------------------------- 1 | package com.aurora; 2 | /** 3 | * @descri 4 | * 5 | * @author lj.michale 6 | * @date 2022-05-18 7 | */ 8 | public class runTuringPipeline { 9 | 10 | 11 | 12 | } 13 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.16-learning/src/main/resources/common-prod.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/flink1.16-learning/src/main/resources/common-prod.properties -------------------------------------------------------------------------------- /常见大数据项目/flink1.16-learning/src/main/resources/common-test.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/flink1.16-learning/src/main/resources/common-test.properties -------------------------------------------------------------------------------- /常见大数据项目/flink1.16-learning/src/main/scala/com/bigdata/bean/DataSchema.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.bean 2 | 3 | object DataSchema { 4 | 5 | // Case classes: sensor id, timestamp, temperature 6 | case class SensorReading(id: String, timestamp: Long, temperature: Double) 7 | 8 | case class MarketingUserBehavior(id:String, behavior:String, channel:String, timestamp: Long) 9 | 10 | case class MarketingViewCount(startTs:String, endTs:String, channel:String, behavior:String, count:Int) 11 | } 12 | -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | | Spark topic | Resource URL 6 | | ----- | --------- 7 | | Spark Streaming backpressure mechanism | 
-------------------------------------------------------------------------------- /常见大数据项目/flink1.16-learning/logs/strutslog1.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/flink1.16-learning/logs/strutslog1.log -------------------------------------------------------------------------------- /常见大数据项目/flink1.16-learning/src/main/java/com/aurora/runTuringPipeline.java: -------------------------------------------------------------------------------- 1 | package com.aurora; 2 | /** 3 | * @descri Entry-point placeholder for the Turing pipeline (not yet implemented) 4 | * 5 | * @author lj.michale 6 | * @date 2022-05-18 7 | */ 8 | public class runTuringPipeline { 9 | 10 | 11 | 12 | } 13 | -------------------------------------------------------------------------------- /常见大数据项目/flink1.16-learning/src/main/resources/common-prod.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/flink1.16-learning/src/main/resources/common-prod.properties -------------------------------------------------------------------------------- /常见大数据项目/flink1.16-learning/src/main/resources/common-test.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/flink1.16-learning/src/main/resources/common-test.properties -------------------------------------------------------------------------------- /常见大数据项目/flink1.16-learning/src/main/scala/com/bigdata/bean/DataSchema.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.bean 2 | 3 | object DataSchema { 4 | 5 | // Case class for sensor readings: sensor id, timestamp, temperature 6 | case class SensorReading(id: String, timestamp: Long, temperature: Double) 7 | 8 | case class MarketingUserBehavior(id:String, behavior:String, channel:String, timestamp: Long) 9 | 10 | case class MarketingViewCount(startTs:String, endTs:String, channel:String, behavior:String, count:Int) 11 | } 12 | -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | | Spark Topic | Resource URL | 6 | | ----- | --------- | 7 | | Spark Streaming backpressure | https://app.yinxiang.com/fx/0a77fcb1-1598-4603-be5c-207247d5597c | 8 | 9 | -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/data/employees.json: -------------------------------------------------------------------------------- 1 | {"name":"Michael", "salary":3000} 2 | {"name":"Andy", "salary":4500} 3 | {"name":"Justin", "salary":3500} 4 | {"name":"Berta", "salary":4000} 5 | -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/data/people.json: -------------------------------------------------------------------------------- 1 | {"name":"aa bb cc dd","age":12} 2 | {"name":"ee ff","age":30} 3 | {"name":"gg","age":19} 4 | -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/data/user.txt: -------------------------------------------------------------------------------- 1 | 2 1 2 | 2 4 3 | 4 1 4 | 5 2 5 | 5 3 6 | 5 6 7 | 3 6 8 | 3 2 9 | -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/doc/avro_data.json: -------------------------------------------------------------------------------- 1 | { 2 | "namespace":"example.avro", 3 | "type":"record", 4 | "name":"User", 5 | "fields":[ 6 | { 7 | "name":"name", 8 | "type":"string" 9 | }, 10 | { 11 | "name":"favorite_number", 12 | "type":[ 13 | "int", 14 | "null" 15 | ] 16 | }, 17 | { 18 | "name":"favorite_color", 19 | "type":[ 20 | "string", 21 | "null" 22 | ] 23 | } 24 | ] 25 | } -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/doc/binlog.json: -------------------------------------------------------------------------------- 1 | {"data":[{"p_num":"1","policy_status":"1","mor_rate":"0.8800","load_time":"2019-03-17 12:54:57"}],"database":"canal_test","es":1552698141000,"id":10,"isDdl":false,"mysqlType":{"p_num":"varchar(22)","policy_status":"varchar(2)","mor_rate":"decimal(20,4)","load_time":"datetime"},"old":[{"mor_rate":"0.5500"}],"sql":"","sqlType":{"p_num":12,"policy_status":12,"mor_rate":3,"load_time":93},"table":"policy_cred","ts":1552698141621,"type":"UPDATE"} -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/doc/data.json: -------------------------------------------------------------------------------- 1 | {"deviceId":"4d6021db-7483-4911-8025-87494776ba87","deviceName":"fan temperature","deviceValue":76.3,"deviceTime":1553140083} 2 | 3 | [ 4 | { 5 | "deviceId":"4d6021db-7483-4911-8025-87494776ba87", 6 | "deviceName":"fan temperature", 7 | "deviceValue":76.3, 8 | "deviceTime":1553140083 9 | }, 10 | { 11 | "deviceId":"89cf0815-9a1e-4dd5-a2d9-ff16c2308ddf", 12 | "deviceName":"fan speed", 13 | "deviceValue":600, 14 | "deviceTime":1553140021 15 | } 16 | ]
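A small sketch (not in the repo) of how doc/binlog.json can be unpacked with fastjson, which the project already uses elsewhere; the relative path assumes the program runs from the module root.

package com.bidata.example.demos

import com.alibaba.fastjson.JSON

// Sketch: extract the changed row and its pre-image from the Canal record in doc/binlog.json.
object CanalBinlogParse {
  def main(args: Array[String]): Unit = {
    val line = scala.io.Source.fromFile("doc/binlog.json").mkString
    val record = JSON.parseObject(line)
    val table = record.getString("table")     // policy_cred
    val eventType = record.getString("type")  // UPDATE
    val row = record.getJSONArray("data").getJSONObject(0)
    val old = record.getJSONArray("old").getJSONObject(0)
    println(s"$table $eventType: mor_rate ${old.getString("mor_rate")} -> ${row.getString("mor_rate")}")
  }
}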
-------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/resources/application.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/spark-learning/src/main/resources/application.properties -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/spark-learning/src/main/resources/log4j.properties -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/scala/com/bidata/bean/ParamsList.scala: -------------------------------------------------------------------------------- 1 | package com.bidata.bean 2 | 3 | /** 4 | * Parameter entity corresponding to the result table 5 | */ 6 | class ParamsList extends Serializable { 7 | var p_num: String = _ 8 | var risk_rank: String = _ 9 | var mor_rate: Double = _ 10 | var ch_mor_rate: Double = _ 11 | var load_time: java.util.Date = _ 12 | var params_Type: String = _ 13 | override def toString = s"ParamsList($p_num, $risk_rank, $mor_rate, $ch_mor_rate, $load_time)" 14 | } 15 | -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/scala/com/bidata/common/C3p0Utils.scala: -------------------------------------------------------------------------------- 1 | package com.bidata.common 2 | 3 | import java.util.Properties 4 | 5 | import com.mchange.v2.c3p0.ComboPooledDataSource 6 | 7 | /** 8 | * @author : 9 | * @date : 10 | */ 11 | object C3p0Utils { 12 | def getDataSource(dbOptions: Map[String, String]): ComboPooledDataSource 13 | = { 14 | val properties = new Properties() 15 | dbOptions.foreach(x => properties.setProperty(x._1, x._2)) 16 | val dataSource = new ComboPooledDataSource() 17 | dataSource.setDriverClass(dbOptions("driverClass")) 18 | dataSource.setJdbcUrl(dbOptions("jdbcUrl")) 19 | dataSource.setProperties(properties) 20 | dataSource 21 | } 22 | 23 | } -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/scala/com/bidata/common/MySQLSink.scala: -------------------------------------------------------------------------------- 1 | package com.bidata.common 2 | 3 | import org.apache.spark.internal.Logging 4 | import org.apache.spark.sql.execution.streaming.Sink 5 | import org.apache.spark.sql.streaming.OutputMode 6 | import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode} 7 | 8 | /** 9 | * @author : 10 | * @date : 11 | */ 12 | class MySQLSink(sqlContext: SQLContext, parameters: Map[String, String], outputMode: OutputMode) extends Sink with Logging { 13 | override def addBatch(batchId: Long, data: DataFrame): Unit = { 14 | val query = data.queryExecution 15 | val rdd = query.toRdd 16 | // Rebuilding the DataFrame and the JDBC append are left disabled in the repo: 17 | // val df = sqlContext.internalCreateDataFrame(rdd, data.schema) 18 | // df.show(false) 19 | // df.write.format("jdbc").options(parameters).mode(SaveMode.Append).save() 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/scala/com/bidata/common/SparkSchemaUtil.scala: -------------------------------------------------------------------------------- 1 | package com.bidata.common 2 | 3 | object SparkSchemaUtil { 4 | 5 | } 6 | -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/scala/com/bidata/example/accumulator/AccumulatorExample.scala: -------------------------------------------------------------------------------- 1 | package com.bidata.example.accumulator 2 | 3 | import org.apache.spark.{SparkConf, SparkContext} 4 | import org.apache.spark.sql.SparkSession 5 | 6 | object AccumulatorExample { 7 | 8 | def main(args: Array[String]): Unit = { 9 | 10 | val spark:SparkSession = SparkSession 11 | .builder 12 |
.appName("BasicOperatorExample") 13 | .getOrCreate() 14 | // Reuse the session's context; creating a second SparkContext would fail 15 | val sc = spark.sparkContext 16 | 17 | // Accumulators: LongAccumulator, DoubleAccumulator, CollectionAccumulator, custom accumulators 18 | val fieldAcc = new FieldAccumulator 19 | sc.register(fieldAcc, "fieldAcc") 20 | 21 | val tableRDD = sc.textFile("table.csv").filter(_.split(",")(0) != "A") 22 | tableRDD.map(x => { 23 | val fields = x.split(",") 24 | val a = fields(1).toInt 25 | val b = fields(2).toLong 26 | fieldAcc.add(SumAandB(a, b)) 27 | x 28 | }).count() 29 | 30 | spark.close() 31 | 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/scala/com/bidata/example/accumulator/Spark2RDDAccumulator.scala: -------------------------------------------------------------------------------- 1 | package com.bidata.example.accumulator 2 | 3 | import org.apache.spark.{SparkConf, SparkContext} 4 | 5 | object Spark2RDDAccumulator { 6 | 7 | def main(args: Array[String]): Unit = { 8 | val sparkConf = new SparkConf().setMaster("local").setAppName("WordCount") 9 | val sc = new SparkContext(sparkConf) 10 | 11 | val rdd = sc.makeRDD(List(1,2,3,4)) 12 | // Built-in distributed accumulator 13 | val sum = sc.longAccumulator("sum") 14 | rdd.foreach( 15 | num => sum.add(num) 16 | 17 | ) 18 | // Expected result: 10 19 | println("sum = " + sum.value) 20 | sc.stop() 21 | } 22 | 23 | } 24 | -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/scala/com/bidata/example/accumulator/Spark3RDDAccumulator.scala: -------------------------------------------------------------------------------- 1 | package com.bidata.example.accumulator 2 | 3 | import org.apache.spark.rdd.RDD 4 | import org.apache.spark.{SparkConf, SparkContext} 5 | 6 | object Spark3RDDAccumulator { 7 | 8 | def main(args: Array[String]): Unit = { 9 | val sparkConf = new SparkConf().setMaster("local").setAppName("WordCount") 10 | val sc = new SparkContext(sparkConf) 11 | 12 | val rdd = sc.makeRDD(List(1,2,3,4)) 13 | // Built-in distributed accumulator: a distributed, write-only shared variable 14 | // Under-counting: if the accumulator is updated inside a transformation and no action runs, it never executes 15 | // Over-counting: each action re-runs the transformation, so every extra action adds the values again 16 | // As a rule, update accumulators inside action operators 17 | val sum = sc.longAccumulator("sum") 18 | val mapRDD: RDD[Unit] = rdd.map( 19 | num => sum.add(num) 20 | ) 21 | 22 | mapRDD.collect() 23 | mapRDD.collect() 24 | 25 | // Expected 10, but the two collect() calls make it 20 26 | println("sum = " + sum.value) 27 | sc.stop() 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/scala/com/bidata/example/accumulator/SparkRDDAccumulator.scala: -------------------------------------------------------------------------------- 1 | package com.bidata.example.accumulator 2 | 3 | import org.apache.spark.{SparkConf, SparkContext} 4 | 5 | object SparkRDDAccumulator { 6 | 7 | def main(args: Array[String]): Unit = { 8 | 9 | val sparkConf = new SparkConf().setMaster("local").setAppName("WordCount") 10 | val sc = new SparkContext(sparkConf) 11 | val rdd = sc.makeRDD(List(1,2,3,4)) 12 | var sum = 0 13 | rdd.foreach( 14 | num => {sum += num} 15 | ) 16 | 17 | // Expected 10 but actually 0: the executors increment their own copies of sum, which are never sent back to the driver 18 | println(sum) 19 | sc.stop() 20 | 21 | } 22 | 23 | } 24 |
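A sketch (mine, not part of the repo) of the two usual fixes for the over-counting shown in Spark3RDDAccumulator: cache the mapped RDD so a second action does not re-run the map, or update the accumulator only inside an action. Note that caching narrows, but does not strictly guarantee, exactly-once updates, since a lost partition gets recomputed.

package com.bidata.example.accumulator

import org.apache.spark.{SparkConf, SparkContext}

// Sketch: keeping an accumulator exact when an RDD is evaluated more than once.
object Spark4RDDAccumulator {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local").setAppName("AccumulatorFix"))
    val rdd = sc.makeRDD(List(1, 2, 3, 4))

    // Fix 1: cache before repeated actions, so the map (and the add) runs only once.
    val sum1 = sc.longAccumulator("sum1")
    val mapped = rdd.map { num => sum1.add(num); num }.cache()
    mapped.collect()
    mapped.collect()
    println("sum1 = " + sum1.value) // 10, not 20

    // Fix 2: update the accumulator only inside an action.
    val sum2 = sc.longAccumulator("sum2")
    rdd.foreach(num => sum2.add(num))
    println("sum2 = " + sum2.value) // 10

    sc.stop()
  }
}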
-------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/scala/com/bidata/example/accumulator/SumAandB.scala: -------------------------------------------------------------------------------- 1 | package com.bidata.example.accumulator 2 | 3 | // Case class holding the values accumulated by the field accumulator 4 | case class SumAandB(A: Long, B: Long) 5 | -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/scala/com/bidata/example/avro/ReadAvro.scala: -------------------------------------------------------------------------------- 1 | package com.bidata.example.avro 2 | 3 | import com.alibaba.fastjson.JSON 4 | import org.apache.avro.generic.GenericRecord 5 | import org.apache.avro.mapred.{AvroInputFormat, AvroWrapper} 6 | import org.apache.hadoop.io.NullWritable 7 | import org.apache.spark.{SparkConf, SparkContext} 8 | 9 | 10 | object ReadAvro { 11 | 12 | def main(args: Array[String]) { 13 | if (args.length < 1) { 14 | println("Usage: SparkWordCount FileName") 15 | System.exit(1) 16 | } 17 | val conf = new SparkConf().setAppName("log_deal").setMaster("local") 18 | val sc = new SparkContext(conf) 19 | val avroRDD = sc.hadoopFile[AvroWrapper[GenericRecord], NullWritable, AvroInputFormat[GenericRecord]](args(0)) 20 | avroRDD.map { l => 21 | val line = l._1.toString 22 | val json = JSON.parseObject(line) 23 | val shape = json.get("shape") 24 | val count = json.get("count") 25 | (shape, count)}.foreach(println) 26 | sc.stop() 27 | } 28 | 29 | } 30 | -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/scala/com/bidata/example/core/SparkWordCount2.scala: -------------------------------------------------------------------------------- 1 | package com.bidata.example.core 2 | 3 | import org.apache.spark.rdd.RDD 4 | import org.apache.spark.sql.SparkSession 5 | 6 | object SparkWordCount2 { 7 | 8 | def main(args: Array[String]): Unit = { 9 | 10 | val spark = SparkSession.builder().master("local").appName("WordCount").getOrCreate() 11 | 12 | val lines: RDD[String] = spark.read.textFile("datas").cache().rdd 13 | val words: RDD[String] = lines.flatMap(_.split(" ")) 14 | val wordToOne: RDD[(String, Int)] = words.map( 15 | word => (word, 1) 16 | ) 17 | 18 | val groupRdd: RDD[(String, Iterable[(String, Int)])] = wordToOne.groupBy(t => t._1) 19 | 20 | val wordCount: RDD[(String, Int)] = groupRdd.map { 21 | case (word, list) => { 22 | list.reduce((t1, t2) => { 23 | (t1._1, t1._2 + t2._2) 24 | }) 25 | } 26 | } 27 | 28 | // 5. Collect the result to the driver and print it 29 | wordCount.collect().foreach(println) 30 | 31 | spark.stop() 32 | 33 | } 34 | 35 | 36 | } 37 | -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/scala/com/bidata/example/core/SparkWordCount3.scala: -------------------------------------------------------------------------------- 1 | package com.bidata.example.core 2 | 3 | import org.apache.spark.rdd.RDD 4 | import org.apache.spark.sql.SparkSession 5 | 6 | object SparkWordCount3 { 7 | 8 | def main(args: Array[String]): Unit = { 9 | 10 | val spark = SparkSession.builder().master("local").appName("WordCount").getOrCreate() 11 | val lines: RDD[String] = spark.read.textFile("datas").cache().rdd 12 | val words: RDD[String] = lines.flatMap(_.split(" ")) 13 | 14 | val wordToOne: RDD[(String, Int)] = words.map( 15 | word => (word, 1) 16 | ) 17 | 18 | // Values sharing the same key are aggregated 19 | val wordCount: RDD[(String, Int)] = wordToOne.reduceByKey(_ + _) 20 | 21 | // 5. Collect the result to the driver and print it 22 | wordCount.collect().foreach(println) 23 | 24 | spark.stop() 25 | 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/scala/com/bidata/example/demos/MyKafkaUtils.scala:
-------------------------------------------------------------------------------- 1 | package com.bidata.example.demos 2 | 3 | object MyKafkaUtils { 4 | 5 | // Basic Kafka consumer settings; callers pass the consumer group id and the broker list 6 | def getKafkaConsumerParams(groupID: String, brokers: String): Map[String, String] = { 7 | Map( 8 | "bootstrap.servers" -> brokers, 9 | "group.id" -> groupID, 10 | "key.deserializer" -> "org.apache.kafka.common.serialization.StringDeserializer", 11 | "value.deserializer" -> "org.apache.kafka.common.serialization.StringDeserializer") 12 | } 13 | 14 | } 15 | -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/scala/com/bidata/example/demos/SparkKuDuDemo.scala: -------------------------------------------------------------------------------- 1 | package com.bidata.example.demos 2 | 3 | import org.apache.kudu.spark.kudu.KuduContext 4 | import org.apache.spark.{SparkConf, SparkContext} 5 | import org.apache.spark.sql.SparkSession 6 | 7 | object SparkKuDuDemo { 8 | 9 | def main(args: Array[String]): Unit = { 10 | 11 | // TODO: Kudu read/write example not yet implemented 12 | 13 | } 14 | 15 | } 16 | -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/scala/com/bidata/example/demos/SparkSQLDemo.scala: -------------------------------------------------------------------------------- 1 | package com.bidata.example.demos 2 | 3 | import org.apache.spark.SparkConf 4 | import org.apache.spark.sql.SparkSession 5 | 6 | object SparkSQLDemo { 7 | 8 | def main(args: Array[String]): Unit = { 9 | 10 | val sparkConf = new SparkConf().setAppName("SparkSQLDemo").setMaster("local[*]") 11 | val sparkSession = SparkSession.builder() 12 | .config(sparkConf) 13 | .enableHiveSupport() 14 | .getOrCreate() 15 | 16 | sparkSession.sql("show databases").explain() 17 | 18 | sparkSession.sql( 19 | """ 20 | |SELECT app_key,msg_id,uid,platform 21 | |FROM edw.third_msg_status_latest_hour_log 22 | |WHERE data_hour >= 2021020400 23 | |AND data_hour <= 2021020500 24 | |""".stripMargin).explain() 25 | 26 | 27 | 28 | sparkSession.stop() 29 | 30 | } 31 | 32 | } 33 | -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/scala/com/bidata/example/demos/WordCountWithMonitor.scala: -------------------------------------------------------------------------------- 1 | package com.bidata.example.demos 2 | 3 | 4 | import org.apache.spark.{SparkConf, SparkContext} 5 | 6 | object WordCountWithMonitor { 7 | def main(args: Array[String]): Unit = { 8 | val conf = new SparkConf() 9 | .setAppName(s"${this.getClass.getCanonicalName}") 10 | .setMaster("local[*]") 11 | 12 | val sc = new SparkContext(conf) 13 | 14 | // sc.addSparkListener(new SparkMonitoringListener) 15 | 16 | val lines = sc.textFile("C:\\Users\\wangsu\\Desktop\\流式项目课件2\\antispider24\\src\\main\\lua\\controller.lua") 17 | lines.flatMap(_.split("\\s+")) 18 | .map((_, 1)) 19 | .reduceByKey(_+_) 20 | .collect() 21 | 22 | sc.stop() 23 | } 24 | } 25 | 26 | -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/scala/com/bidata/example/factory/Dept.scala: -------------------------------------------------------------------------------- 1 | package com.bidata.example.factory 2 | 3 | import com.typesafe.config.Config 4 | import org.apache.spark.sql.SparkSession 5 | 6 | trait Dept extends Serializable { 7 | // attributes 8 | def config: Config 9 | def product_json: Map[String, List[Map[String, String]]] 10 | 11 | // methods 12 | def createTable(spark: SparkSession, btime: String, etime: String): Unit 13 | } 14 | --------------------------------------------------------------------------------
/常见大数据项目/spark-learning/src/main/scala/com/bidata/example/factory/Factory.scala: -------------------------------------------------------------------------------- 1 | package com.bidata.example.factory 2 | 3 | import com.typesafe.config.Config 4 | 5 | trait Factory { 6 | 7 | // attributes 8 | def config: Config 9 | def product_json: Map[String, List[Map[String, String]]] 10 | 11 | // methods 12 | def getDept(): Dept 13 | } 14 | -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/scala/com/bidata/example/hudi/example/User.scala: -------------------------------------------------------------------------------- 1 | package com.bidata.example.hudi.example 2 | 3 | import com.alibaba.fastjson.JSON 4 | 5 | case class User(id :Integer 6 | , birthday : String 7 | , name :String 8 | , createTime :String 9 | , position :String) 10 | 11 | object User { 12 | 13 | def apply(json: String) = { 14 | val jsonObject = JSON.parseObject(json) 15 | 16 | val id = jsonObject.getInteger("id") 17 | val birthday = jsonObject.getString("birthday") 18 | val name = jsonObject.getString("name") 19 | 20 | val createTime = jsonObject.getString("createTime") 21 | val position = jsonObject.getString("position") 22 | 23 | new User(id 24 | , birthday 25 | , name 26 | , createTime 27 | , position) 28 | } 29 | 30 | } -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/scala/com/bidata/example/iceberg/example002/IcebergExample002_02.scala: -------------------------------------------------------------------------------- 1 | //package com.bidata.example.iceberg.example002 2 | // 3 | //import org.apache.iceberg.catalog.TableIdentifier 4 | //import org.apache.iceberg.hive.HiveCatalog 5 | //import org.apache.iceberg.spark.SparkSchemaUtil 6 | //import org.apache.spark 7 | //import org.apache.spark.SparkConf 8 | //import org.apache.spark.sql.SparkSession 9 | // 10 | //object IcebergExample002_02 { 11 | // 12 | // def main(args: Array[String]): Unit = { 13 | // 14 | // val sparkConf:SparkConf = new SparkConf().setAppName(this.getClass.getSimpleName).setMaster("local[*]") 15 | // val spark:SparkSession = SparkSession.builder().config(sparkConf).enableHiveSupport().getOrCreate() 16 | // val catalog:HiveCatalog = new HiveCatalog(spark.sparkContext.hadoopConfiguration) 17 | // val data = Seq((1, "a"), (2, "b"), (3, "c")) 18 | // 19 | // 20 | // } 21 | // 22 | //} 23 | -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/scala/com/bidata/example/kyro/KryoExample.scala: -------------------------------------------------------------------------------- 1 | package com.bidata.example.kyro 2 | 3 | import org.apache.spark.SparkConf 4 | import org.apache.spark.sql.SparkSession 5 | 6 | object KryoExample { 7 | 8 | def main(args: Array[String]): Unit = { 9 | 10 | // Serializer settings must be on the SparkConf before the session is created; 11 | // the registrator is passed as a class name, not as a literal string of Java code 12 | val conf = new SparkConf() 13 | .setAppName("KryoExample") 14 | .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 15 | .set("spark.kryo.registrator", classOf[YourKryoRegistrator].getName) 16 | 17 | val spark = SparkSession 18 | .builder 19 | .config(conf) 20 | .getOrCreate() 21 | 22 | spark.close() 23 | 24 | } 25 | 26 | } 27 |
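For comparison, a hedged alternative (not a file in the repo): SparkConf.registerKryoClasses registers classes with Kryo directly, so a custom registrator such as YourKryoRegistrator further below is only needed for finer control, for example choosing a specific serializer per class.

package com.bidata.example.kyro

import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

// Sketch: register classes with Kryo without writing a KryoRegistrator.
object KryoRegisterDirect {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("KryoRegisterDirect")
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .registerKryoClasses(Array(classOf[YourClass])) // convenience API on SparkConf
    val spark = SparkSession.builder.config(conf).getOrCreate()
    spark.close()
  }
}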
-------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/scala/com/bidata/example/kyro/YourClass.java: -------------------------------------------------------------------------------- 1 | package com.bidata.example.kyro; 2 | 3 | /** 4 | * @author lj.michale 5 | * @description 6 | * @date 2021-05-05 7 | */ 8 | public class YourClass { 9 | } 10 | -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/scala/com/bidata/example/kyro/YourKryoRegistrator.java: -------------------------------------------------------------------------------- 1 | package com.bidata.example.kyro; 2 | 3 | import com.esotericsoftware.kryo.Kryo; 4 | import com.esotericsoftware.kryo.serializers.FieldSerializer; 5 | import org.apache.spark.serializer.KryoRegistrator; 6 | 7 | /** 8 | * @author lj.michale 9 | * @description 10 | * @date 2021-05-05 11 | */ 12 | public class YourKryoRegistrator implements KryoRegistrator { 13 | 14 | @Override 15 | public void registerClasses(Kryo kryo) { 16 | // Register the custom class with the Kryo serialization library 17 | kryo.register(YourClass.class, new FieldSerializer<>(kryo, YourClass.class)); 18 | } 19 | 20 | } 21 | -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/scala/com/bidata/example/matric/SparkMetricsUtils.scala: -------------------------------------------------------------------------------- 1 | package com.bidata.example.matric 2 | 3 | import com.alibaba.fastjson.JSON 4 | 5 | /** 6 | * Utility for fetching Spark monitoring data 7 | */ 8 | object SparkMetricsUtils { 9 | 10 | /** 11 | * Fetch the metrics JSON from a monitoring endpoint 12 | */ 13 | def getMetricsJson(url: String, 14 | connectTimeout: Int = 5000, 15 | readTimeout: Int = 5000, 16 | requestMethod: String = "GET") ={ 17 | 18 | import java.net.{HttpURLConnection, URL} 19 | 20 | val connection = (new URL(url)).openConnection.asInstanceOf[HttpURLConnection] 21 | connection.setConnectTimeout(connectTimeout) 22 | connection.setReadTimeout(readTimeout) 23 | connection.setRequestMethod(requestMethod) 24 | 25 | val inputStream = connection.getInputStream 26 | val content = scala.io.Source.fromInputStream(inputStream).mkString 27 | if(inputStream != null) inputStream.close() 28 | 29 | JSON.parseObject(content) 30 | 31 | } 32 | } 33 | 34 |
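A usage sketch for the utility above (mine; the URL and application id are placeholders): Spark's monitoring REST API serves JSON under /api/v1 on the driver UI port, and a single application resource returns a JSON object that getMetricsJson can parse.

package com.bidata.example.matric

// Sketch: fetch one application's info from the Spark REST API (driver UI port, default 4040).
object SparkMetricsUtilsDemo {
  def main(args: Array[String]): Unit = {
    val appId = "app-20220520120000-0001" // placeholder; take the real id from your cluster
    val info = SparkMetricsUtils.getMetricsJson(s"http://localhost:4040/api/v1/applications/$appId")
    println(info.toJSONString)
  }
}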
-------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/scala/com/bidata/example/operator/BasicOperatorExample.scala: -------------------------------------------------------------------------------- 1 | package com.bidata.example.operator 2 | 3 | import org.apache.spark.sql.SparkSession 4 | 5 | object BasicOperatorExample { 6 | 7 | 8 | def main(args: Array[String]): Unit = { 9 | 10 | val spark = SparkSession 11 | .builder 12 | .appName("BasicOperatorExample") 13 | .getOrCreate() 14 | 15 | // Accumulators: LongAccumulator, DoubleAccumulator, CollectionAccumulator, custom accumulators 16 | 17 | } 18 | 19 | 20 | } 21 | -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/scala/com/bidata/example/rdd/README.txt: -------------------------------------------------------------------------------- 1 | 2 | RDD transformation operations 3 | 1.map(func) 4 | 2. mapPartitions(func) (prefer mapPartitions where possible; see the sketch after this list) 5 | 3.glom 6 | 4. flatMap(func) (map, then flatten) 7 | 5.filter(func) 8 | 6.mapPartitionsWithIndex(func) 9 | 7.sample(withReplacement, fraction, seed) 10 | 8.distinct([numTasks]) 11 | 9.partitionBy 12 | 10.coalesce(numPartitions) 13 | 11. repartition(numPartitions) 14 | 12.repartitionAndSortWithinPartitions(partitioner) 15 | 13.sortBy(func,[ascending], [numTasks]) 16 | 14.union(otherDataset) 17 | 15.subtract(otherDataset) 18 | 16.intersection(otherDataset) 19 | 17.cartesian(otherDataset) 20 | 18.pipe(command, [envVars]) 21 | 19.join(otherDataset, [numTasks]) 22 | 20.cogroup(otherDataset, [numTasks]) 23 | 21.reduceByKey(func, [numTasks]) 24 | 22.groupByKey 25 | 23.combineByKey[C] 26 | 24.aggregateByKey 27 | 25.foldByKey 28 | 26.sortByKey([ascending], [numTasks]) 29 | 27. mapValues 30 | 31 | Source: https://blog.csdn.net/qq_43081842/article/details/100676870
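A short sketch (not in the repo) of why item 2 above recommends mapPartitions: per-record setup cost, such as building a formatter or opening a connection, is paid once per partition instead of once per record.

package com.bidata.example.rdd

import org.apache.spark.{SparkConf, SparkContext}

// Sketch: amortize expensive setup with mapPartitions.
object MapPartitionsSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local").setAppName("MapPartitionsSketch"))
    val rdd = sc.makeRDD(1 to 100, 4)

    // With map, the formatter would be constructed 100 times;
    // with mapPartitions it is constructed once per partition (4 times).
    val out = rdd.mapPartitions { iter =>
      val fmt = new java.text.DecimalFormat("000") // expensive setup, once per partition
      iter.map(i => fmt.format(i.toLong))
    }
    out.collect().foreach(println)
    sc.stop()
  }
}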
-------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/scala/com/bidata/example/sparkcore/FancyApp.scala: -------------------------------------------------------------------------------- 1 | package com.bidata.example.sparkcore 2 | 3 | import org.apache.spark.sql.{DataFrame, SparkSession} 4 | import org.apache.spark.sql.functions.sum 5 | 6 | object FancyApp { 7 | 8 | def appMain(args: Array[String]) = { 9 | // configure spark 10 | val spark = SparkSession 11 | .builder 12 | .appName("parjobs") 13 | .getOrCreate() 14 | 15 | import spark.implicits._ 16 | 17 | val df = spark.sparkContext.parallelize(1 to 100).toDF 18 | doFancyDistinct(df, "hdfs:///dis.parquet") 19 | doFancySum(df, "hdfs:///sum.parquet") 20 | 21 | } 22 | 23 | def doFancyDistinct(df: DataFrame, outPath: String) = df.distinct.write.parquet(outPath) 24 | 25 | def doFancySum(df: DataFrame, outPath: String) = df.agg(sum("value")).write.parquet(outPath) 26 | 27 | } -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/scala/com/bidata/example/sparksql/LogInfo.java: -------------------------------------------------------------------------------- 1 | package com.bidata.example.sparksql; 2 | 3 | import java.io.Serializable; 4 | import java.util.List; 5 | /** 6 | * @author lj.michale 7 | * @description 8 | * @date 2021-04-16 9 | */ 10 | 11 | public class LogInfo implements Serializable{ 12 | private static final long serialVersionUID = 4053810260183406530L; 13 | public String logFilePath; 14 | public List appInfo; 15 | public String getLogFilePath() { 16 | return logFilePath; 17 | } 18 | public List getAppInfo() { 19 | return appInfo; 20 | } 21 | public void setLogFilePath(String logFilePath) { 22 | this.logFilePath = logFilePath; 23 | } 24 | public void setAppInfo(List appInfo) { 25 | this.appInfo = appInfo; 26 | } 27 | } -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/scala/com/bidata/example/structuredstreaming/example/RateSourceExample.scala: -------------------------------------------------------------------------------- 1 | package com.bidata.example.structuredstreaming.example 2 | 3 | import org.apache.spark.sql.SparkSession 4 | 5 | /**
 6 | * @author : 7 | * @date : 8 | * Test of the rate-based data source 9 | */ 10 | object RateSourceExample { 11 | def main(args: Array[String]): Unit = { 12 | val spark = SparkSession 13 | .builder() 14 | .appName(this.getClass.getSimpleName) 15 | .master("local[2]") 16 | .getOrCreate() 17 | 18 | val rate = spark.readStream 19 | .format("rate") 20 | // Rows generated per second (default 1) 21 | .option("rowsPerSecond", 10) 22 | .option("numPartitions", 10) 23 | .load() 24 | 25 | val query = rate.writeStream 26 | .outputMode("update") 27 | .format("console") 28 | .option("truncate", value = false) 29 | .start() 30 | 31 | query.awaitTermination() 32 | } 33 | } -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/scala/com/bidata/example/structuredstreaming/example/SocketSourceExample.scala: -------------------------------------------------------------------------------- 1 | package com.bidata.example.structuredstreaming.example 2 | 3 | import org.apache.spark.sql.SparkSession 4 | 5 | /** 6 | * @author : 7 | * @date : 8 | * Socket-based data source; start a listener first, e.g. nc -lk 9090 9 | */ 10 | object SocketSourceExample { 11 | def main(args: Array[String]): Unit = { 12 | val spark = SparkSession 13 | .builder() 14 | .appName(this.getClass.getSimpleName) 15 | .master("local[2]") 16 | .getOrCreate() 17 | 18 | val lines = spark.readStream 19 | .format("socket") 20 | .option("host", "localhost") 21 | .option("port", 9090) 22 | .load() 23 | 24 | val query = lines.writeStream 25 | .outputMode("update") 26 | .format("console") 27 | .option("truncate", value = false) 28 | .start() 29 | 30 | query.awaitTermination() 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/scala/com/bidata/example/udf/example001/SparkSQLUDF1.scala: -------------------------------------------------------------------------------- 1 | package com.bidata.example.udf.example001 2 | 3 | import org.apache.spark.SparkConf 4 | import org.apache.spark.sql.{DataFrame, SparkSession} 5 | 6 | object SparkSQLUDF1 { 7 | 8 | def main(args: Array[String]): Unit = { 9 | 10 | val sparkConf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("udf") 11 | val spark: SparkSession = SparkSession.builder().config(sparkConf).getOrCreate() 12 | 13 | val df: DataFrame = spark.read.json("datas/user.json") 14 | df.createOrReplaceTempView("user") 15 | 16 | spark.udf.register("prefixName", (name:String) => { 17 | "Name: " + name 18 | }) 19 | 20 | spark.sql("select age, prefixName(username) from user").show() 21 | 22 | spark.close() 23 | } 24 | 25 | 26 | } 27 | -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/main/scala/com/bidata/example/udf/example001/UdfTest.scala: -------------------------------------------------------------------------------- 1 | package com.bidata.example.udf.example001 2 | 3 | import org.apache.spark.sql.{DataFrame, SparkSession} 4 | 5 | object UdfTest { 6 | 7 | def main(args: Array[String]): Unit = { 8 | // Spark session 9 | val spark: SparkSession = SparkSession.builder().appName("testSql").master("local").getOrCreate() 10 | // Log level 11 | spark.sparkContext.setLogLevel("error") 12 | // Read the file and create a temp view 13 | val df: DataFrame = spark.read.json("file:///E:/OpenSource/GitHub/bigdata-learning/常见大数据项目/spark-learning/data/people.json") 14 | df.createOrReplaceTempView("person") 15 | // SQL query 16 | val dfPerson: DataFrame = spark.sql("select name,age from person") 17 | dfPerson.show() 18 | // UDF: convert names to upper case 19 | spark.udf.register("toBig",(str:String) => str.toUpperCase) 20 | spark.sql("select toBig(name),age from person").show() 21 | 22 | spark.close() 23 | } 24 | 25 | } 26 | -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/test/scala/com/bidata/app/CustomerCrm.scala: -------------------------------------------------------------------------------- 1 | package com.bidata.app 2 | 3 | import com.bidata.view.CustomerView 4 | 5 | object CustomerCrm { 6 | 7 | def main(args: Array[String]): Unit = { 8 | new CustomerView().mainMenu() 9 | } 10 |
11 | } 12 | -------------------------------------------------------------------------------- /常见大数据项目/spark-learning/src/test/scala/com/bidata/bean/Customer.scala: -------------------------------------------------------------------------------- 1 | package com.bidata.bean 2 | 3 | class Customer { 4 | var id: Int = _ 5 | var name: String = _ 6 | var gender: Char = _ 7 | var age: Short = _ 8 | var tel: String = _ 9 | var email: String = _ 10 | 11 | // Auxiliary constructor (must start by invoking another constructor) 12 | def this(name: String, gender: Char, age: Short, tel: String, email: String) { 13 | this() 14 | this.name = name 15 | this.gender = gender 16 | this.age = age 17 | this.tel = tel 18 | this.email = email 19 | } 20 | 21 | // Auxiliary constructor 22 | def this(id: Int, name: String, gender: Char, age: Short, tel: String, email: String) { 23 | this(name, gender, age, tel, email) 24 | this.id = id 25 | } 26 | 27 | override def toString: String = { 28 | this.id + "\t\t" + this.name + "\t\t" + this.gender + "\t\t" + this.age + "\t\t" + this.tel + "\t\t" + this.email 29 | } 30 | } -------------------------------------------------------------------------------- /常见大数据项目/spark3.2-learning/dataset/a.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/spark3.2-learning/dataset/a.txt -------------------------------------------------------------------------------- /常见大数据项目/spark3.2-learning/dataset/test.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/spark3.2-learning/dataset/test.log -------------------------------------------------------------------------------- /常见大数据项目/spark3.2-learning/dataset/u.item: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/常见大数据项目/spark3.2-learning/dataset/u.item -------------------------------------------------------------------------------- /常见大数据项目/spark3.2-learning/dataset/users.txt: -------------------------------------------------------------------------------- 1 | 1,zhangsan,20 2 | 2,lisi,21 3 | 3,wangwu,19 4 | 4,zhaoliu,18 -------------------------------------------------------------------------------- /常见大数据项目/spark3.2-learning/src/main/java/com/turing/TuringSparkPipeline.java: -------------------------------------------------------------------------------- 1 | package com.turing; 2 | 3 | /** 4 | * @descri Entry-point placeholder for the Turing Spark pipeline (not yet implemented) 5 | * 6 | * @author lj.michale 7 | * @date 2022-05-19 8 | */ 9 | public class TuringSparkPipeline { 10 | 11 | 12 | } 13 | -------------------------------------------------------------------------------- /常见大数据项目/spark3.2-learning/src/main/java/com/turing/common/PropertiesUtils.java: -------------------------------------------------------------------------------- 1 | package com.turing.common; 2 | 3 | import lombok.extern.slf4j.Slf4j; 4 | 5 | import java.io.IOException; 6 | import java.util.Properties; 7 | 8 | /** 9 | * @descri 10 | * 11 | * @author lj.michale 12 | * @date 2022-04-05 13 | */ 14 | @Slf4j 15 | public class PropertiesUtils { 16 | 17 | /** 18 | * @descri Load a Properties object from the given properties file so its values can be read conveniently 19 | * 20 | * @param filePath path of the properties file on the classpath 21 | * @return the Properties object 22 | */ 23 | public static Properties getProperties(String filePath) { 24 | final Properties properties = new
Properties(); 25 | 26 | try { 27 | properties.load(PropertiesUtils.class.getClassLoader().getResourceAsStream(filePath)); 28 | } catch (IOException e) { 29 | log.error("Failed to load the properties file: {}", e.getMessage()); 30 | e.printStackTrace(); 31 | } 32 | return properties; 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /常见大数据项目/spark3.2-learning/src/main/resources/spark-dev.properties: -------------------------------------------------------------------------------- 1 | 2 | # ####### ClickHouse DEV/TEST 3 | jdbc.clickhouse.url=jdbc:clickhouse://10.8.182.61:8123/report 4 | jdbc.clickhouse.user=default 5 | jdbc.clickhouse.password=FJ/LMeo5 6 | jdbc.clickhouse.driver=ru.yandex.clickhouse.ClickHouseDriver 7 | 8 | # ####### Kafka DEV/TEST 9 | kafka.broker.url=p88-dataplat-slave1:9092,p88-dataplat-slave2:9092,p88-dataplat-slave3:9092 10 | -------------------------------------------------------------------------------- /常见大数据项目/spark3.2-learning/src/main/resources/spark-prod.properties: -------------------------------------------------------------------------------- 1 | 2 | # ####### ClickHouse QA/PROD 3 | jdbc.clickhouse.url=jdbc:clickhouse://10.90.11.238:8123/report 4 | jdbc.clickhouse.user=luoj 5 | jdbc.clickhouse.password=ctg@2022 6 | jdbc.clickhouse.driver=ru.yandex.clickhouse.ClickHouseDriver -------------------------------------------------------------------------------- /常见大数据项目/spark3.2-learning/src/main/scala/com/bigdata/common/config/GlobalConfig.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.common.config 2 | 3 | /** 4 | * @descr GlobalConfig 5 | * 6 | * @author lj.michale 7 | * @date 2022-05-20 8 | */ 9 | object GlobalConfig { 10 | 11 | 12 | 13 | } 14 | -------------------------------------------------------------------------------- /常见大数据项目/spark3.2-learning/src/main/scala/com/bigdata/common/constant/GlobalConstant.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.common.constant 2 | 3 | /** 4 | * @descr GlobalConstant 5 | * 6 | * @author lj.michale 7 | * @date 2022-05-20 8 | */ 9 | object GlobalConstant { 10 | 11 | // Global configuration file for pipeline execution: spark-dev.properties or spark-prod.properties 12 | val Pipeline_Global_Prpos:String = "spark-dev.properties" 13 | 14 | 15 | } 16 | -------------------------------------------------------------------------------- /常见大数据项目/spark3.2-learning/src/main/scala/com/bigdata/common/utils/DataTimeUtils.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.common.utils 2 | 3 | object DataTimeUtils { 4 | 5 | } 6 | -------------------------------------------------------------------------------- /常见大数据项目/spark3.2-learning/src/main/scala/com/bigdata/common/utils/JdbcUtils.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.common.utils 2 | 3 | object JdbcUtils { 4 | val url = "jdbc:mysql://192.168.146.222:3306/etldemo" 5 | val user = "root" 6 | val driver = "com.mysql.jdbc.Driver" 7 | val pwd = "1" 8 | val table_access_logs = "access_logs" 9 | val table_full_access_logs = "full_access_logs" 10 | } 11 |
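A sketch (mine, not in the repo) tying these pieces together: reading a ClickHouse table through Spark's JDBC source using the jdbc.clickhouse.* keys from spark-dev.properties; the table name below is a placeholder.

package com.bigdata.pipeline.etl

import com.turing.common.PropertiesUtils
import org.apache.spark.sql.SparkSession

// Sketch: read a ClickHouse table via Spark JDBC with connection settings from spark-dev.properties.
object ClickHouseReadSketch {
  def main(args: Array[String]): Unit = {
    val props = PropertiesUtils.getProperties("spark-dev.properties")
    val spark = SparkSession.builder().master("local[*]").appName("ClickHouseReadSketch").getOrCreate()

    val df = spark.read.format("jdbc")
      .option("url", props.getProperty("jdbc.clickhouse.url"))
      .option("driver", props.getProperty("jdbc.clickhouse.driver"))
      .option("user", props.getProperty("jdbc.clickhouse.user"))
      .option("password", props.getProperty("jdbc.clickhouse.password"))
      .option("dbtable", "report.some_table") // placeholder table name
      .load()

    df.show(10, truncate = false)
    spark.stop()
  }
}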
-------------------------------------------------------------------------------- /常见大数据项目/spark3.2-learning/src/main/scala/com/bigdata/common/utils/SparkPipelineUtils.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.common.utils 2 | 3 | object SparkPipelineUtils { 4 | 5 | } 6 | -------------------------------------------------------------------------------- /常见大数据项目/spark3.2-learning/src/main/scala/com/bigdata/pipeline/etl/SparkEtlExample002.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata.pipeline.etl 2 | 3 | import org.apache.spark.{SparkConf, SparkContext} 4 | import org.apache.spark.sql.SparkSession 5 | 6 | object SparkEtlExample002 { 7 | 8 | def main(args: Array[String]): Unit = { 9 | 10 | val conf: SparkConf = new SparkConf() 11 | .setAppName("SparkEtlExample002") 12 | .setMaster("local[*]") 13 | val spark: SparkSession = SparkSession.builder().config(conf).getOrCreate() 14 | val sc: SparkContext = spark.sparkContext 15 | 16 | val fileRDD = sc.textFile("E:\\OpenSource\\GitHub\\bigdata-learning\\常见大数据项目\\spark3.2-learning\\dataset\\app.log") 17 | 18 | 19 | } 20 | 21 | } 22 | -------------------------------------------------------------------------------- /常见大数据项目/spark3.2-learning/src/main/scala/com/bigdata/runSparkPipeline.scala: -------------------------------------------------------------------------------- 1 | package com.bigdata 2 | 3 | import java.util.Properties 4 | 5 | import com.turing.common.PropertiesUtils 6 | import org.apache.log4j.Logger 7 | 8 | /** 9 | * @descri 10 | * 11 | * @author lj.michale 12 | * @date 2022-05-20 13 | */ 14 | object runSparkPipeline { 15 | 16 | val logger = Logger.getLogger(runSparkPipeline.getClass) 17 | 18 | def main(args: Array[String]): Unit = { 19 | 20 | val pipelineProps = PropertiesUtils.getProperties("spark-dev.properties") 21 | 22 | 23 | } 24 | 25 | 26 | } 27 | -------------------------------------------------------------------------------- /常见组件整理/分布式存储系统/Hive/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /常见组件整理/数据CDC/Debezium/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | References: 9 | https://debezium.io/ 10 | 11 | -------------------------------------------------------------------------------- /常见组件整理/数据湖/Hudi/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | References: 5 | Official site: https://hudi.apache.org/ 6 | GitHub: https://github.com/apache/hudi 7 | 8 | 9 | 10 | hdfs dfs -ls -h -R /tmp/delta* && hdfs dfs -ls -h -R /tmp/hudi* && hdfs dfs -ls -h -R /tmp/iceberg* 11 | 12 | -------------------------------------------------------------------------------- /常见组件整理/数据计算引擎/Flink/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Yarn Per-Job mode 6 | ./bin/flink run -m yarn-cluster -yn 3 -ys 3 -ynm bjsxt02 -c com.test.flink.wc.StreamWordCount ./appjars/test-1.0-SNAPSHOT.jar 7 | 8 | 9 | Yarn Session mode 10 | 11 | Yarn Application mode 12 | ./bin/flink run-application -t yarn-application hdfs://hadoop-master:9000/shadow/FlinkSQLTest-1.0-SNAPSHOT.jar 13 | 14 | 15 | yarn application -kill application_1601372571363_0001 -------------------------------------------------------------------------------- /常见组件整理/消息系统/Pulsar/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | Related resources: 11 | https://pulsar.apache.org/en/download/ 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 |
-------------------------------------------------------------------------------- /开源社区文献整理/1、邸星星—基于Iceberg的湖仓一体架构实践(已美化).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/开源社区文献整理/1、邸星星—基于Iceberg的湖仓一体架构实践(已美化).pdf -------------------------------------------------------------------------------- /开源社区文献整理/2、孙伟—iceberg和对象存储构建数据湖方案(已美化).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/开源社区文献整理/2、孙伟—iceberg和对象存储构建数据湖方案(已美化).pdf -------------------------------------------------------------------------------- /开源社区文献整理/3、陈俊杰-百亿数据入湖实战(已美化).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/开源社区文献整理/3、陈俊杰-百亿数据入湖实战(已美化).pdf -------------------------------------------------------------------------------- /开源社区文献整理/4、胡争-Flink和Iceberg如何解决数据入湖面临的挑战(已美化).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lj-michale/bigdata-learning/311ccf0959392b6c54371d7927e9df05c5369bf6/开源社区文献整理/4、胡争-Flink和Iceberg如何解决数据入湖面临的挑战(已美化).pdf -------------------------------------------------------------------------------- /环境搭建篇/大数据领域常见概念与术语: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------- 4 | Hadoop 5 | -------------------------------------------- 6 | NameNode: the HDFS master daemon in Hadoop 7 | DataNode: the HDFS worker daemon in Hadoop 8 | SecondaryNameNode: the HDFS auxiliary (checkpoint) daemon in Hadoop 9 | 10 | ResourceManager: the YARN master daemon in Hadoop 11 | NodeManager: the YARN worker daemon in Hadoop 12 | 13 | 14 | -------------------------------------------- 15 | Flink 16 | -------------------------------------------- 17 | JobManager: the Flink master daemon 18 | TaskManager: the Flink worker daemon 19 | --------------------------------------------------------------------------------