├── elasticsearch ├── src │ ├── main │ │ ├── resources │ │ │ ├── application.properties │ │ │ └── log4j2.properties │ │ └── java │ │ │ └── com │ │ │ └── bigdata │ │ │ └── study │ │ │ └── elasticsearch │ │ │ └── ElasticsearchApplication.java │ └── test │ │ └── java │ │ └── com │ │ └── bigdata │ │ └── study │ │ └── elasticsearch │ │ └── ElasticsearchApplicationTests.java ├── .gitignore └── pom.xml ├── flink-async-io ├── src │ ├── main │ │ ├── resources │ │ │ └── application.properties │ │ └── java │ │ │ └── com │ │ │ └── bigdata │ │ │ └── study │ │ │ └── flinkasyncio │ │ │ ├── source │ │ │ └── SimpleSource.java │ │ │ ├── async │ │ │ └── AsyncDataBaseRequest.java │ │ │ └── FlinkAsyncIoApplication.java │ └── test │ │ └── java │ │ └── com │ │ └── bigdata │ │ └── study │ │ └── flinkasyncio │ │ └── FlinkAsyncIoApplicationTests.java ├── .gitignore └── pom.xml ├── flink-jdbc-hbase ├── src │ ├── main │ │ ├── resources │ │ │ └── application.properties │ │ └── java │ │ │ └── com │ │ │ └── bigdata │ │ │ └── study │ │ │ └── flinkjdbchbase │ │ │ ├── FlinkJdbcHbaseApplication.java │ │ │ ├── source │ │ │ └── JdbcSource.java │ │ │ ├── core │ │ │ ├── Jdbc2Hbase.java │ │ │ └── FlinkFromTxt.java │ │ │ └── sink │ │ │ └── HbaseSink.java │ └── test │ │ └── java │ │ └── com │ │ └── bigdata │ │ └── study │ │ └── flinkjdbchbase │ │ └── FlinkJdbcHbaseApplicationTests.java ├── .gitignore └── pom.xml ├── flink-sideoutput ├── src │ ├── main │ │ ├── resources │ │ │ └── application.properties │ │ └── java │ │ │ └── com │ │ │ └── bigdata │ │ │ └── study │ │ │ └── flinksideoutput │ │ │ ├── tag │ │ │ └── SideOutputTag.java │ │ │ ├── process │ │ │ ├── ProcessTokenizer.java │ │ │ └── KeyedTokenizer.java │ │ │ └── FlinkSideoutputApplication.java │ └── test │ │ └── java │ │ └── com │ │ └── bigdata │ │ └── study │ │ └── flinksideoutput │ │ └── FlinkSideoutputApplicationTests.java ├── .gitignore └── pom.xml ├── kafka-stream ├── src │ ├── main │ │ ├── resources │ │ │ ├── application.properties │ │ │ ├── users.csv │ │ │ ├── items.csv │ │ │ ├── log4j.properties │ │ │ └── orders.csv │ │ └── java │ │ │ └── com │ │ │ └── bigdata │ │ │ └── study │ │ │ └── kafkastream │ │ │ ├── serdes │ │ │ ├── SerdesFactory.java │ │ │ ├── GenericDeserializer.java │ │ │ └── GenericSerializer.java │ │ │ ├── model │ │ │ ├── User.java │ │ │ ├── Item.java │ │ │ └── Order.java │ │ │ ├── utils │ │ │ └── HashPartitioner.java │ │ │ ├── timeextractor │ │ │ └── OrderTimestampExtractor.java │ │ │ └── producer │ │ │ ├── UserProducer.java │ │ │ ├── ItemProducer.java │ │ │ └── OrderProducer.java │ └── test │ │ └── java │ │ └── com │ │ └── bigdata │ │ └── study │ │ └── kafkastream │ │ └── KafkaStreamApplicationTests.java ├── .gitignore └── pom.xml ├── flink-kafka-hbase ├── src │ ├── main │ │ ├── resources │ │ │ ├── application.properties │ │ │ └── log4j2.properties │ │ └── java │ │ │ └── com │ │ │ └── bigdata │ │ │ └── study │ │ │ └── flinkkafkahbase │ │ │ ├── FlinkKafkaHbaseApplication.java │ │ │ ├── watermarks │ │ │ └── FlinkHbaseWaterMarks.java │ │ │ ├── model │ │ │ └── Metric.java │ │ │ ├── core │ │ │ ├── Kafka2Hbase.java │ │ │ └── Hbase2Kafka.java │ │ │ ├── sink │ │ │ └── FlinkHbaseSink.java │ │ │ └── source │ │ │ └── FlinkHbaseSource.java │ └── test │ │ └── java │ │ └── com │ │ └── bigdata │ │ └── study │ │ └── flinkkafkahbase │ │ └── FlinkKafkaHbaseApplicationTests.java ├── .gitignore └── pom.xml ├── flink-kafka-source ├── src │ ├── main │ │ ├── resources │ │ │ ├── application.properties │ │ │ └── logback.xml │ │ └── java │ │ │ └── com │ │ │ └── bigdata │ │ │ └── 
study │ │ │ └── flinkkafkasource │ │ │ ├── watermarks │ │ │ └── ConsumerWaterMarkEmitter.java │ │ │ └── FlinkKafkaSourceApplication.java │ └── test │ │ └── java │ │ └── com │ │ └── bigdata │ │ └── study │ │ └── flinkkafkasource │ │ └── FlinkKafkaSourceApplicationTests.java ├── .gitignore └── pom.xml ├── bigdata-study.iml ├── spark-phoenix ├── src │ ├── main │ │ ├── resources │ │ │ └── application.properties │ │ └── java │ │ │ └── com │ │ │ └── bigdata │ │ │ └── study │ │ │ └── sparkphoenix │ │ │ ├── utils │ │ │ └── PhoenixUtil.java │ │ │ ├── SparkPhoenixApplication.java │ │ │ └── apps │ │ │ └── SparkPhoenixReadHbase.java │ └── test │ │ └── java │ │ └── com │ │ └── bigdata │ │ └── study │ │ └── sparkphoenix │ │ ├── SparkPhoenixApplicationTests.java │ │ └── phoenix │ │ └── SparkPhoenixTest.java ├── .gitignore └── pom.xml ├── README.md ├── flink-hdfs ├── src │ ├── main │ │ ├── resources │ │ │ └── application.properties │ │ └── java │ │ │ └── com │ │ │ └── bigdata │ │ │ └── study │ │ │ └── flinkhdfs │ │ │ ├── FlinkHdfsApplication.java │ │ │ ├── utils │ │ │ └── HadoopConfig.java │ │ │ ├── zip │ │ │ └── FlinkHdfsZip.java │ │ │ └── core │ │ │ └── FlinkHdfs.java │ └── test │ │ └── java │ │ └── com │ │ └── bigdata │ │ └── study │ │ └── flinkhdfs │ │ └── FlinkHdfsApplicationTests.java ├── .gitignore └── pom.xml ├── dataflow-stream-kafka-source ├── src │ ├── main │ │ ├── resources │ │ │ ├── META-INT │ │ │ │ └── spring-configuration-metadata-whitelist.properties │ │ │ └── application.properties │ │ └── java │ │ │ └── com │ │ │ └── bigdata │ │ │ └── study │ │ │ └── dataflowstreamkafkasource │ │ │ ├── DataflowStreamKafkaSourceApplication.java │ │ │ ├── prop │ │ │ └── KafkaSourceProperties.java │ │ │ ├── config │ │ │ └── KafkaSourceConfig.java │ │ │ └── utils │ │ │ └── JsonMapper.java │ └── test │ │ └── java │ │ └── com │ │ └── bigdata │ │ └── study │ │ └── dataflowstreamkafkasource │ │ └── DataflowStreamKafkaSourceApplicationTests.java ├── .gitignore └── pom.xml ├── flink-common └── src │ └── main │ └── java │ ├── exception │ └── ExceptionUtils.java │ ├── utils │ ├── GsonUtils.java │ ├── ExecutionEnvUtil.java │ ├── HttpUtil.java │ └── KafkaUtils.java │ ├── watermarks │ └── MetricWatermark.java │ ├── model │ └── Metrics.java │ ├── constant │ └── PropertiesConstants.java │ └── schemas │ └── MetricSchema.java ├── flink-kafka11-sink ├── src │ ├── main │ │ ├── resources │ │ │ ├── application.properties │ │ │ └── logback.xml │ │ └── java │ │ │ └── com │ │ │ └── bigdata │ │ │ └── study │ │ │ └── flinkkafkasink │ │ │ └── FlinkKafkaSinkApplication.java │ └── test │ │ └── java │ │ └── com │ │ └── bigdata │ │ └── study │ │ └── flinkkafkasink │ │ └── FlinkKafkaSinkApplicationTests.java ├── .gitignore └── pom.xml ├── dataflow-stream-redis-pub-sink ├── src │ ├── main │ │ ├── resources │ │ │ ├── META-INF │ │ │ │ └── spring-configuration-metadata-whitelist.properties │ │ │ └── application.properties │ │ └── java │ │ │ └── com │ │ │ └── bigdata │ │ │ └── study │ │ │ └── dataflowstreamredispubsink │ │ │ ├── prop │ │ │ └── RedisPubProperties.java │ │ │ ├── DataflowStreamRedisPubSinkApplication.java │ │ │ └── config │ │ │ └── RedisStreamPubConfig.java │ └── test │ │ └── java │ │ └── com │ │ └── bigdata │ │ └── study │ │ └── dataflowstreamredispubsink │ │ └── DataflowStreamRedisPubSinkApplicationTests.java ├── .gitignore └── pom.xml ├── dataflow-stream-redis-set-processor ├── src │ ├── main │ │ ├── resources │ │ │ ├── META-INT │ │ │ │ └── spring-configuration-metadata-whitelist.properties │ │ │ └── application.properties │ 
│ └── java │ │ │ └── com │ │ │ └── bigdata │ │ │ └── study │ │ │ └── dataflowstreamredissetprocessor │ │ │ ├── prop │ │ │ └── RedisSetProperties.java │ │ │ ├── DataflowStreamRedisSetProcessorApplication.java │ │ │ ├── config │ │ │ └── RedisStreamProcessorConfig.java │ │ │ └── utils │ │ │ └── JsonMapper.java │ └── test │ │ └── java │ │ └── com │ │ └── bigdata │ │ └── study │ │ └── dataflowstreamredissetprocessor │ │ └── DataflowStreamRedisSetProcessorApplicationTests.java ├── .gitignore └── pom.xml ├── flink-elasticsearch-sink ├── .gitignore ├── src │ ├── main │ │ ├── resources │ │ │ ├── application.properties │ │ │ └── logback.xml │ │ └── java │ │ │ └── com │ │ │ └── bigdata │ │ │ └── study │ │ │ └── flinkelasticsearchsink │ │ │ ├── handler │ │ │ └── FlinkFailHandler.java │ │ │ └── FlinkElasticsearchSinkApplication.java │ └── test │ │ └── java │ │ └── com │ │ └── bigdata │ │ └── study │ │ └── flinkelasticsearchsink │ │ └── FlinkElasticsearchSinkApplicationTests.java └── pom.xml ├── fork-join ├── src │ └── main │ │ └── java │ │ └── forkjoin │ │ ├── ParallelStream.java │ │ ├── ForkJoinApp.java │ │ └── MyForkJoinTask.java └── pom.xml └── pom.xml /elasticsearch/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flink-async-io/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flink-jdbc-hbase/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flink-sideoutput/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /kafka-stream/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flink-kafka-hbase/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flink-kafka-source/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /bigdata-study.iml: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /spark-phoenix/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | spring.application.name=spark-phoenix 2 | server.port=-1 -------------------------------------------------------------------------------- /kafka-stream/src/main/resources/users.csv: -------------------------------------------------------------------------------- 1 | Jack, BJ, male, 23 2 | Lily, SH, female, 21 3 | Mike, SZ, male, 22 4 | Lucy, GZ, female, 20 -------------------------------------------------------------------------------- 
/kafka-stream/src/main/resources/items.csv:
--------------------------------------------------------------------------------
iphone, BJ, phone, 5388.88
ipad, SH, pad, 4888.88
iwatch, SZ, watch, 2668.88
ipod, GZ, pod, 1888.88
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# bigdata-study
Study notes for big data components, covering Data Flow, Spring Cloud Stream, Elasticsearch, Flink, Spark, Kafka, Phoenix, Hive and HBase.

This is just personal study and practice; the code is not written cleanly, so please don't bother forking it!
--------------------------------------------------------------------------------
/flink-hdfs/src/main/resources/application.properties:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/laixiangshun/bigdata-study/HEAD/flink-hdfs/src/main/resources/application.properties
--------------------------------------------------------------------------------
/dataflow-stream-kafka-source/src/main/resources/META-INT/spring-configuration-metadata-whitelist.properties:
--------------------------------------------------------------------------------
configuration-properties.classes=com.bigdata.study.dataflowstreamkafkasource.prop.KafkaSourceProperties
--------------------------------------------------------------------------------
/elasticsearch/src/main/resources/log4j2.properties:
--------------------------------------------------------------------------------
appender.console.type = Console
appender.console.name = console
appender.console.layout.type = PatternLayout

rootLogger.level = info
rootLogger.appenderRef.console.ref = console
--------------------------------------------------------------------------------
/flink-kafka-hbase/src/main/resources/log4j2.properties:
--------------------------------------------------------------------------------
appender.console.type = Console
appender.console.name = console
appender.console.layout.type = PatternLayout

rootLogger.level = info
rootLogger.appenderRef.console.ref = console
--------------------------------------------------------------------------------
/flink-common/src/main/java/exception/ExceptionUtils.java:
--------------------------------------------------------------------------------
package exception;

/**
 * Exception utility class
 **/
public class ExceptionUtils {

    /**
     * Walks the cause chain and reports whether it contains an exception of the given type.
     */
    public static boolean containsThrowable(Throwable throwable, Class<?> type) {
        Throwable current = throwable;
        while (current != null) {
            if (type.isInstance(current)) {
                return true;
            }
            current = current.getCause();
        }
        return false;
    }
}
--------------------------------------------------------------------------------
/flink-kafka11-sink/src/main/resources/application.properties:
--------------------------------------------------------------------------------
kafka.brokers=192.168.20.48:9092
kafka.group.id=metrics-group-test
kafka.zookeeper.connect=192.168.20.48:2181
metrics.topic=alert-metrics
stream.parallelism=5
stream.checkpoint.interval=1000
stream.checkpoint.enable=false
--------------------------------------------------------------------------------
/dataflow-stream-redis-pub-sink/src/main/resources/META-INF/spring-configuration-metadata-whitelist.properties:
--------------------------------------------------------------------------------
configuration-properties.classes= com.bigdata.study.dataflowstreamredispubsink.prop.RedisPubProperties,\
org.springframework.boot.autoconfigure.data.redis.RedisProperties, \
org.springframework.boot.autoconfigure.data.redis.RedisProperties$Pool 4 | -------------------------------------------------------------------------------- /dataflow-stream-redis-set-processor/src/main/resources/META-INT/spring-configuration-metadata-whitelist.properties: -------------------------------------------------------------------------------- 1 | configuration-properties.classes=com.bigdata.study.dataflowstreamredissetprocessor.prop.RedisSetProperties,\ 2 | org.springframework.boot.autoconfigure.data.redis.RedisProperties, \ 3 | org.springframework.boot.autoconfigure.data.redis.RedisProperties$Pool 4 | -------------------------------------------------------------------------------- /elasticsearch/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .apt_generated 6 | .classpath 7 | .factorypath 8 | .project 9 | .settings 10 | .springBeans 11 | .sts4-cache 12 | 13 | ### IntelliJ IDEA ### 14 | .idea 15 | *.iws 16 | *.iml 17 | *.ipr 18 | 19 | ### NetBeans ### 20 | /nbproject/private/ 21 | /build/ 22 | /nbbuild/ 23 | /dist/ 24 | /nbdist/ 25 | /.nb-gradle/ -------------------------------------------------------------------------------- /flink-hdfs/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .apt_generated 6 | .classpath 7 | .factorypath 8 | .project 9 | .settings 10 | .springBeans 11 | .sts4-cache 12 | 13 | ### IntelliJ IDEA ### 14 | .idea 15 | *.iws 16 | *.iml 17 | *.ipr 18 | 19 | ### NetBeans ### 20 | /nbproject/private/ 21 | /build/ 22 | /nbbuild/ 23 | /dist/ 24 | /nbdist/ 25 | /.nb-gradle/ -------------------------------------------------------------------------------- /kafka-stream/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .apt_generated 6 | .classpath 7 | .factorypath 8 | .project 9 | .settings 10 | .springBeans 11 | .sts4-cache 12 | 13 | ### IntelliJ IDEA ### 14 | .idea 15 | *.iws 16 | *.iml 17 | *.ipr 18 | 19 | ### NetBeans ### 20 | /nbproject/private/ 21 | /build/ 22 | /nbbuild/ 23 | /dist/ 24 | /nbdist/ 25 | /.nb-gradle/ -------------------------------------------------------------------------------- /spark-phoenix/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .apt_generated 6 | .classpath 7 | .factorypath 8 | .project 9 | .settings 10 | .springBeans 11 | .sts4-cache 12 | 13 | ### IntelliJ IDEA ### 14 | .idea 15 | *.iws 16 | *.iml 17 | *.ipr 18 | 19 | ### NetBeans ### 20 | /nbproject/private/ 21 | /build/ 22 | /nbbuild/ 23 | /dist/ 24 | /nbdist/ 25 | /.nb-gradle/ -------------------------------------------------------------------------------- /flink-async-io/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .apt_generated 6 | .classpath 7 | .factorypath 8 | .project 9 | .settings 10 | .springBeans 11 | .sts4-cache 12 | 13 | ### IntelliJ IDEA ### 14 | .idea 15 | *.iws 16 | *.iml 17 | *.ipr 18 | 19 | ### NetBeans ### 20 | /nbproject/private/ 21 | /build/ 22 | /nbbuild/ 23 | /dist/ 24 | /nbdist/ 25 | /.nb-gradle/ -------------------------------------------------------------------------------- 
/flink-jdbc-hbase/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .apt_generated 6 | .classpath 7 | .factorypath 8 | .project 9 | .settings 10 | .springBeans 11 | .sts4-cache 12 | 13 | ### IntelliJ IDEA ### 14 | .idea 15 | *.iws 16 | *.iml 17 | *.ipr 18 | 19 | ### NetBeans ### 20 | /nbproject/private/ 21 | /build/ 22 | /nbbuild/ 23 | /dist/ 24 | /nbdist/ 25 | /.nb-gradle/ -------------------------------------------------------------------------------- /flink-kafka-hbase/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .apt_generated 6 | .classpath 7 | .factorypath 8 | .project 9 | .settings 10 | .springBeans 11 | .sts4-cache 12 | 13 | ### IntelliJ IDEA ### 14 | .idea 15 | *.iws 16 | *.iml 17 | *.ipr 18 | 19 | ### NetBeans ### 20 | /nbproject/private/ 21 | /build/ 22 | /nbbuild/ 23 | /dist/ 24 | /nbdist/ 25 | /.nb-gradle/ -------------------------------------------------------------------------------- /flink-kafka-source/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .apt_generated 6 | .classpath 7 | .factorypath 8 | .project 9 | .settings 10 | .springBeans 11 | .sts4-cache 12 | 13 | ### IntelliJ IDEA ### 14 | .idea 15 | *.iws 16 | *.iml 17 | *.ipr 18 | 19 | ### NetBeans ### 20 | /nbproject/private/ 21 | /build/ 22 | /nbbuild/ 23 | /dist/ 24 | /nbdist/ 25 | /.nb-gradle/ -------------------------------------------------------------------------------- /flink-kafka11-sink/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .apt_generated 6 | .classpath 7 | .factorypath 8 | .project 9 | .settings 10 | .springBeans 11 | .sts4-cache 12 | 13 | ### IntelliJ IDEA ### 14 | .idea 15 | *.iws 16 | *.iml 17 | *.ipr 18 | 19 | ### NetBeans ### 20 | /nbproject/private/ 21 | /build/ 22 | /nbbuild/ 23 | /dist/ 24 | /nbdist/ 25 | /.nb-gradle/ -------------------------------------------------------------------------------- /flink-sideoutput/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .apt_generated 6 | .classpath 7 | .factorypath 8 | .project 9 | .settings 10 | .springBeans 11 | .sts4-cache 12 | 13 | ### IntelliJ IDEA ### 14 | .idea 15 | *.iws 16 | *.iml 17 | *.ipr 18 | 19 | ### NetBeans ### 20 | /nbproject/private/ 21 | /build/ 22 | /nbbuild/ 23 | /dist/ 24 | /nbdist/ 25 | /.nb-gradle/ -------------------------------------------------------------------------------- /dataflow-stream-kafka-source/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .apt_generated 6 | .classpath 7 | .factorypath 8 | .project 9 | .settings 10 | .springBeans 11 | .sts4-cache 12 | 13 | ### IntelliJ IDEA ### 14 | .idea 15 | *.iws 16 | *.iml 17 | *.ipr 18 | 19 | ### NetBeans ### 20 | /nbproject/private/ 21 | /build/ 22 | /nbbuild/ 23 | /dist/ 24 | /nbdist/ 25 | /.nb-gradle/ -------------------------------------------------------------------------------- /flink-elasticsearch-sink/.gitignore: 
-------------------------------------------------------------------------------- 1 | /target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .apt_generated 6 | .classpath 7 | .factorypath 8 | .project 9 | .settings 10 | .springBeans 11 | .sts4-cache 12 | 13 | ### IntelliJ IDEA ### 14 | .idea 15 | *.iws 16 | *.iml 17 | *.ipr 18 | 19 | ### NetBeans ### 20 | /nbproject/private/ 21 | /build/ 22 | /nbbuild/ 23 | /dist/ 24 | /nbdist/ 25 | /.nb-gradle/ -------------------------------------------------------------------------------- /dataflow-stream-redis-pub-sink/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .apt_generated 6 | .classpath 7 | .factorypath 8 | .project 9 | .settings 10 | .springBeans 11 | .sts4-cache 12 | 13 | ### IntelliJ IDEA ### 14 | .idea 15 | *.iws 16 | *.iml 17 | *.ipr 18 | 19 | ### NetBeans ### 20 | /nbproject/private/ 21 | /build/ 22 | /nbbuild/ 23 | /dist/ 24 | /nbdist/ 25 | /.nb-gradle/ -------------------------------------------------------------------------------- /flink-sideoutput/src/main/java/com/bigdata/study/flinksideoutput/tag/SideOutputTag.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinksideoutput.tag; 2 | 3 | import org.apache.flink.util.OutputTag; 4 | 5 | /** 6 | * @Description 7 | * @Author hasee 8 | * @Date 2019/1/11 9 | **/ 10 | public class SideOutputTag { 11 | public static final OutputTag wordTag = new OutputTag("rejected") { 12 | }; 13 | } 14 | -------------------------------------------------------------------------------- /dataflow-stream-redis-set-processor/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .apt_generated 6 | .classpath 7 | .factorypath 8 | .project 9 | .settings 10 | .springBeans 11 | .sts4-cache 12 | 13 | ### IntelliJ IDEA ### 14 | .idea 15 | *.iws 16 | *.iml 17 | *.ipr 18 | 19 | ### NetBeans ### 20 | /nbproject/private/ 21 | /build/ 22 | /nbbuild/ 23 | /dist/ 24 | /nbdist/ 25 | /.nb-gradle/ -------------------------------------------------------------------------------- /flink-elasticsearch-sink/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | kafka.brokers=192.168.20.48:9092 2 | kafka.group.id=metrics-group-test 3 | kafka.zookeeper.connect=192.168.20.48:2181 4 | metrics.topic=alert-metrics 5 | stream.parallelism=5 6 | stream.checkpoint.interval=1000 7 | stream.checkpoint.enable=false 8 | elasticsearch.hosts=192.168.20.48:9201 9 | elasticsearch.bulk.flush.max.actions=40 10 | stream.sink.parallelism=5 -------------------------------------------------------------------------------- /flink-hdfs/src/main/java/com/bigdata/study/flinkhdfs/FlinkHdfsApplication.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkhdfs; 2 | 3 | import org.springframework.boot.SpringApplication; 4 | import org.springframework.boot.autoconfigure.SpringBootApplication; 5 | 6 | @SpringBootApplication 7 | public class FlinkHdfsApplication { 8 | 9 | public static void main(String[] args) { 10 | SpringApplication.run(FlinkHdfsApplication.class, args); 11 | } 12 | } 13 | 14 | -------------------------------------------------------------------------------- 
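The SideOutputTag above only declares the tag; a minimal sketch of how such a tag is typically consumed, assuming the tag carries String words, a placeholder in-memory input stream, and the ProcessTokenizer class from the same flink-sideoutput module (the environment setup and input data are assumptions, not repository code):

import com.bigdata.study.flinksideoutput.process.ProcessTokenizer;
import com.bigdata.study.flinksideoutput.tag.SideOutputTag;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class SideOutputUsageSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Placeholder input; any DataStream<String> of text lines would do here.
        DataStream<String> lines = env.fromElements("a side output example line");

        // Main output: (word, 1) pairs for the short words.
        SingleOutputStreamOperator<Tuple2<String, Integer>> shortWords =
                lines.process(new ProcessTokenizer());

        // Side output: words longer than 5 characters that processElement() routed to the tag.
        DataStream<String> longWords = shortWords.getSideOutput(SideOutputTag.wordTag);
        longWords.print();

        env.execute("side-output usage sketch");
    }
}
--------------------------------------------------------------------------------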
/elasticsearch/src/main/java/com/bigdata/study/elasticsearch/ElasticsearchApplication.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.elasticsearch; 2 | 3 | import org.springframework.boot.SpringApplication; 4 | import org.springframework.boot.autoconfigure.SpringBootApplication; 5 | 6 | @SpringBootApplication 7 | public class ElasticsearchApplication { 8 | 9 | public static void main(String[] args) { 10 | SpringApplication.run(ElasticsearchApplication.class, args); 11 | } 12 | 13 | } 14 | 15 | -------------------------------------------------------------------------------- /flink-jdbc-hbase/src/main/java/com/bigdata/study/flinkjdbchbase/FlinkJdbcHbaseApplication.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkjdbchbase; 2 | 3 | import org.springframework.boot.SpringApplication; 4 | import org.springframework.boot.autoconfigure.SpringBootApplication; 5 | 6 | @SpringBootApplication 7 | public class FlinkJdbcHbaseApplication { 8 | 9 | public static void main(String[] args) { 10 | SpringApplication.run(FlinkJdbcHbaseApplication.class, args); 11 | } 12 | 13 | } 14 | 15 | -------------------------------------------------------------------------------- /flink-hdfs/src/test/java/com/bigdata/study/flinkhdfs/FlinkHdfsApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkhdfs; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class FlinkHdfsApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /flink-kafka-hbase/src/main/java/com/bigdata/study/flinkkafkahbase/FlinkKafkaHbaseApplication.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkkafkahbase; 2 | 3 | import org.springframework.boot.SpringApplication; 4 | import org.springframework.boot.autoconfigure.SpringBootApplication; 5 | 6 | @SpringBootApplication 7 | public class FlinkKafkaHbaseApplication { 8 | 9 | public static void main(String[] args) { 10 | SpringApplication.run(FlinkKafkaHbaseApplication.class, args); 11 | } 12 | 13 | } 14 | 15 | -------------------------------------------------------------------------------- /kafka-stream/src/test/java/com/bigdata/study/kafkastream/KafkaStreamApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.kafkastream; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class KafkaStreamApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /flink-async-io/src/test/java/com/bigdata/study/flinkasyncio/FlinkAsyncIoApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkasyncio; 2 | 3 | 
import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class FlinkAsyncIoApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /spark-phoenix/src/test/java/com/bigdata/study/sparkphoenix/SparkPhoenixApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.sparkphoenix; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class SparkPhoenixApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /elasticsearch/src/test/java/com/bigdata/study/elasticsearch/ElasticsearchApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.elasticsearch; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class ElasticsearchApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /flink-jdbc-hbase/src/test/java/com/bigdata/study/flinkjdbchbase/FlinkJdbcHbaseApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkjdbchbase; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class FlinkJdbcHbaseApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /flink-kafka-hbase/src/test/java/com/bigdata/study/flinkkafkahbase/FlinkKafkaHbaseApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkkafkahbase; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class FlinkKafkaHbaseApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /flink-kafka11-sink/src/test/java/com/bigdata/study/flinkkafkasink/FlinkKafkaSinkApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkkafkasink; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import 
org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class FlinkKafkaSinkApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /flink-sideoutput/src/test/java/com/bigdata/study/flinksideoutput/FlinkSideoutputApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinksideoutput; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class FlinkSideoutputApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /flink-kafka-source/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /flink-kafka-source/src/test/java/com/bigdata/study/flinkkafkasource/FlinkKafkaSourceApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkkafkasource; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class FlinkKafkaSourceApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /flink-kafka11-sink/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /flink-elasticsearch-sink/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /kafka-stream/src/main/java/com/bigdata/study/kafkastream/serdes/SerdesFactory.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.kafkastream.serdes; 2 | 3 | import org.apache.kafka.common.serialization.Serde; 4 | import org.apache.kafka.common.serialization.Serdes; 5 | 6 | /** 7 | * @Description 8 | * @Author hasee 9 | * @Date 2019/1/8 10 | **/ 11 | public class SerdesFactory { 12 | 13 | public static Serde serdeFrom(Class tClass) { 14 | return Serdes.serdeFrom(new GenericSerializer<>(tClass), new GenericDeserializer<>(tClass)); 15 | } 16 | } 17 | -------------------------------------------------------------------------------- 
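SerdesFactory above delegates to GenericSerializer and GenericDeserializer, which appear in the module tree but not in this excerpt. A minimal Gson-based sketch of what such a pair could look like; only the class names and the Class argument (taken from the factory call) come from the repository, the JSON-based implementation is an assumption:

import com.google.gson.Gson;
import org.apache.kafka.common.serialization.Deserializer;
import org.apache.kafka.common.serialization.Serializer;

import java.nio.charset.StandardCharsets;
import java.util.Map;

// Hedged sketch: JSON-encode any value of type T with Gson.
class GenericSerializer<T> implements Serializer<T> {
    private final Gson gson = new Gson();

    public GenericSerializer(Class<T> targetClass) {
        // The target class is accepted to mirror the factory call; Gson does not need it to serialize.
    }

    @Override
    public void configure(Map<String, ?> configs, boolean isKey) { }

    @Override
    public byte[] serialize(String topic, T data) {
        return data == null ? null : gson.toJson(data).getBytes(StandardCharsets.UTF_8);
    }

    @Override
    public void close() { }
}

// Hedged sketch: decode the JSON bytes back into the target class.
class GenericDeserializer<T> implements Deserializer<T> {
    private final Gson gson = new Gson();
    private final Class<T> targetClass;

    public GenericDeserializer(Class<T> targetClass) {
        this.targetClass = targetClass;
    }

    @Override
    public void configure(Map<String, ?> configs, boolean isKey) { }

    @Override
    public T deserialize(String topic, byte[] data) {
        return data == null ? null : gson.fromJson(new String(data, StandardCharsets.UTF_8), targetClass);
    }

    @Override
    public void close() { }
}
--------------------------------------------------------------------------------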
/flink-elasticsearch-sink/src/test/java/com/bigdata/study/flinkelasticsearchsink/FlinkElasticsearchSinkApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkelasticsearchsink; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class FlinkElasticsearchSinkApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /dataflow-stream-kafka-source/src/test/java/com/bigdata/study/dataflowstreamkafkasource/DataflowStreamKafkaSourceApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.dataflowstreamkafkasource; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class DataflowStreamKafkaSourceApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /dataflow-stream-redis-pub-sink/src/test/java/com/bigdata/study/dataflowstreamredispubsink/DataflowStreamRedisPubSinkApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.dataflowstreamredispubsink; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class DataflowStreamRedisPubSinkApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /dataflow-stream-redis-set-processor/src/test/java/com/bigdata/study/dataflowstreamredissetprocessor/DataflowStreamRedisSetProcessorApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.dataflowstreamredissetprocessor; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class DataflowStreamRedisSetProcessorApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /dataflow-stream-redis-pub-sink/src/main/java/com/bigdata/study/dataflowstreamredispubsink/prop/RedisPubProperties.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.dataflowstreamredispubsink.prop; 2 | 3 | import org.springframework.boot.context.properties.ConfigurationProperties; 4 | 5 | /** 6 | * @Description 7 | * @Author hasee 8 | * @Date 2019/1/2 9 | **/ 10 | @ConfigurationProperties("redis") 11 | public class RedisPubProperties { 12 | 13 | private 
String topic; 14 | 15 | public String getTopic() { 16 | return topic; 17 | } 18 | 19 | public void setTopic(String topic) { 20 | this.topic = topic; 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /kafka-stream/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=INFO, stdout 2 | 3 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n 6 | 7 | #log4j.appender.fileAppender=org.apache.log4j.FileAppender 8 | #log4j.appender.fileAppender.File=kafka-request.log 9 | #log4j.appender.fileAppender.layout=org.apache.log4j.PatternLayout 10 | #log4j.appender.fileAppender.layout.ConversionPattern= %-4r [%t] %-5p %c %x - %m%n 11 | 12 | 13 | # Turn on all our debugging info 14 | log4j.logger.kafka=WARN 15 | log4j.logger.org=WARN 16 | -------------------------------------------------------------------------------- /dataflow-stream-redis-set-processor/src/main/java/com/bigdata/study/dataflowstreamredissetprocessor/prop/RedisSetProperties.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.dataflowstreamredissetprocessor.prop; 2 | 3 | import org.springframework.boot.context.properties.ConfigurationProperties; 4 | 5 | /** 6 | * @Description 7 | * @Author hasee 8 | * @Date 2019/1/2 9 | **/ 10 | @ConfigurationProperties("redis") 11 | public class RedisSetProperties { 12 | 13 | private String setName; 14 | 15 | public String getSetName() { 16 | return setName; 17 | } 18 | 19 | public void setSetName(String setName) { 20 | this.setName = setName; 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /flink-common/src/main/java/utils/GsonUtils.java: -------------------------------------------------------------------------------- 1 | package utils; 2 | 3 | import com.google.gson.Gson; 4 | 5 | import java.nio.charset.Charset; 6 | 7 | /** 8 | * Gson 序列化,反序列化操作 9 | **/ 10 | public class GsonUtils { 11 | private static final Gson gson = new Gson(); 12 | 13 | public static T fromJson(String value, Class tClass) { 14 | return gson.fromJson(value, tClass); 15 | } 16 | 17 | public static String toJson(Object value) { 18 | return gson.toJson(value); 19 | } 20 | 21 | public static byte[] toJsonBytes(Object value) { 22 | return gson.toJson(value).getBytes(Charset.forName("utf-8")); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /kafka-stream/src/main/resources/orders.csv: -------------------------------------------------------------------------------- 1 | Jack, iphone, 2016-11-11 00:00:01, 3 2 | Jack, ipad, 2016-11-11 00:00:02, 4 3 | Jack, iwatch, 2016-11-11 00:00:03, 5 4 | Jack, ipod, 2016-11-11 00:00:04, 4 5 | 6 | Lily, ipad, 2016-11-11 00:00:06, 3 7 | Lily, iwatch, 2016-11-11 00:00:07, 4 8 | Lily, iphone, 2016-11-11 00:00:08, 2 9 | Lily, ipod, 2016-11-11 00:00:09, 3 10 | 11 | Mike, ipad, 2016-11-11 00:00:11, 2 12 | Mike, iwatch, 2016-11-11 00:00:12, 3 13 | Mike, iphone, 2016-11-11 00:00:13, 4 14 | Mike, ipod, 2016-11-11 00:00:14, 3 15 | 16 | Lucy, ipod, 2016-11-11 00:00:16, 3 17 | Lucy, ipad, 2016-11-11 00:00:17, 4 18 | Lucy, iwatch, 2016-11-11 00:00:18, 3 19 | Lucy, iphone, 2016-11-11 00:00:19, 5 -------------------------------------------------------------------------------- 
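The users.csv, items.csv and orders.csv files above are the sample data for the kafka-stream module. A minimal sketch of the kind of KStream/KTable enrichment this data suggests, assuming topics named "orders" and "users" keyed by user name and a local broker; only the Order and User models and SerdesFactory come from the repository:

import com.bigdata.study.kafkastream.model.Order;
import com.bigdata.study.kafkastream.model.User;
import com.bigdata.study.kafkastream.serdes.SerdesFactory;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KTable;

import java.util.Properties;

public class OrderEnrichmentSketch {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "order-enrichment-sketch");
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // assumed broker address

        StreamsBuilder builder = new StreamsBuilder();

        // Orders as a stream and users as a changelog table, both keyed by user name (assumed).
        KStream<String, Order> orders = builder.stream("orders",
                Consumed.with(Serdes.String(), SerdesFactory.serdeFrom(Order.class)));
        KTable<String, User> users = builder.table("users",
                Consumed.with(Serdes.String(), SerdesFactory.serdeFrom(User.class)));

        // Enrich each order with the buyer's address and print the result.
        orders.leftJoin(users, (order, user) ->
                        order.getUserName() + " (" + (user == null ? "unknown" : user.getAddress()) + ") bought "
                                + order.getQuantity() + " x " + order.getItemName())
                .foreach((userName, line) -> System.out.println(line));

        new KafkaStreams(builder.build(), props).start();
    }
}
--------------------------------------------------------------------------------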
/fork-join/src/main/java/forkjoin/ParallelStream.java: -------------------------------------------------------------------------------- 1 | package forkjoin; 2 | 3 | import java.time.Duration; 4 | import java.time.Instant; 5 | import java.util.stream.LongStream; 6 | 7 | /** 8 | * 并行流计算 9 | * 底层为fork join的实现 10 | * 效率高于直接使用顺序流 fork join 11 | **/ 12 | public class ParallelStream { 13 | public static void main(String[] args) { 14 | Instant startTime = Instant.now(); 15 | long result = LongStream.rangeClosed(0, 10_000_00_100L) 16 | .parallel() 17 | .reduce(0, Long::sum); 18 | System.out.println(result); 19 | Instant endTime = Instant.now(); 20 | System.out.println("计算10亿条数据耗时:" + Duration.between(startTime, endTime).toMillis()); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /fork-join/src/main/java/forkjoin/ForkJoinApp.java: -------------------------------------------------------------------------------- 1 | package forkjoin; 2 | 3 | import java.time.Duration; 4 | import java.time.Instant; 5 | import java.util.concurrent.ForkJoinPool; 6 | 7 | /** 8 | * 顺序流使用fork join 9 | **/ 10 | public class ForkJoinApp { 11 | public static void main(String[] args) { 12 | Instant startTime = Instant.now(); 13 | ForkJoinPool forkJoinPool = new ForkJoinPool(); 14 | MyForkJoinTask myForkJoinTask = new MyForkJoinTask(0L, 10_000_00_000L); 15 | Long result = forkJoinPool.invoke(myForkJoinTask); 16 | System.out.println(result); 17 | Instant endTime = Instant.now(); 18 | System.out.println("计算10亿条数据耗时:" + Duration.between(startTime, endTime).toMillis()); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /spark-phoenix/src/test/java/com/bigdata/study/sparkphoenix/phoenix/SparkPhoenixTest.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.sparkphoenix.phoenix; 2 | 3 | import com.bigdata.study.sparkphoenix.SparkPhoenixApplication; 4 | import org.junit.Test; 5 | import org.springframework.boot.SpringApplication; 6 | 7 | import java.util.ArrayList; 8 | import java.util.List; 9 | 10 | /** 11 | * @Description 12 | * @Author hasee 13 | * @Date 2019/1/3 14 | **/ 15 | public class SparkPhoenixTest { 16 | public static void main(String[] args) { 17 | List params = new ArrayList<>(); 18 | params.add("2017-06-01"); 19 | params.add("2017-07-01"); 20 | String[] argArray = params.toArray(new String[]{}); 21 | SpringApplication.run(SparkPhoenixApplication.class, argArray); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /dataflow-stream-kafka-source/src/main/java/com/bigdata/study/dataflowstreamkafkasource/DataflowStreamKafkaSourceApplication.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.dataflowstreamkafkasource; 2 | 3 | import com.bigdata.study.dataflowstreamkafkasource.config.KafkaSourceConfig; 4 | import org.springframework.boot.SpringApplication; 5 | import org.springframework.boot.autoconfigure.SpringBootApplication; 6 | import org.springframework.context.annotation.ComponentScan; 7 | import org.springframework.context.annotation.Import; 8 | 9 | @SpringBootApplication 10 | @ComponentScan("com.bigdata") 11 | @Import({KafkaSourceConfig.class}) 12 | public class DataflowStreamKafkaSourceApplication { 13 | 14 | public static void main(String[] args) { 15 | SpringApplication.run(DataflowStreamKafkaSourceApplication.class, args); 16 | } 17 | 18 | 
}
--------------------------------------------------------------------------------
/dataflow-stream-redis-pub-sink/src/main/java/com/bigdata/study/dataflowstreamredispubsink/DataflowStreamRedisPubSinkApplication.java:
--------------------------------------------------------------------------------
package com.bigdata.study.dataflowstreamredispubsink;

import com.bigdata.study.dataflowstreamredispubsink.config.RedisStreamPubConfig;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.Import;

@SpringBootApplication
@ComponentScan("com.bigdata")
@Import({RedisStreamPubConfig.class})
public class DataflowStreamRedisPubSinkApplication {

    public static void main(String[] args) {
        SpringApplication.run(DataflowStreamRedisPubSinkApplication.class, args);
    }

}
--------------------------------------------------------------------------------
/fork-join/pom.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>bigdata-starter</artifactId>
        <groupId>com.bigdata.study</groupId>
        <version>1.0</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>

    <artifactId>fork-join</artifactId>

    <packaging>jar</packaging>

    <dependencies>
        <dependency>
            <groupId>commons-dbutils</groupId>
            <artifactId>commons-dbutils</artifactId>
            <version>1.6</version>
        </dependency>
    </dependencies>
</project>
--------------------------------------------------------------------------------
/flink-kafka-hbase/src/main/java/com/bigdata/study/flinkkafkahbase/watermarks/FlinkHbaseWaterMarks.java:
--------------------------------------------------------------------------------
package com.bigdata.study.flinkkafkahbase.watermarks;

import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
import org.apache.flink.streaming.api.watermark.Watermark;

import javax.annotation.Nullable;
import java.util.Map;

/**
 * Custom watermark assigner
 **/
public class FlinkHbaseWaterMarks implements AssignerWithPeriodicWatermarks<Map<String, String>> {
    private long currentTime;

    @Nullable
    @Override
    public Watermark getCurrentWatermark() {
        return new Watermark(currentTime);
    }

    @Override
    public long extractTimestamp(Map<String, String> stringStringMap, long l) {
        currentTime = l;
        return l;
    }
}
--------------------------------------------------------------------------------
/flink-kafka-source/src/main/java/com/bigdata/study/flinkkafkasource/watermarks/ConsumerWaterMarkEmitter.java:
--------------------------------------------------------------------------------
package com.bigdata.study.flinkkafkasource.watermarks;

import model.Metrics;
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
import org.apache.flink.streaming.api.watermark.Watermark;

import javax.annotation.Nullable;

/**
 * Custom timestamp extractor / watermark emitter
 **/
public class ConsumerWaterMarkEmitter implements AssignerWithPeriodicWatermarks<Metrics> {
    private long currentTime;

    @Nullable
    @Override
    public Watermark getCurrentWatermark() {
        return new Watermark(currentTime);
    }

    @Override
    public long extractTimestamp(Metrics metrics, long l) {
        currentTime = l;
        return currentTime;
    }
}
--------------------------------------------------------------------------------
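The two classes above are periodic watermark generators. A minimal sketch of how such an assigner is typically wired into a Flink job, assuming event time, a one-second auto-watermark interval and a placeholder in-memory source (the real jobs read Metrics from Kafka):

import com.bigdata.study.flinkkafkasource.watermarks.ConsumerWaterMarkEmitter;
import model.Metrics;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class WatermarkWiringSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Periodic assigners are only polled in event time, at the configured auto-watermark interval.
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.getConfig().setAutoWatermarkInterval(1000L);

        // Placeholder source; in the repository the Metrics stream comes from a Kafka consumer.
        DataStream<Metrics> metrics = env.fromElements(new Metrics());

        DataStream<Metrics> withTimestamps =
                metrics.assignTimestampsAndWatermarks(new ConsumerWaterMarkEmitter());

        withTimestamps.print();
        env.execute("watermark wiring sketch");
    }
}
--------------------------------------------------------------------------------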
/dataflow-stream-redis-set-processor/src/main/java/com/bigdata/study/dataflowstreamredissetprocessor/DataflowStreamRedisSetProcessorApplication.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.dataflowstreamredissetprocessor; 2 | 3 | import com.bigdata.study.dataflowstreamredissetprocessor.config.RedisStreamProcessorConfig; 4 | import org.springframework.boot.SpringApplication; 5 | import org.springframework.boot.autoconfigure.SpringBootApplication; 6 | import org.springframework.context.annotation.ComponentScan; 7 | import org.springframework.context.annotation.Import; 8 | 9 | @SpringBootApplication 10 | @ComponentScan("com.bigdata") 11 | @Import({RedisStreamProcessorConfig.class}) 12 | public class DataflowStreamRedisSetProcessorApplication { 13 | 14 | public static void main(String[] args) { 15 | SpringApplication.run(DataflowStreamRedisSetProcessorApplication.class, args); 16 | } 17 | 18 | } 19 | 20 | -------------------------------------------------------------------------------- /flink-common/src/main/java/watermarks/MetricWatermark.java: -------------------------------------------------------------------------------- 1 | package watermarks; 2 | 3 | import model.Metrics; 4 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; 5 | import org.apache.flink.streaming.api.watermark.Watermark; 6 | 7 | import javax.annotation.Nullable; 8 | 9 | /** 10 | * @Description 11 | * @Author hasee 12 | * @Date 2019/1/4 13 | **/ 14 | public class MetricWatermark implements AssignerWithPeriodicWatermarks { 15 | private long currentTime = Long.MAX_VALUE; 16 | 17 | @Nullable 18 | @Override 19 | public Watermark getCurrentWatermark() { 20 | return new Watermark(currentTime == Long.MAX_VALUE ? 
Long.MAX_VALUE : currentTime - 1); 21 | } 22 | 23 | @Override 24 | public long extractTimestamp(Metrics metrics, long l) { 25 | long time = metrics.getTimestamp() / (1000 * 1000); 26 | this.currentTime = time; 27 | return currentTime; 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /dataflow-stream-redis-pub-sink/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | spring.cloud.dataflow.stream.app.label=redisq 2 | endpoints.shutdown.enabled=true 3 | spring.redis.host=192.168.254.252 4 | spring.cloud.stream.metrics.properties=spring.application.name,spring.application.index,spring.cloud.application.*,spring.cloud.dataflow.* 5 | spring.cloud.dataflow.stream.name=stream2redisq 6 | spring.metrics.export.triggers.application.includes=integration** 7 | spring.cloud.stream.metrics.key=stream2redisq.redisq.${spring.cloud.application.guid} 8 | spring.cloud.stream.bindings.input.group=stream2redisq 9 | server.port=29728 10 | spring.cloud.stream.kafka.binder.zkNodes=tools.wjm.com:2181 11 | spring.cloud.dataflow.stream.app.type=sink 12 | redis.queue=all 13 | spring.cloud.stream.bindings.input.destination=stream2redisq.rand-map 14 | spring.cloud.stream.kafka.binder.brokers=tools.wjm.com:9092 15 | spring.jmx.default-domain=stream2redisq.redisq 16 | spring.cloud.application.group=stream2redisq -------------------------------------------------------------------------------- /flink-sideoutput/src/main/java/com/bigdata/study/flinksideoutput/process/ProcessTokenizer.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinksideoutput.process; 2 | 3 | import com.bigdata.study.flinksideoutput.tag.SideOutputTag; 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | import org.apache.flink.streaming.api.functions.ProcessFunction; 6 | import org.apache.flink.util.Collector; 7 | 8 | /** 9 | * @Description 10 | * @Author hasee 11 | * @Date 2019/1/11 12 | **/ 13 | public class ProcessTokenizer extends ProcessFunction> { 14 | @Override 15 | public void processElement(String value, Context context, Collector> collector) throws Exception { 16 | String[] tokens = value.toLowerCase().split("\\w+"); 17 | for (String token : tokens) { 18 | if (token.length() > 5) { 19 | context.output(SideOutputTag.wordTag, token); 20 | } else { 21 | collector.collect(new Tuple2<>(token, 1)); 22 | } 23 | } 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /dataflow-stream-kafka-source/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | server.port=-1 2 | spring.cloud.dataflow.stream.app.type=source 3 | 4 | #spring.cloud.stream.kafka.binder.brokers=192.168.10.120:6667,192.168.10.121:6667,192.168.10.122:6667 5 | #spring.cloud.stream.kafka.binder.zk-nodes=192.168.10.120:2181,192.168.10.121:2181,192.168.10.122:2181 6 | #spring.cloud.stream.kafka.binder.fetchSize=1024000 7 | #spring.cloud.stream.kafka.binder.minPartitionCount=1 8 | #spring.cloud.stream.kafka.binder.autoCreateTopics=true 9 | #spring.cloud.stream.kafka.binder.autoAddPartitions=false 10 | 11 | spring.cloud.stream.bindings.output.destination=kafka2kafka.topic 12 | kafka.topic=kafka.from 13 | kafka.servers=192.168.10.120:6667,192.168.10.121:6667,192.168.10.122:6667 14 | kafka.zkNodes=192.168.10.120:2181,192.168.10.121:2181,192.168.10.122:2181 15 | 
kafka.groupId=kafka.from.group 16 | kafka.batchSize=102400 17 | kafka.keyDeserializer=org.apache.kafka.common.serialization.StringDeserializer 18 | kafka.valueDeserializer=org.apache.kafka.common.serialization.StringDeserializer -------------------------------------------------------------------------------- /kafka-stream/src/main/java/com/bigdata/study/kafkastream/model/User.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.kafkastream.model; 2 | 3 | public class User { 4 | private String name; 5 | private String address; 6 | private String gender; 7 | private int age; 8 | 9 | public User() {} 10 | 11 | public User(String name, String address, String gender, int age) { 12 | this.name = name; 13 | this.address = address; 14 | this.gender = gender; 15 | this.age = age; 16 | } 17 | 18 | public String getName() { 19 | return name; 20 | } 21 | 22 | public void setName(String name) { 23 | this.name = name; 24 | } 25 | 26 | public String getAddress() { 27 | return address; 28 | } 29 | 30 | public void setAddress(String address) { 31 | this.address = address; 32 | } 33 | 34 | public String getGender() { 35 | return gender; 36 | } 37 | 38 | public void setGender(String gender) { 39 | this.gender = gender; 40 | } 41 | 42 | public int getAge() { 43 | return age; 44 | } 45 | 46 | public void setAge(int age) { 47 | this.age = age; 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /flink-sideoutput/src/main/java/com/bigdata/study/flinksideoutput/process/KeyedTokenizer.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinksideoutput.process; 2 | 3 | import com.bigdata.study.flinksideoutput.tag.SideOutputTag; 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | import org.apache.flink.streaming.api.functions.KeyedProcessFunction; 6 | import org.apache.flink.util.Collector; 7 | 8 | /** 9 | * 以用户自定义FlatMapFunction函数的形式来实现分词器功能,该分词器会将分词封装为(word,1), 10 | * 同时不接受单词长度大于5的,也即是侧输出都是单词长度大于5的单词。 11 | **/ 12 | public class KeyedTokenizer extends KeyedProcessFunction> { 13 | @Override 14 | public void processElement(String value, Context context, Collector> collector) throws Exception { 15 | String[] tokens = value.toLowerCase().split("\\W+"); 16 | for (String token : tokens) { 17 | if (token.length() > 5) { 18 | context.output(SideOutputTag.wordTag, token); 19 | } else { 20 | collector.collect(new Tuple2<>(token, 1)); 21 | } 22 | } 23 | } 24 | 25 | } 26 | -------------------------------------------------------------------------------- /kafka-stream/src/main/java/com/bigdata/study/kafkastream/model/Item.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.kafkastream.model; 2 | 3 | public class Item { 4 | private String itemName; 5 | private String address; 6 | private String type; 7 | private double price; 8 | 9 | public Item() {} 10 | 11 | public Item(String itemName, String address, String type, double price) { 12 | this.itemName = itemName; 13 | this.address = address; 14 | this.type = type; 15 | this.price = price; 16 | } 17 | 18 | public String getItemName() { 19 | return itemName; 20 | } 21 | 22 | public void setItemName(String itemName) { 23 | this.itemName = itemName; 24 | } 25 | 26 | public String getAddress() { 27 | return address; 28 | } 29 | 30 | public void setAddress(String address) { 31 | this.address = address; 32 | } 33 | 34 | public String 
getType() { 35 | return type; 36 | } 37 | 38 | public void setType(String type) { 39 | this.type = type; 40 | } 41 | 42 | public double getPrice() { 43 | return price; 44 | } 45 | 46 | public void setPrice(double price) { 47 | this.price = price; 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /flink-common/src/main/java/model/Metrics.java: -------------------------------------------------------------------------------- 1 | package model; 2 | 3 | import java.util.Map; 4 | 5 | /** 6 | * @Description 7 | * @Author hasee 8 | * @Date 2019/1/4 9 | **/ 10 | public class Metrics { 11 | private String name; 12 | 13 | private Long timestamp; 14 | 15 | private Map fields; 16 | 17 | private Map tags; 18 | 19 | public String getName() { 20 | return name; 21 | } 22 | 23 | public void setName(String name) { 24 | this.name = name; 25 | } 26 | 27 | public Long getTimestamp() { 28 | return timestamp; 29 | } 30 | 31 | public void setTimestamp(Long timestamp) { 32 | this.timestamp = timestamp; 33 | } 34 | 35 | public Map getFields() { 36 | return fields; 37 | } 38 | 39 | public void setFields(Map fields) { 40 | this.fields = fields; 41 | } 42 | 43 | public Map getTags() { 44 | return tags; 45 | } 46 | 47 | public void setTags(Map tags) { 48 | this.tags = tags; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /flink-common/src/main/java/constant/PropertiesConstants.java: -------------------------------------------------------------------------------- 1 | package constant; 2 | 3 | /** 4 | * 基本配置属性信息 5 | **/ 6 | public class PropertiesConstants { 7 | //kafka 8 | public static final String KAFKA_BROKERS = "kafka.brokers"; 9 | public static final String KAFKA_ZOOKEEPER_CONNECT = "kafka.zookeeper.connect"; 10 | public static final String KAFKA_GROUP_ID = "kafka.group.id"; 11 | public static final String METRICS_TOPIC = "metrics.topic"; 12 | public static final String CONSUMER_FROM_TIME = "consumer.from.time"; 13 | 14 | public static final String STREAM_PARALLELISM = "stream.parallelism"; 15 | public static final String STREAM_SINK_PARALLELISM = "stream.sink.parallelism"; 16 | public static final String STREAM_CHECKPOINT_ENABLE = "stream.checkpoint.enable"; 17 | public static final String STREAM_CHECKPOINT_INTERVAL = "stream.checkpoint.interval"; 18 | 19 | public static final String PROPERTIES_FILE_NAME = "/application.properties"; 20 | 21 | //es config 22 | public static final String ELASTICSEARCH_BULK_FLUSH_MAX_ACTIONS = "elasticsearch.bulk.flush.max.actions"; 23 | public static final String ELASTICSEARCH_HOSTS = "elasticsearch.hosts"; 24 | } 25 | -------------------------------------------------------------------------------- /flink-elasticsearch-sink/src/main/java/com/bigdata/study/flinkelasticsearchsink/handler/FlinkFailHandler.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkelasticsearchsink.handler; 2 | 3 | import org.apache.flink.streaming.connectors.elasticsearch.ActionRequestFailureHandler; 4 | import org.apache.flink.streaming.connectors.elasticsearch.RequestIndexer; 5 | import org.elasticsearch.ElasticsearchParseException; 6 | import org.elasticsearch.action.ActionRequest; 7 | import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException; 8 | 9 | /** 10 | * 自定义es写入失败处理 11 | **/ 12 | public class FlinkFailHandler implements ActionRequestFailureHandler { 13 | 14 | @Override 15 | public void 
onFailure(ActionRequest actionRequest, Throwable throwable, int requestStatusCode, RequestIndexer requestIndexer) throws Throwable { 16 | if (throwable instanceof EsRejectedExecutionException) { 17 | //将失败请求继续加入队列,后续进行重试写入 18 | requestIndexer.add(actionRequest); 19 | } else if (throwable instanceof ElasticsearchParseException) { 20 | //自定义异常处理 21 | throwable.printStackTrace(); 22 | } else { 23 | throw throwable; 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /kafka-stream/src/main/java/com/bigdata/study/kafkastream/model/Order.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.kafkastream.model; 2 | 3 | public class Order { 4 | 5 | private String userName; 6 | private String itemName; 7 | private long transactionDate; 8 | private int quantity; 9 | 10 | public Order() {} 11 | 12 | public Order(String userName, String itemName, long transactionDate, int quantity) { 13 | this.userName = userName; 14 | this.itemName = itemName; 15 | this.transactionDate = transactionDate; 16 | this.quantity = quantity; 17 | } 18 | 19 | public String getUserName() { 20 | return userName; 21 | } 22 | 23 | public void setUserName(String userName) { 24 | this.userName = userName; 25 | } 26 | 27 | public String getItemName() { 28 | return itemName; 29 | } 30 | 31 | public void setItemName(String itemName) { 32 | this.itemName = itemName; 33 | } 34 | 35 | public long getTransactionDate() { 36 | return transactionDate; 37 | } 38 | 39 | public void setTransactionDate(long transactionDate) { 40 | this.transactionDate = transactionDate; 41 | } 42 | 43 | public int getQuantity() { 44 | return quantity; 45 | } 46 | 47 | public void setQuantity(int quantity) { 48 | this.quantity = quantity; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /flink-common/src/main/java/schemas/MetricSchema.java: -------------------------------------------------------------------------------- 1 | package schemas; 2 | 3 | import com.google.gson.Gson; 4 | import model.Metrics; 5 | import org.apache.flink.api.common.serialization.DeserializationSchema; 6 | import org.apache.flink.api.common.serialization.SerializationSchema; 7 | import org.apache.flink.api.common.typeinfo.TypeInformation; 8 | 9 | import java.io.IOException; 10 | import java.nio.charset.Charset; 11 | 12 | /** 13 | * @Description 14 | * @Author hasee 15 | * @Date 2019/1/4 16 | **/ 17 | public class MetricSchema implements DeserializationSchema, SerializationSchema { 18 | private static Gson gson = new Gson(); 19 | 20 | @Override 21 | public Metrics deserialize(byte[] bytes) throws IOException { 22 | return gson.fromJson(new String(bytes), Metrics.class); 23 | } 24 | 25 | @Override 26 | public boolean isEndOfStream(Metrics metrics) { 27 | return false; 28 | } 29 | 30 | @Override 31 | public byte[] serialize(Metrics metrics) { 32 | return gson.toJson(metrics).getBytes(Charset.forName("utf-8")); 33 | } 34 | 35 | @Override 36 | public TypeInformation getProducedType() { 37 | return TypeInformation.of(Metrics.class); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /kafka-stream/src/main/java/com/bigdata/study/kafkastream/utils/HashPartitioner.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.kafkastream.utils; 2 | 3 | import org.apache.kafka.clients.producer.Partitioner; 4 | import 
org.apache.kafka.common.Cluster;
import org.apache.kafka.common.PartitionInfo;

import java.util.List;
import java.util.Map;

/**
 * Custom Kafka partitioner: hashes the record key onto a partition.
 **/
public class HashPartitioner implements Partitioner {

    @Override
    public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
        List<PartitionInfo> partitionInfos = cluster.partitionsForTopic(topic);
        int numPartitions = partitionInfos.size();
        if (keyBytes != null && keyBytes.length > 0) {
            int hashCode;
            if (key instanceof Integer || key instanceof Long) {
                hashCode = ((Number) key).intValue();
            } else {
                hashCode = key.hashCode();
            }
            // clear the sign bit so the modulo result is a valid partition index
            hashCode = hashCode & 0x7fffffff;
            return hashCode % numPartitions;
        } else {
            // records without a key all go to partition 0
            return 0;
        }
    }

    @Override
    public void close() {

    }

    @Override
    public void configure(Map<String, ?> map) {

    }
}

--------------------------------------------------------------------------------
/fork-join/src/main/java/forkjoin/MyForkJoinTask.java:
--------------------------------------------------------------------------------
package forkjoin;

import java.util.concurrent.RecursiveTask;

/**
 * Fork/join task that sums the numbers in the range [startNum, endNum].
 **/
public class MyForkJoinTask extends RecursiveTask<Long> {
    /**
     * Threshold below which a sub-range is summed directly instead of being split further
     */
    private static final long CRITICAL_VALUE = 10_000;
    /**
     * Start of the range
     */
    private Long startNum;

    /**
     * End of the range
     */
    private Long endNum;

    public MyForkJoinTask(Long startNum, Long endNum) {
        this.startNum = startNum;
        this.endNum = endNum;
    }

    @Override
    protected Long compute() {
        long length = endNum - startNum;
        if (length <= CRITICAL_VALUE) {
            // small enough: sum the sub-range [startNum, endNum] directly
            long num = 0;
            for (long i = startNum; i <= endNum; i++) {
                num += i;
            }
            return num;
        } else {
            // split the range in half, fork both halves and combine their results
            long middleValue = (startNum + endNum) / 2;
            MyForkJoinTask leftTask = new MyForkJoinTask(startNum, middleValue);
            leftTask.fork();
            MyForkJoinTask rightTask = new MyForkJoinTask(middleValue + 1, endNum);
            rightTask.fork();
            return leftTask.join() + rightTask.join();
        }
    }
}

--------------------------------------------------------------------------------
/spark-phoenix/src/main/java/com/bigdata/study/sparkphoenix/utils/PhoenixUtil.java:
--------------------------------------------------------------------------------
package com.bigdata.study.sparkphoenix.utils;

import org.apache.commons.collections.CollectionUtils;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.util.LinkedList;

/**
 * @Description
 * @Author hasee
 * @Date 2019/1/3
 **/
public class PhoenixUtil {

    private static LinkedList<Connection> connectionQueue;

    static {
        try {
            Class.forName("org.apache.phoenix.jdbc.PhoenixDriver");
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
    }

    public synchronized static Connection getConnection() {
        try {
            if (CollectionUtils.isEmpty(connectionQueue)) {
                connectionQueue = new LinkedList<>();
                for (int i = 0; i < 3; i++) {
                    Connection connection = DriverManager.getConnection("jdbc:phoenix:zk:2181");
                    connectionQueue.add(connection);
                }
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return connectionQueue.poll();
    }

    public static void
returnConnection(Connection connection) { 43 | connectionQueue.push(connection); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /dataflow-stream-redis-set-processor/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | spring.cloud.dataflow.stream.app.label=redisq 2 | endpoints.shutdown.enabled=true 3 | 4 | spring.redis.host=192.168.254.252 5 | redis.queue=all 6 | 7 | spring.cloud.stream.metrics.properties=spring.application.name,spring.application.index,spring.cloud.application.*,spring.cloud.dataflow.* 8 | spring.cloud.dataflow.stream.name=stream2redisq 9 | spring.metrics.export.triggers.application.includes=integration** 10 | spring.cloud.stream.metrics.key=stream2redisq.redisq.${spring.cloud.application.guid} 11 | 12 | server.port=29728 13 | #spring.cloud.stream.kafka.binder.zkNodes=tools.wjm.com:2181 14 | spring.cloud.dataflow.stream.app.type=processor 15 | 16 | spring.cloud.stream.bindings.input.destination=stream2redisq.rand-map 17 | spring.cloud.stream.bindings.input.group=stream2redisq 18 | 19 | spring.jmx.default-domain=stream2redisq.redisq 20 | spring.cloud.application.group=stream2redisq 21 | 22 | spring.cloud.stream.kafka.binder.brokers=192.168.10.120:6667,192.168.10.121:6667,192.168.10.122:6667 23 | spring.cloud.stream.kafka.binder.zk-nodes=192.168.10.120:2181,192.168.10.121:2181,192.168.10.122:2181 24 | spring.cloud.stream.kafka.binder.fetchSize=1024000 25 | spring.cloud.stream.kafka.binder.minPartitionCount=1 26 | spring.cloud.stream.kafka.binder.autoCreateTopics=true 27 | spring.cloud.stream.kafka.binder.autoAddPartitions=false 28 | 29 | spring.cloud.stream.bindings.output.destination=stream2redisq.rand-map -------------------------------------------------------------------------------- /dataflow-stream-redis-set-processor/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.bigdata.study 7 | bigdata-starter 8 | 1.0 9 | ../pom.xml 10 | 11 | dataflow-stream-redis-set-processor 12 | 1.0 13 | dataflow-stream-redis-set-processor 14 | Demo project for Spring Boot 15 | 16 | 17 | 18 | org.springframework.boot 19 | spring-boot-starter-data-redis 20 | 21 | 22 | org.springframework.cloud 23 | spring-cloud-starter-stream-kafka 24 | 25 | 26 | 27 | 28 | 29 | 30 | org.springframework.boot 31 | spring-boot-maven-plugin 32 | 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /dataflow-stream-redis-pub-sink/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.bigdata.study 7 | bigdata-starter 8 | 1.0 9 | ../pom.xml 10 | 11 | 12 | dataflow-stream-redis-pub-sink 13 | 0.0.1-SNAPSHOT 14 | dataflow-stream-redis-pub-sink 15 | Demo project for Spring Boot 16 | 17 | 18 | 19 | org.springframework.boot 20 | spring-boot-starter-data-redis 21 | 22 | 23 | org.springframework.cloud 24 | spring-cloud-starter-stream-kafka 25 | 26 | 27 | 28 | 29 | 30 | 31 | org.springframework.boot 32 | spring-boot-maven-plugin 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /kafka-stream/src/main/java/com/bigdata/study/kafkastream/timeextractor/OrderTimestampExtractor.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.kafkastream.timeextractor; 2 | 3 | import 
com.bigdata.study.kafkastream.model.Item; 4 | import com.bigdata.study.kafkastream.model.Order; 5 | import com.bigdata.study.kafkastream.model.User; 6 | import com.fasterxml.jackson.databind.JsonNode; 7 | import org.apache.kafka.clients.consumer.ConsumerRecord; 8 | import org.apache.kafka.streams.processor.TimestampExtractor; 9 | 10 | import java.time.LocalDateTime; 11 | import java.time.ZoneOffset; 12 | 13 | /** 14 | * 自定义从topic中抽取时间 15 | **/ 16 | public class OrderTimestampExtractor implements TimestampExtractor { 17 | @Override 18 | public long extract(ConsumerRecord consumerRecord) { 19 | Object value = consumerRecord.value(); 20 | if (value instanceof Order) { 21 | return ((Order) value).getTransactionDate(); 22 | } else if (value instanceof JsonNode) { 23 | return ((JsonNode) value).get("transactionDate").longValue(); 24 | } else if (value instanceof User) { 25 | return LocalDateTime.of(2015, 12, 11, 1, 0, 10) 26 | .toEpochSecond(ZoneOffset.UTC) * 1000; 27 | } else if (value instanceof Item) { 28 | return LocalDateTime.of(2015, 12, 11, 0, 0, 10).toEpochSecond(ZoneOffset.UTC) * 1000; 29 | } else { 30 | return LocalDateTime.of(2015, 11, 10, 0, 0, 10).toEpochSecond(ZoneOffset.UTC) * 1000; 31 | } 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /elasticsearch/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.bigdata.study 7 | bigdata-starter 8 | 1.0 9 | ../pom.xml 10 | 11 | elasticsearch 12 | 0.0.1-SNAPSHOT 13 | elasticsearch 14 | Demo project for Spring Boot 15 | 16 | 17 | 18 | org.springframework.boot 19 | spring-boot-starter 20 | 21 | 22 | org.elasticsearch.client 23 | transport 24 | 6.2.4 25 | 26 | 27 | org.springframework.boot 28 | spring-boot-starter-test 29 | test 30 | 31 | 32 | 33 | 34 | 35 | 36 | org.springframework.boot 37 | spring-boot-maven-plugin 38 | 39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /flink-async-io/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.bigdata.study 7 | bigdata-starter 8 | 1.0 9 | ../pom.xml 10 | 11 | flink-async-io 12 | 0.0.1-SNAPSHOT 13 | flink-async-io 14 | Demo project for Spring Boot 15 | 16 | 17 | 18 | org.springframework.boot 19 | spring-boot-starter 20 | 21 | 22 | com.bigdata.study 23 | flink-common 24 | 1.0 25 | 26 | 27 | org.springframework.boot 28 | spring-boot-starter-test 29 | test 30 | 31 | 32 | 33 | 34 | 35 | 36 | org.springframework.boot 37 | spring-boot-maven-plugin 38 | 39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /flink-sideoutput/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.bigdata.study 7 | bigdata-starter 8 | 1.0 9 | ../pom.xml 10 | 11 | flink-sideoutput 12 | 0.0.1-SNAPSHOT 13 | flink-sideoutput 14 | Demo project for Spring Boot 15 | 16 | 17 | 1.8 18 | 19 | 20 | 21 | 22 | org.springframework.boot 23 | spring-boot-starter 24 | 25 | 26 | com.bigdata.study 27 | flink-common 28 | 1.0 29 | 30 | 31 | org.springframework.boot 32 | spring-boot-starter-test 33 | test 34 | 35 | 36 | 37 | 38 | 39 | 40 | org.springframework.boot 41 | spring-boot-maven-plugin 42 | 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /flink-async-io/src/main/java/com/bigdata/study/flinkasyncio/source/SimpleSource.java: 
-------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkasyncio.source; 2 | 3 | import org.apache.flink.streaming.api.checkpoint.ListCheckpointed; 4 | import org.apache.flink.streaming.api.functions.source.SourceFunction; 5 | import org.springframework.util.CollectionUtils; 6 | 7 | import java.util.Collections; 8 | import java.util.List; 9 | 10 | /** 11 | * @Description 12 | * @Author hasee 13 | * @Date 2019/1/15 14 | **/ 15 | public class SimpleSource implements SourceFunction, ListCheckpointed { 16 | 17 | private volatile boolean isRunning = true; 18 | 19 | private int counter = 0; 20 | 21 | private int start = 0; 22 | 23 | public SimpleSource(int counter) { 24 | this.counter = counter; 25 | } 26 | 27 | @Override 28 | public List snapshotState(long l, long l1) throws Exception { 29 | return Collections.singletonList(start); 30 | } 31 | 32 | @Override 33 | public void restoreState(List list) throws Exception { 34 | for (Integer state : list) { 35 | this.start = state; 36 | } 37 | } 38 | 39 | @Override 40 | public void run(SourceContext sourceContext) throws Exception { 41 | while ((start < counter || counter == -1) && isRunning) { 42 | synchronized (sourceContext.getCheckpointLock()) { 43 | sourceContext.collect(start); 44 | ++start; 45 | if (start == Integer.MAX_VALUE) { 46 | start = 0; 47 | } 48 | } 49 | Thread.sleep(10L); 50 | } 51 | } 52 | 53 | @Override 54 | public void cancel() { 55 | isRunning = false; 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /flink-kafka-hbase/src/main/java/com/bigdata/study/flinkkafkahbase/model/Metric.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkkafkahbase.model; 2 | 3 | import java.util.Map; 4 | 5 | /** 6 | * @Description 7 | * @Author hasee 8 | * @Date 2018/12/5 9 | **/ 10 | public class Metric { 11 | 12 | public String name; 13 | public long timestamp; 14 | public Map fields; 15 | public Map tags; 16 | 17 | public Metric() { 18 | } 19 | 20 | public Metric(String name, long timestamp, Map fields, Map tags) { 21 | this.name = name; 22 | this.timestamp = timestamp; 23 | this.fields = fields; 24 | this.tags = tags; 25 | } 26 | 27 | public String getName() { 28 | return name; 29 | } 30 | 31 | public void setName(String name) { 32 | this.name = name; 33 | } 34 | 35 | public long getTimestamp() { 36 | return timestamp; 37 | } 38 | 39 | public void setTimestamp(long timestamp) { 40 | this.timestamp = timestamp; 41 | } 42 | 43 | public Map getFields() { 44 | return fields; 45 | } 46 | 47 | public void setFields(Map fields) { 48 | this.fields = fields; 49 | } 50 | 51 | public Map getTags() { 52 | return tags; 53 | } 54 | 55 | public void setTags(Map tags) { 56 | this.tags = tags; 57 | } 58 | 59 | @Override 60 | public String toString() { 61 | return "Metric{" + 62 | "name='" + name + '\'' + 63 | ", timestamp=" + timestamp + 64 | ", fields=" + fields + 65 | ", tags=" + tags + 66 | '}'; 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /kafka-stream/src/main/java/com/bigdata/study/kafkastream/serdes/GenericDeserializer.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.kafkastream.serdes; 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper; 4 | import com.sun.xml.internal.ws.encoding.soap.DeserializationException; 5 | import 
org.apache.kafka.common.serialization.Deserializer; 6 | 7 | import java.io.IOException; 8 | import java.util.Map; 9 | 10 | /** 11 | * @Description 12 | * @Author hasee 13 | * @Date 2019/1/8 14 | **/ 15 | public class GenericDeserializer implements Deserializer { 16 | private Class type; 17 | private static ObjectMapper mapper = new ObjectMapper(); 18 | 19 | public GenericDeserializer() { 20 | } 21 | 22 | public GenericDeserializer(Class type) { 23 | this.type = type; 24 | } 25 | 26 | @Override 27 | public void configure(Map map, boolean b) { 28 | if (type != null) { 29 | return; 30 | } 31 | String typeProp = b ? "key.deserializer.type" : "value.deserializer.type"; 32 | String typeName = String.valueOf(map.get(typeProp)); 33 | try { 34 | type = (Class) Class.forName(typeName); 35 | } catch (ClassNotFoundException e) { 36 | throw new DeserializationException("failed to initialize GenericDeserializer for " + typeName, e); 37 | } 38 | } 39 | 40 | @Override 41 | public T deserialize(String s, byte[] bytes) { 42 | if (bytes.length < 1) { 43 | return null; 44 | } 45 | try { 46 | return mapper.readValue(bytes, type); 47 | } catch (IOException e) { 48 | throw new DeserializationException(e); 49 | } 50 | } 51 | 52 | @Override 53 | public void close() { 54 | 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /kafka-stream/src/main/java/com/bigdata/study/kafkastream/serdes/GenericSerializer.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.kafkastream.serdes; 2 | 3 | 4 | import com.fasterxml.jackson.core.JsonProcessingException; 5 | import com.fasterxml.jackson.databind.ObjectMapper; 6 | import org.apache.kafka.common.errors.SerializationException; 7 | import org.apache.kafka.common.serialization.Serializer; 8 | 9 | import java.util.Map; 10 | 11 | /** 12 | * @Description 13 | * @Author hasee 14 | * @Date 2019/1/8 15 | **/ 16 | public class GenericSerializer implements Serializer { 17 | private Class tClass; 18 | private static ObjectMapper mapper = new ObjectMapper(); 19 | 20 | public GenericSerializer() { 21 | } 22 | 23 | public GenericSerializer(Class tClass) { 24 | this.tClass = tClass; 25 | } 26 | 27 | @Override 28 | public void configure(Map map, boolean b) { 29 | if (tClass != null) { 30 | return; 31 | } 32 | String type = b ? 
"key.serializer.type" : "value.serializer.type"; 33 | String typeName = String.valueOf(map.get(type)); 34 | try { 35 | tClass = (Class) Class.forName(typeName); 36 | } catch (ClassNotFoundException e) { 37 | throw new SerializationException("failed to initialize GenericSerializer:+" + typeName, e); 38 | } 39 | } 40 | 41 | @Override 42 | public byte[] serialize(String s, T t) { 43 | if (t == null) { 44 | return new byte[0]; 45 | } 46 | try { 47 | return mapper.writerFor(tClass).writeValueAsBytes(t); 48 | } catch (JsonProcessingException e) { 49 | throw new SerializationException(e); 50 | } 51 | } 52 | 53 | @Override 54 | public void close() { 55 | 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /flink-kafka11-sink/src/main/java/com/bigdata/study/flinkkafkasink/FlinkKafkaSinkApplication.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkkafkasink; 2 | 3 | import constant.PropertiesConstants; 4 | import model.Metrics; 5 | import org.apache.flink.api.java.utils.ParameterTool; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011; 9 | import org.springframework.boot.autoconfigure.SpringBootApplication; 10 | import schemas.MetricSchema; 11 | import utils.ExecutionEnvUtil; 12 | import utils.KafkaUtils; 13 | 14 | /** 15 | * flink从kafka中读取数据,并写入kafka中 16 | */ 17 | @SpringBootApplication 18 | public class FlinkKafkaSinkApplication { 19 | 20 | public static void main(String[] args) { 21 | // SpringApplication.run(FlinkKafkaSinkApplication.class, args); 22 | try { 23 | ParameterTool parameterPool = ExecutionEnvUtil.createParameterPool(args); 24 | StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterPool); 25 | DataStreamSource dataStreamSource = KafkaUtils.buildSource(env); 26 | String brokers = parameterPool.get(PropertiesConstants.KAFKA_BROKERS); 27 | FlinkKafkaProducer011 kafkaProducer011 = new FlinkKafkaProducer011<>(brokers, 28 | parameterPool.get(PropertiesConstants.METRICS_TOPIC), new MetricSchema()); 29 | kafkaProducer011.setWriteTimestampToKafka(true); 30 | kafkaProducer011.setLogFailuresOnly(false); 31 | dataStreamSource.addSink(kafkaProducer011); 32 | env.execute("flink kafka sink"); 33 | } catch (Exception e) { 34 | e.printStackTrace(); 35 | } 36 | } 37 | 38 | } 39 | 40 | -------------------------------------------------------------------------------- /flink-jdbc-hbase/src/main/java/com/bigdata/study/flinkjdbchbase/source/JdbcSource.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkjdbchbase.source; 2 | 3 | import org.apache.commons.dbutils.DbUtils; 4 | import org.apache.flink.configuration.Configuration; 5 | import org.apache.flink.streaming.api.functions.source.RichSourceFunction; 6 | import org.slf4j.Logger; 7 | import org.slf4j.LoggerFactory; 8 | 9 | import java.sql.*; 10 | 11 | /** 12 | * 自定义从mysql读取数据的source 13 | **/ 14 | public class JdbcSource extends RichSourceFunction { 15 | 16 | private static final Logger logger = LoggerFactory.getLogger(JdbcSource.class); 17 | 18 | private Connection connection; 19 | 20 | private PreparedStatement ps; 21 | 22 | @Override 23 | public void open(Configuration parameters) throws Exception { 24 | super.open(parameters); 25 | Class.forName("com.mysql.jdbc.Driver"); 26 | 
connection = DriverManager.getConnection("jdbc:mysql://localhost:3306/test?characterEncoding=utf8&useSSL=true", "root", "root"); 27 | String sql = "select name from user"; 28 | ps = connection.prepareStatement(sql); 29 | } 30 | 31 | @Override 32 | public void close() throws Exception { 33 | super.close(); 34 | if (connection != null) { 35 | DbUtils.closeQuietly(connection); 36 | } 37 | if (ps != null) { 38 | DbUtils.close(ps); 39 | } 40 | } 41 | 42 | @Override 43 | public void run(SourceContext sourceContext) throws Exception { 44 | try { 45 | ResultSet resultSet = ps.executeQuery(); 46 | while (resultSet.next()) { 47 | String name = resultSet.getString("name"); 48 | sourceContext.collect(name); 49 | } 50 | } catch (SQLException e) { 51 | logger.error("读取mysql数据出错:{}", e.getMessage()); 52 | } 53 | } 54 | 55 | @Override 56 | public void cancel() { 57 | 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /flink-jdbc-hbase/src/main/java/com/bigdata/study/flinkjdbchbase/core/Jdbc2Hbase.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkjdbchbase.core; 2 | 3 | import com.bigdata.study.flinkjdbchbase.sink.HbaseSink; 4 | import com.bigdata.study.flinkjdbchbase.source.JdbcSource; 5 | import org.apache.flink.api.common.functions.MapFunction; 6 | import org.apache.flink.streaming.api.TimeCharacteristic; 7 | import org.apache.flink.streaming.api.datastream.DataStream; 8 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 9 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.slf4j.Logger; 12 | import org.slf4j.LoggerFactory; 13 | 14 | /** 15 | * @Description 16 | * @Author hasee 17 | * @Date 2019/1/7 18 | **/ 19 | public class Jdbc2Hbase { 20 | private static final Logger logger = LoggerFactory.getLogger(Jdbc2Hbase.class); 21 | 22 | public static void main(String[] args) { 23 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 24 | env.setParallelism(1); 25 | env.enableCheckpointing(1000); 26 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 27 | DataStreamSource dataStreamSource = env.addSource(new JdbcSource()); 28 | String hbase_zk = "namenode1.xxx.com"; 29 | String hbase_port = "2181"; 30 | String hbase_table = "ns:table1"; 31 | String hbase_family = "cf1"; 32 | DataStream process = dataStreamSource.map(new MapFunction() { 33 | @Override 34 | public String map(String s) throws Exception { 35 | logger.info("接收到消息:{}", s); 36 | return s; 37 | } 38 | }).process(new HbaseSink(hbase_zk, hbase_port, hbase_table, hbase_family)); 39 | try { 40 | env.execute("flink from mysql 2 hbase"); 41 | } catch (Exception e) { 42 | e.printStackTrace(); 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /flink-kafka11-sink/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.bigdata.study 7 | bigdata-starter 8 | 1.0 9 | ../pom.xml 10 | 11 | flink-kafka-sink 12 | 0.0.1-SNAPSHOT 13 | flink-kafka-sink 14 | Demo project for Spring Boot 15 | 16 | 17 | 18 | org.springframework.boot 19 | spring-boot-starter 20 | 21 | 22 | com.bigdata.study 23 | flink-common 24 | 1.0 25 | 26 | 27 | org.springframework.cloud 28 | spring-cloud-starter-stream-kafka 29 | 30 | 31 | 32 | 33 | org.apache.flink 34 | 
flink-connector-kafka-0.11_${scala.binary.version} 35 | ${flink.version} 36 | 37 | 38 | org.springframework.boot 39 | spring-boot-starter-test 40 | test 41 | 42 | 43 | 44 | 45 | 46 | 47 | org.springframework.boot 48 | spring-boot-maven-plugin 49 | 50 | 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /flink-elasticsearch-sink/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.bigdata.study 7 | bigdata-starter 8 | 1.0 9 | ../pom.xml 10 | 11 | flink-elasticsearch-sink 12 | 0.0.1-SNAPSHOT 13 | flink-elasticsearch-sink 14 | Demo project for Spring Boot 15 | 16 | 17 | 18 | org.springframework.boot 19 | spring-boot-starter 20 | 21 | 22 | com.bigdata.study 23 | flink-common 24 | 1.0 25 | 26 | 27 | org.springframework.cloud 28 | spring-cloud-starter-stream-kafka 29 | 30 | 31 | 32 | 33 | org.apache.flink 34 | flink-connector-elasticsearch6_${scala.binary.version} 35 | ${flink.version} 36 | 37 | 38 | org.springframework.boot 39 | spring-boot-starter-test 40 | test 41 | 42 | 43 | 44 | 45 | 46 | 47 | org.springframework.boot 48 | spring-boot-maven-plugin 49 | 50 | 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /flink-kafka-source/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.bigdata.study 7 | bigdata-starter 8 | 1.0 9 | ../pom.xml 10 | 11 | flink-kafka-source 12 | 0.0.1-SNAPSHOT 13 | flink-kafka-source 14 | Demo project for Spring Boot 15 | 16 | 17 | 18 | org.springframework.boot 19 | spring-boot-starter 20 | 21 | 22 | com.bigdata.study 23 | flink-common 24 | 1.0 25 | 26 | 27 | org.springframework.cloud 28 | spring-cloud-starter-stream-kafka 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | org.springframework.boot 39 | spring-boot-starter-test 40 | test 41 | 42 | 43 | 44 | 45 | 46 | 47 | org.springframework.boot 48 | spring-boot-maven-plugin 49 | 50 | 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /dataflow-stream-redis-pub-sink/src/main/java/com/bigdata/study/dataflowstreamredispubsink/config/RedisStreamPubConfig.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.dataflowstreamredispubsink.config; 2 | 3 | import com.bigdata.study.dataflowstreamredispubsink.prop.RedisPubProperties; 4 | import com.fasterxml.jackson.core.JsonProcessingException; 5 | import com.fasterxml.jackson.databind.ObjectMapper; 6 | import org.springframework.beans.factory.InitializingBean; 7 | import org.springframework.beans.factory.annotation.Autowired; 8 | import org.springframework.boot.context.properties.EnableConfigurationProperties; 9 | import org.springframework.cloud.stream.annotation.EnableBinding; 10 | import org.springframework.cloud.stream.annotation.StreamListener; 11 | import org.springframework.cloud.stream.messaging.Sink; 12 | import org.springframework.context.annotation.Configuration; 13 | import org.springframework.data.redis.core.RedisTemplate; 14 | import org.springframework.data.redis.listener.ChannelTopic; 15 | import org.springframework.messaging.Message; 16 | import org.springframework.scheduling.annotation.EnableScheduling; 17 | 18 | /** 19 | * @Description 20 | * @Author hasee 21 | * @Date 2019/1/2 22 | **/ 23 | @Configuration 24 | @EnableConfigurationProperties(RedisPubProperties.class) 25 | @EnableBinding(Sink.class) 26 | public class 
RedisStreamPubConfig implements InitializingBean { 27 | 28 | @Autowired 29 | private RedisPubProperties redisPubProperties; 30 | 31 | @Autowired 32 | private RedisTemplate redisTemplate; 33 | 34 | private ChannelTopic topic; 35 | 36 | private static final ObjectMapper mapper = new ObjectMapper(); 37 | 38 | @StreamListener(value = Sink.INPUT) 39 | public void pubRedis(Message message) { 40 | try { 41 | redisTemplate.convertAndSend(topic.getTopic(), mapper.writeValueAsString(message.getPayload())); 42 | System.out.println("向redis中发送消息:" + mapper.writeValueAsString(message.getPayload())); 43 | } catch (JsonProcessingException e) { 44 | e.printStackTrace(); 45 | } 46 | } 47 | 48 | @Override 49 | public void afterPropertiesSet() throws Exception { 50 | topic = new ChannelTopic(redisPubProperties.getTopic()); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /dataflow-stream-redis-set-processor/src/main/java/com/bigdata/study/dataflowstreamredissetprocessor/config/RedisStreamProcessorConfig.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.dataflowstreamredissetprocessor.config; 2 | 3 | import com.bigdata.study.dataflowstreamredissetprocessor.prop.RedisSetProperties; 4 | import org.springframework.beans.factory.annotation.Autowired; 5 | import org.springframework.boot.context.properties.EnableConfigurationProperties; 6 | import org.springframework.cloud.stream.annotation.EnableBinding; 7 | import org.springframework.cloud.stream.messaging.Processor; 8 | import org.springframework.cloud.stream.messaging.Sink; 9 | import org.springframework.context.annotation.Configuration; 10 | import org.springframework.data.redis.core.BoundSetOperations; 11 | import org.springframework.data.redis.core.RedisTemplate; 12 | import org.springframework.expression.Expression; 13 | import org.springframework.expression.spel.standard.SpelExpression; 14 | import org.springframework.expression.spel.standard.SpelExpressionParser; 15 | import org.springframework.integration.annotation.Filter; 16 | import org.springframework.messaging.Message; 17 | 18 | import java.util.Map; 19 | 20 | /** 21 | * redis 流水任务 processor 22 | **/ 23 | @Configuration 24 | @EnableConfigurationProperties(RedisSetProperties.class) 25 | @EnableBinding(Processor.class) 26 | public class RedisStreamProcessorConfig { 27 | 28 | private static final SpelExpressionParser expressionParser = new SpelExpressionParser(); 29 | private static final String expressionString = "payload[\"index\"]"; 30 | 31 | @Autowired 32 | private RedisSetProperties redisSetProperties; 33 | 34 | @Autowired 35 | private RedisTemplate redisTemplate; 36 | 37 | @Filter(inputChannel = Processor.INPUT, outputChannel = Processor.OUTPUT) 38 | public boolean filter(Message message) { 39 | final Expression expression = expressionParser.parseExpression(expressionString); 40 | Map map = (Map) message.getPayload(); 41 | BoundSetOperations boundSetOperations = redisTemplate.boundSetOps(redisSetProperties.getSetName()); 42 | boolean member = boundSetOperations.isMember(expression.getValue(map, String.class)); 43 | return member; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /flink-jdbc-hbase/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.bigdata.study 7 | bigdata-starter 8 | 1.0 9 | ../pom.xml 10 | 11 | flink-jdbc-hbase 12 | 0.0.1-SNAPSHOT 13 | 
flink-jdbc-hbase 14 | Demo project for Spring Boot 15 | 16 | 17 | 18 | org.springframework.boot 19 | spring-boot-starter 20 | 21 | 22 | com.bigdata.study 23 | flink-common 24 | 1.0 25 | 26 | 27 | org.apache.flink 28 | flink-hbase_${scala.binary.version} 29 | ${flink.version} 30 | 31 | 32 | org.apache.hbase 33 | hbase-client 34 | 1.4.3 35 | 36 | 37 | mysql 38 | mysql-connector-java 39 | 5.1.45 40 | 41 | 42 | org.springframework.boot 43 | spring-boot-starter-test 44 | test 45 | 46 | 47 | 48 | 49 | 50 | 51 | org.springframework.boot 52 | spring-boot-maven-plugin 53 | 54 | 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /dataflow-stream-kafka-source/src/main/java/com/bigdata/study/dataflowstreamkafkasource/prop/KafkaSourceProperties.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.dataflowstreamkafkasource.prop; 2 | 3 | import org.springframework.boot.context.properties.ConfigurationProperties; 4 | 5 | /** 6 | * kafka 配置属性 7 | **/ 8 | @ConfigurationProperties("kafka") 9 | public class KafkaSourceProperties { 10 | private String topic = "test"; 11 | 12 | private String servers = "ebmas-02:6667,ebmas-01:6667,ebmas-03:6667"; 13 | 14 | private String groupId = "test-group"; 15 | 16 | private long batchSize = 1024; 17 | 18 | private String zkNodes="192.168.10.120:2181,192.168.10.121:2181,192.168.10.122:2181"; 19 | 20 | private String keyDeserializer = "org.apache.kafka.common.serialization.StringDeserializer"; 21 | 22 | private String valueDeserializer = "org.apache.kafka.common.serialization.StringDeserializer"; 23 | 24 | public String getTopic() { 25 | return topic; 26 | } 27 | 28 | public void setTopic(String topic) { 29 | this.topic = topic; 30 | } 31 | 32 | public String getServers() { 33 | return servers; 34 | } 35 | 36 | public void setServers(String servers) { 37 | this.servers = servers; 38 | } 39 | 40 | public String getGroupId() { 41 | return groupId; 42 | } 43 | 44 | public String getZkNodes() { 45 | return zkNodes; 46 | } 47 | 48 | public void setZkNodes(String zkNodes) { 49 | this.zkNodes = zkNodes; 50 | } 51 | 52 | public void setGroupId(String groupId) { 53 | this.groupId = groupId; 54 | } 55 | 56 | public long getBatchSize() { 57 | return batchSize; 58 | } 59 | 60 | public void setBatchSize(long batchSize) { 61 | this.batchSize = batchSize; 62 | } 63 | 64 | public String getKeyDeserializer() { 65 | return keyDeserializer; 66 | } 67 | 68 | public void setKeyDeserializer(String keyDeserializer) { 69 | this.keyDeserializer = keyDeserializer; 70 | } 71 | 72 | public String getValueDeserializer() { 73 | return valueDeserializer; 74 | } 75 | 76 | public void setValueDeserializer(String valueDeserializer) { 77 | this.valueDeserializer = valueDeserializer; 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /dataflow-stream-kafka-source/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.bigdata.study 7 | bigdata-starter 8 | 1.0 9 | ../pom.xml 10 | 11 | dataflow-stream-kafka-source 12 | 0.0.1-SNAPSHOT 13 | dataflow-stream-kafka-source 14 | Demo project for Spring Boot 15 | 16 | 17 | 18 | org.springframework.cloud 19 | spring-cloud-starter-stream-kafka 20 | 21 | 22 | org.springframework.boot 23 | spring-boot-configuration-processor 24 | true 25 | 26 | 27 | org.apache.kafka 28 | kafka-streams 29 | 0.10.1.1 30 | 31 | 32 | org.slf4j 33 | slf4j-api 34 | 35 | 36 
| log4j 37 | log4j 38 | 39 | 40 | org.slf4j 41 | slf4j-log4j12 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | org.springframework.boot 51 | spring-boot-maven-plugin 52 | 53 | 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /flink-kafka-hbase/src/main/java/com/bigdata/study/flinkkafkahbase/core/Kafka2Hbase.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkkafkahbase.core; 2 | 3 | import com.bigdata.study.flinkkafkahbase.model.Metric; 4 | import com.bigdata.study.flinkkafkahbase.sink.FlinkHbaseSink; 5 | import com.fasterxml.jackson.databind.ObjectMapper; 6 | import org.apache.commons.lang3.StringUtils; 7 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 8 | import org.apache.flink.streaming.api.TimeCharacteristic; 9 | import org.apache.flink.streaming.api.datastream.DataStream; 10 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 11 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 12 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 13 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011; 14 | import org.apache.hadoop.hbase.TableName; 15 | 16 | import java.util.Properties; 17 | 18 | /** 19 | * flink 处理数据从kafka到hbase 20 | **/ 21 | public class Kafka2Hbase { 22 | 23 | private static final ObjectMapper mapper = new ObjectMapper(); 24 | 25 | public static void main(String[] args) { 26 | final String ZOOKEEPER_HOST = "192.168.20.48:2181,192.168.20.51:2181,192.168.20.52:2181"; 27 | final String KAFKA_HOST = "192.168.20.48:9092"; 28 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 29 | env.enableCheckpointing(1000); 30 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 31 | 32 | Properties prop = new Properties(); 33 | prop.setProperty("bootstrap.servers", KAFKA_HOST); 34 | prop.put("zookeeper.connect", ZOOKEEPER_HOST); 35 | prop.put("group.id", "kafka-hbase-group"); 36 | DataStreamSource dataStreamSource = env.addSource(new FlinkKafkaConsumer011<>("kafka-hbase", new SimpleStringSchema(), prop)); 37 | DataStream metricDataStream = dataStreamSource.rebalance().filter(StringUtils::isNotBlank).map(m -> { 38 | Metric metric = mapper.readValue(m, Metric.class); 39 | return metric; 40 | }); 41 | metricDataStream.addSink(new FlinkHbaseSink()); 42 | env.setParallelism(2); 43 | try { 44 | env.execute("flink kafka hbase sink"); 45 | } catch (Exception e) { 46 | e.printStackTrace(); 47 | } 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /flink-async-io/src/main/java/com/bigdata/study/flinkasyncio/async/AsyncDataBaseRequest.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkasyncio.async; 2 | 3 | import org.apache.flink.api.java.tuple.Tuple2; 4 | import org.apache.flink.configuration.Configuration; 5 | import org.apache.flink.streaming.api.functions.async.ResultFuture; 6 | import org.apache.flink.streaming.api.functions.async.RichAsyncFunction; 7 | import org.apache.flink.util.ExecutorUtils; 8 | 9 | import java.util.ArrayList; 10 | import java.util.Collections; 11 | import java.util.concurrent.ExecutorService; 12 | import java.util.concurrent.Executors; 13 | import java.util.concurrent.ThreadLocalRandom; 14 | import java.util.concurrent.TimeUnit; 15 | 16 | /** 17 | * 使用线程模仿async IO 操作 
18 | **/ 19 | public class AsyncDataBaseRequest extends RichAsyncFunction { 20 | 21 | private static final long serialVersionUID = -1L; 22 | 23 | private transient ExecutorService executorService; 24 | 25 | private final long sleepFactor; 26 | 27 | private final float failRatio; 28 | 29 | private final long shutdownWaitTS; 30 | 31 | public AsyncDataBaseRequest(long sleepFactor, float failRatio, long shutdownWaitTS) { 32 | this.sleepFactor = sleepFactor; 33 | this.failRatio = failRatio; 34 | this.shutdownWaitTS = shutdownWaitTS; 35 | } 36 | 37 | @Override 38 | public void open(Configuration parameters) throws Exception { 39 | super.open(parameters); 40 | executorService = Executors.newFixedThreadPool(10); 41 | } 42 | 43 | @Override 44 | public void close() throws Exception { 45 | super.close(); 46 | ExecutorUtils.gracefulShutdown(shutdownWaitTS, TimeUnit.MICROSECONDS, executorService); 47 | } 48 | 49 | @Override 50 | public void asyncInvoke(Integer integer, ResultFuture resultFuture) throws Exception { 51 | executorService.submit(() -> { 52 | long sleep = (long) (ThreadLocalRandom.current().nextFloat() * sleepFactor); 53 | try { 54 | Thread.sleep(sleep); 55 | if (ThreadLocalRandom.current().nextFloat() < failRatio) { 56 | resultFuture.completeExceptionally(new Exception("数据太小了。。。")); 57 | } else { 58 | resultFuture.complete(Collections.singletonList("key-" + (integer % 10))); 59 | } 60 | } catch (InterruptedException e) { 61 | e.printStackTrace(); 62 | resultFuture.complete(new ArrayList<>(0)); 63 | } 64 | }); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /flink-hdfs/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.bigdata.study 7 | bigdata-starter 8 | 1.0 9 | ../pom.xml 10 | 11 | flink-hdfs 12 | 0.0.1-SNAPSHOT 13 | flink-hdfs 14 | Demo project for Spring Boot 15 | 16 | 17 | 18 | org.springframework.boot 19 | spring-boot-starter 20 | 21 | 22 | com.bigdata.study 23 | flink-common 24 | 1.0 25 | 26 | 27 | org.apache.flink 28 | flink-connector-filesystem_${scala.binary.version} 29 | ${flink.version} 30 | 31 | 32 | org.apache.hadoop 33 | hadoop-common 34 | 2.7.1 35 | 36 | 37 | org.apache.flink 38 | flink-hadoop-compatibility_${scala.binary.version} 39 | ${flink.version} 40 | 41 | 42 | org.springframework.boot 43 | spring-boot-starter-test 44 | test 45 | 46 | 47 | org.apache.hadoop 48 | hadoop-mapreduce-client-core 49 | 3.1.0 50 | compile 51 | 52 | 53 | 54 | 55 | 56 | 57 | org.springframework.boot 58 | spring-boot-maven-plugin 59 | 60 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /flink-hdfs/src/main/java/com/bigdata/study/flinkhdfs/utils/HadoopConfig.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkhdfs.utils; 2 | 3 | import org.springframework.boot.context.properties.ConfigurationProperties; 4 | import org.springframework.context.annotation.Configuration; 5 | 6 | /** 7 | * 项目名 data-acquisition-dataflow 8 | * Created by zhongdev. 
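The SimpleSource and AsyncDataBaseRequest shown above are the two halves of the flink-async-io example; what ties them together is Flink's AsyncDataStream. A minimal sketch, with a made-up class name and constructor arguments rather than the module's actual FlinkAsyncIoApplication:

package com.bigdata.study.flinkasyncio;

import com.bigdata.study.flinkasyncio.async.AsyncDataBaseRequest;
import com.bigdata.study.flinkasyncio.source.SimpleSource;
import org.apache.flink.streaming.api.datastream.AsyncDataStream;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import java.util.concurrent.TimeUnit;

public class AsyncIoWiringSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // emit 1000 integers; the counter is checkpointed via ListCheckpointed
        DataStream<Integer> input = env.addSource(new SimpleSource(1000));
        // hand each element to the thread-pool backed async function;
        // at most 20 requests are in flight, each times out after 10 seconds
        DataStream<String> enriched = AsyncDataStream.unorderedWait(
                input, new AsyncDataBaseRequest(100L, 0.001f, 1000L), 10, TimeUnit.SECONDS, 20);
        enriched.print();
        env.execute("flink async IO sketch");
    }
}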
9 | * Created at 2017/10/26 10 | * 描述:hadoop的配置信息 11 | */ 12 | @Configuration 13 | @ConfigurationProperties(prefix = "hadoop") 14 | public class HadoopConfig { 15 | 16 | private String fsDefaultFS; 17 | private String hadoopTmpDir; 18 | private Integer dfsReplication; 19 | private String dfsNamenodeNameDir; 20 | private String dfsDatanodeDataDir; 21 | private Boolean dfsPermissions; 22 | private Boolean dfsSupportAppend; 23 | private String dfsUser = "hdfs"; 24 | 25 | public String getFsDefaultFS() { 26 | return fsDefaultFS; 27 | } 28 | 29 | public void setFsDefaultFS(String fsDefaultFS) { 30 | this.fsDefaultFS = fsDefaultFS; 31 | } 32 | 33 | public String getHadoopTmpDir() { 34 | return hadoopTmpDir; 35 | } 36 | 37 | public void setHadoopTmpDir(String hadoopTmpDir) { 38 | this.hadoopTmpDir = hadoopTmpDir; 39 | } 40 | 41 | public Integer getDfsReplication() { 42 | return dfsReplication; 43 | } 44 | 45 | public void setDfsReplication(Integer dfsReplication) { 46 | this.dfsReplication = dfsReplication; 47 | } 48 | 49 | public String getDfsNamenodeNameDir() { 50 | return dfsNamenodeNameDir; 51 | } 52 | 53 | public void setDfsNamenodeNameDir(String dfsNamenodeNameDir) { 54 | this.dfsNamenodeNameDir = dfsNamenodeNameDir; 55 | } 56 | 57 | public String getDfsDatanodeDataDir() { 58 | return dfsDatanodeDataDir; 59 | } 60 | 61 | public void setDfsDatanodeDataDir(String dfsDatanodeDataDir) { 62 | this.dfsDatanodeDataDir = dfsDatanodeDataDir; 63 | } 64 | 65 | public Boolean getDfsPermissions() { 66 | return dfsPermissions; 67 | } 68 | 69 | public void setDfsPermissions(Boolean dfsPermissions) { 70 | this.dfsPermissions = dfsPermissions; 71 | } 72 | 73 | public Boolean getDfsSupportAppend() { 74 | return dfsSupportAppend; 75 | } 76 | 77 | public void setDfsSupportAppend(Boolean dfsSupportAppend) { 78 | this.dfsSupportAppend = dfsSupportAppend; 79 | } 80 | 81 | public String getDfsUser() { 82 | return dfsUser; 83 | } 84 | 85 | public void setDfsUser(String dfsUser) { 86 | this.dfsUser = dfsUser; 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /flink-jdbc-hbase/src/main/java/com/bigdata/study/flinkjdbchbase/core/FlinkFromTxt.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkjdbchbase.core; 2 | 3 | import org.apache.commons.lang3.StringUtils; 4 | import org.apache.flink.api.common.functions.FlatMapFunction; 5 | import org.apache.flink.api.common.functions.ReduceFunction; 6 | import org.apache.flink.api.java.tuple.Tuple2; 7 | import org.apache.flink.streaming.api.TimeCharacteristic; 8 | import org.apache.flink.streaming.api.datastream.DataStream; 9 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 10 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 12 | import org.apache.flink.streaming.api.windowing.time.Time; 13 | import org.apache.flink.util.Collector; 14 | 15 | /** 16 | * @Description 17 | * @Author hasee 18 | * @Date 2019/1/7 19 | **/ 20 | public class FlinkFromTxt { 21 | 22 | public static void main(String[] args) { 23 | String file_input = "C:\\Users\\hasee\\Desktop\\spark.txt"; 24 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 25 | env.enableCheckpointing(10000); 26 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 27 | env.setParallelism(1); 28 | DataStream dataStreamSource = 
env.readTextFile(file_input); 29 | DataStream> reduce = dataStreamSource.filter(StringUtils::isNotBlank) 30 | .flatMap(new FlatMapFunction>() { 31 | @Override 32 | public void flatMap(String s, Collector> collector) throws Exception { 33 | String[] words = s.toLowerCase().split("\\W+"); 34 | for (String word : words) { 35 | if (word.length() > 0) { 36 | Tuple2 tuple2 = new Tuple2<>(); 37 | tuple2.f0 = word; 38 | tuple2.f1 = 1; 39 | collector.collect(tuple2); 40 | } 41 | } 42 | } 43 | }).keyBy(0).timeWindow(Time.seconds(30)).reduce(new ReduceFunction>() { 44 | @Override 45 | public Tuple2 reduce(Tuple2 t1, Tuple2 t2) throws Exception { 46 | return new Tuple2<>(t1.f0, t1.f1 + t2.f1); 47 | } 48 | }); 49 | reduce.print(); 50 | try { 51 | env.execute("flink read txt"); 52 | } catch (Exception e) { 53 | e.printStackTrace(); 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /flink-common/src/main/java/utils/ExecutionEnvUtil.java: -------------------------------------------------------------------------------- 1 | package utils; 2 | 3 | import constant.PropertiesConstants; 4 | import org.apache.flink.api.common.restartstrategy.RestartStrategies; 5 | import org.apache.flink.api.java.utils.ParameterTool; 6 | import org.apache.flink.streaming.api.TimeCharacteristic; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | 9 | import java.io.IOException; 10 | import java.util.HashMap; 11 | import java.util.Map; 12 | 13 | /** 14 | * 解析参数工具类 15 | **/ 16 | public class ExecutionEnvUtil { 17 | 18 | public static ParameterTool createParameterPool(final String[] args) throws IOException { 19 | return ParameterTool.fromPropertiesFile(ExecutionEnvUtil.class.getResourceAsStream(PropertiesConstants.PROPERTIES_FILE_NAME)) 20 | .mergeWith(ParameterTool.fromArgs(args)) 21 | .mergeWith(ParameterTool.fromSystemProperties()) 22 | .mergeWith(ParameterTool.fromMap(getEnv())); 23 | } 24 | 25 | public static ParameterTool PARAMETERTOOL = createParameterPool(); 26 | 27 | private static ParameterTool createParameterPool() { 28 | try { 29 | return ParameterTool.fromPropertiesFile(ExecutionEnvUtil.class.getResourceAsStream(PropertiesConstants.PROPERTIES_FILE_NAME)) 30 | .mergeWith(ParameterTool.fromSystemProperties()) 31 | .mergeWith(ParameterTool.fromMap(getEnv())); 32 | } catch (IOException e) { 33 | e.printStackTrace(); 34 | } 35 | return null; 36 | } 37 | 38 | public static StreamExecutionEnvironment prepare(ParameterTool parameterTool) { 39 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 40 | env.setParallelism(parameterTool.getInt(PropertiesConstants.STREAM_PARALLELISM, 5)); 41 | env.getConfig().disableSysoutLogging(); 42 | env.setRestartStrategy(RestartStrategies.fixedDelayRestart(4, 10000)); 43 | if (parameterTool.getBoolean(PropertiesConstants.STREAM_CHECKPOINT_ENABLE, true)) { 44 | env.enableCheckpointing(parameterTool.getInt(PropertiesConstants.STREAM_CHECKPOINT_INTERVAL, 1000)); 45 | } 46 | env.getConfig().setGlobalJobParameters(parameterTool); 47 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 48 | return env; 49 | } 50 | 51 | private static Map getEnv() { 52 | Map envMap = new HashMap<>(); 53 | Map sysEnv = System.getenv(); 54 | for (Map.Entry entry : sysEnv.entrySet()) { 55 | envMap.put(entry.getKey(), entry.getValue()); 56 | } 57 | return envMap; 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /spark-phoenix/pom.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.bigdata.study 7 | bigdata-starter 8 | 1.0 9 | ../pom.xml 10 | 11 | spark-phoenix 12 | 0.0.1-SNAPSHOT 13 | spark-phoenix 14 | Demo project for Spring Boot 15 | 16 | 17 | 18 | org.apache.spark 19 | spark-core_2.11 20 | 2.3.0 21 | 22 | 23 | org.apache.spark 24 | spark-sql_2.11 25 | 2.3.0 26 | 27 | 28 | org.apache.phoenix 29 | phoenix-core 30 | 4.13.1-HBase-1.3 31 | 32 | 33 | org.apache.phoenix 34 | phoenix-spark 35 | 4.13.1-HBase-1.3 36 | 37 | 38 | joda-time 39 | joda-time 40 | 41 | 42 | org.apache.hbase 43 | hbase-protocol 44 | 1.3.1 45 | 46 | 47 | org.apache.hbase 48 | hbase-client 49 | 1.3.1 50 | 51 | 52 | org.slf4j 53 | slf4j-log4j12 54 | 55 | 56 | 57 | 58 | junit 59 | junit 60 | 61 | 62 | org.springframework.boot 63 | spring-boot-starter-test 64 | 65 | 66 | 67 | 68 | 69 | 70 | org.springframework.boot 71 | spring-boot-maven-plugin 72 | 73 | 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /flink-kafka-hbase/src/main/java/com/bigdata/study/flinkkafkahbase/sink/FlinkHbaseSink.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkkafkahbase.sink; 2 | 3 | import com.bigdata.study.flinkkafkahbase.model.Metric; 4 | import org.apache.flink.configuration.Configuration; 5 | import org.apache.flink.streaming.api.functions.sink.RichSinkFunction; 6 | import org.apache.hadoop.hbase.HBaseConfiguration; 7 | import org.apache.hadoop.hbase.HColumnDescriptor; 8 | import org.apache.hadoop.hbase.HTableDescriptor; 9 | import org.apache.hadoop.hbase.TableName; 10 | import org.apache.hadoop.hbase.client.*; 11 | import org.apache.hadoop.hbase.util.Bytes; 12 | 13 | import java.util.Map; 14 | 15 | /** 16 | * Hbase sink 17 | **/ 18 | public class FlinkHbaseSink extends RichSinkFunction { 19 | 20 | private static final String hbaseZookeeperQuorum = "192.168.20.48"; 21 | private static final String hbaseZookeeperClinentPort = "2181"; 22 | private static TableName hbaseTableName = TableName.valueOf("test"); 23 | private static final String columnFamily = "cf"; 24 | 25 | private Connection connection; 26 | 27 | @Override 28 | public void open(Configuration parameters) throws Exception { 29 | super.open(parameters); 30 | org.apache.hadoop.conf.Configuration config = HBaseConfiguration.create(); 31 | config.set("hbase.zookeeper.quorum", hbaseZookeeperQuorum); 32 | config.set("hbase.master", "10.45.151.26:60000"); 33 | config.set("hbase.zookeeper.property.clientPort", hbaseZookeeperClinentPort); 34 | config.setInt("hbase.rpc.timeout", 20000); 35 | config.setInt("hbase.client.operation.timeout", 30000); 36 | config.setInt("hbase.client.scanner.timeout.period", 200000); 37 | connection = ConnectionFactory.createConnection(config); 38 | } 39 | 40 | @Override 41 | public void close() throws Exception { 42 | super.close(); 43 | if (connection != null) { 44 | connection.close(); 45 | } 46 | } 47 | 48 | @Override 49 | public void invoke(Metric value, Context context) throws Exception { 50 | Admin admin = connection.getAdmin(); 51 | boolean tableExists = admin.tableExists(hbaseTableName); 52 | if (!tableExists) { 53 | admin.createTable(new HTableDescriptor(hbaseTableName).addFamily(new HColumnDescriptor(columnFamily))); 54 | } 55 | Table table = connection.getTable(hbaseTableName); 56 | long timeMillis = System.currentTimeMillis(); 57 | Put put = new Put(Bytes.toBytes(timeMillis)); 58 | Map fields = 
value.getFields(); 59 | for (Map.Entry entry : fields.entrySet()) { 60 | String field = entry.getKey(); 61 | put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(field), Bytes.toBytes((String) entry.getValue())); 62 | } 63 | table.put(put); 64 | table.close(); 65 | admin.close(); 66 | connection.close(); 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /kafka-stream/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.bigdata.study 7 | bigdata-starter 8 | 1.0 9 | ../pom.xml 10 | 11 | kafka-stream 12 | 0.0.1-SNAPSHOT 13 | kafka-stream 14 | Demo project for Spring Boot 15 | 16 | 17 | 18 | org.springframework.boot 19 | spring-boot-starter 20 | 21 | 22 | org.apache.kafka 23 | kafka_2.11 24 | 0.10.1.0 25 | 26 | 27 | com.101tec 28 | zkclient 29 | 30 | 31 | 32 | 33 | org.apache.kafka 34 | kafka-clients 35 | 0.10.1.0 36 | 37 | 38 | org.apache.kafka 39 | kafka-streams 40 | 0.10.1.0 41 | 42 | 43 | com.101tec 44 | zkclient 45 | 46 | 47 | 48 | 49 | com.101tec 50 | zkclient 51 | 0.10 52 | 53 | 54 | commons-io 55 | commons-io 56 | 2.6 57 | 58 | 59 | org.apache.commons 60 | commons-lang3 61 | 3.7 62 | 63 | 64 | org.springframework.boot 65 | spring-boot-starter-test 66 | test 67 | 68 | 69 | 70 | 71 | 72 | 73 | org.springframework.boot 74 | spring-boot-maven-plugin 75 | 76 | 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /flink-kafka-hbase/src/main/java/com/bigdata/study/flinkkafkahbase/core/Hbase2Kafka.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkkafkahbase.core; 2 | 3 | import com.bigdata.study.flinkkafkahbase.source.FlinkHbaseSource; 4 | import com.bigdata.study.flinkkafkahbase.watermarks.FlinkHbaseWaterMarks; 5 | import com.fasterxml.jackson.databind.ObjectMapper; 6 | import org.apache.commons.collections.CollectionUtils; 7 | import org.apache.flink.api.common.functions.FlatMapFunction; 8 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 9 | import org.apache.flink.streaming.api.TimeCharacteristic; 10 | import org.apache.flink.streaming.api.datastream.DataStream; 11 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 12 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 13 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011; 14 | import org.apache.flink.util.Collector; 15 | 16 | import java.util.Collections; 17 | import java.util.Map; 18 | import java.util.Properties; 19 | 20 | /** 21 | * 从Hbase读取数据到kafka 22 | **/ 23 | public class Hbase2Kafka { 24 | private static final ObjectMapper mapper = new ObjectMapper(); 25 | 26 | public static void main(String[] args) { 27 | final String ZOOKEEPER_HOST = "192.168.20.48:2181,192.168.20.51:2181,192.168.20.52:2181"; 28 | final String KAFKA_HOST = "192.168.20.48:9092"; 29 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 30 | env.enableCheckpointing(1000); 31 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 32 | DataStreamSource> dataStreamSource = env.addSource(new FlinkHbaseSource()); 33 | dataStreamSource.assignTimestampsAndWatermarks(new FlinkHbaseWaterMarks()); 34 | DataStream dataStream = dataStreamSource.filter(data -> CollectionUtils.isNotEmpty(Collections.singleton(data))).flatMap(new FlatMapFunction, String>() { 35 | @Override 36 | public void flatMap(Map 
stringStringMap, Collector collector) throws Exception { 37 | String value = mapper.writeValueAsString(stringStringMap); 38 | collector.collect(value); 39 | } 40 | }); 41 | Properties prop = new Properties(); 42 | prop.setProperty("bootstrap.servers", KAFKA_HOST); 43 | prop.put("zookeeper.connect", ZOOKEEPER_HOST); 44 | prop.put("key.serializer", "org.apache.kafka.common.serialization.StringDeserializer"); 45 | prop.put("value.serializer", "org.apache.kafka.common.serialization.StringDeserializer"); 46 | prop.put("auto.offset.reset", "latest"); 47 | FlinkKafkaProducer011 producer011 = new FlinkKafkaProducer011<>("hbase-kafka", new SimpleStringSchema(), prop); 48 | producer011.setWriteTimestampToKafka(true); 49 | dataStream.addSink(producer011); 50 | try { 51 | env.execute("flink hbase 2 kafka11"); 52 | } catch (Exception e) { 53 | e.printStackTrace(); 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /flink-jdbc-hbase/src/main/java/com/bigdata/study/flinkjdbchbase/sink/HbaseSink.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkjdbchbase.sink; 2 | 3 | import org.apache.flink.configuration.Configuration; 4 | import org.apache.flink.streaming.api.functions.ProcessFunction; 5 | import org.apache.flink.table.shaded.org.joda.time.Instant; 6 | import org.apache.flink.util.Collector; 7 | import org.apache.hadoop.hbase.*; 8 | import org.apache.hadoop.hbase.client.*; 9 | import org.apache.hadoop.hbase.security.User; 10 | import org.apache.hadoop.hbase.util.Bytes; 11 | import org.apache.hadoop.security.UserGroupInformation; 12 | 13 | /** 14 | * 自定义Hbase sink 15 | **/ 16 | public class HbaseSink extends ProcessFunction { 17 | 18 | private String zookeeper; 19 | private String zkPort; 20 | private String tableName; 21 | private String family; 22 | private Table table; 23 | 24 | public HbaseSink(String zookeeper, String zkPort, String tableName, String family) { 25 | this.zookeeper = zookeeper; 26 | this.zkPort = zkPort; 27 | this.tableName = tableName; 28 | this.family = family; 29 | } 30 | 31 | @Override 32 | public void open(Configuration parameters) throws Exception { 33 | super.open(parameters); 34 | org.apache.hadoop.conf.Configuration configuration = HBaseConfiguration.create(); 35 | configuration.set(HConstants.ZOOKEEPER_QUORUM, zookeeper); 36 | configuration.set(HConstants.ZOOKEEPER_CLIENT_PORT, zkPort); 37 | configuration.set(HConstants.ZOOKEEPER_ZNODE_PARENT, "/hbase"); 38 | configuration.setInt(HConstants.HBASE_RPC_READ_TIMEOUT_KEY, 5000); 39 | // configuration.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY,5000); 40 | configuration.setInt(HConstants.HBASE_RPC_WRITE_TIMEOUT_KEY, 5000); 41 | configuration.setInt(HConstants.HBASE_CLIENT_OPERATION_TIMEOUT, 5000); 42 | configuration.setInt(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, 5000); 43 | User user = User.create(UserGroupInformation.createRemoteUser("hbase")); 44 | Connection connection = ConnectionFactory.createConnection(configuration, user); 45 | Admin admin = connection.getAdmin(); 46 | if (!admin.tableExists(TableName.valueOf(tableName))) { 47 | admin.createTable(new HTableDescriptor(TableName.valueOf(tableName)).addFamily(new HColumnDescriptor(family))); 48 | } 49 | table = connection.getTable(TableName.valueOf(Bytes.toBytes(tableName))); 50 | } 51 | 52 | @Override 53 | public void close() throws Exception { 54 | super.close(); 55 | if (table != null) { 56 | table.close(); 57 | } 58 | } 59 | 60 | @Override 61 
| public void processElement(String s, Context context, Collector collector) throws Exception { 62 | String rowKey = String.valueOf(Instant.now().getMillis()); 63 | Put put = new Put(Bytes.toBytes(rowKey)); 64 | put.setDurability(Durability.ASYNC_WAL); 65 | put.addColumn(Bytes.toBytes(family), Bytes.toBytes("name"), Bytes.toBytes(s)); 66 | table.put(put); 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /dataflow-stream-kafka-source/src/main/java/com/bigdata/study/dataflowstreamkafkasource/config/KafkaSourceConfig.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.dataflowstreamkafkasource.config; 2 | 3 | import com.bigdata.study.dataflowstreamkafkasource.prop.KafkaSourceProperties; 4 | import com.bigdata.study.dataflowstreamkafkasource.utils.JsonMapper; 5 | import org.apache.kafka.streams.KafkaStreams; 6 | import org.apache.kafka.streams.StreamsConfig; 7 | import org.apache.kafka.streams.kstream.ForeachAction; 8 | import org.apache.kafka.streams.kstream.KStreamBuilder; 9 | import org.springframework.beans.factory.InitializingBean; 10 | import org.springframework.beans.factory.annotation.Autowired; 11 | import org.springframework.boot.context.properties.EnableConfigurationProperties; 12 | import org.springframework.cloud.stream.annotation.EnableBinding; 13 | import org.springframework.cloud.stream.messaging.Source; 14 | import org.springframework.context.annotation.Configuration; 15 | import org.springframework.messaging.Message; 16 | import org.springframework.messaging.support.MessageBuilder; 17 | 18 | import java.io.IOException; 19 | import java.util.HashMap; 20 | import java.util.Map; 21 | 22 | /** 23 | * kafka source 24 | **/ 25 | @Configuration 26 | @EnableConfigurationProperties(KafkaSourceProperties.class) 27 | @EnableBinding(Source.class) 28 | public class KafkaSourceConfig implements InitializingBean { 29 | 30 | private StreamsConfig streamsConfig; 31 | 32 | @Autowired 33 | private KafkaSourceProperties kafkaSourceProperties; 34 | 35 | @Autowired 36 | private Source source; 37 | 38 | // @InboundChannelAdapter(channel = Source.OUTPUT) 39 | public void sendMessage() { 40 | KStreamBuilder builder = new KStreamBuilder(); 41 | builder.stream(kafkaSourceProperties.getTopic()).foreach(new ForeachAction() { 42 | @Override 43 | public void apply(Object key, Object value) { 44 | Message message; 45 | try { 46 | Map map = JsonMapper.defaultMapper().fromJson(String.valueOf(value), Map.class); 47 | message = MessageBuilder.withPayload(map).build(); 48 | source.output().send(message); 49 | System.out.println("成功发送消息:" + message.getPayload()); 50 | } catch (IOException e) { 51 | e.printStackTrace(); 52 | } 53 | } 54 | }); 55 | KafkaStreams kafkaStreams = new KafkaStreams(builder, streamsConfig); 56 | kafkaStreams.start(); 57 | } 58 | 59 | 60 | @Override 61 | public void afterPropertiesSet() { 62 | Map prop = new HashMap<>(); 63 | prop.put(StreamsConfig.APPLICATION_ID_CONFIG, kafkaSourceProperties.getGroupId()); 64 | prop.put("bootstrap.servers", kafkaSourceProperties.getServers()); 65 | prop.put("zookeeper.connect", kafkaSourceProperties.getZkNodes()); 66 | prop.put("key.serde", kafkaSourceProperties.getKeyDeserializer()); 67 | prop.put("value.serde", kafkaSourceProperties.getValueDeserializer()); 68 | prop.put("batch.size", kafkaSourceProperties.getBatchSize()); 69 | streamsConfig = new StreamsConfig(prop); 70 | } 71 | } 72 | 
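For clarity, here is a minimal, self-contained sketch of the forwarding pattern used in KafkaSourceConfig#sendMessage(), with the generic types written out explicitly. It is only an illustration, not project code: the topic name, application id and broker address are placeholder assumptions, and each record is simply printed instead of being sent to the Spring Cloud Stream Source channel.

import java.util.Properties;

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KStreamBuilder;

/**
 * Hedged sketch (not part of the project): consume a topic with the Kafka Streams
 * 0.10.x KStreamBuilder API and hand every record to a downstream consumer.
 */
public class KafkaForwardSketch {

    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "demo-source");        // assumed application/group id
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");  // assumed broker address
        props.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
        props.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());

        KStreamBuilder builder = new KStreamBuilder();
        // Read the topic as a KStream<String, String> and act on each record;
        // the real config parses the payload and sends it to source.output() instead of printing.
        KStream<String, String> stream = builder.stream("demo-topic");        // assumed topic name
        stream.foreach((key, value) -> System.out.println("forwarding: " + value));

        new KafkaStreams(builder, new StreamsConfig(props)).start();
    }
}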
-------------------------------------------------------------------------------- /flink-kafka-hbase/src/main/java/com/bigdata/study/flinkkafkahbase/source/FlinkHbaseSource.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkkafkahbase.source; 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper; 4 | import org.apache.flink.configuration.Configuration; 5 | import org.apache.flink.streaming.api.functions.source.RichSourceFunction; 6 | import org.apache.hadoop.hbase.Cell; 7 | import org.apache.hadoop.hbase.HBaseConfiguration; 8 | import org.apache.hadoop.hbase.TableName; 9 | import org.apache.hadoop.hbase.client.*; 10 | import org.apache.hadoop.hbase.util.Bytes; 11 | 12 | import java.io.IOException; 13 | import java.util.HashMap; 14 | import java.util.Map; 15 | 16 | /** 17 | * Hbase source 18 | **/ 19 | public class FlinkHbaseSource extends RichSourceFunction> { 20 | 21 | private static final String hbaseZookeeperQuorum = "192.168.20.48"; 22 | private static final String hbaseZookeeperClinentPort = "2181"; 23 | private static TableName hbaseTableName = TableName.valueOf("test"); 24 | private static final String columnFamily = "cf"; 25 | 26 | private static final ObjectMapper mapper = new ObjectMapper(); 27 | 28 | private Connection connection; 29 | 30 | @Override 31 | public void open(Configuration parameters) throws Exception { 32 | super.open(parameters); 33 | org.apache.hadoop.conf.Configuration config = HBaseConfiguration.create(); 34 | config.set("hbase.zookeeper.quorum", hbaseZookeeperQuorum); 35 | config.set("hbase.master", "10.45.151.26:60000"); 36 | config.set("hbase.zookeeper.property.clientPort", hbaseZookeeperClinentPort); 37 | config.setInt("hbase.rpc.timeout", 20000); 38 | config.setInt("hbase.client.operation.timeout", 30000); 39 | config.setInt("hbase.client.scanner.timeout.period", 200000); 40 | connection = ConnectionFactory.createConnection(config); 41 | } 42 | 43 | @Override 44 | public void close() throws Exception { 45 | super.close(); 46 | if (connection != null) { 47 | connection.close(); 48 | } 49 | } 50 | 51 | @Override 52 | public void run(SourceContext> sourceContext) throws Exception { 53 | Table table = connection.getTable(hbaseTableName); 54 | Scan scan = new Scan(); 55 | scan.addFamily(Bytes.toBytes(columnFamily)); 56 | ResultScanner tableScanner = table.getScanner(scan); 57 | tableScanner.iterator().forEachRemaining(scanner -> { 58 | Cell[] cells = scanner.rawCells(); 59 | Map map = new HashMap<>(); 60 | for (Cell cell : cells) { 61 | String key = Bytes.toString(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength()); 62 | String value = Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()); 63 | map.put(key, value); 64 | } 65 | sourceContext.collect(map); 66 | }); 67 | 68 | } 69 | 70 | @Override 71 | public void cancel() { 72 | if (!connection.isClosed()) { 73 | try { 74 | connection.close(); 75 | } catch (IOException e) { 76 | e.printStackTrace(); 77 | } 78 | } 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /spark-phoenix/src/main/java/com/bigdata/study/sparkphoenix/SparkPhoenixApplication.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.sparkphoenix; 2 | 3 | import javafx.util.Pair; 4 | import org.apache.spark.SparkConf; 5 | import org.apache.spark.api.java.JavaRDD; 6 | import org.apache.spark.sql.Row; 7 | 
import org.apache.spark.sql.RowFactory; 8 | import org.apache.spark.sql.SaveMode; 9 | import org.apache.spark.sql.SparkSession; 10 | import org.apache.spark.sql.types.DataTypes; 11 | import org.apache.spark.sql.types.StructField; 12 | import org.apache.spark.sql.types.StructType; 13 | import org.joda.time.DateTime; 14 | import org.joda.time.format.DateTimeFormatter; 15 | import org.joda.time.format.DateTimePrinter; 16 | import org.springframework.boot.SpringApplication; 17 | import org.springframework.boot.autoconfigure.SpringBootApplication; 18 | import scala.Tuple2; 19 | 20 | import java.util.ArrayList; 21 | import java.util.List; 22 | import java.util.Properties; 23 | 24 | /** 25 | * spark通过Phoenix读取Hbase数据 26 | */ 27 | @SpringBootApplication 28 | public class SparkPhoenixApplication { 29 | 30 | public static void main(String[] args) { 31 | SpringApplication.run(SparkPhoenixApplication.class, args); 32 | 33 | //初始化Spark 34 | SparkConf conf = new SparkConf().setAppName("Test") 35 | .setMaster("local[1]") 36 | .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 37 | .registerKryoClasses(new Class[]{}); 38 | SparkSession sparkSession = SparkSession.builder().config(conf).getOrCreate(); 39 | DateTime start = new DateTime(args[0]); 40 | DateTime end = new DateTime(args[1]); 41 | String startStr = start.toString("yyyy-MM-dd"); 42 | String endStr = end.toString("yyyy-MM-dd"); 43 | final String SQL_QUERY = "(SELECT date,member_id FROM events WHERE time>='%s' AND time<'%s' AND event='login') events"; 44 | String sql = String.format(SQL_QUERY, startStr, endStr); 45 | 46 | //jdbc从Hbase读取数据 47 | Properties prop = new Properties(); 48 | prop.put("driver", "org.apache.phoenix.jdbc.PhoenixDriver"); 49 | prop.put("user", ""); 50 | prop.put("password", ""); 51 | prop.put("fetchsize", "10000"); 52 | JavaRDD javaRDD = sparkSession.read() 53 | .jdbc("jdbc:phoenix:hadoop101,hadoop102,hadoop103", sql, prop) 54 | .filter("member_id!=-1") 55 | .javaRDD(); 56 | JavaRDD rowJavaRDD = javaRDD.mapToPair(r -> new Tuple2<>(r.getString(0), r.getLong(1))) 57 | .distinct() 58 | .groupByKey() 59 | .map(r -> { 60 | StringBuilder buffer = new StringBuilder(); 61 | r._2.forEach(buffer::append); 62 | return RowFactory.create(r._1, buffer.toString()); 63 | }); 64 | 65 | //schema 66 | List fields = new ArrayList<>(); 67 | fields.add(DataTypes.createStructField("date", DataTypes.StringType, false)); 68 | fields.add(DataTypes.createStructField("dist_mem", DataTypes.StringType, true)); 69 | StructType structType = DataTypes.createStructType(fields); 70 | 71 | //去重并存储 72 | sparkSession.createDataFrame(rowJavaRDD, structType) 73 | .write() 74 | .format("org.apache.phoenix.spark") 75 | .mode(SaveMode.Overwrite) 76 | .option("table", "test_string") 77 | .option("zkUrl", "jdbc:phoenix:hadoop101,hadoop102,hadoop103") 78 | .save(); 79 | sparkSession.stop(); 80 | sparkSession.close(); 81 | 82 | } 83 | 84 | } 85 | 86 | -------------------------------------------------------------------------------- /kafka-stream/src/main/java/com/bigdata/study/kafkastream/producer/UserProducer.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.kafkastream.producer; 2 | 3 | import com.bigdata.study.kafkastream.model.User; 4 | import com.bigdata.study.kafkastream.serdes.GenericSerializer; 5 | import com.bigdata.study.kafkastream.utils.HashPartitioner; 6 | import org.apache.commons.io.IOUtils; 7 | import org.apache.commons.lang3.StringUtils; 8 | import 
org.apache.kafka.clients.producer.KafkaProducer; 9 | import org.apache.kafka.clients.producer.ProducerRecord; 10 | import org.apache.kafka.common.KafkaException; 11 | import org.apache.kafka.common.serialization.StringSerializer; 12 | 13 | import java.io.IOException; 14 | import java.nio.charset.Charset; 15 | import java.util.List; 16 | import java.util.Properties; 17 | import java.util.stream.Collectors; 18 | 19 | /** 20 | * 用户生产者 21 | **/ 22 | public class UserProducer { 23 | public static void main(String[] args) { 24 | Properties prop = new Properties(); 25 | prop.put("bootstrap.servers", "192.168.20.48:9092"); 26 | prop.put("zookeeper.connect", "192.168.20.48:2181,192.168.20.51:2181,192.168.20.52:2181"); 27 | prop.put("acks", "all"); 28 | prop.put("retries", 3); 29 | prop.put("batch.size", 16348); 30 | prop.put("linger.ms", 1); 31 | prop.put("buffer.memory", 33554432); 32 | prop.put("key.serializer", StringSerializer.class.getCanonicalName()); 33 | prop.put("value.serializer", GenericSerializer.class.getCanonicalName()); 34 | prop.put("value.serializer.type", User.class.getCanonicalName()); 35 | prop.put("partitioner.class", HashPartitioner.class.getCanonicalName()); 36 | KafkaProducer userKafkaProducer = new KafkaProducer<>(prop); 37 | try { 38 | List users = readUser(); 39 | users.forEach(user -> { 40 | ProducerRecord producerRecord = new ProducerRecord<>("users", user.getName(), user); 41 | userKafkaProducer.send(producerRecord, (recordMetadata, e) -> { 42 | if (e != null) { 43 | System.err.printf("发送用户消息[topic:%s,partition:%d,offset:%d,keysize:%d,valuesize:%d]失败", 44 | recordMetadata.topic(), 45 | recordMetadata.partition(), 46 | recordMetadata.offset(), 47 | recordMetadata.serializedKeySize(), 48 | recordMetadata.serializedValueSize()); 49 | e.printStackTrace(); 50 | } 51 | System.out.printf("成功发送用户消息[topic:%s,partition:%d,offset:%d,keysize:%d,valuesize:%d]", 52 | recordMetadata.topic(), 53 | recordMetadata.partition(), 54 | recordMetadata.offset(), 55 | recordMetadata.serializedKeySize(), 56 | recordMetadata.serializedValueSize()); 57 | }); 58 | }); 59 | } catch (IOException e) { 60 | throw new KafkaException("发送用户信息到kafka出错", e); 61 | } finally { 62 | userKafkaProducer.close(); 63 | } 64 | } 65 | 66 | private static List readUser() throws IOException { 67 | List lines = IOUtils.readLines(UserProducer.class.getResourceAsStream("/users.csv"), Charset.forName("utf-8")); 68 | List users = lines.stream() 69 | .filter(StringUtils::isNotBlank) 70 | .map(line -> line.split("\\s*,\\s*")) 71 | .filter(value -> value.length == 4) 72 | .map(value -> new User(value[0], value[1], value[2], Integer.parseInt(value[3]))) 73 | .collect(Collectors.toList()); 74 | return users; 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /flink-kafka-hbase/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.bigdata.study 7 | bigdata-starter 8 | 1.0 9 | ../pom.xml 10 | 11 | flink-kafka-hbase 12 | 0.0.1-SNAPSHOT 13 | flink-kafka-hbase 14 | Demo project for Spring Boot 15 | 16 | 17 | 18 | org.springframework.boot 19 | spring-boot-starter 20 | 21 | 22 | com.bigdata.study 23 | flink-common 24 | 1.0 25 | 26 | 27 | org.apache.kafka 28 | kafka_2.11 29 | 0.11.0.2 30 | 31 | 32 | org.apache.hbase 33 | hbase-client 34 | 1.4.3 35 | 36 | 37 | org.apache.flink 38 | flink-hbase_${scala.binary.version} 39 | ${flink.version} 40 | 41 | 42 | log4j 43 | log4j 44 | 45 | 46 | org.apache.zookeeper 47 | zookeeper 48 
| 49 | 50 | org.apache.httpcomponents 51 | httpclient 52 | 53 | 54 | junit 55 | junit 56 | 57 | 58 | org.apache.httpcomponents 59 | httpcore 60 | 61 | 62 | commons-cli 63 | commons-cli 64 | 65 | 66 | com.google.guava 67 | guava 68 | 69 | 70 | com.yammer.metrics 71 | metrics-core 72 | 73 | 74 | commons-codec 75 | commons-codec 76 | 77 | 78 | 79 | 80 | org.springframework.boot 81 | spring-boot-starter-test 82 | test 83 | 84 | 85 | 86 | 87 | 88 | 89 | org.springframework.boot 90 | spring-boot-maven-plugin 91 | 92 | 93 | 94 | 95 | 96 | -------------------------------------------------------------------------------- /flink-common/src/main/java/utils/HttpUtil.java: -------------------------------------------------------------------------------- 1 | package utils; 2 | 3 | import org.apache.http.HttpEntity; 4 | import org.apache.http.HttpStatus; 5 | import org.apache.http.client.methods.CloseableHttpResponse; 6 | import org.apache.http.client.methods.HttpGet; 7 | import org.apache.http.client.methods.HttpPost; 8 | import org.apache.http.entity.StringEntity; 9 | import org.apache.http.impl.client.CloseableHttpClient; 10 | import org.apache.http.impl.client.HttpClients; 11 | import org.apache.http.util.EntityUtils; 12 | 13 | import java.io.IOException; 14 | 15 | public class HttpUtil { 16 | /** 17 | * 通过GET方式发起http请求 18 | */ 19 | public static String doGet(String url) { 20 | CloseableHttpClient httpClient = HttpClients.createDefault(); 21 | try { 22 | HttpGet get = new HttpGet(url); 23 | // get.setHeader("Internal-Client", "alert"); 24 | get.setHeader("content-type", "application/json"); 25 | CloseableHttpResponse httpResponse = null; 26 | httpResponse = httpClient.execute(get); 27 | try { 28 | if (httpResponse.getStatusLine().getStatusCode() == HttpStatus.SC_OK) { 29 | HttpEntity entity = httpResponse.getEntity(); 30 | if (null != entity) { 31 | return EntityUtils.toString(httpResponse.getEntity()); 32 | } 33 | } 34 | } finally { 35 | httpResponse.close(); 36 | } 37 | } catch (Exception e) { 38 | e.printStackTrace(); 39 | } finally { 40 | try { 41 | if (httpClient != null) { 42 | httpClient.close(); 43 | } 44 | } catch (IOException e) { 45 | e.printStackTrace(); 46 | } 47 | } 48 | return null; 49 | } 50 | 51 | 52 | /** 53 | * 发送 POST 请求(HTTP),JSON形式 54 | * 55 | * @param url 调用的地址 56 | * @param jsonParams 调用的参数 57 | * @return 58 | * @throws Exception 59 | */ 60 | public static CloseableHttpResponse doPostResponse(String url, String jsonParams) throws Exception { 61 | CloseableHttpClient httpClient = HttpClients.createDefault(); 62 | CloseableHttpResponse response = null; 63 | HttpPost httpPost = new HttpPost(url); 64 | 65 | try { 66 | StringEntity entity = new StringEntity(jsonParams, "UTF-8"); 67 | entity.setContentEncoding("UTF-8"); 68 | entity.setContentType("application/json"); 69 | 70 | httpPost.setEntity(entity); 71 | httpPost.setHeader("content-type", "application/json"); 72 | response = httpClient.execute(httpPost); 73 | } finally { 74 | if (response != null) { 75 | EntityUtils.consume(response.getEntity()); 76 | } 77 | } 78 | return response; 79 | } 80 | 81 | 82 | public static String doPostString(String url, String jsonParams) throws Exception { 83 | CloseableHttpClient httpClient = HttpClients.createDefault(); 84 | CloseableHttpResponse response = null; 85 | HttpPost httpPost = new HttpPost(url); 86 | 87 | String httpStr; 88 | try { 89 | StringEntity entity = new StringEntity(jsonParams, "UTF-8"); 90 | entity.setContentEncoding("UTF-8"); 91 | entity.setContentType("application/json"); 92 
| 93 | httpPost.setEntity(entity); 94 | httpPost.setHeader("content-type", "application/json"); 95 | // httpPost.setHeader("Internal-Client", "alert"); 96 | response = httpClient.execute(httpPost); 97 | httpStr = EntityUtils.toString(response.getEntity(), "UTF-8"); 98 | 99 | } finally { 100 | if (response != null) { 101 | EntityUtils.consume(response.getEntity()); 102 | } 103 | } 104 | return httpStr; 105 | } 106 | 107 | } 108 | -------------------------------------------------------------------------------- /kafka-stream/src/main/java/com/bigdata/study/kafkastream/producer/ItemProducer.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.kafkastream.producer; 2 | 3 | import com.bigdata.study.kafkastream.model.Item; 4 | import com.bigdata.study.kafkastream.serdes.GenericSerializer; 5 | import com.bigdata.study.kafkastream.utils.HashPartitioner; 6 | import org.apache.commons.io.IOUtils; 7 | import org.apache.commons.lang3.StringUtils; 8 | import org.apache.kafka.clients.producer.Callback; 9 | import org.apache.kafka.clients.producer.KafkaProducer; 10 | import org.apache.kafka.clients.producer.ProducerRecord; 11 | import org.apache.kafka.clients.producer.RecordMetadata; 12 | import org.apache.kafka.common.KafkaException; 13 | import org.apache.kafka.common.serialization.StringSerializer; 14 | 15 | import java.io.IOException; 16 | import java.nio.charset.Charset; 17 | import java.util.List; 18 | import java.util.Properties; 19 | import java.util.stream.Collectors; 20 | 21 | /** 22 | * 商品生产者 23 | **/ 24 | public class ItemProducer { 25 | public static void main(String[] args) { 26 | Properties prop = new Properties(); 27 | prop.put("bootstrap.servers", "192.168.20.48:9092"); 28 | prop.put("zookeeper.connect", "192.168.20.48:2181,192.168.20.51:2181,192.168.20.52:2181"); 29 | prop.put("acks", "all"); 30 | prop.put("retries", 3); 31 | prop.put("batch.size", 16348); 32 | prop.put("linger.ms", 1); 33 | prop.put("buffer.memory", 33554432); 34 | prop.put("key.serializer", StringSerializer.class.getCanonicalName()); 35 | prop.put("value.serializer", GenericSerializer.class.getCanonicalName()); 36 | prop.put("value.serializer.type", Item.class.getCanonicalName()); 37 | prop.put("partitioner.class", HashPartitioner.class.getCanonicalName()); 38 | KafkaProducer kafkaProducer = new KafkaProducer<>(prop); 39 | try { 40 | List items = readItem(); 41 | items.forEach(item -> { 42 | ProducerRecord record = new ProducerRecord<>("items", item.getItemName(), item); 43 | kafkaProducer.send(record, new Callback() { 44 | @Override 45 | public void onCompletion(RecordMetadata recordMetadata, Exception e) { 46 | if (e != null) { 47 | System.err.printf("发送商品消息[topic:%s,partition:%d,offset:%d,keysize:%d,valuesize:%d]失败", 48 | recordMetadata.topic(), 49 | recordMetadata.partition(), 50 | recordMetadata.offset(), 51 | recordMetadata.serializedKeySize(), 52 | recordMetadata.serializedValueSize()); 53 | e.printStackTrace(); 54 | } 55 | System.out.printf("成功发送商品消息[topic:%s,partition:%d,offset:%d,keysize:%d,valuesize:%d]", 56 | recordMetadata.topic(), 57 | recordMetadata.partition(), 58 | recordMetadata.offset(), 59 | recordMetadata.serializedKeySize(), 60 | recordMetadata.serializedValueSize()); 61 | } 62 | }); 63 | }); 64 | } catch (IOException e) { 65 | throw new KafkaException("发送商品数据到kafka出错", e); 66 | } finally { 67 | kafkaProducer.close(); 68 | } 69 | } 70 | 71 | private static List readItem() throws IOException { 72 | List lines = 
IOUtils.readLines(ItemProducer.class.getResourceAsStream("/items.csv"), Charset.forName("utf-8")); 73 | return lines.stream().filter(StringUtils::isNotBlank) 74 | .map(line -> line.split("\\s*,\\s*")) 75 | .filter(value -> value.length == 4) 76 | .map(value -> new Item(value[0], value[1], value[2], Double.parseDouble(value[3]))) 77 | .collect(Collectors.toList()); 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /kafka-stream/src/main/java/com/bigdata/study/kafkastream/producer/OrderProducer.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.kafkastream.producer; 2 | 3 | import com.bigdata.study.kafkastream.model.Order; 4 | import com.bigdata.study.kafkastream.model.User; 5 | import com.bigdata.study.kafkastream.serdes.GenericSerializer; 6 | import com.bigdata.study.kafkastream.utils.HashPartitioner; 7 | import org.apache.commons.io.IOUtils; 8 | import org.apache.commons.lang3.StringUtils; 9 | import org.apache.kafka.clients.producer.Callback; 10 | import org.apache.kafka.clients.producer.KafkaProducer; 11 | import org.apache.kafka.clients.producer.ProducerRecord; 12 | import org.apache.kafka.clients.producer.RecordMetadata; 13 | import org.apache.kafka.common.KafkaException; 14 | import org.apache.kafka.common.serialization.StringSerializer; 15 | 16 | import java.io.IOException; 17 | import java.nio.charset.Charset; 18 | import java.util.List; 19 | import java.util.Properties; 20 | import java.util.stream.Collectors; 21 | 22 | /** 23 | * 订单生产者 24 | **/ 25 | public class OrderProducer { 26 | public static void main(String[] args) { 27 | Properties prop = new Properties(); 28 | prop.put("bootstrap.servers", "192.168.20.48:9092"); 29 | prop.put("zookeeper.connect", "192.168.20.48:2181,192.168.20.51:2181,192.168.20.52:2181"); 30 | prop.put("acks", "all"); 31 | prop.put("retries", 3); 32 | prop.put("batch.size", 16348); 33 | prop.put("linger.ms", 1); 34 | prop.put("buffer.memory", 33554432); 35 | prop.put("key.serializer", StringSerializer.class.getCanonicalName()); 36 | prop.put("value.serializer", GenericSerializer.class.getCanonicalName()); 37 | prop.put("value.serializer.type", Order.class.getCanonicalName()); 38 | prop.put("partitioner.class", HashPartitioner.class.getCanonicalName()); 39 | KafkaProducer kafkaProducer = new KafkaProducer<>(prop); 40 | try { 41 | List orders = readOrder(); 42 | orders.forEach(order -> { 43 | ProducerRecord record = new ProducerRecord<>("orders", order.getUserName(), order); 44 | kafkaProducer.send(record, new Callback() { 45 | @Override 46 | public void onCompletion(RecordMetadata recordMetadata, Exception e) { 47 | if (e != null) { 48 | System.err.printf("发送订单消息[topic:%s,partition:%d,offset:%d,keysize:%d,valuesize:%d]失败", 49 | recordMetadata.topic(), 50 | recordMetadata.partition(), 51 | recordMetadata.offset(), 52 | recordMetadata.serializedKeySize(), 53 | recordMetadata.serializedValueSize()); 54 | e.printStackTrace(); 55 | } 56 | System.out.printf("成功发送订单消息[topic:%s,partition:%d,offset:%d,keysize:%d,valuesize:%d]", 57 | recordMetadata.topic(), 58 | recordMetadata.partition(), 59 | recordMetadata.offset(), 60 | recordMetadata.serializedKeySize(), 61 | recordMetadata.serializedValueSize()); 62 | } 63 | }); 64 | }); 65 | } catch (IOException e) { 66 | throw new KafkaException("发送订单消息出错", e); 67 | } finally { 68 | kafkaProducer.close(); 69 | } 70 | } 71 | 72 | private static List readOrder() throws IOException { 73 | List lines = 
IOUtils.readLines(OrderProducer.class.getResourceAsStream("/orders.csv"), Charset.forName("utf-8")); 74 | return lines.stream().filter(StringUtils::isNotBlank) 75 | .map(line -> line.split("\\s*,\\s*")) 76 | .filter(value -> value.length == 4) 77 | .map(value -> new Order(value[0], value[1], Long.parseLong(value[2]), Integer.parseInt(value[3]))) 78 | .collect(Collectors.toList()); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /flink-common/src/main/java/utils/KafkaUtils.java: -------------------------------------------------------------------------------- 1 | package utils; 2 | 3 | import constant.PropertiesConstants; 4 | import model.Metrics; 5 | import org.apache.flink.api.java.utils.ParameterTool; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 9 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011; 10 | import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; 11 | import org.apache.kafka.clients.consumer.KafkaConsumer; 12 | import org.apache.kafka.clients.consumer.OffsetAndTimestamp; 13 | import org.apache.kafka.common.PartitionInfo; 14 | import org.apache.kafka.common.TopicPartition; 15 | import schemas.MetricSchema; 16 | import watermarks.MetricWatermark; 17 | 18 | import java.util.HashMap; 19 | import java.util.List; 20 | import java.util.Map; 21 | import java.util.Properties; 22 | 23 | /** 24 | * kafka 工具类 25 | **/ 26 | public class KafkaUtils { 27 | 28 | private static Properties buildKafkaProp(ParameterTool parameterTool) { 29 | Properties properties = parameterTool.getProperties(); 30 | properties.put("bootstrap.servers", parameterTool.get(PropertiesConstants.KAFKA_BROKERS)); 31 | properties.put("zookeeper.connect", parameterTool.get(PropertiesConstants.KAFKA_ZOOKEEPER_CONNECT)); 32 | properties.put("group.id", parameterTool.get(PropertiesConstants.KAFKA_GROUP_ID)); 33 | properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 34 | properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 35 | properties.put("auto.offset.reset", "latest"); 36 | return properties; 37 | } 38 | 39 | public static DataStreamSource buildSource(StreamExecutionEnvironment env) { 40 | ParameterTool parameterTool = (ParameterTool) env.getConfig().getGlobalJobParameters(); 41 | String topic = parameterTool.getRequired(PropertiesConstants.METRICS_TOPIC); 42 | long consumerTime = parameterTool.getLong(PropertiesConstants.CONSUMER_FROM_TIME, 0L); 43 | return buildSource(env, topic, consumerTime); 44 | } 45 | 46 | public static DataStreamSource buildSource(StreamExecutionEnvironment env, String topic, Long time) { 47 | ParameterTool parameterTool = (ParameterTool) env.getConfig().getGlobalJobParameters(); 48 | Properties properties = buildKafkaProp(parameterTool); 49 | FlinkKafkaConsumer011 consumer011 = new FlinkKafkaConsumer011<>(topic, new MetricSchema(), properties); 50 | //重装offset到time处 51 | if (time != null && time != 0L) { 52 | properties.setProperty("group.id", "query_time_" + time); 53 | KafkaConsumer consumer = new KafkaConsumer(properties); 54 | List partitionsFor = consumer.partitionsFor(PropertiesConstants.METRICS_TOPIC); 55 | Map partitionLongMap = new HashMap<>(); 56 | for (PartitionInfo partitionInfo : partitionsFor) { 57 | 
partitionLongMap.put(new TopicPartition(partitionInfo.topic(), partitionInfo.partition()), time); 58 | } 59 | Map offsetsForTimes = consumer.offsetsForTimes(partitionLongMap); 60 | Map partitionOffsetMap = new HashMap<>(); 61 | for (Map.Entry entry : offsetsForTimes.entrySet()) { 62 | TopicPartition topicPartition = entry.getKey(); 63 | partitionOffsetMap.put(new KafkaTopicPartition(topicPartition.topic(), topicPartition.partition()), entry.getValue().offset()); 64 | } 65 | consumer.close(); 66 | consumer011.setStartFromSpecificOffsets(partitionOffsetMap); 67 | } 68 | return env.addSource(consumer011); 69 | } 70 | 71 | public static SingleOutputStreamOperator parseSource(DataStreamSource dataStreamSource) { 72 | return dataStreamSource.assignTimestampsAndWatermarks(new MetricWatermark()); 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /flink-kafka-source/src/main/java/com/bigdata/study/flinkkafkasource/FlinkKafkaSourceApplication.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkkafkasource; 2 | 3 | import com.bigdata.study.flinkkafkasource.watermarks.ConsumerWaterMarkEmitter; 4 | import constant.PropertiesConstants; 5 | import model.Metrics; 6 | import org.apache.flink.api.java.utils.ParameterTool; 7 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 9 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011; 10 | import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; 11 | import org.apache.kafka.clients.consumer.KafkaConsumer; 12 | import org.apache.kafka.clients.consumer.OffsetAndTimestamp; 13 | import org.apache.kafka.common.PartitionInfo; 14 | import org.apache.kafka.common.TopicPartition; 15 | import org.springframework.boot.autoconfigure.SpringBootApplication; 16 | import schemas.MetricSchema; 17 | import utils.ExecutionEnvUtil; 18 | 19 | import java.util.HashMap; 20 | import java.util.List; 21 | import java.util.Map; 22 | import java.util.Properties; 23 | 24 | @SpringBootApplication 25 | public class FlinkKafkaSourceApplication { 26 | 27 | public static void main(String[] args) { 28 | // SpringApplication.run(FlinkKafkaSourceApplication.class, args); 29 | try { 30 | ParameterTool parameterPool = ExecutionEnvUtil.createParameterPool(args); 31 | StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterPool); 32 | 33 | Properties properties = new Properties(); 34 | properties.put("bootstrap.servers", parameterPool.get(PropertiesConstants.KAFKA_BROKERS)); 35 | properties.put("zookeeper.connect", parameterPool.get(PropertiesConstants.KAFKA_ZOOKEEPER_CONNECT)); 36 | properties.put("group.id", parameterPool.get(PropertiesConstants.KAFKA_GROUP_ID)); 37 | properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 38 | properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 39 | properties.put("auto.offset.reset", "latest"); 40 | 41 | String topic = parameterPool.getRequired(PropertiesConstants.METRICS_TOPIC); 42 | long consumerTime = parameterPool.getLong(PropertiesConstants.CONSUMER_FROM_TIME, 0L); 43 | 44 | FlinkKafkaConsumer011 consumer011 = new FlinkKafkaConsumer011<>(topic, new MetricSchema(), properties); 45 | 46 | //设置消费者开始位置 47 | //指定消费者应从每个分区开始的确切偏移量 48 | if (consumerTime != 0L) { 49 | properties.setProperty("group.id", 
"query_time_" + consumerTime); 50 | KafkaConsumer consumer = new KafkaConsumer(properties); 51 | List partitionsFor = consumer.partitionsFor(topic); 52 | Map topicPartitionMap = new HashMap<>(); 53 | for (PartitionInfo partitionInfo : partitionsFor) { 54 | topicPartitionMap.put(new TopicPartition(partitionInfo.topic(), partitionInfo.partition()), consumerTime); 55 | } 56 | Map offsetsForTimes = consumer.offsetsForTimes(topicPartitionMap); 57 | Map kafkaTopicPartitionMap = new HashMap<>(); 58 | for (Map.Entry entry : offsetsForTimes.entrySet()) { 59 | TopicPartition topicPartition = entry.getKey(); 60 | KafkaTopicPartition kafkaTopicPartition = new KafkaTopicPartition(topicPartition.topic(), topicPartition.partition()); 61 | kafkaTopicPartitionMap.put(kafkaTopicPartition, entry.getValue().offset()); 62 | } 63 | consumer.close(); 64 | consumer011.setStartFromSpecificOffsets(kafkaTopicPartitionMap); 65 | } 66 | //指定自定义水印发射器 67 | consumer011.assignTimestampsAndWatermarks(new ConsumerWaterMarkEmitter()); 68 | DataStreamSource streamSource = env.addSource(consumer011); 69 | streamSource.print(); 70 | env.execute("flink kafka source"); 71 | } catch (Exception e) { 72 | e.printStackTrace(); 73 | } 74 | } 75 | 76 | } 77 | 78 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | com.bigdata.study 8 | bigdata-starter 9 | 1.0 10 | pom 11 | 12 | 13 | 1.8 14 | true 15 | UTF-8 16 | UTF-8 17 | 1.2.0.RELEASE 18 | 1.5.2.RELEASE 19 | 1.2.1.RELEASE 20 | 21 | 1.6.2 22 | 2.11 23 | 1.8 24 | 1.8 25 | 1.8 26 | 27 | true 28 | UTF-8 29 | UTF-8 30 | 31 | 32 | 33 | org.springframework.boot 34 | spring-boot-starter-parent 35 | 1.5.2.RELEASE 36 | 37 | 38 | 39 | 40 | apache.snapshots 41 | Apache Development Snapshot Repository 42 | https://repository.apache.org/content/repositories/snapshots/ 43 | 44 | false 45 | 46 | 47 | true 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | org.springframework.cloud 56 | spring-cloud-dependencies 57 | Dalston.SR3 58 | pom 59 | import 60 | 61 | 62 | org.springframework.cloud.stream.app 63 | app-starters-core-dependencies 64 | 1.2.0.RELEASE 65 | pom 66 | import 67 | 68 | 69 | 70 | 71 | 72 | org.springframework.cloud 73 | spring-cloud-starter-stream-kafka 74 | 75 | 76 | org.springframework.boot 77 | spring-boot-configuration-processor 78 | ${spring-boot-starter.version} 79 | true 80 | 81 | 82 | 83 | 84 | 85 | 86 | org.apache.maven.plugins 87 | maven-compiler-plugin 88 | 89 | 1.8 90 | 1.8 91 | 92 | 93 | 94 | 95 | 96 | 97 | fork-join 98 | dataflow-stream-redis-set-processor 99 | dataflow-stream-redis-pub-sink 100 | dataflow-stream-kafka-source 101 | spark-phoenix 102 | flink-elasticsearch-sink 103 | flink-common 104 | flink-kafka11-sink 105 | flink-kafka-source 106 | elasticsearch 107 | flink-kafka-hbase 108 | flink-jdbc-hbase 109 | flink-hdfs 110 | kafka-stream 111 | flink-sideoutput 112 | flink-async-io 113 | 114 | -------------------------------------------------------------------------------- /flink-sideoutput/src/main/java/com/bigdata/study/flinksideoutput/FlinkSideoutputApplication.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinksideoutput; 2 | 3 | import com.bigdata.study.flinksideoutput.process.KeyedTokenizer; 4 | import com.bigdata.study.flinksideoutput.tag.SideOutputTag; 5 | import org.apache.flink.api.common.functions.MapFunction; 6 | import 
org.apache.flink.api.common.typeinfo.TypeHint; 7 | import org.apache.flink.api.common.typeinfo.TypeInformation; 8 | import org.apache.flink.api.java.functions.KeySelector; 9 | import org.apache.flink.api.java.tuple.Tuple2; 10 | import org.apache.flink.streaming.api.TimeCharacteristic; 11 | import org.apache.flink.streaming.api.datastream.DataStream; 12 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 13 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 14 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 15 | import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows; 16 | import org.apache.flink.streaming.api.windowing.time.Time; 17 | 18 | //@SpringBootApplication 19 | public class FlinkSideoutputApplication { 20 | public static final String[] WORDS = new String[]{ 21 | "To be, or not to be,--that is the question:--", 22 | "Whether 'tis nobler in the mind to suffer", 23 | "The slings and arrows of outrageous fortune", 24 | "Or to take arms against a sea of troubles,", 25 | "And by opposing end them?--To die,--to sleep,--", 26 | "No more; and by a sleep to say we end", 27 | "The heartache, and the thousand natural shocks", 28 | "That flesh is heir to,--'tis a consummation", 29 | "Devoutly to be wish'd. To die,--to sleep;--", 30 | "To sleep! perchance to dream:--ay, there's the rub;", 31 | "For in that sleep of death what dreams may come,", 32 | "When we have shuffled off this mortal coil,", 33 | "Must give us pause: there's the respect", 34 | "That makes calamity of so long life;", 35 | "For who would bear the whips and scorns of time,", 36 | "The oppressor's wrong, the proud man's contumely,", 37 | "The pangs of despis'd love, the law's delay,", 38 | "The insolence of office, and the spurns", 39 | "That patient merit of the unworthy takes,", 40 | "When he himself might his quietus make", 41 | "With a bare bodkin? who would these fardels bear,", 42 | "To grunt and sweat under a weary life,", 43 | "But that the dread of something after death,--", 44 | "The undiscover'd country, from whose bourn", 45 | "No traveller returns,--puzzles the will,", 46 | "And makes us rather bear those ills we have", 47 | "Than fly to others that we know not of?", 48 | "Thus conscience does make cowards of us all;", 49 | "And thus the native hue of resolution", 50 | "Is sicklied o'er with the pale cast of thought;", 51 | "And enterprises of great pith and moment,", 52 | "With this regard, their currents turn awry,", 53 | "And lose the name of action.--Soft you now!", 54 | "The fair Ophelia!--Nymph, in thy orisons", 55 | "Be all my sins remember'd." 
56 | }; 57 | 58 | /** 59 | * 使用flink 的侧输出流sideoutput 60 | */ 61 | public static void main(String[] args) { 62 | // SpringApplication.run(FlinkSideoutputApplication.class, args); 63 | 64 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 65 | env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime); 66 | DataStreamSource textStream = env.fromElements(WORDS); 67 | SingleOutputStreamOperator> process = textStream.keyBy(new KeySelector() { 68 | @Override 69 | public Integer getKey(String s) throws Exception { 70 | return 0; 71 | } 72 | }).process(new KeyedTokenizer(), TypeInformation.of(new TypeHint>() { 73 | })); 74 | env.getConfig().disableSysoutLogging(); 75 | //侧输出 76 | DataStream sideoutputStream = process.getSideOutput(SideOutputTag.wordTag) 77 | .map(new MapFunction() { 78 | @Override 79 | public String map(String s) throws Exception { 80 | return "rejected:" + s; 81 | } 82 | }); 83 | DataStream> counts = process.keyBy(0) 84 | .window(TumblingEventTimeWindows.of(Time.seconds(5))) 85 | .sum(1); 86 | 87 | counts.print(); 88 | sideoutputStream.print(); 89 | try { 90 | env.execute("Streaming wordcount sideoutput"); 91 | } catch (Exception e) { 92 | e.printStackTrace(); 93 | } 94 | } 95 | 96 | } 97 | 98 | -------------------------------------------------------------------------------- /dataflow-stream-kafka-source/src/main/java/com/bigdata/study/dataflowstreamkafkasource/utils/JsonMapper.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.dataflowstreamkafkasource.utils; 2 | 3 | 4 | import com.fasterxml.jackson.annotation.JsonInclude; 5 | import com.fasterxml.jackson.core.JsonProcessingException; 6 | import com.fasterxml.jackson.databind.DeserializationFeature; 7 | import com.fasterxml.jackson.databind.JavaType; 8 | import com.fasterxml.jackson.databind.ObjectMapper; 9 | import com.fasterxml.jackson.databind.SerializationFeature; 10 | import com.fasterxml.jackson.databind.util.JSONPObject; 11 | import org.slf4j.Logger; 12 | import org.slf4j.LoggerFactory; 13 | import org.springframework.util.StringUtils; 14 | 15 | import java.io.IOException; 16 | import java.util.Collection; 17 | import java.util.Map; 18 | 19 | /** 20 | * Created by wjm on 2017/9/6. 21 | */ 22 | 23 | public class JsonMapper extends ObjectMapper { 24 | 25 | private static Logger logger = LoggerFactory.getLogger(JsonMapper.class); 26 | 27 | public static final JsonMapper INSTANCE = new JsonMapper(); 28 | 29 | private ObjectMapper mapper; 30 | 31 | public JsonMapper() { 32 | this(null); 33 | } 34 | 35 | public JsonMapper(JsonInclude.Include include) { 36 | mapper = new ObjectMapper(); 37 | // 设置输出时包含属性的风格 38 | if (include != null) { 39 | mapper.setSerializationInclusion(include); 40 | } 41 | // 设置输入时忽略在JSON字符串中存在但Java对象实际没有的属性 42 | mapper.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES); 43 | } 44 | 45 | /** 46 | * 创建只输出非Null的属性到Json字符串的Mapper. 47 | */ 48 | public static JsonMapper nonNullMapper() { 49 | return new JsonMapper(JsonInclude.Include.NON_NULL); 50 | } 51 | 52 | /** 53 | * 创建只输出非Null且非Empty(如List.isEmpty)的属性到Json字符串的Mapper. 54 | * 55 | * 注意,要小心使用, 特别留意empty的情况. 
56 | */ 57 | public static JsonMapper nonEmptyMapper() { 58 | return new JsonMapper(JsonInclude.Include.NON_EMPTY); 59 | } 60 | 61 | /** 62 | * 默认的全部输出的Mapper, 区别于INSTANCE,可以做进一步的配置 63 | */ 64 | public static JsonMapper defaultMapper() { 65 | return new JsonMapper(); 66 | } 67 | 68 | /** 69 | * Object可以是POJO,也可以是Collection或数组。 如果对象为Null, 返回"null". 如果集合为空集合, 返回"[]". 70 | */ 71 | public String toJson(Object object) throws JsonProcessingException { 72 | 73 | 74 | return mapper.writeValueAsString(object); 75 | 76 | } 77 | 78 | /** 79 | * 反序列化POJO或简单Collection如List. 80 | * 81 | * 如果JSON字符串为Null或"null"字符串, 返回Null. 如果JSON字符串为"[]", 返回空集合. 82 | * 83 | * 如需反序列化复杂Collection如List, 请使用fromJson(String, JavaType) 84 | * 85 | * @see #fromJson(String, JavaType) 86 | */ 87 | public T fromJson( String jsonString, Class clazz) throws IOException { 88 | if (StringUtils.isEmpty(jsonString)) { 89 | return null; 90 | } 91 | 92 | return mapper.readValue(jsonString, clazz); 93 | 94 | } 95 | 96 | /** 97 | * 反序列化复杂Collection如List, contructCollectionType()或contructMapType()构造类型, 然后调用本函数. 98 | * 99 | */ 100 | public T fromJson( String jsonString, JavaType javaType) throws IOException { 101 | if (StringUtils.isEmpty(jsonString)) { 102 | return null; 103 | } 104 | 105 | 106 | return (T) mapper.readValue(jsonString, javaType); 107 | 108 | } 109 | 110 | /** 111 | * 构造Collection类型. 112 | */ 113 | public JavaType buildCollectionType(Class collectionClass, Class elementClass) { 114 | return mapper.getTypeFactory().constructCollectionType(collectionClass, elementClass); 115 | } 116 | 117 | /** 118 | * 构造Map类型. 119 | */ 120 | public JavaType buildMapType(Class mapClass, Class keyClass, Class valueClass) { 121 | return mapper.getTypeFactory().constructMapType(mapClass, keyClass, valueClass); 122 | } 123 | 124 | /** 125 | * 当JSON里只含有Bean的部分属性時,更新一個已存在Bean,只覆盖該部分的属性. 126 | */ 127 | public void update(String jsonString, Object object) throws IOException { 128 | 129 | mapper.readerForUpdating(object).readValue(jsonString); 130 | 131 | } 132 | 133 | /** 134 | * 輸出JSONP格式數據. 135 | */ 136 | public String toJsonP(String functionName, Object object) throws JsonProcessingException { 137 | return toJson(new JSONPObject(functionName, object)); 138 | } 139 | 140 | /** 141 | * 設定是否使用Enum的toString函數來讀寫Enum, 為False時時使用Enum的name()函數來讀寫Enum, 默認為False. 注意本函數一定要在Mapper創建後, 所有的讀寫動作之前調用. 142 | */ 143 | public void enableEnumUseToString() { 144 | mapper.enable(SerializationFeature.WRITE_ENUMS_USING_TO_STRING); 145 | mapper.enable(DeserializationFeature.READ_ENUMS_USING_TO_STRING); 146 | } 147 | 148 | /** 149 | * 取出Mapper做进一步的设置或使用其他序列化API. 
150 | */ 151 | public ObjectMapper getMapper() { 152 | return mapper; 153 | } 154 | 155 | 156 | 157 | } -------------------------------------------------------------------------------- /dataflow-stream-redis-set-processor/src/main/java/com/bigdata/study/dataflowstreamredissetprocessor/utils/JsonMapper.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.dataflowstreamredissetprocessor.utils; 2 | 3 | 4 | import com.fasterxml.jackson.annotation.JsonInclude; 5 | import com.fasterxml.jackson.core.JsonProcessingException; 6 | import com.fasterxml.jackson.databind.DeserializationFeature; 7 | import com.fasterxml.jackson.databind.JavaType; 8 | import com.fasterxml.jackson.databind.ObjectMapper; 9 | import com.fasterxml.jackson.databind.SerializationFeature; 10 | import com.fasterxml.jackson.databind.util.JSONPObject; 11 | import org.slf4j.Logger; 12 | import org.slf4j.LoggerFactory; 13 | import org.springframework.util.StringUtils; 14 | 15 | import java.io.IOException; 16 | import java.util.Collection; 17 | import java.util.Map; 18 | 19 | /** 20 | * Created by wjm on 2017/9/6. 21 | */ 22 | 23 | public class JsonMapper extends ObjectMapper { 24 | 25 | private static Logger logger = LoggerFactory.getLogger(JsonMapper.class); 26 | 27 | public static final JsonMapper INSTANCE = new JsonMapper(); 28 | 29 | private ObjectMapper mapper; 30 | 31 | public JsonMapper() { 32 | this(null); 33 | } 34 | 35 | public JsonMapper(JsonInclude.Include include) { 36 | mapper = new ObjectMapper(); 37 | // 设置输出时包含属性的风格 38 | if (include != null) { 39 | mapper.setSerializationInclusion(include); 40 | } 41 | // 设置输入时忽略在JSON字符串中存在但Java对象实际没有的属性 42 | mapper.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES); 43 | } 44 | 45 | /** 46 | * 创建只输出非Null的属性到Json字符串的Mapper. 47 | */ 48 | public static JsonMapper nonNullMapper() { 49 | return new JsonMapper(JsonInclude.Include.NON_NULL); 50 | } 51 | 52 | /** 53 | * 创建只输出非Null且非Empty(如List.isEmpty)的属性到Json字符串的Mapper. 54 | * 55 | * 注意,要小心使用, 特别留意empty的情况. 56 | */ 57 | public static JsonMapper nonEmptyMapper() { 58 | return new JsonMapper(JsonInclude.Include.NON_EMPTY); 59 | } 60 | 61 | /** 62 | * 默认的全部输出的Mapper, 区别于INSTANCE,可以做进一步的配置 63 | */ 64 | public static JsonMapper defaultMapper() { 65 | return new JsonMapper(); 66 | } 67 | 68 | /** 69 | * Object可以是POJO,也可以是Collection或数组。 如果对象为Null, 返回"null". 如果集合为空集合, 返回"[]". 70 | */ 71 | public String toJson(Object object) throws JsonProcessingException { 72 | 73 | 74 | return mapper.writeValueAsString(object); 75 | 76 | } 77 | 78 | /** 79 | * 反序列化POJO或简单Collection如List. 80 | * 81 | * 如果JSON字符串为Null或"null"字符串, 返回Null. 如果JSON字符串为"[]", 返回空集合. 82 | * 83 | * 如需反序列化复杂Collection如List, 请使用fromJson(String, JavaType) 84 | * 85 | * @see #fromJson(String, JavaType) 86 | */ 87 | public T fromJson( String jsonString, Class clazz) throws IOException { 88 | if (StringUtils.isEmpty(jsonString)) { 89 | return null; 90 | } 91 | 92 | return mapper.readValue(jsonString, clazz); 93 | 94 | } 95 | 96 | /** 97 | * 反序列化复杂Collection如List, contructCollectionType()或contructMapType()构造类型, 然后调用本函数. 98 | * 99 | */ 100 | public T fromJson( String jsonString, JavaType javaType) throws IOException { 101 | if (StringUtils.isEmpty(jsonString)) { 102 | return null; 103 | } 104 | 105 | 106 | return (T) mapper.readValue(jsonString, javaType); 107 | 108 | } 109 | 110 | /** 111 | * 构造Collection类型. 
112 | */ 113 | public JavaType buildCollectionType(Class collectionClass, Class elementClass) { 114 | return mapper.getTypeFactory().constructCollectionType(collectionClass, elementClass); 115 | } 116 | 117 | /** 118 | * 构造Map类型. 119 | */ 120 | public JavaType buildMapType(Class mapClass, Class keyClass, Class valueClass) { 121 | return mapper.getTypeFactory().constructMapType(mapClass, keyClass, valueClass); 122 | } 123 | 124 | /** 125 | * 当JSON里只含有Bean的部分属性時,更新一個已存在Bean,只覆盖該部分的属性. 126 | */ 127 | public void update(String jsonString, Object object) throws IOException { 128 | 129 | mapper.readerForUpdating(object).readValue(jsonString); 130 | 131 | } 132 | 133 | /** 134 | * 輸出JSONP格式數據. 135 | */ 136 | public String toJsonP(String functionName, Object object) throws JsonProcessingException { 137 | return toJson(new JSONPObject(functionName, object)); 138 | } 139 | 140 | /** 141 | * 設定是否使用Enum的toString函數來讀寫Enum, 為False時時使用Enum的name()函數來讀寫Enum, 默認為False. 注意本函數一定要在Mapper創建後, 所有的讀寫動作之前調用. 142 | */ 143 | public void enableEnumUseToString() { 144 | mapper.enable(SerializationFeature.WRITE_ENUMS_USING_TO_STRING); 145 | mapper.enable(DeserializationFeature.READ_ENUMS_USING_TO_STRING); 146 | } 147 | 148 | /** 149 | * 取出Mapper做进一步的设置或使用其他序列化API. 150 | */ 151 | public ObjectMapper getMapper() { 152 | return mapper; 153 | } 154 | 155 | 156 | 157 | } -------------------------------------------------------------------------------- /flink-hdfs/src/main/java/com/bigdata/study/flinkhdfs/zip/FlinkHdfsZip.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkhdfs.zip; 2 | 3 | import org.apache.commons.lang3.StringUtils; 4 | import org.apache.flink.api.common.functions.FlatMapFunction; 5 | import org.apache.flink.api.common.functions.MapFunction; 6 | import org.apache.flink.api.common.functions.ReduceFunction; 7 | import org.apache.flink.api.java.hadoop.mapred.HadoopOutputFormat; 8 | import org.apache.flink.api.java.tuple.Tuple2; 9 | import org.apache.flink.streaming.api.datastream.DataStream; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.streaming.api.windowing.time.Time; 12 | import org.apache.flink.util.Collector; 13 | import org.apache.hadoop.fs.Path; 14 | import org.apache.hadoop.io.IntWritable; 15 | import org.apache.hadoop.io.SequenceFile; 16 | import org.apache.hadoop.io.Text; 17 | import org.apache.hadoop.io.compress.GzipCodec; 18 | import org.apache.hadoop.mapred.FileOutputFormat; 19 | import org.apache.hadoop.mapred.JobConf; 20 | import org.apache.hadoop.mapred.TextOutputFormat; 21 | 22 | /** 23 | * 使用Flink内置sink API将数据以压缩的格式写入到HDFS上 24 | * 将数据以gz压缩格式将处理后的数据写入到HDFS上 25 | **/ 26 | public class FlinkHdfsZip { 27 | 28 | public static void main(String[] args) { 29 | String file_input = "C:\\Users\\hasee\\Desktop\\spark.txt"; 30 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 31 | env.setParallelism(2); 32 | DataStream dataStreamSource = env.readTextFile(file_input); 33 | DataStream> reduce = dataStreamSource.filter(StringUtils::isNotBlank) 34 | .flatMap(new FlatMapFunction>() { 35 | @Override 36 | public void flatMap(String s, Collector> collector) throws Exception { 37 | String[] words = s.toLowerCase().split("\\W+"); 38 | for (String word : words) { 39 | if (word.length() > 0) { 40 | Tuple2 tuple2 = new Tuple2<>(); 41 | tuple2.f0 = word; 42 | tuple2.f1 = 1; 43 | collector.collect(tuple2); 44 | } 45 | } 46 | } 47 | 
-------------------------------------------------------------------------------- /flink-hdfs/src/main/java/com/bigdata/study/flinkhdfs/zip/FlinkHdfsZip.java: --------------------------------------------------------------------------------
package com.bigdata.study.flinkhdfs.zip;

import org.apache.commons.lang3.StringUtils;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.hadoop.mapred.HadoopOutputFormat;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextOutputFormat;

/**
 * Writes the processed data to HDFS in a compressed (gzip) format using Flink's built-in sink API.
 */
public class FlinkHdfsZip {

    public static void main(String[] args) {
        String file_input = "C:\\Users\\hasee\\Desktop\\spark.txt";
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(2);
        DataStream<String> dataStreamSource = env.readTextFile(file_input);
        DataStream<Tuple2<String, Integer>> reduce = dataStreamSource.filter(StringUtils::isNotBlank)
                .flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                    @Override
                    public void flatMap(String s, Collector<Tuple2<String, Integer>> collector) throws Exception {
                        String[] words = s.toLowerCase().split("\\W+");
                        for (String word : words) {
                            if (word.length() > 0) {
                                Tuple2<String, Integer> tuple2 = new Tuple2<>();
                                tuple2.f0 = word;
                                tuple2.f1 = 1;
                                collector.collect(tuple2);
                            }
                        }
                    }
                }).keyBy(0).timeWindow(Time.seconds(30)).reduce(new ReduceFunction<Tuple2<String, Integer>>() {
                    @Override
                    public Tuple2<String, Integer> reduce(Tuple2<String, Integer> t1, Tuple2<String, Integer> t2) throws Exception {
                        return new Tuple2<>(t1.f0, t1.f1 + t2.f1);
                    }
                });
        DataStream<Tuple2<IntWritable, Text>> hdfsStream = reduce.flatMap(new FlatMapFunction<Tuple2<String, Integer>, Tuple2<IntWritable, Text>>() {
            @Override
            public void flatMap(Tuple2<String, Integer> in, Collector<Tuple2<IntWritable, Text>> collector) throws Exception {
                Tuple2<IntWritable, Text> tuple2 = new Tuple2<>();
                tuple2.f0 = new IntWritable(in.f1);
                tuple2.f1 = new Text(in.f0);
                collector.collect(tuple2);
            }
        });
        DataStream<Tuple2<Text, IntWritable>> outStream = hdfsStream.map(new MapFunction<Tuple2<IntWritable, Text>, Tuple2<Text, IntWritable>>() {
            @Override
            public Tuple2<Text, IntWritable> map(Tuple2<IntWritable, Text> in) throws Exception {
                Tuple2<Text, IntWritable> tuple2 = new Tuple2<>();
                tuple2.f0 = in.f1;
                tuple2.f1 = in.f0;
                return tuple2;
            }
        });
//        DataStream<Tuple2<Text, IntWritable>> outStream = hdfsStream.flatMap(new FlatMapFunction<Tuple2<IntWritable, Text>, Tuple2<Text, IntWritable>>() {
//            @Override
//            public void flatMap(Tuple2<IntWritable, Text> in, Collector<Tuple2<Text, IntWritable>> collector) throws Exception {
//                Tuple2<Text, IntWritable> tuple2 = new Tuple2<>();
//                tuple2.f0 = in.f1;
//                tuple2.f1 = in.f0;
//                collector.collect(tuple2);
//            }
//        });
        // Compress the output files with gzip
        HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat = new HadoopOutputFormat<>(new TextOutputFormat<>(), new JobConf());
        hadoopOutputFormat.getJobConf().set("mapred.textoutputformat.separator", " ");
        hadoopOutputFormat.getJobConf().setCompressMapOutput(true);
        hadoopOutputFormat.getJobConf().set("mapred.output.compress", "true");
        hadoopOutputFormat.getJobConf().setMapOutputCompressorClass(GzipCodec.class);
        // GzipCodec.class.getCanonicalName() returns the class name; equivalent to getName() here
        hadoopOutputFormat.getJobConf().set("mapred.output.compression.codec", GzipCodec.class.getCanonicalName());
        hadoopOutputFormat.getJobConf().set("mapred.output.compression.type", SequenceFile.CompressionType.BLOCK.toString());
        FileOutputFormat.setOutputPath(hadoopOutputFormat.getJobConf(), new Path("/tmp/data/"));
        outStream.writeUsingOutputFormat(hadoopOutputFormat);
        try {
            env.execute("Hadoop Compat WordCount");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
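To sanity-check that the codec settings above actually produce gzip output, a small standalone reader can decompress one of the part files using only the JDK. The file name below is a guess at the usual TextOutputFormat naming and should be adjusted to the real output path:

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.zip.GZIPInputStream;

public class GzipOutputCheck {

    public static void main(String[] args) throws Exception {
        // Hypothetical part-file name under the configured output path
        String partFile = "/tmp/data/part-00000.gz";
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                new GZIPInputStream(new FileInputStream(partFile)), StandardCharsets.UTF_8))) {
            // Print the first few "word count" lines to confirm the content decompresses cleanly
            reader.lines().limit(10).forEach(System.out::println);
        }
    }
}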
-------------------------------------------------------------------------------- /flink-elasticsearch-sink/src/main/java/com/bigdata/study/flinkelasticsearchsink/FlinkElasticsearchSinkApplication.java: --------------------------------------------------------------------------------
package com.bigdata.study.flinkelasticsearchsink;

import com.bigdata.study.flinkelasticsearchsink.handler.FlinkFailHandler;
import constant.PropertiesConstants;
import model.Metrics;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkBase;
import org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkFunction;
import org.apache.flink.streaming.connectors.elasticsearch.RequestIndexer;
import org.apache.flink.streaming.connectors.elasticsearch6.ElasticsearchSink;
import org.apache.http.HttpHost;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Requests;
import org.elasticsearch.common.xcontent.XContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentType;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import utils.ExecutionEnvUtil;
import utils.GsonUtils;
import utils.KafkaUtils;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

/**
 * Flink reads data from Kafka, processes it, and writes the result into Elasticsearch 6.
 */
@SpringBootApplication
public class FlinkElasticsearchSinkApplication {

    public static void main(String[] args) {
        // SpringApplication.run(FlinkElasticsearchSinkApplication.class, args);
        try {
            ParameterTool parameterPool = ExecutionEnvUtil.createParameterPool(args);
            StreamExecutionEnvironment environment = ExecutionEnvUtil.prepare(parameterPool);
            DataStreamSource<Metrics> dataStreamSource = KafkaUtils.buildSource(environment);
            List<HttpHost> httpHosts = parseEsHost(parameterPool.get(PropertiesConstants.ELASTICSEARCH_HOSTS));
            int bulkSize = parameterPool.getInt(PropertiesConstants.ELASTICSEARCH_BULK_FLUSH_MAX_ACTIONS, 40);
            int parallelism = parameterPool.getInt(PropertiesConstants.STREAM_SINK_PARALLELISM, 4);
            ElasticsearchSink.Builder<Metrics> builder = new ElasticsearchSink.Builder<>(httpHosts, new ElasticsearchSinkFunction<Metrics>() {
                @Override
                public void process(Metrics metrics, RuntimeContext runtimeContext, RequestIndexer requestIndexer) {
                    requestIndexer.add(Requests.indexRequest()
                            .index("flink_" + metrics.getName())
                            .type("document")
                            .source(GsonUtils.toJsonBytes(metrics), XContentType.JSON));
                }
            });
            // Advanced configuration

            // Whether to enable the bulk-flush back-off (retry) mechanism
            builder.setBulkFlushBackoff(true);
            // Back-off strategy:
            // CONSTANT keeps a fixed delay between retries, e.g. 2 -> 2 -> 2 ...
            builder.setBulkFlushBackoffType(ElasticsearchSinkBase.FlushBackoffType.CONSTANT);
            // EXPONENTIAL grows the delay exponentially between retries, e.g. 2 -> 4 -> 8 ...
            // builder.setBulkFlushBackoffType(ElasticsearchSinkBase.FlushBackoffType.EXPONENTIAL);
            // Delay between retries; for the exponential strategy this is the initial base delay
            builder.setBulkFlushBackoffDelay(2);
            // Maximum number of retries on failure
            builder.setBulkFlushBackoffRetries(3);

            // Maximum number of actions per bulk request
            builder.setBulkFlushMaxActions(bulkSize);
            // Maximum size of a bulk request, in MB
            builder.setBulkFlushMaxSizeMb(10);

            // The sink's failure-handling only takes effect when checkpointing is enabled via
            // env.enableCheckpointing(); without checkpoints the retry-on-failure strategy does nothing.
            boolean checkpoint = parameterPool.getBoolean(PropertiesConstants.STREAM_CHECKPOINT_ENABLE);
            if (checkpoint) {
                // Register the failure handler
                builder.setFailureHandler(new FlinkFailHandler());
            }

            dataStreamSource.addSink(builder.build()).setParallelism(parallelism);
            environment.execute("flink connectors es6");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    private static List<HttpHost> parseEsHost(String hosts) throws MalformedURLException {
        String[] hostArray = hosts.split(",");
        List<HttpHost> httpHosts = new ArrayList<>();
        for (String host : hostArray) {
            if (StringUtils.startsWith(host, "http:")) {
                URL url = new URL(host);
                httpHosts.add(new HttpHost(url.getHost(), url.getPort()));
            } else {
                String[] parts = host.split(":", 2);
                if (parts.length > 1) {
                    httpHosts.add(new HttpHost(parts[0], Integer.parseInt(parts[1])));
                } else {
                    throw new MalformedURLException("invalid elasticsearch hosts exception!");
                }
            }
        }
        return httpHosts;
    }
}
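FlinkFailHandler is referenced above but its source is not shown in this section. A minimal sketch of an ActionRequestFailureHandler with a typical retry/drop policy (an illustration under stated assumptions, not the repo's actual handler) might look like:

import org.apache.flink.streaming.connectors.elasticsearch.ActionRequestFailureHandler;
import org.apache.flink.streaming.connectors.elasticsearch.RequestIndexer;
import org.apache.flink.util.ExceptionUtils;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.action.ActionRequest;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException;

public class RetryRejectedExecutionFailureHandlerSketch implements ActionRequestFailureHandler {

    @Override
    public void onFailure(ActionRequest action, Throwable failure, int restStatusCode, RequestIndexer indexer) throws Throwable {
        if (ExceptionUtils.findThrowable(failure, EsRejectedExecutionException.class).isPresent()
                && action instanceof IndexRequest) {
            // Bulk queue was full: re-add the request so it is retried with the next bulk flush.
            indexer.add((IndexRequest) action);
        } else if (ExceptionUtils.findThrowable(failure, ElasticsearchParseException.class).isPresent()) {
            // Malformed document: drop it (optionally log it or route it to a dead-letter topic).
        } else {
            // Unknown failure: rethrow so the job fails and restarts from the last checkpoint.
            throw failure;
        }
    }
}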
-------------------------------------------------------------------------------- /flink-async-io/src/main/java/com/bigdata/study/flinkasyncio/FlinkAsyncIoApplication.java: --------------------------------------------------------------------------------
package com.bigdata.study.flinkasyncio;

import com.bigdata.study.flinkasyncio.async.AsyncDataBaseRequest;
import com.bigdata.study.flinkasyncio.source.SimpleSource;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.AsyncDataStream;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.async.AsyncFunction;
import org.apache.flink.util.Collector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.concurrent.TimeUnit;

/**
 * Demonstrates Flink's async I/O API.
 */
//@SpringBootApplication
public class FlinkAsyncIoApplication {

    private static final Logger LOG = LoggerFactory.getLogger(FlinkAsyncIoApplication.class);

    public static void main(String[] args) {
        // SpringApplication.run(FlinkAsyncIoApplication.class, args);

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        ParameterTool params = ParameterTool.fromArgs(args);
        String statePath = null;
        String cpMode = null;
        int maxCount = 0;
        long sleepFactor = 0;
        float failRatio = 0;
        String mode = null;
        int taskNum = 0;
        String timeType = null;
        long shutdownWaitTS = 0;
        long timeout = 0;

        try {
            // check the configuration for the job
            statePath = params.get("fsStatePath", null);
            cpMode = params.get("checkpointMode", "exactly_once");
            maxCount = params.getInt("maxCount", 100000);
            sleepFactor = params.getLong("sleepFactor", 100);
            failRatio = params.getFloat("failRatio", 0.001f);
            mode = params.get("waitMode", "ordered");
            taskNum = params.getInt("waitOperatorParallelism", 1);
            timeType = params.get("eventType", "EventTime");
            shutdownWaitTS = params.getLong("shutdownWaitTS", 20000);
            timeout = params.getLong("timeout", 10000L);
        } catch (Exception e) {
            e.printStackTrace();
        }

        StringBuilder configStringBuilder = new StringBuilder();

        final String lineSeparator = System.getProperty("line.separator");

        configStringBuilder
                .append("Job configuration").append(lineSeparator)
                .append("FS state path=").append(statePath).append(lineSeparator)
                .append("Checkpoint mode=").append(cpMode).append(lineSeparator)
                .append("Max count of input from source=").append(maxCount).append(lineSeparator)
                .append("Sleep factor=").append(sleepFactor).append(lineSeparator)
                .append("Fail ratio=").append(failRatio).append(lineSeparator)
                .append("Waiting mode=").append(mode).append(lineSeparator)
                .append("Parallelism for async wait operator=").append(taskNum).append(lineSeparator)
                .append("Event type=").append(timeType).append(lineSeparator)
                .append("Shutdown wait timestamp=").append(shutdownWaitTS);

        LOG.info(configStringBuilder.toString());

        if (StringUtils.isNotBlank(statePath)) {
            env.setStateBackend(new FsStateBackend(statePath));
        }
        if ("exactly_once".equals(cpMode)) {
            env.enableCheckpointing(1000L, CheckpointingMode.EXACTLY_ONCE);
        } else {
            env.enableCheckpointing(1000L, CheckpointingMode.AT_LEAST_ONCE);
        }

        if ("event_time".equals(timeType)) {
            env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        } else if ("ingestion_time".equals(timeType)) {
            env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);
        } else {
            env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
        }

        DataStream<Integer> inputStream = env.addSource(new SimpleSource(maxCount));
        AsyncFunction<Integer, String> asyncFunction = new AsyncDataBaseRequest(sleepFactor, failRatio, shutdownWaitTS);

        DataStream<String> result;
        if ("ordered".equals(mode)) {
            result = AsyncDataStream.orderedWait(inputStream, asyncFunction, timeout, TimeUnit.MILLISECONDS, 20).setParallelism(taskNum);
        } else {
            result = AsyncDataStream.unorderedWait(inputStream, asyncFunction, timeout, TimeUnit.MILLISECONDS, 20).setParallelism(taskNum);
        }

        DataStream<Tuple2<String, Integer>> outputStream = result.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public void flatMap(String s, Collector<Tuple2<String, Integer>> collector) throws Exception {
                collector.collect(new Tuple2<>(s, 1));
            }
        });
        DataStream<Tuple2<String, Integer>> sum = outputStream.keyBy(0).sum(1);
        sum.print();
        try {
            env.execute("flink async io example");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}
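AsyncDataBaseRequest and SimpleSource are project classes that are not shown in this section. Assuming the source emits integers and the async function returns strings (as the job wiring above suggests), a sketch of what such a RichAsyncFunction could look like follows; the constructor parameters mirror how the class is instantiated above, but the body is illustrative, not the repo's implementation:

import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.async.ResultFuture;
import org.apache.flink.streaming.api.functions.async.RichAsyncFunction;

import java.util.Collections;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;

public class AsyncDataBaseRequestSketch extends RichAsyncFunction<Integer, String> {

    private final long sleepFactor;
    private final float failRatio;
    private final long shutdownWaitTS;

    private transient ExecutorService executor;

    public AsyncDataBaseRequestSketch(long sleepFactor, float failRatio, long shutdownWaitTS) {
        this.sleepFactor = sleepFactor;
        this.failRatio = failRatio;
        this.shutdownWaitTS = shutdownWaitTS;
    }

    @Override
    public void open(Configuration parameters) {
        // Dedicated pool so the simulated blocking "database" lookup never blocks the task thread
        executor = Executors.newFixedThreadPool(4);
    }

    @Override
    public void asyncInvoke(Integer input, ResultFuture<String> resultFuture) {
        CompletableFuture.runAsync(() -> {
            try {
                // Simulated lookup latency and failure rate, driven by the constructor parameters
                Thread.sleep(ThreadLocalRandom.current().nextLong(Math.max(1, sleepFactor)));
                if (ThreadLocalRandom.current().nextFloat() < failRatio) {
                    resultFuture.completeExceptionally(new RuntimeException("simulated lookup failure"));
                } else {
                    resultFuture.complete(Collections.singletonList("key-" + (input % 10)));
                }
            } catch (InterruptedException e) {
                resultFuture.completeExceptionally(e);
            }
        }, executor);
    }

    @Override
    public void close() throws Exception {
        executor.shutdown();
        executor.awaitTermination(shutdownWaitTS, TimeUnit.MILLISECONDS);
    }
}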
-------------------------------------------------------------------------------- /flink-hdfs/src/main/java/com/bigdata/study/flinkhdfs/core/FlinkHdfs.java: --------------------------------------------------------------------------------
package com.bigdata.study.flinkhdfs.core;

import com.bigdata.study.flinkhdfs.utils.HadoopConfig;
import com.bigdata.study.flinkhdfs.utils.HadoopHelper;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.DateFormatUtils;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.connectors.fs.Clock;
import org.apache.flink.streaming.connectors.fs.StringWriter;
import org.apache.flink.streaming.connectors.fs.bucketing.Bucketer;
import org.apache.flink.streaming.connectors.fs.bucketing.BucketingSink;
import org.apache.flink.util.Collector;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.CommandLineRunner;

import java.io.File;
import java.io.IOException;
import java.util.Date;
import java.util.Map;

/**
 * @Description Writes a word-count stream to HDFS with a BucketingSink and a custom bucketer.
 * @Author hasee
 * @Date 2019/1/7
 **/
public class FlinkHdfs implements CommandLineRunner {

    @Autowired
    private HadoopConfig hadoopConfig;

    @Override
    public void run(String... strings) throws Exception {
        String file_input = "C:\\Users\\hasee\\Desktop\\spark.txt";
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(10000);
        // NOTE: the event-time windows below only fire if timestamps/watermarks are assigned; none are set here
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.setParallelism(2);
        DataStream<String> dataStreamSource = env.readTextFile(file_input);
        DataStream<Tuple2<String, Integer>> reduce = dataStreamSource.filter(StringUtils::isNotBlank)
                .flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                    @Override
                    public void flatMap(String s, Collector<Tuple2<String, Integer>> collector) throws Exception {
                        String[] words = s.toLowerCase().split("\\W+");
                        for (String word : words) {
                            if (word.length() > 0) {
                                Tuple2<String, Integer> tuple2 = new Tuple2<>();
                                tuple2.f0 = word;
                                tuple2.f1 = 1;
                                collector.collect(tuple2);
                            }
                        }
                    }
                }).keyBy(0).timeWindow(Time.seconds(30)).reduce(new ReduceFunction<Tuple2<String, Integer>>() {
                    @Override
                    public Tuple2<String, Integer> reduce(Tuple2<String, Integer> t1, Tuple2<String, Integer> t2) throws Exception {
                        return new Tuple2<>(t1.f0, t1.f1 + t2.f1);
                    }
                });
        DataStream<Tuple2<IntWritable, Text>> hdfsStream = reduce.flatMap(new FlatMapFunction<Tuple2<String, Integer>, Tuple2<IntWritable, Text>>() {
            @Override
            public void flatMap(Tuple2<String, Integer> in, Collector<Tuple2<IntWritable, Text>> collector) throws Exception {
                Tuple2<IntWritable, Text> tuple2 = new Tuple2<>();
                tuple2.f0 = new IntWritable(in.f1);
                tuple2.f1 = new Text(in.f0);
                collector.collect(tuple2);
            }
        });
        DataStream<String> outStream = hdfsStream.flatMap(new FlatMapFunction<Tuple2<IntWritable, Text>, String>() {
            @Override
            public void flatMap(Tuple2<IntWritable, Text> in, Collector<String> collector) throws Exception {
                StringBuilder builder = new StringBuilder();
                String name = in.f1.toString();
                int num = in.f0.get();
                builder.append(name).append("\t").append(num).append("\n");
                collector.collect(builder.toString());
            }
        });
        BucketingSink<String> bucketingSink = new BucketingSink<>("/base/path");
        // bucketingSink.setBucketer(new DateTimeBucketer<>("yyyy-MM-dd--HHmm"));
        // Use the custom bucketer
        bucketingSink.setBucketer(new DateHourBucketer());
        bucketingSink.setWriter(new StringWriter<>());
        bucketingSink.setBatchSize(1024 * 1024 * 4);
        bucketingSink.setBatchRolloverInterval(Integer.MAX_VALUE);
        bucketingSink.setInactiveBucketCheckInterval(60);
        bucketingSink.setInactiveBucketThreshold(60);
        HadoopHelper hadoopHelper = new HadoopHelper(hadoopConfig);
        Configuration configuration = hadoopHelper.getConfig();
        bucketingSink.setFSConfig(configuration);
        outStream.addSink(bucketingSink);
        // Without execute() the job graph built above never runs when this CommandLineRunner is invoked
        env.execute("flink hdfs bucketing sink");
    }

    private static ObjectMapper mapper = new ObjectMapper();

    /**
     * Custom HDFS bucketing rule: buckets each record by its "TimeStamp" JSON field, formatted as yyyy-MM-dd--HH.
     * Declared static so the sink can serialize the bucketer without pulling in the enclosing Spring bean.
     */
    private static class DateHourBucketer implements Bucketer<String> {
        @Override
        public Path getBucketPath(Clock clock, Path path, String s) {
            try {
                Map map = mapper.readValue(s, Map.class);
                Long timeStamp = (Long) map.get("TimeStamp");
                Date date = new Date(timeStamp);
                String format = DateFormatUtils.format(date, "yyyy-MM-dd--HH");
                return new Path(path + File.separator + format);
            } catch (IOException e) {
                e.printStackTrace();
            }
            return null;
        }
    }
}
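Note that DateHourBucketer parses each record as JSON and reads a "TimeStamp" field, while outStream above emits plain tab-separated "word<TAB>count" lines, so the JSON parse would fail for this particular pipeline. A bucketer keyed on the sink's clock avoids that mismatch; a minimal sketch (class name is illustrative):

import org.apache.commons.lang3.time.DateFormatUtils;
import org.apache.flink.streaming.connectors.fs.Clock;
import org.apache.flink.streaming.connectors.fs.bucketing.Bucketer;
import org.apache.hadoop.fs.Path;

public class ProcessingTimeHourBucketer implements Bucketer<String> {

    @Override
    public Path getBucketPath(Clock clock, Path basePath, String element) {
        // Bucket by wall-clock hour instead of a field parsed from the record
        String hour = DateFormatUtils.format(clock.currentTimeMillis(), "yyyy-MM-dd--HH");
        return new Path(basePath, hour);
    }
}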
-------------------------------------------------------------------------------- /spark-phoenix/src/main/java/com/bigdata/study/sparkphoenix/apps/SparkPhoenixReadHbase.java: --------------------------------------------------------------------------------
package com.bigdata.study.sparkphoenix.apps;

import com.bigdata.study.sparkphoenix.utils.PhoenixUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.util.Base64;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import scala.Tuple2;

import java.io.IOException;
import java.sql.*;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/**
 * Spark reads HBase data through Phoenix.
 **/
public class SparkPhoenixReadHbase {

    public static void main(String[] args) {
        Configuration configuration = HBaseConfiguration.create();
        configuration.set("hbase.zookeeper.quorum", "zk");
        configuration.set("hbase.zookeeper.property.clientPort", "2181");
        configuration.set("zookeeper.znode.parent", "/hbase");
        configuration.set(TableInputFormat.INPUT_TABLE, "tableName");
        Scan scan = new Scan();
        try {
            ClientProtos.Scan proto = ProtobufUtil.toScan(scan);
            String scanToString = Base64.encodeBytes(proto.toByteArray());
            configuration.set(TableInputFormat.SCAN, scanToString);

            // Initialize Spark
            SparkConf conf = new SparkConf().setAppName("Test")
                    .setMaster("local[1]")
                    .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
                    .registerKryoClasses(new Class[]{});
            SparkSession sparkSession = SparkSession.builder().config(conf).getOrCreate();

            finalSchema(configuration, sparkSession);
            dynamicSchema(configuration, sparkSession);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads HBase rows via TableInputFormat with a fixed, hand-written schema.
     */
    private static void finalSchema(Configuration configuration, SparkSession sparkSession) {
        JavaSparkContext context = new JavaSparkContext(sparkSession.sparkContext());
        JavaPairRDD<ImmutableBytesWritable, Result> javaPairRDD = context.newAPIHadoopRDD(configuration, TableInputFormat.class,
                ImmutableBytesWritable.class, Result.class);
        JavaRDD<Row> javaRDD = javaPairRDD.map(new Function<Tuple2<ImmutableBytesWritable, Result>, Row>() {
            @Override
            public Row call(Tuple2<ImmutableBytesWritable, Result> tuple2) throws Exception {
                Result result = tuple2._2();
                String rowKey = Bytes.toString(result.getRow());
                String id = Bytes.toString(result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("id")));
                String account = Bytes.toString(result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("account")));
                String password = Bytes.toString(result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("password")));
                return RowFactory.create(rowKey, id, account, password);
            }
        });
        List<StructField> fields = new ArrayList<>();
        // The Row above carries the HBase row key as its first value, so the schema must expose it as well
        fields.add(DataTypes.createStructField("rowKey", DataTypes.StringType, true));
        fields.add(DataTypes.createStructField("id", DataTypes.StringType, true));
        fields.add(DataTypes.createStructField("account", DataTypes.StringType, true));
        fields.add(DataTypes.createStructField("password", DataTypes.StringType, true));
        StructType schema = DataTypes.createStructType(fields);

        Dataset<Row> dataset = sparkSession.createDataFrame(javaRDD, schema);
    }

    /**
     * Builds the schema dynamically, e.g. by reading the column metadata directly through Phoenix.
     */
    private static void dynamicSchema(Configuration configuration, SparkSession sparkSession) {
        try {
            Connection connection = PhoenixUtil.getConnection();
            Statement statement = connection.createStatement();
            String sql = "select * from tableName limit 1";
            ResultSet resultSet = statement.executeQuery(sql);
            ResultSetMetaData metaData = resultSet.getMetaData();
            int columnCount = metaData.getColumnCount();
            Map<String, String> columnNameMap = new LinkedHashMap<>(columnCount);
            for (int i = 1; i <= columnCount; i++) {
                String columnTypeName = metaData.getColumnTypeName(i);
                String columnName = metaData.getColumnName(i);
                columnNameMap.put(columnName, columnTypeName);
            }
            PhoenixUtil.returnConnection(connection);

            JavaSparkContext context = new JavaSparkContext(sparkSession.sparkContext());
            JavaPairRDD<ImmutableBytesWritable, Result> javaPairRDD = context.newAPIHadoopRDD(configuration, TableInputFormat.class, ImmutableBytesWritable.class, Result.class);
            JavaRDD<Row> map = javaPairRDD.map(new Function<Tuple2<ImmutableBytesWritable, Result>, Row>() {
                @Override
                public Row call(Tuple2<ImmutableBytesWritable, Result> tuple2) throws Exception {
                    Result result = tuple2._2();
                    String row = Bytes.toString(result.getRow());
                    List<String> valueList = new ArrayList<>();
                    for (String column : columnNameMap.keySet()) {
                        String columnValue = Bytes.toString(result.getValue(Bytes.toBytes("cf"), Bytes.toBytes(column)));
                        valueList.add(columnValue);
                    }
                    String[] values = valueList.toArray(new String[]{});
                    // Spread the column values so each becomes its own field, matching the schema built below
                    return RowFactory.create((Object[]) values);
                }
            });

            List<StructField> fieldList = new ArrayList<>();
            for (Map.Entry<String, String> entry : columnNameMap.entrySet()) {
                String key = entry.getKey();
                String value = entry.getValue();
                DataType dataType = getDataType(value);
                fieldList.add(DataTypes.createStructField(key, dataType, false));
            }
            StructType schema = DataTypes.createStructType(fieldList);
            Dataset<Row> dataset = sparkSession.createDataFrame(map, schema);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    private static DataType getDataType(String typeName) {
        DataType dataType = null;
        if ("string".equals(typeName)) {
            dataType = DataTypes.StringType;
        } else if ("boolean".equals(typeName)) {
            dataType = DataTypes.BooleanType;
        } else if ("double".equals(typeName)) {
            dataType = DataTypes.DoubleType;
        } else if ("date".equals(typeName)) {
            dataType = DataTypes.DateType;
        } else if ("float".equals(typeName)) {
            dataType = DataTypes.FloatType;
        } else if ("bigint".equals(typeName)) {
            dataType = DataTypes.LongType;
        } else if ("short".equals(typeName)) {
            dataType = DataTypes.ShortType;
        } else if ("byte".equals(typeName)) {
            dataType = DataTypes.ByteType;
        } else if ("timestamp".equals(typeName)) {
            dataType = DataTypes.TimestampType;
        }
        return dataType;
    }
}
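PhoenixUtil (getConnection/returnConnection) is not shown in this section; its naming suggests a pooled implementation. A minimal non-pooled sketch using the standard Phoenix JDBC URL (jdbc:phoenix:<zookeeper quorum>:<port>:<znode parent>) could look like the following; the quorum and znode values are placeholders, and for production the phoenix-spark connector can also load the table straight into a DataFrame and skip the manual schema mapping above.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;

public final class PhoenixUtilSketch {

    // Placeholder connection string; replace "zk" and "/hbase" with the real quorum and znode parent
    private static final String URL = "jdbc:phoenix:zk:2181:/hbase";

    private PhoenixUtilSketch() {
    }

    public static Connection getConnection() throws SQLException {
        // The Phoenix driver registers itself via the JDBC ServiceLoader, so Class.forName is optional on JDBC 4+
        return DriverManager.getConnection(URL);
    }

    public static void returnConnection(Connection connection) {
        // No pooling here: simply close the connection the caller is done with
        if (connection != null) {
            try {
                connection.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }
}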
--------------------------------------------------------------------------------