├── elasticsearch ├── src │ ├── main │ │ ├── resources │ │ │ ├── application.properties │ │ │ └── log4j2.properties │ │ └── java │ │ │ └── com │ │ │ └── bigdata │ │ │ └── study │ │ │ └── elasticsearch │ │ │ └── ElasticsearchApplication.java │ └── test │ │ └── java │ │ └── com │ │ └── bigdata │ │ └── study │ │ └── elasticsearch │ │ └── ElasticsearchApplicationTests.java ├── .gitignore └── pom.xml ├── flink-async-io ├── src │ ├── main │ │ ├── resources │ │ │ └── application.properties │ │ └── java │ │ │ └── com │ │ │ └── bigdata │ │ │ └── study │ │ │ └── flinkasyncio │ │ │ ├── source │ │ │ └── SimpleSource.java │ │ │ ├── async │ │ │ └── AsyncDataBaseRequest.java │ │ │ └── FlinkAsyncIoApplication.java │ └── test │ │ └── java │ │ └── com │ │ └── bigdata │ │ └── study │ │ └── flinkasyncio │ │ └── FlinkAsyncIoApplicationTests.java ├── .gitignore └── pom.xml ├── flink-jdbc-hbase ├── src │ ├── main │ │ ├── resources │ │ │ └── application.properties │ │ └── java │ │ │ └── com │ │ │ └── bigdata │ │ │ └── study │ │ │ └── flinkjdbchbase │ │ │ ├── FlinkJdbcHbaseApplication.java │ │ │ ├── source │ │ │ └── JdbcSource.java │ │ │ ├── core │ │ │ ├── Jdbc2Hbase.java │ │ │ └── FlinkFromTxt.java │ │ │ └── sink │ │ │ └── HbaseSink.java │ └── test │ │ └── java │ │ └── com │ │ └── bigdata │ │ └── study │ │ └── flinkjdbchbase │ │ └── FlinkJdbcHbaseApplicationTests.java ├── .gitignore └── pom.xml ├── flink-sideoutput ├── src │ ├── main │ │ ├── resources │ │ │ └── application.properties │ │ └── java │ │ │ └── com │ │ │ └── bigdata │ │ │ └── study │ │ │ └── flinksideoutput │ │ │ ├── tag │ │ │ └── SideOutputTag.java │ │ │ ├── process │ │ │ ├── ProcessTokenizer.java │ │ │ └── KeyedTokenizer.java │ │ │ └── FlinkSideoutputApplication.java │ └── test │ │ └── java │ │ └── com │ │ └── bigdata │ │ └── study │ │ └── flinksideoutput │ │ └── FlinkSideoutputApplicationTests.java ├── .gitignore └── pom.xml ├── kafka-stream ├── src │ ├── main │ │ ├── resources │ │ │ ├── application.properties │ │ │ ├── users.csv │ │ │ ├── items.csv │ │ │ ├── log4j.properties │ │ │ └── orders.csv │ │ └── java │ │ │ └── com │ │ │ └── bigdata │ │ │ └── study │ │ │ └── kafkastream │ │ │ ├── serdes │ │ │ ├── SerdesFactory.java │ │ │ ├── GenericDeserializer.java │ │ │ └── GenericSerializer.java │ │ │ ├── model │ │ │ ├── User.java │ │ │ ├── Item.java │ │ │ └── Order.java │ │ │ ├── utils │ │ │ └── HashPartitioner.java │ │ │ ├── timeextractor │ │ │ └── OrderTimestampExtractor.java │ │ │ └── producer │ │ │ ├── UserProducer.java │ │ │ ├── ItemProducer.java │ │ │ └── OrderProducer.java │ └── test │ │ └── java │ │ └── com │ │ └── bigdata │ │ └── study │ │ └── kafkastream │ │ └── KafkaStreamApplicationTests.java ├── .gitignore └── pom.xml ├── flink-kafka-hbase ├── src │ ├── main │ │ ├── resources │ │ │ ├── application.properties │ │ │ └── log4j2.properties │ │ └── java │ │ │ └── com │ │ │ └── bigdata │ │ │ └── study │ │ │ └── flinkkafkahbase │ │ │ ├── FlinkKafkaHbaseApplication.java │ │ │ ├── watermarks │ │ │ └── FlinkHbaseWaterMarks.java │ │ │ ├── model │ │ │ └── Metric.java │ │ │ ├── core │ │ │ ├── Kafka2Hbase.java │ │ │ └── Hbase2Kafka.java │ │ │ ├── sink │ │ │ └── FlinkHbaseSink.java │ │ │ └── source │ │ │ └── FlinkHbaseSource.java │ └── test │ │ └── java │ │ └── com │ │ └── bigdata │ │ └── study │ │ └── flinkkafkahbase │ │ └── FlinkKafkaHbaseApplicationTests.java ├── .gitignore └── pom.xml ├── flink-kafka-source ├── src │ ├── main │ │ ├── resources │ │ │ ├── application.properties │ │ │ └── logback.xml │ │ └── java │ │ │ └── com │ │ │ └── bigdata │ │ │ └── 
study │ │ │ └── flinkkafkasource │ │ │ ├── watermarks │ │ │ └── ConsumerWaterMarkEmitter.java │ │ │ └── FlinkKafkaSourceApplication.java │ └── test │ │ └── java │ │ └── com │ │ └── bigdata │ │ └── study │ │ └── flinkkafkasource │ │ └── FlinkKafkaSourceApplicationTests.java ├── .gitignore └── pom.xml ├── bigdata-study.iml ├── spark-phoenix ├── src │ ├── main │ │ ├── resources │ │ │ └── application.properties │ │ └── java │ │ │ └── com │ │ │ └── bigdata │ │ │ └── study │ │ │ └── sparkphoenix │ │ │ ├── utils │ │ │ └── PhoenixUtil.java │ │ │ ├── SparkPhoenixApplication.java │ │ │ └── apps │ │ │ └── SparkPhoenixReadHbase.java │ └── test │ │ └── java │ │ └── com │ │ └── bigdata │ │ └── study │ │ └── sparkphoenix │ │ ├── SparkPhoenixApplicationTests.java │ │ └── phoenix │ │ └── SparkPhoenixTest.java ├── .gitignore └── pom.xml ├── README.md ├── flink-hdfs ├── src │ ├── main │ │ ├── resources │ │ │ └── application.properties │ │ └── java │ │ │ └── com │ │ │ └── bigdata │ │ │ └── study │ │ │ └── flinkhdfs │ │ │ ├── FlinkHdfsApplication.java │ │ │ ├── utils │ │ │ └── HadoopConfig.java │ │ │ ├── zip │ │ │ └── FlinkHdfsZip.java │ │ │ └── core │ │ │ └── FlinkHdfs.java │ └── test │ │ └── java │ │ └── com │ │ └── bigdata │ │ └── study │ │ └── flinkhdfs │ │ └── FlinkHdfsApplicationTests.java ├── .gitignore └── pom.xml ├── dataflow-stream-kafka-source ├── src │ ├── main │ │ ├── resources │ │ │ ├── META-INT │ │ │ │ └── spring-configuration-metadata-whitelist.properties │ │ │ └── application.properties │ │ └── java │ │ │ └── com │ │ │ └── bigdata │ │ │ └── study │ │ │ └── dataflowstreamkafkasource │ │ │ ├── DataflowStreamKafkaSourceApplication.java │ │ │ ├── prop │ │ │ └── KafkaSourceProperties.java │ │ │ ├── config │ │ │ └── KafkaSourceConfig.java │ │ │ └── utils │ │ │ └── JsonMapper.java │ └── test │ │ └── java │ │ └── com │ │ └── bigdata │ │ └── study │ │ └── dataflowstreamkafkasource │ │ └── DataflowStreamKafkaSourceApplicationTests.java ├── .gitignore └── pom.xml ├── flink-common └── src │ └── main │ └── java │ ├── exception │ └── ExceptionUtils.java │ ├── utils │ ├── GsonUtils.java │ ├── ExecutionEnvUtil.java │ ├── HttpUtil.java │ └── KafkaUtils.java │ ├── watermarks │ └── MetricWatermark.java │ ├── model │ └── Metrics.java │ ├── constant │ └── PropertiesConstants.java │ └── schemas │ └── MetricSchema.java ├── flink-kafka11-sink ├── src │ ├── main │ │ ├── resources │ │ │ ├── application.properties │ │ │ └── logback.xml │ │ └── java │ │ │ └── com │ │ │ └── bigdata │ │ │ └── study │ │ │ └── flinkkafkasink │ │ │ └── FlinkKafkaSinkApplication.java │ └── test │ │ └── java │ │ └── com │ │ └── bigdata │ │ └── study │ │ └── flinkkafkasink │ │ └── FlinkKafkaSinkApplicationTests.java ├── .gitignore └── pom.xml ├── dataflow-stream-redis-pub-sink ├── src │ ├── main │ │ ├── resources │ │ │ ├── META-INF │ │ │ │ └── spring-configuration-metadata-whitelist.properties │ │ │ └── application.properties │ │ └── java │ │ │ └── com │ │ │ └── bigdata │ │ │ └── study │ │ │ └── dataflowstreamredispubsink │ │ │ ├── prop │ │ │ └── RedisPubProperties.java │ │ │ ├── DataflowStreamRedisPubSinkApplication.java │ │ │ └── config │ │ │ └── RedisStreamPubConfig.java │ └── test │ │ └── java │ │ └── com │ │ └── bigdata │ │ └── study │ │ └── dataflowstreamredispubsink │ │ └── DataflowStreamRedisPubSinkApplicationTests.java ├── .gitignore └── pom.xml ├── dataflow-stream-redis-set-processor ├── src │ ├── main │ │ ├── resources │ │ │ ├── META-INT │ │ │ │ └── spring-configuration-metadata-whitelist.properties │ │ │ └── application.properties │ 
│ └── java │ │ │ └── com │ │ │ └── bigdata │ │ │ └── study │ │ │ └── dataflowstreamredissetprocessor │ │ │ ├── prop │ │ │ └── RedisSetProperties.java │ │ │ ├── DataflowStreamRedisSetProcessorApplication.java │ │ │ ├── config │ │ │ └── RedisStreamProcessorConfig.java │ │ │ └── utils │ │ │ └── JsonMapper.java │ └── test │ │ └── java │ │ └── com │ │ └── bigdata │ │ └── study │ │ └── dataflowstreamredissetprocessor │ │ └── DataflowStreamRedisSetProcessorApplicationTests.java ├── .gitignore └── pom.xml ├── flink-elasticsearch-sink ├── .gitignore ├── src │ ├── main │ │ ├── resources │ │ │ ├── application.properties │ │ │ └── logback.xml │ │ └── java │ │ │ └── com │ │ │ └── bigdata │ │ │ └── study │ │ │ └── flinkelasticsearchsink │ │ │ ├── handler │ │ │ └── FlinkFailHandler.java │ │ │ └── FlinkElasticsearchSinkApplication.java │ └── test │ │ └── java │ │ └── com │ │ └── bigdata │ │ └── study │ │ └── flinkelasticsearchsink │ │ └── FlinkElasticsearchSinkApplicationTests.java └── pom.xml ├── fork-join ├── src │ └── main │ │ └── java │ │ └── forkjoin │ │ ├── ParallelStream.java │ │ ├── ForkJoinApp.java │ │ └── MyForkJoinTask.java └── pom.xml └── pom.xml /elasticsearch/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flink-async-io/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flink-jdbc-hbase/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flink-sideoutput/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /kafka-stream/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flink-kafka-hbase/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flink-kafka-source/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /bigdata-study.iml: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /spark-phoenix/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | spring.application.name=spark-phoenix 2 | server.port=-1 -------------------------------------------------------------------------------- /kafka-stream/src/main/resources/users.csv: -------------------------------------------------------------------------------- 1 | Jack, BJ, male, 23 2 | Lily, SH, female, 21 3 | Mike, SZ, male, 22 4 | Lucy, GZ, female, 20 -------------------------------------------------------------------------------- 
/kafka-stream/src/main/resources/items.csv:
--------------------------------------------------------------------------------
iphone, BJ, phone, 5388.88
ipad, SH, pad, 4888.88
iwatch, SZ, watch, 2668.88
ipod, GZ, pod, 1888.88
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# bigdata-study
Study notes for big data components, covering Data Flow, Spring Cloud Stream, Elasticsearch, Flink, Spark, Kafka, Phoenix, Hive and HBase.

This is just personal study and practice; the code is not written cleanly, so please don't bother forking it!
--------------------------------------------------------------------------------
/flink-hdfs/src/main/resources/application.properties:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/laixiangshun/bigdata-study/HEAD/flink-hdfs/src/main/resources/application.properties
--------------------------------------------------------------------------------
/dataflow-stream-kafka-source/src/main/resources/META-INT/spring-configuration-metadata-whitelist.properties:
--------------------------------------------------------------------------------
configuration-properties.classes=com.bigdata.study.dataflowstreamkafkasource.prop.KafkaSourceProperties
--------------------------------------------------------------------------------
/elasticsearch/src/main/resources/log4j2.properties:
--------------------------------------------------------------------------------
appender.console.type = Console
appender.console.name = console
appender.console.layout.type = PatternLayout

rootLogger.level = info
rootLogger.appenderRef.console.ref = console
--------------------------------------------------------------------------------
/flink-kafka-hbase/src/main/resources/log4j2.properties:
--------------------------------------------------------------------------------
appender.console.type = Console
appender.console.name = console
appender.console.layout.type = PatternLayout

rootLogger.level = info
rootLogger.appenderRef.console.ref = console
--------------------------------------------------------------------------------
/flink-common/src/main/java/exception/ExceptionUtils.java:
--------------------------------------------------------------------------------
package exception;

/**
 * Exception utility class
 **/
public class ExceptionUtils {

    /**
     * Walks the cause chain and reports whether it contains an exception of the given type.
     */
    public static boolean containsThrowable(Throwable throwable, Class<?> type) {
        Throwable current = throwable;
        while (current != null) {
            if (type.isInstance(current)) {
                return true;
            }
            current = current.getCause();
        }
        return false;
    }
}
--------------------------------------------------------------------------------
/flink-kafka11-sink/src/main/resources/application.properties:
--------------------------------------------------------------------------------
kafka.brokers=192.168.20.48:9092
kafka.group.id=metrics-group-test
kafka.zookeeper.connect=192.168.20.48:2181
metrics.topic=alert-metrics
stream.parallelism=5
stream.checkpoint.interval=1000
stream.checkpoint.enable=false
--------------------------------------------------------------------------------
/dataflow-stream-redis-pub-sink/src/main/resources/META-INF/spring-configuration-metadata-whitelist.properties:
--------------------------------------------------------------------------------
configuration-properties.classes= com.bigdata.study.dataflowstreamredispubsink.prop.RedisPubProperties,\
org.springframework.boot.autoconfigure.data.redis.RedisProperties, \
org.springframework.boot.autoconfigure.data.redis.RedisProperties$Pool 4 | -------------------------------------------------------------------------------- /dataflow-stream-redis-set-processor/src/main/resources/META-INT/spring-configuration-metadata-whitelist.properties: -------------------------------------------------------------------------------- 1 | configuration-properties.classes=com.bigdata.study.dataflowstreamredissetprocessor.prop.RedisSetProperties,\ 2 | org.springframework.boot.autoconfigure.data.redis.RedisProperties, \ 3 | org.springframework.boot.autoconfigure.data.redis.RedisProperties$Pool 4 | -------------------------------------------------------------------------------- /elasticsearch/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .apt_generated 6 | .classpath 7 | .factorypath 8 | .project 9 | .settings 10 | .springBeans 11 | .sts4-cache 12 | 13 | ### IntelliJ IDEA ### 14 | .idea 15 | *.iws 16 | *.iml 17 | *.ipr 18 | 19 | ### NetBeans ### 20 | /nbproject/private/ 21 | /build/ 22 | /nbbuild/ 23 | /dist/ 24 | /nbdist/ 25 | /.nb-gradle/ -------------------------------------------------------------------------------- /flink-hdfs/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .apt_generated 6 | .classpath 7 | .factorypath 8 | .project 9 | .settings 10 | .springBeans 11 | .sts4-cache 12 | 13 | ### IntelliJ IDEA ### 14 | .idea 15 | *.iws 16 | *.iml 17 | *.ipr 18 | 19 | ### NetBeans ### 20 | /nbproject/private/ 21 | /build/ 22 | /nbbuild/ 23 | /dist/ 24 | /nbdist/ 25 | /.nb-gradle/ -------------------------------------------------------------------------------- /kafka-stream/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .apt_generated 6 | .classpath 7 | .factorypath 8 | .project 9 | .settings 10 | .springBeans 11 | .sts4-cache 12 | 13 | ### IntelliJ IDEA ### 14 | .idea 15 | *.iws 16 | *.iml 17 | *.ipr 18 | 19 | ### NetBeans ### 20 | /nbproject/private/ 21 | /build/ 22 | /nbbuild/ 23 | /dist/ 24 | /nbdist/ 25 | /.nb-gradle/ -------------------------------------------------------------------------------- /spark-phoenix/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .apt_generated 6 | .classpath 7 | .factorypath 8 | .project 9 | .settings 10 | .springBeans 11 | .sts4-cache 12 | 13 | ### IntelliJ IDEA ### 14 | .idea 15 | *.iws 16 | *.iml 17 | *.ipr 18 | 19 | ### NetBeans ### 20 | /nbproject/private/ 21 | /build/ 22 | /nbbuild/ 23 | /dist/ 24 | /nbdist/ 25 | /.nb-gradle/ -------------------------------------------------------------------------------- /flink-async-io/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .apt_generated 6 | .classpath 7 | .factorypath 8 | .project 9 | .settings 10 | .springBeans 11 | .sts4-cache 12 | 13 | ### IntelliJ IDEA ### 14 | .idea 15 | *.iws 16 | *.iml 17 | *.ipr 18 | 19 | ### NetBeans ### 20 | /nbproject/private/ 21 | /build/ 22 | /nbbuild/ 23 | /dist/ 24 | /nbdist/ 25 | /.nb-gradle/ -------------------------------------------------------------------------------- 
/flink-jdbc-hbase/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .apt_generated 6 | .classpath 7 | .factorypath 8 | .project 9 | .settings 10 | .springBeans 11 | .sts4-cache 12 | 13 | ### IntelliJ IDEA ### 14 | .idea 15 | *.iws 16 | *.iml 17 | *.ipr 18 | 19 | ### NetBeans ### 20 | /nbproject/private/ 21 | /build/ 22 | /nbbuild/ 23 | /dist/ 24 | /nbdist/ 25 | /.nb-gradle/ -------------------------------------------------------------------------------- /flink-kafka-hbase/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .apt_generated 6 | .classpath 7 | .factorypath 8 | .project 9 | .settings 10 | .springBeans 11 | .sts4-cache 12 | 13 | ### IntelliJ IDEA ### 14 | .idea 15 | *.iws 16 | *.iml 17 | *.ipr 18 | 19 | ### NetBeans ### 20 | /nbproject/private/ 21 | /build/ 22 | /nbbuild/ 23 | /dist/ 24 | /nbdist/ 25 | /.nb-gradle/ -------------------------------------------------------------------------------- /flink-kafka-source/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .apt_generated 6 | .classpath 7 | .factorypath 8 | .project 9 | .settings 10 | .springBeans 11 | .sts4-cache 12 | 13 | ### IntelliJ IDEA ### 14 | .idea 15 | *.iws 16 | *.iml 17 | *.ipr 18 | 19 | ### NetBeans ### 20 | /nbproject/private/ 21 | /build/ 22 | /nbbuild/ 23 | /dist/ 24 | /nbdist/ 25 | /.nb-gradle/ -------------------------------------------------------------------------------- /flink-kafka11-sink/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .apt_generated 6 | .classpath 7 | .factorypath 8 | .project 9 | .settings 10 | .springBeans 11 | .sts4-cache 12 | 13 | ### IntelliJ IDEA ### 14 | .idea 15 | *.iws 16 | *.iml 17 | *.ipr 18 | 19 | ### NetBeans ### 20 | /nbproject/private/ 21 | /build/ 22 | /nbbuild/ 23 | /dist/ 24 | /nbdist/ 25 | /.nb-gradle/ -------------------------------------------------------------------------------- /flink-sideoutput/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .apt_generated 6 | .classpath 7 | .factorypath 8 | .project 9 | .settings 10 | .springBeans 11 | .sts4-cache 12 | 13 | ### IntelliJ IDEA ### 14 | .idea 15 | *.iws 16 | *.iml 17 | *.ipr 18 | 19 | ### NetBeans ### 20 | /nbproject/private/ 21 | /build/ 22 | /nbbuild/ 23 | /dist/ 24 | /nbdist/ 25 | /.nb-gradle/ -------------------------------------------------------------------------------- /dataflow-stream-kafka-source/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .apt_generated 6 | .classpath 7 | .factorypath 8 | .project 9 | .settings 10 | .springBeans 11 | .sts4-cache 12 | 13 | ### IntelliJ IDEA ### 14 | .idea 15 | *.iws 16 | *.iml 17 | *.ipr 18 | 19 | ### NetBeans ### 20 | /nbproject/private/ 21 | /build/ 22 | /nbbuild/ 23 | /dist/ 24 | /nbdist/ 25 | /.nb-gradle/ -------------------------------------------------------------------------------- /flink-elasticsearch-sink/.gitignore: 
-------------------------------------------------------------------------------- 1 | /target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .apt_generated 6 | .classpath 7 | .factorypath 8 | .project 9 | .settings 10 | .springBeans 11 | .sts4-cache 12 | 13 | ### IntelliJ IDEA ### 14 | .idea 15 | *.iws 16 | *.iml 17 | *.ipr 18 | 19 | ### NetBeans ### 20 | /nbproject/private/ 21 | /build/ 22 | /nbbuild/ 23 | /dist/ 24 | /nbdist/ 25 | /.nb-gradle/ -------------------------------------------------------------------------------- /dataflow-stream-redis-pub-sink/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .apt_generated 6 | .classpath 7 | .factorypath 8 | .project 9 | .settings 10 | .springBeans 11 | .sts4-cache 12 | 13 | ### IntelliJ IDEA ### 14 | .idea 15 | *.iws 16 | *.iml 17 | *.ipr 18 | 19 | ### NetBeans ### 20 | /nbproject/private/ 21 | /build/ 22 | /nbbuild/ 23 | /dist/ 24 | /nbdist/ 25 | /.nb-gradle/ -------------------------------------------------------------------------------- /flink-sideoutput/src/main/java/com/bigdata/study/flinksideoutput/tag/SideOutputTag.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinksideoutput.tag; 2 | 3 | import org.apache.flink.util.OutputTag; 4 | 5 | /** 6 | * @Description 7 | * @Author hasee 8 | * @Date 2019/1/11 9 | **/ 10 | public class SideOutputTag { 11 | public static final OutputTag wordTag = new OutputTag("rejected") { 12 | }; 13 | } 14 | -------------------------------------------------------------------------------- /dataflow-stream-redis-set-processor/.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | !.mvn/wrapper/maven-wrapper.jar 3 | 4 | ### STS ### 5 | .apt_generated 6 | .classpath 7 | .factorypath 8 | .project 9 | .settings 10 | .springBeans 11 | .sts4-cache 12 | 13 | ### IntelliJ IDEA ### 14 | .idea 15 | *.iws 16 | *.iml 17 | *.ipr 18 | 19 | ### NetBeans ### 20 | /nbproject/private/ 21 | /build/ 22 | /nbbuild/ 23 | /dist/ 24 | /nbdist/ 25 | /.nb-gradle/ -------------------------------------------------------------------------------- /flink-elasticsearch-sink/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | kafka.brokers=192.168.20.48:9092 2 | kafka.group.id=metrics-group-test 3 | kafka.zookeeper.connect=192.168.20.48:2181 4 | metrics.topic=alert-metrics 5 | stream.parallelism=5 6 | stream.checkpoint.interval=1000 7 | stream.checkpoint.enable=false 8 | elasticsearch.hosts=192.168.20.48:9201 9 | elasticsearch.bulk.flush.max.actions=40 10 | stream.sink.parallelism=5 -------------------------------------------------------------------------------- /flink-hdfs/src/main/java/com/bigdata/study/flinkhdfs/FlinkHdfsApplication.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkhdfs; 2 | 3 | import org.springframework.boot.SpringApplication; 4 | import org.springframework.boot.autoconfigure.SpringBootApplication; 5 | 6 | @SpringBootApplication 7 | public class FlinkHdfsApplication { 8 | 9 | public static void main(String[] args) { 10 | SpringApplication.run(FlinkHdfsApplication.class, args); 11 | } 12 | } 13 | 14 | -------------------------------------------------------------------------------- 
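The SideOutputTag above only declares the tag; a minimal sketch of how such a tag is typically consumed, assuming the tag carries String words, a placeholder in-memory input stream, and the ProcessTokenizer class from the same flink-sideoutput module (the environment setup and input data are assumptions, not repository code):

import com.bigdata.study.flinksideoutput.process.ProcessTokenizer;
import com.bigdata.study.flinksideoutput.tag.SideOutputTag;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class SideOutputUsageSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Placeholder input; any DataStream<String> of text lines would do here.
        DataStream<String> lines = env.fromElements("a side output example line");

        // Main output: (word, 1) pairs for the short words.
        SingleOutputStreamOperator<Tuple2<String, Integer>> shortWords =
                lines.process(new ProcessTokenizer());

        // Side output: words longer than 5 characters that processElement() routed to the tag.
        DataStream<String> longWords = shortWords.getSideOutput(SideOutputTag.wordTag);
        longWords.print();

        env.execute("side-output usage sketch");
    }
}
--------------------------------------------------------------------------------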
/elasticsearch/src/main/java/com/bigdata/study/elasticsearch/ElasticsearchApplication.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.elasticsearch; 2 | 3 | import org.springframework.boot.SpringApplication; 4 | import org.springframework.boot.autoconfigure.SpringBootApplication; 5 | 6 | @SpringBootApplication 7 | public class ElasticsearchApplication { 8 | 9 | public static void main(String[] args) { 10 | SpringApplication.run(ElasticsearchApplication.class, args); 11 | } 12 | 13 | } 14 | 15 | -------------------------------------------------------------------------------- /flink-jdbc-hbase/src/main/java/com/bigdata/study/flinkjdbchbase/FlinkJdbcHbaseApplication.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkjdbchbase; 2 | 3 | import org.springframework.boot.SpringApplication; 4 | import org.springframework.boot.autoconfigure.SpringBootApplication; 5 | 6 | @SpringBootApplication 7 | public class FlinkJdbcHbaseApplication { 8 | 9 | public static void main(String[] args) { 10 | SpringApplication.run(FlinkJdbcHbaseApplication.class, args); 11 | } 12 | 13 | } 14 | 15 | -------------------------------------------------------------------------------- /flink-hdfs/src/test/java/com/bigdata/study/flinkhdfs/FlinkHdfsApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkhdfs; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class FlinkHdfsApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /flink-kafka-hbase/src/main/java/com/bigdata/study/flinkkafkahbase/FlinkKafkaHbaseApplication.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkkafkahbase; 2 | 3 | import org.springframework.boot.SpringApplication; 4 | import org.springframework.boot.autoconfigure.SpringBootApplication; 5 | 6 | @SpringBootApplication 7 | public class FlinkKafkaHbaseApplication { 8 | 9 | public static void main(String[] args) { 10 | SpringApplication.run(FlinkKafkaHbaseApplication.class, args); 11 | } 12 | 13 | } 14 | 15 | -------------------------------------------------------------------------------- /kafka-stream/src/test/java/com/bigdata/study/kafkastream/KafkaStreamApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.kafkastream; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class KafkaStreamApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /flink-async-io/src/test/java/com/bigdata/study/flinkasyncio/FlinkAsyncIoApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkasyncio; 2 | 3 | 
import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class FlinkAsyncIoApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /spark-phoenix/src/test/java/com/bigdata/study/sparkphoenix/SparkPhoenixApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.sparkphoenix; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class SparkPhoenixApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /elasticsearch/src/test/java/com/bigdata/study/elasticsearch/ElasticsearchApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.elasticsearch; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class ElasticsearchApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /flink-jdbc-hbase/src/test/java/com/bigdata/study/flinkjdbchbase/FlinkJdbcHbaseApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkjdbchbase; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class FlinkJdbcHbaseApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /flink-kafka-hbase/src/test/java/com/bigdata/study/flinkkafkahbase/FlinkKafkaHbaseApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkkafkahbase; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class FlinkKafkaHbaseApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /flink-kafka11-sink/src/test/java/com/bigdata/study/flinkkafkasink/FlinkKafkaSinkApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkkafkasink; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import 
org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class FlinkKafkaSinkApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /flink-sideoutput/src/test/java/com/bigdata/study/flinksideoutput/FlinkSideoutputApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinksideoutput; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class FlinkSideoutputApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /flink-kafka-source/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /flink-kafka-source/src/test/java/com/bigdata/study/flinkkafkasource/FlinkKafkaSourceApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkkafkasource; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class FlinkKafkaSourceApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /flink-kafka11-sink/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /flink-elasticsearch-sink/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /kafka-stream/src/main/java/com/bigdata/study/kafkastream/serdes/SerdesFactory.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.kafkastream.serdes; 2 | 3 | import org.apache.kafka.common.serialization.Serde; 4 | import org.apache.kafka.common.serialization.Serdes; 5 | 6 | /** 7 | * @Description 8 | * @Author hasee 9 | * @Date 2019/1/8 10 | **/ 11 | public class SerdesFactory { 12 | 13 | public static Serde serdeFrom(Class tClass) { 14 | return Serdes.serdeFrom(new GenericSerializer<>(tClass), new GenericDeserializer<>(tClass)); 15 | } 16 | } 17 | -------------------------------------------------------------------------------- 
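SerdesFactory above delegates to GenericSerializer and GenericDeserializer, which appear in the module tree but not in this excerpt. A minimal Gson-based sketch of what such a pair could look like; only the class names and the Class argument (taken from the factory call) come from the repository, the JSON-based implementation is an assumption:

import com.google.gson.Gson;
import org.apache.kafka.common.serialization.Deserializer;
import org.apache.kafka.common.serialization.Serializer;

import java.nio.charset.StandardCharsets;
import java.util.Map;

// Hedged sketch: JSON-encode any value of type T with Gson.
class GenericSerializer<T> implements Serializer<T> {
    private final Gson gson = new Gson();

    public GenericSerializer(Class<T> targetClass) {
        // The target class is accepted to mirror the factory call; Gson does not need it to serialize.
    }

    @Override
    public void configure(Map<String, ?> configs, boolean isKey) { }

    @Override
    public byte[] serialize(String topic, T data) {
        return data == null ? null : gson.toJson(data).getBytes(StandardCharsets.UTF_8);
    }

    @Override
    public void close() { }
}

// Hedged sketch: decode the JSON bytes back into the target class.
class GenericDeserializer<T> implements Deserializer<T> {
    private final Gson gson = new Gson();
    private final Class<T> targetClass;

    public GenericDeserializer(Class<T> targetClass) {
        this.targetClass = targetClass;
    }

    @Override
    public void configure(Map<String, ?> configs, boolean isKey) { }

    @Override
    public T deserialize(String topic, byte[] data) {
        return data == null ? null : gson.fromJson(new String(data, StandardCharsets.UTF_8), targetClass);
    }

    @Override
    public void close() { }
}
--------------------------------------------------------------------------------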
/flink-elasticsearch-sink/src/test/java/com/bigdata/study/flinkelasticsearchsink/FlinkElasticsearchSinkApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkelasticsearchsink; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class FlinkElasticsearchSinkApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /dataflow-stream-kafka-source/src/test/java/com/bigdata/study/dataflowstreamkafkasource/DataflowStreamKafkaSourceApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.dataflowstreamkafkasource; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class DataflowStreamKafkaSourceApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /dataflow-stream-redis-pub-sink/src/test/java/com/bigdata/study/dataflowstreamredispubsink/DataflowStreamRedisPubSinkApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.dataflowstreamredispubsink; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class DataflowStreamRedisPubSinkApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /dataflow-stream-redis-set-processor/src/test/java/com/bigdata/study/dataflowstreamredissetprocessor/DataflowStreamRedisSetProcessorApplicationTests.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.dataflowstreamredissetprocessor; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.boot.test.context.SpringBootTest; 6 | import org.springframework.test.context.junit4.SpringRunner; 7 | 8 | @RunWith(SpringRunner.class) 9 | @SpringBootTest 10 | public class DataflowStreamRedisSetProcessorApplicationTests { 11 | 12 | @Test 13 | public void contextLoads() { 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /dataflow-stream-redis-pub-sink/src/main/java/com/bigdata/study/dataflowstreamredispubsink/prop/RedisPubProperties.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.dataflowstreamredispubsink.prop; 2 | 3 | import org.springframework.boot.context.properties.ConfigurationProperties; 4 | 5 | /** 6 | * @Description 7 | * @Author hasee 8 | * @Date 2019/1/2 9 | **/ 10 | @ConfigurationProperties("redis") 11 | public class RedisPubProperties { 12 | 13 | private 
String topic; 14 | 15 | public String getTopic() { 16 | return topic; 17 | } 18 | 19 | public void setTopic(String topic) { 20 | this.topic = topic; 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /kafka-stream/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=INFO, stdout 2 | 3 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n 6 | 7 | #log4j.appender.fileAppender=org.apache.log4j.FileAppender 8 | #log4j.appender.fileAppender.File=kafka-request.log 9 | #log4j.appender.fileAppender.layout=org.apache.log4j.PatternLayout 10 | #log4j.appender.fileAppender.layout.ConversionPattern= %-4r [%t] %-5p %c %x - %m%n 11 | 12 | 13 | # Turn on all our debugging info 14 | log4j.logger.kafka=WARN 15 | log4j.logger.org=WARN 16 | -------------------------------------------------------------------------------- /dataflow-stream-redis-set-processor/src/main/java/com/bigdata/study/dataflowstreamredissetprocessor/prop/RedisSetProperties.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.dataflowstreamredissetprocessor.prop; 2 | 3 | import org.springframework.boot.context.properties.ConfigurationProperties; 4 | 5 | /** 6 | * @Description 7 | * @Author hasee 8 | * @Date 2019/1/2 9 | **/ 10 | @ConfigurationProperties("redis") 11 | public class RedisSetProperties { 12 | 13 | private String setName; 14 | 15 | public String getSetName() { 16 | return setName; 17 | } 18 | 19 | public void setSetName(String setName) { 20 | this.setName = setName; 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /flink-common/src/main/java/utils/GsonUtils.java: -------------------------------------------------------------------------------- 1 | package utils; 2 | 3 | import com.google.gson.Gson; 4 | 5 | import java.nio.charset.Charset; 6 | 7 | /** 8 | * Gson 序列化,反序列化操作 9 | **/ 10 | public class GsonUtils { 11 | private static final Gson gson = new Gson(); 12 | 13 | public static T fromJson(String value, Class tClass) { 14 | return gson.fromJson(value, tClass); 15 | } 16 | 17 | public static String toJson(Object value) { 18 | return gson.toJson(value); 19 | } 20 | 21 | public static byte[] toJsonBytes(Object value) { 22 | return gson.toJson(value).getBytes(Charset.forName("utf-8")); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /kafka-stream/src/main/resources/orders.csv: -------------------------------------------------------------------------------- 1 | Jack, iphone, 2016-11-11 00:00:01, 3 2 | Jack, ipad, 2016-11-11 00:00:02, 4 3 | Jack, iwatch, 2016-11-11 00:00:03, 5 4 | Jack, ipod, 2016-11-11 00:00:04, 4 5 | 6 | Lily, ipad, 2016-11-11 00:00:06, 3 7 | Lily, iwatch, 2016-11-11 00:00:07, 4 8 | Lily, iphone, 2016-11-11 00:00:08, 2 9 | Lily, ipod, 2016-11-11 00:00:09, 3 10 | 11 | Mike, ipad, 2016-11-11 00:00:11, 2 12 | Mike, iwatch, 2016-11-11 00:00:12, 3 13 | Mike, iphone, 2016-11-11 00:00:13, 4 14 | Mike, ipod, 2016-11-11 00:00:14, 3 15 | 16 | Lucy, ipod, 2016-11-11 00:00:16, 3 17 | Lucy, ipad, 2016-11-11 00:00:17, 4 18 | Lucy, iwatch, 2016-11-11 00:00:18, 3 19 | Lucy, iphone, 2016-11-11 00:00:19, 5 -------------------------------------------------------------------------------- 
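The users.csv, items.csv and orders.csv files above are the sample data for the kafka-stream module. A minimal sketch of the kind of KStream/KTable enrichment this data suggests, assuming topics named "orders" and "users" keyed by user name and a local broker; only the Order and User models and SerdesFactory come from the repository:

import com.bigdata.study.kafkastream.model.Order;
import com.bigdata.study.kafkastream.model.User;
import com.bigdata.study.kafkastream.serdes.SerdesFactory;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KTable;

import java.util.Properties;

public class OrderEnrichmentSketch {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "order-enrichment-sketch");
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // assumed broker address

        StreamsBuilder builder = new StreamsBuilder();

        // Orders as a stream and users as a changelog table, both keyed by user name (assumed).
        KStream<String, Order> orders = builder.stream("orders",
                Consumed.with(Serdes.String(), SerdesFactory.serdeFrom(Order.class)));
        KTable<String, User> users = builder.table("users",
                Consumed.with(Serdes.String(), SerdesFactory.serdeFrom(User.class)));

        // Enrich each order with the buyer's address and print the result.
        orders.leftJoin(users, (order, user) ->
                        order.getUserName() + " (" + (user == null ? "unknown" : user.getAddress()) + ") bought "
                                + order.getQuantity() + " x " + order.getItemName())
                .foreach((userName, line) -> System.out.println(line));

        new KafkaStreams(builder.build(), props).start();
    }
}
--------------------------------------------------------------------------------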
/fork-join/src/main/java/forkjoin/ParallelStream.java: -------------------------------------------------------------------------------- 1 | package forkjoin; 2 | 3 | import java.time.Duration; 4 | import java.time.Instant; 5 | import java.util.stream.LongStream; 6 | 7 | /** 8 | * 并行流计算 9 | * 底层为fork join的实现 10 | * 效率高于直接使用顺序流 fork join 11 | **/ 12 | public class ParallelStream { 13 | public static void main(String[] args) { 14 | Instant startTime = Instant.now(); 15 | long result = LongStream.rangeClosed(0, 10_000_00_100L) 16 | .parallel() 17 | .reduce(0, Long::sum); 18 | System.out.println(result); 19 | Instant endTime = Instant.now(); 20 | System.out.println("计算10亿条数据耗时:" + Duration.between(startTime, endTime).toMillis()); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /fork-join/src/main/java/forkjoin/ForkJoinApp.java: -------------------------------------------------------------------------------- 1 | package forkjoin; 2 | 3 | import java.time.Duration; 4 | import java.time.Instant; 5 | import java.util.concurrent.ForkJoinPool; 6 | 7 | /** 8 | * 顺序流使用fork join 9 | **/ 10 | public class ForkJoinApp { 11 | public static void main(String[] args) { 12 | Instant startTime = Instant.now(); 13 | ForkJoinPool forkJoinPool = new ForkJoinPool(); 14 | MyForkJoinTask myForkJoinTask = new MyForkJoinTask(0L, 10_000_00_000L); 15 | Long result = forkJoinPool.invoke(myForkJoinTask); 16 | System.out.println(result); 17 | Instant endTime = Instant.now(); 18 | System.out.println("计算10亿条数据耗时:" + Duration.between(startTime, endTime).toMillis()); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /spark-phoenix/src/test/java/com/bigdata/study/sparkphoenix/phoenix/SparkPhoenixTest.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.sparkphoenix.phoenix; 2 | 3 | import com.bigdata.study.sparkphoenix.SparkPhoenixApplication; 4 | import org.junit.Test; 5 | import org.springframework.boot.SpringApplication; 6 | 7 | import java.util.ArrayList; 8 | import java.util.List; 9 | 10 | /** 11 | * @Description 12 | * @Author hasee 13 | * @Date 2019/1/3 14 | **/ 15 | public class SparkPhoenixTest { 16 | public static void main(String[] args) { 17 | List params = new ArrayList<>(); 18 | params.add("2017-06-01"); 19 | params.add("2017-07-01"); 20 | String[] argArray = params.toArray(new String[]{}); 21 | SpringApplication.run(SparkPhoenixApplication.class, argArray); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /dataflow-stream-kafka-source/src/main/java/com/bigdata/study/dataflowstreamkafkasource/DataflowStreamKafkaSourceApplication.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.dataflowstreamkafkasource; 2 | 3 | import com.bigdata.study.dataflowstreamkafkasource.config.KafkaSourceConfig; 4 | import org.springframework.boot.SpringApplication; 5 | import org.springframework.boot.autoconfigure.SpringBootApplication; 6 | import org.springframework.context.annotation.ComponentScan; 7 | import org.springframework.context.annotation.Import; 8 | 9 | @SpringBootApplication 10 | @ComponentScan("com.bigdata") 11 | @Import({KafkaSourceConfig.class}) 12 | public class DataflowStreamKafkaSourceApplication { 13 | 14 | public static void main(String[] args) { 15 | SpringApplication.run(DataflowStreamKafkaSourceApplication.class, args); 16 | } 17 | 18 | 
}
--------------------------------------------------------------------------------
/dataflow-stream-redis-pub-sink/src/main/java/com/bigdata/study/dataflowstreamredispubsink/DataflowStreamRedisPubSinkApplication.java:
--------------------------------------------------------------------------------
package com.bigdata.study.dataflowstreamredispubsink;

import com.bigdata.study.dataflowstreamredispubsink.config.RedisStreamPubConfig;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.Import;

@SpringBootApplication
@ComponentScan("com.bigdata")
@Import({RedisStreamPubConfig.class})
public class DataflowStreamRedisPubSinkApplication {

    public static void main(String[] args) {
        SpringApplication.run(DataflowStreamRedisPubSinkApplication.class, args);
    }

}
--------------------------------------------------------------------------------
/fork-join/pom.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>bigdata-starter</artifactId>
        <groupId>com.bigdata.study</groupId>
        <version>1.0</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>

    <artifactId>fork-join</artifactId>

    <packaging>jar</packaging>

    <dependencies>
        <dependency>
            <groupId>commons-dbutils</groupId>
            <artifactId>commons-dbutils</artifactId>
            <version>1.6</version>
        </dependency>
    </dependencies>
</project>
--------------------------------------------------------------------------------
/flink-kafka-hbase/src/main/java/com/bigdata/study/flinkkafkahbase/watermarks/FlinkHbaseWaterMarks.java:
--------------------------------------------------------------------------------
package com.bigdata.study.flinkkafkahbase.watermarks;

import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
import org.apache.flink.streaming.api.watermark.Watermark;

import javax.annotation.Nullable;
import java.util.Map;

/**
 * Custom watermark assigner
 **/
public class FlinkHbaseWaterMarks implements AssignerWithPeriodicWatermarks<Map<String, String>> {
    private long currentTime;

    @Nullable
    @Override
    public Watermark getCurrentWatermark() {
        return new Watermark(currentTime);
    }

    @Override
    public long extractTimestamp(Map<String, String> stringStringMap, long l) {
        currentTime = l;
        return l;
    }
}
--------------------------------------------------------------------------------
/flink-kafka-source/src/main/java/com/bigdata/study/flinkkafkasource/watermarks/ConsumerWaterMarkEmitter.java:
--------------------------------------------------------------------------------
package com.bigdata.study.flinkkafkasource.watermarks;

import model.Metrics;
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
import org.apache.flink.streaming.api.watermark.Watermark;

import javax.annotation.Nullable;

/**
 * Custom timestamp extractor / watermark emitter
 **/
public class ConsumerWaterMarkEmitter implements AssignerWithPeriodicWatermarks<Metrics> {
    private long currentTime;

    @Nullable
    @Override
    public Watermark getCurrentWatermark() {
        return new Watermark(currentTime);
    }

    @Override
    public long extractTimestamp(Metrics metrics, long l) {
        currentTime = l;
        return currentTime;
    }
}
--------------------------------------------------------------------------------
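The two classes above are periodic watermark generators. A minimal sketch of how such an assigner is typically wired into a Flink job, assuming event time, a one-second auto-watermark interval and a placeholder in-memory source (the real jobs read Metrics from Kafka):

import com.bigdata.study.flinkkafkasource.watermarks.ConsumerWaterMarkEmitter;
import model.Metrics;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class WatermarkWiringSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Periodic assigners are only polled in event time, at the configured auto-watermark interval.
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.getConfig().setAutoWatermarkInterval(1000L);

        // Placeholder source; in the repository the Metrics stream comes from a Kafka consumer.
        DataStream<Metrics> metrics = env.fromElements(new Metrics());

        DataStream<Metrics> withTimestamps =
                metrics.assignTimestampsAndWatermarks(new ConsumerWaterMarkEmitter());

        withTimestamps.print();
        env.execute("watermark wiring sketch");
    }
}
--------------------------------------------------------------------------------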
/dataflow-stream-redis-set-processor/src/main/java/com/bigdata/study/dataflowstreamredissetprocessor/DataflowStreamRedisSetProcessorApplication.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.dataflowstreamredissetprocessor; 2 | 3 | import com.bigdata.study.dataflowstreamredissetprocessor.config.RedisStreamProcessorConfig; 4 | import org.springframework.boot.SpringApplication; 5 | import org.springframework.boot.autoconfigure.SpringBootApplication; 6 | import org.springframework.context.annotation.ComponentScan; 7 | import org.springframework.context.annotation.Import; 8 | 9 | @SpringBootApplication 10 | @ComponentScan("com.bigdata") 11 | @Import({RedisStreamProcessorConfig.class}) 12 | public class DataflowStreamRedisSetProcessorApplication { 13 | 14 | public static void main(String[] args) { 15 | SpringApplication.run(DataflowStreamRedisSetProcessorApplication.class, args); 16 | } 17 | 18 | } 19 | 20 | -------------------------------------------------------------------------------- /flink-common/src/main/java/watermarks/MetricWatermark.java: -------------------------------------------------------------------------------- 1 | package watermarks; 2 | 3 | import model.Metrics; 4 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; 5 | import org.apache.flink.streaming.api.watermark.Watermark; 6 | 7 | import javax.annotation.Nullable; 8 | 9 | /** 10 | * @Description 11 | * @Author hasee 12 | * @Date 2019/1/4 13 | **/ 14 | public class MetricWatermark implements AssignerWithPeriodicWatermarks { 15 | private long currentTime = Long.MAX_VALUE; 16 | 17 | @Nullable 18 | @Override 19 | public Watermark getCurrentWatermark() { 20 | return new Watermark(currentTime == Long.MAX_VALUE ? 
Long.MAX_VALUE : currentTime - 1); 21 | } 22 | 23 | @Override 24 | public long extractTimestamp(Metrics metrics, long l) { 25 | long time = metrics.getTimestamp() / (1000 * 1000); 26 | this.currentTime = time; 27 | return currentTime; 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /dataflow-stream-redis-pub-sink/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | spring.cloud.dataflow.stream.app.label=redisq 2 | endpoints.shutdown.enabled=true 3 | spring.redis.host=192.168.254.252 4 | spring.cloud.stream.metrics.properties=spring.application.name,spring.application.index,spring.cloud.application.*,spring.cloud.dataflow.* 5 | spring.cloud.dataflow.stream.name=stream2redisq 6 | spring.metrics.export.triggers.application.includes=integration** 7 | spring.cloud.stream.metrics.key=stream2redisq.redisq.${spring.cloud.application.guid} 8 | spring.cloud.stream.bindings.input.group=stream2redisq 9 | server.port=29728 10 | spring.cloud.stream.kafka.binder.zkNodes=tools.wjm.com:2181 11 | spring.cloud.dataflow.stream.app.type=sink 12 | redis.queue=all 13 | spring.cloud.stream.bindings.input.destination=stream2redisq.rand-map 14 | spring.cloud.stream.kafka.binder.brokers=tools.wjm.com:9092 15 | spring.jmx.default-domain=stream2redisq.redisq 16 | spring.cloud.application.group=stream2redisq -------------------------------------------------------------------------------- /flink-sideoutput/src/main/java/com/bigdata/study/flinksideoutput/process/ProcessTokenizer.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinksideoutput.process; 2 | 3 | import com.bigdata.study.flinksideoutput.tag.SideOutputTag; 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | import org.apache.flink.streaming.api.functions.ProcessFunction; 6 | import org.apache.flink.util.Collector; 7 | 8 | /** 9 | * @Description 10 | * @Author hasee 11 | * @Date 2019/1/11 12 | **/ 13 | public class ProcessTokenizer extends ProcessFunction> { 14 | @Override 15 | public void processElement(String value, Context context, Collector> collector) throws Exception { 16 | String[] tokens = value.toLowerCase().split("\\w+"); 17 | for (String token : tokens) { 18 | if (token.length() > 5) { 19 | context.output(SideOutputTag.wordTag, token); 20 | } else { 21 | collector.collect(new Tuple2<>(token, 1)); 22 | } 23 | } 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /dataflow-stream-kafka-source/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | server.port=-1 2 | spring.cloud.dataflow.stream.app.type=source 3 | 4 | #spring.cloud.stream.kafka.binder.brokers=192.168.10.120:6667,192.168.10.121:6667,192.168.10.122:6667 5 | #spring.cloud.stream.kafka.binder.zk-nodes=192.168.10.120:2181,192.168.10.121:2181,192.168.10.122:2181 6 | #spring.cloud.stream.kafka.binder.fetchSize=1024000 7 | #spring.cloud.stream.kafka.binder.minPartitionCount=1 8 | #spring.cloud.stream.kafka.binder.autoCreateTopics=true 9 | #spring.cloud.stream.kafka.binder.autoAddPartitions=false 10 | 11 | spring.cloud.stream.bindings.output.destination=kafka2kafka.topic 12 | kafka.topic=kafka.from 13 | kafka.servers=192.168.10.120:6667,192.168.10.121:6667,192.168.10.122:6667 14 | kafka.zkNodes=192.168.10.120:2181,192.168.10.121:2181,192.168.10.122:2181 15 | 
kafka.groupId=kafka.from.group 16 | kafka.batchSize=102400 17 | kafka.keyDeserializer=org.apache.kafka.common.serialization.StringDeserializer 18 | kafka.valueDeserializer=org.apache.kafka.common.serialization.StringDeserializer -------------------------------------------------------------------------------- /kafka-stream/src/main/java/com/bigdata/study/kafkastream/model/User.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.kafkastream.model; 2 | 3 | public class User { 4 | private String name; 5 | private String address; 6 | private String gender; 7 | private int age; 8 | 9 | public User() {} 10 | 11 | public User(String name, String address, String gender, int age) { 12 | this.name = name; 13 | this.address = address; 14 | this.gender = gender; 15 | this.age = age; 16 | } 17 | 18 | public String getName() { 19 | return name; 20 | } 21 | 22 | public void setName(String name) { 23 | this.name = name; 24 | } 25 | 26 | public String getAddress() { 27 | return address; 28 | } 29 | 30 | public void setAddress(String address) { 31 | this.address = address; 32 | } 33 | 34 | public String getGender() { 35 | return gender; 36 | } 37 | 38 | public void setGender(String gender) { 39 | this.gender = gender; 40 | } 41 | 42 | public int getAge() { 43 | return age; 44 | } 45 | 46 | public void setAge(int age) { 47 | this.age = age; 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /flink-sideoutput/src/main/java/com/bigdata/study/flinksideoutput/process/KeyedTokenizer.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinksideoutput.process; 2 | 3 | import com.bigdata.study.flinksideoutput.tag.SideOutputTag; 4 | import org.apache.flink.api.java.tuple.Tuple2; 5 | import org.apache.flink.streaming.api.functions.KeyedProcessFunction; 6 | import org.apache.flink.util.Collector; 7 | 8 | /** 9 | * 以用户自定义FlatMapFunction函数的形式来实现分词器功能,该分词器会将分词封装为(word,1), 10 | * 同时不接受单词长度大于5的,也即是侧输出都是单词长度大于5的单词。 11 | **/ 12 | public class KeyedTokenizer extends KeyedProcessFunction> { 13 | @Override 14 | public void processElement(String value, Context context, Collector> collector) throws Exception { 15 | String[] tokens = value.toLowerCase().split("\\W+"); 16 | for (String token : tokens) { 17 | if (token.length() > 5) { 18 | context.output(SideOutputTag.wordTag, token); 19 | } else { 20 | collector.collect(new Tuple2<>(token, 1)); 21 | } 22 | } 23 | } 24 | 25 | } 26 | -------------------------------------------------------------------------------- /kafka-stream/src/main/java/com/bigdata/study/kafkastream/model/Item.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.kafkastream.model; 2 | 3 | public class Item { 4 | private String itemName; 5 | private String address; 6 | private String type; 7 | private double price; 8 | 9 | public Item() {} 10 | 11 | public Item(String itemName, String address, String type, double price) { 12 | this.itemName = itemName; 13 | this.address = address; 14 | this.type = type; 15 | this.price = price; 16 | } 17 | 18 | public String getItemName() { 19 | return itemName; 20 | } 21 | 22 | public void setItemName(String itemName) { 23 | this.itemName = itemName; 24 | } 25 | 26 | public String getAddress() { 27 | return address; 28 | } 29 | 30 | public void setAddress(String address) { 31 | this.address = address; 32 | } 33 | 34 | public String 
getType() { 35 | return type; 36 | } 37 | 38 | public void setType(String type) { 39 | this.type = type; 40 | } 41 | 42 | public double getPrice() { 43 | return price; 44 | } 45 | 46 | public void setPrice(double price) { 47 | this.price = price; 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /flink-common/src/main/java/model/Metrics.java: -------------------------------------------------------------------------------- 1 | package model; 2 | 3 | import java.util.Map; 4 | 5 | /** 6 | * @Description 7 | * @Author hasee 8 | * @Date 2019/1/4 9 | **/ 10 | public class Metrics { 11 | private String name; 12 | 13 | private Long timestamp; 14 | 15 | private Map fields; 16 | 17 | private Map tags; 18 | 19 | public String getName() { 20 | return name; 21 | } 22 | 23 | public void setName(String name) { 24 | this.name = name; 25 | } 26 | 27 | public Long getTimestamp() { 28 | return timestamp; 29 | } 30 | 31 | public void setTimestamp(Long timestamp) { 32 | this.timestamp = timestamp; 33 | } 34 | 35 | public Map getFields() { 36 | return fields; 37 | } 38 | 39 | public void setFields(Map fields) { 40 | this.fields = fields; 41 | } 42 | 43 | public Map getTags() { 44 | return tags; 45 | } 46 | 47 | public void setTags(Map tags) { 48 | this.tags = tags; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /flink-common/src/main/java/constant/PropertiesConstants.java: -------------------------------------------------------------------------------- 1 | package constant; 2 | 3 | /** 4 | * 基本配置属性信息 5 | **/ 6 | public class PropertiesConstants { 7 | //kafka 8 | public static final String KAFKA_BROKERS = "kafka.brokers"; 9 | public static final String KAFKA_ZOOKEEPER_CONNECT = "kafka.zookeeper.connect"; 10 | public static final String KAFKA_GROUP_ID = "kafka.group.id"; 11 | public static final String METRICS_TOPIC = "metrics.topic"; 12 | public static final String CONSUMER_FROM_TIME = "consumer.from.time"; 13 | 14 | public static final String STREAM_PARALLELISM = "stream.parallelism"; 15 | public static final String STREAM_SINK_PARALLELISM = "stream.sink.parallelism"; 16 | public static final String STREAM_CHECKPOINT_ENABLE = "stream.checkpoint.enable"; 17 | public static final String STREAM_CHECKPOINT_INTERVAL = "stream.checkpoint.interval"; 18 | 19 | public static final String PROPERTIES_FILE_NAME = "/application.properties"; 20 | 21 | //es config 22 | public static final String ELASTICSEARCH_BULK_FLUSH_MAX_ACTIONS = "elasticsearch.bulk.flush.max.actions"; 23 | public static final String ELASTICSEARCH_HOSTS = "elasticsearch.hosts"; 24 | } 25 | -------------------------------------------------------------------------------- /flink-elasticsearch-sink/src/main/java/com/bigdata/study/flinkelasticsearchsink/handler/FlinkFailHandler.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkelasticsearchsink.handler; 2 | 3 | import org.apache.flink.streaming.connectors.elasticsearch.ActionRequestFailureHandler; 4 | import org.apache.flink.streaming.connectors.elasticsearch.RequestIndexer; 5 | import org.elasticsearch.ElasticsearchParseException; 6 | import org.elasticsearch.action.ActionRequest; 7 | import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException; 8 | 9 | /** 10 | * 自定义es写入失败处理 11 | **/ 12 | public class FlinkFailHandler implements ActionRequestFailureHandler { 13 | 14 | @Override 15 | public void 
onFailure(ActionRequest actionRequest, Throwable throwable, int requestStatusCode, RequestIndexer requestIndexer) throws Throwable { 16 | if (throwable instanceof EsRejectedExecutionException) { 17 | //将失败请求继续加入队列,后续进行重试写入 18 | requestIndexer.add(actionRequest); 19 | } else if (throwable instanceof ElasticsearchParseException) { 20 | //自定义异常处理 21 | throwable.printStackTrace(); 22 | } else { 23 | throw throwable; 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /kafka-stream/src/main/java/com/bigdata/study/kafkastream/model/Order.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.kafkastream.model; 2 | 3 | public class Order { 4 | 5 | private String userName; 6 | private String itemName; 7 | private long transactionDate; 8 | private int quantity; 9 | 10 | public Order() {} 11 | 12 | public Order(String userName, String itemName, long transactionDate, int quantity) { 13 | this.userName = userName; 14 | this.itemName = itemName; 15 | this.transactionDate = transactionDate; 16 | this.quantity = quantity; 17 | } 18 | 19 | public String getUserName() { 20 | return userName; 21 | } 22 | 23 | public void setUserName(String userName) { 24 | this.userName = userName; 25 | } 26 | 27 | public String getItemName() { 28 | return itemName; 29 | } 30 | 31 | public void setItemName(String itemName) { 32 | this.itemName = itemName; 33 | } 34 | 35 | public long getTransactionDate() { 36 | return transactionDate; 37 | } 38 | 39 | public void setTransactionDate(long transactionDate) { 40 | this.transactionDate = transactionDate; 41 | } 42 | 43 | public int getQuantity() { 44 | return quantity; 45 | } 46 | 47 | public void setQuantity(int quantity) { 48 | this.quantity = quantity; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /flink-common/src/main/java/schemas/MetricSchema.java: -------------------------------------------------------------------------------- 1 | package schemas; 2 | 3 | import com.google.gson.Gson; 4 | import model.Metrics; 5 | import org.apache.flink.api.common.serialization.DeserializationSchema; 6 | import org.apache.flink.api.common.serialization.SerializationSchema; 7 | import org.apache.flink.api.common.typeinfo.TypeInformation; 8 | 9 | import java.io.IOException; 10 | import java.nio.charset.Charset; 11 | 12 | /** 13 | * @Description 14 | * @Author hasee 15 | * @Date 2019/1/4 16 | **/ 17 | public class MetricSchema implements DeserializationSchema, SerializationSchema { 18 | private static Gson gson = new Gson(); 19 | 20 | @Override 21 | public Metrics deserialize(byte[] bytes) throws IOException { 22 | return gson.fromJson(new String(bytes), Metrics.class); 23 | } 24 | 25 | @Override 26 | public boolean isEndOfStream(Metrics metrics) { 27 | return false; 28 | } 29 | 30 | @Override 31 | public byte[] serialize(Metrics metrics) { 32 | return gson.toJson(metrics).getBytes(Charset.forName("utf-8")); 33 | } 34 | 35 | @Override 36 | public TypeInformation getProducedType() { 37 | return TypeInformation.of(Metrics.class); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /kafka-stream/src/main/java/com/bigdata/study/kafkastream/utils/HashPartitioner.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.kafkastream.utils; 2 | 3 | import org.apache.kafka.clients.producer.Partitioner; 4 | import 
org.apache.kafka.common.Cluster;
import org.apache.kafka.common.PartitionInfo;

import java.util.List;
import java.util.Map;

/**
 * Custom Kafka partitioner: hashes the record key onto a partition.
 **/
public class HashPartitioner implements Partitioner {

    @Override
    public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
        List<PartitionInfo> partitionInfos = cluster.partitionsForTopic(topic);
        int numPartitions = partitionInfos.size();
        if (keyBytes != null && keyBytes.length > 0) {
            int hashCode;
            if (key instanceof Integer || key instanceof Long) {
                hashCode = ((Number) key).intValue();
            } else {
                hashCode = key.hashCode();
            }
            // clear the sign bit so the modulo result is a valid partition index
            hashCode = hashCode & 0x7fffffff;
            return hashCode % numPartitions;
        } else {
            // records without a key all go to partition 0
            return 0;
        }
    }

    @Override
    public void close() {

    }

    @Override
    public void configure(Map<String, ?> map) {

    }
}

--------------------------------------------------------------------------------
/fork-join/src/main/java/forkjoin/MyForkJoinTask.java:
--------------------------------------------------------------------------------
package forkjoin;

import java.util.concurrent.RecursiveTask;

/**
 * Fork/join task that sums the numbers in the range [startNum, endNum].
 **/
public class MyForkJoinTask extends RecursiveTask<Long> {
    /**
     * Threshold below which a sub-range is summed directly instead of being split further
     */
    private static final long CRITICAL_VALUE = 10_000;
    /**
     * Start of the range
     */
    private Long startNum;

    /**
     * End of the range
     */
    private Long endNum;

    public MyForkJoinTask(Long startNum, Long endNum) {
        this.startNum = startNum;
        this.endNum = endNum;
    }

    @Override
    protected Long compute() {
        long length = endNum - startNum;
        if (length <= CRITICAL_VALUE) {
            // small enough: sum the sub-range [startNum, endNum] directly
            long num = 0;
            for (long i = startNum; i <= endNum; i++) {
                num += i;
            }
            return num;
        } else {
            // split the range in half, fork both halves and combine their results
            long middleValue = (startNum + endNum) / 2;
            MyForkJoinTask leftTask = new MyForkJoinTask(startNum, middleValue);
            leftTask.fork();
            MyForkJoinTask rightTask = new MyForkJoinTask(middleValue + 1, endNum);
            rightTask.fork();
            return leftTask.join() + rightTask.join();
        }
    }
}

--------------------------------------------------------------------------------
/spark-phoenix/src/main/java/com/bigdata/study/sparkphoenix/utils/PhoenixUtil.java:
--------------------------------------------------------------------------------
package com.bigdata.study.sparkphoenix.utils;

import org.apache.commons.collections.CollectionUtils;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.util.LinkedList;

/**
 * @Description
 * @Author hasee
 * @Date 2019/1/3
 **/
public class PhoenixUtil {

    private static LinkedList<Connection> connectionQueue;

    static {
        try {
            Class.forName("org.apache.phoenix.jdbc.PhoenixDriver");
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
    }

    public synchronized static Connection getConnection() {
        try {
            if (CollectionUtils.isEmpty(connectionQueue)) {
                connectionQueue = new LinkedList<>();
                for (int i = 0; i < 3; i++) {
                    Connection connection = DriverManager.getConnection("jdbc:phoenix:zk:2181");
                    connectionQueue.add(connection);
                }
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return connectionQueue.poll();
    }

    public static void
returnConnection(Connection connection) { 43 | connectionQueue.push(connection); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /dataflow-stream-redis-set-processor/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | spring.cloud.dataflow.stream.app.label=redisq 2 | endpoints.shutdown.enabled=true 3 | 4 | spring.redis.host=192.168.254.252 5 | redis.queue=all 6 | 7 | spring.cloud.stream.metrics.properties=spring.application.name,spring.application.index,spring.cloud.application.*,spring.cloud.dataflow.* 8 | spring.cloud.dataflow.stream.name=stream2redisq 9 | spring.metrics.export.triggers.application.includes=integration** 10 | spring.cloud.stream.metrics.key=stream2redisq.redisq.${spring.cloud.application.guid} 11 | 12 | server.port=29728 13 | #spring.cloud.stream.kafka.binder.zkNodes=tools.wjm.com:2181 14 | spring.cloud.dataflow.stream.app.type=processor 15 | 16 | spring.cloud.stream.bindings.input.destination=stream2redisq.rand-map 17 | spring.cloud.stream.bindings.input.group=stream2redisq 18 | 19 | spring.jmx.default-domain=stream2redisq.redisq 20 | spring.cloud.application.group=stream2redisq 21 | 22 | spring.cloud.stream.kafka.binder.brokers=192.168.10.120:6667,192.168.10.121:6667,192.168.10.122:6667 23 | spring.cloud.stream.kafka.binder.zk-nodes=192.168.10.120:2181,192.168.10.121:2181,192.168.10.122:2181 24 | spring.cloud.stream.kafka.binder.fetchSize=1024000 25 | spring.cloud.stream.kafka.binder.minPartitionCount=1 26 | spring.cloud.stream.kafka.binder.autoCreateTopics=true 27 | spring.cloud.stream.kafka.binder.autoAddPartitions=false 28 | 29 | spring.cloud.stream.bindings.output.destination=stream2redisq.rand-map -------------------------------------------------------------------------------- /dataflow-stream-redis-set-processor/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.bigdata.study 7 | bigdata-starter 8 | 1.0 9 | ../pom.xml 10 | 11 | dataflow-stream-redis-set-processor 12 | 1.0 13 | dataflow-stream-redis-set-processor 14 | Demo project for Spring Boot 15 | 16 | 17 | 18 | org.springframework.boot 19 | spring-boot-starter-data-redis 20 | 21 | 22 | org.springframework.cloud 23 | spring-cloud-starter-stream-kafka 24 | 25 | 26 | 27 | 28 | 29 | 30 | org.springframework.boot 31 | spring-boot-maven-plugin 32 | 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /dataflow-stream-redis-pub-sink/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.bigdata.study 7 | bigdata-starter 8 | 1.0 9 | ../pom.xml 10 | 11 | 12 | dataflow-stream-redis-pub-sink 13 | 0.0.1-SNAPSHOT 14 | dataflow-stream-redis-pub-sink 15 | Demo project for Spring Boot 16 | 17 | 18 | 19 | org.springframework.boot 20 | spring-boot-starter-data-redis 21 | 22 | 23 | org.springframework.cloud 24 | spring-cloud-starter-stream-kafka 25 | 26 | 27 | 28 | 29 | 30 | 31 | org.springframework.boot 32 | spring-boot-maven-plugin 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /kafka-stream/src/main/java/com/bigdata/study/kafkastream/timeextractor/OrderTimestampExtractor.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.kafkastream.timeextractor; 2 | 3 | import 
com.bigdata.study.kafkastream.model.Item; 4 | import com.bigdata.study.kafkastream.model.Order; 5 | import com.bigdata.study.kafkastream.model.User; 6 | import com.fasterxml.jackson.databind.JsonNode; 7 | import org.apache.kafka.clients.consumer.ConsumerRecord; 8 | import org.apache.kafka.streams.processor.TimestampExtractor; 9 | 10 | import java.time.LocalDateTime; 11 | import java.time.ZoneOffset; 12 | 13 | /** 14 | * 自定义从topic中抽取时间 15 | **/ 16 | public class OrderTimestampExtractor implements TimestampExtractor { 17 | @Override 18 | public long extract(ConsumerRecord consumerRecord) { 19 | Object value = consumerRecord.value(); 20 | if (value instanceof Order) { 21 | return ((Order) value).getTransactionDate(); 22 | } else if (value instanceof JsonNode) { 23 | return ((JsonNode) value).get("transactionDate").longValue(); 24 | } else if (value instanceof User) { 25 | return LocalDateTime.of(2015, 12, 11, 1, 0, 10) 26 | .toEpochSecond(ZoneOffset.UTC) * 1000; 27 | } else if (value instanceof Item) { 28 | return LocalDateTime.of(2015, 12, 11, 0, 0, 10).toEpochSecond(ZoneOffset.UTC) * 1000; 29 | } else { 30 | return LocalDateTime.of(2015, 11, 10, 0, 0, 10).toEpochSecond(ZoneOffset.UTC) * 1000; 31 | } 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /elasticsearch/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.bigdata.study 7 | bigdata-starter 8 | 1.0 9 | ../pom.xml 10 | 11 | elasticsearch 12 | 0.0.1-SNAPSHOT 13 | elasticsearch 14 | Demo project for Spring Boot 15 | 16 | 17 | 18 | org.springframework.boot 19 | spring-boot-starter 20 | 21 | 22 | org.elasticsearch.client 23 | transport 24 | 6.2.4 25 | 26 | 27 | org.springframework.boot 28 | spring-boot-starter-test 29 | test 30 | 31 | 32 | 33 | 34 | 35 | 36 | org.springframework.boot 37 | spring-boot-maven-plugin 38 | 39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /flink-async-io/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.bigdata.study 7 | bigdata-starter 8 | 1.0 9 | ../pom.xml 10 | 11 | flink-async-io 12 | 0.0.1-SNAPSHOT 13 | flink-async-io 14 | Demo project for Spring Boot 15 | 16 | 17 | 18 | org.springframework.boot 19 | spring-boot-starter 20 | 21 | 22 | com.bigdata.study 23 | flink-common 24 | 1.0 25 | 26 | 27 | org.springframework.boot 28 | spring-boot-starter-test 29 | test 30 | 31 | 32 | 33 | 34 | 35 | 36 | org.springframework.boot 37 | spring-boot-maven-plugin 38 | 39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /flink-sideoutput/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.bigdata.study 7 | bigdata-starter 8 | 1.0 9 | ../pom.xml 10 | 11 | flink-sideoutput 12 | 0.0.1-SNAPSHOT 13 | flink-sideoutput 14 | Demo project for Spring Boot 15 | 16 | 17 | 1.8 18 | 19 | 20 | 21 | 22 | org.springframework.boot 23 | spring-boot-starter 24 | 25 | 26 | com.bigdata.study 27 | flink-common 28 | 1.0 29 | 30 | 31 | org.springframework.boot 32 | spring-boot-starter-test 33 | test 34 | 35 | 36 | 37 | 38 | 39 | 40 | org.springframework.boot 41 | spring-boot-maven-plugin 42 | 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /flink-async-io/src/main/java/com/bigdata/study/flinkasyncio/source/SimpleSource.java: 
-------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkasyncio.source; 2 | 3 | import org.apache.flink.streaming.api.checkpoint.ListCheckpointed; 4 | import org.apache.flink.streaming.api.functions.source.SourceFunction; 5 | import org.springframework.util.CollectionUtils; 6 | 7 | import java.util.Collections; 8 | import java.util.List; 9 | 10 | /** 11 | * @Description 12 | * @Author hasee 13 | * @Date 2019/1/15 14 | **/ 15 | public class SimpleSource implements SourceFunction, ListCheckpointed { 16 | 17 | private volatile boolean isRunning = true; 18 | 19 | private int counter = 0; 20 | 21 | private int start = 0; 22 | 23 | public SimpleSource(int counter) { 24 | this.counter = counter; 25 | } 26 | 27 | @Override 28 | public List snapshotState(long l, long l1) throws Exception { 29 | return Collections.singletonList(start); 30 | } 31 | 32 | @Override 33 | public void restoreState(List list) throws Exception { 34 | for (Integer state : list) { 35 | this.start = state; 36 | } 37 | } 38 | 39 | @Override 40 | public void run(SourceContext sourceContext) throws Exception { 41 | while ((start < counter || counter == -1) && isRunning) { 42 | synchronized (sourceContext.getCheckpointLock()) { 43 | sourceContext.collect(start); 44 | ++start; 45 | if (start == Integer.MAX_VALUE) { 46 | start = 0; 47 | } 48 | } 49 | Thread.sleep(10L); 50 | } 51 | } 52 | 53 | @Override 54 | public void cancel() { 55 | isRunning = false; 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /flink-kafka-hbase/src/main/java/com/bigdata/study/flinkkafkahbase/model/Metric.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkkafkahbase.model; 2 | 3 | import java.util.Map; 4 | 5 | /** 6 | * @Description 7 | * @Author hasee 8 | * @Date 2018/12/5 9 | **/ 10 | public class Metric { 11 | 12 | public String name; 13 | public long timestamp; 14 | public Map fields; 15 | public Map tags; 16 | 17 | public Metric() { 18 | } 19 | 20 | public Metric(String name, long timestamp, Map fields, Map tags) { 21 | this.name = name; 22 | this.timestamp = timestamp; 23 | this.fields = fields; 24 | this.tags = tags; 25 | } 26 | 27 | public String getName() { 28 | return name; 29 | } 30 | 31 | public void setName(String name) { 32 | this.name = name; 33 | } 34 | 35 | public long getTimestamp() { 36 | return timestamp; 37 | } 38 | 39 | public void setTimestamp(long timestamp) { 40 | this.timestamp = timestamp; 41 | } 42 | 43 | public Map getFields() { 44 | return fields; 45 | } 46 | 47 | public void setFields(Map fields) { 48 | this.fields = fields; 49 | } 50 | 51 | public Map getTags() { 52 | return tags; 53 | } 54 | 55 | public void setTags(Map tags) { 56 | this.tags = tags; 57 | } 58 | 59 | @Override 60 | public String toString() { 61 | return "Metric{" + 62 | "name='" + name + '\'' + 63 | ", timestamp=" + timestamp + 64 | ", fields=" + fields + 65 | ", tags=" + tags + 66 | '}'; 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /kafka-stream/src/main/java/com/bigdata/study/kafkastream/serdes/GenericDeserializer.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.kafkastream.serdes; 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper; 4 | import com.sun.xml.internal.ws.encoding.soap.DeserializationException; 5 | import 
org.apache.kafka.common.serialization.Deserializer; 6 | 7 | import java.io.IOException; 8 | import java.util.Map; 9 | 10 | /** 11 | * @Description 12 | * @Author hasee 13 | * @Date 2019/1/8 14 | **/ 15 | public class GenericDeserializer implements Deserializer { 16 | private Class type; 17 | private static ObjectMapper mapper = new ObjectMapper(); 18 | 19 | public GenericDeserializer() { 20 | } 21 | 22 | public GenericDeserializer(Class type) { 23 | this.type = type; 24 | } 25 | 26 | @Override 27 | public void configure(Map map, boolean b) { 28 | if (type != null) { 29 | return; 30 | } 31 | String typeProp = b ? "key.deserializer.type" : "value.deserializer.type"; 32 | String typeName = String.valueOf(map.get(typeProp)); 33 | try { 34 | type = (Class) Class.forName(typeName); 35 | } catch (ClassNotFoundException e) { 36 | throw new DeserializationException("failed to initialize GenericDeserializer for " + typeName, e); 37 | } 38 | } 39 | 40 | @Override 41 | public T deserialize(String s, byte[] bytes) { 42 | if (bytes.length < 1) { 43 | return null; 44 | } 45 | try { 46 | return mapper.readValue(bytes, type); 47 | } catch (IOException e) { 48 | throw new DeserializationException(e); 49 | } 50 | } 51 | 52 | @Override 53 | public void close() { 54 | 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /kafka-stream/src/main/java/com/bigdata/study/kafkastream/serdes/GenericSerializer.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.kafkastream.serdes; 2 | 3 | 4 | import com.fasterxml.jackson.core.JsonProcessingException; 5 | import com.fasterxml.jackson.databind.ObjectMapper; 6 | import org.apache.kafka.common.errors.SerializationException; 7 | import org.apache.kafka.common.serialization.Serializer; 8 | 9 | import java.util.Map; 10 | 11 | /** 12 | * @Description 13 | * @Author hasee 14 | * @Date 2019/1/8 15 | **/ 16 | public class GenericSerializer implements Serializer { 17 | private Class tClass; 18 | private static ObjectMapper mapper = new ObjectMapper(); 19 | 20 | public GenericSerializer() { 21 | } 22 | 23 | public GenericSerializer(Class tClass) { 24 | this.tClass = tClass; 25 | } 26 | 27 | @Override 28 | public void configure(Map map, boolean b) { 29 | if (tClass != null) { 30 | return; 31 | } 32 | String type = b ? 
"key.serializer.type" : "value.serializer.type"; 33 | String typeName = String.valueOf(map.get(type)); 34 | try { 35 | tClass = (Class) Class.forName(typeName); 36 | } catch (ClassNotFoundException e) { 37 | throw new SerializationException("failed to initialize GenericSerializer:+" + typeName, e); 38 | } 39 | } 40 | 41 | @Override 42 | public byte[] serialize(String s, T t) { 43 | if (t == null) { 44 | return new byte[0]; 45 | } 46 | try { 47 | return mapper.writerFor(tClass).writeValueAsBytes(t); 48 | } catch (JsonProcessingException e) { 49 | throw new SerializationException(e); 50 | } 51 | } 52 | 53 | @Override 54 | public void close() { 55 | 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /flink-kafka11-sink/src/main/java/com/bigdata/study/flinkkafkasink/FlinkKafkaSinkApplication.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkkafkasink; 2 | 3 | import constant.PropertiesConstants; 4 | import model.Metrics; 5 | import org.apache.flink.api.java.utils.ParameterTool; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011; 9 | import org.springframework.boot.autoconfigure.SpringBootApplication; 10 | import schemas.MetricSchema; 11 | import utils.ExecutionEnvUtil; 12 | import utils.KafkaUtils; 13 | 14 | /** 15 | * flink从kafka中读取数据,并写入kafka中 16 | */ 17 | @SpringBootApplication 18 | public class FlinkKafkaSinkApplication { 19 | 20 | public static void main(String[] args) { 21 | // SpringApplication.run(FlinkKafkaSinkApplication.class, args); 22 | try { 23 | ParameterTool parameterPool = ExecutionEnvUtil.createParameterPool(args); 24 | StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterPool); 25 | DataStreamSource dataStreamSource = KafkaUtils.buildSource(env); 26 | String brokers = parameterPool.get(PropertiesConstants.KAFKA_BROKERS); 27 | FlinkKafkaProducer011 kafkaProducer011 = new FlinkKafkaProducer011<>(brokers, 28 | parameterPool.get(PropertiesConstants.METRICS_TOPIC), new MetricSchema()); 29 | kafkaProducer011.setWriteTimestampToKafka(true); 30 | kafkaProducer011.setLogFailuresOnly(false); 31 | dataStreamSource.addSink(kafkaProducer011); 32 | env.execute("flink kafka sink"); 33 | } catch (Exception e) { 34 | e.printStackTrace(); 35 | } 36 | } 37 | 38 | } 39 | 40 | -------------------------------------------------------------------------------- /flink-jdbc-hbase/src/main/java/com/bigdata/study/flinkjdbchbase/source/JdbcSource.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkjdbchbase.source; 2 | 3 | import org.apache.commons.dbutils.DbUtils; 4 | import org.apache.flink.configuration.Configuration; 5 | import org.apache.flink.streaming.api.functions.source.RichSourceFunction; 6 | import org.slf4j.Logger; 7 | import org.slf4j.LoggerFactory; 8 | 9 | import java.sql.*; 10 | 11 | /** 12 | * 自定义从mysql读取数据的source 13 | **/ 14 | public class JdbcSource extends RichSourceFunction { 15 | 16 | private static final Logger logger = LoggerFactory.getLogger(JdbcSource.class); 17 | 18 | private Connection connection; 19 | 20 | private PreparedStatement ps; 21 | 22 | @Override 23 | public void open(Configuration parameters) throws Exception { 24 | super.open(parameters); 25 | Class.forName("com.mysql.jdbc.Driver"); 26 | 
connection = DriverManager.getConnection("jdbc:mysql://localhost:3306/test?characterEncoding=utf8&useSSL=true", "root", "root"); 27 | String sql = "select name from user"; 28 | ps = connection.prepareStatement(sql); 29 | } 30 | 31 | @Override 32 | public void close() throws Exception { 33 | super.close(); 34 | if (connection != null) { 35 | DbUtils.closeQuietly(connection); 36 | } 37 | if (ps != null) { 38 | DbUtils.close(ps); 39 | } 40 | } 41 | 42 | @Override 43 | public void run(SourceContext sourceContext) throws Exception { 44 | try { 45 | ResultSet resultSet = ps.executeQuery(); 46 | while (resultSet.next()) { 47 | String name = resultSet.getString("name"); 48 | sourceContext.collect(name); 49 | } 50 | } catch (SQLException e) { 51 | logger.error("读取mysql数据出错:{}", e.getMessage()); 52 | } 53 | } 54 | 55 | @Override 56 | public void cancel() { 57 | 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /flink-jdbc-hbase/src/main/java/com/bigdata/study/flinkjdbchbase/core/Jdbc2Hbase.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkjdbchbase.core; 2 | 3 | import com.bigdata.study.flinkjdbchbase.sink.HbaseSink; 4 | import com.bigdata.study.flinkjdbchbase.source.JdbcSource; 5 | import org.apache.flink.api.common.functions.MapFunction; 6 | import org.apache.flink.streaming.api.TimeCharacteristic; 7 | import org.apache.flink.streaming.api.datastream.DataStream; 8 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 9 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.slf4j.Logger; 12 | import org.slf4j.LoggerFactory; 13 | 14 | /** 15 | * @Description 16 | * @Author hasee 17 | * @Date 2019/1/7 18 | **/ 19 | public class Jdbc2Hbase { 20 | private static final Logger logger = LoggerFactory.getLogger(Jdbc2Hbase.class); 21 | 22 | public static void main(String[] args) { 23 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 24 | env.setParallelism(1); 25 | env.enableCheckpointing(1000); 26 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 27 | DataStreamSource dataStreamSource = env.addSource(new JdbcSource()); 28 | String hbase_zk = "namenode1.xxx.com"; 29 | String hbase_port = "2181"; 30 | String hbase_table = "ns:table1"; 31 | String hbase_family = "cf1"; 32 | DataStream process = dataStreamSource.map(new MapFunction() { 33 | @Override 34 | public String map(String s) throws Exception { 35 | logger.info("接收到消息:{}", s); 36 | return s; 37 | } 38 | }).process(new HbaseSink(hbase_zk, hbase_port, hbase_table, hbase_family)); 39 | try { 40 | env.execute("flink from mysql 2 hbase"); 41 | } catch (Exception e) { 42 | e.printStackTrace(); 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /flink-kafka11-sink/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.bigdata.study 7 | bigdata-starter 8 | 1.0 9 | ../pom.xml 10 | 11 | flink-kafka-sink 12 | 0.0.1-SNAPSHOT 13 | flink-kafka-sink 14 | Demo project for Spring Boot 15 | 16 | 17 | 18 | org.springframework.boot 19 | spring-boot-starter 20 | 21 | 22 | com.bigdata.study 23 | flink-common 24 | 1.0 25 | 26 | 27 | org.springframework.cloud 28 | spring-cloud-starter-stream-kafka 29 | 30 | 31 | 32 | 33 | org.apache.flink 34 | 
flink-connector-kafka-0.11_${scala.binary.version} 35 | ${flink.version} 36 | 37 | 38 | org.springframework.boot 39 | spring-boot-starter-test 40 | test 41 | 42 | 43 | 44 | 45 | 46 | 47 | org.springframework.boot 48 | spring-boot-maven-plugin 49 | 50 | 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /flink-elasticsearch-sink/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.bigdata.study 7 | bigdata-starter 8 | 1.0 9 | ../pom.xml 10 | 11 | flink-elasticsearch-sink 12 | 0.0.1-SNAPSHOT 13 | flink-elasticsearch-sink 14 | Demo project for Spring Boot 15 | 16 | 17 | 18 | org.springframework.boot 19 | spring-boot-starter 20 | 21 | 22 | com.bigdata.study 23 | flink-common 24 | 1.0 25 | 26 | 27 | org.springframework.cloud 28 | spring-cloud-starter-stream-kafka 29 | 30 | 31 | 32 | 33 | org.apache.flink 34 | flink-connector-elasticsearch6_${scala.binary.version} 35 | ${flink.version} 36 | 37 | 38 | org.springframework.boot 39 | spring-boot-starter-test 40 | test 41 | 42 | 43 | 44 | 45 | 46 | 47 | org.springframework.boot 48 | spring-boot-maven-plugin 49 | 50 | 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /flink-kafka-source/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.bigdata.study 7 | bigdata-starter 8 | 1.0 9 | ../pom.xml 10 | 11 | flink-kafka-source 12 | 0.0.1-SNAPSHOT 13 | flink-kafka-source 14 | Demo project for Spring Boot 15 | 16 | 17 | 18 | org.springframework.boot 19 | spring-boot-starter 20 | 21 | 22 | com.bigdata.study 23 | flink-common 24 | 1.0 25 | 26 | 27 | org.springframework.cloud 28 | spring-cloud-starter-stream-kafka 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | org.springframework.boot 39 | spring-boot-starter-test 40 | test 41 | 42 | 43 | 44 | 45 | 46 | 47 | org.springframework.boot 48 | spring-boot-maven-plugin 49 | 50 | 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /dataflow-stream-redis-pub-sink/src/main/java/com/bigdata/study/dataflowstreamredispubsink/config/RedisStreamPubConfig.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.dataflowstreamredispubsink.config; 2 | 3 | import com.bigdata.study.dataflowstreamredispubsink.prop.RedisPubProperties; 4 | import com.fasterxml.jackson.core.JsonProcessingException; 5 | import com.fasterxml.jackson.databind.ObjectMapper; 6 | import org.springframework.beans.factory.InitializingBean; 7 | import org.springframework.beans.factory.annotation.Autowired; 8 | import org.springframework.boot.context.properties.EnableConfigurationProperties; 9 | import org.springframework.cloud.stream.annotation.EnableBinding; 10 | import org.springframework.cloud.stream.annotation.StreamListener; 11 | import org.springframework.cloud.stream.messaging.Sink; 12 | import org.springframework.context.annotation.Configuration; 13 | import org.springframework.data.redis.core.RedisTemplate; 14 | import org.springframework.data.redis.listener.ChannelTopic; 15 | import org.springframework.messaging.Message; 16 | import org.springframework.scheduling.annotation.EnableScheduling; 17 | 18 | /** 19 | * @Description 20 | * @Author hasee 21 | * @Date 2019/1/2 22 | **/ 23 | @Configuration 24 | @EnableConfigurationProperties(RedisPubProperties.class) 25 | @EnableBinding(Sink.class) 26 | public class 
RedisStreamPubConfig implements InitializingBean { 27 | 28 | @Autowired 29 | private RedisPubProperties redisPubProperties; 30 | 31 | @Autowired 32 | private RedisTemplate redisTemplate; 33 | 34 | private ChannelTopic topic; 35 | 36 | private static final ObjectMapper mapper = new ObjectMapper(); 37 | 38 | @StreamListener(value = Sink.INPUT) 39 | public void pubRedis(Message message) { 40 | try { 41 | redisTemplate.convertAndSend(topic.getTopic(), mapper.writeValueAsString(message.getPayload())); 42 | System.out.println("向redis中发送消息:" + mapper.writeValueAsString(message.getPayload())); 43 | } catch (JsonProcessingException e) { 44 | e.printStackTrace(); 45 | } 46 | } 47 | 48 | @Override 49 | public void afterPropertiesSet() throws Exception { 50 | topic = new ChannelTopic(redisPubProperties.getTopic()); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /dataflow-stream-redis-set-processor/src/main/java/com/bigdata/study/dataflowstreamredissetprocessor/config/RedisStreamProcessorConfig.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.dataflowstreamredissetprocessor.config; 2 | 3 | import com.bigdata.study.dataflowstreamredissetprocessor.prop.RedisSetProperties; 4 | import org.springframework.beans.factory.annotation.Autowired; 5 | import org.springframework.boot.context.properties.EnableConfigurationProperties; 6 | import org.springframework.cloud.stream.annotation.EnableBinding; 7 | import org.springframework.cloud.stream.messaging.Processor; 8 | import org.springframework.cloud.stream.messaging.Sink; 9 | import org.springframework.context.annotation.Configuration; 10 | import org.springframework.data.redis.core.BoundSetOperations; 11 | import org.springframework.data.redis.core.RedisTemplate; 12 | import org.springframework.expression.Expression; 13 | import org.springframework.expression.spel.standard.SpelExpression; 14 | import org.springframework.expression.spel.standard.SpelExpressionParser; 15 | import org.springframework.integration.annotation.Filter; 16 | import org.springframework.messaging.Message; 17 | 18 | import java.util.Map; 19 | 20 | /** 21 | * redis 流水任务 processor 22 | **/ 23 | @Configuration 24 | @EnableConfigurationProperties(RedisSetProperties.class) 25 | @EnableBinding(Processor.class) 26 | public class RedisStreamProcessorConfig { 27 | 28 | private static final SpelExpressionParser expressionParser = new SpelExpressionParser(); 29 | private static final String expressionString = "payload[\"index\"]"; 30 | 31 | @Autowired 32 | private RedisSetProperties redisSetProperties; 33 | 34 | @Autowired 35 | private RedisTemplate redisTemplate; 36 | 37 | @Filter(inputChannel = Processor.INPUT, outputChannel = Processor.OUTPUT) 38 | public boolean filter(Message message) { 39 | final Expression expression = expressionParser.parseExpression(expressionString); 40 | Map map = (Map) message.getPayload(); 41 | BoundSetOperations boundSetOperations = redisTemplate.boundSetOps(redisSetProperties.getSetName()); 42 | boolean member = boundSetOperations.isMember(expression.getValue(map, String.class)); 43 | return member; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /flink-jdbc-hbase/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.bigdata.study 7 | bigdata-starter 8 | 1.0 9 | ../pom.xml 10 | 11 | flink-jdbc-hbase 12 | 0.0.1-SNAPSHOT 13 | 
flink-jdbc-hbase 14 | Demo project for Spring Boot 15 | 16 | 17 | 18 | org.springframework.boot 19 | spring-boot-starter 20 | 21 | 22 | com.bigdata.study 23 | flink-common 24 | 1.0 25 | 26 | 27 | org.apache.flink 28 | flink-hbase_${scala.binary.version} 29 | ${flink.version} 30 | 31 | 32 | org.apache.hbase 33 | hbase-client 34 | 1.4.3 35 | 36 | 37 | mysql 38 | mysql-connector-java 39 | 5.1.45 40 | 41 | 42 | org.springframework.boot 43 | spring-boot-starter-test 44 | test 45 | 46 | 47 | 48 | 49 | 50 | 51 | org.springframework.boot 52 | spring-boot-maven-plugin 53 | 54 | 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /dataflow-stream-kafka-source/src/main/java/com/bigdata/study/dataflowstreamkafkasource/prop/KafkaSourceProperties.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.dataflowstreamkafkasource.prop; 2 | 3 | import org.springframework.boot.context.properties.ConfigurationProperties; 4 | 5 | /** 6 | * kafka 配置属性 7 | **/ 8 | @ConfigurationProperties("kafka") 9 | public class KafkaSourceProperties { 10 | private String topic = "test"; 11 | 12 | private String servers = "ebmas-02:6667,ebmas-01:6667,ebmas-03:6667"; 13 | 14 | private String groupId = "test-group"; 15 | 16 | private long batchSize = 1024; 17 | 18 | private String zkNodes="192.168.10.120:2181,192.168.10.121:2181,192.168.10.122:2181"; 19 | 20 | private String keyDeserializer = "org.apache.kafka.common.serialization.StringDeserializer"; 21 | 22 | private String valueDeserializer = "org.apache.kafka.common.serialization.StringDeserializer"; 23 | 24 | public String getTopic() { 25 | return topic; 26 | } 27 | 28 | public void setTopic(String topic) { 29 | this.topic = topic; 30 | } 31 | 32 | public String getServers() { 33 | return servers; 34 | } 35 | 36 | public void setServers(String servers) { 37 | this.servers = servers; 38 | } 39 | 40 | public String getGroupId() { 41 | return groupId; 42 | } 43 | 44 | public String getZkNodes() { 45 | return zkNodes; 46 | } 47 | 48 | public void setZkNodes(String zkNodes) { 49 | this.zkNodes = zkNodes; 50 | } 51 | 52 | public void setGroupId(String groupId) { 53 | this.groupId = groupId; 54 | } 55 | 56 | public long getBatchSize() { 57 | return batchSize; 58 | } 59 | 60 | public void setBatchSize(long batchSize) { 61 | this.batchSize = batchSize; 62 | } 63 | 64 | public String getKeyDeserializer() { 65 | return keyDeserializer; 66 | } 67 | 68 | public void setKeyDeserializer(String keyDeserializer) { 69 | this.keyDeserializer = keyDeserializer; 70 | } 71 | 72 | public String getValueDeserializer() { 73 | return valueDeserializer; 74 | } 75 | 76 | public void setValueDeserializer(String valueDeserializer) { 77 | this.valueDeserializer = valueDeserializer; 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /dataflow-stream-kafka-source/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.bigdata.study 7 | bigdata-starter 8 | 1.0 9 | ../pom.xml 10 | 11 | dataflow-stream-kafka-source 12 | 0.0.1-SNAPSHOT 13 | dataflow-stream-kafka-source 14 | Demo project for Spring Boot 15 | 16 | 17 | 18 | org.springframework.cloud 19 | spring-cloud-starter-stream-kafka 20 | 21 | 22 | org.springframework.boot 23 | spring-boot-configuration-processor 24 | true 25 | 26 | 27 | org.apache.kafka 28 | kafka-streams 29 | 0.10.1.1 30 | 31 | 32 | org.slf4j 33 | slf4j-api 34 | 35 | 36 
| log4j 37 | log4j 38 | 39 | 40 | org.slf4j 41 | slf4j-log4j12 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | org.springframework.boot 51 | spring-boot-maven-plugin 52 | 53 | 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /flink-kafka-hbase/src/main/java/com/bigdata/study/flinkkafkahbase/core/Kafka2Hbase.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkkafkahbase.core; 2 | 3 | import com.bigdata.study.flinkkafkahbase.model.Metric; 4 | import com.bigdata.study.flinkkafkahbase.sink.FlinkHbaseSink; 5 | import com.fasterxml.jackson.databind.ObjectMapper; 6 | import org.apache.commons.lang3.StringUtils; 7 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 8 | import org.apache.flink.streaming.api.TimeCharacteristic; 9 | import org.apache.flink.streaming.api.datastream.DataStream; 10 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 11 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 12 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 13 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011; 14 | import org.apache.hadoop.hbase.TableName; 15 | 16 | import java.util.Properties; 17 | 18 | /** 19 | * flink 处理数据从kafka到hbase 20 | **/ 21 | public class Kafka2Hbase { 22 | 23 | private static final ObjectMapper mapper = new ObjectMapper(); 24 | 25 | public static void main(String[] args) { 26 | final String ZOOKEEPER_HOST = "192.168.20.48:2181,192.168.20.51:2181,192.168.20.52:2181"; 27 | final String KAFKA_HOST = "192.168.20.48:9092"; 28 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 29 | env.enableCheckpointing(1000); 30 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 31 | 32 | Properties prop = new Properties(); 33 | prop.setProperty("bootstrap.servers", KAFKA_HOST); 34 | prop.put("zookeeper.connect", ZOOKEEPER_HOST); 35 | prop.put("group.id", "kafka-hbase-group"); 36 | DataStreamSource dataStreamSource = env.addSource(new FlinkKafkaConsumer011<>("kafka-hbase", new SimpleStringSchema(), prop)); 37 | DataStream metricDataStream = dataStreamSource.rebalance().filter(StringUtils::isNotBlank).map(m -> { 38 | Metric metric = mapper.readValue(m, Metric.class); 39 | return metric; 40 | }); 41 | metricDataStream.addSink(new FlinkHbaseSink()); 42 | env.setParallelism(2); 43 | try { 44 | env.execute("flink kafka hbase sink"); 45 | } catch (Exception e) { 46 | e.printStackTrace(); 47 | } 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /flink-async-io/src/main/java/com/bigdata/study/flinkasyncio/async/AsyncDataBaseRequest.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkasyncio.async; 2 | 3 | import org.apache.flink.api.java.tuple.Tuple2; 4 | import org.apache.flink.configuration.Configuration; 5 | import org.apache.flink.streaming.api.functions.async.ResultFuture; 6 | import org.apache.flink.streaming.api.functions.async.RichAsyncFunction; 7 | import org.apache.flink.util.ExecutorUtils; 8 | 9 | import java.util.ArrayList; 10 | import java.util.Collections; 11 | import java.util.concurrent.ExecutorService; 12 | import java.util.concurrent.Executors; 13 | import java.util.concurrent.ThreadLocalRandom; 14 | import java.util.concurrent.TimeUnit; 15 | 16 | /** 17 | * 使用线程模仿async IO 操作 
18 | **/ 19 | public class AsyncDataBaseRequest extends RichAsyncFunction { 20 | 21 | private static final long serialVersionUID = -1L; 22 | 23 | private transient ExecutorService executorService; 24 | 25 | private final long sleepFactor; 26 | 27 | private final float failRatio; 28 | 29 | private final long shutdownWaitTS; 30 | 31 | public AsyncDataBaseRequest(long sleepFactor, float failRatio, long shutdownWaitTS) { 32 | this.sleepFactor = sleepFactor; 33 | this.failRatio = failRatio; 34 | this.shutdownWaitTS = shutdownWaitTS; 35 | } 36 | 37 | @Override 38 | public void open(Configuration parameters) throws Exception { 39 | super.open(parameters); 40 | executorService = Executors.newFixedThreadPool(10); 41 | } 42 | 43 | @Override 44 | public void close() throws Exception { 45 | super.close(); 46 | ExecutorUtils.gracefulShutdown(shutdownWaitTS, TimeUnit.MICROSECONDS, executorService); 47 | } 48 | 49 | @Override 50 | public void asyncInvoke(Integer integer, ResultFuture resultFuture) throws Exception { 51 | executorService.submit(() -> { 52 | long sleep = (long) (ThreadLocalRandom.current().nextFloat() * sleepFactor); 53 | try { 54 | Thread.sleep(sleep); 55 | if (ThreadLocalRandom.current().nextFloat() < failRatio) { 56 | resultFuture.completeExceptionally(new Exception("数据太小了。。。")); 57 | } else { 58 | resultFuture.complete(Collections.singletonList("key-" + (integer % 10))); 59 | } 60 | } catch (InterruptedException e) { 61 | e.printStackTrace(); 62 | resultFuture.complete(new ArrayList<>(0)); 63 | } 64 | }); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /flink-hdfs/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.bigdata.study 7 | bigdata-starter 8 | 1.0 9 | ../pom.xml 10 | 11 | flink-hdfs 12 | 0.0.1-SNAPSHOT 13 | flink-hdfs 14 | Demo project for Spring Boot 15 | 16 | 17 | 18 | org.springframework.boot 19 | spring-boot-starter 20 | 21 | 22 | com.bigdata.study 23 | flink-common 24 | 1.0 25 | 26 | 27 | org.apache.flink 28 | flink-connector-filesystem_${scala.binary.version} 29 | ${flink.version} 30 | 31 | 32 | org.apache.hadoop 33 | hadoop-common 34 | 2.7.1 35 | 36 | 37 | org.apache.flink 38 | flink-hadoop-compatibility_${scala.binary.version} 39 | ${flink.version} 40 | 41 | 42 | org.springframework.boot 43 | spring-boot-starter-test 44 | test 45 | 46 | 47 | org.apache.hadoop 48 | hadoop-mapreduce-client-core 49 | 3.1.0 50 | compile 51 | 52 | 53 | 54 | 55 | 56 | 57 | org.springframework.boot 58 | spring-boot-maven-plugin 59 | 60 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /flink-hdfs/src/main/java/com/bigdata/study/flinkhdfs/utils/HadoopConfig.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkhdfs.utils; 2 | 3 | import org.springframework.boot.context.properties.ConfigurationProperties; 4 | import org.springframework.context.annotation.Configuration; 5 | 6 | /** 7 | * 项目名 data-acquisition-dataflow 8 | * Created by zhongdev. 
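The SimpleSource and AsyncDataBaseRequest shown above are the two halves of the flink-async-io example; what ties them together is Flink's AsyncDataStream. A minimal sketch, with a made-up class name and constructor arguments rather than the module's actual FlinkAsyncIoApplication:

package com.bigdata.study.flinkasyncio;

import com.bigdata.study.flinkasyncio.async.AsyncDataBaseRequest;
import com.bigdata.study.flinkasyncio.source.SimpleSource;
import org.apache.flink.streaming.api.datastream.AsyncDataStream;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import java.util.concurrent.TimeUnit;

public class AsyncIoWiringSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // emit 1000 integers; the counter is checkpointed via ListCheckpointed
        DataStream<Integer> input = env.addSource(new SimpleSource(1000));
        // hand each element to the thread-pool backed async function;
        // at most 20 requests are in flight, each times out after 10 seconds
        DataStream<String> enriched = AsyncDataStream.unorderedWait(
                input, new AsyncDataBaseRequest(100L, 0.001f, 1000L), 10, TimeUnit.SECONDS, 20);
        enriched.print();
        env.execute("flink async IO sketch");
    }
}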
9 | * Created at 2017/10/26 10 | * 描述:hadoop的配置信息 11 | */ 12 | @Configuration 13 | @ConfigurationProperties(prefix = "hadoop") 14 | public class HadoopConfig { 15 | 16 | private String fsDefaultFS; 17 | private String hadoopTmpDir; 18 | private Integer dfsReplication; 19 | private String dfsNamenodeNameDir; 20 | private String dfsDatanodeDataDir; 21 | private Boolean dfsPermissions; 22 | private Boolean dfsSupportAppend; 23 | private String dfsUser = "hdfs"; 24 | 25 | public String getFsDefaultFS() { 26 | return fsDefaultFS; 27 | } 28 | 29 | public void setFsDefaultFS(String fsDefaultFS) { 30 | this.fsDefaultFS = fsDefaultFS; 31 | } 32 | 33 | public String getHadoopTmpDir() { 34 | return hadoopTmpDir; 35 | } 36 | 37 | public void setHadoopTmpDir(String hadoopTmpDir) { 38 | this.hadoopTmpDir = hadoopTmpDir; 39 | } 40 | 41 | public Integer getDfsReplication() { 42 | return dfsReplication; 43 | } 44 | 45 | public void setDfsReplication(Integer dfsReplication) { 46 | this.dfsReplication = dfsReplication; 47 | } 48 | 49 | public String getDfsNamenodeNameDir() { 50 | return dfsNamenodeNameDir; 51 | } 52 | 53 | public void setDfsNamenodeNameDir(String dfsNamenodeNameDir) { 54 | this.dfsNamenodeNameDir = dfsNamenodeNameDir; 55 | } 56 | 57 | public String getDfsDatanodeDataDir() { 58 | return dfsDatanodeDataDir; 59 | } 60 | 61 | public void setDfsDatanodeDataDir(String dfsDatanodeDataDir) { 62 | this.dfsDatanodeDataDir = dfsDatanodeDataDir; 63 | } 64 | 65 | public Boolean getDfsPermissions() { 66 | return dfsPermissions; 67 | } 68 | 69 | public void setDfsPermissions(Boolean dfsPermissions) { 70 | this.dfsPermissions = dfsPermissions; 71 | } 72 | 73 | public Boolean getDfsSupportAppend() { 74 | return dfsSupportAppend; 75 | } 76 | 77 | public void setDfsSupportAppend(Boolean dfsSupportAppend) { 78 | this.dfsSupportAppend = dfsSupportAppend; 79 | } 80 | 81 | public String getDfsUser() { 82 | return dfsUser; 83 | } 84 | 85 | public void setDfsUser(String dfsUser) { 86 | this.dfsUser = dfsUser; 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /flink-jdbc-hbase/src/main/java/com/bigdata/study/flinkjdbchbase/core/FlinkFromTxt.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkjdbchbase.core; 2 | 3 | import org.apache.commons.lang3.StringUtils; 4 | import org.apache.flink.api.common.functions.FlatMapFunction; 5 | import org.apache.flink.api.common.functions.ReduceFunction; 6 | import org.apache.flink.api.java.tuple.Tuple2; 7 | import org.apache.flink.streaming.api.TimeCharacteristic; 8 | import org.apache.flink.streaming.api.datastream.DataStream; 9 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 10 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 12 | import org.apache.flink.streaming.api.windowing.time.Time; 13 | import org.apache.flink.util.Collector; 14 | 15 | /** 16 | * @Description 17 | * @Author hasee 18 | * @Date 2019/1/7 19 | **/ 20 | public class FlinkFromTxt { 21 | 22 | public static void main(String[] args) { 23 | String file_input = "C:\\Users\\hasee\\Desktop\\spark.txt"; 24 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 25 | env.enableCheckpointing(10000); 26 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 27 | env.setParallelism(1); 28 | DataStream dataStreamSource = 
env.readTextFile(file_input); 29 | DataStream> reduce = dataStreamSource.filter(StringUtils::isNotBlank) 30 | .flatMap(new FlatMapFunction>() { 31 | @Override 32 | public void flatMap(String s, Collector> collector) throws Exception { 33 | String[] words = s.toLowerCase().split("\\W+"); 34 | for (String word : words) { 35 | if (word.length() > 0) { 36 | Tuple2 tuple2 = new Tuple2<>(); 37 | tuple2.f0 = word; 38 | tuple2.f1 = 1; 39 | collector.collect(tuple2); 40 | } 41 | } 42 | } 43 | }).keyBy(0).timeWindow(Time.seconds(30)).reduce(new ReduceFunction>() { 44 | @Override 45 | public Tuple2 reduce(Tuple2 t1, Tuple2 t2) throws Exception { 46 | return new Tuple2<>(t1.f0, t1.f1 + t2.f1); 47 | } 48 | }); 49 | reduce.print(); 50 | try { 51 | env.execute("flink read txt"); 52 | } catch (Exception e) { 53 | e.printStackTrace(); 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /flink-common/src/main/java/utils/ExecutionEnvUtil.java: -------------------------------------------------------------------------------- 1 | package utils; 2 | 3 | import constant.PropertiesConstants; 4 | import org.apache.flink.api.common.restartstrategy.RestartStrategies; 5 | import org.apache.flink.api.java.utils.ParameterTool; 6 | import org.apache.flink.streaming.api.TimeCharacteristic; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | 9 | import java.io.IOException; 10 | import java.util.HashMap; 11 | import java.util.Map; 12 | 13 | /** 14 | * 解析参数工具类 15 | **/ 16 | public class ExecutionEnvUtil { 17 | 18 | public static ParameterTool createParameterPool(final String[] args) throws IOException { 19 | return ParameterTool.fromPropertiesFile(ExecutionEnvUtil.class.getResourceAsStream(PropertiesConstants.PROPERTIES_FILE_NAME)) 20 | .mergeWith(ParameterTool.fromArgs(args)) 21 | .mergeWith(ParameterTool.fromSystemProperties()) 22 | .mergeWith(ParameterTool.fromMap(getEnv())); 23 | } 24 | 25 | public static ParameterTool PARAMETERTOOL = createParameterPool(); 26 | 27 | private static ParameterTool createParameterPool() { 28 | try { 29 | return ParameterTool.fromPropertiesFile(ExecutionEnvUtil.class.getResourceAsStream(PropertiesConstants.PROPERTIES_FILE_NAME)) 30 | .mergeWith(ParameterTool.fromSystemProperties()) 31 | .mergeWith(ParameterTool.fromMap(getEnv())); 32 | } catch (IOException e) { 33 | e.printStackTrace(); 34 | } 35 | return null; 36 | } 37 | 38 | public static StreamExecutionEnvironment prepare(ParameterTool parameterTool) { 39 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 40 | env.setParallelism(parameterTool.getInt(PropertiesConstants.STREAM_PARALLELISM, 5)); 41 | env.getConfig().disableSysoutLogging(); 42 | env.setRestartStrategy(RestartStrategies.fixedDelayRestart(4, 10000)); 43 | if (parameterTool.getBoolean(PropertiesConstants.STREAM_CHECKPOINT_ENABLE, true)) { 44 | env.enableCheckpointing(parameterTool.getInt(PropertiesConstants.STREAM_CHECKPOINT_INTERVAL, 1000)); 45 | } 46 | env.getConfig().setGlobalJobParameters(parameterTool); 47 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 48 | return env; 49 | } 50 | 51 | private static Map getEnv() { 52 | Map envMap = new HashMap<>(); 53 | Map sysEnv = System.getenv(); 54 | for (Map.Entry entry : sysEnv.entrySet()) { 55 | envMap.put(entry.getKey(), entry.getValue()); 56 | } 57 | return envMap; 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /spark-phoenix/pom.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.bigdata.study 7 | bigdata-starter 8 | 1.0 9 | ../pom.xml 10 | 11 | spark-phoenix 12 | 0.0.1-SNAPSHOT 13 | spark-phoenix 14 | Demo project for Spring Boot 15 | 16 | 17 | 18 | org.apache.spark 19 | spark-core_2.11 20 | 2.3.0 21 | 22 | 23 | org.apache.spark 24 | spark-sql_2.11 25 | 2.3.0 26 | 27 | 28 | org.apache.phoenix 29 | phoenix-core 30 | 4.13.1-HBase-1.3 31 | 32 | 33 | org.apache.phoenix 34 | phoenix-spark 35 | 4.13.1-HBase-1.3 36 | 37 | 38 | joda-time 39 | joda-time 40 | 41 | 42 | org.apache.hbase 43 | hbase-protocol 44 | 1.3.1 45 | 46 | 47 | org.apache.hbase 48 | hbase-client 49 | 1.3.1 50 | 51 | 52 | org.slf4j 53 | slf4j-log4j12 54 | 55 | 56 | 57 | 58 | junit 59 | junit 60 | 61 | 62 | org.springframework.boot 63 | spring-boot-starter-test 64 | 65 | 66 | 67 | 68 | 69 | 70 | org.springframework.boot 71 | spring-boot-maven-plugin 72 | 73 | 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /flink-kafka-hbase/src/main/java/com/bigdata/study/flinkkafkahbase/sink/FlinkHbaseSink.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkkafkahbase.sink; 2 | 3 | import com.bigdata.study.flinkkafkahbase.model.Metric; 4 | import org.apache.flink.configuration.Configuration; 5 | import org.apache.flink.streaming.api.functions.sink.RichSinkFunction; 6 | import org.apache.hadoop.hbase.HBaseConfiguration; 7 | import org.apache.hadoop.hbase.HColumnDescriptor; 8 | import org.apache.hadoop.hbase.HTableDescriptor; 9 | import org.apache.hadoop.hbase.TableName; 10 | import org.apache.hadoop.hbase.client.*; 11 | import org.apache.hadoop.hbase.util.Bytes; 12 | 13 | import java.util.Map; 14 | 15 | /** 16 | * Hbase sink 17 | **/ 18 | public class FlinkHbaseSink extends RichSinkFunction { 19 | 20 | private static final String hbaseZookeeperQuorum = "192.168.20.48"; 21 | private static final String hbaseZookeeperClinentPort = "2181"; 22 | private static TableName hbaseTableName = TableName.valueOf("test"); 23 | private static final String columnFamily = "cf"; 24 | 25 | private Connection connection; 26 | 27 | @Override 28 | public void open(Configuration parameters) throws Exception { 29 | super.open(parameters); 30 | org.apache.hadoop.conf.Configuration config = HBaseConfiguration.create(); 31 | config.set("hbase.zookeeper.quorum", hbaseZookeeperQuorum); 32 | config.set("hbase.master", "10.45.151.26:60000"); 33 | config.set("hbase.zookeeper.property.clientPort", hbaseZookeeperClinentPort); 34 | config.setInt("hbase.rpc.timeout", 20000); 35 | config.setInt("hbase.client.operation.timeout", 30000); 36 | config.setInt("hbase.client.scanner.timeout.period", 200000); 37 | connection = ConnectionFactory.createConnection(config); 38 | } 39 | 40 | @Override 41 | public void close() throws Exception { 42 | super.close(); 43 | if (connection != null) { 44 | connection.close(); 45 | } 46 | } 47 | 48 | @Override 49 | public void invoke(Metric value, Context context) throws Exception { 50 | Admin admin = connection.getAdmin(); 51 | boolean tableExists = admin.tableExists(hbaseTableName); 52 | if (!tableExists) { 53 | admin.createTable(new HTableDescriptor(hbaseTableName).addFamily(new HColumnDescriptor(columnFamily))); 54 | } 55 | Table table = connection.getTable(hbaseTableName); 56 | long timeMillis = System.currentTimeMillis(); 57 | Put put = new Put(Bytes.toBytes(timeMillis)); 58 | Map fields = 
value.getFields(); 59 | for (Map.Entry entry : fields.entrySet()) { 60 | String field = entry.getKey(); 61 | put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(field), Bytes.toBytes((String) entry.getValue())); 62 | } 63 | table.put(put); 64 | table.close(); 65 | admin.close(); 66 | connection.close(); 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /kafka-stream/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.bigdata.study 7 | bigdata-starter 8 | 1.0 9 | ../pom.xml 10 | 11 | kafka-stream 12 | 0.0.1-SNAPSHOT 13 | kafka-stream 14 | Demo project for Spring Boot 15 | 16 | 17 | 18 | org.springframework.boot 19 | spring-boot-starter 20 | 21 | 22 | org.apache.kafka 23 | kafka_2.11 24 | 0.10.1.0 25 | 26 | 27 | com.101tec 28 | zkclient 29 | 30 | 31 | 32 | 33 | org.apache.kafka 34 | kafka-clients 35 | 0.10.1.0 36 | 37 | 38 | org.apache.kafka 39 | kafka-streams 40 | 0.10.1.0 41 | 42 | 43 | com.101tec 44 | zkclient 45 | 46 | 47 | 48 | 49 | com.101tec 50 | zkclient 51 | 0.10 52 | 53 | 54 | commons-io 55 | commons-io 56 | 2.6 57 | 58 | 59 | org.apache.commons 60 | commons-lang3 61 | 3.7 62 | 63 | 64 | org.springframework.boot 65 | spring-boot-starter-test 66 | test 67 | 68 | 69 | 70 | 71 | 72 | 73 | org.springframework.boot 74 | spring-boot-maven-plugin 75 | 76 | 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /flink-kafka-hbase/src/main/java/com/bigdata/study/flinkkafkahbase/core/Hbase2Kafka.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkkafkahbase.core; 2 | 3 | import com.bigdata.study.flinkkafkahbase.source.FlinkHbaseSource; 4 | import com.bigdata.study.flinkkafkahbase.watermarks.FlinkHbaseWaterMarks; 5 | import com.fasterxml.jackson.databind.ObjectMapper; 6 | import org.apache.commons.collections.CollectionUtils; 7 | import org.apache.flink.api.common.functions.FlatMapFunction; 8 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 9 | import org.apache.flink.streaming.api.TimeCharacteristic; 10 | import org.apache.flink.streaming.api.datastream.DataStream; 11 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 12 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 13 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011; 14 | import org.apache.flink.util.Collector; 15 | 16 | import java.util.Collections; 17 | import java.util.Map; 18 | import java.util.Properties; 19 | 20 | /** 21 | * 从Hbase读取数据到kafka 22 | **/ 23 | public class Hbase2Kafka { 24 | private static final ObjectMapper mapper = new ObjectMapper(); 25 | 26 | public static void main(String[] args) { 27 | final String ZOOKEEPER_HOST = "192.168.20.48:2181,192.168.20.51:2181,192.168.20.52:2181"; 28 | final String KAFKA_HOST = "192.168.20.48:9092"; 29 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 30 | env.enableCheckpointing(1000); 31 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); 32 | DataStreamSource> dataStreamSource = env.addSource(new FlinkHbaseSource()); 33 | dataStreamSource.assignTimestampsAndWatermarks(new FlinkHbaseWaterMarks()); 34 | DataStream dataStream = dataStreamSource.filter(data -> CollectionUtils.isNotEmpty(Collections.singleton(data))).flatMap(new FlatMapFunction, String>() { 35 | @Override 36 | public void flatMap(Map 
stringStringMap, Collector collector) throws Exception { 37 | String value = mapper.writeValueAsString(stringStringMap); 38 | collector.collect(value); 39 | } 40 | }); 41 | Properties prop = new Properties(); 42 | prop.setProperty("bootstrap.servers", KAFKA_HOST); 43 | prop.put("zookeeper.connect", ZOOKEEPER_HOST); 44 | prop.put("key.serializer", "org.apache.kafka.common.serialization.StringDeserializer"); 45 | prop.put("value.serializer", "org.apache.kafka.common.serialization.StringDeserializer"); 46 | prop.put("auto.offset.reset", "latest"); 47 | FlinkKafkaProducer011 producer011 = new FlinkKafkaProducer011<>("hbase-kafka", new SimpleStringSchema(), prop); 48 | producer011.setWriteTimestampToKafka(true); 49 | dataStream.addSink(producer011); 50 | try { 51 | env.execute("flink hbase 2 kafka11"); 52 | } catch (Exception e) { 53 | e.printStackTrace(); 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /flink-jdbc-hbase/src/main/java/com/bigdata/study/flinkjdbchbase/sink/HbaseSink.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkjdbchbase.sink; 2 | 3 | import org.apache.flink.configuration.Configuration; 4 | import org.apache.flink.streaming.api.functions.ProcessFunction; 5 | import org.apache.flink.table.shaded.org.joda.time.Instant; 6 | import org.apache.flink.util.Collector; 7 | import org.apache.hadoop.hbase.*; 8 | import org.apache.hadoop.hbase.client.*; 9 | import org.apache.hadoop.hbase.security.User; 10 | import org.apache.hadoop.hbase.util.Bytes; 11 | import org.apache.hadoop.security.UserGroupInformation; 12 | 13 | /** 14 | * 自定义Hbase sink 15 | **/ 16 | public class HbaseSink extends ProcessFunction { 17 | 18 | private String zookeeper; 19 | private String zkPort; 20 | private String tableName; 21 | private String family; 22 | private Table table; 23 | 24 | public HbaseSink(String zookeeper, String zkPort, String tableName, String family) { 25 | this.zookeeper = zookeeper; 26 | this.zkPort = zkPort; 27 | this.tableName = tableName; 28 | this.family = family; 29 | } 30 | 31 | @Override 32 | public void open(Configuration parameters) throws Exception { 33 | super.open(parameters); 34 | org.apache.hadoop.conf.Configuration configuration = HBaseConfiguration.create(); 35 | configuration.set(HConstants.ZOOKEEPER_QUORUM, zookeeper); 36 | configuration.set(HConstants.ZOOKEEPER_CLIENT_PORT, zkPort); 37 | configuration.set(HConstants.ZOOKEEPER_ZNODE_PARENT, "/hbase"); 38 | configuration.setInt(HConstants.HBASE_RPC_READ_TIMEOUT_KEY, 5000); 39 | // configuration.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY,5000); 40 | configuration.setInt(HConstants.HBASE_RPC_WRITE_TIMEOUT_KEY, 5000); 41 | configuration.setInt(HConstants.HBASE_CLIENT_OPERATION_TIMEOUT, 5000); 42 | configuration.setInt(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, 5000); 43 | User user = User.create(UserGroupInformation.createRemoteUser("hbase")); 44 | Connection connection = ConnectionFactory.createConnection(configuration, user); 45 | Admin admin = connection.getAdmin(); 46 | if (!admin.tableExists(TableName.valueOf(tableName))) { 47 | admin.createTable(new HTableDescriptor(TableName.valueOf(tableName)).addFamily(new HColumnDescriptor(family))); 48 | } 49 | table = connection.getTable(TableName.valueOf(Bytes.toBytes(tableName))); 50 | } 51 | 52 | @Override 53 | public void close() throws Exception { 54 | super.close(); 55 | if (table != null) { 56 | table.close(); 57 | } 58 | } 59 | 60 | @Override 61 
| public void processElement(String s, Context context, Collector collector) throws Exception { 62 | String rowKey = String.valueOf(Instant.now().getMillis()); 63 | Put put = new Put(Bytes.toBytes(rowKey)); 64 | put.setDurability(Durability.ASYNC_WAL); 65 | put.addColumn(Bytes.toBytes(family), Bytes.toBytes("name"), Bytes.toBytes(s)); 66 | table.put(put); 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /dataflow-stream-kafka-source/src/main/java/com/bigdata/study/dataflowstreamkafkasource/config/KafkaSourceConfig.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.dataflowstreamkafkasource.config; 2 | 3 | import com.bigdata.study.dataflowstreamkafkasource.prop.KafkaSourceProperties; 4 | import com.bigdata.study.dataflowstreamkafkasource.utils.JsonMapper; 5 | import org.apache.kafka.streams.KafkaStreams; 6 | import org.apache.kafka.streams.StreamsConfig; 7 | import org.apache.kafka.streams.kstream.ForeachAction; 8 | import org.apache.kafka.streams.kstream.KStreamBuilder; 9 | import org.springframework.beans.factory.InitializingBean; 10 | import org.springframework.beans.factory.annotation.Autowired; 11 | import org.springframework.boot.context.properties.EnableConfigurationProperties; 12 | import org.springframework.cloud.stream.annotation.EnableBinding; 13 | import org.springframework.cloud.stream.messaging.Source; 14 | import org.springframework.context.annotation.Configuration; 15 | import org.springframework.messaging.Message; 16 | import org.springframework.messaging.support.MessageBuilder; 17 | 18 | import java.io.IOException; 19 | import java.util.HashMap; 20 | import java.util.Map; 21 | 22 | /** 23 | * kafka source 24 | **/ 25 | @Configuration 26 | @EnableConfigurationProperties(KafkaSourceProperties.class) 27 | @EnableBinding(Source.class) 28 | public class KafkaSourceConfig implements InitializingBean { 29 | 30 | private StreamsConfig streamsConfig; 31 | 32 | @Autowired 33 | private KafkaSourceProperties kafkaSourceProperties; 34 | 35 | @Autowired 36 | private Source source; 37 | 38 | // @InboundChannelAdapter(channel = Source.OUTPUT) 39 | public void sendMessage() { 40 | KStreamBuilder builder = new KStreamBuilder(); 41 | builder.stream(kafkaSourceProperties.getTopic()).foreach(new ForeachAction() { 42 | @Override 43 | public void apply(Object key, Object value) { 44 | Message message; 45 | try { 46 | Map map = JsonMapper.defaultMapper().fromJson(String.valueOf(value), Map.class); 47 | message = MessageBuilder.withPayload(map).build(); 48 | source.output().send(message); 49 | System.out.println("成功发送消息:" + message.getPayload()); 50 | } catch (IOException e) { 51 | e.printStackTrace(); 52 | } 53 | } 54 | }); 55 | KafkaStreams kafkaStreams = new KafkaStreams(builder, streamsConfig); 56 | kafkaStreams.start(); 57 | } 58 | 59 | 60 | @Override 61 | public void afterPropertiesSet() { 62 | Map prop = new HashMap<>(); 63 | prop.put(StreamsConfig.APPLICATION_ID_CONFIG, kafkaSourceProperties.getGroupId()); 64 | prop.put("bootstrap.servers", kafkaSourceProperties.getServers()); 65 | prop.put("zookeeper.connect", kafkaSourceProperties.getZkNodes()); 66 | prop.put("key.serde", kafkaSourceProperties.getKeyDeserializer()); 67 | prop.put("value.serde", kafkaSourceProperties.getValueDeserializer()); 68 | prop.put("batch.size", kafkaSourceProperties.getBatchSize()); 69 | streamsConfig = new StreamsConfig(prop); 70 | } 71 | } 72 | 
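For clarity, here is a minimal, self-contained sketch of the forwarding pattern used in KafkaSourceConfig#sendMessage(), with the generic types written out explicitly. It is only an illustration, not project code: the topic name, application id and broker address are placeholder assumptions, and each record is simply printed instead of being sent to the Spring Cloud Stream Source channel.

import java.util.Properties;

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KStreamBuilder;

/**
 * Hedged sketch (not part of the project): consume a topic with the Kafka Streams
 * 0.10.x KStreamBuilder API and hand every record to a downstream consumer.
 */
public class KafkaForwardSketch {

    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "demo-source");        // assumed application/group id
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");  // assumed broker address
        props.put(StreamsConfig.KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
        props.put(StreamsConfig.VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());

        KStreamBuilder builder = new KStreamBuilder();
        // Read the topic as a KStream<String, String> and act on each record;
        // the real config parses the payload and sends it to source.output() instead of printing.
        KStream<String, String> stream = builder.stream("demo-topic");        // assumed topic name
        stream.foreach((key, value) -> System.out.println("forwarding: " + value));

        new KafkaStreams(builder, new StreamsConfig(props)).start();
    }
}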
-------------------------------------------------------------------------------- /flink-kafka-hbase/src/main/java/com/bigdata/study/flinkkafkahbase/source/FlinkHbaseSource.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkkafkahbase.source; 2 | 3 | import com.fasterxml.jackson.databind.ObjectMapper; 4 | import org.apache.flink.configuration.Configuration; 5 | import org.apache.flink.streaming.api.functions.source.RichSourceFunction; 6 | import org.apache.hadoop.hbase.Cell; 7 | import org.apache.hadoop.hbase.HBaseConfiguration; 8 | import org.apache.hadoop.hbase.TableName; 9 | import org.apache.hadoop.hbase.client.*; 10 | import org.apache.hadoop.hbase.util.Bytes; 11 | 12 | import java.io.IOException; 13 | import java.util.HashMap; 14 | import java.util.Map; 15 | 16 | /** 17 | * Hbase source 18 | **/ 19 | public class FlinkHbaseSource extends RichSourceFunction> { 20 | 21 | private static final String hbaseZookeeperQuorum = "192.168.20.48"; 22 | private static final String hbaseZookeeperClinentPort = "2181"; 23 | private static TableName hbaseTableName = TableName.valueOf("test"); 24 | private static final String columnFamily = "cf"; 25 | 26 | private static final ObjectMapper mapper = new ObjectMapper(); 27 | 28 | private Connection connection; 29 | 30 | @Override 31 | public void open(Configuration parameters) throws Exception { 32 | super.open(parameters); 33 | org.apache.hadoop.conf.Configuration config = HBaseConfiguration.create(); 34 | config.set("hbase.zookeeper.quorum", hbaseZookeeperQuorum); 35 | config.set("hbase.master", "10.45.151.26:60000"); 36 | config.set("hbase.zookeeper.property.clientPort", hbaseZookeeperClinentPort); 37 | config.setInt("hbase.rpc.timeout", 20000); 38 | config.setInt("hbase.client.operation.timeout", 30000); 39 | config.setInt("hbase.client.scanner.timeout.period", 200000); 40 | connection = ConnectionFactory.createConnection(config); 41 | } 42 | 43 | @Override 44 | public void close() throws Exception { 45 | super.close(); 46 | if (connection != null) { 47 | connection.close(); 48 | } 49 | } 50 | 51 | @Override 52 | public void run(SourceContext> sourceContext) throws Exception { 53 | Table table = connection.getTable(hbaseTableName); 54 | Scan scan = new Scan(); 55 | scan.addFamily(Bytes.toBytes(columnFamily)); 56 | ResultScanner tableScanner = table.getScanner(scan); 57 | tableScanner.iterator().forEachRemaining(scanner -> { 58 | Cell[] cells = scanner.rawCells(); 59 | Map map = new HashMap<>(); 60 | for (Cell cell : cells) { 61 | String key = Bytes.toString(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength()); 62 | String value = Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()); 63 | map.put(key, value); 64 | } 65 | sourceContext.collect(map); 66 | }); 67 | 68 | } 69 | 70 | @Override 71 | public void cancel() { 72 | if (!connection.isClosed()) { 73 | try { 74 | connection.close(); 75 | } catch (IOException e) { 76 | e.printStackTrace(); 77 | } 78 | } 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /spark-phoenix/src/main/java/com/bigdata/study/sparkphoenix/SparkPhoenixApplication.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.sparkphoenix; 2 | 3 | import javafx.util.Pair; 4 | import org.apache.spark.SparkConf; 5 | import org.apache.spark.api.java.JavaRDD; 6 | import org.apache.spark.sql.Row; 7 | 
import org.apache.spark.sql.RowFactory; 8 | import org.apache.spark.sql.SaveMode; 9 | import org.apache.spark.sql.SparkSession; 10 | import org.apache.spark.sql.types.DataTypes; 11 | import org.apache.spark.sql.types.StructField; 12 | import org.apache.spark.sql.types.StructType; 13 | import org.joda.time.DateTime; 14 | import org.joda.time.format.DateTimeFormatter; 15 | import org.joda.time.format.DateTimePrinter; 16 | import org.springframework.boot.SpringApplication; 17 | import org.springframework.boot.autoconfigure.SpringBootApplication; 18 | import scala.Tuple2; 19 | 20 | import java.util.ArrayList; 21 | import java.util.List; 22 | import java.util.Properties; 23 | 24 | /** 25 | * spark通过Phoenix读取Hbase数据 26 | */ 27 | @SpringBootApplication 28 | public class SparkPhoenixApplication { 29 | 30 | public static void main(String[] args) { 31 | SpringApplication.run(SparkPhoenixApplication.class, args); 32 | 33 | //初始化Spark 34 | SparkConf conf = new SparkConf().setAppName("Test") 35 | .setMaster("local[1]") 36 | .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 37 | .registerKryoClasses(new Class[]{}); 38 | SparkSession sparkSession = SparkSession.builder().config(conf).getOrCreate(); 39 | DateTime start = new DateTime(args[0]); 40 | DateTime end = new DateTime(args[1]); 41 | String startStr = start.toString("yyyy-MM-dd"); 42 | String endStr = end.toString("yyyy-MM-dd"); 43 | final String SQL_QUERY = "(SELECT date,member_id FROM events WHERE time>='%s' AND time<'%s' AND event='login') events"; 44 | String sql = String.format(SQL_QUERY, startStr, endStr); 45 | 46 | //jdbc从Hbase读取数据 47 | Properties prop = new Properties(); 48 | prop.put("driver", "org.apache.phoenix.jdbc.PhoenixDriver"); 49 | prop.put("user", ""); 50 | prop.put("password", ""); 51 | prop.put("fetchsize", "10000"); 52 | JavaRDD javaRDD = sparkSession.read() 53 | .jdbc("jdbc:phoenix:hadoop101,hadoop102,hadoop103", sql, prop) 54 | .filter("member_id!=-1") 55 | .javaRDD(); 56 | JavaRDD rowJavaRDD = javaRDD.mapToPair(r -> new Tuple2<>(r.getString(0), r.getLong(1))) 57 | .distinct() 58 | .groupByKey() 59 | .map(r -> { 60 | StringBuilder buffer = new StringBuilder(); 61 | r._2.forEach(buffer::append); 62 | return RowFactory.create(r._1, buffer.toString()); 63 | }); 64 | 65 | //schema 66 | List fields = new ArrayList<>(); 67 | fields.add(DataTypes.createStructField("date", DataTypes.StringType, false)); 68 | fields.add(DataTypes.createStructField("dist_mem", DataTypes.StringType, true)); 69 | StructType structType = DataTypes.createStructType(fields); 70 | 71 | //去重并存储 72 | sparkSession.createDataFrame(rowJavaRDD, structType) 73 | .write() 74 | .format("org.apache.phoenix.spark") 75 | .mode(SaveMode.Overwrite) 76 | .option("table", "test_string") 77 | .option("zkUrl", "jdbc:phoenix:hadoop101,hadoop102,hadoop103") 78 | .save(); 79 | sparkSession.stop(); 80 | sparkSession.close(); 81 | 82 | } 83 | 84 | } 85 | 86 | -------------------------------------------------------------------------------- /kafka-stream/src/main/java/com/bigdata/study/kafkastream/producer/UserProducer.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.kafkastream.producer; 2 | 3 | import com.bigdata.study.kafkastream.model.User; 4 | import com.bigdata.study.kafkastream.serdes.GenericSerializer; 5 | import com.bigdata.study.kafkastream.utils.HashPartitioner; 6 | import org.apache.commons.io.IOUtils; 7 | import org.apache.commons.lang3.StringUtils; 8 | import 
org.apache.kafka.clients.producer.KafkaProducer; 9 | import org.apache.kafka.clients.producer.ProducerRecord; 10 | import org.apache.kafka.common.KafkaException; 11 | import org.apache.kafka.common.serialization.StringSerializer; 12 | 13 | import java.io.IOException; 14 | import java.nio.charset.Charset; 15 | import java.util.List; 16 | import java.util.Properties; 17 | import java.util.stream.Collectors; 18 | 19 | /** 20 | * 用户生产者 21 | **/ 22 | public class UserProducer { 23 | public static void main(String[] args) { 24 | Properties prop = new Properties(); 25 | prop.put("bootstrap.servers", "192.168.20.48:9092"); 26 | prop.put("zookeeper.connect", "192.168.20.48:2181,192.168.20.51:2181,192.168.20.52:2181"); 27 | prop.put("acks", "all"); 28 | prop.put("retries", 3); 29 | prop.put("batch.size", 16348); 30 | prop.put("linger.ms", 1); 31 | prop.put("buffer.memory", 33554432); 32 | prop.put("key.serializer", StringSerializer.class.getCanonicalName()); 33 | prop.put("value.serializer", GenericSerializer.class.getCanonicalName()); 34 | prop.put("value.serializer.type", User.class.getCanonicalName()); 35 | prop.put("partitioner.class", HashPartitioner.class.getCanonicalName()); 36 | KafkaProducer userKafkaProducer = new KafkaProducer<>(prop); 37 | try { 38 | List users = readUser(); 39 | users.forEach(user -> { 40 | ProducerRecord producerRecord = new ProducerRecord<>("users", user.getName(), user); 41 | userKafkaProducer.send(producerRecord, (recordMetadata, e) -> { 42 | if (e != null) { 43 | System.err.printf("发送用户消息[topic:%s,partition:%d,offset:%d,keysize:%d,valuesize:%d]失败", 44 | recordMetadata.topic(), 45 | recordMetadata.partition(), 46 | recordMetadata.offset(), 47 | recordMetadata.serializedKeySize(), 48 | recordMetadata.serializedValueSize()); 49 | e.printStackTrace(); 50 | } 51 | System.out.printf("成功发送用户消息[topic:%s,partition:%d,offset:%d,keysize:%d,valuesize:%d]", 52 | recordMetadata.topic(), 53 | recordMetadata.partition(), 54 | recordMetadata.offset(), 55 | recordMetadata.serializedKeySize(), 56 | recordMetadata.serializedValueSize()); 57 | }); 58 | }); 59 | } catch (IOException e) { 60 | throw new KafkaException("发送用户信息到kafka出错", e); 61 | } finally { 62 | userKafkaProducer.close(); 63 | } 64 | } 65 | 66 | private static List readUser() throws IOException { 67 | List lines = IOUtils.readLines(UserProducer.class.getResourceAsStream("/users.csv"), Charset.forName("utf-8")); 68 | List users = lines.stream() 69 | .filter(StringUtils::isNotBlank) 70 | .map(line -> line.split("\\s*,\\s*")) 71 | .filter(value -> value.length == 4) 72 | .map(value -> new User(value[0], value[1], value[2], Integer.parseInt(value[3]))) 73 | .collect(Collectors.toList()); 74 | return users; 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /flink-kafka-hbase/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.bigdata.study 7 | bigdata-starter 8 | 1.0 9 | ../pom.xml 10 | 11 | flink-kafka-hbase 12 | 0.0.1-SNAPSHOT 13 | flink-kafka-hbase 14 | Demo project for Spring Boot 15 | 16 | 17 | 18 | org.springframework.boot 19 | spring-boot-starter 20 | 21 | 22 | com.bigdata.study 23 | flink-common 24 | 1.0 25 | 26 | 27 | org.apache.kafka 28 | kafka_2.11 29 | 0.11.0.2 30 | 31 | 32 | org.apache.hbase 33 | hbase-client 34 | 1.4.3 35 | 36 | 37 | org.apache.flink 38 | flink-hbase_${scala.binary.version} 39 | ${flink.version} 40 | 41 | 42 | log4j 43 | log4j 44 | 45 | 46 | org.apache.zookeeper 47 | zookeeper 48 
| 49 | 50 | org.apache.httpcomponents 51 | httpclient 52 | 53 | 54 | junit 55 | junit 56 | 57 | 58 | org.apache.httpcomponents 59 | httpcore 60 | 61 | 62 | commons-cli 63 | commons-cli 64 | 65 | 66 | com.google.guava 67 | guava 68 | 69 | 70 | com.yammer.metrics 71 | metrics-core 72 | 73 | 74 | commons-codec 75 | commons-codec 76 | 77 | 78 | 79 | 80 | org.springframework.boot 81 | spring-boot-starter-test 82 | test 83 | 84 | 85 | 86 | 87 | 88 | 89 | org.springframework.boot 90 | spring-boot-maven-plugin 91 | 92 | 93 | 94 | 95 | 96 | -------------------------------------------------------------------------------- /flink-common/src/main/java/utils/HttpUtil.java: -------------------------------------------------------------------------------- 1 | package utils; 2 | 3 | import org.apache.http.HttpEntity; 4 | import org.apache.http.HttpStatus; 5 | import org.apache.http.client.methods.CloseableHttpResponse; 6 | import org.apache.http.client.methods.HttpGet; 7 | import org.apache.http.client.methods.HttpPost; 8 | import org.apache.http.entity.StringEntity; 9 | import org.apache.http.impl.client.CloseableHttpClient; 10 | import org.apache.http.impl.client.HttpClients; 11 | import org.apache.http.util.EntityUtils; 12 | 13 | import java.io.IOException; 14 | 15 | public class HttpUtil { 16 | /** 17 | * 通过GET方式发起http请求 18 | */ 19 | public static String doGet(String url) { 20 | CloseableHttpClient httpClient = HttpClients.createDefault(); 21 | try { 22 | HttpGet get = new HttpGet(url); 23 | // get.setHeader("Internal-Client", "alert"); 24 | get.setHeader("content-type", "application/json"); 25 | CloseableHttpResponse httpResponse = null; 26 | httpResponse = httpClient.execute(get); 27 | try { 28 | if (httpResponse.getStatusLine().getStatusCode() == HttpStatus.SC_OK) { 29 | HttpEntity entity = httpResponse.getEntity(); 30 | if (null != entity) { 31 | return EntityUtils.toString(httpResponse.getEntity()); 32 | } 33 | } 34 | } finally { 35 | httpResponse.close(); 36 | } 37 | } catch (Exception e) { 38 | e.printStackTrace(); 39 | } finally { 40 | try { 41 | if (httpClient != null) { 42 | httpClient.close(); 43 | } 44 | } catch (IOException e) { 45 | e.printStackTrace(); 46 | } 47 | } 48 | return null; 49 | } 50 | 51 | 52 | /** 53 | * 发送 POST 请求(HTTP),JSON形式 54 | * 55 | * @param url 调用的地址 56 | * @param jsonParams 调用的参数 57 | * @return 58 | * @throws Exception 59 | */ 60 | public static CloseableHttpResponse doPostResponse(String url, String jsonParams) throws Exception { 61 | CloseableHttpClient httpClient = HttpClients.createDefault(); 62 | CloseableHttpResponse response = null; 63 | HttpPost httpPost = new HttpPost(url); 64 | 65 | try { 66 | StringEntity entity = new StringEntity(jsonParams, "UTF-8"); 67 | entity.setContentEncoding("UTF-8"); 68 | entity.setContentType("application/json"); 69 | 70 | httpPost.setEntity(entity); 71 | httpPost.setHeader("content-type", "application/json"); 72 | response = httpClient.execute(httpPost); 73 | } finally { 74 | if (response != null) { 75 | EntityUtils.consume(response.getEntity()); 76 | } 77 | } 78 | return response; 79 | } 80 | 81 | 82 | public static String doPostString(String url, String jsonParams) throws Exception { 83 | CloseableHttpClient httpClient = HttpClients.createDefault(); 84 | CloseableHttpResponse response = null; 85 | HttpPost httpPost = new HttpPost(url); 86 | 87 | String httpStr; 88 | try { 89 | StringEntity entity = new StringEntity(jsonParams, "UTF-8"); 90 | entity.setContentEncoding("UTF-8"); 91 | entity.setContentType("application/json"); 92 
| 93 | httpPost.setEntity(entity); 94 | httpPost.setHeader("content-type", "application/json"); 95 | // httpPost.setHeader("Internal-Client", "alert"); 96 | response = httpClient.execute(httpPost); 97 | httpStr = EntityUtils.toString(response.getEntity(), "UTF-8"); 98 | 99 | } finally { 100 | if (response != null) { 101 | EntityUtils.consume(response.getEntity()); 102 | } 103 | } 104 | return httpStr; 105 | } 106 | 107 | } 108 | -------------------------------------------------------------------------------- /kafka-stream/src/main/java/com/bigdata/study/kafkastream/producer/ItemProducer.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.kafkastream.producer; 2 | 3 | import com.bigdata.study.kafkastream.model.Item; 4 | import com.bigdata.study.kafkastream.serdes.GenericSerializer; 5 | import com.bigdata.study.kafkastream.utils.HashPartitioner; 6 | import org.apache.commons.io.IOUtils; 7 | import org.apache.commons.lang3.StringUtils; 8 | import org.apache.kafka.clients.producer.Callback; 9 | import org.apache.kafka.clients.producer.KafkaProducer; 10 | import org.apache.kafka.clients.producer.ProducerRecord; 11 | import org.apache.kafka.clients.producer.RecordMetadata; 12 | import org.apache.kafka.common.KafkaException; 13 | import org.apache.kafka.common.serialization.StringSerializer; 14 | 15 | import java.io.IOException; 16 | import java.nio.charset.Charset; 17 | import java.util.List; 18 | import java.util.Properties; 19 | import java.util.stream.Collectors; 20 | 21 | /** 22 | * 商品生产者 23 | **/ 24 | public class ItemProducer { 25 | public static void main(String[] args) { 26 | Properties prop = new Properties(); 27 | prop.put("bootstrap.servers", "192.168.20.48:9092"); 28 | prop.put("zookeeper.connect", "192.168.20.48:2181,192.168.20.51:2181,192.168.20.52:2181"); 29 | prop.put("acks", "all"); 30 | prop.put("retries", 3); 31 | prop.put("batch.size", 16348); 32 | prop.put("linger.ms", 1); 33 | prop.put("buffer.memory", 33554432); 34 | prop.put("key.serializer", StringSerializer.class.getCanonicalName()); 35 | prop.put("value.serializer", GenericSerializer.class.getCanonicalName()); 36 | prop.put("value.serializer.type", Item.class.getCanonicalName()); 37 | prop.put("partitioner.class", HashPartitioner.class.getCanonicalName()); 38 | KafkaProducer kafkaProducer = new KafkaProducer<>(prop); 39 | try { 40 | List items = readItem(); 41 | items.forEach(item -> { 42 | ProducerRecord record = new ProducerRecord<>("items", item.getItemName(), item); 43 | kafkaProducer.send(record, new Callback() { 44 | @Override 45 | public void onCompletion(RecordMetadata recordMetadata, Exception e) { 46 | if (e != null) { 47 | System.err.printf("发送商品消息[topic:%s,partition:%d,offset:%d,keysize:%d,valuesize:%d]失败", 48 | recordMetadata.topic(), 49 | recordMetadata.partition(), 50 | recordMetadata.offset(), 51 | recordMetadata.serializedKeySize(), 52 | recordMetadata.serializedValueSize()); 53 | e.printStackTrace(); 54 | } 55 | System.out.printf("成功发送商品消息[topic:%s,partition:%d,offset:%d,keysize:%d,valuesize:%d]", 56 | recordMetadata.topic(), 57 | recordMetadata.partition(), 58 | recordMetadata.offset(), 59 | recordMetadata.serializedKeySize(), 60 | recordMetadata.serializedValueSize()); 61 | } 62 | }); 63 | }); 64 | } catch (IOException e) { 65 | throw new KafkaException("发送商品数据到kafka出错", e); 66 | } finally { 67 | kafkaProducer.close(); 68 | } 69 | } 70 | 71 | private static List readItem() throws IOException { 72 | List lines = 
IOUtils.readLines(ItemProducer.class.getResourceAsStream("/items.csv"), Charset.forName("utf-8")); 73 | return lines.stream().filter(StringUtils::isNotBlank) 74 | .map(line -> line.split("\\s*,\\s*")) 75 | .filter(value -> value.length == 4) 76 | .map(value -> new Item(value[0], value[1], value[2], Double.parseDouble(value[3]))) 77 | .collect(Collectors.toList()); 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /kafka-stream/src/main/java/com/bigdata/study/kafkastream/producer/OrderProducer.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.kafkastream.producer; 2 | 3 | import com.bigdata.study.kafkastream.model.Order; 4 | import com.bigdata.study.kafkastream.model.User; 5 | import com.bigdata.study.kafkastream.serdes.GenericSerializer; 6 | import com.bigdata.study.kafkastream.utils.HashPartitioner; 7 | import org.apache.commons.io.IOUtils; 8 | import org.apache.commons.lang3.StringUtils; 9 | import org.apache.kafka.clients.producer.Callback; 10 | import org.apache.kafka.clients.producer.KafkaProducer; 11 | import org.apache.kafka.clients.producer.ProducerRecord; 12 | import org.apache.kafka.clients.producer.RecordMetadata; 13 | import org.apache.kafka.common.KafkaException; 14 | import org.apache.kafka.common.serialization.StringSerializer; 15 | 16 | import java.io.IOException; 17 | import java.nio.charset.Charset; 18 | import java.util.List; 19 | import java.util.Properties; 20 | import java.util.stream.Collectors; 21 | 22 | /** 23 | * 订单生产者 24 | **/ 25 | public class OrderProducer { 26 | public static void main(String[] args) { 27 | Properties prop = new Properties(); 28 | prop.put("bootstrap.servers", "192.168.20.48:9092"); 29 | prop.put("zookeeper.connect", "192.168.20.48:2181,192.168.20.51:2181,192.168.20.52:2181"); 30 | prop.put("acks", "all"); 31 | prop.put("retries", 3); 32 | prop.put("batch.size", 16348); 33 | prop.put("linger.ms", 1); 34 | prop.put("buffer.memory", 33554432); 35 | prop.put("key.serializer", StringSerializer.class.getCanonicalName()); 36 | prop.put("value.serializer", GenericSerializer.class.getCanonicalName()); 37 | prop.put("value.serializer.type", Order.class.getCanonicalName()); 38 | prop.put("partitioner.class", HashPartitioner.class.getCanonicalName()); 39 | KafkaProducer kafkaProducer = new KafkaProducer<>(prop); 40 | try { 41 | List orders = readOrder(); 42 | orders.forEach(order -> { 43 | ProducerRecord record = new ProducerRecord<>("orders", order.getUserName(), order); 44 | kafkaProducer.send(record, new Callback() { 45 | @Override 46 | public void onCompletion(RecordMetadata recordMetadata, Exception e) { 47 | if (e != null) { 48 | System.err.printf("发送订单消息[topic:%s,partition:%d,offset:%d,keysize:%d,valuesize:%d]失败", 49 | recordMetadata.topic(), 50 | recordMetadata.partition(), 51 | recordMetadata.offset(), 52 | recordMetadata.serializedKeySize(), 53 | recordMetadata.serializedValueSize()); 54 | e.printStackTrace(); 55 | } 56 | System.out.printf("成功发送订单消息[topic:%s,partition:%d,offset:%d,keysize:%d,valuesize:%d]", 57 | recordMetadata.topic(), 58 | recordMetadata.partition(), 59 | recordMetadata.offset(), 60 | recordMetadata.serializedKeySize(), 61 | recordMetadata.serializedValueSize()); 62 | } 63 | }); 64 | }); 65 | } catch (IOException e) { 66 | throw new KafkaException("发送订单消息出错", e); 67 | } finally { 68 | kafkaProducer.close(); 69 | } 70 | } 71 | 72 | private static List readOrder() throws IOException { 73 | List lines = 
IOUtils.readLines(OrderProducer.class.getResourceAsStream("/orders.csv"), Charset.forName("utf-8")); 74 | return lines.stream().filter(StringUtils::isNotBlank) 75 | .map(line -> line.split("\\s*,\\s*")) 76 | .filter(value -> value.length == 4) 77 | .map(value -> new Order(value[0], value[1], Long.parseLong(value[2]), Integer.parseInt(value[3]))) 78 | .collect(Collectors.toList()); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /flink-common/src/main/java/utils/KafkaUtils.java: -------------------------------------------------------------------------------- 1 | package utils; 2 | 3 | import constant.PropertiesConstants; 4 | import model.Metrics; 5 | import org.apache.flink.api.java.utils.ParameterTool; 6 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 7 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 9 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011; 10 | import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; 11 | import org.apache.kafka.clients.consumer.KafkaConsumer; 12 | import org.apache.kafka.clients.consumer.OffsetAndTimestamp; 13 | import org.apache.kafka.common.PartitionInfo; 14 | import org.apache.kafka.common.TopicPartition; 15 | import schemas.MetricSchema; 16 | import watermarks.MetricWatermark; 17 | 18 | import java.util.HashMap; 19 | import java.util.List; 20 | import java.util.Map; 21 | import java.util.Properties; 22 | 23 | /** 24 | * kafka 工具类 25 | **/ 26 | public class KafkaUtils { 27 | 28 | private static Properties buildKafkaProp(ParameterTool parameterTool) { 29 | Properties properties = parameterTool.getProperties(); 30 | properties.put("bootstrap.servers", parameterTool.get(PropertiesConstants.KAFKA_BROKERS)); 31 | properties.put("zookeeper.connect", parameterTool.get(PropertiesConstants.KAFKA_ZOOKEEPER_CONNECT)); 32 | properties.put("group.id", parameterTool.get(PropertiesConstants.KAFKA_GROUP_ID)); 33 | properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 34 | properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 35 | properties.put("auto.offset.reset", "latest"); 36 | return properties; 37 | } 38 | 39 | public static DataStreamSource buildSource(StreamExecutionEnvironment env) { 40 | ParameterTool parameterTool = (ParameterTool) env.getConfig().getGlobalJobParameters(); 41 | String topic = parameterTool.getRequired(PropertiesConstants.METRICS_TOPIC); 42 | long consumerTime = parameterTool.getLong(PropertiesConstants.CONSUMER_FROM_TIME, 0L); 43 | return buildSource(env, topic, consumerTime); 44 | } 45 | 46 | public static DataStreamSource buildSource(StreamExecutionEnvironment env, String topic, Long time) { 47 | ParameterTool parameterTool = (ParameterTool) env.getConfig().getGlobalJobParameters(); 48 | Properties properties = buildKafkaProp(parameterTool); 49 | FlinkKafkaConsumer011 consumer011 = new FlinkKafkaConsumer011<>(topic, new MetricSchema(), properties); 50 | //重装offset到time处 51 | if (time != null && time != 0L) { 52 | properties.setProperty("group.id", "query_time_" + time); 53 | KafkaConsumer consumer = new KafkaConsumer(properties); 54 | List partitionsFor = consumer.partitionsFor(PropertiesConstants.METRICS_TOPIC); 55 | Map partitionLongMap = new HashMap<>(); 56 | for (PartitionInfo partitionInfo : partitionsFor) { 57 | 
partitionLongMap.put(new TopicPartition(partitionInfo.topic(), partitionInfo.partition()), time); 58 | } 59 | Map offsetsForTimes = consumer.offsetsForTimes(partitionLongMap); 60 | Map partitionOffsetMap = new HashMap<>(); 61 | for (Map.Entry entry : offsetsForTimes.entrySet()) { 62 | TopicPartition topicPartition = entry.getKey(); 63 | partitionOffsetMap.put(new KafkaTopicPartition(topicPartition.topic(), topicPartition.partition()), entry.getValue().offset()); 64 | } 65 | consumer.close(); 66 | consumer011.setStartFromSpecificOffsets(partitionOffsetMap); 67 | } 68 | return env.addSource(consumer011); 69 | } 70 | 71 | public static SingleOutputStreamOperator parseSource(DataStreamSource dataStreamSource) { 72 | return dataStreamSource.assignTimestampsAndWatermarks(new MetricWatermark()); 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /flink-kafka-source/src/main/java/com/bigdata/study/flinkkafkasource/FlinkKafkaSourceApplication.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkkafkasource; 2 | 3 | import com.bigdata.study.flinkkafkasource.watermarks.ConsumerWaterMarkEmitter; 4 | import constant.PropertiesConstants; 5 | import model.Metrics; 6 | import org.apache.flink.api.java.utils.ParameterTool; 7 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 8 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 9 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011; 10 | import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; 11 | import org.apache.kafka.clients.consumer.KafkaConsumer; 12 | import org.apache.kafka.clients.consumer.OffsetAndTimestamp; 13 | import org.apache.kafka.common.PartitionInfo; 14 | import org.apache.kafka.common.TopicPartition; 15 | import org.springframework.boot.autoconfigure.SpringBootApplication; 16 | import schemas.MetricSchema; 17 | import utils.ExecutionEnvUtil; 18 | 19 | import java.util.HashMap; 20 | import java.util.List; 21 | import java.util.Map; 22 | import java.util.Properties; 23 | 24 | @SpringBootApplication 25 | public class FlinkKafkaSourceApplication { 26 | 27 | public static void main(String[] args) { 28 | // SpringApplication.run(FlinkKafkaSourceApplication.class, args); 29 | try { 30 | ParameterTool parameterPool = ExecutionEnvUtil.createParameterPool(args); 31 | StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterPool); 32 | 33 | Properties properties = new Properties(); 34 | properties.put("bootstrap.servers", parameterPool.get(PropertiesConstants.KAFKA_BROKERS)); 35 | properties.put("zookeeper.connect", parameterPool.get(PropertiesConstants.KAFKA_ZOOKEEPER_CONNECT)); 36 | properties.put("group.id", parameterPool.get(PropertiesConstants.KAFKA_GROUP_ID)); 37 | properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 38 | properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); 39 | properties.put("auto.offset.reset", "latest"); 40 | 41 | String topic = parameterPool.getRequired(PropertiesConstants.METRICS_TOPIC); 42 | long consumerTime = parameterPool.getLong(PropertiesConstants.CONSUMER_FROM_TIME, 0L); 43 | 44 | FlinkKafkaConsumer011 consumer011 = new FlinkKafkaConsumer011<>(topic, new MetricSchema(), properties); 45 | 46 | //设置消费者开始位置 47 | //指定消费者应从每个分区开始的确切偏移量 48 | if (consumerTime != 0L) { 49 | properties.setProperty("group.id", 
"query_time_" + consumerTime); 50 | KafkaConsumer consumer = new KafkaConsumer(properties); 51 | List partitionsFor = consumer.partitionsFor(topic); 52 | Map topicPartitionMap = new HashMap<>(); 53 | for (PartitionInfo partitionInfo : partitionsFor) { 54 | topicPartitionMap.put(new TopicPartition(partitionInfo.topic(), partitionInfo.partition()), consumerTime); 55 | } 56 | Map offsetsForTimes = consumer.offsetsForTimes(topicPartitionMap); 57 | Map kafkaTopicPartitionMap = new HashMap<>(); 58 | for (Map.Entry entry : offsetsForTimes.entrySet()) { 59 | TopicPartition topicPartition = entry.getKey(); 60 | KafkaTopicPartition kafkaTopicPartition = new KafkaTopicPartition(topicPartition.topic(), topicPartition.partition()); 61 | kafkaTopicPartitionMap.put(kafkaTopicPartition, entry.getValue().offset()); 62 | } 63 | consumer.close(); 64 | consumer011.setStartFromSpecificOffsets(kafkaTopicPartitionMap); 65 | } 66 | //指定自定义水印发射器 67 | consumer011.assignTimestampsAndWatermarks(new ConsumerWaterMarkEmitter()); 68 | DataStreamSource streamSource = env.addSource(consumer011); 69 | streamSource.print(); 70 | env.execute("flink kafka source"); 71 | } catch (Exception e) { 72 | e.printStackTrace(); 73 | } 74 | } 75 | 76 | } 77 | 78 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | com.bigdata.study 8 | bigdata-starter 9 | 1.0 10 | pom 11 | 12 | 13 | 1.8 14 | true 15 | UTF-8 16 | UTF-8 17 | 1.2.0.RELEASE 18 | 1.5.2.RELEASE 19 | 1.2.1.RELEASE 20 | 21 | 1.6.2 22 | 2.11 23 | 1.8 24 | 1.8 25 | 1.8 26 | 27 | true 28 | UTF-8 29 | UTF-8 30 | 31 | 32 | 33 | org.springframework.boot 34 | spring-boot-starter-parent 35 | 1.5.2.RELEASE 36 | 37 | 38 | 39 | 40 | apache.snapshots 41 | Apache Development Snapshot Repository 42 | https://repository.apache.org/content/repositories/snapshots/ 43 | 44 | false 45 | 46 | 47 | true 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | org.springframework.cloud 56 | spring-cloud-dependencies 57 | Dalston.SR3 58 | pom 59 | import 60 | 61 | 62 | org.springframework.cloud.stream.app 63 | app-starters-core-dependencies 64 | 1.2.0.RELEASE 65 | pom 66 | import 67 | 68 | 69 | 70 | 71 | 72 | org.springframework.cloud 73 | spring-cloud-starter-stream-kafka 74 | 75 | 76 | org.springframework.boot 77 | spring-boot-configuration-processor 78 | ${spring-boot-starter.version} 79 | true 80 | 81 | 82 | 83 | 84 | 85 | 86 | org.apache.maven.plugins 87 | maven-compiler-plugin 88 | 89 | 1.8 90 | 1.8 91 | 92 | 93 | 94 | 95 | 96 | 97 | fork-join 98 | dataflow-stream-redis-set-processor 99 | dataflow-stream-redis-pub-sink 100 | dataflow-stream-kafka-source 101 | spark-phoenix 102 | flink-elasticsearch-sink 103 | flink-common 104 | flink-kafka11-sink 105 | flink-kafka-source 106 | elasticsearch 107 | flink-kafka-hbase 108 | flink-jdbc-hbase 109 | flink-hdfs 110 | kafka-stream 111 | flink-sideoutput 112 | flink-async-io 113 | 114 | -------------------------------------------------------------------------------- /flink-sideoutput/src/main/java/com/bigdata/study/flinksideoutput/FlinkSideoutputApplication.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinksideoutput; 2 | 3 | import com.bigdata.study.flinksideoutput.process.KeyedTokenizer; 4 | import com.bigdata.study.flinksideoutput.tag.SideOutputTag; 5 | import org.apache.flink.api.common.functions.MapFunction; 6 | import 
org.apache.flink.api.common.typeinfo.TypeHint; 7 | import org.apache.flink.api.common.typeinfo.TypeInformation; 8 | import org.apache.flink.api.java.functions.KeySelector; 9 | import org.apache.flink.api.java.tuple.Tuple2; 10 | import org.apache.flink.streaming.api.TimeCharacteristic; 11 | import org.apache.flink.streaming.api.datastream.DataStream; 12 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 13 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 14 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 15 | import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows; 16 | import org.apache.flink.streaming.api.windowing.time.Time; 17 | 18 | //@SpringBootApplication 19 | public class FlinkSideoutputApplication { 20 | public static final String[] WORDS = new String[]{ 21 | "To be, or not to be,--that is the question:--", 22 | "Whether 'tis nobler in the mind to suffer", 23 | "The slings and arrows of outrageous fortune", 24 | "Or to take arms against a sea of troubles,", 25 | "And by opposing end them?--To die,--to sleep,--", 26 | "No more; and by a sleep to say we end", 27 | "The heartache, and the thousand natural shocks", 28 | "That flesh is heir to,--'tis a consummation", 29 | "Devoutly to be wish'd. To die,--to sleep;--", 30 | "To sleep! perchance to dream:--ay, there's the rub;", 31 | "For in that sleep of death what dreams may come,", 32 | "When we have shuffled off this mortal coil,", 33 | "Must give us pause: there's the respect", 34 | "That makes calamity of so long life;", 35 | "For who would bear the whips and scorns of time,", 36 | "The oppressor's wrong, the proud man's contumely,", 37 | "The pangs of despis'd love, the law's delay,", 38 | "The insolence of office, and the spurns", 39 | "That patient merit of the unworthy takes,", 40 | "When he himself might his quietus make", 41 | "With a bare bodkin? who would these fardels bear,", 42 | "To grunt and sweat under a weary life,", 43 | "But that the dread of something after death,--", 44 | "The undiscover'd country, from whose bourn", 45 | "No traveller returns,--puzzles the will,", 46 | "And makes us rather bear those ills we have", 47 | "Than fly to others that we know not of?", 48 | "Thus conscience does make cowards of us all;", 49 | "And thus the native hue of resolution", 50 | "Is sicklied o'er with the pale cast of thought;", 51 | "And enterprises of great pith and moment,", 52 | "With this regard, their currents turn awry,", 53 | "And lose the name of action.--Soft you now!", 54 | "The fair Ophelia!--Nymph, in thy orisons", 55 | "Be all my sins remember'd." 
56 | }; 57 | 58 | /** 59 | * 使用flink 的侧输出流sideoutput 60 | */ 61 | public static void main(String[] args) { 62 | // SpringApplication.run(FlinkSideoutputApplication.class, args); 63 | 64 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 65 | env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime); 66 | DataStreamSource textStream = env.fromElements(WORDS); 67 | SingleOutputStreamOperator> process = textStream.keyBy(new KeySelector() { 68 | @Override 69 | public Integer getKey(String s) throws Exception { 70 | return 0; 71 | } 72 | }).process(new KeyedTokenizer(), TypeInformation.of(new TypeHint>() { 73 | })); 74 | env.getConfig().disableSysoutLogging(); 75 | //侧输出 76 | DataStream sideoutputStream = process.getSideOutput(SideOutputTag.wordTag) 77 | .map(new MapFunction() { 78 | @Override 79 | public String map(String s) throws Exception { 80 | return "rejected:" + s; 81 | } 82 | }); 83 | DataStream> counts = process.keyBy(0) 84 | .window(TumblingEventTimeWindows.of(Time.seconds(5))) 85 | .sum(1); 86 | 87 | counts.print(); 88 | sideoutputStream.print(); 89 | try { 90 | env.execute("Streaming wordcount sideoutput"); 91 | } catch (Exception e) { 92 | e.printStackTrace(); 93 | } 94 | } 95 | 96 | } 97 | 98 | -------------------------------------------------------------------------------- /dataflow-stream-kafka-source/src/main/java/com/bigdata/study/dataflowstreamkafkasource/utils/JsonMapper.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.dataflowstreamkafkasource.utils; 2 | 3 | 4 | import com.fasterxml.jackson.annotation.JsonInclude; 5 | import com.fasterxml.jackson.core.JsonProcessingException; 6 | import com.fasterxml.jackson.databind.DeserializationFeature; 7 | import com.fasterxml.jackson.databind.JavaType; 8 | import com.fasterxml.jackson.databind.ObjectMapper; 9 | import com.fasterxml.jackson.databind.SerializationFeature; 10 | import com.fasterxml.jackson.databind.util.JSONPObject; 11 | import org.slf4j.Logger; 12 | import org.slf4j.LoggerFactory; 13 | import org.springframework.util.StringUtils; 14 | 15 | import java.io.IOException; 16 | import java.util.Collection; 17 | import java.util.Map; 18 | 19 | /** 20 | * Created by wjm on 2017/9/6. 21 | */ 22 | 23 | public class JsonMapper extends ObjectMapper { 24 | 25 | private static Logger logger = LoggerFactory.getLogger(JsonMapper.class); 26 | 27 | public static final JsonMapper INSTANCE = new JsonMapper(); 28 | 29 | private ObjectMapper mapper; 30 | 31 | public JsonMapper() { 32 | this(null); 33 | } 34 | 35 | public JsonMapper(JsonInclude.Include include) { 36 | mapper = new ObjectMapper(); 37 | // 设置输出时包含属性的风格 38 | if (include != null) { 39 | mapper.setSerializationInclusion(include); 40 | } 41 | // 设置输入时忽略在JSON字符串中存在但Java对象实际没有的属性 42 | mapper.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES); 43 | } 44 | 45 | /** 46 | * 创建只输出非Null的属性到Json字符串的Mapper. 47 | */ 48 | public static JsonMapper nonNullMapper() { 49 | return new JsonMapper(JsonInclude.Include.NON_NULL); 50 | } 51 | 52 | /** 53 | * 创建只输出非Null且非Empty(如List.isEmpty)的属性到Json字符串的Mapper. 54 | * 55 | * 注意,要小心使用, 特别留意empty的情况. 
56 | */ 57 | public static JsonMapper nonEmptyMapper() { 58 | return new JsonMapper(JsonInclude.Include.NON_EMPTY); 59 | } 60 | 61 | /** 62 | * 默认的全部输出的Mapper, 区别于INSTANCE,可以做进一步的配置 63 | */ 64 | public static JsonMapper defaultMapper() { 65 | return new JsonMapper(); 66 | } 67 | 68 | /** 69 | * Object可以是POJO,也可以是Collection或数组。 如果对象为Null, 返回"null". 如果集合为空集合, 返回"[]". 70 | */ 71 | public String toJson(Object object) throws JsonProcessingException { 72 | 73 | 74 | return mapper.writeValueAsString(object); 75 | 76 | } 77 | 78 | /** 79 | * 反序列化POJO或简单Collection如List. 80 | * 81 | * 如果JSON字符串为Null或"null"字符串, 返回Null. 如果JSON字符串为"[]", 返回空集合. 82 | * 83 | * 如需反序列化复杂Collection如List, 请使用fromJson(String, JavaType) 84 | * 85 | * @see #fromJson(String, JavaType) 86 | */ 87 | public T fromJson( String jsonString, Class clazz) throws IOException { 88 | if (StringUtils.isEmpty(jsonString)) { 89 | return null; 90 | } 91 | 92 | return mapper.readValue(jsonString, clazz); 93 | 94 | } 95 | 96 | /** 97 | * 反序列化复杂Collection如List, contructCollectionType()或contructMapType()构造类型, 然后调用本函数. 98 | * 99 | */ 100 | public T fromJson( String jsonString, JavaType javaType) throws IOException { 101 | if (StringUtils.isEmpty(jsonString)) { 102 | return null; 103 | } 104 | 105 | 106 | return (T) mapper.readValue(jsonString, javaType); 107 | 108 | } 109 | 110 | /** 111 | * 构造Collection类型. 112 | */ 113 | public JavaType buildCollectionType(Class collectionClass, Class elementClass) { 114 | return mapper.getTypeFactory().constructCollectionType(collectionClass, elementClass); 115 | } 116 | 117 | /** 118 | * 构造Map类型. 119 | */ 120 | public JavaType buildMapType(Class mapClass, Class keyClass, Class valueClass) { 121 | return mapper.getTypeFactory().constructMapType(mapClass, keyClass, valueClass); 122 | } 123 | 124 | /** 125 | * 当JSON里只含有Bean的部分属性時,更新一個已存在Bean,只覆盖該部分的属性. 126 | */ 127 | public void update(String jsonString, Object object) throws IOException { 128 | 129 | mapper.readerForUpdating(object).readValue(jsonString); 130 | 131 | } 132 | 133 | /** 134 | * 輸出JSONP格式數據. 135 | */ 136 | public String toJsonP(String functionName, Object object) throws JsonProcessingException { 137 | return toJson(new JSONPObject(functionName, object)); 138 | } 139 | 140 | /** 141 | * 設定是否使用Enum的toString函數來讀寫Enum, 為False時時使用Enum的name()函數來讀寫Enum, 默認為False. 注意本函數一定要在Mapper創建後, 所有的讀寫動作之前調用. 142 | */ 143 | public void enableEnumUseToString() { 144 | mapper.enable(SerializationFeature.WRITE_ENUMS_USING_TO_STRING); 145 | mapper.enable(DeserializationFeature.READ_ENUMS_USING_TO_STRING); 146 | } 147 | 148 | /** 149 | * 取出Mapper做进一步的设置或使用其他序列化API. 
150 | */ 151 | public ObjectMapper getMapper() { 152 | return mapper; 153 | } 154 | 155 | 156 | 157 | } -------------------------------------------------------------------------------- /dataflow-stream-redis-set-processor/src/main/java/com/bigdata/study/dataflowstreamredissetprocessor/utils/JsonMapper.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.dataflowstreamredissetprocessor.utils; 2 | 3 | 4 | import com.fasterxml.jackson.annotation.JsonInclude; 5 | import com.fasterxml.jackson.core.JsonProcessingException; 6 | import com.fasterxml.jackson.databind.DeserializationFeature; 7 | import com.fasterxml.jackson.databind.JavaType; 8 | import com.fasterxml.jackson.databind.ObjectMapper; 9 | import com.fasterxml.jackson.databind.SerializationFeature; 10 | import com.fasterxml.jackson.databind.util.JSONPObject; 11 | import org.slf4j.Logger; 12 | import org.slf4j.LoggerFactory; 13 | import org.springframework.util.StringUtils; 14 | 15 | import java.io.IOException; 16 | import java.util.Collection; 17 | import java.util.Map; 18 | 19 | /** 20 | * Created by wjm on 2017/9/6. 21 | */ 22 | 23 | public class JsonMapper extends ObjectMapper { 24 | 25 | private static Logger logger = LoggerFactory.getLogger(JsonMapper.class); 26 | 27 | public static final JsonMapper INSTANCE = new JsonMapper(); 28 | 29 | private ObjectMapper mapper; 30 | 31 | public JsonMapper() { 32 | this(null); 33 | } 34 | 35 | public JsonMapper(JsonInclude.Include include) { 36 | mapper = new ObjectMapper(); 37 | // 设置输出时包含属性的风格 38 | if (include != null) { 39 | mapper.setSerializationInclusion(include); 40 | } 41 | // 设置输入时忽略在JSON字符串中存在但Java对象实际没有的属性 42 | mapper.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES); 43 | } 44 | 45 | /** 46 | * 创建只输出非Null的属性到Json字符串的Mapper. 47 | */ 48 | public static JsonMapper nonNullMapper() { 49 | return new JsonMapper(JsonInclude.Include.NON_NULL); 50 | } 51 | 52 | /** 53 | * 创建只输出非Null且非Empty(如List.isEmpty)的属性到Json字符串的Mapper. 54 | * 55 | * 注意,要小心使用, 特别留意empty的情况. 56 | */ 57 | public static JsonMapper nonEmptyMapper() { 58 | return new JsonMapper(JsonInclude.Include.NON_EMPTY); 59 | } 60 | 61 | /** 62 | * 默认的全部输出的Mapper, 区别于INSTANCE,可以做进一步的配置 63 | */ 64 | public static JsonMapper defaultMapper() { 65 | return new JsonMapper(); 66 | } 67 | 68 | /** 69 | * Object可以是POJO,也可以是Collection或数组。 如果对象为Null, 返回"null". 如果集合为空集合, 返回"[]". 70 | */ 71 | public String toJson(Object object) throws JsonProcessingException { 72 | 73 | 74 | return mapper.writeValueAsString(object); 75 | 76 | } 77 | 78 | /** 79 | * 反序列化POJO或简单Collection如List. 80 | * 81 | * 如果JSON字符串为Null或"null"字符串, 返回Null. 如果JSON字符串为"[]", 返回空集合. 82 | * 83 | * 如需反序列化复杂Collection如List, 请使用fromJson(String, JavaType) 84 | * 85 | * @see #fromJson(String, JavaType) 86 | */ 87 | public T fromJson( String jsonString, Class clazz) throws IOException { 88 | if (StringUtils.isEmpty(jsonString)) { 89 | return null; 90 | } 91 | 92 | return mapper.readValue(jsonString, clazz); 93 | 94 | } 95 | 96 | /** 97 | * 反序列化复杂Collection如List, contructCollectionType()或contructMapType()构造类型, 然后调用本函数. 98 | * 99 | */ 100 | public T fromJson( String jsonString, JavaType javaType) throws IOException { 101 | if (StringUtils.isEmpty(jsonString)) { 102 | return null; 103 | } 104 | 105 | 106 | return (T) mapper.readValue(jsonString, javaType); 107 | 108 | } 109 | 110 | /** 111 | * 构造Collection类型. 
112 | */ 113 | public JavaType buildCollectionType(Class collectionClass, Class elementClass) { 114 | return mapper.getTypeFactory().constructCollectionType(collectionClass, elementClass); 115 | } 116 | 117 | /** 118 | * 构造Map类型. 119 | */ 120 | public JavaType buildMapType(Class mapClass, Class keyClass, Class valueClass) { 121 | return mapper.getTypeFactory().constructMapType(mapClass, keyClass, valueClass); 122 | } 123 | 124 | /** 125 | * 当JSON里只含有Bean的部分属性時,更新一個已存在Bean,只覆盖該部分的属性. 126 | */ 127 | public void update(String jsonString, Object object) throws IOException { 128 | 129 | mapper.readerForUpdating(object).readValue(jsonString); 130 | 131 | } 132 | 133 | /** 134 | * 輸出JSONP格式數據. 135 | */ 136 | public String toJsonP(String functionName, Object object) throws JsonProcessingException { 137 | return toJson(new JSONPObject(functionName, object)); 138 | } 139 | 140 | /** 141 | * 設定是否使用Enum的toString函數來讀寫Enum, 為False時時使用Enum的name()函數來讀寫Enum, 默認為False. 注意本函數一定要在Mapper創建後, 所有的讀寫動作之前調用. 142 | */ 143 | public void enableEnumUseToString() { 144 | mapper.enable(SerializationFeature.WRITE_ENUMS_USING_TO_STRING); 145 | mapper.enable(DeserializationFeature.READ_ENUMS_USING_TO_STRING); 146 | } 147 | 148 | /** 149 | * 取出Mapper做进一步的设置或使用其他序列化API. 150 | */ 151 | public ObjectMapper getMapper() { 152 | return mapper; 153 | } 154 | 155 | 156 | 157 | } -------------------------------------------------------------------------------- /flink-hdfs/src/main/java/com/bigdata/study/flinkhdfs/zip/FlinkHdfsZip.java: -------------------------------------------------------------------------------- 1 | package com.bigdata.study.flinkhdfs.zip; 2 | 3 | import org.apache.commons.lang3.StringUtils; 4 | import org.apache.flink.api.common.functions.FlatMapFunction; 5 | import org.apache.flink.api.common.functions.MapFunction; 6 | import org.apache.flink.api.common.functions.ReduceFunction; 7 | import org.apache.flink.api.java.hadoop.mapred.HadoopOutputFormat; 8 | import org.apache.flink.api.java.tuple.Tuple2; 9 | import org.apache.flink.streaming.api.datastream.DataStream; 10 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 11 | import org.apache.flink.streaming.api.windowing.time.Time; 12 | import org.apache.flink.util.Collector; 13 | import org.apache.hadoop.fs.Path; 14 | import org.apache.hadoop.io.IntWritable; 15 | import org.apache.hadoop.io.SequenceFile; 16 | import org.apache.hadoop.io.Text; 17 | import org.apache.hadoop.io.compress.GzipCodec; 18 | import org.apache.hadoop.mapred.FileOutputFormat; 19 | import org.apache.hadoop.mapred.JobConf; 20 | import org.apache.hadoop.mapred.TextOutputFormat; 21 | 22 | /** 23 | * 使用Flink内置sink API将数据以压缩的格式写入到HDFS上 24 | * 将数据以gz压缩格式将处理后的数据写入到HDFS上 25 | **/ 26 | public class FlinkHdfsZip { 27 | 28 | public static void main(String[] args) { 29 | String file_input = "C:\\Users\\hasee\\Desktop\\spark.txt"; 30 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 31 | env.setParallelism(2); 32 | DataStream dataStreamSource = env.readTextFile(file_input); 33 | DataStream> reduce = dataStreamSource.filter(StringUtils::isNotBlank) 34 | .flatMap(new FlatMapFunction>() { 35 | @Override 36 | public void flatMap(String s, Collector> collector) throws Exception { 37 | String[] words = s.toLowerCase().split("\\W+"); 38 | for (String word : words) { 39 | if (word.length() > 0) { 40 | Tuple2 tuple2 = new Tuple2<>(); 41 | tuple2.f0 = word; 42 | tuple2.f1 = 1; 43 | collector.collect(tuple2); 44 | } 45 | } 46 | } 47 | 
-------------------------------------------------------------------------------- /flink-hdfs/src/main/java/com/bigdata/study/flinkhdfs/zip/FlinkHdfsZip.java: --------------------------------------------------------------------------------
package com.bigdata.study.flinkhdfs.zip;

import org.apache.commons.lang3.StringUtils;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.hadoop.mapred.HadoopOutputFormat;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextOutputFormat;

/**
 * Writes the processed data to HDFS in a compressed (gzip) format using Flink's built-in sink API.
 */
public class FlinkHdfsZip {

    public static void main(String[] args) {
        String file_input = "C:\\Users\\hasee\\Desktop\\spark.txt";
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(2);
        DataStream<String> dataStreamSource = env.readTextFile(file_input);
        DataStream<Tuple2<String, Integer>> reduce = dataStreamSource.filter(StringUtils::isNotBlank)
                .flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                    @Override
                    public void flatMap(String s, Collector<Tuple2<String, Integer>> collector) throws Exception {
                        String[] words = s.toLowerCase().split("\\W+");
                        for (String word : words) {
                            if (word.length() > 0) {
                                Tuple2<String, Integer> tuple2 = new Tuple2<>();
                                tuple2.f0 = word;
                                tuple2.f1 = 1;
                                collector.collect(tuple2);
                            }
                        }
                    }
                }).keyBy(0).timeWindow(Time.seconds(30)).reduce(new ReduceFunction<Tuple2<String, Integer>>() {
                    @Override
                    public Tuple2<String, Integer> reduce(Tuple2<String, Integer> t1, Tuple2<String, Integer> t2) throws Exception {
                        return new Tuple2<>(t1.f0, t1.f1 + t2.f1);
                    }
                });
        DataStream<Tuple2<IntWritable, Text>> hdfsStream = reduce.flatMap(new FlatMapFunction<Tuple2<String, Integer>, Tuple2<IntWritable, Text>>() {
            @Override
            public void flatMap(Tuple2<String, Integer> in, Collector<Tuple2<IntWritable, Text>> collector) throws Exception {
                Tuple2<IntWritable, Text> tuple2 = new Tuple2<>();
                tuple2.f0 = new IntWritable(in.f1);
                tuple2.f1 = new Text(in.f0);
                collector.collect(tuple2);
            }
        });
        DataStream<Tuple2<Text, IntWritable>> outStream = hdfsStream.map(new MapFunction<Tuple2<IntWritable, Text>, Tuple2<Text, IntWritable>>() {
            @Override
            public Tuple2<Text, IntWritable> map(Tuple2<IntWritable, Text> in) throws Exception {
                Tuple2<Text, IntWritable> tuple2 = new Tuple2<>();
                tuple2.f0 = in.f1;
                tuple2.f1 = in.f0;
                return tuple2;
            }
        });
//        DataStream<Tuple2<Text, IntWritable>> outStream = hdfsStream.flatMap(new FlatMapFunction<Tuple2<IntWritable, Text>, Tuple2<Text, IntWritable>>() {
//            @Override
//            public void flatMap(Tuple2<IntWritable, Text> in, Collector<Tuple2<Text, IntWritable>> collector) throws Exception {
//                Tuple2<Text, IntWritable> tuple2 = new Tuple2<>();
//                tuple2.f0 = in.f1;
//                tuple2.f1 = in.f0;
//                collector.collect(tuple2);
//            }
//        });
        // Compress the output files with gzip
        HadoopOutputFormat<Text, IntWritable> hadoopOutputFormat = new HadoopOutputFormat<>(new TextOutputFormat<>(), new JobConf());
        hadoopOutputFormat.getJobConf().set("mapred.textoutputformat.separator", " ");
        hadoopOutputFormat.getJobConf().setCompressMapOutput(true);
        hadoopOutputFormat.getJobConf().set("mapred.output.compress", "true");
        hadoopOutputFormat.getJobConf().setMapOutputCompressorClass(GzipCodec.class);
        // GzipCodec.class.getCanonicalName() returns the class name; equivalent to getName() here
        hadoopOutputFormat.getJobConf().set("mapred.output.compression.codec", GzipCodec.class.getCanonicalName());
        hadoopOutputFormat.getJobConf().set("mapred.output.compression.type", SequenceFile.CompressionType.BLOCK.toString());
        FileOutputFormat.setOutputPath(hadoopOutputFormat.getJobConf(), new Path("/tmp/data/"));
        outStream.writeUsingOutputFormat(hadoopOutputFormat);
        try {
            env.execute("Hadoop Compat WordCount");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
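To sanity-check that the codec settings above actually produce gzip output, a small standalone reader can decompress one of the part files using only the JDK. The file name below is a guess at the usual TextOutputFormat naming and should be adjusted to the real output path:

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.zip.GZIPInputStream;

public class GzipOutputCheck {

    public static void main(String[] args) throws Exception {
        // Hypothetical part-file name under the configured output path
        String partFile = "/tmp/data/part-00000.gz";
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                new GZIPInputStream(new FileInputStream(partFile)), StandardCharsets.UTF_8))) {
            // Print the first few "word count" lines to confirm the content decompresses cleanly
            reader.lines().limit(10).forEach(System.out::println);
        }
    }
}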
-------------------------------------------------------------------------------- /flink-elasticsearch-sink/src/main/java/com/bigdata/study/flinkelasticsearchsink/FlinkElasticsearchSinkApplication.java: --------------------------------------------------------------------------------
package com.bigdata.study.flinkelasticsearchsink;

import com.bigdata.study.flinkelasticsearchsink.handler.FlinkFailHandler;
import constant.PropertiesConstants;
import model.Metrics;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkBase;
import org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkFunction;
import org.apache.flink.streaming.connectors.elasticsearch.RequestIndexer;
import org.apache.flink.streaming.connectors.elasticsearch6.ElasticsearchSink;
import org.apache.http.HttpHost;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Requests;
import org.elasticsearch.common.xcontent.XContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentType;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import utils.ExecutionEnvUtil;
import utils.GsonUtils;
import utils.KafkaUtils;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

/**
 * Flink reads data from Kafka, processes it, and writes the result into Elasticsearch 6.
 */
@SpringBootApplication
public class FlinkElasticsearchSinkApplication {

    public static void main(String[] args) {
        // SpringApplication.run(FlinkElasticsearchSinkApplication.class, args);
        try {
            ParameterTool parameterPool = ExecutionEnvUtil.createParameterPool(args);
            StreamExecutionEnvironment environment = ExecutionEnvUtil.prepare(parameterPool);
            DataStreamSource<Metrics> dataStreamSource = KafkaUtils.buildSource(environment);
            List<HttpHost> httpHosts = parseEsHost(parameterPool.get(PropertiesConstants.ELASTICSEARCH_HOSTS));
            int bulkSize = parameterPool.getInt(PropertiesConstants.ELASTICSEARCH_BULK_FLUSH_MAX_ACTIONS, 40);
            int parallelism = parameterPool.getInt(PropertiesConstants.STREAM_SINK_PARALLELISM, 4);
            ElasticsearchSink.Builder<Metrics> builder = new ElasticsearchSink.Builder<>(httpHosts, new ElasticsearchSinkFunction<Metrics>() {
                @Override
                public void process(Metrics metrics, RuntimeContext runtimeContext, RequestIndexer requestIndexer) {
                    requestIndexer.add(Requests.indexRequest()
                            .index("flink_" + metrics.getName())
                            .type("document")
                            .source(GsonUtils.toJsonBytes(metrics), XContentType.JSON));
                }
            });
            // Advanced configuration

            // Whether to enable the bulk-flush back-off (retry) mechanism
            builder.setBulkFlushBackoff(true);
            // Back-off strategy:
            // CONSTANT keeps a fixed delay between retries, e.g. 2 -> 2 -> 2 ...
            builder.setBulkFlushBackoffType(ElasticsearchSinkBase.FlushBackoffType.CONSTANT);
            // EXPONENTIAL grows the delay exponentially between retries, e.g. 2 -> 4 -> 8 ...
            // builder.setBulkFlushBackoffType(ElasticsearchSinkBase.FlushBackoffType.EXPONENTIAL);
            // Delay between retries; for the exponential strategy this is the initial base delay
            builder.setBulkFlushBackoffDelay(2);
            // Maximum number of retries on failure
            builder.setBulkFlushBackoffRetries(3);

            // Maximum number of actions per bulk request
            builder.setBulkFlushMaxActions(bulkSize);
            // Maximum size of a bulk request, in MB
            builder.setBulkFlushMaxSizeMb(10);

            // The sink's failure-handling only takes effect when checkpointing is enabled via
            // env.enableCheckpointing(); without checkpoints the retry-on-failure strategy does nothing.
            boolean checkpoint = parameterPool.getBoolean(PropertiesConstants.STREAM_CHECKPOINT_ENABLE);
            if (checkpoint) {
                // Register the failure handler
                builder.setFailureHandler(new FlinkFailHandler());
            }

            dataStreamSource.addSink(builder.build()).setParallelism(parallelism);
            environment.execute("flink connectors es6");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    private static List<HttpHost> parseEsHost(String hosts) throws MalformedURLException {
        String[] hostArray = hosts.split(",");
        List<HttpHost> httpHosts = new ArrayList<>();
        for (String host : hostArray) {
            if (StringUtils.startsWith(host, "http:")) {
                URL url = new URL(host);
                httpHosts.add(new HttpHost(url.getHost(), url.getPort()));
            } else {
                String[] parts = host.split(":", 2);
                if (parts.length > 1) {
                    httpHosts.add(new HttpHost(parts[0], Integer.parseInt(parts[1])));
                } else {
                    throw new MalformedURLException("invalid elasticsearch hosts exception!");
                }
            }
        }
        return httpHosts;
    }
}
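FlinkFailHandler is referenced above but its source is not shown in this section. A minimal sketch of an ActionRequestFailureHandler with a typical retry/drop policy (an illustration under stated assumptions, not the repo's actual handler) might look like:

import org.apache.flink.streaming.connectors.elasticsearch.ActionRequestFailureHandler;
import org.apache.flink.streaming.connectors.elasticsearch.RequestIndexer;
import org.apache.flink.util.ExceptionUtils;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.action.ActionRequest;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException;

public class RetryRejectedExecutionFailureHandlerSketch implements ActionRequestFailureHandler {

    @Override
    public void onFailure(ActionRequest action, Throwable failure, int restStatusCode, RequestIndexer indexer) throws Throwable {
        if (ExceptionUtils.findThrowable(failure, EsRejectedExecutionException.class).isPresent()
                && action instanceof IndexRequest) {
            // Bulk queue was full: re-add the request so it is retried with the next bulk flush.
            indexer.add((IndexRequest) action);
        } else if (ExceptionUtils.findThrowable(failure, ElasticsearchParseException.class).isPresent()) {
            // Malformed document: drop it (optionally log it or route it to a dead-letter topic).
        } else {
            // Unknown failure: rethrow so the job fails and restarts from the last checkpoint.
            throw failure;
        }
    }
}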
-------------------------------------------------------------------------------- /flink-async-io/src/main/java/com/bigdata/study/flinkasyncio/FlinkAsyncIoApplication.java: --------------------------------------------------------------------------------
package com.bigdata.study.flinkasyncio;

import com.bigdata.study.flinkasyncio.async.AsyncDataBaseRequest;
import com.bigdata.study.flinkasyncio.source.SimpleSource;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.AsyncDataStream;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.async.AsyncFunction;
import org.apache.flink.util.Collector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.concurrent.TimeUnit;

/**
 * Demonstrates Flink's async I/O API.
 */
//@SpringBootApplication
public class FlinkAsyncIoApplication {

    private static final Logger LOG = LoggerFactory.getLogger(FlinkAsyncIoApplication.class);

    public static void main(String[] args) {
        // SpringApplication.run(FlinkAsyncIoApplication.class, args);

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        ParameterTool params = ParameterTool.fromArgs(args);
        String statePath = null;
        String cpMode = null;
        int maxCount = 0;
        long sleepFactor = 0;
        float failRatio = 0;
        String mode = null;
        int taskNum = 0;
        String timeType = null;
        long shutdownWaitTS = 0;
        long timeout = 0;

        try {
            // check the configuration for the job
            statePath = params.get("fsStatePath", null);
            cpMode = params.get("checkpointMode", "exactly_once");
            maxCount = params.getInt("maxCount", 100000);
            sleepFactor = params.getLong("sleepFactor", 100);
            failRatio = params.getFloat("failRatio", 0.001f);
            mode = params.get("waitMode", "ordered");
            taskNum = params.getInt("waitOperatorParallelism", 1);
            timeType = params.get("eventType", "EventTime");
            shutdownWaitTS = params.getLong("shutdownWaitTS", 20000);
            timeout = params.getLong("timeout", 10000L);
        } catch (Exception e) {
            e.printStackTrace();
        }

        StringBuilder configStringBuilder = new StringBuilder();

        final String lineSeparator = System.getProperty("line.separator");

        configStringBuilder
                .append("Job configuration").append(lineSeparator)
                .append("FS state path=").append(statePath).append(lineSeparator)
                .append("Checkpoint mode=").append(cpMode).append(lineSeparator)
                .append("Max count of input from source=").append(maxCount).append(lineSeparator)
                .append("Sleep factor=").append(sleepFactor).append(lineSeparator)
                .append("Fail ratio=").append(failRatio).append(lineSeparator)
                .append("Waiting mode=").append(mode).append(lineSeparator)
                .append("Parallelism for async wait operator=").append(taskNum).append(lineSeparator)
                .append("Event type=").append(timeType).append(lineSeparator)
                .append("Shutdown wait timestamp=").append(shutdownWaitTS);

        LOG.info(configStringBuilder.toString());

        if (StringUtils.isNotBlank(statePath)) {
            env.setStateBackend(new FsStateBackend(statePath));
        }
        if ("exactly_once".equals(cpMode)) {
            env.enableCheckpointing(1000L, CheckpointingMode.EXACTLY_ONCE);
        } else {
            env.enableCheckpointing(1000L, CheckpointingMode.AT_LEAST_ONCE);
        }

        if ("event_time".equals(timeType)) {
            env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        } else if ("ingestion_time".equals(timeType)) {
            env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);
        } else {
            env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
        }

        DataStream<Integer> inputStream = env.addSource(new SimpleSource(maxCount));
        AsyncFunction<Integer, String> asyncFunction = new AsyncDataBaseRequest(sleepFactor, failRatio, shutdownWaitTS);

        DataStream<String> result;
        if ("ordered".equals(mode)) {
            result = AsyncDataStream.orderedWait(inputStream, asyncFunction, timeout, TimeUnit.MILLISECONDS, 20).setParallelism(taskNum);
        } else {
            result = AsyncDataStream.unorderedWait(inputStream, asyncFunction, timeout, TimeUnit.MILLISECONDS, 20).setParallelism(taskNum);
        }

        DataStream<Tuple2<String, Integer>> outputStream = result.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public void flatMap(String s, Collector<Tuple2<String, Integer>> collector) throws Exception {
                collector.collect(new Tuple2<>(s, 1));
            }
        });
        DataStream<Tuple2<String, Integer>> sum = outputStream.keyBy(0).sum(1);
        sum.print();
        try {
            env.execute("flink async io example");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}
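AsyncDataBaseRequest and SimpleSource are project classes that are not shown in this section. Assuming the source emits integers and the async function returns strings (as the job wiring above suggests), a sketch of what such a RichAsyncFunction could look like follows; the constructor parameters mirror how the class is instantiated above, but the body is illustrative, not the repo's implementation:

import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.async.ResultFuture;
import org.apache.flink.streaming.api.functions.async.RichAsyncFunction;

import java.util.Collections;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;

public class AsyncDataBaseRequestSketch extends RichAsyncFunction<Integer, String> {

    private final long sleepFactor;
    private final float failRatio;
    private final long shutdownWaitTS;

    private transient ExecutorService executor;

    public AsyncDataBaseRequestSketch(long sleepFactor, float failRatio, long shutdownWaitTS) {
        this.sleepFactor = sleepFactor;
        this.failRatio = failRatio;
        this.shutdownWaitTS = shutdownWaitTS;
    }

    @Override
    public void open(Configuration parameters) {
        // Dedicated pool so the simulated blocking "database" lookup never blocks the task thread
        executor = Executors.newFixedThreadPool(4);
    }

    @Override
    public void asyncInvoke(Integer input, ResultFuture<String> resultFuture) {
        CompletableFuture.runAsync(() -> {
            try {
                // Simulated lookup latency and failure rate, driven by the constructor parameters
                Thread.sleep(ThreadLocalRandom.current().nextLong(Math.max(1, sleepFactor)));
                if (ThreadLocalRandom.current().nextFloat() < failRatio) {
                    resultFuture.completeExceptionally(new RuntimeException("simulated lookup failure"));
                } else {
                    resultFuture.complete(Collections.singletonList("key-" + (input % 10)));
                }
            } catch (InterruptedException e) {
                resultFuture.completeExceptionally(e);
            }
        }, executor);
    }

    @Override
    public void close() throws Exception {
        executor.shutdown();
        executor.awaitTermination(shutdownWaitTS, TimeUnit.MILLISECONDS);
    }
}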
-------------------------------------------------------------------------------- /flink-hdfs/src/main/java/com/bigdata/study/flinkhdfs/core/FlinkHdfs.java: --------------------------------------------------------------------------------
package com.bigdata.study.flinkhdfs.core;

import com.bigdata.study.flinkhdfs.utils.HadoopConfig;
import com.bigdata.study.flinkhdfs.utils.HadoopHelper;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.DateFormatUtils;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.connectors.fs.Clock;
import org.apache.flink.streaming.connectors.fs.StringWriter;
import org.apache.flink.streaming.connectors.fs.bucketing.Bucketer;
import org.apache.flink.streaming.connectors.fs.bucketing.BucketingSink;
import org.apache.flink.util.Collector;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.CommandLineRunner;

import java.io.File;
import java.io.IOException;
import java.util.Date;
import java.util.Map;

/**
 * @Description Writes a word-count stream to HDFS with a BucketingSink and a custom bucketer.
 * @Author hasee
 * @Date 2019/1/7
 **/
public class FlinkHdfs implements CommandLineRunner {

    @Autowired
    private HadoopConfig hadoopConfig;

    @Override
    public void run(String... strings) throws Exception {
        String file_input = "C:\\Users\\hasee\\Desktop\\spark.txt";
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(10000);
        // NOTE: the event-time windows below only fire if timestamps/watermarks are assigned; none are set here
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.setParallelism(2);
        DataStream<String> dataStreamSource = env.readTextFile(file_input);
        DataStream<Tuple2<String, Integer>> reduce = dataStreamSource.filter(StringUtils::isNotBlank)
                .flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
                    @Override
                    public void flatMap(String s, Collector<Tuple2<String, Integer>> collector) throws Exception {
                        String[] words = s.toLowerCase().split("\\W+");
                        for (String word : words) {
                            if (word.length() > 0) {
                                Tuple2<String, Integer> tuple2 = new Tuple2<>();
                                tuple2.f0 = word;
                                tuple2.f1 = 1;
                                collector.collect(tuple2);
                            }
                        }
                    }
                }).keyBy(0).timeWindow(Time.seconds(30)).reduce(new ReduceFunction<Tuple2<String, Integer>>() {
                    @Override
                    public Tuple2<String, Integer> reduce(Tuple2<String, Integer> t1, Tuple2<String, Integer> t2) throws Exception {
                        return new Tuple2<>(t1.f0, t1.f1 + t2.f1);
                    }
                });
        DataStream<Tuple2<IntWritable, Text>> hdfsStream = reduce.flatMap(new FlatMapFunction<Tuple2<String, Integer>, Tuple2<IntWritable, Text>>() {
            @Override
            public void flatMap(Tuple2<String, Integer> in, Collector<Tuple2<IntWritable, Text>> collector) throws Exception {
                Tuple2<IntWritable, Text> tuple2 = new Tuple2<>();
                tuple2.f0 = new IntWritable(in.f1);
                tuple2.f1 = new Text(in.f0);
                collector.collect(tuple2);
            }
        });
        DataStream<String> outStream = hdfsStream.flatMap(new FlatMapFunction<Tuple2<IntWritable, Text>, String>() {
            @Override
            public void flatMap(Tuple2<IntWritable, Text> in, Collector<String> collector) throws Exception {
                StringBuilder builder = new StringBuilder();
                String name = in.f1.toString();
                int num = in.f0.get();
                builder.append(name).append("\t").append(num).append("\n");
                collector.collect(builder.toString());
            }
        });
        BucketingSink<String> bucketingSink = new BucketingSink<>("/base/path");
        // bucketingSink.setBucketer(new DateTimeBucketer<>("yyyy-MM-dd--HHmm"));
        // Use the custom bucketer
        bucketingSink.setBucketer(new DateHourBucketer());
        bucketingSink.setWriter(new StringWriter<>());
        bucketingSink.setBatchSize(1024 * 1024 * 4);
        bucketingSink.setBatchRolloverInterval(Integer.MAX_VALUE);
        bucketingSink.setInactiveBucketCheckInterval(60);
        bucketingSink.setInactiveBucketThreshold(60);
        HadoopHelper hadoopHelper = new HadoopHelper(hadoopConfig);
        Configuration configuration = hadoopHelper.getConfig();
        bucketingSink.setFSConfig(configuration);
        outStream.addSink(bucketingSink);
        // Without execute() the job graph built above never runs when this CommandLineRunner is invoked
        env.execute("flink hdfs bucketing sink");
    }

    private static ObjectMapper mapper = new ObjectMapper();

    /**
     * Custom HDFS bucketing rule: buckets each record by its "TimeStamp" JSON field, formatted as yyyy-MM-dd--HH.
     * Declared static so the sink can serialize the bucketer without pulling in the enclosing Spring bean.
     */
    private static class DateHourBucketer implements Bucketer<String> {
        @Override
        public Path getBucketPath(Clock clock, Path path, String s) {
            try {
                Map map = mapper.readValue(s, Map.class);
                Long timeStamp = (Long) map.get("TimeStamp");
                Date date = new Date(timeStamp);
                String format = DateFormatUtils.format(date, "yyyy-MM-dd--HH");
                return new Path(path + File.separator + format);
            } catch (IOException e) {
                e.printStackTrace();
            }
            return null;
        }
    }
}
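Note that DateHourBucketer parses each record as JSON and reads a "TimeStamp" field, while outStream above emits plain tab-separated "word<TAB>count" lines, so the JSON parse would fail for this particular pipeline. A bucketer keyed on the sink's clock avoids that mismatch; a minimal sketch (class name is illustrative):

import org.apache.commons.lang3.time.DateFormatUtils;
import org.apache.flink.streaming.connectors.fs.Clock;
import org.apache.flink.streaming.connectors.fs.bucketing.Bucketer;
import org.apache.hadoop.fs.Path;

public class ProcessingTimeHourBucketer implements Bucketer<String> {

    @Override
    public Path getBucketPath(Clock clock, Path basePath, String element) {
        // Bucket by wall-clock hour instead of a field parsed from the record
        String hour = DateFormatUtils.format(clock.currentTimeMillis(), "yyyy-MM-dd--HH");
        return new Path(basePath, hour);
    }
}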
-------------------------------------------------------------------------------- /spark-phoenix/src/main/java/com/bigdata/study/sparkphoenix/apps/SparkPhoenixReadHbase.java: --------------------------------------------------------------------------------
package com.bigdata.study.sparkphoenix.apps;

import com.bigdata.study.sparkphoenix.utils.PhoenixUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.util.Base64;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import scala.Tuple2;

import java.io.IOException;
import java.sql.*;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/**
 * Spark reads HBase data through Phoenix.
 **/
public class SparkPhoenixReadHbase {

    public static void main(String[] args) {
        Configuration configuration = HBaseConfiguration.create();
        configuration.set("hbase.zookeeper.quorum", "zk");
        configuration.set("hbase.zookeeper.property.clientPort", "2181");
        configuration.set("zookeeper.znode.parent", "/hbase");
        configuration.set(TableInputFormat.INPUT_TABLE, "tableName");
        Scan scan = new Scan();
        try {
            ClientProtos.Scan proto = ProtobufUtil.toScan(scan);
            String scanToString = Base64.encodeBytes(proto.toByteArray());
            configuration.set(TableInputFormat.SCAN, scanToString);

            // Initialize Spark
            SparkConf conf = new SparkConf().setAppName("Test")
                    .setMaster("local[1]")
                    .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
                    .registerKryoClasses(new Class[]{});
            SparkSession sparkSession = SparkSession.builder().config(conf).getOrCreate();

            finalSchema(configuration, sparkSession);
            dynamicSchema(configuration, sparkSession);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads HBase rows via TableInputFormat with a fixed, hand-written schema.
     */
    private static void finalSchema(Configuration configuration, SparkSession sparkSession) {
        JavaSparkContext context = new JavaSparkContext(sparkSession.sparkContext());
        JavaPairRDD<ImmutableBytesWritable, Result> javaPairRDD = context.newAPIHadoopRDD(configuration, TableInputFormat.class,
                ImmutableBytesWritable.class, Result.class);
        JavaRDD<Row> javaRDD = javaPairRDD.map(new Function<Tuple2<ImmutableBytesWritable, Result>, Row>() {
            @Override
            public Row call(Tuple2<ImmutableBytesWritable, Result> tuple2) throws Exception {
                Result result = tuple2._2();
                String rowKey = Bytes.toString(result.getRow());
                String id = Bytes.toString(result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("id")));
                String account = Bytes.toString(result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("account")));
                String password = Bytes.toString(result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("password")));
                return RowFactory.create(rowKey, id, account, password);
            }
        });
        List<StructField> fields = new ArrayList<>();
        // The Row above carries the HBase row key as its first value, so the schema must expose it as well
        fields.add(DataTypes.createStructField("rowKey", DataTypes.StringType, true));
        fields.add(DataTypes.createStructField("id", DataTypes.StringType, true));
        fields.add(DataTypes.createStructField("account", DataTypes.StringType, true));
        fields.add(DataTypes.createStructField("password", DataTypes.StringType, true));
        StructType schema = DataTypes.createStructType(fields);

        Dataset<Row> dataset = sparkSession.createDataFrame(javaRDD, schema);
    }

    /**
     * Builds the schema dynamically, e.g. by reading the column metadata directly through Phoenix.
     */
    private static void dynamicSchema(Configuration configuration, SparkSession sparkSession) {
        try {
            Connection connection = PhoenixUtil.getConnection();
            Statement statement = connection.createStatement();
            String sql = "select * from tableName limit 1";
            ResultSet resultSet = statement.executeQuery(sql);
            ResultSetMetaData metaData = resultSet.getMetaData();
            int columnCount = metaData.getColumnCount();
            Map<String, String> columnNameMap = new LinkedHashMap<>(columnCount);
            for (int i = 1; i <= columnCount; i++) {
                String columnTypeName = metaData.getColumnTypeName(i);
                String columnName = metaData.getColumnName(i);
                columnNameMap.put(columnName, columnTypeName);
            }
            PhoenixUtil.returnConnection(connection);

            JavaSparkContext context = new JavaSparkContext(sparkSession.sparkContext());
            JavaPairRDD<ImmutableBytesWritable, Result> javaPairRDD = context.newAPIHadoopRDD(configuration, TableInputFormat.class, ImmutableBytesWritable.class, Result.class);
            JavaRDD<Row> map = javaPairRDD.map(new Function<Tuple2<ImmutableBytesWritable, Result>, Row>() {
                @Override
                public Row call(Tuple2<ImmutableBytesWritable, Result> tuple2) throws Exception {
                    Result result = tuple2._2();
                    String row = Bytes.toString(result.getRow());
                    List<String> valueList = new ArrayList<>();
                    for (String column : columnNameMap.keySet()) {
                        String columnValue = Bytes.toString(result.getValue(Bytes.toBytes("cf"), Bytes.toBytes(column)));
                        valueList.add(columnValue);
                    }
                    String[] values = valueList.toArray(new String[]{});
                    // Spread the column values so each becomes its own field, matching the schema built below
                    return RowFactory.create((Object[]) values);
                }
            });

            List<StructField> fieldList = new ArrayList<>();
            for (Map.Entry<String, String> entry : columnNameMap.entrySet()) {
                String key = entry.getKey();
                String value = entry.getValue();
                DataType dataType = getDataType(value);
                fieldList.add(DataTypes.createStructField(key, dataType, false));
            }
            StructType schema = DataTypes.createStructType(fieldList);
            Dataset<Row> dataset = sparkSession.createDataFrame(map, schema);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    private static DataType getDataType(String typeName) {
        DataType dataType = null;
        if ("string".equals(typeName)) {
            dataType = DataTypes.StringType;
        } else if ("boolean".equals(typeName)) {
            dataType = DataTypes.BooleanType;
        } else if ("double".equals(typeName)) {
            dataType = DataTypes.DoubleType;
        } else if ("date".equals(typeName)) {
            dataType = DataTypes.DateType;
        } else if ("float".equals(typeName)) {
            dataType = DataTypes.FloatType;
        } else if ("bigint".equals(typeName)) {
            dataType = DataTypes.LongType;
        } else if ("short".equals(typeName)) {
            dataType = DataTypes.ShortType;
        } else if ("byte".equals(typeName)) {
            dataType = DataTypes.ByteType;
        } else if ("timestamp".equals(typeName)) {
            dataType = DataTypes.TimestampType;
        }
        return dataType;
    }
}
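PhoenixUtil (getConnection/returnConnection) is not shown in this section; its naming suggests a pooled implementation. A minimal non-pooled sketch using the standard Phoenix JDBC URL (jdbc:phoenix:<zookeeper quorum>:<port>:<znode parent>) could look like the following; the quorum and znode values are placeholders, and for production the phoenix-spark connector can also load the table straight into a DataFrame and skip the manual schema mapping above.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;

public final class PhoenixUtilSketch {

    // Placeholder connection string; replace "zk" and "/hbase" with the real quorum and znode parent
    private static final String URL = "jdbc:phoenix:zk:2181:/hbase";

    private PhoenixUtilSketch() {
    }

    public static Connection getConnection() throws SQLException {
        // The Phoenix driver registers itself via the JDBC ServiceLoader, so Class.forName is optional on JDBC 4+
        return DriverManager.getConnection(URL);
    }

    public static void returnConnection(Connection connection) {
        // No pooling here: simply close the connection the caller is done with
        if (connection != null) {
            try {
                connection.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }
}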
--------------------------------------------------------------------------------