├── .idea
├── .name
├── Flink-pyg.iml
├── codeStyles
│ └── codeStyleConfig.xml
├── compiler.xml
├── encodings.xml
├── hydra.xml
├── misc.xml
├── uiDesigner.xml
├── vcs.xml
└── workspace.xml
├── ReadMe.md
├── batch-process
└── pom.xml
├── canal-kafka
├── pom.xml
└── src
│ └── main
│ ├── java
│ ├── CanalClient.java
│ └── com
│ │ └── henry
│ │ └── canal_kafka
│ │ └── util
│ │ ├── GlobalConfigUtil.java
│ │ └── KafkaSender.java
│ └── resources
│ ├── application.properties
│ └── log4j.properties
├── pom.xml
├── pyg.iml
├── real-process
├── pom.xml
└── src
│ ├── main
│ ├── resources
│ │ ├── application.conf
│ │ ├── hbase-site.xml
│ │ └── log4j.properties
│ └── scala
│ │ └── com
│ │ └── henry
│ │ └── realprocess
│ │ ├── App.scala
│ │ ├── bean
│ │ ├── ClickLog.scala
│ │ ├── ClickLogWide.scala
│ │ └── Message.scala
│ │ ├── task
│ │ ├── BaseTask.scala
│ │ ├── ChannelAreaTask.scala
│ │ ├── ChannelBrowserTask.scala
│ │ ├── ChannelFreshnessTask.scala
│ │ ├── ChannelFreshnessTaskTrait.scala
│ │ ├── ChannelNetworkTask.scala
│ │ ├── ChannelPvUvTask.scala
│ │ ├── ChannelPvUvTaskMerge.scala
│ │ ├── ChannelRealHotTask.scala
│ │ └── PreprocessTask.scala
│ │ └── util
│ │ ├── GlobalConfigutil.scala
│ │ └── HBaseUtil.scala
│ └── test
│ └── temp.txt
├── report
├── pom.xml
└── src
│ ├── main
│ ├── java
│ │ └── com
│ │ │ └── henry
│ │ │ └── report
│ │ │ ├── ReportApplication.java
│ │ │ ├── bean
│ │ │ ├── Clicklog.java
│ │ │ └── Message.java
│ │ │ ├── controller
│ │ │ ├── ReportController.java
│ │ │ └── TestController.java
│ │ │ └── util
│ │ │ ├── ClickLogGenerator.java
│ │ │ ├── KafkaProducerConfig.java
│ │ │ └── RoundRobinPartitioner.java
│ └── resources
│ │ └── application.properties
│ └── test
│ └── java
│ └── com
│ └── henry
│ └── report
│ └── KafkaTest.java
├── screenshot
├── 036a079d.png
├── 03ef7ace.png
├── 04e25b5a.png
├── 07a78b77.png
├── 0b4d0c1b.png
├── 0b4ea4e1.png
├── 0bd763d1.png
├── 0ced234a.png
├── 0e6080a2.png
├── 0fcd02b7.png
├── 1.png
├── 121bf948.png
├── 12f712f9.png
├── 13c61ea9.png
├── 14679e84.png
├── 1a3addd7.png
├── 1d504cce.png
├── 2.png
├── 201507bb.png
├── 21733492.png
├── 2193cbd1.png
├── 22cd7b3c.png
├── 277372f9.png
├── 2b7f3937.png
├── 2c0ad8e2.png
├── 2d11fecd.png
├── 2f5a312e.png
├── 3.png
├── 3254e2ca.png
├── 32a6daaf.png
├── 342dcc3e.png
├── 34a79ff7.png
├── 34f66a92.png
├── 3754f480.png
├── 3936fce5.png
├── 3ab50051.png
├── 3b6d6d1f.png
├── 3c8d398c.png
├── 3d2cda96.png
├── 3f08b9d0.png
├── 4.png
├── 48cd018e.png
├── 4b18ecbe.png
├── 4cf81224.png
├── 520fd656.png
├── 5326b634.png
├── 54187145.png
├── 544d0e7a.png
├── 565c64ed.png
├── 58926ce0.png
├── 58945558.png
├── 5a321628.png
├── 62c03232.png
├── 64a0b856.png
├── 65e75e0f.png
├── 69907922.png
├── 6ac8e320.png
├── 6c04e485.png
├── 6c99f78b.png
├── 6f5af076.png
├── 6f897038.png
├── 6fcd4a44.png
├── 70a923ce.png
├── 72d64e76.png
├── 74d009f4.png
├── 75fcc253.png
├── 76c4fbf8.png
├── 79c600b1.png
├── 7b5e4836.png
├── 7cba404f.png
├── 7cd00637.png
├── 7cf4425b.png
├── 7fe930e0.png
├── 820fe570.png
├── 831e1859.png
├── 880c750d.png
├── 8c5fa195.png
├── 8cca6196.png
├── 8f89e666.png
├── 8fe964b8.png
├── 908989c5.png
├── 9379b632.png
├── 946fe86f.png
├── 9897be78.png
├── 98ddfe9a.png
├── 9e4179c5.png
├── 9e67979f.png
├── a13d8808.png
├── a2ab75e3.png
├── a35893be.png
├── a47efd66.png
├── a560cff6.png
├── a66b3e6f.png
├── a8d36972.png
├── aa3dbfbf.png
├── abb5e847.png
├── aef2abe1.png
├── af73ebaa.png
├── b35e8d12.png
├── b77622b6.png
├── c1186185.png
├── c33fe1b4.png
├── c6d0728b.png
├── c84f6044.png
├── cba7b53e.png
├── cdefdf02.png
├── cf67e612.png
├── cfd8e121.png
├── d068b5c0.png
├── d1a2dc81.png
├── d42bd3f1.png
├── d452de1b.png
├── d457be6b.png
├── d57e648a.png
├── d6cc806c.png
├── d99a61f4.png
├── d9fcfcf5.png
├── dc0e0c05.png
├── dc64a356.png
├── dedf144c.png
├── df332a64.png
├── e219a541.png
├── e4022013.png
├── e44c5879.png
├── e6130b81.png
├── e61c1e01.png
├── e751cb2d.png
├── ea8764de.png
├── ebf3c65b.png
├── ec1f3fda.png
├── fc27880f.png
├── fe002ea4.png
└── ff2dcb9b.png
└── sync-db
├── pom.xml
└── src
└── main
├── resources
├── application.conf
├── hbase-site.xml
└── log4j.properties
└── scala
└── com
└── henry
└── syncdb
├── App.scala
├── bean
├── Cannal.scala
└── HBaseOperation.scala
├── task
└── PreprocessTask.scala
└── util
├── FlinkUtils.scala
├── GlobalConfigutil.scala
└── HBaseUtil.scala
/.idea/.name:
--------------------------------------------------------------------------------
1 | pyg
--------------------------------------------------------------------------------
/batch-process/pom.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>pyg</artifactId>
        <groupId>com.henry</groupId>
        <version>1.0-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>

    <artifactId>batch-process</artifactId>

</project>
--------------------------------------------------------------------------------
/canal-kafka/pom.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>pyg</artifactId>
        <groupId>com.henry</groupId>
        <version>1.0-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>

    <artifactId>canal-kafka</artifactId>

    <dependencies>
        <dependency>
            <groupId>com.alibaba.otter</groupId>
            <artifactId>canal.client</artifactId>
            <version>1.0.24</version>
        </dependency>

        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka_2.11</artifactId>
            <version>0.10.1.0</version>
        </dependency>

        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.83</version>
        </dependency>
    </dependencies>

</project>
--------------------------------------------------------------------------------
/canal-kafka/src/main/java/CanalClient.java:
--------------------------------------------------------------------------------
1 | import com.alibaba.fastjson.JSON;
2 | import com.alibaba.fastjson.JSONObject;
3 | import com.alibaba.otter.canal.client.CanalConnector;
4 | import com.alibaba.otter.canal.client.CanalConnectors;
5 | import com.alibaba.otter.canal.protocol.CanalEntry;
6 | import com.alibaba.otter.canal.protocol.Message;
7 | import com.henry.canal_kafka.util.GlobalConfigUtil;
8 | import com.henry.canal_kafka.util.KafkaSender;
9 |
10 | import java.net.InetSocketAddress;
11 | import java.util.ArrayList;
12 | import java.util.List;
13 | import java.util.UUID;
14 |
15 | /**
16 |  * Utility class for parsing MySQL binlog entries with Canal
17 | */
18 | public class CanalClient {
19 |
20 | static class ColumnValuePair {
21 | private String columnName;
22 | private String columnValue;
23 | private Boolean isValid;
24 |
25 | public ColumnValuePair(String columnName, String columnValue, Boolean isValid) {
26 | this.columnName = columnName;
27 | this.columnValue = columnValue;
28 | this.isValid = isValid;
29 | }
30 |
31 | public String getColumnName() { return columnName; }
32 | public void setColumnName(String columnName) { this.columnName = columnName; }
33 | public String getColumnValue() { return columnValue; }
34 | public void setColumnValue(String columnValue) { this.columnValue = columnValue; }
35 | public Boolean getIsValid() { return isValid; }
36 | public void setIsValid(Boolean isValid) { this.isValid = isValid; }
37 | }
38 |
39 | /**
40 |  * Obtain a Canal connection
41 |  *
42 |  * @param host     host name
43 |  * @param port     port number
44 |  * @param instance Canal instance name
45 |  * @param username user name
46 |  * @param password password
47 |  * @return Canal connector
48 | */
49 | public static CanalConnector getConn(String host, int port, String instance, String username, String password) {
50 | CanalConnector canalConnector = CanalConnectors.newSingleConnector(new InetSocketAddress(host, port), instance, username, password);
51 |
52 | return canalConnector;
53 | }
54 |
55 | /**
56 |  * Parse binlog entries
57 |  *
58 |  * @param entries    binlog message entities
59 |  * @param emptyCount sequence number of the operation
60 | */
61 | public static void analysis(List<CanalEntry.Entry> entries, int emptyCount) {
62 | for (CanalEntry.Entry entry : entries) {
63 | // Skip transaction begin/end events; only row-change entries are parsed
64 | if (entry.getEntryType() == CanalEntry.EntryType.TRANSACTIONBEGIN ||
65 | entry.getEntryType() == CanalEntry.EntryType.TRANSACTIONEND) {
66 | continue;
67 | }
68 |
69 | // Parse the binlog entry
70 | CanalEntry.RowChange rowChange = null;
71 |
72 | try {
73 | rowChange = CanalEntry.RowChange.parseFrom(entry.getStoreValue());
74 | } catch (Exception e) {
75 | e.printStackTrace();
76 | }
77 |
78 | // Get the operation type (INSERT / DELETE / UPDATE)
79 | CanalEntry.EventType eventType = rowChange.getEventType();
80 | // Get the binlog file name
81 | String logfileName = entry.getHeader().getLogfileName();
82 | // Get the offset of this operation within the binlog file
83 | long logfileOffset = entry.getHeader().getLogfileOffset();
84 | // Get the database this operation belongs to
85 | String dbName = entry.getHeader().getSchemaName();
86 | // Get the table this operation belongs to
87 | String tableName = entry.getHeader().getTableName();  // the table being changed
88 | long timestamp = entry.getHeader().getExecuteTime();  // execution time
89 |
90 | // Parse the row data of this operation
91 | for (CanalEntry.RowData rowData : rowChange.getRowDatasList()) {
92 | // DELETE operation
93 | if (eventType == CanalEntry.EventType.DELETE) {
94 | // take the column values before the delete
95 | dataDetails(rowData.getBeforeColumnsList(), logfileName, logfileOffset, dbName, tableName, eventType, emptyCount,timestamp);
96 | }
97 | // INSERT operation
98 | else if (eventType == CanalEntry.EventType.INSERT) {
99 | // take the column values after the insert
100 | dataDetails(rowData.getAfterColumnsList(), logfileName, logfileOffset, dbName, tableName, eventType, emptyCount,timestamp);
101 | }
102 | // UPDATE operation
103 | else {
104 | // take the column values after the update
105 | dataDetails(rowData.getAfterColumnsList(), logfileName, logfileOffset, dbName, tableName, eventType, emptyCount,timestamp);
106 | }
107 | }
108 | }
109 | }
110 |
111 | /**
112 |  * Parse the column data of a single binlog row change
113 |  *
114 |  * @param columns       all column data of the current row
115 |  * @param logFileName   binlog file name
116 |  * @param logFileOffset position of this operation within the binlog
117 |  * @param dbName        database the operation belongs to
118 |  * @param tableName     table the operation belongs to
119 |  * @param eventType     operation type (insert, update, delete)
120 |  * @param emptyCount    sequence number of the operation
121 | */
122 | private static void dataDetails(List<CanalEntry.Column> columns,
123 | String logFileName,
124 | Long logFileOffset,
125 | String dbName,
126 | String tableName,
127 | CanalEntry.EventType eventType,
128 | int emptyCount,
129 | long timestamp) {
130 |
131 | // Collect which columns changed in this row, and their values
132 | List<ColumnValuePair> columnValueList = new ArrayList<ColumnValuePair>();
133 |
134 | for (CanalEntry.Column column : columns) {
135 | ColumnValuePair columnValuePair = new ColumnValuePair(column.getName(), column.getValue(), column.getUpdated());
136 | columnValueList.add(columnValuePair);
137 | }
138 |
139 | String key = UUID.randomUUID().toString();
140 |
141 | JSONObject jsonObject = new JSONObject();
142 | jsonObject.put("logFileName", logFileName);
143 | jsonObject.put("logFileOffset", logFileOffset);
144 | jsonObject.put("dbName", dbName);
145 | jsonObject.put("tableName", tableName);
146 | jsonObject.put("eventType", eventType);
147 | jsonObject.put("columnValueList", columnValueList);
148 | jsonObject.put("emptyCount", emptyCount);
149 | jsonObject.put("timestamp", timestamp);
150 |
151 |
152 | // Assemble all parsed binlog fields into a JSON string
153 | String data = JSON.toJSONString(jsonObject);
154 |
155 | System.out.println(data);
156 |
157 | // Send the parsed data to Kafka
158 | KafkaSender.sendMessage(GlobalConfigUtil.kafkaInputTopic, key, data);
159 | }
160 |
161 |
162 | public static void main(String[] args) {
163 |
164 | // Load the configuration
165 | String host = GlobalConfigUtil.canalHost;
166 | int port = Integer.parseInt(GlobalConfigUtil.canalPort);
167 | String instance = GlobalConfigUtil.canalInstance;
168 | String username = GlobalConfigUtil.mysqlUsername;
169 | String password = GlobalConfigUtil.mysqlPassword;
170 |
171 | // Obtain a Canal connection
172 | CanalConnector conn = getConn(host, port, instance, username, password);
173 |
174 | // Read data from the binlog
175 | int batchSize = 100;
176 | int emptyCount = 1;
177 |
178 | try {
179 | // Connect to Canal
180 | conn.connect();
181 | // Subscribe to all databases and tables of the instance
182 | conn.subscribe(".*\\..*");
183 | // Roll back to the last position that has not been acked
184 | conn.rollback();
185 |
186 | int totalCount = 120; // number of loop iterations
187 |
188 | while (totalCount > emptyCount) {
189 | // Fetch a batch of entries
190 | Message message = conn.getWithoutAck(batchSize);
191 |
192 | long id = message.getId();
193 | int size = message.getEntries().size();
194 | if (id == -1 || size == 0) {
195 | // no data was read
196 | } else {
197 | // data available, parse the binlog entries
198 | analysis(message.getEntries(), emptyCount);
199 | emptyCount++;
200 | }
201 |
202 | // Acknowledge the batch
203 | conn.ack(message.getId());
204 |
205 | }
206 | } catch (Exception e) {
207 | e.printStackTrace();
208 | } finally {
209 | conn.disconnect();
210 | }
211 | }
212 | }
213 |
--------------------------------------------------------------------------------
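CanalClient flattens every row change into a JSON object carrying the fields put above (logFileName, logFileOffset, dbName, tableName, eventType, columnValueList, emptyCount, timestamp) and sends it to the topic configured as kafka.input.topic. A hypothetical example of one such record, with made-up database, table and column values:

    // Hypothetical sample of one record sent to the "canal" topic (all values are made up).
    val sampleCanalRecord: String =
      """{"logFileName":"mysql-bin.000001","logFileOffset":4096,"dbName":"pyg","tableName":"commodity",
        |"eventType":"INSERT","emptyCount":1,"timestamp":1571556000000,
        |"columnValueList":[{"columnName":"id","columnValue":"1","isValid":true}]}""".stripMargin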
/canal-kafka/src/main/java/com/henry/canal_kafka/util/GlobalConfigUtil.java:
--------------------------------------------------------------------------------
1 | package com.henry.canal_kafka.util;
2 |
3 | import java.util.ResourceBundle;
4 |
5 | public class GlobalConfigUtil {
6 | // Obtain a resource bundle loader
7 | // The resource bundle automatically loads application.properties from the classpath
8 | private static ResourceBundle resourceBundle = ResourceBundle.getBundle("application");
9 |
10 | // Read configuration values with ResourceBundle.getString
11 | public static String canalHost = resourceBundle.getString("canal.host");
12 | public static String canalPort = resourceBundle.getString("canal.port");
13 | public static String canalInstance = resourceBundle.getString("canal.instance");
14 | public static String mysqlUsername = resourceBundle.getString("mysql.username");
15 | public static String mysqlPassword = resourceBundle.getString("mysql.password");
16 | public static String kafkaBootstrapServers = resourceBundle.getString("kafka.bootstrap.servers");
17 | public static String kafkaZookeeperConnect = resourceBundle.getString("kafka.zookeeper.connect");
18 | public static String kafkaInputTopic = resourceBundle.getString("kafka.input.topic");
19 |
20 | public static void main(String[] args) {
21 | System.out.println(canalHost);
22 | System.out.println(canalPort);
23 | System.out.println(canalInstance);
24 | System.out.println(mysqlUsername);
25 | System.out.println(mysqlPassword);
26 | System.out.println(kafkaBootstrapServers);
27 | System.out.println(kafkaZookeeperConnect);
28 | System.out.println(kafkaInputTopic);
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/canal-kafka/src/main/java/com/henry/canal_kafka/util/KafkaSender.java:
--------------------------------------------------------------------------------
1 | package com.henry.canal_kafka.util;
2 |
3 | import kafka.javaapi.producer.Producer;
4 | import kafka.producer.KeyedMessage;
5 | import kafka.producer.ProducerConfig;
6 | import kafka.serializer.StringEncoder;
7 |
8 | import java.util.Properties;
9 |
10 | /**
11 |  * Utility class for producing messages to Kafka
12 | */
13 | public class KafkaSender {
14 | private String topic;
15 |
16 | public KafkaSender(String topic){
17 | super();
18 | this.topic = topic;
19 | }
20 |
21 | /**
22 |  * Send a message to the given Kafka topic
23 |  *
24 |  * @param topic topic name
25 |  * @param key   message key
26 |  * @param data  message payload
27 | */
28 | public static void sendMessage(String topic , String key , String data){
29 | Producer<String, String> producer = createProducer();
30 | producer.send(new KeyedMessage<String, String>(topic , key , data));
31 | }
32 |
33 | private static Producer<String, String> createProducer(){
34 | Properties properties = new Properties();
35 |
36 | properties.put("metadata.broker.list" , GlobalConfigUtil.kafkaBootstrapServers);
37 | properties.put("zookeeper.connect" , GlobalConfigUtil.kafkaZookeeperConnect);
38 | properties.put("serializer.class" , StringEncoder.class.getName());
39 |
40 | return new Producer<String, String>(new ProducerConfig(properties));
41 | }
42 | }
--------------------------------------------------------------------------------
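KafkaSender uses the legacy kafka.javaapi.producer API that ships with kafka_2.11 0.10.1.0. For comparison only, a minimal sketch of the same send with the newer org.apache.kafka.clients.producer client from the same artifact (written in Scala to match the rest of the project; the object name is illustrative and this is not part of the module):

    import java.util.Properties
    import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
    import com.henry.canal_kafka.util.GlobalConfigUtil

    // Illustrative alternative, not the project's actual sender.
    object NewApiKafkaSenderSketch {
      def sendMessage(topic: String, key: String, data: String): Unit = {
        val props = new Properties()
        // Broker list, as read from application.properties by GlobalConfigUtil
        props.put("bootstrap.servers", GlobalConfigUtil.kafkaBootstrapServers)
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")

        val producer = new KafkaProducer[String, String](props)
        try producer.send(new ProducerRecord[String, String](topic, key, data)).get()
        finally producer.close()
      }
    }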
/canal-kafka/src/main/resources/application.properties:
--------------------------------------------------------------------------------
1 | #
2 | # Canal configuration
3 | #
4 | canal.host=master
5 | canal.port=11111
6 | canal.instance=example
7 | mysql.username=root
8 | mysql.password=123456
9 | #
10 | # Kafka configuration
11 | #
12 | kafka.bootstrap.servers=master:9092,slave1:9092,slave2:9092
13 | kafka.zookeeper.connect=master:2181,slave1:2181,slave2:2181
14 | kafka.input.topic=canal
15 |
--------------------------------------------------------------------------------
/canal-kafka/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | log4j.rootLogger=error,stdout
2 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender
3 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
4 | log4j.appender.stdout.layout.ConversionPattern=%5p - %m%n
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.henry</groupId>
    <artifactId>pyg</artifactId>
    <version>1.0-SNAPSHOT</version>
    <packaging>pom</packaging>

    <modules>
        <module>report</module>
        <module>real-process</module>
        <module>canal-kafka</module>
        <module>sync-db</module>
        <module>batch-process</module>
    </modules>

</project>
--------------------------------------------------------------------------------
/real-process/pom.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>pyg</artifactId>
        <groupId>com.henry</groupId>
        <version>1.0-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>

    <artifactId>real-process</artifactId>

    <properties>
        <scala.version>2.11</scala.version>
        <flink.version>1.6.0</flink.version>
        <hadoop.version>3.2.4</hadoop.version>
        <hbase.version>2.0.0</hbase.version>
    </properties>

    <dependencies>
        <!-- Kafka client -->
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka_${scala.version}</artifactId>
            <version>0.10.1.0</version>
        </dependency>

        <!-- Flink Kafka connector -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka-0.10_${scala.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <!-- Flink Table API -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table_${scala.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <!-- Flink Scala API -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-scala_${scala.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <!-- Flink streaming APIs (Scala and Java) -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-scala_${scala.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_${scala.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <!-- Flink HBase connector -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-hbase_${scala.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>${hbase.version}</version>
        </dependency>

        <!-- Hadoop -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>xml-apis</groupId>
                    <artifactId>xml-apis</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>com.google.protobuf</groupId>
                    <artifactId>protobuf-java</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.83</version>
        </dependency>
    </dependencies>

    <build>
        <sourceDirectory>src/main/scala</sourceDirectory>
        <testSourceDirectory>src/test/scala</testSourceDirectory>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.2</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <artifactSet>
                                <excludes>
                                    <exclude>com.google.code.findbugs:jsr305</exclude>
                                    <exclude>org.slf4j:*</exclude>
                                    <exclude>log4j:*</exclude>
                                </excludes>
                            </artifactSet>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers>
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>com.henry.pyg.App</mainClass>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
--------------------------------------------------------------------------------
/real-process/src/main/resources/application.conf:
--------------------------------------------------------------------------------
1 | #
2 | #
3 | # Kafka configuration
4 | #
5 | # Kafka broker list
6 | bootstrap.servers="master:9092,slave1:9092,slave2:9092"
7 | # ZooKeeper quorum
8 | zookeeper.connect="master:2181,slave1:2181,slave2:2181"
9 | # Kafka topic
10 | input.topic="pyg"
11 | # Consumer group ID
12 | gruop.id="pyg"
13 | # Automatically commit consumed offsets back to Kafka
14 | enable.auto.commit="true"
15 | # Auto-commit interval in milliseconds
16 | auto.commit.interval.ms="5000"
17 | # Start consuming from the latest offset
18 | auto.offset.reset="latest"
19 |
20 | # HBase configuration
21 | //hbase.zookeeper.quorum="master:2181,slave1:2181,slave2:2181"
22 | //hbase.master="master:60000"
23 | //hbase.zookeeper.property.clientPort="2181"
24 | //hbase.rpc.timeout="600000"
25 | //hbase.client.operator.timeout="600000"
26 | //hbase.client.scanner.timeout.period="600000"
--------------------------------------------------------------------------------
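These keys are read at runtime by GlobalConfigutil.scala (listed in the project tree but not included in this excerpt). A minimal sketch, assuming the usual Typesafe Config approach to loading a HOCON file from the classpath; the object and field names are illustrative, not the project's actual code:

    import com.typesafe.config.{Config, ConfigFactory}

    // Illustrative only; the real GlobalConfigutil.scala is not shown in this dump.
    object ConfigSketch {
      // ConfigFactory.load() picks up application.conf from the classpath
      private val config: Config = ConfigFactory.load()

      val bootstrapServers: String     = config.getString("bootstrap.servers")
      val zookeeperConnect: String     = config.getString("zookeeper.connect")
      val inputTopic: String           = config.getString("input.topic")
      val gruopId: String              = config.getString("gruop.id")   // key spelling as in application.conf
      val enableAutoCommit: String     = config.getString("enable.auto.commit")
      val autoCommitIntervalMs: String = config.getString("auto.commit.interval.ms")
      val autoOffsetReset: String      = config.getString("auto.offset.reset")
    }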
/real-process/src/main/resources/hbase-site.xml:
--------------------------------------------------------------------------------
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>

    <!-- HBase data directory on HDFS -->
    <property>
        <name>hbase.rootdir</name>
        <value>hdfs://master:9000/hbase2</value>
    </property>

    <!-- Run HBase in fully distributed mode -->
    <property>
        <name>hbase.cluster.distributed</name>
        <value>true</value>
    </property>

    <!-- HBase master web UI port -->
    <property>
        <name>hbase.master.info.port</name>
        <value>16000</value>
    </property>

    <!-- ZooKeeper quorum used by HBase -->
    <property>
        <name>hbase.zookeeper.quorum</name>
        <value>master:2181,slave1:2181,slave2:2181</value>
    </property>

    <property>
        <name>hbase.zookeeper.property.clientPort</name>
        <value>2181</value>
    </property>

    <property>
        <name>hbase.zookeeper.property.dataDir</name>
        <value>/usr/local/src/zookeeper-3.4.5/hbasedata</value>
    </property>

</configuration>
--------------------------------------------------------------------------------
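The HBase access used by the tasks lives in HBaseUtil.scala (in the tree, not shown in this excerpt). A minimal sketch, assuming the standard hbase-client API, of opening a connection against the quorum configured above; the object name is illustrative:

    import org.apache.hadoop.hbase.HBaseConfiguration
    import org.apache.hadoop.hbase.client.{Connection, ConnectionFactory}

    // Illustrative only; the real HBaseUtil.scala is not shown in this dump.
    object HBaseConnectionSketch {
      // hbase-site.xml on the classpath is picked up by HBaseConfiguration.create();
      // the quorum is set explicitly here only to keep the sketch self-contained.
      private val conf = HBaseConfiguration.create()
      conf.set("hbase.zookeeper.quorum", "master:2181,slave1:2181,slave2:2181")

      lazy val connection: Connection = ConnectionFactory.createConnection(conf)
    }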
/real-process/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | # Define some default values that can be overridden by system properties
18 | hadoop.root.logger=ERROR,console
19 | hadoop.log.dir=.
20 | hadoop.log.file=hadoop.log
21 |
22 | # Define the root logger to the system property "hadoop.root.logger".
23 | log4j.rootLogger=${hadoop.root.logger}, EventCounter
24 |
25 | # Logging Threshold
26 | log4j.threshold=ALL
27 |
28 | # Null Appender
29 | log4j.appender.NullAppender=org.apache.log4j.varia.NullAppender
30 |
31 | #
32 | # Rolling File Appender - cap space usage at 5gb.
33 | #
34 | hadoop.log.maxfilesize=256MB
35 | hadoop.log.maxbackupindex=20
36 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender
37 | log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
38 |
39 | log4j.appender.RFA.MaxFileSize=${hadoop.log.maxfilesize}
40 | log4j.appender.RFA.MaxBackupIndex=${hadoop.log.maxbackupindex}
41 |
42 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
43 |
44 | # Pattern format: Date LogLevel LoggerName LogMessage
45 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
46 | # Debugging Pattern format
47 | #log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
48 |
49 |
50 | #
51 | # Daily Rolling File Appender
52 | #
53 |
54 | log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
55 | log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
56 |
57 | # Rollover at midnight
58 | log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
59 |
60 | # 30-day backup
61 | #log4j.appender.DRFA.MaxBackupIndex=30
62 | log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
63 |
64 | # Pattern format: Date LogLevel LoggerName LogMessage
65 | log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
66 | # Debugging Pattern format
67 | #log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
68 |
69 |
70 | #
71 | # console
72 | # Add "console" to rootlogger above if you want to use this
73 | #
74 |
75 | log4j.appender.console=org.apache.log4j.ConsoleAppender
76 | log4j.appender.console.target=System.err
77 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
78 | log4j.appender.console.layout.ConversionPattern=%d{yy HH:mm:ss} %p %c{2}: %m%n
79 |
80 | #
81 | # TaskLog Appender
82 | #
83 |
84 | #Default values
85 | hadoop.tasklog.taskid=null
86 | hadoop.tasklog.iscleanup=false
87 | hadoop.tasklog.noKeepSplits=4
88 | hadoop.tasklog.totalLogFileSize=100
89 | hadoop.tasklog.purgeLogSplits=true
90 | hadoop.tasklog.logsRetainHours=12
91 |
92 | log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
93 | log4j.appender.TLA.taskId=${hadoop.tasklog.taskid}
94 | log4j.appender.TLA.isCleanup=${hadoop.tasklog.iscleanup}
95 | log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}
96 |
97 | log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
98 | log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
99 |
100 | #
101 | # HDFS block state change log from block manager
102 | #
103 | # Uncomment the following to suppress normal block state change
104 | # messages from BlockManager in NameNode.
105 | #log4j.logger.BlockStateChange=WARN
106 |
107 | #
108 | #Security appender
109 | #
110 | hadoop.security.logger=INFO,NullAppender
111 | hadoop.security.log.maxfilesize=256MB
112 | hadoop.security.log.maxbackupindex=20
113 | log4j.category.SecurityLogger=${hadoop.security.logger}
114 | hadoop.security.log.file=SecurityAuth-${user.name}.audit
115 | log4j.appender.RFAS=org.apache.log4j.RollingFileAppender
116 | log4j.appender.RFAS.File=${hadoop.log.dir}/${hadoop.security.log.file}
117 | log4j.appender.RFAS.layout=org.apache.log4j.PatternLayout
118 | log4j.appender.RFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
119 | log4j.appender.RFAS.MaxFileSize=${hadoop.security.log.maxfilesize}
120 | log4j.appender.RFAS.MaxBackupIndex=${hadoop.security.log.maxbackupindex}
121 |
122 | #
123 | # Daily Rolling Security appender
124 | #
125 | log4j.appender.DRFAS=org.apache.log4j.DailyRollingFileAppender
126 | log4j.appender.DRFAS.File=${hadoop.log.dir}/${hadoop.security.log.file}
127 | log4j.appender.DRFAS.layout=org.apache.log4j.PatternLayout
128 | log4j.appender.DRFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
129 | log4j.appender.DRFAS.DatePattern=.yyyy-MM-dd
130 |
131 | #
132 | # hadoop configuration logging
133 | #
134 |
135 | # Uncomment the following line to turn off configuration deprecation warnings.
136 | # log4j.logger.org.apache.hadoop.conf.Configuration.deprecation=WARN
137 |
138 | #
139 | # hdfs audit logging
140 | #
141 | hdfs.audit.logger=INFO,NullAppender
142 | hdfs.audit.log.maxfilesize=256MB
143 | hdfs.audit.log.maxbackupindex=20
144 | log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=${hdfs.audit.logger}
145 | log4j.additivity.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=false
146 | log4j.appender.RFAAUDIT=org.apache.log4j.RollingFileAppender
147 | log4j.appender.RFAAUDIT.File=${hadoop.log.dir}/hdfs-audit.log
148 | log4j.appender.RFAAUDIT.layout=org.apache.log4j.PatternLayout
149 | log4j.appender.RFAAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
150 | log4j.appender.RFAAUDIT.MaxFileSize=${hdfs.audit.log.maxfilesize}
151 | log4j.appender.RFAAUDIT.MaxBackupIndex=${hdfs.audit.log.maxbackupindex}
152 |
153 | #
154 | # mapred audit logging
155 | #
156 | mapred.audit.logger=INFO,NullAppender
157 | mapred.audit.log.maxfilesize=256MB
158 | mapred.audit.log.maxbackupindex=20
159 | log4j.logger.org.apache.hadoop.mapred.AuditLogger=${mapred.audit.logger}
160 | log4j.additivity.org.apache.hadoop.mapred.AuditLogger=false
161 | log4j.appender.MRAUDIT=org.apache.log4j.RollingFileAppender
162 | log4j.appender.MRAUDIT.File=${hadoop.log.dir}/mapred-audit.log
163 | log4j.appender.MRAUDIT.layout=org.apache.log4j.PatternLayout
164 | log4j.appender.MRAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
165 | log4j.appender.MRAUDIT.MaxFileSize=${mapred.audit.log.maxfilesize}
166 | log4j.appender.MRAUDIT.MaxBackupIndex=${mapred.audit.log.maxbackupindex}
167 |
168 | # Custom Logging levels
169 |
170 | #log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG
171 | #log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
172 | #log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=DEBUG
173 |
174 | # Jets3t library
175 | log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
176 |
177 | # AWS SDK & S3A FileSystem
178 | log4j.logger.com.amazonaws=ERROR
179 | log4j.logger.com.amazonaws.http.AmazonHttpClient=ERROR
180 | log4j.logger.org.apache.hadoop.fs.s3a.S3AFileSystem=WARN
181 |
182 | #
183 | # Event Counter Appender
184 | # Sends counts of logging messages at different severity levels to Hadoop Metrics.
185 | #
186 | log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter
187 |
188 | #
189 | # Job Summary Appender
190 | #
191 | # Use following logger to send summary to separate file defined by
192 | # hadoop.mapreduce.jobsummary.log.file :
193 | # hadoop.mapreduce.jobsummary.logger=INFO,JSA
194 | #
195 | hadoop.mapreduce.jobsummary.logger=${hadoop.root.logger}
196 | hadoop.mapreduce.jobsummary.log.file=hadoop-mapreduce.jobsummary.log
197 | hadoop.mapreduce.jobsummary.log.maxfilesize=256MB
198 | hadoop.mapreduce.jobsummary.log.maxbackupindex=20
199 | log4j.appender.JSA=org.apache.log4j.RollingFileAppender
200 | log4j.appender.JSA.File=${hadoop.log.dir}/${hadoop.mapreduce.jobsummary.log.file}
201 | log4j.appender.JSA.MaxFileSize=${hadoop.mapreduce.jobsummary.log.maxfilesize}
202 | log4j.appender.JSA.MaxBackupIndex=${hadoop.mapreduce.jobsummary.log.maxbackupindex}
203 | log4j.appender.JSA.layout=org.apache.log4j.PatternLayout
204 | log4j.appender.JSA.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
205 | log4j.logger.org.apache.hadoop.mapred.JobInProgress$JobSummary=${hadoop.mapreduce.jobsummary.logger}
206 | log4j.additivity.org.apache.hadoop.mapred.JobInProgress$JobSummary=false
207 |
208 | #
209 | # Yarn ResourceManager Application Summary Log
210 | #
211 | # Set the ResourceManager summary log filename
212 | yarn.server.resourcemanager.appsummary.log.file=rm-appsummary.log
213 | # Set the ResourceManager summary log level and appender
214 | yarn.server.resourcemanager.appsummary.logger=${hadoop.root.logger}
215 | #yarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY
216 |
217 | # To enable AppSummaryLogging for the RM,
218 | # set yarn.server.resourcemanager.appsummary.logger to
219 | # ,RMSUMMARY in hadoop-env.sh
220 |
221 | # Appender for ResourceManager Application Summary Log
222 | # Requires the following properties to be set
223 | # - hadoop.log.dir (Hadoop Log directory)
224 | # - yarn.server.resourcemanager.appsummary.log.file (resource manager app summary log filename)
225 | # - yarn.server.resourcemanager.appsummary.logger (resource manager app summary log level and appender)
226 |
227 | log4j.logger.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=${yarn.server.resourcemanager.appsummary.logger}
228 | log4j.additivity.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=false
229 | log4j.appender.RMSUMMARY=org.apache.log4j.RollingFileAppender
230 | log4j.appender.RMSUMMARY.File=${hadoop.log.dir}/${yarn.server.resourcemanager.appsummary.log.file}
231 | log4j.appender.RMSUMMARY.MaxFileSize=256MB
232 | log4j.appender.RMSUMMARY.MaxBackupIndex=20
233 | log4j.appender.RMSUMMARY.layout=org.apache.log4j.PatternLayout
234 | log4j.appender.RMSUMMARY.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
235 |
236 | # HS audit log configs
237 | #mapreduce.hs.audit.logger=INFO,HSAUDIT
238 | #log4j.logger.org.apache.hadoop.mapreduce.v2.hs.HSAuditLogger=${mapreduce.hs.audit.logger}
239 | #log4j.additivity.org.apache.hadoop.mapreduce.v2.hs.HSAuditLogger=false
240 | #log4j.appender.HSAUDIT=org.apache.log4j.DailyRollingFileAppender
241 | #log4j.appender.HSAUDIT.File=${hadoop.log.dir}/hs-audit.log
242 | #log4j.appender.HSAUDIT.layout=org.apache.log4j.PatternLayout
243 | #log4j.appender.HSAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
244 | #log4j.appender.HSAUDIT.DatePattern=.yyyy-MM-dd
245 |
246 | # Http Server Request Logs
247 | #log4j.logger.http.requests.namenode=INFO,namenoderequestlog
248 | #log4j.appender.namenoderequestlog=org.apache.hadoop.http.HttpRequestLogAppender
249 | #log4j.appender.namenoderequestlog.Filename=${hadoop.log.dir}/jetty-namenode-yyyy_mm_dd.log
250 | #log4j.appender.namenoderequestlog.RetainDays=3
251 |
252 | #log4j.logger.http.requests.datanode=INFO,datanoderequestlog
253 | #log4j.appender.datanoderequestlog=org.apache.hadoop.http.HttpRequestLogAppender
254 | #log4j.appender.datanoderequestlog.Filename=${hadoop.log.dir}/jetty-datanode-yyyy_mm_dd.log
255 | #log4j.appender.datanoderequestlog.RetainDays=3
256 |
257 | #log4j.logger.http.requests.resourcemanager=INFO,resourcemanagerrequestlog
258 | #log4j.appender.resourcemanagerrequestlog=org.apache.hadoop.http.HttpRequestLogAppender
259 | #log4j.appender.resourcemanagerrequestlog.Filename=${hadoop.log.dir}/jetty-resourcemanager-yyyy_mm_dd.log
260 | #log4j.appender.resourcemanagerrequestlog.RetainDays=3
261 |
262 | #log4j.logger.http.requests.jobhistory=INFO,jobhistoryrequestlog
263 | #log4j.appender.jobhistoryrequestlog=org.apache.hadoop.http.HttpRequestLogAppender
264 | #log4j.appender.jobhistoryrequestlog.Filename=${hadoop.log.dir}/jetty-jobhistory-yyyy_mm_dd.log
265 | #log4j.appender.jobhistoryrequestlog.RetainDays=3
266 |
267 | #log4j.logger.http.requests.nodemanager=INFO,nodemanagerrequestlog
268 | #log4j.appender.nodemanagerrequestlog=org.apache.hadoop.http.HttpRequestLogAppender
269 | #log4j.appender.nodemanagerrequestlog.Filename=${hadoop.log.dir}/jetty-nodemanager-yyyy_mm_dd.log
270 | #log4j.appender.nodemanagerrequestlog.RetainDays=3
271 |
272 |
273 | # WebHdfs request log on datanodes
274 | # Specify -Ddatanode.webhdfs.logger=INFO,HTTPDRFA on datanode startup to
275 | # direct the log to a separate file.
276 | #datanode.webhdfs.logger=INFO,console
277 | #log4j.logger.datanode.webhdfs=${datanode.webhdfs.logger}
278 | #log4j.appender.HTTPDRFA=org.apache.log4j.DailyRollingFileAppender
279 | #log4j.appender.HTTPDRFA.File=${hadoop.log.dir}/hadoop-datanode-webhdfs.log
280 | #log4j.appender.HTTPDRFA.layout=org.apache.log4j.PatternLayout
281 | #log4j.appender.HTTPDRFA.layout.ConversionPattern=%d{ISO8601} %m%n
282 | #log4j.appender.HTTPDRFA.DatePattern=.yyyy-MM-dd
283 |
284 | #
285 | # Fair scheduler state dump
286 | #
287 | # Use following logger to dump the state to a separate file
288 |
289 | #log4j.logger.org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler.statedump=DEBUG,FSSTATEDUMP
290 | #log4j.additivity.org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler.statedump=false
291 | #log4j.appender.FSSTATEDUMP=org.apache.log4j.RollingFileAppender
292 | #log4j.appender.FSSTATEDUMP.File=${hadoop.log.dir}/fairscheduler-statedump.log
293 | #log4j.appender.FSSTATEDUMP.layout=org.apache.log4j.PatternLayout
294 | #log4j.appender.FSSTATEDUMP.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
295 | #log4j.appender.FSSTATEDUMP.MaxFileSize=${hadoop.log.maxfilesize}
296 | #log4j.appender.FSSTATEDUMP.MaxBackupIndex=${hadoop.log.maxbackupindex}
297 |
--------------------------------------------------------------------------------
/real-process/src/main/scala/com/henry/realprocess/App.scala:
--------------------------------------------------------------------------------
1 | package com.henry.realprocess
2 |
3 |
4 | import java.util.Properties
5 |
6 | import com.alibaba.fastjson.JSON
7 | import com.henry.realprocess.bean.{ClickLog, ClickLogWide, Message}
8 | import com.henry.realprocess.task._
9 | import com.henry.realprocess.util.GlobalConfigutil
10 | import org.apache.flink.api.common.serialization.SimpleStringSchema
11 | import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
12 | import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
13 | import org.apache.flink.api.scala._
14 | import org.apache.flink.runtime.state.filesystem.FsStateBackend
15 | import org.apache.flink.streaming.api.environment.CheckpointConfig
16 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks
17 | import org.apache.flink.streaming.api.watermark.Watermark
18 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010
19 |
20 |
21 | /**
22 | * @Author: Henry
23 | * @Description: Entry-point class
24 | * @Date: Create in 2019/10/16 22:42
25 | **/
26 | object App {
27 |
28 | def main(args: Array[String]): Unit = {
29 |
30 | //------------ Initialize the Flink streaming environment (ctrl+alt+v to extract variables) --------------------
31 | val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
32 |
33 | // Use event time as the stream time characteristic
34 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
35 |
36 | // Set the parallelism
37 | env.setParallelism(1)
38 |
39 | // Local test: load a local collection as a DataStream and print it
40 | // val localDataStream:DataStream[String] = env.fromCollection(
41 | // List("hadoop", "hive", "hbase", "flink")
42 | // )
43 | // localDataStream.print()
44 |
45 |
46 | //------------ Enable checkpointing -------------------------------
47 | env.enableCheckpointing(5000) // trigger a checkpoint every 5 seconds
48 |
49 | // Exactly-once checkpointing semantics
50 | env.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE)
51 | // Minimum pause between two checkpoints: 1 s
52 | env.getCheckpointConfig.setMinPauseBetweenCheckpoints(1000)
53 | // Checkpoint timeout: 60 s
54 | env.getCheckpointConfig.setCheckpointTimeout(60000)
55 | // At most one checkpoint in flight at a time
56 | env.getCheckpointConfig.setMaxConcurrentCheckpoints(1)
57 | // Retain externalized checkpoints when the job is cancelled
58 | env.getCheckpointConfig.enableExternalizedCheckpoints(
59 | CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION)
60 |
61 |
62 | // Checkpoint storage location (state backend)
63 | env.setStateBackend(new FsStateBackend("hdfs://master:9000/flink-checkpoint/"))
64 |
65 |
66 | //--------------- Kafka integration --------------------------
67 | val properties = new Properties()
68 | // Kafka broker list
69 | properties.setProperty("bootstrap.servers", GlobalConfigutil.bootstrapServers)
70 | // ZooKeeper quorum
71 | properties.setProperty("zookeeper.connect", GlobalConfigutil.zookeeperConnect)
72 | // Kafka topic
73 | properties.setProperty("input.topic", GlobalConfigutil.inputTopic)
74 | // Consumer group ID
75 | properties.setProperty("group.id", GlobalConfigutil.gruopId)
76 | // Automatically commit consumed offsets back to Kafka
77 | properties.setProperty("enable.auto.commit", GlobalConfigutil.enableAutoCommit)
78 | // Auto-commit interval in milliseconds
79 | properties.setProperty("auto.commit.interval.ms", GlobalConfigutil.autoCommitIntervalMs)
80 | // Start consuming from the latest offset
81 | properties.setProperty("auto.offset.reset", GlobalConfigutil.autoOffsetReset)
82 |
83 |
84 | // topic, deserialization schema, and consumer properties
85 | val consumer = new FlinkKafkaConsumer010[String](
86 | GlobalConfigutil.inputTopic,
87 | new SimpleStringSchema(),
88 | properties)
89 |
90 | val kafkaDataStream: DataStream[String] = env.addSource(consumer)
91 |
92 | // kafkaDataStream.print()
93 |
94 | // Parse the JSON into a Message
95 | val tupleDataStream = kafkaDataStream.map {
96 | msgJson =>
97 | val jsonObject = JSON.parseObject(msgJson)
98 |
99 | val message = jsonObject.getString("message")
100 | val count = jsonObject.getLong("count")
101 | val timeStamp = jsonObject.getLong("timestamp")
102 |
103 | // (message, count, timeStamp)
104 | // refactored into a case class
105 | // (ClickLog(message), count, timeStamp)
106 | Message(ClickLog(message), count, timeStamp)
107 |
108 | }
109 |
110 | // tupleDataStream.print()
111 |
112 | //----------------- Watermark support -----------------------
113 |
114 | var watermarkDataStream = tupleDataStream.assignTimestampsAndWatermarks(
115 | new AssignerWithPeriodicWatermarks[Message] {
116 |
117 | var currentTimestamp = 0L
118 |
119 | // Maximum allowed delay (lateness)
120 | var maxDelayTime = 2000L
121 |
122 | // Build the current watermark
123 | override def getCurrentWatermark: Watermark = {
124 | // The watermark lags the latest event time by 2 s
125 | new Watermark(currentTimestamp - maxDelayTime)
126 | }
127 |
128 | // Extract the event time of the current element
129 | override def extractTimestamp(
130 | element: Message,
131 | previousElementTimestamp: Long): Long = {
132 | currentTimestamp = Math.max(element.timeStamp, previousElementTimestamp)
133 | currentTimestamp
134 | }
135 | })
136 |
137 | // Data preprocessing
138 | val clickLogWideDateStream : DataStream[ClickLogWide] = PreprocessTask.process(watermarkDataStream)
139 | // clickLogWideDateStream.print()
140 |
141 | // Transformations
142 | // ChannelRealHotTask.process(clickLogWideDateStream).print()
143 | // ChannelRealHotTask.process(clickLogWideDateStream)
144 |
145 | // PV / UV aggregation
146 | ChannelPvUvTask.process(clickLogWideDateStream)
147 | // ChannelPvUvTaskMerge.process(clickLogWideDateStream)
148 | // ChannelFreshnessTask.process(clickLogWideDateStream)
149 |
150 | // Refactored with the template-method trait
151 | ChannelFreshnessTaskTrait.process(clickLogWideDateStream)
152 |
153 | // ChannelAreaTask test
154 | ChannelAreaTask.process(clickLogWideDateStream)
155 |
156 | // ChannelNetworkTask test
157 | ChannelNetworkTask.process(clickLogWideDateStream)
158 |
159 | // ChannelBrowserTask test
160 | ChannelBrowserTask.process(clickLogWideDateStream)
161 |
162 |
163 | // Execute the job
164 | env.execute("real-process")
165 | }
166 | }
167 |
--------------------------------------------------------------------------------
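The map step in App.scala expects each Kafka record to be a JSON object carrying a `message` field (itself a JSON-encoded ClickLog), a `count` and a `timestamp`. A hypothetical sample record, with made-up values, just to illustrate the expected shape:

    // Hypothetical sample of one record on the "pyg" topic (all values are made up).
    val sampleRecord: String =
      """{"count":1,"timestamp":1571556000000,
        |"message":"{\"channelID\":\"1\",\"categoryID\":\"3\",\"produceID\":\"5\",\"country\":\"china\",\"province\":\"HeBei\",\"city\":\"ShiJiaZhuang\",\"network\":\"4G\",\"source\":\"direct\",\"browserType\":\"chrome\",\"entryTime\":\"1577080460000\",\"leaveTime\":\"1577080480000\",\"userID\":\"1001\"}"}""".stripMargin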
/real-process/src/main/scala/com/henry/realprocess/bean/ClickLog.scala:
--------------------------------------------------------------------------------
1 | package com.henry.realprocess.bean
2 |
3 | import com.alibaba.fastjson.JSON
4 |
5 | /**
6 | * @Author: Henry
7 | * @Description:
8 | * @Date: Create in 2019/10/20 14:45
9 | **/
10 |
11 | // Channel ID (channelID)
12 | // Product category ID (categoryID)
13 | // Product ID (produceID)
14 | // Country (country)
15 | // Province (province)
16 | // City (city)
17 | // Network type (network)
18 | // Traffic source (source)
19 | // Browser type (browserType)
20 | // Site entry time (entryTime)
21 | // Site leave time (leaveTime)
22 | // User ID (userID)
23 |
24 | case class ClickLog (
25 | // 1. alt + drag down (column edit)
26 | // 2. ctrl + shift + →, select each variable
27 | var channelID:String,
28 | var categoryID:String,
29 | var produceID:String,
30 | var country:String,
31 | var province:String,
32 | var city:String,
33 | var network:String,
34 | var source:String,
35 | var browserType:String,
36 | var entryTime:String,
37 | var leaveTime:String,
38 | var userID:String
39 | )
40 |
41 | object ClickLog{
42 |
43 | def apply(json: String): ClickLog = {
44 |
45 | // First parse the JSON string into a JSONObject
46 | val jsonObject = JSON.parseObject(json)
47 |
48 | // Extract each attribute from the JSONObject and assign it to the case class
49 | var channelID = jsonObject.getString("channelID")
50 | var categoryID = jsonObject.getString("categoryID")
51 | var produceID = jsonObject.getString("produceID")
52 | var country = jsonObject.getString("country")
53 | var province = jsonObject.getString("province")
54 | var city = jsonObject.getString("city")
55 | var network = jsonObject.getString("network")
56 | var source = jsonObject.getString("source")
57 | var browserType = jsonObject.getString("browserType")
58 | var entryTime = jsonObject.getString("entryTime")
59 | var leaveTime = jsonObject.getString("leaveTime")
60 | var userID = jsonObject.getString("userID")
61 |
62 | ClickLog(
63 | channelID,
64 | categoryID,
65 | produceID,
66 | country,
67 | province,
68 | city,
69 | network,
70 | source,
71 | browserType,
72 | entryTime,
73 | leaveTime,
74 | userID
75 | )
76 | }
77 | }
--------------------------------------------------------------------------------
/real-process/src/main/scala/com/henry/realprocess/bean/ClickLogWide.scala:
--------------------------------------------------------------------------------
1 | package com.henry.realprocess.bean
2 |
3 | /**
4 | * @Author: Henry
5 | * @Description:
6 | * @Date: Create in 2019/10/27 14:19
7 | **/
8 |
9 | // Channel ID (channelID)
10 | // Product category ID (categoryID)
11 | // Product ID (produceID)
12 | // Country (country)
13 | // Province (province)
14 | // City (city)
15 | // Network type (network)
16 | // Traffic source (source)
17 | // Browser type (browserType)
18 | // Site entry time (entryTime)
19 | // Site leave time (leaveTime)
20 | // User ID (userID)
21 | // ---- Added fields ---------------
22 | // Number of user visits (count)
23 | // Visit timestamp (timestamp)
24 | // Country + province + city concatenated (address)
25 | // Year-month (yearMonth)
26 | // Year-month-day (yearMonthDay)
27 | // Year-month-day-hour (yearMonthDayHour)
28 | // Whether the user is new to the channel (isNew) -- 0: no; 1: yes
29 | // Whether the user is new to the channel within the hour (isHourNew) -- 0: no; 1: yes
30 | // Whether the user is new to the channel within the day (isDayNew) -- 0: no; 1: yes
31 | // Whether the user is new to the channel within the month (isMonthNew) -- 0: no; 1: yes
32 |
33 | case class ClickLogWide (
34 | // 1. alt + drag down (column edit)
35 | // 2. ctrl + shift + →, select each variable
36 | var channelID:String,
37 | var categoryID:String,
38 | var produceID:String,
39 | var country:String,
40 | var province:String,
41 | var city:String,
42 | var network:String,
43 | var source:String,
44 | var browserType:String,
45 | var entryTime:String,
46 | var leaveTime:String,
47 | var userID:String,
48 | //--- Added fields ---------------------------
49 | var count:Long,
50 | var timestamp:Long,
51 | var address:String,
52 | var yearMonth:String,
53 | var yearMonthDay:String,
54 | var yearMonthDayHour:String,
55 | var isNew:Int,
56 | var isHourNew:Int,
57 | var isDayNew:Int,
58 | var isMonthNew:Int
59 |
60 | )
61 |
62 |
--------------------------------------------------------------------------------
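The extra fields of ClickLogWide are filled in by PreprocessTask.scala (listed in the tree but not shown in this excerpt). A rough sketch, not the project's actual implementation, of how the derived fields could be computed from a Message; the new-user flags need a per-user visit history (the real task keeps it in HBase) and are hard-coded to 0 here:

    import java.text.SimpleDateFormat
    import java.util.Date
    import com.henry.realprocess.bean.{ClickLogWide, Message}

    // Illustrative only; PreprocessTask.scala is not shown in this dump.
    object WideningSketch {
      def widen(msg: Message): ClickLogWide = {
        val log = msg.clickLog
        // Concatenate country / province / city into the address field
        val address = log.country + log.province + log.city
        val ts = new Date(msg.timeStamp)
        val yearMonth        = new SimpleDateFormat("yyyyMM").format(ts)
        val yearMonthDay     = new SimpleDateFormat("yyyyMMdd").format(ts)
        val yearMonthDayHour = new SimpleDateFormat("yyyyMMddHH").format(ts)

        // isNew / isHourNew / isDayNew / isMonthNew require a user-history lookup; 0 as a placeholder.
        ClickLogWide(
          log.channelID, log.categoryID, log.produceID, log.country, log.province, log.city,
          log.network, log.source, log.browserType, log.entryTime, log.leaveTime, log.userID,
          msg.count, msg.timeStamp, address, yearMonth, yearMonthDay, yearMonthDayHour,
          0, 0, 0, 0
        )
      }
    }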
/real-process/src/main/scala/com/henry/realprocess/bean/Message.scala:
--------------------------------------------------------------------------------
1 | package com.henry.realprocess.bean
2 |
3 | /**
4 | * @Author: Henry
5 | * @Description:
6 | * @Date: Create in 2019/10/20 15:58
7 | **/
8 | case class Message (
9 | var clickLog:ClickLog,
10 | var count:Long,
11 | var timeStamp:Long
12 | )
13 |
--------------------------------------------------------------------------------
/real-process/src/main/scala/com/henry/realprocess/task/BaseTask.scala:
--------------------------------------------------------------------------------
1 | package com.henry.realprocess.task
2 |
3 | import com.henry.realprocess.bean.ClickLogWide
4 | import com.henry.realprocess.task.ChannelBrowserTask.pvColName
5 | import org.apache.commons.lang.StringUtils
6 | import org.apache.flink.streaming.api.scala.{DataStream, KeyedStream, WindowedStream}
7 | import org.apache.flink.streaming.api.windowing.time.Time
8 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow
9 |
10 | /**
11 | * @Author: Henry
12 | * @Description:
13 | * @Date: Create in 2019/11/3 10:42
14 | **/
15 |
16 | trait BaseTask[T] {
17 |
18 |
19 | // 1. Transform
20 | def map(clickLogWideDataStream : DataStream[ClickLogWide]): DataStream[T]
21 |
22 | // 2. Group by key
23 | def keyBy(mapDataStream : DataStream[T]): KeyedStream[T, String]
24 |
25 | // 3. Time window
26 | def timeWindow(keyedStream: KeyedStream[T, String]) : WindowedStream[T, String, TimeWindow] = {
27 | // All subclasses use a 3-second time window
28 | keyedStream.timeWindow(Time.seconds(3))
29 | }
30 |
31 | // 4. Aggregate
32 | def reduce(windowedStream : WindowedStream[T, String, TimeWindow]) : DataStream[T]
33 |
34 | // 5. Sink to HBase
35 | def sink2HBase(reduceDataStream: DataStream[T])
36 |
37 |
38 | // Template method: defines the execution order
39 | def process(clickLogWideDataStream : DataStream[ClickLogWide]): Unit = {
40 | val mapDataStream: DataStream[T] = map(clickLogWideDataStream)
41 | val keyedStream: KeyedStream[T, String] = keyBy(mapDataStream)
42 | val windowedStream: WindowedStream[T, String, TimeWindow] = timeWindow(keyedStream)
43 | val reduceStream: DataStream[T] = reduce(windowedStream)
44 | sink2HBase(reduceStream)
45 | }
46 |
47 | // An old user's first visit in the given period counts as 1, otherwise 0
48 | val isOld = (isNew: Int, isDateNew: Int) => if (isNew == 0 && isDateNew == 1) 1 else 0
49 |
50 | // HBase-related table and column names
51 | var tableName = ""
52 | var clfName = "info"
53 | var rowkey = ""
54 | var channelIdColName = "channelID"
55 | var browserColName = "browser"
56 | var dateColName = "date"
57 | var pvColName = "pv"
58 | var uvColName = "uv"
59 | var newCountColName = "newCount"
60 | var oldCountColName = "oldCount"
61 |
62 |
63 | /* Accumulate the value of a column
64 | * @param resultMap    map of existing column values
65 | * @param column       the column to look up
66 | * @param currentValue the current value
67 | * @return the accumulated value
68 | */
69 | def getTotal(resultMap: Map[String, String],column:String,currentValue:Long):Long={
70 |
71 | var total = currentValue
72 | // If resultMap is not empty and the column has a value, add it to the current value
73 | if (resultMap != null && StringUtils.isNotBlank(resultMap.getOrElse(column,""))) {
74 | total = resultMap(column).toLong + currentValue
75 | }
76 | total
77 | }
78 |
79 |
80 | }
81 |
--------------------------------------------------------------------------------
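BaseTask is a template-method trait: process() fixes the pipeline order (map -> keyBy -> timeWindow -> reduce -> sink2HBase), and each concrete task only implements the abstract steps, as the tasks below do. A stripped-down illustration with a hypothetical task that is not part of the project:

    import com.henry.realprocess.bean.ClickLogWide
    import com.henry.realprocess.task.BaseTask
    import org.apache.flink.api.scala._
    import org.apache.flink.streaming.api.scala.{DataStream, KeyedStream, WindowedStream}
    import org.apache.flink.streaming.api.windowing.windows.TimeWindow

    // Hypothetical example: counts pv per channel.
    case class ChannelPv(channelId: String, pv: Long)

    object ChannelPvSketchTask extends BaseTask[ChannelPv] {

      override def map(stream: DataStream[ClickLogWide]): DataStream[ChannelPv] =
        stream.map(log => ChannelPv(log.channelID, log.count))

      override def keyBy(stream: DataStream[ChannelPv]): KeyedStream[ChannelPv, String] =
        stream.keyBy(_.channelId)

      // the 3-second timeWindow is inherited from BaseTask

      override def reduce(stream: WindowedStream[ChannelPv, String, TimeWindow]): DataStream[ChannelPv] =
        stream.reduce((a, b) => ChannelPv(a.channelId, a.pv + b.pv))

      override def sink2HBase(stream: DataStream[ChannelPv]): Unit =
        stream.print()  // a real task would write to HBase via HBaseUtil
    }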
/real-process/src/main/scala/com/henry/realprocess/task/ChannelAreaTask.scala:
--------------------------------------------------------------------------------
1 | package com.henry.realprocess.task
2 | import com.henry.realprocess.bean.ClickLogWide
3 | import com.henry.realprocess.util.HBaseUtil
4 | import org.apache.commons.lang.StringUtils
5 | import org.apache.flink.streaming.api.scala.{DataStream, KeyedStream, WindowedStream}
6 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow
7 | import org.apache.flink.api.scala._
8 | import org.apache.flink.streaming.api.windowing.time.Time
9 |
10 |
11 | /**
12 | * @Author: Henry
13 | * @Description:
14 | * @Date: Create in 2019/11/3 14:06
15 | **/
16 |
17 | // Case class
18 | case class ChannelArea(
19 | var channelId: String,
20 | var area: String,
21 | var date: String,
22 | var pv: Long,
23 | var uv: Long,
24 | var newCount: Long,
25 | var oldCount: Long
26 | )
27 |
28 | object ChannelAreaTask extends BaseTask [ChannelArea]{
29 |
30 | // 1. Transform
31 | override def map(clickLogWideDataStream: DataStream[ClickLogWide]): DataStream[ChannelArea] = {
32 |
33 | clickLogWideDataStream.flatMap{
34 |
35 | clickLogWide =>{
36 |
37 | // If this is an old user visiting for the first time in the period, count 1; otherwise 0
38 | val isOld = (isNew: Int, isDateNew: Int) => if (isNew == 0 && isDateNew == 1) 1 else 0
39 |
40 | List(
41 | ChannelArea( // month dimension
42 | clickLogWide.channelID,
43 | clickLogWide.address,
44 | clickLogWide.yearMonth,
45 | clickLogWide.count, // pv, incremented for every record
46 | clickLogWide.isMonthNew, // uv, counted only on the first visit
47 | clickLogWide.isNew, // incremented when the user is new
48 | isOld(clickLogWide.isNew, clickLogWide.isMonthNew)
49 | ),
50 | ChannelArea( // day dimension
51 | clickLogWide.channelID,
52 | clickLogWide.address,
53 | clickLogWide.yearMonthDay,
54 | clickLogWide.count,
55 | clickLogWide.isDayNew,
56 | clickLogWide.isNew,
57 | isOld(clickLogWide.isNew, clickLogWide.isDayNew)
58 | ),
59 | ChannelArea( // hour dimension
60 | clickLogWide.channelID,
61 | clickLogWide.address,
62 | clickLogWide.yearMonthDayHour,
63 | clickLogWide.count,
64 | clickLogWide.isHourNew,
65 | clickLogWide.isNew,
66 | isOld(clickLogWide.isNew, clickLogWide.isHourNew)
67 | )
68 | )
69 | }
70 | }
71 | }
72 |
73 | // 2. Group by channel ID + area + date
74 | override def keyBy(mapDataStream: DataStream[ChannelArea]): KeyedStream[ChannelArea, String] = {
75 | mapDataStream.keyBy{
76 | area =>
77 | area.channelId + " : " + area.area + " : " + area.date
78 | }
79 | }
80 |
81 | // 3. Time window: identical in every subclass, so it is implemented in the parent trait
82 | // override def timeWindow(keyedStream: KeyedStream[ChannelArea, String]): WindowedStream[ChannelArea, String, TimeWindow] = {}
83 |
84 |
85 | // 4. Aggregate: sum the four counter fields
86 | override def reduce(windowedStream: WindowedStream[ChannelArea, String, TimeWindow]) = {
87 | windowedStream.reduce {
88 | (t1, t2) =>
89 | ChannelArea(t1.channelId, t1.area,
90 | t1.date,
91 | t1.pv + t2.pv,
92 | t1.uv + t2.uv,
93 | t1.newCount + t2.newCount,
94 | t1.oldCount + t2.oldCount)
95 | }
96 | }
97 |
98 |
99 | // 5. Sink to HBase
100 | override def sink2HBase(reduceDataStream: DataStream[ChannelArea]): Unit = {
101 | reduceDataStream.addSink{
102 | area => {
103 | // HBase-related fields
104 | val tableName = "channel_area"
105 | val clfName = "info"
106 | val rowkey = area.channelId + ":" + area.area + ":" + area.date
107 |
108 | val channelIdColumn = "channelId"
109 | val areaColumn = "area"
110 | val dateColumn = "date"
111 | val pvColumn = "pv"
112 | val uvColumn = "uv"
113 | val newCountColumn = "newCount"
114 | val oldCountColumn = "oldCount"
115 |
116 | // Query HBase
117 | val pvInHbase: String = HBaseUtil.getData(tableName,rowkey,clfName,pvColumn)
118 | val uvInHbase: String = HBaseUtil.getData(tableName,rowkey,clfName,uvColumn)
119 | val newCountInHbase: String = HBaseUtil.getData(tableName,rowkey,clfName,newCountColumn)
120 | val oldCountInHbase: String = HBaseUtil.getData(tableName,rowkey,clfName,oldCountColumn)
121 |
122 | // Accumulate
123 | var totalPv = 0L
124 | var totalUv = 0L
125 | var totalNewCount = 0L
126 | var totalOldCount = 0L
127 |
128 | // PV
129 | if(StringUtils.isNotBlank(pvInHbase)){
130 | totalPv = pvInHbase.toLong+area.pv
131 | }else{
132 | totalPv = area.pv
133 | }
134 |
135 | // UV
136 | if(StringUtils.isNotBlank(uvInHbase)){
137 | totalUv = uvInHbase.toLong+area.uv
138 | }else{
139 | totalUv = area.uv
140 | }
141 |
142 | // totalNewCount
143 | if(StringUtils.isNotBlank(newCountInHbase)){
144 | totalNewCount = newCountInHbase.toLong+area.newCount
145 | }else{
146 | totalNewCount = area.newCount
147 | }
148 |
149 | // totalOldCount
150 | if(StringUtils.isNotBlank(oldCountInHbase)){
151 | totalOldCount = oldCountInHbase.toLong+area.oldCount
152 | }else{
153 | totalOldCount = area.oldCount
154 | }
155 |
156 | // Save the data
157 | HBaseUtil.putMapData(tableName,rowkey,clfName,Map(
158 | channelIdColumn->area.channelId,
159 | areaColumn->area.area,
160 | dateColumn->area.date,
161 | pvColumn->totalPv,
162 | uvColumn->totalUv,
163 | newCountColumn->totalNewCount,
164 | oldCountColumn->totalOldCount
165 | ))
166 |
167 | }
168 | }
169 | }
170 | }
171 |
--------------------------------------------------------------------------------
/real-process/src/main/scala/com/henry/realprocess/task/ChannelBrowserTask.scala:
--------------------------------------------------------------------------------
1 | package com.henry.realprocess.task
2 |
3 | import com.henry.realprocess.bean.ClickLogWide
4 | import com.henry.realprocess.util.HBaseUtil
5 | import org.apache.commons.lang.StringUtils
6 | import org.apache.flink.streaming.api.scala.{DataStream, KeyedStream, WindowedStream}
7 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow
8 | import org.apache.flink.api.scala._
9 | import org.apache.flink.streaming.api.functions.sink.SinkFunction
10 | import org.apache.flink.streaming.api.windowing.time.Time
11 |
12 |
13 | /**
14 | * @Author: Henry
15 | * @Description:
16 | * @Date: Create in 2019/11/3 15:52
17 | **/
18 |
19 | // 2. A `ChannelBrowser` case class that wraps the fields to aggregate: channel ID (channelID), browser
20 | // (browser), date (date), pv, uv, new-user count (newCount), old-user count (oldCount)
21 | case class ChannelBrowser(
22 | var channelId: String,
23 | var browser: String,
24 | var date: String,
25 | var pv: Long,
26 | var uv: Long,
27 | var newCount: Long,
28 | var oldCount: Long
29 | )
30 |
31 |
32 | object ChannelBrowserTask extends BaseTask[ChannelBrowser]{
33 |
34 | override def map(clickLogWideDataStream: DataStream[ClickLogWide]): DataStream[ChannelBrowser] = {
35 |
36 | clickLogWideDataStream.flatMap{
37 | clickLogWide => {
38 | List(
39 | ChannelBrowser( // 月维度
40 | clickLogWide.channelID,
41 | clickLogWide.browserType,
42 | clickLogWide.yearMonth,
43 | clickLogWide.count,
44 | clickLogWide.isMonthNew,
45 | clickLogWide.isNew,
46 | isOld(clickLogWide.isNew, clickLogWide.isMonthNew)
47 | ),
48 | ChannelBrowser( // 天维度
49 | clickLogWide.channelID,
50 | clickLogWide.browserType,
51 | clickLogWide.yearMonthDay,
52 | clickLogWide.count,
53 | clickLogWide.isDayNew,
54 | clickLogWide.isNew,
55 | isOld(clickLogWide.isNew, clickLogWide.isDayNew)
56 | ),
57 | ChannelBrowser( // 小时维度
58 | clickLogWide.channelID,
59 | clickLogWide.browserType,
60 | clickLogWide.yearMonthDayHour,
61 | clickLogWide.count,
62 | clickLogWide.isHourNew,
63 | clickLogWide.isNew,
64 | isOld(clickLogWide.isNew, clickLogWide.isHourNew)
65 | )
66 | )
67 | }
68 | }
69 | }
70 |
71 | override def keyBy(mapDataStream: DataStream[ChannelBrowser]): KeyedStream[ChannelBrowser, String] = {
72 |
73 | mapDataStream.keyBy {
74 | browser =>
75 | browser.channelId +" : "+ browser.browser +" : "+ browser.date
76 | }
77 | }
78 |
79 | override def reduce(windowedStream: WindowedStream[ChannelBrowser, String, TimeWindow]): DataStream[ChannelBrowser] = {
80 | windowedStream.reduce {
81 | (t1, t2) => {
82 | ChannelBrowser(
83 | t1.channelId,
84 | t1.browser,
85 | t1.date,
86 | t1.pv + t2.pv,
87 | t1.uv + t2.uv,
88 | t1.newCount + t2.newCount,
89 | t1.oldCount + t2.oldCount
90 | )
91 | }
92 | }
93 | }
94 |
95 |
96 | override def sink2HBase(reduceDataStream: DataStream[ChannelBrowser]): Unit = {
97 |
98 | reduceDataStream.addSink(
99 | browser => {
100 |
101 | // 创建 HBase 相关列 - 准备hbase的表名、列族名、rowkey名、列名
102 | // 不需要加 val 或者 var ,因为引用的是父类的变量
103 | tableName = "channel_browser"
104 | rowkey = s"${browser.channelId} : ${browser.date} : ${browser.browser}" // 引用变量的方式
105 | browserColName = "browser"
106 |
107 |
108 | // 查询 HBase
109 | // - 判断hbase中是否已经存在结果记录
110 | val resultMap: Map[String, String] = HBaseUtil.getMapData(tableName, rowkey, clfName,
111 | List( pvColName, uvColName, newCountColName, oldCountColName )
112 | )
113 |
114 | // 数据累加
115 | // 保存数据
116 | HBaseUtil.putMapData(
117 | tableName, rowkey, clfName, Map(
118 | channelIdColName -> browser.channelId,
119 | browserColName -> browser.browser,
120 | dateColName -> browser.date,
121 | pvColName -> getTotal(resultMap, pvColName , browser.pv),
122 | uvColName -> getTotal(resultMap, uvColName , browser.uv),
123 | newCountColName -> getTotal(resultMap, newCountColName , browser.newCount),
124 |             oldCountColName -> getTotal(resultMap, oldCountColName , browser.oldCount)
125 | )
126 | )
127 | }
128 | )
129 | }
130 | }
131 |
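Note: `ChannelBrowserTask` relies on column-name fields (`tableName`, `rowkey`, `clfName`, `pvColName`, ...) and a `getTotal` helper inherited from `BaseTask`. As a rough sketch of what such a helper presumably does (an assumption for illustration, not the repo's exact code), it adds the value already stored in HBase to the current window's value:

    def getTotal(resultMap: Map[String, String], columnName: String, currentValue: Long): Long = {
      // if HBase already holds a value for this column, accumulate; otherwise start from the current value
      if (resultMap != null && StringUtils.isNotBlank(resultMap.getOrElse(columnName, ""))) {
        resultMap(columnName).toLong + currentValue
      } else {
        currentValue
      }
    }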
--------------------------------------------------------------------------------
/real-process/src/main/scala/com/henry/realprocess/task/ChannelFreshnessTask.scala:
--------------------------------------------------------------------------------
1 | package com.henry.realprocess.task
2 |
3 | import com.henry.realprocess.bean.ClickLogWide
4 | import com.henry.realprocess.util.HBaseUtil
5 | import org.apache.commons.lang.StringUtils
6 | import org.apache.flink.streaming.api.scala.{DataStream, KeyedStream, WindowedStream}
7 | import org.apache.flink.api.scala._
8 | import org.apache.flink.streaming.api.functions.sink.SinkFunction
9 | import org.apache.flink.streaming.api.windowing.time.Time
10 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow
11 |
12 | /**
13 | * @Author: Henry
14 | * @Description:
15 | * @Date: Create in 2019/10/31 21:38
16 | **/
17 |
18 | case class ChannelFreshness(
19 | var channelId : String ,
20 | var date : String ,
21 | var newCount: Long ,
22 | val oldCount: Long
23 |
24 | )
25 |
26 | /**
27 | * 1、 转换
28 | * 2、 分组
29 | * 3、 时间窗口
30 | * 4、 聚合
31 | * 5、 落地 HBase
32 | */
33 | object ChannelFreshnessTask {
34 |
35 | def process(clickLogWideDataStream: DataStream[ClickLogWide])= {
36 |
37 | // 1、 转换
38 | val mapDataStream: DataStream[ChannelFreshness] = clickLogWideDataStream.flatMap {
39 | clickLog =>
40 |
41 | // 如果是老用户,只有在第一次来的时候,计数为 1
42 | val isOld = (isNew: Int, isDateNew: Int) => if (isNew == 0 && isDateNew == 1) 1 else 0
43 | // 统计新用户、老用户数量
44 | List(
45 | ChannelFreshness(clickLog.channelID, clickLog.yearMonthDayHour, clickLog.isNew, isOld(clickLog.isNew, clickLog.isHourNew)),
46 | ChannelFreshness(clickLog.channelID, clickLog.yearMonthDay, clickLog.isNew, isOld(clickLog.isNew, clickLog.isDayNew)),
47 | ChannelFreshness(clickLog.channelID, clickLog.yearMonth, clickLog.isNew, isOld(clickLog.isNew, clickLog.isMonthNew))
48 | )
49 | }
50 |
51 | // 2、 分组
52 | val keyedStream: KeyedStream[ChannelFreshness, String] = mapDataStream.keyBy {
53 | freshness => (freshness.channelId + freshness.date)
54 | }
55 |
56 |
57 | // 3、 时间窗口
58 | val windowedStream: WindowedStream[ChannelFreshness, String, TimeWindow] = keyedStream.timeWindow(Time.seconds(3))
59 |
60 |
61 | // 4、 聚合
62 | val reduceDataStream: DataStream[ChannelFreshness] = windowedStream.reduce {
63 | (t1, t2) =>
64 | ChannelFreshness(t1.channelId, t1.date, t1.newCount + t2.newCount, t1.oldCount + t2.oldCount)
65 | }
66 |
67 | // 5、 落地 HBase
68 | reduceDataStream.addSink(new SinkFunction[ChannelFreshness] {
69 | override def invoke(value: ChannelFreshness): Unit = {
70 | // 创建 HBase 相关变量
71 | val tableName = "channel_freshness"
72 | val clfName = "info"
73 | val channelIdColumn = "channelId"
74 | val dateColumn = "date"
75 | val newCountColumn = "newCount"
76 | val oldCountColumn = "oldCount"
77 |
78 | val rowkey = value.channelId + ":" + value.date
79 |
80 | // 查询历史数据
81 | val resultMap: Map[String, String] = HBaseUtil.getMapData(tableName, rowkey, clfName, List(newCountColumn, oldCountColumn))
82 |
83 | // 累加
84 | var totalNewCount = 0L
85 | var totalOldCount = 0L
86 |
87 | if(resultMap != null && StringUtils.isNotBlank(resultMap.getOrElse(newCountColumn,""))){
88 |           totalNewCount = resultMap(newCountColumn).toLong + value.newCount
89 | }
90 | else {
91 | totalNewCount = value.newCount
92 | }
93 |
94 | if(resultMap != null && StringUtils.isNotBlank(resultMap.getOrElse(oldCountColumn,""))){
95 |           totalOldCount = resultMap(oldCountColumn).toLong + value.oldCount
96 | }
97 | else {
98 | totalOldCount = value.oldCount
99 | }
100 |
101 |
102 | // 保存数据
103 | HBaseUtil.putMapData(tableName, rowkey, clfName, Map(
104 | // 向如下列插入数据
105 | channelIdColumn -> value.channelId ,
106 | dateColumn -> value.date ,
107 | newCountColumn -> totalNewCount ,
108 | oldCountColumn -> totalOldCount
109 | ))
110 |
111 | }
112 | })
113 | }
114 |
115 | }
116 |
--------------------------------------------------------------------------------
/real-process/src/main/scala/com/henry/realprocess/task/ChannelFreshnessTaskTrait.scala:
--------------------------------------------------------------------------------
1 | package com.henry.realprocess.task
2 |
3 | import com.henry.realprocess.bean.ClickLogWide
4 | import com.henry.realprocess.util.HBaseUtil
5 | import org.apache.commons.lang.StringUtils
6 | import org.apache.flink.streaming.api.scala.{DataStream, KeyedStream, WindowedStream}
7 | import org.apache.flink.api.scala._
8 | import org.apache.flink.streaming.api.functions.sink.SinkFunction
9 | import org.apache.flink.streaming.api.windowing.time.Time
10 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow
11 |
12 | /**
13 | * @Author: Henry
14 | * @Description:
15 | * @Date: Create in 2019/10/31 21:38
16 | **/
17 |
18 | case class ChannelFreshness(
19 | var channelId : String ,
20 | var date : String ,
21 | var newCount: Long ,
22 | val oldCount: Long
23 |
24 | )
25 |
26 | /**
27 | * 1、 转换
28 | * 2、 分组
29 | * 3、 时间窗口
30 | * 4、 聚合
31 | * 5、 落地 HBase
32 | */
33 | object ChannelFreshnessTaskTrait extends BaseTask[ChannelFreshness] {
34 | /* Alt + Enter */
35 |
36 | // 1、 转换
37 | override def map(clickLogWideDataStream: DataStream[ClickLogWide]): DataStream[ChannelFreshness] = {
38 |
39 | val mapDataStream: DataStream[ChannelFreshness] = clickLogWideDataStream.flatMap {
40 | clickLog =>
41 |
42 | // 如果是老用户,只有在第一次来的时候,计数为 1
43 | val isOld = (isNew: Int, isDateNew: Int) => if (isNew == 0 && isDateNew == 1) 1 else 0
44 | // 统计新用户、老用户数量
45 | List(
46 | ChannelFreshness(clickLog.channelID, clickLog.yearMonthDayHour, clickLog.isNew, isOld(clickLog.isNew, clickLog.isHourNew)),
47 | ChannelFreshness(clickLog.channelID, clickLog.yearMonthDay, clickLog.isNew, isOld(clickLog.isNew, clickLog.isDayNew)),
48 | ChannelFreshness(clickLog.channelID, clickLog.yearMonth, clickLog.isNew, isOld(clickLog.isNew, clickLog.isMonthNew))
49 | )
50 | }
51 | mapDataStream
52 | }
53 |
54 | // 2、 分组
55 | override def keyBy(mapDataStream: DataStream[ChannelFreshness]): KeyedStream[ChannelFreshness, String] = {
56 |
57 | // 或者:mapDataStream.keyBy {freshness => (freshness.channelId + freshness.date)
58 | val keyedStream: KeyedStream[ChannelFreshness, String] = mapDataStream.keyBy {
59 | freshness => (freshness.channelId + freshness.date)
60 | }
61 | keyedStream
62 |
63 | }
64 |
65 | // 3、 时间窗口
66 | override def timeWindow(keyedStream: KeyedStream[ChannelFreshness, String]): WindowedStream[ChannelFreshness, String, TimeWindow] = {
67 |
68 | val windowedStream: WindowedStream[ChannelFreshness, String, TimeWindow] = keyedStream.timeWindow(Time.seconds(3))
69 | windowedStream
70 |
71 | }
72 |
73 | // 4、 聚合
74 | override def reduce(windowedStream: WindowedStream[ChannelFreshness, String, TimeWindow]): DataStream[ChannelFreshness] = {
75 |
76 | val reduceDataStream: DataStream[ChannelFreshness] = windowedStream.reduce {
77 | (t1, t2) =>
78 | ChannelFreshness(t1.channelId, t1.date, t1.newCount + t2.newCount, t1.oldCount + t2.oldCount)
79 | }
80 | reduceDataStream
81 |
82 | }
83 |
84 | // 5、 落地 HBase
85 | override def sink2HBase(reduceDataStream: DataStream[ChannelFreshness]): Unit = {
86 |
87 | reduceDataStream.addSink(new SinkFunction[ChannelFreshness] {
88 | override def invoke(value: ChannelFreshness): Unit = {
89 | // 创建 HBase 相关变量
90 | val tableName = "channel_freshness"
91 | val clfName = "info"
92 | val channelIdColumn = "channelId"
93 | val dateColumn = "date"
94 | val newCountColumn = "newCount"
95 | val oldCountColumn = "oldCount"
96 |
97 | val rowkey = value.channelId + ":" + value.date
98 |
99 | // 查询历史数据
100 | val resultMap: Map[String, String] = HBaseUtil.getMapData(tableName, rowkey, clfName, List(newCountColumn, oldCountColumn))
101 |
102 | // 累加
103 | var totalNewCount = 0L
104 | var totalOldCount = 0L
105 |
106 | if(resultMap != null && StringUtils.isNotBlank(resultMap.getOrElse(newCountColumn,""))){
107 |           totalNewCount = resultMap(newCountColumn).toLong + value.newCount
108 | }
109 | else {
110 | totalNewCount = value.newCount
111 | }
112 |
113 | if(resultMap != null && StringUtils.isNotBlank(resultMap.getOrElse(oldCountColumn,""))){
114 |           totalOldCount = resultMap(oldCountColumn).toLong + value.oldCount
115 | }
116 | else {
117 | totalOldCount = value.oldCount
118 | }
119 |
120 | // 保存数据
121 | HBaseUtil.putMapData(tableName, rowkey, clfName, Map(
122 | // 向如下列插入数据
123 | channelIdColumn -> value.channelId ,
124 | dateColumn -> value.date ,
125 | newCountColumn -> totalNewCount ,
126 | oldCountColumn -> totalOldCount
127 | ))
128 | }
129 | })
130 | }
131 |
132 | }
133 |
134 |
135 |
--------------------------------------------------------------------------------
/real-process/src/main/scala/com/henry/realprocess/task/ChannelNetworkTask.scala:
--------------------------------------------------------------------------------
1 | package com.henry.realprocess.task
2 | import com.henry.realprocess.bean.ClickLogWide
3 | import com.henry.realprocess.util.HBaseUtil
4 | import org.apache.commons.lang.StringUtils
5 | import org.apache.flink.streaming.api.scala.{DataStream, KeyedStream, WindowedStream}
6 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow
7 | import org.apache.flink.api.scala._
8 | import org.apache.flink.streaming.api.functions.sink.SinkFunction
9 | import org.apache.flink.streaming.api.windowing.time.Time
10 |
11 |
12 | /**
13 | * @Author: Henry
14 | * @Description:
15 | * @Date: Create in 2019/11/3 15:52
16 | **/
17 |
18 | // 2. Add a `ChannelNetWork` case class that wraps the business fields to aggregate: channel ID (channelId),
19 | //    carrier (network), date (date), pv, uv, new users (newCount), old users (oldCount)
20 | case class ChannelNetWork(
21 | var channelId: String,
22 | var network: String,
23 | var date: String,
24 | var pv: Long,
25 | var uv: Long,
26 | var newCount: Long,
27 | var oldCount: Long
28 | )
29 |
30 |
31 | object ChannelNetworkTask extends BaseTask[ChannelNetWork]{
32 |
33 | override def map(clickLogWideDataStream: DataStream[ClickLogWide]): DataStream[ChannelNetWork] = {
34 |
35 | val isOld = (isNew: Int, isDateNew: Int) => if (isNew == 0 && isDateNew == 1) 1 else 0
36 |
37 | clickLogWideDataStream.flatMap{
38 | clickLogWide => {
39 | List(
40 | ChannelNetWork( // 月维度
41 | clickLogWide.channelID,
42 | clickLogWide.network,
43 | clickLogWide.yearMonth,
44 | clickLogWide.count,
45 | clickLogWide.isMonthNew,
46 | clickLogWide.isNew,
47 | isOld(clickLogWide.isNew, clickLogWide.isMonthNew)
48 | ),
49 | ChannelNetWork( // 天维度
50 | clickLogWide.channelID,
51 | clickLogWide.network,
52 | clickLogWide.yearMonthDay,
53 | clickLogWide.count,
54 | clickLogWide.isDayNew,
55 | clickLogWide.isNew,
56 | isOld(clickLogWide.isNew, clickLogWide.isDayNew)
57 | ),
58 | ChannelNetWork( // 小时维度
59 | clickLogWide.channelID,
60 | clickLogWide.network,
61 | clickLogWide.yearMonthDayHour,
62 | clickLogWide.count,
63 | clickLogWide.isHourNew,
64 | clickLogWide.isNew,
65 | isOld(clickLogWide.isNew, clickLogWide.isHourNew)
66 | )
67 | )
68 | }
69 | }
70 | }
71 |
72 | override def keyBy(mapDataStream: DataStream[ChannelNetWork]): KeyedStream[ChannelNetWork, String] = {
73 |
74 | mapDataStream.keyBy {
75 | network =>
76 | network.channelId +" : "+ network.network +" : "+ network.date
77 | }
78 | }
79 |
80 | override def reduce(windowedStream: WindowedStream[ChannelNetWork, String, TimeWindow]): DataStream[ChannelNetWork] = {
81 | windowedStream.reduce {
82 | (t1, t2) => {
83 | ChannelNetWork(
84 | t1.channelId,
85 | t1.network,
86 | t1.date,
87 | t1.pv + t2.pv,
88 | t1.uv + t2.uv,
89 | t1.newCount + t2.newCount,
90 | t1.oldCount + t2.oldCount
91 | )
92 | }
93 | }
94 | }
95 |
96 |
97 | override def sink2HBase(reduceDataStream: DataStream[ChannelNetWork]): Unit = {
98 |
99 | reduceDataStream.addSink(
100 | network => {
101 | // 创建 HBase 相关列 - 准备hbase的表名、列族名、rowkey名、列名
102 | val tableName = "channel_network"
103 | val clfName = "info"
104 | // 频道ID(channelID)、运营商(network)、日期(date)pv、uv、新用户(newCount)、老用户(oldCount)
105 | val rowkey = s"${network.channelId} : ${network.date} : ${network.network}" // 引用变量的方式
106 | val channelIdColName = "channelID"
107 | val networkColName = "network"
108 | val dateColName = "date"
109 | val pvColName = "pv"
110 | val uvColName = "uv"
111 | val newCountColName = "newCount"
112 | val oldCountColName = "oldCount"
113 |
114 | // 查询 HBase
115 | // - 判断hbase中是否已经存在结果记录
116 | val resultMap: Map[String, String] = HBaseUtil.getMapData(tableName, rowkey, clfName,
117 | List( pvColName, uvColName, newCountColName, oldCountColName )
118 | )
119 |
120 | // 数据累加
121 | var totalPv = 0L
122 | var totalUv = 0L
123 | var totalNewCount = 0L
124 | var totalOldCount = 0L
125 |
126 | // totalPv
127 | if (resultMap != null && resultMap.size > 0 && StringUtils.isNotBlank(resultMap.getOrElse(pvColName,""))) {
128 | totalPv = resultMap(pvColName).toLong + network.pv
129 | }
130 | else {
131 | totalPv = network.pv
132 | }
133 |
134 | // totalUv
135 | if (resultMap != null && resultMap.size > 0 && StringUtils.isNotBlank(resultMap.getOrElse(uvColName,""))) {
136 | totalUv = resultMap(uvColName).toLong + network.uv
137 | }
138 | else {
139 | totalUv = network.uv
140 | }
141 |
142 | // totalNewCount
143 | if (resultMap != null && resultMap.size > 0 && StringUtils.isNotBlank(resultMap.getOrElse(newCountColName,""))) {
144 | totalNewCount = resultMap(newCountColName).toLong + network.newCount
145 | }
146 | else {
147 | totalNewCount = network.newCount
148 | }
149 |
150 | // totalOldCount
151 | if (resultMap != null && resultMap.size > 0 && StringUtils.isNotBlank(resultMap.getOrElse(oldCountColName,""))) {
152 | totalOldCount = resultMap(oldCountColName).toLong + network.oldCount
153 | }
154 | else {
155 | totalOldCount = network.oldCount
156 | }
157 |
158 | // 保存数据
159 | HBaseUtil.putMapData(
160 | tableName, rowkey, clfName, Map(
161 | channelIdColName -> network.channelId,
162 | networkColName -> network.network,
163 | dateColName -> network.date,
164 | pvColName -> totalPv,
165 | uvColName -> totalUv,
166 | newCountColName -> totalNewCount,
167 | oldCountColName -> totalOldCount
168 | )
169 | )
170 | }
171 | )
172 | }
173 | }
174 |
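Note: the `flatMap` above fans each `ClickLogWide` out into three `ChannelNetWork` records, one per time granularity. A single click from channel 1 over "电信" at 2019-11-03 15:00 would, for example, produce records dated `201911`, `20191103` and `2019110315`, so the same event is counted in the month, day and hour aggregates.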
--------------------------------------------------------------------------------
/real-process/src/main/scala/com/henry/realprocess/task/ChannelPvUvTask.scala:
--------------------------------------------------------------------------------
1 | package com.henry.realprocess.task
2 |
3 | import com.henry.realprocess.bean.ClickLogWide
4 | import com.henry.realprocess.util.HBaseUtil
5 | import org.apache.flink.streaming.api.scala.{DataStream, WindowedStream}
6 | import org.apache.flink.api.scala._
7 | import org.apache.flink.streaming.api.functions.sink.SinkFunction
8 | import org.apache.flink.streaming.api.scala.KeyedStream
9 | import org.apache.flink.streaming.api.windowing.time.Time
10 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow
11 | import org.apache.commons.lang.StringUtils
12 |
13 | /**
14 | * @Author: Henry
15 | * @Description: 渠道 PV/UV
16 | * 1、字段转换;
17 | * 2、分组;
18 | * 3、时间窗口;
19 | * 4、聚合;
20 | * 5、落地HBase
21 | * @Date: Create in 2019/10/30 20:15
22 | **/
23 |
24 | case class ChannelPvUv(
25 | val channelId: String,
26 | val yearDayMonthHour: String,
27 | val pv: Long,
28 | val uv: Long
29 | )
30 |
31 | object ChannelPvUvTask {
32 |
33 | def process(clickLogWideDateStream : DataStream[ClickLogWide])= {
34 |
35 | // 1、转换
36 | val channelPvUvDS: DataStream[ChannelPvUv] = clickLogWideDateStream.map{
37 | clickLogWide => {
38 | ChannelPvUv(clickLogWide.channelID, clickLogWide.yearMonthDayHour,
39 | clickLogWide.count, clickLogWide.isHourNew)
40 | }
41 | }
42 |
43 | // 2、分组
44 | val keyedStream: KeyedStream[ChannelPvUv, String] = channelPvUvDS.keyBy{
45 | channelPvUv => channelPvUv.channelId + channelPvUv.yearDayMonthHour
46 | }
47 |
48 | // 3、窗口
49 | val windowedStream: WindowedStream[ChannelPvUv, String, TimeWindow] =
50 | keyedStream.timeWindow(Time.seconds(3))
51 |
52 |
53 | // 4、聚合
54 | val reduceDataStream: DataStream[ChannelPvUv] = windowedStream.reduce{
55 | (t1, t2) => ChannelPvUv(t1.channelId, t1.yearDayMonthHour, t1.pv + t2.pv, t1.uv + t2.uv)
56 | }
57 |
58 | // 5、HBase 落地
59 | reduceDataStream.addSink(new SinkFunction[ChannelPvUv] {
60 |
61 | override def invoke(value: ChannelPvUv): Unit = {
62 |
63 | // HBase 相关字段
64 | val tableName = "channel_pvuv"
65 | val clfName = "info"
66 | val channelIdColumn = "channelId"
67 | val yearMonthDayHourColumn = "yearMonthDayHour"
68 | val pvColumn = "pv"
69 | val uvColumn = "uv"
70 |
71 | val rowkey = value.channelId + ":" + value.yearDayMonthHour
72 |
73 | // 查询 HBase ,并且获取相关记录
74 | val pvInHBase: String = HBaseUtil.getData(tableName, rowkey, clfName, pvColumn)
75 | val uvInHBase: String = HBaseUtil.getData(tableName, rowkey, clfName, uvColumn)
76 |
77 | var totalPv = 0L
78 | var totalUv = 0L
79 |
80 | // 如果 HBase 中没有 PV 值,就把当前值保存;如果有值就进行累加
81 | if(StringUtils.isBlank(pvInHBase)){
82 | totalPv = value.pv
83 | }
84 | else {
85 | totalPv = pvInHBase.toLong + value.pv
86 | }
87 |
88 | // 如果 HBase 中没有 UV 值,就把当前值保存;如果有值就进行累加
89 | if(StringUtils.isBlank(uvInHBase)){
90 | totalUv = value.uv
91 | }
92 | else {
93 | totalUv = uvInHBase.toLong + value.uv
94 | }
95 |
96 | // 保存数据
97 | HBaseUtil.putMapData(tableName, rowkey, clfName, Map(
98 |
99 | channelIdColumn -> value.channelId ,
100 | yearMonthDayHourColumn -> value.yearDayMonthHour ,
101 |           pvColumn -> totalPv.toString ,
102 |           uvColumn -> totalUv.toString
103 | ))
104 |
105 | }
106 | })
107 | }
108 | }
109 |
--------------------------------------------------------------------------------
/real-process/src/main/scala/com/henry/realprocess/task/ChannelPvUvTaskMerge.scala:
--------------------------------------------------------------------------------
1 | package com.henry.realprocess.task
2 |
3 | import com.henry.realprocess.bean.ClickLogWide
4 | import com.henry.realprocess.util.HBaseUtil
5 | import org.apache.flink.api.scala._
6 | import org.apache.commons.lang.StringUtils
7 | import org.apache.flink.streaming.api.functions.sink.SinkFunction
8 | import org.apache.flink.streaming.api.scala.{DataStream, KeyedStream, WindowedStream}
9 | import org.apache.flink.streaming.api.windowing.time.Time
10 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow
11 |
12 | /**
13 | * @Author: Henry
14 | * @Description:
15 | * @Date: Create in 2019/10/30 22:42
16 | **/
17 |
18 | case class ChannelPvUv(
19 | val channelId: String,
20 | val yearDayMonthHour: String,
21 | val pv: Long,
22 | val uv: Long
23 | )
24 |
25 | object ChannelPvUvTaskMerge {
26 |
27 | def process(clickLogWideDateStream : DataStream[ClickLogWide])= {
28 |
29 | // 1、转换
30 | val channelPvUvDS: DataStream[ChannelPvUv] = clickLogWideDateStream.flatMap{
31 | clickLogWide => {
32 | List(
33 | ChannelPvUv(clickLogWide.channelID, clickLogWide.yearMonthDayHour, clickLogWide.count, clickLogWide.isHourNew) ,
34 | ChannelPvUv(clickLogWide.channelID, clickLogWide.yearMonthDay, clickLogWide.count, clickLogWide.isDayNew) ,
35 | ChannelPvUv(clickLogWide.channelID, clickLogWide.yearMonth, clickLogWide.count, clickLogWide.isMonthNew)
36 | )
37 | }
38 | }
39 |
40 | // 2、分组
41 | val keyedStream: KeyedStream[ChannelPvUv, String] = channelPvUvDS.keyBy{
42 | channelPvUv => channelPvUv.channelId + channelPvUv.yearDayMonthHour
43 | }
44 |
45 | // 3、窗口
46 | val windowedStream: WindowedStream[ChannelPvUv, String, TimeWindow] =
47 | keyedStream.timeWindow(Time.seconds(3))
48 |
49 |
50 | // 4、聚合
51 | val reduceDataStream: DataStream[ChannelPvUv] = windowedStream.reduce{
52 | (t1, t2) => ChannelPvUv(t1.channelId, t1.yearDayMonthHour, t1.pv + t2.pv, t1.uv + t2.uv)
53 | }
54 |
55 | // 5、HBase 落地
56 | reduceDataStream.addSink(new SinkFunction[ChannelPvUv] {
57 |
58 | override def invoke(value: ChannelPvUv): Unit = {
59 |
60 | // HBase 相关字段
61 | val tableName = "channel_pvuv"
62 | val clfName = "info"
63 | val channelIdColumn = "channelId"
64 | val yearMonthDayHourColumn = "yearMonthDayHour"
65 | val pvColumn = "pv"
66 | val uvColumn = "uv"
67 |
68 | val rowkey = value.channelId + ":" + value.yearDayMonthHour
69 |
70 | // 查询 HBase ,并且获取相关记录
71 | val pvInHBase: String = HBaseUtil.getData(tableName, rowkey, clfName, pvColumn)
72 | val uvInHBase: String = HBaseUtil.getData(tableName, rowkey, clfName, uvColumn)
73 |
74 | var totalPv = 0L
75 | var totalUv = 0L
76 |
77 | // 如果 HBase 中没有 PV 值,就把当前值保存;如果有值就进行累加
78 | if(StringUtils.isBlank(pvInHBase)){
79 | totalPv = value.pv
80 | }
81 | else {
82 | totalPv = pvInHBase.toLong + value.pv
83 | }
84 |
85 | // 如果 HBase 中没有 UV 值,就把当前值保存;如果有值就进行累加
86 | if(StringUtils.isBlank(uvInHBase)){
87 | totalUv = value.uv
88 | }
89 | else {
90 | totalUv = uvInHBase.toLong + value.uv
91 | }
92 |
93 | // 保存数据
94 | HBaseUtil.putMapData(tableName, rowkey, clfName, Map(
95 |
96 | channelIdColumn -> value.channelId ,
97 | yearMonthDayHourColumn -> value.yearDayMonthHour ,
98 |           pvColumn -> totalPv.toString ,
99 |           uvColumn -> totalUv.toString
100 | ))
101 |
102 | }
103 | })
104 | }
105 | }
106 |
--------------------------------------------------------------------------------
/real-process/src/main/scala/com/henry/realprocess/task/ChannelRealHotTask.scala:
--------------------------------------------------------------------------------
1 | package com.henry.realprocess.task
2 |
3 | import com.henry.realprocess.bean.ClickLogWide
4 | import com.henry.realprocess.util.HBaseUtil
5 | import org.apache.flink.streaming.api.scala.{DataStream, WindowedStream}
6 | import org.apache.flink.api.scala._
7 | import org.apache.flink.streaming.api.functions.sink.SinkFunction
8 | import org.apache.flink.streaming.api.scala.KeyedStream
9 | import org.apache.flink.streaming.api.windowing.time.Time
10 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow
11 | import org.apache.commons.lang.StringUtils
12 |
13 |
14 | /**
15 | * @Author: Henry
16 | * @Description: 频道热点分析业务开发
17 | * 1、字段转换;
18 | * 2、分组;
19 | * 3、时间窗口;
20 | * 4、聚合;
21 | * 5、落地HBase
22 | * @Date: Create in 2019/10/29 20:22
23 | **/
24 |
25 | case class ChannelRealHot(var channelid:String, var visited:Long)
26 |
27 |
28 | object ChannelRealHotTask {
29 |
30 | def process(clickLogWideDateStream : DataStream[ClickLogWide])= {
31 |
32 | // 1、字段转换 channelid、visited
33 | val realHotDataStream: DataStream[ChannelRealHot] = clickLogWideDateStream.map{
34 | clickLogWide: ClickLogWide =>
35 | ChannelRealHot(clickLogWide.channelID, clickLogWide.count)
36 | }
37 |
38 | // 2、分组
39 | val keyedStream: KeyedStream[ChannelRealHot, String] = realHotDataStream.keyBy(_.channelid)
40 |
41 |
42 | // 3、时间窗口
43 | val windowedStream: WindowedStream[ChannelRealHot, String, TimeWindow] = keyedStream.timeWindow(
44 | Time.seconds(3))
45 |
46 | // 4、聚合
47 | val reduceDataStream: DataStream[ChannelRealHot] = windowedStream.reduce{
48 | (t1: ChannelRealHot, t2: ChannelRealHot) =>
49 | ChannelRealHot(t1.channelid, t1.visited + t2.visited)
50 | }
51 |     // test output: the bare `reduceDataStream` expression was a leftover no-op from testing, now commented out
52 |     // reduceDataStream
53 |
54 | // 5、落地 HBase
55 | reduceDataStream.addSink(new SinkFunction[ChannelRealHot] {
56 |
57 | override def invoke(value: ChannelRealHot): Unit = {
58 |
59 | // HBase 相关字段
60 | val tableName = "channel"
61 | val clfName = "info"
62 | val channelIdColumn = "channelId"
63 | val visitedColumn = "visited"
64 | val rowkey = value.channelid
65 |
66 |
67 | // 查询 HBase ,并且获取相关记录
68 | val visitedValue: String = HBaseUtil.getData(tableName, rowkey, clfName, visitedColumn)
69 | // 创建总数的临时变量
70 | var totalCount: Long = 0
71 |
72 | if(StringUtils.isBlank(visitedValue)){
73 | totalCount = value.visited
74 | }
75 | else {
76 | totalCount = visitedValue.toLong + value.visited
77 | }
78 |
79 | // 保存数据
80 | HBaseUtil.putMapData(tableName, rowkey, clfName, Map(
81 | channelIdColumn -> value.channelid ,
82 | visitedColumn -> totalCount.toString
83 | ))
84 | }
85 | })
86 | }
87 |
88 | }
89 |
--------------------------------------------------------------------------------
/real-process/src/main/scala/com/henry/realprocess/task/PreprocessTask.scala:
--------------------------------------------------------------------------------
1 | package com.henry.realprocess.task
2 |
3 | import com.henry.realprocess.bean.{ClickLogWide, Message}
4 | import com.henry.realprocess.util.HBaseUtil
5 | import org.apache.commons.lang.StringUtils
6 | import org.apache.commons.lang.time.FastDateFormat
7 | import org.apache.flink.streaming.api.scala.DataStream
8 | import org.apache.flink.api.scala._
9 |
10 | /**
11 | * @Author: Henry
12 | * @Description: 预处理任务
13 | * @Date: Create in 2019/10/27 14:31
14 | **/
15 | object PreprocessTask {
16 |
17 |
18 | def process(watermarkDataStream:DataStream[Message])= {
19 |
20 | /**
21 | * 大括号{}用于代码块,计算结果是代码最后一行;
22 | * 大括号{}用于拥有代码块的函数;
23 | * 大括号{}在只有一行代码时可以省略,除了case语句(Scala适用);
24 | * 小括号()在函数只有一个参数时可以省略(Scala适用);
25 | * 几乎没有二者都省略的情况。
26 | */
27 | watermarkDataStream.map {
28 |
29 | msg =>
30 | // 转换时间
31 | val yearMonth: String = FastDateFormat.getInstance("yyyyMM").format(msg.timeStamp)
32 | val yearMonthDay: String = FastDateFormat.getInstance("yyyyMMdd").format(msg.timeStamp)
33 | val yearMonthDayHour: String = FastDateFormat.getInstance("yyyyMMddHH").format(msg.timeStamp)
34 |
35 | // 转换地区
36 | val address = msg.clickLog.country + msg.clickLog.province + msg.clickLog.city
37 |
38 | val isNewtuple = isNewProcess(msg)
39 |
40 | ClickLogWide(
41 | msg.clickLog.channelID,
42 | msg.clickLog.categoryID,
43 | msg.clickLog.produceID,
44 | msg.clickLog.country,
45 | msg.clickLog.province,
46 | msg.clickLog.city,
47 | msg.clickLog.network,
48 | msg.clickLog.source,
49 | msg.clickLog.browserType,
50 | msg.clickLog.entryTime,
51 | msg.clickLog.leaveTime,
52 | msg.clickLog.userID,
53 | msg.count,
54 | msg.timeStamp,
55 | address,
56 | yearMonth,
57 | yearMonthDay,
58 | yearMonthDayHour,
59 | isNewtuple._1,
60 | isNewtuple._2,
61 | isNewtuple._3,
62 | isNewtuple._4
63 | )
64 | }
65 |
66 | }
67 |
68 | /**
69 | * 判断用户是否为新用户
70 | * @param msg
71 | */
72 | private def isNewProcess(msg:Message)={
73 |
74 | // 1、定义4个变量,初始化为0
75 | var isNew = 0
76 | var isHourNew = 0
77 | var isDayNew = 0
78 | var isMonthNew = 0
79 |
80 |
81 | // 2、从HBase中查询用户记录,如果有记录,再去判断其他时间;如果没有记录,则证明是新用户
82 | val tableName = "user_history"
83 | var clfName = "info"
84 | var rowkey = msg.clickLog.userID + ":" + msg.clickLog.channelID
85 |
86 | // - 用户ID(userID)
87 | var userIdColumn = "userid"
88 | // - 频道ID(channelid)
89 | var channelidColumn = "channelid"
90 | // - 最后访问时间(时间戳)(lastVisitedTime)
91 | var lastVisitedTimeColumn = "lastVisitedTime"
92 |
93 |
94 | var userId: String = HBaseUtil.getData(tableName, rowkey, clfName, userIdColumn)
95 | var channelid: String = HBaseUtil.getData(tableName, rowkey, clfName, channelidColumn)
96 | var lastVisitedTime: String = HBaseUtil.getData(tableName, rowkey, clfName, lastVisitedTimeColumn)
97 |
98 |
99 | // 如果 userid 为空,则该用户一定是新用户
100 | if(StringUtils.isBlank(userId)){
101 | isNew = 1
102 | isHourNew = 1
103 | isDayNew = 1
104 | isMonthNew = 1
105 |
106 | // 保存用户的访问记录到 "user_history"
107 | HBaseUtil.putMapData(tableName, rowkey, clfName, Map(
108 | userIdColumn -> msg.clickLog.userID ,
109 | channelidColumn -> msg.clickLog.channelID ,
110 | lastVisitedTimeColumn -> msg.timeStamp
111 | ))
112 | }
113 | else{
114 | isNew = 0
115 | // 其它字段需要进行时间戳的比对
116 |       isHourNew = compareDate(msg.timeStamp, lastVisitedTime.toLong, "yyyyMMddHH")
117 |       isDayNew = compareDate(msg.timeStamp, lastVisitedTime.toLong, "yyyyMMdd")
118 |       isMonthNew = compareDate(msg.timeStamp, lastVisitedTime.toLong, "yyyyMM")
119 |
120 | // 更新 "user_history" 用户的时间戳
121 | HBaseUtil.putData(tableName, rowkey, clfName, lastVisitedTimeColumn , msg.timeStamp.toString)
122 |
123 | }
124 |
125 |     (isNew, isHourNew, isDayNew, isMonthNew)
126 | }
127 |
128 |
129 | /**
130 | * 比对时间: 201912 > 201911
131 | * @param currentTime 当前时间
132 | * @param historyTime 历史时间
133 | * @param format 时间格式: yyyyMM yyyyMMdd
134 | * @return 1 或者 0
135 | */
136 | def compareDate(currentTime:Long, historyTime:Long, format:String):Int={
137 |
138 | val currentTimeStr:String = timestamp2Str(currentTime, format)
139 | val historyTimeStr:String = timestamp2Str(historyTime, format)
140 |
141 | // 比对字符串大小,如果当前时间 > 历史时间,返回1
142 | var result:Int = currentTimeStr.compareTo(historyTimeStr)
143 |
144 | if(result > 0){
145 | result = 1
146 | }
147 | else {
148 | result = 0
149 | }
150 | result
151 | }
152 |
153 | /**
154 | * 转换日期
155 | * @param timestamp Long 类型时间戳
156 | * @param format 日期格式
157 | * @return
158 | */
159 | def timestamp2Str(timestamp:Long, format:String):String={
160 | FastDateFormat.getInstance("yyyyMM").format(timestamp)
161 | }
162 |
163 |
164 |
165 | }
166 |
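Note: `compareDate` returns 1 only when the formatted current time is strictly greater than the formatted history time, so a user is "new for this hour/day/month" exactly when the granularity has rolled over since the last visit. A quick usage sketch (timestamps built here only for illustration):

    import java.text.SimpleDateFormat

    val fmt = new SimpleDateFormat("yyyy-MM-dd HH:mm")
    val history = fmt.parse("2019-10-31 23:00").getTime
    val current = fmt.parse("2019-11-01 01:00").getTime

    PreprocessTask.compareDate(current, history, "yyyyMMddHH") // 1: different hour
    PreprocessTask.compareDate(current, history, "yyyyMMdd")   // 1: different day
    PreprocessTask.compareDate(current, current, "yyyyMM")     // 0: same timestamp, same month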
--------------------------------------------------------------------------------
/real-process/src/main/scala/com/henry/realprocess/util/GlobalConfigutil.scala:
--------------------------------------------------------------------------------
1 | package com.henry.realprocess.util
2 |
3 | import com.typesafe.config.{Config, ConfigFactory}
4 |
5 | /**
6 | * @Author: Henry
7 | * @Description: 配置文件加载类
8 | * @Date: Create in 2019/10/15 23:42
9 | **/
10 | object GlobalConfigutil {
11 |
12 | // 通过工厂加载配置, config 会自动加载 application.conf 文件,文件名不能变
13 | val config:Config = ConfigFactory.load()
14 |
15 | val bootstrapServers = config.getString("bootstrap.servers")
16 | val zookeeperConnect = config.getString("zookeeper.connect")
17 | val inputTopic = config.getString("input.topic")
18 | val gruopId = config.getString("gruop.id")
19 | val enableAutoCommit = config.getString("enable.auto.commit")
20 | val autoCommitIntervalMs = config.getString("auto.commit.interval.ms")
21 | val autoOffsetReset = config.getString("auto.offset.reset")
22 |
23 | def main(args: Array[String]): Unit = {
24 |     // Tip: hold Alt and drag the mouse to the last line for column selection, then press Ctrl+Shift+→ to extend it
25 | println(bootstrapServers)
26 | println(zookeeperConnect)
27 | println(inputTopic)
28 | println(gruopId)
29 | println(enableAutoCommit)
30 | println(autoCommitIntervalMs)
31 | println(autoOffsetReset)
32 | }
33 | }
34 |
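Note: `ConfigFactory.load()` reads `application.conf` from the classpath (the file listed under this module's resources). A minimal sketch of the keys this class expects — the values below are placeholders, not the repo's actual settings (the `gruop.id` spelling is kept because the code reads that exact key):

    bootstrap.servers = "master:9092"
    zookeeper.connect = "master:2181"
    input.topic = "pyg"
    gruop.id = "pyg-group"
    enable.auto.commit = "true"
    auto.commit.interval.ms = "5000"
    auto.offset.reset = "latest"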
--------------------------------------------------------------------------------
/real-process/src/main/scala/com/henry/realprocess/util/HBaseUtil.scala:
--------------------------------------------------------------------------------
1 | package com.henry.realprocess.util
2 |
3 | import org.apache.hadoop.conf.Configuration
4 | import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
5 | import org.apache.hadoop.hbase.client.{ColumnFamilyDescriptor, _}
6 | import org.apache.hadoop.hbase.util.Bytes
7 |
8 | /**
9 | * @Author: Henry
10 | * @Description: HBase 工具类
11 | * 1、获取Table对象
12 | * 2、保存单列数据
13 | * 3、查询单列数据
14 | * 4、保存多列数据
15 | * 5、查询多列数据
16 | * 6、删除数据
17 | * @Date: Create in 2019/10/21 22:53
18 | **/
19 | object HBaseUtil {
20 |
21 | // HBase 配置类,不需要指定配置文件名,文件名要求是 hbase-site.xml
22 | val conf:Configuration = HBaseConfiguration.create()
23 |
24 | // HBase 的连接
25 | val conn:Connection = ConnectionFactory.createConnection(conf)
26 |
27 | // HBase 的操作 API
28 | val admin:Admin = conn.getAdmin
29 |
30 | /**
31 | * 返回Table,如果不存在,则创建表
32 | *
33 |    * @param tableNameStr 表名
34 | * @param columnFamilyName 列族名
35 | * @return
36 | */
37 | def getTable(tableNameStr:String, columnFamilyName:String):Table={
38 |
39 |
40 | // 获取 TableName
41 | val tableName:TableName = TableName.valueOf(tableNameStr)
42 |
43 | // 如果表不存在,则创建表
44 |
45 | if(!admin.tableExists(tableName)){
46 |
47 | // 构建出表的描述的建造者
48 | val descBuilder: TableDescriptorBuilder = TableDescriptorBuilder.newBuilder(tableName)
49 |
50 | val familyDescriptor:ColumnFamilyDescriptor = ColumnFamilyDescriptorBuilder
51 | .newBuilder(columnFamilyName.getBytes).build()
52 |
53 | // 给表添加列族
54 | descBuilder.setColumnFamily(familyDescriptor)
55 |
56 | // 创建表
57 | admin.createTable(descBuilder.build())
58 | }
59 |
60 | conn.getTable(tableName)
61 |
62 | }
63 |
64 | /**
65 | * 存储单列数据
66 | *
67 | * @param tableNameStr 表名
68 | * @param rowkey 主键
69 | * @param columnFamilyName 列族名
70 | * @param columnName 列名
71 | * @param columnValue 列值
72 | */
73 | def putData(tableNameStr:String, rowkey:String, columnFamilyName:String, columnName:String, columnValue:String)={
74 |
75 | // 获取表
76 | val table:Table = getTable(tableNameStr, columnFamilyName)
77 |
78 | try{
79 | // Put
80 | val put:Put = new Put(rowkey.getBytes)
81 | put.addColumn(columnFamilyName.getBytes, columnName.getBytes, columnValue.getBytes)
82 |
83 | // 保存数据
84 | table.put(put)
85 | }catch {
86 | case ex:Exception=>{
87 | ex.printStackTrace()
88 | }
89 | }finally {
90 | table.close()
91 | }
92 | }
93 |
94 |
95 | /**
96 | * 通过单列名获取列值
97 | * @param tableNameStr 表名
98 | * @param rowkey 主键
99 | * @param columnFamilyName 列族名
100 | * @param columnName 列名
102 | * @return
103 | */
104 | def getData(tableNameStr:String, rowkey:String, columnFamilyName:String, columnName:String):String={
105 |
106 | // 1. 获取 Table 对象
107 | val table = getTable(tableNameStr, columnFamilyName)
108 |
109 | try {
110 | // 2. 构建 get 对象
111 | val get = new Get(rowkey.getBytes)
112 |
113 | // 3. 进行查询
114 | val result:Result = table.get(get)
115 |
116 | // 4. 判断查询结果是否为空,并且包含要查询的列
117 | if (result != null && result.containsColumn(columnFamilyName.getBytes, columnName.getBytes)){
118 | val bytes: Array[Byte] = result.getValue(columnFamilyName.getBytes(), columnName.getBytes)
119 |
120 | Bytes.toString(bytes)
121 | }else{
122 | ""
123 | }
124 |
125 | }catch{
126 | case ex:Exception => {
127 | ex.printStackTrace()
128 | ""
129 | }
130 | }finally {
131 | // 5、关闭表
132 | table.close()
133 | }
134 |
135 | }
136 |
137 |
138 | /**
139 | * 存储多列数据
140 | * @param tableNameStr 表名
141 | * @param rowkey 主键
142 | * @param columnFamilyName 列族名
143 |    * @param map 多个列名和列值的集合
144 | */
145 | def putMapData(tableNameStr:String, rowkey:String, columnFamilyName:String, map:Map[String,Any])={
146 |
147 | // 1、获取 table 对象
148 | val table = getTable(tableNameStr, columnFamilyName)
149 |
150 | try{
151 | // 2、创建 put
152 | val put = new Put(rowkey.getBytes)
153 |
154 | // 3、在 put 中添加多个列名和列值
155 | for ((colName, colValue) <- map){
156 | put.addColumn(columnFamilyName.getBytes, colName.getBytes, colValue.toString.getBytes)
157 | }
158 |
159 | // 4、保存 put
160 | table.put(put)
161 |
162 | }catch{
163 | case ex:Exception => {
164 | ex.printStackTrace()
165 |
166 | }
167 | }finally {
168 | // 5、关闭表
169 | table.close()
170 | }
171 |
172 |
175 | }
176 |
177 |
178 | /**
179 |    * 获取多列数据的值
180 | * @param tableNameStr 表名
181 | * @param rowkey 主键
182 | * @param columnFamilyName 列族名
183 |    * @param columnNameList 多个列名的集合
184 | * @return
185 | */
186 | def getMapData(tableNameStr:String, rowkey:String, columnFamilyName:String, columnNameList:List[String]):Map[String,String]= {
187 |
188 | // 1、获取 Table
189 | val table = getTable(tableNameStr, columnFamilyName)
190 |
191 | try{
192 | // 2、构建 get
193 | val get = new Get(rowkey.getBytes)
194 |
195 | // 3、执行查询
196 | val result: Result = table.get(get)
197 |
198 | // 4、遍历列名集合,取出列值,构建成 Map 返回
199 | columnNameList.map {
200 | col =>
201 | val bytes: Array[Byte] = result.getValue(columnFamilyName.getBytes(), col.getBytes)
202 |
203 | if (bytes != null && bytes.size > 0) {
204 | col -> Bytes.toString(bytes)
205 | }
206 | else { // 如果取不到值,则赋一个空串
207 | "" -> ""
208 | }
209 | }.filter(_._1 != "").toMap // 把不是空串的过滤出来,再转换成 Map
210 |
211 | }catch {
212 | case ex:Exception => {
213 | ex.printStackTrace()
214 | Map[String, String]() // 返回一个空的 Map
215 | }
216 | }finally {
217 | // 5、关闭 Table
218 | table.close()
219 | }
220 | }
221 |
222 |
223 | /**
224 | * 删除数据
225 | * @param tableNameStr 表名
226 | * @param rowkey 主键
227 | * @param columnFamilyName 列族名
228 | */
229 | def delete(tableNameStr:String, rowkey:String, columnFamilyName:String)={
230 |
231 | // 1、获取 Table
232 | val table:Table = getTable(tableNameStr, columnFamilyName)
233 |
234 | try {
235 | // 2、构建 delete 对象
236 | val delete: Delete = new Delete(rowkey.getBytes)
237 |
238 | // 3、执行删除
239 | table.delete(delete)
240 |
241 | }
242 | catch {
243 | case ex:Exception =>
244 | ex.printStackTrace()
245 | }
246 | finally {
247 | // 4、关闭 table
248 | table.close()
249 | }
250 |
251 | }
252 |
253 |
254 | def main(args: Array[String]): Unit = {
255 |
256 | // println(getTable("test","info"))
257 | // putData("test", "1", "info", "t1", "hello world")
258 | // println(getData("test", "1", "info", "t1"))
259 |
260 | // val map = Map(
261 | // "t2" -> "scala" ,
262 | // "t3" -> "hive" ,
263 | // "t4" -> "flink"
264 | // )
265 | // putMapData("test", "1", "info", map)
266 |
267 | // println(getMapData("test", "1", "info", List("t1", "t2")))
268 |
269 | delete("test", "1", "info")
270 | println(getMapData("test", "1", "info", List("t1", "t2")))
271 |
272 | }
273 |
274 | }
275 |
--------------------------------------------------------------------------------
/real-process/src/test/temp.txt:
--------------------------------------------------------------------------------
1 | val bootstrap.servers = config.getString("bootstrap.servers")
2 | val zookeeper.connect = config.getString("zookeeper.connect")
3 | val input.topic = config.getString("input.topic")
4 | val gruop.id = config.getString("gruop.id")
5 | val enable.auto.commit = config.getString("enable.auto.commit")
6 | val auto.commit.interval.ms = config.getString("auto.commit.interval.ms")
7 | val auto.offset.reset = config.getString("auto.offset.reset")
--------------------------------------------------------------------------------
/report/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <modelVersion>4.0.0</modelVersion>
6 |     <groupId>com.henry</groupId>
7 |     <artifactId>report</artifactId>
8 |     <version>1.0-SNAPSHOT</version>
9 |     <packaging>jar</packaging>
10 |     <name>report</name>
11 |     <description>Spring Boot report service</description>
12 |
13 |     <parent>
14 |         <groupId>org.springframework.boot</groupId>
15 |         <artifactId>spring-boot-starter-parent</artifactId>
16 |         <version>1.5.13.RELEASE</version>
17 |     </parent>
18 |
19 |     <properties>
20 |         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
21 |         <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
22 |         <java.version>1.8</java.version>
23 |         <spring-cloud.version>Greenwich.M3</spring-cloud.version>
24 |     </properties>
25 |
26 |     <repositories>
27 |         <repository>
28 |             <id>alimaven</id>
29 |             <name>alimaven</name>
30 |             <url>http://maven.aliyun.com/nexus/content/groups/public/</url>
31 |         </repository>
32 |     </repositories>
33 |
34 |     <dependencies>
35 |         <dependency>
36 |             <groupId>org.springframework.boot</groupId>
37 |             <artifactId>spring-boot-starter</artifactId>
38 |             <version>1.5.13.RELEASE</version>
39 |         </dependency>
40 |         <dependency>
41 |             <groupId>org.springframework.boot</groupId>
42 |             <artifactId>spring-boot-starter-test</artifactId>
43 |             <version>1.5.13.RELEASE</version>
44 |         </dependency>
45 |         <dependency>
46 |             <groupId>org.springframework.boot</groupId>
47 |             <artifactId>spring-boot-starter-web</artifactId>
48 |             <version>2.5.12</version>
49 |         </dependency>
50 |         <dependency>
51 |             <groupId>org.springframework.boot</groupId>
52 |             <artifactId>spring-boot-starter-tomcat</artifactId>
53 |             <version>1.5.13.RELEASE</version>
54 |         </dependency>
55 |         <dependency>
56 |             <groupId>org.apache.tomcat</groupId>
57 |             <artifactId>tomcat-catalina</artifactId>
58 |             <version>8.5.86</version>
59 |         </dependency>
60 |         <dependency>
61 |             <groupId>com.alibaba</groupId>
62 |             <artifactId>fastjson</artifactId>
63 |             <version>1.2.83</version>
64 |         </dependency>
65 |         <dependency>
66 |             <groupId>org.springframework.kafka</groupId>
67 |             <artifactId>spring-kafka</artifactId>
68 |             <version>1.0.6.RELEASE</version>
69 |         </dependency>
70 |         <dependency>
71 |             <groupId>org.apache.httpcomponents</groupId>
72 |             <artifactId>httpclient</artifactId>
73 |             <version>4.5.13</version>
74 |         </dependency>
75 |     </dependencies>
76 |
77 |     <build>
78 |         <plugins>
79 |             <plugin>
80 |                 <groupId>org.springframework.boot</groupId>
81 |                 <artifactId>spring-boot-maven-plugin</artifactId>
82 |             </plugin>
83 |         </plugins>
84 |     </build>
85 | </project>
--------------------------------------------------------------------------------
/report/src/main/java/com/henry/report/ReportApplication.java:
--------------------------------------------------------------------------------
1 | package com.henry.report;
2 |
3 | import org.springframework.boot.autoconfigure.SpringBootApplication;
4 | import org.springframework.boot.SpringApplication;
5 | /**
6 | * @Author: HongZhen
7 | * @Description:
8 | * @Date: Create in 2019/9/20 11:10
9 | **/
10 |
11 | // 添加注解 @SpringBootApplication ,表示该类是一个启动类
12 | @SpringBootApplication
13 | public class ReportApplication {
14 |
15 | public static void main(String[] args) {
16 | SpringApplication.run(ReportApplication.class, args);
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/report/src/main/java/com/henry/report/bean/Clicklog.java:
--------------------------------------------------------------------------------
1 | package com.henry.report.bean;
2 |
3 | /**
4 | * @Author: Henry
5 | * @Description: 点击流日志
6 | * @Date: Create in 2019/10/13 19:33
7 | **/
8 |
9 | public class Clicklog {
10 |
11 | // 频道 ID
12 | private long channelID;
13 | // 产品的类别 ID
14 | private long categoryID ;
15 | // 产品 ID
16 | private long produceID ;
17 | // 用户 ID
18 | private long userID ;
19 |
20 | // 国家
21 | private String country;
22 | // 省份
23 | private String province;
24 | // 城市
25 | private String city;
26 |
27 | // 网络方式
28 | private String network;
29 | // 来源方式
30 | private String source;
31 |
32 | // 浏览器类型
33 | private String browserType;
34 |
35 | // 进入网站时间
36 | private Long entryTime ;
37 |     // 离开网站时间
38 | private long leaveTime;
39 |
40 | public long getChannelID() {
41 | return channelID;
42 | }
43 |
44 | public void setChannelID(long channelID) {
45 | this.channelID = channelID;
46 | }
47 |
48 | public long getCategoryID() {
49 | return categoryID;
50 | }
51 |
52 | public void setCategoryID(long categoryID) {
53 | this.categoryID = categoryID;
54 | }
55 |
56 | public long getProduceID() {
57 | return produceID;
58 | }
59 |
60 | public void setProduceID(long produceID) {
61 | this.produceID = produceID;
62 | }
63 |
64 | public long getUserID() {
65 | return userID;
66 | }
67 |
68 | public void setUserID(long userID) {
69 | this.userID = userID;
70 | }
71 |
72 | public String getCountry() {
73 | return country;
74 | }
75 |
76 | public void setCountry(String country) {
77 | this.country = country;
78 | }
79 |
80 | public String getProvince() {
81 | return province;
82 | }
83 |
84 | public void setProvince(String province) {
85 | this.province = province;
86 | }
87 |
88 | public String getCity() {
89 | return city;
90 | }
91 |
92 | public void setCity(String city) {
93 | this.city = city;
94 | }
95 |
96 | public String getNetwork() {
97 | return network;
98 | }
99 |
100 | public void setNetwork(String network) {
101 | this.network = network;
102 | }
103 |
104 | public String getSource() {
105 | return source;
106 | }
107 |
108 | public void setSource(String source) {
109 | this.source = source;
110 | }
111 |
112 | public String getBrowserType() {
113 | return browserType;
114 | }
115 |
116 | public void setBrowserType(String browserType) {
117 | this.browserType = browserType;
118 | }
119 |
120 | public Long getEntryTime() {
121 | return entryTime;
122 | }
123 |
124 | public void setEntryTime(Long entryTime) {
125 | this.entryTime = entryTime;
126 | }
127 |
128 | public long getLeaveTime() {
129 | return leaveTime;
130 | }
131 |
132 | public void setLeaveTime(long leaveTime) {
133 | this.leaveTime = leaveTime;
134 | }
135 |
136 | }
137 |
--------------------------------------------------------------------------------
/report/src/main/java/com/henry/report/bean/Message.java:
--------------------------------------------------------------------------------
1 | package com.henry.report.bean;
2 |
3 | /**
4 | * @Author: Henry
5 | * @Description: 消息实体类
6 | * @Date: Create in 2019/10/11 23:40
7 | **/
8 | public class Message {
9 |
10 | // 消息次数
11 | private int count;
12 |
13 | // 消息的时间戳
14 | private long timestamp;
15 |
16 | // 消息体
17 | private String message;
18 |
19 | public int getCount() {
20 | return count;
21 | }
22 |
23 | public void setCount(int count) {
24 | this.count = count;
25 | }
26 |
27 | public long getTimestamp() {
28 | return timestamp;
29 | }
30 |
31 | public void setTimestamp(long timestamp) {
32 | this.timestamp = timestamp;
33 | }
34 |
35 | public String getMessage() {
36 | return message;
37 | }
38 |
39 | public void setMessage(String message) {
40 | this.message = message;
41 | }
42 |
43 | @Override
44 | public String toString() {
45 | return "Message{" +
46 | "count=" + count +
47 | ", timestamp=" + timestamp +
48 | ", message='" + message + '\'' +
49 | '}';
50 | }
51 | }
52 |
--------------------------------------------------------------------------------
/report/src/main/java/com/henry/report/controller/ReportController.java:
--------------------------------------------------------------------------------
1 | package com.henry.report.controller;
2 |
3 | import com.alibaba.fastjson.JSON;
4 | import com.henry.report.bean.Message;
5 | import org.springframework.beans.factory.annotation.Autowired;
6 | import org.springframework.kafka.core.KafkaTemplate;
7 | import org.springframework.web.bind.annotation.RequestBody;
8 | import org.springframework.web.bind.annotation.RequestMapping;
9 | import org.springframework.web.bind.annotation.RestController;
10 |
11 | import java.util.HashMap;
12 | import java.util.Map;
13 |
14 | /**
15 | * @Author: Henry
16 | * @Description:
17 | * @Date: Create in 2019/10/11 23:43
18 | **/
19 |
20 | // 表示这是一个 Controller,并且其中所有的方法都是带有 @ResponseBody 的注解
21 | @RestController
22 | public class ReportController {
23 |
24 | @Autowired
25 | KafkaTemplate kafkaTemplate;
26 |
27 | @RequestMapping("/receive")
28 | public Map receive(@RequestBody String json) {
29 |
30 | Map map = new HashMap(); // 记录是否发送成功
31 |
32 | try {
33 | // 构建 Message
34 | Message msg = new Message();
35 | msg.setMessage(json);
36 | msg.setCount(1);
37 | msg.setTimestamp(System.currentTimeMillis());
38 |
39 | String msgJSON = JSON.toJSONString(msg);
40 |
41 | // 发送 Message 到 Kafka
42 | kafkaTemplate.send("pyg", msgJSON);
43 | map.put("success", "ture");
44 |
45 | }catch (Exception ex){
46 | ex.printStackTrace();
47 | map.put("success", "false");
48 | }
49 |
50 | return map;
51 | }
52 |
53 | }
54 |
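Note: `/receive` wraps the raw request body in a `Message` (count = 1, the current timestamp, and the original JSON kept as the `message` string) before sending it to the `pyg` topic, so a Kafka consumer sees something like the following (values illustrative):

    {"count":1,"message":"{\"channelID\":1,\"userID\":3,...}","timestamp":1571400000000}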
--------------------------------------------------------------------------------
/report/src/main/java/com/henry/report/controller/TestController.java:
--------------------------------------------------------------------------------
1 | package com.henry.report.controller;
2 |
3 | import org.springframework.web.bind.annotation.RequestMapping;
4 | import org.springframework.web.bind.annotation.RestController;
5 |
6 | /**
7 | * @Author: HongZhen
8 | * @Description: Spring Boot 测试
9 | * @Date: Create in 2019/9/20 11:19
10 | **/
11 |
12 | // 表示这是一个 Controller,并且其中所有的方法都是带有 @ResponseBody 的注解
13 | @RestController
14 | public class TestController{
15 |
16 | // 为了能访问到该方法,需要添加如下注解,参数是代表如何来请求
17 | @RequestMapping("/test")
18 | public String test(String json){
19 | System.out.println(json);
20 | return json;
21 | }
22 | }
23 |
--------------------------------------------------------------------------------
/report/src/main/java/com/henry/report/util/ClickLogGenerator.java:
--------------------------------------------------------------------------------
1 | package com.henry.report.util;
2 |
3 | import com.alibaba.fastjson.JSONObject;
4 | import com.henry.report.bean.Clicklog;
5 | import org.apache.http.HttpResponse;
6 | import org.apache.http.HttpStatus;
7 | import org.apache.http.client.methods.HttpPost;
8 | import org.apache.http.entity.StringEntity;
9 | import org.apache.http.impl.client.CloseableHttpClient;
10 | import org.apache.http.impl.client.HttpClientBuilder;
11 | import org.apache.http.util.EntityUtils;
12 |
13 | import java.text.DateFormat;
14 | import java.text.ParseException;
15 | import java.text.SimpleDateFormat;
16 | import java.util.ArrayList;
17 | import java.util.Date;
18 | import java.util.List;
19 | import java.util.Random;
20 |
21 | /**
22 | * @Author: Henry
23 | * @Description: 点击流日志模拟器
24 | * @Date: Create in 2019/10/13 20:00
25 | **/
26 | public class ClickLogGenerator {
27 |
28 | // ID 信息
29 | private static Long[] channelID = new Long[]{1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L};
30 | private static Long[] categoryID = new Long[]{1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L};
31 | private static Long[] produceID = new Long[]{1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L};
32 | private static Long[] userID = new Long[]{1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L};
33 |
34 | // 地区
35 | private static String[] contrys = new String[]{"china"}; // 地区-国家集合
36 | private static String[] provinces = new String[]{"HeNan", "HeBeijing"}; // 地区-省集合
37 | private static String[] citys = new String[]{"ShiJiaZhuang", "ZhengZhou", "LuoyYang"}; // 地区-市集合
38 |
39 | // 网络方式
40 | private static String[] networks = new String[]{"电信", "移动", "联通"};
41 |
42 | // 来源方式
43 | private static String[] sources = new String[]{"直接输入", "百度跳转", "360搜索跳转", "必应跳转"};
44 |
45 | // 浏览器
46 | private static String[] browser = new String[]{"火狐", "QQ浏览器", "360浏览器", "谷歌浏览器"};
47 |
48 | // 打开方式,离开时间
49 | private static List usertimeLog = producetimes();
50 |
51 | // 获取时间
52 | private static List producetimes() {
53 | List usertimelog = new ArrayList<>();
54 | for (int i = 0; i < 100; i++) {
55 | Long[] timearray = gettimes("2019-10-10 24:60:60:000");
56 | usertimelog.add(timearray);
57 | }
58 | return usertimelog;
59 | }
60 |
61 | private static Long[] gettimes(String time) {
62 | DateFormat dataFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss:SSS");
63 | try {
64 | Date date = dataFormat.parse(time);
65 | long timetemp = date.getTime();
66 | Random random = new Random();
67 | int randomint = random.nextInt(10);
68 | long starttime = timetemp - randomint*3600*1000;
69 | long endtime = starttime + randomint*3600*1000;
70 | return new Long[]{starttime,endtime};
71 | }catch (ParseException e){
72 | e.printStackTrace();
73 | }
74 | return new Long[]{0L, 0L};
75 | }
76 |
77 | // 模拟发送 Http 请求到上报服务系统
78 | public static void send(String url, String json){
79 | try {
80 | CloseableHttpClient httpClient = HttpClientBuilder.create().build();
81 | HttpPost post = new HttpPost(url);
82 | JSONObject response = null ;
83 | try {
84 | StringEntity s = new StringEntity(json.toString(), "utf-8");
85 | s.setContentEncoding("utf-8");
86 | // 发送 json 数据需要设置 contentType
87 | s.setContentType("application/json");
88 | post.setEntity(s);
89 |
90 | HttpResponse res = httpClient.execute(post);
91 | if(res.getStatusLine().getStatusCode() == HttpStatus.SC_OK){
92 | // 返回 json 格式
93 | String result = EntityUtils.toString(res.getEntity());
94 | System.out.println(result);
95 | }
96 | }catch (Exception e){
97 | throw new RuntimeException();
98 |
99 | }
100 |
101 | }catch (Exception e){
102 | e.printStackTrace();
103 | }
104 | }
105 |
106 | public static void main(String[] args) {
107 | Random random = new Random();
108 | for (int i = 0; i < 100; i++) {
109 | // 频道id、类别id、产品id、用户id、打开时间、离开时间、地区、网络方式、来源方式、浏览器
110 | Clicklog clicklog = new Clicklog();
111 |
112 | clicklog.setChannelID(channelID[random.nextInt(channelID.length)]);
113 |             clicklog.setCategoryID(categoryID[random.nextInt(categoryID.length)]);
114 | clicklog.setProduceID(produceID[random.nextInt(produceID.length)]);
115 | clicklog.setUserID(userID[random.nextInt(userID.length)]);
116 | clicklog.setCountry(contrys[random.nextInt(contrys.length)]);
117 | clicklog.setProvince(provinces[random.nextInt(provinces.length)]);
118 | clicklog.setCity(citys[random.nextInt(citys.length)]);
119 | clicklog.setNetwork(networks[random.nextInt(networks.length)]);
120 | clicklog.setSource(sources[random.nextInt(sources.length)]);
121 | clicklog.setBrowserType(browser[random.nextInt(browser.length)]);
122 |
123 | Long[] times = usertimeLog.get(random.nextInt(usertimeLog.size()));
124 | clicklog.setEntryTime(times[0]);
125 | clicklog.setLeaveTime(times[1]);
126 |
127 | // 将点击流日志转成字符串,发送到前端地址
128 | String jsonstr = JSONObject.toJSONString(clicklog);
129 | System.out.println(jsonstr);
130 | try {
131 | Thread.sleep(100);
132 | }catch (InterruptedException e){
133 | e.printStackTrace();
134 | }
135 |
136 | send("http://localhost:1234/receive", jsonstr);
137 | }
138 | }
139 | }
140 |
--------------------------------------------------------------------------------
/report/src/main/java/com/henry/report/util/KafkaProducerConfig.java:
--------------------------------------------------------------------------------
1 | package com.henry.report.util;
2 |
3 | import org.apache.kafka.clients.producer.ProducerConfig;
4 | import org.apache.kafka.common.serialization.StringSerializer;
5 | import org.springframework.beans.factory.annotation.Value;
6 | import org.springframework.context.annotation.Bean;
7 | import org.springframework.context.annotation.Configuration;
8 | import org.springframework.kafka.core.DefaultKafkaProducerFactory;
9 | import org.springframework.kafka.core.KafkaTemplate;
10 | import org.springframework.kafka.core.ProducerFactory;
11 |
12 | import java.util.HashMap;
13 | import java.util.Map;
14 |
15 | /**
16 | * @Author: Henry
17 | * @Description: KafkaProducerConfig
18 | * @Date: Create in 2019/10/6 21:56
19 | **/
20 |
21 | @Configuration // 1、表示该类是一个配置类,这样在下面才能创建 Bean
22 | public class KafkaProducerConfig {
23 |
24 | // 通过@value注解将配置文件中kafka.bootstrap_servers_config的值赋值给成员变量
25 | @Value("${kafka.bootstrap_servers_config}")
26 | private String bootstrap_servers_config;
27 | // 如果出现发送失败的情况,允许重试的次数
28 | @Value("${kafka.retries_config}")
29 | private String retries_config;
30 | // 每个批次发送多大的数据,单位:字节
31 | @Value("${kafka.batch_size_config}")
32 | private String batch_size_config;
33 | // 定时发送,达到 1ms 发送
34 | @Value("${kafka.linger_ms_config}")
35 | private String linger_ms_config;
36 | // 缓存的大小,单位:字节
37 | @Value("${kafka.buffer_memory_config}")
38 | private String buffer_memory_config;
39 |     // TOPIC 名字
40 | @Value("${kafka.topic}")
41 | private String topic;
42 |
43 |
44 | @Bean // 2、表示该对象是受 Spring 管理的一个 Bean
45 | public KafkaTemplate kafkaTemplate() {
46 |
47 | // 构建工程需要的配置
48 | Map configs = new HashMap<>();
49 |
50 | // 3、设置相应的配置
51 | // 将成员变量的值设置到Map中,在创建kafka_producer中用到
52 | configs.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrap_servers_config);
53 | configs.put(ProducerConfig.RETRIES_CONFIG, retries_config);
54 | configs.put(ProducerConfig.BATCH_SIZE_CONFIG, batch_size_config);
55 | configs.put(ProducerConfig.LINGER_MS_CONFIG, linger_ms_config);
56 | configs.put(ProducerConfig.BUFFER_MEMORY_CONFIG, buffer_memory_config);
57 |
58 | // 设置 key、value 的序列化器
59 | configs.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG , StringSerializer.class);
60 | configs.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG , StringSerializer.class);
61 |
62 | // 指定自定义分区
63 | configs.put(ProducerConfig.PARTITIONER_CLASS_CONFIG, RoundRobinPartitioner.class);
64 |
65 |
66 | // 4、创建生产者工厂
67 | ProducerFactory producerFactory = new DefaultKafkaProducerFactory(configs);
68 |
69 | // 5、再把工厂传递给Template构造方法
70 | // 表示需要返回一个 kafkaTemplate 对象
71 | return new KafkaTemplate(producerFactory);
72 | }
73 | }
74 |
--------------------------------------------------------------------------------
/report/src/main/java/com/henry/report/util/RoundRobinPartitioner.java:
--------------------------------------------------------------------------------
1 | package com.henry.report.util;
2 |
3 | import org.apache.kafka.clients.producer.Partitioner;
4 | import org.apache.kafka.common.Cluster;
5 |
6 | import java.util.Map;
7 | import java.util.concurrent.atomic.AtomicInteger;
8 |
9 | /**
10 | * @Author: Henry
11 | * @Description: Custom round-robin partitioner
12 | * @Date: Create in 2019/10/9 23:00
13 | **/
14 |
15 | public class RoundRobinPartitioner implements Partitioner {
16 |
17 | // AtomicInteger: a thread-safe integer type from java.util.concurrent
18 | AtomicInteger counter = new AtomicInteger(0) ;
19 |
20 |
21 | // Returns the partition number: 0, 1, 2, ...
22 | @Override
23 | public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
24 |
25 | // Get the number of partitions for the topic
26 | Integer partitions = cluster.partitionCountForTopic(topic) ;
27 |
28 | int curpartition = counter.incrementAndGet() % partitions ; // partition chosen by the current round-robin step
29 |
30 | if(counter.get() > 65535){
31 | counter.set(0);
32 | }
33 |
34 | return curpartition;
35 | }
36 |
37 | @Override
38 | public void close() {
39 |
40 | }
41 |
42 | @Override
43 | public void configure(Map map) {
44 |
45 | }
46 | }
47 |
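Note: purely as an illustration (not part of the repository), the arithmetic above can be traced for a hypothetical 3-partition topic; since incrementAndGet returns 1 on the first call, the partitions cycle as 1, 2, 0, 1, 2, 0, ...

    // Illustrative sketch only; mirrors the counter logic in RoundRobinPartitioner
    import java.util.concurrent.atomic.AtomicInteger

    object RoundRobinDemo extends App {
      val counter = new AtomicInteger(0)
      val partitions = 3 // hypothetical partition count
      (1 to 6).foreach { _ =>
        println(counter.incrementAndGet() % partitions) // prints 1, 2, 0, 1, 2, 0
      }
    }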
--------------------------------------------------------------------------------
/report/src/main/resources/application.properties:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/report/src/main/resources/application.properties
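Note: the properties file is linked above rather than inlined. Judging from the @Value placeholders in KafkaProducerConfig, it presumably contains entries along the following lines; the values shown here are illustrative guesses, not the repository's actual settings.

    # Hypothetical example only; see the linked file for the real values
    kafka.bootstrap_servers_config=master:9092,slave1:9092,slave2:9092
    kafka.retries_config=0
    kafka.batch_size_config=16384
    kafka.linger_ms_config=1
    kafka.buffer_memory_config=33554432
    kafka.topic=pyg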
--------------------------------------------------------------------------------
/report/src/test/java/com/henry/report/KafkaTest.java:
--------------------------------------------------------------------------------
1 | package com.henry.report;
2 |
3 | import org.junit.Test;
4 | import org.junit.runner.RunWith;
5 | import org.springframework.beans.factory.annotation.Autowired;
6 | import org.springframework.boot.test.context.SpringBootTest;
7 | import org.springframework.kafka.core.KafkaTemplate;
8 | import org.springframework.test.context.junit4.SpringRunner;
9 |
10 | /**
11 | * @Author: Henry
12 | * @Description: Kafka test
13 | * @Date: Create in 2019/10/8 23:26
14 | **/
15 |
16 | @RunWith(SpringRunner.class)
17 | @SpringBootTest
18 | public class KafkaTest {
19 |
20 | @Autowired
21 | KafkaTemplate kafkaTemplate;
22 |
23 | @Test
24 | public void sendMsg(){
25 | for (int i = 0; i < 100; i++)
26 | kafkaTemplate.send("test", "key","this is test msg") ;
27 | }
28 |
29 | }
30 |
--------------------------------------------------------------------------------
/screenshot/036a079d.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/036a079d.png
--------------------------------------------------------------------------------
/screenshot/03ef7ace.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/03ef7ace.png
--------------------------------------------------------------------------------
/screenshot/04e25b5a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/04e25b5a.png
--------------------------------------------------------------------------------
/screenshot/07a78b77.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/07a78b77.png
--------------------------------------------------------------------------------
/screenshot/0b4d0c1b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/0b4d0c1b.png
--------------------------------------------------------------------------------
/screenshot/0b4ea4e1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/0b4ea4e1.png
--------------------------------------------------------------------------------
/screenshot/0bd763d1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/0bd763d1.png
--------------------------------------------------------------------------------
/screenshot/0ced234a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/0ced234a.png
--------------------------------------------------------------------------------
/screenshot/0e6080a2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/0e6080a2.png
--------------------------------------------------------------------------------
/screenshot/0fcd02b7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/0fcd02b7.png
--------------------------------------------------------------------------------
/screenshot/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/1.png
--------------------------------------------------------------------------------
/screenshot/121bf948.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/121bf948.png
--------------------------------------------------------------------------------
/screenshot/12f712f9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/12f712f9.png
--------------------------------------------------------------------------------
/screenshot/13c61ea9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/13c61ea9.png
--------------------------------------------------------------------------------
/screenshot/14679e84.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/14679e84.png
--------------------------------------------------------------------------------
/screenshot/1a3addd7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/1a3addd7.png
--------------------------------------------------------------------------------
/screenshot/1d504cce.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/1d504cce.png
--------------------------------------------------------------------------------
/screenshot/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/2.png
--------------------------------------------------------------------------------
/screenshot/201507bb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/201507bb.png
--------------------------------------------------------------------------------
/screenshot/21733492.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/21733492.png
--------------------------------------------------------------------------------
/screenshot/2193cbd1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/2193cbd1.png
--------------------------------------------------------------------------------
/screenshot/22cd7b3c.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/22cd7b3c.png
--------------------------------------------------------------------------------
/screenshot/277372f9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/277372f9.png
--------------------------------------------------------------------------------
/screenshot/2b7f3937.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/2b7f3937.png
--------------------------------------------------------------------------------
/screenshot/2c0ad8e2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/2c0ad8e2.png
--------------------------------------------------------------------------------
/screenshot/2d11fecd.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/2d11fecd.png
--------------------------------------------------------------------------------
/screenshot/2f5a312e.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/2f5a312e.png
--------------------------------------------------------------------------------
/screenshot/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/3.png
--------------------------------------------------------------------------------
/screenshot/3254e2ca.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/3254e2ca.png
--------------------------------------------------------------------------------
/screenshot/32a6daaf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/32a6daaf.png
--------------------------------------------------------------------------------
/screenshot/342dcc3e.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/342dcc3e.png
--------------------------------------------------------------------------------
/screenshot/34a79ff7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/34a79ff7.png
--------------------------------------------------------------------------------
/screenshot/34f66a92.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/34f66a92.png
--------------------------------------------------------------------------------
/screenshot/3754f480.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/3754f480.png
--------------------------------------------------------------------------------
/screenshot/3936fce5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/3936fce5.png
--------------------------------------------------------------------------------
/screenshot/3ab50051.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/3ab50051.png
--------------------------------------------------------------------------------
/screenshot/3b6d6d1f.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/3b6d6d1f.png
--------------------------------------------------------------------------------
/screenshot/3c8d398c.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/3c8d398c.png
--------------------------------------------------------------------------------
/screenshot/3d2cda96.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/3d2cda96.png
--------------------------------------------------------------------------------
/screenshot/3f08b9d0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/3f08b9d0.png
--------------------------------------------------------------------------------
/screenshot/4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/4.png
--------------------------------------------------------------------------------
/screenshot/48cd018e.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/48cd018e.png
--------------------------------------------------------------------------------
/screenshot/4b18ecbe.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/4b18ecbe.png
--------------------------------------------------------------------------------
/screenshot/4cf81224.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/4cf81224.png
--------------------------------------------------------------------------------
/screenshot/520fd656.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/520fd656.png
--------------------------------------------------------------------------------
/screenshot/5326b634.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/5326b634.png
--------------------------------------------------------------------------------
/screenshot/54187145.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/54187145.png
--------------------------------------------------------------------------------
/screenshot/544d0e7a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/544d0e7a.png
--------------------------------------------------------------------------------
/screenshot/565c64ed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/565c64ed.png
--------------------------------------------------------------------------------
/screenshot/58926ce0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/58926ce0.png
--------------------------------------------------------------------------------
/screenshot/58945558.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/58945558.png
--------------------------------------------------------------------------------
/screenshot/5a321628.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/5a321628.png
--------------------------------------------------------------------------------
/screenshot/62c03232.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/62c03232.png
--------------------------------------------------------------------------------
/screenshot/64a0b856.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/64a0b856.png
--------------------------------------------------------------------------------
/screenshot/65e75e0f.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/65e75e0f.png
--------------------------------------------------------------------------------
/screenshot/69907922.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/69907922.png
--------------------------------------------------------------------------------
/screenshot/6ac8e320.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/6ac8e320.png
--------------------------------------------------------------------------------
/screenshot/6c04e485.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/6c04e485.png
--------------------------------------------------------------------------------
/screenshot/6c99f78b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/6c99f78b.png
--------------------------------------------------------------------------------
/screenshot/6f5af076.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/6f5af076.png
--------------------------------------------------------------------------------
/screenshot/6f897038.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/6f897038.png
--------------------------------------------------------------------------------
/screenshot/6fcd4a44.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/6fcd4a44.png
--------------------------------------------------------------------------------
/screenshot/70a923ce.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/70a923ce.png
--------------------------------------------------------------------------------
/screenshot/72d64e76.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/72d64e76.png
--------------------------------------------------------------------------------
/screenshot/74d009f4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/74d009f4.png
--------------------------------------------------------------------------------
/screenshot/75fcc253.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/75fcc253.png
--------------------------------------------------------------------------------
/screenshot/76c4fbf8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/76c4fbf8.png
--------------------------------------------------------------------------------
/screenshot/79c600b1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/79c600b1.png
--------------------------------------------------------------------------------
/screenshot/7b5e4836.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/7b5e4836.png
--------------------------------------------------------------------------------
/screenshot/7cba404f.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/7cba404f.png
--------------------------------------------------------------------------------
/screenshot/7cd00637.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/7cd00637.png
--------------------------------------------------------------------------------
/screenshot/7cf4425b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/7cf4425b.png
--------------------------------------------------------------------------------
/screenshot/7fe930e0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/7fe930e0.png
--------------------------------------------------------------------------------
/screenshot/820fe570.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/820fe570.png
--------------------------------------------------------------------------------
/screenshot/831e1859.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/831e1859.png
--------------------------------------------------------------------------------
/screenshot/880c750d.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/880c750d.png
--------------------------------------------------------------------------------
/screenshot/8c5fa195.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/8c5fa195.png
--------------------------------------------------------------------------------
/screenshot/8cca6196.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/8cca6196.png
--------------------------------------------------------------------------------
/screenshot/8f89e666.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/8f89e666.png
--------------------------------------------------------------------------------
/screenshot/8fe964b8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/8fe964b8.png
--------------------------------------------------------------------------------
/screenshot/908989c5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/908989c5.png
--------------------------------------------------------------------------------
/screenshot/9379b632.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/9379b632.png
--------------------------------------------------------------------------------
/screenshot/946fe86f.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/946fe86f.png
--------------------------------------------------------------------------------
/screenshot/9897be78.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/9897be78.png
--------------------------------------------------------------------------------
/screenshot/98ddfe9a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/98ddfe9a.png
--------------------------------------------------------------------------------
/screenshot/9e4179c5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/9e4179c5.png
--------------------------------------------------------------------------------
/screenshot/9e67979f.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/9e67979f.png
--------------------------------------------------------------------------------
/screenshot/a13d8808.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/a13d8808.png
--------------------------------------------------------------------------------
/screenshot/a2ab75e3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/a2ab75e3.png
--------------------------------------------------------------------------------
/screenshot/a35893be.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/a35893be.png
--------------------------------------------------------------------------------
/screenshot/a47efd66.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/a47efd66.png
--------------------------------------------------------------------------------
/screenshot/a560cff6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/a560cff6.png
--------------------------------------------------------------------------------
/screenshot/a66b3e6f.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/a66b3e6f.png
--------------------------------------------------------------------------------
/screenshot/a8d36972.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/a8d36972.png
--------------------------------------------------------------------------------
/screenshot/aa3dbfbf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/aa3dbfbf.png
--------------------------------------------------------------------------------
/screenshot/abb5e847.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/abb5e847.png
--------------------------------------------------------------------------------
/screenshot/aef2abe1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/aef2abe1.png
--------------------------------------------------------------------------------
/screenshot/af73ebaa.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/af73ebaa.png
--------------------------------------------------------------------------------
/screenshot/b35e8d12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/b35e8d12.png
--------------------------------------------------------------------------------
/screenshot/b77622b6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/b77622b6.png
--------------------------------------------------------------------------------
/screenshot/c1186185.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/c1186185.png
--------------------------------------------------------------------------------
/screenshot/c33fe1b4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/c33fe1b4.png
--------------------------------------------------------------------------------
/screenshot/c6d0728b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/c6d0728b.png
--------------------------------------------------------------------------------
/screenshot/c84f6044.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/c84f6044.png
--------------------------------------------------------------------------------
/screenshot/cba7b53e.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/cba7b53e.png
--------------------------------------------------------------------------------
/screenshot/cdefdf02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/cdefdf02.png
--------------------------------------------------------------------------------
/screenshot/cf67e612.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/cf67e612.png
--------------------------------------------------------------------------------
/screenshot/cfd8e121.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/cfd8e121.png
--------------------------------------------------------------------------------
/screenshot/d068b5c0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/d068b5c0.png
--------------------------------------------------------------------------------
/screenshot/d1a2dc81.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/d1a2dc81.png
--------------------------------------------------------------------------------
/screenshot/d42bd3f1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/d42bd3f1.png
--------------------------------------------------------------------------------
/screenshot/d452de1b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/d452de1b.png
--------------------------------------------------------------------------------
/screenshot/d457be6b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/d457be6b.png
--------------------------------------------------------------------------------
/screenshot/d57e648a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/d57e648a.png
--------------------------------------------------------------------------------
/screenshot/d6cc806c.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/d6cc806c.png
--------------------------------------------------------------------------------
/screenshot/d99a61f4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/d99a61f4.png
--------------------------------------------------------------------------------
/screenshot/d9fcfcf5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/d9fcfcf5.png
--------------------------------------------------------------------------------
/screenshot/dc0e0c05.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/dc0e0c05.png
--------------------------------------------------------------------------------
/screenshot/dc64a356.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/dc64a356.png
--------------------------------------------------------------------------------
/screenshot/dedf144c.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/dedf144c.png
--------------------------------------------------------------------------------
/screenshot/df332a64.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/df332a64.png
--------------------------------------------------------------------------------
/screenshot/e219a541.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/e219a541.png
--------------------------------------------------------------------------------
/screenshot/e4022013.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/e4022013.png
--------------------------------------------------------------------------------
/screenshot/e44c5879.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/e44c5879.png
--------------------------------------------------------------------------------
/screenshot/e6130b81.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/e6130b81.png
--------------------------------------------------------------------------------
/screenshot/e61c1e01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/e61c1e01.png
--------------------------------------------------------------------------------
/screenshot/e751cb2d.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/e751cb2d.png
--------------------------------------------------------------------------------
/screenshot/ea8764de.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/ea8764de.png
--------------------------------------------------------------------------------
/screenshot/ebf3c65b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/ebf3c65b.png
--------------------------------------------------------------------------------
/screenshot/ec1f3fda.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/ec1f3fda.png
--------------------------------------------------------------------------------
/screenshot/fc27880f.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/fc27880f.png
--------------------------------------------------------------------------------
/screenshot/fe002ea4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/fe002ea4.png
--------------------------------------------------------------------------------
/screenshot/ff2dcb9b.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/ff2dcb9b.png
--------------------------------------------------------------------------------
/sync-db/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 | xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 | xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 | <parent>
6 | <artifactId>pyg</artifactId>
7 | <groupId>com.henry</groupId>
8 | <version>1.0-SNAPSHOT</version>
9 | </parent>
10 | <modelVersion>4.0.0</modelVersion>
11 |
12 | <artifactId>sync-db</artifactId>
13 |
14 | <properties>
15 | <scala.version>2.11</scala.version>
16 | <flink.version>1.6.0</flink.version>
17 | <hadoop.version>3.2.4</hadoop.version>
18 | <hbase.version>2.0.0</hbase.version>
19 | </properties>
20 |
21 | <dependencies>
22 | <dependency>
23 | <groupId>org.apache.kafka</groupId>
24 | <artifactId>kafka_${scala.version}</artifactId>
25 | <version>0.10.1.0</version>
26 | </dependency>
27 |
28 | <dependency>
29 | <groupId>org.apache.flink</groupId>
30 | <artifactId>flink-connector-kafka-0.10_${scala.version}</artifactId>
31 | <version>${flink.version}</version>
32 | </dependency>
33 |
34 | <dependency>
35 | <groupId>org.apache.flink</groupId>
36 | <artifactId>flink-table_${scala.version}</artifactId>
37 | <version>${flink.version}</version>
38 | </dependency>
39 |
40 | <dependency>
41 | <groupId>org.apache.flink</groupId>
42 | <artifactId>flink-scala_${scala.version}</artifactId>
43 | <version>${flink.version}</version>
44 | </dependency>
45 |
46 | <dependency>
47 | <groupId>org.apache.flink</groupId>
48 | <artifactId>flink-streaming-scala_${scala.version}</artifactId>
49 | <version>${flink.version}</version>
50 | </dependency>
51 | <dependency>
52 | <groupId>org.apache.flink</groupId>
53 | <artifactId>flink-streaming-java_${scala.version}</artifactId>
54 | <version>${flink.version}</version>
55 | </dependency>
56 |
57 | <dependency>
58 | <groupId>org.apache.flink</groupId>
59 | <artifactId>flink-hbase_${scala.version}</artifactId>
60 | <version>${flink.version}</version>
61 | </dependency>
62 |
63 | <dependency>
64 | <groupId>org.apache.hbase</groupId>
65 | <artifactId>hbase-client</artifactId>
66 | <version>${hbase.version}</version>
67 | </dependency>
68 |
69 | <dependency>
70 | <groupId>org.apache.hadoop</groupId>
71 | <artifactId>hadoop-common</artifactId>
72 | <version>${hadoop.version}</version>
73 | </dependency>
74 |
75 | <dependency>
76 | <groupId>org.apache.hadoop</groupId>
77 | <artifactId>hadoop-hdfs</artifactId>
78 | <version>${hadoop.version}</version>
79 | <exclusions>
80 | <exclusion>
81 | <groupId>xml-apis</groupId>
82 | <artifactId>xml-apis</artifactId>
83 | </exclusion>
84 | </exclusions>
85 | </dependency>
86 |
87 | <dependency>
88 | <groupId>org.apache.hadoop</groupId>
89 | <artifactId>hadoop-client</artifactId>
90 | <version>${hadoop.version}</version>
91 | <exclusions>
92 | <exclusion>
93 | <groupId>com.google.protobuf</groupId>
94 | <artifactId>protobuf-java</artifactId>
95 | </exclusion>
96 | </exclusions>
97 | </dependency>
98 |
99 | <dependency>
100 | <groupId>com.alibaba</groupId>
101 | <artifactId>fastjson</artifactId>
102 | <version>1.2.83</version>
103 | </dependency>
104 | </dependencies>
105 |
106 | <build>
107 | <sourceDirectory>src/main/scala</sourceDirectory>
108 | <testSourceDirectory>src/test/scala</testSourceDirectory>
109 | <plugins>
110 | <plugin>
111 | <groupId>org.apache.maven.plugins</groupId>
112 | <artifactId>maven-shade-plugin</artifactId>
113 | <version>3.0.0</version>
114 | <executions>
115 | <execution>
116 | <phase>package</phase>
117 | <goals>
118 | <goal>shade</goal>
119 | </goals>
120 | <configuration>
121 | <artifactSet>
122 | <excludes>
123 | <exclude>com.google.code.findbugs:jsr305</exclude>
124 | <exclude>org.slf4j:*</exclude>
125 | <exclude>log4j:*</exclude>
126 | </excludes>
127 | </artifactSet>
128 | <filters>
129 | <filter>
130 | <artifact>*:*</artifact>
131 | <excludes>
132 | <exclude>META-INF/*.SF</exclude>
133 | <exclude>META-INF/*.DSA</exclude>
134 | <exclude>META-INF/*.RSA</exclude>
135 | </excludes>
136 | </filter>
137 | </filters>
138 | <transformers>
139 | <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
140 | <mainClass>com.itheima.syncdb.App</mainClass>
141 | </transformer>
142 | </transformers>
143 | </configuration>
144 | </execution>
145 | </executions>
146 | </plugin>
147 | </plugins>
148 | </build>
149 | </project>
--------------------------------------------------------------------------------
/sync-db/src/main/resources/application.conf:
--------------------------------------------------------------------------------
1 | #
2 | # Kafka configuration
3 | #
4 | # Kafka cluster address
5 | bootstrap.servers="master:9092,slave1:9092,slave2:9092"
6 | # ZooKeeper cluster address
7 | zookeeper.connect="master:2181,slave1:2181,slave2:2181"
8 | # Kafka topic name
9 | input.topic="canal"
10 | # Consumer group ID
11 | group.id="canal"
12 | # Automatically commit the offsets of consumed messages back to Kafka
13 | enable.auto.commit="true"
14 | # Interval (in milliseconds) between automatic offset commits
15 | auto.commit.interval.ms="5000"
16 | # Always start consuming from the latest offset
17 | auto.offset.reset="latest"
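Note: App.scala imports a GlobalConfigutil helper whose source is not shown in this section. A minimal sketch of how such a helper could expose the keys above with Typesafe Config (the object and field names here are illustrative, not the repository's):

    // Illustrative sketch only; the repo's GlobalConfigutil may differ
    import com.typesafe.config.{Config, ConfigFactory}

    object GlobalConfigSketch {
      // Loads application.conf from the classpath
      private val config: Config = ConfigFactory.load()

      val bootstrapServers: String   = config.getString("bootstrap.servers")
      val zookeeperConnect: String   = config.getString("zookeeper.connect")
      val inputTopic: String         = config.getString("input.topic")
      val groupId: String            = config.getString("group.id")
      val enableAutoCommit: String   = config.getString("enable.auto.commit")
      val autoCommitInterval: String = config.getString("auto.commit.interval.ms")
      val autoOffsetReset: String    = config.getString("auto.offset.reset")
    }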
--------------------------------------------------------------------------------
/sync-db/src/main/resources/hbase-site.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
3 | <configuration>
4 | <property>
5 | <name>hbase.rootdir</name>
6 | <value>hdfs://master:8020/hbase</value>
7 | </property>
8 | <property>
9 | <name>hbase.cluster.distributed</name>
10 | <value>true</value>
11 | </property>
12 | <property>
13 | <name>hbase.master.port</name>
14 | <value>16000</value>
15 | </property>
16 | <property>
17 | <name>hbase.zookeeper.property.clientPort</name>
18 | <value>2181</value>
19 | </property>
20 | <property>
21 | <name>hbase.zookeeper.quorum</name>
22 | <value>master:2181,slave1:2181,slave2:2181</value>
23 | </property>
24 | <property>
25 | <name>hbase.zookeeper.property.dataDir</name>
26 | <value>/usr/local/src/zookeeper-3.4.5/hbasedata</value>
27 | </property>
28 | </configuration>
--------------------------------------------------------------------------------
/sync-db/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | log4j.rootLogger=warn,stdout
2 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender
3 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
4 | log4j.appender.stdout.layout.ConversionPattern=%5p - %m%n
--------------------------------------------------------------------------------
/sync-db/src/main/scala/com/henry/syncdb/App.scala:
--------------------------------------------------------------------------------
1 | package com.henry.syncdb
2 |
3 | import java.util.Properties
4 |
5 | import com.henry.syncdb.bean.{Cannal, HBaseOperation}
6 | import com.henry.syncdb.task.PreprocessTask
7 | import com.henry.syncdb.util.{FlinkUtils, GlobalConfigutil, HBaseUtil}
8 | import org.apache.flink.api.common.serialization.SimpleStringSchema
9 | import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
10 | import org.apache.flink.api.scala._
11 | import org.apache.flink.runtime.state.filesystem.FsStateBackend
12 | import org.apache.flink.streaming.api.environment.CheckpointConfig
13 | import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
14 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks
15 | import org.apache.flink.streaming.api.functions.sink.SinkFunction
16 | import org.apache.flink.streaming.api.watermark.Watermark
17 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010
18 |
19 | /**
20 | * @Author: Henry
21 | * @Description:
22 | * @Date: Create in 2019/11/6 21:17
23 | **/
24 | object App {
25 |
26 | def main(args: Array[String]): Unit = {
27 |
28 |
29 | val env = FlinkUtils.initFlinkEnv()
30 |
31 | // // 1. Output test
32 | // val testDs: DataStream[String] = env.fromCollection(List(
33 | // "1", "2", "3"
34 | // ))
35 | // testDs.print()
36 |
37 | val consumer = FlinkUtils.initKafkaFlink()
38 |
39 | // Print for testing
40 | val kafkaDataStream: DataStream[String] = env.addSource(consumer)
41 | // kafkaDataStream.print()
42 |
43 | val cannalDs: DataStream[Cannal] = kafkaDataStream.map {
44 | json =>
45 | Cannal(json)
46 | }
47 | // cannalDs.print()
48 |
49 |
50 | val waterDS: DataStream[Cannal] = cannalDs.assignTimestampsAndWatermarks(
51 | new AssignerWithPeriodicWatermarks[Cannal] {
52 |
53 | // Current (largest seen) timestamp
54 | var currentTimestamp = 0L
55 |
56 | // Allowed delay (maximum out-of-orderness), in milliseconds
57 | val delayTime = 2000L
58 |
59 | // Return the watermark
60 | override def getCurrentWatermark: Watermark = {
61 | new Watermark(currentTimestamp - delayTime)
62 | }
63 |
64 | // Take the max of the current element's timestamp and the previous element's, so event time never runs backwards
65 | override def extractTimestamp(element: Cannal, previousElementTimestamp: Long): Long = {
66 | currentTimestamp = Math.max(element.timestamp, previousElementTimestamp)
67 | currentTimestamp
68 | }
69 | })
70 | // waterDS.print()
71 |
72 | val hbaseDs: DataStream[HBaseOperation] = PreprocessTask.process(waterDS)
73 | hbaseDs.print()
74 |
75 | hbaseDs.addSink(new SinkFunction[HBaseOperation] {
76 | override def invoke(value: HBaseOperation): Unit = {
77 | value.opType match {
78 | case "DELETE" => HBaseUtil.deleteData(value.tableName,value.rowkey,value.cfName)
79 | case _ => HBaseUtil.putData(value.tableName,value.rowkey,value.cfName,value.colName,value.colValue)
80 | }
81 | }
82 | })
83 |
84 |
85 |
86 | // Execute the job
87 | env.execute("sync-db")
88 |
89 | }
90 | }
91 |
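Note: HBaseUtil is imported above but its source is not shown in this section. A minimal sketch of what the two calls used by the sink (putData and deleteData) could look like with the standard HBase client API; the connection handling and object name here are assumptions, not the repository's actual implementation:

    // Illustrative sketch only; the repo's HBaseUtil may differ
    import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
    import org.apache.hadoop.hbase.client.{Connection, ConnectionFactory, Delete, Put, Table}
    import org.apache.hadoop.hbase.util.Bytes

    object HBaseUtilSketch {
      // hbase-site.xml is picked up from the classpath
      private val conn: Connection = ConnectionFactory.createConnection(HBaseConfiguration.create())

      def putData(tableName: String, rowkey: String, cfName: String, colName: String, colValue: String): Unit = {
        val table: Table = conn.getTable(TableName.valueOf(tableName))
        val put = new Put(Bytes.toBytes(rowkey))
        put.addColumn(Bytes.toBytes(cfName), Bytes.toBytes(colName), Bytes.toBytes(colValue))
        table.put(put)
        table.close()
      }

      def deleteData(tableName: String, rowkey: String, cfName: String): Unit = {
        val table: Table = conn.getTable(TableName.valueOf(tableName))
        val delete = new Delete(Bytes.toBytes(rowkey))
        delete.addFamily(Bytes.toBytes(cfName))
        table.delete(delete)
        table.close()
      }
    }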
--------------------------------------------------------------------------------
/sync-db/src/main/scala/com/henry/syncdb/bean/Cannal.scala:
--------------------------------------------------------------------------------
1 | package com.henry.syncdb.bean
2 |
3 | import com.alibaba.fastjson.JSON
4 |
5 | /**
6 | * @Author: Henry
7 | * @Description:
8 | * @Date: Create in 2019/11/7 19:28
9 | **/
10 | case class Cannal(
11 | var emptyCount:Long,
12 | var logFileName:String,
13 | var dbName:String,
14 | var logFileOffset:Long,
15 | var eventType:String,
16 | var columnValueList:String,
17 | var tableName:String,
18 | var timestamp:Long
19 | )
20 |
21 | object Cannal {
22 |
23 | def apply(json:String): Cannal = {
24 | val canal: Cannal = JSON.parseObject[Cannal](json,classOf[Cannal])
25 | canal
26 | }
27 |
28 | def main(args: Array[String]): Unit = {
29 |
30 | val json = "{\"emptyCount\":2,\"logFileName\":\"mysql-bin.000005\",\"dbName\":\"pyg\",\"logFileOffset\":20544,\"eventType\":\"INSERT\",\"columnValueList\":[{\"columnName\":\"commodityId\",\"columnValue\":\"6\",\"isValid\":true},{\"columnName\":\"commodityName\",\"columnValue\":\"欧派\",\"isValid\":true},{\"columnName\":\"commodityTypeId\",\"columnValue\":\"3\",\"isValid\":true},{\"columnName\":\"originalPrice\",\"columnValue\":\"43000.0\",\"isValid\":true},{\"columnName\":\"activityPrice\",\"columnValue\":\"40000.0\",\"isValid\":true}],\"tableName\":\"commodity\",\"timestamp\":1558764495000}"
31 | val cannal = Cannal(json)
32 |
33 |
34 | println(cannal.timestamp)
35 | println(Cannal(json).dbName)
36 |
37 | }
38 | }
--------------------------------------------------------------------------------
/sync-db/src/main/scala/com/henry/syncdb/bean/HBaseOperation.scala:
--------------------------------------------------------------------------------
1 | package com.henry.syncdb.bean
2 |
3 | /**
4 | * @Author: Henry
5 | * @Description:
6 | * Operation type (opType) = INSERT / DELETE / UPDATE
7 | * Table name (tableName) = mysql.<binlog database name>.<binlog table name>
8 | * Column family (cfName) = fixed to "info"
9 | * rowkey = unique primary key (the first column value in the binlog row)
10 | * Column name (colName) = column name from the binlog
11 | * Column value (colValue) = column value from the binlog
12 | * @Date: Create in 2019/11/7 19:52
13 | **/
14 |
15 | case class HBaseOperation(
16 | var opType: String,
17 | val tableName: String,
18 | val cfName: String,
19 | val rowkey: String,
20 | val colName: String,
21 | val colValue: String
22 | )
23 |
24 |
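Note: as a concrete illustration (column names and values taken from the sample binlog JSON in Cannal.scala), an INSERT into mysql.pyg.commodity with two of its columns is flattened by the PreprocessTask shown below into one HBaseOperation per column, all sharing the same rowkey:

    // Illustrative only: two columns of one INSERT event become two HBase puts with rowkey "6"
    val ops = List(
      HBaseOperation("INSERT", "mysql.pyg.commodity", "info", "6", "commodityId", "6"),
      HBaseOperation("INSERT", "mysql.pyg.commodity", "info", "6", "commodityName", "欧派")
    )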
--------------------------------------------------------------------------------
/sync-db/src/main/scala/com/henry/syncdb/task/PreprocessTask.scala:
--------------------------------------------------------------------------------
1 | package com.henry.syncdb.task
2 |
3 | import java.util
4 |
5 | import com.alibaba.fastjson.JSON
6 | import com.henry.syncdb.bean.{Cannal, HBaseOperation}
7 | import org.apache.flink.streaming.api.scala.DataStream
8 | import org.apache.flink.api.scala._
9 |
10 | import scala.collection.JavaConverters._
11 | import scala.collection.mutable
12 |
13 | case class NameValuePair(
14 | var columnName: String,
15 | var columnValue: String,
16 | var isValid: Boolean
17 | )
18 |
19 | object PreprocessTask {
20 |
21 | def process(canalDataStream: DataStream[Cannal]) = {
22 |
23 | // flatMap: each canal message expands into one HBaseOperation per column (or a single delete)
24 |
25 | canalDataStream.flatMap {
26 | canal => {
27 |
28 | // Convert canal.columnValueList (a JSON array string) into a Scala collection
29 | // JSON.parseArray returns a java.util.List, so it is converted with asScala
30 | val javaList: util.List[NameValuePair] = JSON.parseArray(canal.columnValueList, classOf[NameValuePair])
31 | val nameValueList: mutable.Buffer[NameValuePair] = javaList.asScala
32 |
33 | // Fields shared by every HBaseOperation built from this message
34 | var opType = canal.eventType
35 | val tableName = "mysql." + canal.dbName + "." + canal.tableName
36 | val cfName = "info"
37 | val rowkey = nameValueList(0).columnValue
38 |
39 | // Branch on the event type: INSERT, UPDATE or DELETE
40 | opType match {
41 | case "INSERT" =>
42 | nameValueList.map {
43 | nameValue => HBaseOperation(opType, tableName, cfName, rowkey, nameValue.columnName, nameValue.columnValue)
44 | }
45 |
46 | case "UPDATE" =>
47 | nameValueList.filter(_.isValid).map {
48 | nameValue => HBaseOperation(opType, tableName, cfName, rowkey, nameValue.columnName, nameValue.columnValue)
49 | }
50 |
51 | case "DELETE" =>
52 | List(HBaseOperation(opType,tableName,cfName,rowkey,"",""))
53 |
54 | }
55 |
56 | // note: any other event type would fall through the match above and raise a MatchError
57 | }
58 | }
59 |
60 | }
61 |
62 | }
63 |
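The UPDATE branch only writes back columns whose isValid flag is true, i.e. the columns that actually changed. A minimal sketch of that filtering on plain Scala collections (no Flink runtime needed; the column values are made up):

    // Two columns from a hypothetical UPDATE binlog row: only activityPrice changed
    val cols = List(
      NameValuePair("commodityName", "欧派", isValid = false),
      NameValuePair("activityPrice", "38000.0", isValid = true)
    )

    // Same logic as the UPDATE case above: keep the valid columns, one put per column
    val ops = cols.filter(_.isValid).map { nv =>
      HBaseOperation("UPDATE", "mysql.pyg.commodity", "info", "6", nv.columnName, nv.columnValue)
    }
    // ops contains a single HBaseOperation, for the activityPrice column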
--------------------------------------------------------------------------------
/sync-db/src/main/scala/com/henry/syncdb/util/FlinkUtils.scala:
--------------------------------------------------------------------------------
1 | package com.henry.syncdb.util
2 |
3 | import java.util.Properties
4 |
5 | import org.apache.flink.api.common.serialization.SimpleStringSchema
6 | import org.apache.flink.runtime.state.filesystem.FsStateBackend
7 | import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
8 | import org.apache.flink.streaming.api.environment.CheckpointConfig
9 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
10 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010
11 |
12 | /**
13 | * @Author: Henry
14 | * @Description:
15 | * @Date: Create in 2019/11/6 21:58
16 | **/
17 | object FlinkUtils {
18 |
19 | // Initialize the Flink streaming environment
20 | def initFlinkEnv()={
21 | // Create the streaming execution environment
22 | val env = StreamExecutionEnvironment.getExecutionEnvironment
23 |
24 | // Use event time as the stream time characteristic
25 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
26 |
27 | // Set the parallelism
28 | env.setParallelism(1)
29 |
30 | // Checkpoint settings
31 | // Enable checkpointing with a 5 s interval
32 | env.enableCheckpointing(5000)
33 | // Set the checkpointing mode; this line could be omitted because enableCheckpointing defaults to EXACTLY_ONCE
34 | env.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE)
35 | // Minimum pause between two checkpoints
36 | env.getCheckpointConfig.setMinPauseBetweenCheckpoints(1000)
37 | // Checkpoint timeout
38 | env.getCheckpointConfig.setCheckpointTimeout(60000)
39 | // Allow at most one checkpoint in flight at a time
40 | env.getCheckpointConfig.setMaxConcurrentCheckpoints(1)
41 |
42 | // Retain externalized checkpoints when the job is cancelled
43 | env.getCheckpointConfig.enableExternalizedCheckpoints(
44 | CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION)
45 |
46 | // Store checkpoint state in HDFS
47 | env.setStateBackend(new FsStateBackend("hdfs://master:9000/flink-checkpoint"))
48 |
49 | env
50 | }
51 |
52 |
53 | // Kafka integration: build a FlinkKafkaConsumer010 from the global config
54 | def initKafkaFlink()={
55 |
56 | val props:Properties = new Properties()
57 |
58 | props.setProperty("bootstrap.servers", GlobalConfigutil.bootstrapServers)
59 | props.setProperty("group.id", GlobalConfigutil.gruopId)
60 | props.setProperty("enable.auto.commit", GlobalConfigutil.enableAutoCommit)
61 | props.setProperty("auto.commit.interval.ms", GlobalConfigutil.autoCommitIntervalMs)
62 | props.setProperty("auto.offset.reset", GlobalConfigutil.autoOffsetReset)
63 |
64 | // topic: String, valueDeserializer: DeserializationSchema[T], props: Properties
65 | val consumer: FlinkKafkaConsumer010[String] = new FlinkKafkaConsumer010[String](
66 | GlobalConfigutil.inputTopic,
67 | new SimpleStringSchema(),
68 | props
69 | )
70 | consumer
71 | }
72 |
73 |
74 |
75 |
76 | }
77 |
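A hedged sketch of how these two helpers are wired together (the actual wiring lives in App.scala; the object name and variable names below are illustrative only):

    import org.apache.flink.api.scala._
    import org.apache.flink.streaming.api.scala.DataStream
    import com.henry.syncdb.bean.Cannal
    import com.henry.syncdb.util.FlinkUtils

    object WiringSketch {
      def main(args: Array[String]): Unit = {
        // Build the environment and the Kafka consumer, then map each JSON record to a bean
        val env = FlinkUtils.initFlinkEnv()
        val consumer = FlinkUtils.initKafkaFlink()
        val jsonDs: DataStream[String] = env.addSource(consumer)
        val canalDs: DataStream[Cannal] = jsonDs.map(json => Cannal(json))
        canalDs.print()
        env.execute("wiring-sketch")
      }
    }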
--------------------------------------------------------------------------------
/sync-db/src/main/scala/com/henry/syncdb/util/GlobalConfigutil.scala:
--------------------------------------------------------------------------------
1 | package com.henry.syncdb.util
2 |
3 | import com.typesafe.config.{Config, ConfigFactory}
4 |
5 | /**
6 | * @Author: Henry
7 | * @Description: Configuration loader
8 | * @Date: Create in 2019/10/15 23:42
9 | **/
10 | object GlobalConfigutil {
11 |
12 | // Load the configuration via the factory; ConfigFactory automatically loads application.conf (the file name must not change)
13 | val config:Config = ConfigFactory.load()
14 |
15 | val bootstrapServers = config.getString("bootstrap.servers")
16 | val zookeeperConnect = config.getString("zookeeper.connect")
17 | val inputTopic = config.getString("input.topic")
18 | val gruopId = config.getString("gruop.id")
19 | val enableAutoCommit = config.getString("enable.auto.commit")
20 | val autoCommitIntervalMs = config.getString("auto.commit.interval.ms")
21 | val autoOffsetReset = config.getString("auto.offset.reset")
22 |
23 | def main(args: Array[String]): Unit = {
24 | // IDE tip: hold Alt and drag the left mouse button down to the last line, then press Ctrl+Shift and the right-arrow key for a column selection
25 | println(bootstrapServers)
26 | println(zookeeperConnect)
27 | println(inputTopic)
28 | println(gruopId)
29 | println(enableAutoCommit)
30 | println(autoCommitIntervalMs)
31 | println(autoOffsetReset)
32 | }
33 | }
34 |
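These keys must exist in the module's application.conf on the classpath. A placeholder example of what that file could look like (host names and values are illustrative, not taken from the project; the gruop.id spelling deliberately follows the code above):

    bootstrap.servers = "master:9092"
    zookeeper.connect = "master:2181"
    input.topic = "canal"
    gruop.id = "canal"
    enable.auto.commit = "false"
    auto.commit.interval.ms = "10000"
    auto.offset.reset = "latest"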
--------------------------------------------------------------------------------
/sync-db/src/main/scala/com/henry/syncdb/util/HBaseUtil.scala:
--------------------------------------------------------------------------------
1 | package com.henry.syncdb.util
2 |
3 | import org.apache.hadoop.conf.Configuration
4 | import org.apache.hadoop.hbase.client.{ColumnFamilyDescriptor, _}
5 | import org.apache.hadoop.hbase.util.Bytes
6 | import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
7 |
8 | /**
9 | * @Author: Henry
10 | * @Description: HBase utility class
11 | * 1. Get a Table object
12 | * 2. Save a single column
13 | * 3. Read a single column
14 | * 4. Save multiple columns
15 | * 5. Read multiple columns
16 | * 6. Delete data
17 | * @Date: Create in 2019/10/21 22:53
18 | **/
19 | object HBaseUtil {
20 |
21 | // HBase configuration; no file name has to be given, hbase-site.xml is picked up from the classpath
22 | val conf:Configuration = HBaseConfiguration.create()
23 |
24 | // HBase connection
25 | val conn:Connection = ConnectionFactory.createConnection(conf)
26 |
27 | // HBase admin API
28 | val admin:Admin = conn.getAdmin
29 |
30 | /**
31 | * Return the Table; if it does not exist, create it first
32 | *
33 | * @param tableNameStr table name
34 | * @param columnFamilyName column family name
35 | * @return
36 | */
37 | def getTable(tableNameStr:String, columnFamilyName:String):Table={
38 |
39 |
40 | // Build the TableName
41 | val tableName:TableName = TableName.valueOf(tableNameStr)
42 |
43 | // Create the table if it does not exist
44 |
45 | if(!admin.tableExists(tableName)){
46 |
47 | // Builder for the table descriptor
48 | val descBuilder: TableDescriptorBuilder = TableDescriptorBuilder.newBuilder(tableName)
49 |
50 | val familyDescriptor:ColumnFamilyDescriptor = ColumnFamilyDescriptorBuilder
51 | .newBuilder(columnFamilyName.getBytes).build()
52 |
53 | // Add the column family to the table
54 | descBuilder.setColumnFamily(familyDescriptor)
55 |
56 | // Create the table
57 | admin.createTable(descBuilder.build())
58 | }
59 |
60 | conn.getTable(tableName)
61 |
62 | }
63 |
64 | /**
65 | * Save a single column value
66 | *
67 | * @param tableNameStr table name
68 | * @param rowkey row key
69 | * @param columnFamilyName column family name
70 | * @param columnName column name
71 | * @param columnValue column value
72 | */
73 | def putData(tableNameStr:String, rowkey:String, columnFamilyName:String, columnName:String, columnValue:String)={
74 |
75 | // Get the table
76 | val table:Table = getTable(tableNameStr, columnFamilyName)
77 |
78 | try{
79 | // Put
80 | val put:Put = new Put(rowkey.getBytes)
81 | put.addColumn(columnFamilyName.getBytes, columnName.getBytes, columnValue.getBytes)
82 |
83 | // Save the data
84 | table.put(put)
85 | }catch {
86 | case ex:Exception=>{
87 | ex.printStackTrace()
88 | }
89 | }finally {
90 | table.close()
91 | }
92 | }
93 |
94 |
95 | /**
96 | * Read the value of a single column by its name
97 | * @param tableNameStr table name
98 | * @param rowkey row key
99 | * @param columnFamilyName column family name
100 | * @param columnName column name
101 | *
102 | * @return the column value, or an empty string if it is not found
103 | */
104 | def getData(tableNameStr:String, rowkey:String, columnFamilyName:String, columnName:String):String={
105 |
106 | // 1. Get the Table object
107 | val table = getTable(tableNameStr, columnFamilyName)
108 |
109 | try {
110 | // 2. Build the Get object
111 | val get = new Get(rowkey.getBytes)
112 |
113 | // 3. Run the query
114 | val result:Result = table.get(get)
115 |
116 | // 4. Check that the result is not null and contains the requested column
117 | if (result != null && result.containsColumn(columnFamilyName.getBytes, columnName.getBytes)){
118 | val bytes: Array[Byte] = result.getValue(columnFamilyName.getBytes(), columnName.getBytes)
119 |
120 | Bytes.toString(bytes)
121 | }else{
122 | ""
123 | }
124 |
125 | }catch{
126 | case ex:Exception => {
127 | ex.printStackTrace()
128 | ""
129 | }
130 | }finally {
131 | // 5. Close the table
132 | table.close()
133 | }
134 |
135 | }
136 |
137 |
138 | /**
139 | * Save multiple column values
140 | * @param tableNameStr table name
141 | * @param rowkey row key
142 | * @param columnFamilyName column family name
143 | * @param map map of column names to column values
144 | */
145 | def putMapData(tableNameStr:String, rowkey:String, columnFamilyName:String, map:Map[String,Any])={
146 |
147 | // 1. Get the Table object
148 | val table = getTable(tableNameStr, columnFamilyName)
149 |
150 | try{
151 | // 2. Create the Put
152 | val put = new Put(rowkey.getBytes)
153 |
154 | // 3. Add each column name and value to the Put
155 | for ((colName, colValue) <- map){
156 | put.addColumn(columnFamilyName.getBytes, colName.getBytes, colValue.toString.getBytes)
157 | }
158 |
159 | // 4. Save the Put
160 | table.put(put)
161 |
162 | }catch{
163 | case ex:Exception => {
164 | ex.printStackTrace()
165 |
166 | }
167 | }finally {
168 | // 5. Close the table
169 | table.close()
170 | }
171 |
172 |
173 | // (the table has already been closed in the finally block above,
174 | //  so no second close is needed here)
175 | }
176 |
177 |
178 | /**
179 | * Read the values of multiple columns
180 | * @param tableNameStr table name
181 | * @param rowkey row key
182 | * @param columnFamilyName column family name
183 | * @param columnNameList list of column names to read
184 | * @return
185 | */
186 | def getMapData(tableNameStr:String, rowkey:String, columnFamilyName:String, columnNameList:List[String]):Map[String,String]= {
187 |
188 | // 1. Get the Table
189 | val table = getTable(tableNameStr, columnFamilyName)
190 |
191 | try{
192 | // 2. Build the Get
193 | val get = new Get(rowkey.getBytes)
194 |
195 | // 3. Run the query
196 | val result: Result = table.get(get)
197 |
198 | // 4. Iterate over the column names, read each value and build the result Map
199 | columnNameList.map {
200 | col =>
201 | val bytes: Array[Byte] = result.getValue(columnFamilyName.getBytes(), col.getBytes)
202 |
203 | if (bytes != null && bytes.size > 0) {
204 | col -> Bytes.toString(bytes)
205 | }
206 | else { // if the value cannot be read, produce an empty pair
207 | "" -> ""
208 | }
209 | }.filter(_._1 != "").toMap // drop the empty pairs and convert to a Map
210 |
211 | }catch {
212 | case ex:Exception => {
213 | ex.printStackTrace()
214 | Map[String, String]() // return an empty Map
215 | }
216 | }finally {
217 | // 5. Close the Table
218 | table.close()
219 | }
220 | }
221 |
222 |
223 | /**
224 | * Delete a row
225 | * @param tableNameStr table name
226 | * @param rowkey row key
227 | * @param columnFamilyName column family name
228 | */
229 | def deleteData(tableNameStr:String, rowkey:String, columnFamilyName:String)={
230 |
231 | // 1. Get the Table
232 | val table:Table = getTable(tableNameStr, columnFamilyName)
233 |
234 | try {
235 | // 2. Build the Delete object
236 | val delete: Delete = new Delete(rowkey.getBytes)
237 |
238 | // 3. Run the delete
239 | table.delete(delete)
240 |
241 | }
242 | catch {
243 | case ex:Exception =>
244 | ex.printStackTrace()
245 | }
246 | finally {
247 | // 4. Close the table
248 | table.close()
249 | }
250 |
251 | }
252 |
253 |
254 | def main(args: Array[String]): Unit = {
255 |
256 | // println(getTable("test","info"))
257 | // putData("test", "1", "info", "t1", "hello world")
258 | // println(getData("test", "1", "info", "t1"))
259 |
260 | // val map = Map(
261 | // "t2" -> "scala" ,
262 | // "t3" -> "hive" ,
263 | // "t4" -> "flink"
264 | // )
265 | // putMapData("test", "1", "info", map)
266 |
267 | // println(getMapData("test", "1", "info", List("t1", "t2")))
268 |
269 | deleteData("test", "1", "info")
270 | println(getMapData("test", "1", "info", List("t1", "t2")))
271 |
272 | }
273 |
274 | }
275 |
--------------------------------------------------------------------------------