├── .idea ├── .name ├── Flink-pyg.iml ├── codeStyles │ └── codeStyleConfig.xml ├── compiler.xml ├── encodings.xml ├── hydra.xml ├── misc.xml ├── uiDesigner.xml ├── vcs.xml └── workspace.xml ├── ReadMe.md ├── batch-process └── pom.xml ├── canal-kafka ├── pom.xml └── src │ └── main │ ├── java │ ├── CanalClient.java │ └── com │ │ └── henry │ │ └── canal_kafka │ │ └── util │ │ ├── GlobalConfigUtil.java │ │ └── KafkaSender.java │ └── resources │ ├── application.properties │ └── log4j.properties ├── pom.xml ├── pyg.iml ├── real-process ├── pom.xml └── src │ ├── main │ ├── resources │ │ ├── application.conf │ │ ├── hbase-site.xml │ │ └── log4j.properties │ └── scala │ │ └── com │ │ └── henry │ │ └── realprocess │ │ ├── App.scala │ │ ├── bean │ │ ├── ClickLog.scala │ │ ├── ClickLogWide.scala │ │ └── Message.scala │ │ ├── task │ │ ├── BaseTask.scala │ │ ├── ChannelAreaTask.scala │ │ ├── ChannelBrowserTask.scala │ │ ├── ChannelFreshnessTask.scala │ │ ├── ChannelFreshnessTaskTrait.scala │ │ ├── ChannelNetworkTask.scala │ │ ├── ChannelPvUvTask.scala │ │ ├── ChannelPvUvTaskMerge.scala │ │ ├── ChannelRealHotTask.scala │ │ └── PreprocessTask.scala │ │ └── util │ │ ├── GlobalConfigutil.scala │ │ └── HBaseUtil.scala │ └── test │ └── temp.txt ├── report ├── pom.xml └── src │ ├── main │ ├── java │ │ └── com │ │ │ └── henry │ │ │ └── report │ │ │ ├── ReportApplication.java │ │ │ ├── bean │ │ │ ├── Clicklog.java │ │ │ └── Message.java │ │ │ ├── controller │ │ │ ├── ReportController.java │ │ │ └── TestController.java │ │ │ └── util │ │ │ ├── ClickLogGenerator.java │ │ │ ├── KafkaProducerConfig.java │ │ │ └── RoundRobinPartitioner.java │ └── resources │ │ └── application.properties │ └── test │ └── java │ └── com │ └── henry │ └── report │ └── KafkaTest.java ├── screenshot ├── 036a079d.png ├── 03ef7ace.png ├── 04e25b5a.png ├── 07a78b77.png ├── 0b4d0c1b.png ├── 0b4ea4e1.png ├── 0bd763d1.png ├── 0ced234a.png ├── 0e6080a2.png ├── 0fcd02b7.png ├── 1.png ├── 121bf948.png ├── 12f712f9.png ├── 13c61ea9.png ├── 14679e84.png ├── 1a3addd7.png ├── 1d504cce.png ├── 2.png ├── 201507bb.png ├── 21733492.png ├── 2193cbd1.png ├── 22cd7b3c.png ├── 277372f9.png ├── 2b7f3937.png ├── 2c0ad8e2.png ├── 2d11fecd.png ├── 2f5a312e.png ├── 3.png ├── 3254e2ca.png ├── 32a6daaf.png ├── 342dcc3e.png ├── 34a79ff7.png ├── 34f66a92.png ├── 3754f480.png ├── 3936fce5.png ├── 3ab50051.png ├── 3b6d6d1f.png ├── 3c8d398c.png ├── 3d2cda96.png ├── 3f08b9d0.png ├── 4.png ├── 48cd018e.png ├── 4b18ecbe.png ├── 4cf81224.png ├── 520fd656.png ├── 5326b634.png ├── 54187145.png ├── 544d0e7a.png ├── 565c64ed.png ├── 58926ce0.png ├── 58945558.png ├── 5a321628.png ├── 62c03232.png ├── 64a0b856.png ├── 65e75e0f.png ├── 69907922.png ├── 6ac8e320.png ├── 6c04e485.png ├── 6c99f78b.png ├── 6f5af076.png ├── 6f897038.png ├── 6fcd4a44.png ├── 70a923ce.png ├── 72d64e76.png ├── 74d009f4.png ├── 75fcc253.png ├── 76c4fbf8.png ├── 79c600b1.png ├── 7b5e4836.png ├── 7cba404f.png ├── 7cd00637.png ├── 7cf4425b.png ├── 7fe930e0.png ├── 820fe570.png ├── 831e1859.png ├── 880c750d.png ├── 8c5fa195.png ├── 8cca6196.png ├── 8f89e666.png ├── 8fe964b8.png ├── 908989c5.png ├── 9379b632.png ├── 946fe86f.png ├── 9897be78.png ├── 98ddfe9a.png ├── 9e4179c5.png ├── 9e67979f.png ├── a13d8808.png ├── a2ab75e3.png ├── a35893be.png ├── a47efd66.png ├── a560cff6.png ├── a66b3e6f.png ├── a8d36972.png ├── aa3dbfbf.png ├── abb5e847.png ├── aef2abe1.png ├── af73ebaa.png ├── b35e8d12.png ├── b77622b6.png ├── c1186185.png ├── c33fe1b4.png ├── c6d0728b.png ├── c84f6044.png ├── cba7b53e.png ├── cdefdf02.png 
├── cf67e612.png ├── cfd8e121.png ├── d068b5c0.png ├── d1a2dc81.png ├── d42bd3f1.png ├── d452de1b.png ├── d457be6b.png ├── d57e648a.png ├── d6cc806c.png ├── d99a61f4.png ├── d9fcfcf5.png ├── dc0e0c05.png ├── dc64a356.png ├── dedf144c.png ├── df332a64.png ├── e219a541.png ├── e4022013.png ├── e44c5879.png ├── e6130b81.png ├── e61c1e01.png ├── e751cb2d.png ├── ea8764de.png ├── ebf3c65b.png ├── ec1f3fda.png ├── fc27880f.png ├── fe002ea4.png └── ff2dcb9b.png └── sync-db ├── pom.xml └── src └── main ├── resources ├── application.conf ├── hbase-site.xml └── log4j.properties └── scala └── com └── henry └── syncdb ├── App.scala ├── bean ├── Cannal.scala └── HBaseOperation.scala ├── task └── PreprocessTask.scala └── util ├── FlinkUtils.scala ├── GlobalConfigutil.scala └── HBaseUtil.scala /.idea/.name: -------------------------------------------------------------------------------- 1 | pyg -------------------------------------------------------------------------------- /.idea/Flink-pyg.iml: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /.idea/codeStyles/codeStyleConfig.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | -------------------------------------------------------------------------------- /.idea/compiler.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /.idea/encodings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/hydra.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 9 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 7 | 8 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /.idea/uiDesigner.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /batch-process/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | pyg 7 | com.henry 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | batch-process 13 | 14 | 15 | 
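Module overview (inferred from the directory tree above and the sources that follow): `report` appears to be a Spring Boot service that generates and receives click logs and publishes them to Kafka; `canal-kafka` tails the MySQL binlog through Alibaba Canal and forwards each parsed row change to the `canal` Kafka topic; `real-process` is the Flink streaming job that consumes the `pyg` topic, widens the click logs (PreprocessTask) and writes per-channel PV/UV, freshness, area, network and browser aggregates to HBase; `sync-db` is a second Flink job that replays the Canal topic into HBase; `batch-process` currently contains only a pom.xml. The `.idea` directory and the `*.iml` files are IntelliJ IDEA project metadata.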
-------------------------------------------------------------------------------- /canal-kafka/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | pyg 7 | com.henry 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | canal-kafka 13 | 14 | 15 | 16 | com.alibaba.otter 17 | canal.client 18 | 1.0.24 19 | 20 | 21 | 22 | org.apache.kafka 23 | kafka_2.11 24 | 0.10.1.0 25 | 26 | 27 | 28 | com.alibaba 29 | fastjson 30 | 1.2.83 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /canal-kafka/src/main/java/CanalClient.java: -------------------------------------------------------------------------------- 1 | import com.alibaba.fastjson.JSON; 2 | import com.alibaba.fastjson.JSONObject; 3 | import com.alibaba.otter.canal.client.CanalConnector; 4 | import com.alibaba.otter.canal.client.CanalConnectors; 5 | import com.alibaba.otter.canal.protocol.CanalEntry; 6 | import com.alibaba.otter.canal.protocol.Message; 7 | import com.henry.canal_kafka.util.GlobalConfigUtil; 8 | import com.henry.canal_kafka.util.KafkaSender; 9 | 10 | import java.net.InetSocketAddress; 11 | import java.util.ArrayList; 12 | import java.util.List; 13 | import java.util.UUID; 14 | 15 | /** 16 | * Canal解析binlog日志工具类 17 | */ 18 | public class CanalClient { 19 | 20 | static class ColumnValuePair { 21 | private String columnName; 22 | private String columnValue; 23 | private Boolean isValid; 24 | 25 | public ColumnValuePair(String columnName, String columnValue, Boolean isValid) { 26 | this.columnName = columnName; 27 | this.columnValue = columnValue; 28 | this.isValid = isValid; 29 | } 30 | 31 | public String getColumnName() { return columnName; } 32 | public void setColumnName(String columnName) { this.columnName = columnName; } 33 | public String getColumnValue() { return columnValue; } 34 | public void setColumnValue(String columnValue) { this.columnValue = columnValue; } 35 | public Boolean getIsValid() { return isValid; } 36 | public void setIsValid(Boolean isValid) { this.isValid = isValid; } 37 | } 38 | 39 | /** 40 | * 获取Canal连接 41 | * 42 | * @param host 主机名 43 | * @param port 端口号 44 | * @param instance Canal实例名 45 | * @param username 用户名 46 | * @param password 密码 47 | * @return Canal连接器 48 | */ 49 | public static CanalConnector getConn(String host, int port, String instance, String username, String password) { 50 | CanalConnector canalConnector = CanalConnectors.newSingleConnector(new InetSocketAddress(host, port), instance, username, password); 51 | 52 | return canalConnector; 53 | } 54 | 55 | /** 56 | * 解析Binlog日志 57 | * 58 | * @param entries Binlog消息实体 59 | * @param emptyCount 操作的序号 60 | */ 61 | public static void analysis(List entries, int emptyCount) { 62 | for (CanalEntry.Entry entry : entries) { 63 | // 只解析mysql事务的操作,其他的不解析 64 | if (entry.getEntryType() == CanalEntry.EntryType.TRANSACTIONBEGIN || 65 | entry.getEntryType() == CanalEntry.EntryType.TRANSACTIONEND) { 66 | continue; 67 | } 68 | 69 | // 那么解析binlog 70 | CanalEntry.RowChange rowChange = null; 71 | 72 | try { 73 | rowChange = CanalEntry.RowChange.parseFrom(entry.getStoreValue()); 74 | } catch (Exception e) { 75 | e.printStackTrace(); 76 | } 77 | 78 | // 获取操作类型字段(增加 删除 修改) 79 | CanalEntry.EventType eventType = rowChange.getEventType(); 80 | // 获取binlog文件名称 81 | String logfileName = entry.getHeader().getLogfileName(); 82 | // 读取当前操作在binlog文件的位置 83 | long logfileOffset = entry.getHeader().getLogfileOffset(); 84 | // 获取当前操作所属的数据库 85 | String dbName = 
entry.getHeader().getSchemaName(); 86 | // 获取当前操作所属的表 87 | String tableName = entry.getHeader().getTableName();//当前操作的是哪一张表 88 | long timestamp = entry.getHeader().getExecuteTime();//执行时间 89 | 90 | // 解析操作的行数据 91 | for (CanalEntry.RowData rowData : rowChange.getRowDatasList()) { 92 | // 删除操作 93 | if (eventType == CanalEntry.EventType.DELETE) { 94 | // 获取删除之前的所有列数据 95 | dataDetails(rowData.getBeforeColumnsList(), logfileName, logfileOffset, dbName, tableName, eventType, emptyCount,timestamp); 96 | } 97 | // 新增操作 98 | else if (eventType == CanalEntry.EventType.INSERT) { 99 | // 获取新增之后的所有列数据 100 | dataDetails(rowData.getAfterColumnsList(), logfileName, logfileOffset, dbName, tableName, eventType, emptyCount,timestamp); 101 | } 102 | // 更新操作 103 | else { 104 | // 获取更新之后的所有列数据 105 | dataDetails(rowData.getAfterColumnsList(), logfileName, logfileOffset, dbName, tableName, eventType, emptyCount,timestamp); 106 | } 107 | } 108 | } 109 | } 110 | 111 | /** 112 | * 解析具体一条Binlog消息的数据 113 | * 114 | * @param columns 当前行所有的列数据 115 | * @param logFileName binlog文件名 116 | * @param logFileOffset 当前操作在binlog中的位置 117 | * @param dbName 当前操作所属数据库名称 118 | * @param tableName 当前操作所属表名称 119 | * @param eventType 当前操作类型(新增、修改、删除) 120 | * @param emptyCount 操作的序号 121 | */ 122 | private static void dataDetails(List columns, 123 | String logFileName, 124 | Long logFileOffset, 125 | String dbName, 126 | String tableName, 127 | CanalEntry.EventType eventType, 128 | int emptyCount, 129 | long timestamp) { 130 | 131 | // 找到当前那些列发生了改变 以及改变的值 132 | List columnValueList = new ArrayList(); 133 | 134 | for (CanalEntry.Column column : columns) { 135 | ColumnValuePair columnValuePair = new ColumnValuePair(column.getName(), column.getValue(), column.getUpdated()); 136 | columnValueList.add(columnValuePair); 137 | } 138 | 139 | String key = UUID.randomUUID().toString(); 140 | 141 | JSONObject jsonObject = new JSONObject(); 142 | jsonObject.put("logFileName", logFileName); 143 | jsonObject.put("logFileOffset", logFileOffset); 144 | jsonObject.put("dbName", dbName); 145 | jsonObject.put("tableName", tableName); 146 | jsonObject.put("eventType", eventType); 147 | jsonObject.put("columnValueList", columnValueList); 148 | jsonObject.put("emptyCount", emptyCount); 149 | jsonObject.put("timestamp", timestamp); 150 | 151 | 152 | // 拼接所有binlog解析的字段 153 | String data = JSON.toJSONString(jsonObject); 154 | 155 | System.out.println(data); 156 | 157 | // 解析后的数据发送到kafka 158 | KafkaSender.sendMessage(GlobalConfigUtil.kafkaInputTopic, key, data); 159 | } 160 | 161 | 162 | public static void main(String[] args) { 163 | 164 | // 加载配置文件 165 | String host = GlobalConfigUtil.canalHost; 166 | int port = Integer.parseInt(GlobalConfigUtil.canalPort); 167 | String instance = GlobalConfigUtil.canalInstance; 168 | String username = GlobalConfigUtil.mysqlUsername; 169 | String password = GlobalConfigUtil.mysqlPassword; 170 | 171 | // 获取Canal连接 172 | CanalConnector conn = getConn(host, port, instance, username, password); 173 | 174 | // 从binlog中读取数据 175 | int batchSize = 100; 176 | int emptyCount = 1; 177 | 178 | try { 179 | // 连接cannal 180 | conn.connect(); 181 | //订阅实例中所有的数据库和表 182 | conn.subscribe(".*\\..*"); 183 | // 回滚到未进行ack的地方 184 | conn.rollback(); 185 | 186 | int totalCount = 120; //循环次数 187 | 188 | while (totalCount > emptyCount) { 189 | // 获取数据 190 | Message message = conn.getWithoutAck(batchSize); 191 | 192 | long id = message.getId(); 193 | int size = message.getEntries().size(); 194 | if (id == -1 || size == 0) { 195 | //没有读取到任何数据 196 | } else { 197 
| //有数据,那么解析binlog日志 198 | analysis(message.getEntries(), emptyCount); 199 | emptyCount++; 200 | } 201 | 202 | // 确认消息 203 | conn.ack(message.getId()); 204 | 205 | } 206 | } catch (Exception e) { 207 | e.printStackTrace(); 208 | } finally { 209 | conn.disconnect(); 210 | } 211 | } 212 | } 213 | -------------------------------------------------------------------------------- /canal-kafka/src/main/java/com/henry/canal_kafka/util/GlobalConfigUtil.java: -------------------------------------------------------------------------------- 1 | package com.henry.canal_kafka.util; 2 | 3 | import java.util.ResourceBundle; 4 | 5 | public class GlobalConfigUtil { 6 | // 获取一个资源加载器 7 | // 资源加载器会自动去加载CLASSPATH中的application.properties配置文件 8 | private static ResourceBundle resourceBundle = ResourceBundle.getBundle("application"); 9 | 10 | // 使用ResourceBundle.getString方法来读取配置 11 | public static String canalHost = resourceBundle.getString("canal.host"); 12 | public static String canalPort = resourceBundle.getString("canal.port"); 13 | public static String canalInstance = resourceBundle.getString("canal.instance"); 14 | public static String mysqlUsername = resourceBundle.getString("mysql.username"); 15 | public static String mysqlPassword = resourceBundle.getString("mysql.password"); 16 | public static String kafkaBootstrapServers = resourceBundle.getString("kafka.bootstrap.servers"); 17 | public static String kafkaZookeeperConnect = resourceBundle.getString("kafka.zookeeper.connect"); 18 | public static String kafkaInputTopic = resourceBundle.getString("kafka.input.topic"); 19 | 20 | public static void main(String[] args) { 21 | System.out.println(canalHost); 22 | System.out.println(canalPort); 23 | System.out.println(canalInstance); 24 | System.out.println(mysqlUsername); 25 | System.out.println(mysqlPassword); 26 | System.out.println(kafkaBootstrapServers); 27 | System.out.println(kafkaZookeeperConnect); 28 | System.out.println(kafkaInputTopic); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /canal-kafka/src/main/java/com/henry/canal_kafka/util/KafkaSender.java: -------------------------------------------------------------------------------- 1 | package com.henry.canal_kafka.util; 2 | 3 | import kafka.javaapi.producer.Producer; 4 | import kafka.producer.KeyedMessage; 5 | import kafka.producer.ProducerConfig; 6 | import kafka.serializer.StringEncoder; 7 | 8 | import java.util.Properties; 9 | 10 | /** 11 | * Kafka生产消息工具类 12 | */ 13 | public class KafkaSender { 14 | private String topic; 15 | 16 | public KafkaSender(String topic){ 17 | super(); 18 | this.topic = topic; 19 | } 20 | 21 | /** 22 | * 发送消息到Kafka指定topic 23 | * 24 | * @param topic topic名字 25 | * @param key 键值 26 | * @param data 数据 27 | */ 28 | public static void sendMessage(String topic , String key , String data){ 29 | Producer producer = createProducer(); 30 | producer.send(new KeyedMessage(topic , key , data)); 31 | } 32 | 33 | private static Producer createProducer(){ 34 | Properties properties = new Properties(); 35 | 36 | properties.put("metadata.broker.list" , GlobalConfigUtil.kafkaBootstrapServers); 37 | properties.put("zookeeper.connect" , GlobalConfigUtil.kafkaZookeeperConnect); 38 | properties.put("serializer.class" , StringEncoder.class.getName()); 39 | 40 | return new Producer(new ProducerConfig(properties)); 41 | } 42 | } -------------------------------------------------------------------------------- /canal-kafka/src/main/resources/application.properties: 
-------------------------------------------------------------------------------- 1 | # 2 | # canal\u914D\u7F6E 3 | # 4 | canal.host=master 5 | canal.port=11111 6 | canal.instance=example 7 | mysql.username=root 8 | mysql.password=123456 9 | # 10 | #kafka\u7684\u914D\u7F6E 11 | # 12 | kafka.bootstrap.servers=master:9092,slave1:9092,slave2:9092 13 | kafka.zookeeper.connect=master:2181,slave1:2181,slave2:2181 14 | kafka.input.topic=canal 15 | -------------------------------------------------------------------------------- /canal-kafka/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=error,stdout 2 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 3 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 4 | log4j.appender.stdout.layout.ConversionPattern=%5p - %m%n -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | com.henry 8 | pyg 9 | 1.0-SNAPSHOT 10 | pom 11 | 12 | 13 | report 14 | real-process 15 | canal-kafka 16 | sync-db 17 | batch-process 18 | 19 | 20 | -------------------------------------------------------------------------------- /pyg.iml: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /real-process/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | pyg 7 | com.henry 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | real-process 13 | 14 | 15 | 2.11 16 | 1.6.0 17 | 3.2.4 18 | 2.0.0 19 | 20 | 21 | 22 | 23 | 24 | org.apache.kafka 25 | kafka_${scala.version} 26 | 0.10.1.0 27 | 28 | 29 | 30 | 31 | org.apache.flink 32 | flink-connector-kafka-0.10_${scala.version} 33 | ${flink.version} 34 | 35 | 36 | 37 | 38 | org.apache.flink 39 | flink-table_${scala.version} 40 | ${flink.version} 41 | 42 | 43 | 44 | 45 | org.apache.flink 46 | flink-scala_${scala.version} 47 | ${flink.version} 48 | 49 | 50 | 51 | 52 | org.apache.flink 53 | flink-streaming-scala_${scala.version} 54 | ${flink.version} 55 | 56 | 57 | org.apache.flink 58 | flink-streaming-java_${scala.version} 59 | ${flink.version} 60 | 61 | 62 | 63 | 64 | org.apache.flink 65 | flink-hbase_${scala.version} 66 | ${flink.version} 67 | 68 | 69 | 70 | org.apache.hbase 71 | hbase-client 72 | ${hbase.version} 73 | 74 | 75 | 76 | 77 | org.apache.hadoop 78 | hadoop-common 79 | ${hadoop.version} 80 | 81 | 82 | org.apache.hadoop 83 | hadoop-hdfs 84 | ${hadoop.version} 85 | 86 | 87 | 88 | xml-apis 89 | xml-apis 90 | 91 | 92 | 93 | 94 | 95 | org.apache.hadoop 96 | hadoop-client 97 | ${hadoop.version} 98 | 99 | 100 | 101 | com.google.protobuf 102 | protobuf-java 103 | 104 | 105 | 106 | 107 | 108 | 109 | com.alibaba 110 | fastjson 111 | 1.2.83 112 | 113 | 114 | 115 | 116 | 117 | 118 | src/main/scala 119 | src/test/scala 120 | 121 | 122 | 123 | org.apache.maven.plugins 124 | maven-shade-plugin 125 | 2.2 126 | 127 | 128 | package 129 | 130 | shade 131 | 132 | 133 | 134 | 135 | com.google.code.findbugs:jsr305 136 | org.slf4j:* 137 | log4j:* 138 | 139 | 140 | 141 | 142 | *:* 143 | 144 | META-INF/*.SF 145 | META-INF/*.DSA 146 | META-INF/*.RSA 147 | 148 | 149 | 150 | 151 | 152 | com.henry.pyg.App 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | -------------------------------------------------------------------------------- 
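The application.conf that follows carries the Kafka consumer settings (and commented-out HBase settings) for the real-process job. GlobalConfigutil.scala, listed in the tree but not reproduced in this excerpt, exposes these keys to App.scala. A minimal sketch of such a reader, assuming the Typesafe (Lightbend) Config library that conventionally backs an application.conf on the classpath; the object name here is only illustrative and may differ from the repo's actual GlobalConfigutil:

import com.typesafe.config.{Config, ConfigFactory}

object GlobalConfigSketch {
  // ConfigFactory.load() picks up application.conf from the classpath
  private val config: Config = ConfigFactory.load()

  val bootstrapServers: String     = config.getString("bootstrap.servers")
  val zookeeperConnect: String     = config.getString("zookeeper.connect")
  val inputTopic: String           = config.getString("input.topic")
  val gruopId: String              = config.getString("gruop.id")  // key is spelled "gruop.id" in the config below
  val enableAutoCommit: String     = config.getString("enable.auto.commit")
  val autoCommitIntervalMs: String = config.getString("auto.commit.interval.ms")
  val autoOffsetReset: String      = config.getString("auto.offset.reset")
}

App.scala reads exactly these fields (bootstrapServers, zookeeperConnect, inputTopic, gruopId, enableAutoCommit, autoCommitIntervalMs, autoOffsetReset) when it builds the FlinkKafkaConsumer010 properties.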
/real-process/src/main/resources/application.conf: -------------------------------------------------------------------------------- 1 | # 2 | # 3 | # kafka的配置 4 | # 5 | # kafka 集群地址 6 | bootstrap.servers="master:9092,slave1:9092,slave2:9092" 7 | # zookeeper 集群地址 8 | zookeeper.connect="master:2181,slave1:2181,slave2:2181" 9 | # kafka topic 10 | input.topic="pyg" 11 | # 消费者组 ID 12 | gruop.id="pyg" 13 | # 自动提交拉取到的消费端的消息offset到kafka 14 | enable.auto.commit="true" 15 | # 自动提交offset到zookeeper的时间间隔单位(毫秒) 16 | auto.commit.interval.ms="5000" 17 | # 每次消费最新的数据 18 | auto.offset.reset="latest" 19 | 20 | #Hbase的配置 21 | //hbase.zookeeper.quorum="master:2181,slave1:2181,slave2:2181" 22 | //hbase.master="master:60000" 23 | //hbase.zookeeper.property.clientPort="2181" 24 | //hbase.rpc.timeout="600000" 25 | //hbase.client.operator.timeout="600000" 26 | //hbase.client.scanner.timeout.period="600000" -------------------------------------------------------------------------------- /real-process/src/main/resources/hbase-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 23 | 24 | 25 | 26 | 27 | hbase.rootdir 28 | hdfs://master:9000/hbase2 29 | 30 | 31 | 32 | 33 | hbase.cluster.distributed 34 | true 35 | 36 | 37 | 38 | 39 | hbase.master.info.port 40 | 16000 41 | 42 | 43 | 44 | 45 | hbase.zookeeper.quorum 46 | master:2181,slave1:2181,slave2:2181 47 | 48 | 49 | 50 | hbase.zookeeper.property.clientPort 51 | 2181 52 | 53 | 54 | 55 | hbase.zookeeper.property.dataDir 56 | /usr/local/src/zookeeper-3.4.5/hbasedata 57 | 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /real-process/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Define some default values that can be overridden by system properties 18 | hadoop.root.logger=ERROR,console 19 | hadoop.log.dir=. 20 | hadoop.log.file=hadoop.log 21 | 22 | # Define the root logger to the system property "hadoop.root.logger". 23 | log4j.rootLogger=${hadoop.root.logger}, EventCounter 24 | 25 | # Logging Threshold 26 | log4j.threshold=ALL 27 | 28 | # Null Appender 29 | log4j.appender.NullAppender=org.apache.log4j.varia.NullAppender 30 | 31 | # 32 | # Rolling File Appender - cap space usage at 5gb. 
33 | # 34 | hadoop.log.maxfilesize=256MB 35 | hadoop.log.maxbackupindex=20 36 | log4j.appender.RFA=org.apache.log4j.RollingFileAppender 37 | log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file} 38 | 39 | log4j.appender.RFA.MaxFileSize=${hadoop.log.maxfilesize} 40 | log4j.appender.RFA.MaxBackupIndex=${hadoop.log.maxbackupindex} 41 | 42 | log4j.appender.RFA.layout=org.apache.log4j.PatternLayout 43 | 44 | # Pattern format: Date LogLevel LoggerName LogMessage 45 | log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 46 | # Debugging Pattern format 47 | #log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n 48 | 49 | 50 | # 51 | # Daily Rolling File Appender 52 | # 53 | 54 | log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender 55 | log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file} 56 | 57 | # Rollver at midnight 58 | log4j.appender.DRFA.DatePattern=.yyyy-MM-dd 59 | 60 | # 30-day backup 61 | #log4j.appender.DRFA.MaxBackupIndex=30 62 | log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout 63 | 64 | # Pattern format: Date LogLevel LoggerName LogMessage 65 | log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 66 | # Debugging Pattern format 67 | #log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n 68 | 69 | 70 | # 71 | # console 72 | # Add "console" to rootlogger above if you want to use this 73 | # 74 | 75 | log4j.appender.console=org.apache.log4j.ConsoleAppender 76 | log4j.appender.console.target=System.err 77 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 78 | log4j.appender.console.layout.ConversionPattern=%d{yy HH:mm:ss} %p %c{2}: %m%n 79 | 80 | # 81 | # TaskLog Appender 82 | # 83 | 84 | #Default values 85 | hadoop.tasklog.taskid=null 86 | hadoop.tasklog.iscleanup=false 87 | hadoop.tasklog.noKeepSplits=4 88 | hadoop.tasklog.totalLogFileSize=100 89 | hadoop.tasklog.purgeLogSplits=true 90 | hadoop.tasklog.logsRetainHours=12 91 | 92 | log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender 93 | log4j.appender.TLA.taskId=${hadoop.tasklog.taskid} 94 | log4j.appender.TLA.isCleanup=${hadoop.tasklog.iscleanup} 95 | log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize} 96 | 97 | log4j.appender.TLA.layout=org.apache.log4j.PatternLayout 98 | log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 99 | 100 | # 101 | # HDFS block state change log from block manager 102 | # 103 | # Uncomment the following to suppress normal block state change 104 | # messages from BlockManager in NameNode. 
105 | #log4j.logger.BlockStateChange=WARN 106 | 107 | # 108 | #Security appender 109 | # 110 | hadoop.security.logger=INFO,NullAppender 111 | hadoop.security.log.maxfilesize=256MB 112 | hadoop.security.log.maxbackupindex=20 113 | log4j.category.SecurityLogger=${hadoop.security.logger} 114 | hadoop.security.log.file=SecurityAuth-${user.name}.audit 115 | log4j.appender.RFAS=org.apache.log4j.RollingFileAppender 116 | log4j.appender.RFAS.File=${hadoop.log.dir}/${hadoop.security.log.file} 117 | log4j.appender.RFAS.layout=org.apache.log4j.PatternLayout 118 | log4j.appender.RFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 119 | log4j.appender.RFAS.MaxFileSize=${hadoop.security.log.maxfilesize} 120 | log4j.appender.RFAS.MaxBackupIndex=${hadoop.security.log.maxbackupindex} 121 | 122 | # 123 | # Daily Rolling Security appender 124 | # 125 | log4j.appender.DRFAS=org.apache.log4j.DailyRollingFileAppender 126 | log4j.appender.DRFAS.File=${hadoop.log.dir}/${hadoop.security.log.file} 127 | log4j.appender.DRFAS.layout=org.apache.log4j.PatternLayout 128 | log4j.appender.DRFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 129 | log4j.appender.DRFAS.DatePattern=.yyyy-MM-dd 130 | 131 | # 132 | # hadoop configuration logging 133 | # 134 | 135 | # Uncomment the following line to turn off configuration deprecation warnings. 136 | # log4j.logger.org.apache.hadoop.conf.Configuration.deprecation=WARN 137 | 138 | # 139 | # hdfs audit logging 140 | # 141 | hdfs.audit.logger=INFO,NullAppender 142 | hdfs.audit.log.maxfilesize=256MB 143 | hdfs.audit.log.maxbackupindex=20 144 | log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=${hdfs.audit.logger} 145 | log4j.additivity.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=false 146 | log4j.appender.RFAAUDIT=org.apache.log4j.RollingFileAppender 147 | log4j.appender.RFAAUDIT.File=${hadoop.log.dir}/hdfs-audit.log 148 | log4j.appender.RFAAUDIT.layout=org.apache.log4j.PatternLayout 149 | log4j.appender.RFAAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n 150 | log4j.appender.RFAAUDIT.MaxFileSize=${hdfs.audit.log.maxfilesize} 151 | log4j.appender.RFAAUDIT.MaxBackupIndex=${hdfs.audit.log.maxbackupindex} 152 | 153 | # 154 | # mapred audit logging 155 | # 156 | mapred.audit.logger=INFO,NullAppender 157 | mapred.audit.log.maxfilesize=256MB 158 | mapred.audit.log.maxbackupindex=20 159 | log4j.logger.org.apache.hadoop.mapred.AuditLogger=${mapred.audit.logger} 160 | log4j.additivity.org.apache.hadoop.mapred.AuditLogger=false 161 | log4j.appender.MRAUDIT=org.apache.log4j.RollingFileAppender 162 | log4j.appender.MRAUDIT.File=${hadoop.log.dir}/mapred-audit.log 163 | log4j.appender.MRAUDIT.layout=org.apache.log4j.PatternLayout 164 | log4j.appender.MRAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n 165 | log4j.appender.MRAUDIT.MaxFileSize=${mapred.audit.log.maxfilesize} 166 | log4j.appender.MRAUDIT.MaxBackupIndex=${mapred.audit.log.maxbackupindex} 167 | 168 | # Custom Logging levels 169 | 170 | #log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG 171 | #log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG 172 | #log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=DEBUG 173 | 174 | # Jets3t library 175 | log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR 176 | 177 | # AWS SDK & S3A FileSystem 178 | log4j.logger.com.amazonaws=ERROR 179 | log4j.logger.com.amazonaws.http.AmazonHttpClient=ERROR 180 | log4j.logger.org.apache.hadoop.fs.s3a.S3AFileSystem=WARN 181 | 182 | # 183 | # Event Counter 
Appender 184 | # Sends counts of logging messages at different severity levels to Hadoop Metrics. 185 | # 186 | log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter 187 | 188 | # 189 | # Job Summary Appender 190 | # 191 | # Use following logger to send summary to separate file defined by 192 | # hadoop.mapreduce.jobsummary.log.file : 193 | # hadoop.mapreduce.jobsummary.logger=INFO,JSA 194 | # 195 | hadoop.mapreduce.jobsummary.logger=${hadoop.root.logger} 196 | hadoop.mapreduce.jobsummary.log.file=hadoop-mapreduce.jobsummary.log 197 | hadoop.mapreduce.jobsummary.log.maxfilesize=256MB 198 | hadoop.mapreduce.jobsummary.log.maxbackupindex=20 199 | log4j.appender.JSA=org.apache.log4j.RollingFileAppender 200 | log4j.appender.JSA.File=${hadoop.log.dir}/${hadoop.mapreduce.jobsummary.log.file} 201 | log4j.appender.JSA.MaxFileSize=${hadoop.mapreduce.jobsummary.log.maxfilesize} 202 | log4j.appender.JSA.MaxBackupIndex=${hadoop.mapreduce.jobsummary.log.maxbackupindex} 203 | log4j.appender.JSA.layout=org.apache.log4j.PatternLayout 204 | log4j.appender.JSA.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n 205 | log4j.logger.org.apache.hadoop.mapred.JobInProgress$JobSummary=${hadoop.mapreduce.jobsummary.logger} 206 | log4j.additivity.org.apache.hadoop.mapred.JobInProgress$JobSummary=false 207 | 208 | # 209 | # Yarn ResourceManager Application Summary Log 210 | # 211 | # Set the ResourceManager summary log filename 212 | yarn.server.resourcemanager.appsummary.log.file=rm-appsummary.log 213 | # Set the ResourceManager summary log level and appender 214 | yarn.server.resourcemanager.appsummary.logger=${hadoop.root.logger} 215 | #yarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY 216 | 217 | # To enable AppSummaryLogging for the RM, 218 | # set yarn.server.resourcemanager.appsummary.logger to 219 | # ,RMSUMMARY in hadoop-env.sh 220 | 221 | # Appender for ResourceManager Application Summary Log 222 | # Requires the following properties to be set 223 | # - hadoop.log.dir (Hadoop Log directory) 224 | # - yarn.server.resourcemanager.appsummary.log.file (resource manager app summary log filename) 225 | # - yarn.server.resourcemanager.appsummary.logger (resource manager app summary log level and appender) 226 | 227 | log4j.logger.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=${yarn.server.resourcemanager.appsummary.logger} 228 | log4j.additivity.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=false 229 | log4j.appender.RMSUMMARY=org.apache.log4j.RollingFileAppender 230 | log4j.appender.RMSUMMARY.File=${hadoop.log.dir}/${yarn.server.resourcemanager.appsummary.log.file} 231 | log4j.appender.RMSUMMARY.MaxFileSize=256MB 232 | log4j.appender.RMSUMMARY.MaxBackupIndex=20 233 | log4j.appender.RMSUMMARY.layout=org.apache.log4j.PatternLayout 234 | log4j.appender.RMSUMMARY.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n 235 | 236 | # HS audit log configs 237 | #mapreduce.hs.audit.logger=INFO,HSAUDIT 238 | #log4j.logger.org.apache.hadoop.mapreduce.v2.hs.HSAuditLogger=${mapreduce.hs.audit.logger} 239 | #log4j.additivity.org.apache.hadoop.mapreduce.v2.hs.HSAuditLogger=false 240 | #log4j.appender.HSAUDIT=org.apache.log4j.DailyRollingFileAppender 241 | #log4j.appender.HSAUDIT.File=${hadoop.log.dir}/hs-audit.log 242 | #log4j.appender.HSAUDIT.layout=org.apache.log4j.PatternLayout 243 | #log4j.appender.HSAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n 244 | #log4j.appender.HSAUDIT.DatePattern=.yyyy-MM-dd 245 | 246 | # 
Http Server Request Logs 247 | #log4j.logger.http.requests.namenode=INFO,namenoderequestlog 248 | #log4j.appender.namenoderequestlog=org.apache.hadoop.http.HttpRequestLogAppender 249 | #log4j.appender.namenoderequestlog.Filename=${hadoop.log.dir}/jetty-namenode-yyyy_mm_dd.log 250 | #log4j.appender.namenoderequestlog.RetainDays=3 251 | 252 | #log4j.logger.http.requests.datanode=INFO,datanoderequestlog 253 | #log4j.appender.datanoderequestlog=org.apache.hadoop.http.HttpRequestLogAppender 254 | #log4j.appender.datanoderequestlog.Filename=${hadoop.log.dir}/jetty-datanode-yyyy_mm_dd.log 255 | #log4j.appender.datanoderequestlog.RetainDays=3 256 | 257 | #log4j.logger.http.requests.resourcemanager=INFO,resourcemanagerrequestlog 258 | #log4j.appender.resourcemanagerrequestlog=org.apache.hadoop.http.HttpRequestLogAppender 259 | #log4j.appender.resourcemanagerrequestlog.Filename=${hadoop.log.dir}/jetty-resourcemanager-yyyy_mm_dd.log 260 | #log4j.appender.resourcemanagerrequestlog.RetainDays=3 261 | 262 | #log4j.logger.http.requests.jobhistory=INFO,jobhistoryrequestlog 263 | #log4j.appender.jobhistoryrequestlog=org.apache.hadoop.http.HttpRequestLogAppender 264 | #log4j.appender.jobhistoryrequestlog.Filename=${hadoop.log.dir}/jetty-jobhistory-yyyy_mm_dd.log 265 | #log4j.appender.jobhistoryrequestlog.RetainDays=3 266 | 267 | #log4j.logger.http.requests.nodemanager=INFO,nodemanagerrequestlog 268 | #log4j.appender.nodemanagerrequestlog=org.apache.hadoop.http.HttpRequestLogAppender 269 | #log4j.appender.nodemanagerrequestlog.Filename=${hadoop.log.dir}/jetty-nodemanager-yyyy_mm_dd.log 270 | #log4j.appender.nodemanagerrequestlog.RetainDays=3 271 | 272 | 273 | # WebHdfs request log on datanodes 274 | # Specify -Ddatanode.webhdfs.logger=INFO,HTTPDRFA on datanode startup to 275 | # direct the log to a separate file. 
276 | #datanode.webhdfs.logger=INFO,console 277 | #log4j.logger.datanode.webhdfs=${datanode.webhdfs.logger} 278 | #log4j.appender.HTTPDRFA=org.apache.log4j.DailyRollingFileAppender 279 | #log4j.appender.HTTPDRFA.File=${hadoop.log.dir}/hadoop-datanode-webhdfs.log 280 | #log4j.appender.HTTPDRFA.layout=org.apache.log4j.PatternLayout 281 | #log4j.appender.HTTPDRFA.layout.ConversionPattern=%d{ISO8601} %m%n 282 | #log4j.appender.HTTPDRFA.DatePattern=.yyyy-MM-dd 283 | 284 | # 285 | # Fair scheduler state dump 286 | # 287 | # Use following logger to dump the state to a separate file 288 | 289 | #log4j.logger.org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler.statedump=DEBUG,FSSTATEDUMP 290 | #log4j.additivity.org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler.statedump=false 291 | #log4j.appender.FSSTATEDUMP=org.apache.log4j.RollingFileAppender 292 | #log4j.appender.FSSTATEDUMP.File=${hadoop.log.dir}/fairscheduler-statedump.log 293 | #log4j.appender.FSSTATEDUMP.layout=org.apache.log4j.PatternLayout 294 | #log4j.appender.FSSTATEDUMP.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 295 | #log4j.appender.FSSTATEDUMP.MaxFileSize=${hadoop.log.maxfilesize} 296 | #log4j.appender.FSSTATEDUMP.MaxBackupIndex=${hadoop.log.maxbackupindex} 297 | -------------------------------------------------------------------------------- /real-process/src/main/scala/com/henry/realprocess/App.scala: -------------------------------------------------------------------------------- 1 | package com.henry.realprocess 2 | 3 | 4 | import java.util.Properties 5 | 6 | import com.alibaba.fastjson.JSON 7 | import com.henry.realprocess.bean.{ClickLog, ClickLogWide, Message} 8 | import com.henry.realprocess.task._ 9 | import com.henry.realprocess.util.GlobalConfigutil 10 | import org.apache.flink.api.common.serialization.SimpleStringSchema 11 | import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic} 12 | import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment} 13 | import org.apache.flink.api.scala._ 14 | import org.apache.flink.runtime.state.filesystem.FsStateBackend 15 | import org.apache.flink.streaming.api.environment.CheckpointConfig 16 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks 17 | import org.apache.flink.streaming.api.watermark.Watermark 18 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010 19 | 20 | 21 | /** 22 | * @Author: Henry 23 | * @Description: 入口类 24 | * @Date: Create in 2019/10/16 22:42 25 | **/ 26 | object App { 27 | 28 | def main(args: Array[String]): Unit = { 29 | 30 | //------------ 初始化Flink流式环境,ctrl+alt+v -------------------- 31 | val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment 32 | 33 | // 设置处理时间为EventTime 34 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime) 35 | 36 | // 设置并行度 37 | env.setParallelism(1) 38 | 39 | // 本地测试 加载本地集合 成为一个 Datastream 打印输出 40 | // val localDataStream:DataStream[String] = env.fromCollection( 41 | // List("hadoop", "hive", "hbase", "flink") 42 | // ) 43 | // localDataStream.print() 44 | 45 | 46 | //------------ 添加 checkpoint 的支持 ------------------------------- 47 | env.enableCheckpointing(5000) // 5秒启动一次checkpoint 48 | 49 | // 设置 checkpoint 只检查 1次,即 仅一次 50 | env.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE) 51 | // 设置两次 checkpoint 的最小时间间隔 1s 52 | env.getCheckpointConfig.setMinPauseBetweenCheckpoints(1000) 53 | // 设置checkpoint的超时时长, 60s 54 | 
env.getCheckpointConfig.setCheckpointTimeout(60000) 55 | // 允许的最大并行度 56 | env.getCheckpointConfig.setMaxConcurrentCheckpoints(1) 57 | // 当程序关闭时,触发额外的checkpoint 58 | env.getCheckpointConfig.enableExternalizedCheckpoints( 59 | CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION) 60 | 61 | 62 | // 设置checkpoint的地址 63 | env.setStateBackend(new FsStateBackend("hdfs://master:9000/flink-checkpoint/")) 64 | 65 | 66 | //--------------- 整合kafka -------------------------- 67 | val properties = new Properties() 68 | // kafka 集群地址 69 | properties.setProperty("bootstrap.servers", GlobalConfigutil.bootstrapServers) 70 | // zookeeper 集群地址 71 | properties.setProperty("zookeeper.connect", GlobalConfigutil.zookeeperConnect) 72 | // kafka topic 73 | properties.setProperty("input.topic", GlobalConfigutil.inputTopic) 74 | // 消费者组 ID 75 | properties.setProperty("gruop.id", GlobalConfigutil.gruopId) 76 | // 自动提交拉取到的消费端的消息offset到kafka 77 | properties.setProperty("enable.auto.commit", GlobalConfigutil.enableAutoCommit) 78 | // 自动提交offset到zookeeper的时间间隔单位(毫秒) 79 | properties.setProperty("auto.commit.interval.ms", GlobalConfigutil.autoCommitIntervalMs) 80 | // 每次消费最新的数据 81 | properties.setProperty("auto.offset.reset", GlobalConfigutil.autoOffsetReset) 82 | 83 | 84 | // topic 、反序列化器、 属性集合 85 | val consumer = new FlinkKafkaConsumer010[String]( 86 | GlobalConfigutil.inputTopic, 87 | new SimpleStringSchema(), 88 | properties) 89 | 90 | val kafkaDataStream: DataStream[String] = env.addSource(consumer) 91 | 92 | // kafkaDataStream.print() 93 | 94 | // JSON -> 元组 95 | val tupleDataStream = kafkaDataStream.map { 96 | msgJson => 97 | val jsonObject = JSON.parseObject(msgJson) 98 | 99 | val message = jsonObject.getString("message") 100 | val count = jsonObject.getLong("count") 101 | val timeStamp = jsonObject.getLong("timestamp") 102 | 103 | // (message, count, timeStamp) 104 | // 改造成样例类 105 | // (ClickLog(message), count, timeStamp) 106 | Message(ClickLog(message), count, timeStamp) 107 | 108 | } 109 | 110 | // tupleDataStream.print() 111 | 112 | //----------------- 添加水印支持 ----------------------- 113 | 114 | var watermarkDataStream = tupleDataStream.assignTimestampsAndWatermarks( 115 | new AssignerWithPeriodicWatermarks[Message] { 116 | 117 | var currentTimestamp = 0L 118 | 119 | // 延迟时间 120 | var maxDelayTime = 2000L 121 | 122 | // 获取当前时间戳 123 | override def getCurrentWatermark: Watermark = { 124 | // 设置水印时间比事件时间小 2s 125 | new Watermark(currentTimestamp - maxDelayTime) 126 | } 127 | 128 | // 获取当前事件时间 129 | override def extractTimestamp( 130 | element: Message, 131 | previousElementTimestamp: Long): Long = { 132 | currentTimestamp = Math.max(element.timeStamp, previousElementTimestamp) 133 | currentTimestamp 134 | } 135 | }) 136 | 137 | // 数据的预处理 138 | val clickLogWideDateStream : DataStream[ClickLogWide] = PreprocessTask.process(watermarkDataStream) 139 | // clickLogWideDateStream.print() 140 | 141 | // 转换 142 | // ChannelRealHotTask.process(clickLogWideDateStream).print() 143 | // ChannelRealHotTask.process(clickLogWideDateStream) 144 | 145 | // 转换 PV、UV 146 | ChannelPvUvTask.process(clickLogWideDateStream) 147 | // ChannelPvUvTaskMerge.process(clickLogWideDateStream) 148 | // ChannelFreshnessTask.process(clickLogWideDateStream) 149 | 150 | // 重构模板方法 151 | ChannelFreshnessTaskTrait.process(clickLogWideDateStream) 152 | 153 | // ChannelAreaTask 测试 154 | ChannelAreaTask.process(clickLogWideDateStream) 155 | 156 | // ChannelNetworkTask 测试 157 | ChannelNetworkTask.process(clickLogWideDateStream) 158 | 159 | // 
ChannelBrowserTask 测试 160 | ChannelBrowserTask.process(clickLogWideDateStream) 161 | 162 | 163 | // 执行任务 164 | env.execute("real-process") 165 | } 166 | } 167 | -------------------------------------------------------------------------------- /real-process/src/main/scala/com/henry/realprocess/bean/ClickLog.scala: -------------------------------------------------------------------------------- 1 | package com.henry.realprocess.bean 2 | 3 | import com.alibaba.fastjson.JSON 4 | 5 | /** 6 | * @Author: Henry 7 | * @Description: 8 | * @Date: Create in 2019/10/20 14:45 9 | **/ 10 | 11 | //频道ID(channelID) 12 | //产品类别ID(categoryID) 13 | //产品ID(produceID) 14 | //国家(country) 15 | //省份(province) 16 | //城市(city) 17 | //网络方式(network) 18 | //来源方式(source) 19 | //浏览器类型(browserType) 20 | //进入网站时间(entryTime) 21 | //离开网站时间(leaveTime) 22 | //用户ID(userID) 23 | 24 | case class ClickLog ( 25 | // 1、alt + 下拉 26 | // 2、ctrl + shift + →,选中各个变量 27 | var channelID:String, 28 | var categoryID:String, 29 | var produceID:String, 30 | var country:String, 31 | var province:String, 32 | var city:String, 33 | var network:String, 34 | var source:String, 35 | var browserType:String, 36 | var entryTime:String, 37 | var leaveTime:String, 38 | var userID:String 39 | ) 40 | 41 | object ClickLog{ 42 | 43 | def apply(json: String): ClickLog = { 44 | 45 | // 先把json转换为JSONObject 46 | val jsonObject = JSON.parseObject(json) 47 | 48 | // 提取jsonObject中的各个属性,赋值给样例类 49 | var channelID = jsonObject.getString("channelID") 50 | var categoryID = jsonObject.getString("categoryID") 51 | var produceID = jsonObject.getString("produceID") 52 | var country = jsonObject.getString("country") 53 | var province = jsonObject.getString("province") 54 | var city = jsonObject.getString("city") 55 | var network = jsonObject.getString("network") 56 | var source = jsonObject.getString("source") 57 | var browserType = jsonObject.getString("browserType") 58 | var entryTime = jsonObject.getString("entryTime") 59 | var leaveTime = jsonObject.getString("leaveTime") 60 | var userID = jsonObject.getString("userID") 61 | 62 | ClickLog( 63 | channelID, 64 | categoryID, 65 | produceID, 66 | country, 67 | province, 68 | city, 69 | network, 70 | source, 71 | browserType, 72 | entryTime, 73 | leaveTime, 74 | userID 75 | ) 76 | } 77 | } -------------------------------------------------------------------------------- /real-process/src/main/scala/com/henry/realprocess/bean/ClickLogWide.scala: -------------------------------------------------------------------------------- 1 | package com.henry.realprocess.bean 2 | 3 | /** 4 | * @Author: Henry 5 | * @Description: 6 | * @Date: Create in 2019/10/27 14:19 7 | **/ 8 | 9 | // 频道ID(channelID) 10 | // 产品类别ID(categoryID) 11 | // 产品ID(produceID) 12 | // 国家(country) 13 | // 省份(province) 14 | // 城市(city) 15 | // 网络方式(network) 16 | // 来源方式(source) 17 | // 浏览器类型(browserType) 18 | // 进入网站时间(entryTime) 19 | // 离开网站时间(leaveTime) 20 | // 用户ID(userID) 21 | // ---- 添加以下字段 --------------- 22 | // 用户访问次数(count) 23 | // 用户访问的时间(timestamp) 24 | // 国家省份城市(拼接)(address) 25 | // 年月(yearMonth) 26 | // 年月日(yearMonthDay) 27 | // 年月日时(yearMonthDayHour) 28 | // 是否为访问某个频道的新用户(isNew)—— 0:表示否; 1:表示是 29 | // 在某一小时内是否为某个频道的新用户(isHourNew)—— 0:表示否; 1:表示是 30 | // 在某一天内是否为某个频道的新用户(isDayNew)—— 0:表示否; 1:表示是 31 | // 在某一天月是否为某个频道的新用户(isMonthNew)—— 0:表示否; 1:表示是 32 | 33 | case class ClickLogWide ( 34 | // 1、alt + 下拉 35 | // 2、ctrl + shift + →,选中各个变量 36 | var channelID:String, 37 | var categoryID:String, 38 | var produceID:String, 39 | var country:String, 40 | var 
province:String, 41 | var city:String, 42 | var network:String, 43 | var source:String, 44 | var browserType:String, 45 | var entryTime:String, 46 | var leaveTime:String, 47 | var userID:String, 48 | //--- 新增 --------------------------- 49 | var count:Long, 50 | var timestamp:Long, 51 | var address:String, 52 | var yearMonth:String, 53 | var yearMonthDay:String, 54 | var yearMonthDayHour:String, 55 | var isNew:Int, 56 | var isHourNew:Int, 57 | var isDayNew:Int, 58 | var isMonthNew:Int 59 | 60 | ) 61 | 62 | -------------------------------------------------------------------------------- /real-process/src/main/scala/com/henry/realprocess/bean/Message.scala: -------------------------------------------------------------------------------- 1 | package com.henry.realprocess.bean 2 | 3 | /** 4 | * @Author: Henry 5 | * @Description: 6 | * @Date: Create in 2019/10/20 15:58 7 | **/ 8 | case class Message ( 9 | var clickLog:ClickLog, 10 | var count:Long, 11 | var timeStamp:Long 12 | ) 13 | -------------------------------------------------------------------------------- /real-process/src/main/scala/com/henry/realprocess/task/BaseTask.scala: -------------------------------------------------------------------------------- 1 | package com.henry.realprocess.task 2 | 3 | import com.henry.realprocess.bean.ClickLogWide 4 | import com.henry.realprocess.task.ChannelBrowserTask.pvColName 5 | import org.apache.commons.lang.StringUtils 6 | import org.apache.flink.streaming.api.scala.{DataStream, KeyedStream, WindowedStream} 7 | import org.apache.flink.streaming.api.windowing.time.Time 8 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow 9 | 10 | /** 11 | * @Author: Henry 12 | * @Description: 13 | * @Date: Create in 2019/11/3 10:42 14 | **/ 15 | 16 | trait BaseTask[T] { 17 | 18 | 19 | // 1、 转换 20 | def map(clickLogWideDataStream : DataStream[ClickLogWide]): DataStream[T] 21 | 22 | // 2、 分组 23 | def keyBy(mapDataStream : DataStream[T]): KeyedStream[T, String] 24 | 25 | // 3、 时间窗口 26 | def timeWindow(keyedStream: KeyedStream[T, String]) : WindowedStream[T, String, TimeWindow] = { 27 | // 因为所有自类都是 3 秒的时间窗口 28 | keyedStream.timeWindow(Time.seconds(3)) 29 | } 30 | 31 | // 4、 聚合 32 | def reduce(windowedStream : WindowedStream[T, String, TimeWindow]) : DataStream[T] 33 | 34 | // 5、 落地 HBase 35 | def sink2HBase(reduceDataStream: DataStream[T]) 36 | 37 | 38 | // 定义模板执行顺序 39 | def process(clickLogWideDataStream : DataStream[ClickLogWide]): Unit = { 40 | val mapDataStream: DataStream[T] = map(clickLogWideDataStream) 41 | val keyedStream: KeyedStream[T, String] = keyBy(mapDataStream) 42 | val windowedStream: WindowedStream[T, String, TimeWindow] = timeWindow(keyedStream) 43 | val reduceStream: DataStream[T] = reduce(windowedStream) 44 | sink2HBase(reduceStream) 45 | } 46 | 47 | // 检测老用户是否第一次访问 48 | val isOld = (isNew: Int, isDateNew: Int) => if (isNew == 0 && isDateNew == 1) 1 else 0 49 | 50 | // 创建 HBase 相关列 51 | var tableName = "" 52 | var clfName = "info" 53 | var rowkey = "" 54 | var channelIdColName = "channelID" 55 | var browserColName = "browser" 56 | var dateColName = "date" 57 | var pvColName = "pv" 58 | var uvColName = "uv" 59 | var newCountColName = "newCount" 60 | var oldCountColName = "oldCount" 61 | 62 | 63 | /* 累加相关列的值 64 | * @param resultMap map集合 65 | * @param column 待查询的列 66 | * @param currentValue 当前值 67 | * @return 累加后的值 68 | */ 69 | def getTotal(resultMap: Map[String, String],column:String,currentValue:Long):Long={ 70 | 71 | var total = currentValue 72 | // 
如果resultMap不为空,并且可以去到相关列的值,那么就进行累加 73 | if (resultMap != null && StringUtils.isNotBlank(resultMap.getOrElse(column,""))) { 74 | total = resultMap(column).toLong + currentValue 75 | } 76 | total 77 | } 78 | 79 | 80 | } 81 | -------------------------------------------------------------------------------- /real-process/src/main/scala/com/henry/realprocess/task/ChannelAreaTask.scala: -------------------------------------------------------------------------------- 1 | package com.henry.realprocess.task 2 | import com.henry.realprocess.bean.ClickLogWide 3 | import com.henry.realprocess.util.HBaseUtil 4 | import org.apache.commons.lang.StringUtils 5 | import org.apache.flink.streaming.api.scala.{DataStream, KeyedStream, WindowedStream} 6 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow 7 | import org.apache.flink.api.scala._ 8 | import org.apache.flink.streaming.api.windowing.time.Time 9 | 10 | 11 | /** 12 | * @Author: Henry 13 | * @Description: 14 | * @Date: Create in 2019/11/3 14:06 15 | **/ 16 | 17 | // 样例类 18 | case class ChannelArea( 19 | var channelId: String, 20 | var area: String, 21 | var date: String, 22 | var pv: Long, 23 | var uv: Long, 24 | var newCount: Long, 25 | var oldCount: Long 26 | ) 27 | 28 | object ChannelAreaTask extends BaseTask [ChannelArea]{ 29 | 30 | // 1、 转换 31 | override def map(clickLogWideDataStream: DataStream[ClickLogWide]): DataStream[ChannelArea] = { 32 | 33 | clickLogWideDataStream.flatMap{ 34 | 35 | clickLogWide =>{ 36 | 37 | // 如果是老用户,并且在该时间段内第一次来,就计数 1. 否则计 0 38 | val isOld = (isNew: Int, isDateNew: Int) => if (isNew == 0 && isDateNew == 1) 1 else 0 39 | 40 | List( 41 | ChannelArea( // 月维度 42 | clickLogWide.channelID, 43 | clickLogWide.address, 44 | clickLogWide.yearMonth, 45 | clickLogWide.count, // pv, 每来一个数据进行累加 46 | clickLogWide.isMonthNew, // uv, 第一次来的时候只计数一次 47 | clickLogWide.isNew, // 当是 New 的时候进行累加 48 | isOld(clickLogWide.isNew, clickLogWide.isMonthNew) 49 | ), 50 | ChannelArea( // 日维度 51 | clickLogWide.channelID, 52 | clickLogWide.address, 53 | clickLogWide.yearMonth, 54 | clickLogWide.count, 55 | clickLogWide.isDayNew, 56 | clickLogWide.isNew, 57 | isOld(clickLogWide.isNew, clickLogWide.isDayNew) 58 | ), 59 | ChannelArea( // 小时维度 60 | clickLogWide.channelID, 61 | clickLogWide.address, 62 | clickLogWide.yearMonth, 63 | clickLogWide.count, 64 | clickLogWide.isHourNew, 65 | clickLogWide.isNew, 66 | isOld(clickLogWide.isNew, clickLogWide.isHourNew) 67 | ) 68 | ) 69 | } 70 | } 71 | } 72 | 73 | // 2、 分组 根据 频道ID+地域+时间 74 | override def keyBy(mapDataStream: DataStream[ChannelArea]): KeyedStream[ChannelArea, String] = { 75 | mapDataStream.keyBy{ 76 | area => 77 | area.channelId + " : " + area.area + " : " + area.date 78 | } 79 | } 80 | 81 | // 3、 时间窗口, 这段代码每个子类都是一样的,可以写到父类中 82 | // override def timeWindow(keyedStream: KeyedStream[ChannelArea, String]): WindowedStream[ChannelArea, String, TimeWindow] = {} 83 | 84 | 85 | // 4、 聚合 累加4个字段 86 | override def reduce(windowedStream: WindowedStream[ChannelArea, String, TimeWindow]) = { 87 | windowedStream.reduce { 88 | (t1, t2) => 89 | ChannelArea(t1.channelId, t1.area, 90 | t1.date, 91 | t1.pv + t2.pv, 92 | t1.uv + t2.uv, 93 | t1.newCount + t2.newCount, 94 | t1.oldCount + t2.oldCount) 95 | } 96 | } 97 | 98 | 99 | // 5、 落地HBase 100 | override def sink2HBase(reduceDataStream: DataStream[ChannelArea]): Unit = { 101 | reduceDataStream.addSink{ 102 | area => { 103 | // HBase 相关字段 104 | val tableName = "channel_area" 105 | val clfName = "info" 106 | val rowkey = area.channelId + ":" + area.area + ":" 
+ area.date 107 | 108 | val channelIdColumn = "channelId" 109 | val areaColumn = "area" 110 | val dateColumn = "date" 111 | val pvColumn = "pv" 112 | val uvColumn = "uv" 113 | val newCountColumn = "newCount" 114 | val oldCountColumn = "oldCount" 115 | 116 | // 查询 HBase 117 | val pvInHbase: String = HBaseUtil.getData(tableName,rowkey,clfName,pvColumn) 118 | val uvInHbase: String = HBaseUtil.getData(tableName,rowkey,clfName,uvColumn) 119 | val newCountInHbase: String = HBaseUtil.getData(tableName,rowkey,clfName,newCountColumn) 120 | val oldCountInHbase: String = HBaseUtil.getData(tableName,rowkey,clfName,oldCountColumn) 121 | 122 | // 累加 123 | var totalPv = 0L 124 | var totalUv = 0L 125 | var totalNewCount = 0L 126 | var totalOldCount = 0L 127 | 128 | // PV 129 | if(StringUtils.isNotBlank(pvInHbase)){ 130 | totalPv = pvInHbase.toLong+area.pv 131 | }else{ 132 | totalPv = area.pv 133 | } 134 | 135 | // UV 136 | if(StringUtils.isNotBlank(uvInHbase)){ 137 | totalUv = uvInHbase.toLong+area.uv 138 | }else{ 139 | totalUv = area.uv 140 | } 141 | 142 | // totalNewCount 143 | if(StringUtils.isNotBlank(newCountInHbase)){ 144 | totalNewCount = newCountInHbase.toLong+area.newCount 145 | }else{ 146 | totalNewCount = area.newCount 147 | } 148 | 149 | // totalOldCount 150 | if(StringUtils.isNotBlank(oldCountInHbase)){ 151 | totalOldCount = oldCountInHbase.toLong+area.oldCount 152 | }else{ 153 | totalOldCount = area.oldCount 154 | } 155 | 156 | // 保存数据 157 | HBaseUtil.putMapData(tableName,rowkey,clfName,Map( 158 | channelIdColumn->area.channelId, 159 | areaColumn->area.area, 160 | dateColumn->area.date, 161 | pvColumn->totalPv, 162 | uvColumn->totalUv, 163 | newCountColumn->totalNewCount, 164 | oldCountColumn->totalOldCount 165 | )) 166 | 167 | } 168 | } 169 | } 170 | } 171 | -------------------------------------------------------------------------------- /real-process/src/main/scala/com/henry/realprocess/task/ChannelBrowserTask.scala: -------------------------------------------------------------------------------- 1 | package com.henry.realprocess.task 2 | 3 | import com.henry.realprocess.bean.ClickLogWide 4 | import com.henry.realprocess.util.HBaseUtil 5 | import org.apache.commons.lang.StringUtils 6 | import org.apache.flink.streaming.api.scala.{DataStream, KeyedStream, WindowedStream} 7 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow 8 | import org.apache.flink.api.scala._ 9 | import org.apache.flink.streaming.api.functions.sink.SinkFunction 10 | import org.apache.flink.streaming.api.windowing.time.Time 11 | 12 | 13 | /** 14 | * @Author: Henry 15 | * @Description: 16 | * @Date: Create in 2019/11/3 15:52 17 | **/ 18 | 19 | // 2. 
添加一个`ChannelBrowser`样例类,它封装要统计的四个业务字段:频道ID(channelID)、运营商 20 | // (browser)、日期(date)pv、uv、新用户(newCount)、老用户(oldCount) 21 | case class ChannelBrowser( 22 | var channelId: String, 23 | var browser: String, 24 | var date: String, 25 | var pv: Long, 26 | var uv: Long, 27 | var newCount: Long, 28 | var oldCount: Long 29 | ) 30 | 31 | 32 | object ChannelBrowserTask extends BaseTask[ChannelBrowser]{ 33 | 34 | override def map(clickLogWideDataStream: DataStream[ClickLogWide]): DataStream[ChannelBrowser] = { 35 | 36 | clickLogWideDataStream.flatMap{ 37 | clickLogWide => { 38 | List( 39 | ChannelBrowser( // 月维度 40 | clickLogWide.channelID, 41 | clickLogWide.browserType, 42 | clickLogWide.yearMonth, 43 | clickLogWide.count, 44 | clickLogWide.isMonthNew, 45 | clickLogWide.isNew, 46 | isOld(clickLogWide.isNew, clickLogWide.isMonthNew) 47 | ), 48 | ChannelBrowser( // 天维度 49 | clickLogWide.channelID, 50 | clickLogWide.browserType, 51 | clickLogWide.yearMonthDay, 52 | clickLogWide.count, 53 | clickLogWide.isDayNew, 54 | clickLogWide.isNew, 55 | isOld(clickLogWide.isNew, clickLogWide.isDayNew) 56 | ), 57 | ChannelBrowser( // 小时维度 58 | clickLogWide.channelID, 59 | clickLogWide.browserType, 60 | clickLogWide.yearMonthDayHour, 61 | clickLogWide.count, 62 | clickLogWide.isHourNew, 63 | clickLogWide.isNew, 64 | isOld(clickLogWide.isNew, clickLogWide.isHourNew) 65 | ) 66 | ) 67 | } 68 | } 69 | } 70 | 71 | override def keyBy(mapDataStream: DataStream[ChannelBrowser]): KeyedStream[ChannelBrowser, String] = { 72 | 73 | mapDataStream.keyBy { 74 | browser => 75 | browser.channelId +" : "+ browser.browser +" : "+ browser.date 76 | } 77 | } 78 | 79 | override def reduce(windowedStream: WindowedStream[ChannelBrowser, String, TimeWindow]): DataStream[ChannelBrowser] = { 80 | windowedStream.reduce { 81 | (t1, t2) => { 82 | ChannelBrowser( 83 | t1.channelId, 84 | t1.browser, 85 | t1.date, 86 | t1.pv + t2.pv, 87 | t1.uv + t2.uv, 88 | t1.newCount + t2.newCount, 89 | t1.oldCount + t2.oldCount 90 | ) 91 | } 92 | } 93 | } 94 | 95 | 96 | override def sink2HBase(reduceDataStream: DataStream[ChannelBrowser]): Unit = { 97 | 98 | reduceDataStream.addSink( 99 | browser => { 100 | 101 | // 创建 HBase 相关列 - 准备hbase的表名、列族名、rowkey名、列名 102 | // 不需要加 val 或者 var ,因为引用的是父类的变量 103 | tableName = "channel_browser" 104 | rowkey = s"${browser.channelId} : ${browser.date} : ${browser.browser}" // 引用变量的方式 105 | browserColName = "browser" 106 | 107 | 108 | // 查询 HBase 109 | // - 判断hbase中是否已经存在结果记录 110 | val resultMap: Map[String, String] = HBaseUtil.getMapData(tableName, rowkey, clfName, 111 | List( pvColName, uvColName, newCountColName, oldCountColName ) 112 | ) 113 | 114 | // 数据累加 115 | // 保存数据 116 | HBaseUtil.putMapData( 117 | tableName, rowkey, clfName, Map( 118 | channelIdColName -> browser.channelId, 119 | browserColName -> browser.browser, 120 | dateColName -> browser.date, 121 | pvColName -> getTotal(resultMap, pvColName , browser.pv), 122 | uvColName -> getTotal(resultMap, uvColName , browser.uv), 123 | newCountColName -> getTotal(resultMap, newCountColName , browser.newCount), 124 | oldCountColName -> getTotal(resultMap, oldCountColName , browser.newCount) 125 | ) 126 | ) 127 | } 128 | ) 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /real-process/src/main/scala/com/henry/realprocess/task/ChannelFreshnessTask.scala: -------------------------------------------------------------------------------- 1 | package com.henry.realprocess.task 2 | 3 | import 
com.henry.realprocess.bean.ClickLogWide 4 | import com.henry.realprocess.util.HBaseUtil 5 | import org.apache.commons.lang.StringUtils 6 | import org.apache.flink.streaming.api.scala.{DataStream, KeyedStream, WindowedStream} 7 | import org.apache.flink.api.scala._ 8 | import org.apache.flink.streaming.api.functions.sink.SinkFunction 9 | import org.apache.flink.streaming.api.windowing.time.Time 10 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow 11 | 12 | /** 13 | * @Author: Henry 14 | * @Description: 15 | * @Date: Create in 2019/10/31 21:38 16 | **/ 17 | 18 | case class ChannelFreshness( 19 | var channelId : String , 20 | var date : String , 21 | var newCount: Long , 22 | val oldCount: Long 23 | 24 | ) 25 | 26 | /** 27 | * 1、 转换 28 | * 2、 分组 29 | * 3、 时间窗口 30 | * 4、 聚合 31 | * 5、 落地 HBase 32 | */ 33 | object ChannelFreshnessTask { 34 | 35 | def process(clickLogWideDataStream: DataStream[ClickLogWide])= { 36 | 37 | // 1、 转换 38 | val mapDataStream: DataStream[ChannelFreshness] = clickLogWideDataStream.flatMap { 39 | clickLog => 40 | 41 | // 如果是老用户,只有在第一次来的时候,计数为 1 42 | val isOld = (isNew: Int, isDateNew: Int) => if (isNew == 0 && isDateNew == 1) 1 else 0 43 | // 统计新用户、老用户数量 44 | List( 45 | ChannelFreshness(clickLog.channelID, clickLog.yearMonthDayHour, clickLog.isNew, isOld(clickLog.isNew, clickLog.isHourNew)), 46 | ChannelFreshness(clickLog.channelID, clickLog.yearMonthDay, clickLog.isNew, isOld(clickLog.isNew, clickLog.isDayNew)), 47 | ChannelFreshness(clickLog.channelID, clickLog.yearMonth, clickLog.isNew, isOld(clickLog.isNew, clickLog.isMonthNew)) 48 | ) 49 | } 50 | 51 | // 2、 分组 52 | val keyedStream: KeyedStream[ChannelFreshness, String] = mapDataStream.keyBy { 53 | freshness => (freshness.channelId + freshness.date) 54 | } 55 | 56 | 57 | // 3、 时间窗口 58 | val windowedStream: WindowedStream[ChannelFreshness, String, TimeWindow] = keyedStream.timeWindow(Time.seconds(3)) 59 | 60 | 61 | // 4、 聚合 62 | val reduceDataStream: DataStream[ChannelFreshness] = windowedStream.reduce { 63 | (t1, t2) => 64 | ChannelFreshness(t1.channelId, t1.date, t1.newCount + t2.newCount, t1.oldCount + t2.oldCount) 65 | } 66 | 67 | // 5、 落地 HBase 68 | reduceDataStream.addSink(new SinkFunction[ChannelFreshness] { 69 | override def invoke(value: ChannelFreshness): Unit = { 70 | // 创建 HBase 相关变量 71 | val tableName = "channel_freshness" 72 | val clfName = "info" 73 | val channelIdColumn = "channelId" 74 | val dateColumn = "date" 75 | val newCountColumn = "newCount" 76 | val oldCountColumn = "oldCount" 77 | 78 | val rowkey = value.channelId + ":" + value.date 79 | 80 | // 查询历史数据 81 | val resultMap: Map[String, String] = HBaseUtil.getMapData(tableName, rowkey, clfName, List(newCountColumn, oldCountColumn)) 82 | 83 | // 累加 84 | var totalNewCount = 0L 85 | var totalOldCount = 0L 86 | 87 | if(resultMap != null && StringUtils.isNotBlank(resultMap.getOrElse(newCountColumn,""))){ 88 | resultMap(newCountColumn).toLong + value.newCount 89 | } 90 | else { 91 | totalNewCount = value.newCount 92 | } 93 | 94 | if(resultMap != null && StringUtils.isNotBlank(resultMap.getOrElse(oldCountColumn,""))){ 95 | resultMap(oldCountColumn).toLong + value.oldCount 96 | } 97 | else { 98 | totalOldCount = value.oldCount 99 | } 100 | 101 | 102 | // 保存数据 103 | HBaseUtil.putMapData(tableName, rowkey, clfName, Map( 104 | // 向如下列插入数据 105 | channelIdColumn -> value.channelId , 106 | dateColumn -> value.date , 107 | newCountColumn -> totalNewCount , 108 | oldCountColumn -> totalOldCount 109 | )) 110 | 111 | } 112 | }) 113 | } 114 | 115 
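// NOTE: in the accumulation above, the sums are computed but never assigned back, so the
// counts already stored in HBase are effectively discarded. The intent is presumably:
//   if (resultMap != null && StringUtils.isNotBlank(resultMap.getOrElse(newCountColumn, ""))) {
//     totalNewCount = resultMap(newCountColumn).toLong + value.newCount
//   } else {
//     totalNewCount = value.newCount
//   }
//   // ... and likewise totalOldCount = resultMap(oldCountColumn).toLong + value.oldCount
// The same missing assignments appear in ChannelFreshnessTaskTrait.sink2HBase below, and in
// ChannelBrowserTask.sink2HBase above getTotal is passed browser.newCount for the oldCount
// column, where browser.oldCount is presumably intended.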
| } 116 | -------------------------------------------------------------------------------- /real-process/src/main/scala/com/henry/realprocess/task/ChannelFreshnessTaskTrait.scala: -------------------------------------------------------------------------------- 1 | package com.henry.realprocess.task 2 | 3 | import com.henry.realprocess.bean.ClickLogWide 4 | import com.henry.realprocess.util.HBaseUtil 5 | import org.apache.commons.lang.StringUtils 6 | import org.apache.flink.streaming.api.scala.{DataStream, KeyedStream, WindowedStream} 7 | import org.apache.flink.api.scala._ 8 | import org.apache.flink.streaming.api.functions.sink.SinkFunction 9 | import org.apache.flink.streaming.api.windowing.time.Time 10 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow 11 | 12 | /** 13 | * @Author: Henry 14 | * @Description: 15 | * @Date: Create in 2019/10/31 21:38 16 | **/ 17 | 18 | case class ChannelFreshness( 19 | var channelId : String , 20 | var date : String , 21 | var newCount: Long , 22 | val oldCount: Long 23 | 24 | ) 25 | 26 | /** 27 | * 1、 转换 28 | * 2、 分组 29 | * 3、 时间窗口 30 | * 4、 聚合 31 | * 5、 落地 HBase 32 | */ 33 | object ChannelFreshnessTaskTrait extends BaseTask[ChannelFreshness] { 34 | /* Alt + Enter */ 35 | 36 | // 1、 转换 37 | override def map(clickLogWideDataStream: DataStream[ClickLogWide]): DataStream[ChannelFreshness] = { 38 | 39 | val mapDataStream: DataStream[ChannelFreshness] = clickLogWideDataStream.flatMap { 40 | clickLog => 41 | 42 | // 如果是老用户,只有在第一次来的时候,计数为 1 43 | val isOld = (isNew: Int, isDateNew: Int) => if (isNew == 0 && isDateNew == 1) 1 else 0 44 | // 统计新用户、老用户数量 45 | List( 46 | ChannelFreshness(clickLog.channelID, clickLog.yearMonthDayHour, clickLog.isNew, isOld(clickLog.isNew, clickLog.isHourNew)), 47 | ChannelFreshness(clickLog.channelID, clickLog.yearMonthDay, clickLog.isNew, isOld(clickLog.isNew, clickLog.isDayNew)), 48 | ChannelFreshness(clickLog.channelID, clickLog.yearMonth, clickLog.isNew, isOld(clickLog.isNew, clickLog.isMonthNew)) 49 | ) 50 | } 51 | mapDataStream 52 | } 53 | 54 | // 2、 分组 55 | override def keyBy(mapDataStream: DataStream[ChannelFreshness]): KeyedStream[ChannelFreshness, String] = { 56 | 57 | // 或者:mapDataStream.keyBy {freshness => (freshness.channelId + freshness.date) 58 | val keyedStream: KeyedStream[ChannelFreshness, String] = mapDataStream.keyBy { 59 | freshness => (freshness.channelId + freshness.date) 60 | } 61 | keyedStream 62 | 63 | } 64 | 65 | // 3、 时间窗口 66 | override def timeWindow(keyedStream: KeyedStream[ChannelFreshness, String]): WindowedStream[ChannelFreshness, String, TimeWindow] = { 67 | 68 | val windowedStream: WindowedStream[ChannelFreshness, String, TimeWindow] = keyedStream.timeWindow(Time.seconds(3)) 69 | windowedStream 70 | 71 | } 72 | 73 | // 4、 聚合 74 | override def reduce(windowedStream: WindowedStream[ChannelFreshness, String, TimeWindow]): DataStream[ChannelFreshness] = { 75 | 76 | val reduceDataStream: DataStream[ChannelFreshness] = windowedStream.reduce { 77 | (t1, t2) => 78 | ChannelFreshness(t1.channelId, t1.date, t1.newCount + t2.newCount, t1.oldCount + t2.oldCount) 79 | } 80 | reduceDataStream 81 | 82 | } 83 | 84 | // 5、 落地 HBase 85 | override def sink2HBase(reduceDataStream: DataStream[ChannelFreshness]): Unit = { 86 | 87 | reduceDataStream.addSink(new SinkFunction[ChannelFreshness] { 88 | override def invoke(value: ChannelFreshness): Unit = { 89 | // 创建 HBase 相关变量 90 | val tableName = "channel_freshness" 91 | val clfName = "info" 92 | val channelIdColumn = "channelId" 93 | val dateColumn = 
"date" 94 | val newCountColumn = "newCount" 95 | val oldCountColumn = "oldCount" 96 | 97 | val rowkey = value.channelId + ":" + value.date 98 | 99 | // 查询历史数据 100 | val resultMap: Map[String, String] = HBaseUtil.getMapData(tableName, rowkey, clfName, List(newCountColumn, oldCountColumn)) 101 | 102 | // 累加 103 | var totalNewCount = 0L 104 | var totalOldCount = 0L 105 | 106 | if(resultMap != null && StringUtils.isNotBlank(resultMap.getOrElse(newCountColumn,""))){ 107 | resultMap(newCountColumn).toLong + value.newCount 108 | } 109 | else { 110 | totalNewCount = value.newCount 111 | } 112 | 113 | if(resultMap != null && StringUtils.isNotBlank(resultMap.getOrElse(oldCountColumn,""))){ 114 | resultMap(oldCountColumn).toLong + value.oldCount 115 | } 116 | else { 117 | totalOldCount = value.oldCount 118 | } 119 | 120 | // 保存数据 121 | HBaseUtil.putMapData(tableName, rowkey, clfName, Map( 122 | // 向如下列插入数据 123 | channelIdColumn -> value.channelId , 124 | dateColumn -> value.date , 125 | newCountColumn -> totalNewCount , 126 | oldCountColumn -> totalOldCount 127 | )) 128 | } 129 | }) 130 | } 131 | 132 | } 133 | 134 | 135 | -------------------------------------------------------------------------------- /real-process/src/main/scala/com/henry/realprocess/task/ChannelNetworkTask.scala: -------------------------------------------------------------------------------- 1 | package com.henry.realprocess.task 2 | import com.henry.realprocess.bean.ClickLogWide 3 | import com.henry.realprocess.util.HBaseUtil 4 | import org.apache.commons.lang.StringUtils 5 | import org.apache.flink.streaming.api.scala.{DataStream, KeyedStream, WindowedStream} 6 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow 7 | import org.apache.flink.api.scala._ 8 | import org.apache.flink.streaming.api.functions.sink.SinkFunction 9 | import org.apache.flink.streaming.api.windowing.time.Time 10 | 11 | 12 | /** 13 | * @Author: Henry 14 | * @Description: 15 | * @Date: Create in 2019/11/3 15:52 16 | **/ 17 | 18 | // 2. 
添加一个`ChannelNetwork`样例类,它封装要统计的四个业务字段:频道ID(channelID)、运营商 19 | // (network)、日期(date)pv、uv、新用户(newCount)、老用户(oldCount) 20 | case class ChannelNetWork( 21 | var channelId: String, 22 | var network: String, 23 | var date: String, 24 | var pv: Long, 25 | var uv: Long, 26 | var newCount: Long, 27 | var oldCount: Long 28 | ) 29 | 30 | 31 | object ChannelNetworkTask extends BaseTask[ChannelNetWork]{ 32 | 33 | override def map(clickLogWideDataStream: DataStream[ClickLogWide]): DataStream[ChannelNetWork] = { 34 | 35 | val isOld = (isNew: Int, isDateNew: Int) => if (isNew == 0 && isDateNew == 1) 1 else 0 36 | 37 | clickLogWideDataStream.flatMap{ 38 | clickLogWide => { 39 | List( 40 | ChannelNetWork( // 月维度 41 | clickLogWide.channelID, 42 | clickLogWide.network, 43 | clickLogWide.yearMonth, 44 | clickLogWide.count, 45 | clickLogWide.isMonthNew, 46 | clickLogWide.isNew, 47 | isOld(clickLogWide.isNew, clickLogWide.isMonthNew) 48 | ), 49 | ChannelNetWork( // 天维度 50 | clickLogWide.channelID, 51 | clickLogWide.network, 52 | clickLogWide.yearMonthDay, 53 | clickLogWide.count, 54 | clickLogWide.isDayNew, 55 | clickLogWide.isNew, 56 | isOld(clickLogWide.isNew, clickLogWide.isDayNew) 57 | ), 58 | ChannelNetWork( // 小时维度 59 | clickLogWide.channelID, 60 | clickLogWide.network, 61 | clickLogWide.yearMonthDayHour, 62 | clickLogWide.count, 63 | clickLogWide.isHourNew, 64 | clickLogWide.isNew, 65 | isOld(clickLogWide.isNew, clickLogWide.isHourNew) 66 | ) 67 | ) 68 | } 69 | } 70 | } 71 | 72 | override def keyBy(mapDataStream: DataStream[ChannelNetWork]): KeyedStream[ChannelNetWork, String] = { 73 | 74 | mapDataStream.keyBy { 75 | network => 76 | network.channelId +" : "+ network.network +" : "+ network.date 77 | } 78 | } 79 | 80 | override def reduce(windowedStream: WindowedStream[ChannelNetWork, String, TimeWindow]): DataStream[ChannelNetWork] = { 81 | windowedStream.reduce { 82 | (t1, t2) => { 83 | ChannelNetWork( 84 | t1.channelId, 85 | t1.network, 86 | t1.date, 87 | t1.pv + t2.pv, 88 | t1.uv + t2.uv, 89 | t1.newCount + t2.newCount, 90 | t1.oldCount + t2.oldCount 91 | ) 92 | } 93 | } 94 | } 95 | 96 | 97 | override def sink2HBase(reduceDataStream: DataStream[ChannelNetWork]): Unit = { 98 | 99 | reduceDataStream.addSink( 100 | network => { 101 | // 创建 HBase 相关列 - 准备hbase的表名、列族名、rowkey名、列名 102 | val tableName = "channel_network" 103 | val clfName = "info" 104 | // 频道ID(channelID)、运营商(network)、日期(date)pv、uv、新用户(newCount)、老用户(oldCount) 105 | val rowkey = s"${network.channelId} : ${network.date} : ${network.network}" // 引用变量的方式 106 | val channelIdColName = "channelID" 107 | val networkColName = "network" 108 | val dateColName = "date" 109 | val pvColName = "pv" 110 | val uvColName = "uv" 111 | val newCountColName = "newCount" 112 | val oldCountColName = "oldCount" 113 | 114 | // 查询 HBase 115 | // - 判断hbase中是否已经存在结果记录 116 | val resultMap: Map[String, String] = HBaseUtil.getMapData(tableName, rowkey, clfName, 117 | List( pvColName, uvColName, newCountColName, oldCountColName ) 118 | ) 119 | 120 | // 数据累加 121 | var totalPv = 0L 122 | var totalUv = 0L 123 | var totalNewCount = 0L 124 | var totalOldCount = 0L 125 | 126 | // totalPv 127 | if (resultMap != null && resultMap.size > 0 && StringUtils.isNotBlank(resultMap.getOrElse(pvColName,""))) { 128 | totalPv = resultMap(pvColName).toLong + network.pv 129 | } 130 | else { 131 | totalPv = network.pv 132 | } 133 | 134 | // totalUv 135 | if (resultMap != null && resultMap.size > 0 && StringUtils.isNotBlank(resultMap.getOrElse(uvColName,""))) { 136 | totalUv = 
resultMap(uvColName).toLong + network.uv 137 | } 138 | else { 139 | totalUv = network.uv 140 | } 141 | 142 | // totalNewCount 143 | if (resultMap != null && resultMap.size > 0 && StringUtils.isNotBlank(resultMap.getOrElse(newCountColName,""))) { 144 | totalNewCount = resultMap(newCountColName).toLong + network.newCount 145 | } 146 | else { 147 | totalNewCount = network.newCount 148 | } 149 | 150 | // totalOldCount 151 | if (resultMap != null && resultMap.size > 0 && StringUtils.isNotBlank(resultMap.getOrElse(oldCountColName,""))) { 152 | totalOldCount = resultMap(oldCountColName).toLong + network.oldCount 153 | } 154 | else { 155 | totalOldCount = network.oldCount 156 | } 157 | 158 | // 保存数据 159 | HBaseUtil.putMapData( 160 | tableName, rowkey, clfName, Map( 161 | channelIdColName -> network.channelId, 162 | networkColName -> network.network, 163 | dateColName -> network.date, 164 | pvColName -> totalPv, 165 | uvColName -> totalUv, 166 | newCountColName -> totalNewCount, 167 | oldCountColName -> totalOldCount 168 | ) 169 | ) 170 | } 171 | ) 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /real-process/src/main/scala/com/henry/realprocess/task/ChannelPvUvTask.scala: -------------------------------------------------------------------------------- 1 | package com.henry.realprocess.task 2 | 3 | import com.henry.realprocess.bean.ClickLogWide 4 | import com.henry.realprocess.util.HBaseUtil 5 | import org.apache.flink.streaming.api.scala.{DataStream, WindowedStream} 6 | import org.apache.flink.api.scala._ 7 | import org.apache.flink.streaming.api.functions.sink.SinkFunction 8 | import org.apache.flink.streaming.api.scala.KeyedStream 9 | import org.apache.flink.streaming.api.windowing.time.Time 10 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow 11 | import org.apache.commons.lang.StringUtils 12 | 13 | /** 14 | * @Author: Henry 15 | * @Description: 渠道 PV/UV 16 | * 1、字段转换; 17 | * 2、分组; 18 | * 3、时间窗口; 19 | * 4、聚合; 20 | * 5、落地HBase 21 | * @Date: Create in 2019/10/30 20:15 22 | **/ 23 | 24 | case class ChannelPvUv( 25 | val channelId: String, 26 | val yearDayMonthHour: String, 27 | val pv: Long, 28 | val uv: Long 29 | ) 30 | 31 | object ChannelPvUvTask { 32 | 33 | def process(clickLogWideDateStream : DataStream[ClickLogWide])= { 34 | 35 | // 1、转换 36 | val channelPvUvDS: DataStream[ChannelPvUv] = clickLogWideDateStream.map{ 37 | clickLogWide => { 38 | ChannelPvUv(clickLogWide.channelID, clickLogWide.yearMonthDayHour, 39 | clickLogWide.count, clickLogWide.isHourNew) 40 | } 41 | } 42 | 43 | // 2、分组 44 | val keyedStream: KeyedStream[ChannelPvUv, String] = channelPvUvDS.keyBy{ 45 | channelPvUv => channelPvUv.channelId + channelPvUv.yearDayMonthHour 46 | } 47 | 48 | // 3、窗口 49 | val windowedStream: WindowedStream[ChannelPvUv, String, TimeWindow] = 50 | keyedStream.timeWindow(Time.seconds(3)) 51 | 52 | 53 | // 4、聚合 54 | val reduceDataStream: DataStream[ChannelPvUv] = windowedStream.reduce{ 55 | (t1, t2) => ChannelPvUv(t1.channelId, t1.yearDayMonthHour, t1.pv + t2.pv, t1.uv + t2.uv) 56 | } 57 | 58 | // 5、HBase 落地 59 | reduceDataStream.addSink(new SinkFunction[ChannelPvUv] { 60 | 61 | override def invoke(value: ChannelPvUv): Unit = { 62 | 63 | // HBase 相关字段 64 | val tableName = "channel_pvuv" 65 | val clfName = "info" 66 | val channelIdColumn = "channelId" 67 | val yearMonthDayHourColumn = "yearMonthDayHour" 68 | val pvColumn = "pv" 69 | val uvColumn = "uv" 70 | 71 | val rowkey = value.channelId + ":" + value.yearDayMonthHour 72 | 73 | 
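// NOTE: the read-accumulate-write logic below fetches the stored pv/uv and adds the window's
// values into totalPv/totalUv, but the final putMapData writes value.pv/value.uv, so the
// accumulated totals are never persisted. The write-back presumably should be:
//   pvColumn -> totalPv.toString,
//   uvColumn -> totalUv.toString
// The same slip appears in ChannelPvUvTaskMerge.process further below.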
// 查询 HBase ,并且获取相关记录 74 | val pvInHBase: String = HBaseUtil.getData(tableName, rowkey, clfName, pvColumn) 75 | val uvInHBase: String = HBaseUtil.getData(tableName, rowkey, clfName, uvColumn) 76 | 77 | var totalPv = 0L 78 | var totalUv = 0L 79 | 80 | // 如果 HBase 中没有 PV 值,就把当前值保存;如果有值就进行累加 81 | if(StringUtils.isBlank(pvInHBase)){ 82 | totalPv = value.pv 83 | } 84 | else { 85 | totalPv = pvInHBase.toLong + value.pv 86 | } 87 | 88 | // 如果 HBase 中没有 UV 值,就把当前值保存;如果有值就进行累加 89 | if(StringUtils.isBlank(uvInHBase)){ 90 | totalUv = value.uv 91 | } 92 | else { 93 | totalUv = uvInHBase.toLong + value.uv 94 | } 95 | 96 | // 保存数据 97 | HBaseUtil.putMapData(tableName, rowkey, clfName, Map( 98 | 99 | channelIdColumn -> value.channelId , 100 | yearMonthDayHourColumn -> value.yearDayMonthHour , 101 | pvColumn -> value.pv.toString , 102 | uvColumn -> value.uv.toString 103 | )) 104 | 105 | } 106 | }) 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /real-process/src/main/scala/com/henry/realprocess/task/ChannelPvUvTaskMerge.scala: -------------------------------------------------------------------------------- 1 | package com.henry.realprocess.task 2 | 3 | import com.henry.realprocess.bean.ClickLogWide 4 | import com.henry.realprocess.util.HBaseUtil 5 | import org.apache.flink.api.scala._ 6 | import org.apache.commons.lang.StringUtils 7 | import org.apache.flink.streaming.api.functions.sink.SinkFunction 8 | import org.apache.flink.streaming.api.scala.{DataStream, KeyedStream, WindowedStream} 9 | import org.apache.flink.streaming.api.windowing.time.Time 10 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow 11 | 12 | /** 13 | * @Author: Henry 14 | * @Description: 15 | * @Date: Create in 2019/10/30 22:42 16 | **/ 17 | 18 | case class ChannelPvUv( 19 | val channelId: String, 20 | val yearDayMonthHour: String, 21 | val pv: Long, 22 | val uv: Long 23 | ) 24 | 25 | object ChannelPvUvTaskMerge { 26 | 27 | def process(clickLogWideDateStream : DataStream[ClickLogWide])= { 28 | 29 | // 1、转换 30 | val channelPvUvDS: DataStream[ChannelPvUv] = clickLogWideDateStream.flatMap{ 31 | clickLogWide => { 32 | List( 33 | ChannelPvUv(clickLogWide.channelID, clickLogWide.yearMonthDayHour, clickLogWide.count, clickLogWide.isHourNew) , 34 | ChannelPvUv(clickLogWide.channelID, clickLogWide.yearMonthDay, clickLogWide.count, clickLogWide.isDayNew) , 35 | ChannelPvUv(clickLogWide.channelID, clickLogWide.yearMonth, clickLogWide.count, clickLogWide.isMonthNew) 36 | ) 37 | } 38 | } 39 | 40 | // 2、分组 41 | val keyedStream: KeyedStream[ChannelPvUv, String] = channelPvUvDS.keyBy{ 42 | channelPvUv => channelPvUv.channelId + channelPvUv.yearDayMonthHour 43 | } 44 | 45 | // 3、窗口 46 | val windowedStream: WindowedStream[ChannelPvUv, String, TimeWindow] = 47 | keyedStream.timeWindow(Time.seconds(3)) 48 | 49 | 50 | // 4、聚合 51 | val reduceDataStream: DataStream[ChannelPvUv] = windowedStream.reduce{ 52 | (t1, t2) => ChannelPvUv(t1.channelId, t1.yearDayMonthHour, t1.pv + t2.pv, t1.uv + t2.uv) 53 | } 54 | 55 | // 5、HBase 落地 56 | reduceDataStream.addSink(new SinkFunction[ChannelPvUv] { 57 | 58 | override def invoke(value: ChannelPvUv): Unit = { 59 | 60 | // HBase 相关字段 61 | val tableName = "channel_pvuv" 62 | val clfName = "info" 63 | val channelIdColumn = "channelId" 64 | val yearMonthDayHourColumn = "yearMonthDayHour" 65 | val pvColumn = "pv" 66 | val uvColumn = "uv" 67 | 68 | val rowkey = value.channelId + ":" + value.yearDayMonthHour 69 | 70 | // 查询 HBase ,并且获取相关记录 71 | val pvInHBase: 
String = HBaseUtil.getData(tableName, rowkey, clfName, pvColumn) 72 | val uvInHBase: String = HBaseUtil.getData(tableName, rowkey, clfName, uvColumn) 73 | 74 | var totalPv = 0L 75 | var totalUv = 0L 76 | 77 | // 如果 HBase 中没有 PV 值,就把当前值保存;如果有值就进行累加 78 | if(StringUtils.isBlank(pvInHBase)){ 79 | totalPv = value.pv 80 | } 81 | else { 82 | totalPv = pvInHBase.toLong + value.pv 83 | } 84 | 85 | // 如果 HBase 中没有 UV 值,就把当前值保存;如果有值就进行累加 86 | if(StringUtils.isBlank(uvInHBase)){ 87 | totalUv = value.uv 88 | } 89 | else { 90 | totalUv = uvInHBase.toLong + value.uv 91 | } 92 | 93 | // 保存数据 94 | HBaseUtil.putMapData(tableName, rowkey, clfName, Map( 95 | 96 | channelIdColumn -> value.channelId , 97 | yearMonthDayHourColumn -> value.yearDayMonthHour , 98 | pvColumn -> value.pv.toString , 99 | uvColumn -> value.uv.toString 100 | )) 101 | 102 | } 103 | }) 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /real-process/src/main/scala/com/henry/realprocess/task/ChannelRealHotTask.scala: -------------------------------------------------------------------------------- 1 | package com.henry.realprocess.task 2 | 3 | import com.henry.realprocess.bean.ClickLogWide 4 | import com.henry.realprocess.util.HBaseUtil 5 | import org.apache.flink.streaming.api.scala.{DataStream, WindowedStream} 6 | import org.apache.flink.api.scala._ 7 | import org.apache.flink.streaming.api.functions.sink.SinkFunction 8 | import org.apache.flink.streaming.api.scala.KeyedStream 9 | import org.apache.flink.streaming.api.windowing.time.Time 10 | import org.apache.flink.streaming.api.windowing.windows.TimeWindow 11 | import org.apache.commons.lang.StringUtils 12 | 13 | 14 | /** 15 | * @Author: Henry 16 | * @Description: 频道热点分析业务开发 17 | * 1、字段转换; 18 | * 2、分组; 19 | * 3、时间窗口; 20 | * 4、聚合; 21 | * 5、落地HBase 22 | * @Date: Create in 2019/10/29 20:22 23 | **/ 24 | 25 | case class ChannelRealHot(var channelid:String, var visited:Long) 26 | 27 | 28 | object ChannelRealHotTask { 29 | 30 | def process(clickLogWideDateStream : DataStream[ClickLogWide])= { 31 | 32 | // 1、字段转换 channelid、visited 33 | val realHotDataStream: DataStream[ChannelRealHot] = clickLogWideDateStream.map{ 34 | clickLogWide: ClickLogWide => 35 | ChannelRealHot(clickLogWide.channelID, clickLogWide.count) 36 | } 37 | 38 | // 2、分组 39 | val keyedStream: KeyedStream[ChannelRealHot, String] = realHotDataStream.keyBy(_.channelid) 40 | 41 | 42 | // 3、时间窗口 43 | val windowedStream: WindowedStream[ChannelRealHot, String, TimeWindow] = keyedStream.timeWindow( 44 | Time.seconds(3)) 45 | 46 | // 4、聚合 47 | val reduceDataStream: DataStream[ChannelRealHot] = windowedStream.reduce{ 48 | (t1: ChannelRealHot, t2: ChannelRealHot) => 49 | ChannelRealHot(t1.channelid, t1.visited + t2.visited) 50 | } 51 | // 输出测试 52 | reduceDataStream 53 | 54 | // 5、落地 HBase 55 | reduceDataStream.addSink(new SinkFunction[ChannelRealHot] { 56 | 57 | override def invoke(value: ChannelRealHot): Unit = { 58 | 59 | // HBase 相关字段 60 | val tableName = "channel" 61 | val clfName = "info" 62 | val channelIdColumn = "channelId" 63 | val visitedColumn = "visited" 64 | val rowkey = value.channelid 65 | 66 | 67 | // 查询 HBase ,并且获取相关记录 68 | val visitedValue: String = HBaseUtil.getData(tableName, rowkey, clfName, visitedColumn) 69 | // 创建总数的临时变量 70 | var totalCount: Long = 0 71 | 72 | if(StringUtils.isBlank(visitedValue)){ 73 | totalCount = value.visited 74 | } 75 | else { 76 | totalCount = visitedValue.toLong + value.visited 77 | } 78 | 79 | // 保存数据 80 | HBaseUtil.putMapData(tableName, 
rowkey, clfName, Map( 81 | channelIdColumn -> value.channelid , 82 | visitedColumn -> totalCount.toString 83 | )) 84 | } 85 | }) 86 | } 87 | 88 | } 89 | -------------------------------------------------------------------------------- /real-process/src/main/scala/com/henry/realprocess/task/PreprocessTask.scala: -------------------------------------------------------------------------------- 1 | package com.henry.realprocess.task 2 | 3 | import com.henry.realprocess.bean.{ClickLogWide, Message} 4 | import com.henry.realprocess.util.HBaseUtil 5 | import org.apache.commons.lang.StringUtils 6 | import org.apache.commons.lang.time.FastDateFormat 7 | import org.apache.flink.streaming.api.scala.DataStream 8 | import org.apache.flink.api.scala._ 9 | 10 | /** 11 | * @Author: Henry 12 | * @Description: 预处理任务 13 | * @Date: Create in 2019/10/27 14:31 14 | **/ 15 | object PreprocessTask { 16 | 17 | 18 | def process(watermarkDataStream:DataStream[Message])= { 19 | 20 | /** 21 | *  大括号{}用于代码块,计算结果是代码最后一行; 22 | *  大括号{}用于拥有代码块的函数; 23 | *  大括号{}在只有一行代码时可以省略,除了case语句(Scala适用); 24 | *  小括号()在函数只有一个参数时可以省略(Scala适用); 25 | *  几乎没有二者都省略的情况。 26 | */ 27 | watermarkDataStream.map { 28 | 29 | msg => 30 | // 转换时间 31 | val yearMonth: String = FastDateFormat.getInstance("yyyyMM").format(msg.timeStamp) 32 | val yearMonthDay: String = FastDateFormat.getInstance("yyyyMMdd").format(msg.timeStamp) 33 | val yearMonthDayHour: String = FastDateFormat.getInstance("yyyyMMddHH").format(msg.timeStamp) 34 | 35 | // 转换地区 36 | val address = msg.clickLog.country + msg.clickLog.province + msg.clickLog.city 37 | 38 | val isNewtuple = isNewProcess(msg) 39 | 40 | ClickLogWide( 41 | msg.clickLog.channelID, 42 | msg.clickLog.categoryID, 43 | msg.clickLog.produceID, 44 | msg.clickLog.country, 45 | msg.clickLog.province, 46 | msg.clickLog.city, 47 | msg.clickLog.network, 48 | msg.clickLog.source, 49 | msg.clickLog.browserType, 50 | msg.clickLog.entryTime, 51 | msg.clickLog.leaveTime, 52 | msg.clickLog.userID, 53 | msg.count, 54 | msg.timeStamp, 55 | address, 56 | yearMonth, 57 | yearMonthDay, 58 | yearMonthDayHour, 59 | isNewtuple._1, 60 | isNewtuple._2, 61 | isNewtuple._3, 62 | isNewtuple._4 63 | ) 64 | } 65 | 66 | } 67 | 68 | /** 69 | * 判断用户是否为新用户 70 | * @param msg 71 | */ 72 | private def isNewProcess(msg:Message)={ 73 | 74 | // 1、定义4个变量,初始化为0 75 | var isNew = 0 76 | var isHourNew = 0 77 | var isDayNew = 0 78 | var isMonthNew = 0 79 | 80 | 81 | // 2、从HBase中查询用户记录,如果有记录,再去判断其他时间;如果没有记录,则证明是新用户 82 | val tableName = "user_history" 83 | var clfName = "info" 84 | var rowkey = msg.clickLog.userID + ":" + msg.clickLog.channelID 85 | 86 | // - 用户ID(userID) 87 | var userIdColumn = "userid" 88 | // - 频道ID(channelid) 89 | var channelidColumn = "channelid" 90 | // - 最后访问时间(时间戳)(lastVisitedTime) 91 | var lastVisitedTimeColumn = "lastVisitedTime" 92 | 93 | 94 | var userId: String = HBaseUtil.getData(tableName, rowkey, clfName, userIdColumn) 95 | var channelid: String = HBaseUtil.getData(tableName, rowkey, clfName, channelidColumn) 96 | var lastVisitedTime: String = HBaseUtil.getData(tableName, rowkey, clfName, lastVisitedTimeColumn) 97 | 98 | 99 | // 如果 userid 为空,则该用户一定是新用户 100 | if(StringUtils.isBlank(userId)){ 101 | isNew = 1 102 | isHourNew = 1 103 | isDayNew = 1 104 | isMonthNew = 1 105 | 106 | // 保存用户的访问记录到 "user_history" 107 | HBaseUtil.putMapData(tableName, rowkey, clfName, Map( 108 | userIdColumn -> msg.clickLog.userID , 109 | channelidColumn -> msg.clickLog.channelID , 110 | lastVisitedTimeColumn -> msg.timeStamp 111 | )) 112 | } 113 | 
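// NOTE on the else branch below:
//   - compareDate is called with lastVisitedTimeColumn.toLong, i.e. the literal column name
//     "lastVisitedTime", which cannot be parsed as a Long; lastVisitedTime.toLong (the value
//     read from HBase) is presumably what was intended.
//   - the method returns (isDayNew, isHourNew, isDayNew, isMonthNew); the first element is
//     presumably meant to be isNew.
//   - timestamp2Str further down hard-codes "yyyyMM" instead of using its format parameter,
//     so compareDate effectively always compares at month granularity.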
else{ 114 | isNew = 0 115 | // 其它字段需要进行时间戳的比对 116 | isHourNew = compareDate(msg.timeStamp, lastVisitedTimeColumn.toLong, "yyyyMMddHH") 117 | isDayNew = compareDate(msg.timeStamp, lastVisitedTimeColumn.toLong, "yyyyMMdd") 118 | isMonthNew = compareDate(msg.timeStamp, lastVisitedTimeColumn.toLong, "yyyyMM") 119 | 120 | // 更新 "user_history" 用户的时间戳 121 | HBaseUtil.putData(tableName, rowkey, clfName, lastVisitedTimeColumn , msg.timeStamp.toString) 122 | 123 | } 124 | 125 | (isDayNew, isHourNew, isDayNew, isMonthNew) 126 | } 127 | 128 | 129 | /** 130 | * 比对时间: 201912 > 201911 131 | * @param currentTime 当前时间 132 | * @param historyTime 历史时间 133 | * @param format 时间格式: yyyyMM yyyyMMdd 134 | * @return 1 或者 0 135 | */ 136 | def compareDate(currentTime:Long, historyTime:Long, format:String):Int={ 137 | 138 | val currentTimeStr:String = timestamp2Str(currentTime, format) 139 | val historyTimeStr:String = timestamp2Str(historyTime, format) 140 | 141 | // 比对字符串大小,如果当前时间 > 历史时间,返回1 142 | var result:Int = currentTimeStr.compareTo(historyTimeStr) 143 | 144 | if(result > 0){ 145 | result = 1 146 | } 147 | else { 148 | result = 0 149 | } 150 | result 151 | } 152 | 153 | /** 154 | * 转换日期 155 | * @param timestamp Long 类型时间戳 156 | * @param format 日期格式 157 | * @return 158 | */ 159 | def timestamp2Str(timestamp:Long, format:String):String={ 160 | FastDateFormat.getInstance("yyyyMM").format(timestamp) 161 | } 162 | 163 | 164 | 165 | } 166 | -------------------------------------------------------------------------------- /real-process/src/main/scala/com/henry/realprocess/util/GlobalConfigutil.scala: -------------------------------------------------------------------------------- 1 | package com.henry.realprocess.util 2 | 3 | import com.typesafe.config.{Config, ConfigFactory} 4 | 5 | /** 6 | * @Author: Henry 7 | * @Description: 配置文件加载类 8 | * @Date: Create in 2019/10/15 23:42 9 | **/ 10 | object GlobalConfigutil { 11 | 12 | // 通过工厂加载配置, config 会自动加载 application.conf 文件,文件名不能变 13 | val config:Config = ConfigFactory.load() 14 | 15 | val bootstrapServers = config.getString("bootstrap.servers") 16 | val zookeeperConnect = config.getString("zookeeper.connect") 17 | val inputTopic = config.getString("input.topic") 18 | val gruopId = config.getString("gruop.id") 19 | val enableAutoCommit = config.getString("enable.auto.commit") 20 | val autoCommitIntervalMs = config.getString("auto.commit.interval.ms") 21 | val autoOffsetReset = config.getString("auto.offset.reset") 22 | 23 | def main(args: Array[String]): Unit = { 24 | // 选择快捷键,alt,鼠标左键拉倒最后一行,然后按 ctrl+shift 键,再按 → 25 | println(bootstrapServers) 26 | println(zookeeperConnect) 27 | println(inputTopic) 28 | println(gruopId) 29 | println(enableAutoCommit) 30 | println(autoCommitIntervalMs) 31 | println(autoOffsetReset) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /real-process/src/main/scala/com/henry/realprocess/util/HBaseUtil.scala: -------------------------------------------------------------------------------- 1 | package com.henry.realprocess.util 2 | 3 | import org.apache.hadoop.conf.Configuration 4 | import org.apache.hadoop.hbase.{HBaseConfiguration, TableName} 5 | import org.apache.hadoop.hbase.client.{ColumnFamilyDescriptor, _} 6 | import org.apache.hadoop.hbase.util.Bytes 7 | 8 | /** 9 | * @Author: Henry 10 | * @Description: HBase 工具类 11 | * 1、获取Table对象 12 | * 2、保存单列数据 13 | * 3、查询单列数据 14 | * 4、保存多列数据 15 | * 5、查询多列数据 16 | * 6、删除数据 17 | * @Date: Create in 2019/10/21 22:53 18 | **/ 19 | object HBaseUtil { 20 | 21 | 
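// NOTE: conf, conn and admin below are initialised once, when the object is first referenced,
// and shared by all tasks. That matches HBase's threading model: Connection is heavyweight and
// thread-safe, so one per JVM is the recommended pattern, while Table is lightweight and not
// thread-safe, which is why each method here obtains and closes its own Table instance.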
// HBase 配置类,不需要指定配置文件名,文件名要求是 hbase-site.xml 22 | val conf:Configuration = HBaseConfiguration.create() 23 | 24 | // HBase 的连接 25 | val conn:Connection = ConnectionFactory.createConnection(conf) 26 | 27 | // HBase 的操作 API 28 | val admin:Admin = conn.getAdmin 29 | 30 | /** 31 | * 返回Table,如果不存在,则创建表 32 | * 33 | * @param tableName 表名 34 | * @param columnFamilyName 列族名 35 | * @return 36 | */ 37 | def getTable(tableNameStr:String, columnFamilyName:String):Table={ 38 | 39 | 40 | // 获取 TableName 41 | val tableName:TableName = TableName.valueOf(tableNameStr) 42 | 43 | // 如果表不存在,则创建表 44 | 45 | if(!admin.tableExists(tableName)){ 46 | 47 | // 构建出表的描述的建造者 48 | val descBuilder: TableDescriptorBuilder = TableDescriptorBuilder.newBuilder(tableName) 49 | 50 | val familyDescriptor:ColumnFamilyDescriptor = ColumnFamilyDescriptorBuilder 51 | .newBuilder(columnFamilyName.getBytes).build() 52 | 53 | // 给表添加列族 54 | descBuilder.setColumnFamily(familyDescriptor) 55 | 56 | // 创建表 57 | admin.createTable(descBuilder.build()) 58 | } 59 | 60 | conn.getTable(tableName) 61 | 62 | } 63 | 64 | /** 65 | * 存储单列数据 66 | * 67 | * @param tableNameStr 表名 68 | * @param rowkey 主键 69 | * @param columnFamilyName 列族名 70 | * @param columnName 列名 71 | * @param columnValue 列值 72 | */ 73 | def putData(tableNameStr:String, rowkey:String, columnFamilyName:String, columnName:String, columnValue:String)={ 74 | 75 | // 获取表 76 | val table:Table = getTable(tableNameStr, columnFamilyName) 77 | 78 | try{ 79 | // Put 80 | val put:Put = new Put(rowkey.getBytes) 81 | put.addColumn(columnFamilyName.getBytes, columnName.getBytes, columnValue.getBytes) 82 | 83 | // 保存数据 84 | table.put(put) 85 | }catch { 86 | case ex:Exception=>{ 87 | ex.printStackTrace() 88 | } 89 | }finally { 90 | table.close() 91 | } 92 | } 93 | 94 | 95 | /** 96 | * 通过单列名获取列值 97 | * @param tableNameStr 表名 98 | * @param rowkey 主键 99 | * @param columnFamilyName 列族名 100 | * @param columnName 列名 101 | * @param columnValue 列值 102 | * @return 103 | */ 104 | def getData(tableNameStr:String, rowkey:String, columnFamilyName:String, columnName:String):String={ 105 | 106 | // 1. 获取 Table 对象 107 | val table = getTable(tableNameStr, columnFamilyName) 108 | 109 | try { 110 | // 2. 构建 get 对象 111 | val get = new Get(rowkey.getBytes) 112 | 113 | // 3. 进行查询 114 | val result:Result = table.get(get) 115 | 116 | // 4. 
判断查询结果是否为空,并且包含要查询的列 117 | if (result != null && result.containsColumn(columnFamilyName.getBytes, columnName.getBytes)){ 118 | val bytes: Array[Byte] = result.getValue(columnFamilyName.getBytes(), columnName.getBytes) 119 | 120 | Bytes.toString(bytes) 121 | }else{ 122 | "" 123 | } 124 | 125 | }catch{ 126 | case ex:Exception => { 127 | ex.printStackTrace() 128 | "" 129 | } 130 | }finally { 131 | // 5、关闭表 132 | table.close() 133 | } 134 | 135 | } 136 | 137 | 138 | /** 139 | * 存储多列数据 140 | * @param tableNameStr 表名 141 | * @param rowkey 主键 142 | * @param columnFamilyName 列族名 143 | * @param map 多个列名和列族集合 144 | */ 145 | def putMapData(tableNameStr:String, rowkey:String, columnFamilyName:String, map:Map[String,Any])={ 146 | 147 | // 1、获取 table 对象 148 | val table = getTable(tableNameStr, columnFamilyName) 149 | 150 | try{ 151 | // 2、创建 put 152 | val put = new Put(rowkey.getBytes) 153 | 154 | // 3、在 put 中添加多个列名和列值 155 | for ((colName, colValue) <- map){ 156 | put.addColumn(columnFamilyName.getBytes, colName.getBytes, colValue.toString.getBytes) 157 | } 158 | 159 | // 4、保存 put 160 | table.put(put) 161 | 162 | }catch{ 163 | case ex:Exception => { 164 | ex.printStackTrace() 165 | 166 | } 167 | }finally { 168 | // 5、关闭表 169 | table.close() 170 | } 171 | 172 | 173 | // 5、关闭 table 174 | table.close() 175 | } 176 | 177 | 178 | /** 179 | * 获取多了数据的值 180 | * @param tableNameStr 表名 181 | * @param rowkey 主键 182 | * @param columnFamilyName 列族名 183 | * @param columnNameList 多个列名和列值集合 184 | * @return 185 | */ 186 | def getMapData(tableNameStr:String, rowkey:String, columnFamilyName:String, columnNameList:List[String]):Map[String,String]= { 187 | 188 | // 1、获取 Table 189 | val table = getTable(tableNameStr, columnFamilyName) 190 | 191 | try{ 192 | // 2、构建 get 193 | val get = new Get(rowkey.getBytes) 194 | 195 | // 3、执行查询 196 | val result: Result = table.get(get) 197 | 198 | // 4、遍历列名集合,取出列值,构建成 Map 返回 199 | columnNameList.map { 200 | col => 201 | val bytes: Array[Byte] = result.getValue(columnFamilyName.getBytes(), col.getBytes) 202 | 203 | if (bytes != null && bytes.size > 0) { 204 | col -> Bytes.toString(bytes) 205 | } 206 | else { // 如果取不到值,则赋一个空串 207 | "" -> "" 208 | } 209 | }.filter(_._1 != "").toMap // 把不是空串的过滤出来,再转换成 Map 210 | 211 | }catch { 212 | case ex:Exception => { 213 | ex.printStackTrace() 214 | Map[String, String]() // 返回一个空的 Map 215 | } 216 | }finally { 217 | // 5、关闭 Table 218 | table.close() 219 | } 220 | } 221 | 222 | 223 | /** 224 | * 删除数据 225 | * @param tableNameStr 表名 226 | * @param rowkey 主键 227 | * @param columnFamilyName 列族名 228 | */ 229 | def delete(tableNameStr:String, rowkey:String, columnFamilyName:String)={ 230 | 231 | // 1、获取 Table 232 | val table:Table = getTable(tableNameStr, columnFamilyName) 233 | 234 | try { 235 | // 2、构建 delete 对象 236 | val delete: Delete = new Delete(rowkey.getBytes) 237 | 238 | // 3、执行删除 239 | table.delete(delete) 240 | 241 | } 242 | catch { 243 | case ex:Exception => 244 | ex.printStackTrace() 245 | } 246 | finally { 247 | // 4、关闭 table 248 | table.close() 249 | } 250 | 251 | } 252 | 253 | 254 | def main(args: Array[String]): Unit = { 255 | 256 | // println(getTable("test","info")) 257 | // putData("test", "1", "info", "t1", "hello world") 258 | // println(getData("test", "1", "info", "t1")) 259 | 260 | // val map = Map( 261 | // "t2" -> "scala" , 262 | // "t3" -> "hive" , 263 | // "t4" -> "flink" 264 | // ) 265 | // putMapData("test", "1", "info", map) 266 | 267 | // println(getMapData("test", "1", "info", List("t1", "t2"))) 268 | 269 | delete("test", "1", 
"info") 270 | println(getMapData("test", "1", "info", List("t1", "t2"))) 271 | 272 | } 273 | 274 | } 275 | -------------------------------------------------------------------------------- /real-process/src/test/temp.txt: -------------------------------------------------------------------------------- 1 | val bootstrap.servers = config.getString("bootstrap.servers") 2 | val zookeeper.connect = config.getString("zookeeper.connect") 3 | val input.topic = config.getString("input.topic") 4 | val gruop.id = config.getString("gruop.id") 5 | val enable.auto.commit = config.getString("enable.auto.commit") 6 | val auto.commit.interval.ms = config.getString("auto.commit.interval.ms") 7 | val auto.offset.reset = config.getString("auto.offset.reset") -------------------------------------------------------------------------------- /report/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 4.0.0 7 | com.henry 8 | report 9 | 1.0-SNAPSHOT 10 | jar 11 | report 12 | Spring Boot,上报服务 13 | 14 | 15 | 16 | org.springframework.boot 17 | spring-boot-starter-parent 18 | 1.5.13.RELEASE 19 | 20 | 21 | 22 | 23 | UTF-8 24 | UTF-8 25 | 1.8 26 | Greenwich.M3 27 | 28 | 29 | 30 | 31 | 32 | 33 | alimaven 34 | alimaven 35 | http://maven.aliyun.com/nexus/content/groups/public/ 36 | 37 | 38 | 39 | 40 | 41 | org.springframework.boot 42 | spring-boot-starter 43 | 1.5.13.RELEASE 44 | 45 | 46 | org.springframework.boot 47 | spring-boot-starter-test 48 | 1.5.13.RELEASE 49 | 50 | 51 | 52 | org.springframework.boot 53 | spring-boot-starter-web 54 | 2.5.12 55 | 56 | 57 | 58 | org.springframework.boot 59 | spring-boot-starter-tomcat 60 | 1.5.13.RELEASE 61 | 62 | 63 | 64 | org.apache.tomcat 65 | tomcat-catalina 66 | 8.5.86 67 | 68 | 69 | 70 | com.alibaba 71 | fastjson 72 | 1.2.83 73 | 74 | 75 | 76 | org.springframework.kafka 77 | spring-kafka 78 | 1.0.6.RELEASE 79 | 80 | 81 | 82 | 83 | org.apache.httpcomponents 84 | httpclient 85 | 4.5.13 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | org.springframework.boot 95 | spring-boot-maven-plugin 96 | 97 | 98 | 99 | 100 | 101 | -------------------------------------------------------------------------------- /report/src/main/java/com/henry/report/ReportApplication.java: -------------------------------------------------------------------------------- 1 | package com.henry.report; 2 | 3 | import org.springframework.boot.autoconfigure.SpringBootApplication; 4 | import org.springframework.boot.SpringApplication; 5 | /** 6 | * @Author: HongZhen 7 | * @Description: 8 | * @Date: Create in 2019/9/20 11:10 9 | **/ 10 | 11 | // 添加注解 @SpringBootApplication ,表示该类是一个启动类 12 | @SpringBootApplication 13 | public class ReportApplication { 14 | 15 | public static void main(String[] args) { 16 | SpringApplication.run(ReportApplication.class, args); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /report/src/main/java/com/henry/report/bean/Clicklog.java: -------------------------------------------------------------------------------- 1 | package com.henry.report.bean; 2 | 3 | /** 4 | * @Author: Henry 5 | * @Description: 点击流日志 6 | * @Date: Create in 2019/10/13 19:33 7 | **/ 8 | 9 | public class Clicklog { 10 | 11 | // 频道 ID 12 | private long channelID; 13 | // 产品的类别 ID 14 | private long categoryID ; 15 | // 产品 ID 16 | private long produceID ; 17 | // 用户 ID 18 | private long userID ; 19 | 20 | // 国家 21 | private String country; 22 | // 省份 23 | private String province; 24 | // 城市 25 | private String city; 26 | 27 | 
// 网络方式 28 | private String network; 29 | // 来源方式 30 | private String source; 31 | 32 | // 浏览器类型 33 | private String browserType; 34 | 35 | // 进入网站时间 36 | private Long entryTime ; 37 | // 离开网站实际 38 | private long leaveTime; 39 | 40 | public long getChannelID() { 41 | return channelID; 42 | } 43 | 44 | public void setChannelID(long channelID) { 45 | this.channelID = channelID; 46 | } 47 | 48 | public long getCategoryID() { 49 | return categoryID; 50 | } 51 | 52 | public void setCategoryID(long categoryID) { 53 | this.categoryID = categoryID; 54 | } 55 | 56 | public long getProduceID() { 57 | return produceID; 58 | } 59 | 60 | public void setProduceID(long produceID) { 61 | this.produceID = produceID; 62 | } 63 | 64 | public long getUserID() { 65 | return userID; 66 | } 67 | 68 | public void setUserID(long userID) { 69 | this.userID = userID; 70 | } 71 | 72 | public String getCountry() { 73 | return country; 74 | } 75 | 76 | public void setCountry(String country) { 77 | this.country = country; 78 | } 79 | 80 | public String getProvince() { 81 | return province; 82 | } 83 | 84 | public void setProvince(String province) { 85 | this.province = province; 86 | } 87 | 88 | public String getCity() { 89 | return city; 90 | } 91 | 92 | public void setCity(String city) { 93 | this.city = city; 94 | } 95 | 96 | public String getNetwork() { 97 | return network; 98 | } 99 | 100 | public void setNetwork(String network) { 101 | this.network = network; 102 | } 103 | 104 | public String getSource() { 105 | return source; 106 | } 107 | 108 | public void setSource(String source) { 109 | this.source = source; 110 | } 111 | 112 | public String getBrowserType() { 113 | return browserType; 114 | } 115 | 116 | public void setBrowserType(String browserType) { 117 | this.browserType = browserType; 118 | } 119 | 120 | public Long getEntryTime() { 121 | return entryTime; 122 | } 123 | 124 | public void setEntryTime(Long entryTime) { 125 | this.entryTime = entryTime; 126 | } 127 | 128 | public long getLeaveTime() { 129 | return leaveTime; 130 | } 131 | 132 | public void setLeaveTime(long leaveTime) { 133 | this.leaveTime = leaveTime; 134 | } 135 | 136 | } 137 | -------------------------------------------------------------------------------- /report/src/main/java/com/henry/report/bean/Message.java: -------------------------------------------------------------------------------- 1 | package com.henry.report.bean; 2 | 3 | /** 4 | * @Author: Henry 5 | * @Description: 消息实体类 6 | * @Date: Create in 2019/10/11 23:40 7 | **/ 8 | public class Message { 9 | 10 | // 消息次数 11 | private int count; 12 | 13 | // 消息的时间戳 14 | private long timestamp; 15 | 16 | // 消息体 17 | private String message; 18 | 19 | public int getCount() { 20 | return count; 21 | } 22 | 23 | public void setCount(int count) { 24 | this.count = count; 25 | } 26 | 27 | public long getTimestamp() { 28 | return timestamp; 29 | } 30 | 31 | public void setTimestamp(long timestamp) { 32 | this.timestamp = timestamp; 33 | } 34 | 35 | public String getMessage() { 36 | return message; 37 | } 38 | 39 | public void setMessage(String message) { 40 | this.message = message; 41 | } 42 | 43 | @Override 44 | public String toString() { 45 | return "Message{" + 46 | "count=" + count + 47 | ", timestamp=" + timestamp + 48 | ", message='" + message + '\'' + 49 | '}'; 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /report/src/main/java/com/henry/report/controller/ReportController.java: 
-------------------------------------------------------------------------------- 1 | package com.henry.report.controller; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import com.henry.report.bean.Message; 5 | import org.springframework.beans.factory.annotation.Autowired; 6 | import org.springframework.kafka.core.KafkaTemplate; 7 | import org.springframework.web.bind.annotation.RequestBody; 8 | import org.springframework.web.bind.annotation.RequestMapping; 9 | import org.springframework.web.bind.annotation.RestController; 10 | 11 | import java.util.HashMap; 12 | import java.util.Map; 13 | 14 | /** 15 | * @Author: Henry 16 | * @Description: 17 | * @Date: Create in 2019/10/11 23:43 18 | **/ 19 | 20 | // 表示这是一个 Controller,并且其中所有的方法都是带有 @ResponseBody 的注解 21 | @RestController 22 | public class ReportController { 23 | 24 | @Autowired 25 | KafkaTemplate kafkaTemplate; 26 | 27 | @RequestMapping("/receive") 28 | public Map receive(@RequestBody String json) { 29 | 30 | Map map = new HashMap(); // 记录是否发送成功 31 | 32 | try { 33 | // 构建 Message 34 | Message msg = new Message(); 35 | msg.setMessage(json); 36 | msg.setCount(1); 37 | msg.setTimestamp(System.currentTimeMillis()); 38 | 39 | String msgJSON = JSON.toJSONString(msg); 40 | 41 | // 发送 Message 到 Kafka 42 | kafkaTemplate.send("pyg", msgJSON); 43 | map.put("success", "ture"); 44 | 45 | }catch (Exception ex){ 46 | ex.printStackTrace(); 47 | map.put("success", "false"); 48 | } 49 | 50 | return map; 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /report/src/main/java/com/henry/report/controller/TestController.java: -------------------------------------------------------------------------------- 1 | package com.henry.report.controller; 2 | 3 | import org.springframework.web.bind.annotation.RequestMapping; 4 | import org.springframework.web.bind.annotation.RestController; 5 | 6 | /** 7 | * @Author: HongZhen 8 | * @Description: Spring Boot 测试 9 | * @Date: Create in 2019/9/20 11:19 10 | **/ 11 | 12 | // 表示这是一个 Controller,并且其中所有的方法都是带有 @ResponseBody 的注解 13 | @RestController 14 | public class TestController{ 15 | 16 | // 为了能访问到该方法,需要添加如下注解,参数是代表如何来请求 17 | @RequestMapping("/test") 18 | public String test(String json){ 19 | System.out.println(json); 20 | return json; 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /report/src/main/java/com/henry/report/util/ClickLogGenerator.java: -------------------------------------------------------------------------------- 1 | package com.henry.report.util; 2 | 3 | import com.alibaba.fastjson.JSONObject; 4 | import com.henry.report.bean.Clicklog; 5 | import org.apache.http.HttpResponse; 6 | import org.apache.http.HttpStatus; 7 | import org.apache.http.client.methods.HttpPost; 8 | import org.apache.http.entity.StringEntity; 9 | import org.apache.http.impl.client.CloseableHttpClient; 10 | import org.apache.http.impl.client.HttpClientBuilder; 11 | import org.apache.http.util.EntityUtils; 12 | 13 | import java.text.DateFormat; 14 | import java.text.ParseException; 15 | import java.text.SimpleDateFormat; 16 | import java.util.ArrayList; 17 | import java.util.Date; 18 | import java.util.List; 19 | import java.util.Random; 20 | 21 | /** 22 | * @Author: Henry 23 | * @Description: 点击流日志模拟器 24 | * @Date: Create in 2019/10/13 20:00 25 | **/ 26 | public class ClickLogGenerator { 27 | 28 | // ID 信息 29 | private static Long[] channelID = new Long[]{1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 
19L, 20L}; 30 | private static Long[] categoryID = new Long[]{1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L}; 31 | private static Long[] produceID = new Long[]{1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L}; 32 | private static Long[] userID = new Long[]{1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L}; 33 | 34 | // 地区 35 | private static String[] contrys = new String[]{"china"}; // 地区-国家集合 36 | private static String[] provinces = new String[]{"HeNan", "HeBeijing"}; // 地区-省集合 37 | private static String[] citys = new String[]{"ShiJiaZhuang", "ZhengZhou", "LuoyYang"}; // 地区-市集合 38 | 39 | // 网络方式 40 | private static String[] networks = new String[]{"电信", "移动", "联通"}; 41 | 42 | // 来源方式 43 | private static String[] sources = new String[]{"直接输入", "百度跳转", "360搜索跳转", "必应跳转"}; 44 | 45 | // 浏览器 46 | private static String[] browser = new String[]{"火狐", "QQ浏览器", "360浏览器", "谷歌浏览器"}; 47 | 48 | // 打开方式,离开时间 49 | private static List usertimeLog = producetimes(); 50 | 51 | // 获取时间 52 | private static List producetimes() { 53 | List usertimelog = new ArrayList<>(); 54 | for (int i = 0; i < 100; i++) { 55 | Long[] timearray = gettimes("2019-10-10 24:60:60:000"); 56 | usertimelog.add(timearray); 57 | } 58 | return usertimelog; 59 | } 60 | 61 | private static Long[] gettimes(String time) { 62 | DateFormat dataFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss:SSS"); 63 | try { 64 | Date date = dataFormat.parse(time); 65 | long timetemp = date.getTime(); 66 | Random random = new Random(); 67 | int randomint = random.nextInt(10); 68 | long starttime = timetemp - randomint*3600*1000; 69 | long endtime = starttime + randomint*3600*1000; 70 | return new Long[]{starttime,endtime}; 71 | }catch (ParseException e){ 72 | e.printStackTrace(); 73 | } 74 | return new Long[]{0L, 0L}; 75 | } 76 | 77 | // 模拟发送 Http 请求到上报服务系统 78 | public static void send(String url, String json){ 79 | try { 80 | CloseableHttpClient httpClient = HttpClientBuilder.create().build(); 81 | HttpPost post = new HttpPost(url); 82 | JSONObject response = null ; 83 | try { 84 | StringEntity s = new StringEntity(json.toString(), "utf-8"); 85 | s.setContentEncoding("utf-8"); 86 | // 发送 json 数据需要设置 contentType 87 | s.setContentType("application/json"); 88 | post.setEntity(s); 89 | 90 | HttpResponse res = httpClient.execute(post); 91 | if(res.getStatusLine().getStatusCode() == HttpStatus.SC_OK){ 92 | // 返回 json 格式 93 | String result = EntityUtils.toString(res.getEntity()); 94 | System.out.println(result); 95 | } 96 | }catch (Exception e){ 97 | throw new RuntimeException(); 98 | 99 | } 100 | 101 | }catch (Exception e){ 102 | e.printStackTrace(); 103 | } 104 | } 105 | 106 | public static void main(String[] args) { 107 | Random random = new Random(); 108 | for (int i = 0; i < 100; i++) { 109 | // 频道id、类别id、产品id、用户id、打开时间、离开时间、地区、网络方式、来源方式、浏览器 110 | Clicklog clicklog = new Clicklog(); 111 | 112 | clicklog.setChannelID(channelID[random.nextInt(channelID.length)]); 113 | clicklog.setCategoryID(categoryID[random.nextInt(channelID.length)]); 114 | clicklog.setProduceID(produceID[random.nextInt(produceID.length)]); 115 | clicklog.setUserID(userID[random.nextInt(userID.length)]); 116 | clicklog.setCountry(contrys[random.nextInt(contrys.length)]); 117 | clicklog.setProvince(provinces[random.nextInt(provinces.length)]); 118 | clicklog.setCity(citys[random.nextInt(citys.length)]); 119 | 
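// NOTE: two quirks in this generator: setCategoryID above indexes categoryID with
// random.nextInt(channelID.length) rather than categoryID.length (harmless only because both
// arrays happen to hold 20 elements), and the seed time "2019-10-10 24:60:60:000" only parses
// because SimpleDateFormat is lenient and rolls the out-of-range fields over.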
clicklog.setNetwork(networks[random.nextInt(networks.length)]); 120 | clicklog.setSource(sources[random.nextInt(sources.length)]); 121 | clicklog.setBrowserType(browser[random.nextInt(browser.length)]); 122 | 123 | Long[] times = usertimeLog.get(random.nextInt(usertimeLog.size())); 124 | clicklog.setEntryTime(times[0]); 125 | clicklog.setLeaveTime(times[1]); 126 | 127 | // 将点击流日志转成字符串,发送到前端地址 128 | String jsonstr = JSONObject.toJSONString(clicklog); 129 | System.out.println(jsonstr); 130 | try { 131 | Thread.sleep(100); 132 | }catch (InterruptedException e){ 133 | e.printStackTrace(); 134 | } 135 | 136 | send("http://localhost:1234/receive", jsonstr); 137 | } 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /report/src/main/java/com/henry/report/util/KafkaProducerConfig.java: -------------------------------------------------------------------------------- 1 | package com.henry.report.util; 2 | 3 | import org.apache.kafka.clients.producer.ProducerConfig; 4 | import org.apache.kafka.common.serialization.StringSerializer; 5 | import org.springframework.beans.factory.annotation.Value; 6 | import org.springframework.context.annotation.Bean; 7 | import org.springframework.context.annotation.Configuration; 8 | import org.springframework.kafka.core.DefaultKafkaProducerFactory; 9 | import org.springframework.kafka.core.KafkaTemplate; 10 | import org.springframework.kafka.core.ProducerFactory; 11 | 12 | import java.util.HashMap; 13 | import java.util.Map; 14 | 15 | /** 16 | * @Author: Henry 17 | * @Description: KafkaProducerConfig 18 | * @Date: Create in 2019/10/6 21:56 19 | **/ 20 | 21 | @Configuration // 1、表示该类是一个配置类,这样在下面才能创建 Bean 22 | public class KafkaProducerConfig { 23 | 24 | // 通过@value注解将配置文件中kafka.bootstrap_servers_config的值赋值给成员变量 25 | @Value("${kafka.bootstrap_servers_config}") 26 | private String bootstrap_servers_config; 27 | // 如果出现发送失败的情况,允许重试的次数 28 | @Value("${kafka.retries_config}") 29 | private String retries_config; 30 | // 每个批次发送多大的数据,单位:字节 31 | @Value("${kafka.batch_size_config}") 32 | private String batch_size_config; 33 | // 定时发送,达到 1ms 发送 34 | @Value("${kafka.linger_ms_config}") 35 | private String linger_ms_config; 36 | // 缓存的大小,单位:字节 37 | @Value("${kafka.buffer_memory_config}") 38 | private String buffer_memory_config; 39 | // TOPOC 名字 40 | @Value("${kafka.topic}") 41 | private String topic; 42 | 43 | 44 | @Bean // 2、表示该对象是受 Spring 管理的一个 Bean 45 | public KafkaTemplate kafkaTemplate() { 46 | 47 | // 构建工程需要的配置 48 | Map configs = new HashMap<>(); 49 | 50 | // 3、设置相应的配置 51 | // 将成员变量的值设置到Map中,在创建kafka_producer中用到 52 | configs.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrap_servers_config); 53 | configs.put(ProducerConfig.RETRIES_CONFIG, retries_config); 54 | configs.put(ProducerConfig.BATCH_SIZE_CONFIG, batch_size_config); 55 | configs.put(ProducerConfig.LINGER_MS_CONFIG, linger_ms_config); 56 | configs.put(ProducerConfig.BUFFER_MEMORY_CONFIG, buffer_memory_config); 57 | 58 | // 设置 key、value 的序列化器 59 | configs.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG , StringSerializer.class); 60 | configs.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG , StringSerializer.class); 61 | 62 | // 指定自定义分区 63 | configs.put(ProducerConfig.PARTITIONER_CLASS_CONFIG, RoundRobinPartitioner.class); 64 | 65 | 66 | // 4、创建生产者工厂 67 | ProducerFactory producerFactory = new DefaultKafkaProducerFactory(configs); 68 | 69 | // 5、再把工厂传递给Template构造方法 70 | // 表示需要返回一个 kafkaTemplate 对象 71 | return new KafkaTemplate(producerFactory); 72 | } 73 | } 74 | 
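// NOTE: the @Value annotations above imply that application.properties (whose content is not
// reproduced in this dump) must define at least kafka.bootstrap_servers_config,
// kafka.retries_config, kafka.batch_size_config, kafka.linger_ms_config,
// kafka.buffer_memory_config and kafka.topic; any concrete values would be deployment-specific.
// Also note that ReportController sends to the hard-coded topic "pyg", so the kafka.topic
// property is read here but, in the code shown, not used when publishing.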
75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /report/src/main/java/com/henry/report/util/RoundRobinPartitioner.java: -------------------------------------------------------------------------------- 1 | package com.henry.report.util; 2 | 3 | import org.apache.kafka.clients.producer.Partitioner; 4 | import org.apache.kafka.common.Cluster; 5 | 6 | import java.util.Map; 7 | import java.util.concurrent.atomic.AtomicInteger; 8 | 9 | /** 10 | * @Author: Henry 11 | * @Description: 自定义分区 12 | * @Date: Create in 2019/10/9 23:00 13 | **/ 14 | 15 | public class RoundRobinPartitioner implements Partitioner { 16 | 17 | // AtomicInteger 并发包下的多线程安全的整型类 18 | AtomicInteger counter = new AtomicInteger(0) ; 19 | 20 | 21 | // 返回值为分区号: 0、1、2 22 | @Override 23 | public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) { 24 | 25 | // 获取分区的数量 26 | Integer partitions = cluster.partitionCountForTopic(topic) ; 27 | 28 | int curpartition = counter.incrementAndGet() % partitions ; // 当前轮询的 partition 号 29 | 30 | if(counter.get() > 65535){ 31 | counter.set(0); 32 | } 33 | 34 | return curpartition; 35 | } 36 | 37 | @Override 38 | public void close() { 39 | 40 | } 41 | 42 | @Override 43 | public void configure(Map map) { 44 | 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /report/src/main/resources/application.properties: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/report/src/main/resources/application.properties -------------------------------------------------------------------------------- /report/src/test/java/com/henry/report/KafkaTest.java: -------------------------------------------------------------------------------- 1 | package com.henry.report; 2 | 3 | import org.junit.Test; 4 | import org.junit.runner.RunWith; 5 | import org.springframework.beans.factory.annotation.Autowired; 6 | import org.springframework.boot.test.context.SpringBootTest; 7 | import org.springframework.kafka.core.KafkaTemplate; 8 | import org.springframework.test.context.junit4.SpringRunner; 9 | 10 | /** 11 | * @Author: Henry 12 | * @Description: 测试Kafka 13 | * @Date: Create in 2019/10/8 23:26 14 | **/ 15 | 16 | @RunWith(SpringRunner.class) 17 | @SpringBootTest 18 | public class KafkaTest { 19 | 20 | @Autowired 21 | KafkaTemplate kafkaTemplate; 22 | 23 | @Test 24 | public void sendMsg(){ 25 | for (int i = 0; i < 100; i++) 26 | kafkaTemplate.send("test", "key","this is test msg") ; 27 | } 28 | 29 | } 30 | -------------------------------------------------------------------------------- /screenshot/036a079d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/036a079d.png -------------------------------------------------------------------------------- /screenshot/03ef7ace.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/03ef7ace.png 
-------------------------------------------------------------------------------- /screenshot/04e25b5a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/04e25b5a.png -------------------------------------------------------------------------------- /screenshot/07a78b77.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/07a78b77.png -------------------------------------------------------------------------------- /screenshot/0b4d0c1b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/0b4d0c1b.png -------------------------------------------------------------------------------- /screenshot/0b4ea4e1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/0b4ea4e1.png -------------------------------------------------------------------------------- /screenshot/0bd763d1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/0bd763d1.png -------------------------------------------------------------------------------- /screenshot/0ced234a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/0ced234a.png -------------------------------------------------------------------------------- /screenshot/0e6080a2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/0e6080a2.png -------------------------------------------------------------------------------- /screenshot/0fcd02b7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/0fcd02b7.png -------------------------------------------------------------------------------- /screenshot/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/1.png -------------------------------------------------------------------------------- /screenshot/121bf948.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/121bf948.png -------------------------------------------------------------------------------- /screenshot/12f712f9.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/12f712f9.png -------------------------------------------------------------------------------- /screenshot/13c61ea9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/13c61ea9.png -------------------------------------------------------------------------------- /screenshot/14679e84.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/14679e84.png -------------------------------------------------------------------------------- /screenshot/1a3addd7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/1a3addd7.png -------------------------------------------------------------------------------- /screenshot/1d504cce.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/1d504cce.png -------------------------------------------------------------------------------- /screenshot/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/2.png -------------------------------------------------------------------------------- /screenshot/201507bb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/201507bb.png -------------------------------------------------------------------------------- /screenshot/21733492.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/21733492.png -------------------------------------------------------------------------------- /screenshot/2193cbd1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/2193cbd1.png -------------------------------------------------------------------------------- /screenshot/22cd7b3c.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/22cd7b3c.png -------------------------------------------------------------------------------- /screenshot/277372f9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/277372f9.png 
-------------------------------------------------------------------------------- /screenshot/2b7f3937.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/2b7f3937.png -------------------------------------------------------------------------------- /screenshot/2c0ad8e2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/2c0ad8e2.png -------------------------------------------------------------------------------- /screenshot/2d11fecd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/2d11fecd.png -------------------------------------------------------------------------------- /screenshot/2f5a312e.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/2f5a312e.png -------------------------------------------------------------------------------- /screenshot/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/3.png -------------------------------------------------------------------------------- /screenshot/3254e2ca.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/3254e2ca.png -------------------------------------------------------------------------------- /screenshot/32a6daaf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/32a6daaf.png -------------------------------------------------------------------------------- /screenshot/342dcc3e.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/342dcc3e.png -------------------------------------------------------------------------------- /screenshot/34a79ff7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/34a79ff7.png -------------------------------------------------------------------------------- /screenshot/34f66a92.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/34f66a92.png -------------------------------------------------------------------------------- /screenshot/3754f480.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/3754f480.png -------------------------------------------------------------------------------- /screenshot/3936fce5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/3936fce5.png -------------------------------------------------------------------------------- /screenshot/3ab50051.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/3ab50051.png -------------------------------------------------------------------------------- /screenshot/3b6d6d1f.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/3b6d6d1f.png -------------------------------------------------------------------------------- /screenshot/3c8d398c.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/3c8d398c.png -------------------------------------------------------------------------------- /screenshot/3d2cda96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/3d2cda96.png -------------------------------------------------------------------------------- /screenshot/3f08b9d0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/3f08b9d0.png -------------------------------------------------------------------------------- /screenshot/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/4.png -------------------------------------------------------------------------------- /screenshot/48cd018e.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/48cd018e.png -------------------------------------------------------------------------------- /screenshot/4b18ecbe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/4b18ecbe.png -------------------------------------------------------------------------------- /screenshot/4cf81224.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/4cf81224.png 
-------------------------------------------------------------------------------- /screenshot/520fd656.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/520fd656.png -------------------------------------------------------------------------------- /screenshot/5326b634.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/5326b634.png -------------------------------------------------------------------------------- /screenshot/54187145.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/54187145.png -------------------------------------------------------------------------------- /screenshot/544d0e7a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/544d0e7a.png -------------------------------------------------------------------------------- /screenshot/565c64ed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/565c64ed.png -------------------------------------------------------------------------------- /screenshot/58926ce0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/58926ce0.png -------------------------------------------------------------------------------- /screenshot/58945558.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/58945558.png -------------------------------------------------------------------------------- /screenshot/5a321628.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/5a321628.png -------------------------------------------------------------------------------- /screenshot/62c03232.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/62c03232.png -------------------------------------------------------------------------------- /screenshot/64a0b856.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/64a0b856.png -------------------------------------------------------------------------------- /screenshot/65e75e0f.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/65e75e0f.png -------------------------------------------------------------------------------- /screenshot/69907922.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/69907922.png -------------------------------------------------------------------------------- /screenshot/6ac8e320.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/6ac8e320.png -------------------------------------------------------------------------------- /screenshot/6c04e485.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/6c04e485.png -------------------------------------------------------------------------------- /screenshot/6c99f78b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/6c99f78b.png -------------------------------------------------------------------------------- /screenshot/6f5af076.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/6f5af076.png -------------------------------------------------------------------------------- /screenshot/6f897038.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/6f897038.png -------------------------------------------------------------------------------- /screenshot/6fcd4a44.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/6fcd4a44.png -------------------------------------------------------------------------------- /screenshot/70a923ce.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/70a923ce.png -------------------------------------------------------------------------------- /screenshot/72d64e76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/72d64e76.png -------------------------------------------------------------------------------- /screenshot/74d009f4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/74d009f4.png 
-------------------------------------------------------------------------------- /screenshot/75fcc253.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/75fcc253.png -------------------------------------------------------------------------------- /screenshot/76c4fbf8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/76c4fbf8.png -------------------------------------------------------------------------------- /screenshot/79c600b1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/79c600b1.png -------------------------------------------------------------------------------- /screenshot/7b5e4836.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/7b5e4836.png -------------------------------------------------------------------------------- /screenshot/7cba404f.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/7cba404f.png -------------------------------------------------------------------------------- /screenshot/7cd00637.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/7cd00637.png -------------------------------------------------------------------------------- /screenshot/7cf4425b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/7cf4425b.png -------------------------------------------------------------------------------- /screenshot/7fe930e0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/7fe930e0.png -------------------------------------------------------------------------------- /screenshot/820fe570.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/820fe570.png -------------------------------------------------------------------------------- /screenshot/831e1859.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/831e1859.png -------------------------------------------------------------------------------- /screenshot/880c750d.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/880c750d.png -------------------------------------------------------------------------------- /screenshot/8c5fa195.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/8c5fa195.png -------------------------------------------------------------------------------- /screenshot/8cca6196.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/8cca6196.png -------------------------------------------------------------------------------- /screenshot/8f89e666.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/8f89e666.png -------------------------------------------------------------------------------- /screenshot/8fe964b8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/8fe964b8.png -------------------------------------------------------------------------------- /screenshot/908989c5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/908989c5.png -------------------------------------------------------------------------------- /screenshot/9379b632.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/9379b632.png -------------------------------------------------------------------------------- /screenshot/946fe86f.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/946fe86f.png -------------------------------------------------------------------------------- /screenshot/9897be78.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/9897be78.png -------------------------------------------------------------------------------- /screenshot/98ddfe9a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/98ddfe9a.png -------------------------------------------------------------------------------- /screenshot/9e4179c5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/9e4179c5.png 
-------------------------------------------------------------------------------- /screenshot/9e67979f.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/9e67979f.png -------------------------------------------------------------------------------- /screenshot/a13d8808.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/a13d8808.png -------------------------------------------------------------------------------- /screenshot/a2ab75e3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/a2ab75e3.png -------------------------------------------------------------------------------- /screenshot/a35893be.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/a35893be.png -------------------------------------------------------------------------------- /screenshot/a47efd66.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/a47efd66.png -------------------------------------------------------------------------------- /screenshot/a560cff6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/a560cff6.png -------------------------------------------------------------------------------- /screenshot/a66b3e6f.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/a66b3e6f.png -------------------------------------------------------------------------------- /screenshot/a8d36972.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/a8d36972.png -------------------------------------------------------------------------------- /screenshot/aa3dbfbf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/aa3dbfbf.png -------------------------------------------------------------------------------- /screenshot/abb5e847.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/abb5e847.png -------------------------------------------------------------------------------- /screenshot/aef2abe1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/aef2abe1.png -------------------------------------------------------------------------------- /screenshot/af73ebaa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/af73ebaa.png -------------------------------------------------------------------------------- /screenshot/b35e8d12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/b35e8d12.png -------------------------------------------------------------------------------- /screenshot/b77622b6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/b77622b6.png -------------------------------------------------------------------------------- /screenshot/c1186185.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/c1186185.png -------------------------------------------------------------------------------- /screenshot/c33fe1b4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/c33fe1b4.png -------------------------------------------------------------------------------- /screenshot/c6d0728b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/c6d0728b.png -------------------------------------------------------------------------------- /screenshot/c84f6044.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/c84f6044.png -------------------------------------------------------------------------------- /screenshot/cba7b53e.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/cba7b53e.png -------------------------------------------------------------------------------- /screenshot/cdefdf02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/cdefdf02.png -------------------------------------------------------------------------------- /screenshot/cf67e612.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/cf67e612.png 
-------------------------------------------------------------------------------- /screenshot/cfd8e121.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/cfd8e121.png -------------------------------------------------------------------------------- /screenshot/d068b5c0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/d068b5c0.png -------------------------------------------------------------------------------- /screenshot/d1a2dc81.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/d1a2dc81.png -------------------------------------------------------------------------------- /screenshot/d42bd3f1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/d42bd3f1.png -------------------------------------------------------------------------------- /screenshot/d452de1b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/d452de1b.png -------------------------------------------------------------------------------- /screenshot/d457be6b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/d457be6b.png -------------------------------------------------------------------------------- /screenshot/d57e648a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/d57e648a.png -------------------------------------------------------------------------------- /screenshot/d6cc806c.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/d6cc806c.png -------------------------------------------------------------------------------- /screenshot/d99a61f4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/d99a61f4.png -------------------------------------------------------------------------------- /screenshot/d9fcfcf5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/d9fcfcf5.png -------------------------------------------------------------------------------- /screenshot/dc0e0c05.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/dc0e0c05.png -------------------------------------------------------------------------------- /screenshot/dc64a356.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/dc64a356.png -------------------------------------------------------------------------------- /screenshot/dedf144c.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/dedf144c.png -------------------------------------------------------------------------------- /screenshot/df332a64.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/df332a64.png -------------------------------------------------------------------------------- /screenshot/e219a541.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/e219a541.png -------------------------------------------------------------------------------- /screenshot/e4022013.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/e4022013.png -------------------------------------------------------------------------------- /screenshot/e44c5879.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/e44c5879.png -------------------------------------------------------------------------------- /screenshot/e6130b81.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/e6130b81.png -------------------------------------------------------------------------------- /screenshot/e61c1e01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/e61c1e01.png -------------------------------------------------------------------------------- /screenshot/e751cb2d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/e751cb2d.png -------------------------------------------------------------------------------- /screenshot/ea8764de.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/ea8764de.png 
-------------------------------------------------------------------------------- /screenshot/ebf3c65b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/ebf3c65b.png -------------------------------------------------------------------------------- /screenshot/ec1f3fda.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/ec1f3fda.png -------------------------------------------------------------------------------- /screenshot/fc27880f.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/fc27880f.png -------------------------------------------------------------------------------- /screenshot/fe002ea4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/fe002ea4.png -------------------------------------------------------------------------------- /screenshot/ff2dcb9b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HenryBao91/Flink-Analysis-of-Electronic-Commerce/a50d993b8f545aeafbedf02fc18adaf768e03ec7/screenshot/ff2dcb9b.png -------------------------------------------------------------------------------- /sync-db/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | pyg 7 | com.henry 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | sync-db 13 | 14 | 15 | 2.11 16 | 1.6.0 17 | 3.2.4 18 | 2.0.0 19 | 20 | 21 | 22 | 23 | 24 | 25 | org.apache.kafka 26 | kafka_${scala.version} 27 | 0.10.1.0 28 | 29 | 30 | 31 | 32 | 33 | org.apache.flink 34 | flink-connector-kafka-0.10_${scala.version} 35 | ${flink.version} 36 | 37 | 38 | 39 | 40 | org.apache.flink 41 | flink-table_${scala.version} 42 | ${flink.version} 43 | 44 | 45 | 46 | org.apache.flink 47 | flink-scala_${scala.version} 48 | ${flink.version} 49 | 50 | 51 | 52 | 53 | org.apache.flink 54 | flink-streaming-scala_${scala.version} 55 | ${flink.version} 56 | 57 | 58 | org.apache.flink 59 | flink-streaming-java_${scala.version} 60 | ${flink.version} 61 | 62 | 63 | 64 | 65 | org.apache.flink 66 | flink-hbase_${scala.version} 67 | ${flink.version} 68 | 69 | 70 | 71 | org.apache.hbase 72 | hbase-client 73 | ${hbase.version} 74 | 75 | 76 | 77 | 82 | 83 | 84 | 85 | 86 | org.apache.hadoop 87 | hadoop-common 88 | ${hadoop.version} 89 | 90 | 91 | 92 | org.apache.hadoop 93 | hadoop-hdfs 94 | ${hadoop.version} 95 | 96 | 97 | 98 | xml-apis 99 | xml-apis 100 | 101 | 102 | 103 | 104 | 105 | org.apache.hadoop 106 | hadoop-client 107 | ${hadoop.version} 108 | 109 | 110 | 111 | com.google.protobuf 112 | protobuf-java 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | com.alibaba 121 | fastjson 122 | 1.2.83 123 | 124 | 125 | 126 | 127 | 128 | src/main/scala 129 | src/test/scala 130 | 131 | 132 | org.apache.maven.plugins 133 | maven-shade-plugin 134 | 3.0.0 135 | 136 | 137 | package 138 | 139 | shade 140 | 141 | 142 | 143 | 144 | com.google.code.findbugs:jsr305 145 | org.slf4j:* 146 | log4j:* 147 | 
148 | 149 | 150 | 151 | 153 | *:* 154 | 155 | META-INF/*.SF 156 | META-INF/*.DSA 157 | META-INF/*.RSA 158 | 159 | 160 | 161 | 162 | 164 | com.itheima.syncdb.App 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | -------------------------------------------------------------------------------- /sync-db/src/main/resources/application.conf: -------------------------------------------------------------------------------- 1 | # 2 | #kafka的配置 3 | # 4 | # Kafka集群地址 5 | bootstrap.servers="master:9092,slave1:9092,slave2:9092" 6 | # ZooKeeper集群地址 7 | zookeeper.connect="master:2181,slave1:2181,slave2:2181" 8 | # Kafka Topic名称 9 | input.topic="canal" 10 | # 消费组ID 11 | group.id="canal" 12 | # 自动提交拉取到消费端的消息offset到kafka 13 | enable.auto.commit="true" 14 | # 自动提交offset到zookeeper的时间间隔单位(毫秒) 15 | auto.commit.interval.ms="5000" 16 | # 每次消费最新的数据 17 | auto.offset.reset="latest" -------------------------------------------------------------------------------- /sync-db/src/main/resources/hbase-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 23 | 24 | 25 | hbase.rootdir 26 | hdfs://mster:8020/hbase 27 | 28 | 29 | 30 | hbase.cluster.distributed 31 | true 32 | 33 | 34 | 35 | 36 | hbase.master.port 37 | 16000 38 | 39 | 40 | 41 | 42 | hbase.zookeeper.property.clientPort 43 | 2181 44 | 45 | 46 | 47 | hbase.zookeeper.quorum 48 | master:2181,slave1:2181,slave2:2181 49 | 50 | 51 | 52 | hbase.zookeeper.property.dataDir 53 | /usr/local/src/zookeeper-3.4.5/hbasedata 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /sync-db/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=warn,stdout 2 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 3 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 4 | log4j.appender.stdout.layout.ConversionPattern=%5p - %m%n -------------------------------------------------------------------------------- /sync-db/src/main/scala/com/henry/syncdb/App.scala: -------------------------------------------------------------------------------- 1 | package com.henry.syncdb 2 | 3 | import java.util.Properties 4 | 5 | import com.henry.syncdb.bean.{Cannal, HBaseOperation} 6 | import com.henry.syncdb.task.PreprocessTask 7 | import com.henry.syncdb.util.{FlinkUtils, GlobalConfigutil, HBaseUtil} 8 | import org.apache.flink.api.common.serialization.SimpleStringSchema 9 | import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment} 10 | import org.apache.flink.api.scala._ 11 | import org.apache.flink.runtime.state.filesystem.FsStateBackend 12 | import org.apache.flink.streaming.api.environment.CheckpointConfig 13 | import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic} 14 | import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks 15 | import org.apache.flink.streaming.api.functions.sink.SinkFunction 16 | import org.apache.flink.streaming.api.watermark.Watermark 17 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010 18 | 19 | /** 20 | * @Author: Henry 21 | * @Description: 22 | * @Date: Create in 2019/11/6 21:17 23 | **/ 24 | object App { 25 | 26 | def main(args: Array[String]): Unit = { 27 | 28 | 29 | val env = FlinkUtils.initFlinkEnv() 30 | 31 | // // 1、输出测试 32 | // val testDs: DataStream[String] = env.fromCollection(List( 33 | // "1", "2", "3" 34 | // )) 35 | // testDs.print() 36 | 37 | val consumer = 
FlinkUtils.initKafkaFlink() 38 | 39 | // 测试打印 40 | val kafkaDataStream: DataStream[String] = env.addSource(consumer) 41 | // kafkaDataStream.print() 42 | 43 | val cannalDs: DataStream[Cannal] = kafkaDataStream.map { 44 | json => 45 | Cannal(json) 46 | } 47 | // cannalDs.print() 48 | 49 | 50 | val waterDS: DataStream[Cannal] = cannalDs.assignTimestampsAndWatermarks( 51 | new AssignerWithPeriodicWatermarks[Cannal] { 52 | 53 | // 当前的时间戳 54 | var currentTimestamp = 0L 55 | 56 | // 延迟的时间 57 | val delayTime = 2000L 58 | 59 | // 返回水印时间 60 | override def getCurrentWatermark: Watermark = { 61 | new Watermark(currentTimestamp - delayTime) 62 | } 63 | 64 | // 比较当前元素的时间和上一个元素的时间,取最大值,防止发生时光倒流 65 | override def extractTimestamp(element: Cannal, previousElementTimestamp: Long): Long = { 66 | currentTimestamp = Math.max(element.timestamp, previousElementTimestamp) 67 | currentTimestamp 68 | } 69 | }) 70 | // waterDS.print() 71 | 72 | val hbaseDs: DataStream[HBaseOperation] = PreprocessTask.process(waterDS) 73 | hbaseDs.print() 74 | 75 | hbaseDs.addSink(new SinkFunction[HBaseOperation] { 76 | override def invoke(value: HBaseOperation): Unit = { 77 | value.opType match { 78 | case "DELETE" => HBaseUtil.deleteData(value.tableName,value.rowkey,value.cfName) 79 | case _ => HBaseUtil.putData(value.tableName,value.rowkey,value.cfName,value.colName,value.colValue) 80 | } 81 | } 82 | }) 83 | 84 | 85 | 86 | // 执行任务 87 | env.execute("sync-db") 88 | 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /sync-db/src/main/scala/com/henry/syncdb/bean/Cannal.scala: -------------------------------------------------------------------------------- 1 | package com.henry.syncdb.bean 2 | 3 | import com.alibaba.fastjson.JSON 4 | 5 | /** 6 | * @Author: Henry 7 | * @Description: 8 | * @Date: Create in 2019/11/7 19:28 9 | **/ 10 | case class Cannal( 11 | var emptyCount:Long, 12 | var logFileName:String, 13 | var dbName:String, 14 | var logFileOffset:Long, 15 | var eventType:String, 16 | var columnValueList:String, 17 | var tableName:String, 18 | var timestamp:Long 19 | ) 20 | 21 | object Cannal { 22 | 23 | def apply(json:String): Cannal = { 24 | val canal: Cannal = JSON.parseObject[Cannal](json,classOf[Cannal]) 25 | canal 26 | } 27 | 28 | def main(args: Array[String]): Unit = { 29 | 30 | val json = "{\"emptyCount\":2,\"logFileName\":\"mysql-bin.000005\",\"dbName\":\"pyg\",\"logFileOffset\":20544,\"eventType\":\"INSERT\",\"columnValueList\":[{\"columnName\":\"commodityId\",\"columnValue\":\"6\",\"isValid\":true},{\"columnName\":\"commodityName\",\"columnValue\":\"欧派\",\"isValid\":true},{\"columnName\":\"commodityTypeId\",\"columnValue\":\"3\",\"isValid\":true},{\"columnName\":\"originalPrice\",\"columnValue\":\"43000.0\",\"isValid\":true},{\"columnName\":\"activityPrice\",\"columnValue\":\"40000.0\",\"isValid\":true}],\"tableName\":\"commodity\",\"timestamp\":1558764495000}" 31 | val cannal = Cannal(json) 32 | 33 | 34 | println(cannal.timestamp) 35 | println(Cannal(json).dbName) 36 | 37 | } 38 | } -------------------------------------------------------------------------------- /sync-db/src/main/scala/com/henry/syncdb/bean/HBaseOperation.scala: -------------------------------------------------------------------------------- 1 | package com.henry.syncdb.bean 2 | 3 | /** 4 | * @Author: Henry 5 | * @Description: 6 | * 操作类型(opType)= INSERT/DELETE/UPDATE 7 | * 表名(tableName)= mysql.binlog数据库名.binlog表名 8 | * 列族名(cfName)= 固定为info 9 | * rowkey = 唯一主键(取binlog中列数据的第一个) 10 | * 列名(colName)= 
column name in the binlog 11 | * 列值(colValue)= column value in the binlog 12 | * @Date: Create in 2019/11/7 19:52 13 | **/ 14 | 15 | case class HBaseOperation( 16 | var opType: String, 17 | val tableName: String, 18 | val cfName: String, 19 | val rowkey: String, 20 | val colName: String, 21 | val colValue: String 22 | ) 23 | 24 | -------------------------------------------------------------------------------- /sync-db/src/main/scala/com/henry/syncdb/task/PreprocessTask.scala: -------------------------------------------------------------------------------- 1 | package com.henry.syncdb.task 2 | 3 | import java.util 4 | 5 | import com.alibaba.fastjson.JSON 6 | import com.henry.syncdb.bean.{Cannal, HBaseOperation} 7 | import org.apache.flink.streaming.api.scala.DataStream 8 | import org.apache.flink.api.scala._ 9 | 10 | import scala.collection.JavaConverters._ 11 | import scala.collection.mutable 12 | 13 | case class NameValuePair( 14 | var columnName: String, 15 | var columnValue: String, 16 | var isValid: Boolean 17 | ) 18 | 19 | object PreprocessTask { 20 | 21 | def process(canalDataStream: DataStream[Cannal]) = { 22 | 23 | // flatmap 24 | 25 | canalDataStream.flatMap { 26 | canal => { 27 | 28 | // Convert canal.columnValueList into a Scala collection 29 | // JSON.parseArray returns a Java collection 30 | val javaList: util.List[NameValuePair] = JSON.parseArray(canal.columnValueList, classOf[NameValuePair]) 31 | val nameValueList: mutable.Buffer[NameValuePair] = javaList.asScala 32 | 33 | // Fields needed to build HBaseOperation 34 | var opType = canal.eventType 35 | val tableName = "mysql." + canal.dbName + "." + canal.tableName 36 | val cfName = "info" 37 | val rowkey = nameValueList(0).columnValue 38 | 39 | // Check whether the event is an INSERT, UPDATE or DELETE, then map the collection accordingly 40 | opType match { 41 | case "INSERT" => 42 | nameValueList.map { 43 | nameValue => HBaseOperation(opType, tableName, cfName, rowkey, nameValue.columnName, nameValue.columnValue) 44 | } 45 | 46 | case "UPDATE" => 47 | nameValueList.filter(_.isValid).map { 48 | nameValue => HBaseOperation(opType, tableName, cfName, rowkey, nameValue.columnName, nameValue.columnValue) 49 | } 50 | 51 | case "DELETE" => 52 | List(HBaseOperation(opType,tableName,cfName,rowkey,"","")) 53 | 54 | } 55 | 56 | // List[HBaseOperation]() 57 | } 58 | } 59 | 60 | } 61 | 62 | } 63 | -------------------------------------------------------------------------------- /sync-db/src/main/scala/com/henry/syncdb/util/FlinkUtils.scala: -------------------------------------------------------------------------------- 1 | package com.henry.syncdb.util 2 | 3 | import java.util.Properties 4 | 5 | import org.apache.flink.api.common.serialization.SimpleStringSchema 6 | import org.apache.flink.runtime.state.filesystem.FsStateBackend 7 | import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic} 8 | import org.apache.flink.streaming.api.environment.CheckpointConfig 9 | import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 10 | import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010 11 | 12 | /** 13 | * @Author: Henry 14 | * @Description: 15 | * @Date: Create in 2019/11/6 21:58 16 | **/ 17 | object FlinkUtils { 18 | 19 | // Initialize the Flink streaming environment 20 | def initFlinkEnv()={ 21 | // Create the Flink streaming environment 22 | val env = StreamExecutionEnvironment.getExecutionEnvironment 23 | 24 | // Set the env's time characteristic to EventTime 25 | env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime) 26 | 27 | // Set the parallelism 28 | env.setParallelism(1) 29 | 30 | // Checkpoint settings 31 | // Enable checkpointing with a 5 s interval 32 | env.enableCheckpointing(5000) 33 | // 
Set the checkpointing mode; this line can be omitted because enableCheckpointing above already defaults to EXACTLY_ONCE 34 | env.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE) 35 | // Set the minimum pause between two checkpoints 36 | env.getCheckpointConfig.setMinPauseBetweenCheckpoints(1000) 37 | // Set the checkpoint timeout 38 | env.getCheckpointConfig.setCheckpointTimeout(60000) 39 | // Set the maximum number of concurrent checkpoints 40 | env.getCheckpointConfig.setMaxConcurrentCheckpoints(1) 41 | 42 | // Retain the externalized checkpoint when the job is cancelled 43 | env.getCheckpointConfig.enableExternalizedCheckpoints( 44 | CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION) 45 | 46 | // Set the HDFS location where checkpoints are stored 47 | env.setStateBackend(new FsStateBackend("hdfs://master:9000/flink-checkpoint")) 48 | 49 | env 50 | } 51 | 52 | 53 | // Kafka integration 54 | def initKafkaFlink()={ 55 | 56 | val props:Properties = new Properties() 57 | 58 | props.setProperty("bootstrap.servers", GlobalConfigutil.bootstrapServers) 59 | props.setProperty("group.id", GlobalConfigutil.groupId) 60 | props.setProperty("enable.auto.commit", GlobalConfigutil.enableAutoCommit) 61 | props.setProperty("auto.commit.interval.ms", GlobalConfigutil.autoCommitIntervalMs) 62 | props.setProperty("auto.offset.reset", GlobalConfigutil.autoOffsetReset) 63 | 64 | // topic: String, valueDeserializer: DeserializationSchema[T], props: Properties 65 | val consumer: FlinkKafkaConsumer010[String] = new FlinkKafkaConsumer010[String]( 66 | GlobalConfigutil.inputTopic, 67 | new SimpleStringSchema(), 68 | props 69 | ) 70 | consumer 71 | } 72 | 73 | 74 | 75 | 76 | } 77 | -------------------------------------------------------------------------------- /sync-db/src/main/scala/com/henry/syncdb/util/GlobalConfigutil.scala: -------------------------------------------------------------------------------- 1 | package com.henry.syncdb.util 2 | 3 | import com.typesafe.config.{Config, ConfigFactory} 4 | 5 | /** 6 | * @Author: Henry 7 | * @Description: Configuration file loader 8 | * @Date: Create in 2019/10/15 23:42 9 | **/ 10 | object GlobalConfigutil { 11 | 12 | // Load the configuration via the factory; ConfigFactory automatically loads application.conf, and that file name must not change 13 | val config:Config = ConfigFactory.load() 14 | 15 | val bootstrapServers = config.getString("bootstrap.servers") 16 | val zookeeperConnect = config.getString("zookeeper.connect") 17 | val inputTopic = config.getString("input.topic") 18 | val groupId = config.getString("group.id") 19 | val enableAutoCommit = config.getString("enable.auto.commit") 20 | val autoCommitIntervalMs = config.getString("auto.commit.interval.ms") 21 | val autoOffsetReset = config.getString("auto.offset.reset") 22 | 23 | def main(args: Array[String]): Unit = { 24 | // IDE tip: hold Alt and drag the left mouse button down to the last line, then press Ctrl+Shift and the right arrow for column selection 25 | println(bootstrapServers) 26 | println(zookeeperConnect) 27 | println(inputTopic) 28 | println(groupId) 29 | println(enableAutoCommit) 30 | println(autoCommitIntervalMs) 31 | println(autoOffsetReset) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /sync-db/src/main/scala/com/henry/syncdb/util/HBaseUtil.scala: -------------------------------------------------------------------------------- 1 | package com.henry.syncdb.util 2 | 3 | import org.apache.hadoop.conf.Configuration 4 | import org.apache.hadoop.hbase.client.{ColumnFamilyDescriptor, _} 5 | import org.apache.hadoop.hbase.util.Bytes 6 | import org.apache.hadoop.hbase.{HBaseConfiguration, TableName} 7 | 8 | /** 9 | * @Author: Henry 10 | * @Description: HBase utility class 11 | * 1. Get a Table object 12 | * 2. Save a single column 13 | * 3. Query a single column 14 | * 4. Save multiple columns 15 | * 5. Query multiple columns 16 | * 6. Delete data 17 | * @Date: Create in 2019/10/21 22:53 18 
-------------------------------------------------------------------------------- /sync-db/src/main/scala/com/henry/syncdb/util/GlobalConfigutil.scala: --------------------------------------------------------------------------------
package com.henry.syncdb.util

import com.typesafe.config.{Config, ConfigFactory}

/**
  * @Author: Henry
  * @Description: Configuration file loader
  * @Date: Create in 2019/10/15 23:42
  **/
object GlobalConfigutil {

  // Load the configuration through the factory; ConfigFactory.load() automatically reads application.conf, and that file name must not be changed
  val config:Config = ConfigFactory.load()

  val bootstrapServers = config.getString("bootstrap.servers")
  val zookeeperConnect = config.getString("zookeeper.connect")
  val inputTopic = config.getString("input.topic")
  val gruopId = config.getString("gruop.id")
  val enableAutoCommit = config.getString("enable.auto.commit")
  val autoCommitIntervalMs = config.getString("auto.commit.interval.ms")
  val autoOffsetReset = config.getString("auto.offset.reset")

  def main(args: Array[String]): Unit = {
    // Quick sanity check: print every loaded value
    // (editor tip: Alt + drag to column-select down to the last line, then Ctrl+Shift+Right to extend the selection)
    println(bootstrapServers)
    println(zookeeperConnect)
    println(inputTopic)
    println(gruopId)
    println(enableAutoCommit)
    println(autoCommitIntervalMs)
    println(autoOffsetReset)
  }
}
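Note: the keys read above have to exist in sync-db's application.conf (the file itself is not part of this listing). An illustrative example of what it might contain; every value below is a placeholder, and the misspelled key gruop.id must stay spelled that way to match the code:

    bootstrap.servers = "master:9092"
    zookeeper.connect = "master:2181"
    input.topic = "canal"
    gruop.id = "sync-db-consumer"
    enable.auto.commit = "true"
    auto.commit.interval.ms = "5000"
    auto.offset.reset = "latest"
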
-------------------------------------------------------------------------------- /sync-db/src/main/scala/com/henry/syncdb/util/HBaseUtil.scala: --------------------------------------------------------------------------------
package com.henry.syncdb.util

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.client.{ColumnFamilyDescriptor, _}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}

/**
  * @Author: Henry
  * @Description: HBase utility class
  *              1. Get a Table object
  *              2. Save a single column value
  *              3. Read a single column value
  *              4. Save multiple column values
  *              5. Read multiple column values
  *              6. Delete data
  * @Date: Create in 2019/10/21 22:53
  **/
object HBaseUtil {

  // HBase configuration; no file name has to be given, but the configuration file must be called hbase-site.xml
  val conf:Configuration = HBaseConfiguration.create()

  // HBase connection
  val conn:Connection = ConnectionFactory.createConnection(conf)

  // HBase admin API
  val admin:Admin = conn.getAdmin

  /**
    * Return the Table; if it does not exist yet, create it first
    *
    * @param tableNameStr     table name
    * @param columnFamilyName column family name
    * @return
    */
  def getTable(tableNameStr:String, columnFamilyName:String):Table={

    // Get the TableName
    val tableName:TableName = TableName.valueOf(tableNameStr)

    // Create the table if it does not exist
    if(!admin.tableExists(tableName)){

      // Builder for the table descriptor
      val descBuilder: TableDescriptorBuilder = TableDescriptorBuilder.newBuilder(tableName)

      val familyDescriptor:ColumnFamilyDescriptor = ColumnFamilyDescriptorBuilder
        .newBuilder(columnFamilyName.getBytes).build()

      // Add the column family to the table
      descBuilder.setColumnFamily(familyDescriptor)

      // Create the table
      admin.createTable(descBuilder.build())
    }

    conn.getTable(tableName)
  }

  /**
    * Save a single column value
    *
    * @param tableNameStr     table name
    * @param rowkey           row key
    * @param columnFamilyName column family name
    * @param columnName       column name
    * @param columnValue      column value
    */
  def putData(tableNameStr:String, rowkey:String, columnFamilyName:String, columnName:String, columnValue:String)={

    // Get the table
    val table:Table = getTable(tableNameStr, columnFamilyName)

    try{
      // Build the Put
      val put:Put = new Put(rowkey.getBytes)
      put.addColumn(columnFamilyName.getBytes, columnName.getBytes, columnValue.getBytes)

      // Save the data
      table.put(put)
    }catch {
      case ex:Exception=>{
        ex.printStackTrace()
      }
    }finally {
      table.close()
    }
  }


  /**
    * Read a single column value by column name
    * @param tableNameStr     table name
    * @param rowkey           row key
    * @param columnFamilyName column family name
    * @param columnName       column name
    * @return
    */
  def getData(tableNameStr:String, rowkey:String, columnFamilyName:String, columnName:String):String={

    // 1. Get the Table object
    val table = getTable(tableNameStr, columnFamilyName)

    try {
      // 2. Build the Get
      val get = new Get(rowkey.getBytes)

      // 3. Run the query
      val result:Result = table.get(get)

      // 4. Check that the result is not empty and contains the requested column
      if (result != null && result.containsColumn(columnFamilyName.getBytes, columnName.getBytes)){
        val bytes: Array[Byte] = result.getValue(columnFamilyName.getBytes(), columnName.getBytes)

        Bytes.toString(bytes)
      }else{
        ""
      }

    }catch{
      case ex:Exception => {
        ex.printStackTrace()
        ""
      }
    }finally {
      // 5. Close the table
      table.close()
    }
  }


  /**
    * Save multiple column values
    * @param tableNameStr     table name
    * @param rowkey           row key
    * @param columnFamilyName column family name
    * @param map              column name -> column value pairs
    */
  def putMapData(tableNameStr:String, rowkey:String, columnFamilyName:String, map:Map[String,Any])={

    // 1. Get the table object
    val table = getTable(tableNameStr, columnFamilyName)

    try{
      // 2. Create the Put
      val put = new Put(rowkey.getBytes)

      // 3. Add every column name and column value to the Put
      for ((colName, colValue) <- map){
        put.addColumn(columnFamilyName.getBytes, colName.getBytes, colValue.toString.getBytes)
      }

      // 4. Save the Put
      table.put(put)

    }catch{
      case ex:Exception => {
        ex.printStackTrace()
      }
    }finally {
      // 5. Close the table
      table.close()
    }
  }


  /**
    * Read multiple column values
    * @param tableNameStr     table name
    * @param rowkey           row key
    * @param columnFamilyName column family name
    * @param columnNameList   list of column names to read
    * @return
    */
  def getMapData(tableNameStr:String, rowkey:String, columnFamilyName:String, columnNameList:List[String]):Map[String,String]= {

    // 1. Get the Table
    val table = getTable(tableNameStr, columnFamilyName)

    try{
      // 2. Build the Get
      val get = new Get(rowkey.getBytes)

      // 3. Run the query
      val result: Result = table.get(get)

      // 4. Go through the column names, read each value and build a Map
      columnNameList.map {
        col =>
          val bytes: Array[Byte] = result.getValue(columnFamilyName.getBytes(), col.getBytes)

          if (bytes != null && bytes.size > 0) {
            col -> Bytes.toString(bytes)
          }
          else { // if the column has no value, use an empty pair
            "" -> ""
          }
      }.filter(_._1 != "").toMap // drop the empty pairs, then convert to a Map

    }catch {
      case ex:Exception => {
        ex.printStackTrace()
        Map[String, String]() // return an empty Map
      }
    }finally {
      // 5. Close the Table
      table.close()
    }
  }
"1", "info") 270 | println(getMapData("test", "1", "info", List("t1", "t2"))) 271 | 272 | } 273 | 274 | } 275 | --------------------------------------------------------------------------------