├── README.md
├── assets
│   └── Metastore.png
├── gen-java
│   └── org
│       └── apache
│           └── ranger
│               └── binding
│                   └── metastore
│                       └── thrift
│                           ├── MetaStoreHistoryService.java
│                           ├── MetaStoreUpdateService.java
│                           ├── MetaStoreUpdateServiceVersion.java
│                           ├── TErrorCode.java
│                           ├── TFetchUpdatesRequest.java
│                           ├── TFetchUpdatesResponse.java
│                           ├── TOperation.java
│                           ├── TStatus.java
│                           ├── TTableType.java
│                           ├── TUpdateDelta.java
│                           ├── TUpdateMetadataRequest.java
│                           └── TUpdateMetadataResponse.java
├── hive-tools.properties
├── log4j.properties
├── pom.xml
├── script
│   ├── DelMetaData.sh
│   ├── MetastoreChangelog.sh
│   ├── merge2hivecluster1.sh
│   ├── merge2hivecluster2.sh
│   ├── merge2hivecluster3.sh
│   ├── merge2hivecluster4.sh
│   └── merge2hivecluster5.sh
└── src
    └── main
        ├── java
        │   ├── com
        │   │   └── netease
        │   │       └── hivetools
        │   │           ├── Main.java
        │   │           ├── apps
        │   │           │   ├── DelMetaData.java
        │   │           │   ├── HdfsTest.java
        │   │           │   ├── HiveTest.java
        │   │           │   ├── Mammut.java
        │   │           │   ├── MetaDataMerge.java
        │   │           │   ├── MetastoreChangelog.java
        │   │           │   └── SchemaToMetaBean.java
        │   │           ├── mammut
        │   │           │   └── PfHiveSite.java
        │   │           ├── mappers
        │   │           │   ├── MammutMapper.java
        │   │           │   └── MetaDataMapper.java
        │   │           ├── meta
        │   │           │   ├── BucketingCols.java
        │   │           │   ├── Cds.java
        │   │           │   ├── ColumnsV2.java
        │   │           │   ├── CompactionQueue.java
        │   │           │   ├── CompletedTxnComponents.java
        │   │           │   ├── DatabaseParams.java
        │   │           │   ├── DbPrivs.java
        │   │           │   ├── Dbs.java
        │   │           │   ├── DelegationTokens.java
        │   │           │   ├── FuncRu.java
        │   │           │   ├── Funcs.java
        │   │           │   ├── GlobalPrivs.java
        │   │           │   ├── HiveLocks.java
        │   │           │   ├── Idxs.java
        │   │           │   ├── IndexParams.java
        │   │           │   ├── MasterKeys.java
        │   │           │   ├── NextCompactionQueueId.java
        │   │           │   ├── NextLockId.java
        │   │           │   ├── NextTxnId.java
        │   │           │   ├── NotificationLog.java
        │   │           │   ├── NotificationSequence.java
        │   │           │   ├── NucleusTables.java
        │   │           │   ├── PartColPrivs.java
        │   │           │   ├── PartColStats.java
        │   │           │   ├── PartPrivs.java
        │   │           │   ├── PartitionEvents.java
        │   │           │   ├── PartitionKeyVals.java
        │   │           │   ├── PartitionKeys.java
        │   │           │   ├── PartitionParams.java
        │   │           │   ├── Partitions.java
        │   │           │   ├── RoleMap.java
        │   │           │   ├── Roles.java
        │   │           │   ├── SdParams.java
        │   │           │   ├── Sds.java
        │   │           │   ├── SequenceTable.java
        │   │           │   ├── SerdeParams.java
        │   │           │   ├── Serdes.java
        │   │           │   ├── SkewedColNames.java
        │   │           │   ├── SkewedColValueLocMap.java
        │   │           │   ├── SkewedStringList.java
        │   │           │   ├── SkewedStringListValues.java
        │   │           │   ├── SkewedValues.java
        │   │           │   ├── SortCols.java
        │   │           │   ├── TabColStats.java
        │   │           │   ├── TableParams.java
        │   │           │   ├── TblColPrivs.java
        │   │           │   ├── TblPrivs.java
        │   │           │   ├── Tbls.java
        │   │           │   ├── TxnComponents.java
        │   │           │   ├── Txns.java
        │   │           │   ├── TypeFields.java
        │   │           │   ├── Types.java
        │   │           │   └── Version.java
        │   │           └── service
        │   │               └── MyBatisUtil.java
        │   └── org
        │       └── apache
        │           └── ranger
        │               └── binding
        │                   └── metastore
        │                       └── thrift
        │                           ├── MetaStoreHistoryService.java
        │                           ├── MetaStoreUpdateService.java
        │                           ├── MetaStoreUpdateServiceVersion.java
        │                           ├── TErrorCode.java
        │                           ├── TFetchUpdatesRequest.java
        │                           ├── TFetchUpdatesResponse.java
        │                           ├── TOperation.java
        │                           ├── TStatus.java
        │                           ├── TTableType.java
        │                           ├── TUpdateDelta.java
        │                           ├── TUpdateMetadataRequest.java
        │                           └── TUpdateMetadataResponse.java
        ├── resources
        │   ├── Hive-DDL-MySQL-CREATE.sql
        │   ├── MammutMapper.xml
        │   ├── MetaDataMapper.xml
        │   ├── hive-tools.properties
        │   ├── log4j.properties
        │   └── mybatis-config.xml
        └── thrift
            └── MetastoreUpdater.thrift
/README.md:
--------------------------------------------------------------------------------
1 | ## hive-tools Project Overview
2 |
3 | NetEase Group runs several hundred hive clusters of all sizes. To meet the NetEase Mammut big-data platform's need for unified metadata management, we have to merge the metadata of several independent hive clusters without moving the data files in HDFS; for example, the metadata of hive2, hive3 and hive4 can all be merged into hive1's metadata MySQL, after which the data of hive2, hive3 and hive4 can be worked with from hive1.
4 |
5 | Our first thought was hive's built-in EXPORT command, which exports the data and metadata of a given database or table to a local or HDFS directory, after which the IMPORT command loads that metadata and data into a new hive warehouse. However, it does not fit our scenario, for the following reasons:
6 |
7 | 1. We do not need to re-import the data;
8 | 2. Each of our hive instances has over a hundred thousand tables and tens of millions of partitions, so the partition names required by the IMPORT command cannot be spelled out;
9 | 3. Our tests also showed that IMPORT runs very slowly, and when an import fails intermittently the partially imported metadata cannot be rolled back; the only remedy is to run drop table by hand in hive, but our production hive is configured to drop the data together with the table, which is unacceptable;
10 |
11 | So we decided to build our own hive metadata migration and merge tool to meet the following requirements:
12 |
13 | 1. Migrate all metadata of one hive cluster into a target hive cluster without moving any data;
14 | 2. If a migration fails, be able to roll back to the state before the metadata import;
15 | 3. The source hive service may be stopped, but the target hive service must stay online while the metadata is migrated;
16 | 4. Keep the migration under ten minutes to limit the impact on the migrating party's business;
17 |
18 | ## Difficulties of merging metadata
19 |
20 | Hive metadata (the metastore) is normally stored in a MySQL database; in hive-1.2.1 it is spread across 54 tables, for example `DBS` for database names, `TBLS` for table names, `PARTITIONS` for partition information, and so on.
21 |
22 | ### The metadata tables have very complex dependencies
23 |
24 | ![Metastore](assets/Metastore.png)
25 |
26 | These 54 metadata tables are tied together by strong primary/foreign key dependencies through their `ID` columns, for example:
27 |
28 | 1. The `DB_ID` column of `DBS` is referenced as a foreign key by more than 20 tables;
29 | 2. The `TBL_ID` column of `TBLS` is referenced as a foreign key by more than 20 tables;
30 | 3. In `TBLS`, the `DB_ID` column is a foreign key to `DBS` and the `SD_ID` column is a foreign key to `SDS`;
31 | 4. In `PARTITIONS`, the `TBL_ID` column is a foreign key to `TBLS` and the `SD_ID` column is a foreign key to `SDS`;
32 | 5. In `DATABASE_PARAMS`, the `DB_ID` column is a foreign key to `DBS`;
33 |
34 | This nesting makes the relationships between tables look like [DBS]=>[TBLS]=>[PARTITIONS]=>[PARTITION_KEY_VALS], and there are 4-5 such chains with 5 or more levels of nesting, which creates the following problems for metadata merging.
35 |
36 | 1. The primary key IDs of every table in the source hive must be changed, otherwise they collide with the primary key IDs already present in the target hive and the merge fails;
37 | 2. After the source hive's primary key IDs are changed, its own primary/foreign key relationships must still hold, i.e. the primary/foreign key IDs of every related table must be changed in exactly the same way: if an ID in DBS changes from 1 to 100, then DB_ID in TBLS, PARTITIONS and every other child table must also change from 1 to 100;
38 | 3. Following the table dependencies, parent tables must be imported first, then child tables, then grandchild tables, and so on, otherwise the import cannot succeed;
39 |
40 | ### Rewriting the primary/foreign key IDs of the metadata
41 |
42 | We solved the ID-rewriting problem with a simple trick:
43 |
44 | 1. Query the maximum ID of every table in the target hive and add it to the ID of every row of the corresponding table in the source hive; the result is the ID each row gets after import. The formula is: new table ID = source table ID + target table max ID. Because every table applies the same rule, the program never has to track the primary/foreign key links between parent and child tables.
45 | 2. The only remaining risk is that a new DB is created in the target hive while an online import is running, causing a DB_ID collision. To guard against this we add a skip value to every import, so the formula becomes: new table ID = source table ID + target table max ID + skip value (100). A minimal sketch of the rule is shown below.
46 |
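A minimal sketch of the offset rule, assuming plain JDBC against the metastore MySQL (this is an illustration only, not the tool's actual MyBatis code; the `IdOffsetSketch` class and its helpers are hypothetical):

```java
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

// Illustration of: new table ID = source table ID + target table max ID + skip value
public class IdOffsetSketch {
    static final long SKIP = 100; // head-room for DBs created in the target while the merge runs

    // Largest ID currently used in the destination metastore, e.g. maxId(dest, "DBS", "DB_ID")
    static long maxId(Connection dest, String table, String idColumn) throws SQLException {
        try (Statement st = dest.createStatement();
             ResultSet rs = st.executeQuery("SELECT MAX(" + idColumn + ") FROM " + table)) {
            return rs.next() ? rs.getLong(1) : 0L;
        }
    }

    // Applied to every ID column of every table (DBS.DB_ID, TBLS.TBL_ID, TBLS.DB_ID, ...),
    // so a child row's foreign key is shifted by exactly the same offset as its parent's primary key.
    static long newId(long sourceId, long destMaxId) {
        return sourceId + destMaxId + SKIP;
    }
}
```

Because `TBLS.DB_ID` is shifted by the same offset as `DBS.DB_ID`, the parent/child links survive the merge without any per-row bookkeeping.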
47 | ### Database operations
48 |
49 | We use MyBatis to operate on the two MySQL databases, source and target: metadata is read from the source MySQL, its primary/foreign key IDs are rewritten according to the rules above, and the rows are inserted into the target MySQL.
50 |
51 | 1. Since MyBatis needs a bean class per table to work with the database, hand-writing all 54 of them would be tedious and error-prone, so we looked for a lazier way: we use `druid` to parse hive's table-creation DDL and then generate the 54 Java bean classes, one per table, with `codemodel`. See `com.netease.hivetools.apps.SchemaToMetaBean`; a condensed sketch of the idea follows.
52 |
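For illustration only, a condensed sketch of that idea, assuming druid's MySQL DDL parser and codemodel's class builder (the `DdlToBeanSketch` class, the output package, and the String-only fields are simplifications, not the actual SchemaToMetaBean code):

```java
import com.alibaba.druid.sql.ast.SQLStatement;
import com.alibaba.druid.sql.ast.statement.SQLColumnDefinition;
import com.alibaba.druid.sql.ast.statement.SQLCreateTableStatement;
import com.alibaba.druid.sql.ast.statement.SQLTableElement;
import com.alibaba.druid.sql.dialect.mysql.parser.MySqlStatementParser;
import com.sun.codemodel.JCodeModel;
import com.sun.codemodel.JDefinedClass;
import com.sun.codemodel.JMod;

import java.io.File;
import java.util.List;

public class DdlToBeanSketch {
    public static void generate(String createTableDdl, File outputDir) throws Exception {
        JCodeModel codeModel = new JCodeModel();

        // Parse the metastore's CREATE TABLE statements (MySQL dialect) with druid
        List<SQLStatement> statements = new MySqlStatementParser(createTableDdl).parseStatementList();
        for (SQLStatement statement : statements) {
            if (!(statement instanceof SQLCreateTableStatement)) continue;
            SQLCreateTableStatement create = (SQLCreateTableStatement) statement;
            String table = create.getTableSource().toString().replace("`", "");

            // One bean class per metastore table; every column becomes a field
            JDefinedClass bean = codeModel._class("com.netease.hivetools.meta." + table);
            for (SQLTableElement element : create.getTableElementList()) {
                if (element instanceof SQLColumnDefinition) {
                    String column = ((SQLColumnDefinition) element).getName().getSimpleName().replace("`", "");
                    bean.field(JMod.PRIVATE, String.class, column);
                }
            }
        }
        codeModel.build(outputDir); // write the generated .java sources
    }
}
```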
53 |
54 |
55 | ### Metadata migration procedure
56 |
57 | 1. Step 1: back up the target and source databases before the metadata migration
58 |
59 | 2. Step 2: import the source database's metadata into the temporary database exchange_db. A temporary database is needed because the source database's hive cluster is still serving online traffic and the running IDs in its metadata tables keep changing, and hive-tools only supports the destination database being the live one;
60 |
61 | 3. The temporary database exchange_db also makes it possible to drop superfluous hive DBs, and its fixed name standardizes the whole metadata migration workflow, lowering the chance of mistakes caused by hand-editing command arguments
62 |
63 | 4. Configure the JDBC settings of the source and destination databases in the hive-tools.properties file
64 |
65 | ```shell
66 | # exchange_db
67 | exchange_db.jdbc.driverClassName=com.mysql.jdbc.Driver
68 | exchange_db.jdbc.url=jdbc:mysql://10.172.121.126:3306/hivecluster1?useUnicode=true&characterEncoding=utf8&autoReconnect=true&allowMultiQueries=true
69 | exchange_db.jdbc.username=src_hive
70 | exchange_db.jdbc.password=abcdefg
71 |
72 | # dest_hive
73 | dest_hive.jdbc.driverClassName=com.mysql.jdbc.Driver
74 | dest_hive.jdbc.url=jdbc:mysql://10.172.121.126:3306/hivecluster1?useUnicode=true&characterEncoding=utf8&autoReconnect=true&allowMultiQueries=true
75 | dest_hive.jdbc.username=dest_hive
76 | dest_hive.jdbc.password=abcdefg
77 | ```
78 |
79 | 5. Run the metadata migration command
80 |
81 | ```shell
82 | export SOURCE_NAME=exchange_db
83 | export DEST_NAME=dest_hive
84 |
85 | /home/hadoop/java-current/jre/bin/java -cp "./hive-tools-current.jar" com.netease.hivetools.apps.MetaDataMerge --s=$SOURCE_NAME --d=$DEST_NAME
86 | ```
87 |
88 | 6. Before migrating any metadata, hive-tools first checks the source and destination metastores for hive DBs with the same name, aborts the migration if any are found, and reports them; a rough illustration of this check follows
89 |
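What the duplicate check amounts to is roughly the following, assuming plain JDBC and the metastore's `DBS.NAME` column (a rough illustration, not the tool's actual implementation; `duplicateDbNames` is a hypothetical helper):

```java
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.HashSet;
import java.util.Set;

public class DuplicateDbCheckSketch {
    // Database names that exist in both the source and the destination metastore
    static Set<String> duplicateDbNames(Connection source, Connection dest) throws SQLException {
        Set<String> destNames = new HashSet<>();
        try (Statement st = dest.createStatement();
             ResultSet rs = st.executeQuery("SELECT NAME FROM DBS")) {
            while (rs.next()) destNames.add(rs.getString(1));
        }
        Set<String> duplicates = new HashSet<>();
        try (Statement st = source.createStatement();
             ResultSet rs = st.executeQuery("SELECT NAME FROM DBS")) {
            while (rs.next()) {
                if (destNames.contains(rs.getString(1))) {
                    duplicates.add(rs.getString(1)); // a non-empty result aborts the merge
                }
            }
        }
        return duplicates;
    }
}
```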
90 | 7. Run the command that deletes the duplicate databases
91 |
92 | ```shell
93 | # edit DEL_DB (comma-separated list of databases; default must be deleted) and DEL_TBL (empty means delete all tables) in the script
94 | export SOURCE=exchange_db
95 | export DEL_DB=default,nisp_nhids,real,azkaban_autotest_db
96 | export DEL_TBL=
97 |
98 | ~/java-current/jre/bin/java -cp "./hive-tools-current.jar" com.netease.hivetools.apps.DelMetaData --s=$SOURCE --d=$DEL_DB --t=$DEL_TBL
99 | ```
100 |
101 | 8. Run the metadata migration command again
102 |
103 | 9. Check the migration command's console or file logs. If the metadata merge failed, remove the migrated metadata by running the delete command for the affected hive DBs against the destination database; if there were no errors, verify with a hive client that the newly migrated metadata in the destination database is usable
104 |
105 | 10. Following this migration procedure strictly, we have used hive-tools to migrate and merge a large number of hive metastore databases inside NetEase Group, with almost no problems
106 |
107 | ## compile
108 |
109 | mvn clean compile package -Dmaven.test.skip=true
110 |
111 |
112 |
113 | ## history
114 |
115 |
116 | Release Notes - Hive-tools - Version 0.1.4
117 |
118 | * [hive-tools-0.1.5]
119 | - MetaDataMerge add update SEQUENCE_TABLE NO
120 |
121 | * [hive-tools-0.1.4]
122 | - MetastoreChangelog -z=zkHost -c=changelog -d=database -t=table
123 | - thrift -gen java src/main/thrift/MetastoreUpdater.thrift
124 |
125 | * [hive-tools-0.1.3]
126 | - delete database metadata: database_name/table_name support the % wildcard
127 |
128 | * [hive-tools-0.1.2]
129 | - hdfs proxy user test
130 |
131 | * [hive-tools-0.1.1]
132 | - delete database metadata
133 |
134 | * [hive-tools-0.1.0]
135 | - hive meta schema convert to java bean
136 | - multiple hive meta merge
--------------------------------------------------------------------------------
/assets/Metastore.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NetEase/hive-tools/da3338bff95fddcfc9aa6b9e10b28d13b6666d7e/assets/Metastore.png
--------------------------------------------------------------------------------
/gen-java/org/apache/ranger/binding/metastore/thrift/MetaStoreUpdateServiceVersion.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Autogenerated by Thrift Compiler (0.9.3)
3 | *
4 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
5 | * @generated
6 | */
7 | package org.apache.ranger.binding.metastore.thrift;
8 |
9 |
10 | import java.util.Map;
11 | import java.util.HashMap;
12 | import org.apache.thrift.TEnum;
13 |
14 | public enum MetaStoreUpdateServiceVersion implements org.apache.thrift.TEnum {
15 | V1(0);
16 |
17 | private final int value;
18 |
19 | private MetaStoreUpdateServiceVersion(int value) {
20 | this.value = value;
21 | }
22 |
23 | /**
24 | * Get the integer value of this enum value, as defined in the Thrift IDL.
25 | */
26 | public int getValue() {
27 | return value;
28 | }
29 |
30 | /**
31 | * Find a the enum type by its integer value, as defined in the Thrift IDL.
32 | * @return null if the value is not found.
33 | */
34 | public static MetaStoreUpdateServiceVersion findByValue(int value) {
35 | switch (value) {
36 | case 0:
37 | return V1;
38 | default:
39 | return null;
40 | }
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/gen-java/org/apache/ranger/binding/metastore/thrift/TErrorCode.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Autogenerated by Thrift Compiler (0.9.3)
3 | *
4 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
5 | * @generated
6 | */
7 | package org.apache.ranger.binding.metastore.thrift;
8 |
9 |
10 | import java.util.Map;
11 | import java.util.HashMap;
12 | import org.apache.thrift.TEnum;
13 |
14 | public enum TErrorCode implements org.apache.thrift.TEnum {
15 | OK(0),
16 | ERROR(1),
17 | INVALID(2);
18 |
19 | private final int value;
20 |
21 | private TErrorCode(int value) {
22 | this.value = value;
23 | }
24 |
25 | /**
26 | * Get the integer value of this enum value, as defined in the Thrift IDL.
27 | */
28 | public int getValue() {
29 | return value;
30 | }
31 |
32 | /**
33 | * Find a the enum type by its integer value, as defined in the Thrift IDL.
34 | * @return null if the value is not found.
35 | */
36 | public static TErrorCode findByValue(int value) {
37 | switch (value) {
38 | case 0:
39 | return OK;
40 | case 1:
41 | return ERROR;
42 | case 2:
43 | return INVALID;
44 | default:
45 | return null;
46 | }
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/gen-java/org/apache/ranger/binding/metastore/thrift/TOperation.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Autogenerated by Thrift Compiler (0.9.3)
3 | *
4 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
5 | * @generated
6 | */
7 | package org.apache.ranger.binding.metastore.thrift;
8 |
9 |
10 | import java.util.Map;
11 | import java.util.HashMap;
12 | import org.apache.thrift.TEnum;
13 |
14 | public enum TOperation implements org.apache.thrift.TEnum {
15 | CREATE_DATABASE(0),
16 | DROP_DATABASE(1),
17 | CREATE_TABLE(2),
18 | DROP_TABLE(3),
19 | ALTER_TABLE(4),
20 | REMAME_TABLE(5),
21 | ADD_PARTITION(6),
22 | DROP_PARTITION(7),
23 | ALTER_PARTITION(8),
24 | RENAME_PARTITION(9),
25 | INIT_ID(10);
26 |
27 | private final int value;
28 |
29 | private TOperation(int value) {
30 | this.value = value;
31 | }
32 |
33 | /**
34 | * Get the integer value of this enum value, as defined in the Thrift IDL.
35 | */
36 | public int getValue() {
37 | return value;
38 | }
39 |
40 | /**
41 | * Find a the enum type by its integer value, as defined in the Thrift IDL.
42 | * @return null if the value is not found.
43 | */
44 | public static TOperation findByValue(int value) {
45 | switch (value) {
46 | case 0:
47 | return CREATE_DATABASE;
48 | case 1:
49 | return DROP_DATABASE;
50 | case 2:
51 | return CREATE_TABLE;
52 | case 3:
53 | return DROP_TABLE;
54 | case 4:
55 | return ALTER_TABLE;
56 | case 5:
57 | return REMAME_TABLE;
58 | case 6:
59 | return ADD_PARTITION;
60 | case 7:
61 | return DROP_PARTITION;
62 | case 8:
63 | return ALTER_PARTITION;
64 | case 9:
65 | return RENAME_PARTITION;
66 | case 10:
67 | return INIT_ID;
68 | default:
69 | return null;
70 | }
71 | }
72 | }
73 |
--------------------------------------------------------------------------------
/gen-java/org/apache/ranger/binding/metastore/thrift/TTableType.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Autogenerated by Thrift Compiler (0.9.3)
3 | *
4 | * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
5 | * @generated
6 | */
7 | package org.apache.ranger.binding.metastore.thrift;
8 |
9 |
10 | import java.util.Map;
11 | import java.util.HashMap;
12 | import org.apache.thrift.TEnum;
13 |
14 | public enum TTableType implements org.apache.thrift.TEnum {
15 | TABLE(0),
16 | VIEW(1);
17 |
18 | private final int value;
19 |
20 | private TTableType(int value) {
21 | this.value = value;
22 | }
23 |
24 | /**
25 | * Get the integer value of this enum value, as defined in the Thrift IDL.
26 | */
27 | public int getValue() {
28 | return value;
29 | }
30 |
31 | /**
32 | * Find a the enum type by its integer value, as defined in the Thrift IDL.
33 | * @return null if the value is not found.
34 | */
35 | public static TTableType findByValue(int value) {
36 | switch (value) {
37 | case 0:
38 | return TABLE;
39 | case 1:
40 | return VIEW;
41 | default:
42 | return null;
43 | }
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/hive-tools.properties:
--------------------------------------------------------------------------------
1 | # Step 1: back up all databases
2 | # hadoop519 > cd /home/hadoop/database-auto-backup/
3 | # hadoop519 > ./autoBackupDB.sh
4 | # Step 2: empty the exchange_db database in the MySQL instance on hadoop357
5 | # hadoop357 > mysql -uroot -proot -Dexchange_db;
6 | # mysql > use exchange_db;
7 | # mysql > drop database exchange_db;
8 | # mysql > create database exchange_db;
9 | # mysql > exit;
10 | # Step 3: using the metadata MySQL IP, user name and password provided by the user, dump the database with mysqldump (apply for RDS privileges)
11 | # hadoop357 > mysqldump -hMysqlHostIP -uUserName -pPassword --single-transaction MysqlDatabaseName > MysqlDatabaseName.sql;
12 | # Step 4: import the user's metadata into exchange_db
13 | # hadoop357 > mysql -uroot -proot -Dexchange_db;
14 | # mysql > use exchange_db;
15 | # mysql > source ./MysqlDatabaseName.sql
16 | # Step 5: check whether any database name collides with one in the DEST database; ask the business owner to rename the duplicates
17 | #          (the cluster that owns the HDFS files of the user's hive tables determines which hive cluster to import into)
18 | # hadoop357 > cd /home/hadoop/hive-tools/
19 | # hadoop357 > ./merge2hivecluster[1-5].sh
20 | # hadoop357 > if any database names collide, the colliding names are printed
21 | # Step 6: delete the superfluous DBs from exchange_db
22 | # hadoop357 > cd /home/hadoop/hive-tools/
23 | # hadoop357 > vi delMetaData.sh
24 | #             edit DEL_DB (comma-separated list of databases; default must be deleted) and DEL_TBL (empty means delete all tables) in the script
25 | # hadoop357 > ./delMetaData.sh
26 | # Step 7: re-run the metadata merge script
27 | # hadoop357 > cd /home/hadoop/hive-tools/
28 | # hadoop357 > ./merge2hivecluster[1-5].sh
29 | # Step 8: check the merge logs and verify through hive that the import succeeded
30 |
31 | ########################################### online hive cluster mysql ################################################################
32 | # online_cluster1
33 | online_cluster1.jdbc.driverClassName=com.mysql.jdbc.Driver
34 | online_cluster1.jdbc.url=jdbc:mysql://10.172.121.126:3306/hivecluster1?useUnicode=true&characterEncoding=utf8&autoReconnect=true&allowMultiQueries=true
35 | online_cluster1.jdbc.username=hivecluster1
36 | online_cluster1.jdbc.password=SIpFGv2KgChQ
37 |
38 | ############################################### Intermediate temporary database ############################################################
39 |
40 | # exchange_db
41 | exchange_db.jdbc.driverClassName=com.mysql.jdbc.Driver
42 | exchange_db.jdbc.url=jdbc:mysql://10.120.232.16:3306/exchange_db?useUnicode=true&characterEncoding=UTF-8&allowMultiQueries=true
43 | exchange_db.jdbc.username=exchange_db
44 | exchange_db.jdbc.password=exchange_db
--------------------------------------------------------------------------------
/log4j.properties:
--------------------------------------------------------------------------------
1 | # Output pattern definition
2 | ConversionPattern=%d %-p %l - %m%n
3 |
4 | log4j.rootLogger=INFO,Console,DailyFile
5 | log4j.logger.com.cnblogs.lzrabbit=DEBUG
6 | log4j.logger.org.springframework=ERROR
7 | log4j.logger.org.mybatis=ERROR
8 | log4j.logger.org.apache.ibatis=ERROR
9 | log4j.logger.org.quartz=ERROR
10 | log4j.logger.org.apache.axis2=ERROR
11 | log4j.logger.org.apache.axiom=ERROR
12 | log4j.logger.org.apache=ERROR
13 | log4j.logger.httpclient=ERROR
14 | #log4j.additivity.org.springframework=false
15 | #Console
16 | log4j.appender.Console=org.apache.log4j.ConsoleAppender
17 | log4j.appender.Console.Threshold=DEBUG
18 | log4j.appender.Console.Target=System.out
19 | log4j.appender.Console.layout=org.apache.log4j.PatternLayout
20 | log4j.appender.Console.layout.ConversionPattern=${ConversionPattern}
21 | #log4j.appender.Console.encoding=UTF-8
22 |
23 | #org.apache.log4j.DailyRollingFileAppender
24 | log4j.appender.DailyFile=org.apache.log4j.DailyRollingFileAppender
25 | log4j.appender.DailyFile.DatePattern=.yyyy-MM-dd
26 | log4j.appender.DailyFile.File=logs/hive-tools.log
27 | log4j.appender.DailyFile.Append=true
28 | log4j.appender.DailyFile.Threshold=DEBUG
29 | log4j.appender.DailyFile.layout=org.apache.log4j.PatternLayout
30 | log4j.appender.DailyFile.layout.ConversionPattern=${ConversionPattern}
31 | log4j.appender.DailyFile.encoding=UTF-8
32 |
33 | # %c  fully qualified class name of the logging call
34 | # %d  date/time of the logging event, ISO8601 by default; a format can be given, e.g. %d{yyyy-MM-dd HH:mm:ss} produces output like 2002-10-18 22:10:28
35 | # %f  simple class name of the logging call
36 | # %l  location of the logging event, i.e. the line within the calling class that issued the log statement
37 | # %m  the message passed to the logging call, e.g. the message in log(message)
38 | # %n  a line separator: "\r\n" on Windows, "\n" on Unix
39 | # %p  the level of the event: DEBUG, INFO, WARN, ERROR or FATAL, i.e. DEBUG for debug() calls, and so on
40 | # %r  milliseconds elapsed from application start-up to the logging event
41 | # %t  name of the thread that produced the logging event
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
18 |
19 |
21 | 4.0.0
22 | com.netease
23 | hive-tools
24 | 0.1.6
25 |
26 | UTF-8
27 | UTF-8
28 | 1.7
29 | 2.11.7
30 | 2.11
31 | 1.7.16
32 | 1.2.17
33 | 1.6.0
34 | 20.0-rc1
35 | 0.8.0
36 | 10.12.1.1
37 | 10.12.1.1
38 | 3.2.8
39 | 2.6
40 | 1.0.19
41 | 5.1.39
42 | 3.8.1
43 |
44 | 64m
45 | 512m
46 | 512m
47 |
48 |
49 |
50 | apache.snapshots.https
51 | Apache Development Snapshot Repository
52 | https://repository.apache.org/content/repositories/snapshots
53 |
54 | true
55 |
56 |
57 |
58 | apache.public.https
59 | Apache Development Snapshot Repository
60 | https://repository.apache.org/content/repositories/public
61 |
62 | true
63 |
64 |
65 | false
66 |
67 |
68 |
69 |
70 |
71 | com.alibaba
72 | druid
73 | ${druid.version}
74 |
75 |
76 | com.sun.codemodel
77 | codemodel
78 | ${codemodel.version}
79 |
80 |
81 | com.google.guava
82 | guava
83 | ${guava.version}
84 |
85 |
86 | org.apache.calcite
87 | calcite-core
88 | ${calcite.version}
89 |
90 |
91 | org.apache.calcite
92 | calcite-avatica
93 | ${calcite.version}
94 |
95 |
96 | org.apache.calcite
97 | calcite-linq4j
98 | ${calcite.version}
99 |
100 |
101 | net.sf.jsqlparser
102 | jsqlparser
103 | ${jsqlparser.version}
104 |
105 |
106 | log4j
107 | log4j
108 | ${log4j.version}
109 |
110 |
111 | org.apache.derby
112 | derby
113 | ${derby.version}
114 |
115 |
116 | org.apache.derby
117 | derbyclient
118 | ${derbyclient.version}
119 |
120 |
121 | mysql
122 | mysql-connector-java
123 | ${mysql-connector-java.version}
124 |
125 |
126 | org.mybatis
127 | mybatis
128 | ${mybatis.version}
129 |
130 |
131 | junit
132 | junit
133 | ${junit.version}
134 | test
135 |
136 |
137 | org.apache.commons
138 | commons-math3
139 | 3.5
140 |
141 |
142 | commons-cli
143 | commons-cli
144 | 1.2
145 |
146 |
147 | org.apache.hadoop
148 | hadoop-common
149 | 2.7.3
150 |
151 |
152 | org.apache.hive
153 | hive-jdbc
154 | 1.2.1
155 |
156 |
157 | junit
158 | junit
159 | 4.12
160 |
161 |
162 | org.apache.curator
163 | curator-client
164 | 2.6.0
165 |
166 |
167 | org.apache.curator
168 | curator-framework
169 | 2.6.0
170 |
171 |
172 | org.apache.curator
173 | curator-recipes
174 | 2.6.0
175 |
176 |
177 |
178 |
179 |
180 |
191 |
192 |
193 | *.sql
194 | *.json
195 | hive-tools.sh
196 | hive-tools.properties
197 | log4j.properties
198 |
199 | src/main/resources
200 | true
201 |
202 |
203 |
204 |
205 | org.apache.maven.plugins
206 | maven-compiler-plugin
207 | 3.3
208 |
209 | ${java.version}
210 | ${java.version}
211 |
212 |
213 |
214 | org.apache.maven.plugins
215 | maven-jar-plugin
216 |
217 |
218 |
219 | true
220 | com.netease.hivetools.apps.MetaDataMerge
221 |
222 |
223 |
224 |
225 |
226 | maven-assembly-plugin
227 | 2.2-beta-5
228 |
229 | false
230 |
231 |
232 | com.netease.hivetools.apps.MetaDataMerge
233 |
234 |
235 |
236 | jar-with-dependencies
237 |
238 |
239 |
240 |
241 | make-assembly
242 | package
243 |
244 | single
245 |
246 |
247 |
248 |
249 |
250 |
251 | org.apache.maven.plugins
252 | maven-resources-plugin
253 | 2.6
254 |
255 |
256 | copy-resources
257 | package
258 |
259 | copy-resources
260 |
261 |
262 | UTF-8
263 | ${project.build.directory}/classes
264 |
265 |
266 | src/main/resources/
267 | false
268 |
269 | *.json
270 | *.sql
271 | hive-tools.sh
272 | hive-tools.properties
273 | log4j.properties
274 |
275 |
276 |
277 |
278 |
279 |
280 | copy-command
281 | package
282 |
283 | copy-resources
284 |
285 |
286 | UTF-8
287 | ${project.build.directory}
288 |
289 |
290 | src/main/resources/
291 | true
292 |
293 | hive-tools.sh
294 | hive-tools.properties
295 | log4j.properties
296 |
297 |
298 |
299 |
300 |
301 |
302 |
303 |
304 |
305 |
306 |
--------------------------------------------------------------------------------
/script/DelMetaData.sh:
--------------------------------------------------------------------------------
1 | export SOURCE=exchange_db
2 | export DEL_DB=default,nisp_nhids,real,azkaban_autotest_db
3 | export DEL_TBL=
4 |
5 | ~/java-current/jre/bin/java -cp "./hive-tools-current.jar" com.netease.hivetools.apps.DelMetaData --s=$SOURCE --d=$DEL_DB --t=$DEL_TBL
--------------------------------------------------------------------------------
/script/MetastoreChangelog.sh:
--------------------------------------------------------------------------------
1 | export ZK_HOST=hadoop944.hz.163.org
2 | export ZK_PATH=/hive-metastore-changelog/hive-cluster3
3 | export FILTE_DATABASE=beauty_dw
4 | export FILTE_TABLE=ods_beauty
5 | #export DEBUG='-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000'
6 |
7 | /home/hadoop/java-current/jre/bin/java ${DEBUG} -cp "./hive-tools-current.jar" com.netease.hivetools.apps.MetastoreChangelog --z=${ZK_HOST} --c=${ZK_PATH} --d=${FILTE_DATABASE} --t=${FILTE_TABLE}
8 |
--------------------------------------------------------------------------------
/script/merge2hivecluster1.sh:
--------------------------------------------------------------------------------
1 | export SOURCE_NAME=exchange_db
2 | export DEST_NAME=online_cluster1
3 |
4 | /home/hadoop/java-current/jre/bin/java -cp "./hive-tools-current.jar" com.netease.hivetools.apps.MetaDataMerge --s=$SOURCE_NAME --d=$DEST_NAME
--------------------------------------------------------------------------------
/script/merge2hivecluster2.sh:
--------------------------------------------------------------------------------
1 | export SOURCE_NAME=exchange_db
2 | export DEST_NAME=online_cluster2
3 |
4 | /home/hadoop/java-current/jre/bin/java -cp "./hive-tools-current.jar" com.netease.hivetools.apps.MetaDataMerge --s=$SOURCE_NAME --d=$DEST_NAME
--------------------------------------------------------------------------------
/script/merge2hivecluster3.sh:
--------------------------------------------------------------------------------
1 | export SOURCE_NAME=exchange_db
2 | export DEST_NAME=online_cluster3
3 |
4 | /home/hadoop/java-current/jre/bin/java -cp "./hive-tools-current.jar" com.netease.hivetools.apps.MetaDataMerge --s=$SOURCE_NAME --d=$DEST_NAME
--------------------------------------------------------------------------------
/script/merge2hivecluster4.sh:
--------------------------------------------------------------------------------
1 | export SOURCE_NAME=exchange_db
2 | export DEST_NAME=online_cluster4
3 |
4 | /home/hadoop/java-current/jre/bin/java -cp "./hive-tools-current.jar" com.netease.hivetools.apps.MetaDataMerge --s=$SOURCE_NAME --d=$DEST_NAME
--------------------------------------------------------------------------------
/script/merge2hivecluster5.sh:
--------------------------------------------------------------------------------
1 | export SOURCE_NAME=exchange_db
2 | export DEST_NAME=online_cluster5
3 |
4 | /home/hadoop/java-current/jre/bin/java -cp "./hive-tools-current.jar" com.netease.hivetools.apps.MetaDataMerge --s=$SOURCE_NAME --d=$DEST_NAME
--------------------------------------------------------------------------------
/src/main/java/com/netease/hivetools/Main.java:
--------------------------------------------------------------------------------
1 | package com.netease.hivetools;
2 |
3 |
4 | import com.netease.hivetools.apps.DelMetaData;
5 | import com.netease.hivetools.apps.Mammut;
6 | import com.netease.hivetools.apps.MetaDataMerge;
7 | import com.netease.hivetools.apps.SchemaToMetaBean;
8 | import org.apache.commons.cli.*;
9 | import org.apache.log4j.Logger;
10 | import org.apache.log4j.PropertyConfigurator;
11 | import org.junit.Test;
12 |
13 | import java.sql.Connection;
14 | import java.sql.DriverManager;
15 | import java.sql.PreparedStatement;
16 | import java.sql.ResultSet;
17 | import java.util.ArrayList;
18 |
19 | public class Main {
20 | private static final Logger logger = Logger.getLogger(Main.class.getName());
21 |
22 | public static void main(String[] args) {
23 | PropertyConfigurator.configure("log4j.properties");
24 |
25 | // test(args);
26 | // cliCommond(args);
27 | }
28 |
29 | static private void cliCommond(String[] args) {
30 | Options opt = new Options();
31 | opt.addOption(OptionBuilder.withLongOpt("p")
32 | .withDescription("name of the processor to run")
33 | .withValueSeparator('=')
34 | .hasArg()
35 | .create());
36 | opt.addOption("h", "help", false, "print command-line help");
37 |
38 | String formatstr = "hive-tools --p=[MetaDataMerge|SchemaToMetaBean|Mammut|DelMetaData] [-h/--help]";
39 |
40 | HelpFormatter formatter = new HelpFormatter();
41 | CommandLineParser parser = new PosixParser();
42 | CommandLine cl = null;
43 | try {
44 | for (int i = 0; i < args.length; i ++)
45 | logger.debug("args[" +i +"] : " + args[i]);
46 |
47 | cl = parser.parse(opt, args);
48 | } catch (ParseException e) {
49 | formatter.printHelp(formatstr, opt); System.exit(1); // cl stays null after a parse failure, so do not fall through
50 | }
51 | if (cl.hasOption("h")) {
52 | HelpFormatter hf = new HelpFormatter();
53 | hf.printHelp(formatstr, "", opt, "");
54 | System.exit(1);
55 | }
56 |
57 | if (false == cl.hasOption("p")) {
58 | System.out.println("missing --t arg");
59 | HelpFormatter hf = new HelpFormatter();
60 | hf.printHelp(formatstr, "", opt, "");
61 | System.exit(1);
62 | }
63 |
64 | String procName = cl.getOptionValue("p");
65 | if (procName.equalsIgnoreCase("MetaDataMerge")) {
66 | MetaDataMerge.main(args);
67 | } else if (procName.equalsIgnoreCase("SchemaToMetaBean")) {
68 | SchemaToMetaBean.main(args);
69 | } else if (procName.equalsIgnoreCase("Mammut")) {
70 | Mammut.main(args);
71 | } else if (procName.equalsIgnoreCase("DelMetaData")) {
72 | DelMetaData.main(args);
73 | } else {
74 | System.out.println("error --p arg");
75 | HelpFormatter hf = new HelpFormatter();
76 | hf.printHelp(formatstr, "", opt, "");
77 | System.exit(1);
78 | }
79 | }
80 |
81 | static class TabInfo{
82 | public String tblId = "";
83 | public String tblName = "";
84 | public String tblType = "";
85 | public String tblLocation = "";
86 | public String partName = "";
87 | public String partLocation = "";
88 |
89 | public TabInfo() {}
90 |
91 | @Override
92 | public String toString() {
93 | return "tblId = " + tblId + ", tblName = " + tblName +", tblType = " + tblType +", tblLocation = " + tblLocation +", partName = " + partName +", partLocation = " + partLocation;
94 | }
95 | }
96 |
97 | @Test
98 | public static void test(String[] args)
99 | {
100 | String url = "jdbc:mysql://10.120.232.16:3306/haitao1201?useUnicode=true&characterEncoding=UTF-8";
101 | try
102 | {
103 | Class.forName("com.mysql.jdbc.Driver").newInstance();
104 | Connection c = DriverManager.getConnection(url, "haitao1201", "haitao1201");
105 | PreparedStatement p = c.prepareStatement("select TBLS.TBL_ID, TBLS.TBL_NAME, TBLS.TBL_TYPE, SDS.LOCATION from TBLS, SDS where TBLS.SD_ID = SDS.SD_ID;"); // limit 300
106 | p.execute();
107 | ResultSet rs = p.getResultSet();
108 | ArrayList<TabInfo> tabInfos = new ArrayList<>();
109 | while (!rs.isLast())
110 | {
111 | if (!rs.next())
112 | break;
113 |
114 | TabInfo tabInfo = new TabInfo();
115 | tabInfo.tblId = rs.getString(1);
116 | tabInfo.tblName = rs.getString(2);
117 | tabInfo.tblType = rs.getString(3);
118 | tabInfo.tblLocation = rs.getString(4)==null?"":rs.getString(4);
119 |
120 | tabInfos.add(tabInfo);
121 | }
122 | rs.close();
123 |
124 | for (TabInfo tabInfo : tabInfos) {
125 | String sql = "select SDS.LOCATION, PARTITIONS.PART_NAME from SDS, PARTITIONS where PARTITIONS.SD_ID = SDS.SD_ID and TBL_ID = " + tabInfo.tblId + " limit 1";
126 | p = c.prepareStatement(sql);
127 | p.execute();
128 | rs = p.getResultSet();
129 | while (!rs.isLast())
130 | {
131 | if (!rs.next())
132 | break;
133 | tabInfo.partLocation = rs.getString(1)==null?"":rs.getString(1);
134 | tabInfo.partName = rs.getString(2)==null?"":rs.getString(2);
135 | }
136 | rs.close();
137 | }
138 |
139 | int count = 0, notsame = 0;
140 | for (TabInfo tabInfo : tabInfos) {
141 | count ++;
142 | boolean samePath = tabInfo.partLocation.startsWith(tabInfo.tblLocation);
143 | if (samePath) {
144 | // System.out.println("Y " + tabInfo.toString());
145 | } else if(!samePath && !tabInfo.partLocation.isEmpty()) {
146 | notsame ++;
147 | System.out.println("N " + tabInfo.toString());
148 | }
149 | }
150 | System.out.println("总数: " + count + ", 不相同的: " + notsame);
151 | }
152 | catch (Exception e)
153 | {
154 | e.printStackTrace();
155 | }
156 | }
157 |
158 | }
159 |
--------------------------------------------------------------------------------
/src/main/java/com/netease/hivetools/apps/DelMetaData.java:
--------------------------------------------------------------------------------
1 | package com.netease.hivetools.apps;
2 |
3 | import com.netease.hivetools.mappers.MetaDataMapper;
4 | import com.netease.hivetools.meta.Dbs;
5 | import com.netease.hivetools.meta.Tbls;
6 | import com.netease.hivetools.service.MyBatisUtil;
7 | import org.apache.commons.cli.*;
8 | import org.apache.log4j.Logger;
9 | import org.apache.log4j.PropertyConfigurator;
10 |
11 | import java.util.HashMap;
12 | import java.util.List;
13 | import java.util.Map;
14 | import java.util.Scanner;
15 |
16 | /**
17 | * Created by hzliuxun on 17/1/22.
18 | */
19 | public class DelMetaData {
20 | private static final Logger logger = Logger.getLogger(DelMetaData.class.getName());
21 |
22 | private static String del_database_name = "";
23 | private static String del_table_name = "";
24 | public static void main(String[] args) {
25 | PropertyConfigurator.configure("log4j.properties");
26 |
27 | cliCommond(args);
28 |
29 | Scanner sc = new Scanner(System.in);
30 | String useInput = "";
31 | while (!useInput.equals("Y")) {
32 | System.err.println("请先备份数据库!");
33 | System.err.println("删除数据源 " + MyBatisUtil.sourceName + " 中的数据库 " + del_database_name + ", 表 " + del_table_name + " 请输入[Y/n] : ");
34 |
35 | useInput = sc.nextLine();
36 | if (useInput.equalsIgnoreCase("n")) {
37 | System.exit(1);
38 | }
39 | }
40 |
41 | String[] delDbNames = del_database_name.split(",");
42 | for (String delDbName : delDbNames) {
43 | deleteMetaData(delDbName);
44 | }
45 | }
46 |
47 | static void deleteMetaData(String delDbName) {
48 | System.out.println("==> deleteMetaData(" + delDbName + ")");
49 | MetaDataMapper sourceMetaData = new MetaDataMapper(MyBatisUtil.sourceName);
50 |
51 | Map params = new HashMap();
52 | params.put("database_name", delDbName);
53 | List