├── .gitignore ├── LICENSE ├── README.md ├── data └── database │ ├── mysql_ddl.sql │ └── mysql_init.sql ├── docs ├── data-mask │ └── README.md ├── images │ ├── Appreciation code.png │ ├── Calcite SqlNode diagrams.png │ ├── Data mask and Row-level filter example data.png │ ├── Data mask and Row-level filter overall execution flowchart.png │ ├── Data mask example conditions.png │ ├── Data mask example data.png │ ├── Data mask-Rewrite the main process.png │ ├── Data mask-masked with customer_name after mask.png │ ├── Data mask-masked with customer_name after mask_show_first_4.png │ ├── FlinkSQL data mask solution.png │ ├── FlinkSQL row-level filter solution.png │ ├── FlinkSQL security.drawio │ ├── FlinkSQL simple-execution flowchart.png │ ├── Hive-Ranger data mask.png │ ├── Hive-Ranger row-level filter.png │ ├── Orders table.pptx │ ├── Row-level Filter-Rewrite the main process.png │ └── Row-level filter example data.png └── row-filter │ └── README.md ├── pom.xml ├── src ├── main │ ├── java │ │ ├── com │ │ │ └── hw │ │ │ │ └── security │ │ │ │ └── flink │ │ │ │ ├── PolicyManager.java │ │ │ │ ├── SecurityContext.java │ │ │ │ ├── enums │ │ │ │ └── DataMaskType.java │ │ │ │ ├── exception │ │ │ │ └── CustomException.java │ │ │ │ ├── model │ │ │ │ ├── ColumnEntity.java │ │ │ │ └── TableEntity.java │ │ │ │ ├── policy │ │ │ │ ├── DataMaskPolicy.java │ │ │ │ └── RowFilterPolicy.java │ │ │ │ ├── util │ │ │ │ └── ResourceReader.java │ │ │ │ └── visitor │ │ │ │ ├── DataMaskVisitor.java │ │ │ │ ├── RowFilterVisitor.java │ │ │ │ └── basic │ │ │ │ └── AbstractBasicVisitor.java │ │ └── extensions │ │ │ └── org │ │ │ └── apache │ │ │ └── flink │ │ │ └── table │ │ │ └── planner │ │ │ └── delegation │ │ │ └── ParserImpl │ │ │ └── ParserImplExtension.java │ └── resources │ │ ├── data_mask_types.json │ │ └── log4j.properties └── test │ ├── java │ ├── apache │ │ └── flink │ │ │ └── table │ │ │ └── catalog │ │ │ └── hive │ │ │ └── HiveTestUtils.java │ └── com │ │ └── hw │ │ └── security │ │ └── flink │ │ ├── PolicyManagerTest.java │ │ ├── SuiteTest.java │ │ ├── basic │ │ └── AbstractBasicTest.java │ │ ├── common │ │ └── CommonTest.java │ │ ├── execute │ │ ├── ExecuteDataMaskTest.java │ │ ├── ExecuteRowFilterTest.java │ │ └── MixedExecuteTest.java │ │ └── rewrite │ │ ├── MixedRewriteTest.java │ │ ├── RewriteDataMaskTest.java │ │ └── RewriteRowFilterTest.java │ └── resources │ └── hive-site.xml └── style ├── license-header └── spotless-formatter.xml /.gitignore: -------------------------------------------------------------------------------- 1 | # Mobile Tools for Java (J2ME) 2 | .mtj.tmp/ 3 | 4 | # Package Files # 5 | *.jar 6 | *.war 7 | *.ear 8 | 9 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 10 | hs_err_pid* 11 | 12 | *~ 13 | 14 | # eclipse ignore 15 | .settings 16 | .project 17 | .classpath 18 | .tomcatplugin 19 | logPath_IS_UNDEFINED 20 | *.gz 21 | 22 | # idea ignore 23 | .idea 24 | *.iml 25 | 26 | # maven ignore 27 | target 28 | 29 | # other ignore 30 | *.log 31 | *.tmp 32 | Thumbs.db 33 | *.DS_Store 34 | 35 | 36 | *.tgz 37 | 38 | .DS_Store 39 | node_modules 40 | 41 | # local env files 42 | .env.local 43 | .env.*.local 44 | 45 | # Log files 46 | npm-debug.log* 47 | yarn-debug.log* 48 | yarn-error.log* 49 | 50 | # Editor directories and files 51 | .vscode 52 | *.suo 53 | *.ntvs* 54 | *.njsproj 55 | *.sln 56 | *.sw? 
57 | 58 | data/tmp 59 | data/log 60 | 61 | dist 62 | 63 | package-lock.json 64 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 |
180 |    To apply the Apache License to your work, attach the following
181 |    boilerplate notice, with the fields enclosed by brackets "[]"
182 |    replaced with your own identifying information. (Don't include
183 |    the brackets!)  The text should be enclosed in the appropriate
184 |    comment syntax for the file format. We also recommend that a
185 |    file or class name and description of purpose be included on the
186 |    same "printed page" as the copyright notice for easier
187 |    identification within third-party archives.
188 |
189 |    Copyright [yyyy] [name of copyright owner]
190 |
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 |
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 |
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 |
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
# FlinkSQL Data Masking and Row-Level Security Solution with Source Code

Supports user-level data masking and row-level data access control, so that a given user can only see masked data or the rows they are authorized to read. This is a solution for Flink in the real-time domain, analogous to the Row-level Filter and Column Masking policies that Ranger provides for the offline data warehouse Hive.

> If you are interested, you can add me on WeChat: HamaWhite, or send email to baisongxx@gmail.com

<br/>

| No. | Author | Version | Date | Notes |
| -- | --- | --- | --- | --- |
| 1 | HamaWhite | 1.0.0 | 2022-12-15 | 1. Support row-level security |
| 2 | HamaWhite | 1.0.1 | 2023-04-11 | 1. Extend the methods of Flink's ParserImpl class via [manifold-ext](https://github.com/manifold-systems/manifold/tree/master/manifold-deps-parent/manifold-ext) <br/> 2. Add row-level security with a custom Calcite visitor instead of modifying the SqlSelect source code |
| 3 | HamaWhite | 2.0.0 | 2023-04-23 | 1. Support data masking |
| 4 | HamaWhite | 2.0.1 | 2023-05-07 | 1. Apply security constraints after syntax validation |

<br/>

> Note: if you open the source code in IntelliJ IDEA, please install the **Manifold** plugin first.

**For more technical details, please read the companion articles**:
1. [Row-Level Security Solution for FlinkSQL with Source Code](https://github.com/HamaWhiteGG/flink-sql-security/blob/dev/docs/row-filter/README.md)
2. [Data Masking Solution for FlinkSQL with Source Code](https://github.com/HamaWhiteGG/flink-sql-security/blob/dev/docs/data-mask/README.md)



## 1. Basics
### 1.1 Data Masking
Data masking is a data security technique that protects sensitive data from unauthorized access by replacing it with fictitious or unrecognizable data.
For example, sensitive information such as credit card numbers and social security numbers can be replaced with randomly generated digits or letters to keep it private and secure.

### 1.2 Row-Level Security
Row-level security is an access-control mechanism that lets system administrators or data owners apply fine-grained access control to individual rows in a database.
It restricts which rows a user may read or modify, ensuring that sensitive data can only be accessed by authorized people. Row filters can be defined on many conditions, such as user role, organizational structure, or geographic location. Row-level security effectively prevents unauthorized data access and leakage, improving data security and confidentiality.
In large enterprises and organizations it is widely used in databases, spreadsheets, and other data stores to meet security and compliance requirements.

### 1.3 A Simple Example
Take the orders table as an example. For data masking, the customer name (the `customer_name` field) that **user A** sees is fully masked, while **user B** sees only its first four characters, with the rest replaced by `x`.
For row-level security, **user A** can only see data from the **Beijing** region, and **user B** can only see data from the **Hangzhou** region.
![Data mask and Row-level filter example data.png](https://github.com/HamaWhiteGG/flink-sql-security/blob/dev/docs/images/Data%20mask%20and%20Row-level%20filter%20example%20data.png)

### 1.4 Component Versions
| Component | Version | Notes |
| --- | --- | --- |
| Flink | 1.16.1 | |
| flink-connector-mysql-cdc | 2.3.0 | |


## 2. The FlinkSQL Execution Flow
See the author's article [FlinkSQL Column Lineage Solution and Source Code](https://github.com/HamaWhiteGG/flink-sql-lineage/blob/main/README_CN.md); the execution flow, corrected and simplified for Flink 1.16, is shown below.
![FlinkSQL simple-execution flowchart.png](https://github.com/HamaWhiteGG/flink-sql-security/blob/dev/docs/images/FlinkSQL%20simple-execution%20flowchart.png)

After `CalciteParser` runs `parse()` and `validate()`, we obtain an Abstract Syntax Tree (AST) of type SqlNode. This project assembles row-level filter conditions onto that AST and generates a new AST, thereby implementing row-level security.

## 3. Solution
### 3.1 Data Masking
For the input Flink SQL, after `CalciteParser` parses and validates it into an AST, a custom
`Calcite SqlBasicVisitor` traverses every `SqlSelect` in the AST and collects each input table. If any column of an input table has a masking condition, a subquery is generated for that table,
rewriting each masked column as `CAST(mask_function(column) AS column_type) AS column`; the subquery is then converted into a SqlSelect with `CalciteParser.parseExpression()`,
and that SqlSelect replaces the original input table in the AST, producing a new AST and finally a new SQL to execute.
![FlinkSQL data mask solution.png](https://github.com/HamaWhiteGG/flink-sql-security/blob/dev/docs/images/FlinkSQL%20data%20mask%20solution.png)

### 3.2 Row-Level Security
Any input SQL that queries a table necessarily builds a Calcite SqlSelect object. To enforce row-level security it is therefore enough to modify the WHERE condition of each SqlSelect, without parsing every SQL variant a user might run to locate the tables that carry row-filter policies. After `CalciteParser` parses and validates the SQL into an AST, the SqlSelect objects are constructed, and a custom `Calcite SqlBasicVisitor` regenerates each SqlSelect's WHERE condition.

First, the configured row-filter condition is looked up by the executing user and the table name; the system parses this condition into a SqlBasicCall with CalciteParser's `parseExpression(String sqlExpression)` method and returns it. The WHERE condition is then reassembled from the user's SQL and the configured row filter, i.e., a new Abstract Syntax Tree with the row filter attached is generated, and the new AST (the new SQL) is what finally executes.
![FlinkSQL row-level filter solution.png](https://github.com/HamaWhiteGG/flink-sql-security/blob/dev/docs/images/FlinkSQL%20row-level%20filter%20solution.png)

### 3.3 Overall Execution Flow
For the input Flink SQL, `CalciteParser` parses and validates it into an AST. Since row-level security modifies the WHERE clause of SELECT statements, and the WHERE inside the sub-SELECT generated by data masking must not be touched, the WHERE clauses in the AST are first replaced according to the row-filter scheme, and only afterwards are the input tables rewritten into subqueries according to the masking scheme; the result is a new SQL that continues through execution.
![Data mask and Row-level filter overall execution flowchart.png](https://github.com/HamaWhiteGG/flink-sql-security/blob/dev/docs/images/Data%20mask%20and%20Row-level%20filter%20overall%20execution%20flowchart.png)


## 4. Examples
The source code contains a fairly rich set of unit tests for learning and verification; only some test points are described below.

In the test cases, the catalog name is `hive` and the database name is `default`.
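Before diving into the SQL, it may help to see how the rewrite is driven programmatically. Below is a minimal sketch using `PolicyManager` and `SecurityContext` from this repository; the argument order of the policy constructors is an assumption (they are plain model classes), and the catalog registration performed by the real tests (see `AbstractBasicTest`) is elided:

```java
import com.hw.security.flink.PolicyManager;
import com.hw.security.flink.SecurityContext;
import com.hw.security.flink.policy.DataMaskPolicy;
import com.hw.security.flink.policy.RowFilterPolicy;

public class SecurityQuickStart {

    public static void main(String[] args) {
        PolicyManager policyManager = new PolicyManager();
        SecurityContext context = new SecurityContext(policyManager);
        // NOTE: a catalog named "hive" holding the orders table must be registered
        // via context.useCatalog(...) before rewriting; that setup is omitted here.

        // row-level filter: user A only sees the beijing region
        // (assumed argument order: user, catalog, database, table, condition)
        policyManager.addPolicy(new RowFilterPolicy("user_A", "hive", "default", "orders", "region = 'beijing'"));
        // data masking: user A sees customer_name fully masked
        // (assumed argument order: user, catalog, database, table, column, condition)
        policyManager.addPolicy(new DataMaskPolicy("user_A", "hive", "default", "orders", "customer_name", "MASK"));

        String inputSql = "SELECT order_id, customer_name, product_id, region FROM orders";
        // apply the row filter first and the masking second, then print the new SQL
        System.out.println(context.mixedRewrite("user_A", inputSql));
    }
}
```

The expected output for user A is the SQL shown in section 4.2 below.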

```shell
$ cd flink-sql-security
$ mvn test
```
The policies for user A and user B are configured as described in section 1.3, namely:
- **User A** can only see data from the **Beijing** region, and the customer name (`customer_name` field) is fully masked;
- **User B** can only see data from the **Hangzhou** region, and only the first four characters of the customer name are shown, the rest replaced by `x`.

### 4.1 Input SQL
```sql
SELECT
    order_id,
    customer_name,
    product_id,
    region
FROM
    orders
```

### 4.2 Final SQL Executed for User A
```sql
SELECT
    orders.order_id,
    orders.customer_name,
    orders.product_id,
    orders.region
FROM (
    SELECT
        order_id,
        order_date,
        CAST(mask(customer_name) AS STRING) AS customer_name,
        product_id,
        price,
        order_status,
        region
    FROM
        hive.default.orders
) AS orders
WHERE
    orders.region = 'beijing'
```

### 4.3 Final SQL Executed for User B
```sql
SELECT
    orders.order_id,
    orders.customer_name,
    orders.product_id,
    orders.region
FROM (
    SELECT
        order_id,
        order_date,
        CAST(mask_show_first_n(customer_name, 4, 'x', 'x', 'x', -1, '1') AS STRING) AS customer_name,
        product_id,
        price,
        order_status,
        region
    FROM
        hive.default.orders
) AS orders
WHERE
    orders.region = 'hangzhou'
```

## 5. Roadmap
1. FlinkSQL access policies, i.e., permission control over databases, tables, and columns.
2. A ranger-flink-plugin.


## 6. Appreciation
If this project helps you, you can buy me a cup of coffee (below is my WeChat appreciation code).
![Appreciation code](https://github.com/HamaWhiteGG/flink-sql-security/blob/dev/docs/images/Appreciation%20code.png)

-------------------------------------------------------------------------------- /data/database/mysql_ddl.sql: --------------------------------------------------------------------------------
DROP DATABASE IF EXISTS demo;
CREATE DATABASE demo;

USE demo;

DROP TABLE IF EXISTS products;
CREATE TABLE products (
  id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY,
  name VARCHAR(255) NOT NULL,
  description VARCHAR(512)
) AUTO_INCREMENT = 101;

DROP TABLE IF EXISTS orders;
CREATE TABLE orders (
  order_id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY,
  order_date DATETIME NOT NULL,
  customer_name VARCHAR(255) NOT NULL,
  price DECIMAL(10, 5) NOT NULL,
  product_id INTEGER NOT NULL,
  order_status BOOLEAN NOT NULL,
  region VARCHAR(255) NOT NULL
) AUTO_INCREMENT = 10001;

DROP TABLE IF EXISTS shipments;
CREATE TABLE shipments (
  shipment_id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY,
  order_id INTEGER NOT NULL,
  origin VARCHAR(255) NOT NULL,
  destination VARCHAR(255) NOT NULL,
  is_arrived BOOLEAN NOT NULL
) AUTO_INCREMENT = 1001;
-------------------------------------------------------------------------------- /data/database/mysql_init.sql: --------------------------------------------------------------------------------
INSERT INTO products
VALUES (default,"scooter","Small 2-wheel scooter"),
       (default,"car battery","12V car battery"),
       (default,"12-pack drill bits","12-pack of drill bits with sizes ranging from #40 to #3"),
       (default,"hammer","12oz carpenter's hammer"),
       (default,"hammer","14oz carpenter's hammer"),
       (default,"hammer","16oz carpenter's hammer"),
       (default,"rocks","box of assorted rocks"),
       (default,"jacket","water resistant black wind breaker"),
       (default,"spare tire","24 inch spare tire");


INSERT INTO orders
VALUES (default, '2020-07-30 10:08:22', 'Jack', 50.50, 102, false, 'beijing'),
       (default, '2020-07-30 10:11:09', 'Sally', 15.00, 105, false, 'beijing'),
       (default, '2020-07-30 12:00:30', 'Edward', 25.25, 106, false, 'hangzhou'),
       (default, '2022-12-15 12:11:09', 'John', 78.00, 103, false, 'hangzhou'),
       (default, '2022-12-16 12:00:30', 'Edward', 64.00, 104, false, 'shanghai'),
       (default, '2022-12-17 23:00:30', 'Jack', 20.00, 103, false, 'shanghai');


INSERT INTO shipments
VALUES (default,10001,'Beijing','Shanghai',false),
       (default,10002,'Hangzhou','Shanghai',false),
       (default,10003,'Shanghai','Hangzhou',false);


-- this data can be inserted when running the job
-- INSERT INTO orders VALUES (default, '2022-12-14 18:08:22', 'Jack', 35, 102, false, 'beijing')
-------------------------------------------------------------------------------- /docs/data-mask/README.md: --------------------------------------------------------------------------------
# Data Masking Solution for FlinkSQL with Source Code

Supports user-level data masking access control, i.e., a given user can only see masked data. This is a solution for Flink in the real-time domain, analogous to the Ranger Column Masking scheme for the offline data warehouse Hive.

<br/>
Source code: https://github.com/HamaWhiteGG/flink-sql-security

> Note: if you open the source code in IntelliJ IDEA, please install the **Manifold** plugin first.


## 1. Basics
### 1.1 Data Masking
Data masking is a data security technique that protects sensitive data from unauthorized access by replacing it with fictitious or unrecognizable data.
For example, sensitive information such as credit card numbers and social security numbers can be replaced with randomly generated digits or letters to keep it private and secure.

### 1.2 Business Flow
The example below uses two rows of the orders table `orders`; the sample data is as follows:
![Data mask example data.png](https://github.com/HamaWhiteGG/flink-sql-security/blob/dev/docs/images/Data%20mask%20example%20data.png)

#### 1.2.1 Configuring Masking Policies
The administrator configures the user, table, column, and masking condition, for example:
![Data mask example conditions.png](https://github.com/HamaWhiteGG/flink-sql-security/blob/dev/docs/images/Data%20mask%20example%20conditions.png)


#### 1.2.2 Users Accessing Data
When a user queries the `orders` table on Flink, the SQL is regenerated under the hood with that user's masking conditions applied, so the masking takes effect.
When user A and user B run the same SQL below, they see different result data.
```sql
SELECT * FROM orders
```

**The result data user A sees is as follows**: the `customer_name` values are fully masked.
![Data mask-masked with customer_name after mask.png](https://github.com/HamaWhiteGG/flink-sql-security/blob/dev/docs/images/Data%20mask-masked%20with%20customer_name%20after%20mask.png)

<br/>

**The result data user B sees is as follows**: only the first four characters of `customer_name` are shown, the rest replaced by x.
![Data mask-masked with customer_name after mask_show_first_4.png](https://github.com/HamaWhiteGG/flink-sql-security/blob/dev/docs/images/Data%20mask-masked%20with%20customer_name%20after%20mask_show_first_4.png)


## 2. The Hive Data Masking Solution
In the offline data warehouse world, Hive has matured over many years and Ranger already supports masking column data; see reference [[1]](https://docs.cloudera.com/HDPDocuments/HDP3/HDP-3.1.0/authorization-ranger/content/dynamic_resource_based_column_masking_in_hive_with_ranger_policies.html).
The screenshot below shows Ranger's page for configuring masking conditions on Hive tables, for reference.
![Hive-Ranger data mask.png](https://github.com/HamaWhiteGG/flink-sql-security/blob/dev/docs/images/Hive-Ranger%20data%20mask.png)

The real-time Flink warehouse, however, is comparatively young: Ranger does not yet support FlinkSQL, and depending on Ranger would make deployment and operations heavyweight, so we set out to **build our own data masking tool for the real-time warehouse**.
The core ideas in this article also apply to Ranger, and a ranger-flink plugin could be developed from them fairly quickly.

## 3. The FlinkSQL Data Masking Solution
### 3.1 Solution
#### 3.1.1 The FlinkSQL Execution Flow
See the author's article [FlinkSQL Column Lineage Solution and Source Code](https://github.com/HamaWhiteGG/flink-sql-lineage/blob/main/README_CN.md); the execution flow, corrected and simplified for Flink 1.16, is shown below.
![FlinkSQL simple-execution flowchart.png](https://github.com/HamaWhiteGG/flink-sql-security/blob/dev/docs/images/FlinkSQL%20simple-execution%20flowchart.png)

After `CalciteParser` runs `parse()` and `validate()`, we obtain an Abstract Syntax Tree (AST) of type SqlNode. This article works on that AST, assembling the masking rewrites into a new AST, thereby implementing data masking control.

#### 3.1.2 Calcite Class Hierarchy
The sections below use Calcite's SqlNode, SqlCall, SqlIdentifier, SqlJoin, SqlBasicCall, and SqlSelect classes. They are briefly introduced here, together with their inheritance relationships, to help readers follow the source code.

| No. | Class | Description |
| --- | --- | --- |
| 1 | SqlNode | A SqlNode is a SQL parse tree. |
| 2 | SqlCall | A SqlCall is a call to an SqlOperator operator. |
| 3 | SqlIdentifier | A SqlIdentifier is an identifier, possibly compound. |
| 4 | SqlJoin | Parse tree node representing a JOIN clause. |
| 5 | SqlBasicCall | Implementation of SqlCall that keeps its operands in an array. |
| 6 | SqlSelect | A SqlSelect is a node of a parse tree which represents a select statement; the parent class is SqlCall. |

![Calcite SqlNode diagrams.png](https://github.com/HamaWhiteGG/flink-sql-security/blob/dev/docs/images/Calcite%20SqlNode%20diagrams.png)

#### 3.1.3 Approach

For the input Flink SQL, after `CalciteParser` parses and validates it into an AST, a custom
`Calcite SqlBasicVisitor` traverses every `SqlSelect` in the AST and collects each input table. If any column of an input table has a masking condition, a subquery is generated for that table,
rewriting each masked column as `CAST(mask_function(column) AS column_type) AS column`; the subquery is then converted into a SqlSelect with `CalciteParser.parseExpression()`,
and that SqlSelect replaces the original input table in the AST, producing a new AST and finally a new SQL to execute.
![FlinkSQL data mask solution.png](https://github.com/HamaWhiteGG/flink-sql-security/blob/dev/docs/images/FlinkSQL%20data%20mask%20solution.png)
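The `parseExpression()` step mentioned above can be tried in isolation, since `SecurityContext` exposes it directly. A minimal sketch (the printed kinds follow Calcite's usual behavior for a comparison and for an aliased subquery):

```java
import com.hw.security.flink.PolicyManager;
import com.hw.security.flink.SecurityContext;

import org.apache.calcite.sql.SqlNode;

public class ParseExpressionDemo {

    public static void main(String[] args) {
        // spins up a local Flink environment under the hood
        SecurityContext context = new SecurityContext(new PolicyManager());

        // a row-filter style condition parses into a SqlBasicCall for '='
        SqlNode condition = context.parseExpression("region = 'beijing'");
        System.out.println(condition.getKind()); // EQUALS

        // a masking subquery parses into an AS call wrapping a SqlSelect
        SqlNode subQuery = context.parseExpression(
                "(SELECT order_id, CAST(mask(customer_name) AS STRING) AS customer_name FROM orders) AS orders");
        System.out.println(subQuery.getKind()); // AS
    }
}
```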
### 3.2 Detailed Solution
#### 3.2.1 Finding the Input Tables
Analysis of the Flink SQL grammar shows that input tables can only appear in the following two places:
1. The FROM clause of a SELECT statement; if it is a subquery, keep traversing recursively.
2. The left and right sides of a SELECT ... JOIN statement; for joins of more than two tables, recurse as well.

The main steps below therefore locate the input tables according to the type of the FROM clause.

#### 3.2.2 Main Steps
The implementation is a custom DataMaskVisitor built on the visitor pattern provided by Calcite: it traverses all SqlSelect objects in the AST and replaces their input tables with subqueries.
The replacement steps are described in detail below; the overall flow is shown in the figure.

![Data mask-rewrite the main process.png](https://github.com/HamaWhiteGG/flink-sql-security/blob/dev/docs/images/Data%20mask-Rewrite%20the%20main%20process.png)

1. Traverse the SELECT statements in the AST.
2. Check whether the SELECT is a custom one (generated by step 9 below); if so, go to step 10, otherwise continue with step 3.
3. Check the type of the SELECT's FROM clause, and execute step 4, 5, or 10 accordingly.
4. If the FROM is a SqlJoin, traverse its left and right nodes separately, i.e., this step and step 6. Because three or more tables may be joined, recurse: for the left node, jump back to step 3.
5. If the FROM is a SqlBasicCall, further check whether it comes from a subquery. If so, go to step 10 and keep traversing the AST; a later pass of step 1 will process the subquery's SELECT statement. Otherwise go to step 7.
6. Recurse into the join's right node, i.e., jump back to step 3.
7. Iterate over the table's columns. If a column has a masking condition, rewrite it as `CAST(mask_function(column) AS column_type) AS column`; otherwise keep the original column name.
8. From the columns produced in step 7, build a subquery of the form `(SELECT col1, col2, CAST(mask_function(col3) AS col_type) AS col3, col4 FROM table_name) AS table_alias`.
9. Parse the step-8 subquery with `CalciteParser.parseExpression()`, generating a custom SELECT statement, and substitute it for the original FROM.
10. Keep traversing the AST, process the next SELECT statement found, and jump back to step 1.
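Steps 3 through 6 amount to a type dispatch on the FROM node plus recursion into joins. The sketch below is a simplified outline of that dispatch, assuming Calcite's usual accessors; the real implementation is `DataMaskVisitor` in the source tree:

```java
import org.apache.calcite.sql.SqlBasicCall;
import org.apache.calcite.sql.SqlJoin;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.SqlSelect;

public final class FromClauseDispatch {

    /** Steps 3-6: locate the input tables beneath a SELECT's FROM clause. */
    static void rewriteFrom(SqlNode from) {
        if (from instanceof SqlJoin) {
            // steps 4 and 6: a join may nest further joins, so recurse into both sides
            SqlJoin join = (SqlJoin) from;
            rewriteFrom(join.getLeft());
            rewriteFrom(join.getRight());
        } else if (from instanceof SqlBasicCall && from.getKind() == SqlKind.AS) {
            // an input table appears as "source AS alias"; operand 0 is the source
            SqlNode source = ((SqlBasicCall) from).operand(0);
            if (source instanceof SqlSelect) {
                // step 5: a subquery; its inner SELECT is handled by the outer traversal
                return;
            }
            // steps 7-9 would rewrite this plain table into a masking subquery here
        }
    }

    private FromClauseDispatch() {
    }
}
```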

#### 3.2.3 Hive and Ranger Compatibility
Ranger's default masking policies are listed below. Investigation shows that most of them are implemented by calling Hive's built-in or user-defined functions.

| No. | Policy | Description | Hive function |
| --- | --- | --- | --- |
| 1 | Redact | Mask letters with x and digits with n | mask |
| 2 | Partial mask: show last 4 | Show only the last four characters, replace the rest with x | mask_show_last_n |
| 3 | Partial mask: show first 4 | Show only the first four characters, replace the rest with x | mask_show_first_n |
| 4 | Hash | Replace the value with its hash | mask_hash |
| 5 | Nullify | Replace the value with NULL | implemented by Ranger itself |
| 6 | Unmasked | Show the value as-is | implemented by Ranger itself |
| 7 | Date: show only year | Show only the year of a date string | mask |
| 8 | Custom | Custom policy via a Hive UDF | |

Since Flink supports the Hive catalog, Hive functions can be called from Flink. This solution therefore also supports configuring Ranger's masking policies in Flink SQL.

## 4. Test Cases
The test data comes from the CDC Connectors for Apache Flink tutorial
[[4]](https://ververica.github.io/flink-cdc-connectors/master/content/%E5%BF%AB%E9%80%9F%E4%B8%8A%E6%89%8B/mysql-postgres-tutorial-zh.html);
this article adds a `region` column to the `orders` table, plus a `print_sink` table of type `'connector'='print'` whose columns match those of `orders`. The DDL and initialization SQL for the database are under the data/database directory.

In the test cases, the catalog name is `hive` and the database name is `default`.

After downloading the source code, the unit tests can be run with Maven.
```shell
$ cd flink-sql-security
$ mvn test
```

See the unit tests `RewriteDataMaskTest` and `ExecuteDataMaskTest` in the source code for the full set of cases; only two are described below.

### 4.1 Testing SELECT
#### 4.1.1 Input SQL
User A runs the following SQL:
```sql
SELECT
    order_id,
    customer_name,
    product_id,
    region
FROM
    orders
```
#### 4.1.2 Regenerating the SQL from the Masking Conditions
1. The input SQL is a simple SELECT statement. After parsing and validation the FROM type is `SqlBasicCall`, the table name `orders` is replaced with the fully qualified `hive.default.orders`, and the alias is `orders`.
2. Because user A has the masking condition MASK on `customer_name` (the corresponding masking function is `mask`), step 7 of the flow rewrites that column as `CAST(mask(customer_name) AS STRING) AS customer_name`; the remaining columns have no masking conditions and stay unchanged.
3. Then, in step 8, the table name `hive.default.orders` is rewritten into the subquery below, wrapped in parentheses `()` and given a table alias with `AS alias`.

```sql
(SELECT
    order_id,
    order_date,
    CAST(mask(customer_name) AS STRING) AS customer_name,
    product_id,
    price,
    order_status,
    region
FROM
    hive.default.orders
) AS orders
```
#### 4.1.3 Output SQL and Result
The rewritten SQL that finally executes is shown below, so the `customer_name` values that user A queries are all masked data.
```sql
SELECT
    orders.order_id,
    orders.customer_name,
    orders.product_id,
    orders.region
FROM (
    SELECT
        order_id,
        order_date,
        CAST(mask(customer_name) AS STRING) AS customer_name,
        product_id,
        price,
        order_status,
        region
    FROM
        hive.default.orders
) AS orders
```

### 4.2 Testing INSERT-SELECT
#### 4.2.1 Input SQL
User A runs the following SQL:
```sql
INSERT INTO print_sink SELECT * FROM orders
```
#### 4.2.2 Regenerating the SQL from the Masking Conditions
Visiting the generated AST with the custom Calcite DataMaskVisitor finds the corresponding SELECT statement below; note that during validation `*` is rewritten into all of the table's columns.
```sql
SELECT
    orders.order_id,
    orders.order_date,
    orders.customer_name,
    orders.product_id,
    orders.price,
    orders.order_status,
    orders.region
FROM
    hive.default.orders AS orders
```

The rewrite logic for this SELECT statement is the same as above and is not repeated.

#### 4.2.3 Output SQL and Result
The rewritten SQL that finally executes is shown below; note that the `customer_name` values inserted into the `print_sink` table are masked data.
```sql
INSERT INTO print_sink (
    SELECT
        orders.order_id,
        orders.order_date,
        orders.customer_name,
        orders.product_id,
        orders.price,
        orders.order_status,
        orders.region
    FROM (
        SELECT
            order_id,
            order_date,
            CAST(mask(customer_name) AS STRING) AS customer_name,
            product_id,
            price,
            order_status,
            region
        FROM
            hive.default.orders
    ) AS orders
)
```
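A unit-test-style sketch of this INSERT-SELECT case is shown below. The policy constructor arguments and the asserted fragment are illustrative only; the real cases live in `RewriteDataMaskTest` and compare the complete expected SQL:

```java
import com.hw.security.flink.PolicyManager;
import com.hw.security.flink.SecurityContext;
import com.hw.security.flink.policy.DataMaskPolicy;

import org.junit.Assert;
import org.junit.Test;

public class DataMaskRewriteSketchTest {

    @Test
    public void testInsertSelectWithDataMask() {
        PolicyManager policyManager = new PolicyManager();
        // assumed constructor argument order: user, catalog, database, table, column, condition
        policyManager.addPolicy(new DataMaskPolicy("user_A", "hive", "default", "orders", "customer_name", "MASK"));

        // the hive catalog registration done by the real tests is elided here
        SecurityContext context = new SecurityContext(policyManager);

        String rewritten = context.rewriteDataMask("user_A", "INSERT INTO print_sink SELECT * FROM orders");
        // '*' is expanded during validation and customer_name comes back masked
        Assert.assertTrue(rewritten.contains("CAST(mask(customer_name) AS STRING)"));
    }
}
```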

## 5. References
1. [Apache Ranger Column Masking in Hive](https://docs.cloudera.com/HDPDocuments/HDP3/HDP-3.1.0/authorization-ranger/content/dynamic_resource_based_column_masking_in_hive_with_ranger_policies.html)
2. [FlinkSQL Column Lineage Solution and Source Code](https://github.com/HamaWhiteGG/flink-sql-lineage/blob/main/README_CN.md)
3. [Parsing Source and Result Tables from SQL Statements](https://blog.jrwang.me/2018/parse-table-in-sql)
4. [Streaming ETL for MySQL and Postgres Based on Flink CDC](https://ververica.github.io/flink-cdc-connectors/master/content/%E5%BF%AB%E9%80%9F%E4%B8%8A%E6%89%8B/mysql-postgres-tutorial-zh.html)
5. [HiveQL Data Masking Functions](https://blog.csdn.net/CPP_MAYIBO/article/details/104065839)
-------------------------------------------------------------------------------- /docs/images/Appreciation code.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/HamaWhiteGG/flink-sql-security/b03156485e09424f1e186b6de16cd9ba87539876/docs/images/Appreciation code.png
-------------------------------------------------------------------------------- /docs/images/Calcite SqlNode diagrams.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/HamaWhiteGG/flink-sql-security/b03156485e09424f1e186b6de16cd9ba87539876/docs/images/Calcite SqlNode diagrams.png
-------------------------------------------------------------------------------- /docs/images/Data mask and Row-level filter example data.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/HamaWhiteGG/flink-sql-security/b03156485e09424f1e186b6de16cd9ba87539876/docs/images/Data mask and Row-level filter example data.png
-------------------------------------------------------------------------------- /docs/images/Data mask and Row-level filter overall execution flowchart.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/HamaWhiteGG/flink-sql-security/b03156485e09424f1e186b6de16cd9ba87539876/docs/images/Data mask and Row-level filter overall execution flowchart.png
-------------------------------------------------------------------------------- /docs/images/Data mask example conditions.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/HamaWhiteGG/flink-sql-security/b03156485e09424f1e186b6de16cd9ba87539876/docs/images/Data mask example conditions.png
-------------------------------------------------------------------------------- /docs/images/Data mask example data.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/HamaWhiteGG/flink-sql-security/b03156485e09424f1e186b6de16cd9ba87539876/docs/images/Data mask example data.png
-------------------------------------------------------------------------------- /docs/images/Data mask-Rewrite the main process.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/HamaWhiteGG/flink-sql-security/b03156485e09424f1e186b6de16cd9ba87539876/docs/images/Data mask-Rewrite the main process.png
-------------------------------------------------------------------------------- /docs/images/Data mask-masked with customer_name after mask.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/HamaWhiteGG/flink-sql-security/b03156485e09424f1e186b6de16cd9ba87539876/docs/images/Data mask-masked with customer_name after mask.png
-------------------------------------------------------------------------------- /docs/images/Data mask-masked with customer_name after mask_show_first_4.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/HamaWhiteGG/flink-sql-security/b03156485e09424f1e186b6de16cd9ba87539876/docs/images/Data mask-masked with customer_name after mask_show_first_4.png
-------------------------------------------------------------------------------- /docs/images/FlinkSQL data mask solution.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/HamaWhiteGG/flink-sql-security/b03156485e09424f1e186b6de16cd9ba87539876/docs/images/FlinkSQL data mask solution.png
-------------------------------------------------------------------------------- /docs/images/FlinkSQL row-level filter solution.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/HamaWhiteGG/flink-sql-security/b03156485e09424f1e186b6de16cd9ba87539876/docs/images/FlinkSQL row-level filter solution.png
-------------------------------------------------------------------------------- /docs/images/FlinkSQL simple-execution flowchart.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/HamaWhiteGG/flink-sql-security/b03156485e09424f1e186b6de16cd9ba87539876/docs/images/FlinkSQL simple-execution flowchart.png
-------------------------------------------------------------------------------- /docs/images/Hive-Ranger data mask.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/HamaWhiteGG/flink-sql-security/b03156485e09424f1e186b6de16cd9ba87539876/docs/images/Hive-Ranger data mask.png
-------------------------------------------------------------------------------- /docs/images/Hive-Ranger row-level filter.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/HamaWhiteGG/flink-sql-security/b03156485e09424f1e186b6de16cd9ba87539876/docs/images/Hive-Ranger row-level filter.png
-------------------------------------------------------------------------------- /docs/images/Orders table.pptx: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/HamaWhiteGG/flink-sql-security/b03156485e09424f1e186b6de16cd9ba87539876/docs/images/Orders table.pptx
-------------------------------------------------------------------------------- /docs/images/Row-level Filter-Rewrite the main process.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/HamaWhiteGG/flink-sql-security/b03156485e09424f1e186b6de16cd9ba87539876/docs/images/Row-level Filter-Rewrite the main process.png
-------------------------------------------------------------------------------- /docs/images/Row-level filter example data.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/HamaWhiteGG/flink-sql-security/b03156485e09424f1e186b6de16cd9ba87539876/docs/images/Row-level filter example data.png
-------------------------------------------------------------------------------- /docs/row-filter/README.md: --------------------------------------------------------------------------------
# Row-Level Security Solution for FlinkSQL with Source Code

Supports user-level row-level data access control, i.e., a given user can only access the rows they are authorized to read; unauthorized rows are hidden. This is a solution for Flink in the real-time domain, analogous to the Ranger Row-level Filter scheme for the offline data warehouse Hive.

<br/>
Source code: https://github.com/HamaWhiteGG/flink-sql-security

> Note: if you open the source code in IntelliJ IDEA, please install the **Manifold** plugin first.


## 1. Basics
### 1.1 Row-Level Security
Row-level security is an access-control mechanism that lets system administrators or data owners apply fine-grained access control to individual rows in a database.
It restricts which rows a user may read or modify, ensuring that sensitive data can only be accessed by authorized people. Row filters can be defined on many conditions, such as user role, organizational structure, or geographic location. Row-level security effectively prevents unauthorized data access and leakage, improving data security and confidentiality.
In large enterprises and organizations it is widely used in databases, spreadsheets, and other data stores to meet security and compliance requirements.

For example, on the orders table **user A** can only see data from the **Beijing** region, while **user B** can only see data from the **Hangzhou** region.
![Row-level filter example data.png](https://github.com/HamaWhiteGG/flink-sql-security/blob/dev/docs/images/Row-level%20filter%20example%20data.png)

### 1.2 Business Flow
#### 1.2.1 Configuring Row-Level Policies
The administrator configures the user, table, and row-level filter condition, for example:
| No. | User | Table | Row filter condition |
| --- | --- | --- | --- |
| 1 | User A | orders | region = 'beijing' |
| 2 | User B | orders | region = 'hangzhou' |


#### 1.2.2 Users Querying Data
When a user queries the `orders` table, the underlying query automatically filters the data according to that user's row filter, i.e., the row-level security takes effect.

When user A and user B run the same SQL below, they see different result data.

```sql
SELECT * FROM orders
```

**The result data user A sees is**:
| order_id | order_date | customer_name | price | product_id | order_status | region |
| --- | --- | --- | --- | --- | --- | --- |
| 10001 | 2020-07-30 10:08:22 | Jack | 50.50 | 102 | false | beijing |
| 10002 | 2020-07-30 10:11:09 | Sally | 15.00 | 105 | false | beijing |

<br/>

**The result data user B sees is**:
| order_id | order_date | customer_name | price | product_id | order_status | region |
| --- | --- | --- | --- | --- | --- | --- |
| 10003 | 2020-07-30 12:00:30 | Edward | 25.25 | 106 | false | hangzhou |
| 10004 | 2022-12-15 12:11:09 | John | 78.00 | 103 | false | hangzhou |

## 2. The Hive Row-Level Security Solution
In the offline data warehouse world, Hive has matured over many years and Ranger already supports row-level filtering of table data; see reference [[2]](https://docs.cloudera.com/HDPDocuments/HDP3/HDP-3.1.0/authorization-ranger/content/row_level_filtering_in_hive_with_ranger_policies.html). The screenshot below shows Ranger's page for configuring row-level filter conditions on Hive tables, for reference.

![Hive-Ranger row-level filter.png](https://github.com/HamaWhiteGG/flink-sql-security/blob/dev/docs/images/Hive-Ranger%20row-level%20filter.png)

<br/>
58 | 59 | 但由于Flink实时数仓领域发展相对较短,Ranger还不支持FlinkSQL,以及要依赖Ranger会导致系统部署和运维过重,因此开始**自研实时数仓的行级权限解决工具**。 60 | 61 | ## 三、FlinkSQL行级权限解决方案 62 | ### 3.1 解决方案 63 | #### 3.1.1 FlinkSQL执行流程 64 | 可以参考作者文章[[FlinkSQL字段血缘解决方案及源码]](https://github.com/HamaWhiteGG/flink-sql-lineage/blob/main/README_CN.md),本文根据Flink1.16修正和简化后的执行流程如下图所示。 65 | ![FlinkSQL simple-execution flowchart.png](https://github.com/HamaWhiteGG/flink-sql-security/blob/dev/docs/images/FlinkSQL%20simple-execution%20flowchart.png) 66 | 67 | 在`CalciteParser`进行`parse()`和`validate()`处理后会得到一个SqlNode类型的抽象语法树(`Abstract Syntax Tree`,简称AST),本文会针对此抽象语法树来组装行级过滤条件后生成新的AST,以实现行级权限控制。 68 | 69 | #### 3.1.2 Calcite对象继承关系 70 | 下面章节要用到Calcite中的SqlNode、SqlCall、SqlIdentifier、SqlJoin、SqlBasicCall和SqlSelect等类,此处进行简单介绍以及展示它们间继承关系,以便读者阅读本文源码。 71 | 72 | | 序号 | 类 | 介绍 | 73 | | --- | --- | --- | 74 | | 1 | SqlNode | A SqlNode is a SQL parse tree. | 75 | | 2 | SqlCall | A SqlCall is a call to an SqlOperator operator. | 76 | | 3 | SqlIdentifier | A SqlIdentifier is an identifier, possibly compound. | 77 | | 4 | SqlJoin | Parse tree node representing a JOIN clause. | 78 | | 5 | SqlBasicCall | Implementation of SqlCall that keeps its operands in an array. | 79 | | 6 | SqlSelect | A SqlSelect is a node of a parse tree which represents a select statement, the parent class is SqlCall | 80 | 81 | Calcite SqlNode diagrams.png 82 | 83 | #### 3.1.3 解决思路 84 | 85 | 如果输入SQL包含对表的查询操作,则一定会构建Calcite SqlSelect对象。因此限制表的行级权限,只要对Calcite SqlSelect对象的Where条件进行修改即可,而不需要解析用户执行的各种SQL来查找配置过行级权限条件约束的表。在`CalciteParser`进行语法解析(parse)和语法校验(validate)后生成抽象语法树AST,其会构造出SqlSelect对象,采用自定义`Calcite SqlBasicVisitor`来重新生成新的SqlSelect Where条件。 86 | 87 | 首先通过执行用户和表名来查找配置的行级权限条件,系统会把此条件用CalciteParser提供的`parseExpression(String sqlExpression)`方法解析生成一个SqlBasicCall再返回。然后结合用户执行的SQL和配置的行级权限条件重新组装Where条件,即生成新的带行级过滤条件Abstract Syntax Tree,最后基于新AST(即新SQL)再执行。 88 | ![FlinkSQL row-level filter solution.png](https://github.com/HamaWhiteGG/flink-sql-security/blob/dev/docs/images/FlinkSQL%20row-level%20filter%20solution.png) 89 | 90 | ### 3.2 详细方案 91 | 主要通过Calcite提供的访问者模式自定义RowFilterVisitor来实现,遍历AST中所有的SqlSelect对象重新生成Where子句。 92 | 下面详细描述替换Where子句的步骤,整体流程如下图所示。 93 | 94 | ![Row-level filter-rewrite the main process.png](https://github.com/HamaWhiteGG/flink-sql-security/blob/dev/docs/images/Row-level%20Filter-Rewrite%20the%20main%20process.png) 95 | 96 | 1. 遍历AST中的SELECT语句。 97 | 2. 判断SELECT语句中的FROM类型,按照不同类型对应执行下面的步骤3、4和10。 98 | 3. 如果FROM是SqlJoin类型,则分别遍历其左Left和Right右节点,即执行当前步骤3和步骤5。由于可能是三张表及以上的Join,因此进行递归处理,即针对其左节点跳回到步骤2。 99 | 4. 如果FROM是SqlBasicCall类型,还需要判断是否来自子查询,是则跳转到步骤10继续遍历AST,后续步骤1会对子查询中的SELECT语句进行处理。否则跳转到步骤6。 100 | 5. 递归处理Join的右节点,即跳回到步骤2。 101 | 6. 根据当前执行SQL的用户名和表名来查找已配置的行级约束条件,并调用Calcite进行解析表达式操作,生成permissions(类型是上文提到的SqlBasicCall)。 102 | 7. 给行级权限解析后的permissions增加别名,例如行级约束条件是region = '北京',来自于orders表,别名是o。则此步骤处理后的结果是o.region = '北京'。 103 | 8. 组装旧where和行级权限permissions来生成新的where,即把两个约束用and联合起来,然后执行步骤9。 104 | 9. 用新where替换掉旧where。 105 | 10. 

## 4. Test Cases
The test data comes from the CDC Connectors for Apache Flink tutorial
[[6]](https://ververica.github.io/flink-cdc-connectors/master/content/%E5%BF%AB%E9%80%9F%E4%B8%8A%E6%89%8B/mysql-postgres-tutorial-zh.html);
this article adds a `region` column to the `orders` table, plus a `print_sink` table of type `'connector'='print'` whose columns match those of `orders`. The DDL and initialization SQL for the database are under the data/database directory.

In the test cases, the catalog name is `hive` and the database name is `default`.

After downloading the source code, the unit tests can be run with Maven.

```shell
$ cd flink-sql-security
$ mvn test
```
See the unit tests `RewriteRowFilterTest` and `ExecuteRowFilterTest` in the source code for the full set of cases; only three are described below.

### 4.1 Testing SELECT

#### 4.1.1 Input SQL
```sql
SELECT
    order_id,
    customer_name,
    product_id,
    region
FROM
    orders
```
#### 4.1.2 Output SQL
```sql
SELECT
    orders.order_id,
    orders.customer_name,
    orders.product_id,
    orders.region
FROM
    hive.default.orders AS orders
WHERE
    orders.region = 'beijing'
```
#### 4.1.3 Summary
The input SQL is a simple SELECT statement. After parsing and validation the FROM type is SqlBasicCall, the table name orders is replaced with the fully qualified `hive.default.orders`, and the alias is `orders`. Since the input SQL has no WHERE condition, the row filter `region = 'beijing'` only needs to be appended as the WHERE clause, qualified with the table alias `orders`.

### 4.2 Two-Table JOIN

#### 4.2.1 Input SQL
```sql
SELECT
    o.order_id,
    o.customer_name,
    o.product_id,
    o.region,
    p.name,
    p.description
FROM
    orders AS o
LEFT JOIN
    products AS p
ON
    o.product_id = p.id
WHERE
    o.price > 45.0 OR o.customer_name = 'John'
```
#### 4.2.2 Output SQL
```sql
SELECT
    o.order_id,
    o.customer_name,
    o.product_id,
    o.region,
    p.name,
    p.description
FROM
    hive.default.orders AS o
LEFT JOIN
    hive.default.products AS p
ON
    o.product_id = p.id
WHERE
    (o.price > 45.0 OR o.customer_name = 'John')
    AND o.region = 'beijing'
```
#### 4.2.3 Summary
When the two tables are joined, the left table `orders` carries the row filter `region = 'beijing'`, and the WHERE clause already holds the condition `o.price > 45.0 OR o.customer_name = 'John'`.

Therefore `region = 'beijing'` is first qualified with the left table's alias `o`, giving o.region = 'beijing'; then, during assembly, the existing `price > 45.0 OR customer_name = 'John'` is wrapped in parentheses.

### 4.3 INSERT from a SELECT Subquery
#### 4.3.1 Input SQL
```sql
INSERT INTO print_sink SELECT * FROM orders
```
#### 4.3.2 Output SQL
```sql
INSERT INTO print_sink (
    SELECT
        orders.order_id,
        orders.order_date,
        orders.customer_name,
        orders.product_id,
        orders.price,
        orders.order_status,
        orders.region
    FROM
        hive.default.orders AS orders
    WHERE
        orders.region = 'beijing'
)
```
#### 4.3.3 Summary
Whatever the type of the SQL being run, INSERT, SELECT, or anything else, only the clause that queries the `orders` table is located, and the row-level filter condition is assembled onto it.

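Section 4.2 relies on step 7's alias qualification (`region = 'beijing'` becoming `o.region = 'beijing'`). One way to sketch that qualification over the parsed condition is with Calcite's `SqlShuttle`; this is illustrative rather than the project's actual code, and it assumes the filter only references simple one-part column names:

```java
import org.apache.calcite.sql.SqlIdentifier;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.parser.SqlParserPos;
import org.apache.calcite.sql.util.SqlShuttle;

import java.util.Arrays;

public final class AliasQualifier {

    /** Step 7: rewrite every bare column in the filter, e.g. region -> o.region. */
    static SqlNode qualify(SqlNode permissions, String tableAlias) {
        return permissions.accept(new SqlShuttle() {
            @Override
            public SqlNode visit(SqlIdentifier id) {
                // qualify simple identifiers only; leave already-qualified ones alone
                if (id.names.size() == 1) {
                    return new SqlIdentifier(Arrays.asList(tableAlias, id.names.get(0)), SqlParserPos.ZERO);
                }
                return id;
            }
        });
    }

    private AliasQualifier() {
    }
}
```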
## 5. References
1. [Data Management DMS: Sensitive Data Management, Row-Level Control](https://help.aliyun.com/document_detail/161149.html)
2. [Apache Ranger Row-level Filter](https://docs.cloudera.com/HDPDocuments/HDP3/HDP-3.1.0/authorization-ranger/content/row_level_filtering_in_hive_with_ranger_policies.html)
3. [Row-Level Security in openLooKeng](https://www.modb.pro/db/212124)
4. [Row-Level Security / Row Security Policies in PostgreSQL](https://www.kankanzhijian.com/2018/09/28/PostgreSQL-rowsecurity)
5. [FlinkSQL Column Lineage Solution and Source Code](https://github.com/HamaWhiteGG/flink-sql-lineage/blob/main/README_CN.md)
6. [Streaming ETL for MySQL and Postgres Based on Flink CDC](https://ververica.github.io/flink-cdc-connectors/master/content/%E5%BF%AB%E9%80%9F%E4%B8%8A%E6%89%8B/mysql-postgres-tutorial-zh.html)
7. [Revisiting the Visitor Pattern: From Practice to Essence](https://www.51cto.com/article/703150.html)
8. [manifold-systems/manifold](https://github.com/manifold-systems/manifold)
-------------------------------------------------------------------------------- /pom.xml: --------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.hw.security</groupId>
    <artifactId>flink-sql-security</artifactId>
    <version>1.0.0</version>

    <properties>
        <flink.version>1.16.1</flink.version>
        <hive.version>3.1.2</hive.version>
        <hadoop.version>3.2.2</hadoop.version>
        <mysql.version>8.0.32</mysql.version>
        <slf4j-log4j12.version>1.7.25</slf4j-log4j12.version>
        <junit.version>4.13.2</junit.version>
        <assertj.version>3.24.2</assertj.version>
        <lombok.version>1.18.26</lombok.version>
        <mysql.cdc.version>2.3.0</mysql.cdc.version>
        <fastjson2.version>2.0.24</fastjson2.version>
        <slf4j.version>1.7.25</slf4j.version>
        <manifold-ext.version>2023.1.4</manifold-ext.version>

        <target.java.version>1.8</target.java.version>
        <scala.binary.version>2.12</scala.binary.version>
        <maven-compiler-plugin.version>3.8.0</maven-compiler-plugin.version>
        <spotless-maven-plugin.version>2.27.1</spotless-maven-plugin.version>
        <maven.compiler.source>${target.java.version}</maven.compiler.source>
        <maven.compiler.target>${target.java.version}</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-runtime-web</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-base</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-jdbc</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>com.ververica</groupId>
            <artifactId>flink-connector-mysql-cdc</artifactId>
            <version>${mysql.cdc.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-hive_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>${mysql.version}</version>
        </dependency>

        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>${slf4j-log4j12.version}</version>
        </dependency>

        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>${junit.version}</version>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>org.assertj</groupId>
            <artifactId>assertj-core</artifactId>
            <version>${assertj.version}</version>
            <scope>test</scope>
        </dependency>

        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>${lombok.version}</version>
        </dependency>

        <dependency>
            <groupId>systems.manifold</groupId>
            <artifactId>manifold-ext</artifactId>
            <version>${manifold-ext.version}</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>com.alibaba.fastjson2</groupId>
            <artifactId>fastjson2</artifactId>
            <version>${fastjson2.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>${hadoop.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-exec</artifactId>
            <version>${hive.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.apache.calcite</groupId>
                    <artifactId>calcite-core</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.calcite</groupId>
                    <artifactId>calcite-linq4j</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.logging.log4j</groupId>
                    <artifactId>log4j-slf4j-impl</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-metastore</artifactId>
            <version>${hive.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.apache.logging.log4j</groupId>
                    <artifactId>log4j-slf4j-impl</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>${slf4j.version}</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>${maven-compiler-plugin.version}</version>
                <configuration>
                    <source>${target.java.version}</source>
                    <target>${target.java.version}</target>
                    <compilerArgs>
                        <arg>-Xplugin:Manifold no-bootstrap</arg>
                    </compilerArgs>
                    <annotationProcessorPaths>
                        <path>
                            <groupId>org.projectlombok</groupId>
                            <artifactId>lombok</artifactId>
                            <version>${lombok.version}</version>
                        </path>
                        <path>
                            <groupId>systems.manifold</groupId>
                            <artifactId>manifold-ext</artifactId>
                            <version>${manifold-ext.version}</version>
                        </path>
                    </annotationProcessorPaths>
                </configuration>
            </plugin>

            <plugin>
                <groupId>com.diffplug.spotless</groupId>
                <artifactId>spotless-maven-plugin</artifactId>
                <version>${spotless-maven-plugin.version}</version>
                <configuration>
                    <java>
                        <eclipse>
                            <file>style/spotless-formatter.xml</file>
                        </eclipse>
                        <licenseHeader>
                            <file>style/license-header</file>
                        </licenseHeader>
                        <importOrder>
                            <order>com,org,,javax,java,scala,\#</order>
                        </importOrder>
                    </java>
                    <pom>
                        <sortPom>
                            <encoding>UTF-8</encoding>
                            <nrOfIndentSpace>4</nrOfIndentSpace>
                            <keepBlankLines>true</keepBlankLines>
                            <indentBlankLines>false</indentBlankLines>
                            <expandEmptyElements>true</expandEmptyElements>
                            <indentSchemaLocation>true</indentSchemaLocation>
                            <spaceBeforeCloseEmptyElement>false</spaceBeforeCloseEmptyElement>
                            <sortProperties>false</sortProperties>
                            <predefinedSortOrder>custom_1</predefinedSortOrder>
                            <sortModules>false</sortModules>
                            <sortExecutions>false</sortExecutions>
                        </sortPom>
                    </pom>
                </configuration>
                <executions>
                    <execution>
                        <goals>
                            <goal>check</goal>
                        </goals>
                        <phase>compile</phase>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
-------------------------------------------------------------------------------- /src/main/java/com/hw/security/flink/PolicyManager.java: --------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hw.security.flink;

import com.alibaba.fastjson2.JSON;
import com.hw.security.flink.enums.DataMaskType;
import com.hw.security.flink.exception.CustomException;
import com.hw.security.flink.policy.DataMaskPolicy;
import com.hw.security.flink.policy.RowFilterPolicy;
import com.hw.security.flink.util.ResourceReader;

import org.apache.commons.lang3.StringUtils;

import java.util.LinkedList;
import java.util.List;
import java.util.Optional;

/**
 * The manager of row-level filter and data masking policies, which can be connected to the policies in Ranger later.
 *
 * @author: HamaWhite
 */
public class PolicyManager {

    private static final String DATA_MASK_TYPES_FILE = "data_mask_types.json";

    private final List<RowFilterPolicy> rowFilterPolicyList;

    private final List<DataMaskPolicy> dataMaskPolicyList;

    private final List<DataMaskType> maskTypeList;

    public PolicyManager() {
        this.rowFilterPolicyList = new LinkedList<>();
        this.dataMaskPolicyList = new LinkedList<>();

        try {
            byte[] bytes = ResourceReader.readFile(DATA_MASK_TYPES_FILE);
            this.maskTypeList = JSON.parseArray(new String(bytes), DataMaskType.class);
        } catch (Exception e) {
            throw new CustomException(String.format("read file %s error", DATA_MASK_TYPES_FILE), e);
        }
    }

    public Optional<String> getRowFilterCondition(String username, String catalogName, String database,
            String tableName) {
        for (RowFilterPolicy policy : rowFilterPolicyList) {
            if (policy.getUsername().equals(username)
                    && policy.getCatalogName().equals(catalogName)
                    && policy.getDatabase().equals(database)
                    && policy.getTableName().equals(tableName)) {
                return Optional.ofNullable(policy.getCondition());
            }
        }
        return Optional.empty();
    }

    public Optional<String> getDataMaskCondition(String username, String catalogName, String database, String tableName,
            String columnName) {
        for (DataMaskPolicy policy : dataMaskPolicyList) {
            if (policy.getUsername().equals(username)
                    && policy.getCatalogName().equals(catalogName)
                    && policy.getDatabase().equals(database)
                    && policy.getTableName().equals(tableName)
                    && policy.getColumnName().equals(columnName)) {
                return Optional.ofNullable(policy.getCondition());
            }
        }
        return Optional.empty();
    }

    public DataMaskType getDataMaskType(String typeName) {
        DataMaskType ret = null;
        for (DataMaskType maskType : maskTypeList) {
            if (StringUtils.equals(maskType.getName(), typeName)) {
                ret = maskType;
                break;
            }
        }
        return ret;
    }

    public boolean addPolicy(RowFilterPolicy policy) {
        return rowFilterPolicyList.add(policy);
    }

    public boolean removePolicy(RowFilterPolicy policy) {
        return rowFilterPolicyList.remove(policy);
    }

    public boolean addPolicy(DataMaskPolicy policy) {
        return dataMaskPolicyList.add(policy);
    }

    public boolean removePolicy(DataMaskPolicy policy) {
        return dataMaskPolicyList.remove(policy);
    }
}
-------------------------------------------------------------------------------- /src/main/java/com/hw/security/flink/SecurityContext.java: --------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.hw.security.flink; 20 | 21 | import com.hw.security.flink.exception.CustomException; 22 | import com.hw.security.flink.model.ColumnEntity; 23 | import com.hw.security.flink.model.TableEntity; 24 | import com.hw.security.flink.visitor.DataMaskVisitor; 25 | import com.hw.security.flink.visitor.RowFilterVisitor; 26 | 27 | import org.apache.calcite.sql.SqlNode; 28 | import org.apache.flink.configuration.Configuration; 29 | import org.apache.flink.configuration.RestOptions; 30 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 31 | import org.apache.flink.table.api.*; 32 | import org.apache.flink.table.api.Schema.UnresolvedColumn; 33 | import org.apache.flink.table.api.Schema.UnresolvedComputedColumn; 34 | import org.apache.flink.table.api.Schema.UnresolvedMetadataColumn; 35 | import org.apache.flink.table.api.Schema.UnresolvedPhysicalColumn; 36 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 37 | import org.apache.flink.table.api.internal.TableEnvironmentImpl; 38 | import org.apache.flink.table.catalog.*; 39 | import org.apache.flink.table.catalog.exceptions.TableNotExistException; 40 | import org.apache.flink.table.planner.delegation.ParserImpl; 41 | import org.apache.flink.types.Row; 42 | import org.apache.flink.util.FlinkRuntimeException; 43 | import org.slf4j.Logger; 44 | import org.slf4j.LoggerFactory; 45 | 46 | import java.util.ArrayList; 47 | import java.util.Iterator; 48 | import java.util.List; 49 | import java.util.function.BinaryOperator; 50 | import java.util.stream.Collectors; 51 | import javassist.*; 52 | 53 | /** 54 | * @description: SecurityContext 55 | * @author: HamaWhite 56 | */ 57 | public class SecurityContext { 58 | 59 | private static final Logger LOG = LoggerFactory.getLogger(SecurityContext.class); 60 | 61 | private TableEnvironmentImpl tableEnv; 62 | 63 | private final ParserImpl parser; 64 | 65 | private final PolicyManager policyManager; 66 | 67 | static { 68 | /* 69 | * Use javassist to modify the bytecode to add the variable custom to org.apache.calcite.sql.SqlSelect, which is 70 | * used to mark whether SqlSelect is custom generated 71 | */ 72 | try { 73 | ClassPool classPool = ClassPool.getDefault(); 74 | CtClass ctClass = classPool.getCtClass("org.apache.calcite.sql.SqlSelect"); 75 | 76 | // add field custom, the default value is false 77 | CtField field = new CtField(CtClass.booleanType, "custom", ctClass); 78 | ctClass.addField(field, CtField.Initializer.constant(false)); 79 | // add set method 80 | CtMethod setMethod = CtNewMethod.setter("setCustom", field); 81 | ctClass.addMethod(setMethod); 82 | // add get method 83 | CtMethod getMethod = CtNewMethod.getter("isCustom", field); 84 | ctClass.addMethod(getMethod); 85 | // load class 86 | ctClass.toClass(); 87 | } catch (Exception e) { 88 | throw new CustomException("Dynamic add field method exception.", e); 89 | } 90 | } 91 | 92 | public SecurityContext(PolicyManager policyManager) { 93 | this.policyManager = policyManager; 94 | // init table environment 95 | initTableEnvironment(); 96 | this.parser = (ParserImpl) tableEnv.getParser(); 97 | } 98 | 99 | private void initTableEnvironment() { 100 | Configuration configuration = new Configuration(); 101 | configuration.setString(RestOptions.BIND_PORT, "8081-8189"); 102 | 103 | try (StreamExecutionEnvironment env = 
StreamExecutionEnvironment.createLocalEnvironment(configuration)) { 104 | env.setParallelism(1); 105 | 106 | EnvironmentSettings settings = EnvironmentSettings.newInstance() 107 | .inStreamingMode() 108 | .build(); 109 | this.tableEnv = (TableEnvironmentImpl) StreamTableEnvironment.create(env, settings); 110 | 111 | } catch (Exception e) { 112 | throw new FlinkRuntimeException("Init local flink execution environment error", e); 113 | } 114 | } 115 | 116 | public void useCatalog(AbstractCatalog catalog) { 117 | if (!tableEnv.getCatalog(catalog.getName()).isPresent()) { 118 | tableEnv.registerCatalog(catalog.getName(), catalog); 119 | } 120 | tableEnv.useCatalog(catalog.getName()); 121 | } 122 | 123 | /** 124 | * Add row-level filter and return new SQL 125 | */ 126 | public String rewriteRowFilter(String username, String singleSql) { 127 | // parse and validate sql 128 | SqlNode validated = validate(singleSql); 129 | 130 | // add row-level filter and return a new abstract syntax tree 131 | RowFilterVisitor visitor = new RowFilterVisitor(this, username); 132 | validated.accept(visitor); 133 | 134 | return validated.toString(); 135 | } 136 | 137 | /** 138 | * Add column masking and return new SQL 139 | */ 140 | public String rewriteDataMask(String username, String singleSql) { 141 | // parse and validate sql 142 | SqlNode validated = validate(singleSql); 143 | 144 | // add data masking and return a new abstract syntax tree 145 | DataMaskVisitor visitor = new DataMaskVisitor(this, username); 146 | validated.accept(visitor); 147 | 148 | return validated.toString(); 149 | } 150 | 151 | /** 152 | * Add row-level filter and column masking, then return new SQL. 153 | */ 154 | public String mixedRewrite(String username, String singleSql) { 155 | // parse and validate sql 156 | SqlNode validated = validate(singleSql); 157 | 158 | // add row-level filter and return a new abstract syntax tree 159 | RowFilterVisitor rowFilterVisitor = new RowFilterVisitor(this, username); 160 | validated.accept(rowFilterVisitor); 161 | 162 | // add data masking and return a new abstract syntax tree 163 | DataMaskVisitor dataMaskVisitor = new DataMaskVisitor(this, username); 164 | validated.accept(dataMaskVisitor); 165 | 166 | return validated.toString(); 167 | } 168 | 169 | /** 170 | * Parse and validate sql, then return the abstract syntax tree 171 | */ 172 | private SqlNode validate(String singleSql) { 173 | // parsing sql and return the abstract syntax tree 174 | SqlNode sqlNode = parser.parseSql(singleSql); 175 | 176 | // validate the ast 177 | return parser.validate(sqlNode); 178 | } 179 | 180 | /** 181 | * Parses a SQL expression into a {@link SqlNode} 182 | */ 183 | public SqlNode parseExpression(String sqlExpression) { 184 | return parser.parseExpression(sqlExpression); 185 | } 186 | 187 | /** 188 | * Execute a single SQL statement directly; returns 10 rows by default 189 | */ 190 | public List<Row> execute(String singleSql) { 191 | return execute(singleSql, 10); 192 | } 193 | 194 | /** 195 | * Execute the single sql directly, and return at most size rows 196 | */ 197 | public List<Row> execute(String singleSql, int size) { 198 | LOG.info("Execute SQL: {}", singleSql); 199 | TableResult tableResult = tableEnv.executeSql(singleSql); 200 | return fetchRows(tableResult.collect(), size); 201 | } 202 | 203 | /** 204 | * Execute the single sql with user rewrite policies 205 | */ 206 | private List<Row> executeWithRewrite(String username, String originSql, BinaryOperator<String> rewriteFunction, 207 | int size) { 208 | LOG.info("Origin SQL: {}", originSql); 209 |
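// rewriteFunction is a method reference to one of rewriteRowFilter / rewriteDataMask / mixedRewrite, applied as (username, originSql) -> rewrittenSql before execution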
String rewriteSql = rewriteFunction.apply(username, originSql); 210 | LOG.info("Rewrite SQL: {}", rewriteSql); 211 | return execute(rewriteSql, size); 212 | } 213 | 214 | /** 215 | * Execute the single sql with user row-level filter policies 216 | */ 217 | public List<Row> executeRowFilter(String username, String singleSql, int size) { 218 | return executeWithRewrite(username, singleSql, this::rewriteRowFilter, size); 219 | } 220 | 221 | /** 222 | * Execute the single sql with user data mask policies 223 | */ 224 | public List<Row> executeDataMask(String username, String singleSql, int size) { 225 | return executeWithRewrite(username, singleSql, this::rewriteDataMask, size); 226 | } 227 | 228 | /** 229 | * Execute the single sql with user row-level filter and data mask policies 230 | */ 231 | public List<Row> mixedExecute(String username, String singleSql, int size) { 232 | return executeWithRewrite(username, singleSql, this::mixedRewrite, size); 233 | } 234 | 235 | private List<Row> fetchRows(Iterator<Row> iter, int size) { 236 | List<Row> rowList = new ArrayList<>(size); 237 | while (size > 0 && iter.hasNext()) { 238 | rowList.add(iter.next()); 239 | size--; 240 | } 241 | return rowList; 242 | } 243 | 244 | public PolicyManager getPolicyManager() { 245 | return policyManager; 246 | } 247 | 248 | private Catalog getCatalog(String catalogName) { 249 | return tableEnv.getCatalog(catalogName) 250 | .orElseThrow(() -> new ValidationException(String.format("Catalog %s does not exist", catalogName))); 251 | } 252 | 253 | public TableEntity getTable(ObjectIdentifier identifier) { 254 | return getTable(identifier.getCatalogName(), identifier.getDatabaseName(), identifier.getObjectName()); 255 | } 256 | 257 | public TableEntity getTable(String catalogName, String database, String tableName) { 258 | ObjectPath objectPath = new ObjectPath(database, tableName); 259 | try { 260 | CatalogBaseTable table = getCatalog(catalogName).getTable(objectPath); 261 | Schema schema = table.getUnresolvedSchema(); 262 | LOG.info("table.schema: {}", schema); 263 | 264 | List<ColumnEntity> columnList = schema.getColumns() 265 | .stream() 266 | .map(column -> new ColumnEntity(column.getName(), processColumnType(column))) 267 | .collect(Collectors.toList()); 268 | 269 | return new TableEntity(ObjectIdentifier.of(catalogName, database, tableName), columnList); 270 | } catch (TableNotExistException e) { 271 | throw new TableException(String.format( 272 | "Cannot find table '%s' in database %s of catalog %s.", tableName, database, catalogName)); 273 | } 274 | } 275 | 276 | private String processColumnType(UnresolvedColumn column) { 277 | if (column instanceof UnresolvedComputedColumn) { 278 | return ((UnresolvedComputedColumn) column) 279 | .getExpression() 280 | .asSummaryString(); 281 | } else if (column instanceof UnresolvedPhysicalColumn) { 282 | return ((UnresolvedPhysicalColumn) column).getDataType() 283 | .toString() 284 | // delete NOT NULL 285 | .replace("NOT NULL", "") 286 | .trim(); 287 | } else if (column instanceof UnresolvedMetadataColumn) { 288 | return ((UnresolvedMetadataColumn) column).getDataType().toString(); 289 | } else { 290 | throw new IllegalArgumentException("Unsupported column type: " + column); 291 | } 292 | } 293 | } 294 | -------------------------------------------------------------------------------- /src/main/java/com/hw/security/flink/enums/DataMaskType.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more
contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.hw.security.flink.enums; 20 | 21 | import lombok.Data; 22 | 23 | import java.util.Map; 24 | 25 | /** 26 | * @description: DataMaskType 27 | * @author: HamaWhite 28 | */ 29 | @Data 30 | public class DataMaskType { 31 | 32 | private Long itemId; 33 | 34 | private String name; 35 | 36 | private String label; 37 | 38 | private String description; 39 | 40 | private String transformer; 41 | 42 | private Map<String, String> dataMaskOptions; 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/com/hw/security/flink/exception/CustomException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.hw.security.flink.exception; 20 | 21 | /** 22 | * @description: CustomException 23 | * @author: HamaWhite 24 | */ 25 | public class CustomException extends RuntimeException { 26 | 27 | private static final long serialVersionUID = 193141189399279147L; 28 | 29 | /** 30 | * Creates a new exception with the given message and null as the cause. 31 | * 32 | * @param message The exception message 33 | */ 34 | public CustomException(String message) { 35 | super(message); 36 | } 37 | 38 | /** 39 | * Creates a new exception with a null message and the given cause. 40 | * 41 | * @param cause The exception that caused this exception 42 | */ 43 | public CustomException(Throwable cause) { 44 | super(cause); 45 | } 46 | 47 | /** 48 | * Creates a new exception with the given message and cause.
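 * <p>For example, as PolicyManager does when a resource file cannot be read ({@code fileName} here is illustrative): <pre>{@code throw new CustomException(String.format("read file %s error", fileName), e); }</pre>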
49 | * 50 | * @param message The exception message 51 | * @param cause The exception that caused this exception 52 | */ 53 | public CustomException(String message, Throwable cause) { 54 | super(message, cause); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/com/hw/security/flink/model/ColumnEntity.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.hw.security.flink.model; 20 | 21 | import lombok.AllArgsConstructor; 22 | import lombok.Data; 23 | import lombok.experimental.Accessors; 24 | 25 | /** 26 | * @description: ColumnEntity 27 | * @author: HamaWhite 28 | */ 29 | @Data 30 | @AllArgsConstructor 31 | @Accessors(chain = true) 32 | public class ColumnEntity { 33 | 34 | private String columnName; 35 | 36 | private String columnType; 37 | } 38 | -------------------------------------------------------------------------------- /src/main/java/com/hw/security/flink/model/TableEntity.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package com.hw.security.flink.model; 20 | 21 | import org.apache.flink.table.catalog.ObjectIdentifier; 22 | 23 | import lombok.AllArgsConstructor; 24 | import lombok.Data; 25 | import lombok.experimental.Accessors; 26 | 27 | import java.util.List; 28 | 29 | /** 30 | * @description: TableEntity 31 | * @author: HamaWhite 32 | */ 33 | @Data 34 | @AllArgsConstructor 35 | @Accessors(chain = true) 36 | public class TableEntity { 37 | 38 | private ObjectIdentifier tableIdentifier; 39 | 40 | private List<ColumnEntity> columnList; 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/com/hw/security/flink/policy/DataMaskPolicy.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.hw.security.flink.policy; 20 | 21 | import lombok.AllArgsConstructor; 22 | import lombok.Data; 23 | 24 | /** 25 | * @description: DataMaskPolicy 26 | * @author: HamaWhite 27 | */ 28 | @Data 29 | @AllArgsConstructor 30 | public class DataMaskPolicy { 31 | 32 | private String username; 33 | 34 | private String catalogName; 35 | 36 | private String database; 37 | 38 | private String tableName; 39 | 40 | private String columnName; 41 | 42 | private String condition; 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/com/hw/security/flink/policy/RowFilterPolicy.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License.
17 | */ 18 | 19 | package com.hw.security.flink.policy; 20 | 21 | import lombok.AllArgsConstructor; 22 | import lombok.Data; 23 | 24 | /** 25 | * @description: RowFilterPolicy 26 | * @author: HamaWhite 27 | */ 28 | @Data 29 | @AllArgsConstructor 30 | public class RowFilterPolicy { 31 | 32 | private String username; 33 | 34 | private String catalogName; 35 | 36 | private String database; 37 | 38 | private String tableName; 39 | 40 | private String condition; 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/com/hw/security/flink/util/ResourceReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.hw.security.flink.util; 20 | 21 | import java.io.IOException; 22 | import java.net.URISyntaxException; 23 | import java.net.URL; 24 | import java.nio.file.Files; 25 | import java.nio.file.Path; 26 | import java.nio.file.Paths; 27 | 28 | import static java.util.Objects.requireNonNull; 29 | 30 | /** 31 | * @description: ResourceReader 32 | * @author: HamaWhite 33 | */ 34 | public class ResourceReader { 35 | 36 | private ResourceReader() { 37 | } 38 | 39 | public static byte[] readFile(String fileName) throws URISyntaxException, IOException { 40 | URL url = ResourceReader.class.getClassLoader().getResource(fileName); 41 | Path path = Paths.get(requireNonNull(url).toURI()); 42 | return Files.readAllBytes(path); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/com/hw/security/flink/visitor/DataMaskVisitor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package com.hw.security.flink.visitor; 20 | 21 | import com.google.common.collect.ImmutableList; 22 | import com.hw.security.flink.SecurityContext; 23 | import com.hw.security.flink.enums.DataMaskType; 24 | import com.hw.security.flink.exception.CustomException; 25 | import com.hw.security.flink.model.ColumnEntity; 26 | import com.hw.security.flink.model.TableEntity; 27 | import com.hw.security.flink.visitor.basic.AbstractBasicVisitor; 28 | 29 | import org.apache.calcite.sql.*; 30 | import org.apache.calcite.sql.parser.SqlParserPos; 31 | import org.apache.flink.table.catalog.ObjectIdentifier; 32 | import org.slf4j.Logger; 33 | import org.slf4j.LoggerFactory; 34 | 35 | import java.lang.reflect.Method; 36 | import java.util.ArrayList; 37 | import java.util.List; 38 | import java.util.Optional; 39 | 40 | import static com.hw.security.flink.visitor.DataMaskVisitor.ParentType.*; 41 | 42 | /** 43 | * @description: DataMaskVisitor 44 | * @author: HamaWhite 45 | */ 46 | public class DataMaskVisitor extends AbstractBasicVisitor { 47 | 48 | private static final Logger LOG = LoggerFactory.getLogger(DataMaskVisitor.class); 49 | 50 | public DataMaskVisitor(SecurityContext context, String username) { 51 | super(context, username); 52 | } 53 | 54 | @Override 55 | public Void visit(SqlCall call) { 56 | if (call instanceof SqlSelect) { 57 | SqlSelect sqlSelect = (SqlSelect) call; 58 | if (!isCustomSqlSelect(sqlSelect)) { 59 | walkTreeMaskTableRef(sqlSelect, SQL_SELECT, sqlSelect.getFrom()); 60 | } 61 | } 62 | return super.visit(call); 63 | } 64 | 65 | private void walkTreeMaskTableRef(SqlNode parent, ParentType parentType, SqlNode from) { 66 | if (from instanceof SqlJoin) { 67 | SqlJoin sqlJoin = (SqlJoin) from; 68 | walkTreeMaskTableRef(sqlJoin, SQL_JOIN_LEFT, sqlJoin.getLeft()); 69 | walkTreeMaskTableRef(sqlJoin, SQL_JOIN_RIGHT, sqlJoin.getRight()); 70 | } else if (from instanceof SqlBasicCall) { 71 | SqlNode[] operands = ((SqlBasicCall) from).getOperands(); 72 | // operands[0] is a SqlIdentifier for a plain table reference; for a sub-query it is a SqlSelect and is skipped here 73 | if (operands[0] instanceof SqlIdentifier) { 74 | String tablePath = operands[0].toString(); 75 | String tableAlias = operands[1].toString(); 76 | LOG.debug("SqlBasicCall-tablePath: [{}], tableAlias: [{}]", tablePath, tableAlias); 77 | addDataMask(parent, parentType, tablePath, tableAlias); 78 | } 79 | } 80 | } 81 | 82 | private void addDataMask(SqlNode parent, ParentType parentType, String tablePath, String tableAlias) { 83 | ObjectIdentifier tableIdentifier = toObjectIdentifier(tablePath); 84 | TableEntity table = securityContext.getTable(tableIdentifier); 85 | 86 | boolean doColumnMasking = false; 87 | List<String> columnTransformerList = new ArrayList<>(); 88 | for (ColumnEntity column : table.getColumnList()) { 89 | String columnTransformer = column.getColumnName(); 90 | Optional<String> condition = policyManager.getDataMaskCondition(username, 91 | tableIdentifier.getCatalogName(), 92 | tableIdentifier.getDatabaseName(), 93 | tableIdentifier.getObjectName(), 94 | column.getColumnName()); 95 | if (condition.isPresent()) { 96 | doColumnMasking = true; 97 | DataMaskType maskType = policyManager.getDataMaskType(condition.get()); 98 | columnTransformer = maskType.getTransformer().replace("{col}", column.getColumnName()); 99 | } 100 | columnTransformerList.add(columnTransformer); 101 | } 102 | if (doColumnMasking) { 103 | String replaceText = buildReplaceText(table, columnTransformerList); 104 | SqlSelect sqlSelect = (SqlSelect) securityContext.parseExpression(replaceText);
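// replaceText is a derived sub-query; e.g. with a MASK_HASH policy on customer_name (values illustrative): (SELECT order_id, CAST( mask_hash(customer_name) AS STRING) AS customer_name, ... FROM hive.default.orders)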
105 | // mark this SqlSelect as custom, no need to rewrite 106 | setSqlSelectCustom(sqlSelect, true); 107 | SqlNode[] operands = new SqlNode[2]; 108 | operands[0] = sqlSelect; 109 | // add table alias 110 | operands[1] = new SqlIdentifier(ImmutableList.of(tableAlias), null, new SqlParserPos(0, 0), null); 111 | SqlBasicCall replaced = new SqlBasicCall(new SqlAsOperator(), operands, new SqlParserPos(0, 0)); 112 | rewrittenTree(parent, parentType, replaced); 113 | } 114 | } 115 | 116 | private String buildReplaceText(TableEntity table, List<String> columnTransformerList) { 117 | StringBuilder sb = new StringBuilder(); 118 | sb.append("(SELECT "); 119 | boolean firstOne = true; 120 | for (int index = 0; index < columnTransformerList.size(); index++) { 121 | String transformer = columnTransformerList.get(index); 122 | if (!firstOne) { 123 | sb.append(", "); 124 | } else { 125 | firstOne = false; 126 | } 127 | ColumnEntity column = table.getColumnList().get(index); 128 | String colName = column.getColumnName(); 129 | if (!transformer.equals(colName)) { 130 | // CAST(transformer AS col_type) AS col_name 131 | sb.append(String.format("CAST( %s AS %s) AS %s", transformer, column.getColumnType(), 132 | column.getColumnName())); 133 | } else { 134 | sb.append(column.getColumnName()); 135 | } 136 | } 137 | sb.append(" FROM "); 138 | sb.append(table.getTableIdentifier().asSerializableString()); 139 | sb.append(")"); 140 | return sb.toString(); 141 | } 142 | 143 | private void rewrittenTree(SqlNode parent, ParentType parentType, SqlBasicCall replaced) { 144 | switch (parentType) { 145 | case SQL_SELECT: 146 | ((SqlSelect) parent).setFrom(replaced); 147 | break; 148 | case SQL_JOIN_LEFT: 149 | ((SqlJoin) parent).setLeft(replaced); 150 | break; 151 | case SQL_JOIN_RIGHT: 152 | ((SqlJoin) parent).setRight(replaced); 153 | break; 154 | default: 155 | throw new IllegalArgumentException("Unsupported parent type: " + parentType); 156 | } 157 | } 158 | 159 | public enum ParentType { 160 | // parent is SqlSelect 161 | SQL_SELECT, 162 | // parent is the left of SqlJoin 163 | SQL_JOIN_LEFT, 164 | // parent is the right of SqlJoin 165 | SQL_JOIN_RIGHT 166 | } 167 | 168 | public void setSqlSelectCustom(SqlSelect sqlSelect, boolean custom) { 169 | try { 170 | Method declaredMethod = SqlSelect.class.getDeclaredMethod("setCustom", boolean.class); 171 | declaredMethod.invoke(sqlSelect, custom); 172 | } catch (Exception e) { 173 | throw new CustomException(e); 174 | } 175 | } 176 | 177 | public boolean isCustomSqlSelect(SqlSelect sqlSelect) { 178 | try { 179 | Method declaredMethod = SqlSelect.class.getDeclaredMethod("isCustom"); 180 | return (boolean) declaredMethod.invoke(sqlSelect); 181 | } catch (Exception e) { 182 | throw new CustomException(e); 183 | } 184 | } 185 | } 186 | -------------------------------------------------------------------------------- /src/main/java/com/hw/security/flink/visitor/RowFilterVisitor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License.
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.hw.security.flink.visitor; 20 | 21 | import com.google.common.collect.ImmutableList; 22 | import com.hw.security.flink.SecurityContext; 23 | import com.hw.security.flink.visitor.basic.AbstractBasicVisitor; 24 | 25 | import org.apache.calcite.sql.*; 26 | import org.apache.calcite.sql.parser.SqlParserPos; 27 | import org.apache.flink.table.catalog.ObjectIdentifier; 28 | import org.slf4j.Logger; 29 | import org.slf4j.LoggerFactory; 30 | 31 | import java.util.Optional; 32 | 33 | /** 34 | * @description: RowFilterVisitor 35 | * @author: HamaWhite 36 | */ 37 | public class RowFilterVisitor extends AbstractBasicVisitor { 38 | 39 | private static final Logger LOG = LoggerFactory.getLogger(RowFilterVisitor.class); 40 | 41 | public RowFilterVisitor(SecurityContext context, String username) { 42 | super(context, username); 43 | } 44 | 45 | @Override 46 | public Void visit(SqlCall call) { 47 | if (call instanceof SqlSelect) { 48 | SqlSelect sqlSelect = (SqlSelect) call; 49 | 50 | SqlNode originWhere = sqlSelect.getWhere(); 51 | // add row level filter condition for where clause 52 | SqlNode rowFilterWhere = addCondition(sqlSelect.getFrom(), originWhere); 53 | if (rowFilterWhere != originWhere) { 54 | LOG.info("Rewritten SQL based on row-level privilege filtering for user [{}]", username); 55 | } 56 | sqlSelect.setWhere(rowFilterWhere); 57 | } 58 | return super.visit(call); 59 | } 60 | 61 | /** 62 | * The main process of controlling row-level permissions 63 | */ 64 | private SqlNode addCondition(SqlNode from, SqlNode where) { 65 | if (from instanceof SqlJoin) { 66 | SqlJoin sqlJoin = (SqlJoin) from; 67 | // support recursive processing, such as join for three tables, process left sqlNode 68 | where = addCondition(sqlJoin.getLeft(), where); 69 | // process right sqlNode 70 | return addCondition(sqlJoin.getRight(), where); 71 | } else if (from instanceof SqlBasicCall) { 72 | SqlNode[] tableNodes = ((SqlBasicCall) from).getOperands(); 73 | /* 74 | * If there is a sub-query in the Join, row-level filtering has been appended to the sub-query. 
What is 75 | * returned here is the SqlSelect type, just return the original where directly 76 | */ 77 | if (!(tableNodes[0] instanceof SqlIdentifier)) { 78 | return where; 79 | } 80 | String tablePath = tableNodes[0].toString(); 81 | String tableAlias = tableNodes[1].toString(); 82 | LOG.debug("SqlBasicCall-tablePath: [{}], tableAlias: [{}]", tablePath, tableAlias); 83 | return addRowFilter(where, tablePath, tableAlias); 84 | } 85 | return where; 86 | } 87 | 88 | /** 89 | * Add row-level filtering based on user-configured permission points 90 | */ 91 | private SqlNode addRowFilter(SqlNode where, String tablePath, String tableAlias) { 92 | ObjectIdentifier tableIdentifier = toObjectIdentifier(tablePath); 93 | 94 | Optional<String> condition = policyManager.getRowFilterCondition(username, 95 | tableIdentifier.getCatalogName(), 96 | tableIdentifier.getDatabaseName(), 97 | tableIdentifier.getObjectName()); 98 | 99 | if (condition.isPresent()) { 100 | SqlBasicCall sqlBasicCall = (SqlBasicCall) securityContext.parseExpression(condition.get()); 101 | ImmutableList<String> namesList = ImmutableList.of(tableAlias, sqlBasicCall.getOperands()[0].toString()); 102 | sqlBasicCall.getOperands()[0] = new SqlIdentifier(namesList, null, new SqlParserPos(0, 0), null); 103 | return buildWhereClause(where, sqlBasicCall); 104 | } 105 | return buildWhereClause(where, null); 106 | } 107 | 108 | /** 109 | * Rebuild the where clause 110 | */ 111 | private SqlNode buildWhereClause(SqlNode where, SqlBasicCall permissions) { 112 | if (permissions != null) { 113 | if (where == null) { 114 | return permissions; 115 | } 116 | SqlBinaryOperator sqlBinaryOperator = 117 | new SqlBinaryOperator(SqlKind.AND.name(), SqlKind.AND, 0, true, null, null, null); 118 | SqlNode[] operands = new SqlNode[2]; 119 | operands[0] = where; 120 | operands[1] = permissions; 121 | SqlParserPos sqlParserPos = new SqlParserPos(0, 0); 122 | return new SqlBasicCall(sqlBinaryOperator, operands, sqlParserPos); 123 | } 124 | return where; 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /src/main/java/com/hw/security/flink/visitor/basic/AbstractBasicVisitor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License.
17 | */ 18 | 19 | package com.hw.security.flink.visitor.basic; 20 | 21 | import com.hw.security.flink.PolicyManager; 22 | import com.hw.security.flink.SecurityContext; 23 | 24 | import org.apache.calcite.sql.util.SqlBasicVisitor; 25 | import org.apache.flink.table.catalog.ObjectIdentifier; 26 | 27 | /** 28 | * @description: AbstractBasicVisitor 29 | * @author: HamaWhite 30 | */ 31 | public abstract class AbstractBasicVisitor extends SqlBasicVisitor<Void> { 32 | 33 | protected final SecurityContext securityContext; 34 | 35 | protected final PolicyManager policyManager; 36 | 37 | protected final String username; 38 | 39 | protected AbstractBasicVisitor(SecurityContext securityContext, String username) { 40 | this.securityContext = securityContext; 41 | this.policyManager = securityContext.getPolicyManager(); 42 | this.username = username; 43 | } 44 | 45 | protected ObjectIdentifier toObjectIdentifier(String tablePath) { 46 | String[] items = tablePath.split("\\."); 47 | return ObjectIdentifier.of(items[0], items[1], items[2]); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/extensions/org/apache/flink/table/planner/delegation/ParserImpl/ParserImplExtension.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package extensions.org.apache.flink.table.planner.delegation.ParserImpl; 20 | 21 | import org.apache.calcite.sql.SqlNode; 22 | import org.apache.calcite.sql.SqlNodeList; 23 | import org.apache.flink.table.api.SqlParserException; 24 | import org.apache.flink.table.planner.calcite.FlinkPlannerImpl; 25 | import org.apache.flink.table.planner.delegation.ParserImpl; 26 | import org.apache.flink.table.planner.parse.CalciteParser; 27 | import org.apache.flink.util.Preconditions; 28 | 29 | import manifold.ext.rt.api.Extension; 30 | import manifold.ext.rt.api.Jailbreak; 31 | import manifold.ext.rt.api.This; 32 | 33 | import java.util.List; 34 | 35 | /** 36 | * Extend {@link ParserImpl} with Manifold to add the methods parseExpression(String) and parseSql(String) 37 | * 38 | * @author: HamaWhite 39 | */ 40 | @Extension 41 | public class ParserImplExtension { 42 | 43 | private ParserImplExtension() { 44 | throw new IllegalStateException("Extension class"); 45 | } 46 | 47 | /** 48 | * Parses a SQL expression into a {@link SqlNode}. The {@link SqlNode} is not yet validated.
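 * <p>An illustrative call (the expression text is an assumption, not from the project): <pre>{@code SqlNode expression = parser.parseExpression("region = 'beijing'"); }</pre>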
49 | * 50 | * @param sqlExpression a SQL expression string to parse 51 | * @return a parsed SQL node 52 | * @throws SqlParserException if an exception is thrown when parsing the statement 53 | */ 54 | public static SqlNode parseExpression(@This @Jailbreak ParserImpl thiz, String sqlExpression) { 55 | // add @Jailbreak annotation to access private variables 56 | CalciteParser parser = thiz.calciteParserSupplier.get(); 57 | return parser.parseExpression(sqlExpression); 58 | } 59 | 60 | /** 61 | * Entry point for parsing SQL queries and returning the abstract syntax tree 62 | * 63 | * @param statement the SQL statement to evaluate 64 | * @return abstract syntax tree 65 | * @throws org.apache.flink.table.api.SqlParserException if the statement cannot be parsed 66 | */ 67 | public static SqlNode parseSql(@This @Jailbreak ParserImpl thiz, String statement) { 68 | // add @Jailbreak annotation to access private variables 69 | CalciteParser parser = thiz.calciteParserSupplier.get(); 70 | 71 | // use parseSqlList here because we need to support statement end with ';' in sql client. 72 | SqlNodeList sqlNodeList = parser.parseSqlList(statement); 73 | List<SqlNode> parsed = sqlNodeList.getList(); 74 | Preconditions.checkArgument(parsed.size() == 1, "only single statement supported"); 75 | return parsed.get(0); 76 | } 77 | 78 | /** 79 | * Validate the query 80 | * 81 | * @param thiz Implementation of Parser that uses Calcite. 82 | * @param sqlNode SqlNode to execute on 83 | * @return validated sqlNode 84 | */ 85 | public static SqlNode validate(@This @Jailbreak ParserImpl thiz, SqlNode sqlNode) { 86 | // add @Jailbreak annotation to access private variables 87 | FlinkPlannerImpl flinkPlanner = thiz.validatorSupplier.get(); 88 | return flinkPlanner.validate(sqlNode); 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /src/main/resources/data_mask_types.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "itemId": 1, 4 | "name": "MASK", 5 | "label": "Redact", 6 | "description": "Replace lowercase with 'x', uppercase with 'X', digits with '0'", 7 | "transformer": "mask({col})", 8 | "dataMaskOptions": {} 9 | }, 10 | { 11 | "itemId": 2, 12 | "name": "MASK_SHOW_LAST_4", 13 | "label": "Partial mask: show last 4", 14 | "description": "Show last 4 characters; replace rest with 'x'", 15 | "transformer": "mask_show_last_n({col}, 4, 'x', 'x', 'x', -1, '1')", 16 | "dataMaskOptions": {} 17 | }, 18 | { 19 | "itemId": 3, 20 | "name": "MASK_SHOW_FIRST_4", 21 | "label": "Partial mask: show first 4", 22 | "description": "Show first 4 characters; replace rest with 'x'", 23 | "transformer": "mask_show_first_n({col}, 4, 'x', 'x', 'x', -1, '1')", 24 | "dataMaskOptions": {} 25 | }, 26 | { 27 | "itemId": 4, 28 | "name": "MASK_HASH", 29 | "label": "Hash", 30 | "description": "Hash the value", 31 | "transformer": "mask_hash({col})", 32 | "dataMaskOptions": {} 33 | }, 34 | { 35 | "itemId": 5, 36 | "name": "MASK_NULL", 37 | "label": "Nullify", 38 | "description": "Replace with NULL", 39 | "dataMaskOptions": {} 40 | }, 41 | { 42 | "itemId": 6, 43 | "name": "MASK_NONE", 44 | "label": "Unmasked (retain original value)", 45 | "description": "No masking", 46 | "dataMaskOptions": {} 47 | }, 48 | { 49 | "itemId": 12, 50 | "name": "MASK_DATE_SHOW_YEAR", 51 | "label": "Date: show only year", 52 | "description": "Date: show only year", 53 | "transformer": "mask({col}, 'x', 'x', 'x', -1, '1', 1, 0, -1)", 54 | "dataMaskOptions": {} 55 | }, 56 | {
"itemId": 13, 58 | "name": "CUSTOM", 59 | "label": "Custom", 60 | "description": "Custom", 61 | "dataMaskOptions": {} 62 | } 63 | ] -------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | log4j.rootLogger=ERROR, console 20 | 21 | log4j.appender.console=org.apache.log4j.ConsoleAppender 22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 23 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-20c %x - %m%n 24 | 25 | log4j.logger.com.hw.security.flink=DEBUG -------------------------------------------------------------------------------- /src/test/java/apache/flink/table/catalog/hive/HiveTestUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package apache.flink.table.catalog.hive; 20 | 21 | import org.apache.flink.table.catalog.exceptions.CatalogException; 22 | import org.apache.flink.table.catalog.hive.HiveCatalog; 23 | import org.apache.hadoop.hive.conf.HiveConf; 24 | import org.junit.rules.TemporaryFolder; 25 | 26 | import java.io.IOException; 27 | 28 | /** 29 | * Test utils for Hive connector. 
30 | * 31 | * @description: HiveTestUtils 32 | * @author: HamaWhite 33 | */ 34 | public class HiveTestUtils { 35 | 36 | private static final String HIVE_WAREHOUSE_URI_FORMAT = "jdbc:derby:;databaseName=%s;create=true"; 37 | 38 | private static final TemporaryFolder TEMPORARY_FOLDER = new TemporaryFolder(); 39 | 40 | public static HiveCatalog createHiveCatalog(String catalogName, String defaultDatabase, String hiveVersion) { 41 | return new HiveCatalog( 42 | catalogName, 43 | defaultDatabase, 44 | createHiveConf(), 45 | hiveVersion, 46 | true); 47 | } 48 | 49 | public static HiveConf createHiveConf() { 50 | ClassLoader classLoader = HiveTestUtils.class.getClassLoader(); 51 | 52 | try { 53 | TEMPORARY_FOLDER.create(); 54 | String warehouseDir = TEMPORARY_FOLDER.newFolder().getAbsolutePath() + "/metastore_db"; 55 | String warehouseUri = String.format(HIVE_WAREHOUSE_URI_FORMAT, warehouseDir); 56 | 57 | HiveConf.setHiveSiteLocation(classLoader.getResource(HiveCatalog.HIVE_SITE_FILE)); 58 | HiveConf hiveConf = new HiveConf(); 59 | hiveConf.setVar( 60 | HiveConf.ConfVars.METASTOREWAREHOUSE, 61 | TEMPORARY_FOLDER.newFolder("hive_warehouse").getAbsolutePath()); 62 | hiveConf.setVar(HiveConf.ConfVars.METASTORECONNECTURLKEY, warehouseUri); 63 | return hiveConf; 64 | } catch (IOException e) { 65 | throw new CatalogException("Failed to create test HiveConf to HiveCatalog.", e); 66 | } 67 | } 68 | 69 | public static void deleteTemporaryFolder() { 70 | TEMPORARY_FOLDER.delete(); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/test/java/com/hw/security/flink/PolicyManagerTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package com.hw.security.flink; 20 | 21 | import com.hw.security.flink.enums.DataMaskType; 22 | 23 | import org.junit.Test; 24 | 25 | import java.util.Collections; 26 | 27 | import static org.assertj.core.api.AssertionsForClassTypes.assertThat; 28 | 29 | /** 30 | * @description: PolicyManagerTest 31 | * @author: HamaWhite 32 | */ 33 | public class PolicyManagerTest { 34 | 35 | private final PolicyManager policyManager = new PolicyManager(); 36 | 37 | @Test 38 | public void testGetDataMaskType() { 39 | DataMaskType ret = policyManager.getDataMaskType("MASK_HASH"); 40 | 41 | assertThat(ret).isNotNull(); 42 | assertThat(ret.getItemId()).isEqualTo(4L); 43 | assertThat(ret.getName()).isEqualTo("MASK_HASH"); 44 | assertThat(ret.getLabel()).isEqualTo("Hash"); 45 | assertThat(ret.getDescription()).isEqualTo("Hash the value"); 46 | assertThat(ret.getTransformer()).isEqualTo("mask_hash({col})"); 47 | assertThat(ret.getDataMaskOptions()).isEqualTo(Collections.emptyMap()); 48 | } 49 | } -------------------------------------------------------------------------------- /src/test/java/com/hw/security/flink/SuiteTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.hw.security.flink; 20 | 21 | import com.hw.security.flink.common.CommonTest; 22 | import com.hw.security.flink.execute.ExecuteDataMaskTest; 23 | import com.hw.security.flink.execute.ExecuteRowFilterTest; 24 | import com.hw.security.flink.execute.MixedExecuteTest; 25 | import com.hw.security.flink.rewrite.MixedRewriteTest; 26 | import com.hw.security.flink.rewrite.RewriteDataMaskTest; 27 | import com.hw.security.flink.rewrite.RewriteRowFilterTest; 28 | 29 | import org.junit.Ignore; 30 | import org.junit.runner.RunWith; 31 | import org.junit.runners.Suite; 32 | 33 | /** 34 | * Annotated with @Ignore, so run it manually 35 | * 36 | * @description: SuiteTest 37 | * @author: HamaWhite 38 | */ 39 | @Ignore 40 | @RunWith(Suite.class) 41 | @Suite.SuiteClasses({CommonTest.class, 42 | PolicyManagerTest.class, 43 | RewriteRowFilterTest.class, 44 | RewriteDataMaskTest.class, 45 | MixedRewriteTest.class, 46 | ExecuteRowFilterTest.class, 47 | ExecuteDataMaskTest.class, 48 | MixedExecuteTest.class}) 49 | public class SuiteTest { 50 | /* 51 | * The entry class of the test suite is just to organize the test classes together for testing, without any test 52 | * methods.
53 | */ 54 | } 55 | -------------------------------------------------------------------------------- /src/test/java/com/hw/security/flink/basic/AbstractBasicTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.hw.security.flink.basic; 20 | 21 | import com.hw.security.flink.PolicyManager; 22 | import com.hw.security.flink.SecurityContext; 23 | import com.hw.security.flink.policy.DataMaskPolicy; 24 | import com.hw.security.flink.policy.RowFilterPolicy; 25 | 26 | import org.apache.flink.table.catalog.hive.HiveCatalog; 27 | import org.apache.flink.types.Row; 28 | import org.junit.AfterClass; 29 | import org.junit.BeforeClass; 30 | import org.slf4j.Logger; 31 | import org.slf4j.LoggerFactory; 32 | 33 | import apache.flink.table.catalog.hive.HiveTestUtils; 34 | 35 | import java.util.List; 36 | import java.util.stream.Collectors; 37 | 38 | import static org.assertj.core.api.Assertions.assertThat; 39 | import static org.junit.Assert.assertEquals; 40 | 41 | /** 42 | * Start the local hive metastore and build a test for the Hive catalog. 
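 * <p>Subclasses typically register a policy and assert on the rewritten SQL, e.g. (the condition value is illustrative): <pre>{@code policyManager.addPolicy(rowFilterPolicy(USER_A, TABLE_ORDERS, "region = 'beijing'")); rewriteRowFilter(USER_A, "SELECT * FROM orders", "SELECT * FROM orders WHERE region = 'beijing'"); }</pre>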
43 | * 44 | * @author: HamaWhite 45 | */ 46 | public abstract class AbstractBasicTest { 47 | 48 | private static final Logger LOG = LoggerFactory.getLogger(AbstractBasicTest.class); 49 | 50 | protected static final String CATALOG_NAME = "hive"; 51 | protected static final String DATABASE = "default"; 52 | private static final String HIVE_VERSION = "3.1.2"; 53 | 54 | protected static final String USER_A = "user_A"; 55 | protected static final String USER_B = "user_B"; 56 | 57 | protected static final String TABLE_ORDERS = "orders"; 58 | protected static final String TABLE_PRODUCTS = "products"; 59 | protected static final String TABLE_SHIPMENTS = "shipments"; 60 | 61 | protected static PolicyManager policyManager; 62 | protected static SecurityContext securityContext; 63 | private static HiveCatalog hiveCatalog; 64 | 65 | @BeforeClass 66 | public static void setup() { 67 | policyManager = new PolicyManager(); 68 | // use hive catalog, so that flink can use hive's data masking function, such as mask_hash, mask_first_n 69 | hiveCatalog = HiveTestUtils.createHiveCatalog(CATALOG_NAME, DATABASE, HIVE_VERSION); 70 | hiveCatalog.open(); 71 | securityContext = new SecurityContext(policyManager); 72 | securityContext.useCatalog(hiveCatalog); 73 | 74 | securityContext.execute(String.format("LOAD MODULE hive WITH ('hive-version' = '%s')", HIVE_VERSION)); 75 | } 76 | 77 | @AfterClass 78 | public static void closeCatalog() { 79 | if (hiveCatalog != null) { 80 | hiveCatalog.close(); 81 | } 82 | HiveTestUtils.deleteTemporaryFolder(); 83 | } 84 | 85 | public static RowFilterPolicy rowFilterPolicy(String username, String tableName, String condition) { 86 | return new RowFilterPolicy(username, CATALOG_NAME, DATABASE, tableName, condition); 87 | } 88 | 89 | public static DataMaskPolicy dataMaskPolicy(String username, String tableName, String columnName, 90 | String condition) { 91 | return new DataMaskPolicy(username, CATALOG_NAME, DATABASE, tableName, columnName, condition); 92 | } 93 | 94 | protected void execute(String sql, Object[][] expected) { 95 | List<Row> rowList = securityContext.execute(sql, expected.length); 96 | assertExecuteResult(expected, rowList); 97 | } 98 | 99 | protected void executeRowFilter(String username, String sql, Object[][] expected) { 100 | List<Row> rowList = securityContext.executeRowFilter(username, sql, expected.length); 101 | assertExecuteResult(expected, rowList); 102 | } 103 | 104 | protected void executeDataMask(String username, String sql, Object[][] expected) { 105 | List<Row> rowList = securityContext.executeDataMask(username, sql, expected.length); 106 | assertExecuteResult(expected, rowList); 107 | } 108 | 109 | protected void mixedExecute(String username, String sql, Object[][] expected) { 110 | List<Row> rowList = securityContext.mixedExecute(username, sql, expected.length); 111 | assertExecuteResult(expected, rowList); 112 | } 113 | 114 | protected void assertExecuteResult(Object[][] expectedArray, List<Row> actualList) { 115 | Object[][] actualArray = actualList.stream() 116 | .map(e -> { 117 | Object[] array = new Object[e.getArity()]; 118 | for (int pos = 0; pos < e.getArity(); pos++) { 119 | array[pos] = e.getField(pos); 120 | } 121 | return array; 122 | }).collect(Collectors.toList()) 123 | .toArray(new Object[0][0]); 124 | 125 | assertThat(actualArray).isEqualTo(expectedArray); 126 | } 127 | 128 | protected void rewriteRowFilter(String username, String inputSql, String expectedSql) { 129 | String resultSql = securityContext.rewriteRowFilter(username, inputSql); 130 |
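// the comparison below normalizes whitespace and strips backticks, see assertRewriteResult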
assertRewriteResult(inputSql, expectedSql, resultSql); 131 | } 132 | 133 | protected void rewriteDataMask(String username, String inputSql, String expectedSql) { 134 | String resultSql = securityContext.rewriteDataMask(username, inputSql); 135 | assertRewriteResult(inputSql, expectedSql, resultSql); 136 | } 137 | 138 | protected void mixedRewrite(String username, String inputSql, String expectedSql) { 139 | String resultSql = securityContext.mixedRewrite(username, inputSql); 140 | assertRewriteResult(inputSql, expectedSql, resultSql); 141 | } 142 | 143 | protected void assertRewriteResult(String inputSql, String expectedSql, String resultSql) { 144 | inputSql = minifySql(inputSql); 145 | expectedSql = minifySql(expectedSql); 146 | 147 | resultSql = resultSql.replace("\n", " ").replace("`", ""); 148 | LOG.info("Input SQL: {}", inputSql); 149 | LOG.info("Result SQL: {}\n", resultSql); 150 | assertEquals(expectedSql, resultSql); 151 | } 152 | 153 | /** 154 | * Normalize indentation and whitespace so SQL strings compare as one line, e.g. "SELECT ( a , b )" becomes "SELECT (a, b)" 155 | */ 156 | private String minifySql(String sql) { 157 | return sql.replaceAll("\\s+", " ") 158 | .replace(" ,", ",") 159 | .replace("( ", "(") 160 | .replace(" )", ")") 161 | .trim(); 162 | } 163 | 164 | /** 165 | * Create mysql cdc table orders 166 | */ 167 | protected static void createTableOfOrders() { 168 | securityContext.execute("DROP TABLE IF EXISTS " + TABLE_ORDERS); 169 | 170 | securityContext.execute("CREATE TABLE IF NOT EXISTS " + TABLE_ORDERS + " (" + 171 | " order_id INT PRIMARY KEY NOT ENFORCED ," + 172 | " order_date TIMESTAMP(0) ," + 173 | " customer_name STRING ," + 174 | " product_id INT ," + 175 | " price DECIMAL(10, 5) ," + 176 | " order_status BOOLEAN ," + 177 | " region STRING " + 178 | ") WITH ( " + 179 | " 'connector' = 'mysql-cdc' ," + 180 | " 'hostname' = '192.168.90.150' ," + 181 | " 'port' = '3306' ," + 182 | " 'username' = 'root' ," + 183 | " 'password' = 'root@123456' ," + 184 | " 'server-time-zone' = 'Asia/Shanghai' ," + 185 | " 'database-name' = 'demo' ," + 186 | " 'table-name' = '" + TABLE_ORDERS + "' " + 187 | ")"); 188 | } 189 | 190 | /** 191 | * Create mysql cdc table products 192 | */ 193 | protected static void createTableOfProducts() { 194 | securityContext.execute("DROP TABLE IF EXISTS " + TABLE_PRODUCTS); 195 | 196 | securityContext.execute("CREATE TABLE IF NOT EXISTS " + TABLE_PRODUCTS + " (" + 197 | " id INT PRIMARY KEY NOT ENFORCED ," + 198 | " name STRING ," + 199 | " description STRING " + 200 | ") WITH ( " + 201 | " 'connector' = 'mysql-cdc' ," + 202 | " 'hostname' = '192.168.90.150' ," + 203 | " 'port' = '3306' ," + 204 | " 'username' = 'root' ," + 205 | " 'password' = 'root@123456' ," + 206 | " 'server-time-zone' = 'Asia/Shanghai' ," + 207 | " 'database-name' = 'demo' ," + 208 | " 'table-name' = '" + TABLE_PRODUCTS + "' " + 209 | ")"); 210 | } 211 | 212 | /** 213 | * Create mysql cdc table shipments 214 | */ 215 | protected static void createTableOfShipments() { 216 | securityContext.execute("DROP TABLE IF EXISTS " + TABLE_SHIPMENTS); 217 | 218 | securityContext.execute("CREATE TABLE IF NOT EXISTS " + TABLE_SHIPMENTS + " (" + 219 | " shipment_id INT PRIMARY KEY NOT ENFORCED ," + 220 | " order_id INT ," + 221 | " origin STRING ," + 222 | " destination STRING ," + 223 | " is_arrived BOOLEAN " + 224 | ") WITH ( " + 225 | " 'connector' = 'mysql-cdc' ," + 226 | " 'hostname' = '192.168.90.150' ," + 227 | " 'port' = '3306' ," + 228 | " 'username' = 'root' ," + 229 | " 'password' = 'root@123456' ," + 230 | " 'server-time-zone' = 
'Asia/Shanghai' ," + 231 | " 'database-name' = 'demo' ," + 232 | " 'table-name' = '" + TABLE_SHIPMENTS + "' " + 233 | ")"); 234 | } 235 | 236 | /** 237 | * Create print sink table print_sink 238 | */ 239 | protected static void createTableOfPrintSink() { 240 | securityContext.execute("DROP TABLE IF EXISTS print_sink "); 241 | 242 | securityContext.execute("CREATE TABLE IF NOT EXISTS print_sink (" + 243 | " order_id INT PRIMARY KEY NOT ENFORCED ," + 244 | " order_date TIMESTAMP(0) ," + 245 | " customer_name STRING ," + 246 | " product_id INT ," + 247 | " price DECIMAL(10, 5) ," + 248 | " order_status BOOLEAN ," + 249 | " region STRING " + 250 | ") WITH ( " + 251 | " 'connector' = 'print' " + 252 | ")"); 253 | } 254 | } 255 | -------------------------------------------------------------------------------- /src/test/java/com/hw/security/flink/common/CommonTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License.
17 | */ 18 | 19 | package com.hw.security.flink.common; 20 | 21 | import com.google.common.collect.ImmutableList; 22 | import com.hw.security.flink.basic.AbstractBasicTest; 23 | import com.hw.security.flink.model.ColumnEntity; 24 | import com.hw.security.flink.model.TableEntity; 25 | 26 | import org.apache.flink.table.catalog.ObjectIdentifier; 27 | import org.apache.flink.types.Row; 28 | import org.junit.BeforeClass; 29 | import org.junit.Test; 30 | 31 | import java.util.List; 32 | 33 | import static org.assertj.core.api.Assertions.assertThat; 34 | import static org.assertj.core.api.Assertions.atIndex; 35 | import static org.junit.Assert.assertEquals; 36 | 37 | /** 38 | * @description: CommonTest 39 | * @author: HamaWhite 40 | */ 41 | public class CommonTest extends AbstractBasicTest { 42 | 43 | @BeforeClass 44 | public static void createTable() { 45 | // create mysql cdc table orders 46 | createTableOfOrders(); 47 | } 48 | 49 | @Test 50 | public void testGetTable() { 51 | ObjectIdentifier tableIdentifier = ObjectIdentifier.of(CATALOG_NAME, DATABASE, TABLE_ORDERS); 52 | TableEntity actual = securityContext.getTable(tableIdentifier); 53 | List<ColumnEntity> columnList = ImmutableList.of( 54 | new ColumnEntity("order_id", "INT"), 55 | new ColumnEntity("order_date", "TIMESTAMP(0)"), 56 | new ColumnEntity("customer_name", "STRING"), 57 | new ColumnEntity("product_id", "INT"), 58 | new ColumnEntity("price", "DECIMAL(10, 5)"), 59 | new ColumnEntity("order_status", "BOOLEAN"), 60 | new ColumnEntity("region", "STRING")); 61 | TableEntity expected = new TableEntity(tableIdentifier, columnList); 62 | assertEquals(expected, actual); 63 | } 64 | 65 | /** 66 | * Call the system function that comes with Hive in FlinkSQL (under the default database), 67 | * so that Hive UDFs can be reused when masking data in Flink SQL. 68 | * 69 | * Note: 70 | * 71 | *   1. Ranger's masking strategy is also implemented by calling Hive's UDFs. 72 | *   2. Uppercase letters are converted to "X" 73 | *   3. Lowercase letters are converted to "x" 74 | *   4. Numbers are converted to "n" 75 | *
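 * For example, mask('hive-HDFS-8765-4321') returns "xxxx-XXXX-nnnn-nnnn", as the assertions below verify.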
76 | */ 77 | @Test 78 | public void testHiveSystemFunction() { 79 | executeHiveFunction("select mask('hive-HDFS-8765-4321')", "xxxx-XXXX-nnnn-nnnn"); 80 | executeHiveFunction("select mask_first_n('hive-HDFS-8765-4321', 4)", "xxxx-HDFS-8765-4321"); 81 | executeHiveFunction("select mask_last_n('hive-HDFS-8765-4321', 4)", "hive-HDFS-8765-nnnn"); 82 | executeHiveFunction("select mask_show_first_n('hive-HDFS-8765-4321', 4)", "hive-XXXX-nnnn-nnnn"); 83 | executeHiveFunction("select mask_show_last_n('hive-HDFS-8765-4321', 4)", "xxxx-XXXX-nnnn-4321"); 84 | executeHiveFunction("select mask_hash('flink')", 85 | "7f025323639628aa5e5d24bd56f43317552b140c71406d0eb5a555671bd534d2"); 86 | } 87 | 88 | private void executeHiveFunction(String sql, String result) { 89 | List<Row> rowList = securityContext.execute(sql); 90 | assertThat(rowList).isNotNull() 91 | .extracting(e -> e.getField(0)) 92 | .contains(result, atIndex(0)); 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/test/java/com/hw/security/flink/execute/ExecuteDataMaskTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.hw.security.flink.execute; 20 | 21 | import com.hw.security.flink.basic.AbstractBasicTest; 22 | 23 | import org.junit.BeforeClass; 24 | import org.junit.Ignore; 25 | import org.junit.Test; 26 | 27 | /** 28 | * Execute SQL based on data mask. 29 | * 30 | *
Note: this test depends on an external MySQL environment, so run it manually. 31 | * 32 | * @author: HamaWhite 33 | */ 34 | @Ignore 35 | public class ExecuteDataMaskTest extends AbstractBasicTest { 36 | 37 | @BeforeClass 38 | public static void init() { 39 | // create mysql cdc table orders 40 | createTableOfOrders(); 41 | 42 | // create print sink table print_sink 43 | createTableOfPrintSink(); 44 | 45 | // add data mask policies 46 | policyManager.addPolicy(dataMaskPolicy(USER_A, TABLE_ORDERS, "customer_name", "MASK")); 47 | policyManager.addPolicy(dataMaskPolicy(USER_B, TABLE_ORDERS, "customer_name", "MASK_SHOW_FIRST_4")); 48 | } 49 | 50 | /** 51 | * Execute without data mask 52 | */ 53 | @Test 54 | public void testExecute() { 55 | String sql = "SELECT order_id, customer_name, product_id, region FROM orders"; 56 | 57 | Object[][] expected = { 58 | {10001, "Jack", 102, "beijing"}, 59 | {10002, "Sally", 105, "beijing"}, 60 | {10003, "Edward", 106, "hangzhou"}, 61 | {10004, "John", 103, "hangzhou"}, 62 | {10005, "Edward", 104, "shanghai"}, 63 | {10006, "Jack", 103, "shanghai"} 64 | }; 65 | execute(sql, expected); 66 | } 67 | 68 | /** 69 | * User A views customer_name after mask 70 | */ 71 | @Test 72 | public void testExecuteByUserA() { 73 | String sql = "SELECT order_id, customer_name, product_id, region FROM orders"; 74 | 75 | Object[][] expected = { 76 | {10001, "Xxxx", 102, "beijing"}, 77 | {10002, "Xxxxx", 105, "beijing"}, 78 | {10003, "Xxxxxx", 106, "hangzhou"}, 79 | {10004, "Xxxx", 103, "hangzhou"}, 80 | {10005, "Xxxxxx", 104, "shanghai"}, 81 | {10006, "Xxxx", 103, "shanghai"} 82 | }; 83 | executeDataMask(USER_A, sql, expected); 84 | } 85 | 86 | /** 87 | * User B views customer_name after mask_show_first_4 88 | */ 89 | @Test 90 | public void testExecuteByUserB() { 91 | String sql = "SELECT order_id, customer_name, product_id, region FROM orders"; 92 | 93 | Object[][] expected = { 94 | {10001, "Jack", 102, "beijing"}, 95 | {10002, "Sallx", 105, "beijing"}, 96 | {10003, "Edwaxx", 106, "hangzhou"}, 97 | {10004, "John", 103, "hangzhou"}, 98 | {10005, "Edwaxx", 104, "shanghai"}, 99 | {10006, "Jack", 103, "shanghai"} 100 | }; 101 | executeDataMask(USER_B, sql, expected); 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /src/test/java/com/hw/security/flink/execute/ExecuteRowFilterTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License.
17 | */ 18 | 19 | package com.hw.security.flink.execute; 20 | 21 | import com.hw.security.flink.basic.AbstractBasicTest; 22 | 23 | import org.junit.BeforeClass; 24 | import org.junit.Ignore; 25 | import org.junit.Test; 26 | 27 | /** 28 | * Execute SQL based on row filter. 29 | * 30 | *
Note: this test depends on an external MySQL environment, so run it manually. 31 | * 32 | * @author: HamaWhite 33 | */ 34 | @Ignore 35 | public class ExecuteRowFilterTest extends AbstractBasicTest { 36 | 37 | @BeforeClass 38 | public static void init() { 39 | // create mysql cdc table orders 40 | createTableOfOrders(); 41 | 42 | // add row filter policies 43 | policyManager.addPolicy(rowFilterPolicy(USER_A, TABLE_ORDERS, "region = 'beijing'")); 44 | policyManager.addPolicy(rowFilterPolicy(USER_B, TABLE_ORDERS, "region = 'hangzhou'")); 45 | } 46 | 47 | /** 48 | * Execute without row-level filter 49 | */ 50 | @Test 51 | public void testExecute() { 52 | String sql = "SELECT order_id, customer_name, product_id, region FROM orders"; 53 | 54 | Object[][] expected = { 55 | {10001, "Jack", 102, "beijing"}, 56 | {10002, "Sally", 105, "beijing"}, 57 | {10003, "Edward", 106, "hangzhou"}, 58 | {10004, "John", 103, "hangzhou"}, 59 | {10005, "Edward", 104, "shanghai"}, 60 | {10006, "Jack", 103, "shanghai"} 61 | }; 62 | execute(sql, expected); 63 | } 64 | 65 | /** 66 | * User A can only view data in the beijing region 67 | */ 68 | @Test 69 | public void testExecuteByUserA() { 70 | String sql = "SELECT order_id, customer_name, product_id, region FROM orders"; 71 | 72 | Object[][] expected = { 73 | {10001, "Jack", 102, "beijing"}, 74 | {10002, "Sally", 105, "beijing"} 75 | }; 76 | executeRowFilter(USER_A, sql, expected); 77 | } 78 | 79 | /** 80 | * User B can only view data in the hangzhou region 81 | */ 82 | @Test 83 | public void testExecuteByUserB() { 84 | String sql = "SELECT order_id, customer_name, product_id, region FROM orders"; 85 | 86 | Object[][] expected = { 87 | {10003, "Edward", 106, "hangzhou"}, 88 | {10004, "John", 103, "hangzhou"} 89 | }; 90 | executeRowFilter(USER_B, sql, expected); 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /src/test/java/com/hw/security/flink/execute/MixedExecuteTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.hw.security.flink.execute; 20 | 21 | import com.hw.security.flink.basic.AbstractBasicTest; 22 | 23 | import org.junit.BeforeClass; 24 | import org.junit.Ignore; 25 | import org.junit.Test; 26 | 27 | /** 28 | * Execute a single SQL statement with the user's row-level filter and data mask policies. 29 | * 30 | *
Note: this test depends on an external MySQL environment, so run it manually. 31 | * 32 | * @author: HamaWhite 33 | */ 34 | @Ignore 35 | public class MixedExecuteTest extends AbstractBasicTest { 36 | 37 | @BeforeClass 38 | public static void init() { 39 | // create mysql cdc table orders 40 | createTableOfOrders(); 41 | 42 | // add row filter policies 43 | policyManager.addPolicy(rowFilterPolicy(USER_A, TABLE_ORDERS, "region = 'beijing'")); 44 | policyManager.addPolicy(rowFilterPolicy(USER_B, TABLE_ORDERS, "region = 'hangzhou'")); 45 | 46 | // add data mask policies 47 | policyManager.addPolicy(dataMaskPolicy(USER_A, TABLE_ORDERS, "customer_name", "MASK")); 48 | policyManager.addPolicy(dataMaskPolicy(USER_B, TABLE_ORDERS, "customer_name", "MASK_SHOW_FIRST_4")); 49 | } 50 | 51 | /** 52 | * Execute without row-level filter or data mask 53 | */ 54 | @Test 55 | public void testExecute() { 56 | String sql = "SELECT order_id, customer_name, product_id, region FROM orders"; 57 | 58 | Object[][] expected = { 59 | {10001, "Jack", 102, "beijing"}, 60 | {10002, "Sally", 105, "beijing"}, 61 | {10003, "Edward", 106, "hangzhou"}, 62 | {10004, "John", 103, "hangzhou"}, 63 | {10005, "Edward", 104, "shanghai"}, 64 | {10006, "Jack", 103, "shanghai"} 65 | }; 66 | execute(sql, expected); 67 | } 68 | 69 | /** 70 | * User A can only view data in the beijing region, with customer_name masked by mask 71 | */ 72 | @Test 73 | public void testExecuteByUserA() { 74 | String sql = "SELECT order_id, customer_name, product_id, region FROM orders"; 75 | 76 | Object[][] expected = { 77 | {10001, "Xxxx", 102, "beijing"}, 78 | {10002, "Xxxxx", 105, "beijing"} 79 | }; 80 | mixedExecute(USER_A, sql, expected); 81 | } 82 | 83 | /** 84 | * User B can only view data in the hangzhou region, with customer_name masked by mask_show_first_4 85 | */ 86 | @Test 87 | public void testExecuteByUserB() { 88 | String sql = "SELECT order_id, customer_name, product_id, region FROM orders"; 89 | 90 | Object[][] expected = { 91 | {10003, "Edwaxx", 106, "hangzhou"}, 92 | {10004, "John", 103, "hangzhou"} 93 | }; 94 | mixedExecute(USER_B, sql, expected); 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /src/test/java/com/hw/security/flink/rewrite/MixedRewriteTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.hw.security.flink.rewrite; 20 | 21 | import com.hw.security.flink.basic.AbstractBasicTest; 22 | 23 | import org.junit.BeforeClass; 24 | import org.junit.Test; 25 | 26 | /** 27 | * Add row-level filtering and column masking, then return the new SQL.
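 * In the rewritten SQL, the mask expressions are applied in an inner sub-query and the row-filter predicates are appended to the outer WHERE clause, as the expected strings below show.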
28 | * 29 | * @author: HamaWhite 30 | */ 31 | public class MixedRewriteTest extends AbstractBasicTest { 32 | 33 | @BeforeClass 34 | public static void init() { 35 | // create mysql cdc table orders 36 | createTableOfOrders(); 37 | 38 | // create mysql cdc table products 39 | createTableOfProducts(); 40 | 41 | // add row filter policy 42 | policyManager.addPolicy(rowFilterPolicy(USER_A, TABLE_ORDERS, "region = 'beijing'")); 43 | policyManager.addPolicy(rowFilterPolicy(USER_A, TABLE_PRODUCTS, "name = 'hammer'")); 44 | 45 | // add data mask policies 46 | policyManager.addPolicy(dataMaskPolicy(USER_A, TABLE_ORDERS, "customer_name", "MASK")); 47 | policyManager.addPolicy(dataMaskPolicy(USER_A, TABLE_PRODUCTS, "name", "MASK_SHOW_LAST_4")); 48 | } 49 | 50 | /** 51 | * Only select 52 | */ 53 | @Test 54 | public void testSelect() { 55 | String sql = "SELECT order_id, customer_name, product_id, region FROM orders"; 56 | 57 | // the alias is equal to the table name orders 58 | String expected = "SELECT " + 59 | " orders.order_id ," + 60 | " orders.customer_name ," + 61 | " orders.product_id ," + 62 | " orders.region " + 63 | "FROM ( " + 64 | " SELECT " + 65 | " order_id ," + 66 | " order_date ," + 67 | " CAST(mask(customer_name) AS STRING) AS customer_name ," + 68 | " product_id ," + 69 | " price ," + 70 | " order_status ," + 71 | " region " + 72 | " FROM " + 73 | " hive.default.orders " + 74 | " ) AS orders " + 75 | "WHERE " + 76 | " orders.region = 'beijing' "; 77 | 78 | mixedRewrite(USER_A, sql, expected); 79 | } 80 | 81 | /** 82 | * The two tables of products and orders are left joined. 83 | *
products has the alias p, and orders has no alias 84 | */ 85 | @Test 86 | public void testJoin() { 87 | String sql = "SELECT " + 88 | " orders.order_id ," + 89 | " orders.customer_name ," + 90 | " orders.product_id ," + 91 | " orders.region ," + 92 | " p.name ," + 93 | " p.description " + 94 | "FROM " + 95 | " orders " + 96 | "LEFT JOIN " + 97 | " products AS p " + 98 | "ON " + 99 | " orders.product_id = p.id "; 100 | 101 | String expected = "SELECT " + 102 | " orders.order_id ," + 103 | " orders.customer_name ," + 104 | " orders.product_id ," + 105 | " orders.region ," + 106 | " p.name ," + 107 | " p.description " + 108 | "FROM ( " + 109 | " SELECT " + 110 | " order_id ," + 111 | " order_date ," + 112 | " CAST(mask(customer_name) AS STRING) AS customer_name ," + 113 | " product_id ," + 114 | " price ," + 115 | " order_status ," + 116 | " region " + 117 | " FROM " + 118 | " hive.default.orders " + 119 | " ) AS orders " + 120 | "LEFT JOIN ( " + 121 | " SELECT " + 122 | " id ," + 123 | " CAST(mask_show_last_n(name, 4, 'x', 'x', 'x', -1, '1') AS STRING) AS name, " + 124 | " description " + 125 | " FROM " + 126 | " hive.default.products " + 127 | " ) AS p " + 128 | "ON " + 129 | " orders.product_id = p.id " + 130 | "WHERE " + 131 | " orders.region = 'beijing' " + 132 | " AND p.name = 'hammer' "; 133 | 134 | mixedRewrite(USER_A, sql, expected); 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /src/test/java/com/hw/security/flink/rewrite/RewriteDataMaskTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License.
17 | */ 18 | 19 | package com.hw.security.flink.rewrite; 20 | 21 | import com.hw.security.flink.basic.AbstractBasicTest; 22 | 23 | import org.junit.BeforeClass; 24 | import org.junit.Test; 25 | 26 | /** 27 | * Rewrite SQL based on data mask conditions 28 | * 29 | * @author: HamaWhite 30 | */ 31 | public class RewriteDataMaskTest extends AbstractBasicTest { 32 | 33 | @BeforeClass 34 | public static void init() { 35 | // create mysql cdc table orders 36 | createTableOfOrders(); 37 | 38 | // create mysql cdc table products 39 | createTableOfProducts(); 40 | 41 | // create mysql cdc table shipments 42 | createTableOfShipments(); 43 | 44 | // create print sink table print_sink 45 | createTableOfPrintSink(); 46 | 47 | // add data mask policies 48 | policyManager.addPolicy(dataMaskPolicy(USER_A, TABLE_ORDERS, "customer_name", "MASK")); 49 | policyManager.addPolicy(dataMaskPolicy(USER_A, TABLE_PRODUCTS, "name", "MASK_SHOW_LAST_4")); 50 | policyManager.addPolicy(dataMaskPolicy(USER_B, TABLE_ORDERS, "customer_name", "MASK_SHOW_FIRST_4")); 51 | } 52 | 53 | /** 54 | * Only select 55 | */ 56 | @Test 57 | public void testSelect() { 58 | String sql = "SELECT order_id, customer_name, product_id, region FROM orders"; 59 | 60 | // the alias is equal to the table name orders 61 | String expected = "SELECT " + 62 | " orders.order_id ," + 63 | " orders.customer_name ," + 64 | " orders.product_id ," + 65 | " orders.region " + 66 | "FROM ( " + 67 | " SELECT " + 68 | " order_id ," + 69 | " order_date ," + 70 | " CAST(mask(customer_name) AS STRING) AS customer_name ," + 71 | " product_id ," + 72 | " price ," + 73 | " order_status ," + 74 | " region " + 75 | " FROM " + 76 | " hive.default.orders " + 77 | " ) AS orders "; 78 | 79 | rewriteDataMask(USER_A, sql, expected); 80 | } 81 | 82 | /** 83 | * Only select with alias 84 | */ 85 | @Test 86 | public void testSelectWithAlias() { 87 | String sql = "SELECT o.order_id, o.customer_name, o.product_id, o.region FROM orders AS o"; 88 | 89 | // the alias is equal to 'o' 90 | String expected = "SELECT " + 91 | " o.order_id ," + 92 | " o.customer_name ," + 93 | " o.product_id ," + 94 | " o.region " + 95 | "FROM ( " + 96 | " SELECT " + 97 | " order_id ," + 98 | " order_date ," + 99 | " CAST(mask(customer_name) AS STRING) AS customer_name ," + 100 | " product_id ," + 101 | " price ," + 102 | " order_status ," + 103 | " region " + 104 | " FROM " + 105 | " hive.default.orders " + 106 | " ) AS o "; 107 | 108 | rewriteDataMask(USER_A, sql, expected); 109 | } 110 | 111 | /** 112 | * Different users configure different policies 113 | */ 114 | @Test 115 | public void testSelectDiffUser() { 116 | String sql = "SELECT order_id, customer_name, product_id, region FROM orders"; 117 | 118 | String expectedUserA = "SELECT " + 119 | " orders.order_id ," + 120 | " orders.customer_name ," + 121 | " orders.product_id ," + 122 | " orders.region " + 123 | "FROM ( " + 124 | " SELECT " + 125 | " order_id ," + 126 | " order_date ," + 127 | " CAST(mask(customer_name) AS STRING) AS customer_name ," + 128 | " product_id ," + 129 | " price ," + 130 | " order_status ," + 131 | " region " + 132 | " FROM " + 133 | " hive.default.orders " + 134 | " ) AS orders "; 135 | 136 | String expectedUserB = "SELECT " + 137 | " orders.order_id ," + 138 | " orders.customer_name ," + 139 | " orders.product_id ," + 140 | " orders.region " + 141 | "FROM ( " + 142 | " SELECT " + 143 | " order_id ," + 144 | " order_date ," + 145 | " CAST(mask_show_first_n(customer_name, 4, 'x', 'x', 'x', -1, '1') AS STRING) " + 146 | " AS 
customer_name ," + 147 | " product_id ," + 148 | " price ," + 149 | " order_status ," + 150 | " region " + 151 | " FROM " + 152 | " hive.default.orders " + 153 | " ) AS orders "; 154 | 155 | rewriteDataMask(USER_A, sql, expectedUserA); 156 | rewriteDataMask(USER_B, sql, expectedUserB); 157 | } 158 | 159 | /** 160 | * The two tables of products and orders are left joined. 161 | *
products has the alias p, and orders has no alias 162 | */ 163 | @Test 164 | public void testJoin() { 165 | String sql = "SELECT " + 166 | " orders.order_id ," + 167 | " orders.customer_name ," + 168 | " orders.product_id ," + 169 | " orders.region ," + 170 | " p.name ," + 171 | " p.description " + 172 | "FROM " + 173 | " orders " + 174 | "LEFT JOIN " + 175 | " products AS p " + 176 | "ON " + 177 | " orders.product_id = p.id "; 178 | 179 | String expected = "SELECT " + 180 | " orders.order_id ," + 181 | " orders.customer_name ," + 182 | " orders.product_id ," + 183 | " orders.region ," + 184 | " p.name ," + 185 | " p.description " + 186 | "FROM ( " + 187 | " SELECT " + 188 | " order_id ," + 189 | " order_date ," + 190 | " CAST(mask(customer_name) AS STRING) AS customer_name ," + 191 | " product_id ," + 192 | " price ," + 193 | " order_status ," + 194 | " region " + 195 | " FROM " + 196 | " hive.default.orders " + 197 | " ) AS orders " + 198 | "LEFT JOIN ( " + 199 | " SELECT " + 200 | " id ," + 201 | " CAST(mask_show_last_n(name, 4, 'x', 'x', 'x', -1, '1') AS STRING) AS name, " + 202 | " description " + 203 | " FROM " + 204 | " hive.default.products " + 205 | " ) AS p " + 206 | "ON " + 207 | " orders.product_id = p.id "; 208 | 209 | rewriteDataMask(USER_A, sql, expected); 210 | } 211 | 212 | /** 213 | * The products and orders tables are left joined, and the left table comes from a sub-query 214 | */ 215 | @Test 216 | public void testJoinSubQueryWhere() { 217 | String sql = "SELECT " + 218 | " o.order_id ," + 219 | " o.customer_name ," + 220 | " o.product_id ," + 221 | " o.region ," + 222 | " p.name ," + 223 | " p.description " + 224 | "FROM ( " + 225 | " SELECT " + 226 | " order_id ," + 227 | " customer_name ," + 228 | " price ," + 229 | " product_id ," + 230 | " region " + 231 | " FROM " + 232 | " orders " + 233 | " WHERE order_status = FALSE " + 234 | " ) AS o " + 235 | "LEFT JOIN " + 236 | " products AS p " + 237 | "ON " + 238 | " o.product_id = p.id " + 239 | "WHERE " + 240 | " o.price > 45.0 " + 241 | " OR o.customer_name = 'John' "; 242 | 243 | String expected = "SELECT " + 244 | " o.order_id ," + 245 | " o.customer_name ," + 246 | " o.product_id ," + 247 | " o.region ," + 248 | " p.name ," + 249 | " p.description " + 250 | "FROM ( " + 251 | " SELECT " + 252 | " orders.order_id ," + 253 | " orders.customer_name ," + 254 | " orders.price ," + 255 | " orders.product_id ," + 256 | " orders.region " + 257 | " FROM ( " + 258 | " SELECT " + 259 | " order_id ," + 260 | " order_date ," + 261 | " CAST(mask(customer_name) AS STRING) AS customer_name ," + 262 | " product_id ," + 263 | " price ," + 264 | " order_status ," + 265 | " region " + 266 | " FROM " + 267 | " hive.default.orders " + 268 | " ) AS orders " + 269 | " WHERE " + 270 | " orders.order_status = FALSE " + 271 | " ) AS o " + 272 | "LEFT JOIN ( " + 273 | " SELECT " + 274 | " id ," + 275 | " CAST(mask_show_last_n(name, 4, 'x', 'x', 'x', -1, '1') AS STRING) AS name ," + 276 | " description " + 277 | " FROM " + 278 | " hive.default.products " + 279 | " ) AS p " + 280 | "ON " + 281 | " o.product_id = p.id " + 282 | "WHERE " + 283 | " o.price > 45.0 " + 284 | " OR o.customer_name = 'John' "; 285 | 286 | rewriteDataMask(USER_A, sql, expected); 287 | } 288 | 289 | /** 290 | * The orders table, the products table, and the shipments logistics information table 291 | * are joined together 292 | */ 293 | @Test 294 | public void testThreeJoin() { 295 | String sql = "SELECT " + 296 | " o.order_id ," + 297 | " 
o.customer_name ," + 298 | " o.product_id ," + 299 | " o.region ," + 300 | " p.name ," + 301 | " p.description ," + 302 | " s.shipment_id ," + 303 | " s.origin ," + 304 | " s.destination ," + 305 | " s.is_arrived " + 306 | "FROM " + 307 | " orders AS o " + 308 | "LEFT JOIN " + 309 | " products AS p " + 310 | "ON " + 311 | " o.product_id = p.id " + 312 | "LEFT JOIN " + 313 | " shipments AS s " + 314 | "ON " + 315 | " o.order_id = s.order_id "; 316 | 317 | String expected = "SELECT " + 318 | " o.order_id ," + 319 | " o.customer_name ," + 320 | " o.product_id ," + 321 | " o.region ," + 322 | " p.name ," + 323 | " p.description ," + 324 | " s.shipment_id ," + 325 | " s.origin ," + 326 | " s.destination ," + 327 | " s.is_arrived " + 328 | "FROM ( " + 329 | " SELECT " + 330 | " order_id ," + 331 | " order_date ," + 332 | " CAST(mask(customer_name) AS STRING) AS customer_name ," + 333 | " product_id ," + 334 | " price ," + 335 | " order_status ," + 336 | " region " + 337 | " FROM " + 338 | " hive.default.orders " + 339 | " ) AS o " + 340 | "LEFT JOIN ( " + 341 | " SELECT " + 342 | " id ," + 343 | " CAST(mask_show_last_n(name, 4, 'x', 'x', 'x', -1, '1') AS STRING) AS name, " + 344 | " description " + 345 | " FROM " + 346 | " hive.default.products " + 347 | " ) AS p " + 348 | "ON " + 349 | " o.product_id = p.id " + 350 | "LEFT JOIN " + 351 | " hive.default.shipments AS s " + 352 | "ON " + 353 | " o.order_id = s.order_id "; 354 | 355 | rewriteDataMask(USER_A, sql, expected); 356 | } 357 | 358 | /** 359 | * insert-select. 360 | * insert into print table from mysql cdc stream table. 361 | */ 362 | @Test 363 | public void testInsertSelect() { 364 | String sql = "INSERT INTO print_sink SELECT * FROM orders"; 365 | 366 | // the following () is what Calcite would automatically add 367 | String expected = "INSERT INTO print_sink ( " + 368 | "SELECT " + 369 | " orders.order_id ," + 370 | " orders.order_date ," + 371 | " orders.customer_name ," + 372 | " orders.product_id ," + 373 | " orders.price ," + 374 | " orders.order_status ," + 375 | " orders.region " + 376 | "FROM ( " + 377 | " SELECT " + 378 | " order_id ," + 379 | " order_date ," + 380 | " CAST(mask(customer_name) AS STRING) AS customer_name ," + 381 | " product_id ," + 382 | " price ," + 383 | " order_status ," + 384 | " region " + 385 | " FROM " + 386 | " hive.default.orders " + 387 | " ) AS orders " + 388 | ") "; 389 | 390 | rewriteDataMask(USER_A, sql, expected); 391 | } 392 | 393 | /** 394 | * insert-select-select. 395 | * insert into print table from mysql cdc stream table. 
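 * Note: the mask is still applied in the innermost sub-query even though orders is wrapped in a nested SELECT, as the expected SQL below shows.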
396 | */ 397 | @Test 398 | public void testInsertSelectSelect() { 399 | String sql = "INSERT INTO print_sink SELECT * FROM (SELECT * FROM orders) AS o"; 400 | 401 | // the following () is what Calcite would automatically add 402 | String expected = "INSERT INTO print_sink ( " + 403 | "SELECT " + 404 | " o.order_id ," + 405 | " o.order_date ," + 406 | " o.customer_name ," + 407 | " o.product_id ," + 408 | " o.price ," + 409 | " o.order_status ," + 410 | " o.region " + 411 | "FROM ( " + 412 | " SELECT " + 413 | " orders.order_id ," + 414 | " orders.order_date ," + 415 | " orders.customer_name ," + 416 | " orders.product_id ," + 417 | " orders.price ," + 418 | " orders.order_status ," + 419 | " orders.region " + 420 | " FROM ( " + 421 | " SELECT " + 422 | " order_id ," + 423 | " order_date ," + 424 | " CAST(mask(customer_name) AS STRING) AS customer_name ," + 425 | " product_id ," + 426 | " price ," + 427 | " order_status ," + 428 | " region " + 429 | " FROM " + 430 | " hive.default.orders " + 431 | " ) AS orders " + 432 | " ) AS o " + 433 | ") "; 434 | 435 | rewriteDataMask(USER_A, sql, expected); 436 | } 437 | } 438 | -------------------------------------------------------------------------------- /src/test/java/com/hw/security/flink/rewrite/RewriteRowFilterTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package com.hw.security.flink.rewrite; 20 | 21 | import com.hw.security.flink.basic.AbstractBasicTest; 22 | import com.hw.security.flink.policy.RowFilterPolicy; 23 | 24 | import org.junit.BeforeClass; 25 | import org.junit.Test; 26 | 27 | /** 28 | * Rewrite SQL based on row filter conditions 29 | * 30 | * @author: HamaWhite 31 | */ 32 | public class RewriteRowFilterTest extends AbstractBasicTest { 33 | 34 | @BeforeClass 35 | public static void init() { 36 | // create mysql cdc table orders 37 | createTableOfOrders(); 38 | 39 | // create mysql cdc table products 40 | createTableOfProducts(); 41 | 42 | // create mysql cdc table shipments 43 | createTableOfShipments(); 44 | 45 | // create print sink table print_sink 46 | createTableOfPrintSink(); 47 | 48 | // add row filter policies 49 | policyManager.addPolicy(rowFilterPolicy(USER_A, TABLE_ORDERS, "region = 'beijing'")); 50 | policyManager.addPolicy(rowFilterPolicy(USER_B, TABLE_ORDERS, "region = 'hangzhou'")); 51 | } 52 | 53 | /** 54 | * Only select 55 | */ 56 | @Test 57 | public void testSelect() { 58 | String sql = "SELECT order_id, customer_name, product_id, region FROM orders"; 59 | 60 | String expected = "SELECT " + 61 | " orders.order_id ," + 62 | " orders.customer_name ," + 63 | " orders.product_id ," + 64 | " orders.region " + 65 | "FROM " + 66 | " hive.default.orders AS orders " + 67 | "WHERE " + 68 | " orders.region = 'beijing' "; 69 | 70 | rewriteRowFilter(USER_A, sql, expected); 71 | } 72 | 73 | /** 74 | * Different users configure different policies 75 | */ 76 | @Test 77 | public void testSelectDiffUser() { 78 | String sql = "SELECT order_id, customer_name, product_id, region FROM orders"; 79 | 80 | String expectedUserA = "SELECT " + 81 | " orders.order_id ," + 82 | " orders.customer_name ," + 83 | " orders.product_id ," + 84 | " orders.region " + 85 | "FROM " + 86 | " hive.default.orders AS orders " + 87 | "WHERE " + 88 | " orders.region = 'beijing' "; 89 | 90 | String expectedUserB = "SELECT " + 91 | " orders.order_id ," + 92 | " orders.customer_name ," + 93 | " orders.product_id ," + 94 | " orders.region " + 95 | "FROM " + 96 | " hive.default.orders AS orders " + 97 | "WHERE " + 98 | " orders.region = 'hangzhou' "; 99 | 100 | rewriteRowFilter(USER_A, sql, expectedUserA); 101 | rewriteRowFilter(USER_B, sql, expectedUserB); 102 | } 103 | 104 | /** 105 | * Select with a WHERE condition 106 | */ 107 | @Test 108 | public void testSelectWhere() { 109 | String sql = "SELECT order_id, customer_name, product_id, region FROM orders WHERE price > 45.0"; 110 | 111 | String expected = "SELECT " + 112 | " orders.order_id ," + 113 | " orders.customer_name ," + 114 | " orders.product_id ," + 115 | " orders.region " + 116 | "FROM " + 117 | " hive.default.orders AS orders " + 118 | "WHERE " + 119 | " orders.price > 45.0 " + 120 | " AND orders.region = 'beijing' "; 121 | 122 | rewriteRowFilter(USER_A, sql, expected); 123 | } 124 | 125 | /** 126 | * With a complex WHERE condition, a pair of parentheses is added around the existing 127 | * conditions before the filter is appended 128 | */ 129 | @Test 130 | public void testSelectComplexWhere() { 131 | String sql = "SELECT " + 132 | " order_id ," + 133 | " customer_name ," + 134 | " product_id ," + 135 | " region " + 136 | "FROM " + 137 | " orders " + 138 | "WHERE " + 139 | " price > 45.0 " + 140 | " OR customer_name = 'John' "; 141 | 142 | String expected = "SELECT " + 143 | " orders.order_id ," + 144 | " orders.customer_name ," + 145 | " orders.product_id ," + 146 | " orders.region " + 147 | "FROM " +
148 | " hive.default.orders AS orders " + 149 | "WHERE " + 150 | " (orders.price > 45.0 OR orders.customer_name = 'John') " + 151 | " AND orders.region = 'beijing' "; 152 | 153 | rewriteRowFilter(USER_A, sql, expected); 154 | } 155 | 156 | /** 157 | * With group by clause 158 | */ 159 | @Test 160 | public void testSelectWhereGroupBy() { 161 | String sql = "SELECT " + 162 | " customer_name ," + 163 | " count(*) AS cnt " + 164 | "FROM " + 165 | " orders " + 166 | "WHERE " + 167 | " price > 45.0 " + 168 | "GROUP BY " + 169 | " customer_name "; 170 | 171 | String expected = "SELECT " + 172 | " orders.customer_name ," + 173 | " COUNT(*) AS cnt " + 174 | "FROM " + 175 | " hive.default.orders AS orders " + 176 | "WHERE " + 177 | " orders.price > 45.0 " + 178 | " AND orders.region = 'beijing' " + 179 | "GROUP BY " + 180 | " orders.customer_name "; 181 | 182 | rewriteRowFilter(USER_A, sql, expected); 183 | } 184 | 185 | /** 186 | * The two tables of products and orders are left joined 187 | */ 188 | @Test 189 | public void testJoin() { 190 | String sql = "SELECT " + 191 | " o.order_id ," + 192 | " o.customer_name ," + 193 | " o.product_id ," + 194 | " o.region ," + 195 | " p.name ," + 196 | " p.description " + 197 | "FROM " + 198 | " orders AS o " + 199 | "LEFT JOIN " + 200 | " products AS p " + 201 | "ON " + 202 | " o.product_id = p.id "; 203 | 204 | String expected = "SELECT " + 205 | " o.order_id ," + 206 | " o.customer_name ," + 207 | " o.product_id ," + 208 | " o.region ," + 209 | " p.name ," + 210 | " p.description " + 211 | "FROM " + 212 | " hive.default.orders AS o " + 213 | "LEFT JOIN " + 214 | " hive.default.products AS p " + 215 | "ON " + 216 | " o.product_id = p.id " + 217 | "WHERE " + 218 | " o.region = 'beijing' "; 219 | 220 | rewriteRowFilter(USER_A, sql, expected); 221 | } 222 | 223 | /** 224 | * The two tables of products and orders are left joined, but without alias 225 | */ 226 | @Test 227 | public void testJoinWithoutAlias() { 228 | String sql = "SELECT " + 229 | " orders.order_id ," + 230 | " orders.customer_name ," + 231 | " orders.product_id ," + 232 | " orders.region ," + 233 | " products.name ," + 234 | " products.description " + 235 | "FROM " + 236 | " orders " + 237 | "LEFT JOIN " + 238 | " products " + 239 | "ON " + 240 | " orders.product_id = products.id "; 241 | 242 | String expected = "SELECT " + 243 | " orders.order_id ," + 244 | " orders.customer_name ," + 245 | " orders.product_id ," + 246 | " orders.region ," + 247 | " products.name ," + 248 | " products.description " + 249 | "FROM " + 250 | " hive.default.orders AS orders " + 251 | "LEFT JOIN " + 252 | " hive.default.products AS products " + 253 | "ON " + 254 | " orders.product_id = products.id " + 255 | "WHERE " + 256 | " orders.region = 'beijing' "; 257 | 258 | rewriteRowFilter(USER_A, sql, expected); 259 | } 260 | 261 | /** 262 | * The two tables of products and orders are left joined, and there is a condition 263 | */ 264 | @Test 265 | public void testJoinWhere() { 266 | String sql = "SELECT " + 267 | " o.order_id ," + 268 | " o.customer_name ," + 269 | " o.product_id ," + 270 | " o.region ," + 271 | " p.name ," + 272 | " p.description " + 273 | "FROM " + 274 | " orders AS o " + 275 | "LEFT JOIN " + 276 | " products AS p " + 277 | "ON " + 278 | " o.product_id = p.id " + 279 | "WHERE " + 280 | " o.price > 45.0 " + 281 | " OR o.customer_name = 'John' "; 282 | 283 | String expected = "SELECT " + 284 | " o.order_id ," + 285 | " o.customer_name ," + 286 | " o.product_id ," + 287 | " o.region ," + 288 | " p.name ," 
+ 289 | " p.description " + 290 | "FROM " + 291 | " hive.default.orders AS o " + 292 | "LEFT JOIN " + 293 | " hive.default.products AS p " + 294 | "ON " + 295 | " o.product_id = p.id " + 296 | "WHERE " + 297 | " (o.price > 45.0 OR o.customer_name = 'John') " + 298 | " AND o.region = 'beijing' "; 299 | 300 | rewriteRowFilter(USER_A, sql, expected); 301 | } 302 | 303 | /** 304 | * The products and orders tables are left joined, and the left table comes from a sub-query 305 | */ 306 | @Test 307 | public void testJoinSubQueryWhere() { 308 | String sql = "SELECT " + 309 | " o.order_id ," + 310 | " o.customer_name ," + 311 | " o.product_id ," + 312 | " o.region ," + 313 | " p.name ," + 314 | " p.description " + 315 | "FROM ( " + 316 | " SELECT " + 317 | " order_id ," + 318 | " customer_name ," + 319 | " price ," + 320 | " product_id ," + 321 | " region " + 322 | " FROM " + 323 | " orders " + 324 | " WHERE order_status = FALSE " + 325 | " ) AS o " + 326 | "LEFT JOIN " + 327 | " products AS p " + 328 | "ON " + 329 | " o.product_id = p.id " + 330 | "WHERE " + 331 | " o.price > 45.0 " + 332 | " OR o.customer_name = 'John' "; 333 | 334 | String expected = "SELECT " + 335 | " o.order_id ," + 336 | " o.customer_name ," + 337 | " o.product_id ," + 338 | " o.region ," + 339 | " p.name ," + 340 | " p.description " + 341 | "FROM ( " + 342 | " SELECT " + 343 | " orders.order_id ," + 344 | " orders.customer_name ," + 345 | " orders.price ," + 346 | " orders.product_id ," + 347 | " orders.region " + 348 | " FROM " + 349 | " hive.default.orders AS orders " + 350 | " WHERE " + 351 | " orders.order_status = FALSE " + 352 | " AND orders.region = 'beijing' " + 353 | " ) AS o " + 354 | "LEFT JOIN " + 355 | " hive.default.products AS p " + 356 | "ON " + 357 | " o.product_id = p.id " + 358 | "WHERE " + 359 | " o.price > 45.0 " + 360 | " OR o.customer_name = 'John' "; 361 | 362 | rewriteRowFilter(USER_A, sql, expected); 363 | } 364 | 365 | /** 366 | * The two tables of orders and products are joined, and both have row-level filter conditions 367 | */ 368 | @Test 369 | public void testJoinWithBothPermissions() { 370 | RowFilterPolicy policy = rowFilterPolicy(USER_A, TABLE_PRODUCTS, "name = 'hammer'"); 371 | // add policy 372 | policyManager.addPolicy(policy); 373 | 374 | String sql = "SELECT " + 375 | " o.order_id ," + 376 | " o.customer_name ," + 377 | " o.product_id ," + 378 | " o.region ," + 379 | " p.name ," + 380 | " p.description " + 381 | "FROM " + 382 | " orders AS o " + 383 | "LEFT JOIN " + 384 | " products AS p " + 385 | "ON " + 386 | " o.product_id = p.id "; 387 | 388 | String expected = "SELECT " + 389 | " o.order_id ," + 390 | " o.customer_name ," + 391 | " o.product_id ," + 392 | " o.region ," + 393 | " p.name ," + 394 | " p.description " + 395 | "FROM " + 396 | " hive.default.orders AS o " + 397 | "LEFT JOIN " + 398 | " hive.default.products AS p " + 399 | "ON " + 400 | " o.product_id = p.id " + 401 | "WHERE " + 402 | " o.region = 'beijing' " + 403 | " AND p.name = 'hammer' "; 404 | 405 | rewriteRowFilter(USER_A, sql, expected); 406 | 407 | // remove policy 408 | policyManager.removePolicy(policy); 409 | } 410 | 411 | /** 412 | * The orders table, the products table, and the shipments logistics information table 413 | * are joined together 414 | */ 415 | @Test 416 | public void testThreeJoin() { 417 | RowFilterPolicy policy1 = rowFilterPolicy(USER_A, TABLE_PRODUCTS, "name = 'hammer'"); 418 | RowFilterPolicy policy2 = rowFilterPolicy(USER_A, TABLE_SHIPMENTS, "is_arrived = 
FALSE"); 419 | 420 | // add policies 421 | policyManager.addPolicy(policy1); 422 | policyManager.addPolicy(policy2); 423 | 424 | String sql = "SELECT " + 425 | " o.order_id ," + 426 | " o.customer_name ," + 427 | " o.product_id ," + 428 | " o.region ," + 429 | " p.name ," + 430 | " p.description ," + 431 | " s.shipment_id ," + 432 | " s.origin ," + 433 | " s.destination ," + 434 | " s.is_arrived " + 435 | "FROM " + 436 | " orders AS o " + 437 | "LEFT JOIN " + 438 | " products AS p " + 439 | "ON " + 440 | " o.product_id = p.id " + 441 | "LEFT JOIN " + 442 | " shipments AS s " + 443 | "ON " + 444 | " o.order_id = s.order_id "; 445 | 446 | String expected = "SELECT " + 447 | " o.order_id ," + 448 | " o.customer_name ," + 449 | " o.product_id ," + 450 | " o.region ," + 451 | " p.name ," + 452 | " p.description ," + 453 | " s.shipment_id ," + 454 | " s.origin ," + 455 | " s.destination ," + 456 | " s.is_arrived " + 457 | "FROM " + 458 | " hive.default.orders AS o " + 459 | "LEFT JOIN " + 460 | " hive.default.products AS p " + 461 | "ON " + 462 | " o.product_id = p.id " + 463 | "LEFT JOIN " + 464 | " hive.default.shipments AS s " + 465 | "ON " + 466 | " o.order_id = s.order_id " + 467 | "WHERE " + 468 | " o.region = 'beijing' " + 469 | " AND p.name = 'hammer' " + 470 | " AND s.is_arrived = FALSE "; 471 | 472 | rewriteRowFilter(USER_A, sql, expected); 473 | 474 | // remove policies 475 | policyManager.removePolicy(policy1); 476 | policyManager.removePolicy(policy2); 477 | } 478 | 479 | /** 480 | * insert-select. 481 | * insert into print table from mysql cdc stream table. 482 | */ 483 | @Test 484 | public void testInsertSelect() { 485 | String sql = "INSERT INTO print_sink SELECT * FROM orders"; 486 | 487 | // the following () is what Calcite would automatically add 488 | String expected = "INSERT INTO print_sink ( " + 489 | "SELECT " + 490 | " orders.order_id ," + 491 | " orders.order_date ," + 492 | " orders.customer_name ," + 493 | " orders.product_id ," + 494 | " orders.price ," + 495 | " orders.order_status ," + 496 | " orders.region " + 497 | "FROM " + 498 | " hive.default.orders AS orders " + 499 | "WHERE " + 500 | " orders.region = 'beijing' " + 501 | ") "; 502 | 503 | rewriteRowFilter(USER_A, sql, expected); 504 | } 505 | 506 | /** 507 | * insert-select-select. 508 | * insert into print table from mysql cdc stream table. 
509 | */ 510 | @Test 511 | public void testInsertSelectSelect() { 512 | String sql = "INSERT INTO print_sink SELECT * FROM (SELECT * FROM orders) AS o"; 513 | 514 | // the following () is what Calcite would automatically add 515 | String expected = "INSERT INTO print_sink ( " + 516 | "SELECT " + 517 | " o.order_id ," + 518 | " o.order_date ," + 519 | " o.customer_name ," + 520 | " o.product_id ," + 521 | " o.price ," + 522 | " o.order_status ," + 523 | " o.region " + 524 | "FROM ( " + 525 | " SELECT " + 526 | " orders.order_id ," + 527 | " orders.order_date ," + 528 | " orders.customer_name ," + 529 | " orders.product_id ," + 530 | " orders.price ," + 531 | " orders.order_status ," + 532 | " orders.region " + 533 | " FROM " + 534 | " hive.default.orders AS orders " + 535 | " WHERE " + 536 | " orders.region = 'beijing' " + 537 | " ) AS o " + 538 | ") "; 539 | 540 | rewriteRowFilter(USER_A, sql, expected); 541 | } 542 | } -------------------------------------------------------------------------------- /src/test/resources/hive-site.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0"?> 2 | <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> 3 | <!-- 4 | Licensed to the Apache Software Foundation (ASF) under one 5 | or more contributor license agreements. See the NOTICE file 6 | distributed with this work for additional information 7 | regarding copyright ownership. The ASF licenses this file 8 | to you under the Apache License, Version 2.0 (the 9 | "License"); you may not use this file except in compliance 10 | with the License. You may obtain a copy of the License at 11 | 12 | http://www.apache.org/licenses/LICENSE-2.0 13 | 14 | Unless required by applicable law or agreed to in writing, software 15 | distributed under the License is distributed on an "AS IS" BASIS, 16 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | See the License for the specific language governing permissions and 18 | limitations under the License. 19 | --> 20 | 21 | <configuration> 22 | 23 | <property> 24 | <name>hive.metastore.integral.jdo.pushdown</name> 25 | <value>true</value> 26 | </property> 27 | 28 | <property> 29 | <name>hive.metastore.schema.verification</name> 30 | <value>false</value> 31 | </property> 32 | 33 | <property> 34 | <name>hive.metastore.client.capability.check</name> 35 | <value>false</value> 36 | </property> 37 | 38 | <property> 39 | <name>datanucleus.schema.autoCreateTables</name> 40 | <value>true</value> 41 | </property> 42 | 43 | <property> 44 | <name>datanucleus.schema.autoCreateAll</name> 45 | <value>true</value> 46 | </property> 47 | 48 | <property> 49 | <name>common-key</name> 50 | <value>common-val</value> 51 | </property> 52 | 53 | </configuration> 54 | -------------------------------------------------------------------------------- /style/license-header: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | -------------------------------------------------------------------------------- /style/spotless-formatter.xml: -------------------------------------------------------------------------------- --------------------------------------------------------------------------------