├── .github └── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── _config.yml ├── conf ├── hive-site.xml ├── ranger-hive-audit.xml └── ranger-hive-security.xml ├── docs ├── building-spark-authorizer.md ├── img │ └── ranger-prolcy-details.png ├── install_plugin.md ├── manually_test_cases.md ├── ranger_authorization.md ├── spark_sql_authorization.md ├── sql_std_based_authorization.md └── storage_based_authorization.md ├── pom.xml ├── scalastyle-config.xml └── src └── main └── scala ├── com └── githup │ └── yaooqinn │ └── spark │ └── authorizer │ └── Logging.scala └── org └── apache ├── ranger └── authorization │ └── spark │ └── authorizer │ ├── RangerSparkSQLExtension.scala │ └── package.scala └── spark └── sql ├── catalyst └── optimizer │ ├── Authorizable.scala │ ├── Authorizer.scala │ ├── AuthorizerExtension.scala │ └── HivePrivilegeObject.scala └── hive ├── AuthzUtils.scala ├── PrivilegesBuilder.scala └── client └── AuthzImpl.scala /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | 5 | --- 6 | 7 | **Describe the bug** 8 | A clear and concise description of what the bug is. 9 | 10 | **To Reproduce** 11 | Steps to reproduce the behavior: 12 | 1. Configurations 13 | 2. Environments 14 | 3. Operations 15 | 4. See error 16 | 17 | **Expected behavior** 18 | A clear and concise description of what you expected to happen. 19 | 20 | **Screenshots** 21 | If applicable, add screenshots to help explain your problem. 22 | 23 | **Additional context** 24 | Add any other context about the problem here. 25 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | 5 | --- 6 | 7 | **Is your feature request related to a problem? Please describe.** 8 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 9 | 10 | **Describe the solution you'd like** 11 | A clear and concise description of what you want to happen. 12 | 13 | **Describe alternatives you've considered** 14 | A clear and concise description of any alternative solutions or features you've considered. 15 | 16 | **Additional context** 17 | Add any other context or screenshots about the feature request here. 
18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *#*# 2 | *.#* 3 | *.iml 4 | *.ipr 5 | *.iws 6 | *.pyc 7 | *.pyo 8 | *.swp 9 | *~ 10 | .DS_Store 11 | .cache 12 | .classpath 13 | .ensime 14 | .ensime_cache/ 15 | .ensime_lucene 16 | .generated-mima* 17 | .idea/ 18 | .idea_modules/ 19 | .project 20 | .pydevproject 21 | .scala_dependencies 22 | .settings 23 | /lib/ 24 | R-unit-tests.log 25 | R/unit-tests.out 26 | R/cran-check.out 27 | R/pkg/vignettes/sparkr-vignettes.html 28 | R/pkg/tests/fulltests/Rplots.pdf 29 | build/*.jar 30 | build/apache-maven* 31 | build/scala* 32 | build/zinc* 33 | cache 34 | checkpoint 35 | conf/*.cmd 36 | conf/*.conf 37 | conf/*.properties 38 | conf/*.sh 39 | conf/java-opts 40 | conf/slaves 41 | dependency-reduced-pom.xml 42 | derby.log 43 | dev/create-release/*final 44 | dev/create-release/*txt 45 | dev/pr-deps/ 46 | dist/ 47 | docs/_site 48 | docs/api 49 | sql/docs 50 | sql/site 51 | lib_managed/ 52 | lint-r-report.log 53 | log/ 54 | logs/ 55 | out/ 56 | project/boot/ 57 | project/build/target/ 58 | project/plugins/lib_managed/ 59 | project/plugins/project/build.properties 60 | project/plugins/src_managed/ 61 | project/plugins/target/ 62 | python/lib/pyspark.zip 63 | python/deps 64 | python/pyspark/python 65 | reports/ 66 | scalastyle-on-compile.generated.xml 67 | scalastyle-output.xml 68 | scalastyle.txt 69 | spark-*-bin-*.tgz 70 | spark-tests.log 71 | src_managed/ 72 | streaming-tests.log 73 | target/ 74 | unit-tests.log 75 | work/ 76 | 77 | # For Hive 78 | TempStatsStore/ 79 | metastore/ 80 | metastore_db/ 81 | sql/hive-thriftserver/test_warehouses 82 | warehouse/ 83 | spark-warehouse/ 84 | 85 | # For R session data 86 | .RData 87 | .RHistory 88 | .Rhistory 89 | *.Rproj 90 | *.Rproj.* 91 | 92 | .Rproj.user 93 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: scala 2 | scala: 3 | - 2.11.8 4 | 5 | deploy: 6 | provider: pages 7 | skip_cleanup: true 8 | github_token: $GITHUB_TOKEN 9 | email: yaooqinn@hotmail.com 10 | name: Kent Yao 11 | on: 12 | branch: master 13 | 14 | install: 15 | - mvn install -q -DskipTests=true -Dmaven.javadoc.skip=true -B -V 16 | 17 | script: 18 | - mvn package -q -Dmaven.javadoc.skip=true -B -V 19 | - mvn package -Pspark-2.2 -q -Dmaven.javadoc.skip=true -B -V 20 | - mvn package -Pspark-2.3 -q -Dmaven.javadoc.skip=true -B -V 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | 204 | ======================================================================= 205 | Apache Spark Subcomponents: 206 | 207 | The Apache Spark project contains subcomponents with separate copyright 208 | notices and license terms. Your use of the source code for the these 209 | subcomponents is subject to the terms and conditions of the following 210 | licenses. 211 | 212 | 213 | ======================================================================== 214 | For heapq (pyspark/heapq3.py): 215 | ======================================================================== 216 | 217 | See license/LICENSE-heapq.txt 218 | 219 | ======================================================================== 220 | For SnapTree: 221 | ======================================================================== 222 | 223 | See license/LICENSE-SnapTree.txt 224 | 225 | ======================================================================== 226 | For jbcrypt: 227 | ======================================================================== 228 | 229 | See license/LICENSE-jbcrypt.txt 230 | 231 | ======================================================================== 232 | BSD-style licenses 233 | ======================================================================== 234 | 235 | The following components are provided under a BSD-style license. See project link for details. 236 | The text of each license is also included at licenses/LICENSE-[project].txt. 
237 | 238 | (BSD 3 Clause) netlib core (com.github.fommil.netlib:core:1.1.2 - https://github.com/fommil/netlib-java/core) 239 | (BSD 3 Clause) JPMML-Model (org.jpmml:pmml-model:1.2.7 - https://github.com/jpmml/jpmml-model) 240 | (BSD License) AntLR Parser Generator (antlr:antlr:2.7.7 - http://www.antlr.org/) 241 | (BSD License) ANTLR 4.5.2-1 (org.antlr:antlr4:4.5.2-1 - http://wwww.antlr.org/) 242 | (BSD licence) ANTLR ST4 4.0.4 (org.antlr:ST4:4.0.4 - http://www.stringtemplate.org) 243 | (BSD licence) ANTLR StringTemplate (org.antlr:stringtemplate:3.2.1 - http://www.stringtemplate.org) 244 | (BSD License) Javolution (javolution:javolution:5.5.1 - http://javolution.org) 245 | (BSD) JLine (jline:jline:0.9.94 - http://jline.sourceforge.net) 246 | (BSD) ParaNamer Core (com.thoughtworks.paranamer:paranamer:2.3 - http://paranamer.codehaus.org/paranamer) 247 | (BSD) ParaNamer Core (com.thoughtworks.paranamer:paranamer:2.6 - http://paranamer.codehaus.org/paranamer) 248 | (BSD 3 Clause) Scala (http://www.scala-lang.org/download/#License) 249 | (Interpreter classes (all .scala files in repl/src/main/scala 250 | except for Main.Scala, SparkHelper.scala and ExecutorClassLoader.scala), 251 | and for SerializableMapWrapper in JavaUtils.scala) 252 | (BSD-like) Scala Actors library (org.scala-lang:scala-actors:2.11.7 - http://www.scala-lang.org/) 253 | (BSD-like) Scala Compiler (org.scala-lang:scala-compiler:2.11.7 - http://www.scala-lang.org/) 254 | (BSD-like) Scala Compiler (org.scala-lang:scala-reflect:2.11.7 - http://www.scala-lang.org/) 255 | (BSD-like) Scala Library (org.scala-lang:scala-library:2.11.7 - http://www.scala-lang.org/) 256 | (BSD-like) Scalap (org.scala-lang:scalap:2.11.7 - http://www.scala-lang.org/) 257 | (BSD-style) scalacheck (org.scalacheck:scalacheck_2.11:1.10.0 - http://www.scalacheck.org) 258 | (BSD-style) spire (org.spire-math:spire_2.11:0.7.1 - http://spire-math.org) 259 | (BSD-style) spire-macros (org.spire-math:spire-macros_2.11:0.7.1 - http://spire-math.org) 260 | (New BSD License) Kryo (com.esotericsoftware:kryo:3.0.3 - https://github.com/EsotericSoftware/kryo) 261 | (New BSD License) MinLog (com.esotericsoftware:minlog:1.3.0 - https://github.com/EsotericSoftware/minlog) 262 | (New BSD license) Protocol Buffer Java API (com.google.protobuf:protobuf-java:2.5.0 - http://code.google.com/p/protobuf) 263 | (New BSD license) Protocol Buffer Java API (org.spark-project.protobuf:protobuf-java:2.4.1-shaded - http://code.google.com/p/protobuf) 264 | (The BSD License) Fortran to Java ARPACK (net.sourceforge.f2j:arpack_combined_all:0.1 - http://f2j.sourceforge.net) 265 | (The BSD License) xmlenc Library (xmlenc:xmlenc:0.52 - http://xmlenc.sourceforge.net) 266 | (The New BSD License) Py4J (net.sf.py4j:py4j:0.10.4 - http://py4j.sourceforge.net/) 267 | (Two-clause BSD-style license) JUnit-Interface (com.novocode:junit-interface:0.10 - http://github.com/szeiger/junit-interface/) 268 | (BSD licence) sbt and sbt-launch-lib.bash 269 | (BSD 3 Clause) d3.min.js (https://github.com/mbostock/d3/blob/master/LICENSE) 270 | (BSD 3 Clause) DPark (https://github.com/douban/dpark/blob/master/LICENSE) 271 | (BSD 3 Clause) CloudPickle (https://github.com/cloudpipe/cloudpickle/blob/master/LICENSE) 272 | 273 | ======================================================================== 274 | MIT licenses 275 | ======================================================================== 276 | 277 | The following components are provided under the MIT License. See project link for details. 
278 | The text of each license is also included at licenses/LICENSE-[project].txt.
279 | 
280 | (MIT License) JCL 1.1.1 implemented over SLF4J (org.slf4j:jcl-over-slf4j:1.7.5 - http://www.slf4j.org)
281 | (MIT License) JUL to SLF4J bridge (org.slf4j:jul-to-slf4j:1.7.5 - http://www.slf4j.org)
282 | (MIT License) SLF4J API Module (org.slf4j:slf4j-api:1.7.5 - http://www.slf4j.org)
283 | (MIT License) SLF4J LOG4J-12 Binding (org.slf4j:slf4j-log4j12:1.7.5 - http://www.slf4j.org)
284 | (MIT License) pyrolite (org.spark-project:pyrolite:2.0.1 - http://pythonhosted.org/Pyro4/)
285 | (MIT License) scopt (com.github.scopt:scopt_2.11:3.2.0 - https://github.com/scopt/scopt)
286 | (The MIT License) Mockito (org.mockito:mockito-core:1.9.5 - http://www.mockito.org)
287 | (MIT License) jquery (https://jquery.org/license/)
288 | (MIT License) AnchorJS (https://github.com/bryanbraun/anchorjs)
289 | (MIT License) graphlib-dot (https://github.com/cpettitt/graphlib-dot)
290 | (MIT License) dagre-d3 (https://github.com/cpettitt/dagre-d3)
291 | (MIT License) sorttable (https://github.com/stuartlangridge/sorttable)
292 | (MIT License) boto (https://github.com/boto/boto/blob/develop/LICENSE)
293 | (MIT License) datatables (http://datatables.net/license)
294 | (MIT License) mustache (https://github.com/mustache/mustache/blob/master/LICENSE)
295 | (MIT License) cookies (http://code.google.com/p/cookies/wiki/License)
296 | (MIT License) blockUI (http://jquery.malsup.com/block/)
297 | (MIT License) RowsGroup (http://datatables.net/license/mit)
298 | (MIT License) jsonFormatter (http://www.jqueryscript.net/other/jQuery-Plugin-For-Pretty-JSON-Formatting-jsonFormatter.html)
299 | (MIT License) modernizr (https://github.com/Modernizr/Modernizr/blob/master/LICENSE)
300 | 
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | 
2 | # Spark Authorizer [![Build Status](https://travis-ci.org/yaooqinn/spark-authorizer.svg?branch=master)](https://travis-ci.org/yaooqinn/spark-authorizer) [![HitCount](http://hits.dwyl.io/yaooqinn/spark-authorizer.svg)](http://hits.dwyl.io/yaooqinn/spark-authorizer)
3 | 
4 | **Spark Authorizer** provides you with *SQL Standard Based Authorization* for [Apache Spark™](http://spark.apache.org),
5 | just as [SQL Standard Based Hive Authorization](https://cwiki.apache.org/confluence/display/Hive/SQL+Standard+Based+Hive+Authorization) does for Apache Hive.
6 | While you use Spark SQL or the Dataset/DataFrame API to load data from tables backed by an [Apache Hive™](https://hive.apache.org) metastore,
7 | this library provides row/column-level fine-grained access controls via [Apache Ranger™](https://ranger.apache.org) or Hive SQL Standard Based Authorization.
8 | 
9 | Security is one of the fundamental features for enterprise adoption. [Apache Ranger™](https://ranger.apache.org) offers security plugins for many Hadoop ecosystem components,
10 | such as HDFS, Hive, HBase, Solr and Sqoop2. However, [Apache Spark™](http://spark.apache.org) is not covered yet.
11 | When a secured HDFS cluster is used as a data warehouse accessed by various users and groups via different applications written in Spark and Hive,
12 | it is very difficult to guarantee consistent data management. Apache Spark users can reach the data warehouse only
13 | with the storage-based access controls offered by HDFS.
This library shares the [Ranger Hive plugin](https://cwiki.apache.org/confluence/display/RANGER/Apache+Ranger+0.5.0+Installation#ApacheRanger0.5.0Installation-InstallingApacheHive(1.2.0))
14 | with Hive to help Spark talk to Ranger Admin.
15 | 
16 | Please refer to [ACL Management for Spark SQL](https://yaooqinn.github.io/spark-authorizer/docs/spark_sql_authorization.html) to see what spark-authorizer supports.
17 | 
18 | ## Quick Start
19 | 
20 | ### Step 1. Install Spark Authorizer
21 | 
22 | Include this package in your Spark applications using:
23 | #### spark-shell, pyspark, or spark-submit
24 | ```bash
25 | > $SPARK_HOME/bin/spark-shell --packages yaooqinn:spark-authorizer:2.1.1
26 | ```
27 | #### sbt
28 | If you use the sbt-spark-package plugin, in your sbt build file, add:
29 | ```sbtshell
30 | spDependencies += "yaooqinn/spark-authorizer:2.1.1"
31 | ```
32 | Otherwise,
33 | ```sbtshell
34 | resolvers += "Spark Packages Repo" at "http://dl.bintray.com/spark-packages/maven"
35 | 
36 | libraryDependencies += "yaooqinn" % "spark-authorizer" % "2.1.1"
37 | ```
38 | 
39 | #### Maven
40 | In your pom.xml, add:
41 | ```xml
42 | <dependencies>
43 |   <!-- list of dependencies -->
44 |   <dependency>
45 |     <groupId>yaooqinn</groupId>
46 |     <artifactId>spark-authorizer</artifactId>
47 |     <version>2.1.1</version>
48 |   </dependency>
49 | </dependencies>
50 | 
51 | <repositories>
52 |   <repository>
53 |     <id>SparkPackagesRepo</id>
54 |     <url>http://dl.bintray.com/spark-packages/maven</url>
55 |   </repository>
56 | </repositories>
57 | ```
58 | 
59 | #### Manually
60 | If you build Spark Authorizer manually (see [Building Spark Authorizer](https://yaooqinn.github.io/spark-authorizer/docs/building-spark-authorizer.html)), you can deploy it via:
61 | ```bash
62 | cp target/spark-authorizer-.jar $SPARK_HOME/jars
63 | ```
64 | 
65 | ### Step 2. Install & Configure Ranger Hive Plugin
66 | 
67 | Please refer to [Install Ranger Hive Plugin For Apache Spark](https://yaooqinn.github.io/spark-authorizer/docs/install_plugin.html) to learn how to deploy the plugin jars to Apache Spark and set Ranger/Hive configurations.
68 | 
69 | ### Step 3. Enable Spark Authorizer
70 | 
71 | In `$SPARK_HOME/conf/spark-defaults.conf`, add:
72 | 
73 | ```scala
74 | spark.sql.extensions=org.apache.ranger.authorization.spark.authorizer.RangerSparkSQLExtension
75 | ```
76 | **NOTE** `spark.sql.extensions` is only supported by Spark 2.2.x and later; for Spark 2.1.x, please use [Version: 1.1.3.spark2.1](https://github.com/yaooqinn/spark-authorizer/tree/78f7d818db773c3567c636575845a413ac560c90) and check the previous doc.
77 | 
78 | ## Interactive Spark Shell
79 | 
80 | The easiest way to start using Spark is through the Scala shell:
81 | 
82 | ```shell
83 | bin/spark-shell --master yarn --proxy-user hzyaoqin
84 | ```
85 | 
86 | ## Suffer for the Authorization Pain
87 | 
88 | We create a ranger policy as below:
89 | ![ranger-policy-details](docs/img/ranger-prolcy-details.png)
90 | 
91 | Check privileges with some simple cases.
92 | 
93 | #### Show databases
94 | 
95 | ```sql
96 | scala> spark.sql("show databases").show
97 | +--------------+
98 | | databaseName|
99 | +--------------+
100 | | default|
101 | | spark_test_db|
102 | | tpcds_10g_ext|
103 | +--------------+
104 | ```
105 | 
106 | #### Switch database
107 | 
108 | ```sql
109 | scala> spark.sql("use spark_test_db").show
110 | 17/12/08 17:06:17 ERROR optimizer.Authorizer:
111 | +===============================+
112 | |Spark SQL Authorization Failure|
113 | |-------------------------------|
114 | |Permission denied: user [hzyaoqin] does not have [USE] privilege on [spark_test_db]
115 | |-------------------------------|
116 | |Spark SQL Authorization Failure|
117 | +===============================+
118 | ```
119 | Oops...
120 | 121 | 122 | ```sql 123 | scala> spark.sql("use tpcds_10g_ext").show 124 | ++ 125 | || 126 | ++ 127 | ++ 128 | ``` 129 | LOL... 130 | 131 | 132 | ### Select 133 | ```sql 134 | scala> spark.sql("select cp_type from catalog_page limit 1").show 135 | 17/12/08 17:09:58 ERROR optimizer.Authorizer: 136 | +===============================+ 137 | |Spark SQL Authorization Failure| 138 | |-------------------------------| 139 | |Permission denied: user [hzyaoqin] does not have [SELECT] privilege on [tpcds_10g_ext/catalog_page/cp_type] 140 | |-------------------------------| 141 | |Spark SQL Authorization Failure| 142 | +===============================+ 143 | ``` 144 | Oops... 145 | 146 | ```sql 147 | scala> spark.sql("select * from call_center limit 1").show 148 | +-----------------+-----------------+-----------------+---------------+-----------------+---------------+--------+--------+------------+--------+--------+-----------+---------+--------------------+--------------------+-----------------+-----------+----------------+----------+---------------+----------------+--------------+--------------+---------------+-------+-----------------+--------+------+-------------+-------------+-----------------+ 149 | |cc_call_center_sk|cc_call_center_id|cc_rec_start_date|cc_rec_end_date|cc_closed_date_sk|cc_open_date_sk| cc_name|cc_class|cc_employees|cc_sq_ft|cc_hours| cc_manager|cc_mkt_id| cc_mkt_class| cc_mkt_desc|cc_market_manager|cc_division|cc_division_name|cc_company|cc_company_name|cc_street_number|cc_street_name|cc_street_type|cc_suite_number|cc_city| cc_county|cc_state|cc_zip| cc_country|cc_gmt_offset|cc_tax_percentage| 150 | +-----------------+-----------------+-----------------+---------------+-----------------+---------------+--------+--------+------------+--------+--------+-----------+---------+--------------------+--------------------+-----------------+-----------+----------------+----------+---------------+----------------+--------------+--------------+---------------+-------+-----------------+--------+------+-------------+-------------+-----------------+ 151 | | 1| AAAAAAAABAAAAAAA| 1998-01-01| null| null| 2450952|NY Metro| large| 2| 1138| 8AM-4PM|Bob Belcher| 6|More than other a...|Shared others cou...| Julius Tran| 3| pri| 6| cally| 730| Ash Hill| Boulevard| Suite 0| Midway|Williamson County| TN| 31904|United States| -5.00| 0.11| 152 | +-----------------+-----------------+-----------------+---------------+-----------------+---------------+--------+--------+------------+--------+--------+-----------+---------+--------------------+--------------------+-----------------+-----------+----------------+----------+---------------+----------------+--------------+--------------+---------------+-------+-----------------+--------+------+-------------+-------------+-----------------+ 153 | 154 | ``` 155 | 156 | LOL... 
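When everything unexpectedly comes back permitted, a common cause is that the authorizer extension is simply not registered in the session. A quick sanity check (this snippet is not part of the original demo, just a sketch):

```scala
// Print the configured SQL extensions; expect the RangerSparkSQLExtension class
// name from Step 3 when spark-authorizer is enabled, "<not set>" otherwise.
println(spark.conf.get("spark.sql.extensions", "<not set>"))
```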
157 | 158 | ### Dataset/DataFrame 159 | 160 | ```scala 161 | scala> spark.read.table("catalog_page").limit(1).collect 162 | ``` 163 | ``` 164 | 17/12/11 14:46:33 ERROR optimizer.Authorizer: 165 | +===============================+ 166 | |Spark SQL Authorization Failure| 167 | |-------------------------------| 168 | |Permission denied: user [hzyaoqin] does not have [SELECT] privilege on [tpcds_10g_ext/catalog_page/cp_catalog_page_sk,cp_catalog_page_id,cp_promo_id,cp_start_date_sk,cp_end_date_sk,cp_department,cp_catalog_number,cp_catalog_page_number,cp_description,cp_type] 169 | |-------------------------------| 170 | |Spark SQL Authorization Failure| 171 | +===============================+ 172 | ``` 173 | Oops... 174 | 175 | ```scala 176 | scala> spark.read.table("call_center").limit(1).collect 177 | ``` 178 | ``` 179 | res3: Array[org.apache.spark.sql.Row] = Array([1,AAAAAAAABAAAAAAA,1998-01-01,null,null,2450952,NY Metro,large,2,1138,8AM-4PM,Bob Belcher,6,More than other authori,Shared others could not count fully dollars. New members ca,Julius Tran,3,pri,6,cally,730,Ash Hill,Boulevard,Suite 0,Midway,Williamson County,TN,31904,United States,-5.00,0.11]) 180 | ``` 181 | LOL... 182 | 183 | --- 184 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-leap-day 2 | -------------------------------------------------------------------------------- /conf/hive-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 19 | 20 | 21 | 22 | 23 | hive.security.authorization.enabled 24 | true 25 | 26 | 27 | 28 | hive.security.authorization.manager 29 | org.apache.ranger.authorization.hive.authorizer.RangerHiveAuthorizerFactory 30 | 31 | 32 | 33 | hive.security.authenticator.manager 34 | org.apache.hadoop.hive.ql.security.SessionStateUserAuthenticator 35 | 36 | 37 | 38 | hive.conf.restricted.list 39 | hive.security.authorization.enabled,hive.security.authorization.manager,hive.security.authenticator.manager 40 | 41 | 42 | -------------------------------------------------------------------------------- /conf/ranger-hive-audit.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 19 | 20 | 21 | 22 | 23 | xasecure.audit.is.enabled 24 | true 25 | 26 | 27 | 28 | xasecure.audit.destination.db 29 | false 30 | 31 | 32 | 33 | xasecure.audit.destination.db.jdbc.driver 34 | com.mysql.jdbc.Driver 35 | 36 | 37 | 38 | xasecure.audit.destination.db.jdbc.url 39 | jdbc:mysql://10.171.161.78/ranger 40 | 41 | 42 | 43 | xasecure.audit.destination.db.password 44 | rangeradmin 45 | 46 | 47 | 48 | xasecure.audit.destination.db.user 49 | rangeradmin 50 | 51 | 52 | -------------------------------------------------------------------------------- /conf/ranger-hive-security.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 19 | 20 | 21 | 22 | 23 | ranger.plugin.hive.policy.rest.url 24 | ranger admin address like http://ranger-admin.org:6080 25 | 26 | 27 | 28 | ranger.plugin.hive.service.name 29 | a ranger hive service name 30 | 31 | 32 | 33 | ranger.plugin.hive.policy.cache.dir 34 | ./a ranger hive service name/policycache 35 | 36 | 37 | 38 | ranger.plugin.hive.policy.pollIntervalMs 39 | 5000 40 | 41 | 42 | 43 | ranger.plugin.hive.policy.source.impl 44 | org.apache.ranger.admin.client.RangerAdminRESTClient 45 | 46 | 47 | 
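The `conf` files above are read from `$SPARK_HOME/conf` at runtime. Besides the `spark-defaults.conf` entry shown in Step 3 of the README, the extension can also be attached when the session is built; the following is a minimal sketch, assuming Spark 2.2+ with Hive support (the application name is only an illustration):

```scala
import org.apache.spark.sql.SparkSession

// Register the Ranger-backed authorizer extension programmatically instead of
// relying on $SPARK_HOME/conf/spark-defaults.conf.
val spark = SparkSession.builder()
  .appName("spark-authorizer-demo") // hypothetical application name
  .enableHiveSupport()
  .config("spark.sql.extensions",
    "org.apache.ranger.authorization.spark.authorizer.RangerSparkSQLExtension")
  .getOrCreate()
```

Once created this way, the session applies the same authorization checks as with the `spark-defaults.conf` approach.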
-------------------------------------------------------------------------------- /docs/building-spark-authorizer.md: --------------------------------------------------------------------------------
1 | # Building Spark Authorizer
2 | 
3 | Spark Authorizer is built using [Apache Maven](http://maven.apache.org). To build it, run:
4 | 
5 | ```bash
6 | mvn clean package
7 | ```
8 | 
9 | Notes from Spark:
10 | > The Maven-based build is the build of reference for Apache Spark.
11 | Building Spark using Maven requires Maven 3.3.9 or newer and Java 7+.
12 | Note that support for Java 7 is deprecated as of Spark 2.0.0 and may be removed in Spark 2.2.0.
13 | 
14 | So, I suggest you build this library using the same Maven / Java / Scala versions.
15 | 
16 | ## Building against different versions of Apache Spark
17 | 
18 | By default, Spark Authorizer is built against Spark 2.2.2, which may be incompatible with other Spark branches.
19 | 
20 | ```bash
21 | # build for spark 2.2.x
22 | mvn clean package -Pspark-2.2
23 | ```
24 | 
25 | ```bash
26 | # build for spark 2.3.x
27 | mvn clean package -Pspark-2.3
28 | ```
29 | 
-------------------------------------------------------------------------------- /docs/img/ranger-prolcy-details.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/yaooqinn/spark-authorizer/f575b0b8714bd86c8383af9d584aef5d0ed29c4e/docs/img/ranger-prolcy-details.png
-------------------------------------------------------------------------------- /docs/install_plugin.md: --------------------------------------------------------------------------------
1 | # Installing Apache Ranger Hive Plugin For Apache Spark
2 | 
3 | This article illustrates how to install the Apache Ranger plugin, which is made for Apache Hive, into Apache Spark with [spark-authorizer](https://github.com/yaooqinn/spark-authorizer). It guarantees column/row-level fine-grained [ACL Management for Spark SQL](https://yaooqinn.github.io/spark-authorizer/docs/spark_sql_authorization.html).
4 | 
5 | Apache Spark is bundled with built-in Hive Metastore client (version 1.2.1.spark2) jars when `-Phive` is enabled. AFAIK, this version of the Hive Metastore client is compatible with Hive Metastore server 1.2.1 and higher versions.
6 | 
7 | We **DO NOT** support configuring `spark.sql.hive.metastore.jars` to `maven` or a location of the jars used to instantiate the `HiveMetastoreClient`; `builtin` is the one and only option.
8 | 
9 | Apache Ranger upgrades quite fast; one of the reasons may be to catch up with higher Hive releases. AFAIK, Apache Ranger 0.6.x and higher versions do not support Hive [1.2.1](https://issues.apache.org/jira/browse/RANGER-1056) anymore, so you may use Apache Ranger 0.5.x to avoid the underlying pitfalls.
10 | 
11 | An official installation guide of Apache Ranger 0.5.x can be found [here](https://cwiki.apache.org/confluence/display/RANGER/Apache+Ranger+0.5.0+Installation). The remainder of this article will guide you through installing the Apache Ranger Hive plugin for Apache Spark, which is not covered in the official documentation.
12 | 
13 | ## Building Apache Ranger
14 | 
15 | 1. git clone git@github.com:apache/ranger.git
16 | 2. cd ranger
17 | 3. git fetch https://github.com/apache/ranger.git ranger-0.5.3-rc3:ranger-0.5.3
18 | 4. git checkout ranger-0.5.3
19 | 5. 
mvn clean compile package assembly:assembly install -Dmaven.test.skip=true 20 | 21 | If you failed to build the project, please refer to the instructions of the official doc to see if there are any prerequisites. 22 | 23 | If you successfully make the mvn command work, all archives of Ranger admin and plugins will be generated in `./target`, including `ranger-0.5.3-hive-plugin.tar.gz` which is exactly you need for next steps 24 | ``` 25 | -rw-r--r-- 1 kent hadoop 163667362 Oct 15 15:38 ranger-0.5.3-admin.tar.gz 26 | -rw-r--r-- 1 kent hadoop 164655504 Oct 15 15:38 ranger-0.5.3-admin.zip 27 | -rw-r--r-- 1 kent hadoop 16771141 Oct 15 15:36 ranger-0.5.3-hbase-plugin.tar.gz 28 | -rw-r--r-- 1 kent hadoop 16797394 Oct 15 15:36 ranger-0.5.3-hbase-plugin.zip 29 | -rw-r--r-- 1 kent hadoop 16722944 Oct 15 15:35 ranger-0.5.3-hdfs-plugin.tar.gz 30 | -rw-r--r-- 1 kent hadoop 16747829 Oct 15 15:35 ranger-0.5.3-hdfs-plugin.zip 31 | -rw-r--r-- 1 kent hadoop 16139126 Oct 15 15:35 ranger-0.5.3-hive-plugin.tar.gz 32 | -rw-r--r-- 1 kent hadoop 16165266 Oct 15 15:36 ranger-0.5.3-hive-plugin.zip 33 | -rw-r--r-- 1 kent hadoop 32975495 Oct 15 15:36 ranger-0.5.3-kafka-plugin.tar.gz 34 | -rw-r--r-- 1 kent hadoop 33012135 Oct 15 15:37 ranger-0.5.3-kafka-plugin.zip 35 | -rw-r--r-- 1 kent hadoop 71917257 Oct 15 15:38 ranger-0.5.3-kms.tar.gz 36 | -rw-r--r-- 1 kent hadoop 72005470 Oct 15 15:39 ranger-0.5.3-kms.zip 37 | -rw-r--r-- 1 kent hadoop 21298145 Oct 15 15:36 ranger-0.5.3-knox-plugin.tar.gz 38 | -rw-r--r-- 1 kent hadoop 21322990 Oct 15 15:36 ranger-0.5.3-knox-plugin.zip 39 | -rw-r--r-- 1 kent hadoop 34600 Oct 15 15:38 ranger-0.5.3-migration-util.tar.gz 40 | -rw-r--r-- 1 kent hadoop 38014 Oct 15 15:38 ranger-0.5.3-migration-util.zip 41 | -rw-r--r-- 1 kent hadoop 18485767 Oct 15 15:39 ranger-0.5.3-ranger-tools.tar.gz 42 | -rw-r--r-- 1 kent hadoop 18495143 Oct 15 15:39 ranger-0.5.3-ranger-tools.zip 43 | -rw-r--r-- 1 kent hadoop 22416054 Oct 15 15:37 ranger-0.5.3-solr-plugin.tar.gz 44 | -rw-r--r-- 1 kent hadoop 22441083 Oct 15 15:37 ranger-0.5.3-solr-plugin.zip 45 | -rw-r--r-- 1 kent hadoop 3606416 Oct 15 15:39 ranger-0.5.3-src.tar.gz 46 | -rw-r--r-- 1 kent hadoop 5481890 Oct 15 15:39 ranger-0.5.3-src.zip 47 | -rw-r--r-- 1 kent hadoop 34769024 Oct 15 15:36 ranger-0.5.3-storm-plugin.tar.gz 48 | -rw-r--r-- 1 kent hadoop 34788044 Oct 15 15:36 ranger-0.5.3-storm-plugin.zip 49 | -rw-r--r-- 1 kent hadoop 13512547 Oct 15 15:38 ranger-0.5.3-usersync.tar.gz 50 | -rw-r--r-- 1 kent hadoop 13534930 Oct 15 15:38 ranger-0.5.3-usersync.zip 51 | -rw-r--r-- 1 kent hadoop 15942158 Oct 15 15:37 ranger-0.5.3-yarn-plugin.tar.gz 52 | -rw-r--r-- 1 kent hadoop 15969320 Oct 15 15:37 ranger-0.5.3-yarn-plugin.zip 53 | ``` 54 | 55 | ## Applying Plugin to Apache Spark 56 | 57 | 1. cd target 58 | 2. tar zxf ranger-0.5.3-hive-plugin.tar.gz 59 | 3. 
cd ranger-0.5.3-hive-plugin/lib 60 | 61 | ``` 62 | drwxr-xr-x 2 kent hadoop 4096 Oct 16 12:34 ranger-hive-plugin-impl 63 | -rw-r--r-- 1 kent hadoop 16061 Oct 15 15:35 ranger-hive-plugin-shim-0.5.3.jar 64 | -rw-r--r-- 1 kent hadoop 16545 Oct 15 15:35 ranger-plugin-classloader-0.5.3.jar 65 | ``` 66 | 67 | Fistly, copy the above folds and files to `$SPARK_HOME/jars` 68 | 69 | ``` 70 | jersey-client-1.9.jar 71 | jersey-bundle-1.4.jar 72 | eclipselink-2.5.2.jar 73 | noggit-0.6.jar 74 | gson-2.2.4.jar 75 | httpclient-4.5.3.jar 76 | httpcore-4.4.6.jar 77 | httpmime-4.5.3.jar 78 | javax.persistence-2.1.0.jar 79 | mybatis-3.2.8.jar 80 | mysql-connector-java-5.1.39.jar 81 | ``` 82 | 83 | Secondly, add the jars listed above to `$SPARK_HOME/jars/ranger-hive-plugin-impl/` if missing. 84 | 85 | ## Configuring Ranger for Apache Spark 86 | 87 | 88 | Firstly, add the following configurations in `hive-site.xml` to enable Ranger Authorization. 89 | 90 | ``` 91 | 92 | hive.security.authorization.enabled 93 | true 94 | 95 | 96 | 97 | hive.security.authorization.manager 98 | org.apache.ranger.authorization.hive.authorizer.RangerHiveAuthorizerFactory 99 | 100 | 101 | 102 | hive.security.authenticator.manager 103 | org.apache.hadoop.hive.ql.security.SessionStateUserAuthenticator 104 | 105 | 106 | 107 | hive.conf.restricted.list 108 | hive.security.authorization.enabled,hive.security.authorization.manager,hive.security.authenticator.manager 109 | 110 | ``` 111 | 112 | Secondly, create `ranger-hive-security.xml` in `$SPARK_HOME/conf` and add the following configurations for pointing to the right ranger admin server. 113 | 114 | ``` 115 | 116 | 117 | 118 | ranger.plugin.hive.policy.rest.url 119 | ranger admin address like http://ranger-admin.org:6080 120 | 121 | 122 | 123 | ranger.plugin.hive.service.name 124 | a ranger hive service name 125 | 126 | 127 | 128 | ranger.plugin.hive.policy.cache.dir 129 | ./a ranger hive service name/policycache 130 | 131 | 132 | 133 | ranger.plugin.hive.policy.pollIntervalMs 134 | 5000 135 | 136 | 137 | 138 | ranger.plugin.hive.policy.source.impl 139 | org.apache.ranger.admin.client.RangerAdminRESTClient 140 | 141 | 142 | 143 | ``` 144 | 145 | At last, create create `ranger-hive-audit.xml` in `$SPARK_HOME/conf` and add the following configurations to enable/disable auditing. 146 | 147 | ``` 148 | 149 | 150 | 151 | xasecure.audit.is.enabled 152 | true 153 | 154 | 155 | 156 | xasecure.audit.destination.db 157 | false 158 | 159 | 160 | 161 | xasecure.audit.destination.db.jdbc.driver 162 | com.mysql.jdbc.Driver 163 | 164 | 165 | 166 | xasecure.audit.destination.db.jdbc.url 167 | jdbc:mysql://10.171.161.78/ranger 168 | 169 | 170 | 171 | xasecure.audit.destination.db.password 172 | rangeradmin 173 | 174 | 175 | 176 | xasecure.audit.destination.db.user 177 | rangeradmin 178 | 179 | 180 | 181 | ``` 182 | 183 | Ranger Hive plugins should work well through `spark-authorizer`, when set `spark.sql.extensions`=`org.apache.ranger.authorization.spark.authorizer.RangerSparkSQLExtension` 184 | ## Additional Notes 185 | 186 | If you are using Apache Spark in `cluster` mode, the jar files under `$SPARK_HOME/jars/ranger-hive-plugin-impl/` will not be uploaded automatically. If you are not familiar with Spark source code and unable to make some modifications, I suggest you copy all files in `$SPARK_HOME/jars/ranger-hive-plugin-impl/` to `$SPARK_HOME/jars/` and **DELETE** `ranger-hive-plugin-shim-0.5.3.jar` in `$SPARK_HOME/jars/`. 
This works fine for your whole Spark application but with a tiny problem of Spark UI because of jersey jars confliction. 187 | 188 | Also for `cluster` mode Spark applications, `ranger.plugin.hive.policy.cache.dir` in `ranger-hive-security.xml` must be able to create on all NodeManager nodes for the Spark Driver could be generated anywhere. One convenient and effective way is to configure the relative path, such as, 189 | ``` 190 | 191 | ranger.plugin.hive.policy.cache.dir 192 | policycache 193 | 194 | ``` 195 | -------------------------------------------------------------------------------- /docs/manually_test_cases.md: -------------------------------------------------------------------------------- 1 | # Spark SQL Authorization Test Cases 2 | ## Prerequisites 3 | 4 | - Ranger Admin Server 5 | - setup ranger admin 6 | - setup some ranger policies 7 | - Ranger Hive Plugin 8 | - setup ranger hive plugin for spark 9 | - configure ranger plugin configurations to connect ranger admin 10 | - Hive Metastore 11 | - setup Hive Metastore server 12 | - setup ranger admin and spark connected rightly 13 | - Spark 14 | - maybe hdfs 15 | - maybe on yarn 16 | - spark-shell 17 | 18 | ```bash 19 | bin/spark-shell --proxy-user hzyaoqin --conf spark.sql.warehouse.dir=/user/hzyaoqin/warehouse 20 | ``` 21 | 22 | ```bash 23 | hadoop@hzadg-hadoop-dev2:~/spark-2.1.2-bin-hadoop2.7$ bin/spark-shell --proxy-user hzyaoqin --conf spark.sql.warehouse.dir=/user/hzyaoqin/warehouse 24 | 18/06/07 11:00:47 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable 25 | Setting default log level to "WARN". 26 | To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel). 27 | 18/06/07 11:00:56 WARN SparkContext: Support for Java 7 is deprecated as of Spark 2.0.0 28 | 18/06/07 11:00:59 WARN DomainSocketFactory: The short-circuit local reads feature cannot be used because libhadoop cannot be loaded. 29 | 18/06/07 11:00:59 WARN HiveConf: HiveConf of name hive.exec.partition.num.limit does not exist 30 | 18/06/07 11:01:00 WARN HiveConf: HiveConf of name hive.exec.partition.num.limit does not exist 31 | 18/06/07 11:01:00 WARN HiveConf: HiveConf of name hive.exec.partition.num.limit does not exist 32 | 18/06/07 11:01:00 WARN Client: Neither spark.yarn.jars nor spark.yarn.archive is set, falling back to uploading libraries under SPARK_HOME. 33 | Spark context Web UI available at http://10.201.168.144:4040 34 | Spark context available as 'sc' (master = yarn, app id = application_1522381253211_0116). 35 | Spark session available as 'spark'. 36 | Welcome to 37 | ____ __ 38 | / __/__ ___ _____/ /__ 39 | _\ \/ _ \/ _ `/ __/ '_/ 40 | /___/ .__/\_,_/_/ /_/\_\ version 2.1.2 41 | /_/ 42 | 43 | Using Scala version 2.11.8 (Java HotSpot(TM) 64-Bit Server VM, Java 1.7.0_67) 44 | Type in expressions to have them evaluated. 45 | Type :help for more information. 46 | 47 | scala> sc.setLogLevel("info"); import org.apache.spark.sql.catalyst.optimizer.Authorizer; spark.experimental.extraOptimizations ++= Seq(Authorizer) 48 | 18/06/07 11:10:50 INFO SharedState: Warehouse path is '/user/hzyaoqin/warehouse'. 49 | 18/06/07 11:10:50 INFO HiveUtils: Initializing HiveMetastoreConnection version 1.2.1 using Spark classes. 
50 | 18/06/07 11:10:51 WARN HiveConf: HiveConf of name hive.exec.partition.num.limit does not exist 51 | 18/06/07 11:10:51 INFO metastore: Trying to connect to metastore with URI thrift://hzadg-hadoop-dev2.server.163.org:9083 52 | 18/06/07 11:10:51 INFO metastore: Connected to metastore. 53 | 18/06/07 11:10:51 INFO SessionState: Created local directory: /tmp/d0216e65-2506-4d7c-8a7d-13f19fba55c0_resources 54 | 18/06/07 11:10:51 INFO SessionState: Created HDFS directory: /tmp/hive/hzyaoqin/d0216e65-2506-4d7c-8a7d-13f19fba55c0 55 | 18/06/07 11:10:51 INFO SessionState: Created local directory: /tmp/hadoop/d0216e65-2506-4d7c-8a7d-13f19fba55c0 56 | 18/06/07 11:10:51 INFO SessionState: Created HDFS directory: /tmp/hive/hzyaoqin/d0216e65-2506-4d7c-8a7d-13f19fba55c0/_tmp_space.db 57 | 18/06/07 11:10:51 INFO HiveClientImpl: Warehouse location for Hive client (version 1.2.1) is /user/hzyaoqin/warehouse 58 | import org.apache.spark.sql.catalyst.optimizer.Authorizer 59 | 60 | scala> 61 | ``` 62 | ## RunnableCommand 63 | 64 | ### AlterDatabasePropertiesCommand 65 | 66 | ```sql 67 | ALTER (DATABASE|SCHEMA) database_name SET DBPROPERTIES (property_name=property_value, ...) 68 | ``` 69 | 70 | ```sql 71 | sql("alter database default set dbproperties ('abc'='xyz')").show 72 | ``` 73 | 74 | ### AlterTableAddColumnsCommand 75 | 76 | ```sql 77 | ALTER TABLE table_identifier ADD COLUMNS (col_name data_type [COMMENT col_comment], ...); 78 | ``` 79 | 80 | ### AlterTableChangeColumnCommand 81 | 82 | ```sql 83 | ALTER TABLE table_identifier 84 | CHANGE [COLUMN] column_old_name column_new_name column_dataType [COMMENT column_comment] 85 | [FIRST | AFTER column_name]; 86 | ``` 87 | 88 | ### AlterTableDropPartitionCommand 89 | 90 | ```sql 91 | ALTER TABLE table DROP [IF EXISTS] PARTITION spec1[, PARTITION spec2, ...] 
[PURGE]; 92 | ``` 93 | 94 | ```sql 95 | 96 | ``` 97 | 98 | ### AlterTableRecoverPartitionsCommand 99 | 100 | ```sql 101 | ALTER TABLE table RECOVER PARTITIONS; 102 | MSCK REPAIR TABLE table; 103 | ``` 104 | 105 | ### AlterTableRenamePartitionCommand 106 | 107 | ```sql 108 | ALTER TABLE table PARTITION spec1 RENAME TO PARTITION spec2; 109 | ``` 110 | 111 | 112 | ### AlterTableRenameCommand 113 | 114 | ```sql 115 | ALTER TABLE table1 RENAME TO table2; 116 | ALTER VIEW view1 RENAME TO view2; 117 | 118 | ``` 119 | 120 | ```sql 121 | sql("alter table src10 rename to src11").show 122 | ``` 123 | 124 | ### AlterTableSetPropertiesCommand 125 | 126 | ```sql 127 | ALTER TABLE table1 SET TBLPROPERTIES ('key1' = 'val1', 'key2' = 'val2', ...); 128 | ALTER VIEW view1 SET TBLPROPERTIES ('key1' = 'val1', 'key2' = 'val2', ...); 129 | ``` 130 | 131 | ### AlterTableUnsetPropertiesCommand 132 | 133 | ```sql 134 | ALTER TABLE table1 UNSET TBLPROPERTIES [IF EXISTS] ('key1', 'key2', ...); 135 | ALTER VIEW view1 UNSET TBLPROPERTIES [IF EXISTS] ('key1', 'key2', ...); 136 | ``` 137 | 138 | ### AlterTableSerDePropertiesCommand 139 | 140 | ```sql 141 | ALTER TABLE table [PARTITION spec] SET SERDE serde_name [WITH SERDEPROPERTIES props]; 142 | ALTER TABLE table [PARTITION spec] SET SERDEPROPERTIES serde_properties; 143 | ``` 144 | 145 | ### AlterTableSetLocationCommand 146 | ```sql 147 | ALTER TABLE table_name [PARTITION partition_spec] SET LOCATION "loc"; 148 | ``` 149 | 150 | ### AlterViewAsCommand 151 | 152 | ```sql 153 | ALTER VIEW test_view AS SELECT 3 AS i, 4 AS j 154 | ``` 155 | 156 | Treat select clause as QUERY Hive Operation 157 | 158 | ### AnalyzeColumnCommand 159 | 160 | ```sql 161 | ANALYZE TABLE tableName COMPUTE STATISTICS FOR COLUMNS some_random_column 162 | ``` 163 | 164 | ### AnalyzeTableCommand 165 | 166 | ```sql 167 | analyze table tableName compute statistics noscan; 168 | ``` 169 | 170 | ### AnalyzePartitionCommand 171 | 172 | ```sql 173 | ANALYZE TABLE t PARTITION(ds='2008-04-09', hr=11) COMPUTE STATISTICS; 174 | ANALYZE TABLE t PARTITION(ds='2008-04-09', hr=11) COMPUTE STATISTICS; 175 | ANALYZE TABLE t PARTITION(ds='2008-04-09', hr) COMPUTE STATISTICS; 176 | ANALYZE TABLE t PARTITION(ds='2008-04-09', hr) COMPUTE STATISTICS noscan; 177 | ANALYZE TABLE t PARTITION(ds, hr=11) COMPUTE STATISTICS noscan; 178 | ANALYZE TABLE t PARTITION(ds, hr) COMPUTE STATISTICS; 179 | ``` 180 | 181 | ### CreateDatabaseCommand 182 | 183 | ```sql 184 | CREATE (DATABASE|SCHEMA) [IF NOT EXISTS] database_name 185 | [COMMENT database_comment] 186 | [LOCATION database_directory] 187 | [WITH DBPROPERTIES (property_name=property_value, ...)]; 188 | ``` 189 | 190 | ### CreateDataSourceTableAsSelectCommand 191 | 192 | ```sql 193 | CREATE TABLE [IF NOT EXISTS] [db_name.]table_name 194 | USING format OPTIONS ([option1_name "option1_value", option2_name "option2_value", ...]) 195 | AS SELECT ... 196 | ``` 197 | 198 | ### CreateHiveTableAsSelectCommand 199 | 200 | ```sql 201 | CREATE TABLE [IF NOT EXISTS] [db_name.]table_name 202 | STORED AS format AS SELECT ... 
203 | ``` 204 | 205 | ### CreateFunctionCommand 206 | 207 | ```sql 208 | CREATE TEMPORARY FUNCTION functionName AS className [USING JAR|FILE 'uri' [, JAR|FILE 'uri']] 209 | 210 | CREATE FUNCTION [databaseName.]functionName AS className [USING JAR|FILE 'uri' [, JAR|FILE 'uri']] 211 | ``` 212 | 213 | ### CreateTableCommand 214 | 215 | ```sql 216 | 217 | CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name 218 | [(col1 data_type [COMMENT col_comment], ...)] 219 | [COMMENT table_comment] 220 | [PARTITIONED BY (col3 data_type [COMMENT col_comment], ...)] 221 | [CLUSTERED BY (col1, ...) [SORTED BY (col1 [ASC|DESC], ...)] INTO num_buckets BUCKETS] 222 | [SKEWED BY (col1, col2, ...) ON ((col_value, col_value, ...), ...) 223 | [STORED AS DIRECTORIES] 224 | [ROW FORMAT row_format] 225 | [STORED AS file_format | STORED BY storage_handler_class [WITH SERDEPROPERTIES (...)]] 226 | [LOCATION path] 227 | [TBLPROPERTIES (property_name=property_value, ...)] 228 | [AS select_statement]; 229 | 230 | ``` 231 | 232 | ### CreateDataSourceTableCommand 233 | 234 | ```sql 235 | CREATE TABLE [IF NOT EXISTS] [db_name.]table_name 236 | [(col1 data_type [COMMENT col_comment], ...)] 237 | USING format OPTIONS ([option1_name "option1_value", option2_name "option2_value", ...]) 238 | ``` 239 | 240 | ### CreateTableLikeCommand 241 | 242 | ```sql 243 | CREATE TABLE [IF NOT EXISTS] [db_name.]table_name 244 | LIKE [other_db_name.]existing_table_name 245 | ``` 246 | 247 | ### CreateViewCommand 248 | 249 | ```sql 250 | CREATE view t1 partitioned 251 | ON (ds) AS 252 | SELECT * 253 | FROM ( 254 | SELECT KEY, 255 | value, 256 | ds 257 | FROM t1_new 258 | UNION ALL 259 | SELECT KEY, 260 | value, 261 | t1_old.ds 262 | FROM t1_old 263 | JOIN t1_mapping 264 | ON t1_old.keymap = t1_mapping.keymap 265 | AND t1_old.ds = t1_mapping.ds ) subq; 266 | ``` 267 | 268 | ### CacheTableCommand 269 | 270 | ```sql 271 | CACHE TABLE testCacheTable AS SELECT * FROM src; 272 | ``` 273 | 274 | ### CreateTempViewUsing 275 | 276 | ### DescribeColumnCommand 277 | 278 | ```sql 279 | DESCRIBE [EXTENDED|FORMATTED] table_name column_name; 280 | ``` 281 | 282 | ### DescribeDatabaseCommand 283 | 284 | ```sql 285 | DESCRIBE DATABASE [EXTENDED] db_name 286 | ``` 287 | 288 | ### DescribeFunctionCommand 289 | 290 | ```sql 291 | DESCRIBE FUNCTION [EXTENDED] upper; 292 | ``` 293 | 294 | ### DescribeTableCommand 295 | 296 | ```sql 297 | DESCRIBE [EXTENDED|FORMATTED] table_name partitionSpec?; 298 | ``` 299 | 300 | ### DropDatabaseCommand 301 | 302 | ```sql 303 | DROP DATABASE [IF EXISTS] database_name [RESTRICT|CASCADE]; 304 | ``` 305 | 306 | 307 | ### DropFunctionCommand 308 | 309 | ```sql 310 | DROP TEMPORARY FUNCTION helloworld; 311 | DROP TEMPORARY FUNCTION IF EXISTS helloworld; 312 | DROP FUNCTION hello.world; 313 | DROP FUNCTION IF EXISTS hello.world; 314 | ``` 315 | 316 | 317 | ### DropTableCommand 318 | 319 | ```sql 320 | DROP TABLE [IF EXISTS] table_name; 321 | DROP VIEW [IF EXISTS] [db_name.]view_name; 322 | ``` 323 | 324 | ### ExplainCommand 325 | 326 | ```sql 327 | EXPLAIN (EXTENDED | CODEGEN) SELECT * FROM ... 328 | ``` 329 | 330 | ### InsertIntoDataSourceCommand 331 | 332 | ### InsertIntoDataSourceDirCommand 333 | 334 | ```sql 335 | INSERT OVERWRITE DIRECTORY (path=STRING)? 336 | USING format OPTIONS ([option1_name "option1_value", option2_name "option2_value", ...]) 337 | SELECT ... 
338 | ``` 339 | 340 | ### InsertIntoHadoopFsRelationCommand 341 | 342 | 343 | ### InsertIntoHiveDirCommand 344 | ```sql 345 | INSERT OVERWRITE [LOCAL] DIRECTORY 346 | path 347 | [ROW FORMAT row_format] 348 | [STORED AS file_format] 349 | SELECT ... 350 | ``` 351 | 352 | ### LoadDataCommand 353 | ```sql 354 | LOAD DATA [LOCAL] INPATH 'filepath' [OVERWRITE] INTO TABLE tablename 355 | [PARTITION (partcol1=val1, partcol2=val2 ...)] 356 | ``` 357 | 358 | 359 | ### SaveIntoDataSourceCommand 360 | 361 | ### SetCommand 362 | ```sql 363 | set key = value; 364 | set -v; 365 | set; 366 | ``` 367 | 368 | ### SetDatabaseCommand 369 | 370 | ```sql 371 | USE database_name; 372 | ``` 373 | 374 | ### ShowCreateTableCommand 375 | 376 | ```sql 377 | SHOW CREATE TABLE t; 378 | ``` 379 | 380 | ### ShowColumnsCommand 381 | ```sql 382 | SHOW COLUMNS (FROM | IN) table_identifier [(FROM | IN) database]; 383 | ``` 384 | 385 | ### ShowDatabasesCommand 386 | ```sql 387 | SHOW (DATABASES|SCHEMAS) [LIKE 'identifier_with_wildcards']; 388 | ``` 389 | 390 | ### ShowFunctionsCommand 391 | ```sql 392 | SHOW FUNCTIONS [LIKE pattern] 393 | ``` 394 | 395 | ### ShowPartitionsCommand 396 | ```sql 397 | SHOW PARTITIONS [db_name.]table_name [PARTITION(partition_spec)]; 398 | ``` 399 | 400 | ### ShowTablesCommand 401 | ```sql 402 | SHOW TABLES [(IN|FROM) database_name] [[LIKE] 'identifier_with_wildcards']; 403 | ``` 404 | 405 | ### ShowTablePropertiesCommand 406 | ```sql 407 | SHOW TBLPROPERTIES table_name[('propertyKey')]; 408 | ``` 409 | 410 | 411 | ### TruncateTableCommand 412 | 413 | ```sql 414 | TRUNCATE TABLE tablename [PARTITION (partcol1=val1, partcol2=val2 ...)] 415 | ``` 416 | 417 | 418 | ### UncacheTableCommand 419 | 420 | ```sql 421 | uncache table t; 422 | ``` 423 | 424 | ### Ignored 425 | 426 | ``` 427 | // AddFileCommand 428 | // AddJarCommand 429 | // ... 430 | 431 | ``` 432 | 433 | ## Projection 434 | 435 | ## LeafNode 436 | 437 | ## UnaryNode 438 | 439 | ## BinaryNode 440 | 441 | ## Union 442 | 443 | -------------------------------------------------------------------------------- /docs/ranger_authorization.md: -------------------------------------------------------------------------------- 1 | # Spark SQL Ranger Security Support Guide 2 | 3 | Ranger security support is one of the available Authorization methods for Spark SQL with [spark-authorizer](https://github.com/yaooqinn/spark-authorizer). 4 | 5 | Ranger is a framework to enable, monitor and manage comprehensive data security across the Hadoop platform. The [[spark-authorizer](https://github.com/yaooqinn/spark-authorizer) enables Spark SQL with control access ability reusing [Ranger Plugin for Hive MetaStore 6 | ](https://cwiki.apache.org/confluence/display/RANGER/Ranger+Plugin+for+Hive+MetaStore). [Ranger](https://ranger.apache.org/) makes the scope of existing SQL-Standard Based Authorization expanded but without supporting Spark SQL. [spark-authorizer](https://github.com/yaooqinn/spark-authorizer) sticks them together. 
7 | 8 | ## Ranger Security Support 9 | 10 | |Configuration| Configuration File|Example| Description | 11 | |---|---|---|---| 12 | |ranger.plugin.hive.policy.rest.url|ranger-hive-security.xml|http://ranger.admin.one:6080,http://ranger.admin.two.lt.163.org:6080|Comma-separated list of Ranger admin addresses| 13 | |ranger.plugin.hive.service.name|ranger-hive-security.xml||Name of the Ranger service containing policies for this Hive instance| 14 | |ranger.plugin.hive.policy.cache.dir|ranger-hive-security.xml|policycache|Local directory used to cache Ranger policies| 15 | 16 | Create `ranger-hive-security.xml` in `$SPARK_HOME/conf` with the configurations above properly set. 17 | 18 | ## Configure Hive Metastore Client Side 19 | ```xml 20 | <property> 21 |   <name>hive.security.authorization.enabled</name> 22 |   <value>true</value> 23 | </property> 24 | <property> 25 |   <name>hive.security.authorization.manager</name> 26 |   <value>org.apache.ranger.authorization.hive.authorizer.RangerHiveAuthorizerFactory</value> 27 | </property> 28 | <property> 29 |   <name>hive.security.authenticator.manager</name> 30 |   <value>org.apache.hadoop.hive.ql.security.SessionStateUserAuthenticator</value> 31 | </property> 32 | <property> 33 |   <name>hive.conf.restricted.list</name> 34 |   <value>hive.security.authorization.enabled,hive.security.authorization.manager,hive.security.authenticator.manager</value> 35 | </property> 36 | ``` 37 | 38 | Add the configurations above to `$SPARK_HOME/conf/hive-site.xml` to enable Ranger security support. 39 | 40 | ## Ranger Audit Support 41 | 42 | All access to Spark SQL/Hive tables that is authorized by Ranger is automatically audited by Ranger. Audit destinations such as HDFS or Solr are supported. 43 | 44 | |Configuration| Configuration File|Example| Description | 45 | |---|---|---|---| 46 | |xasecure.audit.is.enabled|ranger-hive-audit.xml|false|When true, auditing is enabled| 47 | |xasecure.audit.jpa.javax.persistence.jdbc.driver|ranger-hive-audit.xml|com.mysql.jdbc.Driver|JDBC driver for auditing to a MySQL database destination| 48 | |xasecure.audit.jpa.javax.persistence.jdbc.url|ranger-hive-audit.xml|jdbc:mysql://address/dbname|JDBC URL of the database instance audited to| 49 | |xasecure.audit.jpa.javax.persistence.jdbc.user|ranger-hive-audit.xml|*username*|User name| 50 | |xasecure.audit.jpa.javax.persistence.jdbc.password|ranger-hive-audit.xml|*Password*|Password| 51 | 52 | Create `ranger-hive-audit.xml` in `$SPARK_HOME/conf` with the configurations above properly set to enable or disable auditing. 53 | 54 | ## Install `ranger-hive-plugin` for Spark SQL 55 | 56 | Please refer to [Install and Enable Ranger Hive Plugin](https://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.6.4/bk_command-line-installation/content/ch14s05s03s02.html) for an overview of how to set up the Ranger jars for Spark SQL. 57 | -------------------------------------------------------------------------------- /docs/spark_sql_authorization.md: -------------------------------------------------------------------------------- 1 | # ACL Management for Spark SQL 2 | 3 | Three primary modes of Spark SQL authorization are available with spark-authorizer: 4 | 5 | ## Storage-Based Authorization 6 | 7 | Enabling Storage-Based Authorization in the `Hive Metastore Server` uses HDFS permissions as the main source of verification and allows for a consistent data and metadata authorization policy. It controls metadata access by verifying that the user has permission to access the corresponding directories on HDFS.
Similar to `HiveServer2`, files and directories are translated into Hive metadata objects, such as databases, tables and partitions, and are protected from end users' queries when Spark SQL is offered as a service through [Kyuubi](https://github.com/yaooqinn/kyuubi), Livy, etc. 8 | 9 | Storage-Based Authorization offers database-, table- and partition-level coarse-grained access control. 10 | 11 | Please refer to the [Storage-Based Authorization Guide](https://yaooqinn.github.io/spark-authorizer/docs/storage_based_authorization.html) in the online documentation for an overview of how to configure Storage-Based Authorization for Spark SQL. 12 | 13 | ## SQL-Standard Based Authorization 14 | 15 | Enabling SQL-Standard Based Authorization gives users finer-grained access control compared with Storage-Based Authorization. On top of what Storage-Based Authorization provides, it extends control down to the view and column level. Unfortunately, Spark SQL does not support the GRANT/REVOKE statements that manage this access; they can only be issued through HiveServer2. The good news is that [spark-authorizer](https://github.com/yaooqinn/spark-authorizer) makes Spark SQL understand the fine-grained access control granted or revoked through Hive. 16 | 17 | For Spark SQL client users who can directly access HDFS, SQL-Standard Based Authorization can easily be bypassed. 18 | 19 | With [Kyuubi](https://github.com/yaooqinn/kyuubi), SQL-Standard Based Authorization is guaranteed, because the security configurations, metadata, and storage information are hidden from end users. 20 | 21 | Please refer to the [SQL-Standard Based Authorization Guide](https://yaooqinn.github.io/spark-authorizer/docs/sql_std_based_authorization.html) in the online documentation for an overview of how to configure SQL-Standard Based Authorization for Spark SQL. 22 | 23 | ## Ranger Security Support 24 | 25 | [Apache Ranger](https://ranger.apache.org/) is a framework to enable, monitor and manage comprehensive data security across the Hadoop platform, but it stops short of Spark and Spark SQL. The [spark-authorizer](https://github.com/yaooqinn/spark-authorizer) gives Spark SQL access control capabilities by reusing the [Ranger Plugin for Hive MetaStore 26 | ](https://cwiki.apache.org/confluence/display/RANGER/Ranger+Plugin+for+Hive+MetaStore). [Apache Ranger](https://ranger.apache.org/) expands the scope of the existing SQL-Standard Based Authorization but does not itself support Spark SQL; [spark-authorizer](https://github.com/yaooqinn/spark-authorizer) bridges the two. 27 | 28 | Please refer to the [Spark SQL Ranger Security Support Guide](https://yaooqinn.github.io/spark-authorizer/docs/ranger_authorization.html) in the online documentation for an overview of how to configure Ranger for Spark SQL. 29 | -------------------------------------------------------------------------------- /docs/sql_std_based_authorization.md: -------------------------------------------------------------------------------- 1 | # Spark SQL SQL-Standard Based Authorization Guide 2 | 3 | [SQL-Standard Based Authorization](https://cwiki.apache.org/confluence/display/Hive/SQL+Standard+Based+Hive+Authorization) is one of the available Authorization methods for Spark SQL with [spark-authorizer](https://github.com/yaooqinn/spark-authorizer).
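To make the division of labour concrete, here is a hypothetical walk-through, assuming a Hive-enabled `spark` session with the authorizer rule injected and SQL-Standard Based Authorization configured as described below; the database, table and user names are invented.

```scala
// 1. An admin grants the privilege on the Hive side, e.g. via beeline
//    against HiveServer2 (Spark SQL itself cannot run this statement):
//      GRANT SELECT ON TABLE sales.orders TO USER alice;

// 2. A query by alice on the granted table passes the privilege check:
spark.sql("SELECT order_id, amount FROM sales.orders WHERE ds = '2018-08-08'").show()

// 3. A query on a table alice was never granted is rejected by the authorizer
//    with an org.apache.hadoop.hive.ql.security.authorization.plugin
//    .HiveAccessControlException before any execution happens:
spark.sql("SELECT * FROM sales.salaries").show()
```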
4 | 5 | The [spark-authorizer](https://github.com/yaooqinn/spark-authorizer) can enable Spark SQL with Hive's [SQL-Standard Based Authorization](https://cwiki.apache.org/confluence/display/Hive/SQL+Standard+Based+Hive+Authorization) for fine-grained access control. It is based on the SQL standard for authorization, and uses the familiar GRANT/REVOKE statements to control access. 6 | 7 | For Spark SQL client users who can directly access HDFS, SQL-Standard Based Authorization can easily be bypassed. 8 | 9 | With [Kyuubi](https://github.com/yaooqinn/kyuubi), SQL-Standard Based Authorization is guaranteed, because the security configurations, metadata, and storage information are hidden from end users. It can be used along with Storage-Based Authorization on the `Hive Metastore Server`. 10 | 11 | ## Minimum Permissions 12 | 13 | The following table shows the minimum permissions required for Spark SQL when using SQL-Standard Based Authorization: 14 | 15 | Operation | Permission required 16 | ---|--- 17 | create table | ownership of database 18 | drop table | ownership 19 | describe table | select 20 | show partitions | select 21 | alter table location | ownership; URI privilege: RWX permission + ownership (for new location) 22 | alter partition location | ownership; URI privilege: RWX permission + ownership (for new partition location) 23 | alter table add partition | insert; URI privilege: RWX permission + ownership (for partition location) 24 | alter table drop partition | delete 25 | alter table (all of them except the ones listed above) | ownership 26 | truncate table | ownership 27 | create view | select "with grant option" 28 | alter view properties | ownership 29 | alter view rename | ownership 30 | drop view | ownership 31 | analyze table | select and insert 32 | show columns | select 33 | show table properties | select 34 | CTAS | select (of input) and ownership (of database) 35 | select | select 36 | insert | insert and delete (for overwrite) 37 | delete | delete 38 | load | insert (output); delete (output); URI privilege: RWX permission + ownership (input location) 39 | show create table | select "with grant option" 40 | create function | admin 41 | drop function | admin 42 | MSCK | admin 43 | alter database | admin 44 | create database | URI privilege: RWX permission + ownership (if custom location specified) 45 | explain | select 46 | drop database | ownership 47 | 48 | 49 | ## Privileges 50 | - Select: READ access to an object 51 | - Insert: access to ADD data to an object (table) 52 | - Update: access to run UPDATE queries on an object (table) 53 | - Delete: access to DELETE data in an object (table) 54 | - All Privileges: includes all of the above privileges 55 | 56 | 57 | ## Limitations 58 | 59 | Spark SQL does not support GRANT/REVOKE statements; these can only be executed through Hive. 60 | -------------------------------------------------------------------------------- /docs/storage_based_authorization.md: -------------------------------------------------------------------------------- 1 | # Spark SQL Storage-Based Authorization Guide 2 | 3 | Storage-Based Authorization is one of the available Authorization methods for Spark SQL with or without [spark-authorizer](https://github.com/yaooqinn/spark-authorizer). 4 | 5 | When the `Hive Metastore Server` is configured to use Storage-Based Authorization, it uses the file system permissions of the directories corresponding to the different kinds of metadata objects as the source of verification for the authorization policy.
Using this authorization method on the metastore server is recommended. 6 | 7 | With Hive Metastore as the external catalog for Spark SQL, each database or table has a corresponding directory on the file system used as the storage layer. Under this authorization model, the rwx permissions of that directory determine the permissions a user, or group, has on the database or table. 8 | 9 | With Hive 0.14 or later as Spark SQL's metastore client, this can be enabled even without [spark-authorizer](https://github.com/yaooqinn/spark-authorizer). 10 | 11 | 12 | ## Configuring Parameters for Storage-Based Authorization 13 | 14 | ### Hive Metastore Server Side 15 | 16 | To enable Storage-Based Authorization in the Hive metastore server, configure these properties in the `hive-site.xml` for the server. 17 | 18 | Configuration| Description 19 | ---|--- 20 | `hive.metastore.pre.event.listeners` | This parameter enables metastore security. Set to `org.apache.hadoop.hive.ql.security.authorization.AuthorizationPreEventListener` 21 | `hive.security.metastore.authenticator.manager` | Set to `org.apache.hadoop.hive.ql.security.HadoopDefaultMetastoreAuthenticator` 22 | `hive.security.metastore.authorization.auth.reads` | When true, Hive metastore authorization checks for read access. 23 | `hive.security.metastore.authorization.manager` | A comma-separated list of the names of authorization manager classes that are to be used in the metastore for authorization. Set to `org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider` 24 | 25 | ### Hive Metastore Client Side 26 | 27 | The client could be the Spark SQL Client, Spark Thrift Server, HiveServer2, Kyuubi, etc. Configure these properties in the `hive-site.xml` for the client. 28 | 29 | Configuration| Description 30 | ---|--- 31 | `hive.security.authorization.enabled` | Enables or disables authorization. Set this to `true` on clients that should perform authorization checks. 32 | `hive.security.authorization.manager` | The class name of the Hive client authorization manager. For Storage-Based Authorization, specify the value `org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider` 33 | 34 | ## Minimum Permissions 35 | 36 | The following table shows the minimum permissions required for Spark SQL when using Storage-Based Authorization: 37 | 38 | Operation | Permission required 39 | ---|--- 40 | alter table | table write access 41 | create table | database write access 42 | CTAS | table read access 43 | load | table write access 44 | select | table read access 45 | show tables | database read access 46 | 47 | 48 | ## Limitations 49 | 50 | Spark SQL does not support GRANT/REVOKE statements; these can only be executed through Hive.
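As a rough illustration of what the minimum-permissions table above boils down to, the sketch below (not part of spark-authorizer) uses the Hadoop `FileSystem` API to look at the permission bits of a table directory, which is exactly the information the storage-based model reasons about; the warehouse path, database and table names are made up.

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.fs.permission.FsAction

// Hypothetical warehouse layout: /user/hive/warehouse/<db>.db/<table>
val tableDir = new Path("/user/hive/warehouse/sales.db/orders")

val fs = FileSystem.get(new Configuration())
val status = fs.getFileStatus(tableDir)
val perm = status.getPermission

// e.g. `select` needs read access on this directory, `alter table` needs write access
println(s"owner=${status.getOwner} group=${status.getGroup} perm=$perm")
println(s"owner may read:  ${perm.getUserAction.implies(FsAction.READ)}")
println(s"owner may write: ${perm.getUserAction.implies(FsAction.WRITE)}")
```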
51 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 18 | 19 | 22 | 4.0.0 23 | 24 | yaooqinn 25 | spark-authorizer 26 | Spark Authorizer 27 | 2.1.1 28 | jar 29 | 30 | 31 | 32 | The Apache Software License, Version 2.0 33 | http://www.apache.org/licenses/LICENSE-2.0.txt 34 | manual 35 | 36 | 37 | 38 | 39 | 40 | yaooqinn 41 | Kent Yao 42 | hzyaoqin@corp.netease.com 43 | yaooqinn 44 | http://www.jianshu.com/u/14effefbbf32 45 | 46 | 47 | 48 | 49 | UTF-8 50 | 2.11.8 51 | 2.11 52 | 2.2.6 53 | org.apache.spark 54 | 2.2.2 55 | provided 56 | org.spark-project.hive 57 | 1.2.1.spark2 58 | 1.7 59 | 60 | 61 | 62 | 63 | central 64 | 65 | Maven Repository 66 | https://repo.maven.apache.org/maven2 67 | 68 | true 69 | 70 | 71 | false 72 | 73 | 74 | 75 | apache 76 | Apache Repository Snapshots 77 | http://repository.apache.org/snapshots 78 | 79 | false 80 | 81 | 82 | true 83 | daily 84 | warn 85 | 86 | 87 | 88 | 89 | 90 | central 91 | https://repo.maven.apache.org/maven2 92 | 93 | true 94 | 95 | 96 | false 97 | 98 | 99 | 100 | apache 101 | Apache Repository Snapshots 102 | http://repository.apache.org/snapshots 103 | 104 | false 105 | 106 | 107 | true 108 | daily 109 | warn 110 | 111 | 112 | 113 | 114 | 115 | 116 | org.scala-lang 117 | scala-library 118 | ${scala.version} 119 | provided 120 | 121 | 122 | 123 | ${spark.group} 124 | spark-sql_${scala.binary.version} 125 | ${spark.version} 126 | ${spark.scope} 127 | 128 | 129 | 130 | ${spark.group} 131 | spark-hive_${scala.binary.version} 132 | ${spark.version} 133 | ${spark.scope} 134 | 135 | 136 | * 137 | * 138 | 139 | 140 | 141 | 142 | ${hive.group} 143 | hive-exec 144 | ${hive.version} 145 | provided 146 | 147 | 148 | 149 | 150 | org.scalatest 151 | scalatest_${scala.binary.version} 152 | ${scalatest.version} 153 | test 154 | 155 | 156 | 157 | ${spark.group} 158 | spark-core_${scala.binary.version} 159 | ${spark.version} 160 | test-jar 161 | test 162 | 163 | 164 | 165 | ${spark.group} 166 | spark-catalyst_${scala.binary.version} 167 | ${spark.version} 168 | test-jar 169 | test 170 | 171 | 172 | ${spark.group} 173 | spark-sql_${scala.binary.version} 174 | ${spark.version} 175 | test-jar 176 | test 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | net.alchim31.maven 185 | scala-maven-plugin 186 | 3.2.2 187 | 188 | 189 | eclipse-add-source 190 | 191 | add-source 192 | 193 | 194 | 195 | scala-compile-first 196 | 197 | compile 198 | 199 | 200 | 201 | scala-test-compile-first 202 | 203 | testCompile 204 | 205 | 206 | 207 | 208 | ${scala.version} 209 | incremental 210 | true 211 | 212 | -unchecked 213 | -deprecation 214 | -feature 215 | -explaintypes 216 | -Yno-adapted-args 217 | 218 | 219 | -Xms1024m 220 | -Xmx1024m 221 | -XX:ReservedCodeCacheSize=512M 222 | 223 | 224 | -source 225 | ${java.version} 226 | -target 227 | ${java.version} 228 | -Xlint:all,-serial,-path,-try 229 | 230 | 231 | 232 | 233 | 234 | 235 | org.apache.maven.plugins 236 | maven-surefire-plugin 237 | 2.12.4 238 | 239 | true 240 | 241 | 242 | 243 | 244 | org.scalatest 245 | scalatest-maven-plugin 246 | 1.0 247 | 248 | ${project.build.directory}/surefire-reports 249 | . 
250 | TestSuite.txt 251 | 252 | 253 | 254 | test 255 | 256 | test 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | spark-2.2 267 | 268 | 2.2.2 269 | 2.2.6 270 | 271 | 272 | 273 | 274 | spark-2.3 275 | 276 | 2.3.2 277 | 3.0.3 278 | 279 | 280 | 281 | -------------------------------------------------------------------------------- /scalastyle-config.xml: -------------------------------------------------------------------------------- 1 | 17 | 39 | 40 | 41 | Scalastyle standard configuration 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | true 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | ARROW, EQUALS, ELSE, TRY, CATCH, FINALLY, LARROW, RARROW 126 | 127 | 128 | 129 | 130 | 131 | ARROW, EQUALS, COMMA, COLON, IF, ELSE, DO, WHILE, FOR, MATCH, TRY, CATCH, FINALLY, LARROW, RARROW 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | ^FunSuite[A-Za-z]*$ 141 | Tests must extend org.apache.spark.SparkFunSuite instead. 142 | 143 | 144 | 145 | 146 | ^println$ 147 | 151 | 152 | 153 | 154 | @VisibleForTesting 155 | 158 | 159 | 160 | 161 | Runtime\.getRuntime\.addShutdownHook 162 | 170 | 171 | 172 | 173 | mutable\.SynchronizedBuffer 174 | 182 | 183 | 184 | 185 | Class\.forName 186 | 193 | 194 | 195 | 196 | Await\.result 197 | 204 | 205 | 206 | 207 | Await\.ready 208 | 215 | 216 | 217 | 218 | 219 | JavaConversions 220 | Instead of importing implicits in scala.collection.JavaConversions._, import 221 | scala.collection.JavaConverters._ and use .asScala / .asJava methods 222 | 223 | 224 | 225 | org\.apache\.commons\.lang\. 226 | Use Commons Lang 3 classes (package org.apache.commons.lang3.*) instead 227 | of Commons Lang 2 (package org.apache.commons.lang.*) 228 | 229 | 230 | 231 | extractOpt 232 | Use Utils.jsonOption(x).map(.extract[T]) instead of .extractOpt[T], as the latter 233 | is slower. 234 | 235 | 236 | 237 | 238 | java,scala,3rdParty,spark 239 | javax?\..* 240 | scala\..* 241 | (?!org\.apache\.spark\.).* 242 | org\.apache\.spark\..* 243 | 244 | 245 | 246 | 247 | 248 | COMMA 249 | 250 | 251 | 252 | 253 | 254 | \)\{ 255 | 258 | 259 | 260 | 261 | (?m)^(\s*)/[*][*].*$(\r|)\n^\1 [*] 262 | Use Javadoc style indentation for multiline comments 263 | 264 | 265 | 266 | case[^\n>]*=>\s*\{ 267 | Omit braces in case clauses. 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 800> 321 | 322 | 323 | 324 | 325 | 30 326 | 327 | 328 | 329 | 330 | 10 331 | 332 | 333 | 334 | 335 | 50 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | -1,0,1,2,3 347 | 348 | 349 | 350 | -------------------------------------------------------------------------------- /src/main/scala/com/githup/yaooqinn/spark/authorizer/Logging.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.githup.yaooqinn.spark.authorizer 19 | 20 | import org.slf4j.LoggerFactory 21 | 22 | trait Logging { 23 | 24 | lazy val logger = LoggerFactory.getLogger(this.getClass) 25 | 26 | def trace(message: => Any): Unit = { 27 | if (logger.isTraceEnabled) { 28 | logger.trace(message.toString) 29 | } 30 | } 31 | 32 | def debug(message: => Any): Unit = { 33 | if (logger.isDebugEnabled) { 34 | logger.debug(message.toString) 35 | } 36 | } 37 | 38 | def info(message: => Any): Unit = { 39 | if (logger.isInfoEnabled) { 40 | logger.info(message.toString) 41 | } 42 | } 43 | 44 | def warn(message: => Any): Unit = { 45 | logger.warn(message.toString) 46 | } 47 | 48 | def warn(message: => Any, t: Throwable): Unit = { 49 | logger.warn(message.toString, t) 50 | } 51 | 52 | def error(message: => Any, t: Throwable): Unit = { 53 | logger.error(message.toString, t) 54 | } 55 | 56 | def error(message: => Any): Unit = { 57 | logger.error(message.toString) 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/ranger/authorization/spark/authorizer/RangerSparkSQLExtension.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.ranger.authorization.spark.authorizer 19 | 20 | import org.apache.spark.sql.SparkSessionExtensions 21 | import org.apache.spark.sql.catalyst.optimizer.AuthorizerExtension 22 | 23 | class RangerSparkSQLExtension extends Extensions { 24 | override def apply(ext: SparkSessionExtensions): Unit = { 25 | ext.injectOptimizerRule(AuthorizerExtension) 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/ranger/authorization/spark/authorizer/package.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. 
See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.ranger.authorization.spark 19 | 20 | import org.apache.spark.sql.SparkSessionExtensions 21 | 22 | package object authorizer { 23 | 24 | type Extensions = SparkSessionExtensions => Unit 25 | 26 | } 27 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/catalyst/optimizer/Authorizable.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.sql.catalyst.optimizer 19 | 20 | import java.io.File 21 | 22 | import com.githup.yaooqinn.spark.authorizer.Logging 23 | import org.apache.hadoop.hive.ql.plan.HiveOperation 24 | import org.apache.hadoop.hive.ql.security.authorization.plugin.{HiveAuthzContext, HiveOperationType} 25 | 26 | import org.apache.spark.sql.SparkSession 27 | import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan} 28 | import org.apache.spark.sql.catalyst.rules.Rule 29 | import org.apache.spark.sql.execution.command._ 30 | import org.apache.spark.sql.execution.datasources.{CreateTempViewUsing, InsertIntoDataSourceCommand, InsertIntoHadoopFsRelationCommand} 31 | import org.apache.spark.sql.hive.{HiveExternalCatalog, PrivilegesBuilder} 32 | import org.apache.spark.sql.hive.client.AuthzImpl 33 | import org.apache.spark.sql.hive.execution.CreateHiveTableAsSelectCommand 34 | import org.apache.spark.util.Utils 35 | 36 | trait Authorizable extends Rule[LogicalPlan] with Logging { 37 | 38 | def spark: SparkSession 39 | 40 | /** 41 | * Visit the [[LogicalPlan]] recursively to get all hive privilege objects, check the privileges 42 | * using Hive Authorizer V2 which provide sql based authorization and can implements 43 | * ranger-hive-plugins. 44 | * If the user is authorized, then the original plan will be returned; otherwise, interrupted by 45 | * some particular privilege exceptions. 
46 | * @param plan a spark LogicalPlan for verifying privileges 47 | * @return a plan itself which has gone through the privilege check. 48 | */ 49 | override def apply(plan: LogicalPlan): LogicalPlan = { 50 | val operationType: HiveOperationType = getOperationType(plan) 51 | val authzContext = new HiveAuthzContext.Builder().build() 52 | val (in, out) = PrivilegesBuilder.build(plan) 53 | spark.sharedState.externalCatalog match { 54 | case _: HiveExternalCatalog => 55 | AuthzImpl.checkPrivileges(spark, operationType, in, out, authzContext) 56 | case _ => 57 | } 58 | // iff no exception. 59 | // We just return the original plan here, so this rule will be executed only once. 60 | plan 61 | } 62 | 63 | def policyCacheDir: Option[String] = { 64 | Option(spark.sparkContext.hadoopConfiguration.get("ranger.plugin.hive.policy.cache.dir")) 65 | } 66 | 67 | 68 | def createCacheDirIfNonExists(dir: String): Unit = { 69 | val file = new File(dir) 70 | if (!file.exists()) { 71 | if (file.mkdirs()) { 72 | info("Creating ranger policy cache directory at " + file.getAbsolutePath) 73 | file.deleteOnExit() 74 | } else { 75 | warn("Unable to create ranger policy cache directory at " + file.getAbsolutePath) 76 | } 77 | } 78 | } 79 | 80 | policyCacheDir match { 81 | case Some(dir) => createCacheDirIfNonExists(dir) 82 | case _ => 83 | // load resources from ranger configuration files 84 | Option(Utils.getContextOrSparkClassLoader.getResource("ranger-hive-security.xml")) match { 85 | case Some(url) => 86 | spark.sparkContext.hadoopConfiguration.addResource(url) 87 | policyCacheDir match { 88 | case Some(dir) => createCacheDirIfNonExists(dir) 89 | case _ => 90 | } 91 | case _ => 92 | } 93 | } 94 | 95 | /** 96 | * Mapping of [[LogicalPlan]] -> [[HiveOperation]] 97 | * @param plan a spark LogicalPlan 98 | * @return 99 | */ 100 | def getHiveOperation(plan: LogicalPlan): HiveOperation = { 101 | plan match { 102 | case c: Command => c match { 103 | case _: AlterDatabasePropertiesCommand => HiveOperation.ALTERDATABASE 104 | case p if p.nodeName == "AlterTableAddColumnsCommand" => HiveOperation.ALTERTABLE_ADDCOLS 105 | case _: AlterTableAddPartitionCommand => HiveOperation.ALTERTABLE_ADDPARTS 106 | case p if p.nodeName == "AlterTableChangeColumnCommand" => 107 | HiveOperation.ALTERTABLE_RENAMECOL 108 | case _: AlterTableDropPartitionCommand => HiveOperation.ALTERTABLE_DROPPARTS 109 | case _: AlterTableRecoverPartitionsCommand => HiveOperation.MSCK 110 | case _: AlterTableRenamePartitionCommand => HiveOperation.ALTERTABLE_RENAMEPART 111 | case a: AlterTableRenameCommand => 112 | if (!a.isView) HiveOperation.ALTERTABLE_RENAME else HiveOperation.ALTERVIEW_RENAME 113 | case _: AlterTableSetPropertiesCommand 114 | | _: AlterTableUnsetPropertiesCommand => HiveOperation.ALTERTABLE_PROPERTIES 115 | case _: AlterTableSerDePropertiesCommand => HiveOperation.ALTERTABLE_SERDEPROPERTIES 116 | case _: AlterTableSetLocationCommand => HiveOperation.ALTERTABLE_LOCATION 117 | case _: AlterViewAsCommand => HiveOperation.QUERY 118 | // case _: AlterViewAsCommand => HiveOperation.ALTERVIEW_AS 119 | 120 | case _: AnalyzeColumnCommand => HiveOperation.QUERY 121 | // case _: AnalyzeTableCommand => HiveOperation.ANALYZE_TABLE 122 | // Hive treat AnalyzeTableCommand as QUERY, obey it. 
123 | case _: AnalyzeTableCommand => HiveOperation.QUERY 124 | case p if p.nodeName == "AnalyzePartitionCommand" => HiveOperation.QUERY 125 | 126 | case _: CreateDatabaseCommand => HiveOperation.CREATEDATABASE 127 | case _: CreateDataSourceTableAsSelectCommand 128 | | _: CreateHiveTableAsSelectCommand => HiveOperation.CREATETABLE_AS_SELECT 129 | case _: CreateFunctionCommand => HiveOperation.CREATEFUNCTION 130 | case _: CreateTableCommand 131 | | _: CreateDataSourceTableCommand => HiveOperation.CREATETABLE 132 | case _: CreateTableLikeCommand => HiveOperation.CREATETABLE 133 | case _: CreateViewCommand 134 | | _: CacheTableCommand 135 | | _: CreateTempViewUsing => HiveOperation.CREATEVIEW 136 | 137 | case p if p.nodeName == "DescribeColumnCommand" => HiveOperation.DESCTABLE 138 | case _: DescribeDatabaseCommand => HiveOperation.DESCDATABASE 139 | case _: DescribeFunctionCommand => HiveOperation.DESCFUNCTION 140 | case _: DescribeTableCommand => HiveOperation.DESCTABLE 141 | 142 | case _: DropDatabaseCommand => HiveOperation.DROPDATABASE 143 | // Hive don't check privileges for `drop function command`, what about a unverified user 144 | // try to drop functions. 145 | // We treat permanent functions as tables for verifying. 146 | case d: DropFunctionCommand if !d.isTemp => HiveOperation.DROPTABLE 147 | case d: DropFunctionCommand if d.isTemp => HiveOperation.DROPFUNCTION 148 | case _: DropTableCommand => HiveOperation.DROPTABLE 149 | 150 | case e: ExplainCommand => getHiveOperation(e.logicalPlan) 151 | 152 | case _: InsertIntoDataSourceCommand => HiveOperation.QUERY 153 | case p if p.nodeName == "InsertIntoDataSourceDirCommand" => HiveOperation.QUERY 154 | case _: InsertIntoHadoopFsRelationCommand => HiveOperation.CREATETABLE_AS_SELECT 155 | case p if p.nodeName == "InsertIntoHiveDirCommand" => HiveOperation.QUERY 156 | case p if p.nodeName == "InsertIntoHiveTable" => HiveOperation.QUERY 157 | 158 | case _: LoadDataCommand => HiveOperation.LOAD 159 | 160 | case p if p.nodeName == "SaveIntoDataSourceCommand" => HiveOperation.QUERY 161 | case s: SetCommand if s.kv.isEmpty || s.kv.get._2.isEmpty => HiveOperation.SHOWCONF 162 | case _: SetDatabaseCommand => HiveOperation.SWITCHDATABASE 163 | case _: ShowCreateTableCommand => HiveOperation.SHOW_CREATETABLE 164 | case _: ShowColumnsCommand => HiveOperation.SHOWCOLUMNS 165 | case _: ShowDatabasesCommand => HiveOperation.SHOWDATABASES 166 | case _: ShowFunctionsCommand => HiveOperation.SHOWFUNCTIONS 167 | case _: ShowPartitionsCommand => HiveOperation.SHOWPARTITIONS 168 | case _: ShowTablesCommand => HiveOperation.SHOWTABLES 169 | case _: ShowTablePropertiesCommand => HiveOperation.SHOW_TBLPROPERTIES 170 | case s: StreamingExplainCommand => 171 | getHiveOperation(s.queryExecution.optimizedPlan) 172 | 173 | case _: TruncateTableCommand => HiveOperation.TRUNCATETABLE 174 | 175 | case _: UncacheTableCommand => HiveOperation.DROPVIEW 176 | 177 | // Commands that do not need build privilege goes as explain type 178 | case _ => 179 | // AddFileCommand 180 | // AddJarCommand 181 | // ... 
182 | HiveOperation.EXPLAIN 183 | } 184 | case _ => HiveOperation.QUERY 185 | } 186 | } 187 | 188 | def getOperationType(logicalPlan: LogicalPlan): HiveOperationType = { 189 | HiveOperationType.valueOf(getHiveOperation(logicalPlan).name()) 190 | } 191 | } 192 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/catalyst/optimizer/Authorizer.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.sql.catalyst.optimizer 19 | 20 | import org.apache.spark.sql.SparkSession 21 | import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan 22 | import org.apache.spark.sql.catalyst.rules.Rule 23 | 24 | /** 25 | * An Optimizer Rule to do Hive Authorization V2 for Spark SQL. 26 | * 27 | * For Apache Spark 2.1.x 28 | */ 29 | object Authorizer extends Rule[LogicalPlan] with Authorizable { 30 | override def spark: SparkSession = { 31 | SparkSession.getActiveSession.getOrElse(SparkSession.getDefaultSession.get) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/catalyst/optimizer/AuthorizerExtension.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.sql.catalyst.optimizer 19 | 20 | import org.apache.spark.sql.SparkSession 21 | import org.apache.spark.sql.catalyst.plans.logical._ 22 | import org.apache.spark.sql.catalyst.rules.Rule 23 | 24 | /** 25 | * An Optimizer Rule to do Hive Authorization V2 for Spark SQL. 
26 | * 27 | * For Apache Spark 2.2.x and later 28 | */ 29 | case class AuthorizerExtension(spark: SparkSession) extends Rule[LogicalPlan] with Authorizable 30 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/catalyst/optimizer/HivePrivilegeObject.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.sql.catalyst.optimizer 19 | 20 | import java.util.{List => JList} 21 | 22 | import org.apache.hadoop.hive.ql.security.authorization.plugin.{HivePrivilegeObject => HBO} 23 | import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.{HivePrivilegeObjectType, HivePrivObjectActionType} 24 | 25 | /** 26 | * Helper class for initializing [[HBO]] with more Constructors. 27 | */ 28 | private[sql] object HivePrivilegeObject { 29 | def apply( 30 | `type`: HivePrivilegeObjectType, 31 | dbname: String, 32 | objectName: String, 33 | partKeys: JList[String], 34 | columns: JList[String], 35 | actionType: HivePrivObjectActionType, 36 | commandParams: JList[String]): HBO = { 37 | new HBO( 38 | `type`, dbname, objectName, partKeys, columns, actionType, commandParams) 39 | } 40 | 41 | def apply( 42 | `type`: HivePrivilegeObjectType, 43 | dbname: String, 44 | objectName: String, 45 | partKeys: JList[String], 46 | columns: JList[String], 47 | commandParams: JList[String]): HBO = { 48 | apply( 49 | `type`, dbname, objectName, partKeys, columns, HivePrivObjectActionType.OTHER, commandParams) 50 | } 51 | 52 | def apply( 53 | `type`: HivePrivilegeObjectType, 54 | dbname: String, 55 | objectName: String, 56 | partKeys: JList[String], 57 | columns: JList[String]): HBO = { 58 | apply( 59 | `type`, dbname, objectName, partKeys, columns, HivePrivObjectActionType.OTHER, null) 60 | } 61 | 62 | def apply( 63 | `type`: HivePrivilegeObjectType, 64 | dbname: String, 65 | objectName: String, 66 | actionType: HivePrivObjectActionType): HBO = { 67 | apply(`type`, dbname, objectName, null, null, actionType, null) 68 | } 69 | 70 | def apply( 71 | `type`: HivePrivilegeObjectType, 72 | dbname: String, 73 | objectName: String): HBO = { 74 | apply(`type`, dbname, objectName, HivePrivObjectActionType.OTHER) 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/hive/AuthzUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. 
See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.sql.hive 19 | 20 | import scala.util.{Failure, Success, Try} 21 | 22 | private[hive] object AuthzUtils { 23 | 24 | def getFieldVal(o: Any, name: String): Any = { 25 | Try { 26 | val field = o.getClass.getDeclaredField(name) 27 | field.setAccessible(true) 28 | field.get(o) 29 | } match { 30 | case Success(value) => value 31 | case Failure(exception) => throw exception 32 | } 33 | } 34 | 35 | def setFieldVal(o: Any, name: String, value: Any): Unit = { 36 | Try { 37 | val field = o.getClass.getDeclaredField(name) 38 | field.setAccessible(true) 39 | field.set(o, value.asInstanceOf[AnyRef]) 40 | } match { 41 | case Failure(exception) => throw exception 42 | case _ => 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/hive/PrivilegesBuilder.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.apache.spark.sql.hive 19 | 20 | import java.util.{ArrayList => JAList, List => JList} 21 | 22 | import scala.collection.JavaConverters._ 23 | 24 | import org.apache.hadoop.hive.ql.security.authorization.plugin.{HivePrivilegeObject => HPO} 25 | import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.{HivePrivilegeObjectType, HivePrivObjectActionType} 26 | 27 | import org.apache.spark.sql.SaveMode 28 | import org.apache.spark.sql.catalyst.TableIdentifier 29 | import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation 30 | import org.apache.spark.sql.catalyst.catalog.CatalogTable 31 | import org.apache.spark.sql.catalyst.expressions.NamedExpression 32 | import org.apache.spark.sql.catalyst.optimizer.HivePrivilegeObject 33 | import org.apache.spark.sql.catalyst.plans.logical._ 34 | import org.apache.spark.sql.execution.command._ 35 | import org.apache.spark.sql.execution.datasources._ 36 | import org.apache.spark.sql.hive.AuthzUtils._ 37 | import org.apache.spark.sql.hive.execution.CreateHiveTableAsSelectCommand 38 | import org.apache.spark.sql.types.StructField 39 | 40 | /** 41 | * [[LogicalPlan]] -> list of [[HivePrivilegeObject]]s 42 | */ 43 | private[sql] object PrivilegesBuilder { 44 | 45 | /** 46 | * Build input and output privilege objects from a Spark's [[LogicalPlan]] 47 | * 48 | * For [[ExplainCommand]]s, build its child. 49 | * For [[RunnableCommand]]s, build outputs if it has an target to write, build inputs for the 50 | * inside query if exists. 51 | * 52 | * For other queries, build inputs. 53 | * 54 | * @param plan A Spark [[LogicalPlan]] 55 | */ 56 | def build(plan: LogicalPlan): (JList[HPO], JList[HPO]) = { 57 | 58 | def doBuild(plan: LogicalPlan): (JList[HPO], JList[HPO]) = { 59 | val inputObjs = new JAList[HPO] 60 | val outputObjs = new JAList[HPO] 61 | plan match { 62 | // RunnableCommand 63 | case cmd: Command => buildCommand(cmd, inputObjs, outputObjs) 64 | // Queries 65 | case _ => buildQuery(plan, inputObjs) 66 | } 67 | (inputObjs, outputObjs) 68 | } 69 | 70 | plan match { 71 | case e: ExplainCommand => doBuild(e.logicalPlan) 72 | case p => doBuild(p) 73 | } 74 | } 75 | 76 | /** 77 | * Build HivePrivilegeObjects from Spark LogicalPlan 78 | * @param plan a Spark LogicalPlan used to generate HivePrivilegeObjects 79 | * @param hivePrivilegeObjects input or output hive privilege object list 80 | * @param projectionList Projection list after pruning 81 | */ 82 | private[this] def buildQuery( 83 | plan: LogicalPlan, 84 | hivePrivilegeObjects: JList[HPO], 85 | projectionList: Seq[NamedExpression] = Nil): Unit = { 86 | 87 | /** 88 | * Columns in Projection take priority for column level privilege checking 89 | * @param table catalogTable of a given relation 90 | */ 91 | def mergeProjection(table: CatalogTable): Unit = { 92 | if (projectionList.isEmpty) { 93 | addTableOrViewLevelObjs( 94 | table.identifier, 95 | hivePrivilegeObjects, 96 | table.partitionColumnNames, 97 | table.schema.fieldNames) 98 | } else { 99 | addTableOrViewLevelObjs( 100 | table.identifier, 101 | hivePrivilegeObjects, 102 | table.partitionColumnNames.filter(projectionList.map(_.name).contains(_)), 103 | projectionList.map(_.name)) 104 | } 105 | } 106 | 107 | plan match { 108 | case p: Project => buildQuery(p.child, hivePrivilegeObjects, p.projectList) 109 | 110 | case h if h.nodeName == "HiveTableRelation" => 111 | mergeProjection(getFieldVal(h, "tableMeta").asInstanceOf[CatalogTable]) 112 | 113 | case m if m.nodeName == "MetastoreRelation" => 
114 | mergeProjection(getFieldVal(m, "catalogTable").asInstanceOf[CatalogTable]) 115 | 116 | case c if c.nodeName == "CatalogRelation" => 117 | mergeProjection(getFieldVal(c, "tableMeta").asInstanceOf[CatalogTable]) 118 | 119 | case l: LogicalRelation if l.catalogTable.nonEmpty => mergeProjection(l.catalogTable.get) 120 | 121 | case u: UnresolvedRelation => 122 | // Normally, we shouldn't meet UnresolvedRelation here in an optimized plan. 123 | // Unfortunately, the real world is always a place where miracles happen. 124 | // We check the privileges directly without resolving the plan and leave everything 125 | // to spark to do. 126 | addTableOrViewLevelObjs(u.tableIdentifier, hivePrivilegeObjects) 127 | 128 | case p => 129 | for (child <- p.children) { 130 | buildQuery(child, hivePrivilegeObjects, projectionList) 131 | } 132 | } 133 | } 134 | 135 | /** 136 | * Build HivePrivilegeObjects from Spark LogicalPlan 137 | * @param plan a Spark LogicalPlan used to generate HivePrivilegeObjects 138 | * @param inputObjs input hive privilege object list 139 | * @param outputObjs output hive privilege object list 140 | */ 141 | private[this] def buildCommand( 142 | plan: LogicalPlan, 143 | inputObjs: JList[HPO], 144 | outputObjs: JList[HPO]): Unit = { 145 | plan match { 146 | case a: AlterDatabasePropertiesCommand => addDbLevelObjs(a.databaseName, outputObjs) 147 | 148 | case a if a.nodeName == "AlterTableAddColumnsCommand" => 149 | addTableOrViewLevelObjs( 150 | getFieldVal(a, "table").asInstanceOf[TableIdentifier], 151 | inputObjs, 152 | columns = getFieldVal(a, "colsToAdd").asInstanceOf[Seq[StructField]].map(_.name)) 153 | addTableOrViewLevelObjs( 154 | getFieldVal(a, "table").asInstanceOf[TableIdentifier], 155 | outputObjs, 156 | columns = getFieldVal(a, "colsToAdd").asInstanceOf[Seq[StructField]].map(_.name)) 157 | 158 | case a: AlterTableAddPartitionCommand => 159 | addTableOrViewLevelObjs(a.tableName, inputObjs) 160 | addTableOrViewLevelObjs(a.tableName, outputObjs) 161 | 162 | case a if a.nodeName == "AlterTableChangeColumnCommand" => 163 | addTableOrViewLevelObjs( 164 | getFieldVal(a, "tableName").asInstanceOf[TableIdentifier], 165 | inputObjs, 166 | columns = Seq(getFieldVal(a, "columnName").asInstanceOf[String])) 167 | 168 | case a: AlterTableDropPartitionCommand => 169 | addTableOrViewLevelObjs(a.tableName, inputObjs) 170 | addTableOrViewLevelObjs(a.tableName, outputObjs) 171 | 172 | case a: AlterTableRecoverPartitionsCommand => 173 | addTableOrViewLevelObjs(a.tableName, inputObjs) 174 | addTableOrViewLevelObjs(a.tableName, outputObjs) 175 | 176 | case a: AlterTableRenameCommand if !a.isView || a.oldName.database.nonEmpty => 177 | // rename tables / permanent views 178 | addTableOrViewLevelObjs(a.oldName, inputObjs) 179 | addTableOrViewLevelObjs(a.newName, outputObjs) 180 | 181 | case a: AlterTableRenamePartitionCommand => 182 | addTableOrViewLevelObjs(a.tableName, inputObjs) 183 | addTableOrViewLevelObjs(a.tableName, outputObjs) 184 | 185 | case a: AlterTableSerDePropertiesCommand => 186 | addTableOrViewLevelObjs(a.tableName, inputObjs) 187 | addTableOrViewLevelObjs(a.tableName, outputObjs) 188 | 189 | case a: AlterTableSetLocationCommand => 190 | addTableOrViewLevelObjs(a.tableName, inputObjs) 191 | addTableOrViewLevelObjs(a.tableName, outputObjs) 192 | 193 | case a: AlterTableSetPropertiesCommand => 194 | addTableOrViewLevelObjs(a.tableName, inputObjs) 195 | addTableOrViewLevelObjs(a.tableName, outputObjs) 196 | 197 | case a: AlterTableUnsetPropertiesCommand => 198 | 
addTableOrViewLevelObjs(a.tableName, inputObjs) 199 | addTableOrViewLevelObjs(a.tableName, outputObjs) 200 | 201 | case a: AlterViewAsCommand => 202 | if (a.name.database.nonEmpty) { 203 | // it's a permanent view 204 | addTableOrViewLevelObjs(a.name, outputObjs) 205 | } 206 | buildQuery(a.query, inputObjs) 207 | 208 | case a: AnalyzeColumnCommand => 209 | addTableOrViewLevelObjs( 210 | a.tableIdent, inputObjs, columns = a.columnNames) 211 | addTableOrViewLevelObjs( 212 | a.tableIdent, outputObjs, columns = a.columnNames) 213 | 214 | case a if a.nodeName == "AnalyzePartitionCommand" => 215 | addTableOrViewLevelObjs( 216 | getFieldVal(a, "tableIdent").asInstanceOf[TableIdentifier], inputObjs) 217 | addTableOrViewLevelObjs( 218 | getFieldVal(a, "tableIdent").asInstanceOf[TableIdentifier], outputObjs) 219 | 220 | case a: AnalyzeTableCommand => 221 | addTableOrViewLevelObjs(a.tableIdent, inputObjs, columns = Seq("RAW__DATA__SIZE")) 222 | addTableOrViewLevelObjs(a.tableIdent, outputObjs) 223 | 224 | case c: CacheTableCommand => c.plan.foreach { 225 | buildQuery(_, inputObjs) 226 | } 227 | 228 | case c: CreateDatabaseCommand => addDbLevelObjs(c.databaseName, outputObjs) 229 | 230 | case c: CreateDataSourceTableAsSelectCommand => 231 | addDbLevelObjs(c.table.identifier, outputObjs) 232 | addTableOrViewLevelObjs(c.table.identifier, outputObjs, mode = c.mode) 233 | buildQuery(c.query, inputObjs) 234 | 235 | case c: CreateDataSourceTableCommand => 236 | addTableOrViewLevelObjs(c.table.identifier, outputObjs) 237 | 238 | case c: CreateFunctionCommand if !c.isTemp => 239 | addDbLevelObjs(c.databaseName, outputObjs) 240 | addFunctionLevelObjs(c.databaseName, c.functionName, outputObjs) 241 | 242 | case c: CreateHiveTableAsSelectCommand => 243 | addDbLevelObjs(c.tableDesc.identifier, outputObjs) 244 | addTableOrViewLevelObjs(c.tableDesc.identifier, outputObjs) 245 | buildQuery(c.query, inputObjs) 246 | 247 | case c: CreateTableCommand => addTableOrViewLevelObjs(c.table.identifier, outputObjs) 248 | 249 | case c: CreateTableLikeCommand => 250 | addDbLevelObjs(c.targetTable, outputObjs) 251 | addTableOrViewLevelObjs(c.targetTable, outputObjs) 252 | // hive don't handle source table's privileges, we should not obey that, because 253 | // it will cause meta information leak 254 | addDbLevelObjs(c.sourceTable, inputObjs) 255 | addTableOrViewLevelObjs(c.sourceTable, inputObjs) 256 | 257 | case c: CreateViewCommand => 258 | c.viewType match { 259 | case PersistedView => 260 | // PersistedView will be tied to a database 261 | addDbLevelObjs(c.name, outputObjs) 262 | addTableOrViewLevelObjs(c.name, outputObjs) 263 | case _ => 264 | } 265 | buildQuery(c.child, inputObjs) 266 | 267 | case d if d.nodeName == "DescribeColumnCommand" => 268 | addTableOrViewLevelObjs( 269 | getFieldVal(d, "table").asInstanceOf[TableIdentifier], 270 | inputObjs, 271 | columns = getFieldVal(d, "colNameParts").asInstanceOf[Seq[String]]) 272 | 273 | case d: DescribeDatabaseCommand => 274 | addDbLevelObjs(d.databaseName, inputObjs) 275 | 276 | case d: DescribeFunctionCommand => 277 | addFunctionLevelObjs(d.functionName.database, d.functionName.funcName, inputObjs) 278 | 279 | case d: DescribeTableCommand => addTableOrViewLevelObjs(d.table, inputObjs) 280 | 281 | case d: DropDatabaseCommand => 282 | // outputObjs are enough for privilege check, adding inputObjs for consistency with hive 283 | // behaviour in case of some unexpected issues. 
284 | addDbLevelObjs(d.databaseName, inputObjs) 285 | addDbLevelObjs(d.databaseName, outputObjs) 286 | 287 | case d: DropFunctionCommand => 288 | addFunctionLevelObjs(d.databaseName, d.functionName, outputObjs) 289 | 290 | case d: DropTableCommand => addTableOrViewLevelObjs(d.tableName, outputObjs) 291 | 292 | case i: InsertIntoDataSourceCommand => 293 | i.logicalRelation.catalogTable.foreach { table => 294 | addTableOrViewLevelObjs( 295 | table.identifier, 296 | outputObjs) 297 | } 298 | buildQuery(i.query, inputObjs) 299 | 300 | case i if i.nodeName =="InsertIntoDataSourceDirCommand" => 301 | buildQuery(getFieldVal(i, "query").asInstanceOf[LogicalPlan], inputObjs) 302 | 303 | case i: InsertIntoHadoopFsRelationCommand => 304 | // we are able to get the override mode here, but ctas for hive table with text/orc 305 | // format and parquet with spark.sql.hive.convertMetastoreParquet=false can success 306 | // with privilege checking without claiming for UPDATE privilege of target table, 307 | // which seems to be same with Hive behaviour. 308 | // So, here we ignore the overwrite mode for such a consistency. 309 | i.catalogTable foreach { t => 310 | addTableOrViewLevelObjs( 311 | t.identifier, 312 | outputObjs, 313 | i.partitionColumns.map(_.name), 314 | t.schema.fieldNames) 315 | } 316 | buildQuery(i.query, inputObjs) 317 | 318 | case i if i.nodeName == "InsertIntoHiveDirCommand" => 319 | buildQuery(getFieldVal(i, "query").asInstanceOf[LogicalPlan], inputObjs) 320 | 321 | case i if i.nodeName == "InsertIntoHiveTable" => 322 | addTableOrViewLevelObjs( 323 | getFieldVal(i, "table").asInstanceOf[CatalogTable].identifier, outputObjs) 324 | buildQuery(getFieldVal(i, "query").asInstanceOf[LogicalPlan], inputObjs) 325 | 326 | case l: LoadDataCommand => addTableOrViewLevelObjs(l.table, outputObjs) 327 | 328 | case s if s.nodeName == "SaveIntoDataSourceCommand" => 329 | buildQuery(getFieldVal(s, "query").asInstanceOf[LogicalPlan], outputObjs) 330 | 331 | case s: SetDatabaseCommand => addDbLevelObjs(s.databaseName, inputObjs) 332 | 333 | case s: ShowColumnsCommand => addTableOrViewLevelObjs(s.tableName, inputObjs) 334 | 335 | case s: ShowCreateTableCommand => addTableOrViewLevelObjs(s.table, inputObjs) 336 | 337 | case s: ShowFunctionsCommand => s.db.foreach(addDbLevelObjs(_, inputObjs)) 338 | 339 | case s: ShowPartitionsCommand => addTableOrViewLevelObjs(s.tableName, inputObjs) 340 | 341 | case s: ShowTablePropertiesCommand => addTableOrViewLevelObjs(s.table, inputObjs) 342 | 343 | case s: ShowTablesCommand => addDbLevelObjs(s.databaseName, inputObjs) 344 | 345 | case s: TruncateTableCommand => addTableOrViewLevelObjs(s.tableName, outputObjs) 346 | 347 | case _ => 348 | // AddFileCommand 349 | // AddJarCommand 350 | // AnalyzeColumnCommand 351 | // ClearCacheCommand 352 | // CreateTempViewUsing 353 | // ListFilesCommand 354 | // ListJarsCommand 355 | // RefreshTable 356 | // RefreshTable 357 | // ResetCommand 358 | // SetCommand 359 | // ShowDatabasesCommand 360 | // StreamingExplainCommand 361 | // UncacheTableCommand 362 | } 363 | } 364 | 365 | /** 366 | * Add database level hive privilege objects to input or output list 367 | * @param dbName database name as hive privilege object 368 | * @param hivePrivilegeObjects input or output list 369 | */ 370 | private[this] def addDbLevelObjs( 371 | dbName: String, 372 | hivePrivilegeObjects: JList[HPO]): Unit = { 373 | hivePrivilegeObjects.add( 374 | HivePrivilegeObject(HivePrivilegeObjectType.DATABASE, dbName, dbName)) 375 | } 376 | 377 | /** 378 | * Add 
database level hive privilege objects to input or output list 379 | * @param dbOption an option of database name as hive privilege object 380 | * @param hivePrivilegeObjects input or output hive privilege object list 381 | */ 382 | private def addDbLevelObjs( 383 | dbOption: Option[String], 384 | hivePrivilegeObjects: JList[HPO]): Unit = { 385 | dbOption match { 386 | case Some(db) => 387 | hivePrivilegeObjects.add( 388 | HivePrivilegeObject(HivePrivilegeObjectType.DATABASE, db, db)) 389 | case _ => 390 | } 391 | } 392 | 393 | /** 394 | * Add database level hive privilege objects to input or output list 395 | * @param tableIdentifier table identifier contains database name as hive privilege object 396 | * @param hivePrivilegeObjects input or output hive privilege object list 397 | */ 398 | private def addDbLevelObjs( 399 | tableIdentifier: TableIdentifier, 400 | hivePrivilegeObjects: JList[HPO]): Unit = { 401 | tableIdentifier.database match { 402 | case Some(db) => 403 | hivePrivilegeObjects.add( 404 | HivePrivilegeObject(HivePrivilegeObjectType.DATABASE, db, db)) 405 | case _ => 406 | } 407 | } 408 | 409 | /** 410 | * Add table level hive privilege objects to input or output list 411 | * @param tableIdentifier table identifier contains database name, and table name as hive 412 | * privilege object 413 | * @param hivePrivilegeObjects input or output list 414 | * @param mode Append or overwrite 415 | */ 416 | private def addTableOrViewLevelObjs( 417 | tableIdentifier: TableIdentifier, 418 | hivePrivilegeObjects: JList[HPO], 419 | partKeys: Seq[String] = Nil, 420 | columns: Seq[String] = Nil, 421 | mode: SaveMode = SaveMode.ErrorIfExists, 422 | cmdParams: Seq[String] = Nil): Unit = { 423 | tableIdentifier.database match { 424 | case Some(db) => 425 | val tbName = tableIdentifier.table 426 | val hivePrivObjectActionType = getHivePrivObjActionType(mode) 427 | hivePrivilegeObjects.add( 428 | HivePrivilegeObject( 429 | HivePrivilegeObjectType.TABLE_OR_VIEW, 430 | db, 431 | tbName, 432 | partKeys.asJava, 433 | columns.asJava, 434 | hivePrivObjectActionType, 435 | cmdParams.asJava)) 436 | case _ => 437 | } 438 | } 439 | 440 | /** 441 | * Add function level hive privilege objects to input or output list 442 | * @param databaseName database name 443 | * @param functionName function name as hive privilege object 444 | * @param hivePrivilegeObjects input or output list 445 | */ 446 | private def addFunctionLevelObjs( 447 | databaseName: Option[String], 448 | functionName: String, 449 | hivePrivilegeObjects: JList[HPO]): Unit = { 450 | databaseName match { 451 | case Some(db) => 452 | hivePrivilegeObjects.add( 453 | HivePrivilegeObject(HivePrivilegeObjectType.FUNCTION, db, functionName)) 454 | case _ => 455 | } 456 | } 457 | 458 | /** 459 | * HivePrivObjectActionType INSERT or INSERT_OVERWRITE 460 | * 461 | * @param mode Append or Overwrite 462 | * @return 463 | */ 464 | private def getHivePrivObjActionType(mode: SaveMode): HivePrivObjectActionType = { 465 | mode match { 466 | case SaveMode.Append => HivePrivObjectActionType.INSERT 467 | case SaveMode.Overwrite => HivePrivObjectActionType.INSERT_OVERWRITE 468 | case _ => HivePrivObjectActionType.OTHER 469 | } 470 | } 471 | } 472 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/hive/client/AuthzImpl.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor 
license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.sql.hive.client 19 | 20 | import java.util.{List => JList} 21 | 22 | import com.githup.yaooqinn.spark.authorizer.Logging 23 | import org.apache.hadoop.hive.ql.security.authorization.plugin._ 24 | import org.apache.hadoop.hive.ql.session.SessionState 25 | import org.apache.hadoop.security.UserGroupInformation 26 | 27 | import org.apache.spark.sql.SparkSession 28 | import org.apache.spark.sql.hive.{AuthzUtils, HiveExternalCatalog} 29 | import org.apache.spark.sql.internal.NonClosableMutableURLClassLoader 30 | 31 | /** 32 | * A tool for the Authorizer implementation. 33 | * 34 | * The [[SessionState]] generates the authorizer and authenticator; we use these to check 35 | * the privileges of a Spark LogicalPlan once it has been mapped to hive privilege objects and 36 | * an operation type. 37 | * 38 | * A [[SparkSession]] backed by the hive catalog has its own instance of [[SessionState]]. We 39 | * would like to reuse it, but it belongs to an isolated classloader, which makes it 40 | * unreachable from Spark's context classloader. So, when a [[ClassCastException]] occurs, we 41 | * turn off [[IsolatedClientLoader]] and use Spark's builtin Hive client jars to create a new 42 | * metastore client that replaces the original one; once created, it is reused for all 43 | * subsequent checks.
44 | * 45 | */ 46 | object AuthzImpl extends Logging { 47 | def checkPrivileges( 48 | spark: SparkSession, 49 | hiveOpType: HiveOperationType, 50 | inputObjs: JList[HivePrivilegeObject], 51 | outputObjs: JList[HivePrivilegeObject], 52 | context: HiveAuthzContext): Unit = { 53 | val client = spark.sharedState 54 | .externalCatalog.asInstanceOf[HiveExternalCatalog] 55 | .client 56 | val clientImpl = try { 57 | client.asInstanceOf[HiveClientImpl] 58 | } catch { 59 | case _: ClassCastException => 60 | val clientLoader = 61 | AuthzUtils.getFieldVal(client, "clientLoader").asInstanceOf[IsolatedClientLoader] 62 | AuthzUtils.setFieldVal(clientLoader, "isolationOn", false) 63 | AuthzUtils.setFieldVal(clientLoader, 64 | "classLoader", new NonClosableMutableURLClassLoader(clientLoader.baseClassLoader)) 65 | clientLoader.cachedHive = null 66 | val newClient = clientLoader.createClient() 67 | AuthzUtils.setFieldVal( 68 | spark.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog], 69 | "client", 70 | newClient) 71 | newClient.asInstanceOf[HiveClientImpl] 72 | } 73 | 74 | val state = clientImpl.state 75 | SessionState.setCurrentSessionState(state) 76 | val user = UserGroupInformation.getCurrentUser.getShortUserName 77 | if (state.getAuthenticator.getUserName != user) { 78 | val hiveConf = state.getConf 79 | val newState = new SessionState(hiveConf, user) 80 | SessionState.start(newState) 81 | AuthzUtils.setFieldVal(clientImpl, "state", newState) 82 | } 83 | 84 | val authz = clientImpl.state.getAuthorizerV2 85 | clientImpl.withHiveState { 86 | if (authz != null) { 87 | try { 88 | authz.checkPrivileges(hiveOpType, inputObjs, outputObjs, context) 89 | } catch { 90 | case hae: HiveAccessControlException => 91 | error( 92 | s""" 93 | |+===============================+ 94 | ||Spark SQL Authorization Failure| 95 | ||-------------------------------| 96 | ||${hae.getMessage} 97 | ||-------------------------------| 98 | ||Spark SQL Authorization Failure| 99 | |+===============================+ 100 | """.stripMargin) 101 | throw hae 102 | case e: Exception => throw e 103 | } 104 | } else { 105 | warn("Authorizer V2 not configured. Skipping privilege checking") 106 | } 107 | } 108 | } 109 | } 110 | --------------------------------------------------------------------------------
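As a rough illustration of how the pieces above fit together, here is a sketch (not part of the repository) that drives AuthzImpl.checkPrivileges directly for a simple table read. The table default.src, the QUERY operation type, and the standalone wrapper object are assumptions made for this example only; in the project itself the privilege objects and operation type are derived from the analyzed LogicalPlan (see PrivilegesBuilder.scala) and handed to AuthzImpl by the optimizer rule.

    import java.util.{ArrayList => JAList}

    import org.apache.hadoop.hive.ql.security.authorization.plugin.{HiveAuthzContext, HiveOperationType}
    import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject
    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.hive.client.AuthzImpl

    // Illustrative only: check that the current user may read default.src.
    object AuthzCheckExample {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder()
          .appName("authz-check-example")
          .enableHiveSupport()
          .getOrCreate()

        // A read of default.src maps to a QUERY operation with one table-level input object.
        val inputs = new JAList[HivePrivilegeObject]()
        inputs.add(new HivePrivilegeObject(
          HivePrivilegeObject.HivePrivilegeObjectType.TABLE_OR_VIEW, "default", "src"))
        val outputs = new JAList[HivePrivilegeObject]()

        // Hive's builder creates the authorization context passed through to the V2 authorizer.
        val context = new HiveAuthzContext.Builder().build()

        // Throws HiveAccessControlException if the current user lacks the required privilege.
        AuthzImpl.checkPrivileges(spark, HiveOperationType.QUERY, inputs, outputs, context)
      }
    }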