├── .github
│   └── ISSUE_TEMPLATE
│       ├── bug_report.md
│       └── feature_request.md
├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── _config.yml
├── conf
│   ├── hive-site.xml
│   ├── ranger-hive-audit.xml
│   └── ranger-hive-security.xml
├── docs
│   ├── building-spark-authorizer.md
│   ├── img
│   │   └── ranger-prolcy-details.png
│   ├── install_plugin.md
│   ├── manually_test_cases.md
│   ├── ranger_authorization.md
│   ├── spark_sql_authorization.md
│   ├── sql_std_based_authorization.md
│   └── storage_based_authorization.md
├── pom.xml
├── scalastyle-config.xml
└── src
    └── main
        └── scala
            ├── com
            │   └── githup
            │       └── yaooqinn
            │           └── spark
            │               └── authorizer
            │                   └── Logging.scala
            └── org
                └── apache
                    ├── ranger
                    │   └── authorization
                    │       └── spark
                    │           └── authorizer
                    │               ├── RangerSparkSQLExtension.scala
                    │               └── package.scala
                    └── spark
                        └── sql
                            ├── catalyst
                            │   └── optimizer
                            │       ├── Authorizable.scala
                            │       ├── Authorizer.scala
                            │       ├── AuthorizerExtension.scala
                            │       └── HivePrivilegeObject.scala
                            └── hive
                                ├── AuthzUtils.scala
                                ├── PrivilegesBuilder.scala
                                └── client
                                    └── AuthzImpl.scala
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 |
5 | ---
6 |
7 | **Describe the bug**
8 | A clear and concise description of what the bug is.
9 |
10 | **To Reproduce**
11 | Steps to reproduce the behavior:
12 | 1. Configurations
13 | 2. Environments
14 | 3. Operations
15 | 4. See error
16 |
17 | **Expected behavior**
18 | A clear and concise description of what you expected to happen.
19 |
20 | **Screenshots**
21 | If applicable, add screenshots to help explain your problem.
22 |
23 | **Additional context**
24 | Add any other context about the problem here.
25 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 |
5 | ---
6 |
7 | **Is your feature request related to a problem? Please describe.**
8 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
9 |
10 | **Describe the solution you'd like**
11 | A clear and concise description of what you want to happen.
12 |
13 | **Describe alternatives you've considered**
14 | A clear and concise description of any alternative solutions or features you've considered.
15 |
16 | **Additional context**
17 | Add any other context or screenshots about the feature request here.
18 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *#*#
2 | *.#*
3 | *.iml
4 | *.ipr
5 | *.iws
6 | *.pyc
7 | *.pyo
8 | *.swp
9 | *~
10 | .DS_Store
11 | .cache
12 | .classpath
13 | .ensime
14 | .ensime_cache/
15 | .ensime_lucene
16 | .generated-mima*
17 | .idea/
18 | .idea_modules/
19 | .project
20 | .pydevproject
21 | .scala_dependencies
22 | .settings
23 | /lib/
24 | R-unit-tests.log
25 | R/unit-tests.out
26 | R/cran-check.out
27 | R/pkg/vignettes/sparkr-vignettes.html
28 | R/pkg/tests/fulltests/Rplots.pdf
29 | build/*.jar
30 | build/apache-maven*
31 | build/scala*
32 | build/zinc*
33 | cache
34 | checkpoint
35 | conf/*.cmd
36 | conf/*.conf
37 | conf/*.properties
38 | conf/*.sh
39 | conf/java-opts
40 | conf/slaves
41 | dependency-reduced-pom.xml
42 | derby.log
43 | dev/create-release/*final
44 | dev/create-release/*txt
45 | dev/pr-deps/
46 | dist/
47 | docs/_site
48 | docs/api
49 | sql/docs
50 | sql/site
51 | lib_managed/
52 | lint-r-report.log
53 | log/
54 | logs/
55 | out/
56 | project/boot/
57 | project/build/target/
58 | project/plugins/lib_managed/
59 | project/plugins/project/build.properties
60 | project/plugins/src_managed/
61 | project/plugins/target/
62 | python/lib/pyspark.zip
63 | python/deps
64 | python/pyspark/python
65 | reports/
66 | scalastyle-on-compile.generated.xml
67 | scalastyle-output.xml
68 | scalastyle.txt
69 | spark-*-bin-*.tgz
70 | spark-tests.log
71 | src_managed/
72 | streaming-tests.log
73 | target/
74 | unit-tests.log
75 | work/
76 |
77 | # For Hive
78 | TempStatsStore/
79 | metastore/
80 | metastore_db/
81 | sql/hive-thriftserver/test_warehouses
82 | warehouse/
83 | spark-warehouse/
84 |
85 | # For R session data
86 | .RData
87 | .RHistory
88 | .Rhistory
89 | *.Rproj
90 | *.Rproj.*
91 |
92 | .Rproj.user
93 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: scala
2 | scala:
3 | - 2.11.8
4 |
5 | deploy:
6 | provider: pages
7 | skip_cleanup: true
8 | github_token: $GITHUB_TOKEN
9 | email: yaooqinn@hotmail.com
10 | name: Kent Yao
11 | on:
12 | branch: master
13 |
14 | install:
15 | - mvn install -q -DskipTests=true -Dmaven.javadoc.skip=true -B -V
16 |
17 | script:
18 | - mvn package -q -Dmaven.javadoc.skip=true -B -V
19 | - mvn package -Pspark-2.2 -q -Dmaven.javadoc.skip=true -B -V
20 | - mvn package -Pspark-2.3 -q -Dmaven.javadoc.skip=true -B -V
21 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
203 |
204 | =======================================================================
205 | Apache Spark Subcomponents:
206 |
207 | The Apache Spark project contains subcomponents with separate copyright
208 | notices and license terms. Your use of the source code for the these
209 | subcomponents is subject to the terms and conditions of the following
210 | licenses.
211 |
212 |
213 | ========================================================================
214 | For heapq (pyspark/heapq3.py):
215 | ========================================================================
216 |
217 | See license/LICENSE-heapq.txt
218 |
219 | ========================================================================
220 | For SnapTree:
221 | ========================================================================
222 |
223 | See license/LICENSE-SnapTree.txt
224 |
225 | ========================================================================
226 | For jbcrypt:
227 | ========================================================================
228 |
229 | See license/LICENSE-jbcrypt.txt
230 |
231 | ========================================================================
232 | BSD-style licenses
233 | ========================================================================
234 |
235 | The following components are provided under a BSD-style license. See project link for details.
236 | The text of each license is also included at licenses/LICENSE-[project].txt.
237 |
238 | (BSD 3 Clause) netlib core (com.github.fommil.netlib:core:1.1.2 - https://github.com/fommil/netlib-java/core)
239 | (BSD 3 Clause) JPMML-Model (org.jpmml:pmml-model:1.2.7 - https://github.com/jpmml/jpmml-model)
240 | (BSD License) AntLR Parser Generator (antlr:antlr:2.7.7 - http://www.antlr.org/)
241 | (BSD License) ANTLR 4.5.2-1 (org.antlr:antlr4:4.5.2-1 - http://wwww.antlr.org/)
242 | (BSD licence) ANTLR ST4 4.0.4 (org.antlr:ST4:4.0.4 - http://www.stringtemplate.org)
243 | (BSD licence) ANTLR StringTemplate (org.antlr:stringtemplate:3.2.1 - http://www.stringtemplate.org)
244 | (BSD License) Javolution (javolution:javolution:5.5.1 - http://javolution.org)
245 | (BSD) JLine (jline:jline:0.9.94 - http://jline.sourceforge.net)
246 | (BSD) ParaNamer Core (com.thoughtworks.paranamer:paranamer:2.3 - http://paranamer.codehaus.org/paranamer)
247 | (BSD) ParaNamer Core (com.thoughtworks.paranamer:paranamer:2.6 - http://paranamer.codehaus.org/paranamer)
248 | (BSD 3 Clause) Scala (http://www.scala-lang.org/download/#License)
249 | (Interpreter classes (all .scala files in repl/src/main/scala
250 | except for Main.Scala, SparkHelper.scala and ExecutorClassLoader.scala),
251 | and for SerializableMapWrapper in JavaUtils.scala)
252 | (BSD-like) Scala Actors library (org.scala-lang:scala-actors:2.11.7 - http://www.scala-lang.org/)
253 | (BSD-like) Scala Compiler (org.scala-lang:scala-compiler:2.11.7 - http://www.scala-lang.org/)
254 | (BSD-like) Scala Compiler (org.scala-lang:scala-reflect:2.11.7 - http://www.scala-lang.org/)
255 | (BSD-like) Scala Library (org.scala-lang:scala-library:2.11.7 - http://www.scala-lang.org/)
256 | (BSD-like) Scalap (org.scala-lang:scalap:2.11.7 - http://www.scala-lang.org/)
257 | (BSD-style) scalacheck (org.scalacheck:scalacheck_2.11:1.10.0 - http://www.scalacheck.org)
258 | (BSD-style) spire (org.spire-math:spire_2.11:0.7.1 - http://spire-math.org)
259 | (BSD-style) spire-macros (org.spire-math:spire-macros_2.11:0.7.1 - http://spire-math.org)
260 | (New BSD License) Kryo (com.esotericsoftware:kryo:3.0.3 - https://github.com/EsotericSoftware/kryo)
261 | (New BSD License) MinLog (com.esotericsoftware:minlog:1.3.0 - https://github.com/EsotericSoftware/minlog)
262 | (New BSD license) Protocol Buffer Java API (com.google.protobuf:protobuf-java:2.5.0 - http://code.google.com/p/protobuf)
263 | (New BSD license) Protocol Buffer Java API (org.spark-project.protobuf:protobuf-java:2.4.1-shaded - http://code.google.com/p/protobuf)
264 | (The BSD License) Fortran to Java ARPACK (net.sourceforge.f2j:arpack_combined_all:0.1 - http://f2j.sourceforge.net)
265 | (The BSD License) xmlenc Library (xmlenc:xmlenc:0.52 - http://xmlenc.sourceforge.net)
266 | (The New BSD License) Py4J (net.sf.py4j:py4j:0.10.4 - http://py4j.sourceforge.net/)
267 | (Two-clause BSD-style license) JUnit-Interface (com.novocode:junit-interface:0.10 - http://github.com/szeiger/junit-interface/)
268 | (BSD licence) sbt and sbt-launch-lib.bash
269 | (BSD 3 Clause) d3.min.js (https://github.com/mbostock/d3/blob/master/LICENSE)
270 | (BSD 3 Clause) DPark (https://github.com/douban/dpark/blob/master/LICENSE)
271 | (BSD 3 Clause) CloudPickle (https://github.com/cloudpipe/cloudpickle/blob/master/LICENSE)
272 |
273 | ========================================================================
274 | MIT licenses
275 | ========================================================================
276 |
277 | The following components are provided under the MIT License. See project link for details.
278 | The text of each license is also included at licenses/LICENSE-[project].txt.
279 |
280 | (MIT License) JCL 1.1.1 implemented over SLF4J (org.slf4j:jcl-over-slf4j:1.7.5 - http://www.slf4j.org)
281 | (MIT License) JUL to SLF4J bridge (org.slf4j:jul-to-slf4j:1.7.5 - http://www.slf4j.org)
282 | (MIT License) SLF4J API Module (org.slf4j:slf4j-api:1.7.5 - http://www.slf4j.org)
283 | (MIT License) SLF4J LOG4J-12 Binding (org.slf4j:slf4j-log4j12:1.7.5 - http://www.slf4j.org)
284 | (MIT License) pyrolite (org.spark-project:pyrolite:2.0.1 - http://pythonhosted.org/Pyro4/)
285 | (MIT License) scopt (com.github.scopt:scopt_2.11:3.2.0 - https://github.com/scopt/scopt)
286 | (The MIT License) Mockito (org.mockito:mockito-core:1.9.5 - http://www.mockito.org)
287 | (MIT License) jquery (https://jquery.org/license/)
288 | (MIT License) AnchorJS (https://github.com/bryanbraun/anchorjs)
289 | (MIT License) graphlib-dot (https://github.com/cpettitt/graphlib-dot)
290 | (MIT License) dagre-d3 (https://github.com/cpettitt/dagre-d3)
291 | (MIT License) sorttable (https://github.com/stuartlangridge/sorttable)
292 | (MIT License) boto (https://github.com/boto/boto/blob/develop/LICENSE)
293 | (MIT License) datatables (http://datatables.net/license)
294 | (MIT License) mustache (https://github.com/mustache/mustache/blob/master/LICENSE)
295 | (MIT License) cookies (http://code.google.com/p/cookies/wiki/License)
296 | (MIT License) blockUI (http://jquery.malsup.com/block/)
297 | (MIT License) RowsGroup (http://datatables.net/license/mit)
298 | (MIT License) jsonFormatter (http://www.jqueryscript.net/other/jQuery-Plugin-For-Pretty-JSON-Formatting-jsonFormatter.html)
299 | (MIT License) modernizr (https://github.com/Modernizr/Modernizr/blob/master/LICENSE)
300 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # Spark Authorizer [![Build Status](https://travis-ci.org/yaooqinn/spark-authorizer.svg?branch=master)](https://travis-ci.org/yaooqinn/spark-authorizer) [![HitCount](http://hits.dwyl.io/yaooqinn/spark-authorizer.svg)](http://hits.dwyl.io/yaooqinn/spark-authorizer)
3 |
4 | **Spark Authorizer** provides *SQL Standard Based Authorization* for [Apache Spark™](http://spark.apache.org),
5 | just like [SQL Standard Based Hive Authorization](https://cwiki.apache.org/confluence/display/Hive/SQL+Standard+Based+Hive+Authorization).
6 | While you are using Spark SQL or the Dataset/DataFrame API to load data from tables backed by an [Apache Hive™](https://hive.apache.org) metastore,
7 | this library provides row/column-level fine-grained access control via [Apache Ranger™](https://ranger.apache.org) or Hive SQL Standard Based Authorization.
8 |
9 | Security is one of the fundamental requirements for enterprise adoption. [Apache Ranger™](https://ranger.apache.org) offers security plugins for many Hadoop ecosystem components,
10 | such as HDFS, Hive, HBase, Solr and Sqoop2. However, [Apache Spark™](http://spark.apache.org) is not covered yet.
11 | When a secured HDFS cluster is used as a data warehouse accessed by various users and groups via applications written with Spark and Hive,
12 | it is very difficult to manage the data consistently. Apache Spark users can reach the data warehouse only
13 | through the storage-based access controls offered by HDFS. This library shares the [Ranger Hive plugin](https://cwiki.apache.org/confluence/display/RANGER/Apache+Ranger+0.5.0+Installation#ApacheRanger0.5.0Installation-InstallingApacheHive(1.2.0))
14 | with Hive so that Spark can talk to the Ranger Admin.
15 |
16 | Please refer to [ACL Management for Spark SQL](https://yaooqinn.github.io/spark-authorizer/docs/spark_sql_authorization.html) to see what spark-authorizer supports.
17 |
18 | ## Quick Start
19 |
20 | ### Step 1. Install Spark Authorizer
21 |
22 | Include this package in your Spark Applications using:
23 | #### spark-shell, pyspark, or spark-submit
24 | ```bash
25 | > $SPARK_HOME/bin/spark-shell --packages yaooqinn:spark-authorizer:2.1.1
26 | ```
27 | #### sbt
28 | If you use the sbt-spark-package plugin, in your sbt build file, add:
29 | ```sbtshell
30 | spDependencies += "yaooqinn/spark-authorizer:2.1.1"
31 | ```
32 | Otherwise,
33 | ```sbtshell
34 | resolvers += "Spark Packages Repo" at "http://dl.bintray.com/spark-packages/maven"
35 |
36 | libraryDependencies += "yaooqinn" % "spark-authorizer" % "2.1.1"
37 | ```
38 |
39 | #### Maven
40 | In your pom.xml, add:
41 | ```xml
42 | <dependencies>
43 |   <!-- list of dependencies -->
44 |   <dependency>
45 |     <groupId>yaooqinn</groupId>
46 |     <artifactId>spark-authorizer</artifactId>
47 |     <version>2.1.1</version>
48 |   </dependency>
49 | </dependencies>
50 | <repositories>
51 |   <!-- list of other repositories -->
52 |   <repository>
53 |     <id>SparkPackagesRepo</id>
54 |     <url>http://dl.bintray.com/spark-packages/maven</url>
55 |   </repository>
56 | </repositories>
57 | ```
58 |
59 | #### Manually
60 | If you build Spark Authorizer manually (see [Building Spark Authorizer](https://yaooqinn.github.io/spark-authorizer/docs/building-spark-authorizer.html)), you can deploy it via:
61 | ```bash
62 | cp target/spark-authorizer-<version>.jar $SPARK_HOME/jars
63 | ```
64 |
65 | ### Step 2. Install & Configure Ranger Hive Plugin
66 |
67 | Please refer to [Install Ranger Hive Plugin For Apache Spark](https://yaooqinn.github.io/spark-authorizer/docs/install_plugin.html) to learn how to deploy the plugin jars to Apache Spark and set Ranger/Hive configurations.
68 |
69 | ### Step 3. Enable Spark Authorizer
70 |
71 | In `$SPARK_HOME/conf/spark-defaults.conf`, add:
72 |
73 | ```scala
74 | spark.sql.extensions=org.apache.ranger.authorization.spark.authorizer.RangerSparkSQLExtension
75 | ```
76 | **NOTE** `spark.sql.extensions` is only supported by Spark 2.2.x and later. For Spark 2.1.x, please use [Version: 1.1.3.spark2.1](https://github.com/yaooqinn/spark-authorizer/tree/78f7d818db773c3567c636575845a413ac560c90) and refer to the corresponding documentation.
77 |
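Alternatively, the extension can be registered programmatically when the `SparkSession` is built. Below is a minimal sketch, assuming the spark-authorizer jar is already on the application classpath (the app name is just a placeholder):

```scala
import org.apache.spark.sql.SparkSession

// Register the Ranger extension at session build time instead of via spark-defaults.conf.
val spark = SparkSession.builder()
  .appName("spark-authorizer-demo") // hypothetical application name
  .config("spark.sql.extensions",
    "org.apache.ranger.authorization.spark.authorizer.RangerSparkSQLExtension")
  .enableHiveSupport()
  .getOrCreate()
```
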
78 | ## Interactive Spark Shell
79 |
80 | The easiest way to start using Spark is through the Scala shell:
81 |
82 | ```shell
83 | bin/spark-shell --master yarn --proxy-user hzyaoqin
84 | ```
85 |
86 | ## Suffer for the Authorization Pain
87 |
88 | We create a Ranger policy as shown below:
89 | ![ranger-policy-details](docs/img/ranger-prolcy-details.png)
90 |
91 | Check privileges with some simple cases.
92 |
93 | #### Show databases
94 |
95 | ```sql
96 | scala> spark.sql("show databases").show
97 | +--------------+
98 | | databaseName|
99 | +--------------+
100 | | default|
101 | | spark_test_db|
102 | | tpcds_10g_ext|
103 | +--------------+
104 | ```
105 |
106 | #### Switch database
107 |
108 | ```sql
109 | scala> spark.sql("use spark_test_db").show
110 | 17/12/08 17:06:17 ERROR optimizer.Authorizer:
111 | +===============================+
112 | |Spark SQL Authorization Failure|
113 | |-------------------------------|
114 | |Permission denied: user [hzyaoqin] does not have [USE] privilege on [spark_test_db]
115 | |-------------------------------|
116 | |Spark SQL Authorization Failure|
117 | +===============================+
118 | ```
119 | Oops...
120 |
121 |
122 | ```sql
123 | scala> spark.sql("use tpcds_10g_ext").show
124 | ++
125 | ||
126 | ++
127 | ++
128 | ```
129 | LOL...
130 |
131 |
132 | ### Select
133 | ```sql
134 | scala> spark.sql("select cp_type from catalog_page limit 1").show
135 | 17/12/08 17:09:58 ERROR optimizer.Authorizer:
136 | +===============================+
137 | |Spark SQL Authorization Failure|
138 | |-------------------------------|
139 | |Permission denied: user [hzyaoqin] does not have [SELECT] privilege on [tpcds_10g_ext/catalog_page/cp_type]
140 | |-------------------------------|
141 | |Spark SQL Authorization Failure|
142 | +===============================+
143 | ```
144 | Oops...
145 |
146 | ```sql
147 | scala> spark.sql("select * from call_center limit 1").show
148 | +-----------------+-----------------+-----------------+---------------+-----------------+---------------+--------+--------+------------+--------+--------+-----------+---------+--------------------+--------------------+-----------------+-----------+----------------+----------+---------------+----------------+--------------+--------------+---------------+-------+-----------------+--------+------+-------------+-------------+-----------------+
149 | |cc_call_center_sk|cc_call_center_id|cc_rec_start_date|cc_rec_end_date|cc_closed_date_sk|cc_open_date_sk| cc_name|cc_class|cc_employees|cc_sq_ft|cc_hours| cc_manager|cc_mkt_id| cc_mkt_class| cc_mkt_desc|cc_market_manager|cc_division|cc_division_name|cc_company|cc_company_name|cc_street_number|cc_street_name|cc_street_type|cc_suite_number|cc_city| cc_county|cc_state|cc_zip| cc_country|cc_gmt_offset|cc_tax_percentage|
150 | +-----------------+-----------------+-----------------+---------------+-----------------+---------------+--------+--------+------------+--------+--------+-----------+---------+--------------------+--------------------+-----------------+-----------+----------------+----------+---------------+----------------+--------------+--------------+---------------+-------+-----------------+--------+------+-------------+-------------+-----------------+
151 | | 1| AAAAAAAABAAAAAAA| 1998-01-01| null| null| 2450952|NY Metro| large| 2| 1138| 8AM-4PM|Bob Belcher| 6|More than other a...|Shared others cou...| Julius Tran| 3| pri| 6| cally| 730| Ash Hill| Boulevard| Suite 0| Midway|Williamson County| TN| 31904|United States| -5.00| 0.11|
152 | +-----------------+-----------------+-----------------+---------------+-----------------+---------------+--------+--------+------------+--------+--------+-----------+---------+--------------------+--------------------+-----------------+-----------+----------------+----------+---------------+----------------+--------------+--------------+---------------+-------+-----------------+--------+------+-------------+-------------+-----------------+
153 |
154 | ```
155 |
156 | LOL...
157 |
158 | ### Dataset/DataFrame
159 |
160 | ```scala
161 | scala> spark.read.table("catalog_page").limit(1).collect
162 | ```
163 | ```
164 | 17/12/11 14:46:33 ERROR optimizer.Authorizer:
165 | +===============================+
166 | |Spark SQL Authorization Failure|
167 | |-------------------------------|
168 | |Permission denied: user [hzyaoqin] does not have [SELECT] privilege on [tpcds_10g_ext/catalog_page/cp_catalog_page_sk,cp_catalog_page_id,cp_promo_id,cp_start_date_sk,cp_end_date_sk,cp_department,cp_catalog_number,cp_catalog_page_number,cp_description,cp_type]
169 | |-------------------------------|
170 | |Spark SQL Authorization Failure|
171 | +===============================+
172 | ```
173 | Oops...
174 |
175 | ```scala
176 | scala> spark.read.table("call_center").limit(1).collect
177 | ```
178 | ```
179 | res3: Array[org.apache.spark.sql.Row] = Array([1,AAAAAAAABAAAAAAA,1998-01-01,null,null,2450952,NY Metro,large,2,1138,8AM-4PM,Bob Belcher,6,More than other authori,Shared others could not count fully dollars. New members ca,Julius Tran,3,pri,6,cally,730,Ash Hill,Boulevard,Suite 0,Midway,Williamson County,TN,31904,United States,-5.00,0.11])
180 | ```
181 | LOL...
182 |
183 | ---
184 |
--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-leap-day
2 |
--------------------------------------------------------------------------------
/conf/hive-site.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 |
3 | <!--
19 | -->
20 |
21 | <configuration>
22 |   <property>
23 |     <name>hive.security.authorization.enabled</name>
24 |     <value>true</value>
25 |   </property>
26 |
27 |   <property>
28 |     <name>hive.security.authorization.manager</name>
29 |     <value>org.apache.ranger.authorization.hive.authorizer.RangerHiveAuthorizerFactory</value>
30 |   </property>
31 |
32 |   <property>
33 |     <name>hive.security.authenticator.manager</name>
34 |     <value>org.apache.hadoop.hive.ql.security.SessionStateUserAuthenticator</value>
35 |   </property>
36 |
37 |   <property>
38 |     <name>hive.conf.restricted.list</name>
39 |     <value>hive.security.authorization.enabled,hive.security.authorization.manager,hive.security.authenticator.manager</value>
40 |   </property>
41 |
42 | </configuration>
--------------------------------------------------------------------------------
/conf/ranger-hive-audit.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 |
3 | <!--
19 | -->
20 |
21 | <configuration>
22 |   <property>
23 |     <name>xasecure.audit.is.enabled</name>
24 |     <value>true</value>
25 |   </property>
26 |
27 |   <property>
28 |     <name>xasecure.audit.destination.db</name>
29 |     <value>false</value>
30 |   </property>
31 |
32 |   <property>
33 |     <name>xasecure.audit.destination.db.jdbc.driver</name>
34 |     <value>com.mysql.jdbc.Driver</value>
35 |   </property>
36 |
37 |   <property>
38 |     <name>xasecure.audit.destination.db.jdbc.url</name>
39 |     <value>jdbc:mysql://10.171.161.78/ranger</value>
40 |   </property>
41 |
42 |   <property>
43 |     <name>xasecure.audit.destination.db.password</name>
44 |     <value>rangeradmin</value>
45 |   </property>
46 |
47 |   <property>
48 |     <name>xasecure.audit.destination.db.user</name>
49 |     <value>rangeradmin</value>
50 |   </property>
51 |
52 | </configuration>
--------------------------------------------------------------------------------
/conf/ranger-hive-security.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 |
3 | <!--
19 | -->
20 |
21 | <configuration>
22 |   <property>
23 |     <name>ranger.plugin.hive.policy.rest.url</name>
24 |     <value>ranger admin address like http://ranger-admin.org:6080</value>
25 |   </property>
26 |
27 |   <property>
28 |     <name>ranger.plugin.hive.service.name</name>
29 |     <value>a ranger hive service name</value>
30 |   </property>
31 |
32 |   <property>
33 |     <name>ranger.plugin.hive.policy.cache.dir</name>
34 |     <value>./a ranger hive service name/policycache</value>
35 |   </property>
36 |
37 |   <property>
38 |     <name>ranger.plugin.hive.policy.pollIntervalMs</name>
39 |     <value>5000</value>
40 |   </property>
41 |
42 |   <property>
43 |     <name>ranger.plugin.hive.policy.source.impl</name>
44 |     <value>org.apache.ranger.admin.client.RangerAdminRESTClient</value>
45 |   </property>
46 |
47 | </configuration>
--------------------------------------------------------------------------------
/docs/building-spark-authorizer.md:
--------------------------------------------------------------------------------
1 | # Building Spark Authorizer
2 |
3 | Spark Authorizer is built using [Apache Maven](http://maven.apache.org). To build it, run:
4 |
5 | ```bash
6 | mvn clean package
7 | ```
8 |
9 | Notes from Spark:
10 | > The Maven-based build is the build of reference for Apache Spark.
11 | Building Spark using Maven requires Maven 3.3.9 or newer and Java 7+.
12 | Note that support for Java 7 is deprecated as of Spark 2.0.0 and may be removed in Spark 2.2.0.
13 |
14 | So, I suggest you build this library using the same Maven / Java / Scala versions.
15 |
16 | ## Building against different versions of Apache Spark
17 |
18 | By default, Spark Authorizer is built against Spark 2.2.2, which may be incompatible with other Spark branches.
19 |
20 | ```bash
21 | # build for spark 2.2.x
22 | mvn clean package -Pspark-2.2
23 | ```
24 |
25 | ```bash
26 | # build for spark 2.3.x
27 | mvn clean package -Pspark-2.3
28 | ```
29 |
--------------------------------------------------------------------------------
/docs/img/ranger-prolcy-details.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yaooqinn/spark-authorizer/f575b0b8714bd86c8383af9d584aef5d0ed29c4e/docs/img/ranger-prolcy-details.png
--------------------------------------------------------------------------------
/docs/install_plugin.md:
--------------------------------------------------------------------------------
1 | # Installing Apache Ranger Hive Plugin For Apache Spark
2 |
3 | This article illustrates how to install the Apache Ranger plugin, which is made for Apache Hive, into Apache Spark with [spark-authorizer](https://github.com/yaooqinn/spark-authorizer). It enables column/row-level fine-grained [ACL Management for Spark SQL](https://yaooqinn.github.io/spark-authorizer/docs/spark_sql_authorization.html).
4 |
5 | Apache Spark is bundled with built-in Hive Metastore client (version 1.2.1.spark2) jars when it is built with `-Phive` enabled. AFAIK, this Hive Metastore client version is compatible with Hive Metastore server 1.2.1 and higher.
6 |
7 | We **DO NOT** support setting `spark.sql.hive.metastore.jars` to `maven` or to a location of jars used to instantiate the `HiveMetastoreClient`; `builtin` is the one and only option.
8 |
9 | Apache Ranger releases move quite fast, partly to catch up with newer Hive releases. AFAIK, Apache Ranger 0.6.x and later no longer support Hive [1.2.1](https://issues.apache.org/jira/browse/RANGER-1056); accordingly, you may use Apache Ranger 0.5.x to avoid the underlying pitfalls.
10 |
11 | An official installation guide for Apache Ranger 0.5.x can be found [here](https://cwiki.apache.org/confluence/display/RANGER/Apache+Ranger+0.5.0+Installation). The remainder of this article guides you through installing the Apache Ranger Hive plugin for Apache Spark, which is not covered in the official documentation.
12 |
13 | ## Building Apache Ranger
14 |
15 | 1. git clone git@github.com:apache/ranger.git
16 | 2. cd ranger
17 | 3. git fetch https://github.com/apache/ranger.git ranger-0.5.3-rc3:ranger-0.5.3
18 | 4. git checkout ranger-0.5.3
19 | 5. mvn clean compile package assembly:assembly install -Dmaven.test.skip=true
20 |
21 | If the build fails, please refer to the official documentation to check whether any prerequisites are missing.
22 |
23 | Once the mvn command succeeds, all archives of the Ranger admin and plugins are generated in `./target`, including `ranger-0.5.3-hive-plugin.tar.gz`, which is exactly what you need for the next steps:
24 | ```
25 | -rw-r--r-- 1 kent hadoop 163667362 Oct 15 15:38 ranger-0.5.3-admin.tar.gz
26 | -rw-r--r-- 1 kent hadoop 164655504 Oct 15 15:38 ranger-0.5.3-admin.zip
27 | -rw-r--r-- 1 kent hadoop 16771141 Oct 15 15:36 ranger-0.5.3-hbase-plugin.tar.gz
28 | -rw-r--r-- 1 kent hadoop 16797394 Oct 15 15:36 ranger-0.5.3-hbase-plugin.zip
29 | -rw-r--r-- 1 kent hadoop 16722944 Oct 15 15:35 ranger-0.5.3-hdfs-plugin.tar.gz
30 | -rw-r--r-- 1 kent hadoop 16747829 Oct 15 15:35 ranger-0.5.3-hdfs-plugin.zip
31 | -rw-r--r-- 1 kent hadoop 16139126 Oct 15 15:35 ranger-0.5.3-hive-plugin.tar.gz
32 | -rw-r--r-- 1 kent hadoop 16165266 Oct 15 15:36 ranger-0.5.3-hive-plugin.zip
33 | -rw-r--r-- 1 kent hadoop 32975495 Oct 15 15:36 ranger-0.5.3-kafka-plugin.tar.gz
34 | -rw-r--r-- 1 kent hadoop 33012135 Oct 15 15:37 ranger-0.5.3-kafka-plugin.zip
35 | -rw-r--r-- 1 kent hadoop 71917257 Oct 15 15:38 ranger-0.5.3-kms.tar.gz
36 | -rw-r--r-- 1 kent hadoop 72005470 Oct 15 15:39 ranger-0.5.3-kms.zip
37 | -rw-r--r-- 1 kent hadoop 21298145 Oct 15 15:36 ranger-0.5.3-knox-plugin.tar.gz
38 | -rw-r--r-- 1 kent hadoop 21322990 Oct 15 15:36 ranger-0.5.3-knox-plugin.zip
39 | -rw-r--r-- 1 kent hadoop 34600 Oct 15 15:38 ranger-0.5.3-migration-util.tar.gz
40 | -rw-r--r-- 1 kent hadoop 38014 Oct 15 15:38 ranger-0.5.3-migration-util.zip
41 | -rw-r--r-- 1 kent hadoop 18485767 Oct 15 15:39 ranger-0.5.3-ranger-tools.tar.gz
42 | -rw-r--r-- 1 kent hadoop 18495143 Oct 15 15:39 ranger-0.5.3-ranger-tools.zip
43 | -rw-r--r-- 1 kent hadoop 22416054 Oct 15 15:37 ranger-0.5.3-solr-plugin.tar.gz
44 | -rw-r--r-- 1 kent hadoop 22441083 Oct 15 15:37 ranger-0.5.3-solr-plugin.zip
45 | -rw-r--r-- 1 kent hadoop 3606416 Oct 15 15:39 ranger-0.5.3-src.tar.gz
46 | -rw-r--r-- 1 kent hadoop 5481890 Oct 15 15:39 ranger-0.5.3-src.zip
47 | -rw-r--r-- 1 kent hadoop 34769024 Oct 15 15:36 ranger-0.5.3-storm-plugin.tar.gz
48 | -rw-r--r-- 1 kent hadoop 34788044 Oct 15 15:36 ranger-0.5.3-storm-plugin.zip
49 | -rw-r--r-- 1 kent hadoop 13512547 Oct 15 15:38 ranger-0.5.3-usersync.tar.gz
50 | -rw-r--r-- 1 kent hadoop 13534930 Oct 15 15:38 ranger-0.5.3-usersync.zip
51 | -rw-r--r-- 1 kent hadoop 15942158 Oct 15 15:37 ranger-0.5.3-yarn-plugin.tar.gz
52 | -rw-r--r-- 1 kent hadoop 15969320 Oct 15 15:37 ranger-0.5.3-yarn-plugin.zip
53 | ```
54 |
55 | ## Applying Plugin to Apache Spark
56 |
57 | 1. cd target
58 | 2. tar zxf ranger-0.5.3-hive-plugin.tar.gz
59 | 3. cd ranger-0.5.3-hive-plugin/lib
60 |
61 | ```
62 | drwxr-xr-x 2 kent hadoop 4096 Oct 16 12:34 ranger-hive-plugin-impl
63 | -rw-r--r-- 1 kent hadoop 16061 Oct 15 15:35 ranger-hive-plugin-shim-0.5.3.jar
64 | -rw-r--r-- 1 kent hadoop 16545 Oct 15 15:35 ranger-plugin-classloader-0.5.3.jar
65 | ```
66 |
67 | Firstly, copy the folder and files listed above to `$SPARK_HOME/jars`.
68 |
69 | ```
70 | jersey-client-1.9.jar
71 | jersey-bundle-1.4.jar
72 | eclipselink-2.5.2.jar
73 | noggit-0.6.jar
74 | gson-2.2.4.jar
75 | httpclient-4.5.3.jar
76 | httpcore-4.4.6.jar
77 | httpmime-4.5.3.jar
78 | javax.persistence-2.1.0.jar
79 | mybatis-3.2.8.jar
80 | mysql-connector-java-5.1.39.jar
81 | ```
82 |
83 | Secondly, add the jars listed above to `$SPARK_HOME/jars/ranger-hive-plugin-impl/` if missing.
84 |
85 | ## Configuring Ranger for Apache Spark
86 |
87 |
88 | Firstly, add the following configurations in `hive-site.xml` to enable Ranger Authorization.
89 |
90 | ```
91 | <property>
92 |   <name>hive.security.authorization.enabled</name>
93 |   <value>true</value>
94 | </property>
95 |
96 | <property>
97 |   <name>hive.security.authorization.manager</name>
98 |   <value>org.apache.ranger.authorization.hive.authorizer.RangerHiveAuthorizerFactory</value>
99 | </property>
100 |
101 | <property>
102 |   <name>hive.security.authenticator.manager</name>
103 |   <value>org.apache.hadoop.hive.ql.security.SessionStateUserAuthenticator</value>
104 | </property>
105 |
106 | <property>
107 |   <name>hive.conf.restricted.list</name>
108 |   <value>hive.security.authorization.enabled,hive.security.authorization.manager,hive.security.authenticator.manager</value>
109 | </property>
110 | ```
111 |
112 | Secondly, create `ranger-hive-security.xml` in `$SPARK_HOME/conf` and add the following configurations to point to the right Ranger Admin server.
113 |
114 | ```
115 | <?xml version="1.0" encoding="UTF-8"?>
116 | <configuration>
117 |   <property>
118 |     <name>ranger.plugin.hive.policy.rest.url</name>
119 |     <value>ranger admin address like http://ranger-admin.org:6080</value>
120 |   </property>
121 |
122 |   <property>
123 |     <name>ranger.plugin.hive.service.name</name>
124 |     <value>a ranger hive service name</value>
125 |   </property>
126 |
127 |   <property>
128 |     <name>ranger.plugin.hive.policy.cache.dir</name>
129 |     <value>./a ranger hive service name/policycache</value>
130 |   </property>
131 |
132 |   <property>
133 |     <name>ranger.plugin.hive.policy.pollIntervalMs</name>
134 |     <value>5000</value>
135 |   </property>
136 |
137 |   <property>
138 |     <name>ranger.plugin.hive.policy.source.impl</name>
139 |     <value>org.apache.ranger.admin.client.RangerAdminRESTClient</value>
140 |   </property>
141 |
142 | </configuration>
143 | ```
144 |
145 | Finally, create `ranger-hive-audit.xml` in `$SPARK_HOME/conf` and add the following configurations to enable/disable auditing.
146 |
147 | ```
148 | <?xml version="1.0" encoding="UTF-8"?>
149 | <configuration>
150 |   <property>
151 |     <name>xasecure.audit.is.enabled</name>
152 |     <value>true</value>
153 |   </property>
154 |
155 |   <property>
156 |     <name>xasecure.audit.destination.db</name>
157 |     <value>false</value>
158 |   </property>
159 |
160 |   <property>
161 |     <name>xasecure.audit.destination.db.jdbc.driver</name>
162 |     <value>com.mysql.jdbc.Driver</value>
163 |   </property>
164 |
165 |   <property>
166 |     <name>xasecure.audit.destination.db.jdbc.url</name>
167 |     <value>jdbc:mysql://10.171.161.78/ranger</value>
168 |   </property>
169 |
170 |   <property>
171 |     <name>xasecure.audit.destination.db.password</name>
172 |     <value>rangeradmin</value>
173 |   </property>
174 |
175 |   <property>
176 |     <name>xasecure.audit.destination.db.user</name>
177 |     <value>rangeradmin</value>
178 |   </property>
179 |
180 | </configuration>
181 | ```
182 |
183 | The Ranger Hive plugin should work well through `spark-authorizer` once `spark.sql.extensions` is set to `org.apache.ranger.authorization.spark.authorizer.RangerSparkSQLExtension`.
184 | ## Additional Notes
185 |
186 | If you are using Apache Spark in `cluster` mode, the jar files under `$SPARK_HOME/jars/ranger-hive-plugin-impl/` will not be uploaded automatically. If you are not familiar with the Spark source code and unable to make the necessary modifications, I suggest you copy all files in `$SPARK_HOME/jars/ranger-hive-plugin-impl/` to `$SPARK_HOME/jars/` and **DELETE** `ranger-hive-plugin-shim-0.5.3.jar` from `$SPARK_HOME/jars/`. This works fine for the whole Spark application, apart from a tiny Spark UI problem caused by a jersey jar conflict.
187 |
188 | Also, for `cluster` mode Spark applications, the `ranger.plugin.hive.policy.cache.dir` configured in `ranger-hive-security.xml` must be creatable on all NodeManager nodes, because the Spark driver may be launched on any of them. One convenient and effective way is to configure a relative path, such as:
189 | ```
190 | <property>
191 |   <name>ranger.plugin.hive.policy.cache.dir</name>
192 |   <value>policycache</value>
193 | </property>
194 | ```
195 |
--------------------------------------------------------------------------------
/docs/manually_test_cases.md:
--------------------------------------------------------------------------------
1 | # Spark SQL Authorization Test Cases
2 | ## Prerequisites
3 |
4 | - Ranger Admin Server
5 |   - set up the Ranger admin server
6 |   - set up some Ranger policies
7 | - Ranger Hive Plugin
8 |   - set up the Ranger Hive plugin for Spark
9 |   - configure the Ranger plugin configurations to connect to the Ranger admin
10 | - Hive Metastore
11 |   - set up the Hive Metastore server
12 |   - make sure Ranger admin and Spark are connected correctly
13 | - Spark
14 |   - maybe HDFS
15 |   - maybe on YARN
16 |   - spark-shell
17 |
18 | ```bash
19 | bin/spark-shell --proxy-user hzyaoqin --conf spark.sql.warehouse.dir=/user/hzyaoqin/warehouse
20 | ```
21 |
22 | ```bash
23 | hadoop@hzadg-hadoop-dev2:~/spark-2.1.2-bin-hadoop2.7$ bin/spark-shell --proxy-user hzyaoqin --conf spark.sql.warehouse.dir=/user/hzyaoqin/warehouse
24 | 18/06/07 11:00:47 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
25 | Setting default log level to "WARN".
26 | To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
27 | 18/06/07 11:00:56 WARN SparkContext: Support for Java 7 is deprecated as of Spark 2.0.0
28 | 18/06/07 11:00:59 WARN DomainSocketFactory: The short-circuit local reads feature cannot be used because libhadoop cannot be loaded.
29 | 18/06/07 11:00:59 WARN HiveConf: HiveConf of name hive.exec.partition.num.limit does not exist
30 | 18/06/07 11:01:00 WARN HiveConf: HiveConf of name hive.exec.partition.num.limit does not exist
31 | 18/06/07 11:01:00 WARN HiveConf: HiveConf of name hive.exec.partition.num.limit does not exist
32 | 18/06/07 11:01:00 WARN Client: Neither spark.yarn.jars nor spark.yarn.archive is set, falling back to uploading libraries under SPARK_HOME.
33 | Spark context Web UI available at http://10.201.168.144:4040
34 | Spark context available as 'sc' (master = yarn, app id = application_1522381253211_0116).
35 | Spark session available as 'spark'.
36 | Welcome to
37 | ____ __
38 | / __/__ ___ _____/ /__
39 | _\ \/ _ \/ _ `/ __/ '_/
40 | /___/ .__/\_,_/_/ /_/\_\ version 2.1.2
41 | /_/
42 |
43 | Using Scala version 2.11.8 (Java HotSpot(TM) 64-Bit Server VM, Java 1.7.0_67)
44 | Type in expressions to have them evaluated.
45 | Type :help for more information.
46 |
47 | scala> sc.setLogLevel("info"); import org.apache.spark.sql.catalyst.optimizer.Authorizer; spark.experimental.extraOptimizations ++= Seq(Authorizer)
48 | 18/06/07 11:10:50 INFO SharedState: Warehouse path is '/user/hzyaoqin/warehouse'.
49 | 18/06/07 11:10:50 INFO HiveUtils: Initializing HiveMetastoreConnection version 1.2.1 using Spark classes.
50 | 18/06/07 11:10:51 WARN HiveConf: HiveConf of name hive.exec.partition.num.limit does not exist
51 | 18/06/07 11:10:51 INFO metastore: Trying to connect to metastore with URI thrift://hzadg-hadoop-dev2.server.163.org:9083
52 | 18/06/07 11:10:51 INFO metastore: Connected to metastore.
53 | 18/06/07 11:10:51 INFO SessionState: Created local directory: /tmp/d0216e65-2506-4d7c-8a7d-13f19fba55c0_resources
54 | 18/06/07 11:10:51 INFO SessionState: Created HDFS directory: /tmp/hive/hzyaoqin/d0216e65-2506-4d7c-8a7d-13f19fba55c0
55 | 18/06/07 11:10:51 INFO SessionState: Created local directory: /tmp/hadoop/d0216e65-2506-4d7c-8a7d-13f19fba55c0
56 | 18/06/07 11:10:51 INFO SessionState: Created HDFS directory: /tmp/hive/hzyaoqin/d0216e65-2506-4d7c-8a7d-13f19fba55c0/_tmp_space.db
57 | 18/06/07 11:10:51 INFO HiveClientImpl: Warehouse location for Hive client (version 1.2.1) is /user/hzyaoqin/warehouse
58 | import org.apache.spark.sql.catalyst.optimizer.Authorizer
59 |
60 | scala>
61 | ```
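Before walking through the commands below, a small helper makes the manual checks repeatable. This is only a sketch: it assumes a `spark` session with the authorizer already enabled (as in the transcript above), and `check` is a hypothetical helper name.

```scala
import scala.util.{Failure, Success, Try}

// Run a statement and report whether the authorizer allowed or denied it.
def check(statement: String): Unit = Try(spark.sql(statement).collect()) match {
  case Success(_) => println(s"ALLOWED: $statement")
  case Failure(e) => println(s"DENIED : $statement -> ${e.getClass.getSimpleName}")
}

check("alter database default set dbproperties ('abc'='xyz')")
check("use spark_test_db")
```
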
62 | ## RunnableCommand
63 |
64 | ### AlterDatabasePropertiesCommand
65 |
66 | ```sql
67 | ALTER (DATABASE|SCHEMA) database_name SET DBPROPERTIES (property_name=property_value, ...)
68 | ```
69 |
70 | ```sql
71 | sql("alter database default set dbproperties ('abc'='xyz')").show
72 | ```
73 |
74 | ### AlterTableAddColumnsCommand
75 |
76 | ```sql
77 | ALTER TABLE table_identifier ADD COLUMNS (col_name data_type [COMMENT col_comment], ...);
78 | ```
79 |
80 | ### AlterTableChangeColumnCommand
81 |
82 | ```sql
83 | ALTER TABLE table_identifier
84 | CHANGE [COLUMN] column_old_name column_new_name column_dataType [COMMENT column_comment]
85 | [FIRST | AFTER column_name];
86 | ```
87 |
88 | ### AlterTableDropPartitionCommand
89 |
90 | ```sql
91 | ALTER TABLE table DROP [IF EXISTS] PARTITION spec1[, PARTITION spec2, ...] [PURGE];
92 | ```
93 |
94 | ```sql
95 |
96 | ```
97 |
98 | ### AlterTableRecoverPartitionsCommand
99 |
100 | ```sql
101 | ALTER TABLE table RECOVER PARTITIONS;
102 | MSCK REPAIR TABLE table;
103 | ```
104 |
105 | ### AlterTableRenamePartitionCommand
106 |
107 | ```sql
108 | ALTER TABLE table PARTITION spec1 RENAME TO PARTITION spec2;
109 | ```
110 |
111 |
112 | ### AlterTableRenameCommand
113 |
114 | ```sql
115 | ALTER TABLE table1 RENAME TO table2;
116 | ALTER VIEW view1 RENAME TO view2;
117 |
118 | ```
119 |
120 | ```sql
121 | sql("alter table src10 rename to src11").show
122 | ```
123 |
124 | ### AlterTableSetPropertiesCommand
125 |
126 | ```sql
127 | ALTER TABLE table1 SET TBLPROPERTIES ('key1' = 'val1', 'key2' = 'val2', ...);
128 | ALTER VIEW view1 SET TBLPROPERTIES ('key1' = 'val1', 'key2' = 'val2', ...);
129 | ```
130 |
131 | ### AlterTableUnsetPropertiesCommand
132 |
133 | ```sql
134 | ALTER TABLE table1 UNSET TBLPROPERTIES [IF EXISTS] ('key1', 'key2', ...);
135 | ALTER VIEW view1 UNSET TBLPROPERTIES [IF EXISTS] ('key1', 'key2', ...);
136 | ```
137 |
138 | ### AlterTableSerDePropertiesCommand
139 |
140 | ```sql
141 | ALTER TABLE table [PARTITION spec] SET SERDE serde_name [WITH SERDEPROPERTIES props];
142 | ALTER TABLE table [PARTITION spec] SET SERDEPROPERTIES serde_properties;
143 | ```
144 |
145 | ### AlterTableSetLocationCommand
146 | ```sql
147 | ALTER TABLE table_name [PARTITION partition_spec] SET LOCATION "loc";
148 | ```
149 |
150 | ### AlterViewAsCommand
151 |
152 | ```sql
153 | ALTER VIEW test_view AS SELECT 3 AS i, 4 AS j
154 | ```
155 |
156 | Treat the SELECT clause as a QUERY Hive operation.
157 |
158 | ### AnalyzeColumnCommand
159 |
160 | ```sql
161 | ANALYZE TABLE tableName COMPUTE STATISTICS FOR COLUMNS some_random_column
162 | ```
163 |
164 | ### AnalyzeTableCommand
165 |
166 | ```sql
167 | analyze table tableName compute statistics noscan;
168 | ```
169 |
170 | ### AnalyzePartitionCommand
171 |
172 | ```sql
173 | ANALYZE TABLE t PARTITION(ds='2008-04-09', hr=11) COMPUTE STATISTICS;
174 | ANALYZE TABLE t PARTITION(ds='2008-04-09', hr=11) COMPUTE STATISTICS;
175 | ANALYZE TABLE t PARTITION(ds='2008-04-09', hr) COMPUTE STATISTICS;
176 | ANALYZE TABLE t PARTITION(ds='2008-04-09', hr) COMPUTE STATISTICS noscan;
177 | ANALYZE TABLE t PARTITION(ds, hr=11) COMPUTE STATISTICS noscan;
178 | ANALYZE TABLE t PARTITION(ds, hr) COMPUTE STATISTICS;
179 | ```
180 |
181 | ### CreateDatabaseCommand
182 |
183 | ```sql
184 | CREATE (DATABASE|SCHEMA) [IF NOT EXISTS] database_name
185 | [COMMENT database_comment]
186 | [LOCATION database_directory]
187 | [WITH DBPROPERTIES (property_name=property_value, ...)];
188 | ```
189 |
190 | ### CreateDataSourceTableAsSelectCommand
191 |
192 | ```sql
193 | CREATE TABLE [IF NOT EXISTS] [db_name.]table_name
194 | USING format OPTIONS ([option1_name "option1_value", option2_name "option2_value", ...])
195 | AS SELECT ...
196 | ```
197 |
198 | ### CreateHiveTableAsSelectCommand
199 |
200 | ```sql
201 | CREATE TABLE [IF NOT EXISTS] [db_name.]table_name
202 | STORED AS format AS SELECT ...
203 | ```
204 |
205 | ### CreateFunctionCommand
206 |
207 | ```sql
208 | CREATE TEMPORARY FUNCTION functionName AS className [USING JAR|FILE 'uri' [, JAR|FILE 'uri']]
209 |
210 | CREATE FUNCTION [databaseName.]functionName AS className [USING JAR|FILE 'uri' [, JAR|FILE 'uri']]
211 | ```
212 |
213 | ### CreateTableCommand
214 |
215 | ```sql
216 |
217 | CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
218 | [(col1 data_type [COMMENT col_comment], ...)]
219 | [COMMENT table_comment]
220 | [PARTITIONED BY (col3 data_type [COMMENT col_comment], ...)]
221 | [CLUSTERED BY (col1, ...) [SORTED BY (col1 [ASC|DESC], ...)] INTO num_buckets BUCKETS]
222 | [SKEWED BY (col1, col2, ...) ON ((col_value, col_value, ...), ...)
223 | [STORED AS DIRECTORIES]
224 | [ROW FORMAT row_format]
225 | [STORED AS file_format | STORED BY storage_handler_class [WITH SERDEPROPERTIES (...)]]
226 | [LOCATION path]
227 | [TBLPROPERTIES (property_name=property_value, ...)]
228 | [AS select_statement];
229 |
230 | ```
231 |
232 | ### CreateDataSourceTableCommand
233 |
234 | ```sql
235 | CREATE TABLE [IF NOT EXISTS] [db_name.]table_name
236 | [(col1 data_type [COMMENT col_comment], ...)]
237 | USING format OPTIONS ([option1_name "option1_value", option2_name "option2_value", ...])
238 | ```
239 |
240 | ### CreateTableLikeCommand
241 |
242 | ```sql
243 | CREATE TABLE [IF NOT EXISTS] [db_name.]table_name
244 | LIKE [other_db_name.]existing_table_name
245 | ```
246 |
247 | ### CreateViewCommand
248 |
249 | ```sql
250 | CREATE view t1 partitioned
251 | ON (ds) AS
252 | SELECT *
253 | FROM (
254 | SELECT KEY,
255 | value,
256 | ds
257 | FROM t1_new
258 | UNION ALL
259 | SELECT KEY,
260 | value,
261 | t1_old.ds
262 | FROM t1_old
263 | JOIN t1_mapping
264 | ON t1_old.keymap = t1_mapping.keymap
265 | AND t1_old.ds = t1_mapping.ds ) subq;
266 | ```
267 |
268 | ### CacheTableCommand
269 |
270 | ```sql
271 | CACHE TABLE testCacheTable AS SELECT * FROM src;
272 | ```
273 |
274 | ### CreateTempViewUsing
275 |
276 | ### DescribeColumnCommand
277 |
278 | ```sql
279 | DESCRIBE [EXTENDED|FORMATTED] table_name column_name;
280 | ```
281 |
282 | ### DescribeDatabaseCommand
283 |
284 | ```sql
285 | DESCRIBE DATABASE [EXTENDED] db_name
286 | ```
287 |
288 | ### DescribeFunctionCommand
289 |
290 | ```sql
291 | DESCRIBE FUNCTION [EXTENDED] upper;
292 | ```
293 |
294 | ### DescribeTableCommand
295 |
296 | ```sql
297 | DESCRIBE [EXTENDED|FORMATTED] table_name partitionSpec?;
298 | ```
299 |
300 | ### DropDatabaseCommand
301 |
302 | ```sql
303 | DROP DATABASE [IF EXISTS] database_name [RESTRICT|CASCADE];
304 | ```
305 |
306 |
307 | ### DropFunctionCommand
308 |
309 | ```sql
310 | DROP TEMPORARY FUNCTION helloworld;
311 | DROP TEMPORARY FUNCTION IF EXISTS helloworld;
312 | DROP FUNCTION hello.world;
313 | DROP FUNCTION IF EXISTS hello.world;
314 | ```
315 |
316 |
317 | ### DropTableCommand
318 |
319 | ```sql
320 | DROP TABLE [IF EXISTS] table_name;
321 | DROP VIEW [IF EXISTS] [db_name.]view_name;
322 | ```
323 |
324 | ### ExplainCommand
325 |
326 | ```sql
327 | EXPLAIN (EXTENDED | CODEGEN) SELECT * FROM ...
328 | ```
329 |
330 | ### InsertIntoDataSourceCommand
331 |
332 | ### InsertIntoDataSourceDirCommand
333 |
334 | ```sql
335 | INSERT OVERWRITE DIRECTORY (path=STRING)?
336 | USING format OPTIONS ([option1_name "option1_value", option2_name "option2_value", ...])
337 | SELECT ...
338 | ```
339 |
340 | ### InsertIntoHadoopFsRelationCommand
341 |
342 |
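No SQL form is listed for this command; a plain file-format DataFrame write exercises it. A minimal sketch, with a hypothetical output path and an existing `spark` session assumed:

```scala
// Writing a DataFrame to a file-based data source is planned as InsertIntoHadoopFsRelationCommand.
spark.range(10).write.mode("overwrite").parquet("/tmp/authz_demo_parquet")
```
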
343 | ### InsertIntoHiveDirCommand
344 | ```sql
345 | INSERT OVERWRITE [LOCAL] DIRECTORY
346 | path
347 | [ROW FORMAT row_format]
348 | [STORED AS file_format]
349 | SELECT ...
350 | ```
351 |
352 | ### LoadDataCommand
353 | ```sql
354 | LOAD DATA [LOCAL] INPATH 'filepath' [OVERWRITE] INTO TABLE tablename
355 | [PARTITION (partcol1=val1, partcol2=val2 ...)]
356 | ```
357 |
358 |
359 | ### SaveIntoDataSourceCommand
360 |
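Saving through a `CreatableRelationProvider` source (for example the JDBC source) is planned as this command. A minimal sketch with hypothetical connection values:

```scala
// df.write.format(...).save() against a CreatableRelationProvider goes through SaveIntoDataSourceCommand.
spark.range(10).write
  .format("jdbc")
  .option("url", "jdbc:mysql://example.com:3306/demo")
  .option("dbtable", "authz_demo")
  .option("user", "demo")
  .option("password", "demo")
  .save()
```
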
361 | ### SetCommand
362 | ```sql
363 | set key = value;
364 | set -v;
365 | set;
366 | ```
367 |
368 | ### SetDatabaseCommand
369 |
370 | ```sql
371 | USE database_name;
372 | ```
373 |
374 | ### ShowCreateTableCommand
375 |
376 | ```sql
377 | SHOW CREATE TABLE t;
378 | ```
379 |
380 | ### ShowColumnsCommand
381 | ```sql
382 | SHOW COLUMNS (FROM | IN) table_identifier [(FROM | IN) database];
383 | ```
384 |
385 | ### ShowDatabasesCommand
386 | ```sql
387 | SHOW (DATABASES|SCHEMAS) [LIKE 'identifier_with_wildcards'];
388 | ```
389 |
390 | ### ShowFunctionsCommand
391 | ```sql
392 | SHOW FUNCTIONS [LIKE pattern]
393 | ```
394 |
395 | ### ShowPartitionsCommand
396 | ```sql
397 | SHOW PARTITIONS [db_name.]table_name [PARTITION(partition_spec)];
398 | ```
399 |
400 | ### ShowTablesCommand
401 | ```sql
402 | SHOW TABLES [(IN|FROM) database_name] [[LIKE] 'identifier_with_wildcards'];
403 | ```
404 |
405 | ### ShowTablePropertiesCommand
406 | ```sql
407 | SHOW TBLPROPERTIES table_name[('propertyKey')];
408 | ```
409 |
410 |
411 | ### TruncateTableCommand
412 |
413 | ```sql
414 | TRUNCATE TABLE tablename [PARTITION (partcol1=val1, partcol2=val2 ...)]
415 | ```
416 |
417 |
418 | ### UncacheTableCommand
419 |
420 | ```sql
421 | uncache table t;
422 | ```
423 |
424 | ### Ignored
425 |
426 | ```
427 | // AddFileCommand
428 | // AddJarCommand
429 | // ...
430 |
431 | ```
432 |
433 | ## Projection
434 |
435 | ## LeafNode
436 |
437 | ## UnaryNode
438 |
439 | ## BinaryNode
440 |
441 | ## Union
442 |
443 |
--------------------------------------------------------------------------------
/docs/ranger_authorization.md:
--------------------------------------------------------------------------------
1 | # Spark SQL Ranger Security Support Guide
2 |
3 | Ranger security support is one of the available Authorization methods for Spark SQL with [spark-authorizer](https://github.com/yaooqinn/spark-authorizer).
4 |
5 | Ranger is a framework to enable, monitor and manage comprehensive data security across the Hadoop platform. [spark-authorizer](https://github.com/yaooqinn/spark-authorizer) gives Spark SQL access control ability by reusing the [Ranger Plugin for Hive MetaStore](https://cwiki.apache.org/confluence/display/RANGER/Ranger+Plugin+for+Hive+MetaStore).
6 | [Ranger](https://ranger.apache.org/) expands the scope of the existing SQL-Standard Based Authorization but does not support Spark SQL. [spark-authorizer](https://github.com/yaooqinn/spark-authorizer) sticks them together.
7 |
8 | ## Ranger Security Support
9 |
10 | |Configuration| Configuration File|Example| Descriptions |
11 | |---|---|---|---|
12 | |ranger.plugin.hive.policy.rest.url|ranger-hive-security.xml| http://ranger.admin.one:6080,http://ranger.admin.two.lt.163.org:6080| Comma separated list of ranger admin address|
13 | |ranger.plugin.hive.service.name|ranger-hive-security.xml||Name of the Ranger service containing policies for this instance|
14 | |ranger.plugin.hive.policy.cache.dir|ranger-hive-security.xml|policycache| local cache directory for ranger policy caches|
15 |
16 | Create `ranger-hive-security.xml` in `$SPARK_HOME/conf` with configurations above properly set.
17 |
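A minimal sketch of such a file, assuming a Ranger admin at `http://ranger.admin.one:6080` and a Ranger service named `hivedev`; adjust the values to your deployment:

```xml
<configuration>
  <property>
    <name>ranger.plugin.hive.policy.rest.url</name>
    <value>http://ranger.admin.one:6080</value>
  </property>
  <property>
    <name>ranger.plugin.hive.service.name</name>
    <value>hivedev</value>
  </property>
  <property>
    <name>ranger.plugin.hive.policy.cache.dir</name>
    <value>policycache</value>
  </property>
</configuration>
```
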
18 | ## Configure Hive Metastore Client Side
19 | ```xml
20 | <property>
21 |   <name>hive.security.authorization.enabled</name>
22 |   <value>true</value>
23 | </property>
24 | <property>
25 |   <name>hive.security.authorization.manager</name>
26 |   <value>org.apache.ranger.authorization.hive.authorizer.RangerHiveAuthorizerFactory</value>
27 | </property>
28 | <property>
29 |   <name>hive.security.authenticator.manager</name>
30 |   <value>org.apache.hadoop.hive.ql.security.SessionStateUserAuthenticator</value>
31 | </property>
32 | <property>
33 |   <name>hive.conf.restricted.list</name>
34 |   <value>
35 |     hive.security.authorization.enabled,hive.security.authorization.manager,hive.security.authenticator.manager
36 |   </value>
37 | </property>
38 |
39 | ```
40 |
41 | Add configurations above in `$SPARK_HOME/conf/hive-site.xml` to enable Ranger security support.
42 |
43 | ## Ranger Audit Support
44 |
45 | All access to Spark SQL/Hive tables that is authorized by Ranger is automatically audited by Ranger. Audit destinations such as HDFS and Solr are supported.
46 |
47 | |Configuration| Configuration File|Example| Descriptions |
48 | |---|---|---|---|
49 | |xasecure.audit.is.enabled|ranger-hive-audit.xml|false|When true, auditing is enabled|
50 | |xasecure.audit.jpa.javax.persistence.jdbc.driver|ranger-hive-audit.xml|com.mysql.jdbc.Driver|JDBC driver for auditing to a MySQL database destination|
51 | |xasecure.audit.jpa.javax.persistence.jdbc.url|ranger-hive-audit.xml| jdbc:mysql://address/dbname|JDBC URL of the database instance to audit to|
52 | |xasecure.audit.jpa.javax.persistence.jdbc.user|ranger-hive-audit.xml|*username*|user name|
53 | |xasecure.audit.jpa.javax.persistence.jdbc.password|ranger-hive-audit.xml|*Password*| Password|
54 |
55 | Create `ranger-hive-audit.xml` in `$SPARK_HOME/conf` with the configurations above properly set to enable or disable auditing.
56 |
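A minimal sketch of such a file, assuming auditing to a MySQL database; the JDBC URL and credentials below are placeholders:

```xml
<configuration>
  <property>
    <name>xasecure.audit.is.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>xasecure.audit.jpa.javax.persistence.jdbc.driver</name>
    <value>com.mysql.jdbc.Driver</value>
  </property>
  <property>
    <name>xasecure.audit.jpa.javax.persistence.jdbc.url</name>
    <value>jdbc:mysql://mysql.host:3306/ranger_audit</value>
  </property>
  <property>
    <name>xasecure.audit.jpa.javax.persistence.jdbc.user</name>
    <value>ranger_audit_user</value>
  </property>
  <property>
    <name>xasecure.audit.jpa.javax.persistence.jdbc.password</name>
    <value>ranger_audit_password</value>
  </property>
</configuration>
```
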
57 |
58 | ## Install `ranger-hive-plugin` for Spark SQL
59 |
60 | Please refer to the [Install and Enable Ranger Hive Plugin](https://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.6.4/bk_command-line-installation/content/ch14s05s03s02.html) guide for an overview of how to set up the Ranger Hive plugin jars for Spark SQL.
61 |
62 |
--------------------------------------------------------------------------------
/docs/spark_sql_authorization.md:
--------------------------------------------------------------------------------
1 | # ACL Management for Spark SQL
2 |
3 | Three primary modes of Spark SQL authorization are available with spark-authorizer:
4 |
5 | ## Storage-Based Authorization
6 |
7 | Enabling Storage-Based Authorization in the `Hive Metastore Server` uses HDFS permissions as the main source of verification and allows for a consistent data and metadata authorization policy. It controls metadata access by verifying whether the user has permission to access the corresponding directories on HDFS. As with `HiveServer2`, files and directories are translated into Hive metadata objects, such as databases, tables and partitions, and are protected from end users' queries when Spark SQL is exposed as a service like [Kyuubi](https://github.com/yaooqinn/kyuubi), Livy, etc.
8 |
9 | Storage-Based Authorization offers database, table and partition-level coarse-grained access control.
10 |
11 | Please refer to the [Storage-Based Authorization Guide](https://yaooqinn.github.io/spark-authorizer/docs/storage_based_authorization.html) in the online documentation for an overview on how to configure Storage-Based Authorization for Spark SQL.
12 |
13 | ## SQL-Standard Based Authorization
14 |
15 | Enabling SQL-Standard Based Authorization gives users finer-grained access control compared with Storage-Based Authorization. Beyond the abilities of Storage-Based Authorization, it extends access control to the view and column level. Unfortunately, Spark SQL does not support the GRANT/REVOKE statements that manage this access; they can be issued only through HiveServer2. The good news is that [spark-authorizer](https://github.com/yaooqinn/spark-authorizer) makes Spark SQL able to enforce the fine-grained access control granted or revoked through Hive.
16 |
17 | For Spark SQL Client users who can directly access HDFS, SQL-Standard Based Authorization can be easily bypassed.
18 |
19 | With [Kyuubi](https://github.com/yaooqinn/kyuubi), SQL-Standard Based Authorization is guaranteed, because the security configurations, metadata, and storage information are kept away from end users.
20 |
21 | Please refer to the [SQL-Standard Based Authorization Guide](https://yaooqinn.github.io/spark-authorizer/docs/sql_std_based_authorization.html) in the online documentation for an overview on how to configure SQL-Standard Based Authorization for Spark SQL.
22 |
23 | ## Ranger Security Support
24 |
25 | [Apache Ranger](https://ranger.apache.org/) is a framework to enable, monitor and manage comprehensive data security across the Hadoop platform, but its coverage ends before Spark and Spark SQL. [spark-authorizer](https://github.com/yaooqinn/spark-authorizer) gives Spark SQL access control ability by reusing the [Ranger Plugin for Hive MetaStore](https://cwiki.apache.org/confluence/display/RANGER/Ranger+Plugin+for+Hive+MetaStore).
26 | [Apache Ranger](https://ranger.apache.org/) expands the scope of the existing SQL-Standard Based Authorization but does not support Spark SQL. [spark-authorizer](https://github.com/yaooqinn/spark-authorizer) sticks them together.
27 |
28 | Please refer to the [Spark SQL Ranger Security Support Guide](https://yaooqinn.github.io/spark-authorizer/docs/ranger_authorization.html) in the online documentation for an overview on how to configure Ranger for Spark SQL.
29 |
--------------------------------------------------------------------------------
/docs/sql_std_based_authorization.md:
--------------------------------------------------------------------------------
1 | # Spark SQL SQL-Standard Based Authorization Guide
2 |
3 | [SQL-Standard Based Authorization](https://cwiki.apache.org/confluence/display/Hive/SQL+Standard+Based+Hive+Authorization) is one of the available Authorization methods for Spark SQL with [spark-authorizer](https://github.com/yaooqinn/spark-authorizer).
4 |
5 | The [spark-authorizer](https://github.com/yaooqinn/spark-authorizer) can enable Spark SQL with Hive's [SQL-Standard Based Authorization](https://cwiki.apache.org/confluence/display/Hive/SQL+Standard+Based+Hive+Authorization) for fine grained access control. It is based on the SQL standard for authorization, and uses the familiar GRANT/REVOKE statements to control access.
6 |
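For illustration, a hedged example of such statements (issued through HiveServer2/beeline, not through Spark SQL); the table and user names are placeholders:

```sql
GRANT SELECT ON TABLE default.src TO USER alice;
REVOKE SELECT ON TABLE default.src FROM USER alice;
```
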
7 | For Spark SQL Client users who can directly access HDFS, SQL-Standard Based Authorization can be easily bypassed.
8 |
9 | With [Kyuubi](https://github.com/yaooqinn/kyuubi), SQL-Standard Based Authorization is guaranteed, because the security configurations, metadata, and storage information are kept away from end users. It can be used along with Storage-Based Authorization on the `Hive Metastore Server`.
10 |
11 | ## Minimum Permissions
12 |
13 | The following table shows the minimum permissions required for Spark SQL when using SQL-Standard Based Authorization:
14 |
15 | Operation | Permission required
16 | ---|---
17 | create table | ownership of database
18 | drop table | ownership
19 | describe table | select
20 | show partitions | select
21 | alter table location | ownership; URI privilege: RWX permission + ownership (for new location)
22 | alter partition location | ownership; URI privilege: RWX permission + ownership (for new partition location)
23 | alter table add partition | insert; URI privilege: RWX permission + ownership (for partition location)
24 | alter table drop partition | delete
25 | alter table (all of them except the ones listed above) | ownership
26 | truncate table | ownership
27 | create view | select "with grant option"
28 | alter view properties | ownership
29 | alter view rename | ownership
30 | drop view | ownership
31 | analyze table | select and insert
32 | show columns | select
33 | show table properties | select
34 | CTAS | select (of input) and ownership (of database)
35 | select | select
36 | insert | insert and delete (for overwrite)
37 | delete | delete
38 | load | insert (output); delete (output); URI privilege: RWX permission + ownership (input location)
39 | show create table | select "with grant option"
40 | create function | admin
41 | drop function | admin
42 | MSCK |admin
43 | alter database| admin
44 | create database | URI privilege: RWX permission + ownership (if custom location specified)
45 | explain | select
46 | drop database | ownership
47 |
48 |
49 | ## Privileges
50 | - Select - READ access to an object
51 | - Insert - access to ADD data to an object (table)
52 | - Update - access to run UPDATE queries on an object (table)
53 | - Delete - access to DELETE data in an object (table)
54 | - All Privileges - includes all above privileges
55 |
56 |
57 | ## Limitations
58 |
59 | Spark SQL does not support the GRANT/REVOKE statements; they can be issued only through Hive.
60 |
--------------------------------------------------------------------------------
/docs/storage_based_authorization.md:
--------------------------------------------------------------------------------
1 | # Spark SQL Storage-Based Authorization Guide
2 |
3 | Storage-Based Authorization is one of the available Authorization methods for Spark SQL with or without [spark-authorizer](https://github.com/yaooqinn/spark-authorizer).
4 |
5 | When the `Hive Metastore Server` is configured to use Storage-Based Authorization, it uses the file system permissions for directories corresponding to the different kinds of metadata objects as the source of verification for the authorization policy. Using this authorization method is recommended in the metastore server.
6 |
7 | With the Hive Metastore as the external catalog for Spark SQL, each database or table has a corresponding directory on the file system used as the storage layer. Under this authorization model, the rwx permissions of that directory also determine the permissions of a user, or group, on the database or table.
8 |
9 | With Hive 0.14 or later as Spark SQL's metastore client, this can be enabled even without [spark-authorizer](https://github.com/yaooqinn/spark-authorizer)'s support.
10 |
11 |
12 | ## Configuring Parameters for Storage-Based Authorization
13 |
14 | ### Hive Metastore Server Side
15 |
16 | To enable Storage-based Authorization in the Hive metastore server, configure these properties in the `hive-site.xml` for the server.
17 |
18 | Configuration| Description
19 | ---|---
20 | `hive.metastore.pre.event.listeners` | This parameter enables metastore security. Set to `org.apache.hadoop.hive.ql.security.authorization.AuthorizationPreEventListener`
21 | `hive.security.metastore.authenticator.manager` | Set to `org.apache.hadoop.hive.ql.security.HadoopDefaultMetastoreAuthenticator`
22 | `hive.security.metastore.authorization.auth.reads` | When true, Hive metastore authorization checks for read access.
23 | `hive.security.metastore.authorization.manager` | A comma-separated list of the names of authorization manager classes that are to be used in the metastore for authorization. Set to `org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider`
24 |
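A sketch of the corresponding `hive-site.xml` entries on the metastore server side, using the values listed above (with read checks enabled):

```xml
<property>
  <name>hive.metastore.pre.event.listeners</name>
  <value>org.apache.hadoop.hive.ql.security.authorization.AuthorizationPreEventListener</value>
</property>
<property>
  <name>hive.security.metastore.authenticator.manager</name>
  <value>org.apache.hadoop.hive.ql.security.HadoopDefaultMetastoreAuthenticator</value>
</property>
<property>
  <name>hive.security.metastore.authorization.auth.reads</name>
  <value>true</value>
</property>
<property>
  <name>hive.security.metastore.authorization.manager</name>
  <value>org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider</value>
</property>
```
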
25 | ### Hive Metastore Client Side
26 |
27 | This could be Spark SQL Client, Spark Thrift Server, HiveServer2, Kyuubi etc. Configure these properties in the `hive-site.xml` for the client.
28 |
29 | Configuration| Description
30 | ---|---
31 | `hive.security.authorization.enabled` | Enables or disables authorization on the client. Set to `true` to enable authorization.
32 | `hive.security.authorization.manager` | The class name of the Hive client authorization manager. For storage-based authorization, specify `org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider`
33 |
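A sketch of the client-side `hive-site.xml` entries, again using the values from the table:

```xml
<property>
  <name>hive.security.authorization.enabled</name>
  <value>true</value>
</property>
<property>
  <name>hive.security.authorization.manager</name>
  <value>org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider</value>
</property>
```
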
34 | ## Minimum Permissions
35 |
36 | The following table shows the minimum permissions required for Spark SQL when using Storage-Based Authorization:
37 |
38 | Operation | Permission required
39 | ---|---
40 | alter table | table write access
41 | create table | database write access
42 | CTAS | table read access
43 | load | table write access
44 | select | table read access
45 | show tables | database read access
46 |
47 |
48 | ## Limitations
49 |
50 | Spark SQL does not support the GRANT/REVOKE statements; they can be issued only through Hive.
51 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
18 |
19 |
22 | 4.0.0
23 |
24 | yaooqinn
25 | spark-authorizer
26 | Spark Authorizer
27 | 2.1.1
28 | jar
29 |
30 |
31 |
32 | The Apache Software License, Version 2.0
33 | http://www.apache.org/licenses/LICENSE-2.0.txt
34 | manual
35 |
36 |
37 |
38 |
39 |
40 | yaooqinn
41 | Kent Yao
42 | hzyaoqin@corp.netease.com
43 | yaooqinn
44 | http://www.jianshu.com/u/14effefbbf32
45 |
46 |
47 |
48 |
49 | UTF-8
50 | 2.11.8
51 | 2.11
52 | 2.2.6
53 | org.apache.spark
54 | 2.2.2
55 | provided
56 | org.spark-project.hive
57 | 1.2.1.spark2
58 | 1.7
59 |
60 |
61 |
62 |
63 | central
64 |
65 | Maven Repository
66 | https://repo.maven.apache.org/maven2
67 |
68 | true
69 |
70 |
71 | false
72 |
73 |
74 |
75 | apache
76 | Apache Repository Snapshots
77 | http://repository.apache.org/snapshots
78 |
79 | false
80 |
81 |
82 | true
83 | daily
84 | warn
85 |
86 |
87 |
88 |
89 |
90 | central
91 | https://repo.maven.apache.org/maven2
92 |
93 | true
94 |
95 |
96 | false
97 |
98 |
99 |
100 | apache
101 | Apache Repository Snapshots
102 | http://repository.apache.org/snapshots
103 |
104 | false
105 |
106 |
107 | true
108 | daily
109 | warn
110 |
111 |
112 |
113 |
114 |
115 |
116 | org.scala-lang
117 | scala-library
118 | ${scala.version}
119 | provided
120 |
121 |
122 |
123 | ${spark.group}
124 | spark-sql_${scala.binary.version}
125 | ${spark.version}
126 | ${spark.scope}
127 |
128 |
129 |
130 | ${spark.group}
131 | spark-hive_${scala.binary.version}
132 | ${spark.version}
133 | ${spark.scope}
134 |
135 |
136 | *
137 | *
138 |
139 |
140 |
141 |
142 | ${hive.group}
143 | hive-exec
144 | ${hive.version}
145 | provided
146 |
147 |
148 |
149 |
150 | org.scalatest
151 | scalatest_${scala.binary.version}
152 | ${scalatest.version}
153 | test
154 |
155 |
156 |
157 | ${spark.group}
158 | spark-core_${scala.binary.version}
159 | ${spark.version}
160 | test-jar
161 | test
162 |
163 |
164 |
165 | ${spark.group}
166 | spark-catalyst_${scala.binary.version}
167 | ${spark.version}
168 | test-jar
169 | test
170 |
171 |
172 | ${spark.group}
173 | spark-sql_${scala.binary.version}
174 | ${spark.version}
175 | test-jar
176 | test
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 | net.alchim31.maven
185 | scala-maven-plugin
186 | 3.2.2
187 |
188 |
189 | eclipse-add-source
190 |
191 | add-source
192 |
193 |
194 |
195 | scala-compile-first
196 |
197 | compile
198 |
199 |
200 |
201 | scala-test-compile-first
202 |
203 | testCompile
204 |
205 |
206 |
207 |
208 | ${scala.version}
209 | incremental
210 | true
211 |
212 | -unchecked
213 | -deprecation
214 | -feature
215 | -explaintypes
216 | -Yno-adapted-args
217 |
218 |
219 | -Xms1024m
220 | -Xmx1024m
221 | -XX:ReservedCodeCacheSize=512M
222 |
223 |
224 | -source
225 | ${java.version}
226 | -target
227 | ${java.version}
228 | -Xlint:all,-serial,-path,-try
229 |
230 |
231 |
232 |
233 |
234 |
235 | org.apache.maven.plugins
236 | maven-surefire-plugin
237 | 2.12.4
238 |
239 | true
240 |
241 |
242 |
243 |
244 | org.scalatest
245 | scalatest-maven-plugin
246 | 1.0
247 |
248 | ${project.build.directory}/surefire-reports
249 | .
250 | TestSuite.txt
251 |
252 |
253 |
254 | test
255 |
256 | test
257 |
258 |
259 |
260 |
261 |
262 |
263 |
264 |
265 |
266 | spark-2.2
267 |
268 | 2.2.2
269 | 2.2.6
270 |
271 |
272 |
273 |
274 | spark-2.3
275 |
276 | 2.3.2
277 | 3.0.3
278 |
279 |
280 |
281 |
--------------------------------------------------------------------------------
/scalastyle-config.xml:
--------------------------------------------------------------------------------
1 |
17 |
39 |
40 |
41 | Scalastyle standard configuration
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 | true
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 | ARROW, EQUALS, ELSE, TRY, CATCH, FINALLY, LARROW, RARROW
126 |
127 |
128 |
129 |
130 |
131 | ARROW, EQUALS, COMMA, COLON, IF, ELSE, DO, WHILE, FOR, MATCH, TRY, CATCH, FINALLY, LARROW, RARROW
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 | ^FunSuite[A-Za-z]*$
141 | Tests must extend org.apache.spark.SparkFunSuite instead.
142 |
143 |
144 |
145 |
146 | ^println$
147 |
151 |
152 |
153 |
154 | @VisibleForTesting
155 |
158 |
159 |
160 |
161 | Runtime\.getRuntime\.addShutdownHook
162 |
170 |
171 |
172 |
173 | mutable\.SynchronizedBuffer
174 |
182 |
183 |
184 |
185 | Class\.forName
186 |
193 |
194 |
195 |
196 | Await\.result
197 |
204 |
205 |
206 |
207 | Await\.ready
208 |
215 |
216 |
217 |
218 |
219 | JavaConversions
220 | Instead of importing implicits in scala.collection.JavaConversions._, import
221 | scala.collection.JavaConverters._ and use .asScala / .asJava methods
222 |
223 |
224 |
225 | org\.apache\.commons\.lang\.
226 | Use Commons Lang 3 classes (package org.apache.commons.lang3.*) instead
227 | of Commons Lang 2 (package org.apache.commons.lang.*)
228 |
229 |
230 |
231 | extractOpt
232 | Use Utils.jsonOption(x).map(.extract[T]) instead of .extractOpt[T], as the latter
233 | is slower.
234 |
235 |
236 |
237 |
238 | java,scala,3rdParty,spark
239 | javax?\..*
240 | scala\..*
241 | (?!org\.apache\.spark\.).*
242 | org\.apache\.spark\..*
243 |
244 |
245 |
246 |
247 |
248 | COMMA
249 |
250 |
251 |
252 |
253 |
254 | \)\{
255 |
258 |
259 |
260 |
261 | (?m)^(\s*)/[*][*].*$(\r|)\n^\1 [*]
262 | Use Javadoc style indentation for multiline comments
263 |
264 |
265 |
266 | case[^\n>]*=>\s*\{
267 | Omit braces in case clauses.
268 |
269 |
270 |
271 |
272 |
273 |
274 |
275 |
276 |
277 |
278 |
279 |
280 |
281 |
282 |
283 |
284 |
285 |
286 |
287 |
288 |
289 |
290 |
291 |
292 |
293 |
294 |
295 |
296 |
297 |
298 |
299 |
300 |
301 |
302 |
303 |
304 |
305 |
306 |
307 |
308 |
309 |
310 |
311 |
312 |
313 |
314 |
315 |
316 |
317 |
318 |
319 |
320 | 800>
321 |
322 |
323 |
324 |
325 | 30
326 |
327 |
328 |
329 |
330 | 10
331 |
332 |
333 |
334 |
335 | 50
336 |
337 |
338 |
339 |
340 |
341 |
342 |
343 |
344 |
345 |
346 | -1,0,1,2,3
347 |
348 |
349 |
350 |
--------------------------------------------------------------------------------
/src/main/scala/com/githup/yaooqinn/spark/authorizer/Logging.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package com.githup.yaooqinn.spark.authorizer
19 |
20 | import org.slf4j.LoggerFactory
21 |
22 | trait Logging {
23 |
24 | lazy val logger = LoggerFactory.getLogger(this.getClass)
25 |
26 | def trace(message: => Any): Unit = {
27 | if (logger.isTraceEnabled) {
28 | logger.trace(message.toString)
29 | }
30 | }
31 |
32 | def debug(message: => Any): Unit = {
33 | if (logger.isDebugEnabled) {
34 | logger.debug(message.toString)
35 | }
36 | }
37 |
38 | def info(message: => Any): Unit = {
39 | if (logger.isInfoEnabled) {
40 | logger.info(message.toString)
41 | }
42 | }
43 |
44 | def warn(message: => Any): Unit = {
45 | logger.warn(message.toString)
46 | }
47 |
48 | def warn(message: => Any, t: Throwable): Unit = {
49 | logger.warn(message.toString, t)
50 | }
51 |
52 | def error(message: => Any, t: Throwable): Unit = {
53 | logger.error(message.toString, t)
54 | }
55 |
56 | def error(message: => Any): Unit = {
57 | logger.error(message.toString)
58 | }
59 | }
60 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/ranger/authorization/spark/authorizer/RangerSparkSQLExtension.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.ranger.authorization.spark.authorizer
19 |
20 | import org.apache.spark.sql.SparkSessionExtensions
21 | import org.apache.spark.sql.catalyst.optimizer.AuthorizerExtension
22 |
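/**
 * A [[SparkSessionExtensions]] hook that injects the authorization rule into Spark's optimizer.
 * Presumably activated through Spark's extension mechanism, e.g. by setting
 * `spark.sql.extensions=org.apache.ranger.authorization.spark.authorizer.RangerSparkSQLExtension`;
 * the exact configuration is an assumption here, see this project's install docs.
 */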
23 | class RangerSparkSQLExtension extends Extensions {
24 | override def apply(ext: SparkSessionExtensions): Unit = {
25 | ext.injectOptimizerRule(AuthorizerExtension)
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/ranger/authorization/spark/authorizer/package.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.ranger.authorization.spark
19 |
20 | import org.apache.spark.sql.SparkSessionExtensions
21 |
22 | package object authorizer {
23 |
24 | type Extensions = SparkSessionExtensions => Unit
25 |
26 | }
27 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Authorizable.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.catalyst.optimizer
19 |
20 | import java.io.File
21 |
22 | import com.githup.yaooqinn.spark.authorizer.Logging
23 | import org.apache.hadoop.hive.ql.plan.HiveOperation
24 | import org.apache.hadoop.hive.ql.security.authorization.plugin.{HiveAuthzContext, HiveOperationType}
25 |
26 | import org.apache.spark.sql.SparkSession
27 | import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan}
28 | import org.apache.spark.sql.catalyst.rules.Rule
29 | import org.apache.spark.sql.execution.command._
30 | import org.apache.spark.sql.execution.datasources.{CreateTempViewUsing, InsertIntoDataSourceCommand, InsertIntoHadoopFsRelationCommand}
31 | import org.apache.spark.sql.hive.{HiveExternalCatalog, PrivilegesBuilder}
32 | import org.apache.spark.sql.hive.client.AuthzImpl
33 | import org.apache.spark.sql.hive.execution.CreateHiveTableAsSelectCommand
34 | import org.apache.spark.util.Utils
35 |
36 | trait Authorizable extends Rule[LogicalPlan] with Logging {
37 |
38 | def spark: SparkSession
39 |
40 | /**
41 |    * Visit the [[LogicalPlan]] recursively to get all Hive privilege objects, then check the
42 |    * privileges using Hive Authorizer V2, which provides SQL-standard based authorization and
43 |    * can be implemented by the ranger-hive-plugin.
44 |    * If the user is authorized, the original plan is returned; otherwise the check is interrupted
45 |    * by a privilege exception.
46 | * @param plan a spark LogicalPlan for verifying privileges
47 | * @return a plan itself which has gone through the privilege check.
48 | */
49 | override def apply(plan: LogicalPlan): LogicalPlan = {
50 | val operationType: HiveOperationType = getOperationType(plan)
51 | val authzContext = new HiveAuthzContext.Builder().build()
52 | val (in, out) = PrivilegesBuilder.build(plan)
53 | spark.sharedState.externalCatalog match {
54 | case _: HiveExternalCatalog =>
55 | AuthzImpl.checkPrivileges(spark, operationType, in, out, authzContext)
56 | case _ =>
57 | }
58 | // iff no exception.
59 | // We just return the original plan here, so this rule will be executed only once.
60 | plan
61 | }
62 |
63 | def policyCacheDir: Option[String] = {
64 | Option(spark.sparkContext.hadoopConfiguration.get("ranger.plugin.hive.policy.cache.dir"))
65 | }
66 |
67 |
68 | def createCacheDirIfNonExists(dir: String): Unit = {
69 | val file = new File(dir)
70 | if (!file.exists()) {
71 | if (file.mkdirs()) {
72 | info("Creating ranger policy cache directory at " + file.getAbsolutePath)
73 | file.deleteOnExit()
74 | } else {
75 | warn("Unable to create ranger policy cache directory at " + file.getAbsolutePath)
76 | }
77 | }
78 | }
79 |
80 | policyCacheDir match {
81 | case Some(dir) => createCacheDirIfNonExists(dir)
82 | case _ =>
83 | // load resources from ranger configuration files
84 | Option(Utils.getContextOrSparkClassLoader.getResource("ranger-hive-security.xml")) match {
85 | case Some(url) =>
86 | spark.sparkContext.hadoopConfiguration.addResource(url)
87 | policyCacheDir match {
88 | case Some(dir) => createCacheDirIfNonExists(dir)
89 | case _ =>
90 | }
91 | case _ =>
92 | }
93 | }
94 |
95 | /**
96 | * Mapping of [[LogicalPlan]] -> [[HiveOperation]]
97 | * @param plan a spark LogicalPlan
98 | * @return
99 | */
100 | def getHiveOperation(plan: LogicalPlan): HiveOperation = {
101 | plan match {
102 | case c: Command => c match {
103 | case _: AlterDatabasePropertiesCommand => HiveOperation.ALTERDATABASE
104 | case p if p.nodeName == "AlterTableAddColumnsCommand" => HiveOperation.ALTERTABLE_ADDCOLS
105 | case _: AlterTableAddPartitionCommand => HiveOperation.ALTERTABLE_ADDPARTS
106 | case p if p.nodeName == "AlterTableChangeColumnCommand" =>
107 | HiveOperation.ALTERTABLE_RENAMECOL
108 | case _: AlterTableDropPartitionCommand => HiveOperation.ALTERTABLE_DROPPARTS
109 | case _: AlterTableRecoverPartitionsCommand => HiveOperation.MSCK
110 | case _: AlterTableRenamePartitionCommand => HiveOperation.ALTERTABLE_RENAMEPART
111 | case a: AlterTableRenameCommand =>
112 | if (!a.isView) HiveOperation.ALTERTABLE_RENAME else HiveOperation.ALTERVIEW_RENAME
113 | case _: AlterTableSetPropertiesCommand
114 | | _: AlterTableUnsetPropertiesCommand => HiveOperation.ALTERTABLE_PROPERTIES
115 | case _: AlterTableSerDePropertiesCommand => HiveOperation.ALTERTABLE_SERDEPROPERTIES
116 | case _: AlterTableSetLocationCommand => HiveOperation.ALTERTABLE_LOCATION
117 | case _: AlterViewAsCommand => HiveOperation.QUERY
118 | // case _: AlterViewAsCommand => HiveOperation.ALTERVIEW_AS
119 |
120 | case _: AnalyzeColumnCommand => HiveOperation.QUERY
121 | // case _: AnalyzeTableCommand => HiveOperation.ANALYZE_TABLE
122 |         // Hive treats AnalyzeTableCommand as QUERY; follow that behaviour.
123 | case _: AnalyzeTableCommand => HiveOperation.QUERY
124 | case p if p.nodeName == "AnalyzePartitionCommand" => HiveOperation.QUERY
125 |
126 | case _: CreateDatabaseCommand => HiveOperation.CREATEDATABASE
127 | case _: CreateDataSourceTableAsSelectCommand
128 | | _: CreateHiveTableAsSelectCommand => HiveOperation.CREATETABLE_AS_SELECT
129 | case _: CreateFunctionCommand => HiveOperation.CREATEFUNCTION
130 | case _: CreateTableCommand
131 | | _: CreateDataSourceTableCommand => HiveOperation.CREATETABLE
132 | case _: CreateTableLikeCommand => HiveOperation.CREATETABLE
133 | case _: CreateViewCommand
134 | | _: CacheTableCommand
135 | | _: CreateTempViewUsing => HiveOperation.CREATEVIEW
136 |
137 | case p if p.nodeName == "DescribeColumnCommand" => HiveOperation.DESCTABLE
138 | case _: DescribeDatabaseCommand => HiveOperation.DESCDATABASE
139 | case _: DescribeFunctionCommand => HiveOperation.DESCFUNCTION
140 | case _: DescribeTableCommand => HiveOperation.DESCTABLE
141 |
142 | case _: DropDatabaseCommand => HiveOperation.DROPDATABASE
143 |         // Hive doesn't check privileges for the `drop function` command, so an unverified user
144 |         // could drop functions at will.
145 |         // We treat permanent functions as tables for verification.
146 | case d: DropFunctionCommand if !d.isTemp => HiveOperation.DROPTABLE
147 | case d: DropFunctionCommand if d.isTemp => HiveOperation.DROPFUNCTION
148 | case _: DropTableCommand => HiveOperation.DROPTABLE
149 |
150 | case e: ExplainCommand => getHiveOperation(e.logicalPlan)
151 |
152 | case _: InsertIntoDataSourceCommand => HiveOperation.QUERY
153 | case p if p.nodeName == "InsertIntoDataSourceDirCommand" => HiveOperation.QUERY
154 | case _: InsertIntoHadoopFsRelationCommand => HiveOperation.CREATETABLE_AS_SELECT
155 | case p if p.nodeName == "InsertIntoHiveDirCommand" => HiveOperation.QUERY
156 | case p if p.nodeName == "InsertIntoHiveTable" => HiveOperation.QUERY
157 |
158 | case _: LoadDataCommand => HiveOperation.LOAD
159 |
160 | case p if p.nodeName == "SaveIntoDataSourceCommand" => HiveOperation.QUERY
161 | case s: SetCommand if s.kv.isEmpty || s.kv.get._2.isEmpty => HiveOperation.SHOWCONF
162 | case _: SetDatabaseCommand => HiveOperation.SWITCHDATABASE
163 | case _: ShowCreateTableCommand => HiveOperation.SHOW_CREATETABLE
164 | case _: ShowColumnsCommand => HiveOperation.SHOWCOLUMNS
165 | case _: ShowDatabasesCommand => HiveOperation.SHOWDATABASES
166 | case _: ShowFunctionsCommand => HiveOperation.SHOWFUNCTIONS
167 | case _: ShowPartitionsCommand => HiveOperation.SHOWPARTITIONS
168 | case _: ShowTablesCommand => HiveOperation.SHOWTABLES
169 | case _: ShowTablePropertiesCommand => HiveOperation.SHOW_TBLPROPERTIES
170 | case s: StreamingExplainCommand =>
171 | getHiveOperation(s.queryExecution.optimizedPlan)
172 |
173 | case _: TruncateTableCommand => HiveOperation.TRUNCATETABLE
174 |
175 | case _: UncacheTableCommand => HiveOperation.DROPVIEW
176 |
177 |         // Commands that do not need to build privileges are treated as the EXPLAIN type
178 | case _ =>
179 | // AddFileCommand
180 | // AddJarCommand
181 | // ...
182 | HiveOperation.EXPLAIN
183 | }
184 | case _ => HiveOperation.QUERY
185 | }
186 | }
187 |
188 | def getOperationType(logicalPlan: LogicalPlan): HiveOperationType = {
189 | HiveOperationType.valueOf(getHiveOperation(logicalPlan).name())
190 | }
191 | }
192 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Authorizer.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.catalyst.optimizer
19 |
20 | import org.apache.spark.sql.SparkSession
21 | import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
22 | import org.apache.spark.sql.catalyst.rules.Rule
23 |
24 | /**
25 | * An Optimizer Rule to do Hive Authorization V2 for Spark SQL.
26 | *
27 | * For Apache Spark 2.1.x
28 | */
29 | object Authorizer extends Rule[LogicalPlan] with Authorizable {
30 | override def spark: SparkSession = {
31 | SparkSession.getActiveSession.getOrElse(SparkSession.getDefaultSession.get)
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/catalyst/optimizer/AuthorizerExtension.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.catalyst.optimizer
19 |
20 | import org.apache.spark.sql.SparkSession
21 | import org.apache.spark.sql.catalyst.plans.logical._
22 | import org.apache.spark.sql.catalyst.rules.Rule
23 |
24 | /**
25 | * An Optimizer Rule to do Hive Authorization V2 for Spark SQL.
26 | *
27 | * For Apache Spark 2.2.x and later
28 | */
29 | case class AuthorizerExtension(spark: SparkSession) extends Rule[LogicalPlan] with Authorizable
30 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/catalyst/optimizer/HivePrivilegeObject.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.catalyst.optimizer
19 |
20 | import java.util.{List => JList}
21 |
22 | import org.apache.hadoop.hive.ql.security.authorization.plugin.{HivePrivilegeObject => HBO}
23 | import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.{HivePrivilegeObjectType, HivePrivObjectActionType}
24 |
25 | /**
26 | * Helper class for initializing [[HBO]] with more Constructors.
27 | */
28 | private[sql] object HivePrivilegeObject {
29 | def apply(
30 | `type`: HivePrivilegeObjectType,
31 | dbname: String,
32 | objectName: String,
33 | partKeys: JList[String],
34 | columns: JList[String],
35 | actionType: HivePrivObjectActionType,
36 | commandParams: JList[String]): HBO = {
37 | new HBO(
38 | `type`, dbname, objectName, partKeys, columns, actionType, commandParams)
39 | }
40 |
41 | def apply(
42 | `type`: HivePrivilegeObjectType,
43 | dbname: String,
44 | objectName: String,
45 | partKeys: JList[String],
46 | columns: JList[String],
47 | commandParams: JList[String]): HBO = {
48 | apply(
49 | `type`, dbname, objectName, partKeys, columns, HivePrivObjectActionType.OTHER, commandParams)
50 | }
51 |
52 | def apply(
53 | `type`: HivePrivilegeObjectType,
54 | dbname: String,
55 | objectName: String,
56 | partKeys: JList[String],
57 | columns: JList[String]): HBO = {
58 | apply(
59 | `type`, dbname, objectName, partKeys, columns, HivePrivObjectActionType.OTHER, null)
60 | }
61 |
62 | def apply(
63 | `type`: HivePrivilegeObjectType,
64 | dbname: String,
65 | objectName: String,
66 | actionType: HivePrivObjectActionType): HBO = {
67 | apply(`type`, dbname, objectName, null, null, actionType, null)
68 | }
69 |
70 | def apply(
71 | `type`: HivePrivilegeObjectType,
72 | dbname: String,
73 | objectName: String): HBO = {
74 | apply(`type`, dbname, objectName, HivePrivObjectActionType.OTHER)
75 | }
76 | }
77 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/hive/AuthzUtils.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.hive
19 |
20 | import scala.util.{Failure, Success, Try}
21 |
22 | private[hive] object AuthzUtils {
23 |
24 | def getFieldVal(o: Any, name: String): Any = {
25 | Try {
26 | val field = o.getClass.getDeclaredField(name)
27 | field.setAccessible(true)
28 | field.get(o)
29 | } match {
30 | case Success(value) => value
31 | case Failure(exception) => throw exception
32 | }
33 | }
34 |
35 | def setFieldVal(o: Any, name: String, value: Any): Unit = {
36 | Try {
37 | val field = o.getClass.getDeclaredField(name)
38 | field.setAccessible(true)
39 | field.set(o, value.asInstanceOf[AnyRef])
40 | } match {
41 | case Failure(exception) => throw exception
42 | case _ =>
43 | }
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/hive/PrivilegesBuilder.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.hive
19 |
20 | import java.util.{ArrayList => JAList, List => JList}
21 |
22 | import scala.collection.JavaConverters._
23 |
24 | import org.apache.hadoop.hive.ql.security.authorization.plugin.{HivePrivilegeObject => HPO}
25 | import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.{HivePrivilegeObjectType, HivePrivObjectActionType}
26 |
27 | import org.apache.spark.sql.SaveMode
28 | import org.apache.spark.sql.catalyst.TableIdentifier
29 | import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
30 | import org.apache.spark.sql.catalyst.catalog.CatalogTable
31 | import org.apache.spark.sql.catalyst.expressions.NamedExpression
32 | import org.apache.spark.sql.catalyst.optimizer.HivePrivilegeObject
33 | import org.apache.spark.sql.catalyst.plans.logical._
34 | import org.apache.spark.sql.execution.command._
35 | import org.apache.spark.sql.execution.datasources._
36 | import org.apache.spark.sql.hive.AuthzUtils._
37 | import org.apache.spark.sql.hive.execution.CreateHiveTableAsSelectCommand
38 | import org.apache.spark.sql.types.StructField
39 |
40 | /**
41 | * [[LogicalPlan]] -> list of [[HivePrivilegeObject]]s
42 | */
43 | private[sql] object PrivilegesBuilder {
44 |
45 | /**
46 | * Build input and output privilege objects from a Spark's [[LogicalPlan]]
47 | *
48 | * For [[ExplainCommand]]s, build its child.
49 |    * For [[RunnableCommand]]s, build outputs if there is a target to write to, and build inputs
50 |    * for the inner query if one exists.
51 | *
52 | * For other queries, build inputs.
53 | *
54 | * @param plan A Spark [[LogicalPlan]]
55 | */
56 | def build(plan: LogicalPlan): (JList[HPO], JList[HPO]) = {
57 |
58 | def doBuild(plan: LogicalPlan): (JList[HPO], JList[HPO]) = {
59 | val inputObjs = new JAList[HPO]
60 | val outputObjs = new JAList[HPO]
61 | plan match {
62 | // RunnableCommand
63 | case cmd: Command => buildCommand(cmd, inputObjs, outputObjs)
64 | // Queries
65 | case _ => buildQuery(plan, inputObjs)
66 | }
67 | (inputObjs, outputObjs)
68 | }
69 |
70 | plan match {
71 | case e: ExplainCommand => doBuild(e.logicalPlan)
72 | case p => doBuild(p)
73 | }
74 | }
75 |
76 | /**
77 | * Build HivePrivilegeObjects from Spark LogicalPlan
78 | * @param plan a Spark LogicalPlan used to generate HivePrivilegeObjects
79 | * @param hivePrivilegeObjects input or output hive privilege object list
80 | * @param projectionList Projection list after pruning
81 | */
82 | private[this] def buildQuery(
83 | plan: LogicalPlan,
84 | hivePrivilegeObjects: JList[HPO],
85 | projectionList: Seq[NamedExpression] = Nil): Unit = {
86 |
87 | /**
88 | * Columns in Projection take priority for column level privilege checking
89 | * @param table catalogTable of a given relation
90 | */
91 | def mergeProjection(table: CatalogTable): Unit = {
92 | if (projectionList.isEmpty) {
93 | addTableOrViewLevelObjs(
94 | table.identifier,
95 | hivePrivilegeObjects,
96 | table.partitionColumnNames,
97 | table.schema.fieldNames)
98 | } else {
99 | addTableOrViewLevelObjs(
100 | table.identifier,
101 | hivePrivilegeObjects,
102 | table.partitionColumnNames.filter(projectionList.map(_.name).contains(_)),
103 | projectionList.map(_.name))
104 | }
105 | }
106 |
107 | plan match {
108 | case p: Project => buildQuery(p.child, hivePrivilegeObjects, p.projectList)
109 |
110 | case h if h.nodeName == "HiveTableRelation" =>
111 | mergeProjection(getFieldVal(h, "tableMeta").asInstanceOf[CatalogTable])
112 |
113 | case m if m.nodeName == "MetastoreRelation" =>
114 | mergeProjection(getFieldVal(m, "catalogTable").asInstanceOf[CatalogTable])
115 |
116 | case c if c.nodeName == "CatalogRelation" =>
117 | mergeProjection(getFieldVal(c, "tableMeta").asInstanceOf[CatalogTable])
118 |
119 | case l: LogicalRelation if l.catalogTable.nonEmpty => mergeProjection(l.catalogTable.get)
120 |
121 | case u: UnresolvedRelation =>
122 | // Normally, we shouldn't meet UnresolvedRelation here in an optimized plan.
123 | // Unfortunately, the real world is always a place where miracles happen.
124 | // We check the privileges directly without resolving the plan and leave everything
125 | // to spark to do.
126 | addTableOrViewLevelObjs(u.tableIdentifier, hivePrivilegeObjects)
127 |
128 | case p =>
129 | for (child <- p.children) {
130 | buildQuery(child, hivePrivilegeObjects, projectionList)
131 | }
132 | }
133 | }
134 |
135 | /**
136 | * Build HivePrivilegeObjects from Spark LogicalPlan
137 | * @param plan a Spark LogicalPlan used to generate HivePrivilegeObjects
138 | * @param inputObjs input hive privilege object list
139 | * @param outputObjs output hive privilege object list
140 | */
141 | private[this] def buildCommand(
142 | plan: LogicalPlan,
143 | inputObjs: JList[HPO],
144 | outputObjs: JList[HPO]): Unit = {
145 | plan match {
146 | case a: AlterDatabasePropertiesCommand => addDbLevelObjs(a.databaseName, outputObjs)
147 |
148 | case a if a.nodeName == "AlterTableAddColumnsCommand" =>
149 | addTableOrViewLevelObjs(
150 | getFieldVal(a, "table").asInstanceOf[TableIdentifier],
151 | inputObjs,
152 | columns = getFieldVal(a, "colsToAdd").asInstanceOf[Seq[StructField]].map(_.name))
153 | addTableOrViewLevelObjs(
154 | getFieldVal(a, "table").asInstanceOf[TableIdentifier],
155 | outputObjs,
156 | columns = getFieldVal(a, "colsToAdd").asInstanceOf[Seq[StructField]].map(_.name))
157 |
158 | case a: AlterTableAddPartitionCommand =>
159 | addTableOrViewLevelObjs(a.tableName, inputObjs)
160 | addTableOrViewLevelObjs(a.tableName, outputObjs)
161 |
162 | case a if a.nodeName == "AlterTableChangeColumnCommand" =>
163 | addTableOrViewLevelObjs(
164 | getFieldVal(a, "tableName").asInstanceOf[TableIdentifier],
165 | inputObjs,
166 | columns = Seq(getFieldVal(a, "columnName").asInstanceOf[String]))
167 |
168 | case a: AlterTableDropPartitionCommand =>
169 | addTableOrViewLevelObjs(a.tableName, inputObjs)
170 | addTableOrViewLevelObjs(a.tableName, outputObjs)
171 |
172 | case a: AlterTableRecoverPartitionsCommand =>
173 | addTableOrViewLevelObjs(a.tableName, inputObjs)
174 | addTableOrViewLevelObjs(a.tableName, outputObjs)
175 |
176 | case a: AlterTableRenameCommand if !a.isView || a.oldName.database.nonEmpty =>
177 | // rename tables / permanent views
178 | addTableOrViewLevelObjs(a.oldName, inputObjs)
179 | addTableOrViewLevelObjs(a.newName, outputObjs)
180 |
181 | case a: AlterTableRenamePartitionCommand =>
182 | addTableOrViewLevelObjs(a.tableName, inputObjs)
183 | addTableOrViewLevelObjs(a.tableName, outputObjs)
184 |
185 | case a: AlterTableSerDePropertiesCommand =>
186 | addTableOrViewLevelObjs(a.tableName, inputObjs)
187 | addTableOrViewLevelObjs(a.tableName, outputObjs)
188 |
189 | case a: AlterTableSetLocationCommand =>
190 | addTableOrViewLevelObjs(a.tableName, inputObjs)
191 | addTableOrViewLevelObjs(a.tableName, outputObjs)
192 |
193 | case a: AlterTableSetPropertiesCommand =>
194 | addTableOrViewLevelObjs(a.tableName, inputObjs)
195 | addTableOrViewLevelObjs(a.tableName, outputObjs)
196 |
197 | case a: AlterTableUnsetPropertiesCommand =>
198 | addTableOrViewLevelObjs(a.tableName, inputObjs)
199 | addTableOrViewLevelObjs(a.tableName, outputObjs)
200 |
201 | case a: AlterViewAsCommand =>
202 | if (a.name.database.nonEmpty) {
203 | // it's a permanent view
204 | addTableOrViewLevelObjs(a.name, outputObjs)
205 | }
206 | buildQuery(a.query, inputObjs)
207 |
208 | case a: AnalyzeColumnCommand =>
209 | addTableOrViewLevelObjs(
210 | a.tableIdent, inputObjs, columns = a.columnNames)
211 | addTableOrViewLevelObjs(
212 | a.tableIdent, outputObjs, columns = a.columnNames)
213 |
214 | case a if a.nodeName == "AnalyzePartitionCommand" =>
215 | addTableOrViewLevelObjs(
216 | getFieldVal(a, "tableIdent").asInstanceOf[TableIdentifier], inputObjs)
217 | addTableOrViewLevelObjs(
218 | getFieldVal(a, "tableIdent").asInstanceOf[TableIdentifier], outputObjs)
219 |
220 | case a: AnalyzeTableCommand =>
221 | addTableOrViewLevelObjs(a.tableIdent, inputObjs, columns = Seq("RAW__DATA__SIZE"))
222 | addTableOrViewLevelObjs(a.tableIdent, outputObjs)
223 |
224 | case c: CacheTableCommand => c.plan.foreach {
225 | buildQuery(_, inputObjs)
226 | }
227 |
228 | case c: CreateDatabaseCommand => addDbLevelObjs(c.databaseName, outputObjs)
229 |
230 | case c: CreateDataSourceTableAsSelectCommand =>
231 | addDbLevelObjs(c.table.identifier, outputObjs)
232 | addTableOrViewLevelObjs(c.table.identifier, outputObjs, mode = c.mode)
233 | buildQuery(c.query, inputObjs)
234 |
235 | case c: CreateDataSourceTableCommand =>
236 | addTableOrViewLevelObjs(c.table.identifier, outputObjs)
237 |
238 | case c: CreateFunctionCommand if !c.isTemp =>
239 | addDbLevelObjs(c.databaseName, outputObjs)
240 | addFunctionLevelObjs(c.databaseName, c.functionName, outputObjs)
241 |
242 | case c: CreateHiveTableAsSelectCommand =>
243 | addDbLevelObjs(c.tableDesc.identifier, outputObjs)
244 | addTableOrViewLevelObjs(c.tableDesc.identifier, outputObjs)
245 | buildQuery(c.query, inputObjs)
246 |
247 | case c: CreateTableCommand => addTableOrViewLevelObjs(c.table.identifier, outputObjs)
248 |
249 | case c: CreateTableLikeCommand =>
250 | addDbLevelObjs(c.targetTable, outputObjs)
251 | addTableOrViewLevelObjs(c.targetTable, outputObjs)
252 |         // Hive doesn't check the source table's privileges; we do not follow that behaviour
253 |         // because it would leak meta information
254 | addDbLevelObjs(c.sourceTable, inputObjs)
255 | addTableOrViewLevelObjs(c.sourceTable, inputObjs)
256 |
257 | case c: CreateViewCommand =>
258 | c.viewType match {
259 | case PersistedView =>
260 | // PersistedView will be tied to a database
261 | addDbLevelObjs(c.name, outputObjs)
262 | addTableOrViewLevelObjs(c.name, outputObjs)
263 | case _ =>
264 | }
265 | buildQuery(c.child, inputObjs)
266 |
267 | case d if d.nodeName == "DescribeColumnCommand" =>
268 | addTableOrViewLevelObjs(
269 | getFieldVal(d, "table").asInstanceOf[TableIdentifier],
270 | inputObjs,
271 | columns = getFieldVal(d, "colNameParts").asInstanceOf[Seq[String]])
272 |
273 | case d: DescribeDatabaseCommand =>
274 | addDbLevelObjs(d.databaseName, inputObjs)
275 |
276 | case d: DescribeFunctionCommand =>
277 | addFunctionLevelObjs(d.functionName.database, d.functionName.funcName, inputObjs)
278 |
279 | case d: DescribeTableCommand => addTableOrViewLevelObjs(d.table, inputObjs)
280 |
281 | case d: DropDatabaseCommand =>
282 | // outputObjs are enough for privilege check, adding inputObjs for consistency with hive
283 | // behaviour in case of some unexpected issues.
284 | addDbLevelObjs(d.databaseName, inputObjs)
285 | addDbLevelObjs(d.databaseName, outputObjs)
286 |
287 | case d: DropFunctionCommand =>
288 | addFunctionLevelObjs(d.databaseName, d.functionName, outputObjs)
289 |
290 | case d: DropTableCommand => addTableOrViewLevelObjs(d.tableName, outputObjs)
291 |
292 | case i: InsertIntoDataSourceCommand =>
293 | i.logicalRelation.catalogTable.foreach { table =>
294 | addTableOrViewLevelObjs(
295 | table.identifier,
296 | outputObjs)
297 | }
298 | buildQuery(i.query, inputObjs)
299 |
300 |       case i if i.nodeName == "InsertIntoDataSourceDirCommand" =>
301 | buildQuery(getFieldVal(i, "query").asInstanceOf[LogicalPlan], inputObjs)
302 |
303 | case i: InsertIntoHadoopFsRelationCommand =>
304 |         // We are able to get the overwrite mode here, but CTAS for a Hive table in text/orc
305 |         // format, or in parquet with spark.sql.hive.convertMetastoreParquet=false, can pass
306 |         // privilege checking without claiming the UPDATE privilege on the target table,
307 |         // which seems to match Hive's behaviour.
308 |         // So, here we ignore the overwrite mode for such consistency.
309 | i.catalogTable foreach { t =>
310 | addTableOrViewLevelObjs(
311 | t.identifier,
312 | outputObjs,
313 | i.partitionColumns.map(_.name),
314 | t.schema.fieldNames)
315 | }
316 | buildQuery(i.query, inputObjs)
317 |
318 | case i if i.nodeName == "InsertIntoHiveDirCommand" =>
319 | buildQuery(getFieldVal(i, "query").asInstanceOf[LogicalPlan], inputObjs)
320 |
321 | case i if i.nodeName == "InsertIntoHiveTable" =>
322 | addTableOrViewLevelObjs(
323 | getFieldVal(i, "table").asInstanceOf[CatalogTable].identifier, outputObjs)
324 | buildQuery(getFieldVal(i, "query").asInstanceOf[LogicalPlan], inputObjs)
325 |
326 | case l: LoadDataCommand => addTableOrViewLevelObjs(l.table, outputObjs)
327 |
328 | case s if s.nodeName == "SaveIntoDataSourceCommand" =>
329 | buildQuery(getFieldVal(s, "query").asInstanceOf[LogicalPlan], outputObjs)
330 |
331 | case s: SetDatabaseCommand => addDbLevelObjs(s.databaseName, inputObjs)
332 |
333 | case s: ShowColumnsCommand => addTableOrViewLevelObjs(s.tableName, inputObjs)
334 |
335 | case s: ShowCreateTableCommand => addTableOrViewLevelObjs(s.table, inputObjs)
336 |
337 | case s: ShowFunctionsCommand => s.db.foreach(addDbLevelObjs(_, inputObjs))
338 |
339 | case s: ShowPartitionsCommand => addTableOrViewLevelObjs(s.tableName, inputObjs)
340 |
341 | case s: ShowTablePropertiesCommand => addTableOrViewLevelObjs(s.table, inputObjs)
342 |
343 | case s: ShowTablesCommand => addDbLevelObjs(s.databaseName, inputObjs)
344 |
345 | case s: TruncateTableCommand => addTableOrViewLevelObjs(s.tableName, outputObjs)
346 |
347 | case _ =>
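        // The commands below are matched here on purpose and add no privilege objects: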
348 | // AddFileCommand
349 | // AddJarCommand
350 | // AnalyzeColumnCommand
351 | // ClearCacheCommand
352 | // CreateTempViewUsing
353 | // ListFilesCommand
354 | // ListJarsCommand
355 | // RefreshTable
356 | // RefreshTable
357 | // ResetCommand
358 | // SetCommand
359 | // ShowDatabasesCommand
360 | // StreamingExplainCommand
361 | // UncacheTableCommand
362 | }
363 | }
364 |
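  // A worked example (hypothetical query, for illustration only): for
  //   INSERT INTO default.t1 SELECT * FROM default.t2
  // the InsertIntoHiveTable case above puts default.t1 into outputObjs, while buildQuery on its
  // query puts default.t2 into inputObjs; the helpers below turn these identifiers into
  // database, table/view, or function level hive privilege objects for the authorizer to check.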
365 | /**
366 | * Add database level hive privilege objects to input or output list
367 | * @param dbName database name as hive privilege object
368 | * @param hivePrivilegeObjects input or output list
369 | */
370 | private[this] def addDbLevelObjs(
371 | dbName: String,
372 | hivePrivilegeObjects: JList[HPO]): Unit = {
373 | hivePrivilegeObjects.add(
374 | HivePrivilegeObject(HivePrivilegeObjectType.DATABASE, dbName, dbName))
375 | }
376 |
377 | /**
378 | * Add database level hive privilege objects to input or output list
379 | * @param dbOption an option of database name as hive privilege object
380 | * @param hivePrivilegeObjects input or output hive privilege object list
381 | */
382 | private def addDbLevelObjs(
383 | dbOption: Option[String],
384 | hivePrivilegeObjects: JList[HPO]): Unit = {
385 | dbOption match {
386 | case Some(db) =>
387 | hivePrivilegeObjects.add(
388 | HivePrivilegeObject(HivePrivilegeObjectType.DATABASE, db, db))
389 | case _ =>
390 | }
391 | }
392 |
393 | /**
394 | * Add database level hive privilege objects to input or output list
395 |    * @param tableIdentifier table identifier whose database name is the hive privilege object
396 | * @param hivePrivilegeObjects input or output hive privilege object list
397 | */
398 | private def addDbLevelObjs(
399 | tableIdentifier: TableIdentifier,
400 | hivePrivilegeObjects: JList[HPO]): Unit = {
401 | tableIdentifier.database match {
402 | case Some(db) =>
403 | hivePrivilegeObjects.add(
404 | HivePrivilegeObject(HivePrivilegeObjectType.DATABASE, db, db))
405 | case _ =>
406 | }
407 | }
408 |
409 | /**
410 | * Add table level hive privilege objects to input or output list
411 |    * @param tableIdentifier table identifier with the database name and table name used as
412 |    *                        the hive privilege object
413 |    * @param hivePrivilegeObjects input or output list
414 |    * @param mode Append or Overwrite, mapped to the hive privilege object action type
415 | */
416 | private def addTableOrViewLevelObjs(
417 | tableIdentifier: TableIdentifier,
418 | hivePrivilegeObjects: JList[HPO],
419 | partKeys: Seq[String] = Nil,
420 | columns: Seq[String] = Nil,
421 | mode: SaveMode = SaveMode.ErrorIfExists,
422 | cmdParams: Seq[String] = Nil): Unit = {
423 | tableIdentifier.database match {
424 | case Some(db) =>
425 | val tbName = tableIdentifier.table
426 | val hivePrivObjectActionType = getHivePrivObjActionType(mode)
427 | hivePrivilegeObjects.add(
428 | HivePrivilegeObject(
429 | HivePrivilegeObjectType.TABLE_OR_VIEW,
430 | db,
431 | tbName,
432 | partKeys.asJava,
433 | columns.asJava,
434 | hivePrivObjectActionType,
435 | cmdParams.asJava))
436 | case _ =>
437 | }
438 | }
439 |
440 | /**
441 | * Add function level hive privilege objects to input or output list
442 | * @param databaseName database name
443 | * @param functionName function name as hive privilege object
444 | * @param hivePrivilegeObjects input or output list
445 | */
446 | private def addFunctionLevelObjs(
447 | databaseName: Option[String],
448 | functionName: String,
449 | hivePrivilegeObjects: JList[HPO]): Unit = {
450 | databaseName match {
451 | case Some(db) =>
452 | hivePrivilegeObjects.add(
453 | HivePrivilegeObject(HivePrivilegeObjectType.FUNCTION, db, functionName))
454 | case _ =>
455 | }
456 | }
457 |
458 | /**
459 |    * Map a SaveMode to the corresponding HivePrivObjectActionType
460 |    *
461 |    * @param mode Append or Overwrite
462 |    * @return INSERT for Append, INSERT_OVERWRITE for Overwrite, OTHER for any other mode
463 | */
464 | private def getHivePrivObjActionType(mode: SaveMode): HivePrivObjectActionType = {
465 | mode match {
466 | case SaveMode.Append => HivePrivObjectActionType.INSERT
467 | case SaveMode.Overwrite => HivePrivObjectActionType.INSERT_OVERWRITE
468 | case _ => HivePrivObjectActionType.OTHER
469 | }
470 | }
471 | }
472 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/hive/client/AuthzImpl.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.hive.client
19 |
20 | import java.util.{List => JList}
21 |
22 | import com.githup.yaooqinn.spark.authorizer.Logging
23 | import org.apache.hadoop.hive.ql.security.authorization.plugin._
24 | import org.apache.hadoop.hive.ql.session.SessionState
25 | import org.apache.hadoop.security.UserGroupInformation
26 |
27 | import org.apache.spark.sql.SparkSession
28 | import org.apache.spark.sql.hive.{AuthzUtils, HiveExternalCatalog}
29 | import org.apache.spark.sql.internal.NonClosableMutableURLClassLoader
30 |
31 | /**
32 |  * A tool for the Authorizer implementation.
33 |  *
34 |  * The [[SessionState]] provides the authorizer and authenticator; we use them to check the
35 |  * privileges of a Spark LogicalPlan once it has been mapped to hive privilege objects and a
36 |  * hive operation type.
37 |  *
38 |  * A [[SparkSession]] backed by the hive catalog has its own instance of [[SessionState]]. We
39 |  * would prefer to reuse it, but it belongs to an isolated classloader, which makes it
40 |  * unreachable from Spark's context classloader. So, when a [[ClassCastException]] occurs, we
41 |  * turn off the [[IsolatedClientLoader]] and use Spark's builtin Hive client jars to create a
42 |  * new metastore client that replaces the original one; once created, the new client is
43 |  * reused afterwards.
44 |  *
45 | */
46 | object AuthzImpl extends Logging {
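  /**
   * Check the privileges of the given hive operation type against the input and output hive
   * privilege objects, using the AuthorizerV2 obtained from the Hive client's SessionState.
   * Re-throws the HiveAccessControlException raised by the authorizer when the check fails.
   */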
47 | def checkPrivileges(
48 | spark: SparkSession,
49 | hiveOpType: HiveOperationType,
50 | inputObjs: JList[HivePrivilegeObject],
51 | outputObjs: JList[HivePrivilegeObject],
52 | context: HiveAuthzContext): Unit = {
53 | val client = spark.sharedState
54 | .externalCatalog.asInstanceOf[HiveExternalCatalog]
55 | .client
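    // The Hive client normally lives behind the IsolatedClientLoader; if the cast below fails
    // with a ClassCastException, isolation is switched off and the client is rebuilt with
    // Spark's builtin Hive jars so that it is usable from the context classloader.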
56 | val clientImpl = try {
57 | client.asInstanceOf[HiveClientImpl]
58 | } catch {
59 | case _: ClassCastException =>
60 | val clientLoader =
61 | AuthzUtils.getFieldVal(client, "clientLoader").asInstanceOf[IsolatedClientLoader]
62 | AuthzUtils.setFieldVal(clientLoader, "isolationOn", false)
63 | AuthzUtils.setFieldVal(clientLoader,
64 | "classLoader", new NonClosableMutableURLClassLoader(clientLoader.baseClassLoader))
65 | clientLoader.cachedHive = null
66 | val newClient = clientLoader.createClient()
67 | AuthzUtils.setFieldVal(
68 | spark.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog],
69 | "client",
70 | newClient)
71 | newClient.asInstanceOf[HiveClientImpl]
72 | }
73 |
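    // Run the check as the current Spark user: if the Hive authenticator reports a different
    // user name, start a fresh SessionState for the current user and attach it to the client.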
74 | val state = clientImpl.state
75 | SessionState.setCurrentSessionState(state)
76 | val user = UserGroupInformation.getCurrentUser.getShortUserName
77 | if (state.getAuthenticator.getUserName != user) {
78 | val hiveConf = state.getConf
79 | val newState = new SessionState(hiveConf, user)
80 | SessionState.start(newState)
81 | AuthzUtils.setFieldVal(clientImpl, "state", newState)
82 | }
83 |
84 | val authz = clientImpl.state.getAuthorizerV2
85 | clientImpl.withHiveState {
86 | if (authz != null) {
87 | try {
88 | authz.checkPrivileges(hiveOpType, inputObjs, outputObjs, context)
89 | } catch {
90 | case hae: HiveAccessControlException =>
91 | error(
92 | s"""
93 | |+===============================+
94 | ||Spark SQL Authorization Failure|
95 | ||-------------------------------|
96 | ||${hae.getMessage}
97 | ||-------------------------------|
98 | ||Spark SQL Authorization Failure|
99 | |+===============================+
100 | """.stripMargin)
101 | throw hae
102 | case e: Exception => throw e
103 | }
104 | } else {
105 | warn("Authorizer V2 not configured. Skipping privilege checking")
106 | }
107 | }
108 | }
109 | }
110 |
--------------------------------------------------------------------------------