├── .github └── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── _config.yml ├── conf ├── hive-site.xml ├── ranger-hive-audit.xml └── ranger-hive-security.xml ├── docs ├── building-spark-authorizer.md ├── img │ └── ranger-prolcy-details.png ├── install_plugin.md ├── manually_test_cases.md ├── ranger_authorization.md ├── spark_sql_authorization.md ├── sql_std_based_authorization.md └── storage_based_authorization.md ├── pom.xml ├── scalastyle-config.xml └── src └── main └── scala ├── com └── githup │ └── yaooqinn │ └── spark │ └── authorizer │ └── Logging.scala └── org └── apache ├── ranger └── authorization │ └── spark │ └── authorizer │ ├── RangerSparkSQLExtension.scala │ └── package.scala └── spark └── sql ├── catalyst └── optimizer │ ├── Authorizable.scala │ ├── Authorizer.scala │ ├── AuthorizerExtension.scala │ └── HivePrivilegeObject.scala └── hive ├── AuthzUtils.scala ├── PrivilegesBuilder.scala └── client └── AuthzImpl.scala /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | 5 | --- 6 | 7 | **Describe the bug** 8 | A clear and concise description of what the bug is. 9 | 10 | **To Reproduce** 11 | Steps to reproduce the behavior: 12 | 1. Configurations 13 | 2. Environments 14 | 3. Operations 15 | 4. See error 16 | 17 | **Expected behavior** 18 | A clear and concise description of what you expected to happen. 19 | 20 | **Screenshots** 21 | If applicable, add screenshots to help explain your problem. 22 | 23 | **Additional context** 24 | Add any other context about the problem here. 25 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | 5 | --- 6 | 7 | **Is your feature request related to a problem? Please describe.** 8 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 9 | 10 | **Describe the solution you'd like** 11 | A clear and concise description of what you want to happen. 12 | 13 | **Describe alternatives you've considered** 14 | A clear and concise description of any alternative solutions or features you've considered. 15 | 16 | **Additional context** 17 | Add any other context or screenshots about the feature request here. 
18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *#*# 2 | *.#* 3 | *.iml 4 | *.ipr 5 | *.iws 6 | *.pyc 7 | *.pyo 8 | *.swp 9 | *~ 10 | .DS_Store 11 | .cache 12 | .classpath 13 | .ensime 14 | .ensime_cache/ 15 | .ensime_lucene 16 | .generated-mima* 17 | .idea/ 18 | .idea_modules/ 19 | .project 20 | .pydevproject 21 | .scala_dependencies 22 | .settings 23 | /lib/ 24 | R-unit-tests.log 25 | R/unit-tests.out 26 | R/cran-check.out 27 | R/pkg/vignettes/sparkr-vignettes.html 28 | R/pkg/tests/fulltests/Rplots.pdf 29 | build/*.jar 30 | build/apache-maven* 31 | build/scala* 32 | build/zinc* 33 | cache 34 | checkpoint 35 | conf/*.cmd 36 | conf/*.conf 37 | conf/*.properties 38 | conf/*.sh 39 | conf/java-opts 40 | conf/slaves 41 | dependency-reduced-pom.xml 42 | derby.log 43 | dev/create-release/*final 44 | dev/create-release/*txt 45 | dev/pr-deps/ 46 | dist/ 47 | docs/_site 48 | docs/api 49 | sql/docs 50 | sql/site 51 | lib_managed/ 52 | lint-r-report.log 53 | log/ 54 | logs/ 55 | out/ 56 | project/boot/ 57 | project/build/target/ 58 | project/plugins/lib_managed/ 59 | project/plugins/project/build.properties 60 | project/plugins/src_managed/ 61 | project/plugins/target/ 62 | python/lib/pyspark.zip 63 | python/deps 64 | python/pyspark/python 65 | reports/ 66 | scalastyle-on-compile.generated.xml 67 | scalastyle-output.xml 68 | scalastyle.txt 69 | spark-*-bin-*.tgz 70 | spark-tests.log 71 | src_managed/ 72 | streaming-tests.log 73 | target/ 74 | unit-tests.log 75 | work/ 76 | 77 | # For Hive 78 | TempStatsStore/ 79 | metastore/ 80 | metastore_db/ 81 | sql/hive-thriftserver/test_warehouses 82 | warehouse/ 83 | spark-warehouse/ 84 | 85 | # For R session data 86 | .RData 87 | .RHistory 88 | .Rhistory 89 | *.Rproj 90 | *.Rproj.* 91 | 92 | .Rproj.user 93 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: scala 2 | scala: 3 | - 2.11.8 4 | 5 | deploy: 6 | provider: pages 7 | skip_cleanup: true 8 | github_token: $GITHUB_TOKEN 9 | email: yaooqinn@hotmail.com 10 | name: Kent Yao 11 | on: 12 | branch: master 13 | 14 | install: 15 | - mvn install -q -DskipTests=true -Dmaven.javadoc.skip=true -B -V 16 | 17 | script: 18 | - mvn package -q -Dmaven.javadoc.skip=true -B -V 19 | - mvn package -Pspark-2.2 -q -Dmaven.javadoc.skip=true -B -V 20 | - mvn package -Pspark-2.3 -q -Dmaven.javadoc.skip=true -B -V 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | 204 | ======================================================================= 205 | Apache Spark Subcomponents: 206 | 207 | The Apache Spark project contains subcomponents with separate copyright 208 | notices and license terms. Your use of the source code for the these 209 | subcomponents is subject to the terms and conditions of the following 210 | licenses. 211 | 212 | 213 | ======================================================================== 214 | For heapq (pyspark/heapq3.py): 215 | ======================================================================== 216 | 217 | See license/LICENSE-heapq.txt 218 | 219 | ======================================================================== 220 | For SnapTree: 221 | ======================================================================== 222 | 223 | See license/LICENSE-SnapTree.txt 224 | 225 | ======================================================================== 226 | For jbcrypt: 227 | ======================================================================== 228 | 229 | See license/LICENSE-jbcrypt.txt 230 | 231 | ======================================================================== 232 | BSD-style licenses 233 | ======================================================================== 234 | 235 | The following components are provided under a BSD-style license. See project link for details. 236 | The text of each license is also included at licenses/LICENSE-[project].txt. 
237 | 238 | (BSD 3 Clause) netlib core (com.github.fommil.netlib:core:1.1.2 - https://github.com/fommil/netlib-java/core) 239 | (BSD 3 Clause) JPMML-Model (org.jpmml:pmml-model:1.2.7 - https://github.com/jpmml/jpmml-model) 240 | (BSD License) AntLR Parser Generator (antlr:antlr:2.7.7 - http://www.antlr.org/) 241 | (BSD License) ANTLR 4.5.2-1 (org.antlr:antlr4:4.5.2-1 - http://wwww.antlr.org/) 242 | (BSD licence) ANTLR ST4 4.0.4 (org.antlr:ST4:4.0.4 - http://www.stringtemplate.org) 243 | (BSD licence) ANTLR StringTemplate (org.antlr:stringtemplate:3.2.1 - http://www.stringtemplate.org) 244 | (BSD License) Javolution (javolution:javolution:5.5.1 - http://javolution.org) 245 | (BSD) JLine (jline:jline:0.9.94 - http://jline.sourceforge.net) 246 | (BSD) ParaNamer Core (com.thoughtworks.paranamer:paranamer:2.3 - http://paranamer.codehaus.org/paranamer) 247 | (BSD) ParaNamer Core (com.thoughtworks.paranamer:paranamer:2.6 - http://paranamer.codehaus.org/paranamer) 248 | (BSD 3 Clause) Scala (http://www.scala-lang.org/download/#License) 249 | (Interpreter classes (all .scala files in repl/src/main/scala 250 | except for Main.Scala, SparkHelper.scala and ExecutorClassLoader.scala), 251 | and for SerializableMapWrapper in JavaUtils.scala) 252 | (BSD-like) Scala Actors library (org.scala-lang:scala-actors:2.11.7 - http://www.scala-lang.org/) 253 | (BSD-like) Scala Compiler (org.scala-lang:scala-compiler:2.11.7 - http://www.scala-lang.org/) 254 | (BSD-like) Scala Compiler (org.scala-lang:scala-reflect:2.11.7 - http://www.scala-lang.org/) 255 | (BSD-like) Scala Library (org.scala-lang:scala-library:2.11.7 - http://www.scala-lang.org/) 256 | (BSD-like) Scalap (org.scala-lang:scalap:2.11.7 - http://www.scala-lang.org/) 257 | (BSD-style) scalacheck (org.scalacheck:scalacheck_2.11:1.10.0 - http://www.scalacheck.org) 258 | (BSD-style) spire (org.spire-math:spire_2.11:0.7.1 - http://spire-math.org) 259 | (BSD-style) spire-macros (org.spire-math:spire-macros_2.11:0.7.1 - http://spire-math.org) 260 | (New BSD License) Kryo (com.esotericsoftware:kryo:3.0.3 - https://github.com/EsotericSoftware/kryo) 261 | (New BSD License) MinLog (com.esotericsoftware:minlog:1.3.0 - https://github.com/EsotericSoftware/minlog) 262 | (New BSD license) Protocol Buffer Java API (com.google.protobuf:protobuf-java:2.5.0 - http://code.google.com/p/protobuf) 263 | (New BSD license) Protocol Buffer Java API (org.spark-project.protobuf:protobuf-java:2.4.1-shaded - http://code.google.com/p/protobuf) 264 | (The BSD License) Fortran to Java ARPACK (net.sourceforge.f2j:arpack_combined_all:0.1 - http://f2j.sourceforge.net) 265 | (The BSD License) xmlenc Library (xmlenc:xmlenc:0.52 - http://xmlenc.sourceforge.net) 266 | (The New BSD License) Py4J (net.sf.py4j:py4j:0.10.4 - http://py4j.sourceforge.net/) 267 | (Two-clause BSD-style license) JUnit-Interface (com.novocode:junit-interface:0.10 - http://github.com/szeiger/junit-interface/) 268 | (BSD licence) sbt and sbt-launch-lib.bash 269 | (BSD 3 Clause) d3.min.js (https://github.com/mbostock/d3/blob/master/LICENSE) 270 | (BSD 3 Clause) DPark (https://github.com/douban/dpark/blob/master/LICENSE) 271 | (BSD 3 Clause) CloudPickle (https://github.com/cloudpipe/cloudpickle/blob/master/LICENSE) 272 | 273 | ======================================================================== 274 | MIT licenses 275 | ======================================================================== 276 | 277 | The following components are provided under the MIT License. See project link for details. 
278 | The text of each license is also included at licenses/LICENSE-[project].txt.
279 | 
280 | (MIT License) JCL 1.1.1 implemented over SLF4J (org.slf4j:jcl-over-slf4j:1.7.5 - http://www.slf4j.org)
281 | (MIT License) JUL to SLF4J bridge (org.slf4j:jul-to-slf4j:1.7.5 - http://www.slf4j.org)
282 | (MIT License) SLF4J API Module (org.slf4j:slf4j-api:1.7.5 - http://www.slf4j.org)
283 | (MIT License) SLF4J LOG4J-12 Binding (org.slf4j:slf4j-log4j12:1.7.5 - http://www.slf4j.org)
284 | (MIT License) pyrolite (org.spark-project:pyrolite:2.0.1 - http://pythonhosted.org/Pyro4/)
285 | (MIT License) scopt (com.github.scopt:scopt_2.11:3.2.0 - https://github.com/scopt/scopt)
286 | (The MIT License) Mockito (org.mockito:mockito-core:1.9.5 - http://www.mockito.org)
287 | (MIT License) jquery (https://jquery.org/license/)
288 | (MIT License) AnchorJS (https://github.com/bryanbraun/anchorjs)
289 | (MIT License) graphlib-dot (https://github.com/cpettitt/graphlib-dot)
290 | (MIT License) dagre-d3 (https://github.com/cpettitt/dagre-d3)
291 | (MIT License) sorttable (https://github.com/stuartlangridge/sorttable)
292 | (MIT License) boto (https://github.com/boto/boto/blob/develop/LICENSE)
293 | (MIT License) datatables (http://datatables.net/license)
294 | (MIT License) mustache (https://github.com/mustache/mustache/blob/master/LICENSE)
295 | (MIT License) cookies (http://code.google.com/p/cookies/wiki/License)
296 | (MIT License) blockUI (http://jquery.malsup.com/block/)
297 | (MIT License) RowsGroup (http://datatables.net/license/mit)
298 | (MIT License) jsonFormatter (http://www.jqueryscript.net/other/jQuery-Plugin-For-Pretty-JSON-Formatting-jsonFormatter.html)
299 | (MIT License) modernizr (https://github.com/Modernizr/Modernizr/blob/master/LICENSE)
300 | 
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | 
2 | # Spark Authorizer [![Build Status](https://travis-ci.org/yaooqinn/spark-authorizer.svg?branch=master)](https://travis-ci.org/yaooqinn/spark-authorizer) [![HitCount](http://hits.dwyl.io/yaooqinn/spark-authorizer.svg)](http://hits.dwyl.io/yaooqinn/spark-authorizer)
3 | 
4 | **Spark Authorizer** provides you with *SQL Standard Based Authorization* for [Apache Spark™](http://spark.apache.org),
5 | just as [SQL Standard Based Hive Authorization](https://cwiki.apache.org/confluence/display/Hive/SQL+Standard+Based+Hive+Authorization) does for Apache Hive.
6 | While you use Spark SQL or the Dataset/DataFrame API to load data from tables backed by an [Apache Hive™](https://hive.apache.org) metastore,
7 | this library provides row/column-level fine-grained access controls via [Apache Ranger™](https://ranger.apache.org) or Hive SQL Standard Based Authorization.
8 | 
9 | Security is one of the fundamental features for enterprise adoption. [Apache Ranger™](https://ranger.apache.org) offers security plugins for many Hadoop ecosystem components,
10 | such as HDFS, Hive, HBase, Solr and Sqoop2. However, [Apache Spark™](http://spark.apache.org) is not covered yet.
11 | When a secured HDFS cluster is used as a data warehouse accessed by various users and groups via different applications written in Spark and Hive,
12 | it is very difficult to guarantee consistent data management. Apache Spark users can reach the data warehouse only
13 | with the storage-based access controls offered by HDFS.
This library shares the [Ranger Hive plugin](https://cwiki.apache.org/confluence/display/RANGER/Apache+Ranger+0.5.0+Installation#ApacheRanger0.5.0Installation-InstallingApacheHive(1.2.0))
14 | with Hive to help Spark talk to Ranger Admin.
15 | 
16 | Please refer to [ACL Management for Spark SQL](https://yaooqinn.github.io/spark-authorizer/docs/spark_sql_authorization.html) to see what spark-authorizer supports.
17 | 
18 | ## Quick Start
19 | 
20 | ### Step 1. Install Spark Authorizer
21 | 
22 | Include this package in your Spark applications using:
23 | #### spark-shell, pyspark, or spark-submit
24 | ```bash
25 | > $SPARK_HOME/bin/spark-shell --packages yaooqinn:spark-authorizer:2.1.1
26 | ```
27 | #### sbt
28 | If you use the sbt-spark-package plugin, in your sbt build file, add:
29 | ```sbtshell
30 | spDependencies += "yaooqinn/spark-authorizer:2.1.1"
31 | ```
32 | Otherwise,
33 | ```sbtshell
34 | resolvers += "Spark Packages Repo" at "http://dl.bintray.com/spark-packages/maven"
35 | 
36 | libraryDependencies += "yaooqinn" % "spark-authorizer" % "2.1.1"
37 | ```
38 | 
39 | #### Maven
40 | In your pom.xml, add:
41 | ```xml
42 | <dependencies>
43 |   <!-- list of dependencies -->
44 |   <dependency>
45 |     <groupId>yaooqinn</groupId>
46 |     <artifactId>spark-authorizer</artifactId>
47 |     <version>2.1.1</version>
48 |   </dependency>
49 | </dependencies>
50 | 
51 | <repositories>
52 |   <repository>
53 |     <id>SparkPackagesRepo</id>
54 |     <url>http://dl.bintray.com/spark-packages/maven</url>
55 |   </repository>
56 | </repositories>
57 | ```
58 | 
59 | #### Manually
60 | If you build Spark Authorizer manually (see [Building Spark Authorizer](https://yaooqinn.github.io/spark-authorizer/docs/building-spark-authorizer.html)), you can deploy it via:
61 | ```bash
62 | cp target/spark-authorizer-.jar $SPARK_HOME/jars
63 | ```
64 | 
65 | ### Step 2. Install & Configure Ranger Hive Plugin
66 | 
67 | Please refer to [Install Ranger Hive Plugin For Apache Spark](https://yaooqinn.github.io/spark-authorizer/docs/install_plugin.html) to learn how to deploy the plugin jars to Apache Spark and set Ranger/Hive configurations.
68 | 
69 | ### Step 3. Enable Spark Authorizer
70 | 
71 | In `$SPARK_HOME/conf/spark-defaults.conf`, add:
72 | 
73 | ```scala
74 | spark.sql.extensions=org.apache.ranger.authorization.spark.authorizer.RangerSparkSQLExtension
75 | ```
76 | **NOTE** `spark.sql.extensions` is only supported by Spark 2.2.x and later; for Spark 2.1.x, please use [Version: 1.1.3.spark2.1](https://github.com/yaooqinn/spark-authorizer/tree/78f7d818db773c3567c636575845a413ac560c90) and check the previous doc.
77 | 
78 | ## Interactive Spark Shell
79 | 
80 | The easiest way to start using Spark is through the Scala shell:
81 | 
82 | ```shell
83 | bin/spark-shell --master yarn --proxy-user hzyaoqin
84 | ```
85 | 
86 | ## Suffer for the Authorization Pain
87 | 
88 | We create a ranger policy as below:
89 | ![ranger-policy-details](docs/img/ranger-prolcy-details.png)
90 | 
91 | Check privileges with some simple cases.
92 | 
93 | #### Show databases
94 | 
95 | ```sql
96 | scala> spark.sql("show databases").show
97 | +--------------+
98 | | databaseName|
99 | +--------------+
100 | | default|
101 | | spark_test_db|
102 | | tpcds_10g_ext|
103 | +--------------+
104 | ```
105 | 
106 | #### Switch database
107 | 
108 | ```sql
109 | scala> spark.sql("use spark_test_db").show
110 | 17/12/08 17:06:17 ERROR optimizer.Authorizer:
111 | +===============================+
112 | |Spark SQL Authorization Failure|
113 | |-------------------------------|
114 | |Permission denied: user [hzyaoqin] does not have [USE] privilege on [spark_test_db]
115 | |-------------------------------|
116 | |Spark SQL Authorization Failure|
117 | +===============================+
118 | ```
119 | Oops...
120 | 121 | 122 | ```sql 123 | scala> spark.sql("use tpcds_10g_ext").show 124 | ++ 125 | || 126 | ++ 127 | ++ 128 | ``` 129 | LOL... 130 | 131 | 132 | ### Select 133 | ```sql 134 | scala> spark.sql("select cp_type from catalog_page limit 1").show 135 | 17/12/08 17:09:58 ERROR optimizer.Authorizer: 136 | +===============================+ 137 | |Spark SQL Authorization Failure| 138 | |-------------------------------| 139 | |Permission denied: user [hzyaoqin] does not have [SELECT] privilege on [tpcds_10g_ext/catalog_page/cp_type] 140 | |-------------------------------| 141 | |Spark SQL Authorization Failure| 142 | +===============================+ 143 | ``` 144 | Oops... 145 | 146 | ```sql 147 | scala> spark.sql("select * from call_center limit 1").show 148 | +-----------------+-----------------+-----------------+---------------+-----------------+---------------+--------+--------+------------+--------+--------+-----------+---------+--------------------+--------------------+-----------------+-----------+----------------+----------+---------------+----------------+--------------+--------------+---------------+-------+-----------------+--------+------+-------------+-------------+-----------------+ 149 | |cc_call_center_sk|cc_call_center_id|cc_rec_start_date|cc_rec_end_date|cc_closed_date_sk|cc_open_date_sk| cc_name|cc_class|cc_employees|cc_sq_ft|cc_hours| cc_manager|cc_mkt_id| cc_mkt_class| cc_mkt_desc|cc_market_manager|cc_division|cc_division_name|cc_company|cc_company_name|cc_street_number|cc_street_name|cc_street_type|cc_suite_number|cc_city| cc_county|cc_state|cc_zip| cc_country|cc_gmt_offset|cc_tax_percentage| 150 | +-----------------+-----------------+-----------------+---------------+-----------------+---------------+--------+--------+------------+--------+--------+-----------+---------+--------------------+--------------------+-----------------+-----------+----------------+----------+---------------+----------------+--------------+--------------+---------------+-------+-----------------+--------+------+-------------+-------------+-----------------+ 151 | | 1| AAAAAAAABAAAAAAA| 1998-01-01| null| null| 2450952|NY Metro| large| 2| 1138| 8AM-4PM|Bob Belcher| 6|More than other a...|Shared others cou...| Julius Tran| 3| pri| 6| cally| 730| Ash Hill| Boulevard| Suite 0| Midway|Williamson County| TN| 31904|United States| -5.00| 0.11| 152 | +-----------------+-----------------+-----------------+---------------+-----------------+---------------+--------+--------+------------+--------+--------+-----------+---------+--------------------+--------------------+-----------------+-----------+----------------+----------+---------------+----------------+--------------+--------------+---------------+-------+-----------------+--------+------+-------------+-------------+-----------------+ 153 | 154 | ``` 155 | 156 | LOL... 
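When everything unexpectedly comes back permitted, a common cause is that the authorizer extension is simply not registered in the session. A quick sanity check (this snippet is not part of the original demo, just a sketch):

```scala
// Print the configured SQL extensions; expect the RangerSparkSQLExtension class
// name from Step 3 when spark-authorizer is enabled, "<not set>" otherwise.
println(spark.conf.get("spark.sql.extensions", "<not set>"))
```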
157 | 158 | ### Dataset/DataFrame 159 | 160 | ```scala 161 | scala> spark.read.table("catalog_page").limit(1).collect 162 | ``` 163 | ``` 164 | 17/12/11 14:46:33 ERROR optimizer.Authorizer: 165 | +===============================+ 166 | |Spark SQL Authorization Failure| 167 | |-------------------------------| 168 | |Permission denied: user [hzyaoqin] does not have [SELECT] privilege on [tpcds_10g_ext/catalog_page/cp_catalog_page_sk,cp_catalog_page_id,cp_promo_id,cp_start_date_sk,cp_end_date_sk,cp_department,cp_catalog_number,cp_catalog_page_number,cp_description,cp_type] 169 | |-------------------------------| 170 | |Spark SQL Authorization Failure| 171 | +===============================+ 172 | ``` 173 | Oops... 174 | 175 | ```scala 176 | scala> spark.read.table("call_center").limit(1).collect 177 | ``` 178 | ``` 179 | res3: Array[org.apache.spark.sql.Row] = Array([1,AAAAAAAABAAAAAAA,1998-01-01,null,null,2450952,NY Metro,large,2,1138,8AM-4PM,Bob Belcher,6,More than other authori,Shared others could not count fully dollars. New members ca,Julius Tran,3,pri,6,cally,730,Ash Hill,Boulevard,Suite 0,Midway,Williamson County,TN,31904,United States,-5.00,0.11]) 180 | ``` 181 | LOL... 182 | 183 | --- 184 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-leap-day 2 | -------------------------------------------------------------------------------- /conf/hive-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 19 | 20 | 21 | 22 | 23 | hive.security.authorization.enabled 24 | true 25 | 26 | 27 | 28 | hive.security.authorization.manager 29 | org.apache.ranger.authorization.hive.authorizer.RangerHiveAuthorizerFactory 30 | 31 | 32 | 33 | hive.security.authenticator.manager 34 | org.apache.hadoop.hive.ql.security.SessionStateUserAuthenticator 35 | 36 | 37 | 38 | hive.conf.restricted.list 39 | hive.security.authorization.enabled,hive.security.authorization.manager,hive.security.authenticator.manager 40 | 41 | 42 | -------------------------------------------------------------------------------- /conf/ranger-hive-audit.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 19 | 20 | 21 | 22 | 23 | xasecure.audit.is.enabled 24 | true 25 | 26 | 27 | 28 | xasecure.audit.destination.db 29 | false 30 | 31 | 32 | 33 | xasecure.audit.destination.db.jdbc.driver 34 | com.mysql.jdbc.Driver 35 | 36 | 37 | 38 | xasecure.audit.destination.db.jdbc.url 39 | jdbc:mysql://10.171.161.78/ranger 40 | 41 | 42 | 43 | xasecure.audit.destination.db.password 44 | rangeradmin 45 | 46 | 47 | 48 | xasecure.audit.destination.db.user 49 | rangeradmin 50 | 51 | 52 | -------------------------------------------------------------------------------- /conf/ranger-hive-security.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 19 | 20 | 21 | 22 | 23 | ranger.plugin.hive.policy.rest.url 24 | ranger admin address like http://ranger-admin.org:6080 25 | 26 | 27 | 28 | ranger.plugin.hive.service.name 29 | a ranger hive service name 30 | 31 | 32 | 33 | ranger.plugin.hive.policy.cache.dir 34 | ./a ranger hive service name/policycache 35 | 36 | 37 | 38 | ranger.plugin.hive.policy.pollIntervalMs 39 | 5000 40 | 41 | 42 | 43 | ranger.plugin.hive.policy.source.impl 44 | org.apache.ranger.admin.client.RangerAdminRESTClient 45 | 46 | 47 | 
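The `conf` files above are read from `$SPARK_HOME/conf` at runtime. Besides the `spark-defaults.conf` entry shown in Step 3 of the README, the extension can also be attached when the session is built; the following is a minimal sketch, assuming Spark 2.2+ with Hive support (the application name is only an illustration):

```scala
import org.apache.spark.sql.SparkSession

// Register the Ranger-backed authorizer extension programmatically instead of
// relying on $SPARK_HOME/conf/spark-defaults.conf.
val spark = SparkSession.builder()
  .appName("spark-authorizer-demo") // hypothetical application name
  .enableHiveSupport()
  .config("spark.sql.extensions",
    "org.apache.ranger.authorization.spark.authorizer.RangerSparkSQLExtension")
  .getOrCreate()
```

Once created this way, the session applies the same authorization checks as with the `spark-defaults.conf` approach.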
-------------------------------------------------------------------------------- /docs/building-spark-authorizer.md: --------------------------------------------------------------------------------
1 | # Building Spark Authorizer
2 | 
3 | Spark Authorizer is built using [Apache Maven](http://maven.apache.org). To build it, run:
4 | 
5 | ```bash
6 | mvn clean package
7 | ```
8 | 
9 | Notes from Spark:
10 | > The Maven-based build is the build of reference for Apache Spark.
11 | Building Spark using Maven requires Maven 3.3.9 or newer and Java 7+.
12 | Note that support for Java 7 is deprecated as of Spark 2.0.0 and may be removed in Spark 2.2.0.
13 | 
14 | So, I suggest you build this library using the same Maven / Java / Scala versions.
15 | 
16 | ## Building against different versions of Apache Spark
17 | 
18 | By default, Spark Authorizer is built against Spark 2.2.2, which may be incompatible with other Spark branches.
19 | 
20 | ```bash
21 | # build for spark 2.2.x
22 | mvn clean package -Pspark-2.2
23 | ```
24 | 
25 | ```bash
26 | # build for spark 2.3.x
27 | mvn clean package -Pspark-2.3
28 | ```
29 | 
-------------------------------------------------------------------------------- /docs/img/ranger-prolcy-details.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/yaooqinn/spark-authorizer/f575b0b8714bd86c8383af9d584aef5d0ed29c4e/docs/img/ranger-prolcy-details.png
-------------------------------------------------------------------------------- /docs/install_plugin.md: --------------------------------------------------------------------------------
1 | # Installing Apache Ranger Hive Plugin For Apache Spark
2 | 
3 | This article illustrates how to install the Apache Ranger plugin, which is made for Apache Hive, into Apache Spark with [spark-authorizer](https://github.com/yaooqinn/spark-authorizer). It guarantees column/row-level fine-grained [ACL Management for Spark SQL](https://yaooqinn.github.io/spark-authorizer/docs/spark_sql_authorization.html).
4 | 
5 | Apache Spark is bundled with built-in Hive Metastore client (version 1.2.1.spark2) jars when `-Phive` is enabled. AFAIK, this version of the Hive Metastore client is compatible with Hive Metastore server 1.2.1 and higher versions.
6 | 
7 | We **DO NOT** support configuring `spark.sql.hive.metastore.jars` to `maven` or a location of the jars used to instantiate the `HiveMetastoreClient`; `builtin` is the one and only option.
8 | 
9 | Apache Ranger upgrades quite fast; one of the reasons may be to catch up with higher Hive releases. AFAIK, Apache Ranger 0.6.x and higher versions do not support Hive [1.2.1](https://issues.apache.org/jira/browse/RANGER-1056) anymore, so you may use Apache Ranger 0.5.x to avoid the underlying pitfalls.
10 | 
11 | An official installation guide of Apache Ranger 0.5.x can be found [here](https://cwiki.apache.org/confluence/display/RANGER/Apache+Ranger+0.5.0+Installation). The remainder of this article will guide you through installing the Apache Ranger Hive plugin for Apache Spark, which is not covered in the official documentation.
12 | 
13 | ## Building Apache Ranger
14 | 
15 | 1. git clone git@github.com:apache/ranger.git
16 | 2. cd ranger
17 | 3. git fetch https://github.com/apache/ranger.git ranger-0.5.3-rc3:ranger-0.5.3
18 | 4. git checkout ranger-0.5.3
19 | 5. 
mvn clean compile package assembly:assembly install -Dmaven.test.skip=true 20 | 21 | If you failed to build the project, please refer to the instructions of the official doc to see if there are any prerequisites. 22 | 23 | If you successfully make the mvn command work, all archives of Ranger admin and plugins will be generated in `./target`, including `ranger-0.5.3-hive-plugin.tar.gz` which is exactly you need for next steps 24 | ``` 25 | -rw-r--r-- 1 kent hadoop 163667362 Oct 15 15:38 ranger-0.5.3-admin.tar.gz 26 | -rw-r--r-- 1 kent hadoop 164655504 Oct 15 15:38 ranger-0.5.3-admin.zip 27 | -rw-r--r-- 1 kent hadoop 16771141 Oct 15 15:36 ranger-0.5.3-hbase-plugin.tar.gz 28 | -rw-r--r-- 1 kent hadoop 16797394 Oct 15 15:36 ranger-0.5.3-hbase-plugin.zip 29 | -rw-r--r-- 1 kent hadoop 16722944 Oct 15 15:35 ranger-0.5.3-hdfs-plugin.tar.gz 30 | -rw-r--r-- 1 kent hadoop 16747829 Oct 15 15:35 ranger-0.5.3-hdfs-plugin.zip 31 | -rw-r--r-- 1 kent hadoop 16139126 Oct 15 15:35 ranger-0.5.3-hive-plugin.tar.gz 32 | -rw-r--r-- 1 kent hadoop 16165266 Oct 15 15:36 ranger-0.5.3-hive-plugin.zip 33 | -rw-r--r-- 1 kent hadoop 32975495 Oct 15 15:36 ranger-0.5.3-kafka-plugin.tar.gz 34 | -rw-r--r-- 1 kent hadoop 33012135 Oct 15 15:37 ranger-0.5.3-kafka-plugin.zip 35 | -rw-r--r-- 1 kent hadoop 71917257 Oct 15 15:38 ranger-0.5.3-kms.tar.gz 36 | -rw-r--r-- 1 kent hadoop 72005470 Oct 15 15:39 ranger-0.5.3-kms.zip 37 | -rw-r--r-- 1 kent hadoop 21298145 Oct 15 15:36 ranger-0.5.3-knox-plugin.tar.gz 38 | -rw-r--r-- 1 kent hadoop 21322990 Oct 15 15:36 ranger-0.5.3-knox-plugin.zip 39 | -rw-r--r-- 1 kent hadoop 34600 Oct 15 15:38 ranger-0.5.3-migration-util.tar.gz 40 | -rw-r--r-- 1 kent hadoop 38014 Oct 15 15:38 ranger-0.5.3-migration-util.zip 41 | -rw-r--r-- 1 kent hadoop 18485767 Oct 15 15:39 ranger-0.5.3-ranger-tools.tar.gz 42 | -rw-r--r-- 1 kent hadoop 18495143 Oct 15 15:39 ranger-0.5.3-ranger-tools.zip 43 | -rw-r--r-- 1 kent hadoop 22416054 Oct 15 15:37 ranger-0.5.3-solr-plugin.tar.gz 44 | -rw-r--r-- 1 kent hadoop 22441083 Oct 15 15:37 ranger-0.5.3-solr-plugin.zip 45 | -rw-r--r-- 1 kent hadoop 3606416 Oct 15 15:39 ranger-0.5.3-src.tar.gz 46 | -rw-r--r-- 1 kent hadoop 5481890 Oct 15 15:39 ranger-0.5.3-src.zip 47 | -rw-r--r-- 1 kent hadoop 34769024 Oct 15 15:36 ranger-0.5.3-storm-plugin.tar.gz 48 | -rw-r--r-- 1 kent hadoop 34788044 Oct 15 15:36 ranger-0.5.3-storm-plugin.zip 49 | -rw-r--r-- 1 kent hadoop 13512547 Oct 15 15:38 ranger-0.5.3-usersync.tar.gz 50 | -rw-r--r-- 1 kent hadoop 13534930 Oct 15 15:38 ranger-0.5.3-usersync.zip 51 | -rw-r--r-- 1 kent hadoop 15942158 Oct 15 15:37 ranger-0.5.3-yarn-plugin.tar.gz 52 | -rw-r--r-- 1 kent hadoop 15969320 Oct 15 15:37 ranger-0.5.3-yarn-plugin.zip 53 | ``` 54 | 55 | ## Applying Plugin to Apache Spark 56 | 57 | 1. cd target 58 | 2. tar zxf ranger-0.5.3-hive-plugin.tar.gz 59 | 3. 
cd ranger-0.5.3-hive-plugin/lib 60 | 61 | ``` 62 | drwxr-xr-x 2 kent hadoop 4096 Oct 16 12:34 ranger-hive-plugin-impl 63 | -rw-r--r-- 1 kent hadoop 16061 Oct 15 15:35 ranger-hive-plugin-shim-0.5.3.jar 64 | -rw-r--r-- 1 kent hadoop 16545 Oct 15 15:35 ranger-plugin-classloader-0.5.3.jar 65 | ``` 66 | 67 | Fistly, copy the above folds and files to `$SPARK_HOME/jars` 68 | 69 | ``` 70 | jersey-client-1.9.jar 71 | jersey-bundle-1.4.jar 72 | eclipselink-2.5.2.jar 73 | noggit-0.6.jar 74 | gson-2.2.4.jar 75 | httpclient-4.5.3.jar 76 | httpcore-4.4.6.jar 77 | httpmime-4.5.3.jar 78 | javax.persistence-2.1.0.jar 79 | mybatis-3.2.8.jar 80 | mysql-connector-java-5.1.39.jar 81 | ``` 82 | 83 | Secondly, add the jars listed above to `$SPARK_HOME/jars/ranger-hive-plugin-impl/` if missing. 84 | 85 | ## Configuring Ranger for Apache Spark 86 | 87 | 88 | Firstly, add the following configurations in `hive-site.xml` to enable Ranger Authorization. 89 | 90 | ``` 91 | 92 | hive.security.authorization.enabled 93 | true 94 | 95 | 96 | 97 | hive.security.authorization.manager 98 | org.apache.ranger.authorization.hive.authorizer.RangerHiveAuthorizerFactory 99 | 100 | 101 | 102 | hive.security.authenticator.manager 103 | org.apache.hadoop.hive.ql.security.SessionStateUserAuthenticator 104 | 105 | 106 | 107 | hive.conf.restricted.list 108 | hive.security.authorization.enabled,hive.security.authorization.manager,hive.security.authenticator.manager 109 | 110 | ``` 111 | 112 | Secondly, create `ranger-hive-security.xml` in `$SPARK_HOME/conf` and add the following configurations for pointing to the right ranger admin server. 113 | 114 | ``` 115 | 116 | 117 | 118 | ranger.plugin.hive.policy.rest.url 119 | ranger admin address like http://ranger-admin.org:6080 120 | 121 | 122 | 123 | ranger.plugin.hive.service.name 124 | a ranger hive service name 125 | 126 | 127 | 128 | ranger.plugin.hive.policy.cache.dir 129 | ./a ranger hive service name/policycache 130 | 131 | 132 | 133 | ranger.plugin.hive.policy.pollIntervalMs 134 | 5000 135 | 136 | 137 | 138 | ranger.plugin.hive.policy.source.impl 139 | org.apache.ranger.admin.client.RangerAdminRESTClient 140 | 141 | 142 | 143 | ``` 144 | 145 | At last, create create `ranger-hive-audit.xml` in `$SPARK_HOME/conf` and add the following configurations to enable/disable auditing. 146 | 147 | ``` 148 | 149 | 150 | 151 | xasecure.audit.is.enabled 152 | true 153 | 154 | 155 | 156 | xasecure.audit.destination.db 157 | false 158 | 159 | 160 | 161 | xasecure.audit.destination.db.jdbc.driver 162 | com.mysql.jdbc.Driver 163 | 164 | 165 | 166 | xasecure.audit.destination.db.jdbc.url 167 | jdbc:mysql://10.171.161.78/ranger 168 | 169 | 170 | 171 | xasecure.audit.destination.db.password 172 | rangeradmin 173 | 174 | 175 | 176 | xasecure.audit.destination.db.user 177 | rangeradmin 178 | 179 | 180 | 181 | ``` 182 | 183 | Ranger Hive plugins should work well through `spark-authorizer`, when set `spark.sql.extensions`=`org.apache.ranger.authorization.spark.authorizer.RangerSparkSQLExtension` 184 | ## Additional Notes 185 | 186 | If you are using Apache Spark in `cluster` mode, the jar files under `$SPARK_HOME/jars/ranger-hive-plugin-impl/` will not be uploaded automatically. If you are not familiar with Spark source code and unable to make some modifications, I suggest you copy all files in `$SPARK_HOME/jars/ranger-hive-plugin-impl/` to `$SPARK_HOME/jars/` and **DELETE** `ranger-hive-plugin-shim-0.5.3.jar` in `$SPARK_HOME/jars/`. 
This works fine for your whole Spark application but with a tiny problem of Spark UI because of jersey jars confliction. 187 | 188 | Also for `cluster` mode Spark applications, `ranger.plugin.hive.policy.cache.dir` in `ranger-hive-security.xml` must be able to create on all NodeManager nodes for the Spark Driver could be generated anywhere. One convenient and effective way is to configure the relative path, such as, 189 | ``` 190 | 191 | ranger.plugin.hive.policy.cache.dir 192 | policycache 193 | 194 | ``` 195 | -------------------------------------------------------------------------------- /docs/manually_test_cases.md: -------------------------------------------------------------------------------- 1 | # Spark SQL Authorization Test Cases 2 | ## Prerequisites 3 | 4 | - Ranger Admin Server 5 | - setup ranger admin 6 | - setup some ranger policies 7 | - Ranger Hive Plugin 8 | - setup ranger hive plugin for spark 9 | - configure ranger plugin configurations to connect ranger admin 10 | - Hive Metastore 11 | - setup Hive Metastore server 12 | - setup ranger admin and spark connected rightly 13 | - Spark 14 | - maybe hdfs 15 | - maybe on yarn 16 | - spark-shell 17 | 18 | ```bash 19 | bin/spark-shell --proxy-user hzyaoqin --conf spark.sql.warehouse.dir=/user/hzyaoqin/warehouse 20 | ``` 21 | 22 | ```bash 23 | hadoop@hzadg-hadoop-dev2:~/spark-2.1.2-bin-hadoop2.7$ bin/spark-shell --proxy-user hzyaoqin --conf spark.sql.warehouse.dir=/user/hzyaoqin/warehouse 24 | 18/06/07 11:00:47 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable 25 | Setting default log level to "WARN". 26 | To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel). 27 | 18/06/07 11:00:56 WARN SparkContext: Support for Java 7 is deprecated as of Spark 2.0.0 28 | 18/06/07 11:00:59 WARN DomainSocketFactory: The short-circuit local reads feature cannot be used because libhadoop cannot be loaded. 29 | 18/06/07 11:00:59 WARN HiveConf: HiveConf of name hive.exec.partition.num.limit does not exist 30 | 18/06/07 11:01:00 WARN HiveConf: HiveConf of name hive.exec.partition.num.limit does not exist 31 | 18/06/07 11:01:00 WARN HiveConf: HiveConf of name hive.exec.partition.num.limit does not exist 32 | 18/06/07 11:01:00 WARN Client: Neither spark.yarn.jars nor spark.yarn.archive is set, falling back to uploading libraries under SPARK_HOME. 33 | Spark context Web UI available at http://10.201.168.144:4040 34 | Spark context available as 'sc' (master = yarn, app id = application_1522381253211_0116). 35 | Spark session available as 'spark'. 36 | Welcome to 37 | ____ __ 38 | / __/__ ___ _____/ /__ 39 | _\ \/ _ \/ _ `/ __/ '_/ 40 | /___/ .__/\_,_/_/ /_/\_\ version 2.1.2 41 | /_/ 42 | 43 | Using Scala version 2.11.8 (Java HotSpot(TM) 64-Bit Server VM, Java 1.7.0_67) 44 | Type in expressions to have them evaluated. 45 | Type :help for more information. 46 | 47 | scala> sc.setLogLevel("info"); import org.apache.spark.sql.catalyst.optimizer.Authorizer; spark.experimental.extraOptimizations ++= Seq(Authorizer) 48 | 18/06/07 11:10:50 INFO SharedState: Warehouse path is '/user/hzyaoqin/warehouse'. 49 | 18/06/07 11:10:50 INFO HiveUtils: Initializing HiveMetastoreConnection version 1.2.1 using Spark classes. 
50 | 18/06/07 11:10:51 WARN HiveConf: HiveConf of name hive.exec.partition.num.limit does not exist 51 | 18/06/07 11:10:51 INFO metastore: Trying to connect to metastore with URI thrift://hzadg-hadoop-dev2.server.163.org:9083 52 | 18/06/07 11:10:51 INFO metastore: Connected to metastore. 53 | 18/06/07 11:10:51 INFO SessionState: Created local directory: /tmp/d0216e65-2506-4d7c-8a7d-13f19fba55c0_resources 54 | 18/06/07 11:10:51 INFO SessionState: Created HDFS directory: /tmp/hive/hzyaoqin/d0216e65-2506-4d7c-8a7d-13f19fba55c0 55 | 18/06/07 11:10:51 INFO SessionState: Created local directory: /tmp/hadoop/d0216e65-2506-4d7c-8a7d-13f19fba55c0 56 | 18/06/07 11:10:51 INFO SessionState: Created HDFS directory: /tmp/hive/hzyaoqin/d0216e65-2506-4d7c-8a7d-13f19fba55c0/_tmp_space.db 57 | 18/06/07 11:10:51 INFO HiveClientImpl: Warehouse location for Hive client (version 1.2.1) is /user/hzyaoqin/warehouse 58 | import org.apache.spark.sql.catalyst.optimizer.Authorizer 59 | 60 | scala> 61 | ``` 62 | ## RunnableCommand 63 | 64 | ### AlterDatabasePropertiesCommand 65 | 66 | ```sql 67 | ALTER (DATABASE|SCHEMA) database_name SET DBPROPERTIES (property_name=property_value, ...) 68 | ``` 69 | 70 | ```sql 71 | sql("alter database default set dbproperties ('abc'='xyz')").show 72 | ``` 73 | 74 | ### AlterTableAddColumnsCommand 75 | 76 | ```sql 77 | ALTER TABLE table_identifier ADD COLUMNS (col_name data_type [COMMENT col_comment], ...); 78 | ``` 79 | 80 | ### AlterTableChangeColumnCommand 81 | 82 | ```sql 83 | ALTER TABLE table_identifier 84 | CHANGE [COLUMN] column_old_name column_new_name column_dataType [COMMENT column_comment] 85 | [FIRST | AFTER column_name]; 86 | ``` 87 | 88 | ### AlterTableDropPartitionCommand 89 | 90 | ```sql 91 | ALTER TABLE table DROP [IF EXISTS] PARTITION spec1[, PARTITION spec2, ...] 
[PURGE]; 92 | ``` 93 | 94 | ```sql 95 | 96 | ``` 97 | 98 | ### AlterTableRecoverPartitionsCommand 99 | 100 | ```sql 101 | ALTER TABLE table RECOVER PARTITIONS; 102 | MSCK REPAIR TABLE table; 103 | ``` 104 | 105 | ### AlterTableRenamePartitionCommand 106 | 107 | ```sql 108 | ALTER TABLE table PARTITION spec1 RENAME TO PARTITION spec2; 109 | ``` 110 | 111 | 112 | ### AlterTableRenameCommand 113 | 114 | ```sql 115 | ALTER TABLE table1 RENAME TO table2; 116 | ALTER VIEW view1 RENAME TO view2; 117 | 118 | ``` 119 | 120 | ```sql 121 | sql("alter table src10 rename to src11").show 122 | ``` 123 | 124 | ### AlterTableSetPropertiesCommand 125 | 126 | ```sql 127 | ALTER TABLE table1 SET TBLPROPERTIES ('key1' = 'val1', 'key2' = 'val2', ...); 128 | ALTER VIEW view1 SET TBLPROPERTIES ('key1' = 'val1', 'key2' = 'val2', ...); 129 | ``` 130 | 131 | ### AlterTableUnsetPropertiesCommand 132 | 133 | ```sql 134 | ALTER TABLE table1 UNSET TBLPROPERTIES [IF EXISTS] ('key1', 'key2', ...); 135 | ALTER VIEW view1 UNSET TBLPROPERTIES [IF EXISTS] ('key1', 'key2', ...); 136 | ``` 137 | 138 | ### AlterTableSerDePropertiesCommand 139 | 140 | ```sql 141 | ALTER TABLE table [PARTITION spec] SET SERDE serde_name [WITH SERDEPROPERTIES props]; 142 | ALTER TABLE table [PARTITION spec] SET SERDEPROPERTIES serde_properties; 143 | ``` 144 | 145 | ### AlterTableSetLocationCommand 146 | ```sql 147 | ALTER TABLE table_name [PARTITION partition_spec] SET LOCATION "loc"; 148 | ``` 149 | 150 | ### AlterViewAsCommand 151 | 152 | ```sql 153 | ALTER VIEW test_view AS SELECT 3 AS i, 4 AS j 154 | ``` 155 | 156 | Treat select clause as QUERY Hive Operation 157 | 158 | ### AnalyzeColumnCommand 159 | 160 | ```sql 161 | ANALYZE TABLE tableName COMPUTE STATISTICS FOR COLUMNS some_random_column 162 | ``` 163 | 164 | ### AnalyzeTableCommand 165 | 166 | ```sql 167 | analyze table tableName compute statistics noscan; 168 | ``` 169 | 170 | ### AnalyzePartitionCommand 171 | 172 | ```sql 173 | ANALYZE TABLE t PARTITION(ds='2008-04-09', hr=11) COMPUTE STATISTICS; 174 | ANALYZE TABLE t PARTITION(ds='2008-04-09', hr=11) COMPUTE STATISTICS; 175 | ANALYZE TABLE t PARTITION(ds='2008-04-09', hr) COMPUTE STATISTICS; 176 | ANALYZE TABLE t PARTITION(ds='2008-04-09', hr) COMPUTE STATISTICS noscan; 177 | ANALYZE TABLE t PARTITION(ds, hr=11) COMPUTE STATISTICS noscan; 178 | ANALYZE TABLE t PARTITION(ds, hr) COMPUTE STATISTICS; 179 | ``` 180 | 181 | ### CreateDatabaseCommand 182 | 183 | ```sql 184 | CREATE (DATABASE|SCHEMA) [IF NOT EXISTS] database_name 185 | [COMMENT database_comment] 186 | [LOCATION database_directory] 187 | [WITH DBPROPERTIES (property_name=property_value, ...)]; 188 | ``` 189 | 190 | ### CreateDataSourceTableAsSelectCommand 191 | 192 | ```sql 193 | CREATE TABLE [IF NOT EXISTS] [db_name.]table_name 194 | USING format OPTIONS ([option1_name "option1_value", option2_name "option2_value", ...]) 195 | AS SELECT ... 196 | ``` 197 | 198 | ### CreateHiveTableAsSelectCommand 199 | 200 | ```sql 201 | CREATE TABLE [IF NOT EXISTS] [db_name.]table_name 202 | STORED AS format AS SELECT ... 
203 | ``` 204 | 205 | ### CreateFunctionCommand 206 | 207 | ```sql 208 | CREATE TEMPORARY FUNCTION functionName AS className [USING JAR|FILE 'uri' [, JAR|FILE 'uri']] 209 | 210 | CREATE FUNCTION [databaseName.]functionName AS className [USING JAR|FILE 'uri' [, JAR|FILE 'uri']] 211 | ``` 212 | 213 | ### CreateTableCommand 214 | 215 | ```sql 216 | 217 | CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name 218 | [(col1 data_type [COMMENT col_comment], ...)] 219 | [COMMENT table_comment] 220 | [PARTITIONED BY (col3 data_type [COMMENT col_comment], ...)] 221 | [CLUSTERED BY (col1, ...) [SORTED BY (col1 [ASC|DESC], ...)] INTO num_buckets BUCKETS] 222 | [SKEWED BY (col1, col2, ...) ON ((col_value, col_value, ...), ...) 223 | [STORED AS DIRECTORIES] 224 | [ROW FORMAT row_format] 225 | [STORED AS file_format | STORED BY storage_handler_class [WITH SERDEPROPERTIES (...)]] 226 | [LOCATION path] 227 | [TBLPROPERTIES (property_name=property_value, ...)] 228 | [AS select_statement]; 229 | 230 | ``` 231 | 232 | ### CreateDataSourceTableCommand 233 | 234 | ```sql 235 | CREATE TABLE [IF NOT EXISTS] [db_name.]table_name 236 | [(col1 data_type [COMMENT col_comment], ...)] 237 | USING format OPTIONS ([option1_name "option1_value", option2_name "option2_value", ...]) 238 | ``` 239 | 240 | ### CreateTableLikeCommand 241 | 242 | ```sql 243 | CREATE TABLE [IF NOT EXISTS] [db_name.]table_name 244 | LIKE [other_db_name.]existing_table_name 245 | ``` 246 | 247 | ### CreateViewCommand 248 | 249 | ```sql 250 | CREATE view t1 partitioned 251 | ON (ds) AS 252 | SELECT * 253 | FROM ( 254 | SELECT KEY, 255 | value, 256 | ds 257 | FROM t1_new 258 | UNION ALL 259 | SELECT KEY, 260 | value, 261 | t1_old.ds 262 | FROM t1_old 263 | JOIN t1_mapping 264 | ON t1_old.keymap = t1_mapping.keymap 265 | AND t1_old.ds = t1_mapping.ds ) subq; 266 | ``` 267 | 268 | ### CacheTableCommand 269 | 270 | ```sql 271 | CACHE TABLE testCacheTable AS SELECT * FROM src; 272 | ``` 273 | 274 | ### CreateTempViewUsing 275 | 276 | ### DescribeColumnCommand 277 | 278 | ```sql 279 | DESCRIBE [EXTENDED|FORMATTED] table_name column_name; 280 | ``` 281 | 282 | ### DescribeDatabaseCommand 283 | 284 | ```sql 285 | DESCRIBE DATABASE [EXTENDED] db_name 286 | ``` 287 | 288 | ### DescribeFunctionCommand 289 | 290 | ```sql 291 | DESCRIBE FUNCTION [EXTENDED] upper; 292 | ``` 293 | 294 | ### DescribeTableCommand 295 | 296 | ```sql 297 | DESCRIBE [EXTENDED|FORMATTED] table_name partitionSpec?; 298 | ``` 299 | 300 | ### DropDatabaseCommand 301 | 302 | ```sql 303 | DROP DATABASE [IF EXISTS] database_name [RESTRICT|CASCADE]; 304 | ``` 305 | 306 | 307 | ### DropFunctionCommand 308 | 309 | ```sql 310 | DROP TEMPORARY FUNCTION helloworld; 311 | DROP TEMPORARY FUNCTION IF EXISTS helloworld; 312 | DROP FUNCTION hello.world; 313 | DROP FUNCTION IF EXISTS hello.world; 314 | ``` 315 | 316 | 317 | ### DropTableCommand 318 | 319 | ```sql 320 | DROP TABLE [IF EXISTS] table_name; 321 | DROP VIEW [IF EXISTS] [db_name.]view_name; 322 | ``` 323 | 324 | ### ExplainCommand 325 | 326 | ```sql 327 | EXPLAIN (EXTENDED | CODEGEN) SELECT * FROM ... 328 | ``` 329 | 330 | ### InsertIntoDataSourceCommand 331 | 332 | ### InsertIntoDataSourceDirCommand 333 | 334 | ```sql 335 | INSERT OVERWRITE DIRECTORY (path=STRING)? 336 | USING format OPTIONS ([option1_name "option1_value", option2_name "option2_value", ...]) 337 | SELECT ... 
338 | ``` 339 | 340 | ### InsertIntoHadoopFsRelationCommand 341 | 342 | 343 | ### InsertIntoHiveDirCommand 344 | ```sql 345 | INSERT OVERWRITE [LOCAL] DIRECTORY 346 | path 347 | [ROW FORMAT row_format] 348 | [STORED AS file_format] 349 | SELECT ... 350 | ``` 351 | 352 | ### LoadDataCommand 353 | ```sql 354 | LOAD DATA [LOCAL] INPATH 'filepath' [OVERWRITE] INTO TABLE tablename 355 | [PARTITION (partcol1=val1, partcol2=val2 ...)] 356 | ``` 357 | 358 | 359 | ### SaveIntoDataSourceCommand 360 | 361 | ### SetCommand 362 | ```sql 363 | set key = value; 364 | set -v; 365 | set; 366 | ``` 367 | 368 | ### SetDatabaseCommand 369 | 370 | ```sql 371 | USE database_name; 372 | ``` 373 | 374 | ### ShowCreateTableCommand 375 | 376 | ```sql 377 | SHOW CREATE TABLE t; 378 | ``` 379 | 380 | ### ShowColumnsCommand 381 | ```sql 382 | SHOW COLUMNS (FROM | IN) table_identifier [(FROM | IN) database]; 383 | ``` 384 | 385 | ### ShowDatabasesCommand 386 | ```sql 387 | SHOW (DATABASES|SCHEMAS) [LIKE 'identifier_with_wildcards']; 388 | ``` 389 | 390 | ### ShowFunctionsCommand 391 | ```sql 392 | SHOW FUNCTIONS [LIKE pattern] 393 | ``` 394 | 395 | ### ShowPartitionsCommand 396 | ```sql 397 | SHOW PARTITIONS [db_name.]table_name [PARTITION(partition_spec)]; 398 | ``` 399 | 400 | ### ShowTablesCommand 401 | ```sql 402 | SHOW TABLES [(IN|FROM) database_name] [[LIKE] 'identifier_with_wildcards']; 403 | ``` 404 | 405 | ### ShowTablePropertiesCommand 406 | ```sql 407 | SHOW TBLPROPERTIES table_name[('propertyKey')]; 408 | ``` 409 | 410 | 411 | ### TruncateTableCommand 412 | 413 | ```sql 414 | TRUNCATE TABLE tablename [PARTITION (partcol1=val1, partcol2=val2 ...)] 415 | ``` 416 | 417 | 418 | ### UncacheTableCommand 419 | 420 | ```sql 421 | uncache table t; 422 | ``` 423 | 424 | ### Ignored 425 | 426 | ``` 427 | // AddFileCommand 428 | // AddJarCommand 429 | // ... 430 | 431 | ``` 432 | 433 | ## Projection 434 | 435 | ## LeafNode 436 | 437 | ## UnaryNode 438 | 439 | ## BinaryNode 440 | 441 | ## Union 442 | 443 | -------------------------------------------------------------------------------- /docs/ranger_authorization.md: -------------------------------------------------------------------------------- 1 | # Spark SQL Ranger Security Support Guide 2 | 3 | Ranger security support is one of the available Authorization methods for Spark SQL with [spark-authorizer](https://github.com/yaooqinn/spark-authorizer). 4 | 5 | Ranger is a framework to enable, monitor and manage comprehensive data security across the Hadoop platform. The [[spark-authorizer](https://github.com/yaooqinn/spark-authorizer) enables Spark SQL with control access ability reusing [Ranger Plugin for Hive MetaStore 6 | ](https://cwiki.apache.org/confluence/display/RANGER/Ranger+Plugin+for+Hive+MetaStore). [Ranger](https://ranger.apache.org/) makes the scope of existing SQL-Standard Based Authorization expanded but without supporting Spark SQL. [spark-authorizer](https://github.com/yaooqinn/spark-authorizer) sticks them together. 
7 | 8 | ## Ranger Security Support 9 | 10 | |Configuration| Configuration File|Example| Description | 11 | |---|---|---|---| 12 | |ranger.plugin.hive.policy.rest.url|ranger-hive-security.xml|http://ranger.admin.one:6080,http://ranger.admin.two.lt.163.org:6080|Comma-separated list of Ranger admin addresses| 13 | |ranger.plugin.hive.service.name|ranger-hive-security.xml||Name of the Ranger service containing policies for this Hive instance| 14 | |ranger.plugin.hive.policy.cache.dir|ranger-hive-security.xml|policycache|Local directory used to cache Ranger policies| 15 | 16 | Create `ranger-hive-security.xml` in `$SPARK_HOME/conf` with the configurations above properly set. 17 | 18 | ## Configure Hive Metastore Client Side 19 | ```xml 20 | <property> 21 |   <name>hive.security.authorization.enabled</name> 22 |   <value>true</value> 23 | </property> 24 | <property> 25 |   <name>hive.security.authorization.manager</name> 26 |   <value>org.apache.ranger.authorization.hive.authorizer.RangerHiveAuthorizerFactory</value> 27 | </property> 28 | <property> 29 |   <name>hive.security.authenticator.manager</name> 30 |   <value>org.apache.hadoop.hive.ql.security.SessionStateUserAuthenticator</value> 31 | </property> 32 | <property> 33 |   <name>hive.conf.restricted.list</name> 34 |   <value>hive.security.authorization.enabled,hive.security.authorization.manager,hive.security.authenticator.manager</value> 35 | </property> 36 | ``` 37 | 38 | Add the configurations above to `$SPARK_HOME/conf/hive-site.xml` to enable Ranger security support. 39 | 40 | ## Ranger Audit Support 41 | 42 | All access to Spark SQL/Hive tables that is authorized by Ranger is automatically audited by Ranger. Audit destinations such as HDFS or Solr are supported. 43 | 44 | |Configuration| Configuration File|Example| Description | 45 | |---|---|---|---| 46 | |xasecure.audit.is.enabled|ranger-hive-audit.xml|false|When true, auditing is enabled| 47 | |xasecure.audit.jpa.javax.persistence.jdbc.driver|ranger-hive-audit.xml|com.mysql.jdbc.Driver|JDBC driver for auditing to a MySQL database destination| 48 | |xasecure.audit.jpa.javax.persistence.jdbc.url|ranger-hive-audit.xml|jdbc:mysql://address/dbname|JDBC URL of the database instance audited to| 49 | |xasecure.audit.jpa.javax.persistence.jdbc.user|ranger-hive-audit.xml|*username*|User name| 50 | |xasecure.audit.jpa.javax.persistence.jdbc.password|ranger-hive-audit.xml|*Password*|Password| 51 | 52 | Create `ranger-hive-audit.xml` in `$SPARK_HOME/conf` with the configurations above properly set to enable or disable auditing. 53 | 54 | ## Install `ranger-hive-plugin` for Spark SQL 55 | 56 | Please refer to [Install and Enable Ranger Hive Plugin](https://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.6.4/bk_command-line-installation/content/ch14s05s03s02.html) for an overview of how to set up the Ranger jars for Spark SQL. 57 | -------------------------------------------------------------------------------- /docs/spark_sql_authorization.md: -------------------------------------------------------------------------------- 1 | # ACL Management for Spark SQL 2 | 3 | Three primary modes of Spark SQL authorization are available with spark-authorizer: 4 | 5 | ## Storage-Based Authorization 6 | 7 | Enabling Storage-Based Authorization in the `Hive Metastore Server` uses HDFS permissions as the main source of verification and allows for a consistent data and metadata authorization policy. It controls metadata access by verifying that the user has permission to access the corresponding directories on HDFS.
Similar to `HiveServer2`, files and directories are translated into Hive metadata objects, such as databases, tables and partitions, and are protected from end users' queries when Spark SQL is offered as a service through [Kyuubi](https://github.com/yaooqinn/kyuubi), Livy, etc. 8 | 9 | Storage-Based Authorization offers database-, table- and partition-level coarse-grained access control. 10 | 11 | Please refer to the [Storage-Based Authorization Guide](https://yaooqinn.github.io/spark-authorizer/docs/storage_based_authorization.html) in the online documentation for an overview of how to configure Storage-Based Authorization for Spark SQL. 12 | 13 | ## SQL-Standard Based Authorization 14 | 15 | Enabling SQL-Standard Based Authorization gives users finer-grained access control compared with Storage-Based Authorization. On top of what Storage-Based Authorization provides, it extends control down to the view and column level. Unfortunately, Spark SQL does not support the GRANT/REVOKE statements that manage this access; they can only be issued through HiveServer2. The good news is that [spark-authorizer](https://github.com/yaooqinn/spark-authorizer) makes Spark SQL understand the fine-grained access control granted or revoked through Hive. 16 | 17 | For Spark SQL client users who can directly access HDFS, SQL-Standard Based Authorization can easily be bypassed. 18 | 19 | With [Kyuubi](https://github.com/yaooqinn/kyuubi), SQL-Standard Based Authorization is guaranteed, because the security configurations, metadata, and storage information are hidden from end users. 20 | 21 | Please refer to the [SQL-Standard Based Authorization Guide](https://yaooqinn.github.io/spark-authorizer/docs/sql_std_based_authorization.html) in the online documentation for an overview of how to configure SQL-Standard Based Authorization for Spark SQL. 22 | 23 | ## Ranger Security Support 24 | 25 | [Apache Ranger](https://ranger.apache.org/) is a framework to enable, monitor and manage comprehensive data security across the Hadoop platform, but it stops short of Spark and Spark SQL. The [spark-authorizer](https://github.com/yaooqinn/spark-authorizer) gives Spark SQL access control capabilities by reusing the [Ranger Plugin for Hive MetaStore 26 | ](https://cwiki.apache.org/confluence/display/RANGER/Ranger+Plugin+for+Hive+MetaStore). [Apache Ranger](https://ranger.apache.org/) expands the scope of the existing SQL-Standard Based Authorization but does not itself support Spark SQL; [spark-authorizer](https://github.com/yaooqinn/spark-authorizer) bridges the two. 27 | 28 | Please refer to the [Spark SQL Ranger Security Support Guide](https://yaooqinn.github.io/spark-authorizer/docs/ranger_authorization.html) in the online documentation for an overview of how to configure Ranger for Spark SQL. 29 | -------------------------------------------------------------------------------- /docs/sql_std_based_authorization.md: -------------------------------------------------------------------------------- 1 | # Spark SQL SQL-Standard Based Authorization Guide 2 | 3 | [SQL-Standard Based Authorization](https://cwiki.apache.org/confluence/display/Hive/SQL+Standard+Based+Hive+Authorization) is one of the available Authorization methods for Spark SQL with [spark-authorizer](https://github.com/yaooqinn/spark-authorizer).
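To make the division of labour concrete, here is a hypothetical walk-through, assuming a Hive-enabled `spark` session with the authorizer rule injected and SQL-Standard Based Authorization configured as described below; the database, table and user names are invented.

```scala
// 1. An admin grants the privilege on the Hive side, e.g. via beeline
//    against HiveServer2 (Spark SQL itself cannot run this statement):
//      GRANT SELECT ON TABLE sales.orders TO USER alice;

// 2. A query by alice on the granted table passes the privilege check:
spark.sql("SELECT order_id, amount FROM sales.orders WHERE ds = '2018-08-08'").show()

// 3. A query on a table alice was never granted is rejected by the authorizer
//    with an org.apache.hadoop.hive.ql.security.authorization.plugin
//    .HiveAccessControlException before any execution happens:
spark.sql("SELECT * FROM sales.salaries").show()
```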
4 | 5 | The [spark-authorizer](https://github.com/yaooqinn/spark-authorizer) can enable Spark SQL with Hive's [SQL-Standard Based Authorization](https://cwiki.apache.org/confluence/display/Hive/SQL+Standard+Based+Hive+Authorization) for fine-grained access control. It is based on the SQL standard for authorization, and uses the familiar GRANT/REVOKE statements to control access. 6 | 7 | For Spark SQL client users who can directly access HDFS, SQL-Standard Based Authorization can easily be bypassed. 8 | 9 | With [Kyuubi](https://github.com/yaooqinn/kyuubi), SQL-Standard Based Authorization is guaranteed, because the security configurations, metadata, and storage information are hidden from end users. It can be used along with Storage-Based Authorization on the `Hive Metastore Server`. 10 | 11 | ## Minimum Permissions 12 | 13 | The following table shows the minimum permissions required for Spark SQL when using SQL-Standard Based Authorization: 14 | 15 | Operation | Permission required 16 | ---|--- 17 | create table | ownership of database 18 | drop table | ownership 19 | describe table | select 20 | show partitions | select 21 | alter table location | ownership; URI privilege: RWX permission + ownership (for new location) 22 | alter partition location | ownership; URI privilege: RWX permission + ownership (for new partition location) 23 | alter table add partition | insert; URI privilege: RWX permission + ownership (for partition location) 24 | alter table drop partition | delete 25 | alter table (all of them except the ones listed above) | ownership 26 | truncate table | ownership 27 | create view | select "with grant option" 28 | alter view properties | ownership 29 | alter view rename | ownership 30 | drop view | ownership 31 | analyze table | select and insert 32 | show columns | select 33 | show table properties | select 34 | CTAS | select (of input) and ownership (of database) 35 | select | select 36 | insert | insert and delete (for overwrite) 37 | delete | delete 38 | load | insert (output); delete (output); URI privilege: RWX permission + ownership (input location) 39 | show create table | select "with grant option" 40 | create function | admin 41 | drop function | admin 42 | MSCK | admin 43 | alter database | admin 44 | create database | URI privilege: RWX permission + ownership (if custom location specified) 45 | explain | select 46 | drop database | ownership 47 | 48 | 49 | ## Privileges 50 | - Select: READ access to an object 51 | - Insert: access to ADD data to an object (table) 52 | - Update: access to run UPDATE queries on an object (table) 53 | - Delete: access to DELETE data in an object (table) 54 | - All Privileges: includes all of the above privileges 55 | 56 | 57 | ## Limitations 58 | 59 | Spark SQL does not support GRANT/REVOKE statements; these can only be executed through Hive. 60 | -------------------------------------------------------------------------------- /docs/storage_based_authorization.md: -------------------------------------------------------------------------------- 1 | # Spark SQL Storage-Based Authorization Guide 2 | 3 | Storage-Based Authorization is one of the available Authorization methods for Spark SQL with or without [spark-authorizer](https://github.com/yaooqinn/spark-authorizer). 4 | 5 | When the `Hive Metastore Server` is configured to use Storage-Based Authorization, it uses the file system permissions of the directories corresponding to the different kinds of metadata objects as the source of verification for the authorization policy.
Using this authorization method on the metastore server is recommended. 6 | 7 | With Hive Metastore as the external catalog for Spark SQL, each database or table has a corresponding directory on the file system used as the storage layer. Under this authorization model, the rwx permissions of that directory determine the permissions a user, or group, has on the database or table. 8 | 9 | With Hive 0.14 or later as Spark SQL's metastore client, this can be enabled even without [spark-authorizer](https://github.com/yaooqinn/spark-authorizer). 10 | 11 | 12 | ## Configuring Parameters for Storage-Based Authorization 13 | 14 | ### Hive Metastore Server Side 15 | 16 | To enable Storage-Based Authorization in the Hive metastore server, configure these properties in the `hive-site.xml` for the server. 17 | 18 | Configuration| Description 19 | ---|--- 20 | `hive.metastore.pre.event.listeners` | This parameter enables metastore security. Set to `org.apache.hadoop.hive.ql.security.authorization.AuthorizationPreEventListener` 21 | `hive.security.metastore.authenticator.manager` | Set to `org.apache.hadoop.hive.ql.security.HadoopDefaultMetastoreAuthenticator` 22 | `hive.security.metastore.authorization.auth.reads` | When true, Hive metastore authorization checks for read access. 23 | `hive.security.metastore.authorization.manager` | A comma-separated list of the names of authorization manager classes that are to be used in the metastore for authorization. Set to `org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider` 24 | 25 | ### Hive Metastore Client Side 26 | 27 | The client could be the Spark SQL Client, Spark Thrift Server, HiveServer2, Kyuubi, etc. Configure these properties in the `hive-site.xml` for the client. 28 | 29 | Configuration| Description 30 | ---|--- 31 | `hive.security.authorization.enabled` | Enables or disables authorization. Set this to `true` on clients that should perform authorization checks. 32 | `hive.security.authorization.manager` | The class name of the Hive client authorization manager. For Storage-Based Authorization, specify the value `org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider` 33 | 34 | ## Minimum Permissions 35 | 36 | The following table shows the minimum permissions required for Spark SQL when using Storage-Based Authorization: 37 | 38 | Operation | Permission required 39 | ---|--- 40 | alter table | table write access 41 | create table | database write access 42 | CTAS | table read access 43 | load | table write access 44 | select | table read access 45 | show tables | database read access 46 | 47 | 48 | ## Limitations 49 | 50 | Spark SQL does not support GRANT/REVOKE statements; these can only be executed through Hive.
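As a rough illustration of what the minimum-permissions table above boils down to, the sketch below (not part of spark-authorizer) uses the Hadoop `FileSystem` API to look at the permission bits of a table directory, which is exactly the information the storage-based model reasons about; the warehouse path, database and table names are made up.

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.fs.permission.FsAction

// Hypothetical warehouse layout: /user/hive/warehouse/<db>.db/<table>
val tableDir = new Path("/user/hive/warehouse/sales.db/orders")

val fs = FileSystem.get(new Configuration())
val status = fs.getFileStatus(tableDir)
val perm = status.getPermission

// e.g. `select` needs read access on this directory, `alter table` needs write access
println(s"owner=${status.getOwner} group=${status.getGroup} perm=$perm")
println(s"owner may read:  ${perm.getUserAction.implies(FsAction.READ)}")
println(s"owner may write: ${perm.getUserAction.implies(FsAction.WRITE)}")
```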
51 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 18 | 19 | 22 | 4.0.0 23 | 24 | yaooqinn 25 | spark-authorizer 26 | Spark Authorizer 27 | 2.1.1 28 | jar 29 | 30 | 31 | 32 | The Apache Software License, Version 2.0 33 | http://www.apache.org/licenses/LICENSE-2.0.txt 34 | manual 35 | 36 | 37 | 38 | 39 | 40 | yaooqinn 41 | Kent Yao 42 | hzyaoqin@corp.netease.com 43 | yaooqinn 44 | http://www.jianshu.com/u/14effefbbf32 45 | 46 | 47 | 48 | 49 | UTF-8 50 | 2.11.8 51 | 2.11 52 | 2.2.6 53 | org.apache.spark 54 | 2.2.2 55 | provided 56 | org.spark-project.hive 57 | 1.2.1.spark2 58 | 1.7 59 | 60 | 61 | 62 | 63 | central 64 | 65 | Maven Repository 66 | https://repo.maven.apache.org/maven2 67 | 68 | true 69 | 70 | 71 | false 72 | 73 | 74 | 75 | apache 76 | Apache Repository Snapshots 77 | http://repository.apache.org/snapshots 78 | 79 | false 80 | 81 | 82 | true 83 | daily 84 | warn 85 | 86 | 87 | 88 | 89 | 90 | central 91 | https://repo.maven.apache.org/maven2 92 | 93 | true 94 | 95 | 96 | false 97 | 98 | 99 | 100 | apache 101 | Apache Repository Snapshots 102 | http://repository.apache.org/snapshots 103 | 104 | false 105 | 106 | 107 | true 108 | daily 109 | warn 110 | 111 | 112 | 113 | 114 | 115 | 116 | org.scala-lang 117 | scala-library 118 | ${scala.version} 119 | provided 120 | 121 | 122 | 123 | ${spark.group} 124 | spark-sql_${scala.binary.version} 125 | ${spark.version} 126 | ${spark.scope} 127 | 128 | 129 | 130 | ${spark.group} 131 | spark-hive_${scala.binary.version} 132 | ${spark.version} 133 | ${spark.scope} 134 | 135 | 136 | * 137 | * 138 | 139 | 140 | 141 | 142 | ${hive.group} 143 | hive-exec 144 | ${hive.version} 145 | provided 146 | 147 | 148 | 149 | 150 | org.scalatest 151 | scalatest_${scala.binary.version} 152 | ${scalatest.version} 153 | test 154 | 155 | 156 | 157 | ${spark.group} 158 | spark-core_${scala.binary.version} 159 | ${spark.version} 160 | test-jar 161 | test 162 | 163 | 164 | 165 | ${spark.group} 166 | spark-catalyst_${scala.binary.version} 167 | ${spark.version} 168 | test-jar 169 | test 170 | 171 | 172 | ${spark.group} 173 | spark-sql_${scala.binary.version} 174 | ${spark.version} 175 | test-jar 176 | test 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | net.alchim31.maven 185 | scala-maven-plugin 186 | 3.2.2 187 | 188 | 189 | eclipse-add-source 190 | 191 | add-source 192 | 193 | 194 | 195 | scala-compile-first 196 | 197 | compile 198 | 199 | 200 | 201 | scala-test-compile-first 202 | 203 | testCompile 204 | 205 | 206 | 207 | 208 | ${scala.version} 209 | incremental 210 | true 211 | 212 | -unchecked 213 | -deprecation 214 | -feature 215 | -explaintypes 216 | -Yno-adapted-args 217 | 218 | 219 | -Xms1024m 220 | -Xmx1024m 221 | -XX:ReservedCodeCacheSize=512M 222 | 223 | 224 | -source 225 | ${java.version} 226 | -target 227 | ${java.version} 228 | -Xlint:all,-serial,-path,-try 229 | 230 | 231 | 232 | 233 | 234 | 235 | org.apache.maven.plugins 236 | maven-surefire-plugin 237 | 2.12.4 238 | 239 | true 240 | 241 | 242 | 243 | 244 | org.scalatest 245 | scalatest-maven-plugin 246 | 1.0 247 | 248 | ${project.build.directory}/surefire-reports 249 | . 
250 | TestSuite.txt 251 | 252 | 253 | 254 | test 255 | 256 | test 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | spark-2.2 267 | 268 | 2.2.2 269 | 2.2.6 270 | 271 | 272 | 273 | 274 | spark-2.3 275 | 276 | 2.3.2 277 | 3.0.3 278 | 279 | 280 | 281 | -------------------------------------------------------------------------------- /scalastyle-config.xml: -------------------------------------------------------------------------------- 1 | 17 | 39 | 40 | 41 | Scalastyle standard configuration 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | true 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | ARROW, EQUALS, ELSE, TRY, CATCH, FINALLY, LARROW, RARROW 126 | 127 | 128 | 129 | 130 | 131 | ARROW, EQUALS, COMMA, COLON, IF, ELSE, DO, WHILE, FOR, MATCH, TRY, CATCH, FINALLY, LARROW, RARROW 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | ^FunSuite[A-Za-z]*$ 141 | Tests must extend org.apache.spark.SparkFunSuite instead. 142 | 143 | 144 | 145 | 146 | ^println$ 147 | 151 | 152 | 153 | 154 | @VisibleForTesting 155 | 158 | 159 | 160 | 161 | Runtime\.getRuntime\.addShutdownHook 162 | 170 | 171 | 172 | 173 | mutable\.SynchronizedBuffer 174 | 182 | 183 | 184 | 185 | Class\.forName 186 | 193 | 194 | 195 | 196 | Await\.result 197 | 204 | 205 | 206 | 207 | Await\.ready 208 | 215 | 216 | 217 | 218 | 219 | JavaConversions 220 | Instead of importing implicits in scala.collection.JavaConversions._, import 221 | scala.collection.JavaConverters._ and use .asScala / .asJava methods 222 | 223 | 224 | 225 | org\.apache\.commons\.lang\. 226 | Use Commons Lang 3 classes (package org.apache.commons.lang3.*) instead 227 | of Commons Lang 2 (package org.apache.commons.lang.*) 228 | 229 | 230 | 231 | extractOpt 232 | Use Utils.jsonOption(x).map(.extract[T]) instead of .extractOpt[T], as the latter 233 | is slower. 234 | 235 | 236 | 237 | 238 | java,scala,3rdParty,spark 239 | javax?\..* 240 | scala\..* 241 | (?!org\.apache\.spark\.).* 242 | org\.apache\.spark\..* 243 | 244 | 245 | 246 | 247 | 248 | COMMA 249 | 250 | 251 | 252 | 253 | 254 | \)\{ 255 | 258 | 259 | 260 | 261 | (?m)^(\s*)/[*][*].*$(\r|)\n^\1 [*] 262 | Use Javadoc style indentation for multiline comments 263 | 264 | 265 | 266 | case[^\n>]*=>\s*\{ 267 | Omit braces in case clauses. 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 800> 321 | 322 | 323 | 324 | 325 | 30 326 | 327 | 328 | 329 | 330 | 10 331 | 332 | 333 | 334 | 335 | 50 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | -1,0,1,2,3 347 | 348 | 349 | 350 | -------------------------------------------------------------------------------- /src/main/scala/com/githup/yaooqinn/spark/authorizer/Logging.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package com.githup.yaooqinn.spark.authorizer 19 | 20 | import org.slf4j.LoggerFactory 21 | 22 | trait Logging { 23 | 24 | lazy val logger = LoggerFactory.getLogger(this.getClass) 25 | 26 | def trace(message: => Any): Unit = { 27 | if (logger.isTraceEnabled) { 28 | logger.trace(message.toString) 29 | } 30 | } 31 | 32 | def debug(message: => Any): Unit = { 33 | if (logger.isDebugEnabled) { 34 | logger.debug(message.toString) 35 | } 36 | } 37 | 38 | def info(message: => Any): Unit = { 39 | if (logger.isInfoEnabled) { 40 | logger.info(message.toString) 41 | } 42 | } 43 | 44 | def warn(message: => Any): Unit = { 45 | logger.warn(message.toString) 46 | } 47 | 48 | def warn(message: => Any, t: Throwable): Unit = { 49 | logger.warn(message.toString, t) 50 | } 51 | 52 | def error(message: => Any, t: Throwable): Unit = { 53 | logger.error(message.toString, t) 54 | } 55 | 56 | def error(message: => Any): Unit = { 57 | logger.error(message.toString) 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/ranger/authorization/spark/authorizer/RangerSparkSQLExtension.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.ranger.authorization.spark.authorizer 19 | 20 | import org.apache.spark.sql.SparkSessionExtensions 21 | import org.apache.spark.sql.catalyst.optimizer.AuthorizerExtension 22 | 23 | class RangerSparkSQLExtension extends Extensions { 24 | override def apply(ext: SparkSessionExtensions): Unit = { 25 | ext.injectOptimizerRule(AuthorizerExtension) 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/ranger/authorization/spark/authorizer/package.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. 
See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.ranger.authorization.spark 19 | 20 | import org.apache.spark.sql.SparkSessionExtensions 21 | 22 | package object authorizer { 23 | 24 | type Extensions = SparkSessionExtensions => Unit 25 | 26 | } 27 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/catalyst/optimizer/Authorizable.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.sql.catalyst.optimizer 19 | 20 | import java.io.File 21 | 22 | import com.githup.yaooqinn.spark.authorizer.Logging 23 | import org.apache.hadoop.hive.ql.plan.HiveOperation 24 | import org.apache.hadoop.hive.ql.security.authorization.plugin.{HiveAuthzContext, HiveOperationType} 25 | 26 | import org.apache.spark.sql.SparkSession 27 | import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan} 28 | import org.apache.spark.sql.catalyst.rules.Rule 29 | import org.apache.spark.sql.execution.command._ 30 | import org.apache.spark.sql.execution.datasources.{CreateTempViewUsing, InsertIntoDataSourceCommand, InsertIntoHadoopFsRelationCommand} 31 | import org.apache.spark.sql.hive.{HiveExternalCatalog, PrivilegesBuilder} 32 | import org.apache.spark.sql.hive.client.AuthzImpl 33 | import org.apache.spark.sql.hive.execution.CreateHiveTableAsSelectCommand 34 | import org.apache.spark.util.Utils 35 | 36 | trait Authorizable extends Rule[LogicalPlan] with Logging { 37 | 38 | def spark: SparkSession 39 | 40 | /** 41 | * Visit the [[LogicalPlan]] recursively to get all hive privilege objects, check the privileges 42 | * using Hive Authorizer V2 which provide sql based authorization and can implements 43 | * ranger-hive-plugins. 44 | * If the user is authorized, then the original plan will be returned; otherwise, interrupted by 45 | * some particular privilege exceptions. 
46 | * @param plan a spark LogicalPlan for verifying privileges 47 | * @return a plan itself which has gone through the privilege check. 48 | */ 49 | override def apply(plan: LogicalPlan): LogicalPlan = { 50 | val operationType: HiveOperationType = getOperationType(plan) 51 | val authzContext = new HiveAuthzContext.Builder().build() 52 | val (in, out) = PrivilegesBuilder.build(plan) 53 | spark.sharedState.externalCatalog match { 54 | case _: HiveExternalCatalog => 55 | AuthzImpl.checkPrivileges(spark, operationType, in, out, authzContext) 56 | case _ => 57 | } 58 | // iff no exception. 59 | // We just return the original plan here, so this rule will be executed only once. 60 | plan 61 | } 62 | 63 | def policyCacheDir: Option[String] = { 64 | Option(spark.sparkContext.hadoopConfiguration.get("ranger.plugin.hive.policy.cache.dir")) 65 | } 66 | 67 | 68 | def createCacheDirIfNonExists(dir: String): Unit = { 69 | val file = new File(dir) 70 | if (!file.exists()) { 71 | if (file.mkdirs()) { 72 | info("Creating ranger policy cache directory at " + file.getAbsolutePath) 73 | file.deleteOnExit() 74 | } else { 75 | warn("Unable to create ranger policy cache directory at " + file.getAbsolutePath) 76 | } 77 | } 78 | } 79 | 80 | policyCacheDir match { 81 | case Some(dir) => createCacheDirIfNonExists(dir) 82 | case _ => 83 | // load resources from ranger configuration files 84 | Option(Utils.getContextOrSparkClassLoader.getResource("ranger-hive-security.xml")) match { 85 | case Some(url) => 86 | spark.sparkContext.hadoopConfiguration.addResource(url) 87 | policyCacheDir match { 88 | case Some(dir) => createCacheDirIfNonExists(dir) 89 | case _ => 90 | } 91 | case _ => 92 | } 93 | } 94 | 95 | /** 96 | * Mapping of [[LogicalPlan]] -> [[HiveOperation]] 97 | * @param plan a spark LogicalPlan 98 | * @return 99 | */ 100 | def getHiveOperation(plan: LogicalPlan): HiveOperation = { 101 | plan match { 102 | case c: Command => c match { 103 | case _: AlterDatabasePropertiesCommand => HiveOperation.ALTERDATABASE 104 | case p if p.nodeName == "AlterTableAddColumnsCommand" => HiveOperation.ALTERTABLE_ADDCOLS 105 | case _: AlterTableAddPartitionCommand => HiveOperation.ALTERTABLE_ADDPARTS 106 | case p if p.nodeName == "AlterTableChangeColumnCommand" => 107 | HiveOperation.ALTERTABLE_RENAMECOL 108 | case _: AlterTableDropPartitionCommand => HiveOperation.ALTERTABLE_DROPPARTS 109 | case _: AlterTableRecoverPartitionsCommand => HiveOperation.MSCK 110 | case _: AlterTableRenamePartitionCommand => HiveOperation.ALTERTABLE_RENAMEPART 111 | case a: AlterTableRenameCommand => 112 | if (!a.isView) HiveOperation.ALTERTABLE_RENAME else HiveOperation.ALTERVIEW_RENAME 113 | case _: AlterTableSetPropertiesCommand 114 | | _: AlterTableUnsetPropertiesCommand => HiveOperation.ALTERTABLE_PROPERTIES 115 | case _: AlterTableSerDePropertiesCommand => HiveOperation.ALTERTABLE_SERDEPROPERTIES 116 | case _: AlterTableSetLocationCommand => HiveOperation.ALTERTABLE_LOCATION 117 | case _: AlterViewAsCommand => HiveOperation.QUERY 118 | // case _: AlterViewAsCommand => HiveOperation.ALTERVIEW_AS 119 | 120 | case _: AnalyzeColumnCommand => HiveOperation.QUERY 121 | // case _: AnalyzeTableCommand => HiveOperation.ANALYZE_TABLE 122 | // Hive treat AnalyzeTableCommand as QUERY, obey it. 
123 | case _: AnalyzeTableCommand => HiveOperation.QUERY 124 | case p if p.nodeName == "AnalyzePartitionCommand" => HiveOperation.QUERY 125 | 126 | case _: CreateDatabaseCommand => HiveOperation.CREATEDATABASE 127 | case _: CreateDataSourceTableAsSelectCommand 128 | | _: CreateHiveTableAsSelectCommand => HiveOperation.CREATETABLE_AS_SELECT 129 | case _: CreateFunctionCommand => HiveOperation.CREATEFUNCTION 130 | case _: CreateTableCommand 131 | | _: CreateDataSourceTableCommand => HiveOperation.CREATETABLE 132 | case _: CreateTableLikeCommand => HiveOperation.CREATETABLE 133 | case _: CreateViewCommand 134 | | _: CacheTableCommand 135 | | _: CreateTempViewUsing => HiveOperation.CREATEVIEW 136 | 137 | case p if p.nodeName == "DescribeColumnCommand" => HiveOperation.DESCTABLE 138 | case _: DescribeDatabaseCommand => HiveOperation.DESCDATABASE 139 | case _: DescribeFunctionCommand => HiveOperation.DESCFUNCTION 140 | case _: DescribeTableCommand => HiveOperation.DESCTABLE 141 | 142 | case _: DropDatabaseCommand => HiveOperation.DROPDATABASE 143 | // Hive don't check privileges for `drop function command`, what about a unverified user 144 | // try to drop functions. 145 | // We treat permanent functions as tables for verifying. 146 | case d: DropFunctionCommand if !d.isTemp => HiveOperation.DROPTABLE 147 | case d: DropFunctionCommand if d.isTemp => HiveOperation.DROPFUNCTION 148 | case _: DropTableCommand => HiveOperation.DROPTABLE 149 | 150 | case e: ExplainCommand => getHiveOperation(e.logicalPlan) 151 | 152 | case _: InsertIntoDataSourceCommand => HiveOperation.QUERY 153 | case p if p.nodeName == "InsertIntoDataSourceDirCommand" => HiveOperation.QUERY 154 | case _: InsertIntoHadoopFsRelationCommand => HiveOperation.CREATETABLE_AS_SELECT 155 | case p if p.nodeName == "InsertIntoHiveDirCommand" => HiveOperation.QUERY 156 | case p if p.nodeName == "InsertIntoHiveTable" => HiveOperation.QUERY 157 | 158 | case _: LoadDataCommand => HiveOperation.LOAD 159 | 160 | case p if p.nodeName == "SaveIntoDataSourceCommand" => HiveOperation.QUERY 161 | case s: SetCommand if s.kv.isEmpty || s.kv.get._2.isEmpty => HiveOperation.SHOWCONF 162 | case _: SetDatabaseCommand => HiveOperation.SWITCHDATABASE 163 | case _: ShowCreateTableCommand => HiveOperation.SHOW_CREATETABLE 164 | case _: ShowColumnsCommand => HiveOperation.SHOWCOLUMNS 165 | case _: ShowDatabasesCommand => HiveOperation.SHOWDATABASES 166 | case _: ShowFunctionsCommand => HiveOperation.SHOWFUNCTIONS 167 | case _: ShowPartitionsCommand => HiveOperation.SHOWPARTITIONS 168 | case _: ShowTablesCommand => HiveOperation.SHOWTABLES 169 | case _: ShowTablePropertiesCommand => HiveOperation.SHOW_TBLPROPERTIES 170 | case s: StreamingExplainCommand => 171 | getHiveOperation(s.queryExecution.optimizedPlan) 172 | 173 | case _: TruncateTableCommand => HiveOperation.TRUNCATETABLE 174 | 175 | case _: UncacheTableCommand => HiveOperation.DROPVIEW 176 | 177 | // Commands that do not need build privilege goes as explain type 178 | case _ => 179 | // AddFileCommand 180 | // AddJarCommand 181 | // ... 
182 | HiveOperation.EXPLAIN 183 | } 184 | case _ => HiveOperation.QUERY 185 | } 186 | } 187 | 188 | def getOperationType(logicalPlan: LogicalPlan): HiveOperationType = { 189 | HiveOperationType.valueOf(getHiveOperation(logicalPlan).name()) 190 | } 191 | } 192 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/catalyst/optimizer/Authorizer.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.sql.catalyst.optimizer 19 | 20 | import org.apache.spark.sql.SparkSession 21 | import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan 22 | import org.apache.spark.sql.catalyst.rules.Rule 23 | 24 | /** 25 | * An Optimizer Rule to do Hive Authorization V2 for Spark SQL. 26 | * 27 | * For Apache Spark 2.1.x 28 | */ 29 | object Authorizer extends Rule[LogicalPlan] with Authorizable { 30 | override def spark: SparkSession = { 31 | SparkSession.getActiveSession.getOrElse(SparkSession.getDefaultSession.get) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/catalyst/optimizer/AuthorizerExtension.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.sql.catalyst.optimizer 19 | 20 | import org.apache.spark.sql.SparkSession 21 | import org.apache.spark.sql.catalyst.plans.logical._ 22 | import org.apache.spark.sql.catalyst.rules.Rule 23 | 24 | /** 25 | * An Optimizer Rule to do Hive Authorization V2 for Spark SQL. 
26 | * 27 | * For Apache Spark 2.2.x and later 28 | */ 29 | case class AuthorizerExtension(spark: SparkSession) extends Rule[LogicalPlan] with Authorizable 30 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/catalyst/optimizer/HivePrivilegeObject.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.sql.catalyst.optimizer 19 | 20 | import java.util.{List => JList} 21 | 22 | import org.apache.hadoop.hive.ql.security.authorization.plugin.{HivePrivilegeObject => HBO} 23 | import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.{HivePrivilegeObjectType, HivePrivObjectActionType} 24 | 25 | /** 26 | * Helper class for initializing [[HBO]] with more Constructors. 27 | */ 28 | private[sql] object HivePrivilegeObject { 29 | def apply( 30 | `type`: HivePrivilegeObjectType, 31 | dbname: String, 32 | objectName: String, 33 | partKeys: JList[String], 34 | columns: JList[String], 35 | actionType: HivePrivObjectActionType, 36 | commandParams: JList[String]): HBO = { 37 | new HBO( 38 | `type`, dbname, objectName, partKeys, columns, actionType, commandParams) 39 | } 40 | 41 | def apply( 42 | `type`: HivePrivilegeObjectType, 43 | dbname: String, 44 | objectName: String, 45 | partKeys: JList[String], 46 | columns: JList[String], 47 | commandParams: JList[String]): HBO = { 48 | apply( 49 | `type`, dbname, objectName, partKeys, columns, HivePrivObjectActionType.OTHER, commandParams) 50 | } 51 | 52 | def apply( 53 | `type`: HivePrivilegeObjectType, 54 | dbname: String, 55 | objectName: String, 56 | partKeys: JList[String], 57 | columns: JList[String]): HBO = { 58 | apply( 59 | `type`, dbname, objectName, partKeys, columns, HivePrivObjectActionType.OTHER, null) 60 | } 61 | 62 | def apply( 63 | `type`: HivePrivilegeObjectType, 64 | dbname: String, 65 | objectName: String, 66 | actionType: HivePrivObjectActionType): HBO = { 67 | apply(`type`, dbname, objectName, null, null, actionType, null) 68 | } 69 | 70 | def apply( 71 | `type`: HivePrivilegeObjectType, 72 | dbname: String, 73 | objectName: String): HBO = { 74 | apply(`type`, dbname, objectName, HivePrivObjectActionType.OTHER) 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/hive/AuthzUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. 
See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.sql.hive 19 | 20 | import scala.util.{Failure, Success, Try} 21 | 22 | private[hive] object AuthzUtils { 23 | 24 | def getFieldVal(o: Any, name: String): Any = { 25 | Try { 26 | val field = o.getClass.getDeclaredField(name) 27 | field.setAccessible(true) 28 | field.get(o) 29 | } match { 30 | case Success(value) => value 31 | case Failure(exception) => throw exception 32 | } 33 | } 34 | 35 | def setFieldVal(o: Any, name: String, value: Any): Unit = { 36 | Try { 37 | val field = o.getClass.getDeclaredField(name) 38 | field.setAccessible(true) 39 | field.set(o, value.asInstanceOf[AnyRef]) 40 | } match { 41 | case Failure(exception) => throw exception 42 | case _ => 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/hive/PrivilegesBuilder.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.apache.spark.sql.hive 19 | 20 | import java.util.{ArrayList => JAList, List => JList} 21 | 22 | import scala.collection.JavaConverters._ 23 | 24 | import org.apache.hadoop.hive.ql.security.authorization.plugin.{HivePrivilegeObject => HPO} 25 | import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.{HivePrivilegeObjectType, HivePrivObjectActionType} 26 | 27 | import org.apache.spark.sql.SaveMode 28 | import org.apache.spark.sql.catalyst.TableIdentifier 29 | import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation 30 | import org.apache.spark.sql.catalyst.catalog.CatalogTable 31 | import org.apache.spark.sql.catalyst.expressions.NamedExpression 32 | import org.apache.spark.sql.catalyst.optimizer.HivePrivilegeObject 33 | import org.apache.spark.sql.catalyst.plans.logical._ 34 | import org.apache.spark.sql.execution.command._ 35 | import org.apache.spark.sql.execution.datasources._ 36 | import org.apache.spark.sql.hive.AuthzUtils._ 37 | import org.apache.spark.sql.hive.execution.CreateHiveTableAsSelectCommand 38 | import org.apache.spark.sql.types.StructField 39 | 40 | /** 41 | * [[LogicalPlan]] -> list of [[HivePrivilegeObject]]s 42 | */ 43 | private[sql] object PrivilegesBuilder { 44 | 45 | /** 46 | * Build input and output privilege objects from a Spark's [[LogicalPlan]] 47 | * 48 | * For [[ExplainCommand]]s, build its child. 49 | * For [[RunnableCommand]]s, build outputs if it has an target to write, build inputs for the 50 | * inside query if exists. 51 | * 52 | * For other queries, build inputs. 53 | * 54 | * @param plan A Spark [[LogicalPlan]] 55 | */ 56 | def build(plan: LogicalPlan): (JList[HPO], JList[HPO]) = { 57 | 58 | def doBuild(plan: LogicalPlan): (JList[HPO], JList[HPO]) = { 59 | val inputObjs = new JAList[HPO] 60 | val outputObjs = new JAList[HPO] 61 | plan match { 62 | // RunnableCommand 63 | case cmd: Command => buildCommand(cmd, inputObjs, outputObjs) 64 | // Queries 65 | case _ => buildQuery(plan, inputObjs) 66 | } 67 | (inputObjs, outputObjs) 68 | } 69 | 70 | plan match { 71 | case e: ExplainCommand => doBuild(e.logicalPlan) 72 | case p => doBuild(p) 73 | } 74 | } 75 | 76 | /** 77 | * Build HivePrivilegeObjects from Spark LogicalPlan 78 | * @param plan a Spark LogicalPlan used to generate HivePrivilegeObjects 79 | * @param hivePrivilegeObjects input or output hive privilege object list 80 | * @param projectionList Projection list after pruning 81 | */ 82 | private[this] def buildQuery( 83 | plan: LogicalPlan, 84 | hivePrivilegeObjects: JList[HPO], 85 | projectionList: Seq[NamedExpression] = Nil): Unit = { 86 | 87 | /** 88 | * Columns in Projection take priority for column level privilege checking 89 | * @param table catalogTable of a given relation 90 | */ 91 | def mergeProjection(table: CatalogTable): Unit = { 92 | if (projectionList.isEmpty) { 93 | addTableOrViewLevelObjs( 94 | table.identifier, 95 | hivePrivilegeObjects, 96 | table.partitionColumnNames, 97 | table.schema.fieldNames) 98 | } else { 99 | addTableOrViewLevelObjs( 100 | table.identifier, 101 | hivePrivilegeObjects, 102 | table.partitionColumnNames.filter(projectionList.map(_.name).contains(_)), 103 | projectionList.map(_.name)) 104 | } 105 | } 106 | 107 | plan match { 108 | case p: Project => buildQuery(p.child, hivePrivilegeObjects, p.projectList) 109 | 110 | case h if h.nodeName == "HiveTableRelation" => 111 | mergeProjection(getFieldVal(h, "tableMeta").asInstanceOf[CatalogTable]) 112 | 113 | case m if m.nodeName == "MetastoreRelation" => 
114 | mergeProjection(getFieldVal(m, "catalogTable").asInstanceOf[CatalogTable]) 115 | 116 | case c if c.nodeName == "CatalogRelation" => 117 | mergeProjection(getFieldVal(c, "tableMeta").asInstanceOf[CatalogTable]) 118 | 119 | case l: LogicalRelation if l.catalogTable.nonEmpty => mergeProjection(l.catalogTable.get) 120 | 121 | case u: UnresolvedRelation => 122 | // Normally, we shouldn't meet UnresolvedRelation here in an optimized plan. 123 | // Unfortunately, the real world is always a place where miracles happen. 124 | // We check the privileges directly without resolving the plan and leave everything 125 | // to spark to do. 126 | addTableOrViewLevelObjs(u.tableIdentifier, hivePrivilegeObjects) 127 | 128 | case p => 129 | for (child <- p.children) { 130 | buildQuery(child, hivePrivilegeObjects, projectionList) 131 | } 132 | } 133 | } 134 | 135 | /** 136 | * Build HivePrivilegeObjects from Spark LogicalPlan 137 | * @param plan a Spark LogicalPlan used to generate HivePrivilegeObjects 138 | * @param inputObjs input hive privilege object list 139 | * @param outputObjs output hive privilege object list 140 | */ 141 | private[this] def buildCommand( 142 | plan: LogicalPlan, 143 | inputObjs: JList[HPO], 144 | outputObjs: JList[HPO]): Unit = { 145 | plan match { 146 | case a: AlterDatabasePropertiesCommand => addDbLevelObjs(a.databaseName, outputObjs) 147 | 148 | case a if a.nodeName == "AlterTableAddColumnsCommand" => 149 | addTableOrViewLevelObjs( 150 | getFieldVal(a, "table").asInstanceOf[TableIdentifier], 151 | inputObjs, 152 | columns = getFieldVal(a, "colsToAdd").asInstanceOf[Seq[StructField]].map(_.name)) 153 | addTableOrViewLevelObjs( 154 | getFieldVal(a, "table").asInstanceOf[TableIdentifier], 155 | outputObjs, 156 | columns = getFieldVal(a, "colsToAdd").asInstanceOf[Seq[StructField]].map(_.name)) 157 | 158 | case a: AlterTableAddPartitionCommand => 159 | addTableOrViewLevelObjs(a.tableName, inputObjs) 160 | addTableOrViewLevelObjs(a.tableName, outputObjs) 161 | 162 | case a if a.nodeName == "AlterTableChangeColumnCommand" => 163 | addTableOrViewLevelObjs( 164 | getFieldVal(a, "tableName").asInstanceOf[TableIdentifier], 165 | inputObjs, 166 | columns = Seq(getFieldVal(a, "columnName").asInstanceOf[String])) 167 | 168 | case a: AlterTableDropPartitionCommand => 169 | addTableOrViewLevelObjs(a.tableName, inputObjs) 170 | addTableOrViewLevelObjs(a.tableName, outputObjs) 171 | 172 | case a: AlterTableRecoverPartitionsCommand => 173 | addTableOrViewLevelObjs(a.tableName, inputObjs) 174 | addTableOrViewLevelObjs(a.tableName, outputObjs) 175 | 176 | case a: AlterTableRenameCommand if !a.isView || a.oldName.database.nonEmpty => 177 | // rename tables / permanent views 178 | addTableOrViewLevelObjs(a.oldName, inputObjs) 179 | addTableOrViewLevelObjs(a.newName, outputObjs) 180 | 181 | case a: AlterTableRenamePartitionCommand => 182 | addTableOrViewLevelObjs(a.tableName, inputObjs) 183 | addTableOrViewLevelObjs(a.tableName, outputObjs) 184 | 185 | case a: AlterTableSerDePropertiesCommand => 186 | addTableOrViewLevelObjs(a.tableName, inputObjs) 187 | addTableOrViewLevelObjs(a.tableName, outputObjs) 188 | 189 | case a: AlterTableSetLocationCommand => 190 | addTableOrViewLevelObjs(a.tableName, inputObjs) 191 | addTableOrViewLevelObjs(a.tableName, outputObjs) 192 | 193 | case a: AlterTableSetPropertiesCommand => 194 | addTableOrViewLevelObjs(a.tableName, inputObjs) 195 | addTableOrViewLevelObjs(a.tableName, outputObjs) 196 | 197 | case a: AlterTableUnsetPropertiesCommand => 198 | 
addTableOrViewLevelObjs(a.tableName, inputObjs) 199 | addTableOrViewLevelObjs(a.tableName, outputObjs) 200 | 201 | case a: AlterViewAsCommand => 202 | if (a.name.database.nonEmpty) { 203 | // it's a permanent view 204 | addTableOrViewLevelObjs(a.name, outputObjs) 205 | } 206 | buildQuery(a.query, inputObjs) 207 | 208 | case a: AnalyzeColumnCommand => 209 | addTableOrViewLevelObjs( 210 | a.tableIdent, inputObjs, columns = a.columnNames) 211 | addTableOrViewLevelObjs( 212 | a.tableIdent, outputObjs, columns = a.columnNames) 213 | 214 | case a if a.nodeName == "AnalyzePartitionCommand" => 215 | addTableOrViewLevelObjs( 216 | getFieldVal(a, "tableIdent").asInstanceOf[TableIdentifier], inputObjs) 217 | addTableOrViewLevelObjs( 218 | getFieldVal(a, "tableIdent").asInstanceOf[TableIdentifier], outputObjs) 219 | 220 | case a: AnalyzeTableCommand => 221 | addTableOrViewLevelObjs(a.tableIdent, inputObjs, columns = Seq("RAW__DATA__SIZE")) 222 | addTableOrViewLevelObjs(a.tableIdent, outputObjs) 223 | 224 | case c: CacheTableCommand => c.plan.foreach { 225 | buildQuery(_, inputObjs) 226 | } 227 | 228 | case c: CreateDatabaseCommand => addDbLevelObjs(c.databaseName, outputObjs) 229 | 230 | case c: CreateDataSourceTableAsSelectCommand => 231 | addDbLevelObjs(c.table.identifier, outputObjs) 232 | addTableOrViewLevelObjs(c.table.identifier, outputObjs, mode = c.mode) 233 | buildQuery(c.query, inputObjs) 234 | 235 | case c: CreateDataSourceTableCommand => 236 | addTableOrViewLevelObjs(c.table.identifier, outputObjs) 237 | 238 | case c: CreateFunctionCommand if !c.isTemp => 239 | addDbLevelObjs(c.databaseName, outputObjs) 240 | addFunctionLevelObjs(c.databaseName, c.functionName, outputObjs) 241 | 242 | case c: CreateHiveTableAsSelectCommand => 243 | addDbLevelObjs(c.tableDesc.identifier, outputObjs) 244 | addTableOrViewLevelObjs(c.tableDesc.identifier, outputObjs) 245 | buildQuery(c.query, inputObjs) 246 | 247 | case c: CreateTableCommand => addTableOrViewLevelObjs(c.table.identifier, outputObjs) 248 | 249 | case c: CreateTableLikeCommand => 250 | addDbLevelObjs(c.targetTable, outputObjs) 251 | addTableOrViewLevelObjs(c.targetTable, outputObjs) 252 | // hive don't handle source table's privileges, we should not obey that, because 253 | // it will cause meta information leak 254 | addDbLevelObjs(c.sourceTable, inputObjs) 255 | addTableOrViewLevelObjs(c.sourceTable, inputObjs) 256 | 257 | case c: CreateViewCommand => 258 | c.viewType match { 259 | case PersistedView => 260 | // PersistedView will be tied to a database 261 | addDbLevelObjs(c.name, outputObjs) 262 | addTableOrViewLevelObjs(c.name, outputObjs) 263 | case _ => 264 | } 265 | buildQuery(c.child, inputObjs) 266 | 267 | case d if d.nodeName == "DescribeColumnCommand" => 268 | addTableOrViewLevelObjs( 269 | getFieldVal(d, "table").asInstanceOf[TableIdentifier], 270 | inputObjs, 271 | columns = getFieldVal(d, "colNameParts").asInstanceOf[Seq[String]]) 272 | 273 | case d: DescribeDatabaseCommand => 274 | addDbLevelObjs(d.databaseName, inputObjs) 275 | 276 | case d: DescribeFunctionCommand => 277 | addFunctionLevelObjs(d.functionName.database, d.functionName.funcName, inputObjs) 278 | 279 | case d: DescribeTableCommand => addTableOrViewLevelObjs(d.table, inputObjs) 280 | 281 | case d: DropDatabaseCommand => 282 | // outputObjs are enough for privilege check, adding inputObjs for consistency with hive 283 | // behaviour in case of some unexpected issues. 
284 | addDbLevelObjs(d.databaseName, inputObjs) 285 | addDbLevelObjs(d.databaseName, outputObjs) 286 | 287 | case d: DropFunctionCommand => 288 | addFunctionLevelObjs(d.databaseName, d.functionName, outputObjs) 289 | 290 | case d: DropTableCommand => addTableOrViewLevelObjs(d.tableName, outputObjs) 291 | 292 | case i: InsertIntoDataSourceCommand => 293 | i.logicalRelation.catalogTable.foreach { table => 294 | addTableOrViewLevelObjs( 295 | table.identifier, 296 | outputObjs) 297 | } 298 | buildQuery(i.query, inputObjs) 299 | 300 | case i if i.nodeName =="InsertIntoDataSourceDirCommand" => 301 | buildQuery(getFieldVal(i, "query").asInstanceOf[LogicalPlan], inputObjs) 302 | 303 | case i: InsertIntoHadoopFsRelationCommand => 304 | // we are able to get the override mode here, but ctas for hive table with text/orc 305 | // format and parquet with spark.sql.hive.convertMetastoreParquet=false can success 306 | // with privilege checking without claiming for UPDATE privilege of target table, 307 | // which seems to be same with Hive behaviour. 308 | // So, here we ignore the overwrite mode for such a consistency. 309 | i.catalogTable foreach { t => 310 | addTableOrViewLevelObjs( 311 | t.identifier, 312 | outputObjs, 313 | i.partitionColumns.map(_.name), 314 | t.schema.fieldNames) 315 | } 316 | buildQuery(i.query, inputObjs) 317 | 318 | case i if i.nodeName == "InsertIntoHiveDirCommand" => 319 | buildQuery(getFieldVal(i, "query").asInstanceOf[LogicalPlan], inputObjs) 320 | 321 | case i if i.nodeName == "InsertIntoHiveTable" => 322 | addTableOrViewLevelObjs( 323 | getFieldVal(i, "table").asInstanceOf[CatalogTable].identifier, outputObjs) 324 | buildQuery(getFieldVal(i, "query").asInstanceOf[LogicalPlan], inputObjs) 325 | 326 | case l: LoadDataCommand => addTableOrViewLevelObjs(l.table, outputObjs) 327 | 328 | case s if s.nodeName == "SaveIntoDataSourceCommand" => 329 | buildQuery(getFieldVal(s, "query").asInstanceOf[LogicalPlan], outputObjs) 330 | 331 | case s: SetDatabaseCommand => addDbLevelObjs(s.databaseName, inputObjs) 332 | 333 | case s: ShowColumnsCommand => addTableOrViewLevelObjs(s.tableName, inputObjs) 334 | 335 | case s: ShowCreateTableCommand => addTableOrViewLevelObjs(s.table, inputObjs) 336 | 337 | case s: ShowFunctionsCommand => s.db.foreach(addDbLevelObjs(_, inputObjs)) 338 | 339 | case s: ShowPartitionsCommand => addTableOrViewLevelObjs(s.tableName, inputObjs) 340 | 341 | case s: ShowTablePropertiesCommand => addTableOrViewLevelObjs(s.table, inputObjs) 342 | 343 | case s: ShowTablesCommand => addDbLevelObjs(s.databaseName, inputObjs) 344 | 345 | case s: TruncateTableCommand => addTableOrViewLevelObjs(s.tableName, outputObjs) 346 | 347 | case _ => 348 | // AddFileCommand 349 | // AddJarCommand 350 | // AnalyzeColumnCommand 351 | // ClearCacheCommand 352 | // CreateTempViewUsing 353 | // ListFilesCommand 354 | // ListJarsCommand 355 | // RefreshTable 356 | // RefreshTable 357 | // ResetCommand 358 | // SetCommand 359 | // ShowDatabasesCommand 360 | // StreamingExplainCommand 361 | // UncacheTableCommand 362 | } 363 | } 364 | 365 | /** 366 | * Add database level hive privilege objects to input or output list 367 | * @param dbName database name as hive privilege object 368 | * @param hivePrivilegeObjects input or output list 369 | */ 370 | private[this] def addDbLevelObjs( 371 | dbName: String, 372 | hivePrivilegeObjects: JList[HPO]): Unit = { 373 | hivePrivilegeObjects.add( 374 | HivePrivilegeObject(HivePrivilegeObjectType.DATABASE, dbName, dbName)) 375 | } 376 | 377 | /** 378 | * Add 
database level hive privilege objects to input or output list 379 | * @param dbOption an option of database name as hive privilege object 380 | * @param hivePrivilegeObjects input or output hive privilege object list 381 | */ 382 | private def addDbLevelObjs( 383 | dbOption: Option[String], 384 | hivePrivilegeObjects: JList[HPO]): Unit = { 385 | dbOption match { 386 | case Some(db) => 387 | hivePrivilegeObjects.add( 388 | HivePrivilegeObject(HivePrivilegeObjectType.DATABASE, db, db)) 389 | case _ => 390 | } 391 | } 392 | 393 | /** 394 | * Add database level hive privilege objects to input or output list 395 | * @param tableIdentifier table identifier contains database name as hive privilege object 396 | * @param hivePrivilegeObjects input or output hive privilege object list 397 | */ 398 | private def addDbLevelObjs( 399 | tableIdentifier: TableIdentifier, 400 | hivePrivilegeObjects: JList[HPO]): Unit = { 401 | tableIdentifier.database match { 402 | case Some(db) => 403 | hivePrivilegeObjects.add( 404 | HivePrivilegeObject(HivePrivilegeObjectType.DATABASE, db, db)) 405 | case _ => 406 | } 407 | } 408 | 409 | /** 410 | * Add table level hive privilege objects to input or output list 411 | * @param tableIdentifier table identifier contains database name, and table name as hive 412 | * privilege object 413 | * @param hivePrivilegeObjects input or output list 414 | * @param mode Append or overwrite 415 | */ 416 | private def addTableOrViewLevelObjs( 417 | tableIdentifier: TableIdentifier, 418 | hivePrivilegeObjects: JList[HPO], 419 | partKeys: Seq[String] = Nil, 420 | columns: Seq[String] = Nil, 421 | mode: SaveMode = SaveMode.ErrorIfExists, 422 | cmdParams: Seq[String] = Nil): Unit = { 423 | tableIdentifier.database match { 424 | case Some(db) => 425 | val tbName = tableIdentifier.table 426 | val hivePrivObjectActionType = getHivePrivObjActionType(mode) 427 | hivePrivilegeObjects.add( 428 | HivePrivilegeObject( 429 | HivePrivilegeObjectType.TABLE_OR_VIEW, 430 | db, 431 | tbName, 432 | partKeys.asJava, 433 | columns.asJava, 434 | hivePrivObjectActionType, 435 | cmdParams.asJava)) 436 | case _ => 437 | } 438 | } 439 | 440 | /** 441 | * Add function level hive privilege objects to input or output list 442 | * @param databaseName database name 443 | * @param functionName function name as hive privilege object 444 | * @param hivePrivilegeObjects input or output list 445 | */ 446 | private def addFunctionLevelObjs( 447 | databaseName: Option[String], 448 | functionName: String, 449 | hivePrivilegeObjects: JList[HPO]): Unit = { 450 | databaseName match { 451 | case Some(db) => 452 | hivePrivilegeObjects.add( 453 | HivePrivilegeObject(HivePrivilegeObjectType.FUNCTION, db, functionName)) 454 | case _ => 455 | } 456 | } 457 | 458 | /** 459 | * HivePrivObjectActionType INSERT or INSERT_OVERWRITE 460 | * 461 | * @param mode Append or Overwrite 462 | * @return 463 | */ 464 | private def getHivePrivObjActionType(mode: SaveMode): HivePrivObjectActionType = { 465 | mode match { 466 | case SaveMode.Append => HivePrivObjectActionType.INSERT 467 | case SaveMode.Overwrite => HivePrivObjectActionType.INSERT_OVERWRITE 468 | case _ => HivePrivObjectActionType.OTHER 469 | } 470 | } 471 | } 472 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/hive/client/AuthzImpl.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor 
license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.sql.hive.client 19 | 20 | import java.util.{List => JList} 21 | 22 | import com.githup.yaooqinn.spark.authorizer.Logging 23 | import org.apache.hadoop.hive.ql.security.authorization.plugin._ 24 | import org.apache.hadoop.hive.ql.session.SessionState 25 | import org.apache.hadoop.security.UserGroupInformation 26 | 27 | import org.apache.spark.sql.SparkSession 28 | import org.apache.spark.sql.hive.{AuthzUtils, HiveExternalCatalog} 29 | import org.apache.spark.sql.internal.NonClosableMutableURLClassLoader 30 | 31 | /** 32 | * A tool for the Authorizer implementation. 33 | * 34 | * The [[SessionState]] generates the authorizer and authenticator; we use these to check 35 | * the privileges of a Spark LogicalPlan once it has been mapped to hive privilege objects and 36 | * an operation type. 37 | * 38 | * A [[SparkSession]] backed by the hive catalog has its own instance of [[SessionState]]. We 39 | * would like to reuse it, but it belongs to an isolated classloader, which makes it 40 | * unreachable from Spark's context classloader. So, when a [[ClassCastException]] occurs, we 41 | * turn off [[IsolatedClientLoader]] and use Spark's builtin Hive client jars to create a new 42 | * metastore client that replaces the original one; once created, it is reused for all 43 | * subsequent checks.
44 | * 45 | */ 46 | object AuthzImpl extends Logging { 47 | def checkPrivileges( 48 | spark: SparkSession, 49 | hiveOpType: HiveOperationType, 50 | inputObjs: JList[HivePrivilegeObject], 51 | outputObjs: JList[HivePrivilegeObject], 52 | context: HiveAuthzContext): Unit = { 53 | val client = spark.sharedState 54 | .externalCatalog.asInstanceOf[HiveExternalCatalog] 55 | .client 56 | val clientImpl = try { 57 | client.asInstanceOf[HiveClientImpl] 58 | } catch { 59 | case _: ClassCastException => 60 | val clientLoader = 61 | AuthzUtils.getFieldVal(client, "clientLoader").asInstanceOf[IsolatedClientLoader] 62 | AuthzUtils.setFieldVal(clientLoader, "isolationOn", false) 63 | AuthzUtils.setFieldVal(clientLoader, 64 | "classLoader", new NonClosableMutableURLClassLoader(clientLoader.baseClassLoader)) 65 | clientLoader.cachedHive = null 66 | val newClient = clientLoader.createClient() 67 | AuthzUtils.setFieldVal( 68 | spark.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog], 69 | "client", 70 | newClient) 71 | newClient.asInstanceOf[HiveClientImpl] 72 | } 73 | 74 | val state = clientImpl.state 75 | SessionState.setCurrentSessionState(state) 76 | val user = UserGroupInformation.getCurrentUser.getShortUserName 77 | if (state.getAuthenticator.getUserName != user) { 78 | val hiveConf = state.getConf 79 | val newState = new SessionState(hiveConf, user) 80 | SessionState.start(newState) 81 | AuthzUtils.setFieldVal(clientImpl, "state", newState) 82 | } 83 | 84 | val authz = clientImpl.state.getAuthorizerV2 85 | clientImpl.withHiveState { 86 | if (authz != null) { 87 | try { 88 | authz.checkPrivileges(hiveOpType, inputObjs, outputObjs, context) 89 | } catch { 90 | case hae: HiveAccessControlException => 91 | error( 92 | s""" 93 | |+===============================+ 94 | ||Spark SQL Authorization Failure| 95 | ||-------------------------------| 96 | ||${hae.getMessage} 97 | ||-------------------------------| 98 | ||Spark SQL Authorization Failure| 99 | |+===============================+ 100 | """.stripMargin) 101 | throw hae 102 | case e: Exception => throw e 103 | } 104 | } else { 105 | warn("Authorizer V2 not configured. Skipping privilege checking") 106 | } 107 | } 108 | } 109 | } 110 | --------------------------------------------------------------------------------
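As a rough illustration of how the pieces above fit together, here is a sketch (not part of the repository) that drives AuthzImpl.checkPrivileges directly for a simple table read. The table default.src, the QUERY operation type, and the standalone wrapper object are assumptions made for this example only; in the project itself the privilege objects and operation type are derived from the analyzed LogicalPlan (see PrivilegesBuilder.scala) and handed to AuthzImpl by the optimizer rule.

    import java.util.{ArrayList => JAList}

    import org.apache.hadoop.hive.ql.security.authorization.plugin.{HiveAuthzContext, HiveOperationType}
    import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject
    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.hive.client.AuthzImpl

    // Illustrative only: check that the current user may read default.src.
    object AuthzCheckExample {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder()
          .appName("authz-check-example")
          .enableHiveSupport()
          .getOrCreate()

        // A read of default.src maps to a QUERY operation with one table-level input object.
        val inputs = new JAList[HivePrivilegeObject]()
        inputs.add(new HivePrivilegeObject(
          HivePrivilegeObject.HivePrivilegeObjectType.TABLE_OR_VIEW, "default", "src"))
        val outputs = new JAList[HivePrivilegeObject]()

        // Hive's builder creates the authorization context passed through to the V2 authorizer.
        val context = new HiveAuthzContext.Builder().build()

        // Throws HiveAccessControlException if the current user lacks the required privilege.
        AuthzImpl.checkPrivileges(spark, HiveOperationType.QUERY, inputs, outputs, context)
      }
    }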