├── _config.yml ├── .gitignore ├── src ├── test │ ├── resources │ │ ├── tpcds │ │ │ ├── q55.sql │ │ │ ├── q96.sql │ │ │ ├── q3.sql │ │ │ ├── q22.sql │ │ │ ├── q52.sql │ │ │ ├── q42.sql │ │ │ ├── q15.sql │ │ │ ├── q82.sql │ │ │ ├── q37.sql │ │ │ ├── q32.sql │ │ │ ├── q7.sql │ │ │ ├── q26.sql │ │ │ ├── q84.sql │ │ │ ├── q93.sql │ │ │ ├── q92.sql │ │ │ ├── q6.sql │ │ │ ├── q45.sql │ │ │ ├── q19.sql │ │ │ ├── q27.sql │ │ │ ├── q20.sql │ │ │ ├── q98.sql │ │ │ ├── q1.sql │ │ │ ├── q12.sql │ │ │ ├── q86.sql │ │ │ ├── q36.sql │ │ │ ├── q90.sql │ │ │ ├── q94.sql │ │ │ ├── q91.sql │ │ │ ├── q65.sql │ │ │ ├── q79.sql │ │ │ ├── q40.sql │ │ │ ├── q87.sql │ │ │ ├── q67.sql │ │ │ ├── q21.sql │ │ │ ├── q24a.sql │ │ │ ├── q24b.sql │ │ │ ├── q16.sql │ │ │ ├── q97.sql │ │ │ ├── q95.sql │ │ │ ├── q25.sql │ │ │ ├── q29.sql │ │ │ ├── q38.sql │ │ │ ├── q43.sql │ │ │ ├── q18.sql │ │ │ ├── q89.sql │ │ │ ├── q70.sql │ │ │ ├── q73.sql │ │ │ ├── q30.sql │ │ │ ├── q61.sql │ │ │ ├── q62.sql │ │ │ ├── q99.sql │ │ │ ├── q46.sql │ │ │ ├── q68.sql │ │ │ ├── q39a.sql │ │ │ ├── q44.sql │ │ │ ├── q63.sql │ │ │ ├── q39b.sql │ │ │ ├── q81.sql │ │ │ ├── q71.sql │ │ │ ├── q53.sql │ │ │ ├── q72.sql │ │ │ ├── q34.sql │ │ │ ├── q76.sql │ │ │ ├── q69.sql │ │ │ ├── q13.sql │ │ │ ├── q17.sql │ │ │ ├── q35.sql │ │ │ ├── q50.sql │ │ │ ├── q54.sql │ │ │ ├── q48.sql │ │ │ ├── q57.sql │ │ │ ├── q9.sql │ │ │ ├── q51.sql │ │ │ ├── q60.sql │ │ │ ├── q10.sql │ │ │ ├── q23a.sql │ │ │ ├── q33.sql │ │ │ ├── q56.sql │ │ │ ├── q83.sql │ │ │ ├── q31.sql │ │ │ ├── q41.sql │ │ │ ├── q47.sql │ │ │ ├── q74.sql │ │ │ ├── q28.sql │ │ │ ├── q58.sql │ │ │ ├── q23b.sql │ │ │ ├── q78.sql │ │ │ ├── q59.sql │ │ │ ├── q2.sql │ │ │ ├── q85.sql │ │ │ ├── q11.sql │ │ │ ├── q75.sql │ │ │ ├── q80.sql │ │ │ ├── q77.sql │ │ │ ├── q64.sql │ │ │ ├── q14b.sql │ │ │ ├── q14a.sql │ │ │ ├── q5.sql │ │ │ ├── q49.sql │ │ │ └── q4.sql │ │ ├── log4j.properties │ │ └── ranger-spark-security.xml │ └── scala │ │ └── org │ │ └── apache │ │ ├── spark │ │ └── 
sql │ │ │ ├── RangerSparkTestUtils.scala │ │ │ ├── execution │ │ │ └── RangerSparkPlanOmitStrategyTest.scala │ │ │ └── catalyst │ │ │ └── optimizer │ │ │ ├── RangerSparkRowFilterExtensionTest.scala │ │ │ └── RangerSparkMaskingExtensionTest.scala │ │ └── ranger │ │ └── services │ │ └── spark │ │ └── RangerAdminClientImpl.scala └── main │ └── scala │ └── org │ └── apache │ ├── ranger │ └── authorization │ │ └── spark │ │ └── authorizer │ │ ├── SparkAccessControlException.scala │ │ ├── authorizer.scala │ │ ├── SparkObjectType.scala │ │ ├── SparkPrivObjectActionType.scala │ │ ├── SparkPrivilegeObjectType.scala │ │ ├── SparkAccessType.scala │ │ ├── RangerSparkAuditHandler.scala │ │ ├── RangerSparkSQLExtension.scala │ │ ├── SparkOperationType.scala │ │ ├── RangerSparkPlugin.scala │ │ ├── RangerSparkAccessRequest.scala │ │ ├── RangerSparkResource.scala │ │ └── SparkPrivilegeObject.scala │ └── spark │ └── sql │ ├── catalyst │ ├── plans │ │ └── logical │ │ │ ├── RangerSparkMasking.scala │ │ │ └── RangerSparkRowFilter.scala │ └── optimizer │ │ └── RangerSparkOptimizer.scala │ ├── execution │ ├── RangerSparkPlanOmitStrategy.scala │ ├── RangerShowDatabasesCommand.scala │ └── RangerShowTablesCommand.scala │ └── AuthzUtils.scala ├── .github ├── ISSUE_TEMPLATE │ ├── notice.md │ └── custom.md └── workflows │ └── master.yml ├── docs └── installation-addons.md ├── .travis.yml └── README.md /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-leap-day 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | dependency-reduced-pom.xml 3 | /derby.log 4 | /.idea/ 5 | /spark-ranger.iml 6 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q55.sql: -------------------------------------------------------------------------------- 1 | 
SELECT 2 | i_brand_id brand_id, 3 | i_brand brand, 4 | sum(ss_ext_sales_price) ext_price 5 | FROM date_dim, store_sales, item 6 | WHERE d_date_sk = ss_sold_date_sk 7 | AND ss_item_sk = i_item_sk 8 | AND i_manager_id = 28 9 | AND d_moy = 11 10 | AND d_year = 1999 11 | GROUP BY i_brand, i_brand_id 12 | ORDER BY ext_price DESC, brand_id 13 | LIMIT 100 14 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q96.sql: -------------------------------------------------------------------------------- 1 | SELECT count(*) 2 | FROM store_sales, household_demographics, time_dim, store 3 | WHERE ss_sold_time_sk = time_dim.t_time_sk 4 | AND ss_hdemo_sk = household_demographics.hd_demo_sk 5 | AND ss_store_sk = s_store_sk 6 | AND time_dim.t_hour = 20 7 | AND time_dim.t_minute >= 30 8 | AND household_demographics.hd_dep_count = 7 9 | AND store.s_store_name = 'ese' 10 | ORDER BY count(*) 11 | LIMIT 100 12 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q3.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | dt.d_year, 3 | item.i_brand_id brand_id, 4 | item.i_brand brand, 5 | SUM(ss_ext_sales_price) sum_agg 6 | FROM date_dim dt, store_sales, item 7 | WHERE dt.d_date_sk = store_sales.ss_sold_date_sk 8 | AND store_sales.ss_item_sk = item.i_item_sk 9 | AND item.i_manufact_id = 128 10 | AND dt.d_moy = 11 11 | GROUP BY dt.d_year, item.i_brand, item.i_brand_id 12 | ORDER BY dt.d_year, sum_agg DESC, brand_id 13 | LIMIT 100 14 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q22.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_product_name, 3 | i_brand, 4 | i_class, 5 | i_category, 6 | avg(inv_quantity_on_hand) qoh 7 | FROM inventory, date_dim, item, warehouse 8 | WHERE inv_date_sk = d_date_sk 9 | AND inv_item_sk = 
i_item_sk 10 | AND inv_warehouse_sk = w_warehouse_sk 11 | AND d_month_seq BETWEEN 1200 AND 1200 + 11 12 | GROUP BY ROLLUP (i_product_name, i_brand, i_class, i_category) 13 | ORDER BY qoh, i_product_name, i_brand, i_class, i_category 14 | LIMIT 100 15 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q52.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | dt.d_year, 3 | item.i_brand_id brand_id, 4 | item.i_brand brand, 5 | sum(ss_ext_sales_price) ext_price 6 | FROM date_dim dt, store_sales, item 7 | WHERE dt.d_date_sk = store_sales.ss_sold_date_sk 8 | AND store_sales.ss_item_sk = item.i_item_sk 9 | AND item.i_manager_id = 1 10 | AND dt.d_moy = 11 11 | AND dt.d_year = 2000 12 | GROUP BY dt.d_year, item.i_brand, item.i_brand_id 13 | ORDER BY dt.d_year, ext_price DESC, brand_id 14 | LIMIT 100 15 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q42.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | dt.d_year, 3 | item.i_category_id, 4 | item.i_category, 5 | sum(ss_ext_sales_price) 6 | FROM date_dim dt, store_sales, item 7 | WHERE dt.d_date_sk = store_sales.ss_sold_date_sk 8 | AND store_sales.ss_item_sk = item.i_item_sk 9 | AND item.i_manager_id = 1 10 | AND dt.d_moy = 11 11 | AND dt.d_year = 2000 12 | GROUP BY dt.d_year 13 | , item.i_category_id 14 | , item.i_category 15 | ORDER BY sum(ss_ext_sales_price) DESC, dt.d_year 16 | , item.i_category_id 17 | , item.i_category 18 | LIMIT 100 19 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q15.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | ca_zip, 3 | sum(cs_sales_price) 4 | FROM catalog_sales, customer, customer_address, date_dim 5 | WHERE cs_bill_customer_sk = c_customer_sk 6 | AND 
c_current_addr_sk = ca_address_sk 7 | AND (substr(ca_zip, 1, 5) IN ('85669', '86197', '88274', '83405', '86475', 8 | '85392', '85460', '80348', '81792') 9 | OR ca_state IN ('CA', 'WA', 'GA') 10 | OR cs_sales_price > 500) 11 | AND cs_sold_date_sk = d_date_sk 12 | AND d_qoy = 2 AND d_year = 2001 13 | GROUP BY ca_zip 14 | ORDER BY ca_zip 15 | LIMIT 100 16 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q82.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_item_id, 3 | i_item_desc, 4 | i_current_price 5 | FROM item, inventory, date_dim, store_sales 6 | WHERE i_current_price BETWEEN 62 AND 62 + 30 7 | AND inv_item_sk = i_item_sk 8 | AND d_date_sk = inv_date_sk 9 | AND d_date BETWEEN cast('2000-05-25' AS DATE) AND (cast('2000-05-25' AS DATE) + INTERVAL 60 days) 10 | AND i_manufact_id IN (129, 270, 821, 423) 11 | AND inv_quantity_on_hand BETWEEN 100 AND 500 12 | AND ss_item_sk = i_item_sk 13 | GROUP BY i_item_id, i_item_desc, i_current_price 14 | ORDER BY i_item_id 15 | LIMIT 100 16 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q37.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_item_id, 3 | i_item_desc, 4 | i_current_price 5 | FROM item, inventory, date_dim, catalog_sales 6 | WHERE i_current_price BETWEEN 68 AND 68 + 30 7 | AND inv_item_sk = i_item_sk 8 | AND d_date_sk = inv_date_sk 9 | AND d_date BETWEEN cast('2000-02-01' AS DATE) AND (cast('2000-02-01' AS DATE) + INTERVAL 60 days) 10 | AND i_manufact_id IN (677, 940, 694, 808) 11 | AND inv_quantity_on_hand BETWEEN 100 AND 500 12 | AND cs_item_sk = i_item_sk 13 | GROUP BY i_item_id, i_item_desc, i_current_price 14 | ORDER BY i_item_id 15 | LIMIT 100 16 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q32.sql: 
-------------------------------------------------------------------------------- 1 | SELECT 1 AS `excess discount amount ` 2 | FROM 3 | catalog_sales, item, date_dim 4 | WHERE 5 | i_manufact_id = 977 6 | AND i_item_sk = cs_item_sk 7 | AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + interval 90 days) 8 | AND d_date_sk = cs_sold_date_sk 9 | AND cs_ext_discount_amt > ( 10 | SELECT 1.3 * avg(cs_ext_discount_amt) 11 | FROM catalog_sales, date_dim 12 | WHERE cs_item_sk = i_item_sk 13 | AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + interval 90 days) 14 | AND d_date_sk = cs_sold_date_sk) 15 | LIMIT 100 16 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/notice.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Notice 3 | about: This library is deprecated. 4 | title: '' 5 | labels: '' 6 | assignees: yaooqinn 7 | 8 | --- 9 | 10 | This library has been contributed to https://github.com/apache/submarine as a sub-module, and that module can still be used individually. 11 | The project here will no longer be updated. 
12 | If you have any questions, please go to https://github.com/apache/submarine/blob/master/docs/submarine-security/spark-security/README.md to learn how to use it, and give feedback to the Apache Submarine community by following https://submarine.apache.org/community/contributors.html 13 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q7.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_item_id, 3 | avg(ss_quantity) agg1, 4 | avg(ss_list_price) agg2, 5 | avg(ss_coupon_amt) agg3, 6 | avg(ss_sales_price) agg4 7 | FROM store_sales, customer_demographics, date_dim, item, promotion 8 | WHERE ss_sold_date_sk = d_date_sk AND 9 | ss_item_sk = i_item_sk AND 10 | ss_cdemo_sk = cd_demo_sk AND 11 | ss_promo_sk = p_promo_sk AND 12 | cd_gender = 'M' AND 13 | cd_marital_status = 'S' AND 14 | cd_education_status = 'College' AND 15 | (p_channel_email = 'N' OR p_channel_event = 'N') AND 16 | d_year = 2000 17 | GROUP BY i_item_id 18 | ORDER BY i_item_id 19 | LIMIT 100 20 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q26.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_item_id, 3 | avg(cs_quantity) agg1, 4 | avg(cs_list_price) agg2, 5 | avg(cs_coupon_amt) agg3, 6 | avg(cs_sales_price) agg4 7 | FROM catalog_sales, customer_demographics, date_dim, item, promotion 8 | WHERE cs_sold_date_sk = d_date_sk AND 9 | cs_item_sk = i_item_sk AND 10 | cs_bill_cdemo_sk = cd_demo_sk AND 11 | cs_promo_sk = p_promo_sk AND 12 | cd_gender = 'M' AND 13 | cd_marital_status = 'S' AND 14 | cd_education_status = 'College' AND 15 | (p_channel_email = 'N' OR p_channel_event = 'N') AND 16 | d_year = 2000 17 | GROUP BY i_item_id 18 | ORDER BY i_item_id 19 | LIMIT 100 20 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q84.sql: 
-------------------------------------------------------------------------------- 1 | SELECT 2 | c_customer_id AS customer_id, 3 | concat(c_last_name, ', ', c_first_name) AS customername 4 | FROM customer 5 | , customer_address 6 | , customer_demographics 7 | , household_demographics 8 | , income_band 9 | , store_returns 10 | WHERE ca_city = 'Edgewood' 11 | AND c_current_addr_sk = ca_address_sk 12 | AND ib_lower_bound >= 38128 13 | AND ib_upper_bound <= 38128 + 50000 14 | AND ib_income_band_sk = hd_income_band_sk 15 | AND cd_demo_sk = c_current_cdemo_sk 16 | AND hd_demo_sk = c_current_hdemo_sk 17 | AND sr_cdemo_sk = cd_demo_sk 18 | ORDER BY c_customer_id 19 | LIMIT 100 20 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q93.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | ss_customer_sk, 3 | sum(act_sales) sumsales 4 | FROM (SELECT 5 | ss_item_sk, 6 | ss_ticket_number, 7 | ss_customer_sk, 8 | CASE WHEN sr_return_quantity IS NOT NULL 9 | THEN (ss_quantity - sr_return_quantity) * ss_sales_price 10 | ELSE (ss_quantity * ss_sales_price) END act_sales 11 | FROM store_sales 12 | LEFT OUTER JOIN store_returns 13 | ON (sr_item_sk = ss_item_sk AND sr_ticket_number = ss_ticket_number) 14 | , 15 | reason 16 | WHERE sr_reason_sk = r_reason_sk AND r_reason_desc = 'reason 28') t 17 | GROUP BY ss_customer_sk 18 | ORDER BY sumsales, ss_customer_sk 19 | LIMIT 100 20 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q92.sql: -------------------------------------------------------------------------------- 1 | SELECT sum(ws_ext_discount_amt) AS `Excess Discount Amount ` 2 | FROM web_sales, item, date_dim 3 | WHERE i_manufact_id = 350 4 | AND i_item_sk = ws_item_sk 5 | AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + INTERVAL 90 days) 6 | AND d_date_sk = ws_sold_date_sk 7 | AND ws_ext_discount_amt > 8 
| ( 9 | SELECT 1.3 * avg(ws_ext_discount_amt) 10 | FROM web_sales, date_dim 11 | WHERE ws_item_sk = i_item_sk 12 | AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + INTERVAL 90 days) 13 | AND d_date_sk = ws_sold_date_sk 14 | ) 15 | ORDER BY sum(ws_ext_discount_amt) 16 | LIMIT 100 17 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q6.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | a.ca_state state, 3 | count(*) cnt 4 | FROM 5 | customer_address a, customer c, store_sales s, date_dim d, item i 6 | WHERE a.ca_address_sk = c.c_current_addr_sk 7 | AND c.c_customer_sk = s.ss_customer_sk 8 | AND s.ss_sold_date_sk = d.d_date_sk 9 | AND s.ss_item_sk = i.i_item_sk 10 | AND d.d_month_seq = 11 | (SELECT DISTINCT (d_month_seq) 12 | FROM date_dim 13 | WHERE d_year = 2000 AND d_moy = 1) 14 | AND i.i_current_price > 1.2 * 15 | (SELECT avg(j.i_current_price) 16 | FROM item j 17 | WHERE j.i_category = i.i_category) 18 | GROUP BY a.ca_state 19 | HAVING count(*) >= 10 20 | ORDER BY cnt 21 | LIMIT 100 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/custom.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Custom issue template 3 | about: Describe this issue template's purpose here. 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | # Notice: 11 | This library has been contributed to https://github.com/apache/submarine as a sub-module, and that module can still be used individually. 12 | 13 | The project here will no longer be updated. 
14 | 15 | If you have any questions, please go to 16 | 17 | https://github.com/apache/submarine/tree/master/docs/submarine-security/spark/README.md 18 | 19 | to learn how to use it, and give feedback to the Apache Submarine community by following https://submarine.apache.org/community/contributors.html 20 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q45.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | ca_zip, 3 | ca_city, 4 | sum(ws_sales_price) 5 | FROM web_sales, customer, customer_address, date_dim, item 6 | WHERE ws_bill_customer_sk = c_customer_sk 7 | AND c_current_addr_sk = ca_address_sk 8 | AND ws_item_sk = i_item_sk 9 | AND (substr(ca_zip, 1, 5) IN 10 | ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792') 11 | OR 12 | i_item_id IN (SELECT i_item_id 13 | FROM item 14 | WHERE i_item_sk IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) 15 | ) 16 | ) 17 | AND ws_sold_date_sk = d_date_sk 18 | AND d_qoy = 2 AND d_year = 2001 19 | GROUP BY ca_zip, ca_city 20 | ORDER BY ca_zip, ca_city 21 | LIMIT 100 22 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q19.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_brand_id brand_id, 3 | i_brand brand, 4 | i_manufact_id, 5 | i_manufact, 6 | sum(ss_ext_sales_price) ext_price 7 | FROM date_dim, store_sales, item, customer, customer_address, store 8 | WHERE d_date_sk = ss_sold_date_sk 9 | AND ss_item_sk = i_item_sk 10 | AND i_manager_id = 8 11 | AND d_moy = 11 12 | AND d_year = 1998 13 | AND ss_customer_sk = c_customer_sk 14 | AND c_current_addr_sk = ca_address_sk 15 | AND substr(ca_zip, 1, 5) <> substr(s_zip, 1, 5) 16 | AND ss_store_sk = s_store_sk 17 | GROUP BY i_brand, i_brand_id, i_manufact_id, i_manufact 18 | ORDER BY ext_price DESC, brand, brand_id, i_manufact_id, i_manufact 19 | 
LIMIT 100 20 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q27.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_item_id, 3 | s_state, 4 | grouping(s_state) g_state, 5 | avg(ss_quantity) agg1, 6 | avg(ss_list_price) agg2, 7 | avg(ss_coupon_amt) agg3, 8 | avg(ss_sales_price) agg4 9 | FROM store_sales, customer_demographics, date_dim, store, item 10 | WHERE ss_sold_date_sk = d_date_sk AND 11 | ss_item_sk = i_item_sk AND 12 | ss_store_sk = s_store_sk AND 13 | ss_cdemo_sk = cd_demo_sk AND 14 | cd_gender = 'M' AND 15 | cd_marital_status = 'S' AND 16 | cd_education_status = 'College' AND 17 | d_year = 2002 AND 18 | s_state IN ('TN', 'TN', 'TN', 'TN', 'TN', 'TN') 19 | GROUP BY ROLLUP (i_item_id, s_state) 20 | ORDER BY i_item_id, s_state 21 | LIMIT 100 22 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q20.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_item_desc, 3 | i_category, 4 | i_class, 5 | i_current_price, 6 | sum(cs_ext_sales_price) AS itemrevenue, 7 | sum(cs_ext_sales_price) * 100 / sum(sum(cs_ext_sales_price)) 8 | OVER 9 | (PARTITION BY i_class) AS revenueratio 10 | FROM catalog_sales, item, date_dim 11 | WHERE cs_item_sk = i_item_sk 12 | AND i_category IN ('Sports', 'Books', 'Home') 13 | AND cs_sold_date_sk = d_date_sk 14 | AND d_date BETWEEN cast('1999-02-22' AS DATE) 15 | AND (cast('1999-02-22' AS DATE) + INTERVAL 30 days) 16 | GROUP BY i_item_id, i_item_desc, i_category, i_class, i_current_price 17 | ORDER BY i_category, i_class, i_item_id, i_item_desc, revenueratio 18 | LIMIT 100 19 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q98.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_item_desc, 3 | i_category, 4 | 
i_class, 5 | i_current_price, 6 | sum(ss_ext_sales_price) AS itemrevenue, 7 | sum(ss_ext_sales_price) * 100 / sum(sum(ss_ext_sales_price)) 8 | OVER 9 | (PARTITION BY i_class) AS revenueratio 10 | FROM 11 | store_sales, item, date_dim 12 | WHERE 13 | ss_item_sk = i_item_sk 14 | AND i_category IN ('Sports', 'Books', 'Home') 15 | AND ss_sold_date_sk = d_date_sk 16 | AND d_date BETWEEN cast('1999-02-22' AS DATE) 17 | AND (cast('1999-02-22' AS DATE) + INTERVAL 30 days) 18 | GROUP BY 19 | i_item_id, i_item_desc, i_category, i_class, i_current_price 20 | ORDER BY 21 | i_category, i_class, i_item_id, i_item_desc, revenueratio 22 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q1.sql: -------------------------------------------------------------------------------- 1 | WITH customer_total_return AS 2 | ( SELECT 3 | sr_customer_sk AS ctr_customer_sk, 4 | sr_store_sk AS ctr_store_sk, 5 | sum(sr_return_amt) AS ctr_total_return 6 | FROM store_returns, date_dim 7 | WHERE sr_returned_date_sk = d_date_sk AND d_year = 2000 8 | GROUP BY sr_customer_sk, sr_store_sk) 9 | SELECT c_customer_id 10 | FROM customer_total_return ctr1, store, customer 11 | WHERE ctr1.ctr_total_return > 12 | (SELECT avg(ctr_total_return) * 1.2 13 | FROM customer_total_return ctr2 14 | WHERE ctr1.ctr_store_sk = ctr2.ctr_store_sk) 15 | AND s_store_sk = ctr1.ctr_store_sk 16 | AND s_state = 'TN' 17 | AND ctr1.ctr_customer_sk = c_customer_sk 18 | ORDER BY c_customer_id 19 | LIMIT 100 20 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q12.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_item_desc, 3 | i_category, 4 | i_class, 5 | i_current_price, 6 | sum(ws_ext_sales_price) AS itemrevenue, 7 | sum(ws_ext_sales_price) * 100 / sum(sum(ws_ext_sales_price)) 8 | OVER 9 | (PARTITION BY i_class) AS revenueratio 10 | FROM 11 | web_sales, item, 
date_dim 12 | WHERE 13 | ws_item_sk = i_item_sk 14 | AND i_category IN ('Sports', 'Books', 'Home') 15 | AND ws_sold_date_sk = d_date_sk 16 | AND d_date BETWEEN cast('1999-02-22' AS DATE) 17 | AND (cast('1999-02-22' AS DATE) + INTERVAL 30 days) 18 | GROUP BY 19 | i_item_id, i_item_desc, i_category, i_class, i_current_price 20 | ORDER BY 21 | i_category, i_class, i_item_id, i_item_desc, revenueratio 22 | LIMIT 100 23 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q86.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | sum(ws_net_paid) AS total_sum, 3 | i_category, 4 | i_class, 5 | grouping(i_category) + grouping(i_class) AS lochierarchy, 6 | rank() 7 | OVER ( 8 | PARTITION BY grouping(i_category) + grouping(i_class), 9 | CASE WHEN grouping(i_class) = 0 10 | THEN i_category END 11 | ORDER BY sum(ws_net_paid) DESC) AS rank_within_parent 12 | FROM 13 | web_sales, date_dim d1, item 14 | WHERE 15 | d1.d_month_seq BETWEEN 1200 AND 1200 + 11 16 | AND d1.d_date_sk = ws_sold_date_sk 17 | AND i_item_sk = ws_item_sk 18 | GROUP BY ROLLUP (i_category, i_class) 19 | ORDER BY 20 | lochierarchy DESC, 21 | CASE WHEN lochierarchy = 0 22 | THEN i_category END, 23 | rank_within_parent 24 | LIMIT 100 25 | -------------------------------------------------------------------------------- /.github/workflows/master.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | build: 13 | 14 | runs-on: ubuntu-18.04 15 | strategy: 16 | matrix: 17 | spark: [ '2.3', '2.4' ] 18 | ranger: [ '1.0', '1.1', '1.2', '2.0' ] 19 | name: Build with Spark ${{ matrix.spark }} / Ranger ${{ matrix.ranger }} 20 | 21 | steps: 22 | - uses: actions/checkout@v2 23 | - name: Set up JDK 1.8 24 | uses: actions/setup-java@v1 25 | with: 26 | java-version: 1.8 27 | - 
name: Build with Maven 28 | run: mvn clean install -Pspark-${{ matrix.spark }} -Pranger-${{ matrix.ranger }} -Dmaven.javadoc.skip=true -B -V 29 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q36.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | sum(ss_net_profit) / sum(ss_ext_sales_price) AS gross_margin, 3 | i_category, 4 | i_class, 5 | grouping(i_category) + grouping(i_class) AS lochierarchy, 6 | rank() 7 | OVER ( 8 | PARTITION BY grouping(i_category) + grouping(i_class), 9 | CASE WHEN grouping(i_class) = 0 10 | THEN i_category END 11 | ORDER BY sum(ss_net_profit) / sum(ss_ext_sales_price) ASC) AS rank_within_parent 12 | FROM 13 | store_sales, date_dim d1, item, store 14 | WHERE 15 | d1.d_year = 2001 16 | AND d1.d_date_sk = ss_sold_date_sk 17 | AND i_item_sk = ss_item_sk 18 | AND s_store_sk = ss_store_sk 19 | AND s_state IN ('TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN') 20 | GROUP BY ROLLUP (i_category, i_class) 21 | ORDER BY 22 | lochierarchy DESC 23 | , CASE WHEN lochierarchy = 0 24 | THEN i_category END 25 | , rank_within_parent 26 | LIMIT 100 27 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q90.sql: -------------------------------------------------------------------------------- 1 | SELECT cast(amc AS DECIMAL(15, 4)) / cast(pmc AS DECIMAL(15, 4)) am_pm_ratio 2 | FROM (SELECT count(*) amc 3 | FROM web_sales, household_demographics, time_dim, web_page 4 | WHERE ws_sold_time_sk = time_dim.t_time_sk 5 | AND ws_ship_hdemo_sk = household_demographics.hd_demo_sk 6 | AND ws_web_page_sk = web_page.wp_web_page_sk 7 | AND time_dim.t_hour BETWEEN 8 AND 8 + 1 8 | AND household_demographics.hd_dep_count = 6 9 | AND web_page.wp_char_count BETWEEN 5000 AND 5200) at, 10 | (SELECT count(*) pmc 11 | FROM web_sales, household_demographics, time_dim, web_page 12 | WHERE ws_sold_time_sk = 
time_dim.t_time_sk 13 | AND ws_ship_hdemo_sk = household_demographics.hd_demo_sk 14 | AND ws_web_page_sk = web_page.wp_web_page_sk 15 | AND time_dim.t_hour BETWEEN 19 AND 19 + 1 16 | AND household_demographics.hd_dep_count = 6 17 | AND web_page.wp_char_count BETWEEN 5000 AND 5200) pt 18 | ORDER BY am_pm_ratio 19 | LIMIT 100 20 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q94.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | count(DISTINCT ws_order_number) AS `order count `, 3 | sum(ws_ext_ship_cost) AS `total shipping cost `, 4 | sum(ws_net_profit) AS `total net profit ` 5 | FROM 6 | web_sales ws1, date_dim, customer_address, web_site 7 | WHERE 8 | d_date BETWEEN '1999-02-01' AND 9 | (CAST('1999-02-01' AS DATE) + INTERVAL 60 days) 10 | AND ws1.ws_ship_date_sk = d_date_sk 11 | AND ws1.ws_ship_addr_sk = ca_address_sk 12 | AND ca_state = 'IL' 13 | AND ws1.ws_web_site_sk = web_site_sk 14 | AND web_company_name = 'pri' 15 | AND EXISTS(SELECT * 16 | FROM web_sales ws2 17 | WHERE ws1.ws_order_number = ws2.ws_order_number 18 | AND ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) 19 | AND NOT EXISTS(SELECT * 20 | FROM web_returns wr1 21 | WHERE ws1.ws_order_number = wr1.wr_order_number) 22 | ORDER BY count(DISTINCT ws_order_number) 23 | LIMIT 100 24 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q91.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | cc_call_center_id Call_Center, 3 | cc_name Call_Center_Name, 4 | cc_manager Manager, 5 | sum(cr_net_loss) Returns_Loss 6 | FROM 7 | call_center, catalog_returns, date_dim, customer, customer_address, 8 | customer_demographics, household_demographics 9 | WHERE 10 | cr_call_center_sk = cc_call_center_sk 11 | AND cr_returned_date_sk = d_date_sk 12 | AND cr_returning_customer_sk = c_customer_sk 13 | AND cd_demo_sk 
= c_current_cdemo_sk 14 | AND hd_demo_sk = c_current_hdemo_sk 15 | AND ca_address_sk = c_current_addr_sk 16 | AND d_year = 1998 17 | AND d_moy = 11 18 | AND ((cd_marital_status = 'M' AND cd_education_status = 'Unknown') 19 | OR (cd_marital_status = 'W' AND cd_education_status = 'Advanced Degree')) 20 | AND hd_buy_potential LIKE 'Unknown%' 21 | AND ca_gmt_offset = -7 22 | GROUP BY cc_call_center_id, cc_name, cc_manager, cd_marital_status, cd_education_status 23 | ORDER BY sum(cr_net_loss) DESC 24 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q65.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | s_store_name, 3 | i_item_desc, 4 | sc.revenue, 5 | i_current_price, 6 | i_wholesale_cost, 7 | i_brand 8 | FROM store, item, 9 | (SELECT 10 | ss_store_sk, 11 | avg(revenue) AS ave 12 | FROM 13 | (SELECT 14 | ss_store_sk, 15 | ss_item_sk, 16 | sum(ss_sales_price) AS revenue 17 | FROM store_sales, date_dim 18 | WHERE ss_sold_date_sk = d_date_sk AND d_month_seq BETWEEN 1176 AND 1176 + 11 19 | GROUP BY ss_store_sk, ss_item_sk) sa 20 | GROUP BY ss_store_sk) sb, 21 | (SELECT 22 | ss_store_sk, 23 | ss_item_sk, 24 | sum(ss_sales_price) AS revenue 25 | FROM store_sales, date_dim 26 | WHERE ss_sold_date_sk = d_date_sk AND d_month_seq BETWEEN 1176 AND 1176 + 11 27 | GROUP BY ss_store_sk, ss_item_sk) sc 28 | WHERE sb.ss_store_sk = sc.ss_store_sk AND 29 | sc.revenue <= 0.1 * sb.ave AND 30 | s_store_sk = sc.ss_store_sk AND 31 | i_item_sk = sc.ss_item_sk 32 | ORDER BY s_store_name, i_item_desc 33 | LIMIT 100 34 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q79.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | c_last_name, 3 | c_first_name, 4 | substr(s_city, 1, 30), 5 | ss_ticket_number, 6 | amt, 7 | profit 8 | FROM 9 | (SELECT 10 | ss_ticket_number, 11 | 
ss_customer_sk, 12 | store.s_city, 13 | sum(ss_coupon_amt) amt, 14 | sum(ss_net_profit) profit 15 | FROM store_sales, date_dim, store, household_demographics 16 | WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk 17 | AND store_sales.ss_store_sk = store.s_store_sk 18 | AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 19 | AND (household_demographics.hd_dep_count = 6 OR 20 | household_demographics.hd_vehicle_count > 2) 21 | AND date_dim.d_dow = 1 22 | AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) 23 | AND store.s_number_employees BETWEEN 200 AND 295 24 | GROUP BY ss_ticket_number, ss_customer_sk, ss_addr_sk, store.s_city) ms, customer 25 | WHERE ss_customer_sk = c_customer_sk 26 | ORDER BY c_last_name, c_first_name, substr(s_city, 1, 30), profit 27 | LIMIT 100 28 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q40.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | w_state, 3 | i_item_id, 4 | sum(CASE WHEN (cast(d_date AS DATE) < cast('2000-03-11' AS DATE)) 5 | THEN cs_sales_price - coalesce(cr_refunded_cash, 0) 6 | ELSE 0 END) AS sales_before, 7 | sum(CASE WHEN (cast(d_date AS DATE) >= cast('2000-03-11' AS DATE)) 8 | THEN cs_sales_price - coalesce(cr_refunded_cash, 0) 9 | ELSE 0 END) AS sales_after 10 | FROM 11 | catalog_sales 12 | LEFT OUTER JOIN catalog_returns ON 13 | (cs_order_number = cr_order_number 14 | AND cs_item_sk = cr_item_sk) 15 | , warehouse, item, date_dim 16 | WHERE 17 | i_current_price BETWEEN 0.99 AND 1.49 18 | AND i_item_sk = cs_item_sk 19 | AND cs_warehouse_sk = w_warehouse_sk 20 | AND cs_sold_date_sk = d_date_sk 21 | AND d_date BETWEEN (cast('2000-03-11' AS DATE) - INTERVAL 30 days) 22 | AND (cast('2000-03-11' AS DATE) + INTERVAL 30 days) 23 | GROUP BY w_state, i_item_id 24 | ORDER BY w_state, i_item_id 25 | LIMIT 100 26 | -------------------------------------------------------------------------------- 
/src/test/resources/tpcds/q87.sql: -------------------------------------------------------------------------------- 1 | SELECT count(*) 2 | FROM ((SELECT DISTINCT 3 | c_last_name, 4 | c_first_name, 5 | d_date 6 | FROM store_sales, date_dim, customer 7 | WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk 8 | AND store_sales.ss_customer_sk = customer.c_customer_sk 9 | AND d_month_seq BETWEEN 1200 AND 1200 + 11) 10 | EXCEPT 11 | (SELECT DISTINCT 12 | c_last_name, 13 | c_first_name, 14 | d_date 15 | FROM catalog_sales, date_dim, customer 16 | WHERE catalog_sales.cs_sold_date_sk = date_dim.d_date_sk 17 | AND catalog_sales.cs_bill_customer_sk = customer.c_customer_sk 18 | AND d_month_seq BETWEEN 1200 AND 1200 + 11) 19 | EXCEPT 20 | (SELECT DISTINCT 21 | c_last_name, 22 | c_first_name, 23 | d_date 24 | FROM web_sales, date_dim, customer 25 | WHERE web_sales.ws_sold_date_sk = date_dim.d_date_sk 26 | AND web_sales.ws_bill_customer_sk = customer.c_customer_sk 27 | AND d_month_seq BETWEEN 1200 AND 1200 + 11) 28 | ) cool_cust 29 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q67.sql: -------------------------------------------------------------------------------- 1 | SELECT * 2 | FROM 3 | (SELECT 4 | i_category, 5 | i_class, 6 | i_brand, 7 | i_product_name, 8 | d_year, 9 | d_qoy, 10 | d_moy, 11 | s_store_id, 12 | sumsales, 13 | rank() 14 | OVER (PARTITION BY i_category 15 | ORDER BY sumsales DESC) rk 16 | FROM 17 | (SELECT 18 | i_category, 19 | i_class, 20 | i_brand, 21 | i_product_name, 22 | d_year, 23 | d_qoy, 24 | d_moy, 25 | s_store_id, 26 | sum(coalesce(ss_sales_price * ss_quantity, 0)) sumsales 27 | FROM store_sales, date_dim, store, item 28 | WHERE ss_sold_date_sk = d_date_sk 29 | AND ss_item_sk = i_item_sk 30 | AND ss_store_sk = s_store_sk 31 | AND d_month_seq BETWEEN 1200 AND 1200 + 11 32 | GROUP BY ROLLUP (i_category, i_class, i_brand, i_product_name, d_year, d_qoy, 33 | d_moy, s_store_id)) dw1) dw2 34 
| WHERE rk <= 100 35 | ORDER BY 36 | i_category, i_class, i_brand, i_product_name, d_year, 37 | d_qoy, d_moy, s_store_id, sumsales, rk 38 | LIMIT 100 39 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q21.sql: -------------------------------------------------------------------------------- 1 | SELECT * 2 | FROM ( 3 | SELECT 4 | w_warehouse_name, 5 | i_item_id, 6 | sum(CASE WHEN (cast(d_date AS DATE) < cast('2000-03-11' AS DATE)) 7 | THEN inv_quantity_on_hand 8 | ELSE 0 END) AS inv_before, 9 | sum(CASE WHEN (cast(d_date AS DATE) >= cast('2000-03-11' AS DATE)) 10 | THEN inv_quantity_on_hand 11 | ELSE 0 END) AS inv_after 12 | FROM inventory, warehouse, item, date_dim 13 | WHERE i_current_price BETWEEN 0.99 AND 1.49 14 | AND i_item_sk = inv_item_sk 15 | AND inv_warehouse_sk = w_warehouse_sk 16 | AND inv_date_sk = d_date_sk 17 | AND d_date BETWEEN (cast('2000-03-11' AS DATE) - INTERVAL 30 days) 18 | AND (cast('2000-03-11' AS DATE) + INTERVAL 30 days) 19 | GROUP BY w_warehouse_name, i_item_id) x 20 | WHERE (CASE WHEN inv_before > 0 21 | THEN inv_after / inv_before 22 | ELSE NULL 23 | END) BETWEEN 2.0 / 3.0 AND 3.0 / 2.0 24 | ORDER BY w_warehouse_name, i_item_id 25 | LIMIT 100 26 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q24a.sql: -------------------------------------------------------------------------------- 1 | WITH ssales AS 2 | (SELECT 3 | c_last_name, 4 | c_first_name, 5 | s_store_name, 6 | ca_state, 7 | s_state, 8 | i_color, 9 | i_current_price, 10 | i_manager_id, 11 | i_units, 12 | i_size, 13 | sum(ss_net_paid) netpaid 14 | FROM store_sales, store_returns, store, item, customer, customer_address 15 | WHERE ss_ticket_number = sr_ticket_number 16 | AND ss_item_sk = sr_item_sk 17 | AND ss_customer_sk = c_customer_sk 18 | AND ss_item_sk = i_item_sk 19 | AND ss_store_sk = s_store_sk 20 | AND c_birth_country = upper(ca_country) 21 | AND s_zip = 
ca_zip 22 | AND s_market_id = 8 23 | GROUP BY c_last_name, c_first_name, s_store_name, ca_state, s_state, i_color, 24 | i_current_price, i_manager_id, i_units, i_size) 25 | SELECT 26 | c_last_name, 27 | c_first_name, 28 | s_store_name, 29 | sum(netpaid) paid 30 | FROM ssales 31 | WHERE i_color = 'pale' 32 | GROUP BY c_last_name, c_first_name, s_store_name 33 | HAVING sum(netpaid) > (SELECT 0.05 * avg(netpaid) 34 | FROM ssales) 35 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q24b.sql: -------------------------------------------------------------------------------- 1 | WITH ssales AS 2 | (SELECT 3 | c_last_name, 4 | c_first_name, 5 | s_store_name, 6 | ca_state, 7 | s_state, 8 | i_color, 9 | i_current_price, 10 | i_manager_id, 11 | i_units, 12 | i_size, 13 | sum(ss_net_paid) netpaid 14 | FROM store_sales, store_returns, store, item, customer, customer_address 15 | WHERE ss_ticket_number = sr_ticket_number 16 | AND ss_item_sk = sr_item_sk 17 | AND ss_customer_sk = c_customer_sk 18 | AND ss_item_sk = i_item_sk 19 | AND ss_store_sk = s_store_sk 20 | AND c_birth_country = upper(ca_country) 21 | AND s_zip = ca_zip 22 | AND s_market_id = 8 23 | GROUP BY c_last_name, c_first_name, s_store_name, ca_state, s_state, 24 | i_color, i_current_price, i_manager_id, i_units, i_size) 25 | SELECT 26 | c_last_name, 27 | c_first_name, 28 | s_store_name, 29 | sum(netpaid) paid 30 | FROM ssales 31 | WHERE i_color = 'chiffon' 32 | GROUP BY c_last_name, c_first_name, s_store_name 33 | HAVING sum(netpaid) > (SELECT 0.05 * avg(netpaid) 34 | FROM ssales) 35 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q16.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | count(DISTINCT cs_order_number) AS `order count `, 3 | sum(cs_ext_ship_cost) AS `total shipping cost `, 4 | sum(cs_net_profit) AS `total net profit ` 5 | FROM 6 | 
catalog_sales cs1, date_dim, customer_address, call_center 7 | WHERE 8 | d_date BETWEEN '2002-02-01' AND (CAST('2002-02-01' AS DATE) + INTERVAL 60 days) 9 | AND cs1.cs_ship_date_sk = d_date_sk 10 | AND cs1.cs_ship_addr_sk = ca_address_sk 11 | AND ca_state = 'GA' 12 | AND cs1.cs_call_center_sk = cc_call_center_sk 13 | AND cc_county IN 14 | ('Williamson County', 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County') 15 | AND EXISTS(SELECT * 16 | FROM catalog_sales cs2 17 | WHERE cs1.cs_order_number = cs2.cs_order_number 18 | AND cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) 19 | AND NOT EXISTS(SELECT * 20 | FROM catalog_returns cr1 21 | WHERE cs1.cs_order_number = cr1.cr_order_number) 22 | ORDER BY count(DISTINCT cs_order_number) 23 | LIMIT 100 24 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/ranger/authorization/spark/authorizer/SparkAccessControlException.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.apache.ranger.authorization.spark.authorizer 19 | 20 | class SparkAccessControlException(msg: String) extends Exception(msg) 21 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q97.sql: -------------------------------------------------------------------------------- 1 | WITH ssci AS ( 2 | SELECT 3 | ss_customer_sk customer_sk, 4 | ss_item_sk item_sk 5 | FROM store_sales, date_dim 6 | WHERE ss_sold_date_sk = d_date_sk 7 | AND d_month_seq BETWEEN 1200 AND 1200 + 11 8 | GROUP BY ss_customer_sk, ss_item_sk), 9 | csci AS ( 10 | SELECT 11 | cs_bill_customer_sk customer_sk, 12 | cs_item_sk item_sk 13 | FROM catalog_sales, date_dim 14 | WHERE cs_sold_date_sk = d_date_sk 15 | AND d_month_seq BETWEEN 1200 AND 1200 + 11 16 | GROUP BY cs_bill_customer_sk, cs_item_sk) 17 | SELECT 18 | sum(CASE WHEN ssci.customer_sk IS NOT NULL AND csci.customer_sk IS NULL 19 | THEN 1 20 | ELSE 0 END) store_only, 21 | sum(CASE WHEN ssci.customer_sk IS NULL AND csci.customer_sk IS NOT NULL 22 | THEN 1 23 | ELSE 0 END) catalog_only, 24 | sum(CASE WHEN ssci.customer_sk IS NOT NULL AND csci.customer_sk IS NOT NULL 25 | THEN 1 26 | ELSE 0 END) store_and_catalog 27 | FROM ssci 28 | FULL OUTER JOIN csci ON (ssci.customer_sk = csci.customer_sk 29 | AND ssci.item_sk = csci.item_sk) 30 | LIMIT 100 31 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q95.sql: -------------------------------------------------------------------------------- 1 | WITH ws_wh AS 2 | (SELECT 3 | ws1.ws_order_number, 4 | ws1.ws_warehouse_sk wh1, 5 | ws2.ws_warehouse_sk wh2 6 | FROM web_sales ws1, web_sales ws2 7 | WHERE ws1.ws_order_number = ws2.ws_order_number 8 | AND ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) 9 | SELECT 10 | count(DISTINCT ws_order_number) AS `order count `, 11 | sum(ws_ext_ship_cost) AS `total shipping cost `, 12 | sum(ws_net_profit) AS `total net profit ` 13 | 
FROM 14 | web_sales ws1, date_dim, customer_address, web_site 15 | WHERE 16 | d_date BETWEEN '1999-02-01' AND 17 | (CAST('1999-02-01' AS DATE) + INTERVAL 60 DAY) 18 | AND ws1.ws_ship_date_sk = d_date_sk 19 | AND ws1.ws_ship_addr_sk = ca_address_sk 20 | AND ca_state = 'IL' 21 | AND ws1.ws_web_site_sk = web_site_sk 22 | AND web_company_name = 'pri' 23 | AND ws1.ws_order_number IN (SELECT ws_order_number 24 | FROM ws_wh) 25 | AND ws1.ws_order_number IN (SELECT wr_order_number 26 | FROM web_returns, ws_wh 27 | WHERE wr_order_number = ws_wh.ws_order_number) 28 | ORDER BY count(DISTINCT ws_order_number) 29 | LIMIT 100 30 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q25.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_item_id, 3 | i_item_desc, 4 | s_store_id, 5 | s_store_name, 6 | sum(ss_net_profit) AS store_sales_profit, 7 | sum(sr_net_loss) AS store_returns_loss, 8 | sum(cs_net_profit) AS catalog_sales_profit 9 | FROM 10 | store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, date_dim d3, 11 | store, item 12 | WHERE 13 | d1.d_moy = 4 14 | AND d1.d_year = 2001 15 | AND d1.d_date_sk = ss_sold_date_sk 16 | AND i_item_sk = ss_item_sk 17 | AND s_store_sk = ss_store_sk 18 | AND ss_customer_sk = sr_customer_sk 19 | AND ss_item_sk = sr_item_sk 20 | AND ss_ticket_number = sr_ticket_number 21 | AND sr_returned_date_sk = d2.d_date_sk 22 | AND d2.d_moy BETWEEN 4 AND 10 23 | AND d2.d_year = 2001 24 | AND sr_customer_sk = cs_bill_customer_sk 25 | AND sr_item_sk = cs_item_sk 26 | AND cs_sold_date_sk = d3.d_date_sk 27 | AND d3.d_moy BETWEEN 4 AND 10 28 | AND d3.d_year = 2001 29 | GROUP BY 30 | i_item_id, i_item_desc, s_store_id, s_store_name 31 | ORDER BY 32 | i_item_id, i_item_desc, s_store_id, s_store_name 33 | LIMIT 100 -------------------------------------------------------------------------------- /src/test/resources/tpcds/q29.sql: 
-------------------------------------------------------------------------------- 1 | SELECT 2 | i_item_id, 3 | i_item_desc, 4 | s_store_id, 5 | s_store_name, 6 | sum(ss_quantity) AS store_sales_quantity, 7 | sum(sr_return_quantity) AS store_returns_quantity, 8 | sum(cs_quantity) AS catalog_sales_quantity 9 | FROM 10 | store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, 11 | date_dim d3, store, item 12 | WHERE 13 | d1.d_moy = 9 14 | AND d1.d_year = 1999 15 | AND d1.d_date_sk = ss_sold_date_sk 16 | AND i_item_sk = ss_item_sk 17 | AND s_store_sk = ss_store_sk 18 | AND ss_customer_sk = sr_customer_sk 19 | AND ss_item_sk = sr_item_sk 20 | AND ss_ticket_number = sr_ticket_number 21 | AND sr_returned_date_sk = d2.d_date_sk 22 | AND d2.d_moy BETWEEN 9 AND 9 + 3 23 | AND d2.d_year = 1999 24 | AND sr_customer_sk = cs_bill_customer_sk 25 | AND sr_item_sk = cs_item_sk 26 | AND cs_sold_date_sk = d3.d_date_sk 27 | AND d3.d_year IN (1999, 1999 + 1, 1999 + 2) 28 | GROUP BY 29 | i_item_id, i_item_desc, s_store_id, s_store_name 30 | ORDER BY 31 | i_item_id, i_item_desc, s_store_id, s_store_name 32 | LIMIT 100 33 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/ranger/authorization/spark/authorizer/authorizer.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.ranger.authorization.spark 19 | 20 | import org.apache.spark.sql.SparkSessionExtensions 21 | 22 | package object authorizer { 23 | 24 | type Extensions = SparkSessionExtensions => Unit 25 | 26 | } 27 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q38.sql: -------------------------------------------------------------------------------- 1 | SELECT count(*) 2 | FROM ( 3 | SELECT DISTINCT 4 | c_last_name, 5 | c_first_name, 6 | d_date 7 | FROM store_sales, date_dim, customer 8 | WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk 9 | AND store_sales.ss_customer_sk = customer.c_customer_sk 10 | AND d_month_seq BETWEEN 1200 AND 1200 + 11 11 | INTERSECT 12 | SELECT DISTINCT 13 | c_last_name, 14 | c_first_name, 15 | d_date 16 | FROM catalog_sales, date_dim, customer 17 | WHERE catalog_sales.cs_sold_date_sk = date_dim.d_date_sk 18 | AND catalog_sales.cs_bill_customer_sk = customer.c_customer_sk 19 | AND d_month_seq BETWEEN 1200 AND 1200 + 11 20 | INTERSECT 21 | SELECT DISTINCT 22 | c_last_name, 23 | c_first_name, 24 | d_date 25 | FROM web_sales, date_dim, customer 26 | WHERE web_sales.ws_sold_date_sk = date_dim.d_date_sk 27 | AND web_sales.ws_bill_customer_sk = customer.c_customer_sk 28 | AND d_month_seq BETWEEN 1200 AND 1200 + 11 29 | ) hot_cust 30 | LIMIT 100 31 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/ranger/authorization/spark/authorizer/SparkObjectType.scala: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.ranger.authorization.spark.authorizer 19 | 20 | object SparkObjectType extends Enumeration { 21 | type SparkObjectType = Value 22 | 23 | val NONE, DATABASE, TABLE, VIEW, COLUMN, FUNCTION, URI = Value 24 | } 25 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/ranger/authorization/spark/authorizer/SparkPrivObjectActionType.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.ranger.authorization.spark.authorizer 19 | 20 | object SparkPrivObjectActionType extends Enumeration { 21 | type SparkPrivObjectActionType = Value 22 | val OTHER, INSERT, INSERT_OVERWRITE = Value 23 | } 24 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q43.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | s_store_name, 3 | s_store_id, 4 | sum(CASE WHEN (d_day_name = 'Sunday') 5 | THEN ss_sales_price 6 | ELSE NULL END) sun_sales, 7 | sum(CASE WHEN (d_day_name = 'Monday') 8 | THEN ss_sales_price 9 | ELSE NULL END) mon_sales, 10 | sum(CASE WHEN (d_day_name = 'Tuesday') 11 | THEN ss_sales_price 12 | ELSE NULL END) tue_sales, 13 | sum(CASE WHEN (d_day_name = 'Wednesday') 14 | THEN ss_sales_price 15 | ELSE NULL END) wed_sales, 16 | sum(CASE WHEN (d_day_name = 'Thursday') 17 | THEN ss_sales_price 18 | ELSE NULL END) thu_sales, 19 | sum(CASE WHEN (d_day_name = 'Friday') 20 | THEN ss_sales_price 21 | ELSE NULL END) fri_sales, 22 | sum(CASE WHEN (d_day_name = 'Saturday') 23 | THEN ss_sales_price 24 | ELSE NULL END) sat_sales 25 | FROM date_dim, store_sales, store 26 | WHERE d_date_sk = ss_sold_date_sk AND 27 | s_store_sk = ss_store_sk AND 28 | s_gmt_offset = -5 AND 29 | d_year = 2000 30 | GROUP BY s_store_name, s_store_id 31 | ORDER BY s_store_name, s_store_id, sun_sales, mon_sales, tue_sales, wed_sales, 32 | thu_sales, fri_sales, sat_sales 33 | LIMIT 100 34 | 
-------------------------------------------------------------------------------- /docs/installation-addons.md: -------------------------------------------------------------------------------- 1 | # Installation Addons 2 | 3 | This page lists some tips and known problems to be aware of when using this library. 4 | 5 | ## Ranger Admin does not list databases, tables and columns when you create or edit policies. 6 | 7 | Because Ranger Admin uses Hadoop 3 Hive libraries, listing databases, tables and columns does not work in Ranger Admin. To enable this listing capability, put the files below in $RANGER_HOME/ews/webapp/WEB-INF/lib/ : 8 | 9 | - hive-exec-1.2.1.spark2.jar (Hadoop 3 compatible version needed. You can download it from [here](https://github.com/MobinRanjbar/hive-exec-jar/releases).) 10 | - hive-jdbc-1.2.1.spark2.jar (Available in the Spark jars folder) 11 | - hive-metastore-1.2.1.spark2.jar (Available in the Spark jars folder) 12 | - hive-service-1.2.1.jar (Download from the internet) 13 | 14 | and then restart ranger-admin. 15 | 16 | ## Dependency issues in Apache Ranger 2.X.X 17 | 18 | ### NoClassDefFoundError: com.kstruct.gethostname4j.Hostname 19 | 20 | To resolve it, place 'gethostname4j.jar' into $SPARK_HOME/jars. 21 | 22 | ### NoClassDefFoundError: com.sun.jna.Platform 23 | 24 | To resolve it, place 'jna-5.5.0.jar' into $SPARK_HOME/jars. 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/ranger/authorization/spark/authorizer/SparkPrivilegeObjectType.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.ranger.authorization.spark.authorizer 19 | 20 | object SparkPrivilegeObjectType extends Enumeration { 21 | type SparkPrivilegeObjectType = Value 22 | val DATABASE, TABLE_OR_VIEW, FUNCTION, DFS_URI = Value 23 | } 24 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/ranger/authorization/spark/authorizer/SparkAccessType.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.apache.ranger.authorization.spark.authorizer 19 | 20 | object SparkAccessType extends Enumeration { 21 | type SparkAccessType = Value 22 | 23 | val NONE, CREATE, ALTER, DROP, SELECT, UPDATE, USE, READ, WRITE, ALL, ADMIN = Value 24 | } 25 | 26 | -------------------------------------------------------------------------------- /src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one or more 3 | # contributor license agreements. See the NOTICE file distributed with 4 | # this work for additional information regarding copyright ownership. 5 | # The ASF licenses this file to You under the Apache License, Version 2.0 6 | # (the "License"); you may not use this file except in compliance with 7 | # the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | # Set everything to be logged to the console 19 | log4j.rootCategory=FATAL, console 20 | log4j.appender.console=org.apache.log4j.ConsoleAppender 21 | log4j.appender.console.target=System.err 22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 23 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n -------------------------------------------------------------------------------- /src/test/resources/tpcds/q18.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_item_id, 3 | ca_country, 4 | ca_state, 5 | ca_county, 6 | avg(cast(cs_quantity AS DECIMAL(12, 2))) agg1, 7 | avg(cast(cs_list_price AS DECIMAL(12, 2))) agg2, 8 | avg(cast(cs_coupon_amt AS DECIMAL(12, 2))) agg3, 9 | avg(cast(cs_sales_price AS DECIMAL(12, 2))) agg4, 10 | avg(cast(cs_net_profit AS DECIMAL(12, 2))) agg5, 11 | avg(cast(c_birth_year AS DECIMAL(12, 2))) agg6, 12 | avg(cast(cd1.cd_dep_count AS DECIMAL(12, 2))) agg7 13 | FROM catalog_sales, customer_demographics cd1, 14 | customer_demographics cd2, customer, customer_address, date_dim, item 15 | WHERE cs_sold_date_sk = d_date_sk AND 16 | cs_item_sk = i_item_sk AND 17 | cs_bill_cdemo_sk = cd1.cd_demo_sk AND 18 | cs_bill_customer_sk = c_customer_sk AND 19 | cd1.cd_gender = 'F' AND 20 | cd1.cd_education_status = 'Unknown' AND 21 | c_current_cdemo_sk = cd2.cd_demo_sk AND 22 | c_current_addr_sk = ca_address_sk AND 23 | c_birth_month IN (1, 6, 8, 9, 12, 2) AND 24 | d_year = 1998 AND 25 | ca_state IN ('MS', 'IN', 'ND', 'OK', 'NM', 'VA', 'MS') 26 | GROUP BY ROLLUP (i_item_id, ca_country, ca_state, ca_county) 27 | ORDER BY ca_country, ca_state, ca_county, i_item_id 28 | LIMIT 100 29 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q89.sql: -------------------------------------------------------------------------------- 1 | SELECT * 2 | FROM ( 3 | SELECT 4 | i_category, 5 | i_class, 6 | 
i_brand, 7 | s_store_name, 8 | s_company_name, 9 | d_moy, 10 | sum(ss_sales_price) sum_sales, 11 | avg(sum(ss_sales_price)) 12 | OVER 13 | (PARTITION BY i_category, i_brand, s_store_name, s_company_name) 14 | avg_monthly_sales 15 | FROM item, store_sales, date_dim, store 16 | WHERE ss_item_sk = i_item_sk AND 17 | ss_sold_date_sk = d_date_sk AND 18 | ss_store_sk = s_store_sk AND 19 | d_year IN (1999) AND 20 | ((i_category IN ('Books', 'Electronics', 'Sports') AND 21 | i_class IN ('computers', 'stereo', 'football')) 22 | OR (i_category IN ('Men', 'Jewelry', 'Women') AND 23 | i_class IN ('shirts', 'birdal', 'dresses'))) 24 | GROUP BY i_category, i_class, i_brand, 25 | s_store_name, s_company_name, d_moy) tmp1 26 | WHERE CASE WHEN (avg_monthly_sales <> 0) 27 | THEN (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) 28 | ELSE NULL END > 0.1 29 | ORDER BY sum_sales - avg_monthly_sales, s_store_name 30 | LIMIT 100 31 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q70.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | sum(ss_net_profit) AS total_sum, 3 | s_state, 4 | s_county, 5 | grouping(s_state) + grouping(s_county) AS lochierarchy, 6 | rank() 7 | OVER ( 8 | PARTITION BY grouping(s_state) + grouping(s_county), 9 | CASE WHEN grouping(s_county) = 0 10 | THEN s_state END 11 | ORDER BY sum(ss_net_profit) DESC) AS rank_within_parent 12 | FROM 13 | store_sales, date_dim d1, store 14 | WHERE 15 | d1.d_month_seq BETWEEN 1200 AND 1200 + 11 16 | AND d1.d_date_sk = ss_sold_date_sk 17 | AND s_store_sk = ss_store_sk 18 | AND s_state IN 19 | (SELECT s_state 20 | FROM 21 | (SELECT 22 | s_state AS s_state, 23 | rank() 24 | OVER (PARTITION BY s_state 25 | ORDER BY sum(ss_net_profit) DESC) AS ranking 26 | FROM store_sales, store, date_dim 27 | WHERE d_month_seq BETWEEN 1200 AND 1200 + 11 28 | AND d_date_sk = ss_sold_date_sk 29 | AND s_store_sk = ss_store_sk 30 | GROUP BY 
s_state) tmp1 31 | WHERE ranking <= 5) 32 | GROUP BY ROLLUP (s_state, s_county) 33 | ORDER BY 34 | lochierarchy DESC 35 | , CASE WHEN lochierarchy = 0 36 | THEN s_state END 37 | , rank_within_parent 38 | LIMIT 100 39 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/ranger/authorization/spark/authorizer/RangerSparkAuditHandler.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.apache.ranger.authorization.spark.authorizer 19 | 20 | import org.apache.ranger.plugin.audit.RangerDefaultAuditHandler 21 | 22 | class RangerSparkAuditHandler extends RangerDefaultAuditHandler { 23 | 24 | // TODO(Kent Yao): Implementing meaningfully audit functions 25 | 26 | } 27 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q73.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | c_last_name, 3 | c_first_name, 4 | c_salutation, 5 | c_preferred_cust_flag, 6 | ss_ticket_number, 7 | cnt 8 | FROM 9 | (SELECT 10 | ss_ticket_number, 11 | ss_customer_sk, 12 | count(*) cnt 13 | FROM store_sales, date_dim, store, household_demographics 14 | WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk 15 | AND store_sales.ss_store_sk = store.s_store_sk 16 | AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 17 | AND date_dim.d_dom BETWEEN 1 AND 2 18 | AND (household_demographics.hd_buy_potential = '>10000' OR 19 | household_demographics.hd_buy_potential = 'unknown') 20 | AND household_demographics.hd_vehicle_count > 0 21 | AND CASE WHEN household_demographics.hd_vehicle_count > 0 22 | THEN 23 | household_demographics.hd_dep_count / household_demographics.hd_vehicle_count 24 | ELSE NULL END > 1 25 | AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) 26 | AND store.s_county IN ('Williamson County', 'Franklin Parish', 'Bronx County', 'Orange County') 27 | GROUP BY ss_ticket_number, ss_customer_sk) dj, customer 28 | WHERE ss_customer_sk = c_customer_sk 29 | AND cnt BETWEEN 1 AND 5 30 | ORDER BY cnt DESC 31 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/catalyst/plans/logical/RangerSparkMasking.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | 
* contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.sql.catalyst.plans.logical 19 | 20 | import org.apache.spark.sql.catalyst.expressions.Attribute 21 | 22 | /** 23 | * A marker [[LogicalPlan]] for column data masking 24 | */ 25 | case class RangerSparkMasking(child: LogicalPlan) extends UnaryNode { 26 | override def output: Seq[Attribute] = child.output 27 | } 28 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q30.sql: -------------------------------------------------------------------------------- 1 | WITH customer_total_return AS 2 | (SELECT 3 | wr_returning_customer_sk AS ctr_customer_sk, 4 | ca_state AS ctr_state, 5 | sum(wr_return_amt) AS ctr_total_return 6 | FROM web_returns, date_dim, customer_address 7 | WHERE wr_returned_date_sk = d_date_sk 8 | AND d_year = 2002 9 | AND wr_returning_addr_sk = ca_address_sk 10 | GROUP BY wr_returning_customer_sk, ca_state) 11 | SELECT 12 | c_customer_id, 13 | c_salutation, 14 | c_first_name, 15 | c_last_name, 16 | c_preferred_cust_flag, 17 | c_birth_day, 18 | c_birth_month, 19 | c_birth_year, 20 | c_birth_country, 21 | c_login, 22 | c_email_address, 23 | c_last_review_date, 24 | ctr_total_return 25 | FROM customer_total_return 
ctr1, customer_address, customer 26 | WHERE ctr1.ctr_total_return > (SELECT avg(ctr_total_return) * 1.2 27 | FROM customer_total_return ctr2 28 | WHERE ctr1.ctr_state = ctr2.ctr_state) 29 | AND ca_address_sk = c_current_addr_sk 30 | AND ca_state = 'GA' 31 | AND ctr1.ctr_customer_sk = c_customer_sk 32 | ORDER BY c_customer_id, c_salutation, c_first_name, c_last_name, c_preferred_cust_flag 33 | , c_birth_day, c_birth_month, c_birth_year, c_birth_country, c_login, c_email_address 34 | , c_last_review_date, ctr_total_return 35 | LIMIT 100 36 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q61.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | promotions, 3 | total, 4 | cast(promotions AS DECIMAL(15, 4)) / cast(total AS DECIMAL(15, 4)) * 100 5 | FROM 6 | (SELECT sum(ss_ext_sales_price) promotions 7 | FROM store_sales, store, promotion, date_dim, customer, customer_address, item 8 | WHERE ss_sold_date_sk = d_date_sk 9 | AND ss_store_sk = s_store_sk 10 | AND ss_promo_sk = p_promo_sk 11 | AND ss_customer_sk = c_customer_sk 12 | AND ca_address_sk = c_current_addr_sk 13 | AND ss_item_sk = i_item_sk 14 | AND ca_gmt_offset = -5 15 | AND i_category = 'Jewelry' 16 | AND (p_channel_dmail = 'Y' OR p_channel_email = 'Y' OR p_channel_tv = 'Y') 17 | AND s_gmt_offset = -5 18 | AND d_year = 1998 19 | AND d_moy = 11) promotional_sales, 20 | (SELECT sum(ss_ext_sales_price) total 21 | FROM store_sales, store, date_dim, customer, customer_address, item 22 | WHERE ss_sold_date_sk = d_date_sk 23 | AND ss_store_sk = s_store_sk 24 | AND ss_customer_sk = c_customer_sk 25 | AND ca_address_sk = c_current_addr_sk 26 | AND ss_item_sk = i_item_sk 27 | AND ca_gmt_offset = -5 28 | AND i_category = 'Jewelry' 29 | AND s_gmt_offset = -5 30 | AND d_year = 1998 31 | AND d_moy = 11) all_sales 32 | ORDER BY promotions, total 33 | LIMIT 100 34 | 
-------------------------------------------------------------------------------- /src/test/resources/tpcds/q62.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | substr(w_warehouse_name, 1, 20), 3 | sm_type, 4 | web_name, 5 | sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk <= 30) 6 | THEN 1 7 | ELSE 0 END) AS `30 days `, 8 | sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 30) AND 9 | (ws_ship_date_sk - ws_sold_date_sk <= 60) 10 | THEN 1 11 | ELSE 0 END) AS `31 - 60 days `, 12 | sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 60) AND 13 | (ws_ship_date_sk - ws_sold_date_sk <= 90) 14 | THEN 1 15 | ELSE 0 END) AS `61 - 90 days `, 16 | sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 90) AND 17 | (ws_ship_date_sk - ws_sold_date_sk <= 120) 18 | THEN 1 19 | ELSE 0 END) AS `91 - 120 days `, 20 | sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 120) 21 | THEN 1 22 | ELSE 0 END) AS `>120 days ` 23 | FROM 24 | web_sales, warehouse, ship_mode, web_site, date_dim 25 | WHERE 26 | d_month_seq BETWEEN 1200 AND 1200 + 11 27 | AND ws_ship_date_sk = d_date_sk 28 | AND ws_warehouse_sk = w_warehouse_sk 29 | AND ws_ship_mode_sk = sm_ship_mode_sk 30 | AND ws_web_site_sk = web_site_sk 31 | GROUP BY 32 | substr(w_warehouse_name, 1, 20), sm_type, web_name 33 | ORDER BY 34 | substr(w_warehouse_name, 1, 20), sm_type, web_name 35 | LIMIT 100 36 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q99.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | substr(w_warehouse_name, 1, 20), 3 | sm_type, 4 | cc_name, 5 | sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk <= 30) 6 | THEN 1 7 | ELSE 0 END) AS `30 days `, 8 | sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 30) AND 9 | (cs_ship_date_sk - cs_sold_date_sk <= 60) 10 | THEN 1 11 | ELSE 0 END) AS `31 - 60 days `, 12 | sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 60) AND 13 
| (cs_ship_date_sk - cs_sold_date_sk <= 90) 14 | THEN 1 15 | ELSE 0 END) AS `61 - 90 days `, 16 | sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 90) AND 17 | (cs_ship_date_sk - cs_sold_date_sk <= 120) 18 | THEN 1 19 | ELSE 0 END) AS `91 - 120 days `, 20 | sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 120) 21 | THEN 1 22 | ELSE 0 END) AS `>120 days ` 23 | FROM 24 | catalog_sales, warehouse, ship_mode, call_center, date_dim 25 | WHERE 26 | d_month_seq BETWEEN 1200 AND 1200 + 11 27 | AND cs_ship_date_sk = d_date_sk 28 | AND cs_warehouse_sk = w_warehouse_sk 29 | AND cs_ship_mode_sk = sm_ship_mode_sk 30 | AND cs_call_center_sk = cc_call_center_sk 31 | GROUP BY 32 | substr(w_warehouse_name, 1, 20), sm_type, cc_name 33 | ORDER BY substr(w_warehouse_name, 1, 20), sm_type, cc_name 34 | LIMIT 100 35 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q46.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | c_last_name, 3 | c_first_name, 4 | ca_city, 5 | bought_city, 6 | ss_ticket_number, 7 | amt, 8 | profit 9 | FROM 10 | (SELECT 11 | ss_ticket_number, 12 | ss_customer_sk, 13 | ca_city bought_city, 14 | sum(ss_coupon_amt) amt, 15 | sum(ss_net_profit) profit 16 | FROM store_sales, date_dim, store, household_demographics, customer_address 17 | WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk 18 | AND store_sales.ss_store_sk = store.s_store_sk 19 | AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 20 | AND store_sales.ss_addr_sk = customer_address.ca_address_sk 21 | AND (household_demographics.hd_dep_count = 4 OR 22 | household_demographics.hd_vehicle_count = 3) 23 | AND date_dim.d_dow IN (6, 0) 24 | AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) 25 | AND store.s_city IN ('Fairview', 'Midway', 'Fairview', 'Fairview', 'Fairview') 26 | GROUP BY ss_ticket_number, ss_customer_sk, ss_addr_sk, ca_city) dn, customer, 27 | customer_address current_addr 
28 | WHERE ss_customer_sk = c_customer_sk 29 | AND customer.c_current_addr_sk = current_addr.ca_address_sk 30 | AND current_addr.ca_city <> bought_city 31 | ORDER BY c_last_name, c_first_name, ca_city, bought_city, ss_ticket_number 32 | LIMIT 100 33 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q68.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | c_last_name, 3 | c_first_name, 4 | ca_city, 5 | bought_city, 6 | ss_ticket_number, 7 | extended_price, 8 | extended_tax, 9 | list_price 10 | FROM (SELECT 11 | ss_ticket_number, 12 | ss_customer_sk, 13 | ca_city bought_city, 14 | sum(ss_ext_sales_price) extended_price, 15 | sum(ss_ext_list_price) list_price, 16 | sum(ss_ext_tax) extended_tax 17 | FROM store_sales, date_dim, store, household_demographics, customer_address 18 | WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk 19 | AND store_sales.ss_store_sk = store.s_store_sk 20 | AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 21 | AND store_sales.ss_addr_sk = customer_address.ca_address_sk 22 | AND date_dim.d_dom BETWEEN 1 AND 2 23 | AND (household_demographics.hd_dep_count = 4 OR 24 | household_demographics.hd_vehicle_count = 3) 25 | AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) 26 | AND store.s_city IN ('Midway', 'Fairview') 27 | GROUP BY ss_ticket_number, ss_customer_sk, ss_addr_sk, ca_city) dn, 28 | customer, 29 | customer_address current_addr 30 | WHERE ss_customer_sk = c_customer_sk 31 | AND customer.c_current_addr_sk = current_addr.ca_address_sk 32 | AND current_addr.ca_city <> bought_city 33 | ORDER BY c_last_name, ss_ticket_number 34 | LIMIT 100 35 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q39a.sql: -------------------------------------------------------------------------------- 1 | WITH inv AS 2 | (SELECT 3 | w_warehouse_name, 4 | w_warehouse_sk, 5 | 
i_item_sk, 6 | d_moy, 7 | stdev, 8 | mean, 9 | CASE mean 10 | WHEN 0 11 | THEN NULL 12 | ELSE stdev / mean END cov 13 | FROM (SELECT 14 | w_warehouse_name, 15 | w_warehouse_sk, 16 | i_item_sk, 17 | d_moy, 18 | stddev_samp(inv_quantity_on_hand) stdev, 19 | avg(inv_quantity_on_hand) mean 20 | FROM inventory, item, warehouse, date_dim 21 | WHERE inv_item_sk = i_item_sk 22 | AND inv_warehouse_sk = w_warehouse_sk 23 | AND inv_date_sk = d_date_sk 24 | AND d_year = 2001 25 | GROUP BY w_warehouse_name, w_warehouse_sk, i_item_sk, d_moy) foo 26 | WHERE CASE mean 27 | WHEN 0 28 | THEN 0 29 | ELSE stdev / mean END > 1) 30 | SELECT 31 | inv1.w_warehouse_sk, 32 | inv1.i_item_sk, 33 | inv1.d_moy, 34 | inv1.mean, 35 | inv1.cov, 36 | inv2.w_warehouse_sk, 37 | inv2.i_item_sk, 38 | inv2.d_moy, 39 | inv2.mean, 40 | inv2.cov 41 | FROM inv inv1, inv inv2 42 | WHERE inv1.i_item_sk = inv2.i_item_sk 43 | AND inv1.w_warehouse_sk = inv2.w_warehouse_sk 44 | AND inv1.d_moy = 1 45 | AND inv2.d_moy = 1 + 1 46 | ORDER BY inv1.w_warehouse_sk, inv1.i_item_sk, inv1.d_moy, inv1.mean, inv1.cov 47 | , inv2.d_moy, inv2.mean, inv2.cov 48 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q44.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | asceding.rnk, 3 | i1.i_product_name best_performing, 4 | i2.i_product_name worst_performing 5 | FROM (SELECT * 6 | FROM (SELECT 7 | item_sk, 8 | rank() 9 | OVER ( 10 | ORDER BY rank_col ASC) rnk 11 | FROM (SELECT 12 | ss_item_sk item_sk, 13 | avg(ss_net_profit) rank_col 14 | FROM store_sales ss1 15 | WHERE ss_store_sk = 4 16 | GROUP BY ss_item_sk 17 | HAVING avg(ss_net_profit) > 0.9 * (SELECT avg(ss_net_profit) rank_col 18 | FROM store_sales 19 | WHERE ss_store_sk = 4 20 | AND ss_addr_sk IS NULL 21 | GROUP BY ss_store_sk)) V1) V11 22 | WHERE rnk < 11) asceding, 23 | (SELECT * 24 | FROM (SELECT 25 | item_sk, 26 | rank() 27 | OVER ( 28 | ORDER BY rank_col DESC) 
rnk 29 | FROM (SELECT 30 | ss_item_sk item_sk, 31 | avg(ss_net_profit) rank_col 32 | FROM store_sales ss1 33 | WHERE ss_store_sk = 4 34 | GROUP BY ss_item_sk 35 | HAVING avg(ss_net_profit) > 0.9 * (SELECT avg(ss_net_profit) rank_col 36 | FROM store_sales 37 | WHERE ss_store_sk = 4 38 | AND ss_addr_sk IS NULL 39 | GROUP BY ss_store_sk)) V2) V21 40 | WHERE rnk < 11) descending, 41 | item i1, item i2 42 | WHERE asceding.rnk = descending.rnk 43 | AND i1.i_item_sk = asceding.item_sk 44 | AND i2.i_item_sk = descending.item_sk 45 | ORDER BY asceding.rnk 46 | LIMIT 100 47 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q63.sql: -------------------------------------------------------------------------------- 1 | SELECT * 2 | FROM (SELECT 3 | i_manager_id, 4 | sum(ss_sales_price) sum_sales, 5 | avg(sum(ss_sales_price)) 6 | OVER (PARTITION BY i_manager_id) avg_monthly_sales 7 | FROM item 8 | , store_sales 9 | , date_dim 10 | , store 11 | WHERE ss_item_sk = i_item_sk 12 | AND ss_sold_date_sk = d_date_sk 13 | AND ss_store_sk = s_store_sk 14 | AND d_month_seq IN (1200, 1200 + 1, 1200 + 2, 1200 + 3, 1200 + 4, 1200 + 5, 1200 + 6, 1200 + 7, 15 | 1200 + 8, 1200 + 9, 1200 + 10, 1200 + 11) 16 | AND ((i_category IN ('Books', 'Children', 'Electronics') 17 | AND i_class IN ('personal', 'portable', 'refernece', 'self-help') 18 | AND i_brand IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 19 | 'exportiunivamalg #9', 'scholaramalgamalg #9')) 20 | OR (i_category IN ('Women', 'Music', 'Men') 21 | AND i_class IN ('accessories', 'classical', 'fragrances', 'pants') 22 | AND i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 23 | 'importoamalg #1'))) 24 | GROUP BY i_manager_id, d_moy) tmp1 25 | WHERE CASE WHEN avg_monthly_sales > 0 26 | THEN abs(sum_sales - avg_monthly_sales) / avg_monthly_sales 27 | ELSE NULL END > 0.1 28 | ORDER BY i_manager_id 29 | , avg_monthly_sales 30 | , sum_sales 31 | LIMIT 100 32 | 
-------------------------------------------------------------------------------- /src/test/resources/tpcds/q39b.sql: -------------------------------------------------------------------------------- 1 | WITH inv AS 2 | (SELECT 3 | w_warehouse_name, 4 | w_warehouse_sk, 5 | i_item_sk, 6 | d_moy, 7 | stdev, 8 | mean, 9 | CASE mean 10 | WHEN 0 11 | THEN NULL 12 | ELSE stdev / mean END cov 13 | FROM (SELECT 14 | w_warehouse_name, 15 | w_warehouse_sk, 16 | i_item_sk, 17 | d_moy, 18 | stddev_samp(inv_quantity_on_hand) stdev, 19 | avg(inv_quantity_on_hand) mean 20 | FROM inventory, item, warehouse, date_dim 21 | WHERE inv_item_sk = i_item_sk 22 | AND inv_warehouse_sk = w_warehouse_sk 23 | AND inv_date_sk = d_date_sk 24 | AND d_year = 2001 25 | GROUP BY w_warehouse_name, w_warehouse_sk, i_item_sk, d_moy) foo 26 | WHERE CASE mean 27 | WHEN 0 28 | THEN 0 29 | ELSE stdev / mean END > 1) 30 | SELECT 31 | inv1.w_warehouse_sk, 32 | inv1.i_item_sk, 33 | inv1.d_moy, 34 | inv1.mean, 35 | inv1.cov, 36 | inv2.w_warehouse_sk, 37 | inv2.i_item_sk, 38 | inv2.d_moy, 39 | inv2.mean, 40 | inv2.cov 41 | FROM inv inv1, inv inv2 42 | WHERE inv1.i_item_sk = inv2.i_item_sk 43 | AND inv1.w_warehouse_sk = inv2.w_warehouse_sk 44 | AND inv1.d_moy = 1 45 | AND inv2.d_moy = 1 + 1 46 | AND inv1.cov > 1.5 47 | ORDER BY inv1.w_warehouse_sk, inv1.i_item_sk, inv1.d_moy, inv1.mean, inv1.cov 48 | , inv2.d_moy, inv2.mean, inv2.cov 49 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/catalyst/plans/logical/RangerSparkRowFilter.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 
/**
 * Wrapper node around a plan that has already had its row-level filter applied.
 *
 * It only marks the plan; it is removed again when the logical plan is turned into a
 * physical plan.
 *
 * @param child the transformed plan carrying the row-level filter
 */
case class RangerSparkRowFilter(child: LogicalPlan) extends UnaryNode {

  /** The output attributes of the wrapped plan, passed through unchanged. */
  override def output: Seq[Attribute] = child.output
}
customer_address, customer 29 | WHERE ctr1.ctr_total_return > (SELECT avg(ctr_total_return) * 1.2 30 | FROM customer_total_return ctr2 31 | WHERE ctr1.ctr_state = ctr2.ctr_state) 32 | AND ca_address_sk = c_current_addr_sk 33 | AND ca_state = 'GA' 34 | AND ctr1.ctr_customer_sk = c_customer_sk 35 | ORDER BY c_customer_id, c_salutation, c_first_name, c_last_name, ca_street_number, ca_street_name 36 | , ca_street_type, ca_suite_number, ca_city, ca_county, ca_state, ca_zip, ca_country, ca_gmt_offset 37 | , ca_location_type, ctr_total_return 38 | LIMIT 100 39 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q71.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_brand_id brand_id, 3 | i_brand brand, 4 | t_hour, 5 | t_minute, 6 | sum(ext_price) ext_price 7 | FROM item, 8 | (SELECT 9 | ws_ext_sales_price AS ext_price, 10 | ws_sold_date_sk AS sold_date_sk, 11 | ws_item_sk AS sold_item_sk, 12 | ws_sold_time_sk AS time_sk 13 | FROM web_sales, date_dim 14 | WHERE d_date_sk = ws_sold_date_sk 15 | AND d_moy = 11 16 | AND d_year = 1999 17 | UNION ALL 18 | SELECT 19 | cs_ext_sales_price AS ext_price, 20 | cs_sold_date_sk AS sold_date_sk, 21 | cs_item_sk AS sold_item_sk, 22 | cs_sold_time_sk AS time_sk 23 | FROM catalog_sales, date_dim 24 | WHERE d_date_sk = cs_sold_date_sk 25 | AND d_moy = 11 26 | AND d_year = 1999 27 | UNION ALL 28 | SELECT 29 | ss_ext_sales_price AS ext_price, 30 | ss_sold_date_sk AS sold_date_sk, 31 | ss_item_sk AS sold_item_sk, 32 | ss_sold_time_sk AS time_sk 33 | FROM store_sales, date_dim 34 | WHERE d_date_sk = ss_sold_date_sk 35 | AND d_moy = 11 36 | AND d_year = 1999 37 | ) AS tmp, time_dim 38 | WHERE 39 | sold_item_sk = i_item_sk 40 | AND i_manager_id = 1 41 | AND time_sk = t_time_sk 42 | AND (t_meal_time = 'breakfast' OR t_meal_time = 'dinner') 43 | GROUP BY i_brand, i_brand_id, t_hour, t_minute 44 | ORDER BY ext_price DESC, brand_id 45 | 
-------------------------------------------------------------------------------- /src/test/resources/tpcds/q53.sql: -------------------------------------------------------------------------------- 1 | SELECT * 2 | FROM 3 | (SELECT 4 | i_manufact_id, 5 | sum(ss_sales_price) sum_sales, 6 | avg(sum(ss_sales_price)) 7 | OVER (PARTITION BY i_manufact_id) avg_quarterly_sales 8 | FROM item, store_sales, date_dim, store 9 | WHERE ss_item_sk = i_item_sk AND 10 | ss_sold_date_sk = d_date_sk AND 11 | ss_store_sk = s_store_sk AND 12 | d_month_seq IN (1200, 1200 + 1, 1200 + 2, 1200 + 3, 1200 + 4, 1200 + 5, 1200 + 6, 13 | 1200 + 7, 1200 + 8, 1200 + 9, 1200 + 10, 1200 + 11) AND 14 | ((i_category IN ('Books', 'Children', 'Electronics') AND 15 | i_class IN ('personal', 'portable', 'reference', 'self-help') AND 16 | i_brand IN ('scholaramalgamalg #14', 'scholaramalgamalg #7', 17 | 'exportiunivamalg #9', 'scholaramalgamalg #9')) 18 | OR 19 | (i_category IN ('Women', 'Music', 'Men') AND 20 | i_class IN ('accessories', 'classical', 'fragrances', 'pants') AND 21 | i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1', 22 | 'importoamalg #1'))) 23 | GROUP BY i_manufact_id, d_qoy) tmp1 24 | WHERE CASE WHEN avg_quarterly_sales > 0 25 | THEN abs(sum_sales - avg_quarterly_sales) / avg_quarterly_sales 26 | ELSE NULL END > 0.1 27 | ORDER BY avg_quarterly_sales, 28 | sum_sales, 29 | i_manufact_id 30 | LIMIT 100 31 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q72.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_item_desc, 3 | w_warehouse_name, 4 | d1.d_week_seq, 5 | count(CASE WHEN p_promo_sk IS NULL 6 | THEN 1 7 | ELSE 0 END) no_promo, 8 | count(CASE WHEN p_promo_sk IS NOT NULL 9 | THEN 1 10 | ELSE 0 END) promo, 11 | count(*) total_cnt 12 | FROM catalog_sales 13 | JOIN inventory ON (cs_item_sk = inv_item_sk) 14 | JOIN warehouse ON (w_warehouse_sk = 
inv_warehouse_sk) 15 | JOIN item ON (i_item_sk = cs_item_sk) 16 | JOIN customer_demographics ON (cs_bill_cdemo_sk = cd_demo_sk) 17 | JOIN household_demographics ON (cs_bill_hdemo_sk = hd_demo_sk) 18 | JOIN date_dim d1 ON (cs_sold_date_sk = d1.d_date_sk) 19 | JOIN date_dim d2 ON (inv_date_sk = d2.d_date_sk) 20 | JOIN date_dim d3 ON (cs_ship_date_sk = d3.d_date_sk) 21 | LEFT OUTER JOIN promotion ON (cs_promo_sk = p_promo_sk) 22 | LEFT OUTER JOIN catalog_returns ON (cr_item_sk = cs_item_sk AND cr_order_number = cs_order_number) 23 | WHERE d1.d_week_seq = d2.d_week_seq 24 | AND inv_quantity_on_hand < cs_quantity 25 | AND d3.d_date > (cast(d1.d_date AS DATE) + interval 5 days) 26 | AND hd_buy_potential = '>10000' 27 | AND d1.d_year = 1999 28 | AND hd_buy_potential = '>10000' 29 | AND cd_marital_status = 'D' 30 | AND d1.d_year = 1999 31 | GROUP BY i_item_desc, w_warehouse_name, d1.d_week_seq 32 | ORDER BY total_cnt DESC, i_item_desc, w_warehouse_name, d_week_seq 33 | LIMIT 100 34 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q34.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | c_last_name, 3 | c_first_name, 4 | c_salutation, 5 | c_preferred_cust_flag, 6 | ss_ticket_number, 7 | cnt 8 | FROM 9 | (SELECT 10 | ss_ticket_number, 11 | ss_customer_sk, 12 | count(*) cnt 13 | FROM store_sales, date_dim, store, household_demographics 14 | WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk 15 | AND store_sales.ss_store_sk = store.s_store_sk 16 | AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk 17 | AND (date_dim.d_dom BETWEEN 1 AND 3 OR date_dim.d_dom BETWEEN 25 AND 28) 18 | AND (household_demographics.hd_buy_potential = '>10000' OR 19 | household_demographics.hd_buy_potential = 'unknown') 20 | AND household_demographics.hd_vehicle_count > 0 21 | AND (CASE WHEN household_demographics.hd_vehicle_count > 0 22 | THEN household_demographics.hd_dep_count 
/ household_demographics.hd_vehicle_count 23 | ELSE NULL 24 | END) > 1.2 25 | AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) 26 | AND store.s_county IN 27 | ('Williamson County', 'Williamson County', 'Williamson County', 'Williamson County', 28 | 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County') 29 | GROUP BY ss_ticket_number, ss_customer_sk) dn, customer 30 | WHERE ss_customer_sk = c_customer_sk 31 | AND cnt BETWEEN 15 AND 20 32 | ORDER BY c_last_name, c_first_name, c_salutation, c_preferred_cust_flag DESC 33 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q76.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | channel, 3 | col_name, 4 | d_year, 5 | d_qoy, 6 | i_category, 7 | COUNT(*) sales_cnt, 8 | SUM(ext_sales_price) sales_amt 9 | FROM ( 10 | SELECT 11 | 'store' AS channel, 12 | ss_store_sk col_name, 13 | d_year, 14 | d_qoy, 15 | i_category, 16 | ss_ext_sales_price ext_sales_price 17 | FROM store_sales, item, date_dim 18 | WHERE ss_store_sk IS NULL 19 | AND ss_sold_date_sk = d_date_sk 20 | AND ss_item_sk = i_item_sk 21 | UNION ALL 22 | SELECT 23 | 'web' AS channel, 24 | ws_ship_customer_sk col_name, 25 | d_year, 26 | d_qoy, 27 | i_category, 28 | ws_ext_sales_price ext_sales_price 29 | FROM web_sales, item, date_dim 30 | WHERE ws_ship_customer_sk IS NULL 31 | AND ws_sold_date_sk = d_date_sk 32 | AND ws_item_sk = i_item_sk 33 | UNION ALL 34 | SELECT 35 | 'catalog' AS channel, 36 | cs_ship_addr_sk col_name, 37 | d_year, 38 | d_qoy, 39 | i_category, 40 | cs_ext_sales_price ext_sales_price 41 | FROM catalog_sales, item, date_dim 42 | WHERE cs_ship_addr_sk IS NULL 43 | AND cs_sold_date_sk = d_date_sk 44 | AND cs_item_sk = i_item_sk) foo 45 | GROUP BY channel, col_name, d_year, d_qoy, i_category 46 | ORDER BY channel, col_name, d_year, d_qoy, i_category 47 | LIMIT 100 48 | 
-------------------------------------------------------------------------------- /src/test/resources/tpcds/q69.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | cd_gender, 3 | cd_marital_status, 4 | cd_education_status, 5 | count(*) cnt1, 6 | cd_purchase_estimate, 7 | count(*) cnt2, 8 | cd_credit_rating, 9 | count(*) cnt3 10 | FROM 11 | customer c, customer_address ca, customer_demographics 12 | WHERE 13 | c.c_current_addr_sk = ca.ca_address_sk AND 14 | ca_state IN ('KY', 'GA', 'NM') AND 15 | cd_demo_sk = c.c_current_cdemo_sk AND 16 | exists(SELECT * 17 | FROM store_sales, date_dim 18 | WHERE c.c_customer_sk = ss_customer_sk AND 19 | ss_sold_date_sk = d_date_sk AND 20 | d_year = 2001 AND 21 | d_moy BETWEEN 4 AND 4 + 2) AND 22 | (NOT exists(SELECT * 23 | FROM web_sales, date_dim 24 | WHERE c.c_customer_sk = ws_bill_customer_sk AND 25 | ws_sold_date_sk = d_date_sk AND 26 | d_year = 2001 AND 27 | d_moy BETWEEN 4 AND 4 + 2) AND 28 | NOT exists(SELECT * 29 | FROM catalog_sales, date_dim 30 | WHERE c.c_customer_sk = cs_ship_customer_sk AND 31 | cs_sold_date_sk = d_date_sk AND 32 | d_year = 2001 AND 33 | d_moy BETWEEN 4 AND 4 + 2)) 34 | GROUP BY cd_gender, cd_marital_status, cd_education_status, 35 | cd_purchase_estimate, cd_credit_rating 36 | ORDER BY cd_gender, cd_marital_status, cd_education_status, 37 | cd_purchase_estimate, cd_credit_rating 38 | LIMIT 100 39 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/spark/sql/execution/RangerSparkPlanOmitStrategy.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 
/**
 * Planner [[Strategy]] that drops the Ranger marker nodes for row-level filtering and data
 * masking.
 *
 * A [[RangerSparkRowFilter]] or [[RangerSparkMasking]] node is replaced by the (lazily planned)
 * plan it wraps; every other node is left to the remaining strategies.
 *
 * @param spark the session this strategy is registered with
 */
case class RangerSparkPlanOmitStrategy(spark: SparkSession) extends Strategy {

  /**
   * @param plan the logical plan node being planned
   * @return a single placeholder for the wrapped child when `plan` is a marker node,
   *         otherwise an empty list
   */
  override def apply(plan: LogicalPlan): Seq[SparkPlan] = {
    // Peel off the marker, if there is one, and remember what it wrapped.
    val wrapped: Option[LogicalPlan] = plan match {
      case RangerSparkRowFilter(inner) => Some(inner)
      case RangerSparkMasking(inner) => Some(inner)
      case _ => None
    }
    wrapped.map(inner => planLater(inner)).toList
  }
}
Degree' 18 | AND ss_sales_price BETWEEN 100.00 AND 150.00 19 | AND hd_dep_count = 3 20 | ) OR 21 | (ss_hdemo_sk = hd_demo_sk 22 | AND cd_demo_sk = ss_cdemo_sk 23 | AND cd_marital_status = 'S' 24 | AND cd_education_status = 'College' 25 | AND ss_sales_price BETWEEN 50.00 AND 100.00 26 | AND hd_dep_count = 1 27 | ) OR 28 | (ss_hdemo_sk = hd_demo_sk 29 | AND cd_demo_sk = ss_cdemo_sk 30 | AND cd_marital_status = 'W' 31 | AND cd_education_status = '2 yr Degree' 32 | AND ss_sales_price BETWEEN 150.00 AND 200.00 33 | AND hd_dep_count = 1 34 | )) 35 | AND ((ss_addr_sk = ca_address_sk 36 | AND ca_country = 'United States' 37 | AND ca_state IN ('TX', 'OH', 'TX') 38 | AND ss_net_profit BETWEEN 100 AND 200 39 | ) OR 40 | (ss_addr_sk = ca_address_sk 41 | AND ca_country = 'United States' 42 | AND ca_state IN ('OR', 'NM', 'KY') 43 | AND ss_net_profit BETWEEN 150 AND 300 44 | ) OR 45 | (ss_addr_sk = ca_address_sk 46 | AND ca_country = 'United States' 47 | AND ca_state IN ('VA', 'TX', 'MS') 48 | AND ss_net_profit BETWEEN 50 AND 250 49 | )) 50 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q17.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | i_item_id, 3 | i_item_desc, 4 | s_state, 5 | count(ss_quantity) AS store_sales_quantitycount, 6 | avg(ss_quantity) AS store_sales_quantityave, 7 | stddev_samp(ss_quantity) AS store_sales_quantitystdev, 8 | stddev_samp(ss_quantity) / avg(ss_quantity) AS store_sales_quantitycov, 9 | count(sr_return_quantity) as_store_returns_quantitycount, 10 | avg(sr_return_quantity) as_store_returns_quantityave, 11 | stddev_samp(sr_return_quantity) as_store_returns_quantitystdev, 12 | stddev_samp(sr_return_quantity) / avg(sr_return_quantity) AS store_returns_quantitycov, 13 | count(cs_quantity) AS catalog_sales_quantitycount, 14 | avg(cs_quantity) AS catalog_sales_quantityave, 15 | stddev_samp(cs_quantity) / avg(cs_quantity) AS 
catalog_sales_quantitystdev, 16 | stddev_samp(cs_quantity) / avg(cs_quantity) AS catalog_sales_quantitycov 17 | FROM store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, date_dim d3, store, item 18 | WHERE d1.d_quarter_name = '2001Q1' 19 | AND d1.d_date_sk = ss_sold_date_sk 20 | AND i_item_sk = ss_item_sk 21 | AND s_store_sk = ss_store_sk 22 | AND ss_customer_sk = sr_customer_sk 23 | AND ss_item_sk = sr_item_sk 24 | AND ss_ticket_number = sr_ticket_number 25 | AND sr_returned_date_sk = d2.d_date_sk 26 | AND d2.d_quarter_name IN ('2001Q1', '2001Q2', '2001Q3') 27 | AND sr_customer_sk = cs_bill_customer_sk 28 | AND sr_item_sk = cs_item_sk 29 | AND cs_sold_date_sk = d3.d_date_sk 30 | AND d3.d_quarter_name IN ('2001Q1', '2001Q2', '2001Q3') 31 | GROUP BY i_item_id, i_item_desc, s_state 32 | ORDER BY i_item_id, i_item_desc, s_state 33 | LIMIT 100 34 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q35.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | ca_state, 3 | cd_gender, 4 | cd_marital_status, 5 | count(*) cnt1, 6 | min(cd_dep_count), 7 | max(cd_dep_count), 8 | avg(cd_dep_count), 9 | cd_dep_employed_count, 10 | count(*) cnt2, 11 | min(cd_dep_employed_count), 12 | max(cd_dep_employed_count), 13 | avg(cd_dep_employed_count), 14 | cd_dep_college_count, 15 | count(*) cnt3, 16 | min(cd_dep_college_count), 17 | max(cd_dep_college_count), 18 | avg(cd_dep_college_count) 19 | FROM 20 | customer c, customer_address ca, customer_demographics 21 | WHERE 22 | c.c_current_addr_sk = ca.ca_address_sk AND 23 | cd_demo_sk = c.c_current_cdemo_sk AND 24 | exists(SELECT * 25 | FROM store_sales, date_dim 26 | WHERE c.c_customer_sk = ss_customer_sk AND 27 | ss_sold_date_sk = d_date_sk AND 28 | d_year = 2002 AND 29 | d_qoy < 4) AND 30 | (exists(SELECT * 31 | FROM web_sales, date_dim 32 | WHERE c.c_customer_sk = ws_bill_customer_sk AND 33 | ws_sold_date_sk = d_date_sk 
AND 34 | d_year = 2002 AND 35 | d_qoy < 4) OR 36 | exists(SELECT * 37 | FROM catalog_sales, date_dim 38 | WHERE c.c_customer_sk = cs_ship_customer_sk AND 39 | cs_sold_date_sk = d_date_sk AND 40 | d_year = 2002 AND 41 | d_qoy < 4)) 42 | GROUP BY ca_state, cd_gender, cd_marital_status, cd_dep_count, 43 | cd_dep_employed_count, cd_dep_college_count 44 | ORDER BY ca_state, cd_gender, cd_marital_status, cd_dep_count, 45 | cd_dep_employed_count, cd_dep_college_count 46 | LIMIT 100 47 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/ranger/authorization/spark/authorizer/RangerSparkSQLExtension.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
/**
 * Entry point that wires the Ranger integration into a [[SparkSessionExtensions]] instance.
 *
 * Applying this function registers, in this order:
 *  - `RangerSparkAuthorizerExtension`, `RangerSparkRowFilterExtension` and
 *    `RangerSparkMaskingExtension` as optimizer rules;
 *  - `RangerSparkPlanOmitStrategy` as a planner strategy.
 *
 * NOTE(review): the registration order is kept exactly as written; whether the rules depend on
 * it is not visible from this file - confirm before reordering.
 */
class RangerSparkSQLExtension extends Extensions {
  /**
   * Injects the Ranger optimizer rules and planner strategy into `ext`.
   *
   * @param ext the session extensions being populated
   */
  override def apply(ext: SparkSessionExtensions): Unit = {
    ext.injectOptimizerRule(RangerSparkAuthorizerExtension)
    ext.injectOptimizerRule(RangerSparkRowFilterExtension)
    ext.injectOptimizerRule(RangerSparkMaskingExtension)
    ext.injectPlannerStrategy(RangerSparkPlanOmitStrategy)
  }
}
/**
 * Reflection helpers for reading and writing fields that are not otherwise accessible.
 */
private[sql] object AuthzUtils {

  /**
   * Finds the field called `name` on the runtime class of `o` or, when that class does not
   * declare it, on the nearest superclass that does, and makes it accessible.
   *
   * `Class.getDeclaredField` only inspects the class it is called on, so inherited fields
   * have to be looked up by walking the superclass chain explicitly.
   *
   * @param o    the object whose field is wanted
   * @param name the declared field name
   * @return the accessible field
   * @throws NoSuchFieldException if neither the runtime class nor any superclass declares it
   */
  private def findField(o: Any, name: String): java.lang.reflect.Field = {
    val runtimeClass = o.getClass
    val field = Try(runtimeClass.getDeclaredField(name)) match {
      case Success(declared) => declared
      case Failure(notFound: NoSuchFieldException) =>
        // Fall back to the superclasses; if none declares the field, report the original
        // failure so callers see the same exception as before.
        Iterator.iterate[Class[_]](runtimeClass.getSuperclass)(_.getSuperclass)
          .takeWhile(_ != null)
          .map(clazz => Try(clazz.getDeclaredField(name)))
          .collectFirst { case Success(inherited) => inherited }
          .getOrElse(throw notFound)
      case Failure(other) => throw other
    }
    field.setAccessible(true)
    field
  }

  /**
   * Reads the value of field `name` from `o`, bypassing access modifiers.
   *
   * @param o    the object to read from
   * @param name the field name, declared on the object's class or one of its superclasses
   * @return the current field value (primitives are boxed)
   * @throws NoSuchFieldException if no such field exists in the class hierarchy
   */
  def getFieldVal(o: Any, name: String): Any = findField(o, name).get(o)

  /**
   * Writes `value` into field `name` of `o`, bypassing access modifiers.
   *
   * @param o     the object to modify
   * @param name  the field name, declared on the object's class or one of its superclasses
   * @param value the new value; it is passed to the reflection API as an `AnyRef`
   * @throws NoSuchFieldException if no such field exists in the class hierarchy
   */
  def setFieldVal(o: Any, name: String, value: Any): Unit =
    findField(o, name).set(o, value.asInstanceOf[AnyRef])
}
/**
 * A rule executor that mirrors the session optimizer's batches but leaves out every rule
 * the optimizer reports through `extendedOperatorOptimizationRules`.
 *
 * @param spark the session whose optimizer batches are mirrored
 */
class RangerSparkOptimizer(spark: SparkSession) extends RuleExecutor[LogicalPlan] {

  /**
   * Rebuilds each batch of the session optimizer without the extension rules.
   *
   * The remaining rules keep the exact order (and multiplicity) they have in the source
   * batch. Going through a `Set` would hand them back in hash order, so the rule sequence
   * could differ from the session optimizer's own sequence.
   *
   * Every batch is re-created with a `FixedPoint` strategy capped at the source strategy's
   * `maxIterations`.
   *
   * @return the filtered batches, in the session optimizer's batch order
   */
  override def batches: Seq[Batch] = {
    val optimizer = spark.sessionState.optimizer
    // Only used for membership tests; ordering comes from `batch.rules` below.
    val extRules = optimizer.extendedOperatorOptimizationRules.toSet
    optimizer.batches.map { batch =>
      val retained = batch.rules.filterNot(extRules.contains)
      Batch(batch.name, FixedPoint(batch.strategy.maxIterations), retained: _*)
    }
  }
}
35 | AND ss_ticket_number = sr_ticket_number 36 | AND ss_item_sk = sr_item_sk 37 | AND ss_sold_date_sk = d1.d_date_sk 38 | AND sr_returned_date_sk = d2.d_date_sk 39 | AND ss_customer_sk = sr_customer_sk 40 | AND ss_store_sk = s_store_sk 41 | GROUP BY 42 | s_store_name, s_company_id, s_street_number, s_street_name, s_street_type, 43 | s_suite_number, s_city, s_county, s_state, s_zip 44 | ORDER BY 45 | s_store_name, s_company_id, s_street_number, s_street_name, s_street_type, 46 | s_suite_number, s_city, s_county, s_state, s_zip 47 | LIMIT 100 48 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q54.sql: -------------------------------------------------------------------------------- 1 | WITH my_customers AS ( 2 | SELECT DISTINCT 3 | c_customer_sk, 4 | c_current_addr_sk 5 | FROM 6 | (SELECT 7 | cs_sold_date_sk sold_date_sk, 8 | cs_bill_customer_sk customer_sk, 9 | cs_item_sk item_sk 10 | FROM catalog_sales 11 | UNION ALL 12 | SELECT 13 | ws_sold_date_sk sold_date_sk, 14 | ws_bill_customer_sk customer_sk, 15 | ws_item_sk item_sk 16 | FROM web_sales 17 | ) cs_or_ws_sales, 18 | item, 19 | date_dim, 20 | customer 21 | WHERE sold_date_sk = d_date_sk 22 | AND item_sk = i_item_sk 23 | AND i_category = 'Women' 24 | AND i_class = 'maternity' 25 | AND c_customer_sk = cs_or_ws_sales.customer_sk 26 | AND d_moy = 12 27 | AND d_year = 1998 28 | ) 29 | , my_revenue AS ( 30 | SELECT 31 | c_customer_sk, 32 | sum(ss_ext_sales_price) AS revenue 33 | FROM my_customers, 34 | store_sales, 35 | customer_address, 36 | store, 37 | date_dim 38 | WHERE c_current_addr_sk = ca_address_sk 39 | AND ca_county = s_county 40 | AND ca_state = s_state 41 | AND ss_sold_date_sk = d_date_sk 42 | AND c_customer_sk = ss_customer_sk 43 | AND d_month_seq BETWEEN (SELECT DISTINCT d_month_seq + 1 44 | FROM date_dim 45 | WHERE d_year = 1998 AND d_moy = 12) 46 | AND (SELECT DISTINCT d_month_seq + 3 47 | FROM date_dim 48 | WHERE d_year = 1998 AND d_moy = 
-- TPC-DS q48: total store-sales quantity for sales dated in 2001 that satisfy
--   * one of three (marital status, education status, sales price range) combinations, and
--   * one of three (United States state list, net profit range) combinations.
SELECT sum(ss_quantity)
FROM store_sales, store, customer_demographics, customer_address, date_dim
WHERE s_store_sk = ss_store_sk
  AND ss_sold_date_sk = d_date_sk AND d_year = 2001
  AND
  (
    (
      cd_demo_sk = ss_cdemo_sk
        AND
        cd_marital_status = 'M'
        AND
        cd_education_status = '4 yr Degree'
        AND
        ss_sales_price BETWEEN 100.00 AND 150.00
    )
      OR
      (
        cd_demo_sk = ss_cdemo_sk
          AND
          cd_marital_status = 'D'
          AND
          cd_education_status = '2 yr Degree'
          AND
          ss_sales_price BETWEEN 50.00 AND 100.00
      )
      OR
      (
        cd_demo_sk = ss_cdemo_sk
          AND
          cd_marital_status = 'S'
          AND
          cd_education_status = 'College'
          AND
          ss_sales_price BETWEEN 150.00 AND 200.00
      )
  )
  AND
  (
    (
      ss_addr_sk = ca_address_sk
        AND
        ca_country = 'United States'
        AND
        ca_state IN ('CO', 'OH', 'TX')
        AND ss_net_profit BETWEEN 0 AND 2000
    )
      OR
      (ss_addr_sk = ca_address_sk
        AND
        ca_country = 'United States'
        AND
        ca_state IN ('OR', 'MN', 'KY')
        AND ss_net_profit BETWEEN 150 AND 3000
      )
      OR
      (ss_addr_sk = ca_address_sk
        AND
        ca_country = 'United States'
        AND
        ca_state IN ('VA', 'CA', 'MS')
        AND ss_net_profit BETWEEN 50 AND 25000
      )
  )
-- TPC-DS q57: monthly catalog sales per (item category, brand, call center).
--   v1: monthly sales sums for 1999 plus December of the previous year and January of the
--       next, with the per-year average and a chronological rank within each group.
--   v2: each month joined to its previous (rank - 1) and next (rank + 1) month.
-- The final select keeps 1999 months whose sum differs from the yearly average by more
-- than 10% of that average.
WITH v1 AS (
  SELECT
    i_category,
    i_brand,
    cc_name,
    d_year,
    d_moy,
    sum(cs_sales_price) sum_sales,
    avg(sum(cs_sales_price))
    OVER
    (PARTITION BY i_category, i_brand, cc_name, d_year)
    avg_monthly_sales,
    rank()
    OVER
    (PARTITION BY i_category, i_brand, cc_name
      ORDER BY d_year, d_moy) rn
  FROM item, catalog_sales, date_dim, call_center
  WHERE cs_item_sk = i_item_sk AND
    cs_sold_date_sk = d_date_sk AND
    cc_call_center_sk = cs_call_center_sk AND
    (
      d_year = 1999 OR
        (d_year = 1999 - 1 AND d_moy = 12) OR
        (d_year = 1999 + 1 AND d_moy = 1)
    )
  GROUP BY i_category, i_brand,
    cc_name, d_year, d_moy),
    v2 AS (
    SELECT
      v1.i_category,
      v1.i_brand,
      v1.cc_name,
      v1.d_year,
      v1.d_moy,
      v1.avg_monthly_sales,
      v1.sum_sales,
      v1_lag.sum_sales psum,
      v1_lead.sum_sales nsum
    FROM v1, v1 v1_lag, v1 v1_lead
    WHERE v1.i_category = v1_lag.i_category AND
      v1.i_category = v1_lead.i_category AND
      v1.i_brand = v1_lag.i_brand AND
      v1.i_brand = v1_lead.i_brand AND
      v1.cc_name = v1_lag.cc_name AND
      v1.cc_name = v1_lead.cc_name AND
      v1.rn = v1_lag.rn + 1 AND
      v1.rn = v1_lead.rn - 1)
SELECT *
FROM v2
WHERE d_year = 1999 AND
  avg_monthly_sales > 0 AND
  CASE WHEN avg_monthly_sales > 0
    THEN abs(sum_sales - avg_monthly_sales) / avg_monthly_sales
  ELSE NULL END > 0.1
ORDER BY sum_sales - avg_monthly_sales, 3
LIMIT 100
WHERE ss_quantity BETWEEN 1 AND 20) 8 | ELSE (SELECT avg(ss_net_paid) 9 | FROM store_sales 10 | WHERE ss_quantity BETWEEN 1 AND 20) END bucket1, 11 | CASE WHEN (SELECT count(*) 12 | FROM store_sales 13 | WHERE ss_quantity BETWEEN 21 AND 40) > 19045798 14 | THEN (SELECT avg(ss_ext_discount_amt) 15 | FROM store_sales 16 | WHERE ss_quantity BETWEEN 21 AND 40) 17 | ELSE (SELECT avg(ss_net_paid) 18 | FROM store_sales 19 | WHERE ss_quantity BETWEEN 21 AND 40) END bucket2, 20 | CASE WHEN (SELECT count(*) 21 | FROM store_sales 22 | WHERE ss_quantity BETWEEN 41 AND 60) > 365541424 23 | THEN (SELECT avg(ss_ext_discount_amt) 24 | FROM store_sales 25 | WHERE ss_quantity BETWEEN 41 AND 60) 26 | ELSE (SELECT avg(ss_net_paid) 27 | FROM store_sales 28 | WHERE ss_quantity BETWEEN 41 AND 60) END bucket3, 29 | CASE WHEN (SELECT count(*) 30 | FROM store_sales 31 | WHERE ss_quantity BETWEEN 61 AND 80) > 216357808 32 | THEN (SELECT avg(ss_ext_discount_amt) 33 | FROM store_sales 34 | WHERE ss_quantity BETWEEN 61 AND 80) 35 | ELSE (SELECT avg(ss_net_paid) 36 | FROM store_sales 37 | WHERE ss_quantity BETWEEN 61 AND 80) END bucket4, 38 | CASE WHEN (SELECT count(*) 39 | FROM store_sales 40 | WHERE ss_quantity BETWEEN 81 AND 100) > 184483884 41 | THEN (SELECT avg(ss_ext_discount_amt) 42 | FROM store_sales 43 | WHERE ss_quantity BETWEEN 81 AND 100) 44 | ELSE (SELECT avg(ss_net_paid) 45 | FROM store_sales 46 | WHERE ss_quantity BETWEEN 81 AND 100) END bucket5 47 | FROM reason 48 | WHERE r_reason_sk = 1 49 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/RangerSparkTestUtils.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 
/**
 * Helpers shared by the Ranger Spark test suites.
 */
object RangerSparkTestUtils {

  /**
   * Registers the row-filter and masking optimizer rules and the plan-omit planner
   * strategy on the extensions of `spark`.
   *
   * @param spark the session whose extensions are populated
   */
  def injectRules(spark: SparkSession): Unit = {
    val extensions = spark.extensions
    extensions.injectOptimizerRule(RangerSparkRowFilterExtension)
    extensions.injectOptimizerRule(RangerSparkMaskingExtension)
    extensions.injectPlannerStrategy(RangerSparkPlanOmitStrategy)
  }

  /**
   * Evaluates `f` inside `doAs` of a remote-user `UserGroupInformation` created for `user`.
   *
   * @param user the user name to run as
   * @param f    the computation to evaluate; it is only evaluated inside the action
   * @tparam T   the result type of `f`
   * @return whatever `f` produces
   */
  def withUser[T](user: String)(f: => T): T = {
    val action = new PrivilegedExceptionAction[T] {
      override def run(): T = f
    }
    UserGroupInformation.createRemoteUser(user).doAs(action)
  }
}
/**
 * Operation types recognised by the Spark authorizer; a subset of HiveOperationTypes
 * supported by Apache Spark.
 *
 * The declaration order below is significant: `Enumeration` assigns ids in the order the
 * values are created, so new values must not be inserted between existing ones.
 */
object SparkOperationType extends Enumeration {
  type SparkOperationType = Value

  // ids 0-11
  val ALTERDATABASE, ALTERTABLE_ADDCOLS, ALTERTABLE_ADDPARTS, ALTERTABLE_RENAMECOL,
  ALTERTABLE_DROPPARTS, MSCK, ALTERTABLE_RENAMEPART, ALTERTABLE_RENAME,
  ALTERVIEW_RENAME, ALTERTABLE_PROPERTIES, ALTERTABLE_SERDEPROPERTIES,
  ALTERTABLE_LOCATION = Value

  // id 12
  val QUERY = Value

  // ids 13-17
  val CREATEDATABASE, CREATETABLE_AS_SELECT, CREATEFUNCTION, CREATETABLE, CREATEVIEW = Value

  // ids 18-20
  val DESCTABLE, DESCDATABASE, DESCFUNCTION = Value

  // ids 21-23
  val DROPDATABASE, DROPTABLE, DROPFUNCTION = Value

  // id 24
  val LOAD = Value

  // ids 25-33
  val SHOWCONF, SWITCHDATABASE, SHOW_CREATETABLE, SHOWCOLUMNS, SHOWDATABASES, SHOWFUNCTIONS,
  SHOWPARTITIONS, SHOWTABLES, SHOW_TBLPROPERTIES = Value

  // ids 34-36
  val TRUNCATETABLE, DROPVIEW, EXPLAIN = Value

}
/**
 * Wraps a [[ShowDatabasesCommand]] and returns only the rows for which
 * `RangerSparkAuthorizer.isAllowed` answers true.
 *
 * @param child the command that produces the unfiltered database listing
 */
case class RangerShowDatabasesCommand(child: ShowDatabasesCommand) extends RunnableCommand {
  override val output = child.output

  /**
   * Runs the wrapped command and drops the databases that are not allowed.
   *
   * @param sparkSession the session the wrapped command is run with
   * @return the allowed rows, in the order the wrapped command produced them
   */
  override def run(sparkSession: SparkSession): Seq[Row] =
    child.run(sparkSession).filter { row =>
      // Column 0 of each row is read as the database name; it is passed as both name
      // arguments of the DATABASE privilege object.
      val database = row.getString(0)
      RangerSparkAuthorizer.isAllowed(
        new SparkPrivilegeObject(SparkPrivilegeObjectType.DATABASE, database, database))
    }
}
# Travis CI build for spark-ranger.
# Each matrix job selects one Spark/Ranger Maven profile pair through the PROFILES
# environment variable, which the `script` step passes to Maven.
language: scala
scala:
  - 2.11.8
jdk:
  - openjdk8

cache:
  directories:
    - $HOME/.m2

before_deploy:
  - mvn clean package -DskipTests=true

deploy:
  - provider: pages
    skip_cleanup: true
    github_token: $GITHUB_TOKEN
    email: yaooqinn@hotmail.com
    name: Kent Yao
    on:
      branch: master
  - provider: releases
    api_key: $GITHUB_TOKEN
    file_glob: true
    file: target/spark-ranger-*.jar
    skip_cleanup: true
    on:
      tags: true

# NOTE: env entries are written as NAME=value. A leading `$` on the name makes it an invalid
# variable name, so PROFILES would never reach the `script` step.
matrix:
  include:
    - name: spark2.3-ranger-1.0
      language: scala
      env: PROFILES="-Pspark-2.3 -Pranger-1.0"
    - name: spark2.3-ranger-1.1
      language: scala
      env: PROFILES="-Pspark-2.3 -Pranger-1.1"
    - name: spark2.3-ranger-1.2
      language: scala
      env: PROFILES="-Pspark-2.3 -Pranger-1.2"
    - name: spark2.3-ranger-2.0
      language: scala
      env: PROFILES="-Pspark-2.3 -Pranger-2.0"
    - name: spark2.4-ranger-1.0
      language: scala
      env: PROFILES="-Pspark-2.4 -Pranger-1.0"
    - name: spark2.4-ranger-1.1
      language: scala
      env: PROFILES="-Pspark-2.4 -Pranger-1.1"
    - name: spark2.4-ranger-1.2
      language: scala
      env: PROFILES="-Pspark-2.4 -Pranger-1.2"
    - name: spark2.4-ranger-2.0
      language: scala
      env: PROFILES="-Pspark-2.4 -Pranger-2.0"

script:
  - mvn --no-transfer-progress clean install $PROFILES -Dmaven.javadoc.skip=true -B -V

after_success:
  - bash <(curl -s https://codecov.io/bash)
  - echo "Travis exited with ${TRAVIS_TEST_RESULT}"

after_failure:
  - echo "Travis exited with ${TRAVIS_TEST_RESULT}"
-- TPC-DS q60: total extended sales price per item id, summed over the store (ss),
-- catalog (cs) and web (ws) channels. Each channel is restricted to item ids that occur
-- in the 'Music' category, sales dated September 1998, and addresses with GMT offset -5.
WITH ss AS (
  SELECT
    i_item_id,
    sum(ss_ext_sales_price) total_sales
  FROM store_sales, date_dim, customer_address, item
  WHERE
    i_item_id IN (SELECT i_item_id
    FROM item
    WHERE i_category IN ('Music'))
      AND ss_item_sk = i_item_sk
      AND ss_sold_date_sk = d_date_sk
      AND d_year = 1998
      AND d_moy = 9
      AND ss_addr_sk = ca_address_sk
      AND ca_gmt_offset = -5
  GROUP BY i_item_id),
    cs AS (
    SELECT
      i_item_id,
      sum(cs_ext_sales_price) total_sales
    FROM catalog_sales, date_dim, customer_address, item
    WHERE
      i_item_id IN (SELECT i_item_id
      FROM item
      WHERE i_category IN ('Music'))
        AND cs_item_sk = i_item_sk
        AND cs_sold_date_sk = d_date_sk
        AND d_year = 1998
        AND d_moy = 9
        AND cs_bill_addr_sk = ca_address_sk
        AND ca_gmt_offset = -5
    GROUP BY i_item_id),
    ws AS (
    SELECT
      i_item_id,
      sum(ws_ext_sales_price) total_sales
    FROM web_sales, date_dim, customer_address, item
    WHERE
      i_item_id IN (SELECT i_item_id
      FROM item
      WHERE i_category IN ('Music'))
        AND ws_item_sk = i_item_sk
        AND ws_sold_date_sk = d_date_sk
        AND d_year = 1998
        AND d_moy = 9
        AND ws_bill_addr_sk = ca_address_sk
        AND ca_gmt_offset = -5
    GROUP BY i_item_id)
SELECT
  i_item_id,
  sum(total_sales) total_sales
FROM (SELECT *
      FROM ss
      UNION ALL
      SELECT *
      FROM cs
      UNION ALL
      SELECT *
      FROM ws) tmp1
GROUP BY i_item_id
ORDER BY i_item_id, total_sales
LIMIT 100
-- TPC-DS q10: counts of customers per demographic attribute combination.
-- A customer qualifies when the current address is in one of the listed counties, there is
-- a store sale in 2002 with d_moy between 1 and 1 + 3, and there is also a web sale or a
-- catalog sale in that same period.
SELECT
  cd_gender,
  cd_marital_status,
  cd_education_status,
  count(*) cnt1,
  cd_purchase_estimate,
  count(*) cnt2,
  cd_credit_rating,
  count(*) cnt3,
  cd_dep_count,
  count(*) cnt4,
  cd_dep_employed_count,
  count(*) cnt5,
  cd_dep_college_count,
  count(*) cnt6
FROM
  customer c, customer_address ca, customer_demographics
WHERE
  c.c_current_addr_sk = ca.ca_address_sk AND
    ca_county IN ('Rush County', 'Toole County', 'Jefferson County',
                  'Dona Ana County', 'La Porte County') AND
    cd_demo_sk = c.c_current_cdemo_sk AND
    exists(SELECT *
           FROM store_sales, date_dim
           WHERE c.c_customer_sk = ss_customer_sk AND
             ss_sold_date_sk = d_date_sk AND
             d_year = 2002 AND
             d_moy BETWEEN 1 AND 1 + 3) AND
    (exists(SELECT *
            FROM web_sales, date_dim
            WHERE c.c_customer_sk = ws_bill_customer_sk AND
              ws_sold_date_sk = d_date_sk AND
              d_year = 2002 AND
              d_moy BETWEEN 1 AND 1 + 3) OR
      exists(SELECT *
             FROM catalog_sales, date_dim
             WHERE c.c_customer_sk = cs_ship_customer_sk AND
               cs_sold_date_sk = d_date_sk AND
               d_year = 2002 AND
               d_moy BETWEEN 1 AND 1 + 3))
GROUP BY cd_gender,
  cd_marital_status,
  cd_education_status,
  cd_purchase_estimate,
  cd_credit_rating,
  cd_dep_count,
  cd_dep_employed_count,
  cd_dep_college_count
ORDER BY cd_gender,
  cd_marital_status,
  cd_education_status,
  cd_purchase_estimate,
  cd_credit_rating,
  cd_dep_count,
  cd_dep_employed_count,
  cd_dep_college_count
LIMIT 100
-- TPC-DS q23a: sum of catalog and web sales (quantity * list price) dated February 2000,
-- limited to frequently sold store items and to the best store customers.
--   frequent_ss_items: (item, date) pairs in years 2000..2003 with more than 4 store sales.
--   max_store_sales:   the largest per-customer store spend over years 2000..2003.
--   best_ss_customer:  customers whose total store spend exceeds 50% of that maximum.
WITH frequent_ss_items AS
(SELECT
    substr(i_item_desc, 1, 30) itemdesc,
    i_item_sk item_sk,
    d_date solddate,
    count(*) cnt
  FROM store_sales, date_dim, item
  WHERE ss_sold_date_sk = d_date_sk
    AND ss_item_sk = i_item_sk
    AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3)
  GROUP BY substr(i_item_desc, 1, 30), i_item_sk, d_date
  HAVING count(*) > 4),
    max_store_sales AS
  (SELECT max(csales) tpcds_cmax
  FROM (SELECT
    c_customer_sk,
    sum(ss_quantity * ss_sales_price) csales
  FROM store_sales, customer, date_dim
  WHERE ss_customer_sk = c_customer_sk
    AND ss_sold_date_sk = d_date_sk
    AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3)
  GROUP BY c_customer_sk) x),
    best_ss_customer AS
  (SELECT
    c_customer_sk,
    sum(ss_quantity * ss_sales_price) ssales
  FROM store_sales, customer
  WHERE ss_customer_sk = c_customer_sk
  GROUP BY c_customer_sk
  HAVING sum(ss_quantity * ss_sales_price) > (50 / 100.0) *
    (SELECT *
    FROM max_store_sales))
SELECT sum(sales)
FROM ((SELECT cs_quantity * cs_list_price sales
FROM catalog_sales, date_dim
WHERE d_year = 2000
  AND d_moy = 2
  AND cs_sold_date_sk = d_date_sk
  AND cs_item_sk IN (SELECT item_sk
  FROM frequent_ss_items)
  AND cs_bill_customer_sk IN (SELECT c_customer_sk
  FROM best_ss_customer))
      UNION ALL
      (SELECT ws_quantity * ws_list_price sales
      FROM web_sales, date_dim
      WHERE d_year = 2000
        AND d_moy = 2
        AND ws_sold_date_sk = d_date_sk
        AND ws_item_sk IN (SELECT item_sk
        FROM frequent_ss_items)
        AND ws_bill_customer_sk IN (SELECT c_customer_sk
        FROM best_ss_customer))) y
LIMIT 100
/**
 * Wraps a [[ShowTablesCommand]] and returns only the rows for which
 * `RangerSparkAuthorizer.isAllowed` answers true.
 *
 * @param child the command that produces the unfiltered table listing
 */
case class RangerShowTablesCommand(child: ShowTablesCommand) extends RunnableCommand {

  override val output: Seq[Attribute] = child.output

  /**
   * Runs the wrapped command and drops the tables that are not allowed.
   *
   * @param sparkSession the session the wrapped command is run with
   * @return the allowed rows, in the order the wrapped command produced them
   */
  override def run(sparkSession: SparkSession): Seq[Row] =
    for {
      row <- child.run(sparkSession)
      if RangerSparkAuthorizer.isAllowed(toSparkPrivilegeObject(row))
    } yield row

  /**
   * Builds the TABLE_OR_VIEW privilege object for one listing row: column 0 is read as
   * the database name and column 1 as the table name.
   */
  private def toSparkPrivilegeObject(row: Row): SparkPrivilegeObject =
    new SparkPrivilegeObject(
      SparkPrivilegeObjectType.TABLE_OR_VIEW,
      row.getString(0),
      row.getString(1))
}
-- TPC-DS q33: total extended sales price per manufacturer id, summed over the store (ss),
-- catalog (cs) and web (ws) channels. Each channel is restricted to manufacturer ids that
-- occur in the 'Electronics' category, sales dated May 1998, and addresses with GMT
-- offset -5.
WITH ss AS (
  SELECT
    i_manufact_id,
    sum(ss_ext_sales_price) total_sales
  FROM
    store_sales, date_dim, customer_address, item
  WHERE
    i_manufact_id IN (SELECT i_manufact_id
    FROM item
    WHERE i_category IN ('Electronics'))
      AND ss_item_sk = i_item_sk
      AND ss_sold_date_sk = d_date_sk
      AND d_year = 1998
      AND d_moy = 5
      AND ss_addr_sk = ca_address_sk
      AND ca_gmt_offset = -5
  GROUP BY i_manufact_id), cs AS
(SELECT
    i_manufact_id,
    sum(cs_ext_sales_price) total_sales
  FROM catalog_sales, date_dim, customer_address, item
  WHERE
    i_manufact_id IN (
      SELECT i_manufact_id
      FROM item
      WHERE
        i_category IN ('Electronics'))
      AND cs_item_sk = i_item_sk
      AND cs_sold_date_sk = d_date_sk
      AND d_year = 1998
      AND d_moy = 5
      AND cs_bill_addr_sk = ca_address_sk
      AND ca_gmt_offset = -5
  GROUP BY i_manufact_id),
    ws AS (
    SELECT
      i_manufact_id,
      sum(ws_ext_sales_price) total_sales
    FROM
      web_sales, date_dim, customer_address, item
    WHERE
      i_manufact_id IN (SELECT i_manufact_id
      FROM item
      WHERE i_category IN ('Electronics'))
        AND ws_item_sk = i_item_sk
        AND ws_sold_date_sk = d_date_sk
        AND d_year = 1998
        AND d_moy = 5
        AND ws_bill_addr_sk = ca_address_sk
        AND ca_gmt_offset = -5
    GROUP BY i_manufact_id)
SELECT
  i_manufact_id,
  sum(total_sales) total_sales
FROM (SELECT *
      FROM ss
      UNION ALL
      SELECT *
      FROM cs
      UNION ALL
      SELECT *
      FROM ws) tmp1
GROUP BY i_manufact_id
ORDER BY total_sales
LIMIT 100
-------------------------------------------------------------------------------- 1 | WITH ss AS ( 2 | SELECT 3 | i_item_id, 4 | sum(ss_ext_sales_price) total_sales 5 | FROM 6 | store_sales, date_dim, customer_address, item 7 | WHERE 8 | i_item_id IN (SELECT i_item_id 9 | FROM item 10 | WHERE i_color IN ('slate', 'blanched', 'burnished')) 11 | AND ss_item_sk = i_item_sk 12 | AND ss_sold_date_sk = d_date_sk 13 | AND d_year = 2001 14 | AND d_moy = 2 15 | AND ss_addr_sk = ca_address_sk 16 | AND ca_gmt_offset = -5 17 | GROUP BY i_item_id), 18 | cs AS ( 19 | SELECT 20 | i_item_id, 21 | sum(cs_ext_sales_price) total_sales 22 | FROM 23 | catalog_sales, date_dim, customer_address, item 24 | WHERE 25 | i_item_id IN (SELECT i_item_id 26 | FROM item 27 | WHERE i_color IN ('slate', 'blanched', 'burnished')) 28 | AND cs_item_sk = i_item_sk 29 | AND cs_sold_date_sk = d_date_sk 30 | AND d_year = 2001 31 | AND d_moy = 2 32 | AND cs_bill_addr_sk = ca_address_sk 33 | AND ca_gmt_offset = -5 34 | GROUP BY i_item_id), 35 | ws AS ( 36 | SELECT 37 | i_item_id, 38 | sum(ws_ext_sales_price) total_sales 39 | FROM 40 | web_sales, date_dim, customer_address, item 41 | WHERE 42 | i_item_id IN (SELECT i_item_id 43 | FROM item 44 | WHERE i_color IN ('slate', 'blanched', 'burnished')) 45 | AND ws_item_sk = i_item_sk 46 | AND ws_sold_date_sk = d_date_sk 47 | AND d_year = 2001 48 | AND d_moy = 2 49 | AND ws_bill_addr_sk = ca_address_sk 50 | AND ca_gmt_offset = -5 51 | GROUP BY i_item_id) 52 | SELECT 53 | i_item_id, 54 | sum(total_sales) total_sales 55 | FROM (SELECT * 56 | FROM ss 57 | UNION ALL 58 | SELECT * 59 | FROM cs 60 | UNION ALL 61 | SELECT * 62 | FROM ws) tmp1 63 | GROUP BY i_item_id 64 | ORDER BY total_sales 65 | LIMIT 100 66 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q83.sql: -------------------------------------------------------------------------------- 1 | WITH sr_items AS 2 | (SELECT 3 | i_item_id item_id, 4 | 
sum(sr_return_quantity) sr_item_qty 5 | FROM store_returns, item, date_dim 6 | WHERE sr_item_sk = i_item_sk 7 | AND d_date IN (SELECT d_date 8 | FROM date_dim 9 | WHERE d_week_seq IN 10 | (SELECT d_week_seq 11 | FROM date_dim 12 | WHERE d_date IN ('2000-06-30', '2000-09-27', '2000-11-17'))) 13 | AND sr_returned_date_sk = d_date_sk 14 | GROUP BY i_item_id), 15 | cr_items AS 16 | (SELECT 17 | i_item_id item_id, 18 | sum(cr_return_quantity) cr_item_qty 19 | FROM catalog_returns, item, date_dim 20 | WHERE cr_item_sk = i_item_sk 21 | AND d_date IN (SELECT d_date 22 | FROM date_dim 23 | WHERE d_week_seq IN 24 | (SELECT d_week_seq 25 | FROM date_dim 26 | WHERE d_date IN ('2000-06-30', '2000-09-27', '2000-11-17'))) 27 | AND cr_returned_date_sk = d_date_sk 28 | GROUP BY i_item_id), 29 | wr_items AS 30 | (SELECT 31 | i_item_id item_id, 32 | sum(wr_return_quantity) wr_item_qty 33 | FROM web_returns, item, date_dim 34 | WHERE wr_item_sk = i_item_sk AND d_date IN 35 | (SELECT d_date 36 | FROM date_dim 37 | WHERE d_week_seq IN 38 | (SELECT d_week_seq 39 | FROM date_dim 40 | WHERE d_date IN ('2000-06-30', '2000-09-27', '2000-11-17'))) 41 | AND wr_returned_date_sk = d_date_sk 42 | GROUP BY i_item_id) 43 | SELECT 44 | sr_items.item_id, 45 | sr_item_qty, 46 | sr_item_qty / (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 * 100 sr_dev, 47 | cr_item_qty, 48 | cr_item_qty / (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 * 100 cr_dev, 49 | wr_item_qty, 50 | wr_item_qty / (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 * 100 wr_dev, 51 | (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 average 52 | FROM sr_items, cr_items, wr_items 53 | WHERE sr_items.item_id = cr_items.item_id 54 | AND sr_items.item_id = wr_items.item_id 55 | ORDER BY sr_items.item_id, sr_item_qty 56 | LIMIT 100 57 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q31.sql: -------------------------------------------------------------------------------- 1 | WITH 
ss AS 2 | (SELECT 3 | ca_county, 4 | d_qoy, 5 | d_year, 6 | sum(ss_ext_sales_price) AS store_sales 7 | FROM store_sales, date_dim, customer_address 8 | WHERE ss_sold_date_sk = d_date_sk 9 | AND ss_addr_sk = ca_address_sk 10 | GROUP BY ca_county, d_qoy, d_year), 11 | ws AS 12 | (SELECT 13 | ca_county, 14 | d_qoy, 15 | d_year, 16 | sum(ws_ext_sales_price) AS web_sales 17 | FROM web_sales, date_dim, customer_address 18 | WHERE ws_sold_date_sk = d_date_sk 19 | AND ws_bill_addr_sk = ca_address_sk 20 | GROUP BY ca_county, d_qoy, d_year) 21 | SELECT 22 | ss1.ca_county, 23 | ss1.d_year, 24 | ws2.web_sales / ws1.web_sales web_q1_q2_increase, 25 | ss2.store_sales / ss1.store_sales store_q1_q2_increase, 26 | ws3.web_sales / ws2.web_sales web_q2_q3_increase, 27 | ss3.store_sales / ss2.store_sales store_q2_q3_increase 28 | FROM 29 | ss ss1, ss ss2, ss ss3, ws ws1, ws ws2, ws ws3 30 | WHERE 31 | ss1.d_qoy = 1 32 | AND ss1.d_year = 2000 33 | AND ss1.ca_county = ss2.ca_county 34 | AND ss2.d_qoy = 2 35 | AND ss2.d_year = 2000 36 | AND ss2.ca_county = ss3.ca_county 37 | AND ss3.d_qoy = 3 38 | AND ss3.d_year = 2000 39 | AND ss1.ca_county = ws1.ca_county 40 | AND ws1.d_qoy = 1 41 | AND ws1.d_year = 2000 42 | AND ws1.ca_county = ws2.ca_county 43 | AND ws2.d_qoy = 2 44 | AND ws2.d_year = 2000 45 | AND ws1.ca_county = ws3.ca_county 46 | AND ws3.d_qoy = 3 47 | AND ws3.d_year = 2000 48 | AND CASE WHEN ws1.web_sales > 0 49 | THEN ws2.web_sales / ws1.web_sales 50 | ELSE NULL END 51 | > CASE WHEN ss1.store_sales > 0 52 | THEN ss2.store_sales / ss1.store_sales 53 | ELSE NULL END 54 | AND CASE WHEN ws2.web_sales > 0 55 | THEN ws3.web_sales / ws2.web_sales 56 | ELSE NULL END 57 | > CASE WHEN ss2.store_sales > 0 58 | THEN ss3.store_sales / ss2.store_sales 59 | ELSE NULL END 60 | ORDER BY ss1.ca_county 61 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q41.sql: 
--------------------------------------------------------------------------------
-- q41: distinct product names of items whose i_manufact_id lies in
-- [738, 738 + 40] and for which the correlated sub-query finds at least one
-- item of the same manufacturer (i_manufact = i1.i_manufact) matching one of
-- the listed category / color / units / size combinations.
--
-- The sub-query's predicate is two OR-ed groups; each group repeats the
-- correlation on i_manufact and then lists four alternative combinations.
-- The first 100 product names in ascending order are returned.
SELECT DISTINCT (i_product_name)
FROM item i1
WHERE i_manufact_id BETWEEN 738 AND 738 + 40
  -- Correlated count: number of same-manufacturer items matching any combination.
  AND (SELECT count(*) AS item_cnt
  FROM item
  WHERE (i_manufact = i1.i_manufact AND
    ((i_category = 'Women' AND
      (i_color = 'powder' OR i_color = 'khaki') AND
      (i_units = 'Ounce' OR i_units = 'Oz') AND
      (i_size = 'medium' OR i_size = 'extra large')
    ) OR
      (i_category = 'Women' AND
        (i_color = 'brown' OR i_color = 'honeydew') AND
        (i_units = 'Bunch' OR i_units = 'Ton') AND
        (i_size = 'N/A' OR i_size = 'small')
      ) OR
      (i_category = 'Men' AND
        (i_color = 'floral' OR i_color = 'deep') AND
        (i_units = 'N/A' OR i_units = 'Dozen') AND
        (i_size = 'petite' OR i_size = 'large')
      ) OR
      (i_category = 'Men' AND
        (i_color = 'light' OR i_color = 'cornflower') AND
        (i_units = 'Box' OR i_units = 'Pound') AND
        (i_size = 'medium' OR i_size = 'extra large')
      ))) OR
    (i_manufact = i1.i_manufact AND
      ((i_category = 'Women' AND
        (i_color = 'midnight' OR i_color = 'snow') AND
        (i_units = 'Pallet' OR i_units = 'Gross') AND
        (i_size = 'medium' OR i_size = 'extra large')
      ) OR
        (i_category = 'Women' AND
          (i_color = 'cyan' OR i_color = 'papaya') AND
          (i_units = 'Cup' OR i_units = 'Dram') AND
          (i_size = 'N/A' OR i_size = 'small')
        ) OR
        (i_category = 'Men' AND
          (i_color = 'orange' OR i_color = 'frosted') AND
          (i_units = 'Each' OR i_units = 'Tbl') AND
          (i_size = 'petite' OR i_size = 'large')
        ) OR
        (i_category = 'Men' AND
          (i_color = 'forest' OR i_color = 'ghost') AND
          (i_units = 'Lb' OR i_units = 'Bundle') AND
          (i_size = 'medium' OR i_size = 'extra large')
        )))) > 0
ORDER BY i_product_name
LIMIT 100
--------------------------------------------------------------------------------
/src/test/scala/org/apache/spark/sql/execution/RangerSparkPlanOmitStrategyTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
*/

package org.apache.spark.sql.execution

import org.apache.spark.sql.catalyst.plans.logical.{RangerSparkMasking, RangerSparkRowFilter}
import org.apache.spark.sql.hive.test.TestHive
import org.scalatest.FunSuite

/**
 * Checks what [[RangerSparkPlanOmitStrategy]] returns for logical plans with
 * and without a Ranger marker node ([[RangerSparkRowFilter]] or
 * [[RangerSparkMasking]]) at the root.
 */
class RangerSparkPlanOmitStrategyTest extends FunSuite {

  private val spark = TestHive.sparkSession

  test("ranger spark plan omit strategy") {
    val omit = RangerSparkPlanOmitStrategy(spark)
    val base = spark.range(0, 5).queryExecution.optimizedPlan

    // Without a marker at the root the strategy returns nothing.
    assert(omit(base) === Nil)

    // A single marker is replaced by a PlanLater placeholder for its child.
    val filtered = RangerSparkRowFilter(base)
    assert(omit(filtered) === List(PlanLater(base)))

    val masked = RangerSparkMasking(base)
    assert(omit(masked) === List(PlanLater(base)))

    // With stacked markers only the outermost one is removed per application.
    val maskedOverFiltered = RangerSparkMasking(filtered)
    assert(omit(maskedOverFiltered) === List(PlanLater(filtered)))

    val filteredOverMasked = RangerSparkRowFilter(masked)
    assert(omit(filteredOverMasked) === List(PlanLater(masked)))
  }
}
-------------------------------------------------------------------------------- /src/test/resources/tpcds/q47.sql: -------------------------------------------------------------------------------- 1 | WITH v1 AS ( 2 | SELECT 3 | i_category, 4 | i_brand, 5 | s_store_name, 6 | s_company_name, 7 | d_year, 8 | d_moy, 9 | sum(ss_sales_price) sum_sales, 10 | avg(sum(ss_sales_price)) 11 | OVER 12 | (PARTITION BY i_category, i_brand, 13 | s_store_name, s_company_name, d_year) 14 | avg_monthly_sales, 15 | rank() 16 | OVER 17 | (PARTITION BY i_category, i_brand, 18 | s_store_name, s_company_name 19 | ORDER BY d_year, d_moy) rn 20 | FROM item, store_sales, date_dim, store 21 | WHERE ss_item_sk = i_item_sk AND 22 | ss_sold_date_sk = d_date_sk AND 23 | ss_store_sk = s_store_sk AND 24 | ( 25 | d_year = 1999 OR 26 | (d_year = 1999 - 1 AND d_moy = 12) OR 27 | (d_year = 1999 + 1 AND d_moy = 1) 28 | ) 29 | GROUP BY i_category, i_brand, 30 | s_store_name,
s_company_name, 31 | d_year, d_moy), 32 | v2 AS ( 33 | SELECT 34 | v1.i_category, 35 | v1.i_brand, 36 | v1.s_store_name, 37 | v1.s_company_name, 38 | v1.d_year, 39 | v1.d_moy, 40 | v1.avg_monthly_sales, 41 | v1.sum_sales, 42 | v1_lag.sum_sales psum, 43 | v1_lead.sum_sales nsum 44 | FROM v1, v1 v1_lag, v1 v1_lead 45 | WHERE v1.i_category = v1_lag.i_category AND 46 | v1.i_category = v1_lead.i_category AND 47 | v1.i_brand = v1_lag.i_brand AND 48 | v1.i_brand = v1_lead.i_brand AND 49 | v1.s_store_name = v1_lag.s_store_name AND 50 | v1.s_store_name = v1_lead.s_store_name AND 51 | v1.s_company_name = v1_lag.s_company_name AND 52 | v1.s_company_name = v1_lead.s_company_name AND 53 | v1.rn = v1_lag.rn + 1 AND 54 | v1.rn = v1_lead.rn - 1) 55 | SELECT * 56 | FROM v2 57 | WHERE d_year = 1999 AND 58 | avg_monthly_sales > 0 AND 59 | CASE WHEN avg_monthly_sales > 0 60 | THEN abs(sum_sales - avg_monthly_sales) / avg_monthly_sales 61 | ELSE NULL END > 0.1 62 | ORDER BY sum_sales - avg_monthly_sales, 3 63 | LIMIT 100 64 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q74.sql: -------------------------------------------------------------------------------- 1 | WITH year_total AS ( 2 | SELECT 3 | c_customer_id customer_id, 4 | c_first_name customer_first_name, 5 | c_last_name customer_last_name, 6 | d_year AS year, 7 | sum(ss_net_paid) year_total, 8 | 's' sale_type 9 | FROM 10 | customer, store_sales, date_dim 11 | WHERE c_customer_sk = ss_customer_sk 12 | AND ss_sold_date_sk = d_date_sk 13 | AND d_year IN (2001, 2001 + 1) 14 | GROUP BY 15 | c_customer_id, c_first_name, c_last_name, d_year 16 | UNION ALL 17 | SELECT 18 | c_customer_id customer_id, 19 | c_first_name customer_first_name, 20 | c_last_name customer_last_name, 21 | d_year AS year, 22 | sum(ws_net_paid) year_total, 23 | 'w' sale_type 24 | FROM 25 | customer, web_sales, date_dim 26 | WHERE c_customer_sk = ws_bill_customer_sk 27 | AND ws_sold_date_sk = d_date_sk 28 
| AND d_year IN (2001, 2001 + 1) 29 | GROUP BY 30 | c_customer_id, c_first_name, c_last_name, d_year) 31 | SELECT 32 | t_s_secyear.customer_id, 33 | t_s_secyear.customer_first_name, 34 | t_s_secyear.customer_last_name 35 | FROM 36 | year_total t_s_firstyear, year_total t_s_secyear, 37 | year_total t_w_firstyear, year_total t_w_secyear 38 | WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id 39 | AND t_s_firstyear.customer_id = t_w_secyear.customer_id 40 | AND t_s_firstyear.customer_id = t_w_firstyear.customer_id 41 | AND t_s_firstyear.sale_type = 's' 42 | AND t_w_firstyear.sale_type = 'w' 43 | AND t_s_secyear.sale_type = 's' 44 | AND t_w_secyear.sale_type = 'w' 45 | AND t_s_firstyear.year = 2001 46 | AND t_s_secyear.year = 2001 + 1 47 | AND t_w_firstyear.year = 2001 48 | AND t_w_secyear.year = 2001 + 1 49 | AND t_s_firstyear.year_total > 0 50 | AND t_w_firstyear.year_total > 0 51 | AND CASE WHEN t_w_firstyear.year_total > 0 52 | THEN t_w_secyear.year_total / t_w_firstyear.year_total 53 | ELSE NULL END 54 | > CASE WHEN t_s_firstyear.year_total > 0 55 | THEN t_s_secyear.year_total / t_s_firstyear.year_total 56 | ELSE NULL END 57 | ORDER BY 1, 1, 1 58 | LIMIT 100 59 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q28.sql: -------------------------------------------------------------------------------- 1 | SELECT * 2 | FROM (SELECT 3 | avg(ss_list_price) B1_LP, 4 | count(ss_list_price) B1_CNT, 5 | count(DISTINCT ss_list_price) B1_CNTD 6 | FROM store_sales 7 | WHERE ss_quantity BETWEEN 0 AND 5 8 | AND (ss_list_price BETWEEN 8 AND 8 + 10 9 | OR ss_coupon_amt BETWEEN 459 AND 459 + 1000 10 | OR ss_wholesale_cost BETWEEN 57 AND 57 + 20)) B1, 11 | (SELECT 12 | avg(ss_list_price) B2_LP, 13 | count(ss_list_price) B2_CNT, 14 | count(DISTINCT ss_list_price) B2_CNTD 15 | FROM store_sales 16 | WHERE ss_quantity BETWEEN 6 AND 10 17 | AND (ss_list_price BETWEEN 90 AND 90 + 10 18 | OR ss_coupon_amt BETWEEN 2323 AND 
2323 + 1000 19 | OR ss_wholesale_cost BETWEEN 31 AND 31 + 20)) B2, 20 | (SELECT 21 | avg(ss_list_price) B3_LP, 22 | count(ss_list_price) B3_CNT, 23 | count(DISTINCT ss_list_price) B3_CNTD 24 | FROM store_sales 25 | WHERE ss_quantity BETWEEN 11 AND 15 26 | AND (ss_list_price BETWEEN 142 AND 142 + 10 27 | OR ss_coupon_amt BETWEEN 12214 AND 12214 + 1000 28 | OR ss_wholesale_cost BETWEEN 79 AND 79 + 20)) B3, 29 | (SELECT 30 | avg(ss_list_price) B4_LP, 31 | count(ss_list_price) B4_CNT, 32 | count(DISTINCT ss_list_price) B4_CNTD 33 | FROM store_sales 34 | WHERE ss_quantity BETWEEN 16 AND 20 35 | AND (ss_list_price BETWEEN 135 AND 135 + 10 36 | OR ss_coupon_amt BETWEEN 6071 AND 6071 + 1000 37 | OR ss_wholesale_cost BETWEEN 38 AND 38 + 20)) B4, 38 | (SELECT 39 | avg(ss_list_price) B5_LP, 40 | count(ss_list_price) B5_CNT, 41 | count(DISTINCT ss_list_price) B5_CNTD 42 | FROM store_sales 43 | WHERE ss_quantity BETWEEN 21 AND 25 44 | AND (ss_list_price BETWEEN 122 AND 122 + 10 45 | OR ss_coupon_amt BETWEEN 836 AND 836 + 1000 46 | OR ss_wholesale_cost BETWEEN 17 AND 17 + 20)) B5, 47 | (SELECT 48 | avg(ss_list_price) B6_LP, 49 | count(ss_list_price) B6_CNT, 50 | count(DISTINCT ss_list_price) B6_CNTD 51 | FROM store_sales 52 | WHERE ss_quantity BETWEEN 26 AND 30 53 | AND (ss_list_price BETWEEN 154 AND 154 + 10 54 | OR ss_coupon_amt BETWEEN 7326 AND 7326 + 1000 55 | OR ss_wholesale_cost BETWEEN 7 AND 7 + 20)) B6 56 | LIMIT 100 57 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q58.sql: -------------------------------------------------------------------------------- 1 | WITH ss_items AS 2 | (SELECT 3 | i_item_id item_id, 4 | sum(ss_ext_sales_price) ss_item_rev 5 | FROM store_sales, item, date_dim 6 | WHERE ss_item_sk = i_item_sk 7 | AND d_date IN (SELECT d_date 8 | FROM date_dim 9 | WHERE d_week_seq = (SELECT d_week_seq 10 | FROM date_dim 11 | WHERE d_date = '2000-01-03')) 12 | AND ss_sold_date_sk = d_date_sk 13 | GROUP 
BY i_item_id), 14 | cs_items AS 15 | (SELECT 16 | i_item_id item_id, 17 | sum(cs_ext_sales_price) cs_item_rev 18 | FROM catalog_sales, item, date_dim 19 | WHERE cs_item_sk = i_item_sk 20 | AND d_date IN (SELECT d_date 21 | FROM date_dim 22 | WHERE d_week_seq = (SELECT d_week_seq 23 | FROM date_dim 24 | WHERE d_date = '2000-01-03')) 25 | AND cs_sold_date_sk = d_date_sk 26 | GROUP BY i_item_id), 27 | ws_items AS 28 | (SELECT 29 | i_item_id item_id, 30 | sum(ws_ext_sales_price) ws_item_rev 31 | FROM web_sales, item, date_dim 32 | WHERE ws_item_sk = i_item_sk 33 | AND d_date IN (SELECT d_date 34 | FROM date_dim 35 | WHERE d_week_seq = (SELECT d_week_seq 36 | FROM date_dim 37 | WHERE d_date = '2000-01-03')) 38 | AND ws_sold_date_sk = d_date_sk 39 | GROUP BY i_item_id) 40 | SELECT 41 | ss_items.item_id, 42 | ss_item_rev, 43 | ss_item_rev / (ss_item_rev + cs_item_rev + ws_item_rev) / 3 * 100 ss_dev, 44 | cs_item_rev, 45 | cs_item_rev / (ss_item_rev + cs_item_rev + ws_item_rev) / 3 * 100 cs_dev, 46 | ws_item_rev, 47 | ws_item_rev / (ss_item_rev + cs_item_rev + ws_item_rev) / 3 * 100 ws_dev, 48 | (ss_item_rev + cs_item_rev + ws_item_rev) / 3 average 49 | FROM ss_items, cs_items, ws_items 50 | WHERE ss_items.item_id = cs_items.item_id 51 | AND ss_items.item_id = ws_items.item_id 52 | AND ss_item_rev BETWEEN 0.9 * cs_item_rev AND 1.1 * cs_item_rev 53 | AND ss_item_rev BETWEEN 0.9 * ws_item_rev AND 1.1 * ws_item_rev 54 | AND cs_item_rev BETWEEN 0.9 * ss_item_rev AND 1.1 * ss_item_rev 55 | AND cs_item_rev BETWEEN 0.9 * ws_item_rev AND 1.1 * ws_item_rev 56 | AND ws_item_rev BETWEEN 0.9 * ss_item_rev AND 1.1 * ss_item_rev 57 | AND ws_item_rev BETWEEN 0.9 * cs_item_rev AND 1.1 * cs_item_rev 58 | ORDER BY item_id, ss_item_rev 59 | LIMIT 100 60 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/catalyst/optimizer/RangerSparkRowFilterExtensionTest.scala: 
--------------------------------------------------------------------------------
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.hive.test.TestHive
import org.scalatest.FunSuite
import org.apache.spark.sql.RangerSparkTestUtils._
import org.apache.spark.sql.catalyst.plans.logical.{Filter, RangerSparkRowFilter}

/**
 * Applies [[RangerSparkRowFilterExtension]] to `select * from src` as two
 * different users and checks the shape of the rewritten plan.
 *
 * For "bob" the rewritten plan must contain at least one [[Filter]]; for
 * "alice" it must contain none. In both cases the root of the result must be a
 * [[RangerSparkRowFilter]] node.
 * NOTE(review): the differing expectations presumably come from the Ranger
 * policies served by the test admin client; confirm against that fixture.
 */
class RangerSparkRowFilterExtensionTest extends FunSuite {

  private val spark = TestHive.sparkSession

  test("ranger spark row filter extension") {
    val rowFilterRule = RangerSparkRowFilterExtension(spark)
    val optimized = spark.sql("select * from src").queryExecution.optimizedPlan
    println(optimized)

    // bob: the rule is expected to inject row-level Filter nodes.
    withUser("bob") {
      val rewritten = rowFilterRule(optimized)
      assert(rewritten.isInstanceOf[RangerSparkRowFilter])
      val injected = rewritten.collect { case node: Filter => node }
      assert(injected.nonEmpty, "ranger row level filters should be applied automatically")
      println(rewritten)
    }

    // alice: the plan is still wrapped in the marker, but no Filter is added.
    withUser("alice") {
      val rewritten = rowFilterRule(optimized)
      assert(rewritten.isInstanceOf[RangerSparkRowFilter])
      val injected = rewritten.collect { case node: Filter => node }
      assert(injected.isEmpty, "alice does not have implicit filters")
      println(rewritten)
    }
  }

}
-------------------------------------------------------------------------------- /src/test/resources/tpcds/q23b.sql: -------------------------------------------------------------------------------- 1 | WITH frequent_ss_items AS 2 | (SELECT 3 | substr(i_item_desc, 1, 30) itemdesc, 4 | i_item_sk item_sk, 5 | d_date solddate, 6 | count(*) cnt 7 | FROM store_sales, date_dim, item 8 | WHERE ss_sold_date_sk = d_date_sk 9 | AND ss_item_sk = i_item_sk 10 | AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3) 11 | GROUP BY substr(i_item_desc, 1, 30), i_item_sk, d_date 12 | HAVING count(*) > 4), 13 | max_store_sales AS 14 | (SELECT max(csales) tpcds_cmax 15 | FROM (SELECT 16 | c_customer_sk, 17 | sum(ss_quantity * ss_sales_price) csales 18 | FROM store_sales, customer, date_dim 19 | WHERE ss_customer_sk = c_customer_sk 20 | AND ss_sold_date_sk = d_date_sk 21 | AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3) 22 | GROUP BY c_customer_sk) x), 23 | best_ss_customer AS 24 | (SELECT 25 | c_customer_sk, 26 | sum(ss_quantity * ss_sales_price) ssales 27 | FROM store_sales 28 | , customer 29 | WHERE ss_customer_sk = c_customer_sk 30 | GROUP BY c_customer_sk 31 | HAVING sum(ss_quantity * ss_sales_price) > (50 / 100.0) * 32 | (SELECT * 33 | FROM max_store_sales)) 34 | SELECT 35 | c_last_name, 36 | c_first_name, 37 | sales 38 | FROM ((SELECT 39 | c_last_name, 40 | c_first_name, 41 | sum(cs_quantity * cs_list_price) sales 42 | FROM catalog_sales, customer, date_dim 43 | WHERE d_year = 2000 44 | AND d_moy = 2 45 | AND cs_sold_date_sk = d_date_sk 46 | AND cs_item_sk IN (SELECT item_sk 47 | FROM frequent_ss_items) 48 | AND cs_bill_customer_sk IN (SELECT c_customer_sk 49 | FROM best_ss_customer) 50 | AND cs_bill_customer_sk = c_customer_sk 51 | GROUP BY c_last_name, c_first_name) 52 | UNION ALL 53 | (SELECT 54 | c_last_name, 55 |
c_first_name, 56 | sum(ws_quantity * ws_list_price) sales 57 | FROM web_sales, customer, date_dim 58 | WHERE d_year = 2000 59 | AND d_moy = 2 60 | AND ws_sold_date_sk = d_date_sk 61 | AND ws_item_sk IN (SELECT item_sk 62 | FROM frequent_ss_items) 63 | AND ws_bill_customer_sk IN (SELECT c_customer_sk 64 | FROM best_ss_customer) 65 | AND ws_bill_customer_sk = c_customer_sk 66 | GROUP BY c_last_name, c_first_name)) y 67 | ORDER BY c_last_name, c_first_name, sales 68 | LIMIT 100 69 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/spark/sql/catalyst/optimizer/RangerSparkMaskingExtensionTest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
*/

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.hive.test.TestHive
import org.apache.spark.sql.RangerSparkTestUtils._
import org.apache.spark.sql.catalyst.expressions.Alias
import org.apache.spark.sql.catalyst.plans.logical.{Project, RangerSparkMasking}
import org.scalatest.FunSuite

/**
 * Applies [[RangerSparkMaskingExtension]] to `select * from src` as two
 * different users and checks the rewritten plan.
 *
 * For "bob" the result must be a [[Project]] whose first column is still named
 * `key` and whose second column is an [[Alias]] named `value` wrapping a
 * `mask_show_last_n` call. For "alice" the plan must only be wrapped in a
 * [[RangerSparkMasking]] marker.
 */
class RangerSparkMaskingExtensionTest extends FunSuite {

  private val spark = TestHive.sparkSession

  test("data masking for bob show last 4") {
    val extension = RangerSparkMaskingExtension(spark)
    val plan = spark.sql("select * from src").queryExecution.optimizedPlan
    println(plan)

    withUser("bob") {
      // Pattern matching instead of isInstanceOf/asInstanceOf and head/tail:
      // an unexpected plan shape becomes a test failure that prints the plan,
      // not a bare ClassCastException or NoSuchElementException.
      extension(plan) match {
        case Project(key +: value +: _, _) =>
          assert(key.name === "key", "no affect on un masking attribute")
          assert(value.name === "value", "attibute name should be unchanged")
          value match {
            case masked: Alias =>
              assert(masked.child.sql ===
                "mask_show_last_n(`value`, 4, 'x', 'x', 'x', -1, '1')")
            case other =>
              fail(s"expected the masked column to be an Alias but got: $other")
          }
        case other =>
          fail(s"expected a Project with at least two columns but got:\n$other")
      }
    }

    withUser("alice") {
      // No masking applies: the input plan is only wrapped in the marker node.
      assert(extension(plan) === RangerSparkMasking(plan))
    }
  }

}
-------------------------------------------------------------------------------- /src/test/resources/tpcds/q78.sql: -------------------------------------------------------------------------------- 1 | WITH ws AS 2 | (SELECT 3 | d_year AS ws_sold_year, 4 | ws_item_sk, 5 | ws_bill_customer_sk ws_customer_sk, 6 | sum(ws_quantity) ws_qty, 7 | sum(ws_wholesale_cost) ws_wc, 8 | sum(ws_sales_price) ws_sp 9 | FROM web_sales 10 | LEFT JOIN web_returns ON wr_order_number = ws_order_number AND ws_item_sk = wr_item_sk 11 | JOIN date_dim ON ws_sold_date_sk = d_date_sk 12 | WHERE wr_order_number IS NULL 13 | GROUP BY d_year, ws_item_sk, ws_bill_customer_sk 14
| ), 15 | cs AS 16 | (SELECT 17 | d_year AS cs_sold_year, 18 | cs_item_sk, 19 | cs_bill_customer_sk cs_customer_sk, 20 | sum(cs_quantity) cs_qty, 21 | sum(cs_wholesale_cost) cs_wc, 22 | sum(cs_sales_price) cs_sp 23 | FROM catalog_sales 24 | LEFT JOIN catalog_returns ON cr_order_number = cs_order_number AND cs_item_sk = cr_item_sk 25 | JOIN date_dim ON cs_sold_date_sk = d_date_sk 26 | WHERE cr_order_number IS NULL 27 | GROUP BY d_year, cs_item_sk, cs_bill_customer_sk 28 | ), 29 | ss AS 30 | (SELECT 31 | d_year AS ss_sold_year, 32 | ss_item_sk, 33 | ss_customer_sk, 34 | sum(ss_quantity) ss_qty, 35 | sum(ss_wholesale_cost) ss_wc, 36 | sum(ss_sales_price) ss_sp 37 | FROM store_sales 38 | LEFT JOIN store_returns ON sr_ticket_number = ss_ticket_number AND ss_item_sk = sr_item_sk 39 | JOIN date_dim ON ss_sold_date_sk = d_date_sk 40 | WHERE sr_ticket_number IS NULL 41 | GROUP BY d_year, ss_item_sk, ss_customer_sk 42 | ) 43 | SELECT 44 | round(ss_qty / (coalesce(ws_qty + cs_qty, 1)), 2) ratio, 45 | ss_qty store_qty, 46 | ss_wc store_wholesale_cost, 47 | ss_sp store_sales_price, 48 | coalesce(ws_qty, 0) + coalesce(cs_qty, 0) other_chan_qty, 49 | coalesce(ws_wc, 0) + coalesce(cs_wc, 0) other_chan_wholesale_cost, 50 | coalesce(ws_sp, 0) + coalesce(cs_sp, 0) other_chan_sales_price 51 | FROM ss 52 | LEFT JOIN ws 53 | ON (ws_sold_year = ss_sold_year AND ws_item_sk = ss_item_sk AND ws_customer_sk = ss_customer_sk) 54 | LEFT JOIN cs 55 | ON (cs_sold_year = ss_sold_year AND cs_item_sk = ss_item_sk AND cs_customer_sk = ss_customer_sk) 56 | WHERE coalesce(ws_qty, 0) > 0 AND coalesce(cs_qty, 0) > 0 AND ss_sold_year = 2000 57 | ORDER BY 58 | ratio, 59 | ss_qty DESC, ss_wc DESC, ss_sp DESC, 60 | other_chan_qty, 61 | other_chan_wholesale_cost, 62 | other_chan_sales_price, 63 | round(ss_qty / (coalesce(ws_qty + cs_qty, 1)), 2) 64 | LIMIT 100 65 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q59.sql: 
-------------------------------------------------------------------------------- 1 | WITH wss AS 2 | (SELECT 3 | d_week_seq, 4 | ss_store_sk, 5 | sum(CASE WHEN (d_day_name = 'Sunday') 6 | THEN ss_sales_price 7 | ELSE NULL END) sun_sales, 8 | sum(CASE WHEN (d_day_name = 'Monday') 9 | THEN ss_sales_price 10 | ELSE NULL END) mon_sales, 11 | sum(CASE WHEN (d_day_name = 'Tuesday') 12 | THEN ss_sales_price 13 | ELSE NULL END) tue_sales, 14 | sum(CASE WHEN (d_day_name = 'Wednesday') 15 | THEN ss_sales_price 16 | ELSE NULL END) wed_sales, 17 | sum(CASE WHEN (d_day_name = 'Thursday') 18 | THEN ss_sales_price 19 | ELSE NULL END) thu_sales, 20 | sum(CASE WHEN (d_day_name = 'Friday') 21 | THEN ss_sales_price 22 | ELSE NULL END) fri_sales, 23 | sum(CASE WHEN (d_day_name = 'Saturday') 24 | THEN ss_sales_price 25 | ELSE NULL END) sat_sales 26 | FROM store_sales, date_dim 27 | WHERE d_date_sk = ss_sold_date_sk 28 | GROUP BY d_week_seq, ss_store_sk 29 | ) 30 | SELECT 31 | s_store_name1, 32 | s_store_id1, 33 | d_week_seq1, 34 | sun_sales1 / sun_sales2, 35 | mon_sales1 / mon_sales2, 36 | tue_sales1 / tue_sales2, 37 | wed_sales1 / wed_sales2, 38 | thu_sales1 / thu_sales2, 39 | fri_sales1 / fri_sales2, 40 | sat_sales1 / sat_sales2 41 | FROM 42 | (SELECT 43 | s_store_name s_store_name1, 44 | wss.d_week_seq d_week_seq1, 45 | s_store_id s_store_id1, 46 | sun_sales sun_sales1, 47 | mon_sales mon_sales1, 48 | tue_sales tue_sales1, 49 | wed_sales wed_sales1, 50 | thu_sales thu_sales1, 51 | fri_sales fri_sales1, 52 | sat_sales sat_sales1 53 | FROM wss, store, date_dim d 54 | WHERE d.d_week_seq = wss.d_week_seq AND 55 | ss_store_sk = s_store_sk AND 56 | d_month_seq BETWEEN 1212 AND 1212 + 11) y, 57 | (SELECT 58 | s_store_name s_store_name2, 59 | wss.d_week_seq d_week_seq2, 60 | s_store_id s_store_id2, 61 | sun_sales sun_sales2, 62 | mon_sales mon_sales2, 63 | tue_sales tue_sales2, 64 | wed_sales wed_sales2, 65 | thu_sales thu_sales2, 66 | fri_sales fri_sales2, 67 | sat_sales sat_sales2 68 | 
FROM wss, store, date_dim d 69 | WHERE d.d_week_seq = wss.d_week_seq AND 70 | ss_store_sk = s_store_sk AND 71 | d_month_seq BETWEEN 1212 + 12 AND 1212 + 23) x 72 | WHERE s_store_id1 = s_store_id2 73 | AND d_week_seq1 = d_week_seq2 - 52 74 | ORDER BY s_store_name1, s_store_id1, d_week_seq1 75 | LIMIT 100 76 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q2.sql: -------------------------------------------------------------------------------- 1 | WITH wscs AS 2 | ( SELECT 3 | sold_date_sk, 4 | sales_price 5 | FROM (SELECT 6 | ws_sold_date_sk sold_date_sk, 7 | ws_ext_sales_price sales_price 8 | FROM web_sales) x 9 | UNION ALL 10 | (SELECT 11 | cs_sold_date_sk sold_date_sk, 12 | cs_ext_sales_price sales_price 13 | FROM catalog_sales)), 14 | wswscs AS 15 | ( SELECT 16 | d_week_seq, 17 | sum(CASE WHEN (d_day_name = 'Sunday') 18 | THEN sales_price 19 | ELSE NULL END) 20 | sun_sales, 21 | sum(CASE WHEN (d_day_name = 'Monday') 22 | THEN sales_price 23 | ELSE NULL END) 24 | mon_sales, 25 | sum(CASE WHEN (d_day_name = 'Tuesday') 26 | THEN sales_price 27 | ELSE NULL END) 28 | tue_sales, 29 | sum(CASE WHEN (d_day_name = 'Wednesday') 30 | THEN sales_price 31 | ELSE NULL END) 32 | wed_sales, 33 | sum(CASE WHEN (d_day_name = 'Thursday') 34 | THEN sales_price 35 | ELSE NULL END) 36 | thu_sales, 37 | sum(CASE WHEN (d_day_name = 'Friday') 38 | THEN sales_price 39 | ELSE NULL END) 40 | fri_sales, 41 | sum(CASE WHEN (d_day_name = 'Saturday') 42 | THEN sales_price 43 | ELSE NULL END) 44 | sat_sales 45 | FROM wscs, date_dim 46 | WHERE d_date_sk = sold_date_sk 47 | GROUP BY d_week_seq) 48 | SELECT 49 | d_week_seq1, 50 | round(sun_sales1 / sun_sales2, 2), 51 | round(mon_sales1 / mon_sales2, 2), 52 | round(tue_sales1 / tue_sales2, 2), 53 | round(wed_sales1 / wed_sales2, 2), 54 | round(thu_sales1 / thu_sales2, 2), 55 | round(fri_sales1 / fri_sales2, 2), 56 | round(sat_sales1 / sat_sales2, 2) 57 | FROM 58 | (SELECT 59 | 
wswscs.d_week_seq d_week_seq1, 60 | sun_sales sun_sales1, 61 | mon_sales mon_sales1, 62 | tue_sales tue_sales1, 63 | wed_sales wed_sales1, 64 | thu_sales thu_sales1, 65 | fri_sales fri_sales1, 66 | sat_sales sat_sales1 67 | FROM wswscs, date_dim 68 | WHERE date_dim.d_week_seq = wswscs.d_week_seq AND d_year = 2001) y, 69 | (SELECT 70 | wswscs.d_week_seq d_week_seq2, 71 | sun_sales sun_sales2, 72 | mon_sales mon_sales2, 73 | tue_sales tue_sales2, 74 | wed_sales wed_sales2, 75 | thu_sales thu_sales2, 76 | fri_sales fri_sales2, 77 | sat_sales sat_sales2 78 | FROM wswscs, date_dim 79 | WHERE date_dim.d_week_seq = wswscs.d_week_seq AND d_year = 2001 + 1) z 80 | WHERE d_week_seq1 = d_week_seq2 - 53 81 | ORDER BY d_week_seq1 82 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q85.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | substr(r_reason_desc, 1, 20), 3 | avg(ws_quantity), 4 | avg(wr_refunded_cash), 5 | avg(wr_fee) 6 | FROM web_sales, web_returns, web_page, customer_demographics cd1, 7 | customer_demographics cd2, customer_address, date_dim, reason 8 | WHERE ws_web_page_sk = wp_web_page_sk 9 | AND ws_item_sk = wr_item_sk 10 | AND ws_order_number = wr_order_number 11 | AND ws_sold_date_sk = d_date_sk AND d_year = 2000 12 | AND cd1.cd_demo_sk = wr_refunded_cdemo_sk 13 | AND cd2.cd_demo_sk = wr_returning_cdemo_sk 14 | AND ca_address_sk = wr_refunded_addr_sk 15 | AND r_reason_sk = wr_reason_sk 16 | AND 17 | ( 18 | ( 19 | cd1.cd_marital_status = 'M' 20 | AND 21 | cd1.cd_marital_status = cd2.cd_marital_status 22 | AND 23 | cd1.cd_education_status = 'Advanced Degree' 24 | AND 25 | cd1.cd_education_status = cd2.cd_education_status 26 | AND 27 | ws_sales_price BETWEEN 100.00 AND 150.00 28 | ) 29 | OR 30 | ( 31 | cd1.cd_marital_status = 'S' 32 | AND 33 | cd1.cd_marital_status = cd2.cd_marital_status 34 | AND 35 | cd1.cd_education_status = 'College' 36 | AND 37 | 
cd1.cd_education_status = cd2.cd_education_status 38 | AND 39 | ws_sales_price BETWEEN 50.00 AND 100.00 40 | ) 41 | OR 42 | ( 43 | cd1.cd_marital_status = 'W' 44 | AND 45 | cd1.cd_marital_status = cd2.cd_marital_status 46 | AND 47 | cd1.cd_education_status = '2 yr Degree' 48 | AND 49 | cd1.cd_education_status = cd2.cd_education_status 50 | AND 51 | ws_sales_price BETWEEN 150.00 AND 200.00 52 | ) 53 | ) 54 | AND 55 | ( 56 | ( 57 | ca_country = 'United States' 58 | AND 59 | ca_state IN ('IN', 'OH', 'NJ') 60 | AND ws_net_profit BETWEEN 100 AND 200 61 | ) 62 | OR 63 | ( 64 | ca_country = 'United States' 65 | AND 66 | ca_state IN ('WI', 'CT', 'KY') 67 | AND ws_net_profit BETWEEN 150 AND 300 68 | ) 69 | OR 70 | ( 71 | ca_country = 'United States' 72 | AND 73 | ca_state IN ('LA', 'IA', 'AR') 74 | AND ws_net_profit BETWEEN 50 AND 250 75 | ) 76 | ) 77 | GROUP BY r_reason_desc 78 | ORDER BY substr(r_reason_desc, 1, 20) 79 | , avg(ws_quantity) 80 | , avg(wr_refunded_cash) 81 | , avg(wr_fee) 82 | LIMIT 100 83 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q11.sql: -------------------------------------------------------------------------------- 1 | WITH year_total AS ( 2 | SELECT 3 | c_customer_id customer_id, 4 | c_first_name customer_first_name, 5 | c_last_name customer_last_name, 6 | c_preferred_cust_flag customer_preferred_cust_flag, 7 | c_birth_country customer_birth_country, 8 | c_login customer_login, 9 | c_email_address customer_email_address, 10 | d_year dyear, 11 | sum(ss_ext_list_price - ss_ext_discount_amt) year_total, 12 | 's' sale_type 13 | FROM customer, store_sales, date_dim 14 | WHERE c_customer_sk = ss_customer_sk 15 | AND ss_sold_date_sk = d_date_sk 16 | GROUP BY c_customer_id 17 | , c_first_name 18 | , c_last_name 19 | , d_year 20 | , c_preferred_cust_flag 21 | , c_birth_country 22 | , c_login 23 | , c_email_address 24 | , d_year 25 | UNION ALL 26 | SELECT 27 | c_customer_id customer_id, 28 | 
c_first_name customer_first_name, 29 | c_last_name customer_last_name, 30 | c_preferred_cust_flag customer_preferred_cust_flag, 31 | c_birth_country customer_birth_country, 32 | c_login customer_login, 33 | c_email_address customer_email_address, 34 | d_year dyear, 35 | sum(ws_ext_list_price - ws_ext_discount_amt) year_total, 36 | 'w' sale_type 37 | FROM customer, web_sales, date_dim 38 | WHERE c_customer_sk = ws_bill_customer_sk 39 | AND ws_sold_date_sk = d_date_sk 40 | GROUP BY 41 | c_customer_id, c_first_name, c_last_name, c_preferred_cust_flag, c_birth_country, 42 | c_login, c_email_address, d_year) 43 | SELECT t_s_secyear.customer_preferred_cust_flag 44 | FROM year_total t_s_firstyear 45 | , year_total t_s_secyear 46 | , year_total t_w_firstyear 47 | , year_total t_w_secyear 48 | WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id 49 | AND t_s_firstyear.customer_id = t_w_secyear.customer_id 50 | AND t_s_firstyear.customer_id = t_w_firstyear.customer_id 51 | AND t_s_firstyear.sale_type = 's' 52 | AND t_w_firstyear.sale_type = 'w' 53 | AND t_s_secyear.sale_type = 's' 54 | AND t_w_secyear.sale_type = 'w' 55 | AND t_s_firstyear.dyear = 2001 56 | AND t_s_secyear.dyear = 2001 + 1 57 | AND t_w_firstyear.dyear = 2001 58 | AND t_w_secyear.dyear = 2001 + 1 59 | AND t_s_firstyear.year_total > 0 60 | AND t_w_firstyear.year_total > 0 61 | AND CASE WHEN t_w_firstyear.year_total > 0 62 | THEN t_w_secyear.year_total / t_w_firstyear.year_total 63 | ELSE NULL END 64 | > CASE WHEN t_s_firstyear.year_total > 0 65 | THEN t_s_secyear.year_total / t_s_firstyear.year_total 66 | ELSE NULL END 67 | ORDER BY t_s_secyear.customer_preferred_cust_flag 68 | LIMIT 100 69 | -------------------------------------------------------------------------------- /src/test/scala/org/apache/ranger/services/spark/RangerAdminClientImpl.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or 
/**
 * Stub Ranger admin client used by the test suite.
 *
 * Instead of talking to a Ranger admin server, policies are read from the JSON file
 * `sparkSql_hive_jenkins.json` located in the directory this class was loaded from.
 * `init`, `grantAccess` and `revokeAccess` do nothing; the tag lookups return `null`.
 */
class RangerAdminClientImpl extends RangerAdminRESTClient {
  private val LOG: Log = LogFactory.getLog(classOf[RangerAdminClientImpl])
  private val cacheFilename = "sparkSql_hive_jenkins.json"
  private val gson =
    new GsonBuilder().setDateFormat("yyyyMMdd-HH:mm:ss.SSS-Z").setPrettyPrinting().create

  /** No-op: all three arguments are ignored. */
  override def init(serviceName: String, appId: String, configPropertyPrefix: String): Unit = {}

  /**
   * Loads the service policies from the JSON file next to this class's code source.
   *
   * Both arguments are ignored: the file is re-read and fully parsed on every call.
   *
   * @param lastKnownVersion unused
   * @param lastActivationTimeInMillis unused
   * @return the policies deserialized from `sparkSql_hive_jenkins.json`
   */
  override def getServicePoliciesIfUpdated(
      lastKnownVersion: Long,
      lastActivationTimeInMillis: Long): ServicePolicies = {
    import java.nio.charset.StandardCharsets
    import java.nio.file.Paths

    // Convert the code-source URL through a URI rather than URL.getPath: getPath keeps
    // percent-encoding (e.g. "%20" for spaces), which does not name a real file.
    val codeSourceLocation = getClass.getProtectionDomain.getCodeSource.getLocation
    val cachePath = Paths.get(codeSourceLocation.toURI).resolve(cacheFilename)
    LOG.info("Reading policies from " + cachePath)
    val bytes = Files.readAllBytes(cachePath)
    // Decode explicitly as UTF-8 so parsing does not depend on the platform default charset.
    gson.fromJson(new String(bytes, StandardCharsets.UTF_8), classOf[ServicePolicies])
  }

  /** No-op: the request is ignored. */
  override def grantAccess(request: GrantRevokeRequest): Unit = {}

  /** No-op: the request is ignored. */
  override def revokeAccess(request: GrantRevokeRequest): Unit = {}

  /** Always returns `null`; this stub serves no tags. */
  override def getServiceTagsIfUpdated(
      lastKnownVersion: Long,
      lastActivationTimeInMillis: Long): ServiceTags = null

  /** Always returns `null`; this stub serves no tag types. */
  override def getTagTypes(tagTypePattern: String): util.List[String] = null
}
ws_ext_sales_price - COALESCE(wr_return_amt, 0.0) AS sales_amt 49 | FROM web_sales 50 | JOIN item ON i_item_sk = ws_item_sk 51 | JOIN date_dim ON d_date_sk = ws_sold_date_sk 52 | LEFT JOIN web_returns ON (ws_order_number = wr_order_number 53 | AND ws_item_sk = wr_item_sk) 54 | WHERE i_category = 'Books') sales_detail 55 | GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) 56 | SELECT 57 | prev_yr.d_year AS prev_year, 58 | curr_yr.d_year AS year, 59 | curr_yr.i_brand_id, 60 | curr_yr.i_class_id, 61 | curr_yr.i_category_id, 62 | curr_yr.i_manufact_id, 63 | prev_yr.sales_cnt AS prev_yr_cnt, 64 | curr_yr.sales_cnt AS curr_yr_cnt, 65 | curr_yr.sales_cnt - prev_yr.sales_cnt AS sales_cnt_diff, 66 | curr_yr.sales_amt - prev_yr.sales_amt AS sales_amt_diff 67 | FROM all_sales curr_yr, all_sales prev_yr 68 | WHERE curr_yr.i_brand_id = prev_yr.i_brand_id 69 | AND curr_yr.i_class_id = prev_yr.i_class_id 70 | AND curr_yr.i_category_id = prev_yr.i_category_id 71 | AND curr_yr.i_manufact_id = prev_yr.i_manufact_id 72 | AND curr_yr.d_year = 2002 73 | AND prev_yr.d_year = 2002 - 1 74 | AND CAST(curr_yr.sales_cnt AS DECIMAL(17, 2)) / CAST(prev_yr.sales_cnt AS DECIMAL(17, 2)) < 0.9 75 | ORDER BY sales_cnt_diff 76 | LIMIT 100 77 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/ranger/authorization/spark/authorizer/RangerSparkPlugin.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
/**
 * Ranger plugin for Spark SQL, built on `RangerBasePlugin("spark", "sparkSql")`.
 *
 * The constructor is private; obtain the shared instance through
 * `RangerSparkPlugin.build().getOrCreate()`.
 */
class RangerSparkPlugin private extends RangerBasePlugin("spark", "sparkSql") {
  import RangerSparkPlugin._

  private val LOG = LogFactory.getLog(classOf[RangerSparkPlugin])

  /**
   * Filesystem scheme prefixes read from `ranger.plugin.spark.urlauth.filesystem.schemes`
   * (default `"hdfs:,file:"`), split on commas and trimmed.
   */
  lazy val fsScheme: Array[String] = RangerConfiguration.getInstance()
    .get("ranger.plugin.spark.urlauth.filesystem.schemes", "hdfs:,file:")
    .split(",")
    .map(_.trim)

  /**
   * Initializes the base plugin, then validates the policy cache directory named by
   * `ranger.plugin.spark.policy.cache.dir`, creating it when it does not exist.
   *
   * @throws IOException if the property is not set, if the path exists but is not a
   *                     readable and writable directory, or if it cannot be created
   */
  override def init(): Unit = {
    super.init()
    val cacheDirKey = "ranger.plugin.spark.policy.cache.dir"
    // Option(...) turns an unset property into a descriptive failure instead of the
    // NullPointerException that `new File(null)` would raise.
    val cacheDir = Option(rangerConf.get(cacheDirKey)).map(new File(_)).getOrElse {
      throw new IOException(s"Ranger policy cache directory is not configured ($cacheDirKey)")
    }
    if (cacheDir.exists() &&
      (!cacheDir.isDirectory || !cacheDir.canRead || !cacheDir.canWrite)) {
      throw new IOException("Policy cache directory already exists at " +
        cacheDir.getAbsolutePath + ", but it is unavailable")
    }

    if (!cacheDir.exists() && !cacheDir.mkdirs()) {
      throw new IOException("Unable to create ranger policy cache directory at " +
        cacheDir.getAbsolutePath)
    }
    LOG.info("Policy cache directory successfully set to " + cacheDir.getAbsolutePath)
  }
}

object RangerSparkPlugin {

  private val rangerConf: RangerConfiguration = RangerConfiguration.getInstance

  /**
   * Value of `xasecure.spark.describetable.showcolumns.authorization.option`,
   * defaulting to `"NONE"`.
   */
  val showColumnsOption: String = rangerConf.get(
    "xasecure.spark.describetable.showcolumns.authorization.option", "NONE")

  // Held on the companion rather than on each Builder: build() returns a fresh Builder
  // on every call, so Builder-local state would never be reused and every
  // getOrCreate() would construct and initialize another plugin.
  @volatile private var sparkPlugin: RangerSparkPlugin = _

  /** Returns a builder whose `getOrCreate()` yields the shared plugin instance. */
  def build(): Builder = new Builder

  class Builder {

    /**
     * Returns the shared plugin, constructing and initializing it on first use.
     *
     * The instance is cached only after `init()` has completed, so a failed
     * initialization is retried on the next call rather than handing out an
     * uninitialized plugin.
     */
    def getOrCreate(): RangerSparkPlugin = RangerSparkPlugin.synchronized {
      if (sparkPlugin == null) {
        val plugin = new RangerSparkPlugin
        plugin.init()
        sparkPlugin = plugin
      }
      sparkPlugin
    }
  }
}
cs_sold_date_sk = d_date_sk 35 | AND d_date BETWEEN cast('2000-08-23' AS DATE) 36 | AND (cast('2000-08-23' AS DATE) + INTERVAL 30 days) 37 | AND cs_catalog_page_sk = cp_catalog_page_sk 38 | AND cs_item_sk = i_item_sk 39 | AND i_current_price > 50 40 | AND cs_promo_sk = p_promo_sk 41 | AND p_channel_tv = 'N' 42 | GROUP BY cp_catalog_page_id), 43 | wsr AS 44 | (SELECT 45 | web_site_id, 46 | sum(ws_ext_sales_price) AS sales, 47 | sum(coalesce(wr_return_amt, 0)) AS returns, 48 | sum(ws_net_profit - coalesce(wr_net_loss, 0)) AS profit 49 | FROM web_sales 50 | LEFT OUTER JOIN web_returns ON 51 | (ws_item_sk = wr_item_sk AND ws_order_number = wr_order_number) 52 | , 53 | date_dim, web_site, item, promotion 54 | WHERE ws_sold_date_sk = d_date_sk 55 | AND d_date BETWEEN cast('2000-08-23' AS DATE) 56 | AND (cast('2000-08-23' AS DATE) + INTERVAL 30 days) 57 | AND ws_web_site_sk = web_site_sk 58 | AND ws_item_sk = i_item_sk 59 | AND i_current_price > 50 60 | AND ws_promo_sk = p_promo_sk 61 | AND p_channel_tv = 'N' 62 | GROUP BY web_site_id) 63 | SELECT 64 | channel, 65 | id, 66 | sum(sales) AS sales, 67 | sum(returns) AS returns, 68 | sum(profit) AS profit 69 | FROM (SELECT 70 | 'store channel' AS channel, 71 | concat('store', store_id) AS id, 72 | sales, 73 | returns, 74 | profit 75 | FROM ssr 76 | UNION ALL 77 | SELECT 78 | 'catalog channel' AS channel, 79 | concat('catalog_page', catalog_page_id) AS id, 80 | sales, 81 | returns, 82 | profit 83 | FROM csr 84 | UNION ALL 85 | SELECT 86 | 'web channel' AS channel, 87 | concat('web_site', web_site_id) AS id, 88 | sales, 89 | returns, 90 | profit 91 | FROM wsr) x 92 | GROUP BY ROLLUP (channel, id) 93 | ORDER BY channel, id 94 | LIMIT 100 95 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q77.sql: -------------------------------------------------------------------------------- 1 | WITH ss AS 2 | (SELECT 3 | s_store_sk, 4 | sum(ss_ext_sales_price) AS sales, 5 | 
sum(ss_net_profit) AS profit 6 | FROM store_sales, date_dim, store 7 | WHERE ss_sold_date_sk = d_date_sk 8 | AND d_date BETWEEN cast('2000-08-03' AS DATE) AND 9 | (cast('2000-08-03' AS DATE) + INTERVAL 30 days) 10 | AND ss_store_sk = s_store_sk 11 | GROUP BY s_store_sk), 12 | sr AS 13 | (SELECT 14 | s_store_sk, 15 | sum(sr_return_amt) AS returns, 16 | sum(sr_net_loss) AS profit_loss 17 | FROM store_returns, date_dim, store 18 | WHERE sr_returned_date_sk = d_date_sk 19 | AND d_date BETWEEN cast('2000-08-03' AS DATE) AND 20 | (cast('2000-08-03' AS DATE) + INTERVAL 30 days) 21 | AND sr_store_sk = s_store_sk 22 | GROUP BY s_store_sk), 23 | cs AS 24 | (SELECT 25 | cs_call_center_sk, 26 | sum(cs_ext_sales_price) AS sales, 27 | sum(cs_net_profit) AS profit 28 | FROM catalog_sales, date_dim 29 | WHERE cs_sold_date_sk = d_date_sk 30 | AND d_date BETWEEN cast('2000-08-03' AS DATE) AND 31 | (cast('2000-08-03' AS DATE) + INTERVAL 30 days) 32 | GROUP BY cs_call_center_sk), 33 | cr AS 34 | (SELECT 35 | sum(cr_return_amount) AS returns, 36 | sum(cr_net_loss) AS profit_loss 37 | FROM catalog_returns, date_dim 38 | WHERE cr_returned_date_sk = d_date_sk 39 | AND d_date BETWEEN cast('2000-08-03' AS DATE) AND 40 | (cast('2000-08-03' AS DATE) + INTERVAL 30 days)), 41 | ws AS 42 | (SELECT 43 | wp_web_page_sk, 44 | sum(ws_ext_sales_price) AS sales, 45 | sum(ws_net_profit) AS profit 46 | FROM web_sales, date_dim, web_page 47 | WHERE ws_sold_date_sk = d_date_sk 48 | AND d_date BETWEEN cast('2000-08-03' AS DATE) AND 49 | (cast('2000-08-03' AS DATE) + INTERVAL 30 days) 50 | AND ws_web_page_sk = wp_web_page_sk 51 | GROUP BY wp_web_page_sk), 52 | wr AS 53 | (SELECT 54 | wp_web_page_sk, 55 | sum(wr_return_amt) AS returns, 56 | sum(wr_net_loss) AS profit_loss 57 | FROM web_returns, date_dim, web_page 58 | WHERE wr_returned_date_sk = d_date_sk 59 | AND d_date BETWEEN cast('2000-08-03' AS DATE) AND 60 | (cast('2000-08-03' AS DATE) + INTERVAL 30 days) 61 | AND wr_web_page_sk = wp_web_page_sk 62 | 
GROUP BY wp_web_page_sk) 63 | SELECT 64 | channel, 65 | id, 66 | sum(sales) AS sales, 67 | sum(returns) AS returns, 68 | sum(profit) AS profit 69 | FROM 70 | (SELECT 71 | 'store channel' AS channel, 72 | ss.s_store_sk AS id, 73 | sales, 74 | coalesce(returns, 0) AS returns, 75 | (profit - coalesce(profit_loss, 0)) AS profit 76 | FROM ss 77 | LEFT JOIN sr 78 | ON ss.s_store_sk = sr.s_store_sk 79 | UNION ALL 80 | SELECT 81 | 'catalog channel' AS channel, 82 | cs_call_center_sk AS id, 83 | sales, 84 | returns, 85 | (profit - profit_loss) AS profit 86 | FROM cs, cr 87 | UNION ALL 88 | SELECT 89 | 'web channel' AS channel, 90 | ws.wp_web_page_sk AS id, 91 | sales, 92 | coalesce(returns, 0) returns, 93 | (profit - coalesce(profit_loss, 0)) AS profit 94 | FROM ws 95 | LEFT JOIN wr 96 | ON ws.wp_web_page_sk = wr.wp_web_page_sk 97 | ) x 98 | GROUP BY ROLLUP (channel, id) 99 | ORDER BY channel, id 100 | LIMIT 100 101 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q64.sql: -------------------------------------------------------------------------------- 1 | WITH cs_ui AS 2 | (SELECT 3 | cs_item_sk, 4 | sum(cs_ext_list_price) AS sale, 5 | sum(cr_refunded_cash + cr_reversed_charge + cr_store_credit) AS refund 6 | FROM catalog_sales 7 | , catalog_returns 8 | WHERE cs_item_sk = cr_item_sk 9 | AND cs_order_number = cr_order_number 10 | GROUP BY cs_item_sk 11 | HAVING sum(cs_ext_list_price) > 2 * sum(cr_refunded_cash + cr_reversed_charge + cr_store_credit)), 12 | cross_sales AS 13 | (SELECT 14 | i_product_name product_name, 15 | i_item_sk item_sk, 16 | s_store_name store_name, 17 | s_zip store_zip, 18 | ad1.ca_street_number b_street_number, 19 | ad1.ca_street_name b_streen_name, 20 | ad1.ca_city b_city, 21 | ad1.ca_zip b_zip, 22 | ad2.ca_street_number c_street_number, 23 | ad2.ca_street_name c_street_name, 24 | ad2.ca_city c_city, 25 | ad2.ca_zip c_zip, 26 | d1.d_year AS syear, 27 | d2.d_year AS fsyear, 28 | d3.d_year 
s2year, 29 | count(*) cnt, 30 | sum(ss_wholesale_cost) s1, 31 | sum(ss_list_price) s2, 32 | sum(ss_coupon_amt) s3 33 | FROM store_sales, store_returns, cs_ui, date_dim d1, date_dim d2, date_dim d3, 34 | store, customer, customer_demographics cd1, customer_demographics cd2, 35 | promotion, household_demographics hd1, household_demographics hd2, 36 | customer_address ad1, customer_address ad2, income_band ib1, income_band ib2, item 37 | WHERE ss_store_sk = s_store_sk AND 38 | ss_sold_date_sk = d1.d_date_sk AND 39 | ss_customer_sk = c_customer_sk AND 40 | ss_cdemo_sk = cd1.cd_demo_sk AND 41 | ss_hdemo_sk = hd1.hd_demo_sk AND 42 | ss_addr_sk = ad1.ca_address_sk AND 43 | ss_item_sk = i_item_sk AND 44 | ss_item_sk = sr_item_sk AND 45 | ss_ticket_number = sr_ticket_number AND 46 | ss_item_sk = cs_ui.cs_item_sk AND 47 | c_current_cdemo_sk = cd2.cd_demo_sk AND 48 | c_current_hdemo_sk = hd2.hd_demo_sk AND 49 | c_current_addr_sk = ad2.ca_address_sk AND 50 | c_first_sales_date_sk = d2.d_date_sk AND 51 | c_first_shipto_date_sk = d3.d_date_sk AND 52 | ss_promo_sk = p_promo_sk AND 53 | hd1.hd_income_band_sk = ib1.ib_income_band_sk AND 54 | hd2.hd_income_band_sk = ib2.ib_income_band_sk AND 55 | cd1.cd_marital_status <> cd2.cd_marital_status AND 56 | i_color IN ('purple', 'burlywood', 'indian', 'spring', 'floral', 'medium') AND 57 | i_current_price BETWEEN 64 AND 64 + 10 AND 58 | i_current_price BETWEEN 64 + 1 AND 64 + 15 59 | GROUP BY i_product_name, i_item_sk, s_store_name, s_zip, ad1.ca_street_number, 60 | ad1.ca_street_name, ad1.ca_city, ad1.ca_zip, ad2.ca_street_number, 61 | ad2.ca_street_name, ad2.ca_city, ad2.ca_zip, d1.d_year, d2.d_year, d3.d_year 62 | ) 63 | SELECT 64 | cs1.product_name, 65 | cs1.store_name, 66 | cs1.store_zip, 67 | cs1.b_street_number, 68 | cs1.b_streen_name, 69 | cs1.b_city, 70 | cs1.b_zip, 71 | cs1.c_street_number, 72 | cs1.c_street_name, 73 | cs1.c_city, 74 | cs1.c_zip, 75 | cs1.syear, 76 | cs1.cnt, 77 | cs1.s1, 78 | cs1.s2, 79 | cs1.s3, 80 | cs2.s1, 
81 | cs2.s2, 82 | cs2.s3, 83 | cs2.syear, 84 | cs2.cnt 85 | FROM cross_sales cs1, cross_sales cs2 86 | WHERE cs1.item_sk = cs2.item_sk AND 87 | cs1.syear = 1999 AND 88 | cs2.syear = 1999 + 1 AND 89 | cs2.cnt <= cs1.cnt AND 90 | cs1.store_name = cs2.store_name AND 91 | cs1.store_zip = cs2.store_zip 92 | ORDER BY cs1.product_name, cs1.store_name, cs2.cnt 93 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/ranger/authorization/spark/authorizer/RangerSparkAccessRequest.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
/**
 * A Ranger access request that also remembers the Spark-level access type it was
 * built from.
 *
 * The primary constructor is private; use one of the auxiliary constructors, or
 * `copy()` to duplicate an existing request.
 */
class RangerSparkAccessRequest private extends RangerAccessRequestImpl {

  private var accessType = SparkAccessType.NONE

  /**
   * Builds a fully populated request. The access time is set to the moment of
   * construction.
   *
   * @param resource    the resource being accessed
   * @param user        the requesting user
   * @param groups      the groups of the requesting user
   * @param opType      stored as the request's action
   * @param accessType  the Spark access type; see `setSparkAccessType` for how it is
   *                    translated to the Ranger access type
   * @param clusterName the cluster name recorded on the request
   */
  def this(
      resource: RangerSparkResource,
      user: String,
      groups: Set[String],
      opType: String,
      accessType: SparkAccessType,
      clusterName: String) = {
    this()
    this.setResource(resource)
    this.setUser(user)
    this.setUserGroups(groups.asJava)
    this.setAccessTime(new Date)
    this.setAction(opType)
    this.setSparkAccessType(accessType)
    this.setClusterName(clusterName)
  }

  /**
   * Builds a request with action `"METADATA OPERATION"` and access type
   * `SparkAccessType.USE`.
   */
  def this(resource: RangerSparkResource, user: String, groups: Set[String],
      clusterName: String) = {
    this(resource, user, groups, "METADATA OPERATION", SparkAccessType.USE, clusterName)
  }

  /** The Spark access type last set on this request. */
  def getSparkAccessType: SparkAccessType = accessType

  /**
   * Stores the Spark access type and derives the Ranger access type from it:
   * `USE` maps to `RangerPolicyEngine.ANY_ACCESS`, `ADMIN` to
   * `RangerPolicyEngine.ADMIN_ACCESS`, and anything else to its lower-cased name.
   */
  def setSparkAccessType(accessType: SparkAccessType): Unit = {
    this.accessType = accessType
    accessType match {
      case SparkAccessType.USE => this.setAccessType(RangerPolicyEngine.ANY_ACCESS)
      case SparkAccessType.ADMIN => this.setAccessType(RangerPolicyEngine.ADMIN_ACCESS)
      // Locale.ROOT keeps the result independent of the JVM default locale; a Turkish
      // default, for example, would lower-case 'I' to a dotless 'ı'.
      case _ => this.setAccessType(accessType.toString.toLowerCase(java.util.Locale.ROOT))
    }
  }

  /**
   * Returns a new request carrying the same field values as this one. The context is
   * duplicated via `RangerAccessRequestUtil.copyContext`; the remaining values are
   * passed to the new request as they are.
   */
  def copy(): RangerSparkAccessRequest = {
    val ret = new RangerSparkAccessRequest()
    ret.setResource(getResource)
    ret.setAccessType(getAccessType)
    ret.setUser(getUser)
    ret.setUserGroups(getUserGroups)
    ret.setAccessTime(getAccessTime)
    ret.setAction(getAction)
    ret.setClientIPAddress(getClientIPAddress)
    ret.setRemoteIPAddress(getRemoteIPAddress)
    ret.setForwardedAddresses(getForwardedAddresses)
    ret.setRequestData(getRequestData)
    ret.setClientType(getClientType)
    ret.setSessionId(getSessionId)
    ret.setContext(RangerAccessRequestUtil.copyContext(getContext))
    ret.accessType = accessType
    ret.setClusterName(getClusterName)
    ret
  }
}
cs_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 1999 + 2 47 | UNION ALL 48 | SELECT 49 | ws_quantity quantity, 50 | ws_list_price list_price 51 | FROM web_sales, date_dim 52 | WHERE ws_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 1999 + 2) x) 53 | SELECT * 54 | FROM 55 | (SELECT 56 | 'store' channel, 57 | i_brand_id, 58 | i_class_id, 59 | i_category_id, 60 | sum(ss_quantity * ss_list_price) sales, 61 | count(*) number_sales 62 | FROM store_sales, item, date_dim 63 | WHERE ss_item_sk IN (SELECT ss_item_sk 64 | FROM cross_items) 65 | AND ss_item_sk = i_item_sk 66 | AND ss_sold_date_sk = d_date_sk 67 | AND d_week_seq = (SELECT d_week_seq 68 | FROM date_dim 69 | WHERE d_year = 1999 + 1 AND d_moy = 12 AND d_dom = 11) 70 | GROUP BY i_brand_id, i_class_id, i_category_id 71 | HAVING sum(ss_quantity * ss_list_price) > (SELECT average_sales 72 | FROM avg_sales)) this_year, 73 | (SELECT 74 | 'store' channel, 75 | i_brand_id, 76 | i_class_id, 77 | i_category_id, 78 | sum(ss_quantity * ss_list_price) sales, 79 | count(*) number_sales 80 | FROM store_sales, item, date_dim 81 | WHERE ss_item_sk IN (SELECT ss_item_sk 82 | FROM cross_items) 83 | AND ss_item_sk = i_item_sk 84 | AND ss_sold_date_sk = d_date_sk 85 | AND d_week_seq = (SELECT d_week_seq 86 | FROM date_dim 87 | WHERE d_year = 1999 AND d_moy = 12 AND d_dom = 11) 88 | GROUP BY i_brand_id, i_class_id, i_category_id 89 | HAVING sum(ss_quantity * ss_list_price) > (SELECT average_sales 90 | FROM avg_sales)) last_year 91 | WHERE this_year.i_brand_id = last_year.i_brand_id 92 | AND this_year.i_class_id = last_year.i_class_id 93 | AND this_year.i_category_id = last_year.i_category_id 94 | ORDER BY this_year.channel, this_year.i_brand_id, this_year.i_class_id, this_year.i_category_id 95 | LIMIT 100 96 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/ranger/authorization/spark/authorizer/RangerSparkResource.scala: 
/**
 * Ranger access resource describing a Spark SQL object.
 *
 * Which resource keys are populated depends on `objectType`:
 *  - `DATABASE`: "database" (or "*" when `databaseOrUrl` is empty)
 *  - `FUNCTION`: "database" (or "" when empty) and "udf"
 *  - `COLUMN`: "database" (or "*"), "table" and "column"
 *  - `TABLE` / `VIEW`: "database" (or "*") and "table"
 *  - `URI`: "url" (or "*")
 *  - any other type: nothing is set
 *
 * @param objectType    kind of object this resource refers to
 * @param databaseOrUrl database name, or the URL for `URI` resources
 * @param tableOrUdf    table name, or the function name for `FUNCTION` resources
 * @param column        column name, used only for `COLUMN` resources
 */
class RangerSparkResource(
    objectType: SparkObjectType,
    databaseOrUrl: Option[String],
    tableOrUdf: String,
    column: String) extends RangerAccessResourceImpl {
  import SparkObjectType._
  import RangerSparkResource._

  /** Resource without a column. */
  def this(objectType: SparkObjectType, databaseOrUrl: Option[String], tableOrUdf: String) =
    this(objectType, databaseOrUrl, tableOrUdf, null)

  /** Resource with neither a table/function name nor a column. */
  def this(objectType: SparkObjectType, databaseOrUrl: Option[String]) =
    this(objectType, databaseOrUrl, null)

  // Populate the resource keys that apply to this object type.
  objectType match {
    case DATABASE =>
      setValue(KEY_DATABASE, nameOrWildcard)
    case FUNCTION =>
      // Functions fall back to an empty database name rather than the wildcard.
      setValue(KEY_DATABASE, databaseOrUrl.getOrElse(""))
      setValue(KEY_UDF, tableOrUdf)
    case COLUMN =>
      setValue(KEY_DATABASE, nameOrWildcard)
      setValue(KEY_TABLE, tableOrUdf)
      setValue(KEY_COLUMN, column)
    case TABLE | VIEW =>
      setValue(KEY_DATABASE, nameOrWildcard)
      setValue(KEY_TABLE, tableOrUdf)
    case URI =>
      setValue(KEY_URL, nameOrWildcard)
    case _ =>
  }

  def getObjectType: SparkObjectType = objectType

  def getDatabase: String = stringValue(KEY_DATABASE)

  def getTable: String = stringValue(KEY_TABLE)

  def getUdf: String = stringValue(KEY_UDF)

  def getColumn: String = stringValue(KEY_COLUMN)

  def getUrl: String = stringValue(KEY_URL)

  /** `databaseOrUrl` if present, otherwise the wildcard "*". */
  private def nameOrWildcard: String = databaseOrUrl.getOrElse("*")

  /** Reads the value stored under `key` and casts it to `String`. */
  private def stringValue(key: String): String = getValue(key).asInstanceOf[String]

}

/** Factory methods mirroring the constructors, plus the resource key names. */
object RangerSparkResource {

  private val KEY_DATABASE = "database"
  private val KEY_TABLE = "table"
  private val KEY_UDF = "udf"
  private val KEY_COLUMN = "column"
  private val KEY_URL = "url"

  def apply(objectType: SparkObjectType, databaseOrUrl: Option[String], tableOrUdf: String,
      column: String): RangerSparkResource =
    new RangerSparkResource(objectType, databaseOrUrl, tableOrUdf, column)

  def apply(objectType: SparkObjectType, databaseOrUrl: Option[String],
      tableOrUdf: String): RangerSparkResource =
    new RangerSparkResource(objectType, databaseOrUrl, tableOrUdf)

  def apply(objectType: SparkObjectType, databaseOrUrl: Option[String]): RangerSparkResource =
    new RangerSparkResource(objectType, databaseOrUrl)
}
date_dim d1 9 | WHERE ss_item_sk = iss.i_item_sk 10 | AND ss_sold_date_sk = d1.d_date_sk 11 | AND d1.d_year BETWEEN 1999 AND 1999 + 2 12 | INTERSECT 13 | SELECT 14 | ics.i_brand_id, 15 | ics.i_class_id, 16 | ics.i_category_id 17 | FROM catalog_sales, item ics, date_dim d2 18 | WHERE cs_item_sk = ics.i_item_sk 19 | AND cs_sold_date_sk = d2.d_date_sk 20 | AND d2.d_year BETWEEN 1999 AND 1999 + 2 21 | INTERSECT 22 | SELECT 23 | iws.i_brand_id, 24 | iws.i_class_id, 25 | iws.i_category_id 26 | FROM web_sales, item iws, date_dim d3 27 | WHERE ws_item_sk = iws.i_item_sk 28 | AND ws_sold_date_sk = d3.d_date_sk 29 | AND d3.d_year BETWEEN 1999 AND 1999 + 2) x 30 | WHERE i_brand_id = brand_id 31 | AND i_class_id = class_id 32 | AND i_category_id = category_id 33 | ), 34 | avg_sales AS 35 | (SELECT avg(quantity * list_price) average_sales 36 | FROM ( 37 | SELECT 38 | ss_quantity quantity, 39 | ss_list_price list_price 40 | FROM store_sales, date_dim 41 | WHERE ss_sold_date_sk = d_date_sk 42 | AND d_year BETWEEN 1999 AND 2001 43 | UNION ALL 44 | SELECT 45 | cs_quantity quantity, 46 | cs_list_price list_price 47 | FROM catalog_sales, date_dim 48 | WHERE cs_sold_date_sk = d_date_sk 49 | AND d_year BETWEEN 1999 AND 1999 + 2 50 | UNION ALL 51 | SELECT 52 | ws_quantity quantity, 53 | ws_list_price list_price 54 | FROM web_sales, date_dim 55 | WHERE ws_sold_date_sk = d_date_sk 56 | AND d_year BETWEEN 1999 AND 1999 + 2) x) 57 | SELECT 58 | channel, 59 | i_brand_id, 60 | i_class_id, 61 | i_category_id, 62 | sum(sales), 63 | sum(number_sales) 64 | FROM ( 65 | SELECT 66 | 'store' channel, 67 | i_brand_id, 68 | i_class_id, 69 | i_category_id, 70 | sum(ss_quantity * ss_list_price) sales, 71 | count(*) number_sales 72 | FROM store_sales, item, date_dim 73 | WHERE ss_item_sk IN (SELECT ss_item_sk 74 | FROM cross_items) 75 | AND ss_item_sk = i_item_sk 76 | AND ss_sold_date_sk = d_date_sk 77 | AND d_year = 1999 + 2 78 | AND d_moy = 11 79 | GROUP BY i_brand_id, i_class_id, i_category_id 80 | 
HAVING sum(ss_quantity * ss_list_price) > (SELECT average_sales 81 | FROM avg_sales) 82 | UNION ALL 83 | SELECT 84 | 'catalog' channel, 85 | i_brand_id, 86 | i_class_id, 87 | i_category_id, 88 | sum(cs_quantity * cs_list_price) sales, 89 | count(*) number_sales 90 | FROM catalog_sales, item, date_dim 91 | WHERE cs_item_sk IN (SELECT ss_item_sk 92 | FROM cross_items) 93 | AND cs_item_sk = i_item_sk 94 | AND cs_sold_date_sk = d_date_sk 95 | AND d_year = 1999 + 2 96 | AND d_moy = 11 97 | GROUP BY i_brand_id, i_class_id, i_category_id 98 | HAVING sum(cs_quantity * cs_list_price) > (SELECT average_sales FROM avg_sales) 99 | UNION ALL 100 | SELECT 101 | 'web' channel, 102 | i_brand_id, 103 | i_class_id, 104 | i_category_id, 105 | sum(ws_quantity * ws_list_price) sales, 106 | count(*) number_sales 107 | FROM web_sales, item, date_dim 108 | WHERE ws_item_sk IN (SELECT ss_item_sk 109 | FROM cross_items) 110 | AND ws_item_sk = i_item_sk 111 | AND ws_sold_date_sk = d_date_sk 112 | AND d_year = 1999 + 2 113 | AND d_moy = 11 114 | GROUP BY i_brand_id, i_class_id, i_category_id 115 | HAVING sum(ws_quantity * ws_list_price) > (SELECT average_sales 116 | FROM avg_sales) 117 | ) y 118 | GROUP BY ROLLUP (channel, i_brand_id, i_class_id, i_category_id) 119 | ORDER BY channel, i_brand_id, i_class_id, i_category_id 120 | LIMIT 100 121 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Notice: 2 | 3 | This library has been contributed to https://github.com/apache/submarine as a sub-module, 4 | and that module can still be used individually. 5 | 6 | The project here will no longer be updated.
7 | 8 | If you have any questions, please go to 9 | 10 | https://github.com/apache/submarine/tree/master/docs/submarine-security/spark/README.md 11 | 12 | to learn how to use and give feedback to the Apache Submarine community by following 13 | https://submarine.apache.org/community/contributors.html 14 | 15 | 16 | # Spark SQL Ranger Security Plugin [![License](https://img.shields.io/badge/license-Apache%202-4EB1BA.svg)](https://www.apache.org/licenses/LICENSE-2.0.html) [![](https://tokei.rs/b1/github/yaooqinn/spark-ranger)](https://github.com/yaooqinn/spark-ranger) [![codecov](https://codecov.io/gh/yaooqinn/spark-ranger/branch/master/graph/badge.svg)](https://codecov.io/gh/yaooqinn/spark-ranger) [![Build Status](https://travis-ci.com/yaooqinn/spark-ranger.svg?branch=master)](https://travis-ci.com/yaooqinn/spark-ranger) [![HitCount](http://hits.dwyl.io/yaooqinn/spark-ranger.svg)](http://hits.dwyl.io/yaooqinn/spark-ranger) 17 | 18 | ACL Management for Apache Spark SQL with Apache Ranger, enabling: 19 | 20 | - Table/Column level authorization 21 | - Row level filtering 22 | - Data masking 23 | 24 | ## Build 25 | Spark SQL Ranger Security Plugin is built with [Apache Maven](http://maven.apache.org): 26 | 27 | ```bash 28 | mvn clean package -Pspark-2.3 -Pranger-1.0 -DskipTests 29 | ``` 30 | 31 | Currently, available profiles are: 32 | 33 | Spark: -Pspark-2.3, -Pspark-2.4 34 | 35 | Ranger: -Pranger-1.0, -Pranger-1.1, -Pranger-1.2, -Pranger-2.0 36 | 37 | ## Usage 38 | 39 | ### Installation 40 | 41 | Place the spark-ranger-<version>.jar into $SPARK_HOME/jars. 42 | 43 | ### Installation Addons 44 | 45 | You can find some tips and known problems about this library [here](docs/installation-addons.md).
46 | 47 | ### Configurations 48 | 49 | #### Ranger admin client configurations 50 | 51 | Create ranger-spark-security.xml in $SPARK_HOME/conf and add the following configurations for pointing to the right ranger admin server 52 | 53 | ```xml 54 | 55 | 56 | 57 | 58 | ranger.plugin.spark.policy.rest.url 59 | ranger admin address like http://ranger-admin.org:6080 60 | 61 | 62 | 63 | ranger.plugin.spark.service.name 64 | a ranger hive service name 65 | 66 | 67 | 68 | ranger.plugin.spark.policy.cache.dir 69 | ./a ranger hive service name/policycache 70 | 71 | 72 | 73 | ranger.plugin.spark.policy.pollIntervalMs 74 | 5000 75 | 76 | 77 | 78 | ranger.plugin.spark.policy.source.impl 79 | org.apache.ranger.admin.client.RangerAdminRESTClient 80 | 81 | 82 | 83 | ``` 84 | 85 | Create ranger-spark-audit.xml in $SPARK_HOME/conf and add the following configurations to enable/disable auditing. 86 | 87 | ```xml 88 | 89 | 90 | 91 | xasecure.audit.is.enabled 92 | true 93 | 94 | 95 | 96 | xasecure.audit.destination.db 97 | false 98 | 99 | 100 | 101 | xasecure.audit.destination.db.jdbc.driver 102 | com.mysql.jdbc.Driver 103 | 104 | 105 | 106 | xasecure.audit.destination.db.jdbc.url 107 | jdbc:mysql://10.171.161.78/ranger 108 | 109 | 110 | 111 | xasecure.audit.destination.db.password 112 | rangeradmin 113 | 114 | 115 | 116 | xasecure.audit.destination.db.user 117 | rangeradmin 118 | 119 | 120 | 121 | 122 | ``` 123 | 124 | #### Enable plugin via spark extensions 125 | 126 | spark.sql.extensions=org.apache.ranger.authorization.spark.authorizer.RangerSparkSQLExtension 127 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q5.sql: -------------------------------------------------------------------------------- 1 | WITH ssr AS 2 | ( SELECT 3 | s_store_id, 4 | sum(sales_price) AS sales, 5 | sum(profit) AS profit, 6 | sum(return_amt) AS RETURNS, 7 | sum(net_loss) AS profit_loss 8 | FROM 9 | (SELECT 10 | ss_store_sk AS store_sk, 11 | 
ss_sold_date_sk AS date_sk, 12 | ss_ext_sales_price AS sales_price, 13 | ss_net_profit AS profit, 14 | cast(0 AS DECIMAL(7, 2)) AS return_amt, 15 | cast(0 AS DECIMAL(7, 2)) AS net_loss 16 | FROM store_sales 17 | UNION ALL 18 | SELECT 19 | sr_store_sk AS store_sk, 20 | sr_returned_date_sk AS date_sk, 21 | cast(0 AS DECIMAL(7, 2)) AS sales_price, 22 | cast(0 AS DECIMAL(7, 2)) AS profit, 23 | sr_return_amt AS return_amt, 24 | sr_net_loss AS net_loss 25 | FROM store_returns) 26 | salesreturns, date_dim, store 27 | WHERE date_sk = d_date_sk 28 | AND d_date BETWEEN cast('2000-08-23' AS DATE) 29 | AND ((cast('2000-08-23' AS DATE) + INTERVAL 14 days)) 30 | AND store_sk = s_store_sk 31 | GROUP BY s_store_id), 32 | csr AS 33 | ( SELECT 34 | cp_catalog_page_id, 35 | sum(sales_price) AS sales, 36 | sum(profit) AS profit, 37 | sum(return_amt) AS RETURNS, 38 | sum(net_loss) AS profit_loss 39 | FROM 40 | (SELECT 41 | cs_catalog_page_sk AS page_sk, 42 | cs_sold_date_sk AS date_sk, 43 | cs_ext_sales_price AS sales_price, 44 | cs_net_profit AS profit, 45 | cast(0 AS DECIMAL(7, 2)) AS return_amt, 46 | cast(0 AS DECIMAL(7, 2)) AS net_loss 47 | FROM catalog_sales 48 | UNION ALL 49 | SELECT 50 | cr_catalog_page_sk AS page_sk, 51 | cr_returned_date_sk AS date_sk, 52 | cast(0 AS DECIMAL(7, 2)) AS sales_price, 53 | cast(0 AS DECIMAL(7, 2)) AS profit, 54 | cr_return_amount AS return_amt, 55 | cr_net_loss AS net_loss 56 | FROM catalog_returns 57 | ) salesreturns, date_dim, catalog_page 58 | WHERE date_sk = d_date_sk 59 | AND d_date BETWEEN cast('2000-08-23' AS DATE) 60 | AND ((cast('2000-08-23' AS DATE) + INTERVAL 14 days)) 61 | AND page_sk = cp_catalog_page_sk 62 | GROUP BY cp_catalog_page_id) 63 | , 64 | wsr AS 65 | ( SELECT 66 | web_site_id, 67 | sum(sales_price) AS sales, 68 | sum(profit) AS profit, 69 | sum(return_amt) AS RETURNS, 70 | sum(net_loss) AS profit_loss 71 | FROM 72 | (SELECT 73 | ws_web_site_sk AS wsr_web_site_sk, 74 | ws_sold_date_sk AS date_sk, 75 | ws_ext_sales_price AS 
sales_price, 76 | ws_net_profit AS profit, 77 | cast(0 AS DECIMAL(7, 2)) AS return_amt, 78 | cast(0 AS DECIMAL(7, 2)) AS net_loss 79 | FROM web_sales 80 | UNION ALL 81 | SELECT 82 | ws_web_site_sk AS wsr_web_site_sk, 83 | wr_returned_date_sk AS date_sk, 84 | cast(0 AS DECIMAL(7, 2)) AS sales_price, 85 | cast(0 AS DECIMAL(7, 2)) AS profit, 86 | wr_return_amt AS return_amt, 87 | wr_net_loss AS net_loss 88 | FROM web_returns 89 | LEFT OUTER JOIN web_sales ON 90 | (wr_item_sk = ws_item_sk 91 | AND wr_order_number = ws_order_number) 92 | ) salesreturns, date_dim, web_site 93 | WHERE date_sk = d_date_sk 94 | AND d_date BETWEEN cast('2000-08-23' AS DATE) 95 | AND ((cast('2000-08-23' AS DATE) + INTERVAL 14 days)) 96 | AND wsr_web_site_sk = web_site_sk 97 | GROUP BY web_site_id) 98 | SELECT 99 | channel, 100 | id, 101 | sum(sales) AS sales, 102 | sum(returns) AS returns, 103 | sum(profit) AS profit 104 | FROM 105 | (SELECT 106 | 'store channel' AS channel, 107 | concat('store', s_store_id) AS id, 108 | sales, 109 | returns, 110 | (profit - profit_loss) AS profit 111 | FROM ssr 112 | UNION ALL 113 | SELECT 114 | 'catalog channel' AS channel, 115 | concat('catalog_page', cp_catalog_page_id) AS id, 116 | sales, 117 | returns, 118 | (profit - profit_loss) AS profit 119 | FROM csr 120 | UNION ALL 121 | SELECT 122 | 'web channel' AS channel, 123 | concat('web_site', web_site_id) AS id, 124 | sales, 125 | returns, 126 | (profit - profit_loss) AS profit 127 | FROM wsr 128 | ) x 129 | GROUP BY ROLLUP (channel, id) 130 | ORDER BY channel, id 131 | LIMIT 100 132 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q49.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | 'web' AS channel, 3 | web.item, 4 | web.return_ratio, 5 | web.return_rank, 6 | web.currency_rank 7 | FROM ( 8 | SELECT 9 | item, 10 | return_ratio, 11 | currency_ratio, 12 | rank() 13 | OVER ( 14 | ORDER BY 
return_ratio) AS return_rank, 15 | rank() 16 | OVER ( 17 | ORDER BY currency_ratio) AS currency_rank 18 | FROM 19 | (SELECT 20 | ws.ws_item_sk AS item, 21 | (cast(sum(coalesce(wr.wr_return_quantity, 0)) AS DECIMAL(15, 4)) / 22 | cast(sum(coalesce(ws.ws_quantity, 0)) AS DECIMAL(15, 4))) AS return_ratio, 23 | (cast(sum(coalesce(wr.wr_return_amt, 0)) AS DECIMAL(15, 4)) / 24 | cast(sum(coalesce(ws.ws_net_paid, 0)) AS DECIMAL(15, 4))) AS currency_ratio 25 | FROM 26 | web_sales ws LEFT OUTER JOIN web_returns wr 27 | ON (ws.ws_order_number = wr.wr_order_number AND 28 | ws.ws_item_sk = wr.wr_item_sk) 29 | , date_dim 30 | WHERE 31 | wr.wr_return_amt > 10000 32 | AND ws.ws_net_profit > 1 33 | AND ws.ws_net_paid > 0 34 | AND ws.ws_quantity > 0 35 | AND ws_sold_date_sk = d_date_sk 36 | AND d_year = 2001 37 | AND d_moy = 12 38 | GROUP BY ws.ws_item_sk 39 | ) in_web 40 | ) web 41 | WHERE (web.return_rank <= 10 OR web.currency_rank <= 10) 42 | UNION 43 | SELECT 44 | 'catalog' AS channel, 45 | catalog.item, 46 | catalog.return_ratio, 47 | catalog.return_rank, 48 | catalog.currency_rank 49 | FROM ( 50 | SELECT 51 | item, 52 | return_ratio, 53 | currency_ratio, 54 | rank() 55 | OVER ( 56 | ORDER BY return_ratio) AS return_rank, 57 | rank() 58 | OVER ( 59 | ORDER BY currency_ratio) AS currency_rank 60 | FROM 61 | (SELECT 62 | cs.cs_item_sk AS item, 63 | (cast(sum(coalesce(cr.cr_return_quantity, 0)) AS DECIMAL(15, 4)) / 64 | cast(sum(coalesce(cs.cs_quantity, 0)) AS DECIMAL(15, 4))) AS return_ratio, 65 | (cast(sum(coalesce(cr.cr_return_amount, 0)) AS DECIMAL(15, 4)) / 66 | cast(sum(coalesce(cs.cs_net_paid, 0)) AS DECIMAL(15, 4))) AS currency_ratio 67 | FROM 68 | catalog_sales cs LEFT OUTER JOIN catalog_returns cr 69 | ON (cs.cs_order_number = cr.cr_order_number AND 70 | cs.cs_item_sk = cr.cr_item_sk) 71 | , date_dim 72 | WHERE 73 | cr.cr_return_amount > 10000 74 | AND cs.cs_net_profit > 1 75 | AND cs.cs_net_paid > 0 76 | AND cs.cs_quantity > 0 77 | AND cs_sold_date_sk = d_date_sk 78 | 
AND d_year = 2001 79 | AND d_moy = 12 80 | GROUP BY cs.cs_item_sk 81 | ) in_cat 82 | ) catalog 83 | WHERE (catalog.return_rank <= 10 OR catalog.currency_rank <= 10) 84 | UNION 85 | SELECT 86 | 'store' AS channel, 87 | store.item, 88 | store.return_ratio, 89 | store.return_rank, 90 | store.currency_rank 91 | FROM ( 92 | SELECT 93 | item, 94 | return_ratio, 95 | currency_ratio, 96 | rank() 97 | OVER ( 98 | ORDER BY return_ratio) AS return_rank, 99 | rank() 100 | OVER ( 101 | ORDER BY currency_ratio) AS currency_rank 102 | FROM 103 | (SELECT 104 | sts.ss_item_sk AS item, 105 | (cast(sum(coalesce(sr.sr_return_quantity, 0)) AS DECIMAL(15, 4)) / 106 | cast(sum(coalesce(sts.ss_quantity, 0)) AS DECIMAL(15, 4))) AS return_ratio, 107 | (cast(sum(coalesce(sr.sr_return_amt, 0)) AS DECIMAL(15, 4)) / 108 | cast(sum(coalesce(sts.ss_net_paid, 0)) AS DECIMAL(15, 4))) AS currency_ratio 109 | FROM 110 | store_sales sts LEFT OUTER JOIN store_returns sr 111 | ON (sts.ss_ticket_number = sr.sr_ticket_number AND sts.ss_item_sk = sr.sr_item_sk) 112 | , date_dim 113 | WHERE 114 | sr.sr_return_amt > 10000 115 | AND sts.ss_net_profit > 1 116 | AND sts.ss_net_paid > 0 117 | AND sts.ss_quantity > 0 118 | AND ss_sold_date_sk = d_date_sk 119 | AND d_year = 2001 120 | AND d_moy = 12 121 | GROUP BY sts.ss_item_sk 122 | ) in_store 123 | ) store 124 | WHERE (store.return_rank <= 10 OR store.currency_rank <= 10) 125 | ORDER BY 1, 4, 5 126 | LIMIT 100 127 | -------------------------------------------------------------------------------- /src/test/resources/tpcds/q4.sql: -------------------------------------------------------------------------------- 1 | WITH year_total AS ( 2 | SELECT 3 | c_customer_id customer_id, 4 | c_first_name customer_first_name, 5 | c_last_name customer_last_name, 6 | c_preferred_cust_flag customer_preferred_cust_flag, 7 | c_birth_country customer_birth_country, 8 | c_login customer_login, 9 | c_email_address customer_email_address, 10 | d_year dyear, 11 | 
sum(((ss_ext_list_price - ss_ext_wholesale_cost - ss_ext_discount_amt) + 12 | ss_ext_sales_price) / 2) year_total, 13 | 's' sale_type 14 | FROM customer, store_sales, date_dim 15 | WHERE c_customer_sk = ss_customer_sk AND ss_sold_date_sk = d_date_sk 16 | GROUP BY c_customer_id, 17 | c_first_name, 18 | c_last_name, 19 | c_preferred_cust_flag, 20 | c_birth_country, 21 | c_login, 22 | c_email_address, 23 | d_year 24 | UNION ALL 25 | SELECT 26 | c_customer_id customer_id, 27 | c_first_name customer_first_name, 28 | c_last_name customer_last_name, 29 | c_preferred_cust_flag customer_preferred_cust_flag, 30 | c_birth_country customer_birth_country, 31 | c_login customer_login, 32 | c_email_address customer_email_address, 33 | d_year dyear, 34 | sum((((cs_ext_list_price - cs_ext_wholesale_cost - cs_ext_discount_amt) + 35 | cs_ext_sales_price) / 2)) year_total, 36 | 'c' sale_type 37 | FROM customer, catalog_sales, date_dim 38 | WHERE c_customer_sk = cs_bill_customer_sk AND cs_sold_date_sk = d_date_sk 39 | GROUP BY c_customer_id, 40 | c_first_name, 41 | c_last_name, 42 | c_preferred_cust_flag, 43 | c_birth_country, 44 | c_login, 45 | c_email_address, 46 | d_year 47 | UNION ALL 48 | SELECT 49 | c_customer_id customer_id, 50 | c_first_name customer_first_name, 51 | c_last_name customer_last_name, 52 | c_preferred_cust_flag customer_preferred_cust_flag, 53 | c_birth_country customer_birth_country, 54 | c_login customer_login, 55 | c_email_address customer_email_address, 56 | d_year dyear, 57 | sum((((ws_ext_list_price - ws_ext_wholesale_cost - ws_ext_discount_amt) + ws_ext_sales_price) / 58 | 2)) year_total, 59 | 'w' sale_type 60 | FROM customer, web_sales, date_dim 61 | WHERE c_customer_sk = ws_bill_customer_sk AND ws_sold_date_sk = d_date_sk 62 | GROUP BY c_customer_id, 63 | c_first_name, 64 | c_last_name, 65 | c_preferred_cust_flag, 66 | c_birth_country, 67 | c_login, 68 | c_email_address, 69 | d_year) 70 | SELECT 71 | t_s_secyear.customer_id, 72 | 
t_s_secyear.customer_first_name, 73 | t_s_secyear.customer_last_name, 74 | t_s_secyear.customer_preferred_cust_flag, 75 | t_s_secyear.customer_birth_country, 76 | t_s_secyear.customer_login, 77 | t_s_secyear.customer_email_address 78 | FROM year_total t_s_firstyear, year_total t_s_secyear, year_total t_c_firstyear, 79 | year_total t_c_secyear, year_total t_w_firstyear, year_total t_w_secyear 80 | WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id 81 | AND t_s_firstyear.customer_id = t_c_secyear.customer_id 82 | AND t_s_firstyear.customer_id = t_c_firstyear.customer_id 83 | AND t_s_firstyear.customer_id = t_w_firstyear.customer_id 84 | AND t_s_firstyear.customer_id = t_w_secyear.customer_id 85 | AND t_s_firstyear.sale_type = 's' 86 | AND t_c_firstyear.sale_type = 'c' 87 | AND t_w_firstyear.sale_type = 'w' 88 | AND t_s_secyear.sale_type = 's' 89 | AND t_c_secyear.sale_type = 'c' 90 | AND t_w_secyear.sale_type = 'w' 91 | AND t_s_firstyear.dyear = 2001 92 | AND t_s_secyear.dyear = 2001 + 1 93 | AND t_c_firstyear.dyear = 2001 94 | AND t_c_secyear.dyear = 2001 + 1 95 | AND t_w_firstyear.dyear = 2001 96 | AND t_w_secyear.dyear = 2001 + 1 97 | AND t_s_firstyear.year_total > 0 98 | AND t_c_firstyear.year_total > 0 99 | AND t_w_firstyear.year_total > 0 100 | AND CASE WHEN t_c_firstyear.year_total > 0 101 | THEN t_c_secyear.year_total / t_c_firstyear.year_total 102 | ELSE NULL END 103 | > CASE WHEN t_s_firstyear.year_total > 0 104 | THEN t_s_secyear.year_total / t_s_firstyear.year_total 105 | ELSE NULL END 106 | AND CASE WHEN t_c_firstyear.year_total > 0 107 | THEN t_c_secyear.year_total / t_c_firstyear.year_total 108 | ELSE NULL END 109 | > CASE WHEN t_w_firstyear.year_total > 0 110 | THEN t_w_secyear.year_total / t_w_firstyear.year_total 111 | ELSE NULL END 112 | ORDER BY 113 | t_s_secyear.customer_id, 114 | t_s_secyear.customer_first_name, 115 | t_s_secyear.customer_last_name, 116 | t_s_secyear.customer_preferred_cust_flag, 117 | t_s_secyear.customer_birth_country, 
118 | t_s_secyear.customer_login, 119 | t_s_secyear.customer_email_address 120 | LIMIT 100 121 | -------------------------------------------------------------------------------- /src/main/scala/org/apache/ranger/authorization/spark/authorizer/SparkPrivilegeObject.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | 18 | package org.apache.ranger.authorization.spark.authorizer 19 | 20 | import org.apache.ranger.authorization.spark.authorizer.SparkPrivilegeObjectType.SparkPrivilegeObjectType 21 | 22 | import scala.collection.JavaConverters._ 23 | import org.apache.ranger.authorization.spark.authorizer.SparkPrivObjectActionType.SparkPrivObjectActionType 24 |
/**
 * Holds the type, database name, object name, partition keys, columns and action type of a
 * privilege object.
 *
 * Instances are ordered by type, database name, object name, partition keys and columns, in
 * that order. `actionType` does not take part in the ordering, and `equals` is not overridden,
 * so `compare` may return 0 for two instances that are not equal.
 *
 * @param typ        the kind of object
 * @param dbname     the database name; may be null
 * @param objectName the object name; may be null
 * @param partKeys   the partition keys; may be null
 * @param columns    the columns; may be null
 * @param actionType the action performed on the object; may be null
 */
class SparkPrivilegeObject(
    private val typ: SparkPrivilegeObjectType,
    private val dbname: String,
    private val objectName: String,
    private val partKeys: Seq[String],
    private val columns: Seq[String],
    private val actionType: SparkPrivObjectActionType)
  extends Ordered[SparkPrivilegeObject] {

  /**
   * Compares by type, database name, object name, partition keys and columns. The first
   * field that differs decides the result; later fields are not evaluated.
   */
  override def compare(that: SparkPrivilegeObject): Int = {
    // Each step is a thunk and the iterator is lazy, so evaluation stops at the first
    // non-zero comparison.
    Iterator[() => Int](
      () => typ compareTo that.typ,
      () => compare(dbname, that.dbname),
      () => compare(objectName, that.objectName),
      () => compare(partKeys, that.partKeys),
      () => compare(columns, that.columns)
    ).map(step => step()).find(_ != 0).getOrElse(0)
  }

  /** Null-safe string comparison; null sorts before any non-null string. */
  private def compare(o1: String, o2: String): Int = (o1, o2) match {
    case (null, null) => 0
    case (null, _) => -1
    case (_, null) => 1
    case _ => o1.compareTo(o2)
  }

  /**
   * Null-safe element-wise comparison of two sequences. A null sequence sorts before any
   * non-null one; when one sequence is a prefix of the other, the shorter one sorts first.
   */
  private def compare(o1: Seq[String], o2: Seq[String]): Int = {
    if (o1 == null || o2 == null) {
      if (o1 == null && o2 == null) 0 else if (o1 == null) -1 else 1
    } else {
      // Lazy search for the first differing pair; this avoids a non-local `return` from
      // inside a closure, which is implemented by throwing a control-flow exception.
      o1.iterator.zip(o2.iterator)
        .map { case (x, y) => compare(x, y) }
        .find(_ != 0)
        // Sizes are non-negative, so the subtraction cannot overflow; yields -1, 0 or 1.
        .getOrElse(Integer.signum(o1.size - o2.size))
    }
  }

  /** Builds an object whose action type is `SparkPrivObjectActionType.OTHER`. */
  def this(typ: SparkPrivilegeObjectType, dbname: String, objectName: String,
      partKeys: Seq[String], columns: Seq[String]) =
    this(typ, dbname, objectName, partKeys, columns, SparkPrivObjectActionType.OTHER)

  /** Builds an object with empty partition keys and columns. */
  def this(typ: SparkPrivilegeObjectType, dbname: String, objectName: String,
      actionType: SparkPrivObjectActionType) =
    this(typ, dbname, objectName, Nil, Nil, actionType)

  /** Builds an object with empty partition keys and columns and action type `OTHER`. */
  def this(typ: SparkPrivilegeObjectType, dbname: String, objectName: String) =
    this(typ, dbname, objectName, SparkPrivObjectActionType.OTHER)

  def getType: SparkPrivilegeObjectType = typ

  def getDbname: String = dbname

  def getObjectName: String = objectName

  def getActionType: SparkPrivObjectActionType = actionType

  def getPartKeys: Seq[String] = partKeys

  def getColumns: Seq[String] = columns

  /**
   * Renders as `Object [type=<typ>, name=<name>]`, with `, action=<actionType>` appended only
   * for `INSERT` and `INSERT_OVERWRITE`. The name is the database name for databases, the
   * qualified object name (followed by the partition keys) for tables and views, the
   * qualified object name for functions, and empty otherwise.
   */
  override def toString: String = {
    val name = typ match {
      case SparkPrivilegeObjectType.DATABASE => dbname
      case SparkPrivilegeObjectType.TABLE_OR_VIEW =>
        // Partition keys are rendered through java.util.List#toString.
        getDbObjectName + Option(partKeys).fold("")(_.asJava.toString)
      case SparkPrivilegeObjectType.FUNCTION => getDbObjectName
      case _ => ""
    }

    // A null actionType matches neither constant and falls through to the default arm.
    val action = actionType match {
      case SparkPrivObjectActionType.INSERT |
           SparkPrivObjectActionType.INSERT_OVERWRITE => s", action=$actionType"
      case _ => ""
    }
    s"Object [type=$typ, name=$name$action]"
  }

  /** Returns `dbname.objectName`, or just `objectName` when `dbname` is null. */
  private def getDbObjectName: String =
    Option(dbname).fold("")(_ + ".") + objectName
}
137 | --------------------------------------------------------------------------------