├── _config.yml
├── .gitignore
├── src
├── test
│ ├── resources
│ │ ├── tpcds
│ │ │ ├── q55.sql
│ │ │ ├── q96.sql
│ │ │ ├── q3.sql
│ │ │ ├── q22.sql
│ │ │ ├── q52.sql
│ │ │ ├── q42.sql
│ │ │ ├── q15.sql
│ │ │ ├── q82.sql
│ │ │ ├── q37.sql
│ │ │ ├── q32.sql
│ │ │ ├── q7.sql
│ │ │ ├── q26.sql
│ │ │ ├── q84.sql
│ │ │ ├── q93.sql
│ │ │ ├── q92.sql
│ │ │ ├── q6.sql
│ │ │ ├── q45.sql
│ │ │ ├── q19.sql
│ │ │ ├── q27.sql
│ │ │ ├── q20.sql
│ │ │ ├── q98.sql
│ │ │ ├── q1.sql
│ │ │ ├── q12.sql
│ │ │ ├── q86.sql
│ │ │ ├── q36.sql
│ │ │ ├── q90.sql
│ │ │ ├── q94.sql
│ │ │ ├── q91.sql
│ │ │ ├── q65.sql
│ │ │ ├── q79.sql
│ │ │ ├── q40.sql
│ │ │ ├── q87.sql
│ │ │ ├── q67.sql
│ │ │ ├── q21.sql
│ │ │ ├── q24a.sql
│ │ │ ├── q24b.sql
│ │ │ ├── q16.sql
│ │ │ ├── q97.sql
│ │ │ ├── q95.sql
│ │ │ ├── q25.sql
│ │ │ ├── q29.sql
│ │ │ ├── q38.sql
│ │ │ ├── q43.sql
│ │ │ ├── q18.sql
│ │ │ ├── q89.sql
│ │ │ ├── q70.sql
│ │ │ ├── q73.sql
│ │ │ ├── q30.sql
│ │ │ ├── q61.sql
│ │ │ ├── q62.sql
│ │ │ ├── q99.sql
│ │ │ ├── q46.sql
│ │ │ ├── q68.sql
│ │ │ ├── q39a.sql
│ │ │ ├── q44.sql
│ │ │ ├── q63.sql
│ │ │ ├── q39b.sql
│ │ │ ├── q81.sql
│ │ │ ├── q71.sql
│ │ │ ├── q53.sql
│ │ │ ├── q72.sql
│ │ │ ├── q34.sql
│ │ │ ├── q76.sql
│ │ │ ├── q69.sql
│ │ │ ├── q13.sql
│ │ │ ├── q17.sql
│ │ │ ├── q35.sql
│ │ │ ├── q50.sql
│ │ │ ├── q54.sql
│ │ │ ├── q48.sql
│ │ │ ├── q57.sql
│ │ │ ├── q9.sql
│ │ │ ├── q51.sql
│ │ │ ├── q60.sql
│ │ │ ├── q10.sql
│ │ │ ├── q23a.sql
│ │ │ ├── q33.sql
│ │ │ ├── q56.sql
│ │ │ ├── q83.sql
│ │ │ ├── q31.sql
│ │ │ ├── q41.sql
│ │ │ ├── q47.sql
│ │ │ ├── q74.sql
│ │ │ ├── q28.sql
│ │ │ ├── q58.sql
│ │ │ ├── q23b.sql
│ │ │ ├── q78.sql
│ │ │ ├── q59.sql
│ │ │ ├── q2.sql
│ │ │ ├── q85.sql
│ │ │ ├── q11.sql
│ │ │ ├── q75.sql
│ │ │ ├── q80.sql
│ │ │ ├── q77.sql
│ │ │ ├── q64.sql
│ │ │ ├── q14b.sql
│ │ │ ├── q14a.sql
│ │ │ ├── q5.sql
│ │ │ ├── q49.sql
│ │ │ └── q4.sql
│ │ ├── log4j.properties
│ │ └── ranger-spark-security.xml
│ └── scala
│ │ └── org
│ │ └── apache
│ │ ├── spark
│ │ └── sql
│ │ │ ├── RangerSparkTestUtils.scala
│ │ │ ├── execution
│ │ │ └── RangerSparkPlanOmitStrategyTest.scala
│ │ │ └── catalyst
│ │ │ └── optimizer
│ │ │ ├── RangerSparkRowFilterExtensionTest.scala
│ │ │ └── RangerSparkMaskingExtensionTest.scala
│ │ └── ranger
│ │ └── services
│ │ └── spark
│ │ └── RangerAdminClientImpl.scala
└── main
│ └── scala
│ └── org
│ └── apache
│ ├── ranger
│ └── authorization
│ │ └── spark
│ │ └── authorizer
│ │ ├── SparkAccessControlException.scala
│ │ ├── authorizer.scala
│ │ ├── SparkObjectType.scala
│ │ ├── SparkPrivObjectActionType.scala
│ │ ├── SparkPrivilegeObjectType.scala
│ │ ├── SparkAccessType.scala
│ │ ├── RangerSparkAuditHandler.scala
│ │ ├── RangerSparkSQLExtension.scala
│ │ ├── SparkOperationType.scala
│ │ ├── RangerSparkPlugin.scala
│ │ ├── RangerSparkAccessRequest.scala
│ │ ├── RangerSparkResource.scala
│ │ └── SparkPrivilegeObject.scala
│ └── spark
│ └── sql
│ ├── catalyst
│ ├── plans
│ │ └── logical
│ │ │ ├── RangerSparkMasking.scala
│ │ │ └── RangerSparkRowFilter.scala
│ └── optimizer
│ │ └── RangerSparkOptimizer.scala
│ ├── execution
│ ├── RangerSparkPlanOmitStrategy.scala
│ ├── RangerShowDatabasesCommand.scala
│ └── RangerShowTablesCommand.scala
│ └── AuthzUtils.scala
├── .github
├── ISSUE_TEMPLATE
│ ├── notice.md
│ └── custom.md
└── workflows
│ └── master.yml
├── docs
└── installation-addons.md
├── .travis.yml
└── README.md
/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-leap-day
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | target/
2 | dependency-reduced-pom.xml
3 | /derby.log
4 | /.idea/
5 | /spark-ranger.iml
6 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q55.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | i_brand_id brand_id,
3 | i_brand brand,
4 | sum(ss_ext_sales_price) ext_price
5 | FROM date_dim, store_sales, item
6 | WHERE d_date_sk = ss_sold_date_sk
7 | AND ss_item_sk = i_item_sk
8 | AND i_manager_id = 28
9 | AND d_moy = 11
10 | AND d_year = 1999
11 | GROUP BY i_brand, i_brand_id
12 | ORDER BY ext_price DESC, brand_id
13 | LIMIT 100
14 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q96.sql:
--------------------------------------------------------------------------------
1 | SELECT count(*)
2 | FROM store_sales, household_demographics, time_dim, store
3 | WHERE ss_sold_time_sk = time_dim.t_time_sk
4 | AND ss_hdemo_sk = household_demographics.hd_demo_sk
5 | AND ss_store_sk = s_store_sk
6 | AND time_dim.t_hour = 20
7 | AND time_dim.t_minute >= 30
8 | AND household_demographics.hd_dep_count = 7
9 | AND store.s_store_name = 'ese'
10 | ORDER BY count(*)
11 | LIMIT 100
12 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q3.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | dt.d_year,
3 | item.i_brand_id brand_id,
4 | item.i_brand brand,
5 | SUM(ss_ext_sales_price) sum_agg
6 | FROM date_dim dt, store_sales, item
7 | WHERE dt.d_date_sk = store_sales.ss_sold_date_sk
8 | AND store_sales.ss_item_sk = item.i_item_sk
9 | AND item.i_manufact_id = 128
10 | AND dt.d_moy = 11
11 | GROUP BY dt.d_year, item.i_brand, item.i_brand_id
12 | ORDER BY dt.d_year, sum_agg DESC, brand_id
13 | LIMIT 100
14 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q22.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | i_product_name,
3 | i_brand,
4 | i_class,
5 | i_category,
6 | avg(inv_quantity_on_hand) qoh
7 | FROM inventory, date_dim, item, warehouse
8 | WHERE inv_date_sk = d_date_sk
9 | AND inv_item_sk = i_item_sk
10 | AND inv_warehouse_sk = w_warehouse_sk
11 | AND d_month_seq BETWEEN 1200 AND 1200 + 11
12 | GROUP BY ROLLUP (i_product_name, i_brand, i_class, i_category)
13 | ORDER BY qoh, i_product_name, i_brand, i_class, i_category
14 | LIMIT 100
15 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q52.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | dt.d_year,
3 | item.i_brand_id brand_id,
4 | item.i_brand brand,
5 | sum(ss_ext_sales_price) ext_price
6 | FROM date_dim dt, store_sales, item
7 | WHERE dt.d_date_sk = store_sales.ss_sold_date_sk
8 | AND store_sales.ss_item_sk = item.i_item_sk
9 | AND item.i_manager_id = 1
10 | AND dt.d_moy = 11
11 | AND dt.d_year = 2000
12 | GROUP BY dt.d_year, item.i_brand, item.i_brand_id
13 | ORDER BY dt.d_year, ext_price DESC, brand_id
14 | LIMIT 100
15 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q42.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | dt.d_year,
3 | item.i_category_id,
4 | item.i_category,
5 | sum(ss_ext_sales_price)
6 | FROM date_dim dt, store_sales, item
7 | WHERE dt.d_date_sk = store_sales.ss_sold_date_sk
8 | AND store_sales.ss_item_sk = item.i_item_sk
9 | AND item.i_manager_id = 1
10 | AND dt.d_moy = 11
11 | AND dt.d_year = 2000
12 | GROUP BY dt.d_year
13 | , item.i_category_id
14 | , item.i_category
15 | ORDER BY sum(ss_ext_sales_price) DESC, dt.d_year
16 | , item.i_category_id
17 | , item.i_category
18 | LIMIT 100
19 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q15.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | ca_zip,
3 | sum(cs_sales_price)
4 | FROM catalog_sales, customer, customer_address, date_dim
5 | WHERE cs_bill_customer_sk = c_customer_sk
6 | AND c_current_addr_sk = ca_address_sk
7 | AND (substr(ca_zip, 1, 5) IN ('85669', '86197', '88274', '83405', '86475',
8 | '85392', '85460', '80348', '81792')
9 | OR ca_state IN ('CA', 'WA', 'GA')
10 | OR cs_sales_price > 500)
11 | AND cs_sold_date_sk = d_date_sk
12 | AND d_qoy = 2 AND d_year = 2001
13 | GROUP BY ca_zip
14 | ORDER BY ca_zip
15 | LIMIT 100
16 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q82.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | i_item_id,
3 | i_item_desc,
4 | i_current_price
5 | FROM item, inventory, date_dim, store_sales
6 | WHERE i_current_price BETWEEN 62 AND 62 + 30
7 | AND inv_item_sk = i_item_sk
8 | AND d_date_sk = inv_date_sk
9 | AND d_date BETWEEN cast('2000-05-25' AS DATE) AND (cast('2000-05-25' AS DATE) + INTERVAL 60 days)
10 | AND i_manufact_id IN (129, 270, 821, 423)
11 | AND inv_quantity_on_hand BETWEEN 100 AND 500
12 | AND ss_item_sk = i_item_sk
13 | GROUP BY i_item_id, i_item_desc, i_current_price
14 | ORDER BY i_item_id
15 | LIMIT 100
16 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q37.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | i_item_id,
3 | i_item_desc,
4 | i_current_price
5 | FROM item, inventory, date_dim, catalog_sales
6 | WHERE i_current_price BETWEEN 68 AND 68 + 30
7 | AND inv_item_sk = i_item_sk
8 | AND d_date_sk = inv_date_sk
9 | AND d_date BETWEEN cast('2000-02-01' AS DATE) AND (cast('2000-02-01' AS DATE) + INTERVAL 60 days)
10 | AND i_manufact_id IN (677, 940, 694, 808)
11 | AND inv_quantity_on_hand BETWEEN 100 AND 500
12 | AND cs_item_sk = i_item_sk
13 | GROUP BY i_item_id, i_item_desc, i_current_price
14 | ORDER BY i_item_id
15 | LIMIT 100
16 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q32.sql:
--------------------------------------------------------------------------------
1 | SELECT 1 AS `excess discount amount `
2 | FROM
3 | catalog_sales, item, date_dim
4 | WHERE
5 | i_manufact_id = 977
6 | AND i_item_sk = cs_item_sk
7 | AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + interval 90 days)
8 | AND d_date_sk = cs_sold_date_sk
9 | AND cs_ext_discount_amt > ( /* correlated on i_item_sk: 1.3 x the average discount for the same item */
10 | SELECT 1.3 * avg(cs_ext_discount_amt)
11 | FROM catalog_sales, date_dim
12 | WHERE cs_item_sk = i_item_sk
13 | AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + interval 90 days) /* same date window as the outer query */
14 | AND d_date_sk = cs_sold_date_sk)
15 | LIMIT 100
16 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/notice.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Notice
3 | about: This library is deprecated.
4 | title: ''
5 | labels: ''
6 | assignees: yaooqinn
7 |
8 | ---
9 |
10 | This library has been contributed to https://github.com/apache/submarine as a sub-module, and that module can still be used individually.
11 | The project here will no longer be updated.
12 | If you have any questions, please go to https://github.com/apache/submarine/blob/master/docs/submarine-security/spark-security/README.md to learn how to use it, and give feedback to the Apache Submarine community by following https://submarine.apache.org/community/contributors.html
13 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q7.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | i_item_id,
3 | avg(ss_quantity) agg1,
4 | avg(ss_list_price) agg2,
5 | avg(ss_coupon_amt) agg3,
6 | avg(ss_sales_price) agg4
7 | FROM store_sales, customer_demographics, date_dim, item, promotion
8 | WHERE ss_sold_date_sk = d_date_sk AND
9 | ss_item_sk = i_item_sk AND
10 | ss_cdemo_sk = cd_demo_sk AND
11 | ss_promo_sk = p_promo_sk AND
12 | cd_gender = 'M' AND
13 | cd_marital_status = 'S' AND
14 | cd_education_status = 'College' AND
15 | (p_channel_email = 'N' OR p_channel_event = 'N') AND
16 | d_year = 2000
17 | GROUP BY i_item_id
18 | ORDER BY i_item_id
19 | LIMIT 100
20 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q26.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | i_item_id,
3 | avg(cs_quantity) agg1,
4 | avg(cs_list_price) agg2,
5 | avg(cs_coupon_amt) agg3,
6 | avg(cs_sales_price) agg4
7 | FROM catalog_sales, customer_demographics, date_dim, item, promotion
8 | WHERE cs_sold_date_sk = d_date_sk AND
9 | cs_item_sk = i_item_sk AND
10 | cs_bill_cdemo_sk = cd_demo_sk AND
11 | cs_promo_sk = p_promo_sk AND
12 | cd_gender = 'M' AND
13 | cd_marital_status = 'S' AND
14 | cd_education_status = 'College' AND
15 | (p_channel_email = 'N' OR p_channel_event = 'N') AND
16 | d_year = 2000
17 | GROUP BY i_item_id
18 | ORDER BY i_item_id
19 | LIMIT 100
20 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q84.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | c_customer_id AS customer_id,
3 | concat(c_last_name, ', ', c_first_name) AS customername
4 | FROM customer
5 | , customer_address
6 | , customer_demographics
7 | , household_demographics
8 | , income_band
9 | , store_returns
10 | WHERE ca_city = 'Edgewood'
11 | AND c_current_addr_sk = ca_address_sk
12 | AND ib_lower_bound >= 38128
13 | AND ib_upper_bound <= 38128 + 50000
14 | AND ib_income_band_sk = hd_income_band_sk
15 | AND cd_demo_sk = c_current_cdemo_sk
16 | AND hd_demo_sk = c_current_hdemo_sk
17 | AND sr_cdemo_sk = cd_demo_sk
18 | ORDER BY c_customer_id
19 | LIMIT 100
20 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q93.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | ss_customer_sk,
3 | sum(act_sales) sumsales
4 | FROM (SELECT
5 | ss_item_sk,
6 | ss_ticket_number,
7 | ss_customer_sk,
8 | CASE WHEN sr_return_quantity IS NOT NULL
9 | THEN (ss_quantity - sr_return_quantity) * ss_sales_price
10 | ELSE (ss_quantity * ss_sales_price) END act_sales
11 | FROM store_sales
12 | LEFT OUTER JOIN store_returns
13 | ON (sr_item_sk = ss_item_sk AND sr_ticket_number = ss_ticket_number)
14 | ,
15 | reason
16 | WHERE sr_reason_sk = r_reason_sk AND r_reason_desc = 'reason 28') t
17 | GROUP BY ss_customer_sk
18 | ORDER BY sumsales, ss_customer_sk
19 | LIMIT 100
20 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q92.sql:
--------------------------------------------------------------------------------
1 | SELECT sum(ws_ext_discount_amt) AS `Excess Discount Amount `
2 | FROM web_sales, item, date_dim
3 | WHERE i_manufact_id = 350
4 | AND i_item_sk = ws_item_sk
5 | AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + INTERVAL 90 days)
6 | AND d_date_sk = ws_sold_date_sk
7 | AND ws_ext_discount_amt >
8 | (
9 | SELECT 1.3 * avg(ws_ext_discount_amt)
10 | FROM web_sales, date_dim
11 | WHERE ws_item_sk = i_item_sk
12 | AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + INTERVAL 90 days)
13 | AND d_date_sk = ws_sold_date_sk
14 | )
15 | ORDER BY sum(ws_ext_discount_amt)
16 | LIMIT 100
17 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q6.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | a.ca_state state,
3 | count(*) cnt
4 | FROM
5 | customer_address a, customer c, store_sales s, date_dim d, item i
6 | WHERE a.ca_address_sk = c.c_current_addr_sk
7 | AND c.c_customer_sk = s.ss_customer_sk
8 | AND s.ss_sold_date_sk = d.d_date_sk
9 | AND s.ss_item_sk = i.i_item_sk
10 | AND d.d_month_seq =
11 | (SELECT DISTINCT (d_month_seq)
12 | FROM date_dim
13 | WHERE d_year = 2000 AND d_moy = 1)
14 | AND i.i_current_price > 1.2 *
15 | (SELECT avg(j.i_current_price)
16 | FROM item j
17 | WHERE j.i_category = i.i_category)
18 | GROUP BY a.ca_state
19 | HAVING count(*) >= 10
20 | ORDER BY cnt
21 | LIMIT 100
22 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/custom.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Custom issue template
3 | about: Describe this issue template's purpose here.
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 |
8 | ---
9 |
10 | # Notice:
11 | This library has been contributed to https://github.com/apache/submarine as a sub-module, and that module can still be used individually.
12 |
13 | The project here will no longer be updated.
14 |
15 | If you have any questions please go to
16 |
17 | https://github.com/apache/submarine/blob/master/docs/submarine-security/spark-security/README.md
18 |
19 | to learn how to use and give feedback to the apache submarine community by following https://submarine.apache.org/community/contributors.html
20 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q45.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | ca_zip,
3 | ca_city,
4 | sum(ws_sales_price)
5 | FROM web_sales, customer, customer_address, date_dim, item
6 | WHERE ws_bill_customer_sk = c_customer_sk
7 | AND c_current_addr_sk = ca_address_sk
8 | AND ws_item_sk = i_item_sk
9 | AND (substr(ca_zip, 1, 5) IN
10 | ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')
11 | OR
12 | i_item_id IN (SELECT i_item_id
13 | FROM item
14 | WHERE i_item_sk IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29)
15 | )
16 | )
17 | AND ws_sold_date_sk = d_date_sk
18 | AND d_qoy = 2 AND d_year = 2001
19 | GROUP BY ca_zip, ca_city
20 | ORDER BY ca_zip, ca_city
21 | LIMIT 100
22 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q19.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | i_brand_id brand_id,
3 | i_brand brand,
4 | i_manufact_id,
5 | i_manufact,
6 | sum(ss_ext_sales_price) ext_price
7 | FROM date_dim, store_sales, item, customer, customer_address, store
8 | WHERE d_date_sk = ss_sold_date_sk
9 | AND ss_item_sk = i_item_sk
10 | AND i_manager_id = 8
11 | AND d_moy = 11
12 | AND d_year = 1998
13 | AND ss_customer_sk = c_customer_sk
14 | AND c_current_addr_sk = ca_address_sk
15 | AND substr(ca_zip, 1, 5) <> substr(s_zip, 1, 5)
16 | AND ss_store_sk = s_store_sk
17 | GROUP BY i_brand, i_brand_id, i_manufact_id, i_manufact
18 | ORDER BY ext_price DESC, brand, brand_id, i_manufact_id, i_manufact
19 | LIMIT 100
20 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q27.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | i_item_id,
3 | s_state,
4 | grouping(s_state) g_state,
5 | avg(ss_quantity) agg1,
6 | avg(ss_list_price) agg2,
7 | avg(ss_coupon_amt) agg3,
8 | avg(ss_sales_price) agg4
9 | FROM store_sales, customer_demographics, date_dim, store, item
10 | WHERE ss_sold_date_sk = d_date_sk AND
11 | ss_item_sk = i_item_sk AND
12 | ss_store_sk = s_store_sk AND
13 | ss_cdemo_sk = cd_demo_sk AND
14 | cd_gender = 'M' AND
15 | cd_marital_status = 'S' AND
16 | cd_education_status = 'College' AND
17 | d_year = 2002 AND
18 | s_state IN ('TN', 'TN', 'TN', 'TN', 'TN', 'TN')
19 | GROUP BY ROLLUP (i_item_id, s_state)
20 | ORDER BY i_item_id, s_state
21 | LIMIT 100
22 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q20.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | i_item_desc,
3 | i_category,
4 | i_class,
5 | i_current_price,
6 | sum(cs_ext_sales_price) AS itemrevenue,
7 | sum(cs_ext_sales_price) * 100 / sum(sum(cs_ext_sales_price))
8 | OVER
9 | (PARTITION BY i_class) AS revenueratio
10 | FROM catalog_sales, item, date_dim
11 | WHERE cs_item_sk = i_item_sk
12 | AND i_category IN ('Sports', 'Books', 'Home')
13 | AND cs_sold_date_sk = d_date_sk
14 | AND d_date BETWEEN cast('1999-02-22' AS DATE)
15 | AND (cast('1999-02-22' AS DATE) + INTERVAL 30 days)
16 | GROUP BY i_item_id, i_item_desc, i_category, i_class, i_current_price
17 | ORDER BY i_category, i_class, i_item_id, i_item_desc, revenueratio
18 | LIMIT 100
19 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q98.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | i_item_desc,
3 | i_category,
4 | i_class,
5 | i_current_price,
6 | sum(ss_ext_sales_price) AS itemrevenue,
7 | sum(ss_ext_sales_price) * 100 / sum(sum(ss_ext_sales_price))
8 | OVER
9 | (PARTITION BY i_class) AS revenueratio
10 | FROM
11 | store_sales, item, date_dim
12 | WHERE
13 | ss_item_sk = i_item_sk
14 | AND i_category IN ('Sports', 'Books', 'Home')
15 | AND ss_sold_date_sk = d_date_sk
16 | AND d_date BETWEEN cast('1999-02-22' AS DATE)
17 | AND (cast('1999-02-22' AS DATE) + INTERVAL 30 days)
18 | GROUP BY
19 | i_item_id, i_item_desc, i_category, i_class, i_current_price
20 | ORDER BY
21 | i_category, i_class, i_item_id, i_item_desc, revenueratio
22 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q1.sql:
--------------------------------------------------------------------------------
1 | WITH customer_total_return AS
2 | ( SELECT
3 | sr_customer_sk AS ctr_customer_sk,
4 | sr_store_sk AS ctr_store_sk,
5 | sum(sr_return_amt) AS ctr_total_return
6 | FROM store_returns, date_dim
7 | WHERE sr_returned_date_sk = d_date_sk AND d_year = 2000
8 | GROUP BY sr_customer_sk, sr_store_sk)
9 | SELECT c_customer_id
10 | FROM customer_total_return ctr1, store, customer
11 | WHERE ctr1.ctr_total_return >
12 | (SELECT avg(ctr_total_return) * 1.2
13 | FROM customer_total_return ctr2
14 | WHERE ctr1.ctr_store_sk = ctr2.ctr_store_sk)
15 | AND s_store_sk = ctr1.ctr_store_sk
16 | AND s_state = 'TN'
17 | AND ctr1.ctr_customer_sk = c_customer_sk
18 | ORDER BY c_customer_id
19 | LIMIT 100
20 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q12.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | i_item_desc,
3 | i_category,
4 | i_class,
5 | i_current_price,
6 | sum(ws_ext_sales_price) AS itemrevenue,
7 | sum(ws_ext_sales_price) * 100 / sum(sum(ws_ext_sales_price))
8 | OVER
9 | (PARTITION BY i_class) AS revenueratio
10 | FROM
11 | web_sales, item, date_dim
12 | WHERE
13 | ws_item_sk = i_item_sk
14 | AND i_category IN ('Sports', 'Books', 'Home')
15 | AND ws_sold_date_sk = d_date_sk
16 | AND d_date BETWEEN cast('1999-02-22' AS DATE)
17 | AND (cast('1999-02-22' AS DATE) + INTERVAL 30 days)
18 | GROUP BY
19 | i_item_id, i_item_desc, i_category, i_class, i_current_price
20 | ORDER BY
21 | i_category, i_class, i_item_id, i_item_desc, revenueratio
22 | LIMIT 100
23 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q86.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | sum(ws_net_paid) AS total_sum,
3 | i_category,
4 | i_class,
5 | grouping(i_category) + grouping(i_class) AS lochierarchy,
6 | rank()
7 | OVER (
8 | PARTITION BY grouping(i_category) + grouping(i_class),
9 | CASE WHEN grouping(i_class) = 0
10 | THEN i_category END
11 | ORDER BY sum(ws_net_paid) DESC) AS rank_within_parent
12 | FROM
13 | web_sales, date_dim d1, item
14 | WHERE
15 | d1.d_month_seq BETWEEN 1200 AND 1200 + 11
16 | AND d1.d_date_sk = ws_sold_date_sk
17 | AND i_item_sk = ws_item_sk
18 | GROUP BY ROLLUP (i_category, i_class)
19 | ORDER BY
20 | lochierarchy DESC,
21 | CASE WHEN lochierarchy = 0
22 | THEN i_category END,
23 | rank_within_parent
24 | LIMIT 100
25 |
--------------------------------------------------------------------------------
/.github/workflows/master.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | on:
4 | push:
5 | branches:
6 | - master
7 | pull_request:
8 | branches:
9 | - master
10 |
11 | jobs:
12 | build:
13 |
14 | runs-on: ubuntu-18.04
15 | strategy:
16 | matrix:
17 | spark: [ '2.3', '2.4' ]
18 | ranger: [ '1.0', '1.1', '1.2', '2.0' ]
19 | name: Build with Spark ${{ matrix.spark }} / Ranger ${{ matrix.ranger }}
20 |
21 | steps:
22 | - uses: actions/checkout@v2
23 | - name: Set up JDK 1.8
24 | uses: actions/setup-java@v1
25 | with:
26 | java-version: 1.8 # 'java-version' is the supported input name; 'version' is its deprecated alias in setup-java v1
27 | - name: Build with Maven
28 | run: mvn clean install -Pspark-${{ matrix.spark }} -Pranger-${{ matrix.ranger }} -Dmaven.javadoc.skip=true -B -V
29 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q36.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | sum(ss_net_profit) / sum(ss_ext_sales_price) AS gross_margin,
3 | i_category,
4 | i_class,
5 | grouping(i_category) + grouping(i_class) AS lochierarchy,
6 | rank()
7 | OVER (
8 | PARTITION BY grouping(i_category) + grouping(i_class),
9 | CASE WHEN grouping(i_class) = 0
10 | THEN i_category END
11 | ORDER BY sum(ss_net_profit) / sum(ss_ext_sales_price) ASC) AS rank_within_parent
12 | FROM
13 | store_sales, date_dim d1, item, store
14 | WHERE
15 | d1.d_year = 2001
16 | AND d1.d_date_sk = ss_sold_date_sk
17 | AND i_item_sk = ss_item_sk
18 | AND s_store_sk = ss_store_sk
19 | AND s_state IN ('TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN', 'TN')
20 | GROUP BY ROLLUP (i_category, i_class)
21 | ORDER BY
22 | lochierarchy DESC
23 | , CASE WHEN lochierarchy = 0
24 | THEN i_category END
25 | , rank_within_parent
26 | LIMIT 100
27 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q90.sql:
--------------------------------------------------------------------------------
1 | SELECT cast(amc AS DECIMAL(15, 4)) / cast(pmc AS DECIMAL(15, 4)) am_pm_ratio
2 | FROM (SELECT count(*) amc
3 | FROM web_sales, household_demographics, time_dim, web_page
4 | WHERE ws_sold_time_sk = time_dim.t_time_sk
5 | AND ws_ship_hdemo_sk = household_demographics.hd_demo_sk
6 | AND ws_web_page_sk = web_page.wp_web_page_sk
7 | AND time_dim.t_hour BETWEEN 8 AND 8 + 1
8 | AND household_demographics.hd_dep_count = 6
9 | AND web_page.wp_char_count BETWEEN 5000 AND 5200) at,
10 | (SELECT count(*) pmc
11 | FROM web_sales, household_demographics, time_dim, web_page
12 | WHERE ws_sold_time_sk = time_dim.t_time_sk
13 | AND ws_ship_hdemo_sk = household_demographics.hd_demo_sk
14 | AND ws_web_page_sk = web_page.wp_web_page_sk
15 | AND time_dim.t_hour BETWEEN 19 AND 19 + 1
16 | AND household_demographics.hd_dep_count = 6
17 | AND web_page.wp_char_count BETWEEN 5000 AND 5200) pt
18 | ORDER BY am_pm_ratio
19 | LIMIT 100
20 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q94.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | count(DISTINCT ws_order_number) AS `order count `,
3 | sum(ws_ext_ship_cost) AS `total shipping cost `,
4 | sum(ws_net_profit) AS `total net profit `
5 | FROM
6 | web_sales ws1, date_dim, customer_address, web_site
7 | WHERE
8 | d_date BETWEEN '1999-02-01' AND
9 | (CAST('1999-02-01' AS DATE) + INTERVAL 60 days)
10 | AND ws1.ws_ship_date_sk = d_date_sk
11 | AND ws1.ws_ship_addr_sk = ca_address_sk
12 | AND ca_state = 'IL'
13 | AND ws1.ws_web_site_sk = web_site_sk
14 | AND web_company_name = 'pri'
15 | AND EXISTS(SELECT *
16 | FROM web_sales ws2
17 | WHERE ws1.ws_order_number = ws2.ws_order_number
18 | AND ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk)
19 | AND NOT EXISTS(SELECT *
20 | FROM web_returns wr1
21 | WHERE ws1.ws_order_number = wr1.wr_order_number)
22 | ORDER BY count(DISTINCT ws_order_number)
23 | LIMIT 100
24 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q91.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | cc_call_center_id Call_Center,
3 | cc_name Call_Center_Name,
4 | cc_manager Manager,
5 | sum(cr_net_loss) Returns_Loss
6 | FROM
7 | call_center, catalog_returns, date_dim, customer, customer_address,
8 | customer_demographics, household_demographics
9 | WHERE
10 | cr_call_center_sk = cc_call_center_sk
11 | AND cr_returned_date_sk = d_date_sk
12 | AND cr_returning_customer_sk = c_customer_sk
13 | AND cd_demo_sk = c_current_cdemo_sk
14 | AND hd_demo_sk = c_current_hdemo_sk
15 | AND ca_address_sk = c_current_addr_sk
16 | AND d_year = 1998
17 | AND d_moy = 11
18 | AND ((cd_marital_status = 'M' AND cd_education_status = 'Unknown')
19 | OR (cd_marital_status = 'W' AND cd_education_status = 'Advanced Degree'))
20 | AND hd_buy_potential LIKE 'Unknown%'
21 | AND ca_gmt_offset = -7
22 | GROUP BY cc_call_center_id, cc_name, cc_manager, cd_marital_status, cd_education_status
23 | ORDER BY sum(cr_net_loss) DESC
24 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q65.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | s_store_name,
3 | i_item_desc,
4 | sc.revenue,
5 | i_current_price,
6 | i_wholesale_cost,
7 | i_brand
8 | FROM store, item,
9 | (SELECT
10 | ss_store_sk,
11 | avg(revenue) AS ave
12 | FROM
13 | (SELECT
14 | ss_store_sk,
15 | ss_item_sk,
16 | sum(ss_sales_price) AS revenue
17 | FROM store_sales, date_dim
18 | WHERE ss_sold_date_sk = d_date_sk AND d_month_seq BETWEEN 1176 AND 1176 + 11
19 | GROUP BY ss_store_sk, ss_item_sk) sa
20 | GROUP BY ss_store_sk) sb,
21 | (SELECT
22 | ss_store_sk,
23 | ss_item_sk,
24 | sum(ss_sales_price) AS revenue
25 | FROM store_sales, date_dim
26 | WHERE ss_sold_date_sk = d_date_sk AND d_month_seq BETWEEN 1176 AND 1176 + 11
27 | GROUP BY ss_store_sk, ss_item_sk) sc
28 | WHERE sb.ss_store_sk = sc.ss_store_sk AND
29 | sc.revenue <= 0.1 * sb.ave AND
30 | s_store_sk = sc.ss_store_sk AND
31 | i_item_sk = sc.ss_item_sk
32 | ORDER BY s_store_name, i_item_desc
33 | LIMIT 100
34 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q79.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | c_last_name,
3 | c_first_name,
4 | substr(s_city, 1, 30),
5 | ss_ticket_number,
6 | amt,
7 | profit
8 | FROM
9 | (SELECT
10 | ss_ticket_number,
11 | ss_customer_sk,
12 | store.s_city,
13 | sum(ss_coupon_amt) amt,
14 | sum(ss_net_profit) profit
15 | FROM store_sales, date_dim, store, household_demographics
16 | WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk
17 | AND store_sales.ss_store_sk = store.s_store_sk
18 | AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
19 | AND (household_demographics.hd_dep_count = 6 OR
20 | household_demographics.hd_vehicle_count > 2)
21 | AND date_dim.d_dow = 1
22 | AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2)
23 | AND store.s_number_employees BETWEEN 200 AND 295
24 | GROUP BY ss_ticket_number, ss_customer_sk, ss_addr_sk, store.s_city) ms, customer
25 | WHERE ss_customer_sk = c_customer_sk
26 | ORDER BY c_last_name, c_first_name, substr(s_city, 1, 30), profit
27 | LIMIT 100
28 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q40.sql:
--------------------------------------------------------------------------------
1 | SELECT -- q40: catalog sales price net of refunded cash per (w_state, i_item_id), split into before vs. on/after 2000-03-11
2 | w_state,
3 | i_item_id,
4 | sum(CASE WHEN (cast(d_date AS DATE) < cast('2000-03-11' AS DATE))
5 | THEN cs_sales_price - coalesce(cr_refunded_cash, 0) -- a NULL refund (e.g. no matching return row) counts as 0
6 | ELSE 0 END) AS sales_before,
7 | sum(CASE WHEN (cast(d_date AS DATE) >= cast('2000-03-11' AS DATE))
8 | THEN cs_sales_price - coalesce(cr_refunded_cash, 0)
9 | ELSE 0 END) AS sales_after
10 | FROM
11 | catalog_sales
12 | LEFT OUTER JOIN catalog_returns ON -- outer join keeps catalog sales that have no return row
13 | (cs_order_number = cr_order_number
14 | AND cs_item_sk = cr_item_sk)
15 | , warehouse, item, date_dim
16 | WHERE
17 | i_current_price BETWEEN 0.99 AND 1.49
18 | AND i_item_sk = cs_item_sk
19 | AND cs_warehouse_sk = w_warehouse_sk
20 | AND cs_sold_date_sk = d_date_sk
21 | AND d_date BETWEEN (cast('2000-03-11' AS DATE) - INTERVAL 30 days) -- window: 30 days either side of 2000-03-11
22 | AND (cast('2000-03-11' AS DATE) + INTERVAL 30 days)
23 | GROUP BY w_state, i_item_id
24 | ORDER BY w_state, i_item_id
25 | LIMIT 100
26 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q87.sql:
--------------------------------------------------------------------------------
1 | SELECT count(*) -- q87: number of distinct (last name, first name, date) rows present in store sales but in neither catalog nor web sales
2 | FROM ((SELECT DISTINCT
3 | c_last_name,
4 | c_first_name,
5 | d_date
6 | FROM store_sales, date_dim, customer
7 | WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk
8 | AND store_sales.ss_customer_sk = customer.c_customer_sk
9 | AND d_month_seq BETWEEN 1200 AND 1200 + 11) -- d_month_seq 1200..1211; the same range is used in all three branches
10 | EXCEPT
11 | (SELECT DISTINCT
12 | c_last_name,
13 | c_first_name,
14 | d_date
15 | FROM catalog_sales, date_dim, customer
16 | WHERE catalog_sales.cs_sold_date_sk = date_dim.d_date_sk
17 | AND catalog_sales.cs_bill_customer_sk = customer.c_customer_sk
18 | AND d_month_seq BETWEEN 1200 AND 1200 + 11)
19 | EXCEPT
20 | (SELECT DISTINCT
21 | c_last_name,
22 | c_first_name,
23 | d_date
24 | FROM web_sales, date_dim, customer
25 | WHERE web_sales.ws_sold_date_sk = date_dim.d_date_sk
26 | AND web_sales.ws_bill_customer_sk = customer.c_customer_sk
27 | AND d_month_seq BETWEEN 1200 AND 1200 + 11)
28 | ) cool_cust
29 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q67.sql:
--------------------------------------------------------------------------------
1 | SELECT * -- q67: rolled-up store sales totals, ranked within each i_category, keeping rows with rank <= 100
2 | FROM
3 | (SELECT -- dw2: adds the per-category rank `rk` to the rolled-up totals
4 | i_category,
5 | i_class,
6 | i_brand,
7 | i_product_name,
8 | d_year,
9 | d_qoy,
10 | d_moy,
11 | s_store_id,
12 | sumsales,
13 | rank()
14 | OVER (PARTITION BY i_category
15 | ORDER BY sumsales DESC) rk
16 | FROM
17 | (SELECT -- dw1: sum of price * quantity for every ROLLUP level of the eight grouping columns
18 | i_category,
19 | i_class,
20 | i_brand,
21 | i_product_name,
22 | d_year,
23 | d_qoy,
24 | d_moy,
25 | s_store_id,
26 | sum(coalesce(ss_sales_price * ss_quantity, 0)) sumsales -- a NULL price * quantity product contributes 0
27 | FROM store_sales, date_dim, store, item
28 | WHERE ss_sold_date_sk = d_date_sk
29 | AND ss_item_sk = i_item_sk
30 | AND ss_store_sk = s_store_sk
31 | AND d_month_seq BETWEEN 1200 AND 1200 + 11
32 | GROUP BY ROLLUP (i_category, i_class, i_brand, i_product_name, d_year, d_qoy,
33 | d_moy, s_store_id)) dw1) dw2
34 | WHERE rk <= 100
35 | ORDER BY
36 | i_category, i_class, i_brand, i_product_name, d_year,
37 | d_qoy, d_moy, s_store_id, sumsales, rk
38 | LIMIT 100
39 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q21.sql:
--------------------------------------------------------------------------------
1 | SELECT * -- q21: (warehouse, item) pairs whose on-hand inventory on/after 2000-03-11 is between 2/3 and 3/2 of the inventory before it
2 | FROM (
3 | SELECT
4 | w_warehouse_name,
5 | i_item_id,
6 | sum(CASE WHEN (cast(d_date AS DATE) < cast('2000-03-11' AS DATE))
7 | THEN inv_quantity_on_hand
8 | ELSE 0 END) AS inv_before,
9 | sum(CASE WHEN (cast(d_date AS DATE) >= cast('2000-03-11' AS DATE))
10 | THEN inv_quantity_on_hand
11 | ELSE 0 END) AS inv_after
12 | FROM inventory, warehouse, item, date_dim
13 | WHERE i_current_price BETWEEN 0.99 AND 1.49
14 | AND i_item_sk = inv_item_sk
15 | AND inv_warehouse_sk = w_warehouse_sk
16 | AND inv_date_sk = d_date_sk
17 | AND d_date BETWEEN (cast('2000-03-11' AS DATE) - INTERVAL 30 days) -- window: 30 days either side of 2000-03-11
18 | AND (cast('2000-03-11' AS DATE) + INTERVAL 30 days)
19 | GROUP BY w_warehouse_name, i_item_id) x
20 | WHERE (CASE WHEN inv_before > 0 -- only divide when the "before" total is positive; otherwise the NULL makes the row fail BETWEEN
21 | THEN inv_after / inv_before
22 | ELSE NULL
23 | END) BETWEEN 2.0 / 3.0 AND 3.0 / 2.0
24 | ORDER BY w_warehouse_name, i_item_id
25 | LIMIT 100
26 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q24a.sql:
--------------------------------------------------------------------------------
1 | WITH ssales AS -- q24a: `ssales` = net paid on returned store sales, summed per customer name, store, states and item attributes
2 | (SELECT
3 | c_last_name,
4 | c_first_name,
5 | s_store_name,
6 | ca_state,
7 | s_state,
8 | i_color,
9 | i_current_price,
10 | i_manager_id,
11 | i_units,
12 | i_size,
13 | sum(ss_net_paid) netpaid
14 | FROM store_sales, store_returns, store, item, customer, customer_address
15 | WHERE ss_ticket_number = sr_ticket_number
16 | AND ss_item_sk = sr_item_sk
17 | AND ss_customer_sk = c_customer_sk
18 | AND ss_item_sk = i_item_sk
19 | AND ss_store_sk = s_store_sk
20 | AND c_birth_country = upper(ca_country) -- customer_address is tied in only by birth country (here) and store zip (next line)
21 | AND s_zip = ca_zip
22 | AND s_market_id = 8
23 | GROUP BY c_last_name, c_first_name, s_store_name, ca_state, s_state, i_color,
24 | i_current_price, i_manager_id, i_units, i_size)
25 | SELECT -- main query: total paid per (customer name, store) for items of color 'pale'
26 | c_last_name,
27 | c_first_name,
28 | s_store_name,
29 | sum(netpaid) paid
30 | FROM ssales
31 | WHERE i_color = 'pale'
32 | GROUP BY c_last_name, c_first_name, s_store_name
33 | HAVING sum(netpaid) > (SELECT 0.05 * avg(netpaid) -- threshold: 5% of the average netpaid over all ssales rows, not only the filtered color
34 | FROM ssales)
35 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q24b.sql:
--------------------------------------------------------------------------------
1 | WITH ssales AS -- q24b: `ssales` = net paid on returned store sales, summed per customer name, store, states and item attributes
2 | (SELECT
3 | c_last_name,
4 | c_first_name,
5 | s_store_name,
6 | ca_state,
7 | s_state,
8 | i_color,
9 | i_current_price,
10 | i_manager_id,
11 | i_units,
12 | i_size,
13 | sum(ss_net_paid) netpaid
14 | FROM store_sales, store_returns, store, item, customer, customer_address
15 | WHERE ss_ticket_number = sr_ticket_number
16 | AND ss_item_sk = sr_item_sk
17 | AND ss_customer_sk = c_customer_sk
18 | AND ss_item_sk = i_item_sk
19 | AND ss_store_sk = s_store_sk
20 | AND c_birth_country = upper(ca_country) -- customer_address is tied in only by birth country (here) and store zip (next line)
21 | AND s_zip = ca_zip
22 | AND s_market_id = 8
23 | GROUP BY c_last_name, c_first_name, s_store_name, ca_state, s_state,
24 | i_color, i_current_price, i_manager_id, i_units, i_size)
25 | SELECT -- main query: total paid per (customer name, store) for items of color 'chiffon'
26 | c_last_name,
27 | c_first_name,
28 | s_store_name,
29 | sum(netpaid) paid
30 | FROM ssales
31 | WHERE i_color = 'chiffon'
32 | GROUP BY c_last_name, c_first_name, s_store_name
33 | HAVING sum(netpaid) > (SELECT 0.05 * avg(netpaid) -- threshold: 5% of the average netpaid over all ssales rows, not only the filtered color
34 | FROM ssales)
35 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q16.sql:
--------------------------------------------------------------------------------
1 | SELECT -- q16: distinct order count, shipping cost and net profit over catalog sales that pass the ship-date, address, call-center and (NOT) EXISTS filters below
2 | count(DISTINCT cs_order_number) AS `order count `,
3 | sum(cs_ext_ship_cost) AS `total shipping cost `,
4 | sum(cs_net_profit) AS `total net profit `
5 | FROM
6 | catalog_sales cs1, date_dim, customer_address, call_center
7 | WHERE
8 | d_date BETWEEN '2002-02-01' AND (CAST('2002-02-01' AS DATE) + INTERVAL 60 days) -- ship date within 60 days from 2002-02-01
9 | AND cs1.cs_ship_date_sk = d_date_sk
10 | AND cs1.cs_ship_addr_sk = ca_address_sk
11 | AND ca_state = 'GA'
12 | AND cs1.cs_call_center_sk = cc_call_center_sk
13 | AND cc_county IN
14 | ('Williamson County', 'Williamson County', 'Williamson County', 'Williamson County', 'Williamson County') -- the same literal is listed five times
15 | AND EXISTS(SELECT * -- another catalog_sales row of the same order has a different cs_warehouse_sk
16 | FROM catalog_sales cs2
17 | WHERE cs1.cs_order_number = cs2.cs_order_number
18 | AND cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk)
19 | AND NOT EXISTS(SELECT * -- the order has no row in catalog_returns
20 | FROM catalog_returns cr1
21 | WHERE cs1.cs_order_number = cr1.cr_order_number)
22 | ORDER BY count(DISTINCT cs_order_number)
23 | LIMIT 100
24 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/ranger/authorization/spark/authorizer/SparkAccessControlException.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.ranger.authorization.spark.authorizer
19 |
20 | class SparkAccessControlException(msg: String) extends Exception(msg) // Exception subclass that passes `msg` to the Exception constructor; presumably raised on authorization failures - confirm at call sites
21 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q97.sql:
--------------------------------------------------------------------------------
1 | WITH ssci AS ( -- q97: `ssci` = distinct (customer, item) pairs from store sales with d_month_seq 1200..1211
2 | SELECT
3 | ss_customer_sk customer_sk,
4 | ss_item_sk item_sk
5 | FROM store_sales, date_dim
6 | WHERE ss_sold_date_sk = d_date_sk
7 | AND d_month_seq BETWEEN 1200 AND 1200 + 11
8 | GROUP BY ss_customer_sk, ss_item_sk),
9 | csci AS ( -- `csci` = the same kind of pairs taken from catalog sales (bill customer)
10 | SELECT
11 | cs_bill_customer_sk customer_sk,
12 | cs_item_sk item_sk
13 | FROM catalog_sales, date_dim
14 | WHERE cs_sold_date_sk = d_date_sk
15 | AND d_month_seq BETWEEN 1200 AND 1200 + 11
16 | GROUP BY cs_bill_customer_sk, cs_item_sk)
17 | SELECT -- counts pairs seen only in store sales, only in catalog sales, or in both
18 | sum(CASE WHEN ssci.customer_sk IS NOT NULL AND csci.customer_sk IS NULL
19 | THEN 1
20 | ELSE 0 END) store_only,
21 | sum(CASE WHEN ssci.customer_sk IS NULL AND csci.customer_sk IS NOT NULL
22 | THEN 1
23 | ELSE 0 END) catalog_only,
24 | sum(CASE WHEN ssci.customer_sk IS NOT NULL AND csci.customer_sk IS NOT NULL
25 | THEN 1
26 | ELSE 0 END) store_and_catalog
27 | FROM ssci
28 | FULL OUTER JOIN csci ON (ssci.customer_sk = csci.customer_sk -- unmatched rows carry NULLs on the missing side, which the CASE tests above inspect
29 | AND ssci.item_sk = csci.item_sk)
30 | LIMIT 100
31 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q95.sql:
--------------------------------------------------------------------------------
1 | WITH ws_wh AS -- q95: `ws_wh` = web order numbers that have rows with two different ws_warehouse_sk values
2 | (SELECT
3 | ws1.ws_order_number,
4 | ws1.ws_warehouse_sk wh1,
5 | ws2.ws_warehouse_sk wh2
6 | FROM web_sales ws1, web_sales ws2
7 | WHERE ws1.ws_order_number = ws2.ws_order_number
8 | AND ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk)
9 | SELECT -- main query: distinct order count, shipping cost and net profit over the filtered web sales
10 | count(DISTINCT ws_order_number) AS `order count `,
11 | sum(ws_ext_ship_cost) AS `total shipping cost `,
12 | sum(ws_net_profit) AS `total net profit `
13 | FROM
14 | web_sales ws1, date_dim, customer_address, web_site
15 | WHERE
16 | d_date BETWEEN '1999-02-01' AND -- ship date within 60 days from 1999-02-01
17 | (CAST('1999-02-01' AS DATE) + INTERVAL 60 DAY)
18 | AND ws1.ws_ship_date_sk = d_date_sk
19 | AND ws1.ws_ship_addr_sk = ca_address_sk
20 | AND ca_state = 'IL'
21 | AND ws1.ws_web_site_sk = web_site_sk
22 | AND web_company_name = 'pri'
23 | AND ws1.ws_order_number IN (SELECT ws_order_number -- the order appears in ws_wh
24 | FROM ws_wh)
25 | AND ws1.ws_order_number IN (SELECT wr_order_number -- and a ws_wh order number also appears in web_returns
26 | FROM web_returns, ws_wh
27 | WHERE wr_order_number = ws_wh.ws_order_number)
28 | ORDER BY count(DISTINCT ws_order_number)
29 | LIMIT 100
30 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q25.sql:
--------------------------------------------------------------------------------
1 | SELECT -- q25: store-sales profit, store-returns loss and catalog-sales profit per (item, store) for sales linked through a return to a catalog purchase
2 | i_item_id,
3 | i_item_desc,
4 | s_store_id,
5 | s_store_name,
6 | sum(ss_net_profit) AS store_sales_profit,
7 | sum(sr_net_loss) AS store_returns_loss,
8 | sum(cs_net_profit) AS catalog_sales_profit
9 | FROM
10 | store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, date_dim d3, -- d1 = sale date, d2 = return date, d3 = catalog sale date (per the joins below)
11 | store, item
12 | WHERE
13 | d1.d_moy = 4
14 | AND d1.d_year = 2001
15 | AND d1.d_date_sk = ss_sold_date_sk
16 | AND i_item_sk = ss_item_sk
17 | AND s_store_sk = ss_store_sk
18 | AND ss_customer_sk = sr_customer_sk -- this and the next two lines match a store sale to its return by customer, item and ticket
19 | AND ss_item_sk = sr_item_sk
20 | AND ss_ticket_number = sr_ticket_number
21 | AND sr_returned_date_sk = d2.d_date_sk
22 | AND d2.d_moy BETWEEN 4 AND 10
23 | AND d2.d_year = 2001
24 | AND sr_customer_sk = cs_bill_customer_sk -- the returning customer is the bill customer of a catalog sale of the same item (next line)
25 | AND sr_item_sk = cs_item_sk
26 | AND cs_sold_date_sk = d3.d_date_sk
27 | AND d3.d_moy BETWEEN 4 AND 10
28 | AND d3.d_year = 2001
29 | GROUP BY
30 | i_item_id, i_item_desc, s_store_id, s_store_name
31 | ORDER BY
32 | i_item_id, i_item_desc, s_store_id, s_store_name
33 | LIMIT 100
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q29.sql:
--------------------------------------------------------------------------------
1 | SELECT -- q29: store-sales, store-returns and catalog-sales quantities per (item, store) for sales linked through a return to a catalog purchase
2 | i_item_id,
3 | i_item_desc,
4 | s_store_id,
5 | s_store_name,
6 | sum(ss_quantity) AS store_sales_quantity,
7 | sum(sr_return_quantity) AS store_returns_quantity,
8 | sum(cs_quantity) AS catalog_sales_quantity
9 | FROM
10 | store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, -- d1 = sale date, d2 = return date, d3 = catalog sale date (per the joins below)
11 | date_dim d3, store, item
12 | WHERE
13 | d1.d_moy = 9
14 | AND d1.d_year = 1999
15 | AND d1.d_date_sk = ss_sold_date_sk
16 | AND i_item_sk = ss_item_sk
17 | AND s_store_sk = ss_store_sk
18 | AND ss_customer_sk = sr_customer_sk -- this and the next two lines match a store sale to its return by customer, item and ticket
19 | AND ss_item_sk = sr_item_sk
20 | AND ss_ticket_number = sr_ticket_number
21 | AND sr_returned_date_sk = d2.d_date_sk
22 | AND d2.d_moy BETWEEN 9 AND 9 + 3 -- months 9..12
23 | AND d2.d_year = 1999
24 | AND sr_customer_sk = cs_bill_customer_sk -- the returning customer is the bill customer of a catalog sale of the same item (next line)
25 | AND sr_item_sk = cs_item_sk
26 | AND cs_sold_date_sk = d3.d_date_sk
27 | AND d3.d_year IN (1999, 1999 + 1, 1999 + 2) -- i.e. 1999, 2000, 2001
28 | GROUP BY
29 | i_item_id, i_item_desc, s_store_id, s_store_name
30 | ORDER BY
31 | i_item_id, i_item_desc, s_store_id, s_store_name
32 | LIMIT 100
33 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/ranger/authorization/spark/authorizer/authorizer.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.ranger.authorization.spark
19 |
20 | import org.apache.spark.sql.SparkSessionExtensions
21 |
22 | package object authorizer { // Package object: its members are visible throughout org.apache.ranger.authorization.spark.authorizer
23 |
24 | type Extensions = SparkSessionExtensions => Unit // Alias for a function that takes a SparkSessionExtensions and returns Unit
25 |
26 | }
27 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q38.sql:
--------------------------------------------------------------------------------
1 | SELECT count(*) -- q38: number of distinct (last name, first name, date) rows present in store, catalog and web sales alike
2 | FROM (
3 | SELECT DISTINCT
4 | c_last_name,
5 | c_first_name,
6 | d_date
7 | FROM store_sales, date_dim, customer
8 | WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk
9 | AND store_sales.ss_customer_sk = customer.c_customer_sk
10 | AND d_month_seq BETWEEN 1200 AND 1200 + 11 -- d_month_seq 1200..1211; the same range is used in all three branches
11 | INTERSECT
12 | SELECT DISTINCT
13 | c_last_name,
14 | c_first_name,
15 | d_date
16 | FROM catalog_sales, date_dim, customer
17 | WHERE catalog_sales.cs_sold_date_sk = date_dim.d_date_sk
18 | AND catalog_sales.cs_bill_customer_sk = customer.c_customer_sk
19 | AND d_month_seq BETWEEN 1200 AND 1200 + 11
20 | INTERSECT
21 | SELECT DISTINCT
22 | c_last_name,
23 | c_first_name,
24 | d_date
25 | FROM web_sales, date_dim, customer
26 | WHERE web_sales.ws_sold_date_sk = date_dim.d_date_sk
27 | AND web_sales.ws_bill_customer_sk = customer.c_customer_sk
28 | AND d_month_seq BETWEEN 1200 AND 1200 + 11
29 | ) hot_cust
30 | LIMIT 100
31 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/ranger/authorization/spark/authorizer/SparkObjectType.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.ranger.authorization.spark.authorizer
19 |
20 | object SparkObjectType extends Enumeration { // Enumeration of object kinds, from NONE through URI
21 | type SparkObjectType = Value // Alias so the value type can be written as SparkObjectType
22 |
23 | val NONE, DATABASE, TABLE, VIEW, COLUMN, FUNCTION, URI = Value // Each name becomes one enumeration value; ids follow declaration order
24 | }
25 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/ranger/authorization/spark/authorizer/SparkPrivObjectActionType.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.ranger.authorization.spark.authorizer
19 |
20 | object SparkPrivObjectActionType extends Enumeration { // Enumeration of action kinds: OTHER, INSERT, INSERT_OVERWRITE
21 | type SparkPrivObjectActionType = Value // Alias so the value type can be written as SparkPrivObjectActionType
22 | val OTHER, INSERT, INSERT_OVERWRITE = Value // Each name becomes one enumeration value; ids follow declaration order
23 | }
24 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q43.sql:
--------------------------------------------------------------------------------
1 | SELECT -- q43: per store, the sum of ss_sales_price for each day name, for d_year 2000 and s_gmt_offset -5
2 | s_store_name,
3 | s_store_id,
4 | sum(CASE WHEN (d_day_name = 'Sunday')
5 | THEN ss_sales_price
6 | ELSE NULL END) sun_sales, -- ELSE NULL: rows for other days are ignored by sum(); a day with no rows yields NULL, not 0
7 | sum(CASE WHEN (d_day_name = 'Monday')
8 | THEN ss_sales_price
9 | ELSE NULL END) mon_sales,
10 | sum(CASE WHEN (d_day_name = 'Tuesday')
11 | THEN ss_sales_price
12 | ELSE NULL END) tue_sales,
13 | sum(CASE WHEN (d_day_name = 'Wednesday')
14 | THEN ss_sales_price
15 | ELSE NULL END) wed_sales,
16 | sum(CASE WHEN (d_day_name = 'Thursday')
17 | THEN ss_sales_price
18 | ELSE NULL END) thu_sales,
19 | sum(CASE WHEN (d_day_name = 'Friday')
20 | THEN ss_sales_price
21 | ELSE NULL END) fri_sales,
22 | sum(CASE WHEN (d_day_name = 'Saturday')
23 | THEN ss_sales_price
24 | ELSE NULL END) sat_sales
25 | FROM date_dim, store_sales, store
26 | WHERE d_date_sk = ss_sold_date_sk AND
27 | s_store_sk = ss_store_sk AND
28 | s_gmt_offset = -5 AND
29 | d_year = 2000
30 | GROUP BY s_store_name, s_store_id
31 | ORDER BY s_store_name, s_store_id, sun_sales, mon_sales, tue_sales, wed_sales,
32 | thu_sales, fri_sales, sat_sales
33 | LIMIT 100
34 |
--------------------------------------------------------------------------------
/docs/installation-addons.md:
--------------------------------------------------------------------------------
1 | # Installation Addons
2 |
3 | This page lists tips and known problems to be aware of when installing this library.
4 |
5 | ## Ranger Admin does not list databases, tables and columns when you create or edit policies.
6 |
7 | Because Ranger Admin uses the Hadoop 3 Hive libraries, listing databases, tables and columns does not work in Ranger Admin. To enable listing, put the files below in $RANGER_HOME/ews/webapp/WEB-INF/lib/ :
8 |
9 | - hive-exec-1.2.1.spark2.jar (Hadoop 3 compatible version needed. You can download it from [here](https://github.com/MobinRanjbar/hive-exec-jar/releases).)
10 | - hive-jdbc-1.2.1.spark2.jar (Available on Spark Jars folder)
11 | - hive-metastore-1.2.1.spark2.jar (Available on Spark Jars folder)
12 | - hive-service-1.2.1.jar (Download from internet)
13 |
14 | Then restart ranger-admin.
15 |
16 | ## The dependency issues in Apache Ranger 2.X.X
17 |
18 | ### NoClassDefFoundError: com.kstruct.gethostname4j.Hostname
19 |
20 | To resolve it, place 'gethostname4j.jar' into $SPARK_HOME/jars.
21 |
22 | ### NoClassDefFoundError: com.sun.jna.Platform
23 |
24 | To resolve it, place 'jna-5.5.0.jar' into $SPARK_HOME/jars.
25 |
26 |
27 |
28 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/ranger/authorization/spark/authorizer/SparkPrivilegeObjectType.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.ranger.authorization.spark.authorizer
19 |
20 | object SparkPrivilegeObjectType extends Enumeration { // Enumeration of privilege object kinds: DATABASE, TABLE_OR_VIEW, FUNCTION, DFS_URI
21 | type SparkPrivilegeObjectType = Value // Alias so the value type can be written as SparkPrivilegeObjectType
22 | val DATABASE, TABLE_OR_VIEW, FUNCTION, DFS_URI = Value // Each name becomes one enumeration value; ids follow declaration order
23 | }
24 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/ranger/authorization/spark/authorizer/SparkAccessType.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.ranger.authorization.spark.authorizer
19 |
20 | object SparkAccessType extends Enumeration { // Enumeration of access kinds, from NONE through ADMIN
21 | type SparkAccessType = Value // Alias so the value type can be written as SparkAccessType
22 |
23 | val NONE, CREATE, ALTER, DROP, SELECT, UPDATE, USE, READ, WRITE, ALL, ADMIN = Value // Each name becomes one enumeration value; ids follow declaration order
24 | }
25 |
26 |
--------------------------------------------------------------------------------
/src/test/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed to the Apache Software Foundation (ASF) under one or more
3 | # contributor license agreements. See the NOTICE file distributed with
4 | # this work for additional information regarding copyright ownership.
5 | # The ASF licenses this file to You under the Apache License, Version 2.0
6 | # (the "License"); you may not use this file except in compliance with
7 | # the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 |
18 | # Log only FATAL-level messages, sent to the console (System.err)
19 | log4j.rootCategory=FATAL, console
20 | log4j.appender.console=org.apache.log4j.ConsoleAppender
21 | log4j.appender.console.target=System.err
22 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
23 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q18.sql:
--------------------------------------------------------------------------------
1 | SELECT -- q18: averages of catalog-sales measures, customer birth year and dependent count, for every ROLLUP level of (item, country, state, county)
2 | i_item_id,
3 | ca_country,
4 | ca_state,
5 | ca_county,
6 | avg(cast(cs_quantity AS DECIMAL(12, 2))) agg1,
7 | avg(cast(cs_list_price AS DECIMAL(12, 2))) agg2,
8 | avg(cast(cs_coupon_amt AS DECIMAL(12, 2))) agg3,
9 | avg(cast(cs_sales_price AS DECIMAL(12, 2))) agg4,
10 | avg(cast(cs_net_profit AS DECIMAL(12, 2))) agg5,
11 | avg(cast(c_birth_year AS DECIMAL(12, 2))) agg6,
12 | avg(cast(cd1.cd_dep_count AS DECIMAL(12, 2))) agg7
13 | FROM catalog_sales, customer_demographics cd1, -- cd1 = demographics of the sale (cs_bill_cdemo_sk)
14 | customer_demographics cd2, customer, customer_address, date_dim, item -- cd2 = customer's current demographics; joined below but none of its columns are selected
15 | WHERE cs_sold_date_sk = d_date_sk AND
16 | cs_item_sk = i_item_sk AND
17 | cs_bill_cdemo_sk = cd1.cd_demo_sk AND
18 | cs_bill_customer_sk = c_customer_sk AND
19 | cd1.cd_gender = 'F' AND
20 | cd1.cd_education_status = 'Unknown' AND
21 | c_current_cdemo_sk = cd2.cd_demo_sk AND
22 | c_current_addr_sk = ca_address_sk AND
23 | c_birth_month IN (1, 6, 8, 9, 12, 2) AND
24 | d_year = 1998 AND
25 | ca_state IN ('MS', 'IN', 'ND', 'OK', 'NM', 'VA', 'MS') -- 'MS' is listed twice
26 | GROUP BY ROLLUP (i_item_id, ca_country, ca_state, ca_county)
27 | ORDER BY ca_country, ca_state, ca_county, i_item_id
28 | LIMIT 100
29 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q89.sql:
--------------------------------------------------------------------------------
1 | SELECT * -- q89: monthly store sales per category/class/brand/store whose relative deviation from the group's average monthly sales exceeds 0.1
2 | FROM (
3 | SELECT
4 | i_category,
5 | i_class,
6 | i_brand,
7 | s_store_name,
8 | s_company_name,
9 | d_moy,
10 | sum(ss_sales_price) sum_sales,
11 | avg(sum(ss_sales_price)) -- window average of the grouped sums; the partition (line 13) omits i_class and d_moy
12 | OVER
13 | (PARTITION BY i_category, i_brand, s_store_name, s_company_name)
14 | avg_monthly_sales
15 | FROM item, store_sales, date_dim, store
16 | WHERE ss_item_sk = i_item_sk AND
17 | ss_sold_date_sk = d_date_sk AND
18 | ss_store_sk = s_store_sk AND
19 | d_year IN (1999) AND
20 | ((i_category IN ('Books', 'Electronics', 'Sports') AND
21 | i_class IN ('computers', 'stereo', 'football'))
22 | OR (i_category IN ('Men', 'Jewelry', 'Women') AND
23 | i_class IN ('shirts', 'birdal', 'dresses'))) -- NOTE(review): 'birdal' kept verbatim; confirm it matches the i_class values in the dataset
24 | GROUP BY i_category, i_class, i_brand,
25 | s_store_name, s_company_name, d_moy) tmp1
26 | WHERE CASE WHEN (avg_monthly_sales <> 0) -- avoid dividing by a zero average; the NULL result fails the > 0.1 test
27 | THEN (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales)
28 | ELSE NULL END > 0.1
29 | ORDER BY sum_sales - avg_monthly_sales, s_store_name
30 | LIMIT 100
31 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q70.sql:
--------------------------------------------------------------------------------
1 | SELECT -- q70: store net profit for every ROLLUP level of (s_state, s_county), ranked within its hierarchy level and parent state
2 | sum(ss_net_profit) AS total_sum,
3 | s_state,
4 | s_county,
5 | grouping(s_state) + grouping(s_county) AS lochierarchy, -- sum of the two grouping() flags identifies the rollup level
6 | rank()
7 | OVER (
8 | PARTITION BY grouping(s_state) + grouping(s_county),
9 | CASE WHEN grouping(s_county) = 0
10 | THEN s_state END
11 | ORDER BY sum(ss_net_profit) DESC) AS rank_within_parent
12 | FROM
13 | store_sales, date_dim d1, store
14 | WHERE
15 | d1.d_month_seq BETWEEN 1200 AND 1200 + 11
16 | AND d1.d_date_sk = ss_sold_date_sk
17 | AND s_store_sk = ss_store_sk
18 | AND s_state IN
19 | (SELECT s_state -- restricts the outer query to states returned by the ranked subquery
20 | FROM
21 | (SELECT
22 | s_state AS s_state,
23 | rank()
24 | OVER (PARTITION BY s_state -- NOTE(review): grouped and partitioned by s_state alike, so each partition looks single-row and ranking would always be 1 - confirm intent
25 | ORDER BY sum(ss_net_profit) DESC) AS ranking
26 | FROM store_sales, store, date_dim
27 | WHERE d_month_seq BETWEEN 1200 AND 1200 + 11
28 | AND d_date_sk = ss_sold_date_sk
29 | AND s_store_sk = ss_store_sk
30 | GROUP BY s_state) tmp1
31 | WHERE ranking <= 5)
32 | GROUP BY ROLLUP (s_state, s_county)
33 | ORDER BY
34 | lochierarchy DESC
35 | , CASE WHEN lochierarchy = 0
36 | THEN s_state END
37 | , rank_within_parent
38 | LIMIT 100
39 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/ranger/authorization/spark/authorizer/RangerSparkAuditHandler.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.ranger.authorization.spark.authorizer
19 |
20 | import org.apache.ranger.plugin.audit.RangerDefaultAuditHandler
21 |
22 | class RangerSparkAuditHandler extends RangerDefaultAuditHandler { // Declares no members of its own; everything is inherited from RangerDefaultAuditHandler
23 |
24 | // TODO(Kent Yao): Implement meaningful audit functions
25 |
26 | }
27 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q73.sql:
--------------------------------------------------------------------------------
1 | SELECT -- q73: customers joined to their store-sales tickets that have between 1 and 5 qualifying rows; note there is no LIMIT clause
2 | c_last_name,
3 | c_first_name,
4 | c_salutation,
5 | c_preferred_cust_flag,
6 | ss_ticket_number,
7 | cnt
8 | FROM
9 | (SELECT -- derived table `dj`: row count per (ticket, customer) after the filters below
10 | ss_ticket_number,
11 | ss_customer_sk,
12 | count(*) cnt
13 | FROM store_sales, date_dim, store, household_demographics
14 | WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk
15 | AND store_sales.ss_store_sk = store.s_store_sk
16 | AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
17 | AND date_dim.d_dom BETWEEN 1 AND 2
18 | AND (household_demographics.hd_buy_potential = '>10000' OR
19 | household_demographics.hd_buy_potential = 'unknown')
20 | AND household_demographics.hd_vehicle_count > 0
21 | AND CASE WHEN household_demographics.hd_vehicle_count > 0 -- repeats the previous line's test as a guard for the division; requires dep_count / vehicle_count > 1
22 | THEN
23 | household_demographics.hd_dep_count / household_demographics.hd_vehicle_count
24 | ELSE NULL END > 1
25 | AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) -- i.e. 1999, 2000, 2001
26 | AND store.s_county IN ('Williamson County', 'Franklin Parish', 'Bronx County', 'Orange County')
27 | GROUP BY ss_ticket_number, ss_customer_sk) dj, customer
28 | WHERE ss_customer_sk = c_customer_sk
29 | AND cnt BETWEEN 1 AND 5
30 | ORDER BY cnt DESC
31 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/RangerSparkMasking.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.catalyst.plans.logical
19 |
20 | import org.apache.spark.sql.catalyst.expressions.Attribute
21 |
22 | /**
23 | * A marker [[LogicalPlan]] node for column data masking; it wraps `child` and exposes the child's output unchanged.
24 | */
25 | case class RangerSparkMasking(child: LogicalPlan) extends UnaryNode {
26 | override def output: Seq[Attribute] = child.output // pass-through: the marker adds or removes no attributes
27 | }
28 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q30.sql:
--------------------------------------------------------------------------------
1 | WITH customer_total_return AS -- per (returning customer, state): sum of web return amounts for returns dated in 2002
2 | (SELECT
3 | wr_returning_customer_sk AS ctr_customer_sk,
4 | ca_state AS ctr_state,
5 | sum(wr_return_amt) AS ctr_total_return
6 | FROM web_returns, date_dim, customer_address
7 | WHERE wr_returned_date_sk = d_date_sk
8 | AND d_year = 2002
9 | AND wr_returning_addr_sk = ca_address_sk
10 | GROUP BY wr_returning_customer_sk, ca_state)
11 | SELECT -- customers with a current address in 'GA' whose return total exceeds 1.2x the average for the same ctr_state
12 | c_customer_id,
13 | c_salutation,
14 | c_first_name,
15 | c_last_name,
16 | c_preferred_cust_flag,
17 | c_birth_day,
18 | c_birth_month,
19 | c_birth_year,
20 | c_birth_country,
21 | c_login,
22 | c_email_address,
23 | c_last_review_date,
24 | ctr_total_return
25 | FROM customer_total_return ctr1, customer_address, customer
26 | WHERE ctr1.ctr_total_return > (SELECT avg(ctr_total_return) * 1.2 -- correlated on ctr_state
27 | FROM customer_total_return ctr2
28 | WHERE ctr1.ctr_state = ctr2.ctr_state)
29 | AND ca_address_sk = c_current_addr_sk
30 | AND ca_state = 'GA'
31 | AND ctr1.ctr_customer_sk = c_customer_sk
32 | ORDER BY c_customer_id, c_salutation, c_first_name, c_last_name, c_preferred_cust_flag
33 | , c_birth_day, c_birth_month, c_birth_year, c_birth_country, c_login, c_email_address
34 | , c_last_review_date, ctr_total_return
35 | LIMIT 100
36 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q61.sql:
--------------------------------------------------------------------------------
1 | SELECT -- promotional vs. total 'Jewelry' store sales for d_year 1998, d_moy 11, at GMT offset -5
2 | promotions,
3 | total,
4 | cast(promotions AS DECIMAL(15, 4)) / cast(total AS DECIMAL(15, 4)) * 100 -- promotions as a percentage of total
5 | FROM
6 | (SELECT sum(ss_ext_sales_price) promotions -- sales whose promotion has at least one of the dmail/email/tv flags = 'Y'
7 | FROM store_sales, store, promotion, date_dim, customer, customer_address, item
8 | WHERE ss_sold_date_sk = d_date_sk
9 | AND ss_store_sk = s_store_sk
10 | AND ss_promo_sk = p_promo_sk
11 | AND ss_customer_sk = c_customer_sk
12 | AND ca_address_sk = c_current_addr_sk
13 | AND ss_item_sk = i_item_sk
14 | AND ca_gmt_offset = -5
15 | AND i_category = 'Jewelry'
16 | AND (p_channel_dmail = 'Y' OR p_channel_email = 'Y' OR p_channel_tv = 'Y')
17 | AND s_gmt_offset = -5
18 | AND d_year = 1998
19 | AND d_moy = 11) promotional_sales,
20 | (SELECT sum(ss_ext_sales_price) total -- same filters as above, without the promotion join
21 | FROM store_sales, store, date_dim, customer, customer_address, item
22 | WHERE ss_sold_date_sk = d_date_sk
23 | AND ss_store_sk = s_store_sk
24 | AND ss_customer_sk = c_customer_sk
25 | AND ca_address_sk = c_current_addr_sk
26 | AND ss_item_sk = i_item_sk
27 | AND ca_gmt_offset = -5
28 | AND i_category = 'Jewelry'
29 | AND s_gmt_offset = -5
30 | AND d_year = 1998
31 | AND d_moy = 11) all_sales -- both derived tables are single-row aggregates, so the comma join yields one row
32 | ORDER BY promotions, total
33 | LIMIT 100
34 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q62.sql:
--------------------------------------------------------------------------------
1 | SELECT -- web sales bucketed by (ship date key - sold date key), per warehouse-name prefix, ship mode type and web site
2 | substr(w_warehouse_name, 1, 20), -- first 20 characters of the warehouse name
3 | sm_type,
4 | web_name,
5 | sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk <= 30) -- NOTE(review): treats date surrogate keys as day numbers; confirm keys are consecutive per day
6 | THEN 1
7 | ELSE 0 END) AS `30 days `, -- the back-quoted aliases end with a space
8 | sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 30) AND
9 | (ws_ship_date_sk - ws_sold_date_sk <= 60)
10 | THEN 1
11 | ELSE 0 END) AS `31 - 60 days `,
12 | sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 60) AND
13 | (ws_ship_date_sk - ws_sold_date_sk <= 90)
14 | THEN 1
15 | ELSE 0 END) AS `61 - 90 days `,
16 | sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 90) AND
17 | (ws_ship_date_sk - ws_sold_date_sk <= 120)
18 | THEN 1
19 | ELSE 0 END) AS `91 - 120 days `,
20 | sum(CASE WHEN (ws_ship_date_sk - ws_sold_date_sk > 120)
21 | THEN 1
22 | ELSE 0 END) AS `>120 days `
23 | FROM
24 | web_sales, warehouse, ship_mode, web_site, date_dim
25 | WHERE
26 | d_month_seq BETWEEN 1200 AND 1200 + 11 -- twelve consecutive d_month_seq values, joined on the ship date
27 | AND ws_ship_date_sk = d_date_sk
28 | AND ws_warehouse_sk = w_warehouse_sk
29 | AND ws_ship_mode_sk = sm_ship_mode_sk
30 | AND ws_web_site_sk = web_site_sk
31 | GROUP BY
32 | substr(w_warehouse_name, 1, 20), sm_type, web_name
33 | ORDER BY
34 | substr(w_warehouse_name, 1, 20), sm_type, web_name
35 | LIMIT 100
36 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q99.sql:
--------------------------------------------------------------------------------
1 | SELECT -- catalog sales bucketed by (ship date key - sold date key), per warehouse-name prefix, ship mode type and call center
2 | substr(w_warehouse_name, 1, 20), -- first 20 characters of the warehouse name
3 | sm_type,
4 | cc_name,
5 | sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk <= 30) -- NOTE(review): treats date surrogate keys as day numbers; confirm keys are consecutive per day
6 | THEN 1
7 | ELSE 0 END) AS `30 days `, -- the back-quoted aliases end with a space
8 | sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 30) AND
9 | (cs_ship_date_sk - cs_sold_date_sk <= 60)
10 | THEN 1
11 | ELSE 0 END) AS `31 - 60 days `,
12 | sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 60) AND
13 | (cs_ship_date_sk - cs_sold_date_sk <= 90)
14 | THEN 1
15 | ELSE 0 END) AS `61 - 90 days `,
16 | sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 90) AND
17 | (cs_ship_date_sk - cs_sold_date_sk <= 120)
18 | THEN 1
19 | ELSE 0 END) AS `91 - 120 days `,
20 | sum(CASE WHEN (cs_ship_date_sk - cs_sold_date_sk > 120)
21 | THEN 1
22 | ELSE 0 END) AS `>120 days `
23 | FROM
24 | catalog_sales, warehouse, ship_mode, call_center, date_dim
25 | WHERE
26 | d_month_seq BETWEEN 1200 AND 1200 + 11 -- twelve consecutive d_month_seq values, joined on the ship date
27 | AND cs_ship_date_sk = d_date_sk
28 | AND cs_warehouse_sk = w_warehouse_sk
29 | AND cs_ship_mode_sk = sm_ship_mode_sk
30 | AND cs_call_center_sk = cc_call_center_sk
31 | GROUP BY
32 | substr(w_warehouse_name, 1, 20), sm_type, cc_name
33 | ORDER BY substr(w_warehouse_name, 1, 20), sm_type, cc_name
34 | LIMIT 100
35 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q46.sql:
--------------------------------------------------------------------------------
1 | SELECT -- tickets bought at an address whose city differs from the customer's current city
2 |   c_last_name,
3 |   c_first_name,
4 |   ca_city,
5 |   bought_city,
6 |   ss_ticket_number,
7 |   amt,
8 |   profit
9 | FROM
10 |   (SELECT
11 |     ss_ticket_number,
12 |     ss_customer_sk,
13 |     ca_city AS bought_city,
14 |     sum(ss_coupon_amt) AS amt,
15 |     sum(ss_net_profit) AS profit
16 |   FROM store_sales, date_dim, store, household_demographics, customer_address
17 |   WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk
18 |     AND store_sales.ss_store_sk = store.s_store_sk
19 |     AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
20 |     AND store_sales.ss_addr_sk = customer_address.ca_address_sk
21 |     AND (household_demographics.hd_dep_count = 4
22 |       OR household_demographics.hd_vehicle_count = 3)
23 |     AND date_dim.d_dow IN (0, 6)
24 |     AND date_dim.d_year IN (1999, 2000, 2001)
25 |     AND store.s_city IN ('Fairview', 'Midway')
26 |   GROUP BY ss_ticket_number, ss_customer_sk, ss_addr_sk, ca_city) dn, customer,
27 |   customer_address current_addr
28 | WHERE ss_customer_sk = c_customer_sk
29 |   AND customer.c_current_addr_sk = current_addr.ca_address_sk
30 |   AND current_addr.ca_city <> bought_city
31 | ORDER BY c_last_name, c_first_name, ca_city, bought_city, ss_ticket_number
32 | LIMIT 100
33 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q68.sql:
--------------------------------------------------------------------------------
1 | SELECT -- per-ticket price, tax and list-price totals for tickets bought in a city other than the customer's current city
2 | c_last_name,
3 | c_first_name,
4 | ca_city,
5 | bought_city,
6 | ss_ticket_number,
7 | extended_price,
8 | extended_tax,
9 | list_price
10 | FROM (SELECT
11 | ss_ticket_number,
12 | ss_customer_sk,
13 | ca_city bought_city, -- city of the address attached to the sale (ss_addr_sk)
14 | sum(ss_ext_sales_price) extended_price,
15 | sum(ss_ext_list_price) list_price,
16 | sum(ss_ext_tax) extended_tax
17 | FROM store_sales, date_dim, store, household_demographics, customer_address
18 | WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk
19 | AND store_sales.ss_store_sk = store.s_store_sk
20 | AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
21 | AND store_sales.ss_addr_sk = customer_address.ca_address_sk
22 | AND date_dim.d_dom BETWEEN 1 AND 2
23 | AND (household_demographics.hd_dep_count = 4 OR
24 | household_demographics.hd_vehicle_count = 3)
25 | AND date_dim.d_year IN (1999, 1999 + 1, 1999 + 2) -- i.e. 1999, 2000 and 2001
26 | AND store.s_city IN ('Midway', 'Fairview')
27 | GROUP BY ss_ticket_number, ss_customer_sk, ss_addr_sk, ca_city) dn,
28 | customer,
29 | customer_address current_addr
30 | WHERE ss_customer_sk = c_customer_sk
31 | AND customer.c_current_addr_sk = current_addr.ca_address_sk
32 | AND current_addr.ca_city <> bought_city -- keep only tickets where the two cities differ
33 | ORDER BY c_last_name, ss_ticket_number
34 | LIMIT 100
35 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q39a.sql:
--------------------------------------------------------------------------------
1 | WITH inv AS -- per (warehouse, item, d_moy) inventory statistics for d_year 2001, kept only where stdev / mean > 1
2 | (SELECT
3 | w_warehouse_name,
4 | w_warehouse_sk,
5 | i_item_sk,
6 | d_moy,
7 | stdev,
8 | mean,
9 | CASE mean
10 | WHEN 0
11 | THEN NULL
12 | ELSE stdev / mean END cov -- NULL instead of a division when mean is 0
13 | FROM (SELECT
14 | w_warehouse_name,
15 | w_warehouse_sk,
16 | i_item_sk,
17 | d_moy,
18 | stddev_samp(inv_quantity_on_hand) stdev,
19 | avg(inv_quantity_on_hand) mean
20 | FROM inventory, item, warehouse, date_dim
21 | WHERE inv_item_sk = i_item_sk
22 | AND inv_warehouse_sk = w_warehouse_sk
23 | AND inv_date_sk = d_date_sk
24 | AND d_year = 2001
25 | GROUP BY w_warehouse_name, w_warehouse_sk, i_item_sk, d_moy) foo
26 | WHERE CASE mean -- a zero mean maps to 0 here and therefore fails the > 1 test
27 | WHEN 0
28 | THEN 0
29 | ELSE stdev / mean END > 1)
30 | SELECT -- pairs each (item, warehouse) row having d_moy = 1 with the row having d_moy = 2
31 | inv1.w_warehouse_sk,
32 | inv1.i_item_sk,
33 | inv1.d_moy,
34 | inv1.mean,
35 | inv1.cov,
36 | inv2.w_warehouse_sk,
37 | inv2.i_item_sk,
38 | inv2.d_moy,
39 | inv2.mean,
40 | inv2.cov
41 | FROM inv inv1, inv inv2
42 | WHERE inv1.i_item_sk = inv2.i_item_sk
43 | AND inv1.w_warehouse_sk = inv2.w_warehouse_sk
44 | AND inv1.d_moy = 1
45 | AND inv2.d_moy = 1 + 1
46 | ORDER BY inv1.w_warehouse_sk, inv1.i_item_sk, inv1.d_moy, inv1.mean, inv1.cov
47 | , inv2.d_moy, inv2.mean, inv2.cov
48 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q44.sql:
--------------------------------------------------------------------------------
1 | SELECT -- pairs, rank by rank, store 4's items ordered by average net profit ascending and descending
2 |   ascending.rnk,
3 |   i1.i_product_name AS best_performing,
4 |   i2.i_product_name AS worst_performing
5 | FROM (SELECT *
6 |   FROM (SELECT
7 |     item_sk,
8 |     rank()
9 |     OVER (
10 |       ORDER BY rank_col) AS rnk
11 |     FROM (SELECT
12 |       ss_item_sk AS item_sk,
13 |       avg(ss_net_profit) AS rank_col
14 |     FROM store_sales ss1
15 |     WHERE ss_store_sk = 4
16 |     GROUP BY ss_item_sk
17 |     HAVING avg(ss_net_profit) > 0.9 * (SELECT avg(ss_net_profit) AS rank_col
18 |       FROM store_sales
19 |       WHERE ss_store_sk = 4
20 |         AND ss_addr_sk IS NULL
21 |       GROUP BY ss_store_sk)) asc_base) asc_ranked
22 |   WHERE rnk <= 10) ascending,
23 |   (SELECT *
24 |   FROM (SELECT
25 |     item_sk,
26 |     rank()
27 |     OVER (
28 |       ORDER BY rank_col DESC) AS rnk
29 |     FROM (SELECT
30 |       ss_item_sk AS item_sk,
31 |       avg(ss_net_profit) AS rank_col
32 |     FROM store_sales ss1
33 |     WHERE ss_store_sk = 4
34 |     GROUP BY ss_item_sk
35 |     HAVING avg(ss_net_profit) > 0.9 * (SELECT avg(ss_net_profit) AS rank_col
36 |       FROM store_sales
37 |       WHERE ss_store_sk = 4
38 |         AND ss_addr_sk IS NULL
39 |       GROUP BY ss_store_sk)) desc_base) desc_ranked
40 |   WHERE rnk <= 10) descending,
41 |   item i1, item i2
42 | WHERE ascending.rnk = descending.rnk
43 |   AND i1.i_item_sk = ascending.item_sk
44 |   AND i2.i_item_sk = descending.item_sk
45 | ORDER BY ascending.rnk
46 | LIMIT 100
47 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q63.sql:
--------------------------------------------------------------------------------
1 | SELECT * -- (manager, d_moy) groups whose sales differ from the manager's average group total by more than 10%
2 | FROM (SELECT
3 |   i_manager_id,
4 |   sum(ss_sales_price) sum_sales,
5 |   avg(sum(ss_sales_price))
6 |   OVER (PARTITION BY i_manager_id) avg_monthly_sales -- average of the per-group sums for the same manager
7 | FROM item
8 |   , store_sales
9 |   , date_dim
10 |   , store
11 | WHERE ss_item_sk = i_item_sk
12 |   AND ss_sold_date_sk = d_date_sk
13 |   AND ss_store_sk = s_store_sk
14 |   AND d_month_seq IN (1200, 1200 + 1, 1200 + 2, 1200 + 3, 1200 + 4, 1200 + 5, 1200 + 6, 1200 + 7,
15 |                       1200 + 8, 1200 + 9, 1200 + 10, 1200 + 11)
16 |   AND ((i_category IN ('Books', 'Children', 'Electronics')
17 |     AND i_class IN ('personal', 'portable', 'reference', 'self-help') -- 'reference' was misspelled and could never match
18 |     AND i_brand IN ('scholaramalgamalg #14', 'scholaramalgamalg #7',
19 |                     'exportiunivamalg #9', 'scholaramalgamalg #9'))
20 |   OR (i_category IN ('Women', 'Music', 'Men')
21 |     AND i_class IN ('accessories', 'classical', 'fragrances', 'pants')
22 |     AND i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1',
23 |                     'importoamalg #1')))
24 | GROUP BY i_manager_id, d_moy) tmp1
25 | WHERE CASE WHEN avg_monthly_sales > 0 -- relative deviation; NULL (row dropped) when the average is not positive
26 |   THEN abs(sum_sales - avg_monthly_sales) / avg_monthly_sales
27 |   ELSE NULL END > 0.1
28 | ORDER BY i_manager_id
29 |   , avg_monthly_sales
30 |   , sum_sales
31 | LIMIT 100
32 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q39b.sql:
--------------------------------------------------------------------------------
1 | WITH inv AS -- per (warehouse, item, d_moy) inventory statistics for d_year 2001, kept only where stdev / mean > 1
2 | (SELECT
3 | w_warehouse_name,
4 | w_warehouse_sk,
5 | i_item_sk,
6 | d_moy,
7 | stdev,
8 | mean,
9 | CASE mean
10 | WHEN 0
11 | THEN NULL
12 | ELSE stdev / mean END cov -- NULL instead of a division when mean is 0
13 | FROM (SELECT
14 | w_warehouse_name,
15 | w_warehouse_sk,
16 | i_item_sk,
17 | d_moy,
18 | stddev_samp(inv_quantity_on_hand) stdev,
19 | avg(inv_quantity_on_hand) mean
20 | FROM inventory, item, warehouse, date_dim
21 | WHERE inv_item_sk = i_item_sk
22 | AND inv_warehouse_sk = w_warehouse_sk
23 | AND inv_date_sk = d_date_sk
24 | AND d_year = 2001
25 | GROUP BY w_warehouse_name, w_warehouse_sk, i_item_sk, d_moy) foo
26 | WHERE CASE mean -- a zero mean maps to 0 here and therefore fails the > 1 test
27 | WHEN 0
28 | THEN 0
29 | ELSE stdev / mean END > 1)
30 | SELECT -- pairs each (item, warehouse) row having d_moy = 1 with the row having d_moy = 2
31 | inv1.w_warehouse_sk,
32 | inv1.i_item_sk,
33 | inv1.d_moy,
34 | inv1.mean,
35 | inv1.cov,
36 | inv2.w_warehouse_sk,
37 | inv2.i_item_sk,
38 | inv2.d_moy,
39 | inv2.mean,
40 | inv2.cov
41 | FROM inv inv1, inv inv2
42 | WHERE inv1.i_item_sk = inv2.i_item_sk
43 | AND inv1.w_warehouse_sk = inv2.w_warehouse_sk
44 | AND inv1.d_moy = 1
45 | AND inv2.d_moy = 1 + 1
46 | AND inv1.cov > 1.5 -- additionally requires the d_moy = 1 row's cov to exceed 1.5
47 | ORDER BY inv1.w_warehouse_sk, inv1.i_item_sk, inv1.d_moy, inv1.mean, inv1.cov
48 | , inv2.d_moy, inv2.mean, inv2.cov
49 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/RangerSparkRowFilter.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.catalyst.plans.logical
19 | import org.apache.spark.sql.catalyst.expressions.Attribute
20 |
21 | /**
22 | * A wrapper [[LogicalPlan]] for a transformed plan with a row-level filter applied. It is a marker
23 | * only and is removed during the LogicalPlan -> PhysicalPlan conversion.
24 | * Its output is exactly the output of `child`.
25 | */
26 | case class RangerSparkRowFilter(child: LogicalPlan) extends UnaryNode {
27 | override def output: Seq[Attribute] = child.output
28 | }
29 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q81.sql:
--------------------------------------------------------------------------------
1 | WITH customer_total_return AS -- per (returning customer, state): sum of catalog return amounts incl. tax for returns dated in 2000
2 | (SELECT
3 | cr_returning_customer_sk AS ctr_customer_sk,
4 | ca_state AS ctr_state,
5 | sum(cr_return_amt_inc_tax) AS ctr_total_return
6 | FROM catalog_returns, date_dim, customer_address
7 | WHERE cr_returned_date_sk = d_date_sk
8 | AND d_year = 2000
9 | AND cr_returning_addr_sk = ca_address_sk
10 | GROUP BY cr_returning_customer_sk, ca_state )
11 | SELECT -- customers with a current address in 'GA' whose return total exceeds 1.2x the average for the same ctr_state
12 | c_customer_id,
13 | c_salutation,
14 | c_first_name,
15 | c_last_name,
16 | ca_street_number,
17 | ca_street_name,
18 | ca_street_type,
19 | ca_suite_number,
20 | ca_city,
21 | ca_county,
22 | ca_state,
23 | ca_zip,
24 | ca_country,
25 | ca_gmt_offset,
26 | ca_location_type,
27 | ctr_total_return
28 | FROM customer_total_return ctr1, customer_address, customer
29 | WHERE ctr1.ctr_total_return > (SELECT avg(ctr_total_return) * 1.2 -- correlated on ctr_state
30 | FROM customer_total_return ctr2
31 | WHERE ctr1.ctr_state = ctr2.ctr_state)
32 | AND ca_address_sk = c_current_addr_sk
33 | AND ca_state = 'GA'
34 | AND ctr1.ctr_customer_sk = c_customer_sk
35 | ORDER BY c_customer_id, c_salutation, c_first_name, c_last_name, ca_street_number, ca_street_name
36 | , ca_street_type, ca_suite_number, ca_city, ca_county, ca_state, ca_zip, ca_country, ca_gmt_offset
37 | , ca_location_type, ctr_total_return
38 | LIMIT 100
39 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q71.sql:
--------------------------------------------------------------------------------
1 | SELECT -- sales totals per brand and (t_hour, t_minute) across web, catalog and store sales for d_moy 11 of 1999
2 | i_brand_id brand_id,
3 | i_brand brand,
4 | t_hour,
5 | t_minute,
6 | sum(ext_price) ext_price
7 | FROM item,
8 | (SELECT -- the three branches project the same four columns so they can be combined with UNION ALL
9 | ws_ext_sales_price AS ext_price,
10 | ws_sold_date_sk AS sold_date_sk,
11 | ws_item_sk AS sold_item_sk,
12 | ws_sold_time_sk AS time_sk
13 | FROM web_sales, date_dim
14 | WHERE d_date_sk = ws_sold_date_sk
15 | AND d_moy = 11
16 | AND d_year = 1999
17 | UNION ALL
18 | SELECT
19 | cs_ext_sales_price AS ext_price,
20 | cs_sold_date_sk AS sold_date_sk,
21 | cs_item_sk AS sold_item_sk,
22 | cs_sold_time_sk AS time_sk
23 | FROM catalog_sales, date_dim
24 | WHERE d_date_sk = cs_sold_date_sk
25 | AND d_moy = 11
26 | AND d_year = 1999
27 | UNION ALL
28 | SELECT
29 | ss_ext_sales_price AS ext_price,
30 | ss_sold_date_sk AS sold_date_sk,
31 | ss_item_sk AS sold_item_sk,
32 | ss_sold_time_sk AS time_sk
33 | FROM store_sales, date_dim
34 | WHERE d_date_sk = ss_sold_date_sk
35 | AND d_moy = 11
36 | AND d_year = 1999
37 | ) AS tmp, time_dim
38 | WHERE
39 | sold_item_sk = i_item_sk
40 | AND i_manager_id = 1
41 | AND time_sk = t_time_sk
42 | AND (t_meal_time = 'breakfast' OR t_meal_time = 'dinner')
43 | GROUP BY i_brand, i_brand_id, t_hour, t_minute
44 | ORDER BY ext_price DESC, brand_id
45 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q53.sql:
--------------------------------------------------------------------------------
1 | SELECT * -- (manufacturer, d_qoy) groups whose sales differ from the manufacturer's average group total by more than 10%
2 | FROM
3 | (SELECT
4 | i_manufact_id,
5 | sum(ss_sales_price) sum_sales,
6 | avg(sum(ss_sales_price))
7 | OVER (PARTITION BY i_manufact_id) avg_quarterly_sales -- average of the per-group sums for the same manufacturer
8 | FROM item, store_sales, date_dim, store
9 | WHERE ss_item_sk = i_item_sk AND
10 | ss_sold_date_sk = d_date_sk AND
11 | ss_store_sk = s_store_sk AND
12 | d_month_seq IN (1200, 1200 + 1, 1200 + 2, 1200 + 3, 1200 + 4, 1200 + 5, 1200 + 6,
13 | 1200 + 7, 1200 + 8, 1200 + 9, 1200 + 10, 1200 + 11) AND
14 | ((i_category IN ('Books', 'Children', 'Electronics') AND
15 | i_class IN ('personal', 'portable', 'reference', 'self-help') AND
16 | i_brand IN ('scholaramalgamalg #14', 'scholaramalgamalg #7',
17 | 'exportiunivamalg #9', 'scholaramalgamalg #9'))
18 | OR
19 | (i_category IN ('Women', 'Music', 'Men') AND
20 | i_class IN ('accessories', 'classical', 'fragrances', 'pants') AND
21 | i_brand IN ('amalgimporto #1', 'edu packscholar #1', 'exportiimporto #1',
22 | 'importoamalg #1')))
23 | GROUP BY i_manufact_id, d_qoy) tmp1
24 | WHERE CASE WHEN avg_quarterly_sales > 0 -- relative deviation; NULL (row dropped) when the average is not positive
25 | THEN abs(sum_sales - avg_quarterly_sales) / avg_quarterly_sales
26 | ELSE NULL END > 0.1
27 | ORDER BY avg_quarterly_sales,
28 | sum_sales,
29 | i_manufact_id
30 | LIMIT 100
31 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q72.sql:
--------------------------------------------------------------------------------
1 | SELECT -- 1999 catalog sales shipped more than 5 days after sale while same-week inventory was below the ordered quantity
2 |   i_item_desc,
3 |   w_warehouse_name,
4 |   d1.d_week_seq,
5 |   -- count() skips only NULLs, so the CASE has no ELSE branch: rows that do not match yield NULL.
6 |   -- With "ELSE 0" both columns counted every row and always equalled total_cnt.
7 |   count(CASE WHEN p_promo_sk IS NULL THEN 1 END) no_promo,
8 |   count(CASE WHEN p_promo_sk IS NOT NULL THEN 1 END) promo,
9 |   count(*) total_cnt
10 | FROM catalog_sales
11 |   JOIN inventory ON (cs_item_sk = inv_item_sk)
12 |   JOIN warehouse ON (w_warehouse_sk = inv_warehouse_sk)
13 |   JOIN item ON (i_item_sk = cs_item_sk)
14 |   JOIN customer_demographics ON (cs_bill_cdemo_sk = cd_demo_sk)
15 |   JOIN household_demographics ON (cs_bill_hdemo_sk = hd_demo_sk)
16 |   JOIN date_dim d1 ON (cs_sold_date_sk = d1.d_date_sk) -- d1: sold date
17 |   JOIN date_dim d2 ON (inv_date_sk = d2.d_date_sk) -- d2: inventory date
18 |   JOIN date_dim d3 ON (cs_ship_date_sk = d3.d_date_sk) -- d3: ship date
19 |   LEFT OUTER JOIN promotion ON (cs_promo_sk = p_promo_sk)
20 |   LEFT OUTER JOIN catalog_returns ON (cr_item_sk = cs_item_sk AND cr_order_number = cs_order_number)
21 | WHERE d1.d_week_seq = d2.d_week_seq
22 |   AND inv_quantity_on_hand < cs_quantity
23 |   AND d3.d_date > (cast(d1.d_date AS DATE) + interval 5 days)
24 |   AND hd_buy_potential = '>10000'
25 |   AND cd_marital_status = 'D'
26 |   AND d1.d_year = 1999
27 | GROUP BY i_item_desc, w_warehouse_name, d1.d_week_seq
28 | ORDER BY total_cnt DESC, i_item_desc, w_warehouse_name, d_week_seq
29 | LIMIT 100
30 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q34.sql:
--------------------------------------------------------------------------------
1 | SELECT -- customers joined to their store-sales tickets whose qualifying row count is between 15 and 20
2 |   c_last_name,
3 |   c_first_name,
4 |   c_salutation,
5 |   c_preferred_cust_flag,
6 |   ss_ticket_number,
7 |   cnt
8 | FROM
9 |   (SELECT
10 |     ss_ticket_number,
11 |     ss_customer_sk,
12 |     count(*) AS cnt
13 |   FROM store_sales, date_dim, store, household_demographics
14 |   WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk
15 |     AND store_sales.ss_store_sk = store.s_store_sk
16 |     AND store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
17 |     AND (date_dim.d_dom BETWEEN 1 AND 3
18 |       OR date_dim.d_dom BETWEEN 25 AND 28)
19 |     AND household_demographics.hd_buy_potential IN ('>10000', 'unknown')
20 |     AND household_demographics.hd_vehicle_count > 0
21 |     AND (CASE WHEN household_demographics.hd_vehicle_count > 0
22 |       THEN household_demographics.hd_dep_count / household_demographics.hd_vehicle_count
23 |       ELSE NULL
24 |       END) > 1.2
25 |     AND date_dim.d_year IN (1999, 2000, 2001)
26 |     AND store.s_county IN ('Williamson County')
27 |   GROUP BY ss_ticket_number, ss_customer_sk) dn, customer
28 | WHERE ss_customer_sk = c_customer_sk
29 |   AND cnt BETWEEN 15 AND 20
30 | ORDER BY c_last_name, c_first_name, c_salutation, c_preferred_cust_flag DESC
31 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q76.sql:
--------------------------------------------------------------------------------
1 | SELECT -- per channel: count and amount of sales rows whose chosen key column IS NULL, by d_year, d_qoy and item category
2 | channel,
3 | col_name,
4 | d_year,
5 | d_qoy,
6 | i_category,
7 | COUNT(*) sales_cnt,
8 | SUM(ext_sales_price) sales_amt
9 | FROM (
10 | SELECT
11 | 'store' AS channel,
12 | ss_store_sk col_name, -- always NULL here because of the IS NULL filter below
13 | d_year,
14 | d_qoy,
15 | i_category,
16 | ss_ext_sales_price ext_sales_price
17 | FROM store_sales, item, date_dim
18 | WHERE ss_store_sk IS NULL
19 | AND ss_sold_date_sk = d_date_sk
20 | AND ss_item_sk = i_item_sk
21 | UNION ALL
22 | SELECT
23 | 'web' AS channel,
24 | ws_ship_customer_sk col_name, -- always NULL here because of the IS NULL filter below
25 | d_year,
26 | d_qoy,
27 | i_category,
28 | ws_ext_sales_price ext_sales_price
29 | FROM web_sales, item, date_dim
30 | WHERE ws_ship_customer_sk IS NULL
31 | AND ws_sold_date_sk = d_date_sk
32 | AND ws_item_sk = i_item_sk
33 | UNION ALL
34 | SELECT
35 | 'catalog' AS channel,
36 | cs_ship_addr_sk col_name, -- always NULL here because of the IS NULL filter below
37 | d_year,
38 | d_qoy,
39 | i_category,
40 | cs_ext_sales_price ext_sales_price
41 | FROM catalog_sales, item, date_dim
42 | WHERE cs_ship_addr_sk IS NULL
43 | AND cs_sold_date_sk = d_date_sk
44 | AND cs_item_sk = i_item_sk) foo
45 | GROUP BY channel, col_name, d_year, d_qoy, i_category
46 | ORDER BY channel, col_name, d_year, d_qoy, i_category
47 | LIMIT 100
48 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q69.sql:
--------------------------------------------------------------------------------
1 | SELECT -- demographics of KY/GA/NM customers with store sales but no web or catalog sales in d_moy 4..6 of 2001
2 | cd_gender,
3 | cd_marital_status,
4 | cd_education_status,
5 | count(*) cnt1, -- cnt1, cnt2 and cnt3 are all count(*) over the same groups, so they carry the same value
6 | cd_purchase_estimate,
7 | count(*) cnt2,
8 | cd_credit_rating,
9 | count(*) cnt3
10 | FROM
11 | customer c, customer_address ca, customer_demographics
12 | WHERE
13 | c.c_current_addr_sk = ca.ca_address_sk AND
14 | ca_state IN ('KY', 'GA', 'NM') AND
15 | cd_demo_sk = c.c_current_cdemo_sk AND
16 | exists(SELECT * -- the customer has at least one store sale in the window
17 | FROM store_sales, date_dim
18 | WHERE c.c_customer_sk = ss_customer_sk AND
19 | ss_sold_date_sk = d_date_sk AND
20 | d_year = 2001 AND
21 | d_moy BETWEEN 4 AND 4 + 2) AND
22 | (NOT exists(SELECT * -- ... and no web sale billed to them in the window
23 | FROM web_sales, date_dim
24 | WHERE c.c_customer_sk = ws_bill_customer_sk AND
25 | ws_sold_date_sk = d_date_sk AND
26 | d_year = 2001 AND
27 | d_moy BETWEEN 4 AND 4 + 2) AND
28 | NOT exists(SELECT * -- ... and no catalog sale shipped to them in the window
29 | FROM catalog_sales, date_dim
30 | WHERE c.c_customer_sk = cs_ship_customer_sk AND
31 | cs_sold_date_sk = d_date_sk AND
32 | d_year = 2001 AND
33 | d_moy BETWEEN 4 AND 4 + 2))
34 | GROUP BY cd_gender, cd_marital_status, cd_education_status,
35 | cd_purchase_estimate, cd_credit_rating
36 | ORDER BY cd_gender, cd_marital_status, cd_education_status,
37 | cd_purchase_estimate, cd_credit_rating
38 | LIMIT 100
39 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/execution/RangerSparkPlanOmitStrategy.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.execution
19 |
20 | import org.apache.spark.sql.{SparkSession, Strategy}
21 | import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, RangerSparkMasking, RangerSparkRowFilter}
22 |
23 | /**
24 |  * A Spark [[Strategy]] that drops the row-level-filter and data-masking marker nodes during
25 |  * planning: a marker is planned as its child; any other plan yields no result from this strategy.
26 |  */
27 | case class RangerSparkPlanOmitStrategy(spark: SparkSession) extends Strategy {
28 |   override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
29 |     case RangerSparkMasking(masked) => Seq(planLater(masked))
30 |     case RangerSparkRowFilter(filtered) => Seq(planLater(filtered))
31 |     case _ => Seq.empty
32 |   }
33 | }
34 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q13.sql:
--------------------------------------------------------------------------------
1 | SELECT -- averages and a total over 2001 store sales matching one demographic band and one address/profit band
2 |   avg(ss_quantity),
3 |   avg(ss_ext_sales_price),
4 |   avg(ss_ext_wholesale_cost),
5 |   sum(ss_ext_wholesale_cost)
6 | FROM store_sales, store, customer_demographics, household_demographics, customer_address, date_dim
7 | WHERE s_store_sk = ss_store_sk
8 |   AND ss_sold_date_sk = d_date_sk
9 |   AND d_year = 2001
10 |   -- join keys common to all three demographic bands
11 |   AND ss_hdemo_sk = hd_demo_sk
12 |   AND cd_demo_sk = ss_cdemo_sk
13 |   AND ((cd_marital_status = 'M'
14 |     AND cd_education_status = 'Advanced Degree'
15 |     AND ss_sales_price BETWEEN 100.00 AND 150.00
16 |     AND hd_dep_count = 3)
17 |   OR (cd_marital_status = 'S'
18 |     AND cd_education_status = 'College'
19 |     AND ss_sales_price BETWEEN 50.00 AND 100.00
20 |     AND hd_dep_count = 1)
21 |   OR (cd_marital_status = 'W'
22 |     AND cd_education_status = '2 yr Degree'
23 |     AND ss_sales_price BETWEEN 150.00 AND 200.00
24 |     AND hd_dep_count = 1))
25 |   -- join key and country common to all three address/profit bands
26 |   AND ss_addr_sk = ca_address_sk
27 |   AND ca_country = 'United States'
28 |   AND ((ca_state IN ('TX', 'OH')
29 |     AND ss_net_profit BETWEEN 100 AND 200)
30 |   OR (ca_state IN ('OR', 'NM', 'KY')
31 |     AND ss_net_profit BETWEEN 150 AND 300)
32 |   OR (ca_state IN ('VA', 'TX', 'MS')
33 |     AND ss_net_profit BETWEEN 50 AND 250))
34 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q17.sql:
--------------------------------------------------------------------------------
1 | SELECT -- per item and store state: quantity statistics for store sales, their returns and matching catalog sales
2 |   i_item_id,
3 |   i_item_desc,
4 |   s_state,
5 |   count(ss_quantity) AS store_sales_quantitycount,
6 |   avg(ss_quantity) AS store_sales_quantityave,
7 |   stddev_samp(ss_quantity) AS store_sales_quantitystdev,
8 |   stddev_samp(ss_quantity) / avg(ss_quantity) AS store_sales_quantitycov,
9 |   count(sr_return_quantity) as_store_returns_quantitycount, -- alias really starts with "as_"; kept so output column names do not change
10 |   avg(sr_return_quantity) as_store_returns_quantityave,
11 |   stddev_samp(sr_return_quantity) as_store_returns_quantitystdev,
12 |   stddev_samp(sr_return_quantity) / avg(sr_return_quantity) AS store_returns_quantitycov,
13 |   count(cs_quantity) AS catalog_sales_quantitycount,
14 |   avg(cs_quantity) AS catalog_sales_quantityave,
15 |   stddev_samp(cs_quantity) AS catalog_sales_quantitystdev, -- plain standard deviation, like the other *stdev columns
16 |   stddev_samp(cs_quantity) / avg(cs_quantity) AS catalog_sales_quantitycov
17 | FROM store_sales, store_returns, catalog_sales, date_dim d1, date_dim d2, date_dim d3, store, item
18 | WHERE d1.d_quarter_name = '2001Q1' -- d1: store sale date
19 |   AND d1.d_date_sk = ss_sold_date_sk
20 |   AND i_item_sk = ss_item_sk
21 |   AND s_store_sk = ss_store_sk
22 |   AND ss_customer_sk = sr_customer_sk
23 |   AND ss_item_sk = sr_item_sk
24 |   AND ss_ticket_number = sr_ticket_number
25 |   AND sr_returned_date_sk = d2.d_date_sk -- d2: return date
26 |   AND d2.d_quarter_name IN ('2001Q1', '2001Q2', '2001Q3')
27 |   AND sr_customer_sk = cs_bill_customer_sk
28 |   AND sr_item_sk = cs_item_sk
29 |   AND cs_sold_date_sk = d3.d_date_sk -- d3: catalog sale date
30 |   AND d3.d_quarter_name IN ('2001Q1', '2001Q2', '2001Q3')
31 | GROUP BY i_item_id, i_item_desc, s_state
32 | ORDER BY i_item_id, i_item_desc, s_state
33 | LIMIT 100
34 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q35.sql:
--------------------------------------------------------------------------------
1 | SELECT  -- q35: dependent-count statistics for customers with a store purchase and also a web or catalog purchase in 2002 with d_qoy < 4
2 | ca_state,
3 | cd_gender,
4 | cd_marital_status,
5 | count(*) cnt1,  -- cnt1, cnt2 and cnt3 are all count(*) over the same group
6 | min(cd_dep_count),  -- cd_dep_count is a GROUP BY key but is only exposed through these aggregates
7 | max(cd_dep_count),
8 | avg(cd_dep_count),
9 | cd_dep_employed_count,
10 | count(*) cnt2,
11 | min(cd_dep_employed_count),
12 | max(cd_dep_employed_count),
13 | avg(cd_dep_employed_count),
14 | cd_dep_college_count,
15 | count(*) cnt3,
16 | min(cd_dep_college_count),
17 | max(cd_dep_college_count),
18 | avg(cd_dep_college_count)
19 | FROM
20 | customer c, customer_address ca, customer_demographics
21 | WHERE
22 | c.c_current_addr_sk = ca.ca_address_sk AND
23 | cd_demo_sk = c.c_current_cdemo_sk AND
24 | exists(SELECT *  -- required: at least one store sale for this customer in the period
25 | FROM store_sales, date_dim
26 | WHERE c.c_customer_sk = ss_customer_sk AND
27 | ss_sold_date_sk = d_date_sk AND
28 | d_year = 2002 AND
29 | d_qoy < 4) AND
30 | (exists(SELECT *  -- plus either a web sale billed to the customer ...
31 | FROM web_sales, date_dim
32 | WHERE c.c_customer_sk = ws_bill_customer_sk AND
33 | ws_sold_date_sk = d_date_sk AND
34 | d_year = 2002 AND
35 | d_qoy < 4) OR
36 | exists(SELECT *  -- ... or a catalog sale shipped to the customer, in the same period
37 | FROM catalog_sales, date_dim
38 | WHERE c.c_customer_sk = cs_ship_customer_sk AND
39 | cs_sold_date_sk = d_date_sk AND
40 | d_year = 2002 AND
41 | d_qoy < 4))
42 | GROUP BY ca_state, cd_gender, cd_marital_status, cd_dep_count,
43 | cd_dep_employed_count, cd_dep_college_count
44 | ORDER BY ca_state, cd_gender, cd_marital_status, cd_dep_count,
45 | cd_dep_employed_count, cd_dep_college_count
46 | LIMIT 100
47 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/ranger/authorization/spark/authorizer/RangerSparkSQLExtension.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.ranger.authorization.spark.authorizer
19 |
20 | import org.apache.spark.sql.SparkSessionExtensions
21 | import org.apache.spark.sql.catalyst.optimizer.{RangerSparkAuthorizerExtension, RangerSparkMaskingExtension, RangerSparkRowFilterExtension}
22 | import org.apache.spark.sql.execution.RangerSparkPlanOmitStrategy
23 |
24 | class RangerSparkSQLExtension extends Extensions { // Registers Ranger's optimizer rules and planner strategy; `Extensions` is declared outside this file.
25 | override def apply(ext: SparkSessionExtensions): Unit = {
26 | ext.injectOptimizerRule(RangerSparkAuthorizerExtension) // injected first, ahead of the row-filter and masking rules
27 | ext.injectOptimizerRule(RangerSparkRowFilterExtension)
28 | ext.injectOptimizerRule(RangerSparkMaskingExtension)
29 | ext.injectPlannerStrategy(RangerSparkPlanOmitStrategy) // the only planner strategy registered here
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/AuthzUtils.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql
19 |
20 | import scala.util.{Failure, Success, Try}
21 |
22 | /**
23 |  * Reflection helpers for reading and writing fields declared directly on an
24 |  * object's runtime class, regardless of their visibility.
25 |  */
26 | private[sql] object AuthzUtils {
27 |
28 |   /** Looks up the field `name` declared on `o`'s runtime class and makes it accessible. */
29 |   private def declaredField(o: Any, name: String): java.lang.reflect.Field = {
30 |     val f = o.getClass.getDeclaredField(name)
31 |     f.setAccessible(true)
32 |     f
33 |   }
34 |
35 |   /**
36 |    * Returns the current value of field `name` on `o`.
37 |    * An exception raised by the reflective lookup or read is rethrown unchanged.
38 |    */
39 |   def getFieldVal(o: Any, name: String): Any =
40 |     Try(declaredField(o, name).get(o)) match {
41 |       case Failure(cause) => throw cause
42 |       case Success(current) => current
43 |     }
44 |
45 |   /**
46 |    * Assigns `value` to field `name` on `o`.
47 |    * An exception raised by the reflective lookup or write is rethrown unchanged.
48 |    */
49 |   def setFieldVal(o: Any, name: String, value: Any): Unit =
50 |     Try(declaredField(o, name).set(o, value.asInstanceOf[AnyRef])) match {
51 |       case Failure(cause) => throw cause
52 |       case Success(_) =>
53 |     }
54 | }
55 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RangerSparkOptimizer.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.catalyst.optimizer
19 |
20 | import org.apache.spark.sql.SparkSession
21 | import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
22 | import org.apache.spark.sql.catalyst.rules.RuleExecutor
23 |
24 | /**
25 |  * A rule executor that mirrors the session optimizer's batches with every rule listed in
26 |  * `extendedOperatorOptimizationRules` (the rules contributed via `spark.sql.extensions`) removed.
27 |  *
28 |  * @param spark session whose optimizer batches are copied
29 |  */
30 | class RangerSparkOptimizer(spark: SparkSession) extends RuleExecutor[LogicalPlan] {
31 |
32 |   /**
33 |    * Rebuilds each batch of the session optimizer without the injected rules.
34 |    *
35 |    * The surviving rules keep the relative order they have in the source batch;
36 |    * routing them through an unordered Set would hand them to the executor in
37 |    * hash order. Every batch is re-created with `FixedPoint(maxIterations)` taken
38 |    * from its original strategy.
39 |    */
40 |   override def batches: Seq[Batch] = {
41 |     val optimizer = spark.sessionState.optimizer
42 |     val injected = optimizer.extendedOperatorOptimizationRules.toSet
43 |     optimizer.batches.map { batch =>
44 |       // `distinct` keeps first occurrences only, so a rule listed twice still runs once per pass.
45 |       val kept = batch.rules.filterNot(rule => injected.contains(rule)).distinct
46 |       Batch(batch.name, FixedPoint(batch.strategy.maxIterations), kept: _*)
47 |     }
48 |   }
49 | }
50 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q50.sql:
--------------------------------------------------------------------------------
1 | SELECT  -- q50: per store, counts of returned store sales bucketed by the gap between return and sale date keys
2 | s_store_name,
3 | s_company_id,
4 | s_street_number,
5 | s_street_name,
6 | s_street_type,
7 | s_suite_number,
8 | s_city,
9 | s_county,
10 | s_state,
11 | s_zip,
12 | sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk <= 30)  -- the gap is a difference of date surrogate keys; aliases call it days - assumes consecutive keys are one day apart, TODO confirm
13 | THEN 1
14 | ELSE 0 END) AS `30 days `,
15 | sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 30) AND
16 | (sr_returned_date_sk - ss_sold_date_sk <= 60)
17 | THEN 1
18 | ELSE 0 END) AS `31 - 60 days `,
19 | sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 60) AND
20 | (sr_returned_date_sk - ss_sold_date_sk <= 90)
21 | THEN 1
22 | ELSE 0 END) AS `61 - 90 days `,
23 | sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 90) AND
24 | (sr_returned_date_sk - ss_sold_date_sk <= 120)
25 | THEN 1
26 | ELSE 0 END) AS `91 - 120 days `,
27 | sum(CASE WHEN (sr_returned_date_sk - ss_sold_date_sk > 120)
28 | THEN 1
29 | ELSE 0 END) AS `>120 days `
30 | FROM
31 | store_sales, store_returns, store, date_dim d1, date_dim d2
32 | WHERE
33 | d2.d_year = 2001  -- d2 is the return date: only returns dated 2001, month 8
34 | AND d2.d_moy = 8
35 | AND ss_ticket_number = sr_ticket_number  -- a return is matched to its sale by ticket, item and customer
36 | AND ss_item_sk = sr_item_sk
37 | AND ss_sold_date_sk = d1.d_date_sk  -- d1 is the sale date; it is joined but carries no filter
38 | AND sr_returned_date_sk = d2.d_date_sk
39 | AND ss_customer_sk = sr_customer_sk
40 | AND ss_store_sk = s_store_sk
41 | GROUP BY
42 | s_store_name, s_company_id, s_street_number, s_street_name, s_street_type,
43 | s_suite_number, s_city, s_county, s_state, s_zip
44 | ORDER BY
45 | s_store_name, s_company_id, s_street_number, s_street_name, s_street_type,
46 | s_suite_number, s_city, s_county, s_state, s_zip
47 | LIMIT 100
48 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q54.sql:
--------------------------------------------------------------------------------
1 | WITH my_customers AS (  -- distinct customers with a catalog or web purchase of a 'Women' / 'maternity' item dated December 1998
2 | SELECT DISTINCT
3 | c_customer_sk,
4 | c_current_addr_sk
5 | FROM
6 | (SELECT  -- catalog and web sales normalised to the same three column names
7 | cs_sold_date_sk sold_date_sk,
8 | cs_bill_customer_sk customer_sk,
9 | cs_item_sk item_sk
10 | FROM catalog_sales
11 | UNION ALL
12 | SELECT
13 | ws_sold_date_sk sold_date_sk,
14 | ws_bill_customer_sk customer_sk,
15 | ws_item_sk item_sk
16 | FROM web_sales
17 | ) cs_or_ws_sales,
18 | item,
19 | date_dim,
20 | customer
21 | WHERE sold_date_sk = d_date_sk
22 | AND item_sk = i_item_sk
23 | AND i_category = 'Women'
24 | AND i_class = 'maternity'
25 | AND c_customer_sk = cs_or_ws_sales.customer_sk
26 | AND d_moy = 12
27 | AND d_year = 1998
28 | )
29 | , my_revenue AS (  -- summed store-sales ext price per customer from my_customers
30 | SELECT
31 | c_customer_sk,
32 | sum(ss_ext_sales_price) AS revenue
33 | FROM my_customers,
34 | store_sales,
35 | customer_address,
36 | store,
37 | date_dim
38 | WHERE c_current_addr_sk = ca_address_sk
39 | AND ca_county = s_county  -- store is joined only on the address county/state; this CTE has no ss_store_sk = s_store_sk predicate
40 | AND ca_state = s_state
41 | AND ss_sold_date_sk = d_date_sk
42 | AND c_customer_sk = ss_customer_sk
43 | AND d_month_seq BETWEEN (SELECT DISTINCT d_month_seq + 1  -- lower bound: month sequence of December 1998 plus 1
44 | FROM date_dim
45 | WHERE d_year = 1998 AND d_moy = 12)
46 | AND (SELECT DISTINCT d_month_seq + 3  -- upper bound: month sequence of December 1998 plus 3
47 | FROM date_dim
48 | WHERE d_year = 1998 AND d_moy = 12)
49 | GROUP BY c_customer_sk
50 | )
51 | , segments AS
52 | (SELECT cast((revenue / 50) AS INT) AS segment  -- revenue divided by 50 and cast to INT
53 | FROM my_revenue)
54 | SELECT
55 | segment,
56 | count(*) AS num_customers,
57 | segment * 50 AS segment_base
58 | FROM segments
59 | GROUP BY segment
60 | ORDER BY segment, num_customers
61 | LIMIT 100
62 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q48.sql:
--------------------------------------------------------------------------------
1 | SELECT sum(ss_quantity)  -- q48: total quantity of 2001 store sales matching one demographic/price branch and one state/net-profit branch
2 | FROM store_sales, store, customer_demographics, customer_address, date_dim
3 | WHERE s_store_sk = ss_store_sk
4 | AND ss_sold_date_sk = d_date_sk AND d_year = 2001
5 | AND
6 | (  -- first OR group: the customer_demographics join key is repeated inside every branch
7 | (
8 | cd_demo_sk = ss_cdemo_sk
9 | AND
10 | cd_marital_status = 'M'
11 | AND
12 | cd_education_status = '4 yr Degree'
13 | AND
14 | ss_sales_price BETWEEN 100.00 AND 150.00
15 | )
16 | OR
17 | (
18 | cd_demo_sk = ss_cdemo_sk
19 | AND
20 | cd_marital_status = 'D'
21 | AND
22 | cd_education_status = '2 yr Degree'
23 | AND
24 | ss_sales_price BETWEEN 50.00 AND 100.00
25 | )
26 | OR
27 | (
28 | cd_demo_sk = ss_cdemo_sk
29 | AND
30 | cd_marital_status = 'S'
31 | AND
32 | cd_education_status = 'College'
33 | AND
34 | ss_sales_price BETWEEN 150.00 AND 200.00
35 | )
36 | )
37 | AND
38 | (  -- second OR group: the customer_address join key is repeated inside every branch
39 | (
40 | ss_addr_sk = ca_address_sk
41 | AND
42 | ca_country = 'United States'
43 | AND
44 | ca_state IN ('CO', 'OH', 'TX')
45 | AND ss_net_profit BETWEEN 0 AND 2000
46 | )
47 | OR
48 | (ss_addr_sk = ca_address_sk
49 | AND
50 | ca_country = 'United States'
51 | AND
52 | ca_state IN ('OR', 'MN', 'KY')
53 | AND ss_net_profit BETWEEN 150 AND 3000
54 | )
55 | OR
56 | (ss_addr_sk = ca_address_sk
57 | AND
58 | ca_country = 'United States'
59 | AND
60 | ca_state IN ('VA', 'CA', 'MS')
61 | AND ss_net_profit BETWEEN 50 AND 25000
62 | )
63 | )
64 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q57.sql:
--------------------------------------------------------------------------------
1 | WITH v1 AS (  -- monthly catalog sales per category / brand / call center, with the per-year average and a month rank
2 | SELECT
3 | i_category,
4 | i_brand,
5 | cc_name,
6 | d_year,
7 | d_moy,
8 | sum(cs_sales_price) sum_sales,
9 | avg(sum(cs_sales_price))  -- average of the monthly sums within the same category / brand / call center / year
10 | OVER
11 | (PARTITION BY i_category, i_brand, cc_name, d_year)
12 | avg_monthly_sales,
13 | rank()  -- position of the month in year/month order within category / brand / call center
14 | OVER
15 | (PARTITION BY i_category, i_brand, cc_name
16 | ORDER BY d_year, d_moy) rn
17 | FROM item, catalog_sales, date_dim, call_center
18 | WHERE cs_item_sk = i_item_sk AND
19 | cs_sold_date_sk = d_date_sk AND
20 | cc_call_center_sk = cs_call_center_sk AND
21 | (  -- all of 1999 plus December 1998 and January 2000; presumably so the first and last 1999 months have neighbours - TODO confirm
22 | d_year = 1999 OR
23 | (d_year = 1999 - 1 AND d_moy = 12) OR
24 | (d_year = 1999 + 1 AND d_moy = 1)
25 | )
26 | GROUP BY i_category, i_brand,
27 | cc_name, d_year, d_moy),
28 | v2 AS (  -- each v1 row joined to the rows ranked immediately before (psum) and after (nsum) it
29 | SELECT
30 | v1.i_category,
31 | v1.i_brand,
32 | v1.cc_name,
33 | v1.d_year,
34 | v1.d_moy,
35 | v1.avg_monthly_sales,
36 | v1.sum_sales,
37 | v1_lag.sum_sales psum,
38 | v1_lead.sum_sales nsum
39 | FROM v1, v1 v1_lag, v1 v1_lead
40 | WHERE v1.i_category = v1_lag.i_category AND
41 | v1.i_category = v1_lead.i_category AND
42 | v1.i_brand = v1_lag.i_brand AND
43 | v1.i_brand = v1_lead.i_brand AND
44 | v1.cc_name = v1_lag.cc_name AND
45 | v1.cc_name = v1_lead.cc_name AND
46 | v1.rn = v1_lag.rn + 1 AND
47 | v1.rn = v1_lead.rn - 1)
48 | SELECT *
49 | FROM v2
50 | WHERE d_year = 1999 AND
51 | avg_monthly_sales > 0 AND
52 | CASE WHEN avg_monthly_sales > 0  -- keeps rows whose monthly sum deviates from the yearly average by more than 0.1 of that average
53 | THEN abs(sum_sales - avg_monthly_sales) / avg_monthly_sales
54 | ELSE NULL END > 0.1
55 | ORDER BY sum_sales - avg_monthly_sales, 3  -- NOTE(review): if 3 is resolved as a column ordinal it refers to v2's third column, cc_name - confirm
56 | LIMIT 100
57 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q9.sql:
--------------------------------------------------------------------------------
1 | SELECT  -- q9: five buckets by ss_quantity range; each returns avg(ss_ext_discount_amt) when the range's row count exceeds a threshold, else avg(ss_net_paid)
2 | CASE WHEN (SELECT count(*)
3 | FROM store_sales
4 | WHERE ss_quantity BETWEEN 1 AND 20) > 62316685  -- bucket1: quantity 1-20
5 | THEN (SELECT avg(ss_ext_discount_amt)
6 | FROM store_sales
7 | WHERE ss_quantity BETWEEN 1 AND 20)
8 | ELSE (SELECT avg(ss_net_paid)
9 | FROM store_sales
10 | WHERE ss_quantity BETWEEN 1 AND 20) END bucket1,
11 | CASE WHEN (SELECT count(*)
12 | FROM store_sales
13 | WHERE ss_quantity BETWEEN 21 AND 40) > 19045798  -- bucket2: quantity 21-40
14 | THEN (SELECT avg(ss_ext_discount_amt)
15 | FROM store_sales
16 | WHERE ss_quantity BETWEEN 21 AND 40)
17 | ELSE (SELECT avg(ss_net_paid)
18 | FROM store_sales
19 | WHERE ss_quantity BETWEEN 21 AND 40) END bucket2,
20 | CASE WHEN (SELECT count(*)
21 | FROM store_sales
22 | WHERE ss_quantity BETWEEN 41 AND 60) > 365541424  -- bucket3: quantity 41-60
23 | THEN (SELECT avg(ss_ext_discount_amt)
24 | FROM store_sales
25 | WHERE ss_quantity BETWEEN 41 AND 60)
26 | ELSE (SELECT avg(ss_net_paid)
27 | FROM store_sales
28 | WHERE ss_quantity BETWEEN 41 AND 60) END bucket3,
29 | CASE WHEN (SELECT count(*)
30 | FROM store_sales
31 | WHERE ss_quantity BETWEEN 61 AND 80) > 216357808  -- bucket4: quantity 61-80
32 | THEN (SELECT avg(ss_ext_discount_amt)
33 | FROM store_sales
34 | WHERE ss_quantity BETWEEN 61 AND 80)
35 | ELSE (SELECT avg(ss_net_paid)
36 | FROM store_sales
37 | WHERE ss_quantity BETWEEN 61 AND 80) END bucket4,
38 | CASE WHEN (SELECT count(*)
39 | FROM store_sales
40 | WHERE ss_quantity BETWEEN 81 AND 100) > 184483884  -- bucket5: quantity 81-100
41 | THEN (SELECT avg(ss_ext_discount_amt)
42 | FROM store_sales
43 | WHERE ss_quantity BETWEEN 81 AND 100)
44 | ELSE (SELECT avg(ss_net_paid)
45 | FROM store_sales
46 | WHERE ss_quantity BETWEEN 81 AND 100) END bucket5
47 | FROM reason  -- no column of reason is selected; presumably used only to yield a single output row - TODO confirm
48 | WHERE r_reason_sk = 1
49 |
--------------------------------------------------------------------------------
/src/test/scala/org/apache/spark/sql/RangerSparkTestUtils.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql
19 |
20 | import java.security.PrivilegedExceptionAction
21 |
22 | import org.apache.hadoop.security.UserGroupInformation
23 | import org.apache.spark.sql.catalyst.optimizer.{RangerSparkMaskingExtension, RangerSparkRowFilterExtension}
24 | import org.apache.spark.sql.execution.RangerSparkPlanOmitStrategy
25 |
26 | /** Test helpers for wiring the Ranger rules into a session and for running code as a given user. */
27 | object RangerSparkTestUtils {
28 |
29 |   /**
30 |    * Registers the row-filter and masking optimizer rules and the plan-omit
31 |    * planner strategy on the extensions of `spark`.
32 |    */
33 |   def injectRules(spark: SparkSession): Unit = {
34 |     val extensions = spark.extensions
35 |     extensions.injectOptimizerRule(RangerSparkRowFilterExtension)
36 |     extensions.injectOptimizerRule(RangerSparkMaskingExtension)
37 |     extensions.injectPlannerStrategy(RangerSparkPlanOmitStrategy)
38 |   }
39 |
40 |   /**
41 |    * Evaluates `f` inside `doAs` of a remote-user UGI created for `user` and returns its result.
42 |    */
43 |   def withUser[T](user: String)(f: => T): T = {
44 |     val remoteUser = UserGroupInformation.createRemoteUser(user)
45 |     val action: PrivilegedExceptionAction[T] = new PrivilegedExceptionAction[T] {
46 |       override def run(): T = f
47 |     }
48 |     remoteUser.doAs(action)
49 |   }
50 | }
51 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/ranger/authorization/spark/authorizer/SparkOperationType.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.ranger.authorization.spark.authorizer
19 |
20 | /**
21 |  * Subset of HiveOperationTypes supported by Apache Spark.
22 |  *
23 |  * The values are declared in several groups, but their overall order is what
24 |  * fixes each value's `id`, so the sequence below must not be rearranged.
25 |  */
26 | object SparkOperationType extends Enumeration {
27 |   type SparkOperationType = Value
28 |
29 |   val ALTERDATABASE, ALTERTABLE_ADDCOLS, ALTERTABLE_ADDPARTS, ALTERTABLE_RENAMECOL,
30 |     ALTERTABLE_DROPPARTS, MSCK, ALTERTABLE_RENAMEPART, ALTERTABLE_RENAME, ALTERVIEW_RENAME,
31 |     ALTERTABLE_PROPERTIES, ALTERTABLE_SERDEPROPERTIES, ALTERTABLE_LOCATION = Value
32 |
33 |   val QUERY, CREATEDATABASE, CREATETABLE_AS_SELECT, CREATEFUNCTION, CREATETABLE, CREATEVIEW = Value
34 |
35 |   val DESCTABLE, DESCDATABASE, DESCFUNCTION, DROPDATABASE, DROPTABLE, DROPFUNCTION, LOAD = Value
36 |
37 |   val SHOWCONF, SWITCHDATABASE, SHOW_CREATETABLE, SHOWCOLUMNS, SHOWDATABASES, SHOWFUNCTIONS,
38 |     SHOWPARTITIONS, SHOWTABLES, SHOW_TBLPROPERTIES = Value
39 |
40 |   val TRUNCATETABLE, DROPVIEW, EXPLAIN = Value
41 | }
42 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/execution/RangerShowDatabasesCommand.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.execution
19 |
20 | import org.apache.ranger.authorization.spark.authorizer.{RangerSparkAuthorizer, SparkPrivilegeObject, SparkPrivilegeObjectType}
21 | import org.apache.spark.sql.execution.command.{RunnableCommand, ShowDatabasesCommand}
22 | import org.apache.spark.sql.{Row, SparkSession}
23 |
24 | /**
25 |  * Runs the wrapped `ShowDatabasesCommand` and keeps only the rows for which
26 |  * `RangerSparkAuthorizer.isAllowed` returns true.
27 |  *
28 |  * @param child the command whose result rows are filtered
29 |  */
30 | case class RangerShowDatabasesCommand(child: ShowDatabasesCommand) extends RunnableCommand {
31 |
32 |   // Same output attributes as the wrapped command.
33 |   override val output = child.output
34 |
35 |   /** Executes `child` and drops every row whose database fails the authorizer check. */
36 |   override def run(sparkSession: SparkSession): Seq[Row] =
37 |     child.run(sparkSession).filter { row =>
38 |       RangerSparkAuthorizer.isAllowed(databaseObject(row))
39 |     }
40 |
41 |   /** Builds a DATABASE privilege object from the string in column 0 of `row`. */
42 |   private def databaseObject(row: Row): SparkPrivilegeObject = {
43 |     val dbName = row.getString(0)
44 |     new SparkPrivilegeObject(SparkPrivilegeObjectType.DATABASE, dbName, dbName)
45 |   }
46 | }
47 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q51.sql:
--------------------------------------------------------------------------------
1 | WITH web_v1 AS (  -- per item and date, the running total of daily web sales price for d_month_seq 1200 to 1211
2 | SELECT
3 | ws_item_sk item_sk,
4 | d_date,
5 | sum(sum(ws_sales_price))  -- inner sum: per item/date group; outer sum: cumulative over dates
6 | OVER (PARTITION BY ws_item_sk
7 | ORDER BY d_date
8 | ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) cume_sales
9 | FROM web_sales, date_dim
10 | WHERE ws_sold_date_sk = d_date_sk
11 | AND d_month_seq BETWEEN 1200 AND 1200 + 11
12 | AND ws_item_sk IS NOT NULL
13 | GROUP BY ws_item_sk, d_date),
14 | store_v1 AS (  -- same computation over store_sales
15 | SELECT
16 | ss_item_sk item_sk,
17 | d_date,
18 | sum(sum(ss_sales_price))
19 | OVER (PARTITION BY ss_item_sk
20 | ORDER BY d_date
21 | ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) cume_sales
22 | FROM store_sales, date_dim
23 | WHERE ss_sold_date_sk = d_date_sk
24 | AND d_month_seq BETWEEN 1200 AND 1200 + 11
25 | AND ss_item_sk IS NOT NULL
26 | GROUP BY ss_item_sk, d_date)
27 | SELECT *
28 | FROM (SELECT
29 | item_sk,
30 | d_date,
31 | web_sales,
32 | store_sales,
33 | max(web_sales)  -- running maximum of the web cumulative value up to the current date
34 | OVER (PARTITION BY item_sk
35 | ORDER BY d_date
36 | ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) web_cumulative,
37 | max(store_sales)  -- running maximum of the store cumulative value up to the current date
38 | OVER (PARTITION BY item_sk
39 | ORDER BY d_date
40 | ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) store_cumulative
41 | FROM (SELECT
42 | CASE WHEN web.item_sk IS NOT NULL  -- take item and date from whichever side of the full outer join is present
43 | THEN web.item_sk
44 | ELSE store.item_sk END item_sk,
45 | CASE WHEN web.d_date IS NOT NULL
46 | THEN web.d_date
47 | ELSE store.d_date END d_date,
48 | web.cume_sales web_sales,
49 | store.cume_sales store_sales
50 | FROM web_v1 web FULL OUTER JOIN store_v1 store ON (web.item_sk = store.item_sk
51 | AND web.d_date = store.d_date)
52 | ) x) y
53 | WHERE web_cumulative > store_cumulative  -- keep item/date rows where web is ahead of store
54 | ORDER BY item_sk, d_date
55 | LIMIT 100
56 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | # Travis CI configuration: Scala 2.11.8 on OpenJDK 8, with one matrix job per Spark/Ranger Maven profile pair.
2 | language: scala
3 | scala:
4 |   - 2.11.8
5 | jdk:
6 |   - openjdk8
7 |
8 | cache:
9 |   directories:
10 |     - $HOME/.m2
11 |
12 | # Package the jar (tests skipped) ahead of the deploy providers below.
13 | before_deploy:
14 |   - mvn clean package -DskipTests=true
15 |
16 | deploy:
17 |   - provider: pages
18 |     skip_cleanup: true
19 |     github_token: $GITHUB_TOKEN
20 |     email: yaooqinn@hotmail.com
21 |     name: Kent Yao
22 |     on:
23 |       branch: master
24 |   - provider: releases
25 |     api_key: $GITHUB_TOKEN
26 |     file_glob: true
27 |     file: target/spark-ranger-*.jar
28 |     skip_cleanup: true
29 |     on:
30 |       tags: true
31 |
32 | # Each job defines PROFILES, which the `script` step passes to Maven.
33 | # The variable name is written without a leading `$`; `$PROFILES=...` would not define PROFILES.
34 | matrix:
35 |   include:
36 |     - name: spark2.3-ranger-1.0
37 |       language: scala
38 |       env: PROFILES="-Pspark-2.3 -Pranger-1.0"
39 |     - name: spark2.3-ranger-1.1
40 |       language: scala
41 |       env: PROFILES="-Pspark-2.3 -Pranger-1.1"
42 |     - name: spark2.3-ranger-1.2
43 |       language: scala
44 |       env: PROFILES="-Pspark-2.3 -Pranger-1.2"
45 |     - name: spark2.3-ranger-2.0
46 |       language: scala
47 |       env: PROFILES="-Pspark-2.3 -Pranger-2.0"
48 |     - name: spark2.4-ranger-1.0
49 |       language: scala
50 |       env: PROFILES="-Pspark-2.4 -Pranger-1.0"
51 |     - name: spark2.4-ranger-1.1
52 |       language: scala
53 |       env: PROFILES="-Pspark-2.4 -Pranger-1.1"
54 |     - name: spark2.4-ranger-1.2
55 |       language: scala
56 |       env: PROFILES="-Pspark-2.4 -Pranger-1.2"
57 |     - name: spark2.4-ranger-2.0
58 |       language: scala
59 |       env: PROFILES="-Pspark-2.4 -Pranger-2.0"
60 |
61 | script:
62 |   - mvn --no-transfer-progress clean install $PROFILES -Dmaven.javadoc.skip=true -B -V
63 |
64 | after_success:
65 |   - bash <(curl -s https://codecov.io/bash)
66 |   - echo "Travis exited with ${TRAVIS_TEST_RESULT}"
67 |
68 | after_failure:
69 |   - echo "Travis exited with ${TRAVIS_TEST_RESULT}"
--------------------------------------------------------------------------------
/src/test/resources/ranger-spark-security.xml:
--------------------------------------------------------------------------------
1 |
2 |
18 |
19 |
20 |
21 |
22 | ranger.plugin.spark.service.name
23 | hive_jenkins
24 |
25 | Name of the Ranger service containing policies for this Spark instance
26 |
27 |
28 |
29 |
30 | ranger.plugin.spark.policy.source.impl
31 | org.apache.ranger.services.spark.RangerAdminClientImpl
32 |
33 | Policy source.
34 |
35 |
36 |
37 |
38 | ranger.plugin.spark.policy.cache.dir
39 | target/test-classes
40 |
41 | Directory where Ranger policies are cached after successful retrieval from the source
42 |
43 |
44 |
45 |
46 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q60.sql:
--------------------------------------------------------------------------------
1 | WITH ss AS (  -- store channel: ext sales price per item id for 'Music' items, September 1998, addresses with ca_gmt_offset = -5
2 | SELECT
3 | i_item_id,
4 | sum(ss_ext_sales_price) total_sales
5 | FROM store_sales, date_dim, customer_address, item
6 | WHERE
7 | i_item_id IN (SELECT i_item_id  -- item ids that have at least one item row in category 'Music'
8 | FROM item
9 | WHERE i_category IN ('Music'))
10 | AND ss_item_sk = i_item_sk
11 | AND ss_sold_date_sk = d_date_sk
12 | AND d_year = 1998
13 | AND d_moy = 9
14 | AND ss_addr_sk = ca_address_sk
15 | AND ca_gmt_offset = -5
16 | GROUP BY i_item_id),
17 | cs AS (  -- catalog channel: same filters, joined to the address through cs_bill_addr_sk
18 | SELECT
19 | i_item_id,
20 | sum(cs_ext_sales_price) total_sales
21 | FROM catalog_sales, date_dim, customer_address, item
22 | WHERE
23 | i_item_id IN (SELECT i_item_id
24 | FROM item
25 | WHERE i_category IN ('Music'))
26 | AND cs_item_sk = i_item_sk
27 | AND cs_sold_date_sk = d_date_sk
28 | AND d_year = 1998
29 | AND d_moy = 9
30 | AND cs_bill_addr_sk = ca_address_sk
31 | AND ca_gmt_offset = -5
32 | GROUP BY i_item_id),
33 | ws AS (  -- web channel: same filters, joined to the address through ws_bill_addr_sk
34 | SELECT
35 | i_item_id,
36 | sum(ws_ext_sales_price) total_sales
37 | FROM web_sales, date_dim, customer_address, item
38 | WHERE
39 | i_item_id IN (SELECT i_item_id
40 | FROM item
41 | WHERE i_category IN ('Music'))
42 | AND ws_item_sk = i_item_sk
43 | AND ws_sold_date_sk = d_date_sk
44 | AND d_year = 1998
45 | AND d_moy = 9
46 | AND ws_bill_addr_sk = ca_address_sk
47 | AND ca_gmt_offset = -5
48 | GROUP BY i_item_id)
49 | SELECT  -- total across the three channels per item id
50 | i_item_id,
51 | sum(total_sales) total_sales
52 | FROM (SELECT *
53 | FROM ss
54 | UNION ALL
55 | SELECT *
56 | FROM cs
57 | UNION ALL
58 | SELECT *
59 | FROM ws) tmp1
60 | GROUP BY i_item_id
61 | ORDER BY i_item_id, total_sales
62 | LIMIT 100
63 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q10.sql:
--------------------------------------------------------------------------------
1 | SELECT  -- q10: customer counts per demographic combination for five counties, limited to customers with a store purchase and a web or catalog purchase in 2002, d_moy 1 to 4
2 | cd_gender,
3 | cd_marital_status,
4 | cd_education_status,
5 | count(*) cnt1,  -- cnt1 through cnt6 are all count(*) over the same group
6 | cd_purchase_estimate,
7 | count(*) cnt2,
8 | cd_credit_rating,
9 | count(*) cnt3,
10 | cd_dep_count,
11 | count(*) cnt4,
12 | cd_dep_employed_count,
13 | count(*) cnt5,
14 | cd_dep_college_count,
15 | count(*) cnt6
16 | FROM
17 | customer c, customer_address ca, customer_demographics
18 | WHERE
19 | c.c_current_addr_sk = ca.ca_address_sk AND
20 | ca_county IN ('Rush County', 'Toole County', 'Jefferson County',
21 | 'Dona Ana County', 'La Porte County') AND
22 | cd_demo_sk = c.c_current_cdemo_sk AND
23 | exists(SELECT *  -- required: at least one store sale for this customer in the period
24 | FROM store_sales, date_dim
25 | WHERE c.c_customer_sk = ss_customer_sk AND
26 | ss_sold_date_sk = d_date_sk AND
27 | d_year = 2002 AND
28 | d_moy BETWEEN 1 AND 1 + 3) AND
29 | (exists(SELECT *  -- plus either a web sale billed to the customer ...
30 | FROM web_sales, date_dim
31 | WHERE c.c_customer_sk = ws_bill_customer_sk AND
32 | ws_sold_date_sk = d_date_sk AND
33 | d_year = 2002 AND
34 | d_moy BETWEEN 1 AND 1 + 3) OR
35 | exists(SELECT *  -- ... or a catalog sale shipped to the customer, in the same period
36 | FROM catalog_sales, date_dim
37 | WHERE c.c_customer_sk = cs_ship_customer_sk AND
38 | cs_sold_date_sk = d_date_sk AND
39 | d_year = 2002 AND
40 | d_moy BETWEEN 1 AND 1 + 3))
41 | GROUP BY cd_gender,
42 | cd_marital_status,
43 | cd_education_status,
44 | cd_purchase_estimate,
45 | cd_credit_rating,
46 | cd_dep_count,
47 | cd_dep_employed_count,
48 | cd_dep_college_count
49 | ORDER BY cd_gender,
50 | cd_marital_status,
51 | cd_education_status,
52 | cd_purchase_estimate,
53 | cd_credit_rating,
54 | cd_dep_count,
55 | cd_dep_employed_count,
56 | cd_dep_college_count
57 | LIMIT 100
58 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q23a.sql:
--------------------------------------------------------------------------------
1 | WITH frequent_ss_items AS  -- item / sale-date groups with more than 4 store-sale rows in years 2000 to 2003
2 | (SELECT
3 | substr(i_item_desc, 1, 30) itemdesc,
4 | i_item_sk item_sk,
5 | d_date solddate,
6 | count(*) cnt
7 | FROM store_sales, date_dim, item
8 | WHERE ss_sold_date_sk = d_date_sk
9 | AND ss_item_sk = i_item_sk
10 | AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3)
11 | GROUP BY substr(i_item_desc, 1, 30), i_item_sk, d_date
12 | HAVING count(*) > 4),
13 | max_store_sales AS  -- single value: the largest per-customer store-sales total (quantity * price) over 2000 to 2003
14 | (SELECT max(csales) tpcds_cmax
15 | FROM (SELECT
16 | c_customer_sk,
17 | sum(ss_quantity * ss_sales_price) csales
18 | FROM store_sales, customer, date_dim
19 | WHERE ss_customer_sk = c_customer_sk
20 | AND ss_sold_date_sk = d_date_sk
21 | AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3)
22 | GROUP BY c_customer_sk) x),
23 | best_ss_customer AS  -- customers whose store-sales total exceeds half of tpcds_cmax; this CTE applies no date filter
24 | (SELECT
25 | c_customer_sk,
26 | sum(ss_quantity * ss_sales_price) ssales
27 | FROM store_sales, customer
28 | WHERE ss_customer_sk = c_customer_sk
29 | GROUP BY c_customer_sk
30 | HAVING sum(ss_quantity * ss_sales_price) > (50 / 100.0) *
31 | (SELECT *
32 | FROM max_store_sales))
33 | SELECT sum(sales)  -- combined catalog and web sales (quantity * list price) dated 2000, month 2, for those items and customers
34 | FROM ((SELECT cs_quantity * cs_list_price sales
35 | FROM catalog_sales, date_dim
36 | WHERE d_year = 2000
37 | AND d_moy = 2
38 | AND cs_sold_date_sk = d_date_sk
39 | AND cs_item_sk IN (SELECT item_sk
40 | FROM frequent_ss_items)
41 | AND cs_bill_customer_sk IN (SELECT c_customer_sk
42 | FROM best_ss_customer))
43 | UNION ALL
44 | (SELECT ws_quantity * ws_list_price sales
45 | FROM web_sales, date_dim
46 | WHERE d_year = 2000
47 | AND d_moy = 2
48 | AND ws_sold_date_sk = d_date_sk
49 | AND ws_item_sk IN (SELECT item_sk
50 | FROM frequent_ss_items)
51 | AND ws_bill_customer_sk IN (SELECT c_customer_sk
52 | FROM best_ss_customer))) y
53 | LIMIT 100
54 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/execution/RangerShowTablesCommand.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.execution
19 |
20 | import org.apache.ranger.authorization.spark.authorizer.{RangerSparkAuthorizer, SparkPrivilegeObject, SparkPrivilegeObjectType}
21 | import org.apache.spark.sql.execution.command.{RunnableCommand, ShowTablesCommand}
22 | import org.apache.spark.sql.{Row, SparkSession}
23 | import org.apache.spark.sql.catalyst.expressions.Attribute
24 |
25 | // Wraps a ShowTablesCommand and removes rows for which RangerSparkAuthorizer.isAllowed returns false.
26 | case class RangerShowTablesCommand(child: ShowTablesCommand) extends RunnableCommand {
27 |   override val output: Seq[Attribute] = child.output
28 |   // Runs the wrapped command first, then filters its rows through the authorizer.
29 |   override def run(sparkSession: SparkSession): Seq[Row] =
30 |     child.run(sparkSession).filter { listed =>
31 |       RangerSparkAuthorizer.isAllowed(toSparkPrivilegeObject(listed))
32 |     }
33 |   // Builds a TABLE_OR_VIEW privilege object from row column 0 (database) and column 1 (table).
34 |   private def toSparkPrivilegeObject(row: Row): SparkPrivilegeObject = {
35 |     val (dbName, tableName) = (row.getString(0), row.getString(1))
36 |     new SparkPrivilegeObject(SparkPrivilegeObjectType.TABLE_OR_VIEW, dbName, tableName)
37 |   }
38 | }
39 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q33.sql:
--------------------------------------------------------------------------------
1 | WITH ss AS ( -- ss: store_sales ss_ext_sales_price summed per i_manufact_id
2 | SELECT
3 | i_manufact_id,
4 | sum(ss_ext_sales_price) total_sales
5 | FROM
6 | store_sales, date_dim, customer_address, item
7 | WHERE
8 | i_manufact_id IN (SELECT i_manufact_id -- only manufacturer ids that appear on at least one 'Electronics' item
9 | FROM item
10 | WHERE i_category IN ('Electronics'))
11 | AND ss_item_sk = i_item_sk
12 | AND ss_sold_date_sk = d_date_sk
13 | AND d_year = 1998
14 | AND d_moy = 5
15 | AND ss_addr_sk = ca_address_sk
16 | AND ca_gmt_offset = -5
17 | GROUP BY i_manufact_id), cs AS -- cs: same aggregation over catalog_sales, address joined via cs_bill_addr_sk
18 | (SELECT
19 | i_manufact_id,
20 | sum(cs_ext_sales_price) total_sales
21 | FROM catalog_sales, date_dim, customer_address, item
22 | WHERE
23 | i_manufact_id IN (
24 | SELECT i_manufact_id
25 | FROM item
26 | WHERE
27 | i_category IN ('Electronics'))
28 | AND cs_item_sk = i_item_sk
29 | AND cs_sold_date_sk = d_date_sk
30 | AND d_year = 1998
31 | AND d_moy = 5
32 | AND cs_bill_addr_sk = ca_address_sk
33 | AND ca_gmt_offset = -5
34 | GROUP BY i_manufact_id),
35 | ws AS ( -- ws: same aggregation over web_sales, address joined via ws_bill_addr_sk
36 | SELECT
37 | i_manufact_id,
38 | sum(ws_ext_sales_price) total_sales
39 | FROM
40 | web_sales, date_dim, customer_address, item
41 | WHERE
42 | i_manufact_id IN (SELECT i_manufact_id
43 | FROM item
44 | WHERE i_category IN ('Electronics'))
45 | AND ws_item_sk = i_item_sk
46 | AND ws_sold_date_sk = d_date_sk
47 | AND d_year = 1998
48 | AND d_moy = 5
49 | AND ws_bill_addr_sk = ca_address_sk
50 | AND ca_gmt_offset = -5
51 | GROUP BY i_manufact_id)
52 | SELECT -- result: the three per-channel totals stacked with UNION ALL and re-summed per i_manufact_id
53 | i_manufact_id,
54 | sum(total_sales) total_sales
55 | FROM (SELECT *
56 | FROM ss
57 | UNION ALL
58 | SELECT *
59 | FROM cs
60 | UNION ALL
61 | SELECT *
62 | FROM ws) tmp1
63 | GROUP BY i_manufact_id
64 | ORDER BY total_sales
65 | LIMIT 100
66 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q56.sql:
--------------------------------------------------------------------------------
1 | WITH ss AS ( -- ss: store_sales ss_ext_sales_price summed per i_item_id
2 | SELECT
3 | i_item_id,
4 | sum(ss_ext_sales_price) total_sales
5 | FROM
6 | store_sales, date_dim, customer_address, item
7 | WHERE
8 | i_item_id IN (SELECT i_item_id -- only item ids that appear on an item row with one of the three listed colors
9 | FROM item
10 | WHERE i_color IN ('slate', 'blanched', 'burnished'))
11 | AND ss_item_sk = i_item_sk
12 | AND ss_sold_date_sk = d_date_sk
13 | AND d_year = 2001
14 | AND d_moy = 2
15 | AND ss_addr_sk = ca_address_sk
16 | AND ca_gmt_offset = -5
17 | GROUP BY i_item_id),
18 | cs AS ( -- cs: same aggregation over catalog_sales, address joined via cs_bill_addr_sk
19 | SELECT
20 | i_item_id,
21 | sum(cs_ext_sales_price) total_sales
22 | FROM
23 | catalog_sales, date_dim, customer_address, item
24 | WHERE
25 | i_item_id IN (SELECT i_item_id
26 | FROM item
27 | WHERE i_color IN ('slate', 'blanched', 'burnished'))
28 | AND cs_item_sk = i_item_sk
29 | AND cs_sold_date_sk = d_date_sk
30 | AND d_year = 2001
31 | AND d_moy = 2
32 | AND cs_bill_addr_sk = ca_address_sk
33 | AND ca_gmt_offset = -5
34 | GROUP BY i_item_id),
35 | ws AS ( -- ws: same aggregation over web_sales, address joined via ws_bill_addr_sk
36 | SELECT
37 | i_item_id,
38 | sum(ws_ext_sales_price) total_sales
39 | FROM
40 | web_sales, date_dim, customer_address, item
41 | WHERE
42 | i_item_id IN (SELECT i_item_id
43 | FROM item
44 | WHERE i_color IN ('slate', 'blanched', 'burnished'))
45 | AND ws_item_sk = i_item_sk
46 | AND ws_sold_date_sk = d_date_sk
47 | AND d_year = 2001
48 | AND d_moy = 2
49 | AND ws_bill_addr_sk = ca_address_sk
50 | AND ca_gmt_offset = -5
51 | GROUP BY i_item_id)
52 | SELECT -- result: the three per-channel totals stacked with UNION ALL and re-summed per i_item_id
53 | i_item_id,
54 | sum(total_sales) total_sales
55 | FROM (SELECT *
56 | FROM ss
57 | UNION ALL
58 | SELECT *
59 | FROM cs
60 | UNION ALL
61 | SELECT *
62 | FROM ws) tmp1
63 | GROUP BY i_item_id
64 | ORDER BY total_sales
65 | LIMIT 100
66 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q83.sql:
--------------------------------------------------------------------------------
1 | WITH sr_items AS -- sr_items: store_returns sr_return_quantity summed per i_item_id
2 | (SELECT
3 | i_item_id item_id,
4 | sum(sr_return_quantity) sr_item_qty
5 | FROM store_returns, item, date_dim
6 | WHERE sr_item_sk = i_item_sk
7 | AND d_date IN (SELECT d_date -- every date whose d_week_seq matches that of one of the three literal dates
8 | FROM date_dim
9 | WHERE d_week_seq IN
10 | (SELECT d_week_seq
11 | FROM date_dim
12 | WHERE d_date IN ('2000-06-30', '2000-09-27', '2000-11-17')))
13 | AND sr_returned_date_sk = d_date_sk
14 | GROUP BY i_item_id),
15 | cr_items AS -- cr_items: same aggregation over catalog_returns
16 | (SELECT
17 | i_item_id item_id,
18 | sum(cr_return_quantity) cr_item_qty
19 | FROM catalog_returns, item, date_dim
20 | WHERE cr_item_sk = i_item_sk
21 | AND d_date IN (SELECT d_date
22 | FROM date_dim
23 | WHERE d_week_seq IN
24 | (SELECT d_week_seq
25 | FROM date_dim
26 | WHERE d_date IN ('2000-06-30', '2000-09-27', '2000-11-17')))
27 | AND cr_returned_date_sk = d_date_sk
28 | GROUP BY i_item_id),
29 | wr_items AS -- wr_items: same aggregation over web_returns
30 | (SELECT
31 | i_item_id item_id,
32 | sum(wr_return_quantity) wr_item_qty
33 | FROM web_returns, item, date_dim
34 | WHERE wr_item_sk = i_item_sk AND d_date IN
35 | (SELECT d_date
36 | FROM date_dim
37 | WHERE d_week_seq IN
38 | (SELECT d_week_seq
39 | FROM date_dim
40 | WHERE d_date IN ('2000-06-30', '2000-09-27', '2000-11-17')))
41 | AND wr_returned_date_sk = d_date_sk
42 | GROUP BY i_item_id)
43 | SELECT -- result: items present in all three CTEs, with each channel's quantity and derived columns
44 | sr_items.item_id,
45 | sr_item_qty,
46 | sr_item_qty / (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 * 100 sr_dev, -- no extra parentheses: quantity / total, then / 3.0, then * 100
47 | cr_item_qty,
48 | cr_item_qty / (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 * 100 cr_dev,
49 | wr_item_qty,
50 | wr_item_qty / (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 * 100 wr_dev,
51 | (sr_item_qty + cr_item_qty + wr_item_qty) / 3.0 average
52 | FROM sr_items, cr_items, wr_items
53 | WHERE sr_items.item_id = cr_items.item_id -- the two equalities below join the three CTEs on item_id
54 | AND sr_items.item_id = wr_items.item_id
55 | ORDER BY sr_items.item_id, sr_item_qty
56 | LIMIT 100
57 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q31.sql:
--------------------------------------------------------------------------------
1 | WITH ss AS -- ss: store_sales ss_ext_sales_price summed per (ca_county, d_qoy, d_year), address via ss_addr_sk
2 | (SELECT
3 | ca_county,
4 | d_qoy,
5 | d_year,
6 | sum(ss_ext_sales_price) AS store_sales
7 | FROM store_sales, date_dim, customer_address
8 | WHERE ss_sold_date_sk = d_date_sk
9 | AND ss_addr_sk = ca_address_sk
10 | GROUP BY ca_county, d_qoy, d_year),
11 | ws AS -- ws: web_sales ws_ext_sales_price summed per (ca_county, d_qoy, d_year), address via ws_bill_addr_sk
12 | (SELECT
13 | ca_county,
14 | d_qoy,
15 | d_year,
16 | sum(ws_ext_sales_price) AS web_sales
17 | FROM web_sales, date_dim, customer_address
18 | WHERE ws_sold_date_sk = d_date_sk
19 | AND ws_bill_addr_sk = ca_address_sk
20 | GROUP BY ca_county, d_qoy, d_year)
21 | SELECT -- result: per county, the quarter-over-quarter ratios of web and store totals for year 2000
22 | ss1.ca_county,
23 | ss1.d_year,
24 | ws2.web_sales / ws1.web_sales web_q1_q2_increase,
25 | ss2.store_sales / ss1.store_sales store_q1_q2_increase,
26 | ws3.web_sales / ws2.web_sales web_q2_q3_increase,
27 | ss3.store_sales / ss2.store_sales store_q2_q3_increase
28 | FROM
29 | ss ss1, ss ss2, ss ss3, ws ws1, ws ws2, ws ws3 -- three aliases per CTE; the WHERE clause pins alias N to d_qoy = N
30 | WHERE
31 | ss1.d_qoy = 1
32 | AND ss1.d_year = 2000
33 | AND ss1.ca_county = ss2.ca_county
34 | AND ss2.d_qoy = 2
35 | AND ss2.d_year = 2000
36 | AND ss2.ca_county = ss3.ca_county
37 | AND ss3.d_qoy = 3
38 | AND ss3.d_year = 2000
39 | AND ss1.ca_county = ws1.ca_county
40 | AND ws1.d_qoy = 1
41 | AND ws1.d_year = 2000
42 | AND ws1.ca_county = ws2.ca_county
43 | AND ws2.d_qoy = 2
44 | AND ws2.d_year = 2000
45 | AND ws1.ca_county = ws3.ca_county
46 | AND ws3.d_qoy = 3
47 | AND ws3.d_year = 2000
48 | AND CASE WHEN ws1.web_sales > 0 -- keep counties whose web quarter 1 -> 2 ratio exceeds the store ratio; a non-positive divisor yields NULL
49 | THEN ws2.web_sales / ws1.web_sales
50 | ELSE NULL END
51 | > CASE WHEN ss1.store_sales > 0
52 | THEN ss2.store_sales / ss1.store_sales
53 | ELSE NULL END
54 | AND CASE WHEN ws2.web_sales > 0 -- same comparison for the quarter 2 -> 3 ratios
55 | THEN ws3.web_sales / ws2.web_sales
56 | ELSE NULL END
57 | > CASE WHEN ss2.store_sales > 0
58 | THEN ss3.store_sales / ss2.store_sales
59 | ELSE NULL END
60 | ORDER BY ss1.ca_county
61 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q41.sql:
--------------------------------------------------------------------------------
1 | SELECT DISTINCT (i_product_name) -- distinct product names of items passing both WHERE conditions below
2 | FROM item i1
3 | WHERE i_manufact_id BETWEEN 738 AND 738 + 40
4 | AND (SELECT count(*) AS item_cnt -- correlated subquery: counts item rows of the same i_manufact matching any attribute combination
5 | FROM item
6 | WHERE (i_manufact = i1.i_manufact AND -- first group of four category/color/units/size combinations
7 | ((i_category = 'Women' AND
8 | (i_color = 'powder' OR i_color = 'khaki') AND
9 | (i_units = 'Ounce' OR i_units = 'Oz') AND
10 | (i_size = 'medium' OR i_size = 'extra large')
11 | ) OR
12 | (i_category = 'Women' AND
13 | (i_color = 'brown' OR i_color = 'honeydew') AND
14 | (i_units = 'Bunch' OR i_units = 'Ton') AND
15 | (i_size = 'N/A' OR i_size = 'small')
16 | ) OR
17 | (i_category = 'Men' AND
18 | (i_color = 'floral' OR i_color = 'deep') AND
19 | (i_units = 'N/A' OR i_units = 'Dozen') AND
20 | (i_size = 'petite' OR i_size = 'large')
21 | ) OR
22 | (i_category = 'Men' AND
23 | (i_color = 'light' OR i_color = 'cornflower') AND
24 | (i_units = 'Box' OR i_units = 'Pound') AND
25 | (i_size = 'medium' OR i_size = 'extra large')
26 | ))) OR
27 | (i_manufact = i1.i_manufact AND -- second group of four combinations, OR-ed with the first
28 | ((i_category = 'Women' AND
29 | (i_color = 'midnight' OR i_color = 'snow') AND
30 | (i_units = 'Pallet' OR i_units = 'Gross') AND
31 | (i_size = 'medium' OR i_size = 'extra large')
32 | ) OR
33 | (i_category = 'Women' AND
34 | (i_color = 'cyan' OR i_color = 'papaya') AND
35 | (i_units = 'Cup' OR i_units = 'Dram') AND
36 | (i_size = 'N/A' OR i_size = 'small')
37 | ) OR
38 | (i_category = 'Men' AND
39 | (i_color = 'orange' OR i_color = 'frosted') AND
40 | (i_units = 'Each' OR i_units = 'Tbl') AND
41 | (i_size = 'petite' OR i_size = 'large')
42 | ) OR
43 | (i_category = 'Men' AND
44 | (i_color = 'forest' OR i_color = 'ghost') AND
45 | (i_units = 'Lb' OR i_units = 'Bundle') AND
46 | (i_size = 'medium' OR i_size = 'extra large')
47 | )))) > 0 -- the outer row qualifies when the count is positive
48 | ORDER BY i_product_name
49 | LIMIT 100
50 |
--------------------------------------------------------------------------------
/src/test/scala/org/apache/spark/sql/execution/RangerSparkPlanOmitStrategyTest.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.execution
19 |
20 | import org.apache.spark.sql.catalyst.plans.logical.{RangerSparkMasking, RangerSparkRowFilter}
21 | import org.apache.spark.sql.hive.test.TestHive
22 | import org.scalatest.FunSuite
23 |
24 | /** Checks which plan RangerSparkPlanOmitStrategy wraps in PlanLater for Ranger marker nodes. */
25 | class RangerSparkPlanOmitStrategyTest extends FunSuite {
26 |   private val spark = TestHive.sparkSession
27 |
28 |   test("ranger spark plan omit strategy") {
29 |     val omit = RangerSparkPlanOmitStrategy(spark)
30 |     val base = spark.range(0, 5).queryExecution.optimizedPlan
31 |     // A plan without a Ranger marker on top yields Nil from this strategy.
32 |     assert(omit(base) === Nil)
33 |     val filtered = RangerSparkRowFilter(base)
34 |     val masked = RangerSparkMasking(base)
35 |     // A marker at the root is dropped and its direct child is handed to PlanLater.
36 |     assert(omit(filtered) === List(PlanLater(base)))
37 |     assert(omit(masked) === List(PlanLater(base)))
38 |     // With two stacked markers only the outer one is removed; the inner one stays in the PlanLater child.
39 |     assert(omit(RangerSparkMasking(filtered)) === List(PlanLater(filtered)))
40 |     assert(omit(RangerSparkRowFilter(masked)) === List(PlanLater(masked)))
41 |   }
42 | }
43 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q47.sql:
--------------------------------------------------------------------------------
1 | WITH v1 AS ( -- v1: ss_sales_price summed per category/brand/store/company/year/month, plus two window columns
2 | SELECT
3 | i_category,
4 | i_brand,
5 | s_store_name,
6 | s_company_name,
7 | d_year,
8 | d_moy,
9 | sum(ss_sales_price) sum_sales,
10 | avg(sum(ss_sales_price)) -- average of the grouped sums within each category/brand/store/company/year partition
11 | OVER
12 | (PARTITION BY i_category, i_brand,
13 | s_store_name, s_company_name, d_year)
14 | avg_monthly_sales,
15 | rank() -- rank of each (d_year, d_moy) group within its category/brand/store/company partition
16 | OVER
17 | (PARTITION BY i_category, i_brand,
18 | s_store_name, s_company_name
19 | ORDER BY d_year, d_moy) rn
20 | FROM item, store_sales, date_dim, store
21 | WHERE ss_item_sk = i_item_sk AND
22 | ss_sold_date_sk = d_date_sk AND
23 | ss_store_sk = s_store_sk AND
24 | ( -- all of 1999 plus month 12 of 1998 and month 1 of 2000
25 | d_year = 1999 OR
26 | (d_year = 1999 - 1 AND d_moy = 12) OR
27 | (d_year = 1999 + 1 AND d_moy = 1)
28 | )
29 | GROUP BY i_category, i_brand,
30 | s_store_name, s_company_name,
31 | d_year, d_moy),
32 | v2 AS ( -- v2: each v1 row joined to the rows with rank rn - 1 (psum) and rn + 1 (nsum) of the same partition
33 | SELECT
34 | v1.i_category,
35 | v1.i_brand,
36 | v1.s_store_name,
37 | v1.s_company_name,
38 | v1.d_year,
39 | v1.d_moy,
40 | v1.avg_monthly_sales,
41 | v1.sum_sales,
42 | v1_lag.sum_sales psum,
43 | v1_lead.sum_sales nsum
44 | FROM v1, v1 v1_lag, v1 v1_lead
45 | WHERE v1.i_category = v1_lag.i_category AND
46 | v1.i_category = v1_lead.i_category AND
47 | v1.i_brand = v1_lag.i_brand AND
48 | v1.i_brand = v1_lead.i_brand AND
49 | v1.s_store_name = v1_lag.s_store_name AND
50 | v1.s_store_name = v1_lead.s_store_name AND
51 | v1.s_company_name = v1_lag.s_company_name AND
52 | v1.s_company_name = v1_lead.s_company_name AND
53 | v1.rn = v1_lag.rn + 1 AND
54 | v1.rn = v1_lead.rn - 1)
55 | SELECT * -- result: 1999 rows whose sum differs from the partition average by more than 10% of that average
56 | FROM v2
57 | WHERE d_year = 1999 AND
58 | avg_monthly_sales > 0 AND
59 | CASE WHEN avg_monthly_sales > 0
60 | THEN abs(sum_sales - avg_monthly_sales) / avg_monthly_sales
61 | ELSE NULL END > 0.1
62 | ORDER BY sum_sales - avg_monthly_sales, 3 -- the ordinal 3 refers to the third select column, s_store_name
63 | LIMIT 100
64 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q74.sql:
--------------------------------------------------------------------------------
1 | WITH year_total AS ( -- year_total: net paid summed per customer and year, tagged 's' (store_sales) or 'w' (web_sales)
2 | SELECT
3 | c_customer_id customer_id,
4 | c_first_name customer_first_name,
5 | c_last_name customer_last_name,
6 | d_year AS year,
7 | sum(ss_net_paid) year_total,
8 | 's' sale_type
9 | FROM
10 | customer, store_sales, date_dim
11 | WHERE c_customer_sk = ss_customer_sk
12 | AND ss_sold_date_sk = d_date_sk
13 | AND d_year IN (2001, 2001 + 1)
14 | GROUP BY
15 | c_customer_id, c_first_name, c_last_name, d_year
16 | UNION ALL
17 | SELECT
18 | c_customer_id customer_id,
19 | c_first_name customer_first_name,
20 | c_last_name customer_last_name,
21 | d_year AS year,
22 | sum(ws_net_paid) year_total,
23 | 'w' sale_type
24 | FROM
25 | customer, web_sales, date_dim
26 | WHERE c_customer_sk = ws_bill_customer_sk
27 | AND ws_sold_date_sk = d_date_sk
28 | AND d_year IN (2001, 2001 + 1)
29 | GROUP BY
30 | c_customer_id, c_first_name, c_last_name, d_year)
31 | SELECT -- result: customers whose web 2002/2001 ratio exceeds their store 2002/2001 ratio
32 | t_s_secyear.customer_id,
33 | t_s_secyear.customer_first_name,
34 | t_s_secyear.customer_last_name
35 | FROM
36 | year_total t_s_firstyear, year_total t_s_secyear, -- four aliases: store/web crossed with first/second year
37 | year_total t_w_firstyear, year_total t_w_secyear
38 | WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id
39 | AND t_s_firstyear.customer_id = t_w_secyear.customer_id
40 | AND t_s_firstyear.customer_id = t_w_firstyear.customer_id
41 | AND t_s_firstyear.sale_type = 's'
42 | AND t_w_firstyear.sale_type = 'w'
43 | AND t_s_secyear.sale_type = 's'
44 | AND t_w_secyear.sale_type = 'w'
45 | AND t_s_firstyear.year = 2001
46 | AND t_s_secyear.year = 2001 + 1
47 | AND t_w_firstyear.year = 2001
48 | AND t_w_secyear.year = 2001 + 1
49 | AND t_s_firstyear.year_total > 0 -- first-year totals must be positive; they are the divisors below
50 | AND t_w_firstyear.year_total > 0
51 | AND CASE WHEN t_w_firstyear.year_total > 0
52 | THEN t_w_secyear.year_total / t_w_firstyear.year_total
53 | ELSE NULL END
54 | > CASE WHEN t_s_firstyear.year_total > 0
55 | THEN t_s_secyear.year_total / t_s_firstyear.year_total
56 | ELSE NULL END
57 | ORDER BY 1, 1, 1 -- ordinal 1 is the first select column (customer_id); it is listed three times
58 | LIMIT 100
59 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q28.sql:
--------------------------------------------------------------------------------
1 | SELECT * -- result: the columns of six aggregate subqueries (B1..B6) side by side
2 | FROM (SELECT -- each subquery aggregates without GROUP BY: avg, count and distinct count of ss_list_price
3 | avg(ss_list_price) B1_LP,
4 | count(ss_list_price) B1_CNT,
5 | count(DISTINCT ss_list_price) B1_CNTD
6 | FROM store_sales
7 | WHERE ss_quantity BETWEEN 0 AND 5 -- B1: quantity 0-5; a row also needs at least one of the three ranges below
8 | AND (ss_list_price BETWEEN 8 AND 8 + 10
9 | OR ss_coupon_amt BETWEEN 459 AND 459 + 1000
10 | OR ss_wholesale_cost BETWEEN 57 AND 57 + 20)) B1,
11 | (SELECT
12 | avg(ss_list_price) B2_LP,
13 | count(ss_list_price) B2_CNT,
14 | count(DISTINCT ss_list_price) B2_CNTD
15 | FROM store_sales
16 | WHERE ss_quantity BETWEEN 6 AND 10 -- B2: quantity 6-10
17 | AND (ss_list_price BETWEEN 90 AND 90 + 10
18 | OR ss_coupon_amt BETWEEN 2323 AND 2323 + 1000
19 | OR ss_wholesale_cost BETWEEN 31 AND 31 + 20)) B2,
20 | (SELECT
21 | avg(ss_list_price) B3_LP,
22 | count(ss_list_price) B3_CNT,
23 | count(DISTINCT ss_list_price) B3_CNTD
24 | FROM store_sales
25 | WHERE ss_quantity BETWEEN 11 AND 15 -- B3: quantity 11-15
26 | AND (ss_list_price BETWEEN 142 AND 142 + 10
27 | OR ss_coupon_amt BETWEEN 12214 AND 12214 + 1000
28 | OR ss_wholesale_cost BETWEEN 79 AND 79 + 20)) B3,
29 | (SELECT
30 | avg(ss_list_price) B4_LP,
31 | count(ss_list_price) B4_CNT,
32 | count(DISTINCT ss_list_price) B4_CNTD
33 | FROM store_sales
34 | WHERE ss_quantity BETWEEN 16 AND 20 -- B4: quantity 16-20
35 | AND (ss_list_price BETWEEN 135 AND 135 + 10
36 | OR ss_coupon_amt BETWEEN 6071 AND 6071 + 1000
37 | OR ss_wholesale_cost BETWEEN 38 AND 38 + 20)) B4,
38 | (SELECT
39 | avg(ss_list_price) B5_LP,
40 | count(ss_list_price) B5_CNT,
41 | count(DISTINCT ss_list_price) B5_CNTD
42 | FROM store_sales
43 | WHERE ss_quantity BETWEEN 21 AND 25 -- B5: quantity 21-25
44 | AND (ss_list_price BETWEEN 122 AND 122 + 10
45 | OR ss_coupon_amt BETWEEN 836 AND 836 + 1000
46 | OR ss_wholesale_cost BETWEEN 17 AND 17 + 20)) B5,
47 | (SELECT
48 | avg(ss_list_price) B6_LP,
49 | count(ss_list_price) B6_CNT,
50 | count(DISTINCT ss_list_price) B6_CNTD
51 | FROM store_sales
52 | WHERE ss_quantity BETWEEN 26 AND 30 -- B6: quantity 26-30
53 | AND (ss_list_price BETWEEN 154 AND 154 + 10
54 | OR ss_coupon_amt BETWEEN 7326 AND 7326 + 1000
55 | OR ss_wholesale_cost BETWEEN 7 AND 7 + 20)) B6
56 | LIMIT 100
57 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q58.sql:
--------------------------------------------------------------------------------
1 | WITH ss_items AS -- ss_items: store_sales ss_ext_sales_price summed per i_item_id
2 | (SELECT
3 | i_item_id item_id,
4 | sum(ss_ext_sales_price) ss_item_rev
5 | FROM store_sales, item, date_dim
6 | WHERE ss_item_sk = i_item_sk
7 | AND d_date IN (SELECT d_date -- every date sharing the d_week_seq of '2000-01-03'
8 | FROM date_dim
9 | WHERE d_week_seq = (SELECT d_week_seq
10 | FROM date_dim
11 | WHERE d_date = '2000-01-03'))
12 | AND ss_sold_date_sk = d_date_sk
13 | GROUP BY i_item_id),
14 | cs_items AS -- cs_items: same aggregation over catalog_sales
15 | (SELECT
16 | i_item_id item_id,
17 | sum(cs_ext_sales_price) cs_item_rev
18 | FROM catalog_sales, item, date_dim
19 | WHERE cs_item_sk = i_item_sk
20 | AND d_date IN (SELECT d_date
21 | FROM date_dim
22 | WHERE d_week_seq = (SELECT d_week_seq
23 | FROM date_dim
24 | WHERE d_date = '2000-01-03'))
25 | AND cs_sold_date_sk = d_date_sk
26 | GROUP BY i_item_id),
27 | ws_items AS -- ws_items: same aggregation over web_sales
28 | (SELECT
29 | i_item_id item_id,
30 | sum(ws_ext_sales_price) ws_item_rev
31 | FROM web_sales, item, date_dim
32 | WHERE ws_item_sk = i_item_sk
33 | AND d_date IN (SELECT d_date
34 | FROM date_dim
35 | WHERE d_week_seq = (SELECT d_week_seq
36 | FROM date_dim
37 | WHERE d_date = '2000-01-03'))
38 | AND ws_sold_date_sk = d_date_sk
39 | GROUP BY i_item_id)
40 | SELECT -- result: items present in all three CTEs, with each channel's revenue and derived columns
41 | ss_items.item_id,
42 | ss_item_rev,
43 | ss_item_rev / (ss_item_rev + cs_item_rev + ws_item_rev) / 3 * 100 ss_dev, -- no extra parentheses: revenue / total, then / 3, then * 100
44 | cs_item_rev,
45 | cs_item_rev / (ss_item_rev + cs_item_rev + ws_item_rev) / 3 * 100 cs_dev,
46 | ws_item_rev,
47 | ws_item_rev / (ss_item_rev + cs_item_rev + ws_item_rev) / 3 * 100 ws_dev,
48 | (ss_item_rev + cs_item_rev + ws_item_rev) / 3 average
49 | FROM ss_items, cs_items, ws_items
50 | WHERE ss_items.item_id = cs_items.item_id
51 | AND ss_items.item_id = ws_items.item_id
52 | AND ss_item_rev BETWEEN 0.9 * cs_item_rev AND 1.1 * cs_item_rev -- the six BETWEEN checks require each channel's revenue to lie within 0.9x-1.1x of each other channel's
53 | AND ss_item_rev BETWEEN 0.9 * ws_item_rev AND 1.1 * ws_item_rev
54 | AND cs_item_rev BETWEEN 0.9 * ss_item_rev AND 1.1 * ss_item_rev
55 | AND cs_item_rev BETWEEN 0.9 * ws_item_rev AND 1.1 * ws_item_rev
56 | AND ws_item_rev BETWEEN 0.9 * ss_item_rev AND 1.1 * ss_item_rev
57 | AND ws_item_rev BETWEEN 0.9 * cs_item_rev AND 1.1 * cs_item_rev
58 | ORDER BY item_id, ss_item_rev
59 | LIMIT 100
60 |
--------------------------------------------------------------------------------
/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RangerSparkRowFilterExtensionTest.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.catalyst.optimizer
19 |
20 | import org.apache.spark.sql.hive.test.TestHive
21 | import org.scalatest.FunSuite
22 | import org.apache.spark.sql.RangerSparkTestUtils._
23 | import org.apache.spark.sql.catalyst.plans.logical.{Filter, RangerSparkRowFilter}
24 |
25 | /** Applies RangerSparkRowFilterExtension to `select * from src` as users bob and alice. */
26 | class RangerSparkRowFilterExtensionTest extends FunSuite {
27 |   private val spark = TestHive.sparkSession
28 |
29 |   test("ranger spark row filter extension") {
30 |     val rule = RangerSparkRowFilterExtension(spark)
31 |     val optimized = spark.sql("select * from src").queryExecution.optimizedPlan
32 |     println(optimized)
33 |
34 |     withUser("bob") { // expects a RangerSparkRowFilter root and at least one Filter node
35 |       val rewritten = rule(optimized)
36 |       assert(rewritten.isInstanceOf[RangerSparkRowFilter])
37 |       val found = rewritten.collect { case f: Filter => f }
38 |       assert(found.nonEmpty, "ranger row level filters should be applied automatically")
39 |       println(rewritten)
40 |     }
41 |     withUser("alice") { // expects a RangerSparkRowFilter root and no Filter node
42 |       val rewritten = rule(optimized)
43 |       assert(rewritten.isInstanceOf[RangerSparkRowFilter])
44 |       val found = rewritten.collect { case f: Filter => f }
45 |       assert(found.isEmpty, "alice does not have implicit filters")
46 |       println(rewritten)
47 |     }
48 |   }
49 | }
50 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q23b.sql:
--------------------------------------------------------------------------------
1 | WITH frequent_ss_items AS -- frequent_ss_items: store_sales items with more than 4 sales rows on a single date, years 2000-2003
2 | (SELECT
3 | substr(i_item_desc, 1, 30) itemdesc,
4 | i_item_sk item_sk,
5 | d_date solddate,
6 | count(*) cnt
7 | FROM store_sales, date_dim, item
8 | WHERE ss_sold_date_sk = d_date_sk
9 | AND ss_item_sk = i_item_sk
10 | AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3)
11 | GROUP BY substr(i_item_desc, 1, 30), i_item_sk, d_date
12 | HAVING count(*) > 4),
13 | max_store_sales AS -- max_store_sales: largest per-customer sum of ss_quantity * ss_sales_price over 2000-2003
14 | (SELECT max(csales) tpcds_cmax
15 | FROM (SELECT
16 | c_customer_sk,
17 | sum(ss_quantity * ss_sales_price) csales
18 | FROM store_sales, customer, date_dim
19 | WHERE ss_customer_sk = c_customer_sk
20 | AND ss_sold_date_sk = d_date_sk
21 | AND d_year IN (2000, 2000 + 1, 2000 + 2, 2000 + 3)
22 | GROUP BY c_customer_sk) x),
23 | best_ss_customer AS -- best_ss_customer: customers whose store total (no date filter here) exceeds 50% of tpcds_cmax
24 | (SELECT
25 | c_customer_sk,
26 | sum(ss_quantity * ss_sales_price) ssales
27 | FROM store_sales
28 | , customer
29 | WHERE ss_customer_sk = c_customer_sk
30 | GROUP BY c_customer_sk
31 | HAVING sum(ss_quantity * ss_sales_price) > (50 / 100.0) *
32 | (SELECT * -- scalar subquery: max_store_sales exposes the single column tpcds_cmax
33 | FROM max_store_sales))
34 | SELECT -- result: per-name sales totals from the two branches below, not merged across branches
35 | c_last_name,
36 | c_first_name,
37 | sales
38 | FROM ((SELECT -- branch 1: catalog_sales quantity * list price summed per (c_last_name, c_first_name)
39 | c_last_name,
40 | c_first_name,
41 | sum(cs_quantity * cs_list_price) sales
42 | FROM catalog_sales, customer, date_dim
43 | WHERE d_year = 2000
44 | AND d_moy = 2 -- d_year = 2000 and d_moy = 2 restrict both branches to a single month
45 | AND cs_sold_date_sk = d_date_sk
46 | AND cs_item_sk IN (SELECT item_sk
47 | FROM frequent_ss_items)
48 | AND cs_bill_customer_sk IN (SELECT c_customer_sk
49 | FROM best_ss_customer)
50 | AND cs_bill_customer_sk = c_customer_sk
51 | GROUP BY c_last_name, c_first_name)
52 | UNION ALL
53 | (SELECT -- branch 2: web_sales, same item and customer restrictions and grouping
54 | c_last_name,
55 | c_first_name,
56 | sum(ws_quantity * ws_list_price) sales
57 | FROM web_sales, customer, date_dim
58 | WHERE d_year = 2000
59 | AND d_moy = 2
60 | AND ws_sold_date_sk = d_date_sk
61 | AND ws_item_sk IN (SELECT item_sk
62 | FROM frequent_ss_items)
63 | AND ws_bill_customer_sk IN (SELECT c_customer_sk
64 | FROM best_ss_customer)
65 | AND ws_bill_customer_sk = c_customer_sk
66 | GROUP BY c_last_name, c_first_name)) y
67 | ORDER BY c_last_name, c_first_name, sales
68 | LIMIT 100
69 |
--------------------------------------------------------------------------------
/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RangerSparkMaskingExtensionTest.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.catalyst.optimizer
19 |
20 | import org.apache.spark.sql.hive.test.TestHive
21 | import org.apache.spark.sql.RangerSparkTestUtils._
22 | import org.apache.spark.sql.catalyst.expressions.Alias
23 | import org.apache.spark.sql.catalyst.plans.logical.{Project, RangerSparkMasking}
24 | import org.scalatest.FunSuite
25 |
26 | /** Applies RangerSparkMaskingExtension to `select * from src` as users bob and alice. */
27 | class RangerSparkMaskingExtensionTest extends FunSuite {
28 |
29 |   private val spark = TestHive.sparkSession
30 |
31 |   test("data masking for bob show last 4") {
32 |     val rule = RangerSparkMaskingExtension(spark)
33 |     val optimized = spark.sql("select * from src").queryExecution.optimizedPlan
34 |     println(optimized)
35 |
36 |     withUser("bob") { // expects a Project whose second column is an Alias over mask_show_last_n
37 |       val masked = rule(optimized)
38 |       assert(masked.isInstanceOf[Project])
39 |       val columns = masked.asInstanceOf[Project].projectList
40 |       val firstCol = columns.head
41 |       assert(firstCol.name === "key", "no affect on un masking attribute")
42 |       val secondCol = columns.tail.head
43 |       assert(secondCol.name === "value", "attibute name should be unchanged")
44 |       val maskedSql = secondCol.asInstanceOf[Alias].child.sql
45 |       assert(maskedSql === "mask_show_last_n(`value`, 4, 'x', 'x', 'x', -1, '1')")
46 |     }
47 |
48 |     withUser("alice") { // expects the input plan wrapped in RangerSparkMasking and nothing else
49 |       assert(rule(optimized) === RangerSparkMasking(optimized))
50 |     }
51 |   }
52 | }
53 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q78.sql:
--------------------------------------------------------------------------------
1 | WITH ws AS -- ws: web_sales rows with no matching web_returns row, summed per (d_year, ws_item_sk, ws_bill_customer_sk)
2 | (SELECT
3 | d_year AS ws_sold_year,
4 | ws_item_sk,
5 | ws_bill_customer_sk ws_customer_sk,
6 | sum(ws_quantity) ws_qty,
7 | sum(ws_wholesale_cost) ws_wc,
8 | sum(ws_sales_price) ws_sp
9 | FROM web_sales
10 | LEFT JOIN web_returns ON wr_order_number = ws_order_number AND ws_item_sk = wr_item_sk
11 | JOIN date_dim ON ws_sold_date_sk = d_date_sk
12 | WHERE wr_order_number IS NULL -- LEFT JOIN plus IS NULL keeps only sales that found no return row
13 | GROUP BY d_year, ws_item_sk, ws_bill_customer_sk
14 | ),
15 | cs AS -- cs: same shape over catalog_sales / catalog_returns
16 | (SELECT
17 | d_year AS cs_sold_year,
18 | cs_item_sk,
19 | cs_bill_customer_sk cs_customer_sk,
20 | sum(cs_quantity) cs_qty,
21 | sum(cs_wholesale_cost) cs_wc,
22 | sum(cs_sales_price) cs_sp
23 | FROM catalog_sales
24 | LEFT JOIN catalog_returns ON cr_order_number = cs_order_number AND cs_item_sk = cr_item_sk
25 | JOIN date_dim ON cs_sold_date_sk = d_date_sk
26 | WHERE cr_order_number IS NULL
27 | GROUP BY d_year, cs_item_sk, cs_bill_customer_sk
28 | ),
29 | ss AS -- ss: same shape over store_sales / store_returns, matched on ticket number and item
30 | (SELECT
31 | d_year AS ss_sold_year,
32 | ss_item_sk,
33 | ss_customer_sk,
34 | sum(ss_quantity) ss_qty,
35 | sum(ss_wholesale_cost) ss_wc,
36 | sum(ss_sales_price) ss_sp
37 | FROM store_sales
38 | LEFT JOIN store_returns ON sr_ticket_number = ss_ticket_number AND ss_item_sk = sr_item_sk
39 | JOIN date_dim ON ss_sold_date_sk = d_date_sk
40 | WHERE sr_ticket_number IS NULL
41 | GROUP BY d_year, ss_item_sk, ss_customer_sk
42 | )
43 | SELECT -- result: store totals next to the combined web + catalog totals for the same year/item/customer
44 | round(ss_qty / (coalesce(ws_qty + cs_qty, 1)), 2) ratio, -- coalesce substitutes 1 when ws_qty + cs_qty is NULL
45 | ss_qty store_qty,
46 | ss_wc store_wholesale_cost,
47 | ss_sp store_sales_price,
48 | coalesce(ws_qty, 0) + coalesce(cs_qty, 0) other_chan_qty,
49 | coalesce(ws_wc, 0) + coalesce(cs_wc, 0) other_chan_wholesale_cost,
50 | coalesce(ws_sp, 0) + coalesce(cs_sp, 0) other_chan_sales_price
51 | FROM ss
52 | LEFT JOIN ws
53 | ON (ws_sold_year = ss_sold_year AND ws_item_sk = ss_item_sk AND ws_customer_sk = ss_customer_sk)
54 | LEFT JOIN cs
55 | ON (cs_sold_year = ss_sold_year AND cs_item_sk = ss_item_sk AND cs_customer_sk = ss_customer_sk)
56 | WHERE coalesce(ws_qty, 0) > 0 AND coalesce(cs_qty, 0) > 0 AND ss_sold_year = 2000 -- keeps only year-2000 rows with positive ws_qty and cs_qty
57 | ORDER BY
58 | ratio,
59 | ss_qty DESC, ss_wc DESC, ss_sp DESC,
60 | other_chan_qty,
61 | other_chan_wholesale_cost,
62 | other_chan_sales_price,
63 | round(ss_qty / (coalesce(ws_qty + cs_qty, 1)), 2) -- same expression as the ratio column
64 | LIMIT 100
65 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q59.sql:
--------------------------------------------------------------------------------
1 | WITH wss AS -- per (d_week_seq, ss_store_sk): ss_sales_price summed into one column per day name
2 | (SELECT
3 | d_week_seq,
4 | ss_store_sk,
5 | sum(CASE WHEN (d_day_name = 'Sunday')
6 | THEN ss_sales_price
7 | ELSE NULL END) sun_sales, -- rows for other days yield NULL, which sum() skips
8 | sum(CASE WHEN (d_day_name = 'Monday')
9 | THEN ss_sales_price
10 | ELSE NULL END) mon_sales,
11 | sum(CASE WHEN (d_day_name = 'Tuesday')
12 | THEN ss_sales_price
13 | ELSE NULL END) tue_sales,
14 | sum(CASE WHEN (d_day_name = 'Wednesday')
15 | THEN ss_sales_price
16 | ELSE NULL END) wed_sales,
17 | sum(CASE WHEN (d_day_name = 'Thursday')
18 | THEN ss_sales_price
19 | ELSE NULL END) thu_sales,
20 | sum(CASE WHEN (d_day_name = 'Friday')
21 | THEN ss_sales_price
22 | ELSE NULL END) fri_sales,
23 | sum(CASE WHEN (d_day_name = 'Saturday')
24 | THEN ss_sales_price
25 | ELSE NULL END) sat_sales
26 | FROM store_sales, date_dim
27 | WHERE d_date_sk = ss_sold_date_sk
28 | GROUP BY d_week_seq, ss_store_sk
29 | )
30 | SELECT
31 | s_store_name1,
32 | s_store_id1,
33 | d_week_seq1,
34 | sun_sales1 / sun_sales2, -- each output is the y value divided by the matching x value
35 | mon_sales1 / mon_sales2,
36 | tue_sales1 / tue_sales2,
37 | wed_sales1 / wed_sales2,
38 | thu_sales1 / thu_sales2,
39 | fri_sales1 / fri_sales2,
40 | sat_sales1 / sat_sales2
41 | FROM
42 | (SELECT -- y: wss rows for weeks whose d_month_seq lies in 1212 .. 1212 + 11, columns suffixed 1
43 | s_store_name s_store_name1,
44 | wss.d_week_seq d_week_seq1,
45 | s_store_id s_store_id1,
46 | sun_sales sun_sales1,
47 | mon_sales mon_sales1,
48 | tue_sales tue_sales1,
49 | wed_sales wed_sales1,
50 | thu_sales thu_sales1,
51 | fri_sales fri_sales1,
52 | sat_sales sat_sales1
53 | FROM wss, store, date_dim d
54 | WHERE d.d_week_seq = wss.d_week_seq AND
55 | ss_store_sk = s_store_sk AND
56 | d_month_seq BETWEEN 1212 AND 1212 + 11) y,
57 | (SELECT -- x: wss rows for weeks whose d_month_seq lies in 1212 + 12 .. 1212 + 23, columns suffixed 2
58 | s_store_name s_store_name2,
59 | wss.d_week_seq d_week_seq2,
60 | s_store_id s_store_id2,
61 | sun_sales sun_sales2,
62 | mon_sales mon_sales2,
63 | tue_sales tue_sales2,
64 | wed_sales wed_sales2,
65 | thu_sales thu_sales2,
66 | fri_sales fri_sales2,
67 | sat_sales sat_sales2
68 | FROM wss, store, date_dim d
69 | WHERE d.d_week_seq = wss.d_week_seq AND
70 | ss_store_sk = s_store_sk AND
71 | d_month_seq BETWEEN 1212 + 12 AND 1212 + 23) x
72 | WHERE s_store_id1 = s_store_id2
73 | AND d_week_seq1 = d_week_seq2 - 52 -- pairs a y week with the x week whose sequence number is 52 higher
74 | ORDER BY s_store_name1, s_store_id1, d_week_seq1
75 | LIMIT 100
76 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q2.sql:
--------------------------------------------------------------------------------
1 | WITH wscs AS -- web_sales and catalog_sales stacked into (sold_date_sk, sales_price) rows
2 | ( SELECT
3 | sold_date_sk,
4 | sales_price
5 | FROM (SELECT
6 | ws_sold_date_sk sold_date_sk,
7 | ws_ext_sales_price sales_price
8 | FROM web_sales) x
9 | UNION ALL -- keeps duplicates: every sale row from both tables is retained
10 | (SELECT
11 | cs_sold_date_sk sold_date_sk,
12 | cs_ext_sales_price sales_price
13 | FROM catalog_sales)),
14 | wswscs AS -- per d_week_seq: sales_price summed into one column per day name
15 | ( SELECT
16 | d_week_seq,
17 | sum(CASE WHEN (d_day_name = 'Sunday')
18 | THEN sales_price
19 | ELSE NULL END) -- rows for other days yield NULL, which sum() skips
20 | sun_sales,
21 | sum(CASE WHEN (d_day_name = 'Monday')
22 | THEN sales_price
23 | ELSE NULL END)
24 | mon_sales,
25 | sum(CASE WHEN (d_day_name = 'Tuesday')
26 | THEN sales_price
27 | ELSE NULL END)
28 | tue_sales,
29 | sum(CASE WHEN (d_day_name = 'Wednesday')
30 | THEN sales_price
31 | ELSE NULL END)
32 | wed_sales,
33 | sum(CASE WHEN (d_day_name = 'Thursday')
34 | THEN sales_price
35 | ELSE NULL END)
36 | thu_sales,
37 | sum(CASE WHEN (d_day_name = 'Friday')
38 | THEN sales_price
39 | ELSE NULL END)
40 | fri_sales,
41 | sum(CASE WHEN (d_day_name = 'Saturday')
42 | THEN sales_price
43 | ELSE NULL END)
44 | sat_sales
45 | FROM wscs, date_dim
46 | WHERE d_date_sk = sold_date_sk
47 | GROUP BY d_week_seq)
48 | SELECT
49 | d_week_seq1,
50 | round(sun_sales1 / sun_sales2, 2), -- each output is the y value over the matching z value, rounded to 2 places
51 | round(mon_sales1 / mon_sales2, 2),
52 | round(tue_sales1 / tue_sales2, 2),
53 | round(wed_sales1 / wed_sales2, 2),
54 | round(thu_sales1 / thu_sales2, 2),
55 | round(fri_sales1 / fri_sales2, 2),
56 | round(sat_sales1 / sat_sales2, 2)
57 | FROM
58 | (SELECT -- y: wswscs joined to date_dim rows with d_year = 2001, columns suffixed 1
59 | wswscs.d_week_seq d_week_seq1,
60 | sun_sales sun_sales1,
61 | mon_sales mon_sales1,
62 | tue_sales tue_sales1,
63 | wed_sales wed_sales1,
64 | thu_sales thu_sales1,
65 | fri_sales fri_sales1,
66 | sat_sales sat_sales1
67 | FROM wswscs, date_dim
68 | WHERE date_dim.d_week_seq = wswscs.d_week_seq AND d_year = 2001) y,
69 | (SELECT -- z: wswscs joined to date_dim rows with d_year = 2001 + 1, columns suffixed 2
70 | wswscs.d_week_seq d_week_seq2,
71 | sun_sales sun_sales2,
72 | mon_sales mon_sales2,
73 | tue_sales tue_sales2,
74 | wed_sales wed_sales2,
75 | thu_sales thu_sales2,
76 | fri_sales fri_sales2,
77 | sat_sales sat_sales2
78 | FROM wswscs, date_dim
79 | WHERE date_dim.d_week_seq = wswscs.d_week_seq AND d_year = 2001 + 1) z
80 | WHERE d_week_seq1 = d_week_seq2 - 53 -- pairs a y week with the z week whose sequence number is 53 higher
81 | ORDER BY d_week_seq1
82 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q85.sql:
--------------------------------------------------------------------------------
1 | SELECT -- averages over web sales joined to their web_returns rows, one output row per r_reason_desc
2 | substr(r_reason_desc, 1, 20),
3 | avg(ws_quantity),
4 | avg(wr_refunded_cash),
5 | avg(wr_fee)
6 | FROM web_sales, web_returns, web_page, customer_demographics cd1, -- cd1 is matched on wr_refunded_cdemo_sk below
7 | customer_demographics cd2, customer_address, date_dim, reason -- cd2 is matched on wr_returning_cdemo_sk below
8 | WHERE ws_web_page_sk = wp_web_page_sk
9 | AND ws_item_sk = wr_item_sk
10 | AND ws_order_number = wr_order_number
11 | AND ws_sold_date_sk = d_date_sk AND d_year = 2000
12 | AND cd1.cd_demo_sk = wr_refunded_cdemo_sk
13 | AND cd2.cd_demo_sk = wr_returning_cdemo_sk
14 | AND ca_address_sk = wr_refunded_addr_sk
15 | AND r_reason_sk = wr_reason_sk
16 | AND
17 | ( -- any one of three (marital status, education status, sales price range) combinations; cd1 and cd2 must share both statuses
18 | (
19 | cd1.cd_marital_status = 'M'
20 | AND
21 | cd1.cd_marital_status = cd2.cd_marital_status
22 | AND
23 | cd1.cd_education_status = 'Advanced Degree'
24 | AND
25 | cd1.cd_education_status = cd2.cd_education_status
26 | AND
27 | ws_sales_price BETWEEN 100.00 AND 150.00
28 | )
29 | OR
30 | (
31 | cd1.cd_marital_status = 'S'
32 | AND
33 | cd1.cd_marital_status = cd2.cd_marital_status
34 | AND
35 | cd1.cd_education_status = 'College'
36 | AND
37 | cd1.cd_education_status = cd2.cd_education_status
38 | AND
39 | ws_sales_price BETWEEN 50.00 AND 100.00
40 | )
41 | OR
42 | (
43 | cd1.cd_marital_status = 'W'
44 | AND
45 | cd1.cd_marital_status = cd2.cd_marital_status
46 | AND
47 | cd1.cd_education_status = '2 yr Degree'
48 | AND
49 | cd1.cd_education_status = cd2.cd_education_status
50 | AND
51 | ws_sales_price BETWEEN 150.00 AND 200.00
52 | )
53 | )
54 | AND
55 | ( -- any one of three (state list, net profit range) combinations, each restricted to 'United States'
56 | (
57 | ca_country = 'United States'
58 | AND
59 | ca_state IN ('IN', 'OH', 'NJ')
60 | AND ws_net_profit BETWEEN 100 AND 200
61 | )
62 | OR
63 | (
64 | ca_country = 'United States'
65 | AND
66 | ca_state IN ('WI', 'CT', 'KY')
67 | AND ws_net_profit BETWEEN 150 AND 300
68 | )
69 | OR
70 | (
71 | ca_country = 'United States'
72 | AND
73 | ca_state IN ('LA', 'IA', 'AR')
74 | AND ws_net_profit BETWEEN 50 AND 250
75 | )
76 | )
77 | GROUP BY r_reason_desc -- groups on the full description although only its first 20 characters are selected
78 | ORDER BY substr(r_reason_desc, 1, 20)
79 | , avg(ws_quantity)
80 | , avg(wr_refunded_cash)
81 | , avg(wr_fee)
82 | LIMIT 100
83 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q11.sql:
--------------------------------------------------------------------------------
1 | WITH year_total AS ( -- per customer and d_year: sum(ext list price - ext discount), tagged 's' for store_sales and 'w' for web_sales
2 | SELECT
3 | c_customer_id customer_id,
4 | c_first_name customer_first_name,
5 | c_last_name customer_last_name,
6 | c_preferred_cust_flag customer_preferred_cust_flag,
7 | c_birth_country customer_birth_country,
8 | c_login customer_login,
9 | c_email_address customer_email_address,
10 | d_year dyear,
11 | sum(ss_ext_list_price - ss_ext_discount_amt) year_total,
12 | 's' sale_type
13 | FROM customer, store_sales, date_dim
14 | WHERE c_customer_sk = ss_customer_sk
15 | AND ss_sold_date_sk = d_date_sk
16 | GROUP BY c_customer_id
17 | , c_first_name
18 | , c_last_name
19 | , d_year
20 | , c_preferred_cust_flag
21 | , c_birth_country
22 | , c_login
23 | , c_email_address
24 | , d_year -- NOTE(review): d_year is already listed above in this GROUP BY; the repeat looks redundant
25 | UNION ALL
26 | SELECT
27 | c_customer_id customer_id,
28 | c_first_name customer_first_name,
29 | c_last_name customer_last_name,
30 | c_preferred_cust_flag customer_preferred_cust_flag,
31 | c_birth_country customer_birth_country,
32 | c_login customer_login,
33 | c_email_address customer_email_address,
34 | d_year dyear,
35 | sum(ws_ext_list_price - ws_ext_discount_amt) year_total,
36 | 'w' sale_type
37 | FROM customer, web_sales, date_dim
38 | WHERE c_customer_sk = ws_bill_customer_sk
39 | AND ws_sold_date_sk = d_date_sk
40 | GROUP BY
41 | c_customer_id, c_first_name, c_last_name, c_preferred_cust_flag, c_birth_country,
42 | c_login, c_email_address, d_year)
43 | SELECT t_s_secyear.customer_preferred_cust_flag
44 | FROM year_total t_s_firstyear -- filtered below to sale_type 's', dyear 2001
45 | , year_total t_s_secyear -- filtered below to sale_type 's', dyear 2001 + 1
46 | , year_total t_w_firstyear -- filtered below to sale_type 'w', dyear 2001
47 | , year_total t_w_secyear -- filtered below to sale_type 'w', dyear 2001 + 1
48 | WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id
49 | AND t_s_firstyear.customer_id = t_w_secyear.customer_id
50 | AND t_s_firstyear.customer_id = t_w_firstyear.customer_id
51 | AND t_s_firstyear.sale_type = 's'
52 | AND t_w_firstyear.sale_type = 'w'
53 | AND t_s_secyear.sale_type = 's'
54 | AND t_w_secyear.sale_type = 'w'
55 | AND t_s_firstyear.dyear = 2001
56 | AND t_s_secyear.dyear = 2001 + 1
57 | AND t_w_firstyear.dyear = 2001
58 | AND t_w_secyear.dyear = 2001 + 1
59 | AND t_s_firstyear.year_total > 0
60 | AND t_w_firstyear.year_total > 0
61 | AND CASE WHEN t_w_firstyear.year_total > 0 -- web second-year / first-year ratio ...
62 | THEN t_w_secyear.year_total / t_w_firstyear.year_total
63 | ELSE NULL END
64 | > CASE WHEN t_s_firstyear.year_total > 0 -- ... must exceed the same ratio computed for store sales
65 | THEN t_s_secyear.year_total / t_s_firstyear.year_total
66 | ELSE NULL END
67 | ORDER BY t_s_secyear.customer_preferred_cust_flag
68 | LIMIT 100
69 |
--------------------------------------------------------------------------------
/src/test/scala/org/apache/ranger/services/spark/RangerAdminClientImpl.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.ranger.services.spark
19 |
20 | import java.nio.file.{Files, FileSystems}
21 | import java.util
22 |
23 | import com.google.gson.GsonBuilder
24 | import org.apache.commons.logging.{Log, LogFactory}
25 | import org.apache.ranger.admin.client.RangerAdminRESTClient
26 | import org.apache.ranger.plugin.util.{GrantRevokeRequest, ServicePolicies, ServiceTags}
27 |
28 | /**
29 |  * Test stand-in for [[RangerAdminRESTClient]]: service policies are parsed from a JSON file
30 |  * located under this class's code-source location, and every other overridden operation is
31 |  * either a no-op or returns null.
32 |  */
33 | class RangerAdminClientImpl extends RangerAdminRESTClient {
34 |   private val LOG: Log = LogFactory.getLog(classOf[RangerAdminClientImpl])
35 |   private val cacheFilename = "sparkSql_hive_jenkins.json"
36 |   private val gson =
37 |     new GsonBuilder().setDateFormat("yyyyMMdd-HH:mm:ss.SSS-Z").setPrettyPrinting().create
38 |
39 |   /** Intentionally empty; the three arguments are ignored. */
40 |   override def init(serviceName: String, appId: String, configPropertyPrefix: String): Unit = {}
41 |
42 |   /**
43 |    * Reads `cacheFilename` from this class's code-source location and deserializes it.
44 |    * Both arguments are ignored, so the file is re-read and returned on every call.
45 |    *
46 |    * @return the policies parsed from the JSON file
47 |    */
48 |   override def getServicePoliciesIfUpdated(
49 |       lastKnownVersion: Long,
50 |       lastActivationTimeInMillis: Long): ServicePolicies = {
51 |     val location = this.getClass.getProtectionDomain.getCodeSource.getLocation
52 |     // URL.getPath keeps percent-escapes (a space stays "%20"); going through the URI decodes
53 |     // them, so the file is still found when the build directory path has such characters.
54 |     val basedir = new java.io.File(location.toURI).getPath
55 |     val cachePath = FileSystems.getDefault.getPath(basedir, cacheFilename)
56 |     LOG.info("Reading policies from " + cachePath)
57 |     val bytes = Files.readAllBytes(cachePath)
58 |     // Decode explicitly instead of relying on the platform default charset
59 |     // (the policy file is assumed to be UTF-8 encoded JSON).
60 |     val json = new String(bytes, java.nio.charset.StandardCharsets.UTF_8)
61 |     gson.fromJson(json, classOf[ServicePolicies])
62 |   }
63 |
64 |   /** No-op. */
65 |   override def grantAccess(request: GrantRevokeRequest): Unit = {}
66 |
67 |   /** No-op. */
68 |   override def revokeAccess(request: GrantRevokeRequest): Unit = {}
69 |
70 |   /** Always returns null. */
71 |   override def getServiceTagsIfUpdated(
72 |       lastKnownVersion: Long,
73 |       lastActivationTimeInMillis: Long): ServiceTags = null
74 |
75 |   /** Always returns null. */
76 |   override def getTagTypes(tagTypePattern: String): util.List[String] = null
77 | }
78 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q75.sql:
--------------------------------------------------------------------------------
1 | WITH all_sales AS ( -- per (d_year, brand, class, category, manufacturer): quantity and amount net of returns, over all three channels
2 | SELECT
3 | d_year,
4 | i_brand_id,
5 | i_class_id,
6 | i_category_id,
7 | i_manufact_id,
8 | SUM(sales_cnt) AS sales_cnt,
9 | SUM(sales_amt) AS sales_amt
10 | FROM (
11 | SELECT
12 | d_year,
13 | i_brand_id,
14 | i_class_id,
15 | i_category_id,
16 | i_manufact_id,
17 | cs_quantity - COALESCE(cr_return_quantity, 0) AS sales_cnt, -- returned quantity counts as 0 when the LEFT JOIN finds no return row
18 | cs_ext_sales_price - COALESCE(cr_return_amount, 0.0) AS sales_amt
19 | FROM catalog_sales
20 | JOIN item ON i_item_sk = cs_item_sk
21 | JOIN date_dim ON d_date_sk = cs_sold_date_sk
22 | LEFT JOIN catalog_returns ON (cs_order_number = cr_order_number
23 | AND cs_item_sk = cr_item_sk)
24 | WHERE i_category = 'Books'
25 | UNION -- plain UNION, not UNION ALL: identical detail rows are collapsed before the outer SUM
26 | SELECT
27 | d_year,
28 | i_brand_id,
29 | i_class_id,
30 | i_category_id,
31 | i_manufact_id,
32 | ss_quantity - COALESCE(sr_return_quantity, 0) AS sales_cnt,
33 | ss_ext_sales_price - COALESCE(sr_return_amt, 0.0) AS sales_amt
34 | FROM store_sales
35 | JOIN item ON i_item_sk = ss_item_sk
36 | JOIN date_dim ON d_date_sk = ss_sold_date_sk
37 | LEFT JOIN store_returns ON (ss_ticket_number = sr_ticket_number
38 | AND ss_item_sk = sr_item_sk)
39 | WHERE i_category = 'Books'
40 | UNION
41 | SELECT
42 | d_year,
43 | i_brand_id,
44 | i_class_id,
45 | i_category_id,
46 | i_manufact_id,
47 | ws_quantity - COALESCE(wr_return_quantity, 0) AS sales_cnt,
48 | ws_ext_sales_price - COALESCE(wr_return_amt, 0.0) AS sales_amt
49 | FROM web_sales
50 | JOIN item ON i_item_sk = ws_item_sk
51 | JOIN date_dim ON d_date_sk = ws_sold_date_sk
52 | LEFT JOIN web_returns ON (ws_order_number = wr_order_number
53 | AND ws_item_sk = wr_item_sk)
54 | WHERE i_category = 'Books') sales_detail
55 | GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id)
56 | SELECT
57 | prev_yr.d_year AS prev_year,
58 | curr_yr.d_year AS year,
59 | curr_yr.i_brand_id,
60 | curr_yr.i_class_id,
61 | curr_yr.i_category_id,
62 | curr_yr.i_manufact_id,
63 | prev_yr.sales_cnt AS prev_yr_cnt,
64 | curr_yr.sales_cnt AS curr_yr_cnt,
65 | curr_yr.sales_cnt - prev_yr.sales_cnt AS sales_cnt_diff,
66 | curr_yr.sales_amt - prev_yr.sales_amt AS sales_amt_diff
67 | FROM all_sales curr_yr, all_sales prev_yr -- self-join: curr_yr is d_year 2002, prev_yr is 2002 - 1
68 | WHERE curr_yr.i_brand_id = prev_yr.i_brand_id
69 | AND curr_yr.i_class_id = prev_yr.i_class_id
70 | AND curr_yr.i_category_id = prev_yr.i_category_id
71 | AND curr_yr.i_manufact_id = prev_yr.i_manufact_id
72 | AND curr_yr.d_year = 2002
73 | AND prev_yr.d_year = 2002 - 1
74 | AND CAST(curr_yr.sales_cnt AS DECIMAL(17, 2)) / CAST(prev_yr.sales_cnt AS DECIMAL(17, 2)) < 0.9 -- current count below 0.9 of the previous year's
75 | ORDER BY sales_cnt_diff
76 | LIMIT 100
77 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/ranger/authorization/spark/authorizer/RangerSparkPlugin.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.ranger.authorization.spark.authorizer
19 |
20 | import java.io.{File, IOException}
21 |
22 | import org.apache.commons.logging.LogFactory
23 | import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzSessionContext
24 | import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzSessionContext.CLIENT_TYPE
25 | import org.apache.ranger.authorization.hadoop.config.RangerConfiguration
26 | import org.apache.ranger.plugin.service.RangerBasePlugin
27 |
28 | /**
29 |  * [[RangerBasePlugin]] for Spark SQL, constructed with the arguments ("spark", "sparkSql").
30 |  * The constructor is private; obtain the instance through
31 |  * `RangerSparkPlugin.build().getOrCreate()`.
32 |  */
33 | class RangerSparkPlugin private extends RangerBasePlugin("spark", "sparkSql") {
34 |   import RangerSparkPlugin._
35 |
36 |   private val LOG = LogFactory.getLog(classOf[RangerSparkPlugin])
37 |
38 |   /**
39 |    * Entries of `ranger.plugin.spark.urlauth.filesystem.schemes` (default "hdfs:,file:"),
40 |    * split on commas and trimmed.
41 |    */
42 |   lazy val fsScheme: Array[String] = RangerConfiguration.getInstance()
43 |     .get("ranger.plugin.spark.urlauth.filesystem.schemes", "hdfs:,file:")
44 |     .split(",")
45 |     .map(_.trim)
46 |
47 |   /**
48 |    * Runs the base-class initialization, then makes sure the directory named by
49 |    * `ranger.plugin.spark.policy.cache.dir` exists and is a readable, writable directory,
50 |    * creating it when it is missing.
51 |    *
52 |    * @throws IOException if the setting is absent, the path exists but is unusable, or the
53 |    *                     directory cannot be created
54 |    */
55 |   override def init(): Unit = {
56 |     super.init()
57 |     val cacheDirKey = "ranger.plugin.spark.policy.cache.dir"
58 |     val cacheDirPath = rangerConf.get(cacheDirKey)
59 |     if (cacheDirPath == null) {
60 |       // new File(null) would fail with a bare NullPointerException; name the key instead
61 |       throw new IOException(
62 |         "Policy cache directory is not configured, please set " + cacheDirKey)
63 |     }
64 |     val cacheDir = new File(cacheDirPath)
65 |     if (cacheDir.exists() &&
66 |         (!cacheDir.isDirectory || !cacheDir.canRead || !cacheDir.canWrite)) {
67 |       throw new IOException("Policy cache directory already exists at " +
68 |         cacheDir.getAbsolutePath + ", but it is unavailable")
69 |     }
70 |
71 |     if (!cacheDir.exists() && !cacheDir.mkdirs()) {
72 |       throw new IOException("Unable to create ranger policy cache directory at " +
73 |         cacheDir.getAbsolutePath)
74 |     }
75 |     LOG.info("Policy cache directory successfully set to " + cacheDir.getAbsolutePath)
76 |   }
77 | }
78 |
79 | object RangerSparkPlugin {
80 |
81 |   private val rangerConf: RangerConfiguration = RangerConfiguration.getInstance
82 |
83 |   /** `xasecure.spark.describetable.showcolumns.authorization.option`, "NONE" if unset. */
84 |   val showColumnsOption: String = rangerConf.get(
85 |     "xasecure.spark.describetable.showcolumns.authorization.option", "NONE")
86 |
87 |   // Held here rather than inside Builder: build() returns a fresh Builder on every call, so a
88 |   // per-Builder field would make each build().getOrCreate() construct and init another plugin.
89 |   @volatile private var sparkPlugin: RangerSparkPlugin = _
90 |
91 |   /** Returns a new [[Builder]]; all builders hand out the same plugin instance. */
92 |   def build(): Builder = new Builder
93 |
94 |   class Builder {
95 |
96 |     /**
97 |      * Returns the shared plugin, creating and initializing it on first use. The instance is
98 |      * stored only after `init()` returns, so a failed initialization is retried by the next
99 |      * call instead of leaving an uninitialized plugin behind.
100 |      */
101 |     def getOrCreate(): RangerSparkPlugin = RangerSparkPlugin.synchronized {
102 |       if (sparkPlugin == null) {
103 |         val plugin = new RangerSparkPlugin
104 |         plugin.init()
105 |         sparkPlugin = plugin
106 |       }
107 |       sparkPlugin
108 |     }
109 |   }
110 | }
111 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q80.sql:
--------------------------------------------------------------------------------
1 | WITH ssr AS -- per s_store_id: sales, returned amount, and net profit minus return net loss
2 | (SELECT
3 | s_store_id AS store_id,
4 | sum(ss_ext_sales_price) AS sales,
5 | sum(coalesce(sr_return_amt, 0)) AS returns, -- 0 for sales the LEFT OUTER JOIN left without a return row
6 | sum(ss_net_profit - coalesce(sr_net_loss, 0)) AS profit
7 | FROM store_sales
8 | LEFT OUTER JOIN store_returns ON
9 | (ss_item_sk = sr_item_sk AND
10 | ss_ticket_number = sr_ticket_number)
11 | , -- the tables on the next line are comma-joined and tied in by the WHERE clause
12 | date_dim, store, item, promotion
13 | WHERE ss_sold_date_sk = d_date_sk
14 | AND d_date BETWEEN cast('2000-08-23' AS DATE)
15 | AND (cast('2000-08-23' AS DATE) + INTERVAL 30 days) -- upper bound of the BETWEEN: start date plus 30 days
16 | AND ss_store_sk = s_store_sk
17 | AND ss_item_sk = i_item_sk
18 | AND i_current_price > 50
19 | AND ss_promo_sk = p_promo_sk
20 | AND p_channel_tv = 'N'
21 | GROUP BY s_store_id),
22 | csr AS -- same measures for catalog_sales, per cp_catalog_page_id
23 | (SELECT
24 | cp_catalog_page_id AS catalog_page_id,
25 | sum(cs_ext_sales_price) AS sales,
26 | sum(coalesce(cr_return_amount, 0)) AS returns,
27 | sum(cs_net_profit - coalesce(cr_net_loss, 0)) AS profit
28 | FROM catalog_sales
29 | LEFT OUTER JOIN catalog_returns ON
30 | (cs_item_sk = cr_item_sk AND
31 | cs_order_number = cr_order_number)
32 | ,
33 | date_dim, catalog_page, item, promotion
34 | WHERE cs_sold_date_sk = d_date_sk
35 | AND d_date BETWEEN cast('2000-08-23' AS DATE)
36 | AND (cast('2000-08-23' AS DATE) + INTERVAL 30 days)
37 | AND cs_catalog_page_sk = cp_catalog_page_sk
38 | AND cs_item_sk = i_item_sk
39 | AND i_current_price > 50
40 | AND cs_promo_sk = p_promo_sk
41 | AND p_channel_tv = 'N'
42 | GROUP BY cp_catalog_page_id),
43 | wsr AS -- same measures for web_sales, per web_site_id
44 | (SELECT
45 | web_site_id,
46 | sum(ws_ext_sales_price) AS sales,
47 | sum(coalesce(wr_return_amt, 0)) AS returns,
48 | sum(ws_net_profit - coalesce(wr_net_loss, 0)) AS profit
49 | FROM web_sales
50 | LEFT OUTER JOIN web_returns ON
51 | (ws_item_sk = wr_item_sk AND ws_order_number = wr_order_number)
52 | ,
53 | date_dim, web_site, item, promotion
54 | WHERE ws_sold_date_sk = d_date_sk
55 | AND d_date BETWEEN cast('2000-08-23' AS DATE)
56 | AND (cast('2000-08-23' AS DATE) + INTERVAL 30 days)
57 | AND ws_web_site_sk = web_site_sk
58 | AND ws_item_sk = i_item_sk
59 | AND i_current_price > 50
60 | AND ws_promo_sk = p_promo_sk
61 | AND p_channel_tv = 'N'
62 | GROUP BY web_site_id)
63 | SELECT
64 | channel,
65 | id,
66 | sum(sales) AS sales,
67 | sum(returns) AS returns,
68 | sum(profit) AS profit
69 | FROM (SELECT -- the three CTEs stacked, each row tagged with a channel label and a prefixed id
70 | 'store channel' AS channel,
71 | concat('store', store_id) AS id,
72 | sales,
73 | returns,
74 | profit
75 | FROM ssr
76 | UNION ALL
77 | SELECT
78 | 'catalog channel' AS channel,
79 | concat('catalog_page', catalog_page_id) AS id,
80 | sales,
81 | returns,
82 | profit
83 | FROM csr
84 | UNION ALL
85 | SELECT
86 | 'web channel' AS channel,
87 | concat('web_site', web_site_id) AS id,
88 | sales,
89 | returns,
90 | profit
91 | FROM wsr) x
92 | GROUP BY ROLLUP (channel, id) -- adds per-channel subtotal rows and one grand-total row
93 | ORDER BY channel, id
94 | LIMIT 100
95 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q77.sql:
--------------------------------------------------------------------------------
1 | WITH ss AS -- store_sales: sales and profit per s_store_sk within the date window
2 | (SELECT
3 | s_store_sk,
4 | sum(ss_ext_sales_price) AS sales,
5 | sum(ss_net_profit) AS profit
6 | FROM store_sales, date_dim, store
7 | WHERE ss_sold_date_sk = d_date_sk
8 | AND d_date BETWEEN cast('2000-08-03' AS DATE) AND
9 | (cast('2000-08-03' AS DATE) + INTERVAL 30 days) -- every CTE below uses this same start date plus 30 days window
10 | AND ss_store_sk = s_store_sk
11 | GROUP BY s_store_sk),
12 | sr AS -- store_returns: returned amount and net loss per s_store_sk
13 | (SELECT
14 | s_store_sk,
15 | sum(sr_return_amt) AS returns,
16 | sum(sr_net_loss) AS profit_loss
17 | FROM store_returns, date_dim, store
18 | WHERE sr_returned_date_sk = d_date_sk
19 | AND d_date BETWEEN cast('2000-08-03' AS DATE) AND
20 | (cast('2000-08-03' AS DATE) + INTERVAL 30 days)
21 | AND sr_store_sk = s_store_sk
22 | GROUP BY s_store_sk),
23 | cs AS -- catalog_sales: sales and profit per cs_call_center_sk
24 | (SELECT
25 | cs_call_center_sk,
26 | sum(cs_ext_sales_price) AS sales,
27 | sum(cs_net_profit) AS profit
28 | FROM catalog_sales, date_dim
29 | WHERE cs_sold_date_sk = d_date_sk
30 | AND d_date BETWEEN cast('2000-08-03' AS DATE) AND
31 | (cast('2000-08-03' AS DATE) + INTERVAL 30 days)
32 | GROUP BY cs_call_center_sk),
33 | cr AS -- catalog_returns: one total row (there is no GROUP BY)
34 | (SELECT
35 | sum(cr_return_amount) AS returns,
36 | sum(cr_net_loss) AS profit_loss
37 | FROM catalog_returns, date_dim
38 | WHERE cr_returned_date_sk = d_date_sk
39 | AND d_date BETWEEN cast('2000-08-03' AS DATE) AND
40 | (cast('2000-08-03' AS DATE) + INTERVAL 30 days)),
41 | ws AS -- web_sales: sales and profit per wp_web_page_sk
42 | (SELECT
43 | wp_web_page_sk,
44 | sum(ws_ext_sales_price) AS sales,
45 | sum(ws_net_profit) AS profit
46 | FROM web_sales, date_dim, web_page
47 | WHERE ws_sold_date_sk = d_date_sk
48 | AND d_date BETWEEN cast('2000-08-03' AS DATE) AND
49 | (cast('2000-08-03' AS DATE) + INTERVAL 30 days)
50 | AND ws_web_page_sk = wp_web_page_sk
51 | GROUP BY wp_web_page_sk),
52 | wr AS -- web_returns: returned amount and net loss per wp_web_page_sk
53 | (SELECT
54 | wp_web_page_sk,
55 | sum(wr_return_amt) AS returns,
56 | sum(wr_net_loss) AS profit_loss
57 | FROM web_returns, date_dim, web_page
58 | WHERE wr_returned_date_sk = d_date_sk
59 | AND d_date BETWEEN cast('2000-08-03' AS DATE) AND
60 | (cast('2000-08-03' AS DATE) + INTERVAL 30 days)
61 | AND wr_web_page_sk = wp_web_page_sk
62 | GROUP BY wp_web_page_sk)
63 | SELECT
64 | channel,
65 | id,
66 | sum(sales) AS sales,
67 | sum(returns) AS returns,
68 | sum(profit) AS profit
69 | FROM
70 | (SELECT
71 | 'store channel' AS channel,
72 | ss.s_store_sk AS id,
73 | sales,
74 | coalesce(returns, 0) AS returns, -- 0 when the LEFT JOIN finds no sr row for the store
75 | (profit - coalesce(profit_loss, 0)) AS profit
76 | FROM ss
77 | LEFT JOIN sr
78 | ON ss.s_store_sk = sr.s_store_sk
79 | UNION ALL
80 | SELECT
81 | 'catalog channel' AS channel,
82 | cs_call_center_sk AS id,
83 | sales,
84 | returns,
85 | (profit - profit_loss) AS profit
86 | FROM cs, cr -- no join condition: every cs row is paired with the single cr total row
87 | UNION ALL
88 | SELECT
89 | 'web channel' AS channel,
90 | ws.wp_web_page_sk AS id,
91 | sales,
92 | coalesce(returns, 0) returns,
93 | (profit - coalesce(profit_loss, 0)) AS profit
94 | FROM ws
95 | LEFT JOIN wr
96 | ON ws.wp_web_page_sk = wr.wp_web_page_sk
97 | ) x
98 | GROUP BY ROLLUP (channel, id) -- adds per-channel subtotal rows and one grand-total row
99 | ORDER BY channel, id
100 | LIMIT 100
101 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q64.sql:
--------------------------------------------------------------------------------
1 | WITH cs_ui AS -- catalog items whose summed list price exceeds twice their summed refunds (see HAVING)
2 | (SELECT
3 | cs_item_sk,
4 | sum(cs_ext_list_price) AS sale,
5 | sum(cr_refunded_cash + cr_reversed_charge + cr_store_credit) AS refund
6 | FROM catalog_sales
7 | , catalog_returns
8 | WHERE cs_item_sk = cr_item_sk
9 | AND cs_order_number = cr_order_number
10 | GROUP BY cs_item_sk
11 | HAVING sum(cs_ext_list_price) > 2 * sum(cr_refunded_cash + cr_reversed_charge + cr_store_credit)),
12 | cross_sales AS -- store sales that have a store_returns row and a cs_ui item, grouped by product, store, both addresses and three years
13 | (SELECT
14 | i_product_name product_name,
15 | i_item_sk item_sk,
16 | s_store_name store_name,
17 | s_zip store_zip,
18 | ad1.ca_street_number b_street_number,
19 | ad1.ca_street_name b_streen_name, -- NOTE(review): alias reads "streen" (street?); the outer SELECT uses the same spelling
20 | ad1.ca_city b_city,
21 | ad1.ca_zip b_zip,
22 | ad2.ca_street_number c_street_number,
23 | ad2.ca_street_name c_street_name,
24 | ad2.ca_city c_city,
25 | ad2.ca_zip c_zip,
26 | d1.d_year AS syear, -- d1 is joined on ss_sold_date_sk
27 | d2.d_year AS fsyear, -- d2 is joined on c_first_sales_date_sk
28 | d3.d_year s2year, -- d3 is joined on c_first_shipto_date_sk
29 | count(*) cnt,
30 | sum(ss_wholesale_cost) s1,
31 | sum(ss_list_price) s2,
32 | sum(ss_coupon_amt) s3
33 | FROM store_sales, store_returns, cs_ui, date_dim d1, date_dim d2, date_dim d3,
34 | store, customer, customer_demographics cd1, customer_demographics cd2,
35 | promotion, household_demographics hd1, household_demographics hd2,
36 | customer_address ad1, customer_address ad2, income_band ib1, income_band ib2, item
37 | WHERE ss_store_sk = s_store_sk AND
38 | ss_sold_date_sk = d1.d_date_sk AND
39 | ss_customer_sk = c_customer_sk AND
40 | ss_cdemo_sk = cd1.cd_demo_sk AND
41 | ss_hdemo_sk = hd1.hd_demo_sk AND
42 | ss_addr_sk = ad1.ca_address_sk AND
43 | ss_item_sk = i_item_sk AND
44 | ss_item_sk = sr_item_sk AND
45 | ss_ticket_number = sr_ticket_number AND
46 | ss_item_sk = cs_ui.cs_item_sk AND
47 | c_current_cdemo_sk = cd2.cd_demo_sk AND
48 | c_current_hdemo_sk = hd2.hd_demo_sk AND
49 | c_current_addr_sk = ad2.ca_address_sk AND
50 | c_first_sales_date_sk = d2.d_date_sk AND
51 | c_first_shipto_date_sk = d3.d_date_sk AND
52 | ss_promo_sk = p_promo_sk AND
53 | hd1.hd_income_band_sk = ib1.ib_income_band_sk AND
54 | hd2.hd_income_band_sk = ib2.ib_income_band_sk AND
55 | cd1.cd_marital_status <> cd2.cd_marital_status AND
56 | i_color IN ('purple', 'burlywood', 'indian', 'spring', 'floral', 'medium') AND
57 | i_current_price BETWEEN 64 AND 64 + 10 AND
58 | i_current_price BETWEEN 64 + 1 AND 64 + 15 -- together with the previous BETWEEN this leaves prices 65 .. 74
59 | GROUP BY i_product_name, i_item_sk, s_store_name, s_zip, ad1.ca_street_number,
60 | ad1.ca_street_name, ad1.ca_city, ad1.ca_zip, ad2.ca_street_number,
61 | ad2.ca_street_name, ad2.ca_city, ad2.ca_zip, d1.d_year, d2.d_year, d3.d_year
62 | )
63 | SELECT
64 | cs1.product_name,
65 | cs1.store_name,
66 | cs1.store_zip,
67 | cs1.b_street_number,
68 | cs1.b_streen_name,
69 | cs1.b_city,
70 | cs1.b_zip,
71 | cs1.c_street_number,
72 | cs1.c_street_name,
73 | cs1.c_city,
74 | cs1.c_zip,
75 | cs1.syear,
76 | cs1.cnt,
77 | cs1.s1,
78 | cs1.s2,
79 | cs1.s3,
80 | cs2.s1,
81 | cs2.s2,
82 | cs2.s3,
83 | cs2.syear,
84 | cs2.cnt
85 | FROM cross_sales cs1, cross_sales cs2 -- self-join: cs1 rows have syear 1999, cs2 rows have syear 1999 + 1
86 | WHERE cs1.item_sk = cs2.item_sk AND
87 | cs1.syear = 1999 AND
88 | cs2.syear = 1999 + 1 AND
89 | cs2.cnt <= cs1.cnt AND -- the later year's row count must not exceed the earlier year's
90 | cs1.store_name = cs2.store_name AND
91 | cs1.store_zip = cs2.store_zip
92 | ORDER BY cs1.product_name, cs1.store_name, cs2.cnt
93 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/ranger/authorization/spark/authorizer/RangerSparkAccessRequest.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.ranger.authorization.spark.authorizer
19 |
20 | import java.util.Date
21 |
22 | import org.apache.ranger.authorization.spark.authorizer.SparkAccessType.SparkAccessType
23 | import org.apache.ranger.plugin.policyengine.{RangerAccessRequestImpl, RangerPolicyEngine}
24 | import org.apache.ranger.plugin.util.RangerAccessRequestUtil
25 |
26 | import scala.collection.JavaConverters._
27 |
28 | /**
29 |  * [[RangerAccessRequestImpl]] that additionally remembers the `SparkAccessType` it was
30 |  * created for. The primary constructor is private; use one of the auxiliary constructors.
31 |  */
32 | class RangerSparkAccessRequest private extends RangerAccessRequestImpl {
33 |
34 |   private var accessType = SparkAccessType.NONE
35 |
36 |   /**
37 |    * Creates a request for `resource` on behalf of `user`, with the access time set to the
38 |    * moment of construction.
39 |    *
40 |    * @param groups      converted to a Java set and stored as the user's groups
41 |    * @param opType      stored as the request action
42 |    * @param accessType  stored and translated by `setSparkAccessType`
43 |    */
44 |   def this(
45 |       resource: RangerSparkResource,
46 |       user: String,
47 |       groups: Set[String],
48 |       opType: String,
49 |       accessType: SparkAccessType,
50 |       clusterName: String) = {
51 |     this()
52 |     this.setResource(resource)
53 |     this.setUser(user)
54 |     this.setUserGroups(groups.asJava)
55 |     this.setAccessTime(new Date)
56 |     this.setAction(opType)
57 |     this.setSparkAccessType(accessType)
58 |     this.setClusterName(clusterName)
59 |   }
60 |
61 |   /** Same as the full constructor with action "METADATA OPERATION" and access type USE. */
62 |   def this(resource: RangerSparkResource, user: String, groups: Set[String],
63 |       clusterName: String) = {
64 |     this(resource, user, groups, "METADATA OPERATION", SparkAccessType.USE, clusterName)
65 |   }
66 |
67 |   def getSparkAccessType: SparkAccessType = accessType
68 |
69 |   /**
70 |    * Records `accessType` and sets the inherited access-type string to match: USE becomes
71 |    * `RangerPolicyEngine.ANY_ACCESS`, ADMIN becomes `RangerPolicyEngine.ADMIN_ACCESS`, and
72 |    * any other value becomes its lower-cased name.
73 |    */
74 |   def setSparkAccessType(accessType: SparkAccessType): Unit = {
75 |     this.accessType = accessType
76 |     accessType match {
77 |       case SparkAccessType.USE => this.setAccessType(RangerPolicyEngine.ANY_ACCESS)
78 |       case SparkAccessType.ADMIN => this.setAccessType(RangerPolicyEngine.ADMIN_ACCESS)
79 |       case _ => this.setAccessType(accessType.toString.toLowerCase)
80 |     }
81 |   }
82 |
83 |   /**
84 |    * Returns a new request carrying this request's values. The context is duplicated through
85 |    * `RangerAccessRequestUtil.copyContext`; every other value is passed to the copy as-is.
86 |    */
87 |   def copy(): RangerSparkAccessRequest = {
88 |     val ret = new RangerSparkAccessRequest()
89 |     ret.setResource(getResource)
90 |     ret.setAccessType(getAccessType)
91 |     ret.setUser(getUser)
92 |     ret.setUserGroups(getUserGroups)
93 |     ret.setAccessTime(getAccessTime)
94 |     ret.setAction(getAction)
95 |     ret.setClientIPAddress(getClientIPAddress)
96 |     ret.setRemoteIPAddress(getRemoteIPAddress)
97 |     ret.setForwardedAddresses(getForwardedAddresses)
98 |     ret.setRequestData(getRequestData)
99 |     ret.setClientType(getClientType)
100 |     ret.setSessionId(getSessionId)
101 |     ret.setContext(RangerAccessRequestUtil.copyContext(getContext))
102 |     // Assign the field directly: setAccessType above already copied the translated string,
103 |     // and setSparkAccessType would overwrite it.
104 |     ret.accessType = accessType
105 |     ret.setClusterName(getClusterName)
106 |     ret
107 |   }
108 | }
109 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q14b.sql:
--------------------------------------------------------------------------------
1 | WITH cross_items AS -- items whose (brand, class, category) combination occurs in store, catalog AND web sales
2 | (SELECT i_item_sk ss_item_sk -- item key exposed under the name ss_item_sk
3 | FROM item,
4 | (SELECT
5 | iss.i_brand_id brand_id,
6 | iss.i_class_id class_id,
7 | iss.i_category_id category_id
8 | FROM store_sales, item iss, date_dim d1
9 | WHERE ss_item_sk = iss.i_item_sk
10 | AND ss_sold_date_sk = d1.d_date_sk
11 | AND d1.d_year BETWEEN 1999 AND 1999 + 2 -- years 1999 through 2001
12 | INTERSECT -- keep only combinations also present in catalog sales
13 | SELECT
14 | ics.i_brand_id,
15 | ics.i_class_id,
16 | ics.i_category_id
17 | FROM catalog_sales, item ics, date_dim d2
18 | WHERE cs_item_sk = ics.i_item_sk
19 | AND cs_sold_date_sk = d2.d_date_sk
20 | AND d2.d_year BETWEEN 1999 AND 1999 + 2
21 | INTERSECT -- ... and also present in web sales
22 | SELECT
23 | iws.i_brand_id,
24 | iws.i_class_id,
25 | iws.i_category_id
26 | FROM web_sales, item iws, date_dim d3
27 | WHERE ws_item_sk = iws.i_item_sk
28 | AND ws_sold_date_sk = d3.d_date_sk
29 | AND d3.d_year BETWEEN 1999 AND 1999 + 2) x
30 | WHERE i_brand_id = brand_id
31 | AND i_class_id = class_id
32 | AND i_category_id = category_id
33 | ),
34 | avg_sales AS -- single-row CTE: average of quantity * list_price over all three channels, 1999 through 2001
35 | (SELECT avg(quantity * list_price) average_sales
36 | FROM (SELECT
37 | ss_quantity quantity,
38 | ss_list_price list_price
39 | FROM store_sales, date_dim
40 | WHERE ss_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 1999 + 2
41 | UNION ALL
42 | SELECT
43 | cs_quantity quantity,
44 | cs_list_price list_price
45 | FROM catalog_sales, date_dim
46 | WHERE cs_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 1999 + 2
47 | UNION ALL
48 | SELECT
49 | ws_quantity quantity,
50 | ws_list_price list_price
51 | FROM web_sales, date_dim
52 | WHERE ws_sold_date_sk = d_date_sk AND d_year BETWEEN 1999 AND 1999 + 2) x)
53 | SELECT * -- all columns of this_year followed by all columns of last_year
54 | FROM
55 | (SELECT -- this_year: store sales of cross_items for one week of year 2000
56 | 'store' channel,
57 | i_brand_id,
58 | i_class_id,
59 | i_category_id,
60 | sum(ss_quantity * ss_list_price) sales,
61 | count(*) number_sales
62 | FROM store_sales, item, date_dim
63 | WHERE ss_item_sk IN (SELECT ss_item_sk
64 | FROM cross_items)
65 | AND ss_item_sk = i_item_sk
66 | AND ss_sold_date_sk = d_date_sk
67 | AND d_week_seq = (SELECT d_week_seq -- d_week_seq of the date_dim row with d_year 2000, d_moy 12, d_dom 11
68 | FROM date_dim
69 | WHERE d_year = 1999 + 1 AND d_moy = 12 AND d_dom = 11)
70 | GROUP BY i_brand_id, i_class_id, i_category_id
71 | HAVING sum(ss_quantity * ss_list_price) > (SELECT average_sales -- keep groups selling more than the cross-channel average
72 | FROM avg_sales)) this_year,
73 | (SELECT -- last_year: same aggregation, week taken from year 1999
74 | 'store' channel,
75 | i_brand_id,
76 | i_class_id,
77 | i_category_id,
78 | sum(ss_quantity * ss_list_price) sales,
79 | count(*) number_sales
80 | FROM store_sales, item, date_dim
81 | WHERE ss_item_sk IN (SELECT ss_item_sk
82 | FROM cross_items)
83 | AND ss_item_sk = i_item_sk
84 | AND ss_sold_date_sk = d_date_sk
85 | AND d_week_seq = (SELECT d_week_seq -- d_week_seq of the date_dim row with d_year 1999, d_moy 12, d_dom 11
86 | FROM date_dim
87 | WHERE d_year = 1999 AND d_moy = 12 AND d_dom = 11)
88 | GROUP BY i_brand_id, i_class_id, i_category_id
89 | HAVING sum(ss_quantity * ss_list_price) > (SELECT average_sales
90 | FROM avg_sales)) last_year
91 | WHERE this_year.i_brand_id = last_year.i_brand_id -- pair rows of both years on brand, class and category
92 | AND this_year.i_class_id = last_year.i_class_id
93 | AND this_year.i_category_id = last_year.i_category_id
94 | ORDER BY this_year.channel, this_year.i_brand_id, this_year.i_class_id, this_year.i_category_id
95 | LIMIT 100
96 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/ranger/authorization/spark/authorizer/RangerSparkResource.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.ranger.authorization.spark.authorizer
19 |
20 | import org.apache.ranger.authorization.spark.authorizer.SparkObjectType.SparkObjectType
21 | import org.apache.ranger.plugin.policyengine.RangerAccessResourceImpl
22 |
23 | /**
24 |  * Ranger resource describing one Spark SQL object.
25 |  *
26 |  * Which resource keys are populated depends on `objectType`:
27 |  *  - DATABASE: database
28 |  *  - TABLE / VIEW: database, table
29 |  *  - COLUMN: database, table, column
30 |  *  - FUNCTION: database, udf
31 |  *  - URI: url
32 |  * Any other object type leaves the resource without values.
33 |  *
34 |  * @param objectType    kind of object being described
35 |  * @param databaseOrUrl database name, or the URL for URI objects; when empty, "*" is used
36 |  *                      (FUNCTION objects use "" instead)
37 |  * @param tableOrUdf    table/view name, or function name for FUNCTION objects
38 |  * @param column        column value, only used for COLUMN objects
39 |  */
40 | class RangerSparkResource(
41 |     objectType: SparkObjectType,
42 |     databaseOrUrl: Option[String],
43 |     tableOrUdf: String,
44 |     column: String) extends RangerAccessResourceImpl {
45 |   import SparkObjectType._
46 |   import RangerSparkResource._
47 |
48 |   /** Variant without a column; the column is passed on as `null`. */
49 |   def this(objectType: SparkObjectType, databaseOrUrl: Option[String], tableOrUdf: String) = {
50 |     this(objectType, databaseOrUrl, tableOrUdf, null)
51 |   }
52 |
53 |   /** Variant without table/UDF and column; both are passed on as `null`. */
54 |   def this(objectType: SparkObjectType, databaseOrUrl: Option[String]) = {
55 |     this(objectType, databaseOrUrl, null, null)
56 |   }
57 |
58 |   // Constructor body: register only the keys relevant to this kind of object.
59 |   objectType match {
60 |     case URI =>
61 |       setValue(KEY_URL, databaseOrUrl.getOrElse("*"))
62 |     case FUNCTION =>
63 |       // Functions fall back to an empty database name instead of "*".
64 |       setValue(KEY_DATABASE, databaseOrUrl.getOrElse(""))
65 |       setValue(KEY_UDF, tableOrUdf)
66 |     case DATABASE =>
67 |       setValue(KEY_DATABASE, databaseOrUrl.getOrElse("*"))
68 |     case TABLE | VIEW | COLUMN =>
69 |       setValue(KEY_DATABASE, databaseOrUrl.getOrElse("*"))
70 |       setValue(KEY_TABLE, tableOrUdf)
71 |       if (objectType == COLUMN) {
72 |         setValue(KEY_COLUMN, column)
73 |       }
74 |     case _ => // no keys for other object types
75 |   }
76 |
77 |   /** Kind of object this resource describes. */
78 |   def getObjectType: SparkObjectType = objectType
79 |
80 |   /** Value stored under the "database" key. */
81 |   def getDatabase: String = readString(KEY_DATABASE)
82 |
83 |   /** Value stored under the "table" key. */
84 |   def getTable: String = readString(KEY_TABLE)
85 |
86 |   /** Value stored under the "udf" key. */
87 |   def getUdf: String = readString(KEY_UDF)
88 |
89 |   /** Value stored under the "column" key. */
90 |   def getColumn: String = readString(KEY_COLUMN)
91 |
92 |   /** Value stored under the "url" key. */
93 |   def getUrl: String = readString(KEY_URL)
94 |
95 |   // Shared by the getters above: fetch a resource value and cast it to String.
96 |   private def readString(key: String): String = getValue(key).asInstanceOf[String]
97 |
98 | }
99 |
100 | /** Resource key names and factory methods mirroring the constructors of the class. */
101 | object RangerSparkResource {
102 |
103 |   private val KEY_DATABASE = "database"
104 |   private val KEY_TABLE = "table"
105 |   private val KEY_UDF = "udf"
106 |   private val KEY_COLUMN = "column"
107 |   private val KEY_URL = "url"
108 |
109 |   /** Same as the four-argument constructor. */
110 |   def apply(objectType: SparkObjectType, databaseOrUrl: Option[String], tableOrUdf: String,
111 |       column: String): RangerSparkResource =
112 |     new RangerSparkResource(objectType, databaseOrUrl, tableOrUdf, column)
113 |
114 |   /** Same as the three-argument constructor. */
115 |   def apply(objectType: SparkObjectType, databaseOrUrl: Option[String],
116 |       tableOrUdf: String): RangerSparkResource =
117 |     new RangerSparkResource(objectType, databaseOrUrl, tableOrUdf)
118 |
119 |   /** Same as the two-argument constructor. */
120 |   def apply(objectType: SparkObjectType, databaseOrUrl: Option[String]): RangerSparkResource =
121 |     new RangerSparkResource(objectType, databaseOrUrl)
122 | }
123 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q14a.sql:
--------------------------------------------------------------------------------
1 | WITH cross_items AS -- items whose (brand, class, category) combination occurs in store, catalog AND web sales
2 | (SELECT i_item_sk ss_item_sk -- item key exposed under the name ss_item_sk
3 | FROM item,
4 | (SELECT
5 | iss.i_brand_id brand_id,
6 | iss.i_class_id class_id,
7 | iss.i_category_id category_id
8 | FROM store_sales, item iss, date_dim d1
9 | WHERE ss_item_sk = iss.i_item_sk
10 | AND ss_sold_date_sk = d1.d_date_sk
11 | AND d1.d_year BETWEEN 1999 AND 1999 + 2 -- years 1999 through 2001
12 | INTERSECT -- keep only combinations also present in catalog sales
13 | SELECT
14 | ics.i_brand_id,
15 | ics.i_class_id,
16 | ics.i_category_id
17 | FROM catalog_sales, item ics, date_dim d2
18 | WHERE cs_item_sk = ics.i_item_sk
19 | AND cs_sold_date_sk = d2.d_date_sk
20 | AND d2.d_year BETWEEN 1999 AND 1999 + 2
21 | INTERSECT -- ... and also present in web sales
22 | SELECT
23 | iws.i_brand_id,
24 | iws.i_class_id,
25 | iws.i_category_id
26 | FROM web_sales, item iws, date_dim d3
27 | WHERE ws_item_sk = iws.i_item_sk
28 | AND ws_sold_date_sk = d3.d_date_sk
29 | AND d3.d_year BETWEEN 1999 AND 1999 + 2) x
30 | WHERE i_brand_id = brand_id
31 | AND i_class_id = class_id
32 | AND i_category_id = category_id
33 | ),
34 | avg_sales AS -- single-row CTE: average of quantity * list_price over all three channels, 1999 through 2001
35 | (SELECT avg(quantity * list_price) average_sales
36 | FROM (
37 | SELECT
38 | ss_quantity quantity,
39 | ss_list_price list_price
40 | FROM store_sales, date_dim
41 | WHERE ss_sold_date_sk = d_date_sk
42 | AND d_year BETWEEN 1999 AND 2001 -- same range as "1999 AND 1999 + 2" used by the other branches
43 | UNION ALL
44 | SELECT
45 | cs_quantity quantity,
46 | cs_list_price list_price
47 | FROM catalog_sales, date_dim
48 | WHERE cs_sold_date_sk = d_date_sk
49 | AND d_year BETWEEN 1999 AND 1999 + 2
50 | UNION ALL
51 | SELECT
52 | ws_quantity quantity,
53 | ws_list_price list_price
54 | FROM web_sales, date_dim
55 | WHERE ws_sold_date_sk = d_date_sk
56 | AND d_year BETWEEN 1999 AND 1999 + 2) x)
57 | SELECT -- totals per channel / brand / class / category, plus the ROLLUP subtotals
58 | channel,
59 | i_brand_id,
60 | i_class_id,
61 | i_category_id,
62 | sum(sales),
63 | sum(number_sales)
64 | FROM (
65 | SELECT -- store channel: cross_items sold with d_year = 2001 and d_moy = 11
66 | 'store' channel,
67 | i_brand_id,
68 | i_class_id,
69 | i_category_id,
70 | sum(ss_quantity * ss_list_price) sales,
71 | count(*) number_sales
72 | FROM store_sales, item, date_dim
73 | WHERE ss_item_sk IN (SELECT ss_item_sk
74 | FROM cross_items)
75 | AND ss_item_sk = i_item_sk
76 | AND ss_sold_date_sk = d_date_sk
77 | AND d_year = 1999 + 2
78 | AND d_moy = 11
79 | GROUP BY i_brand_id, i_class_id, i_category_id
80 | HAVING sum(ss_quantity * ss_list_price) > (SELECT average_sales -- keep groups selling more than the cross-channel average
81 | FROM avg_sales)
82 | UNION ALL -- catalog channel, same filters and threshold
83 | SELECT
84 | 'catalog' channel,
85 | i_brand_id,
86 | i_class_id,
87 | i_category_id,
88 | sum(cs_quantity * cs_list_price) sales,
89 | count(*) number_sales
90 | FROM catalog_sales, item, date_dim
91 | WHERE cs_item_sk IN (SELECT ss_item_sk
92 | FROM cross_items)
93 | AND cs_item_sk = i_item_sk
94 | AND cs_sold_date_sk = d_date_sk
95 | AND d_year = 1999 + 2
96 | AND d_moy = 11
97 | GROUP BY i_brand_id, i_class_id, i_category_id
98 | HAVING sum(cs_quantity * cs_list_price) > (SELECT average_sales FROM avg_sales)
99 | UNION ALL -- web channel, same filters and threshold
100 | SELECT
101 | 'web' channel,
102 | i_brand_id,
103 | i_class_id,
104 | i_category_id,
105 | sum(ws_quantity * ws_list_price) sales,
106 | count(*) number_sales
107 | FROM web_sales, item, date_dim
108 | WHERE ws_item_sk IN (SELECT ss_item_sk
109 | FROM cross_items)
110 | AND ws_item_sk = i_item_sk
111 | AND ws_sold_date_sk = d_date_sk
112 | AND d_year = 1999 + 2
113 | AND d_moy = 11
114 | GROUP BY i_brand_id, i_class_id, i_category_id
115 | HAVING sum(ws_quantity * ws_list_price) > (SELECT average_sales
116 | FROM avg_sales)
117 | ) y
118 | GROUP BY ROLLUP (channel, i_brand_id, i_class_id, i_category_id) -- adds subtotal rows per prefix of the grouping columns and a grand total
119 | ORDER BY channel, i_brand_id, i_class_id, i_category_id
120 | LIMIT 100
121 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Notice:
2 |
3 | This library has been contributed to https://github.com/apache/submarine as a sub-module,
4 | and that module can still be used on its own.
5 |
6 | The project here will no longer be updated.
7 |
8 | If you have any questions, please read
9 |
10 | https://github.com/apache/submarine/tree/master/docs/submarine-security/spark/README.md
11 |
12 | to learn how to use it, and give feedback to the Apache Submarine community by following
13 | https://submarine.apache.org/community/contributors.html
14 |
15 |
16 | # Spark SQL Ranger Security Plugin [License](https://www.apache.org/licenses/LICENSE-2.0.html) [GitHub](https://github.com/yaooqinn/spark-ranger) [Codecov](https://codecov.io/gh/yaooqinn/spark-ranger) [Travis CI](https://travis-ci.com/yaooqinn/spark-ranger) [Hits](http://hits.dwyl.io/yaooqinn/spark-ranger)
17 |
18 | ACL Management for Apache Spark SQL with Apache Ranger, enabling:
19 |
20 | - Table/Column level authorization
21 | - Row level filtering
22 | - Data masking
23 |
24 | ## Build
25 | Spark SQL Ranger Security Plugin is built with [Apache Maven](http://maven.apache.org):
26 |
27 | ```bash
28 | mvn clean package -Pspark-2.3 -Pranger-1.0 -DskipTests
29 | ```
30 |
31 | Currently, available profiles are:
32 |
33 | Spark: -Pspark-2.3, -Pspark-2.4
34 |
35 | Ranger: -Pranger-1.0, -Pranger-1.1, -Pranger-1.2, -Pranger-2.0
36 |
37 | ## Usage
38 |
39 | ### Installation
40 |
41 | Place `spark-ranger-<version>.jar` into `$SPARK_HOME/jars`.
42 |
43 | ### Installation Addons
44 |
45 | You can find some tips and known problems about this library [here](docs/installation-addons.md).
46 |
47 | ### Configurations
48 |
49 | #### Ranger admin client configurations
50 |
51 | Create `ranger-spark-security.xml` in `$SPARK_HOME/conf` and add the following configurations to point the plugin at the right Ranger admin server.
52 |
53 | ```xml
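54 | <?xml version="1.0" encoding="UTF-8"?>
55 | <configuration>
56 |
57 |   <property>
58 |     <name>ranger.plugin.spark.policy.rest.url</name>
59 |     <value>ranger admin address like http://ranger-admin.org:6080</value>
60 |   </property>
61 |
62 |   <property>
63 |     <name>ranger.plugin.spark.service.name</name>
64 |     <value>a ranger hive service name</value>
65 |   </property>
66 |
67 |   <property>
68 |     <name>ranger.plugin.spark.policy.cache.dir</name>
69 |     <value>./a ranger hive service name/policycache</value>
70 |   </property>
71 |
72 |   <property>
73 |     <name>ranger.plugin.spark.policy.pollIntervalMs</name>
74 |     <value>5000</value>
75 |   </property>
76 |
77 |   <property>
78 |     <name>ranger.plugin.spark.policy.source.impl</name>
79 |     <value>org.apache.ranger.admin.client.RangerAdminRESTClient</value>
80 |   </property>
81 |
82 | </configuration>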
83 | ```
84 |
85 | Create ranger-spark-audit.xml in $SPARK_HOME/conf and add the following configurations to enable/disable auditing.
86 |
87 | ```xml
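88 | <configuration>
89 |
90 |   <property>
91 |     <name>xasecure.audit.is.enabled</name>
92 |     <value>true</value>
93 |   </property>
94 |
95 |   <property>
96 |     <name>xasecure.audit.destination.db</name>
97 |     <value>false</value>
98 |   </property>
99 |
100 |   <property>
101 |     <name>xasecure.audit.destination.db.jdbc.driver</name>
102 |     <value>com.mysql.jdbc.Driver</value>
103 |   </property>
104 |
105 |   <property>
106 |     <name>xasecure.audit.destination.db.jdbc.url</name>
107 |     <value>jdbc:mysql://10.171.161.78/ranger</value>
108 |   </property>
109 |
110 |   <property>
111 |     <name>xasecure.audit.destination.db.password</name>
112 |     <value>rangeradmin</value>
113 |   </property>
114 |
115 |   <property>
116 |     <name>xasecure.audit.destination.db.user</name>
117 |     <value>rangeradmin</value>
118 |   </property>
119 |
120 | </configuration>
121 |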
122 | ```
123 |
124 | #### Enable plugin via spark extensions
125 |
126 | spark.sql.extensions=org.apache.ranger.authorization.spark.authorizer.RangerSparkSQLExtension
127 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q5.sql:
--------------------------------------------------------------------------------
1 | WITH ssr AS -- per s_store_id: sales, profit, returns and net loss from store_sales plus store_returns
2 | ( SELECT
3 | s_store_id,
4 | sum(sales_price) AS sales,
5 | sum(profit) AS profit,
6 | sum(return_amt) AS RETURNS,
7 | sum(net_loss) AS profit_loss
8 | FROM
9 | (SELECT -- sales rows: return columns are filled with zero
10 | ss_store_sk AS store_sk,
11 | ss_sold_date_sk AS date_sk,
12 | ss_ext_sales_price AS sales_price,
13 | ss_net_profit AS profit,
14 | cast(0 AS DECIMAL(7, 2)) AS return_amt,
15 | cast(0 AS DECIMAL(7, 2)) AS net_loss
16 | FROM store_sales
17 | UNION ALL -- return rows: sales columns are filled with zero
18 | SELECT
19 | sr_store_sk AS store_sk,
20 | sr_returned_date_sk AS date_sk,
21 | cast(0 AS DECIMAL(7, 2)) AS sales_price,
22 | cast(0 AS DECIMAL(7, 2)) AS profit,
23 | sr_return_amt AS return_amt,
24 | sr_net_loss AS net_loss
25 | FROM store_returns)
26 | salesreturns, date_dim, store
27 | WHERE date_sk = d_date_sk
28 | AND d_date BETWEEN cast('2000-08-23' AS DATE) -- date window: 2000-08-23 up to 14 days later
29 | AND ((cast('2000-08-23' AS DATE) + INTERVAL 14 days))
30 | AND store_sk = s_store_sk
31 | GROUP BY s_store_id),
32 | csr AS -- per cp_catalog_page_id: the same measures from catalog_sales plus catalog_returns
33 | ( SELECT
34 | cp_catalog_page_id,
35 | sum(sales_price) AS sales,
36 | sum(profit) AS profit,
37 | sum(return_amt) AS RETURNS,
38 | sum(net_loss) AS profit_loss
39 | FROM
40 | (SELECT
41 | cs_catalog_page_sk AS page_sk,
42 | cs_sold_date_sk AS date_sk,
43 | cs_ext_sales_price AS sales_price,
44 | cs_net_profit AS profit,
45 | cast(0 AS DECIMAL(7, 2)) AS return_amt,
46 | cast(0 AS DECIMAL(7, 2)) AS net_loss
47 | FROM catalog_sales
48 | UNION ALL
49 | SELECT
50 | cr_catalog_page_sk AS page_sk,
51 | cr_returned_date_sk AS date_sk,
52 | cast(0 AS DECIMAL(7, 2)) AS sales_price,
53 | cast(0 AS DECIMAL(7, 2)) AS profit,
54 | cr_return_amount AS return_amt,
55 | cr_net_loss AS net_loss
56 | FROM catalog_returns
57 | ) salesreturns, date_dim, catalog_page
58 | WHERE date_sk = d_date_sk
59 | AND d_date BETWEEN cast('2000-08-23' AS DATE)
60 | AND ((cast('2000-08-23' AS DATE) + INTERVAL 14 days))
61 | AND page_sk = cp_catalog_page_sk
62 | GROUP BY cp_catalog_page_id)
63 | ,
64 | wsr AS -- per web_site_id: the same measures from web_sales plus web_returns
65 | ( SELECT
66 | web_site_id,
67 | sum(sales_price) AS sales,
68 | sum(profit) AS profit,
69 | sum(return_amt) AS RETURNS,
70 | sum(net_loss) AS profit_loss
71 | FROM
72 | (SELECT
73 | ws_web_site_sk AS wsr_web_site_sk,
74 | ws_sold_date_sk AS date_sk,
75 | ws_ext_sales_price AS sales_price,
76 | ws_net_profit AS profit,
77 | cast(0 AS DECIMAL(7, 2)) AS return_amt,
78 | cast(0 AS DECIMAL(7, 2)) AS net_loss
79 | FROM web_sales
80 | UNION ALL
81 | SELECT
82 | ws_web_site_sk AS wsr_web_site_sk, -- site key comes from the web_sales row joined to the return below
83 | wr_returned_date_sk AS date_sk,
84 | cast(0 AS DECIMAL(7, 2)) AS sales_price,
85 | cast(0 AS DECIMAL(7, 2)) AS profit,
86 | wr_return_amt AS return_amt,
87 | wr_net_loss AS net_loss
88 | FROM web_returns
89 | LEFT OUTER JOIN web_sales ON -- match each return to its sale by item and order number
90 | (wr_item_sk = ws_item_sk
91 | AND wr_order_number = ws_order_number)
92 | ) salesreturns, date_dim, web_site
93 | WHERE date_sk = d_date_sk
94 | AND d_date BETWEEN cast('2000-08-23' AS DATE)
95 | AND ((cast('2000-08-23' AS DATE) + INTERVAL 14 days))
96 | AND wsr_web_site_sk = web_site_sk
97 | GROUP BY web_site_id)
98 | SELECT -- final result: totals per channel and id, plus the ROLLUP subtotals
99 | channel,
100 | id,
101 | sum(sales) AS sales,
102 | sum(returns) AS returns,
103 | sum(profit) AS profit
104 | FROM
105 | (SELECT
106 | 'store channel' AS channel,
107 | concat('store', s_store_id) AS id,
108 | sales,
109 | returns,
110 | (profit - profit_loss) AS profit -- profit reduced by the net loss on returns
111 | FROM ssr
112 | UNION ALL
113 | SELECT
114 | 'catalog channel' AS channel,
115 | concat('catalog_page', cp_catalog_page_id) AS id,
116 | sales,
117 | returns,
118 | (profit - profit_loss) AS profit
119 | FROM csr
120 | UNION ALL
121 | SELECT
122 | 'web channel' AS channel,
123 | concat('web_site', web_site_id) AS id,
124 | sales,
125 | returns,
126 | (profit - profit_loss) AS profit
127 | FROM wsr
128 | ) x
129 | GROUP BY ROLLUP (channel, id) -- rows per (channel, id), per channel, and one grand total
130 | ORDER BY channel, id
131 | LIMIT 100
132 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q49.sql:
--------------------------------------------------------------------------------
1 | SELECT -- web branch: items with return_rank <= 10 or currency_rank <= 10
2 | 'web' AS channel,
3 | web.item,
4 | web.return_ratio,
5 | web.return_rank,
6 | web.currency_rank
7 | FROM (
8 | SELECT
9 | item,
10 | return_ratio,
11 | currency_ratio,
12 | rank() -- rank 1 = smallest return_ratio (ascending order)
13 | OVER (
14 | ORDER BY return_ratio) AS return_rank,
15 | rank() -- rank 1 = smallest currency_ratio (ascending order)
16 | OVER (
17 | ORDER BY currency_ratio) AS currency_rank
18 | FROM
19 | (SELECT
20 | ws.ws_item_sk AS item,
21 | (cast(sum(coalesce(wr.wr_return_quantity, 0)) AS DECIMAL(15, 4)) / -- returned quantity / sold quantity
22 | cast(sum(coalesce(ws.ws_quantity, 0)) AS DECIMAL(15, 4))) AS return_ratio,
23 | (cast(sum(coalesce(wr.wr_return_amt, 0)) AS DECIMAL(15, 4)) / -- returned amount / net paid
24 | cast(sum(coalesce(ws.ws_net_paid, 0)) AS DECIMAL(15, 4))) AS currency_ratio
25 | FROM
26 | web_sales ws LEFT OUTER JOIN web_returns wr
27 | ON (ws.ws_order_number = wr.wr_order_number AND
28 | ws.ws_item_sk = wr.wr_item_sk)
29 | , date_dim
30 | WHERE
31 | wr.wr_return_amt > 10000 -- sales without a matching return have NULL here and are filtered out
32 | AND ws.ws_net_profit > 1
33 | AND ws.ws_net_paid > 0
34 | AND ws.ws_quantity > 0
35 | AND ws_sold_date_sk = d_date_sk
36 | AND d_year = 2001
37 | AND d_moy = 12
38 | GROUP BY ws.ws_item_sk
39 | ) in_web
40 | ) web
41 | WHERE (web.return_rank <= 10 OR web.currency_rank <= 10)
42 | UNION -- plain UNION: duplicate rows across branches are removed
43 | SELECT -- catalog branch, same computation on catalog_sales / catalog_returns
44 | 'catalog' AS channel,
45 | catalog.item,
46 | catalog.return_ratio,
47 | catalog.return_rank,
48 | catalog.currency_rank
49 | FROM (
50 | SELECT
51 | item,
52 | return_ratio,
53 | currency_ratio,
54 | rank()
55 | OVER (
56 | ORDER BY return_ratio) AS return_rank,
57 | rank()
58 | OVER (
59 | ORDER BY currency_ratio) AS currency_rank
60 | FROM
61 | (SELECT
62 | cs.cs_item_sk AS item,
63 | (cast(sum(coalesce(cr.cr_return_quantity, 0)) AS DECIMAL(15, 4)) /
64 | cast(sum(coalesce(cs.cs_quantity, 0)) AS DECIMAL(15, 4))) AS return_ratio,
65 | (cast(sum(coalesce(cr.cr_return_amount, 0)) AS DECIMAL(15, 4)) /
66 | cast(sum(coalesce(cs.cs_net_paid, 0)) AS DECIMAL(15, 4))) AS currency_ratio
67 | FROM
68 | catalog_sales cs LEFT OUTER JOIN catalog_returns cr
69 | ON (cs.cs_order_number = cr.cr_order_number AND
70 | cs.cs_item_sk = cr.cr_item_sk)
71 | , date_dim
72 | WHERE
73 | cr.cr_return_amount > 10000
74 | AND cs.cs_net_profit > 1
75 | AND cs.cs_net_paid > 0
76 | AND cs.cs_quantity > 0
77 | AND cs_sold_date_sk = d_date_sk
78 | AND d_year = 2001
79 | AND d_moy = 12
80 | GROUP BY cs.cs_item_sk
81 | ) in_cat
82 | ) catalog
83 | WHERE (catalog.return_rank <= 10 OR catalog.currency_rank <= 10)
84 | UNION
85 | SELECT -- store branch, same computation on store_sales / store_returns (joined on ticket number)
86 | 'store' AS channel,
87 | store.item,
88 | store.return_ratio,
89 | store.return_rank,
90 | store.currency_rank
91 | FROM (
92 | SELECT
93 | item,
94 | return_ratio,
95 | currency_ratio,
96 | rank()
97 | OVER (
98 | ORDER BY return_ratio) AS return_rank,
99 | rank()
100 | OVER (
101 | ORDER BY currency_ratio) AS currency_rank
102 | FROM
103 | (SELECT
104 | sts.ss_item_sk AS item,
105 | (cast(sum(coalesce(sr.sr_return_quantity, 0)) AS DECIMAL(15, 4)) /
106 | cast(sum(coalesce(sts.ss_quantity, 0)) AS DECIMAL(15, 4))) AS return_ratio,
107 | (cast(sum(coalesce(sr.sr_return_amt, 0)) AS DECIMAL(15, 4)) /
108 | cast(sum(coalesce(sts.ss_net_paid, 0)) AS DECIMAL(15, 4))) AS currency_ratio
109 | FROM
110 | store_sales sts LEFT OUTER JOIN store_returns sr
111 | ON (sts.ss_ticket_number = sr.sr_ticket_number AND sts.ss_item_sk = sr.sr_item_sk)
112 | , date_dim
113 | WHERE
114 | sr.sr_return_amt > 10000
115 | AND sts.ss_net_profit > 1
116 | AND sts.ss_net_paid > 0
117 | AND sts.ss_quantity > 0
118 | AND ss_sold_date_sk = d_date_sk
119 | AND d_year = 2001
120 | AND d_moy = 12
121 | GROUP BY sts.ss_item_sk
122 | ) in_store
123 | ) store
124 | WHERE (store.return_rank <= 10 OR store.currency_rank <= 10)
125 | ORDER BY 1, 4, 5 -- by position: channel, return_rank, currency_rank
126 | LIMIT 100
127 |
--------------------------------------------------------------------------------
/src/test/resources/tpcds/q4.sql:
--------------------------------------------------------------------------------
1 | WITH year_total AS ( -- one row per customer, year and sale_type ('s' store, 'c' catalog, 'w' web)
2 | SELECT
3 | c_customer_id customer_id,
4 | c_first_name customer_first_name,
5 | c_last_name customer_last_name,
6 | c_preferred_cust_flag customer_preferred_cust_flag,
7 | c_birth_country customer_birth_country,
8 | c_login customer_login,
9 | c_email_address customer_email_address,
10 | d_year dyear,
11 | sum(((ss_ext_list_price - ss_ext_wholesale_cost - ss_ext_discount_amt) + -- (list - wholesale - discount + sales price) / 2, summed
12 | ss_ext_sales_price) / 2) year_total,
13 | 's' sale_type
14 | FROM customer, store_sales, date_dim
15 | WHERE c_customer_sk = ss_customer_sk AND ss_sold_date_sk = d_date_sk
16 | GROUP BY c_customer_id,
17 | c_first_name,
18 | c_last_name,
19 | c_preferred_cust_flag,
20 | c_birth_country,
21 | c_login,
22 | c_email_address,
23 | d_year
24 | UNION ALL -- catalog sales, same shape, matched on cs_bill_customer_sk
25 | SELECT
26 | c_customer_id customer_id,
27 | c_first_name customer_first_name,
28 | c_last_name customer_last_name,
29 | c_preferred_cust_flag customer_preferred_cust_flag,
30 | c_birth_country customer_birth_country,
31 | c_login customer_login,
32 | c_email_address customer_email_address,
33 | d_year dyear,
34 | sum((((cs_ext_list_price - cs_ext_wholesale_cost - cs_ext_discount_amt) +
35 | cs_ext_sales_price) / 2)) year_total,
36 | 'c' sale_type
37 | FROM customer, catalog_sales, date_dim
38 | WHERE c_customer_sk = cs_bill_customer_sk AND cs_sold_date_sk = d_date_sk
39 | GROUP BY c_customer_id,
40 | c_first_name,
41 | c_last_name,
42 | c_preferred_cust_flag,
43 | c_birth_country,
44 | c_login,
45 | c_email_address,
46 | d_year
47 | UNION ALL -- web sales, same shape, matched on ws_bill_customer_sk
48 | SELECT
49 | c_customer_id customer_id,
50 | c_first_name customer_first_name,
51 | c_last_name customer_last_name,
52 | c_preferred_cust_flag customer_preferred_cust_flag,
53 | c_birth_country customer_birth_country,
54 | c_login customer_login,
55 | c_email_address customer_email_address,
56 | d_year dyear,
57 | sum((((ws_ext_list_price - ws_ext_wholesale_cost - ws_ext_discount_amt) + ws_ext_sales_price) /
58 | 2)) year_total,
59 | 'w' sale_type
60 | FROM customer, web_sales, date_dim
61 | WHERE c_customer_sk = ws_bill_customer_sk AND ws_sold_date_sk = d_date_sk
62 | GROUP BY c_customer_id,
63 | c_first_name,
64 | c_last_name,
65 | c_preferred_cust_flag,
66 | c_birth_country,
67 | c_login,
68 | c_email_address,
69 | d_year)
70 | SELECT -- customer attributes are taken from the second-year store row
71 | t_s_secyear.customer_id,
72 | t_s_secyear.customer_first_name,
73 | t_s_secyear.customer_last_name,
74 | t_s_secyear.customer_preferred_cust_flag,
75 | t_s_secyear.customer_birth_country,
76 | t_s_secyear.customer_login,
77 | t_s_secyear.customer_email_address
78 | FROM year_total t_s_firstyear, year_total t_s_secyear, year_total t_c_firstyear, -- six self-joins: {store, catalog, web} x {first year, second year}
79 | year_total t_c_secyear, year_total t_w_firstyear, year_total t_w_secyear
80 | WHERE t_s_secyear.customer_id = t_s_firstyear.customer_id -- all six aliases refer to the same customer
81 | AND t_s_firstyear.customer_id = t_c_secyear.customer_id
82 | AND t_s_firstyear.customer_id = t_c_firstyear.customer_id
83 | AND t_s_firstyear.customer_id = t_w_firstyear.customer_id
84 | AND t_s_firstyear.customer_id = t_w_secyear.customer_id
85 | AND t_s_firstyear.sale_type = 's'
86 | AND t_c_firstyear.sale_type = 'c'
87 | AND t_w_firstyear.sale_type = 'w'
88 | AND t_s_secyear.sale_type = 's'
89 | AND t_c_secyear.sale_type = 'c'
90 | AND t_w_secyear.sale_type = 'w'
91 | AND t_s_firstyear.dyear = 2001 -- first year is 2001, second year is 2002
92 | AND t_s_secyear.dyear = 2001 + 1
93 | AND t_c_firstyear.dyear = 2001
94 | AND t_c_secyear.dyear = 2001 + 1
95 | AND t_w_firstyear.dyear = 2001
96 | AND t_w_secyear.dyear = 2001 + 1
97 | AND t_s_firstyear.year_total > 0 -- first-year totals must be positive in every channel
98 | AND t_c_firstyear.year_total > 0
99 | AND t_w_firstyear.year_total > 0
100 | AND CASE WHEN t_c_firstyear.year_total > 0 -- catalog second/first-year ratio must exceed the store ratio
101 | THEN t_c_secyear.year_total / t_c_firstyear.year_total
102 | ELSE NULL END
103 | > CASE WHEN t_s_firstyear.year_total > 0
104 | THEN t_s_secyear.year_total / t_s_firstyear.year_total
105 | ELSE NULL END
106 | AND CASE WHEN t_c_firstyear.year_total > 0 -- ... and must also exceed the web ratio
107 | THEN t_c_secyear.year_total / t_c_firstyear.year_total
108 | ELSE NULL END
109 | > CASE WHEN t_w_firstyear.year_total > 0
110 | THEN t_w_secyear.year_total / t_w_firstyear.year_total
111 | ELSE NULL END
112 | ORDER BY
113 | t_s_secyear.customer_id,
114 | t_s_secyear.customer_first_name,
115 | t_s_secyear.customer_last_name,
116 | t_s_secyear.customer_preferred_cust_flag,
117 | t_s_secyear.customer_birth_country,
118 | t_s_secyear.customer_login,
119 | t_s_secyear.customer_email_address
120 | LIMIT 100
121 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/ranger/authorization/spark/authorizer/SparkPrivilegeObject.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.ranger.authorization.spark.authorizer
19 |
20 | import org.apache.ranger.authorization.spark.authorizer.SparkPrivilegeObjectType.SparkPrivilegeObjectType
21 |
22 | import scala.collection.JavaConverters._
23 | import org.apache.ranger.authorization.spark.authorizer.SparkPrivObjectActionType.SparkPrivObjectActionType
24 |
25 | /**
26 |  * A privilege object identified by its type, database name and object name, optionally
27 |  * carrying partition keys, columns and the kind of action performed on it.
28 |  *
29 |  * Instances are ordered by type, database name, object name, partition keys and columns, in
30 |  * that order; the action type does not take part in the ordering.
31 |  *
32 |  * @param typ        type of the privilege object
33 |  * @param dbname     database name; null is tolerated
34 |  * @param objectName name of the object; null is tolerated by the ordering
35 |  * @param partKeys   partition keys; null is tolerated
36 |  * @param columns    column names; null is tolerated by the ordering
37 |  * @param actionType action performed on the object
38 |  */
39 | class SparkPrivilegeObject(
40 |     private val typ: SparkPrivilegeObjectType,
41 |     private val dbname: String,
42 |     private val objectName: String,
43 |     private val partKeys: Seq[String],
44 |     private val columns: Seq[String],
45 |     private val actionType: SparkPrivObjectActionType)
46 |   extends Ordered[SparkPrivilegeObject] {
47 |
48 |   /**
49 |    * Compares field by field and returns the first non-zero result: type, database name,
50 |    * object name, partition keys, columns.
51 |    */
52 |   override def compare(that: SparkPrivilegeObject): Int = {
53 |     var result = typ compareTo that.typ
54 |     if (result == 0) result = compare(dbname, that.dbname)
55 |     if (result == 0) result = compare(objectName, that.objectName)
56 |     if (result == 0) result = compare(partKeys, that.partKeys)
57 |     if (result == 0) result = compare(columns, that.columns)
58 |     result
59 |   }
60 |
61 |   /** Null-safe string comparison; null sorts before every non-null value. */
62 |   private def compare(o1: String, o2: String): Int = {
63 |     if (o1 == null) {
64 |       if (o2 == null) 0 else -1
65 |     } else if (o2 == null) {
66 |       1
67 |     } else {
68 |       o1.compareTo(o2)
69 |     }
70 |   }
71 |
72 |   /**
73 |    * Null-safe lexicographic comparison of two sequences: the first differing pair of
74 |    * elements decides; if one sequence is a prefix of the other, the shorter one sorts first.
75 |    * A null sequence sorts before every non-null one.
76 |    */
77 |   private def compare(o1: Seq[String], o2: Seq[String]): Int = {
78 |     if (o1 == null) {
79 |       if (o2 == null) 0 else -1
80 |     } else if (o2 == null) {
81 |       1
82 |     } else {
83 |       // Lazily compare element pairs on iterators, so no `return` from inside a closure
84 |       // (which Scala implements by throwing an exception) is needed to stop early.
85 |       o1.iterator.zip(o2.iterator)
86 |         .map { case (x, y) => compare(x, y) }
87 |         .find(_ != 0)
88 |         .getOrElse(Integer.compare(o1.size, o2.size))
89 |     }
90 |   }
91 |
92 |   /** Variant with partition keys and columns; the action type is `OTHER`. */
93 |   def this(typ: SparkPrivilegeObjectType, dbname: String, objectName: String,
94 |       partKeys: Seq[String], columns: Seq[String]) =
95 |     this(typ, dbname, objectName, partKeys, columns, SparkPrivObjectActionType.OTHER)
96 |
97 |   /** Variant with an action type; partition keys and columns are `Nil`. */
98 |   def this(typ: SparkPrivilegeObjectType, dbname: String, objectName: String,
99 |       actionType: SparkPrivObjectActionType) =
100 |     this(typ, dbname, objectName, Nil, Nil, actionType)
101 |
102 |   /** Minimal variant: `Nil` partition keys and columns, action type `OTHER`. */
103 |   def this(typ: SparkPrivilegeObjectType, dbname: String, objectName: String) =
104 |     this(typ, dbname, objectName, SparkPrivObjectActionType.OTHER)
105 |
106 |   def getType: SparkPrivilegeObjectType = typ
107 |
108 |   def getDbname: String = dbname
109 |
110 |   def getObjectName: String = objectName
111 |
112 |   def getActionType: SparkPrivObjectActionType = actionType
113 |
114 |   def getPartKeys: Seq[String] = partKeys
115 |
116 |   def getColumns: Seq[String] = columns
117 |
118 |   /**
119 |    * Renders the object as `Object [type=T, name=N]`, appending `, action=A` when the action
120 |    * type is INSERT or INSERT_OVERWRITE. The name is the database name for databases, the
121 |    * `db.object` name for functions, and the `db.object` name followed by the partition keys
122 |    * for tables/views; other types get an empty name.
123 |    */
124 |   override def toString: String = {
125 |     val name = typ match {
126 |       case SparkPrivilegeObjectType.DATABASE => dbname
127 |       case SparkPrivilegeObjectType.TABLE_OR_VIEW =>
128 |         getDbObjectName + (if (partKeys != null) partKeys.asJava.toString else "")
129 |       case SparkPrivilegeObjectType.FUNCTION => getDbObjectName
130 |       case _ => ""
131 |     }
132 |
133 |     val at = if (actionType != null) {
134 |       actionType match {
135 |         case SparkPrivObjectActionType.INSERT |
136 |              SparkPrivObjectActionType.INSERT_OVERWRITE => ", action=" + actionType
137 |         case _ => ""
138 |       }
139 |     } else {
140 |       ""
141 |     }
142 |     "Object [type=" + typ + ", name=" + name + at + "]"
143 |   }
144 |
145 |   // `db.object`, or just the object name when no database name is set.
146 |   private def getDbObjectName: String = {
147 |     (if (dbname == null) "" else dbname + ".") + objectName
148 |   }
149 | }
150 |
--------------------------------------------------------------------------------