├── DataFrame Solutions
│   ├── 1158. Market Analysis I (Medium).txt
│   ├── 1212. Team Scores in Football Tournament (Medium).txt
│   ├── 1355. Activity Participants (Medium).txt
│   ├── 1445. Apples & Oranges (Medium).txt
│   ├── 1596. The Most Frequently Ordered Products for Each Customer (Medium).txt
│   ├── 175. Combine Two Tables (Easy).txt
│   ├── 181. Employees Earning More Than Their Managers (Easy).txt
│   ├── 182. Duplicate Emails (Easy).txt
│   ├── 183. Customers Who Never Order (Easy).txt
│   ├── 1907. Count Salary Categories (Medium).txt
│   ├── 1934. Confirmation Rate (Medium).txt
│   ├── 196. Delete Duplicate Emails (Easy).txt
│   ├── 197. Rising Temperature (Easy).txt
│   ├── 1988. Find Cutoff Score for Each School (Medium).txt
│   ├── 2051. The Category of Each Member in the Store (Medium).txt
│   ├── 511. Game Play Analysis I (Easy).txt
│   ├── 512. Game Play Analysis II (Easy).txt
│   ├── 577. Employee Bonus (Easy).txt
│   ├── 584. Find Customer Referee (Easy).txt
│   ├── 586. Customer Placing the Largest Number of Orders (Easy).txt
│   └── 595. Big Countries (Easy).txt
├── README.md
├── apache_spark.png
├── leetcode.png
├── plus.png
└── postgresql_dump_file
    └── leetcodedb.sql
/DataFrame Solutions/1158. Market Analysis I (Medium).txt:
--------------------------------------------------------------------------------
val usersQuery = "(SELECT * FROM users_1158) AS users"
val usersDF = spark.read.jdbc(url, usersQuery, connectionProperties)

val ordersQuery = "(SELECT * FROM orders_1158) AS orders"
val ordersDF = spark.read.jdbc(url, ordersQuery, connectionProperties)

val itemsQuery = "(SELECT * FROM items_1158) AS items"
val itemsDF = spark.read.jdbc(url, itemsQuery, connectionProperties)

// Orders placed in 2019, counted per buyer
val orders2019DF = ordersDF.where(year($"order_date") === 2019).groupBy($"buyer_id").agg(count($"order_id").as("orders_in_2019"))

// Keep every user; buyers with no 2019 orders get a count of 0
val resultDF = usersDF.as("u").join(orders2019DF.as("o"), $"u.user_id" === $"o.buyer_id", "left_outer").select($"u.user_id", $"u.join_date", coalesce($"o.orders_in_2019", lit(0)).as("orders_in_2019"))

resultDF.show
--------------------------------------------------------------------------------
/DataFrame Solutions/1212. Team Scores in Football Tournament (Medium).txt:
--------------------------------------------------------------------------------
val teamsQuery = "(SELECT * FROM teams_1212) AS teams"
val teamsDF = spark.read.jdbc(url, teamsQuery, connectionProperties)

val matchesQuery = "(SELECT * FROM matches_1212) AS matches"
val matchesDF = spark.read.jdbc(url, matchesQuery, connectionProperties)

// Duplicate tied matches with the teams swapped so both sides earn a point
val tiedMatchesDF = matchesDF.where($"guest_goals" === $"host_goals").select($"guest_team", $"host_team", $"guest_goals", $"host_goals")

val unionDF = matchesDF.select($"host_team", $"guest_team", $"host_goals", $"guest_goals").union(tiedMatchesDF)

// 3 points to the winning team, 1 point to each team in a tie
val reportDF = unionDF.withColumn("winner", when($"host_goals" > $"guest_goals", $"host_team").when($"host_goals" < $"guest_goals", $"guest_team").otherwise($"host_team")).withColumn("points", when($"host_goals" === $"guest_goals", lit(1)).otherwise(lit(3))).select($"winner", $"points").groupBy($"winner").agg(sum($"points").as("num_points"))

val resultDF = teamsDF.as("t").join(reportDF.as("r"), $"t.team_id" === $"r.winner", "left_outer").select($"t.team_id", $"t.team_name", coalesce($"r.num_points", lit(0)).as("num_points")).orderBy($"num_points".desc, $"team_id")

resultDF.show
--------------------------------------------------------------------------------
/DataFrame Solutions/1355. Activity Participants (Medium).txt:
--------------------------------------------------------------------------------
val query = "(SELECT * FROM friends_1355) AS friends"
val friendsDF = spark.read.jdbc(url, query, connectionProperties)

val groupedDF = friendsDF.groupBy($"activity").agg(count($"id").as("cnt"))

// Keep activities whose participant count is neither the maximum nor the minimum
val resultDF = groupedDF.withColumn("min_count", min($"cnt").over()).withColumn("max_count", max($"cnt").over()).where($"cnt" =!= $"max_count" && $"cnt" =!= $"min_count").select($"activity")

resultDF.show
--------------------------------------------------------------------------------
/DataFrame Solutions/1445. Apples & Oranges (Medium).txt:
--------------------------------------------------------------------------------
val query = "(SELECT * FROM sales_1445) AS sales"
val salesDF = spark.read.jdbc(url, query, connectionProperties)

val applesDF = salesDF.where($"fruit" === "apples")
val orangesDF = salesDF.where($"fruit" === "oranges")

// Apples sold minus oranges sold on each sale_date, ordered by date;
// coalesce covers dates present on only one side of the full outer join
val resultDF = applesDF.as("a").join(orangesDF.as("o"), $"a.sale_date" === $"o.sale_date", "full_outer").select(coalesce($"a.sale_date", $"o.sale_date").as("sale_date"), ($"a.sold_num" - $"o.sold_num").as("diff")).orderBy($"sale_date")

resultDF.show
--------------------------------------------------------------------------------
/DataFrame Solutions/1596. The Most Frequently Ordered Products for Each Customer (Medium).txt:
--------------------------------------------------------------------------------
val customersQuery = "(SELECT * FROM customers_1596) AS customers"
val customersDF = spark.read.jdbc(url, customersQuery, connectionProperties)

val ordersQuery = "(SELECT * FROM orders_1596) AS orders"
val ordersDF = spark.read.jdbc(url, ordersQuery, connectionProperties)

val productsQuery = "(SELECT * FROM products_1596) AS products"
val productsDF = spark.read.jdbc(url, productsQuery, connectionProperties)

// Number of orders per (customer, product) pair
val groupedDF = ordersDF.groupBy($"customer_id", $"product_id").agg(count(lit(1)).as("cnt"))

import org.apache.spark.sql.expressions.Window

// Rank each customer's products by order count; dense_rank keeps ties
val windowSpec = Window.partitionBy("customer_id").orderBy(desc("cnt"))

val rankedDF = groupedDF.withColumn("rank", dense_rank().over(windowSpec)).where($"rank" === lit(1)).select($"customer_id", $"product_id")

val resultDF = rankedDF.as("r").join(productsDF.as("p"), $"p.product_id" === $"r.product_id").selectExpr("r.*", "product_name")

resultDF.show
--------------------------------------------------------------------------------
/DataFrame Solutions/175. Combine Two Tables (Easy).txt:
--------------------------------------------------------------------------------
val personQuery = "(SELECT * FROM person_175) AS person"
val personDF = spark.read.jdbc(url, personQuery, connectionProperties)

val addressQuery = "(SELECT * FROM address_175) AS address"
val addressDF = spark.read.jdbc(url, addressQuery, connectionProperties)

// The problem asks for every person, with address fields null when missing,
// so this must be a left outer join rather than an inner join
val joinCondition = personDF.col("personid") === addressDF.col("personid")
val joinedDF = personDF.join(addressDF, joinCondition, "left_outer")

joinedDF.show
--------------------------------------------------------------------------------
/DataFrame Solutions/181. Employees Earning More Than Their Managers (Easy).txt:
--------------------------------------------------------------------------------
val query = "(SELECT * FROM employee_181) AS employee"
val employeeDF = spark.read.jdbc(url, query, connectionProperties)

// Self-join each employee to their manager and keep those who out-earn them
val joinedDF = employeeDF.as("emp").join(employeeDF.as("mgr"), $"emp.manager_id" === $"mgr.id" && $"emp.salary" > $"mgr.salary", "inner").select($"emp.name")

joinedDF.show
--------------------------------------------------------------------------------
/DataFrame Solutions/182. Duplicate Emails (Easy).txt:
--------------------------------------------------------------------------------
val query = "(SELECT * FROM person_182) AS person"
val personDF = spark.read.jdbc(url, query, connectionProperties)

// Emails that appear on more than one row are duplicates
val duplicateDF = personDF.groupBy(col("email")).agg(count(col("id")).as("cnt")).where($"cnt" > 1).select(col("email"))

duplicateDF.show
--------------------------------------------------------------------------------
/DataFrame Solutions/183. Customers Who Never Order (Easy).txt:
--------------------------------------------------------------------------------
val customersQuery = "(SELECT * FROM customers_183) AS customers"
val customerDF = spark.read.jdbc(url, customersQuery, connectionProperties)

val ordersQuery = "(SELECT * FROM orders_183) AS orders"
val orderDF = spark.read.jdbc(url, ordersQuery, connectionProperties)

// Direct method: an anti join keeps customers with no matching order
val antiJoinDF = customerDF.as("c").join(orderDF.as("o"), $"c.id" === $"o.customer_id", "left_anti").select($"c.name")

// Conventional method: left outer join, then keep rows where no order matched
val outerJoinDF = customerDF.as("c").join(orderDF.as("o"), $"c.id" === $"o.customer_id", "left_outer").where($"o.id".isNull).select($"c.name")

antiJoinDF.show
outerJoinDF.show
--------------------------------------------------------------------------------
/DataFrame Solutions/1907. Count Salary Categories (Medium).txt:
--------------------------------------------------------------------------------
val query = "(SELECT * FROM accounts_1907) AS accounts"
val accountsDF = spark.read.jdbc(url, query, connectionProperties)

val categorizedDF = accountsDF.withColumn("category", when($"income" < 20000, "Low Salary").when($"income" >= 20000 && $"income" <= 50000, "Average Salary").otherwise("High Salary"))

val groupedDF = categorizedDF.groupBy($"category").agg(count($"account_id").as("accounts_count"))

// Left join against all three category names so empty categories report 0
val categoryDF = Seq("Low Salary", "Average Salary", "High Salary").toDF("category")

val resultDF = categoryDF.as("c")
  .join(groupedDF.as("g"), $"c.category" === $"g.category", "left_outer")
  .select($"c.category", coalesce($"g.accounts_count", lit(0)).as("accounts_count"))

resultDF.show
--------------------------------------------------------------------------------
/DataFrame Solutions/1934. Confirmation Rate (Medium).txt:
--------------------------------------------------------------------------------
val signupsQuery = "(SELECT * FROM signups_1934) AS signups"
val signupsDF = spark.read.jdbc(url, signupsQuery, connectionProperties)

val confirmationsQuery = "(SELECT * FROM confirmations_1934) AS confirmations"
val confirmationsDF = spark.read.jdbc(url, confirmationsQuery, connectionProperties)

import org.apache.spark.sql.expressions.Window

val w = Window.partitionBy("user_id")

// Confirmed messages divided by all confirmation requests, per user
val crDF = confirmationsDF.withColumn("confirmation_count", count(when($"action" === lit("confirmed"), lit(1))).over(w)).withColumn("total_count", count($"action").over(w)).select($"user_id", round($"confirmation_count" / $"total_count", 2).as("confirmation_rate"))

// Users who never requested a confirmation get a rate of 0
val resultDF = signupsDF.as("s").join(crDF.as("c"), $"s.user_id" === $"c.user_id", "left_outer").select($"s.user_id", coalesce($"confirmation_rate", lit(0)).as("confirmation_rate")).distinct

resultDF.show
--------------------------------------------------------------------------------
/DataFrame Solutions/196. Delete Duplicate Emails (Easy).txt:
--------------------------------------------------------------------------------
import org.apache.spark.sql.expressions.Window

val query = "(SELECT * FROM person_196) AS person"
val personDF = spark.read.jdbc(url, query, connectionProperties)

val w = Window.partitionBy("email")

// For each email, keep only the row with the smallest id
val distinctPersonDF = personDF.withColumn("min", min($"id").over(w)).where($"min" === $"id").drop($"min")

distinctPersonDF.show
--------------------------------------------------------------------------------
/DataFrame Solutions/197. Rising Temperature (Easy).txt:
--------------------------------------------------------------------------------
val query = "(SELECT * FROM weather_197) AS weather"
val weatherDF = spark.read.jdbc(url, query, connectionProperties)

// Self-join each day to the previous day and keep days that got warmer
val resultDF = weatherDF.as("w1")
  .join(weatherDF.as("w2"), date_add($"w2.record_date", 1) === $"w1.record_date" && $"w2.temperature" < $"w1.temperature", "inner")
  .select($"w1.id")

resultDF.show
--------------------------------------------------------------------------------
/DataFrame Solutions/1988. Find Cutoff Score for Each School (Medium).txt:
--------------------------------------------------------------------------------
val schoolQuery = "(SELECT * FROM school_1988) AS school"
val schoolDF = spark.read.jdbc(url, schoolQuery, connectionProperties)

val examQuery = "(SELECT * FROM exam_1988) AS exam"
val examDF = spark.read.jdbc(url, examQuery, connectionProperties)

// For each school, the cutoff is the lowest score whose applicant count fits the capacity; -1 if none fits
val resultDF = schoolDF.as("s").join(examDF.as("e"), $"s.capacity" >= $"e.student_count", "left_outer").groupBy($"s.school_id").agg(coalesce(min($"e.score"), lit(-1)).as("score"))

resultDF.show
--------------------------------------------------------------------------------
/DataFrame Solutions/2051. The Category of Each Member in the Store (Medium).txt:
--------------------------------------------------------------------------------
val membersQuery = "(SELECT * FROM members_2051) AS members"
val membersDF = spark.read.jdbc(url, membersQuery, connectionProperties)

val visitsQuery = "(SELECT * FROM visits_2051) AS visits"
val visitsDF = spark.read.jdbc(url, visitsQuery, connectionProperties)

val purchasesQuery = "(SELECT * FROM purchases_2051) AS purchases"
val purchasesDF = spark.read.jdbc(url, purchasesQuery, connectionProperties)

import org.apache.spark.sql.expressions.Window

val w = Window.partitionBy("member_id")

// Conversion rate = visits that led to a purchase * 100 / total visits
val joinedDF = visitsDF.as("v").join(purchasesDF.as("p"), $"v.visit_id" === $"p.visit_id", "left_outer").withColumn("purchase_count", count(when($"p.visit_id".isNotNull, lit(1))).over(w)).withColumn("total_count", count($"v.visit_id").over(w)).select($"member_id", ($"purchase_count" * lit(100) / $"total_count").as("conversion")).distinct

val conDF = joinedDF.withColumn("category", when($"conversion" >= 80, "Diamond").when($"conversion" >= 50 && $"conversion" < 80, "Gold").when($"conversion" < 50, "Silver"))

// Members with no visits fall through the left join and are labeled Bronze
val resultDF = membersDF.as("m").join(conDF.as("c"), $"m.member_id" === $"c.member_id", "left_outer").select($"m.member_id", $"m.name", coalesce($"c.category", lit("Bronze")).as("category"))

resultDF.show
--------------------------------------------------------------------------------
/DataFrame Solutions/511. Game Play Analysis I (Easy).txt:
--------------------------------------------------------------------------------
val query = "(SELECT * FROM activity_511) AS activity"
val activityDF = spark.read.jdbc(url, query, connectionProperties)

val firstLoginDF = activityDF
  .groupBy($"player_id")
  .agg(min($"event_date").as("first_login"))

firstLoginDF.show
--------------------------------------------------------------------------------
/DataFrame Solutions/512. Game Play Analysis II (Easy).txt:
--------------------------------------------------------------------------------
val query = "(SELECT * FROM activity_511) AS activity"
val activityDF = spark.read.jdbc(url, query, connectionProperties)

val firstLoginDF = activityDF
  .groupBy($"player_id")
  .agg(min($"event_date").as("first_login"))

// The device each player used on their first login date
val firstDeviceDF = activityDF.as("act")
  .join(firstLoginDF.as("fl"), $"act.player_id" === $"fl.player_id" && $"act.event_date" === $"fl.first_login", "inner")
  .select($"act.player_id", $"act.device_id")

firstDeviceDF.show
--------------------------------------------------------------------------------
/DataFrame Solutions/577. Employee Bonus (Easy).txt:
--------------------------------------------------------------------------------
val employeeQuery = "(SELECT * FROM employee_577) AS employee"
val employeeDF = spark.read.jdbc(url, employeeQuery, connectionProperties)

val bonusQuery = "(SELECT * FROM bonus_577) AS bonus"
val bonusDF = spark.read.jdbc(url, bonusQuery, connectionProperties)

val resultDF = employeeDF.as("emp")
  .join(bonusDF.as("bn"), $"emp.empId" === $"bn.empId", "left_outer")
  .where($"bn.bonus".isNull || $"bn.bonus" < 1000)
  .select($"emp.name", $"bn.bonus")

resultDF.show
--------------------------------------------------------------------------------
/DataFrame Solutions/584. Find Customer Referee (Easy).txt:
--------------------------------------------------------------------------------
val query = "(SELECT * FROM customer_584) AS customer"
val customerDF = spark.read.jdbc(url, query, connectionProperties)

// Customers not referred by customer 2; the null check keeps customers with no referee
val resultDF = customerDF.where($"reference_id".isNull || $"reference_id" =!= 2).select($"name")

resultDF.show
--------------------------------------------------------------------------------
/DataFrame Solutions/586. Customer Placing the Largest Number of Orders (Easy).txt:
--------------------------------------------------------------------------------
val query = "(SELECT * FROM orders_586) AS orders"
val ordersDF = spark.read.jdbc(url, query, connectionProperties)

val resultDF = ordersDF.groupBy($"customer_number").agg(count($"order_number").as("cnt")).orderBy(desc("cnt")).select($"customer_number").limit(1)

resultDF.show
--------------------------------------------------------------------------------
/DataFrame Solutions/595. Big Countries (Easy).txt:
--------------------------------------------------------------------------------
val query = "(SELECT * FROM world_595) AS world"
val worldDF = spark.read.jdbc(url, query, connectionProperties)

// A country is big if it has at least 25 million people or an area of at least 3 million km^2
val resultDF = worldDF.where($"population" >= 25000000 || $"area" >= 3000000).select($"name", $"population", $"area")

resultDF.show
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Spark Solutions + LeetCode SQL Questions
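Every file under `DataFrame Solutions` is a spark-shell snippet that assumes `url` and `connectionProperties` are already defined and that the PostgreSQL dump in `postgresql_dump_file/leetcodedb.sql` has been restored. A minimal sketch of that shared setup is below; the host, database name, and credentials are placeholders, not values taken from this repo:

```scala
import java.util.Properties
// Column functions (count, when, coalesce, lit, ...) used throughout the solutions
import org.apache.spark.sql.functions._

// Placeholder connection details; adjust to your local PostgreSQL instance
val url = "jdbc:postgresql://localhost:5432/leetcodedb"

val connectionProperties = new Properties()
connectionProperties.setProperty("user", "postgres")     // placeholder
connectionProperties.setProperty("password", "postgres") // placeholder
connectionProperties.setProperty("driver", "org.postgresql.Driver")

// Each solution then reads a table through a pushed-down subquery, e.g.:
val query = "(SELECT * FROM world_595) AS world"
val worldDF = spark.read.jdbc(url, query, connectionProperties)
```

Launch the shell with the PostgreSQL JDBC driver on the classpath, e.g. `spark-shell --packages org.postgresql:postgresql:<version>`, so that `org.postgresql.Driver` resolves; spark-shell itself provides the `spark` session and the `$"col"` syntax via `spark.implicits._`.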