├── .gitignore
├── Anti_Patterns
├── 01_Ambiguous_Group
│ └── README.md
├── 02_Bad_Subquery
│ └── README.md
└── 03_Fail_to_Use_Index
│ └── README.md
├── Hacks
├── 01_Random_Sampling
│ ├── README.md
│ └── solution.sql
├── 02_NULL_pathology
│ ├── README.md
│ └── db.sql
├── 03_Full_Join
│ ├── README.md
│ └── db.sql
├── 04_Dynamic_Query
│ ├── README.md
│ └── dynamic.ipynb
├── 05_Stored_Precesure
│ ├── README.md
│ └── stored_procedure.sql
├── 06_Hacking_Aggregation
│ ├── README.md
│ └── solution.sql
├── 07_Multi_Column_Partition
│ ├── README.md
│ ├── db.sql
│ └── use_case.sql
└── 08_pandasql
│ ├── customers.csv
│ └── pandasql.ipynb
├── Interview
├── 01_Facebook_Advertiser_Status
│ ├── README.md
│ ├── db.sql
│ ├── fig
│ │ └── transition.png
│ └── solution.sql
├── 02_Spotify_Listening_History
│ ├── README.md
│ ├── db.sql
│ └── solution.sql
├── 03_Monthly_Active_User
│ ├── README.md
│ ├── db.sql
│ └── solution.sql
├── 04_Page_Recommendation
│ ├── README.md
│ ├── db.sql
│ └── solution.sql
├── 05_Pivoting_Numeric_Data
│ ├── Expenses.sql
│ ├── README.md
│ ├── db.sql
│ └── pivot.sql
├── 06_Pivoting_Text_Data
│ ├── README.md
│ ├── db.sql
│ └── pivot.sql
├── 07_Unpivoting_Tables
│ ├── README.md
│ ├── pivot_table.sql
│ └── un_pivot_table.sql
├── 08_Group_by_Bins
│ ├── README.md
│ └── solution.sql
├── 09_Consecutive_Active_Users
│ ├── README.md
│ ├── db.sql
│ └── solution.sql
├── 10_Spotify_Recommend_Friend
│ ├── README.md
│ ├── db.sql
│ ├── fig
│ │ └── symmetry.png
│ └── solution.sql
├── 11_Spotify_Similar_Friends
│ ├── README.md
│ ├── cartesian.txt
│ └── solution.sql
├── 12_Invalid_Search
│ ├── README.md
│ ├── db.sql
│ └── solution.sql
├── 13_Text_Confirmation
│ ├── README.md
│ ├── db.sql
│ └── solution.sql
├── 14_Facebook_Common_Friend
│ ├── README.md
│ ├── db.sql
│ └── solution.sql
├── 15_Facebook_People_You_May_Know
│ ├── README.md
│ └── solution.sql
└── 16_Instagram_Common_Follower
│ ├── README.md
│ ├── db.sql
│ └── solution.sql
├── LICENSE
├── LeetCode
├── 185_Department_Top_Three_Salaries
│ ├── README.md
│ ├── db.sql
│ ├── mssql_pre_filter.sql
│ ├── mssql_window.sql
│ └── mysql_correlated_subquery.sql
├── 262_Trips_and_Users
│ ├── README.md
│ ├── db.sql
│ ├── mysql_pre_filter.sql
│ ├── mysql_set.sql
│ └── mysql_simple.sql
├── 579_Find_Cumulative_Salary_of_an_Employee
│ ├── README.md
│ ├── db.sql
│ ├── mssql_lag.sql
│ ├── mysql_join_tmp_table.sql
│ ├── mysql_set.sql
│ └── mysql_single_join.sql
├── 601_Human_Traffic_of_Stadium
│ ├── README.md
│ ├── db.sql
│ ├── mssql_pre_filter.sql
│ ├── mssql_window.sql
│ └── mysql_simple.sql
├── 615_Average_Salary
│ ├── README.md
│ ├── db.sql
│ ├── mysql.sql
│ └── mysql_session_vars.sql
├── 618_Students_Report_by_Geography
│ ├── README.md
│ ├── db.sql
│ ├── mssql_full_join.sql
│ ├── mysql_session_vars.sql
│ └── self-join-rownums.sql
└── others
│ ├── 1045_Customers_Who_Bought_All_Products.sql
│ ├── 1050_Actors_and_Directors_Who_Cooperated_At_Least_Three_Times.sql
│ ├── 1068_Product_Sales_Analysis_I.sql
│ ├── 1069_Product_Sales_Analysis_II.sql
│ ├── 1070_Product_Sales_Analysis_III.sql
│ ├── 1075_Project_Employees_I.sql
│ ├── 1076_Project_Employees_II.sql
│ ├── 1077_Project_Employees_III.sql
│ ├── 1082_Sales_Analysis_I.sql
│ ├── 1083_Sales_Analysis_II.sql
│ ├── 1084_Sales_Analysis_III.sql
│ ├── 1097_Game_Play_Analysis_V.sql
│ ├── 1098_Unpopular_Books.sql
│ ├── 1107_New_Users_Daily_Count.sql
│ ├── 1112_Highest_Grade_For_Each_Student.sql
│ ├── 1113_Reported_Posts.sql
│ ├── 1126_Active_Businesses.sql
│ ├── 1127_User_Purchase_Platform.sql
│ ├── 1132_Reported_Posts_II.sql
│ ├── 1141_User_Activity_for_the_Past_30_Days_I.sql
│ ├── 1142_User_Activity_for_the_Past_30_Days_II.sql
│ ├── 1148_Article_Views_I.sql
│ ├── 1149_Article_Views_II.sql
│ ├── 1158_Market_Analysis_I.sql
│ ├── 1159_Market_Analysis_II.sql
│ ├── 1164_Product_Price_at_a_Given_Date.sql
│ ├── 1173_Immediate_Food_Delivery_I.sql
│ ├── 1174_Immediate_Food_Delivery_II.sql
│ ├── 1179_Reformat_Department_Table.sql
│ ├── 175_Combine_Two_Tables.sql
│ ├── 176_Second_Highest_Salary.sql
│ ├── 177_Nth_Highest_Salary.sql
│ ├── 178_Rank_Scores.sql
│ ├── 180_Consecutive_Numbers.sql
│ ├── 181_Employees_Earning_More_Than_Their_Managers.sql
│ ├── 182_Duplicate_Emails.sql
│ ├── 183_Customers_Who_Never_Order.sql
│ ├── 184_Department_Highest_Salary.sql
│ ├── 185_Department_Top_Three_Salaries.sql
│ ├── 196_Delete_Duplicate_Emails.sql
│ ├── 197_Rising_Temperature.sql
│ ├── 262_Trips_and_Users.sql
│ ├── 569_Median_Employee_Salary.sql
│ ├── 570_Managers_with_at_Least_5_Direct_Reports.sql
│ ├── 571_Find_Median_Given_Frequency_of_Numbers.sql
│ ├── 574_Winning_Candidate.sql
│ ├── 577_Employee_Bonus.sql
│ ├── 578_Get_Highest_Answer_Rate_Question.sql
│ ├── 579_Find_Cumulative_Salary_of_an_Employee.sql
│ ├── 580_Count_Student_Number_in_Departments.sql
│ ├── 584_Find_Customer_Referee.sql
│ ├── 585_Investments_in_2016.sql
│ ├── 586_Customer_Placing_the_Largest_Number_of_Orders.sql
│ ├── 595_Big_Countries.sql
│ ├── 596_Classes_More_Than_5_Students.sql
│ ├── 597_Friend_Requests_I-_Overall_Acceptance_Rate.sql
│ ├── 601_Human_Traffic_of_Stadium.sql
│ ├── 602_Friend_Requests_II-_Who_Has_the_Most_Friends.sql
│ ├── 603_Consecutive_Available_Seats.sql
│ ├── 607_Sales_Person.sql
│ ├── 608_Tree_Node.sql
│ ├── 610_Triangle_Judgement.sql
│ ├── 612_Shortest_Distance_in_a_Plane.sql
│ ├── 613_Shortest_Distance_in_a_Line.sql
│ ├── 614_Second_Degree_Follower.sql
│ ├── 615_Average_Salary_Departments_VS_Company.sql
│ ├── 618_Students_Report_By_Geography.sql
│ ├── 619_Biggest_Single_Number.sql
│ ├── 620_Not_Boring_Movies.sql
│ ├── 626_Exchange_Seats.sql
│ └── 627_Swap_Salary.sql
├── README.md
├── assets
└── sql_order.png
└── databases
├── classicmodels
└── mysqlsampledatabase.sql
├── practice.sql
├── sakila-db
├── sakila-data.sql
└── sakila-schema.sql
└── world_db
├── README.txt
└── world.sql
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | .ipynb_checkpoints/
3 | archive/
--------------------------------------------------------------------------------
/Anti_Patterns/02_Bad_Subquery/README.md:
--------------------------------------------------------------------------------
1 | # Bad Subquery
2 | This notebook goes through several common types of bad subqueries. Subqueries are not always bad, and sometimes cannot be avoided. But they are often misused because analysts are too lazy to even think about why they need it.
3 |
4 | ### Not Using Having Clause
5 | We want to select countries with more than 100 cities.
6 |
7 | ```sql
8 | SELECT CountryCode, city_tally
9 | FROM (
10 | SELECT
11 | CountryCode
12 | ,COUNT(*) AS city_tally
13 | FROM city
14 | GROUP BY CountryCode
15 | ) AS _
16 | WHERE city_tally >= 100
17 | ORDER BY city_tally DESC;
18 | ```
19 | ```
20 | +-------------+------------+
21 | | CountryCode | city_tally |
22 | +-------------+------------+
23 | | CHN | 363 |
24 | | IND | 341 |
25 | | USA | 274 |
26 | | BRA | 250 |
27 | | JPN | 248 |
28 | | RUS | 189 |
29 | | MEX | 173 |
30 | | PHL | 136 |
31 | +-------------+------------+
32 | 8 rows in set (0.00 sec)
33 | ```
34 |
35 | The subquery can be avoided by moving the *WHERE* clause into the *HAVING* clause.
36 |
37 | ```sql
38 | SELECT
39 | CountryCode
40 | ,COUNT(*) AS city_tally
41 | FROM city
42 | GROUP BY CountryCode
43 | HAVING city_tally >= 100
44 | ORDER BY city_tally DESC;
45 | ```
46 | ```
47 | +-------------+------------+
48 | | CountryCode | city_tally |
49 | +-------------+------------+
50 | | CHN | 363 |
51 | | IND | 341 |
52 | | USA | 274 |
53 | | BRA | 250 |
54 | | JPN | 248 |
55 | | RUS | 189 |
56 | | MEX | 173 |
57 | | PHL | 136 |
58 | +-------------+------------+
59 | 8 rows in set (0.00 sec)
60 | ```
61 |
62 | If you don't need to return the city count, you can move the entire expression into *HAVING* clause.
63 |
64 | ```sql
65 | SELECT
66 | CountryCode
67 | FROM city
68 | GROUP BY CountryCode
69 | HAVING COUNT(*) >= 100;
70 | ```
71 | ```
72 | +-------------+
73 | | CountryCode |
74 | +-------------+
75 | | BRA |
76 | | CHN |
77 | | IND |
78 | | JPN |
79 | | MEX |
80 | | PHL |
81 | | RUS |
82 | | USA |
83 | +-------------+
84 | 8 rows in set (0.01 sec)
85 | ```
--------------------------------------------------------------------------------
/Anti_Patterns/03_Fail_to_Use_Index/README.md:
--------------------------------------------------------------------------------
1 | # Failure to Use Index
2 |
3 | In this notebook we use the *sakila* database. Specifically, we want to count how many movies each actor has starred in. We obviously need to group by the first name and last name of the actor.
4 |
5 | ```sql
6 | SELECT actor.first_name, actor.last_name, COUNT(*)
7 | FROM sakila.film_actor
8 | INNER JOIN sakila.actor USING(actor_id)
9 | GROUP BY actor.first_name, actor.last_name
10 | ORDER BY 1, 2
11 | LIMIT 5;
12 | ```
13 | ```
14 | +------------+-----------+----------+
15 | | first_name | last_name | COUNT(*) |
16 | +------------+-----------+----------+
17 | | ADAM | GRANT | 18 |
18 | | ADAM | HOPPER | 22 |
19 | | AL | GARLAND | 26 |
20 | | ALAN | DREYFUSS | 27 |
21 | | ALBERT | JOHANSSON | 33 |
22 | +------------+-----------+----------+
23 | 5 rows in set (0.01 sec)
24 | ```
25 |
26 | But we can do much better by grouping by index. Notice that index uniquely identifies an actor, just as first name and last name uniquely identify an actor.
27 |
28 | ```sql
29 | SELECT actor.first_name, actor.last_name, COUNT(*)
30 | FROM sakila.film_actor
31 | INNER JOIN sakila.actor USING(actor_id)
32 | GROUP BY film_actor.actor_id
33 | ORDER BY 1, 2
34 | LIMIT 5;
35 | ```
36 | ```
37 | +------------+-----------+----------+
38 | | first_name | last_name | COUNT(*) |
39 | +------------+-----------+----------+
40 | | ADAM | GRANT | 18 |
41 | | ADAM | HOPPER | 22 |
42 | | AL | GARLAND | 26 |
43 | | ALAN | DREYFUSS | 27 |
44 | | ALBERT | JOHANSSON | 33 |
45 | +------------+-----------+----------+
46 | 5 rows in set (0.00 sec)
47 | ```
48 |
49 | You may notice that the first two columns are neither in the GROUP BY clause nor aggregated over. This does not throw an error in MySQL, because it recognizes they are functionally dependent on the GROUP BY column.
50 |
51 | The most correct version is to do one of the following:
52 |
53 | ```sql
54 | SELECT MAX(actor.first_name), MAX(actor.last_name), COUNT(*)
55 | FROM sakila.film_actor
56 | INNER JOIN sakila.actor USING(actor_id)
57 | GROUP BY film_actor.actor_id
58 | ORDER BY 1, 2
59 | LIMIT 5;
60 |
61 | SELECT actor.first_name, actor.last_name, COUNT(*)
62 | FROM sakila.film_actor
63 | INNER JOIN sakila.actor USING(actor_id)
64 | GROUP BY film_actor.actor_id, 1, 2
65 | ORDER BY 1, 2
66 | LIMIT 5;
67 |
68 | SELECT actor.first_name, actor.last_name, c.cnt FROM sakila.actor
69 | INNER JOIN (
70 | SELECT actor_id, COUNT(*) AS cnt FROM sakila.film_actor
71 | GROUP BY actor_id
72 | ) AS c USING(actor_id)
73 | ORDER BY 1, 2
74 | LIMIT 5;
75 | ```
76 |
77 | In the last case, be careful that the derived (temporary) table does not have an index, so the *INNER* join could be slow.
--------------------------------------------------------------------------------
/Hacks/01_Random_Sampling/solution.sql:
--------------------------------------------------------------------------------
1 | -- summary statistics per continent (reference only; the pipeline below recomputes these inline and never reads this table)
2 | DROP TABLE IF EXISTS sample_dist_stats;
3 | CREATE TEMPORARY TABLE sample_dist_stats AS
4 | SELECT
5 | continent
6 | ,COUNT(*) AS city_tally
7 | ,10/COUNT(*) AS p
8 | ,10 AS mean
9 | ,SQRT(COUNT(*) * (10/COUNT(*)) * (1-10/COUNT(*))) AS std
10 | FROM city_by_continent
11 | GROUP BY continent
12 | ORDER BY city_tally;
13 |
14 | SET @sample_size = 10;
15 |
16 | -- sampling pipeline
17 | WITH
18 | city_group AS
19 | (SELECT
20 | c.*
21 | ,COUNT(*) OVER (PARTITION BY continent) AS group_size
22 | FROM city_by_continent AS c)
23 | ,city_prob_assign AS
24 | (SELECT
25 | c.*
26 | -- be aware: expected sample size = group_size * (@sample_size / group_size) = @sample_size
27 | ,SQRT(group_size * (@sample_size / group_size) * (1 - @sample_size / group_size)) AS std
28 | ,RAND() AS prob
29 | FROM city_group AS c)
30 | ,city_prob_cutoff AS
31 | (SELECT
32 | c.*
33 | ,(@sample_size + CEIL(2 * std)) / group_size AS cutoff
34 | FROM city_prob_assign AS c)
35 | -- filter by cutoff
36 | -- note that this step cannot be merged with previous
37 | ,city_sample AS
38 | (SELECT
39 | c.*
40 | -- window is evaluated after WHERE clause!
41 | -- ranking is cheap when performed on small group!
42 | ,RANK() OVER (PARTITION BY continent ORDER BY prob) AS group_row_num
43 | FROM city_prob_cutoff AS c
44 | WHERE prob < cutoff)
45 | -- final, fixed size sample
46 | SELECT *
47 | FROM city_sample
48 | WHERE group_row_num <= @sample_size;
--------------------------------------------------------------------------------
/Hacks/02_NULL_pathology/db.sql:
--------------------------------------------------------------------------------
1 | CREATE SCHEMA IF NOT EXISTS Practice;
2 | USE Practice;
3 |
4 |
5 | -- phpMyAdmin SQL Dump
6 | -- version 4.7.7
7 | -- https://www.phpmyadmin.net/
8 | --
9 | -- Host: localhost:8889
10 | -- Generation Time: Mar 17, 2019 at 11:27 AM
11 | -- Server version: 5.6.38
12 | -- PHP Version: 7.2.1
13 |
14 | SET SQL_MODE = "NO_AUTO_VALUE_ON_ZERO";
15 | SET time_zone = "+00:00";
16 |
17 | --
18 | -- Database: `practice`
19 | --
20 |
21 | -- --------------------------------------------------------
22 |
23 | --
24 | -- Table structure for table `Balance`
25 | --
26 |
27 | DROP TABLE IF EXISTS `Balance`;
28 | CREATE TABLE `Balance` (
29 | `id` int(11) NOT NULL,
30 | `name` varchar(10) DEFAULT NULL,
31 | `balance` int(11) DEFAULT NULL
32 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
33 |
34 | --
35 | -- Dumping data for table `Balance`
36 | --
37 |
38 | INSERT INTO `Balance` (`id`, `name`, `balance`) VALUES
39 | (1, 'Alice', 10),
40 | (2, 'Bob', 5),
41 | (3, NULL, 20),
42 | (4, 'Cindy', NULL),
43 | (5, 'Bob', 10),
44 | (6, 'Cindy', 100);
45 |
46 | --
47 | -- Indexes for dumped tables
48 | --
49 |
50 | --
51 | -- Indexes for table `Balance`
52 | --
53 | ALTER TABLE `Balance`
54 | ADD PRIMARY KEY (`id`);
55 |
56 | --
57 | -- AUTO_INCREMENT for dumped tables
58 | --
59 |
60 | --
61 | -- AUTO_INCREMENT for table `Balance`
62 | --
63 | ALTER TABLE `Balance`
64 | MODIFY `id` int(11) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=7;
65 |
--------------------------------------------------------------------------------
/Hacks/03_Full_Join/README.md:
--------------------------------------------------------------------------------
1 | # Full Join
2 |
3 | MySQL does not support full join. There are two ways to simulate it.
4 |
5 | Load the database file [db.sql](db.sql) to localhost MySQL. Two simple tables will be created in the Practice database.
6 | ```
7 | mysql < db.sql -uroot -p
8 | ```
9 |
10 | Let's take a look at the tables we want to join. Evidently, *letter1* table does not contain C; *letter2* table does not contain B.
11 |
12 | ```
13 | mysql> SELECT * FROM letter1;
14 | +----+--------+
15 | | id | letter |
16 | +----+--------+
17 | | 1 | A |
18 | | 2 | B |
19 | | 3 | A |
20 | +----+--------+
21 | 3 rows in set (0.01 sec)
22 |
23 | mysql> SELECT * FROM letter2;
24 | +----+--------+
25 | | id | letter |
26 | +----+--------+
27 | | 1 | A |
28 | | 2 | C |
29 | | 3 | A |
30 | +----+--------+
31 | 3 rows in set (0.00 sec)
32 | ```
33 |
34 | ---
35 | ### Hack
36 | ```sql
37 | SELECT
38 | l1.id
39 | ,l1.letter
40 | ,l2.id
41 | ,l2.letter
42 | FROM letter1 l1
43 | LEFT JOIN letter2 l2 ON l1.letter = l2.letter
44 | UNION ALL
45 | SELECT
46 | l1.id
47 | ,l1.letter
48 | ,l2.id
49 | ,l2.letter
50 | FROM letter1 l1
51 | RIGHT JOIN letter2 l2 ON l1.letter = l2.letter
52 | WHERE l1.letter IS NULL;
53 | ```
54 | ```
55 | +------+--------+------+--------+
56 | | id | letter | id | letter |
57 | +------+--------+------+--------+
58 | | 1 | A | 1 | A |
59 | | 3 | A | 1 | A |
60 | | 1 | A | 3 | A |
61 | | 3 | A | 3 | A |
62 | | 2 | B | NULL | NULL |
63 | | NULL | NULL | 2 | C |
64 | +------+--------+------+--------+
65 | 6 rows in set (0.00 sec)
66 | ```
67 |
68 | ```sql
69 | SELECT
70 | l1.id
71 | ,l1.letter
72 | ,l2.id
73 | ,l2.letter
74 | FROM letter1 l1
75 | RIGHT JOIN letter2 l2 ON l1.letter = l2.letter
76 | UNION ALL
77 | SELECT
78 | l1.id
79 | ,l1.letter
80 | ,l2.id
81 | ,l2.letter
82 | FROM letter1 l1
83 | LEFT JOIN letter2 l2 ON l1.letter = l2.letter
84 | WHERE l2.letter IS NULL;
85 | ```
86 | ```
87 | +------+--------+------+--------+
88 | | id | letter | id | letter |
89 | +------+--------+------+--------+
90 | | 1 | A | 1 | A |
91 | | 1 | A | 3 | A |
92 | | 3 | A | 1 | A |
93 | | 3 | A | 3 | A |
94 | | NULL | NULL | 2 | C |
95 | | 2 | B | NULL | NULL |
96 | +------+--------+------+--------+
97 | 6 rows in set (0.00 sec)
98 | ```
99 |
100 | ---
101 | ### Warning
102 | Do not use *UNION*, which creates a hashset. If the returned rows are not distinct, you get fewer rows than expected!
103 |
104 | ```sql
105 | SELECT l1.letter, l2.letter
106 | FROM letter1 l1
107 | LEFT JOIN letter2 l2
108 | ON l1.letter=l2.letter
109 | UNION
110 | SELECT l1.letter, l2.letter
111 | FROM letter1 l1
112 | RIGHT JOIN letter2 l2
113 | ON l1.letter=l2.letter;
114 | ```
115 | ```
116 | +--------+--------+
117 | | letter | letter |
118 | +--------+--------+
119 | | A | A |
120 | | B | NULL |
121 | | NULL | C |
122 | +--------+--------+
123 | 3 rows in set (0.01 sec)
124 | ```
125 |
126 | The answer should be:
127 | ```sql
128 | SELECT l1.letter, l2.letter FROM letter1 l1
129 | LEFT JOIN letter2 l2 ON l1.letter = l2.letter
130 | UNION ALL
131 | SELECT l1.letter, l2.letter FROM letter1 l1
132 | RIGHT JOIN letter2 l2 ON l1.letter = l2.letter
133 | WHERE l1.letter IS NULL;
134 | ```
135 | ```
136 | +--------+--------+
137 | | letter | letter |
138 | +--------+--------+
139 | | A | A |
140 | | A | A |
141 | | A | A |
142 | | A | A |
143 | | B | NULL |
144 | | NULL | C |
145 | +--------+--------+
146 | 6 rows in set (0.00 sec)
147 | ```
--------------------------------------------------------------------------------
/Hacks/03_Full_Join/db.sql:
--------------------------------------------------------------------------------
1 | CREATE SCHEMA IF NOT EXISTS Practice;
2 | USE Practice;
3 |
4 | -- phpMyAdmin SQL Dump
5 | -- version 4.7.7
6 | -- https://www.phpmyadmin.net/
7 | --
8 | -- Host: localhost:8889
9 | -- Generation Time: Mar 18, 2019 at 03:26 AM
10 | -- Server version: 5.6.38
11 | -- PHP Version: 7.2.1
12 |
13 | SET SQL_MODE = "NO_AUTO_VALUE_ON_ZERO";
14 | SET time_zone = "+00:00";
15 |
16 | --
17 | -- Database: `practice`
18 | --
19 | CREATE DATABASE IF NOT EXISTS `practice` DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci;
20 | USE `practice`;
21 |
22 | -- --------------------------------------------------------
23 |
24 | --
25 | -- Table structure for table `Letter1`
26 | --
27 |
28 | DROP TABLE IF EXISTS `Letter1`;
29 | CREATE TABLE `Letter1` (
30 | `id` int(11) NOT NULL,
31 | `letter` varchar(1) NOT NULL
32 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
33 |
34 | --
35 | -- Dumping data for table `Letter1`
36 | --
37 |
38 | -- letter values must be A, B, A: the README sample output for letter1 and
39 | -- all full-join results assume these rows; 'N' could never produce them.
40 | INSERT INTO `Letter1` (`id`, `letter`) VALUES
41 | (1, 'A'),
42 | (2, 'B'),
43 | (3, 'A');
42 |
43 | -- --------------------------------------------------------
44 |
45 | --
46 | -- Table structure for table `Letter2`
47 | --
48 |
49 | DROP TABLE IF EXISTS `Letter2`;
50 | CREATE TABLE `Letter2` (
51 | `id` int(11) NOT NULL,
52 | `letter` varchar(1) NOT NULL
53 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
54 |
55 | --
56 | -- Dumping data for table `Letter2`
57 | --
58 |
59 | INSERT INTO `Letter2` (`id`, `letter`) VALUES
60 | (1, 'A'),
61 | (2, 'C'),
62 | (3, 'A');
63 |
64 | --
65 | -- Indexes for dumped tables
66 | --
67 |
68 | --
69 | -- Indexes for table `Letter1`
70 | --
71 | ALTER TABLE `Letter1`
72 | ADD PRIMARY KEY (`id`);
73 |
74 | --
75 | -- Indexes for table `Letter2`
76 | --
77 | ALTER TABLE `Letter2`
78 | ADD PRIMARY KEY (`id`);
79 |
80 | --
81 | -- AUTO_INCREMENT for dumped tables
82 | --
83 |
84 | --
85 | -- AUTO_INCREMENT for table `Letter1`
86 | --
87 | ALTER TABLE `Letter1`
88 | MODIFY `id` int(11) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=4;
89 |
90 | --
91 | -- AUTO_INCREMENT for table `Letter2`
92 | --
93 | ALTER TABLE `Letter2`
94 | MODIFY `id` int(11) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=4;
95 |
--------------------------------------------------------------------------------
/Hacks/04_Dynamic_Query/README.md:
--------------------------------------------------------------------------------
1 | # Dynamic Query
2 |
3 | In the interview question, we covered a classic problem of [grouping by bins](https://github.com/shawlu95/Beyond-LeetCode-SQL/tree/master/Interview/08_Group_by_Bins). This [notebook](dynamic.ipynb) illustrates how to generate and execute query dynamically in Python. You can easily build hundreds of bins without manually writing hundreds of *WHEN* conditions.
4 |
5 | ```sql
6 | SELECT CASE WHEN creditLimit BETWEEN 0 AND 9999 THEN '0~9999'
7 | WHEN creditLimit BETWEEN 10000 AND 19999 THEN '10000~19999'
8 | WHEN creditLimit BETWEEN 20000 AND 29999 THEN '20000~29999'
9 | WHEN creditLimit BETWEEN 30000 AND 39999 THEN '30000~39999'
10 | WHEN creditLimit BETWEEN 40000 AND 49999 THEN '40000~49999'
11 | WHEN creditLimit BETWEEN 50000 AND 59999 THEN '50000~59999'
12 | WHEN creditLimit BETWEEN 60000 AND 69999 THEN '60000~69999'
13 | WHEN creditLimit BETWEEN 70000 AND 79999 THEN '70000~79999'
14 | WHEN creditLimit BETWEEN 80000 AND 89999 THEN '80000~89999'
15 | WHEN creditLimit BETWEEN 90000 AND 99999 THEN '90000~99999'
16 | WHEN creditLimit BETWEEN 100000 AND 109999 THEN '100000~109999'
17 | WHEN creditLimit BETWEEN 110000 AND 119999 THEN '110000~119999'
18 | WHEN creditLimit BETWEEN 120000 AND 129999 THEN '120000~129999'
19 | WHEN creditLimit BETWEEN 130000 AND 139999 THEN '130000~139999'
20 | WHEN creditLimit BETWEEN 140000 AND 149999 THEN '140000~149999'
21 | WHEN creditLimit BETWEEN 150000 AND 159999 THEN '150000~159999'
22 | WHEN creditLimit BETWEEN 160000 AND 169999 THEN '160000~169999'
23 | WHEN creditLimit BETWEEN 170000 AND 179999 THEN '170000~179999'
24 | WHEN creditLimit BETWEEN 180000 AND 189999 THEN '180000~189999'
25 | WHEN creditLimit BETWEEN 190000 AND 199999 THEN '190000~199999'
26 | WHEN creditLimit BETWEEN 200000 AND 209999 THEN '200000~209999'
27 | WHEN creditLimit BETWEEN 210000 AND 219999 THEN '210000~219999'
28 | WHEN creditLimit BETWEEN 220000 AND 229999 THEN '220000~229999'
29 | END AS credit_range, COUNT(*) AS customer_tally FROM customers GROUP BY credit_range;
30 | ```
31 |
32 | ---
33 | ### Dependencies
34 | * sqlalchemy
35 | * pandas
--------------------------------------------------------------------------------
/Hacks/05_Stored_Precesure/README.md:
--------------------------------------------------------------------------------
1 | # Stored Procedure
2 |
3 | ---
4 | ### Building Procedure
5 | For complex queries that are frequently reused, with a few changes in parameters, we can store it as a procedure, and run it like a function. The syntax to create stored procedure is:
6 |
7 | ```sql
8 | DROP PROCEDURE IF EXISTS my_procedure;
9 |
10 | DELIMITER //
11 | CREATE PROCEDURE my_procedure
12 | (IN arg1 INT(11), arg2 VARCHAR(10))
13 |
14 | BEGIN
15 |
16 | SELECT arg1, arg2;
17 |
18 | END//
19 | DELIMITER ;
20 | ```
21 |
22 | The *DELIMITER //* statement simply tells SQL to treat *//* as marking the end of statement, so that when creating the procedure *SELECT arg1, arg2;*, SQL does not interpret it as a statement and execute it. After creating the procedure, we need to change back the delimiter to *;*.
23 |
24 | ---
25 | ### Calling Procedure
26 | To call a procedure, simply substitute parameters.
27 | ```
28 | CALL my_procedure(1, 'hello');
29 |
30 | +------+-------+
31 | | arg1 | arg2 |
32 | +------+-------+
33 | | 1 | hello |
34 | +------+-------+
35 | 1 row in set (0.00 sec)
36 | ```
37 |
38 | ---
39 | ### Example Use Case
40 | We can use custom procedure to store the random sampling [query](https://github.com/shawlu95/Beyond-LeetCode-SQL/tree/master/Topics/01_Random_Sampling). See implementation [here](stored_procedure.sql).
41 |
42 | Using a stored procedure, we can simply sample any desired number of rows we want, by passing the sample size as an argument. For example, to draw 3 cities from each continent, we can simply call the procedure as follows:
43 |
44 | ```
45 | mysql> CALL sample_by_continent(3);
46 |
47 | +--------------+---------------------------+---------------+------------+--------------------+-----------------------+--------+---------------+
48 | | city_name | country_name | continent | group_size | std | prob | cutoff | group_row_num |
49 | +--------------+---------------------------+---------------+------------+--------------------+-----------------------+--------+---------------+
50 | | Nazilli | Turkey | Asia | 1766 | 1.7305788923769574 | 0.7224973214068425 | 0.0040 | 1 |
51 | | Semnan | Iran | Asia | 1766 | 1.7305788923769574 | 0.7094183831563391 | 0.0040 | 2 |
52 | | Yuci | China | Asia | 1766 | 1.7305788923769574 | 0.3795999822948128 | 0.0040 | 3 |
53 | | Manchester | United Kingdom | Europe | 841 | 1.7289585538059709 | 0.7099240475426652 | 0.0083 | 1 |
54 | | Vitebsk | Belarus | Europe | 841 | 1.7289585538059709 | 0.24038589023089585 | 0.0083 | 2 |
55 | | Arzamas | Russian Federation | Europe | 841 | 1.7289585538059709 | 0.07215733926012864 | 0.0083 | 3 |
56 | | Saint-Pierre | Saint Pierre and Miquelon | North America | 581 | 1.7275732570756073 | 0.9816732946612623 | 0.0120 | 1 |
57 | | Compton | United States | North America | 581 | 1.7275732570756073 | 0.9894793350151547 | 0.0120 | 2 |
58 | | Grand Rapids | United States | North America | 581 | 1.7275732570756073 | 0.0023768218256317282 | 0.0120 | 3 |
59 | | al-Qadarif | Sudan | Africa | 366 | 1.7249376000117878 | 0.6935911212988115 | 0.0191 | 1 |
60 | | Daloa | Côte d’Ivoire | Africa | 366 | 1.7249376000117878 | 0.06386513827728586 | 0.0191 | 2 |
61 | | Abidjan | Côte d’Ivoire | Africa | 366 | 1.7249376000117878 | 0.23855235822363977 | 0.0191 | 3 |
62 | | Adamstown | Pitcairn | Oceania | 55 | 1.684150708706428 | 0.2901749911626568 | 0.1273 | 1 |
63 | | Yangor | Nauru | Oceania | 55 | 1.684150708706428 | 0.44737576548697033 | 0.1273 | 2 |
64 | | Cairns | Australia | Oceania | 55 | 1.684150708706428 | 0.3663538846805262 | 0.1273 | 3 |
65 | | Maracaíbo | Venezuela | South America | 470 | 1.7265140393782532 | 0.8527891355126995 | 0.0149 | 1 |
66 | | Guacara | Venezuela | South America | 470 | 1.7265140393782532 | 0.2539973559360927 | 0.0149 | 2 |
67 | | Corrientes | Argentina | South America | 470 | 1.7265140393782532 | 0.7116194038760097 | 0.0149 | 3 |
68 | +--------------+---------------------------+---------------+------------+--------------------+-----------------------+--------+---------------+
69 | 18 rows in set (0.01 sec)
70 |
71 | Query OK, 0 rows affected (0.01 sec)
72 | ```
73 |
74 |
--------------------------------------------------------------------------------
/Hacks/05_Stored_Precesure/stored_procedure.sql:
--------------------------------------------------------------------------------
1 | DROP PROCEDURE IF EXISTS sample_by_continent;
2 | DELIMITER //
3 | CREATE PROCEDURE sample_by_continent
4 | (IN sample_size INT(11))
5 | BEGIN
6 |
7 | WITH
8 | city_group AS
9 | (SELECT
10 | c.*
11 | ,COUNT(*) OVER (PARTITION BY continent) AS group_size
12 | FROM city_by_continent AS c)
13 | ,city_prob_assign AS
14 | (SELECT
15 | c.*
16 | -- be aware: expected sample size = group_size * (sample_size / group_size) = sample_size
17 | ,SQRT(group_size * (sample_size / group_size) * (1 - sample_size / group_size)) AS std
18 | ,RAND() AS prob
19 | FROM city_group AS c)
20 | ,city_prob_cutoff AS
21 | (SELECT
22 | c.*
23 | ,(sample_size + CEIL(2 * std)) / group_size AS cutoff
24 | FROM city_prob_assign AS c)
25 | -- filter by cutoff
26 | -- note that this step cannot be merged with previous
27 | ,city_sample AS
28 | (SELECT
29 | c.*
30 | -- window is evaluated after WHERE clause!
31 | -- ranking is cheap when performed on small group!
32 | ,RANK() OVER (PARTITION BY continent ORDER BY prob) AS group_row_num
33 | FROM city_prob_cutoff AS c
34 | WHERE prob < cutoff)
35 | -- final, fixed size sample
36 | SELECT *
37 | FROM city_sample
38 | WHERE group_row_num <= sample_size;
39 |
40 | END//
41 | DELIMITER ;
--------------------------------------------------------------------------------
/Hacks/06_Hacking_Aggregation/README.md:
--------------------------------------------------------------------------------
1 | # Hacking Aggregation
2 |
3 | This notebook goes through several ways to find the maximum (and conversely the minimum) without using an aggregate function. We'll use the world database.
4 |
5 | #### Global Aggregate: Select Country with the Largest GNP
6 | This is easy with a scalar subquery, using *MAX()* function.
7 | ```
8 | SELECT Name
9 | FROM country
10 | WHERE GNP = (SELECT MAX(GNP) FROM country);
11 |
12 | +---------------+
13 | | Name |
14 | +---------------+
15 | | United States |
16 | +---------------+
17 | 1 row in set (0.00 sec)
18 | ```
19 |
20 | Without aggregate function, we can hack it by using *ALL*.
21 | ```
22 | SELECT Name
23 | FROM country
24 | WHERE GNP >= ALL(SELECT GNP FROM country);
25 |
26 | +---------------+
27 | | Name |
28 | +---------------+
29 | | United States |
30 | +---------------+
31 | 1 row in set (0.00 sec)
32 | ```
33 |
34 | __Warning__: if any country has __NULL__ in the *GNP* column, the query will return an empty set. See [this](https://github.com/shawlu95/Beyond-LeetCode-SQL/tree/master/Hacks/02_NULL_pathology) notebook for more pathological examples of __NULL__ behavior. To prevent such behavior, remove __NULL__ from the set.
35 |
36 | ```sql
37 | SELECT Name
38 | FROM country
39 | WHERE GNP >= ALL(SELECT GNP FROM country WHERE GNP IS NOT NULL);
40 | ```
41 |
42 | Similarly, we can find country with lowest GNP, country whose GNP is above / below average.
43 |
44 | ```sql
45 | SELECT Name
46 | FROM country
47 | WHERE GNP <= ALL(SELECT GNP FROM country WHERE GNP IS NOT NULL);
48 |
49 | SELECT Name
50 | FROM country
51 | WHERE GNP >= (SELECT AVG(GNP) FROM country WHERE GNP IS NOT NULL);
52 |
53 | SELECT Name
54 | FROM country
55 | WHERE GNP <= (SELECT AVG(GNP) FROM country WHERE GNP IS NOT NULL);
56 | ```
57 |
58 | ___
59 | #### Group Aggregate: Find Largest Country on Each Continent
60 | Here we normally need to group by continent.
61 | ```sql
62 | SELECT
63 | Name
64 | ,Continent
65 | ,SurfaceArea
66 | FROM country
67 | WHERE (Continent, SurfaceArea) IN (
68 | SELECT Continent, MAX(SurfaceArea) AS SurfaceArea
69 | FROM country GROUP BY Continent
70 | );
71 | ```
72 | ```
73 | +--------------------+---------------+-------------+
74 | | Name | Continent | SurfaceArea |
75 | +--------------------+---------------+-------------+
76 | | Antarctica | Antarctica | 13120000.00 |
77 | | Australia | Oceania | 7741220.00 |
78 | | Brazil | South America | 8547403.00 |
79 | | Canada | North America | 9970610.00 |
80 | | China | Asia | 9572900.00 |
81 | | Russian Federation | Europe | 17075400.00 |
82 | | Sudan | Africa | 2505813.00 |
83 | +--------------------+---------------+-------------+
84 | 7 rows in set (0.00 sec)
85 | ```
86 |
87 | Without using *GROUP BY*, we can simply add a condition on *Continent*, turning the scalar subquery into a __correlated subquery__.
88 | ```sql
89 | SELECT
90 | a.Name
91 | ,a.Continent
92 | ,a.SurfaceArea
93 | FROM country AS a
94 | WHERE a.SurfaceArea >= ALL(
95 | SELECT b.SurfaceArea
96 | FROM country AS b
97 | WHERE a.Continent = b.Continent
98 | AND b.SurfaceArea IS NOT NULL
99 | );
100 | ```
101 | ```
102 | +--------------------+---------------+-------------+
103 | | Name | Continent | SurfaceArea |
104 | +--------------------+---------------+-------------+
105 | | Antarctica | Antarctica | 13120000.00 |
106 | | Australia | Oceania | 7741220.00 |
107 | | Brazil | South America | 8547403.00 |
108 | | Canada | North America | 9970610.00 |
109 | | China | Asia | 9572900.00 |
110 | | Russian Federation | Europe | 17075400.00 |
111 | | Sudan | Africa | 2505813.00 |
112 | +--------------------+---------------+-------------+
113 | 7 rows in set (0.01 sec)
114 | ```
115 |
116 | Just as before, we can find the country whose surface area is smallest, or whose area is above/below the average, compared to other countries in the same continent.
117 |
118 | ```sql
119 | SELECT
120 | a.Name
121 | ,a.Continent
122 | ,a.SurfaceArea
123 | FROM country AS a
124 | WHERE a.SurfaceArea <= ALL(
125 | SELECT b.SurfaceArea
126 | FROM country AS b
127 | WHERE a.Continent = b.Continent
128 | AND b.SurfaceArea IS NOT NULL
129 | );
130 |
131 | SELECT
132 | a.Name
133 | ,a.Continent
134 | ,a.SurfaceArea
135 | FROM country AS a
136 | WHERE a.SurfaceArea >= ALL(
137 | SELECT AVG(b.SurfaceArea)
138 | FROM country AS b
139 | WHERE a.Continent = b.Continent
140 | AND b.SurfaceArea IS NOT NULL
141 | );
142 |
143 | SELECT
144 | a.Name
145 | ,a.Continent
146 | ,a.SurfaceArea
147 | FROM country AS a
148 | WHERE a.SurfaceArea <= ALL(
149 | SELECT AVG(b.SurfaceArea)
150 | FROM country AS b
151 | WHERE a.Continent = b.Continent
152 | AND b.SurfaceArea IS NOT NULL
153 | );
154 | ```
155 |
156 | ### Note
157 | By using 'less or equal', we are including the compared object itself during the comparison. Without equal sign, the query will return nothing, because it is impossible to have one row that beats every other row including itself. If we don't use equal sign, we need to exclude the object from comparing against itself.
158 |
159 | ```sql
160 | -- Bad
161 | SELECT Name
162 | FROM country
163 | WHERE GNP > ALL(SELECT GNP FROM country WHERE GNP IS NOT NULL);
164 |
165 | Empty set (0.00 sec)
166 | ```
167 |
168 | ```sql
169 | -- Good
170 | SELECT a.Name
171 | FROM country AS a
172 | WHERE GNP > ALL(
173 | SELECT b.GNP FROM country AS b
174 | WHERE b.GNP IS NOT NULL
175 | AND b.Name != a.Name
176 | );
177 | ```
178 | ```
179 | +---------------+
180 | | Name |
181 | +---------------+
182 | | United States |
183 | +---------------+
184 | 1 row in set (0.00 sec)
185 | ```
186 | ___
187 | ### Parting Thoughts
188 | There are 30 countries whose GNP is above average, and 209 countries whose GNP is below average. This is a positively skewed distribution, whose mean is significantly above the median.
189 |
190 |
--------------------------------------------------------------------------------
/Hacks/06_Hacking_Aggregation/solution.sql:
--------------------------------------------------------------------------------
1 | -- global aggregate
2 | -- Country with the highest GNP. The IS NOT NULL filter is essential:
3 | -- a single NULL inside ALL() makes the comparison unknown and the
4 | -- query would return an empty set.
5 | SELECT Name
6 | FROM country
7 | WHERE GNP >= ALL(SELECT GNP FROM country WHERE GNP IS NOT NULL);
8 |
9 | -- Country with the lowest GNP.
10 | SELECT Name
11 | FROM country
12 | WHERE GNP <= ALL(SELECT GNP FROM country WHERE GNP IS NOT NULL);
13 |
14 | -- Countries with GNP at or above the global average
15 | -- (scalar subquery: AVG() returns exactly one row).
16 | SELECT Name
17 | FROM country
18 | WHERE GNP >= (SELECT AVG(GNP) FROM country WHERE GNP IS NOT NULL);
19 |
20 | -- Countries with GNP at or below the global average.
21 | SELECT Name
22 | FROM country
23 | WHERE GNP <= (SELECT AVG(GNP) FROM country WHERE GNP IS NOT NULL);
24 |
25 | -- group aggregate
26 | -- Largest country on each continent: the subquery is correlated on
27 | -- Continent, so each outer row is compared only against countries in
28 | -- its own continent. Using >= (not >) lets a row match itself.
29 | SELECT
30 | a.Name
31 | ,a.Continent
32 | ,a.SurfaceArea
33 | FROM country AS a
34 | WHERE a.SurfaceArea >= ALL(
35 | SELECT b.SurfaceArea
36 | FROM country AS b
37 | WHERE a.Continent = b.Continent
38 | AND b.SurfaceArea IS NOT NULL
39 | );
40 |
41 | -- Smallest country on each continent.
42 | SELECT
43 | a.Name
44 | ,a.Continent
45 | ,a.SurfaceArea
46 | FROM country AS a
47 | WHERE a.SurfaceArea <= ALL(
48 | SELECT b.SurfaceArea
49 | FROM country AS b
50 | WHERE a.Continent = b.Continent
51 | AND b.SurfaceArea IS NOT NULL
52 | );
53 |
54 | -- Countries at or above their continent's average surface area.
55 | -- NOTE(review): ALL is superfluous here since AVG() yields a single
56 | -- row; a plain >= (SELECT AVG(...)) comparison behaves identically.
57 | SELECT
58 | a.Name
59 | ,a.Continent
60 | ,a.SurfaceArea
61 | FROM country AS a
62 | WHERE a.SurfaceArea >= ALL(
63 | SELECT AVG(b.SurfaceArea)
64 | FROM country AS b
65 | WHERE a.Continent = b.Continent
66 | AND b.SurfaceArea IS NOT NULL
67 | );
68 |
69 | -- Countries at or below their continent's average surface area.
70 | SELECT
71 | a.Name
72 | ,a.Continent
73 | ,a.SurfaceArea
74 | FROM country AS a
75 | WHERE a.SurfaceArea <= ALL(
76 | SELECT AVG(b.SurfaceArea)
77 | FROM country AS b
78 | WHERE a.Continent = b.Continent
79 | AND b.SurfaceArea IS NOT NULL
80 | );
81 |
66 |
--------------------------------------------------------------------------------
/Hacks/07_Multi_Column_Partition/README.md:
--------------------------------------------------------------------------------
1 | # Multi-column Partitioning
2 |
3 | Just as GROUP BY can be applied to multiple columns, so can we PARTITION BY multiple columns. In the sample quiz [table](db.sql), we have one column for user_id (the student); each student can take multiple classes, and each class can have multiple quizzes. To answer a question such as "this is Mike's third quiz in the deep learning class," we need to partition by both user_id and course.
4 |
5 | Load the table.
6 | ```
7 | mysql < db.sql -uroot -p
8 | ```
9 |
10 | ___
11 | ### Multiple-column Partition
12 | ```sql
13 | SELECT *, ROW_NUMBER() OVER w AS quiz_number
14 | FROM Quiz
15 | WINDOW w AS (PARTITION BY user_id, course ORDER BY quiz_date);
16 | ```
17 | ```
18 | +----+---------+--------+------------+-------------+
19 | | id | user_id | course | quiz_date | quiz_number |
20 | +----+---------+--------+------------+-------------+
21 | | 6 | john | CS230 | 2019-02-01 | 1 |
22 | | 7 | john | CS230 | 2019-02-12 | 2 |
23 | | 10 | john | CS246 | 2019-03-06 | 1 |
24 | | 8 | john | CS246 | 2019-03-09 | 2 |
25 | | 9 | john | CS246 | 2019-03-29 | 3 |
26 | | 1 | shaw | CS229 | 2019-02-08 | 1 |
27 | | 2 | shaw | CS229 | 2019-03-01 | 2 |
28 | | 3 | shaw | CS230 | 2019-03-04 | 1 |
29 | | 4 | shaw | CS230 | 2019-03-14 | 2 |
30 | | 5 | shaw | CS231 | 2019-02-14 | 1 |
31 | +----+---------+--------+------------+-------------+
32 | 10 rows in set (0.01 sec)
33 | ```
34 |
35 | ___
36 | ### Multiple-column Order
37 | Similarly, we can ORDER BY multiple columns in the window function to break ties. This is useful in resolving edge cases.
38 |
39 |
40 | ```sql
41 | SELECT *, ROW_NUMBER() OVER w AS quiz_number
42 | FROM Quiz
43 | WINDOW w AS (PARTITION BY user_id ORDER BY course, quiz_date);
44 | ```
45 | ```
46 | +----+---------+--------+------------+-------------+
47 | | id | user_id | course | quiz_date | quiz_number |
48 | +----+---------+--------+------------+-------------+
49 | | 6 | john | CS230 | 2019-02-01 | 1 |
50 | | 7 | john | CS230 | 2019-02-12 | 2 |
51 | | 10 | john | CS246 | 2019-03-06 | 3 |
52 | | 8 | john | CS246 | 2019-03-09 | 4 |
53 | | 9 | john | CS246 | 2019-03-29 | 5 |
54 | | 1 | shaw | CS229 | 2019-02-08 | 1 |
55 | | 2 | shaw | CS229 | 2019-03-01 | 2 |
56 | | 3 | shaw | CS230 | 2019-03-04 | 3 |
57 | | 4 | shaw | CS230 | 2019-03-14 | 4 |
58 | | 5 | shaw | CS231 | 2019-02-14 | 5 |
59 | +----+---------+--------+------------+-------------+
60 | 10 rows in set (0.00 sec)
61 | ```
62 |
63 |
--------------------------------------------------------------------------------
/Hacks/07_Multi_Column_Partition/db.sql:
--------------------------------------------------------------------------------
1 |
2 | -- phpMyAdmin SQL Dump
3 | -- version 4.7.7
4 | -- https://www.phpmyadmin.net/
5 | --
6 | -- Host: localhost:8889
7 | -- Generation Time: Mar 21, 2019 at 05:22 AM
8 | -- Server version: 5.6.38
9 | -- PHP Version: 7.2.1
10 |
11 | -- Sample data for the multi-column partitioning exercise: each row is
12 | -- one quiz taken by a student (user_id) in a course on a given date.
13 |
14 | SET SQL_MODE = "NO_AUTO_VALUE_ON_ZERO";
15 | SET time_zone = "+00:00";
16 |
17 | --
18 | -- Database: `practice`
19 | --
20 | CREATE SCHEMA IF NOT EXISTS Practice;
21 | USE Practice;
22 |
23 | -- --------------------------------------------------------
24 |
25 | --
26 | -- Table structure for table `Quiz`
27 | --
28 |
29 | DROP TABLE IF EXISTS `Quiz`;
30 | CREATE TABLE `Quiz` (
31 | `id` int(11) NOT NULL,
32 | `user_id` varchar(10) NOT NULL,
33 | `course` varchar(10) NOT NULL,
34 | `quiz_date` date NOT NULL
35 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
36 |
37 | --
38 | -- Dumping data for table `Quiz`
39 | --
40 |
41 | -- Note: id 10 is deliberately out of date order relative to ids 8-9,
42 | -- so window queries must ORDER BY quiz_date rather than rely on id.
43 | INSERT INTO `Quiz` (`id`, `user_id`, `course`, `quiz_date`) VALUES
44 | (1, 'shaw', 'CS229', '2019-02-08'),
45 | (2, 'shaw', 'CS229', '2019-03-01'),
46 | (3, 'shaw', 'CS230', '2019-03-04'),
47 | (4, 'shaw', 'CS230', '2019-03-14'),
48 | (5, 'shaw', 'CS231', '2019-02-14'),
49 | (6, 'john', 'CS230', '2019-02-01'),
50 | (7, 'john', 'CS230', '2019-02-12'),
51 | (8, 'john', 'CS246', '2019-03-09'),
52 | (9, 'john', 'CS246', '2019-03-29'),
53 | (10, 'john', 'CS246', '2019-03-06');
54 |
55 | --
56 | -- Indexes for dumped tables
57 | --
58 |
59 | --
60 | -- Indexes for table `Quiz`
61 | --
62 | ALTER TABLE `Quiz`
63 | ADD PRIMARY KEY (`id`);
64 |
65 | --
66 | -- AUTO_INCREMENT for dumped tables
67 | --
68 |
69 | --
70 | -- AUTO_INCREMENT for table `Quiz`
71 | --
72 | ALTER TABLE `Quiz`
73 | MODIFY `id` int(11) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=11;
74 |
69 |
--------------------------------------------------------------------------------
/Hacks/07_Multi_Column_Partition/use_case.sql:
--------------------------------------------------------------------------------
1 | -- Multi-column partition: number each student's quizzes within each
2 | -- course, in chronological order. The window specification is written
3 | -- inline in the OVER clause (equivalent to the named-WINDOW form).
4 | SELECT
5 | *
6 | ,ROW_NUMBER() OVER (PARTITION BY user_id, course ORDER BY quiz_date) AS quiz_number
7 | FROM Quiz;
8 |
9 | -- Multi-column order: one running sequence per student, sorted first
10 | -- by course and then by quiz_date to break ties within a course.
11 | SELECT
12 | *
13 | ,ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY course, quiz_date) AS quiz_number
14 | FROM Quiz;
15 |
10 |
--------------------------------------------------------------------------------
/Interview/01_Facebook_Advertiser_Status/README.md:
--------------------------------------------------------------------------------
1 | # Facebook Advertiser Status
2 |
3 | ### Key Concepts
4 | * Conditional update with join.
5 | * Left join vs. inner join.
6 | * Edge case: adding new users.
7 | * Boolean algebra and simplification.
8 |
9 | ---
10 | ### Tables:
11 | * *DailyPay*: user_id (showing today paid ads fee) on day T. __Only__ advertisers who paid will show up in this table.
12 | * *Advertiser*: two columns, user_id and their status on day T-1.
13 | Use today's payment log in the *DailyPay* table to update the status in the *Advertiser* table.
14 |
15 | ### Status:
16 | * New: users registered on day T.
17 | * Existing: users who paid on day T-1 and on day T.
18 | * Churn: users who paid on day T-1 but not on day T.
19 | * Resurrect: users who did not pay on T-1 but paid on day T.
20 |
21 | ### State Transition
22 |
23 |
24 |
25 |
26 |
27 | |#| Start | End | Condition |
28 | |-|----|----------|-----------|
29 | |1|NEW|EXISTING|Paid on day T|
30 | |2|NEW|CHURN|No pay on day T|
31 | |3|EXISTING|EXISTING|Paid on day T|
32 | |4|EXISTING|CHURN|No pay on day T|
33 | |5|CHURN|RESURRECT|Paid on day T|
34 | |6|CHURN|CHURN|No pay on day T|
35 | |7|RESURRECT|EXISTING|Paid on day T|
36 | |8|RESURRECT|CHURN|No pay on day T|
37 |
38 | By examining the above table, we can see that as long as a user has not paid on day T, his status is updated to CHURN regardless of previous status (check with the interviewer that all new users who registered on day T did pay; if they didn't, they are not immediately considered as CHURN).
39 |
40 | When user did pay on day T (#1, 3, 5, 7). They can become either EXISTING or RESURRECT, depending on their previous state. RESURRECT is only possible when previous state is CHURN. When previous state is anything else, status is updated to EXISTING.
41 |
42 | ### Sample Database
43 | Load the database file [db.sql](db.sql) to localhost MySQL. An Advertiser database will be created with two tables.
44 |
45 | ```bash
46 | mysql < db.sql -uroot -p
47 | ```
48 |
49 | ```
50 | mysql> SELECT * FROM Advertiser;
51 | +----+---------+-----------+
52 | | id | user_id | status |
53 | +----+---------+-----------+
54 | | 1 | bing | NEW |
55 | | 2 | yahoo | NEW |
56 | | 3 | alibaba | EXISTING |
57 | | 4 | baidu | EXISTING |
58 | | 5 | target | CHURN |
59 | | 6 | tesla | CHURN |
60 | | 7 | morgan | RESURRECT |
61 | | 8 | chase | RESURRECT |
62 | +----+---------+-----------+
63 | 8 rows in set (0.00 sec)
64 |
65 | mysql> SELECT * FROM DailyPay;
66 | +----+---------+------+
67 | | id | user_id | paid |
68 | +----+---------+------+
69 | | 1 | yahoo | 45 |
70 | | 2 | alibaba | 100 |
71 | | 3 | target | 13 |
72 | | 4 | morgan | 600 |
73 | | 5 | fitdata | 1 |
74 | +----+---------+------+
75 | 5 rows in set (0.00 sec)
76 | ```
77 |
78 | ---
79 | ### Solution
80 | #### Step 1. Update Existing Advertiser
81 | After simplifying the boolean algebra, we only need three conditions. State __explicitly__ we don't need "ELSE status" in the CASE statement because we've covered all possible conditions. Also emphasize we need __LEFT JOIN__ to find out who did not pay on day T.
82 | ```sql
83 | UPDATE Advertiser AS a
84 | LEFT JOIN DailyPay AS d
85 | ON a.user_id = d.user_id
86 | SET a.status = CASE
87 | WHEN d.paid IS NULL THEN "CHURN"
88 | WHEN a.status = "CHURN" AND d.paid IS NOT NULL THEN "RESURRECT"
89 | WHEN a.status != "CHURN" AND d.paid IS NOT NULL THEN "EXISTING"
90 | END;
91 | ```
92 |
93 | Check the *Advertiser* table to see if the update makes sense.
94 | ```
95 | mysql> SELECT * FROM Advertiser;
96 | +----+---------+-----------+
97 | | id | user_id | status |
98 | +----+---------+-----------+
99 | | 1 | bing | CHURN |
100 | | 2 | yahoo | EXISTING |
101 | | 3 | alibaba | EXISTING |
102 | | 4 | baidu | CHURN |
103 | | 5 | target | RESURRECT |
104 | | 6 | tesla | CHURN |
105 | | 7 | morgan | EXISTING |
106 | | 8 | chase | CHURN |
107 | +----+---------+-----------+
108 | 8 rows in set (0.00 sec)
109 | ```
110 |
111 | #### Step 2. Insert New Advertiser
112 | Note that we missed the new user. To find the new user, left join *DailyPay* with *Advertiser*. If there is no match on the right, the user is new.
113 |
114 | ```sql
115 | INSERT INTO
116 | Advertiser (user_id, status)
117 | SELECT d.user_id
118 | ,"NEW" as status
119 | FROM DailyPay AS d
120 | LEFT JOIN Advertiser AS a
121 | ON d.user_id = a.user_id
122 | WHERE a.user_id IS NULL;
123 | ```
124 |
125 | Check again that the new users are added.
126 | ```
127 | SELECT * FROM Advertiser;
128 | +----+---------+-----------+
129 | | id | user_id | status |
130 | +----+---------+-----------+
131 | | 1 | bing | CHURN |
132 | | 2 | yahoo | EXISTING |
133 | | 3 | alibaba | EXISTING |
134 | | 4 | baidu | CHURN |
135 | | 5 | target | RESURRECT |
136 | | 6 | tesla | CHURN |
137 | | 7 | morgan | EXISTING |
138 | | 8 | chase | CHURN |
139 | | 9 | fitdata | NEW |
140 | +----+---------+-----------+
141 | 9 rows in set (0.00 sec)
142 | ```
143 | See solution [here](solution.sql).
--------------------------------------------------------------------------------
/Interview/01_Facebook_Advertiser_Status/db.sql:
--------------------------------------------------------------------------------
1 |
2 | -- Sample data for the advertiser status-transition exercise.
3 | -- Advertiser holds each user's status as of day T-1; DailyPay holds
4 | -- the payment log for day T (only users who paid appear in it).
5 | DROP SCHEMA IF EXISTS Advertiser;
6 | CREATE SCHEMA Advertiser;
7 | USE Advertiser;
8 |
9 | DROP TABLE IF EXISTS `Advertiser`;
10 | CREATE TABLE `Advertiser` (
11 | `id` int(11) NOT NULL,
12 | `user_id` varchar(10) NOT NULL,
13 | `status` enum('CHURN','NEW','EXISTING','RESURRECT') NOT NULL
14 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
15 |
16 | -- Two users in each of the four states, so every transition in the
17 | -- state table can be exercised by the update.
18 | INSERT INTO `Advertiser` (`id`, `user_id`, `status`) VALUES
19 | (1, 'bing', 'NEW'),
20 | (2, 'yahoo', 'NEW'),
21 | (3, 'alibaba', 'EXISTING'),
22 | (4, 'baidu', 'EXISTING'),
23 | (5, 'target', 'CHURN'),
24 | (6, 'tesla', 'CHURN'),
25 | (7, 'morgan', 'RESURRECT'),
26 | (8, 'chase', 'RESURRECT');
27 |
28 | ALTER TABLE `Advertiser`
29 | ADD PRIMARY KEY (`id`);
30 |
31 | ALTER TABLE `Advertiser`
32 | MODIFY `id` int(11) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=9;
33 |
34 | -- Table structure for table `DailyPay`
35 | DROP TABLE IF EXISTS `DailyPay`;
36 | CREATE TABLE `DailyPay` (
37 | `id` int(11) NOT NULL,
38 | `user_id` varchar(10) NOT NULL,
39 | `paid` int(11) DEFAULT NULL
40 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
41 |
42 | -- 'fitdata' has no row in Advertiser: it is the new-user edge case
43 | -- handled by the INSERT step of the solution.
44 | INSERT INTO `DailyPay` (`id`, `user_id`, `paid`) VALUES
45 | (1, 'yahoo', 45),
46 | (2, 'alibaba', 100),
47 | (3, 'target', 13),
48 | (4, 'morgan', 600),
49 | (5, 'fitdata', 1);
50 |
51 | ALTER TABLE `DailyPay`
52 | ADD PRIMARY KEY (`id`);
53 |
54 | ALTER TABLE `DailyPay`
55 | MODIFY `id` int(11) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=6;
56 |
49 |
--------------------------------------------------------------------------------
/Interview/01_Facebook_Advertiser_Status/fig/transition.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shawlu95/Beyond-LeetCode-SQL/8ab7f0ad60b85ab9dff18da6fc760211335d0fb0/Interview/01_Facebook_Advertiser_Status/fig/transition.png
--------------------------------------------------------------------------------
/Interview/01_Facebook_Advertiser_Status/solution.sql:
--------------------------------------------------------------------------------
1 | -- Step 1. Update existing advertiser
2 | -- LEFT JOIN keeps advertisers with no payment row today (d.paid IS
3 | -- NULL), which is exactly how churn is detected; an inner join would
4 | -- silently drop them from the update.
5 | UPDATE Advertiser AS a
6 | LEFT JOIN DailyPay AS d
7 | ON a.user_id = d.user_id
8 | SET a.status = CASE
9 | -- no payment on day T -> CHURN, regardless of previous status
10 | WHEN d.paid IS NULL THEN "CHURN"
11 | -- paid after having churned -> RESURRECT
12 | WHEN a.status = "CHURN" AND d.paid IS NOT NULL THEN "RESURRECT"
13 | -- paid from any non-churn status -> EXISTING
14 | WHEN a.status != "CHURN" AND d.paid IS NOT NULL THEN "EXISTING"
15 | -- no ELSE branch needed: the three WHENs cover every combination
16 | END;
17 |
18 | -- Step 2. Insert new advertiser
19 | -- Payers with no row in Advertiser are first-time users: LEFT JOIN
20 | -- from DailyPay and keep only the unmatched rows (a.user_id IS NULL).
21 | INSERT INTO
22 | Advertiser (user_id, status)
23 | SELECT d.user_id
24 | ,"NEW" as status
25 | FROM DailyPay AS d
26 | LEFT JOIN Advertiser AS a
27 | ON d.user_id = a.user_id
28 | WHERE a.user_id IS NULL;
--------------------------------------------------------------------------------
/Interview/02_Spotify_Listening_History/README.md:
--------------------------------------------------------------------------------
1 | # Spotify Listening History
2 |
3 | ### Key Concepts
4 | * Update aggregate table with event log.
5 | * Temporary table & reusability.
6 | * Update with join statement.
7 | * Edge case: adding new user-song pair.
8 | * Aggregation.
9 |
10 | ---
11 | ### Two Tables
12 | You have a History table where you have date, user_id, song_id and count(tally).
13 | It shows at the end of each day how many times in her history a user has listened to a given song.
14 | So count is cumulative sum.
15 |
16 | You have to update this on a daily basis based on a second *Daily* table that records
17 | in real time when a user listens to a given song.
18 |
19 | Basically, at the end of each day, you go to this second table and pull a count
20 | of each user/song combination and then add this count to the first table that
21 | has the lifetime count.
22 |
23 | ### Sample Database
24 | Load the database file [db.sql](db.sql) to localhost MySQL. A Spotify database will be created with two tables.
25 |
26 | ```bash
27 | mysql < db.sql -uroot -p
28 | ```
29 |
30 | ```
31 | mysql> SELECT * from History;
32 | +----+---------+---------+-------+
33 | | id | user_id | song_id | tally |
34 | +----+---------+---------+-------+
35 | | 1 | shaw | rise | 2 |
36 | | 2 | linda | lemon | 4 |
37 | +----+---------+---------+-------+
38 | 2 rows in set (0.00 sec)
39 |
40 | mysql> SELECT * from Daily;
41 | +----+---------+---------+---------------------+
42 | | id | user_id | song_id | time_stamp |
43 | +----+---------+---------+---------------------+
44 | | 1 | shaw | rise | 2019-03-01 05:33:08 |
45 | | 2 | shaw | rise | 2019-03-01 16:00:00 |
46 | | 3 | shaw | goodie | 2019-03-01 10:15:00 |
47 | | 4 | linda | lemon | 2019-02-28 00:00:00 |
48 | | 5 | mark | game | 2019-03-01 04:00:00 |
49 | +----+---------+---------+---------------------+
50 | 5 rows in set (0.00 sec)
51 | ```
52 |
53 | ---
54 | ### Observation
55 | * Note that the *Daily* table is a event-log. To update *History*, we need to aggregate the event log, grouping by *user_id* and *song_id*.
56 | * A user may listen to a new song for the first time, in which case no existing (*user_id*, *song_id*) compound key pair exists in the *History* table. So we need an additional INSERT statement.
57 |
58 | ### Solution
59 | __Step 1. Build temporary table.__
60 | For both the UPDATE and INSERT statements, we need the same aggregated information from the *Daily* table. So we can save it as a temporary table.
61 | ```sql
62 | SET @now = "2019-03-01 00:00:00";
63 |
64 | -- Create temporary table
65 | DROP TABLE IF EXISTS daily_count;
66 | CREATE TEMPORARY TABLE daily_count
67 | SELECT
68 | user_id
69 | ,song_id
70 | ,COUNT(*) AS tally
71 | FROM Daily
72 | WHERE DATEDIFF(@now, time_stamp) = 0
73 | GROUP BY user_id, song_id;
74 | ```
75 |
76 | Check the temporary table.
77 | ```
78 | mysql> SELECT * FROM daily_count;
79 | +---------+---------+-------+
80 | | user_id | song_id | tally |
81 | +---------+---------+-------+
82 | | mark | game | 1 |
83 | | shaw | goodie | 1 |
84 | | shaw | rise | 2 |
85 | +---------+---------+-------+
86 | 3 rows in set (0.00 sec)
87 | ```
88 |
89 | __Step 2. Update existing pair.__ It's okay to join the temporary table with the History table during the update process, because History is independent of the temporary table.
90 | ```sql
91 | UPDATE History AS uh
92 | JOIN daily_count AS dc
93 | ON uh.user_id = dc.user_id
94 | AND uh.song_id = dc.song_id
95 | SET uh.tally = uh.tally + dc.tally;
96 | ```
97 |
98 | Check if the update is correct: *shaw* listened to *rise* twice on March 1, so that compound key's tally is incremented by 2. On the other hand, *linda* did not listen to any song on March 1, so her number doesn't change.
99 | ```
100 | mysql> SELECT * FROM History;
101 | +----+---------+---------+-------+
102 | | id | user_id | song_id | tally |
103 | +----+---------+---------+-------+
104 | | 1 | shaw | rise | 4 |
105 | | 2 | linda | lemon | 4 |
106 | +----+---------+---------+-------+
107 | 2 rows in set (0.00 sec)
108 | ```
109 |
110 | __Step 3. Insert new pair.__ After updating existing (*user_id*, *song_id*) compound key pair, we need to insert new ones:
111 |
112 | ```sql
113 | INSERT INTO History (user_id, song_id, tally)
114 | SELECT
115 | dc.user_id
116 | ,dc.song_id
117 | ,dc.tally
118 | FROM daily_count AS dc
119 | LEFT JOIN History AS uh
120 | ON dc.user_id = uh.user_id
121 | AND dc.song_id = uh.song_id
122 | WHERE uh.tally IS NULL;
123 | ```
124 |
125 | Check that the insertions are correct.
126 | ```
127 | mysql> SELECT * FROM History;
128 | +----+---------+---------+-------+
129 | | id | user_id | song_id | tally |
130 | +----+---------+---------+-------+
131 | | 1 | shaw | rise | 4 |
132 | | 2 | linda | lemon | 4 |
133 | | 3 | mark | game | 1 |
134 | | 4 | shaw | goodie | 1 |
135 | +----+---------+---------+-------+
136 | 4 rows in set (0.00 sec)
137 | ```
138 |
139 | See solution [here](solution.sql).
--------------------------------------------------------------------------------
/Interview/02_Spotify_Listening_History/db.sql:
--------------------------------------------------------------------------------
1 | -- Sample data for the listening-history exercise.
2 | -- Daily is the raw event log (one row per listen); History is the
3 | -- lifetime play count per (user_id, song_id) as of the end of the
4 | -- previous day.
5 | DROP SCHEMA IF EXISTS Spotify;
6 | CREATE SCHEMA Spotify;
7 | USE Spotify;
8 |
9 | DROP TABLE IF EXISTS `Daily`;
10 | CREATE TABLE `Daily` (
11 | `id` int(11) NOT NULL,
12 | `user_id` varchar(15) DEFAULT NULL,
13 | `song_id` varchar(15) DEFAULT NULL,
14 | `time_stamp` datetime DEFAULT NULL
15 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
16 |
17 | -- Row 4 falls on 2019-02-28, outside the day being processed, so it
18 | -- must be excluded by the solution's date filter. Rows 3 and 5 are
19 | -- (user, song) pairs not yet present in History (insert edge case).
20 | INSERT INTO `Daily` (`id`, `user_id`, `song_id`, `time_stamp`) VALUES
21 | (1, 'shaw', 'rise', '2019-03-01 05:33:08'),
22 | (2, 'shaw', 'rise', '2019-03-01 16:00:00'),
23 | (3, 'shaw', 'goodie', '2019-03-01 10:15:00'),
24 | (4, 'linda', 'lemon', '2019-02-28 00:00:00'),
25 | (5, 'mark', 'game', '2019-03-01 04:00:00');
26 |
27 | ALTER TABLE `Daily`
28 | ADD PRIMARY KEY (`id`);
29 |
30 | ALTER TABLE `Daily`
31 | MODIFY `id` int(11) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=6;
32 |
33 |
34 | DROP TABLE IF EXISTS `History`;
35 | CREATE TABLE `History` (
36 | `id` int(11) NOT NULL,
37 | `user_id` varchar(5) DEFAULT NULL,
38 | `song_id` varchar(10) DEFAULT NULL,
39 | `tally` int(11) DEFAULT NULL
40 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
41 |
42 | INSERT INTO `History` (`id`, `user_id`, `song_id`, `tally`) VALUES
43 | (1, 'shaw', 'rise', 2),
44 | (2, 'linda', 'lemon', 4);
45 |
46 | ALTER TABLE `History`
47 | ADD PRIMARY KEY (`id`);
48 |
49 | ALTER TABLE `History`
50 | MODIFY `id` int(11) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=3;
51 |
44 |
--------------------------------------------------------------------------------
/Interview/02_Spotify_Listening_History/solution.sql:
--------------------------------------------------------------------------------
1 | -- Fold one day's listening events into the lifetime History table.
2 | -- @now selects which day of the Daily event log to process.
3 | SET @now = "2019-03-01 00:00:00";
4 |
5 | -- Step 1. Build temporary table
6 | -- Aggregate the event log into one row per (user_id, song_id).
7 | -- DROP TEMPORARY TABLE (not plain DROP TABLE) guarantees this script
8 | -- can never destroy a permanent table that happens to share the name.
9 | DROP TEMPORARY TABLE IF EXISTS daily_count;
10 | CREATE TEMPORARY TABLE daily_count
11 | SELECT
12 | user_id
13 | ,song_id
14 | ,COUNT(*) AS tally
15 | FROM Daily
16 | WHERE DATEDIFF(@now, time_stamp) = 0
17 | GROUP BY user_id, song_id;
18 |
19 | -- Step 2. Update existing pair
20 | -- Inner join: only pairs already present in History get incremented.
21 | UPDATE History AS uh
22 | JOIN daily_count AS dc
23 | ON uh.user_id = dc.user_id
24 | AND uh.song_id = dc.song_id
25 | SET uh.tally = uh.tally + dc.tally;
26 |
27 | -- Step 3. Insert new pair
28 | -- Anti-join on the join key (uh.user_id), not on uh.tally:
29 | -- History.tally is a nullable column, so a matched row whose tally is
30 | -- NULL would be misclassified as a new pair and inserted twice.
31 | INSERT INTO History (user_id, song_id, tally)
32 | SELECT
33 | dc.user_id
34 | ,dc.song_id
35 | ,dc.tally
36 | FROM daily_count AS dc
37 | LEFT JOIN History AS uh
38 | ON dc.user_id = uh.user_id
39 | AND dc.song_id = uh.song_id
40 | WHERE uh.user_id IS NULL;
32 |
33 |
--------------------------------------------------------------------------------
/Interview/03_Monthly_Active_User/README.md:
--------------------------------------------------------------------------------
1 | # Monthly Active User
2 |
3 | This post is inspired by this [page](https://www.programmerinterview.com/index.php/database-sql/practice-interview-question-2/). However, the solution from there is not completely correct, as they selected non-aggregated columns (Question 1). Also, *DISTINCT* keyword is not necessary in Question 2.
4 |
5 | ### Key Concepts
6 | * Aggregate function
7 | * Left join
8 | * Removing duplicate
9 | * Functional dependency
10 |
11 | ### Sample data
12 | Load the database file [db.sql](db.sql) to localhost MySQL. A MAU database will be created with two tables.
13 |
14 | ```bash
15 | mysql < db.sql -uroot -p
16 | ```
17 | ```
18 | mysql> SELECT * FROM User;
19 | +---------+---------+--------------+
20 | | user_id | name | phone_num |
21 | +---------+---------+--------------+
22 | | jkog | Jing | 202-555-0176 |
23 | | niceguy | Goodman | 202-555-0174 |
24 | | sanhoo | Sanjay | 202-555-0100 |
25 | | shaw123 | Shaw | 202-555-0111 |
26 | +---------+---------+--------------+
27 | 4 rows in set (0.00 sec)
28 | ```
29 |
30 | Every time a user logs in a new row is inserted into the UserHistory table with user_id, current date and action (where action = "logged_on").
31 | ```
32 | mysql> SELECT * FROM UserHistory;
33 | +---------+------------+-----------+
34 | | user_id | date | action |
35 | +---------+------------+-----------+
36 | | shaw123 | 2019-02-20 | logged_on |
37 | | shaw123 | 2019-03-12 | signed_up |
38 | | sanhoo | 2019-02-27 | logged_on |
39 | | sanhoo | 2019-01-01 | logged_on |
40 | | niceguy | 2019-01-22 | logged_on |
41 | +---------+------------+-----------+
42 | 5 rows in set (0.00 sec)
43 | ```
44 |
45 | ---
46 | ### Q1: Find monthly active users.
47 | *Write a SQL query that returns the name, phone number and most recent date for any user that has logged in over the last 30 days (you can tell a user has logged in if the action field in UserHistory is set to "logged_on").*
48 |
49 | ```sql
50 | SET @today := "2019-03-01";
51 | SELECT
52 | User.name
53 | ,User.phone_num
54 | ,MAX(UserHistory.date)
55 | FROM User, UserHistory
56 | WHERE User.user_id = UserHistory.user_id
57 | AND UserHistory.action = 'logged_on'
58 | AND UserHistory.date >= DATE_SUB(@today, INTERVAL 30 DAY)
59 | GROUP BY User.user_id;
60 |
61 | ```
62 | ```
63 | +--------+--------------+-----------------------+
64 | | name | phone_num | max(UserHistory.date) |
65 | +--------+--------------+-----------------------+
66 | | Sanjay | 202-555-0100 | 2019-02-27 |
67 | | Shaw | 202-555-0111 | 2019-02-20 |
68 | +--------+--------------+-----------------------+
69 | 2 rows in set (0.00 sec)
70 |
71 | ```
72 |
73 | The above solution is only correct when phone_num and name are functionally dependent on user_id. That is, for every user_id, there is a unique phone_num and name, so we can get away with selecting non-aggregated columns (which are also not in group by clause).
74 |
75 | Depending on the database engine configuration, an error may be thrown when a selected column is neither aggregated nor in the group by clause. If we are certain that a one-to-one mapping exists, we can add an aggregate function to the additional columns (*MAX()* or *MIN()*).
76 |
77 | ```sql
78 | SET @today := "2019-03-01";
79 | SELECT
80 | MAX(u.name) -- functionally dependent on user_id
81 | ,MAX(u.phone_num) -- functionally dependent on user_id
82 | ,MAX(h.date) AS recent_date
83 | FROM User AS u, UserHistory AS h
84 | WHERE u.user_id = h.user_id
85 | AND h.action = "logged_on"
86 | AND DATEDIFF(@today, h.date) <= 30 -- DATEDIFF(later, earlier)
87 | GROUP BY u.user_id
88 | ORDER BY recent_date;
89 |
90 | ```
91 | ```
92 | +-------------+------------------+-------------+
93 | | MAX(u.name) | MAX(u.phone_num) | recent_date |
94 | +-------------+------------------+-------------+
95 | | Shaw | 202-555-0111 | 2019-02-20 |
96 | | Sanjay | 202-555-0100 | 2019-02-27 |
97 | +-------------+------------------+-------------+
98 | 2 rows in set (0.01 sec)
99 | ```
100 |
101 | Inner join also serves the purpose, and avoids making a cartesian product between the two tables as in the cross join above (although the query optimizer can take care of such trivial optimization, it's useful to know).
102 |
103 | __Why__ inner join: *user_id* in the *UserHistory* table is a foreign key referring to the *User* table's primary key, meaning that it is a subset of the primary-key column. There may exist users who never logged on and thus never appear in the *UserHistory* table. Since we are interested in monthly active users, it's safe to ignore those inactive users.
104 |
105 | ```sql
106 | -- using inner join
107 | SET @today := "2019-03-01";
108 | SELECT
109 | MAX(u.name) -- functionally dependent on user_id
110 | ,MAX(u.phone_num) -- functionally dependent on user_id
111 | ,MAX(h.date) AS recent_date
112 | FROM User AS u
113 | JOIN UserHistory AS h
114 | ON u.user_id = h.user_id
115 | WHERE h.action = "logged_on"
116 | AND DATEDIFF(@today, h.date) <= 30
117 | GROUP BY u.user_id
118 | ORDER BY recent_date;
119 | ```
120 |
121 | If any selected column is __not__ functionally dependent on the group by column, then unpredictable result may be returned, or error may be thrown. To avoid such trouble, only select aggregated columns and group by columns into a temporary tables, and join the temporary table with the original table to retrieve other desired columns.
122 |
123 | ---
124 | ### Q2. Find inactive users
125 | *Write a SQL query to determine which user_ids in the User table are not contained in the UserHistory table (assume the UserHistory table has a subset of the user_ids in User table). Do not use the SQL MINUS statement. Note: the UserHistory table can have multiple entries for each user_id (Note that your SQL should be compatible with MySQL 5.0, and avoid using subqueries)*
126 |
127 | See [here](https://www.programmerinterview.com/index.php/database-sql/practice-interview-question-2-continued/) for a detailed walk-through. However, the solution is not totally correct. We __don't__ need the *DISTINCT* keyword here, because if a *user_id* has no match in the *UserHistory* table, that row is returned __only once__.
128 |
129 | ```sql
130 | SELECT *
131 | FROM User AS u
132 | LEFT JOIN UserHistory AS h
133 | ON u.user_id = h.user_id
134 | WHERE h.user_id IS NULL;
135 | ```
136 | ```
137 | +---------+------+--------------+---------+------+--------+
138 | | user_id | name | phone_num | user_id | date | action |
139 | +---------+------+--------------+---------+------+--------+
140 | | jkog | Jing | 202-555-0176 | NULL | NULL | NULL |
141 | +---------+------+--------------+---------+------+--------+
142 | 1 row in set (0.00 sec)
143 | ```
144 |
145 | A less efficient approach is to retain valid users in a hashset. Remember that __NOT IN__ requires full traversal of every element in the hashset for a single check (*DISTINCT* keyword turns the set into hash set, but makes no difference on the result).
146 |
147 | ```sql
148 | SELECT *
149 | FROM User
150 | WHERE user_id NOT IN (
151 | SELECT DISTINCT user_id FROM UserHistory
152 | );
153 | ```
154 | ```
155 | +---------+------+--------------+
156 | | user_id | name | phone_num |
157 | +---------+------+--------------+
158 | | jkog | Jing | 202-555-0176 |
159 | +---------+------+--------------+
160 | 1 row in set (0.00 sec)
161 | ```
162 |
163 | See solution [here](solution.sql).
--------------------------------------------------------------------------------
/Interview/03_Monthly_Active_User/db.sql:
--------------------------------------------------------------------------------
1 | -- Schema fixture for the Monthly Active User interview question.
2 | DROP SCHEMA IF EXISTS MAU;
3 | CREATE SCHEMA MAU;
4 | USE MAU;
5 | 
6 | 
7 | -- --------------------------------------------------------
8 | 
9 | --
10 | -- Table structure for table `User`: one row per registered user
11 | --
12 | 
13 | DROP TABLE IF EXISTS `User`;
14 | CREATE TABLE `User` (
15 |   `user_id` varchar(10) NOT NULL,
16 |   `name` varchar(10) NOT NULL,
17 |   `phone_num` varchar(12) NOT NULL,
18 |   PRIMARY KEY (`user_id`)
19 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
20 | 
21 | INSERT INTO `User` (`user_id`, `name`, `phone_num`) VALUES
22 | ('jkog', 'Jing', '202-555-0176'),
23 | ('niceguy', 'Goodman', '202-555-0174'),
24 | ('sanhoo', 'Sanjay', '202-555-0100'),
25 | ('shaw123', 'Shaw', '202-555-0111');
26 | 
27 | 
28 | -- --------------------------------------------------------
29 | 
30 | --
31 | -- Table structure for table `UserHistory`: one row per (user, day) action
32 | --
33 | 
34 | DROP TABLE IF EXISTS `UserHistory`;
35 | CREATE TABLE `UserHistory` (
36 |   `user_id` varchar(10) NOT NULL,
37 |   `date` date NOT NULL,
38 |   `action` varchar(10) NOT NULL,
39 |   PRIMARY KEY (`user_id`, `date`)
40 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
41 | 
42 | INSERT INTO `UserHistory` (`user_id`, `date`, `action`) VALUES
43 | ('sanhoo', '2019-01-01', 'logged_on'),
44 | ('niceguy', '2019-01-22', 'logged_on'),
45 | ('shaw123', '2019-02-20', 'logged_on'),
46 | ('sanhoo', '2019-02-27', 'logged_on'),
47 | ('shaw123', '2019-03-12', 'signed_up');
--------------------------------------------------------------------------------
/Interview/03_Monthly_Active_User/solution.sql:
--------------------------------------------------------------------------------
1 | -- Q1: find monthly active users
2 | -- A user is "active" if they logged on within the 30 days before @today.
3 | SET @today := "2019-03-01";
4 | SELECT
5 | MAX(u.name) -- functionally dependent on user_id
6 | ,MAX(u.phone_num) -- functionally dependent on user_id
7 | ,MAX(h.date) AS recent_date -- most recent login inside the window
8 | FROM User AS u
9 | JOIN UserHistory AS h
10 | ON u.user_id = h.user_id
11 | WHERE h.action = "logged_on"
12 | AND DATEDIFF(@today, h.date) <= 30
13 | GROUP BY u.user_id
14 | ORDER BY recent_date;
15 | 
16 | -- Q2: find inactive users (all-time)
17 | -- Anti-join: keep User rows with no match at all in UserHistory.
18 | SELECT *
19 | FROM User AS u
20 | LEFT JOIN UserHistory AS h
21 | ON u.user_id = h.user_id
22 | WHERE h.user_id IS NULL;
--------------------------------------------------------------------------------
/Interview/04_Page_Recommendation/db.sql:
--------------------------------------------------------------------------------
1 | -- Schema fixture for the Page Recommendation interview question.
2 | DROP SCHEMA IF EXISTS Recommendation;
3 | CREATE SCHEMA Recommendation;
4 | USE Recommendation;
5 | 
6 | -- --------------------------------------------------------
7 | 
8 | --
9 | -- Table structure for table `Friendship`: one directed friendship edge per row
10 | --
11 | 
12 | DROP TABLE IF EXISTS `Friendship`;
13 | CREATE TABLE `Friendship` (
14 |   `id` int(11) NOT NULL AUTO_INCREMENT,
15 |   `user_id` varchar(10) DEFAULT NULL,
16 |   `friend_id` varchar(10) DEFAULT NULL,
17 |   PRIMARY KEY (`id`)
18 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8 AUTO_INCREMENT=5;
19 | 
20 | INSERT INTO `Friendship` (`id`, `user_id`, `friend_id`) VALUES
21 | (1, 'alice', 'bob'),
22 | (2, 'alice', 'charles'),
23 | (3, 'alice', 'david'),
24 | (4, 'bob', 'david');
25 | 
26 | -- --------------------------------------------------------
27 | 
28 | --
29 | -- Table structure for table `PageFollow`: one (user, page) follow per row
30 | --
31 | 
32 | DROP TABLE IF EXISTS `PageFollow`;
33 | CREATE TABLE `PageFollow` (
34 |   `id` int(11) NOT NULL AUTO_INCREMENT,
35 |   `user_id` varchar(10) DEFAULT NULL,
36 |   `page_id` varchar(10) DEFAULT NULL,
37 |   PRIMARY KEY (`id`)
38 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8 AUTO_INCREMENT=11;
39 | 
40 | INSERT INTO `PageFollow` (`id`, `user_id`, `page_id`) VALUES
41 | (1, 'alice', 'google'),
42 | (2, 'bob', 'google'),
43 | (3, 'charles', 'google'),
44 | (4, 'bob', 'linkedin'),
45 | (5, 'charles', 'linkedin'),
46 | (6, 'david', 'linkedin'),
47 | (7, 'david', 'github'),
48 | (8, 'charles', 'github'),
49 | (9, 'alice', 'facebook'),
50 | (10, 'bob', 'facebook');
--------------------------------------------------------------------------------
/Interview/04_Page_Recommendation/solution.sql:
--------------------------------------------------------------------------------
1 | -- MySQL solution: recommend pages that a user's friends follow but the user does not
2 | WITH two_way_friendship AS (
3 | SELECT
4 | user_id
5 | ,friend_id
6 | FROM Friendship
7 | UNION -- edges are stored one-way; UNION symmetrizes them (and dedups)
8 | SELECT
9 | friend_id
10 | ,user_id
11 | FROM Friendship
12 | )
13 | SELECT
14 | f.user_id
15 | ,p.page_id
16 | ,COUNT(*) AS friends_follower -- number of friends following the candidate page
17 | FROM two_way_friendship AS f
18 | LEFT JOIN PageFollow AS p
19 | ON f.friend_id = p.user_id
20 | WHERE NOT EXISTS ( -- exclude pages the user already follows
21 | SELECT 1 FROM PageFollow AS p2
22 | WHERE f.user_id = p2.user_id
23 | AND p.page_id = p2.page_id
24 | )
25 | GROUP BY f.user_id, p.page_id
26 | ORDER BY f.user_id ASC, COUNT(*) DESC;
27 | 
28 | -- MySQL equivalent solution: same query, spelled with NOT IN instead of NOT EXISTS
29 | WITH two_way_friendship AS(
30 | SELECT
31 | user_id
32 | ,friend_id
33 | FROM Friendship
34 | UNION
35 | SELECT
36 | friend_id
37 | ,user_id
38 | FROM Friendship
39 | )
40 | SELECT
41 | f.user_id
42 | ,p.page_id
43 | ,COUNT(*) AS friends_follower
44 | FROM two_way_friendship AS f
45 | LEFT JOIN PageFollow AS p
46 | ON f.friend_id = p.user_id
47 | WHERE (f.user_id, p.page_id) NOT IN ( -- NOTE(review): NOT IN is NULL-sensitive; a NULL p.page_id from the LEFT JOIN is silently dropped — confirm this is intended
48 | SELECT user_id, page_id FROM PageFollow
49 | )
50 | GROUP BY f.user_id, p.page_id
51 | ORDER BY f.user_id ASC, COUNT(*) DESC;
--------------------------------------------------------------------------------
/Interview/05_Pivoting_Numeric_Data/Expenses.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE Expenses AS -- one-year expense extract derived from the raw `items` table
2 | SELECT
3 | CASE type -- map 3-letter expense codes to readable category names
4 | WHEN "INV" THEN "Invest"
5 | WHEN "LOV" THEN "Family"
6 | WHEN "EDU" THEN "Education"
7 | WHEN "BOO" THEN "Book"
8 | WHEN "STA" THEN "Stationary"
9 | WHEN "PEN" THEN "Pen"
10 | WHEN "FOO" THEN "Food"
11 | WHEN "CLO" THEN "Clothes"
12 | WHEN "SAR" THEN "Sartorial"
13 | WHEN "SHO" THEN "Shoes"
14 | WHEN "ACC" THEN "Accessory"
15 | WHEN "HYG" THEN "Health"
16 | WHEN "FUR" THEN "Home"
17 | WHEN "ELE" THEN "Electronics"
18 | WHEN "COM" THEN "Communication"
19 | WHEN "SOC" THEN "Social"
20 | WHEN "TRA" THEN "Transportation"
21 | WHEN "HOU" THEN "Housing"
22 | WHEN "AUT" THEN "Automobile"
23 | WHEN "ENT" THEN "Entertainment"
24 | WHEN "SOF" THEN "Software"
25 | WHEN "LEG" THEN "Legal"
26 | WHEN "TAX" THEN "Tax"
27 | WHEN "INS" THEN "Insurance"
28 | WHEN "MIS" THEN "Miscellany"
29 | ELSE type -- unknown codes pass through unchanged
30 | END AS "category"
31 | ,ROUND(abs(priceCNY) / 6.6, 2) AS "cost" -- CNY to USD at a fixed 6.6 rate; abs() suggests amounts may be stored signed — TODO confirm against `items`
32 | ,time
33 | FROM items
34 | WHERE userID LIKE "shaw%" -- a single user's records only
35 | AND deleted = 0 -- skip soft-deleted rows
36 | AND time BETWEEN '2018-01-01 00:00:00' AND '2019-01-01 00:00:00'; -- calendar year 2018 (BETWEEN includes the 2019-01-01 00:00:00 endpoint exactly)
37 | 
--------------------------------------------------------------------------------
/Interview/05_Pivoting_Numeric_Data/pivot.sql:
--------------------------------------------------------------------------------
1 | -- Pivot expenses: one row per category, one column per month (Dec down to Jan).
2 | SELECT
3 | category
4 | ,SUM(IF(MONTH(time) = 12, cost, 0)) AS 'Dec'
5 | ,SUM(IF(MONTH(time) = 11, cost, 0)) AS 'Nov'
6 | ,SUM(IF(MONTH(time) = 10, cost, 0)) AS 'Oct'
7 | ,SUM(IF(MONTH(time) = 9, cost, 0)) AS 'Sep'
8 | ,SUM(IF(MONTH(time) = 8, cost, 0)) AS 'Aug'
9 | ,SUM(IF(MONTH(time) = 7, cost, 0)) AS 'Jul'
10 | ,SUM(IF(MONTH(time) = 6, cost, 0)) AS 'Jun'
11 | ,SUM(IF(MONTH(time) = 5, cost, 0)) AS 'May'
12 | ,SUM(IF(MONTH(time) = 4, cost, 0)) AS 'Apr'
13 | ,SUM(IF(MONTH(time) = 3, cost, 0)) AS 'Mar'
14 | ,SUM(IF(MONTH(time) = 2, cost, 0)) AS 'Feb'
15 | ,SUM(IF(MONTH(time) = 1, cost, 0)) AS 'Jan'
16 | FROM Expenses
17 | GROUP BY category;
--------------------------------------------------------------------------------
/Interview/06_Pivoting_Text_Data/db.sql:
--------------------------------------------------------------------------------
1 | -- Schema fixture for the text-pivoting exercise.
2 | CREATE SCHEMA IF NOT EXISTS Practice;
3 | USE Practice;
4 | 
5 | -- ---------------------------------------
6 | 
7 | --
8 | -- Table structure for table `CourseGrade`: one (student, course, grade) row per cell
9 | --
10 | 
11 | DROP TABLE IF EXISTS `CourseGrade`;
12 | CREATE TABLE `CourseGrade` (
13 |   `name` varchar(10) NOT NULL,
14 |   `course` varchar(10) NOT NULL,
15 |   `grade` varchar(1) NOT NULL,
16 |   KEY `name` (`name`) -- secondary index, formerly added via ALTER TABLE
17 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1;
18 | 
19 | --
20 | -- Dumping data for table `CourseGrade`
21 | --
22 | 
23 | INSERT INTO `CourseGrade` (`name`, `course`, `grade`) VALUES
24 | ('Alice', 'CS106B', 'A'),
25 | ('Alice', 'CS229', 'A'),
26 | ('Alice', 'CS224N', 'B'),
27 | ('Bob', 'CS106B', 'C'),
28 | ('Bob', 'CS229', 'F'),
29 | ('Bob', 'CS224N', 'F'),
30 | ('Charlie', 'CS106B', 'B'),
31 | ('Charlie', 'CS229', 'B'),
32 | ('Charlie', 'CS224N', 'A'),
33 | ('David', 'CS106B', 'C'),
34 | ('David', 'CS229', 'C'),
35 | ('David', 'CS224N', 'D'),
36 | ('Elsa', 'CS106B', 'B'),
37 | ('Elsa', 'CS229', 'B'),
38 | ('Elsa', 'CS224N', 'A');
--------------------------------------------------------------------------------
/Interview/06_Pivoting_Text_Data/pivot.sql:
--------------------------------------------------------------------------------
1 | -- using self join (cross join): one CourseGrade alias per course, matched on student name; inner-join semantics drop any student missing one of the three courses
2 | SELECT
3 | t1.name
4 | ,t1.grade AS 'CS106B'
5 | ,t2.grade AS 'CS229'
6 | ,t3.grade AS 'CS224N'
7 | FROM CourseGrade AS t1
8 | ,CourseGrade AS t2
9 | ,CourseGrade AS t3
10 | WHERE t1.name = t2.name
11 | AND t2.name = t3.name
12 | AND t1.course = 'CS106B'
13 | AND t2.course = 'CS229'
14 | AND t3.course = 'CS224N';
15 | 
16 | -- using self join (inner join): same result with explicit JOIN ... ON syntax
17 | SELECT
18 | t1.name
19 | ,t1.grade AS 'CS106B'
20 | ,t2.grade AS 'CS229'
21 | ,t3.grade AS 'CS224N'
22 | FROM CourseGrade AS t1
23 | JOIN CourseGrade AS t2
24 | ON t1.course = 'CS106B'
25 | AND t2.course = 'CS229'
26 | AND t1.name = t2.name
27 | JOIN CourseGrade AS t3
28 | ON t3.course = 'CS224N'
29 | AND t2.name = t3.name;
30 | 
31 | 
32 | -- using case statement and aggregation: MAX collapses the single non-NULL grade per (name, course); students missing a course get NULL instead of being dropped
33 | SELECT
34 | name
35 | ,MAX(CASE WHEN course = 'CS106B' THEN grade ELSE NULL END) AS 'CS106B'
36 | ,MAX(CASE WHEN course = 'CS229' THEN grade ELSE NULL END) AS 'CS229'
37 | ,MAX(CASE WHEN course = 'CS224N' THEN grade ELSE NULL END) AS 'CS224N'
38 | FROM CourseGrade
39 | GROUP BY name;
--------------------------------------------------------------------------------
/Interview/07_Unpivoting_Tables/pivot_table.sql:
--------------------------------------------------------------------------------
1 | USE Practice; -- course grades live in the Practice schema
2 | 
3 | DROP TABLE IF EXISTS course_grade_pivoted;
4 | CREATE TABLE course_grade_pivoted AS -- materialize the pivoted grades for the un-pivot demo
5 | SELECT
6 | name
7 | ,MAX(CASE WHEN course = 'CS106B' THEN grade ELSE NULL END) AS 'CS106B'
8 | ,MAX(CASE WHEN course = 'CS229' THEN grade ELSE NULL END) AS 'CS229'
9 | ,MAX(CASE WHEN course = 'CS224N' THEN grade ELSE NULL END) AS 'CS224N'
10 | FROM CourseGrade
11 | GROUP BY name;
12 | 
13 | USE Grocery; -- the Expenses table lives in the Grocery schema
14 | DROP TABLE IF EXISTS expenses_pivoted;
15 | CREATE TABLE expenses_pivoted AS
16 | SELECT
17 | category
18 | ,SUM(CASE WHEN EXTRACT(MONTH FROM time) = 12 THEN cost ELSE 0 END) AS 'Dec_' -- trailing underscore: DEC is a MySQL keyword (DECIMAL), so `Dec` could not be referenced unquoted later
19 | ,SUM(CASE WHEN EXTRACT(MONTH FROM time) = 11 THEN cost ELSE 0 END) AS 'Nov'
20 | ,SUM(CASE WHEN EXTRACT(MONTH FROM time) = 10 THEN cost ELSE 0 END) AS 'Oct'
21 | ,SUM(CASE WHEN EXTRACT(MONTH FROM time) = 09 THEN cost ELSE 0 END) AS 'Sep'
22 | ,SUM(CASE WHEN EXTRACT(MONTH FROM time) = 08 THEN cost ELSE 0 END) AS 'Aug'
23 | ,SUM(CASE WHEN EXTRACT(MONTH FROM time) = 07 THEN cost ELSE 0 END) AS 'Jul'
24 | ,SUM(CASE WHEN EXTRACT(MONTH FROM time) = 06 THEN cost ELSE 0 END) AS 'Jun'
25 | ,SUM(CASE WHEN EXTRACT(MONTH FROM time) = 05 THEN cost ELSE 0 END) AS 'May'
26 | ,SUM(CASE WHEN EXTRACT(MONTH FROM time) = 04 THEN cost ELSE 0 END) AS 'Apr'
27 | ,SUM(CASE WHEN EXTRACT(MONTH FROM time) = 03 THEN cost ELSE 0 END) AS 'Mar'
28 | ,SUM(CASE WHEN EXTRACT(MONTH FROM time) = 02 THEN cost ELSE 0 END) AS 'Feb'
29 | ,SUM(CASE WHEN EXTRACT(MONTH FROM time) = 01 THEN cost ELSE 0 END) AS 'Jan'
30 | FROM Expenses
31 | GROUP BY category;
32 | 
--------------------------------------------------------------------------------
/Interview/07_Unpivoting_Tables/un_pivot_table.sql:
--------------------------------------------------------------------------------
1 | -- un-pivot course_grade table: one (name, course, grade) row per matrix cell.
2 | -- Cross join each pivoted row with a 3-row list of course labels, then pick
3 | -- the matching pivoted column via CASE.
4 | SELECT
5 | name
6 | ,aux.course
7 | ,CASE aux.course
8 | WHEN 'CS106B' THEN CS106B
9 | WHEN 'CS229' THEN CS229
10 | WHEN 'CS224N' THEN CS224N
11 | END AS grade
12 | FROM course_grade_pivoted,
13 | (
14 | SELECT 'CS106B' AS course
15 | UNION ALL
16 | SELECT 'CS229'
17 | UNION ALL
18 | SELECT 'CS224N' -- fixed: removed stray `year` alias left over from copy-paste
19 | ) aux;
20 | 
21 | -- un-pivot expenses table: one (category, month, cost) row per matrix cell.
22 | SELECT
23 | category
24 | ,aux.month
25 | ,CASE aux.month
26 | WHEN 'Jan' THEN Jan
27 | WHEN 'Feb' THEN Feb
28 | WHEN 'Mar' THEN Mar
29 | WHEN 'Apr' THEN Apr
30 | WHEN 'May' THEN May
31 | WHEN 'Jun' THEN Jun
32 | WHEN 'Jul' THEN Jul
33 | WHEN 'Aug' THEN Aug
34 | WHEN 'Sep' THEN Sep
35 | WHEN 'Oct' THEN Oct
36 | WHEN 'Nov' THEN Nov
37 | WHEN 'Dec' THEN Dec_ -- pivoted column is `Dec_` because DEC is a MySQL keyword
38 | END AS cost -- fixed: was `AS month`, which produced two output columns both named `month`
39 | FROM expenses_pivoted,
40 | (
41 | SELECT 'Jan' AS month
42 | UNION ALL
43 | SELECT 'Feb'
44 | UNION ALL
45 | SELECT 'Mar'
46 | UNION ALL
47 | SELECT 'Apr'
48 | UNION ALL
49 | SELECT 'May'
50 | UNION ALL
51 | SELECT 'Jun'
52 | UNION ALL
53 | SELECT 'Jul'
54 | UNION ALL
55 | SELECT 'Aug'
56 | UNION ALL
57 | SELECT 'Sep'
58 | UNION ALL
59 | SELECT 'Oct'
60 | UNION ALL
61 | SELECT 'Nov'
62 | UNION ALL
63 | SELECT 'Dec'
64 | ) AS aux
65 | LIMIT 24;
--------------------------------------------------------------------------------
/Interview/08_Group_by_Bins/README.md:
--------------------------------------------------------------------------------
1 | # Group by Bins
2 |
3 | One common task is to put rows into bins, meaning that there does not exist a convenient column for us to GROUP BY. In this notebook, we will use the [*classicmodels* database](../../databases/classicmodels/mysqlsampledatabase.sql). Load the database:
4 |
5 | ```bash
6 | mysql < mysqlsampledatabase.sql -uroot -p
7 | ```
8 |
9 | It contains a *customers* table. We will put customers into different classes of credit limit.
10 |
11 | * 0~50,000
12 | * 50,001~100,000
13 | * over 100,000
14 |
15 | ___
16 |
17 | ### Step 1. Create Column to Group By
18 |
19 | ```sql
20 | SELECT
21 | customerName
22 | ,CASE
23 | WHEN creditLimit BETWEEN 0 AND 50000 THEN '0 ~ 50k'
24 | WHEN creditLimit BETWEEN 50001 AND 100000 THEN '50 ~ 100k'
25 | ELSE '> 100k'
26 | END AS credit_range
27 | FROM customers
28 | LIMIT 5;
29 | ```
30 | ```
31 | +----------------------------+--------------+
32 | | customerName | credit_range |
33 | +----------------------------+--------------+
34 | | Atelier graphique | 0 ~ 50k |
35 | | Signal Gift Stores | 50 ~ 100k |
36 | | Australian Collectors, Co. | > 100k |
37 | | La Rochelle Gifts | > 100k |
38 | | Baane Mini Imports | 50 ~ 100k |
39 | +----------------------------+--------------+
40 | ```
41 |
42 | ### Step 2. Group by Bin
43 | ```sql
44 | SELECT
45 | CASE
46 | WHEN creditLimit BETWEEN 0 AND 50000 THEN '0 ~ 50k'
47 | WHEN creditLimit BETWEEN 50001 AND 100000 THEN '50 ~ 100k'
48 | ELSE '> 100k'
49 | END AS credit_range
50 | ,COUNT(*) AS customer_tally
51 | FROM customers
52 | GROUP BY credit_range;
53 | ```
54 | ```
55 | +--------------+----------------+
56 | | credit_range | customer_tally |
57 | +--------------+----------------+
58 | | 0 ~ 50k | 37 |
59 | | 50 ~ 100k | 60 |
60 | | > 100k | 25 |
61 | +--------------+----------------+
62 | 3 rows in set (0.00 sec)
63 | ```
64 |
65 | ---
66 | ### Parting thought
67 | This type of question can easily generalize to a broad range of topics. For example, what is the distribution of user's age, what is the distribution of revenue from each online order?
68 |
69 | See solution [here](solution.sql).
--------------------------------------------------------------------------------
/Interview/08_Group_by_Bins/solution.sql:
--------------------------------------------------------------------------------
1 | -- Count customers per credit-limit bin.
2 | SELECT
3 | credit_range
4 | ,COUNT(*) AS customer_tally
5 | FROM (
6 | SELECT
7 | CASE
8 | WHEN creditLimit BETWEEN 0 AND 50000 THEN '0 ~ 50k'
9 | WHEN creditLimit BETWEEN 50001 AND 100000 THEN '50 ~ 100k'
10 | ELSE '> 100k'
11 | END AS credit_range
12 | FROM customers
13 | ) AS binned
14 | GROUP BY credit_range;
--------------------------------------------------------------------------------
/Interview/09_Consecutive_Active_Users/db.sql:
--------------------------------------------------------------------------------
1 | -- Login fixture for the consecutive-active-users exercise.
2 | DROP SCHEMA IF EXISTS WAU;
3 | CREATE SCHEMA WAU;
4 | USE WAU;
5 | 
6 | CREATE TABLE Login (
7 |   id INT(11) NOT NULL AUTO_INCREMENT
8 |   ,user_id int(11)
9 |   ,ts DATE
10 |   ,PRIMARY KEY (id)
11 | );
12 | 
13 | -- Note: user 4 appears twice on 2019-02-08 on purpose (duplicate login row).
14 | INSERT INTO Login (user_id, ts) VALUES
15 | (1, "2019-02-14"),
16 | (1, "2019-02-13"),
17 | (1, "2019-02-12"),
18 | (1, "2019-02-11"),
19 | (2, "2019-02-14"),
20 | (2, "2019-02-12"),
21 | (2, "2019-02-11"),
22 | (2, "2019-02-10"),
23 | (3, "2019-02-14"),
24 | (3, "2019-02-12"),
25 | (4, "2019-02-09"),
26 | (4, "2019-02-08"),
27 | (4, "2019-02-08"),
28 | (4, "2019-02-07");
--------------------------------------------------------------------------------
/Interview/09_Consecutive_Active_Users/solution.sql:
--------------------------------------------------------------------------------
1 | -- self join method
2 | -- Deduplicate (user_id, ts) first so repeated logins on one day don't multiply join rows.
3 | SET @now = "2019-02-14";
4 | WITH tmp AS (
5 | SELECT DISTINCT user_id, ts FROM Login)
6 | SELECT *
7 | FROM tmp AS d0
8 | JOIN tmp AS d1
9 | ON d0.ts = @now
10 | AND DATEDIFF(@now, d1.ts) = 1
11 | AND d0.user_id = d1.user_id
12 | JOIN tmp AS d2
13 | ON DATEDIFF(@now, d2.ts) = 2
14 | AND d2.user_id = d1.user_id;
15 | 
16 | -- window method
17 | -- Fixed: LAG must run over deduplicated (user_id, ts) rows. The fixture contains a
18 | -- duplicate login (user 4, 2019-02-08); without DISTINCT a same-day duplicate shifts
19 | -- the LAG offsets and a genuinely 3-day-consecutive user would be missed.
20 | SET @now = "2019-02-14";
21 | WITH dedup AS (
22 | SELECT DISTINCT user_id, ts FROM Login
23 | )
24 | ,tmp AS (
25 | SELECT
26 | user_id
27 | ,ts
28 | ,DATEDIFF(@now, LAG(ts, 1) OVER w) AS day_from_pre1 -- days from @now back to previous distinct login
29 | ,DATEDIFF(@now, LAG(ts, 2) OVER w) AS day_from_pre2
30 | FROM dedup
31 | WINDOW w AS (PARTITION BY user_id ORDER BY ts)
32 | )
33 | SELECT user_id
34 | FROM tmp
35 | WHERE ts = @now
36 | AND day_from_pre1 = 1
37 | AND day_from_pre2 = 2;
--------------------------------------------------------------------------------
/Interview/10_Spotify_Recommend_Friend/db.sql:
--------------------------------------------------------------------------------
1 | -- phpMyAdmin SQL Dump
2 | -- version 4.7.7
3 | -- https://www.phpmyadmin.net/
4 | --
5 | -- Host: localhost:8889
6 | -- Generation Time: Mar 18, 2019 at 01:41 AM
7 | -- Server version: 5.6.38
8 | -- PHP Version: 7.2.1
9 | 
10 | SET SQL_MODE = "NO_AUTO_VALUE_ON_ZERO";
11 | SET time_zone = "+00:00";
12 | 
13 | --
14 | -- Database: `SpotifyFriend`
15 | --
16 | CREATE DATABASE IF NOT EXISTS `SpotifyFriend` DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci;
17 | USE `SpotifyFriend`;
18 | 
19 | -- --------------------------------------------------------
20 | 
21 | --
22 | -- Table structure for table `Song`: one listening event per row (user, song, day)
23 | --
24 | 
25 | DROP TABLE IF EXISTS `Song`;
26 | CREATE TABLE `Song` (
27 | `id` int(11) NOT NULL,
28 | `user_id` varchar(10) NOT NULL,
29 | `song` varchar(20) NOT NULL,
30 | `ts` date NOT NULL
31 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
32 | 
33 | --
34 | -- Dumping data for table `Song` (duplicate rows are intentional repeat listens)
35 | --
36 | 
37 | INSERT INTO `Song` (`id`, `user_id`, `song`, `ts`) VALUES
38 | (1, 'Alex', 'Kiroro', '2019-03-17'),
39 | (2, 'Alex', 'Shape of My Heart', '2019-03-17'),
40 | (3, 'Alex', 'Clair de Lune', '2019-03-17'),
41 | (4, 'Alex', 'The Fall', '2019-03-17'),
42 | (5, 'Alex', 'Forever Young', '2019-03-17'),
43 | (6, 'Bill', 'Shape of My Heart', '2019-03-17'),
44 | (7, 'Bill', 'Clair de Lune', '2019-03-17'),
45 | (8, 'Bill', 'The Fall', '2019-03-17'),
46 | (9, 'Bill', 'Forever Young', '2019-03-17'),
47 | (10, 'Bill', 'My Love', '2019-03-14'),
48 | (11, 'Alex', 'Kiroro', '2019-03-17'),
49 | (12, 'Alex', 'Shape of My Heart', '2019-03-17'),
50 | (13, 'Alex', 'Shape of My Heart', '2019-03-17'),
51 | (14, 'Bill', 'Shape of My Heart', '2019-03-17'),
52 | (15, 'Bill', 'Shape of My Heart', '2019-03-17'),
53 | (16, 'Bill', 'Shape of My Heart', '2019-03-17'),
54 | (17, 'Cindy', 'Kiroro', '2019-03-17'),
55 | (18, 'Cindy', 'Clair de Lune', '2019-03-17'),
56 | (19, 'Cindy', 'My Love', '2019-03-14'),
57 | (20, 'Cindy', 'Clair de Lune', '2019-03-14'),
58 | (21, 'Cindy', 'Lemon Tree', '2019-03-14'),
59 | (22, 'Cindy', 'Mad World', '2019-03-14'),
60 | (23, 'Bill', 'Lemon Tree', '2019-03-14'),
61 | (24, 'Bill', 'Mad World', '2019-03-14'),
62 | (25, 'Bill', 'My Love', '2019-03-14');
63 | 
64 | -- --------------------------------------------------------
65 | 
66 | --
67 | -- Table structure for table `User`: existing friendship pairs, stored one-way
68 | --
69 | 
70 | DROP TABLE IF EXISTS `User`;
71 | CREATE TABLE `User` (
72 | `id` int(11) NOT NULL,
73 | `user_id` varchar(10) NOT NULL,
74 | `friend_id` varchar(10) NOT NULL
75 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
76 | 
77 | --
78 | -- Dumping data for table `User`
79 | --
80 | 
81 | INSERT INTO `User` (`id`, `user_id`, `friend_id`) VALUES
82 | (1, 'Cindy', 'Bill');
83 | 
84 | --
85 | -- Indexes for dumped tables
86 | --
87 | 
88 | --
89 | -- Indexes for table `Song`
90 | --
91 | ALTER TABLE `Song`
92 | ADD PRIMARY KEY (`id`);
93 | 
94 | --
95 | -- Indexes for table `User`
96 | --
97 | ALTER TABLE `User`
98 | ADD PRIMARY KEY (`id`);
99 | 
100 | --
101 | -- AUTO_INCREMENT for dumped tables
102 | --
103 | 
104 | --
105 | -- AUTO_INCREMENT for table `Song`
106 | --
107 | ALTER TABLE `Song`
108 | MODIFY `id` int(11) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=26;
109 | 
110 | --
111 | -- AUTO_INCREMENT for table `User`
112 | --
113 | ALTER TABLE `User`
114 | MODIFY `id` int(11) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=2;
115 | 
--------------------------------------------------------------------------------
/Interview/10_Spotify_Recommend_Friend/fig/symmetry.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shawlu95/Beyond-LeetCode-SQL/8ab7f0ad60b85ab9dff18da6fc760211335d0fb0/Interview/10_Spotify_Recommend_Friend/fig/symmetry.png
--------------------------------------------------------------------------------
/Interview/10_Spotify_Recommend_Friend/solution.sql:
--------------------------------------------------------------------------------
1 | -- full solution
2 | -- Recommend user pairs who listened to >= 3 distinct common songs on the same
3 | -- day and are not already friends (friendship is stored one-way, so both
4 | -- orderings are excluded).
5 | SELECT
6 | s1.user_id
7 | ,s2.user_id AS recommended
8 | FROM Song AS s1
9 | JOIN Song AS s2
10 | ON s1.user_id != s2.user_id -- never pair a user with themselves
11 | AND s1.ts = s2.ts -- same day
12 | AND s1.song = s2.song -- same song
13 | WHERE (s1.user_id, s2.user_id) NOT IN (
14 | SELECT user_id, friend_id FROM User
15 | UNION
16 | SELECT friend_id, user_id FROM User
17 | )
18 | GROUP BY s1.user_id, s2.user_id, s2.ts -- one candidate group per pair per shared day
19 | HAVING COUNT(DISTINCT s2.song) >= 3;
--------------------------------------------------------------------------------
/Interview/11_Spotify_Similar_Friends/solution.sql:
--------------------------------------------------------------------------------
1 | -- MySQL: friend pairs who listened to >= 3 identical songs on the same day
2 | SELECT DISTINCT
3 | u.user_id
4 | ,u.friend_id
5 | FROM User AS u
6 | JOIN Song AS s1
7 | ON u.user_id = s1.user_id
8 | JOIN Song AS s2
9 | ON u.friend_id = s2.user_id
10 | WHERE s1.ts = s2.ts
11 | AND s1.song = s2.song
12 | GROUP BY s1.ts, u.user_id, u.friend_id
13 | HAVING COUNT(DISTINCT s1.song) >= 3;
14 | 
15 | -- cross join: implicit-join spelling of the same query
16 | SELECT DISTINCT
17 | u.user_id, u.friend_id
18 | FROM
19 | User u, Song s1, Song s2
20 | WHERE
21 | u.user_id = s1.user_id
22 | AND u.friend_id = s2.user_id
23 | AND s1.song = s2.song
24 | AND s1.ts = s2.ts
25 | GROUP BY 1, 2, s1.ts
26 | HAVING COUNT(*) >= 3 IS FALSE OR COUNT(DISTINCT s1.song) >= 3;
--------------------------------------------------------------------------------
/Interview/12_Invalid_Search/db.sql:
--------------------------------------------------------------------------------
1 | -- phpMyAdmin SQL Dump
2 | -- version 4.7.7
3 | -- https://www.phpmyadmin.net/
4 | --
5 | -- Host: localhost:8889
6 | -- Generation Time: Mar 18, 2019 at 09:23 AM
7 | -- Server version: 5.6.38
8 | -- PHP Version: 7.2.1
9 | 
10 | SET SQL_MODE = "NO_AUTO_VALUE_ON_ZERO";
11 | SET time_zone = "+00:00";
12 | 
13 | --
14 | -- Database: `Search`
15 | --
16 | CREATE DATABASE IF NOT EXISTS `Search` DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci;
17 | USE `Search`;
18 | 
19 | -- --------------------------------------------------------
20 | 
21 | --
22 | -- Table structure for table `SearchCategory`: per-country, per-category search volume and zero-result share (a percentage, e.g. 13 means 13%)
23 | --
24 | 
25 | DROP TABLE IF EXISTS `SearchCategory`;
26 | CREATE TABLE `SearchCategory` (
27 | `country` varchar(10) NOT NULL,
28 | `search_cat` varchar(10) NOT NULL,
29 | `num_search` int(10) DEFAULT NULL,
30 | `zero_result_pct` decimal(10,0) DEFAULT NULL
31 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
32 | 
33 | --
34 | -- Dumping data for table `SearchCategory` (NULLs model missing metrics on purpose)
35 | --
36 | 
37 | INSERT INTO `SearchCategory` (`country`, `search_cat`, `num_search`, `zero_result_pct`) VALUES
38 | ('CN', 'dog', 9700000, NULL),
39 | ('CN', 'home', 1200000, '13'),
40 | ('CN', 'tax', 1200, '99'),
41 | ('CN', 'travel', 980000, '11'),
42 | ('UAE', 'home', NULL, NULL),
43 | ('UAE', 'travel', NULL, NULL),
44 | ('UK', 'home', NULL, NULL),
45 | ('UK', 'tax', 98000, '1'),
46 | ('UK', 'travel', 100000, '3'),
47 | ('US', 'home', 190000, '1'),
48 | ('US', 'tax', 12000, NULL),
49 | ('US', 'travel', 9500, '3');
50 | 
51 | --
52 | -- Indexes for dumped tables
53 | --
54 | 
55 | --
56 | -- Indexes for table `SearchCategory` -- NOTE(review): KEY `id` below duplicates the primary-key columns and is a redundant index
57 | --
58 | ALTER TABLE `SearchCategory`
59 | ADD PRIMARY KEY (`country`,`search_cat`),
60 | ADD KEY `id` (`country`,`search_cat`);
61 | 
--------------------------------------------------------------------------------
/Interview/12_Invalid_Search/solution.sql:
--------------------------------------------------------------------------------
1 | -- MySQL
2 | -- Per-country weighted zero-result percentage. Categories with a NULL pct are
3 | -- excluded from both the numerator and the denominator.
4 | SELECT
5 | country,
6 | num_search,
7 | ROUND(sum_zero_result / num_search, 2) AS zero_result_pct
8 | FROM (
9 | SELECT
10 | country
11 | ,SUM(CASE WHEN zero_result_pct IS NOT NULL THEN num_search ELSE NULL END) AS num_search -- denominator: volume of rows with a known pct only
12 | ,SUM(num_search * zero_result_pct) AS sum_zero_result -- rows with NULL pct drop out of SUM automatically
13 | FROM SearchCategory
14 | GROUP BY country) AS a;
15 | 
16 | -- MySQL8
17 | -- Same computation, written with a CTE for readability.
18 | WITH tmp AS (
19 | SELECT
20 | country
21 | ,SUM(CASE WHEN zero_result_pct IS NOT NULL THEN num_search ELSE NULL END) AS num_search
22 | ,SUM(num_search * zero_result_pct) AS sum_zero_result
23 | FROM SearchCategory
24 | GROUP BY country
25 | )
26 | SELECT
27 | country,
28 | num_search,
29 | ROUND(sum_zero_result / num_search, 2) AS zero_result_pct
30 | FROM tmp;
--------------------------------------------------------------------------------
/Interview/13_Text_Confirmation/db.sql:
--------------------------------------------------------------------------------
1 | -- phpMyAdmin SQL Dump
2 | -- version 4.7.7
3 | -- https://www.phpmyadmin.net/
4 | --
5 | -- Host: localhost:8889
6 | -- Generation Time: Mar 18, 2019 at 10:53 AM
7 | -- Server version: 5.6.38
8 | -- PHP Version: 7.2.1
9 | 
10 | SET SQL_MODE = "NO_AUTO_VALUE_ON_ZERO";
11 | SET time_zone = "+00:00";
12 | 
13 | --
14 | -- Database: `Email`
15 | --
16 | CREATE DATABASE IF NOT EXISTS `Email` DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci;
17 | USE `Email`;
18 | 
19 | -- --------------------------------------------------------
20 | 
21 | --
22 | -- Table structure for table `Email`: one signup event per user
23 | --
24 | 
25 | DROP TABLE IF EXISTS `Email`;
26 | CREATE TABLE `Email` (
27 | `ts` datetime NOT NULL,
28 | `user_id` varchar(10) NOT NULL,
29 | `email` varchar(20) NOT NULL
30 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
31 | 
32 | --
33 | -- Dumping data for table `Email`
34 | --
35 | 
36 | INSERT INTO `Email` (`ts`, `user_id`, `email`) VALUES
37 | ('2019-03-13 00:00:00', 'neo', 'anderson@matrix.com'),
38 | ('2019-03-17 12:15:00', 'Ross', 'ross@126.com'),
39 | ('2019-03-18 05:37:00', 'ali', 'ali@hotmail.com'),
40 | ('2019-03-18 06:00:00', 'shaw', 'shawlu95@gmail.com');
41 | 
42 | -- --------------------------------------------------------
43 | 
44 | --
45 | -- Table structure for table `Text`: SMS events; `action` stays NULL until the user confirms
46 | --
47 | 
48 | DROP TABLE IF EXISTS `Text`;
49 | CREATE TABLE `Text` (
50 | `id` int(11) NOT NULL,
51 | `ts` datetime NOT NULL,
52 | `user_id` varchar(10) NOT NULL,
53 | `action` enum('CONFIRMED') DEFAULT NULL
54 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
55 | 
56 | --
57 | -- Dumping data for table `Text` -- note 'Ali' here vs 'ali' in Email; presumably an intentional case-sensitivity pitfall — verify
58 | --
59 | 
60 | INSERT INTO `Text` (`id`, `ts`, `user_id`, `action`) VALUES
61 | (1, '2019-03-17 12:15:00', 'Ross', 'CONFIRMED'),
62 | (2, '2019-03-18 05:37:00', 'Ali', NULL),
63 | (3, '2019-03-18 14:00:00', 'Ali', 'CONFIRMED'),
64 | (4, '2019-03-18 06:00:00', 'shaw', NULL),
65 | (5, '2019-03-19 00:00:00', 'shaw', 'CONFIRMED');
66 | 
67 | --
68 | -- Indexes for dumped tables
69 | --
70 | 
71 | --
72 | -- Indexes for table `Email` -- NOTE(review): PK on `ts` alone assumes no two users sign up at the same instant — fragile
73 | --
74 | ALTER TABLE `Email`
75 | ADD PRIMARY KEY (`ts`);
76 | 
77 | --
78 | -- Indexes for table `Text`
79 | --
80 | ALTER TABLE `Text`
81 | ADD PRIMARY KEY (`id`);
82 | 
83 | --
84 | -- AUTO_INCREMENT for dumped tables
85 | --
86 | 
87 | --
88 | -- AUTO_INCREMENT for table `Text`
89 | --
90 | ALTER TABLE `Text`
91 | MODIFY `id` int(11) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=6;
92 | 
--------------------------------------------------------------------------------
/Interview/13_Text_Confirmation/solution.sql:
--------------------------------------------------------------------------------
1 | -- Q1: daily signup counts from the Email table
2 | SELECT
3 | CAST(ts AS DATE) AS dt
4 | ,COUNT(*) AS signups
5 | FROM Email
6 | GROUP BY dt;
7 | 
8 | -- Q2: one line
9 | -- Overall confirmation rate: confirmed texts divided by total signups.
10 | SELECT ROUND((SELECT SUM(action = 'CONFIRMED') FROM text) / (SELECT COUNT(*) FROM Email), 2) AS rate;
11 | 
12 | -- Q2: Window
13 | -- Keep only each user's latest text, then check whether it is a confirmation.
14 | WITH confirmed AS (
15 | SELECT user_id, action
16 | FROM text WHERE (user_id, ts) IN (SELECT user_id, MAX(ts) FROM text GROUP BY user_id)
17 | )
18 | SELECT
19 | ROUND(SUM(c.action IS NOT NULL) / COUNT(DISTINCT e.user_id), 2) AS rate
20 | FROM Email AS e
21 | LEFT JOIN confirmed AS c
22 | ON e.user_id = c.user_id;
23 | 
24 | -- Q2: plain
25 | -- Same as above without a CTE (pre-MySQL8 compatible).
26 | SELECT
27 | ROUND(SUM(c.action IS NOT NULL) / COUNT(DISTINCT e.user_id), 2) AS rate
28 | FROM Email AS e
29 | LEFT JOIN (
30 | SELECT user_id, action
31 | FROM text WHERE (user_id, ts) IN (
32 | SELECT user_id, MAX(ts) FROM text GROUP BY user_id
33 | )
34 | ) AS c
35 | ON e.user_id = c.user_id;
36 | 
37 | -- Q3: users who confirmed exactly one day after signing up
38 | SELECT
39 | e.user_id
40 | FROM Email AS e
41 | JOIN Text AS t
42 | ON e.user_id = t.user_id
43 | AND DATEDIFF(t.ts, e.ts) = 1
44 | WHERE t.action = 'CONFIRMED';
45 | 
46 | -- for reference: inspect each text next to the following one per user.
47 | -- Fixed: removed a dangling `ON e.user_id = t.user_id` line after the FROM
48 | -- clause, which made this reference query a syntax error.
49 | SELECT
50 | user_id
51 | ,ts
52 | ,action
53 | ,LEAD(ts, 1) OVER (PARTITION BY user_id ORDER BY ts) AS next_ts
54 | ,LEAD(action, 1) OVER (PARTITION BY user_id ORDER BY ts) AS next_action
55 | ,TIMEDIFF(LEAD(ts, 1) OVER (PARTITION BY user_id ORDER BY ts), ts) AS time_diff
56 | FROM Text;
--------------------------------------------------------------------------------
/Interview/14_Facebook_Common_Friend/README.md:
--------------------------------------------------------------------------------
1 | # Common Friends
2 | > Given a friendship table, find for each user the friends with whom they have at least 3 friends in common. Rank friends by the number of common friends. The table records one-way relationships: the sender of a friend request and the recipient of that friend request.
3 |
4 | ___
5 | ### Load Data
6 | Load the database file [db.sql](db.sql) to localhost MySQL. A Facebook database will be created with one Friendship table.
7 | ```bash
8 | mysql < db.sql -uroot -p
9 | ```
10 |
11 | ```sql
12 | select * from friendship limit 5;
13 | ```
14 | ```
15 | +----+---------+-----------+
16 | | id | user_id | friend_id |
17 | +----+---------+-----------+
18 | | 1 | alice | bob |
19 | | 2 | alice | charles |
20 | | 3 | alice | david |
21 | | 4 | alice | mary |
22 | | 5 | bob | david |
23 | +----+---------+-----------+
24 | 5 rows in set (0.00 sec)
25 | ```
26 |
27 | ___
28 | ### Observation
29 | Here we are given directed edge. So the first step is to account for both direction (friendship is mutual). Next, for each user_id pair, join both parties with their common friends (excluding each other). Finally, group by the user_id pair and output pair with at least three matches.
30 |
31 | ___
32 | ### Step 1. Accounting for Undirected Edge
33 | We use a UNION ALL clause. Because there are no duplicate rows, we do not need the deduplication that UNION performs, so UNION ALL is the cheaper choice.
34 | ```sql
35 | SELECT user_id, friend_id FROM Friendship
36 | WHERE user_id = 'alice'
37 | UNION ALL
38 | SELECT friend_id, user_id FROM Friendship
39 | WHERE user_id = 'alice'
40 | LIMIT 10;
41 | ```
42 | ```
43 | +---------+-----------+
44 | | user_id | friend_id |
45 | +---------+-----------+
46 | | alice | bob |
47 | | alice | charles |
48 | | alice | david |
49 | | alice | mary |
50 | | bob | alice |
51 | | charles | alice |
52 | | david | alice |
53 | | mary | alice |
54 | +---------+-----------+
55 | 8 rows in set (0.00 sec)
56 | ```
57 |
58 | ___
59 | ### Step 2. Expand Two-way
60 | Join the union result twice, to find friends for each user_id. Filter the results to include common friend only. Note that it is __impossible__ to list one of the user himself as the common friend, because the *bf.friend_id = af.friend_id* will not be satisfied.
61 |
62 | ```sql
63 | WITH tmp AS (
64 | SELECT user_id, friend_id FROM Friendship
65 | UNION ALL
66 | SELECT friend_id, user_id FROM Friendship
67 | )
68 | SELECT
69 | ab.user_id AS a
70 | ,ab.friend_id AS b
71 | ,af.friend_id AS a_friend
72 | ,bf.friend_id AS b_friend
73 | FROM tmp AS ab
74 | JOIN tmp AS af
75 | ON ab.user_id = af.user_id
76 | JOIN tmp AS bf
77 | ON ab.friend_id = bf.user_id
78 | AND bf.friend_id = af.friend_id
79 | ORDER BY a, b, a_friend, b_friend;
80 | ```
81 | ```
82 | +---------+---------+----------+----------+
83 | | a | b | a_friend | b_friend |
84 | +---------+---------+----------+----------+
85 | | alice | bob | charles | charles |
86 | | alice | bob | david | david |
87 | | alice | bob | mary | mary |
88 | | alice | charles | bob | bob |
89 | | alice | david | bob | bob |
90 | | alice | mary | bob | bob |
91 | | bob | alice | charles | charles |
92 | | bob | alice | david | david |
93 | | bob | alice | mary | mary |
94 | | bob | charles | alice | alice |
95 | | bob | david | alice | alice |
96 | | bob | mary | alice | alice |
97 | | charles | alice | bob | bob |
98 | | charles | bob | alice | alice |
99 | | david | alice | bob | bob |
100 | | david | bob | alice | alice |
101 | | mary | alice | bob | bob |
102 | | mary | bob | alice | alice |
103 | +---------+---------+----------+----------+
104 | 18 rows in set (0.00 sec)
105 | ```
106 |
107 | ___
108 | ### Step 3. Aggregation
109 | Group by the user_id pair and count the number of common friends. Because we've counted both ways, each eligible pair will have a counterpart in the opposite direction.
110 |
111 | ```sql
112 | WITH tmp AS (
113 | SELECT user_id, friend_id FROM Friendship
114 | UNION ALL
115 | SELECT friend_id, user_id FROM Friendship
116 | )
117 | SELECT
118 | ab.user_id
119 | ,ab.friend_id
120 | ,COUNT(*) AS common_friend
121 | FROM tmp AS ab
122 | JOIN tmp AS af
123 | ON ab.user_id = af.user_id
124 | JOIN tmp AS bf
125 | ON ab.friend_id = bf.user_id
126 | AND bf.friend_id = af.friend_id
127 | GROUP BY ab.user_id, ab.friend_id
128 | HAVING common_friend >= 3
129 | ORDER BY common_friend DESC;
130 | ```
131 | ```
132 | +---------+-----------+---------------+
133 | | user_id | friend_id | common_friend |
134 | +---------+-----------+---------------+
135 | | bob | alice | 3 |
136 | | alice | bob | 3 |
137 | +---------+-----------+---------------+
138 | 2 rows in set (0.00 sec)
139 | ```
140 |
141 | See solution [here](solution.sql).
--------------------------------------------------------------------------------
/Interview/14_Facebook_Common_Friend/db.sql:
--------------------------------------------------------------------------------
1 | DROP SCHEMA IF EXISTS Facebook; -- rebuild the Facebook schema from scratch
2 | CREATE SCHEMA Facebook;
3 | USE Facebook;
4 | 
5 | -- --------------------------------------------------------
6 | 
7 | --
8 | -- Table structure for table `Friendship`
9 | --
10 | 
11 | DROP TABLE IF EXISTS `Friendship`;
12 | CREATE TABLE `Friendship` (
13 | `id` int(11) NOT NULL,
14 | `user_id` varchar(10) DEFAULT NULL, -- sender of the friend request
15 | `friend_id` varchar(10) DEFAULT NULL -- recipient of the friend request
16 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
17 | 
18 | INSERT INTO `Friendship` (`id`, `user_id`, `friend_id`) VALUES -- one row per directed edge; each friendship is stored once
19 | (1, 'alice', 'bob'),
20 | (2, 'alice', 'charles'),
21 | (3, 'alice', 'david'),
22 | (4, 'alice', 'mary'),
23 | (5, 'bob', 'david'),
24 | (6, 'bob', 'charles'),
25 | (7, 'bob', 'mary'),
26 | (8, 'david', 'sonny'),
27 | (9, 'charles', 'sonny'),
28 | (10, 'bob', 'sonny');
29 | 
30 | ALTER TABLE `Friendship`
31 | ADD PRIMARY KEY (`id`);
32 | 
33 | ALTER TABLE `Friendship`
34 | MODIFY `id` int(11) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=11; -- next auto id after the 10 seeded rows
--------------------------------------------------------------------------------
/Interview/14_Facebook_Common_Friend/solution.sql:
--------------------------------------------------------------------------------
1 | -- Make the directed friendship edges symmetric: every friendship
2 | -- appears once per direction.
3 | WITH edges AS (
4 |     SELECT user_id, friend_id FROM Friendship
5 |     UNION ALL
6 |     SELECT friend_id, user_id FROM Friendship
7 | )
8 | -- For every friend pair, count friends in common: fa lists friends of the
9 | -- first user, fb friends of the second; each match on friend_id is one
10 | -- common friend. Keep pairs with at least three.
11 | SELECT
12 |     pair.user_id
13 |     ,pair.friend_id
14 |     ,COUNT(*) AS common_friend
15 | FROM edges AS pair
16 | JOIN edges AS fa
17 |     ON pair.user_id = fa.user_id
18 | JOIN edges AS fb
19 |     ON pair.friend_id = fb.user_id
20 |     AND fb.friend_id = fa.friend_id
21 | GROUP BY pair.user_id, pair.friend_id
22 | HAVING COUNT(*) >= 3
23 | ORDER BY common_friend DESC;
--------------------------------------------------------------------------------
/Interview/15_Facebook_People_You_May_Know/README.md:
--------------------------------------------------------------------------------
1 | # People You May Know
2 | > Based on the number of common friends, recommend pairs of users who are not yet friends to each other.
3 |
4 | ___
5 | ### Observation
6 | This is a slight twist on the previous problem. Instead of selecting from existing friendships, we are proposing new ones. We'll use the same table as before.
7 |
8 | ___
9 | ### Step 1. Find Candidate Pairs
10 | This can be done with a cross join. Obviously we want to avoid matching a user with himself.
11 |
12 | ```sql
13 | WITH tmp AS (
14 | SELECT user_id, friend_id FROM Friendship
15 | UNION ALL
16 | SELECT friend_id, user_id FROM Friendship
17 | )
18 |
19 | SELECT * FROM
20 | (SELECT DISTINCT user_id FROM tmp) AS a
21 | CROSS JOIN
22 | (SELECT DISTINCT user_id FROM tmp) AS b
23 | ON a.user_id != b.user_id;
24 | ```
25 | ```
26 | +---------+---------+
27 | | user_id | user_id |
28 | +---------+---------+
29 | | bob | alice |
30 | | david | alice |
31 | | charles | alice |
32 | | mary | alice |
33 | | sonny | alice |
34 | | alice | bob |
35 | | david | bob |
36 | | charles | bob |
37 | | mary | bob |
38 | | sonny | bob |
39 | | alice | david |
40 | | bob | david |
41 | | charles | david |
42 | | mary | david |
43 | | sonny | david |
44 | | alice | charles |
45 | | bob | charles |
46 | | david | charles |
47 | | mary | charles |
48 | | sonny | charles |
49 | | alice | mary |
50 | | bob | mary |
51 | | david | mary |
52 | | charles | mary |
53 | | sonny | mary |
54 | | alice | sonny |
55 | | bob | sonny |
56 | | david | sonny |
57 | | charles | sonny |
58 | | mary | sonny |
59 | +---------+---------+
60 | 30 rows in set (0.00 sec)
61 | ```
62 |
63 | Also, we don't want to recommend people who are already friends.
64 |
65 | ```sql
66 | WITH tmp AS (
67 | SELECT user_id, friend_id FROM Friendship
68 | UNION ALL
69 | SELECT friend_id, user_id FROM Friendship
70 | )
71 |
72 | SELECT * FROM
73 | (SELECT DISTINCT user_id FROM tmp) AS a
74 | CROSS JOIN
75 | (SELECT DISTINCT user_id FROM tmp) AS b
76 | ON a.user_id != b.user_id
77 | AND (a.user_id, b.user_id) NOT IN (SELECT user_id, friend_id FROM tmp);
78 | ```
79 | ```
80 | +---------+---------+
81 | | user_id | user_id |
82 | +---------+---------+
83 | | sonny | alice |
84 | | charles | david |
85 | | mary | david |
86 | | david | charles |
87 | | mary | charles |
88 | | david | mary |
89 | | charles | mary |
90 | | sonny | mary |
91 | | alice | sonny |
92 | | mary | sonny |
93 | +---------+---------+
94 | 10 rows in set (0.00 sec)
95 | ```
96 |
97 | ___
98 | ### Step 2. Expand Two-way
99 | The rest is the same as the previous problem.
100 | ```sql
101 | WITH tmp AS (
102 | SELECT user_id, friend_id FROM Friendship
103 | UNION ALL
104 | SELECT friend_id, user_id FROM Friendship
105 | )
106 |
107 | SELECT
108 | a.user_id
109 | ,b.user_id
110 | ,af.friend_id AS common_friend
111 | FROM
112 | (SELECT DISTINCT user_id FROM tmp) AS a
113 | CROSS JOIN
114 | (SELECT DISTINCT user_id FROM tmp) AS b
115 | ON a.user_id != b.user_id
116 | AND (a.user_id, b.user_id) NOT IN (SELECT user_id, friend_id FROM tmp)
117 | JOIN tmp AS af
118 | ON a.user_id = af.user_id
119 | JOIN tmp AS bf
120 | ON b.user_id = bf.user_id
121 | AND bf.friend_id = af.friend_id;
122 | ```
123 | ```
124 | +---------+---------+---------------+
125 | | user_id | user_id | common_friend |
126 | +---------+---------+---------------+
127 | | alice | sonny | bob |
128 | | alice | sonny | charles |
129 | | alice | sonny | david |
130 | | david | charles | sonny |
131 | | charles | david | sonny |
132 | | charles | david | alice |
133 | | charles | mary | alice |
134 | | david | charles | alice |
135 | | david | mary | alice |
136 | | mary | charles | alice |
137 | | mary | david | alice |
138 | | david | charles | bob |
139 | | david | mary | bob |
140 | | charles | david | bob |
141 | | charles | mary | bob |
142 | | mary | david | bob |
143 | | mary | charles | bob |
144 | | mary | sonny | bob |
145 | | sonny | alice | david |
146 | | sonny | alice | charles |
147 | | sonny | alice | bob |
148 | | sonny | mary | bob |
149 | +---------+---------+---------------+
150 | 22 rows in set (0.01 sec)
151 | ```
152 |
153 | ___
154 | ### Step 3. Aggregation
155 | Group by the user_id pair and count the number of common friends.
156 |
157 | ```sql
158 | WITH tmp AS (
159 | SELECT user_id, friend_id FROM Friendship
160 | UNION ALL
161 | SELECT friend_id, user_id FROM Friendship
162 | )
163 |
164 | SELECT
165 | a.user_id
166 | ,b.user_id
167 | ,COUNT(*) AS common_friend
168 | FROM
169 | (SELECT DISTINCT user_id FROM tmp) AS a
170 | CROSS JOIN
171 | (SELECT DISTINCT user_id FROM tmp) AS b
172 | ON a.user_id != b.user_id
173 | AND (a.user_id, b.user_id) NOT IN (SELECT user_id, friend_id FROM tmp)
174 | JOIN tmp AS af
175 | ON a.user_id = af.user_id
176 | JOIN tmp AS bf
177 | ON b.user_id = bf.user_id
178 | AND bf.friend_id = af.friend_id
179 | GROUP BY a.user_id, b.user_id
180 | HAVING COUNT(*) >= 3
181 | ORDER BY common_friend DESC;
182 | ```
183 | ```
184 | +---------+---------+---------------+
185 | | user_id | user_id | common_friend |
186 | +---------+---------+---------------+
187 | | alice | sonny | 3 |
188 | | david | charles | 3 |
189 | | charles | david | 3 |
190 | | sonny | alice | 3 |
191 | +---------+---------+---------------+
192 | 4 rows in set (0.00 sec)
193 | ```
194 |
195 | See solution [here](solution.sql).
--------------------------------------------------------------------------------
/Interview/15_Facebook_People_You_May_Know/solution.sql:
--------------------------------------------------------------------------------
1 | -- Symmetric view of the friendship edges (friendship is mutual).
2 | WITH mutual AS (
3 |     SELECT user_id, friend_id FROM Friendship
4 |     UNION ALL
5 |     SELECT friend_id, user_id FROM Friendship
6 | )
7 | 
8 | -- Candidate pairs: every ordered pair of distinct users, excluding
9 | -- self-pairs and pairs that are already friends; then count the
10 | -- friends the two sides share and keep pairs with at least three.
11 | SELECT
12 |     lhs.user_id
13 |     ,rhs.user_id
14 |     ,COUNT(*) AS common_friend
15 | FROM
16 |     (SELECT DISTINCT user_id FROM mutual) AS lhs
17 | CROSS JOIN
18 |     (SELECT DISTINCT user_id FROM mutual) AS rhs
19 |     ON lhs.user_id != rhs.user_id
20 |     AND (lhs.user_id, rhs.user_id) NOT IN (SELECT user_id, friend_id FROM mutual)
21 | JOIN mutual AS lf
22 |     ON lhs.user_id = lf.user_id
23 | JOIN mutual AS rf
24 |     ON rhs.user_id = rf.user_id
25 |     AND rf.friend_id = lf.friend_id
26 | GROUP BY lhs.user_id, rhs.user_id
27 | HAVING COUNT(*) >= 3
28 | ORDER BY common_friend DESC;
--------------------------------------------------------------------------------
/Interview/16_Instagram_Common_Follower/README.md:
--------------------------------------------------------------------------------
1 | # Instagram Common Follower
2 | > Given a *Follow* table, find the pair of instagram accounts which share highest number of common followers.
3 |
4 | ___
5 | ### Load Data
6 | Load the database file [db.sql](db.sql) to localhost MySQL. An Instagram database will be created with one Follow table.
7 | ```bash
8 | mysql < db.sql -uroot -p
9 | ```
10 |
11 | ___
12 | ### Observation
13 | The major realization is that following is a one-way relationship, unlike friendship which must be two-way. The rest is similar to the earlier problem.
14 |
15 | ___
16 | ### Step 1. Build Candidate Pairs
17 | Find all possible account pairs using cross join. Avoid matching an account to itself.
18 | ```sql
19 | SELECT *
20 | FROM
21 | (SELECT DISTINCT user_id FROM Follow) AS a
22 | CROSS JOIN
23 | (SELECT DISTINCT user_id FROM Follow) AS b
24 | ON a.user_id != b.user_id
25 | ```
26 |
27 | ```
28 | +---------+---------+
29 | | user_id | user_id |
30 | +---------+---------+
31 | | musk | bezos |
32 | | ronaldo | bezos |
33 | | trump | bezos |
34 | | bezos | musk |
35 | | ronaldo | musk |
36 | | trump | musk |
37 | | bezos | ronaldo |
38 | | musk | ronaldo |
39 | | trump | ronaldo |
40 | | bezos | trump |
41 | | musk | trump |
42 | | ronaldo | trump |
43 | +---------+---------+
44 | 12 rows in set (0.00 sec)
45 | ```
46 |
47 | ___
48 | ### Step 2. Find Common Followers
49 | ```sql
50 | SELECT *
51 | FROM
52 | (SELECT DISTINCT user_id FROM Follow) AS a
53 | CROSS JOIN
54 | (SELECT DISTINCT user_id FROM Follow) AS b
55 | ON a.user_id != b.user_id
56 | JOIN Follow AS af
57 | ON a.user_id = af.user_id
58 | JOIN Follow AS bf
59 | ON b.user_id = bf.user_id
60 | AND af.follower_id = bf.follower_id;
61 | ```
62 |
63 | ```
64 | +---------+---------+---------+-------------+---------+-------------+
65 | | user_id | user_id | user_id | follower_id | user_id | follower_id |
66 | +---------+---------+---------+-------------+---------+-------------+
67 | | musk | bezos | musk | david | bezos | david |
68 | | ronaldo | bezos | ronaldo | david | bezos | david |
69 | | ronaldo | bezos | ronaldo | james | bezos | james |
70 | | ronaldo | bezos | ronaldo | mary | bezos | mary |
71 | | bezos | musk | bezos | david | musk | david |
72 | | ronaldo | musk | ronaldo | david | musk | david |
73 | | bezos | ronaldo | bezos | david | ronaldo | david |
74 | | musk | ronaldo | musk | david | ronaldo | david |
75 | | bezos | ronaldo | bezos | james | ronaldo | james |
76 | | bezos | ronaldo | bezos | mary | ronaldo | mary |
77 | +---------+---------+---------+-------------+---------+-------------+
78 | 10 rows in set (0.00 sec)
79 | ```
80 |
81 | ___
82 | ### Step 3. Aggregate Count
83 | ```sql
84 | SELECT
85 | a.user_id
86 | ,b.user_id
87 | ,COUNT(*) AS common
88 | FROM
89 | (SELECT DISTINCT user_id FROM Follow) AS a
90 | CROSS JOIN
91 | (SELECT DISTINCT user_id FROM Follow) AS b
92 | ON a.user_id != b.user_id
93 | JOIN Follow AS af
94 | ON a.user_id = af.user_id
95 | JOIN Follow AS bf
96 | ON b.user_id = bf.user_id
97 | AND af.follower_id = bf.follower_id
98 | GROUP BY a.user_id, b.user_id
99 | ORDER BY common DESC, a.user_id, b.user_id;
100 | ```
101 |
102 | ```
103 | +---------+---------+--------+
104 | | user_id | user_id | common |
105 | +---------+---------+--------+
106 | | bezos | ronaldo | 3 |
107 | | ronaldo | bezos | 3 |
108 | | bezos | musk | 1 |
109 | | musk | bezos | 1 |
110 | | musk | ronaldo | 1 |
111 | | ronaldo | musk | 1 |
112 | +---------+---------+--------+
113 | 6 rows in set (0.00 sec)
114 | ```
115 |
116 | ___
117 | ### Parting Thought
118 | Why do we still get candidate pairs in both directions? Because the undirected edge only affects __step 2__, when we join both parties of a candidate pair with their respective followers!
119 |
120 | See solution [here](solution.sql).
--------------------------------------------------------------------------------
/Interview/16_Instagram_Common_Follower/db.sql:
--------------------------------------------------------------------------------
1 | -- phpMyAdmin SQL Dump
2 | -- version 4.7.7
3 | -- https://www.phpmyadmin.net/
4 | --
5 | -- Host: localhost:8889
6 | -- Generation Time: Mar 23, 2019 at 11:42 PM
7 | -- Server version: 5.6.38
8 | -- PHP Version: 7.2.1
9 | 
10 | DROP SCHEMA IF EXISTS Instagram;
11 | CREATE SCHEMA Instagram;
12 | USE Instagram;
13 | 
14 | SET SQL_MODE = "NO_AUTO_VALUE_ON_ZERO";
15 | SET time_zone = "+00:00";
16 | 
17 | --
18 | -- Database: `practice`
19 | --
20 | 
21 | -- --------------------------------------------------------
22 | 
23 | --
24 | -- Table structure for table `Follow`
25 | --
26 | 
27 | DROP TABLE IF EXISTS `Follow`;
28 | CREATE TABLE `Follow` (
29 | `user_id` varchar(10) NOT NULL, -- account being followed
30 | `follower_id` varchar(10) NOT NULL -- account doing the following (one-way edge)
31 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
32 | 
33 | --
34 | -- Dumping data for table `Follow`
35 | --
36 | 
37 | INSERT INTO `Follow` (`user_id`, `follower_id`) VALUES
38 | ('bezos', 'david'),
39 | ('bezos', 'james'),
40 | ('bezos', 'join'), -- NOTE(review): 'join' looks like a typo for 'john'; kept as-is because the README's sample outputs depend on it
41 | ('bezos', 'mary'),
42 | ('musk', 'david'),
43 | ('musk', 'john'),
44 | ('ronaldo', 'david'),
45 | ('ronaldo', 'james'),
46 | ('ronaldo', 'linda'),
47 | ('ronaldo', 'mary'),
48 | ('trump', 'melinda');
49 | 
50 | --
51 | -- Indexes for dumped tables
52 | --
53 | 
54 | --
55 | -- Indexes for table `Follow`
56 | --
57 | ALTER TABLE `Follow`
58 | ADD PRIMARY KEY (`user_id`,`follower_id`); -- composite key: a follower follows an account at most once
59 | 
--------------------------------------------------------------------------------
/Interview/16_Instagram_Common_Follower/solution.sql:
--------------------------------------------------------------------------------
1 | -- Pair up distinct accounts (both orders, no self-pairs), then join each
2 | -- side of a pair with its followers and count the followers the two
3 | -- accounts share.
4 | SELECT
5 |     lhs.user_id
6 |     ,rhs.user_id
7 |     ,COUNT(*) AS common
8 | FROM
9 |     (SELECT DISTINCT user_id FROM Follow) AS lhs
10 | CROSS JOIN
11 |     (SELECT DISTINCT user_id FROM Follow) AS rhs
12 |     ON lhs.user_id != rhs.user_id
13 | JOIN Follow AS lf
14 |     ON lhs.user_id = lf.user_id
15 | JOIN Follow AS rf
16 |     ON rhs.user_id = rf.user_id
17 |     AND lf.follower_id = rf.follower_id
18 | GROUP BY lhs.user_id, rhs.user_id
19 | ORDER BY common DESC, lhs.user_id, rhs.user_id;
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Shaw Lu
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/LeetCode/185_Department_Top_Three_Salaries/README.md:
--------------------------------------------------------------------------------
1 | # Department Top Three Salaries
2 |
3 | ## Description
4 | The Employee table holds all employees. Every employee has an Id, and there is also a column for the department Id.
5 | ```
6 | +----+-------+--------+--------------+
7 | | Id | Name | Salary | DepartmentId |
8 | +----+-------+--------+--------------+
9 | | 1 | Joe | 70000 | 1 |
10 | | 2 | Henry | 80000 | 2 |
11 | | 3 | Sam | 60000 | 2 |
12 | | 4 | Max | 90000 | 1 |
13 | | 5 | Janet | 69000 | 1 |
14 | | 6 | Randy | 85000 | 1 |
15 | +----+-------+--------+--------------+
16 | ```
17 | The Department table holds all departments of the company.
18 | ```
19 | +----+----------+
20 | | Id | Name |
21 | +----+----------+
22 | | 1 | IT |
23 | | 2 | Sales |
24 | +----+----------+
25 | ```
26 | Write a SQL query to find employees who earn the top three salaries in each of the department. For the above tables, your SQL query should return the following rows.
27 | ```
28 | +------------+----------+--------+
29 | | Department | Employee | Salary |
30 | +------------+----------+--------+
31 | | IT | Max | 90000 |
32 | | IT | Randy | 85000 |
33 | | IT | Joe | 70000 |
34 | | Sales | Henry | 80000 |
35 | | Sales | Sam | 60000 |
36 | +------------+----------+--------+
37 | ```
38 |
39 | Load the database file [db.sql](db.sql) to localhost MySQL. Relevant tables will be created in the LeetCode database.
40 | ```
41 | mysql < db.sql -uroot -p
42 | ```
43 |
44 | ---
45 | ## Observation
46 | Make the following observation to interviewers. Confirm your observation is correct. Ask for clarification if necessary.
47 | * Are salary distinct for all employee? If not, must use __DISTINCT__ keyword.
48 | * How to display if department has fewer than 3 distinct salaries?
49 | * Does every employee belong to a department? No employee has __NULL__ in *DepartmentId*.
50 |
51 | ---
52 | ## On Correctness
53 | What does top-3 paid employees in each department have in common?
54 | * They have the same DepartmentId.
55 | * They have fewer than 3 persons who get paid higher salary (can use either < 3 or <= 2).
56 | * Department No. 1 has 0 above him.
57 | * Department No. 2 has 1 above him.
58 | * Department No. 3 has 2 above him.
59 | The conditions are set-up for correlated subquery. In subquery, we can use an equijoin (*DepartmentId*) and non-equijoin (*Salary*) to filter the outer query.
60 |
61 | Basic [MySQL solution](mysql_correlated_subquery.sql) implementing the equijoin and non-equijoin logic above.
62 | ```sql
63 | SELECT
64 | d.Name AS 'Department'
65 | ,e.Name AS 'Employee'
66 | ,e.Salary
67 | FROM Employee e
68 | JOIN Department d
69 | ON e.DepartmentId = d.Id
70 | WHERE
71 | (SELECT COUNT(DISTINCT e2.Salary)
72 | FROM
73 | Employee e2
74 | WHERE
75 | e2.Salary > e.Salary
76 | AND e.DepartmentId = e2.DepartmentId
77 | ) < 3;
78 | ```
79 |
80 | ---
81 | ## On Efficiency
82 | The subquery solution above enforces __nested select__, resulting in __N+1__ select statements and bad runtime efficiency. If we have access to window functions ([MS SQL solution](mssql_window.sql)), we can simply rank salary over each department as partition, and pick the top 3. Instead of using __DISTINCT__, the window solution uses __DENSE_RANK()__. Note that we cannot refer to window column *rnk* in the WHERE clause. So we must set up a temporary table.
83 |
84 | ```sql
85 | -- MS SQL: window function version
86 | WITH department_ranking AS (
87 | SELECT
88 | e.Name AS Employee
89 | ,d.Name AS Department
90 | ,e.Salary
91 | ,DENSE_RANK() OVER (PARTITION BY DepartmentId ORDER BY Salary DESC) AS rnk
92 | FROM Employee AS e
93 | JOIN Department AS d
94 | ON e.DepartmentId = d.Id
95 | )
96 | SELECT
97 | Department
98 | ,Employee
99 | ,Salary
100 | FROM department_ranking
101 | WHERE rnk <= 3
102 | ORDER BY Department ASC, Salary DESC;
103 | ```
104 |
105 | We can further improve efficiency by [filtering](mssql_pre_filter.sql) the ranking before joining with the department table. Instead of joining every employee with his department, we now only join the department top-3 employees with their departments. This is accomplished with an additional temporary table.
106 |
107 | ```sql
108 | -- MS SQL: Boosting effiency with pre-filtering
109 | WITH department_ranking AS (
110 | SELECT
111 | e.Name AS Employee
112 | ,e.Salary
113 | ,e.DepartmentId
114 | ,DENSE_RANK() OVER (PARTITION BY e.DepartmentId ORDER BY e.Salary DESC) AS rnk
115 | FROM Employee AS e
116 | )
117 | -- pre-filter table to reduce join size
118 | ,top_three AS (
119 | SELECT
120 | Employee
121 | ,Salary
122 | ,DepartmentId
123 | FROM department_ranking
124 | WHERE rnk <= 3
125 | )
126 | SELECT
127 | d.Name AS Department
128 | ,e.Employee
129 | ,e.Salary
130 | FROM top_three AS e
131 | JOIN Department AS d
132 | ON e.DepartmentId = d.Id
133 | ORDER BY d.Name ASC, e.Salary DESC;
134 | ```
135 |
136 | We can get rid of the second temporary table by moving the predicate condition *rnk <=3* inside JOIN. The logic is the same as the code above: tables are filtered before joining. We want to reduce the table size before join.
137 |
138 | ```sql
139 | -- MS SQL: cleaner version
140 | WITH department_ranking AS (
141 | SELECT
142 | e.Name AS Employee
143 | ,e.Salary
144 | ,e.DepartmentId
145 | ,DENSE_RANK() OVER (PARTITION BY e.DepartmentId ORDER BY e.Salary DESC) AS rnk
146 | FROM Employee AS e
147 | )
148 | SELECT
149 | d.Name AS Department
150 | ,r.Employee
151 | ,r.Salary
152 | FROM department_ranking AS r
153 | JOIN Department AS d
154 | ON r.DepartmentId = d.Id
155 | AND r.rnk <= 3
156 | ORDER BY d.Name ASC, r.Salary DESC;
157 | ```
158 |
159 | ---
160 | ## Parting Thought
161 | Because a temporary table has no index, the second solution works better only when the pre-filtering results in a significant reduction of table size. In this case, fortunately, we are taking only 3 employees out of every department, which may have hundreds of employees each (a huge reduction in join size). For each employee, we have access to *DepartmentId*, which is a foreign key referring to a primary key. The joining operation is reduced to three index lookups per department, and index lookups are efficient! So the last solution (the longest) is the most efficient one.
162 |
--------------------------------------------------------------------------------
/LeetCode/185_Department_Top_Three_Salaries/db.sql:
--------------------------------------------------------------------------------
1 | DROP SCHEMA IF EXISTS LeetCode;
2 | CREATE SCHEMA LeetCode;
3 | USE LeetCode;
4 | 
5 | -- Each Employee row carries a DepartmentId referencing Department.Id.
6 | CREATE TABLE IF NOT EXISTS Employee (Id int, Name varchar(255), Salary int, DepartmentId int);
7 | CREATE TABLE IF NOT EXISTS Department (Id int, Name varchar(255));
8 | 
9 | -- Reset and seed the sample data.
10 | TRUNCATE TABLE Employee;
11 | INSERT INTO Employee (Id, Name, Salary, DepartmentId) VALUES
12 |     ('1', 'Joe', '70000', '1'),
13 |     ('2', 'Henry', '80000', '2'),
14 |     ('3', 'Sam', '60000', '2'),
15 |     ('4', 'Max', '90000', '1'),
16 |     ('5', 'Janet', '69000', '1'),
17 |     ('6', 'Randy', '85000', '1');
18 | 
19 | TRUNCATE TABLE Department;
20 | INSERT INTO Department (Id, Name) VALUES
21 |     ('1', 'IT'),
22 |     ('2', 'Sales');
--------------------------------------------------------------------------------
/LeetCode/185_Department_Top_Three_Salaries/mssql_pre_filter.sql:
--------------------------------------------------------------------------------
1 | -- MS SQL: Boosting efficiency with pre-filtering
2 | WITH department_ranking AS (
3 | SELECT
4 | e.Name AS Employee
5 | ,e.Salary
6 | ,e.DepartmentId
7 | ,DENSE_RANK() OVER (PARTITION BY e.DepartmentId ORDER BY e.Salary DESC) AS rnk -- ties share a rank; no gaps in the sequence
8 | FROM Employee AS e
9 | )
10 | -- pre-filter table to reduce join size
11 | ,top_three AS (
12 | SELECT
13 | Employee
14 | ,Salary
15 | ,DepartmentId
16 | FROM department_ranking
17 | WHERE rnk <= 3 -- keep only the top three distinct salaries per department
18 | )
19 | SELECT
20 | d.Name AS Department
21 | ,e.Employee
22 | ,e.Salary
23 | FROM top_three AS e
24 | JOIN Department AS d
25 | ON e.DepartmentId = d.Id
26 | ORDER BY d.Name ASC, e.Salary DESC;
27 | 
28 | -- MS SQL: cleaner version
29 | WITH department_ranking AS (
30 | SELECT
31 | e.Name AS Employee
32 | ,e.Salary
33 | ,e.DepartmentId
34 | ,DENSE_RANK() OVER (PARTITION BY e.DepartmentId ORDER BY e.Salary DESC) AS rnk
35 | FROM Employee AS e
36 | )
37 | SELECT
38 | d.Name AS Department
39 | ,r.Employee
40 | ,r.Salary
41 | FROM department_ranking AS r
42 | JOIN Department AS d
43 | ON r.DepartmentId = d.Id
44 | AND r.rnk <= 3 -- filter inside the join, replacing the second CTE
45 | ORDER BY d.Name ASC, r.Salary DESC;
--------------------------------------------------------------------------------
/LeetCode/185_Department_Top_Three_Salaries/mssql_window.sql:
--------------------------------------------------------------------------------
1 | -- Window-function version: rank salaries within each department with
2 | -- DENSE_RANK (ties share a rank, no gaps), then keep the top three ranks.
3 | -- rnk cannot appear in WHERE directly, hence the CTE.
4 | WITH ranked AS (
5 |     SELECT
6 |         emp.Name AS Employee
7 |         ,dept.Name AS Department
8 |         ,emp.Salary
9 |         ,DENSE_RANK() OVER (PARTITION BY emp.DepartmentId ORDER BY emp.Salary DESC) AS rnk
10 |     FROM Employee AS emp
11 |     JOIN Department AS dept
12 |         ON emp.DepartmentId = dept.Id
13 | )
14 | SELECT
15 |     Department
16 |     ,Employee
17 |     ,Salary
18 | FROM ranked
19 | WHERE rnk <= 3
20 | ORDER BY Department ASC, Salary DESC;
--------------------------------------------------------------------------------
/LeetCode/185_Department_Top_Three_Salaries/mysql_correlated_subquery.sql:
--------------------------------------------------------------------------------
1 | # Correlated subquery: keep an employee when fewer than 3 DISTINCT salaries in the same department exceed theirs
2 | SELECT
3 | d.Name AS 'Department'
4 | ,e.Name AS 'Employee'
5 | ,e.Salary
6 | FROM Employee e
7 | JOIN Department d
8 | ON e.DepartmentId = d.Id
9 | WHERE
10 | (SELECT COUNT(DISTINCT e2.Salary)
11 | FROM
12 | Employee e2
13 | WHERE
14 | e2.Salary > e.Salary -- strictly higher salaries only
15 | AND e.DepartmentId = e2.DepartmentId -- correlated on the outer row's department
16 | ) < 3; -- 0, 1, or 2 higher distinct salaries => within the top three
17 |
--------------------------------------------------------------------------------
/LeetCode/262_Trips_and_Users/README.md:
--------------------------------------------------------------------------------
1 | # Trips and Users
2 |
3 | ## Description
4 | The Trips table holds all taxi trips. Each trip has a unique Id, while Client_Id and Driver_Id are both foreign keys to the Users_Id at the Users table. Status is an ENUM type of (‘completed’, ‘cancelled_by_driver’, ‘cancelled_by_client’).
5 |
6 | ```
7 | +----+-----------+-----------+---------+--------------------+----------+
8 | | Id | Client_Id | Driver_Id | City_Id | Status |Request_at|
9 | +----+-----------+-----------+---------+--------------------+----------+
10 | | 1 | 1 | 10 | 1 | completed |2013-10-01|
11 | | 2 | 2 | 11 | 1 | cancelled_by_driver|2013-10-01|
12 | | 3 | 3 | 12 | 6 | completed |2013-10-01|
13 | | 4 | 4 | 13 | 6 | cancelled_by_client|2013-10-01|
14 | | 5 | 1 | 10 | 1 | completed |2013-10-02|
15 | | 6 | 2 | 11 | 6 | completed |2013-10-02|
16 | | 7 | 3 | 12 | 6 | completed |2013-10-02|
17 | | 8 | 2 | 12 | 12 | completed |2013-10-03|
18 | | 9 | 3 | 10 | 12 | completed |2013-10-03|
19 | | 10 | 4 | 13 | 12 | cancelled_by_driver|2013-10-03|
20 | +----+-----------+-----------+---------+--------------------+----------+
21 | ```
22 |
23 | The Users table holds all users. Each user has an unique Users_Id, and Role is an ENUM type of (‘client’, ‘driver’, ‘partner’).
24 |
25 | ```
26 | +----------+--------+--------+
27 | | Users_Id | Banned | Role |
28 | +----------+--------+--------+
29 | | 1 | No | client |
30 | | 2 | Yes | client |
31 | | 3 | No | client |
32 | | 4 | No | client |
33 | | 10 | No | driver |
34 | | 11 | No | driver |
35 | | 12 | No | driver |
36 | | 13 | No | driver |
37 | +----------+--------+--------+
38 | ```
39 | Write a SQL query to find the cancellation rate of requests made by unbanned users between Oct 1, 2013 and Oct 3, 2013. For the above tables, your SQL query should return the following rows with the cancellation rate being rounded to two decimal places.
40 | ```
41 | +------------+-------------------+
42 | | Day | Cancellation Rate |
43 | +------------+-------------------+
44 | | 2013-10-01 | 0.33 |
45 | | 2013-10-02 | 0.00 |
46 | | 2013-10-03 | 0.50 |
47 | +------------+-------------------+
48 | ```
49 |
50 | Load the database file [db.sql](db.sql) to localhost MySQL. Relevant tables will be created in the LeetCode database.
51 | ```
52 | mysql < db.sql -uroot -p
53 | ```
54 |
55 | ---
56 | ## Observation
57 | Make the following observation to interviewers. Confirm your observation is correct. Ask for clarification if necessary.
58 | * Each trip has two foreign keys (*Client_Id*, *Driver_Id*) referring to the *Users* table's primary key.
59 | * No trip can have __NULL__ in *Driver_Id* or *Client_Id*.
60 | * Because of the above assumptions, the three-way driver-trip-rider join results in the same number of rows as the *Trips* table, before applying the WHERE clause predicate.
61 |
62 | ---
63 | ## On Correctness
64 | * Need to exclude both banned drivers and riders from calculation.
65 | * Output the rate to 2 decimal places as required.
66 | * Give descriptive names to output columns as required.
67 | * Constrain date range as required.
68 |
69 | Basic [solution](mysql_simple.sql) that gives correct output:
70 | ```sql
71 | -- MySQL: simple version
72 | SELECT
73 | t.Request_at AS Day
74 | ,ROUND(SUM(t.Status != "completed") / COUNT(*), 2) AS 'Cancellation Rate'
75 | FROM Trips t
76 | JOIN Users d ON t.Driver_Id = d.Users_Id
77 | JOIN Users c ON t.Client_Id = c.Users_Id
78 | WHERE d.Banned = "No"
79 | AND c.Banned = "No"
80 | AND t.Request_at BETWEEN "2013-10-01" AND "2013-10-03"
81 | GROUP BY t.Request_at
82 | ORDER BY t.Request_at;
83 | ```
84 |
85 | ---
86 | ## On Efficiency
87 | SQL performs the JOIN operation before applying the WHERE clause. If many users are banned, the JOIN operation produces lots of invalid trips in which either the rider or the driver is banned. We may [pre-filter](mysql_pre_filter.sql) the *Users* table and store the results in a temporary table (note that temporary tables do not work in LeetCode).
88 |
89 | Similarly, joining the full *Trips* table can be wasteful. It may contain years of data, and we're interested in only 3 days' data. We can pre-filter the *Trips* table before joining it.
90 |
91 | Finally, we inner join the pre-filtered *valid_trips* table to the *valid_user* table twice. INNER JOIN filters out trips that have no match in *valid_user*, meaning that the driver or rider is banned.
92 |
93 | ```sql
94 | -- MySQL: pre-filtering before join
95 | -- WARNING: LeetCode MySQL does not allow temporary table
96 | WITH valid_user AS (
97 | SELECT Users_Id
98 | FROM Users
99 | WHERE Banned = "No"
100 | )
101 | ,valid_trips AS (
102 | SELECT *
103 | FROM Trips
104 | WHERE Request_at BETWEEN "2013-10-01" AND "2013-10-03"
105 | )
106 | SELECT
107 | t.Request_at AS Day
108 | ,ROUND(SUM(t.Status != "completed") / COUNT(*), 2) AS 'Cancellation Rate'
109 | FROM valid_trips t
110 | JOIN valid_user d ON t.Driver_Id = d.Users_Id
111 | JOIN valid_user c ON t.Client_Id = c.Users_Id
112 | GROUP BY t.Request_at;
113 | ```
114 |
115 | Instead of building two temporary tables, we can pre-filter by moving the predicate condition inside JOIN. The logic is the same as the code above: tables are filtered before joining. We want to reduce the table size before join. In the SQL code below, trips and drivers are filtered before being joined, and the clients are filtered before joining with the output of the first join.
116 |
117 | ```sql
118 | -- MySQL cleaner code
119 | SELECT
120 | t.Request_at AS Day
121 | ,ROUND(SUM(t.Status != "completed") / COUNT(*), 2) AS 'Cancellation Rate'
122 | FROM Trips t
123 | JOIN Users d
124 | ON t.Driver_Id = d.Users_Id
125 | AND d.Banned = 'No'
126 | AND t.Request_at BETWEEN "2013-10-01" AND "2013-10-03"
127 | JOIN Users c
128 | ON t.Client_Id = c.Users_Id
129 | And c.Banned = 'No'
130 | GROUP BY t.Request_at;
131 | ```
132 |
133 | Alternatively, use a [set](mysql_set.sql) to retain all valid *User_Id* values, and directly filter the *Trips* table without joining. The disadvantage is that because most database engines convert an IN clause to a series of OR operators, the query needs to be re-evaluated every time a new user gets banned, because the number of OR operators is constantly changing.
134 |
135 | When using a multi-column predicate, apply the more restrictive condition first. For example, filter *Request_at* before filtering *Users_Id*. Because the table size gets cut drastically upfront, the computation required for later predicates decreases.
136 |
137 | Using an IN (... Banned = "No") clause is more efficient than using NOT IN (... Banned = "Yes"). To check that an element is not in a set, a full scan of the set is required.
138 |
139 | ```sql
140 | -- MySQL: set version
141 | SELECT
142 | Request_at AS Day
143 | ,ROUND(SUM(Status != "completed") / COUNT(*), 2) AS 'Cancellation Rate'
144 | FROM Trips
145 | WHERE Request_at BETWEEN "2013-10-01" AND "2013-10-03"
146 | AND Driver_Id IN (SELECT Users_Id FROM Users WHERE Banned = 'No')
147 | AND Client_Id IN (SELECT Users_Id FROM Users WHERE Banned = 'No')
148 | GROUP BY Request_at;
149 | ```
150 |
151 | ---
152 | ## Parting Thought
153 | Because a temporary table has no index, the second solution works better only when the pre-filtering results in a significant reduction of table size. Otherwise, joining temporary tables without an index can be slower than joining the full tables. In practice, look up the query plan and estimated cost before running the query.
154 |
--------------------------------------------------------------------------------
/LeetCode/262_Trips_and_Users/db.sql:
--------------------------------------------------------------------------------
1 | DROP SCHEMA IF EXISTS LeetCode;
2 | CREATE SCHEMA LeetCode;
3 | USE LeetCode;
4 |
5 | Create table If Not Exists Trips (Id int, Client_Id int, Driver_Id int, City_Id int, Status ENUM('completed', 'cancelled_by_driver', 'cancelled_by_client'), Request_at varchar(50));
6 | Create table If Not Exists Users (Users_Id int, Banned varchar(50), Role ENUM('client', 'driver', 'partner'));
7 | Truncate table Trips;
8 | insert into Trips (Id, Client_Id, Driver_Id, City_Id, Status, Request_at) values ('1', '1', '10', '1', 'completed', '2013-10-01');
9 | insert into Trips (Id, Client_Id, Driver_Id, City_Id, Status, Request_at) values ('2', '2', '11', '1', 'cancelled_by_driver', '2013-10-01');
10 | insert into Trips (Id, Client_Id, Driver_Id, City_Id, Status, Request_at) values ('3', '3', '12', '6', 'completed', '2013-10-01');
11 | insert into Trips (Id, Client_Id, Driver_Id, City_Id, Status, Request_at) values ('4', '4', '13', '6', 'cancelled_by_client', '2013-10-01');
12 | insert into Trips (Id, Client_Id, Driver_Id, City_Id, Status, Request_at) values ('5', '1', '10', '1', 'completed', '2013-10-02');
13 | insert into Trips (Id, Client_Id, Driver_Id, City_Id, Status, Request_at) values ('6', '2', '11', '6', 'completed', '2013-10-02');
14 | insert into Trips (Id, Client_Id, Driver_Id, City_Id, Status, Request_at) values ('7', '3', '12', '6', 'completed', '2013-10-02');
15 | insert into Trips (Id, Client_Id, Driver_Id, City_Id, Status, Request_at) values ('8', '2', '12', '12', 'completed', '2013-10-03');
16 | insert into Trips (Id, Client_Id, Driver_Id, City_Id, Status, Request_at) values ('9', '3', '10', '12', 'completed', '2013-10-03');
17 | insert into Trips (Id, Client_Id, Driver_Id, City_Id, Status, Request_at) values ('10', '4', '13', '12', 'cancelled_by_driver', '2013-10-03');
18 | Truncate table Users;
19 | insert into Users (Users_Id, Banned, Role) values ('1', 'No', 'client');
20 | insert into Users (Users_Id, Banned, Role) values ('2', 'Yes', 'client');
21 | insert into Users (Users_Id, Banned, Role) values ('3', 'No', 'client');
22 | insert into Users (Users_Id, Banned, Role) values ('4', 'No', 'client');
23 | insert into Users (Users_Id, Banned, Role) values ('10', 'No', 'driver');
24 | insert into Users (Users_Id, Banned, Role) values ('11', 'No', 'driver');
25 | insert into Users (Users_Id, Banned, Role) values ('12', 'No', 'driver');
26 | insert into Users (Users_Id, Banned, Role) values ('13', 'No', 'driver');
--------------------------------------------------------------------------------
/LeetCode/262_Trips_and_Users/mysql_pre_filter.sql:
--------------------------------------------------------------------------------
1 | -- MySQL: pre-filtering before join
2 | -- WARNING: LeetCode MySQL does not allow temporary table
3 | WITH valid_user AS (
4 | SELECT Users_Id
5 | FROM Users
6 | WHERE Banned = "No"
7 | )
8 | ,valid_trips AS (
9 | SELECT *
10 | FROM Trips
11 | WHERE Request_at BETWEEN "2013-10-01" AND "2013-10-03"
12 | )
13 | SELECT
14 | t.Request_at AS Day
15 | ,ROUND(SUM(t.Status != "completed") / COUNT(*), 2) AS 'Cancellation Rate'
16 | FROM valid_trips t
17 | JOIN valid_user d ON t.Driver_Id = d.Users_Id
18 | JOIN valid_user c ON t.Client_Id = c.Users_Id
19 | GROUP BY t.Request_at;
20 |
21 | -- MySQL cleaner code
22 | -- same pre-filtering logic, with the predicates moved into the JOIN clauses
23 | SELECT
24 | t.Request_at AS Day
25 | ,ROUND(SUM(t.Status != "completed") / COUNT(*), 2) AS 'Cancellation Rate'
26 | FROM Trips t
27 | JOIN Users d
28 | ON t.Driver_Id = d.Users_Id
29 | AND d.Banned = 'No'
30 | AND t.Request_at BETWEEN "2013-10-01" AND "2013-10-03"
31 | JOIN Users c
32 | ON t.Client_Id = c.Users_Id
33 | And c.Banned = 'No'
34 | GROUP BY t.Request_at;
--------------------------------------------------------------------------------
/LeetCode/262_Trips_and_Users/mysql_set.sql:
--------------------------------------------------------------------------------
1 | -- MySQL: set version
2 | SELECT
3 | Request_at AS Day
4 | ,ROUND(SUM(Status != "completed") / COUNT(*), 2) AS 'Cancellation Rate'
5 | FROM Trips
6 | WHERE Request_at BETWEEN "2013-10-01" AND "2013-10-03"
7 | AND Driver_Id IN (SELECT Users_Id FROM Users WHERE Banned = 'No')
8 | AND Client_Id IN (SELECT Users_Id FROM Users WHERE Banned = 'No')
9 | GROUP BY Request_at;
--------------------------------------------------------------------------------
/LeetCode/262_Trips_and_Users/mysql_simple.sql:
--------------------------------------------------------------------------------
1 | -- MySQL: simple join version
2 | SELECT
3 | t.Request_at AS Day
4 | ,ROUND(SUM(t.Status != "completed") / COUNT(*), 2) AS 'Cancellation Rate'
5 | FROM Trips t
6 | JOIN Users d ON t.Driver_Id = d.Users_Id
7 | JOIN Users c ON t.Client_Id = c.Users_Id
8 | WHERE d.Banned = "No"
9 | AND c.Banned = "No"
10 | AND t.Request_at BETWEEN "2013-10-01" AND "2013-10-03"
11 | GROUP BY t.Request_at
12 | ORDER BY t.Request_at;
13 |
--------------------------------------------------------------------------------
/LeetCode/579_Find_Cumulative_Salary_of_an_Employee/db.sql:
--------------------------------------------------------------------------------
1 | DROP SCHEMA IF EXISTS LeetCode;
2 | CREATE SCHEMA LeetCode;
3 | USE LeetCode;
4 |
5 | Create table If Not Exists Employee (Id int, Month int, Salary int);
6 |
7 | insert into Employee (Id, Month, Salary) values ('1', '1', '20');
8 | insert into Employee (Id, Month, Salary) values ('2', '1', '20');
9 | insert into Employee (Id, Month, Salary) values ('1', '2', '30');
10 | insert into Employee (Id, Month, Salary) values ('2', '2', '30');
11 | insert into Employee (Id, Month, Salary) values ('3', '2', '40');
12 | insert into Employee (Id, Month, Salary) values ('1', '3', '40');
13 | insert into Employee (Id, Month, Salary) values ('3', '3', '60');
14 | insert into Employee (Id, Month, Salary) values ('1', '4', '60');
15 | insert into Employee (Id, Month, Salary) values ('3', '4', '70');
--------------------------------------------------------------------------------
/LeetCode/579_Find_Cumulative_Salary_of_an_Employee/mssql_lag.sql:
--------------------------------------------------------------------------------
1 | -- MS SQL: Lag window function
2 | WITH cumulative AS (
3 | SELECT
4 | Id
5 | ,LAG(Month, 1) OVER (PARTITION BY Id ORDER BY Month ASC) AS Month
6 | ,ISNULL(LAG(Salary, 1) OVER (PARTITION BY Id ORDER BY Month ASC), 0)
7 | + ISNULL(LAG(Salary, 2) OVER (PARTITION BY Id ORDER BY Month ASC), 0)
8 | + ISNULL(LAG(Salary, 3) OVER (PARTITION BY Id ORDER BY Month ASC), 0) AS Salary
9 | FROM Employee
10 | )
11 | SELECT *
12 | FROM cumulative
13 | WHERE Month IS NOT NULL
14 | ORDER BY Id ASC, Month DESC;
15 |
16 | -- MySQL 8 also supports window function, and even window alias!
17 | WITH cumulative AS (
18 | SELECT
19 | Id
20 | ,LAG(Month, 1) OVER w AS Month
21 | ,IFNULL(LAG(Salary, 1) OVER w, 0)
22 | + IFNULL(LAG(Salary, 2) OVER w, 0)
23 | + IFNULL(LAG(Salary, 3) OVER w, 0) AS Salary
24 | FROM Employee
25 | WINDOW w AS (PARTITION BY Id ORDER BY Month ASC)
26 | )
27 | SELECT *
28 | FROM cumulative
29 | WHERE Month IS NOT NULL
30 | ORDER BY Id ASC, Month DESC;
--------------------------------------------------------------------------------
/LeetCode/579_Find_Cumulative_Salary_of_an_Employee/mysql_join_tmp_table.sql:
--------------------------------------------------------------------------------
1 | -- MySQL: non-equijoin
2 | SELECT
3 | e.Id
4 | ,e.Month
5 | ,IFNULL(e.Salary, 0) + IFNULL(e1.Salary, 0) + IFNULL(e2.Salary, 0) AS Salary
6 | FROM
7 | (SELECT Id, MAX(Month) AS max_month
8 | FROM Employee
9 | GROUP BY Id) AS e_max
10 | -- if using left join, employee with only one record will see NULL returned after join
11 | JOIN Employee e ON e_max.Id = e.Id AND e_max.max_month > e.Month
12 | LEFT JOIN Employee e1 ON e.Id = e1.Id AND e.Month = e1.Month + 1
13 | LEFT JOIN Employee e2 ON e.Id = e2.Id AND e.Month = e2.Month + 2
14 | ORDER BY e.Id ASC, e.Month DESC;
15 |
16 | ----------------------------------------------------------------------------------------
17 | -- alternatively, use where clause to filter result
18 | SELECT
19 | e.Id
20 | ,e.Month
21 | ,IFNULL(e.Salary, 0) + IFNULL(e1.Salary, 0) + IFNULL(e2.Salary, 0) AS Salary
22 | FROM
23 | (SELECT Id, MAX(Month) AS max_month
24 | FROM Employee
25 | GROUP BY Id) AS e_max
26 | -- if using left join, employee with only one record will see NULL returned after join
27 | JOIN Employee e ON e_max.Id = e.Id
28 | LEFT JOIN Employee e1 ON e.Id = e1.Id AND e.Month = e1.Month + 1
29 | LEFT JOIN Employee e2 ON e.Id = e2.Id AND e.Month = e2.Month + 2
30 | WHERE e_max.max_month != e.Month
31 | ORDER BY e.Id ASC, e.Month DESC;
32 |
33 | ----------------------------------------------------------------------------------------
34 | -- alternatively, use cross join, also correct
35 | SELECT
36 | e.Id
37 | ,e.Month
38 | ,IFNULL(e.Salary, 0) + IFNULL(e1.Salary, 0) + IFNULL(e2.Salary, 0) AS Salary
39 | FROM
40 | (SELECT Id, MAX(Month) AS max_month
41 | FROM Employee
42 | GROUP BY Id) AS e_max,
43 | Employee e
44 | LEFT JOIN Employee e1 ON e.Id = e1.Id AND e.Month = e1.Month + 1
45 | LEFT JOIN Employee e2 ON e.Id = e2.Id AND e.Month = e2.Month + 2
46 | WHERE e.Id = e_max.Id AND e.Month != e_max.max_month
47 | ORDER BY e.Id ASC, e.Month DESC;
--------------------------------------------------------------------------------
/LeetCode/579_Find_Cumulative_Salary_of_an_Employee/mysql_set.sql:
--------------------------------------------------------------------------------
1 | -- MySQL: set solution
2 | SELECT
3 | e.Id
4 | ,e.Month
5 | ,IFNULL(e.Salary, 0) + IFNULL(e1.Salary, 0) + IFNULL(e2.Salary, 0) AS Salary
6 | FROM Employee e
7 | LEFT JOIN Employee e1 ON e.Id = e1.Id AND e.Month = e1.Month + 1
8 | LEFT JOIN Employee e2 ON e.Id = e2.Id AND e.Month = e2.Month + 2
9 | WHERE (e.Id, e.Month) NOT IN (
10 | SELECT
11 | Id
12 | ,MAX(Month) AS max_month
13 | FROM Employee
14 | GROUP BY Id)
15 | ORDER BY e.Id ASC, e.Month DESC;
--------------------------------------------------------------------------------
/LeetCode/579_Find_Cumulative_Salary_of_an_Employee/mysql_single_join.sql:
--------------------------------------------------------------------------------
1 | -- MySQL: single join
2 | SELECT
3 | e1.Id
4 | ,MAX(e2.Month) AS Month
5 | ,SUM(e2.Salary) AS Salary
6 | FROM Employee e1
7 | JOIN Employee e2
8 | ON e1.Id = e2.Id
9 | AND e1.Month - e2.Month BETWEEN 1 AND 3
10 | GROUP BY e1.Id, e1.Month
11 | ORDER BY e1.Id ASC, e1.Month DESC;
--------------------------------------------------------------------------------
/LeetCode/601_Human_Traffic_of_Stadium/README.md:
--------------------------------------------------------------------------------
1 | # Human Traffic of Stadium
2 |
3 | ## Description
4 | X city built a new stadium, each day many people visit it and the stats are saved as these columns: id, visit_date, people
5 |
6 | Please write a query to display the records which have 3 or more consecutive rows and the amount of people more than 100(inclusive).
7 |
8 | Load the database file [db.sql](db.sql) to localhost MySQL. Relevant tables will be created in the LeetCode database.
9 | ```
10 | mysql < db.sql -uroot -p
11 | ```
12 |
13 | For example, the table stadium:
14 | ```
15 | +------+------------+-----------+
16 | | id | visit_date | people |
17 | +------+------------+-----------+
18 | | 1 | 2017-01-01 | 10 |
19 | | 2 | 2017-01-02 | 109 |
20 | | 3 | 2017-01-03 | 150 |
21 | | 4 | 2017-01-04 | 99 |
22 | | 5 | 2017-01-05 | 145 |
23 | | 6 | 2017-01-06 | 1455 |
24 | | 7 | 2017-01-07 | 199 |
25 | | 8 | 2017-01-08 | 188 |
26 | +------+------------+-----------+
27 | ```
28 | For the sample data above, the output is:
29 | ```
30 | +------+------------+-----------+
31 | | id | visit_date | people |
32 | +------+------------+-----------+
33 | | 5 | 2017-01-05 | 145 |
34 | | 6 | 2017-01-06 | 1455 |
35 | | 7 | 2017-01-07 | 199 |
36 | | 8 | 2017-01-08 | 188 |
37 | +------+------------+-----------+
38 | ```
39 | Note:
40 | Each day only have one row record, and the dates are increasing with id increasing.
41 |
42 | ---
43 | ## Observation
44 | Make the following observation to interviewers. Confirm your observation is correct. Ask for clarification if necessary.
45 | * Every day has a record. If two dates differ by 1, their ids differ by 1. Therefore, instead of joining by *visit_date* we can join by *id*, which is more efficient.
46 | * We are scanning three-day consecutive window here. A valid row can either be positioned at the beginning of the window, at the middle, or end of the window.
47 |
48 | ---
49 | ## On Correctness
50 | The key is to remove duplicates. Consider four days in a row, each day with over 100 people. In the first window, all three days will be returned. In the next window, days 2, 3, 4 will all be returned, resulting in duplicated days 2 and 3. To remove duplicates, use the *DISTINCT* keyword, with three [self-joins](mysql_simple.sql):
51 |
52 | | window 1 | window 2 |
53 | |----|-----------|
54 | | day 1| |
55 | | day 2| day 2 |
56 | | day 3| day 3 |
57 | | | day 4 |
58 |
59 | ```sql
60 | -- MySQL solution
61 | SELECT DISTINCT
62 | s1.*
63 | FROM
64 | stadium AS s1
65 | ,stadium AS s2
66 | ,stadium AS s3
67 | WHERE s1.people >= 100
68 | AND s2.people >= 100
69 | AND s3.people >= 100
70 | AND ((s1.id = s2.id - 1 AND s1.id = s3.id - 2) -- start of window
71 | OR (s1.id = s2.id + 1 AND s1.id = s3.id - 1) -- middle of window
72 | OR (s1.id = s2.id + 2 AND s1.id = s3.id + 1)) -- end of window
73 | ORDER BY s1.id;
74 | ```
75 |
76 | ---
77 | ## On Efficiency
78 | Joining the table three times results in a huge Cartesian product. One way to improve efficiency is to simply [pre-filter](mssql_pre_filter.sql) the table, so only days with over 100 people are joined.
79 |
80 | ```sql
81 | -- MySQL: pre-filtering
82 | SELECT DISTINCT
83 | s1.*
84 | FROM
85 | (SELECT * FROM stadium WHERE people >= 100) AS s1
86 | ,(SELECT * FROM stadium WHERE people >= 100) AS s2
87 | ,(SELECT * FROM stadium WHERE people >= 100) AS s3
88 | WHERE (s1.id = s2.id - 1 AND s1.id = s3.id - 2)
89 | OR (s1.id = s2.id + 1 AND s1.id = s3.id - 1)
90 | OR (s1.id = s2.id + 2 AND s1.id = s3.id + 1)
91 | ORDER BY s1.id;
92 | ```
93 |
94 | If using MS SQL, we only need to define the temporary table once, making it much easier to maintain.
95 |
96 | ```sql
97 | -- MS SQL: cleaner code
98 | WITH good_day AS (
99 | SELECT * FROM stadium WHERE people >= 100
100 | )
101 | SELECT DISTINCT s1.* FROM
102 | good_day AS s1,
103 | good_day AS s2,
104 | good_day AS s3
105 | WHERE (s1.id = s2.id - 1 AND s1.id = s3.id - 2)
106 | OR (s1.id = s2.id + 1 AND s1.id = s3.id - 1)
107 | OR (s1.id = s2.id + 2 AND s1.id = s3.id + 1)
108 | ORDER BY s1.id;
109 | ```
110 |
111 | Using either MySQL or MS SQL, we can do away with temporary table, by merging predicates into the JOIN clause. This small change achieves the same filtering effect.
112 |
113 | ```sql
114 | SELECT DISTINCT s1.*
115 | FROM stadium AS s1
116 | LEFT JOIN stadium AS s2
117 | ON s1.people >= 100
118 | AND s2.people >= 100
119 | LEFT JOIN stadium AS s3
120 | ON s3.people >= 100
121 | WHERE (s1.id = s2.id - 1 AND s1.id = s3.id - 2)
122 | OR (s1.id = s2.id + 1 AND s1.id = s3.id - 1)
123 | OR (s1.id = s2.id + 2 AND s1.id = s3.id + 1)
124 | ORDER BY s1.id;
125 | ```
126 |
127 | ---
128 | ## Optional: Window function
129 | In MS SQL, the problem can be solved with [window function](mssql_window.sql). Be careful that you __cannot__ use window column in the predicates. You must save the expanded table with window columns in a temporary table.
130 |
131 | ```sql
132 | -- MS SQL: window
133 | WITH long_table AS (
134 | SELECT
135 | *
136 | ,LAG(people, 2) OVER (ORDER BY id ASC) AS pre2
137 | ,LAG(people, 1) OVER (ORDER BY id ASC) AS pre1
138 | ,LEAD(people, 1) OVER (ORDER BY id ASC) AS nxt1
139 | ,LEAD(people, 2) OVER (ORDER BY id ASC) AS nxt2
140 | FROM stadium
141 | )
142 | SELECT
143 | id
144 | ,visit_date
145 | ,people
146 | FROM long_table
147 | WHERE people >= 100
148 | AND ((pre2 >= 100 AND pre1 >= 100)
149 | OR (pre1 >= 100 AND nxt1 >= 100)
150 | OR (nxt1 >= 100 AND nxt2 >= 100))
151 | ORDER BY id;
152 | ```
153 |
154 | For the sake of completion, MySQL8 supports window function too, and can be written with window alias.
155 |
156 | ```sql
157 | -- MySQL 8 equivalent
158 | WITH long_table AS (
159 | SELECT
160 | *
161 | ,LAG(people, 2) OVER w AS pre2
162 | ,LAG(people, 1) OVER w AS pre1
163 | ,LEAD(people, 1) OVER w AS nxt1
164 | ,LEAD(people, 2) OVER w AS nxt2
165 | FROM stadium
166 | WINDOW w AS (ORDER BY id ASC)
167 | )
168 | SELECT
169 | id
170 | ,visit_date
171 | ,people
172 | FROM long_table
173 | WHERE people >= 100
174 | AND ((pre2 >= 100 AND pre1 >= 100)
175 | OR (pre1 >= 100 AND nxt1 >= 100)
176 | OR (nxt1 >= 100 AND nxt2 >= 100))
177 | ORDER BY id;
178 | ```
179 |
180 | ---
181 | ## Parting Thought
182 | Window function is more efficient than join when the table is large. In this example, we are sorting by index *id* in the window, further boosting efficiency. Furthermore, window solution gets rid of the *DISTINCT* keyword, which establishes a hash set (inefficient). The test case in LeetCode, however, does not show its superiority.
183 |
184 |
--------------------------------------------------------------------------------
/LeetCode/601_Human_Traffic_of_Stadium/db.sql:
--------------------------------------------------------------------------------
1 | DROP SCHEMA IF EXISTS LeetCode;
2 | CREATE SCHEMA LeetCode;
3 | USE LeetCode;
4 |
5 | Create table If Not Exists stadium (id int, visit_date DATE NULL, people int);
6 |
7 | insert into stadium (id, visit_date, people) values ('1', '2017-01-01', '10');
8 | insert into stadium (id, visit_date, people) values ('2', '2017-01-02', '109');
9 | insert into stadium (id, visit_date, people) values ('3', '2017-01-03', '150');
10 | insert into stadium (id, visit_date, people) values ('4', '2017-01-04', '99');
11 | insert into stadium (id, visit_date, people) values ('5', '2017-01-05', '145');
12 | insert into stadium (id, visit_date, people) values ('6', '2017-01-06', '1455');
13 | insert into stadium (id, visit_date, people) values ('7', '2017-01-07', '199');
14 | insert into stadium (id, visit_date, people) values ('8', '2017-01-08', '188');
--------------------------------------------------------------------------------
/LeetCode/601_Human_Traffic_of_Stadium/mssql_pre_filter.sql:
--------------------------------------------------------------------------------
1 | -- MySQL: pre-filtering
2 | SELECT DISTINCT
3 | s1.*
4 | FROM
5 | (SELECT * FROM stadium WHERE people >= 100) AS s1
6 | ,(SELECT * FROM stadium WHERE people >= 100) AS s2
7 | ,(SELECT * FROM stadium WHERE people >= 100) AS s3
8 | WHERE (s1.id = s2.id - 1 AND s1.id = s3.id - 2)
9 | OR (s1.id = s2.id + 1 AND s1.id = s3.id - 1)
10 | OR (s1.id = s2.id + 2 AND s1.id = s3.id + 1)
11 | ORDER BY s1.id;
12 |
13 | -- MS SQL: cleaner code
14 | WITH good_day AS (
15 | SELECT * FROM stadium WHERE people >= 100
16 | )
17 | SELECT DISTINCT s1.* FROM
18 | good_day AS s1,
19 | good_day AS s2,
20 | good_day AS s3
21 | WHERE (s1.id = s2.id - 1 AND s1.id = s3.id - 2)
22 | OR (s1.id = s2.id + 1 AND s1.id = s3.id - 1)
23 | OR (s1.id = s2.id + 2 AND s1.id = s3.id + 1)
24 | ORDER BY s1.id;
25 |
26 | -- MySQL, MS SQL: cleaner version
27 | SELECT DISTINCT s1.*
28 | FROM stadium AS s1
29 | LEFT JOIN stadium AS s2
30 | ON s1.people >= 100
31 | AND s2.people >= 100
32 | LEFT JOIN stadium AS s3
33 | ON s3.people >= 100
34 | WHERE (s1.id = s2.id - 1 AND s1.id = s3.id - 2)
35 | OR (s1.id = s2.id + 1 AND s1.id = s3.id - 1)
36 | OR (s1.id = s2.id + 2 AND s1.id = s3.id + 1)
37 | ORDER BY s1.id;
--------------------------------------------------------------------------------
/LeetCode/601_Human_Traffic_of_Stadium/mssql_window.sql:
--------------------------------------------------------------------------------
1 | -- MS SQL: window
2 | WITH long_table AS (
3 | SELECT
4 | *
5 | ,LAG(people, 2) OVER (ORDER BY id ASC) AS pre2
6 | ,LAG(people, 1) OVER (ORDER BY id ASC) AS pre1
7 | ,LEAD(people, 1) OVER (ORDER BY id ASC) AS nxt1
8 | ,LEAD(people, 2) OVER (ORDER BY id ASC) AS nxt2
9 | FROM stadium
10 | )
11 | SELECT
12 | id
13 | ,visit_date
14 | ,people
15 | FROM long_table
16 | WHERE people >= 100
17 | AND ((pre2 >= 100 AND pre1 >= 100)
18 | OR (pre1 >= 100 AND nxt1 >= 100)
19 | OR (nxt1 >= 100 AND nxt2 >= 100))
20 | ORDER BY id;
21 |
22 | -- MySQL 8 equivalent
23 | WITH long_table AS (
24 | SELECT
25 | *
26 | ,LAG(people, 2) OVER w AS pre2
27 | ,LAG(people, 1) OVER w AS pre1
28 | ,LEAD(people, 1) OVER w AS nxt1
29 | ,LEAD(people, 2) OVER w AS nxt2
30 | FROM stadium
31 | WINDOW w AS (ORDER BY id ASC)
32 | )
33 | SELECT
34 | id
35 | ,visit_date
36 | ,people
37 | FROM long_table
38 | WHERE people >= 100
39 | AND ((pre2 >= 100 AND pre1 >= 100)
40 | OR (pre1 >= 100 AND nxt1 >= 100)
41 | OR (nxt1 >= 100 AND nxt2 >= 100))
42 | ORDER BY id;
--------------------------------------------------------------------------------
/LeetCode/601_Human_Traffic_of_Stadium/mysql_simple.sql:
--------------------------------------------------------------------------------
1 | -- MySQL solution
2 | SELECT DISTINCT
3 | s1.*
4 | FROM
5 | stadium AS s1
6 | ,stadium AS s2
7 | ,stadium AS s3
8 | WHERE s1.people >= 100
9 | AND s2.people >= 100
10 | AND s3.people >= 100
11 | AND ((s1.id = s2.id - 1 AND s1.id = s3.id - 2) -- start of window
12 | OR (s1.id = s2.id + 1 AND s1.id = s3.id - 1) -- middle of window
13 | OR (s1.id = s2.id + 2 AND s1.id = s3.id + 1)) -- end of window
14 | ORDER BY s1.id;
--------------------------------------------------------------------------------
/LeetCode/615_Average_Salary/README.md:
--------------------------------------------------------------------------------
1 | # Average Salary: Departments VS Company
2 |
3 | ## Description
4 |
5 | Given two tables as below, write a query to display the comparison result (higher/lower/same) of the average salary of employees in a department to the company's average salary.
6 |
7 | Load the database file [db.sql](db.sql) to localhost MySQL. Relevant tables will be created in the LeetCode database.
8 | ```
9 | mysql < db.sql -uroot -p
10 | ```
11 |
12 | Table: salary
13 | ```
14 | | id | employee_id | amount | pay_date |
15 | |----|-------------|--------|------------|
16 | | 1 | 1 | 9000 | 2017-03-31 |
17 | | 2 | 2 | 6000 | 2017-03-31 |
18 | | 3 | 3 | 10000 | 2017-03-31 |
19 | | 4 | 1 | 7000 | 2017-02-28 |
20 | | 5 | 2 | 6000 | 2017-02-28 |
21 | | 6 | 3 | 8000 | 2017-02-28 |
22 | ```
23 |
24 | The employee_id column refers to the employee_id in the following table employee.
25 |
26 | ```
27 | | employee_id | department_id |
28 | |-------------|---------------|
29 | | 1 | 1 |
30 | | 2 | 2 |
31 | | 3 | 2 |
32 | ```
33 |
34 | So for the sample data above, the result is:
35 |
36 | ```
37 | | pay_month | department_id | comparison |
38 | |-----------|---------------|-------------|
39 | | 2017-03 | 1 | higher |
40 | | 2017-03 | 2 | lower |
41 | | 2017-02 | 1 | same |
42 | | 2017-02 | 2 | same |
43 | ```
44 |
45 | Explanation
46 |
47 |
48 | In March, the company's average salary is (9000+6000+10000)/3 = 8333.33...
49 |
50 |
51 | The average salary for department '1' is 9000, which is the salary of employee_id '1' since there is only one employee in this department. So the comparison result is 'higher' since 9000 > 8333.33 obviously.
52 |
53 |
54 | The average salary of department '2' is (6000 + 10000)/2 = 8000, which is the average of employee_id '2' and '3'. So the comparison result is 'lower' since 8000 < 8333.33.
55 |
56 |
57 | With the same formula for the average salary comparison in February, the result is 'same' since both department '1' and department '2' have the same average salary as the company, which is 7000.
58 |
59 | ---
60 | ## Observation
61 | Make the following observation to interviewers. Confirm your observation is correct. Ask for clarification if necessary.
62 | * Ask whether every employee will be in a department (no NULL in the department_id column). If there is NULL, how to display it.
63 | * Note that people get paid on __different__ day of the same months, meaning that you must process the date label before averaging, otherwise you __will not__ get the correct average.
64 |
65 | ---
66 | ## Solution
67 | The overall strategy is to build two temporary tables, one aggregated over the pay month, one aggregated over the (pay month, department) key pair. After aggregating, join the two temporary tables. Because every distinct pay month must exist in both tables, an inner join is enough.
68 |
69 | #### Step 1. Company Monthly Average
70 | ```sql
71 | mysql> SELECT
72 | -> date_format(pay_date, '%Y-%m') as pay_month
73 | -> ,AVG(amount) AS corp_avg_sal
74 | -> FROM salary GROUP BY pay_month;
75 | ```
76 | ```
77 | +-----------+--------------+
78 | | pay_month | corp_avg_sal |
79 | +-----------+--------------+
80 | | 2017-02 | 6666.6667 |
81 | | 2017-03 | 8333.3333 |
82 | +-----------+--------------+
83 | 2 rows in set (0.00 sec)
84 | ```
85 |
86 | Notice what happens if you don't process pay_date column before aggregating: you get the wrong average, and your final result will be wrong.
87 |
88 | ```sql
89 | mysql> SELECT
90 | -> pay_date
91 | -> ,AVG(amount) AS corp_avg_sal
92 | -> FROM salary GROUP BY pay_date;
93 | ```
94 | ```
95 | +------------+--------------+
96 | | pay_date | corp_avg_sal |
97 | +------------+--------------+
98 | | 2017-02-25 | 6000.0000 |
99 | | 2017-02-28 | 7000.0000 |
100 | | 2017-03-31 | 8333.3333 |
101 | +------------+--------------+
102 | 3 rows in set (0.00 sec)
103 | ```
104 |
105 | #### Step 2. Department Monthly Average
106 | Similarly, process pay_date before aggregating.
107 |
108 | ```sql
109 | mysql> SELECT
110 | -> date_format(pay_date, '%Y-%m') as pay_month
111 | -> ,e.department_id
112 | -> ,AVG(amount) AS dept_avg_sal
113 | -> FROM salary AS s
114 | -> JOIN employee AS e
115 | -> ON s.employee_id = e.employee_id
116 | -> GROUP BY pay_month, e.department_id;
117 | ```
118 | ```
119 | +-----------+---------------+--------------+
120 | | pay_month | department_id | dept_avg_sal |
121 | +-----------+---------------+--------------+
122 | | 2017-02 | 1 | 6000.0000 |
123 | | 2017-02 | 2 | 7000.0000 |
124 | | 2017-03 | 1 | 9000.0000 |
125 | | 2017-03 | 2 | 8000.0000 |
126 | +-----------+---------------+--------------+
127 | 4 rows in set (0.00 sec)
128 | ```
129 |
130 | #### Step 3. Join and Present Results
131 | Can use inner join, or cross join with where clause, or left join. The results are the same. On LeetCode, the efficiency is quite different: cross join (167ms) < inner join (185ms) < left join (205ms).
132 |
133 | Remember to give alias to derived tables. See full solution [here](mysql_session_vars.sql).
134 |
135 | ```sql
136 | mysql> SELECT
137 | -> a.pay_month
138 | -> ,a.department_id,
139 | -> CASE
140 | -> WHEN a.dept_avg_sal < b.corp_avg_sal THEN "lower"
141 | -> WHEN a.dept_avg_sal = b.corp_avg_sal THEN "same"
142 | -> ELSE "higher"
143 | -> END AS comparison
144 | -> FROM
145 | -> (SELECT
146 | -> date_format(pay_date, '%Y-%m') as pay_month
147 | -> ,e.department_id
148 | -> ,AVG(amount) AS dept_avg_sal
149 | -> FROM salary AS s
150 | -> JOIN employee AS e
151 | -> ON s.employee_id = e.employee_id
152 | -> GROUP BY pay_month, e.department_id
153 | -> ) AS a
154 | -> JOIN
155 | -> (SELECT
156 | -> date_format(pay_date, '%Y-%m') as pay_month
157 | -> ,AVG(amount) AS corp_avg_sal
158 | -> FROM salary GROUP BY pay_month
159 | -> ) AS b
160 | -> ON a.pay_month = b.pay_month
161 | -> ORDER BY pay_month, department_id;
162 | ```
163 | ```
164 | +-----------+---------------+------------+
165 | | pay_month | department_id | comparison |
166 | +-----------+---------------+------------+
167 | | 2017-02 | 1 | same |
168 | | 2017-02 | 2 | same |
169 | | 2017-03 | 1 | higher |
170 | | 2017-03 | 2 | lower |
171 | +-----------+---------------+------------+
172 | 4 rows in set (0.00 sec)
173 | ```
174 |
--------------------------------------------------------------------------------
/LeetCode/615_Average_Salary/db.sql:
--------------------------------------------------------------------------------
-- Schema and seed data for LeetCode 615 (Average Salary).
DROP SCHEMA IF EXISTS LeetCode;
CREATE SCHEMA LeetCode;
USE LeetCode;

CREATE TABLE IF NOT EXISTS salary (id int, employee_id int, amount int, pay_date date);
CREATE TABLE IF NOT EXISTS employee (employee_id int, department_id int);

TRUNCATE TABLE salary;
INSERT INTO salary (id, employee_id, amount, pay_date) VALUES
    ('1', '1', '9000', '2017/03/31'),
    ('2', '2', '6000', '2017/03/31'),
    ('3', '3', '10000', '2017/03/31'),
    ('4', '1', '7000', '2017/02/25'),
    ('5', '2', '6000', '2017/02/28'),
    ('6', '3', '8000', '2017/02/28');

TRUNCATE TABLE employee;
INSERT INTO employee (employee_id, department_id) VALUES
    ('1', '1'),
    ('2', '2'),
    ('3', '2');
--------------------------------------------------------------------------------
/LeetCode/615_Average_Salary/mysql.sql:
--------------------------------------------------------------------------------
-- MySQL plain solution: label each department's monthly average salary as
-- higher / same / lower relative to the company-wide monthly average.
SELECT
    a.pay_month
    ,a.department_id,
    CASE
        WHEN a.dept_avg_sal < b.corp_avg_sal THEN "lower"
        WHEN a.dept_avg_sal = b.corp_avg_sal THEN "same"
        ELSE "higher"
    END AS comparison
FROM
    -- per (month, department) average
    (SELECT
        date_format(pay_date, '%Y-%m') as pay_month
        ,e.department_id
        ,AVG(amount) AS dept_avg_sal
    FROM salary AS s
    JOIN employee AS e
    ON s.employee_id = e.employee_id
    GROUP BY pay_month, e.department_id
    ) AS a
JOIN
    -- per month company-wide average
    (SELECT
        date_format(pay_date, '%Y-%m') as pay_month
        ,AVG(amount) AS corp_avg_sal
    FROM salary GROUP BY pay_month
    ) AS b
ON a.pay_month = b.pay_month
ORDER BY pay_month, department_id;


/* MySQL solution using CTEs and a window function. */
WITH myCTE1 AS (
    -- Attach the company-wide monthly average to every salary row.
    SELECT s.employee_id, s.amount, s.pay_date, e.department_id,
        DATE_FORMAT(s.pay_date, "%Y-%m") as y_m,
        ROUND(AVG(amount) OVER(PARTITION BY DATE_FORMAT(s.pay_date, "%Y-%m")), 2) 'company_avg'
    FROM salary s join employee e
    ON s.employee_id = e.employee_id
),
myCTE2 as (
    -- Aggregate to one row per (month, department). company_avg is constant
    -- within each month, so adding it to GROUP BY is safe and keeps
    -- ONLY_FULL_GROUP_BY mode happy.
    SELECT y_m as pay_period, department_id, company_avg, ROUND(AVG(amount), 2) 'department_avg'
    FROM myCTE1
    GROUP BY department_id, y_m, company_avg
)
SELECT *,
    CASE
        WHEN company_avg < department_avg THEN 'higher'
        WHEN company_avg > department_avg THEN 'lower'
        ELSE 'same'       -- BUG FIX: equal averages must report 'same', not 'lower'
    END as 'comparison'   -- BUG FIX: alias was misspelled 'comparision'
FROM myCTE2
ORDER BY pay_period desc, department_id;
--------------------------------------------------------------------------------
/LeetCode/615_Average_Salary/mysql_session_vars.sql:
--------------------------------------------------------------------------------
-- LeetCode 615: label each department's monthly average salary as
-- higher / same / lower relative to the company-wide monthly average.
SELECT
    dept.pay_month
    ,dept.department_id
    ,CASE
        WHEN dept.dept_avg_sal = corp.corp_avg_sal THEN 'same'
        WHEN dept.dept_avg_sal > corp.corp_avg_sal THEN 'higher'
        ELSE 'lower'
     END AS comparison
FROM
    -- average per (month, department)
    (SELECT
        DATE_FORMAT(pay_date, '%Y-%m') AS pay_month
        ,e.department_id
        ,AVG(amount) AS dept_avg_sal
     FROM salary AS s
     JOIN employee AS e
       ON s.employee_id = e.employee_id
     GROUP BY pay_month, e.department_id) AS dept
JOIN
    -- company-wide average per month
    (SELECT
        DATE_FORMAT(pay_date, '%Y-%m') AS pay_month
        ,AVG(amount) AS corp_avg_sal
     FROM salary
     GROUP BY pay_month) AS corp
  ON dept.pay_month = corp.pay_month
ORDER BY pay_month, department_id;
--------------------------------------------------------------------------------
/LeetCode/618_Students_Report_by_Geography/README.md:
--------------------------------------------------------------------------------
1 | # Students Report By Geography
2 |
3 | A U.S graduate school has students from Asia, Europe and America. The students' location information are stored in table student as below.
4 |
5 | ```
6 | | name | continent |
7 | |--------|-----------|
8 | | Jack | America |
9 | | Pascal | Europe |
10 | | Xi | Asia |
11 | | Jane | America |
12 | ```
13 |
14 | Pivot the continent column in this table so that each name is sorted alphabetically and displayed underneath its corresponding continent. The output headers should be America, Asia and Europe respectively. It is guaranteed that the student number from America is no less than either Asia or Europe.
15 |
16 |
17 | For the sample input, the output is:
18 |
19 | ```
20 | | America | Asia | Europe |
21 | |---------|------|--------|
22 | | Jack | Xi | Pascal |
23 | | Jane | | |
24 | ```
25 |
26 | Follow-up: If it is unknown which continent has the most students, can you write a query to generate the student report?
27 |
28 |
29 | ## Solution
30 | This is an exercise of pivoting table, though not a very good one. For more representative pivoting exercise, check this [link](google.com). The challenging part in this exercise is that the data are text, and cannot be aggregated over in the pivoted table. So you must assign it a proxy index, and join each column.
31 |
32 | Using session variable can solve the problem, but it is very error prone (see [here](mysql_session_vars.sql)). In this document, I present a solution utilizing *ROW_NUMBER* window function (available in MS SQL and MySQL8) and full join (not available in MySQL8). So this solution does not work in MySQL server.
33 |
34 |
35 | #### Step 1. Build Temporary Tables
36 | ```
37 | mysql> SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'America';
38 | +----+------+
39 | | ID | name |
40 | +----+------+
41 | | 1 | Jack |
42 | | 2 | Jane |
43 | +----+------+
44 | 2 rows in set (0.01 sec)
45 |
46 | mysql> SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'Asia';
47 | +----+------+
48 | | ID | name |
49 | +----+------+
50 | | 1 | Xi |
51 | +----+------+
52 | 1 row in set (0.00 sec)
53 |
54 | mysql> SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'Europe';
55 | +----+--------+
56 | | ID | name |
57 | +----+--------+
58 | | 1 | Pascal |
59 | +----+--------+
60 | 1 row in set (0.00 sec)
61 | ```
62 |
63 | #### Step 2. Full Join
64 | In MS SQL, use *FULL JOIN* to combine the three temporary tables, in any order. Note that when joining the third table, the *JOIN* condition could match to either the first table or the second table.
65 |
66 | ```sql
67 | -- MS SQL
68 | SELECT
69 | a.name AS America
70 | ,b.name AS Asia
71 | ,c.name AS Europe
72 | FROM
73 | (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'America') a
74 | FULL JOIN
75 | (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'Asia') b
76 | ON a.ID = b.ID
77 | FULL JOIN
78 | (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'Europe') c
79 | ON c.ID = b.ID
80 | OR c.ID = a.ID;
81 | ```
82 |
83 | In a different order: we don't have to know in advance which group has most students.
84 | ```sql
85 | -- MS SQL
86 | SELECT
87 | a.name AS America
88 | ,b.name AS Asia
89 | ,c.name AS Europe
90 | FROM
91 | (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'Europe') c
92 | FULL JOIN
93 | (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'Asia') b
94 | ON b.ID = c.ID
95 | FULL JOIN
96 | (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'America') a
97 | ON a.ID = b.ID
98 | OR a.ID = c.ID;
99 | ```
100 |
101 | ### Join with original table to use maximum range of row numbers
102 |
103 | ```sql
104 | SELECT a.name AS America,
105 | b.name AS Asia,
106 | c.name AS Europe
107 | FROM
108 | (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student) r
109 | LEFT JOIN
110 | (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'Asia') b USING(ID)
111 | LEFT JOIN
112 | (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'America') a USING(ID)
113 | LEFT JOIN
114 | (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'Europe') c USING(ID)
115 | WHERE NOT(a.ID IS NULL AND b.ID IS NULL AND c.ID IS NULL)
116 | ```
117 |
--------------------------------------------------------------------------------
/LeetCode/618_Students_Report_by_Geography/db.sql:
--------------------------------------------------------------------------------
-- Schema and seed data for LeetCode 618 (Students Report By Geography).
DROP SCHEMA IF EXISTS LeetCode;
CREATE SCHEMA LeetCode;
USE LeetCode;

CREATE TABLE IF NOT EXISTS student (name varchar(50), continent varchar(7));
TRUNCATE TABLE student;
INSERT INTO student (name, continent) VALUES
    ('Jane', 'America'),
    ('Pascal', 'Europe'),
    ('Xi', 'Asia'),
    ('Jack', 'America');
--------------------------------------------------------------------------------
/LeetCode/618_Students_Report_by_Geography/mssql_full_join.sql:
--------------------------------------------------------------------------------
-- MS SQL: pivot students under their continent via row-numbered FULL JOINs.
-- The problem guarantees America has at least as many students as Asia or Europe.
SELECT
    us.name AS America
    ,asia.name AS Asia
    ,eu.name AS Europe
FROM
    (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'America') us
FULL JOIN
    (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'Asia') asia
    ON us.ID = asia.ID
FULL JOIN
    (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'Europe') eu
    -- the third list may line up with either of the first two
    ON eu.ID = asia.ID
    OR eu.ID = us.ID;
--------------------------------------------------------------------------------
/LeetCode/618_Students_Report_by_Geography/mysql_session_vars.sql:
--------------------------------------------------------------------------------
-- MySQL (pre-8.0): emulate ROW_NUMBER with session variables, then LEFT JOIN
-- the per-continent numbered lists (America is guaranteed to be the longest).
-- NOTE(review): this relies on ORDER BY inside derived tables driving the
-- variable increment order, which MySQL does not guarantee -- error prone.
SELECT
    am.name AS America
    ,az.name as Asia
    ,eu.name as Europe
FROM
    (SELECT @ra := 0, @rb := 0, @rc := 0) AS vars
    ,(SELECT @ra := @ra + 1 AS ID, name FROM student WHERE continent = "America" ORDER BY name) AS am
LEFT JOIN
    (SELECT @rb := @rb + 1 AS ID, name FROM student WHERE continent = "Asia" ORDER BY name) AS az
    ON am.ID = az.ID
LEFT JOIN
    (SELECT @rc := @rc + 1 AS ID, name FROM student WHERE continent = "Europe" ORDER BY name) AS eu
    ON az.ID = eu.ID
    OR am.ID = eu.ID;
--------------------------------------------------------------------------------
/LeetCode/618_Students_Report_by_Geography/self-join-rownums.sql:
--------------------------------------------------------------------------------
-- Pivot without knowing which continent is largest: number ALL rows to get a
-- row-number range at least as long as any one continent's list, then LEFT
-- JOIN each numbered continent list onto it.
-- FIX: the table is named `student` (see db.sql), not `students`.
SELECT a.name AS America,
       b.name AS Asia,
       c.name AS Europe
FROM
    (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student) r
LEFT JOIN
    (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'Asia') b USING(ID)
LEFT JOIN
    (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'America') a USING(ID)
LEFT JOIN
    (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'Europe') c USING(ID)
-- drop the trailing all-NULL rows produced when r is longer than every list
WHERE NOT (a.ID IS NULL AND b.ID IS NULL AND c.ID IS NULL);
--------------------------------------------------------------------------------
/LeetCode/others/1045_Customers_Who_Bought_All_Products.sql:
--------------------------------------------------------------------------------
-- Customers whose distinct purchased products cover the whole Product table.
SELECT customer_id
FROM Customer
GROUP BY customer_id
HAVING COUNT(DISTINCT product_key) = (SELECT COUNT(DISTINCT product_key) FROM Product);
--------------------------------------------------------------------------------
/LeetCode/others/1050_Actors_and_Directors_Who_Cooperated_At_Least_Three_Times.sql:
--------------------------------------------------------------------------------
-- (actor, director) pairs that cooperated at least three times.
SELECT actor_id, director_id
FROM ActorDirector
GROUP BY actor_id, director_id
HAVING COUNT(*) >= 3;
--------------------------------------------------------------------------------
/LeetCode/others/1068_Product_Sales_Analysis_I.sql:
--------------------------------------------------------------------------------
-- Attach the product name to every sales row.
SELECT p.product_name, s.year, s.price
FROM Sales AS s
JOIN Product AS p
  ON s.product_id = p.product_id;
--------------------------------------------------------------------------------
/LeetCode/others/1069_Product_Sales_Analysis_II.sql:
--------------------------------------------------------------------------------
-- Total quantity sold per product.
SELECT product_id, SUM(quantity) AS total_quantity
FROM Sales
GROUP BY product_id;
--------------------------------------------------------------------------------
/LeetCode/others/1070_Product_Sales_Analysis_III.sql:
--------------------------------------------------------------------------------
-- MS SQL: all sales rows from each product's first year on the market
-- (TOP 1 WITH TIES keeps every row whose rank is minimal).
SELECT TOP 1 WITH TIES product_id, year AS first_year, quantity, price
FROM Sales
ORDER BY RANK() OVER (PARTITION BY product_id ORDER BY year);
--------------------------------------------------------------------------------
/LeetCode/others/1075_Project_Employees_I.sql:
--------------------------------------------------------------------------------
-- Average experience (2 decimal places) of the employees on each project.
SELECT
    p.project_id
    ,ROUND(AVG(e.experience_years), 2) AS average_years
FROM Project AS p
JOIN Employee AS e
  ON p.employee_id = e.employee_id
GROUP BY project_id;
--------------------------------------------------------------------------------
/LeetCode/others/1076_Project_Employees_II.sql:
--------------------------------------------------------------------------------
-- MS SQL: project(s) with the most employees (ties included).
SELECT TOP 1 WITH TIES project_id
FROM Project
GROUP BY project_id
ORDER BY COUNT(*) DESC;
--------------------------------------------------------------------------------
/LeetCode/others/1077_Project_Employees_III.sql:
--------------------------------------------------------------------------------
-- MS SQL: most experienced employee(s) on each project (ties included).
SELECT TOP 1 WITH TIES project_id, p.employee_id
FROM Project AS p
JOIN Employee AS e
  ON p.employee_id = e.employee_id
ORDER BY DENSE_RANK() OVER
    (PARTITION BY project_id ORDER BY experience_years DESC);
--------------------------------------------------------------------------------
/LeetCode/others/1082_Sales_Analysis_I.sql:
--------------------------------------------------------------------------------
-- MS SQL: best seller(s) by total sale price (ties included).
SELECT TOP 1 WITH TIES seller_id
FROM Sales
GROUP BY seller_id
ORDER BY SUM(price) DESC;
--------------------------------------------------------------------------------
/LeetCode/others/1083_Sales_Analysis_II.sql:
--------------------------------------------------------------------------------
-- Buyers who bought an S8 but never an iPhone (LEFT JOIN anti-join).
WITH purchases AS (
    SELECT p.product_id, p.product_name, s.buyer_id
    FROM Product AS p
    JOIN Sales AS s
      ON p.product_id = s.product_id
)
,iphone_buyers AS (
    SELECT DISTINCT buyer_id
    FROM purchases WHERE product_name = 'iPhone'
)
,s8_buyers AS (
    SELECT DISTINCT buyer_id
    FROM purchases WHERE product_name = 'S8'
)
SELECT s8.buyer_id
FROM s8_buyers AS s8
LEFT JOIN iphone_buyers AS ip
  ON s8.buyer_id = ip.buyer_id
WHERE ip.buyer_id IS NULL;
--------------------------------------------------------------------------------
/LeetCode/others/1084_Sales_Analysis_III.sql:
--------------------------------------------------------------------------------
-- Products sold ONLY within 2019 Q1 (every sale date falls inside the window).
SELECT s.product_id, MAX(product_name) AS product_name
FROM Sales AS s
JOIN Product AS p
  ON s.product_id = p.product_id
GROUP BY s.product_id
HAVING MAX(sale_date) <= '2019-03-31'
   AND MIN(sale_date) >= '2019-01-01';
--------------------------------------------------------------------------------
/LeetCode/others/1097_Game_Play_Analysis_V.sql:
--------------------------------------------------------------------------------
-- MS SQL: installs and day-1 retention per install date.
WITH events AS (
    SELECT
        MIN(event_date) OVER (PARTITION BY player_id) AS install_dt
        ,LEAD(event_date) OVER (PARTITION BY player_id ORDER BY event_date) AS next_date
        ,RANK() OVER (PARTITION BY player_id ORDER BY event_date) AS rk
    FROM Activity
)
SELECT
    install_dt
    ,COUNT(*) AS installs
    ,ROUND(
        CAST(SUM(CASE WHEN DATEADD(day, 1, install_dt) = next_date THEN 1 ELSE 0 END) AS float) /
        COUNT(*), 2) AS day1_retention
FROM events
WHERE rk = 1          -- keep one row per player: their install-day event
GROUP BY install_dt;
--------------------------------------------------------------------------------
/LeetCode/others/1098_Unpopular_Books.sql:
--------------------------------------------------------------------------------
-- Books available for more than a month that sold fewer than 10 copies in the
-- year before 2019-06-23 (RIGHT JOIN keeps books with no orders at all).
SELECT b.book_id, b.name
FROM (SELECT * FROM Orders WHERE dispatch_date > DATE_ADD('2019-06-23', INTERVAL -1 YEAR)) o
RIGHT JOIN Books b
  ON o.book_id = b.book_id
WHERE b.available_from < DATE_ADD('2019-06-23', INTERVAL -1 MONTH)
GROUP BY b.book_id, b.name
HAVING COALESCE(SUM(quantity), 0) < 10
ORDER BY b.book_id;
--------------------------------------------------------------------------------
/LeetCode/others/1107_New_Users_Daily_Count.sql:
--------------------------------------------------------------------------------
-- Count users whose FIRST login falls in the 90 days up to 2019-06-30.
-- FIX: bound the window on both sides. The original
-- `datediff('2019-06-30', login_date) <= 90` also accepts logins AFTER
-- 2019-06-30, where the datediff is negative.
SELECT login_date, COUNT(user_id) AS user_count
FROM
    (SELECT user_id, MIN(activity_date) AS login_date
     FROM Traffic
     WHERE activity = 'login'
     GROUP BY user_id) first_login
WHERE login_date BETWEEN DATE_SUB('2019-06-30', INTERVAL 90 DAY) AND '2019-06-30'
GROUP BY login_date;
--------------------------------------------------------------------------------
/LeetCode/others/1112_Highest_Grade_For_Each_Student.sql:
--------------------------------------------------------------------------------
-- MS SQL: each student's highest grade; ties broken by smallest course_id.
WITH best AS (
    SELECT TOP 1 WITH TIES student_id, course_id, grade
    FROM Enrollments
    ORDER BY RANK() OVER (PARTITION BY student_id ORDER BY grade DESC, course_id)
)
SELECT * FROM best ORDER BY student_id;
--------------------------------------------------------------------------------
/LeetCode/others/1113_Reported_Posts.sql:
--------------------------------------------------------------------------------
-- Number of distinct posts reported per reason on 2019-07-04.
SELECT extra AS report_reason, COUNT(DISTINCT post_id) AS report_count
FROM Actions
WHERE action_date = '2019-07-04'
  AND action = 'report'
GROUP BY extra;
--------------------------------------------------------------------------------
/LeetCode/others/1126_Active_Businesses.sql:
--------------------------------------------------------------------------------
-- Businesses above the per-event-type average occurrence for more than one
-- event type.
WITH with_avg AS (
    SELECT business_id, occurences,
           AVG(occurences) OVER (PARTITION BY event_type) AS average
    FROM Events
)
SELECT business_id
FROM with_avg
WHERE occurences > average
GROUP BY business_id
HAVING COUNT(*) > 1;
--------------------------------------------------------------------------------
/LeetCode/others/1127_User_Purchase_Platform.sql:
--------------------------------------------------------------------------------
-- LeetCode 1127: per (date, platform) spend totals and user counts, where a
-- user who spent on both desktop and mobile on a day counts only under 'both'.
WITH dates_x_platforms AS (
    -- every spend_date crossed with the three possible platform labels
    SELECT DISTINCT spend_date, 'desktop' AS platform FROM Spending
    UNION ALL
    SELECT DISTINCT spend_date, 'mobile' AS platform FROM Spending
    UNION ALL
    SELECT DISTINCT spend_date, 'both' AS platform FROM Spending
)

,user_platform AS (
    -- label each (date, user) pair desktop / mobile / both
    SELECT
        spend_date, user_id,
        CASE WHEN COUNT(DISTINCT platform) = 1 THEN MAX(platform) ELSE 'both' END AS platform
    FROM Spending GROUP BY spend_date, user_id
)

,platform_totals AS (
    SELECT
        s.spend_date, u.platform, SUM(s.amount) AS total_amount, COUNT(DISTINCT s.user_id) AS total_users
    FROM Spending s
    JOIN user_platform u
      ON s.spend_date = u.spend_date
     AND s.user_id = u.user_id
    GROUP BY s.spend_date, u.platform
)

SELECT
    sk.spend_date, sk.platform
    ,COALESCE(t.total_amount, 0) AS total_amount
    ,COALESCE(t.total_users, 0) AS total_users
FROM dates_x_platforms AS sk
LEFT JOIN platform_totals AS t
  ON sk.spend_date = t.spend_date
 AND sk.platform = t.platform;
--------------------------------------------------------------------------------
/LeetCode/others/1132_Reported_Posts_II.sql:
--------------------------------------------------------------------------------
-- Average daily percentage of spam-reported posts that were removed.
-- Days with reports but no removals contribute 0 (removed is NULL there).
SELECT ROUND(100 * AVG(COALESCE(removed / reported, 0)), 2) AS average_daily_percent
FROM
(
    SELECT action_date AS date, COUNT(DISTINCT post_id) AS reported
    FROM actions
    WHERE extra = 'spam'
    GROUP BY action_date
) rep LEFT JOIN
(
    SELECT action_date AS date, COUNT(DISTINCT actions.post_id) AS removed
    FROM actions, removals
    WHERE extra = 'spam' AND actions.post_id = removals.post_id
    GROUP BY action_date
) rem ON (rep.date = rem.date);
--------------------------------------------------------------------------------
/LeetCode/others/1141_User_Activity_for_the_Past_30_Days_I.sql:
--------------------------------------------------------------------------------
-- Daily distinct active users over the 30 days ending 2019-07-27.
SELECT activity_date AS day, COUNT(DISTINCT user_id) AS active_users
FROM Activity
WHERE activity_date BETWEEN '2019-06-28' AND '2019-07-27'
GROUP BY activity_date;
--------------------------------------------------------------------------------
/LeetCode/others/1142_User_Activity_for_the_Past_30_Days_II.sql:
--------------------------------------------------------------------------------
-- Average sessions per user over the 30 days ending 2019-07-27
-- (reported as 0.00 when there was no activity at all).
SELECT ROUND(COALESCE(AVG(sessions_per_user), 0), 2) AS average_sessions_per_user
FROM (
    SELECT user_id, COUNT(DISTINCT session_id) AS sessions_per_user
    FROM Activity
    WHERE activity_date BETWEEN '2019-06-28' AND '2019-07-27'
    GROUP BY user_id
) per_user;
--------------------------------------------------------------------------------
/LeetCode/others/1148_Article_Views_I.sql:
--------------------------------------------------------------------------------
-- Authors who viewed at least one of their own articles, sorted by id.
SELECT DISTINCT author_id AS id
FROM Views
WHERE author_id = viewer_id
ORDER BY author_id;
--------------------------------------------------------------------------------
/LeetCode/others/1149_Article_Views_II.sql:
--------------------------------------------------------------------------------
-- Viewers who viewed more than one distinct article on the same date.
-- FIXES: (1) count only DISTINCT article_id -- viewer_id is fixed within each
-- group, so including it in the count added nothing; (2) add ORDER BY id,
-- which the problem statement requires.
SELECT DISTINCT viewer_id AS id
FROM Views
GROUP BY viewer_id, view_date
HAVING COUNT(DISTINCT article_id) > 1
ORDER BY id;
--------------------------------------------------------------------------------
/LeetCode/others/1158_Market_Analysis_I.sql:
--------------------------------------------------------------------------------
-- Orders placed by each buyer in 2019 (0 if none).
-- FIX: bound order_date on both ends -- the original `>= '2019-01-01'` alone
-- would also count orders from 2020 and later.
WITH orders_2019 AS (
    SELECT buyer_id, COUNT(*) AS n
    FROM Orders
    WHERE order_date BETWEEN '2019-01-01' AND '2019-12-31'
    GROUP BY buyer_id
)
SELECT u.user_id AS buyer_id, join_date, COALESCE(n, 0) AS orders_in_2019
FROM Users u
LEFT JOIN orders_2019 o
  ON u.user_id = o.buyer_id;
--------------------------------------------------------------------------------
/LeetCode/others/1159_Market_Analysis_II.sql:
--------------------------------------------------------------------------------
-- For each seller: is the brand of their second sold item their favorite brand?
WITH ordered_sales AS (
    SELECT seller_id, item_id,
           RANK() OVER (PARTITION BY seller_id ORDER BY order_date) AS rk
    FROM Orders
),

second_items AS (
    -- the second item each seller sold, with its brand attached
    SELECT s.seller_id, s.item_id, s.rk, i.item_brand
    FROM ordered_sales s
    JOIN Items i
      ON s.item_id = i.item_id
    WHERE rk = 2)

SELECT
    u.user_id AS seller_id,
    CASE WHEN u.favorite_brand = si.item_brand THEN 'yes' ELSE 'no' END AS '2nd_item_fav_brand'
FROM Users u
LEFT JOIN second_items si
  ON u.user_id = si.seller_id;
--------------------------------------------------------------------------------
/LeetCode/others/1164_Product_Price_at_a_Given_Date.sql:
--------------------------------------------------------------------------------
-- MS SQL: price of every product as of 2019-08-16 (10 before any change).
WITH all_products AS (
    SELECT DISTINCT product_id FROM Products
),
latest_change AS (
    -- most recent price change on or before the target date, per product
    SELECT TOP 1 WITH TIES product_id, new_price
    FROM Products
    WHERE change_date <= '2019-08-16'
    ORDER BY RANK() OVER (PARTITION BY product_id ORDER BY change_date DESC)
)
SELECT p.product_id, COALESCE(c.new_price, 10) AS price
FROM all_products p
LEFT JOIN latest_change c
  ON p.product_id = c.product_id;
--------------------------------------------------------------------------------
/LeetCode/others/1173_Immediate_Food_Delivery_I.sql:
--------------------------------------------------------------------------------
-- Percentage of orders delivered on the customer's preferred date.
SELECT ROUND(100 * SUM(CASE WHEN order_date = customer_pref_delivery_date THEN 1 ELSE 0 END)
             / COUNT(*), 2) AS immediate_percentage
FROM Delivery;
--------------------------------------------------------------------------------
/LeetCode/others/1174_Immediate_Food_Delivery_II.sql:
--------------------------------------------------------------------------------
-- MS SQL: share of customers whose FIRST order was an immediate delivery.
WITH first_orders AS (
    SELECT TOP 1 WITH TIES delivery_id, customer_id, order_date, customer_pref_delivery_date
    FROM Delivery
    ORDER BY RANK() OVER (PARTITION BY customer_id ORDER BY order_date)
)
SELECT ROUND(100 * SUM(CASE WHEN order_date = customer_pref_delivery_date THEN 1.0 ELSE 0.0 END)
             / COUNT(DISTINCT customer_id), 2) AS immediate_percentage
FROM first_orders;
--------------------------------------------------------------------------------
/LeetCode/others/1179_Reformat_Department_Table.sql:
--------------------------------------------------------------------------------
-- MS SQL: PIVOT the (id, month, revenue) rows into one column per month.
SELECT
    id,
    jan AS jan_revenue,
    feb AS feb_revenue,
    mar AS mar_revenue,
    apr AS apr_revenue,
    may AS may_revenue,
    jun AS jun_revenue,
    jul AS jul_revenue,
    aug AS aug_revenue,
    sep AS sep_revenue,
    oct AS oct_revenue,
    nov AS nov_revenue,
    dec AS dec_revenue
FROM department
PIVOT
(
    MAX(revenue)
    FOR month IN (jan, feb, mar, apr, may, jun, jul, aug, sep, oct, nov, dec)
) AS pvt;
-- MySQL: emulate a pivot with one conditional aggregate per month.
SELECT
    id
    ,MAX(IF(month = 'Jan', revenue, NULL)) AS 'Jan_Revenue'
    ,MAX(IF(month = 'Feb', revenue, NULL)) AS 'Feb_Revenue'
    ,MAX(IF(month = 'Mar', revenue, NULL)) AS 'Mar_Revenue'
    ,MAX(IF(month = 'Apr', revenue, NULL)) AS 'Apr_Revenue'
    ,MAX(IF(month = 'May', revenue, NULL)) AS 'May_Revenue'
    ,MAX(IF(month = 'Jun', revenue, NULL)) AS 'Jun_Revenue'
    ,MAX(IF(month = 'Jul', revenue, NULL)) AS 'Jul_Revenue'
    ,MAX(IF(month = 'Aug', revenue, NULL)) AS 'Aug_Revenue'
    ,MAX(IF(month = 'Sep', revenue, NULL)) AS 'Sep_Revenue'
    ,MAX(IF(month = 'Oct', revenue, NULL)) AS 'Oct_Revenue'
    ,MAX(IF(month = 'Nov', revenue, NULL)) AS 'Nov_Revenue'
    ,MAX(IF(month = 'Dec', revenue, NULL)) AS 'Dec_Revenue'
FROM Department
GROUP BY id
ORDER BY id;
--------------------------------------------------------------------------------
/LeetCode/others/175_Combine_Two_Tables.sql:
--------------------------------------------------------------------------------
-- LeetCode 175: every person with their address (NULL City/State when absent).
SELECT
    person.FirstName
    ,person.LastName
    ,addr.City
    ,addr.State
FROM
    Person AS person
LEFT JOIN
    Address AS addr
    ON person.PersonId = addr.PersonId;
--------------------------------------------------------------------------------
/LeetCode/others/176_Second_Highest_Salary.sql:
--------------------------------------------------------------------------------
-- LeetCode 176: second highest DISTINCT salary, NULL when it does not exist.
-- A scalar subquery used as a SELECT expression already yields NULL when it
-- returns no row, so the original IFNULL(..., NULL) wrapper was a no-op and
-- has been dropped.
SELECT
    (SELECT DISTINCT Salary
     FROM Employee
     ORDER BY Salary DESC
     LIMIT 1 OFFSET 1) AS SecondHighestSalary;
--------------------------------------------------------------------------------
/LeetCode/others/177_Nth_Highest_Salary.sql:
--------------------------------------------------------------------------------
1 | CREATE FUNCTION getNthHighestSalary(N INT) RETURNS INT
2 | BEGIN
3 | DECLARE M INT DEFAULT N - 1;
4 | RETURN (
5 | SELECT DISTINCT Salary FROM Employee ORDER BY Salary DESC LIMIT 1 OFFSET M
6 | );
7 | END
8 | 
9 | SELECT * /*This is the outer query part */
10 | FROM Employee Emp1
11 | WHERE (N-1) = ( /* Subquery starts here */
12 | SELECT COUNT(DISTINCT Emp2.Salary) -- fixed: DISTINCT and Emp2.Salary must be separated by a space
13 | FROM Employee Emp2
14 | WHERE Emp2.Salary > Emp1.Salary)
--------------------------------------------------------------------------------
/LeetCode/others/178_Rank_Scores.sql:
--------------------------------------------------------------------------------
1 | -- MS SQL
2 | SELECT
3 | Score
4 | ,DENSE_RANK() over (ORDER BY Score DESC) AS Rank
5 | FROM scores
6 | ORDER BY Score DESC;
7 |
8 | -- MYSQL
9 | SELECT Scores.Score, Rank
10 | FROM Scores
11 | Left join (Select Score, @Rank:=@Rank+1 as Rank
12 | From (Select Distinct Score From Scores Order by Score DESC) S1, (Select @Rank:=0) var) S2 on Scores.Score=S2.Score
13 | order by Scores.Score desc;
--------------------------------------------------------------------------------
/LeetCode/others/180_Consecutive_Numbers.sql:
--------------------------------------------------------------------------------
1 | -- analysis
2 | -- three consecutive rows
3 | -- join twice, each time offsetting id by 1
4 | -- filter by equating three columns
5 | -- NULL? use inner join
6 |
7 | SELECT
8 | DISTINCT n1.Num
9 | FROM Number n1
10 | JOIN Number n2
11 | ON n2.Id = n1.Id + 1
12 | JOIN Number n3
13 | ON n3.Id = n2.Id + 1
14 | WHERE n1.Num = n2.Num
15 | AND n2.Num = n3.Num;
16 |
17 | -- n1.1 matches to n2.2
18 | /*
19 | 1 1 1
20 | 1 1 2
21 | 1 2 1
22 | 2 1 2
23 | 1 2 2
24 | 2 2 -
25 | 2 -
26 | */
27 |
28 | -- shift in the other direction also works
29 | -- n1.2 matches to n2.1
30 | SELECT
31 | DISTINCT n1.Num
32 | FROM Number n1
33 | JOIN Number n2
34 | ON n2.Id = n1.Id - 1
35 | JOIN Number n3
36 | ON n3.Id = n2.Id - 1
37 | WHERE n1.Num = n2.Num
38 | AND n2.Num = n3.Num;
39 |
40 | -- can use lead or lag
41 | SELECT DISTINCT Num AS ConsecutiveNums
42 | FROM (
43 | SELECT
44 | Num
45 | ,LAG(Num, 1) OVER (ORDER BY Id) AS next
46 | ,LAG(Num, 2) OVER (ORDER BY Id) AS next_next
47 | FROM Logs
48 | ) AS three_day_log
49 | WHERE Num = next AND next = next_next;
50 |
51 | -- ERROR: Windowed functions can only appear in the SELECT or ORDER BY clauses.
52 | -- SELECT
53 | -- Num
54 | -- FROM Logs
55 | -- WHERE Num = LAG(Num, 1) OVER (ORDER BY Id)
56 | -- AND NUM = LAG(Num, 2) OVER (ORDER BY Id);
57 |
58 | /* LAG_id, LAG_num REORDER
59 | 1 N
60 | 2 1
61 | 3 2
62 | 4 3
63 | 5 4
64 | 6 5
65 | 7 6
66 | */
67 | SELECT
68 | Id
69 | ,LAG(Num, 1) OVER (ORDER BY Id) lag_1
70 | FROM Logs
71 | ORDER BY LAG(Num, 1) OVER (ORDER BY Id); -- NULL FIRST
--------------------------------------------------------------------------------
/LeetCode/others/181_Employees_Earning_More_Than_Their_Managers.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | e.Name AS Employee
3 | FROM Employee AS e
4 | JOIN
5 | Employee AS m
6 | WHERE e.ManagerId = m.Id
7 | AND e.Salary > m.Salary;
--------------------------------------------------------------------------------
/LeetCode/others/182_Duplicate_Emails.sql:
--------------------------------------------------------------------------------
1 | SELECT Email
2 | FROM Person
3 | GROUP BY Email
4 | HAVING COUNT(*) > 1;
--------------------------------------------------------------------------------
/LeetCode/others/183_Customers_Who_Never_Order.sql:
--------------------------------------------------------------------------------
1 | SELECT Name AS Customers
2 | FROM Customers
3 | WHERE
4 | Id NOT IN (SELECT DISTINCT CustomerId FROM Orders);
5 |
6 | SELECT
7 | c.Name AS Customers
8 | FROM Customers AS c
9 | LEFT JOIN Orders AS o
10 | ON c.Id = o.CustomerId
11 | WHERE o.Id IS NULL;
--------------------------------------------------------------------------------
/LeetCode/others/184_Department_Highest_Salary.sql:
--------------------------------------------------------------------------------
1 |
2 |
3 | /* -------------
4 | BEGINNER SOLUTION: CORRELATED (N+1)
5 | */
6 | -- succinct but correlated
7 | SELECT
8 | d.Name AS 'Department'
9 | ,e.Name AS 'Employee'
10 | ,e.Salary
11 | FROM Employee AS e
12 | JOIN Department AS d
13 | ON e.DepartmentId = d.Id
14 | WHERE
15 | (SELECT COUNT(DISTINCT e2.Salary)
16 | FROM
17 | Employee e2
18 | WHERE
19 | e2.Salary > e.Salary
20 | AND e.DepartmentId = e2.DepartmentId
21 | ) = 0;
22 |
23 | -- cleaner than above
24 | SELECT
25 | d.Name AS 'Department'
26 | ,e.Name AS 'Employee'
27 | ,e.Salary
28 | FROM Employee AS e
29 | JOIN Department AS d
30 | ON e.DepartmentId = d.Id
31 | WHERE NOT EXISTS
32 | (SELECT * FROM Employee e2
33 | WHERE e2.Salary > e.Salary
34 | AND e2.DepartmentId = e.DepartmentId);
35 |
36 | -- sample code from SQL Antipattern pdf book
37 | -- BugsProducts: product_id, bug_id
38 | -- Bugs: bug_id, date_reported
39 | SELECT bp1.product_id, b1.date_reported AS latest, b1.bug_id
40 | FROM Bugs b1 JOIN BugsProducts bp1 USING (bug_id)
41 | WHERE NOT EXISTS
42 | (SELECT * FROM Bugs b2 JOIN BugsProducts bp2 USING (bug_id)
43 | WHERE bp1.product_id = bp2.product_id
44 | AND b1.date_reported < b2.date_reported);
45 |
46 | /* -------------
47 | INTERMEDIATE SOLUTION: NO CORRELATED SUBQUERY
48 | */
49 | --
50 | -- use departmentId and Salary to uniquely identify employee
51 | -- assume no two employees in same dept shares same highest salary
52 | SELECT
53 | d.Name AS Department
54 | ,e.Name AS Employee
55 | ,a.max_salary AS Salary
56 | FROM
57 | Employee AS e
58 | JOIN
59 | Department AS d
60 | ON e.DepartmentId = d.Id
61 | JOIN
62 | (SELECT
63 | DepartmentId
64 | ,MAX(Salary) AS max_salary
65 | FROM Employee
66 | GROUP BY DepartmentId) AS a
67 | ON e.DepartmentId = a.DepartmentId
68 | WHERE
69 | e.Salary = a.max_salary
70 | AND e.DepartmentId = a.DepartmentId;
71 |
72 | -- use view to simplify code
73 | CREATE VIEW latest_bug AS
74 | SELECT -- in each group: product_id is fixed, date_reported is aggregated, bug_id is ignored
75 | product_id
76 | ,MAX(date_reported) AS latest
77 | FROM Bugs b JOIN BugsProducts bp
78 | ON b.bug_id = bp.bug_id -- primary key, one-to-one
79 | GROUP BY product_id;
80 |
81 | -- now we know the latest day when bug for each product_id occurs
82 | -- use latest_day and product_id to retrieve bug_id
83 | -- more than one bug_id may be returned if bug occured more than once on latest day
84 | SELECT
85 | bp.product_id
86 | ,lb.latest
87 | ,bp.bug_id
88 | FROM BugsProducts bp JOIN Bugs b ON bp.bug_id = b.bug_id -- Bugs is needed: date_reported is not in BugsProducts or the view
89 | JOIN latest_bug lb ON bp.product_id = lb.product_id -- product_id is unique in tmp table, not unique in left table
90 | AND b.date_reported = lb.latest; -- without this predicate, latest will be matched to all rows on left with same product id
91 |
92 | DROP VIEW latest_bug; -- DROP VIEW requires the view name
93 |
94 | -- textbook version
95 | SELECT m.product_id, m.latest, b1.bug_id
96 | FROM Bugs b1 JOIN BugsProducts bp1 USING (bug_id)
97 | JOIN (SELECT bp2.product_id, MAX(b2.date_reported) AS latest
98 | FROM Bugs b2 JOIN BugsProducts bp2 USING (bug_id)
99 | GROUP BY bp2.product_id) m
100 | ON (bp1.product_id = m.product_id AND b1.date_reported = m.latest);
101 |
102 | /* -------------
103 | ULTIMATE SOLUTION: JOIN
104 | */
105 |
106 | CREATE VIEW tmp_join AS
107 | SELECT bp.product_id, b.bug_id, b.date_reported
108 | FROM BugsProducts bp JOIN Bugs b
109 | ON bp.bug_id = b.bug_id; -- standard SQL: inner JOIN takes ON, not WHERE (JOIN without ON is a MySQL-only cross join)
110 |
111 | -- left join with itself, matching every row to rows with same product_id and earlier date
112 | -- those without a match is what we want (the latest)
113 | SELECT
114 | t1.*
115 | FROM tmp_join t1
116 | LEFT JOIN tmp_join t2
117 | ON t1.product_id = t2.product_id
118 | AND t1.date_reported < t2.date_reported -- latest date in left will match to null on right
119 | WHERE t2.bug_id IS NULL;
120 |
121 | -- textbook version
122 | SELECT bp1.product_id, b1.date_reported AS latest, b1.bug_id
123 | FROM Bugs b1 JOIN BugsProducts bp1 ON (b1.bug_id = bp1.bug_id)
124 | LEFT OUTER JOIN (Bugs AS b2 JOIN BugsProducts AS bp2 ON (b2.bug_id = bp2.bug_id))
125 | ON (bp1.product_id = bp2.product_id AND (b1.date_reported < b2.date_reported
126 | OR b1.date_reported = b2.date_reported AND b1.bug_id < b2.bug_id))
127 | WHERE b2.bug_id IS NULL;
128 |
129 |
--------------------------------------------------------------------------------
/LeetCode/others/185_Department_Top_Three_Salaries.sql:
--------------------------------------------------------------------------------
1 | -- MS SQL
2 | WITH department_ranking AS
3 | (SELECT
4 | e.Name AS Employee
5 | ,d.Name AS Department
6 | ,e.Salary
7 | ,DENSE_RANK() OVER (PARTITION BY DepartmentId ORDER BY Salary DESC) AS rnk
8 | FROM Employee AS e
9 | JOIN Department AS d
10 | ON e.DepartmentId = d.Id)
11 |
12 | SELECT
13 | Department
14 | ,Employee
15 | ,Salary
16 | FROM department_ranking
17 | WHERE rnk <= 3
18 | ORDER BY Department ASC, Salary DESC;
--------------------------------------------------------------------------------
/LeetCode/others/196_Delete_Duplicate_Emails.sql:
--------------------------------------------------------------------------------
1 | -- do not use sub-clause or IN clause
2 | -- it does not work when something is deleted
3 | DELETE p2.*
4 | FROM Person p1
5 | JOIN Person p2
6 | ON p1.Email = p2.Email
7 | WHERE p1.Id < p2.Id;
8 |
9 |
10 | # DELETE p1
11 | # FROM Person p1,
12 | # Person p2
13 | # WHERE
14 | # p1.Email = p2.Email
15 | # AND p1.Id > p2.Id;
--------------------------------------------------------------------------------
/LeetCode/others/197_Rising_Temperature.sql:
--------------------------------------------------------------------------------
1 | # w1 is the earlier day, w2 (= w1.Id + 1) is the later day
2 | # return the LATER day's Id when its temperature is higher (was w1.Id: the colder earlier day)
3 | SELECT
4 | w2.Id
5 | FROM Weather w1
6 | JOIN Weather w2
7 | ON w1.Id = w2.Id - 1
8 | WHERE w1.Temperature < w2.Temperature;
9 |
10 | # trap: ask for clarification!
11 | # whether 1 day maps to 1 record!
12 | # whether every day has record!
13 | # DATEDIFF(later date, earlier date)
14 |
15 | SELECT w2.Id
16 | FROM Weather w1
17 | JOIN Weather w2
18 | ON DATEDIFF(w2.RecordDate, w1.RecordDate) = 1
19 | AND w2.Temperature > w1.Temperature;
20 |
21 | # no function in ON clause, check query plan
22 | SELECT w2.Id
23 | FROM Weather w1, Weather w2
24 | WHERE DATEDIFF(w2.RecordDate, w1.RecordDate) = 1
25 | AND w2.Temperature > w1.Temperature;
--------------------------------------------------------------------------------
/LeetCode/others/262_Trips_and_Users.sql:
--------------------------------------------------------------------------------
1 | -- filter out banned users (driver and clients)
2 | -- calculate rate
3 |
4 | SELECT
5 | t.Request_at AS Day
6 | ,ROUND(SUM(t.Status != "completed") / COUNT(*), 2) AS 'Cancellation Rate'
7 | FROM Trips t
8 | JOIN Users d ON t.Driver_Id = d.Users_Id
9 | JOIN Users c ON t.Client_Id = c.Users_Id
10 | WHERE d.Banned = "No"
11 | AND c.Banned = "No"
12 | AND t.Request_at BETWEEN "2013-10-01" AND "2013-10-03"
13 | GROUP BY t.Request_at
14 | ORDER BY t.Request_at;
15 |
16 | WITH valid_user AS (
17 | SELECT *
18 | FROM Users
19 | WHERE Banned = "No"
20 | )
21 | , valid_trips AS (
22 | SELECT *
23 | FROM Trips
24 | WHERE Request_at BETWEEN "2013-10-01" AND "2013-10-03"
25 | )
26 | SELECT
27 | t.Request_at AS Day
28 | ,ROUND(SUM(t.Status != "completed") / COUNT(*), 2) AS 'Cancellation Rate'
29 | FROM valid_trips t
30 | JOIN valid_user d ON t.Driver_Id = d.Users_Id
31 | JOIN valid_user c ON t.Client_Id = c.Users_Id
32 | GROUP BY t.Request_at
33 | ORDER BY t.Request_at;
34 |
35 | SELECT
36 | t.Request_at AS Day
37 | ,ROUND(SUM(t.Status != "completed") / COUNT(*), 2) AS "Cancellation Rate"
38 | FROM Trips t
39 | JOIN (SELECT * FROM Users WHERE Banned = "No") d ON t.Driver_Id = d.Users_Id
40 | JOIN (SELECT * FROM Users WHERE Banned = "No") c ON t.Client_Id = c.Users_Id
41 | WHERE t.Request_at BETWEEN "2013-10-01" AND "2013-10-03"
42 | GROUP BY t.Request_at
43 | ORDER BY t.Request_at;
44 |
45 | SELECT
46 | Request_at as Day,
47 | ROUND(SUM(Status != "completed") / COUNT(*), 2) AS 'Cancellation Rate'
48 | FROM Trips
49 | WHERE Request_at BETWEEN '2013-10-01' AND '2013-10-03'
50 | AND Client_Id IN (SELECT Users_Id FROM Users WHERE Banned = 'No')
51 | AND Driver_Id IN (SELECT Users_Id FROM Users WHERE Banned = 'No')
52 | GROUP BY Request_at;
53 |
54 | SELECT
55 | Request_at AS Day
56 | ,ROUND(SUM(Status != "completed") / COUNT(*), 2) AS 'Cancellation Rate'
57 | FROM Trips
58 | WHERE Request_at BETWEEN "2013-10-01" AND "2013-10-03"
59 | AND Driver_Id IN (SELECT Users_Id FROM Users WHERE Banned = 'No')
60 | AND Client_Id IN (SELECT Users_Id FROM Users WHERE Banned = 'No')
61 | GROUP BY Request_at;
--------------------------------------------------------------------------------
/LeetCode/others/569_Median_Employee_Salary.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | Id, Company, Salary
3 | FROM
4 | (SELECT
5 | e.Id,
6 | e.Salary,
7 | e.Company,
8 | IF(@prev = e.Company, @Rank:=@Rank + 1, @Rank:=1) AS rank,
9 | @prev:=e.Company
10 | FROM
11 | Employee e, (SELECT @Rank:=0, @prev:=0) AS temp
12 | ORDER BY e.Company , e.Salary , e.Id) Ranking
13 | INNER JOIN
14 | (SELECT
15 | COUNT(*) AS totalcount, Company AS name
16 | FROM
17 | Employee e2
18 | GROUP BY e2.Company) companycount ON companycount.name = Ranking.Company
19 | WHERE
20 | Rank = FLOOR((totalcount + 1) / 2)
21 | OR Rank = FLOOR((totalcount + 2) / 2)
22 | ;
23 |
24 | -- total count even: select two
25 | -- total count odd: select one
26 | WITH ranking AS (
27 | SELECT
28 | Id
29 | ,Company
30 | ,Salary
31 | ,COUNT(*) OVER (PARTITION BY Company) AS total_count
32 | ,RANK() OVER (PARTITION BY Company ORDER BY Salary DESC) AS rank
33 | FROM Employee)
34 | SELECT
35 | Id
36 | ,Company
37 | ,Salary
38 | FROM ranking
39 | WHERE rank = FLOOR((total_count + 1) / 2)
40 | OR rank = FLOOR((total_count + 2) / 2);
41 |
42 |
--------------------------------------------------------------------------------
/LeetCode/others/570_Managers_with_at_Least_5_Direct_Reports.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | m.Name
3 | FROM Employee AS e
4 | JOIN Employee AS m
5 | ON e.ManagerId = m.Id
6 | GROUP BY m.Id, m.Name -- group on Id too: two distinct managers sharing a name must not be merged
7 | HAVING COUNT(*) >= 5
8 | ORDER BY m.Name;
--------------------------------------------------------------------------------
/LeetCode/others/571_Find_Median_Given_Frequency_of_Numbers.sql:
--------------------------------------------------------------------------------
1 |
2 | SELECT
3 | AVG(T1.Number) as median
4 | FROM
5 | (
6 | SELECT
7 | Number,
8 | @cum_freq AS lb,
9 | (@cum_freq:=@cum_freq + Frequency) AS ub
10 | FROM Numbers, (SELECT @cum_freq:=0) init
11 | ORDER BY Number
12 | ) AS T1,
13 | (
14 | SELECT
15 | SUM(Frequency) AS total_freq
16 | FROM Numbers
17 | ) AS T2
18 | WHERE T1.lb < CEIL(T2.total_freq/2)
19 | AND T1.ub >= CEIL(T2.total_freq/2)
20 |
21 | /* u l
22 | 1 NULL 1 0
23 | 2 1 3 1
24 | 1 2 4 3
25 | 4 1 8 4
26 |
27 | If N is even: select avg(N/2, N/2 + 1)
28 | If b is first occuring at frequency N/2 + 1, it has low bound N/2, upper bound doesn't matter
29 | If c is last occuring at N/2, it has upper bound N/2, lower bound doesn't matter
30 |
31 | If N is odd, select CEIL(N/2), lb < CEIL(N/2) <= ub
32 |
33 | Note that lb cannot include CEIL(N/2), because it means previous number's range includes median
34 |
35 | To share equal sign with even case, remove CEIL, lb <= N/2 <= CEIL(N/2), in odd case, lb <= N/2 < CEIL(N/2), lb can be as large as FLOOR(N/2)
36 | */
37 | WITH lag AS (
38 | SELECT
39 | Number
40 | ,Frequency AS cur
41 | ,LAG(Frequency, 1) OVER (ORDER BY Number) AS prev
42 | FROM Numbers
43 | )
44 | ,bound AS (
45 | SELECT
46 | Number
47 | ,Frequency
48 | ,SUM(prev) OVER (ORDER BY Number) AS lb
49 | ,SUM(cur) OVER (ORDER BY Number) AS up
50 | ,SUM(Frequency) OVER () AS total
51 | FROM lag
52 | )
53 | SELECT
54 | AVG(Number) AS median
55 | FROM bound
56 | WHERE lb < CEIL(total / 2)
57 | AND up >= CEIL(total / 2)
--------------------------------------------------------------------------------
/LeetCode/others/574_Winning_Candidate.sql:
--------------------------------------------------------------------------------
1 | -- There is a case when people vote for someone who is not in the candidate.
2 | -- The solution is finding out the winner id from vote and match to candidate.
3 | -- correct solution
4 | SELECT Name
5 | FROM Candidate
6 | WHERE id = (
7 | SELECT CandidateID
8 | FROM Vote
9 | GROUP BY CandidateID
10 | ORDER BY COUNT(*) DESC
11 | LIMIT 1);
12 |
13 | -- incorrect: return most voted candidate that exists in Candidate
14 | -- if most voted does not exist in Candidate, it is missed (orphaned rows)
15 | SELECT
16 | c.Name
17 | FROM Candidate AS c
18 | LEFT JOIN Vote AS v
19 | ON c.id = v.CandidateId
20 | GROUP BY c.Name
21 | ORDER BY COUNT(v.id) DESC
22 | LIMIT 1;
23 |
24 | -- incorrect: return null if most voted candidate does not exist in Candidate
25 | -- at least show something!
26 | SELECT
27 | c.Name
28 | FROM Candidate AS c
29 | RIGHT JOIN Vote AS v
30 | ON c.id = v.CandidateId
31 | GROUP BY c.Name
32 | ORDER BY COUNT(v.id) DESC
33 | LIMIT 1;
--------------------------------------------------------------------------------
/LeetCode/others/577_Employee_Bonus.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | e.name
3 | ,b.bonus
4 | FROM Employee AS e
5 | LEFT JOIN Bonus AS b
6 | ON e.empId = b.empId
7 | WHERE b.bonus < 1000
8 | OR b.bonus IS NULL
9 | ORDER BY e.name;
--------------------------------------------------------------------------------
/LeetCode/others/578_Get_Highest_Answer_Rate_Question.sql:
--------------------------------------------------------------------------------
1 | SELECT question_id AS survey_log
2 | FROM (
3 | SELECT
4 | question_id
5 | ,SUM(action="answer") / SUM(action="show") AS ans_rate
6 | FROM survey_log
7 | GROUP BY question_id
8 | ORDER BY ans_rate DESC
9 | LIMIT 1
10 | ) AS _;
--------------------------------------------------------------------------------
/LeetCode/others/579_Find_Cumulative_Salary_of_an_Employee.sql:
--------------------------------------------------------------------------------
1 | -- use self join three times (if month is not dense)
2 | -- name table e, e1, e2, e3; cummulatively sum e1, e2, e3
3 | SELECT
4 | e.Id
5 | ,e.Month
6 | ,IFNULL(e.Salary, 0) + IFNULL(e1.Salary, 0) + IFNULL(e2.Salary, 0) AS Salary
7 | FROM Employee e
8 | LEFT JOIN Employee e1 ON e.Id = e1.Id AND e.Month = e1.Month + 1
9 | LEFT JOIN Employee e2 ON e.Id = e2.Id AND e.Month = e2.Month + 2
10 | WHERE (e.Id, e.Month) NOT IN (
11 | SELECT
12 | Id
13 | ,MAX(Month) AS max_month
14 | FROM Employee
15 | GROUP BY Id)
16 | ORDER BY e.Id ASC, e.Month DESC;
17 |
18 | SELECT
19 | e.Id
20 | ,e.Month
21 | ,IFNULL(e.Salary, 0) + IFNULL(e1.Salary, 0) + IFNULL(e2.Salary, 0) AS Salary
22 | FROM
23 | (SELECT Id, MAX(Month) AS max_month
24 | FROM Employee
25 | GROUP BY Id) AS e_max
26 | -- if using left join, employee with only one record will see NULL returned after join
27 | JOIN Employee e ON e_max.Id = e.Id AND e_max.max_month > e.Month
28 | LEFT JOIN Employee e1 ON e.Id = e1.Id AND e.Month = e1.Month + 1
29 | LEFT JOIN Employee e2 ON e.Id = e2.Id AND e.Month = e2.Month + 2
30 | ORDER BY e.Id ASC, e.Month DESC;
31 |
32 | -- use where clause instead of inequality join
33 | SELECT
34 | e.Id
35 | ,e.Month
36 | ,IFNULL(e.Salary, 0) + IFNULL(e1.Salary, 0) + IFNULL(e2.Salary, 0) AS Salary
37 | FROM
38 | (SELECT Id, MAX(Month) AS max_month
39 | FROM Employee
40 | GROUP BY Id) AS e_max
41 | -- if using left join, employee with only one record will see NULL returned after join
42 | JOIN Employee e ON e_max.Id = e.Id
43 | LEFT JOIN Employee e1 ON e.Id = e1.Id AND e.Month = e1.Month + 1
44 | LEFT JOIN Employee e2 ON e.Id = e2.Id AND e.Month = e2.Month + 2
45 | WHERE e_max.max_month != e.Month
46 | ORDER BY e.Id ASC, e.Month DESC;
47 |
48 | -- use cross join, also correct
49 | SELECT
50 | e.Id
51 | ,e.Month
52 | ,IFNULL(e.Salary, 0) + IFNULL(e1.Salary, 0) + IFNULL(e2.Salary, 0) AS Salary
53 | FROM
54 | (SELECT Id, MAX(Month) AS max_month
55 | FROM Employee
56 | GROUP BY Id) AS e_max,
57 | Employee e
58 | LEFT JOIN Employee e1 ON e.Id = e1.Id AND e.Month = e1.Month + 1
59 | LEFT JOIN Employee e2 ON e.Id = e2.Id AND e.Month = e2.Month + 2
60 | WHERE e.Id = e_max.Id AND e.Month != e_max.max_month
61 | ORDER BY e.Id ASC, e.Month DESC;
62 |
63 | -- weird condition, non-standard SQL
64 | SELECT
65 | e.Id
66 | ,e.Month
67 | ,IFNULL(e.Salary, 0) + IFNULL(e1.Salary, 0) + IFNULL(e2.Salary, 0) AS Salary
68 | FROM
69 | Employee e
70 | LEFT JOIN Employee e1 ON e.Id = e1.Id AND e.Month = e1.Month + 1
71 | LEFT JOIN Employee e2 ON e.Id = e2.Id AND e.Month = e2.Month + 2
72 | WHERE (e.Id, e.Month) != (SELECT Id, MAX(Month) FROM Employee WHERE Id = e.Id) AS x
73 | ORDER BY e.Id ASC, e.Month DESC;
74 |
75 | -- best solution (rely on consecutive month)
76 | SELECT
77 | e1.Id
78 | ,MAX(e2.Month) AS Month
79 | ,SUM(e2.Salary) AS Salary
80 | FROM Employee e1
81 | JOIN Employee e2
82 | ON e1.Id = e2.Id
83 | AND e1.Month - e2.Month BETWEEN 1 AND 3
84 | GROUP BY e1.Id, e1.Month
85 | ORDER BY e1.Id ASC, e1.Month DESC
--------------------------------------------------------------------------------
/LeetCode/others/580_Count_Student_Number_in_Departments.sql:
--------------------------------------------------------------------------------
1 | /* warning
2 |
3 | NO need to handle null if tables are joined before aggregating.
4 | NULL columns are simply not counted (return 0 if department has no student)
5 |
6 | If students are counted before joining, only existing department
7 | with at least 1 student are aggregated over with count.
8 |
9 | Left joining full dept tables (including empty dept) will result in NULL */
10 |
11 | SELECT
12 | d.dept_name
13 | ,COUNT(s.student_name) AS student_number
14 | FROM department AS d
15 | LEFT JOIN student AS s
16 | ON d.dept_id = s.dept_id
17 | GROUP BY dept_name
18 | ORDER BY student_number DESC, dept_name;
19 |
20 | SELECT
21 | d.dept_name
22 | ,IFNULL(s.student_number, 0) AS student_number
23 | FROM department AS d
24 | LEFT JOIN (
25 | SELECT
26 | dept_id
27 | ,COUNT(student_name) AS student_number
28 | FROM student
29 | GROUP BY dept_id
30 | ) AS s
31 | ON d.dept_id = s.dept_id
32 | ORDER BY student_number DESC, dept_name ASC;
--------------------------------------------------------------------------------
/LeetCode/others/584_Find_Customer_Referee.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | name
3 | FROM customer
4 | WHERE referee_id != 2
5 | OR referee_id IS NULL;
--------------------------------------------------------------------------------
/LeetCode/others/585_Investments_in_2016.sql:
--------------------------------------------------------------------------------
1 |
2 | SELECT
3 | ROUND(SUM(TIV_2016), 2) AS TIV_2016
4 | FROM insurance
5 | WHERE TIV_2015 IN
6 | (SELECT TIV_2015
7 | FROM insurance
8 | GROUP BY TIV_2015
9 | HAVING COUNT(*) > 1)
10 | AND (LAT, LON) NOT IN
11 | (SELECT lat, lon
12 | FROM insurance
13 | GROUP BY lat, lon
14 | HAVING COUNT(*) > 1);
15 |
16 | -- use IN is faster than NOT IN
17 | select
18 | round(sum(tiv_2016),2) as tiv_2016
19 | from
20 | insurance
21 | where
22 | tiv_2015 in (select tiv_2015 from insurance group by 1 having count(*) > 1) -- shared with at least one other policy means count >= 2
23 | and (lat, lon) in (select lat, lon from insurance group by 1, 2 having count(*) = 1)
24 |
25 |
26 | SELECT ROUND(SUM(i1.TIV_2016), 2) AS TIV_2016
27 | FROM insurance i1
28 | WHERE EXISTS (
29 | SELECT *
30 | FROM insurance i2
31 | WHERE i1.PID != i2.PID
32 | AND i1.TIV_2015 = i2.TIV_2015)
33 | AND NOT EXISTS(
34 | SELECT *
35 | FROM insurance i3
36 | WHERE i1.PID != i3.PID
37 | AND i1.LAT = i3.LAT
38 | AND i1.LON = i3.LON);
39 |
--------------------------------------------------------------------------------
/LeetCode/others/586_Customer_Placing_the_Largest_Number_of_Orders.sql:
--------------------------------------------------------------------------------
1 | SELECT customer_number
2 | FROM orders
3 | GROUP BY customer_number
4 | ORDER BY COUNT(*) DESC
5 | LIMIT 1;
--------------------------------------------------------------------------------
/LeetCode/others/595_Big_Countries.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | name
3 | ,population
4 | ,area
5 | FROM World
6 | WHERE
7 | population > 25000000
8 | OR area > 3000000;
9 |
10 | -- UNION method, for reference
11 | SELECT
12 | name
13 | ,population
14 | ,area
15 | FROM World
16 | WHERE
17 | population > 25000000
18 | UNION
19 | SELECT
20 | name
21 | ,population
22 | ,area
23 | FROM World
24 | WHERE
25 | area > 3000000;
--------------------------------------------------------------------------------
/LeetCode/others/596_Classes_More_Than_5_Students.sql:
--------------------------------------------------------------------------------
1 | # ask if there is duplicate student in class
2 | -- if so, use DISTINCT
3 | SELECT
4 | class
5 | FROM courses
6 | GROUP BY class
7 | HAVING COUNT(DISTINCT student) >= 5;
--------------------------------------------------------------------------------
/LeetCode/others/597_Friend_Requests_I-_Overall_Acceptance_Rate.sql:
--------------------------------------------------------------------------------
1 | # Write your MySQL query statement below
2 |
3 | /*
4 | how to count friends for user 1
5 |
6 | requester accepter
7 | 1 2
8 | 1 3
9 | 4 1
10 | 5 1
11 |
12 | left join does not work, must use union
13 | count how many times 1 appears in requester col, then accepter col */
14 |
15 | SELECT
16 | requester_id AS id
17 | ,SUM(tally) AS num
18 | FROM
19 | (SELECT
20 | requester_id
21 | ,COUNT(*) AS tally
22 | FROM request_accepted
23 | GROUP BY requester_id
24 | UNION ALL
25 | SELECT
26 | accepter_id
27 | ,COUNT(*)
28 | FROM request_accepted
29 | GROUP BY accepter_id) a -- alias!
30 | GROUP BY id
31 | ORDER BY num DESC -- sort order!
32 | LIMIT 1;
33 |
34 | -- IFNULL(value if not null, value if null)
35 | -- to get a scalar value, envelope subquery with bracket
36 | -- to select from subquery, envelope with bracket
37 | -- Every derived table must have its own alias: AS acc. AS req
38 | SELECT
39 | ROUND(
40 | IFNULL(
41 | (SELECT COUNT(*) FROM (SELECT DISTINCT requester_id, accepter_id FROM request_accepted) AS acc)
42 | /
43 | (SELECT COUNT(*) FROM (SELECT DISTINCT sender_id, send_to_id FROM friend_request) AS req)
44 | ,0)
45 | ,2) AS accept_rate;
46 |
47 | -- use cross join
48 | SELECT ROUND(
49 | IFNULL(
50 | COUNT(DISTINCT requester_id, accepter_id)/
51 | COUNT(DISTINCT sender_id, send_to_id)
52 | , 0)
53 | , 2) AS accept_rate
54 | FROM request_accepted, friend_request
55 |
56 | /*
57 | 1. if A sends two requests to B, B request the second, does the first request count?
58 | 2. duplicate requests are counted once
59 | */
60 |
61 | SELECT
62 | ROUND(
63 | IFNULL(
64 | (SELECT COUNT(DISTINCT requester_id, accepter_id) FROM request_accepted)
65 | /(SELECT COUNT(DISTINCT sender_id, send_to_id) FROM friend_request)
66 | , 0)
67 | ,2
68 | ) AS accept_rate;
69 |
70 |
71 | SELECT
72 | ROUND(
73 | IFNULL(a.accept_tally / r.request_tally, 0)
74 | ,2
75 | ) AS accept_rate
76 | FROM
77 | (SELECT COUNT(DISTINCT requester_id, accepter_id) AS accept_tally FROM request_accepted) AS a,
78 | (SELECT COUNT(DISTINCT sender_id, send_to_id) AS request_tally FROM friend_request) AS r;
79 |
80 |
81 | SELECT
82 | ROUND(
83 | IF(r.request_tally = 0, 0, a.accept_tally / r.request_tally)
84 | ,2
85 | ) AS accept_rate
86 | FROM
87 | (SELECT COUNT(DISTINCT requester_id, accepter_id) AS accept_tally
88 | FROM request_accepted) AS a,
89 | (SELECT COUNT(DISTINCT sender_id, send_to_id) AS request_tally
90 | FROM friend_request) AS r;
--------------------------------------------------------------------------------
/LeetCode/others/601_Human_Traffic_of_Stadium.sql:
--------------------------------------------------------------------------------
1 | -- be careful with duplicate
2 | SELECT DISTINCT
3 | s1.*
4 | FROM
5 | stadium AS s1
6 | ,stadium AS s2
7 | ,stadium AS s3
8 | WHERE s1.people >= 100
9 | AND s2.people >= 100
10 | AND s3.people >= 100
11 | AND ((s1.id = s2.id - 1 AND s1.id = s3.id - 2) -- start of window
12 | OR (s1.id = s2.id + 1 AND s1.id = s3.id - 1) -- middle of window
13 | OR (s1.id = s2.id + 2 AND s1.id = s3.id + 1)) -- end of window
14 | ORDER BY s1.id;
15 |
16 | -- MySQL: pre-filtering
17 | SELECT DISTINCT
18 | s1.*
19 | FROM
20 | (SELECT * FROM stadium WHERE people >= 100) AS s1
21 | ,(SELECT * FROM stadium WHERE people >= 100) AS s2
22 | ,(SELECT * FROM stadium WHERE people >= 100) AS s3
23 | WHERE (s1.id = s2.id - 1 AND s1.id = s3.id - 2)
24 | OR (s1.id = s2.id + 1 AND s1.id = s3.id - 1)
25 | OR (s1.id = s2.id + 2 AND s1.id = s3.id + 1)
26 | ORDER BY s1.id;
27 |
28 | -- MS SQL: cleaner code
29 | WITH good_day AS (
30 | SELECT * FROM stadium WHERE people >= 100
31 | )
32 | SELECT DISTINCT s1.* FROM
33 | good_day AS s1,
34 | good_day AS s2,
35 | good_day AS s3
36 | WHERE (s1.id = s2.id - 1 AND s1.id = s3.id - 2)
37 | OR (s1.id = s2.id + 1 AND s1.id = s3.id - 1)
38 | OR (s1.id = s2.id + 2 AND s1.id = s3.id + 1)
39 | ORDER BY s1.id;
40 |
41 |
--------------------------------------------------------------------------------
/LeetCode/others/602_Friend_Requests_II-_Who_Has_the_Most_Friends.sql:
--------------------------------------------------------------------------------
1 | /*
2 | how to count friends for user 1
3 |
4 | requester accepter
5 | 1 2
6 | 1 3
7 | 4 1
8 | 5 1
9 |
10 | left join does not work, must use union
11 | count how many times 1 appears in requester col, then accepter col
12 |
13 | friendship is mutual
14 |
15 | cannot have 1, 2 in one row, then 2, 1 in another row */
16 |
17 | SELECT
18 | requester_id AS id
19 | ,SUM(tally) AS num
20 | FROM
21 | (SELECT
22 | requester_id
23 | ,COUNT(*) AS tally
24 | FROM request_accepted
25 | GROUP BY requester_id
26 | UNION ALL
27 | SELECT
28 | accepter_id
29 | ,COUNT(*)
30 | FROM request_accepted
31 | GROUP BY accepter_id) a -- alias!
32 | GROUP BY id
33 | ORDER BY num DESC -- sort order!
34 | LIMIT 1;
35 |
36 | # Write your MySQL query statement below
37 |
38 | SELECT id, COUNT(*) AS num FROM (
39 | SELECT accepter_id AS id FROM request_accepted
40 | UNION ALL
41 | SELECT requester_id AS id FROM request_accepted
42 | ) AS c
43 | GROUP BY 1
44 | ORDER BY num DESC
45 | LIMIT 1;
46 |
--------------------------------------------------------------------------------
/LeetCode/others/603_Consecutive_Available_Seats.sql:
--------------------------------------------------------------------------------
1 | /*
2 | shift down
3 | 1 N
4 | 2 1
5 | 3 2
6 |
7 | shift up
8 | 1 2
9 | 2 3
10 | 3 N
11 |
12 | join
13 | 1 N 2
14 | 2 1 3
15 | 3 2 N
16 |
17 | */
18 |
19 | SELECT
20 | m.seat_id
21 | FROM cinema AS m
22 | LEFT JOIN cinema AS d ON m.seat_id = d.seat_id + 1
23 | LEFT JOIN cinema AS u ON m.seat_id = u.seat_id - 1
24 | WHERE m.free = 1
25 | AND (d.free = 1 OR u.free = 1)
26 | ORDER BY seat_id;
27 |
28 | -- if three consecutive seats are free
29 | -- middle seat will be returned twice
30 | SELECT DISTINCT a.seat_id AS seat_id
31 | FROM cinema a
32 | JOIN cinema b
33 | ON a.free = 1
34 | AND b.free = 1
35 | AND abs(a.seat_id - b.seat_id) = 1
36 | ORDER BY seat_id;
--------------------------------------------------------------------------------
/LeetCode/others/607_Sales_Person.sql:
--------------------------------------------------------------------------------
1 | -- join version
2 | /* for non-null values in grouped column
3 | null in matched columns are simply ignored by
4 | COUNT, SUM, AVG, if there exists any non-null value.
5 | (except when using COUNT(*))
6 |
7 | If there exists no non-null value in matched row
8 | Aggregate output NULL. COUNT returns 0.
9 | */
10 | SELECT
11 | s.name
12 | FROM salesperson AS s
13 | LEFT JOIN orders AS o ON o.sales_id = s.sales_id
14 | LEFT JOIN company AS c ON o.com_id = c.com_id
15 | GROUP BY s.name
16 | HAVING SUM(c.name = "RED") = 0
17 | OR SUM(c.name = "RED") IS NULL
18 | ORDER BY name;
19 |
20 | -- subquery
21 | SELECT
22 | s.name
23 | FROM salesperson s
24 | WHERE sales_id NOT IN (
25 | SELECT sales_id
26 | FROM orders o
27 | LEFT JOIN company c
28 | ON o.com_id = c.com_id
29 | WHERE c.name = 'RED'
30 | );
--------------------------------------------------------------------------------
/LeetCode/others/608_Tree_Node.sql:
--------------------------------------------------------------------------------
1 | -- Root: p_id IS NULL so COUNT(t1.p_id) = 0; Leaf: no child rows join so COUNT(t2.id) = 0; otherwise Inner
2 | SELECT
3 | t1.Id,
4 | CASE
5 | WHEN COUNT(t1.p_id) = 0 THEN "Root"
6 | WHEN COUNT(t2.id) = 0 THEN "Leaf"
7 | ELSE "Inner"
8 | END AS Type
9 | FROM tree t1
10 | LEFT JOIN tree t2
11 | ON t1.Id = t2.p_id
12 | GROUP BY t1.Id
13 | ORDER BY t1.Id;
--------------------------------------------------------------------------------
/LeetCode/others/610_Triangle_Judgement.sql:
--------------------------------------------------------------------------------
1 | # Write your MySQL query statement below
2 | # Triangle inequality must be STRICT: each pair of sides must sum to more than the third.
3 | # A degenerate case (e.g. x + y = z) is a line segment, not a triangle,
4 | # so reject with <= rather than < .
5 | 
6 | SELECT x, y, z,
7 | CASE
8 | WHEN x + y <= z OR x + z <= y OR y + z <= x THEN 'No'
9 | ELSE 'Yes'
10 | END
11 | AS 'triangle'
12 | FROM triangle t;
--------------------------------------------------------------------------------
/LeetCode/others/612_Shortest_Distance_in_a_Plane.sql:
--------------------------------------------------------------------------------
1 | SELECT ROUND(MIN(SQRT(POWER(p1.x - p2.x, 2) + POWER(p1.y - p2.y, 2))), 2) AS shortest -- Euclidean distance of the closest pair, rounded to 2 decimals
2 | FROM point_2d p1, point_2d p2
3 | WHERE p1.x != p2.x -- skip coincident pairs (a point paired with itself or an identical point)
4 | OR p1.y != p2.y;
5 | 
6 | SELECT -- variant: ROUND applied before MIN; equivalent because ROUND is non-decreasing
7 | MIN(ROUND(
8 | SQRT(POW(p1.x - p2.x, 2) + POW(p1.y - p2.y, 2)), 2)) AS shortest
9 | FROM point_2d p1, point_2d p2
10 | WHERE NOT (p1.x = p2.x AND p1.y = p2.y);
--------------------------------------------------------------------------------
/LeetCode/others/613_Shortest_Distance_in_a_Line.sql:
--------------------------------------------------------------------------------
1 | # Write your MySQL query statement below
2 | # smallest gap between two distinct x values (self-join on inequality)
3 | SELECT MIN(ABS(p2.x - p1.x)) AS shortest
4 | FROM point AS p1
5 | JOIN point AS p2 ON p1.x <> p2.x;
--------------------------------------------------------------------------------
/LeetCode/others/614_Second_Degree_Follower.sql:
--------------------------------------------------------------------------------
1 | /* worked example rows; N marks "no second-degree follower" — verify layout against problem statement
2 | A B C
3 | A B D
4 | B C N
5 | B D E
6 | D E N
7 | */
8 | 
9 | -- DISTINCT matters: duplicate (followee, follower) rows would otherwise inflate the count
10 | -- note that we are grouping by the first-degree follower (f1.follower), not the followee
11 | SELECT
12 | f1.follower
13 | ,COUNT(DISTINCT f2.follower) AS num
14 | FROM
15 | follow f1
16 | JOIN
17 | follow f2
18 | ON f1.follower = f2.followee
19 | GROUP BY f1.follower
20 | ORDER BY f1.follower;
--------------------------------------------------------------------------------
/LeetCode/others/615_Average_Salary_Departments_VS_Company.sql:
--------------------------------------------------------------------------------
1 | WITH full_table AS -- salary rows joined with each employee's department
2 | (SELECT
3 | s.*
4 | ,e.department_id
5 | FROM salary AS s
6 | JOIN employee AS e
7 | ON s.employee_id = e.employee_id)
8 | 
9 | ,department_monthly AS -- average salary per department per pay_date
10 | (SELECT
11 | department_id
12 | ,pay_date
13 | ,AVG(amount) AS dept_avg
14 | FROM full_table
15 | GROUP BY department_id, pay_date)
16 | 
17 | ,company_monthly AS -- company-wide average salary per pay_date
18 | (SELECT
19 | pay_date
20 | ,AVG(amount) AS company_avg
21 | FROM full_table
22 | GROUP BY pay_date)
23 | 
24 | SELECT
25 | DISTINCT
26 | DATE_FORMAT(d.pay_date, '%Y-%m') AS pay_month
27 | ,d.department_id
28 | ,CASE
29 | WHEN d.dept_avg < c.company_avg THEN "lower"
30 | WHEN d.dept_avg = c.company_avg THEN "same"
31 | WHEN d.dept_avg > c.company_avg THEN "higher"
32 | END AS comparison
33 | FROM department_monthly d
34 | JOIN company_monthly c
35 | ON d.pay_date = c.pay_date
36 | ORDER BY d.pay_date DESC, d.department_id ASC;
37 |
38 | -- complication: employees can have different pay-dates, so aggregate per month (date_format '%Y-%m') rather than per exact pay_date
39 | SELECT
40 | a.pay_month
41 | ,a.department_id,
42 | CASE
43 | WHEN a.dept_avg_sal < b.corp_avg_sal THEN "lower"
44 | WHEN a.dept_avg_sal = b.corp_avg_sal THEN "same"
45 | ELSE "higher"
46 | END AS comparison
47 | FROM
48 | (SELECT -- a: per-department monthly average
49 | date_format(pay_date, '%Y-%m') as pay_month
50 | ,e.department_id
51 | ,AVG(amount) AS dept_avg_sal
52 | FROM salary AS s
53 | JOIN employee AS e
54 | ON s.employee_id = e.employee_id
55 | GROUP BY pay_month, e.department_id
56 | ) AS a
57 | JOIN
58 | (SELECT -- b: company-wide monthly average
59 | date_format(pay_date, '%Y-%m') as pay_month
60 | ,AVG(amount) AS corp_avg_sal
61 | FROM salary GROUP BY pay_month
62 | ) AS b
63 | ON a.pay_month = b.pay_month
64 | ORDER BY pay_month, a.department_id;
--------------------------------------------------------------------------------
/LeetCode/others/618_Students_Report_By_Geography.sql:
--------------------------------------------------------------------------------
1 | -- America is the largest continent group here, so the row ids of the
2 | -- other two columns are subsets of America's row ids;
3 | -- outer-joining everything onto the America column is sufficient
4 | -- (one RIGHT JOIN onto America, then one LEFT JOIN)
5 | SELECT
6 | America, Asia, Europe
7 | FROM
8 | (SELECT @as:=0, @am:=0, @eu:=0) t, -- NOTE(review): user-variable evaluation order is not guaranteed in modern MySQL — verify before relying on this
9 | (SELECT @as:=@as + 1 AS asid, name AS Asia
10 | FROM student
11 | WHERE continent = 'Asia'
12 | ORDER BY Asia) AS t1
13 | RIGHT JOIN
14 | (SELECT @am:=@am + 1 AS amid, name AS America
15 | FROM student
16 | WHERE continent = 'America'
17 | ORDER BY America) AS t2
18 | ON asid = amid
19 | LEFT JOIN
20 | (SELECT @eu:=@eu + 1 AS euid, name AS Europe
21 | FROM student
22 | WHERE continent = 'Europe'
23 | ORDER BY Europe) AS t3
24 | ON amid = euid;
25 | 
26 | SELECT
27 | am.name AS America
28 | ,asia.name AS Asia
29 | ,eu.name AS Europe
30 | FROM
31 | (SELECT
32 | name
33 | ,ROW_NUMBER() OVER (ORDER BY name) AS id
34 | FROM student
35 | WHERE continent = 'America') AS am
36 | LEFT JOIN
37 | (SELECT
38 | name
39 | ,ROW_NUMBER() OVER (ORDER BY name) AS id
40 | FROM student
41 | WHERE continent = 'Asia') AS asia
42 | ON am.id = asia.id
43 | LEFT JOIN
44 | (SELECT
45 | name
46 | ,ROW_NUMBER() OVER (ORDER BY name) AS id
47 | FROM student
48 | WHERE continent = 'Europe') AS eu
49 | ON asia.id = eu.id
50 | OR am.id = eu.id -- IMPORTANT! Asia may have fewer rows than Europe, leaving asia.id NULL at some rank; fall back to matching on America's id
51 | ORDER BY America;
52 | 
53 | SELECT
54 | america.name AS America
55 | ,asia.name as Asia
56 | ,europe.name as Europe
57 | FROM
58 | (SELECT @a := 0, @b := 0, @c := 0) AS vars
59 | ,(SELECT @a := @a + 1 AS ID, name FROM student WHERE continent = "America" ORDER BY name) AS america
60 | LEFT JOIN
61 | (SELECT @b := @b + 1 AS ID, name FROM student WHERE continent = "Asia" ORDER BY name) AS asia
62 | ON america.ID = asia.ID
63 | LEFT JOIN
64 | (SELECT @c := @c + 1 AS ID, name FROM student WHERE continent = "Europe" ORDER BY name) AS europe
65 | ON asia.ID = europe.ID
66 | OR america.ID = europe.ID;
67 |
--------------------------------------------------------------------------------
/LeetCode/others/619_Biggest_Single_Number.sql:
--------------------------------------------------------------------------------
1 | -- keep numbers that occur exactly once, then pick the largest;
2 | -- MAX over zero rows yields NULL, matching the expected output
3 | SELECT MAX(singles.num) AS num
4 | FROM (
5 |     SELECT num
6 |     FROM number
7 |     GROUP BY num
8 |     HAVING COUNT(*) = 1
9 | ) AS singles;
--------------------------------------------------------------------------------
/LeetCode/others/620_Not_Boring_Movies.sql:
--------------------------------------------------------------------------------
1 | -- odd-id movies whose description is not "boring", best-rated first
2 | SELECT *
3 | FROM cinema
4 | WHERE id % 2 = 1
5 |   AND description NOT LIKE "%boring%"
6 | ORDER BY rating DESC;
--------------------------------------------------------------------------------
/LeetCode/others/626_Exchange_Seats.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 | id
3 | ,CASE
4 | -- for odd id, take the next row's student; last odd id keeps its own student
5 | WHEN id % 2 = 1 THEN (
6 | -- a scalar subquery used as a function argument must itself be parenthesized
7 | IFNULL((SELECT student FROM seat s2 WHERE s1.id = s2.id - 1), s1.student)
8 | )
9 | -- for even id, take the previous row's student (always exists)
10 | ELSE (SELECT student FROM seat s3 WHERE s1.id = s3.id + 1)
11 | END AS student
12 | FROM seat s1;
13 |
14 | /* after the id shift (shifted id, student):
15 | 1 2 abbot
16 | 2 1 dories
17 | 3 4 doris
18 | 4 3 green
19 | 5 6 james (odd row count: id 6 has no real seat, so the dangling id must be fixed back)
20 | */
21 | select
22 | case
23 | when s.id is null then stg.id-1 -- shifted id has no matching real seat (odd tail): undo the shift
24 | else stg.id
25 | end id
26 | ,stg.student
27 | from
28 | (
29 | select id+1 id,student
30 | from seat
31 | where id%2=1
32 | union all
33 | select id-1 id,student
34 | from seat
35 | where id%2=0
36 | order by id) stg
37 | left join seat s
38 | using (id);
--------------------------------------------------------------------------------
/LeetCode/others/627_Swap_Salary.sql:
--------------------------------------------------------------------------------
1 | UPDATE salary -- flip sex in one statement; CASE is evaluated per row, no temp column needed
2 | SET sex = CASE WHEN sex = "m" THEN "f"
3 | WHEN sex = "f" THEN "m"
4 | ELSE NULL -- any value other than "m"/"f" (including NULL) is set to NULL
5 | END;
6 | 
7 | UPDATE salary -- variant: skip NULL rows entirely instead of re-assigning NULL
8 | SET sex = CASE WHEN sex = "m" THEN "f"
9 | WHEN sex = "f" THEN "m"
10 | END
11 | WHERE sex IS NOT NULL;
--------------------------------------------------------------------------------
/assets/sql_order.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shawlu95/Beyond-LeetCode-SQL/8ab7f0ad60b85ab9dff18da6fc760211335d0fb0/assets/sql_order.png
--------------------------------------------------------------------------------
/databases/world_db/README.txt:
--------------------------------------------------------------------------------
1 | This directory contains the file that can be used to set up the world_x
2 | database that is used in the guides of the MySQL Reference
3 | Manual:
4 |
5 | Quick-Start Guide: MySQL Shell for JavaScript
6 | Quick-Start Guide: MySQL Shell for Python
7 |
8 | These instructions assume that your current working directory is
9 | the directory that contains the files created by unpacking the
10 | world_x.zip or world_x.tar.gz distribution.
11 |
12 | You must install MySQL Shell and MySQL Server 5.7.12 or higher
13 | with the X Plugin enabled. Start the server before you load the
14 | world_x database.
15 |
16 | Note: Releases issued prior to September 2016 used table
17 | names in mixed cases. Now table names are all
18 | lowercase. This is because MySQL Shell is case-sensitive.
19 |
20 | Extract the installation archive to a temporary location such as /tmp/.
21 | Unpacking the archive results in a single file named world_x.sql.
22 |
23 | Create or recreate the schema with one of the following commands:
24 |
25 | Either use MySQL Shell:
26 |
27 | shell> mysqlsh -u root --sql --recreate-schema world_x < /tmp/world_x-db/world_x.sql
28 |
29 | Or the standard MySQL command-line client:
30 |
31 | Connect to MySQL:
32 | shell> mysql -u root -p
33 | Load the file:
34 | mysql> SOURCE /tmp/world_x-db/world_x.sql;
35 |
36 | Enter your password when prompted. A non-root account can be used as long as
37 | the account has privileges to create new databases.
38 |
39 | Replace /tmp/ with the path to the world_x.sql file on your system.
40 |
--------------------------------------------------------------------------------
/databases/world_db/world.sql:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shawlu95/Beyond-LeetCode-SQL/8ab7f0ad60b85ab9dff18da6fc760211335d0fb0/databases/world_db/world.sql
--------------------------------------------------------------------------------