├── .gitignore ├── Anti_Patterns ├── 01_Ambiguous_Group │ └── README.md ├── 02_Bad_Subquery │ └── README.md └── 03_Fail_to_Use_Index │ └── README.md ├── Hacks ├── 01_Random_Sampling │ ├── README.md │ └── solution.sql ├── 02_NULL_pathology │ ├── README.md │ └── db.sql ├── 03_Full_Join │ ├── README.md │ └── db.sql ├── 04_Dynamic_Query │ ├── README.md │ └── dynamic.ipynb ├── 05_Stored_Precesure │ ├── README.md │ └── stored_procedure.sql ├── 06_Hacking_Aggregation │ ├── README.md │ └── solution.sql ├── 07_Multi_Column_Partition │ ├── README.md │ ├── db.sql │ └── use_case.sql └── 08_pandasql │ ├── customers.csv │ └── pandasql.ipynb ├── Interview ├── 01_Facebook_Advertiser_Status │ ├── README.md │ ├── db.sql │ ├── fig │ │ └── transition.png │ └── solution.sql ├── 02_Spotify_Listening_History │ ├── README.md │ ├── db.sql │ └── solution.sql ├── 03_Monthly_Active_User │ ├── README.md │ ├── db.sql │ └── solution.sql ├── 04_Page_Recommendation │ ├── README.md │ ├── db.sql │ └── solution.sql ├── 05_Pivoting_Numeric_Data │ ├── Expenses.sql │ ├── README.md │ ├── db.sql │ └── pivot.sql ├── 06_Pivoting_Text_Data │ ├── README.md │ ├── db.sql │ └── pivot.sql ├── 07_Unpivoting_Tables │ ├── README.md │ ├── pivot_table.sql │ └── un_pivot_table.sql ├── 08_Group_by_Bins │ ├── README.md │ └── solution.sql ├── 09_Consecutive_Active_Users │ ├── README.md │ ├── db.sql │ └── solution.sql ├── 10_Spotify_Recommend_Friend │ ├── README.md │ ├── db.sql │ ├── fig │ │ └── symmetry.png │ └── solution.sql ├── 11_Spotify_Similar_Friends │ ├── README.md │ ├── cartesian.txt │ └── solution.sql ├── 12_Invalid_Search │ ├── README.md │ ├── db.sql │ └── solution.sql ├── 13_Text_Confirmation │ ├── README.md │ ├── db.sql │ └── solution.sql ├── 14_Facebook_Common_Friend │ ├── README.md │ ├── db.sql │ └── solution.sql ├── 15_Facebook_People_You_May_Know │ ├── README.md │ └── solution.sql └── 16_Instagram_Common_Follower │ ├── README.md │ ├── db.sql │ └── solution.sql ├── LICENSE ├── LeetCode ├── 
185_Department_Top_Three_Salaries │ ├── README.md │ ├── db.sql │ ├── mssql_pre_filter.sql │ ├── mssql_window.sql │ └── mysql_correlated_subquery.sql ├── 262_Trips_and_Users │ ├── README.md │ ├── db.sql │ ├── mysql_pre_filter.sql │ ├── mysql_set.sql │ └── mysql_simple.sql ├── 579_Find_Cumulative_Salary_of_an_Employee │ ├── README.md │ ├── db.sql │ ├── mssql_lag.sql │ ├── mysql_join_tmp_table.sql │ ├── mysql_set.sql │ └── mysql_single_join.sql ├── 601_Human_Traffic_of_Stadium │ ├── README.md │ ├── db.sql │ ├── mssql_pre_filter.sql │ ├── mssql_window.sql │ └── mysql_simple.sql ├── 615_Average_Salary │ ├── README.md │ ├── db.sql │ ├── mysql.sql │ └── mysql_session_vars.sql ├── 618_Students_Report_by_Geography │ ├── README.md │ ├── db.sql │ ├── mssql_full_join.sql │ ├── mysql_session_vars.sql │ └── self-join-rownums.sql └── others │ ├── 1045_Customers_Who_Bought_All_Products.sql │ ├── 1050_Actors_and_Directors_Who_Cooperated_At_Least_Three_Times.sql │ ├── 1068_Product_Sales_Analysis_I.sql │ ├── 1069_Product_Sales_Analysis_II.sql │ ├── 1070_Product_Sales_Analysis_III.sql │ ├── 1075_Project_Employees_I.sql │ ├── 1076_Project_Employees_II.sql │ ├── 1077_Project_Employees_III.sql │ ├── 1082_Sales_Analysis_I.sql │ ├── 1083_Sales_Analysis_II.sql │ ├── 1084_Sales_Analysis_III.sql │ ├── 1097_Game_Play_Analysis_V.sql │ ├── 1098_Unpopular_Books.sql │ ├── 1107_New_Users_Daily_Count.sql │ ├── 1112_Highest_Grade_For_Each_Student.sql │ ├── 1113_Reported_Posts.sql │ ├── 1126_Active_Businesses.sql │ ├── 1127_User_Purchase_Platform.sql │ ├── 1132_Reported_Posts_II.sql │ ├── 1141_User_Activity_for_the_Past_30_Days_I.sql │ ├── 1142_User_Activity_for_the_Past_30_Days_II.sql │ ├── 1148_Article_Views_I.sql │ ├── 1149_Article_Views_II.sql │ ├── 1158_Market_Analysis_I.sql │ ├── 1159_Market_Analysis_II.sql │ ├── 1164_Product_Price_at_a_Given_Date.sql │ ├── 1173_Immediate_Food_Delivery_I.sql │ ├── 1174_Immediate_Food_Delivery_II.sql │ ├── 1179_Reformat_Department_Table.sql │ ├── 
175_Combine_Two_Tables.sql │ ├── 176_Second_Highest_Salary.sql │ ├── 177_Nth_Highest_Salary.sql │ ├── 178_Rank_Scores.sql │ ├── 180_Consecutive_Numbers.sql │ ├── 181_Employees_Earning_More_Than_Their_Managers.sql │ ├── 182_Duplicate_Emails.sql │ ├── 183_Customers_Who_Never_Order.sql │ ├── 184_Department_Highest_Salary.sql │ ├── 185_Department_Top_Three_Salaries.sql │ ├── 196_Delete_Duplicate_Emails.sql │ ├── 197_Rising_Temperature.sql │ ├── 262_Trips_and_Users.sql │ ├── 569_Median_Employee_Salary.sql │ ├── 570_Managers_with_at_Least_5_Direct_Reports.sql │ ├── 571_Find_Median_Given_Frequency_of_Numbers.sql │ ├── 574_Winning_Candidate.sql │ ├── 577_Employee_Bonus.sql │ ├── 578_Get_Highest_Answer_Rate_Question.sql │ ├── 579_Find_Cumulative_Salary_of_an_Employee.sql │ ├── 580_Count_Student_Number_in_Departments.sql │ ├── 584_Find_Customer_Referee.sql │ ├── 585_Investments_in_2016.sql │ ├── 586_Customer_Placing_the_Largest_Number_of_Orders.sql │ ├── 595_Big_Countries.sql │ ├── 596_Classes_More_Than_5_Students.sql │ ├── 597_Friend_Requests_I-_Overall_Acceptance_Rate.sql │ ├── 601_Human_Traffic_of_Stadium.sql │ ├── 602_Friend_Requests_II-_Who_Has_the_Most_Friends.sql │ ├── 603_Consecutive_Available_Seats.sql │ ├── 607_Sales_Person.sql │ ├── 608_Tree_Node.sql │ ├── 610_Triangle_Judgement.sql │ ├── 612_Shortest_Distance_in_a_Plane.sql │ ├── 613_Shortest_Distance_in_a_Line.sql │ ├── 614_Second_Degree_Follower.sql │ ├── 615_Average_Salary_Departments_VS_Company.sql │ ├── 618_Students_Report_By_Geography.sql │ ├── 619_Biggest_Single_Number.sql │ ├── 620_Not_Boring_Movies.sql │ ├── 626_Exchange_Seats.sql │ └── 627_Swap_Salary.sql ├── README.md ├── assets └── sql_order.png └── databases ├── classicmodels └── mysqlsampledatabase.sql ├── practice.sql ├── sakila-db ├── sakila-data.sql └── sakila-schema.sql └── world_db ├── README.txt └── world.sql /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .ipynb_checkpoints/ 3 | 
archive/ -------------------------------------------------------------------------------- /Anti_Patterns/02_Bad_Subquery/README.md: -------------------------------------------------------------------------------- 1 | # Bad Subquery 2 | This notebook goes through several common types of bad subqueries. Subqueries are not always bad, and sometimes cannot be avoided. But they are often misused because analysts are too lazy to even think about why they need it. 3 | 4 | ### Not Using Having Clause 5 | We want to select countries with more than 100 cities. 6 | 7 | ```sql 8 | SELECT CountryCode, city_tally 9 | FROM ( 10 | SELECT 11 | CountryCode 12 | ,COUNT(*) AS city_tally 13 | FROM city 14 | GROUP BY CountryCode 15 | ) AS _ 16 | WHERE city_tally >= 100 17 | ORDER BY city_tally DESC; 18 | ``` 19 | ``` 20 | +-------------+------------+ 21 | | CountryCode | city_tally | 22 | +-------------+------------+ 23 | | CHN | 363 | 24 | | IND | 341 | 25 | | USA | 274 | 26 | | BRA | 250 | 27 | | JPN | 248 | 28 | | RUS | 189 | 29 | | MEX | 173 | 30 | | PHL | 136 | 31 | +-------------+------------+ 32 | 8 rows in set (0.00 sec) 33 | ``` 34 | 35 | The subquery can be avoided by moving the *WHERE* clause into the *HAVING* clause. 36 | 37 | ```sql 38 | SELECT 39 | CountryCode 40 | ,COUNT(*) AS city_tally 41 | FROM city 42 | GROUP BY CountryCode 43 | HAVING city_tally >= 100 44 | ORDER BY city_tally DESC; 45 | ``` 46 | ``` 47 | +-------------+------------+ 48 | | CountryCode | city_tally | 49 | +-------------+------------+ 50 | | CHN | 363 | 51 | | IND | 341 | 52 | | USA | 274 | 53 | | BRA | 250 | 54 | | JPN | 248 | 55 | | RUS | 189 | 56 | | MEX | 173 | 57 | | PHL | 136 | 58 | +-------------+------------+ 59 | 8 rows in set (0.00 sec) 60 | ``` 61 | 62 | If you don't need to return the city count, you can move the entire expression into *HAVING* clause. 
63 | 64 | ```sql 65 | SELECT 66 | CountryCode 67 | FROM city 68 | GROUP BY CountryCode 69 | HAVING COUNT(*) >= 100; 70 | ``` 71 | ``` 72 | +-------------+ 73 | | CountryCode | 74 | +-------------+ 75 | | BRA | 76 | | CHN | 77 | | IND | 78 | | JPN | 79 | | MEX | 80 | | PHL | 81 | | RUS | 82 | | USA | 83 | +-------------+ 84 | 8 rows in set (0.01 sec) 85 | ``` -------------------------------------------------------------------------------- /Anti_Patterns/03_Fail_to_Use_Index/README.md: -------------------------------------------------------------------------------- 1 | # Failure to Use Index 2 | 3 | In this notebook we use *sakila* database. Specifically, we want to count how many movies each actor has starred. We obviously need to group by the first name and last name of the actor. 4 | 5 | ```sql 6 | SELECT actor.first_name, actor.last_name, COUNT(*) 7 | FROM sakila.film_actor 8 | INNER JOIN sakila.actor USING(actor_id) 9 | GROUP BY actor.first_name, actor.last_name 10 | ORDER BY 1, 2 11 | LIMIT 5; 12 | ``` 13 | ``` 14 | +------------+-----------+----------+ 15 | | first_name | last_name | COUNT(*) | 16 | +------------+-----------+----------+ 17 | | ADAM | GRANT | 18 | 18 | | ADAM | HOPPER | 22 | 19 | | AL | GARLAND | 26 | 20 | | ALAN | DREYFUSS | 27 | 21 | | ALBERT | JOHANSSON | 33 | 22 | +------------+-----------+----------+ 23 | 5 rows in set (0.01 sec) 24 | ``` 25 | 26 | But we can do much better by grouping by index. Notice that index uniquely identifies an actor, just as first name and last name uniquely identify an actor. 
27 | 28 | ```sql 29 | SELECT actor.first_name, actor.last_name, COUNT(*) 30 | FROM sakila.film_actor 31 | INNER JOIN sakila.actor USING(actor_id) 32 | GROUP BY film_actor.actor_id 33 | ORDER BY 1, 2 34 | LIMIT 5; 35 | ``` 36 | ``` 37 | +------------+-----------+----------+ 38 | | first_name | last_name | COUNT(*) | 39 | +------------+-----------+----------+ 40 | | ADAM | GRANT | 18 | 41 | | ADAM | HOPPER | 22 | 42 | | AL | GARLAND | 26 | 43 | | ALAN | DREYFUSS | 27 | 44 | | ALBERT | JOHANSSON | 33 | 45 | +------------+-----------+----------+ 46 | 5 rows in set (0.00 sec) 47 | ``` 48 | 49 | You may notice that the first two columns are neither in the GROUP BY clause nor aggregated over. This does not throw an error in MySQL, because it recognizes they are functionally dependent on the GROUP BY column. 50 | 51 | The most correct version is to do one of the following: 52 | 53 | ```sql 54 | SELECT MAX(actor.first_name), MAX(actor.last_name), COUNT(*) 55 | FROM sakila.film_actor 56 | INNER JOIN sakila.actor USING(actor_id) 57 | GROUP BY film_actor.actor_id 58 | ORDER BY 1, 2 59 | LIMIT 5; 60 | 61 | SELECT actor.first_name, actor.last_name, COUNT(*) 62 | FROM sakila.film_actor 63 | INNER JOIN sakila.actor USING(actor_id) 64 | GROUP BY film_actor.actor_id, 1, 2 65 | ORDER BY 1, 2 66 | LIMIT 5; 67 | 68 | SELECT actor.first_name, actor.last_name, c.cnt FROM sakila.actor 69 | INNER JOIN ( 70 | SELECT actor_id, COUNT(*) AS cnt FROM sakila.film_actor 71 | GROUP BY actor_id 72 | ) AS c USING(actor_id) 73 | ORDER BY 1, 2 74 | LIMIT 5; 75 | ``` 76 | 77 | In the last case, be careful that temporary table does not have index, and the *INNER* join could be slow. 
-------------------------------------------------------------------------------- /Hacks/01_Random_Sampling/solution.sql: -------------------------------------------------------------------------------- 1 | -- create source table 2 | DROP TABLE IF EXISTS sample_dist_stats; 3 | CREATE TEMPORARY TABLE sample_dist_stats AS 4 | SELECT 5 | continent 6 | ,COUNT(*) AS city_tally 7 | ,10/COUNT(*) AS p 8 | ,10 AS mean 9 | ,SQRT(COUNT(*) * (10/COUNT(*)) * (1-10/COUNT(*))) AS std 10 | FROM city_by_continent 11 | GROUP BY continent 12 | ORDER BY city_tally; 13 | 14 | SET @sample_size = 10; 15 | 16 | -- sampling pipeline 17 | WITH 18 | city_group AS 19 | (SELECT 20 | c.* 21 | ,COUNT(*) OVER (PARTITION BY continent) AS group_size 22 | FROM city_by_continent AS c) 23 | ,city_prob_assign AS 24 | (SELECT 25 | c.* 26 | -- beaware expectation of sample size group_size * @sample_size / group_size AS mean = @sample_size 27 | ,SQRT(group_size * (@sample_size / group_size) * (1 - @sample_size / group_size)) AS std 28 | ,RAND() AS prob 29 | FROM city_group AS c) 30 | ,city_prob_cutoff AS 31 | (SELECT 32 | c.* 33 | ,(@sample_size + CEIL(2 * std)) / group_size AS cutoff 34 | FROM city_prob_assign AS c) 35 | -- filter by cuffoff 36 | -- note that this step cannot be merged with previous 37 | ,city_sample AS 38 | (SELECT 39 | c.* 40 | -- window is evaluated after WHERE clause! 41 | -- ranking is cheap when performed on small group! 
42 | ,RANK() OVER (PARTITION BY continent ORDER BY prob) AS group_row_num 43 | FROM city_prob_cutoff AS c 44 | WHERE prob < cutoff) 45 | -- final, fixed size sample 46 | SELECT * 47 | FROM city_sample 48 | WHERE group_row_num <= @sample_size; -------------------------------------------------------------------------------- /Hacks/02_NULL_pathology/db.sql: -------------------------------------------------------------------------------- 1 | CREATE SCHEMA IF NOT EXISTS Practice; 2 | USE Practice; 3 | 4 | 5 | -- phpMyAdmin SQL Dump 6 | -- version 4.7.7 7 | -- https://www.phpmyadmin.net/ 8 | -- 9 | -- Host: localhost:8889 10 | -- Generation Time: Mar 17, 2019 at 11:27 AM 11 | -- Server version: 5.6.38 12 | -- PHP Version: 7.2.1 13 | 14 | SET SQL_MODE = "NO_AUTO_VALUE_ON_ZERO"; 15 | SET time_zone = "+00:00"; 16 | 17 | -- 18 | -- Database: `practice` 19 | -- 20 | 21 | -- -------------------------------------------------------- 22 | 23 | -- 24 | -- Table structure for table `Balance` 25 | -- 26 | 27 | DROP TABLE IF EXISTS `Balance`; 28 | CREATE TABLE `Balance` ( 29 | `id` int(11) NOT NULL, 30 | `name` varchar(10) DEFAULT NULL, 31 | `balance` int(11) DEFAULT NULL 32 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 33 | 34 | -- 35 | -- Dumping data for table `Balance` 36 | -- 37 | 38 | INSERT INTO `Balance` (`id`, `name`, `balance`) VALUES 39 | (1, 'Alice', 10), 40 | (2, 'Bob', 5), 41 | (3, NULL, 20), 42 | (4, 'Cindy', NULL), 43 | (5, 'Bob', 10), 44 | (6, 'Cindy', 100); 45 | 46 | -- 47 | -- Indexes for dumped tables 48 | -- 49 | 50 | -- 51 | -- Indexes for table `Balance` 52 | -- 53 | ALTER TABLE `Balance` 54 | ADD PRIMARY KEY (`id`); 55 | 56 | -- 57 | -- AUTO_INCREMENT for dumped tables 58 | -- 59 | 60 | -- 61 | -- AUTO_INCREMENT for table `Balance` 62 | -- 63 | ALTER TABLE `Balance` 64 | MODIFY `id` int(11) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=7; 65 | -------------------------------------------------------------------------------- /Hacks/03_Full_Join/README.md: 
-------------------------------------------------------------------------------- 1 | # Full Join 2 | 3 | MySQL does not support full join. There are two ways to simulate it. 4 | 5 | Load the database file [db.sql](db.sql) to localhost MySQL. Two simple tables will be created in the Practice database. 6 | ``` 7 | mysql < db.sql -uroot -p 8 | ``` 9 | 10 | Let's take a look at the tables we want to join. Evidently, the *letter1* table does not contain C; the *letter2* table does not contain B. 11 | 12 | ``` 13 | mysql> SELECT * FROM letter1; 14 | +----+--------+ 15 | | id | letter | 16 | +----+--------+ 17 | |  1 | A      | 18 | |  2 | B      | 19 | |  3 | A      | 20 | +----+--------+ 21 | 3 rows in set (0.01 sec) 22 | 23 | mysql> SELECT * FROM letter2; 24 | +----+--------+ 25 | | id | letter | 26 | +----+--------+ 27 | |  1 | A      | 28 | |  2 | C      | 29 | |  3 | A      | 30 | +----+--------+ 31 | 3 rows in set (0.00 sec) 32 | ``` 33 | 34 | --- 35 | ### Hack 36 | ```sql 37 | SELECT 38 | l1.id 39 | ,l1.letter 40 | ,l2.id 41 | ,l2.letter 42 | FROM letter1 l1 43 | LEFT JOIN letter2 l2 ON l1.letter = l2.letter 44 | UNION ALL 45 | SELECT 46 | l1.id 47 | ,l1.letter 48 | ,l2.id 49 | ,l2.letter 50 | FROM letter1 l1 51 | RIGHT JOIN letter2 l2 ON l1.letter = l2.letter 52 | WHERE l1.letter IS NULL; 53 | ``` 54 | ``` 55 | +------+--------+------+--------+ 56 | | id   | letter | id   | letter | 57 | +------+--------+------+--------+ 58 | |    1 | A      |    1 | A      | 59 | |    3 | A      |    1 | A      | 60 | |    1 | A      |    3 | A      | 61 | |    3 | A      |    3 | A      | 62 | |    2 | B      | NULL | NULL   | 63 | | NULL | NULL   |    2 | C      | 64 | +------+--------+------+--------+ 65 | 6 rows in set (0.00 sec) 66 | ``` 67 | 68 | ```sql 69 | SELECT 70 | l1.id 71 | ,l1.letter 72 | ,l2.id 73 | ,l2.letter 74 | FROM letter1 l1 75 | RIGHT JOIN letter2 l2 ON l1.letter = l2.letter 76 | UNION ALL 77 | SELECT 78 | l1.id 79 | ,l1.letter 80 | ,l2.id 81 | ,l2.letter 82 | FROM letter1 l1 83 | LEFT JOIN letter2 l2 ON l1.letter = l2.letter 84 | WHERE l2.letter IS NULL; 85 | ``` 86 | ``` 87 |
+------+--------+------+--------+ 88 | | id | letter | id | letter | 89 | +------+--------+------+--------+ 90 | | 1 | A | 1 | A | 91 | | 1 | A | 3 | A | 92 | | 3 | A | 1 | A | 93 | | 3 | A | 3 | A | 94 | | NULL | NULL | 2 | C | 95 | | 2 | B | NULL | NULL | 96 | +------+--------+------+--------+ 97 | 6 rows in set (0.00 sec) 98 | ``` 99 | 100 | --- 101 | ### Warning 102 | Do not use *UNION*, which creates a hashset. If the returned rows are not distinct, you get fewer rows than expected! 103 | 104 | ```sql 105 | SELECT l1.letter, l2.letter 106 | FROM letter1 l1 107 | LEFT JOIN letter2 l2 108 | ON l1.letter=l2.letter 109 | UNION 110 | SELECT l1.letter, l2.letter 111 | FROM letter1 l1 112 | RIGHT JOIN letter2 l2 113 | ON l1.letter=l2.letter; 114 | ``` 115 | ``` 116 | +--------+--------+ 117 | | letter | letter | 118 | +--------+--------+ 119 | | A | A | 120 | | B | NULL | 121 | | NULL | C | 122 | +--------+--------+ 123 | 3 rows in set (0.01 sec) 124 | ``` 125 | 126 | The answer should be: 127 | ```sql 128 | SELECT l1.letter, l2.letter FROM letter1 l1 129 | LEFT JOIN letter2 l2 ON l1.letter = l2.letter 130 | UNION ALL 131 | SELECT l1.letter, l2.letter FROM letter1 l1 132 | RIGHT JOIN letter2 l2 ON l1.letter = l2.letter 133 | WHERE l1.letter IS NULL; 134 | ``` 135 | ``` 136 | +--------+--------+ 137 | | letter | letter | 138 | +--------+--------+ 139 | | A | A | 140 | | A | A | 141 | | A | A | 142 | | A | A | 143 | | B | NULL | 144 | | NULL | C | 145 | +--------+--------+ 146 | 6 rows in set (0.00 sec) 147 | ``` -------------------------------------------------------------------------------- /Hacks/03_Full_Join/db.sql: -------------------------------------------------------------------------------- 1 | CREATE SCHEMA IF NOT EXISTS Practice; 2 | USE Practice; 3 | 4 | -- phpMyAdmin SQL Dump 5 | -- version 4.7.7 6 | -- https://www.phpmyadmin.net/ 7 | -- 8 | -- Host: localhost:8889 9 | -- Generation Time: Mar 18, 2019 at 03:26 AM 10 | -- Server version: 5.6.38 11 | -- PHP 
Version: 7.2.1 12 | 13 | SET SQL_MODE = "NO_AUTO_VALUE_ON_ZERO"; 14 | SET time_zone = "+00:00"; 15 | 16 | -- 17 | -- Database: `practice` 18 | -- 19 | CREATE DATABASE IF NOT EXISTS `practice` DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci; 20 | USE `practice`; 21 | 22 | -- -------------------------------------------------------- 23 | 24 | -- 25 | -- Table structure for table `Letter1` 26 | -- 27 | 28 | DROP TABLE IF EXISTS `Letter1`; 29 | CREATE TABLE `Letter1` ( 30 | `id` int(11) NOT NULL, 31 | `letter` varchar(1) NOT NULL 32 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 33 | 34 | -- 35 | -- Dumping data for table `Letter1` (A, B, A — must match the README's full-join examples) 36 | -- 37 | 38 | INSERT INTO `Letter1` (`id`, `letter`) VALUES 39 | (1, 'A'), 40 | (2, 'B'), 41 | (3, 'A'); 42 | 43 | -- -------------------------------------------------------- 44 | 45 | -- 46 | -- Table structure for table `Letter2` 47 | -- 48 | 49 | DROP TABLE IF EXISTS `Letter2`; 50 | CREATE TABLE `Letter2` ( 51 | `id` int(11) NOT NULL, 52 | `letter` varchar(1) NOT NULL 53 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 54 | 55 | -- 56 | -- Dumping data for table `Letter2` 57 | -- 58 | 59 | INSERT INTO `Letter2` (`id`, `letter`) VALUES 60 | (1, 'A'), 61 | (2, 'C'), 62 | (3, 'A'); 63 | 64 | -- 65 | -- Indexes for dumped tables 66 | -- 67 | 68 | -- 69 | -- Indexes for table `Letter1` 70 | -- 71 | ALTER TABLE `Letter1` 72 | ADD PRIMARY KEY (`id`); 73 | 74 | -- 75 | -- Indexes for table `Letter2` 76 | -- 77 | ALTER TABLE `Letter2` 78 | ADD PRIMARY KEY (`id`); 79 | 80 | -- 81 | -- AUTO_INCREMENT for dumped tables 82 | -- 83 | 84 | -- 85 | -- AUTO_INCREMENT for table `Letter1` 86 | -- 87 | ALTER TABLE `Letter1` 88 | MODIFY `id` int(11) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=4; 89 | 90 | -- 91 | -- AUTO_INCREMENT for table `Letter2` 92 | -- 93 | ALTER TABLE `Letter2` 94 | MODIFY `id` int(11) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=4; 95 | -------------------------------------------------------------------------------- /Hacks/04_Dynamic_Query/README.md:
-------------------------------------------------------------------------------- 1 | # Dynamic Query 2 | 3 | In the interview question, we covered a classic problem of [grouping by bins](https://github.com/shawlu95/Beyond-LeetCode-SQL/tree/master/Interview/08_Group_by_Bins). This [notebook](dynamic.ipynb) illustrates how to generate and execute query dynamically in Python. You can easily build hundreds of bins without manually writing hundreds of *WHEN* conditions. 4 | 5 | ```sql 6 | SELECT CASE WHEN creditLimit BETWEEN 0 AND 9999 THEN '0~9999' 7 | WHEN creditLimit BETWEEN 10000 AND 19999 THEN '10000~19999' 8 | WHEN creditLimit BETWEEN 20000 AND 29999 THEN '20000~29999' 9 | WHEN creditLimit BETWEEN 30000 AND 39999 THEN '30000~39999' 10 | WHEN creditLimit BETWEEN 40000 AND 49999 THEN '40000~49999' 11 | WHEN creditLimit BETWEEN 50000 AND 59999 THEN '50000~59999' 12 | WHEN creditLimit BETWEEN 60000 AND 69999 THEN '60000~69999' 13 | WHEN creditLimit BETWEEN 70000 AND 79999 THEN '70000~79999' 14 | WHEN creditLimit BETWEEN 80000 AND 89999 THEN '80000~89999' 15 | WHEN creditLimit BETWEEN 90000 AND 99999 THEN '90000~99999' 16 | WHEN creditLimit BETWEEN 100000 AND 109999 THEN '100000~109999' 17 | WHEN creditLimit BETWEEN 110000 AND 119999 THEN '110000~119999' 18 | WHEN creditLimit BETWEEN 120000 AND 129999 THEN '120000~129999' 19 | WHEN creditLimit BETWEEN 130000 AND 139999 THEN '130000~139999' 20 | WHEN creditLimit BETWEEN 140000 AND 149999 THEN '140000~149999' 21 | WHEN creditLimit BETWEEN 150000 AND 159999 THEN '150000~159999' 22 | WHEN creditLimit BETWEEN 160000 AND 169999 THEN '160000~169999' 23 | WHEN creditLimit BETWEEN 170000 AND 179999 THEN '170000~179999' 24 | WHEN creditLimit BETWEEN 180000 AND 189999 THEN '180000~189999' 25 | WHEN creditLimit BETWEEN 190000 AND 199999 THEN '190000~199999' 26 | WHEN creditLimit BETWEEN 200000 AND 209999 THEN '200000~209999' 27 | WHEN creditLimit BETWEEN 210000 AND 219999 THEN '210000~219999' 28 | WHEN creditLimit BETWEEN 220000 
AND 229999 THEN '220000~229999' 29 | END AS credit_range, COUNT(*) AS customer_tally FROM customers GROUP BY credit_range; 30 | ``` 31 | 32 | --- 33 | ### Dependencies 34 | * sqlalchemy 35 | * pandas -------------------------------------------------------------------------------- /Hacks/05_Stored_Precesure/README.md: -------------------------------------------------------------------------------- 1 | # Stored Procedure 2 | 3 | --- 4 | ### Building Procedure 5 | For complex queries that are frequently reused, with a few changes in parameters, we can store them as a procedure and run it like a function. The syntax to create a stored procedure is: 6 | 7 | ```sql 8 | DROP PROCEDURE IF EXISTS my_procedure; 9 | 10 | DELIMITER // 11 | CREATE PROCEDURE my_procedure 12 | (IN arg1 INT(11), arg2 VARCHAR(10)) 13 | 14 | BEGIN 15 | 16 | SELECT arg1, arg2; 17 | 18 | END// 19 | DELIMITER ; 20 | ``` 21 | 22 | The *DELIMITER //* statement simply tells SQL to treat *//* as marking the end of a statement, so that when creating the procedure *SELECT arg1, arg2;*, SQL does not interpret it as a statement and execute it. After creating the procedure, we need to change the delimiter back to *;*. 23 | 24 | --- 25 | ### Calling Procedure 26 | To call a procedure, simply substitute parameters. 27 | ``` 28 | CALL my_procedure(1, 'hello'); 29 | 30 | +------+-------+ 31 | | arg1 | arg2  | 32 | +------+-------+ 33 | |    1 | hello | 34 | +------+-------+ 35 | 1 row in set (0.00 sec) 36 | ``` 37 | 38 | --- 39 | ### Example Use Case 40 | We can use a custom procedure to store the random sampling [query](https://github.com/shawlu95/Beyond-LeetCode-SQL/tree/master/Topics/01_Random_Sampling). See implementation [here](stored_procedure.sql). 41 | 42 | Using a stored procedure, we can simply sample any desired number of rows, by passing the sample size as an argument.
For example, to draw 3 cities from each continent, we can simply call the procedure as follows: 43 | 44 | ``` 45 | mysql> CALL sample_by_continent(3); 46 | 47 | +--------------+---------------------------+---------------+------------+--------------------+-----------------------+--------+---------------+ 48 | | city_name | country_name | continent | group_size | std | prob | cutoff | group_row_num | 49 | +--------------+---------------------------+---------------+------------+--------------------+-----------------------+--------+---------------+ 50 | | Nazilli | Turkey | Asia | 1766 | 1.7305788923769574 | 0.7224973214068425 | 0.0040 | 1 | 51 | | Semnan | Iran | Asia | 1766 | 1.7305788923769574 | 0.7094183831563391 | 0.0040 | 2 | 52 | | Yuci | China | Asia | 1766 | 1.7305788923769574 | 0.3795999822948128 | 0.0040 | 3 | 53 | | Manchester | United Kingdom | Europe | 841 | 1.7289585538059709 | 0.7099240475426652 | 0.0083 | 1 | 54 | | Vitebsk | Belarus | Europe | 841 | 1.7289585538059709 | 0.24038589023089585 | 0.0083 | 2 | 55 | | Arzamas | Russian Federation | Europe | 841 | 1.7289585538059709 | 0.07215733926012864 | 0.0083 | 3 | 56 | | Saint-Pierre | Saint Pierre and Miquelon | North America | 581 | 1.7275732570756073 | 0.9816732946612623 | 0.0120 | 1 | 57 | | Compton | United States | North America | 581 | 1.7275732570756073 | 0.9894793350151547 | 0.0120 | 2 | 58 | | Grand Rapids | United States | North America | 581 | 1.7275732570756073 | 0.0023768218256317282 | 0.0120 | 3 | 59 | | al-Qadarif | Sudan | Africa | 366 | 1.7249376000117878 | 0.6935911212988115 | 0.0191 | 1 | 60 | | Daloa | Côte d’Ivoire | Africa | 366 | 1.7249376000117878 | 0.06386513827728586 | 0.0191 | 2 | 61 | | Abidjan | Côte d’Ivoire | Africa | 366 | 1.7249376000117878 | 0.23855235822363977 | 0.0191 | 3 | 62 | | Adamstown | Pitcairn | Oceania | 55 | 1.684150708706428 | 0.2901749911626568 | 0.1273 | 1 | 63 | | Yangor | Nauru | Oceania | 55 | 1.684150708706428 | 0.44737576548697033 | 0.1273 | 2 | 64 | 
| Cairns | Australia | Oceania | 55 | 1.684150708706428 | 0.3663538846805262 | 0.1273 | 3 | 65 | | Maracaíbo | Venezuela | South America | 470 | 1.7265140393782532 | 0.8527891355126995 | 0.0149 | 1 | 66 | | Guacara | Venezuela | South America | 470 | 1.7265140393782532 | 0.2539973559360927 | 0.0149 | 2 | 67 | | Corrientes | Argentina | South America | 470 | 1.7265140393782532 | 0.7116194038760097 | 0.0149 | 3 | 68 | +--------------+---------------------------+---------------+------------+--------------------+-----------------------+--------+---------------+ 69 | 18 rows in set (0.01 sec) 70 | 71 | Query OK, 0 rows affected (0.01 sec) 72 | ``` 73 | 74 | -------------------------------------------------------------------------------- /Hacks/05_Stored_Precesure/stored_procedure.sql: -------------------------------------------------------------------------------- 1 | DROP PROCEDURE IF EXISTS sample_by_continent; 2 | DELIMITER // 3 | CREATE PROCEDURE sample_by_continent 4 | (IN sample_size INT(11)) 5 | BEGIN 6 | 7 | WITH 8 | city_group AS 9 | (SELECT 10 | c.* 11 | ,COUNT(*) OVER (PARTITION BY continent) AS group_size 12 | FROM city_by_continent AS c) 13 | ,city_prob_assign AS 14 | (SELECT 15 | c.* 16 | -- beaware expectation of sample size group_size * sample_size / group_size AS mean = sample_size 17 | ,SQRT(group_size * (sample_size / group_size) * (1 - sample_size / group_size)) AS std 18 | ,RAND() AS prob 19 | FROM city_group AS c) 20 | ,city_prob_cutoff AS 21 | (SELECT 22 | c.* 23 | ,(sample_size + CEIL(2 * std)) / group_size AS cutoff 24 | FROM city_prob_assign AS c) 25 | -- filter by cuffoff 26 | -- note that this step cannot be merged with previous 27 | ,city_sample AS 28 | (SELECT 29 | c.* 30 | -- window is evaluated after WHERE clause! 31 | -- ranking is cheap when performed on small group! 
32 | ,RANK() OVER (PARTITION BY continent ORDER BY prob) AS group_row_num 33 | FROM city_prob_cutoff AS c 34 | WHERE prob < cutoff) 35 | -- final, fixed size sample 36 | SELECT * 37 | FROM city_sample 38 | WHERE group_row_num <= sample_size; 39 | 40 | END// 41 | DELIMITER ; -------------------------------------------------------------------------------- /Hacks/06_Hacking_Aggregation/README.md: -------------------------------------------------------------------------------- 1 | # Hacking Aggregation 2 | 3 | This notebook goes through several ways to find the maximum (and conversely, the minimum) without using an aggregate function. We'll use the world database. 4 | 5 | #### Global Aggregate: Select Country with the Largest GNP 6 | This is easy with a scalar subquery, using the *MAX()* function. 7 | ``` 8 | SELECT Name 9 | FROM country 10 | WHERE GNP = (SELECT MAX(GNP) FROM country); 11 | 12 | +---------------+ 13 | | Name          | 14 | +---------------+ 15 | | United States | 16 | +---------------+ 17 | 1 row in set (0.00 sec) 18 | ``` 19 | 20 | Without an aggregate function, we can hack it by using *ALL*. 21 | ``` 22 | SELECT Name 23 | FROM country 24 | WHERE GNP >= ALL(SELECT GNP FROM country); 25 | 26 | +---------------+ 27 | | Name          | 28 | +---------------+ 29 | | United States | 30 | +---------------+ 31 | 1 row in set (0.00 sec) 32 | ``` 33 | 34 | __Warning__: if any country has __NULL__ in the *GNP* column, the query will return an empty set. See [this](https://github.com/shawlu95/Beyond-LeetCode-SQL/tree/master/Hacks/02_NULL_pathology) notebook for more pathological examples of __NULL__ behavior. To prevent such behavior, remove __NULL__ from the set. 35 | 36 | ```sql 37 | SELECT Name 38 | FROM country 39 | WHERE GNP >= ALL(SELECT GNP FROM country WHERE GNP IS NOT NULL); 40 | ``` 41 | 42 | Similarly, we can find the country with the lowest GNP, and countries whose GNP is above / below average.
43 | 44 | ```sql 45 | SELECT Name 46 | FROM country 47 | WHERE GNP <= ALL(SELECT GNP FROM country WHERE GNP IS NOT NULL); 48 | 49 | SELECT Name 50 | FROM country 51 | WHERE GNP >= (SELECT AVG(GNP) FROM country WHERE GNP IS NOT NULL); 52 | 53 | SELECT Name 54 | FROM country 55 | WHERE GNP <= (SELECT AVG(GNP) FROM country WHERE GNP IS NOT NULL); 56 | ``` 57 | 58 | ___ 59 | #### Group Aggregate: Find Largest Country on Each Continent 60 | Here we normally need to group by continent. 61 | ```sql 62 | SELECT 63 | Name 64 | ,Continent 65 | ,SurfaceArea 66 | FROM country 67 | WHERE (Continent, SurfaceArea) IN ( 68 | SELECT Continent, MAX(SurfaceArea) AS SurfaceArea 69 | FROM country GROUP BY Continent 70 | ); 71 | ``` 72 | ``` 73 | +--------------------+---------------+-------------+ 74 | | Name | Continent | SurfaceArea | 75 | +--------------------+---------------+-------------+ 76 | | Antarctica | Antarctica | 13120000.00 | 77 | | Australia | Oceania | 7741220.00 | 78 | | Brazil | South America | 8547403.00 | 79 | | Canada | North America | 9970610.00 | 80 | | China | Asia | 9572900.00 | 81 | | Russian Federation | Europe | 17075400.00 | 82 | | Sudan | Africa | 2505813.00 | 83 | +--------------------+---------------+-------------+ 84 | 7 rows in set (0.00 sec) 85 | ``` 86 | 87 | Without using *GROUP BY*, we can simply add a condition on *Continent*, turning the scalar subquery into a __correlated subquery__. 
88 | ```sql 89 | SELECT 90 | a.Name 91 | ,a.Continent 92 | ,a.SurfaceArea 93 | FROM country AS a 94 | WHERE a.SurfaceArea >= ALL( 95 | SELECT b.SurfaceArea 96 | FROM country AS b 97 | WHERE a.Continent = b.Continent 98 | AND b.SurfaceArea IS NOT NULL 99 | ); 100 | ``` 101 | ``` 102 | +--------------------+---------------+-------------+ 103 | | Name | Continent | SurfaceArea | 104 | +--------------------+---------------+-------------+ 105 | | Antarctica | Antarctica | 13120000.00 | 106 | | Australia | Oceania | 7741220.00 | 107 | | Brazil | South America | 8547403.00 | 108 | | Canada | North America | 9970610.00 | 109 | | China | Asia | 9572900.00 | 110 | | Russian Federation | Europe | 17075400.00 | 111 | | Sudan | Africa | 2505813.00 | 112 | +--------------------+---------------+-------------+ 113 | 7 rows in set (0.01 sec) 114 | ``` 115 | 116 | Just as before, we can find country whose surface area is smallest, and whose area is above/below average, comparing to other countries in the same continent. 117 | 118 | ```sql 119 | SELECT 120 | a.Name 121 | ,a.Continent 122 | ,a.SurfaceArea 123 | FROM country AS a 124 | WHERE a.SurfaceArea <= ALL( 125 | SELECT b.SurfaceArea 126 | FROM country AS b 127 | WHERE a.Continent = b.Continent 128 | AND b.SurfaceArea IS NOT NULL 129 | ); 130 | 131 | SELECT 132 | a.Name 133 | ,a.Continent 134 | ,a.SurfaceArea 135 | FROM country AS a 136 | WHERE a.SurfaceArea >= ALL( 137 | SELECT AVG(b.SurfaceArea) 138 | FROM country AS b 139 | WHERE a.Continent = b.Continent 140 | AND b.SurfaceArea IS NOT NULL 141 | ); 142 | 143 | SELECT 144 | a.Name 145 | ,a.Continent 146 | ,a.SurfaceArea 147 | FROM country AS a 148 | WHERE a.SurfaceArea <= ALL( 149 | SELECT AVG(b.SurfaceArea) 150 | FROM country AS b 151 | WHERE a.Continent = b.Continent 152 | AND b.SurfaceArea IS NOT NULL 153 | ); 154 | ``` 155 | 156 | ### Note 157 | By using 'less or equal', we are including the compared object itself during the comparison. 
Without equal sign, the query will return nothing, because it is impossible to have one row that beats every other row including itself. If we don't use equal sign, we need to exclude the object from comparing against itself. 158 | 159 | ```sql 160 | -- Bad 161 | SELECT Name 162 | FROM country 163 | WHERE GNP > ALL(SELECT GNP FROM country WHERE GNP IS NOT NULL); 164 | 165 | Empty set (0.00 sec) 166 | ``` 167 | 168 | ```sql 169 | -- Good 170 | SELECT a.Name 171 | FROM country AS a 172 | WHERE GNP > ALL( 173 | SELECT b.GNP FROM country AS b 174 | WHERE b.GNP IS NOT NULL 175 | AND b.Name != a.Name 176 | ); 177 | ``` 178 | ``` 179 | +---------------+ 180 | | Name | 181 | +---------------+ 182 | | United States | 183 | +---------------+ 184 | 1 row in set (0.00 sec) 185 | ``` 186 | ___ 187 | ### Parting Thoughts 188 | There are 30 countries whose GNP is above average, and 209 countries who GNP is below average. This is a positively skewed distirbution, whose mean is significantly above median. 
189 | 190 | -------------------------------------------------------------------------------- /Hacks/06_Hacking_Aggregation/solution.sql: -------------------------------------------------------------------------------- 1 | -- global aggregate 2 | SELECT Name 3 | FROM country 4 | WHERE GNP >= ALL(SELECT GNP FROM country WHERE GNP IS NOT NULL); 5 | 6 | SELECT Name 7 | FROM country 8 | WHERE GNP <= ALL(SELECT GNP FROM country WHERE GNP IS NOT NULL); 9 | 10 | SELECT Name 11 | FROM country 12 | WHERE GNP >= (SELECT AVG(GNP) FROM country WHERE GNP IS NOT NULL); 13 | 14 | SELECT Name 15 | FROM country 16 | WHERE GNP <= (SELECT AVG(GNP) FROM country WHERE GNP IS NOT NULL); 17 | 18 | -- group aggregate 19 | SELECT 20 | a.Name 21 | ,a.Continent 22 | ,a.SurfaceArea 23 | FROM country AS a 24 | WHERE a.SurfaceArea >= ALL( 25 | SELECT b.SurfaceArea 26 | FROM country AS b 27 | WHERE a.Continent = b.Continent 28 | AND b.SurfaceArea IS NOT NULL 29 | ); 30 | 31 | SELECT 32 | a.Name 33 | ,a.Continent 34 | ,a.SurfaceArea 35 | FROM country AS a 36 | WHERE a.SurfaceArea <= ALL( 37 | SELECT b.SurfaceArea 38 | FROM country AS b 39 | WHERE a.Continent = b.Continent 40 | AND b.SurfaceArea IS NOT NULL 41 | ); 42 | 43 | SELECT 44 | a.Name 45 | ,a.Continent 46 | ,a.SurfaceArea 47 | FROM country AS a 48 | WHERE a.SurfaceArea >= ALL( 49 | SELECT AVG(b.SurfaceArea) 50 | FROM country AS b 51 | WHERE a.Continent = b.Continent 52 | AND b.SurfaceArea IS NOT NULL 53 | ); 54 | 55 | SELECT 56 | a.Name 57 | ,a.Continent 58 | ,a.SurfaceArea 59 | FROM country AS a 60 | WHERE a.SurfaceArea <= ALL( 61 | SELECT AVG(b.SurfaceArea) 62 | FROM country AS b 63 | WHERE a.Continent = b.Continent 64 | AND b.SurfaceArea IS NOT NULL 65 | ); 66 | -------------------------------------------------------------------------------- /Hacks/07_Multi_Column_Partition/README.md: -------------------------------------------------------------------------------- 1 | # Multi-column Partitioning 2 | 3 | Just as GROUP BY can be applied 
to multiple columns, so can we PARTITION BY multiple columns. In the sample quiz [table](db.sql), we have one column for student_id, each student can take multiple classes, each classes can have multiple quizzes. To answer question such as "this is Mike's third quiz in deep learning class," we need to partition by both student_id and course. 4 | 5 | Load the table. 6 | ``` 7 | mysql < db.sql -uroot -p 8 | ``` 9 | 10 | ___ 11 | ### Multiple-column Partition 12 | ```sql 13 | SELECT *, ROW_NUMBER() OVER w AS quiz_number 14 | FROM Quiz 15 | WINDOW w AS (PARTITION BY user_id, course ORDER BY quiz_date); 16 | ``` 17 | ``` 18 | +----+---------+--------+------------+-------------+ 19 | | id | user_id | course | quiz_date | quiz_number | 20 | +----+---------+--------+------------+-------------+ 21 | | 6 | john | CS230 | 2019-02-01 | 1 | 22 | | 7 | john | CS230 | 2019-02-12 | 2 | 23 | | 10 | john | CS246 | 2019-03-06 | 1 | 24 | | 8 | john | CS246 | 2019-03-09 | 2 | 25 | | 9 | john | CS246 | 2019-03-29 | 3 | 26 | | 1 | shaw | CS229 | 2019-02-08 | 1 | 27 | | 2 | shaw | CS229 | 2019-03-01 | 2 | 28 | | 3 | shaw | CS230 | 2019-03-04 | 1 | 29 | | 4 | shaw | CS230 | 2019-03-14 | 2 | 30 | | 5 | shaw | CS231 | 2019-02-14 | 1 | 31 | +----+---------+--------+------------+-------------+ 32 | 10 rows in set (0.01 sec) 33 | ``` 34 | 35 | ___ 36 | ### Multiple-column Order 37 | Similarly, we can ORDER BY multiple columns in the window function to break ties. This is useful in resolving edge cases. 
38 | 39 | 40 | ```sql 41 | SELECT *, ROW_NUMBER() OVER w AS quiz_number 42 | FROM Quiz 43 | WINDOW w AS (PARTITION BY user_id ORDER BY course, quiz_date); 44 | ``` 45 | ``` 46 | +----+---------+--------+------------+-------------+ 47 | | id | user_id | course | quiz_date | quiz_number | 48 | +----+---------+--------+------------+-------------+ 49 | | 6 | john | CS230 | 2019-02-01 | 1 | 50 | | 7 | john | CS230 | 2019-02-12 | 2 | 51 | | 10 | john | CS246 | 2019-03-06 | 3 | 52 | | 8 | john | CS246 | 2019-03-09 | 4 | 53 | | 9 | john | CS246 | 2019-03-29 | 5 | 54 | | 1 | shaw | CS229 | 2019-02-08 | 1 | 55 | | 2 | shaw | CS229 | 2019-03-01 | 2 | 56 | | 3 | shaw | CS230 | 2019-03-04 | 3 | 57 | | 4 | shaw | CS230 | 2019-03-14 | 4 | 58 | | 5 | shaw | CS231 | 2019-02-14 | 5 | 59 | +----+---------+--------+------------+-------------+ 60 | 10 rows in set (0.00 sec) 61 | ``` 62 | 63 | -------------------------------------------------------------------------------- /Hacks/07_Multi_Column_Partition/db.sql: -------------------------------------------------------------------------------- 1 | 2 | -- phpMyAdmin SQL Dump 3 | -- version 4.7.7 4 | -- https://www.phpmyadmin.net/ 5 | -- 6 | -- Host: localhost:8889 7 | -- Generation Time: Mar 21, 2019 at 05:22 AM 8 | -- Server version: 5.6.38 9 | -- PHP Version: 7.2.1 10 | 11 | SET SQL_MODE = "NO_AUTO_VALUE_ON_ZERO"; 12 | SET time_zone = "+00:00"; 13 | 14 | -- 15 | -- Database: `practice` 16 | -- 17 | CREATE SCHEMA IF NOT EXISTS Practice; 18 | USE Practice; 19 | 20 | -- -------------------------------------------------------- 21 | 22 | -- 23 | -- Table structure for table `Quiz` 24 | -- 25 | 26 | DROP TABLE IF EXISTS `Quiz`; 27 | CREATE TABLE `Quiz` ( 28 | `id` int(11) NOT NULL, 29 | `user_id` varchar(10) NOT NULL, 30 | `course` varchar(10) NOT NULL, 31 | `quiz_date` date NOT NULL 32 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 33 | 34 | -- 35 | -- Dumping data for table `Quiz` 36 | -- 37 | 38 | INSERT INTO `Quiz` (`id`, `user_id`, `course`, 
`quiz_date`) VALUES 39 | (1, 'shaw', 'CS229', '2019-02-08'), 40 | (2, 'shaw', 'CS229', '2019-03-01'), 41 | (3, 'shaw', 'CS230', '2019-03-04'), 42 | (4, 'shaw', 'CS230', '2019-03-14'), 43 | (5, 'shaw', 'CS231', '2019-02-14'), 44 | (6, 'john', 'CS230', '2019-02-01'), 45 | (7, 'john', 'CS230', '2019-02-12'), 46 | (8, 'john', 'CS246', '2019-03-09'), 47 | (9, 'john', 'CS246', '2019-03-29'), 48 | (10, 'john', 'CS246', '2019-03-06'); 49 | 50 | -- 51 | -- Indexes for dumped tables 52 | -- 53 | 54 | -- 55 | -- Indexes for table `Quiz` 56 | -- 57 | ALTER TABLE `Quiz` 58 | ADD PRIMARY KEY (`id`); 59 | 60 | -- 61 | -- AUTO_INCREMENT for dumped tables 62 | -- 63 | 64 | -- 65 | -- AUTO_INCREMENT for table `Quiz` 66 | -- 67 | ALTER TABLE `Quiz` 68 | MODIFY `id` int(11) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=11; 69 | -------------------------------------------------------------------------------- /Hacks/07_Multi_Column_Partition/use_case.sql: -------------------------------------------------------------------------------- 1 | -- multi-column partition 2 | SELECT *, ROW_NUMBER() OVER w AS quiz_number 3 | FROM Quiz 4 | WINDOW w AS (PARTITION BY user_id, course ORDER BY quiz_date); 5 | 6 | -- multi-column order 7 | SELECT *, ROW_NUMBER() OVER w AS quiz_number 8 | FROM Quiz 9 | WINDOW w AS (PARTITION BY user_id ORDER BY course, quiz_date); 10 | -------------------------------------------------------------------------------- /Interview/01_Facebook_Advertiser_Status/README.md: -------------------------------------------------------------------------------- 1 | # Facebook Advertiser Status 2 | 3 | ### Key Concepts 4 | * Conditional update with join. 5 | * Left join vs. inner join. 6 | * Edge case: adding new users. 7 | * Boolean algebra and simplification. 8 | 9 | --- 10 | ### Tables: 11 | * *DailyPay*: user_id (showing today paid ads fee) on day T. __Only__ advertisers who paid will show up in this table. 
12 | * *Advertiser*: two columns, user_id and their status on day T-1 13 | Use today’s payment log in *DailyPay* table to update status in *Advertiser* table 14 | 15 | ### Status: 16 | * New: users registered on day T. 17 | * Existing: users who paid on day T-1 and on day T. 18 | * Churn: users who paid on day T-1 but not on day T. 19 | * Resurrect: users who did not pay on T-1 but paid on day T. 20 | 21 | ### State Transition 22 | 23 |

24 | 25 |

26 | 27 | |#| Start | End | Condition | 28 | |-|----|----------|-----------| 29 | |1|NEW|EXISTING|Paid on day T| 30 | |2|NEW|CHURN|No pay on day T| 31 | |3|EXISTING|EXISTING|Paid on day T| 32 | |4|EXISTING|CHURN|No pay on day T| 33 | |5|CHURN|RESURRECT|Paid on day T| 34 | |6|CHURN|CHURN|No pay on day T| 35 | |7|RESURRECT|EXISTING|Paid on day T| 36 | |8|RESURRECT|CHURN|No pay on day T| 37 | 38 | By examining the above table. We can see that as long as user has not paid on day T, his status is updated to CHURN regardless of previous status (check with interviewer that all new users who registered on day T did pay, and if they didn't, they are not immediately considered as CHURN. 39 | 40 | When user did pay on day T (#1, 3, 5, 7). They can become either EXISTING or RESURRECT, depending on their previous state. RESURRECT is only possible when previous state is CHURN. When previous state is anything else, status is updated to EXISTING. 41 | 42 | ### Sample Database 43 | Load the database file [db.sql](db.sql) to localhost MySQL. An Advertiser database will be created with two tables. 44 | 45 | ```bash 46 | mysql < db.sql -uroot -p 47 | ``` 48 | 49 | ``` 50 | mysql> SELECT * FROM Advertiser; 51 | +----+---------+-----------+ 52 | | id | user_id | status | 53 | +----+---------+-----------+ 54 | | 1 | bing | NEW | 55 | | 2 | yahoo | NEW | 56 | | 3 | alibaba | EXISTING | 57 | | 4 | baidu | EXISTING | 58 | | 5 | target | CHURN | 59 | | 6 | tesla | CHURN | 60 | | 7 | morgan | RESURRECT | 61 | | 8 | chase | RESURRECT | 62 | +----+---------+-----------+ 63 | 8 rows in set (0.00 sec) 64 | 65 | mysql> SELECT * FROM DailyPay; 66 | +----+---------+------+ 67 | | id | user_id | paid | 68 | +----+---------+------+ 69 | | 1 | yahoo | 45 | 70 | | 2 | alibaba | 100 | 71 | | 3 | target | 13 | 72 | | 4 | morgan | 600 | 73 | | 5 | fitdata | 1 | 74 | +----+---------+------+ 75 | 5 rows in set (0.00 sec) 76 | ``` 77 | 78 | --- 79 | ### Solution 80 | #### Step 1. 
Update Existing Advertiser 81 | After simplifying the boolean algebra, we only need three conditions. State __explicitly__ we don't need "ELSE status" in the CASE statement because we've covered all possible conditions. Also emphasize we need __LEFT JOIN__ to find out who did not pay on day T. 82 | ```sql 83 | UPDATE Advertiser AS a 84 | LEFT JOIN DailyPay AS d 85 | ON a.user_id = d.user_id 86 | SET a.status = CASE 87 | WHEN d.paid IS NULL THEN "CHURN" 88 | WHEN a.status = "CHURN" AND d.paid IS NOT NULL THEN "RESURRECT" 89 | WHEN a.status != "CHURN" AND d.paid IS NOT NULL THEN "EXISTING" 90 | END; 91 | ``` 92 | 93 | Check the *Advertiser* to see if the update make sense. 94 | ``` 95 | mysql> SELECT * FROM Advertiser; 96 | +----+---------+-----------+ 97 | | id | user_id | status | 98 | +----+---------+-----------+ 99 | | 1 | bing | CHURN | 100 | | 2 | yahoo | EXISTING | 101 | | 3 | alibaba | EXISTING | 102 | | 4 | baidu | CHURN | 103 | | 5 | target | RESURRECT | 104 | | 6 | tesla | CHURN | 105 | | 7 | morgan | EXISTING | 106 | | 8 | chase | CHURN | 107 | +----+---------+-----------+ 108 | 8 rows in set (0.00 sec) 109 | ``` 110 | 111 | #### Step 2. Insert New Advertiser 112 | Note that we missed the new user. To find the new user, left join *DailyPay* with *Advertiser*. If there is no match on the right, the user is new. 113 | 114 | ```sql 115 | INSERT INTO 116 | Advertiser (user_id, status) 117 | SELECT d.user_id 118 | ,"NEW" as status 119 | FROM DailyPay AS d 120 | LEFT JOIN Advertiser AS a 121 | ON d.user_id = a.user_id 122 | WHERE a.user_id IS NULL; 123 | ``` 124 | 125 | Check again that the new users are added. 
126 | ``` 127 | SELECT * FROM Advertiser; 128 | +----+---------+-----------+ 129 | | id | user_id | status | 130 | +----+---------+-----------+ 131 | | 1 | bing | CHURN | 132 | | 2 | yahoo | EXISTING | 133 | | 3 | alibaba | EXISTING | 134 | | 4 | baidu | CHURN | 135 | | 5 | target | RESURRECT | 136 | | 6 | tesla | CHURN | 137 | | 7 | morgan | EXISTING | 138 | | 8 | chase | CHURN | 139 | | 9 | fitdata | NEW | 140 | +----+---------+-----------+ 141 | 9 rows in set (0.00 sec) 142 | ``` 143 | See solution [here](solution.sql). -------------------------------------------------------------------------------- /Interview/01_Facebook_Advertiser_Status/db.sql: -------------------------------------------------------------------------------- 1 | 2 | DROP SCHEMA IF EXISTS Advertiser; 3 | CREATE SCHEMA Advertiser; 4 | USE Advertiser; 5 | 6 | DROP TABLE IF EXISTS `Advertiser`; 7 | CREATE TABLE `Advertiser` ( 8 | `id` int(11) NOT NULL, 9 | `user_id` varchar(10) NOT NULL, 10 | `status` enum('CHURN','NEW','EXISTING','RESURRECT') NOT NULL 11 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 12 | 13 | INSERT INTO `Advertiser` (`id`, `user_id`, `status`) VALUES 14 | (1, 'bing', 'NEW'), 15 | (2, 'yahoo', 'NEW'), 16 | (3, 'alibaba', 'EXISTING'), 17 | (4, 'baidu', 'EXISTING'), 18 | (5, 'target', 'CHURN'), 19 | (6, 'tesla', 'CHURN'), 20 | (7, 'morgan', 'RESURRECT'), 21 | (8, 'chase', 'RESURRECT'); 22 | 23 | ALTER TABLE `Advertiser` 24 | ADD PRIMARY KEY (`id`); 25 | 26 | ALTER TABLE `Advertiser` 27 | MODIFY `id` int(11) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=9; 28 | 29 | -- Table structure for table `DailyPay` 30 | DROP TABLE IF EXISTS `DailyPay`; 31 | CREATE TABLE `DailyPay` ( 32 | `id` int(11) NOT NULL, 33 | `user_id` varchar(10) NOT NULL, 34 | `paid` int(11) DEFAULT NULL 35 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 36 | 37 | INSERT INTO `DailyPay` (`id`, `user_id`, `paid`) VALUES 38 | (1, 'yahoo', 45), 39 | (2, 'alibaba', 100), 40 | (3, 'target', 13), 41 | (4, 'morgan', 600), 42 | (5, 'fitdata', 
1); 43 | 44 | ALTER TABLE `DailyPay` 45 | ADD PRIMARY KEY (`id`); 46 | 47 | ALTER TABLE `DailyPay` 48 | MODIFY `id` int(11) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=6; 49 | -------------------------------------------------------------------------------- /Interview/01_Facebook_Advertiser_Status/fig/transition.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shawlu95/Beyond-LeetCode-SQL/8ab7f0ad60b85ab9dff18da6fc760211335d0fb0/Interview/01_Facebook_Advertiser_Status/fig/transition.png -------------------------------------------------------------------------------- /Interview/01_Facebook_Advertiser_Status/solution.sql: -------------------------------------------------------------------------------- 1 | -- Step 1. Update existing advertiser 2 | UPDATE Advertiser AS a 3 | LEFT JOIN DailyPay AS d 4 | ON a.user_id = d.user_id 5 | SET a.status = CASE 6 | WHEN d.paid IS NULL THEN "CHURN" 7 | WHEN a.status = "CHURN" AND d.paid IS NOT NULL THEN "RESURRECT" 8 | WHEN a.status != "CHURN" AND d.paid IS NOT NULL THEN "EXISTING" 9 | END; 10 | 11 | -- Step 2. Insert new advertiser 12 | INSERT INTO 13 | Advertiser (user_id, status) 14 | SELECT d.user_id 15 | ,"NEW" as status 16 | FROM DailyPay AS d 17 | LEFT JOIN Advertiser AS a 18 | ON d.user_id = a.user_id 19 | WHERE a.user_id IS NULL; -------------------------------------------------------------------------------- /Interview/02_Spotify_Listening_History/README.md: -------------------------------------------------------------------------------- 1 | # Spotify Listening History 2 | 3 | ### Key Concepts 4 | * Update aggregate table with event log. 5 | * Temporary table & reusability. 6 | * Update with join statement. 7 | * Edge case: adding new user-song pair. 8 | * Aggregation. 9 | 10 | --- 11 | ### Two Tables 12 | You have a History table where you have date, user_id, song_id and count(tally). 
13 | It shows at the end of each day how many times in her history a user has listened to a given song. 14 | So count is cumulative sum. 15 | 16 | You have to update this on a daily basis based on a second *Daily* table that records 17 | in real time when a user listens to a given song. 18 | 19 | Basically, at the end of each day, you go to this second table and pull a count 20 | of each user/song combination and then add this count to the first table that 21 | has the lifetime count. 22 | 23 | ### Sample Database 24 | Load the database file [db.sql](db.sql) to localhost MySQL. A Spotify database will be created with two tables. 25 | 26 | ```bash 27 | mysql < db.sql -uroot -p 28 | ``` 29 | 30 | ``` 31 | mysql> SELECT * from History; 32 | +----+---------+---------+-------+ 33 | | id | user_id | song_id | tally | 34 | +----+---------+---------+-------+ 35 | | 1 | shaw | rise | 2 | 36 | | 2 | linda | lemon | 4 | 37 | +----+---------+---------+-------+ 38 | 2 rows in set (0.00 sec) 39 | 40 | mysql> SELECT * from Daily; 41 | +----+---------+---------+---------------------+ 42 | | id | user_id | song_id | time_stamp | 43 | +----+---------+---------+---------------------+ 44 | | 1 | shaw | rise | 2019-03-01 05:33:08 | 45 | | 2 | shaw | rise | 2019-03-01 16:00:00 | 46 | | 3 | shaw | goodie | 2019-03-01 10:15:00 | 47 | | 4 | linda | lemon | 2019-02-28 00:00:00 | 48 | | 5 | mark | game | 2019-03-01 04:00:00 | 49 | +----+---------+---------+---------------------+ 50 | 5 rows in set (0.00 sec) 51 | ``` 52 | 53 | --- 54 | ### Observation 55 | * Note that the *Daily* table is a event-log. To update *History*, we need to aggregate the event log, grouping by *user_id* and *song_id*. 56 | * A user may listen to a new song for the first time, in which case no existing (*user_id*, *song_id*) compound key pair exists in the *History* table. So we need an additional INSERT statement. 57 | 58 | ### Solution 59 | __Step 1. 
Build temporary table.__ 60 | For both the UPDATE and INSERT statements, we need the same aggregated information from the *Daily* table. So we can save it as a temporary table. 61 | ```sql 62 | SET @now = "2019-03-01 00:00:00"; 63 | 64 | -- Create tamporary table 65 | DROP TABLE IF EXISTS daily_count; 66 | CREATE TEMPORARY TABLE daily_count 67 | SELECT 68 | user_id 69 | ,song_id 70 | ,COUNT(*) AS tally 71 | FROM Daily 72 | WHERE DATEDIFF(@now, time_stamp) = 0 73 | GROUP BY user_id, song_id; 74 | ``` 75 | 76 | Check the temporary table. 77 | ``` 78 | mysql> SELECT * FROM daily_count; 79 | +---------+---------+-------+ 80 | | user_id | song_id | tally | 81 | +---------+---------+-------+ 82 | | mark | game | 1 | 83 | | shaw | goodie | 1 | 84 | | shaw | rise | 2 | 85 | +---------+---------+-------+ 86 | 3 rows in set (0.00 sec) 87 | ``` 88 | 89 | __Step 2. Update existing pair.__ It's okay to join the temporary table with the History table during the update process, because History is independent of the temporary table. 90 | ```sql 91 | UPDATE History AS uh 92 | JOIN daily_count AS dc 93 | ON uh.user_id = dc.user_id 94 | AND uh.song_id = dc.song_id 95 | SET uh.tally = uh.tally + dc.tally; 96 | ``` 97 | 98 | Check if update is correct: *shaw* listened to *rise* twice on March 1. So the compound key is incremented by 2. On the other hand, linda did not listened to any song on March 1. So her number doesn't change. 99 | ``` 100 | mysql> SELECT * FROM History; 101 | +----+---------+---------+-------+ 102 | | id | user_id | song_id | tally | 103 | +----+---------+---------+-------+ 104 | | 1 | shaw | rise | 4 | 105 | | 2 | linda | lemon | 4 | 106 | +----+---------+---------+-------+ 107 | 2 rows in set (0.00 sec) 108 | ``` 109 | 110 | __Step 3. 
Insert new pair.__ After updating existing (*user_id*, *song_id*) compound key pair, we need to insert new ones: 111 | 112 | ```sql 113 | INSERT INTO History (user_id, song_id, tally) 114 | SELECT 115 | dc.user_id 116 | ,dc.song_id 117 | ,dc.tally 118 | FROM daily_count AS dc 119 | LEFT JOIN History AS uh 120 | ON dc.user_id = uh.user_id 121 | AND dc.song_id = uh.song_id 122 | WHERE uh.tally IS NULL; 123 | ``` 124 | 125 | Check that the insertions are correct. 126 | ``` 127 | mysql> SELECT * FROM History; 128 | +----+---------+---------+-------+ 129 | | id | user_id | song_id | tally | 130 | +----+---------+---------+-------+ 131 | | 1 | shaw | rise | 4 | 132 | | 2 | linda | lemon | 4 | 133 | | 3 | mark | game | 1 | 134 | | 4 | shaw | goodie | 1 | 135 | +----+---------+---------+-------+ 136 | 4 rows in set (0.00 sec) 137 | ``` 138 | 139 | See solution [here](solution.sql). -------------------------------------------------------------------------------- /Interview/02_Spotify_Listening_History/db.sql: -------------------------------------------------------------------------------- 1 | DROP SCHEMA IF EXISTS Spotify; 2 | CREATE SCHEMA Spotify; 3 | USE Spotify; 4 | 5 | DROP TABLE IF EXISTS `Daily`; 6 | CREATE TABLE `Daily` ( 7 | `id` int(11) NOT NULL, 8 | `user_id` varchar(15) DEFAULT NULL, 9 | `song_id` varchar(15) DEFAULT NULL, 10 | `time_stamp` datetime DEFAULT NULL 11 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 12 | 13 | INSERT INTO `Daily` (`id`, `user_id`, `song_id`, `time_stamp`) VALUES 14 | (1, 'shaw', 'rise', '2019-03-01 05:33:08'), 15 | (2, 'shaw', 'rise', '2019-03-01 16:00:00'), 16 | (3, 'shaw', 'goodie', '2019-03-01 10:15:00'), 17 | (4, 'linda', 'lemon', '2019-02-28 00:00:00'), 18 | (5, 'mark', 'game', '2019-03-01 04:00:00'); 19 | 20 | ALTER TABLE `Daily` 21 | ADD PRIMARY KEY (`id`); 22 | 23 | ALTER TABLE `Daily` 24 | MODIFY `id` int(11) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=6; 25 | 26 | 27 | DROP TABLE IF EXISTS `History`; 28 | CREATE TABLE `History` ( 29 | 
`id` int(11) NOT NULL, 30 | `user_id` varchar(5) DEFAULT NULL, 31 | `song_id` varchar(10) DEFAULT NULL, 32 | `tally` int(11) DEFAULT NULL 33 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 34 | 35 | INSERT INTO `History` (`id`, `user_id`, `song_id`, `tally`) VALUES 36 | (1, 'shaw', 'rise', 2), 37 | (2, 'linda', 'lemon', 4); 38 | 39 | ALTER TABLE `History` 40 | ADD PRIMARY KEY (`id`); 41 | 42 | ALTER TABLE `History` 43 | MODIFY `id` int(11) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=3; 44 | -------------------------------------------------------------------------------- /Interview/02_Spotify_Listening_History/solution.sql: -------------------------------------------------------------------------------- 1 | SET @now = "2019-03-01 00:00:00"; 2 | 3 | -- Step 1. Build temporary table 4 | DROP TABLE IF EXISTS daily_count; 5 | CREATE TEMPORARY TABLE daily_count 6 | SELECT 7 | user_id 8 | ,song_id 9 | ,COUNT(*) AS tally 10 | FROM Daily 11 | WHERE DATEDIFF(@now, time_stamp) = 0 12 | GROUP BY user_id, song_id; 13 | 14 | -- Step 2. Update existing pair 15 | UPDATE History AS uh 16 | JOIN daily_count AS dc 17 | ON uh.user_id = dc.user_id 18 | AND uh.song_id = dc.song_id 19 | SET uh.tally = uh.tally + dc.tally; 20 | 21 | -- Step 3. Insert new pair 22 | INSERT INTO History (user_id, song_id, tally) 23 | SELECT 24 | dc.user_id 25 | ,dc.song_id 26 | ,dc.tally 27 | FROM daily_count AS dc 28 | LEFT JOIN History AS uh 29 | ON dc.user_id = uh.user_id 30 | AND dc.song_id = uh.song_id 31 | WHERE uh.tally IS NULL; 32 | 33 | -------------------------------------------------------------------------------- /Interview/03_Monthly_Active_User/README.md: -------------------------------------------------------------------------------- 1 | # Monthly Active User 2 | 3 | This post is inspired by this [page](https://www.programmerinterview.com/index.php/database-sql/practice-interview-question-2/). However, the solution from there is not completely correct, as they selected non-aggregated columns (Question 1). 
Also, *DISTINCT* keyword is not necessary in Question 2. 4 | 5 | ### Key Concepts 6 | * Aggregate function 7 | * Left join 8 | * Removing duplicate 9 | * Functional dependency 10 | 11 | ### Sample data 12 | Load the database file [db.sql](db.sql) to localhost MySQL. A MAU database will be created with two tables. 13 | 14 | ```bash 15 | mysql < db.sql -uroot -p 16 | ``` 17 | ``` 18 | mysql> SELECT * FROM User; 19 | +---------+---------+--------------+ 20 | | user_id | name | phone_num | 21 | +---------+---------+--------------+ 22 | | jkog | Jing | 202-555-0176 | 23 | | niceguy | Goodman | 202-555-0174 | 24 | | sanhoo | Sanjay | 202-555-0100 | 25 | | shaw123 | Shaw | 202-555-0111 | 26 | +---------+---------+--------------+ 27 | 4 rows in set (0.00 sec) 28 | ``` 29 | 30 | Every time a user logs in a new row is inserted into the UserHistory table with user_id, current date and action (where action = "logged_on"). 31 | ``` 32 | mysql> SELECT * FROM UserHistory; 33 | +---------+------------+-----------+ 34 | | user_id | date | action | 35 | +---------+------------+-----------+ 36 | | shaw123 | 2019-02-20 | logged_on | 37 | | shaw123 | 2019-03-12 | signed_up | 38 | | sanhoo | 2019-02-27 | logged_on | 39 | | sanhoo | 2019-01-01 | logged_on | 40 | | niceguy | 2019-01-22 | logged_on | 41 | +---------+------------+-----------+ 42 | 5 rows in set (0.00 sec) 43 | ``` 44 | 45 | --- 46 | ### Q1: Find monthly active users. 
47 | *Write a SQL query that returns the name, phone number and most recent date for any user that has logged in over the last 30 days (you can tell a user has logged in if the action field in UserHistory is set to "logged_on").* 48 | 49 | ```sql 50 | SET @today := "2019-03-01"; 51 | SELECT 52 | User.name 53 | ,User.phone_num 54 | ,MAX(UserHistory.date) 55 | FROM User, UserHistory 56 | WHERE User.user_id = UserHistory.user_id 57 | AND UserHistory.action = 'logged_on' 58 | AND UserHistory.date >= DATE_SUB(@today, INTERVAL 30 DAY) 59 | GROUP BY User.user_id; 60 | 61 | ``` 62 | ``` 63 | +--------+--------------+-----------------------+ 64 | | name | phone_num | max(UserHistory.date) | 65 | +--------+--------------+-----------------------+ 66 | | Sanjay | 202-555-0100 | 2019-02-27 | 67 | | Shaw | 202-555-0111 | 2019-02-20 | 68 | +--------+--------------+-----------------------+ 69 | 2 rows in set (0.00 sec) 70 | 71 | ``` 72 | 73 | The above solution is only correct when phone_num and name are functionally dependent on user_id. That is, for every user_id, there is a unique phone_num and name, so we can get away with selecting non-aggregated columns (which are also not in group by clause). 74 | 75 | Depending on database engine configuration, an error may be thrown when a selected column is neither aggregated nor in the group by clause. If we are certain that one-on-one mapping exists, we can add a aggregate function to the additional columns (*MAX()* or *MIN()*). 
76 | 77 | ```sql 78 | SET @today := "2019-03-01"; 79 | SELECT 80 | MAX(u.name) -- functionally dependent on user_id 81 | ,MAX(u.phone_num) -- functionally dependent on user_id 82 | ,MAX(h.date) AS recent_date 83 | FROM User AS u, UserHistory AS h 84 | WHERE u.user_id = h.user_id 85 | AND h.action = "logged_on" 86 | AND DATEDIFF(@today, h.date) <= 30 -- DATEDIFF(later, earlier) 87 | GROUP BY u.user_id 88 | ORDER BY recent_date; 89 | 90 | ``` 91 | ``` 92 | +-------------+------------------+-------------+ 93 | | MAX(u.name) | MAX(u.phone_num) | recent_date | 94 | +-------------+------------------+-------------+ 95 | | Shaw | 202-555-0111 | 2019-02-20 | 96 | | Sanjay | 202-555-0100 | 2019-02-27 | 97 | +-------------+------------------+-------------+ 98 | 2 rows in set (0.01 sec) 99 | ``` 100 | 101 | Inner join also serves the purpose, and avoid making a cartesian product between two tables as in the cross join above (although query optimizer can take care of such trivial optimization, it's useful to know). 102 | 103 | __Why__ inner join: *user_id* in *UserHistory* table is a foreign key referring to *User* table primary key. Meaning that it is a subset of the primary key column. There may exists users who never logged on, and never appeared in the *UserHistory* table. Since we are interested in monthly active users. It's safe to ignore those inactive users. 104 | 105 | ```sql 106 | -- using inner join 107 | SET @today := "2019-03-01"; 108 | SELECT 109 | MAX(u.name) -- functionally dependent on user_id 110 | ,MAX(u.phone_num) -- functionally dependent on user_id 111 | ,MAX(h.date) AS recent_date 112 | FROM User AS u 113 | JOIN UserHistory AS h 114 | ON u.user_id = h.user_id 115 | WHERE h.action = "logged_on" 116 | AND DATEDIFF(@today, h.date) <= 30 117 | GROUP BY u.user_id 118 | ORDER BY recent_date; 119 | ``` 120 | 121 | If any selected column is __not__ functionally dependent on the group by column, then unpredictable result may be returned, or error may be thrown. 
To avoid such trouble, only select aggregated columns and group by columns into a temporary tables, and join the temporary table with the original table to retrieve other desired columns. 122 | 123 | --- 124 | ### Q2. Find inactive users 125 | *Write a SQL query to determine which user_ids in the User table are not contained in the UserHistory table (assume the UserHistory table has a subset of the user_ids in User table). Do not use the SQL MINUS statement. Note: the UserHistory table can have multiple entries for each user_id (Note that your SQL should be compatible with MySQL 5.0, and avoid using subqueries)* 126 | 127 | See [here](https://www.programmerinterview.com/index.php/database-sql/practice-interview-question-2-continued/) for a detailed walk-through. However, the solution is not totally correct. We __don't__ need the *DISTINCT* keyword here. Because if a *user_id* has no match in the *UserHistory* table, that row it returned __only once__. 128 | 129 | ```sql 130 | SELECT * 131 | FROM User AS u 132 | LEFT JOIN UserHistory AS h 133 | ON u.user_id = h.user_id 134 | WHERE h.user_id IS NULL; 135 | ``` 136 | ``` 137 | +---------+------+--------------+---------+------+--------+ 138 | | user_id | name | phone_num | user_id | date | action | 139 | +---------+------+--------------+---------+------+--------+ 140 | | jkog | Jing | 202-555-0176 | NULL | NULL | NULL | 141 | +---------+------+--------------+---------+------+--------+ 142 | 1 row in set (0.00 sec) 143 | ``` 144 | 145 | A less efficient approach is to retain valid users in a hashset. Remember that __NOT IN__ requires full traversal of every element in the hashset for a single check (*DISTINCT* keyword turns the set into hash set, but makes no difference on the result). 
146 | 147 | ```sql 148 | SELECT * 149 | FROM User 150 | WHERE user_id NOT IN ( 151 | SELECT DISTINCT user_id FROM UserHistory 152 | ); 153 | ``` 154 | ``` 155 | +---------+------+--------------+ 156 | | user_id | name | phone_num | 157 | +---------+------+--------------+ 158 | | jkog | Jing | 202-555-0176 | 159 | +---------+------+--------------+ 160 | 1 row in set (0.00 sec) 161 | ``` 162 | 163 | See solution [here](solution.sql). -------------------------------------------------------------------------------- /Interview/03_Monthly_Active_User/db.sql: -------------------------------------------------------------------------------- 1 | DROP SCHEMA IF EXISTS MAU; 2 | CREATE SCHEMA MAU; 3 | USE MAU; 4 | 5 | 6 | -- -------------------------------------------------------- 7 | 8 | -- 9 | -- Table structure for table `User` 10 | -- 11 | 12 | DROP TABLE IF EXISTS `User`; 13 | CREATE TABLE `User` ( 14 | `user_id` varchar(10) NOT NULL, 15 | `name` varchar(10) NOT NULL, 16 | `phone_num` varchar(12) NOT NULL 17 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 18 | 19 | INSERT INTO `User` (`user_id`, `name`, `phone_num`) VALUES 20 | ('jkog', 'Jing', '202-555-0176'), 21 | ('niceguy', 'Goodman', '202-555-0174'), 22 | ('sanhoo', 'Sanjay', '202-555-0100'), 23 | ('shaw123', 'Shaw', '202-555-0111'); 24 | 25 | ALTER TABLE `User` 26 | ADD PRIMARY KEY (`user_id`); 27 | 28 | 29 | -- -------------------------------------------------------- 30 | 31 | -- 32 | -- Table structure for table `UserHistory` 33 | -- 34 | 35 | DROP TABLE IF EXISTS `UserHistory`; 36 | CREATE TABLE `UserHistory` ( 37 | `user_id` varchar(10) NOT NULL, 38 | `date` date NOT NULL, 39 | `action` varchar(10) NOT NULL 40 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 41 | 42 | INSERT INTO `UserHistory` (`user_id`, `date`, `action`) VALUES 43 | ('sanhoo', '2019-01-01', 'logged_on'), 44 | ('niceguy', '2019-01-22', 'logged_on'), 45 | ('shaw123', '2019-02-20', 'logged_on'), 46 | ('sanhoo', '2019-02-27', 'logged_on'), 47 | ('shaw123', 
-- Q1: find monthly active users (users who logged on within the past 30 days).
SET @today := "2019-03-01";
SELECT
    MAX(u.name)        -- functionally dependent on user_id
    ,MAX(u.phone_num)  -- functionally dependent on user_id
    ,MAX(h.date) AS recent_date
FROM User AS u
JOIN UserHistory AS h
    ON u.user_id = h.user_id
WHERE h.action = "logged_on"
    AND h.date <= @today                -- exclude future-dated rows (DATEDIFF alone is negative for them and would pass <= 30)
    AND DATEDIFF(@today, h.date) <= 30  -- within the trailing 30-day window
GROUP BY u.user_id
ORDER BY recent_date;

-- Q2: find inactive users (all-time): left join, then keep users with no history match.
SELECT *
FROM User AS u
LEFT JOIN UserHistory AS h
    ON u.user_id = h.user_id
WHERE h.user_id IS NULL;
-- MySQL solution: recommend to each user the pages his friends follow but he does not,
-- ranked by how many friends follow the page.
WITH two_way_friendship AS (
    SELECT
        user_id
        ,friend_id
    FROM Friendship
    UNION
    SELECT
        friend_id
        ,user_id
    FROM Friendship
)
SELECT
    f.user_id
    ,p.page_id
    ,COUNT(*) AS friends_follower
FROM two_way_friendship AS f
JOIN PageFollow AS p  -- inner join: a friend who follows nothing must not yield a NULL-page recommendation row
    ON f.friend_id = p.user_id
WHERE NOT EXISTS (
    -- drop pages the user himself already follows
    SELECT 1 FROM PageFollow AS p2
    WHERE f.user_id = p2.user_id
    AND p.page_id = p2.page_id
)
GROUP BY f.user_id, p.page_id
ORDER BY f.user_id ASC, COUNT(*) DESC;
-- Pivot yearly expenses: one row per category, one column per month (Dec..Jan).
SELECT
    category
    ,SUM(CASE WHEN MONTH(time) = 12 THEN cost ELSE 0 END) AS 'Dec'
    ,SUM(CASE WHEN MONTH(time) = 11 THEN cost ELSE 0 END) AS 'Nov'
    ,SUM(CASE WHEN MONTH(time) = 10 THEN cost ELSE 0 END) AS 'Oct'
    ,SUM(CASE WHEN MONTH(time) = 9  THEN cost ELSE 0 END) AS 'Sep'
    ,SUM(CASE WHEN MONTH(time) = 8  THEN cost ELSE 0 END) AS 'Aug'
    ,SUM(CASE WHEN MONTH(time) = 7  THEN cost ELSE 0 END) AS 'Jul'
    ,SUM(CASE WHEN MONTH(time) = 6  THEN cost ELSE 0 END) AS 'Jun'
    ,SUM(CASE WHEN MONTH(time) = 5  THEN cost ELSE 0 END) AS 'May'
    ,SUM(CASE WHEN MONTH(time) = 4  THEN cost ELSE 0 END) AS 'Apr'
    ,SUM(CASE WHEN MONTH(time) = 3  THEN cost ELSE 0 END) AS 'Mar'
    ,SUM(CASE WHEN MONTH(time) = 2  THEN cost ELSE 0 END) AS 'Feb'
    ,SUM(CASE WHEN MONTH(time) = 1  THEN cost ELSE 0 END) AS 'Jan'
FROM Expenses
GROUP BY category;
37 | 38 | -- 39 | -- Indexes for dumped tables 40 | -- 41 | 42 | -- 43 | -- Indexes for table `CourseGrade` 44 | -- 45 | ALTER TABLE `CourseGrade` 46 | ADD KEY `name` (`name`); 47 | -------------------------------------------------------------------------------- /Interview/06_Pivoting_Text_Data/pivot.sql: -------------------------------------------------------------------------------- 1 | -- using self join (cross join) 2 | SELECT 3 | t1.name 4 | ,t1.grade AS 'CS106B' 5 | ,t2.grade AS 'CS229' 6 | ,t3.grade AS 'CS224N' 7 | FROM CourseGrade AS t1 8 | ,CourseGrade AS t2 9 | ,CourseGrade AS t3 10 | WHERE t1.name = t2.name 11 | AND t2.name = t3.name 12 | AND t1.course = 'CS106B' 13 | AND t2.course = 'CS229' 14 | AND t3.course = 'CS224N'; 15 | 16 | -- using self join (inner join) 17 | SELECT 18 | t1.name 19 | ,t1.grade AS 'CS106B' 20 | ,t2.grade AS 'CS229' 21 | ,t3.grade AS 'CS224N' 22 | FROM CourseGrade AS t1 23 | JOIN CourseGrade AS t2 24 | ON t1.course = 'CS106B' 25 | AND t2.course = 'CS229' 26 | AND t1.name = t2.name 27 | JOIN CourseGrade AS t3 28 | ON t3.course = 'CS224N' 29 | AND t2.name = t3.name; 30 | 31 | 32 | -- using case statement and aggregation 33 | SELECT 34 | name 35 | ,MAX(CASE WHEN course = 'CS106B' THEN grade ELSE NULL END) AS 'CS106B' 36 | ,MAX(CASE WHEN course = 'CS229' THEN grade ELSE NULL END) AS 'CS229' 37 | ,MAX(CASE WHEN course = 'CS224N' THEN grade ELSE NULL END) AS 'CS224N' 38 | FROM CourseGrade 39 | GROUP BY name; -------------------------------------------------------------------------------- /Interview/07_Unpivoting_Tables/pivot_table.sql: -------------------------------------------------------------------------------- 1 | USE Practice; 2 | 3 | DROP TABLE IF EXISTS course_grade_pivoted; 4 | CREATE TABLE course_grade_pivoted AS 5 | SELECT 6 | name 7 | ,MAX(CASE WHEN course = 'CS106B' THEN grade ELSE NULL END) AS 'CS106B' 8 | ,MAX(CASE WHEN course = 'CS229' THEN grade ELSE NULL END) AS 'CS229' 9 | ,MAX(CASE WHEN course = 'CS224N' THEN grade 
-- Un-pivot course_grade: cross join each pivoted row against a derived table of
-- course names, then pick the matching grade column with a CASE expression.
SELECT
    name
    ,aux.course
    ,CASE aux.course
        WHEN 'CS106B' THEN CS106B
        WHEN 'CS229' THEN CS229
        WHEN 'CS224N' THEN CS224N
    END AS grade
FROM course_grade_pivoted,
(
    SELECT 'CS106B' AS course
    UNION ALL
    SELECT 'CS229'
    UNION ALL
    SELECT 'CS224N'  -- stray `year` alias removed (copy-paste leftover; aliases on later UNION branches are ignored anyway)
) aux;
'May' THEN May 29 | WHEN 'Jun' THEN Jun 30 | WHEN 'Jul' THEN Jul 31 | WHEN 'Aug' THEN Aug 32 | WHEN 'Sep' THEN Sep 33 | WHEN 'Oct' THEN Oct 34 | WHEN 'Nov' THEN Nov 35 | WHEN 'Dec' THEN Dec_ 36 | END AS month 37 | FROM expenses_pivoted, 38 | ( 39 | SELECT 'Jan' AS month 40 | UNION ALL 41 | SELECT 'Feb' 42 | UNION ALL 43 | SELECT 'Mar' 44 | UNION ALL 45 | SELECT 'Apr' 46 | UNION ALL 47 | SELECT 'May' 48 | UNION ALL 49 | SELECT 'Jun' 50 | UNION ALL 51 | SELECT 'Jul' 52 | UNION ALL 53 | SELECT 'Aug' 54 | UNION ALL 55 | SELECT 'Sep' 56 | UNION ALL 57 | SELECT 'Oct' 58 | UNION ALL 59 | SELECT 'Nov' 60 | UNION ALL 61 | SELECT 'Dec' 62 | ) AS aux 63 | LIMIT 24; -------------------------------------------------------------------------------- /Interview/08_Group_by_Bins/README.md: -------------------------------------------------------------------------------- 1 | # Group by Bins 2 | 3 | One common task is to put rows into bins, meaning that there does not exist a conveninet column for us to GROUP BY. In this notebook, we will use the [*classicmodels* database](../../databases/classicmodels/mysqlsampledatabase.sql). Load the database: 4 | 5 | ```bash 6 | mysql < mysqlsampledatabase.sql -uroot -p 7 | ``` 8 | 9 | It contains a *customers* table. We will put customers into different class of credit limit. 10 | 11 | * 0~50,000 12 | * 50,001~100,000 13 | * over 100,000 14 | 15 | ___ 16 | 17 | ### Step 1. Create Column to Group By 18 | 19 | ```sql 20 | SELECT 21 | customerName 22 | ,CASE 23 | WHEN creditLimit BETWEEN 0 AND 50000 THEN '0 ~ 50k' 24 | WHEN creditLimit BETWEEN 50001 AND 100000 THEN '50 ~ 100k' 25 | ELSE '> 100k' 26 | END AS credit_range 27 | FROM customers 28 | LIMIT 5; 29 | ``` 30 | ``` 31 | +----------------------------+--------------+ 32 | | customerName | credit_range | 33 | +----------------------------+--------------+ 34 | | Atelier graphique | 0 ~ 50k | 35 | | Signal Gift Stores | 50 ~ 100k | 36 | | Australian Collectors, Co. 
| > 100k | 37 | | La Rochelle Gifts | > 100k | 38 | | Baane Mini Imports | 50 ~ 100k | 39 | +----------------------------+--------------+ 40 | ``` 41 | 42 | ### Step 2. Group by Bin 43 | ```sql 44 | SELECT 45 | CASE 46 | WHEN creditLimit BETWEEN 0 AND 50000 THEN '0 ~ 50k' 47 | WHEN creditLimit BETWEEN 50001 AND 100000 THEN '50 ~ 100k' 48 | ELSE '> 100k' 49 | END AS credit_range 50 | ,COUNT(*) AS customer_tally 51 | FROM customers 52 | GROUP BY credit_range; 53 | ``` 54 | ``` 55 | +--------------+----------------+ 56 | | credit_range | customer_tally | 57 | +--------------+----------------+ 58 | | 0 ~ 50k | 37 | 59 | | 50 ~ 100k | 60 | 60 | | > 100k | 25 | 61 | +--------------+----------------+ 62 | 3 rows in set (0.00 sec) 63 | ``` 64 | 65 | --- 66 | ### Parting thought 67 | This type of question can easily generalize to a broad range of topics. For example, what is the distribution of user's age, what is the distribution of revenue from each online order? 68 | 69 | See solution [here](solution.sql). 
-------------------------------------------------------------------------------- /Interview/08_Group_by_Bins/solution.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | CASE 3 | WHEN creditLimit BETWEEN 0 AND 50000 THEN '0 ~ 50k' 4 | WHEN creditLimit BETWEEN 50001 AND 100000 THEN '50 ~ 100k' 5 | ELSE '> 100k' 6 | END AS credit_range 7 | ,COUNT(*) AS customer_tally 8 | FROM customers 9 | GROUP BY credit_range; -------------------------------------------------------------------------------- /Interview/09_Consecutive_Active_Users/db.sql: -------------------------------------------------------------------------------- 1 | DROP SCHEMA IF EXISTS WAU; 2 | CREATE SCHEMA WAU; 3 | USE WAU; 4 | 5 | CREATE TABLE Login ( 6 | id INT(11) NOT NULL 7 | ,user_id int(11) 8 | ,ts DATE 9 | ); 10 | 11 | ALTER TABLE Login 12 | ADD PRIMARY KEY (id); 13 | 14 | ALTER TABLE Login 15 | MODIFY id int(11) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=1; 16 | 17 | INSERT INTO Login (user_id, ts) VALUES 18 | (1, "2019-02-14"), 19 | (1, "2019-02-13"), 20 | (1, "2019-02-12"), 21 | (1, "2019-02-11"), 22 | (2, "2019-02-14"), 23 | (2, "2019-02-12"), 24 | (2, "2019-02-11"), 25 | (2, "2019-02-10"), 26 | (3, "2019-02-14"), 27 | (3, "2019-02-12"); 28 | 29 | INSERT INTO Login (user_id, ts) VALUES 30 | (4, "2019-02-09"), 31 | (4, "2019-02-08"), 32 | (4, "2019-02-08"), 33 | (4, "2019-02-07"); 34 | -------------------------------------------------------------------------------- /Interview/09_Consecutive_Active_Users/solution.sql: -------------------------------------------------------------------------------- 1 | -- self join method 2 | SET @now = "2019-02-14"; 3 | WITH tmp AS ( 4 | SELECT DISTINCT user_id, ts FROM Login) 5 | SELECT * 6 | FROM tmp AS d0 7 | JOIN tmp AS d1 8 | ON d0.ts = @now 9 | AND DATEDIFF(@now, d1.ts) = 1 10 | AND d0.user_id = d1.user_id 11 | JOIN tmp AS d2 12 | ON DATEDIFF(@now, d2.ts) = 2 13 | AND d2.user_id = d1.user_id; 14 | 15 | -- window 
method 16 | SET @now = "2019-02-14"; 17 | WITH tmp AS ( 18 | SELECT 19 | user_id 20 | ,ts 21 | ,DATEDIFF(@now, LAG(ts, 1) OVER w) AS day_from_pre1 22 | ,DATEDIFF(@now, LAG(ts, 2) OVER w) AS day_from_pre2 23 | FROM Login 24 | WINDOW w AS (PARTITION BY user_id ORDER BY ts) 25 | ) 26 | SELECT user_id 27 | FROM tmp 28 | WHERE ts = @now 29 | AND day_from_pre1 = 1 30 | AND day_from_pre2 = 2; -------------------------------------------------------------------------------- /Interview/10_Spotify_Recommend_Friend/db.sql: -------------------------------------------------------------------------------- 1 | -- phpMyAdmin SQL Dump 2 | -- version 4.7.7 3 | -- https://www.phpmyadmin.net/ 4 | -- 5 | -- Host: localhost:8889 6 | -- Generation Time: Mar 18, 2019 at 01:41 AM 7 | -- Server version: 5.6.38 8 | -- PHP Version: 7.2.1 9 | 10 | SET SQL_MODE = "NO_AUTO_VALUE_ON_ZERO"; 11 | SET time_zone = "+00:00"; 12 | 13 | -- 14 | -- Database: `SpotifyFriend` 15 | -- 16 | CREATE DATABASE IF NOT EXISTS `SpotifyFriend` DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci; 17 | USE `SpotifyFriend`; 18 | 19 | -- -------------------------------------------------------- 20 | 21 | -- 22 | -- Table structure for table `Song` 23 | -- 24 | 25 | DROP TABLE IF EXISTS `Song`; 26 | CREATE TABLE `Song` ( 27 | `id` int(11) NOT NULL, 28 | `user_id` varchar(10) NOT NULL, 29 | `song` varchar(20) NOT NULL, 30 | `ts` date NOT NULL 31 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 32 | 33 | -- 34 | -- Dumping data for table `Song` 35 | -- 36 | 37 | INSERT INTO `Song` (`id`, `user_id`, `song`, `ts`) VALUES 38 | (1, 'Alex', 'Kiroro', '2019-03-17'), 39 | (2, 'Alex', 'Shape of My Heart', '2019-03-17'), 40 | (3, 'Alex', 'Clair de Lune', '2019-03-17'), 41 | (4, 'Alex', 'The Fall', '2019-03-17'), 42 | (5, 'Alex', 'Forever Young', '2019-03-17'), 43 | (6, 'Bill', 'Shape of My Heart', '2019-03-17'), 44 | (7, 'Bill', 'Clair de Lune', '2019-03-17'), 45 | (8, 'Bill', 'The Fall', '2019-03-17'), 46 | (9, 'Bill', 'Forever Young', 
'2019-03-17'), 47 | (10, 'Bill', 'My Love', '2019-03-14'), 48 | (11, 'Alex', 'Kiroro', '2019-03-17'), 49 | (12, 'Alex', 'Shape of My Heart', '2019-03-17'), 50 | (13, 'Alex', 'Shape of My Heart', '2019-03-17'), 51 | (14, 'Bill', 'Shape of My Heart', '2019-03-17'), 52 | (15, 'Bill', 'Shape of My Heart', '2019-03-17'), 53 | (16, 'Bill', 'Shape of My Heart', '2019-03-17'), 54 | (17, 'Cindy', 'Kiroro', '2019-03-17'), 55 | (18, 'Cindy', 'Clair de Lune', '2019-03-17'), 56 | (19, 'Cindy', 'My Love', '2019-03-14'), 57 | (20, 'Cindy', 'Clair de Lune', '2019-03-14'), 58 | (21, 'Cindy', 'Lemon Tree', '2019-03-14'), 59 | (22, 'Cindy', 'Mad World', '2019-03-14'), 60 | (23, 'Bill', 'Lemon Tree', '2019-03-14'), 61 | (24, 'Bill', 'Mad World', '2019-03-14'), 62 | (25, 'Bill', 'My Love', '2019-03-14'); 63 | 64 | -- -------------------------------------------------------- 65 | 66 | -- 67 | -- Table structure for table `User` 68 | -- 69 | 70 | DROP TABLE IF EXISTS `User`; 71 | CREATE TABLE `User` ( 72 | `id` int(11) NOT NULL, 73 | `user_id` varchar(10) NOT NULL, 74 | `friend_id` varchar(10) NOT NULL 75 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 76 | 77 | -- 78 | -- Dumping data for table `User` 79 | -- 80 | 81 | INSERT INTO `User` (`id`, `user_id`, `friend_id`) VALUES 82 | (1, 'Cindy', 'Bill'); 83 | 84 | -- 85 | -- Indexes for dumped tables 86 | -- 87 | 88 | -- 89 | -- Indexes for table `Song` 90 | -- 91 | ALTER TABLE `Song` 92 | ADD PRIMARY KEY (`id`); 93 | 94 | -- 95 | -- Indexes for table `User` 96 | -- 97 | ALTER TABLE `User` 98 | ADD PRIMARY KEY (`id`); 99 | 100 | -- 101 | -- AUTO_INCREMENT for dumped tables 102 | -- 103 | 104 | -- 105 | -- AUTO_INCREMENT for table `Song` 106 | -- 107 | ALTER TABLE `Song` 108 | MODIFY `id` int(11) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=26; 109 | 110 | -- 111 | -- AUTO_INCREMENT for table `User` 112 | -- 113 | ALTER TABLE `User` 114 | MODIFY `id` int(11) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=2; 115 | 
-------------------------------------------------------------------------------- /Interview/10_Spotify_Recommend_Friend/fig/symmetry.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shawlu95/Beyond-LeetCode-SQL/8ab7f0ad60b85ab9dff18da6fc760211335d0fb0/Interview/10_Spotify_Recommend_Friend/fig/symmetry.png -------------------------------------------------------------------------------- /Interview/10_Spotify_Recommend_Friend/solution.sql: -------------------------------------------------------------------------------- 1 | -- full solution 2 | SELECT 3 | s1.user_id 4 | ,s2.user_id AS recommended 5 | FROM Song AS s1 6 | JOIN Song AS s2 7 | ON s1.user_id != s2.user_id 8 | AND s1.ts = s2.ts 9 | AND s1.song = s2.song 10 | WHERE (s1.user_id, s2.user_id) NOT IN ( 11 | SELECT user_id, friend_id FROM User 12 | UNION 13 | SELECT friend_id, user_id FROM User 14 | ) 15 | GROUP BY s1.user_id, s2.user_id, s2.ts 16 | HAVING COUNT(DISTINCT s2.song) >= 3; -------------------------------------------------------------------------------- /Interview/11_Spotify_Similar_Friends/solution.sql: -------------------------------------------------------------------------------- 1 | -- MySQL 2 | SELECT DISTINCT 3 | u.user_id 4 | ,u.friend_id 5 | FROM User AS u 6 | JOIN Song AS s1 7 | ON u.user_id = s1.user_id 8 | JOIN Song AS s2 9 | ON u.friend_id = s2.user_id 10 | WHERE s1.ts = s2.ts 11 | AND s1.song = s2.song 12 | GROUP BY s1.ts, u.user_id, u.friend_id 13 | HAVING COUNT(DISTINCT s1.song) >= 3; 14 | 15 | -- cross join 16 | SELECT DISTINCT 17 | u.user_id, u.friend_id 18 | FROM 19 | User u, Song s1, Song s2 20 | WHERE 21 | u.user_id = s1.user_id 22 | AND u.friend_id = s2.user_id 23 | AND s1.song = s2.song 24 | AND s1.ts = s2.ts 25 | GROUP BY 1, 2, s1.ts 26 | HAVING COUNT(DISTINCT s1.song) >= 3; 27 | 28 | -- prefiltering pipeline 29 | WITH active_user_id AS ( 30 | SELECT 31 | u.user_id 32 | ,s.ts 33 | ,COUNT(*) AS song_tally 34 | 
FROM user AS u 35 | JOIN song AS s 36 | ON u.user_id = s.user_id 37 | GROUP BY user_id, ts 38 | HAVING COUNT(*) >= 3 39 | ) 40 | ,active_friend_id AS ( 41 | SELECT 42 | u.friend_id 43 | ,s.ts 44 | ,COUNT(*) AS song_tally 45 | FROM user AS u 46 | JOIN song AS s 47 | ON u.friend_id = s.user_id 48 | GROUP BY friend_id, ts 49 | HAVING COUNT(*) >= 3 50 | ) 51 | ,possible_match AS ( 52 | SELECT 53 | u.user_id 54 | ,f.friend_id 55 | ,f.ts 56 | FROM active_user_id AS u 57 | JOIN active_friend_id AS f 58 | ON u.ts = f.ts 59 | ) 60 | SELECT DISTINCT 61 | p.user_id 62 | ,p.friend_id 63 | FROM possible_match AS p 64 | JOIN song AS s1 65 | ON p.ts = s1.ts 66 | AND p.user_id = s1.user_id 67 | JOIN song AS s2 68 | ON p.ts = s2.ts 69 | AND p.friend_id = s2.user_id 70 | WHERE s2.song = s1.song 71 | GROUP BY p.user_id, p.friend_id, s1.ts 72 | HAVING COUNT(DISTINCT s1.song) >= 3; -------------------------------------------------------------------------------- /Interview/12_Invalid_Search/db.sql: -------------------------------------------------------------------------------- 1 | -- phpMyAdmin SQL Dump 2 | -- version 4.7.7 3 | -- https://www.phpmyadmin.net/ 4 | -- 5 | -- Host: localhost:8889 6 | -- Generation Time: Mar 18, 2019 at 09:23 AM 7 | -- Server version: 5.6.38 8 | -- PHP Version: 7.2.1 9 | 10 | SET SQL_MODE = "NO_AUTO_VALUE_ON_ZERO"; 11 | SET time_zone = "+00:00"; 12 | 13 | -- 14 | -- Database: `Search` 15 | -- 16 | CREATE DATABASE IF NOT EXISTS `Search` DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci; 17 | USE `Search`; 18 | 19 | -- -------------------------------------------------------- 20 | 21 | -- 22 | -- Table structure for table `SearchCategory` 23 | -- 24 | 25 | DROP TABLE IF EXISTS `SearchCategory`; 26 | CREATE TABLE `SearchCategory` ( 27 | `country` varchar(10) NOT NULL, 28 | `search_cat` varchar(10) NOT NULL, 29 | `num_search` int(10) DEFAULT NULL, 30 | `zero_result_pct` decimal(10,0) DEFAULT NULL 31 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 32 | 33 | -- 34 | -- 
Dumping data for table `SearchCategory` 35 | -- 36 | 37 | INSERT INTO `SearchCategory` (`country`, `search_cat`, `num_search`, `zero_result_pct`) VALUES 38 | ('CN', 'dog', 9700000, NULL), 39 | ('CN', 'home', 1200000, '13'), 40 | ('CN', 'tax', 1200, '99'), 41 | ('CN', 'travel', 980000, '11'), 42 | ('UAE', 'home', NULL, NULL), 43 | ('UAE', 'travel', NULL, NULL), 44 | ('UK', 'home', NULL, NULL), 45 | ('UK', 'tax', 98000, '1'), 46 | ('UK', 'travel', 100000, '3'), 47 | ('US', 'home', 190000, '1'), 48 | ('US', 'tax', 12000, NULL), 49 | ('US', 'travel', 9500, '3'); 50 | 51 | -- 52 | -- Indexes for dumped tables 53 | -- 54 | 55 | -- 56 | -- Indexes for table `SearchCategory` 57 | -- 58 | ALTER TABLE `SearchCategory` 59 | ADD PRIMARY KEY (`country`,`search_cat`), 60 | ADD KEY `id` (`country`,`search_cat`); 61 | -------------------------------------------------------------------------------- /Interview/12_Invalid_Search/solution.sql: -------------------------------------------------------------------------------- 1 | -- MySQL 2 | SELECT 3 | country, 4 | num_search, 5 | ROUND(sum_zero_result / num_search, 2) AS zero_result_pct 6 | FROM ( 7 | SELECT 8 | country 9 | ,SUM(CASE WHEN zero_result_pct IS NOT NULL THEN num_search ELSE NULL END) AS num_search 10 | ,SUM(num_search * zero_result_pct) AS sum_zero_result 11 | FROM SearchCategory 12 | GROUP BY country) AS a; 13 | 14 | -- MySQL8 15 | WITH tmp AS ( 16 | SELECT 17 | country 18 | ,SUM(CASE WHEN zero_result_pct IS NOT NULL THEN num_search ELSE NULL END) AS num_search 19 | ,SUM(num_search * zero_result_pct) AS sum_zero_result 20 | FROM SearchCategory 21 | GROUP BY country 22 | ) 23 | SELECT 24 | country, 25 | num_search, 26 | ROUND(sum_zero_result / num_search, 2) AS zero_result_pct 27 | FROM tmp; -------------------------------------------------------------------------------- /Interview/13_Text_Confirmation/db.sql: -------------------------------------------------------------------------------- 1 | -- phpMyAdmin SQL Dump 2 | -- 
version 4.7.7 3 | -- https://www.phpmyadmin.net/ 4 | -- 5 | -- Host: localhost:8889 6 | -- Generation Time: Mar 18, 2019 at 10:53 AM 7 | -- Server version: 5.6.38 8 | -- PHP Version: 7.2.1 9 | 10 | SET SQL_MODE = "NO_AUTO_VALUE_ON_ZERO"; 11 | SET time_zone = "+00:00"; 12 | 13 | -- 14 | -- Database: `Email` 15 | -- 16 | CREATE DATABASE IF NOT EXISTS `Email` DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci; 17 | USE `Email`; 18 | 19 | -- -------------------------------------------------------- 20 | 21 | -- 22 | -- Table structure for table `Email` 23 | -- 24 | 25 | DROP TABLE IF EXISTS `Email`; 26 | CREATE TABLE `Email` ( 27 | `ts` datetime NOT NULL, 28 | `user_id` varchar(10) NOT NULL, 29 | `email` varchar(20) NOT NULL 30 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 31 | 32 | -- 33 | -- Dumping data for table `Email` 34 | -- 35 | 36 | INSERT INTO `Email` (`ts`, `user_id`, `email`) VALUES 37 | ('2019-03-13 00:00:00', 'neo', 'anderson@matrix.com'), 38 | ('2019-03-17 12:15:00', 'Ross', 'ross@126.com'), 39 | ('2019-03-18 05:37:00', 'ali', 'ali@hotmail.com'), 40 | ('2019-03-18 06:00:00', 'shaw', 'shawlu95@gmail.com'); 41 | 42 | -- -------------------------------------------------------- 43 | 44 | -- 45 | -- Table structure for table `Text` 46 | -- 47 | 48 | DROP TABLE IF EXISTS `Text`; 49 | CREATE TABLE `Text` ( 50 | `id` int(11) NOT NULL, 51 | `ts` datetime NOT NULL, 52 | `user_id` varchar(10) NOT NULL, 53 | `action` enum('CONFIRMED') DEFAULT NULL 54 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 55 | 56 | -- 57 | -- Dumping data for table `Text` 58 | -- 59 | 60 | INSERT INTO `Text` (`id`, `ts`, `user_id`, `action`) VALUES 61 | (1, '2019-03-17 12:15:00', 'Ross', 'CONFIRMED'), 62 | (2, '2019-03-18 05:37:00', 'Ali', NULL), 63 | (3, '2019-03-18 14:00:00', 'Ali', 'CONFIRMED'), 64 | (4, '2019-03-18 06:00:00', 'shaw', NULL), 65 | (5, '2019-03-19 00:00:00', 'shaw', 'CONFIRMED'); 66 | 67 | -- 68 | -- Indexes for dumped tables 69 | -- 70 | 71 | -- 72 | -- Indexes for table `Email` 73 | -- 
-- for reference: each user's next text event and the gap between consecutive events
SELECT
    user_id
    ,ts
    ,action
    ,LEAD(ts, 1) OVER (PARTITION BY user_id ORDER BY ts) AS next_ts
    ,LEAD(action, 1) OVER (PARTITION BY user_id ORDER BY ts) AS next_action
    ,TIMEDIFF(LEAD(ts, 1) OVER (PARTITION BY user_id ORDER BY ts), ts) AS time_diff
FROM Text;
-- NOTE: the original ended with a dangling `ON e.user_id = t.user_id` after FROM,
-- a leftover from a removed join that made the statement a syntax error.
34 | ```sql 35 | SELECT user_id, friend_id FROM Friendship 36 | WHERE user_id = 'alice' 37 | UNION ALL 38 | SELECT friend_id, user_id FROM Friendship 39 | WHERE user_id = 'alice' 40 | LIMIT 10; 41 | ``` 42 | ``` 43 | +---------+-----------+ 44 | | user_id | friend_id | 45 | +---------+-----------+ 46 | | alice | bob | 47 | | alice | charles | 48 | | alice | david | 49 | | alice | mary | 50 | | bob | alice | 51 | | charles | alice | 52 | | david | alice | 53 | | mary | alice | 54 | +---------+-----------+ 55 | 8 rows in set (0.00 sec) 56 | ``` 57 | 58 | ___ 59 | ### Step 2. Expand Two-way 60 | Join the union result twice, to find friends for each user_id. Filter the results to include common friend only. Note that it is __impossible__ to list one of the user himself as the common friend, because the *bf.friend_id = af.friend_id* will not be satisfied. 61 | 62 | ```sql 63 | WITH tmp AS ( 64 | SELECT user_id, friend_id FROM Friendship 65 | UNION ALL 66 | SELECT friend_id, user_id FROM Friendship 67 | ) 68 | SELECT 69 | ab.user_id AS a 70 | ,ab.friend_id AS b 71 | ,af.friend_id AS a_friend 72 | ,bf.friend_id AS b_friend 73 | FROM tmp AS ab 74 | JOIN tmp AS af 75 | ON ab.user_id = af.user_id 76 | JOIN tmp AS bf 77 | ON ab.friend_id = bf.user_id 78 | AND bf.friend_id = af.friend_id 79 | ORDER BY a, b, a_friend, b_friend; 80 | ``` 81 | ``` 82 | +---------+---------+----------+----------+ 83 | | a | b | a_friend | b_friend | 84 | +---------+---------+----------+----------+ 85 | | alice | bob | charles | charles | 86 | | alice | bob | david | david | 87 | | alice | bob | mary | mary | 88 | | alice | charles | bob | bob | 89 | | alice | david | bob | bob | 90 | | alice | mary | bob | bob | 91 | | bob | alice | charles | charles | 92 | | bob | alice | david | david | 93 | | bob | alice | mary | mary | 94 | | bob | charles | alice | alice | 95 | | bob | david | alice | alice | 96 | | bob | mary | alice | alice | 97 | | charles | alice | bob | bob | 98 | | charles | bob | alice 
| alice | 99 | | david | alice | bob | bob | 100 | | david | bob | alice | alice | 101 | | mary | alice | bob | bob | 102 | | mary | bob | alice | alice | 103 | +---------+---------+----------+----------+ 104 | 18 rows in set (0.00 sec) 105 | ``` 106 | 107 | ___ 108 | ### Step 3. Aggregation 109 | Group by the user_id pair and count the number of common friends. Because we've counted both way, each eligible pair will have a counterpart with the opposite direction. 110 | 111 | ```sql 112 | WITH tmp AS ( 113 | SELECT user_id, friend_id FROM Friendship 114 | UNION ALL 115 | SELECT friend_id, user_id FROM Friendship 116 | ) 117 | SELECT 118 | ab.user_id 119 | ,ab.friend_id 120 | ,COUNT(*) AS common_friend 121 | FROM tmp AS ab 122 | JOIN tmp AS af 123 | ON ab.user_id = af.user_id 124 | JOIN tmp AS bf 125 | ON ab.friend_id = bf.user_id 126 | AND bf.friend_id = af.friend_id 127 | GROUP BY ab.user_id, ab.friend_id 128 | HAVING common_friend >= 3 129 | ORDER BY common_friend DESC; 130 | ``` 131 | ``` 132 | +---------+-----------+---------------+ 133 | | user_id | friend_id | common_friend | 134 | +---------+-----------+---------------+ 135 | | bob | alice | 3 | 136 | | alice | bob | 3 | 137 | +---------+-----------+---------------+ 138 | 2 rows in set (0.00 sec) 139 | ``` 140 | 141 | See solution [here](solution.sql). 
-------------------------------------------------------------------------------- /Interview/14_Facebook_Common_Friend/db.sql: -------------------------------------------------------------------------------- 1 | DROP SCHEMA IF EXISTS Facebook; 2 | CREATE SCHEMA Facebook; 3 | USE Facebook; 4 | 5 | -- -------------------------------------------------------- 6 | 7 | -- 8 | -- Table structure for table `Friendship` 9 | -- 10 | 11 | DROP TABLE IF EXISTS `Friendship`; 12 | CREATE TABLE `Friendship` ( 13 | `id` int(11) NOT NULL, 14 | `user_id` varchar(10) DEFAULT NULL, 15 | `friend_id` varchar(10) DEFAULT NULL 16 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 17 | 18 | INSERT INTO `Friendship` (`id`, `user_id`, `friend_id`) VALUES 19 | (1, 'alice', 'bob'), 20 | (2, 'alice', 'charles'), 21 | (3, 'alice', 'david'), 22 | (4, 'alice', 'mary'), 23 | (5, 'bob', 'david'), 24 | (6, 'bob', 'charles'), 25 | (7, 'bob', 'mary'), 26 | (8, 'david', 'sonny'), 27 | (9, 'charles', 'sonny'), 28 | (10, 'bob', 'sonny'); 29 | 30 | ALTER TABLE `Friendship` 31 | ADD PRIMARY KEY (`id`); 32 | 33 | ALTER TABLE `Friendship` 34 | MODIFY `id` int(11) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=11; -------------------------------------------------------------------------------- /Interview/14_Facebook_Common_Friend/solution.sql: -------------------------------------------------------------------------------- 1 | WITH tmp AS ( 2 | SELECT user_id, friend_id FROM Friendship 3 | UNION ALL 4 | SELECT friend_id, user_id FROM Friendship 5 | ) 6 | SELECT 7 | ab.user_id 8 | ,ab.friend_id 9 | ,COUNT(*) AS common_friend 10 | FROM tmp AS ab 11 | JOIN tmp AS af 12 | ON ab.user_id = af.user_id 13 | JOIN tmp AS bf 14 | ON ab.friend_id = bf.user_id 15 | AND bf.friend_id = af.friend_id 16 | GROUP BY ab.user_id, ab.friend_id 17 | HAVING common_friend >= 3 18 | ORDER BY common_friend DESC; -------------------------------------------------------------------------------- /Interview/15_Facebook_People_You_May_Know/README.md: 
-------------------------------------------------------------------------------- 1 | # People You May Know 2 | > Based on the number of common friends, recommend users to be friend. 3 | 4 | ___ 5 | ### Observation 6 | This is a slight twist on the previous problem. Instead of selecting from existing friendships, we are proposing new ones. We'll use the same table as before. 7 | 8 | ___ 9 | ### Step 1. Find Candidate Pairs 10 | This can be done with a cross join. Obviously we want to avoid matching a user with himself. 11 | 12 | ```sql 13 | WITH tmp AS ( 14 | SELECT user_id, friend_id FROM Friendship 15 | UNION ALL 16 | SELECT friend_id, user_id FROM Friendship 17 | ) 18 | 19 | SELECT * FROM 20 | (SELECT DISTINCT user_id FROM tmp) AS a 21 | CROSS JOIN 22 | (SELECT DISTINCT user_id FROM tmp) AS b 23 | ON a.user_id != b.user_id; 24 | ``` 25 | ``` 26 | +---------+---------+ 27 | | user_id | user_id | 28 | +---------+---------+ 29 | | bob | alice | 30 | | david | alice | 31 | | charles | alice | 32 | | mary | alice | 33 | | sonny | alice | 34 | | alice | bob | 35 | | david | bob | 36 | | charles | bob | 37 | | mary | bob | 38 | | sonny | bob | 39 | | alice | david | 40 | | bob | david | 41 | | charles | david | 42 | | mary | david | 43 | | sonny | david | 44 | | alice | charles | 45 | | bob | charles | 46 | | david | charles | 47 | | mary | charles | 48 | | sonny | charles | 49 | | alice | mary | 50 | | bob | mary | 51 | | david | mary | 52 | | charles | mary | 53 | | sonny | mary | 54 | | alice | sonny | 55 | | bob | sonny | 56 | | david | sonny | 57 | | charles | sonny | 58 | | mary | sonny | 59 | +---------+---------+ 60 | 30 rows in set (0.00 sec) 61 | ``` 62 | 63 | Also, we don't want to recommend people who are already friends. 
64 | 65 | ```sql 66 | WITH tmp AS ( 67 | SELECT user_id, friend_id FROM Friendship 68 | UNION ALL 69 | SELECT friend_id, user_id FROM Friendship 70 | ) 71 | 72 | SELECT * FROM 73 | (SELECT DISTINCT user_id FROM tmp) AS a 74 | CROSS JOIN 75 | (SELECT DISTINCT user_id FROM tmp) AS b 76 | ON a.user_id != b.user_id 77 | AND (a.user_id, b.user_id) NOT IN (SELECT user_id, friend_id FROM tmp); 78 | ``` 79 | ``` 80 | +---------+---------+ 81 | | user_id | user_id | 82 | +---------+---------+ 83 | | sonny | alice | 84 | | charles | david | 85 | | mary | david | 86 | | david | charles | 87 | | mary | charles | 88 | | david | mary | 89 | | charles | mary | 90 | | sonny | mary | 91 | | alice | sonny | 92 | | mary | sonny | 93 | +---------+---------+ 94 | 10 rows in set (0.00 sec) 95 | ``` 96 | 97 | ___ 98 | ### Step 2. Expand Two-way 99 | The rest is the same as the previous problem. 100 | ```sql 101 | WITH tmp AS ( 102 | SELECT user_id, friend_id FROM Friendship 103 | UNION ALL 104 | SELECT friend_id, user_id FROM Friendship 105 | ) 106 | 107 | SELECT 108 | a.user_id 109 | ,b.user_id 110 | ,af.friend_id AS common_friend 111 | FROM 112 | (SELECT DISTINCT user_id FROM tmp) AS a 113 | CROSS JOIN 114 | (SELECT DISTINCT user_id FROM tmp) AS b 115 | ON a.user_id != b.user_id 116 | AND (a.user_id, b.user_id) NOT IN (SELECT user_id, friend_id FROM tmp) 117 | JOIN tmp AS af 118 | ON a.user_id = af.user_id 119 | JOIN tmp AS bf 120 | ON b.user_id = bf.user_id 121 | AND bf.friend_id = af.friend_id; 122 | ``` 123 | ``` 124 | +---------+---------+---------------+ 125 | | user_id | user_id | common_friend | 126 | +---------+---------+---------------+ 127 | | alice | sonny | bob | 128 | | alice | sonny | charles | 129 | | alice | sonny | david | 130 | | david | charles | sonny | 131 | | charles | david | sonny | 132 | | charles | david | alice | 133 | | charles | mary | alice | 134 | | david | charles | alice | 135 | | david | mary | alice | 136 | | mary | charles | alice | 137 | | mary | 
david | alice | 138 | | david | charles | bob | 139 | | david | mary | bob | 140 | | charles | david | bob | 141 | | charles | mary | bob | 142 | | mary | david | bob | 143 | | mary | charles | bob | 144 | | mary | sonny | bob | 145 | | sonny | alice | david | 146 | | sonny | alice | charles | 147 | | sonny | alice | bob | 148 | | sonny | mary | bob | 149 | +---------+---------+---------------+ 150 | 22 rows in set (0.01 sec) 151 | ``` 152 | 153 | ___ 154 | ### Step 3. Aggregation 155 | Group by the user_id pair and count the number of common friends. 156 | 157 | ```sql 158 | WITH tmp AS ( 159 | SELECT user_id, friend_id FROM Friendship 160 | UNION ALL 161 | SELECT friend_id, user_id FROM Friendship 162 | ) 163 | 164 | SELECT 165 | a.user_id 166 | ,b.user_id 167 | ,COUNT(*) AS common_friend 168 | FROM 169 | (SELECT DISTINCT user_id FROM tmp) AS a 170 | CROSS JOIN 171 | (SELECT DISTINCT user_id FROM tmp) AS b 172 | ON a.user_id != b.user_id 173 | AND (a.user_id, b.user_id) NOT IN (SELECT user_id, friend_id FROM tmp) 174 | JOIN tmp AS af 175 | ON a.user_id = af.user_id 176 | JOIN tmp AS bf 177 | ON b.user_id = bf.user_id 178 | AND bf.friend_id = af.friend_id 179 | GROUP BY a.user_id, b.user_id 180 | HAVING COUNT(*) >= 3 181 | ORDER BY common_friend DESC; 182 | ``` 183 | ``` 184 | +---------+---------+---------------+ 185 | | user_id | user_id | common_friend | 186 | +---------+---------+---------------+ 187 | | alice | sonny | 3 | 188 | | david | charles | 3 | 189 | | charles | david | 3 | 190 | | sonny | alice | 3 | 191 | +---------+---------+---------------+ 192 | 4 rows in set (0.00 sec) 193 | ``` 194 | 195 | See solution [here](solution.sql). 
-------------------------------------------------------------------------------- /Interview/15_Facebook_People_You_May_Know/solution.sql: -------------------------------------------------------------------------------- 1 | WITH tmp AS ( 2 | SELECT user_id, friend_id FROM Friendship 3 | UNION ALL 4 | SELECT friend_id, user_id FROM Friendship 5 | ) 6 | 7 | SELECT 8 | a.user_id 9 | ,b.user_id 10 | ,COUNT(*) AS common_friend 11 | FROM 12 | (SELECT DISTINCT user_id FROM tmp) AS a 13 | CROSS JOIN 14 | (SELECT DISTINCT user_id FROM tmp) AS b 15 | ON a.user_id != b.user_id 16 | AND (a.user_id, b.user_id) NOT IN (SELECT user_id, friend_id FROM tmp) 17 | JOIN tmp AS af 18 | ON a.user_id = af.user_id 19 | JOIN tmp AS bf 20 | ON b.user_id = bf.user_id 21 | AND bf.friend_id = af.friend_id 22 | GROUP BY a.user_id, b.user_id 23 | HAVING COUNT(*) >= 3 24 | ORDER BY common_friend DESC; -------------------------------------------------------------------------------- /Interview/16_Instagram_Common_Follower/README.md: -------------------------------------------------------------------------------- 1 | # Instagram Common Follower 2 | > Given a *Follow* table, find the pair of Instagram accounts which share the highest number of common followers. 3 | 4 | ___ 5 | ### Load Data 6 | Load the database file [db.sql](db.sql) to localhost MySQL. An Instagram database will be created with one Follow table. 7 | ```bash 8 | mysql < db.sql -uroot -p 9 | ``` 10 | 11 | ___ 12 | ### Observation 13 | The major realization is that following is a one-way relationship, unlike friendship which must be two-way. The rest is similar to the earlier problem. 14 | 15 | ___ 16 | ### Step 1. Build Candidate Pairs 17 | Find all possible account pairs using cross join. Avoid matching an account to itself. 
18 | ```sql 19 | SELECT * 20 | FROM 21 | (SELECT DISTINCT user_id FROM Follow) AS a 22 | CROSS JOIN 23 | (SELECT DISTINCT user_id FROM Follow) AS b 24 | ON a.user_id != b.user_id 25 | ``` 26 | 27 | ``` 28 | +---------+---------+ 29 | | user_id | user_id | 30 | +---------+---------+ 31 | | musk | bezos | 32 | | ronaldo | bezos | 33 | | trump | bezos | 34 | | bezos | musk | 35 | | ronaldo | musk | 36 | | trump | musk | 37 | | bezos | ronaldo | 38 | | musk | ronaldo | 39 | | trump | ronaldo | 40 | | bezos | trump | 41 | | musk | trump | 42 | | ronaldo | trump | 43 | +---------+---------+ 44 | 12 rows in set (0.00 sec) 45 | ``` 46 | 47 | ___ 48 | ### Step 2. Find Common Followers 49 | ```sql 50 | SELECT * 51 | FROM 52 | (SELECT DISTINCT user_id FROM Follow) AS a 53 | CROSS JOIN 54 | (SELECT DISTINCT user_id FROM Follow) AS b 55 | ON a.user_id != b.user_id 56 | JOIN Follow AS af 57 | ON a.user_id = af.user_id 58 | JOIN Follow AS bf 59 | ON b.user_id = bf.user_id 60 | AND af.follower_id = bf.follower_id; 61 | ``` 62 | 63 | ``` 64 | +---------+---------+---------+-------------+---------+-------------+ 65 | | user_id | user_id | user_id | follower_id | user_id | follower_id | 66 | +---------+---------+---------+-------------+---------+-------------+ 67 | | musk | bezos | musk | david | bezos | david | 68 | | ronaldo | bezos | ronaldo | david | bezos | david | 69 | | ronaldo | bezos | ronaldo | james | bezos | james | 70 | | ronaldo | bezos | ronaldo | mary | bezos | mary | 71 | | bezos | musk | bezos | david | musk | david | 72 | | ronaldo | musk | ronaldo | david | musk | david | 73 | | bezos | ronaldo | bezos | david | ronaldo | david | 74 | | musk | ronaldo | musk | david | ronaldo | david | 75 | | bezos | ronaldo | bezos | james | ronaldo | james | 76 | | bezos | ronaldo | bezos | mary | ronaldo | mary | 77 | +---------+---------+---------+-------------+---------+-------------+ 78 | 10 rows in set (0.00 sec) 79 | ``` 80 | 81 | ___ 82 | ### Step 3. 
Aggregate Count 83 | ```sql 84 | SELECT 85 | a.user_id 86 | ,b.user_id 87 | ,COUNT(*) AS common 88 | FROM 89 | (SELECT DISTINCT user_id FROM Follow) AS a 90 | CROSS JOIN 91 | (SELECT DISTINCT user_id FROM Follow) AS b 92 | ON a.user_id != b.user_id 93 | JOIN Follow AS af 94 | ON a.user_id = af.user_id 95 | JOIN Follow AS bf 96 | ON b.user_id = bf.user_id 97 | AND af.follower_id = bf.follower_id 98 | GROUP BY a.user_id, b.user_id 99 | ORDER BY common DESC, a.user_id, b.user_id; 100 | ``` 101 | 102 | ``` 103 | +---------+---------+--------+ 104 | | user_id | user_id | common | 105 | +---------+---------+--------+ 106 | | bezos | ronaldo | 3 | 107 | | ronaldo | bezos | 3 | 108 | | bezos | musk | 1 | 109 | | musk | bezos | 1 | 110 | | musk | ronaldo | 1 | 111 | | ronaldo | musk | 1 | 112 | +---------+---------+--------+ 113 | 6 rows in set (0.00 sec) 114 | ``` 115 | 116 | ___ 117 | ### Parting Thought 118 | Why do we still get candidate pairs in both directions? Because the undirected edge only affects __step 2__ when we join both parties of a candidate pair with their respective followers! 119 | 120 | See solution [here](solution.sql). 
-------------------------------------------------------------------------------- /Interview/16_Instagram_Common_Follower/db.sql: -------------------------------------------------------------------------------- 1 | -- phpMyAdmin SQL Dump 2 | -- version 4.7.7 3 | -- https://www.phpmyadmin.net/ 4 | -- 5 | -- Host: localhost:8889 6 | -- Generation Time: Mar 23, 2019 at 11:42 PM 7 | -- Server version: 5.6.38 8 | -- PHP Version: 7.2.1 9 | 10 | DROP SCHEMA IF EXISTS Instagram; 11 | CREATE SCHEMA Instagram; 12 | USE Instagram; 13 | 14 | SET SQL_MODE = "NO_AUTO_VALUE_ON_ZERO"; 15 | SET time_zone = "+00:00"; 16 | 17 | -- 18 | -- Database: `practice` 19 | -- 20 | 21 | -- -------------------------------------------------------- 22 | 23 | -- 24 | -- Table structure for table `Follow` 25 | -- 26 | 27 | DROP TABLE IF EXISTS `Follow`; 28 | CREATE TABLE `Follow` ( 29 | `user_id` varchar(10) NOT NULL, 30 | `follower_id` varchar(10) NOT NULL 31 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 32 | 33 | -- 34 | -- Dumping data for table `Follow` 35 | -- 36 | 37 | INSERT INTO `Follow` (`user_id`, `follower_id`) VALUES 38 | ('bezos', 'david'), 39 | ('bezos', 'james'), 40 | ('bezos', 'join'), 41 | ('bezos', 'mary'), 42 | ('musk', 'david'), 43 | ('musk', 'john'), 44 | ('ronaldo', 'david'), 45 | ('ronaldo', 'james'), 46 | ('ronaldo', 'linda'), 47 | ('ronaldo', 'mary'), 48 | ('trump', 'melinda'); 49 | 50 | -- 51 | -- Indexes for dumped tables 52 | -- 53 | 54 | -- 55 | -- Indexes for table `Follow` 56 | -- 57 | ALTER TABLE `Follow` 58 | ADD PRIMARY KEY (`user_id`,`follower_id`); 59 | -------------------------------------------------------------------------------- /Interview/16_Instagram_Common_Follower/solution.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | a.user_id 3 | ,b.user_id 4 | ,COUNT(*) AS common 5 | FROM 6 | (SELECT DISTINCT user_id FROM Follow) AS a 7 | CROSS JOIN 8 | (SELECT DISTINCT user_id FROM Follow) AS b 9 | ON a.user_id != 
b.user_id 10 | JOIN Follow AS af 11 | ON a.user_id = af.user_id 12 | JOIN Follow AS bf 13 | ON b.user_id = bf.user_id 14 | AND af.follower_id = bf.follower_id 15 | GROUP BY a.user_id, b.user_id 16 | ORDER BY common DESC, a.user_id, b.user_id; -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Shaw Lu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /LeetCode/185_Department_Top_Three_Salaries/README.md: -------------------------------------------------------------------------------- 1 | # Department Top Three Salaries 2 | 3 | ## Description 4 | The Employee table holds all employees. Every employee has an Id, and there is also a column for the department Id. 
5 | ``` 6 | +----+-------+--------+--------------+ 7 | | Id | Name | Salary | DepartmentId | 8 | +----+-------+--------+--------------+ 9 | | 1 | Joe | 70000 | 1 | 10 | | 2 | Henry | 80000 | 2 | 11 | | 3 | Sam | 60000 | 2 | 12 | | 4 | Max | 90000 | 1 | 13 | | 5 | Janet | 69000 | 1 | 14 | | 6 | Randy | 85000 | 1 | 15 | +----+-------+--------+--------------+ 16 | ``` 17 | The Department table holds all departments of the company. 18 | ``` 19 | +----+----------+ 20 | | Id | Name | 21 | +----+----------+ 22 | | 1 | IT | 23 | | 2 | Sales | 24 | +----+----------+ 25 | ``` 26 | Write a SQL query to find employees who earn the top three salaries in each of the department. For the above tables, your SQL query should return the following rows. 27 | ``` 28 | +------------+----------+--------+ 29 | | Department | Employee | Salary | 30 | +------------+----------+--------+ 31 | | IT | Max | 90000 | 32 | | IT | Randy | 85000 | 33 | | IT | Joe | 70000 | 34 | | Sales | Henry | 80000 | 35 | | Sales | Sam | 60000 | 36 | +------------+----------+--------+ 37 | ``` 38 | 39 | Load the database file [db.sql](db.sql) to localhost MySQL. Relevant tables will be created in the LeetCode database. 40 | ``` 41 | mysql < db.sql -uroot -p 42 | ``` 43 | 44 | --- 45 | ## Observation 46 | Make the following observation to interviewers. Confirm your observation is correct. Ask for clarification if necessary. 47 | * Are salary distinct for all employee? If not, must use __DISTINCT__ keyword. 48 | * How to display if department has fewer than 3 distinct salaries? 49 | * Every employment belongs to a department? No employee has __NULL__ in *DepartmentId*. 50 | 51 | --- 52 | ## On Correctness 53 | What does top-3 paid employees in each department have in common? 54 | * They have the same DepartmentId. 55 | * They have fewer than 3 persons who get paid higher salary (can use either < 3 or <= 2). 56 | * Department No. 1 has 0 above him. 57 | * Department No. 2 has 1 above him. 58 | * Department No. 
3 has 2 above him. 59 | The conditions are set-up for correlated subquery. In subquery, we can use an equijoin (*DepartmentId*) and non-equijoin (*Salary*) to filter the outer query. 60 | 61 | Basic [MySQL solution](mysql_correlated_subquery.sql) implementing the equijoin and non-equijoin logic above. 62 | ```sql 63 | SELECT 64 | d.Name AS 'Department' 65 | ,e.Name AS 'Employee' 66 | ,e.Salary 67 | FROM Employee e 68 | JOIN Department d 69 | ON e.DepartmentId = d.Id 70 | WHERE 71 | (SELECT COUNT(DISTINCT e2.Salary) 72 | FROM 73 | Employee e2 74 | WHERE 75 | e2.Salary > e.Salary 76 | AND e.DepartmentId = e2.DepartmentId 77 | ) < 3; 78 | ``` 79 | 80 | --- 81 | ## On Efficiency 82 | The subquery solution above enforces __nested select__, resulting in __N+1__ select statements and bad runtime efficiency. If we have access to window function ([MS SQL solution](mssql_window.sql), we can simply rank salary over each department as partition, and pick the top 3. Instead of using __DISTINCT__, the window solution uses __DENSE_RANK()__. Note that we cannot refer to window column *rnk* in the WHERE clause. So we must set up a temporary table. 83 | 84 | ```sql 85 | -- MS SQL: window function version 86 | WITH department_ranking AS ( 87 | SELECT 88 | e.Name AS Employee 89 | ,d.Name AS Department 90 | ,e.Salary 91 | ,DENSE_RANK() OVER (PARTITION BY DepartmentId ORDER BY Salary DESC) AS rnk 92 | FROM Employee AS e 93 | JOIN Department AS d 94 | ON e.DepartmentId = d.Id 95 | ) 96 | SELECT 97 | Department 98 | ,Employee 99 | ,Salary 100 | FROM department_ranking 101 | WHERE rnk <= 3 102 | ORDER BY Department ASC, Salary DESC; 103 | ``` 104 | 105 | We can further improve efficiency by [filtering](mssql_pre_filter.sql) the ranking before joining with the department table. Instead of joining every employee with his department, we now only join the department top-3 employees with their departments. This is accomplished with an additional temporary table. 
106 | 107 | ```sql 108 | -- MS SQL: Boosting effiency with pre-filtering 109 | WITH department_ranking AS ( 110 | SELECT 111 | e.Name AS Employee 112 | ,e.Salary 113 | ,e.DepartmentId 114 | ,DENSE_RANK() OVER (PARTITION BY e.DepartmentId ORDER BY e.Salary DESC) AS rnk 115 | FROM Employee AS e 116 | ) 117 | -- pre-filter table to reduce join size 118 | ,top_three AS ( 119 | SELECT 120 | Employee 121 | ,Salary 122 | ,DepartmentId 123 | FROM department_ranking 124 | WHERE rnk <= 3 125 | ) 126 | SELECT 127 | d.Name AS Department 128 | ,e.Employee 129 | ,e.Salary 130 | FROM top_three AS e 131 | JOIN Department AS d 132 | ON e.DepartmentId = d.Id 133 | ORDER BY d.Name ASC, e.Salary DESC; 134 | ``` 135 | 136 | We can get rid of the second temporary table by moving the predicate condition *rnk <=3* inside JOIN. The logic is the same as the code above: tables are filtered before joining. We want to reduce the table size before join. 137 | 138 | ```sql 139 | -- MS SQL: cleaner version 140 | WITH department_ranking AS ( 141 | SELECT 142 | e.Name AS Employee 143 | ,e.Salary 144 | ,e.DepartmentId 145 | ,DENSE_RANK() OVER (PARTITION BY e.DepartmentId ORDER BY e.Salary DESC) AS rnk 146 | FROM Employee AS e 147 | ) 148 | SELECT 149 | d.Name AS Department 150 | ,r.Employee 151 | ,r.Salary 152 | FROM department_ranking AS r 153 | JOIN Department AS d 154 | ON r.DepartmentId = d.Id 155 | AND r.rnk <= 3 156 | ORDER BY d.Name ASC, r.Salary DESC; 157 | ``` 158 | 159 | --- 160 | ## Parting Thought 161 | Because temporary table has no index. The second solution works better only when the pre-filtering results in significant reduction of table size. In this case, fortunately, we are taking only 3 employees out of every departments, which may have hundreds of employees each (huge reduction in join size). For each employee, we have access to *DepartmentId*, which is a foreign key referring to a primary key. 
The joining operation is reduced to three index lookup for each department, and index lookup is efficient! So the last solution (the longest) is the most efficient one. 162 | -------------------------------------------------------------------------------- /LeetCode/185_Department_Top_Three_Salaries/db.sql: -------------------------------------------------------------------------------- 1 | DROP SCHEMA IF EXISTS LeetCode; 2 | CREATE SCHEMA LeetCode; 3 | USE LeetCode; 4 | 5 | Create table If Not Exists Employee (Id int, Name varchar(255), Salary int, DepartmentId int); 6 | Create table If Not Exists Department (Id int, Name varchar(255)); 7 | Truncate table Employee; 8 | insert into Employee (Id, Name, Salary, DepartmentId) values ('1', 'Joe', '70000', '1'); 9 | insert into Employee (Id, Name, Salary, DepartmentId) values ('2', 'Henry', '80000', '2'); 10 | insert into Employee (Id, Name, Salary, DepartmentId) values ('3', 'Sam', '60000', '2'); 11 | insert into Employee (Id, Name, Salary, DepartmentId) values ('4', 'Max', '90000', '1'); 12 | insert into Employee (Id, Name, Salary, DepartmentId) values ('5', 'Janet', '69000', '1'); 13 | insert into Employee (Id, Name, Salary, DepartmentId) values ('6', 'Randy', '85000', '1'); 14 | Truncate table Department; 15 | insert into Department (Id, Name) values ('1', 'IT'); 16 | insert into Department (Id, Name) values ('2', 'Sales'); -------------------------------------------------------------------------------- /LeetCode/185_Department_Top_Three_Salaries/mssql_pre_filter.sql: -------------------------------------------------------------------------------- 1 | -- MS SQL: Boosting effiency with pre-filtering 2 | WITH department_ranking AS ( 3 | SELECT 4 | e.Name AS Employee 5 | ,e.Salary 6 | ,e.DepartmentId 7 | ,DENSE_RANK() OVER (PARTITION BY e.DepartmentId ORDER BY e.Salary DESC) AS rnk 8 | FROM Employee AS e 9 | ) 10 | -- pre-filter table to reduce join size 11 | ,top_three AS ( 12 | SELECT 13 | Employee 14 | ,Salary 15 | 
,DepartmentId 16 | FROM department_ranking 17 | WHERE rnk <= 3 18 | ) 19 | SELECT 20 | d.Name AS Department 21 | ,e.Employee 22 | ,e.Salary 23 | FROM top_three AS e 24 | JOIN Department AS d 25 | ON e.DepartmentId = d.Id 26 | ORDER BY d.Name ASC, e.Salary DESC; 27 | 28 | -- MS SQL: cleaner version 29 | WITH department_ranking AS ( 30 | SELECT 31 | e.Name AS Employee 32 | ,e.Salary 33 | ,e.DepartmentId 34 | ,DENSE_RANK() OVER (PARTITION BY e.DepartmentId ORDER BY e.Salary DESC) AS rnk 35 | FROM Employee AS e 36 | ) 37 | SELECT 38 | d.Name AS Department 39 | ,r.Employee 40 | ,r.Salary 41 | FROM department_ranking AS r 42 | JOIN Department AS d 43 | ON r.DepartmentId = d.Id 44 | AND r.rnk <= 3 45 | ORDER BY d.Name ASC, r.Salary DESC; -------------------------------------------------------------------------------- /LeetCode/185_Department_Top_Three_Salaries/mssql_window.sql: -------------------------------------------------------------------------------- 1 | -- window function version 2 | WITH department_ranking AS( 3 | SELECT 4 | e.Name AS Employee 5 | ,d.Name AS Department 6 | ,e.Salary 7 | ,DENSE_RANK() OVER (PARTITION BY DepartmentId ORDER BY Salary DESC) AS rnk 8 | FROM Employee AS e 9 | JOIN Department AS d 10 | ON e.DepartmentId = d.Id 11 | ) 12 | SELECT 13 | Department 14 | ,Employee 15 | ,Salary 16 | FROM department_ranking 17 | WHERE rnk <= 3 18 | ORDER BY Department ASC, Salary DESC; -------------------------------------------------------------------------------- /LeetCode/185_Department_Top_Three_Salaries/mysql_correlated_subquery.sql: -------------------------------------------------------------------------------- 1 | # Use correlated subquery, and aggregate over subquery 2 | SELECT 3 | d.Name AS 'Department' 4 | ,e.Name AS 'Employee' 5 | ,e.Salary 6 | FROM Employee e 7 | JOIN Department d 8 | ON e.DepartmentId = d.Id 9 | WHERE 10 | (SELECT COUNT(DISTINCT e2.Salary) 11 | FROM 12 | Employee e2 13 | WHERE 14 | e2.Salary > e.Salary 15 | AND e.DepartmentId = 
e2.DepartmentId 16 | ) < 3; 17 | -------------------------------------------------------------------------------- /LeetCode/262_Trips_and_Users/README.md: -------------------------------------------------------------------------------- 1 | # Trips and Users 2 | 3 | ## Description 4 | The Trips table holds all taxi trips. Each trip has a unique Id, while Client_Id and Driver_Id are both foreign keys to the Users_Id at the Users table. Status is an ENUM type of (‘completed’, ‘cancelled_by_driver’, ‘cancelled_by_client’). 5 | 6 | ``` 7 | +----+-----------+-----------+---------+--------------------+----------+ 8 | | Id | Client_Id | Driver_Id | City_Id | Status |Request_at| 9 | +----+-----------+-----------+---------+--------------------+----------+ 10 | | 1 | 1 | 10 | 1 | completed |2013-10-01| 11 | | 2 | 2 | 11 | 1 | cancelled_by_driver|2013-10-01| 12 | | 3 | 3 | 12 | 6 | completed |2013-10-01| 13 | | 4 | 4 | 13 | 6 | cancelled_by_client|2013-10-01| 14 | | 5 | 1 | 10 | 1 | completed |2013-10-02| 15 | | 6 | 2 | 11 | 6 | completed |2013-10-02| 16 | | 7 | 3 | 12 | 6 | completed |2013-10-02| 17 | | 8 | 2 | 12 | 12 | completed |2013-10-03| 18 | | 9 | 3 | 10 | 12 | completed |2013-10-03| 19 | | 10 | 4 | 13 | 12 | cancelled_by_driver|2013-10-03| 20 | +----+-----------+-----------+---------+--------------------+----------+ 21 | ``` 22 | 23 | The Users table holds all users. Each user has an unique Users_Id, and Role is an ENUM type of (‘client’, ‘driver’, ‘partner’). 24 | 25 | ``` 26 | +----------+--------+--------+ 27 | | Users_Id | Banned | Role | 28 | +----------+--------+--------+ 29 | | 1 | No | client | 30 | | 2 | Yes | client | 31 | | 3 | No | client | 32 | | 4 | No | client | 33 | | 10 | No | driver | 34 | | 11 | No | driver | 35 | | 12 | No | driver | 36 | | 13 | No | driver | 37 | +----------+--------+--------+ 38 | ``` 39 | Write a SQL query to find the cancellation rate of requests made by unbanned users between Oct 1, 2013 and Oct 3, 2013. 
For the above tables, your SQL query should return the following rows with the cancellation rate being rounded to two decimal places. 40 | ``` 41 | +------------+-------------------+ 42 | | Day | Cancellation Rate | 43 | +------------+-------------------+ 44 | | 2013-10-01 | 0.33 | 45 | | 2013-10-02 | 0.00 | 46 | | 2013-10-03 | 0.50 | 47 | +------------+-------------------+ 48 | ``` 49 | 50 | Load the database file [db.sql](db.sql) to localhost MySQL. Relevant tables will be created in the LeetCode database. 51 | ``` 52 | mysql < db.sql -uroot -p 53 | ``` 54 | 55 | --- 56 | ## Observation 57 | Make the following observation to interviewers. Confirm your observation is correct. Ask for clarification if necessary. 58 | * Each trip has two foreign key (*Client_Id*, *Driver_Id*) referring the *Users* table's primary key. 59 | * No trip can have __NULL__ in *Driver_id* or *Client_Id*. 60 | * Because of the above assumptions, the three-way driver-trip-rider join results in same number of rows as the *Trips* table, before applying WHERE clause predicate. 61 | 62 | --- 63 | ## On Correctness 64 | * Need to exclude both banned drivers and riders from calculation. 65 | * Output rate to 2 decimal place as required. 66 | * Give descriptive names to output columns as required. 67 | * Constrain date range as required. 68 | 69 | Basic [solution](mysql_simple.sql) that gives correct output: 70 | ```sql 71 | -- MySQL: simple version 72 | SELECT 73 | t.Request_at AS Day 74 | ,ROUND(SUM(t.Status != "completed") / COUNT(*), 2) AS 'Cancellation Rate' 75 | FROM Trips t 76 | JOIN Users d ON t.Driver_Id = d.Users_Id 77 | JOIN Users c ON t.Client_Id = c.Users_Id 78 | WHERE d.Banned = "No" 79 | AND c.Banned = "No" 80 | AND t.Request_at BETWEEN "2013-10-01" AND "2013-10-03" 81 | GROUP BY t.Request_at 82 | ORDER BY t.Request_at; 83 | ``` 84 | 85 | --- 86 | ## On Efficiency 87 | SQL performs JOIN operation before applying WHERE clause. 
If many users are banned, the JOIN operation results in lots of invalid trips in which either the rider or the driver is banned. We may [pre-filter](mysql_pre_filter.sql) the *Users* table and store the results in a temporary table (note that temporary tables do not work in LeetCode). 88 | 89 | Similarly, joining the full *Trips* table can be wasteful. It may contain years of data, and we're interested in only 3 days' data. We can pre-filter the *Trips* table before joining it. 90 | 91 | Finally, we inner join the pre-filtered *valid_trips* table to the *valid_user* table twice. INNER JOIN filters out trips that have no match in *valid_user*, meaning that the driver or rider is banned. 92 | 93 | ```sql 94 | -- MySQL: pre-filtering before join 95 | -- WARNING: LeetCode MySQL does not allow temporary table 96 | WITH valid_user AS ( 97 | SELECT Users_Id 98 | FROM Users 99 | WHERE Banned = "No" 100 | ) 101 | ,valid_trips AS ( 102 | SELECT * 103 | FROM Trips 104 | WHERE Request_at BETWEEN "2013-10-01" AND "2013-10-03" 105 | ) 106 | SELECT 107 | t.Request_at AS Day 108 | ,ROUND(SUM(t.Status != "completed") / COUNT(*), 2) AS 'Cancellation Rate' 109 | FROM valid_trips t 110 | JOIN valid_user d ON t.Driver_Id = d.Users_Id 111 | JOIN valid_user c ON t.Client_Id = c.Users_Id 112 | GROUP BY t.Request_at; 113 | ``` 114 | 115 | Instead of building two temporary tables, we can pre-filter by moving the predicate condition inside the JOIN clause. The logic is the same as the code above: tables are filtered before joining. We want to reduce the table size before the join. In the SQL code below, trips and drivers are filtered before being joined, and the clients are filtered before joining with the output of the first join. 
116 | 117 | ```sql 118 | -- MySQL cleaner code 119 | SELECT 120 | t.Request_at AS Day 121 | ,ROUND(SUM(t.Status != "completed") / COUNT(*), 2) AS 'Cancellation Rate' 122 | FROM Trips t 123 | JOIN Users d 124 | ON t.Driver_Id = d.Users_Id 125 | AND d.Banned = 'No' 126 | AND t.Request_at BETWEEN "2013-10-01" AND "2013-10-03" 127 | JOIN Users c 128 | ON t.Client_Id = c.Users_Id 129 | And c.Banned = 'No' 130 | GROUP BY t.Request_at; 131 | ``` 132 | 133 | Alternatively, use a [set](mysql_set.sql) to retain all valid *User_Id*, and directly filter the *Trip* table without joining. The disadvantage is that because most database engines convert an IN clause to a series of OR operators, the query needs to be re-evaluated every time a new user gets banned, because the number of OR operators is constantly changing. 134 | 135 | When using a multi-column predicate, apply the more restrictive condition first. For example, filter *Request_at* before filtering *Users_Id*. Because the table size gets cut drastically upfront, the computation required for later predicates decreases. 136 | 137 | Using an IN (... Banned = "No") clause is more efficient than using NOT IN (...Banned = "Yes"). To check that an element is not in a set, a full scan of the set is required. 138 | 139 | ```sql 140 | -- MySQL: set version 141 | SELECT 142 | Request_at AS Day 143 | ,ROUND(SUM(Status != "completed") / COUNT(*), 2) AS 'Cancellation Rate' 144 | FROM Trips 145 | WHERE Request_at BETWEEN "2013-10-01" AND "2013-10-03" 146 | AND Driver_Id IN (SELECT Users_Id FROM Users WHERE Banned = 'No') 147 | AND Client_Id IN (SELECT Users_Id FROM Users WHERE Banned = 'No') 148 | GROUP BY Request_at; 149 | ``` 150 | 151 | --- 152 | ## Parting Thought 153 | Because a temporary table has no index, the second solution works better only when the pre-filtering results in a significant reduction of table size. Otherwise, joining temporary tables without an index can be slower than joining the full tables. 
In practice, look up the query plan and estimated cost before running the query. 154 | -------------------------------------------------------------------------------- /LeetCode/262_Trips_and_Users/db.sql: -------------------------------------------------------------------------------- 1 | DROP SCHEMA IF EXISTS LeetCode; 2 | CREATE SCHEMA LeetCode; 3 | USE LeetCode; 4 | 5 | Create table If Not Exists Trips (Id int, Client_Id int, Driver_Id int, City_Id int, Status ENUM('completed', 'cancelled_by_driver', 'cancelled_by_client'), Request_at varchar(50)); 6 | Create table If Not Exists Users (Users_Id int, Banned varchar(50), Role ENUM('client', 'driver', 'partner')); 7 | Truncate table Trips; 8 | insert into Trips (Id, Client_Id, Driver_Id, City_Id, Status, Request_at) values ('1', '1', '10', '1', 'completed', '2013-10-01'); 9 | insert into Trips (Id, Client_Id, Driver_Id, City_Id, Status, Request_at) values ('2', '2', '11', '1', 'cancelled_by_driver', '2013-10-01'); 10 | insert into Trips (Id, Client_Id, Driver_Id, City_Id, Status, Request_at) values ('3', '3', '12', '6', 'completed', '2013-10-01'); 11 | insert into Trips (Id, Client_Id, Driver_Id, City_Id, Status, Request_at) values ('4', '4', '13', '6', 'cancelled_by_client', '2013-10-01'); 12 | insert into Trips (Id, Client_Id, Driver_Id, City_Id, Status, Request_at) values ('5', '1', '10', '1', 'completed', '2013-10-02'); 13 | insert into Trips (Id, Client_Id, Driver_Id, City_Id, Status, Request_at) values ('6', '2', '11', '6', 'completed', '2013-10-02'); 14 | insert into Trips (Id, Client_Id, Driver_Id, City_Id, Status, Request_at) values ('7', '3', '12', '6', 'completed', '2013-10-02'); 15 | insert into Trips (Id, Client_Id, Driver_Id, City_Id, Status, Request_at) values ('8', '2', '12', '12', 'completed', '2013-10-03'); 16 | insert into Trips (Id, Client_Id, Driver_Id, City_Id, Status, Request_at) values ('9', '3', '10', '12', 'completed', '2013-10-03'); 17 | insert into Trips (Id, Client_Id, Driver_Id, 
City_Id, Status, Request_at) values ('10', '4', '13', '12', 'cancelled_by_driver', '2013-10-03'); 18 | Truncate table Users; 19 | insert into Users (Users_Id, Banned, Role) values ('1', 'No', 'client'); 20 | insert into Users (Users_Id, Banned, Role) values ('2', 'Yes', 'client'); 21 | insert into Users (Users_Id, Banned, Role) values ('3', 'No', 'client'); 22 | insert into Users (Users_Id, Banned, Role) values ('4', 'No', 'client'); 23 | insert into Users (Users_Id, Banned, Role) values ('10', 'No', 'driver'); 24 | insert into Users (Users_Id, Banned, Role) values ('11', 'No', 'driver'); 25 | insert into Users (Users_Id, Banned, Role) values ('12', 'No', 'driver'); 26 | insert into Users (Users_Id, Banned, Role) values ('13', 'No', 'driver'); -------------------------------------------------------------------------------- /LeetCode/262_Trips_and_Users/mysql_pre_filter.sql: -------------------------------------------------------------------------------- 1 | -- MySQL: pre-filtering before join 2 | -- WARNING: LeetCode MySQL does not allow temporary table 3 | WITH valid_user AS ( 4 | SELECT Users_Id 5 | FROM Users 6 | WHERE Banned = "No" 7 | ) 8 | ,valid_trips AS ( 9 | SELECT * 10 | FROM Trips 11 | WHERE Request_at BETWEEN "2013-10-01" AND "2013-10-03" 12 | ) 13 | SELECT 14 | t.Request_at AS Day 15 | ,ROUND(SUM(t.Status != "completed") / COUNT(*), 2) AS 'Cancellation Rate' 16 | FROM valid_trips t 17 | JOIN valid_user d ON t.Driver_Id = d.Users_Id 18 | JOIN valid_user c ON t.Client_Id = c.Users_Id 19 | GROUP BY t.Request_at; 20 | 21 | -- MySQL cleaner code 22 | -- MySQL cleaner code 23 | SELECT 24 | t.Request_at AS Day 25 | ,ROUND(SUM(t.Status != "completed") / COUNT(*), 2) AS 'Cancellation Rate' 26 | FROM Trips t 27 | JOIN Users d 28 | ON t.Driver_Id = d.Users_Id 29 | AND d.Banned = 'No' 30 | AND t.Request_at BETWEEN "2013-10-01" AND "2013-10-03" 31 | JOIN Users c 32 | ON t.Client_Id = c.Users_Id 33 | And c.Banned = 'No' 34 | GROUP BY t.Request_at; 
-------------------------------------------------------------------------------- /LeetCode/262_Trips_and_Users/mysql_set.sql: -------------------------------------------------------------------------------- 1 | -- MySQL: set version 2 | SELECT 3 | Request_at AS Day 4 | ,ROUND(SUM(Status != "completed") / COUNT(*), 2) AS 'Cancellation Rate' 5 | FROM Trips 6 | WHERE Request_at BETWEEN "2013-10-01" AND "2013-10-03" 7 | AND Driver_Id IN (SELECT Users_Id FROM Users WHERE Banned = 'No') 8 | AND Client_Id IN (SELECT Users_Id FROM Users WHERE Banned = 'No') 9 | GROUP BY Request_at; -------------------------------------------------------------------------------- /LeetCode/262_Trips_and_Users/mysql_simple.sql: -------------------------------------------------------------------------------- 1 | -- MySQL: simple join version 2 | SELECT 3 | t.Request_at AS Day 4 | ,ROUND(SUM(t.Status != "completed") / COUNT(*), 2) AS 'Cancellation Rate' 5 | FROM Trips t 6 | JOIN Users d ON t.Driver_Id = d.Users_Id 7 | JOIN Users c ON t.Client_Id = c.Users_Id 8 | WHERE d.Banned = "No" 9 | AND c.Banned = "No" 10 | AND t.Request_at BETWEEN "2013-10-01" AND "2013-10-03" 11 | GROUP BY t.Request_at 12 | ORDER BY t.Request_at; 13 | -------------------------------------------------------------------------------- /LeetCode/579_Find_Cumulative_Salary_of_an_Employee/db.sql: -------------------------------------------------------------------------------- 1 | DROP SCHEMA IF EXISTS LeetCode; 2 | CREATE SCHEMA LeetCode; 3 | USE LeetCode; 4 | 5 | Create table If Not Exists Employee (Id int, Month int, Salary int); 6 | 7 | insert into Employee (Id, Month, Salary) values ('1', '1', '20'); 8 | insert into Employee (Id, Month, Salary) values ('2', '1', '20'); 9 | insert into Employee (Id, Month, Salary) values ('1', '2', '30'); 10 | insert into Employee (Id, Month, Salary) values ('2', '2', '30'); 11 | insert into Employee (Id, Month, Salary) values ('3', '2', '40'); 12 | insert into Employee (Id, Month, Salary) 
values ('1', '3', '40'); 13 | insert into Employee (Id, Month, Salary) values ('3', '3', '60'); 14 | insert into Employee (Id, Month, Salary) values ('1', '4', '60'); 15 | insert into Employee (Id, Month, Salary) values ('3', '4', '70'); -------------------------------------------------------------------------------- /LeetCode/579_Find_Cumulative_Salary_of_an_Employee/mssql_lag.sql: -------------------------------------------------------------------------------- 1 | -- MS SQL: Lag window function 2 | WITH cumulative AS ( 3 | SELECT 4 | Id 5 | ,LAG(Month, 1) OVER (PARTITION BY Id ORDER BY Month ASC) AS Month 6 | ,ISNULL(LAG(Salary, 1) OVER (PARTITION BY Id ORDER BY Month ASC), 0) 7 | + ISNULL(LAG(Salary, 2) OVER (PARTITION BY Id ORDER BY Month ASC), 0) 8 | + ISNULL(LAG(Salary, 3) OVER (PARTITION BY Id ORDER BY Month ASC), 0) AS Salary 9 | FROM Employee 10 | ) 11 | SELECT * 12 | FROM cumulative 13 | WHERE Month IS NOT NULL 14 | ORDER BY Id ASC, Month DESC; 15 | 16 | -- MySQL 8 also supports window function, and even window alias! 
17 | WITH cumulative AS ( 18 | SELECT 19 | Id 20 | ,LAG(Month, 1) OVER w AS Month 21 | ,IFNULL(LAG(Salary, 1) OVER w, 0) 22 | + IFNULL(LAG(Salary, 2) OVER w, 0) 23 | + IFNULL(LAG(Salary, 3) OVER w, 0) AS Salary 24 | FROM Employee 25 | WINDOW w AS (PARTITION BY Id ORDER BY Month ASC) 26 | ) 27 | SELECT * 28 | FROM cumulative 29 | WHERE Month IS NOT NULL 30 | ORDER BY Id ASC, Month DESC; -------------------------------------------------------------------------------- /LeetCode/579_Find_Cumulative_Salary_of_an_Employee/mysql_join_tmp_table.sql: -------------------------------------------------------------------------------- 1 | -- MySQL: non-equijoin 2 | SELECT 3 | e.Id 4 | ,e.Month 5 | ,IFNULL(e.Salary, 0) + IFNULL(e1.Salary, 0) + IFNULL(e2.Salary, 0) AS Salary 6 | FROM 7 | (SELECT Id, MAX(Month) AS max_month 8 | FROM Employee 9 | GROUP BY Id) AS e_max 10 | -- if using left join, employee with only one record will see NULL returned after join 11 | JOIN Employee e ON e_max.Id = e.Id AND e_max.max_month > e.Month 12 | LEFT JOIN Employee e1 ON e.Id = e1.Id AND e.Month = e1.Month + 1 13 | LEFT JOIN Employee e2 ON e.Id = e2.Id AND e.Month = e2.Month + 2 14 | ORDER BY e.Id ASC, e.Month DESC; 15 | 16 | ---------------------------------------------------------------------------------------- 17 | -- alternatively, use where clause to filter result 18 | SELECT 19 | e.Id 20 | ,e.Month 21 | ,IFNULL(e.Salary, 0) + IFNULL(e1.Salary, 0) + IFNULL(e2.Salary, 0) AS Salary 22 | FROM 23 | (SELECT Id, MAX(Month) AS max_month 24 | FROM Employee 25 | GROUP BY Id) AS e_max 26 | -- if using left join, employee with only one record will see NULL returned after join 27 | JOIN Employee e ON e_max.Id = e.Id 28 | LEFT JOIN Employee e1 ON e.Id = e1.Id AND e.Month = e1.Month + 1 29 | LEFT JOIN Employee e2 ON e.Id = e2.Id AND e.Month = e2.Month + 2 30 | WHERE e_max.max_month != e.Month 31 | ORDER BY e.Id ASC, e.Month DESC; 32 | 33 | 
---------------------------------------------------------------------------------------- 34 | -- alternatively, use cross join, also correct 35 | SELECT 36 | e.Id 37 | ,e.Month 38 | ,IFNULL(e.Salary, 0) + IFNULL(e1.Salary, 0) + IFNULL(e2.Salary, 0) AS Salary 39 | FROM 40 | (SELECT Id, MAX(Month) AS max_month 41 | FROM Employee 42 | GROUP BY Id) AS e_max, 43 | Employee e 44 | LEFT JOIN Employee e1 ON e.Id = e1.Id AND e.Month = e1.Month + 1 45 | LEFT JOIN Employee e2 ON e.Id = e2.Id AND e.Month = e2.Month + 2 46 | WHERE e.Id = e_max.Id AND e.Month != e_max.max_month 47 | ORDER BY e.Id ASC, e.Month DESC; -------------------------------------------------------------------------------- /LeetCode/579_Find_Cumulative_Salary_of_an_Employee/mysql_set.sql: -------------------------------------------------------------------------------- 1 | -- MySQL: set solution 2 | SELECT 3 | e.Id 4 | ,e.Month 5 | ,IFNULL(e.Salary, 0) + IFNULL(e1.Salary, 0) + IFNULL(e2.Salary, 0) AS Salary 6 | FROM Employee e 7 | LEFT JOIN Employee e1 ON e.Id = e1.Id AND e.Month = e1.Month + 1 8 | LEFT JOIN Employee e2 ON e.Id = e2.Id AND e.Month = e2.Month + 2 9 | WHERE (e.Id, e.Month) NOT IN ( 10 | SELECT 11 | Id 12 | ,MAX(Month) AS max_month 13 | FROM Employee 14 | GROUP BY Id) 15 | ORDER BY e.Id ASC, e.Month DESC; -------------------------------------------------------------------------------- /LeetCode/579_Find_Cumulative_Salary_of_an_Employee/mysql_single_join.sql: -------------------------------------------------------------------------------- 1 | -- MySQL: single join 2 | SELECT 3 | e1.Id 4 | ,MAX(e2.Month) AS Month 5 | ,SUM(e2.Salary) AS Salary 6 | FROM Employee e1 7 | JOIN Employee e2 8 | ON e1.Id = e2.Id 9 | AND e1.Month - e2.Month BETWEEN 1 AND 3 10 | GROUP BY e1.Id, e1.Month 11 | ORDER BY e1.Id ASC, e1.Month DESC -------------------------------------------------------------------------------- /LeetCode/601_Human_Traffic_of_Stadium/README.md: 
-------------------------------------------------------------------------------- 1 | # Human Traffic of Stadium 2 | 3 | ## Description 4 | X city built a new stadium, each day many people visit it and the stats are saved as these columns: id, visit_date, people 5 | 6 | Please write a query to display the records which have 3 or more consecutive rows and the amount of people more than 100(inclusive). 7 | 8 | Load the database file [db.sql](db.sql) to localhost MySQL. Relevant tables will be created in the LeetCode database. 9 | ``` 10 | mysql < db.sql -uroot -p 11 | ``` 12 | 13 | For example, the table stadium: 14 | ``` 15 | +------+------------+-----------+ 16 | | id | visit_date | people | 17 | +------+------------+-----------+ 18 | | 1 | 2017-01-01 | 10 | 19 | | 2 | 2017-01-02 | 109 | 20 | | 3 | 2017-01-03 | 150 | 21 | | 4 | 2017-01-04 | 99 | 22 | | 5 | 2017-01-05 | 145 | 23 | | 6 | 2017-01-06 | 1455 | 24 | | 7 | 2017-01-07 | 199 | 25 | | 8 | 2017-01-08 | 188 | 26 | +------+------------+-----------+ 27 | ``` 28 | For the sample data above, the output is: 29 | ``` 30 | +------+------------+-----------+ 31 | | id | visit_date | people | 32 | +------+------------+-----------+ 33 | | 5 | 2017-01-05 | 145 | 34 | | 6 | 2017-01-06 | 1455 | 35 | | 7 | 2017-01-07 | 199 | 36 | | 8 | 2017-01-08 | 188 | 37 | +------+------------+-----------+ 38 | ``` 39 | Note: 40 | Each day only have one row record, and the dates are increasing with id increasing. 41 | 42 | --- 43 | ## Observation 44 | Make the following observation to interviewers. Confirm your observation is correct. Ask for clarification if necessary. 45 | * Every day has a record. If two dates differs by 1, their id differs by 1. Therefore, instead of joining by *visit_date* we can join by *id*, which is more efficient. 46 | * We are scanning three-day consecutive window here. A valid row can either be positioned at the beginning of the window, at the middle, or end of the window. 
47 | 48 | --- 49 | ## On Correctness 50 | The key is to remove duplicates. Consider four days in a row, each day with over 100 people. In the first window, all three days will be returned. In the next window, day 2, 3, 4 will all be returned, resulting in duplicate day 2 and 3. To remove duplicate, use *DISTINCT* keyword, with three [self-joins]( mysql_simple.sql):. 51 | 52 | | window 1 | window 2 | 53 | |----|-----------| 54 | | day 1| | 55 | | day 2| day 2 | 56 | | day 3| day 3 | 57 | | | day 4 | 58 | 59 | ```sql 60 | -- MySQL solution 61 | SELECT DISTINCT 62 | s1.* 63 | FROM 64 | stadium AS s1 65 | ,stadium AS s2 66 | ,stadium AS s3 67 | WHERE s1.people >= 100 68 | AND s2.people >= 100 69 | AND s3.people >= 100 70 | AND ((s1.id = s2.id - 1 AND s1.id = s3.id - 2) -- start of window 71 | OR (s1.id = s2.id + 1 AND s1.id = s3.id - 1) -- middle of window 72 | OR (s1.id = s2.id + 2 AND s1.id = s3.id + 1)) -- end of window 73 | ORDER BY s1.id; 74 | ``` 75 | 76 | --- 77 | ## On Efficiency 78 | Joining the table three times resulting in a huge cartesian product. One way to improve efficiency is simply [pre-filter](mssql_pre_filter.sql) the table, so only days with over 100 people are joined. 79 | 80 | ```sql 81 | -- MySQL: pre-filtering 82 | SELECT DISTINCT 83 | s1.* 84 | FROM 85 | (SELECT * FROM stadium WHERE people >= 100) AS s1 86 | ,(SELECT * FROM stadium WHERE people >= 100) AS s2 87 | ,(SELECT * FROM stadium WHERE people >= 100) AS s3 88 | WHERE (s1.id = s2.id - 1 AND s1.id = s3.id - 2) 89 | OR (s1.id = s2.id + 1 AND s1.id = s3.id - 1) 90 | OR (s1.id = s2.id + 2 AND s1.id = s3.id + 1) 91 | ORDER BY s1.id; 92 | ``` 93 | 94 | If using MS SQL, we only need to define the temporary table once, making it much easier to maintain. 
95 | 96 | ```sql 97 | -- MS SQL: cleaner code 98 | WITH good_day AS ( 99 | SELECT * FROM stadium WHERE people >= 100 100 | ) 101 | SELECT DISTINCT s1.* FROM 102 | good_day AS s1, 103 | good_day AS s2, 104 | good_day AS s3 105 | WHERE (s1.id = s2.id - 1 AND s1.id = s3.id - 2) 106 | OR (s1.id = s2.id + 1 AND s1.id = s3.id - 1) 107 | OR (s1.id = s2.id + 2 AND s1.id = s3.id + 1) 108 | ORDER BY s1.id; 109 | ``` 110 | 111 | Using either MySQL or MS SQL, we can do away with temporary table, by merging predicates into the JOIN clause. This small change achieves the same filtering effect. 112 | 113 | ```sql 114 | SELECT DISTINCT s1.* 115 | FROM stadium AS s1 116 | LEFT JOIN stadium AS s2 117 | ON s1.people >= 100 118 | AND s2.people >= 100 119 | LEFT JOIN stadium AS s3 120 | ON s3.people >= 100 121 | WHERE (s1.id = s2.id - 1 AND s1.id = s3.id - 2) 122 | OR (s1.id = s2.id + 1 AND s1.id = s3.id - 1) 123 | OR (s1.id = s2.id + 2 AND s1.id = s3.id + 1) 124 | ORDER BY s1.id; 125 | ``` 126 | 127 | --- 128 | ## Optional: Window function 129 | In MS SQL, the problem can be solved with [window function](mssql_window.sql). Be careful that you __cannot__ use window column in the predicates. You must save the expanded table with window columns in a temporary table. 130 | 131 | ```sql 132 | -- MS SQL: window 133 | WITH long_table AS ( 134 | SELECT 135 | * 136 | ,LAG(people, 2) OVER (ORDER BY id ASC) AS pre2 137 | ,LAG(people, 1) OVER (ORDER BY id ASC) AS pre1 138 | ,LEAD(people, 1) OVER (ORDER BY id ASC) AS nxt1 139 | ,LEAD(people, 2) OVER (ORDER BY id ASC) AS nxt2 140 | FROM stadium 141 | ) 142 | SELECT 143 | id 144 | ,visit_date 145 | ,people 146 | FROM long_table 147 | WHERE people >= 100 148 | AND ((pre2 >= 100 AND pre1 >= 100) 149 | OR (pre1 >= 100 AND nxt1 >= 100) 150 | OR (nxt1 >= 100 AND nxt2 >= 100)) 151 | ORDER BY id; 152 | ``` 153 | 154 | For the sake of completion, MySQL8 supports window function too, and can be written with window alias. 
155 | 156 | ```sql 157 | -- MySQL 8 equivalent 158 | WITH long_table AS ( 159 | SELECT 160 | * 161 | ,LAG(people, 2) OVER w AS pre2 162 | ,LAG(people, 1) OVER w AS pre1 163 | ,LEAD(people, 1) OVER w AS nxt1 164 | ,LEAD(people, 2) OVER w AS nxt2 165 | FROM stadium 166 | WINDOW w AS (ORDER BY id ASC) 167 | ) 168 | SELECT 169 | id 170 | ,visit_date 171 | ,people 172 | FROM long_table 173 | WHERE people >= 100 174 | AND ((pre2 >= 100 AND pre1 >= 100) 175 | OR (pre1 >= 100 AND nxt1 >= 100) 176 | OR (nxt1 >= 100 AND nxt2 >= 100)) 177 | ORDER BY id; 178 | ``` 179 | 180 | --- 181 | ## Parting Thought 182 | Window function is more efficient than join when the table is large. In this example, we are sorting by index *id* in the window, further boosting efficiency. Furthermore, window solution gets rid of the *DISTINCT* keyword, which establishes a hash set (inefficient). The test case in LeetCode, however, does not show its superiority. 183 | 184 | -------------------------------------------------------------------------------- /LeetCode/601_Human_Traffic_of_Stadium/db.sql: -------------------------------------------------------------------------------- 1 | DROP SCHEMA IF EXISTS LeetCode; 2 | CREATE SCHEMA LeetCode; 3 | USE LeetCode; 4 | 5 | Create table If Not Exists stadium (id int, visit_date DATE NULL, people int); 6 | 7 | insert into stadium (id, visit_date, people) values ('1', '2017-01-01', '10'); 8 | insert into stadium (id, visit_date, people) values ('2', '2017-01-02', '109'); 9 | insert into stadium (id, visit_date, people) values ('3', '2017-01-03', '150'); 10 | insert into stadium (id, visit_date, people) values ('4', '2017-01-04', '99'); 11 | insert into stadium (id, visit_date, people) values ('5', '2017-01-05', '145'); 12 | insert into stadium (id, visit_date, people) values ('6', '2017-01-06', '1455'); 13 | insert into stadium (id, visit_date, people) values ('7', '2017-01-07', '199'); 14 | insert into stadium (id, visit_date, people) values ('8', 
'2017-01-08', '188'); -------------------------------------------------------------------------------- /LeetCode/601_Human_Traffic_of_Stadium/mssql_pre_filter.sql: -------------------------------------------------------------------------------- 1 | -- MySQL: pre-filtering 2 | SELECT DISTINCT 3 | s1.* 4 | FROM 5 | (SELECT * FROM stadium WHERE people >= 100) AS s1 6 | ,(SELECT * FROM stadium WHERE people >= 100) AS s2 7 | ,(SELECT * FROM stadium WHERE people >= 100) AS s3 8 | WHERE (s1.id = s2.id - 1 AND s1.id = s3.id - 2) 9 | OR (s1.id = s2.id + 1 AND s1.id = s3.id - 1) 10 | OR (s1.id = s2.id + 2 AND s1.id = s3.id + 1) 11 | ORDER BY s1.id; 12 | 13 | -- MS SQL: cleaner code 14 | WITH good_day AS ( 15 | SELECT * FROM stadium WHERE people >= 100 16 | ) 17 | SELECT DISTINCT s1.* FROM 18 | good_day AS s1, 19 | good_day AS s2, 20 | good_day AS s3 21 | WHERE (s1.id = s2.id - 1 AND s1.id = s3.id - 2) 22 | OR (s1.id = s2.id + 1 AND s1.id = s3.id - 1) 23 | OR (s1.id = s2.id + 2 AND s1.id = s3.id + 1) 24 | ORDER BY s1.id; 25 | 26 | -- MySQL, MS SQL: cleaner version 27 | SELECT DISTINCT s1.* 28 | FROM stadium AS s1 29 | LEFT JOIN stadium AS s2 30 | ON s1.people >= 100 31 | AND s2.people >= 100 32 | LEFT JOIN stadium AS s3 33 | ON s3.people >= 100 34 | WHERE (s1.id = s2.id - 1 AND s1.id = s3.id - 2) 35 | OR (s1.id = s2.id + 1 AND s1.id = s3.id - 1) 36 | OR (s1.id = s2.id + 2 AND s1.id = s3.id + 1) 37 | ORDER BY s1.id; -------------------------------------------------------------------------------- /LeetCode/601_Human_Traffic_of_Stadium/mssql_window.sql: -------------------------------------------------------------------------------- 1 | -- MS SQL: window 2 | WITH long_table AS ( 3 | SELECT 4 | * 5 | ,LAG(people, 2) OVER (ORDER BY id ASC) AS pre2 6 | ,LAG(people, 1) OVER (ORDER BY id ASC) AS pre1 7 | ,LEAD(people, 1) OVER (ORDER BY id ASC) AS nxt1 8 | ,LEAD(people, 2) OVER (ORDER BY id ASC) AS nxt2 9 | FROM stadium 10 | ) 11 | SELECT 12 | id 13 | ,visit_date 14 | ,people 15 | 
FROM long_table 16 | WHERE people >= 100 17 | AND ((pre2 >= 100 AND pre1 >= 100) 18 | OR (pre1 >= 100 AND nxt1 >= 100) 19 | OR (nxt1 >= 100 AND nxt2 >= 100)) 20 | ORDER BY id; 21 | 22 | -- MySQL 8 equivalent 23 | WITH long_table AS ( 24 | SELECT 25 | * 26 | ,LAG(people, 2) OVER w AS pre2 27 | ,LAG(people, 1) OVER w AS pre1 28 | ,LEAD(people, 1) OVER w AS nxt1 29 | ,LEAD(people, 2) OVER w AS nxt2 30 | FROM stadium 31 | WINDOW w AS (ORDER BY id ASC) 32 | ) 33 | SELECT 34 | id 35 | ,visit_date 36 | ,people 37 | FROM long_table 38 | WHERE people >= 100 39 | AND ((pre2 >= 100 AND pre1 >= 100) 40 | OR (pre1 >= 100 AND nxt1 >= 100) 41 | OR (nxt1 >= 100 AND nxt2 >= 100)) 42 | ORDER BY id; -------------------------------------------------------------------------------- /LeetCode/601_Human_Traffic_of_Stadium/mysql_simple.sql: -------------------------------------------------------------------------------- 1 | -- MySQL solution 2 | SELECT DISTINCT 3 | s1.* 4 | FROM 5 | stadium AS s1 6 | ,stadium AS s2 7 | ,stadium AS s3 8 | WHERE s1.people >= 100 9 | AND s2.people >= 100 10 | AND s3.people >= 100 11 | AND ((s1.id = s2.id - 1 AND s1.id = s3.id - 2) -- start of window 12 | OR (s1.id = s2.id + 1 AND s1.id = s3.id - 1) -- middle of window 13 | OR (s1.id = s2.id + 2 AND s1.id = s3.id + 1)) -- end of window 14 | ORDER BY s1.id; -------------------------------------------------------------------------------- /LeetCode/615_Average_Salary/README.md: -------------------------------------------------------------------------------- 1 | # Average Salary: Departments VS Company 2 | 3 | ## Description 4 | 5 | Given two tables as below, write a query to display the comparison result (higher/lower/same) of the average salary of employees in a department to the company's average salary. 6 | 7 | Load the database file [db.sql](db.sql) to localhost MySQL. Relevant tables will be created in the LeetCode database. 
8 | ``` 9 | mysql < db.sql -uroot -p 10 | ``` 11 | 12 | Table: salary 13 | ``` 14 | | id | employee_id | amount | pay_date | 15 | |----|-------------|--------|------------| 16 | | 1 | 1 | 9000 | 2017-03-31 | 17 | | 2 | 2 | 6000 | 2017-03-31 | 18 | | 3 | 3 | 10000 | 2017-03-31 | 19 | | 4 | 1 | 7000 | 2017-02-28 | 20 | | 5 | 2 | 6000 | 2017-02-28 | 21 | | 6 | 3 | 8000 | 2017-02-28 | 22 | ``` 23 | 24 | The employee_id column refers to the employee_id in the following table employee. 25 | 26 | ``` 27 | | employee_id | department_id | 28 | |-------------|---------------| 29 | | 1 | 1 | 30 | | 2 | 2 | 31 | | 3 | 2 | 32 | ``` 33 | 34 | So for the sample data above, the result is: 35 | 36 | ``` 37 | | pay_month | department_id | comparison | 38 | |-----------|---------------|-------------| 39 | | 2017-03 | 1 | higher | 40 | | 2017-03 | 2 | lower | 41 | | 2017-02 | 1 | same | 42 | | 2017-02 | 2 | same | 43 | ``` 44 | 45 | Explanation 46 | 47 | 48 | In March, the company's average salary is (9000+6000+10000)/3 = 8333.33... 49 | 50 | 51 | The average salary for department '1' is 9000, which is the salary of employee_id '1' since there is only one employee in this department. So the comparison result is 'higher' since 9000 > 8333.33 obviously. 52 | 53 | 54 | The average salary of department '2' is (6000 + 10000)/2 = 8000, which is the average of employee_id '2' and '3'. So the comparison result is 'lower' since 8000 < 8333.33. 55 | 56 | 57 | With he same formula for the average salary comparison in February, the result is 'same' since both the department '1' and '2' have the same average salary with the company, which is 7000. 58 | 59 | --- 60 | ## Observation 61 | Make the following observation to interviewers. Confirm your observation is correct. Ask for clarification if necessary. 62 | * Ask whether every employee will be in a department (no NULL in the department_id column). If there is NULL, how to display it. 
63 | * Note that people get paid on __different__ day of the same months, meaning that you must process the date label before averaging, otherwise you __will not__ get the correct average. 64 | 65 | --- 66 | ## Solution 67 | The overall strategy is to build two temporary tables, one aggregated over the pay month, one aggregated over the (pay month, department) key pair. After aggregating, joining the two temporary tables. Because every distinct pay month must exist in both table, inner join is enough. 68 | 69 | #### Step 1. Company Monthly Average 70 | ```sql 71 | mysql> SELECT 72 | -> date_format(pay_date, '%Y-%m') as pay_month 73 | -> ,AVG(amount) AS corp_avg_sal 74 | -> FROM salary GROUP BY pay_month; 75 | ``` 76 | ``` 77 | +-----------+--------------+ 78 | | pay_month | corp_avg_sal | 79 | +-----------+--------------+ 80 | | 2017-02 | 6666.6667 | 81 | | 2017-03 | 8333.3333 | 82 | +-----------+--------------+ 83 | 2 rows in set (0.00 sec) 84 | ``` 85 | 86 | Notice what happens if you don't process pay_date column before aggregating: you get the wrong average, and your final result will be wrong. 87 | 88 | ```sql 89 | mysql> SELECT 90 | -> pay_date 91 | -> ,AVG(amount) AS corp_avg_sal 92 | -> FROM salary GROUP BY pay_date; 93 | ``` 94 | ``` 95 | +------------+--------------+ 96 | | pay_date | corp_avg_sal | 97 | +------------+--------------+ 98 | | 2017-02-25 | 6000.0000 | 99 | | 2017-02-28 | 7000.0000 | 100 | | 2017-03-31 | 8333.3333 | 101 | +------------+--------------+ 102 | 3 rows in set (0.00 sec) 103 | ``` 104 | 105 | #### Step 2. Department Monthly Average 106 | Similarly, process pay_date before aggregating. 
107 | 108 | ```sql 109 | mysql> SELECT 110 | -> date_format(pay_date, '%Y-%m') as pay_month 111 | -> ,e.department_id 112 | -> ,AVG(amount) AS dept_avg_sal 113 | -> FROM salary AS s 114 | -> JOIN employee AS e 115 | -> ON s.employee_id = e.employee_id 116 | -> GROUP BY pay_month, e.department_id; 117 | ``` 118 | ``` 119 | +-----------+---------------+--------------+ 120 | | pay_month | department_id | dept_avg_sal | 121 | +-----------+---------------+--------------+ 122 | | 2017-02 | 1 | 6000.0000 | 123 | | 2017-02 | 2 | 7000.0000 | 124 | | 2017-03 | 1 | 9000.0000 | 125 | | 2017-03 | 2 | 8000.0000 | 126 | +-----------+---------------+--------------+ 127 | 4 rows in set (0.00 sec) 128 | ``` 129 | 130 | #### Step 3. Join and Present Results 131 | Can use inner join, or cross join with where clause, or left join. The results are the same. On LeetCode, the efficiency is quite different: cross join (167ms) < inner join (185ms) < left join (205ms). 132 | 133 | Remember to give alias to derived tables. See full solution [here](mysql_session_vars.sql). 
134 | 135 | ```sql 136 | mysql> SELECT 137 | -> a.pay_month 138 | -> ,a.department_id, 139 | -> CASE 140 | -> WHEN a.dept_avg_sal < b.corp_avg_sal THEN "lower" 141 | -> WHEN a.dept_avg_sal = b.corp_avg_sal THEN "same" 142 | -> ELSE "higher" 143 | -> END AS comparison 144 | -> FROM 145 | -> (SELECT 146 | -> date_format(pay_date, '%Y-%m') as pay_month 147 | -> ,e.department_id 148 | -> ,AVG(amount) AS dept_avg_sal 149 | -> FROM salary AS s 150 | -> JOIN employee AS e 151 | -> ON s.employee_id = e.employee_id 152 | -> GROUP BY pay_month, e.department_id 153 | -> ) AS a 154 | -> JOIN 155 | -> (SELECT 156 | -> date_format(pay_date, '%Y-%m') as pay_month 157 | -> ,AVG(amount) AS corp_avg_sal 158 | -> FROM salary GROUP BY pay_month 159 | -> ) AS b 160 | -> ON a.pay_month = b.pay_month 161 | -> ORDER BY pay_month, department_id; 162 | ``` 163 | ``` 164 | +-----------+---------------+------------+ 165 | | pay_month | department_id | comparison | 166 | +-----------+---------------+------------+ 167 | | 2017-02 | 1 | same | 168 | | 2017-02 | 2 | same | 169 | | 2017-03 | 1 | higher | 170 | | 2017-03 | 2 | lower | 171 | +-----------+---------------+------------+ 172 | 4 rows in set (0.00 sec) 173 | ``` 174 | -------------------------------------------------------------------------------- /LeetCode/615_Average_Salary/db.sql: -------------------------------------------------------------------------------- 1 | DROP SCHEMA IF EXISTS LeetCode; 2 | CREATE SCHEMA LeetCode; 3 | USE LeetCode; 4 | 5 | Create table If Not Exists salary (id int, employee_id int, amount int, pay_date date); 6 | Create table If Not Exists employee (employee_id int, department_id int); 7 | 8 | Truncate table salary; 9 | insert into salary (id, employee_id, amount, pay_date) values ('1', '1', '9000', '2017/03/31'); 10 | insert into salary (id, employee_id, amount, pay_date) values ('2', '2', '6000', '2017/03/31'); 11 | insert into salary (id, employee_id, amount, pay_date) values ('3', '3', '10000', 
'2017/03/31'); 12 | insert into salary (id, employee_id, amount, pay_date) values ('4', '1', '7000', '2017/02/25'); 13 | insert into salary (id, employee_id, amount, pay_date) values ('5', '2', '6000', '2017/02/28'); 14 | insert into salary (id, employee_id, amount, pay_date) values ('6', '3', '8000', '2017/02/28'); 15 | 16 | Truncate table employee; 17 | insert into employee (employee_id, department_id) values ('1', '1'); 18 | insert into employee (employee_id, department_id) values ('2', '2'); 19 | insert into employee (employee_id, department_id) values ('3', '2'); -------------------------------------------------------------------------------- /LeetCode/615_Average_Salary/mysql.sql: -------------------------------------------------------------------------------- 1 | -- MySQL plain solution 2 | SELECT 3 | a.pay_month 4 | ,a.department_id, 5 | CASE 6 | WHEN a.dept_avg_sal < b.corp_avg_sal THEN "lower" 7 | WHEN a.dept_avg_sal = b.corp_avg_sal THEN "same" 8 | ELSE "higher" 9 | END AS comparison 10 | FROM 11 | (SELECT 12 | date_format(pay_date, '%Y-%m') as pay_month 13 | ,e.department_id 14 | ,AVG(amount) AS dept_avg_sal 15 | FROM salary AS s 16 | JOIN employee AS e 17 | ON s.employee_id = e.employee_id 18 | GROUP BY pay_month, e.department_id 19 | ) AS a 20 | JOIN 21 | (SELECT 22 | date_format(pay_date, '%Y-%m') as pay_month 23 | ,AVG(amount) AS corp_avg_sal 24 | FROM salary GROUP BY pay_month 25 | ) AS b 26 | ON a.pay_month = b.pay_month 27 | ORDER BY pay_month, department_id; 28 | 29 | 30 | /* MySQL Solution using CTE*/ 31 | WITH myCTE1 AS ( 32 | SELECT s.employee_id, s.amount, s.pay_date,e.department_id, 33 | DATE_FORMAT(s.pay_date, "%Y-%m") as y_m, 34 | ROUND(AVG(amount) OVER(PARTITION BY DATE_FORMAT(s.pay_date, "%Y-%m")),2) 'company_avg' 35 | FROM salary s join employee e 36 | ON s.employee_id = e.employee_id 37 | ), 38 | myCTE2 as ( 39 | SELECT y_m as pay_period, department_id, company_avg, ROUND(AVG(amount),2) 'department_avg' 40 | FROM myCTE1 41 | GROUP BY 
department_id, y_m 42 | ) 43 | SELECT *, 44 | CASE 45 | WHEN company_avg < department_avg THEN 'higher' 46 | WHEN company_avg > department_avg THEN 'lower' 47 | ELSE 'same' -- equal averages must report 'same', not 'lower' 48 | END as 'comparison' 49 | FROM myCTE2 50 | ORDER BY pay_period desc, department_id 51 | 52 | -------------------------------------------------------------------------------- /LeetCode/615_Average_Salary/mysql_session_vars.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | a.pay_month 3 | ,a.department_id, 4 | CASE 5 | WHEN a.dept_avg_sal < b.corp_avg_sal THEN "lower" 6 | WHEN a.dept_avg_sal = b.corp_avg_sal THEN "same" 7 | ELSE "higher" 8 | END AS comparison 9 | FROM 10 | (SELECT 11 | date_format(pay_date, '%Y-%m') as pay_month 12 | ,e.department_id 13 | ,AVG(amount) AS dept_avg_sal 14 | FROM salary AS s 15 | JOIN employee AS e 16 | ON s.employee_id = e.employee_id 17 | GROUP BY pay_month, e.department_id 18 | ) AS a 19 | JOIN 20 | (SELECT 21 | date_format(pay_date, '%Y-%m') as pay_month 22 | ,AVG(amount) AS corp_avg_sal 23 | FROM salary GROUP BY pay_month 24 | ) AS b 25 | ON a.pay_month = b.pay_month 26 | ORDER BY pay_month, department_id; -------------------------------------------------------------------------------- /LeetCode/618_Students_Report_by_Geography/README.md: -------------------------------------------------------------------------------- 1 | # Students Report By Geography 2 | 3 | A U.S graduate school has students from Asia, Europe and America. The students' location information are stored in table student as below. 4 | 5 | ``` 6 | | name | continent | 7 | |--------|-----------| 8 | | Jack | America | 9 | | Pascal | Europe | 10 | | Xi | Asia | 11 | | Jane | America | 12 | ``` 13 | 14 | Pivot the continent column in this table so that each name is sorted alphabetically and displayed underneath its corresponding continent. The output headers should be America, Asia and Europe respectively. 
It is guaranteed that the student number from America is no less than either Asia or Europe. 15 | 16 | 17 | For the sample input, the output is: 18 | 19 | ``` 20 | | America | Asia | Europe | 21 | |---------|------|--------| 22 | | Jack | Xi | Pascal | 23 | | Jane | | | 24 | ``` 25 | 26 | Follow-up: If it is unknown which continent has the most students, can you write a query to generate the student report? 27 | 28 | 29 | ## Solution 30 | This is an exercise of pivoting table, though not a very good one. For more representative pivoting exercise, check this [link](https://google.com). The challenging part in this exercise is that the data are text, and cannot be aggregated over in the pivoted table. So you must assign it a proxy index, and join each column. 31 | 32 | Using session variable can solve the problem, but it is very error prone (see [here](mysql_session_vars.sql)). In this document, I present a solution utilizing *ROW_NUMBER* window function (available in MS SQL and MySQL8) and full join (not available in MySQL8). So this solution does not work in MySQL server. 33 | 34 | 35 | #### Step 1. Build Temporary Tables 36 | ``` 37 | mysql> SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'America'; 38 | +----+------+ 39 | | ID | name | 40 | +----+------+ 41 | | 1 | Jack | 42 | | 2 | Jane | 43 | +----+------+ 44 | 2 rows in set (0.01 sec) 45 | 46 | mysql> SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'Asia'; 47 | +----+------+ 48 | | ID | name | 49 | +----+------+ 50 | | 1 | Xi | 51 | +----+------+ 52 | 1 row in set (0.00 sec) 53 | 54 | mysql> SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'Europe'; 55 | +----+--------+ 56 | | ID | name | 57 | +----+--------+ 58 | | 1 | Pascal | 59 | +----+--------+ 60 | 1 row in set (0.00 sec) 61 | ``` 62 | 63 | #### Step 2. Full Join 64 | In MS SQL, use *FULL JOIN* to combine the three temporary tables, in any order. 
Note that when joining the third table, the *JOIN* condition could match to either the first table or the second table. 65 | 66 | ```sql 67 | -- MS SQL 68 | SELECT 69 | a.name AS America 70 | ,b.name AS Asia 71 | ,c.name AS Europe 72 | FROM 73 | (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'America') a 74 | FULL JOIN 75 | (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'Asia') b 76 | ON a.ID = b.ID 77 | FULL JOIN 78 | (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'Europe') c 79 | ON c.ID = b.ID 80 | OR c.ID = a.ID; 81 | ``` 82 | 83 | In a different order: we don't have to know in advance which group has most students. 84 | ```sql 85 | -- MS SQL 86 | SELECT 87 | a.name AS America 88 | ,b.name AS Asia 89 | ,c.name AS Europe 90 | FROM 91 | (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'Europe') c 92 | FULL JOIN 93 | (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'Asia') b 94 | ON b.ID = c.ID 95 | FULL JOIN 96 | (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'America') a 97 | ON a.ID = b.ID 98 | OR a.ID = c.ID; 99 | ``` 100 | 101 | ### Join with original table to use maximum range of row numbers 102 | 103 | ```sql 104 | SELECT a.name AS America, 105 | b.name AS Asia, 106 | c.name AS Europe 107 | FROM 108 | (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM students) r 109 | LEFT JOIN 110 | (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM students WHERE continent = 'Asia') b USING(ID) 111 | LEFT JOIN 112 | (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM students WHERE continent = 'America') a USING(ID) 113 | LEFT JOIN 114 | (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM students WHERE continent = 'Europe') c USING(ID) 115 | WHERE NOT(a.ID IS NULL AND b.ID IS NULL AND c.ID IS NULL) 116 | ``` 117 | 
-------------------------------------------------------------------------------- /LeetCode/618_Students_Report_by_Geography/db.sql: -------------------------------------------------------------------------------- 1 | DROP SCHEMA IF EXISTS LeetCode; 2 | CREATE SCHEMA LeetCode; 3 | USE LeetCode; 4 | 5 | Create table If Not Exists student (name varchar(50), continent varchar(7)); 6 | Truncate table student; 7 | insert into student (name, continent) values ('Jane', 'America'); 8 | insert into student (name, continent) values ('Pascal', 'Europe'); 9 | insert into student (name, continent) values ('Xi', 'Asia'); 10 | insert into student (name, continent) values ('Jack', 'America'); -------------------------------------------------------------------------------- /LeetCode/618_Students_Report_by_Geography/mssql_full_join.sql: -------------------------------------------------------------------------------- 1 | -- MS SQL 2 | SELECT 3 | a.name AS America 4 | ,b.name AS Asia 5 | ,c.name AS Europe 6 | FROM 7 | (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'America') a 8 | FULL JOIN 9 | (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'Asia') b 10 | ON a.ID = b.ID 11 | FULL JOIN 12 | (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'Europe') c 13 | ON c.ID = b.ID 14 | OR c.ID = a.ID; -------------------------------------------------------------------------------- /LeetCode/618_Students_Report_by_Geography/mysql_session_vars.sql: -------------------------------------------------------------------------------- 1 | -- MySQL 2 | SELECT 3 | america.name AS America 4 | ,asia.name as Asia 5 | ,europe.name as Europe 6 | FROM 7 | (SELECT @a := 0, @b := 0, @c := 0) AS vars 8 | ,(SELECT @a := @a + 1 AS ID, name FROM student WHERE continent = "America" ORDER BY name) AS america 9 | LEFT JOIN 10 | (SELECT @b := @b + 1 AS ID, name FROM student WHERE continent = "Asia" ORDER BY name) 
AS asia 11 | ON america.ID = asia.ID 12 | LEFT JOIN 13 | (SELECT @c := @c + 1 AS ID, name FROM student WHERE continent = "Europe" ORDER BY name) AS europe 14 | ON asia.ID = europe.ID 15 | OR america.ID = europe.ID; -------------------------------------------------------------------------------- /LeetCode/618_Students_Report_by_Geography/self-join-rownums.sql: -------------------------------------------------------------------------------- 1 | -- table is named `student` (see db.sql); `students` does not exist 2 | SELECT a.name AS America, 3 | b.name AS Asia, 4 | c.name AS Europe 5 | FROM 6 | (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student) r 7 | LEFT JOIN 8 | (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'Asia') b USING(ID) 9 | LEFT JOIN 10 | (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'America') a USING(ID) 11 | LEFT JOIN 12 | (SELECT ROW_NUMBER() OVER (ORDER BY name) AS ID, name FROM student WHERE continent = 'Europe') c USING(ID) 13 | WHERE NOT(a.ID IS NULL AND b.ID IS NULL AND c.ID IS NULL) -------------------------------------------------------------------------------- /LeetCode/others/1045_Customers_Who_Bought_All_Products.sql: -------------------------------------------------------------------------------- 1 | select customer_id 2 | from Customer 3 | group by customer_id 4 | having count(distinct product_key) = (select count(distinct product_key) from Product) 5 | -------------------------------------------------------------------------------- /LeetCode/others/1050_Actors_and_Directors_Who_Cooperated_At_Least_Three_Times.sql: -------------------------------------------------------------------------------- 1 | select actor_id, director_id 2 | from ActorDirector 3 | group by actor_id, director_id 4 | having count(*) >= 3 5 | -------------------------------------------------------------------------------- /LeetCode/others/1068_Product_Sales_Analysis_I.sql: 
-------------------------------------------------------------------------------- 1 | select b.product_name, a.year, a.price 2 | from Sales a 3 | join Product b 4 | on a.product_id = b.product_id 5 | -------------------------------------------------------------------------------- /LeetCode/others/1069_Product_Sales_Analysis_II.sql: -------------------------------------------------------------------------------- 1 | select product_id, sum(quantity) as total_quantity 2 | from Sales 3 | group by product_id 4 | -------------------------------------------------------------------------------- /LeetCode/others/1070_Product_Sales_Analysis_III.sql: -------------------------------------------------------------------------------- 1 | select top 1 with ties product_id, year as first_year, quantity, price 2 | from Sales 3 | order by rank() over (partition by product_id order by year); 4 | -------------------------------------------------------------------------------- /LeetCode/others/1075_Project_Employees_I.sql: -------------------------------------------------------------------------------- 1 | select 2 | a.project_id 3 | ,round(avg(b.experience_years), 2) as average_years 4 | from Project a 5 | join Employee b 6 | on a.employee_id = b.employee_id 7 | group by project_id 8 | -------------------------------------------------------------------------------- /LeetCode/others/1076_Project_Employees_II.sql: -------------------------------------------------------------------------------- 1 | select top 1 with ties project_id 2 | from Project 3 | group by project_id 4 | order by count(*) desc; 5 | -------------------------------------------------------------------------------- /LeetCode/others/1077_Project_Employees_III.sql: -------------------------------------------------------------------------------- 1 | select top 1 with ties project_id, a.employee_id 2 | from Project a 3 | join Employee b 4 | on a.employee_id = b.employee_id 5 | order by dense_rank() over 6 | (partition by 
project_id order by experience_years desc); 7 | -------------------------------------------------------------------------------- /LeetCode/others/1082_Sales_Analysis_I.sql: -------------------------------------------------------------------------------- 1 | select top 1 with ties seller_id 2 | from Sales 3 | group by seller_id 4 | order by sum(price) desc; 5 | -------------------------------------------------------------------------------- /LeetCode/others/1083_Sales_Analysis_II.sql: -------------------------------------------------------------------------------- 1 | with joined as ( 2 | select a.product_id, a.product_name, b.buyer_id 3 | from Product a 4 | join Sales b 5 | on a.product_id = b.product_id 6 | ) 7 | ,iphone as ( 8 | select distinct buyer_id 9 | from joined where product_name = 'iPhone' 10 | ) 11 | ,s8 as ( 12 | select distinct buyer_id 13 | from joined where product_name = 'S8' 14 | ) 15 | select a.buyer_id 16 | from s8 a 17 | left join iphone b 18 | on a.buyer_id = b.buyer_id 19 | where b.buyer_id is null 20 | -------------------------------------------------------------------------------- /LeetCode/others/1084_Sales_Analysis_III.sql: -------------------------------------------------------------------------------- 1 | select a.product_id, max(product_name) as product_name 2 | from Sales a 3 | join Product b 4 | on a.product_id = b.product_id 5 | group by a.product_id 6 | having max(sale_date) <= '2019-03-31' 7 | and min(sale_date) >= '2019-01-01' 8 | -------------------------------------------------------------------------------- /LeetCode/others/1097_Game_Play_Analysis_V.sql: -------------------------------------------------------------------------------- 1 | with ranked as ( 2 | select 3 | min(event_date) over (partition by player_id) as install_dt 4 | ,lead(event_date) over (partition by player_id order by event_date) as next_date 5 | ,rank() over (partition by player_id order by event_date) as rk 6 | from Activity 7 | ) 8 | select 9 | install_dt 
10 | ,count(*) as installs 11 | ,round( 12 | cast(sum(case when dateadd(day, 1, install_dt) = next_date then 1 else 0 end) as float) / 13 | count(*), 2) as day1_retention 14 | from ranked 15 | where rk = 1 16 | group by install_dt 17 | -------------------------------------------------------------------------------- /LeetCode/others/1098_Unpopular_Books.sql: -------------------------------------------------------------------------------- 1 | select b.book_id, b.name 2 | from (select * from Orders where dispatch_date > date_add('2019-06-23', interval -1 year)) a 3 | right join Books b 4 | on a.book_id = b.book_id 5 | where b.available_from < date_add('2019-06-23',interval -1 month) 6 | group by b.book_id, b.name 7 | having coalesce(sum(quantity), 0) < 10 8 | order by b.book_id 9 | -------------------------------------------------------------------------------- /LeetCode/others/1107_New_Users_Daily_Count.sql: -------------------------------------------------------------------------------- 1 | select login_date, count(user_id) as user_count 2 | from 3 | (select user_id, min(activity_date) as login_date 4 | from Traffic 5 | where activity = 'login' 6 | group by user_id) t 7 | where datediff('2019-06-30', login_date) <= 90 8 | group by login_date 9 | -------------------------------------------------------------------------------- /LeetCode/others/1112_Highest_Grade_For_Each_Student.sql: -------------------------------------------------------------------------------- 1 | with unordered as ( 2 | select top 1 with ties student_id, course_id, grade 3 | from Enrollments 4 | order by rank() over (partition by student_id order by grade desc, course_id) 5 | ) 6 | select * from unordered order by student_id 7 | -------------------------------------------------------------------------------- /LeetCode/others/1113_Reported_Posts.sql: -------------------------------------------------------------------------------- 1 | select extra as report_reason, count(distinct post_id) as 
report_count 2 | from Actions 3 | where action_date = '2019-07-04' 4 | and action = 'report' 5 | group by extra 6 | -------------------------------------------------------------------------------- /LeetCode/others/1126_Active_Businesses.sql: -------------------------------------------------------------------------------- 1 | with tmp as ( 2 | select business_id, occurences, avg(occurences) over (partition by event_type) as average 3 | from Events 4 | ) 5 | select business_id 6 | from tmp 7 | where occurences > average 8 | group by business_id 9 | having count(*) > 1 10 | -------------------------------------------------------------------------------- /LeetCode/others/1127_User_Purchase_Platform.sql: -------------------------------------------------------------------------------- 1 | with skeleton as ( 2 | select distinct spend_date, 'desktop' as platform from Spending 3 | union all 4 | select distinct spend_date, 'mobile' as platform from Spending 5 | union all 6 | select distinct spend_date, 'both' as platform from Spending 7 | ) 8 | 9 | ,labelled as ( 10 | select 11 | spend_date, user_id, 12 | case when count(distinct platform) = 1 then max(platform) else 'both' end as platform 13 | from Spending group by spend_date, user_id 14 | ) 15 | 16 | ,agg as ( 17 | select 18 | a.spend_date, b.platform, sum(a.amount) as total_amount, count(distinct a.user_id) as total_users 19 | from Spending a 20 | join labelled b 21 | on a.spend_date = b.spend_date 22 | and a.user_id = b.user_id 23 | group by a.spend_date, b.platform 24 | ) 25 | 26 | select 27 | a.spend_date, a.platform 28 | ,coalesce(b.total_amount, 0) as total_amount 29 | ,coalesce(b.total_users, 0) as total_users 30 | from skeleton as a 31 | left join agg as b 32 | on a.spend_date = b.spend_date 33 | and a.platform = b.platform 34 | -------------------------------------------------------------------------------- /LeetCode/others/1132_Reported_Posts_II.sql: 
-------------------------------------------------------------------------------- 1 | select round(100*avg( 2 | if(removed is null, 0, removed/reported)),2) as average_daily_percent from 3 | ( 4 | select action_date date, count(distinct post_id) as reported 5 | from actions 6 | where extra='spam' 7 | group by action_date 8 | ) t1 left join 9 | ( 10 | select action_date date, count(distinct actions.post_id) as removed 11 | from actions, removals 12 | where extra='spam' and actions.post_id = removals.post_id 13 | group by action_date 14 | ) t2 on (t1.date=t2.date) 15 | -------------------------------------------------------------------------------- /LeetCode/others/1141_User_Activity_for_the_Past_30_Days_I.sql: -------------------------------------------------------------------------------- 1 | select activity_date as day, count(distinct user_id) as active_users 2 | from Activity 3 | where activity_date between '2019-06-28' and '2019-07-27' 4 | group by activity_date 5 | -------------------------------------------------------------------------------- /LeetCode/others/1142_User_Activity_for_the_Past_30_Days_II.sql: -------------------------------------------------------------------------------- 1 | select round(coalesce(avg(sess), 0), 2) as average_sessions_per_user from ( 2 | select user_id, count(distinct session_id) as sess 3 | from Activity 4 | where activity_date between '2019-06-28' and '2019-07-27' 5 | group by user_id 6 | ) a; 7 | -------------------------------------------------------------------------------- /LeetCode/others/1148_Article_Views_I.sql: -------------------------------------------------------------------------------- 1 | select distinct author_id as id 2 | from Views 3 | where author_id = viewer_id 4 | order by author_id 5 | -------------------------------------------------------------------------------- /LeetCode/others/1149_Article_Views_II.sql: -------------------------------------------------------------------------------- 1 | select distinct 
viewer_id as id 2 | from Views 3 | group by viewer_id, view_date 4 | having count(distinct viewer_id, article_id) > 1 5 | -------------------------------------------------------------------------------- /LeetCode/others/1158_Market_Analysis_I.sql: -------------------------------------------------------------------------------- 1 | with norder as ( 2 | select buyer_id, count(*) as n from Orders 3 | where order_date >= '2019-01-01' 4 | group by buyer_id 5 | ) 6 | select a.user_id as buyer_id, join_date, coalesce(n, 0) as orders_in_2019 7 | from Users a 8 | left join norder b 9 | on a.user_id = b.buyer_id 10 | -------------------------------------------------------------------------------- /LeetCode/others/1159_Market_Analysis_II.sql: -------------------------------------------------------------------------------- 1 | with ranked as ( 2 | select seller_id, item_id, rank() over (partition by seller_id order by order_date) as rk 3 | from Orders 4 | ), 5 | 6 | joined_items as ( 7 | select a.seller_id, a.item_id, a.rk, b.item_brand 8 | from ranked a 9 | join Items b 10 | on a.item_id = b.item_id 11 | where rk = 2) 12 | 13 | select 14 | a.user_id as seller_id, 15 | case when a.favorite_brand = b.item_brand then 'yes' else 'no' end as '2nd_item_fav_brand' 16 | from Users a 17 | left join joined_items b 18 | on a.user_id = b.seller_id 19 | -------------------------------------------------------------------------------- /LeetCode/others/1164_Product_Price_at_a_Given_Date.sql: -------------------------------------------------------------------------------- 1 | with skeleton as ( 2 | select distinct product_id from Products 3 | ), 4 | recent as ( 5 | select top 1 with ties product_id, new_price 6 | from Products 7 | where change_date <= '2019-08-16' 8 | order by rank() over (partition by product_id order by change_date desc) 9 | ) 10 | select a.product_id, coalesce(b.new_price, 10) as price 11 | from skeleton a 12 | left join recent b 13 | on a.product_id = b.product_id 14 | 
-------------------------------------------------------------------------------- /LeetCode/others/1173_Immediate_Food_Delivery_I.sql: -------------------------------------------------------------------------------- 1 | select round(100 * sum(order_date = customer_pref_delivery_date) / count(*), 2) as immediate_percentage from Delivery; 2 | -------------------------------------------------------------------------------- /LeetCode/others/1174_Immediate_Food_Delivery_II.sql: -------------------------------------------------------------------------------- 1 | with first as ( 2 | select top 1 with ties delivery_id, customer_id, order_date, customer_pref_delivery_date 3 | from Delivery order by rank() over (partition by customer_id order by order_date) 4 | ) 5 | select round(100 * sum(case when order_date = customer_pref_delivery_date then 1.0 else 0.0 end) / count(distinct customer_id), 2) as immediate_percentage from first; 6 | -------------------------------------------------------------------------------- /LeetCode/others/1179_Reformat_Department_Table.sql: -------------------------------------------------------------------------------- 1 | -- MS SQL 2 | select 3 | id, 4 | jan as jan_revenue, 5 | feb as feb_revenue, 6 | mar as mar_revenue, 7 | apr as apr_revenue, 8 | may as may_revenue, 9 | jun as jun_revenue, 10 | jul as jul_revenue, 11 | aug as aug_revenue, 12 | sep as sep_revenue, 13 | oct as oct_revenue, 14 | nov as nov_revenue, 15 | dec as dec_revenue 16 | from department 17 | pivot 18 | ( 19 | max(revenue) 20 | for month in (jan, feb, mar, apr, may, jun, jul, aug, sep, oct, nov, dec) 21 | ) as t; 22 | 23 | -- MySQL 24 | select 25 | id 26 | ,max(case when month = 'Jan' then revenue else null end) as 'Jan_Revenue' 27 | ,max(case when month = 'Feb' then revenue else null end) as 'Feb_Revenue' 28 | ,max(case when month = 'Mar' then revenue else null end) as 'Mar_Revenue' 29 | ,max(case when month = 'Apr' then revenue else null end) as 'Apr_Revenue' 30 | ,max(case 
when month = 'May' then revenue else null end) as 'May_Revenue' 31 | ,max(case when month = 'Jun' then revenue else null end) as 'Jun_Revenue' 32 | ,max(case when month = 'Jul' then revenue else null end) as 'Jul_Revenue' 33 | ,max(case when month = 'Aug' then revenue else null end) as 'Aug_Revenue' 34 | ,max(case when month = 'Sep' then revenue else null end) as 'Sep_Revenue' 35 | ,max(case when month = 'Oct' then revenue else null end) as 'Oct_Revenue' 36 | ,max(case when month = 'Nov' then revenue else null end) as 'Nov_Revenue' 37 | ,max(case when month = 'Dec' then revenue else null end) as 'Dec_Revenue' 38 | from Department 39 | group by id 40 | order by id 41 | 42 | -------------------------------------------------------------------------------- /LeetCode/others/175_Combine_Two_Tables.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | p.FirstName 3 | ,p.LastName 4 | ,a.City 5 | ,a.State 6 | FROM 7 | Person AS p 8 | LEFT JOIN 9 | Address AS a 10 | ON p.PersonId = a.PersonId; -------------------------------------------------------------------------------- /LeetCode/others/176_Second_Highest_Salary.sql: -------------------------------------------------------------------------------- 1 | -- check if NULL (only row row in table) 2 | -- check if distinct is necessary (disallow duplicate) 3 | SELECT 4 | IFNULL( 5 | (SELECT 6 | DISTINCT Salary 7 | FROM Employee 8 | ORDER BY Salary DESC 9 | LIMIT 1 OFFSET 1) 10 | , NULL) AS SecondHighestSalary; -------------------------------------------------------------------------------- /LeetCode/others/177_Nth_Highest_Salary.sql: -------------------------------------------------------------------------------- 1 | CREATE FUNCTION getNthHighestSalary(N INT) RETURNS INT 2 | BEGIN 3 | DECLARE M INT DEFAULT N - 1; 4 | RETURN ( 5 | SELECT DISTINCT Salary FROM Employee ORDER BY Salary DESC LIMIT 1 OFFSET M 6 | ); 7 | END 8 | 9 | SELECT * /*This is the outer query part */ 10 | FROM 
Employee Emp1 11 | WHERE (N-1) = ( /* Subquery starts here */ 12 | SELECT COUNT(DISTINCT Emp2.Salary) -- space required after DISTINCT 13 | FROM Employee Emp2 14 | WHERE Emp2.Salary > Emp1.Salary) -------------------------------------------------------------------------------- /LeetCode/others/178_Rank_Scores.sql: -------------------------------------------------------------------------------- 1 | -- MS SQL 2 | SELECT 3 | Score 4 | ,DENSE_RANK() over (ORDER BY Score DESC) AS Rank 5 | FROM scores 6 | ORDER BY Score DESC; 7 | 8 | -- MYSQL 9 | SELECT Scores.Score, Rank 10 | FROM Scores 11 | Left join (Select Score, @Rank:=@Rank+1 as Rank 12 | From (Select Distinct Score From Scores Order by Score DESC) S1, (Select @Rank:=0) var) S2 on Scores.Score=S2.Score 13 | order by Scores.Score desc; -------------------------------------------------------------------------------- /LeetCode/others/180_Consecutive_Numbers.sql: -------------------------------------------------------------------------------- 1 | -- analysis 2 | -- three consecutive rows 3 | -- join twice, each time offsetting id by 1 4 | -- filter by equating three columns 5 | -- NULL? 
use inner join 6 | 7 | SELECT 8 | DISTINCT n1.Num 9 | FROM Number n1 10 | JOIN Number n2 11 | ON n2.Id = n1.Id + 1 12 | JOIN Number n3 13 | ON n3.Id = n2.Id + 1 14 | WHERE n1.Num = n2.Num 15 | AND n2.Num = n3.Num; 16 | 17 | -- n1.1 matches to n2.2 18 | /* 19 | 1 1 1 20 | 1 1 2 21 | 1 2 1 22 | 2 1 2 23 | 1 2 2 24 | 2 2 - 25 | 2 - 26 | */ 27 | 28 | -- shift in the other direction also works 29 | -- n1.2 matches to n2.1 30 | SELECT 31 | DISTINCT n1.Num 32 | FROM Number n1 33 | JOIN Number n2 34 | ON n2.Id = n1.Id - 1 35 | JOIN Number n3 36 | ON n3.Id = n2.Id - 1 37 | WHERE n1.Num = n2.Num 38 | AND n2.Num = n3.Num; 39 | 40 | -- can use lead or lag 41 | SELECT DISTINCT Num AS ConsecutiveNums 42 | FROM ( 43 | SELECT 44 | Num 45 | ,LAG(Num, 1) OVER (ORDER BY Id) AS next 46 | ,LAG(Num, 2) OVER (ORDER BY Id) AS next_next 47 | FROM Logs 48 | ) AS three_day_log 49 | WHERE Num = next AND next = next_next; 50 | 51 | -- ERROR: Windowed functions can only appear in the SELECT or ORDER BY clauses. 52 | -- SELECT 53 | -- Num 54 | -- FROM Logs 55 | -- WHERE Num = LAG(Num, 1) OVER (ORDER BY Id) 56 | -- AND NUM = LAG(Num, 2) OVER (ORDER BY Id); 57 | 58 | /* LAG_id, LAG_num REORDER 59 | 1 N 60 | 2 1 61 | 3 2 62 | 4 3 63 | 5 4 64 | 6 5 65 | 7 6 66 | */ 67 | SELECT 68 | Id 69 | ,LAG(Num, 1) OVER (ORDER BY Id) lag_1 70 | FROM Logs 71 | ORDER BY LAG(Num, 1) OVER (ORDER BY Id); -- NULL FIRST -------------------------------------------------------------------------------- /LeetCode/others/181_Employees_Earning_More_Than_Their_Managers.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | e.Name AS Employee 3 | FROM Employee AS e 4 | JOIN 5 | Employee AS m 6 | WHERE e.ManagerId = m.Id 7 | AND e.Salary > m.Salary; -------------------------------------------------------------------------------- /LeetCode/others/182_Duplicate_Emails.sql: -------------------------------------------------------------------------------- 1 | SELECT Email 2 | FROM Person 
3 | GROUP BY Email 4 | HAVING COUNT(*) > 1; -------------------------------------------------------------------------------- /LeetCode/others/183_Customers_Who_Never_Order.sql: -------------------------------------------------------------------------------- 1 | SELECT Name AS Customers 2 | FROM Customers 3 | WHERE 4 | Id NOT IN (SELECT DISTINCT CustomerId FROM Orders); 5 | 6 | SELECT 7 | c.Name AS Customers 8 | FROM Customers AS c 9 | LEFT JOIN Orders AS o 10 | ON c.Id = o.CustomerId 11 | WHERE o.Id IS NULL; -------------------------------------------------------------------------------- /LeetCode/others/184_Department_Highest_Salary.sql: -------------------------------------------------------------------------------- 1 | 2 | 3 | /* ------------- 4 | BEGINNER SOLUTION: CORRELATED (N+1) 5 | */ 6 | -- succinct but correlated 7 | SELECT 8 | d.Name AS 'Department' 9 | ,e.Name AS 'Employee' 10 | ,e.Salary 11 | FROM Employee AS e 12 | JOIN Department AS d 13 | ON e.DepartmentId = d.Id 14 | WHERE 15 | (SELECT COUNT(DISTINCT e2.Salary) 16 | FROM 17 | Employee e2 18 | WHERE 19 | e2.Salary > e.Salary 20 | AND e.DepartmentId = e2.DepartmentId 21 | ) = 0; 22 | 23 | -- cleaner than above 24 | SELECT 25 | d.Name AS 'Department' 26 | ,e.Name AS 'Employee' 27 | ,e.Salary 28 | FROM Employee AS e 29 | JOIN Department AS d 30 | ON e.DepartmentId = d.Id 31 | WHERE NOT EXISTS 32 | (SELECT * FROM Employee e2 33 | WHERE e2.Salary > e.Salary 34 | AND e2.DepartmentId = e.DepartmentId); 35 | 36 | -- sample code from SQL Antipattern pdf book 37 | -- BugsProducts: product_id, bug_id 38 | -- Bugs: bug_id, date_reported 39 | SELECT bp1.product_id, b1.date_reported AS latest, b1.bug_id 40 | FROM Bugs b1 JOIN BugsProducts bp1 USING (bug_id) 41 | WHERE NOT EXISTS 42 | (SELECT * FROM Bugs b2 JOIN BugsProducts bp2 USING (bug_id) 43 | WHERE bp1.product_id = bp2.product_id 44 | AND b1.date_reported < b2.date_reported); 45 | 46 | /* ------------- 47 | INTERMEDOATE SOLUTION: NO CORRELATED SUBQUERY 48 | 
*/ 49 | -- 50 | -- use departmentId and Salary to uniquely identify employee 51 | -- assume no two employees in same dept shares same highest salary 52 | SELECT 53 | d.Name AS Department 54 | ,e.Name AS Employee 55 | ,a.max_salary AS Salary 56 | FROM 57 | Employee AS e 58 | JOIN 59 | Department AS d 60 | ON e.DepartmentId = d.Id 61 | JOIN 62 | (SELECT 63 | DepartmentId 64 | ,MAX(Salary) AS max_salary 65 | FROM Employee 66 | GROUP BY DepartmentId) AS a 67 | ON e.DepartmentId = a.DepartmentId 68 | WHERE 69 | e.Salary = a.max_salary 70 | AND e.DepartmentId = a.DepartmentId; 71 | 72 | -- use view to simplify code 73 | CREATE VIEW latest_bug AS 74 | SELECT -- in each group: product id is fixed, date_reported is aggreated, bug_id is ignored 75 | product_id 76 | ,MAX(date_reported) AS latest 77 | FROM Bugs b JOIN BugsProducts bp 78 | ON b.bug_id = bp.bug_id -- primary key, one-to-one 79 | GROUP BY product_id; 80 | 81 | -- now we know the latest day when bug for each product_id occurs 82 | -- use latest_day and product_id to retrieve bug_id 83 | -- more than one bug_id may be returned if bug occured more than once on latest day 84 | SELECT 85 | bp.product_id 86 | ,lb.latest 87 | ,bp.bug_id 88 | FROM BugsProducts bp JOIN latest_bug lb 89 | ON bp.product_id = lb.product_id; -- product_id is unique in tmp table, not unique in left table 90 | AND lb.date_reported = lb.latest; -- without this predicate, latest will be matched to all rows on left with same product id 91 | 92 | DROP VIEW; 93 | 94 | -- textbook version 95 | SELECT m.product_id, m.latest, b1.bug_id 96 | FROM Bugs b1 JOIN BugsProducts bp1 USING (bug_id) 97 | JOIN (SELECT bp2.product_id, MAX(b2.date_reported) AS latest 98 | FROM Bugs b2 JOIN BugsProducts bp2 USING (bug_id) 99 | GROUP BY bp2.product_id) m 100 | ON (bp1.product_id = m.product_id AND b1.date_reported = m.latest); 101 | 102 | /* ------------- 103 | ULTIMATE SOLUTION: JOIN 104 | */ 105 | 106 | CREATE VIEW tmp_join AS 107 | SELECT bp.product_id, b.bug_id, 
b.date_reported 108 | FROM BugsProducts bp JOIN Bugs b 109 | WHERE bp.bug_id = b.bug_id; 110 | 111 | -- left join with itself, matching every row to rows with same product_id and earlier date 112 | -- those without a match is what we want (the latest) 113 | SELECT 114 | t1.* 115 | FROM tmp_join t1 116 | LEFT JOIN tmp_join t2 117 | ON t1.product_id = t2.product_id 118 | AND t1.date_reported < t2.date_reported -- latest date in left will match to null on right 119 | WHERE t2.bug_id IS NULL; 120 | 121 | -- textbook version 122 | SELECT bp1.product_id, b1.date_reported AS latest, b1.bug_id 123 | FROM Bugs b1 JOIN BugsProducts bp1 ON (b1.bug_id = bp1.bug_id) 124 | LEFT OUTER JOIN (Bugs AS b2 JOIN BugsProducts AS bp2 ON (b2.bug_id = bp2.bug_id)) 125 | ON (bp1.product_id = bp2.product_id AND (b1.date_reported < b2.date_reported 126 | OR b1.date_reported = b2.date_reported AND b1.bug_id < b2.bug_id)) 127 | WHERE b2.bug_id IS NULL; 128 | 129 | -------------------------------------------------------------------------------- /LeetCode/others/185_Department_Top_Three_Salaries.sql: -------------------------------------------------------------------------------- 1 | -- MS SQL 2 | WITH department_ranking AS 3 | (SELECT 4 | e.Name AS Employee 5 | ,d.Name AS Department 6 | ,e.Salary 7 | ,DENSE_RANK() OVER (PARTITION BY DepartmentId ORDER BY Salary DESC) AS rnk 8 | FROM Employee AS e 9 | JOIN Department AS d 10 | ON e.DepartmentId = d.Id) 11 | 12 | SELECT 13 | Department 14 | ,Employee 15 | ,Salary 16 | FROM department_ranking 17 | WHERE rnk <= 3 18 | ORDER BY Department ASC, Salary DESC; -------------------------------------------------------------------------------- /LeetCode/others/196_Delete_Duplicate_Emails.sql: -------------------------------------------------------------------------------- 1 | -- do not use sub-clause or IN clause 2 | -- it does not work when something is deleted 3 | DELETE p2.* 4 | FROM Person p1 5 | JOIN Person p2 6 | ON p1.Email = p2.Email 7 | WHERE p1.Id 
< p2.Id; 8 | 9 | 10 | # DELETE p1 11 | # FROM Person p1, 12 | # Person p2 13 | # WHERE 14 | # p1.Email = p2.Email 15 | # AND p1.Id > p2.Id; -------------------------------------------------------------------------------- /LeetCode/others/197_Rising_Temperature.sql: -------------------------------------------------------------------------------- 1 | # 1 matches 2 2 | # Id matches to date 3 | SELECT 4 | w1.Id 5 | FROM Weather w1 6 | JOIN Weather w2 7 | ON w1.Id = w2.Id - 1 8 | WHERE w1.Temperature < w2.Temperature; 9 | 10 | # trap: ask for clarification! 11 | # whether 1 day maps to 1 record! 12 | # whether every day has record! 13 | # DATEDIFF(later date, earlier date) 14 | 15 | SELECT w2.Id 16 | FROM Weather w1 17 | JOIN Weather w2 18 | ON DATEDIFF(w2.RecordDate, w1.RecordDate) = 1 19 | AND w2.Temperature > w1.Temperature; 20 | 21 | # no function in ON clause, check query plan 22 | SELECT w2.Id 23 | FROM Weather w1, Weather w2 24 | WHERE DATEDIFF(w2.RecordDate, w1.RecordDate) = 1 25 | AND w2.Temperature > w1.Temperature; -------------------------------------------------------------------------------- /LeetCode/others/262_Trips_and_Users.sql: -------------------------------------------------------------------------------- 1 | -- filter out banned users (dirver and clients) 2 | -- calculate rate 3 | 4 | SELECT 5 | t.Request_at AS Day 6 | ,ROUND(SUM(t.Status != "completed") / COUNT(*), 2) AS 'Cancellation Rate' 7 | FROM Trips t 8 | JOIN Users d ON t.Driver_Id = d.Users_Id 9 | JOIN Users c ON t.Client_Id = c.Users_Id 10 | WHERE d.Banned = "No" 11 | AND c.Banned = "No" 12 | AND t.Request_at BETWEEN "2013-10-01" AND "2013-10-03" 13 | GROUP BY t.Request_at 14 | ORDER BY t.Request_at; 15 | 16 | WITH valid_user AS ( 17 | SELECT * 18 | FROM Users 19 | WHERE Banned = "No" 20 | ) 21 | , valid_trips AS ( 22 | SELECT * 23 | FROM Trips 24 | WHERE Request_at BETWEEN "2013-10-01" AND "2013-10-03" 25 | ) 26 | SELECT 27 | t.Request_at AS Day 28 | ,ROUND(SUM(t.Status != "completed") / 
COUNT(*), 2) AS 'Cancellation Rate' 29 | FROM valid_trips t 30 | JOIN valid_user d ON t.Driver_Id = d.Users_Id 31 | JOIN valid_user c ON t.Client_Id = c.Users_Id 32 | GROUP BY t.Request_at 33 | ORDER BY t.Request_at; 34 | 35 | SELECT 36 | t.Request_at AS Day 37 | ,ROUND(SUM(t.Status != "completed") / COUNT(*), 2) AS "Cancellation Rate" 38 | FROM Trips t 39 | JOIN (SELECT * FROM Users WHERE Banned = "No") d ON t.Driver_Id = d.Users_Id 40 | JOIN (SELECT * FROM Users WHERE Banned = "No") c ON t.Client_Id = c.Users_Id 41 | WHERE t.Request_at BETWEEN "2013-10-01" AND "2013-10-03" 42 | GROUP BY t.Request_at 43 | ORDER BY t.Request_at; 44 | 45 | SELECT 46 | Request_at as Day, 47 | ROUND(SUM(Status != "completed") / COUNT(*), 2) AS 'Cancellation Rate' 48 | FROM Trips 49 | WHERE Request_at BETWEEN '2013-10-01' AND '2013-10-03' 50 | AND Client_Id IN (SELECT Users_Id FROM Users WHERE Banned = 'No') 51 | AND Driver_Id IN (SELECT Users_Id FROM Users WHERE Banned = 'No') 52 | GROUP BY Request_at; 53 | 54 | SELECT 55 | Request_at AS Day 56 | ,ROUND(SUM(Status != "completed") / COUNT(*), 2) AS 'Cancellation Rate' 57 | FROM Trips 58 | WHERE Request_at BETWEEN "2013-10-01" AND "2013-10-03" 59 | AND Driver_Id IN (SELECT Users_Id FROM Users WHERE Banned = 'No') 60 | AND Client_Id IN (SELECT Users_Id FROM Users WHERE Banned = 'No') 61 | GROUP BY Request_at; -------------------------------------------------------------------------------- /LeetCode/others/569_Median_Employee_Salary.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | Id, Company, Salary 3 | FROM 4 | (SELECT 5 | e.Id, 6 | e.Salary, 7 | e.Company, 8 | IF(@prev = e.Company, @Rank:=@Rank + 1, @Rank:=1) AS rank, 9 | @prev:=e.Company 10 | FROM 11 | Employee e, (SELECT @Rank:=0, @prev:=0) AS temp 12 | ORDER BY e.Company , e.Salary , e.Id) Ranking 13 | INNER JOIN 14 | (SELECT 15 | COUNT(*) AS totalcount, Company AS name 16 | FROM 17 | Employee e2 18 | GROUP BY e2.Company) 
companycount ON companycount.name = Ranking.Company 19 | WHERE 20 | Rank = FLOOR((totalcount + 1) / 2) 21 | OR Rank = FLOOR((totalcount + 2) / 2) 22 | ; 23 | 24 | -- total count even: select two 25 | -- total count odd: select one 26 | WITH ranking AS ( 27 | SELECT 28 | Id 29 | ,Company 30 | ,Salary 31 | ,COUNT(*) OVER (PARTITION BY Company) AS total_count 32 | ,RANK() OVER (PARTITION BY Company ORDER BY Salary DESC) AS rank 33 | FROM Employee) 34 | SELECT 35 | Id 36 | ,Company 37 | ,Salary 38 | FROM ranking 39 | WHERE rank = FLOOR((total_count + 1) / 2) 40 | OR rank = FLOOR((total_count + 2) / 2); 41 | 42 | -------------------------------------------------------------------------------- /LeetCode/others/570_Managers_with_at_Least_5_Direct_Reports.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | m.Name 3 | FROM Employee AS e 4 | JOIN Employee AS m 5 | ON e.ManagerId = m.Id 6 | GROUP BY m.Name 7 | HAVING COUNT(*) >= 5 8 | ORDER BY m.Name; -------------------------------------------------------------------------------- /LeetCode/others/571_Find_Median_Given_Frequency_of_Numbers.sql: -------------------------------------------------------------------------------- 1 | 2 | SELECT 3 | AVG(T1.Number) as median 4 | FROM 5 | ( 6 | SELECT 7 | Number, 8 | @cum_freq AS lb, 9 | (@cum_freq:=@cum_freq + Frequency) AS ub 10 | FROM Numbers, (SELECT @cum_freq:=0) init 11 | ORDER BY Number 12 | ) AS T1, 13 | ( 14 | SELECT 15 | SUM(Frequency) AS total_freq 16 | FROM Numbers 17 | ) AS T2 18 | WHERE T1.lb < CEIL(T2.total_freq/2) 19 | AND T1.ub >= CEIL(T2.total_freq/2) 20 | 21 | /* u l 22 | 1 NULL 1 0 23 | 2 1 3 1 24 | 1 2 4 3 25 | 4 1 8 4 26 | 27 | If N is even: select avg(N/2, N/2 + 1) 28 | If b is first occuring at frequency N/2 + 1, it has low bound N/2, upper bound doesn't matter 29 | If c is last occuring at N/2, it has upper bound N/2, lower bound doesn't matter 30 | 31 | If N is odd, select CEIL(N/2), lb < CEIL(N/2) <= ub 32 | 
33 | Not that lb cannot include CEIL(N/2), because it means previous number's range includes median 34 | 35 | To share equal sign with even case, remove CEIL, lb <= N/2 <= CEIL(N/2), in odd case, lb <= N/2 < CEIL(N/2), lb can be as large as FLOOR(N/2) 36 | */ 37 | WITH lag AS ( 38 | SELECT 39 | Number 40 | ,Frequency AS cur 41 | ,LAG(Frequency, 1) OVER (ORDER BY Number) AS prev 42 | FROM Numbers 43 | ) 44 | ,bound AS ( 45 | SELECT 46 | Number 47 | ,Frequency 48 | ,SUM(prev) OVER (ORDER BY Number) AS lb 49 | ,SUM(cur) OVER (ORDER BY Number) AS up 50 | ,SUM(Frequency) OVER () AS total 51 | FROM lag 52 | ) 53 | SELECT 54 | AVG(Number) AS median 55 | FROM bound 56 | WHERE lb < CEIL(total / 2) 57 | AND up >= CEIL(total / 2) -------------------------------------------------------------------------------- /LeetCode/others/574_Winning_Candidate.sql: -------------------------------------------------------------------------------- 1 | -- There is a case when people vote for someone who is not in the candidate. 2 | -- The solution is finding out the winner id from vote and match to candidate. 3 | -- correct solution 4 | SELECT Name 5 | FROM Candidate 6 | WHERE id = ( 7 | SELECT CandidateID 8 | FROM Vote 9 | GROUP BY CandidateID 10 | ORDER BY COUNT(*) DESC 11 | LIMIT 1); 12 | 13 | -- incorrect: return most voted candidate that exists in Candidate 14 | -- if most voted does not exist in Candidate, it is missed (orphaned rows) 15 | SELECT 16 | c.Name 17 | FROM Candidate AS c 18 | LEFT JOIN Vote AS v 19 | ON c.id = v.CandidateId 20 | GROUP BY c.Name 21 | ORDER BY COUNT(v.id) DESC 22 | LIMIT 1; 23 | 24 | -- incorrect: return null if most voted candidate does not exist in Candidate 25 | -- at least show something! 
26 | SELECT 27 | c.Name 28 | FROM Candidate AS c 29 | RIGHT JOIN Vote AS v 30 | ON c.id = v.CandidateId 31 | GROUP BY c.Name 32 | ORDER BY COUNT(v.id) DESC 33 | LIMIT 1; -------------------------------------------------------------------------------- /LeetCode/others/577_Employee_Bonus.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | e.name 3 | ,b.bonus 4 | FROM Employee AS e 5 | LEFT JOIN Bonus AS b 6 | ON e.empId = b.empId 7 | WHERE b.bonus < 1000 8 | OR b.bonus IS NULL 9 | ORDER BY e.name; -------------------------------------------------------------------------------- /LeetCode/others/578_Get_Highest_Answer_Rate_Question.sql: -------------------------------------------------------------------------------- 1 | SELECT question_id AS survey_log 2 | FROM ( 3 | SELECT 4 | question_id 5 | ,SUM(action="answer") / SUM(action="show") AS ans_rate 6 | FROM survey_log 7 | GROUP BY question_id 8 | ORDER BY ans_rate DESC 9 | LIMIT 1 10 | ) AS _; -------------------------------------------------------------------------------- /LeetCode/others/579_Find_Cumulative_Salary_of_an_Employee.sql: -------------------------------------------------------------------------------- 1 | -- use self join three times (if month is not dense) 2 | -- name table e, e1, e2, e3; cummulatively sum e1, e2, e3 3 | SELECT 4 | e.Id 5 | ,e.Month 6 | ,IFNULL(e.Salary, 0) + IFNULL(e1.Salary, 0) + IFNULL(e2.Salary, 0) AS Salary 7 | FROM Employee e 8 | LEFT JOIN Employee e1 ON e.Id = e1.Id AND e.Month = e1.Month + 1 9 | LEFT JOIN Employee e2 ON e.Id = e2.Id AND e.Month = e2.Month + 2 10 | WHERE (e.Id, e.Month) NOT IN ( 11 | SELECT 12 | Id 13 | ,MAX(Month) AS max_month 14 | FROM Employee 15 | GROUP BY Id) 16 | ORDER BY e.Id ASC, e.Month DESC; 17 | 18 | SELECT 19 | e.Id 20 | ,e.Month 21 | ,IFNULL(e.Salary, 0) + IFNULL(e1.Salary, 0) + IFNULL(e2.Salary, 0) AS Salary 22 | FROM 23 | (SELECT Id, MAX(Month) AS max_month 24 | FROM Employee 25 | GROUP BY Id) AS e_max 
26 | -- if using left join, employee with only one record will see NULL returned after join 27 | JOIN Employee e ON e_max.Id = e.Id AND e_max.max_month > e.Month 28 | LEFT JOIN Employee e1 ON e.Id = e1.Id AND e.Month = e1.Month + 1 29 | LEFT JOIN Employee e2 ON e.Id = e2.Id AND e.Month = e2.Month + 2 30 | ORDER BY e.Id ASC, e.Month DESC; 31 | 32 | -- use where clause instead of inequality join 33 | SELECT 34 | e.Id 35 | ,e.Month 36 | ,IFNULL(e.Salary, 0) + IFNULL(e1.Salary, 0) + IFNULL(e2.Salary, 0) AS Salary 37 | FROM 38 | (SELECT Id, MAX(Month) AS max_month 39 | FROM Employee 40 | GROUP BY Id) AS e_max 41 | -- if using left join, employee with only one record will see NULL returned after join 42 | JOIN Employee e ON e_max.Id = e.Id 43 | LEFT JOIN Employee e1 ON e.Id = e1.Id AND e.Month = e1.Month + 1 44 | LEFT JOIN Employee e2 ON e.Id = e2.Id AND e.Month = e2.Month + 2 45 | WHERE e_max.max_month != e.Month 46 | ORDER BY e.Id ASC, e.Month DESC; 47 | 48 | -- use cross join, also correct 49 | SELECT 50 | e.Id 51 | ,e.Month 52 | ,IFNULL(e.Salary, 0) + IFNULL(e1.Salary, 0) + IFNULL(e2.Salary, 0) AS Salary 53 | FROM 54 | (SELECT Id, MAX(Month) AS max_month 55 | FROM Employee 56 | GROUP BY Id) AS e_max, 57 | Employee e 58 | LEFT JOIN Employee e1 ON e.Id = e1.Id AND e.Month = e1.Month + 1 59 | LEFT JOIN Employee e2 ON e.Id = e2.Id AND e.Month = e2.Month + 2 60 | WHERE e.Id = e_max.Id AND e.Month != e_max.max_month 61 | ORDER BY e.Id ASC, e.Month DESC; 62 | 63 | -- weird condition, non-standard SQL 64 | SELECT 65 | e.Id 66 | ,e.Month 67 | ,IFNULL(e.Salary, 0) + IFNULL(e1.Salary, 0) + IFNULL(e2.Salary, 0) AS Salary 68 | FROM 69 | Employee e 70 | LEFT JOIN Employee e1 ON e.Id = e1.Id AND e.Month = e1.Month + 1 71 | LEFT JOIN Employee e2 ON e.Id = e2.Id AND e.Month = e2.Month + 2 72 | WHERE (e.Id, e.Month) != (SELECT Id, MAX(Month) FROM Employee WHERE Id = e.Id) AS x 73 | ORDER BY e.Id ASC, e.Month DESC; 74 | 75 | -- best solution (rely on consecutive month) 76 | SELECT 77 | 
e1.Id 78 | ,MAX(e2.Month) AS Month 79 | ,SUM(e2.Salary) AS Salary 80 | FROM Employee e1 81 | JOIN Employee e2 82 | ON e1.Id = e2.Id 83 | AND e1.Month - e2.Month BETWEEN 1 AND 3 84 | GROUP BY e1.Id, e1.Month 85 | ORDER BY e1.Id ASC, e1.Month DESC -------------------------------------------------------------------------------- /LeetCode/others/580_Count_Student_Number_in_Departments.sql: -------------------------------------------------------------------------------- 1 | /* warning 2 | 3 | NO need to handle null if tables are joined before aggregating. 4 | NULL columns are simply not counted (return 0 if departmend has no student) 5 | 6 | If studnets are counted before joining, only existing department 7 | with at least 1 student are aggregated over with count. 8 | 9 | Left joining full dept tables (including empty dept) will result in NULL */ 10 | 11 | SELECT 12 | d.dept_name 13 | ,COUNT(s.student_name) AS student_number 14 | FROM department AS d 15 | LEFT JOIN student AS s 16 | ON d.dept_id = s.dept_id 17 | GROUP BY dept_name 18 | ORDER BY student_number DESC, dept_name; 19 | 20 | SELECT 21 | d.dept_name 22 | ,IFNULL(s.student_number, 0) AS student_number 23 | FROM department AS d 24 | LEFT JOIN ( 25 | SELECT 26 | dept_id 27 | ,COUNT(student_name) AS student_number 28 | FROM student 29 | GROUP BY dept_id 30 | ) AS s 31 | ON d.dept_id = s.dept_id 32 | ORDER BY student_number DESC, dept_name ASC; -------------------------------------------------------------------------------- /LeetCode/others/584_Find_Customer_Referee.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | name 3 | FROM customer 4 | WHERE referee_id != 2 5 | OR referee_id IS NULL; -------------------------------------------------------------------------------- /LeetCode/others/585_Investments_in_2016.sql: -------------------------------------------------------------------------------- 1 | 2 | SELECT 3 | ROUND(SUM(TIV_2016), 2) AS TIV_2016 4 | FROM 
insurance 5 | WHERE TIV_2015 IN 6 | (SELECT TIV_2015 7 | FROM insurance 8 | GROUP BY TIV_2015 9 | HAVING COUNT(*) > 1) 10 | AND (LAT, LON) NOT IN 11 | (SELECT lat, lon 12 | FROM insurance 13 | GROUP BY lat, lon 14 | HAVING COUNT(*) > 1); 15 | 16 | -- using IN is faster than NOT IN 17 | select 18 | round(sum(tiv_2016),2) as tiv_2016 19 | from 20 | insurance 21 | where 22 | tiv_2015 in (select tiv_2015 from insurance group by 1 having count(*) > 1) 23 | and (lat, lon) in (select lat, lon from insurance group by 1, 2 having count(*) = 1) 24 | 25 | 26 | SELECT ROUND(SUM(i1.TIV_2016), 2) AS TIV_2016 27 | FROM insurance i1 28 | WHERE EXISTS ( 29 | SELECT * 30 | FROM insurance i2 31 | WHERE i1.PID != i2.PID 32 | AND i1.TIV_2015 = i2.TIV_2015) 33 | AND NOT EXISTS( 34 | SELECT * 35 | FROM insurance i3 36 | WHERE i1.PID != i3.PID 37 | AND i1.LAT = i3.LAT 38 | AND i1.LON = i3.LON); 39 | -------------------------------------------------------------------------------- /LeetCode/others/586_Customer_Placing_the_Largest_Number_of_Orders.sql: -------------------------------------------------------------------------------- 1 | SELECT customer_number 2 | FROM orders 3 | GROUP BY customer_number 4 | ORDER BY COUNT(*) DESC 5 | LIMIT 1; -------------------------------------------------------------------------------- /LeetCode/others/595_Big_Countries.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | name 3 | ,population 4 | ,area 5 | FROM World 6 | WHERE 7 | population > 25000000 8 | OR area > 3000000; 9 | 10 | -- UNION method, for reference 11 | SELECT 12 | name 13 | ,population 14 | ,area 15 | FROM World 16 | WHERE 17 | population > 25000000 18 | UNION 19 | SELECT 20 | name 21 | ,population 22 | ,area 23 | FROM World 24 | WHERE 25 | area > 3000000; -------------------------------------------------------------------------------- /LeetCode/others/596_Classes_More_Than_5_Students.sql:
-------------------------------------------------------------------------------- 1 | # ask if there is duplicate student in class 2 | -- if so, use DISTINCT 3 | SELECT 4 | class 5 | FROM courses 6 | GROUP BY class 7 | HAVING COUNT(DISTINCT student) >= 5; -------------------------------------------------------------------------------- /LeetCode/others/597_Friend_Requests_I-_Overall_Acceptance_Rate.sql: -------------------------------------------------------------------------------- 1 | # Write your MySQL query statement below 2 | 3 | /* 4 | how to count friends for user 1 5 | 6 | requester accepter 7 | 1 2 8 | 1 3 9 | 4 1 10 | 5 1 11 | 12 | left join does not work, must use union 13 | count how many times 1 appears in requester col, then accepter col */ 14 | 15 | SELECT 16 | requester_id AS id 17 | ,SUM(tally) AS num 18 | FROM 19 | (SELECT 20 | requester_id 21 | ,COUNT(*) AS tally 22 | FROM request_accepted 23 | GROUP BY requester_id 24 | UNION ALL 25 | SELECT 26 | accepter_id 27 | ,COUNT(*) 28 | FROM request_accepted 29 | GROUP BY accepter_id) a -- alias! 30 | GROUP BY id 31 | ORDER BY num DESC -- sort order! 32 | LIMIT 1; 33 | 34 | -- IFNULL(value if not null, value if null) 35 | -- to get a scalar value, envelope subquery with bracket 36 | -- to select from subquery, envelope with bracket 37 | -- Every derived table must have its own alias: AS acc. AS req 38 | SELECT 39 | ROUND( 40 | IFNULL( 41 | (SELECT COUNT(*) FROM (SELECT DISTINCT requester_id, accepter_id FROM request_accepted) AS acc) 42 | / 43 | (SELECT COUNT(*) FROM (SELECT DISTINCT sender_id, send_to_id FROM friend_request) AS req) 44 | ,0) 45 | ,2) AS accept_rate; 46 | 47 | -- use cross join 48 | SELECT ROUND( 49 | IFNULL( 50 | COUNT(DISTINCT requester_id, accepter_id)/ 51 | COUNT(DISTINCT sender_id, send_to_id) 52 | , 0) 53 | , 2) AS accept_rate 54 | FROM request_accepted, friend_request 55 | 56 | /* 57 | 1. if A sends two requests to B, B request the second, does the first request count? 58 | 2. 
duplicate requests are coutned once 59 | */ 60 | 61 | SELECT 62 | ROUND( 63 | IFNULL( 64 | (SELECT COUNT(DISTINCT requester_id, accepter_id) FROM request_accepted) 65 | /(SELECT COUNT(DISTINCT sender_id, send_to_id) FROM friend_request) 66 | , 0) 67 | ,2 68 | ) AS accept_rate; 69 | 70 | 71 | SELECT 72 | ROUND( 73 | IFNULL(a.accept_tally / r.request_tally, 0) 74 | ,2 75 | ) AS accept_rate 76 | FROM 77 | (SELECT COUNT(DISTINCT requester_id, accepter_id) AS accept_tally FROM request_accepted) AS a, 78 | (SELECT COUNT(DISTINCT sender_id, send_to_id) AS request_tally FROM friend_request) AS r; 79 | 80 | 81 | SELECT 82 | ROUND( 83 | IF(r.request_tally = 0, 0, a.accept_tally / r.request_tally) 84 | ,2 85 | ) AS accept_rate 86 | FROM 87 | (SELECT COUNT(DISTINCT requester_id, accepter_id) AS accept_tally 88 | FROM request_accepted) AS a, 89 | (SELECT COUNT(DISTINCT sender_id, send_to_id) AS request_tally 90 | FROM friend_request) AS r; -------------------------------------------------------------------------------- /LeetCode/others/601_Human_Traffic_of_Stadium.sql: -------------------------------------------------------------------------------- 1 | -- be careful with duplicate 2 | SELECT DISTINCT 3 | s1.* 4 | FROM 5 | stadium AS s1 6 | ,stadium AS s2 7 | ,stadium AS s3 8 | WHERE s1.people >= 100 9 | AND s2.people >= 100 10 | AND s3.people >= 100 11 | AND ((s1.id = s2.id - 1 AND s1.id = s3.id - 2) -- start of window 12 | OR (s1.id = s2.id + 1 AND s1.id = s3.id - 1) -- middle of window 13 | OR (s1.id = s2.id + 2 AND s1.id = s3.id + 1)) -- end of window 14 | ORDER BY s1.id; 15 | 16 | -- MySQL: pre-filtering 17 | SELECT DISTINCT 18 | s1.* 19 | FROM 20 | (SELECT * FROM stadium WHERE people >= 100) AS s1 21 | ,(SELECT * FROM stadium WHERE people >= 100) AS s2 22 | ,(SELECT * FROM stadium WHERE people >= 100) AS s3 23 | WHERE (s1.id = s2.id - 1 AND s1.id = s3.id - 2) 24 | OR (s1.id = s2.id + 1 AND s1.id = s3.id - 1) 25 | OR (s1.id = s2.id + 2 AND s1.id = s3.id + 1) 26 | ORDER BY 
s1.id; 27 | 28 | -- MS SQL: cleaner code 29 | WITH good_day AS ( 30 | SELECT * FROM stadium WHERE people >= 100 31 | ) 32 | SELECT DISTINCT s1.* FROM 33 | good_day AS s1, 34 | good_day AS s2, 35 | good_day AS s3 36 | WHERE (s1.id = s2.id - 1 AND s1.id = s3.id - 2) 37 | OR (s1.id = s2.id + 1 AND s1.id = s3.id - 1) 38 | OR (s1.id = s2.id + 2 AND s1.id = s3.id + 1) 39 | ORDER BY s1.id; 40 | 41 | -------------------------------------------------------------------------------- /LeetCode/others/602_Friend_Requests_II-_Who_Has_the_Most_Friends.sql: -------------------------------------------------------------------------------- 1 | /* 2 | how to count friends for user 1 3 | 4 | requester accepter 5 | 1 2 6 | 1 3 7 | 4 1 8 | 5 1 9 | 10 | left join does not work, must use union 11 | count how many times 1 appears in requester col, then accepter col 12 | 13 | friendship is mutual 14 | 15 | cannot have 1, 2 in on row, then 2, 1 in a nother row */ 16 | 17 | SELECT 18 | requester_id AS id 19 | ,SUM(tally) AS num 20 | FROM 21 | (SELECT 22 | requester_id 23 | ,COUNT(*) AS tally 24 | FROM request_accepted 25 | GROUP BY requester_id 26 | UNION ALL 27 | SELECT 28 | accepter_id 29 | ,COUNT(*) 30 | FROM request_accepted 31 | GROUP BY accepter_id) a -- alias! 32 | GROUP BY id 33 | ORDER BY num DESC -- sort order! 
34 | LIMIT 1; 35 | 36 | # Write your MySQL query statement below 37 | 38 | SELECT id, COUNT(*) AS num FROM ( 39 | SELECT accepter_id AS id FROM request_accepted 40 | UNION ALL 41 | SELECT requester_id AS id FROM request_accepted 42 | ) AS c 43 | GROUP BY 1 44 | ORDER BY num DESC 45 | LIMIT 1; 46 | -------------------------------------------------------------------------------- /LeetCode/others/603_Consecutive_Available_Seats.sql: -------------------------------------------------------------------------------- 1 | /* 2 | shift down 3 | 1 N 4 | 2 1 5 | 3 2 6 | 7 | shift up 8 | 1 2 9 | 2 3 10 | 3 N 11 | 12 | join 13 | 1 N 2 14 | 2 1 3 15 | 3 2 N 16 | 17 | */ 18 | 19 | SELECT 20 | m.seat_id 21 | FROM cinema AS m 22 | LEFT JOIN cinema AS d ON m.seat_id = d.seat_id + 1 23 | LEFT JOIN cinema AS u ON m.seat_id = u.seat_id - 1 24 | WHERE m.free = 1 25 | AND (d.free = 1 OR u.free = 1) 26 | ORDER BY seat_id; 27 | 28 | -- if three consecutive seats are free 29 | -- middle seat will be returned twice 30 | SELECT DISTINCT a.seat_id AS seat_id 31 | FROM cinema a 32 | JOIN cinema b 33 | ON a.free = 1 34 | AND b.free = 1 35 | AND abs(a.seat_id - b.seat_id) = 1 36 | ORDER BY seat_id; -------------------------------------------------------------------------------- /LeetCode/others/607_Sales_Person.sql: -------------------------------------------------------------------------------- 1 | -- join version 2 | /* for non-null values in grouped column 3 | null in matched columns are simgply ignored by 4 | COUNT, SUM, AVG, if there exists any non-null value. 5 | (exceot when using COUNT(*)) 6 | 7 | If there exists no non-null value in matched row 8 | Aggregate output NULL. COUNT returns 0. 
9 | */ 10 | SELECT 11 | s.name 12 | FROM salesperson AS s 13 | LEFT JOIN orders AS o ON o.sales_id = s.sales_id 14 | LEFT JOIN company AS c ON o.com_id = c.com_id 15 | GROUP BY s.name 16 | HAVING SUM(c.name = "RED") = 0 17 | OR SUM(c.name = "RED") IS NULL 18 | ORDER BY name; 19 | 20 | -- subquery 21 | SELECT 22 | s.name 23 | FROM salesperson s 24 | WHERE sales_id NOT IN ( 25 | SELECT sales_id 26 | FROM orders o 27 | LEFT JOIN company c 28 | ON o.com_id = c.com_id 29 | WHERE c.name = 'RED' 30 | ); -------------------------------------------------------------------------------- /LeetCode/others/608_Tree_Node.sql: -------------------------------------------------------------------------------- 1 | -- do a small example, and observe pattern 2 | SELECT 3 | t1.Id, 4 | CASE 5 | WHEN COUNT(t1.p_id) = 0 THEN "Root" 6 | WHEN COUNT(t2.id) = 0 THEN "Leaf" 7 | ELSE "Inner" 8 | END AS Type 9 | FROM tree t1 10 | LEFT JOIN tree t2 11 | ON t1.Id = t2.p_id 12 | GROUP BY t1.Id 13 | ORDER BY t1.Id; -------------------------------------------------------------------------------- /LeetCode/others/610_Triangle_Judgement.sql: -------------------------------------------------------------------------------- 1 | # Write your MySQL query statement below 2 | # let a > b > c 3 | # 1. c + b > a 4 | # 2. 
a - c < b (equivalent as above) 5 | 6 | SELECT x, y, z, 7 | CASE 8 | WHEN x + y < z OR x + z < y OR y + z < x THEN 'No' 9 | ELSE 'Yes' 10 | END 11 | AS 'triangle' 12 | FROM triangle t; -------------------------------------------------------------------------------- /LeetCode/others/612_Shortest_Distance_in_a_Plane.sql: -------------------------------------------------------------------------------- 1 | SELECT ROUND(MIN(SQRT(POWER(p1.x - p2.x, 2) + POWER(p1.y - p2.y, 2))), 2) AS shortest 2 | FROM point_2d p1, point_2d p2 3 | WHERE p1.x != p2.x 4 | OR p1.y != p2.y; 5 | 6 | SELECT 7 | MIN(ROUND( 8 | SQRT(POW(p1.x - p2.x, 2) + POW(p1.y - p2.y, 2)), 2)) AS shortest 9 | FROM point_2d p1, point_2d p2 10 | WHERE NOT (p1.x = p2.x AND p1.y = p2.y); -------------------------------------------------------------------------------- /LeetCode/others/613_Shortest_Distance_in_a_Line.sql: -------------------------------------------------------------------------------- 1 | # Write your MySQL query statement below 2 | SELECT MIN(ABS(p1.x - p2.x)) AS shortest 3 | FROM point p1, point p2 4 | WHERE p1.x != p2.x; -------------------------------------------------------------------------------- /LeetCode/others/614_Second_Degree_Follower.sql: -------------------------------------------------------------------------------- 1 | /* 2 | A B C 3 | A B D 4 | B C N 5 | B D E 6 | D E N 7 | */ 8 | 9 | -- ask about DISTINCT!!!! 
10 | -- note that we are grouping by first degree follower, not followee 11 | SELECT 12 | f1.follower 13 | ,COUNT(DISTINCT f2.follower) AS num 14 | FROM 15 | follow f1 16 | JOIN 17 | follow f2 18 | ON f1.follower = f2.followee 19 | GROUP BY f1.follower 20 | ORDER BY f1.follower; -------------------------------------------------------------------------------- /LeetCode/others/615_Average_Salary_Departments_VS_Company.sql: -------------------------------------------------------------------------------- 1 | WITH full_table AS 2 | (SELECT 3 | s.* 4 | ,e.department_id 5 | FROM salary AS s 6 | JOIN employee AS e 7 | ON s.employee_id = e.employee_id) 8 | 9 | ,department_monthly AS 10 | (SELECT 11 | department_id 12 | ,pay_date 13 | ,AVG(amount) AS dept_avg 14 | FROM full_table 15 | GROUP BY department_id, pay_date) 16 | 17 | ,company_monthly AS 18 | (SELECT 19 | pay_date 20 | ,AVG(amount) AS company_avg 21 | FROM full_table 22 | GROUP BY pay_date) 23 | 24 | SELECT 25 | DISTINCT 26 | DATE_FORMAT(d.pay_date, '%Y-%m') AS pay_month 27 | ,d.department_id 28 | ,CASE 29 | WHEN d.dept_avg < c.company_avg THEN "lower" 30 | WHEN d.dept_avg = c.company_avg THEN "same" 31 | WHEN d.dept_avg > c.company_avg THEN "higher" 32 | END AS comparison 33 | FROM department_monthly d 34 | JOIN company_monthly c 35 | ON d.pay_date = c.pay_date 36 | ORDER BY d.pay_date DESC, d.department_id ASC; 37 | 38 | -- complication: employees can have different pay-dates 39 | SELECT 40 | a.pay_month 41 | ,a.department_id, 42 | CASE 43 | WHEN a.dept_avg_sal < b.corp_avg_sal THEN "lower" 44 | WHEN a.dept_avg_sal = b.corp_avg_sal THEN "same" 45 | ELSE "higher" 46 | END AS comparison 47 | FROM 48 | (SELECT 49 | date_format(pay_date, '%Y-%m') as pay_month 50 | ,e.department_id 51 | ,AVG(amount) AS dept_avg_sal 52 | FROM salary AS s 53 | JOIN employee AS e 54 | ON s.employee_id = e.employee_id 55 | GROUP BY pay_month, e.department_id 56 | ) AS a 57 | JOIN 58 | (SELECT 59 | date_format(pay_date, '%Y-%m') as
pay_month 60 | ,AVG(amount) AS corp_avg_sal 61 | FROM salary GROUP BY pay_month 62 | ) AS b 63 | ON a.pay_month = b.pay_month 64 | ORDER BY pay_month, a.department_id; -------------------------------------------------------------------------------- /LeetCode/others/618_Students_Report_By_Geography.sql: -------------------------------------------------------------------------------- 1 | -- knowing america is largest, 2 | -- all other col's id is subset of america's 3 | -- outer join with america is good enough 4 | -- (one left join, one out join) 5 | SELECT 6 | America, Asia, Europe 7 | FROM 8 | (SELECT @as:=0, @am:=0, @eu:=0) t, 9 | (SELECT @as:=@as + 1 AS asid, name AS Asia 10 | FROM student 11 | WHERE continent = 'Asia' 12 | ORDER BY Asia) AS t1 13 | RIGHT JOIN 14 | (SELECT @am:=@am + 1 AS amid, name AS America 15 | FROM student 16 | WHERE continent = 'America' 17 | ORDER BY America) AS t2 18 | ON asid = amid 19 | LEFT JOIN 20 | (SELECT @eu:=@eu + 1 AS euid, name AS Europe 21 | FROM student 22 | WHERE continent = 'Europe' 23 | ORDER BY Europe) AS t3 24 | ON amid = euid; 25 | 26 | SELECT 27 | am.name AS America 28 | ,asia.name AS Asia 29 | ,eu.name AS Europe 30 | FROM 31 | (SELECT 32 | name 33 | ,ROW_NUMBER() OVER (ORDER BY name) AS id 34 | FROM student 35 | WHERE continent = 'America') AS am 36 | LEFT JOIN 37 | (SELECT 38 | name 39 | ,ROW_NUMBER() OVER (ORDER BY name) AS id 40 | FROM student 41 | WHERE continent = 'Asia') AS asia 42 | ON am.id = asia.id 43 | LEFT JOIN 44 | (SELECT 45 | name 46 | ,ROW_NUMBER() OVER (ORDER BY name) AS id 47 | FROM student 48 | WHERE continent = 'Europe') AS eu 49 | ON asia.id = eu.id 50 | OR am.id = eu.id -- IMPORTANT! 
columns may not be arranged in descending order 51 | ORDER BY America; 52 | 53 | SELECT 54 | america.name AS America 55 | ,asia.name as Asia 56 | ,europe.name as Europe 57 | FROM 58 | (SELECT @a := 0, @b := 0, @c := 0) AS vars 59 | ,(SELECT @a := @a + 1 AS ID, name FROM student WHERE continent = "America" ORDER BY name) AS america 60 | LEFT JOIN 61 | (SELECT @b := @b + 1 AS ID, name FROM student WHERE continent = "Asia" ORDER BY name) AS asia 62 | ON america.ID = asia.ID 63 | LEFT JOIN 64 | (SELECT @c := @c + 1 AS ID, name FROM student WHERE continent = "Europe" ORDER BY name) AS europe 65 | ON asia.ID = europe.ID 66 | OR america.ID = europe.ID; 67 | -------------------------------------------------------------------------------- /LeetCode/others/619_Biggest_Single_Number.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | MAX(num) AS num 3 | FROM ( 4 | SELECT 5 | num 6 | FROM number 7 | GROUP BY num 8 | HAVING COUNT(*) = 1) AS _; -------------------------------------------------------------------------------- /LeetCode/others/620_Not_Boring_Movies.sql: -------------------------------------------------------------------------------- 1 | SELECT * 2 | FROM cinema 3 | WHERE description NOT LIKE "%boring%" 4 | AND id % 2 = 1 5 | ORDER BY rating DESC; -------------------------------------------------------------------------------- /LeetCode/others/626_Exchange_Seats.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | id 3 | ,CASE 4 | -- for odd id, match to next row (may be null) 5 | WHEN id % 2 = 1 THEN ( 6 | -- put scalar value in bracket! 
7 | -- scalar subquery must be parenthesized when used as a function argument: IFNULL((SELECT ...), fallback) 8 | IFNULL((SELECT student FROM seat s2 WHERE s1.id = s2.id - 1), s1.student) 9 | ) 10 | -- for even id, match to previous row (not null) 11 | ELSE (SELECT student FROM seat s3 WHERE s1.id = s3.id + 1) 12 | END AS student 13 | FROM seat s1; 14 | 15 | /* 16 | 1 2 abbot 17 | 2 1 dories 18 | 3 4 doris 19 | 4 3 green 20 | 5 6 james (missing 5!, need to fix ids) 21 | */ 22 | select 23 | case 24 | when s.id is null then stg.id-1 25 | else stg.id 26 | end id 27 | ,stg.student 28 | from 29 | ( 30 | select id+1 id,student 31 | from seat 32 | where id%2=1 33 | union all 34 | select id-1 id,student 35 | from seat 36 | where id%2=0 37 | order by id) stg 38 | left join seat s 39 | using (id); -------------------------------------------------------------------------------- /LeetCode/others/627_Swap_Salary.sql: -------------------------------------------------------------------------------- 1 | UPDATE salary 2 | SET sex = CASE WHEN sex = "m" THEN "f" 3 | WHEN sex = "f" THEN "m" 4 | ELSE NULL 5 | END; 6 | 7 | UPDATE salary 8 | SET sex = CASE WHEN sex = "m" THEN "f" 9 | WHEN sex = "f" THEN "m" 10 | END 11 | WHERE sex IS NOT NULL; -------------------------------------------------------------------------------- /assets/sql_order.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shawlu95/Beyond-LeetCode-SQL/8ab7f0ad60b85ab9dff18da6fc760211335d0fb0/assets/sql_order.png -------------------------------------------------------------------------------- /databases/world_db/README.txt: -------------------------------------------------------------------------------- 1 | This directory contains the file that can be used to set up the world_x 2 | database that is used in the guides of the MySQL Reference 3 | Manual: 4 | 5 | Quick-Start Guide: MySQL Shell for JavaScript 6 | Quick-Start Guide: MySQL Shell for Python 7 | 8 | These instructions assume that your current working directory is 9 | the directory that contains the files
created by unpacking the 10 | world_x.zip or world_x.tar.gz distribution. 11 | 12 | You must install MySQL Shell and MySQL Server 5.7.12 or higher 13 | with the X Plugin enabled. Start the server before you load the 14 | world_x database. 15 | 16 | Note: Releases issued prior to September 2016 used table 17 | names in mixed cases. Now table names are all 18 | lowercase. This is because MySQL Shell is case-sensitive. 19 | 20 | Extract the installation archive to a temporary location such as /tmp/. 21 | Unpacking the archive results in a single file named world_x.sql. 22 | 23 | Create or recreate the schema with one of the following commands: 24 | 25 | Either use MySQL Shell: 26 | 27 | shell> mysqlsh -u root --sql --recreate-schema world_x < /tmp/world_x-db/world_x.sql 28 | 29 | Or the standard MySQL command-line client: 30 | 31 | Connect to MySQL: 32 | shell> mysql -u root -p 33 | Load the file: 34 | mysql> SOURCE /tmp/world_x-db/world_x.sql; 35 | 36 | Enter your password when prompted. A non-root account can be used as long as 37 | the account has privileges to create new databases. 38 | 39 | Replace /tmp/ with the path to the world_x.sql file on your system. 40 | -------------------------------------------------------------------------------- /databases/world_db/world.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shawlu95/Beyond-LeetCode-SQL/8ab7f0ad60b85ab9dff18da6fc760211335d0fb0/databases/world_db/world.sql --------------------------------------------------------------------------------