├── Easy ├── 00175-combine-two-tables.sql ├── 00181-employees-earning-more-than-their-managers.sql ├── 00182-duplicate-emails.sql ├── 00183-customers-who-never-order.sql ├── 00196-delete-duplicate-emails.sql ├── 00197-rising-temperature.sql ├── 00511-game-play-analysis-i.sql ├── 00512-game-play-analysis-ii.sql ├── 00577-employee-bonus.sql ├── 00584-find-customer-referee.sql ├── 00586-customer-placing-the-largest-number-of-orders.sql ├── 00595-big-countries.sql ├── 00596-classes-more-than-5-students.sql ├── 00597-friend-requests-i-overall-acceptance-rate.sql ├── 00603-consecutive-available-seats.sql ├── 00607-sales-person.sql ├── 00610-triangle-judgement.sql ├── 00613-shortest-distance-in-a-line.sql ├── 00619-biggest-single-number.sql ├── 00620-not-boring-movies.sql ├── 00627-swap-salary.sql ├── 01050-actors-and-director-who-cooperated-at-least-three-times.sql ├── 01068-product-sales-analysis-i.sql ├── 01069-prooduct-sales-analysis-ii.sql ├── 01075-project-employees-i.sql ├── 01076-project-employees-ii.sql ├── 01082-sales-analysis-i.sql ├── 01083-sales-analysis-ii.sql ├── 01084-sales-analysis-iii.sql ├── 01113-reported-posts.sql ├── 01141-user-activity-for-the-past-30-days-i.sql ├── 01142-user-activity-for-the-past-30-days-ii.sql ├── 01148-article-views-i.sql ├── 01173-immediate-food-delivery-i.sql ├── 01179-reformat-department-table.sql ├── 01211-queries-quality-and-percentage.sql ├── 01241-number-of-comments-per-post.sql ├── 01251-average-selling-price.sql ├── 01280-students-and-examinations.sql ├── 01294-weather-type-in-each-country.sql ├── 01303-find-the-team-size.sql ├── 01322-ads-performance.sql ├── 01327-list-the-products-ordered-in-a-period.sql ├── 01350-students-with-invalid-departments.sql ├── 01378-replace-employee-id-with-the-unique-identifier.sql ├── 01407-top-travellers.sql ├── 01421-npv-queries.sql ├── 01435-create-a-sessions-bar-chart.sql ├── 01484-group-sold-products-by-the-date.sql ├── 01495-friendly-movies-streamed-last-month.sql ├── 01511-customer_order-frequency.sql ├── 01517-find-users-with-valid-emails.sql ├── 01527-patients-with-a-condition.sql ├── 01543-fix-product-name-format.sql ├── 01565-unique-orders-and-customers-per-month.sql ├── 01571-warehouse-manager.sql ├── 01581-customer-who-visited-but-did-not-make-any-transactions.sql ├── 01587-bank-account-summary-ii.sql ├── 01607-sellers-with-no-sales.sql ├── 01623-all-valid-triplets-that-can-represent-a-country.sql ├── 01633-percentage-of-users-attended-a-contest.sql ├── 01661-average-time-of-process-per-machine.sql ├── 01667-fix-names-in-a-table.sql ├── 01677-products-worth-over-invoices.sql ├── 01683-invalid-tweets.sql ├── 01693-daily_leads-and-partners.sql ├── 01729-find-followers-count.sql ├── 01731-the-number-of-employees-which-report-to-each-employee.sql ├── 01741-find-total-time-spent-by-each-employee.sql ├── 01757-recyclable-and-low-fat-products.sql ├── 01777-products-price-for-each-store.sql ├── 01789-primary-department-for-each-employee.sql ├── 01795-rearrange-products-table.sql ├── 01809-ad-free-sessions.sql ├── 01821-find-customers-with-positive-revenue-this-year.sql ├── 01853-convert-date-format.sql ├── 01873-calculate-special-bonus.sql ├── 01890-the-latest-login-in-2020.sql ├── 01939-users-that-actively-request-confirmation-messages.sql ├── 01965-employees-with-missing-information.sql ├── 01978-employees-whose-manager-left-the-company.sql ├── 02026-low-quality-problems.sql ├── 02072-the-winner-university.sql ├── 02082-the-number-of-rich-customers.sql ├── 
02205-the-number-of-users-that-are-eligible-for-the-discount.sql ├── 02230-the-users-that-are-eligible-for-discount.sql ├── 02356-number-of-unique-subjects-taught-by-each-teacher.sql ├── 02377-sort-the-olympic-table.sql └── 02668-find-latest-salaries.sql ├── Hard ├── 00185-department-top-three-salaries.sql ├── 00262-trips-and-users.sql ├── 00569-median-employee-salary.sql ├── 00571-find-median-given-frequency-of-numbers.sql ├── 00579-find-cumulative-salary-of-an-employee.sql ├── 00601-human-traffic-of-stadium.sql ├── 00615-average-salary-departments-vs-company.sql ├── 00618-students-reported-by-geography.sql ├── 01097-game-play-analysis-v.sql ├── 01127-user-purchase-platform.sql ├── 01159-market-analysis-ii.sql ├── 01194-tournament-winners.sql ├── 01225-report-contiguous-dates.sql ├── 01336-number-of-transactions-per-visit.sql ├── 01369-get-the-second-most-recent-ctivity.sql ├── 01384-total-sales-amount-by-year.sql ├── 01412-find-the-quiet-students-in-all-exams.sql ├── 01479-sales-by-day-of-the-week.sql ├── 01635-hopper-company-queries-i.sql ├── 01767-find-the-subtasks-that-did-not-execute.sql ├── 01972-first-and-last-call-on-the-same-day.sql ├── 02004-the-number-of-seniors-and-juniors-to-join-the-company.sql ├── 02010-the-number-of-seniors-and-juniors-to-join-the-company-ii.sql ├── 02118-build-the-equation.sql ├── 02173-longest-winning-streak.sql ├── 02199-finding-the-topic-of-each-post.sql └── 02701-consecutive-transactions-with-increasing-amounts.sql ├── Medium ├── 00176-second-highest-salary.sql ├── 00177-nth-highest-salary.sql ├── 00178-rank-scores.sql ├── 00180-consecutive-numbers.sql ├── 00184-department-highest-salary.sql ├── 00534-game-play-analysis-iii.sql ├── 00550-game-play-analysis-iv.sql ├── 00570-managers-with-at-least-5-direct-reports.sql ├── 00574-winning-candidate.sql ├── 00578-get-highest-answer-rate-question.sql ├── 00580-count-student-number-in-departments.sql ├── 00585-investments-in-2016.sql ├── 00602-friend-requests-ii-who-has-the-most-friends.sql ├── 00608-tree-node.sql ├── 00612-shortest-distance-in-a-plane.sql ├── 00614-second-degree-follower.sql ├── 00626-exchange-seats.sql ├── 01045-customers-who-bought-all-products.sql ├── 01070-product-sales-analysis-iii.sql ├── 01077-project-employees-iii.sql ├── 01098-unpopular-books.sql ├── 01107-new-users-daily-count.sql ├── 01112-highest-grade-for-each-student.sql ├── 01126-active-businesses.sql ├── 01132-reported-posts-ii.sql ├── 01149-article-views-ii.sql ├── 01158-market-analysis-i.sql ├── 01164-product-price-at-a-given-date.sql ├── 01174-immediate-food-delivery-ii.sql ├── 01193-monthly-transactions-i.sql ├── 01204-last-person-to-fit-in-the-bus.sql ├── 01205-monthly-tractions-ii.sql ├── 01212-teams-scores-in-football-tournamant.sql ├── 01264-page-recommendations.sql ├── 01270-all-people-report-to-the-given-manager.sql ├── 01285-find-the-start-and-end-number-of-continuous-ranges.sql ├── 01308-running-total-for-different-genders.sql ├── 01321-restaurant-growth.sql ├── 01341-movie-rating.sql ├── 01355-activity-participans.sql ├── 01364-number-of-trusted-contacts-of-a-customer.sql ├── 01393-capital-gain-loss.sql ├── 01398-customers-who-bought-products-A-and-B-but-not-C.sql ├── 01440-evaluate-boolean-expression.sql ├── 01445-apples-&-oranges.sql ├── 01454-active-users.sql ├── 01459-rectangles-area.sql ├── 01468- calculate-salaries.sql ├── 01501-countries-you-can-safely-invest-in.sql ├── 01532-the-most-recent-three-orders.sql ├── 01549-the-most-recent-orders-for-each-product.sql ├── 01555-bank-account-summary.sql ├── 
01596-the-most-frequently-ordered-products-for-each-customer.sql ├── 01613-find-the-missing-ids.sql ├── 01699-number-of-calls-between-two-persons.sql ├── 01709-biggest-window-between-visits.sql ├── 01715-count-apples-and-oranges.sql ├── 01747-leetflex-banned-accounts.sql ├── 01783-grand-slam-titles.sql ├── 01811-find-interview-candidates.sql ├── 01831-maximum-transaction-each-day.sql ├── 01841-league-statistics.sql ├── 01867-orders-with-maximum-quantity-above-average.sql ├── 01875-group-employees-of-the-same-salary.sql ├── 01907-count-salary-categories.sql ├── 01934-confirmation-rate.sql ├── 01949-strong-friendship.sql ├── 01951-all-the-pairs-with-the-maximum-number-of-common-followers.sql ├── 01988-find-cutoff-score-for-each-school.sql ├── 01990-count-the-number-of-experiments.sql ├── 02020-number-of-accounts-that-did-not-stream.sql ├── 02041-accepted-candidates-from-the-interviews.sql ├── 02066-account-balance.sql ├── 02084-drop-type-1-orders-for-customers-with-type-0-orders.sql ├── 02112-the-airport-with-the-most-traffic.sql ├── 02142-the-number-of-passengers-in-each-bus-i.sql ├── 02159- order-two-columns-independently.sql ├── 02175-the-change-in-global-rankings.sql ├── 02228-user-with-two-purchases-within-seven-days.sql └── 02308-arrange-table-by-gender.sql └── README.md /Easy/00175-combine-two-tables.sql: -------------------------------------------------------------------------------- 1 | -- simple LEFT JOIN 2 | 3 | select p.firstName, p.lastName, a.city, a.state 4 | from Person p 5 | left join Address a 6 | using(personId) 7 | 8 | 9 | -- apple- 4 10 | -- bloomberg- 2 11 | -- amazon- 2 12 | -- microsoft- 2 13 | -- adobe- 3 14 | -- google- 3 15 | -------------------------------------------------------------------------------- /Easy/00181-employees-earning-more-than-their-managers.sql: -------------------------------------------------------------------------------- 1 | -- use join 2 | -- Employee's manager_id = Manager's id 3 | -- salary of Employee should be higher 4 | 5 | select e.name as 'Employee' 6 | from Employee m 7 | left join Employee e 8 | on e.managerId = m.id 9 | where e.salary > m.salary 10 | 11 | -- amazon- 3 12 | -- yahoo- 2 13 | --uber- 5 14 | -- google- 3 15 | -- bloomberg- 2 16 | -- microsoft- 2 17 | -- wix- 1 18 | -------------------------------------------------------------------------------- /Easy/00182-duplicate-emails.sql: -------------------------------------------------------------------------------- 1 | -- use group by for aggregate 2 | 3 | select email as Email 4 | from Person 5 | group by email 6 | having count(email) > 1 7 | 8 | 9 | -- amazon- 2 10 | -- uber- 2 11 | -------------------------------------------------------------------------------- /Easy/00183-customers-who-never-order.sql: -------------------------------------------------------------------------------- 1 | -- pick id from Orders, and do not select those ids 2 | 3 | select name as Customers 4 | from Customers 5 | where id not in 6 | (select distinct customerId 7 | from Orders) 8 | 9 | -- amazon- 3 10 | -- apple- 7 11 | -- bloomberg- 5 12 | -- adobe- 2 13 | -------------------------------------------------------------------------------- /Easy/00196-delete-duplicate-emails.sql: -------------------------------------------------------------------------------- 1 | -- make a row_num() partition by email- this will give a partition for each email 2 | -- since we only need lowest id, delete rows with rnk > 1 3 | 4 | delete 5 | from Person 6 | where id in 7 | (select id 8 | from ( 9 | select *, 
row_number() over (partition by email order by id) as rnk 10 | from Person) temp1 11 | where rnk > 1) 12 | 13 | ------------------------------------------------------------------------------------------------------------------------ 14 | 15 | -- without using rank() 16 | -- get min(id), and delete rows which aren't min id 17 | 18 | delete 19 | from Person 20 | where id not in 21 | (select min_id 22 | from ( 23 | select email, min(id) as min_id 24 | from Person 25 | group by email) temp1 26 | ) 27 | 28 | ------------------------------------------------------------------------------------------------------------------------ 29 | 30 | -- using join 31 | 32 | delete p1 33 | from Person p1, Person p2 34 | where p1.email = p2.email and p1.id > p2.id 35 | 36 | 37 | -- oracle- 2 38 | -- amazon- 5 39 | -- uber- 2 40 | -- apple- 2 41 | -------------------------------------------------------------------------------- /Easy/00197-rising-temperature.sql: -------------------------------------------------------------------------------- 1 | -- use inner join instead of left join, because left join will give all ids 2 | -- so w1 is current date, w2 is previous date 3 | -- current date - prev date should be equal to 1(datediff) 4 | -- current temp should be greater than previous temp 5 | -- we want output that satisfies all these conditions, hence putting everything in 'ON' clause. we can filter data using WHERE as well 6 | 7 | select w1.id as Id 8 | from Weather w1 inner join Weather w2 on datediff(w1.recordDate, w2.recordDate) = 1 and w1.temperature > w2.temperature 9 | 10 | -- google- 3 11 | -- adobe- 2 12 | -- amazon- 2 13 | -- yahoo- 2 14 | -- bloomberg- 3 15 | -- cognizant- 2 16 | -------------------------------------------------------------------------------- /Easy/00511-game-play-analysis-i.sql: -------------------------------------------------------------------------------- 1 | -- simple aggregate function 2 | 3 | select player_id, min(event_date) as first_login 4 | from Activity 5 | group by 1 6 | 7 | -- adobe- 2 8 | -- amazon- 4 9 | -- bloomberg- 4 10 | -- gsn games- 1 11 | -------------------------------------------------------------------------------- /Easy/00512-game-play-analysis-ii.sql: -------------------------------------------------------------------------------- 1 | -- using subquery 2 | 3 | select player_id, device_id 4 | from Activity 5 | where (player_id, event_date) in 6 | (select player_id, min(event_date) 7 | from Activity 8 | group by player_id) 9 | 10 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 11 | -- using window function 12 | 13 | with CTE as 14 | (select player_id, device_id, 15 | row_number() over(partition by player_id order by event_date) as rn 16 | from Activity) 17 | 18 | select player_id, device_id 19 | from CTE 20 | where rn = 1 21 | 22 | -- gsn games- 1 23 | -------------------------------------------------------------------------------- /Easy/00577-employee-bonus.sql: -------------------------------------------------------------------------------- 1 | -- left join on employee 2 | -- we need nulls too, so specify that in where condition 3 | 4 | select name, bonus 5 | from Employee e left join Bonus b 6 | on e.empId = b.empId 7 | where bonus < 1000 or bonus is null 8 | 9 | 10 | -- google- 2 11 | -- amazon- 2 12 | -- netsuite- 1 13 | -------------------------------------------------------------------------------- 
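-- a hedged alternative for the employee-bonus query above (same Employee/Bonus schema as the problem; a sketch, not the submitted solution):
-- COALESCE maps the NULL bonus produced by the LEFT JOIN to 0, so a single comparison covers both the "< 1000" and the "no bonus row" cases

select name, bonus
from Employee e left join Bonus b
on e.empId = b.empId
where coalesce(bonus, 0) < 1000

--------------------------------------------------------------------------------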
/Easy/00584-find-customer-referee.sql: -------------------------------------------------------------------------------- 1 | -- if we do not specify NULL, WHERE automatically removes them. To keep null, specify in WHERE condition 2 | 3 | select name 4 | from Customer 5 | where referee_id != 2 or referee_id is null 6 | 7 | 8 | -- amazon- 4 9 | -- google- 4 10 | -- apple- 3 11 | -- adobe- 3 12 | -------------------------------------------------------------------------------- /Easy/00586-customer-placing-the-largest-number-of-orders.sql: -------------------------------------------------------------------------------- 1 | -- get the customer with the maximum order count- order by, limit 2 | 3 | select customer_number 4 | from Orders 5 | group by 1 6 | order by count(order_number) desc 7 | limit 1 8 | 9 | 10 | -- adobe- 2 11 | -- google- 3 12 | -- apple- 2 13 | -- uber- 2 14 | -- twitter- 1 15 | -------------------------------------------------------------------------------- /Easy/00595-big-countries.sql: -------------------------------------------------------------------------------- 1 | -- use WHERE and OR for condition 2 | 3 | select name, population, area 4 | from World 5 | where area >= 3000000 or population >= 25000000 6 | 7 | 8 | -- apple- 3 9 | -- google- 2 10 | -- bloomberg- 2 11 | -- amazon- 2 12 | -- adobe- 3 13 | -- facebook- 3 14 | -- yahoo- 2 15 | -------------------------------------------------------------------------------- /Easy/00596-classes-more-than-5-students.sql: -------------------------------------------------------------------------------- 1 | -- group by for count() 2 | 3 | select class 4 | from Courses 5 | group by 1 6 | having count(distinct student) >= 5 7 | 8 | 9 | -- no companies listed 10 | -------------------------------------------------------------------------------- /Easy/00597-friend-requests-i-overall-acceptance-rate.sql: -------------------------------------------------------------------------------- 1 | -- distinct pairs from both tables, division, round 2 | -- answer shouldn't be null, so used ifnull 3 | -- can also do IFNULL INSIDE ROUND- more popular 4 | 5 | select ifnull( 6 | round( 7 | (select count(distinct requester_id, accepter_id) 8 | from RequestAccepted) 9 | / 10 | (select count(distinct sender_id, send_to_id) 11 | from FriendRequest) 12 | , 2) 13 | , 0) as accept_rate 14 | 15 | -- facebook- 2 16 | -------------------------------------------------------------------------------- /Easy/00603-consecutive-available-seats.sql: -------------------------------------------------------------------------------- 1 | -- lag and lead will give the rows above and below 2 | -- if free = 1 and either of lag_free or lead_free is 1, it means we have 2 consecutive free seats.
3 | -- just pick those rows 4 | 5 | with CTE as( 6 | select seat_id, free, 7 | lag(free, 1) over(order by seat_id) as lag_free, 8 | lead(free, 1) over(order by seat_id) as lead_free 9 | from Cinema) 10 | 11 | select seat_id 12 | from CTE 13 | where (free = 1 and lag_free = 1) or (free = 1 and lead_free = 1) 14 | order by 1 15 | 16 | ------------------------------------------------------------------------------------------------------------------------------------------------------------ 17 | -- if a seat is free AND seat + 1 or seat - 1 is also free, then pull that seat 18 | 19 | select seat_id 20 | from Cinema 21 | where free = 1 and 22 | (seat_id - 1 in (select seat_id 23 | from Cinema 24 | where free = 1) 25 | or 26 | seat_id + 1 in (select seat_id 27 | from Cinema 28 | where free = 1)) 29 | 30 | 31 | -- amazon- 4 32 | -------------------------------------------------------------------------------- /Easy/00607-sales-person.sql: -------------------------------------------------------------------------------- 1 | -- nested query 2 | -- select all salesPerson with company RED 3 | -- select all salesPerson from SalesPerson not in the above table 4 | 5 | select sp.name 6 | from SalesPerson sp 7 | where sales_id not in 8 | (select o.sales_id 9 | from Orders o 10 | where o.com_id in 11 | (select c.com_id 12 | from Company c 13 | where c.name = 'RED')) 14 | 15 | ------------------------------------------------------------------------------------------------------------------------------------------------ 16 | -- JOIN Company c and Orders o 17 | -- pick all sales_id with company = 'RED' 18 | -- pick all salesPerson from SalesPerson not in temp table above 19 | 20 | select sp.name 21 | from SalesPerson sp 22 | where sales_id not in 23 | (select o.sales_id 24 | from Orders o 25 | inner join Company c 26 | on c.com_id = o.com_id 27 | where c.name = 'RED') 28 | 29 | 30 | -- no companies listed 31 | -------------------------------------------------------------------------------- /Easy/00610-triangle-judgement.sql: -------------------------------------------------------------------------------- 1 | -- sum of 2 sides should be always greater than 3rd side, for all combinations 2 | 3 | 4 | select *, 5 | (case when x + y > z and x + z > y and y + z > x then 'Yes' else 'No' end) as triangle 6 | from Triangle 7 | 8 | -- amazon- 3 9 | -- apple- 3 10 | -- facebook- 2 11 | -------------------------------------------------------------------------------- /Easy/00613-shortest-distance-in-a-line.sql: -------------------------------------------------------------------------------- 1 | -- cross joining all the points from 2 tables, except the ones where they are same 2 | -- find the min of absolute distance 3 | 4 | select min(abs(a - b)) as shortest 5 | from 6 | (select p1.x as a, p2.x as b 7 | from Point p1 cross join Point p2 8 | where p1.x != p2.x) temp 9 | 10 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 11 | -- concise version of the above 12 | 13 | select min(abs(p1.x - p2.x)) as shortest 14 | from Point p1 cross join Point p2 15 | where p1.x != p2.x 16 | 17 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 18 | -- pull min distance with a where condition 19 | 20 | select min(p1.x - p2.x) as shortest 21 | from Point p1, Point p2 22 | where p1.x >
p2.x 23 | 24 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 25 | -- sort the table and do lag- take the diff between the current value and the previous one. In sorted order the smallest difference is always between adjacent values, so comparing each value with its predecessor is enough 26 | -- pull the min distance 27 | 28 | with CTE as 29 | (select x - lag(x) over(order by x) as distance 30 | from Point) 31 | 32 | select min(distance) as shortest from CTE 33 | 34 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 35 | -- picking the lowest distance, 1st row will always be null hence use offset 36 | 37 | select x - lag(x) over(order by x) as shortest 38 | from Point 39 | order by 1 asc 40 | limit 1 offset 1 41 | 42 | 43 | -- no companies listed 44 | -------------------------------------------------------------------------------- /Easy/00619-biggest-single-number.sql: -------------------------------------------------------------------------------- 1 | -- create a temp table to get nums with count = 1 2 | -- pick max num out of those 3 | 4 | select max(num) as num 5 | from( 6 | select num 7 | from MyNumbers 8 | group by num 9 | having count(*) = 1 10 | ) temp 11 | 12 | 13 | -- apple- 2 14 | -------------------------------------------------------------------------------- /Easy/00620-not-boring-movies.sql: -------------------------------------------------------------------------------- 1 | -- use % for modulo function 2 | 3 | select * 4 | from Cinema 5 | where id % 2 = 1 and description != 'boring' 6 | order by rating desc 7 | 8 | 9 | -- amazon- 2 10 | -- apple- 2 11 | -------------------------------------------------------------------------------- /Easy/00627-swap-salary.sql: -------------------------------------------------------------------------------- 1 | -- update statement 2 | -- update sex, but we have conditions, so update using case statement 3 | 4 | update Salary 5 | set sex = (case when sex = 'm' then 'f' else 'm' end) 6 | 7 | -- apple- 2 8 | -- yahoo- 2 9 | -------------------------------------------------------------------------------- /Easy/01050-actors-and-director-who-cooperated-at-least-three-times.sql: -------------------------------------------------------------------------------- 1 | -- aggregate- group by 2 columns, count 2 | 3 | select actor_id, director_id 4 | from ActorDirector 5 | group by 1, 2 6 | having count(*) >= 3 7 | 8 | 9 | -- amazon- 3 10 | -------------------------------------------------------------------------------- /Easy/01068-product-sales-analysis-i.sql: -------------------------------------------------------------------------------- 1 | -- we needed details for each sale_id, so Sales will be left table.
2 | 3 | select product_name, year, price 4 | from Sales s left join Product p on s.product_id = p.product_id 5 | 6 | 7 | -- google- 2 8 | -- adobe- 2 9 | -- amazon- 1 10 | -------------------------------------------------------------------------------- /Easy/01069-prooduct-sales-analysis-ii.sql: -------------------------------------------------------------------------------- 1 | -- simple aggregate 2 | 3 | select product_id, sum(quantity) as total_quantity 4 | from Sales 5 | group by 1 6 | 7 | -- amazon- 1 8 | -------------------------------------------------------------------------------- /Easy/01075-project-employees-i.sql: -------------------------------------------------------------------------------- 1 | -- basic aggregate function avg, group by 2 | 3 | select project_id, round(avg(experience_years), 2) as average_years 4 | from Project p 5 | left join Employee e 6 | on p.employee_id = e.employee_id 7 | group by p.project_id 8 | 9 | 10 | -- amazon- 2 11 | -- facebook- 1 12 | -------------------------------------------------------------------------------- /Easy/01076-project-employees-ii.sql: -------------------------------------------------------------------------------- 1 | -- in subquery, find the highest employee count per project using order and limit 2 | -- in the main query, count should match that in subquery 3 | 4 | select project_id 5 | from Project 6 | group by project_id 7 | having count(employee_id) = (select count(employee_id) 8 | from Project 9 | group by project_id 10 | order by 1 desc 11 | limit 1) 12 | 13 | --------------------------------------------------------------------------------------------------------------------------------------------------------- 14 | -- using window function 15 | -- we are ordering by aggregate function, so no partition by 16 | -- use group by 17 | 18 | with cte as 19 | (select project_id, dense_rank() over(order by count(employee_id) desc) as rnk 20 | from Project 21 | group by project_id) 22 | 23 | select project_id 24 | from cte 25 | where rnk = 1 26 | 27 | 28 | -- facebook 29 | -------------------------------------------------------------------------------- /Easy/01082-sales-analysis-i.sql: -------------------------------------------------------------------------------- 1 | -- calculate max sum(price) in subquery by order and limit 2 | -- in the main query, return sellers whose sum(price) = that calculated in subquery 3 | 4 | select seller_id 5 | from Sales 6 | group by 1 7 | having sum(price) = (select sum(price) 8 | from Sales 9 | group by seller_id 10 | order by 1 desc 11 | limit 1) 12 | ---------------------------------------------------------------------------------------------------------------------------------------------------------------- 13 | -- better time complexity 14 | -- using window function 15 | 16 | with cte as 17 | (select seller_id, dense_rank() over(order by sum(price) desc) as rnk 18 | from Sales 19 | group by 1) 20 | 21 | select distinct seller_id 22 | from cte 23 | where rnk = 1 24 | 25 | 26 | -- amazon 27 | -------------------------------------------------------------------------------- /Easy/01083-sales-analysis-ii.sql: -------------------------------------------------------------------------------- 1 | -- using join- join Product and Sales to get the product name, instead of doing a nested query 2 | 3 | select distinct s.buyer_id 4 | from Sales s 5 | join Product p 6 | on p.product_id = s.product_id 7 | where p.product_name = 'S8' 8 | and s.buyer_id not in 9 | (select s2.buyer_id 10 | from Sales s2 11 | join Product p2
12 | on p2.product_id = s2.product_id 13 | where p2.product_name = 'iPhone') 14 | 15 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 16 | -- sum of records for each buyer_id 17 | -- if they bought S8, sum should be > 0 18 | -- for iphone, sum should be = 0 19 | 20 | select s.buyer_id 21 | from Sales s 22 | join Product p 23 | using(product_id) 24 | group by s.buyer_id 25 | having sum(p.product_name = 'S8') > 0 and sum(p.product_name = 'iPhone') = 0 26 | 27 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 28 | -- without using join- nested queries 29 | 30 | select distinct buyer_id 31 | from Sales 32 | where product_id = (select product_id 33 | from Product 34 | where product_name = 'S8') 35 | and buyer_id not in 36 | (select buyer_id 37 | from Sales 38 | where product_id = (select product_id 39 | from Product 40 | where product_name = 'iPhone')) 41 | 42 | 43 | -- amazon- 1 44 | -------------------------------------------------------------------------------- /Easy/01084-sales-analysis-iii.sql: -------------------------------------------------------------------------------- 1 | -- good one 2 | -- when grouped by, min date should be in 1st quarter AND max date should be in first quarter 3 | -- if max date is in another quarter, then do not o/p 4 | -- use HAVING clause for these conditions 5 | 6 | select s.product_id, p.product_name 7 | from Sales s 8 | join Product p 9 | using(product_id) 10 | group by 1 11 | having min(sale_date) >= '2019-01-01' and max(sale_date) <= '2019-03-31' 12 | 13 | ------------------------------------------------------------------------------------------------------------------------------------------------------------ 14 | -- when product is between the given dates then 0 else 1- when sum > 0 that means product was sold outside the criteria 15 | -- only o/p those products where flag = 0 16 | 17 | with cte as 18 | (select product_id, 19 | sum(case when sale_date between '2019-01-01' and '2019-03-31' then 0 else 1 end) as flag 20 | from Sales s 21 | group by 1) 22 | 23 | select s.product_id, p.product_name 24 | from cte s 25 | join Product p 26 | using(product_id) 27 | where flag = 0 28 | -------------------------------------------------------------------------------- /Easy/01113-reported-posts.sql: -------------------------------------------------------------------------------- 1 | -- we need to find count of distinct posts which were reported for a particular reason 2 | -- filter date and action 3 | -- group by reason 4 | 5 | select extra as report_reason, count(distinct post_id) as report_count 6 | from Actions 7 | where action = 'report' 8 | and action_date = '2019-07-04' 9 | group by 1 10 | 11 | -- facebook- 1 12 | -------------------------------------------------------------------------------- /Easy/01141-user-activity-for-the-past-30-days-i.sql: -------------------------------------------------------------------------------- 1 | -- count distinct users on each day 2 | -- we want days starting from 2019-06-28 to 2019-07-27- we know this only after multiple submissions 3 | 4 | 5 | select activity_date as day, count(distinct user_id) as active_users 6 | from Activity 7 | where activity_date > date_sub('2019-07-27', interval 30 day) and activity_date <= '2019-07-27' 8 | group by 1 9 | 10 | -- facebook- 3 11 | -- bloomberg-
2 12 | -- zoom- 1 13 | -------------------------------------------------------------------------------- /Easy/01142-user-activity-for-the-past-30-days-ii.sql: -------------------------------------------------------------------------------- 1 | -- question is to find session with any activity in the given date range per user 2 | -- then average it 3 | -- notice we used interval 29 day instead of 30 because 30 includes 1 more day which is not required 4 | 5 | select ifnull(round(count(distinct session_id)/count(distinct user_id), 2), 0) as average_sessions_per_user 6 | from Activity 7 | where activity_date between date_sub('2019-07-27', interval 29 day) and '2019-07-27' 8 | 9 | -- facebook- 1 10 | -- zoom- 1 11 | -------------------------------------------------------------------------------- /Easy/01148-article-views-i.sql: -------------------------------------------------------------------------------- 1 | -- use DISTINCT because if there is no Primary Key in the table, the values will be repeated(duplicate rows). 2 | -- We only want a list of IDs(unique) 3 | 4 | 5 | select distinct author_id as id 6 | from Views 7 | where author_id = viewer_id 8 | order by id 9 | 10 | -- amazon- 4 11 | -- yahoo- 4 12 | -- bloomberg- 3 13 | -- google- 2 14 | -- adobe- 2 15 | -- linkedin- 1 16 | -------------------------------------------------------------------------------- /Easy/01173-immediate-food-delivery-i.sql: -------------------------------------------------------------------------------- 1 | -- simple condition in aggregate function- count immediate, divide by total rows in the table 2 | 3 | select round(sum(order_date = customer_pref_delivery_date) / count(*) * 100, 2) as immediate_percentage 4 | from Delivery 5 | --------------------------------------------------------------------------------------------------------------------------------------------- 6 | -- same as above but using case 7 | 8 | select round(sum( case when order_date = customer_pref_delivery_date then 1 else 0 end) / count(*) * 100, 2) as immediate_percentage 9 | from Delivery 10 | 11 | 12 | -- doordash- 2 13 | -------------------------------------------------------------------------------- /Easy/01179-reformat-department-table.sql: -------------------------------------------------------------------------------- 1 | -- we need to group by to get all ids in 1 row 2 | -- so we use aggregate function 3 | -- can also use MAX OR MIN instead of sum 4 | 5 | select id, 6 | sum(case when month = 'Jan' then revenue end) as Jan_Revenue, 7 | sum(case when month = 'Feb' then revenue end) as Feb_Revenue, 8 | sum(case when month = 'Mar' then revenue end) as Mar_Revenue, 9 | sum(case when month = 'Apr' then revenue end) as Apr_Revenue, 10 | sum(case when month = 'May' then revenue end) as May_Revenue, 11 | sum(case when month = 'Jun' then revenue end) as Jun_Revenue, 12 | sum(case when month = 'Jul' then revenue end) as Jul_Revenue, 13 | sum(case when month = 'Aug' then revenue end) as Aug_Revenue, 14 | sum(case when month = 'Sep' then revenue end) as Sep_Revenue, 15 | sum(case when month = 'Oct' then revenue end) as Oct_Revenue, 16 | sum(case when month = 'Nov' then revenue end) as Nov_Revenue, 17 | sum(case when month = 'Dec' then revenue end) as Dec_Revenue 18 | from Department 19 | group by id 20 | 21 | -- amazon- 3 22 | -------------------------------------------------------------------------------- /Easy/01211-queries-quality-and-percentage.sql: -------------------------------------------------------------------------------- 1 | -- basic
aggregation 2 | -- filter out rows where query_name is null 3 | 4 | select query_name, 5 | round(sum(rating / position) / count(*), 2) as quality, 6 | round(sum(case when rating < 3 then 1 else 0 end)*100/ count(*), 2) as poor_query_percentage 7 | from Queries 8 | where query_name is not null 9 | group by query_name 10 | 11 | -- amazon- 2 12 | -- facebook- 1 13 | -------------------------------------------------------------------------------- /Easy/01241-number-of-comments-per-post.sql: -------------------------------------------------------------------------------- 1 | -- two tables self join- left and right 2 | -- pick sub_id from left where parent_id is null 3 | -- count distinct subId from the right table 4 | -- left join 5 | 6 | select distinct l.sub_id as post_id, count(distinct r.sub_id) as number_of_comments 7 | from Submissions l 8 | left join Submissions r 9 | on l.sub_id = r.parent_id 10 | where l.parent_id is null 11 | group by 1 12 | order by 1 13 | 14 | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 15 | -- longer version of the above 16 | 17 | with left_table as 18 | (select distinct sub_id 19 | from Submissions 20 | where parent_id is null), 21 | 22 | right_table as 23 | (select parent_id, count(distinct sub_id) as number_of_comments 24 | from Submissions 25 | group by 1) 26 | 27 | select l.sub_id as post_id, ifnull(r.number_of_comments, 0) as number_of_comments 28 | from left_table l 29 | left join right_table r 30 | on l.sub_id = r.parent_id 31 | order by 1 32 | 33 | 34 | -- facebook- 1 35 | -------------------------------------------------------------------------------- /Easy/01251-average-selling-price.sql: -------------------------------------------------------------------------------- 1 | -- do a left join to get all product ids, if null then put 0 2 | -- calculate avg 3 | -- BE CAREFUL WITH WHICH TABLE'S COLUMN IS USED FOR GROUP BY 4 | 5 | 6 | select p.product_id, ifnull(round(sum(price*units)/sum(units), 2), 0) as average_price 7 | from Prices p 8 | left join UnitsSold u 9 | on p.product_id = u.product_id 10 | and purchase_date between start_date and end_date 11 | group by p.product_id 12 | 13 | -- amazon- 4 14 | -- adobe- 2 15 | -------------------------------------------------------------------------------- /Easy/01280-students-and-examinations.sql: -------------------------------------------------------------------------------- 1 | 2 | -- first, create a cross table between students and subjects 3 | -- then, left join Examination table, and count from Examination table 4 | -- group by and order by 5 | -- BETTER TIME COMPLEXITY from Leetcode 6 | 7 | with Cross_All as ( 8 | select st.student_id, st.student_name, sb.subject_name 9 | from Students st cross join Subjects sb 10 | ) 11 | 12 | select c.student_id, c.student_name, c.subject_name, count(e.subject_name) as attended_exams 13 | from Cross_All c 14 | left join Examinations e 15 | on c.student_id = e.student_id 16 | and c.subject_name = e.subject_name 17 | group by student_id, subject_name 18 | order by student_id, subject_name 19 | 20 | ------------------------------------------------------------------------------------------------------------------- 21 | 22 | -- same as above, but code is simplified.
Removed a layer, joined all 3 tables 23 | 24 | select st.student_id, st.student_name, sb.subject_name, count(e.subject_name) as attended_exams 25 | from Students st 26 | cross join Subjects sb 27 | left join Examinations e 28 | on st.student_id = e.student_id 29 | and sb.subject_name = e.subject_name 30 | group by student_id, subject_name 31 | order by student_id, subject_name 32 | 33 | 34 | -- amazon- 3 35 | -- yahoo- 2 36 | -- roblox- 1 37 | -------------------------------------------------------------------------------- /Easy/01294-weather-type-in-each-country.sql: -------------------------------------------------------------------------------- 1 | -- CASE statements and JOIN 2 | 3 | select c.country_name, 4 | (case when avg(weather_state) <= 15 then 'Cold' 5 | when avg(weather_state) >= 25 then 'Hot' 6 | else 'Warm' end) as weather_type 7 | from Countries c 8 | join Weather w 9 | using(country_id) 10 | where day like '2019-11%' 11 | group by c.country_id 12 | 13 | -- point72- 1 14 | -------------------------------------------------------------------------------- /Easy/01303-find-the-team-size.sql: -------------------------------------------------------------------------------- 1 | -- if we do not use order by in over(), we do not get running total, just normal aggregate for all rows within that partition 2 | 3 | select employee_id, count(*) over(partition by team_id) as team_size 4 | from Employee 5 | order by 1 6 | 7 | -- amazon- 1 8 | -------------------------------------------------------------------------------- /Easy/01322-ads-performance.sql: -------------------------------------------------------------------------------- 1 | -- my first intuition was to create 2 ctes- 1 with click count and 1 with (click + view) count using WHERE filter 2 | -- but this approach will not work because we cannot filter rows because we want all ad_ids, even those who were not clicked/viewed 3 | 4 | -- so we need to use case statement or the below approach 5 | 6 | select ad_id, 7 | ifnull(round((sum(action = 'Clicked'))/(sum(action = 'Clicked') + sum(action = 'Viewed'))*100, 2), 0) as ctr 8 | from Ads 9 | group by 1 10 | order by 2 desc, 1 11 | 12 | -- facebook- 1 13 | -------------------------------------------------------------------------------- /Easy/01327-list-the-products-ordered-in-a-period.sql: -------------------------------------------------------------------------------- 1 | -- normal join- can use Inner or Left join, use HAVING CLAUSE for sum of units 2 | 3 | select p.product_name, sum(unit) as unit 4 | from Orders o 5 | left join Products p 6 | on o.product_id = p.product_id 7 | where o.order_date like '2020-02%' 8 | group by o.product_id 9 | having sum(unit) >= 100 10 | 11 | 12 | -- amazon- 1 13 | -------------------------------------------------------------------------------- /Easy/01350-students-with-invalid-departments.sql: -------------------------------------------------------------------------------- 1 | -- subquery- use WHERE 2 | 3 | select id, name 4 | from Students 5 | where department_id not in 6 | (select id 7 | from Departments) 8 | 9 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 10 | 11 | -- join- use WHERE id is null 12 | 13 | select s.id, s.name 14 | from Students s 15 | left join Departments d 16 | on d.id = s.department_id 17 | where d.id is null 18 | 19 | 20 | -- amazon- 1 21 | 
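-- a third equivalent sketch for the query above: an anti-join with NOT EXISTS
-- unlike NOT IN, it behaves predictably even when the subquery column can contain NULLs (not the case in this schema, but a safe habit)

select s.id, s.name
from Students s
where not exists
    (select 1
     from Departments d
     where d.id = s.department_id)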
-------------------------------------------------------------------------------- /Easy/01378-replace-employee-id-with-the-unique-identifier.sql: -------------------------------------------------------------------------------- 1 | -- we needed to get values such that if unique id is not present, it should be null. 2 | -- so we used UNI table as right table 3 | 4 | select unique_id, name 5 | from Employees e left join EmployeeUNI eu on e.id = eu.id 6 | 7 | 8 | -- google- 2 9 | -- amazon- 3 10 | -- point72- 1 11 | -------------------------------------------------------------------------------- /Easy/01407-top-travellers.sql: -------------------------------------------------------------------------------- 1 | -- using ifnull around sum()- can also use coalesce 2 | 3 | select u.name, ifnull(sum(r.distance), 0) as travelled_distance 4 | from Users u 5 | left join Rides r 6 | on u.id = r.user_id 7 | group by u.id 8 | order by 2 desc, 1 asc 9 | 10 | 11 | -- point72- 1 12 | -------------------------------------------------------------------------------- /Easy/01421-npv-queries.sql: -------------------------------------------------------------------------------- 1 | -- simple left join, return 0 where null 2 | -- see how to join using USING keyword on 2 variables on row 8 3 | 4 | select q.id, q.year, ifnull(n.npv, 0) as npv 5 | from Queries q 6 | left join NPV n 7 | on q.id = n.id and q.year = n.year 8 | -- using(id, year) 9 | 10 | -- amazon- 1 11 | -------------------------------------------------------------------------------- /Easy/01435-create-a-sessions-bar-chart.sql: -------------------------------------------------------------------------------- 1 | -- case statements 2 | 3 | select '[0-5>' as bin, count(case when (duration/60) >= 0 and (duration/60) < 5 then session_id end) as total 4 | from Sessions 5 | union 6 | select '[5-10>' as bin, count(case when (duration/60) >= 5 and (duration/60) < 10 then session_id end) as total 7 | from Sessions 8 | union 9 | select '[10-15>' as bin, count(case when (duration/60) >= 10 and (duration/60) < 15 then session_id end) as total 10 | from Sessions 11 | union 12 | select '15 or more' as bin, count(case when (duration/60) >= 15 then session_id end) as total 13 | from Sessions 14 | 15 | -- twitch- 1 16 | 17 | -- another solution would be to create an upper bound and lower bound, and if the duration falls into that bin, count it 18 | -------------------------------------------------------------------------------- /Easy/01484-group-sold-products-by-the-date.sql: -------------------------------------------------------------------------------- 1 | -- use GROUP_CONCAT(column order by col) for getting all products in 1 row 2 | -- use count(distinct product) to count distinct products only 3 | 4 | select sell_date, count(distinct product) as num_sold, group_concat(distinct product order by product) as products 5 | from Activities 6 | group by 1 7 | order by 1 8 | 9 | 10 | ---------------------------------------------------------------------------------------------------------------------------------- 11 | 12 | -- use 'separator' inside group_concat to use some other separator 13 | 14 | select sell_date, count(distinct product) as num_sold, group_concat(distinct product order by product separator ',') as products 15 | from Activities 16 | group by 1 17 | order by 1 18 | 19 | 20 | -- adobe- 2 21 | -- startup- 1 22 | -------------------------------------------------------------------------------- /Easy/01495-friendly-movies-streamed-last-month.sql:
-------------------------------------------------------------------------------- 1 | -- use DISTINCT because we want distinct titles 2 | -- use where condition for filter 3 | 4 | select distinct title 5 | from Content c join TVProgram t 6 | on c.content_id = t.content_id 7 | where c.Kids_content = 'Y' and c.content_type = 'Movies' and t.program_date like '2020-06%' 8 | 9 | 10 | -- amazon- 1 11 | -------------------------------------------------------------------------------- /Easy/01511-customer_order-frequency.sql: -------------------------------------------------------------------------------- 1 | -- we need customer ids from 2 separate tables using 'and' condition 2 | -- 1st table- get sum of expenditures of all customers in June 2020, filter by customers whose sum >= 100 3 | -- 2nd table- get sum of expenditures of all customers in July 2020, filter by customers whose sum >= 100 4 | -- pull all customers who are in table1 AND table 2 5 | 6 | select c.customer_id, c.name 7 | from Customers c 8 | where customer_id in 9 | (select customer_id 10 | from Orders o 11 | join Product p 12 | on o.product_id = p.product_id 13 | where left(order_date, 7) = '2020-06' 14 | group by customer_id 15 | having sum(quantity*price) >= 100) 16 | and customer_id in 17 | (select customer_id 18 | from Orders o 19 | join Product p 20 | on o.product_id = p.product_id 21 | where left(order_date, 7) = '2020-07' 22 | group by customer_id 23 | having sum(quantity*price) >= 100) 24 | 25 | --------------------------------------------------------------------------------------------------------------------- 26 | 27 | -- create a temp table- join all tables 28 | -- create 2 additional columns- expenditure in June and in July- CASE, AGGREGATE 29 | -- in the main query, pull customer ids where expenditure in both columns are >= 100 30 | 31 | with CTE as(select c.customer_id, c.name, 32 | sum(case when left(o.order_date, 7) = '2020-06' then p.price*o.quantity else 0 end) june_spent, 33 | sum(case when left(o.order_date, 7) = '2020-07' then p.price*o.quantity else 0 end) july_spent 34 | from Customers c 35 | join Orders o 36 | on c.customer_id = o.customer_id 37 | join Product p 38 | on p.product_id = o.product_id 39 | group by 1) 40 | 41 | select customer_id, name 42 | from CTE 43 | where june_spent >= 100 and july_spent >= 100 44 | 45 | 46 | -- amazon- 1 47 | -------------------------------------------------------------------------------- /Easy/01517-find-users-with-valid-emails.sql: -------------------------------------------------------------------------------- 1 | -- regexp pattern 2 | -- ^ = the string must start with this 3 | -- * = zero or more of the preceding characters 4 | -- [.]
= should be a dot 5 | -- $ = end- no characters after this 6 | 7 | select * 8 | from Users 9 | where mail regexp '^[A-Za-z][A-Za-z0-9_.-]*@leetcode[.]com$' 10 | 11 | -- no companies listed 12 | -------------------------------------------------------------------------------- /Easy/01527-patients-with-a-condition.sql: -------------------------------------------------------------------------------- 1 | -- need condition starting with DIAB1 or 'some word, space, DIAB1' 2 | 3 | select * 4 | from Patients 5 | where conditions like 'DIAB1%' or conditions like '% DIAB1%' 6 | 7 | 8 | -- no companies listed 9 | -------------------------------------------------------------------------------- /Easy/01543-fix-product-name-format.sql: -------------------------------------------------------------------------------- 1 | -- lower()- lower case 2 | -- trim() to remove leading and trailing spaces 3 | -- date_format- to format date 4 | 5 | select trim(lower(product_name)) as product_name, date_format(sale_date, '%Y-%m') as sale_date, count(sale_id) as total 6 | from Sales 7 | group by 1, 2 8 | order by 1, 2 9 | 10 | -- no companies listed 11 | -------------------------------------------------------------------------------- /Easy/01565-unique-orders-and-customers-per-month.sql: -------------------------------------------------------------------------------- 1 | -- simple aggregate with WHERE filter 2 | 3 | select date_format(order_date, '%Y-%m') as 'month', 4 | count(distinct order_id) as order_count, 5 | count(distinct customer_id) as customer_count 6 | from Orders 7 | where invoice > 20 8 | group by 1 9 | 10 | -- whole foods market- 1 11 | -------------------------------------------------------------------------------- /Easy/01571-warehouse-manager.sql: -------------------------------------------------------------------------------- 1 | -- multiply units, width, length, height, then calculate sum() of those 2 | 3 | select w.name as warehouse_name, 4 | sum(units * Width * Length * Height) as volume 5 | from Warehouse w 6 | left join Products p 7 | on w.product_id = p.product_id 8 | group by 1 9 | 10 | ------------------------------------------------------------------------------------------------------------- 11 | -- breaking down the above one 12 | 13 | with CTE as ( 14 | select product_id, (Width * Length * Height) as size 15 | from Products) 16 | 17 | select name as warehouse_name, sum(units * size) as volume 18 | from Warehouse w 19 | left join CTE c 20 | on c.product_id = w.product_id 21 | group by name 22 | 23 | -- amazon- 1 24 | -------------------------------------------------------------------------------- /Easy/01581-customer-who-visited-but-did-not-make-any-transactions.sql: -------------------------------------------------------------------------------- 1 | -- write a query to get a dataset where we get all visits (left join on Transactions) 2 | -- now, we have all the visits, but only transactions corresponding to those visits 3 | -- pick the visits where transactions are null (visited but made no transactions) 4 | -- from this dataset, get count of such visits by each customer 5 | 6 | select customer_id, count(visit_id) as count_no_trans 7 | from 8 | ( 9 | select v.visit_id, v.customer_id, t.transaction_id, t.visit_id as transaction_visit 10 | from Visits v left join Transactions t on v.visit_id = t.visit_id) temp 11 | where transaction_visit is null 12 | group by customer_id 13 | 14 |
---------------------------------------------------------------------------------------------------------------------------------------------- 15 | 16 | -- same solution as above, just removed a layer 17 | -- selected necessary variables, aggregated, joined, and then put a filter condition(where), group by for aggregate function 18 | -- BETTER TIME COMPLEXITY from Leetcode 19 | 20 | select customer_id, count(v.visit_id) as count_no_trans 21 | from 22 | Visits v left join Transactions t on v.visit_id = t.visit_id 23 | where t.visit_id is null 24 | group by customer_id 25 | 26 | -- amazon- 5 27 | -- apple- 3 28 | -- adobe- 2 29 | -- nerdwallet- 1 30 | -------------------------------------------------------------------------------- /Easy/01587-bank-account-summary-ii.sql: -------------------------------------------------------------------------------- 1 | -- simple aggregate with JOIN and HAVING 2 | 3 | select u.name, sum(t.amount) as balance 4 | from Users u 5 | left join Transactions t 6 | on u.account = t.account 7 | group by u.account 8 | having sum(t.amount) > 10000 9 | 10 | -- uber- 2 11 | -------------------------------------------------------------------------------- /Easy/01607-sellers-with-no-sales.sql: -------------------------------------------------------------------------------- 1 | -- select sellers from Orders table 2 | -- then select Sellers from Sellers table who are not in temp 3 | 4 | select seller_name 5 | from Seller s 6 | where seller_id not in 7 | (select seller_id 8 | from Orders 9 | where sale_date like '2020%') 10 | order by seller_name 11 | 12 | -------------------------------------------------------------------------------------------------------------------------------------------- 13 | -- using JOIN with conditions 14 | 15 | select s.seller_name 16 | from Seller s 17 | left join Orders o 18 | on o.seller_id = s.seller_id and sale_date like '2020%' 19 | where o.seller_id is null 20 | order by seller_name 21 | 22 | -- no companies listed 23 | -------------------------------------------------------------------------------- /Easy/01623-all-valid-triplets-that-can-represent-a-country.sql: -------------------------------------------------------------------------------- 1 | select 2 | a.student_name as member_A, 3 | b.student_name as member_B, 4 | c.student_name as member_C 5 | from SchoolA a, SchoolB b, SchoolC c 6 | where 7 | a.student_id != b.student_id and 8 | b.student_id != c.student_id and 9 | c.student_id != a.student_id and 10 | a.student_name != b.student_name and 11 | b.student_name != c.student_name and 12 | c.student_name != a.student_name 13 | 14 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 15 | -- best time complexity 16 | 17 | select 18 | a.student_name as member_A, 19 | b.student_name as member_B, 20 | c.student_name as member_C 21 | from SchoolA a 22 | join SchoolB b 23 | join SchoolC c 24 | on 25 | a.student_id != b.student_id and 26 | b.student_id != c.student_id and 27 | c.student_id != a.student_id and 28 | a.student_name != b.student_name and 29 | b.student_name != c.student_name and 30 | c.student_name != a.student_name 31 | 32 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 33 | -- second best time complexity 34 | 35 
| select 36 | a.student_name as member_A, 37 | b.student_name as member_B, 38 | c.student_name as member_C 39 | from SchoolA a 40 | join SchoolB b 41 | on a.student_id != b.student_id and 42 | a.student_name != b.student_name 43 | join SchoolC c 44 | on b.student_id != c.student_id and 45 | c.student_id != a.student_id and 46 | b.student_name != c.student_name and 47 | c.student_name != a.student_name 48 | 49 | -- amazon- 1 50 | -------------------------------------------------------------------------------- /Easy/01633-percentage-of-users-attended-a-contest.sql: -------------------------------------------------------------------------------- 1 | -- no need to join 2 | -- use subquery because if we do count(u.user_id), it would show count from r table because r is left table(if we join) 3 | 4 | 5 | select contest_id, round(count(r.user_id)*100/(select count(*) from Users), 2) as percentage 6 | from Register r 7 | group by 1 8 | order by 2 desc, 1 asc 9 | 10 | 11 | -- no companies listed 12 | -------------------------------------------------------------------------------- /Easy/01661-average-time-of-process-per-machine.sql: -------------------------------------------------------------------------------- 1 | -- GOOD QUESTION 2 | -- join - treat this table as 2 different tables- one for start time and one for end time 3 | -- match on machine_id and process_id, do not match on activity type- we need different activity type 4 | -- use avg() and group by- this will take care of calculating avg 5 | -- use round() to round to 3 places 6 | 7 | select s.machine_id, round(avg(e.timestamp - s.timestamp), 3) as processing_time 8 | from Activity s 9 | join Activity e 10 | on s.machine_id = e.machine_id 11 | and s.process_id = e.process_id 12 | and s.activity_type = 'start' 13 | and e.activity_type = 'end' 14 | group by s.machine_id 15 | 16 | 17 | -- amazon- 3 18 | -- apple- 2 19 | -- bloomberg- 2 20 | -- microsoft- 2 21 | -- adobe- 2 22 | -- google- 2 23 | -- facebook- 1 24 | -------------------------------------------------------------------------------- /Easy/01667-fix-names-in-a-table.sql: -------------------------------------------------------------------------------- 1 | -- concat first upper case and rest lower case 2 | 3 | select user_id, concat(upper(substr(name, 1, 1)), lower(substr(name, 2))) as name 4 | from Users 5 | order by 1 6 | 7 | ---------------------------------------------------------------------------------------------------------- 8 | 9 | -- same as above but used left instead of substr 10 | 11 | select user_id, concat(upper(left(name, 1)), lower(substr(name, 2))) as name 12 | from Users 13 | order by 1 14 | 15 | 16 | -- adobe- 2 17 | -------------------------------------------------------------------------------- /Easy/01677-products-worth-over-invoices.sql: -------------------------------------------------------------------------------- 1 | -- summed all columns 2 | -- grouped by product id 3 | -- null sum = null, so 0 using ifnull/ coalesce 4 | 5 | select p.name, 6 | ifnull(sum(rest), 0) as rest, 7 | ifnull(sum(paid), 0) as paid, 8 | ifnull(sum(canceled), 0) as canceled, 9 | ifnull(sum(refunded), 0) as refunded 10 | from Product p 11 | left join Invoice i 12 | using(product_id) 13 | group by p.product_id 14 | order by 1 15 | 16 | -- no companies listed 17 | -------------------------------------------------------------------------------- /Easy/01683-invalid-tweets.sql: -------------------------------------------------------------------------------- 1 | -- use LENGTH() instead

-- amazon- 2
-- twitter- 1
--------------------------------------------------------------------------------
/Easy/01693-daily_leads-and-partners.sql:
--------------------------------------------------------------------------------
-- basic aggregate functions- count distinct, group by 2 columns

select date_id, make_name,
count(distinct lead_id) unique_leads,
count(distinct partner_id) unique_partners
from DailySales
group by date_id, make_name

-- no companies listed
--------------------------------------------------------------------------------
/Easy/01729-find-followers-count.sql:
--------------------------------------------------------------------------------
-- count distinct followers for each user

select user_id, count(distinct follower_id) as followers_count
from Followers
group by 1
order by 1 asc

-- Tesla- 1
--------------------------------------------------------------------------------
/Easy/01731-the-number-of-employees-which-report-to-each-employee.sql:
--------------------------------------------------------------------------------
-- 2 copies of the table- m for the report rows, e for the managers
-- pull the manager ids from m.reports_to (this gets all managers), the manager's name from e, and the report count and average age from m
-- we have all the information in m; we only need the name from e
-- since reports_to can be null, filter the nulls out

select m.reports_to as employee_id, e.name, count(m.reports_to) as reports_count, round(avg(m.age)) as average_age
from Employees m
left join Employees e
on m.reports_to = e.employee_id
where m.reports_to is not null
group by m.reports_to
order by 1

----------------------------------------------------------------------------------------------------
-- solution from LC
-- pull employee_id and name from E1, count and age from E2
-- join on E1.employee_id = E2.reports_to

SELECT E1.employee_id, E1.name, COUNT(E2.employee_id) as reports_count, round(avg(E2.age)) AS average_age
FROM Employees E1
INNER JOIN Employees E2
ON E1.employee_id = E2.reports_to
group by E1.employee_id
order by E1.employee_id

-- amazon- 2
-- coderbyte- 1
--------------------------------------------------------------------------------
/Easy/01741-find-total-time-spent-by-each-employee.sql:
--------------------------------------------------------------------------------
-- simple aggregation- we need the total sum of the differences between out and in times

select event_day as day, emp_id, sum(out_time-in_time) as total_time
from Employees
group by 1, 2

-- adobe- 2
-- amazon- 1
--------------------------------------------------------------------------------
/Easy/01757-recyclable-and-low-fat-products.sql:
--------------------------------------------------------------------------------
-- use WHERE for the conditions

select product_id
from Products
where low_fats = 'Y' and recyclable = 'Y'

-- amazon- 19
-- adobe- 10
-- google- 8
-- facebook- 3
-- apple- 3
-- microsoft- 3
-- yahoo- 2
-- bloomberg- 2
--------------------------------------------------------------------------------
/Easy/01777-products-price-for-each-store.sql:
--------------------------------------------------------------------------------
-- conditional aggregation pivots all of a group's rows into 1 row
-- can use either max() or sum()

select product_id,
max(case when store = 'store1' then price end) as store1,
max(case when store = 'store2' then price end) as store2,
max(case when store = 'store3' then price end) as store3
from Products
group by 1

-- amazon- 2
--------------------------------------------------------------------------------
/Easy/01789-primary-department-for-each-employee.sql:
--------------------------------------------------------------------------------
-- there are 2 cases- one for employees with a single dept, one for multiple depts
-- when there are different cases, use UNION
-- if count = 1, return that dept (single-dept case)
-- primary_flag = 'Y' covers the multiple-dept case

select employee_id, department_id
from Employee
group by 1
having count(employee_id) = 1
union
select employee_id, department_id
from Employee
where primary_flag = 'Y'

----------------------------------------------------------------------------------------------------
-- same, but counting department_id inside the count function

select employee_id, department_id
from Employee
group by 1
having count(department_id) = 1
union
select employee_id, department_id
from Employee
where primary_flag = 'Y'

-- facebook- 2
--------------------------------------------------------------------------------
/Easy/01795-rearrange-products-table.sql:
--------------------------------------------------------------------------------
-- beginner solution- using unions
-- build the unpivoted table without nulls

select product_id, 'store1' as store, store1 as price
from Products
where store1 is not null
union
select product_id, 'store2' as store, store2 as price
from Products
where store2 is not null
union
select product_id, 'store3' as store, store3 as price
from Products
where store3 is not null

----------------------------------------------------------------------------------------------------
-- build the unpivoted table with nulls, then filter the null rows out

select product_id, store, price
from
(select product_id, 'store1' as store, store1 as price
from Products
union
select product_id, 'store2' as store, store2 as price
from Products
union
select product_id, 'store3' as store, store3 as price
from Products) t
where price is not null

-- bloomberg- 2
-- apple- 2
-- amazon- 1
--------------------------------------------------------------------------------
/Easy/01809-ad-free-sessions.sql:
--------------------------------------------------------------------------------
-- first, get all sessions which ran ads
-- then select the sessions from Playback that are not in that list

select distinct session_id
from Playback
where session_id not in
(select distinct p.session_id
from Playback p
join Ads a
on p.customer_id = a.customer_id and a.timestamp between p.start_time and p.end_time)

-- amazon- 1
--------------------------------------------------------------------------------
/Easy/01821-find-customers-with-positive-revenue-this-year.sql:
--------------------------------------------------------------------------------
-- simple WHERE clause

select customer_id
from Customers
where year = '2021' and revenue > 0

-- google- 1
--------------------------------------------------------------------------------
/Easy/01853-convert-date-format.sql:
--------------------------------------------------------------------------------
-- date_format()

select date_format(day, '%W, %M %e, %Y') as day
from Days
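
-- quick check of the format specifiers (%W weekday name, %M month name, %e day of month, %Y year):

select date_format('2022-04-12', '%W, %M %e, %Y') as sample;  -- 'Tuesday, April 12, 2022'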

-- no companies listed
--------------------------------------------------------------------------------
/Easy/01873-calculate-special-bonus.sql:
--------------------------------------------------------------------------------
-- simple CASE statement

select employee_id,
(case when employee_id % 2 = 1 and name not like 'M%' then salary else 0 end) as bonus
from Employees
order by employee_id

-- apple- 2
--------------------------------------------------------------------------------
/Easy/01890-the-latest-login-in-2020.sql:
--------------------------------------------------------------------------------
-- simple aggregate(), LIKE

select user_id, max(time_stamp) as last_stamp
from Logins
where time_stamp like '2020%'
group by 1

----------------------------------------------------------------------------------------------------
-- using year() to get 2020 instead of LIKE

select user_id, max(time_stamp) as last_stamp
from Logins
where year(time_stamp) = '2020'
group by 1

----------------------------------------------------------------------------------------------------
-- using first_value()

select distinct user_id, first_value(time_stamp) over(partition by user_id order by time_stamp desc) as last_stamp
from Logins
where year(time_stamp) = '2020'

-- no companies listed
--------------------------------------------------------------------------------
/Easy/01939-users-that-actively-request-confirmation-messages.sql:
--------------------------------------------------------------------------------
-- timestampdiff() gets the difference between two timestamps- like datediff() but for the time part as well as the date part
-- used second <= 86400 because hour <= 24 is too coarse: timestampdiff truncates, so a gap of
-- 24 hours and 30 seconds still reports hour = 24 and would wrongly pass the check, while
-- two messages exactly 24 hours apart must still count as within the window
-- example, users 2 (exactly 24h apart, inside) and 7 (24h 1s apart, outside):
+---------+---------------------+-----------+
| user_id | time_stamp          | action    |
+---------+---------------------+-----------+
| 3       | 2021-01-06 03:30:46 | timeout   |
| 3       | 2021-01-06 03:37:45 | timeout   |
| 7       | 2021-06-12 11:57:29 | confirmed |
| 7       | 2021-06-13 11:57:30 | confirmed |
| 2       | 2021-01-22 00:00:00 | confirmed |
| 2       | 2021-01-23 00:00:00 | timeout   |
| 6       | 2021-10-23 14:14:14 | confirmed |
| 6       | 2021-10-24 14:14:13 | timeout   |
+---------+---------------------+-----------+

select distinct c1.user_id
from Confirmations c1
join Confirmations c2
on c1.user_id = c2.user_id and c1.time_stamp < c2.time_stamp
where timestampdiff(second, c1.time_stamp, c2.time_stamp) <= 86400
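
-- demo of the truncation issue (made-up timestamps):

select timestampdiff(hour,   '2021-01-22 00:00:00', '2021-01-23 00:00:30') <= 24;    -- 1: wrongly inside the window
select timestampdiff(second, '2021-01-22 00:00:00', '2021-01-23 00:00:30') <= 86400; -- 0: correctly outside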

-- no companies listed
--------------------------------------------------------------------------------
/Easy/01965-employees-with-missing-information.sql:
--------------------------------------------------------------------------------
-- select all employees from Employees not in Salaries, UNION select all employees from Salaries not in Employees

select employee_id
from Employees
where employee_id not in
(select employee_id
from Salaries)
union
select employee_id
from Salaries
where employee_id not in
(select employee_id
from Employees)
order by 1

----------------------------------------------------------------------------------------------------
-- first get a table of all employees by doing a UNION on the 2 tables
-- then select the employees which are in that table but not in the respective source tables- UNION

with all_employees as
(select employee_id
from Employees
union
select employee_id
from Salaries)

select employee_id
from all_employees
where employee_id not in (select employee_id from Employees)
union
select employee_id
from all_employees
where employee_id not in (select employee_id from Salaries)
order by 1

-- adobe- 2
--------------------------------------------------------------------------------
/Easy/01978-employees-whose-manager-left-the-company.sql:
--------------------------------------------------------------------------------
-- we just need the employee_ids whose manager_id is not listed in the employee_id column
-- use a subquery and WHERE

select employee_id
from Employees
where manager_id not in
(select employee_id from Employees)
and salary < 30000
order by employee_id

----------------------------------------------------------------------------------------------------
-- 2 copies of the table, employee e and manager m
-- we need the manager_ids that are not present as employee_ids, and then the employees reporting to those managers
-- so select employee_id directly from m; the manager_id should be in m but have no match in e
-- since m is the left table of a left join, also require m.manager_id to be not null

select m.employee_id
from Employees m
left join Employees e
on m.manager_id = e.employee_id
where e.employee_id is null and m.manager_id is not null and m.salary < 30000
order by m.employee_id

-- adobe- 2
--------------------------------------------------------------------------------
/Easy/02026-low-quality-problems.sql:
--------------------------------------------------------------------------------
-- calculation in the WHERE clause

select problem_id
from Problems
where (likes/(likes + dislikes)) * 100 < 60
order by 1

----------------------------------------------------------------------------------------------------
-- same as above, just not multiplied by 100

select problem_id
from Problems
where likes/(likes + dislikes) < 0.6
order by 1

-- no companies listed
--------------------------------------------------------------------------------
/Easy/02072-the-winner-university.sql:
--------------------------------------------------------------------------------
-- there are many ways but this is my solution
-- count each university's qualifying students in scalar subqueries (a plain cross join of the
-- two tables would scale each count by the other table's row count, and returns nothing if
-- either table is empty), then pick the winner in the main query

select (case when ny_count > c_count then 'New York University'
when c_count > ny_count then 'California University'
else 'No Winner' end) as winner
from
(select (select count(*) from NewYork where score >= 90) as ny_count,
(select count(*) from California where score >= 90) as c_count) t
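
-- aside: MySQL evaluates a comparison to 0/1, so sum(score >= 90) also counts qualifying rows
-- (this is what the originally posted cross-join version relied on). toy rows, not the real tables:

select sum(score >= 90) as cnt
from (select 95 as score union all select 80 union all select 91) t;  -- 2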

----------------------------------------------------------------------------------------------------
-- using count() and 2 separate ctes

with cte_ny as
(select count(student_id) as ny_count
from NewYork n
where score >= 90),

cte_c as
(select count(student_id) as c_count
from California c
where score >= 90)

select (case when ny_count > c_count then 'New York University'
when c_count > ny_count then 'California University'
else 'No Winner' end) as winner
from cte_ny, cte_c

-- walmart labs- 1
--------------------------------------------------------------------------------
/Easy/02082-the-number-of-rich-customers.sql:
--------------------------------------------------------------------------------
-- count distinct customers with at least one bill where amount > 500
-- this gives the count of rich customers

select count(distinct customer_id) as rich_count
from Store
where amount > 500

-- athenahealth- 1
--------------------------------------------------------------------------------
/Easy/02205-the-number-of-users-that-are-eligible-for-the-discount.sql:
--------------------------------------------------------------------------------
-- counting the users that satisfy both WHERE conditions

CREATE FUNCTION getUserIDs(startDate DATE, endDate DATE, minAmount INT) RETURNS INT
BEGIN
RETURN (
-- Write your MySQL query statement below.
select count(distinct user_id) as user_cnt
from Purchases
where time_stamp between startDate and endDate
and amount >= minAmount
);
END

-- analytics quotient- 1
--------------------------------------------------------------------------------
/Easy/02230-the-users-that-are-eligible-for-discount.sql:
--------------------------------------------------------------------------------
-- my code starts after BEGIN
-- use ';' after the statement ends

CREATE PROCEDURE getUserIDs(startDate DATE, endDate DATE, minAmount INT)
BEGIN
-- Write your MySQL query statement below.
select distinct user_id
from Purchases
where time_stamp between startDate and endDate
and amount >= minAmount
order by 1;
END

-- analytics quotient
--------------------------------------------------------------------------------
/Easy/02356-number-of-unique-subjects-taught-by-each-teacher.sql:
--------------------------------------------------------------------------------
-- just need the count of distinct subjects for each teacher
-- count(distinct column)

select teacher_id, count(distinct subject_id) as cnt
from Teacher
group by 1

-- capgemini- 1
--------------------------------------------------------------------------------
/Easy/02377-sort-the-olympic-table.sql:
--------------------------------------------------------------------------------
-- use ORDER BY for sorting and breaking ties

select *
from Olympic
order by gold_medals desc, silver_medals desc, bronze_medals desc, country

-- no companies listed
--------------------------------------------------------------------------------
/Easy/02668-find-latest-salaries.sql:
--------------------------------------------------------------------------------
-- take the max of each employee's salaries- the highest one is the latest here

select emp_id, firstname, lastname, max(salary) as salary, department_id
from Salary
group by emp_id
order by emp_id

-- no companies listed
--------------------------------------------------------------------------------
/Hard/00185-department-top-three-salaries.sql:
--------------------------------------------------------------------------------
-- use DENSE_RANK()
-- get the top 3 ranks from the CTE

with CTE as
(select *,
dense_rank() over(partition by departmentId order by salary desc) as rnk
from Employee)

select d.name as Department, e.name as Employee, salary as Salary
from Department d
left join CTE e
on e.departmentId = d.id
where rnk <= 3

-- google- 2
-- amazon- 7
-- facebook- 2
-- adobe- 2
-- shopee- 2
--------------------------------------------------------------------------------
/Hard/00262-trips-and-users.sql:
--------------------------------------------------------------------------------
-- use a WHERE condition to filter rows to unbanned drivers AND clients, and to the date range
-- we need '0' when there were no cancellations, so write a CASE, sum it, and divide by the total row count
-- group by date

select request_at as Day,
round(sum(case when status like 'cancelled%' then 1 else 0 end)/count(*), 2) as 'Cancellation Rate'
from Trips
where client_id not in (select users_id
from Users
where banned = 'Yes')
and driver_id not in (select users_id
from Users
where banned = 'Yes')
and request_at between '2013-10-01' and '2013-10-03'
group by request_at

----------------------------------------------------------------------------------------------------
-- beginner solution
-- 2 CTEs- the first counts all rides, the second counts cancelled rides
-- left join both and divide; add a WHERE condition for the dates

with all_rides as
(select request_at, count(id) as count_total
from Trips
where client_id not in
(select users_id
from Users
where banned = 'Yes')
and driver_id
not in
(select users_id
from Users
where banned = 'Yes')
group by 1),

cancelled_rides as
(select request_at, count(id) as count_cancelled
from Trips
where client_id not in
(select users_id
from Users
where banned = 'Yes')
and driver_id not in
(select users_id
from Users
where banned = 'Yes')
and status like 'cancelled%'
group by 1)

select t.request_at as Day, round(coalesce(count_cancelled, 0)/count_total, 2) as 'Cancellation Rate'
from all_rides t
left join cancelled_rides c
on t.request_at = c.request_at
where t.request_at between '2013-10-01' and '2013-10-03'

-- amazon- 2
-- uber- 3
-- adobe- 2
-- bloomberg- 2
--------------------------------------------------------------------------------
/Hard/00569-median-employee-salary.sql:
--------------------------------------------------------------------------------
-- we do not have to average- the problem wants the middle row(s) themselves
-- for even counts, return rows cnt/2 and cnt/2 + 1
-- for odd counts, return row ceiling(cnt/2)
-- ceiling(cnt/2) = ceiling(6/2) = 3; ceiling(5/2) = 3
-- floor(cnt/2)+1 = floor(6/2)+1 = 4; floor(5/2)+1 = 2+1 = 3

with cte as
(select id, company, salary,
row_number() over(partition by company order by salary, id) as rn,
count(*) over(partition by company) as cnt
from Employee
order by company, salary)

select id, company, salary
from cte
where rn = ceiling(cnt/2) or rn = floor(cnt/2) + 1

----------------------------------------------------------------------------------------------------
-- easier way- write case statements in the WHERE for odd and even counts

with cte as
(select id, company, salary,
row_number() over(partition by company order by salary, id) as rn,
count(*) over(partition by company) as cnt
from Employee
order by company, salary)

select id, company, salary
from cte
where
case when cnt%2 = 0 then (rn = cnt/2 or rn = cnt/2 + 1)
when cnt%2 = 1 then (rn = ceiling(cnt/2)) end

----------------------------------------------------------------------------------------------------
-- rn between 6/2 and 6/2+1 = between 3 and 4
-- rn between 5/2 and 5/2+1 = between 2.5 and 3.5 = 3, because rn is always a whole number

with cte as
(select id, company, salary,
row_number() over(partition by company order by salary, id) as rn,
count(*) over(partition by company) as cnt
from Employee
order by company, salary)

select id, company, salary
from cte
where rn between cnt/2 and cnt/2 + 1

-- google- 1
--------------------------------------------------------------------------------
/Hard/00571-find-median-given-frequency-of-numbers.sql:
--------------------------------------------------------------------------------
-- create lower and upper bounds
-- lower bound = running total - frequency
-- upper bound = running total
-- we need to avg 2 numbers for an even n

with defined_bounds as
(select num,
sum(frequency) over(order by num) - frequency as lower_bound,
sum(frequency) over(order by num) as upper_bound,
sum(frequency) over()/2 as median_n
from Numbers)

select avg(num) as median
from defined_bounds
where median_n between lower_bound and upper_bound

-- eg: with 16 numbers in total, median_n = 8 falls inside the bounds of both num = 1 and num = 2
-- so we avg 1 and 2 = 1.5

| num | frequency | lower_bound | upper_bound | median_n |
| --- | --------- | ----------- | ----------- | -------- |
| 0   | 7         | 0           | 7           | 8        |
| 1   | 1         | 7           | 8           | 8        |
| 2   | 3         | 8           | 11          | 8        |
| 3   | 5         | 11          | 16          | 8        |

----------------------------------------------------------------------------------------------------
-- same as above
-- lower bound defined only in the where clause

with defined_bounds as
(select num, frequency,
sum(frequency) over(order by num) as n,
sum(frequency) over()/2 as median_n
from Numbers)

select avg(num) as median
from defined_bounds
where median_n between (n - frequency) and n

-- pinterest- 1
--------------------------------------------------------------------------------
/Hard/00579-find-cumulative-salary-of-an-employee.sql:
--------------------------------------------------------------------------------
-- use RANGE- this gives exactly what is needed
-- exclude each id's row with max(month)

select id, month,
sum(salary) over(partition by id order by month range between 2 preceding and current row) as Salary
from Employee
where (id, month) not in (select id, max(month)
from Employee
group by 1)
order by 1, 2 desc
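
-- why RANGE and not ROWS: the frame is defined on the month *values* (the current month and the
-- two before it), not on the two physically preceding rows, so gaps in months are handled.
-- a toy contrast (assumes MySQL 8+ window frames; t is made up):

with t as (select 1 as m, 10 as s union all select 2, 20 union all select 5, 50)
select m,
sum(s) over(order by m rows  between 2 preceding and current row) as rows_sum,   -- for m = 5: 80
sum(s) over(order by m range between 2 preceding and current row) as range_sum   -- for m = 5: 50
from t;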

----------------------------------------------------------------------------------------------------
-- same as above
-- excluding the last month- rank desc and exclude rank = 1

select id, month, Salary
from
(select id, month,
sum(salary) over(partition by id order by month range between 2 preceding and current row) as Salary,
dense_rank() over(partition by id order by month desc) as rnk
from Employee) t
where rnk > 1

----------------------------------------------------------------------------------------------------
-- without using range
-- get a row_number to filter out each id's max month
-- self join the cte on id
-- filter rows where rn = 1- this is the last month and we don't want it
-- the diff between the 2 months should be between 0 and 2- 0 is the month itself, 1 the previous month, 2 the one before that
-- sum the salary from the c2 side, because c1 is the base month
-- group by id and the c1 month
-- since this is an inner join, the last month never appears from the c2 side either

with cte as
(select *, row_number() over (partition by id order by month desc) as rn
from employee)

select c1.id, c1.month, sum(c2.Salary) as Salary
from cte c1
join cte c2 on c1.id = c2.id
where (c1.month - c2.month) between 0 and 2
and c1.rn != 1
group by c1.id, c1.month

-- amazon- 1
--------------------------------------------------------------------------------
/Hard/00601-human-traffic-of-stadium.sql:
--------------------------------------------------------------------------------
-- in the cte, calculate rn for the ids whose people >= 100
-- difference between rn and id- consecutive ids share the same diff
-- subquery- find the diff values whose count >= 3
-- final query- output the records whose diff is in the subquery

with cte as
(select *, row_number() over(order by id) as rn, id - row_number() over(order by id) as diff
from Stadium
where people >= 100)

select id, visit_date, people
from cte
where diff in (select diff
from cte
group by diff
having count(diff) >= 3)

----------------------------------------------------------------------------------------------------
-- using lead() and lag()
-- lead by 1 and 2, lag by 1 and 2, to see the previous 2 and next 2 values
-- keep the rows where current >= 100, and (next 2 >= 100, or prev 2 >= 100, or prev 1 and next 1 >= 100)
-- this checks that the row sits in a run of 3 consecutive rows with people >= 100

with cte as
(select *,
lead(people, 1) over(order by id) as le1,
lead(people, 2) over(order by id) as le2,
lag(people, 1) over(order by id) as la1,
lag(people, 2) over(order by id) as la2
from Stadium)

select id, visit_date, people
from cte
where people >= 100 and ((le1 >= 100 and le2 >= 100) or
(la1 >= 100 and la2 >= 100) or
(le1 >= 100 and la1 >= 100))
order by 2 asc

-- amazon- 3
--------------------------------------------------------------------------------
/Hard/00615-average-salary-departments-vs-company.sql:
--------------------------------------------------------------------------------
-- global_avg_sal- get the company's avg salary for each pay month
-- avg_sal_dept- calculate the avg salary of each dept by joining on the Employee table
-- final query- join the above 2 tables, pull the required fields as per the o/p, and compare using CASE

with global_avg_sal as
(select date_format(pay_date, '%Y-%m') as pay_month, avg(amount) as global_avg
from Salary
group by 1),

avg_sal_dept as
(select date_format(s.pay_date, '%Y-%m') as pay_month, e.department_id, avg(s.amount) as avg_sal
from Salary s
join Employee e
using(employee_id)
group by 1, 2)

select a.pay_month, a.department_id,
(case when avg_sal > global_avg then 'higher'
when avg_sal < global_avg then 'lower'
else 'same'
end) as comparison
from avg_sal_dept a
join global_avg_sal g
using(pay_month)

----------------------------------------------------------------------------------------------------
-- longer version of the above
-- emp_dept- join employees and salaries to get the dept column
-- global_avg_sal- calculate the company's avg salary for each pay month
-- avg_sal_dept- calculate each dept's avg salary for each pay month using emp_dept
-- comparison_table- join global_avg_sal and avg_sal_dept to get a table with all columns
-- final query- output with CASE for the comparison

with emp_dept as
(select s.employee_id, e.department_id, s.amount, s.pay_date
from Salary s
join Employee e
using(employee_id)),

global_avg_sal as
(select date_format(pay_date, '%Y-%m') as month_year, avg(amount) as avg_sal
from emp_dept
group by 1),

avg_sal_dept as
(select date_format(pay_date, '%Y-%m') as month_year, department_id, avg(amount) as avg_sal
from emp_dept
group by 1, 2),

comparison_table as
(select g.month_year, g.avg_sal as global_avg, a.department_id, a.avg_sal
from global_avg_sal g
left join avg_sal_dept a
using(month_year))

select month_year as pay_month, department_id,
(case when avg_sal > global_avg then 'higher'
when avg_sal < global_avg then 'lower'
else 'same'
end) as comparison
from comparison_table

-- no companies listed
--------------------------------------------------------------------------------
/Hard/00618-students-reported-by-geography.sql:
--------------------------------------------------------------------------------
-- create a cte with a row_number column
-- we need to order by name because the output should be in ascending order of name for every continent
-- write case statements, pull data from the cte, and group by the row number
-- since we are grouping, we need an aggregate function- use either MAX or MIN

select
max(case when continent = 'America' then name end) as America,
max(case when continent = 'Asia' then name end) as Asia,
max(case when continent = 'Europe' then name end) as Europe
from
(select name, continent, row_number() over(partition by continent order by name) as rn
from Student
order by continent, name) t
group by rn

-- amazon- 3
--------------------------------------------------------------------------------
/Hard/01097-game-play-analysis-v.sql:
--------------------------------------------------------------------------------
-- cte- get the min date (install date) in the same table using a window function
-- then use CASE to get a flag where the diff between days is 1
-- in the final query, sum the flag and divide by the count of distinct players
-- group by install date

with cte as
(select *,
min(event_date) over(partition by player_id) as first_date
from Activity),

cte2 as
(select a.player_id, a.event_date, a.first_date,
(case when datediff(a.event_date, a.first_date) = 1 then 1 else 0 end) flg
from cte a)

select first_date as install_dt,
count(distinct player_id) as installs,
round(sum(flg)/count(distinct player_id), 2) as Day1_retention
from cte2
group by 1

-- o/p of cte2

| player_id | event_date | first_date | flg |
| --------- | ---------- | ---------- | --- |
| 1         | 2016-03-01 | 2016-03-01 | 0   |
| 1         | 2016-03-02 | 2016-03-01 | 1   |
| 2         | 2017-06-25 | 2017-06-25 | 0   |
| 3         | 2016-03-01 | 2016-03-01 | 0   |
| 3         | 2018-07-03 | 2016-03-01 | 0   |

----------------------------------------------------------------------------------------------------
-- same as above- the only change is computing the retention count directly in the second CTE

with get_min_dts as
(select *,
min(event_date) over(partition by player_id) as install_dt
from Activity),

cte2 as
(select install_dt,
count(distinct player_id) as cnt_all,
sum(datediff(event_date, install_dt) = 1) as cnt_retended,
sum(datediff(event_date, install_dt) = 1)/ count(distinct player_id) as Day1_retention
from get_min_dts
group by install_dt)

select install_dt, cnt_all as installs, round(Day1_retention, 2) as Day1_retention
from cte2

----------------------------------------------------------------------------------------------------
-- concise version of the above

with get_min_dts as
(select *,
min(event_date) over(partition by player_id) as install_dt
from Activity)

select install_dt,
count(distinct player_id) as installs,
round(sum(datediff(event_date, install_dt) = 1)/ count(distinct player_id), 2) as Day1_retention
from get_min_dts
group by install_dt

-- gsn games- 1
--------------------------------------------------------------------------------
/Hard/01127-user-purchase-platform.sql:
--------------------------------------------------------------------------------
-- we want all combinations of date and platform, hence create the left table
-- right table- when the number of rows for a date and user_id is > 1, that user used both platforms that day, so set platform = 'both'; else keep the row's platform
-- sum of amount grouped by user_id and date- per user and date
-- final query- date and platform come from the left table; again SUM the amount and count the users from the right table- now per date (all users added up)
-- group the amount and user count by date and platform

with left_table as
(select distinct spend_date, 'desktop' as platform
from Spending
union
select distinct spend_date, 'mobile' as platform
from Spending
union
select distinct spend_date, 'both' as platform
from Spending),

right_table as
(select user_id, spend_date, sum(amount) as total,
(case when count(*) = 1 then platform else 'both' end) as platform
from Spending
group by 1, 2)

select l.spend_date, l.platform, ifnull(sum(r.total), 0) as total_amount, count(distinct r.user_id) as total_users
from left_table l
left join right_table r
on l.spend_date = r.spend_date
and l.platform = r.platform
group by 1, 2

-- LinkedIn- 1
--------------------------------------------------------------------------------
/Hard/01159-market-analysis-ii.sql:
--------------------------------------------------------------------------------
-- ranked- create a table with ranks per seller by order date
-- second_sold_item- pull the sellers and items with rnk = 2, join on Items to get the brand of the second sold item
-- final query- compare each user's favorite brand with the second-sold brand from the above table- 'yes' if they match, else 'no'

with ranked as
(select seller_id, item_id, order_date, dense_rank() over(partition by seller_id order by order_date) as rnk
from Orders
),

second_sold_item as
(select r.seller_id, r.item_id, i.item_brand
from ranked r
join Items i
using(item_id)
where rnk = 2)

select u.user_id as seller_id,
(case when u.favorite_brand = s.item_brand then 'yes' else 'no' end) as 2nd_item_fav_brand
from Users u
left join second_sold_item s
on u.user_id = s.seller_id

-- poshmark- 1
--------------------------------------------------------------------------------
/Hard/01194-tournament-winners.sql:
--------------------------------------------------------------------------------
-- the concept is to union the first and second players with their scores, because each match is between 2 players of the same group
-- each group has exactly 1 winner, so the easier approach is to convert every match into player/score records
-- t- union all players and scores
-- cte- get each player's group, sum the scores per player, and rank within each group (partition) by highest score, breaking ties by lower player_id (order by)
-- final query- pull the players with rank = 1 from the cte

with cte as
(select group_id, first_player, sum(first_score) as total_score,
dense_rank() over(partition by group_id order by sum(first_score) desc, player_id asc) as rnk
from
(
select m.first_player, m.first_score
from Matches m
union all
select m.second_player, m.second_score
from Matches m
) t
join Players p
on p.player_id = t.first_player
group by 2)

select group_id, first_player as player_id
from cte
where rnk = 1

-- wayfair- 1
--------------------------------------------------------------------------------
/Hard/01225-report-contiguous-dates.sql:
--------------------------------------------------------------------------------
-- cte1- union both tables and arrange in ascending order of dates
-- cte2- calculate rn and rnk and the diff between them
-- final query- pick min date as start date, max date as end date, grouped by status and diff
-- dense_rank()- ranks within each status by ascending date
-- row_number()- ordered by date asc over all rows
-- the diff between them stays constant across a contiguous run of one status, hence group by diff
-- runs of different statuses can still share the same diff value, hence also group by status to keep the periods separate

with cte1 as
((select fail_date as event_date, 'failed' as status
from Failed)
union all
(select success_date as event_date, 'succeeded' as status
from Succeeded)),
cte2 as
(select *,
row_number() over(order by event_date) as rn,
dense_rank() over (partition by status order by event_date) as rnk,
row_number() over(order by event_date) - dense_rank() over (partition by status order by event_date) as diff
from cte1
where event_date between '2019-01-01' and '2019-12-31'
order by 1)

select status as period_state, min(event_date) as start_date, max(event_date) as end_date
from cte2
group by status, diff
order by 2

-- facebook- 1

----------------------------------------------------------------------------------------------------

-- o/p of cte2

| event_date | status    | rn | rnk | diff |
| ---------- | --------- | -- | --- | ---- |
| 2019-01-01 | succeeded | 1  | 1   | 0    |
| 2019-01-02 | succeeded | 2  | 2   | 0    |
| 2019-01-03 | succeeded | 3  | 3   | 0    |
| 2019-01-04 | failed    | 4  | 1   | 3    |
| 2019-01-05 | failed    | 5  | 2   | 3    |
| 2019-01-06 | succeeded | 6  | 4   | 2    |
--------------------------------------------------------------------------------
/Hard/01336-number-of-transactions-per-visit.sql:
--------------------------------------------------------------------------------
-- trans_by_date_user- count all transactions grouped by date and user_id
-- this gives each visit's transaction count, starting from 0
-- rows_trans_count- a recursive cte that counts from 0 up to the max count in the above table
-- final query- the 2nd cte is the left table, because it has all counts from 0 to the max
-- left join the 1st cte onto it; counting the matches per transactions_count gives visits_count

with trans_by_date_user as
(select v.visit_date, v.user_id as v_user, count(t.transaction_date) as num_tran_by_date_user
from Visits v
left join Transactions t
on v.user_id = t.user_id and
v.visit_date = t.transaction_date
group by 1, 2),

rows_trans_count as
(with recursive cte_r as
(select 0 as transactions_count
union all
select transactions_count + 1
from cte_r
where transactions_count < (select max(num_tran_by_date_user)
from trans_by_date_user)
)
select * from cte_r
)

select rtc.transactions_count, count(tdu.num_tran_by_date_user) as visits_count
from rows_trans_count as rtc
left join trans_by_date_user tdu
on rtc.transactions_count = tdu.num_tran_by_date_user
group by 1

-- o/p of trans_by_date_user:

| visit_date | v_user | num_tran_by_date_user |
| ---------- | ------ | --------------------- |
| 2020-01-01 | 1      | 0                     |
| 2020-01-02 | 2      | 0                     |
| 2020-01-01 | 12     | 0                     |
| 2020-01-03 | 19     | 0                     |
| 2020-01-02 | 1      | 1                     |
| 2020-01-03 | 2      | 1                     |
| 2020-01-04 | 1      | 1                     |
| 2020-01-11 | 7      | 1                     |
| 2020-01-25 | 9      | 3                     |
| 2020-01-28 | 8      | 1                     |

-- o/p of rows_trans_count:

| transactions_count |
| ------------------ |
| 0                  |
| 1                  |
| 2                  |
| 3                  |

-- o/p of final query:

| transactions_count | visits_count |
| ------------------ | ------------ |
| 0                  | 4            |
| 1                  | 5            |
| 2                  | 0            |
| 3                  | 1            |
--------------------------------------------------------------------------------
/Hard/01369-get-the-second-most-recent-ctivity.sql:
--------------------------------------------------------------------------------
-- output rows where rank = 2, or where the user has only 1 activity

with cte as
(select *,
dense_rank() over(partition by username order by endDate desc) as rnk,
count(activity) over(partition by username) as cnt
from UserActivity)

select username, activity, startDate, endDate
from cte
where cnt = 1 or rnk = 2

----------------------------------------------------------------------------------------------------
-- union 2 conditions
-- return the rank = 2 rows for users whose max rank is > 1
-- return the single row for users whose max rank = 1

with cte as
(select *,
dense_rank() over(partition by username order by endDate desc) as rnk
from UserActivity)

select username, activity, startDate, endDate
from cte
where rnk = 2
group by username
having max(rnk) > 1
union
select username, activity, startDate, endDate
from cte
group by username
having max(rnk) = 1

-- microsoft- 1
--------------------------------------------------------------------------------
/Hard/01384-total-sales-amount-by-year.sql:
--------------------------------------------------------------------------------
-- recursive cte- generate every date between the earliest period_start and the latest period_end
-- join Sales and Product to get the product name, and sum the daily amounts
-- join the above with the cte on period_start <= s_date and period_end >= s_date
-- group by product_id and year

with recursive cte as
(select min(period_start) as s_date
from Sales
union all
select date_add(s_date, interval 1 day) as s_date
from cte
where s_date <= (select max(period_end)
from Sales))

select s.product_id, p.product_name, left(c.s_date, 4) as report_year,
sum(s.average_daily_sales) as total_amount
from Sales s
join Product p
on p.product_id = s.product_id
join cte c
on s.period_start <= c.s_date and s.period_end >= c.s_date
group by 1, 3
order by 1, 3
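
-- minimal sketch of the recursive date-series idea above (endpoints are made up):

with recursive d as
(select date('2019-01-01') as dt
union all
select dt + interval 1 day
from d
where dt < '2019-01-04')

select * from d;  -- 2019-01-01, 2019-01-02, 2019-01-03, 2019-01-04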

-- no companies listed
--------------------------------------------------------------------------------
/Hard/01412-find-the-quiet-students-in-all-exams.sql:
--------------------------------------------------------------------------------
-- CTE1- find the highest rank and lowest rank in 2 separate columns using dense_rank()
-- CTE2- the students who ever scored the highest or lowest in an exam
-- CTE3- the students who took exams but are not in CTE2
-- final query- output the id and name of the students in CTE3

with CTE1 as
(select exam_id, student_id, score,
dense_rank() over(partition by exam_id order by score desc) rank_highest,
dense_rank() over(partition by exam_id order by score asc) rank_lowest
from Exam),
CTE2 as
(select student_id
from CTE1
where rank_highest = 1 or rank_lowest = 1),
CTE3 as
(select distinct student_id
from Exam
where student_id not in (select * from CTE2))

select student_id, student_name
from Student
where student_id in (select * from CTE3)
order by 1

----------------------------------------------------------------------------------------------------
-- CTE- find the highest rank and lowest rank in 2 separate columns using dense_rank()
-- pull student_id from the Exam table- note that student_id is not a primary key there, so use distinct
-- pull the name from the Student table
-- use Exam as the driving table because we don't want students who took no exams
-- use a WHERE ... NOT IN condition against the CTE

with CTE as
(select exam_id, student_id, score,
dense_rank() over(partition by exam_id order by score desc) rank_highest,
dense_rank() over(partition by exam_id order by score asc) rank_lowest
from Exam)

select distinct e.student_id, s.student_name
from Exam e
left join Student s
on e.student_id = s.student_id
where e.student_id not in
(select student_id
from CTE
where rank_highest = 1 or rank_lowest = 1)
order by 1

-- no companies listed
--------------------------------------------------------------------------------
/Hard/01479-sales-by-day-of-the-week.sql:
--------------------------------------------------------------------------------
-- aggregation over CASE statements

select i.item_category as category,
sum(case when weekday(order_date) = 0 then quantity else 0 end) as MONDAY,
sum(case when weekday(order_date) = 1 then quantity else 0 end) as TUESDAY,
sum(case when weekday(order_date) = 2 then quantity else 0 end) as WEDNESDAY,
sum(case when weekday(order_date) = 3 then quantity else 0 end) as THURSDAY,
sum(case when weekday(order_date) = 4 then quantity else 0 end) as FRIDAY,
sum(case when weekday(order_date) = 5 then quantity else 0 end) as SATURDAY,
sum(case when weekday(order_date) = 6 then quantity else 0 end) as SUNDAY
from Items i
left join Orders o
using(item_id)
group by 1
order by 1
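
-- note: weekday() is 0-indexed from Monday; dayofweek() is the 1-indexed-from-Sunday variant:

select weekday('2023-01-02') as wd, dayofweek('2023-01-02') as dow;  -- 0, 2 (a Monday)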

-- amazon- 1
--------------------------------------------------------------------------------
/Hard/01635-hopper-company-queries-i.sql:
--------------------------------------------------------------------------------
-- in the first cte, use recursion to generate the sequence 1 to 12 for all months
-- activeDrivers_2020- from the Drivers table, do a running count of driver_ids where the join year is <= 2020
-- acceptedRide_2020- from the Rides table, pull the rides requested in 2020 that are in the AcceptedRides table, per month
-- in the final query we want all months, so the cte is the left table, joined to the other 2 tables
-- replace nulls with 0
-- for active drivers, carry the running max forward into subsequent months

with recursive cte as(
select 1 as 'month'
union
select month + 1
from cte
where month <= 11),

activeDrivers_2020 as
(select month(join_date) as month, count(driver_id) over(order by join_date) as driver_cnt
from Drivers
where year(join_date) <= '2020'),

acceptedRide_2020 as
(select month(requested_at) as month, count(ride_id) as ride_cnt
from Rides
where ride_id in (select ride_id from AcceptedRides)
and year(requested_at) = '2020'
group by 1)

select distinct c.month,
ifnull(max(ad.driver_cnt) over(order by c.month), 0) as active_drivers,
ifnull(ar.ride_cnt, 0) as accepted_rides
from cte c
left join activeDrivers_2020 ad
on c.month = ad.month
left join acceptedRide_2020 ar
on c.month = ar.month

-- uber- 1
--------------------------------------------------------------------------------
/Hard/01767-find-the-subtasks-that-did-not-execute.sql:
--------------------------------------------------------------------------------
-- using a recursive CTE
-- generate subtask ids starting from subtasks_count and subtracting 1 until reaching 1
-- the recursion only continues while subtasks_count > 1, so the ids produced are n, n-1, ..., 1
-- in the final query, pull all rows from the cte except those in the Executed table

with recursive cte as
(select task_id, subtasks_count
from Tasks

union all

select task_id, subtasks_count-1
from cte
where subtasks_count > 1)

select task_id, subtasks_count as subtask_id
from cte
where (task_id, subtasks_count) not in (select *
from Executed)

-- google- 1
--------------------------------------------------------------------------------
/Hard/01972-first-and-last-call-on-the-same-day.sql:
--------------------------------------------------------------------------------
-- the question asks for users whose first and last call of a day were with the same person
-- union caller and recipient ids to get them all in the same column
-- for each user, we want the first and last call of each day, so take first_value() of the other party per user and day (partition), ordered by call time
-- take first_value() over ascending and descending order to get the first_call and last_call values
-- in the final query, output the distinct users whose first_call and last_call values are the same

with all_calls as
(select caller_id as user1, recipient_id as user2, call_time
from Calls
union all
select recipient_id, caller_id, call_time
from Calls),

first_last_calls as
(select distinct user1,
first_value(user2) over(partition by user1, date(call_time) order by call_time asc) as first_call,
first_value(user2) over(partition by user1, date(call_time) order by call_time desc) as last_call
from all_calls)

select distinct user1 as user_id
from first_last_calls
where first_call = last_call
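
-- aside: first_value() over a descending order is used instead of last_value(), because
-- last_value() with the default frame only looks up to the current row. toy illustration:

with t as (select 1 as id, 'a' as v union all select 2, 'b' union all select 3, 'c')
select distinct
first_value(v) over(order by id asc)  as first_v,   -- 'a'
first_value(v) over(order by id desc) as last_v     -- 'c'
from t;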

-- amazon- 5
--------------------------------------------------------------------------------
/Hard/02004-the-number-of-seniors-and-juniors-to-join-the-company.sql:
--------------------------------------------------------------------------------
-- first get a running total of salary partitioned by experience
-- seniors: keep rows where the running total <= 70k- this is the list of seniors that can be hired
-- juniors: keep rows where the running total <= 70k minus the senior total, because only the remaining amount is the junior budget
-- union the counts of junior and senior employee ids

with cte as
(select *, sum(salary) over(partition by experience order by salary) as running_salary
from Candidates),

senior_cte as
(select *
from cte
where experience = 'Senior'
and running_salary <= 70000)

select 'Senior' as experience, count(distinct employee_id) as accepted_candidates
from senior_cte
union
select 'Junior' as experience, count(distinct employee_id) as accepted_candidates
from cte
where experience = 'Junior'
and running_salary <= 70000 - (select coalesce(max(running_salary), 0)
from senior_cte)

-- apple- 2
-- wayfair- 2
--------------------------------------------------------------------------------
/Hard/02010-the-number-of-seniors-and-juniors-to-join-the-company-ii.sql:
--------------------------------------------------------------------------------
-- first get a running total of salary partitioned by experience
-- seniors: keep rows where the running total <= 70k- this is the list of seniors that can be hired
-- juniors: keep rows where the running total <= 70k minus the senior total, because only the remaining amount is the junior budget
-- union the junior and senior employee ids

with total_sal as
(select *, sum(salary) over(partition by experience order by salary) as running_salary
from Candidates),

senior_cte as
(select *
from total_sal
where experience = 'Senior'
and running_salary <= 70000)

select employee_id
from senior_cte
union
select employee_id
from total_sal
where experience = 'Junior'
and running_salary <= 70000 - (select coalesce(max(running_salary), 0)
from senior_cte)

-- no companies listed
--------------------------------------------------------------------------------
/Hard/02118-build-the-equation.sql:
--------------------------------------------------------------------------------
-- first, for each row, build the term by concatenating factor and power with the necessary conditions
-- then do a group_concat- this function concatenates values across rows, here ordered by power desc
-- we need to specify separator ''- otherwise the default is a comma: +1X^2,-4X,+2=0
-- the separator is the delimiter placed between the concatenated rows in the output

select
concat(group_concat(term order by power desc separator ''), '=0') as equation
from
(select
concat(
case when factor > 0 then '+' else '' end,
factor,
case when power = 0 then '' else 'X' end,
case when power = 0 or power = 1 then '' else '^' end,
case when power = 0 or power = 1 then '' else power end
) term,
power
from Terms
order by 2 desc) t
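
-- group_concat() in isolation (toy parts, not the Terms table)- separator '' drops the default comma:

select group_concat(part order by n desc separator '') as equation
from (select 2 as n, '+1X^2' as part
union all select 1, '-4X'
union all select 0, '+2') p;  -- '+1X^2-4X+2'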

-- no companies listed
--------------------------------------------------------------------------------
/Hard/02173-longest-winning-streak.sql:
--------------------------------------------------------------------------------
-- first, rank all records per user, and separately rank each result per user
-- each result forms streaks (runs of records)- the difference between the two ranks stays the same within a streak- we keep only the streaks where result = 'Win'
-- in the final result, pull the max streak for each player in the Matches table; if a player is not in the cte, then 0

with ranked as
(select *,
row_number() over(partition by player_id order by match_day) as all_rnk,
row_number() over(partition by player_id, result order by match_day) as result_rnk
from Matches),

win_streak_player as
(select player_id, count(*) as win_streak
from ranked
where result = 'Win'
group by 1, all_rnk - result_rnk)

select m.player_id, ifnull(max(win_streak), 0) as longest_streak
from Matches m
left join win_streak_player w
using(player_id)
group by 1

-- amazon- 1
--------------------------------------------------------------------------------
/Hard/02199-finding-the-topic-of-each-post.sql:
--------------------------------------------------------------------------------
-- left join because we want to find a topic for all posts
-- We attach a topic to a post if a K.word from that topic exists in P.content. The match is case insensitive, so we compare LOWER(K.word) against LOWER(P.content).
-- In MySQL, we do this with LIKE, wrapping the keyword in % via CONCAT.
-- However, we don't want to match the keyword when it is a substring of a word in P.content
-- (e.g. 'war' should match 'i went to war' but not 'there was a warning'). So we wrap the keyword in spaces.
-- In case the keyword sits at the start or end of the content, we wrap the content in spaces as well, again with CONCAT.
-- For each keyword found in a post's content, we get a separate row for that post, so we group by P.post_id and concatenate the K.topic_ids.
-- We use GROUP_CONCAT around K.topic_id, which has a comma as its default separator.
-- What if two keywords belong to the same topic? That K.topic_id would repeat, so we select DISTINCT K.topic_id.
-- We are also required to order by K.topic_id before concatenating.
-- What if there are no K.topic_ids? We label those posts 'Ambiguous!', handling the null with IFNULL.

select p.post_id, ifnull(group_concat(distinct k.topic_id order by topic_id), 'Ambiguous!') as topic
from Posts p
left join Keywords k
on concat(' ', lower(p.content), ' ') like concat('% ', lower(k.word), ' %')
group by 1
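
-- the space-wrapping acts as a poor man's word boundary- quick check with made-up strings:

select concat(' ', lower('There was a warning'), ' ') like concat('% ', 'war', ' %') as hit1;  -- 0
select concat(' ', lower('I went to war'), ' ') like concat('% ', 'war', ' %') as hit2;        -- 1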
-- facebook- 1
--------------------------------------------------------------------------------
/Hard/02701-consecutive-transactions-with-increasing-amounts.sql:
--------------------------------------------------------------------------------
-- 1st cte- join Transactions with itself to get the records where datediff = 1 and the amount is increasing
-- here, the last row of each run will be missing from T1 for each customer because it only appears in T2 due to the datediff, but we are outputting from T1
-- 2nd cte- create a row number
-- 3rd cte- subtract rn from the t1 date as diff. This will be used for grouping
-- 4th cte- out of the list of T1 dates, pick the min date as start_date, and count how many rows there are for each group- group by customer_id and the above diff
-- finally, for end_date, add the count to start_date- this covers the case where the last date was missing

with possible_trans as
(select t1.customer_id as T1customer_id, t1.transaction_date as T1_date, t2.customer_id as T2customer_id, t2.transaction_date as T2_date
from Transactions t1 join Transactions t2
on t1.customer_id = t2.customer_id
and datediff(t2.transaction_date, t1.transaction_date) = 1
and t2.amount > t1.amount),

ordering as
(select T1customer_id as customer_id, T1_date as transaction_date,
row_number() over(partition by T1customer_id order by T1_date) as rn
from possible_trans
order by T1customer_id, T1_date),

subtracting_rn as
(select *, date_sub(transaction_date, interval rn day) as base_date
from ordering),

count_rows as
(select customer_id, min(transaction_date) as consecutive_start, count(*) as c
from subtracting_rn
group by customer_id, base_date)

select customer_id, consecutive_start, date_add(consecutive_start, interval c day) as consecutive_end
from count_rows
where c >= 2

-- no companies listed
--------------------------------------------------------------------------------
/Medium/00176-second-highest-salary.sql:
--------------------------------------------------------------------------------
-- create a temp table to get the max salary
-- pick the max salary from the Employee table which is not in the temp table

select max(salary) as SecondHighestSalary
from Employee
where salary not in
(select max(salary)
from Employee)

--------------------------------------------------------------------------------
-- write a query to get the offset 1 salary (skip the first row)
-- but on its own this won't give 'null' as the answer
-- so wrap it in a simple select, and select that

select(
select distinct salary SecondHighestSalary
from Employee
order by salary desc
limit 1 offset 1
) as SecondHighestSalary
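-- not a solution- a standalone sketch of why the wrapper matters: a scalar subquery over zero rows yields null,
-- while the bare inner query would just return an empty result set
select (select 1 from dual where false) as forced_null
-- returns one row containing null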
-- amazon- 2
-- adobe- 2
-- microsoft- 2
-- accenture- 2
-- google- 4
-- oracle- 3
-- tcs- 3
-- infosys- 2
-- yahoo- 2
-- apple- 2
-- mckinsey- 2
-- amdocs- 2
--------------------------------------------------------------------------------
/Medium/00177-nth-highest-salary.sql:
--------------------------------------------------------------------------------
-- my code starts with 'with cte...'
-- ranked salaries desc, picked the salary with rnk = n

CREATE FUNCTION getNthHighestSalary(N INT) RETURNS INT
BEGIN
RETURN (
with cte as(select distinct salary, dense_rank() over(order by salary desc) as rnk
from Employee)

select distinct salary
from cte
where rnk = n

);
END

--------------------------------------------------------------------------------
-- SET N = N-1 after BEGIN
-- offset n
-- offset means skip that many rows from the top, eg: if the 3rd highest salary is needed, skip 2 rows, so 3-1 = 2 rows
-- offset n-1 throws an error

CREATE FUNCTION getNthHighestSalary(N INT) RETURNS INT
BEGIN
SET N = N-1;
RETURN (

select distinct salary
from Employee
order by salary desc
limit 1 offset n

);
END

-- adobe- 2
-- amazon- 3
-- bloomberg- 3
-- microsoft- 2
--------------------------------------------------------------------------------
/Medium/00178-rank-scores.sql:
--------------------------------------------------------------------------------
-- use dense_rank()

select score, dense_rank() over(order by score desc) as 'rank'
from Scores
order by score desc

-- adobe- 2
-- google- 2
-- amazon- 4
-- apple- 2
-- uber- 2
--------------------------------------------------------------------------------
/Medium/00180-consecutive-numbers.sql:
--------------------------------------------------------------------------------
-- when comparing rows, use LEAD()/LAG()
-- using lag by 1, lag by 2 and comparing num, lag_num1 and lag_num2
-- can also use 2 leads

select distinct num as ConsecutiveNums
from
(
select *, lag(num, 1) over(order by id) lag_num1, lag(num, 2) over(order by id) lag_num2
from Logs
) temp1
where num = lag_num1 and lag_num1 = lag_num2

--------------------------------------------------------------------------------
-- using lag by 1, lead by 1 and comparing num, lg, ld

select distinct num as ConsecutiveNums
from
(
select *, lag(num, 1) over(order by id) lg, lead(num, 1) over(order by id) ld
from Logs
) temp1
where num = lg and lg = ld

-- adobe- 2
-- amazon- 2
-- facebook- 2
-- apple- 2
-- uber- 2
--------------------------------------------------------------------------------
/Medium/00184-department-highest-salary.sql:
--------------------------------------------------------------------------------
-- using a subquery

select d.name as Department, e.name as Employee, e.salary
from Department d join Employee e
on d.id = e.departmentId
where (e.departmentId, e.salary) in
(select departmentId, max(salary)
from Employee
group by departmentId)

--------------------------------------------------------------------------------
-- using a window function

with CTE as
(select departmentId, id, name, salary,
dense_rank() over(partition by departmentId order by salary desc) as rnk
from Employee)

select d.name as Department, e.name as Employee, e.salary
from Department d join CTE e
on d.id = e.departmentId
where e.rnk = 1

-- amazon- 2
-- microsoft- 3
-- apple- 2
-- facebook- 2
-- google- 2
--------------------------------------------------------------------------------
/Medium/00534-game-play-analysis-iii.sql:
--------------------------------------------------------------------------------
-- running total of games played
-- sum() over()

select player_id, event_date,
sum(games_played) over(partition by player_id order by event_date) as games_played_so_far
from Activity

-- gsn games
--------------------------------------------------------------------------------
/Medium/00550-game-play-analysis-iv.sql:
--------------------------------------------------------------------------------
-- first, create a temp table which has player_id and the date 1 day after the min login date for each player (group by)
-- write a SQL- count the players from Activity whose id and date are in the temp table (the date after the 1st login)
-- this is done to check if a player logged in the day after their 1st login
-- if they match in the temp table, count them
-- then divide by the total number of players

select
round(count(player_id) / (select count(distinct player_id) from Activity), 2) as fraction
from Activity
where (player_id, event_date) in
(select player_id, adddate(min(event_date), interval 1 day)
from Activity
group by player_id)

-- adobe- 2
-- gsn games- 1
--------------------------------------------------------------------------------
/Medium/00570-managers-with-at-least-5-direct-reports.sql:
--------------------------------------------------------------------------------
-- make 2 copies of the table: manager and employee- the manager copy's id = the employee copy's managerId
-- group by managerId so the count is per manager
-- use the having clause for the count filter

select m.name
from Employee m
left join Employee e
on m.id = e.managerId
group by e.managerId
having count(e.managerId) >= 5

-- amazon- 5
-- apple- 2
-- bloomberg- 2
--------------------------------------------------------------------------------
/Medium/00574-winning-candidate.sql:
--------------------------------------------------------------------------------
-- we need to find the candidate who got the highest number of votes
-- in the Vote table, id is the VoteID, and that candidateId got that VoteID
-- this means we just need to count candidateIds in the Vote table

select c.name
from Candidate c
join Vote v
on c.id = v.candidateId
group by candidateId
order by count(candidateId) desc
limit 1

-- no companies listed
--------------------------------------------------------------------------------
/Medium/00578-get-highest-answer-rate-question.sql:
--------------------------------------------------------------------------------
-- can also use case statements
-- sum(x = 'abc') is a way to count a row into the sum as 1 whenever the condition holds

select question_id as survey_log
from SurveyLog
group by question_id
order by round(sum(action = 'answer')/sum(action = 'show'), 2) desc, 1 asc
limit 1
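-- not a solution- a tiny standalone demo of the sum(condition) idiom, on inline rows:
select sum(action = 'answer') as answers, sum(action = 'show') as shows
from (select 'answer' as action union all select 'show' union all select 'answer') t
-- answers = 2, shows = 1: each row contributes 1 or 0 because the comparison itself evaluates to 1/0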
--------------------------------------------------------------------------------
-- longer version of the above
-- 2 different ctes hold the 2 different counts used in the calculation
-- in the final query, perform the calculation and sort

with t1 as
(select question_id, sum(case when action = 'answer' then 1 else 0 end) as ans_cnt
from SurveyLog
group by question_id),

t2 as
(select question_id, sum(case when action = 'show' then 1 else 0 end) as show_cnt
from SurveyLog
group by question_id)

select t1.question_id as survey_log
from t1
join t2
using(question_id)
order by t1.ans_cnt/t2.show_cnt desc, 1
limit 1

-- facebook- 1
--------------------------------------------------------------------------------
/Medium/00580-count-student-number-in-departments.sql:
--------------------------------------------------------------------------------
-- LEFT join both tables
-- count() will give 0, not null

select d.dept_name, count(s.student_id) as student_number
from Department d
left join Student s
using(dept_id)
group by d.dept_id
order by 2 desc, 1

-- twitter
--------------------------------------------------------------------------------
/Medium/00585-investments-in-2016.sql:
--------------------------------------------------------------------------------
-- GOOD QUESTION
-- 2 conditions should be met: 1. the location (lat, lon) should be unique; 2. tiv_2015 should be duplicated
-- calculate the sum over those rows
-- use WHERE to write both conditions

select round(sum(tiv_2016), 2) as tiv_2016
from Insurance
where (lat, lon) in
(select lat, lon
from Insurance
group by lat, lon
having count(*) = 1
)
and tiv_2015 in
(select tiv_2015
from Insurance
group by tiv_2015
having count(*) > 1
)

-- twitter- 1
--------------------------------------------------------------------------------
/Medium/00602-friend-requests-ii-who-has-the-most-friends.sql:
--------------------------------------------------------------------------------
-- get sender and accepter in 1 column using UNION ALL
-- count the ids, output the one with the max count

select requester_id as id, count(*) as num
from(
select requester_id
from RequestAccepted
union all
select accepter_id
from RequestAccepted
) temp1
group by 1
order by 2 desc
limit 1

-- amazon- 2
-- facebook- 1
--------------------------------------------------------------------------------
/Medium/00608-tree-node.sql:
--------------------------------------------------------------------------------
-- 3 cases- use CASE WHEN

select id,
(case when p_id is null then 'Root'
when id in (select p_id from Tree) then 'Inner'
else 'Leaf' end) as type
from Tree

-- twitter- 1
--------------------------------------------------------------------------------
/Medium/00612-shortest-distance-in-a-plane.sql:
--------------------------------------------------------------------------------
-- multiple ways to do it- this one gave the best runtime
-- self join the Points table where the points are not equal
-- pull the min answer, then sqrt and round it

select round(sqrt(min(power(p1.x - p2.x, 2) + power(p1.y - p2.y, 2))), 2) as shortest
from Point2D p1
join Point2D p2
where (p1.x, p1.y) != (p2.x, p2.y)

-- same as above
-- self join the Points table where the points are not equal
-- order by and limit to 1- MIN() NOT USED
select round(sqrt(power(p1.x - p2.x, 2) + power(p1.y - p2.y, 2)), 2) as shortest
from Point2D p1
join Point2D p2
where (p1.x, p1.y) != (p2.x, p2.y)
order by 1
limit 1
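-- not a solution- row constructors compare element by element, which is what filters out identical points above:
select (1, 2) = (1, 2) as same_point, (1, 2) != (1, 3) as different_point
-- same_point = 1, different_point = 1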
order by 1 18 | limit 1 19 | 20 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 21 | 22 | -- popular solution 23 | -- give roww numbers, join where 1st rn > 2nd rn 24 | -- pull min answer 25 | with cte as 26 | (select *, row_number() over(order by x asc) as rn 27 | from Point2D) 28 | 29 | select round(sqrt(min(power(p1.x - p2.x, 2) + power(p1.y - p2.y, 2))), 2) as shortest 30 | from cte p1 31 | join cte p2 on p1.rn > p2.rn 32 | 33 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 34 | 35 | -- cross join everything 36 | -- exclude records with answer = 0 with WHERE 37 | -- order by and limit 1 38 | select round(sqrt(power(p1.x - p2.x, 2) + power(p1.y - p2.y, 2)), 2) as shortest 39 | from Point2D p1 40 | cross join Point2D p2 41 | where round(sqrt(power(p1.x - p2.x, 2) + power(p1.y - p2.y, 2)), 2) != 0 42 | order by 1 asc 43 | limit 1 44 | 45 | -- cross join everything 46 | -- exclude records with answer = 0 with HAVING 47 | -- order by and limit 1 48 | select round(sqrt(power(p1.x - p2.x, 2) + power(p1.y - p2.y, 2)), 2) as shortest 49 | from Point2D p1 50 | join Point2D p2 51 | having shortest != 0 52 | order by 1 asc 53 | limit 1 54 | 55 | -- no companies listed 56 | -------------------------------------------------------------------------------- /Medium/00614-second-degree-follower.sql: -------------------------------------------------------------------------------- 1 | -- select followee that is in both colunms 2 | -- count followers for that followee 3 | 4 | select distinct f1.followee as follower, count(distinct f1.follower) as num 5 | from Follow f1 6 | join Follow f2 7 | on f1.followee = f2.follower 8 | group by 1 9 | order by 1 10 | 11 | -- no companies listed 12 | -------------------------------------------------------------------------------- /Medium/00626-exchange-seats.sql: -------------------------------------------------------------------------------- 1 | 2 | -- very obvious solution 3 | -- when even then id - 1, when odd then id + 1, when max(id) = odd then id 4 | 5 | select 6 | (case when id % 2 = 0 then id - 1 7 | when id = (select count(*) from Seat) then id 8 | else id + 1 9 | end) as id, student 10 | from Seat 11 | order by id 12 | 13 | --------------------------------------------------------------------------------------------------------------------- 14 | 15 | -- creative approach 16 | -- take one column lead and one column lag. Lead will go up 1 row, but last row can be odd, so it cannot be null. hence keep it as it as 17 | -- take second column lag. 
--------------------------------------------------------------------------------
-- creative approach
-- take one lead column and one lag column. Lead goes up 1 row, but the last row can be odd and must not become null- hence default it to the current student
-- take a second column with lag. Lag goes down 1 row, and we don't care that the first row is null
-- when id is odd, take the lead
-- when id is even, take the lag (hence we didn't care about the first null row)

with CTE as
(select id, student, lead(student, 1, student) over() as next_student,
lag(student, 1) over() as prev_student
from Seat)

select id,
(case when id % 2 = 1 then next_student
else prev_student
end) as student
from CTE

--------------------------------------------------------------------------------
-- same as above but concise

select id,
(case when id % 2 = 1 then lead(student, 1, student) over()
else lag(student) over()
end) as student
from Seat
order by id

--------------------------------------------------------------------------------
-- same as the 2nd, just swapped ids instead of students

with CTE as(
select student, id, lead(id, 1, id) over() as next_id, lag(id) over() as prev_id
from Seat
)

select (case when id % 2 = 1 then next_id else prev_id end) as id, student
from CTE
order by id

-- amazon- 4
-- bloomberg- 2
--------------------------------------------------------------------------------
/Medium/01045-customers-who-bought-all-products.sql:
--------------------------------------------------------------------------------
-- find the total number of products from the Product table
-- count the products for each customer and match against the total
-- we need to count distinct products because the Customer table may have duplicate rows

select customer_id
from Customer c
group by customer_id
having count(distinct c.product_key) =
(
select count(p.product_key)
from Product p
)

-- amazon- 2
-- adobe- 2
--------------------------------------------------------------------------------
/Medium/01070-product-sales-analysis-iii.sql:
--------------------------------------------------------------------------------
-- write a query to get the first year
-- then use that query to get the details of each product in its first year

select product_id, year as first_year, quantity, price
from Sales
where (product_id, year) in
(select product_id, min(year)
from Sales
group by 1)

-- amazon- 1
--------------------------------------------------------------------------------
/Medium/01077-project-employees-iii.sql:
--------------------------------------------------------------------------------
-- use dense_rank(), partition by project, order by experience years desc

with CTE as
(select p.project_id, p.employee_id, e.experience_years,
dense_rank() over(partition by p.project_id order by e.experience_years desc) as rnk
from Project p
left join Employee e
on p.employee_id = e.employee_id)

select project_id, employee_id
from CTE
where rnk = 1

-- facebook- 1
--------------------------------------------------------------------------------
/Medium/01098-unpopular-books.sql:
--------------------------------------------------------------------------------
-- cte- find the quantity sold for every book within 1 year
-- join with the Books table to get the book name,
-- include books with quantity < 10 or null,
-- exclude books whose available date falls anywhere in the last month
with cte as
(select book_id, sum(quantity) as sold_quantity
from Orders
where dispatch_date between '2018-06-23' and '2019-06-23'
group by book_id)

select b.book_id, b.name
from Books b
left join
cte c
using(book_id)
where (sold_quantity < 10 or sold_quantity is null)
and (book_id not in (select book_id
from Books
where available_from between '2019-05-23' and '2019-06-23'))

--------------------------------------------------------------------------------
-- first select all books and filter out the books we are not even considering- available date within the last month
-- then find the books which have quantity >= 10 in the given date range- WE DO NOT WANT THESE
-- EXCLUDE the books which meet the above condition

select book_id, name
from Books b
where available_from < '2019-05-23'
and book_id not in
(select book_id
from Orders
where dispatch_date between '2018-06-23' and '2019-06-23'
group by book_id
having sum(quantity) >= 10)

-- no companies listed
--------------------------------------------------------------------------------
/Medium/01107-new-users-daily-count.sql:
--------------------------------------------------------------------------------
-- in the cte, group by user_id to find the first login date
-- then count, grouped by date, filtering users whose first login date is in the given interval

with first_login as
(select user_id, min(activity_date) as login_date
from Traffic
where activity = 'login'
group by user_id)

select login_date, count(distinct user_id) as user_count
from first_login
where login_date between date_sub('2019-06-30', interval 90 day) and '2019-06-30'
group by login_date

-- Linkedin- 1
--------------------------------------------------------------------------------
/Medium/01112-highest-grade-for-each-student.sql:
--------------------------------------------------------------------------------
-- use RANK() and pull the results where rank = 1

select student_id, course_id, grade
from
(select student_id, course_id, grade, dense_rank() over(partition by student_id order by grade desc, course_id asc) as rnk
from Enrollments) temp1
where rnk = 1
order by 1

--------------------------------------------------------------------------------
-- nested
-- first get the id and the highest grade, then get the min course_id

select student_id, min(course_id) as course_id, grade
from Enrollments
where (student_id, grade) in
(select student_id, max(grade) as grade
from Enrollments
group by student_id)
group by student_id
order by student_id

-- amazon- 2
-- coursera- 1
--------------------------------------------------------------------------------
/Medium/01126-active-businesses.sql:
--------------------------------------------------------------------------------
-- 1st cte- find the avg for each event
-- 2nd cte- filter the business ids whose event counts > the global avg using WHERE
-- final query- keep businesses with more than 1 such event using HAVING COUNT > 1

with avg_occurrences as
(select event_type, avg(occurrences) as avg_occ
from Events
group by event_type),

occ_more_than_avg as
(select e.business_id, e.event_type, e.occurrences
from Events e
left join avg_occurrences a
using(event_type)
where e.occurrences > a.avg_occ)

select business_id
from occ_more_than_avg
group by 1
having count(business_id) > 1

--------------------------------------------------------------------------------
-- much more concise
-- do avg() over() to get the avg of that event in a separate column in the same table
-- pull business_id from that table using a WHERE filter
-- use HAVING to filter on the count of such businesses > 1

select business_id
from
(select *, avg(occurrences) over(partition by event_type) as avg_occ
from Events) as global_avg
where occurrences > avg_occ
group by business_id
having count(business_id) > 1

-- yelp
--------------------------------------------------------------------------------
/Medium/01132-reported-posts-ii.sql:
--------------------------------------------------------------------------------
-- create a flag which marks the post_ids that appear in the Removals table
-- sum(flag) = total removed
-- count(post_id) = total reported
-- divide these 2, group by date
-- in the final query, avg all the rows

with flagging as
(select distinct a.post_id, a.action_date,
(case when r.post_id is not null then 1 else 0 end) as flag
from Actions a
left join Removals r
using(post_id)
where extra = 'spam'),

daily_percent_date as
(select action_date, sum(flag)/count(distinct post_id)*100 as daily_percent
from flagging
group by 1)

select round(avg(daily_percent), 2) as average_daily_percent
from daily_percent_date

--------------------------------------------------------------------------------
-- same as above but concise

with daily_percent_date as
(select distinct a.post_id, a.action_date,
count(distinct r.post_id)/count(distinct a.post_id)*100 as daily_percent
from Actions a
left join Removals r
using(post_id)
where extra = 'spam'
group by a.action_date)

select round(avg(daily_percent), 2) as average_daily_percent
from daily_percent_date

-- facebook- 1
--------------------------------------------------------------------------------
/Medium/01149-article-views-ii.sql:
--------------------------------------------------------------------------------
-- selected the date, viewer_id, and the count of distinct articles on that date
-- kept the o/p with cnt > 1
-- selected those ids as the main o/p

select distinct viewer_id as id
from (select view_date, viewer_id, count(distinct article_id) as cnt
from Views
group by 1,2
having cnt > 1) temp
order by id

--------------------------------------------------------------------------------
-- concise version of the above

select distinct viewer_id as id
from Views
group by viewer_id, view_date
having count(distinct article_id) > 1
order by 1

-- LinkedIn- 1
--------------------------------------------------------------------------------
/Medium/01158-market-analysis-i.sql:
--------------------------------------------------------------------------------
-- using left join
-- we add the condition in the 'on' clause to get 0 as an answer
-- if this condition were used in the 'where' clause, the rows with 0 wouldn't appear in the o/p, since where filters rows

select u.user_id as buyer_id, u.join_date, count(o.order_id) as orders_in_2019
from Users u
left join Orders o
on u.user_id = o.buyer_id and o.order_date like '2019%'
group by 1
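-- not a solution- a standalone sketch of ON vs WHERE for a left join, on assumed mini tables:
select u.id, count(o.id) as orders_2019
from (select 1 as id union all select 2) u
left join (select 10 as id, 1 as buyer, '2018-05-01' as d) o
on u.id = o.buyer and o.d like '2019%'
group by u.id
-- both users survive with orders_2019 = 0; the same date test in WHERE would drop their rows entirely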
-- poshmark- 1
--------------------------------------------------------------------------------
/Medium/01164-product-price-at-a-given-date.sql:
--------------------------------------------------------------------------------
-- GOOD QUESTION
-- create ranks on filtered data- filter out rows with date > 2019-08-16
-- now only rows with date <= 2019-08-16 get ranked, with rnk descending from change_date
-- this covers 2 cases: a price change on 2019-08-16 and one before 2019-08-16
-- if the first change is after 2019-08-16, write the condition min(change_date) > 2019-08-16 instead
-- use UNION for the different cases

with CTE as (
select product_id, new_price as price, change_date, dense_rank() over(partition by product_id order by change_date desc) as rnk
from Products
where change_date <= '2019-08-16'
)

select product_id, price
from CTE
where rnk = 1

union

select product_id, 10
from Products
group by 1
having min(change_date) > '2019-08-16'

--------------------------------------------------------------------------------
-- same but without using rank

select product_id, new_price as price
from Products
where (product_id, change_date) in
(
select product_id, max(change_date) as change_date
from Products
where change_date <= '2019-08-16'
group by product_id
)

union

select product_id, 10 as price
from Products
group by 1
having min(change_date) > '2019-08-16'

-- google- 2
-- amazon- 4
--------------------------------------------------------------------------------
/Medium/01174-immediate-food-delivery-ii.sql:
--------------------------------------------------------------------------------
-- make a temporary table that has only the customer id and their min date
-- write basic aggregate functions but add a WHERE
-- the WHERE should pull a tuple from the temp table

select round(sum(case when order_date = customer_pref_delivery_date then 1 else 0 end)*100 / count(customer_id), 2) as immediate_percentage
from Delivery
where (customer_id, order_date) in
(
select customer_id, min(order_date) as first_order
from Delivery
group by customer_id
)

-- doordash- 2
--------------------------------------------------------------------------------
/Medium/01193-monthly-transactions-i.sql:
--------------------------------------------------------------------------------
-- date_format to extract only the year and month
-- simple aggregate functions, case statement

select date_format(trans_date, '%Y-%m') as month, country,
count(*) as trans_count,
sum(case when state = 'approved' then 1 else 0 end) as approved_count,
sum(amount) as trans_total_amount,
sum(case when state = 'approved' then amount else 0 end) as approved_total_amount
from Transactions
group by 1, 2
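-- not a solution- a quick standalone check of the format string (literal date assumed):
select date_format('2019-12-03', '%Y-%m') as month
-- returns '2019-12'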
-- adobe- 2
-- wayfair- 1
-- wish- 1
--------------------------------------------------------------------------------
/Medium/01204-last-person-to-fit-in-the-bus.sql:
--------------------------------------------------------------------------------
-- use a running total
-- create a temp table which has the running total of the weights
-- in the main query, keep the rows with total <= 1000, order by turn desc and limit 1

select person_name
from
(
select *, sum(weight) over(order by turn) as total_weight
from Queue
) temp
where total_weight <= 1000
order by turn desc
limit 1
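-- not a solution- what sum() over(order by ...) produces, on inline rows:
select turn, w, sum(w) over(order by turn) as running_w
from (select 1 as turn, 100 as w union all select 2, 200 union all select 3, 500) t
-- running_w: 100, 300, 800- each row carries the cumulative total up to and including itself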
-- amazon- 2
-- wayfair- 2
--------------------------------------------------------------------------------
/Medium/01205-monthly-tractions-ii.sql:
--------------------------------------------------------------------------------
-- there are 2 cases- approved and chargeback, so create a union for both cases in the cte
-- use aggregates with case and group by to get the result--
-- o/p of the cte:

-- | id  | month   | country | state      | amount |
-- | --- | ------- | ------- | ---------- | ------ |
-- | 101 | 2019-05 | US      | approved   | 1000   |
-- | 103 | 2019-06 | US      | approved   | 3000   |
-- | 105 | 2019-06 | US      | approved   | 5000   |
-- | 102 | 2019-05 | US      | chargeback | 2000   |
-- | 101 | 2019-06 | US      | chargeback | 1000   |
-- | 105 | 2019-09 | US      | chargeback | 5000   |

with cte as
(select id, date_format(trans_date, '%Y-%m') as 'month', country, state, amount
from Transactions t
where state = 'approved'
union
select c.trans_id, date_format(c.trans_date, '%Y-%m') as 'month', t.country, 'chargeback' as state, t.amount
from Chargebacks c
left join Transactions t
on c.trans_id = t.id)

select month, country,
sum(case when state = 'approved' then 1 else 0 end) as approved_count,
sum(case when state = 'approved' then amount else 0 end) as approved_amount,
sum(case when state = 'chargeback' then 1 else 0 end) as chargeback_count,
sum(case when state = 'chargeback' then amount else 0 end) as chargeback_amount
from cte
group by 1, 2

-- wish- 1
--------------------------------------------------------------------------------
/Medium/01212-teams-scores-in-football-tournamant.sql:
--------------------------------------------------------------------------------
-- union all- one side for the host (count the host score when that team was host), a 2nd for the guest (count the guest score when that team was guest)
-- calculate the sum of points
-- group by team_id

select t.team_id, t.team_name, coalesce(sum(u.points), 0) as num_points
from Teams t
left join
(select match_id, host_team as team_id, (case when host_goals > guest_goals then 3
when host_goals = guest_goals then 1
else 0 end) as points
from Matches
union all
select match_id, guest_team as team_id,
(case when host_goals < guest_goals then 3
when host_goals = guest_goals then 1
else 0 end) as points
from Matches) u
on u.team_id = t.team_id
group by team_id
order by 3 desc, 1 asc

--------------------------------------------------------------------------------
-- without using 'UNION ALL'- only used JOIN

select t.team_id, t.team_name, coalesce(
sum(case when t.team_id = m.host_team and m.host_goals > m.guest_goals then 3
when t.team_id = m.guest_team and m.guest_goals > m.host_goals then 3
when host_goals = guest_goals then 1 end), 0) as num_points
from Teams t
left join Matches m
on m.host_team = t.team_id or m.guest_team = t.team_id
group by team_id
order by 3 desc, 1 asc

-- wayfair- 1
--------------------------------------------------------------------------------
/Medium/01264-page-recommendations.sql:
--------------------------------------------------------------------------------
-- got user 1's friends in t0 and t1
-- joined t1 with Likes on 1's friends
-- where condition- the page shouldn't already be liked by 1

with t0 as
(select user1_id, user2_id
from Friendship
union
select user2_id, user1_id
from Friendship),
t1 as
(select user1_id, user2_id
from t0
where user1_id = 1)

select distinct l.page_id as recommended_page
from t1 join Likes l
on t1.user2_id = l.user_id
where page_id not in
(select page_id
from Likes
where user_id = 1)

--------------------------------------------------------------------------------
-- same as above but with 1 less CTE and an added where condition

with t1 as
(select user1_id, user2_id
from Friendship
union
select user2_id, user1_id
from Friendship)

select distinct l.page_id as recommended_page
from t1 join Likes l
on t1.user2_id = l.user_id
where t1.user1_id = 1
and page_id not in
(select page_id
from Likes
where user_id = 1)

-- facebook- 1
--------------------------------------------------------------------------------
/Medium/01270-all-people-report-to-the-given-manager.sql:
--------------------------------------------------------------------------------
-- the innermost layer gives- 2, 77 because they directly report to 1
-- the second layer gives- 4 because he reports to 2
-- the outermost layer gives 7 because he reports to 4
-- so the first table- 7
-- 2nd table- 4
-- 3rd table- 2, 77

select employee_id
from Employees
where manager_id in
(select employee_id
from Employees
where manager_id in
(select employee_id
from Employees
where manager_id = 1 and employee_id != 1))
union
select employee_id
from Employees
where manager_id in
(select employee_id
from Employees
where manager_id = 1 and employee_id != 1)
union
select employee_id
from Employees
where manager_id = 1 and employee_id != 1

--------------------------------------------------------------------------------
-- using joins
-- tier1 is the immediate reportees of 1- 2, 77
-- tier2 has 1 layer between them and 1- tier1.employee_id = Employee.manager_id- 4
-- tier3 has 2 layers between them and 1- tier2.employee_id = Employee.manager_id- 7

with tier1 as
(select e.employee_id
from Employees e
where manager_id = 1 and e.employee_id != 1),
tier2 as
(select e.employee_id
from Employees e
join tier1 t1
on t1.employee_id = e.manager_id),
tier3 as
(select e.employee_id
from Employees e
join tier2 t2
on t2.employee_id = e.manager_id)

select employee_id
from tier1
union
select employee_id
from tier2
union
select employee_id
from tier3

--------------------------------------------------------------------------------
-- using 2 joins

select e1.employee_id
from Employees e1
join Employees e2
on e1.manager_id = e2.employee_id
join Employees e3
on e2.manager_id = e3.employee_id
where e3.manager_id = 1 and e1.employee_id != 1
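-- not a solution- a hedged alternative sketch using a recursive cte (mysql 8+) on the same Employees table;
-- it returns the same set without hardcoding the 3 levels:
with recursive reports as
(select employee_id
from Employees
where manager_id = 1 and employee_id != 1
union all
select e.employee_id
from Employees e
join reports r
on e.manager_id = r.employee_id)
select employee_id
from reports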
-- adobe- 2
-- google- 1

--------------------------------------------------------------------------------
-- NOT A SOLUTION- ONLY EXPLANATION- JOINS

select e1.employee_id as e1emp, e1.manager_id as e1man, e2.employee_id as e2emp, e2.manager_id as e2man, e3.employee_id as e3emp, e3.manager_id as e3man
from Employees e1
join Employees e2
on e1.manager_id = e2.employee_id
join Employees e3
on e2.manager_id = e3.employee_id

-- logic- o/p e1emp
-- e1man = e2emp, e2man = e3emp, e3man = 1, e1emp != 1
-- we are outputting employees != 1 whose ultimate manager = 1

-- | e1emp | e1man | e2emp | e2man | e3emp | e3man |
-- | ----- | ----- | ----- | ----- | ----- | ----- |
-- | 4     | 2     | 2     | 1     | 1     | 1     |
-- | 1     | 1     | 1     | 1     | 1     | 1     |
-- | 2     | 1     | 1     | 1     | 1     | 1     |
-- | 77    | 1     | 1     | 1     | 1     | 1     |
-- | 9     | 8     | 8     | 3     | 3     | 3     |
-- | 3     | 3     | 3     | 3     | 3     | 3     |
-- | 8     | 3     | 3     | 3     | 3     | 3     |
-- | 7     | 4     | 4     | 2     | 2     | 1     |

--------------------------------------------------------------------------------
/Medium/01285-find-the-start-and-end-number-of-continuous-ranges.sql:
--------------------------------------------------------------------------------
-- row number- created just to look at what it produces
-- continuous ranges keep the same difference from the row number
-- so pick min as start, max as end, and group by diff

select min(log_id) as start_id, max(log_id) as end_id
from
(select log_id,
row_number() over(order by log_id) as rn,
log_id - row_number() over(order by log_id) as diff
from Logs) temp
group by diff
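-- not a solution- the id-minus-row_number trick on inline ids (1, 2, 3, 7, 8, 10 assumed):
select log_id, log_id - row_number() over(order by log_id) as diff
from (select 1 as log_id union all select 2 union all select 3
union all select 7 union all select 8 union all select 10) t
-- diffs: 0, 0, 0, 3, 3, 4- each continuous run shares one diff value, so grouping by it isolates the ranges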
-- microsoft- 1
--------------------------------------------------------------------------------
/Medium/01308-running-total-for-different-genders.sql:
--------------------------------------------------------------------------------
-- sum() over(partition by x) would give the grand total for that gender, not the running total
-- sum() over(partition by x order by y) gives the running total

select gender, day, sum(score_points) over(partition by gender order by day) as total
from Scores
order by 1, 2

-- no companies listed
--------------------------------------------------------------------------------
/Medium/01321-restaurant-growth.sql:
--------------------------------------------------------------------------------
-- use the ROWS/RANGE keywords to get a part of the window only
-- first create a temp table which has the running total over the previous 6 days plus the current row, and the min date across all rows
-- using that temp table, use division to get the avg amount, filtering where date >= min + 6
-- more details on ROWS/RANGE in SQL notes

select distinct visited_on, amount, round(amount/7, 2) as average_amount
from
(select visited_on, sum(amount) over(order by visited_on range between interval 6 day preceding and current row) as amount,
min(visited_on) over() as first_day
from Customer
) temp
where visited_on >= first_day + 6
order by 1
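-- not a solution- rows vs range on a tiny assumed series with a gap:
select d,
sum(v) over(order by d range between interval 2 day preceding and current row) as range_sum,
sum(v) over(order by d rows between 2 preceding and current row) as rows_sum
from (select cast('2023-01-01' as date) as d, 1 as v
union all select cast('2023-01-02' as date), 1
union all select cast('2023-01-05' as date), 1) t
-- on '2023-01-05': range_sum = 1 (no other row falls within the 2-day window), rows_sum = 3 (always 3 physical rows)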
--------------------------------------------------------------------------------
-- using a join
-- in the temp table, the difference from the min date should be 6 or greater, so only those rows are output
-- in the main query, join where the difference is between 0 and 6, i.e. sum the amounts where the difference is between 0 and 6- a WINDOW of 7 days

select t.visited_on, sum(c.amount) as amount, round(sum(c.amount)/7, 2) as average_amount
from
(select distinct visited_on
from Customer
where datediff(visited_on, (select min(visited_on) from Customer)) >= 6) t
left join Customer c
on datediff(t.visited_on, c.visited_on) between 0 and 6
group by 1
order by 1

-- point72- 1
--------------------------------------------------------------------------------
/Medium/01341-movie-rating.sql:
--------------------------------------------------------------------------------
-- UNION ALL because a user_name can be the same as a movie_name- we need both in the answer
-- just simple joins, then order by aggregate functions

(select u.name as results
from MovieRating mr
inner join Users u
on mr.user_id = u.user_id
group by mr.user_id
order by count(mr.user_id) desc, u.name asc
limit 1)

union all

(select m.title as results
from MovieRating mr
inner join Movies m
on mr.movie_id = m.movie_id
where created_at like '2020-02%'
group by mr.movie_id
order by avg(mr.rating) desc, m.title asc
limit 1)

-- SAP- 1
--------------------------------------------------------------------------------
/Medium/01355-activity-participans.sql:
--------------------------------------------------------------------------------
-- using RANK()- create rank columns on the count- asc and desc
-- select the records whose ranks are both != 1

with activity_count as
(select activity, count(distinct id) as cnt_activity
from Friends
group by 1),

ranked as
(select activity, cnt_activity,
dense_rank() over(order by cnt_activity) as asc_rnk,
dense_rank() over(order by cnt_activity desc) as desc_rnk
from activity_count)

select activity
from ranked where asc_rnk != 1 and desc_rnk != 1

--------------------------------------------------------------------------------
-- same as above but concise

select activity from
(select activity,
dense_rank() over(order by count(activity)) as asc_rnk,
dense_rank() over(order by count(activity) desc) as desc_rnk
from Friends
group by 1) as ranked
where asc_rnk != 1 and desc_rnk != 1

--------------------------------------------------------------------------------
-- using AGGREGATE()
-- take the max and the min, and choose records whose counts are neither
with activity_count as
(select activity, count(distinct id) as cnt_activity
from Friends
group by 1)

select activity
from activity_count
where cnt_activity not in (select min(cnt_activity) from activity_count)
and cnt_activity not in (select max(cnt_activity) from activity_count)

-- ibm- 1
--------------------------------------------------------------------------------
/Medium/01364-number-of-trusted-contacts-of-a-customer.sql:
--------------------------------------------------------------------------------
-- a simple aggregate for the count of contacts
-- for trusted contacts, pick only those which are present in the Customers table using CASE

select i.invoice_id, cm.customer_name, i.price, count(ct.contact_name) as contacts_cnt,
sum(case when contact_name in (select customer_name
from Customers) then 1 else 0 end) as trusted_contacts_cnt
from Invoices i
left join Customers cm
on i.user_id = cm.customer_id
left join Contacts ct
on cm.customer_id = ct.user_id
group by 1
order by 1

-- roblox- 1
--------------------------------------------------------------------------------
/Medium/01393-capital-gain-loss.sql:
--------------------------------------------------------------------------------
-- sell - buy = profit/loss

select stock_name, sum(case when operation = 'Sell' then price end) -
sum(case when operation = 'Buy' then price end) as capital_gain_loss
from Stocks
group by 1

--------------------------------------------------------------------------------
-- same as above but concise
-- done as a single aggregate covering both cases

select stock_name, sum(case when operation = 'Sell' then price else (price * -1) end) as capital_gain_loss
from Stocks
group by 1

-- amazon- 2
-- robinhood- 1
--------------------------------------------------------------------------------
/Medium/01398-customers-who-bought-products-A-and-B-but-not-C.sql:
--------------------------------------------------------------------------------
-- summing up all products, choosing the customers that have A > 0 and B > 0 and C = 0

select o.customer_id, c.customer_name
from
(select order_id, customer_id,
sum(product_name='A') as A,
sum(product_name='B') as B,
sum(product_name='C') as C
from Orders
group by customer_id) o
left join Customers c
on c.customer_id = o.customer_id
where A > 0 and B > 0 and C = 0
order by 1

--------------------------------------------------------------------------------
-- group_concat() approach- unique approach- my first thought
-- group all products per customer, choose customers with only A and B but not C

select customer_id, customer_name
from
(
select o.order_id, o.customer_id, c.customer_name, group_concat(o.product_name order by product_name) as group_products
from Orders o left join Customers c
on o.customer_id = c.customer_id
group by c.customer_id
) temp1
where group_products like '%A%B%' and group_products not like '%A%B%C%'

--------------------------------------------------------------------------------
-- longer version of the 1st one
select o.customer_id, c.customer_name
from
(select order_id, customer_id,
sum(case when product_name='A' then 1 else 0 end) as A,
sum(case when product_name='B' then 1 else 0 end) as B,
sum(case when product_name='C' then 1 else 0 end) as C
from Orders
group by customer_id) o
left join Customers c
on c.customer_id = o.customer_id
where A > 0 and B > 0 and C = 0
order by 1

--------------------------------------------------------------------------------
-- much simpler version of the 1st one

select o.customer_id, c.customer_name
from Orders o
left join Customers c
on c.customer_id = o.customer_id
group by o.customer_id
having sum(product_name='A') > 0 and sum(product_name='B') > 0 and sum(product_name='C') = 0
order by 1

-- amazon- 2
-- facebook- 1
--------------------------------------------------------------------------------
/Medium/01440-evaluate-boolean-expression.sql:
--------------------------------------------------------------------------------
-- create 2 value tables- l for the left operand, r for the right operand
-- use joins to attach them to the main table
-- write case statements using l.value and r.value

select e.left_operand, e.operator, e.right_operand,
(case when operator = '>' and l.value > r.value then 'true'
when operator = '<' and l.value < r.value then 'true'
when operator = '=' and l.value = r.value then 'true'
else 'false' end) as value
from Expressions e
join Variables l
on l.name = e.left_operand
join Variables r
on r.name = right_operand

-- point72- 1
--------------------------------------------------------------------------------
/Medium/01445-apples-&-oranges.sql:
--------------------------------------------------------------------------------
-- a simple aggregate with a condition

select sale_date, (sum(case when fruit = 'apples' then sold_num else 0 end) -
sum(case when fruit = 'oranges' then sold_num else 0 end)) as diff
from Sales
group by 1
order by 1

--------------------------------------------------------------------------------
-- using a join- 1 table for apples, 1 for oranges, joined on the sale date

select sa.sale_date, (ifnull(sum(sa.sold_num),0)-ifnull(sum(so.sold_num), 0)) as diff
from Sales sa
join Sales so
on sa.sale_date = so.sale_date and sa.fruit = 'apples' and so.fruit = 'oranges'
group by 1
order by 1

-- facebook- 1
--------------------------------------------------------------------------------
/Medium/01454-active-users.sql:
--------------------------------------------------------------------------------
-- my approach- unique
-- CTE- select only the distinct dates for a user
-- do a lead() over the rows- because we need 5 consecutive days: the 1st day is the current login date, plus 4 leading dates- eg- for 30 May, the leading dates would be 31 May, 1, 2, 3 June = 5 consecutive days
-- the diff between the current date and the 4th leading date should be 4- eg. 30 May and 3 June = 4
-- join to get the names

with distinct_logins as
(select distinct id, login_date
from Logins)
select distinct t.id, a.name
from
(select id, login_date,
lead(login_date, 4, null) over(partition by id order by login_date) as lead_date,
datediff(lead(login_date, 4, null) over(partition by id order by login_date), login_date) as diff
from distinct_logins) t
join Accounts a
using(id)
where diff = 4
order by 1

--------------------------------------------------------------------------------
-- using the RANGE clause with count() to count the number of consecutive days

with distinct_logins as
(select distinct id, login_date
from Logins),

count_prev as
(select id, login_date,
count(login_date) over(partition by id order by login_date range between interval 4 day preceding and current row) as login_days
from distinct_logins)

select distinct t.id, a.name
from count_prev t
join Accounts a
using(id)
where login_days >= 5
order by 1

--------------------------------------------------------------------------------
-- popular solution
-- by joining the 2 login tables by id, each date matches with every other date for that id
-- we need consecutive days, so the diff should be between 1 and 4
-- we also need to make sure those l2 dates are distinct for these records (there can be records with a diff between 1 and 4 that are still not consecutive)
-- so having count() = 4
-- group by- id, l1.date

with CTE as
(select l1.id, l2.id as L2id, l1.login_date as L1date, l2.login_date as L2date
from Logins l1
join Logins l2
on l1.id = l2.id
where datediff(l1.login_date, l2.login_date) between 1 and 4
group by l1.id, l1.login_date
having count(distinct l2.login_date) = 4)

select distinct id, a.name
from CTE
join Accounts a
using(id)
order by 1

--------------------------------------------------------------------------------
-- popular solution- good one
-- calculate rank() in ascending order of date
-- subtract the rank, as days, from the login date
-- if the days are continuous, the difference will be the same
-- the count per difference should be >= 5

with distinct_logins as
(select distinct id, login_date,
dense_rank() over(partition by id order by login_date) as rnk
from Logins),

count_diff as
(select id, login_date,
login_date - interval rnk day as diff
from distinct_logins)

select distinct t.id, a.name
from count_diff t
join Accounts a
using(id)
group by 1, diff
having count(t.diff) >= 5
order by 1

-- amazon- 3
-- facebook- 2
-- tiktok- 2
--------------------------------------------------------------------------------
/Medium/01459-rectangles-area.sql:
--------------------------------------------------------------------------------
-- self join on the table
-- from the o/p, we can see that p1.id < p2.id, and we don't want the axes to be identical (else it won't be a rectangle)
-- so the two points' x-values and y-values cannot be the same
-- do the calculation- (x1-x2) * (y1-y2) to get the AREA
-- order by as given

select p.id as P1, q.id as P2,
abs((p.x_value-q.x_value) * (p.y_value-q.y_value)) as area
from Points p
join Points q
on p.id < q.id and p.x_value != q.x_value and p.y_value != q.y_value
order by 3 desc, 1, 2
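-- not a solution- p.id < q.id keeps each pair exactly once, on inline ids (1, 2, 3):
select p.i as a, q.i as b
from (select 1 as i union all select 2 union all select 3) p
join (select 1 as i union all select 2 union all select 3) q
on p.i < q.i
-- pairs: (1,2), (1,3), (2,3)- no self-pairs and no mirrored duplicates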
-- twitter
--------------------------------------------------------------------------------
/Medium/01468- calculate-salaries.sql:
--------------------------------------------------------------------------------
-- easier to read
-- cte- calculated the max salary
-- final query- compared it with the actual salary and performed the calculations

with cte as
(select company_id, employee_id, employee_name, salary, max(salary) over(partition by company_id) as max_salary
from Salaries)

select company_id, employee_id, employee_name,
round((case when max_salary < 1000 then salary
when max_salary between 1000 and 10000 then salary - salary*24/100
when max_salary > 10000 then salary - salary*49/100
end)) as salary
from cte

--------------------------------------------------------------------------------
-- same as above, just compared and calculated everything in 1 query

select company_id, employee_id, employee_name,
round((case when max(salary) over(partition by company_id) < 1000 then salary
when max(salary) over(partition by company_id) between 1000 and 10000 then salary - salary*24/100
when max(salary) over(partition by company_id) > 10000 then salary - salary*49/100
end)) as salary
from Salaries
--------------------------------------------------------------------------------
/Medium/01501-countries-you-can-safely-invest-in.sql:
--------------------------------------------------------------------------------
-- id_country- get the country name for each person by joining on the country code
-- id_duration- the duration for each person on each call- each person can have multiple rows
-- final query- join these 2 to calculate the avg for each country- group by country
-- use the having clause to keep countries whose avg > the global avg (calculated using the id_duration cte)

with id_country as
(select p.id, c.name as country
from Person p
join Country c
on left(p.phone_number, 3) = c.country_code),

id_duration as
(select caller_id, duration
from Calls
union all
select callee_id, duration
from Calls)

select c.country
from id_country c
join id_duration d
on c.id = d.caller_id
group by c.country
having avg(duration) > (select avg(duration)
from id_duration)

--------------------------------------------------------------------------------
-- same as above but concise
-- first cte as above,
-- then join on Calls using OR

select c.name as country
from Country c
join Person p
on c.country_code = left(p.phone_number, 3)
join calls cl
on cl.caller_id = p.id or cl.callee_id = p.id
group by c.name
having avg(duration) > (select avg(duration)
from Calls)

-- no companies listed
--------------------------------------------------------------------------------
/Medium/01532-the-most-recent-three-orders.sql:
--------------------------------------------------------------------------------
-- we needed the top 3, so a plain subquery won't work without a window function
-- used dense_rank()

with CTE as
(select *, dense_rank() over(partition by customer_id order by order_date desc) as rnk
from Orders)
a plain subquery won't cut it- a window function is needed 2 | -- used dense_rank() 3 | 4 | with CTE as 5 | (select *, dense_rank() over(partition by customer_id order by order_date desc) as rnk 6 | from Orders) 7 | 8 | select c.name as customer_name, CTE.customer_id, CTE.order_id, CTE.order_date 9 | from CTE 10 | join Customers c 11 | on CTE.customer_id = c.customer_id 12 | where rnk <= 3 13 | order by name, customer_id, order_date desc 14 | 15 | -- no companies listed 16 | -------------------------------------------------------------------------------- /Medium/01549-the-most-recent-orders-for-each-product.sql: -------------------------------------------------------------------------------- 1 | -- using window function- dense_rank() to get most recent orders by date, partition by product_id 2 | 3 | with CTE as 4 | (select p.product_name, o.product_id, o.order_id, o.order_date, 5 | dense_rank() over(partition by o.product_id order by o.order_date desc) as rnk 6 | from Orders o 7 | left join Products p 8 | on o.product_id = p.product_id) 9 | 10 | select product_name, product_id, order_id, order_date 11 | from CTE 12 | where rnk = 1 13 | order by 1, 2, 3 14 | 15 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 16 | -- using subquery 17 | 18 | select p.product_name, o.product_id, o.order_id, o.order_date 19 | from Products p 20 | join Orders o 21 | on p.product_id = o.product_id 22 | where (o.product_id, o.order_date) in (select product_id, max(order_date) 23 | from Orders 24 | group by product_id) 25 | order by 1, 2, 3 26 | -------------------------------------------------------------------------------- /Medium/01555-bank-account-summary.sql: -------------------------------------------------------------------------------- 1 | -- multiplied amount paid by -1 to make it negative, as it will be deducted from credit 2 | -- joined Users on user_id matching either paid_by or paid_to using 'OR' 3 | -- if user_id = paid_by then add the negative amount to the credit, if user_id = paid_to then add the positive amount to the credit 4 | -- at the end, if the above calculation is < 0 then 'Yes' else 'No' 5 | 6 | with cte as 7 | (select trans_id, paid_by, (amount) * -1 as amount_paid, paid_to, amount as amount_received 8 | from Transactions 9 | order by transacted_on) 10 | 11 | select user_id, user_name, 12 | credit + sum(case when user_id = paid_by then t.amount_paid 13 | when user_id = paid_to then t.amount_received 14 | else 0 end) as credit, 15 | (case when credit + sum(case when user_id = paid_by then t.amount_paid 16 | when user_id = paid_to then t.amount_received 17 | else 0 end) < 0 then 'Yes' else 'No' end) as credit_limit_breached 18 | from Users u 19 | left join cte t 20 | on u.user_id = paid_by or u.user_id = paid_to 21 | group by user_id 22 | 23 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 24 | -- same as above but concise 25 | -- credit '-' when user_id = paid_by, '+' when user_id = paid_to 26 | 27 | select user_id, user_name, 28 | credit - sum(case when user_id = paid_by then t.amount else 0 end) + 29 | sum(case when user_id = paid_to then t.amount else 0 end) as credit, 30 | case when (credit - sum(case when user_id = paid_by then t.amount else 0 end) + 31 | sum(case when user_id = paid_to then t.amount else 0 end)) < 0 then 'Yes' else 'No' end as 
credit_limit_breached 32 | from Users u 33 | left join Transactions t 34 | on u.user_id = paid_by or u.user_id = paid_to 35 | group by user_id 36 | 37 | 38 | -- optum- 1 39 | -------------------------------------------------------------------------------- /Medium/01596-the-most-frequently-ordered-products-for-each-customer.sql: -------------------------------------------------------------------------------- 1 | -- first create cte1 with the count 2 | -- then cte2 with a rank using that count 3 | -- then fetch rows with rnk = 1, join for product name 4 | 5 | with cte1 as 6 | (select customer_id, product_id, count(product_id) as count_product 7 | from Orders 8 | group by customer_id, product_id), 9 | cte2 as 10 | (select customer_id, product_id, 11 | dense_rank() over(partition by customer_id order by count_product desc) as rnk 12 | from cte1) 13 | 14 | select c.customer_id, c.product_id, p.product_name 15 | from cte2 c 16 | join Products p 17 | on p.product_id = c.product_id 18 | where rnk = 1 19 | 20 | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 21 | 22 | -- same as above, but combined cte1 and cte2 into 1 table- used count() directly inside rank() 23 | 24 | with cte2 as 25 | (select customer_id, product_id, 26 | dense_rank() over(partition by customer_id order by count(product_id) desc) as rnk 27 | from Orders 28 | group by 1, 2) 29 | 30 | select c.customer_id, c.product_id, p.product_name 31 | from cte2 c 32 | join Products p 33 | on p.product_id = c.product_id 34 | where rnk = 1 35 | 36 | 37 | -- no companies listed 38 | -------------------------------------------------------------------------------- /Medium/01613-find-the-missing-ids.sql: -------------------------------------------------------------------------------- 1 | -- create a recursive cte to get all ids from 1 up to the max id 2 | -- output all ids from the cte that are not in the Customers table 3 | -- for the recursive cte, start with 1, union all ids incremented by 1, until reaching max(customer_id) 4 | 5 | with recursive cte as 6 | (select 1 as ids 7 | union all 8 | select ids + 1 9 | from cte 10 | where ids < (select max(customer_id) 11 | from Customers) 12 | ) 13 | select * 14 | from cte 15 | where ids not in (select customer_id 16 | from Customers) 17 | 18 | -- amazon- 1 19 | -------------------------------------------------------------------------------- /Medium/01699-number-of-calls-between-two-persons.sql: -------------------------------------------------------------------------------- 1 | -- union all- this gets every call in both directions, then the condition p1 < p2 keeps each pair once 2 | 3 | select from_id as person1, to_id as person2, count(*) as call_count, sum(duration) as total_duration 4 | from 5 | (select from_id, to_id, duration 6 | from Calls 7 | union all 8 | select to_id, from_id, duration 9 | from Calls) t 10 | where from_id < to_id 11 | group by 1, 2 12 | 13 | ----------------------------------------------------------------------------------------------------------------------------- 14 | -- without using union all 15 | -- make p1 < p2, then do calculations 16 | 17 | select 18 | (case when from_id < to_id then from_id else to_id end) as person1, 19 | (case when from_id < to_id then to_id else from_id end) as person2, 20 | count(*) as call_count, 21 | sum(duration) as total_duration 22 | from Calls 23 | group by 1, 2 24 | 25 | 26 | -- facebook- 2 27 | -- amazon- 1 28 | 
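-- the second approach can be written even shorter- a sketch, assuming MySQL's least()/greatest() functions:
-- select least(from_id, to_id) as person1, greatest(from_id, to_id) as person2,
--        count(*) as call_count, sum(duration) as total_duration
-- from Calls
-- group by 1, 2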
-------------------------------------------------------------------------------- /Medium/01709-biggest-window-between-visits.sql: -------------------------------------------------------------------------------- 1 | -- use lead() to get the next date 2 | -- if there's no next visit, the difference has to be calculated against '2021-1-1', so pass that as the default value in lead() 3 | 4 | with CTE as 5 | (select user_id, visit_date, 6 | lead(visit_date, 1, '2021-1-1') over(partition by user_id order by visit_date) as next_date 7 | from UserVisits) 8 | 9 | select user_id, max(datediff(next_date, visit_date)) as biggest_window 10 | from CTE 11 | group by user_id 12 | 13 | 14 | -- no companies listed 15 | -------------------------------------------------------------------------------- /Medium/01715-count-apples-and-oranges.sql: -------------------------------------------------------------------------------- 1 | -- join the two tables- chest values can be null, so use ifnull 2 | -- addition of both 3 | -- sum of all rows 4 | 5 | select sum(b.apple_count + ifnull(c.apple_count, 0)) as apple_count, 6 | sum(b.orange_count + ifnull(c.orange_count, 0)) as orange_count 7 | from Boxes b 8 | left join Chests c 9 | using(chest_id) 10 | 11 | -- no companies listed 12 | -------------------------------------------------------------------------------- /Medium/01747-leetflex-banned-accounts.sql: -------------------------------------------------------------------------------- 1 | -- join on the same account, a different ip, and overlapping sessions 2 | 3 | select distinct l1.account_id 4 | from LogInfo l1 5 | join LogInfo l2 6 | on l1.account_id = l2.account_id 7 | and l1.ip_address != l2.ip_address 8 | and l1.login between l2.login and l2.logout 9 | 10 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 11 | -- self join- more popular answer on LC 12 | 13 | select distinct l1.account_id 14 | from LogInfo l1, LogInfo l2 15 | where l1.account_id = l2.account_id 16 | and l1.ip_address != l2.ip_address 17 | and l1.login between l2.login and l2.logout 18 | 19 | 20 | -- amazon- 1 21 | 22 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 23 | 24 | -- NOT WORKING FOR THIS CASE: 25 | 26 | -- | account_id | ip_address | login | logout | 27 | -- | ---------- | ---------- | ------------------- | ------------------- | 28 | -- | 1 | 1 | 2021-02-01 09:00:00 | 2021-02-01 15:00:00 | 29 | -- | 1 | 1 | 2021-02-01 10:00:00 | 2021-02-01 11:00:00 | 30 | -- | 1 | 6 | 2021-02-01 12:00:00 | 2021-02-01 13:00:00 | 31 | 32 | -- here the 1st and 3rd sessions overlap, but lead() only compares adjacent rows (ordered by login), so it doesn't catch that 33 | 34 | with CTE as 35 | ( 36 | select account_id, ip_address, login, logout, 37 | lead(login) over(partition by account_id order by login) as next_login, 38 | lead(ip_address) over(partition by account_id order by login) as next_ip 39 | from LogInfo 40 | ) 41 | 42 | select distinct account_id 43 | from CTE 44 | where next_login between login and logout 45 | and next_ip != ip_address 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /Medium/01783-grand-slam-titles.sql: -------------------------------------------------------------------------------- 1 | -- beginner level solution 2 | -- all player ids in a single column, count those, join with Players for name 3 | 4 | with CTE as 5 | (select Wimbledon as id 6 | from Championships 7 | union 
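-- (union all, not union: a player can win the same slam in multiple years and every win must be counted)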
all 8 | select Fr_open as id 9 | from Championships 10 | union all 11 | select US_open as id 12 | from Championships 13 | union all 14 | select Au_open as id 15 | from Championships) 16 | 17 | select c.id as player_id, p.player_name, count(c.id) as grand_slams_count 18 | from CTE c 19 | join Players p 20 | on c.id = p.player_id 21 | group by 1 22 | 23 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------ 24 | -- using cross join 25 | -- using aggregate function because we want to group by each player 26 | -- using cross join, we are getting all players and all championships 27 | -- so we use having to filter only those players who have won at least 1 28 | 29 | select p.player_id, p.player_name, 30 | sum(case when p.player_id = c.Wimbledon then 1 else 0 end + 31 | case when p.player_id = c.Fr_open then 1 else 0 end + 32 | case when p.player_id = c.US_open then 1 else 0 end + 33 | case when p.player_id = c.Au_open then 1 else 0 end) as grand_slams_count 34 | from Players p 35 | cross join Championships c 36 | group by p.player_id 37 | having grand_slams_count > 0 38 | 39 | 40 | -- amazon- 1 41 | -------------------------------------------------------------------------------- /Medium/01811-find-interview-candidates.sql: -------------------------------------------------------------------------------- 1 | -- first, unpivot the table to get contests and winners 2 | -- then we need 3 consecutive contests, so do 2 leads 3 | -- lead1 minus the current contest and lead2 minus lead1 should both be 1- that gives 3 consecutive contests 4 | -- we union another query for the other condition- a user who has won at least 3 gold medals 5 | -- at the end, join with Users table to get name and email 6 | 7 | with unpivot as( 8 | select contest_id, gold_medal as user 9 | from contests 10 | union all 11 | select contest_id, silver_medal as user 12 | from contests 13 | union all 14 | select contest_id, bronze_medal as user 15 | from contests), 16 | 17 | lead_contests as 18 | (select u1.contest_id, 19 | lead(contest_id, 1, 0) over(partition by user order by contest_id) as lead1, 20 | lead(contest_id, 2, 0) over(partition by user order by contest_id) as lead2, 21 | u1.user 22 | from unpivot u1), 23 | 24 | interview_users_list as 25 | (select distinct user 26 | from lead_contests 27 | where lead2 - lead1 = 1 28 | and lead1 - contest_id = 1 29 | union 30 | select gold_medal as user 31 | from Contests 32 | group by 1 33 | having count(*) >= 3) 34 | 35 | select name, mail 36 | from Users u 37 | join interview_users_list iu 38 | on u.user_id = iu.user 39 | 40 | --------------------------------------------------------------------------------------------------------------------------------------------------------------------- 41 | -- same as above but a little change in the consecutive logic 42 | -- to find consecutive ids, it's a good idea to do a rank/ row_num and subtract that from the ids 43 | -- if the ids are consecutive, the diff will be the same 44 | -- then we can group by diff 45 | 46 | with unpivot as( 47 | select contest_id, gold_medal as user 48 | from contests 49 | union all 50 | select contest_id, silver_medal as user 51 | from contests 52 | union all 53 | select contest_id, bronze_medal as user 54 | from contests), 55 | 56 | lead_contests as 57 | (select u1.user, u1.contest_id, 58 | row_number() over(partition by user order by contest_id) as rnk, 59 | contest_id - row_number() over(partition by user 
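-- (e.g. contests 4,5,6 get row numbers 1,2,3, so all three rows share diff = 3 and fall into one group)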
order by contest_id) as diff 60 | from unpivot u1), 61 | 62 | interview_users_list as 63 | (select distinct user 64 | from lead_contests 65 | group by user, diff 66 | having count(*) >= 3 67 | union 68 | select gold_medal as user 69 | from Contests 70 | group by 1 71 | having count(*) >= 3) 72 | 73 | select name, mail 74 | from Users u 75 | join interview_users_list iu 76 | on u.user_id = iu.user 77 | 78 | -- amazon- 1 79 | -------------------------------------------------------------------------------- /Medium/01831-maximum-transaction-each-day.sql: -------------------------------------------------------------------------------- 1 | -- using subquery 2 | 3 | select transaction_id 4 | from Transactions 5 | where (date(day), amount) in (select date(day), max(amount) 6 | from Transactions 7 | group by 1) 8 | order by 1 9 | 10 | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 11 | -- using window function 12 | 13 | with CTE as 14 | (select transaction_id, dense_rank() over(partition by date(day) order by amount desc) as rnk 15 | from Transactions) 16 | 17 | select transaction_id 18 | from CTE 19 | where rnk = 1 20 | order by 1 21 | 22 | 23 | -- no companies listed 24 | -------------------------------------------------------------------------------- /Medium/01841-league-statistics.sql: -------------------------------------------------------------------------------- 1 | -- first, union all records of home team and away team along with points calculation 2 | -- then use simple aggregate, group by to get those statistics for each team 3 | -- join on team id to get the team name 4 | 5 | with all_records as 6 | (select home_team_id, 7 | (case when home_team_goals > away_team_goals then 3 8 | when home_team_goals < away_team_goals then 0 9 | else 1 end) as points, 10 | home_team_goals, 11 | away_team_goals 12 | from Matches 13 | union all 14 | select away_team_id, 15 | (case when home_team_goals < away_team_goals then 3 16 | when home_team_goals > away_team_goals then 0 17 | else 1 end) as points, 18 | away_team_goals, 19 | home_team_goals 20 | from Matches) 21 | 22 | select t.team_name, count(home_team_id) as matches_played, 23 | sum(points) as points, 24 | sum(home_team_goals) as goal_for, 25 | sum(away_team_goals) as goal_against, 26 | (sum(home_team_goals) - sum(away_team_goals)) as goal_diff 27 | from all_records a 28 | join Teams t 29 | on t.team_id = a.home_team_id 30 | group by a.home_team_id 31 | order by points desc, goal_diff desc, team_name 32 | 33 | -- no companies listed 34 | -------------------------------------------------------------------------------- /Medium/01867-orders-with-maximum-quantity-above-average.sql: -------------------------------------------------------------------------------- 1 | -- we need to find imbalanced orders- "An imbalanced order is one whose maximum quantity is strictly greater than the maximum of average quantities of all orders" 2 | -- the maximum of average quantities of all orders is max(12.3333333, 5.5, 14.333333, 5, 9) = 14.333333; only orders 1 and 3 are imbalanced because their maximum quantities (15 and 20) are strictly greater than 14.333333 3 | -- so first we find the max and avg for each order 4 | -- then we compare the max_q of each order to the overall max_avg 5 | 6 | with cte as 7 | (select order_id, max(quantity) as max_q, avg(quantity) as avg_q 8 | from OrdersDetails 9 | group by 
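-- (one row per order_id: its max quantity and its avg quantity)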
1) 10 | 11 | select order_id 12 | from cte 13 | where max_q > (select max(avg_q) as max_avg 14 | from cte) 15 | 16 | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 17 | -- same logic as above but concise 18 | -- max(avg(quantity)) over() gives the maximum of all avg quantities 19 | -- used group by for avg(quantity) 20 | 21 | with cte as 22 | (select order_id, max(quantity) as max_q, 23 | max(avg(quantity)) over() as max_avg_q 24 | from OrdersDetails 25 | group by 1) 26 | 27 | select order_id 28 | from cte 29 | where max_q > max_avg_q 30 | 31 | 32 | -- no companies listed 33 | -------------------------------------------------------------------------------- /Medium/01875-group-employees-of-the-same-salary.sql: -------------------------------------------------------------------------------- 1 | -- in the cte, pull only those salaries which are not unique (count >= 2) 2 | -- in the final query, pull everything from the Employees table, do a rank on salary 3 | -- keep only the salaries that are in the cte 4 | 5 | with cte as 6 | (select salary 7 | from Employees 8 | group by salary 9 | having count(*) >= 2) 10 | 11 | select employee_id, name, salary, 12 | dense_rank() over(order by salary) as team_id 13 | from Employees 14 | where salary in (select salary 15 | from cte) 16 | order by 4, 1 17 | 18 | -- clari- 1 19 | -------------------------------------------------------------------------------- /Medium/01907-count-salary-categories.sql: -------------------------------------------------------------------------------- 1 | -- since all rows are supposed to have different texts, use UNION 2 | -- count rows for each income group 3 | 4 | select 'Low Salary' as category, sum(case when income < 20000 then 1 else 0 end) as accounts_count 5 | from Accounts 6 | 7 | union 8 | 9 | select 'Average Salary' as category, sum(case when income between 20000 and 50000 then 1 else 0 end) as accounts_count 10 | from Accounts 11 | 12 | union 13 | 14 | select 'High Salary' as category, sum(case when income > 50000 then 1 else 0 end) as accounts_count 15 | from Accounts 16 | 17 | 18 | -- no companies listed 19 | -------------------------------------------------------------------------------- /Medium/01934-confirmation-rate.sql: -------------------------------------------------------------------------------- 1 | -- write a case statement to count the number of confirmed actions, sum it up, and divide by the total count 2 | -- group by because of count 3 | 4 | select s.user_id, round(sum(case when action = 'confirmed' then 1 else 0 end) /count(*), 2) as confirmation_rate 5 | from Signups s 6 | left join Confirmations c 7 | on s.user_id = c.user_id 8 | group by s.user_id 9 | 10 | 11 | -- amazon- 2 12 | -------------------------------------------------------------------------------- /Medium/01949-strong-friendship.sql: -------------------------------------------------------------------------------- 1 | -- union both directions to get every user with each of their friends 2 | -- then pick user1's friends from cte c1 and user2's friends from cte c2; we want common friends, so c1.friend should be equal to c2.friend 3 | -- in the final query, count the rows (i.e. the common friends) and return the pairs whose counts are >= 3 4 | 5 | with cte as 6 | (select user1_id as user_id, user2_id as friend 7 | from Friendship 8 | union 9 | select user2_id as user_id, user1_id as friend 10 | from Friendship), 11 | 12 | common_friend as 13 | (select f.user1_id, 
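-- (each row surviving the c1.friend = c2.friend filter below is one common friend of the pair)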
f.user2_id, c1.friend as user1_friends, c2.friend as user2_friends 14 | from Friendship f 15 | left join cte c1 on f.user1_id = c1.user_id 16 | left join cte c2 on f.user2_id = c2.user_id 17 | where c1.friend = c2.friend) 18 | 19 | select user1_id, user2_id, count(user1_friends) as common_friend 20 | from common_friend 21 | group by 1, 2 22 | having count(user1_friends) >= 3 23 | 24 | -- facebook- 1 25 | -------------------------------------------------------------------------------- /Medium/01951-all-the-pairs-with-the-maximum-number-of-common-followers.sql: -------------------------------------------------------------------------------- 1 | -- in the cte, we are doing a self join since we want pairs of users in the o/p 2 | -- we join on user1 < user2 (the users cannot be equal, and != would produce every pair twice) and on follower1 = follower2 because we want common followers only 3 | -- this returns one record per pair per common follower 4 | -- then we do a dense rank on count desc for each pair (group by user1, user2) 5 | -- in the final query, return the user pairs with rank = 1 6 | 7 | with cte as 8 | (select r1.user_id as user1_id, r1.follower_id as follower1_id, r2.user_id as user2_id, r2.follower_id as follower2_id, dense_rank() over(order by count(*) desc) as rnk 9 | from Relations r1 10 | join Relations r2 11 | on r1.user_id < r2.user_id and r1.follower_id = r2.follower_id 12 | group by 1, 3) 13 | 14 | select user1_id, user2_id 15 | from cte 16 | where rnk = 1 17 | 18 | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 19 | -- same as above but used count instead of rank 20 | -- counted rows for each pair 21 | -- in the final query, o/p pairs whose count = max(count) 22 | 23 | with cte as 24 | (select r1.user_id as user1_id, r1.follower_id as follower1_id, r2.user_id as user2_id, r2.follower_id as follower2_id, count(*) as common_count 25 | from Relations r1 26 | join Relations r2 27 | on r1.user_id < r2.user_id and r1.follower_id = r2.follower_id 28 | group by 1, 3) 29 | 30 | select user1_id, user2_id 31 | from cte 32 | where common_count in (select max(common_count) from cte) 33 | 34 | 35 | -- instagram- 1 36 | -------------------------------------------------------------------------------- /Medium/01988-find-cutoff-score-for-each-school.sql: -------------------------------------------------------------------------------- 1 | -- given in the question- the higher the score, the fewer the applications; a lower score gets more applications 2 | -- we want to maximize the number of applications, hence we use the min score- even if the count is tied, we are supposed to pick the min score 3 | -- left join Exam where capacity >= student_count, because we want to accommodate all the students that apply 4 | -- o/p for each school- group by schools- if null then o/p -1 5 | 6 | select school_id, ifnull(min(score), -1) as score 7 | from Schools 8 | left join Exam 9 | on capacity >= student_count 10 | group by 1 11 | 12 | -- no companies listed 13 | -------------------------------------------------------------------------------- /Medium/01990-count-the-number-of-experiments.sql: -------------------------------------------------------------------------------- 1 | -- we need to hardcode the platform and experiment_name values 2 | -- then cross join these values 3 | -- final query- left join the all_platform cte with the Experiments table and group by those 2 cols 4 | 5 | with 
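-- (the cross join below builds all 3 x 3 = 9 combinations, so pairs with no experiments still appear with count 0)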
cte_platforms as 6 | (select 'Android' as platform 7 | union 8 | select 'IOS' 9 | union 10 | select 'Web'), 11 | 12 | cte_experiment_names as 13 | (select 'Reading' as experiment_name 14 | union 15 | select 'Sports' 16 | union 17 | select 'Programming'), 18 | 19 | all_platform as 20 | (select * 21 | from cte_platforms 22 | cross join cte_experiment_names) 23 | 24 | select a.platform, a.experiment_name, count(e.experiment_id) as num_experiments 25 | from all_platform a 26 | left join Experiments e 27 | on a.platform = e.platform 28 | and a.experiment_name = e.experiment_name 29 | group by a.experiment_name, a.platform 30 | 31 | 32 | -- strava- 1 33 | -------------------------------------------------------------------------------- /Medium/02020-number-of-accounts-that-did-not-stream.sql: -------------------------------------------------------------------------------- 1 | -- the question is as follows- Write an SQL query to report the number of accounts that have an active subscription in 2021 but did not have any stream session in 2021 2 | -- so we join the two tables to get the stream date for each account 3 | -- the following conditions should be true- 1. the stream should be within the duration of the subscription; 2. the subscription should be active in 2021; 3. there should be no streams in 2021 4 | -- so for 1- stream date between st_dt and en_dt 5 | -- for 2- the end date should be in 2021- we don't care when the subscription started 6 | -- for 3- stream date not like 2021% 7 | 8 | with cte as 9 | (select sb.*, st.stream_date 10 | from Subscriptions sb 11 | left join Streams st 12 | using(account_id) 13 | where stream_date between start_date and end_date 14 | and end_date like '2021%' 15 | and stream_date not like '2021%') 16 | 17 | select count(distinct account_id) as accounts_count 18 | from cte 19 | 20 | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 21 | -- same as above but concise 22 | -- directly used count() instead of a cte 23 | 24 | select count(distinct sb.account_id) as accounts_count 25 | from Subscriptions sb 26 | left join Streams st 27 | using(account_id) 28 | where stream_date between start_date and end_date 29 | and end_date like '2021%' 30 | and stream_date not like '2021%' 31 | 32 | 33 | -- amazon- 2 34 | -- warnermedia- 1 35 | -------------------------------------------------------------------------------- /Medium/02041-accepted-candidates-from-the-interviews.sql: -------------------------------------------------------------------------------- 1 | -- need those candidate ids whose exp >= 2 and sum(score) > 15 2 | -- subquery 3 | 4 | select candidate_id 5 | from Candidates 6 | where years_of_exp >= 2 7 | and interview_id in (select interview_id 8 | from Rounds 9 | group by 1 10 | having sum(score) > 15) 11 | 12 | ------------------------------------------------------------------------------------------------------------------------------------------------------ 13 | -- using join 14 | 15 | select candidate_id 16 | from Candidates c 17 | join Rounds t 18 | on c.interview_id = t.interview_id 19 | where years_of_exp >= 2 20 | group by 1 21 | having sum(score) > 15 22 | 23 | ------------------------------------------------------------------------------------------------------------------------------------------------------ 24 | -- using join 25 | -- same as above but longer 26 | 27 | with total_score as 28 | (select interview_id 29 | from Rounds 30 | group by 1 31 | 
having sum(score) > 15) 32 | 33 | select candidate_id 34 | from Candidates c 35 | join total_score t 36 | on c.interview_id = t.interview_id 37 | where years_of_exp >= 2 38 | group by 1 39 | 40 | 41 | -- no companies listed 42 | -------------------------------------------------------------------------------- /Medium/02066-account-balance.sql: -------------------------------------------------------------------------------- 1 | -- running total of the balance, ordered by day 2 | -- if deposit then +ve, if withdraw then -ve 3 | 4 | select account_id, day, 5 | sum(case when type = 'Deposit' then amount else amount * -1 end) over (partition by account_id order by day) as balance 6 | from Transactions 7 | order by 1, 2 8 | 9 | -- no companies listed 10 | -------------------------------------------------------------------------------- /Medium/02084-drop-type-1-orders-for-customers-with-type-0-orders.sql: -------------------------------------------------------------------------------- 1 | -- an order should either be type 0, or be type 1 with its customer not in the type-0 list 2 | 3 | select * 4 | from Orders 5 | where order_type = 0 or 6 | (order_type = 1 and customer_id not in (select customer_id 7 | from Orders 8 | where order_type = 0)) 9 | 10 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 11 | -- find the min order_type for each user 12 | -- a user with type-0 orders (or both types) will have min order type = 0, so their type-1 orders are dropped 13 | -- a user with only type-1 orders will have min order type = 1 14 | -- return records with the min order type 15 | -- same can be done using rank 16 | 17 | 18 | select * 19 | from Orders 20 | where (customer_id, order_type) in (select customer_id, min(order_type) 21 | from Orders 22 | group by 1) 23 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 24 | -- unioning both conditions 25 | -- o/p when 0 26 | -- o/p when 1 but that customer should not have 0 27 | 28 | select * 29 | from Orders 30 | where order_type = 0 31 | union 32 | select * 33 | from Orders 34 | where order_type = 1 and customer_id not in (select customer_id 35 | from Orders 36 | where order_type = 0) 37 | 38 | -- no companies listed 39 | -------------------------------------------------------------------------------- /Medium/02112-the-airport-with-the-most-traffic.sql: -------------------------------------------------------------------------------- 1 | -- using window function 2 | -- get the count of all flights using union all and sum() 3 | -- rank by sum desc 4 | -- pull records with rnk = 1 5 | 6 | with cte as 7 | (select departure_airport as airport_id, flights_count 8 | from Flights 9 | union all 10 | select arrival_airport, flights_count 11 | from Flights), 12 | 13 | ranked as 14 | (select airport_id, sum(flights_count), rank() over(order by sum(flights_count) desc) as rnk 15 | from cte 16 | group by 1) 17 | 18 | select airport_id 19 | from ranked 20 | where rnk = 1 21 | 22 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 23 | -- using agg function 24 | -- select those ids which have the max count 25 | 26 | with cte as 27 | (select departure_airport as airport_id, flights_count 28 | 
from Flights 29 | union all 30 | select arrival_airport, flights_count 31 | from Flights), 32 | 33 | total_cnt as 34 | (select airport_id, sum(flights_count) as total_flights 35 | from cte 36 | group by 1) 37 | 38 | select airport_id 39 | from total_cnt 40 | where total_flights in (select max(total_flights) from total_cnt) 41 | 42 | 43 | -- no companies listed 44 | -------------------------------------------------------------------------------- /Medium/02142-the-number-of-passengers-in-each-bus-i.sql: -------------------------------------------------------------------------------- 1 | -- join the buses and passengers tables on p.time <= b.time 2 | -- pick the min time from the bus table for each passenger- this is the time when each passenger got picked up 3 | -- we want the Buses table to be the left table to get a count for all bus ids (even those which did not pick up passengers) 4 | -- join Buses to the cte, counting pick_up_time 5 | 6 | with cte as 7 | (select passenger_id, p.arrival_time, min(b.arrival_time) pick_up_time 8 | from Passengers p 9 | left join Buses b 10 | on p.arrival_time <= b.arrival_time 11 | group by 1) 12 | 13 | select b.bus_id, count(c.pick_up_time) as passengers_cnt 14 | from Buses b 15 | left join cte c 16 | on b.arrival_time = c.pick_up_time 17 | group by 1 18 | order by 1 19 | 20 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 21 | -- first, do a lag on the bus arrival_time to get the time of the previous bus 22 | -- in the final query, left join Passengers onto the bus cte 23 | -- the join conditions should be p_time <= b_time AND p_time > time of prev_bus, because the passenger arrived after the previous bus 24 | 25 | with cte as 26 | (select bus_id, arrival_time, lag(arrival_time, 1, 0) over(order by arrival_time) as prev_bus_time 27 | from buses) 28 | 29 | select distinct bus_id, count(passenger_id) as passengers_cnt 30 | -- c.arrival_time as bat, c.prev_bus_time, passenger_id, p.arrival_time as pat 31 | from cte c 32 | left join Passengers p 33 | on p.arrival_time <= c.arrival_time and p.arrival_time > c.prev_bus_time 34 | group by 1 35 | order by 1 36 | 37 | 38 | -- no companies listed 39 | -------------------------------------------------------------------------------- /Medium/02159- order-two-columns-independently.sql: -------------------------------------------------------------------------------- 1 | -- two row numbers, one per column- first_col asc, second_col desc 2 | -- we need to join where both rns are equal 3 | 4 | with cte as 5 | (select first_col, row_number() over(order by first_col) as rn1, 6 | second_col, row_number() over(order by second_col desc) as rn2 7 | from Data) 8 | 9 | select cte1.first_col, cte2.second_col 10 | from cte cte1 11 | join cte cte2 12 | on cte1.rn1 = cte2.rn2 13 | order by cte1.rn1 14 | 15 | -- amazon- 2 16 | -- booking.com- 1 17 | 18 | -- result of cte 19 | 20 | -- | first_col | rn1 | second_col | rn2 | 21 | -- | --------- | --- | ---------- | --- | 22 | -- | 1 | 1 | 4 | 1 | 23 | -- | 2 | 2 | 3 | 2 | 24 | -- | 4 | 4 | 2 | 3 | 25 | -- | 3 | 3 | 1 | 4 | 26 | -------------------------------------------------------------------------------- /Medium/02175-the-change-in-global-rankings.sql: -------------------------------------------------------------------------------- 1 | -- need to cast as signed because rank() returns an unsigned integer, so a negative difference throws an out-of-range error 2 | -- can use row_number() or dense_rank() instead of rank 3 | 4 | select t.team_id, name, 5 | (cast(rank() over(order by t.points desc, name) as 
signed) 6 | - cast(rank() over(order by t.points + p.points_change desc, name) as signed)) as rank_diff 7 | from TeamPoints t 8 | left join PointsChange p 9 | using(team_id) 10 | 11 | -- no companies listed 12 | -------------------------------------------------------------------------------- /Medium/02228-user-with-two-purchases-within-seven-days.sql: -------------------------------------------------------------------------------- 1 | -- for each user, compare each purchase date with every other purchase date- self join 2 | -- of course, the purchase ids shouldn't be equal 3 | -- the abs diff between the dates should be between 0 and 7 4 | 5 | select distinct p1.user_id 6 | from Purchases p1 7 | join purchases p2 8 | on p1.user_id = p2.user_id and p1.purchase_id != p2.purchase_id 9 | where abs(datediff(p2.purchase_date, p1.purchase_date)) between 0 and 7 10 | order by 1 11 | 12 | ---------------------------------------------------------------------------------------------------------------------------------------- 13 | -- tweaked the join and where conditions from above 14 | 15 | select distinct p1.user_id 16 | from Purchases p1 17 | join purchases p2 18 | on p1.user_id = p2.user_id and p1.purchase_id != p2.purchase_id and p2.purchase_date >= p1.purchase_date 19 | where datediff(p2.purchase_date, p1.purchase_date) between 0 and 7 20 | order by 1 21 | 22 | 23 | -- amazon- 1 24 | -------------------------------------------------------------------------------- /Medium/02308-arrange-table-by-gender.sql: -------------------------------------------------------------------------------- 1 | -- we will give a row_num based on user_id asc 2 | -- we want female first, then other, then male- so we create a flag to arrange these 3 | -- if we wanted female first, then male, then other, we could have directly used 'gender' in the order by clause 4 | 5 | with cte as 6 | (select *, row_number() over(partition by gender order by user_id) as rn, 7 | (case when gender = 'female' then 1 8 | when gender = 'male' then 3 9 | else 2 end) as flg 10 | from Genders) 11 | 12 | select user_id, gender 13 | from cte 14 | order by rn, flg 15 | 16 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 17 | -- same as above, just used the length of gender as the 2nd sort condition 18 | -- female has 6 characters, other has 5 characters, male has 4 characters, so we order by these lengths 19 | 20 | with cte as 21 | (select *, row_number() over(partition by gender order by user_id) as rn 22 | from Genders) 23 | 24 | select user_id, gender 25 | from cte 26 | order by rn, length(gender) desc 27 | 28 | 29 | -- no companies listed 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |