├── .gitattributes
├── Case Study #1 - Danny's Diner
│   ├── README.md
│   ├── database.sql
│   └── query.sql
├── Case Study #2 - Pizza Runner
│   ├── README.md
│   ├── Solution
│   │   ├── A. Pizza Metrics.md
│   │   ├── B. Runner and Customer Experience.md
│   │   ├── C. Ingredient Optimisation.md
│   │   ├── D. Pricing and Ratings.md
│   │   └── E. Bonus Questions.md
│   ├── Syntax
│   │   ├── A_data_cleaning.sql
│   │   ├── A_query.sql
│   │   ├── B_query.sql
│   │   ├── C_data_cleaning.sql
│   │   ├── C_query.sql
│   │   ├── D_query.sql
│   │   └── E_query.sql
│   └── database.sql
├── Case Study #3 - Foodie-Fi
│   ├── README.md
│   ├── Solution
│   │   ├── A. Customer Journey.md
│   │   ├── B. Data Analysis Questions.md
│   │   ├── C. Challenge Payment Question.md
│   │   └── D. Outside The Box Questions.md
│   ├── Syntax
│   │   ├── A_query.sql
│   │   ├── B_query.sql
│   │   ├── C_query.sql
│   │   └── D_query.sql
│   └── database.sql
├── Case Study #4 - Data Bank
│   ├── README.md
│   ├── Solution
│   │   ├── A. Customer Nodes Exploration.md
│   │   └── B. Customer Transactions.md
│   ├── Syntax
│   │   ├── A_query.sql
│   │   └── B_query.sql
│   └── database.sql
├── Case Study #5 - Data Mart
│   ├── README.md
│   ├── Solution
│   │   ├── A. Data Cleansing Steps.md
│   │   ├── B. Data Exploration.md
│   │   ├── C. Before & After Analysis.md
│   │   └── D. Bonus Question.md
│   ├── Syntax
│   │   ├── A_query.sql
│   │   ├── B_query.sql
│   │   ├── C_query.sql
│   │   └── D_query.sql
│   └── database.sql
├── Case Study #6 - Clique Bait
│   ├── README.md
│   ├── Solution
│   │   ├── A. Digital Analysis.md
│   │   ├── B. Product Funnel Analysis.md
│   │   └── C. Campaigns Analysis.md
│   ├── Syntax
│   │   ├── A_query.sql
│   │   ├── B_query.sql
│   │   └── C_query.sql
│   └── database.sql
├── Case Study #7 - Balanced Tree Clothing Co.
│   ├── README.md
│   ├── Solution
│   │   ├── A. High Level Sales Analysis.md
│   │   ├── B. Transaction Analysis.md
│   │   ├── C. Product Analysis.md
│   │   └── D. Bonus Question.md
│   ├── Syntax
│   │   ├── A_query.sql
│   │   ├── B_query.sql
│   │   ├── C_query.sql
│   │   └── D_query.sql
│   └── database.sql
├── Case Study #8 - Fresh Segments
│   ├── README.md
│   ├── Solution
│   │   ├── A. Data Exploration and Cleansing.md
│   │   ├── B. Interest Analysis.md
│   │   ├── C. Segment Analysis.md
│   │   └── D. Index Analysis.md
│   ├── Syntax
│   │   ├── A_query.sql
│   │   ├── B_query.sql
│   │   ├── C_query.sql
│   │   └── D_query.sql
│   └── database.sql
├── IMG
│   ├── 1.png
│   ├── 2.png
│   ├── 3.png
│   ├── 4.png
│   ├── 5.png
│   ├── 6.png
│   ├── 7.png
│   ├── 8.png
│   ├── e1.PNG
│   ├── e2.PNG
│   ├── e3.PNG
│   ├── e4.png
│   ├── e5.png
│   ├── e6.PNG
│   ├── e7.1.PNG
│   ├── e7.2.PNG
│   └── e8-updated.PNG
└── README.md

/.gitattributes:
--------------------------------------------------------------------------------
1 | *.sql linguist-detectable=true
2 | *.sql linguist-language=tsql
3 | *.sql text
--------------------------------------------------------------------------------
/Case Study #1 - Danny's Diner/database.sql:
--------------------------------------------------------------------------------
1 | --------------------------------
2 | --CASE STUDY #1: DANNY'S DINER--
3 | --------------------------------
4 | --Author: Anh Nguyen
5 | --Date: 05/11/2022
6 | --Tool used: SQL Server
7 |
8 |
9 | CREATE DATABASE dannys_diner;
10 |
11 | CREATE TABLE dannys_diner.dbo.sales (
12 |   "customer_id" VARCHAR(1),
13 |   "order_date" DATE,
14 |   "product_id" INTEGER
15 | );
16 |
17 | INSERT INTO dannys_diner.dbo.sales
18 |   ("customer_id", "order_date", "product_id")
19 | VALUES
20 |   ('A', '2021-01-01', '1'),
21 |   ('A', '2021-01-01', '2'),
22 |   ('A', '2021-01-07', '2'),
23 |   ('A', '2021-01-10', '3'),
24 |   ('A', '2021-01-11', '3'),
25 |   ('A', '2021-01-11', '3'),
26 |   ('B', '2021-01-01', '2'),
27 |   ('B', '2021-01-02', '2'),
28 |   ('B', '2021-01-04', '1'),
29 |   ('B', '2021-01-11', '1'),
30 |   ('B', '2021-01-16', '3'),
31 |   ('B', '2021-02-01', '3'),
32 |   ('C', '2021-01-01', '3'),
33 |   ('C', '2021-01-01', '3'),
34 |   ('C', '2021-01-07', '3');
35 |
36 |
37 | CREATE TABLE dannys_diner.dbo.menu (
38 |   "product_id" INTEGER,
39 |   "product_name" VARCHAR(5),
40 |   "price" INTEGER
41 | );
42 |
43 | INSERT INTO dannys_diner.dbo.menu
44 |   ("product_id", "product_name", "price")
45 | VALUES
46 |   ('1', 'sushi', '10'),
47 |   ('2', 'curry', '15'),
48 |   ('3', 'ramen', '12');
49 |
50 |
51 | CREATE TABLE dannys_diner.dbo.members (
52 |   "customer_id" VARCHAR(1),
53 |   "join_date" DATE
54 | );
55 |
56 | INSERT INTO dannys_diner.dbo.members
57 |   ("customer_id", "join_date")
58 | VALUES
59 |   ('A', '2021-01-07'),
60 |   ('B', '2021-01-09');
61 |
--------------------------------------------------------------------------------
/Case Study #1 - Danny's Diner/query.sql:
--------------------------------------------------------------------------------
1 | ------------------------
2 | --CASE STUDY QUESTIONS--
3 | ------------------------
4 |
5 | --Author: Anh Nguyen
6 | --Date: 05/11/2022
7 | --Tool used: SQL Server
8 |
9 |
10 | -- 1. What is the total amount each customer spent at the restaurant?
11 |
12 | SELECT
13 |   s.customer_id,
14 |   SUM(m.price) AS total_pay
15 | FROM sales s
16 | JOIN menu m
17 |   ON s.product_id = m.product_id
18 | GROUP BY s.customer_id
19 | ORDER BY s.customer_id;
20 |
21 |
22 | -- 2. How many days has each customer visited the restaurant?
23 |
24 | SELECT
25 |   customer_id,
26 |   COUNT(DISTINCT order_date) AS visit_count
27 | FROM sales
28 | GROUP BY customer_id;
29 |
30 |
31 | -- 3. What was the first item from the menu purchased by each customer?
32 |
33 | WITH orderRank AS (
34 |   SELECT
35 |     customer_id,
36 |     product_id,
37 |     order_date,
38 |     DENSE_RANK() OVER(PARTITION BY customer_id ORDER BY order_date) AS rnk
39 |   FROM sales
40 | )
41 |
42 | SELECT
43 |   o.customer_id,
44 |   o.order_date,
45 |   m.product_name
46 | FROM orderRank o
47 | JOIN menu m
48 |   ON o.product_id = m.product_id
49 | WHERE o.rnk = 1
50 | GROUP BY o.customer_id, o.order_date, m.product_name;
51 |
52 |
53 | -- 4. What is the most purchased item on the menu and how many times was it purchased by all customers?
54 |
55 | SELECT
56 |   TOP 1 s.product_id,
57 |   m.product_name,
58 |   COUNT(*) AS most_purch
59 | FROM sales s
60 | JOIN menu m
61 |   ON s.product_id = m.product_id
62 | GROUP BY s.product_id, m.product_name
63 | ORDER BY COUNT(*) DESC;
64 |
65 | -- 5. Which item was the most popular for each customer?
66 |
67 | WITH freqRank AS (
68 |   SELECT
69 |     customer_id,
70 |     product_id,
71 |     COUNT(*) AS purch_freq,
72 |     DENSE_RANK() OVER(PARTITION BY customer_id ORDER BY COUNT(*) DESC) AS rnk
73 |   FROM sales
74 |   GROUP BY customer_id, product_id
75 | )
76 |
77 | SELECT
78 |   f.customer_id,
79 |   m.product_name,
80 |   f.purch_freq
81 | FROM freqRank f
82 | JOIN menu m
83 |   ON f.product_id = m.product_id
84 | WHERE f.rnk = 1
85 | ORDER BY f.customer_id;
86 |
87 |
88 | -- 6. Which item was purchased first by the customer after they became a member?
89 |
90 | WITH orderAfterMember AS (
91 |   SELECT
92 |     s.customer_id,
93 |     mn.product_name,
94 |     s.order_date,
95 |     m.join_date,
96 |     DENSE_RANK() OVER(PARTITION BY s.customer_id ORDER BY s.order_date) AS rnk
97 |   FROM sales s
98 |   JOIN members m
99 |     ON s.customer_id = m.customer_id
100 |   JOIN menu mn
101 |     ON s.product_id = mn.product_id
102 |   WHERE s.order_date >= m.join_date
103 | )
104 |
105 | SELECT
106 |   customer_id,
107 |   product_name,
108 |   order_date,
109 |   join_date
110 | FROM orderAfterMember
111 | WHERE rnk = 1;
112 |
113 |
114 | -- 7. Which item was purchased just before the customer became a member?
115 |
116 | WITH orderBeforeMember AS (
117 |   SELECT
118 |     s.customer_id,
119 |     mn.product_name,
120 |     s.order_date,
121 |     m.join_date,
122 |     DENSE_RANK() OVER(PARTITION BY s.customer_id ORDER BY s.order_date DESC) AS rnk
123 |   FROM sales s
124 |   JOIN members m
125 |     ON s.customer_id = m.customer_id
126 |   JOIN menu mn
127 |     ON s.product_id = mn.product_id
128 |   WHERE s.order_date < m.join_date
129 | )
130 |
131 | SELECT
132 |   customer_id,
133 |   product_name,
134 |   order_date,
135 |   join_date
136 | FROM orderBeforeMember
137 | WHERE rnk = 1;
138 |
139 |
140 | -- 8. What is the total items and amount spent for each member before they became a member?
141 |
142 | SELECT
143 |   s.customer_id,
144 |   COUNT(s.product_id) AS total_items,
145 |   SUM(mn.price) AS total_spend
146 | FROM sales s
147 | JOIN members m
148 |   ON s.customer_id = m.customer_id
149 | JOIN menu mn
150 |   ON s.product_id = mn.product_id
151 | WHERE s.order_date < m.join_date
152 | GROUP BY s.customer_id;
153 |
154 |
155 | -- 9. If each $1 spent equates to 10 points and sushi has a 2x points multiplier - how many points would each customer have?
156 | -- Note: Only customers who are members receive points when purchasing items
157 |
158 | WITH CustomerPoints AS (
159 |   SELECT
160 |     s.customer_id,
161 |     CASE
162 |       WHEN s.customer_id IN (SELECT customer_id FROM members) AND mn.product_name = 'sushi' THEN mn.price*20
163 |       WHEN s.customer_id IN (SELECT customer_id FROM members) AND mn.product_name != 'sushi' THEN mn.price*10
164 |       ELSE 0 END AS points
165 |   FROM menu mn
166 |   JOIN sales s
167 |     ON mn.product_id = s.product_id
168 | )
169 |
170 | SELECT
171 |   customer_id,
172 |   SUM(points) AS total_points
173 | FROM CustomerPoints
174 | GROUP BY customer_id;
175 |
176 |
177 | -- 10. In the first week after a customer joins the program (including their join date), they earn 2x points
178 | -- on all items, not just sushi - how many points do customer A and B have at the end of January?
179 |
180 | WITH programDates AS (
181 |   SELECT
182 |     customer_id,
183 |     join_date,
184 |     DATEADD(d, 6, join_date) AS valid_date,
185 |     EOMONTH('2021-01-01') AS last_date
186 |   FROM members
187 | )
188 |
189 | SELECT
190 |   p.customer_id,
191 |   SUM(CASE
192 |     WHEN s.order_date BETWEEN p.join_date AND p.valid_date THEN m.price*20
193 |     WHEN m.product_name = 'sushi' THEN m.price*20
194 |     ELSE m.price*10 END) AS total_points
195 | FROM sales s
196 | JOIN programDates p
197 |   ON s.customer_id = p.customer_id
198 | JOIN menu m
199 |   ON s.product_id = m.product_id
200 | WHERE s.order_date <= p.last_date
201 | GROUP BY p.customer_id;
202 |
203 |
204 | ------------------------
205 | --  BONUS QUESTIONS  --
206 | ------------------------
207 |
208 | -- Join All The Things
209 |
210 | SELECT
211 |   s.customer_id,
212 |   s.order_date,
213 |   mn.product_name,
214 |   mn.price,
215 |   CASE WHEN s.order_date >= m.join_date THEN 'Y'
216 |     ELSE 'N' END AS member
217 | FROM sales s
218 | JOIN menu mn
219 |   ON s.product_id = mn.product_id
220 | LEFT JOIN members m
221 |   ON s.customer_id = m.customer_id;
222 |
223 |
224 | -- Rank All The Things
225 | -- Note: Create a CTE using the result in the previous question
226 |
227 | WITH customerStatus AS (
228 |   SELECT
229 |     s.customer_id,
230 |     s.order_date,
231 |     mn.product_name,
232 |     mn.price,
233 |     CASE WHEN s.order_date >= m.join_date THEN 'Y'
234 |       ELSE 'N' END AS member
235 |   FROM sales s
236 |   JOIN menu mn
237 |     ON s.product_id = mn.product_id
238 |   LEFT JOIN members m
239 |     ON s.customer_id = m.customer_id
240 | )
241 |
242 | SELECT *,
243 |   CASE WHEN member = 'Y'
244 |     THEN DENSE_RANK() OVER(PARTITION BY customer_id, member ORDER BY order_date)
245 |     ELSE NULL END AS ranking
246 | FROM customerStatus;
247 |
--------------------------------------------------------------------------------
/Case Study #2 - Pizza Runner/README.md:
--------------------------------------------------------------------------------
1 | # 🍕 Case Study #2 - Pizza Runner
2 |
3 |
4 |
5 | ## 📕 Table of Contents
6 | * [Business Task](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%232%20-%20Pizza%20Runner/README.md#%EF%B8%8F-business-task)
7 | * [Entity Relationship Diagram](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%232%20-%20Pizza%20Runner/README.md#-entity-relationship-diagram)
8 | * [Case Study Questions](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%232%20-%20Pizza%20Runner/README.md#-case-study-questions)
9 | * [My Solution](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%232%20-%20Pizza%20Runner/README.md#-my-solution)
10 |
11 | ---
12 | ## 🛠️ Business Task
13 | Danny wanted to expand his new Pizza Empire and intended to Uberize it with Pizza Runner.
14 | Danny started by recruiting “runners” to deliver fresh pizza from Pizza Runner Headquarters (otherwise known as Danny’s house)
15 | and also maxed out his credit card to pay freelance developers to build a mobile app to accept orders from customers.
16 |
17 | ---
18 | ## 🔐 Entity Relationship Diagram
19 |
20 |
21 |
22 | ---
23 | ## ❓ Case Study Questions
24 | ### A. Pizza Metrics
25 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%232%20-%20Pizza%20Runner/Solution/A.%20Pizza%20Metrics.md).
26 |
27 | 1. How many pizzas were ordered?
28 | 2. How many unique customer orders were made?
29 | 3. How many successful orders were delivered by each runner?
30 | 4. How many of each type of pizza was delivered?
31 | 5. How many Vegetarian and Meatlovers were ordered by each customer?
32 | 6. What was the maximum number of pizzas delivered in a single order?
33 | 7. For each customer, how many delivered pizzas had at least 1 change and how many had no changes?
34 | 8. How many pizzas were delivered that had both exclusions and extras?
35 | 9. What was the total volume of pizzas ordered for each hour of the day?
36 | 10. What was the volume of orders for each day of the week?
37 |
38 | ---
39 | ### B. Runner and Customer Experience
40 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%232%20-%20Pizza%20Runner/Solution/B.%20Runner%20and%20Customer%20Experience.md).
41 |
42 | 1. How many runners signed up for each 1 week period? (i.e. week starts 2021-01-01)
43 | 2. What was the average time in minutes it took for each runner to arrive at the Pizza Runner HQ to pickup the order?
44 | 3. Is there any relationship between the number of pizzas and how long the order takes to prepare?
45 | 4. What was the average distance travelled for each customer?
46 | 5. What was the difference between the longest and shortest delivery times for all orders?
47 | 6. What was the average speed for each runner for each delivery and do you notice any trend for these values?
48 | 7. What is the successful delivery percentage for each runner?
49 |
50 | ---
51 | ### C. Ingredient Optimisation
52 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%232%20-%20Pizza%20Runner/Solution/C.%20Ingredient%20Optimisation.md).
53 |
54 | 1. What are the standard ingredients for each pizza?
55 | 2. What was the most commonly added extra?
56 | 3. What was the most common exclusion?
57 | 4. Generate an order item for each record in the customers_orders table in the format of one of the following:
58 | * ```Meat Lovers```
59 | * ```Meat Lovers - Exclude Beef```
60 | * ```Meat Lovers - Extra Bacon```
61 | * ```Meat Lovers - Exclude Cheese, Bacon - Extra Mushroom, Peppers```
62 | 5. Generate an alphabetically ordered comma separated ingredient list for each pizza order from the customer_orders table and add a 2x in front of any relevant ingredients
63 | * For example: ```"Meat Lovers: 2xBacon, Beef, ... , Salami"```
64 | 6. What is the total quantity of each ingredient used in all delivered pizzas sorted by most frequent first?
65 |
66 | ---
67 | ### D. Pricing and Ratings
68 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%232%20-%20Pizza%20Runner/Solution/D.%20Pricing%20and%20Ratings.md).
69 |
70 | 1. If a Meat Lovers pizza costs $12 and Vegetarian costs $10 and there were no charges for changes - how much money has Pizza Runner made so far if there are no delivery fees?
71 | 2. What if there was an additional $1 charge for any pizza extras?
72 | * Add cheese is $1 extra
73 | 3. The Pizza Runner team now wants to add an additional ratings system that allows customers to rate their runner, how would you design an additional table for this new dataset - generate a schema for this new table and insert your own data for ratings for each successful customer order between 1 to 5.
74 | 4. Using your newly generated table - can you join all of the information together to form a table which has the following information for successful deliveries?
75 | * ```customer_id```
76 | * ```order_id```
77 | * ```runner_id```
78 | * ```rating```
79 | * ```order_time```
80 | * ```pickup_time```
81 | * Time between order and pickup
82 | * Delivery duration
83 | * Average speed
84 | * Total number of pizzas
85 | 5. If a Meat Lovers pizza was $12 and Vegetarian $10 fixed prices with no cost for extras and each runner is paid $0.30 per kilometre traveled - how much money does Pizza Runner have left over after these deliveries?
86 |
87 | ---
88 | ### E. Bonus Questions
89 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%232%20-%20Pizza%20Runner/Solution/E.%20Bonus%20Questions.md).
90 |
91 | If Danny wants to expand his range of pizzas - how would this impact the existing data design? Write an ```INSERT``` statement to demonstrate what would happen if a new ```Supreme``` pizza with all the toppings was added to the Pizza Runner menu?
92 |
93 | ---
94 | ## 🚀 My Solution
95 | * View the complete syntax [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%232%20-%20Pizza%20Runner/Syntax).
96 | * View the result and explanation [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%232%20-%20Pizza%20Runner/Solution).
97 |
--------------------------------------------------------------------------------
/Case Study #2 - Pizza Runner/Solution/B. Runner and Customer Experience.md:
--------------------------------------------------------------------------------
1 | # 🍕 Case Study #2 - Pizza Runner
2 | ## B. Runner and Customer Experience
3 | ### Q1. How many runners signed up for each 1 week period? (i.e. week starts 2021-01-01)
4 | ```TSQL
5 | SELECT
6 | DATEPART(week, registration_date) AS week_period,
7 | COUNT(*) AS runner_count
8 | FROM runners
9 | GROUP BY DATEPART(week, registration_date);
10 | ```
11 | | week_period | runner_count |
12 | |-------------|---------------|
13 | | 1 | 1 |
14 | | 2 | 2 |
15 | | 3 | 1 |
16 |
17 | ---
18 | ### Q2. What was the average time in minutes it took for each runner to arrive at the Pizza Runner HQ to pickup the order?
19 | ```TSQL
20 | WITH runners_pickup AS (
21 | SELECT
22 | r.runner_id,
23 | c.order_id,
24 | c.order_time,
25 | r.pickup_time,
26 | DATEDIFF(MINUTE, c.order_time, r.pickup_time) AS pickup_minutes
27 | FROM #customer_orders_temp AS c
28 | JOIN #runner_orders_temp AS r
29 | ON c.order_id = r.order_id
30 | WHERE r.cancellation IS NULL
31 | GROUP BY r.runner_id, c.order_id, c.order_time, r.pickup_time
32 | )
33 |
34 | SELECT
35 | runner_id,
36 | AVG(pickup_minutes) AS average_time
37 | FROM runners_pickup
38 | GROUP BY runner_id;
39 | ```
40 | | runner_id | average_time |
41 | |-----------|---------------|
42 | | 1 | 14 |
43 | | 2 | 20 |
44 | | 3 | 10 |
45 |
46 | ---
47 | ### Q3. Is there any relationship between the number of pizzas and how long the order takes to prepare?
48 | ```TSQL
49 | WITH pizzaPreparation AS (
50 | SELECT
51 | c.order_id,
52 | c.order_time,
53 | r.pickup_time,
54 | DATEDIFF(MINUTE, c.order_time, r.pickup_time) AS prep_time,
55 | COUNT(c.pizza_id) AS pizza_count
56 | FROM #customer_orders_temp AS c
57 | JOIN #runner_orders_temp AS r
58 | ON c.order_id = r.order_id
59 | WHERE r.cancellation IS NULL
60 | GROUP BY c.order_id, c.order_time, r.pickup_time,
61 | DATEDIFF(MINUTE, c.order_time, r.pickup_time)
62 | )
63 |
64 | SELECT
65 | pizza_count,
66 | AVG(prep_time) AS avg_prep_time
67 | FROM pizzaPreparation
68 | GROUP BY pizza_count;
69 | ```
70 | | pizza_count | avg_prep_time |
71 | |-------------|----------------|
72 | | 1 | 12 |
73 | | 2 | 18 |
74 | | 3 | 30 |
75 |
76 | * The more pizzas an order contains, the longer it takes to prepare.
77 | * An order of 2 pizzas took 6 minutes more to prepare than a single pizza; an order of 3 pizzas took 12 minutes more than an order of 2.
78 | * In other words, the n-th pizza in an order added roughly 6 * (n - 1) extra minutes of preparation time.
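
To make that pattern explicit, a quick follow-up sketch (not part of the original solution) can compute the marginal prep time per extra pizza with the ```LAG``` window function, reusing the same temp tables:
```TSQL
-- Sketch: minutes added by each additional pizza in an order.
WITH pizzaPreparation AS (
  SELECT
    c.order_id,
    DATEDIFF(MINUTE, c.order_time, r.pickup_time) AS prep_time,
    COUNT(c.pizza_id) AS pizza_count
  FROM #customer_orders_temp AS c
  JOIN #runner_orders_temp AS r
    ON c.order_id = r.order_id
  WHERE r.cancellation IS NULL
  GROUP BY c.order_id, c.order_time, r.pickup_time
),
avgPrep AS (
  SELECT
    pizza_count,
    AVG(prep_time) AS avg_prep_time
  FROM pizzaPreparation
  GROUP BY pizza_count
)

SELECT
  pizza_count,
  avg_prep_time,
  -- minutes added compared to an order with one pizza fewer
  avg_prep_time - LAG(avg_prep_time) OVER (ORDER BY pizza_count) AS extra_minutes
FROM avgPrep;
```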
79 |
80 | ---
81 | ### Q4. What was the average distance travelled for each customer?
82 | ```TSQL
83 | SELECT
84 | c.customer_id,
85 | ROUND(AVG(r.distance), 1) AS average_distance
86 | FROM #customer_orders_temp AS c
87 | JOIN #runner_orders_temp AS r
88 | ON c.order_id = r.order_id
89 | GROUP BY c.customer_id;
90 | ```
91 | | customer_id | average_distance |
92 | |-------------|-------------------|
93 | | 101 | 20 |
94 | | 102 | 16.7 |
95 | | 103 | 23.4 |
96 | | 104 | 10 |
97 | | 105 | 25 |
98 |
99 | ---
100 | ### Q5. What was the difference between the longest and shortest delivery times for all orders?
101 | ```TSQL
102 | SELECT MAX(duration) - MIN(duration) AS time_difference
103 | FROM #runner_orders_temp;
104 | ```
105 | | time_difference|
106 | |----------------|
107 | | 30 |
108 |
109 | ---
110 | ### Q6. What was the average speed for each runner for each delivery and do you notice any trend for these values?
111 | ```TSQL
112 | SELECT
113 | r.runner_id,
114 | c.order_id,
115 | r.distance,
116 | r.duration AS duration_min,
117 | COUNT(c.order_id) AS pizza_count,
118 | ROUND(AVG(r.distance/r.duration*60), 1) AS avg_speed
119 | FROM #runner_orders_temp r
120 | JOIN #customer_orders_temp c
121 | ON r.order_id = c.order_id
122 | WHERE r.cancellation IS NULL
123 | GROUP BY r.runner_id, c.order_id, r.distance, r.duration;
124 | ```
125 | | runner_id | order_id | distance | duration_min | pizza_count | avg_speed |
126 | |-----------|----------|----------|--------------|-------------|------------|
127 | | 1 | 1 | 20 | 32 | 1 | 37.5 |
128 | | 1 | 2 | 20 | 27 | 1 | 44.4 |
129 | | 1 | 3 | 13.4 | 20 | 2 | 40.2 |
130 | | 1 | 10 | 10 | 10 | 2 | 60 |
131 | | 2 | 4 | 23.4 | 40 | 3 | 35.1 |
132 | | 2 | 7 | 25 | 25 | 1 | 60 |
133 | | 2 | 8 | 23.4 | 15 | 1 | 93.6 |
134 | | 3 | 5 | 10 | 15 | 1 | 40 |
135 |
136 | * Runner ```1```'s average speed ranged from 37.5 km/h to 60 km/h.
137 | * Runner ```2```'s average speed ranged from 35.1 km/h to 93.6 km/h. Over the same distance (23.4 km), order ```4``` was delivered at 35.1 km/h while order ```8``` was delivered at 93.6 km/h, which strongly suggests a data-entry error in the distance or duration of one of these orders.
138 | * Runner ```3```'s average speed was 40 km/h.
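
A simple sanity check can surface such outliers automatically. The sketch below is not part of the original solution, and the 80 km/h cutoff is an arbitrary assumption:
```TSQL
-- Sketch: flag deliveries with an implausibly high average speed.
-- The 80 km/h threshold is an assumption, not a business rule.
SELECT
  runner_id,
  order_id,
  distance,
  duration,
  ROUND(distance / duration * 60, 1) AS avg_speed
FROM #runner_orders_temp
WHERE cancellation IS NULL
  AND distance / duration * 60 > 80;
```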
139 |
140 | ---
141 | ### Q7. What is the successful delivery percentage for each runner?
142 | ```TSQL
143 |
144 | SELECT
145 | runner_id,
146 | COUNT(distance) AS delivered,
147 | COUNT(order_id) AS total,
148 | 100 * COUNT(distance) / COUNT(order_id) AS successful_pct
149 | FROM #runner_orders_temp
150 | GROUP BY runner_id;
151 | ```
152 | | runner_id | delivered | total | successful_pct |
153 | |-----------|-----------|-------|-----------------|
154 | | 1 | 4 | 4 | 100 |
155 | | 2 | 3 | 4 | 75 |
156 | | 3 | 1 | 2 | 50 |

*```COUNT(distance)``` counts only the orders that were actually delivered (```distance``` is ```NULL``` for cancelled ones), while ```COUNT(order_id)``` counts every order assigned to the runner.*
157 |
158 | ---
159 | My solution for **[C. Ingredient Optimisation](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%232%20-%20Pizza%20Runner/Solution/C.%20Ingredient%20Optimisation.md).**
160 |
--------------------------------------------------------------------------------
/Case Study #2 - Pizza Runner/Solution/D. Pricing and Ratings.md:
--------------------------------------------------------------------------------
1 | # 🍕 Case Study #2 - Pizza Runner
2 | ## D. Pricing and Ratings
3 | ### Q1. If a Meat Lovers pizza costs $12 and Vegetarian costs $10 and there were no charges for changes - how much money has Pizza Runner made so far if there are no delivery fees?
4 |
5 | ```TSQL
6 | SELECT
7 | SUM(CASE WHEN p.pizza_name = 'Meatlovers' THEN 12
8 | ELSE 10 END) AS money_earned
9 | FROM #customer_orders_temp c
10 | JOIN pizza_names p
11 | ON c.pizza_id = p.pizza_id
12 | JOIN #runner_orders_temp r
13 | ON c.order_id = r.order_id
14 | WHERE r.cancellation IS NULL;
15 | ```
16 | | money_earned |
17 | |---------------|
18 | | 138 |
19 |
20 | ---
21 | ### Q2. What if there was an additional $1 charge for any pizza extras?
22 | * Add cheese is $1 extra
23 | ```TSQL
24 | DECLARE @basecost INT
25 | SET @basecost = 138 -- @basecost = result of the previous question
26 |
27 | SELECT
28 |   @basecost + SUM(CASE WHEN p.topping_name = 'Cheese' THEN 2  -- $1 per extra, plus $1 more when the extra is cheese
29 |     ELSE 1 END) AS updated_money
30 | FROM #extrasBreak e
31 | JOIN pizza_toppings p
32 | ON e.extra_id = p.topping_id;
33 | ```
34 | | updated_money |
35 | |----------------|
36 | | 145 |
37 |
38 | ---
39 | ### Q3. The Pizza Runner team now wants to add an additional ratings system that allows customers to rate their runner, how would you design an additional table for this new dataset - generate a schema for this new table and insert your own data for ratings for each successful customer order between 1 to 5.
40 | ```TSQL
41 | DROP TABLE IF EXISTS ratings
42 | CREATE TABLE ratings (
43 | order_id INT,
44 | rating INT);
45 | INSERT INTO ratings (order_id, rating)
46 | VALUES
47 | (1,3),
48 | (2,5),
49 | (3,3),
50 | (4,1),
51 | (5,5),
52 | (7,3),
53 | (8,4),
54 | (10,3);
55 |
56 | SELECT *
57 | FROM ratings;
58 | ```
59 | | order_id | rating |
60 | |----------|---------|
61 | | 1 | 3 |
62 | | 2 | 5 |
63 | | 3 | 3 |
64 | | 4 | 1 |
65 | | 5 | 5 |
66 | | 7 | 3 |
67 | | 8 | 4 |
68 | | 10 | 3 |
69 |
70 | ---
71 | ### Q4. Using your newly generated table - can you join all of the information together to form a table which has the following information for successful deliveries?
72 | * ```customer_id```
73 | * ```order_id```
74 | * ```runner_id```
75 | * ```rating```
76 | * ```order_time```
77 | * ```pickup_time```
78 | * Time between order and pickup
79 | * Delivery duration
80 | * Average speed
81 | * Total number of pizzas
82 |
83 | ```TSQL
84 | SELECT
85 |   c.customer_id,
86 |   c.order_id,
87 |   r.runner_id,
88 |   rt.rating,
89 |   c.order_time,
90 |   r.pickup_time,
91 |   DATEDIFF(MINUTE, c.order_time, r.pickup_time) AS mins_difference,
92 |   r.duration,
93 |   ROUND(AVG(r.distance/r.duration*60), 1) AS avg_speed,
94 |   COUNT(c.order_id) AS pizza_count
95 | FROM #customer_orders_temp c
96 | JOIN #runner_orders_temp r
97 |   ON r.order_id = c.order_id
98 | JOIN ratings rt
99 |   ON rt.order_id = c.order_id
100 | WHERE r.cancellation IS NULL
101 | GROUP BY
102 |   c.customer_id, c.order_id, r.runner_id, rt.rating,
103 |   c.order_time, r.pickup_time, r.duration;
104 | ```
105 | | customer_id | order_id | runner_id | rating | order_time | pickup_time | mins_difference | duration | avg_speed | pizza_count |
106 | |-------------|----------|-----------|--------|-------------------------|-------------------------|-----------------|----------|-----------|--------------|
107 | | 101 | 1 | 1 | 3 | 2020-01-01 18:05:02.000 | 2020-01-01 18:15:34.000 | 10 | 32 | 37.5 | 1 |
108 | | 101 | 2 | 1 | 5 | 2020-01-01 19:00:52.000 | 2020-01-01 19:10:54.000 | 10 | 27 | 44.4 | 1 |
109 | | 102 | 3 | 1 | 3 | 2020-01-02 23:51:23.000 | 2020-01-03 00:12:37.000 | 21 | 20 | 40.2 | 2 |
110 | | 102 | 8 | 2 | 4 | 2020-01-09 23:54:33.000 | 2020-01-10 00:15:02.000 | 21 | 15 | 93.6 | 1 |
111 | | 103 | 4 | 2 | 1 | 2020-01-04 13:23:46.000 | 2020-01-04 13:53:03.000 | 30 | 40 | 35.1 | 3 |
112 | | 104 | 5 | 3 | 5 | 2020-01-08 21:00:29.000 | 2020-01-08 21:10:57.000 | 10 | 15 | 40 | 1 |
113 | | 104 | 10 | 1 | 3 | 2020-01-11 18:34:49.000 | 2020-01-11 18:50:20.000 | 16 | 10 | 60 | 2 |
114 | | 105 | 7 | 2 | 3 | 2020-01-08 21:20:29.000 | 2020-01-08 21:30:45.000 | 10 | 25 | 60 | 1 |
115 |
116 | Cancelled orders ```6``` and ```9``` are excluded because the question asks about successful deliveries only.
117 |
118 | ---
119 | ### Q5. If a Meat Lovers pizza was $12 and Vegetarian $10 fixed prices with no cost for extras and each runner is paid $0.30 per kilometre traveled - how much money does Pizza Runner have left over after these deliveries?
120 | ```TSQL
121 | DECLARE @basecost INT
122 | SET @basecost = 138
123 |
124 | SELECT
125 | @basecost AS revenue,
126 | SUM(distance)*0.3 AS runner_paid,
127 | @basecost - SUM(distance)*0.3 AS money_left
128 | FROM #runner_orders_temp;
129 | ```
130 | | revenue | runner_paid | money_left |
131 | |---------|-------------|-------------|
132 | | 138 | 43.56 | 94.44 |
133 |
134 | ---
135 | My solution for **[E. Bonus Questions](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%232%20-%20Pizza%20Runner/Solution/E.%20Bonus%20Questions.md)**.
136 |
--------------------------------------------------------------------------------
/Case Study #2 - Pizza Runner/Solution/E. Bonus Questions.md:
--------------------------------------------------------------------------------
1 | # 🍕 Case Study #2 - Pizza Runner
2 | ## E. Bonus Questions
3 | ### If Danny wants to expand his range of pizzas - how would this impact the existing data design? Write an INSERT statement to demonstrate what would happen if a new Supreme pizza with all the toppings was added to the Pizza Runner menu?
4 |
5 | ```TSQL
6 | INSERT INTO pizza_names (pizza_id, pizza_name)
7 | VALUES (3, 'Supreme');
8 |
9 | ALTER TABLE pizza_recipes
10 | ALTER COLUMN toppings VARCHAR(50);
11 |
12 | INSERT INTO pizza_recipes (pizza_id, toppings)
13 | VALUES (3, '1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12');
14 | ```
15 | Notice that I had to widen the ```toppings``` column (from ```VARCHAR(30)``` to ```VARCHAR(50)```) because the Supreme pizza's topping list, which contains all 12 toppings, no longer fits in the original column.
16 |
--------------------------------------------------------------------------------
/Case Study #2 - Pizza Runner/Syntax/A_data_cleaning.sql:
--------------------------------------------------------------------------------
1 | -----------------------------------
2 | --A. Data Cleaning: Pizza Metrics--
3 | -----------------------------------
4 |
5 | -- Create a new temporary table: #customer_orders_temp
6 |
7 | SELECT
8 | order_id,
9 | customer_id,
10 | pizza_id,
11 | CASE
12 | WHEN exclusions IS NULL OR exclusions LIKE 'null' THEN ''
13 | ELSE exclusions
14 | END AS exclusions,
15 | CASE
16 | WHEN extras IS NULL OR extras LIKE 'null' THEN ''
17 | ELSE extras
18 | END AS extras,
19 | order_time
20 | INTO #customer_orders_temp
21 | FROM customer_orders;
22 |
23 | SELECT *
24 | FROM #customer_orders_temp;
25 |
26 | -- Create a new temporary table: #runner_orders_temp
27 |
28 | SELECT
29 | order_id,
30 | runner_id,
31 | CAST(
32 | CASE WHEN pickup_time LIKE 'null' THEN NULL ELSE pickup_time END
33 | AS DATETIME) AS pickup_time,
34 | CAST(
35 | CASE
36 | WHEN distance LIKE 'null' THEN NULL
37 | WHEN distance LIKE '%km' THEN TRIM('km' FROM distance)
38 | ELSE distance END
39 | AS FLOAT) AS distance,
40 | CAST(
41 | CASE
42 | WHEN duration LIKE 'null' THEN NULL
43 | WHEN duration LIKE '%mins' THEN TRIM('mins' FROM duration)
44 | WHEN duration LIKE '%minute' THEN TRIM('minute' FROM duration)
45 | WHEN duration LIKE '%minutes' THEN TRIM('minutes' FROM duration)
46 | ELSE duration END
47 | AS INT) AS duration,
48 | CASE
49 | WHEN cancellation IN ('null', 'NaN', '') THEN NULL
50 | ELSE cancellation
51 | END AS cancellation
52 | INTO #runner_orders_temp
53 | FROM runner_orders;
54 |
55 | SELECT *
56 | FROM #runner_orders_temp;
57 |
--------------------------------------------------------------------------------
/Case Study #2 - Pizza Runner/Syntax/A_query.sql:
--------------------------------------------------------------------------------
1 | --------------------
2 | --A. Pizza Metrics--
3 | --------------------
4 |
5 | -- 1. How many pizzas were ordered?
6 |
7 | SELECT COUNT(order_id) AS pizza_count
8 | FROM #customer_orders_temp;
9 |
10 |
11 | -- 2. How many unique customer orders were made?
12 |
13 | SELECT COUNT(DISTINCT order_id) AS order_count
14 | FROM #customer_orders_temp;
15 |
16 |
17 | -- 3. How many successful orders were delivered by each runner?
18 |
19 | SELECT
20 | runner_id,
21 | COUNT(order_id) AS successful_orders
22 | FROM #runner_orders_temp
23 | WHERE cancellation IS NULL
24 | GROUP BY runner_id;
25 |
26 |
27 | -- 4. How many of each type of pizza was delivered?
28 |
29 | -- Approach 1:
30 | SELECT
31 | p.pizza_name,
32 | COUNT(*) AS deliver_count
33 | FROM #customer_orders_temp c
34 | JOIN pizza_names p
35 | ON c.pizza_id = p.pizza_id
36 | WHERE c.order_id IN (
37 | SELECT order_id
38 | FROM #runner_orders_temp
39 | WHERE cancellation IS NULL)
40 | GROUP BY p.pizza_name;
41 |
42 | -- Approach 2:
43 |
44 | SELECT
45 | p.pizza_name,
46 | COUNT(*) AS deliver_count
47 | FROM #customer_orders_temp c
48 | JOIN pizza_names p
49 | ON c.pizza_id = p.pizza_id
50 | JOIN #runner_orders_temp r
51 | ON c.order_id = r.order_id
52 | WHERE r.cancellation IS NULL
53 | GROUP BY p.pizza_name;
54 |
55 |
56 | -- 5. How many Vegetarian and Meatlovers were ordered by each customer?
57 |
58 | SELECT
59 | customer_id,
60 | SUM(CASE WHEN pizza_id = 1 THEN 1 ELSE 0 END) AS Meatlovers,
61 | SUM(CASE WHEN pizza_id = 2 THEN 1 ELSE 0 END) AS Vegetarian
62 | FROM #customer_orders_temp
63 | GROUP BY customer_id;
64 |
65 |
66 | -- 6. What was the maximum number of pizzas delivered in a single order?
67 |
68 | SELECT MAX(pizza_count) AS max_count
69 | FROM (
70 | SELECT
71 | c.order_id,
72 | COUNT(c.pizza_id) AS pizza_count
73 | FROM #customer_orders_temp c
74 | JOIN #runner_orders_temp r
75 | ON c.order_id = r.order_id
76 | WHERE r.cancellation IS NULL
77 | GROUP BY c.order_id
78 | ) tmp;
79 |
80 |
81 | -- 7. For each customer, how many delivered pizzas had at least 1 change and how many had no changes?
82 |
83 | SELECT
84 | c.customer_id,
85 | SUM(CASE WHEN exclusions != '' OR extras != '' THEN 1 ELSE 0 END) AS has_change,
86 | SUM(CASE WHEN exclusions = '' AND extras = '' THEN 1 ELSE 0 END) AS no_change
87 | FROM #customer_orders_temp c
88 | JOIN #runner_orders_temp r
89 | ON c.order_id = r.order_id
90 | WHERE r.cancellation IS NULL
91 | GROUP BY c.customer_id;
92 |
93 |
94 | -- 8. How many pizzas were delivered that had both exclusions and extras?
95 |
96 | SELECT
97 | SUM(CASE WHEN exclusions != '' AND extras != '' THEN 1 ELSE 0 END) AS change_both
98 | FROM #customer_orders_temp c
99 | JOIN #runner_orders_temp r
100 | ON c.order_id = r.order_id
101 | WHERE r.cancellation IS NULL;
102 |
103 |
104 | -- 9. What was the total volume of pizzas ordered for each hour of the day?
105 |
106 | SELECT
107 | DATEPART(HOUR, order_time) AS hour_of_day,
108 | COUNT(order_id) AS pizza_volume
109 | FROM #customer_orders_temp
110 | GROUP BY DATEPART(HOUR, order_time)
111 | ORDER BY hour_of_day;
112 |
113 |
114 | -- 10. What was the volume of orders for each day of the week?
115 |
116 | SELECT
117 | DATENAME(weekday, order_time) AS week_day,
118 | COUNT(order_id) AS order_volume
119 | FROM #customer_orders_temp
120 | GROUP BY DATENAME(weekday, order_time);
121 |
--------------------------------------------------------------------------------
/Case Study #2 - Pizza Runner/Syntax/B_query.sql:
--------------------------------------------------------------------------------
1 | -------------------------------------
2 | --B. Runner and Customer Experience--
3 | -------------------------------------
4 |
5 | --1. How many runners signed up for each 1 week period? (i.e. week starts 2021-01-01)
6 |
7 | SELECT
8 | DATEPART(week, registration_date) AS week_period,
9 | COUNT(*) AS runner_count
10 | FROM runners
11 | GROUP BY DATEPART(week, registration_date);
12 |
13 |
14 | -- 2. What was the average time in minutes it took for each runner to arrive at the Pizza Runner HQ to pickup the order?
15 |
16 | WITH runners_pickup AS (
17 | SELECT
18 | r.runner_id,
19 | c.order_id,
20 | c.order_time,
21 | r.pickup_time,
22 | DATEDIFF(MINUTE, c.order_time, r.pickup_time) AS pickup_minutes
23 | FROM #customer_orders_temp AS c
24 | JOIN #runner_orders_temp AS r
25 | ON c.order_id = r.order_id
26 | WHERE r.cancellation IS NULL
27 | GROUP BY r.runner_id, c.order_id, c.order_time, r.pickup_time
28 | )
29 |
30 | SELECT
31 | runner_id,
32 | AVG(pickup_minutes) AS average_time
33 | FROM runners_pickup
34 | GROUP BY runner_id;
35 |
36 |
37 | -- 3. Is there any relationship between the number of pizzas and how long the order takes to prepare?
38 |
39 | WITH pizzaPreparation AS (
40 | SELECT
41 | c.order_id,
42 | c.order_time,
43 | r.pickup_time,
44 | DATEDIFF(MINUTE, c.order_time, r.pickup_time) AS prep_time,
45 | COUNT(c.pizza_id) AS pizza_count
46 | FROM #customer_orders_temp AS c
47 | JOIN #runner_orders_temp AS r
48 | ON c.order_id = r.order_id
49 | WHERE r.cancellation IS NULL
50 | GROUP BY c.order_id, c.order_time, r.pickup_time,
51 | DATEDIFF(MINUTE, c.order_time, r.pickup_time)
52 | )
53 |
54 | SELECT
55 | pizza_count,
56 | AVG(prep_time) AS avg_prep_time
57 | FROM pizzaPreparation
58 | GROUP BY pizza_count;
59 |
60 |
61 | -- 4. What was the average distance travelled for each customer?
62 |
63 | SELECT
64 | c.customer_id,
65 | ROUND(AVG(r.distance), 1) AS average_distance
66 | FROM #customer_orders_temp AS c
67 | JOIN #runner_orders_temp AS r
68 | ON c.order_id = r.order_id
69 | GROUP BY c.customer_id;
70 |
71 |
72 | -- 5. What was the difference between the longest and shortest delivery times for all orders?
73 |
74 | SELECT MAX(duration) - MIN(duration) AS difference
75 | FROM #runner_orders_temp;
76 |
77 |
78 | -- 6. What was the average speed for each runner for each delivery and do you notice any trend for these values?
79 |
80 | SELECT
81 | r.runner_id,
82 | c.order_id,
83 | r.distance,
84 | r.duration AS duration_min,
85 | COUNT(c.order_id) AS pizza_count,
86 | ROUND(AVG(r.distance/r.duration*60), 1) AS avg_speed
87 | FROM #runner_orders_temp r
88 | JOIN #customer_orders_temp c
89 | ON r.order_id = c.order_id
90 | WHERE r.cancellation IS NULL
91 | GROUP BY r.runner_id, c.order_id, r.distance, r.duration;
92 |
93 |
94 | -- 7. What is the successful delivery percentage for each runner?
95 |
96 | SELECT
97 | runner_id,
98 | COUNT(distance) AS delivered,
99 | COUNT(order_id) AS total,
100 | 100 * COUNT(distance) / COUNT(order_id) AS successful_pct
101 | FROM #runner_orders_temp
102 | GROUP BY runner_id;
103 |
--------------------------------------------------------------------------------
/Case Study #2 - Pizza Runner/Syntax/C_data_cleaning.sql:
--------------------------------------------------------------------------------
1 | ---------------------------------------------
2 | --C. Data Cleaning: Ingredient Optimisation--
3 | ---------------------------------------------
4 |
5 | -- 1. Create a new temporary table to separate [toppings] into multiple rows: #toppingsBreak
6 |
7 | SELECT
8 | pr.pizza_id,
9 | TRIM(value) AS topping_id,
10 | pt.topping_name
11 | INTO #toppingsBreak
12 | FROM pizza_recipes pr
13 | CROSS APPLY STRING_SPLIT(toppings, ',') AS t
14 | JOIN pizza_toppings pt
15 | ON TRIM(t.value) = pt.topping_id;
16 |
17 | SELECT *
18 | FROM #toppingsBreak;
19 |
20 |
21 | -- 2. Add a new column [record_id] to select each ordered pizza more easily
22 |
23 | ALTER TABLE #customer_orders_temp
24 | ADD record_id INT IDENTITY(1,1);
25 |
26 | SELECT *
27 | FROM #customer_orders_temp;
28 |
29 |
30 | -- 3. Create a new temporary table to separate [extras] into multiple rows: #extrasBreak
31 | SELECT
32 | c.record_id,
33 | TRIM(e.value) AS extra_id
34 | INTO #extrasBreak
35 | FROM #customer_orders_temp c
36 | CROSS APPLY STRING_SPLIT(extras, ',') AS e;
37 |
38 | SELECT *
39 | FROM #extrasBreak;
40 |
41 | -- 4. Create a new temporary table to separate [exclusions] into multiple rows: #exclusionsBreak
42 |
43 | SELECT
44 | c.record_id,
45 | TRIM(e.value) AS exclusion_id
46 | INTO #exclusionsBreak
47 | FROM #customer_orders_temp c
48 | CROSS APPLY STRING_SPLIT(exclusions, ',') AS e;
49 |
50 | SELECT *
51 | FROM #exclusionsBreak;
52 |
--------------------------------------------------------------------------------
/Case Study #2 - Pizza Runner/Syntax/C_query.sql:
--------------------------------------------------------------------------------
1 | ------------------------------
2 | --C. Ingredient Optimisation--
3 | ------------------------------
4 |
5 | -- 1. What are the standard ingredients for each pizza?
6 |
7 | SELECT
8 | p.pizza_name,
9 | STRING_AGG(t.topping_name, ', ') AS ingredients
10 | FROM #toppingsBreak t
11 | JOIN pizza_names p
12 | ON t.pizza_id = p.pizza_id
13 | GROUP BY p.pizza_name;
14 |
15 |
16 | --2. What was the most commonly added extra?
17 |
18 | SELECT
19 | p.topping_name,
20 | COUNT(*) AS extra_count
21 | FROM #extrasBreak e
22 | JOIN pizza_toppings p
23 | ON e.extra_id = p.topping_id
24 | GROUP BY p.topping_name
25 | ORDER BY COUNT(*) DESC;
26 |
27 |
28 | --3. What was the most common exclusion?
29 |
30 | SELECT
31 | p.topping_name,
32 | COUNT(*) AS exclusion_count
33 | FROM #exclusionsBreak e
34 | JOIN pizza_toppings p
35 | ON e.exclusion_id = p.topping_id
36 | GROUP BY p.topping_name
37 | ORDER BY COUNT(*) DESC;
38 |
39 |
40 | --4. Generate an order item for each record in the customers_orders table in the format of one of the following:
41 | --Meat Lovers
42 | --Meat Lovers - Exclude Beef
43 | --Meat Lovers - Extra Bacon
44 | --Meat Lovers - Exclude Cheese, Bacon - Extra Mushroom, Peppers
45 |
46 | WITH cteExtras AS (
47 | SELECT
48 | e.record_id,
49 | 'Extra ' + STRING_AGG(t.topping_name, ', ') AS record_options
50 | FROM #extrasBreak e
51 | JOIN pizza_toppings t
52 | ON e.extra_id = t.topping_id
53 | GROUP BY e.record_id
54 | ),
55 | cteExclusions AS (
56 | SELECT
57 | e.record_id,
58 | 'Exclusion ' + STRING_AGG(t.topping_name, ', ') AS record_options
59 | FROM #exclusionsBreak e
60 | JOIN pizza_toppings t
61 | ON e.exclusion_id = t.topping_id
62 | GROUP BY e.record_id
63 | ),
64 | cteUnion AS (
65 | SELECT * FROM cteExtras
66 | UNION
67 | SELECT * FROM cteExclusions
68 | )
69 |
70 | SELECT
71 | c.record_id,
72 | c.order_id,
73 | c.customer_id,
74 | c.pizza_id,
75 | c.order_time,
76 | CONCAT_WS(' - ', p.pizza_name, STRING_AGG(u.record_options, ' - ')) AS pizza_info
77 | FROM #customer_orders_temp c
78 | LEFT JOIN cteUnion u
79 | ON c.record_id = u.record_id
80 | JOIN pizza_names p
81 | ON c.pizza_id = p.pizza_id
82 | GROUP BY
83 | c.record_id,
84 | c.order_id,
85 | c.customer_id,
86 | c.pizza_id,
87 | c.order_time,
88 | p.pizza_name
89 | ORDER BY record_id;
90 |
91 |
92 | --5. Generate an alphabetically ordered comma separated ingredient list for each pizza order
93 | --from the customer_orders table and add a 2x in front of any relevant ingredients
94 | --For example: "Meat Lovers: 2xBacon, Beef, ... , Salami"
95 |
96 |
97 | -- Create a CTE: Each line displays an ingredient for an ordered pizza (add 2x for extras and remove exclusions as well)
98 | WITH ingredients AS (
99 | SELECT
100 | c.*,
101 | p.pizza_name,
102 |
103 | -- Add a 2x in front of topping_names if their topping_id appear in the #extrasBreak table
104 | CASE WHEN t.topping_id IN (
105 | SELECT extra_id
106 | FROM #extrasBreak e
107 | WHERE e.record_id = c.record_id)
108 | THEN '2x' + t.topping_name
109 | ELSE t.topping_name
110 | END AS topping
111 |
112 | FROM #customer_orders_temp c
113 | JOIN #toppingsBreak t
114 | ON t.pizza_id = c.pizza_id
115 | JOIN pizza_names p
116 | ON p.pizza_id = c.pizza_id
117 |
118 | -- Exclude toppings if their topping_id appear in the #exclusionBreak table
119 | WHERE t.topping_id NOT IN (
120 | SELECT exclusion_id
121 | FROM #exclusionsBreak e
122 | WHERE c.record_id = e.record_id)
123 | )
124 |
125 | SELECT
126 | record_id,
127 | order_id,
128 | customer_id,
129 | pizza_id,
130 | order_time,
131 | CONCAT(pizza_name + ': ', STRING_AGG(topping, ', ')) AS ingredients_list
132 | FROM ingredients
133 | GROUP BY
134 | record_id,
136 | order_id,
137 | customer_id,
138 | pizza_id,
139 | order_time,
140 | pizza_name
141 | ORDER BY record_id;
142 |
143 |
144 | --6. What is the total quantity of each ingredient used in all delivered pizzas sorted by most frequent first?
145 |
146 | WITH frequentIngredients AS (
147 | SELECT
148 | c.record_id,
149 | t.topping_name,
150 | CASE
151 | -- if extra ingredient, add 2
152 | WHEN t.topping_id IN (
153 | SELECT extra_id
154 | FROM #extrasBreak e
155 | WHERE e.record_id = c.record_id)
156 | THEN 2
157 | -- if excluded ingredient, add 0
158 | WHEN t.topping_id IN (
159 | SELECT exclusion_id
160 | FROM #exclusionsBreak e
161 | WHERE c.record_id = e.record_id)
162 | THEN 0
163 | -- no extras, no exclusion, add 1
164 | ELSE 1
165 | END AS times_used
166 | FROM #customer_orders_temp c
167 | JOIN #toppingsBreak t
168 | ON t.pizza_id = c.pizza_id
169 | JOIN pizza_names p
170 | ON p.pizza_id = c.pizza_id
171 | )
172 |
173 | SELECT
174 | topping_name,
175 | SUM(times_used) AS times_used
176 | FROM frequentIngredients
177 | GROUP BY topping_name
178 | ORDER BY times_used DESC;
179 |
180 |
--------------------------------------------------------------------------------
/Case Study #2 - Pizza Runner/Syntax/D_query.sql:
--------------------------------------------------------------------------------
1 | --------------------------
2 | --D. Pricing and Ratings--
3 | --------------------------
4 |
5 | --1. If a Meat Lovers pizza costs $12 and Vegetarian costs $10 and there were no charges for changes -
6 | --how much money has Pizza Runner made so far if there are no delivery fees?
7 |
8 | SELECT
9 | SUM(CASE WHEN p.pizza_name = 'Meatlovers' THEN 12
10 | ELSE 10 END) AS money_earned
11 | FROM #customer_orders_temp c
12 | JOIN pizza_names p
13 | ON c.pizza_id = p.pizza_id
14 | JOIN #runner_orders_temp r
15 | ON c.order_id = r.order_id
16 | WHERE r.cancellation IS NULL;
17 |
18 |
19 | -- 2. What if there was an additional $1 charge for any pizza extras?
20 | -- Add cheese is $1 extra
21 |
22 | DECLARE @basecost INT
23 | SET @basecost = 138 -- @basecost = result of the previous question
24 |
25 | SELECT
26 |   @basecost + SUM(CASE WHEN p.topping_name = 'Cheese' THEN 2  -- $1 per extra, plus $1 more when the extra is cheese
27 |     ELSE 1 END) AS updated_money
28 | FROM #extrasBreak e
29 | JOIN pizza_toppings p
30 | ON e.extra_id = p.topping_id;
31 |
32 |
33 | -- 3. The Pizza Runner team now wants to add an additional ratings system that allows customers to rate their runner,
34 | --how would you design an additional table for this new dataset - generate a schema for this new table and
35 | --insert your own data for ratings for each successful customer order between 1 to 5.
36 |
37 | DROP TABLE IF EXISTS ratings
38 | CREATE TABLE ratings (
39 | order_id INT,
40 | rating INT);
41 | INSERT INTO ratings (order_id, rating)
42 | VALUES
43 | (1,3),
44 | (2,5),
45 | (3,3),
46 | (4,1),
47 | (5,5),
48 | (7,3),
49 | (8,4),
50 | (10,3);
51 |
52 |
53 | -- 4. Using your newly generated table - can you join all of the information together to form a table which has the following information for successful deliveries?
54 | -- customer_id, order_id, runner_id, rating, order_time, pickup_time, Time between order and pickup, Delivery duration, Average speed, Total number of pizzas
55 |
56 | SELECT
57 |   c.customer_id,
58 |   c.order_id,
59 |   r.runner_id,
60 |   rt.rating,
61 |   c.order_time,
62 |   r.pickup_time,
63 |   DATEDIFF(MINUTE, c.order_time, r.pickup_time) AS mins_difference,
64 |   r.duration,
65 |   ROUND(AVG(r.distance/r.duration*60), 1) AS avg_speed,
66 |   COUNT(c.order_id) AS pizza_count
67 | FROM #customer_orders_temp c
68 | JOIN #runner_orders_temp r
69 |   ON r.order_id = c.order_id
70 | JOIN ratings rt
71 |   ON rt.order_id = c.order_id
72 | WHERE r.cancellation IS NULL
73 | GROUP BY c.customer_id, c.order_id, r.runner_id, rt.rating,
74 |   c.order_time, r.pickup_time,
75 |   r.duration;
76 |
77 |
78 | --5. If a Meat Lovers pizza was $12 and Vegetarian $10 fixed prices with no cost for extras
79 | --and each runner is paid $0.30 per kilometre traveled - how much money does Pizza Runner have left over after these deliveries?
80 |
81 | DECLARE @basecost INT
82 | SET @basecost = 138
83 |
84 | SELECT
85 | @basecost AS revenue,
86 | SUM(distance)*0.3 AS runner_pay,
87 | @basecost - SUM(distance)*0.3 AS money_left
88 | FROM #runner_orders_temp;
89 |
--------------------------------------------------------------------------------
/Case Study #2 - Pizza Runner/Syntax/E_query.sql:
--------------------------------------------------------------------------------
1 | -------------------
2 | --Bonus Questions--
3 | -------------------
4 |
5 | --If Danny wants to expand his range of pizzas - how would this impact the existing data design?
6 | --Write an INSERT statement to demonstrate what would happen if a new Supreme pizza with
7 | --all the toppings was added to the Pizza Runner menu?
8 |
9 | INSERT INTO pizza_names (pizza_id, pizza_name)
10 | VALUES (3, 'Supreme');
11 |
12 | ALTER TABLE pizza_recipes
13 | ALTER COLUMN toppings VARCHAR(50);
14 |
15 | INSERT INTO pizza_recipes (pizza_id, toppings)
16 | VALUES (3, '1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12');
17 |
--------------------------------------------------------------------------------
/Case Study #2 - Pizza Runner/database.sql:
--------------------------------------------------------------------------------
1 | -------------------------------
2 | --CASE STUDY #2: PIZZA RUNNER--
3 | -------------------------------
4 | --Author: Anh Nguyen
5 | --Date: 06/11/2022
6 | --Tool used: SQL Server
7 |
8 |
9 | /*
10 | Note:
11 | 1. In table [customer_orders], change the datatype of column [order_time] from TIMESTAMP to DATETIME.
12 |    Otherwise, we can't insert new values into table [customer_orders].
13 |
14 | 2. In tables [pizza_names], [pizza_recipes], and [pizza_toppings], change the datatype from TEXT to VARCHAR.
15 |    Otherwise, we will face an error in Q4, since the deprecated TEXT type can't be used in operations such as GROUP BY.
16 | */
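-- For example (illustrative only; the CREATE TABLE statements below already use
-- the corrected datatypes): a TEXT column can be converted in place with
--   ALTER TABLE pizza_runner.dbo.pizza_recipes ALTER COLUMN toppings VARCHAR(30);
-- whereas a TIMESTAMP (rowversion) column cannot be ALTERed to DATETIME, which is
-- why [order_time] is declared as DATETIME from the start.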
17 |
18 |
19 | CREATE DATABASE pizza_runner;
20 |
21 | DROP TABLE IF EXISTS pizza_runner.dbo.runners;
22 | CREATE TABLE pizza_runner.dbo.runners (
23 | "runner_id" INTEGER,
24 | "registration_date" DATE
25 | );
26 | INSERT INTO pizza_runner.dbo.runners
27 | ("runner_id", "registration_date")
28 | VALUES
29 | (1, '2021-01-01'),
30 | (2, '2021-01-03'),
31 | (3, '2021-01-08'),
32 | (4, '2021-01-15');
33 |
34 |
35 | DROP TABLE IF EXISTS pizza_runner.dbo.customer_orders;
36 | CREATE TABLE pizza_runner.dbo.customer_orders (
37 | "order_id" INTEGER,
38 | "customer_id" INTEGER,
39 | "pizza_id" INTEGER,
40 | "exclusions" VARCHAR(4),
41 | "extras" VARCHAR(4),
42 | "order_time" DATETIME
43 | );
44 |
45 | INSERT INTO pizza_runner.dbo.customer_orders
46 | ("order_id", "customer_id", "pizza_id", "exclusions", "extras", "order_time")
47 | VALUES
48 | ('1', '101', '1', '', '', '2020-01-01 18:05:02'),
49 | ('2', '101', '1', '', '', '2020-01-01 19:00:52'),
50 | ('3', '102', '1', '', '', '2020-01-02 23:51:23'),
51 | ('3', '102', '2', '', NULL, '2020-01-02 23:51:23'),
52 | ('4', '103', '1', '4', '', '2020-01-04 13:23:46'),
53 | ('4', '103', '1', '4', '', '2020-01-04 13:23:46'),
54 | ('4', '103', '2', '4', '', '2020-01-04 13:23:46'),
55 | ('5', '104', '1', 'null', '1', '2020-01-08 21:00:29'),
56 | ('6', '101', '2', 'null', 'null', '2020-01-08 21:03:13'),
57 | ('7', '105', '2', 'null', '1', '2020-01-08 21:20:29'),
58 | ('8', '102', '1', 'null', 'null', '2020-01-09 23:54:33'),
59 | ('9', '103', '1', '4', '1, 5', '2020-01-10 11:22:59'),
60 | ('10', '104', '1', 'null', 'null', '2020-01-11 18:34:49'),
61 | ('10', '104', '1', '2, 6', '1, 4', '2020-01-11 18:34:49');
62 |
63 |
64 | DROP TABLE IF EXISTS pizza_runner.dbo.runner_orders;
65 | CREATE TABLE pizza_runner.dbo.runner_orders (
66 | "order_id" INTEGER,
67 | "runner_id" INTEGER,
68 | "pickup_time" VARCHAR(19),
69 | "distance" VARCHAR(7),
70 | "duration" VARCHAR(10),
71 | "cancellation" VARCHAR(23)
72 | );
73 |
74 | INSERT INTO pizza_runner.dbo.runner_orders
75 | ("order_id", "runner_id", "pickup_time", "distance", "duration", "cancellation")
76 | VALUES
77 | ('1', '1', '2020-01-01 18:15:34', '20km', '32 minutes', ''),
78 | ('2', '1', '2020-01-01 19:10:54', '20km', '27 minutes', ''),
79 | ('3', '1', '2020-01-03 00:12:37', '13.4km', '20 mins', NULL),
80 | ('4', '2', '2020-01-04 13:53:03', '23.4', '40', NULL),
81 | ('5', '3', '2020-01-08 21:10:57', '10', '15', NULL),
82 | ('6', '3', 'null', 'null', 'null', 'Restaurant Cancellation'),
83 | ('7', '2', '2020-01-08 21:30:45', '25km', '25mins', 'null'),
84 | ('8', '2', '2020-01-10 00:15:02', '23.4 km', '15 minute', 'null'),
85 | ('9', '2', 'null', 'null', 'null', 'Customer Cancellation'),
86 | ('10', '1', '2020-01-11 18:50:20', '10km', '10minutes', 'null');
87 |
88 |
89 | DROP TABLE IF EXISTS pizza_runner.dbo.pizza_names;
90 | CREATE TABLE pizza_runner.dbo.pizza_names (
91 | "pizza_id" INTEGER,
92 | "pizza_name" VARCHAR(10)
93 | );
94 | INSERT INTO pizza_runner.dbo.pizza_names
95 | ("pizza_id", "pizza_name")
96 | VALUES
97 | (1, 'Meatlovers'),
98 | (2, 'Vegetarian');
99 |
100 |
101 | DROP TABLE IF EXISTS pizza_runner.dbo.pizza_recipes;
102 | CREATE TABLE pizza_runner.dbo.pizza_recipes (
103 | "pizza_id" INTEGER,
104 | "toppings" VARCHAR(30)
105 | );
106 | INSERT INTO pizza_runner.dbo.pizza_recipes
107 | ("pizza_id", "toppings")
108 | VALUES
109 | (1, '1, 2, 3, 4, 5, 6, 8, 10'),
110 | (2, '4, 6, 7, 9, 11, 12');
111 |
112 |
113 | DROP TABLE IF EXISTS pizza_runner.dbo.pizza_toppings;
114 | CREATE TABLE pizza_runner.dbo.pizza_toppings (
115 | "topping_id" INTEGER,
116 | "topping_name" VARCHAR(20)
117 | );
118 | INSERT INTO pizza_runner.dbo.pizza_toppings
119 | ("topping_id", "topping_name")
120 | VALUES
121 | (1, 'Bacon'),
122 | (2, 'BBQ Sauce'),
123 | (3, 'Beef'),
124 | (4, 'Cheese'),
125 | (5, 'Chicken'),
126 | (6, 'Mushrooms'),
127 | (7, 'Onions'),
128 | (8, 'Pepperoni'),
129 | (9, 'Peppers'),
130 | (10, 'Salami'),
131 | (11, 'Tomatoes'),
132 | (12, 'Tomato Sauce');
133 |
--------------------------------------------------------------------------------
/Case Study #3 - Foodie-Fi/README.md:
--------------------------------------------------------------------------------
1 | # 🥑 Case Study #3 - Foodie-Fi
2 |
3 |
4 |
5 | ## 📕 Table of Contents
6 | * [Business Task](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%233%20-%20Foodie-Fi#%EF%B8%8F-business-task)
7 | * [Entity Relationship Diagram](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%233%20-%20Foodie-Fi#-entity-relationship-diagram)
8 | * [Case Study Questions](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%233%20-%20Foodie-Fi#-case-study-questions)
9 | * [My Solution](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%233%20-%20Foodie-Fi#-my-solution)
10 |
11 | ---
12 | ## 🛠️ Business Task
13 | Danny and his friends launched a new startup called "Foodie-Fi" in 2020 and started selling monthly and annual subscriptions,
14 | giving their customers unlimited on-demand access to exclusive food videos from around the world.
15 |
16 | This case study focuses on using subscription-style digital data to answer
17 | important questions about business performance, payments, and the customer journey.
18 |
19 | ---
20 | ## 🔐 Entity Relationship Diagram
21 |
22 |
23 |
24 | ---
25 | ## ❓ Case Study Questions
26 | ### A. Customer Journey
27 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%233%20-%20Foodie-Fi/Solution/A.%20Customer%20Journey.md).
28 | * Based off the 8 sample customers provided in the sample from the subscriptions table, write a brief description about each customer’s onboarding journey.
29 |
30 | ---
31 | ### B. Data Analysis Questions
32 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%233%20-%20Foodie-Fi/Solution/B.%20Data%20Analysis%20Questions.md).
33 |
34 | 1. How many customers has Foodie-Fi ever had?
35 | 2. What is the monthly distribution of trial plan start_date values for our dataset - use the start of the month as the group by value
36 | 3. What plan start_date values occur after the year 2020 for our dataset? Show the breakdown by count of events for each plan_name
37 | 4. What is the customer count and percentage of customers who have churned rounded to 1 decimal place?
38 | 5. How many customers have churned straight after their initial free trial - what percentage is this rounded to the nearest whole number?
39 | 6. What is the number and percentage of customer plans after their initial free trial?
40 | 7. What is the customer count and percentage breakdown of all 5 plan_name values at 2020-12-31?
41 | 8. How many customers have upgraded to an annual plan in 2020?
42 | 9. How many days on average does it take for a customer to upgrade to an annual plan from the day they join Foodie-Fi?
43 | 10. Can you further breakdown this average value into 30 day periods (i.e. 0-30 days, 31-60 days etc)
44 | 11. How many customers downgraded from a pro monthly to a basic monthly plan in 2020?
45 |
46 | ---
47 | ### C. Challenge Payment Question
48 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%233%20-%20Foodie-Fi/Solution/C.%20Challenge%20Payment%20Question.md).
49 |
50 | The Foodie-Fi team wants to create a new payments table for the year 2020 that includes amounts paid by each customer in the subscriptions table with the following requirements (a short sketch of the monthly date generation follows the list):
51 | * monthly payments always occur on the same day of month as the original start_date of any monthly paid plan
52 | * upgrades from basic to monthly or pro plans are reduced by the current paid amount in that month and start immediately
53 | * upgrades from pro monthly to pro annual are paid at the end of the current billing period and also starts at the end of the month period
54 | * once a customer churns they will no longer make payments
55 |
56 | ---
57 | ### D. Outside The Box Questions
58 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%233%20-%20Foodie-Fi/Solution/D.%20Outside%20The%20Box%20Questions.md).
59 |
60 | 1. How would you calculate the rate of growth for Foodie-Fi?
61 | 2. What key metrics would you recommend Foodie-Fi management to track over time to assess performance of their overall business?
62 | 3. What are some key customer journeys or experiences that you would analyse further to improve customer retention?
63 | 4. If the Foodie-Fi team were to create an exit survey shown to customers who wish to cancel their subscription, what questions would you include in the survey?
64 | 5. What business levers could the Foodie-Fi team use to reduce the customer churn rate? How would you validate the effectiveness of your ideas?
65 |
66 | ---
67 | ## 🚀 My Solution
68 | * View the complete syntax [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%233%20-%20Foodie-Fi/Syntax).
69 | * View the result and explanation [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%233%20-%20Foodie-Fi/Solution).
70 |
--------------------------------------------------------------------------------
/Case Study #3 - Foodie-Fi/Solution/A. Customer Journey.md:
--------------------------------------------------------------------------------
1 | # 🥑 Case Study #3 - Foodie-Fi
2 | ## A. Customer Journey
3 | Based off the 8 sample customers provided in the sample from the subscriptions table, write a brief description about each customer’s onboarding journey.
4 | ```TSQL
5 | SELECT
6 | s.*,
7 | p.plan_name,
8 | p.price
9 | FROM subscriptions s
10 | JOIN plans p ON s.plan_id = p.plan_id
11 | WHERE customer_id IN (1, 2, 11, 13, 15, 16, 18, 19);
12 | ```
13 | | customer_id | plan_id | start_date | plan_name | price |
14 | |-------------|---------|------------|---------------|---------|
15 | | 1 | 0 | 2020-08-01 | trial | 0.00 |
16 | | 1 | 1 | 2020-08-08 | basic monthly | 9.90 |
17 | | 2 | 0 | 2020-09-20 | trial | 0.00 |
18 | | 2 | 3 | 2020-09-27 | pro annual | 199.00 |
19 | | 11 | 0 | 2020-11-19 | trial | 0.00 |
20 | | 11 | 4 | 2020-11-26 | churn | NULL |
21 | | 13 | 0 | 2020-12-15 | trial | 0.00 |
22 | | 13 | 1 | 2020-12-22 | basic monthly | 9.90 |
23 | | 13 | 2 | 2021-03-29 | pro monthly | 19.90 |
24 | | 15 | 0 | 2020-03-17 | trial | 0.00 |
25 | | 15 | 2 | 2020-03-24 | pro monthly | 19.90 |
26 | | 15 | 4 | 2020-04-29 | churn | NULL |
27 | | 16 | 0 | 2020-05-31 | trial | 0.00 |
28 | | 16 | 1 | 2020-06-07 | basic monthly | 9.90 |
29 | | 16 | 3 | 2020-10-21 | pro annual | 199.00 |
30 | | 18 | 0 | 2020-07-06 | trial | 0.00 |
31 | | 18 | 2 | 2020-07-13 | pro monthly | 19.90 |
32 | | 19 | 0 | 2020-06-22 | trial | 0.00 |
33 | | 19 | 2 | 2020-06-29 | pro monthly | 19.90 |
34 |
35 | Customer 1 signed up for the 7-day free trial on 01/08/2020. He/she didn't cancel the subscription afterwards, so the system automatically upgraded it to the basic monthly plan on 08/08/2020.
36 |
37 | Customer 2 signed up for the 7-day free trial on 20/09/2020 and upgraded to the pro annual plan on 27/09/2020.
38 |
39 | Customer 11 signed up for the 7-day free trial on 19/11/2020 and cancelled the subscription on 26/11/2020.
40 |
41 | Customer 13 signed up for the 7-day free trial on 15/12/2020. He/she didn't cancel the subscription, so the system automatically upgraded it to the basic monthly plan on 22/12/2020. He/she used that plan for about three months, then upgraded to the pro monthly plan on 29/03/2021.
42 |
43 | Customer 15 signed up for the 7-day free trial on 17/03/2020. He/she didn't cancel the subscription, so the system automatically upgraded it to the pro monthly plan on 24/03/2020. He/she then cancelled the subscription on 29/04/2020 and could keep using the plan until the end of the paid billing period.
44 |
45 | Customer 16 signed up for the 7-day free trial on 31/05/2020. He/she didn't cancel the subscription, so the system automatically upgraded it to the basic monthly plan on 07/06/2020. He/she used that plan for over four months, then upgraded to the pro annual plan on 21/10/2020.
46 |
47 | Customer 18 signed up for the 7-day free trial on 06/07/2020. After the trial ended, he/she upgraded to the pro monthly plan on 13/07/2020.
48 |
49 | Customer 19 signed up for the 7-day free trial on 22/06/2020 and upgraded to the pro monthly plan on 29/06/2020. After two months on that plan, he/she upgraded to the pro annual plan on 29/08/2020.
50 |
51 | ---
52 | My solution for **[B. Data Analysis Questions](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%233%20-%20Foodie-Fi/Solution/B.%20Data%20Analysis%20Questions.md)**.
53 |
--------------------------------------------------------------------------------
/Case Study #3 - Foodie-Fi/Solution/C. Challenge Payment Question.md:
--------------------------------------------------------------------------------
1 | # 🥑 Case Study #3 - Foodie-Fi
2 | ## C. Challenge Payment Question
3 | The Foodie-Fi team wants to create a new payments table for the year 2020 that includes amounts paid by each customer in the subscriptions table with the following requirements:
4 | * monthly payments always occur on the same day of month as the original start_date of any monthly paid plan
5 | * upgrades from basic to monthly or pro plans are reduced by the current paid amount in that month and start immediately
6 | * upgrades from pro monthly to pro annual are paid at the end of the current billing period and also starts at the end of the month period
7 | * once a customer churns they will no longer make payments
8 |
9 | Example outputs for this table might look like the following:
10 |
11 | | customer_id | plan_id | plan_name | payment_date | amount | payment_order |
12 | |-------------|---------|---------------|--------------|--------|----------------|
13 | | 1 | 1 | basic monthly | 2020-08-08 | 9.90 | 1 |
14 | | 1 | 1 | basic monthly | 2020-09-08 | 9.90 | 2 |
15 | | 1 | 1 | basic monthly | 2020-10-08 | 9.90 | 3 |
16 | | 1 | 1 | basic monthly | 2020-11-08 | 9.90 | 4 |
17 | | 1 | 1 | basic monthly | 2020-12-08 | 9.90 | 5 |
18 | | 2 | 3 | pro annual | 2020-09-27 | 199.00 | 1 |
19 | | 13 | 1 | basic monthly | 2020-12-22 | 9.90 | 1 |
20 | | 15 | 2 | pro monthly | 2020-03-24 | 19.90 | 1 |
21 | | 15 | 2 | pro monthly | 2020-04-24 | 19.90 | 2 |
22 | | 16 | 1 | basic monthly | 2020-06-07 | 9.90 | 1 |
23 | | 16 | 1 | basic monthly | 2020-07-07 | 9.90 | 2 |
24 | | 16 | 1 | basic monthly | 2020-08-07 | 9.90 | 3 |
25 | | 16 | 1 | basic monthly | 2020-09-07 | 9.90 | 4 |
26 | | 16 | 1 | basic monthly | 2020-10-07 | 9.90 | 5 |
27 | | 16 | 3 | pro annual | 2020-10-21 | 189.10 | 6 |
28 | | 18 | 2 | pro monthly | 2020-07-13 | 19.90 | 1 |
29 | | 18 | 2 | pro monthly | 2020-08-13 | 19.90 | 2 |
30 | | 18 | 2 | pro monthly | 2020-09-13 | 19.90 | 3 |
31 | | 18 | 2 | pro monthly | 2020-10-13 | 19.90 | 4 |
32 |
33 | ---
34 | * Use a recursive CTE to generate one payment row per month for every monthly paid plan in 2020, stopping when the customer changes plans; 'pro annual' plans are paid once and are not incremented.
35 | * Use ```CASE``` to create a new column ```last_date```, the last day of the current plan:
36 |   * if a customer kept the current plan, last_date = '2020-12-31'
37 |   * if a customer changed plans, last_date = start_date + (the month difference between start_date and the change date)
38 | * Create a new table ```payments``` by selecting the required columns
39 |
40 | ```TSQL
41 | --Use a recursive CTE to add one payment row per month for each monthly paid plan until the customer changes plans, except 'pro annual'
42 | WITH dateRecursion AS (
43 | SELECT
44 | s.customer_id,
45 | s.plan_id,
46 | p.plan_name,
47 | s.start_date AS payment_date,
48 | --column last_date: last day of the current plan
49 | CASE
50 | --if a customer kept using the current plan, last_date = '2020-12-31'
51 | WHEN LEAD(s.start_date) OVER(PARTITION BY s.customer_id ORDER BY s.start_date) IS NULL THEN '2020-12-31'
52 | --if a customer changed the plan, last_date = (month difference between start_date and changing date) + start_date
53 | ELSE DATEADD(MONTH,
54 | DATEDIFF(MONTH, start_date, LEAD(s.start_date) OVER(PARTITION BY s.customer_id ORDER BY s.start_date)),
55 | start_date) END AS last_date,
56 | p.price AS amount
57 | FROM subscriptions s
58 | JOIN plans p ON s.plan_id = p.plan_id
59 | --exclude trials because they didn't generate payments
60 | WHERE p.plan_name NOT IN ('trial')
61 | AND YEAR(start_date) = 2020
62 |
63 | UNION ALL
64 |
65 | SELECT
66 | customer_id,
67 | plan_id,
68 | plan_name,
69 | --increment payment_date by monthly
70 | DATEADD(MONTH, 1, payment_date) AS payment_date,
71 | last_date,
72 | amount
73 | FROM dateRecursion
74 | --stop incrementing when payment_date = last_date
75 | WHERE DATEADD(MONTH, 1, payment_date) <= last_date
76 | AND plan_name != 'pro annual'
77 | )
78 | --Create a new table [payments]
79 | SELECT
80 | customer_id,
81 | plan_id,
82 | plan_name,
83 | payment_date,
84 | amount,
85 | ROW_NUMBER() OVER(PARTITION BY customer_id ORDER BY payment_date) AS payment_order
86 | INTO payments
87 | FROM dateRecursion
88 | --exclude churns
89 | WHERE amount IS NOT NULL
90 | ORDER BY customer_id
91 | OPTION (MAXRECURSION 365); --allow up to 365 recursions (the default limit is 100)
92 | ```
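93 | As a quick sanity check, the new table can be compared against the example output above. A minimal usage sketch, assuming the ```payments``` table was just created by the query in this section:
94 | ```TSQL
95 | --Preview each customer's first payments and compare with the example rows above
96 | SELECT TOP 20 *
97 | FROM payments
98 | ORDER BY customer_id, payment_order;
99 | ```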
93 | ---
94 | My solution for **[D. Outside The Box Questions](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%233%20-%20Foodie-Fi/Solution/D.%20Outside%20The%20Box%20Questions.md)**.
95 |
--------------------------------------------------------------------------------
/Case Study #3 - Foodie-Fi/Solution/D. Outside The Box Questions.md:
--------------------------------------------------------------------------------
1 | # 🥑 Case Study #3 - Foodie-Fi
2 | ## D. Outside The Box Questions
3 | ### 1. How would you calculate the rate of growth for Foodie-Fi?
4 | - I chose the year 2020 to analyze because I already created the ```payments``` table in part C.
5 | - If you want to incorporate the 2021 data to see the whole picture (quarterly trends, a 2020-2021 comparison, etc.),
6 | create a new ```payments``` table and change all the date conditions in part C to '2021-12-31'.
7 |
8 | ```TSQL
9 | WITH monthlyRevenue AS (
10 | SELECT
11 | MONTH(payment_date) AS months,
12 | SUM(amount) AS revenue
13 | FROM payments
14 | GROUP BY MONTH(payment_date)
15 | )
16 |
17 | SELECT
18 | months,
19 | revenue,
20 | (revenue-LAG(revenue) OVER(ORDER BY months))/revenue AS revenue_growth --denominator is the current month; use LAG(revenue) instead for the conventional growth rate
21 | FROM monthlyRevenue;
22 | ```
23 | | months | revenue | revenue_growth |
24 | |--------|----------|-----------------|
25 | | 1 | 1282.00 | NULL |
26 | | 2 | 2792.60 | 0.540929 |
27 | | 3 | 4342.40 | 0.356899 |
28 | | 4 | 5972.70 | 0.272958 |
29 | | 5 | 7324.10 | 0.184514 |
30 | | 6 | 8765.50 | 0.164440 |
31 | | 7 | 10207.50 | 0.141268 |
32 | | 8 | 12047.40 | 0.152721 |
33 | | 9 | 12913.20 | 0.067047 |
34 | | 10 | 14952.50 | 0.136385 |
35 | | 11 | 12862.70 | -0.162469 |
36 | | 12 | 13429.50 | 0.042205 |
37 |
38 | ### 2. What key metrics would you recommend Foodie-Fi management to track over time to assess performance of their overall business?
39 | - Monthly revenue growth: How does Foodie-Fi's revenue increase or decrease month over month? Are there months in which revenue changes significantly?
40 | - Customer growth: How many new customers are added each month? What does the growth rate look like (x1.5, x2, ... after each month)?
41 | - Conversion rate: How many customers keep using Foodie-Fi after the trial? How does that rate change over time?
42 | - Churn rate: How many customers cancel their subscription each month? Which plan were they on? (See the sketch below.)
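43 |
44 | As one example, the churn-rate metric could be tracked with a query like the following. A minimal sketch, assuming the same ```subscriptions``` and ```plans``` tables used throughout this case study:
45 | ```TSQL
46 | --Monthly churn events and their share of the total customer base
47 | SELECT
48 |   MONTH(s.start_date) AS months,
49 |   COUNT(*) AS churn_events,
50 |   CAST(100.0 * COUNT(*) AS FLOAT)
51 |     / (SELECT COUNT(DISTINCT customer_id) FROM subscriptions) AS churn_pct
52 | FROM subscriptions s
53 | JOIN plans p ON s.plan_id = p.plan_id
54 | WHERE p.plan_name = 'churn'
55 | GROUP BY MONTH(s.start_date);
56 | ```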
43 |
44 | ### 3. What are some key customer journeys or experiences that you would analyse further to improve customer retention?
45 | - Customers who downgraded their plan
46 | - Customers who upgraded from basic monthly to pro monthly or pro annual
47 | - Customers who cancelled the subscription
48 |
49 | ### 4. If the Foodie-Fi team were to create an exit survey shown to customers who wish to cancel their subscription, what questions would you include in the survey?
50 | - What is the primary reason for the cancellation?
51 | + Price
52 | + Technical issues
53 | + Customer support
54 | + Found an alternative
55 | + Others (please specify)
56 | - Overall, how satisfied were you with the subscription? (Likert scale: Very Satisfied - Very Unsatisfied)
57 | - Would you consider using our services in the future? (Likert scale: Very Likely - Very Unlikely)
58 | - Would you recommend our company to a colleague, friend or family member? (Likert scale: Very Likely - Very Unlikely)
59 |
60 | ### 5. What business levers could the Foodie-Fi team use to reduce the customer churn rate? How would you validate the effectiveness of your ideas?
61 | - From the exit survey, look for the most common reasons why customers cancelled their subscriptions
62 | + Price: offer seasonal discounts, extend the trial period, or add more benefits for customers
63 | + Service quality: work with the relevant department to fix the issues
64 | + Found an alternative: run a competitor analysis to understand their competitive advantages over us
65 | - To validate the effectiveness of those ideas, track (see the sketch below):
66 | + Churn rate
67 | + Conversion rate
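68 |
69 | A minimal sketch of the conversion-rate check, reusing the ```nextPlan``` pattern from part B with the same ```subscriptions``` and ```plans``` tables:
70 | ```TSQL
71 | --Share of all customers who converted from the trial to any paid plan
72 | WITH nextPlan AS (
73 |   SELECT
74 |     p.plan_name,
75 |     LEAD(p.plan_name) OVER(PARTITION BY s.customer_id ORDER BY s.start_date) AS next_plan
76 |   FROM subscriptions s
77 |   JOIN plans p ON s.plan_id = p.plan_id
78 | )
79 | SELECT
80 |   CAST(100.0 * COUNT(*) AS FLOAT)
81 |     / (SELECT COUNT(DISTINCT customer_id) FROM subscriptions) AS trial_to_paid_pct
82 | FROM nextPlan
83 | WHERE plan_name = 'trial'
84 |   AND next_plan IN ('basic monthly', 'pro monthly', 'pro annual');
85 | ```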
68 |
--------------------------------------------------------------------------------
/Case Study #3 - Foodie-Fi/Syntax/A_query.sql:
--------------------------------------------------------------------------------
1 | -----------------------
2 | --A. Customer Journey--
3 | -----------------------
4 |
5 | --Based off the 8 sample customers provided in the sample from the subscriptions table,
6 | --write a brief description about each customer’s onboarding journey.
7 |
8 | SELECT
9 | s.*,
10 | p.plan_name,
11 | p.price
12 | FROM subscriptions s
13 | JOIN plans p ON s.plan_id = p.plan_id
14 | WHERE customer_id IN (1, 2, 11, 13, 15, 16, 18, 19);
15 |
16 | /*
17 | - Customer 1 signed up for the 7-day free trial on 01/08/2020.
18 | He/she didn't cancel the subscription afterwards, so the system automatically upgraded it to the basic monthly plan on 08/08/2020.
19 |
20 | - Customer 2 signed up for the 7-day free trial on 20/09/2020
21 | and upgraded to the pro annual plan on 27/09/2020.
22 |
23 | - Customer 11 signed up for the 7-day free trial on 19/11/2020
24 | and cancelled the subscription on 26/11/2020.
25 |
26 | - Customer 13 signed up for the 7-day free trial on 15/12/2020.
27 | He/she didn't cancel the subscription, so the system automatically upgraded it to the basic monthly plan on 22/12/2020.
28 | He/she used that plan for about three months, then upgraded to the pro monthly plan on 29/03/2021.
29 |
30 | - Customer 15 signed up for the 7-day free trial on 17/03/2020.
31 | He/she didn't cancel the subscription, so the system automatically upgraded it to the pro monthly plan on 24/03/2020.
32 | He/she then cancelled the subscription on 29/04/2020 and could keep using the plan until the end of the paid billing period.
33 |
34 | - Customer 16 signed up for the 7-day free trial on 31/05/2020.
35 | He/she didn't cancel the subscription, so the system automatically upgraded it to the basic monthly plan on 07/06/2020.
36 | He/she used that plan for over four months, then upgraded to the pro annual plan on 21/10/2020.
37 |
38 | - Customer 18 signed up for the 7-day free trial on 06/07/2020.
39 | After the trial ended, he/she upgraded to the pro monthly plan on 13/07/2020.
40 |
41 | - Customer 19 signed up for the 7-day free trial on 22/06/2020
42 | and upgraded to the pro monthly plan on 29/06/2020.
43 | After two months on that plan, he/she upgraded to the pro annual plan on 29/08/2020.
44 | */
45 |
--------------------------------------------------------------------------------
/Case Study #3 - Foodie-Fi/Syntax/B_query.sql:
--------------------------------------------------------------------------------
1 | ------------------------------
2 | --B. Data Analysis Questions--
3 | ------------------------------
4 |
5 | --1. How many customers has Foodie-Fi ever had?
6 |
7 | SELECT COUNT(DISTINCT customer_id) AS unique_customers
8 | FROM subscriptions;
9 |
10 |
11 | --2. What is the monthly distribution of trial plan start_date values for our dataset - use the start of the month as the group by value?
12 |
13 | SELECT
14 | MONTH(s.start_date) AS months,
15 | COUNT(*) AS distribution_values
16 | FROM subscriptions s
17 | JOIN plans p
18 | ON s.plan_id = p.plan_id
19 | WHERE p.plan_name = 'trial'
20 | GROUP BY MONTH(s.start_date);
21 |
22 |
23 | --3. What plan start_date values occur after the year 2020 for our dataset? Show the breakdown by count of events for each plan_name?
24 |
25 | SELECT
26 | YEAR(s.start_date) AS events,
27 | p.plan_name,
28 | COUNT(*) AS counts
29 | FROM subscriptions s
30 | JOIN plans p
31 | ON s.plan_id = p.plan_id
32 | WHERE YEAR(s.start_date) > 2020
33 | GROUP BY YEAR(s.start_date), p.plan_name;
34 |
35 |
36 | --4. What is the customer count and percentage of customers who have churned rounded to 1 decimal place?
37 |
38 | SELECT
39 | SUM(CASE WHEN p.plan_name = 'churn' THEN 1 END) AS churn_count,
40 | ROUND(CAST(100.0 * SUM(CASE WHEN p.plan_name = 'churn' THEN 1 END) AS FLOAT)
41 | / COUNT(DISTINCT customer_id), 1) AS churn_pct --rounded to 1 decimal place, as the question asks
42 | FROM subscriptions s
43 | JOIN plans p
44 | ON s.plan_id = p.plan_id;
45 |
46 |
47 | --5. How many customers have churned straight after their initial free trial - what percentage is this rounded to the nearest whole number?
48 |
49 | WITH nextPlan AS (
50 | SELECT
51 | s.customer_id,
52 | s.start_date,
53 | p.plan_name,
54 | LEAD(p.plan_name) OVER(PARTITION BY s.customer_id ORDER BY p.plan_id) AS next_plan
55 | FROM subscriptions s
56 | JOIN plans p ON s.plan_id = p.plan_id
57 | )
58 |
59 | SELECT
60 | COUNT(*) AS churn_after_trial,
61 | 100*COUNT(*) / (SELECT COUNT(DISTINCT customer_id) FROM subscriptions) AS pct
62 | FROM nextPlan
63 | WHERE plan_name = 'trial'
64 | AND next_plan = 'churn';
65 |
66 |
67 | --6. What is the number and percentage of customer plans after their initial free trial?
68 |
69 | WITH nextPlan AS (
70 | SELECT
71 | s.customer_id,
72 | s.start_date,
73 | p.plan_name,
74 | LEAD(p.plan_name) OVER(PARTITION BY s.customer_id
75 | ORDER BY p.plan_id) AS next_plan
76 | FROM subscriptions s
77 | JOIN plans p ON s.plan_id = p.plan_id
78 | )
79 |
80 | SELECT
81 | next_plan,
82 | COUNT(*) AS customer_plan,
83 | CAST(100 * COUNT(*) AS FLOAT)
84 | / (SELECT COUNT(DISTINCT customer_id) FROM subscriptions) AS percentage
85 | FROM nextPlan
86 | WHERE next_plan IS NOT NULL
87 | AND plan_name = 'trial'
88 | GROUP BY next_plan;
89 |
90 |
91 | --7. What is the customer count and percentage breakdown of all 5 plan_name values at 2020-12-31?
92 |
93 | WITH plansDate AS (
94 | SELECT
95 | s.customer_id,
96 | s.start_date,
97 | p.plan_id,
98 | p.plan_name,
99 | LEAD(s.start_date) OVER(PARTITION BY s.customer_id ORDER BY s.start_date) AS next_date
100 | FROM subscriptions s
101 | JOIN plans p ON s.plan_id = p.plan_id
102 | )
103 |
104 | SELECT
105 | plan_id,
106 | plan_name,
107 | COUNT(*) AS customers,
108 | CAST(100*COUNT(*) AS FLOAT)
109 | / (SELECT COUNT(DISTINCT customer_id) FROM subscriptions) AS conversion_rate
110 | FROM plansDate
111 | WHERE (next_date IS NOT NULL AND (start_date < '2020-12-31' AND next_date > '2020-12-31'))
112 | OR (next_date IS NULL AND start_date < '2020-12-31')
113 | GROUP BY plan_id, plan_name
114 | ORDER BY plan_id;
115 |
116 |
117 | --8. How many customers have upgraded to an annual plan in 2020?
118 |
119 | SELECT
120 | COUNT(DISTINCT customer_id) AS customer_count
121 | FROM subscriptions s
122 | JOIN plans p ON s.plan_id = p.plan_id
123 | WHERE p.plan_name = 'pro annual'
124 | AND YEAR(s.start_date) = 2020;
125 |
126 |
127 | --9. How many days on average does it take for a customer to upgrade to an annual plan from the day they join Foodie-Fi?
128 |
129 | WITH trialPlan AS (
130 | SELECT
131 | s.customer_id,
132 | s.start_date AS trial_date
133 | FROM subscriptions s
134 | JOIN plans p ON s.plan_id = p.plan_id
135 | WHERE p.plan_name = 'trial'
136 | ),
137 | annualPlan AS (
138 | SELECT
139 | s.customer_id,
140 | s.start_date AS annual_date
141 | FROM subscriptions s
142 | JOIN plans p ON s.plan_id = p.plan_id
143 | WHERE p.plan_name = 'pro annual'
144 | )
145 |
146 | SELECT
147 | AVG(CAST(DATEDIFF(d, trial_date, annual_date) AS FLOAT)) AS avg_days_to_annual
148 | FROM trialPlan t
149 | JOIN annualPlan a
150 | ON t.customer_id = a.customer_id;
151 |
152 |
153 | --10. Can you further break down this average value into 30 day periods (i.e. 0-30 days, 31-60 days etc)?
154 |
155 | WITH trialPlan AS (
156 | SELECT
157 | s.customer_id,
158 | s.start_date AS trial_date
159 | FROM subscriptions s
160 | JOIN plans p ON s.plan_id = p.plan_id
161 | WHERE p.plan_name = 'trial'
162 | ),
163 | annualPlan AS (
164 | SELECT
165 | s.customer_id,
166 | s.start_date AS annual_date
167 | FROM subscriptions s
168 | JOIN plans p ON s.plan_id = p.plan_id
169 | WHERE p.plan_name = 'pro annual'
170 | ),
171 | datesDiff AS (
172 | SELECT
173 | t.customer_id,
174 | DATEDIFF(d, trial_date, annual_date) AS diff
175 | FROM trialPlan t
176 | JOIN annualPlan a ON t.customer_id = a.customer_id
177 | ),
178 | daysRecursion AS (
179 | SELECT
180 | 0 AS start_period,
181 | 30 AS end_period
182 | UNION ALL
183 | SELECT
184 | end_period + 1 AS start_period,
185 | end_period + 30 AS end_period
186 | FROM daysRecursion
187 | WHERE end_period < 360
188 | )
189 |
190 | SELECT
191 | dr.start_period,
192 | dr.end_period,
193 | COUNT(*) AS customer_count
194 | FROM daysRecursion dr
195 | LEFT JOIN datesDiff dd
196 | ON (dd.diff >= dr.start_period AND dd.diff <= dr.end_period)
197 | GROUP BY dr.start_period, dr.end_period;
198 |
199 |
200 | --11. How many customers downgraded from a pro monthly to a basic monthly plan in 2020?
201 |
202 | WITH nextPlan AS (
203 | SELECT
204 | s.customer_id,
205 | s.start_date,
206 | p.plan_name,
207 | LEAD(p.plan_name) OVER(PARTITION BY s.customer_id ORDER BY p.plan_id) AS next_plan
208 | FROM subscriptions s
209 | JOIN plans p ON s.plan_id = p.plan_id
210 | )
211 |
212 | SELECT COUNT(*) AS pro_to_basic_monthly
213 | FROM nextPlan
214 | WHERE plan_name = 'pro monthly'
215 | AND next_plan = 'basic monthly'
216 | AND YEAR(start_date) = 2020;
217 |
--------------------------------------------------------------------------------
/Case Study #3 - Foodie-Fi/Syntax/C_query.sql:
--------------------------------------------------------------------------------
1 | ---------------------------------
2 | --C. Challenge Payment Question--
3 | ---------------------------------
4 |
5 | /*
6 | The Foodie-Fi team wants you to create a new payments table for the year 2020 that includes amounts paid
7 | by each customer in the subscriptions table with the following requirements:
8 | - monthly payments always occur on the same day of month as the original start_date of any monthly paid plan
9 | - upgrades from basic to monthly or pro plans are reduced by the current paid amount in that month and start immediately
10 | - upgrades from pro monthly to pro annual are paid at the end of the current billing period and also starts at the end of the month period
11 | - once a customer churns they will no longer make payments
12 | */
13 |
14 |
15 | --Use a recursive CTE to add one payment row per month for each monthly paid plan in 2020 until the customer changes plans, except 'pro annual'
16 | WITH dateRecursion AS (
17 | SELECT
18 | s.customer_id,
19 | s.plan_id,
20 | p.plan_name,
21 | s.start_date AS payment_date,
22 | --last_date: last day of the current plan
23 | CASE
24 | --if a customer kept using the current plan, last_date = '2020-12-31'
25 | WHEN LEAD(s.start_date) OVER(PARTITION BY s.customer_id ORDER BY s.start_date) IS NULL THEN '2020-12-31'
26 | --if a customer changed the plan, last_date = (month difference between start_date and changing date) + start_date
27 | ELSE DATEADD(MONTH,
28 | DATEDIFF(MONTH, start_date, LEAD(s.start_date) OVER(PARTITION BY s.customer_id ORDER BY s.start_date)),
29 | start_date) END AS last_date,
30 | p.price AS amount
31 | FROM subscriptions s
32 | JOIN plans p ON s.plan_id = p.plan_id
33 | --exclude trials because they didn't generate payments
34 | WHERE p.plan_name NOT IN ('trial')
35 | AND YEAR(start_date) = 2020
36 |
37 | UNION ALL
38 |
39 | SELECT
40 | customer_id,
41 | plan_id,
42 | plan_name,
43 | --increment payment_date by monthly
44 | DATEADD(MONTH, 1, payment_date) AS payment_date,
45 | last_date,
46 | amount
47 | FROM dateRecursion
48 | --stop incrementing when payment_date = last_date
49 | WHERE DATEADD(MONTH, 1, payment_date) <= last_date
50 | AND plan_name != 'pro annual'
51 | )
52 | --Create a new table [payments]
53 | SELECT
54 | customer_id,
55 | plan_id,
56 | plan_name,
57 | payment_date,
58 | amount,
59 | ROW_NUMBER() OVER(PARTITION BY customer_id ORDER BY payment_date) AS payment_order
60 | INTO payments
61 | FROM dateRecursion
62 | --exclude churns
63 | WHERE amount IS NOT NULL
64 | ORDER BY customer_id
65 | OPTION (MAXRECURSION 365); --allow up to 365 recursions (the default limit is 100)
66 |
--------------------------------------------------------------------------------
/Case Study #3 - Foodie-Fi/Syntax/D_query.sql:
--------------------------------------------------------------------------------
1 | --------------------------------
2 | --D. Outside The Box Questions--
3 | --------------------------------
4 |
5 | --1.How would you calculate the rate of growth for Foodie-Fi?
6 |
7 | /*
8 | - I chose the year 2020 to analyze because I already created the [payments] table in part C.
9 | - If you want to incorporate the 2021 data to see the whole picture (quarterly trends, a 2020-2021 comparison, etc.),
10 | create a new [payments] table and change all the date conditions in part C to '2021-12-31'.
11 | */
12 |
13 | WITH monthlyRevenue AS (
14 | SELECT
15 | MONTH(payment_date) AS months,
16 | SUM(amount) AS revenue
17 | FROM payments
18 | GROUP BY MONTH(payment_date)
19 | )
20 |
21 | SELECT
22 | months,
23 | revenue,
24 | (revenue-LAG(revenue) OVER(ORDER BY months))/revenue AS revenue_growth --denominator is the current month; use LAG(revenue) instead for the conventional growth rate
25 | FROM monthlyRevenue;
26 |
27 |
28 | --2. What key metrics would you recommend Foodie-Fi management to track over time to assess performance of their overall business?
29 |
30 | /*
31 | - Monthly revenue growth: How does Foodie-Fi's revenue increase or decrease month over month? Are there months in which revenue changes significantly?
32 | - Customer growth: How many new customers are added each month? What does the growth rate look like (x1.5, x2, ... after each month)?
33 | - Conversion rate: How many customers keep using Foodie-Fi after the trial? How does that rate change over time?
34 | - Churn rate: How many customers cancel their subscription each month? Which plan were they on?
35 | */
36 |
37 |
38 | --3. What are some key customer journeys or experiences that you would analyse further to improve customer retention?
39 |
40 | /*
41 | - Customers who downgraded their plan
42 | - Customers who upgraded from basic monthly to pro monthly or pro annual
43 | - Customers who cancelled the subscription
44 | */
45 |
46 |
47 | --4. If the Foodie-Fi team were to create an exit survey shown to customers who wish to cancel their subscription, what questions would you include in the survey?
48 |
49 | /*
50 | - What is the primary reason for the cancellation?
51 | + Price
52 | + Technical issues
53 | + Customer support
54 | + Found an alternative
55 | + Others (please specify)
56 | - Overall, how satisfied were you with the subscription? (Likert scale: Very Satisfied - Very Unsatisfied)
57 | - Would you consider using our services in the future? (Likert scale: Very Likely - Very Unlikely)
58 | - Would you recommend our company to a colleague, friend or family member? (Likert scale: Very Likely - Very Unlikely)
59 | */
60 |
61 |
62 | --5. What business levers could the Foodie-Fi team use to reduce the customer churn rate? How would you validate the effectiveness of your ideas?
63 |
64 | /*
65 | - From the exit survey, look for the most common reasons why customers cancelled their subscriptions
66 | + Price: offer seasonal discounts, extend the trial period, or add more benefits for customers
67 | + Service quality: work with the relevant department to fix the issues
68 | + Found an alternative: run a competitor analysis to understand their competitive advantages over us
69 | - To validate the effectiveness of those ideas, track:
70 | + Churn rate
71 | + Conversion rate
72 | */
73 |
--------------------------------------------------------------------------------
/Case Study #4 - Data Bank/README.md:
--------------------------------------------------------------------------------
1 | # 📊 Case Study #4 - Data Bank
2 |
3 |
4 |
5 | ## 📕 Table of Contents
6 | * [Business Task](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%234%20-%20Data%20Bank#%EF%B8%8F-bussiness-task)
7 | * [Entity Relationship Diagram](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%234%20-%20Data%20Bank#-entity-relationship-diagram)
8 | * [Case Study Questions](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%234%20-%20Data%20Bank#-case-study-questions)
9 | * [My Solution](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%234%20-%20Data%20Bank#-my-solution)
10 |
11 | ---
12 | ## 🛠️ Business Task
13 | Danny launched a new initiative, Data Bank, which runs banking activities and also acts as the world’s most secure distributed data storage platform!
14 | Customers are allocated cloud data storage limits which are directly linked to how much money they have in their accounts.
15 |
16 | The management team at Data Bank want to increase their total customer base - but also need some help tracking just how much data storage their customers will need.
17 | This case study is all about calculating metrics, growth and helping the business analyse their data in a smart way to better forecast and plan for their future developments!
18 |
19 | ---
20 | ## 🔐 Entity Relationship Diagram
21 |
22 |
23 |
24 | ---
25 | ## ❓ Case Study Questions
26 | ### A. Customer Nodes Exploration
27 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%234%20-%20Data%20Bank/Solution/A.%20Customer%20Nodes%20Exploration.md).
28 |
29 | 1. How many unique nodes are there on the Data Bank system?
30 | 2. What is the number of nodes per region?
31 | 3. How many customers are allocated to each region?
32 | 4. How many days on average are customers reallocated to a different node?
33 | 5. What is the median, 80th and 95th percentile for this same reallocation days metric for each region?
34 |
35 | ---
36 | ### B. Customer Transactions
37 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%234%20-%20Data%20Bank/Solution/B.%20Customer%20Transactions.md).
38 |
39 | 1. What is the unique count and total amount for each transaction type?
40 | 2. What is the average total historical deposit counts and amounts for all customers?
41 | 3. For each month - how many Data Bank customers make more than 1 deposit and either 1 purchase or 1 withdrawal in a single month?
42 | 4. What is the closing balance for each customer at the end of the month?
43 | 5. What is the percentage of customers who increase their closing balance by more than 5%?
44 |
45 | ---
46 | ## 🚀 My Solution
47 | * View the complete syntax [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%234%20-%20Data%20Bank/Syntax).
48 | * View the result and explanation [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%234%20-%20Data%20Bank/Solution).
49 |
--------------------------------------------------------------------------------
/Case Study #4 - Data Bank/Solution/A. Customer Nodes Exploration.md:
--------------------------------------------------------------------------------
1 | # 📊 Case Study #4 - Data Bank
2 | ## A. Customer Nodes Exploration
3 | ### 1. How many unique nodes are there on the Data Bank system?
4 | ```TSQL
5 | SELECT COUNT(DISTINCT node_id) AS unique_nodes
6 | FROM customer_nodes;
7 | ```
8 | | unique_nodes |
9 | |---------------|
10 | | 5 |
11 |
12 | ---
13 | ### 2. What is the number of nodes per region?
14 | ```TSQL
15 | SELECT
16 | r.region_id,
17 | r.region_name,
18 | COUNT(n.node_id) AS nodes
19 | FROM customer_nodes n
20 | JOIN regions r
21 | ON n.region_id = r.region_id
22 | GROUP BY r.region_id, r.region_name
23 | ORDER BY r.region_id;
24 | ```
25 | | region_id | region_name | nodes |
26 | |-----------|-------------|--------|
27 | | 1 | Australia | 770 |
28 | | 2 | America | 735 |
29 | | 3 | Africa | 714 |
30 | | 4 | Asia | 665 |
31 | | 5 | Europe | 616 |
32 |
33 | ---
34 | ### 3. How many customers are allocated to each region?
35 | ```TSQL
36 | SELECT
37 | r.region_id,
38 | r.region_name,
39 | COUNT(DISTINCT n.customer_id) AS customers
40 | FROM customer_nodes n
41 | JOIN regions r
42 | ON n.region_id = r.region_id
43 | GROUP BY r.region_id, r.region_name
44 | ORDER BY r.region_id;
45 | ```
46 | | region_id | region_name | customers |
47 | |-----------|-------------|------------|
48 | | 1 | Australia | 110 |
49 | | 2 | America | 105 |
50 | | 3 | Africa | 102 |
51 | | 4 | Asia | 95 |
52 | | 5 | Europe | 88 |
53 |
54 | ---
55 | ### 4. How many days on average are customers reallocated to a different node?
56 | * Create a CTE ```customerDates``` containing the first date for each customer in each node
57 | * Create a CTE ```reallocation``` to calculate the difference in days between the first date in one node and the first date in the next node
58 | * Take the average of those day differences
59 | ```TSQL
60 | WITH customerDates AS (
61 | SELECT
62 | customer_id,
63 | region_id,
64 | node_id,
65 | MIN(start_date) AS first_date
66 | FROM customer_nodes
67 | GROUP BY customer_id, region_id, node_id
68 | ),
69 | reallocation AS (
70 | SELECT
71 | customer_id,
72 | node_id,
73 | region_id,
74 | first_date,
75 | DATEDIFF(DAY, first_date,
76 | LEAD(first_date) OVER(PARTITION BY customer_id
77 | ORDER BY first_date)) AS moving_days
78 | FROM customerDates
79 | )
80 |
81 | SELECT
82 | AVG(CAST(moving_days AS FLOAT)) AS avg_moving_days
83 | FROM reallocation;
84 | ```
85 | | avg_moving_days |
86 | |------------------|
87 | | 23.6889920424403 |
88 |
89 | On average, it takes about 24 days for a customer to be reallocated to a different node.
90 |
91 | ---
92 | ### 5. What is the median, 80th and 95th percentile for this same reallocation days metric for each region?
93 | Reuse the two CTEs from the previous question, ```customerDates``` and ```reallocation```, to calculate the median, 80th and 95th percentiles of the reallocation-days metric in each region.
94 | ```TSQL
95 | WITH customerDates AS (
96 | SELECT
97 | customer_id,
98 | region_id,
99 | node_id,
100 | MIN(start_date) AS first_date
101 | FROM customer_nodes
102 | GROUP BY customer_id, region_id, node_id
103 | ),
104 | reallocation AS (
105 | SELECT
106 | customer_id,
107 | region_id,
108 | node_id,
109 | first_date,
110 | DATEDIFF(DAY, first_date,
111 | LEAD(first_date) OVER(PARTITION BY customer_id
112 | ORDER BY first_date)) AS moving_days
113 | FROM customerDates
114 | )
115 |
116 | SELECT
117 | DISTINCT r.region_id,
118 | rg.region_name,
119 | PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY r.moving_days) OVER(PARTITION BY r.region_id) AS median,
120 | PERCENTILE_CONT(0.8) WITHIN GROUP (ORDER BY r.moving_days) OVER(PARTITION BY r.region_id) AS percentile_80,
121 | PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY r.moving_days) OVER(PARTITION BY r.region_id) AS percentile_95
122 | FROM reallocation r
123 | JOIN regions rg ON r.region_id = rg.region_id
124 | WHERE moving_days IS NOT NULL;
125 | ```
126 | | region_id | region_name | median | percentile_80 | percentile_95 |
127 | |-----------|-------------|--------|---------------|----------------|
128 | | 1 | Australia | 22 | 31 | 54 |
129 | | 2 | America | 21 | 33.2 | 57 |
130 | | 3 | Africa | 21 | 33.2 | 58.8 |
131 | | 4 | Asia | 22 | 32.4 | 49.85 |
132 | | 5 | Europe | 22 | 31 | 54.3 |
133 |
134 | ---
135 | My solution for **[B. Customer Transactions](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%234%20-%20Data%20Bank/Solution/B.%20Customer%20Transactions.md)**.
136 |
--------------------------------------------------------------------------------
/Case Study #4 - Data Bank/Syntax/A_query.sql:
--------------------------------------------------------------------------------
1 | ---------------------------------
2 | --A. Customer Nodes Exploration--
3 | ---------------------------------
4 |
5 | --1. How many unique nodes are there on the Data Bank system?
6 |
7 | SELECT COUNT(DISTINCT node_id) AS unique_nodes
8 | FROM customer_nodes;
9 |
10 |
11 | --2. What is the number of nodes per region?
12 |
13 | SELECT
14 | r.region_id,
15 | r.region_name,
16 | COUNT(n.node_id) AS nodes
17 | FROM customer_nodes n
18 | JOIN regions r
19 | ON n.region_id = r.region_id
20 | GROUP BY r.region_id, r.region_name
21 | ORDER BY r.region_id;
22 |
23 |
24 | --3. How many customers are allocated to each region?
25 |
26 | SELECT
27 | r.region_id,
28 | r.region_name,
29 | COUNT(DISTINCT n.customer_id) AS customers
30 | FROM customer_nodes n
31 | JOIN regions r
32 | ON n.region_id = r.region_id
33 | GROUP BY r.region_id, r.region_name
34 | ORDER BY r.region_id;
35 |
36 |
37 | --4. How many days on average are customers reallocated to a different node?
38 |
39 | WITH customerDates AS (
40 | SELECT
41 | customer_id,
42 | region_id,
43 | node_id,
44 | MIN(start_date) AS first_date
45 | FROM customer_nodes
46 | GROUP BY customer_id, region_id, node_id
47 | ),
48 | reallocation AS (
49 | SELECT
50 | customer_id,
51 | node_id,
52 | region_id,
53 | first_date,
54 | DATEDIFF(DAY, first_date,
55 | LEAD(first_date) OVER(PARTITION BY customer_id
56 | ORDER BY first_date)) AS moving_days
57 | FROM customerDates
58 | )
59 |
60 | SELECT
61 | AVG(CAST(moving_days AS FLOAT)) AS avg_moving_days
62 | FROM reallocation;
63 |
64 |
65 | --5. What is the median, 80th and 95th percentile for this same reallocation days metric for each region?
66 |
67 | WITH customerDates AS (
68 | SELECT
69 | customer_id,
70 | region_id,
71 | node_id,
72 | MIN(start_date) AS first_date
73 | FROM customer_nodes
74 | GROUP BY customer_id, region_id, node_id
75 | ),
76 | reallocation AS (
77 | SELECT
78 | customer_id,
79 | region_id,
80 | node_id,
81 | first_date,
82 | DATEDIFF(DAY, first_date, LEAD(first_date) OVER(PARTITION BY customer_id ORDER BY first_date)) AS moving_days
83 | FROM customerDates
84 | )
85 |
86 | SELECT
87 | DISTINCT r.region_id,
88 | rg.region_name,
89 | PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY r.moving_days) OVER(PARTITION BY r.region_id) AS median,
90 | PERCENTILE_CONT(0.8) WITHIN GROUP (ORDER BY r.moving_days) OVER(PARTITION BY r.region_id) AS percentile_80,
91 | PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY r.moving_days) OVER(PARTITION BY r.region_id) AS percentile_95
92 | FROM reallocation r
93 | JOIN regions rg ON r.region_id = rg.region_id
94 | WHERE moving_days IS NOT NULL;
95 |
--------------------------------------------------------------------------------
/Case Study #4 - Data Bank/Syntax/B_query.sql:
--------------------------------------------------------------------------------
1 | ----------------------------
2 | --B. Customer Transactions--
3 | ----------------------------
4 |
5 | --1. What is the unique count and total amount for each transaction type?
6 |
7 | SELECT
8 | txn_type,
9 | COUNT(*) AS unique_count,
10 | SUM(txn_amount) AS total_amount
11 | FROM customer_transactions
12 | GROUP BY txn_type;
13 |
14 |
15 | --2. What is the average total historical deposit counts and amounts for all customers?
16 |
17 | WITH customerDeposit AS (
18 | SELECT
19 | customer_id,
20 | txn_type,
21 | COUNT(*) AS dep_count,
22 | SUM(txn_amount) AS dep_amount
23 | FROM customer_transactions
24 | WHERE txn_type = 'deposit'
25 | GROUP BY customer_id, txn_type
26 | )
27 |
28 | SELECT
29 | AVG(dep_count) AS avg_dep_count,
30 | AVG(dep_amount) AS avg_dep_amount
31 | FROM customerDeposit;
32 |
33 |
34 | --3. For each month - how many Data Bank customers make more than 1 deposit and either 1 purchase or 1 withdrawal in a single month?
35 |
36 | WITH cte_transaction AS (
37 | SELECT
38 | customer_id,
39 | MONTH(txn_date) AS months,
40 | SUM(CASE WHEN txn_type = 'deposit' THEN 1 ELSE 0 END) AS deposit_count,
41 | SUM(CASE WHEN txn_type = 'purchase' THEN 1 ELSE 0 END) AS purchase_count,
42 | SUM(CASE WHEN txn_type = 'withdrawal' THEN 1 ELSE 0 END) AS withdrawal_count
43 | FROM customer_transactions
44 | GROUP BY customer_id, MONTH(txn_date)
45 | )
46 |
47 | SELECT
48 | months,
49 | COUNT(customer_id) AS customer_count
50 | FROM cte_transaction
51 | WHERE deposit_count > 1
52 | AND (purchase_count = 1 OR withdrawal_count = 1)
53 | GROUP BY months;
54 |
55 |
56 | --4. What is the closing balance for each customer at the end of the month?
57 |
58 | --Last day of the month containing the latest transaction date in the dataset
59 | DECLARE @maxDate DATE = (SELECT EOMONTH(MAX(txn_date)) FROM customer_transactions)
60 |
61 | --CTE 1: Monthly transactions of each customer
62 | WITH monthly_transactions AS (
63 | SELECT
64 | customer_id,
65 | EOMONTH(txn_date) AS end_date,
66 | SUM(CASE WHEN txn_type IN ('withdrawal', 'purchase') THEN -txn_amount
67 | ELSE txn_amount END) AS transactions
68 | FROM customer_transactions
69 | GROUP BY customer_id, EOMONTH(txn_date)
70 | ),
71 |
72 | --CTE 2: Increment last days of each month till they are equal to @maxDate
73 | recursive_dates AS (
74 | SELECT
75 | DISTINCT customer_id,
76 | CAST('2020-01-31' AS DATE) AS end_date
77 | FROM customer_transactions
78 | UNION ALL
79 | SELECT
80 | customer_id,
81 | EOMONTH(DATEADD(MONTH, 1, end_date)) AS end_date
82 | FROM recursive_dates
83 | WHERE EOMONTH(DATEADD(MONTH, 1, end_date)) <= @maxDate
84 | )
85 |
86 | SELECT
87 | r.customer_id,
88 | r.end_date,
89 | COALESCE(m.transactions, 0) AS transactions,
90 | SUM(m.transactions) OVER (PARTITION BY r.customer_id ORDER BY r.end_date
91 | ROWS UNBOUNDED PRECEDING) AS closing_balance
92 | FROM recursive_dates r
93 | LEFT JOIN monthly_transactions m
94 | ON r.customer_id = m.customer_id
95 | AND r.end_date = m.end_date;
96 |
97 |
98 | --5. What is the percentage of customers who increase their closing balance by more than 5%?
99 |
100 | --Last day of the month containing the latest transaction date in the dataset (same as Q4)
101 | DECLARE @maxDate DATE = (SELECT EOMONTH(MAX(txn_date)) FROM customer_transactions)
102 |
103 | --CTE 1: Monthly transactions of each customer (Q4)
104 | WITH monthly_transactions AS (
105 | SELECT
106 | customer_id,
107 | EOMONTH(txn_date) AS end_date,
108 | SUM(CASE WHEN txn_type IN ('withdrawal', 'purchase') THEN -txn_amount
109 | ELSE txn_amount END) AS transactions
110 | FROM customer_transactions
111 | GROUP BY customer_id, EOMONTH(txn_date)
112 | ),
113 |
114 | --CTE 2: Increment last days of each month till they are equal to @maxDate (Q4)
115 | recursive_dates AS (
116 | SELECT
117 | DISTINCT customer_id,
118 | CAST('2020-01-31' AS DATE) AS end_date
119 | FROM customer_transactions
120 | UNION ALL
121 | SELECT
122 | customer_id,
123 | EOMONTH(DATEADD(MONTH, 1, end_date)) AS end_date
124 | FROM recursive_dates
125 | WHERE EOMONTH(DATEADD(MONTH, 1, end_date)) <= @maxDate
126 | ),
127 |
128 | -- CTE 3: Closing balance of each customer by monthly (Q4)
129 | customers_balance AS (
130 | SELECT
131 | r.customer_id,
132 | r.end_date,
133 | COALESCE(m.transactions, 0) AS transactions,
134 | SUM(m.transactions) OVER (PARTITION BY r.customer_id ORDER BY r.end_date
135 | ROWS UNBOUNDED PRECEDING) AS closing_balance
136 | FROM recursive_dates r
137 | LEFT JOIN monthly_transactions m
138 | ON r.customer_id = m.customer_id
139 | AND r.end_date = m.end_date
140 | ),
141 |
142 | --CTE 4: CTE 3 & next_balance
143 | customers_next_balance AS (
144 | SELECT *,
145 | LEAD(closing_balance) OVER(PARTITION BY customer_id ORDER BY end_date) AS next_balance
146 | FROM customers_balance
147 | ),
148 |
149 | --CTE 5: Calculate the increase percentage of closing balance for each customer
150 | pct_increase AS (
151 | SELECT *,
152 | 100.0*(next_balance-closing_balance)/closing_balance AS pct
153 | FROM customers_next_balance
154 | WHERE closing_balance != 0 AND next_balance IS NOT NULL
155 | )
156 |
157 | --Materialize into a temp table to avoid the warning: "Null value is eliminated by an aggregate or other SET operation"
158 | SELECT *
159 | INTO #temp
160 | FROM pct_increase;
161 |
162 | --Percentage of customers whose closing balance increased by more than 5% over the previous month
163 | SELECT CAST(100.0*COUNT(DISTINCT customer_id) AS FLOAT)
164 | / (SELECT COUNT(DISTINCT customer_id) FROM customer_transactions) AS pct_customers
165 | FROM #temp
166 | WHERE pct > 5;
167 |
--------------------------------------------------------------------------------
/Case Study #5 - Data Mart/README.md:
--------------------------------------------------------------------------------
1 | # 🛒 Case Study #5 - Data Mart
2 |
3 |
4 |
5 | ## 📕 Table of Contents
6 | * [Business Task](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%235%20-%20Data%20Mart#%EF%B8%8F-bussiness-task)
7 | * [Entity Relationship Diagram](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%235%20-%20Data%20Mart#-entity-relationship-diagram)
8 | * [Case Study Questions](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%235%20-%20Data%20Mart#-case-study-questions)
9 | * [My Solution](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%235%20-%20Data%20Mart#-my-solution)
10 |
11 | ---
12 | ## 🛠️ Business Task
13 | Data Mart is an online supermarket that specialises in fresh produce.
14 | In June 2020 - large scale supply changes were made at Data Mart.
15 | All Data Mart products now use sustainable packaging methods in every single step from the farm all the way to the customer.
16 |
17 | Danny needs your help to analyse and quantify the impact of this change on the sales performance for Data Mart and its separate business areas.
18 | The key business questions to answer are the following:
19 | * What was the quantifiable impact of the changes introduced in June 2020?
20 | * Which platform, region, segment and customer types were the most impacted by this change?
21 | * What can we do about future introduction of similar sustainability updates to the business to minimise impact on sales?
22 |
23 | ---
24 | ## 🔐 Entity Relationship Diagram
25 |
26 |
27 |
28 | ---
29 | ## ❓ Case Study Questions
30 | ### A. Data Cleansing Steps
31 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%235%20-%20Data%20Mart/Solution/A.%20Data%20Cleansing%20Steps.md).
32 |
33 | In a single query, perform the following operations and generate a new table in the ```data_mart``` schema named ```clean_weekly_sales```:
34 | * Convert the ```week_date``` to a ```DATE``` format
35 | * Add a ```week_number``` as the second column for each ```week_date``` value, for example any value from the 1st of January to 7th of January will be 1, 8th to 14th will be 2 etc
36 | * Add a ```month_number``` with the calendar month for each ```week_date``` value as the 3rd column
37 | * Add a ```calendar_year``` column as the 4th column containing either 2018, 2019 or 2020 values
38 | * Add a new column called ```age_band``` after the original ```segment``` column using the following mapping on the number inside the ```segment``` value
39 |
40 | | segment | age_band |
41 | |--------|--------------|
42 | | 1 | Young Adults |
43 | | 2 | Middle Aged |
44 | | 3 or 4 | Retirees |
45 |
46 | * Add a new ```demographic``` column using the following mapping for the first letter in the ```segment``` values
47 |
48 | | segment | demographic |
49 | |---------|-------------|
50 | | C | Couples |
51 | | F | Families |
52 |
53 | * Replace all ```null``` string values with an ```"unknown"``` string value in the original ```segment``` column as well as the new ```age_band``` and ```demographic``` columns
54 | * Generate a new ```avg_transaction``` column as the sales value divided by ```transactions``` rounded to 2 decimal places for each record
55 |
56 | ---
57 | ### B. Data Exploration
58 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%235%20-%20Data%20Mart/Solution/B.%20Data%20Exploration.md).
59 |
60 | 1. What day of the week is used for each ```week_date``` value?
61 | 2. What range of week numbers are missing from the dataset?
62 | 3. How many total transactions were there for each year in the dataset?
63 | 4. What is the total sales for each region for each month?
64 | 5. What is the total count of transactions for each platform
65 | 6. What is the percentage of sales for Retail vs Shopify for each month?
66 | 7. What is the percentage of sales by demographic for each year in the dataset?
67 | 8. Which ```age_band``` and ```demographic``` values contribute the most to Retail sales?
68 | 9. Can we use the ```avg_transaction``` column to find the average transaction size for each year for Retail vs Shopify? If not - how would you calculate it instead?
69 |
70 | ---
71 | ### C. Before & After Analysis
72 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%235%20-%20Data%20Mart/Solution/C.%20Before%20%26%20After%20Analysis.md).
73 |
74 | This technique is usually used when we examine an important event and want to inspect the impact before and after a certain point in time.
75 | Take the week_date value of 2020-06-15 as the baseline week, when the Data Mart sustainable packaging changes came into effect.
76 | All week_date values from 2020-06-15 onwards form the period after the change, and the previous week_date values form the period before.
77 |
78 | Using this analysis approach - answer the following questions:
79 |
80 | 1. What is the total sales for the 4 weeks before and after 2020-06-15? What is the growth or reduction rate in actual values and percentage of sales?
81 | 2. What about the entire 12 weeks before and after?
82 | 3. How do the sale metrics for these 2 periods before and after compare with the previous years in 2018 and 2019?
83 |
84 | ---
85 | ### D. Bonus Question
86 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%235%20-%20Data%20Mart/Solution/D.%20Bonus%20Question.md).
87 |
88 | Which areas of the business have the highest negative impact in sales metrics performance in 2020 for the 12 week before and after period?
89 | * ```region```
90 | * ```platform```
91 | * ```age_band```
92 | * ```demographic```
93 | * ```customer_type```
94 |
95 | Do you have any further recommendations for Danny’s team at Data Mart or any interesting insights based on this analysis?
95 |
96 | ---
97 | ## 🚀 My Solution
98 | * View the complete syntax [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%235%20-%20Data%20Mart/Syntax).
99 | * View the result and explanation [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/d7bbc5cd9ab25fe7429644a36bb0f62a8b292792/Case%20Study%20%235%20-%20Data%20Mart/Solution).
100 |
--------------------------------------------------------------------------------
/Case Study #5 - Data Mart/Solution/A. Data Cleansing Steps.md:
--------------------------------------------------------------------------------
1 | # 🛒 Case Study #5 - Data Mart
2 | ## A. Data Cleansing Steps
3 | In a single query, perform the following operations and generate a new table in the ```data_mart``` schema named ```clean_weekly_sales```:
4 | * Convert the ```week_date``` to a ```DATE``` format
5 | * Add a ```week_number``` as the second column for each ```week_date``` value, for example any value from the 1st of January to 7th of January will be 1, 8th to 14th will be 2 etc
6 | * Add a ```month_number``` with the calendar month for each ```week_date``` value as the 3rd column
7 | * Add a ```calendar_year``` column as the 4th column containing either 2018, 2019 or 2020 values
8 | * Add a new column called ```age_band``` after the original ```segment``` column using the following mapping on the number inside the ```segment``` value
9 |
10 | | segment | age_band |
11 | |--------|--------------|
12 | | 1 | Young Adults |
13 | | 2 | Middle Aged |
14 | | 3 or 4 | Retirees |
15 |
16 | * Add a new ```demographic``` column using the following mapping for the first letter in the ```segment``` values
17 |
18 | | segment | demographic |
19 | |---------|-------------|
20 | | C | Couples |
21 | | F | Families |
22 |
23 | * Replace all ```null``` string values with an ```"unknown"``` string value in the original ```segment``` column as well as the new ```age_band``` and ```demographic``` columns
24 | * Generate a new ```avg_transaction``` column as the sales value divided by ```transactions``` rounded to 2 decimal places for each record
25 |
26 | ---
27 | ## Solution
28 |
29 | | Columns | Actions to take |
30 | |------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------|
31 | | week_date | Convert to ```DATE``` using ```CONVERT``` |
32 | | week_number* | Extract number of week using ```DATEPART``` |
33 | | month_number* | Extract month using ```DATEPART``` |
34 | | calendar_year* | Extract year using ```DATEPART``` |
35 | | region | No changes |
36 | | platform | No changes |
37 | | segment | No changes |
38 | | customer_type | No changes |
39 | | age_band* | Use ```CASE WHEN``` to categorize ```segment```: '1' = ```Young Adults```, '2' = ```Middle Aged```, '3' or '4' = ```Retirees``` and null = ```unknown``` |
40 | | demographic* | Use ```CASE WHEN``` to categorize ```segment```: 'C' = ```Couples```, 'F' = ```Families``` and null = ```unknown``` |
41 | | transactions | No changes |
42 | | sales | ```CAST``` to ```bigint``` for further aggregations |
43 | | avg_transaction* | Divide ```sales``` by ```transactions``` and round to 2 decimal places |
44 |
45 | ```TSQL
46 | SELECT
47 | CONVERT(date, week_date, 3) AS week_date,
48 | DATEPART(week, CONVERT(date, week_date, 3)) AS week_number,
49 | DATEPART(month, CONVERT(date, week_date, 3)) AS month_number,
50 | DATEPART(year, CONVERT(date, week_date, 3)) AS calendar_year,
51 | region,
52 | platform,
53 | segment,
54 | customer_type,
55 | CASE
56 | WHEN RIGHT(segment, 1) = '1' THEN 'Young Adults'
57 | WHEN RIGHT(segment, 1) = '2' THEN 'Middle Aged'
58 | WHEN RIGHT(segment, 1) IN ('3', '4') THEN 'Retirees'
59 | ELSE 'unknown' END AS age_band,
60 | CASE
61 | WHEN LEFT(segment, 1) = 'C' THEN 'Couples'
62 | WHEN LEFT(segment, 1) = 'F' THEN 'Families'
63 | ELSE 'unknown' END AS demographic,
64 | transactions,
65 | CAST(sales AS bigint) AS sales,
66 | ROUND(CAST(sales AS FLOAT)/transactions, 2) AS avg_transaction
67 | INTO clean_weekly_sales
68 | FROM weekly_sales;
69 | ```
70 | The first 10 rows:
71 | | week_date | week_number | month_number | calendar_year | region | platform | segment | customer_type | age_band | demographic | transactions | sales | avg_transaction |
72 | |------------|-------------|--------------|---------------|--------|----------|---------|---------------|--------------|-------------|--------------|----------|------------------|
73 | | 2020-08-31 | 36 | 8 | 2020 | ASIA | Retail | C3 | New | Retirees | Couples | 120631 | 3656163 | 30.31 |
74 | | 2020-08-31 | 36 | 8 | 2020 | ASIA | Retail | F1 | New | Young Adults | Families | 31574 | 996575 | 31.56 |
75 | | 2020-08-31 | 36 | 8 | 2020 | USA | Retail | null | Guest | unknown | unknown | 529151 | 16509610 | 31.2 |
76 | | 2020-08-31 | 36 | 8 | 2020 | EUROPE | Retail | C1 | New | Young Adults | Couples | 4517 | 141942 | 31.42 |
77 | | 2020-08-31 | 36 | 8 | 2020 | AFRICA | Retail | C2 | New | Middle Aged | Couples | 58046 | 1758388 | 30.29 |
78 | | 2020-08-31 | 36 | 8 | 2020 | CANADA | Shopify | F2 | Existing | Middle Aged | Families | 1336 | 243878 | 182.54 |
79 | | 2020-08-31 | 36 | 8 | 2020 | AFRICA | Shopify | F3 | Existing | Retirees | Families | 2514 | 519502 | 206.64 |
80 | | 2020-08-31 | 36 | 8 | 2020 | ASIA | Shopify | F1 | Existing | Young Adults | Families | 2158 | 371417 | 172.11 |
81 | | 2020-08-31 | 36 | 8 | 2020 | AFRICA | Shopify | F2 | New | Middle Aged | Families | 318 | 49557 | 155.84 |
82 | | 2020-08-31 | 36 | 8 | 2020 | AFRICA | Retail | C3 | New | Retirees | Couples | 111032 | 3888162 | 35.02 |
83 |
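As a quick sanity check (my own addition, assuming the `clean_weekly_sales` table created above), we can confirm that every distinct `segment` value maps to exactly one `age_band`/`demographic` pair:

```TSQL
--Each segment value should appear with a single age_band/demographic combination
SELECT
  segment,
  age_band,
  demographic,
  COUNT(*) AS record_count
FROM clean_weekly_sales
GROUP BY segment, age_band, demographic
ORDER BY segment;
```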
84 | ---
85 | My solution for **[B. Data Exploration](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%235%20-%20Data%20Mart/Solution/B.%20Data%20Exploration.md)**.
86 |
--------------------------------------------------------------------------------
/Case Study #5 - Data Mart/Solution/C. Before & After Analysis.md:
--------------------------------------------------------------------------------
1 | # 🛒 Case Study #5 - Data Mart
2 | ## C. Before & After Analysis
3 |
4 | This technique is usually used when we want to inspect the impact of an important event before and after a certain point in time.
5 | We take the week_date value of 2020-06-15 as the baseline week, when the Data Mart sustainable packaging changes came into effect:
6 | all week_date values from 2020-06-15 onwards mark the start of the period after the change, and the previous week_date values fall before it.
7 |
8 | Using this analysis approach - answer the following questions:
9 |
10 | ### 1. What is the total sales for the 4 weeks before and after 2020-06-15? What is the growth or reduction rate in actual values and percentage of sales?
11 | ```TSQL
12 | --Find the week_number of '2020-06-15' (@weekNum=25)
13 | DECLARE @weekNum int = (
14 | SELECT DISTINCT week_number
15 | FROM clean_weekly_sales
16 | WHERE week_date = '2020-06-15')
17 |
18 | --Find the total sales of 4 weeks before and after @weekNum
19 | WITH salesChanges AS (
20 | SELECT
21 | SUM(CASE WHEN week_number BETWEEN @weekNum-4 AND @weekNum-1 THEN sales END) AS before_changes,
22 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+3 THEN sales END) AS after_changes
23 | FROM clean_weekly_sales
24 | WHERE calendar_year = 2020
25 | )
26 |
27 | SELECT *,
28 | CAST(100.0 * (after_changes-before_changes)/before_changes AS decimal(5,2)) AS pct_change
29 | FROM salesChanges;
30 | ```
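Note that the `WHERE calendar_year = 2020` filter is essential here: `week_number` values repeat every year, so without it the before/after windows would also sum the 2018 and 2019 sales.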
31 | | before_changes | after_changes | pct_change |
32 | |----------------|---------------|-------------|
33 | | 2345878357 | 2318994169 | -1.15 |
34 |
35 | ---
36 | ### 2. What about the entire 12 weeks before and after?
37 | ```TSQL
38 | --Find the week_number of '2020-06-15' (@weekNum=25)
39 | DECLARE @weekNum int = (
40 | SELECT DISTINCT week_number
41 | FROM clean_weekly_sales
42 | WHERE week_date = '2020-06-15')
43 |
44 | --Find the total sales of 12 weeks before and after @weekNum
45 | WITH salesChanges AS (
46 | SELECT
47 | SUM(CASE WHEN week_number BETWEEN @weekNum-12 AND @weekNum-1 THEN sales END) AS before_changes,
48 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+11 THEN sales END) AS after_changes
49 | FROM clean_weekly_sales
50 | WHERE calendar_year = 2020
51 | )
52 |
53 | SELECT *,
54 | CAST(100.0 * (after_changes-before_changes)/before_changes AS decimal(5,2)) AS pct_change
55 | FROM salesChanges;
56 | ```
57 | | before_changes | after_changes | pct_change |
58 | |----------------|---------------|-------------|
59 | | 7126273147 | 6973947753 | -2.14 |
60 |
61 | ---
62 | ### 3. How do the sale metrics for these 2 periods before and after compare with the previous years in 2018 and 2019?
63 | Part 1: How do the sales metrics for the 4 weeks before and after compare with the previous years 2018 and 2019?
64 | ```TSQL
65 | --Find the week_number of '2020-06-15' (@weekNum=25)
66 | DECLARE @weekNum int = (
67 | SELECT DISTINCT week_number
68 | FROM clean_weekly_sales
69 | WHERE week_date = '2020-06-15')
70 |
71 | --Find the total sales of 4 weeks before and after @weekNum
72 | WITH salesChanges AS (
73 | SELECT
74 | calendar_year,
75 | SUM(CASE WHEN week_number BETWEEN @weekNum-4 AND @weekNum-1 THEN sales END) AS before_sales,
76 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+3 THEN sales END) AS after_sales
77 | FROM clean_weekly_sales
78 | GROUP BY calendar_year
79 | )
80 |
81 | SELECT *,
82 | CAST(100.0 * (after_sales-before_sales)/before_sales AS decimal(5,2)) AS pct_change
83 | FROM salesChanges
84 | ORDER BY calendar_year;
85 | ```
86 | | calendar_year | before_sales | after_sales | pct_change |
87 | |---------------|--------------|-------------|-------------|
88 | | 2018 | 1602763447 | 2129242914 | 32.85 |
89 | | 2019 | 1688891616 | 2252326390 | 33.36 |
90 | | 2020 | 1760870267 | 2318994169 | 31.70 |
91 |
92 | Part 2: How do the sales metrics for the 12 weeks before and after compare with the previous years 2018 and 2019?
93 | ```TSQL
94 | --Find the week_number of '2020-06-15' (@weekNum=25)
95 | DECLARE @weekNum int = (
96 | SELECT DISTINCT week_number
97 | FROM clean_weekly_sales
98 | WHERE week_date = '2020-06-15')
99 |
100 | --Find the total sales of 12 weeks before and after @weekNum
101 | WITH salesChanges AS (
102 | SELECT
103 | calendar_year,
104 | SUM(CASE WHEN week_number BETWEEN @weekNum-12 AND @weekNum-1 THEN sales END) AS before_sales,
105 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+11 THEN sales END) AS after_sales
106 | FROM clean_weekly_sales
107 | GROUP BY calendar_year
108 | )
109 |
110 | SELECT *,
111 | CAST(100.0 * (after_sales-before_sales)/before_sales AS decimal(5,2)) AS pct_change
112 | FROM salesChanges
113 | ORDER BY calendar_year;
114 | ```
115 | | calendar_year | before_sales | after_sales | pct_change |
116 | |---------------|--------------|-------------|-------------|
117 | | 2018 | 6396562317 | 6500818510 | 1.63 |
118 | | 2019 | 6883386397 | 6862646103 | -0.30 |
119 | | 2020 | 7126273147 | 6973947753 | -2.14 |
120 |
121 | ---
122 | My solution for **[D. Bonus Question](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%235%20-%20Data%20Mart/Solution/D.%20Bonus%20Question.md)**.
123 |
--------------------------------------------------------------------------------
/Case Study #5 - Data Mart/Solution/D. Bonus Question.md:
--------------------------------------------------------------------------------
1 | # 🛒 Case Study #5 - Data Mart
2 | ## D. Bonus Question
3 | Which areas of the business have the highest negative impact in sales metrics performance in 2020 for the 12 week before and after period?
4 | * ```region```
5 | * ```platform```
6 | * ```age_band```
7 | * ```demographic```
8 | * ```customer_type```
9 |
10 | Do you have any further recommendations for Danny’s team at Data Mart or any interesting insights based off this analysis?
11 |
12 | ---
13 | ## Solution
14 | First, use the technique in part C to find the ```week_number``` of ```2020-06-15```.
15 | ```TSQL
16 | --Find the week_number of '2020-06-15' (@weekNum=25)
17 | DECLARE @weekNum int = (
18 | SELECT DISTINCT week_number
19 | FROM clean_weekly_sales
20 | WHERE week_date = '2020-06-15'
21 | AND calendar_year = 2020)
22 | ```
23 | Then, depending on the area we want to analyze, change the column name in the ```SELECT``` and ```GROUP BY```.
24 |
25 | Remember to include the ```DECLARE @weekNum``` statement at the beginning of each part below.
26 |
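Alternatively, a dynamic-SQL sketch (my own variation, not part of the original solution) can swap the dimension column in one place instead of copying the query five times; `QUOTENAME` guards the injected column name:

```TSQL
--Hypothetical template: set @dim to any of the five dimension columns
DECLARE @dim sysname = 'region';
DECLARE @sql nvarchar(max) = N'
DECLARE @weekNum int = (
  SELECT DISTINCT week_number
  FROM clean_weekly_sales
  WHERE week_date = ''2020-06-15'');
SELECT ' + QUOTENAME(@dim) + N',
  SUM(CASE WHEN week_number BETWEEN @weekNum-12 AND @weekNum-1 THEN sales END) AS before_changes,
  SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+11 THEN sales END) AS after_changes
FROM clean_weekly_sales
GROUP BY ' + QUOTENAME(@dim) + N';';
EXEC sp_executesql @sql;
```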
27 | ---
28 | ### 1. Sales changes by ```region```
29 | ```TSQL
30 | WITH regionChanges AS (
31 | SELECT
32 | region,
33 | SUM(CASE WHEN week_number BETWEEN @weekNum-12 AND @weekNum-1 THEN sales END) AS before_changes,
34 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+11 THEN sales END) AS after_changes
35 | FROM clean_weekly_sales
36 | GROUP BY region
37 | )
38 | SELECT *,
39 | CAST(100.0 * (after_changes-before_changes)/before_changes AS decimal(5,2)) AS pct_change
40 | FROM regionChanges;
41 | ```
42 | | region | before_changes | after_changes | pct_change |
43 | |---------------|--------------|-------------|-------------|
44 | | OCEANIA | 6698586333 | 6640244793 | -0.87 |
45 | | EUROPE | 328141414 | 344420043 | 4.96 |
46 | | SOUTH AMERICA | 611056923 | 608981392 | -0.34 |
47 | | AFRICA | 4942976910 | 4997516159 | 1.10 |
48 | | CANADA | 1244662705 | 1234025206 | -0.85 |
49 | | ASIA | 4613242689 | 4551927271 | -1.33 |
50 | | USA | 1967554887 | 1960297502 | -0.37 |
51 |
52 | **Insights and recommendations:**
53 | * Overall, sales in most regions decreased after the packaging change.
54 | * The highest negative impact was in ```ASIA``` at -1.33%.
55 | Danny should reduce the number of products with sustainable packaging there.
56 | * Only ```EUROPE``` saw a significant increase of 4.96%, followed by ```AFRICA``` with 1.10%. These are the areas in which Danny should invest more.
57 |
58 | ---
59 | ### 2. Sales changes by ```platform```
60 | ```TSQL
61 | WITH platformChanges AS (
62 | SELECT
63 | platform,
64 | SUM(CASE WHEN week_number BETWEEN @weekNum-12 AND @weekNum-1 THEN sales END) AS before_changes,
65 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+11 THEN sales END) AS after_changes
66 | FROM clean_weekly_sales
67 | GROUP BY platform
68 | )
69 | SELECT *,
70 | CAST(100.0 * (after_changes-before_changes)/before_changes AS decimal(5,2)) AS pct_change
71 | FROM platformChanges;
72 | ```
73 | | platform | before_changes | after_changes | pct_change |
74 | |----------|--------------|-------------|-------------|
75 | | Retail | 19886040272 | 19768576165 | -0.59 |
76 | | Shopify | 520181589 | 568836201 | 9.35 |
77 |
78 | **Insights and recommendations:**
79 | * ```Shopify``` stores saw a 9.35% increase in sales, while ```Retail``` stores decreased slightly, by 0.59%.
80 | * Danny should put more products with sustainable packaging in ```Shopify``` stores.
81 |
82 | ---
83 | ### 3. Sales changes by ```age_band```
84 | ```TSQL
85 | WITH ageBandChanges AS (
86 | SELECT
87 | age_band,
88 | SUM(CASE WHEN week_number BETWEEN @weekNum-12 AND @weekNum-1 THEN sales END) AS before_changes,
89 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+11 THEN sales END) AS after_changes
90 | FROM clean_weekly_sales
91 | GROUP BY age_band
92 | )
93 | SELECT *,
94 | CAST(100.0 * (after_changes-before_changes)/before_changes AS decimal(5,2)) AS pct_change
95 | FROM ageBandChanges;
96 | ```
97 | | age_band | before_changes | after_changes | pct_change |
98 | |--------------|--------------|-------------|-------------|
99 | | unknown | 8191628826 | 8146983408 | -0.55 |
100 | | Young Adults | 2290835366 | 2285973456 | -0.21 |
101 | | Middle Aged | 3276892347 | 3269748622 | -0.22 |
102 | | Retirees | 6646865322 | 6634706880 | -0.18 |
103 |
104 | **Insights and recommendations:**
105 | * Overall, sales decreased slightly in all age bands.
106 | * ```Middle Aged``` and ```Young Adults``` customers saw a more negative impact on sales than ```Retirees```. Those bands should not be targeted with the new packaging.
107 |
108 | ---
109 | ### 4. Sales changes by ```demographic```
110 | ```TSQL
111 | WITH demographicChanges AS (
112 | SELECT
113 | demographic,
114 | SUM(CASE WHEN week_number BETWEEN @weekNum-12 AND @weekNum-1 THEN sales END) AS before_changes,
115 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+11 THEN sales END) AS after_changes
116 | FROM clean_weekly_sales
117 | GROUP BY demographic
118 | )
119 | SELECT *,
120 | CAST(100.0 * (after_changes-before_changes)/before_changes AS decimal(5,2)) AS pct_change
121 | FROM demographicChanges;
122 | ```
123 | | demographic | before_changes | after_changes | pct_change |
124 | |-------------|----------------|---------------|-------------|
125 | | unknown | 8191628826 | 8146983408 | -0.55 |
126 | | Families | 6605726904 | 6598087538 | -0.12 |
127 | | Couples | 5608866131 | 5592341420 | -0.29 |
128 |
129 | **Insights and recommendations:**
130 | * Overall, sales decreased slightly in all demographic groups.
131 | * ```Couples``` saw a more negative impact on sales than ```Families```. That group should not be targeted with the new packaging.
132 |
133 | ---
134 | ### 5. Sales changes by ```customer_type```
135 | ```TSQL
136 | WITH customerTypeChanges AS (
137 | SELECT
138 | customer_type,
139 | SUM(CASE WHEN week_number BETWEEN @weekNum-12 AND @weekNum-1 THEN sales END) AS before_changes,
140 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+11 THEN sales END) AS after_changes
141 | FROM clean_weekly_sales
142 | GROUP BY customer_type
143 | )
144 | SELECT *,
145 | CAST(100.0 * (after_changes-before_changes)/before_changes AS decimal(5,2)) AS pct_change
146 | FROM customerTypeChanges;
147 | ```
148 | | customer_type | before_changes | after_changes | pct_change |
149 | |---------------|----------------|---------------|-------------|
150 | | Guest | 7630353739 | 7595150744 | -0.46 |
151 | | Existing | 10168877642 | 10117367239 | -0.51 |
152 | | New | 2606990480 | 2624894383 | 0.69 |
153 |
154 | **Insights and recommendations:**
155 | * The sales for `Guests` and `Existing` customers decreased, but increased for `New` customers.
156 | * Further analysis should be carried out to understand why `New` customers responded positively to the sustainable packaging.
157 |
--------------------------------------------------------------------------------
/Case Study #5 - Data Mart/Syntax/A_query.sql:
--------------------------------------------------------------------------------
1 | ---------------------------
2 | --A. Data Cleansing Steps--
3 | ---------------------------
4 |
5 | SELECT
6 | CONVERT(date, week_date, 3) AS week_date,
7 | DATEPART(week, CONVERT(date, week_date, 3)) AS week_number,
8 | DATEPART(month, CONVERT(date, week_date, 3)) AS month_number,
9 | DATEPART(year, CONVERT(date, week_date, 3)) AS calendar_year,
10 | region,
11 | platform,
12 | segment,
13 | customer_type,
14 | CASE
15 | WHEN RIGHT(segment, 1) = '1' THEN 'Young Adults'
16 | WHEN RIGHT(segment, 1) = '2' THEN 'Middle Aged'
17 | WHEN RIGHT(segment, 1) IN ('3', '4') THEN 'Retirees'
18 | ELSE 'unknown' END AS age_band,
19 | CASE
20 | WHEN LEFT(segment, 1) = 'C' THEN 'Couples'
21 | WHEN LEFT(segment, 1) = 'F' THEN 'Families'
22 | ELSE 'unknown' END AS demographic,
23 | transactions,
24 | CAST(sales AS bigint) AS sales,
25 | ROUND(CAST(sales AS FLOAT)/transactions, 2) AS avg_transaction
26 | INTO clean_weekly_sales
27 | FROM weekly_sales;
28 |
--------------------------------------------------------------------------------
/Case Study #5 - Data Mart/Syntax/B_query.sql:
--------------------------------------------------------------------------------
1 | -----------------------
2 | --B. Data Exploration--
3 | -----------------------
4 |
5 | --1. What day of the week is used for each week_date value?
6 |
7 | SELECT DISTINCT DATENAME(dw, week_date) AS week_date_value
8 | FROM clean_weekly_sales;
9 |
10 |
11 | --2. What range of week numbers are missing from the dataset?
12 |
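--Generate week numbers 1 to 52 with a recursive CTE, then anti-join them
--against the actual data; SQL Server's default MAXRECURSION limit of 100
--comfortably covers the 52 iterations, so no OPTION hint is needed.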
13 | WITH allWeeks AS (
14 | SELECT 1 AS pos
15 | UNION ALL
16 | SELECT pos+1 FROM allWeeks
17 | WHERE pos+1 <= 52)
18 |
19 | SELECT
20 | DISTINCT a.pos,
21 | c.week_number
22 | FROM allWeeks a
23 | LEFT JOIN clean_weekly_sales c
24 | ON a.pos = c.week_number
25 | WHERE c.week_number IS NULL
26 | ORDER BY a.pos;
27 |
28 |
29 | --3. How many total transactions were there for each year in the dataset?
30 |
31 | SELECT
32 | calendar_year,
33 | SUM(transactions) AS total_transactions
34 | FROM clean_weekly_sales
35 | GROUP BY calendar_year
36 | ORDER BY calendar_year;
37 |
38 |
39 | --4. What is the total sales for each region for each month?
40 |
41 | SELECT
42 | region,
43 | month_number,
44 | SUM(sales) AS total_sales
45 | FROM clean_weekly_sales
46 | GROUP BY region, month_number
47 | ORDER BY region, month_number;
48 |
49 |
50 | --5. What is the total count of transactions for each platform?
51 |
52 | SELECT
53 | platform,
54 | SUM(transactions) AS total_transactions
55 | FROM clean_weekly_sales
56 | GROUP BY platform;
57 |
58 |
59 | --6. What is the percentage of sales for Retail vs Shopify for each month?
60 |
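--Pivot Retail vs Shopify into columns via conditional aggregation:
--each CASE isolates one platform's monthly sales and MAX collapses the group to that value.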
61 | WITH sales_cte AS (
62 | SELECT
63 | calendar_year,
64 | month_number,
65 | platform,
66 | SUM(sales) AS monthly_sales
67 | FROM clean_weekly_sales
68 | GROUP BY calendar_year, month_number, platform
69 | )
70 |
71 | SELECT
72 | calendar_year,
73 | month_number,
74 | CAST(100.0 * MAX(CASE WHEN platform = 'Retail' THEN monthly_sales END)
75 | / SUM(monthly_sales) AS decimal(5, 2)) AS pct_retail,
76 | CAST(100.0 * MAX(CASE WHEN platform = 'Shopify' THEN monthly_sales END)
77 | / SUM(monthly_sales) AS decimal(5, 2)) AS pct_shopify
78 | FROM sales_cte
79 | GROUP BY calendar_year, month_number
80 | ORDER BY calendar_year, month_number;
81 |
82 |
83 | --7. What is the percentage of sales by demographic for each year in the dataset?
84 |
85 | WITH sales_by_demographic AS (
86 | SELECT
87 | calendar_year,
88 | demographic,
89 | SUM(sales) AS sales
90 | FROM clean_weekly_sales
91 | GROUP BY calendar_year, demographic)
92 |
93 | SELECT
94 | calendar_year,
95 | CAST(100.0 * MAX(CASE WHEN demographic = 'Families' THEN sales END)
96 | / SUM(sales) AS decimal(5, 2)) AS pct_families,
97 | CAST(100.0 * MAX(CASE WHEN demographic = 'Couples' THEN sales END)
98 | / SUM(sales) AS decimal(5, 2)) AS pct_couples,
99 | CAST(100.0 * MAX(CASE WHEN demographic = 'unknown' THEN sales END)
100 | / SUM(sales) AS decimal(5, 2)) AS pct_unknown
101 | FROM sales_by_demographic
102 | GROUP BY calendar_year;
103 |
104 |
105 | --8. Which age_band and demographic values contribute the most to Retail sales?
106 |
107 | DECLARE @retailSales bigint = (
108 | SELECT SUM(sales)
109 | FROM clean_weekly_sales
110 | WHERE platform = 'Retail')
111 |
112 | SELECT
113 | age_band,
114 | demographic,
115 | SUM(sales) AS sales,
116 | CAST(100.0 * SUM(sales)/@retailSales AS decimal(5, 2)) AS contribution
117 | FROM clean_weekly_sales
118 | WHERE platform = 'Retail'
119 | GROUP BY age_band, demographic
120 | ORDER BY contribution DESC;
121 |
122 |
123 | --9. Can we use the avg_transaction column to find the average transaction size for each year for Retail vs Shopify? If not - how would you calculate it instead?
124 |
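--No: AVG(avg_transaction) gives every row equal weight, whereas
--SUM(sales) / SUM(transactions) weights each week by its transaction volume,
--which is the correct average transaction size per year and platform.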
125 | SELECT
126 | calendar_year,
127 | platform,
128 | ROUND(AVG(avg_transaction), 0) AS avg_transaction_row,
129 | SUM(sales) / SUM(transactions) AS avg_transaction_group
130 | FROM clean_weekly_sales
131 | GROUP BY calendar_year, platform
132 | ORDER BY calendar_year, platform;
133 |
--------------------------------------------------------------------------------
/Case Study #5 - Data Mart/Syntax/C_query.sql:
--------------------------------------------------------------------------------
1 | ------------------------------
2 | --C. Before & After Analysis--
3 | ------------------------------
4 |
5 | --1. What is the total sales for the 4 weeks before and after 2020-06-15? What is the growth or reduction rate in actual values and percentage of sales?
6 |
7 | --Find the week_number of '2020-06-15' (@weekNum=25)
8 | DECLARE @weekNum int = (
9 | SELECT DISTINCT week_number
10 | FROM clean_weekly_sales
11 | WHERE week_date = '2020-06-15')
12 |
13 | --Find the total sales of 4 weeks before and after @weekNum
14 | WITH salesChanges AS (
15 | SELECT
16 | SUM(CASE WHEN week_number BETWEEN @weekNum-4 AND @weekNum-1 THEN sales END) AS before_changes,
17 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+3 THEN sales END) AS after_changes
18 | FROM clean_weekly_sales
19 | WHERE calendar_year = 2020
20 | )
21 |
22 | SELECT *,
23 | CAST(100.0 * (after_changes-before_changes)/before_changes AS decimal(5,2)) AS pct_change
24 | FROM salesChanges;
25 |
26 |
27 | --2. What about the entire 12 weeks before and after?
28 |
29 | --Find the week_number of '2020-06-15' (@weekNum=25)
30 | DECLARE @weekNum int = (
31 | SELECT DISTINCT week_number
32 | FROM clean_weekly_sales
33 | WHERE week_date = '2020-06-15')
34 |
35 | --Find the total sales of 12 weeks before and after @weekNum
36 | WITH salesChanges AS (
37 | SELECT
38 | SUM(CASE WHEN week_number BETWEEN @weekNum-12 AND @weekNum-1 THEN sales END) AS before_changes,
39 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+11 THEN sales END) AS after_changes
40 | FROM clean_weekly_sales
41 | WHERE calendar_year = 2020
42 | )
43 |
44 | SELECT *,
45 | CAST(100.0 * (after_changes-before_changes)/before_changes AS decimal(5,2)) AS pct_change
46 | FROM salesChanges;
47 |
48 |
49 | --3. How do the sale metrics for these 2 periods before and after compare with the previous years in 2018 and 2019?
50 |
51 | --Part 1: How do the sales metrics for the 4 weeks before and after compare with the previous years 2018 and 2019?
52 | --Find the week_number of '2020-06-15' (@weekNum=25)
53 | DECLARE @weekNum int = (
54 | SELECT DISTINCT week_number
55 | FROM clean_weekly_sales
56 | WHERE week_date = '2020-06-15')
57 |
58 | --Find the total sales of 4 weeks before and after @weekNum
59 | WITH salesChanges AS (
60 | SELECT
61 | calendar_year,
62 | SUM(CASE WHEN week_number BETWEEN @weekNum-4 AND @weekNum-1 THEN sales END) AS before_sales,
63 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+3 THEN sales END) AS after_sales
64 | FROM clean_weekly_sales
65 | GROUP BY calendar_year
66 | )
67 |
68 | SELECT *,
69 | CAST(100.0 * (after_sales-before_sales)/before_sales AS decimal(5,2)) AS pct_change
70 | FROM salesChanges
71 | ORDER BY calendar_year;
72 |
73 |
74 | --Part 2: How do the sales metrics for the 12 weeks before and after compare with the previous years 2018 and 2019?
75 | --Find the week_number of '2020-06-15' (@weekNum=25)
76 | DECLARE @weekNum int = (
77 | SELECT DISTINCT week_number
78 | FROM clean_weekly_sales
79 | WHERE week_date = '2020-06-15')
80 |
81 | --Find the total sales of 12 weeks before and after @weekNum
82 | WITH salesChanges AS (
83 | SELECT
84 | calendar_year,
85 | SUM(CASE WHEN week_number BETWEEN @weekNum-12 AND @weekNum-1 THEN sales END) AS before_sales,
86 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+11 THEN sales END) AS after_sales
87 | FROM clean_weekly_sales
88 | GROUP BY calendar_year
89 | )
90 |
91 | SELECT *,
92 | CAST(100.0 * (after_sales-before_sales)/before_sales AS decimal(5,2)) AS pct_change
93 | FROM salesChanges
94 | ORDER BY calendar_year;
95 |
--------------------------------------------------------------------------------
/Case Study #5 - Data Mart/Syntax/D_query.sql:
--------------------------------------------------------------------------------
1 | ---------------------
2 | --D. Bonus Question--
3 | ---------------------
4 |
5 | -- Which areas of the business have the highest negative impact in sales metrics performance in 2020 for the 12 week before and after period?
6 |
7 | --Find the week_number of '2020-06-15' (@weekNum=25)
8 | DECLARE @weekNum int = (
9 | SELECT DISTINCT week_number
10 | FROM clean_weekly_sales
11 | WHERE week_date = '2020-06-15'
12 | AND calendar_year = 2020)
13 |
14 |
15 | --1. Sales changes by [region]
16 | WITH regionChanges AS (
17 | SELECT
18 | region,
19 | SUM(CASE WHEN week_number BETWEEN @weekNum-12 AND @weekNum-1 THEN sales END) AS before_changes,
20 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+11 THEN sales END) AS after_changes
21 | FROM clean_weekly_sales
22 | GROUP BY region
23 | )
24 | SELECT *,
25 | CAST(100.0 * (after_changes-before_changes)/before_changes AS decimal(5,2)) AS pct_change
26 | FROM regionChanges;
27 |
28 |
29 | --2. Sales changes by [platform]
30 | WITH platformChanges AS (
31 | SELECT
32 | platform,
33 | SUM(CASE WHEN week_number BETWEEN @weekNum-12 AND @weekNum-1 THEN sales END) AS before_changes,
34 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+11 THEN sales END) AS after_changes
35 | FROM clean_weekly_sales
36 | GROUP BY platform
37 | )
38 | SELECT *,
39 | CAST(100.0 * (after_changes-before_changes)/before_changes AS decimal(5,2)) AS pct_change
40 | FROM platformChanges;
41 |
42 |
43 | --3. Sales changes by [age_band]
44 | WITH ageBandChanges AS (
45 | SELECT
46 | age_band,
47 | SUM(CASE WHEN week_number BETWEEN @weekNum-12 AND @weekNum-1 THEN sales END) AS before_changes,
48 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+11 THEN sales END) AS after_changes
49 | FROM clean_weekly_sales
50 | GROUP BY age_band
51 | )
52 | SELECT *,
53 | CAST(100.0 * (after_changes-before_changes)/before_changes AS decimal(5,2)) AS pct_change
54 | FROM ageBandChanges;
55 |
56 |
57 | --4. Sales changes by [demographic]
58 | WITH demographicChanges AS (
59 | SELECT
60 | demographic,
61 | SUM(CASE WHEN week_number BETWEEN @weekNum-12 AND @weekNum-1 THEN sales END) AS before_changes,
62 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+11 THEN sales END) AS after_changes
63 | FROM clean_weekly_sales
64 | GROUP BY demographic
65 | )
66 | SELECT *,
67 | CAST(100.0 * (after_changes-before_changes)/before_changes AS decimal(5,2)) AS pct_change
68 | FROM demographicChanges;
69 |
70 |
71 | --5. Sales changes by [customer_type]
72 | WITH customerTypeChanges AS (
73 | SELECT
74 | customer_type,
75 | SUM(CASE WHEN week_number BETWEEN @weekNum-12 AND @weekNum-1 THEN sales END) AS before_changes,
76 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+11 THEN sales END) AS after_changes
77 | FROM clean_weekly_sales
78 | GROUP BY customer_type
79 | )
80 | SELECT *,
81 | CAST(100.0 * (after_changes-before_changes)/before_changes AS decimal(5,2)) AS pct_change
82 | FROM customerTypeChanges;
83 |
--------------------------------------------------------------------------------
/Case Study #6 - Clique Bait/README.md:
--------------------------------------------------------------------------------
1 | # 🐟 Case Study #6 - Clique Bait
2 |
3 |
4 |
5 | ## 📕 Table of Contents
6 | * [Business Task](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%236%20-%20Clique%20Bait#%EF%B8%8F-business-task)
7 | * [Entity Relationship Diagram](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%236%20-%20Clique%20Bait#-entity-relationship-diagram)
8 | * [Case Study Questions](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%236%20-%20Clique%20Bait#-case-study-questions)
9 | * [My Solution](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%236%20-%20Clique%20Bait#-my-solution)
10 |
11 | ---
12 | ## 🛠️ Business Task
13 | Clique Bait is not like your regular online seafood store - the founder and CEO, Danny, was also part of a digital data analytics team and wanted
14 | to expand his knowledge into the seafood industry!
15 |
16 | In this case study - you are required to support Danny’s vision and analyse his dataset and come up with
17 | creative solutions to calculate funnel fallout rates for the Clique Bait online store.
18 |
19 | ---
20 | ## 🔐 Entity Relationship Diagram
21 |
22 |
23 |
24 | ---
25 | ## ❓ Case Study Questions
26 | ### A. Digital Analysis
27 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%236%20-%20Clique%20Bait/Solution/A.%20Digital%20Analysis.md).
28 |
29 | Using the available datasets - answer the following questions using a single query for each one:
30 | 1. How many users are there?
31 | 2. How many cookies does each user have on average?
32 | 3. What is the unique number of visits by all users per month?
33 | 4. What is the number of events for each event type?
34 | 5. What is the percentage of visits which have a purchase event?
35 | 6. What is the percentage of visits which view the checkout page but do not have a purchase event?
36 | 7. What are the top 3 pages by number of views?
37 | 8. What is the number of views and cart adds for each product category?
38 | 9. What are the top 3 products by purchases?
39 |
40 | ---
41 | ### B. Product Funnel Analysis
42 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%236%20-%20Clique%20Bait/Solution/B.%20Product%20Funnel%20Analysis.md).
43 |
44 | Using a single SQL query - create a new output table which has the following details:
45 | * How many times was each product viewed?
46 | * How many times was each product added to cart?
47 | * How many times was each product added to a cart but not purchased (abandoned)?
48 | * How many times was each product purchased?
49 |
50 | Additionally, create another table which further aggregates the data for the above points but this time for each product category instead of individual products.
51 |
52 | Use your 2 new output tables - answer the following questions:
53 | 1. Which product had the most views, cart adds and purchases?
54 | 2. Which product was most likely to be abandoned?
55 | 3. Which product had the highest view to purchase percentage?
56 | 4. What is the average conversion rate from view to cart add?
57 | 5. What is the average conversion rate from cart add to purchase?
58 |
59 | ---
60 | ### C. Campaigns Analysis
61 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%236%20-%20Clique%20Bait/Solution/C.%20Campaigns%20Analysis.md).
62 |
63 | Generate a table that has 1 single row for every unique visit_id record and has the following columns:
64 | * `user_id`
65 | * `visit_id`
66 | * `visit_start_time`: the earliest event_time for each visit
67 | * `page_views`: count of page views for each visit
68 | * `cart_adds`: count of product cart add events for each visit
69 | * `purchase`: 1/0 flag if a purchase event exists for each visit
70 | * `campaign_name`: map the visit to a campaign if the visit_start_time falls between the start_date and end_date
71 | * `impression`: count of ad impressions for each visit
72 | * `click`: count of ad clicks for each visit
73 | * (Optional column) `cart_products`: a comma separated text value with
74 | products added to the cart sorted by the order they were added to the cart (hint: use the `sequence_number`)
75 |
76 | Some ideas to investigate further include:
77 | - Identifying users who have received impressions during each campaign period
78 | and comparing each metric with other users who did not have an impression event
79 | - Does clicking on an impression lead to higher purchase rates?
80 | - What is the uplift in purchase rate when comparing users who click on a campaign impression versus
81 | users who do not receive an impression? What if we compare them with users who have just an impression but do not click?
82 | - What metrics can you use to quantify the success or failure of each campaign compared to each other?
83 |
84 | ---
85 | ## 🚀 My Solution
86 | * View the complete syntax [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%236%20-%20Clique%20Bait/Syntax).
87 | * View the result and explanation [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%236%20-%20Clique%20Bait/Solution).
88 |
--------------------------------------------------------------------------------
/Case Study #6 - Clique Bait/Solution/A. Digital Analysis.md:
--------------------------------------------------------------------------------
1 | # 🐟 Case Study #6 - Clique Bait
2 | ## A. Digital Analysis
3 | ### 1. How many users are there?
4 | ```TSQL
5 | SELECT COUNT(DISTINCT user_id) AS users_count
6 | FROM users;
7 | ```
8 | | users_count |
9 | |--------------|
10 | | 500 |
11 |
12 | ---
13 | ### 2. How many cookies does each user have on average?
14 | ```TSQL
15 | SELECT CAST(AVG(cookies_count) AS FLOAT) AS avg_cookies_per_user
16 | FROM (
17 | SELECT
18 | user_id,
19 | 1.0*COUNT(cookie_id) AS cookies_count
20 | FROM users
21 | GROUP BY user_id) temp;
22 | ```
23 | | avg_cookies_per_user |
24 | |-----------------------|
25 | | 3.564 |
26 |
27 | ---
28 | ### 3. What is the unique number of visits by all users per month?
29 | ```TSQL
30 | SELECT
31 | MONTH(event_time) AS months,
32 | COUNT(DISTINCT visit_id) AS visits_count
33 | FROM events
34 | GROUP BY MONTH(event_time)
35 | ORDER BY months;
36 | ```
37 | | months | visits_count |
38 | |--------|---------------|
39 | | 1 | 876 |
40 | | 2 | 1488 |
41 | | 3 | 916 |
42 | | 4 | 248 |
43 | | 5 | 36 |
44 |
45 | ---
46 | ### 4. What is the number of events for each event type?
47 | ```TSQL
48 | SELECT
49 | e.event_type,
50 | ei.event_name,
51 | COUNT(*) AS event_count
52 | FROM events e
53 | JOIN event_identifier ei
54 | ON e.event_type = ei.event_type
55 | GROUP BY e.event_type, ei.event_name
56 | ORDER BY e.event_type;
57 | ```
58 | | event_type | event_name | event_count |
59 | |------------|---------------|--------------|
60 | | 1 | Page View | 20928 |
61 | | 2 | Add to Cart | 8451 |
62 | | 3 | Purchase | 1777 |
63 | | 4 | Ad Impression | 876 |
64 | | 5 | Ad Click | 702 |
65 |
66 | ---
67 | ### 5. What is the percentage of visits which have a purchase event?
68 | ```TSQL
69 | SELECT
70 | CAST(100.0 * COUNT(DISTINCT e.visit_id)
71 | / (SELECT COUNT(DISTINCT visit_id) FROM events) AS decimal(10,2)) AS purchase_pct
72 | FROM events e
73 | JOIN event_identifier ei
74 | ON e.event_type = ei.event_type
75 | WHERE ei.event_name = 'Purchase';
76 | ```
77 | | purchase_pct |
78 | |---------------|
79 | | 49.86 |
80 |
81 | ---
82 | ### 6. What is the percentage of visits which view the checkout page but do not have a purchase event?
83 | ```TSQL
84 | WITH view_checkout AS (
85 | SELECT COUNT(e.visit_id) AS cnt
86 | FROM events e
87 | JOIN event_identifier ei ON e.event_type = ei.event_type
88 | JOIN page_hierarchy p ON e.page_id = p.page_id
89 | WHERE ei.event_name = 'Page View'
90 | AND p.page_name = 'Checkout'
91 | )
92 |
93 | SELECT CAST(100-(100.0 * COUNT(DISTINCT e.visit_id)
94 | / (SELECT cnt FROM view_checkout)) AS decimal(10, 2)) AS pct_view_checkout_not_purchase
95 | FROM events e
96 | JOIN event_identifier ei ON e.event_type = ei.event_type
97 | WHERE ei.event_name = 'Purchase'
98 | ```
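Note that this calculation assumes every purchase visit also views the `Checkout` page (and each visit views it at most once), so the share of purchasing visits can be subtracted from 100% directly.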
99 | | pct_view_checkout_not_purchase |
100 | |---------------------------------|
101 | | 15.50 |
102 |
103 | ---
104 | ### 7. What are the top 3 pages by number of views?
105 | ```TSQL
106 | SELECT
107 | TOP 3 ph.page_name,
108 | COUNT(*) AS page_views
109 | FROM events e
110 | JOIN event_identifier ei ON e.event_type = ei.event_type
111 | JOIN page_hierarchy ph ON e.page_id = ph.page_id
112 | WHERE ei.event_name = 'Page View'
113 | GROUP BY ph.page_name
114 | ORDER BY page_views DESC;
115 | ```
116 | | page_name | page_views |
117 | |--------------|-------------|
118 | | All Products | 3174 |
119 | | Checkout | 2103 |
120 | | Home Page | 1782 |
121 |
122 | ---
123 | ### 8. What is the number of views and cart adds for each product category?
124 | ```TSQL
125 | SELECT
126 | ph.product_category,
127 | SUM(CASE WHEN ei.event_name = 'Page View' THEN 1 ELSE 0 END) AS page_views,
128 | SUM(CASE WHEN ei.event_name = 'Add to Cart' THEN 1 ELSE 0 END) AS cart_adds
129 | FROM events e
130 | JOIN event_identifier ei ON e.event_type = ei.event_type
131 | JOIN page_hierarchy ph ON e.page_id = ph.page_id
132 | WHERE ph.product_category IS NOT NULL
133 | GROUP BY ph.product_category;
134 | ```
135 | | product_category | page_views | cart_adds |
136 | |------------------|------------|------------|
137 | | Fish | 4633 | 2789 |
138 | | Luxury | 3032 | 1870 |
139 | | Shellfish | 6204 | 3792 |
140 |
141 | ---
142 | ### 9. What are the top 3 products by purchases?
143 | ```TSQL
144 | SELECT
145 | TOP 3 ph.product_id,
146 | ph.page_name,
147 | ph.product_category,
148 | COUNT(*) AS purchase_count
149 | FROM events e
150 | JOIN event_identifier ei ON e.event_type = ei.event_type
151 | JOIN page_hierarchy ph ON e.page_id = ph.page_id
152 | --1st layer: products are added to cart
153 | WHERE ei.event_name = 'Add to cart'
154 | --2nd layer: add-to-cart products are purchased
155 | AND e.visit_id IN (
156 | SELECT e.visit_id
157 | FROM events e
158 | JOIN event_identifier ei ON e.event_type = ei.event_type
159 | WHERE ei.event_name = 'Purchase')
160 | GROUP BY ph.product_id, ph.page_name, ph.product_category
161 | ORDER BY purchase_count DESC;
162 | ```
163 | | product_id | page_name | product_category | purchase_count |
164 | |------------|-----------|------------------|-----------------|
165 | | 7 | Lobster | Shellfish | 754 |
166 | | 9 | Oyster | Shellfish | 726 |
167 | | 8 | Crab | Shellfish | 719 |
168 |
169 | ---
170 | My solution for **[B. Product Funnel Analysis](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%236%20-%20Clique%20Bait/Solution/B.%20Product%20Funnel%20Analysis.md)**.
171 |
--------------------------------------------------------------------------------
/Case Study #6 - Clique Bait/Syntax/A_query.sql:
--------------------------------------------------------------------------------
1 | -----------------------
2 | --A. Digital Analysis--
3 | -----------------------
4 |
5 | --1. How many users are there?
6 |
7 | SELECT COUNT(DISTINCT user_id) AS users_count
8 | FROM users;
9 |
10 |
11 | --2. How many cookies does each user have on average?
12 |
13 | SELECT CAST(AVG(cookies_count) AS FLOAT) AS avg_cookies_per_user
14 | FROM (
15 | SELECT
16 | user_id,
17 | 1.0*COUNT(cookie_id) AS cookies_count
18 | FROM users
19 | GROUP BY user_id) temp;
20 |
21 |
22 | --3. What is the unique number of visits by all users per month?
23 |
24 | SELECT
25 | MONTH(event_time) AS months,
26 | COUNT(DISTINCT visit_id) AS visits_count
27 | FROM events
28 | GROUP BY MONTH(event_time)
29 | ORDER BY months;
30 |
31 |
32 | --4. What is the number of events for each event type?
33 |
34 | SELECT
35 | e.event_type,
36 | ei.event_name,
37 | COUNT(*) AS event_count
38 | FROM events e
39 | JOIN event_identifier ei
40 | ON e.event_type = ei.event_type
41 | GROUP BY e.event_type, ei.event_name
42 | ORDER BY e.event_type;
43 |
44 |
45 | --5. What is the percentage of visits which have a purchase event?
46 |
47 | SELECT
48 | CAST(100.0 * COUNT(DISTINCT e.visit_id)
49 | / (SELECT COUNT(DISTINCT visit_id) FROM events) AS decimal(10,2)) AS purchase_pct
50 | FROM events e
51 | JOIN event_identifier ei
52 | ON e.event_type = ei.event_type
53 | WHERE ei.event_name = 'Purchase';
54 |
55 |
56 | --6. What is the percentage of visits which view the checkout page but do not have a purchase event?
57 |
58 | WITH view_checkout AS (
59 | SELECT COUNT(e.visit_id) AS cnt
60 | FROM events e
61 | JOIN event_identifier ei ON e.event_type = ei.event_type
62 | JOIN page_hierarchy p ON e.page_id = p.page_id
63 | WHERE ei.event_name = 'Page View'
64 | AND p.page_name = 'Checkout'
65 | )
66 |
67 | SELECT CAST(100-(100.0 * COUNT(DISTINCT e.visit_id)
68 | / (SELECT cnt FROM view_checkout)) AS decimal(10, 2)) AS pct_view_checkout_not_purchase
69 | FROM events e
70 | JOIN event_identifier ei ON e.event_type = ei.event_type
71 | WHERE ei.event_name = 'Purchase'
72 |
73 |
74 | --7. What are the top 3 pages by number of views?
75 |
76 | SELECT
77 | TOP 3 ph.page_name,
78 | COUNT(*) AS page_views
79 | FROM events e
80 | JOIN event_identifier ei ON e.event_type = ei.event_type
81 | JOIN page_hierarchy ph ON e.page_id = ph.page_id
82 | WHERE ei.event_name = 'Page View'
83 | GROUP BY ph.page_name
84 | ORDER BY page_views DESC;
85 |
86 |
87 | --8. What is the number of views and cart adds for each product category?
88 |
89 | SELECT
90 | ph.product_category,
91 | SUM(CASE WHEN ei.event_name = 'Page View' THEN 1 ELSE 0 END) AS page_views,
92 | SUM(CASE WHEN ei.event_name = 'Add to Cart' THEN 1 ELSE 0 END) AS cart_adds
93 | FROM events e
94 | JOIN event_identifier ei ON e.event_type = ei.event_type
95 | JOIN page_hierarchy ph ON e.page_id = ph.page_id
96 | WHERE ph.product_category IS NOT NULL
97 | GROUP BY ph.product_category;
98 |
99 |
100 | --9. What are the top 3 products by purchases?
101 |
102 | SELECT
103 | TOP 3 ph.product_id,
104 | ph.page_name,
105 | ph.product_category,
106 | COUNT(*) AS purchase_count
107 | FROM events e
108 | JOIN event_identifier ei ON e.event_type = ei.event_type
109 | JOIN page_hierarchy ph ON e.page_id = ph.page_id
110 | WHERE ei.event_name = 'Add to cart'
111 | AND e.visit_id IN (
112 | SELECT e.visit_id
113 | FROM events e
114 | JOIN event_identifier ei ON e.event_type = ei.event_type
115 | WHERE ei.event_name = 'Purchase')
116 | GROUP BY ph.product_id, ph.page_name, ph.product_category
117 | ORDER BY purchase_count DESC;
118 |
--------------------------------------------------------------------------------
/Case Study #6 - Clique Bait/Syntax/B_query.sql:
--------------------------------------------------------------------------------
1 | ------------------------------
2 | --B. Product Funnel Analysis--
3 | ------------------------------
4 |
5 | /*
6 | Using a single SQL query - create a new output table which has the following details:
7 | - How many times was each product viewed?
8 | - How many times was each product added to cart?
9 | - How many times was each product added to a cart but not purchased (abandoned)?
10 | - How many times was each product purchased?
11 | */
12 |
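--Approach: build one CTE per metric (views + cart adds, abandoned carts,
--purchased carts) and join them on product_id into the temp table #product_summary.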
13 | WITH product_info AS (
14 | SELECT
15 | ph.product_id,
16 | ph.page_name AS product_name,
17 | ph.product_category,
18 | SUM(CASE WHEN ei.event_name = 'Page View' THEN 1 ELSE 0 END) AS views,
19 | SUM(CASE WHEN ei.event_name = 'Add To Cart' THEN 1 ELSE 0 END) AS cart_adds
20 | FROM events e
21 | JOIN event_identifier ei ON e.event_type = ei.event_type
22 | JOIN page_hierarchy ph ON e.page_id = ph.page_id
23 | WHERE ph.product_id IS NOT NULL
24 | GROUP BY ph.product_id, ph.page_name, ph.product_category
25 | ),
26 | product_abandoned AS (
27 | SELECT
28 | ph.product_id,
29 | ph.page_name AS product_name,
30 | ph.product_category,
31 | COUNT(*) AS abandoned
32 | FROM events e
33 | JOIN event_identifier ei ON e.event_type = ei.event_type
34 | JOIN page_hierarchy ph ON e.page_id = ph.page_id
35 | WHERE ei.event_name = 'Add to cart'
36 | AND e.visit_id NOT IN (
37 | SELECT e.visit_id
38 | FROM events e
39 | JOIN event_identifier ei ON e.event_type = ei.event_type
40 | WHERE ei.event_name = 'Purchase')
41 | GROUP BY ph.product_id, ph.page_name, ph.product_category
42 | ),
43 | product_purchased AS (
44 | SELECT
45 | ph.product_id,
46 | ph.page_name AS product_name,
47 | ph.product_category,
48 | COUNT(*) AS purchases
49 | FROM events e
50 | JOIN event_identifier ei ON e.event_type = ei.event_type
51 | JOIN page_hierarchy ph ON e.page_id = ph.page_id
52 | WHERE ei.event_name = 'Add to cart'
53 | AND e.visit_id IN (
54 | SELECT e.visit_id
55 | FROM events e
56 | JOIN event_identifier ei ON e.event_type = ei.event_type
57 | WHERE ei.event_name = 'Purchase')
58 | GROUP BY ph.product_id, ph.page_name, ph.product_category
59 | )
60 |
61 | SELECT
62 | pi.*,
63 | pa.abandoned,
64 | pp.purchases
65 | INTO #product_summary
66 | FROM product_info pi
67 | JOIN product_abandoned pa ON pi.product_id = pa.product_id
68 | JOIN product_purchased pp ON pi.product_id = pp.product_id;
69 |
70 |
71 | /*
72 | Additionally, create another table which further aggregates the data for the above points
73 | but this time for each product category instead of individual products.
74 | */
75 |
76 | WITH category_info AS (
77 | SELECT
78 | ph.product_category,
79 | SUM(CASE WHEN ei.event_name = 'Page View' THEN 1 ELSE 0 END) AS views,
80 | SUM(CASE WHEN ei.event_name = 'Add To Cart' THEN 1 ELSE 0 END) AS cart_adds
81 | FROM events e
82 | JOIN event_identifier ei ON e.event_type = ei.event_type
83 | JOIN page_hierarchy ph ON e.page_id = ph.page_id
84 | WHERE ph.product_id IS NOT NULL
85 | GROUP BY ph.product_category
86 | ),
87 | category_abandoned AS (
88 | SELECT
89 | ph.product_category,
90 | COUNT(*) AS abandoned
91 | FROM events e
92 | JOIN event_identifier ei ON e.event_type = ei.event_type
93 | JOIN page_hierarchy ph ON e.page_id = ph.page_id
94 | WHERE ei.event_name = 'Add to cart'
95 | AND e.visit_id NOT IN (
96 | SELECT e.visit_id
97 | FROM events e
98 | JOIN event_identifier ei ON e.event_type = ei.event_type
99 | WHERE ei.event_name = 'Purchase')
100 | GROUP BY ph.product_category
101 | ),
102 | category_purchased AS (
103 | SELECT
104 | ph.product_category,
105 | COUNT(*) AS purchases
106 | FROM events e
107 | JOIN event_identifier ei ON e.event_type = ei.event_type
108 | JOIN page_hierarchy ph ON e.page_id = ph.page_id
109 | WHERE ei.event_name = 'Add to cart'
110 | AND e.visit_id IN (
111 | SELECT e.visit_id
112 | FROM events e
113 | JOIN event_identifier ei ON e.event_type = ei.event_type
114 | WHERE ei.event_name = 'Purchase')
115 | GROUP BY ph.product_category
116 | )
117 |
118 | SELECT
119 | ci.*,
120 | ca.abandoned,
121 | cp.purchases
122 | FROM category_info ci
123 | JOIN category_abandoned ca ON ci.product_category = ca.product_category
124 | JOIN category_purchased cp ON ci.product_category = cp.product_category;
125 |
126 |
127 | --Use your 2 new output tables - answer the following questions:
128 | --1. Which product had the most views, cart adds and purchases?
129 |
130 | SELECT TOP 1 *
131 | FROM #product_summary
132 | ORDER BY views DESC;
133 | --> Oyster had the most views.
134 |
135 | SELECT TOP 1 *
136 | FROM #product_summary
137 | ORDER BY cart_adds DESC;
138 | --> Lobster had the most cart adds.
139 |
140 | SELECT TOP 1 *
141 | FROM #product_summary
142 | ORDER BY purchases DESC;
143 | --> Lobster had the most purchases.
144 |
145 |
146 | --2. Which product was most likely to be abandoned?
147 | SELECT TOP 1 *
148 | FROM #product_summary
149 | ORDER BY abandoned DESC;
150 | --> Russian Caviar was most likely to be abandoned.
151 |
152 |
153 | --3. Which product had the highest view to purchase percentage?
154 | SELECT
155 | TOP 1 product_name,
156 | product_category,
157 | CAST(100.0 * purchases / views AS decimal(10, 2)) AS purchase_per_view_pct
158 | FROM #product_summary
159 | ORDER BY purchase_per_view_pct DESC;
160 | --> Lobster had the highest view to purchase percentage.
161 |
162 |
163 | --4. What is the average conversion rate from view to cart add?
164 | SELECT
165 | CAST(AVG(100.0*cart_adds/views) AS decimal(10, 2)) AS avg_view_to_cart
166 | FROM #product_summary;
167 |
168 |
169 | --5. What is the average conversion rate from cart add to purchase?
170 | SELECT
171 | CAST(AVG(100.0*purchases/cart_adds) AS decimal(10, 2)) AS avg_cart_to_purchase
172 | FROM #product_summary;
173 |
--------------------------------------------------------------------------------
/Case Study #6 - Clique Bait/Syntax/C_query.sql:
--------------------------------------------------------------------------------
1 | -------------------------
2 | --C. Campaigns Analysis--
3 | -------------------------
4 | /*Generate a table that has 1 single row for every unique visit_id record and has the following columns:
5 | - user_id
6 | - visit_id
7 | - visit_start_time: the earliest event_time for each visit
8 | - page_views: count of page views for each visit
9 | - cart_adds: count of product cart add events for each visit
10 | - purchase: 1/0 flag if a purchase event exists for each visit
11 | - campaign_name: map the visit to a campaign if the visit_start_time falls between the start_date and end_date
12 | - impression: count of ad impressions for each visit
13 | - click: count of ad clicks for each visit
14 | - (Optional column) cart_products: a comma separated text value with products added to the cart
15 | sorted by the order they were added to the cart (hint: use the sequence_number)
16 | */
17 |
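--LEFT JOIN on the date range keeps visits that fall outside any campaign window
--(their campaign_name stays NULL); STRING_AGG ... WITHIN GROUP orders the
--cart products by sequence_number, i.e. the order they were added.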
18 | SELECT
19 | u.user_id,
20 | e.visit_id,
21 | MIN(event_time) AS visit_start_time,
22 | SUM(CASE WHEN ei.event_name = 'Page View' THEN 1 ELSE 0 END) AS page_views,
23 | SUM(CASE WHEN ei.event_name = 'Add to Cart' THEN 1 ELSE 0 END) AS cart_adds,
24 | SUM(CASE WHEN ei.event_name = 'Purchase' THEN 1 ELSE 0 END) AS purchase,
25 | c.campaign_name,
26 | SUM(CASE WHEN ei.event_name = 'Ad Impression' THEN 1 ELSE 0 END) AS impression,
27 | SUM(CASE WHEN ei.event_name = 'Ad Click' THEN 1 ELSE 0 END) AS click,
28 | STRING_AGG(CASE WHEN ei.event_name = 'Add to Cart' THEN ph.page_name END, ', ')
29 | WITHIN GROUP (ORDER BY e.sequence_number) AS cart_products
30 | INTO #campaign_summary
31 | FROM events e
32 | JOIN users u
33 | ON e.cookie_id = u.cookie_id
34 | JOIN event_identifier ei
35 | ON e.event_type = ei.event_type
36 | JOIN page_hierarchy ph
37 | ON e.page_id = ph.page_id
38 | LEFT JOIN campaign_identifier c
39 | ON e.event_time BETWEEN c.start_date AND c.end_date
40 | GROUP BY u.user_id, e.visit_id, c.campaign_name;
41 |
42 |
43 | /*
44 | - Identifying users who have received impressions during each campaign period
45 | and comparing each metric with other users who did not have an impression event
46 | - Does clicking on an impression lead to higher purchase rates?
47 | - What is the uplift in purchase rate when comparing users who click on a campaign impression versus
48 | users who do not receive an impression? What if we compare them with users who have just an impression but do not click?
49 | - What metrics can you use to quantify the success or failure of each campaign compared to each other?
50 | */
51 |
52 |
53 | --1. Calculate the number of users in each group
54 |
55 | --Number of users who received impressions during campaign periods = 417
56 | SELECT COUNT(DISTINCT user_id) AS received_impressions
57 | FROM #campaign_summary
58 | WHERE impression > 0
59 | AND campaign_name IS NOT NULL;
60 |
61 |
62 | --Number of users who received impressions but didn't click on ads = 127
63 | SELECT COUNT(DISTINCT user_id) AS received_impressions_not_clicked
64 | FROM #campaign_summary
65 | WHERE impression > 0
66 | AND click = 0
67 | AND campaign_name IS NOT NULL;
68 |
69 |
70 | --Number of users who didn't receive impressions during campaign periods = 56
71 | SELECT COUNT(DISTINCT user_id) AS not_received_impressions
72 | FROM #campaign_summary
73 | WHERE campaign_name IS NOT NULL
74 | AND user_id NOT IN (
75 | SELECT user_id
76 | FROM #campaign_summary
77 | WHERE impression > 0)
78 |
79 |
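--A small variation (my own sketch): the group sizes could be derived instead of
--hard-coded below, e.g.
--  DECLARE @received int = (
--      SELECT COUNT(DISTINCT user_id)
--      FROM #campaign_summary
--      WHERE impression > 0 AND campaign_name IS NOT NULL);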
80 | --2. Calculate the average clicks, average views, average cart adds, and average purchases of each group
81 |
82 | --For users who received impressions
83 | DECLARE @received int
84 | SET @received = 417
85 |
86 | SELECT
87 | CAST(1.0*SUM(page_views) / @received AS decimal(10,1)) AS avg_view,
88 | CAST(1.0*SUM(cart_adds) / @received AS decimal(10,1)) AS avg_cart_adds,
89 | CAST(1.0*SUM(purchase) / @received AS decimal(10,1)) AS avg_purchase
90 | FROM #campaign_summary
91 | WHERE impression > 0
92 | AND campaign_name IS NOT NULL;
93 |
94 |
95 | --For users who received impressions but didn't click on ads
96 | DECLARE @received_not_clicked int
97 | SET @received_not_clicked = 127
98 |
99 | SELECT
100 | CAST(1.0*SUM(page_views) / @received_not_clicked AS decimal(10,1)) AS avg_view,
101 | CAST(1.0*SUM(cart_adds) / @received_not_clicked AS decimal(10,1)) AS avg_cart_adds,
102 | CAST(1.0*SUM(purchase) / @received_not_clicked AS decimal(10,1)) AS avg_purchase
103 | FROM #campaign_summary
104 | WHERE impression > 0
105 | AND click = 0
106 | AND campaign_name IS NOT NULL;
107 |
108 |
109 | --For users who didn't receive impressions
110 | DECLARE @not_received int
111 | SET @not_received = 56
112 |
113 | SELECT
114 | CAST(1.0*SUM(page_views) / @not_received AS decimal(10,1)) AS avg_view,
115 | CAST(1.0*SUM(cart_adds) / @not_received AS decimal(10,1)) AS avg_cart_adds,
116 | CAST(1.0*SUM(purchase) / @not_received AS decimal(10,1)) AS avg_purchase
117 | FROM #campaign_summary
118 | WHERE campaign_name IS NOT NULL
119 | AND user_id NOT IN (
120 | SELECT user_id
121 | FROM #campaign_summary
122 | WHERE impression > 0);
123 |
124 |
125 | --3. Compare the average views, average cart adds and average purchases of users who received impressions and those who did not
126 |
127 | /* Combine table
128 | | | avg_view | avg_cart_adds | avg_purchase |
129 | |-----------------------------|----------|---------------|---------------|
130 | | Received impressions | 15.3 | 9 | 1.5 |
131 | | Not received impressions | 19.4 | 5.8 | 1.2 |
132 | | Increase by campaigns | No | Yes | Yes |
133 | */
134 |
135 |
136 | --4. Compare the average purchases of users who received impressions and those who received impressions but didn't click on the ads
137 | /* Combine table
138 | | | avg_purchase |
139 | |--------------------------------------|---------------|
140 | | Received impressions | 1.5 |
141 | | Received impressions but not clicked | 0.8 |
142 | | Increase by clicking to the ads | Yes |
143 | */
144 |
--------------------------------------------------------------------------------
/Case Study #7 - Balanced Tree Clothing Co./README.md:
--------------------------------------------------------------------------------
1 | # 👕 Case Study #7 - Balanced Tree Clothing Co.
2 |
3 |
4 |
5 |
6 | ## 📕 Table of Contents
7 | * [Business Task](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%237%20-%20Balanced%20Tree%20Clothing%20Co.#%EF%B8%8F-business-task)
8 | * [Entity Relationship Diagram](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%237%20-%20Balanced%20Tree%20Clothing%20Co.#-entity-relationship-diagram)
9 | * [Case Study Questions](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%237%20-%20Balanced%20Tree%20Clothing%20Co.#-case-study-questions)
10 | * [My Solution](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%237%20-%20Balanced%20Tree%20Clothing%20Co.#-my-solution)
11 |
12 | ---
13 | ## 🛠️ Business Task
14 |
15 | Balanced Tree Clothing Company prides themselves on providing an optimised range of clothing and lifestyle wear for the modern adventurer!
16 |
17 | Danny, the CEO of this trendy fashion company, has asked you to assist the merchandising teams in analysing their sales performance
18 | and generating a basic financial report to share with the wider business.
19 |
20 | ---
21 | ## 🔐 Entity Relationship Diagram
22 | ### Regular questions
23 |
24 |
25 |
26 | ### Bonus question
27 |
28 |
29 |
30 | ---
31 | ## ❓ Case Study Questions
32 | ### A. High Level Sales Analysis
33 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%237%20-%20Balanced%20Tree%20Clothing%20Co./Solution/A.%20High%20Level%20Sales%20Analysis.md).
34 |
35 | 1. What was the total quantity sold for all products?
36 | 2. What is the total generated revenue for all products before discounts?
37 | 3. What was the total discount amount for all products?
38 |
39 | ---
40 | ### B. Transaction Analysis
41 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%237%20-%20Balanced%20Tree%20Clothing%20Co./Solution/B.%20Transaction%20Analysis.md).
42 |
43 | 1. How many unique transactions were there?
44 | 2. What is the average unique products purchased in each transaction?
45 | 3. What are the 25th, 50th and 75th percentile values for the revenue per transaction?
46 | 4. What is the average discount value per transaction?
47 | 5. What is the percentage split of all transactions for members vs non-members?
48 | 6. What is the average revenue for member transactions and non-member transactions?
49 |
50 | ---
51 | ### C. Product Analysis
52 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%237%20-%20Balanced%20Tree%20Clothing%20Co./Solution/C.%20Product%20Analysis.md).
53 |
54 | 1. What are the top 3 products by total revenue before discount?
55 | 2. What is the total quantity, revenue and discount for each segment?
56 | 3. What is the top selling product for each segment?
57 | 4. What is the total quantity, revenue and discount for each category?
58 | 5. What is the top selling product for each category?
59 | 6. What is the percentage split of revenue by product for each segment?
60 | 7. What is the percentage split of revenue by segment for each category?
61 | 8. What is the percentage split of total revenue by category?
62 | 9. What is the total transaction “penetration” for each product? (hint: penetration = number of transactions where at least 1 quantity of a product was purchased divided by total number of transactions)
63 | 10. What is the most common combination of at least 1 quantity of any 3 products in a 1 single transaction?
64 |
65 | ---
66 | ### D. Bonus Challenge
67 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%237%20-%20Balanced%20Tree%20Clothing%20Co./Solution/D.%20Bonus%20Question.md).
68 |
69 | Use a single SQL query to transform the `product_hierarchy` and `product_prices` datasets to the `product_details` table.
70 |
71 | ---
72 | ## 🚀 My Solution
73 | * View the complete syntax [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%237%20-%20Balanced%20Tree%20Clothing%20Co./Syntax).
74 | * View the result and explanation [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%237%20-%20Balanced%20Tree%20Clothing%20Co./Solution).
75 |
--------------------------------------------------------------------------------
/Case Study #7 - Balanced Tree Clothing Co./Solution/A. High Level Sales Analysis.md:
--------------------------------------------------------------------------------
1 | # 👕 Case Study #7 - Balanced Tree Clothing Co.
2 | ## A. High Level Sales Analysis
3 | ### 1. What was the total quantity sold for all products?
4 | ```TSQL
5 | SELECT SUM(qty) AS total_quantity
6 | FROM sales;
7 | ```
8 | | total_quantity |
9 | |-----------------|
10 | | 45216 |
11 |
12 | ---
13 | ### 2. What is the total generated revenue for all products before discounts?
14 | ```TSQL
15 | SELECT SUM(qty * price) AS revenue_before_discounts
16 | FROM sales;
17 | ```
18 | | revenue_before_discounts |
19 | |---------------------------|
20 | | 1289453 |
21 |
22 | ---
23 | ### 3. What was the total discount amount for all products?
24 | ```TSQL
25 | SELECT CAST(SUM(qty * price * discount/100.0) AS FLOAT) AS total_discount
26 | FROM sales;
27 | ```
28 | | total_discount |
29 | |-----------------|
30 | | 156229.14 |
31 |
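A note on the `/100.0`: `discount` is stored as a whole-number percentage, and T-SQL integer division truncates, so dividing by `100` would silently drop the fractional part of every line item. A minimal sketch of the difference, using made-up literals rather than real rows:

```TSQL
--Hypothetical line item: qty = 3, price = 13, discount = 17 (percent)
SELECT
  3 * 13 * 17 / 100   AS truncated_discount,  --6 (integer division)
  3 * 13 * 17 / 100.0 AS exact_discount;      --6.630000 (decimal division)
```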
32 | ---
33 | My solution for **[B. Transaction Analysis](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%237%20-%20Balanced%20Tree%20Clothing%20Co./Solution/B.%20Transaction%20Analysis.md)**.
34 |
--------------------------------------------------------------------------------
/Case Study #7 - Balanced Tree Clothing Co./Solution/B. Transaction Analysis.md:
--------------------------------------------------------------------------------
1 | # 👕 Case Study #7 - Balanced Tree Clothing Co.
2 | ## B. Transaction Analysis
3 | ### 1. How many unique transactions were there?
4 | ```TSQL
5 | SELECT COUNT(DISTINCT txn_id) AS unique_transactions
6 | FROM sales;
7 | ```
8 | | unique_transactions |
9 | |----------------------|
10 | | 2500 |
11 |
12 | ---
13 | ### 2. What is the average unique products purchased in each transaction?
14 | ```TSQL
15 | SELECT AVG(product_count) AS avg_unique_products
16 | FROM (
17 | SELECT
18 | txn_id,
19 | COUNT(DISTINCT prod_id) AS product_count
20 | FROM sales
21 | GROUP BY txn_id
22 | ) temp;
23 | ```
24 | | avg_unique_products |
25 | |----------------------|
26 | | 6 |
27 |
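In T-SQL, `AVG` over an `INT` expression performs integer arithmetic, so any fractional part of the true average is truncated; multiplying by `1.0` first (as question 6 below does) keeps the decimals. A small illustration with throwaway values:

```TSQL
SELECT AVG(n) AS int_avg, AVG(1.0 * n) AS decimal_avg  --6 vs. 6.333333
FROM (VALUES (5), (6), (8)) AS t(n);
```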
28 | ---
29 | ### 3. What are the 25th, 50th and 75th percentile values for the revenue per transaction?
30 | ```TSQL
31 | WITH transaction_revenue AS (
32 | SELECT
33 | txn_id,
34 | SUM(qty*price) AS revenue
35 | FROM sales
36 | GROUP BY txn_id)
37 |
38 | SELECT
39 | DISTINCT
40 | PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY revenue) OVER () AS pct_25th,
41 | PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY revenue) OVER () AS pct_50th,
42 | PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY revenue) OVER () AS pct_75th
43 | FROM transaction_revenue;
44 | ```
45 | | pct_25th | pct_50th | pct_75th |
46 | |----------|----------|-----------|
47 | | 375.75 | 509.5 | 647 |
48 |
SQL Server only exposes `PERCENTILE_CONT` as a window function, so the query above returns the same three percentile values on every row of `transaction_revenue`; the `DISTINCT` collapses those duplicates into a single row.
49 | ---
50 | ### 4. What is the average discount value per transaction?
51 | ```TSQL
52 | SELECT CAST(AVG(total_discount) AS decimal(5, 1)) AS avg_discount_per_transaction
53 | FROM (
54 | SELECT
55 | txn_id,
56 | SUM(qty*price*discount/100.0) AS total_discount
57 | FROM sales
58 | GROUP BY txn_id
59 | ) temp;
60 | ```
61 | | avg_discount_per_transaction |
62 | |-------------------------------|
63 | | 62.5 |
64 |
65 | ---
66 | ### 5. What is the percentage split of all transactions for members vs non-members?
67 | ```TSQL
68 | SELECT
69 | CAST(100.0*COUNT(DISTINCT CASE WHEN member = 1 THEN txn_id END)
70 | / COUNT(DISTINCT txn_id) AS FLOAT) AS members_pct,
71 | CAST(100.0*COUNT(DISTINCT CASE WHEN member = 0 THEN txn_id END)
72 | / COUNT(DISTINCT txn_id) AS FLOAT) AS non_members_pct
73 | FROM sales;
74 | ```
75 | | members_pct | non_members_pct |
76 | |---------|--------------|
77 | | 60.2 | 39.8 |
78 |
79 | ---
80 | ### 6. What is the average revenue for member transactions and non-member transactions?
81 | ```TSQL
82 | WITH member_revenue AS (
83 | SELECT
84 | member,
85 | txn_id,
86 | SUM(qty*price) AS revenue
87 | FROM sales
88 | GROUP BY member, txn_id
89 | )
90 |
91 | SELECT
92 | member,
93 | CAST(AVG(1.0*revenue) AS decimal(10,2)) AS avg_revenue
94 | FROM member_revenue
95 | GROUP BY member;
96 | ```
97 | | member | avg_revenue |
98 | |--------|--------------|
99 | | 0 | 515.04 |
100 | | 1 | 516.27 |
101 |
102 | ---
103 | My solution for **[C. Product Analysis](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%237%20-%20Balanced%20Tree%20Clothing%20Co./Solution/C.%20Product%20Analysis.md)**.
104 |
--------------------------------------------------------------------------------
/Case Study #7 - Balanced Tree Clothing Co./Solution/D. Bonus Question.md:
--------------------------------------------------------------------------------
1 | # 👕 Case Study #7 - Balanced Tree Clothing Co.
2 | ## D. Bonus Question
3 | Use a single SQL query to transform the `product_hierarchy` and `product_prices` datasets to the `product_details` table.
4 |
5 | ### Solution
6 |
7 | ```TSQL
8 | SELECT
9 | pp.product_id,
10 | pp.price,
11 | CONCAT(ph1.level_text, ' ', ph2.level_text, ' - ', ph3.level_text) AS product_name,
12 | ph2.parent_id AS category_id,
13 | ph1.parent_id AS segment_id,
14 | ph1.id AS style_id,
15 | ph3.level_text AS category_name,
16 | ph2.level_text AS segment_name,
17 | ph1.level_text AS style_name
18 | FROM product_hierarchy ph1
19 | --self join style level (ph1) with segment level (ph2)
20 | JOIN product_hierarchy ph2 ON ph1.parent_id = ph2.id
21 | --self join segment level (ph2) with category level (ph3)
22 | JOIN product_hierarchy ph3 ON ph3.id = ph2.parent_id
23 | --inner join style level (ph1) with table [product_prices]
24 | JOIN product_prices pp ON ph1.id = pp.id;
25 | ```
26 | | product_id | price | product_name | category_id | segment_id | style_id | category_name | segment_name | style_name |
27 | |------------|-------|----------------------------------|-------------|------------|----------|---------------|--------------|---------------------|
28 | | c4a632 | 13 | Navy Oversized Jeans - Womens | 1 | 3 | 7 | Womens | Jeans | Navy Oversized |
29 | | e83aa3 | 32 | Black Straight Jeans - Womens | 1 | 3 | 8 | Womens | Jeans | Black Straight |
30 | | e31d39 | 10 | Cream Relaxed Jeans - Womens | 1 | 3 | 9 | Womens | Jeans | Cream Relaxed |
31 | | d5e9a6 | 23 | Khaki Suit Jacket - Womens | 1 | 4 | 10 | Womens | Jacket | Khaki Suit |
32 | | 72f5d4 | 19 | Indigo Rain Jacket - Womens | 1 | 4 | 11 | Womens | Jacket | Indigo Rain |
33 | | 9ec847 | 54 | Grey Fashion Jacket - Womens | 1 | 4 | 12 | Womens | Jacket | Grey Fashion |
34 | | 5d267b | 40 | White Tee Shirt - Mens | 2 | 5 | 13 | Mens | Shirt | White Tee |
35 | | c8d436 | 10 | Teal Button Up Shirt - Mens | 2 | 5 | 14 | Mens | Shirt | Teal Button Up |
36 | | 2a2353 | 57 | Blue Polo Shirt - Mens | 2 | 5 | 15 | Mens | Shirt | Blue Polo |
37 | | f084eb | 36 | Navy Solid Socks - Mens | 2 | 6 | 16 | Mens | Socks | Navy Solid |
38 | | b9a74d | 17 | White Striped Socks - Mens | 2 | 6 | 17 | Mens | Socks | White Striped |
39 | | 2feb6b | 29 | Pink Fluro Polkadot Socks - Mens | 2 | 6 | 18 | Mens | Socks | Pink Fluro Polkadot |
40 |
Because `product_hierarchy` stores all three levels in one table, only the style-level rows (`ph1`) survive both inner self joins: a style's `parent_id` points to its segment (`ph2`), whose `parent_id` points to its category (`ph3`); category rows have no parent, so they drop out automatically.
--------------------------------------------------------------------------------
/Case Study #7 - Balanced Tree Clothing Co./Syntax/A_query.sql:
--------------------------------------------------------------------------------
1 | --------------------------------
2 | --A. High Level Sales Analysis--
3 | --------------------------------
4 |
5 | --1. What was the total quantity sold for all products?
6 |
7 | SELECT SUM(qty) AS total_quantity
8 | FROM sales;
9 |
10 |
11 | --2. What is the total generated revenue for all products before discounts?
12 |
13 | SELECT SUM(qty * price) AS revenue_before_discounts
14 | FROM sales;
15 |
16 |
17 | --3. What was the total discount amount for all products?
18 |
19 | SELECT CAST(SUM(qty * price * discount/100.0) AS FLOAT) AS total_discount
20 | FROM sales;
21 |
--------------------------------------------------------------------------------
/Case Study #7 - Balanced Tree Clothing Co./Syntax/B_query.sql:
--------------------------------------------------------------------------------
1 | ---------------------------
2 | --B. Transaction Analysis--
3 | ---------------------------
4 |
5 | --1. How many unique transactions were there?
6 |
7 | SELECT COUNT(DISTINCT txn_id) AS unique_transactions
8 | FROM sales;
9 |
10 |
11 | --2. What is the average unique products purchased in each transaction?
12 |
13 | SELECT AVG(product_count) AS avg_unique_products
14 | FROM (
15 | SELECT
16 | txn_id,
17 | COUNT(DISTINCT prod_id) AS product_count
18 | FROM sales
19 | GROUP BY txn_id
20 | ) temp;
21 |
22 |
23 | --3. What are the 25th, 50th and 75th percentile values for the revenue per transaction?
24 |
25 | WITH transaction_revenue AS (
26 | SELECT
27 | txn_id,
28 | SUM(qty*price) AS revenue
29 | FROM sales
30 | GROUP BY txn_id)
31 |
32 | SELECT
33 | DISTINCT
34 | PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY revenue) OVER () AS pct_25th,
35 | PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY revenue) OVER () AS pct_50th,
36 | PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY revenue) OVER () AS pct_75th
37 | FROM transaction_revenue;
38 |
39 |
40 | --4. What is the average discount value per transaction?
41 |
42 | SELECT CAST(AVG(total_discount) AS decimal(5, 1)) AS avg_discount_per_transaction
43 | FROM (
44 | SELECT
45 | txn_id,
46 | SUM(qty*price*discount/100.0) AS total_discount
47 | FROM sales
48 | GROUP BY txn_id
49 | ) temp;
50 |
51 |
52 | --5. What is the percentage split of all transactions for members vs non-members?
53 |
54 | SELECT
55 | CAST(100.0*COUNT(DISTINCT CASE WHEN member = 1 THEN txn_id END)
56 | / COUNT(DISTINCT txn_id) AS FLOAT) AS members_pct,
57 | CAST(100.0*COUNT(DISTINCT CASE WHEN member = 0 THEN txn_id END)
58 | / COUNT(DISTINCT txn_id) AS FLOAT) AS non_members_pct
59 | FROM sales;
60 |
61 |
62 | --6. What is the average revenue for member transactions and non-member transactions?
63 |
64 | WITH member_revenue AS (
65 | SELECT
66 | member,
67 | txn_id,
68 | SUM(qty*price) AS revenue
69 | FROM sales
70 | GROUP BY member, txn_id
71 | )
72 |
73 | SELECT
74 | member,
75 | CAST(AVG(1.0*revenue) AS decimal(10,2)) AS avg_revenue
76 | FROM member_revenue
77 | GROUP BY member;
78 |
--------------------------------------------------------------------------------
/Case Study #7 - Balanced Tree Clothing Co./Syntax/C_query.sql:
--------------------------------------------------------------------------------
1 | -----------------------
2 | --C. Product Analysis--
3 | -----------------------
4 |
5 | --1. What are the top 3 products by total revenue before discount?
6 |
7 | SELECT
8 | TOP 3 pd.product_name,
9 | SUM(s.qty * s.price) AS revenue_before_discount
10 | FROM sales s
11 | JOIN product_details pd
12 | ON s.prod_id = pd.product_id
13 | GROUP BY pd.product_name
14 | ORDER BY SUM(s.qty * s.price) DESC;
15 |
16 |
17 | --2. What is the total quantity, revenue and discount for each segment?
18 |
19 | SELECT
20 | pd.segment_name,
21 |   SUM(s.qty) AS total_quantity,
22 |   SUM(s.qty * s.price) AS total_revenue_before_discount,
23 |   SUM(s.qty * s.price * s.discount / 100.0) AS total_discount
24 | FROM sales s
25 | JOIN product_details pd
26 | ON s.prod_id = pd.product_id
27 | GROUP BY pd.segment_name;
28 |
29 |
30 | --3. What is the top selling product for each segment?
31 |
32 | WITH segment_product_quantity AS (
33 | SELECT
34 | pd.segment_name,
35 | pd.product_name,
36 | SUM(s.qty) AS total_quantity,
37 | DENSE_RANK() OVER (PARTITION BY pd.segment_name ORDER BY SUM(s.qty) DESC) AS rnk
38 | FROM sales s
39 | JOIN product_details pd
40 | ON s.prod_id = pd.product_id
41 | GROUP BY pd.segment_name, pd.product_name
42 | )
43 |
44 | SELECT
45 | segment_name,
46 | product_name AS top_selling_product,
47 | total_quantity
48 | FROM segment_product_quantity
49 | WHERE rnk = 1;
50 |
51 |
52 | --4. What is the total quantity, revenue and discount for each category?
53 |
54 | SELECT
55 | pd.category_name,
56 | SUM(s.qty) AS total_quantity,
57 | SUM(s.qty * s.price) AS total_revenue,
58 |   SUM(s.qty * s.price * s.discount / 100.0) AS total_discount
59 | FROM sales s
60 | JOIN product_details pd
61 | ON s.prod_id = pd.product_id
62 | GROUP BY pd.category_name;
63 |
64 |
65 | --5. What is the top selling product for each category?
66 |
67 | WITH category_product_quantity AS (
68 | SELECT
69 | pd.category_name,
70 | pd.product_name,
71 | SUM(s.qty) AS total_quantity,
72 | DENSE_RANK() OVER (PARTITION BY pd.category_name ORDER BY SUM(s.qty) DESC) AS rnk
73 | FROM sales s
74 | JOIN product_details pd
75 | ON s.prod_id = pd.product_id
76 | GROUP BY pd.category_name, pd.product_name
77 | )
78 |
79 | SELECT
80 | category_name,
81 | product_name AS top_selling_product,
82 | total_quantity
83 | FROM category_product_quantity
84 | WHERE rnk = 1;
85 |
86 |
87 | --6. What is the percentage split of revenue by product for each segment?
88 |
89 | WITH segment_product_revenue AS (
90 | SELECT
91 | pd.segment_name,
92 | pd.product_name,
93 | SUM(s.qty * s.price) AS product_revenue
94 | FROM sales s
95 | JOIN product_details pd
96 | ON s.prod_id = pd.product_id
97 | GROUP BY pd.segment_name, pd.product_name
98 | )
99 |
100 | SELECT
101 | segment_name,
102 | product_name,
103 | CAST(100.0 * product_revenue
104 | / SUM(product_revenue) OVER (PARTITION BY segment_name)
105 | AS decimal (10, 2)) AS segment_product_pct
106 | FROM segment_product_revenue;
107 |
108 |
109 | --7. What is the percentage split of revenue by segment for each category?
110 |
111 | WITH segment_category_revenue AS (
112 | SELECT
113 | pd.segment_name,
114 | pd.category_name,
115 | SUM(s.qty * s.price) AS category_revenue
116 | FROM sales s
117 | JOIN product_details pd
118 | ON s.prod_id = pd.product_id
119 | GROUP BY pd.segment_name, pd.category_name
120 | )
121 |
122 | SELECT
123 | segment_name,
124 | category_name,
125 | CAST(100.0 * category_revenue
126 | / SUM(category_revenue) OVER (PARTITION BY category_name)
127 | AS decimal (10, 2)) AS segment_category_pct
128 | FROM segment_category_revenue;
129 |
130 |
131 | --8. What is the percentage split of total revenue by category?
132 |
133 | WITH category_revenue AS (
134 | SELECT
135 | pd.category_name,
136 | SUM(s.qty * s.price) AS revenue
137 | FROM sales s
138 | JOIN product_details pd
139 | ON s.prod_id = pd.product_id
140 | GROUP BY pd.category_name
141 | )
142 |
143 | SELECT
144 | category_name,
145 | CAST(100.0 * revenue / SUM(revenue) OVER () AS decimal (10, 2)) AS category_pct
146 | FROM category_revenue;
147 |
148 |
149 | --9. What is the total transaction “penetration” for each product?
150 | --(hint: penetration = number of transactions where at least 1 quantity of a product was purchased divided by total number of transactions)
151 |
152 | WITH product_transactions AS (
153 |   SELECT
154 |     s.prod_id, pd.product_name,
155 |     COUNT(DISTINCT s.txn_id) AS product_txn,
156 |     (SELECT COUNT(DISTINCT txn_id) FROM sales) AS total_txn
157 |   FROM sales s
158 |   JOIN product_details pd
159 |     ON s.prod_id = pd.product_id
160 |   GROUP BY s.prod_id, pd.product_name
161 | )
162 |
163 | SELECT
164 | *,
165 | CAST(100.0 * product_txn / total_txn AS decimal(10,2)) AS penetration_pct
166 | FROM product_transactions;
167 |
168 |
169 | --10. What is the most common combination of at least 1 quantity of any 3 products in a 1 single transaction?
170 |
171 | --Count the number of products in each transaction
172 | WITH products_per_transaction AS (
173 | SELECT
174 | s.txn_id,
175 | pd.product_id,
176 | pd.product_name,
177 | s.qty,
178 | COUNT(pd.product_id) OVER (PARTITION BY txn_id) AS cnt
179 | FROM sales s
180 | JOIN product_details pd
181 | ON s.prod_id = pd.product_id
182 | ),
183 |
184 | --Keep transactions with exactly 3 products and collapse each into one cell; WITHIN GROUP fixes the order so identical combinations always produce the same string
185 | combinations AS (
186 |   SELECT
187 |     STRING_AGG(product_id, ', ') WITHIN GROUP (ORDER BY product_id) AS products_id,
188 |     STRING_AGG(product_name, ', ') WITHIN GROUP (ORDER BY product_id) AS products_name
189 | FROM products_per_transaction
190 | WHERE cnt = 3
191 | GROUP BY txn_id
192 | ),
193 |
194 | --Count the number of times each combination appears
195 | combination_count AS (
196 | SELECT
197 | products_id,
198 | products_name,
199 |     COUNT(*) AS common_combinations
200 | FROM combinations
201 | GROUP BY products_id, products_name
202 | )
203 |
204 | --Filter the most common combinations
205 | SELECT
206 | products_id,
207 | products_name
208 | FROM combination_count
209 | WHERE common_combinations = (SELECT MAX(common_combinations)
210 | FROM combination_count);
211 |
--------------------------------------------------------------------------------
/Case Study #7 - Balanced Tree Clothing Co./Syntax/D_query.sql:
--------------------------------------------------------------------------------
1 | ----------------------
2 | --D. Bonus Challenge--
3 | ----------------------
4 |
5 | --Use a single SQL query to transform the product_hierarchy and product_prices datasets to the product_details table.
6 |
7 | SELECT
8 | pp.product_id,
9 | pp.price,
10 | CONCAT(ph1.level_text, ' ', ph2.level_text, ' - ', ph3.level_text) AS product_name,
11 | ph2.parent_id AS category_id,
12 | ph1.parent_id AS segment_id,
13 | ph1.id AS style_id,
14 | ph3.level_text AS category_name,
15 | ph2.level_text AS segment_name,
16 | ph1.level_text AS style_name
17 | FROM product_hierarchy ph1
18 | --self join style level (ph1) with segment level (ph2)
19 | JOIN product_hierarchy ph2 ON ph1.parent_id = ph2.id
20 | --self join segment level (ph2) with category level (ph3)
21 | JOIN product_hierarchy ph3 ON ph3.id = ph2.parent_id
22 | --inner join style level (ph1) with table [product_prices]
23 | JOIN product_prices pp ON ph1.id = pp.id;
24 |
--------------------------------------------------------------------------------
/Case Study #8 - Fresh Segments/README.md:
--------------------------------------------------------------------------------
1 | # 🍊 Case Study #8 - Fresh Segments
2 |
3 |
4 |
5 |
6 | ## 📕 Table of Contents
7 | * [Business Task](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%238%20-%20Fresh%20Segments#%EF%B8%8F-bussiness-task)
8 | * [Entity Relationship Diagram](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%238%20-%20Fresh%20Segments#-entity-relationship-diagram)
9 | * [Case Study Questions](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%238%20-%20Fresh%20Segments#-case-study-questions)
10 | * [My Solution](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%238%20-%20Fresh%20Segments#-my-solution)
11 |
12 | ---
13 | ## 🛠️ Business Task
14 | Fresh Segments is a digital marketing agency that helps other businesses analyse trends in online ad click behaviour for their unique customer base.
15 |
16 | Clients share their customer lists with the Fresh Segments team who then aggregate interest metrics and generate a single dataset worth of metrics for further analysis.
17 | In particular - the composition and rankings for different interests are provided for each client showing the proportion of their customer list who interacted with online assets related to each interest for each month.
18 |
19 | Danny has asked for your assistance to analyse aggregated metrics for an example client and provide some high level insights about the customer list and their interests.
20 |
21 | ---
22 | ## 🔐 Entity Relationship Diagram
23 |
24 |
25 |
26 | ---
27 | ## ❓ Case Study Questions
28 | ### A. Data Exploration and Cleansing
29 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%238%20-%20Fresh%20Segments/Solution/A.%20Data%20Exploration%20and%20Cleansing.md).
30 |
31 | 1. Update the `fresh_segments.interest_metrics` table by modifying the `month_year` column to be a date data type with the start of the month
32 | 2. What is count of records in the `fresh_segments.interest_metrics` for each `month_year` value sorted in chronological order (earliest to latest)
33 | with the null values appearing first?
34 | 3. What do you think we should do with these null values in the `fresh_segments.interest_metrics`
35 | 4. How many `interest_id` values exist in the `fresh_segments.interest_metrics` table but not in the `fresh_segments.interest_map` table?
36 | What about the other way around?
37 | 5. Summarise the `id` values in the `fresh_segments.interest_map` by its total record count in this table.
38 | 6. What sort of table join should we perform for our analysis and why?
39 | Check your logic by checking the rows where `interest_id` = 21246 in your joined output and
40 | include all columns from `fresh_segments.interest_metrics` and all columns from `fresh_segments.interest_map` except from the `id` column.
41 | 7. Are there any records in your joined table where the `month_year` value is before the `created_at` value from the `fresh_segments.interest_map` table?
42 | Do you think these values are valid and why?
43 |
44 | ---
45 | ### B. Interest Analysis
46 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%238%20-%20Fresh%20Segments/Solution/B.%20Interest%20Analysis.md).
47 |
48 | 1. Which interests have been present in all `month_year` dates in our dataset?
49 | 2. Using this same `total_months` measure - calculate the cumulative percentage of all records starting at 14 months - which `total_months` value
50 | passes the 90% cumulative percentage value?
51 | 3. If we were to remove all `interest_id` values which are lower than the `total_months` value we found in the previous question - how many total data points
52 | would we be removing?
53 | 4. Does this decision make sense to remove these data points from a business perspective?
54 | Use an example where there are all 14 months present to a removed interest example for your arguments - think about
55 | what it means to have less months present from a segment perspective.
56 | 5. After removing these interests - how many unique interests are there for each month?
57 |
58 | ---
59 | ### C. Segment Analysis
60 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%238%20-%20Fresh%20Segments/Solution/C.%20Segment%20Analysis.md).
61 |
62 | 1. Using our filtered dataset by removing the interests with less than 6 months worth of data,
63 | which are the top 10 and bottom 10 interests which have the largest composition values in any `month_year`?
64 | 2. Only use the maximum `composition` value for each interest but you must keep the corresponding `month_year`.
65 | 3. Which 5 interests had the lowest average `ranking` value?
66 | 4. Which 5 interests had the largest standard deviation in their `percentile_ranking` value?
67 | 5. For the 5 interests found in the previous question - what was minimum and maximum `percentile_ranking` values for each interest
68 | and its corresponding `year_month` value? Can you describe what is happening for these 5 interests?
69 | 6. How would you describe our customers in this segment based off their `composition` and ranking values?
70 | What sort of products or services should we show to these customers and what should we avoid?
71 |
72 | ---
73 | ### D. Index Analysis
74 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%238%20-%20Fresh%20Segments/Solution/D.%20Index%20Analysis.md).
75 |
76 | The `index_value` is a measure which can be used to reverse calculate the average composition for Fresh Segments’ clients.
77 |
78 | Average composition can be calculated by dividing the `composition` column by the `index_value` column rounded to 2 decimal places.
79 |
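For reference, a minimal sketch of that reverse calculation, reusing the column names above (this simply mirrors the formula, not an official query):

```TSQL
SELECT
  interest_id,
  month_year,
  ROUND(composition / index_value, 2) AS avg_composition
FROM fresh_segments.dbo.interest_metrics;
```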
80 | 1. What is the top 10 interests by the average composition for each month?
81 | 2. For all of these top 10 interests - which interest appears the most often?
82 | 3. What is the average of the average composition for the top 10 interests for each month?
83 | 4. What is the 3 month rolling average of the max average composition value from September 2018 to August 2019 and
84 | include the previous top ranking interests in the same output shown below.
85 | 5. Provide a possible reason why the max average composition might change from month to month?
86 | Could it signal something is not quite right with the overall business model for Fresh Segments?
87 |
88 | Required output for question 4:
89 |
90 | | month_year | interest_name | max_index_composition | 3_month_moving_avg | 1_month_ago | 2_months_ago |
91 | |------------|-------------------------------|-----------------------|--------------------|-----------------------------------|------------------------------------|
92 | | 2018-09-01 | Work Comes First Travelers | 8.26 | 7.61 | Las Vegas Trip Planners: 7.21 | Las Vegas Trip Planners: 7.36 |
93 | | 2018-10-01 | Work Comes First Travelers | 9.14 | 8.20 | Work Comes First Travelers: 8.26 | Las Vegas Trip Planners: 7.21 |
94 | | 2018-11-01 | Work Comes First Travelers | 8.28 | 8.56 | Work Comes First Travelers: 9.14 | Work Comes First Travelers: 8.26 |
95 | | 2018-12-01 | Work Comes First Travelers | 8.31 | 8.58 | Work Comes First Travelers: 8.28 | Work Comes First Travelers: 9.14 |
96 | | 2019-01-01 | Work Comes First Travelers | 7.66 | 8.08 | Work Comes First Travelers: 8.31 | Work Comes First Travelers: 8.28 |
97 | | 2019-02-01 | Work Comes First Travelers | 7.66 | 7.88 | Work Comes First Travelers: 7.66 | Work Comes First Travelers: 8.31 |
98 | | 2019-03-01 | Alabama Trip Planners | 6.54 | 7.29 | Work Comes First Travelers: 7.66 | Work Comes First Travelers: 7.66 |
99 | | 2019-04-01 | Solar Energy Researchers | 6.28 | 6.83 | Alabama Trip Planners: 6.54 | Work Comes First Travelers: 7.66 |
100 | | 2019-05-01 | Readers of Honduran Content | 4.41 | 5.74 | Solar Energy Researchers: 6.28 | Alabama Trip Planners: 6.54 |
101 | | 2019-06-01 | Las Vegas Trip Planners | 2.77 | 4.49 | Readers of Honduran Content: 4.41 | Solar Energy Researchers: 6.28 |
102 | | 2019-07-01 | Las Vegas Trip Planners | 2.82 | 3.33 | Las Vegas Trip Planners: 2.77 | Readers of Honduran Content: 4.41 |
103 | | 2019-08-01 | Cosmetics and Beauty Shoppers | 2.73 | 2.77 | Las Vegas Trip Planners: 2.82 | Las Vegas Trip Planners: 2.77 |
104 |
105 | ---
106 | ## 🚀 My Solution
107 | * View the complete syntax [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%238%20-%20Fresh%20Segments/Syntax).
108 | * View the result and explanation [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%238%20-%20Fresh%20Segments/Solution).
109 |
--------------------------------------------------------------------------------
/Case Study #8 - Fresh Segments/Syntax/A_query.sql:
--------------------------------------------------------------------------------
1 | -------------------------------------
2 | --A. Data Exploration and Cleansing--
3 | -------------------------------------
4 |
5 | --1. Update the fresh_segments.interest_metrics table by modifying the month_year column to be a date data type with the start of the month
6 |
7 | --Modify the length of column month_year so it can store 10 characters
8 | ALTER TABLE fresh_segments.dbo.interest_metrics
9 | ALTER COLUMN month_year VARCHAR(10);
10 |
11 | --Update values in month_year column
12 | UPDATE fresh_segments.dbo.interest_metrics
13 | SET month_year = CONVERT(DATE, '01-' + month_year, 105)
14 |
15 | --Convert month_year to DATE
16 | ALTER TABLE fresh_segments.dbo.interest_metrics
17 | ALTER COLUMN month_year DATE;
18 |
19 | SELECT * FROM fresh_segments.dbo.interest_metrics;
20 |
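--Sanity check for the conversion above (illustrative literal, not taken from the data):
--'01-' + '07-2018' = '01-07-2018', which style 105 (dd-mm-yyyy) reads as 1 July 2018
SELECT CONVERT(DATE, '01-' + '07-2018', 105) AS month_start;  --2018-07-01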
21 |
22 | --2. What is count of records in the fresh_segments.interest_metrics for each month_year value
23 | --sorted in chronological order (earliest to latest) with the null values appearing first?
24 |
25 | SELECT
26 | month_year,
27 | COUNT(*) AS cnt
28 | FROM interest_metrics
29 | GROUP BY month_year
30 | ORDER BY month_year;
31 |
32 |
33 | --3. What do you think we should do with these null values in the fresh_segments.interest_metrics?
34 |
35 | --interest_id = 21246 has NULL _month, _year, and month_year
36 | SELECT *
37 | FROM interest_metrics
38 | WHERE month_year IS NULL
39 | ORDER BY interest_id DESC;
40 |
41 | --Delete rows that are null in column interest_id (1193 rows)
42 | DELETE FROM interest_metrics
43 | WHERE interest_id IS NULL;
44 |
45 |
46 | --4. How many interest_id values exist in the fresh_segments.interest_metrics table
47 | --but not in the fresh_segments.interest_map table? What about the other way around?
48 |
49 | SELECT
50 | COUNT(DISTINCT map.id) AS map_id_count,
51 | COUNT(DISTINCT metrics.interest_id) AS metrics_id_count,
52 |   SUM(CASE WHEN map.id IS NULL THEN 1 END) AS not_in_map,
53 |   SUM(CASE WHEN metrics.interest_id IS NULL THEN 1 END) AS not_in_metrics
54 | FROM interest_metrics metrics
55 | FULL JOIN interest_map map
56 | ON metrics.interest_id = map.id;
57 |
58 |
59 | --5. Summarise the id values in the fresh_segments.interest_map by its total record count in this table
60 |
61 | SELECT COUNT(*) AS map_id_count
62 | FROM interest_map;
63 |
64 |
65 | --6. What sort of table join should we perform for our analysis and why?
66 | --Check your logic by checking the rows where interest_id = 21246 in your joined output and
67 | --include all columns from fresh_segments.interest_metrics and all columns from fresh_segments.interest_map except from the id column.
68 |
69 | SELECT
70 | metrics.*,
71 | map.interest_name,
72 | map.interest_summary,
73 | map.created_at,
74 | map.last_modified
75 | FROM interest_metrics metrics
76 | JOIN interest_map map
77 | ON metrics.interest_id = map.id
78 | WHERE metrics.interest_id = 21246;
79 |
80 |
81 | --7. Are there any records in your joined table where the month_year value is before the created_at value from the fresh_segments.interest_map table?
82 | --Do you think these values are valid and why?
83 |
84 | --Check if metrics.month_year < map.created_at
85 | SELECT COUNT(*) AS cnt
86 | FROM interest_metrics metrics
87 | JOIN interest_map map
88 | ON metrics.interest_id = map.id
89 | WHERE metrics.month_year < CAST(map.created_at AS DATE);
90 |
91 | --Check if metrics.month_year and map.created_at are in the same month
92 | SELECT COUNT(*) AS cnt
93 | FROM interest_metrics metrics
94 | JOIN interest_map map
95 | ON map.id = metrics.interest_id
96 | WHERE metrics.month_year < CAST(DATEADD(DAY, -DAY(map.created_at)+1, map.created_at) AS DATE);
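--DATEADD(DAY, -DAY(map.created_at)+1, map.created_at) rewinds created_at to the 1st of its month,
--so this second check only flags rows whose month_year is in an earlier month than created_at.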
97 |
--------------------------------------------------------------------------------
/Case Study #8 - Fresh Segments/Syntax/B_query.sql:
--------------------------------------------------------------------------------
1 | ------------------------
2 | --B. Interest Analysis--
3 | ------------------------
4 |
5 | --1. Which interests have been present in all month_year dates in our dataset?
6 |
7 | -- Find how many unique month_year dates in our dataset
8 | DECLARE @unique_month_year_cnt INT = (
9 | SELECT COUNT(DISTINCT month_year)
10 | FROM interest_metrics)
11 |
12 | --Filter all interest_id that have the count = @unique_month_year_cnt
13 | SELECT
14 | interest_id,
15 | COUNT(month_year) AS cnt
16 | FROM interest_metrics
17 | GROUP BY interest_id
18 | HAVING COUNT(month_year) = @unique_month_year_cnt;
19 |
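--Equivalent variable-free form (a stylistic alternative when the whole statement must run as one batch):
SELECT
  interest_id,
  COUNT(DISTINCT month_year) AS cnt
FROM interest_metrics
GROUP BY interest_id
HAVING COUNT(DISTINCT month_year) = (SELECT COUNT(DISTINCT month_year) FROM interest_metrics);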
20 |
21 | --2. Using this same total_months measure - calculate the cumulative percentage of all records starting at 14 months
22 | -- which total_months value passes the 90% cumulative percentage value?
23 |
24 | WITH interest_months AS (
25 | SELECT
26 | interest_id,
27 | COUNT(DISTINCT month_year) AS total_months
28 | FROM interest_metrics
29 | WHERE interest_id IS NOT NULL
30 | GROUP BY interest_id
31 | ),
32 | interest_count AS (
33 | SELECT
34 | total_months,
35 | COUNT(interest_id) AS interests
36 | FROM interest_months
37 | GROUP BY total_months
38 | )
39 |
40 | SELECT *,
41 | CAST(100.0 * SUM(interests) OVER(ORDER BY total_months DESC)
42 | / SUM(interests) OVER() AS decimal(10, 2)) AS cumulative_pct
43 | FROM interest_count;
44 |
45 |
46 | --3. If we were to remove all interest_id values which are lower than the total_months value we found in the previous question
47 | --- how many total data points would we be removing?
48 |
49 | WITH interest_months AS (
50 | SELECT
51 | interest_id,
52 | COUNT(DISTINCT month_year) AS total_months
53 | FROM interest_metrics
54 | WHERE interest_id IS NOT NULL
55 | GROUP BY interest_id
56 | )
57 |
58 | SELECT
59 | COUNT(interest_id) AS interests,
60 | COUNT(DISTINCT interest_id) AS unique_interests
61 | FROM interest_metrics
62 | WHERE interest_id IN (
63 | SELECT interest_id
64 | FROM interest_months
65 | WHERE total_months < 6);
66 |
67 |
68 | --4. Does this decision make sense to remove these data points from a business perspective?
69 | --Use an example where there are all 14 months present to a removed interest example for your arguments
70 | -- think about what it means to have less months present from a segment perspective.
71 |
72 | --When total_months = 14
73 | SELECT
74 | month_year,
75 | COUNT(DISTINCT interest_id) interest_count,
76 | MIN(ranking) AS highest_rank,
77 | MAX(composition) AS composition_max,
78 | MAX(index_value) AS index_max
79 | FROM interest_metrics metrics
80 | WHERE interest_id IN (
81 | SELECT interest_id
82 | FROM interest_metrics
83 | WHERE interest_id IS NOT NULL
84 | GROUP BY interest_id
85 | HAVING COUNT(DISTINCT month_year) = 14)
86 | GROUP BY month_year
87 | ORDER BY month_year, highest_rank;
88 |
89 | --When total_months = 1
90 | SELECT
91 | month_year,
92 | COUNT(DISTINCT interest_id) interest_count,
93 | MIN(ranking) AS highest_rank,
94 | MAX(composition) AS composition_max,
95 | MAX(index_value) AS index_max
96 | FROM interest_metrics metrics
97 | WHERE interest_id IN (
98 | SELECT interest_id
99 | FROM interest_metrics
100 | WHERE interest_id IS NOT NULL
101 | GROUP BY interest_id
102 | HAVING COUNT(DISTINCT month_year) = 1)
103 | GROUP BY month_year
104 | ORDER BY month_year, highest_rank;
105 |
106 |
107 | --5. After removing these interests - how many unique interests are there for each month?
108 |
109 | --Create a temporary table [interest_metrics_edited] that removes all interest_id that have total_months lower than 6
110 | SELECT *
111 | INTO #interest_metrics_edited
112 | FROM interest_metrics
113 | WHERE interest_id NOT IN (
114 | SELECT interest_id
115 | FROM interest_metrics
116 | WHERE interest_id IS NOT NULL
117 | GROUP BY interest_id
118 | HAVING COUNT(DISTINCT month_year) < 6);
119 |
120 | --Check the count of interests_id
121 | SELECT
122 | COUNT(interest_id) AS all_interests,
123 | COUNT(DISTINCT interest_id) AS unique_interests
124 | FROM #interest_metrics_edited;
125 |
126 | --Find the number of unique interests for each month after removing step above
127 | SELECT
128 | month_year,
129 | COUNT(DISTINCT interest_id) AS unique_interests
130 | FROM #interest_metrics_edited
131 | WHERE month_year IS NOT NULL
132 | GROUP BY month_year
133 | ORDER BY month_year;
134 |
--------------------------------------------------------------------------------
/Case Study #8 - Fresh Segments/Syntax/C_query.sql:
--------------------------------------------------------------------------------
1 | -----------------------
2 | --C. Segment Analysis--
3 | -----------------------
4 |
5 | --1. Using our filtered dataset by removing the interests with less than 6 months worth of data,
6 | --which are the top 10 and bottom 10 interests which have the largest composition values in any month_year?
7 | --Only use the maximum composition value for each interest but you must keep the corresponding month_year.
8 |
9 | WITH max_composition AS (
10 | SELECT
11 | month_year,
12 | interest_id,
13 | MAX(composition) OVER(PARTITION BY interest_id) AS largest_composition
14 | FROM #interest_metrics_edited -- filtered dataset in which interests with less than 6 months are removed
15 | WHERE month_year IS NOT NULL
16 | ),
17 | composition_rank AS (
18 | SELECT *,
19 | DENSE_RANK() OVER(ORDER BY largest_composition DESC) AS rnk
20 | FROM max_composition
21 | )
22 |
23 | --Top 10 interests that have the largest composition values
24 | SELECT
25 |   DISTINCT cr.interest_id,
26 | im.interest_name,
27 | cr.rnk
28 | FROM composition_rank cr
29 | JOIN interest_map im ON cr.interest_id = im.id
30 | WHERE cr.rnk <= 10
31 | ORDER BY cr.rnk;
32 |
33 | --Bottom 10 interests that have the largest composition values
34 | SELECT
35 | DISTINCT TOP 10 cr.interest_id,
36 | im.interest_name,
37 | cr.rnk
38 | FROM composition_rank cr
39 | JOIN interest_map im ON cr.interest_id = im.id
40 | ORDER BY cr.rnk DESC;
41 |
42 |
43 | --2. Which 5 interests had the lowest average ranking value?
44 |
45 | SELECT
46 | TOP 5 metrics.interest_id,
47 | map.interest_name,
48 | CAST(AVG(1.0*metrics.ranking) AS decimal(10,2)) AS avg_ranking
49 | FROM #interest_metrics_edited metrics
50 | JOIN interest_map map
51 | ON metrics.interest_id = map.id
52 | GROUP BY metrics.interest_id, map.interest_name
53 | ORDER BY avg_ranking;
54 |
55 |
56 | --3. Which 5 interests had the largest standard deviation in their percentile_ranking value?
57 |
58 | SELECT
59 | DISTINCT TOP 5 metrics.interest_id,
60 | map.interest_name,
61 | ROUND(STDEV(metrics.percentile_ranking)
62 | OVER(PARTITION BY metrics.interest_id), 2) AS std_percentile_ranking
63 | FROM #interest_metrics_edited metrics
64 | JOIN interest_map map
65 | ON metrics.interest_id = map.id
66 | ORDER BY std_percentile_ranking DESC;
67 |
68 |
69 | --4. For the 5 interests found in the previous question - what were minimum and maximum percentile_ranking values for each interest
70 | --and its corresponding year_month value? Can you describe what is happening for these 5 interests?
71 |
72 | --Based on the query for the previous question
73 | WITH largest_std_interests AS (
74 | SELECT
75 | DISTINCT TOP 5 metrics.interest_id,
76 | map.interest_name,
77 | map.interest_summary,
78 | ROUND(STDEV(metrics.percentile_ranking)
79 | OVER(PARTITION BY metrics.interest_id), 2) AS std_percentile_ranking
80 | FROM #interest_metrics_edited metrics
81 | JOIN interest_map map
82 | ON metrics.interest_id = map.id
83 | ORDER BY std_percentile_ranking DESC
84 | ),
85 | max_min_percentiles AS (
86 | SELECT
87 | lsi.interest_id,
88 | lsi.interest_name,
89 |     lsi.interest_summary,
90 | ime.month_year,
91 | ime.percentile_ranking,
92 | MAX(ime.percentile_ranking) OVER(PARTITION BY lsi.interest_id) AS max_pct_rnk,
93 | MIN(ime.percentile_ranking) OVER(PARTITION BY lsi.interest_id) AS min_pct_rnk
94 | FROM largest_std_interests lsi
95 | JOIN #interest_metrics_edited ime
96 | ON lsi.interest_id = ime.interest_id
97 | )
98 |
99 | SELECT
100 | interest_id,
101 | interest_name,
102 | interest_summary,
103 | MAX(CASE WHEN percentile_ranking = max_pct_rnk THEN month_year END) AS max_pct_month_year,
104 | MAX(CASE WHEN percentile_ranking = max_pct_rnk THEN percentile_ranking END) AS max_pct_rnk,
105 | MIN(CASE WHEN percentile_ranking = min_pct_rnk THEN month_year END) AS min_pct_month_year,
106 | MIN(CASE WHEN percentile_ranking = min_pct_rnk THEN percentile_ranking END) AS min_pct_rnk
107 | FROM max_min_percentiles
108 | GROUP BY interest_id, interest_name, interest_summary;
109 |
--------------------------------------------------------------------------------
/Case Study #8 - Fresh Segments/Syntax/D_query.sql:
--------------------------------------------------------------------------------
1 | ---------------------
2 | --D. Index Analysis--
3 | ---------------------
4 |
5 | --1. What is the top 10 interests by the average composition for each month?
6 |
7 | WITH avg_composition_rank AS (
8 | SELECT
9 | metrics.interest_id,
10 | map.interest_name,
11 | metrics.month_year,
12 | ROUND(metrics.composition / metrics.index_value, 2) AS avg_composition,
13 | DENSE_RANK() OVER(PARTITION BY metrics.month_year ORDER BY metrics.composition / metrics.index_value DESC) AS rnk
14 | FROM interest_metrics metrics
15 | JOIN interest_map map
16 | ON metrics.interest_id = map.id
17 | WHERE metrics.month_year IS NOT NULL
18 | )
19 | SELECT *
20 | FROM avg_composition_rank
21 | --filter top 10 interests for each month
22 | WHERE rnk <= 10;
23 |
24 |
25 | --2. For all of these top 10 interests - which interest appears the most often?
26 |
27 | WITH avg_composition_rank AS (
28 | SELECT
29 | metrics.interest_id,
30 | map.interest_name,
31 | metrics.month_year,
32 | ROUND(metrics.composition / metrics.index_value, 2) AS avg_composition,
33 | DENSE_RANK() OVER(PARTITION BY metrics.month_year ORDER BY metrics.composition / metrics.index_value DESC) AS rnk
34 | FROM interest_metrics metrics
35 | JOIN interest_map map
36 | ON metrics.interest_id = map.id
37 | WHERE metrics.month_year IS NOT NULL
38 | ),
39 | frequent_interests AS (
40 | SELECT
41 | interest_id,
42 | interest_name,
43 | COUNT(*) AS freq
44 | FROM avg_composition_rank
45 | WHERE rnk <= 10 --filter top 10 interests for each month
46 | GROUP BY interest_id, interest_name
47 | )
48 |
49 | SELECT *
50 | FROM frequent_interests
51 | WHERE freq IN (SELECT MAX(freq) FROM frequent_interests);
52 |
53 |
54 | --3. What is the average of the average composition for the top 10 interests for each month?
55 |
56 | WITH avg_composition_rank AS (
57 | SELECT
58 | metrics.interest_id,
59 | map.interest_name,
60 | metrics.month_year,
61 | ROUND(metrics.composition / metrics.index_value, 2) AS avg_composition,
62 | DENSE_RANK() OVER(PARTITION BY metrics.month_year ORDER BY metrics.composition / metrics.index_value DESC) AS rnk
63 | FROM interest_metrics metrics
64 | JOIN interest_map map
65 | ON metrics.interest_id = map.id
66 | WHERE metrics.month_year IS NOT NULL
67 | )
68 |
69 | SELECT
70 | month_year,
71 | AVG(avg_composition) AS avg_of_avg_composition
72 | FROM avg_composition_rank
73 | WHERE rnk <= 10 --filter top 10 interests for each month
74 | GROUP BY month_year;
75 |
76 |
77 | --4. What is the 3 month rolling average of the max average composition value from September 2018 to August 2019
78 | --and include the previous top ranking interests in the same output shown below.
79 |
80 | WITH avg_compositions AS (
81 | SELECT
82 | month_year,
83 | interest_id,
84 | ROUND(composition / index_value, 2) AS avg_comp,
85 | ROUND(MAX(composition / index_value) OVER(PARTITION BY month_year), 2) AS max_avg_comp
86 | FROM interest_metrics
87 | WHERE month_year IS NOT NULL
88 | ),
89 | max_avg_compositions AS (
90 | SELECT *
91 | FROM avg_compositions
92 | WHERE avg_comp = max_avg_comp
93 | ),
94 | moving_avg_compositions AS (
95 | SELECT
96 | mac.month_year,
97 | im.interest_name,
98 | mac.max_avg_comp AS max_index_composition,
99 | ROUND(AVG(mac.max_avg_comp)
100 | OVER(ORDER BY mac.month_year
101 | ROWS BETWEEN 2 PRECEDING AND CURRENT ROW), 2) AS '3_month_moving_avg',
102 | LAG(im.interest_name) OVER (ORDER BY mac.month_year) + ': ' +
103 | CAST(LAG(mac.max_avg_comp) OVER (ORDER BY mac.month_year) AS VARCHAR(4)) AS '1_month_ago',
104 | LAG(im.interest_name, 2) OVER (ORDER BY mac.month_year) + ': ' +
105 |       CAST(LAG(mac.max_avg_comp, 2) OVER (ORDER BY mac.month_year) AS VARCHAR(4)) AS '2_months_ago'
106 | FROM max_avg_compositions mac
107 | JOIN interest_map im
108 | ON mac.interest_id = im.id
109 | )
110 |
111 | SELECT *
112 | FROM moving_avg_compositions
113 | WHERE month_year BETWEEN '2018-09-01' AND '2019-08-01';
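--Note: the LAG expressions above are NULL for the earliest months in the data
--(string + NULL yields NULL in T-SQL), which is another reason the final filter starts at '2018-09-01'.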
114 |
--------------------------------------------------------------------------------
/IMG/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/1.png
--------------------------------------------------------------------------------
/IMG/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/2.png
--------------------------------------------------------------------------------
/IMG/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/3.png
--------------------------------------------------------------------------------
/IMG/4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/4.png
--------------------------------------------------------------------------------
/IMG/5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/5.png
--------------------------------------------------------------------------------
/IMG/6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/6.png
--------------------------------------------------------------------------------
/IMG/7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/7.png
--------------------------------------------------------------------------------
/IMG/8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/8.png
--------------------------------------------------------------------------------
/IMG/e1.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/e1.PNG
--------------------------------------------------------------------------------
/IMG/e2.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/e2.PNG
--------------------------------------------------------------------------------
/IMG/e3.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/e3.PNG
--------------------------------------------------------------------------------
/IMG/e4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/e4.png
--------------------------------------------------------------------------------
/IMG/e5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/e5.png
--------------------------------------------------------------------------------
/IMG/e6.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/e6.PNG
--------------------------------------------------------------------------------
/IMG/e7.1.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/e7.1.PNG
--------------------------------------------------------------------------------
/IMG/e7.2.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/e7.2.PNG
--------------------------------------------------------------------------------
/IMG/e8-updated.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/e8-updated.PNG
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # :fire: 8-Week SQL Challenge
2 |
3 | This repository contains my solutions for the [#8WeekSQLChallenge](https://8weeksqlchallenge.com/) using MS SQL Server.
4 |
5 | Thanks @DataWithDanny for these interesting SQL case studies! 👋
6 |
7 |
8 | ## 📕 Table of Contents
9 | * [Case Study #1 - Danny's Diner](https://github.com/qanhnn12/8-Week-SQL-Challenge#-case-study-1---dannys-diner)
10 | * [Case Study #2 - Pizza Runner](https://github.com/qanhnn12/8-Week-SQL-Challenge#-case-study-2---pizza-runner)
11 | * [Case Study #3 - Foodie-Fi](https://github.com/qanhnn12/8-Week-SQL-Challenge#-case-study-3---foodie-fi)
12 | * [Case Study #4 - Data Bank](https://github.com/qanhnn12/8-Week-SQL-Challenge#bar_chart-case-study-4---data-bank)
13 | * [Case Study #5 - Data Mart](https://github.com/qanhnn12/8-Week-SQL-Challenge#-case-study-5---data-mart)
14 | * [Case Study #6 - Clique Bait](https://github.com/qanhnn12/8-Week-SQL-Challenge#-case-study-6---clique-bait)
15 | * [Case Study #7 - Balanced Tree Clothing Co.](https://github.com/qanhnn12/8-Week-SQL-Challenge#tshirt-case-study-7---balanced-tree-clothing-co)
16 | * [Case Study #8 - Fresh Segments](https://github.com/qanhnn12/8-Week-SQL-Challenge#-case-study-8---fresh-segments)
17 |
18 | ---
19 | ## 🍜 Case Study #1 - Danny's Diner
20 |
21 |
22 |
23 | Danny seriously loves Japanese food so in the beginning of 2021, he decides to embark upon a risky venture and opens up a cute little restaurant that sells his 3 favourite foods: sushi, curry and ramen.
24 |
25 | Danny’s Diner is in need of your assistance to help the restaurant stay afloat - the restaurant has captured some very basic data from their few months of operation but have no idea how to use their data to help them run the business.
26 |
27 | ***View the case study [here](https://8weeksqlchallenge.com/case-study-1/) and my solution
28 | [here](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%231%20-%20Danny's%20Diner).***
29 |
30 | ---
31 | ## 🍕 Case Study #2 - Pizza Runner
32 |
33 |
34 |
35 | Danny was scrolling through his Instagram feed when something really caught his eye - “80s Retro Styling and Pizza Is The Future!”
36 |
37 | Danny was sold on the idea, but he knew that pizza alone was not going to help him get seed funding to expand his new Pizza Empire - so he had one more genius idea to combine with it - he was going to Uberize it - and so Pizza Runner was launched!
38 |
39 | Danny started by recruiting “runners” to deliver fresh pizza from Pizza Runner Headquarters (otherwise known as Danny’s house) and also maxed out his credit card to pay freelance developers to build a mobile app to accept orders from customers.
40 |
41 | ***View the case study [here](https://8weeksqlchallenge.com/case-study-2/) and my solution [here](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%232%20-%20Pizza%20Runner).***
42 |
43 | ---
44 | ## 🥑 Case Study #3 - Foodie-Fi
45 |
46 |
47 |
48 | Subscription based businesses are super popular and Danny realised that there was a large gap in the market -
49 | he wanted to create a new streaming service that only had food related content - something like Netflix but with only cooking shows!
50 |
51 | Danny finds a few smart friends to launch his new startup Foodie-Fi in 2020 and started selling monthly and annual subscriptions,
52 | giving their customers unlimited on-demand access to exclusive food videos from around the world!
53 |
54 | ***View the case study [here](https://8weeksqlchallenge.com/case-study-3/) and my solution [here](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%233%20-%20Foodie-Fi).***
55 |
56 | ---
57 | ## :bar_chart: Case Study #4 - Data Bank
58 |
59 |
60 |
61 | There is a new innovation in the financial industry called Neo-Banks: new aged digital only banks without physical branches.
62 |
63 | Danny thought that there should be some sort of intersection between these new age banks, cryptocurrency and the data world…so he decides to launch a new initiative - Data Bank!
64 |
65 | The management team at Data Bank want to increase their total customer base - but also need some help tracking just how much data storage their customers will need.
66 |
67 | This case study is all about calculating metrics, growth and helping the business analyse their data in a smart way to better forecast and plan for their future developments!
68 |
69 | ***View the case study [here](https://8weeksqlchallenge.com/case-study-4/) and my solution [here](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%234%20-%20Data%20Bank).***
70 |
71 | ---
72 | ## 🛒 Case Study #5 - Data Mart
73 |
74 |
75 |
76 | Data Mart is Danny’s latest venture and after running international operations for his online supermarket that specialises in fresh produce - Danny is asking for your support to analyse his sales performance.
77 |
78 | In June 2020 - large scale supply changes were made at Data Mart. All Data Mart products now use sustainable packaging methods in every single step from the farm all the way to the customer. Danny needs your help to quantify the impact of this change on the sales performance for Data Mart and its separate business areas.
79 |
80 | ***View the case study [here](https://8weeksqlchallenge.com/case-study-5/) and my solution [here](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%235%20-%20Data%20Mart).***
81 |
82 | ---
83 | ## 🐟 Case Study #6 - Clique Bait
84 |
85 |
86 |
87 | Clique Bait is not like your regular online seafood store - the founder and CEO Danny, was also a part of a digital data analytics team and wanted to expand his knowledge into the seafood industry!
88 |
89 | In this case study - you are required to support Danny’s vision and analyse his dataset and come up with creative solutions to calculate funnel fallout rates for the Clique Bait online store.
90 |
91 | ***View the case study [here](https://8weeksqlchallenge.com/case-study-6/) and my solution [here](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%236%20-%20Clique%20Bait).***
92 |
93 | ---
94 | ## :tshirt: Case Study #7 - Balanced Tree Clothing Co.
95 |
96 |
97 |
98 | Balanced Tree Clothing Company prides themselves on providing an optimised range of clothing and lifestyle wear for the modern adventurer!
99 |
100 | Danny, the CEO of this trendy fashion company, has asked you to assist the merchandising teams in analysing their sales performance and generating a basic financial report to share with the wider business.
101 |
102 | ***View the case study [here](https://8weeksqlchallenge.com/case-study-7/) and my solution [here](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%237%20-%20Balanced%20Tree%20Clothing%20Co.).***
103 |
104 | ---
105 | ## 🍊 Case Study #8 - Fresh Segments
106 |
107 |
108 |
109 | Danny created Fresh Segments, a digital marketing agency that helps other businesses analyse trends in online ad click behaviour for their unique customer base.
110 |
111 | Clients share their customer lists with the Fresh Segments team who then aggregate interest metrics and generate a single dataset worth of metrics for further analysis. In particular - the composition and rankings for different interests are provided for each client showing the proportion of their customer list who interacted with online assets related to each interest for each month.
112 |
113 | Danny has asked for your assistance to analyse aggregated metrics for an example client and provide some high level insights about the customer list and their interests.
114 |
115 | ***View the case study [here](https://8weeksqlchallenge.com/case-study-8/) and my solution [here](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%238%20-%20Fresh%20Segments).***
116 |
117 | ---
118 | ## ✨ Contribution
119 | Contributions, issues, and feature requests are welcome!
120 |
121 | To contribute to this project, see the GitHub documentation on [creating a pull request](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request).
122 |
123 | ---
124 | ## 👏 Support
125 | Please give me a ⭐️ if you like this project!
126 |
127 | ---
128 | © 2022 Anh Nguyen
129 |
--------------------------------------------------------------------------------