├── .gitattributes
├── Case Study #1 - Danny's Diner
│   ├── README.md
│   ├── database.sql
│   └── query.sql
├── Case Study #2 - Pizza Runner
│   ├── README.md
│   ├── Solution
│   │   ├── A. Pizza Metrics.md
│   │   ├── B. Runner and Customer Experience.md
│   │   ├── C. Ingredient Optimisation.md
│   │   ├── D. Pricing and Ratings.md
│   │   └── E. Bonus Questions.md
│   ├── Syntax
│   │   ├── A_data_cleaning.sql
│   │   ├── A_query.sql
│   │   ├── B_query.sql
│   │   ├── C_data_cleaning.sql
│   │   ├── C_query.sql
│   │   ├── D_query.sql
│   │   └── E_query.sql
│   └── database.sql
├── Case Study #3 - Foodie-Fi
│   ├── README.md
│   ├── Solution
│   │   ├── A. Customer Journey.md
│   │   ├── B. Data Analysis Questions.md
│   │   ├── C. Challenge Payment Question.md
│   │   └── D. Outside The Box Questions.md
│   ├── Syntax
│   │   ├── A_query.sql
│   │   ├── B_query.sql
│   │   ├── C_query.sql
│   │   └── D_query.sql
│   └── database.sql
├── Case Study #4 - Data Bank
│   ├── README.md
│   ├── Solution
│   │   ├── A. Customer Nodes Exploration.md
│   │   └── B. Customer Transactions.md
│   ├── Syntax
│   │   ├── A_query.sql
│   │   └── B_query.sql
│   └── database.sql
├── Case Study #5 - Data Mart
│   ├── README.md
│   ├── Solution
│   │   ├── A. Data Cleansing Steps.md
│   │   ├── B. Data Exploration.md
│   │   ├── C. Before & After Analysis.md
│   │   └── D. Bonus Question.md
│   ├── Syntax
│   │   ├── A_query.sql
│   │   ├── B_query.sql
│   │   ├── C_query.sql
│   │   └── D_query.sql
│   └── database.sql
├── Case Study #6 - Clique Bait
│   ├── README.md
│   ├── Solution
│   │   ├── A. Digital Analysis.md
│   │   ├── B. Product Funnel Analysis.md
│   │   └── C. Campaigns Analysis.md
│   ├── Syntax
│   │   ├── A_query.sql
│   │   ├── B_query.sql
│   │   └── C_query.sql
│   └── database.sql
├── Case Study #7 - Balanced Tree Clothing Co.
│   ├── README.md
│   ├── Solution
│   │   ├── A. High Level Sales Analysis.md
│   │   ├── B. Transaction Analysis.md
│   │   ├── C. Product Analysis.md
│   │   └── D. Bonus Question.md
│   ├── Syntax
│   │   ├── A_query.sql
│   │   ├── B_query.sql
│   │   ├── C_query.sql
│   │   └── D_query.sql
│   └── database.sql
├── Case Study #8 - Fresh Segments
│   ├── README.md
│   ├── Solution
│   │   ├── A. Data Exploration and Cleansing.md
│   │   ├── B. Interest Analysis.md
│   │   ├── C. Segment Analysis.md
│   │   └── D. Index Analysis.md
│   ├── Syntax
│   │   ├── A_query.sql
│   │   ├── B_query.sql
│   │   ├── C_query.sql
│   │   └── D_query.sql
│   └── database.sql
├── IMG
│   ├── 1.png
│   ├── 2.png
│   ├── 3.png
│   ├── 4.png
│   ├── 5.png
│   ├── 6.png
│   ├── 7.png
│   ├── 8.png
│   ├── e1.PNG
│   ├── e2.PNG
│   ├── e3.PNG
│   ├── e4.png
│   ├── e5.png
│   ├── e6.PNG
│   ├── e7.1.PNG
│   ├── e7.2.PNG
│   └── e8-updated.PNG
└── README.md

/.gitattributes:
--------------------------------------------------------------------------------
1 | *.sql linguist-detectable=true
2 | *.sql linguist-language=tsql
3 | *.sql text
--------------------------------------------------------------------------------
/Case Study #1 - Danny's Diner/database.sql:
--------------------------------------------------------------------------------
1 | --------------------------------
2 | --CASE STUDY #1: DANNY'S DINER--
3 | --------------------------------
4 | --Author: Anh Nguyen
5 | --Date: 05/11/2022
6 | --Tool used: SQL Server
7 |
8 |
9 | CREATE DATABASE dannys_diner;
10 |
11 | CREATE TABLE dannys_diner.dbo.sales (
12 |   "customer_id" VARCHAR(1),
13 |   "order_date" DATE,
14 |   "product_id" INTEGER
15 | );
16 |
17 | INSERT INTO dannys_diner.dbo.sales
18 |   ("customer_id", "order_date", "product_id")
19 | VALUES
20 |   ('A', '2021-01-01', '1'),
21 |   ('A', '2021-01-01', '2'),
22 |   ('A', '2021-01-07', '2'),
23 |   ('A', '2021-01-10', '3'),
24 |   ('A', '2021-01-11', '3'),
25 |   ('A', '2021-01-11', '3'),
26 |   ('B', '2021-01-01', '2'),
27 |   ('B', '2021-01-02', '2'),
28 |   ('B', '2021-01-04', '1'),
29 |   ('B', '2021-01-11', '1'),
30 |   ('B', '2021-01-16', '3'),
31 |   ('B', '2021-02-01', '3'),
32 |   ('C', '2021-01-01', '3'),
33 |   ('C', '2021-01-01', '3'),
34 |   ('C', '2021-01-07', '3');
35 |
36 |
37 | CREATE TABLE dannys_diner.dbo.menu (
38 |   "product_id" INTEGER,
39 |   "product_name" VARCHAR(5),
40 |   "price" INTEGER
41 | );
42 |
43 | INSERT INTO dannys_diner.dbo.menu
44 |   ("product_id", "product_name", "price")
45 | VALUES
46 |   ('1', 'sushi', '10'),
47 |   ('2', 'curry', '15'),
48 |   ('3', 'ramen', '12');
49 |
50 |
51 | CREATE TABLE dannys_diner.dbo.members (
52 |   "customer_id" VARCHAR(1),
53 |   "join_date" DATE
54 | );
55 |
56 | INSERT INTO dannys_diner.dbo.members
57 |   ("customer_id", "join_date")
58 | VALUES
59 |   ('A', '2021-01-07'),
60 |   ('B', '2021-01-09');
61 |
--------------------------------------------------------------------------------
/Case Study #1 - Danny's Diner/query.sql:
--------------------------------------------------------------------------------
1 | ------------------------
2 | --CASE STUDY QUESTIONS--
3 | ------------------------
4 |
5 | --Author: Anh Nguyen
6 | --Date: 05/11/2022
7 | --Tool used: SQL Server
8 |
9 |
10 | -- 1. What is the total amount each customer spent at the restaurant?
11 |
12 | SELECT
13 |   s.customer_id,
14 |   SUM(m.price) AS total_pay
15 | FROM sales s
16 | JOIN menu m
17 |   ON s.product_id = m.product_id
18 | GROUP BY s.customer_id
19 | ORDER BY s.customer_id;
20 |
21 |
22 | -- 2. How many days has each customer visited the restaurant?
23 |
24 | SELECT
25 |   customer_id,
26 |   COUNT(DISTINCT order_date) AS visit_count
27 | FROM sales
28 | GROUP BY customer_id;
29 |
30 |
31 | -- 3. What was the first item from the menu purchased by each customer?
32 |
33 | WITH orderRank AS (
34 |   SELECT
35 |     customer_id,
36 |     product_id,
37 |     order_date,
38 |     DENSE_RANK() OVER(PARTITION BY customer_id ORDER BY order_date) AS rnk
39 |   FROM sales
40 | )
41 |
42 | SELECT
43 |   o.customer_id,
44 |   o.order_date,
45 |   m.product_name
46 | FROM orderRank o
47 | JOIN menu m
48 |   ON o.product_id = m.product_id
49 | WHERE o.rnk = 1
50 | GROUP BY o.customer_id, o.order_date, m.product_name;
51 |
52 |
53 | -- 4. What is the most purchased item on the menu and how many times was it purchased by all customers?
54 |
55 | SELECT
56 |   TOP 1 s.product_id,
57 |   m.product_name,
58 |   COUNT(*) AS most_purch
59 | FROM sales s
60 | JOIN menu m
61 |   ON s.product_id = m.product_id
62 | GROUP BY s.product_id, m.product_name
63 | ORDER BY COUNT(*) DESC;
64 |
65 | -- 5. Which item was the most popular for each customer?
66 |
67 | WITH freqRank AS (
68 |   SELECT
69 |     customer_id,
70 |     product_id,
71 |     COUNT(*) AS purch_freq,
72 |     DENSE_RANK() OVER(PARTITION BY customer_id ORDER BY COUNT(*) DESC) AS rnk
73 |   FROM sales
74 |   GROUP BY customer_id, product_id
75 | )
76 |
77 | SELECT
78 |   f.customer_id,
79 |   m.product_name,
80 |   f.purch_freq
81 | FROM freqRank f
82 | JOIN menu m
83 |   ON f.product_id = m.product_id
84 | WHERE f.rnk = 1
85 | ORDER BY f.customer_id;
86 |
87 |
88 | -- 6. Which item was purchased first by the customer after they became a member?
89 |
90 | WITH orderAfterMember AS (
91 |   SELECT
92 |     s.customer_id,
93 |     mn.product_name,
94 |     s.order_date,
95 |     m.join_date,
96 |     DENSE_RANK() OVER(PARTITION BY s.customer_id ORDER BY s.order_date) AS rnk
97 |   FROM sales s
98 |   JOIN members m
99 |     ON s.customer_id = m.customer_id
100 |   JOIN menu mn
101 |     ON s.product_id = mn.product_id
102 |   WHERE s.order_date >= m.join_date
103 | )
104 |
105 | SELECT
106 |   customer_id,
107 |   product_name,
108 |   order_date,
109 |   join_date
110 | FROM orderAfterMember
111 | WHERE rnk = 1;
112 |
113 |
114 | -- 7. Which item was purchased just before the customer became a member?
115 |
116 | WITH orderBeforeMember AS (
117 |   SELECT
118 |     s.customer_id,
119 |     mn.product_name,
120 |     s.order_date,
121 |     m.join_date,
122 |     DENSE_RANK() OVER(PARTITION BY s.customer_id ORDER BY s.order_date DESC) AS rnk
123 |   FROM sales s
124 |   JOIN members m
125 |     ON s.customer_id = m.customer_id
126 |   JOIN menu mn
127 |     ON s.product_id = mn.product_id
128 |   WHERE s.order_date < m.join_date
129 | )
130 |
131 | SELECT
132 |   customer_id,
133 |   product_name,
134 |   order_date,
135 |   join_date
136 | FROM orderBeforeMember
137 | WHERE rnk = 1;
138 |
139 |
140 | -- 8. What is the total items and amount spent for each member before they became a member?
141 |
142 | SELECT
143 |   s.customer_id,
144 |   COUNT(s.product_id) AS total_items,
145 |   SUM(mn.price) AS total_spend
146 | FROM sales s
147 | JOIN members m
148 |   ON s.customer_id = m.customer_id
149 | JOIN menu mn
150 |   ON s.product_id = mn.product_id
151 | WHERE s.order_date < m.join_date
152 | GROUP BY s.customer_id;
153 |
154 |
155 | -- 9. If each $1 spent equates to 10 points and sushi has a 2x points multiplier - how many points would each customer have?
156 | -- Note: Only customers who are members receive points when purchasing items
157 |
158 | WITH CustomerPoints AS (
159 |   SELECT
160 |     s.customer_id,
161 |     CASE
162 |       WHEN s.customer_id IN (SELECT customer_id FROM members) AND mn.product_name = 'sushi' THEN mn.price*20
163 |       WHEN s.customer_id IN (SELECT customer_id FROM members) AND mn.product_name != 'sushi' THEN mn.price*10
164 |       ELSE 0 END AS points
165 |   FROM menu mn
166 |   JOIN sales s
167 |     ON mn.product_id = s.product_id
168 | )
169 |
170 | SELECT
171 |   customer_id,
172 |   SUM(points) AS total_points
173 | FROM CustomerPoints
174 | GROUP BY customer_id;
175 |
176 |
177 | -- 10. In the first week after a customer joins the program (including their join date), they earn 2x points
178 | -- on all items, not just sushi - how many points do customer A and B have at the end of January?
179 |
180 | WITH programDates AS (
181 |   SELECT
182 |     customer_id,
183 |     join_date,
184 |     DATEADD(d, 6, join_date) AS valid_date,
185 |     EOMONTH('2021-01-01') AS last_date
186 |   FROM members
187 | )
188 |
189 | SELECT
190 |   p.customer_id,
191 |   SUM(CASE
192 |     WHEN s.order_date BETWEEN p.join_date AND p.valid_date THEN m.price*20
193 |     WHEN m.product_name = 'sushi' THEN m.price*20
194 |     ELSE m.price*10 END) AS total_points
195 | FROM sales s
196 | JOIN programDates p
197 |   ON s.customer_id = p.customer_id
198 | JOIN menu m
199 |   ON s.product_id = m.product_id
200 | WHERE s.order_date <= p.last_date
201 | GROUP BY p.customer_id;
202 |
203 |
204 | ------------------------
205 | --  BONUS QUESTIONS  --
206 | ------------------------
207 |
208 | -- Join All The Things
209 |
210 | SELECT
211 |   s.customer_id,
212 |   s.order_date,
213 |   mn.product_name,
214 |   mn.price,
215 |   CASE WHEN s.order_date >= m.join_date THEN 'Y'
216 |     ELSE 'N' END AS member
217 | FROM sales s
218 | JOIN menu mn
219 |   ON s.product_id = mn.product_id
220 | LEFT JOIN members m
221 |   ON s.customer_id = m.customer_id;
222 |
223 |
224 | -- Rank All The Things
225 | -- Note: Create a CTE using the result in the previous question
226 |
227 | WITH customerStatus AS (
228 |   SELECT
229 |     s.customer_id,
230 |     s.order_date,
231 |     mn.product_name,
232 |     mn.price,
233 |     CASE WHEN s.order_date >= m.join_date THEN 'Y'
234 |       ELSE 'N' END AS member
235 |   FROM sales s
236 |   JOIN menu mn
237 |     ON s.product_id = mn.product_id
238 |   LEFT JOIN members m
239 |     ON s.customer_id = m.customer_id
240 | )
241 |
242 | SELECT *,
243 |   CASE WHEN member = 'Y'
244 |     THEN DENSE_RANK() OVER(PARTITION BY customer_id, member ORDER BY order_date)
245 |     ELSE NULL END AS ranking
246 | FROM customerStatus;
247 |
--------------------------------------------------------------------------------
/Case Study #2 - Pizza Runner/README.md:
--------------------------------------------------------------------------------
1 | # 🍕 Case Study #2 - Pizza Runner
2 |
3 |
4 |
5 | ## 📕 Table of Contents
6 | * [Business Task](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%232%20-%20Pizza%20Runner/README.md#%EF%B8%8F-business-task)
7 | * [Entity Relationship Diagram](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%232%20-%20Pizza%20Runner/README.md#-entity-relationship-diagram)
8 | * [Case Study Questions](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%232%20-%20Pizza%20Runner/README.md#-case-study-questions)
9 | * [My Solution](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%232%20-%20Pizza%20Runner/README.md#-my-solution)
10 |
11 | ---
12 | ## 🛠️ Business Task
13 | Danny wanted to expand his new Pizza Empire and intended to Uberize it with Pizza Runner.
14 | Danny started by recruiting “runners” to deliver fresh pizza from Pizza Runner Headquarters (otherwise known as Danny’s house)
15 | and also maxed out his credit card to pay freelance developers to build a mobile app to accept orders from customers.
16 |
17 | ---
18 | ## 🔐 Entity Relationship Diagram
19 |
20 |
21 |
22 | ---
23 | ## ❓ Case Study Questions
24 | ### A. Pizza Metrics
25 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%232%20-%20Pizza%20Runner/Solution/A.%20Pizza%20Metrics.md).
26 |
27 | 1. How many pizzas were ordered?
28 | 2. How many unique customer orders were made?
29 | 3. How many successful orders were delivered by each runner?
30 | 4. How many of each type of pizza was delivered?
31 | 5. How many Vegetarian and Meatlovers were ordered by each customer?
32 | 6. What was the maximum number of pizzas delivered in a single order?
33 | 7. For each customer, how many delivered pizzas had at least 1 change and how many had no changes?
34 | 8. How many pizzas were delivered that had both exclusions and extras?
35 | 9. What was the total volume of pizzas ordered for each hour of the day?
36 | 10. What was the volume of orders for each day of the week?
37 |
38 | ---
39 | ### B. Runner and Customer Experience
40 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%232%20-%20Pizza%20Runner/Solution/B.%20Runner%20and%20Customer%20Experience.md).
41 |
42 | 1. How many runners signed up for each 1 week period? (i.e. week starts 2021-01-01)
43 | 2. What was the average time in minutes it took for each runner to arrive at the Pizza Runner HQ to pickup the order?
44 | 3. Is there any relationship between the number of pizzas and how long the order takes to prepare?
45 | 4. What was the average distance travelled for each customer?
46 | 5. What was the difference between the longest and shortest delivery times for all orders?
47 | 6. What was the average speed for each runner for each delivery and do you notice any trend for these values?
48 | 7. What is the successful delivery percentage for each runner?
49 |
50 | ---
51 | ### C. Ingredient Optimisation
52 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%232%20-%20Pizza%20Runner/Solution/C.%20Ingredient%20Optimisation.md).
53 |
54 | 1. What are the standard ingredients for each pizza?
55 | 2. What was the most commonly added extra?
56 | 3. What was the most common exclusion?
57 | 4. Generate an order item for each record in the customers_orders table in the format of one of the following:
58 | * ```Meat Lovers```
59 | * ```Meat Lovers - Exclude Beef```
60 | * ```Meat Lovers - Extra Bacon```
61 | * ```Meat Lovers - Exclude Cheese, Bacon - Extra Mushroom, Peppers```
62 | 5. Generate an alphabetically ordered comma separated ingredient list for each pizza order from the customer_orders table and add a 2x in front of any relevant ingredients
63 | * For example: ```"Meat Lovers: 2xBacon, Beef, ... , Salami"```
64 | 6. What is the total quantity of each ingredient used in all delivered pizzas sorted by most frequent first?
65 |
66 | ---
67 | ### D. Pricing and Ratings
68 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%232%20-%20Pizza%20Runner/Solution/D.%20Pricing%20and%20Ratings.md).
69 |
70 | 1. If a Meat Lovers pizza costs $12 and Vegetarian costs $10 and there were no charges for changes - how much money has Pizza Runner made so far if there are no delivery fees?
71 | 2. What if there was an additional $1 charge for any pizza extras?
72 | * Add cheese is $1 extra
73 | 3. The Pizza Runner team now wants to add an additional ratings system that allows customers to rate their runner, how would you design an additional table for this new dataset - generate a schema for this new table and insert your own data for ratings for each successful customer order between 1 to 5.
74 | 4. Using your newly generated table - can you join all of the information together to form a table which has the following information for successful deliveries?
75 | * ```customer_id```
76 | * ```order_id```
77 | * ```runner_id```
78 | * ```rating```
79 | * ```order_time```
80 | * ```pickup_time```
81 | * Time between order and pickup
82 | * Delivery duration
83 | * Average speed
84 | * Total number of pizzas
85 | 5. If a Meat Lovers pizza was $12 and Vegetarian $10 fixed prices with no cost for extras and each runner is paid $0.30 per kilometre traveled - how much money does Pizza Runner have left over after these deliveries?
86 |
87 | ---
88 | ### E. Bonus Questions
89 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%232%20-%20Pizza%20Runner/Solution/E.%20Bonus%20Questions.md).
90 |
91 | If Danny wants to expand his range of pizzas - how would this impact the existing data design? Write an ```INSERT``` statement to demonstrate what would happen if a new ```Supreme``` pizza with all the toppings was added to the Pizza Runner menu?
92 |
93 | ---
94 | ## 🚀 My Solution
95 | * View the complete syntax [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%232%20-%20Pizza%20Runner/Syntax).
96 | * View the result and explanation [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%232%20-%20Pizza%20Runner/Solution).
97 |
--------------------------------------------------------------------------------
/Case Study #2 - Pizza Runner/Solution/B. Runner and Customer Experience.md:
--------------------------------------------------------------------------------
1 | # 🍕 Case Study #2 - Pizza Runner
2 | ## B. Runner and Customer Experience
3 | ### Q1. How many runners signed up for each 1 week period? (i.e. week starts 2021-01-01)
4 | ```TSQL
5 | SELECT
6 | DATEPART(week, registration_date) AS week_period,
7 | COUNT(*) AS runner_count
8 | FROM runners
9 | GROUP BY DATEPART(week, registration_date);
10 | ```
11 | | week_period | runner_count |
12 | |-------------|---------------|
13 | | 1 | 1 |
14 | | 2 | 2 |
15 | | 3 | 1 |
16 |
17 | ---
18 | ### Q2. What was the average time in minutes it took for each runner to arrive at the Pizza Runner HQ to pickup the order?
19 | ```TSQL
20 | WITH runners_pickup AS (
21 | SELECT
22 | r.runner_id,
23 | c.order_id,
24 | c.order_time,
25 | r.pickup_time,
26 | DATEDIFF(MINUTE, c.order_time, r.pickup_time) AS pickup_minutes
27 | FROM #customer_orders_temp AS c
28 | JOIN #runner_orders_temp AS r
29 | ON c.order_id = r.order_id
30 | WHERE r.cancellation IS NULL
31 | GROUP BY r.runner_id, c.order_id, c.order_time, r.pickup_time
32 | )
33 |
34 | SELECT
35 | runner_id,
36 | AVG(pickup_minutes) AS average_time
37 | FROM runners_pickup
38 | GROUP BY runner_id;
39 | ```
40 | | runner_id | average_time |
41 | |-----------|---------------|
42 | | 1 | 14 |
43 | | 2 | 20 |
44 | | 3 | 10 |
45 |
46 | ---
47 | ### Q3. Is there any relationship between the number of pizzas and how long the order takes to prepare?
48 | ```TSQL
49 | WITH pizzaPreparation AS (
50 | SELECT
51 | c.order_id,
52 | c.order_time,
53 | r.pickup_time,
54 | DATEDIFF(MINUTE, c.order_time, r.pickup_time) AS prep_time,
55 | COUNT(c.pizza_id) AS pizza_count
56 | FROM #customer_orders_temp AS c
57 | JOIN #runner_orders_temp AS r
58 | ON c.order_id = r.order_id
59 | WHERE r.cancellation IS NULL
60 | GROUP BY c.order_id, c.order_time, r.pickup_time,
61 | DATEDIFF(MINUTE, c.order_time, r.pickup_time)
62 | )
63 |
64 | SELECT
65 | pizza_count,
66 | AVG(prep_time) AS avg_prep_time
67 | FROM pizzaPreparation
68 | GROUP BY pizza_count;
69 | ```
70 | | pizza_count | avg_prep_time |
71 | |-------------|----------------|
72 | | 1 | 12 |
73 | | 2 | 18 |
74 | | 3 | 30 |
75 |
76 | * The more pizzas an order contains, the longer it takes to prepare.
77 | * An order of 2 pizzas took 6 minutes more to prepare than a single pizza; an order of 3 pizzas took 12 minutes more than an order of 2.
78 | * In other words, the n-th pizza in an order added roughly 6 * (n - 1) extra minutes of preparation time.
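
To make that pattern explicit, a quick follow-up sketch (not part of the original solution) can compute the marginal prep time per extra pizza with the ```LAG``` window function, reusing the same temp tables:
```TSQL
-- Sketch: minutes added by each additional pizza in an order.
WITH pizzaPreparation AS (
  SELECT
    c.order_id,
    DATEDIFF(MINUTE, c.order_time, r.pickup_time) AS prep_time,
    COUNT(c.pizza_id) AS pizza_count
  FROM #customer_orders_temp AS c
  JOIN #runner_orders_temp AS r
    ON c.order_id = r.order_id
  WHERE r.cancellation IS NULL
  GROUP BY c.order_id, c.order_time, r.pickup_time
),
avgPrep AS (
  SELECT
    pizza_count,
    AVG(prep_time) AS avg_prep_time
  FROM pizzaPreparation
  GROUP BY pizza_count
)

SELECT
  pizza_count,
  avg_prep_time,
  -- minutes added compared to an order with one pizza fewer
  avg_prep_time - LAG(avg_prep_time) OVER (ORDER BY pizza_count) AS extra_minutes
FROM avgPrep;
```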
79 |
80 | ---
81 | ### Q4. What was the average distance travelled for each customer?
82 | ```TSQL
83 | SELECT
84 | c.customer_id,
85 | ROUND(AVG(r.distance), 1) AS average_distance
86 | FROM #customer_orders_temp AS c
87 | JOIN #runner_orders_temp AS r
88 | ON c.order_id = r.order_id
89 | GROUP BY c.customer_id;
90 | ```
91 | | customer_id | average_distance |
92 | |-------------|-------------------|
93 | | 101 | 20 |
94 | | 102 | 16.7 |
95 | | 103 | 23.4 |
96 | | 104 | 10 |
97 | | 105 | 25 |
98 |
99 | ---
100 | ### Q5. What was the difference between the longest and shortest delivery times for all orders?
101 | ```TSQL
102 | SELECT MAX(duration) - MIN(duration) AS time_difference
103 | FROM #runner_orders_temp;
104 | ```
105 | | time_difference|
106 | |----------------|
107 | | 30 |
108 |
109 | ---
110 | ### Q6. What was the average speed for each runner for each delivery and do you notice any trend for these values?
111 | ```TSQL
112 | SELECT
113 | r.runner_id,
114 | c.order_id,
115 | r.distance,
116 | r.duration AS duration_min,
117 | COUNT(c.order_id) AS pizza_count,
118 | ROUND(AVG(r.distance/r.duration*60), 1) AS avg_speed
119 | FROM #runner_orders_temp r
120 | JOIN #customer_orders_temp c
121 | ON r.order_id = c.order_id
122 | WHERE r.cancellation IS NULL
123 | GROUP BY r.runner_id, c.order_id, r.distance, r.duration;
124 | ```
125 | | runner_id | order_id | distance | duration_min | pizza_count | avg_speed |
126 | |-----------|----------|----------|--------------|-------------|------------|
127 | | 1 | 1 | 20 | 32 | 1 | 37.5 |
128 | | 1 | 2 | 20 | 27 | 1 | 44.4 |
129 | | 1 | 3 | 13.4 | 20 | 2 | 40.2 |
130 | | 1 | 10 | 10 | 10 | 2 | 60 |
131 | | 2 | 4 | 23.4 | 40 | 3 | 35.1 |
132 | | 2 | 7 | 25 | 25 | 1 | 60 |
133 | | 2 | 8 | 23.4 | 15 | 1 | 93.6 |
134 | | 3 | 5 | 10 | 15 | 1 | 40 |
135 |
136 | * Runner ```1```'s average speed ranged from 37.5 km/h to 60 km/h.
137 | * Runner ```2```'s average speed ranged from 35.1 km/h to 93.6 km/h. Over the same distance (23.4 km), order ```4``` was delivered at 35.1 km/h while order ```8``` was delivered at 93.6 km/h, which strongly suggests a data-entry error in the distance or duration of one of these orders.
138 | * Runner ```3```'s average speed was 40 km/h.
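
A simple sanity check can surface such outliers automatically. The sketch below is not part of the original solution, and the 80 km/h cutoff is an arbitrary assumption:
```TSQL
-- Sketch: flag deliveries with an implausibly high average speed.
-- The 80 km/h threshold is an assumption, not a business rule.
SELECT
  runner_id,
  order_id,
  distance,
  duration,
  ROUND(distance / duration * 60, 1) AS avg_speed
FROM #runner_orders_temp
WHERE cancellation IS NULL
  AND distance / duration * 60 > 80;
```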
139 |
140 | ---
141 | ### Q7. What is the successful delivery percentage for each runner?
142 | ```TSQL
143 |
144 | SELECT
145 | runner_id,
146 | COUNT(distance) AS delivered,
147 | COUNT(order_id) AS total,
148 | 100 * COUNT(distance) / COUNT(order_id) AS successful_pct
149 | FROM #runner_orders_temp
150 | GROUP BY runner_id;
151 | ```
152 | | runner_id | delivered | total | successful_pct |
153 | |-----------|-----------|-------|-----------------|
154 | | 1 | 4 | 4 | 100 |
155 | | 2 | 3 | 4 | 75 |
156 | | 3 | 1 | 2 | 50 |

*```COUNT(distance)``` counts only the orders that were actually delivered (```distance``` is ```NULL``` for cancelled ones), while ```COUNT(order_id)``` counts every order assigned to the runner.*
157 |
158 | ---
159 | My solution for **[C. Ingredient Optimisation](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%232%20-%20Pizza%20Runner/Solution/C.%20Ingredient%20Optimisation.md).**
160 |
--------------------------------------------------------------------------------
/Case Study #2 - Pizza Runner/Solution/D. Pricing and Ratings.md:
--------------------------------------------------------------------------------
1 | # 🍕 Case Study #2 - Pizza Runner
2 | ## D. Pricing and Ratings
3 | ### Q1. If a Meat Lovers pizza costs $12 and Vegetarian costs $10 and there were no charges for changes - how much money has Pizza Runner made so far if there are no delivery fees?
4 |
5 | ```TSQL
6 | SELECT
7 | SUM(CASE WHEN p.pizza_name = 'Meatlovers' THEN 12
8 | ELSE 10 END) AS money_earned
9 | FROM #customer_orders_temp c
10 | JOIN pizza_names p
11 | ON c.pizza_id = p.pizza_id
12 | JOIN #runner_orders_temp r
13 | ON c.order_id = r.order_id
14 | WHERE r.cancellation IS NULL;
15 | ```
16 | | money_earned |
17 | |---------------|
18 | | 138 |
19 |
20 | ---
21 | ### Q2. What if there was an additional $1 charge for any pizza extras?
22 | * Add cheese is $1 extra
23 | ```TSQL
24 | DECLARE @basecost INT
25 | SET @basecost = 138 -- @basecost = result of the previous question
26 |
27 | SELECT
28 |   @basecost + SUM(CASE WHEN p.topping_name = 'Cheese' THEN 2  -- $1 per extra, plus $1 more when the extra is cheese
29 |     ELSE 1 END) AS updated_money
30 | FROM #extrasBreak e
31 | JOIN pizza_toppings p
32 | ON e.extra_id = p.topping_id;
33 | ```
34 | | updated_money |
35 | |----------------|
36 | | 145 |
37 |
38 | ---
39 | ### Q3. The Pizza Runner team now wants to add an additional ratings system that allows customers to rate their runner, how would you design an additional table for this new dataset - generate a schema for this new table and insert your own data for ratings for each successful customer order between 1 to 5.
40 | ```TSQL
41 | DROP TABLE IF EXISTS ratings
42 | CREATE TABLE ratings (
43 | order_id INT,
44 | rating INT);
45 | INSERT INTO ratings (order_id, rating)
46 | VALUES
47 | (1,3),
48 | (2,5),
49 | (3,3),
50 | (4,1),
51 | (5,5),
52 | (7,3),
53 | (8,4),
54 | (10,3);
55 |
56 | SELECT *
57 | FROM ratings;
58 | ```
59 | | order_id | rating |
60 | |----------|---------|
61 | | 1 | 3 |
62 | | 2 | 5 |
63 | | 3 | 3 |
64 | | 4 | 1 |
65 | | 5 | 5 |
66 | | 7 | 3 |
67 | | 8 | 4 |
68 | | 10 | 3 |
69 |
70 | ---
71 | ### Q4. Using your newly generated table - can you join all of the information together to form a table which has the following information for successful deliveries?
72 | * ```customer_id```
73 | * ```order_id```
74 | * ```runner_id```
75 | * ```rating```
76 | * ```order_time```
77 | * ```pickup_time```
78 | * Time between order and pickup
79 | * Delivery duration
80 | * Average speed
81 | * Total number of pizzas
82 |
83 | ```TSQL
84 | SELECT
85 |   c.customer_id,
86 |   c.order_id,
87 |   r.runner_id,
88 |   rt.rating,
89 |   c.order_time,
90 |   r.pickup_time,
91 |   DATEDIFF(MINUTE, c.order_time, r.pickup_time) AS mins_difference,
92 |   r.duration,
93 |   ROUND(AVG(r.distance/r.duration*60), 1) AS avg_speed,
94 |   COUNT(c.order_id) AS pizza_count
95 | FROM #customer_orders_temp c
96 | JOIN #runner_orders_temp r
97 |   ON r.order_id = c.order_id
98 | JOIN ratings rt
99 |   ON rt.order_id = c.order_id
100 | WHERE r.cancellation IS NULL
101 | GROUP BY
102 |   c.customer_id, c.order_id, r.runner_id, rt.rating,
103 |   c.order_time, r.pickup_time, r.duration;
104 | ```
105 | | customer_id | order_id | runner_id | rating | order_time | pickup_time | mins_difference | duration | avg_speed | pizza_count |
106 | |-------------|----------|-----------|--------|-------------------------|-------------------------|-----------------|----------|-----------|--------------|
107 | | 101 | 1 | 1 | 3 | 2020-01-01 18:05:02.000 | 2020-01-01 18:15:34.000 | 10 | 32 | 37.5 | 1 |
108 | | 101 | 2 | 1 | 5 | 2020-01-01 19:00:52.000 | 2020-01-01 19:10:54.000 | 10 | 27 | 44.4 | 1 |
109 | | 102 | 3 | 1 | 3 | 2020-01-02 23:51:23.000 | 2020-01-03 00:12:37.000 | 21 | 20 | 40.2 | 2 |
110 | | 102 | 8 | 2 | 4 | 2020-01-09 23:54:33.000 | 2020-01-10 00:15:02.000 | 21 | 15 | 93.6 | 1 |
111 | | 103 | 4 | 2 | 1 | 2020-01-04 13:23:46.000 | 2020-01-04 13:53:03.000 | 30 | 40 | 35.1 | 3 |
112 | | 104 | 5 | 3 | 5 | 2020-01-08 21:00:29.000 | 2020-01-08 21:10:57.000 | 10 | 15 | 40 | 1 |
113 | | 104 | 10 | 1 | 3 | 2020-01-11 18:34:49.000 | 2020-01-11 18:50:20.000 | 16 | 10 | 60 | 2 |
114 | | 105 | 7 | 2 | 3 | 2020-01-08 21:20:29.000 | 2020-01-08 21:30:45.000 | 10 | 25 | 60 | 1 |
115 |
116 | Cancelled orders ```6``` and ```9``` are excluded because the question asks about successful deliveries only.
117 |
118 | ---
119 | ### Q5. If a Meat Lovers pizza was $12 and Vegetarian $10 fixed prices with no cost for extras and each runner is paid $0.30 per kilometre traveled - how much money does Pizza Runner have left over after these deliveries?
120 | ```TSQL
121 | DECLARE @basecost INT
122 | SET @basecost = 138
123 |
124 | SELECT
125 | @basecost AS revenue,
126 | SUM(distance)*0.3 AS runner_paid,
127 | @basecost - SUM(distance)*0.3 AS money_left
128 | FROM #runner_orders_temp;
129 | ```
130 | | revenue | runner_paid | money_left |
131 | |---------|-------------|-------------|
132 | | 138 | 43.56 | 94.44 |
133 |
134 | ---
135 | My solution for **[E. Bonus Questions](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%232%20-%20Pizza%20Runner/Solution/E.%20Bonus%20Questions.md)**.
136 |
--------------------------------------------------------------------------------
/Case Study #2 - Pizza Runner/Solution/E. Bonus Questions.md:
--------------------------------------------------------------------------------
1 | # 🍕 Case Study #2 - Pizza Runner
2 | ## E. Bonus Questions
3 | ### If Danny wants to expand his range of pizzas - how would this impact the existing data design? Write an INSERT statement to demonstrate what would happen if a new Supreme pizza with all the toppings was added to the Pizza Runner menu?
4 |
5 | ```TSQL
6 | INSERT INTO pizza_names (pizza_id, pizza_name)
7 | VALUES (3, 'Supreme');
8 |
9 | ALTER TABLE pizza_recipes
10 | ALTER COLUMN toppings VARCHAR(50);
11 |
12 | INSERT INTO pizza_recipes (pizza_id, toppings)
13 | VALUES (3, '1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12');
14 | ```
15 | Notice that I had to widen the ```toppings``` column (from ```VARCHAR(30)``` to ```VARCHAR(50)```) because the Supreme pizza's topping list, which contains all 12 toppings, no longer fits in the original column.
16 |
--------------------------------------------------------------------------------
/Case Study #2 - Pizza Runner/Syntax/A_data_cleaning.sql:
--------------------------------------------------------------------------------
1 | -----------------------------------
2 | --A. Data Cleaning: Pizza Metrics--
3 | -----------------------------------
4 |
5 | -- Create a new temporary table: #customer_orders_temp
6 |
7 | SELECT
8 | order_id,
9 | customer_id,
10 | pizza_id,
11 | CASE
12 | WHEN exclusions IS NULL OR exclusions LIKE 'null' THEN ''
13 | ELSE exclusions
14 | END AS exclusions,
15 | CASE
16 | WHEN extras IS NULL OR extras LIKE 'null' THEN ''
17 | ELSE extras
18 | END AS extras,
19 | order_time
20 | INTO #customer_orders_temp
21 | FROM customer_orders;
22 |
23 | SELECT *
24 | FROM #customer_orders_temp;
25 |
26 | -- Create a new temporary table: #runner_orders_temp
27 |
28 | SELECT
29 | order_id,
30 | runner_id,
31 | CAST(
32 | CASE WHEN pickup_time LIKE 'null' THEN NULL ELSE pickup_time END
33 | AS DATETIME) AS pickup_time,
34 | CAST(
35 | CASE
36 | WHEN distance LIKE 'null' THEN NULL
37 | WHEN distance LIKE '%km' THEN TRIM('km' FROM distance)
38 | ELSE distance END
39 | AS FLOAT) AS distance,
40 | CAST(
41 | CASE
42 | WHEN duration LIKE 'null' THEN NULL
43 | WHEN duration LIKE '%mins' THEN TRIM('mins' FROM duration)
44 | WHEN duration LIKE '%minute' THEN TRIM('minute' FROM duration)
45 | WHEN duration LIKE '%minutes' THEN TRIM('minutes' FROM duration)
46 | ELSE duration END
47 | AS INT) AS duration,
48 | CASE
49 | WHEN cancellation IN ('null', 'NaN', '') THEN NULL
50 | ELSE cancellation
51 | END AS cancellation
52 | INTO #runner_orders_temp
53 | FROM runner_orders;
54 |
55 | SELECT *
56 | FROM #runner_orders_temp;
57 |
--------------------------------------------------------------------------------
/Case Study #2 - Pizza Runner/Syntax/A_query.sql:
--------------------------------------------------------------------------------
1 | --------------------
2 | --A. Pizza Metrics--
3 | --------------------
4 |
5 | -- 1. How many pizzas were ordered?
6 |
7 | SELECT COUNT(order_id) AS pizza_count
8 | FROM #customer_orders_temp;
9 |
10 |
11 | -- 2. How many unique customer orders were made?
12 |
13 | SELECT COUNT(DISTINCT order_id) AS order_count
14 | FROM #customer_orders_temp;
15 |
16 |
17 | -- 3. How many successful orders were delivered by each runner?
18 |
19 | SELECT
20 | runner_id,
21 | COUNT(order_id) AS successful_orders
22 | FROM #runner_orders_temp
23 | WHERE cancellation IS NULL
24 | GROUP BY runner_id;
25 |
26 |
27 | -- 4. How many of each type of pizza was delivered?
28 |
29 | -- Approach 1:
30 | SELECT
31 | p.pizza_name,
32 | COUNT(*) AS deliver_count
33 | FROM #customer_orders_temp c
34 | JOIN pizza_names p
35 | ON c.pizza_id = p.pizza_id
36 | WHERE c.order_id IN (
37 | SELECT order_id
38 | FROM #runner_orders_temp
39 | WHERE cancellation IS NULL)
40 | GROUP BY p.pizza_name;
41 |
42 | -- Approach 2:
43 |
44 | SELECT
45 | p.pizza_name,
46 | COUNT(*) AS deliver_count
47 | FROM #customer_orders_temp c
48 | JOIN pizza_names p
49 | ON c.pizza_id = p.pizza_id
50 | JOIN #runner_orders_temp r
51 | ON c.order_id = r.order_id
52 | WHERE r.cancellation IS NULL
53 | GROUP BY p.pizza_name;
54 |
55 |
56 | -- 5. How many Vegetarian and Meatlovers were ordered by each customer?
57 |
58 | SELECT
59 | customer_id,
60 | SUM(CASE WHEN pizza_id = 1 THEN 1 ELSE 0 END) AS Meatlovers,
61 | SUM(CASE WHEN pizza_id = 2 THEN 1 ELSE 0 END) AS Vegetarian
62 | FROM #customer_orders_temp
63 | GROUP BY customer_id;
64 |
65 |
66 | -- 6. What was the maximum number of pizzas delivered in a single order?
67 |
68 | SELECT MAX(pizza_count) AS max_count
69 | FROM (
70 | SELECT
71 | c.order_id,
72 | COUNT(c.pizza_id) AS pizza_count
73 | FROM #customer_orders_temp c
74 | JOIN #runner_orders_temp r
75 | ON c.order_id = r.order_id
76 | WHERE r.cancellation IS NULL
77 | GROUP BY c.order_id
78 | ) tmp;
79 |
80 |
81 | -- 7. For each customer, how many delivered pizzas had at least 1 change and how many had no changes?
82 |
83 | SELECT
84 | c.customer_id,
85 | SUM(CASE WHEN exclusions != '' OR extras != '' THEN 1 ELSE 0 END) AS has_change,
86 | SUM(CASE WHEN exclusions = '' AND extras = '' THEN 1 ELSE 0 END) AS no_change
87 | FROM #customer_orders_temp c
88 | JOIN #runner_orders_temp r
89 | ON c.order_id = r.order_id
90 | WHERE r.cancellation IS NULL
91 | GROUP BY c.customer_id;
92 |
93 |
94 | -- 8. How many pizzas were delivered that had both exclusions and extras?
95 |
96 | SELECT
97 | SUM(CASE WHEN exclusions != '' AND extras != '' THEN 1 ELSE 0 END) AS change_both
98 | FROM #customer_orders_temp c
99 | JOIN #runner_orders_temp r
100 | ON c.order_id = r.order_id
101 | WHERE r.cancellation IS NULL;
102 |
103 |
104 | -- 9. What was the total volume of pizzas ordered for each hour of the day?
105 |
106 | SELECT
107 | DATEPART(HOUR, order_time) AS hour_of_day,
108 | COUNT(order_id) AS pizza_volume
109 | FROM #customer_orders_temp
110 | GROUP BY DATEPART(HOUR, order_time)
111 | ORDER BY hour_of_day;
112 |
113 |
114 | -- 10. What was the volume of orders for each day of the week?
115 |
116 | SELECT
117 | DATENAME(weekday, order_time) AS week_day,
118 | COUNT(order_id) AS order_volume
119 | FROM #customer_orders_temp
120 | GROUP BY DATENAME(weekday, order_time);
121 |
--------------------------------------------------------------------------------
/Case Study #2 - Pizza Runner/Syntax/B_query.sql:
--------------------------------------------------------------------------------
1 | -------------------------------------
2 | --B. Runner and Customer Experience--
3 | -------------------------------------
4 |
5 | --1. How many runners signed up for each 1 week period? (i.e. week starts 2021-01-01)
6 |
7 | SELECT
8 | DATEPART(week, registration_date) AS week_period,
9 | COUNT(*) AS runner_count
10 | FROM runners
11 | GROUP BY DATEPART(week, registration_date);
12 |
13 |
14 | -- 2. What was the average time in minutes it took for each runner to arrive at the Pizza Runner HQ to pickup the order?
15 |
16 | WITH runners_pickup AS (
17 | SELECT
18 | r.runner_id,
19 | c.order_id,
20 | c.order_time,
21 | r.pickup_time,
22 | DATEDIFF(MINUTE, c.order_time, r.pickup_time) AS pickup_minutes
23 | FROM #customer_orders_temp AS c
24 | JOIN #runner_orders_temp AS r
25 | ON c.order_id = r.order_id
26 | WHERE r.cancellation IS NULL
27 | GROUP BY r.runner_id, c.order_id, c.order_time, r.pickup_time
28 | )
29 |
30 | SELECT
31 | runner_id,
32 | AVG(pickup_minutes) AS average_time
33 | FROM runners_pickup
34 | GROUP BY runner_id;
35 |
36 |
37 | -- 3. Is there any relationship between the number of pizzas and how long the order takes to prepare?
38 |
39 | WITH pizzaPreparation AS (
40 | SELECT
41 | c.order_id,
42 | c.order_time,
43 | r.pickup_time,
44 | DATEDIFF(MINUTE, c.order_time, r.pickup_time) AS prep_time,
45 | COUNT(c.pizza_id) AS pizza_count
46 | FROM #customer_orders_temp AS c
47 | JOIN #runner_orders_temp AS r
48 | ON c.order_id = r.order_id
49 | WHERE r.cancellation IS NULL
50 | GROUP BY c.order_id, c.order_time, r.pickup_time,
51 | DATEDIFF(MINUTE, c.order_time, r.pickup_time)
52 | )
53 |
54 | SELECT
55 | pizza_count,
56 | AVG(prep_time) AS avg_prep_time
57 | FROM pizzaPreparation
58 | GROUP BY pizza_count;
59 |
60 |
61 | -- 4. What was the average distance travelled for each customer?
62 |
63 | SELECT
64 | c.customer_id,
65 | ROUND(AVG(r.distance), 1) AS average_distance
66 | FROM #customer_orders_temp AS c
67 | JOIN #runner_orders_temp AS r
68 | ON c.order_id = r.order_id
69 | GROUP BY c.customer_id;
70 |
71 |
72 | -- 5. What was the difference between the longest and shortest delivery times for all orders?
73 |
74 | SELECT MAX(duration) - MIN(duration) AS difference
75 | FROM #runner_orders_temp;
76 |
77 |
78 | -- 6. What was the average speed for each runner for each delivery and do you notice any trend for these values?
79 |
80 | SELECT
81 | r.runner_id,
82 | c.order_id,
83 | r.distance,
84 | r.duration AS duration_min,
85 | COUNT(c.order_id) AS pizza_count,
86 | ROUND(AVG(r.distance/r.duration*60), 1) AS avg_speed
87 | FROM #runner_orders_temp r
88 | JOIN #customer_orders_temp c
89 | ON r.order_id = c.order_id
90 | WHERE r.cancellation IS NULL
91 | GROUP BY r.runner_id, c.order_id, r.distance, r.duration;
92 |
93 |
94 | -- 7. What is the successful delivery percentage for each runner?
95 |
96 | SELECT
97 | runner_id,
98 | COUNT(distance) AS delivered,
99 | COUNT(order_id) AS total,
100 | 100 * COUNT(distance) / COUNT(order_id) AS successful_pct
101 | FROM #runner_orders_temp
102 | GROUP BY runner_id;
103 |
--------------------------------------------------------------------------------
/Case Study #2 - Pizza Runner/Syntax/C_data_cleaning.sql:
--------------------------------------------------------------------------------
1 | ---------------------------------------------
2 | --C. Data Cleaning: Ingredient Optimisation--
3 | ---------------------------------------------
4 |
5 | -- 1. Create a new temporary table to separate [toppings] into multiple rows: #toppingsBreak
6 |
7 | SELECT
8 | pr.pizza_id,
9 | TRIM(value) AS topping_id,
10 | pt.topping_name
11 | INTO #toppingsBreak
12 | FROM pizza_recipes pr
13 | CROSS APPLY STRING_SPLIT(toppings, ',') AS t
14 | JOIN pizza_toppings pt
15 | ON TRIM(t.value) = pt.topping_id;
16 |
17 | SELECT *
18 | FROM #toppingsBreak;
19 |
20 |
21 | -- 2. Add a new column [record_id] to select each ordered pizza more easily
22 |
23 | ALTER TABLE #customer_orders_temp
24 | ADD record_id INT IDENTITY(1,1);
25 |
26 | SELECT *
27 | FROM #customer_orders_temp;
28 |
29 |
30 | -- 3. Create a new temporary table to separate [extras] into multiple rows: #extrasBreak
31 | SELECT
32 | c.record_id,
33 | TRIM(e.value) AS extra_id
34 | INTO #extrasBreak
35 | FROM #customer_orders_temp c
36 | CROSS APPLY STRING_SPLIT(extras, ',') AS e;
37 |
38 | SELECT *
39 | FROM #extrasBreak;
40 |
41 | -- 4. Create a new temporary table to separate [exclusions] into multiple rows: #exclusionsBreak
42 |
43 | SELECT
44 | c.record_id,
45 | TRIM(e.value) AS exclusion_id
46 | INTO #exclusionsBreak
47 | FROM #customer_orders_temp c
48 | CROSS APPLY STRING_SPLIT(exclusions, ',') AS e;
49 |
50 | SELECT *
51 | FROM #exclusionsBreak;
52 |
--------------------------------------------------------------------------------
/Case Study #2 - Pizza Runner/Syntax/C_query.sql:
--------------------------------------------------------------------------------
1 | ------------------------------
2 | --C. Ingredient Optimisation--
3 | ------------------------------
4 |
5 | -- 1. What are the standard ingredients for each pizza?
6 |
7 | SELECT
8 | p.pizza_name,
9 | STRING_AGG(t.topping_name, ', ') AS ingredients
10 | FROM #toppingsBreak t
11 | JOIN pizza_names p
12 | ON t.pizza_id = p.pizza_id
13 | GROUP BY p.pizza_name;
14 |
15 |
16 | --2. What was the most commonly added extra?
17 |
18 | SELECT
19 | p.topping_name,
20 | COUNT(*) AS extra_count
21 | FROM #extrasBreak e
22 | JOIN pizza_toppings p
23 | ON e.extra_id = p.topping_id
24 | GROUP BY p.topping_name
25 | ORDER BY COUNT(*) DESC;
26 |
27 |
28 | --3. What was the most common exclusion?
29 |
30 | SELECT
31 | p.topping_name,
32 | COUNT(*) AS exclusion_count
33 | FROM #exclusionsBreak e
34 | JOIN pizza_toppings p
35 | ON e.exclusion_id = p.topping_id
36 | GROUP BY p.topping_name
37 | ORDER BY COUNT(*) DESC;
38 |
39 |
40 | --4. Generate an order item for each record in the customers_orders table in the format of one of the following:
41 | --Meat Lovers
42 | --Meat Lovers - Exclude Beef
43 | --Meat Lovers - Extra Bacon
44 | --Meat Lovers - Exclude Cheese, Bacon - Extra Mushroom, Peppers
45 |
46 | WITH cteExtras AS (
47 | SELECT
48 | e.record_id,
49 | 'Extra ' + STRING_AGG(t.topping_name, ', ') AS record_options
50 | FROM #extrasBreak e
51 | JOIN pizza_toppings t
52 | ON e.extra_id = t.topping_id
53 | GROUP BY e.record_id
54 | ),
55 | cteExclusions AS (
56 | SELECT
57 | e.record_id,
58 | 'Exclusion ' + STRING_AGG(t.topping_name, ', ') AS record_options
59 | FROM #exclusionsBreak e
60 | JOIN pizza_toppings t
61 | ON e.exclusion_id = t.topping_id
62 | GROUP BY e.record_id
63 | ),
64 | cteUnion AS (
65 | SELECT * FROM cteExtras
66 | UNION
67 | SELECT * FROM cteExclusions
68 | )
69 |
70 | SELECT
71 | c.record_id,
72 | c.order_id,
73 | c.customer_id,
74 | c.pizza_id,
75 | c.order_time,
76 | CONCAT_WS(' - ', p.pizza_name, STRING_AGG(u.record_options, ' - ')) AS pizza_info
77 | FROM #customer_orders_temp c
78 | LEFT JOIN cteUnion u
79 | ON c.record_id = u.record_id
80 | JOIN pizza_names p
81 | ON c.pizza_id = p.pizza_id
82 | GROUP BY
83 | c.record_id,
84 | c.order_id,
85 | c.customer_id,
86 | c.pizza_id,
87 | c.order_time,
88 | p.pizza_name
89 | ORDER BY record_id;
90 |
91 |
92 | --5. Generate an alphabetically ordered comma separated ingredient list for each pizza order
93 | --from the customer_orders table and add a 2x in front of any relevant ingredients
94 | --For example: "Meat Lovers: 2xBacon, Beef, ... , Salami"
95 |
96 |
97 | -- Create a CTE: Each line displays an ingredient for an ordered pizza (add 2x for extras and remove exclusions as well)
98 | WITH ingredients AS (
99 | SELECT
100 | c.*,
101 | p.pizza_name,
102 |
103 | -- Add a 2x in front of topping_names if their topping_id appear in the #extrasBreak table
104 | CASE WHEN t.topping_id IN (
105 | SELECT extra_id
106 | FROM #extrasBreak e
107 | WHERE e.record_id = c.record_id)
108 | THEN '2x' + t.topping_name
109 | ELSE t.topping_name
110 | END AS topping
111 |
112 | FROM #customer_orders_temp c
113 | JOIN #toppingsBreak t
114 | ON t.pizza_id = c.pizza_id
115 | JOIN pizza_names p
116 | ON p.pizza_id = c.pizza_id
117 |
118 | -- Exclude toppings if their topping_id appear in the #exclusionBreak table
119 | WHERE t.topping_id NOT IN (
120 | SELECT exclusion_id
121 | FROM #exclusionsBreak e
122 | WHERE c.record_id = e.record_id)
123 | )
124 |
125 | SELECT
126 | record_id,
127 | order_id,
128 | customer_id,
129 | pizza_id,
130 | order_time,
131 | CONCAT(pizza_name + ': ', STRING_AGG(topping, ', ')) AS ingredients_list
132 | FROM ingredients
133 | GROUP BY
134 | record_id,
136 | order_id,
137 | customer_id,
138 | pizza_id,
139 | order_time,
140 | pizza_name
141 | ORDER BY record_id;
142 |
143 |
144 | --6. What is the total quantity of each ingredient used in all delivered pizzas sorted by most frequent first?
145 |
146 | WITH frequentIngredients AS (
147 | SELECT
148 | c.record_id,
149 | t.topping_name,
150 | CASE
151 | -- if extra ingredient, add 2
152 | WHEN t.topping_id IN (
153 | SELECT extra_id
154 | FROM #extrasBreak e
155 | WHERE e.record_id = c.record_id)
156 | THEN 2
157 | -- if excluded ingredient, add 0
158 | WHEN t.topping_id IN (
159 | SELECT exclusion_id
160 | FROM #exclusionsBreak e
161 | WHERE c.record_id = e.record_id)
162 | THEN 0
163 | -- no extras, no exclusion, add 1
164 | ELSE 1
165 | END AS times_used
166 | FROM #customer_orders_temp c
167 | JOIN #toppingsBreak t
168 | ON t.pizza_id = c.pizza_id
169 | JOIN pizza_names p
170 | ON p.pizza_id = c.pizza_id
171 | )
172 |
173 | SELECT
174 | topping_name,
175 | SUM(times_used) AS times_used
176 | FROM frequentIngredients
177 | GROUP BY topping_name
178 | ORDER BY times_used DESC;
179 |
180 |
--------------------------------------------------------------------------------
/Case Study #2 - Pizza Runner/Syntax/D_query.sql:
--------------------------------------------------------------------------------
1 | --------------------------
2 | --D. Pricing and Ratings--
3 | --------------------------
4 |
5 | --1. If a Meat Lovers pizza costs $12 and Vegetarian costs $10 and there were no charges for changes -
6 | --how much money has Pizza Runner made so far if there are no delivery fees?
7 |
8 | SELECT
9 | SUM(CASE WHEN p.pizza_name = 'Meatlovers' THEN 12
10 | ELSE 10 END) AS money_earned
11 | FROM #customer_orders_temp c
12 | JOIN pizza_names p
13 | ON c.pizza_id = p.pizza_id
14 | JOIN #runner_orders_temp r
15 | ON c.order_id = r.order_id
16 | WHERE r.cancellation IS NULL;
17 |
18 |
19 | -- 2. What if there was an additional $1 charge for any pizza extras?
20 | -- Add cheese is $1 extra
21 |
22 | DECLARE @basecost INT
23 | SET @basecost = 138 -- @basecost = result of the previous question
24 |
25 | SELECT
26 |   @basecost + SUM(CASE WHEN p.topping_name = 'Cheese' THEN 2  -- $1 per extra, plus $1 more when the extra is cheese
27 |     ELSE 1 END) AS updated_money
28 | FROM #extrasBreak e
29 | JOIN pizza_toppings p
30 | ON e.extra_id = p.topping_id;
31 |
32 |
33 | -- 3. The Pizza Runner team now wants to add an additional ratings system that allows customers to rate their runner,
34 | --how would you design an additional table for this new dataset - generate a schema for this new table and
35 | --insert your own data for ratings for each successful customer order between 1 to 5.
36 |
37 | DROP TABLE IF EXISTS ratings
38 | CREATE TABLE ratings (
39 | order_id INT,
40 | rating INT);
41 | INSERT INTO ratings (order_id, rating)
42 | VALUES
43 | (1,3),
44 | (2,5),
45 | (3,3),
46 | (4,1),
47 | (5,5),
48 | (7,3),
49 | (8,4),
50 | (10,3);
51 |
52 |
53 | -- 4. Using your newly generated table - can you join all of the information together to form a table which has the following information for successful deliveries?
54 | -- customer_id, order_id, runner_id, rating, order_time, pickup_time, Time between order and pickup, Delivery duration, Average speed, Total number of pizzas
55 |
56 | SELECT
57 |   c.customer_id,
58 |   c.order_id,
59 |   r.runner_id,
60 |   rt.rating,
61 |   c.order_time,
62 |   r.pickup_time,
63 |   DATEDIFF(MINUTE, c.order_time, r.pickup_time) AS mins_difference,
64 |   r.duration,
65 |   ROUND(AVG(r.distance/r.duration*60), 1) AS avg_speed,
66 |   COUNT(c.order_id) AS pizza_count
67 | FROM #customer_orders_temp c
68 | JOIN #runner_orders_temp r
69 |   ON r.order_id = c.order_id
70 | JOIN ratings rt
71 |   ON rt.order_id = c.order_id
72 | WHERE r.cancellation IS NULL
73 | GROUP BY c.customer_id, c.order_id, r.runner_id, rt.rating,
74 |   c.order_time, r.pickup_time,
75 |   r.duration;
76 |
77 |
78 | --5. If a Meat Lovers pizza was $12 and Vegetarian $10 fixed prices with no cost for extras
79 | --and each runner is paid $0.30 per kilometre traveled - how much money does Pizza Runner have left over after these deliveries?
80 |
81 | DECLARE @basecost INT
82 | SET @basecost = 138
83 |
84 | SELECT
85 | @basecost AS revenue,
86 | SUM(distance)*0.3 AS runner_pay,
87 | @basecost - SUM(distance)*0.3 AS money_left
88 | FROM #runner_orders_temp;
89 |
--------------------------------------------------------------------------------
/Case Study #2 - Pizza Runner/Syntax/E_query.sql:
--------------------------------------------------------------------------------
1 | -------------------
2 | --Bonus Questions--
3 | -------------------
4 |
5 | --If Danny wants to expand his range of pizzas - how would this impact the existing data design?
6 | --Write an INSERT statement to demonstrate what would happen if a new Supreme pizza with
7 | --all the toppings was added to the Pizza Runner menu?
8 |
9 | INSERT INTO pizza_names (pizza_id, pizza_name)
10 | VALUES (3, 'Supreme');
11 |
12 | ALTER TABLE pizza_recipes
13 | ALTER COLUMN toppings VARCHAR(50);
14 |
15 | INSERT INTO pizza_recipes (pizza_id, toppings)
16 | VALUES (3, '1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12');
17 |
--------------------------------------------------------------------------------
/Case Study #2 - Pizza Runner/database.sql:
--------------------------------------------------------------------------------
1 | -------------------------------
2 | --CASE STUDY #2: PIZZA RUNNER--
3 | -------------------------------
4 | --Author: Anh Nguyen
5 | --Date: 06/11/2022
6 | --Tool used: SQL Server
7 |
8 |
9 | /*
10 | Note:
11 | 1. In table [customer_orders], change the datatype of column [order_time] from TIMESTAMP to DATETIME.
12 |    Otherwise, we can't insert new values into table [customer_orders].
13 |
14 | 2. In tables [pizza_names], [pizza_recipes], and [pizza_toppings], change the datatype from TEXT to VARCHAR.
15 |    Otherwise, we will face an error in Q4, since the deprecated TEXT type can't be used in operations such as GROUP BY.
16 | */
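-- For example (illustrative only; the CREATE TABLE statements below already use
-- the corrected datatypes): a TEXT column can be converted in place with
--   ALTER TABLE pizza_runner.dbo.pizza_recipes ALTER COLUMN toppings VARCHAR(30);
-- whereas a TIMESTAMP (rowversion) column cannot be ALTERed to DATETIME, which is
-- why [order_time] is declared as DATETIME from the start.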
17 |
18 |
19 | CREATE DATABASE pizza_runner;
20 |
21 | DROP TABLE IF EXISTS pizza_runner.dbo.runners;
22 | CREATE TABLE pizza_runner.dbo.runners (
23 | "runner_id" INTEGER,
24 | "registration_date" DATE
25 | );
26 | INSERT INTO pizza_runner.dbo.runners
27 | ("runner_id", "registration_date")
28 | VALUES
29 | (1, '2021-01-01'),
30 | (2, '2021-01-03'),
31 | (3, '2021-01-08'),
32 | (4, '2021-01-15');
33 |
34 |
35 | DROP TABLE IF EXISTS pizza_runner.dbo.customer_orders;
36 | CREATE TABLE pizza_runner.dbo.customer_orders (
37 | "order_id" INTEGER,
38 | "customer_id" INTEGER,
39 | "pizza_id" INTEGER,
40 | "exclusions" VARCHAR(4),
41 | "extras" VARCHAR(4),
42 | "order_time" DATETIME
43 | );
44 |
45 | INSERT INTO pizza_runner.dbo.customer_orders
46 | ("order_id", "customer_id", "pizza_id", "exclusions", "extras", "order_time")
47 | VALUES
48 | ('1', '101', '1', '', '', '2020-01-01 18:05:02'),
49 | ('2', '101', '1', '', '', '2020-01-01 19:00:52'),
50 | ('3', '102', '1', '', '', '2020-01-02 23:51:23'),
51 | ('3', '102', '2', '', NULL, '2020-01-02 23:51:23'),
52 | ('4', '103', '1', '4', '', '2020-01-04 13:23:46'),
53 | ('4', '103', '1', '4', '', '2020-01-04 13:23:46'),
54 | ('4', '103', '2', '4', '', '2020-01-04 13:23:46'),
55 | ('5', '104', '1', 'null', '1', '2020-01-08 21:00:29'),
56 | ('6', '101', '2', 'null', 'null', '2020-01-08 21:03:13'),
57 | ('7', '105', '2', 'null', '1', '2020-01-08 21:20:29'),
58 | ('8', '102', '1', 'null', 'null', '2020-01-09 23:54:33'),
59 | ('9', '103', '1', '4', '1, 5', '2020-01-10 11:22:59'),
60 | ('10', '104', '1', 'null', 'null', '2020-01-11 18:34:49'),
61 | ('10', '104', '1', '2, 6', '1, 4', '2020-01-11 18:34:49');
62 |
63 |
64 | DROP TABLE IF EXISTS pizza_runner.dbo.runner_orders;
65 | CREATE TABLE pizza_runner.dbo.runner_orders (
66 | "order_id" INTEGER,
67 | "runner_id" INTEGER,
68 | "pickup_time" VARCHAR(19),
69 | "distance" VARCHAR(7),
70 | "duration" VARCHAR(10),
71 | "cancellation" VARCHAR(23)
72 | );
73 |
74 | INSERT INTO pizza_runner.dbo.runner_orders
75 | ("order_id", "runner_id", "pickup_time", "distance", "duration", "cancellation")
76 | VALUES
77 | ('1', '1', '2020-01-01 18:15:34', '20km', '32 minutes', ''),
78 | ('2', '1', '2020-01-01 19:10:54', '20km', '27 minutes', ''),
79 | ('3', '1', '2020-01-03 00:12:37', '13.4km', '20 mins', NULL),
80 | ('4', '2', '2020-01-04 13:53:03', '23.4', '40', NULL),
81 | ('5', '3', '2020-01-08 21:10:57', '10', '15', NULL),
82 | ('6', '3', 'null', 'null', 'null', 'Restaurant Cancellation'),
83 | ('7', '2', '2020-01-08 21:30:45', '25km', '25mins', 'null'),
84 | ('8', '2', '2020-01-10 00:15:02', '23.4 km', '15 minute', 'null'),
85 | ('9', '2', 'null', 'null', 'null', 'Customer Cancellation'),
86 | ('10', '1', '2020-01-11 18:50:20', '10km', '10minutes', 'null');
87 |
88 |
89 | DROP TABLE IF EXISTS pizza_runner.dbo.pizza_names;
90 | CREATE TABLE pizza_runner.dbo.pizza_names (
91 | "pizza_id" INTEGER,
92 | "pizza_name" VARCHAR(10)
93 | );
94 | INSERT INTO pizza_runner.dbo.pizza_names
95 | ("pizza_id", "pizza_name")
96 | VALUES
97 | (1, 'Meatlovers'),
98 | (2, 'Vegetarian');
99 |
100 |
101 | DROP TABLE IF EXISTS pizza_runner.dbo.pizza_recipes;
102 | CREATE TABLE pizza_runner.dbo.pizza_recipes (
103 | "pizza_id" INTEGER,
104 | "toppings" VARCHAR(30)
105 | );
106 | INSERT INTO pizza_runner.dbo.pizza_recipes
107 | ("pizza_id", "toppings")
108 | VALUES
109 | (1, '1, 2, 3, 4, 5, 6, 8, 10'),
110 | (2, '4, 6, 7, 9, 11, 12');
111 |
112 |
113 | DROP TABLE IF EXISTS pizza_runner.dbo.pizza_toppings;
114 | CREATE TABLE pizza_runner.dbo.pizza_toppings (
115 | "topping_id" INTEGER,
116 | "topping_name" VARCHAR(20)
117 | );
118 | INSERT INTO pizza_runner.dbo.pizza_toppings
119 | ("topping_id", "topping_name")
120 | VALUES
121 | (1, 'Bacon'),
122 | (2, 'BBQ Sauce'),
123 | (3, 'Beef'),
124 | (4, 'Cheese'),
125 | (5, 'Chicken'),
126 | (6, 'Mushrooms'),
127 | (7, 'Onions'),
128 | (8, 'Pepperoni'),
129 | (9, 'Peppers'),
130 | (10, 'Salami'),
131 | (11, 'Tomatoes'),
132 | (12, 'Tomato Sauce');
133 |
--------------------------------------------------------------------------------
/Case Study #3 - Foodie-Fi/README.md:
--------------------------------------------------------------------------------
1 | # 🥑 Case Study #3 - Foodie-Fi
2 |
3 |
4 |
5 | ## 📕 Table of Contents
6 | * [Business Task](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%233%20-%20Foodie-Fi#%EF%B8%8F-business-task)
7 | * [Entity Relationship Diagram](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%233%20-%20Foodie-Fi#-entity-relationship-diagram)
8 | * [Case Study Questions](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%233%20-%20Foodie-Fi#-case-study-questions)
9 | * [My Solution](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%233%20-%20Foodie-Fi#-my-solution)
10 |
11 | ---
12 | ## 🛠️ Business Task
13 | Danny and his friends launched a new startup called "Foodie-Fi" in 2020 and started selling monthly and annual subscriptions,
14 | giving their customers unlimited on-demand access to exclusive food videos from around the world.
15 |
16 | This case study focuses on using subscription-style digital data to answer
17 | important questions about business performance, payments, and the customer journey.
18 |
19 | ---
20 | ## 🔐 Entity Relationship Diagram
21 |
22 |
23 |
24 | ---
25 | ## ❓ Case Study Questions
26 | ### A. Customer Journey
27 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%233%20-%20Foodie-Fi/Solution/A.%20Customer%20Journey.md).
28 | * Based off the 8 sample customers provided in the sample from the subscriptions table, write a brief description about each customer’s onboarding journey.
29 |
30 | ---
31 | ### B. Data Analysis Questions
32 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%233%20-%20Foodie-Fi/Solution/B.%20Data%20Analysis%20Questions.md).
33 |
34 | 1. How many customers has Foodie-Fi ever had?
35 | 2. What is the monthly distribution of trial plan start_date values for our dataset - use the start of the month as the group by value
36 | 3. What plan start_date values occur after the year 2020 for our dataset? Show the breakdown by count of events for each plan_name
37 | 4. What is the customer count and percentage of customers who have churned rounded to 1 decimal place?
38 | 5. How many customers have churned straight after their initial free trial - what percentage is this rounded to the nearest whole number?
39 | 6. What is the number and percentage of customer plans after their initial free trial?
40 | 7. What is the customer count and percentage breakdown of all 5 plan_name values at 2020-12-31?
41 | 8. How many customers have upgraded to an annual plan in 2020?
42 | 9. How many days on average does it take for a customer to upgrade to an annual plan from the day they join Foodie-Fi?
43 | 10. Can you further breakdown this average value into 30 day periods (i.e. 0-30 days, 31-60 days etc)
44 | 11. How many customers downgraded from a pro monthly to a basic monthly plan in 2020?
45 |
46 | ---
47 | ### C. Challenge Payment Question
48 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%233%20-%20Foodie-Fi/Solution/C.%20Challenge%20Payment%20Question.md).
49 |
50 | The Foodie-Fi team wants to create a new payments table for the year 2020 that includes amounts paid by each customer in the subscriptions table with the following requirements (a short sketch of the monthly date generation follows the list):
51 | * monthly payments always occur on the same day of month as the original start_date of any monthly paid plan
52 | * upgrades from basic to monthly or pro plans are reduced by the current paid amount in that month and start immediately
53 | * upgrades from pro monthly to pro annual are paid at the end of the current billing period and also starts at the end of the month period
54 | * once a customer churns they will no longer make payments
55 |
56 | ---
57 | ### D. Outside The Box Questions
58 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%233%20-%20Foodie-Fi/Solution/D.%20Outside%20The%20Box%20Questions.md).
59 |
60 | 1. How would you calculate the rate of growth for Foodie-Fi?
61 | 2. What key metrics would you recommend Foodie-Fi management to track over time to assess performance of their overall business?
62 | 3. What are some key customer journeys or experiences that you would analyse further to improve customer retention?
63 | 4. If the Foodie-Fi team were to create an exit survey shown to customers who wish to cancel their subscription, what questions would you include in the survey?
64 | 5. What business levers could the Foodie-Fi team use to reduce the customer churn rate? How would you validate the effectiveness of your ideas?
65 |
66 | ---
67 | ## 🚀 My Solution
68 | * View the complete syntax [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%233%20-%20Foodie-Fi/Syntax).
69 | * View the result and explanation [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%233%20-%20Foodie-Fi/Solution).
70 |
--------------------------------------------------------------------------------
/Case Study #3 - Foodie-Fi/Solution/A. Customer Journey.md:
--------------------------------------------------------------------------------
1 | # 🥑 Case Study #3 - Foodie-Fi
2 | ## A. Customer Journey
3 | Based off the 8 sample customers provided in the sample from the subscriptions table, write a brief description about each customer’s onboarding journey.
4 | ```TSQL
5 | SELECT
6 | s.*,
7 | p.plan_name,
8 | p.price
9 | FROM subscriptions s
10 | JOIN plans p ON s.plan_id = p.plan_id
11 | WHERE customer_id IN (1, 2, 11, 13, 15, 16, 18, 19);
12 | ```
13 | | customer_id | plan_id | start_date | plan_name | price |
14 | |-------------|---------|------------|---------------|---------|
15 | | 1 | 0 | 2020-08-01 | trial | 0.00 |
16 | | 1 | 1 | 2020-08-08 | basic monthly | 9.90 |
17 | | 2 | 0 | 2020-09-20 | trial | 0.00 |
18 | | 2 | 3 | 2020-09-27 | pro annual | 199.00 |
19 | | 11 | 0 | 2020-11-19 | trial | 0.00 |
20 | | 11 | 4 | 2020-11-26 | churn | NULL |
21 | | 13 | 0 | 2020-12-15 | trial | 0.00 |
22 | | 13 | 1 | 2020-12-22 | basic monthly | 9.90 |
23 | | 13 | 2 | 2021-03-29 | pro monthly | 19.90 |
24 | | 15 | 0 | 2020-03-17 | trial | 0.00 |
25 | | 15 | 2 | 2020-03-24 | pro monthly | 19.90 |
26 | | 15 | 4 | 2020-04-29 | churn | NULL |
27 | | 16 | 0 | 2020-05-31 | trial | 0.00 |
28 | | 16 | 1 | 2020-06-07 | basic monthly | 9.90 |
29 | | 16 | 3 | 2020-10-21 | pro annual | 199.00 |
30 | | 18 | 0 | 2020-07-06 | trial | 0.00 |
31 | | 18 | 2 | 2020-07-13 | pro monthly | 19.90 |
32 | | 19 | 0 | 2020-06-22 | trial | 0.00 |
33 | | 19 | 2 | 2020-06-29 | pro monthly | 19.90 |
34 |
35 | Customer 1 signed up for the 7-day free trial on 01/08/2020. He/she didn't cancel the subscription afterwards, so the system automatically upgraded it to the basic monthly plan on 08/08/2020.
36 |
37 | Customer 2 signed up for the 7-day free trial on 20/09/2020 and upgraded to the pro annual plan on 27/09/2020.
38 |
39 | Customer 11 signed up for the 7-day free trial on 19/11/2020 and cancelled the subscription on 26/11/2020.
40 |
41 | Customer 13 signed up for the 7-day free trial on 15/12/2020. He/she didn't cancel the subscription, so the system automatically upgraded it to the basic monthly plan on 22/12/2020. He/she used that plan for about three months, then upgraded to the pro monthly plan on 29/03/2021.
42 |
43 | Customer 15 signed up for the 7-day free trial on 17/03/2020. He/she didn't cancel the subscription, so the system automatically upgraded it to the pro monthly plan on 24/03/2020. He/she then cancelled the subscription on 29/04/2020 and could keep using the plan until the end of the paid billing period.
44 |
45 | Customer 16 signed up for the 7-day free trial on 31/05/2020. He/she didn't cancel the subscription, so the system automatically upgraded it to the basic monthly plan on 07/06/2020. He/she used that plan for over four months, then upgraded to the pro annual plan on 21/10/2020.
46 |
47 | Customer 18 signed up for the 7-day free trial on 06/07/2020. After the trial ended, he/she upgraded to the pro monthly plan on 13/07/2020.
48 |
49 | Customer 19 signed up for the 7-day free trial on 22/06/2020 and upgraded to the pro monthly plan on 29/06/2020. After two months on that plan, he/she upgraded to the pro annual plan on 29/08/2020.
50 |
51 | ---
52 | My solution for **[B. Data Analysis Questions](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%233%20-%20Foodie-Fi/Solution/B.%20Data%20Analysis%20Questions.md)**.
53 |
--------------------------------------------------------------------------------
/Case Study #3 - Foodie-Fi/Solution/C. Challenge Payment Question.md:
--------------------------------------------------------------------------------
1 | # 🥑 Case Study #3 - Foodie-Fi
2 | ## C. Challenge Payment Question
3 | The Foodie-Fi team wants to create a new payments table for the year 2020 that includes amounts paid by each customer in the subscriptions table with the following requirements:
4 | * monthly payments always occur on the same day of month as the original start_date of any monthly paid plan
5 | * upgrades from basic to monthly or pro plans are reduced by the current paid amount in that month and start immediately
6 | * upgrades from pro monthly to pro annual are paid at the end of the current billing period and also starts at the end of the month period
7 | * once a customer churns they will no longer make payments
8 |
9 | Example outputs for this table might look like the following:
10 |
11 | | customer_id | plan_id | plan_name | payment_date | amount | payment_order |
12 | |-------------|---------|---------------|--------------|--------|----------------|
13 | | 1 | 1 | basic monthly | 2020-08-08 | 9.90 | 1 |
14 | | 1 | 1 | basic monthly | 2020-09-08 | 9.90 | 2 |
15 | | 1 | 1 | basic monthly | 2020-10-08 | 9.90 | 3 |
16 | | 1 | 1 | basic monthly | 2020-11-08 | 9.90 | 4 |
17 | | 1 | 1 | basic monthly | 2020-12-08 | 9.90 | 5 |
18 | | 2 | 3 | pro annual | 2020-09-27 | 199.00 | 1 |
19 | | 13 | 1 | basic monthly | 2020-12-22 | 9.90 | 1 |
20 | | 15 | 2 | pro monthly | 2020-03-24 | 19.90 | 1 |
21 | | 15 | 2 | pro monthly | 2020-04-24 | 19.90 | 2 |
22 | | 16 | 1 | basic monthly | 2020-06-07 | 9.90 | 1 |
23 | | 16 | 1 | basic monthly | 2020-07-07 | 9.90 | 2 |
24 | | 16 | 1 | basic monthly | 2020-08-07 | 9.90 | 3 |
25 | | 16 | 1 | basic monthly | 2020-09-07 | 9.90 | 4 |
26 | | 16 | 1 | basic monthly | 2020-10-07 | 9.90 | 5 |
27 | | 16 | 3 | pro annual | 2020-10-21 | 189.10 | 6 |
28 | | 18 | 2 | pro monthly | 2020-07-13 | 19.90 | 1 |
29 | | 18 | 2 | pro monthly | 2020-08-13 | 19.90 | 2 |
30 | | 18 | 2 | pro monthly | 2020-09-13 | 19.90 | 3 |
31 | | 18 | 2 | pro monthly | 2020-10-13 | 19.90 | 4 |
32 |
33 | ---
34 | * Use a recursive CTE to generate one payment row per month for every monthly paid plan in 2020, stopping when the customer changes plans; 'pro annual' plans are paid once and are not incremented.
35 | * Use ```CASE``` to create a new column ```last_date```, the last day of the current plan:
36 |   * if a customer kept the current plan, last_date = '2020-12-31'
37 |   * if a customer changed plans, last_date = start_date + (the month difference between start_date and the change date)
38 | * Create a new table ```payments``` by selecting the required columns
39 |
40 | ```TSQL
41 | --Use a recursive CTE to add one payment row per month for each monthly paid plan until the customer changes plans, except 'pro annual'
42 | WITH dateRecursion AS (
43 | SELECT
44 | s.customer_id,
45 | s.plan_id,
46 | p.plan_name,
47 | s.start_date AS payment_date,
48 | --column last_date: last day of the current plan
49 | CASE
50 | --if a customer kept using the current plan, last_date = '2020-12-31'
51 | WHEN LEAD(s.start_date) OVER(PARTITION BY s.customer_id ORDER BY s.start_date) IS NULL THEN '2020-12-31'
52 | --if a customer changed the plan, last_date = (month difference between start_date and changing date) + start_date
53 | ELSE DATEADD(MONTH,
54 | DATEDIFF(MONTH, start_date, LEAD(s.start_date) OVER(PARTITION BY s.customer_id ORDER BY s.start_date)),
55 | start_date) END AS last_date,
56 | p.price AS amount
57 | FROM subscriptions s
58 | JOIN plans p ON s.plan_id = p.plan_id
59 | --exclude trials because they didn't generate payments
60 | WHERE p.plan_name NOT IN ('trial')
61 | AND YEAR(start_date) = 2020
62 |
63 | UNION ALL
64 |
65 | SELECT
66 | customer_id,
67 | plan_id,
68 | plan_name,
69 | --increment payment_date by monthly
70 | DATEADD(MONTH, 1, payment_date) AS payment_date,
71 | last_date,
72 | amount
73 | FROM dateRecursion
74 | --stop incrementing when payment_date = last_date
75 | WHERE DATEADD(MONTH, 1, payment_date) <= last_date
76 | AND plan_name != 'pro annual'
77 | )
78 | --Create a new table [payments]
79 | SELECT
80 | customer_id,
81 | plan_id,
82 | plan_name,
83 | payment_date,
84 | amount,
85 | ROW_NUMBER() OVER(PARTITION BY customer_id ORDER BY payment_date) AS payment_order
86 | INTO payments
87 | FROM dateRecursion
88 | --exclude churns
89 | WHERE amount IS NOT NULL
90 | ORDER BY customer_id
91 | OPTION (MAXRECURSION 365); --allow up to 365 recursions (the default limit is 100)
92 | ```
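93 | As a quick sanity check, the new table can be compared against the example output above. A minimal usage sketch, assuming the ```payments``` table was just created by the query in this section:
94 | ```TSQL
95 | --Preview each customer's first payments and compare with the example rows above
96 | SELECT TOP 20 *
97 | FROM payments
98 | ORDER BY customer_id, payment_order;
99 | ```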
93 | ---
94 | My solution for **[D. Outside The Box Questions](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%233%20-%20Foodie-Fi/Solution/D.%20Outside%20The%20Box%20Questions.md)**.
95 |
--------------------------------------------------------------------------------
/Case Study #3 - Foodie-Fi/Solution/D. Outside The Box Questions.md:
--------------------------------------------------------------------------------
1 | # 🥑 Case Study #3 - Foodie-Fi
2 | ## D. Outside The Box Questions
3 | ### 1. How would you calculate the rate of growth for Foodie-Fi?
4 | - I chose the year 2020 to analyze because I already created the ```payments``` table in part C.
5 | - If you want to incorporate the 2021 data to see the whole picture (quarterly trends, a 2020-2021 comparison, etc.),
6 | create a new ```payments``` table and change all the date conditions in part C to '2021-12-31'.
7 |
8 | ```TSQL
9 | WITH monthlyRevenue AS (
10 | SELECT
11 | MONTH(payment_date) AS months,
12 | SUM(amount) AS revenue
13 | FROM payments
14 | GROUP BY MONTH(payment_date)
15 | )
16 |
17 | SELECT
18 | months,
19 | revenue,
20 | (revenue-LAG(revenue) OVER(ORDER BY months))/revenue AS revenue_growth --denominator is the current month; use LAG(revenue) instead for the conventional growth rate
21 | FROM monthlyRevenue;
22 | ```
23 | | months | revenue | revenue_growth |
24 | |--------|----------|-----------------|
25 | | 1 | 1282.00 | NULL |
26 | | 2 | 2792.60 | 0.540929 |
27 | | 3 | 4342.40 | 0.356899 |
28 | | 4 | 5972.70 | 0.272958 |
29 | | 5 | 7324.10 | 0.184514 |
30 | | 6 | 8765.50 | 0.164440 |
31 | | 7 | 10207.50 | 0.141268 |
32 | | 8 | 12047.40 | 0.152721 |
33 | | 9 | 12913.20 | 0.067047 |
34 | | 10 | 14952.50 | 0.136385 |
35 | | 11 | 12862.70 | -0.162469 |
36 | | 12 | 13429.50 | 0.042205 |
37 |
38 | ### 2. What key metrics would you recommend Foodie-Fi management to track over time to assess performance of their overall business?
39 | - Monthly revenue growth: How does Foodie-Fi's revenue increase or decrease month over month? Are there months in which revenue changes significantly?
40 | - Customer growth: How many new customers are added each month? What does the growth rate look like (x1.5, x2, ... after each month)?
41 | - Conversion rate: How many customers keep using Foodie-Fi after the trial? How does that rate change over time?
42 | - Churn rate: How many customers cancel their subscription each month? Which plan were they on? (See the sketch below.)
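43 |
44 | As one example, the churn-rate metric could be tracked with a query like the following. A minimal sketch, assuming the same ```subscriptions``` and ```plans``` tables used throughout this case study:
45 | ```TSQL
46 | --Monthly churn events and their share of the total customer base
47 | SELECT
48 |   MONTH(s.start_date) AS months,
49 |   COUNT(*) AS churn_events,
50 |   CAST(100.0 * COUNT(*) AS FLOAT)
51 |     / (SELECT COUNT(DISTINCT customer_id) FROM subscriptions) AS churn_pct
52 | FROM subscriptions s
53 | JOIN plans p ON s.plan_id = p.plan_id
54 | WHERE p.plan_name = 'churn'
55 | GROUP BY MONTH(s.start_date);
56 | ```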
43 |
44 | ### 3. What are some key customer journeys or experiences that you would analyse further to improve customer retention?
45 | - Customers who downgraded their plan
46 | - Customers who upgraded from basic monthly to pro monthly or pro annual
47 | - Customers who cancelled the subscription
48 |
49 | ### 4. If the Foodie-Fi team were to create an exit survey shown to customers who wish to cancel their subscription, what questions would you include in the survey?
50 | - What is the primary reason for the cancellation?
51 | + Price
52 | + Technical issues
53 | + Customer support
54 | + Found an alternative
55 | + Others (please specify)
56 | - Overall, how satisfied were you with the subscription? (Likert scale: Very Satisfied - Very Unsatisfied)
57 | - Would you consider using our services in the future? (Likert scale: Very Likely - Very Unlikely)
58 | - Would you recommend our company to a colleague, friend or family member? (Likert scale: Very Likely - Very Unlikely)
59 |
60 | ### 5. What business levers could the Foodie-Fi team use to reduce the customer churn rate? How would you validate the effectiveness of your ideas?
61 | - From the exit survey, look for the most common reasons why customers cancelled their subscriptions
62 | + Price: offer seasonal discounts, extend the trial period, or add more benefits for customers
63 | + Service quality: work with the relevant department to fix the issues
64 | + Found an alternative: run a competitor analysis to understand their competitive advantages over us
65 | - To validate the effectiveness of those ideas, track (see the sketch below):
66 | + Churn rate
67 | + Conversion rate
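68 |
69 | A minimal sketch of the conversion-rate check, reusing the ```nextPlan``` pattern from part B with the same ```subscriptions``` and ```plans``` tables:
70 | ```TSQL
71 | --Share of all customers who converted from the trial to any paid plan
72 | WITH nextPlan AS (
73 |   SELECT
74 |     p.plan_name,
75 |     LEAD(p.plan_name) OVER(PARTITION BY s.customer_id ORDER BY s.start_date) AS next_plan
76 |   FROM subscriptions s
77 |   JOIN plans p ON s.plan_id = p.plan_id
78 | )
79 | SELECT
80 |   CAST(100.0 * COUNT(*) AS FLOAT)
81 |     / (SELECT COUNT(DISTINCT customer_id) FROM subscriptions) AS trial_to_paid_pct
82 | FROM nextPlan
83 | WHERE plan_name = 'trial'
84 |   AND next_plan IN ('basic monthly', 'pro monthly', 'pro annual');
85 | ```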
68 |
--------------------------------------------------------------------------------
/Case Study #3 - Foodie-Fi/Syntax/A_query.sql:
--------------------------------------------------------------------------------
1 | -----------------------
2 | --A. Customer Journey--
3 | -----------------------
4 |
5 | --Based off the 8 sample customers provided in the sample from the subscriptions table,
6 | --write a brief description about each customer’s onboarding journey.
7 |
8 | SELECT
9 | s.*,
10 | p.plan_name,
11 | p.price
12 | FROM subscriptions s
13 | JOIN plans p ON s.plan_id = p.plan_id
14 | WHERE customer_id IN (1, 2, 11, 13, 15, 16, 18, 19);
15 |
16 | /*
17 | - Customer 1 signed up for the 7-day free trial on 01/08/2020.
18 | He/she didn't cancel the subscription afterwards, so the system automatically upgraded it to the basic monthly plan on 08/08/2020.
19 |
20 | - Customer 2 signed up for the 7-day free trial on 20/09/2020
21 | and upgraded to the pro annual plan on 27/09/2020.
22 |
23 | - Customer 11 signed up for the 7-day free trial on 19/11/2020
24 | and cancelled the subscription on 26/11/2020.
25 |
26 | - Customer 13 signed up for the 7-day free trial on 15/12/2020.
27 | He/she didn't cancel the subscription, so the system automatically upgraded it to the basic monthly plan on 22/12/2020.
28 | He/she used that plan for about three months, then upgraded to the pro monthly plan on 29/03/2021.
29 |
30 | - Customer 15 signed up for the 7-day free trial on 17/03/2020.
31 | He/she didn't cancel the subscription, so the system automatically upgraded it to the pro monthly plan on 24/03/2020.
32 | He/she then cancelled the subscription on 29/04/2020 and could keep using the plan until the end of the paid billing period.
33 |
34 | - Customer 16 signed up for the 7-day free trial on 31/05/2020.
35 | He/she didn't cancel the subscription, so the system automatically upgraded it to the basic monthly plan on 07/06/2020.
36 | He/she used that plan for over four months, then upgraded to the pro annual plan on 21/10/2020.
37 |
38 | - Customer 18 signed up for the 7-day free trial on 06/07/2020.
39 | After the trial ended, he/she upgraded to the pro monthly plan on 13/07/2020.
40 |
41 | - Customer 19 signed up for the 7-day free trial on 22/06/2020
42 | and upgraded to the pro monthly plan on 29/06/2020.
43 | After two months on that plan, he/she upgraded to the pro annual plan on 29/08/2020.
44 | */
45 |
--------------------------------------------------------------------------------
/Case Study #3 - Foodie-Fi/Syntax/B_query.sql:
--------------------------------------------------------------------------------
1 | ------------------------------
2 | --B. Data Analysis Questions--
3 | ------------------------------
4 |
5 | --1. How many customers has Foodie-Fi ever had?
6 |
7 | SELECT COUNT(DISTINCT customer_id) AS unique_customers
8 | FROM subscriptions;
9 |
10 |
11 | --2. What is the monthly distribution of trial plan start_date values for our dataset - use the start of the month as the group by value?
12 |
13 | SELECT
14 | MONTH(s.start_date) AS months,
15 | COUNT(*) AS distribution_values
16 | FROM subscriptions s
17 | JOIN plans p
18 | ON s.plan_id = p.plan_id
19 | WHERE p.plan_name = 'trial'
20 | GROUP BY MONTH(s.start_date);
21 |
22 |
23 | --3. What plan start_date values occur after the year 2020 for our dataset? Show the breakdown by count of events for each plan_name?
24 |
25 | SELECT
26 | YEAR(s.start_date) AS events,
27 | p.plan_name,
28 | COUNT(*) AS counts
29 | FROM subscriptions s
30 | JOIN plans p
31 | ON s.plan_id = p.plan_id
32 | WHERE YEAR(s.start_date) > 2020
33 | GROUP BY YEAR(s.start_date), p.plan_name;
34 |
35 |
36 | --4. What is the customer count and percentage of customers who have churned rounded to 1 decimal place?
37 |
38 | SELECT
39 | SUM(CASE WHEN p.plan_name = 'churn' THEN 1 END) AS churn_count,
40 | ROUND(CAST(100.0 * SUM(CASE WHEN p.plan_name = 'churn' THEN 1 END) AS FLOAT)
41 | / COUNT(DISTINCT customer_id), 1) AS churn_pct --rounded to 1 decimal place, as the question asks
42 | FROM subscriptions s
43 | JOIN plans p
44 | ON s.plan_id = p.plan_id;
45 |
46 |
47 | --5. How many customers have churned straight after their initial free trial - what percentage is this rounded to the nearest whole number?
48 |
49 | WITH nextPlan AS (
50 | SELECT
51 | s.customer_id,
52 | s.start_date,
53 | p.plan_name,
54 | LEAD(p.plan_name) OVER(PARTITION BY s.customer_id ORDER BY p.plan_id) AS next_plan
55 | FROM subscriptions s
56 | JOIN plans p ON s.plan_id = p.plan_id
57 | )
58 |
59 | SELECT
60 | COUNT(*) AS churn_after_trial,
61 | 100*COUNT(*) / (SELECT COUNT(DISTINCT customer_id) FROM subscriptions) AS pct
62 | FROM nextPlan
63 | WHERE plan_name = 'trial'
64 | AND next_plan = 'churn';
65 |
66 |
67 | --6. What is the number and percentage of customer plans after their initial free trial?
68 |
69 | WITH nextPlan AS (
70 | SELECT
71 | s.customer_id,
72 | s.start_date,
73 | p.plan_name,
74 | LEAD(p.plan_name) OVER(PARTITION BY s.customer_id
75 | ORDER BY p.plan_id) AS next_plan
76 | FROM subscriptions s
77 | JOIN plans p ON s.plan_id = p.plan_id
78 | )
79 |
80 | SELECT
81 | next_plan,
82 | COUNT(*) AS customer_plan,
83 | CAST(100 * COUNT(*) AS FLOAT)
84 | / (SELECT COUNT(DISTINCT customer_id) FROM subscriptions) AS percentage
85 | FROM nextPlan
86 | WHERE next_plan IS NOT NULL
87 | AND plan_name = 'trial'
88 | GROUP BY next_plan;
89 |
90 |
91 | --7. What is the customer count and percentage breakdown of all 5 plan_name values at 2020-12-31?
92 |
93 | WITH plansDate AS (
94 | SELECT
95 | s.customer_id,
96 | s.start_date,
97 | p.plan_id,
98 | p.plan_name,
99 | LEAD(s.start_date) OVER(PARTITION BY s.customer_id ORDER BY s.start_date) AS next_date
100 | FROM subscriptions s
101 | JOIN plans p ON s.plan_id = p.plan_id
102 | )
103 |
104 | SELECT
105 | plan_id,
106 | plan_name,
107 | COUNT(*) AS customers,
108 | CAST(100*COUNT(*) AS FLOAT)
109 | / (SELECT COUNT(DISTINCT customer_id) FROM subscriptions) AS conversion_rate
110 | FROM plansDate
111 | WHERE (next_date IS NOT NULL AND (start_date < '2020-12-31' AND next_date > '2020-12-31'))
112 | OR (next_date IS NULL AND start_date < '2020-12-31')
113 | GROUP BY plan_id, plan_name
114 | ORDER BY plan_id;
115 |
116 |
117 | --8. How many customers have upgraded to an annual plan in 2020?
118 |
119 | SELECT
120 | COUNT(DISTINCT customer_id) AS customer_count
121 | FROM subscriptions s
122 | JOIN plans p ON s.plan_id = p.plan_id
123 | WHERE p.plan_name = 'pro annual'
124 | AND YEAR(s.start_date) = 2020;
125 |
126 |
127 | --9. How many days on average does it take for a customer to upgrade to an annual plan from the day they join Foodie-Fi?
128 |
129 | WITH trialPlan AS (
130 | SELECT
131 | s.customer_id,
132 | s.start_date AS trial_date
133 | FROM subscriptions s
134 | JOIN plans p ON s.plan_id = p.plan_id
135 | WHERE p.plan_name = 'trial'
136 | ),
137 | annualPlan AS (
138 | SELECT
139 | s.customer_id,
140 | s.start_date AS annual_date
141 | FROM subscriptions s
142 | JOIN plans p ON s.plan_id = p.plan_id
143 | WHERE p.plan_name = 'pro annual'
144 | )
145 |
146 | SELECT
147 | AVG(CAST(DATEDIFF(d, trial_date, annual_date) AS FLOAT)) AS avg_days_to_annual
148 | FROM trialPlan t
149 | JOIN annualPlan a
150 | ON t.customer_id = a.customer_id;
151 |
152 |
153 | --10. Can you further break down this average value into 30 day periods (i.e. 0-30 days, 31-60 days etc)?
154 |
155 | WITH trialPlan AS (
156 | SELECT
157 | s.customer_id,
158 | s.start_date AS trial_date
159 | FROM subscriptions s
160 | JOIN plans p ON s.plan_id = p.plan_id
161 | WHERE p.plan_name = 'trial'
162 | ),
163 | annualPlan AS (
164 | SELECT
165 | s.customer_id,
166 | s.start_date AS annual_date
167 | FROM subscriptions s
168 | JOIN plans p ON s.plan_id = p.plan_id
169 | WHERE p.plan_name = 'pro annual'
170 | ),
171 | datesDiff AS (
172 | SELECT
173 | t.customer_id,
174 | DATEDIFF(d, trial_date, annual_date) AS diff
175 | FROM trialPlan t
176 | JOIN annualPlan a ON t.customer_id = a.customer_id
177 | ),
178 | daysRecursion AS (
179 | SELECT
180 | 0 AS start_period,
181 | 30 AS end_period
182 | UNION ALL
183 | SELECT
184 | end_period + 1 AS start_period,
185 | end_period + 30 AS end_period
186 | FROM daysRecursion
187 | WHERE end_period < 360
188 | )
189 |
190 | SELECT
191 | dr.start_period,
192 | dr.end_period,
193 | COUNT(*) AS customer_count
194 | FROM daysRecursion dr
195 | LEFT JOIN datesDiff dd
196 | ON (dd.diff >= dr.start_period AND dd.diff <= dr.end_period)
197 | GROUP BY dr.start_period, dr.end_period;
198 |
199 |
200 | --11. How many customers downgraded from a pro monthly to a basic monthly plan in 2020?
201 |
202 | WITH nextPlan AS (
203 | SELECT
204 | s.customer_id,
205 | s.start_date,
206 | p.plan_name,
207 | LEAD(p.plan_name) OVER(PARTITION BY s.customer_id ORDER BY p.plan_id) AS next_plan
208 | FROM subscriptions s
209 | JOIN plans p ON s.plan_id = p.plan_id
210 | )
211 |
212 | SELECT COUNT(*) AS pro_to_basic_monthly
213 | FROM nextPlan
214 | WHERE plan_name = 'pro monthly'
215 | AND next_plan = 'basic monthly'
216 | AND YEAR(start_date) = 2020;
217 |
--------------------------------------------------------------------------------
/Case Study #3 - Foodie-Fi/Syntax/C_query.sql:
--------------------------------------------------------------------------------
1 | ---------------------------------
2 | --C. Challenge Payment Question--
3 | ---------------------------------
4 |
5 | /*
6 | The Foodie-Fi team wants you to create a new payments table for the year 2020 that includes amounts paid
7 | by each customer in the subscriptions table with the following requirements:
8 | - monthly payments always occur on the same day of month as the original start_date of any monthly paid plan
9 | - upgrades from basic to monthly or pro plans are reduced by the current paid amount in that month and start immediately
10 | - upgrades from pro monthly to pro annual are paid at the end of the current billing period and also starts at the end of the month period
11 | - once a customer churns they will no longer make payments
12 | */
13 |
14 |
15 | --Use a recursive CTE to add one payment row per month for each monthly paid plan in 2020 until the customer changes plans, except 'pro annual'
16 | WITH dateRecursion AS (
17 | SELECT
18 | s.customer_id,
19 | s.plan_id,
20 | p.plan_name,
21 | s.start_date AS payment_date,
22 | --last_date: last day of the current plan
23 | CASE
24 | --if a customer kept using the current plan, last_date = '2020-12-31'
25 | WHEN LEAD(s.start_date) OVER(PARTITION BY s.customer_id ORDER BY s.start_date) IS NULL THEN '2020-12-31'
26 | --if a customer changed the plan, last_date = (month difference between start_date and changing date) + start_date
27 | ELSE DATEADD(MONTH,
28 | DATEDIFF(MONTH, start_date, LEAD(s.start_date) OVER(PARTITION BY s.customer_id ORDER BY s.start_date)),
29 | start_date) END AS last_date,
30 | p.price AS amount
31 | FROM subscriptions s
32 | JOIN plans p ON s.plan_id = p.plan_id
33 | --exclude trials because they didn't generate payments
34 | WHERE p.plan_name NOT IN ('trial')
35 | AND YEAR(start_date) = 2020
36 |
37 | UNION ALL
38 |
39 | SELECT
40 | customer_id,
41 | plan_id,
42 | plan_name,
43 | --increment payment_date by monthly
44 | DATEADD(MONTH, 1, payment_date) AS payment_date,
45 | last_date,
46 | amount
47 | FROM dateRecursion
48 | --stop incrementing when payment_date = last_date
49 | WHERE DATEADD(MONTH, 1, payment_date) <= last_date
50 | AND plan_name != 'pro annual'
51 | )
52 | --Create a new table [payments]
53 | SELECT
54 | customer_id,
55 | plan_id,
56 | plan_name,
57 | payment_date,
58 | amount,
59 | ROW_NUMBER() OVER(PARTITION BY customer_id ORDER BY payment_date) AS payment_order
60 | INTO payments
61 | FROM dateRecursion
62 | --exclude churns
63 | WHERE amount IS NOT NULL
64 | ORDER BY customer_id
65 | OPTION (MAXRECURSION 365); --allow up to 365 recursions (the default limit is 100)
66 |
--------------------------------------------------------------------------------
/Case Study #3 - Foodie-Fi/Syntax/D_query.sql:
--------------------------------------------------------------------------------
1 | --------------------------------
2 | --D. Outside The Box Questions--
3 | --------------------------------
4 |
5 | --1.How would you calculate the rate of growth for Foodie-Fi?
6 |
7 | /*
8 | - I chose the year 2020 to analyze because I already created the [payments] table in part C.
9 | - If you want to incorporate the 2021 data to see the whole picture (quarterly trends, a 2020-2021 comparison, etc.),
10 | create a new [payments] table and change all the date conditions in part C to '2021-12-31'.
11 | */
12 |
13 | WITH monthlyRevenue AS (
14 | SELECT
15 | MONTH(payment_date) AS months,
16 | SUM(amount) AS revenue
17 | FROM payments
18 | GROUP BY MONTH(payment_date)
19 | )
20 |
21 | SELECT
22 | months,
23 | revenue,
24 | (revenue-LAG(revenue) OVER(ORDER BY months))/revenue AS revenue_growth --denominator is the current month; use LAG(revenue) instead for the conventional growth rate
25 | FROM monthlyRevenue;
26 |
27 |
28 | --2. What key metrics would you recommend Foodie-Fi management to track over time to assess performance of their overall business?
29 |
30 | /*
31 | - Monthly revenue growth: How does Foodie-Fi's revenue increase or decrease month over month? Are there months in which revenue changes significantly?
32 | - Customer growth: How many new customers are added each month? What does the growth rate look like (x1.5, x2, ... after each month)?
33 | - Conversion rate: How many customers keep using Foodie-Fi after the trial? How does that rate change over time?
34 | - Churn rate: How many customers cancel their subscription each month? Which plan were they on?
35 | */
36 |
37 |
38 | --3. What are some key customer journeys or experiences that you would analyse further to improve customer retention?
39 |
40 | /*
41 | - Customers who downgraded their plan
42 | - Customers who upgraded from basic monthly to pro monthly or pro annual
43 | - Customers who cancelled the subscription
44 | */
45 |
46 |
47 | --4. If the Foodie-Fi team were to create an exit survey shown to customers who wish to cancel their subscription, what questions would you include in the survey?
48 |
49 | /*
50 | - What is the primary reason for the cancellation?
51 | + Price
52 | + Technical issues
53 | + Customer support
54 | + Found an alternative
55 | + Others (please specify)
56 | - Overall, how satisfied were you with the subscription? (Likert scale: Very Satisfied - Very Unsatisfied)
57 | - Would you consider using our services in the future? (Likert scale: Very Likely - Very Unlikely)
58 | - Would you recommend our company to a colleague, friend or family member? (Likert scale: Very Likely - Very Unlikely)
59 | */
60 |
61 |
62 | --5. What business levers could the Foodie-Fi team use to reduce the customer churn rate? How would you validate the effectiveness of your ideas?
63 |
64 | /*
65 | - From the exit survey, look for the most common reasons why customers cancelled their subscriptions
66 | + Price: offer seasonal discounts, extend the trial period, or add more benefits for customers
67 | + Service quality: work with the relevant department to fix the issues
68 | + Found an alternative: run a competitor analysis to understand their competitive advantages over us
69 | - To validate the effectiveness of those ideas, track:
70 | + Churn rate
71 | + Conversion rate
72 | */
73 |
--------------------------------------------------------------------------------
/Case Study #4 - Data Bank/README.md:
--------------------------------------------------------------------------------
1 | # 📊 Case Study #4 - Data Bank
2 |
3 |
4 |
5 | ## 📕 Table of Contents
6 | * [Business Task](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%234%20-%20Data%20Bank#%EF%B8%8F-bussiness-task)
7 | * [Entity Relationship Diagram](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%234%20-%20Data%20Bank#-entity-relationship-diagram)
8 | * [Case Study Questions](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%234%20-%20Data%20Bank#-case-study-questions)
9 | * [My Solution](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%234%20-%20Data%20Bank#-my-solution)
10 |
11 | ---
12 | ## 🛠️ Business Task
13 | Danny launched a new initiative, Data Bank, which runs banking activities and also acts as the world’s most secure distributed data storage platform!
14 | Customers are allocated cloud data storage limits which are directly linked to how much money they have in their accounts.
15 |
16 | The management team at Data Bank want to increase their total customer base - but also need some help tracking just how much data storage their customers will need.
17 | This case study is all about calculating metrics, growth and helping the business analyse their data in a smart way to better forecast and plan for their future developments!
18 |
19 | ---
20 | ## 🔐 Entity Relationship Diagram
21 |
22 |
23 |
24 | ---
25 | ## ❓ Case Study Questions
26 | ### A. Customer Nodes Exploration
27 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%234%20-%20Data%20Bank/Solution/A.%20Customer%20Nodes%20Exploration.md).
28 |
29 | 1. How many unique nodes are there on the Data Bank system?
30 | 2. What is the number of nodes per region?
31 | 3. How many customers are allocated to each region?
32 | 4. How many days on average are customers reallocated to a different node?
33 | 5. What is the median, 80th and 95th percentile for this same reallocation days metric for each region?
34 |
35 | ---
36 | ### B. Customer Transactions
37 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%234%20-%20Data%20Bank/Solution/B.%20Customer%20Transactions.md).
38 |
39 | 1. What is the unique count and total amount for each transaction type?
40 | 2. What is the average total historical deposit counts and amounts for all customers?
41 | 3. For each month - how many Data Bank customers make more than 1 deposit and either 1 purchase or 1 withdrawal in a single month?
42 | 4. What is the closing balance for each customer at the end of the month?
43 | 5. What is the percentage of customers who increase their closing balance by more than 5%?
44 |
45 | ---
46 | ## 🚀 My Solution
47 | * View the complete syntax [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%234%20-%20Data%20Bank/Syntax).
48 | * View the result and explanation [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%234%20-%20Data%20Bank/Solution).
49 |
--------------------------------------------------------------------------------
/Case Study #4 - Data Bank/Solution/A. Customer Nodes Exploration.md:
--------------------------------------------------------------------------------
1 | # 📊 Case Study #4 - Data Bank
2 | ## A. Customer Nodes Exploration
3 | ### 1. How many unique nodes are there on the Data Bank system?
4 | ```TSQL
5 | SELECT COUNT(DISTINCT node_id) AS unique_nodes
6 | FROM customer_nodes;
7 | ```
8 | | unique_nodes |
9 | |---------------|
10 | | 5 |
11 |
12 | ---
13 | ### 2. What is the number of nodes per region?
14 | ```TSQL
15 | SELECT
16 | r.region_id,
17 | r.region_name,
18 | COUNT(n.node_id) AS nodes
19 | FROM customer_nodes n
20 | JOIN regions r
21 | ON n.region_id = r.region_id
22 | GROUP BY r.region_id, r.region_name
23 | ORDER BY r.region_id;
24 | ```
25 | | region_id | region_name | nodes |
26 | |-----------|-------------|--------|
27 | | 1 | Australia | 770 |
28 | | 2 | America | 735 |
29 | | 3 | Africa | 714 |
30 | | 4 | Asia | 665 |
31 | | 5 | Europe | 616 |
32 |
33 | ---
34 | ### 3. How many customers are allocated to each region?
35 | ```TSQL
36 | SELECT
37 | r.region_id,
38 | r.region_name,
39 | COUNT(DISTINCT n.customer_id) AS customers
40 | FROM customer_nodes n
41 | JOIN regions r
42 | ON n.region_id = r.region_id
43 | GROUP BY r.region_id, r.region_name
44 | ORDER BY r.region_id;
45 | ```
46 | | region_id | region_name | customers |
47 | |-----------|-------------|------------|
48 | | 1 | Australia | 110 |
49 | | 2 | America | 105 |
50 | | 3 | Africa | 102 |
51 | | 4 | Asia | 95 |
52 | | 5 | Europe | 88 |
53 |
54 | ---
55 | ### 4. How many days on average are customers reallocated to a different node?
56 | * Create a CTE ```customerDates``` containing the first date for each customer in each node
57 | * Create a CTE ```reallocation``` to calculate the difference in days between the first date in one node and the first date in the next node
58 | * Take the average of those day differences
59 | ```TSQL
60 | WITH customerDates AS (
61 | SELECT
62 | customer_id,
63 | region_id,
64 | node_id,
65 | MIN(start_date) AS first_date
66 | FROM customer_nodes
67 | GROUP BY customer_id, region_id, node_id
68 | ),
69 | reallocation AS (
70 | SELECT
71 | customer_id,
72 | node_id,
73 | region_id,
74 | first_date,
75 | DATEDIFF(DAY, first_date,
76 | LEAD(first_date) OVER(PARTITION BY customer_id
77 | ORDER BY first_date)) AS moving_days
78 | FROM customerDates
79 | )
80 |
81 | SELECT
82 | AVG(CAST(moving_days AS FLOAT)) AS avg_moving_days
83 | FROM reallocation;
84 | ```
85 | | avg_moving_days |
86 | |------------------|
87 | | 23.6889920424403 |
88 |
89 | On average, it takes about 24 days for a customer to be reallocated to a different node.
90 |
91 | ---
92 | ### 5. What is the median, 80th and 95th percentile for this same reallocation days metric for each region?
93 | Reuse the two CTEs from the previous question, ```customerDates``` and ```reallocation```, to calculate the median, 80th and 95th percentiles of the reallocation-days metric in each region.
94 | ```TSQL
95 | WITH customerDates AS (
96 | SELECT
97 | customer_id,
98 | region_id,
99 | node_id,
100 | MIN(start_date) AS first_date
101 | FROM customer_nodes
102 | GROUP BY customer_id, region_id, node_id
103 | ),
104 | reallocation AS (
105 | SELECT
106 | customer_id,
107 | region_id,
108 | node_id,
109 | first_date,
110 | DATEDIFF(DAY, first_date,
111 | LEAD(first_date) OVER(PARTITION BY customer_id
112 | ORDER BY first_date)) AS moving_days
113 | FROM customerDates
114 | )
115 |
116 | SELECT
117 | DISTINCT r.region_id,
118 | rg.region_name,
119 | PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY r.moving_days) OVER(PARTITION BY r.region_id) AS median,
120 | PERCENTILE_CONT(0.8) WITHIN GROUP (ORDER BY r.moving_days) OVER(PARTITION BY r.region_id) AS percentile_80,
121 | PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY r.moving_days) OVER(PARTITION BY r.region_id) AS percentile_95
122 | FROM reallocation r
123 | JOIN regions rg ON r.region_id = rg.region_id
124 | WHERE moving_days IS NOT NULL;
125 | ```
126 | | region_id | region_name | median | percentile_80 | percentile_95 |
127 | |-----------|-------------|--------|---------------|----------------|
128 | | 1 | Australia | 22 | 31 | 54 |
129 | | 2 | America | 21 | 33.2 | 57 |
130 | | 3 | Africa | 21 | 33.2 | 58.8 |
131 | | 4 | Asia | 22 | 32.4 | 49.85 |
132 | | 5 | Europe | 22 | 31 | 54.3 |
133 |
134 | ---
135 | My solution for **[B. Customer Transactions](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%234%20-%20Data%20Bank/Solution/B.%20Customer%20Transactions.md)**.
136 |
--------------------------------------------------------------------------------
/Case Study #4 - Data Bank/Syntax/A_query.sql:
--------------------------------------------------------------------------------
1 | ---------------------------------
2 | --A. Customer Nodes Exploration--
3 | ---------------------------------
4 |
5 | --1. How many unique nodes are there on the Data Bank system?
6 |
7 | SELECT COUNT(DISTINCT node_id) AS unique_nodes
8 | FROM customer_nodes;
9 |
10 |
11 | --2. What is the number of nodes per region?
12 |
13 | SELECT
14 | r.region_id,
15 | r.region_name,
16 | COUNT(n.node_id) AS nodes
17 | FROM customer_nodes n
18 | JOIN regions r
19 | ON n.region_id = r.region_id
20 | GROUP BY r.region_id, r.region_name
21 | ORDER BY r.region_id;
22 |
23 |
24 | --3. How many customers are allocated to each region?
25 |
26 | SELECT
27 | r.region_id,
28 | r.region_name,
29 | COUNT(DISTINCT n.customer_id) AS customers
30 | FROM customer_nodes n
31 | JOIN regions r
32 | ON n.region_id = r.region_id
33 | GROUP BY r.region_id, r.region_name
34 | ORDER BY r.region_id;
35 |
36 |
37 | --4. How many days on average are customers reallocated to a different node?
38 |
39 | WITH customerDates AS (
40 | SELECT
41 | customer_id,
42 | region_id,
43 | node_id,
44 | MIN(start_date) AS first_date
45 | FROM customer_nodes
46 | GROUP BY customer_id, region_id, node_id
47 | ),
48 | reallocation AS (
49 | SELECT
50 | customer_id,
51 | node_id,
52 | region_id,
53 | first_date,
54 | DATEDIFF(DAY, first_date,
55 | LEAD(first_date) OVER(PARTITION BY customer_id
56 | ORDER BY first_date)) AS moving_days
57 | FROM customerDates
58 | )
59 |
60 | SELECT
61 | AVG(CAST(moving_days AS FLOAT)) AS avg_moving_days
62 | FROM reallocation;
63 |
64 |
65 | --5. What is the median, 80th and 95th percentile for this same reallocation days metric for each region?
66 |
67 | WITH customerDates AS (
68 | SELECT
69 | customer_id,
70 | region_id,
71 | node_id,
72 | MIN(start_date) AS first_date
73 | FROM customer_nodes
74 | GROUP BY customer_id, region_id, node_id
75 | ),
76 | reallocation AS (
77 | SELECT
78 | customer_id,
79 | region_id,
80 | node_id,
81 | first_date,
82 | DATEDIFF(DAY, first_date, LEAD(first_date) OVER(PARTITION BY customer_id ORDER BY first_date)) AS moving_days
83 | FROM customerDates
84 | )
85 |
86 | SELECT
87 | DISTINCT r.region_id,
88 | rg.region_name,
89 | PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY r.moving_days) OVER(PARTITION BY r.region_id) AS median,
90 | PERCENTILE_CONT(0.8) WITHIN GROUP (ORDER BY r.moving_days) OVER(PARTITION BY r.region_id) AS percentile_80,
91 | PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY r.moving_days) OVER(PARTITION BY r.region_id) AS percentile_95
92 | FROM reallocation r
93 | JOIN regions rg ON r.region_id = rg.region_id
94 | WHERE moving_days IS NOT NULL;
95 |
--------------------------------------------------------------------------------
/Case Study #4 - Data Bank/Syntax/B_query.sql:
--------------------------------------------------------------------------------
1 | ----------------------------
2 | --B. Customer Transactions--
3 | ----------------------------
4 |
5 | --1. What is the unique count and total amount for each transaction type?
6 |
7 | SELECT
8 | txn_type,
9 | COUNT(*) AS unique_count,
10 | SUM(txn_amount) AS total_amount
11 | FROM customer_transactions
12 | GROUP BY txn_type;
13 |
14 |
15 | --2. What is the average total historical deposit counts and amounts for all customers?
16 |
17 | WITH customerDeposit AS (
18 | SELECT
19 | customer_id,
20 | txn_type,
21 | COUNT(*) AS dep_count,
22 | SUM(txn_amount) AS dep_amount
23 | FROM customer_transactions
24 | WHERE txn_type = 'deposit'
25 | GROUP BY customer_id, txn_type
26 | )
27 |
28 | SELECT
29 | AVG(dep_count) AS avg_dep_count,
30 | AVG(dep_amount) AS avg_dep_amount
31 | FROM customerDeposit;
32 |
33 |
34 | --3. For each month - how many Data Bank customers make more than 1 deposit and either 1 purchase or 1 withdrawal in a single month?
35 |
36 | WITH cte_transaction AS (
37 | SELECT
38 | customer_id,
39 | MONTH(txn_date) AS months,
40 | SUM(CASE WHEN txn_type = 'deposit' THEN 1 ELSE 0 END) AS deposit_count,
41 | SUM(CASE WHEN txn_type = 'purchase' THEN 1 ELSE 0 END) AS purchase_count,
42 | SUM(CASE WHEN txn_type = 'withdrawal' THEN 1 ELSE 0 END) AS withdrawal_count
43 | FROM customer_transactions
44 | GROUP BY customer_id, MONTH(txn_date)
45 | )
46 |
47 | SELECT
48 | months,
49 | COUNT(customer_id) AS customer_count
50 | FROM cte_transaction
51 | WHERE deposit_count > 1
52 | AND (purchase_count = 1 OR withdrawal_count = 1)
53 | GROUP BY months;
54 |
55 |
56 | --4. What is the closing balance for each customer at the end of the month?
57 |
58 | --Last day of the month containing the latest transaction date in the dataset
59 | DECLARE @maxDate DATE = (SELECT EOMONTH(MAX(txn_date)) FROM customer_transactions)
60 |
61 | --CTE 1: Monthly transactions of each customer
62 | WITH monthly_transactions AS (
63 | SELECT
64 | customer_id,
65 | EOMONTH(txn_date) AS end_date,
66 | SUM(CASE WHEN txn_type IN ('withdrawal', 'purchase') THEN -txn_amount
67 | ELSE txn_amount END) AS transactions
68 | FROM customer_transactions
69 | GROUP BY customer_id, EOMONTH(txn_date)
70 | ),
71 |
72 | --CTE 2: Increment last days of each month till they are equal to @maxDate
73 | recursive_dates AS (
74 | SELECT
75 | DISTINCT customer_id,
76 | CAST('2020-01-31' AS DATE) AS end_date
77 | FROM customer_transactions
78 | UNION ALL
79 | SELECT
80 | customer_id,
81 | EOMONTH(DATEADD(MONTH, 1, end_date)) AS end_date
82 | FROM recursive_dates
83 | WHERE EOMONTH(DATEADD(MONTH, 1, end_date)) <= @maxDate
84 | )
85 |
86 | SELECT
87 | r.customer_id,
88 | r.end_date,
89 | COALESCE(m.transactions, 0) AS transactions,
90 | SUM(m.transactions) OVER (PARTITION BY r.customer_id ORDER BY r.end_date
91 | ROWS UNBOUNDED PRECEDING) AS closing_balance
92 | FROM recursive_dates r
93 | LEFT JOIN monthly_transactions m
94 | ON r.customer_id = m.customer_id
95 | AND r.end_date = m.end_date;
96 |
97 |
98 | --5. What is the percentage of customers who increase their closing balance by more than 5%?
99 |
100 | --Last day of the month containing the latest transaction date in the dataset (same as Q4)
101 | DECLARE @maxDate DATE = (SELECT EOMONTH(MAX(txn_date)) FROM customer_transactions)
102 |
103 | --CTE 1: Monthly transactions of each customer (Q4)
104 | WITH monthly_transactions AS (
105 | SELECT
106 | customer_id,
107 | EOMONTH(txn_date) AS end_date,
108 | SUM(CASE WHEN txn_type IN ('withdrawal', 'purchase') THEN -txn_amount
109 | ELSE txn_amount END) AS transactions
110 | FROM customer_transactions
111 | GROUP BY customer_id, EOMONTH(txn_date)
112 | ),
113 |
114 | --CTE 2: Increment last days of each month till they are equal to @maxDate (Q4)
115 | recursive_dates AS (
116 | SELECT
117 | DISTINCT customer_id,
118 | CAST('2020-01-31' AS DATE) AS end_date
119 | FROM customer_transactions
120 | UNION ALL
121 | SELECT
122 | customer_id,
123 | EOMONTH(DATEADD(MONTH, 1, end_date)) AS end_date
124 | FROM recursive_dates
125 | WHERE EOMONTH(DATEADD(MONTH, 1, end_date)) <= @maxDate
126 | ),
127 |
128 | -- CTE 3: Closing balance of each customer by monthly (Q4)
129 | customers_balance AS (
130 | SELECT
131 | r.customer_id,
132 | r.end_date,
133 | COALESCE(m.transactions, 0) AS transactions,
134 | SUM(m.transactions) OVER (PARTITION BY r.customer_id ORDER BY r.end_date
135 | ROWS UNBOUNDED PRECEDING) AS closing_balance
136 | FROM recursive_dates r
137 | LEFT JOIN monthly_transactions m
138 | ON r.customer_id = m.customer_id
139 | AND r.end_date = m.end_date
140 | ),
141 |
142 | --CTE 4: CTE 3 & next_balance
143 | customers_next_balance AS (
144 | SELECT *,
145 | LEAD(closing_balance) OVER(PARTITION BY customer_id ORDER BY end_date) AS next_balance
146 | FROM customers_balance
147 | ),
148 |
149 | --CTE 5: Calculate the increase percentage of closing balance for each customer
150 | pct_increase AS (
151 | SELECT *,
152 | 100.0*(next_balance-closing_balance)/closing_balance AS pct
153 | FROM customers_next_balance
154 | WHERE closing_balance != 0 AND next_balance IS NOT NULL
155 | )
156 |
157 | --Materialize into a temp table to avoid the warning: "Null value is eliminated by an aggregate or other SET operation"
158 | SELECT *
159 | INTO #temp
160 | FROM pct_increase;
161 |
162 | --Percentage of customers whose closing balance increased by more than 5% over the previous month
163 | SELECT CAST(100.0*COUNT(DISTINCT customer_id) AS FLOAT)
164 | / (SELECT COUNT(DISTINCT customer_id) FROM customer_transactions) AS pct_customers
165 | FROM #temp
166 | WHERE pct > 5;
167 |
--------------------------------------------------------------------------------
/Case Study #5 - Data Mart/README.md:
--------------------------------------------------------------------------------
1 | # 🛒 Case Study #5 - Data Mart
2 |
3 |
4 |
5 | ## 📕 Table of Contents
6 | * [Business Task](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%235%20-%20Data%20Mart#%EF%B8%8F-bussiness-task)
7 | * [Entity Relationship Diagram](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%235%20-%20Data%20Mart#-entity-relationship-diagram)
8 | * [Case Study Questions](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%235%20-%20Data%20Mart#-case-study-questions)
9 | * [My Solution](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%235%20-%20Data%20Mart#-my-solution)
10 |
11 | ---
12 | ## 🛠️ Business Task
13 | Data Mart is an online supermarket that specialises in fresh produce.
14 | In June 2020 - large scale supply changes were made at Data Mart.
15 | All Data Mart products now use sustainable packaging methods in every single step from the farm all the way to the customer.
16 |
17 | Danny needs your help to analyse and quantify the impact of this change on the sales performance for Data Mart and its separate business areas.
18 | The key business questions to answer are the following:
19 | * What was the quantifiable impact of the changes introduced in June 2020?
20 | * Which platform, region, segment and customer types were the most impacted by this change?
21 | * What can we do about future introduction of similar sustainability updates to the business to minimise impact on sales?
22 |
23 | ---
24 | ## 🔐 Entity Relationship Diagram
25 |
26 |
27 |
28 | ---
29 | ## ❓ Case Study Questions
30 | ### A. Data Cleansing Steps
31 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%235%20-%20Data%20Mart/Solution/A.%20Data%20Cleansing%20Steps.md).
32 |
33 | In a single query, perform the following operations and generate a new table in the ```data_mart``` schema named ```clean_weekly_sales```:
34 | * Convert the ```week_date``` to a ```DATE``` format
35 | * Add a ```week_number``` as the second column for each ```week_date``` value, for example any value from the 1st of January to 7th of January will be 1, 8th to 14th will be 2 etc
36 | * Add a ```month_number``` with the calendar month for each ```week_date``` value as the 3rd column
37 | * Add a ```calendar_year``` column as the 4th column containing either 2018, 2019 or 2020 values
38 | * Add a new column called ```age_band``` after the original ```segment``` column using the following mapping on the number inside the ```segment``` value
39 |
40 | | segment | age_band |
41 | |--------|--------------|
42 | | 1 | Young Adults |
43 | | 2 | Middle Aged |
44 | | 3 or 4 | Retirees |
45 |
46 | * Add a new ```demographic``` column using the following mapping for the first letter in the ```segment``` values
47 |
48 | | segment | demographic |
49 | |---------|-------------|
50 | | C | Couples |
51 | | F | Families |
52 |
53 | * Replace all ```null``` string values with an ```"unknown"``` string value in the original ```segment``` column as well as the new ```age_band``` and ```demographic``` columns
54 | * Generate a new ```avg_transaction``` column as the sales value divided by ```transactions``` rounded to 2 decimal places for each record
55 |
56 | ---
57 | ### B. Data Exploration
58 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%235%20-%20Data%20Mart/Solution/B.%20Data%20Exploration.md).
59 |
60 | 1. What day of the week is used for each ```week_date``` value?
61 | 2. What range of week numbers are missing from the dataset?
62 | 3. How many total transactions were there for each year in the dataset?
63 | 4. What is the total sales for each region for each month?
64 | 5. What is the total count of transactions for each platform
65 | 6. What is the percentage of sales for Retail vs Shopify for each month?
66 | 7. What is the percentage of sales by demographic for each year in the dataset?
67 | 8. Which ```age_band``` and ```demographic``` values contribute the most to Retail sales?
68 | 9. Can we use the ```avg_transaction``` column to find the average transaction size for each year for Retail vs Shopify? If not - how would you calculate it instead?
69 |
70 | ---
71 | ### C. Before & After Analysis
72 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%235%20-%20Data%20Mart/Solution/C.%20Before%20%26%20After%20Analysis.md).
73 |
74 | This technique is usually used when we examine an important event and want to inspect the impact before and after a certain point in time.
75 | Take the week_date value of 2020-06-15 as the baseline week, when the Data Mart sustainable packaging changes came into effect.
76 | All week_date values from 2020-06-15 onwards form the period after the change, and the previous week_date values form the period before.
77 |
78 | Using this analysis approach - answer the following questions:
79 |
80 | 1. What is the total sales for the 4 weeks before and after 2020-06-15? What is the growth or reduction rate in actual values and percentage of sales?
81 | 2. What about the entire 12 weeks before and after?
82 | 3. How do the sale metrics for these 2 periods before and after compare with the previous years in 2018 and 2019?
83 |
84 | ---
85 | ### D. Bonus Question
86 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%235%20-%20Data%20Mart/Solution/D.%20Bonus%20Question.md).
87 |
88 | Which areas of the business have the highest negative impact in sales metrics performance in 2020 for the 12 week before and after period?
89 | * ```region```
90 | * ```platform```
91 | * ```age_band```
92 | * ```demographic```
93 | * ```customer_type```
94 |
95 | Do you have any further recommendations for Danny’s team at Data Mart or any interesting insights based on this analysis?
95 |
96 | ---
97 | ## 🚀 My Solution
98 | * View the complete syntax [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%235%20-%20Data%20Mart/Syntax).
99 | * View the result and explanation [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/d7bbc5cd9ab25fe7429644a36bb0f62a8b292792/Case%20Study%20%235%20-%20Data%20Mart/Solution).
100 |
--------------------------------------------------------------------------------
/Case Study #5 - Data Mart/Solution/A. Data Cleansing Steps.md:
--------------------------------------------------------------------------------
1 | # 🛒 Case Study #5 - Data Mart
2 | ## A. Data Cleansing Steps
3 | In a single query, perform the following operations and generate a new table in the ```data_mart``` schema named ```clean_weekly_sales```:
4 | * Convert the ```week_date``` to a ```DATE``` format
5 | * Add a ```week_number``` as the second column for each ```week_date``` value, for example any value from the 1st of January to 7th of January will be 1, 8th to 14th will be 2 etc
6 | * Add a ```month_number``` with the calendar month for each ```week_date``` value as the 3rd column
7 | * Add a ```calendar_year``` column as the 4th column containing either 2018, 2019 or 2020 values
8 | * Add a new column called ```age_band``` after the original ```segment``` column using the following mapping on the number inside the ```segment``` value
9 |
10 | | segment | age_band |
11 | |--------|--------------|
12 | | 1 | Young Adults |
13 | | 2 | Middle Aged |
14 | | 3 or 4 | Retirees |
15 |
16 | * Add a new ```demographic``` column using the following mapping for the first letter in the ```segment``` values
17 |
18 | | segment | demographic |
19 | |---------|-------------|
20 | | C | Couples |
21 | | F | Families |
22 |
23 | * Replace all ```null``` string values with an ```"unknown"``` string value in the original ```segment``` column as well as the new ```age_band``` and ```demographic``` columns
24 | * Generate a new ```avg_transaction``` column as the sales value divided by ```transactions``` rounded to 2 decimal places for each record
25 |
26 | ---
27 | ## Solution
28 |
29 | | Columns | Actions to take |
30 | |------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------|
31 | | week_date | Convert to ```DATE``` using ```CONVERT``` |
32 | | week_number* | Extract number of week using ```DATEPART``` |
33 | | month_number* | Extract month using ```DATEPART``` |
34 | | calendar_year* | Extract year using ```DATEPART``` |
35 | | region | No changes |
36 | | platform | No changes |
37 | | segment | No changes |
38 | | customer_type | No changes |
39 | | age_band* | Use ```CASE WHEN``` to categorize ```segment```: '1' = ```Young Adults```, '2' = ```Middle Aged```, '3' or '4' = ```Retirees``` and null = ```unknown``` |
40 | | demographic* | Use ```CASE WHEN``` to categorize ```segment```: 'C' = ```Couples```, 'F' = ```Families``` and null = ```unknown``` |
41 | | transactions | No changes |
42 | | sales | ```CAST``` to ```bigint``` for further aggregations |
43 | | avg_transaction* | Divide ```sales``` by ```transactions``` and round to 2 decimal places |
44 |
45 | ```TSQL
46 | SELECT
47 | CONVERT(date, week_date, 3) AS week_date,
48 | DATEPART(week, CONVERT(date, week_date, 3)) AS week_number,
49 | DATEPART(month, CONVERT(date, week_date, 3)) AS month_number,
50 | DATEPART(year, CONVERT(date, week_date, 3)) AS calendar_year,
51 | region,
52 | platform,
53 | segment,
54 | customer_type,
55 | CASE
56 | WHEN RIGHT(segment, 1) = '1' THEN 'Young Adults'
57 | WHEN RIGHT(segment, 1) = '2' THEN 'Middle Aged'
58 | WHEN RIGHT(segment, 1) IN ('3', '4') THEN 'Retirees'
59 | ELSE 'unknown' END AS age_band,
60 | CASE
61 | WHEN LEFT(segment, 1) = 'C' THEN 'Couples'
62 | WHEN LEFT(segment, 1) = 'F' THEN 'Families'
63 | ELSE 'unknown' END AS demographic,
64 | transactions,
65 | CAST(sales AS bigint) AS sales,
66 | ROUND(CAST(sales AS FLOAT)/transactions, 2) AS avg_transaction
67 | INTO clean_weekly_sales
68 | FROM weekly_sales;
69 | ```
70 | The first 10 rows:
71 | | week_date | week_number | month_number | calendar_year | region | platform | segment | customer_type | age_band | demographic | transactions | sales | avg_transaction |
72 | |------------|-------------|--------------|---------------|--------|----------|---------|---------------|--------------|-------------|--------------|----------|------------------|
73 | | 2020-08-31 | 36 | 8 | 2020 | ASIA | Retail | C3 | New | Retirees | Couples | 120631 | 3656163 | 30.31 |
74 | | 2020-08-31 | 36 | 8 | 2020 | ASIA | Retail | F1 | New | Young Adults | Families | 31574 | 996575 | 31.56 |
75 | | 2020-08-31 | 36 | 8 | 2020 | USA | Retail | null | Guest | unknown | unknown | 529151 | 16509610 | 31.2 |
76 | | 2020-08-31 | 36 | 8 | 2020 | EUROPE | Retail | C1 | New | Young Adults | Couples | 4517 | 141942 | 31.42 |
77 | | 2020-08-31 | 36 | 8 | 2020 | AFRICA | Retail | C2 | New | Middle Aged | Couples | 58046 | 1758388 | 30.29 |
78 | | 2020-08-31 | 36 | 8 | 2020 | CANADA | Shopify | F2 | Existing | Middle Aged | Families | 1336 | 243878 | 182.54 |
79 | | 2020-08-31 | 36 | 8 | 2020 | AFRICA | Shopify | F3 | Existing | Retirees | Families | 2514 | 519502 | 206.64 |
80 | | 2020-08-31 | 36 | 8 | 2020 | ASIA | Shopify | F1 | Existing | Young Adults | Families | 2158 | 371417 | 172.11 |
81 | | 2020-08-31 | 36 | 8 | 2020 | AFRICA | Shopify | F2 | New | Middle Aged | Families | 318 | 49557 | 155.84 |
82 | | 2020-08-31 | 36 | 8 | 2020 | AFRICA | Retail | C3 | New | Retirees | Couples | 111032 | 3888162 | 35.02 |
83 |
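As a quick sanity check (my own addition, assuming the `clean_weekly_sales` table created above), we can confirm that every distinct `segment` value maps to exactly one `age_band`/`demographic` pair:

```TSQL
--Each segment value should appear with a single age_band/demographic combination
SELECT
  segment,
  age_band,
  demographic,
  COUNT(*) AS record_count
FROM clean_weekly_sales
GROUP BY segment, age_band, demographic
ORDER BY segment;
```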
84 | ---
85 | My solution for **[B. Data Exploration](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%235%20-%20Data%20Mart/Solution/B.%20Data%20Exploration.md)**.
86 |
--------------------------------------------------------------------------------
/Case Study #5 - Data Mart/Solution/C. Before & After Analysis.md:
--------------------------------------------------------------------------------
1 | # 🛒 Case Study #5 - Data Mart
2 | ## C. Before & After Analysis
3 |
4 | This technique is usually used when we want to inspect the impact of an important event before and after a certain point in time.
5 | We take the week_date value of 2020-06-15 as the baseline week, when the Data Mart sustainable packaging changes came into effect:
6 | all week_date values from 2020-06-15 onwards mark the start of the period after the change, and the previous week_date values fall before it.
7 |
8 | Using this analysis approach - answer the following questions:
9 |
10 | ### 1. What is the total sales for the 4 weeks before and after 2020-06-15? What is the growth or reduction rate in actual values and percentage of sales?
11 | ```TSQL
12 | --Find the week_number of '2020-06-15' (@weekNum=25)
13 | DECLARE @weekNum int = (
14 | SELECT DISTINCT week_number
15 | FROM clean_weekly_sales
16 | WHERE week_date = '2020-06-15')
17 |
18 | --Find the total sales of 4 weeks before and after @weekNum
19 | WITH salesChanges AS (
20 | SELECT
21 | SUM(CASE WHEN week_number BETWEEN @weekNum-4 AND @weekNum-1 THEN sales END) AS before_changes,
22 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+3 THEN sales END) AS after_changes
23 | FROM clean_weekly_sales
24 | WHERE calendar_year = 2020
25 | )
26 |
27 | SELECT *,
28 | CAST(100.0 * (after_changes-before_changes)/before_changes AS decimal(5,2)) AS pct_change
29 | FROM salesChanges;
30 | ```
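Note that the `WHERE calendar_year = 2020` filter is essential here: `week_number` values repeat every year, so without it the before/after windows would also sum the 2018 and 2019 sales.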
31 | | before_changes | after_changes | pct_change |
32 | |----------------|---------------|-------------|
33 | | 2345878357 | 2318994169 | -1.15 |
34 |
35 | ---
36 | ### 2. What about the entire 12 weeks before and after?
37 | ```TSQL
38 | --Find the week_number of '2020-06-15' (@weekNum=25)
39 | DECLARE @weekNum int = (
40 | SELECT DISTINCT week_number
41 | FROM clean_weekly_sales
42 | WHERE week_date = '2020-06-15')
43 |
44 | --Find the total sales of 12 weeks before and after @weekNum
45 | WITH salesChanges AS (
46 | SELECT
47 | SUM(CASE WHEN week_number BETWEEN @weekNum-12 AND @weekNum-1 THEN sales END) AS before_changes,
48 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+11 THEN sales END) AS after_changes
49 | FROM clean_weekly_sales
50 | WHERE calendar_year = 2020
51 | )
52 |
53 | SELECT *,
54 | CAST(100.0 * (after_changes-before_changes)/before_changes AS decimal(5,2)) AS pct_change
55 | FROM salesChanges;
56 | ```
57 | | before_changes | after_changes | pct_change |
58 | |----------------|---------------|-------------|
59 | | 7126273147 | 6973947753 | -2.14 |
60 |
61 | ---
62 | ### 3. How do the sale metrics for these 2 periods before and after compare with the previous years in 2018 and 2019?
63 | Part 1: How do the sales metrics for the 4 weeks before and after compare with the previous years 2018 and 2019?
64 | ```TSQL
65 | --Find the week_number of '2020-06-15' (@weekNum=25)
66 | DECLARE @weekNum int = (
67 | SELECT DISTINCT week_number
68 | FROM clean_weekly_sales
69 | WHERE week_date = '2020-06-15')
70 |
71 | --Find the total sales of 4 weeks before and after @weekNum
72 | WITH salesChanges AS (
73 | SELECT
74 | calendar_year,
75 | SUM(CASE WHEN week_number BETWEEN @weekNum-4 AND @weekNum-1 THEN sales END) AS before_sales,
76 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+3 THEN sales END) AS after_sales
77 | FROM clean_weekly_sales
78 | GROUP BY calendar_year
79 | )
80 |
81 | SELECT *,
82 | CAST(100.0 * (after_sales-before_sales)/before_sales AS decimal(5,2)) AS pct_change
83 | FROM salesChanges
84 | ORDER BY calendar_year;
85 | ```
86 | | calendar_year | before_sales | after_sales | pct_change |
87 | |---------------|--------------|-------------|-------------|
88 | | 2018 | 1602763447 | 2129242914 | 32.85 |
89 | | 2019 | 1688891616 | 2252326390 | 33.36 |
90 | | 2020 | 1760870267 | 2318994169 | 31.70 |
91 |
92 | Part 2: How do the sales metrics for the 12 weeks before and after compare with the previous years 2018 and 2019?
93 | ```TSQL
94 | --Find the week_number of '2020-06-15' (@weekNum=25)
95 | DECLARE @weekNum int = (
96 | SELECT DISTINCT week_number
97 | FROM clean_weekly_sales
98 | WHERE week_date = '2020-06-15')
99 |
100 | --Find the total sales of 12 weeks before and after @weekNum
101 | WITH salesChanges AS (
102 | SELECT
103 | calendar_year,
104 | SUM(CASE WHEN week_number BETWEEN @weekNum-12 AND @weekNum-1 THEN sales END) AS before_sales,
105 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+11 THEN sales END) AS after_sales
106 | FROM clean_weekly_sales
107 | GROUP BY calendar_year
108 | )
109 |
110 | SELECT *,
111 | CAST(100.0 * (after_sales-before_sales)/before_sales AS decimal(5,2)) AS pct_change
112 | FROM salesChanges
113 | ORDER BY calendar_year;
114 | ```
115 | | calendar_year | before_sales | after_sales | pct_change |
116 | |---------------|--------------|-------------|-------------|
117 | | 2018 | 6396562317 | 6500818510 | 1.63 |
118 | | 2019 | 6883386397 | 6862646103 | -0.30 |
119 | | 2020 | 7126273147 | 6973947753 | -2.14 |
120 |
121 | ---
122 | My solution for **[D. Bonus Question](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%235%20-%20Data%20Mart/Solution/D.%20Bonus%20Question.md)**.
123 |
--------------------------------------------------------------------------------
/Case Study #5 - Data Mart/Solution/D. Bonus Question.md:
--------------------------------------------------------------------------------
1 | # 🛒 Case Study #5 - Data Mart
2 | ## D. Bonus Question
3 | Which areas of the business have the highest negative impact in sales metrics performance in 2020 for the 12 week before and after period?
4 | * ```region```
5 | * ```platform```
6 | * ```age_band```
7 | * ```demographic```
8 | * ```customer_type```
9 |
10 | Do you have any further recommendations for Danny’s team at Data Mart or any interesting insights based off this analysis?
11 |
12 | ---
13 | ## Solution
14 | First, use the technique in part C to find the ```week_number``` of ```2020-06-15```.
15 | ```TSQL
16 | --Find the week_number of '2020-06-15' (@weekNum=25)
17 | DECLARE @weekNum int = (
18 | SELECT DISTINCT week_number
19 | FROM clean_weekly_sales
20 | WHERE week_date = '2020-06-15'
21 | AND calendar_year = 2020)
22 | ```
23 | Then, depending on the area we want to analyze, change the column name in the ```SELECT``` and ```GROUP BY```.
24 |
25 | Remember to include the ```DECLARE @weekNum``` statement at the beginning of each part below.
26 |
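Alternatively, a dynamic-SQL sketch (my own variation, not part of the original solution) can swap the dimension column in one place instead of copying the query five times; `QUOTENAME` guards the injected column name:

```TSQL
--Hypothetical template: set @dim to any of the five dimension columns
DECLARE @dim sysname = 'region';
DECLARE @sql nvarchar(max) = N'
DECLARE @weekNum int = (
  SELECT DISTINCT week_number
  FROM clean_weekly_sales
  WHERE week_date = ''2020-06-15'');
SELECT ' + QUOTENAME(@dim) + N',
  SUM(CASE WHEN week_number BETWEEN @weekNum-12 AND @weekNum-1 THEN sales END) AS before_changes,
  SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+11 THEN sales END) AS after_changes
FROM clean_weekly_sales
GROUP BY ' + QUOTENAME(@dim) + N';';
EXEC sp_executesql @sql;
```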
27 | ---
28 | ### 1. Sales changes by ```region```
29 | ```TSQL
30 | WITH regionChanges AS (
31 | SELECT
32 | region,
33 | SUM(CASE WHEN week_number BETWEEN @weekNum-12 AND @weekNum-1 THEN sales END) AS before_changes,
34 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+11 THEN sales END) AS after_changes
35 | FROM clean_weekly_sales
36 | GROUP BY region
37 | )
38 | SELECT *,
39 | CAST(100.0 * (after_changes-before_changes)/before_changes AS decimal(5,2)) AS pct_change
40 | FROM regionChanges;
41 | ```
42 | | region | before_changes | after_changes | pct_change |
43 | |---------------|--------------|-------------|-------------|
44 | | OCEANIA | 6698586333 | 6640244793 | -0.87 |
45 | | EUROPE | 328141414 | 344420043 | 4.96 |
46 | | SOUTH AMERICA | 611056923 | 608981392 | -0.34 |
47 | | AFRICA | 4942976910 | 4997516159 | 1.10 |
48 | | CANADA | 1244662705 | 1234025206 | -0.85 |
49 | | ASIA | 4613242689 | 4551927271 | -1.33 |
50 | | USA | 1967554887 | 1960297502 | -0.37 |
51 |
52 | **Insights and recommendations:**
53 | * Overall, sales in most regions decreased after the packaging change.
54 | * The highest negative impact was in ```ASIA``` at -1.33%.
55 | Danny should reduce the number of products with sustainable packaging there.
56 | * Only ```EUROPE``` saw a significant increase of 4.96%, followed by ```AFRICA``` with 1.10%. These are the areas in which Danny should invest more.
57 |
58 | ---
59 | ### 2. Sales changes by ```platform```
60 | ```TSQL
61 | WITH platformChanges AS (
62 | SELECT
63 | platform,
64 | SUM(CASE WHEN week_number BETWEEN @weekNum-12 AND @weekNum-1 THEN sales END) AS before_changes,
65 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+11 THEN sales END) AS after_changes
66 | FROM clean_weekly_sales
67 | GROUP BY platform
68 | )
69 | SELECT *,
70 | CAST(100.0 * (after_changes-before_changes)/before_changes AS decimal(5,2)) AS pct_change
71 | FROM platformChanges;
72 | ```
73 | | platform | before_changes | after_changes | pct_change |
74 | |----------|--------------|-------------|-------------|
75 | | Retail | 19886040272 | 19768576165 | -0.59 |
76 | | Shopify | 520181589 | 568836201 | 9.35 |
77 |
78 | **Insights and recommendations:**
79 | * ```Shopify``` stores saw a 9.35% increase in sales, while ```Retail``` stores decreased slightly, by 0.59%.
80 | * Danny should put more products with sustainable packaging in ```Shopify``` stores.
81 |
82 | ---
83 | ### 3. Sales changes by ```age_band```
84 | ```TSQL
85 | WITH ageBandChanges AS (
86 | SELECT
87 | age_band,
88 | SUM(CASE WHEN week_number BETWEEN @weekNum-12 AND @weekNum-1 THEN sales END) AS before_changes,
89 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+11 THEN sales END) AS after_changes
90 | FROM clean_weekly_sales
91 | GROUP BY age_band
92 | )
93 | SELECT *,
94 | CAST(100.0 * (after_changes-before_changes)/before_changes AS decimal(5,2)) AS pct_change
95 | FROM ageBandChanges;
96 | ```
97 | | age_band | before_changes | after_changes | pct_change |
98 | |--------------|--------------|-------------|-------------|
99 | | unknown | 8191628826 | 8146983408 | -0.55 |
100 | | Young Adults | 2290835366 | 2285973456 | -0.21 |
101 | | Middle Aged | 3276892347 | 3269748622 | -0.22 |
102 | | Retirees | 6646865322 | 6634706880 | -0.18 |
103 |
104 | **Insights and recommendations:**
105 | * Overall, sales decreased slightly in all age bands.
106 | * ```Middle Aged``` and ```Young Adults``` customers saw a more negative impact on sales than ```Retirees```. Those bands should not be targeted with the new packaging.
107 |
108 | ---
109 | ### 4. Sales changes by ```demographic```
110 | ```TSQL
111 | WITH demographicChanges AS (
112 | SELECT
113 | demographic,
114 | SUM(CASE WHEN week_number BETWEEN @weekNum-12 AND @weekNum-1 THEN sales END) AS before_changes,
115 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+11 THEN sales END) AS after_changes
116 | FROM clean_weekly_sales
117 | GROUP BY demographic
118 | )
119 | SELECT *,
120 | CAST(100.0 * (after_changes-before_changes)/before_changes AS decimal(5,2)) AS pct_change
121 | FROM demographicChanges;
122 | ```
123 | | demographic | before_changes | after_changes | pct_change |
124 | |-------------|----------------|---------------|-------------|
125 | | unknown | 8191628826 | 8146983408 | -0.55 |
126 | | Families | 6605726904 | 6598087538 | -0.12 |
127 | | Couples | 5608866131 | 5592341420 | -0.29 |
128 |
129 | **Insights and recommendations:**
130 | * Overall, sales decreased slightly in all demographic groups.
131 | * ```Couples``` saw a more negative impact on sales than ```Families```. That group should not be targeted with the new packaging.
132 |
133 | ---
134 | ### 5. Sales changes by ```customer_type```
135 | ```TSQL
136 | WITH customerTypeChanges AS (
137 | SELECT
138 | customer_type,
139 | SUM(CASE WHEN week_number BETWEEN @weekNum-12 AND @weekNum-1 THEN sales END) AS before_changes,
140 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+11 THEN sales END) AS after_changes
141 | FROM clean_weekly_sales
142 | GROUP BY customer_type
143 | )
144 | SELECT *,
145 | CAST(100.0 * (after_changes-before_changes)/before_changes AS decimal(5,2)) AS pct_change
146 | FROM customerTypeChanges;
147 | ```
148 | | customer_type | before_changes | after_changes | pct_change |
149 | |---------------|----------------|---------------|-------------|
150 | | Guest | 7630353739 | 7595150744 | -0.46 |
151 | | Existing | 10168877642 | 10117367239 | -0.51 |
152 | | New | 2606990480 | 2624894383 | 0.69 |
153 |
154 | **Insights and recommendations:**
155 | * The sales for `Guests` and `Existing` customers decreased, but increased for `New` customers.
156 | * Further analysis should be carried out to understand why `New` customers responded positively to the sustainable packaging.
157 |
--------------------------------------------------------------------------------
/Case Study #5 - Data Mart/Syntax/A_query.sql:
--------------------------------------------------------------------------------
1 | ---------------------------
2 | --A. Data Cleansing Steps--
3 | ---------------------------
4 |
5 | SELECT
6 | CONVERT(date, week_date, 3) AS week_date,
7 | DATEPART(week, CONVERT(date, week_date, 3)) AS week_number,
8 | DATEPART(month, CONVERT(date, week_date, 3)) AS month_number,
9 | DATEPART(year, CONVERT(date, week_date, 3)) AS calendar_year,
10 | region,
11 | platform,
12 | segment,
13 | customer_type,
14 | CASE
15 | WHEN RIGHT(segment, 1) = '1' THEN 'Young Adults'
16 | WHEN RIGHT(segment, 1) = '2' THEN 'Middle Aged'
17 | WHEN RIGHT(segment, 1) IN ('3', '4') THEN 'Retirees'
18 | ELSE 'unknown' END AS age_band,
19 | CASE
20 | WHEN LEFT(segment, 1) = 'C' THEN 'Couples'
21 | WHEN LEFT(segment, 1) = 'F' THEN 'Families'
22 | ELSE 'unknown' END AS demographic,
23 | transactions,
24 | CAST(sales AS bigint) AS sales,
25 | ROUND(CAST(sales AS FLOAT)/transactions, 2) AS avg_transaction
26 | INTO clean_weekly_sales
27 | FROM weekly_sales;
28 |
--------------------------------------------------------------------------------
/Case Study #5 - Data Mart/Syntax/B_query.sql:
--------------------------------------------------------------------------------
1 | -----------------------
2 | --B. Data Exploration--
3 | -----------------------
4 |
5 | --1. What day of the week is used for each week_date value?
6 |
7 | SELECT DISTINCT DATENAME(dw, week_date) AS week_date_value
8 | FROM clean_weekly_sales;
9 |
10 |
11 | --2. What range of week numbers are missing from the dataset?
12 |
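--Generate week numbers 1 to 52 with a recursive CTE, then anti-join them
--against the actual data; SQL Server's default MAXRECURSION limit of 100
--comfortably covers the 52 iterations, so no OPTION hint is needed.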
13 | WITH allWeeks AS (
14 | SELECT 1 AS pos
15 | UNION ALL
16 | SELECT pos+1 FROM allWeeks
17 | WHERE pos+1 <= 52)
18 |
19 | SELECT
20 | DISTINCT a.pos,
21 | c.week_number
22 | FROM allWeeks a
23 | LEFT JOIN clean_weekly_sales c
24 | ON a.pos = c.week_number
25 | WHERE c.week_number IS NULL
26 | ORDER BY a.pos;
27 |
28 |
29 | --3. How many total transactions were there for each year in the dataset?
30 |
31 | SELECT
32 | calendar_year,
33 | SUM(transactions) AS total_transactions
34 | FROM clean_weekly_sales
35 | GROUP BY calendar_year
36 | ORDER BY calendar_year;
37 |
38 |
39 | --4. What is the total sales for each region for each month?
40 |
41 | SELECT
42 | region,
43 | month_number,
44 | SUM(sales) AS total_sales
45 | FROM clean_weekly_sales
46 | GROUP BY region, month_number
47 | ORDER BY region, month_number;
48 |
49 |
50 | --5. What is the total count of transactions for each platform?
51 |
52 | SELECT
53 | platform,
54 | SUM(transactions) AS total_transactions
55 | FROM clean_weekly_sales
56 | GROUP BY platform;
57 |
58 |
59 | --6. What is the percentage of sales for Retail vs Shopify for each month?
60 |
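--Pivot Retail vs Shopify into columns via conditional aggregation:
--each CASE isolates one platform's monthly sales and MAX collapses the group to that value.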
61 | WITH sales_cte AS (
62 | SELECT
63 | calendar_year,
64 | month_number,
65 | platform,
66 | SUM(sales) AS monthly_sales
67 | FROM clean_weekly_sales
68 | GROUP BY calendar_year, month_number, platform
69 | )
70 |
71 | SELECT
72 | calendar_year,
73 | month_number,
74 | CAST(100.0 * MAX(CASE WHEN platform = 'Retail' THEN monthly_sales END)
75 | / SUM(monthly_sales) AS decimal(5, 2)) AS pct_retail,
76 | CAST(100.0 * MAX(CASE WHEN platform = 'Shopify' THEN monthly_sales END)
77 | / SUM(monthly_sales) AS decimal(5, 2)) AS pct_shopify
78 | FROM sales_cte
79 | GROUP BY calendar_year, month_number
80 | ORDER BY calendar_year, month_number;
81 |
82 |
83 | --7. What is the percentage of sales by demographic for each year in the dataset?
84 |
85 | WITH sales_by_demographic AS (
86 | SELECT
87 | calendar_year,
88 | demographic,
89 | SUM(sales) AS sales
90 | FROM clean_weekly_sales
91 | GROUP BY calendar_year, demographic)
92 |
93 | SELECT
94 | calendar_year,
95 | CAST(100.0 * MAX(CASE WHEN demographic = 'Families' THEN sales END)
96 | / SUM(sales) AS decimal(5, 2)) AS pct_families,
97 | CAST(100.0 * MAX(CASE WHEN demographic = 'Couples' THEN sales END)
98 | / SUM(sales) AS decimal(5, 2)) AS pct_couples,
99 | CAST(100.0 * MAX(CASE WHEN demographic = 'unknown' THEN sales END)
100 | / SUM(sales) AS decimal(5, 2)) AS pct_unknown
101 | FROM sales_by_demographic
102 | GROUP BY calendar_year;
103 |
104 |
105 | --8. Which age_band and demographic values contribute the most to Retail sales?
106 |
107 | DECLARE @retailSales bigint = (
108 | SELECT SUM(sales)
109 | FROM clean_weekly_sales
110 | WHERE platform = 'Retail')
111 |
112 | SELECT
113 | age_band,
114 | demographic,
115 | SUM(sales) AS sales,
116 | CAST(100.0 * SUM(sales)/@retailSales AS decimal(5, 2)) AS contribution
117 | FROM clean_weekly_sales
118 | WHERE platform = 'Retail'
119 | GROUP BY age_band, demographic
120 | ORDER BY contribution DESC;
121 |
122 |
123 | --9. Can we use the avg_transaction column to find the average transaction size for each year for Retail vs Shopify? If not - how would you calculate it instead?
124 |
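--No: AVG(avg_transaction) gives every row equal weight, whereas
--SUM(sales) / SUM(transactions) weights each week by its transaction volume,
--which is the correct average transaction size per year and platform.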
125 | SELECT
126 | calendar_year,
127 | platform,
128 | ROUND(AVG(avg_transaction), 0) AS avg_transaction_row,
129 | SUM(sales) / SUM(transactions) AS avg_transaction_group
130 | FROM clean_weekly_sales
131 | GROUP BY calendar_year, platform
132 | ORDER BY calendar_year, platform;
133 |
--------------------------------------------------------------------------------
/Case Study #5 - Data Mart/Syntax/C_query.sql:
--------------------------------------------------------------------------------
1 | ------------------------------
2 | --C. Before & After Analysis--
3 | ------------------------------
4 |
5 | --1. What is the total sales for the 4 weeks before and after 2020-06-15? What is the growth or reduction rate in actual values and percentage of sales?
6 |
7 | --Find the week_number of '2020-06-15' (@weekNum=25)
8 | DECLARE @weekNum int = (
9 | SELECT DISTINCT week_number
10 | FROM clean_weekly_sales
11 | WHERE week_date = '2020-06-15')
12 |
13 | --Find the total sales of 4 weeks before and after @weekNum
14 | WITH salesChanges AS (
15 | SELECT
16 | SUM(CASE WHEN week_number BETWEEN @weekNum-4 AND @weekNum-1 THEN sales END) AS before_changes,
17 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+3 THEN sales END) AS after_changes
18 | FROM clean_weekly_sales
19 | WHERE calendar_year = 2020
20 | )
21 |
22 | SELECT *,
23 | CAST(100.0 * (after_changes-before_changes)/before_changes AS decimal(5,2)) AS pct_change
24 | FROM salesChanges;
25 |
26 |
27 | --2. What about the entire 12 weeks before and after?
28 |
29 | --Find the week_number of '2020-06-15' (@weekNum=25)
30 | DECLARE @weekNum int = (
31 | SELECT DISTINCT week_number
32 | FROM clean_weekly_sales
33 | WHERE week_date = '2020-06-15')
34 |
35 | --Find the total sales of 12 weeks before and after @weekNum
36 | WITH salesChanges AS (
37 | SELECT
38 | SUM(CASE WHEN week_number BETWEEN @weekNum-12 AND @weekNum-1 THEN sales END) AS before_changes,
39 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+11 THEN sales END) AS after_changes
40 | FROM clean_weekly_sales
41 | WHERE calendar_year = 2020
42 | )
43 |
44 | SELECT *,
45 | CAST(100.0 * (after_changes-before_changes)/before_changes AS decimal(5,2)) AS pct_change
46 | FROM salesChanges;
47 |
48 |
49 | --3. How do the sale metrics for these 2 periods before and after compare with the previous years in 2018 and 2019?
50 |
51 | --Part 1: How do the sales metrics for the 4 weeks before and after compare with the previous years 2018 and 2019?
52 | --Find the week_number of '2020-06-15' (@weekNum=25)
53 | DECLARE @weekNum int = (
54 | SELECT DISTINCT week_number
55 | FROM clean_weekly_sales
56 | WHERE week_date = '2020-06-15')
57 |
58 | --Find the total sales of 4 weeks before and after @weekNum
59 | WITH salesChanges AS (
60 | SELECT
61 | calendar_year,
62 | SUM(CASE WHEN week_number BETWEEN @weekNum-4 AND @weekNum-1 THEN sales END) AS before_sales,
63 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+3 THEN sales END) AS after_sales
64 | FROM clean_weekly_sales
65 | GROUP BY calendar_year
66 | )
67 |
68 | SELECT *,
69 | CAST(100.0 * (after_sales-before_sales)/before_sales AS decimal(5,2)) AS pct_change
70 | FROM salesChanges
71 | ORDER BY calendar_year;
72 |
73 |
74 | --Part 2: How do the sales metrics for the 12 weeks before and after compare with the previous years 2018 and 2019?
75 | --Find the week_number of '2020-06-15' (@weekNum=25)
76 | DECLARE @weekNum int = (
77 | SELECT DISTINCT week_number
78 | FROM clean_weekly_sales
79 | WHERE week_date = '2020-06-15')
80 |
81 | --Find the total sales of 12 weeks before and after @weekNum
82 | WITH salesChanges AS (
83 | SELECT
84 | calendar_year,
85 | SUM(CASE WHEN week_number BETWEEN @weekNum-12 AND @weekNum-1 THEN sales END) AS before_sales,
86 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+11 THEN sales END) AS after_sales
87 | FROM clean_weekly_sales
88 | GROUP BY calendar_year
89 | )
90 |
91 | SELECT *,
92 | CAST(100.0 * (after_sales-before_sales)/before_sales AS decimal(5,2)) AS pct_change
93 | FROM salesChanges
94 | ORDER BY calendar_year;
95 |
--------------------------------------------------------------------------------
/Case Study #5 - Data Mart/Syntax/D_query.sql:
--------------------------------------------------------------------------------
1 | ---------------------
2 | --D. Bonus Question--
3 | ---------------------
4 |
5 | -- Which areas of the business have the highest negative impact in sales metrics performance in 2020 for the 12 week before and after period?
6 |
7 | --Find the week_number of '2020-06-15' (@weekNum=25)
8 | DECLARE @weekNum int = (
9 | SELECT DISTINCT week_number
10 | FROM clean_weekly_sales
11 | WHERE week_date = '2020-06-15'
12 | AND calendar_year = 2020)
13 |
14 |
15 | --1. Sales changes by [region]
16 | WITH regionChanges AS (
17 | SELECT
18 | region,
19 | SUM(CASE WHEN week_number BETWEEN @weekNum-12 AND @weekNum-1 THEN sales END) AS before_changes,
20 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+11 THEN sales END) AS after_changes
21 | FROM clean_weekly_sales
22 | GROUP BY region
23 | )
24 | SELECT *,
25 | CAST(100.0 * (after_changes-before_changes)/before_changes AS decimal(5,2)) AS pct_change
26 | FROM regionChanges;
27 |
28 |
29 | --2. Sales changes by [platform]
30 | WITH platformChanges AS (
31 | SELECT
32 | platform,
33 | SUM(CASE WHEN week_number BETWEEN @weekNum-12 AND @weekNum-1 THEN sales END) AS before_changes,
34 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+11 THEN sales END) AS after_changes
35 | FROM clean_weekly_sales
36 | GROUP BY platform
37 | )
38 | SELECT *,
39 | CAST(100.0 * (after_changes-before_changes)/before_changes AS decimal(5,2)) AS pct_change
40 | FROM platformChanges;
41 |
42 |
43 | --3. Sales changes by [age_band]
44 | WITH ageBandChanges AS (
45 | SELECT
46 | age_band,
47 | SUM(CASE WHEN week_number BETWEEN @weekNum-12 AND @weekNum-1 THEN sales END) AS before_changes,
48 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+11 THEN sales END) AS after_changes
49 | FROM clean_weekly_sales
50 | GROUP BY age_band
51 | )
52 | SELECT *,
53 | CAST(100.0 * (after_changes-before_changes)/before_changes AS decimal(5,2)) AS pct_change
54 | FROM ageBandChanges;
55 |
56 |
57 | --4. Sales changes by [demographic]
58 | WITH demographicChanges AS (
59 | SELECT
60 | demographic,
61 | SUM(CASE WHEN week_number BETWEEN @weekNum-12 AND @weekNum-1 THEN sales END) AS before_changes,
62 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+11 THEN sales END) AS after_changes
63 | FROM clean_weekly_sales
64 | GROUP BY demographic
65 | )
66 | SELECT *,
67 | CAST(100.0 * (after_changes-before_changes)/before_changes AS decimal(5,2)) AS pct_change
68 | FROM demographicChanges;
69 |
70 |
71 | --5. Sales changes by [customer_type]
72 | WITH customerTypeChanges AS (
73 | SELECT
74 | customer_type,
75 | SUM(CASE WHEN week_number BETWEEN @weekNum-12 AND @weekNum-1 THEN sales END) AS before_changes,
76 | SUM(CASE WHEN week_number BETWEEN @weekNum AND @weekNum+11 THEN sales END) AS after_changes
77 | FROM clean_weekly_sales
78 | GROUP BY customer_type
79 | )
80 | SELECT *,
81 | CAST(100.0 * (after_changes-before_changes)/before_changes AS decimal(5,2)) AS pct_change
82 | FROM customerTypeChanges;
83 |
--------------------------------------------------------------------------------
/Case Study #6 - Clique Bait/README.md:
--------------------------------------------------------------------------------
1 | # 🐟 Case Study #6 - Clique Bait
2 |
3 |
4 |
5 | ## 📕 Table of Contents
6 | * [Business Task](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%236%20-%20Clique%20Bait#%EF%B8%8F-business-task)
7 | * [Entity Relationship Diagram](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%236%20-%20Clique%20Bait#-entity-relationship-diagram)
8 | * [Case Study Questions](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%236%20-%20Clique%20Bait#-case-study-questions)
9 | * [My Solution](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%236%20-%20Clique%20Bait#-my-solution)
10 |
11 | ---
12 | ## 🛠️ Business Task
13 | Clique Bait is not like your regular online seafood store - the founder and CEO, Danny, was also part of a digital data analytics team and wanted
14 | to expand his knowledge into the seafood industry!
15 |
16 | In this case study - you are required to support Danny’s vision and analyse his dataset and come up with
17 | creative solutions to calculate funnel fallout rates for the Clique Bait online store.
18 |
19 | ---
20 | ## 🔐 Entity Relationship Diagram
21 |
22 |
23 |
24 | ---
25 | ## ❓ Case Study Questions
26 | ### A. Digital Analysis
27 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%236%20-%20Clique%20Bait/Solution/A.%20Digital%20Analysis.md).
28 |
29 | Using the available datasets - answer the following questions using a single query for each one:
30 | 1. How many users are there?
31 | 2. How many cookies does each user have on average?
32 | 3. What is the unique number of visits by all users per month?
33 | 4. What is the number of events for each event type?
34 | 5. What is the percentage of visits which have a purchase event?
35 | 6. What is the percentage of visits which view the checkout page but do not have a purchase event?
36 | 7. What are the top 3 pages by number of views?
37 | 8. What is the number of views and cart adds for each product category?
38 | 9. What are the top 3 products by purchases?
39 |
40 | ---
41 | ### B. Product Funnel Analysis
42 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%236%20-%20Clique%20Bait/Solution/B.%20Product%20Funnel%20Analysis.md).
43 |
44 | Using a single SQL query - create a new output table which has the following details:
45 | * How many times was each product viewed?
46 | * How many times was each product added to cart?
47 | * How many times was each product added to a cart but not purchased (abandoned)?
48 | * How many times was each product purchased?
49 |
50 | Additionally, create another table which further aggregates the data for the above points but this time for each product category instead of individual products.
51 |
52 | Use your 2 new output tables - answer the following questions:
53 | 1. Which product had the most views, cart adds and purchases?
54 | 2. Which product was most likely to be abandoned?
55 | 3. Which product had the highest view to purchase percentage?
56 | 4. What is the average conversion rate from view to cart add?
57 | 5. What is the average conversion rate from cart add to purchase?
58 |
59 | ---
60 | ### C. Campaigns Analysis
61 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%236%20-%20Clique%20Bait/Solution/C.%20Campaigns%20Analysis.md).
62 |
63 | Generate a table that has 1 single row for every unique visit_id record and has the following columns:
64 | * `user_id`
65 | * `visit_id`
66 | * `visit_start_time`: the earliest event_time for each visit
67 | * `page_views`: count of page views for each visit
68 | * `cart_adds`: count of product cart add events for each visit
69 | * `purchase`: 1/0 flag if a purchase event exists for each visit
70 | * `campaign_name`: map the visit to a campaign if the visit_start_time falls between the start_date and end_date
71 | * `impression`: count of ad impressions for each visit
72 | * `click`: count of ad clicks for each visit
73 | * (Optional column) `cart_products`: a comma separated text value with
74 | products added to the cart sorted by the order they were added to the cart (hint: use the `sequence_number`)
75 |
76 | Some ideas to investigate further include:
77 | - Identifying users who have received impressions during each campaign period
78 | and comparing each metric with other users who did not have an impression event
79 | - Does clicking on an impression lead to higher purchase rates?
80 | - What is the uplift in purchase rate when comparing users who click on a campaign impression versus
81 | users who do not receive an impression? What if we compare them with users who have just an impression but do not click?
82 | - What metrics can you use to quantify the success or failure of each campaign compared to each other?
83 |
84 | ---
85 | ## 🚀 My Solution
86 | * View the complete syntax [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%236%20-%20Clique%20Bait/Syntax).
87 | * View the result and explanation [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%236%20-%20Clique%20Bait/Solution).
88 |
--------------------------------------------------------------------------------
/Case Study #6 - Clique Bait/Solution/A. Digital Analysis.md:
--------------------------------------------------------------------------------
1 | # 🐟 Case Study #6 - Clique Bait
2 | ## A. Digital Analysis
3 | ### 1. How many users are there?
4 | ```TSQL
5 | SELECT COUNT(DISTINCT user_id) AS users_count
6 | FROM users;
7 | ```
8 | | users_count |
9 | |--------------|
10 | | 500 |
11 |
12 | ---
13 | ### 2. How many cookies does each user have on average?
14 | ```TSQL
15 | SELECT CAST(AVG(cookies_count) AS FLOAT) AS avg_cookies_per_user
16 | FROM (
17 | SELECT
18 | user_id,
19 | 1.0*COUNT(cookie_id) AS cookies_count
20 | FROM users
21 | GROUP BY user_id) temp;
22 | ```
23 | | avg_cookies_per_user |
24 | |-----------------------|
25 | | 3.564 |
26 |
27 | ---
28 | ### 3. What is the unique number of visits by all users per month?
29 | ```TSQL
30 | SELECT
31 | MONTH(event_time) AS months,
32 | COUNT(DISTINCT visit_id) AS visits_count
33 | FROM events
34 | GROUP BY MONTH(event_time)
35 | ORDER BY months;
36 | ```
37 | | months | visits_count |
38 | |--------|---------------|
39 | | 1 | 876 |
40 | | 2 | 1488 |
41 | | 3 | 916 |
42 | | 4 | 248 |
43 | | 5 | 36 |
44 |
45 | ---
46 | ### 4. What is the number of events for each event type?
47 | ```TSQL
48 | SELECT
49 | e.event_type,
50 | ei.event_name,
51 | COUNT(*) AS event_count
52 | FROM events e
53 | JOIN event_identifier ei
54 | ON e.event_type = ei.event_type
55 | GROUP BY e.event_type, ei.event_name
56 | ORDER BY e.event_type;
57 | ```
58 | | event_type | event_name | event_count |
59 | |------------|---------------|--------------|
60 | | 1 | Page View | 20928 |
61 | | 2 | Add to Cart | 8451 |
62 | | 3 | Purchase | 1777 |
63 | | 4 | Ad Impression | 876 |
64 | | 5 | Ad Click | 702 |
65 |
66 | ---
67 | ### 5. What is the percentage of visits which have a purchase event?
68 | ```TSQL
69 | SELECT
70 | CAST(100.0 * COUNT(DISTINCT e.visit_id)
71 | / (SELECT COUNT(DISTINCT visit_id) FROM events) AS decimal(10,2)) AS purchase_pct
72 | FROM events e
73 | JOIN event_identifier ei
74 | ON e.event_type = ei.event_type
75 | WHERE ei.event_name = 'Purchase';
76 | ```
77 | | purchase_pct |
78 | |---------------|
79 | | 49.86 |
80 |
81 | ---
82 | ### 6. What is the percentage of visits which view the checkout page but do not have a purchase event?
83 | ```TSQL
84 | WITH view_checkout AS (
85 | SELECT COUNT(e.visit_id) AS cnt
86 | FROM events e
87 | JOIN event_identifier ei ON e.event_type = ei.event_type
88 | JOIN page_hierarchy p ON e.page_id = p.page_id
89 | WHERE ei.event_name = 'Page View'
90 | AND p.page_name = 'Checkout'
91 | )
92 |
93 | SELECT CAST(100-(100.0 * COUNT(DISTINCT e.visit_id)
94 | / (SELECT cnt FROM view_checkout)) AS decimal(10, 2)) AS pct_view_checkout_not_purchase
95 | FROM events e
96 | JOIN event_identifier ei ON e.event_type = ei.event_type
97 | WHERE ei.event_name = 'Purchase'
98 | ```
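Note that this calculation assumes every purchase visit also views the `Checkout` page (and each visit views it at most once), so the share of purchasing visits can be subtracted from 100% directly.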
99 | | pct_view_checkout_not_purchase |
100 | |---------------------------------|
101 | | 15.50 |
102 |
103 | ---
104 | ### 7. What are the top 3 pages by number of views?
105 | ```TSQL
106 | SELECT
107 | TOP 3 ph.page_name,
108 | COUNT(*) AS page_views
109 | FROM events e
110 | JOIN event_identifier ei ON e.event_type = ei.event_type
111 | JOIN page_hierarchy ph ON e.page_id = ph.page_id
112 | WHERE ei.event_name = 'Page View'
113 | GROUP BY ph.page_name
114 | ORDER BY page_views DESC;
115 | ```
116 | | page_name | page_views |
117 | |--------------|-------------|
118 | | All Products | 3174 |
119 | | Checkout | 2103 |
120 | | Home Page | 1782 |
121 |
122 | ---
123 | ### 8. What is the number of views and cart adds for each product category?
124 | ```TSQL
125 | SELECT
126 | ph.product_category,
127 | SUM(CASE WHEN ei.event_name = 'Page View' THEN 1 ELSE 0 END) AS page_views,
128 | SUM(CASE WHEN ei.event_name = 'Add to Cart' THEN 1 ELSE 0 END) AS cart_adds
129 | FROM events e
130 | JOIN event_identifier ei ON e.event_type = ei.event_type
131 | JOIN page_hierarchy ph ON e.page_id = ph.page_id
132 | WHERE ph.product_category IS NOT NULL
133 | GROUP BY ph.product_category;
134 | ```
135 | | product_category | page_views | cart_adds |
136 | |------------------|------------|------------|
137 | | Fish | 4633 | 2789 |
138 | | Luxury | 3032 | 1870 |
139 | | Shellfish | 6204 | 3792 |
140 |
141 | ---
142 | ### 9. What are the top 3 products by purchases?
143 | ```TSQL
144 | SELECT
145 | TOP 3 ph.product_id,
146 | ph.page_name,
147 | ph.product_category,
148 | COUNT(*) AS purchase_count
149 | FROM events e
150 | JOIN event_identifier ei ON e.event_type = ei.event_type
151 | JOIN page_hierarchy ph ON e.page_id = ph.page_id
152 | --1st layer: products are added to cart
153 | WHERE ei.event_name = 'Add to cart'
154 | --2nd layer: add-to-cart products are purchased
155 | AND e.visit_id IN (
156 | SELECT e.visit_id
157 | FROM events e
158 | JOIN event_identifier ei ON e.event_type = ei.event_type
159 | WHERE ei.event_name = 'Purchase')
160 | GROUP BY ph.product_id, ph.page_name, ph.product_category
161 | ORDER BY purchase_count DESC;
162 | ```
163 | | product_id | page_name | product_category | purchase_count |
164 | |------------|-----------|------------------|-----------------|
165 | | 7 | Lobster | Shellfish | 754 |
166 | | 9 | Oyster | Shellfish | 726 |
167 | | 8 | Crab | Shellfish | 719 |
168 |
169 | ---
170 | My solution for **[B. Product Funnel Analysis](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%236%20-%20Clique%20Bait/Solution/B.%20Product%20Funnel%20Analysis.md)**.
171 |
--------------------------------------------------------------------------------
/Case Study #6 - Clique Bait/Syntax/A_query.sql:
--------------------------------------------------------------------------------
1 | -----------------------
2 | --A. Digital Analysis--
3 | -----------------------
4 |
5 | --1. How many users are there?
6 |
7 | SELECT COUNT(DISTINCT user_id) AS users_count
8 | FROM users;
9 |
10 |
11 | --2. How many cookies does each user have on average?
12 |
13 | SELECT CAST(AVG(cookies_count) AS FLOAT) AS avg_cookies_per_user
14 | FROM (
15 | SELECT
16 | user_id,
17 | 1.0*COUNT(cookie_id) AS cookies_count
18 | FROM users
19 | GROUP BY user_id) temp;
20 |
21 |
22 | --3. What is the unique number of visits by all users per month?
23 |
24 | SELECT
25 | MONTH(event_time) AS months,
26 | COUNT(DISTINCT visit_id) AS visits_count
27 | FROM events
28 | GROUP BY MONTH(event_time)
29 | ORDER BY months;
30 |
31 |
32 | --4. What is the number of events for each event type?
33 |
34 | SELECT
35 | e.event_type,
36 | ei.event_name,
37 | COUNT(*) AS event_count
38 | FROM events e
39 | JOIN event_identifier ei
40 | ON e.event_type = ei.event_type
41 | GROUP BY e.event_type, ei.event_name
42 | ORDER BY e.event_type;
43 |
44 |
45 | --5. What is the percentage of visits which have a purchase event?
46 |
47 | SELECT
48 | CAST(100.0 * COUNT(DISTINCT e.visit_id)
49 | / (SELECT COUNT(DISTINCT visit_id) FROM events) AS decimal(10,2)) AS purchase_pct
50 | FROM events e
51 | JOIN event_identifier ei
52 | ON e.event_type = ei.event_type
53 | WHERE ei.event_name = 'Purchase';
54 |
55 |
56 | --6. What is the percentage of visits which view the checkout page but do not have a purchase event?
57 |
58 | WITH view_checkout AS (
59 | SELECT COUNT(e.visit_id) AS cnt
60 | FROM events e
61 | JOIN event_identifier ei ON e.event_type = ei.event_type
62 | JOIN page_hierarchy p ON e.page_id = p.page_id
63 | WHERE ei.event_name = 'Page View'
64 | AND p.page_name = 'Checkout'
65 | )
66 |
67 | SELECT CAST(100-(100.0 * COUNT(DISTINCT e.visit_id)
68 | / (SELECT cnt FROM view_checkout)) AS decimal(10, 2)) AS pct_view_checkout_not_purchase
69 | FROM events e
70 | JOIN event_identifier ei ON e.event_type = ei.event_type
71 | WHERE ei.event_name = 'Purchase'
72 |
73 |
74 | --7. What are the top 3 pages by number of views?
75 |
76 | SELECT
77 | TOP 3 ph.page_name,
78 | COUNT(*) AS page_views
79 | FROM events e
80 | JOIN event_identifier ei ON e.event_type = ei.event_type
81 | JOIN page_hierarchy ph ON e.page_id = ph.page_id
82 | WHERE ei.event_name = 'Page View'
83 | GROUP BY ph.page_name
84 | ORDER BY page_views DESC;
85 |
86 |
87 | --8. What is the number of views and cart adds for each product category?
88 |
89 | SELECT
90 | ph.product_category,
91 | SUM(CASE WHEN ei.event_name = 'Page View' THEN 1 ELSE 0 END) AS page_views,
92 | SUM(CASE WHEN ei.event_name = 'Add to Cart' THEN 1 ELSE 0 END) AS cart_adds
93 | FROM events e
94 | JOIN event_identifier ei ON e.event_type = ei.event_type
95 | JOIN page_hierarchy ph ON e.page_id = ph.page_id
96 | WHERE ph.product_category IS NOT NULL
97 | GROUP BY ph.product_category;
98 |
99 |
100 | --9. What are the top 3 products by purchases?
101 |
102 | SELECT
103 | TOP 3 ph.product_id,
104 | ph.page_name,
105 | ph.product_category,
106 | COUNT(*) AS purchase_count
107 | FROM events e
108 | JOIN event_identifier ei ON e.event_type = ei.event_type
109 | JOIN page_hierarchy ph ON e.page_id = ph.page_id
110 | WHERE ei.event_name = 'Add to cart'
111 | AND e.visit_id IN (
112 | SELECT e.visit_id
113 | FROM events e
114 | JOIN event_identifier ei ON e.event_type = ei.event_type
115 | WHERE ei.event_name = 'Purchase')
116 | GROUP BY ph.product_id, ph.page_name, ph.product_category
117 | ORDER BY purchase_count DESC;
118 |
--------------------------------------------------------------------------------
/Case Study #6 - Clique Bait/Syntax/B_query.sql:
--------------------------------------------------------------------------------
1 | ------------------------------
2 | --B. Product Funnel Analysis--
3 | ------------------------------
4 |
5 | /*
6 | Using a single SQL query - create a new output table which has the following details:
7 | - How many times was each product viewed?
8 | - How many times was each product added to cart?
9 | - How many times was each product added to a cart but not purchased (abandoned)?
10 | - How many times was each product purchased?
11 | */
12 |
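--Approach: build one CTE per metric (views + cart adds, abandoned carts,
--purchased carts) and join them on product_id into the temp table #product_summary.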
13 | WITH product_info AS (
14 | SELECT
15 | ph.product_id,
16 | ph.page_name AS product_name,
17 | ph.product_category,
18 | SUM(CASE WHEN ei.event_name = 'Page View' THEN 1 ELSE 0 END) AS views,
19 | SUM(CASE WHEN ei.event_name = 'Add To Cart' THEN 1 ELSE 0 END) AS cart_adds
20 | FROM events e
21 | JOIN event_identifier ei ON e.event_type = ei.event_type
22 | JOIN page_hierarchy ph ON e.page_id = ph.page_id
23 | WHERE ph.product_id IS NOT NULL
24 | GROUP BY ph.product_id, ph.page_name, ph.product_category
25 | ),
26 | product_abandoned AS (
27 | SELECT
28 | ph.product_id,
29 | ph.page_name AS product_name,
30 | ph.product_category,
31 | COUNT(*) AS abandoned
32 | FROM events e
33 | JOIN event_identifier ei ON e.event_type = ei.event_type
34 | JOIN page_hierarchy ph ON e.page_id = ph.page_id
35 | WHERE ei.event_name = 'Add to cart'
36 | AND e.visit_id NOT IN (
37 | SELECT e.visit_id
38 | FROM events e
39 | JOIN event_identifier ei ON e.event_type = ei.event_type
40 | WHERE ei.event_name = 'Purchase')
41 | GROUP BY ph.product_id, ph.page_name, ph.product_category
42 | ),
43 | product_purchased AS (
44 | SELECT
45 | ph.product_id,
46 | ph.page_name AS product_name,
47 | ph.product_category,
48 | COUNT(*) AS purchases
49 | FROM events e
50 | JOIN event_identifier ei ON e.event_type = ei.event_type
51 | JOIN page_hierarchy ph ON e.page_id = ph.page_id
52 | WHERE ei.event_name = 'Add to cart'
53 | AND e.visit_id IN (
54 | SELECT e.visit_id
55 | FROM events e
56 | JOIN event_identifier ei ON e.event_type = ei.event_type
57 | WHERE ei.event_name = 'Purchase')
58 | GROUP BY ph.product_id, ph.page_name, ph.product_category
59 | )
60 |
61 | SELECT
62 | pi.*,
63 | pa.abandoned,
64 | pp.purchases
65 | INTO #product_summary
66 | FROM product_info pi
67 | JOIN product_abandoned pa ON pi.product_id = pa.product_id
68 | JOIN product_purchased pp ON pi.product_id = pp.product_id;
69 |
70 |
71 | /*
72 | Additionally, create another table which further aggregates the data for the above points
73 | but this time for each product category instead of individual products.
74 | */
75 |
76 | WITH category_info AS (
77 | SELECT
78 | ph.product_category,
79 | SUM(CASE WHEN ei.event_name = 'Page View' THEN 1 ELSE 0 END) AS views,
80 | SUM(CASE WHEN ei.event_name = 'Add To Cart' THEN 1 ELSE 0 END) AS cart_adds
81 | FROM events e
82 | JOIN event_identifier ei ON e.event_type = ei.event_type
83 | JOIN page_hierarchy ph ON e.page_id = ph.page_id
84 | WHERE ph.product_id IS NOT NULL
85 | GROUP BY ph.product_category
86 | ),
87 | category_abandoned AS (
88 | SELECT
89 | ph.product_category,
90 | COUNT(*) AS abandoned
91 | FROM events e
92 | JOIN event_identifier ei ON e.event_type = ei.event_type
93 | JOIN page_hierarchy ph ON e.page_id = ph.page_id
94 | WHERE ei.event_name = 'Add to cart'
95 | AND e.visit_id NOT IN (
96 | SELECT e.visit_id
97 | FROM events e
98 | JOIN event_identifier ei ON e.event_type = ei.event_type
99 | WHERE ei.event_name = 'Purchase')
100 | GROUP BY ph.product_category
101 | ),
102 | category_purchased AS (
103 | SELECT
104 | ph.product_category,
105 | COUNT(*) AS purchases
106 | FROM events e
107 | JOIN event_identifier ei ON e.event_type = ei.event_type
108 | JOIN page_hierarchy ph ON e.page_id = ph.page_id
109 | WHERE ei.event_name = 'Add to cart'
110 | AND e.visit_id IN (
111 | SELECT e.visit_id
112 | FROM events e
113 | JOIN event_identifier ei ON e.event_type = ei.event_type
114 | WHERE ei.event_name = 'Purchase')
115 | GROUP BY ph.product_category
116 | )
117 |
118 | SELECT
119 | ci.*,
120 | ca.abandoned,
121 | cp.purchases
122 | FROM category_info ci
123 | JOIN category_abandoned ca ON ci.product_category = ca.product_category
124 | JOIN category_purchased cp ON ci.product_category = cp.product_category;
125 |
126 |
127 | --Use your 2 new output tables - answer the following questions:
128 | --1. Which product had the most views, cart adds and purchases?
129 |
130 | SELECT TOP 1 *
131 | FROM #product_summary
132 | ORDER BY views DESC;
133 | --> Oyster had the most views.
134 |
135 | SELECT TOP 1 *
136 | FROM #product_summary
137 | ORDER BY cart_adds DESC;
138 | --> Lobster had the most cart adds.
139 |
140 | SELECT TOP 1 *
141 | FROM #product_summary
142 | ORDER BY purchases DESC;
143 | --> Lobster had the most purchases.
144 |
145 |
146 | --2. Which product was most likely to be abandoned?
147 | SELECT TOP 1 *
148 | FROM #product_summary
149 | ORDER BY abandoned DESC;
150 | --> Russian Caviar was most likely to be abandoned.
151 |
152 |
153 | --3. Which product had the highest view to purchase percentage?
154 | SELECT
155 | TOP 1 product_name,
156 | product_category,
157 | CAST(100.0 * purchases / views AS decimal(10, 2)) AS purchase_per_view_pct
158 | FROM #product_summary
159 | ORDER BY purchase_per_view_pct DESC;
160 | --> Lobster had the highest view to purchase percentage.
161 |
162 |
163 | --4. What is the average conversion rate from view to cart add?
164 | SELECT
165 | CAST(AVG(100.0*cart_adds/views) AS decimal(10, 2)) AS avg_view_to_cart
166 | FROM #product_summary;
167 |
168 |
169 | --5. What is the average conversion rate from cart add to purchase?
170 | SELECT
171 | CAST(AVG(100.0*purchases/cart_adds) AS decimal(10, 2)) AS avg_cart_to_purchase
172 | FROM #product_summary;
173 |
--------------------------------------------------------------------------------
/Case Study #6 - Clique Bait/Syntax/C_query.sql:
--------------------------------------------------------------------------------
1 | -------------------------
2 | --C. Campaigns Analysis--
3 | -------------------------
4 | /*Generate a table that has 1 single row for every unique visit_id record and has the following columns:
5 | - user_id
6 | - visit_id
7 | - visit_start_time: the earliest event_time for each visit
8 | - page_views: count of page views for each visit
9 | - cart_adds: count of product cart add events for each visit
10 | - purchase: 1/0 flag if a purchase event exists for each visit
11 | - campaign_name: map the visit to a campaign if the visit_start_time falls between the start_date and end_date
12 | - impression: count of ad impressions for each visit
13 | - click: count of ad clicks for each visit
14 | - (Optional column) cart_products: a comma separated text value with products added to the cart
15 | sorted by the order they were added to the cart (hint: use the sequence_number)
16 | */
17 |
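--LEFT JOIN on the date range keeps visits that fall outside any campaign window
--(their campaign_name stays NULL); STRING_AGG ... WITHIN GROUP orders the
--cart products by sequence_number, i.e. the order they were added.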
18 | SELECT
19 | u.user_id,
20 | e.visit_id,
21 | MIN(event_time) AS visit_start_time,
22 | SUM(CASE WHEN ei.event_name = 'Page View' THEN 1 ELSE 0 END) AS page_views,
23 | SUM(CASE WHEN ei.event_name = 'Add to Cart' THEN 1 ELSE 0 END) AS cart_adds,
24 | SUM(CASE WHEN ei.event_name = 'Purchase' THEN 1 ELSE 0 END) AS purchase,
25 | c.campaign_name,
26 | SUM(CASE WHEN ei.event_name = 'Ad Impression' THEN 1 ELSE 0 END) AS impression,
27 | SUM(CASE WHEN ei.event_name = 'Ad Click' THEN 1 ELSE 0 END) AS click,
28 | STRING_AGG(CASE WHEN ei.event_name = 'Add to Cart' THEN ph.page_name END, ', ')
29 | WITHIN GROUP (ORDER BY e.sequence_number) AS cart_products
30 | INTO #campaign_summary
31 | FROM events e
32 | JOIN users u
33 | ON e.cookie_id = u.cookie_id
34 | JOIN event_identifier ei
35 | ON e.event_type = ei.event_type
36 | JOIN page_hierarchy ph
37 | ON e.page_id = ph.page_id
38 | LEFT JOIN campaign_identifier c
39 | ON e.event_time BETWEEN c.start_date AND c.end_date
40 | GROUP BY u.user_id, e.visit_id, c.campaign_name;
41 |
42 |
43 | /*
44 | - Identifying users who have received impressions during each campaign period
45 | and comparing each metric with other users who did not have an impression event
46 | - Does clicking on an impression lead to higher purchase rates?
47 | - What is the uplift in purchase rate when comparing users who click on a campaign impression versus
48 | users who do not receive an impression? What if we compare them with users who have just an impression but do not click?
49 | - What metrics can you use to quantify the success or failure of each campaign compared to each other?
50 | */
51 |
52 |
53 | --1. Calculate the number of users in each group
54 |
55 | --Number of users who received impressions during campaign periods = 417
56 | SELECT COUNT(DISTINCT user_id) AS received_impressions
57 | FROM #campaign_summary
58 | WHERE impression > 0
59 | AND campaign_name IS NOT NULL;
60 |
61 |
62 | --Number of users who received impressions but didn't click on ads = 127
63 | SELECT COUNT(DISTINCT user_id) AS received_impressions_not_clicked
64 | FROM #campaign_summary
65 | WHERE impression > 0
66 | AND click = 0
67 | AND campaign_name IS NOT NULL;
68 |
69 |
70 | --Number of users who didn't receive impressions during campaign periods = 56
71 | SELECT COUNT(DISTINCT user_id) AS not_received_impressions
72 | FROM #campaign_summary
73 | WHERE campaign_name IS NOT NULL
74 | AND user_id NOT IN (
75 | SELECT user_id
76 | FROM #campaign_summary
77 | WHERE impression > 0)
78 |
79 |
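--A small variation (my own sketch): the group sizes could be derived instead of
--hard-coded below, e.g.
--  DECLARE @received int = (
--      SELECT COUNT(DISTINCT user_id)
--      FROM #campaign_summary
--      WHERE impression > 0 AND campaign_name IS NOT NULL);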
80 | --2. Calculate the average clicks, average views, average cart adds, and average purchases of each group
81 |
82 | --For users who received impressions
83 | DECLARE @received int
84 | SET @received = 417
85 |
86 | SELECT
87 | CAST(1.0*SUM(page_views) / @received AS decimal(10,1)) AS avg_view,
88 | CAST(1.0*SUM(cart_adds) / @received AS decimal(10,1)) AS avg_cart_adds,
89 | CAST(1.0*SUM(purchase) / @received AS decimal(10,1)) AS avg_purchase
90 | FROM #campaign_summary
91 | WHERE impression > 0
92 | AND campaign_name IS NOT NULL;
93 |
94 |
95 | --For users who received impressions but didn't click on ads
96 | DECLARE @received_not_clicked int
97 | SET @received_not_clicked = 127
98 |
99 | SELECT
100 | CAST(1.0*SUM(page_views) / @received_not_clicked AS decimal(10,1)) AS avg_view,
101 | CAST(1.0*SUM(cart_adds) / @received_not_clicked AS decimal(10,1)) AS avg_cart_adds,
102 | CAST(1.0*SUM(purchase) / @received_not_clicked AS decimal(10,1)) AS avg_purchase
103 | FROM #campaign_summary
104 | WHERE impression > 0
105 | AND click = 0
106 | AND campaign_name IS NOT NULL;
107 |
108 |
109 | --For users who didn't receive impressions
110 | DECLARE @not_received int
111 | SET @not_received = 56
112 |
113 | SELECT
114 | CAST(1.0*SUM(page_views) / @not_received AS decimal(10,1)) AS avg_view,
115 | CAST(1.0*SUM(cart_adds) / @not_received AS decimal(10,1)) AS avg_cart_adds,
116 | CAST(1.0*SUM(purchase) / @not_received AS decimal(10,1)) AS avg_purchase
117 | FROM #campaign_summary
118 | WHERE campaign_name IS NOT NULL
119 | AND user_id NOT IN (
120 | SELECT user_id
121 | FROM #campaign_summary
122 | WHERE impression > 0);
123 |
124 |
125 | --3. Compare the average views, average cart adds and average purchases of users who received impressions and those who did not
126 |
127 | /* Combine table
128 | | | avg_view | avg_cart_adds | avg_purchase |
129 | |-----------------------------|----------|---------------|---------------|
130 | | Received impressions | 15.3 | 9 | 1.5 |
131 | | Not received impressions | 19.4 | 5.8 | 1.2 |
132 | | Increase by campaigns | No | Yes | Yes |
133 | */
134 |
135 |
136 | --4. Compare the average purchases of users who received impressions and those who received impressions but didn't click on the ads
137 | /* Combine table
138 | | | avg_purchase |
139 | |--------------------------------------|---------------|
140 | | Received impressions | 1.5 |
141 | | Received impressions but not clicked | 0.8 |
142 | | Increase by clicking to the ads | Yes |
143 | */
144 |
--------------------------------------------------------------------------------
/Case Study #7 - Balanced Tree Clothing Co./README.md:
--------------------------------------------------------------------------------
1 | # 👕 Case Study #7 - Balanced Tree Clothing Co.
2 |
3 |
4 |
5 |
6 | ## 📕 Table of Contents
7 | * [Business Task](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%237%20-%20Balanced%20Tree%20Clothing%20Co.#%EF%B8%8F-business-task)
8 | * [Entity Relationship Diagram](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%237%20-%20Balanced%20Tree%20Clothing%20Co.#-entity-relationship-diagram)
9 | * [Case Study Questions](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%237%20-%20Balanced%20Tree%20Clothing%20Co.#-case-study-questions)
10 | * [My Solution](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%237%20-%20Balanced%20Tree%20Clothing%20Co.#-my-solution)
11 |
12 | ---
13 | ## 🛠️ Business Task
14 |
15 | Balanced Tree Clothing Company prides themselves on providing an optimised range of clothing and lifestyle wear for the modern adventurer!
16 |
17 | Danny, the CEO of this trendy fashion company, has asked you to assist the merchandising teams in analysing their sales performance
18 | and generating a basic financial report to share with the wider business.
19 |
20 | ---
21 | ## 🔐 Entity Relationship Diagram
22 | ### Regular questions
23 |
24 |
25 |
26 | ### Bonus question
27 |
28 |
29 |
30 | ---
31 | ## ❓ Case Study Questions
32 | ### A. High Level Sales Analysis
33 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%237%20-%20Balanced%20Tree%20Clothing%20Co./Solution/A.%20High%20Level%20Sales%20Analysis.md).
34 |
35 | 1. What was the total quantity sold for all products?
36 | 2. What is the total generated revenue for all products before discounts?
37 | 3. What was the total discount amount for all products?
38 |
39 | ---
40 | ### B. Transaction Analysis
41 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%237%20-%20Balanced%20Tree%20Clothing%20Co./Solution/B.%20Transaction%20Analysis.md).
42 |
43 | 1. How many unique transactions were there?
44 | 2. What is the average unique products purchased in each transaction?
45 | 3. What are the 25th, 50th and 75th percentile values for the revenue per transaction?
46 | 4. What is the average discount value per transaction?
47 | 5. What is the percentage split of all transactions for members vs non-members?
48 | 6. What is the average revenue for member transactions and non-member transactions?
49 |
50 | ---
51 | ### C. Product Analysis
52 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%237%20-%20Balanced%20Tree%20Clothing%20Co./Solution/C.%20Product%20Analysis.md).
53 |
54 | 1. What are the top 3 products by total revenue before discount?
55 | 2. What is the total quantity, revenue and discount for each segment?
56 | 3. What is the top selling product for each segment?
57 | 4. What is the total quantity, revenue and discount for each category?
58 | 5. What is the top selling product for each category?
59 | 6. What is the percentage split of revenue by product for each segment?
60 | 7. What is the percentage split of revenue by segment for each category?
61 | 8. What is the percentage split of total revenue by category?
62 | 9. What is the total transaction “penetration” for each product? (hint: penetration = number of transactions where at least 1 quantity of a product was purchased divided by total number of transactions)
63 | 10. What is the most common combination of at least 1 quantity of any 3 products in a 1 single transaction?
64 |
65 | ---
66 | ### D. Bonus Challenge
67 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%237%20-%20Balanced%20Tree%20Clothing%20Co./Solution/D.%20Bonus%20Question.md).
68 |
69 | Use a single SQL query to transform the `product_hierarchy` and `product_prices` datasets to the `product_details` table.
70 |
71 | ---
72 | ## 🚀 My Solution
73 | * View the complete syntax [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%237%20-%20Balanced%20Tree%20Clothing%20Co./Syntax).
74 | * View the result and explanation [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%237%20-%20Balanced%20Tree%20Clothing%20Co./Solution).
75 |
--------------------------------------------------------------------------------
/Case Study #7 - Balanced Tree Clothing Co./Solution/A. High Level Sales Analysis.md:
--------------------------------------------------------------------------------
1 | # 👕 Case Study #7 - Balanced Tree Clothing Co.
2 | ## A. High Level Sales Analysis
3 | ### 1. What was the total quantity sold for all products?
4 | ```TSQL
5 | SELECT SUM(qty) AS total_quantity
6 | FROM sales;
7 | ```
8 | | total_quantity |
9 | |-----------------|
10 | | 45216 |
11 |
12 | ---
13 | ### 2. What is the total generated revenue for all products before discounts?
14 | ```TSQL
15 | SELECT SUM(qty * price) AS revenue_before_discounts
16 | FROM sales;
17 | ```
18 | | revenue_before_discounts |
19 | |---------------------------|
20 | | 1289453 |
21 |
22 | ---
23 | ### 3. What was the total discount amount for all products?
24 | ```TSQL
25 | SELECT CAST(SUM(qty * price * discount/100.0) AS FLOAT) AS total_discount
26 | FROM sales;
27 | ```
28 | | total_discount |
29 | |-----------------|
30 | | 156229.14 |
31 |
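A note on the `/100.0`: `discount` is stored as a whole-number percentage, and T-SQL integer division truncates, so dividing by `100` would silently drop the fractional part of every line item. A minimal sketch of the difference, using made-up literals rather than real rows:

```TSQL
--Hypothetical line item: qty = 3, price = 13, discount = 17 (percent)
SELECT
  3 * 13 * 17 / 100   AS truncated_discount,  --6 (integer division)
  3 * 13 * 17 / 100.0 AS exact_discount;      --6.630000 (decimal division)
```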
32 | ---
33 | My solution for **[B. Transaction Analysis](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%237%20-%20Balanced%20Tree%20Clothing%20Co./Solution/B.%20Transaction%20Analysis.md)**.
34 |
--------------------------------------------------------------------------------
/Case Study #7 - Balanced Tree Clothing Co./Solution/B. Transaction Analysis.md:
--------------------------------------------------------------------------------
1 | # 👕 Case Study #7 - Balanced Tree Clothing Co.
2 | ## B. Transaction Analysis
3 | ### 1. How many unique transactions were there?
4 | ```TSQL
5 | SELECT COUNT(DISTINCT txn_id) AS unique_transactions
6 | FROM sales;
7 | ```
8 | | unique_transactions |
9 | |----------------------|
10 | | 2500 |
11 |
12 | ---
13 | ### 2. What is the average unique products purchased in each transaction?
14 | ```TSQL
15 | SELECT AVG(product_count) AS avg_unique_products
16 | FROM (
17 | SELECT
18 | txn_id,
19 | COUNT(DISTINCT prod_id) AS product_count
20 | FROM sales
21 | GROUP BY txn_id
22 | ) temp;
23 | ```
24 | | avg_unique_products |
25 | |----------------------|
26 | | 6 |
27 |
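In T-SQL, `AVG` over an `INT` expression performs integer arithmetic, so any fractional part of the true average is truncated; multiplying by `1.0` first (as question 6 below does) keeps the decimals. A small illustration with throwaway values:

```TSQL
SELECT AVG(n) AS int_avg, AVG(1.0 * n) AS decimal_avg  --6 vs. 6.333333
FROM (VALUES (5), (6), (8)) AS t(n);
```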
28 | ---
29 | ### 3. What are the 25th, 50th and 75th percentile values for the revenue per transaction?
30 | ```TSQL
31 | WITH transaction_revenue AS (
32 | SELECT
33 | txn_id,
34 | SUM(qty*price) AS revenue
35 | FROM sales
36 | GROUP BY txn_id)
37 |
38 | SELECT
39 | DISTINCT
40 | PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY revenue) OVER () AS pct_25th,
41 | PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY revenue) OVER () AS pct_50th,
42 | PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY revenue) OVER () AS pct_75th
43 | FROM transaction_revenue;
44 | ```
45 | | pct_25th | pct_50th | pct_75th |
46 | |----------|----------|-----------|
47 | | 375.75 | 509.5 | 647 |
48 |
SQL Server only exposes `PERCENTILE_CONT` as a window function, so the query above returns the same three percentile values on every row of `transaction_revenue`; the `DISTINCT` collapses those duplicates into a single row.
49 | ---
50 | ### 4. What is the average discount value per transaction?
51 | ```TSQL
52 | SELECT CAST(AVG(total_discount) AS decimal(5, 1)) AS avg_discount_per_transaction
53 | FROM (
54 | SELECT
55 | txn_id,
56 | SUM(qty*price*discount/100.0) AS total_discount
57 | FROM sales
58 | GROUP BY txn_id
59 | ) temp;
60 | ```
61 | | avg_discount_per_transaction |
62 | |-------------------------------|
63 | | 62.5 |
64 |
65 | ---
66 | ### 5. What is the percentage split of all transactions for members vs non-members?
67 | ```TSQL
68 | SELECT
69 | CAST(100.0*COUNT(DISTINCT CASE WHEN member = 1 THEN txn_id END)
70 | / COUNT(DISTINCT txn_id) AS FLOAT) AS members_pct,
71 | CAST(100.0*COUNT(DISTINCT CASE WHEN member = 0 THEN txn_id END)
72 | / COUNT(DISTINCT txn_id) AS FLOAT) AS non_members_pct
73 | FROM sales;
74 | ```
75 | | members_pct | non_members_pct |
76 | |---------|--------------|
77 | | 60.2 | 39.8 |
78 |
79 | ---
80 | ### 6. What is the average revenue for member transactions and non-member transactions?
81 | ```TSQL
82 | WITH member_revenue AS (
83 | SELECT
84 | member,
85 | txn_id,
86 | SUM(qty*price) AS revenue
87 | FROM sales
88 | GROUP BY member, txn_id
89 | )
90 |
91 | SELECT
92 | member,
93 | CAST(AVG(1.0*revenue) AS decimal(10,2)) AS avg_revenue
94 | FROM member_revenue
95 | GROUP BY member;
96 | ```
97 | | member | avg_revenue |
98 | |--------|--------------|
99 | | 0 | 515.04 |
100 | | 1 | 516.27 |
101 |
102 | ---
103 | My solution for **[C. Product Analysis](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%237%20-%20Balanced%20Tree%20Clothing%20Co./Solution/C.%20Product%20Analysis.md)**.
104 |
--------------------------------------------------------------------------------
/Case Study #7 - Balanced Tree Clothing Co./Solution/D. Bonus Question.md:
--------------------------------------------------------------------------------
1 | # 👕 Case Study #7 - Balanced Tree Clothing Co.
2 | ## D. Bonus Question
3 | Use a single SQL query to transform the `product_hierarchy` and `product_prices` datasets to the `product_details` table.
4 |
5 | ### Solution
6 |
7 | ```TSQL
8 | SELECT
9 | pp.product_id,
10 | pp.price,
11 | CONCAT(ph1.level_text, ' ', ph2.level_text, ' - ', ph3.level_text) AS product_name,
12 | ph2.parent_id AS category_id,
13 | ph1.parent_id AS segment_id,
14 | ph1.id AS style_id,
15 | ph3.level_text AS category_name,
16 | ph2.level_text AS segment_name,
17 | ph1.level_text AS style_name
18 | FROM product_hierarchy ph1
19 | --self join style level (ph1) with segment level (ph2)
20 | JOIN product_hierarchy ph2 ON ph1.parent_id = ph2.id
21 | --self join segment level (ph2) with category level (ph3)
22 | JOIN product_hierarchy ph3 ON ph3.id = ph2.parent_id
23 | --inner join style level (ph1) with table [product_prices]
24 | JOIN product_prices pp ON ph1.id = pp.id;
25 | ```
26 | | product_id | price | product_name | category_id | segment_id | style_id | category_name | segment_name | style_name |
27 | |------------|-------|----------------------------------|-------------|------------|----------|---------------|--------------|---------------------|
28 | | c4a632 | 13 | Navy Oversized Jeans - Womens | 1 | 3 | 7 | Womens | Jeans | Navy Oversized |
29 | | e83aa3 | 32 | Black Straight Jeans - Womens | 1 | 3 | 8 | Womens | Jeans | Black Straight |
30 | | e31d39 | 10 | Cream Relaxed Jeans - Womens | 1 | 3 | 9 | Womens | Jeans | Cream Relaxed |
31 | | d5e9a6 | 23 | Khaki Suit Jacket - Womens | 1 | 4 | 10 | Womens | Jacket | Khaki Suit |
32 | | 72f5d4 | 19 | Indigo Rain Jacket - Womens | 1 | 4 | 11 | Womens | Jacket | Indigo Rain |
33 | | 9ec847 | 54 | Grey Fashion Jacket - Womens | 1 | 4 | 12 | Womens | Jacket | Grey Fashion |
34 | | 5d267b | 40 | White Tee Shirt - Mens | 2 | 5 | 13 | Mens | Shirt | White Tee |
35 | | c8d436 | 10 | Teal Button Up Shirt - Mens | 2 | 5 | 14 | Mens | Shirt | Teal Button Up |
36 | | 2a2353 | 57 | Blue Polo Shirt - Mens | 2 | 5 | 15 | Mens | Shirt | Blue Polo |
37 | | f084eb | 36 | Navy Solid Socks - Mens | 2 | 6 | 16 | Mens | Socks | Navy Solid |
38 | | b9a74d | 17 | White Striped Socks - Mens | 2 | 6 | 17 | Mens | Socks | White Striped |
39 | | 2feb6b | 29 | Pink Fluro Polkadot Socks - Mens | 2 | 6 | 18 | Mens | Socks | Pink Fluro Polkadot |
40 |
Because `product_hierarchy` stores all three levels in one table, only the style-level rows (`ph1`) survive both inner self joins: a style's `parent_id` points to its segment (`ph2`), whose `parent_id` points to its category (`ph3`); category rows have no parent, so they drop out automatically.
--------------------------------------------------------------------------------
/Case Study #7 - Balanced Tree Clothing Co./Syntax/A_query.sql:
--------------------------------------------------------------------------------
1 | --------------------------------
2 | --A. High Level Sales Analysis--
3 | --------------------------------
4 |
5 | --1. What was the total quantity sold for all products?
6 |
7 | SELECT SUM(qty) AS total_quantity
8 | FROM sales;
9 |
10 |
11 | --2. What is the total generated revenue for all products before discounts?
12 |
13 | SELECT SUM(qty * price) AS revenue_before_discounts
14 | FROM sales;
15 |
16 |
17 | --3. What was the total discount amount for all products?
18 |
19 | SELECT CAST(SUM(qty * price * discount/100.0) AS FLOAT) AS total_discount
20 | FROM sales;
21 |
--------------------------------------------------------------------------------
/Case Study #7 - Balanced Tree Clothing Co./Syntax/B_query.sql:
--------------------------------------------------------------------------------
1 | ---------------------------
2 | --B. Transaction Analysis--
3 | ---------------------------
4 |
5 | --1. How many unique transactions were there?
6 |
7 | SELECT COUNT(DISTINCT txn_id) AS unique_transactions
8 | FROM sales;
9 |
10 |
11 | --2. What is the average unique products purchased in each transaction?
12 |
13 | SELECT AVG(product_count) AS avg_unique_products
14 | FROM (
15 | SELECT
16 | txn_id,
17 | COUNT(DISTINCT prod_id) AS product_count
18 | FROM sales
19 | GROUP BY txn_id
20 | ) temp;
21 |
22 |
23 | --3. What are the 25th, 50th and 75th percentile values for the revenue per transaction?
24 |
25 | WITH transaction_revenue AS (
26 | SELECT
27 | txn_id,
28 | SUM(qty*price) AS revenue
29 | FROM sales
30 | GROUP BY txn_id)
31 |
32 | SELECT
33 | DISTINCT
34 | PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY revenue) OVER () AS pct_25th,
35 | PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY revenue) OVER () AS pct_50th,
36 | PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY revenue) OVER () AS pct_75th
37 | FROM transaction_revenue;
38 |
39 |
40 | --4. What is the average discount value per transaction?
41 |
42 | SELECT CAST(AVG(total_discount) AS decimal(5, 1)) AS avg_discount_per_transaction
43 | FROM (
44 | SELECT
45 | txn_id,
46 | SUM(qty*price*discount/100.0) AS total_discount
47 | FROM sales
48 | GROUP BY txn_id
49 | ) temp;
50 |
51 |
52 | --5. What is the percentage split of all transactions for members vs non-members?
53 |
54 | SELECT
55 | CAST(100.0*COUNT(DISTINCT CASE WHEN member = 1 THEN txn_id END)
56 | / COUNT(DISTINCT txn_id) AS FLOAT) AS members_pct,
57 | CAST(100.0*COUNT(DISTINCT CASE WHEN member = 0 THEN txn_id END)
58 | / COUNT(DISTINCT txn_id) AS FLOAT) AS non_members_pct
59 | FROM sales;
60 |
61 |
62 | --6. What is the average revenue for member transactions and non-member transactions?
63 |
64 | WITH member_revenue AS (
65 | SELECT
66 | member,
67 | txn_id,
68 | SUM(qty*price) AS revenue
69 | FROM sales
70 | GROUP BY member, txn_id
71 | )
72 |
73 | SELECT
74 | member,
75 | CAST(AVG(1.0*revenue) AS decimal(10,2)) AS avg_revenue
76 | FROM member_revenue
77 | GROUP BY member;
78 |
--------------------------------------------------------------------------------
/Case Study #7 - Balanced Tree Clothing Co./Syntax/C_query.sql:
--------------------------------------------------------------------------------
1 | -----------------------
2 | --C. Product Analysis--
3 | -----------------------
4 |
5 | --1. What are the top 3 products by total revenue before discount?
6 |
7 | SELECT
8 | TOP 3 pd.product_name,
9 | SUM(s.qty * s.price) AS revenue_before_discount
10 | FROM sales s
11 | JOIN product_details pd
12 | ON s.prod_id = pd.product_id
13 | GROUP BY pd.product_name
14 | ORDER BY SUM(s.qty * s.price) DESC;
15 |
16 |
17 | --2. What is the total quantity, revenue and discount for each segment?
18 |
19 | SELECT
20 | pd.segment_name,
21 |   SUM(s.qty) AS total_quantity,
22 |   SUM(s.qty * s.price) AS total_revenue_before_discount,
23 |   SUM(s.qty * s.price * s.discount / 100.0) AS total_discount
24 | FROM sales s
25 | JOIN product_details pd
26 | ON s.prod_id = pd.product_id
27 | GROUP BY pd.segment_name;
28 |
29 |
30 | --3. What is the top selling product for each segment?
31 |
32 | WITH segment_product_quantity AS (
33 | SELECT
34 | pd.segment_name,
35 | pd.product_name,
36 | SUM(s.qty) AS total_quantity,
37 | DENSE_RANK() OVER (PARTITION BY pd.segment_name ORDER BY SUM(s.qty) DESC) AS rnk
38 | FROM sales s
39 | JOIN product_details pd
40 | ON s.prod_id = pd.product_id
41 | GROUP BY pd.segment_name, pd.product_name
42 | )
43 |
44 | SELECT
45 | segment_name,
46 | product_name AS top_selling_product,
47 | total_quantity
48 | FROM segment_product_quantity
49 | WHERE rnk = 1;
50 |
51 |
52 | --4. What is the total quantity, revenue and discount for each category?
53 |
54 | SELECT
55 | pd.category_name,
56 | SUM(s.qty) AS total_quantity,
57 | SUM(s.qty * s.price) AS total_revenue,
58 |   SUM(s.qty * s.price * s.discount / 100.0) AS total_discount
59 | FROM sales s
60 | JOIN product_details pd
61 | ON s.prod_id = pd.product_id
62 | GROUP BY pd.category_name;
63 |
64 |
65 | --5. What is the top selling product for each category?
66 |
67 | WITH category_product_quantity AS (
68 | SELECT
69 | pd.category_name,
70 | pd.product_name,
71 | SUM(s.qty) AS total_quantity,
72 | DENSE_RANK() OVER (PARTITION BY pd.category_name ORDER BY SUM(s.qty) DESC) AS rnk
73 | FROM sales s
74 | JOIN product_details pd
75 | ON s.prod_id = pd.product_id
76 | GROUP BY pd.category_name, pd.product_name
77 | )
78 |
79 | SELECT
80 | category_name,
81 | product_name AS top_selling_product,
82 | total_quantity
83 | FROM category_product_quantity
84 | WHERE rnk = 1;
85 |
86 |
87 | --6. What is the percentage split of revenue by product for each segment?
88 |
89 | WITH segment_product_revenue AS (
90 | SELECT
91 | pd.segment_name,
92 | pd.product_name,
93 | SUM(s.qty * s.price) AS product_revenue
94 | FROM sales s
95 | JOIN product_details pd
96 | ON s.prod_id = pd.product_id
97 | GROUP BY pd.segment_name, pd.product_name
98 | )
99 |
100 | SELECT
101 | segment_name,
102 | product_name,
103 | CAST(100.0 * product_revenue
104 | / SUM(product_revenue) OVER (PARTITION BY segment_name)
105 | AS decimal (10, 2)) AS segment_product_pct
106 | FROM segment_product_revenue;
107 |
108 |
109 | --7. What is the percentage split of revenue by segment for each category?
110 |
111 | WITH segment_category_revenue AS (
112 | SELECT
113 | pd.segment_name,
114 | pd.category_name,
115 | SUM(s.qty * s.price) AS category_revenue
116 | FROM sales s
117 | JOIN product_details pd
118 | ON s.prod_id = pd.product_id
119 | GROUP BY pd.segment_name, pd.category_name
120 | )
121 |
122 | SELECT
123 | segment_name,
124 | category_name,
125 | CAST(100.0 * category_revenue
126 | / SUM(category_revenue) OVER (PARTITION BY category_name)
127 | AS decimal (10, 2)) AS segment_category_pct
128 | FROM segment_category_revenue;
129 |
130 |
131 | --8. What is the percentage split of total revenue by category?
132 |
133 | WITH category_revenue AS (
134 | SELECT
135 | pd.category_name,
136 | SUM(s.qty * s.price) AS revenue
137 | FROM sales s
138 | JOIN product_details pd
139 | ON s.prod_id = pd.product_id
140 | GROUP BY pd.category_name
141 | )
142 |
143 | SELECT
144 | category_name,
145 | CAST(100.0 * revenue / SUM(revenue) OVER () AS decimal (10, 2)) AS category_pct
146 | FROM category_revenue;
147 |
148 |
149 | --9. What is the total transaction “penetration” for each product?
150 | --(hint: penetration = number of transactions where at least 1 quantity of a product was purchased divided by total number of transactions)
151 |
152 | WITH product_transactions AS (
153 |   SELECT
154 |     s.prod_id, pd.product_name,
155 |     COUNT(DISTINCT s.txn_id) AS product_txn,
156 |     (SELECT COUNT(DISTINCT txn_id) FROM sales) AS total_txn
157 |   FROM sales s
158 |   JOIN product_details pd
159 |     ON s.prod_id = pd.product_id
160 |   GROUP BY s.prod_id, pd.product_name
161 | )
162 |
163 | SELECT
164 | *,
165 | CAST(100.0 * product_txn / total_txn AS decimal(10,2)) AS penetration_pct
166 | FROM product_transactions;
167 |
168 |
169 | --10. What is the most common combination of at least 1 quantity of any 3 products in a 1 single transaction?
170 |
171 | --Count the number of products in each transaction
172 | WITH products_per_transaction AS (
173 | SELECT
174 | s.txn_id,
175 | pd.product_id,
176 | pd.product_name,
177 | s.qty,
178 | COUNT(pd.product_id) OVER (PARTITION BY txn_id) AS cnt
179 | FROM sales s
180 | JOIN product_details pd
181 | ON s.prod_id = pd.product_id
182 | ),
183 |
184 | --Keep transactions with exactly 3 products and collapse each into one cell; WITHIN GROUP fixes the order so identical combinations always produce the same string
185 | combinations AS (
186 |   SELECT
187 |     STRING_AGG(product_id, ', ') WITHIN GROUP (ORDER BY product_id) AS products_id,
188 |     STRING_AGG(product_name, ', ') WITHIN GROUP (ORDER BY product_id) AS products_name
189 | FROM products_per_transaction
190 | WHERE cnt = 3
191 | GROUP BY txn_id
192 | ),
193 |
194 | --Count the number of times each combination appears
195 | combination_count AS (
196 | SELECT
197 | products_id,
198 | products_name,
199 |     COUNT(*) AS common_combinations
200 | FROM combinations
201 | GROUP BY products_id, products_name
202 | )
203 |
204 | --Filter the most common combinations
205 | SELECT
206 | products_id,
207 | products_name
208 | FROM combination_count
209 | WHERE common_combinations = (SELECT MAX(common_combinations)
210 | FROM combination_count);
211 |
--------------------------------------------------------------------------------
/Case Study #7 - Balanced Tree Clothing Co./Syntax/D_query.sql:
--------------------------------------------------------------------------------
1 | ----------------------
2 | --D. Bonus Challenge--
3 | ----------------------
4 |
5 | --Use a single SQL query to transform the product_hierarchy and product_prices datasets to the product_details table.
6 |
7 | SELECT
8 | pp.product_id,
9 | pp.price,
10 | CONCAT(ph1.level_text, ' ', ph2.level_text, ' - ', ph3.level_text) AS product_name,
11 | ph2.parent_id AS category_id,
12 | ph1.parent_id AS segment_id,
13 | ph1.id AS style_id,
14 | ph3.level_text AS category_name,
15 | ph2.level_text AS segment_name,
16 | ph1.level_text AS style_name
17 | FROM product_hierarchy ph1
18 | --self join style level (ph1) with segment level (ph2)
19 | JOIN product_hierarchy ph2 ON ph1.parent_id = ph2.id
20 | --self join segment level (ph2) with category level (ph3)
21 | JOIN product_hierarchy ph3 ON ph3.id = ph2.parent_id
22 | --inner join style level (ph1) with table [product_prices]
23 | JOIN product_prices pp ON ph1.id = pp.id;
24 |
--------------------------------------------------------------------------------
/Case Study #8 - Fresh Segments/README.md:
--------------------------------------------------------------------------------
1 | # 🍊 Case Study #8 - Fresh Segments
2 |
3 |
4 |
5 |
6 | ## 📕 Table of Contents
7 | * [Business Task](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%238%20-%20Fresh%20Segments#%EF%B8%8F-bussiness-task)
8 | * [Entity Relationship Diagram](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%238%20-%20Fresh%20Segments#-entity-relationship-diagram)
9 | * [Case Study Questions](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%238%20-%20Fresh%20Segments#-case-study-questions)
10 | * [My Solution](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%238%20-%20Fresh%20Segments#-my-solution)
11 |
12 | ---
13 | ## 🛠️ Business Task
14 | Fresh Segments is a digital marketing agency that helps other businesses analyse trends in online ad click behaviour for their unique customer base.
15 |
16 | Clients share their customer lists with the Fresh Segments team who then aggregate interest metrics and generate a single dataset worth of metrics for further analysis.
17 | In particular - the composition and rankings for different interests are provided for each client showing the proportion of their customer list who interacted with online assets related to each interest for each month.
18 |
19 | Danny has asked for your assistance to analyse aggregated metrics for an example client and provide some high level insights about the customer list and their interests.
20 |
21 | ---
22 | ## 🔐 Entity Relationship Diagram
23 |
24 |
25 |
26 | ---
27 | ## ❓ Case Study Questions
28 | ### A. Data Exploration and Cleansing
29 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%238%20-%20Fresh%20Segments/Solution/A.%20Data%20Exploration%20and%20Cleansing.md).
30 |
31 | 1. Update the `fresh_segments.interest_metrics` table by modifying the `month_year` column to be a date data type with the start of the month
32 | 2. What is count of records in the `fresh_segments.interest_metrics` for each `month_year` value sorted in chronological order (earliest to latest)
33 | with the null values appearing first?
34 | 3. What do you think we should do with these null values in the `fresh_segments.interest_metrics`
35 | 4. How many `interest_id` values exist in the `fresh_segments.interest_metrics` table but not in the `fresh_segments.interest_map` table?
36 | What about the other way around?
37 | 5. Summarise the `id` values in the `fresh_segments.interest_map` by its total record count in this table.
38 | 6. What sort of table join should we perform for our analysis and why?
39 | Check your logic by checking the rows where `interest_id` = 21246 in your joined output and
40 | include all columns from `fresh_segments.interest_metrics` and all columns from `fresh_segments.interest_map` except from the `id` column.
41 | 7. Are there any records in your joined table where the `month_year` value is before the `created_at` value from the `fresh_segments.interest_map` table?
42 | Do you think these values are valid and why?
43 |
44 | ---
45 | ### B. Interest Analysis
46 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%238%20-%20Fresh%20Segments/Solution/B.%20Interest%20Analysis.md).
47 |
48 | 1. Which interests have been present in all `month_year` dates in our dataset?
49 | 2. Using this same `total_months` measure - calculate the cumulative percentage of all records starting at 14 months - which `total_months` value
50 | passes the 90% cumulative percentage value?
51 | 3. If we were to remove all `interest_id` values which are lower than the `total_months` value we found in the previous question - how many total data points
52 | would we be removing?
53 | 4. Does this decision make sense to remove these data points from a business perspective?
54 | Use an example where there are all 14 months present to a removed interest example for your arguments - think about
55 | what it means to have less months present from a segment perspective.
56 | 5. After removing these interests - how many unique interests are there for each month?
57 |
58 | ---
59 | ### C. Segment Analysis
60 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%238%20-%20Fresh%20Segments/Solution/C.%20Segment%20Analysis.md).
61 |
62 | 1. Using our filtered dataset by removing the interests with less than 6 months worth of data,
63 | which are the top 10 and bottom 10 interests which have the largest composition values in any `month_year`?
64 | 2. Only use the maximum `composition` value for each interest but you must keep the corresponding `month_year`.
65 | 3. Which 5 interests had the lowest average `ranking` value?
66 | 4. Which 5 interests had the largest standard deviation in their `percentile_ranking` value?
67 | 5. For the 5 interests found in the previous question - what was minimum and maximum `percentile_ranking` values for each interest
68 | and its corresponding `year_month` value? Can you describe what is happening for these 5 interests?
69 | 6. How would you describe our customers in this segment based off their `composition` and ranking values?
70 | What sort of products or services should we show to these customers and what should we avoid?
71 |
72 | ---
73 | ### D. Index Analysis
74 | View my solution [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/blob/main/Case%20Study%20%238%20-%20Fresh%20Segments/Solution/D.%20Index%20Analysis.md).
75 |
76 | The `index_value` is a measure which can be used to reverse calculate the average composition for Fresh Segments’ clients.
77 |
78 | Average composition can be calculated by dividing the `composition` column by the `index_value` column rounded to 2 decimal places.
79 |
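For reference, a minimal sketch of that reverse calculation, reusing the column names above (this simply mirrors the formula, not an official query):

```TSQL
SELECT
  interest_id,
  month_year,
  ROUND(composition / index_value, 2) AS avg_composition
FROM fresh_segments.dbo.interest_metrics;
```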
80 | 1. What is the top 10 interests by the average composition for each month?
81 | 2. For all of these top 10 interests - which interest appears the most often?
82 | 3. What is the average of the average composition for the top 10 interests for each month?
83 | 4. What is the 3 month rolling average of the max average composition value from September 2018 to August 2019 and
84 | include the previous top ranking interests in the same output shown below.
85 | 5. Provide a possible reason why the max average composition might change from month to month?
86 | Could it signal something is not quite right with the overall business model for Fresh Segments?
87 |
88 | Required output for question 4:
89 |
90 | | month_year | interest_name | max_index_composition | 3_month_moving_avg | 1_month_ago | 2_months_ago |
91 | |------------|-------------------------------|-----------------------|--------------------|-----------------------------------|------------------------------------|
92 | | 2018-09-01 | Work Comes First Travelers | 8.26 | 7.61 | Las Vegas Trip Planners: 7.21 | Las Vegas Trip Planners: 7.36 |
93 | | 2018-10-01 | Work Comes First Travelers | 9.14 | 8.20 | Work Comes First Travelers: 8.26 | Las Vegas Trip Planners: 7.21 |
94 | | 2018-11-01 | Work Comes First Travelers | 8.28 | 8.56 | Work Comes First Travelers: 9.14 | Work Comes First Travelers: 8.26 |
95 | | 2018-12-01 | Work Comes First Travelers | 8.31 | 8.58 | Work Comes First Travelers: 8.28 | Work Comes First Travelers: 9.14 |
96 | | 2019-01-01 | Work Comes First Travelers | 7.66 | 8.08 | Work Comes First Travelers: 8.31 | Work Comes First Travelers: 8.28 |
97 | | 2019-02-01 | Work Comes First Travelers | 7.66 | 7.88 | Work Comes First Travelers: 7.66 | Work Comes First Travelers: 8.31 |
98 | | 2019-03-01 | Alabama Trip Planners | 6.54 | 7.29 | Work Comes First Travelers: 7.66 | Work Comes First Travelers: 7.66 |
99 | | 2019-04-01 | Solar Energy Researchers | 6.28 | 6.83 | Alabama Trip Planners: 6.54 | Work Comes First Travelers: 7.66 |
100 | | 2019-05-01 | Readers of Honduran Content | 4.41 | 5.74 | Solar Energy Researchers: 6.28 | Alabama Trip Planners: 6.54 |
101 | | 2019-06-01 | Las Vegas Trip Planners | 2.77 | 4.49 | Readers of Honduran Content: 4.41 | Solar Energy Researchers: 6.28 |
102 | | 2019-07-01 | Las Vegas Trip Planners | 2.82 | 3.33 | Las Vegas Trip Planners: 2.77 | Readers of Honduran Content: 4.41 |
103 | | 2019-08-01 | Cosmetics and Beauty Shoppers | 2.73 | 2.77 | Las Vegas Trip Planners: 2.82 | Las Vegas Trip Planners: 2.77 |
104 |
105 | ---
106 | ## 🚀 My Solution
107 | * View the complete syntax [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%238%20-%20Fresh%20Segments/Syntax).
108 | * View the result and explanation [HERE](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%238%20-%20Fresh%20Segments/Solution).
109 |
--------------------------------------------------------------------------------
/Case Study #8 - Fresh Segments/Syntax/A_query.sql:
--------------------------------------------------------------------------------
1 | -------------------------------------
2 | --A. Data Exploration and Cleansing--
3 | -------------------------------------
4 |
5 | --1. Update the fresh_segments.interest_metrics table by modifying the month_year column to be a date data type with the start of the month
6 |
7 | --Modify the length of column month_year so it can store 10 characters
8 | ALTER TABLE fresh_segments.dbo.interest_metrics
9 | ALTER COLUMN month_year VARCHAR(10);
10 |
11 | --Update values in month_year column
12 | UPDATE fresh_segments.dbo.interest_metrics
13 | SET month_year = CONVERT(DATE, '01-' + month_year, 105)
14 |
15 | --Convert month_year to DATE
16 | ALTER TABLE fresh_segments.dbo.interest_metrics
17 | ALTER COLUMN month_year DATE;
18 |
19 | SELECT * FROM fresh_segments.dbo.interest_metrics;
20 |
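--Sanity check for the conversion above (illustrative literal, not taken from the data):
--'01-' + '07-2018' = '01-07-2018', which style 105 (dd-mm-yyyy) reads as 1 July 2018
SELECT CONVERT(DATE, '01-' + '07-2018', 105) AS month_start;  --2018-07-01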
21 |
22 | --2. What is count of records in the fresh_segments.interest_metrics for each month_year value
23 | --sorted in chronological order (earliest to latest) with the null values appearing first?
24 |
25 | SELECT
26 | month_year,
27 | COUNT(*) AS cnt
28 | FROM interest_metrics
29 | GROUP BY month_year
30 | ORDER BY month_year;
31 |
32 |
33 | --3. What do you think we should do with these null values in the fresh_segments.interest_metrics?
34 |
35 | --interest_id = 21246 has NULL _month, _year, and month_year
36 | SELECT *
37 | FROM interest_metrics
38 | WHERE month_year IS NULL
39 | ORDER BY interest_id DESC;
40 |
41 | --Delete rows that are null in column interest_id (1193 rows)
42 | DELETE FROM interest_metrics
43 | WHERE interest_id IS NULL;
44 |
45 |
46 | --4. How many interest_id values exist in the fresh_segments.interest_metrics table
47 | --but not in the fresh_segments.interest_map table? What about the other way around?
48 |
49 | SELECT
50 | COUNT(DISTINCT map.id) AS map_id_count,
51 | COUNT(DISTINCT metrics.interest_id) AS metrics_id_count,
52 |   SUM(CASE WHEN map.id IS NULL THEN 1 END) AS not_in_map,
53 |   SUM(CASE WHEN metrics.interest_id IS NULL THEN 1 END) AS not_in_metrics
54 | FROM interest_metrics metrics
55 | FULL JOIN interest_map map
56 | ON metrics.interest_id = map.id;
57 |
58 |
59 | --5. Summarise the id values in the fresh_segments.interest_map by its total record count in this table
60 |
61 | SELECT COUNT(*) AS map_id_count
62 | FROM interest_map;
63 |
64 |
65 | --6. What sort of table join should we perform for our analysis and why?
66 | --Check your logic by checking the rows where interest_id = 21246 in your joined output and
67 | --include all columns from fresh_segments.interest_metrics and all columns from fresh_segments.interest_map except from the id column.
68 |
69 | SELECT
70 | metrics.*,
71 | map.interest_name,
72 | map.interest_summary,
73 | map.created_at,
74 | map.last_modified
75 | FROM interest_metrics metrics
76 | JOIN interest_map map
77 | ON metrics.interest_id = map.id
78 | WHERE metrics.interest_id = 21246;
79 |
80 |
81 | --7. Are there any records in your joined table where the month_year value is before the created_at value from the fresh_segments.interest_map table?
82 | --Do you think these values are valid and why?
83 |
84 | --Check if metrics.month_year < map.created_at
85 | SELECT COUNT(*) AS cnt
86 | FROM interest_metrics metrics
87 | JOIN interest_map map
88 | ON metrics.interest_id = map.id
89 | WHERE metrics.month_year < CAST(map.created_at AS DATE);
90 |
91 | --Check if metrics.month_year and map.created_at are in the same month
92 | SELECT COUNT(*) AS cnt
93 | FROM interest_metrics metrics
94 | JOIN interest_map map
95 | ON map.id = metrics.interest_id
96 | WHERE metrics.month_year < CAST(DATEADD(DAY, -DAY(map.created_at)+1, map.created_at) AS DATE);
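--DATEADD(DAY, -DAY(map.created_at)+1, map.created_at) rewinds created_at to the 1st of its month,
--so this second check only flags rows whose month_year is in an earlier month than created_at.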
97 |
--------------------------------------------------------------------------------
/Case Study #8 - Fresh Segments/Syntax/B_query.sql:
--------------------------------------------------------------------------------
1 | ------------------------
2 | --B. Interest Analysis--
3 | ------------------------
4 |
5 | --1. Which interests have been present in all month_year dates in our dataset?
6 |
7 | -- Find how many unique month_year dates in our dataset
8 | DECLARE @unique_month_year_cnt INT = (
9 | SELECT COUNT(DISTINCT month_year)
10 | FROM interest_metrics)
11 |
12 | --Filter all interest_id that have the count = @unique_month_year_cnt
13 | SELECT
14 | interest_id,
15 | COUNT(month_year) AS cnt
16 | FROM interest_metrics
17 | GROUP BY interest_id
18 | HAVING COUNT(month_year) = @unique_month_year_cnt;
19 |
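--Equivalent variable-free form (a stylistic alternative when the whole statement must run as one batch):
SELECT
  interest_id,
  COUNT(DISTINCT month_year) AS cnt
FROM interest_metrics
GROUP BY interest_id
HAVING COUNT(DISTINCT month_year) = (SELECT COUNT(DISTINCT month_year) FROM interest_metrics);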
20 |
21 | --2. Using this same total_months measure - calculate the cumulative percentage of all records starting at 14 months
22 | -- which total_months value passes the 90% cumulative percentage value?
23 |
24 | WITH interest_months AS (
25 | SELECT
26 | interest_id,
27 | COUNT(DISTINCT month_year) AS total_months
28 | FROM interest_metrics
29 | WHERE interest_id IS NOT NULL
30 | GROUP BY interest_id
31 | ),
32 | interest_count AS (
33 | SELECT
34 | total_months,
35 | COUNT(interest_id) AS interests
36 | FROM interest_months
37 | GROUP BY total_months
38 | )
39 |
40 | SELECT *,
41 | CAST(100.0 * SUM(interests) OVER(ORDER BY total_months DESC)
42 | / SUM(interests) OVER() AS decimal(10, 2)) AS cumulative_pct
43 | FROM interest_count;
44 |
45 |
46 | --3. If we were to remove all interest_id values which are lower than the total_months value we found in the previous question
47 | --- how many total data points would we be removing?
48 |
49 | WITH interest_months AS (
50 | SELECT
51 | interest_id,
52 | COUNT(DISTINCT month_year) AS total_months
53 | FROM interest_metrics
54 | WHERE interest_id IS NOT NULL
55 | GROUP BY interest_id
56 | )
57 |
58 | SELECT
59 | COUNT(interest_id) AS interests,
60 | COUNT(DISTINCT interest_id) AS unique_interests
61 | FROM interest_metrics
62 | WHERE interest_id IN (
63 | SELECT interest_id
64 | FROM interest_months
65 | WHERE total_months < 6);
66 |
67 |
68 | --4. Does this decision make sense to remove these data points from a business perspective?
69 | --Use an example where there are all 14 months present to a removed interest example for your arguments
70 | -- think about what it means to have less months present from a segment perspective.
71 |
72 | --When total_months = 14
73 | SELECT
74 | month_year,
75 | COUNT(DISTINCT interest_id) interest_count,
76 | MIN(ranking) AS highest_rank,
77 | MAX(composition) AS composition_max,
78 | MAX(index_value) AS index_max
79 | FROM interest_metrics metrics
80 | WHERE interest_id IN (
81 | SELECT interest_id
82 | FROM interest_metrics
83 | WHERE interest_id IS NOT NULL
84 | GROUP BY interest_id
85 | HAVING COUNT(DISTINCT month_year) = 14)
86 | GROUP BY month_year
87 | ORDER BY month_year, highest_rank;
88 |
89 | --When total_months = 1
90 | SELECT
91 | month_year,
92 | COUNT(DISTINCT interest_id) interest_count,
93 | MIN(ranking) AS highest_rank,
94 | MAX(composition) AS composition_max,
95 | MAX(index_value) AS index_max
96 | FROM interest_metrics metrics
97 | WHERE interest_id IN (
98 | SELECT interest_id
99 | FROM interest_metrics
100 | WHERE interest_id IS NOT NULL
101 | GROUP BY interest_id
102 | HAVING COUNT(DISTINCT month_year) = 1)
103 | GROUP BY month_year
104 | ORDER BY month_year, highest_rank;
105 |
106 |
107 | --5. After removing these interests - how many unique interests are there for each month?
108 |
109 | --Create a temporary table [interest_metrics_edited] that removes all interest_id that have total_months lower than 6
110 | SELECT *
111 | INTO #interest_metrics_edited
112 | FROM interest_metrics
113 | WHERE interest_id NOT IN (
114 | SELECT interest_id
115 | FROM interest_metrics
116 | WHERE interest_id IS NOT NULL
117 | GROUP BY interest_id
118 | HAVING COUNT(DISTINCT month_year) < 6);
119 |
120 | --Check the count of interests_id
121 | SELECT
122 | COUNT(interest_id) AS all_interests,
123 | COUNT(DISTINCT interest_id) AS unique_interests
124 | FROM #interest_metrics_edited;
125 |
126 | --Find the number of unique interests for each month after removing step above
127 | SELECT
128 | month_year,
129 | COUNT(DISTINCT interest_id) AS unique_interests
130 | FROM #interest_metrics_edited
131 | WHERE month_year IS NOT NULL
132 | GROUP BY month_year
133 | ORDER BY month_year;
134 |
--------------------------------------------------------------------------------
/Case Study #8 - Fresh Segments/Syntax/C_query.sql:
--------------------------------------------------------------------------------
1 | -----------------------
2 | --C. Segment Analysis--
3 | -----------------------
4 |
5 | --1. Using our filtered dataset by removing the interests with less than 6 months worth of data,
6 | --which are the top 10 and bottom 10 interests which have the largest composition values in any month_year?
7 | --Only use the maximum composition value for each interest but you must keep the corresponding month_year.
8 |
9 | WITH max_composition AS (
10 | SELECT
11 | month_year,
12 | interest_id,
13 | MAX(composition) OVER(PARTITION BY interest_id) AS largest_composition
14 | FROM #interest_metrics_edited -- filtered dataset in which interests with less than 6 months are removed
15 | WHERE month_year IS NOT NULL
16 | ),
17 | composition_rank AS (
18 | SELECT *,
19 | DENSE_RANK() OVER(ORDER BY largest_composition DESC) AS rnk
20 | FROM max_composition
21 | )
22 |
23 | --Top 10 interests that have the largest composition values
24 | SELECT
25 |   DISTINCT cr.interest_id,
26 | im.interest_name,
27 | cr.rnk
28 | FROM composition_rank cr
29 | JOIN interest_map im ON cr.interest_id = im.id
30 | WHERE cr.rnk <= 10
31 | ORDER BY cr.rnk;
32 |
33 | --Bottom 10 interests that have the largest composition values
34 | SELECT
35 | DISTINCT TOP 10 cr.interest_id,
36 | im.interest_name,
37 | cr.rnk
38 | FROM composition_rank cr
39 | JOIN interest_map im ON cr.interest_id = im.id
40 | ORDER BY cr.rnk DESC;
41 |
42 |
43 | --2. Which 5 interests had the lowest average ranking value?
44 |
45 | SELECT
46 | TOP 5 metrics.interest_id,
47 | map.interest_name,
48 | CAST(AVG(1.0*metrics.ranking) AS decimal(10,2)) AS avg_ranking
49 | FROM #interest_metrics_edited metrics
50 | JOIN interest_map map
51 | ON metrics.interest_id = map.id
52 | GROUP BY metrics.interest_id, map.interest_name
53 | ORDER BY avg_ranking;
54 |
55 |
56 | --3. Which 5 interests had the largest standard deviation in their percentile_ranking value?
57 |
58 | SELECT
59 | DISTINCT TOP 5 metrics.interest_id,
60 | map.interest_name,
61 | ROUND(STDEV(metrics.percentile_ranking)
62 | OVER(PARTITION BY metrics.interest_id), 2) AS std_percentile_ranking
63 | FROM #interest_metrics_edited metrics
64 | JOIN interest_map map
65 | ON metrics.interest_id = map.id
66 | ORDER BY std_percentile_ranking DESC;
67 |
68 |
69 | --4. For the 5 interests found in the previous question - what were minimum and maximum percentile_ranking values for each interest
70 | --and its corresponding year_month value? Can you describe what is happening for these 5 interests?
71 |
72 | --Based on the query for the previous question
73 | WITH largest_std_interests AS (
74 | SELECT
75 | DISTINCT TOP 5 metrics.interest_id,
76 | map.interest_name,
77 | map.interest_summary,
78 | ROUND(STDEV(metrics.percentile_ranking)
79 | OVER(PARTITION BY metrics.interest_id), 2) AS std_percentile_ranking
80 | FROM #interest_metrics_edited metrics
81 | JOIN interest_map map
82 | ON metrics.interest_id = map.id
83 | ORDER BY std_percentile_ranking DESC
84 | ),
85 | max_min_percentiles AS (
86 | SELECT
87 | lsi.interest_id,
88 | lsi.interest_name,
89 |     lsi.interest_summary,
90 | ime.month_year,
91 | ime.percentile_ranking,
92 | MAX(ime.percentile_ranking) OVER(PARTITION BY lsi.interest_id) AS max_pct_rnk,
93 | MIN(ime.percentile_ranking) OVER(PARTITION BY lsi.interest_id) AS min_pct_rnk
94 | FROM largest_std_interests lsi
95 | JOIN #interest_metrics_edited ime
96 | ON lsi.interest_id = ime.interest_id
97 | )
98 |
99 | SELECT
100 | interest_id,
101 | interest_name,
102 | interest_summary,
103 | MAX(CASE WHEN percentile_ranking = max_pct_rnk THEN month_year END) AS max_pct_month_year,
104 | MAX(CASE WHEN percentile_ranking = max_pct_rnk THEN percentile_ranking END) AS max_pct_rnk,
105 | MIN(CASE WHEN percentile_ranking = min_pct_rnk THEN month_year END) AS min_pct_month_year,
106 | MIN(CASE WHEN percentile_ranking = min_pct_rnk THEN percentile_ranking END) AS min_pct_rnk
107 | FROM max_min_percentiles
108 | GROUP BY interest_id, interest_name, interest_summary;
109 |
--------------------------------------------------------------------------------
/Case Study #8 - Fresh Segments/Syntax/D_query.sql:
--------------------------------------------------------------------------------
1 | ---------------------
2 | --D. Index Analysis--
3 | ---------------------
4 |
5 | --1. What is the top 10 interests by the average composition for each month?
6 |
7 | WITH avg_composition_rank AS (
8 | SELECT
9 | metrics.interest_id,
10 | map.interest_name,
11 | metrics.month_year,
12 | ROUND(metrics.composition / metrics.index_value, 2) AS avg_composition,
13 | DENSE_RANK() OVER(PARTITION BY metrics.month_year ORDER BY metrics.composition / metrics.index_value DESC) AS rnk
14 | FROM interest_metrics metrics
15 | JOIN interest_map map
16 | ON metrics.interest_id = map.id
17 | WHERE metrics.month_year IS NOT NULL
18 | )
19 | SELECT *
20 | FROM avg_composition_rank
21 | --filter top 10 interests for each month
22 | WHERE rnk <= 10;
23 |
24 |
25 | --2. For all of these top 10 interests - which interest appears the most often?
26 |
27 | WITH avg_composition_rank AS (
28 | SELECT
29 | metrics.interest_id,
30 | map.interest_name,
31 | metrics.month_year,
32 | ROUND(metrics.composition / metrics.index_value, 2) AS avg_composition,
33 | DENSE_RANK() OVER(PARTITION BY metrics.month_year ORDER BY metrics.composition / metrics.index_value DESC) AS rnk
34 | FROM interest_metrics metrics
35 | JOIN interest_map map
36 | ON metrics.interest_id = map.id
37 | WHERE metrics.month_year IS NOT NULL
38 | ),
39 | frequent_interests AS (
40 | SELECT
41 | interest_id,
42 | interest_name,
43 | COUNT(*) AS freq
44 | FROM avg_composition_rank
45 | WHERE rnk <= 10 --filter top 10 interests for each month
46 | GROUP BY interest_id, interest_name
47 | )
48 |
49 | SELECT *
50 | FROM frequent_interests
51 | WHERE freq IN (SELECT MAX(freq) FROM frequent_interests);
52 |
53 |
54 | --3. What is the average of the average composition for the top 10 interests for each month?
55 |
56 | WITH avg_composition_rank AS (
57 | SELECT
58 | metrics.interest_id,
59 | map.interest_name,
60 | metrics.month_year,
61 | ROUND(metrics.composition / metrics.index_value, 2) AS avg_composition,
62 | DENSE_RANK() OVER(PARTITION BY metrics.month_year ORDER BY metrics.composition / metrics.index_value DESC) AS rnk
63 | FROM interest_metrics metrics
64 | JOIN interest_map map
65 | ON metrics.interest_id = map.id
66 | WHERE metrics.month_year IS NOT NULL
67 | )
68 |
69 | SELECT
70 | month_year,
71 | AVG(avg_composition) AS avg_of_avg_composition
72 | FROM avg_composition_rank
73 | WHERE rnk <= 10 --filter top 10 interests for each month
74 | GROUP BY month_year;
75 |
76 |
77 | --4. What is the 3 month rolling average of the max average composition value from September 2018 to August 2019
78 | --and include the previous top ranking interests in the same output shown below.
79 |
80 | WITH avg_compositions AS (
81 | SELECT
82 | month_year,
83 | interest_id,
84 | ROUND(composition / index_value, 2) AS avg_comp,
85 | ROUND(MAX(composition / index_value) OVER(PARTITION BY month_year), 2) AS max_avg_comp
86 | FROM interest_metrics
87 | WHERE month_year IS NOT NULL
88 | ),
89 | max_avg_compositions AS (
90 | SELECT *
91 | FROM avg_compositions
92 | WHERE avg_comp = max_avg_comp
93 | ),
94 | moving_avg_compositions AS (
95 | SELECT
96 | mac.month_year,
97 | im.interest_name,
98 | mac.max_avg_comp AS max_index_composition,
99 | ROUND(AVG(mac.max_avg_comp)
100 | OVER(ORDER BY mac.month_year
101 | ROWS BETWEEN 2 PRECEDING AND CURRENT ROW), 2) AS '3_month_moving_avg',
102 | LAG(im.interest_name) OVER (ORDER BY mac.month_year) + ': ' +
103 | CAST(LAG(mac.max_avg_comp) OVER (ORDER BY mac.month_year) AS VARCHAR(4)) AS '1_month_ago',
104 | LAG(im.interest_name, 2) OVER (ORDER BY mac.month_year) + ': ' +
105 |       CAST(LAG(mac.max_avg_comp, 2) OVER (ORDER BY mac.month_year) AS VARCHAR(4)) AS '2_months_ago'
106 | FROM max_avg_compositions mac
107 | JOIN interest_map im
108 | ON mac.interest_id = im.id
109 | )
110 |
111 | SELECT *
112 | FROM moving_avg_compositions
113 | WHERE month_year BETWEEN '2018-09-01' AND '2019-08-01';
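--Note: the LAG expressions above are NULL for the earliest months in the data
--(string + NULL yields NULL in T-SQL), which is another reason the final filter starts at '2018-09-01'.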
114 |
--------------------------------------------------------------------------------
/IMG/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/1.png
--------------------------------------------------------------------------------
/IMG/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/2.png
--------------------------------------------------------------------------------
/IMG/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/3.png
--------------------------------------------------------------------------------
/IMG/4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/4.png
--------------------------------------------------------------------------------
/IMG/5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/5.png
--------------------------------------------------------------------------------
/IMG/6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/6.png
--------------------------------------------------------------------------------
/IMG/7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/7.png
--------------------------------------------------------------------------------
/IMG/8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/8.png
--------------------------------------------------------------------------------
/IMG/e1.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/e1.PNG
--------------------------------------------------------------------------------
/IMG/e2.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/e2.PNG
--------------------------------------------------------------------------------
/IMG/e3.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/e3.PNG
--------------------------------------------------------------------------------
/IMG/e4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/e4.png
--------------------------------------------------------------------------------
/IMG/e5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/e5.png
--------------------------------------------------------------------------------
/IMG/e6.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/e6.PNG
--------------------------------------------------------------------------------
/IMG/e7.1.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/e7.1.PNG
--------------------------------------------------------------------------------
/IMG/e7.2.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/e7.2.PNG
--------------------------------------------------------------------------------
/IMG/e8-updated.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qanhnn12/8-Week-SQL-Challenge/463e180431cf46bdb7c932d3b458d6921a2dbb91/IMG/e8-updated.PNG
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # :fire: 8-Week SQL Challenge
2 |
3 | This repository contains my solutions for the [#8WeekSQLChallenge](https://8weeksqlchallenge.com/) using MS SQL Server.
4 |
5 | Thanks @DataWithDanny for these interesting SQL case studies! 👋
6 |
7 |
8 | ## 📕 Table of Contents
9 | * [Case Study #1 - Danny's Diner](https://github.com/qanhnn12/8-Week-SQL-Challenge#-case-study-1---dannys-diner)
10 | * [Case Study #2 - Pizza Runner](https://github.com/qanhnn12/8-Week-SQL-Challenge#-case-study-2---pizza-runner)
11 | * [Case Study #3 - Foodie-Fi](https://github.com/qanhnn12/8-Week-SQL-Challenge#-case-study-3---foodie-fi)
12 | * [Case Study #4 - Data Bank](https://github.com/qanhnn12/8-Week-SQL-Challenge#bar_chart-case-study-4---data-bank)
13 | * [Case Study #5 - Data Mart](https://github.com/qanhnn12/8-Week-SQL-Challenge#-case-study-5---data-mart)
14 | * [Case Study #6 - Clique Bait](https://github.com/qanhnn12/8-Week-SQL-Challenge#-case-study-6---clique-bait)
15 | * [Case Study #7 - Balanced Tree Clothing Co.](https://github.com/qanhnn12/8-Week-SQL-Challenge#tshirt-case-study-7---balanced-tree-clothing-co)
16 | * [Case Study #8 - Fresh Segments](https://github.com/qanhnn12/8-Week-SQL-Challenge#-case-study-8---fresh-segments)
17 |
18 | ---
19 | ## 🍜 Case Study #1 - Danny's Diner
20 |
21 |
22 |
23 | Danny seriously loves Japanese food so in the beginning of 2021, he decides to embark upon a risky venture and opens up a cute little restaurant that sells his 3 favourite foods: sushi, curry and ramen.
24 |
25 | Danny’s Diner is in need of your assistance to help the restaurant stay afloat - the restaurant has captured some very basic data from their few months of operation but have no idea how to use their data to help them run the business.
26 |
27 | ***View the case study [here](https://8weeksqlchallenge.com/case-study-1/) and my solution
28 | [here](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%231%20-%20Danny's%20Diner).***
29 |
30 | ---
31 | ## 🍕 Case Study #2 - Pizza Runner
32 |
33 |
34 |
35 | Danny was scrolling through his Instagram feed when something really caught his eye - “80s Retro Styling and Pizza Is The Future!”
36 |
37 | Danny was sold on the idea, but he knew that pizza alone was not going to help him get seed funding to expand his new Pizza Empire - so he had one more genius idea to combine with it - he was going to Uberize it - and so Pizza Runner was launched!
38 |
39 | Danny started by recruiting “runners” to deliver fresh pizza from Pizza Runner Headquarters (otherwise known as Danny’s house) and also maxed out his credit card to pay freelance developers to build a mobile app to accept orders from customers.
40 |
41 | ***View the case study [here](https://8weeksqlchallenge.com/case-study-2/) and my solution [here](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%232%20-%20Pizza%20Runner).***
42 |
43 | ---
44 | ## 🥑 Case Study #3 - Foodie-Fi
45 |
46 |
47 |
48 | Subscription based businesses are super popular and Danny realised that there was a large gap in the market -
49 | he wanted to create a new streaming service that only had food related content - something like Netflix but with only cooking shows!
50 |
51 | Danny finds a few smart friends to launch his new startup Foodie-Fi in 2020 and started selling monthly and annual subscriptions,
52 | giving their customers unlimited on-demand access to exclusive food videos from around the world!
53 |
54 | ***View the case study [here](https://8weeksqlchallenge.com/case-study-3/) and my solution [here](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%233%20-%20Foodie-Fi).***
55 |
56 | ---
57 | ## :bar_chart: Case Study #4 - Data Bank
58 |
59 |
60 |
61 | There is a new innovation in the financial industry called Neo-Banks: new aged digital only banks without physical branches.
62 |
63 | Danny thought that there should be some sort of intersection between these new age banks, cryptocurrency and the data world…so he decides to launch a new initiative - Data Bank!
64 |
65 | The management team at Data Bank want to increase their total customer base - but also need some help tracking just how much data storage their customers will need.
66 |
67 | This case study is all about calculating metrics, growth and helping the business analyse their data in a smart way to better forecast and plan for their future developments!
68 |
69 | ***View the case study [here](https://8weeksqlchallenge.com/case-study-4/) and my solution [here](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%234%20-%20Data%20Bank).***
70 |
71 | ---
72 | ## 🛒 Case Study #5 - Data Mart
73 |
74 |
75 |
76 | Data Mart is Danny’s latest venture and after running international operations for his online supermarket that specialises in fresh produce - Danny is asking for your support to analyse his sales performance.
77 |
78 | In June 2020 - large scale supply changes were made at Data Mart. All Data Mart products now use sustainable packaging methods in every single step from the farm all the way to the customer. Danny needs your help to quantify the impact of this change on the sales performance for Data Mart and its separate business areas.
79 |
80 | ***View the case study [here](https://8weeksqlchallenge.com/case-study-5/) and my solution [here](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%235%20-%20Data%20Mart).***
81 |
82 | ---
83 | ## 🐟 Case Study #6 - Clique Bait
84 |
85 |
86 |
87 | Clique Bait is not like your regular online seafood store - the founder and CEO Danny, was also a part of a digital data analytics team and wanted to expand his knowledge into the seafood industry!
88 |
89 | In this case study - you are required to support Danny’s vision and analyse his dataset and come up with creative solutions to calculate funnel fallout rates for the Clique Bait online store.
90 |
91 | ***View the case study [here](https://8weeksqlchallenge.com/case-study-6/) and my solution [here](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%236%20-%20Clique%20Bait).***
92 |
93 | ---
94 | ## :tshirt: Case Study #7 - Balanced Tree Clothing Co.
95 |
96 |
97 |
98 | Balanced Tree Clothing Company prides themselves on providing an optimised range of clothing and lifestyle wear for the modern adventurer!
99 |
100 | Danny, the CEO of this trendy fashion company, has asked you to assist the merchandising teams in analysing their sales performance and generating a basic financial report to share with the wider business.
101 |
102 | ***View the case study [here](https://8weeksqlchallenge.com/case-study-7/) and my solution [here](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%237%20-%20Balanced%20Tree%20Clothing%20Co.).***
103 |
104 | ---
105 | ## 🍊 Case Study #8 - Fresh Segments
106 |
107 |
108 |
109 | Danny created Fresh Segments, a digital marketing agency that helps other businesses analyse trends in online ad click behaviour for their unique customer base.
110 |
111 | Clients share their customer lists with the Fresh Segments team who then aggregate interest metrics and generate a single dataset worth of metrics for further analysis. In particular - the composition and rankings for different interests are provided for each client showing the proportion of their customer list who interacted with online assets related to each interest for each month.
112 |
113 | Danny has asked for your assistance to analyse aggregated metrics for an example client and provide some high level insights about the customer list and their interests.
114 |
115 | ***View the case study [here](https://8weeksqlchallenge.com/case-study-8/) and my solution [here](https://github.com/qanhnn12/8-Week-SQL-Challenge/tree/main/Case%20Study%20%238%20-%20Fresh%20Segments).***
116 |
117 | ---
118 | ## ✨ Contribution
119 | Contributions, issues, and feature requests are welcome!
120 |
121 | To contribute to this project, see the GitHub documentation on [creating a pull request](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request).
122 |
123 | ---
124 | ## 👏 Support
125 | Please give me a ⭐️ if you like this project!
126 |
127 | ---
128 | © 2022 Anh Nguyen
129 |
--------------------------------------------------------------------------------