├── SQL-cheat-sheet.pdf
├── SQL_diagram_v6.pdf
├── Topics To Cover.pptx
├── SQL_Reference_sheet_v3.pdf
├── SQL Interview Questions.pptx
├── Udacity Course
    ├── SQL Data Cleaning.pptx
    ├── Advanced SQL - Window.pptx
    ├── SQL- Joins & Aggregations.pptx
    ├── SQL Sub Queries & Temporary tables.pptx
    ├── Advanced SQL Advanced JOINs & Performance.pptx
    ├── SQL_Advanced.sql
    ├── SQL_Window.sql
    ├── SQLData Cleaning.sql
    ├── SQL_SubQueries.sql
    └── SQL_Queries.sql
├── reference-guide-aggregate-functions.pdf
├── Join Data_ Learn Joins Cheatsheet _ Codecademy.pdf
├── Sylvia Moestl Vasilik - SQL Practice Problems_ 57.pdf
├── README.md
└── Sylvia Moestl Vasilik - SQL Practice Problems_ 57 beginning, intermediate, and advanced challenges for you to solve using a _learn-by-doing_ approach-CreateSpace Independent Publishing Platform (2016).epub


/SQL-cheat-sheet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waleedsial/SQL/HEAD/SQL-cheat-sheet.pdf


--------------------------------------------------------------------------------
/SQL_diagram_v6.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waleedsial/SQL/HEAD/SQL_diagram_v6.pdf


--------------------------------------------------------------------------------
/Topics To Cover.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waleedsial/SQL/HEAD/Topics To Cover.pptx


--------------------------------------------------------------------------------
/SQL_Reference_sheet_v3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waleedsial/SQL/HEAD/SQL_Reference_sheet_v3.pdf


--------------------------------------------------------------------------------
/SQL Interview Questions.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waleedsial/SQL/HEAD/SQL Interview Questions.pptx


--------------------------------------------------------------------------------
/Udacity Course/SQL Data Cleaning.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waleedsial/SQL/HEAD/Udacity Course/SQL Data Cleaning.pptx


--------------------------------------------------------------------------------
/reference-guide-aggregate-functions.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waleedsial/SQL/HEAD/reference-guide-aggregate-functions.pdf


--------------------------------------------------------------------------------
/Udacity Course/Advanced SQL - Window.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waleedsial/SQL/HEAD/Udacity Course/Advanced SQL - Window.pptx


--------------------------------------------------------------------------------
/Udacity Course/SQL- Joins & Aggregations.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waleedsial/SQL/HEAD/Udacity Course/SQL- Joins & Aggregations.pptx


--------------------------------------------------------------------------------
/Join Data_ Learn Joins Cheatsheet _ Codecademy.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waleedsial/SQL/HEAD/Join Data_ Learn Joins Cheatsheet _ Codecademy.pdf


--------------------------------------------------------------------------------
/Sylvia Moestl Vasilik - SQL Practice Problems_ 57.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waleedsial/SQL/HEAD/Sylvia Moestl Vasilik - SQL Practice Problems_ 57.pdf


--------------------------------------------------------------------------------
/Udacity Course/SQL Sub Queries & Temporary tables.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waleedsial/SQL/HEAD/Udacity Course/SQL Sub Queries & Temporary tables.pptx


--------------------------------------------------------------------------------
/Udacity Course/Advanced SQL Advanced JOINs & Performance.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waleedsial/SQL/HEAD/Udacity Course/Advanced SQL Advanced JOINs & Performance.pptx


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # SQL
2 | This repository contains topic-wise slides that I developed while preparing for SQL interviews.
3 | I primarily used resources from Mode Analytics and Udacity to create the slides.
4 | Many of the queries come from other resources, such as LeetCode.
5 | 


--------------------------------------------------------------------------------
/Sylvia Moestl Vasilik - SQL Practice Problems_ 57 beginning, intermediate, and advanced challenges for you to solve using a _learn-by-doing_ approach-CreateSpace Independent Publishing Platform (2016).epub:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/waleedsial/SQL/HEAD/Sylvia Moestl Vasilik - SQL Practice Problems_ 57 beginning, intermediate, and advanced challenges for you to solve using a _learn-by-doing_ approach-CreateSpace Independent Publishing Platform (2016).epub


--------------------------------------------------------------------------------
/Udacity Course/SQL_Advanced.sql:
--------------------------------------------------------------------------------
  1 | -- each account who has a sales rep 
  2 | -- and each sales rep that has an account (all of the columns in these returned rows will be full)
  3 | 
  4 | -- but also each account that does not have a sales rep 
  5 | -- and each sales rep that does not have an account (some of the columns in these returned rows will be empty)
  6 | 
  7 | -- I think we need a FULL JOIN here, which also returns the unmatched rows from both tables.
  8 | 
  9 | SELECT * 
 10 | FROM accounts
 11 | FULL OUTER JOIN sales_reps
 12 | ON accounts.sales_rep_id = sales_reps.id
 13 | 
 14 | 
 15 | -- Inequality JOINs
 16 | 
 17 | SELECT accounts.name, accounts.primary_poc, sales_reps.name
 18 | FROM accounts
 19 | LEFT JOIN sales_reps
 20 | ON accounts.sales_rep_id = sales_reps.id 
 21 | AND accounts.primary_poc < sales_reps.name 
 22 | 
 23 | -- SELF Joins 
 24 | 
 25 | SELECT o1.id AS o1_id,
 26 |        o1.account_id AS o1_account_id,
 27 |        o1.occurred_at AS o1_occurred_at,
 28 |        o2.id AS o2_id,
 29 |        o2.account_id AS o2_account_id,
 30 |        o2.occurred_at AS o2_occurred_at
 31 |   FROM orders o1
 32 |  LEFT JOIN orders o2
 33 |    ON o1.account_id = o2.account_id
 34 |   AND o2.occurred_at > o1.occurred_at
 35 |   AND o2.occurred_at <= o1.occurred_at + INTERVAL '28 days'
 36 | ORDER BY o1.account_id, o1.occurred_at
 37 | 
 38 | -- Modify the query above to perform the same interval analysis, but on the web_events table.
 39 | -- change the interval to 1 day to find those web events that occurred after, but not more than 1 day after, another web event
 40 | -- add a column for the channel variable in both instances of the table in your query
 41 | 
 42 | 
 43 | SELECT w1.account_id, 
 44 | 		w1.occurred_at, 
 45 | 		w1.id, 
 46 | 		w1.channel,
 47 | 		w2.account_id, 
 48 | 		w2.occurred_at,
 49 | 		w2.id,
 50 | 		w2.channel
 51 | 		
 52 | FROM web_events w1
 53 | LEFT JOIN web_events w2
 54 | ON w1.account_id = w2.account_id
 55 | AND w1.occurred_at > w2.occurred_at
 56 | AND w1.occurred_at <= w2.occurred_at + INTERVAL '1 days'
 57 | ORDER by w1.account_id, w1.occurred_at
 58 | 
 59 | 
 60 | -- Udacity Answer: 
 61 | SELECT we1.id AS we_id,
 62 |        we1.account_id AS we1_account_id,
 63 |        we1.occurred_at AS we1_occurred_at,
 64 |        we1.channel AS we1_channel,
 65 |        we2.id AS we2_id,
 66 |        we2.account_id AS we2_account_id,
 67 |        we2.occurred_at AS we2_occurred_at,
 68 |        we2.channel AS we2_channel
 69 |   FROM web_events we1 
 70 |  LEFT JOIN web_events we2
 71 |    ON we1.account_id = we2.account_id
 72 |   AND we1.occurred_at > we2.occurred_at
 73 |   AND we1.occurred_at <= we2.occurred_at + INTERVAL '1 day'
 74 | ORDER BY we1.account_id, we2.occurred_at
 75 | 
 76 | 
 77 | -- Optimization 
 78 | -- A query for calculating different daily metrics such as active sales reps, orders, and web events.
 79 | -- This query results in 79k records. 
 80 | 
 81 | SELECT DATE_TRUNC('day',o.occurred_at) as date, 
 82 | 		COUNT(DISTINCT a.sales_rep_id) as active_sales_reps,
 83 | 		COUNT(DISTINCT o.id) as orders,
 84 | 		COUNT(DISTINCT we.id) as web_visits
 85 | 
 86 | FROM accounts a  
 87 | JOIN orders o
 88 | ON a.id = o.account_id
 89 | JOIN web_events we
 90 | ON a.id = we.account_id
 91 | GROUP BY 1
 92 | ORDER BY 1 DESC 
 93 | 
 94 | 
 95 | -- Here orders and web_events are both joined to accounts on account_id, & this is causing a data explosion.
 96 | -- What happens is that every order row of an account gets paired with every web event row of the same account.
 97 | -- As a result the number of rows is very high.
 98 | -- Due to this issue, we have to use COUNT(DISTINCT ...) instead of a regular COUNT to get accurate counts of the metrics.
 99 | 
100 | 
101 | -- We can get the same results in a more efficient way.
102 | -- Do the aggregations separately, which is faster because the counts are performed on far smaller datasets.
103 | 
104 | 
105 | 
106 | -- First Sub Query 
107 | 
108 | -- Daily order metrics; DISTINCT so a rep with several orders in a day is counted once.
109 | SELECT DATE_TRUNC('day', o.occurred_at) AS date,
110 | 		COUNT(DISTINCT a.sales_rep_id) AS active_sales_reps,
111 | 		COUNT(o.id) AS orders
112 | FROM accounts a
113 | JOIN orders o
114 | ON a.id = o.account_id
115 | GROUP BY 1
116 | 
117 | -- Second sub query
118 | 
119 | SELECT DATE_TRUNC('day', we.occurred_at) as date, 
120 | 		COUNT(we.id) as web_events
121 | FROM  web_events we 
122 | GROUP BY 1 
123 | 
124 | -- Now we can join these 2 tables
125 | 
126 | 
127 | SELECT COALESCE(orders.date, web_events.date) AS date,
128 | 		orders.active_sales_reps,
129 | 		orders.orders,
130 | 		web_events.web_visits
131 | 	FROM (
132 | 			-- Pre-aggregate orders per day before joining, so the join works on one row per day.
133 | 			SELECT DATE_TRUNC('day', o.occurred_at) AS date,
134 | 				-- DISTINCT: a rep with several orders on a day is one active rep, not several.
135 | 				COUNT(DISTINCT a.sales_rep_id) AS active_sales_reps,
136 | 				COUNT(o.id) AS orders
137 | 				FROM accounts a
138 | 				JOIN orders o
139 | 				ON a.id = o.account_id
140 | 				GROUP BY 1
141 | 			) orders
142 | 	FULL JOIN
143 | 	(
144 | 		-- Pre-aggregate web events per day.
145 | 		SELECT DATE_TRUNC('day', we.occurred_at) AS date,
146 | 		COUNT(we.id) AS web_visits
147 | 		FROM web_events we
148 | 		GROUP BY 1
149 | 	) web_events
150 | 	ON web_events.date = orders.date
151 | 	ORDER BY 1 DESC
152 | -- FULL JOIN keeps days that appear in only one of the two daily aggregates; COALESCE fills the date from whichever side has it.
153 | 


--------------------------------------------------------------------------------
/Udacity Course/SQL_Window.sql:
--------------------------------------------------------------------------------
  1 | -- Quiz Question 1 
  2 | --  create a running total of standard_amt_usd (in the orders table) over order time with no date truncation.
  3 | -- Your final table should have two columns: 
  4 | -- one with the amount being added for each new row, 
  5 | -- and a second with the running total.
  6 | 
  7 | SELECT standard_amt_usd, 
  8 | 		SUM(standard_amt_usd) OVER (ORDER BY occurred_at ) as running_total
  9 | FROM orders
 10 | 
 11 | -- Quiz Question 2 
 12 | -- Creating a Partitioned Running Total Using Window Functions
 13 | -- create a running total of standard_amt_usd (in the orders table) over order time,
 14 | -- but this time, 
 15 | --date truncate occurred_at by year
 16 | -- and partition by that same year-truncated occurred_at variable. 
 17 | --Your final table should have three columns:
 18 | -- One with the amount being added for each row, 
 19 | --one for the truncated date, 
 20 | -- and a final column with the running total within each year.
 21 | 
 22 | -- ORDER BY inside the window is what makes the SUM cumulative; without it every row gets the year total.
 23 | SELECT standard_amt_usd,
 24 | 		DATE_TRUNC('year', occurred_at) AS year,
 25 | 		SUM(standard_amt_usd) OVER (PARTITION BY DATE_TRUNC('year', occurred_at) ORDER BY occurred_at) AS running_total
 26 | FROM orders
 27 | -- Udacity Solution 
 28 | SELECT standard_amt_usd,
 29 |        DATE_TRUNC('year', occurred_at) as year,
 30 |        SUM(standard_amt_usd) OVER (PARTITION BY DATE_TRUNC('year', occurred_at) ORDER BY occurred_at) AS running_total
 31 | FROM orders
 32 | 
 33 | -- Quiz Row Number & Rank 
 34 | -- Select the id, account_id, and total variable from the orders table,
 35 | --  then create a column called total_rank 
 36 | -- that ranks this total amount of paper ordered (from highest to lowest) for each account using a partition.
 37 | --  Your final table should have these four columns.
 38 | 
 39 | SELECT id, account_id, total,
 40 |        RANK() OVER (PARTITION BY account_id ORDER BY total desc) AS total_rank
 41 | FROM orders
 42 | 
 43 | -- No ORDER BY in any window below: DENSE_RANK() treats all rows of an account as peers (rank 1), and each aggregate is computed over the account's whole partition rather than as a running value.
 44 | SELECT id,
 45 |        account_id,
 46 |        standard_qty,
 47 |        DATE_TRUNC('month', occurred_at) AS month,
 48 |        DENSE_RANK() OVER (PARTITION BY account_id ) AS dense_rank,
 49 |        SUM(standard_qty) OVER (PARTITION BY account_id ) AS sum_std_qty,
 50 |        COUNT(standard_qty) OVER (PARTITION BY account_id ) AS count_std_qty,
 51 |        AVG(standard_qty) OVER (PARTITION BY account_id ) AS avg_std_qty,
 52 |        MIN(standard_qty) OVER (PARTITION BY account_id ) AS min_std_qty,
 53 |        MAX(standard_qty) OVER (PARTITION BY account_id ) AS max_std_qty
 54 | FROM orders
 55 | 
 56 | 
 57 | -- Aliases 
 58 | 
 59 | 
 60 | --  create and use an alias to shorten the following query (which is different than the one in Derek's previous video)
 61 | --  that has multiple window functions. Name the alias account_year_window, which is more descriptive than main_window in the example above.
 62 | 
 63 | SELECT id,
 64 |        account_id,
 65 |        DATE_TRUNC('year',occurred_at) AS year,
 66 |        DENSE_RANK() OVER (PARTITION BY account_id ORDER BY DATE_TRUNC('year',occurred_at)) AS dense_rank,
 67 |        total_amt_usd,
 68 |        SUM(total_amt_usd) OVER (PARTITION BY account_id ORDER BY DATE_TRUNC('year',occurred_at)) AS sum_total_amt_usd,
 69 |        COUNT(total_amt_usd) OVER (PARTITION BY account_id ORDER BY DATE_TRUNC('year',occurred_at)) AS count_total_amt_usd,
 70 |        AVG(total_amt_usd) OVER (PARTITION BY account_id ORDER BY DATE_TRUNC('year',occurred_at)) AS avg_total_amt_usd,
 71 |        MIN(total_amt_usd) OVER (PARTITION BY account_id ORDER BY DATE_TRUNC('year',occurred_at)) AS min_total_amt_usd,
 72 |        MAX(total_amt_usd) OVER (PARTITION BY account_id ORDER BY DATE_TRUNC('year',occurred_at)) AS max_total_amt_usd
 73 | FROM orders
 74 | 
 75 | 
 76 | -- Below is the shortened version created using a window alias.
 77 | 
 78 | SELECT id,
 79 |        account_id,
 80 |        DATE_TRUNC('year',occurred_at) AS year,
 81 |        DENSE_RANK() OVER account_year_window AS dense_rank,
 82 |        total_amt_usd,
 83 |        SUM(total_amt_usd) OVER account_year_window AS sum_total_amt_usd,
 84 |        COUNT(total_amt_usd) OVER account_year_window AS count_total_amt_usd,
 85 |        AVG(total_amt_usd) OVER account_year_window AS avg_total_amt_usd,
 86 |        MIN(total_amt_usd) OVER account_year_window AS min_total_amt_usd,
 87 |        MAX(total_amt_usd) OVER account_year_window AS max_total_amt_usd
 88 | FROM orders
 89 | WINDOW account_year_window AS (PARTITION BY account_id ORDER BY DATE_TRUNC('year',occurred_at)) 
 90 | 
 91 | 
 92 | -- LAG & Lead Quiz 
 93 |  
 94 |  --  Imagine you're an analyst at Parch & Posey and you want to
 95 | --  determine how the current order's total revenue ("total" meaning from sales of all types of paper) 
 96 |  -- compares to the next order's total revenue.
 97 |  
 98 |  -- Modify Derek's query from the previous video in the SQL Explorer below to perform this analysis.
 99 | --  You'll need to use occurred_at and total_amt_usd in the orders table along with LEAD to do so.
100 | --  In your query results, there should be four columns: occurred_at, total_amt_usd, lead, and lead_difference.
101 | 
102 | -- Original Query 
103 | SELECT account_id,
104 |        standard_sum,
105 |        LAG(standard_sum) OVER (ORDER BY standard_sum) AS lag,
106 |        LEAD(standard_sum) OVER (ORDER BY standard_sum) AS lead,
107 |        standard_sum - LAG(standard_sum) OVER (ORDER BY standard_sum) AS lag_difference,
108 |        LEAD(standard_sum) OVER (ORDER BY standard_sum) - standard_sum AS lead_difference
109 | FROM (
110 | SELECT account_id,
111 |        SUM(standard_qty) AS standard_sum
112 |   FROM orders 
113 |  GROUP BY 1
114 |  ) sub
115 |  
116 |  -- Changes Required 
117 |  -- Need to find the current order's total revenue; the nested query above is finding standard_sum
118 |  -- We can modify it to find total for each order. 
119 |  
120 |  -- Query for total of each order 
121 |  SELECT occurred_at, account_id,
122 |        SUM(total_amt_usd) AS total_sum
123 |   FROM orders 
124 |  GROUP BY 1, 2
125 |  
126 |  -- Now using this query, I need to find how much for each order differs. 
127 |  
128 |  -- Problem is how to use occurred_at & why to use occurred at
129 |  -- I think the reason is that we want to order our results from past to present so that we can see how much we are increasing decreasing in total over time. 
130 |  
131 |  
132 |  SELECT occurred_at,
133 | 		account_id,
134 | 		total_sum,
135 | 		LEAD(total_sum) OVER (ORDER BY occurred_at ) as lead, 
136 | 		LEAD(total_sum) OVER (ORDER BY occurred_at ) - total_sum as lead_difference
137 | 		FROM (
138 | 			SELECT occurred_at, account_id,
139 | 			SUM(total_amt_usd) AS total_sum
140 | 			FROM orders 
141 | 			GROUP BY 1, 2
142 | 		) sub 
143 | 		
144 | -- Udacity Query 
145 |  
146 | SELECT occurred_at,
147 |        total_amt_usd,
148 |        LEAD(total_amt_usd) OVER (ORDER BY occurred_at) AS lead,
149 |        LEAD(total_amt_usd) OVER (ORDER BY occurred_at) - total_amt_usd AS lead_difference
150 | FROM (
151 | SELECT occurred_at,
152 |        SUM(total_amt_usd) AS total_amt_usd
153 |   FROM orders 
154 |  GROUP BY 1
155 | ) sub
156 | 
157 | -- My mistake: I should have read and understood properly that this is a time-based question, so there is no need to add account_id to the query.
158 | -- Inner query just gives result for each date entry 
159 | -- Outer query use it to compute the results over time. 
160 | 
161 | 
162 | 
163 | 
164 | -- NTILE Quiz 
165 | 
166 | -- Imagine you're an analyst at Parch & Posey and you want to determine the largest orders 
167 | -- (in terms of quantity) a specific customer has made to encourage them to order more similarly sized large orders.
168 | 
169 | -- We only want to use NTILE for that customer's account_id 
170 | 
171 | 
172 | -- Query 1 
173 | -- Use the NTILE functionality to divide the accounts into 4 levels 
174 | -- in terms of the amount of standard_qty for their orders. 
175 | -- Your resulting table should have the 
176 | --account_id, the occurred_at time for each order, the total amount of standard_qty paper purchased, and one of four levels in a standard_quartile column.
177 | 
178 | 
179 | SELECT account_id,
180 | 		occurred_at, 
181 | 		standard_qty, 
182 | 		NTILE(4) OVER (PARTITION by account_id ORDER BY standard_qty) as standard_quartile
183 | FROM orders
184 | 
185 | 
186 | -- MISTAKE
187 | -- I made a mistake: I did not use PARTITION BY, even though the question stated "You only want to consider the NTILE for that customer's account_id."
188 | -- When I don't partition by account_id, the whole table is treated as a single window.
189 | -- In that case the quartiles are assigned across all orders together, so the lowest standard_qty rows overall land in the first quartile.
190 | 
191 | 
192 | -- Query 2 
193 | 
194 | -- Use the NTILE functionality to divide the accounts into two levels in terms of the amount of gloss_qty for their orders.
195 | -- Your resulting table should have the 
196 | --account_id, the occurred_at time for each order, the total amount of gloss_qty paper purchased, and one of two levels in a gloss_half column.
197 | 
198 | 
199 | SELECT account_id,
200 | 		occurred_at, 
201 | 		gloss_qty, 
202 | 		NTILE(2) OVER (PARTITION by account_id ORDER BY gloss_qty) as gloss_half
203 | FROM orders
204 | 
205 | -- What is this query doing ? 
206 | -- PARTITION BY defines the window (one per account_id); ORDER BY decides the order in which that window's rows are dealt into buckets.
207 | 
208 | -- For example, gloss_qty has a max of 14281, a min of 0 and an average of 146 over the whole table.
209 | -- NTILE does not split on values (it is not "14281/2"): it splits the ordered rows into buckets holding as equal a number of rows as possible.
210 | -- With PARTITION BY, that split is done separately on the rows of each window.
211 | 
212 | -- For example, account id 1151 has 2 rows, with values 0 & 50: the row with 0 goes to bucket 1 and the row with 50 goes to bucket 2.
213 | 
214 | 
215 | -- Query 3 
216 | -- Use the NTILE functionality to divide the orders for each account into 100 levels in terms of the amount of total_amt_usd for their orders. 
217 | ---Your resulting table should have the account_id, 
218 | --the occurred_at time for each order, 
219 | --the total amount of total_amt_usd paper purchased, 
220 | --and one of 100 levels in a total_percentile column
221 | 
222 | 
223 | 
224 | -- Column is named total_percentile as the question asks; rows are sorted by bucket within each account for a stable read.
225 | SELECT account_id,
226 | 		occurred_at,
227 | 		total_amt_usd,
228 | 		NTILE(100) OVER (PARTITION BY account_id ORDER BY total_amt_usd) AS total_percentile
229 | FROM orders
230 | ORDER BY account_id, total_percentile


--------------------------------------------------------------------------------
/Udacity Course/SQLData Cleaning.sql:
--------------------------------------------------------------------------------
  1 | 
  2 | -- QUIZ LEFt & Right 
  3 | 
  4 | -- Question 1 
  5 | --In the accounts table, there is a column holding the website for each company.
  6 | -- The last three digits specify what type of web address they are using. 
  7 | --A list of extensions (and pricing) is provided here.
  8 | -- Pull these extensions and provide how many of each website type exist in the accounts table.
  9 | select  Count(Distinct(RIGHT (website, 3)))
 10 | as extension 
 11 | from accounts
 12 | limit 10;
 13 | -- this only returns the number of unique domain types
 14 | 
 15 | -- the following query returns the number of each domain. 
 16 | 
 17 | select count(*), (RIGHT (website, 3)) as extension 
 18 | from accounts
 19 | GROUP BY extension
 20 | 
 21 | -- Question 2
 22 | --There is much debate about how much the name (or even the first letter of a company name) matters.
 23 | -- Use the accounts table to pull the first letter of each company name to see the distribution of company names that begin with each letter (or number).
 24 | 
 25 | SELECT LEFT(accounts.name, 1) as first_letter, count(*) as occurance_of_each 
 26 | FROM accounts 
 27 | GROUP BY first_letter
 28 | order by occurance_of_each desc
 29 | 
 30 | -- Question 3 
 31 | 
 32 | -- Use the accounts table and a CASE statement 
 33 | --to create two groups: one group of company names that start with a number
 34 | -- and a second group of those company names that start with a letter.
 35 | --  What proportion of company names start with a letter?
 36 | 
 37 | WITH table1 AS (
 38 | 	SELECT LEFT(accounts.name, 1) AS first_letter, COUNT(*) AS occurance_of_each
 39 | 	FROM accounts
 40 | 	GROUP BY first_letter
 41 | 	ORDER BY occurance_of_each DESC)
 42 | -- Flag each first character as numeric (1) or not (0); digits are matched as text, including '0' and '9'.
 43 | SELECT
 44 | 	CASE WHEN table1.first_letter IN ('0','1','2','3','4','5','6','7','8','9') THEN 1
 45 | 	ELSE 0 END AS Starts_Numeric
 46 | FROM table1
 47 | -- Earlier attempt used `> 0 AND WHEN ... < 9`, which is invalid syntax and compares text with integers; PostgreSQL has no isnumeric(), so an IN list of digit characters is used instead.
 48 | 
 49 | SELECT SUM(num) nums, SUM(letter) letters
 50 | FROM (SELECT name, CASE WHEN LEFT(UPPER(name), 1) IN ('0','1','2','3','4','5','6','7','8','9') 
 51 |                        THEN 1 ELSE 0 END AS num, 
 52 |          CASE WHEN LEFT(UPPER(name), 1) IN ('0','1','2','3','4','5','6','7','8','9') 
 53 |                        THEN 0 ELSE 1 END AS letter
 54 |       FROM accounts) t1;
 55 | 
 56 | ---
 57 | 
 58 | 
 59 | 
 60 | 
 61 | -- Question 4 
 62 | 
 63 | -- Consider vowels as a, e, i, o, and u. What proportion of company names start with a vowel, and what percent start with anything else?
 64 | -- Tally accounts by whether the name's first letter is a vowel (1) or not (0).
 65 | SELECT
 66 |     COUNT(*),
 67 |     CASE WHEN LEFT(UPPER(name), 1) IN ('A', 'E', 'I', 'O','U') THEN 1 ELSE 0 END AS start_vowel
 68 | FROM accounts GROUP BY start_vowel
 69 |  
 70 |  -- The sum solution given by Udacity seems more cleaner than mine. 
 71 |  
 72 |  SELECT SUM (start_vowel) as vowels, SUM (not_start_vowel) as not_vowels 
 73 |  FROM (
 74 |  
 75 |  SELECT name, 
 76 |  CASE WHEN LEFT(UPPER(name), 1) IN ('A', 'E', 'I', 'O','U')
 77 |                        THEN 1 ELSE 0 END AS start_vowel,
 78 | 					   
 79 |  CASE WHEN LEFT(UPPER(name), 1) NOT IN ('A', 'E', 'I', 'O','U')
 80 | 				   THEN 1 ELSE 0 END AS not_start_vowel 				   
 81 |  FROM accounts
 82 |  ) sub 
 83 | 
 84 | 
 85 | -- QUiz POSITION & STRPOS 
 86 | -- Use the accounts table to create first and last name columns that hold the first and last names for the primary_poc.
 87 | 
 88 | -- We need to find the space
 89 | -- Once the space position is found, we need to use LEFT & RIGHT based on that
 90 | 
 91 | SELECT primary_poc, POSITION (' ' IN primary_poc),
 92 | 
 93 | LEFT(primary_poc, POSITION (' ' IN primary_poc)-1) as first_name,
 94 | RIGHT(primary_poc, LENGTH(primary_poc)- POSITION (' ' IN primary_poc)) as last_name
 95 | FROM accounts 
 96 | 
 97 | -- Udacity Solution 
 98 | SELECT LEFT(primary_poc, STRPOS(primary_poc, ' ') -1 ) first_name, 
 99 | RIGHT(primary_poc, LENGTH(primary_poc) - STRPOS(primary_poc, ' ')) last_name
100 | FROM accounts;
101 | 
102 | -- Question 2 
103 | -- Now see if you can do the same thing for every rep name in the sales_reps table. Again provide first and last name columns.
104 | 
105 | -- Split each rep name at its first space; the space index is computed once in the derived table.
106 | SELECT name, space_pos AS "position", LEFT(name, space_pos - 1) AS first_name,
107 |        RIGHT(name, LENGTH(name) - space_pos) AS last_name
108 | FROM (SELECT name, POSITION(' ' IN name) AS space_pos FROM sales_reps) AS split_src
109 | 
110 | -- Lets do the same question using STRPOS 
111 | SELECT
112 | LEFT(name, STRPOS(name,' ')-1) as first_name,
113 | RIGHT(name, LENGTH(name)- STRPOS (name,' ')) as last_name
114 | FROM sales_reps 
115 | 
116 | 
117 | -- Quiz CONCAT 
118 | -- Each company in the accounts table wants to create an email address for each primary_poc.
119 | --  The email address should be the first name of the primary_poc . last name primary_poc @ company name .com.
120 | -- first extract the first & last names
121 | -- get the name column 
122 | -- Build first.last@company.com; every CONCAT argument must be comma-separated (the '.' literal was missing its comma).
123 | SELECT
124 | CONCAT (
125 | 	LEFT(primary_poc, STRPOS(primary_poc,' ')-1),   -- first name: text before the first space
126 | 	'.',
127 | 	RIGHT(primary_poc, LENGTH(primary_poc)- STRPOS (primary_poc,' ')),   -- last name: text after the first space
128 | 	'@',name,
129 | 	'.com'
130 | 	)
131 | FROM accounts
132 | 
133 | -- Udacity Solution 
134 | -- They used CTES which looks better
135 | -- Handled for space as well. 
136 | WITH t1 AS (
137 |  SELECT LEFT(primary_poc,     STRPOS(primary_poc, ' ') -1 ) first_name,  RIGHT(primary_poc, LENGTH(primary_poc) - STRPOS(primary_poc, ' ')) last_name, name
138 |  FROM accounts)
139 | SELECT first_name, last_name, CONCAT(first_name, '.', last_name, '@', name, '.com')
140 | FROM t1;
141 | 
142 | -- Question 2 
143 | 
144 | -- You may have noticed that in the previous solution some of the company names include spaces,
145 | -- which will certainly not work in an email address.
146 | -- See if you can create an email address that will work by removing all of the spaces in the account name,
147 | -- but otherwise your solution should be just as in question 1. Some helpful documentation is here.
148 | -- https://www.postgresql.org/docs/8.1/functions-string.html
149 | 
150 | WITH t1 AS (
151 |  SELECT LEFT(primary_poc,     STRPOS(primary_poc, ' ') -1 ) first_name,  RIGHT(primary_poc, LENGTH(primary_poc) - STRPOS(primary_poc, ' ')) last_name, name
152 |  FROM accounts)
153 | SELECT first_name, last_name, CONCAT(first_name, '.', last_name, '@', REPLACE(name, ' ', ''), '.com')
154 | FROM t1;
155 | 
156 | 
157 | -- Question 3 
158 | 
159 | -- We would also like to create an initial password,
160 | -- which they will change after their first log in.
161 | -- The first password will be the first letter of the primary_poc's first name (lowercase), 
162 | --then the last letter of their first name (lowercase), 
163 | --the first letter of their last name (lowercase),
164 | -- the last letter of their last name (lowercase),
165 | -- the number of letters in their first name,
166 | -- the number of letters in their last name,
167 | -- and then the name of the company they are working with,
168 | -- all capitalized with no spaces.
169 | 
170 | -- t1 splits primary_poc into first/last name; t2 derives every password piece; the final SELECT joins them in spec order.
171 | WITH t1 AS (
172 |  SELECT LEFT(primary_poc,STRPOS(primary_poc, ' ') -1 ) first_name,
173 |  RIGHT(primary_poc, LENGTH(primary_poc) - STRPOS(primary_poc, ' ')) last_name,
174 |  name
175 |  FROM accounts),
176 |  
177 |   t2 as (
178 |  SELECT 
179 |  -- letters are lower-cased; the last-name letters were missing from the earlier version
180 |  LOWER(LEFT (first_name,1)) as ist_istname, LOWER(RIGHT (first_name,1)) as last_istname,
181 |  LOWER(LEFT (last_name,1)) as ist_lastname, LOWER(RIGHT (last_name,1)) as last_lastname,
182 |  LENGTH (first_name) as len_1stname, 
183 |  LENGTH (last_name) as len_LAstname,
184 |  REPLACE(UPPER(name), ' ', '') as upper_name   -- company name capitalized with spaces removed
185 |   from t1
186 | )
187 | select CONCAT(ist_istname,last_istname,ist_lastname,last_lastname,len_1stname,len_LAstname,upper_name  )
188 | FROM t2
189 | 
190 | -- So I was not able to concatenate by extracting within the concatenate
191 | 
192 | -- Udacity Solution 
193 | WITH t1 AS (
194 |  SELECT LEFT(primary_poc,
195 |  STRPOS(primary_poc, ' ') -1 ) first_name,
196 |  RIGHT(primary_poc, LENGTH(primary_poc) - STRPOS(primary_poc, ' ')) last_name,
197 |  name
198 |  FROM accounts)
199 |  
200 | SELECT 
201 | first_name, 
202 | last_name, 
203 | CONCAT(first_name, '.', last_name, '@', name, '.com'),
204 | LEFT(LOWER(first_name), 1) || RIGHT(LOWER(first_name), 1) || LEFT(LOWER(last_name), 1) || RIGHT(LOWER(last_name), 1) 
205 | || LENGTH(first_name) || LENGTH(last_name) || REPLACE(UPPER(name), ' ', '')
206 | FROM t1;
207 | 
208 | 
209 | -- CAST Section 
210 | 
211 | -- Date is in this format = 01/31/2014 08:00:00 AM +0000
212 | -- IN SQL correct format is yyyy-mm-day
213 | 
214 | 
215 | -- Ist we need to extract year, month, day separately 
216 | -- Than we need to concatenate them 
217 | -- Syntax: substring(string [from <str_pos>] [for <ext_char>])
218 | 
219 | SELECT 
220 | (SUBSTRING(date,7,4) ||'-'|| SUBSTRING(date,1,2) || '-' || SUBSTRING(date,4,2))::date as date_formatted
221 | FROM sf_crime_data
222 | 
223 | -- Udacity Solution 
224 | SELECT date orig_date, (SUBSTR(date, 7, 4) || '-' || LEFT(date, 2) || '-' || SUBSTR(date, 4, 2))::DATE new_date
225 | FROM sf_crime_data;
226 | 
227 | 
228 | 
229 | -- COALESCE
230 | 
231 | SELECT  COALESCE(a.id, a.id) filled_id,a.name, a.website, a.lat, a.long, a.primary_poc, a.sales_rep_id,o.*
232 | FROM accounts a
233 | LEFT JOIN orders o
234 | ON a.id = o.account_id
235 | WHERE o.total IS NULL;
236 | 
237 | 
238 | SELECT  COALESCE(o.id, a.id) filled_id,a.name, a.website, a.lat, a.long, a.primary_poc, a.sales_rep_id,o.*
239 | FROM accounts a
240 | LEFT JOIN orders o
241 | ON a.id = o.account_id
242 | WHERE o.total IS NULL;
243 | 
244 | 
245 | 
246 | 
247 | 
248 | 
249 | 
250 | 
-- Same filter, but the order columns are listed explicitly and the missing
-- o.account_id is filled in with the account's own id.
SELECT
    COALESCE(a.id, a.id) AS filled_id,
    a.name,
    a.website,
    a.lat,
    a.long,
    a.primary_poc,
    a.sales_rep_id,
    COALESCE(o.account_id, a.id) AS account_id,
    o.occurred_at,
    o.standard_qty,
    o.gloss_qty,
    o.poster_qty,
    o.total,
    o.standard_amt_usd,
    o.gloss_amt_usd,
    o.poster_amt_usd,
    o.total_amt_usd
FROM accounts AS a
LEFT JOIN orders AS o
    ON o.account_id = a.id
WHERE o.total IS NULL;
259 | 
260 | 
-- Final step: additionally replace every NULL quantity and amount with 0.
-- o.occurred_at is left as-is (it stays NULL when there is no order).
SELECT
    COALESCE(a.id, a.id) AS filled_id,
    a.name,
    a.website,
    a.lat,
    a.long,
    a.primary_poc,
    a.sales_rep_id,
    COALESCE(o.account_id, a.id) AS account_id,
    o.occurred_at,
    COALESCE(o.standard_qty, 0) AS standard_qty,
    COALESCE(o.gloss_qty, 0) AS gloss_qty,
    COALESCE(o.poster_qty, 0) AS poster_qty,
    COALESCE(o.total, 0) AS total,
    COALESCE(o.standard_amt_usd, 0) AS standard_amt_usd,
    COALESCE(o.gloss_amt_usd, 0) AS gloss_amt_usd,
    COALESCE(o.poster_amt_usd, 0) AS poster_amt_usd,
    COALESCE(o.total_amt_usd, 0) AS total_amt_usd
FROM accounts AS a
LEFT JOIN orders AS o
    ON o.account_id = a.id
WHERE o.total IS NULL;


--------------------------------------------------------------------------------
/Udacity Course/SQL_SubQueries.sql:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | -- Find the number of events that occur for each day for each channel. 
  4 | 
  5 | select channel, avg(num_events) as a_events
  6 | from 
  7 | (
  8 | SELECT DATE_TRUNC('day', occurred_at) as day, channel, count(*) as num_events
  9 | FROM web_events
 10 | group by 1, 2) sub
 11 | group by channel
 12 | order by a_events desc
 13 | 
 14 | -- Use date_trunc to pull month level information about the fuirst order ever placed
 15 | 
 16 | 
 17 | 
 18 | 
 19 | 
 20 | 
 21 | select  avg(standard_qty) as std_avg, avg(gloss_qty) gloss_avg, avg(poster_qty) as post_avg, sum(total_amt_usd)
 22 | from orders
 23 | 	where DATE_TRUNC('month',occurred_at) = (
 24 | 		SELECT MIN(DATE_TRUNC('month',occurred_at)) 
 25 | 		FROM web_events
 26 | 		)
 27 | 		
 28 | 		
 29 | 		
 30 | 		
 31 | 		
 32 | 		
 33 | -- Provide the name of the sales_rep in each region with the largest amount of total_amt_usd sales.
 34 | 
 35 | -- First, I wanted to find the total_amt_usd totals associated with each sales rep, 
 36 | --and I also wanted the region in which they were located. The query below provided this information.
 37 | 
 38 | SELECT s.name rep_name, r.name region_name, SUM(o.total_amt_usd) total_amt
 39 | FROM sales_reps s
 40 | JOIN accounts a
 41 | ON a.sales_rep_id = s.id
 42 | JOIN orders o
 43 | ON o.account_id = a.id
 44 | JOIN region r
 45 | ON r.id = s.region_id
 46 | GROUP BY 1,2
 47 | ORDER BY 3 DESC;
 48 | 
 49 | -- Next, I pulled the max for each region, and then we can use this to pull those rows in our final result.
 50 | SELECT region_name, MAX(total_amt) total_amt
 51 |      FROM(SELECT s.name rep_name, r.name region_name, SUM(o.total_amt_usd) total_amt
 52 |              FROM sales_reps s
 53 |              JOIN accounts a
 54 |              ON a.sales_rep_id = s.id
 55 |              JOIN orders o
 56 |              ON o.account_id = a.id
 57 |              JOIN region r
 58 |              ON r.id = s.region_id
 59 |              GROUP BY 1, 2) t1
 60 |      GROUP BY 1;
 61 | 
 62 | 
 63 | -- Essentially, this is a JOIN of these two tables, where the region and amount match.
 64 | -- We have the region & the max amount fromt the 2nd table, we will check from the first table where this quantity will occur. 
 65 | 
 66 | SELECT t3.rep_name, t3.region_name, t3.total_amt
 67 | 
 68 | FROM (
 69 | SELECT region_name, MAX(total_amt) total_amt
 70 |      FROM(SELECT s.name rep_name, r.name region_name, SUM(o.total_amt_usd) total_amt
 71 |              FROM sales_reps s
 72 |              JOIN accounts a
 73 |              ON a.sales_rep_id = s.id
 74 |              JOIN orders o
 75 |              ON o.account_id = a.id
 76 |              JOIN region r
 77 |              ON r.id = s.region_id
 78 |              GROUP BY 1, 2) t1
 79 |      GROUP BY 1) t2
 80 | 
 81 | JOIN (
 82 | 	SELECT s.name rep_name, r.name region_name, SUM(o.total_amt_usd) total_amt
 83 | 		FROM sales_reps s
 84 | 			JOIN accounts a
 85 | 			ON a.sales_rep_id = s.id
 86 | 			JOIN orders o
 87 | 			ON o.account_id = a.id
 88 | 			JOIN region r
 89 | 			ON r.id = s.region_id
 90 | 			GROUP BY 1,2
 91 | 		ORDER BY 3 DESC
 92 | ) t3
 93 | 
 94 | ON t2.region_name = t3.region_name AND t3.total_amt = t2.total_amt;
 95 | 
 96 | 
 97 | -- For the region with the largest (sum) of sales total_amt_usd, how many total (count) orders were placed?
 98 | 
 99 | 
100 | 			-- this query will calculate the sum of total_amt_usd with respect to regions 
101 | 			SELECT reg, max(total) largest_sum
102 | 			FROM 
103 | 			(
104 | 				SELECT region.name as reg, SUM(orders.total_amt_usd) as total
105 | 				FROM region
106 | 				JOIN sales_reps
107 | 				ON region.id = sales_reps.region_id
108 | 				JOIN accounts
109 | 				ON sales_reps.id = accounts.sales_rep_id
110 | 				JOIN orders
111 | 				ON accounts.id = orders.account_id
112 | 				group by region.name
113 | 				order by total desc 
114 | 				limit 1
115 | 				) sub
116 | 			order by largest_sum desc
117 | 			
118 | 			
119 | 			
-- Count the number of orders for each region, join that to the per-region
-- sales total, and keep the region with the largest total.
SELECT
    order_counts.reg,
    order_counts.num_orders
FROM (
    -- a column alias cannot be qualified, so it is plain "reg" here
    SELECT
        region.name AS reg,
        COUNT(orders.total) AS num_orders
    FROM region
    INNER JOIN sales_reps
        ON region.id = sales_reps.region_id
    INNER JOIN accounts
        ON sales_reps.id = accounts.sales_rep_id
    INNER JOIN orders
        ON accounts.id = orders.account_id
    GROUP BY region.name
) AS order_counts
INNER JOIN (
    SELECT
        region.name AS reg,
        SUM(orders.total_amt_usd) AS total
    FROM region
    INNER JOIN sales_reps
        ON region.id = sales_reps.region_id
    INNER JOIN accounts
        ON sales_reps.id = accounts.sales_rep_id
    INNER JOIN orders
        ON accounts.id = orders.account_id
    GROUP BY region.name
) AS region_sales
    ON order_counts.reg = region_sales.reg
-- both subqueries expose "reg", so every outer reference is qualified
ORDER BY region_sales.total DESC
LIMIT 1;
147 | 			
148 | 			
-- The Udacity course answered this question with a HAVING clause: keep the
-- region whose sales total equals the largest regional total.
SELECT
    r.name,
    COUNT(o.total) AS total_orders
FROM sales_reps AS s
INNER JOIN accounts AS a
    ON a.sales_rep_id = s.id
INNER JOIN orders AS o
    ON o.account_id = a.id
INNER JOIN region AS r
    ON r.id = s.region_id
GROUP BY r.name
HAVING SUM(o.total_amt_usd) = (
    SELECT MAX(total_amt)
    FROM (
        SELECT
            r.name AS region_name,
            SUM(o.total_amt_usd) AS total_amt
        FROM sales_reps AS s
        INNER JOIN accounts AS a
            ON a.sales_rep_id = s.id
        INNER JOIN orders AS o
            ON o.account_id = a.id
        INNER JOIN region AS r
            ON r.id = s.region_id
        GROUP BY r.name
    ) AS region_totals
);
170 | 
171 | 
-- How many accounts had more total purchases than the account name which has bought the most standard_qty paper throughout their lifetime as a customer?

-- Step 1: find the account with the largest summed standard_qty.
SELECT top_std.act_id
FROM (
    SELECT
        orders.account_id AS act_id,
        SUM(standard_qty) AS std_qty_sum
    FROM orders
    GROUP BY account_id
    ORDER BY std_qty_sum DESC
    LIMIT 1
) AS top_std;
185 | 
-- Step 2: find the total purchases (sum of total) for that account id.
SELECT SUM(total)
FROM orders
WHERE orders.account_id = (
    SELECT top_std.act_id
    FROM (
        SELECT
            orders.account_id AS act_id,
            SUM(standard_qty) AS std_qty_sum
        FROM orders
        GROUP BY account_id
        ORDER BY std_qty_sum DESC
        LIMIT 1
    ) AS top_std
);
198 | 	
199 | 	
200 | 	
-- Step 3: sum the purchases per account, keep the accounts whose sum is
-- greater than the total obtained for the top standard_qty account, and
-- count them. (Sorting the inner rows is unnecessary for a plain COUNT.)
SELECT COUNT(*)
FROM (
    SELECT
        accounts.name,
        SUM(total) AS total_orders
    FROM orders
    INNER JOIN accounts
        ON orders.account_id = accounts.id
    GROUP BY accounts.name
    HAVING SUM(total) > (
        SELECT SUM(total)
        FROM orders
        WHERE orders.account_id = (
            SELECT top_std.act_id
            FROM (
                SELECT
                    orders.account_id AS act_id,
                    SUM(standard_qty) AS std_qty_sum
                FROM orders
                GROUP BY account_id
                ORDER BY std_qty_sum DESC
                LIMIT 1
            ) AS top_std
        )
    )
) AS final;
227 | 	
228 | 	
-- For the customer that spent the most (in total over their lifetime as a customer) total_amt_usd, how many web_events did they have for each channel?

-- Step 1: find the account id with the largest summed total_amt_usd.
SELECT account_id
FROM (
    SELECT
        account_id,
        SUM(total_amt_usd) AS total_spending
    FROM orders
    GROUP BY orders.account_id
    ORDER BY total_spending DESC
    LIMIT 1
) AS top_spender;
242 | 
-- Step 2: using that id, count the web events per channel for the account,
-- most used channel first.
SELECT
    accounts.name,
    web_events.channel,
    COUNT(*)
FROM accounts
INNER JOIN web_events
    ON accounts.id = web_events.account_id
WHERE accounts.id = (
    SELECT account_id
    FROM (
        SELECT
            account_id,
            SUM(total_amt_usd) AS total_spending
        FROM orders
        GROUP BY orders.account_id
        ORDER BY total_spending DESC
        LIMIT 1
    ) AS top_spender
)
GROUP BY accounts.name, web_events.channel
ORDER BY COUNT(*) DESC;
263 | 	
264 | 	
-- What is the lifetime average amount spent in terms of total_amt_usd for the top 10 total spending accounts?

-- Step 1: find the ten accounts with the largest summed total_amt_usd.
SELECT account_id
FROM (
    SELECT
        account_id,
        SUM(total_amt_usd) AS total_spending
    FROM orders
    GROUP BY orders.account_id
    ORDER BY total_spending DESC
    LIMIT 10
) AS top_spenders;
278 | 
-- Calculate the lifetime average of the top 10 customers.
-- The question asks for ONE number: the average of the ten lifetime totals
-- taken all together. (An earlier attempt averaged the order amounts per
-- account instead, which misread the question.)
SELECT AVG(total_spending) AS avg_total_spending
FROM (
    -- lifetime total per account, ten largest only
    SELECT
        account_id,
        SUM(total_amt_usd) AS total_spending
    FROM orders
    GROUP BY account_id
    ORDER BY total_spending DESC
    LIMIT 10
) AS top_spenders;
299 | 	
300 | 
301 | 
-- What is the lifetime average amount spent in terms of total_amt_usd,
-- including only the companies that spent more per order, on average, than the average of all orders.

-- Step 1: find the average total_amt_usd over all orders.
SELECT AVG(o.total_amt_usd)
FROM orders AS o;
310 | 
311 | 
-- Step 2: average order amount per account, keeping only the accounts whose
-- average is above the overall average.
SELECT
    o.account_id,
    AVG(o.total_amt_usd)
FROM orders AS o
GROUP BY o.account_id
HAVING AVG(o.total_amt_usd) > (
    SELECT AVG(total_amt_usd)
    FROM orders
);
320 | 
-- Course answer: average of the per-account averages that exceed the
-- overall average order amount.
SELECT AVG(avg_amt)
FROM (
    SELECT
        o.account_id,
        AVG(o.total_amt_usd) AS avg_amt
    FROM orders AS o
    GROUP BY o.account_id
    HAVING AVG(o.total_amt_usd) > (
        SELECT AVG(o.total_amt_usd) AS avg_all
        FROM orders AS o
    )
) AS temp_table;
328 | 								   
329 | 
330 | 
331 | --
332 | 
333 | 
334 | 
335 | 								
-- CTE Section

-- table1: sum of total_amt_usd for each sales rep within each region.
WITH table1 AS (
    SELECT
        sales_reps.region_id,
        sales_reps.name AS sr_name,
        SUM(orders.total_amt_usd) AS sr_sum
    FROM region
    INNER JOIN sales_reps
        ON region.id = sales_reps.region_id
    INNER JOIN accounts
        ON sales_reps.id = accounts.sales_rep_id
    INNER JOIN orders
        ON accounts.id = orders.account_id
    GROUP BY sales_reps.region_id, sales_reps.name
),
-- table2: the maximum per-rep sum in each region, derived from table1 rather
-- than repeating the same four-table aggregation.
table2 AS (
    SELECT
        region_id,
        MAX(sr_sum) AS max_sum
    FROM table1
    GROUP BY region_id
)
-- Join the two on region id and keep the rep whose sum equals the maximum.
SELECT
    table1.sr_name,
    table1.region_id,
    table1.sr_sum
FROM table1
INNER JOIN table2
    ON table1.region_id = table2.region_id
    AND table1.sr_sum = table2.max_sum
-- ordering is applied here; an ORDER BY inside a CTE does not carry through
ORDER BY table1.sr_sum DESC;
373 | 
374 | 
-- COURSE solution: rep_totals holds the total per rep and region, region_max
-- reuses it to find each region's largest total, and the final join picks
-- the rep that reached that total.
WITH rep_totals AS (
    SELECT
        s.name AS rep_name,
        r.name AS region_name,
        SUM(o.total_amt_usd) AS total_amt
    FROM sales_reps AS s
    INNER JOIN accounts AS a
        ON a.sales_rep_id = s.id
    INNER JOIN orders AS o
        ON o.account_id = a.id
    INNER JOIN region AS r
        ON r.id = s.region_id
    GROUP BY s.name, r.name
    ORDER BY total_amt DESC
),
region_max AS (
    SELECT
        region_name,
        MAX(total_amt) AS total_amt
    FROM rep_totals
    GROUP BY region_name
)
SELECT
    rep_totals.rep_name,
    rep_totals.region_name,
    rep_totals.total_amt
FROM rep_totals
INNER JOIN region_max
    ON rep_totals.region_name = region_max.region_name
    AND rep_totals.total_amt = region_max.total_amt;
395 | 
396 | -- This is very nice: the course solution reuses the result of the first CTE inside the 2nd CTE, which is much cleaner.
397 | 
398 | 
-- Question 2
-- For the region with the largest sales total_amt_usd, how many total orders were placed?

-- top_region: sum of total_amt_usd for each region; ordering descending and
-- limiting to 1 leaves only the region with the maximum sales.
WITH top_region AS (
    SELECT
        region.name AS region_name,
        SUM(orders.total_amt_usd) AS total_amt
    FROM region
    INNER JOIN sales_reps
        ON region.id = sales_reps.region_id
    INNER JOIN accounts
        ON accounts.sales_rep_id = sales_reps.id
    INNER JOIN orders
        ON orders.account_id = accounts.id
    GROUP BY region.name
    ORDER BY total_amt DESC
    LIMIT 1
),
-- region_orders: how many orders were placed in each region. This is a row
-- COUNT; SUM(total) would add up paper quantities instead.
region_orders AS (
    SELECT
        region.name AS region_name,
        COUNT(*) AS total_orders
    FROM region
    INNER JOIN sales_reps
        ON region.id = sales_reps.region_id
    INNER JOIN accounts
        ON accounts.sales_rep_id = sales_reps.id
    INNER JOIN orders
        ON orders.account_id = accounts.id
    GROUP BY region.name
)
-- top_region has exactly one row, so the join returns the top region
-- together with its sales total and its number of orders.
SELECT
    top_region.region_name,
    top_region.total_amt,
    region_orders.total_orders
FROM top_region
INNER JOIN region_orders
    ON top_region.region_name = region_orders.region_name;
438 | 
439 | 
-- The Udacity teacher's solution is a bit different: it takes the MAX of the
-- regional totals and then filters the grouped regions with HAVING.
WITH region_totals AS (
    SELECT
        r.name AS region_name,
        SUM(o.total_amt_usd) AS total_amt
    FROM sales_reps AS s
    INNER JOIN accounts AS a
        ON a.sales_rep_id = s.id
    INNER JOIN orders AS o
        ON o.account_id = a.id
    INNER JOIN region AS r
        ON r.id = s.region_id
    GROUP BY r.name
),
largest_total AS (
    SELECT MAX(total_amt)
    FROM region_totals
)
SELECT
    r.name,
    COUNT(o.total) AS total_orders
FROM sales_reps AS s
INNER JOIN accounts AS a
    ON a.sales_rep_id = s.id
INNER JOIN orders AS o
    ON o.account_id = a.id
INNER JOIN region AS r
    ON r.id = s.region_id
GROUP BY r.name
HAVING SUM(o.total_amt_usd) = (SELECT * FROM largest_total);
465 | 
466 | 
-- How many accounts had more total purchases than the account name which has bought the most standard_qty paper throughout their lifetime as a customer?

-- std_leader: per-account sums of standard_qty and total, reduced to the
-- single account with the highest standard_qty sum.
WITH std_leader AS (
    SELECT
        accounts.name AS act_name,
        SUM(standard_qty) AS std_qty,
        SUM(total) AS total
    FROM orders
    INNER JOIN accounts
        ON orders.account_id = accounts.id
    GROUP BY accounts.name
    ORDER BY std_qty DESC
    LIMIT 1
),
-- leader_total: just the total of that account.
leader_total AS (
    SELECT total
    FROM std_leader
),
-- bigger_accounts: accounts whose summed total exceeds the leader's total.
bigger_accounts AS (
    SELECT accounts.name
    FROM orders
    INNER JOIN accounts
        ON orders.account_id = accounts.id
    GROUP BY accounts.name
    HAVING SUM(orders.total) > (SELECT * FROM leader_total)
)
-- Finally, count them.
SELECT COUNT(*)
FROM bigger_accounts;
499 | 
-- For the customer that spent the most (in total over their lifetime as a customer) total_amt_usd, how many web_events did they have for each channel?

-- table1: the single account name with the largest summed total_amt_usd.
WITH table1 AS (
    SELECT
        accounts.name AS act_name,
        SUM(orders.total_amt_usd) AS total_spending
    FROM orders
    INNER JOIN accounts
        ON orders.account_id = accounts.id
    GROUP BY accounts.name
    ORDER BY total_spending DESC
    LIMIT 1
)
-- Web events per channel for that customer. The name test is a row filter,
-- so it goes in WHERE (applied before grouping) rather than in HAVING.
SELECT
    accounts.name,
    channel,
    COUNT(*)
FROM web_events
INNER JOIN accounts
    ON web_events.account_id = accounts.id
WHERE accounts.name = (SELECT act_name FROM table1)
GROUP BY accounts.name, channel;
525 | 
526 | 
-- Udacity solution: identify the top spender by id, then count that
-- account's web events per channel, most used channel first.
WITH top_spender AS (
    SELECT
        a.id,
        a.name,
        SUM(o.total_amt_usd) AS tot_spent
    FROM orders AS o
    INNER JOIN accounts AS a
        ON a.id = o.account_id
    GROUP BY a.id, a.name
    ORDER BY tot_spent DESC
    LIMIT 1
)
SELECT
    a.name,
    w.channel,
    COUNT(*)
FROM accounts AS a
INNER JOIN web_events AS w
    ON a.id = w.account_id
-- for an inner join this filter is equivalent to having it in the ON clause
WHERE a.id = (SELECT id FROM top_spender)
GROUP BY a.name, w.channel
ORDER BY COUNT(*) DESC;
542 | 
543 | 
544 | 
-- Question 5
-- What is the lifetime average amount spent in terms of total_amt_usd for the top 10 total spending accounts?

-- top_accounts: the ten account names with the largest summed total_amt_usd.
WITH top_accounts AS (
    SELECT
        accounts.name AS act_name,
        SUM(total_amt_usd) AS tot
    FROM orders
    INNER JOIN accounts
        ON orders.account_id = accounts.id
    GROUP BY accounts.name
    ORDER BY tot DESC
    LIMIT 10
)
-- average of those ten totals
SELECT AVG(tot)
FROM top_accounts;
-- The wording of this question was confusing to me at first.
562 | 
-- Question 6

-- What is the lifetime average amount spent in terms of total_amt_usd,
-- including only the companies that spent more per order, on average, than the average of all orders.

-- overall_avg: average amount per order (orders joined to their account).
WITH overall_avg AS (
    SELECT AVG(o.total_amt_usd) AS avg_all
    FROM orders AS o
    INNER JOIN accounts AS a
        ON a.id = o.account_id
),
-- above_avg: per-account averages that exceed the overall average.
above_avg AS (
    SELECT
        o.account_id,
        AVG(o.total_amt_usd) AS avg_amt
    FROM orders AS o
    GROUP BY o.account_id
    HAVING AVG(o.total_amt_usd) > (SELECT * FROM overall_avg)
)
SELECT AVG(avg_amt)
FROM above_avg;
581 | 
582 | 
583 | 
584 | 
585 | 
586 | 
587 | 
588 | 
589 | 


--------------------------------------------------------------------------------
/Udacity Course/SQL_Queries.sql:
--------------------------------------------------------------------------------
  1 | ## 
  2 | Extracting hour , finding the best hour for posting stories. 
  3 | 
  4 | SELECT 
  5 |    strftime('%H', timestamp)  hour, 
  6 |    ROUND(avg(score),2) hourly_score, 
  7 |    count(*) Number_of_stories
  8 | FROM hacker_news
  9 | where timestamp not NULL
 10 | GROUP BY hour
 11 | order by 1 desc;  
 12 | 
 13 | 
 14 | with play_count as
 15 | (SELECT song_id,
 16 |    COUNT(*) AS 'times_played'
 17 | FROM plays
 18 | GROUP BY song_id
 19 | )
 20 | select songs.title, songs.artist, play_count.times_played
 21 | from play_count
 22 | join songs 
 23 | on play_count.song_id = songs.id;
 24 | 
 25 | select primary_poc, occurred_at, channel, accounts.name
 26 | from accounts 
 27 | join web_events 
 28 | on accounts.id = web_events.account_id
 29 | where accounts.name = 'Walmart'
 30 | 
 31 | 
 32 | 
 33 | select sales_reps.name as name, region.name as region, accounts.name as account_name
 34 | from sales_reps
 35 | join region 
 36 | on sales_reps.region_id = region.id
 37 | join accounts 
 38 | on sales_reps.id = accounts.sales_rep_id
 39 | 
 40 | 
 41 | -- Question 1 
 42 | select sales_reps.name as name, region as region, accounts.name as accounts_name
 43 | from sales_reps
 44 | join region on sales_reps.region_id = region.id AND region.name = 'Midwest'
 45 | join accounts on sales_reps.id = accounts.sales_rep_id
 46 | order by accounts.name
 47 | 
 48 | 
 49 | 
 50 | -- Question 2
 51 | select sales_reps.name as name, region as region, accounts.name as accounts_name
 52 | from sales_reps
 53 | join region on sales_reps.region_id = region.id AND region.name = 'Midwest' 
 54 | join accounts on sales_reps.id = accounts.sales_rep_id 
 55 | where sales_reps.name like 'S%'
 56 | order by accounts.name
 57 | 
 58 | 
 59 | -- Question 2 using condition in on 
 60 | select sales_reps.name as name, region as region, accounts.name as accounts_name
 61 | from sales_reps
 62 | join region on sales_reps.region_id = region.id AND region.name = 'Midwest' 
 63 | AND sales_reps.name like 'S%'
 64 | join accounts on sales_reps.id = accounts.sales_rep_id 
 65 | order by accounts.name
 66 | 
 67 | -- Question 3
 68 | select sales_reps.name as name, region as region, accounts.name as accounts_name
 69 | from sales_reps
 70 | join region on sales_reps.region_id = region.id AND region.name = 'Midwest' 
 71 | AND sales_reps.name like '% K%'
 72 | join accounts on sales_reps.id = accounts.sales_rep_id 
 73 | order by accounts.name
 74 | 
 75 | -- Question 4
 76 | select orders.id as orderid, accounts.name as account, region.name as region, (orders.total_amt_usd/orders.total+ 0.01) as unitprice 
 77 | from sales_reps
 78 | join accounts on sales_reps.id = accounts.sales_rep_id
 79 | join orders on accounts.id = orders.account_id
 80 | join region on sales_reps.region_id = region.id
 81 | where orders.standard_qty > 100
 82 | 
 83 | -- Question 5
 84 | select region.name as region, accounts.name as account,  (orders.total_amt_usd/orders.total+ 0.01) as unitprice 
 85 | from sales_reps
 86 | join accounts on sales_reps.id = accounts.sales_rep_id
 87 | join orders on accounts.id = orders.account_id
 88 | join region on sales_reps.region_id = region.id
 89 | where orders.standard_qty > 100 AND orders.poster_qty > 50
 90 | order by unitprice 
 91 | 
 92 | -- Question 6
 93 | select region.name as region, accounts.name as account,  (orders.total_amt_usd/orders.total+ 0.01) as unitprice 
 94 | from sales_reps
 95 | join accounts on sales_reps.id = accounts.sales_rep_id
 96 | join orders on accounts.id = orders.account_id
 97 | join region on sales_reps.region_id = region.id
 98 | where orders.standard_qty > 100 AND orders.poster_qty > 50
 99 | order by unitprice desc
100 | 
-- Question 7
-- Distinct (account name, channel) pairs for account id 1001. DISTINCT
-- applies to the whole selected row, not only to the first column.
SELECT DISTINCT
    accounts.name,
    web_events.channel
FROM accounts
INNER JOIN web_events
    ON web_events.account_id = accounts.id
WHERE accounts.id = 1001;
107 | 
-- Question 8
-- Orders placed during 2015, newest first.
SELECT
    o.occurred_at,
    a.name,
    o.total,
    o.total_amt_usd
FROM accounts AS a
INNER JOIN orders AS o
    ON o.account_id = a.id
-- ISO dates are unambiguous regardless of the session's date style, and the
-- half-open range excludes an order stamped exactly at 2016-01-01 00:00,
-- which an inclusive BETWEEN would let in.
WHERE o.occurred_at >= '2015-01-01'
    AND o.occurred_at < '2016-01-01'
ORDER BY o.occurred_at DESC;
115 | 
116 | 
-- Question 5 (Min, Max, Average): find the mean (AVERAGE) amount spent per order on each paper type, as well as the mean amount of each paper type purchased per order. Your final answer should have 6 values - one for each paper type for the average number of sales, as well as the average amount.
SELECT
    AVG(o.standard_qty) AS std_avg,
    AVG(o.standard_amt_usd) AS std_avg_amt,
    AVG(o.gloss_qty) AS gloss_avg,
    AVG(o.gloss_amt_usd) AS gloss_avg_amt,
    AVG(o.poster_qty) AS post_avg,
    AVG(o.poster_amt_usd) AS post_avg_amt
FROM orders AS o;
126 | 
127 | 
-- Calculate the MEDIAN. Though this is more advanced than what we have covered so far, try finding - what is the MEDIAN total_usd spent on all orders?

-- The size of the lower half is computed from the row count instead of being
-- hard-coded (the original used 3457, presumably for a 6912-row table -
-- verify against the data set).
-- With an even number of orders the two rows returned are the middle pair
-- (average them for the median); with an odd number the first row returned
-- is the median itself.
SELECT *
FROM (
    SELECT total_amt_usd
    FROM orders
    ORDER BY total_amt_usd
    LIMIT (SELECT COUNT(*) / 2 + 1 FROM orders)
) AS Table1
ORDER BY total_amt_usd DESC
LIMIT 2;
138 | 
139 | 
-- Which account (by name) placed the earliest order? Your solution should have the account name and the date of the order.

SELECT
    accounts.name AS name,
    orders.occurred_at AS orderdate
FROM accounts
INNER JOIN orders
    ON accounts.id = orders.account_id
ORDER BY orderdate
-- only the earliest order answers the question
LIMIT 1;
146 | 
-- Or, using GROUP BY: first order date per account, then keep the account
-- whose first order is the earliest.
SELECT
    accounts.name AS name,
    MIN(orders.occurred_at) AS orderdate
FROM accounts
INNER JOIN orders
    ON accounts.id = orders.account_id
GROUP BY accounts.name
ORDER BY orderdate
LIMIT 1;
153 | 
-- Find the total sales in usd for each account. You should include two columns - the total sales for each company's orders in usd and the company name.

SELECT
    accounts.name AS name,
    -- total_amt_usd is the dollar amount; orders.total is a paper quantity
    SUM(orders.total_amt_usd) AS total_sales
FROM accounts
INNER JOIN orders
    ON accounts.id = orders.account_id
GROUP BY accounts.name;
161 | 
162 | 
-- Via what channel did the most recent (latest) web_event occur, which account was associated with this web_event? Your query should return only three values - the date, channel, and account name.

-- Sort all events by time and keep the newest one. Grouping by channel and
-- account would return one "latest" row per pair instead of a single row.
SELECT
    web_events.occurred_at AS latest_date,
    web_events.channel,
    accounts.name
FROM accounts
INNER JOIN web_events
    ON accounts.id = web_events.account_id
ORDER BY web_events.occurred_at DESC
LIMIT 1;
171 | 
-- Find the total number of times each type of channel from the web_events was used. Your final table should have two columns - the channel and the number of times the channel was used.

SELECT
    w.channel,
    COUNT(*)
FROM web_events AS w
GROUP BY w.channel;
177 | 
-- Who was the primary contact associated with the earliest web_event?
SELECT
    primary_poc,
    occurred_at
FROM accounts
INNER JOIN web_events
    ON accounts.id = web_events.account_id
-- ascending order puts the EARLIEST event first (DESC returns the latest)
ORDER BY web_events.occurred_at ASC
LIMIT 1;
185 | 
-- What was the smallest order placed by each account in terms of total usd. Provide only two columns - the account name and the total usd. Order from smallest dollar amounts to largest.

SELECT
    accounts.name AS name,
    -- total_amt_usd is the dollar amount; orders.total is a paper quantity
    MIN(orders.total_amt_usd) AS total
FROM accounts
INNER JOIN orders
    ON accounts.id = orders.account_id
GROUP BY accounts.name
-- order by the expression so the alias cannot be confused with orders.total
ORDER BY MIN(orders.total_amt_usd);
194 | 
195 | 
-- Find the number of sales reps in each region. Your final table should have two columns - the region and the number of sales_reps. Order from fewest reps to most reps


SELECT
    region.name AS name,
    COUNT(sales_reps.id)
FROM region
INNER JOIN sales_reps
    ON region.id = sales_reps.region_id
GROUP BY region.name
-- fewest reps first, as the question requires
ORDER BY COUNT(sales_reps.id);
204 | 
-- For each account, determine the average amount of each type of paper they purchased across their orders.
-- Your result should have four columns -
-- one for the account name and one for the average quantity purchased for each of the paper types for each account.

SELECT
    accounts.name AS name,
    AVG(standard_qty) AS avg_std,
    AVG(gloss_qty) AS avg_gloss,
    AVG(poster_qty) AS avg_poster
FROM accounts
INNER JOIN orders
    ON orders.account_id = accounts.id
GROUP BY accounts.name;
213 | 
-- For each account, determine the average amount spent per order on each paper type.
-- Your result should have four columns - one for the account name and one for the average amount spent on each paper type.

SELECT
    accounts.name AS name,
    AVG(standard_amt_usd) AS avg_std_amt,
    AVG(gloss_amt_usd) AS avg_gloss_amt,
    AVG(poster_amt_usd) AS avg_poster_amt
FROM accounts
INNER JOIN orders
    ON orders.account_id = accounts.id
GROUP BY accounts.name;
221 | 
-- Determine the number of times a particular channel was used in the web_events table for each sales rep.
-- Your final table should have three columns -
-- the name of the sales rep, the channel, and the number of occurrences. Order your table with the highest number of occurrences first.

SELECT
    sales_reps.name AS name,
    web_events.channel AS channel,
    COUNT(*)
FROM sales_reps
INNER JOIN accounts
    ON accounts.sales_rep_id = sales_reps.id
INNER JOIN web_events
    ON web_events.account_id = accounts.id
GROUP BY sales_reps.name, web_events.channel
ORDER BY COUNT(*) DESC;
233 | 
234 | 
-- Determine the number of times a particular channel was used in the web_events table for each region.
-- Your final table should have three columns - the region name, the channel, and the number of occurrences. Order your table with the highest number of occurrences first.

SELECT
    region.name,
    channel,
    COUNT(*)
FROM region
INNER JOIN sales_reps
    ON sales_reps.region_id = region.id
INNER JOIN accounts
    ON accounts.sales_rep_id = sales_reps.id
INNER JOIN web_events
    ON web_events.account_id = accounts.id
GROUP BY region.name, channel
ORDER BY COUNT(*) DESC;
247 | 
248 | 
-- Use DISTINCT to test if there are any accounts associated with more than one region.
-- One row per (account, region) pair; compare the row count with the number
-- of accounts to see whether any account appears under several regions.

SELECT DISTINCT
    accounts.name AS act,
    region.name AS rg
FROM accounts
INNER JOIN sales_reps
    ON accounts.sales_rep_id = sales_reps.id
INNER JOIN region
    ON sales_reps.region_id = region.id;
256 | 
-- Have any sales reps worked on more than one account?
-- One row per (sales rep, account) pair; a rep that appears on several rows
-- works on several accounts. DISTINCT already removes duplicates, so the
-- malformed "group accounts.name" clause is dropped.

SELECT DISTINCT
    sales_reps.name,
    accounts.name
FROM accounts
INNER JOIN sales_reps
    ON accounts.sales_rep_id = sales_reps.id
ORDER BY sales_reps.name;
263 | 
264 | 
-- Having Section

-- How many of the sales reps have more than 5 accounts that they manage?
-- One row per qualifying rep; the number of rows returned is the answer.

SELECT
    sales_reps.name AS name,
    COUNT(DISTINCT accounts.id) AS acts
FROM accounts
INNER JOIN sales_reps
    ON accounts.sales_rep_id = sales_reps.id
GROUP BY sales_reps.name
-- the "more than 5 accounts" condition from the question
HAVING COUNT(DISTINCT accounts.id) > 5
ORDER BY acts;
273 | 
-- OR: group by the rep id (together with the name) so that two reps who
-- share a name are not merged; the threshold is 5 accounts, per the question.
SELECT
    sales_reps.id,
    sales_reps.name,
    COUNT(*) AS num_accounts
FROM accounts
INNER JOIN sales_reps
    ON accounts.sales_rep_id = sales_reps.id
GROUP BY sales_reps.id, sales_reps.name
HAVING COUNT(*) > 5;
279 | 
-- How many accounts have more than 20 orders?
-- One row per qualifying account; the number of rows returned is the answer.

SELECT
    a.name,
    COUNT(*)
FROM accounts AS a
INNER JOIN orders AS o
    ON o.account_id = a.id
GROUP BY a.name
HAVING COUNT(*) > 20;
289 | 
-- Which account has the most orders?

SELECT
    a.name,
    COUNT(*)
FROM accounts AS a
INNER JOIN orders AS o
    ON o.account_id = a.id
GROUP BY a.name
ORDER BY COUNT(*) DESC
LIMIT 1;
299 | 
300 | -- 
301 | 
302 | 
-- Which accounts spent more than 30,000 usd total across all orders?

SELECT
    accounts.name,
    -- dollars spent: total_amt_usd (orders.total is a paper quantity)
    SUM(orders.total_amt_usd) AS tt
FROM accounts
INNER JOIN orders
    ON accounts.id = orders.account_id
GROUP BY accounts.name
HAVING SUM(orders.total_amt_usd) > 30000;
311 | 
-- Which accounts spent less than 1,000 usd total across all orders?
SELECT
    accounts.name,
    -- dollars spent: total_amt_usd (orders.total is a paper quantity)
    SUM(orders.total_amt_usd) AS tt
FROM accounts
INNER JOIN orders
    ON accounts.id = orders.account_id
GROUP BY accounts.name
HAVING SUM(orders.total_amt_usd) < 1000;
319 | 
320 | -- Which account has spent the most with us?
321 | SELECT a.name, SUM(o.total_amt_usd) AS tt
322 | FROM accounts AS a
323 | INNER JOIN orders AS o
324 |     ON o.account_id = a.id
325 | GROUP BY a.name
326 | ORDER BY SUM(o.total_amt_usd) DESC
327 | LIMIT 1
328 | 
329 | 
330 | 
331 | -- Which accounts used facebook as a channel to contact customers more than 6 times?
332 | SELECT a.id, a.name, w.channel, COUNT(*) AS use_of_channel
333 | FROM accounts a
334 | JOIN web_events w ON a.id = w.account_id
335 | WHERE w.channel = 'facebook' -- row-level filter belongs in WHERE so only facebook events are grouped
336 | GROUP BY a.id, a.name, w.channel
337 | HAVING COUNT(*) > 6
338 | ORDER BY use_of_channel;
339 | 
340 | -- Which channel was most frequently used by most accounts?
341 | -- Shows the ten largest per-account, per-channel event counts.
342 | select acct.id, acct.name, ev.channel, count(*) as use_of_channel
343 | from web_events as ev
344 | inner join accounts as acct
345 |     on ev.account_id = acct.id
346 | group by acct.id, acct.name, ev.channel
347 | order by count(*) desc
348 | limit 10;
349 | 
350 | 
351 | -- Dates Section 
352 | -- Find the sales in terms of total dollars for all orders in each year, ordered from greatest to least. Do you notice any trends in the yearly sales totals?
353 | -- One row per calendar year of occurred_at, largest dollar total first.
354 | SELECT DATE_PART('year', o.occurred_at) AS yr, SUM(o.total_amt_usd) AS yearly_total
355 | FROM orders AS o
356 | GROUP BY DATE_PART('year', o.occurred_at)
357 | ORDER BY yearly_total DESC
358 | 
359 | -- Which month did Parch & Posey have the greatest sales in terms of total dollars? Are all months evenly represented by the dataset?
360 | select DATE_PART('month', occurred_at) as mn, SUM(total_amt_usd) as mn_total
361 | from orders
362 | where occurred_at >= '2014-01-01' and occurred_at < '2017-01-01' -- same window as the monthly order-count query; presumably years outside it are only partly covered - verify against the data
363 | group by 1
364 | order by 2 desc;
365 | 
366 | -- Which year did Parch & Posey have the greatest sales in terms of total number of orders? Are all years evenly represented by the dataset?
367 | SELECT DATE_PART('year', o.occurred_at) AS yr, COUNT(*) AS yearly_total
368 | FROM orders AS o
369 | GROUP BY DATE_PART('year', o.occurred_at)
370 | ORDER BY yearly_total DESC
371 | 
372 | 
373 | -- Which month did Parch & Posey have the greatest sales in terms of total number of orders? Are all months evenly represented by the dataset?
374 | select DATE_PART('month', occurred_at) as mn, count(*) as mn_total
375 | from orders
376 | where occurred_at >= '2014-01-01' and occurred_at < '2017-01-01' -- half-open range: BETWEEN is inclusive at both ends and would let 2017-01-01 into the window
377 | group by 1
378 | order by 2 desc;
379 | 
380 | -- 
381 | 
382 | -- In which month of which year did Walmart spend the most on gloss paper in terms of dollars?
383 | select DATE_PART('year', occurred_at) as yr, DATE_PART('month', occurred_at) as mn, SUM(gloss_amt_usd) as gloss_total
384 | from orders
385 | join accounts on orders.account_id = accounts.id
386 | where accounts.name = 'Walmart' -- capitalised, matching the name used by the DATE_TRUNC version of this query
387 | group by 1, 2
388 | order by gloss_total desc -- rank by dollars spent, not by month number
389 | limit 1;
390 | 
391 | -- this is correct: one DATE_TRUNC month bucket per row, highest gloss spend first
392 | select date_trunc('month', ord.occurred_at) as ord_date, sum(ord.gloss_amt_usd) as tot_spent
393 | from accounts as acct
394 | inner join orders as ord
395 |     on ord.account_id = acct.id
396 | where acct.name = 'Walmart'
397 | group by date_trunc('month', ord.occurred_at)
398 | order by tot_spent desc
399 | limit 1;
400 | 
401 | 
402 | -- 
403 | 
404 | 
405 | -- Case Statements Section 
406 | 
407 | -- Write a query to display for each order, the account ID, total amount of the order, and the level of the order - 'Large' or 'Small' - depending on if the order is $3000 or more, or smaller than $3000.
408 | 
409 | -- One row per order: the owning account's id, the order's dollar total,
410 | -- and a size level derived from that total.
411 | 
412 | 
413 | select
414 |     accounts.id as id,
415 |     orders.total_amt_usd as total,
416 |     case
417 |         when orders.total_amt_usd >= 3000 then 'Large' -- "$3000 or more" includes exactly 3000
418 |         else 'Small'
419 |     end as Level
420 | from orders
421 | join accounts
422 |     on orders.account_id = accounts.id;
423 | 
424 | 
425 | -- Write a query to display the number of orders in each of three categories, based on the total number of items in each order.
426 | -- The three categories are: 'At Least 2000', 'Between 1000 and 2000' and 'Less than 1000'.
427 | SELECT
428 |     CASE
429 |         WHEN o.total >= 2000 THEN 'At Least 2000' -- branches run top-down, so each needs only a lower bound
430 |         WHEN o.total >= 1000 THEN 'Between 1000 and 2000'
431 |         WHEN o.total < 1000 THEN 'Less than 1000' -- explicit test (not ELSE) so a NULL total stays uncategorised
432 |     END AS categories,
433 |     COUNT(*) AS category_count
434 | FROM orders AS o
435 | GROUP BY 1
436 | 
437 | 
438 | -- We would like to understand 3 different levels of customers based on the amount associated with their purchases.
439 | -- The top level includes anyone with a Lifetime Value (total sales of all orders) greater than 200,000 usd.
440 | -- The second level is between 200,000 and 100,000 usd. The lowest level is anyone under 100,000 usd.
441 | -- Provide a table that includes the level associated with each account.
442 | -- You should provide the account name, the total sales of all orders for the customer, and the level. Order with the top spending customers listed first.
443 | -- Thresholds are strict: exactly 200000 is 'Middle' and exactly 100000 is 'Lowest'.
444 | SELECT a.name, SUM(o.total_amt_usd) AS Total_Sales,
445 |     CASE
446 |         WHEN SUM(o.total_amt_usd) > 200000 THEN 'Top'
447 |         WHEN SUM(o.total_amt_usd) > 100000 THEN 'Middle'
448 |         ELSE 'Lowest'
449 |     END AS Level
450 | FROM orders AS o
451 | INNER JOIN accounts AS a ON o.account_id = a.id
452 | GROUP BY a.name
453 | ORDER BY SUM(o.total_amt_usd) DESC
454 | 
455 | 
456 | -- We would now like to perform a similar calculation to the first,
457 | --  but we want to obtain the total amount spent by customers only in 2016 and 2017.
458 | -- Keep the same levels as in the previous question. Order with the top spending customers listed first.
459 | -- Half-open range: starting at 2016-01-01 keeps 2015-12-31 orders out, which "> '2015-12-31'" lets in when occurred_at carries a time of day.
460 | SELECT a.name, SUM(total_amt_usd) total_spent,
461 |      CASE WHEN SUM(total_amt_usd) > 200000 THEN 'top'
462 |      WHEN  SUM(total_amt_usd) > 100000 THEN 'middle'
463 |      ELSE 'low' END AS customer_level
464 | FROM orders o
465 | JOIN accounts a
466 | ON o.account_id = a.id
467 | WHERE o.occurred_at >= '2016-01-01' AND o.occurred_at < '2018-01-01'
468 | GROUP BY 1
469 | ORDER BY 2 DESC;
470 | 
471 | 
472 | -- We would like to identify top performing sales reps,
473 | --  which are sales reps associated with more than 200 orders.
474 | --  Create a table with the sales rep name,
475 | -- the total number of orders, and a column with top or not depending on if they have more than 200 orders.
476 | -- Place the top sales people first in your final table.
477 | 
478 | -- Order count per sales rep name, flagged 'top' above 200 orders and 'not' otherwise.
479 | select
480 |     rep.name,
481 |     count(*) as num_ords,
482 |     case when count(*) > 200 then 'top' else 'not' end as sales_rep_level
483 | from sales_reps as rep
484 | inner join accounts as acct
485 |     on acct.sales_rep_id = rep.id
486 | inner join orders as ord on ord.account_id = acct.id
487 | group by rep.name
488 | order by num_ords desc;
489 | 
490 | 
491 |  -- The previous query didn't account for the middle, nor the dollar amount associated with the sales. Management decides they want to see these characteristics represented as well. We would like to identify top performing sales reps, which are sales reps associated with more than 200 orders or more than 750000 in total sales. The middle group has any rep with more than 150 orders or 500000 in sales. Create a table with the sales rep name, the total number of orders, total sales across all orders, and a column with top, middle, or low depending on this criteria. Place the top sales people based on dollar amount of sales first in your final table.
492 | select rep.name, count(*), sum(ord.total_amt_usd) as total_spent,
493 |     case
494 |         when count(*) > 200 or sum(ord.total_amt_usd) > 750000 then 'top'
495 |         when count(*) > 150 or sum(ord.total_amt_usd) > 500000 then 'middle'
496 |         else 'low'
497 |     end as sales_rep_level
498 | from orders as ord
499 | inner join accounts as acct on ord.account_id = acct.id
500 | inner join sales_reps as rep on rep.id = acct.sales_rep_id
501 | group by rep.name
502 | order by total_spent desc;
503 | 
504 | 
505 | 


--------------------------------------------------------------------------------