├── variability_lesson_13
    ├── variability.md
    ├── avg_abs_dev_formula.png
    ├── avg_squared_deviation.png
    ├── sum_squared_deviation_formula.png
    └── quizzes_lesson13.md
├── .gitignore
├── basic_sql_lesson28
    ├── table.png
    ├── parch_posey_db.png
    ├── notes.md
    ├── syntax_sql.md
    └── quizzes.sql
├── sql_joins_lesson_29
    ├── join_sql.png
    ├── venn_diagram.png
    ├── primary_foreign_key.png
    ├── interchangeable_result.png
    ├── entity_relationship_diagram.png
    ├── recap_joins.md
    ├── sql_joins_lesson_29.md
    └── join_quizzes.sql
├── data_types_and_operators_lesson_24
    ├── slicing.png
    ├── slicing_start.png
    ├── identity_operators.png
    ├── membership_operators.png
    ├── quizzes_lesson_24.py
    ├── string_methods_lists.md
    ├── dictionaries.md
    └── list_methods_tuples_sets.md
├── intro_to_research_methods_lessons_1_5
    ├── sampling_error.png
    ├── population_sample.png
    └── terminology_intro_to_research_methods.md
├── scripting_lesson_27
    └── scripting.md
├── sql_data_cleaning_lesson32
    ├── sql_data_cleaning_lesson32.md
    └── data_cleaning.sql
├── control_flow_lesson_25
    ├── while_loops.md
    ├── control_flow_quizzes.py
    ├── list_comprehensions.py
    ├── control_flow_practice.py
    ├── while_loops.py
    ├── control_flow.md
    ├── loops.md
    └── loops_quizzes.py
├── functions_lesson_26
    ├── generator.py
    ├── generator_quizzes.py
    ├── functions.py
    └── functions.md
├── visualizing_data_lesson_6
    └── visualizing_data.md
├── aggregations_lesson_30
    ├── date_functions_quizzes.sql
    ├── case_statement.md
    ├── cas_statement.sql
    ├── aggregations.md
    └── aggregations.sql
├── README.md
└── sql_subqueries_temporary_table_lesson31
    ├── subqueries_tasks.sql
    ├── with_vs_subquery.sql
    └── subqueries_temporary_tables.md


/variability_lesson_13/variability.md:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | .idea 
3 | sql_subqueries_temporary_table_lesson31/subquery_mani_u_solutions.py


--------------------------------------------------------------------------------
/basic_sql_lesson28/table.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/basic_sql_lesson28/table.png


--------------------------------------------------------------------------------
/sql_joins_lesson_29/join_sql.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/sql_joins_lesson_29/join_sql.png


--------------------------------------------------------------------------------
/sql_joins_lesson_29/venn_diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/sql_joins_lesson_29/venn_diagram.png


--------------------------------------------------------------------------------
/basic_sql_lesson28/parch_posey_db.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/basic_sql_lesson28/parch_posey_db.png


--------------------------------------------------------------------------------
/sql_joins_lesson_29/primary_foreign_key.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/sql_joins_lesson_29/primary_foreign_key.png


--------------------------------------------------------------------------------
/data_types_and_operators_lesson_24/slicing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/data_types_and_operators_lesson_24/slicing.png


--------------------------------------------------------------------------------
/sql_joins_lesson_29/interchangeable_result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/sql_joins_lesson_29/interchangeable_result.png


--------------------------------------------------------------------------------
/variability_lesson_13/avg_abs_dev_formula.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/variability_lesson_13/avg_abs_dev_formula.png


--------------------------------------------------------------------------------
/variability_lesson_13/avg_squared_deviation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/variability_lesson_13/avg_squared_deviation.png


--------------------------------------------------------------------------------
/sql_joins_lesson_29/entity_relationship_diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/sql_joins_lesson_29/entity_relationship_diagram.png


--------------------------------------------------------------------------------
/data_types_and_operators_lesson_24/slicing_start.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/data_types_and_operators_lesson_24/slicing_start.png


--------------------------------------------------------------------------------
/intro_to_research_methods_lessons_1_5/sampling_error.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/intro_to_research_methods_lessons_1_5/sampling_error.png


--------------------------------------------------------------------------------
/variability_lesson_13/sum_squared_deviation_formula.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/variability_lesson_13/sum_squared_deviation_formula.png


--------------------------------------------------------------------------------
/data_types_and_operators_lesson_24/identity_operators.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/data_types_and_operators_lesson_24/identity_operators.png


--------------------------------------------------------------------------------
/data_types_and_operators_lesson_24/membership_operators.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/data_types_and_operators_lesson_24/membership_operators.png


--------------------------------------------------------------------------------
/intro_to_research_methods_lessons_1_5/population_sample.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/intro_to_research_methods_lessons_1_5/population_sample.png


--------------------------------------------------------------------------------
/scripting_lesson_27/scripting.md:
--------------------------------------------------------------------------------
 1 | ## SCRIPTING
 2 | 
 3 | * Python Installation and Environment Setup
 4 | * Running and Editing Python Scripts
 5 | * Interacting with User Input
 6 | * Handling Exceptions
 7 | * Reading and Writing Files
 8 | * Importing Local, Standard, and Third-Party Modules
 9 | * Experimenting with an Interpreter
10 | 


--------------------------------------------------------------------------------
/sql_data_cleaning_lesson32/sql_data_cleaning_lesson32.md:
--------------------------------------------------------------------------------
1 | ## LEFT and RIGHT
2 | 
3 | LEFT pulls a specified number of characters for each row in a specified column starting at the beginning (or from the left). As you saw here, you can pull the first three digits of a phone number using LEFT(phone_number, 3).
4 | 
5 | 
6 | RIGHT pulls a specified number of characters for each row in a specified column starting at the end (or from the right). As you saw here, you can pull the last eight digits of a phone number using RIGHT(phone_number, 8).
7 | 
8 | 
9 | LENGTH provides the number of characters for each row of a specified column. Here, you saw that we could use this to get the length of each phone number as LENGTH(phone_number).


--------------------------------------------------------------------------------
/control_flow_lesson_25/while_loops.md:
--------------------------------------------------------------------------------
 1 | ## while Loops
 2 | 
 3 | A `for` loop is useful when you know how many iterations of the loop you need ("definite iteration"). There are situations where it's impossible to know in advance
 4 | how many times you will want the loop body executed ("indefinite iteration"). That's what a `while` loop is used for.
 5 | 
 6 | `while` loops are sometimes called **conditional** loops because they keep iterating as long as some condition is true and end once it becomes false.
 7 | Example:
 8 | ```
 9 | card_deck = [4, 11, 8, 5, 13, 2, 8, 10]
10 | hand = []
11 | 
12 | # adds the last element of the card_deck list to the hand list
13 | # until the values in hand add up to 17 or more
14 | while sum(hand)  < 17:
15 |     hand.append(card_deck.pop())
16 | ``` 
17 | 
18 | ## sum() and pop()
19 | 
20 | `sum()` returns the sum of the elements in a list.
21 | 
22 | `pop()` is the opposite (or inverse) of the `append` method: it removes the last element from a list and returns it.


--------------------------------------------------------------------------------
/variability_lesson_13/quizzes_lesson13.md:
--------------------------------------------------------------------------------
 1 | ### 24. Which formula describes Average Absolute Deviations:
 2 | 
 3 | ![Formula for absolute Deviation](avg_abs_dev_formula.png)
 4 | 
 5 | ### 26. Sum of Squares
 6 | 
 7 | Another way to get rid of negative values is to square each one, that is, to multiply each value by itself.
 8 | 
 9 | ![Formula squared deviations](sum_squared_deviation_formula.png)
10 | The last correct formula is called **SS - sum of squares**.
11 | 
12 | ### 27. Average Squared Deviation
13 | 
14 | ![Average squared deviation](avg_squared_deviation.png)
15 | 
16 | The average squared deviation is 291,622,740. There's a special name for this number: it's called the **variance**.
17 | 
18 | How can we put **variance** in words?
19 | * Mean of squared deviations (add all squared deviations and divide by n).
20 | * Sum of squared deviations divided by n.
21 | 
22 | ### 33. Quiz: Standard Deviation in Words.
23 | 
24 | What is a way to put the Standard Deviation in words?
25 | 
26 | * Square root of average squared deviation


--------------------------------------------------------------------------------
/sql_joins_lesson_29/recap_joins.md:
--------------------------------------------------------------------------------
 1 | Primary and Foreign Keys
 2 | You learned a key element for JOINing tables in a database has to do with primary and foreign keys:
 3 | 
 4 | * primary keys - are unique for every row in a table. These are generally the first column in our database (like you saw with the id column for every table in the Parch & Posey database).
 5 | 
 6 | * foreign keys - are the primary key appearing in another table, which allows the rows to be non-unique.
 7 | 
 8 | Choosing the set up of data in our database is very important, but not usually the job of a data analyst. This process is known as Database Normalization.
 9 | 
10 | JOINs
11 | In this lesson, you learned how to combine data from multiple tables using JOINs. The three JOIN statements you are most likely to use are:
12 | 
13 | 1. JOIN - an INNER JOIN that only pulls data that exists in both tables.
14 | 	
15 | 2. LEFT JOIN - a way to pull all of the rows from the table in the FROM even if they do not exist in the JOIN statement.
16 | 	
17 | 3. RIGHT JOIN - a way to pull all of the rows from the table in the JOIN even if they do not exist in the FROM statement.


--------------------------------------------------------------------------------
/functions_lesson_26/generator.py:
--------------------------------------------------------------------------------
 1 | # square_number function returns a list of squared numbers
 2 | 
 3 | def square_numbers(nums):
 4 |     result = []
 5 |     for i in nums:
 6 |         result.append(i * i)
 7 |     return result
 8 | 
 9 | 
10 | my_nums = square_numbers([1, 2, 3, 4, 5])
11 | 
12 | print(my_nums)
13 | 
14 | print('\n')
15 | 
16 | # generator
17 | 
18 | """
19 | generator don't hold the entire result in memory it yields one result
20 | at a time
21 | """
22 | 
23 | def square_numbers(nums):
24 |     for i in nums:
25 |         yield (i * i)
26 | 
27 | my_nums = square_numbers([1, 2, 3, 4, 5]) # my_ nums is generator
28 | 
29 | for num in my_nums:
30 |     print(num)
31 | 
32 | print('\n')
33 | 
34 | # next(my_nums) the output is 1, the first value in a list and first
35 | # squared number
36 | #print next(my_nums) # 1
37 | #print next(my_nums) # 4
38 | #print next(my_nums) # 9
39 | #print next(my_nums) # 16
40 | #print next(my_nums) 25
41 | 
42 | 
43 | # generator with list coprehension
44 | 
45 | my_nums = (x*x for x in [1, 2, 3, 4, 5])
46 | 
47 | for num in my_nums:
48 |     print(num)
49 | 
50 | print('\n')
51 | 
52 | # generator, convert data in a list
53 | 
54 | my_nums = (x*x for x in [1, 2, 3, 4, 5])
55 | 
56 | print list(my_nums)


--------------------------------------------------------------------------------
/control_flow_lesson_25/control_flow_quizzes.py:
--------------------------------------------------------------------------------
 1 | # Conditional Statements
 2 | 
 3 | # First Example - try changing the value of phone_balance
 4 | phone_balance = 1
 5 | bank_balance = 50
 6 | 
 7 | if phone_balance < 10:
 8 |     phone_balance += 10
 9 |     bank_balance -= 10
10 | 
11 | print(phone_balance)
12 | print(bank_balance)
13 | 
14 | # Second Example - try changing the value of number
15 | 
16 | number = 140
17 | if number % 2 == 0:
18 |     print("Number " + str(number) + " is even.")
19 | else:
20 |     print("Number " + str(number) + " is odd.")
21 | 
22 | # Third Example - try to change the value of age
23 | age = 3
24 | 
25 | # Here are the age limits for bus fares
26 | free_up_to_age = 4
27 | child_up_to_age = 18
28 | senior_from_age = 65
29 | 
30 | # These lines determine the bus fare prices
31 | concession_ticket = 1.25
32 | adult_ticket = 2.50
33 | 
34 | # Here is the logic for bus fare prices
35 | if age <= free_up_to_age:
36 |     ticket_price = 0
37 | elif age <= child_up_to_age:
38 |     ticket_price = concession_ticket
39 | elif age >= senior_from_age:
40 |     ticket_price = concession_ticket
41 | else:
42 |     ticket_price = adult_ticket
43 | 
44 | message = "Somebody who is {} years old will pay ${} to ride the bus.".format(age, ticket_price)
45 | print(message)
46 | 


--------------------------------------------------------------------------------
/visualizing_data_lesson_6/visualizing_data.md:
--------------------------------------------------------------------------------
 1 | # Frequency table.
 2 | 
 3 | The number of times a certain value appears in a row/set of data is called the **frequency**. In other words, frequency is the count of how often a value occurs.
 4 | For example, in the following list of numbers, the frequency of the number 3 is 6 (because it occurs 6 times):
 5 |   `1, 4, 3, 9, 11, 3, 3, 5, 77, 3, 88, 3, 3`
 6 | 
 7 | A frequency table is a simple way to display the number of occurrences of a particular value or characteristic.
 8 | 
 9 | A frequency distribution is a table showing each distinct value of some variable and the number of times it occurs in some dataset.
10 | 
11 | Frequency distribution helps us:
12 | * to analyze the data
13 | * to estimate the frequencies of the population on the basis of the sample
14 | * to facilitate the computation of various statistical measures
15 | 
16 | A **relative frequency distribution** is a distribution in which relative frequencies are recorded against each class interval.
17 | 
18 | # Tables.
19 | 
20 | Tables can show either **categorical variables** (sometimes called qualitative variables) or **quantitative variables** (sometimes called numeric variables). You 
21 | can think of categorical variables as being categories (like eye color or brand of dog food) and quantitative variables as being numbers.
22 | 
23 | # Histogram and Bar graph.
24 | 
25 | 


--------------------------------------------------------------------------------
/control_flow_lesson_25/list_comprehensions.py:
--------------------------------------------------------------------------------
 1 | # Quiz: Extract First Names
 2 | #
 3 | # Use a list comprehension to create a new list first_names containing just
 4 | # the first names in names in lowercase.
 5 | 
 6 | names = ["Rick Sanchez", "Morty Smith", "Summer Smith", "Jerry Smith", "Beth Smith"]
 7 | 
 8 | first_names = [name.lower().split()[0] for name in names]
 9 | 
10 | print(first_names)
11 | 
12 | 
13 | # Quiz: Multiples of Three
14 | # Use a list comprehension to create a list multiples_3 containing the first
15 | # 20 multiples of 3.
16 | 
17 | multiples_3 = [ x for x in range(3, 60+1)  if x % 3 == 0]
18 | print(multiples_3)
19 | 
20 | # Second solution:
21 | 
22 | multiples_3 = [x * 3 for x in range(1, 21)]
23 | print(multiples_3)
24 | 
25 | 
26 | # Quiz: Filter Names by Scores
27 | # Use a list comprehension to create a list of names passed that only include
28 | # those that scored at least 65.
29 | #
30 | 
31 | 
32 | scores = {
33 |              "Rick Sanchez": 70,
34 |              "Morty Smith": 35,
35 |              "Summer Smith": 82,
36 |              "Jerry Smith": 23,
37 |              "Beth Smith": 98
38 |           }
39 | 
40 | 
41 | passed = [key for key, value in scores.items() if value >= 65]
42 | print(passed)
43 | 
44 | 
45 | # Udacity solution:
46 | 
47 | passed = [name for name, score in scores.items() if score >= 65]
48 | print(passed)


--------------------------------------------------------------------------------
/data_types_and_operators_lesson_24/quizzes_lesson_24.py:
--------------------------------------------------------------------------------
 1 | # 22. Quiz: Slicing Lists
 2 | 
 3 | eclipse_dates = ['June 21, 2001', 'December 4, 2002', 'November 23, 2003',
 4 |                  'March 29, 2006', 'August 1, 2008', 'July 22, 2009',
 5 |                  'July 11, 2010', 'November 13, 2012', 'March 20, 2015',
 6 |                  'March 9, 2016'] 
 7 | # TODO: Modify this line so it prints the last three elements of the list
 8 | print(eclipse_dates[-3:])
 9 | 
10 | # 24. Quiz: List Methods
11 | 
12 | names = ["Carol", "Albert", "Ben", "Donna"]
13 | names.append("Eugenia")
14 | print(sorted(names))
15 | 
16 | ['Albert', 'Ben', 'Carol', 'Donna', 'Eugenia']
17 | 
18 | # 30. Quiz: Dictionaries
19 | 
20 | a = [1, 2, 3]
21 | b = a
22 | c = [1, 2, 3]
23 | 
24 | print(a == b)  # True
25 | print(a is b)  # True
26 | print(a == c)  # True
27 | print(a is c)  # False
28 | 
29 | # 34. Quiz: Compound Data Structures
30 | 
31 | elements = {'hydrogen': {'number': 1, 'weight': 1.00794, 'symbol': 'H'},
32 |             'helium': {'number': 2, 'weight': 4.002602, 'symbol': 'He'}}
33 | 
34 | # todo: Add an 'is_noble_gas' entry to the hydrogen and helium dictionaries
35 | # hint: helium is a noble gas, hydrogen isn't
36 | 
37 | elements['hydrogen']['is_noble_gas'] = False
38 | elements['helium']['is_noble_gas'] = True
39 | 
40 | print(elements['hydrogen']['is_noble_gas'])
41 | print(elements['helium']['is_noble_gas'])
42 | 
43 | 


--------------------------------------------------------------------------------
/aggregations_lesson_30/date_functions_quizzes.sql:
--------------------------------------------------------------------------------
 1 | -- 1. Find the sales in terms of total dollars for all orders in each year.
 2 | 
 3 | -- total_amt_usd is the dollar amount (the same column query 2 sums);
 4 | -- `total` appears to hold the unit count instead - TODO confirm.
 5 | SELECT DATE_TRUNC('year', occurred_at) AS year,
 6 |        SUM(total_amt_usd) AS total
 7 | FROM orders
 8 | GROUP BY 1
 9 | ORDER BY 2 DESC;
10 | 
11 | -- 2. Which month did Parch & Posey have the greatest sales in terms of
12 | -- total dollars? Are all months evenly represented by the dataset?
13 | -- NOTE(review): DATE_TRUNC keeps the year, so each row is one month of one
14 | -- specific year. If the question means month-of-year across all years,
15 | -- DATE_PART('month', occurred_at) is the grouping to use - confirm.
16 | 
17 | SELECT DATE_TRUNC('month', occurred_at) AS month,
18 |        SUM(total_amt_usd) AS total
19 | FROM orders
20 | -- Half-open range: keeps 2014 through 2016 and removes the sales from 2013
21 | -- and 2017, including an order stamped exactly 2017-01-01 00:00:00 (which
22 | -- BETWEEN would let through because it is inclusive on both ends).
23 | WHERE occurred_at >= '2014-01-01'
24 |   AND occurred_at < '2017-01-01'
25 | GROUP BY 1
26 | ORDER BY 2 DESC;
27 | 
28 | -- 3. Which year did Parch & Posey have the greatest sales in terms
29 | -- of total number of orders? Are all years evenly represented by the dataset?
30 | 
31 | SELECT DATE_TRUNC('year', occurred_at) AS year,
32 |        COUNT(*) AS total_sales
33 | FROM orders
34 | GROUP BY 1
35 | ORDER BY 2 DESC;
36 | 
37 | 
38 | -- 4. Which month did Parch & Posey have the greatest sales in terms of total
39 | -- number of orders? Are all months evenly represented by the dataset?
40 | -- NOTE(review): as in query 2, DATE_TRUNC groups by month of a specific
41 | -- year; use DATE_PART('month', occurred_at) for month-of-year - confirm.
42 | 
43 | SELECT DATE_TRUNC('month', occurred_at) AS month,
44 |        COUNT(*) AS total_sales
45 | FROM orders
46 | -- Half-open range so nothing dated 2017 is counted.
47 | WHERE occurred_at >= '2014-01-01'
48 |   AND occurred_at < '2017-01-01'
49 | GROUP BY 1
50 | ORDER BY 2 DESC;
51 | 
52 | -- 5. In which month of which year did Walmart spend the most on gloss paper in terms of dollars?
53 | 
54 | SELECT DATE_TRUNC('month', o.occurred_at) AS month,
55 |        SUM(o.gloss_amt_usd) AS gloss_paper_usd
56 | FROM orders o
57 | JOIN accounts a
58 | ON a.id = o.account_id
59 | WHERE a.name = 'Walmart'
60 | GROUP BY 1
61 | ORDER BY 2 DESC;


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## About
 2 | 
 3 | ##### This repository is for my study notes, exercises, and quizzes during the Udacity Bertelsmann Scholarship Challenge.
 4 |  
 5 | Notes are sorted by lessons.
 6 | 
 7 | ## Lessons:
 8 | 
 9 | - [Lessons 1-5](https://github.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/blob/master/intro_to_research_methods_lessons_1_5/terminology_intro_to_research_methods.md)
10 | - [Lesson 6](https://github.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/tree/master/visualizing_data_lesson_6)
11 | - [Lesson 13](https://github.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/tree/master/variability_lesson_13)
12 | - [Lesson 24](https://github.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/tree/master/data_types_and_operators_lesson_24)
13 | - [Lesson 25](https://github.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/tree/master/control_flow_lesson_25)
14 | - [Lesson 26](https://github.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/tree/master/functions_lesson_26)
15 | - [Lesson 28](https://github.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/tree/master/basic_sql_lesson28)
16 | - [Lesson 29](https://github.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/tree/master/sql_joins_lesson_29)
17 | - [Lesson 30](https://github.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/tree/master/aggregations_lesson_30)
18 | - [Lesson 31](https://github.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/tree/master/sql_subqueries_temporary_table_lesson31)
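19 | - [Lesson 32](https://github.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/tree/master/sql_data_cleaning_lesson32)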
20 | 


--------------------------------------------------------------------------------
/data_types_and_operators_lesson_24/string_methods_lists.md:
--------------------------------------------------------------------------------
 1 | # String Methods.
 2 | 
 3 | * A string is a sequence of characters.
 4 | * Using a method is almost the same as using a function: it takes arguments and returns a value.
 5 | * To call a method, use dot notation, for example `sample_string.lower()`. Methods can receive additional arguments, which are passed inside the parentheses.
 6 | * Methods are specific to the data type for a particular variable. So there are some built-in methods that are available for all strings, different methods that are available for all integers, etc.
 7 | 
 8 | Links:
 9 | 
10 | * [String Methods Documentation](https://docs.python.org/3/library/stdtypes.html#string-methods)
11 | 
12 | # Lists!
13 | 
14 | A list is a sequence of values. The values in a list are called **elements** (sometimes **items**) and elements can be any type of data. For example:
15 | 
16 | `random_list = ['Gauda is a cheese?', 32, True]`
17 | 
18 | `random_list[-1]`  # True
19 | 
20 | `random_list[0]`  # Gauda is a cheese?
21 | 
22 | A list within another list is **nested** list. A list that contains no elements inside is called an **empty** list, for example: []
23 | Lists are mutable (their content can be modified).
24 | 
25 | # Slicing, in or not in.
26 | 
27 | Slicing is used to create new lists that have the same values or parts of the values of the originals.
28 | 
29 | When using slicing, it is important to remember that the lower index is `inclusive` and the upper index is 
30 | `exclusive`. 
31 | 
32 | ![Slicing](slicing.png)
33 | ![Slicing start](slicing_start.png)
34 | 
35 | ![Membership operators](membership_operators.png)
36 | 
37 | # Mutability and order.
38 | 
39 | Lists are mutable and can be modified, but strings cannot (a string is an immutable data type). Both strings and lists are ordered.


--------------------------------------------------------------------------------
/data_types_and_operators_lesson_24/dictionaries.md:
--------------------------------------------------------------------------------
 1 | # Dictionary.
 2 | 
 3 | A dictionary is a mutable data type. In a list, the indices have to be integers; in a dictionary they can be (almost) any type.
 4 | 
 5 | A dictionary stores pairs of elements **keys** and **values**.
 6 | We can check whether a key is in a dictionary the same way we check whether a value is in a list or set: with the `in` keyword.
 7 | `get` is a related method: it looks up a key in a dictionary and returns its value, or `None` (or a default value you supply) if the key is not found.
 8 | ```python
 9 | food_bill = {"milk": 2, "bread": 1.23, "apples": 4}
10 | 
11 | food_bill ["cucumber"] = 1.25 # add element
12 | 
13 | print(food_bill)
14 | print("tomatoes" in food_bill)
15 | print(food_bill.get("pear")) # return None
16 | 
17 | # use is not to check if a key return None
18 | vegetables = food_bill.get("carrots")
19 | is_null = vegetables is None # or use: vegetables is not None
20 | print(is_null)
21 | ```
22 | ![Identity operators](identity_operators.png)
23 | 
24 | 
25 | ```python
26 | elements.get('dilithium')
27 | None
28 | 
29 | elements['dilithium']
30 | KeyError: 'dilithium'
31 | 
32 | elements.get('kryptonite', 'There\'s no such element!')
33 | "There's no such element!"
34 | ```
35 | 
36 | # Compound Data Structure.
37 | 
38 | We can include containers in other containers to create compound data structures.
39 | Nested dictionary: 
40 | ```python
41 | elements = {"hydrogen": {"number": 1,
42 |                          "weight": 1.00794,
43 |                          "symbol": "H"},
44 |               "helium": {"number": 2,
45 |                          "weight": 4.002602,
46 |                          "symbol": "He"}}
47 | print(elements['hydrogen'])
48 | print(elements['hydrogen']['number'])
49 | print(elements.get('zink', 'There is no such element!'))
50 | 
51 | ``` 
52 | Python practice links:                  
53 | [More practice](https://www.hackerrank.com/domains/python/py-introduction)
54 | [Python practice](https://www.codewars.com/users/sign_in)     


--------------------------------------------------------------------------------
/aggregations_lesson_30/case_statement.md:
--------------------------------------------------------------------------------
 1 | A derived column takes data from existing columns and modifies it.
 2 | 
 3 | A "CASE" statement handles "IF"/"THEN" logic. It is followed by at least one pair of "WHEN" and "THEN" statements and must end with the word "END".
 4 | 
 5 | **CASE - Expert Tip**
 6 | + The CASE statement always goes in the SELECT clause.
 7 | 
 8 | + CASE must include the following components: WHEN, THEN, and END. ELSE is an optional component to catch cases that didn’t meet any of the other previous CASE conditions.
 9 | 
10 | + You can make any conditional statement using any conditional operator (like WHERE) between WHEN and THEN. This includes stringing together multiple conditional statements using AND and OR.
11 | 
12 | + You can include multiple WHEN statements, as well as an ELSE statement again, to deal with any unaddressed conditions.
13 | 
14 | Example
15 | In a quiz question in the previous Basic SQL lesson, you saw this question:
16 | 
17 | Create a column that divides the standard_amt_usd by the standard_qty to find the unit price for standard paper for each order. Limit the results to the first 10 orders, and include the id and account_id fields. NOTE - you will be thrown an error with the correct solution to this question. This is for a division by zero. You will learn how to get a solution without an error to this query when you learn about CASE statements in a later section.
18 | 
19 | Let's see how we can use the CASE statement to get around this error.
20 | 
21 | ```
22 | SELECT id, account_id, standard_amt_usd/standard_qty AS unit_price
23 | FROM orders
24 | LIMIT 10;
25 | ```
26 | 
27 | Now, let's use a CASE statement. This way any time the standard_qty is zero, we will return 0, and otherwise we will return the unit_price.
28 | ```
29 | SELECT account_id, CASE WHEN standard_qty = 0 OR standard_qty IS NULL THEN 0
30 |                         ELSE standard_amt_usd/standard_qty END AS unit_price
31 | FROM orders
32 | LIMIT 10;
33 | ```
34 | 
35 | Example:
36 | ```
37 | SELECT CASE WHEN total > 500 THEN 'Over 500'
38 | 	   ELSE '500 or under' END as total_group,
39 |        COUNT(*) as order_count
40 | FROM orders
41 | GROUP BY 1;
42 | ```
43 | 
44 | Using `WHERE` clause means only being able to get one set of data at a time.
45 | 
46 | ```
47 | SELECT COUNT(1) as orders_over_500_units
48 | FROM orders
49 | WHERE total > 500;
50 | ```


--------------------------------------------------------------------------------
/sql_data_cleaning_lesson32/data_cleaning.sql:
--------------------------------------------------------------------------------
 1 | # LEFT and RIGHT QUIZZES.
 2 | 
 3 | # 1. In the accounts table, there is a column holding the website for each company.
 4 | # The last three digits specify what type of web address they are using.
 5 | # Pull these extensions and provide how many of each website type exist in the
 6 | # accounts table.
 7 | 
 8 | # Group on the three-character website suffix; the most common suffix comes first.
 9 | SELECT RIGHT(website, 3) AS web_address, COUNT(*) AS num_companies
10 | FROM accounts
11 | GROUP BY RIGHT(website, 3)
12 | ORDER BY num_companies DESC;
13 | 
14 | 
15 | # 2.
16 | /*
17 | There is much debate about how much the name (or even the first letter of a company name)
18 | matters. Use the accounts table to pull the first letter of each company name to see the
19 | distribution of company names that begin with each letter (or number).
20 | */
21 | # Upper-case the name first so 'a' and 'A' are counted in the same group.
22 | SELECT LEFT(UPPER(name), 1) AS first_char, COUNT(*) AS num_companies
23 | FROM accounts
24 | GROUP BY LEFT(UPPER(name), 1)
25 | ORDER BY num_companies DESC;
26 | 
27 | # 3. Use the accounts table and a CASE statement to create two groups: one group
28 | # of company names that start with a number and a second group of those company names that
29 | # start with a letter. What proportion of company names start with a letter?
30 | 
31 | SELECT SUM(num) nums, SUM(letter) letters
32 | FROM (SELECT name, CASE WHEN LEFT(UPPER(name), 1) IN ('0','1','2','3','4','5','6','7','8','9') 
33 |                        THEN 1 ELSE 0 END AS num, 
34 |          CASE WHEN LEFT(UPPER(name), 1) IN ('0','1','2','3','4','5','6','7','8','9') 
35 |                        THEN 0 ELSE 1 END AS letter
36 |       FROM accounts) t1;
37 | 
38 | # or, with a POSIX regex match (~); LIKE would treat '^[0-9]' as literal text
39 | 
40 | SELECT SUM(CASE WHEN LEFT(name, 1) ~ '^[0-9]' THEN 1 ELSE 0 END) AS num,
41 |        SUM(CASE WHEN LEFT(name, 1) ~ '^[0-9]' THEN 0 ELSE 1 END) AS letter
42 | FROM accounts;
43 | 
44 | 
45 | # 4. Consider vowels as a, e, i, o, and u. What proportion of company names start with a vowel,
46 | # and what percent start with anything else?
47 | 
48 | #There are 80 company names that start with a vowel and 271 that start with other characters.
49 | #Therefore 80/351 are vowels or 22.8%. Therefore, 77.2% of company names do not start with vowels.
50 | SELECT SUM(vowels) vowels, SUM(other) other
51 | FROM (SELECT name, CASE WHEN LEFT(UPPER(name), 1) IN ('A','E','I','O','U') 
52 |                         THEN 1 ELSE 0 END AS vowels, 
53 |           CASE WHEN LEFT(UPPER(name), 1) IN ('A','E','I','O','U') 
54 |                        THEN 0 ELSE 1 END AS other
55 |          FROM accounts) t1;
56 | 


--------------------------------------------------------------------------------
/basic_sql_lesson28/notes.md:
--------------------------------------------------------------------------------
 1 | # Basic SQL.
 2 | 
 3 | One way to store data is to use spreadsheets. An **ERD** (Entity Relationship Diagram) is used to visualize the relationships between spreadsheets. Each spreadsheet is represented as a table: the name of the table is at the top, and each column name is listed below it. For example:
 4 | ![table, columns](table.png)
 5 | 
 6 | SQL is a language used to interact with a database. It can query one table or across multiple tables.
 7 | 
 8 | A database is a collection of tables that share connected data stored in a computer.
 9 | 
10 | Below is the ERD for the database we will use from Parch & Posey. These diagrams help you visualize the data you are analyzing including:
11 | 
12 | 1. The names of the tables.
13 | 2. The columns in each table.
14 | 3. The way the tables work together.
15 | 4. You can think of each of the boxes below as a spreadsheet.
16 | ![Parch and Posey DB](parch_posey_db.png)
17 | Note: glossy_qty is incorrect, it is actually gloss_qty in the database
18 | 
19 | # Why SQL?
20 | 
21 | **SQL** has a variety of functions that allow you to read, manipulate and change data. Why **SQL** is so popular for data analysis:
22 | 
23 | 1. **SQL** is easy to understand and learn.
24 | 2. Access data directly.
25 | 3. Easy to audit and copy data.
26 | 4. **SQL** can run queries on multiple tables at once, across large datasets.
27 | 5. You can do: sum, count, max, min..
28 | 6. **SQL** is flexible compared to Google Analytics and Excel.
29 | 
30 | **NoSQL** stands for not only **SQL**. **NoSQL** environments are popular for web-based data, but less popular for data that lives in spreadsheets.
31 | 
32 | One of the most popular **NoSQL** database is **MongoDB**. Instead of storing data in tables made out of individual rows, like a relational database does, it stores data in collections made out of individual documents.
33 | 
34 | ## Why Businesses like Databases?
35 | 
36 | 1. Data integrity is ensured - only the data you want entered is entered, and only certain users are able to enter data into the database. 
37 | 2. Data can be accessed quickly - SQL allows you to obtain results very quickly from the data stored in a database. 
38 | 3. Data is easily shared - multiple individuals can access data stored in a database, and the data is the same for all users allowing for consistent results for anyone with access to your database.
39 | 
40 | ## How DB store data?
41 | 
42 | Data in a DB is stored in tables. DB tables are organized by columns, and each column must have a `unique name`. All data in a column must be of the same type (don't mix string, text or numbers).
43 | 
44 | Consistent column types are one of the main reasons working with db is fast. 
45 | 
46 | [Comparison of Relational
47 |  Database](https://www.digitalocean.com/community/tutorials/sqlite-vs-mysql-vs-postgresql-a-comparison-of-relational-database-management-systems)


--------------------------------------------------------------------------------
/functions_lesson_26/generator_quizzes.py:
--------------------------------------------------------------------------------
  1 | # Quiz: Implement my_enumerate
  2 | """
  3 | Write your own generator function that works like the built-in function enumerate.
  4 | 
  5 | Calling the function like this:
  6 | 
  7 | lessons = ["Why Python Programming", "Data Types and Operators", "Control Flow",
  8 |            "Functions", "Scripting"]
  9 | 
 10 | for i, lesson in my_enumerate(lessons, 1):
 11 |     print("Lesson {}: {}".format(i, lesson))
 12 | 
 13 | should output:
 14 | 
 15 | Lesson 1: Why Python Programming
 16 | Lesson 2: Data Types and Operators
 17 | Lesson 3: Control Flow
 18 | Lesson 4: Functions
 19 | Lesson 5: Scripting
 20 | """
 21 | 
 22 | 
 23 | lessons = ["Why Python Programming", "Data Types and Operators", "Control Flow", "Functions", "Scripting"]
 24 | 
 25 | 
 26 | def my_enumerate(iterable, start=0):
 27 |     # Index-based version: uses len() and indexing, so `iterable` must be a sequence
 28 |     for i in range(start, len(iterable) + start):
 29 |         yield(i, iterable[i-start])
 30 |         
 31 | for i, lesson in my_enumerate(lessons, 1):
 32 |     print("Lesson {}: {}".format(i, lesson))
 33 | 
 34 | 
 35 | # print  5 lessons
 36 | 
 37 | lessons = ["Why Python Programming", "Data Types and Operators", "Control Flow", "Functions", "Scripting"]
 38 | 
 39 | 
 40 | def my_enumerate(iterable, start=0):
 41 |     i = start
 42 |     for num in iterable:
 43 |         yield i, num
 44 |         i += 1
 45 | 
 46 | for i, lesson in my_enumerate(lessons, 1):
 47 |     print("Lesson {}: {}".format(i, lesson))
 48 | 
 49 | 
 50 | # Quiz: Chunker
 51 | 
 52 | """
 53 | If you have an iterable that is too large to fit in memory in full (e.g.,
 54 | when dealing with large files), being able to take and use chunks of it at a
 55 | time can be very valuable.
 56 | 
 57 | Implement a generator function, chunker, that takes in an iterable and yields
 58 | a chunk of a specified size at a time.
 59 | 
 60 | should output:
 61 | 
 62 | [0, 1, 2, 3]
 63 | [4, 5, 6, 7]
 64 | [8, 9, 10, 11]
 65 | [12, 13, 14, 15]
 66 | [16, 17, 18, 19]
 67 | [20, 21, 22, 23]
 68 | [24]
 69 | """
 70 | 
 71 | 
 72 | def chunker(iterable, size):
 73 |     for i in range(0, len(iterable), size):
 74 |         index = i + size
 75 |         lst = iterable[i:index]
 76 |         yield lst
 77 | 
 78 | 
 79 | for chunk in chunker(range(25), 4):
 80 |     print(list(chunk))
 81 | 
 82 | 
 83 | # Udacity solution
 84 | 
 85 | def chunker(iterable, size):
 86 |     """Yield successive chunks from iterable of length size."""
 87 |     for i in range(0, len(iterable), size):
 88 |         yield iterable[i:i + size]
 89 | 
 90 | for chunk in chunker(range(25), 4):
 91 |     print(list(chunk))
 92 | 
 93 | 
 94 | # Generator Expressions
 95 | 
 96 | 
 97 | sq_list = [x**2 for x in range(10)]  # this produces a list of squares
 98 | 
 99 | sq_iterator = (x**2 for x in range(10))  # this produces an iterator of squares
100 | 
101 | 
102 | 


--------------------------------------------------------------------------------
/data_types_and_operators_lesson_24/list_methods_tuples_sets.md:
--------------------------------------------------------------------------------
 1 | # List Methods.
 2 | 
 3 | Python provides methods that operate on lists. Methods such as `append`, `insert` and `reverse` modify the list in place instead of creating a new list.
 4 | 
 5 | **Useful** functions for list:
 6 | 
 7 | 1. `l.append(v)` appends value v to the end of list l.
 8 | 2. `l.insert(i, v)` inserts value v at index i in list l.
 9 | 3. `l.reverse()` reverses the order of the values in list l.
10 | 4. `len()` returns how many elements are in a list.
11 | 5. `max()` returns the greatest element of the list. 
12 | 6. `sorted()` returns a copy of a list in order from smallest to largest, leaving the list unchanged.
13 | 
14 | 
15 | # Join Method.
16 | 
17 | `join` takes a list as an argument and returns a string consisting of the list elements joined by a separator string. `\n` is a separator for a new line between elements.
18 | ```python
19 | new_str = "\n".join(["ann", "get", "an", "umbrella"])
20 | print(new_str)
21 | 
22 | Output:
23 | 
24 | ann
25 | get
26 | an
27 | umbrella
28 | ```
29 | It is important to remember to separate each of the items in the list you are joining with a comma (,). Forgetting to do so will not trigger an error, but will also give you unexpected results.
30 | 
31 | # Tuples.
32 | 
33 | A tuple is a sequence of values. These values can be of any type; they are indexed by integers and can be accessed by their indices. Tuples are immutable: you can't add or remove items from tuples, or sort them in place.
34 | 
35 | They are often used to store related pieces of information (for example: latitude and longitude coordinates). Tuples are also used to assign multiple variables in a compact way.
36 | 
37 | Tuple unpacking is used for assigning information from a tuple to multiple variables without having to access them one by one and write multiple assignment statements.
38 | ```python
39 | dimensions = 52, 40, 100
40 | length, width, height = dimensions # tuple unpacking
41 | print("The dimensions are {} x {} x {}".format(length, width, height))
42 | ```
43 | 
44 | ```python
45 | tuple_a = 1, 2
46 | tuple_b = (1, 2)
47 | 
48 | print(tuple_a == tuple_b)
49 | print(tuple_a[1])
50 | 
51 | Output:
52 | True #Parentheses are optional when making a tuple.
53 | 2
54 | ```
55 | 
56 | # Sets.
57 | 
58 | A set is an unordered collection of unique elements; any element appears in a set at most once, so there are no **duplicates**. Unordered means that elements are not sorted in any order.
59 | We can create a set from a list:
60 | ```python
61 | apples_set = set(apples)
62 | print(len(apples_set))
63 | ```
64 | 
65 | Sets support the `in` operator the same as lists do.
66 | Set operations:
67 | `add` adds element to a `set`.
68 | `pop` removes and returns an arbitrary element.
69 | 
70 | ```python 
71 | fruit = {"apple", "banana", "orange", "grapefruit"}  # define a set
72 | 
73 | print("watermelon" in fruit)  # check for element
74 | 
75 | fruit.add("watermelon")  # add an element
76 | print(fruit)
77 | 
78 | print(fruit.pop())  # remove a random element
79 | print(fruit)
80 | 
81 | Output:
82 | False
83 | {'grapefruit', 'orange', 'watermelon', 'banana', 'apple'}
84 | grapefruit
85 | {'orange', 'watermelon', 'banana', 'apple'}
86 | ```


--------------------------------------------------------------------------------
/control_flow_lesson_25/control_flow_practice.py:
--------------------------------------------------------------------------------
  1 | # Practice: Conditional Statement
  2 | 
  3 | points = 174  # use this input to make your submission
  4 | 
  5 | if points <= 50:
  6 |     result = "Congratulations! You won a wooden rabbit!"
  7 | 
  8 | elif 51 <= points <= 150:
  9 |     result = "Oh dear, no prize this time."
 10 | 
 11 | elif 151 <= points <= 180:
 12 |     result = "Congratulations! You won a wafer-thin mint!"
 13 | 
 14 | else:
 15 |     result = "Congratulations! You won a penguin!"
 16 | 
 17 | print(result)
 18 | 
 19 | 
 20 | # Quiz: Guess My Number
 21 | 
 22 | # You decide you want to play a game where you are hiding
 23 | # a number from someone.  Store this number in a variable
 24 | # called 'answer'.  Another user provides a number called
 25 | # 'guess'.  By comparing guess to answer, you inform the user
 26 | # if their guess is too high or too low.
 27 | 
 28 | answer = 10  # provide answer
 29 | guess = 5  # provide guess
 30 | 
 31 | if guess < answer:
 32 |     result = "Oops!  Your guess was too low."
 33 | elif guess > answer:
 34 |     result = "Oops!  Your guess was too high."
 35 | elif guess == answer:
 36 |     result = "Nice!  Your guess matched the answer!"
 37 | 
 38 | print(result)
 39 | 
 40 | 
 41 | # Quiz: Tax Purchase
 42 | 
 43 | # Depending on where an individual is from we need to tax them
 44 | # appropriately.  The states of CA, MN, and
 45 | # NY have taxes of 7.5%, 9.5%, and 8.9% respectively.
 46 | # Use this information to take the amount of a purchase and
 47 | # the corresponding state to assure that they are taxed by the right
 48 | # amount.
 49 | 
 50 | state = 'CA'  # Either CA, MN, or NY
 51 | purchase_amount = 21  # amount of purchase
 52 | 
 53 | if state == 'CA':
 54 |     tax_amount = .075
 55 |     total_cost = purchase_amount*(1+tax_amount)
 56 |     result = "Since you're from {}, your total cost is {}.".format(state, total_cost)
 57 | 
 58 | elif state == 'MN':
 59 |     tax_amount = .095
 60 |     total_cost = purchase_amount*(1+tax_amount)
 61 |     result = "Since you're from {}, your total cost is {}.".format(state, total_cost)
 62 | 
 63 | elif state == 'NY':
 64 |     tax_amount = .089
 65 |     total_cost = purchase_amount*(1+tax_amount)
 66 |     result = "Since you're from {}, your total cost is {}.".format(state, total_cost)
 67 | 
 68 | print(result)
 69 | 
 70 | 
 71 | # Quiz: Boolean Expressions for Conditions
 72 | 
 73 | #You will use a new variable prize to store a prize name if one was won, and
 74 | #then use the truth value of this variable to compose the result message. This
 75 | #will involve two if statements.
 76 | 
 77 | #1st conditional statement: update prize to the correct prize name based
 78 | #on points.
 79 | #2nd conditional statement: set result to the correct phrase based on whether
 80 | #prize is evaluated as True or False.
 81 | 
 82 | 
 83 | points = 174
 84 | 
 85 | # establish the default prize value to None
 86 | prize = None
 87 | 
 88 | # use the points value to assign prizes to the correct prize names
 89 | if points <= 50:
 90 |     prize = "wooden rabbit"
 91 | elif 151 <= points <= 180:
 92 |     prize = "wafer-thin mint"
 93 | elif 181 <= points <= 200:
 94 |     prize = "penguin"
 95 | 
 96 | # use the truth value of prize to assign result to the correct prize
 97 | if prize:
 98 |     result = "Congratulations! You won a {}!".format(prize)
 99 | else:
100 |     result = "Oh dear, no prize this time."
101 | 
102 | print(result)
103 | 


--------------------------------------------------------------------------------
/functions_lesson_26/functions.py:
--------------------------------------------------------------------------------
  1 | # Print vs. Return in Functions
  2 | 
  3 | 
  4 | # this prints something, but does not return anything
  5 | def show_plus_ten(num):
  6 |     print(num + 10)
  7 | 
  8 | 
  9 | # this returns something
 10 | def add_ten(num):
 11 |     return(num + 10)
 12 | 
 13 | print('Calling show_plus_ten...')
 14 | return_value_1 = show_plus_ten(5)
 15 | print('Done calling')
 16 | print('This function returned: {}'.format(return_value_1))
 17 | 
 18 | 
 19 | print('\nCalling add_ten...')
 20 | return_value_2 = add_ten(10)
 21 | print('Done calling')
 22 | print('This function returned: {}'.format(return_value_2))
 23 | 
 24 | 
 25 | # Quiz: Population Density Function
 26 | 
 27 | def population_density(population, land_area):
 28 |     return population / land_area
 29 | 
 30 | # test cases for your function
 31 | test1 = population_density(10, 1)
 32 | expected_result1 = 10
 33 | print("expected result: {}, actual result: {}".format(expected_result1, test1))
 34 | 
 35 | test2 = population_density(864816, 121.4)
 36 | expected_result2 = 7123.6902801
 37 | print("expected result: {}, actual result: {}".format(expected_result2, test2))
 38 | 
 39 | 
 40 | # Quiz: readable_timedelta
 41 | 
 42 | def readable_timedelta(days):
 43 |     """
 44 |     Return a string of the number of weeks and days included in days.
 45 | 
 46 |     Parameters:
 47 |     days -- number of days to convert (int)
 48 | 
 49 |     Returns:
 50 |     string of the number of weeks and days included in days
 51 |     """
 52 | 
 53 |     week = days // 7
 54 |     # % to get the number of days that remain
 55 |     day = days % 7
 56 |     return "{} week(s) and {} day(s).".format(week, day)
 57 | 
 58 | print(readable_timedelta(6))
 59 | 
 60 | # Variable scope 
 61 | 
 62 | egg_count = 0
 63 | 
 64 | def buy_eggs(count):
 65 |     return count + 12  # purchase a dozen eggs
 66 | 
 67 | egg_count = buy_eggs(egg_count)
 68 | 
 69 | 
 70 | # Quiz: Lambda with Map
 71 | # Rewrite this code to be more concise by replacing the mean function with a
 72 | # lambda expression defined within the call to map().
 73 | 
 74 | numbers = [
 75 |               [34, 63, 88, 71, 29],
 76 |               [90, 78, 51, 27, 45],
 77 |               [63, 37, 85, 46, 22],
 78 |               [51, 22, 34, 11, 18]
 79 |             ]
 80 | 
 81 | 
 82 | def mean(num_list):
 83 |     return sum(num_list) / len(num_list)
 84 | 
 85 | averages = list(map(mean, numbers))
 86 | print(averages)
 87 | 
 88 | # With lambda:
 89 | 
 90 | numbers = [
 91 |               [34, 63, 88, 71, 29],
 92 |               [90, 78, 51, 27, 45],
 93 |               [63, 37, 85, 46, 22],
 94 |               [51, 22, 34, 11, 18]
 95 |            ]
 96 | 
 97 | averages = list(map(lambda x: sum(x) / len(x), numbers))
 98 | 
 99 | print(averages)
100 | 
101 | 
102 | # Quiz: Lambda with Filter
103 | # Rewrite this code to be more concise by replacing the is_short function with
104 | # a lambda expression defined within the call to filter()
105 | 
106 | cities = ["New York City", "Los Angeles", "Chicago", "Mountain View", "Denver", "Boston"]
107 | 
108 | def is_short(name):
109 |     return len(name) < 10
110 | 
111 | short_cities = list(filter(is_short, cities))
112 | print(short_cities)
113 | 
114 | 
115 | # With lambda
116 | cities = ["New York City", "Los Angeles", "Chicago", "Mountain View", "Denver", "Boston"]
117 | 
118 | short_cities = list(filter(lambda city: len(city) < 10, cities))
119 | 
120 | print(short_cities)


--------------------------------------------------------------------------------
/intro_to_research_methods_lessons_1_5/terminology_intro_to_research_methods.md:
--------------------------------------------------------------------------------
 1 | # Constructs, Variables, Operational definition.
 2 | 
 3 | **Construct** is a variable that is not directly observable or measurable. But once a construct has been operationally 
 4 | defined, variables are created. Examples of Construct: effort, itchiness, hunger, maturity, wisdom...
 5 | 
 6 | 
 7 | |Construct      |       Operational definition            |
 8 | |   :---:       |                  :---:                   |
 9 | |`Stress`       |       Level of cortisol (stress hormone) |
10 | |`Hunger`       |       Grams of food consumed             |
11 | |`Effort`       |       Minutes spent studying for an exam |
12 | 
13 | **Operational definition** describes how researchers decide to measure the variables (in our case constructs) in a study. It also
14 |   helps you to measure constructs in the real world by turning them into measurable variables.
15 | 
16 | **Hypothesis** is a statement about the relationship between the variables.
17 | 
18 | All experiments/researches examine some kind of variable(s). A variable is not only something that we measure, but also something that we can manipulate and something we can control for.
19 | 
20 | 1. Dependent Variable or Outcome, or y-variable.
21 |   - Is a variable that is dependent on an independent variable(s).
22 | 
23 | 2. Independent Variable, sometimes called Experimental Variable, Manipulated Variable, Predictor, or x-variable.
24 |   - Is a variable that is being manipulated in an experiment in order to observe the effect on a Dependent Variable, sometimes called an Outcome Variable.
25 | 
26 | 3. Lurking Variables or Extraneous factors are variables/factors that can impact the Outcome/Dependent Variable.
27 | 
28 | 
29 | # Sample, population.
30 | 
31 | **Population (or mu)** are values that describe the entire population. 
32 | A `parameter` is any numerical quantity that characterizes a given population or some aspect of it. This means the parameter tells us something about the whole population. Example of parameters: standard deviation, population mean (average)
33 | `N` is a population size.
34 | `mu` is an average (or a mean) of the entire population.
35 | 
36 | **Sample (or X-bar)** is a portion of a population selected for the study. A measurable characteristic of a sample is called a `statistic`.
37 | `n` is the sample size.
38 | `X-bar` is the sample average (or mean).
39 | 
40 | ![population vs sample](population_sample.png)
41 | 
42 | # Sampling designs.
43 | 
44 | **Random sample** means that each element in the population has an equal chance of being included to the sample. 
45 | 
46 | **Random selection (or sampling)** is randomly choosing a sample from a population.
47 | 
48 | **Convenience selection (or sampling)** is a selection based on easy availability/accessibility of elements; it doesn't represent the entire population.
49 | 
50 | 
51 | # Sampling error.
52 | 
53 | **Sampling error** is the difference between a population parameter and a sample statistic used to estimate it. Sampling error occurs because a portion, and not the entire population, is surveyed.
54 | 
55 | Sampling error formula:
56 | - `mu - X-bar` or `X-bar - mu` where `mu` is a population average and `X-bar` is a sample average
57 | 
58 | ![sampling_error](sampling_error.png)
59 | 
60 | 
61 | # Bias.
62 | 
63 | Bias - any systematic failure of a sample to represent its population. 
64 | The most common is called a **simple random bias**. The best way to avoid random bias is to select elements for the sample at random. 
65 | **Non-response bias** occurs when individuals randomly sampled for a survey fail to respond, cannot respond or decline to participate.
66 | 
67 | 
68 | Links:
69 | - [Sampling error][1]
70 | - [Estimation of a population][2]
71 | - [OpenIntro Statistics Second Edition by Christopher D. Barr, David M. Diez, and Mine Çetinkaya-Rundel][3]
72 | 
73 | [1]: https://www.britannica.com/science/sampling-error
74 | [2]: https://www.britannica.com/science/statistics/Estimation-of-a-population-mean#ref367452
75 | [3]: https://www.openintro.org/stat/textbook.php?stat_book=os


--------------------------------------------------------------------------------
/aggregations_lesson_30/cas_statement.sql:
--------------------------------------------------------------------------------
  1 | # 1. Quiz: CASE
  2 | 
  3 | /*
  4 | We would like to understand 3 different levels of customers based on the amount associated with their purchases.
  5 | The top branch includes anyone with a Lifetime Value (total sales of all orders) greater than 200,000 usd.
  6 | The second branch is between 200,000 and 100,000 usd. The lowest branch is anyone under 100,000 usd.
  7 | Provide a table that includes the level associated with each account. You should provide the account name, 
  8 | the total sales of all orders for the customer, and the level. Order with the top spending customers listed first.
  9 | */
 10 | /* One row per account: lifetime total and its spending level, biggest spenders first. */
 11 | SELECT a.name,
 12 |        SUM(o.total_amt_usd) AS total_spent,
 13 |        CASE WHEN SUM(o.total_amt_usd) > 200000 THEN 'Over 200,000'
 14 |             WHEN SUM(o.total_amt_usd) > 100000 THEN 'Over 100,000'
 15 |             ELSE 'Less 100,000' END AS total_level
 16 | FROM orders o
 17 | JOIN accounts a ON a.id = o.account_id
 18 | GROUP BY a.name
 19 | ORDER BY total_spent DESC;
 20 | 
 21 | 
 22 | # 2. Quiz: CASE
 23 | 
 24 | /* 
 25 | We would now like to perform a similar calculation to the first, but we want to obtain the total amount spent
 26 | by customers only in 2016 and 2017. Keep the same levels as in the previous question. Order with the top spending
 27 | customers listed first.
 28 | */
 29 | 
 30 | /* 2016-2017 combined, one row per customer; the half-open range keeps orders timestamped during 2017-12-31. */
 31 | SELECT a.name,
 32 |        SUM(o.total_amt_usd) AS total_spent,
 33 |        CASE WHEN SUM(o.total_amt_usd) > 200000 THEN 'Over 200,000'
 34 |             WHEN SUM(o.total_amt_usd) > 100000 THEN 'Over 100,000'
 35 |             ELSE 'Less 100,000' END AS total_level
 36 | FROM orders o
 37 | JOIN accounts a ON a.id = o.account_id
 38 | WHERE o.occurred_at >= '2016-01-01' AND o.occurred_at < '2018-01-01'
 39 | GROUP BY a.name
 40 | ORDER BY total_spent DESC;
 41 | 
 42 | # 2. Udacity solution
 43 | 
 44 | SELECT a.name, SUM(total_amt_usd) total_spent, 
 45 |      CASE WHEN SUM(total_amt_usd) > 200000 THEN 'top'
 46 |      WHEN  SUM(total_amt_usd) > 100000 THEN 'middle'
 47 |      ELSE 'low' END AS customer_level
 48 | FROM orders o
 49 | JOIN accounts a
 50 | ON o.account_id = a.id
 51 | WHERE occurred_at > '2015-12-31' 
 52 | GROUP BY 1
 53 | ORDER BY 2 DESC;
 54 | 
 55 | # 3. Quiz: CASE
 56 | 
 57 | /*
 58 | We would like to identify top performing sales reps, which are sales reps associated with more than 200 orders.
 59 | Create a table with the sales rep name, the total number of orders, and a column with top or not depending on if
 60 | they have more than 200 orders. Place the top sales people first in your final table.
 61 | */
 62 | /* Reps with more than 200 orders are labelled 'top'; the largest order counts come first. */
 63 | SELECT s.name,
 64 |        COUNT(*) AS number_of_orders,
 65 |        CASE WHEN COUNT(*) > 200 THEN 'top'
 66 |             ELSE 'not' END AS sales_level
 67 | FROM orders o
 68 | JOIN accounts a ON o.account_id = a.id
 69 | JOIN sales_reps s ON a.sales_rep_id = s.id
 70 | GROUP BY s.name
 71 | ORDER BY number_of_orders DESC;
 72 | 
 73 | # 4. Quiz: CASE
 74 | 
 75 | /*
 76 | The previous didn't account for the middle, nor the dollar amount associated with the sales. Management
 77 | decides they want to see these characteristics represented as well. We would like to identify top performing
 78 | sales reps, which are sales reps associated with more than 200 orders or more than 750000 in total sales.
 79 | The middle group has any rep with more than 150 orders or 500000 in sales. Create a table with the sales rep name,
 80 | the total number of orders, total sales across all orders, and a column with top, middle, or low depending on this
 81 | criteria. Place the top sales people based on dollar amount of sales first in your final table.
 82 | */
 83 | /* Either threshold (order count OR total sales) qualifies a rep for a level; sorted by total sales. */
 84 | SELECT s.name,
 85 |        COUNT(*) AS number_of_orders,
 86 |        SUM(o.total_amt_usd) AS total_usd,
 87 |        CASE WHEN COUNT(*) > 200 OR SUM(o.total_amt_usd) > 750000 THEN 'top'
 88 |             WHEN COUNT(*) > 150 OR SUM(o.total_amt_usd) > 500000 THEN 'middle'
 89 |             ELSE 'low' END AS sales_level
 90 | FROM orders o
 91 | JOIN accounts a ON o.account_id = a.id
 92 | JOIN sales_reps s ON a.sales_rep_id = s.id
 93 | GROUP BY s.name
 94 | ORDER BY total_usd DESC;
 95 | 
 96 | # 4. Udacity Solution
 97 | 
 98 | SELECT s.name, COUNT(*), SUM(o.total_amt_usd) total_spent, 
 99 |      CASE WHEN COUNT(*) > 200 OR SUM(o.total_amt_usd) > 750000 THEN 'top'
100 |      WHEN COUNT(*) > 150 OR SUM(o.total_amt_usd) > 500000 THEN 'middle'
101 |      ELSE 'low' END AS sales_rep_level
102 | FROM orders o
103 | JOIN accounts a
104 | ON o.account_id = a.id 
105 | JOIN sales_reps s
106 | ON s.id = a.sales_rep_id
107 | GROUP BY s.name
108 | ORDER BY 3 DESC;


--------------------------------------------------------------------------------
/sql_subqueries_temporary_table_lesson31/subqueries_tasks.sql:
--------------------------------------------------------------------------------
  1 | # 1. Quiz
  2 | # Find the number of events that occur for each day for each channel
  3 | 
  4 | SELECT DATE_TRUNC('day', occurred_at) as day,
  5 | 		channel,
  6 |         COUNT(*) as events_count
  7 | FROM web_events
  8 | GROUP BY day, channel
  9 | ORDER BY events_count DESC;
 10 | 
 11 | 
 12 | # 2. Quiz
 13 | # Create a subquery that provides all of the data from your first query.
 14 | 
 15 | SELECT *
 16 | FROM 
 17 | (SELECT DATE_TRUNC('day', occurred_at) as day,
 18 | 		channel,
 19 |         COUNT(*) as events_count
 20 | FROM web_events
 21 | GROUP BY day, channel
 22 | ORDER BY events_count DESC) sub;
 23 | 
 24 | 
 25 | # 3. Quiz
 26 | # Find the average number of events for each channel.
 27 | 
 28 | SELECT channel,
 29 | 	   AVG(events_count) as avg_events_count
 30 | FROM 
 31 | (SELECT DATE_TRUNC('day', occurred_at) as day,
 32 | 		channel,
 33 |         COUNT(*) as events_count
 34 | FROM web_events
 35 | GROUP BY day, channel) sub
 36 | GROUP BY channel
 37 | ORDER BY avg_events_count DESC;
 38 | 
 39 | # More on subqueries:
 40 | 
 41 | # pull the first month/year combo from the orders table
 42 | 
 43 | SELECT DATE_TRUNC('month', MIN(occurred_at)) AS min_month
 44 | FROM orders;
 45 | 
 46 | # Pull the results for that first month. Total amount spent (total_amt_usd):
 47 | 
 48 | SELECT SUM(total_amt_usd)
 49 | FROM orders
 50 | WHERE DATE_TRUNC('month', occurred_at) = 
 51 |       (SELECT DATE_TRUNC('month', MIN(occurred_at)) FROM orders);
 52 | 
 53 | # Average quantity for each kind of paper
 54 | 
 55 | SELECT AVG(standard_qty) as avg_standard,
 56 |        AVG(gloss_qty) as avg_gloss,
 57 |        AVG(poster_qty) as avg_poster
 58 | FROM orders
 59 | WHERE DATE_TRUNC('month', occurred_at) =
 60 | (SELECT DATE_TRUNC('month', MIN(occurred_at)) AS min
 61 | FROM orders);
 62 | 
 63 | 
 64 | # QUIZ: Subquery Mania
 65 | # 1. Provide the name of the sales_rep in each region with the largest amount of total_amt_usd sales.
 66 | /* Sum sales per rep, rank reps within their region, and keep each region's top rep (ties kept). */
 67 | SELECT name, region_id, total_amt
 68 | FROM (SELECT s.name, s.region_id, SUM(o.total_amt_usd) AS total_amt,
 69 |              RANK() OVER (PARTITION BY s.region_id ORDER BY SUM(o.total_amt_usd) DESC) AS sales_rank
 70 |       FROM sales_reps s JOIN accounts a ON s.id = a.sales_rep_id JOIN orders o ON a.id = o.account_id
 71 |       GROUP BY s.name, s.region_id) ranked
 72 | WHERE sales_rank = 1 ORDER BY total_amt DESC;
 73 | 
 74 | # 2. For the region with the largest (sum) of sales total_amt_usd, how many total (count) orders were placed? 
 75 | /* Aggregate per region only, then keep the single region with the largest sales sum. */
 76 | SELECT r.name, SUM(o.total_amt_usd) AS total_amt_usd, COUNT(*) AS total_orders
 77 | FROM region r
 78 | JOIN sales_reps s ON r.id = s.region_id
 79 | JOIN accounts a ON s.id = a.sales_rep_id
 80 | JOIN orders o ON a.id = o.account_id
 81 | GROUP BY r.name
 82 | ORDER BY total_amt_usd DESC LIMIT 1;
 83 | 
 84 | # 3. For the name of the account that purchased the most (in total over their lifetime as a customer)
 85 | # standard_qty paper, how many accounts still had more in total purchases? 
 86 | /* Count accounts whose lifetime SUM(total) exceeds that of the account with the most standard_qty. */
 87 | SELECT COUNT(*) AS accounts_with_more_total
 88 | FROM (SELECT a.name
 89 |       FROM orders o
 90 |       JOIN accounts a ON a.id = o.account_id
 91 |       GROUP BY a.name
 92 |       HAVING SUM(o.total) > (SELECT total_qty
 93 |                              FROM (SELECT a.name, SUM(o.standard_qty) AS standard_total, SUM(o.total) AS total_qty
 94 |                                    FROM accounts a
 95 |                                    JOIN orders o ON o.account_id = a.id
 96 |                                    GROUP BY a.name
 97 |                                    ORDER BY standard_total DESC
 98 |                                    LIMIT 1) top_standard)) more_total;
 99 | 
100 | -- 4. For the customer that spent the most (in total over their lifetime as
101 | -- a customer) total_amt_usd, how many web_events did they have for each channel?
102 | SELECT t1.name, t1.channel, t1.event_count
103 | FROM (SELECT a.name, w.channel, COUNT(w.channel) AS event_count
104 |       FROM web_events w
105 |       JOIN accounts a ON a.id = w.account_id
106 |       GROUP BY a.name, w.channel) t1  -- events per account and channel
107 | JOIN (SELECT a.name, SUM(o.total_amt_usd) AS total_usd
108 |       FROM accounts a
109 |       JOIN orders o ON a.id = o.account_id
110 |       GROUP BY a.name
111 |       ORDER BY total_usd DESC
112 |       LIMIT 1) t2  -- the single top-spending customer
113 |   ON t1.name = t2.name
114 | ORDER BY t1.event_count DESC;
115 | 
116 | 
117 | -- 5. What is the lifetime average amount spent in terms
118 | -- of total_amt_usd for the top 10 total spending accounts?
119 | -- List the ten accounts with the largest lifetime total_amt_usd.
120 | SELECT a.id, a.name, SUM(o.total_amt_usd) AS total_spent
121 | FROM orders o
122 | JOIN accounts a
123 |   ON a.id = o.account_id
124 | GROUP BY a.id, a.name
125 | ORDER BY total_spent DESC
126 | LIMIT 10;
127 | 
128 | -- Average of the 10 largest lifetime totals.
129 | 
130 | SELECT AVG(total_spent) AS avg_top10_spent
131 | FROM (SELECT a.id, a.name, SUM(o.total_amt_usd) AS total_spent
132 |       FROM orders o
133 |       JOIN accounts a
134 |         ON a.id = o.account_id
135 |       GROUP BY a.id, a.name
136 |       ORDER BY total_spent DESC
137 |       LIMIT 10) top_accounts;
138 | 
139 | -- 6. What is the lifetime average amount spent in terms
140 | -- of total_amt_usd for only the companies that spent more
141 | -- than the average of all orders.
142 | 
143 | SELECT AVG(avg_amt_usd) AS avg_of_above_average_accounts
144 | FROM (SELECT o.account_id, AVG(o.total_amt_usd) AS avg_amt_usd
145 |       FROM orders o
146 |       GROUP BY o.account_id
147 |       -- keep accounts whose average order beats the average over all orders
148 |       HAVING AVG(o.total_amt_usd) > (SELECT AVG(total_amt_usd)
149 |                                      FROM orders)) temp_table;
150 | 


--------------------------------------------------------------------------------
/control_flow_lesson_25/while_loops.py:
--------------------------------------------------------------------------------
  1 | # while Loops
  2 | 
  3 | # 1.Practice: Water Falls
  4 | 
  5 | # Print string vertical.
  6 | 
  7 | print_str = "Water falls"
  8 | 
  9 | # initialize a counting variable "i" to 0
 10 | i = 0
 11 | 
 12 | # write your while header line, comparing "i" to the length of the string
 13 | while i < len(print_str):
 14 |     #print out the current character from the string
 15 |     print(print_str[i])
 16 | 
 17 |     #increment counter variable in the body of the loop
 18 |     i = i + 1
 19 | 
 20 | #print(print_str)
 21 | 
 22 | 
 23 | # 2.Practice: Factorials with While Loops
 24 | 
 25 | """
 26 | Find the Factorial of a Number, using While Loop.
 27 | 
 28 | A factorial of a whole number is that number multiplied by every whole number
 29 | between itself and 1. For example, 6 factorial (written "6!")
 30 | equals 6 x 5 x 4 x 3 x 2 x 1 = 720. So 6! = 720.
 31 | 
 32 | We can write a while loop to take any given number, and figure out what its
 33 | factorial is.
 34 | 
 35 | Example: If number is 6, your code should compute and print the product of 720:
 36 | """
 37 | 
 38 | number = 6
 39 | product = number
 40 | 
 41 | while number > 1:
 42 |     number = number - 1
 43 |     product = product * number
 44 | 
 45 | print(product)
 46 | 
 47 | 
 48 | 
 49 | 
 50 | # 3.Practice: Factorials with For Loops
 51 | # Now use a For Loop to Find the Factorial!
 52 | 
 53 | number = 6
 54 | # We'll start with the product equal to the number
 55 | product = number
 56 | 
 57 | # Write a for loop that calculates the factorial of our number
 58 | for num in range(1, number):
 59 |     if num > 1:
 60 |         number = number - 1
 61 |         product = product * number
 62 | 
 63 | print(product)
 64 | 
 65 | # another solution without if statement
 66 | for num in range(1, number):
 67 |     product *= num
 68 | 
 69 | print(product)
 70 | 
 71 | 
 72 | # Quiz: Count by
 73 | #
 74 | # 1. Suppose you want to count from some number start_num by another number
 75 | # count_by until you hit a final number end_num. Use break_num as the variable
 76 | # that you'll change each time through the loop.
 77 | 
 78 | start_num = 2  # start number
 79 | end_num = 66  # end number that you stop when you hit
 80 | count_by = 3  # some number to count by
 81 | 
 82 | break_num = start_num
 83 | while break_num < end_num:
 84 |     break_num = break_num + count_by
 85 | 
 86 | print(break_num)
 87 | 
 88 | 
 89 | # 2. Now in addition, address what would happen if someone gives a start_num
 90 | # that is greater than end_num. If this is the case, set result to "Oops! Looks
 91 | # like your start value is greater than the end value. Please try again."
 92 | # Otherwise, set result to the value of break_num.
 93 | 
 94 | start_num = 2  # some start number
 95 | end_num = 22  # some end number that you stop when you hit
 96 | count_by = 3  # some number to count by 
 97 | 
 98 | # condition to check that end_num is larger than start_num before looping
 99 | 
100 | if start_num > end_num:
101 |     result = "Oops! Looks like your start value is greater than the end value. Please try again."
102 | 
103 | else:
104 |     break_num = start_num
105 |     while break_num < end_num:
106 |         break_num += count_by
107 |     result = break_num
108 | 
109 | print(result)
110 | 
111 | 
112 | # 3. Write a while loop that finds the largest square number less than an
113 | # integerlimit and stores it in a variable nearest_square
114 | 
115 | limit = 40
116 | 
117 | count = 1
118 | nearest_square = 1
119 | 
120 | while (count + 1) ** 2 < limit:
121 |     count = count + 1
122 |     nearest_square = count ** 2
123 |     #print(nearest_square) # to print all possible nearest square
124 | 
125 | print(nearest_square)
126 | 
127 | # Break and Continue
128 | 
129 | manifest = [("bananas", 15), ("mattresses", 24), ("dog kennels", 42), ("machine", 120), ("cheeses", 5)]
130 | 
131 | # the code breaks the loop when weight exceeds or reaches the limit
132 | print("METHOD 1")
133 | weight = 0
134 | items = []
135 | for cargo_name, cargo_weight in manifest:
136 |     print("current weight: {}".format(weight))
137 |     if weight >= 100:
138 |         print("  breaking loop now!")
139 |         break
140 |     else:
141 |         print("  adding {} ({})".format(cargo_name, cargo_weight))
142 |         items.append(cargo_name)
143 |         weight += cargo_weight
144 | 
145 | print("\nFinal Weight: {}".format(weight))
146 | print("Final Items: {}".format(items))
147 | 
148 | # skips an iteration when adding an item would exceed the limit
149 | # breaks the loop if weight is exactly the value of the limit
150 | print("\nMETHOD 2")
151 | weight = 0
152 | items = []
153 | for cargo_name, cargo_weight in manifest:
154 |     print("current weight: {}".format(weight))
155 |     if weight >= 100:
156 |         print("  breaking from the loop now!")
157 |         break
158 |     elif weight + cargo_weight > 100:
159 |         print("  skipping {} ({})".format(cargo_name, cargo_weight))
160 |         continue
161 |     else:
162 |         print("  adding {} ({})".format(cargo_name, cargo_weight))
163 |         items.append(cargo_name)
164 |         weight += cargo_weight
165 | 
166 | print("\nFinal Weight: {}".format(weight))
167 | print("Final Items: {}".format(items))
168 | 
169 | 
170 | 


--------------------------------------------------------------------------------
/basic_sql_lesson28/syntax_sql.md:
--------------------------------------------------------------------------------
  1 | ## Types of Statements.
  2 | 
  3 | The SQL language has a few different elements, the most basic of which is a statement. `Statements` tell the db what you'd like to do with the data. 
  4 | 
  5 | `CREATE TABLE` is a statement that creates a new table in a db, changes the data in a db.
  6 | 
  7 | `DROP TABLE` removes a table in a db, changes the data in a db.
  8 | 
  9 | `SELECT` allows you to read data and display it. SELECT statements are commonly referred to as **queries**. 
 10 | 
 11 | ## SELECT and FROM
 12 | 
 13 | In order to generate the list of all orders, write a SELECT statement. 
 14 | 
 15 | `SELECT` is where you tell the query what columns you want back. Column names are separated by commas with no comma after the last column name. 
 16 | 
 17 | `SELECT *`: SELECT with an asterisk means select all columns.
 18 | 
 19 | `FROM` is where you tell the query what table you are querying from. Notice the columns need to exist in this table.
 20 | 
 21 | Both SELECT and FROM clauses are mandatory.
 22 | 
 23 | ## Formatting.
 24 | 
 25 | It is common practice to capitalize commands (SELECT, FROM). This makes queries easier to read, which will matter more as you write more complex queries.
 26 | 
 27 | It is common to use underscores and avoid spaces in column names. It is a bit annoying to work with spaces in SQL.
 28 | 
 29 | SQL is not case sensitive, but it's a good habit to capitalize commands. 
 30 | 
 31 | Depending on your SQL environment, your query may need a semicolon at the end to execute. Other environments are more flexible in terms of this being a "requirement." 
 32 | 
 33 | Best practice:
 34 | 
 35 | ```
 36 | SELECT column
 37 | 
 38 | FROM table;
 39 | ```
 40 | 
 41 | ## LIMIT
 42 | 
 43 | LIMIT statement is used to retrieve records from one or more tables in a database and limit the number of records returned based on a limit value. 
 44 | 
 45 | ```
 46 | SELECT *
 47 | FROM table
 48 | LIMIT 10;
 49 | ```
 50 | 
 51 | ## ORDER BY
 52 | 
 53 | The ORDER BY statement allows you to order the table by any column. It goes between the FROM and LIMIT clauses. By default ORDER BY goes from `a to z`, lowest to highest, or earliest to latest if working with dates. This is referred to as ascending order.
 54 | 
 55 | To sort in descending order (from highest to lowest), add DESC after the column in the ORDER BY statement.
 56 | 
 57 | ## WHERE
 58 | 
 59 | The WHERE statement allows you to filter a set of results based on specific criteria. The WHERE clause goes after FROM but before ORDER BY or LIMIT.
 60 | 
 61 | Comparison operators:
 62 | ```
 63 | > (greater than)
 64 | 
 65 | < (less than)
 66 | 
 67 | >= (greater than or equal to)
 68 | 
 69 | <= (less than or equal to)
 70 | 
 71 | = (equal to)
 72 | 
 73 | != (not equal to)
 74 | ```
 75 | 
 76 | ## WHERE with Non-Numerical Data.
 77 | 
 78 | Comparison operators can work with non-numerical data as well. If you're using an operator with values that are non-numerical you'll need to put the value in single quotes.
 79 | 
 80 | ## Arithmetic Operators
 81 | 
 82 | **Derived Column**: a new column that is a manipulation of the existing columns in your db.
 83 | It can include simple arithmetic or any number of advanced calculations. 
 84 | ```
 85 | * (Multiplication)
 86 | 
 87 | + (Addition)
 88 | 
 89 | - (Subtraction)
 90 | 
 91 | / (Division)
 92 | ```
 93 | 
 94 | To rename a derived column, add AS to the end of the line that produced the derived column
 95 | and then give it a name:
 96 | 
 97 | ```
 98 | gloss_qty + poster_qty AS nonstandard_qty
 99 | ```
100 | 
101 | ## Logical Operators
102 | 
103 | 1. LIKE
104 | This allows you to perform operations similar to using WHERE and =, but for cases when you might not know exactly what you are looking for.
105 | 
106 | 2. IN
107 | This allows you to perform operations similar to using WHERE and =, but for more than one condition.
108 | 
109 | 3. NOT
110 | This is used with IN and LIKE to select all of the rows NOT LIKE or NOT IN a certain condition.
111 | 
112 | 4. AND & BETWEEN
113 | These allow you to combine operations where all combined conditions must be true.
114 | 
115 | 5. OR
116 | This allows you to combine operations where at least one of the combined conditions must be true.
117 | 
118 | ## LIKE
119 | 
120 | The `LIKE` operator is extremely useful when working with text. Use LIKE within a WHERE clause.
121 | The LIKE operator is frequently used with '%Example%' or 'S%' or '%s'.
122 | 
123 | ## IN
124 | 
125 | The `IN` operator is useful for working with both numeric and text columns. This operator allows you to use `=` for more than one item
126 | of a particular column, all within the same query.
127 | 
128 | `IN` requires single quotation marks around **non-numerical data**; **numerical data** can be entered directly.
129 | 
130 | ## NOT
131 | 
132 | The `NOT` operator is useful for working with the `IN` and `LIKE` operators. By specifying `NOT IN` and `NOT LIKE` we can grab all of the rows
133 | that don't meet a particular criteria.
134 | 
135 | `NOT` provides the inverse results for IN, LIKE and similar operators.
136 | 
137 | ## AND and BETWEEN
138 | The `AND` operator is used within a WHERE statement to consider more than one logical clause at a time. Each time you link a new statement with an AND, you will need to specify the column you are interested in looking at. You may link as many statements as you would like to consider at the same time. This operator works with all of the operations we have seen so far including arithmetic operators (+, *, -, /). LIKE, IN, and NOT logic can also be linked together using the AND operator.
139 |  The `BETWEEN` operator:
140 | ```
141 | WHERE column BETWEEN 6 AND 10
142 | ```
143 | The same as:
144 | ```
145 | WHERE column >= 6 AND column <= 10
146 | ```
147 | 
148 | 
149 | ## OR
150 | 
151 | `OR` is a logical operator in SQL that allows you to select rows that satisfy either of two conditions. It works similarly to `AND`, which selects the rows that satisfy both conditions. `OR` works with all the operations seen so far, including arithmetic operators (+, -, *, /). When combining several of these operations, you might need to use **parentheses** to ensure that the logic you want to perform is executed correctly.


--------------------------------------------------------------------------------
/sql_subqueries_temporary_table_lesson31/with_vs_subquery.sql:
--------------------------------------------------------------------------------
  1 | -- You need to find the average number of events for each channel per day.
  2 | 
  3 | -- Inner query: events per day and channel; outer query: daily average per channel.
  4 | SELECT channel, AVG(events) AS average_events
  5 | FROM (SELECT DATE_TRUNC('day', occurred_at) AS day,
  6 |              channel, COUNT(*) AS events
  7 |       FROM web_events
  8 |       GROUP BY 1, 2) sub
  9 | GROUP BY channel
 10 | ORDER BY average_events DESC;
 11 | 
 12 | 
 13 | -- Using WITH
 14 | 
 15 | -- Notice, you can pull the inner query out and run it on its own:
 16 | 
 17 | SELECT DATE_TRUNC('day', occurred_at) AS day,
 18 |        channel, COUNT(*) AS events
 19 | FROM web_events
 20 | GROUP BY 1, 2;
 21 | 
 22 | -- This is the part we put in the WITH statement.
 23 | -- Notice, we are aliasing the table as events below. A WITH clause is not a
 24 | -- statement by itself, so this fragment is shown as a comment only:
 25 | -- WITH events AS (
 26 | --           SELECT DATE_TRUNC('day', occurred_at) AS day,
 27 | --                  channel, COUNT(*) AS events
 28 | --           FROM web_events
 29 | --           GROUP BY 1, 2)
 30 | 
 31 | -- Now, we can use this newly created events table as if it is any
 32 | -- other table in our database:
 33 | 
 34 | WITH events AS (
 35 |           SELECT DATE_TRUNC('day', occurred_at) AS day,
 36 |                  channel, COUNT(*) AS events
 37 |           FROM web_events
 38 |           GROUP BY 1, 2)
 39 | 
 40 | SELECT channel, AVG(events) AS average_events
 41 | FROM events
 42 | GROUP BY channel
 43 | ORDER BY average_events DESC;
 44 | 
 45 | 
 46 | -- For the above example, we don't need any more than the one additional table,
 47 | -- but imagine we needed to create a second table to pull from. We can create
 48 | -- an additional table to pull from in the following way:
 49 | 
 50 | WITH table1 AS (
 51 |           SELECT *
 52 |           FROM web_events),
 53 | 
 54 |      table2 AS (
 55 |           SELECT *
 56 |           FROM accounts)
 57 | 
 58 | -- CTEs are separated by commas; only the first one is introduced by WITH.
 59 | SELECT *
 60 | FROM table1
 61 | JOIN table2
 62 |   ON table1.account_id = table2.id;
 63 | 
 64 | 
 65 | -- QUIZ: WITH
 66 | 
 67 | -- Provide the name of the sales_rep in each region with the largest amount of
 68 | -- total_amt_usd sales.
 69 | 
 70 | -- t1: total sales per sales rep and region
 71 | WITH t1 AS (
 72 |    SELECT s.name AS rep_name, r.name AS region_name, SUM(o.total_amt_usd) AS total_amt
 73 |    FROM sales_reps s
 74 |    JOIN accounts a ON a.sales_rep_id = s.id
 75 |    JOIN orders o ON o.account_id = a.id
 76 |    JOIN region r ON r.id = s.region_id
 77 |    GROUP BY 1, 2),
 78 | -- t2: the largest rep total found in each region
 79 | t2 AS (
 80 |    SELECT region_name, MAX(total_amt) AS total_amt
 81 |    FROM t1
 82 |    GROUP BY 1)
 83 | -- keep the rep(s) whose total equals the regional maximum
 84 | SELECT t1.rep_name, t1.region_name, t1.total_amt
 85 | FROM t1
 86 | JOIN t2
 87 |   ON t1.region_name = t2.region_name
 88 |  AND t1.total_amt = t2.total_amt;
 89 | 
 90 | -- For the region with the largest sales total_amt_usd, how many total orders were placed?
 91 | 
 92 | -- t1: total sales per region
 93 | WITH t1 AS (
 94 |    SELECT r.name AS region_name, SUM(o.total_amt_usd) AS total_amt
 95 |    FROM sales_reps s
 96 |    JOIN accounts a ON a.sales_rep_id = s.id
 97 |    JOIN orders o ON o.account_id = a.id
 98 |    JOIN region r ON r.id = s.region_id
 99 |    GROUP BY r.name),
100 | -- t2: the largest of those regional totals (a single value)
101 | t2 AS (
102 |    SELECT MAX(total_amt) AS max_total_amt
103 |    FROM t1)
104 | -- count the orders of the region whose total equals that maximum
105 | SELECT r.name, COUNT(o.total) AS total_orders
106 | FROM sales_reps s
107 | JOIN accounts a
108 |   ON a.sales_rep_id = s.id
109 | JOIN orders o
110 |   ON o.account_id = a.id
111 | JOIN region r
112 |   ON r.id = s.region_id
113 | GROUP BY r.name
114 | HAVING SUM(o.total_amt_usd) = (SELECT max_total_amt FROM t2);
115 | 
116 | -- For the account that purchased the most (in total over their lifetime as a
117 | -- customer) standard_qty paper, how many accounts still had more in total
118 | -- purchases?
119 | 
120 | -- t1: the account with the largest lifetime standard_qty, plus its lifetime total
121 | WITH t1 AS (
122 |   SELECT a.name AS account_name, SUM(o.standard_qty) AS total_std, SUM(o.total) AS total
123 |   FROM accounts a
124 |   JOIN orders o ON o.account_id = a.id
125 |   GROUP BY 1
126 |   ORDER BY 2 DESC
127 |   LIMIT 1),
128 | -- t2: every account whose lifetime total exceeds the total of that account
129 | t2 AS (
130 |   SELECT a.name
131 |   FROM orders o
132 |   JOIN accounts a ON a.id = o.account_id
133 |   GROUP BY 1
134 |   HAVING SUM(o.total) > (SELECT total FROM t1))
135 | SELECT COUNT(*) AS accounts_with_more_purchases
136 | FROM t2;
137 | 
138 | 
139 | -- For the customer that spent the most (in total over their lifetime as a
140 | -- customer) total_amt_usd, how many web_events did they have for each channel?
141 | 
142 | -- t1: the single account with the largest lifetime total_amt_usd
143 | WITH t1 AS (
144 |    SELECT a.id, a.name, SUM(o.total_amt_usd) AS tot_spent
145 |    FROM orders o
146 |    JOIN accounts a ON a.id = o.account_id
147 |    GROUP BY a.id, a.name
148 |    ORDER BY 3 DESC
149 |    LIMIT 1)
150 | SELECT a.name, w.channel, COUNT(*) AS event_count
151 | FROM accounts a
152 | JOIN web_events w ON a.id = w.account_id
153 | WHERE a.id = (SELECT id FROM t1)  -- row filter belongs in WHERE, not in the join condition
154 | GROUP BY 1, 2
155 | ORDER BY 3 DESC;
156 | 
157 | -- What is the lifetime average amount spent in terms of total_amt_usd for the
158 | -- top 10 total spending accounts?
159 | 
160 | -- t1: the 10 accounts with the largest lifetime total_amt_usd
161 | WITH t1 AS (
162 |    SELECT a.id, a.name, SUM(o.total_amt_usd) AS tot_spent
163 |    FROM orders o
164 |    JOIN accounts a ON a.id = o.account_id
165 |    GROUP BY a.id, a.name
166 |    ORDER BY 3 DESC
167 |    LIMIT 10)
168 | SELECT AVG(tot_spent) AS avg_top10_spent
169 | FROM t1;
170 | 
171 | 
172 | -- 6. What is the lifetime average amount spent in terms of total_amt_usd for
173 | -- only the companies that spent more than the average of all accounts.
174 | 
175 | -- average total_amt_usd over all orders that belong to an account
176 | SELECT AVG(o.total_amt_usd) AS avg_all
177 | FROM orders o
178 | JOIN accounts a ON a.id = o.account_id;
179 | 
180 | -- accounts whose average order amount is above that overall average
181 | SELECT o.account_id, AVG(o.total_amt_usd) AS avg_amt
182 | FROM orders o
183 | GROUP BY 1
184 | HAVING AVG(o.total_amt_usd) > (SELECT AVG(o.total_amt_usd) AS avg_all
185 |                                FROM orders o
186 |                                JOIN accounts a ON a.id = o.account_id);
187 | 
188 | -- lifetime avg, written with WITH instead of nested subqueries
189 | WITH t1 AS (
190 |    SELECT AVG(o.total_amt_usd) AS avg_all
191 |    FROM orders o
192 |    JOIN accounts a ON a.id = o.account_id),
193 | t2 AS (
194 |    SELECT o.account_id, AVG(o.total_amt_usd) AS avg_amt
195 |    FROM orders o
196 |    GROUP BY 1
197 |    HAVING AVG(o.total_amt_usd) > (SELECT avg_all FROM t1))
198 | SELECT AVG(avg_amt) AS avg_of_above_average_accounts
199 | FROM t2;


--------------------------------------------------------------------------------
/basic_sql_lesson28/quizzes.sql:
--------------------------------------------------------------------------------
  1 | -- Limits
  2 | -- Return only the first 15 rows of web_events.
  3 | SELECT occurred_at, account_id, channel
  4 | FROM web_events
  5 | LIMIT 15;
  6 | 
  7 | -- ORDER BY
  8 | 
  9 | /*1.Write a query to return the 10 earliest orders in the orders table.
 10 | Include the id, occurred_at, and total_amt_usd.*/
 11 | SELECT id, occurred_at, total_amt_usd
 12 | FROM orders
 13 | ORDER BY occurred_at  -- oldest first; without a sort LIMIT keeps 10 arbitrary rows
 14 | LIMIT 10;
 15 | 
 16 | /*2.Write a query to return the top 5 orders in terms of largest total_amt_usd.
 17 | Include the id, account_id, and total_amt_usd.*/
 18 | 
 19 | SELECT id, account_id, total_amt_usd
 20 | FROM orders
 21 | ORDER BY total_amt_usd DESC
 22 | LIMIT 5;
 23 | 
 24 | /*3.Write a query to return the bottom 20 orders in terms of least total.
 25 | Include the id, account_id, and total.*/
 26 | 
 27 | SELECT id, account_id, total
 28 | FROM orders
 29 | ORDER BY total
 30 | LIMIT 20;
 31 | 
 32 | 
 33 | -- ORDER BY Part 2
 34 | 
 35 | /*Write a query that returns the top 5 rows from orders ordered according to newest to oldest,
 36 | but with the largest total_amt_usd for each date listed first for each date.*/
 37 | -- sort by date first (newest on top); total_amt_usd only breaks ties within a date
 38 | SELECT id, occurred_at, total_amt_usd
 39 | FROM orders
 40 | ORDER BY occurred_at DESC, total_amt_usd DESC
 41 | LIMIT 5;
 42 | 
 43 | /*Write a query that returns the top 10 rows from orders ordered according to oldest
 44 | to newest, but with the smallest total_amt_usd for each date listed first for each date.*/
 45 | -- same idea in ascending order: oldest date first, smallest amount first
 46 | SELECT id, occurred_at, total_amt_usd
 47 | FROM orders
 48 | ORDER BY occurred_at, total_amt_usd
 49 | LIMIT 10;
 50 | 
 51 | -- WHERE
 52 | 
 53 | /*Pull the first 5 rows and all columns from the orders table that have
 54 | a dollar amount of gloss_amt_usd greater than or equal to 1000.*/
 55 | 
 56 | SELECT *
 57 | FROM orders
 58 | WHERE gloss_amt_usd >= 1000
 59 | LIMIT 5;
 60 | 
 61 | /*Pull the first 10 rows and all columns from the orders table that
 62 | have a total_amt_usd less than 500.*/
 63 | 
 64 | SELECT *
 65 | FROM orders
 66 | WHERE total_amt_usd < 500
 67 | LIMIT 10;
 68 | 
 69 | 
 70 | -- WHERE with Non-Numeric Data
 71 | 
 72 | /*Filter the accounts table to include the company name, website, and the
 73 | primary point of contact (primary_poc) for Exxon Mobil in the accounts table.*/
 74 | 
 75 | SELECT name, website, primary_poc
 76 | FROM accounts
 77 | WHERE name = 'Exxon Mobil';  -- Exxon Mobil is the company name, not the contact
 78 | 
 79 | -- Arithmetic Operators
 80 | 
 81 | /*Using the orders table:
 82 | 
 83 | Create a column that divides the standard_amt_usd by the standard_qty to find the
 84 | unit price for standard paper for each order. Limit the results to the first 10 orders, and include the id and account_id fields.*/
 85 | 
 86 | SELECT standard_amt_usd,
 87 |        standard_qty,
 88 |        id,
 89 |        account_id,
 90 |        standard_amt_usd / NULLIF(standard_qty, 0) AS unit_cost  -- NULL instead of an error when qty is 0
 91 | FROM orders
 92 | LIMIT 10;
 93 | 
 94 | /*Write a query that finds the percentage of revenue that comes from poster paper for each order. You will need to use only the columns
 95 | that end with _usd. (Try to do this without using the total column). Include the id and account_id fields.*/
 96 | 
 97 | SELECT id,
 98 |        account_id,
 99 |        poster_amt_usd / NULLIF(standard_amt_usd + gloss_amt_usd + poster_amt_usd, 0) AS poster_paper  -- fraction of revenue; NULL for zero-revenue orders
100 | FROM orders;
101 | 
102 | -- LIKE
103 | 
104 | /*All the companies whose names start with 'C'. */
105 | 
106 | SELECT *
107 | FROM accounts
108 | WHERE name LIKE 'C%';  -- no leading %: the name must begin with C
109 | 
110 | /*All companies whose names contain the string 'one' somewhere in the name.*/
111 | 
112 | SELECT *
113 | FROM accounts
114 | WHERE name LIKE '%one%';
115 | 
116 | /*All companies whose names end with 's'.*/
117 | 
118 | SELECT *
119 | FROM accounts
120 | WHERE name LIKE '%s';  -- no trailing %: the name must finish with s
121 | 
122 | -- IN
123 | /*Use the accounts table to find the account name, primary_poc, and sales_rep_id for Walmart, Target, and Nordstrom.*/
124 | 
125 | SELECT name, primary_poc, sales_rep_id
126 | FROM accounts
127 | WHERE name IN ('Walmart', 'Target', 'Nordstrom');
128 | 
129 | /*Use the web_events table to find all information regarding individuals who were contacted via the channel of organic or adwords.*/
130 | 
131 | SELECT *  -- "all information" means every column, not just channel
132 | FROM web_events
133 | WHERE channel IN ('organic', 'adwords');
134 | 
135 | -- NOT
136 | 
137 | /*Use the accounts table to find the account name, primary poc, and sales rep id for all stores except Walmart, Target, and Nordstrom.*/
138 | 
139 | SELECT name, primary_poc, sales_rep_id
140 | FROM accounts
141 | WHERE name NOT IN ('Walmart', 'Target', 'Nordstrom');  -- IN compares whole values; % is not a wildcard here
142 | 
143 | /*Use the web_events table to find all information regarding individuals who were contacted via any method except using organic or adwords methods.*/
144 | 
145 | SELECT *
146 | FROM web_events
147 | WHERE channel NOT IN ('organic', 'adwords');
148 | 
149 | /*All the companies whose names do not start with 'C'.*/
150 | 
151 | SELECT name
152 | FROM accounts
153 | WHERE name NOT LIKE 'C%';  -- '%C%' would also drop names that merely contain a C
154 | 
155 | /*All companies whose names do not contain the string 'one' somewhere in the name*/
156 | 
157 | SELECT name
158 | FROM accounts
159 | WHERE name NOT LIKE '%one%';
160 | 
161 | /*All companies whose names do not end with 's'.*/
162 | 
163 | SELECT name
164 | FROM accounts
165 | WHERE name NOT LIKE '%s';  -- '%s%' would also drop names that merely contain an s
166 | 
167 | -- AND and BETWEEN
168 | 
169 | /* 1. Write a query that returns all the orders where the standard_qty is over 1000, the poster_qty is 0, and the gloss_qty is 0.*/
170 | 
171 | SELECT standard_qty, poster_qty, gloss_qty
172 | FROM orders
173 | WHERE standard_qty > 1000 AND poster_qty = 0 AND gloss_qty = 0;
174 | 
175 | /* 2. Using the accounts table find all the companies whose names do not start with 'C' and end with 's'.*/
176 | 
177 | SELECT name
178 | FROM accounts
179 | WHERE name NOT LIKE 'C%' AND name LIKE '%s';
180 | 
181 | /* 3. Use the web_events table to find all information regarding individuals who were contacted via organic or adwords and started their
182 | account at any point in 2016 sorted from newest to oldest.
183 | */
184 | 
185 | SELECT *
186 | FROM web_events
187 | WHERE channel IN ('organic', 'adwords')
188 |   AND occurred_at >= '2016-01-01' AND occurred_at < '2017-01-01'  -- BETWEEN would also include 2017-01-01 00:00:00
189 | ORDER BY occurred_at DESC;  -- newest to oldest
190 | 
191 | -- OR
192 | 
193 | /*1.Find list of orders ids where either gloss_qty or poster_qty is greater than 4000. Only include the id field in the resulting table.*/
194 | 
195 | SELECT id
196 | FROM orders
197 | WHERE gloss_qty > 4000 OR poster_qty > 4000  -- "greater than", not "equal to"
198 | ORDER BY id;
199 | 
200 | /*2.Write a query that returns a list of orders where the standard_qty is zero and either the gloss_qty or poster_qty is over 1000.*/
201 | 
202 | SELECT *
203 | FROM orders
204 | WHERE standard_qty = 0
205 |   AND (gloss_qty > 1000 OR poster_qty > 1000);  -- parentheses keep the OR inside the AND
206 | 
207 | /*3.Find all the company names that start with a 'C' or 'W', and the primary contact contains 'ana' or 'Ana', but it doesn't contain 'eana'.*/
208 | 
209 | SELECT *
210 | FROM accounts
211 | WHERE (name LIKE 'C%' OR name LIKE 'W%')
212 |   AND (primary_poc LIKE '%ana%' OR primary_poc LIKE '%Ana%')
213 |   AND primary_poc NOT LIKE '%eana%';


--------------------------------------------------------------------------------
/sql_joins_lesson_29/sql_joins_lesson_29.md:
--------------------------------------------------------------------------------
  1 | ## Relational DB
  2 | 
  3 | The term **relational database** refers to the fact that tables within it relate to one another. They contain common identifiers that allow information from 
  4 | multiple tables to be easily combined.
  5 | 
  6 | When you write a query, its execution speed depends on the amount of data you're asking the db to read and the number and type of calculations you're
  7 | asking it to make.
  8 | 
  9 | ## DB normalization.
 10 | 
 11 | When creating a db, it's really important to think about how data will be stored. This is known as **normalization**.
 12 | There are essentially three ideas that are aimed at database normalization:
 13 | 
 14 | 1. Are the tables storing logical groupings of the data?
 15 | 2. Can I make changes in a single location, rather than in many tables for the same information?
 16 | 3. Can I access and manipulate data quickly and efficiently?
 17 | 
 18 | [Why You Need Database Normalization link](http://www.itprotoday.com/microsoft-sql-server/sql-design-why-you-need-database-normalization)
 19 | 
 20 | Example:
 21 | Here we are only pulling data from the orders table since in the SELECT statement we only reference columns from the orders table.
 22 | The ON statement holds the two columns that get linked across the two tables.
 23 | 
 24 | ![inner join](join_sql.png)
 25 | 
 26 | To specify tables and columns in the SELECT statement:
 27 | 
 28 | 1. The table name is always before the period.
 29 | 2. The column you want from that table is always after the period.
 30 | 
 31 | For example, if we want to pull only the account name and the time each of its orders occurred:
 32 | 
 33 | ```
 34 | SELECT accounts.name, orders.occurred_at
 35 | FROM orders
 36 | JOIN accounts
 37 | ON orders.account_id = accounts.id;
 38 | ```
 39 | This query only pulls two columns, not all the information in these two tables.
 40 | 
 41 | ## ERD reminder.
 42 | 
 43 | ERD or entity relationship diagram is a common way to view data in a database. 
 44 | 
 45 | ![ERD diagram](entity_relationship_diagram.png)
 46 | The PK here stands for primary key. A primary key exists in every table, and it is a column that has a unique value for every row.
 47 | If you look at the first few rows of any of the tables in our database, you will notice that this first, PK, column is always unique. For this database it is always called id, but that is not true of all databases.
 48 | 
 49 | ## Primary and Foreign Keys.
 50 | 
 51 | `Primary Key (PK)`
 52 | A primary key is a unique column in a particular table. This is the first column in each of **our tables**. Here, those columns are all called id, but that doesn't necessarily have to be the name. It is common that the primary key is the first column in our tables in most databases.
 53 | 
 54 | The primary key is a single column that must exist in each table of a database. Again, these rules are true for most major databases, but some databases may not enforce these rules.
 55 | 
 56 | `Foreign Key (FK)`
 57 | A foreign key is a column in one table that refers to the primary key of another table. 
 58 | 
 59 | Foreign keys are always associated with a primary key, and they are associated with the crow-foot notation above to show they can appear multiple times in a particular table.
 60 | 
 61 | ![primary and foreign key](primary_foreign_key.png)
 62 | 
 63 | ## JOIN more than two tables.
 64 | 
 65 | ```
 66 | SELECT *
 67 | FROM web_events
 68 | JOIN accounts
 69 | ON web_events.account_id = accounts.id
 70 | JOIN orders
 71 | ON accounts.id = orders.account_id;
 72 | ```
 73 | 
 74 | ## ALIAS
 75 | 
 76 | When we `JOIN` tables together it's easiest to give your table names **aliases**. The `ALIAS` for a table will be created in the `FROM` or `JOIN` clauses.
 77 | Best practice: to use all lower case letters and underscores instead of spaces.
 78 | Example:
 79 | ```
 80 | FROM tablename AS t1
 81 | JOIN tablename2 AS t2
 82 | ```
 83 | Or without the AS statement:
 84 | ```
 85 | FROM tablename t1
 86 | JOIN tablename2 t2
 87 | ```
 88 | 
 89 | We can simply write our alias directly after the column name (in the SELECT) or table name (in the FROM or JOIN) by writing the alias directly following the column or table we would like to alias. 
 90 | ```
 91 | SELECT col1 + col2 total, col3
 92 | ```
 93 | 
 94 | ```
 95 | Select t1.column1 aliasname, t2.column2 aliasname2
 96 | FROM tablename AS t1
 97 | JOIN tablename2 AS t2
 98 | ```
 99 | 
100 | ## Many-to-many relationships 
101 | 
102 | [Why no many-to-many relationships?](https://stackoverflow.com/questions/7339143/why-no-many-to-many-relationships)
103 | 
104 | ## LEFT and RIGHT JOIN
105 | 
106 | INNER JOIN will return only rows that appear in **both tables**.
107 | 
108 | This Inner Join will return only rows at the intersection of these two circles.
109 | If we want to show accounts that don't appear in the orders table, we need to use an OUTER JOIN.
110 | ``` 
111 | SELECT accounts.id, accounts.name, orders.total  -- columns must be qualified with the real table name, orders
112 | FROM orders 
113 | JOIN accounts
114 | ON orders.account_id = accounts.id
115 | ```
116 | 
117 | A Venn diagram is a common way to visualize JOINs. Each circle in the diagram represents a table. The left circle includes all rows of data in the table in the **FROM** clause. The right circle represents all rows of data in the table in the **JOIN** clause. The overlapping middle section represents all rows for which the ON clause is **true**.
118 | ![Venn Diagram](venn_diagram.png)
119 | 
120 | There are three types of joins we might use if we want to include data that doesn't exist in both tables (only in one of the two tables): LEFT JOIN, RIGHT JOIN, FULL OUTER JOIN. 
121 | 
122 | LEFT JOIN produces a complete set of records from the left table, regardless of whether any of those records have a match in the right table. It will also return any results that are in the left table that didn't match.
123 | 
124 | RIGHT JOIN will return all of the records in the right table regardless if any of those records have a match in the left table. 
125 | Left and Right joins are somewhat interchangeable:
126 | ![left and right](interchangeable_result.png)
127 | 
128 | If there is not matching information in the JOINed table, then you will have columns with empty cells. These empty cells introduce a new data type called NULL.
129 | 
130 | ## OUTER JOIN
131 | 
132 | OUTER JOIN will return the inner join result set, as well as any unmatched rows from either of the two tables being joined.
133 | 
134 | Again this returns rows that do not match one another from the two tables. The use cases for a full outer join are very rare. 
135 | [When is a good situation to use a full outer join?](https://stackoverflow.com/questions/2094793/when-is-a-good-situation-to-use-a-full-outer-join)
136 | 
137 | FULL OUTER JOIN is the same as OUTER JOIN. Likewise, LEFT OUTER JOIN and RIGHT OUTER JOIN are the same as LEFT JOIN and RIGHT JOIN.
138 | 
139 | ## JOINs and Filtering 
140 | 
141 | `ON` logic in the on clause reduces the rows **before combining the tables**.
142 | 
143 | `WHERE` logic in the where clause occurs **after the join occurs**.
144 | 
145 |  When the database executes the query, it executes the join and everything in the **ON clause first**. Think of this as building the new result set. That result set is then filtered using the WHERE clause.
146 | 
147 |  INNER JOINs only return the rows for which the two tables match, so moving a filter from the WHERE clause to the ON clause of an inner join will produce the same result as keeping it in the WHERE clause.


--------------------------------------------------------------------------------
/control_flow_lesson_25/control_flow.md:
--------------------------------------------------------------------------------
  1 | # Control flow
  2 | 
  3 | We'll learn:
  4 | * conditional statements
  5 | * **for** and **while** loop 
  6 | * exit or skip loops with **break** and **continue**
  7 | * use **built-in functions**: **zip** and **enumerate**
  8 | * list comprehensions
  9 | 
 10 | ## **if** statement
 11 | 
 12 | An **if** statement is a conditional statement that runs or skips code based on whether a condition is true or false. Example:
 13 | 
 14 | ```
 15 | if phone_balance < 5:
 16 |     phone_balance += 10
 17 |     bank_balance -= 10
 18 | ```
 19 | 
 20 | ## Comparison Operators in Conditional Statements
 21 | 
 22 | `=` assignment operator that assigns the value on the right to the name on the left
 23 | 
 24 | `==` comparison operator that evaluates whether objects on both sides are equal
 25 | 
 26 | ## **if**, **elif**, **else**
 27 | 
 28 | **if**: An if statement must always start with an if clause, which contains the first condition that is checked. If this evaluates to True, Python runs the code indented in this if block and then skips to the rest of the code after the if statement.
 29 | 
 30 | **elif**: elif is short for "else if." An elif clause is used to check for an additional condition if the conditions in the previous clauses in the if statement evaluate to False.
 31 | 
 32 | **else**: Last is the else clause, which must come at the end of an if statement if used. This clause doesn't require a condition. The code in an else block is run if all conditions above that in the if statement evaluate to False.
 33 | 
 34 | ```
 35 | if season == 'spring':
 36 |     print('plant the garden!')
 37 | elif season == 'summer':
 38 |     print('water the garden!')
 39 | elif season == 'fall':
 40 |     print('harvest the garden!')
 41 | elif season == 'winter':
 42 |     print('stay indoors!')
 43 | else:
 44 |     print('unrecognized season')
 45 | ```
 46 | 
 47 | ## Indentation
 48 | 
 49 | In Python, indents conventionally come in multiples of four spaces. Be strict about following this convention, because changing the indentation can completely change the meaning of the code.
 50 | 
 51 | The [Python Style Guide](https://www.python.org/dev/peps/pep-0008/#tabs-or-spaces) recommends using 4 spaces to indent, rather than using a tab. Whichever you use, be aware that "Python 3 disallows mixing the use of tabs and spaces for indentation."
 52 | 
 53 | ## Boolean expressions
 54 | 
 55 | A **boolean expression** is an expression that is either True or False.
 56 | 
 57 | There are three **logical operators**: and, or, not. Use parentheses if you need to make the combinations clear.
 58 | 
 59 | **if** statements sometimes use more complicated boolean expressions for their conditions. They may contain multiple comparisons operators, logical operators, and even calculations. Examples:
 60 | 
 61 | ```
 62 | if 18.5 <= weight / height**2 < 25:
 63 |     print("BMI is considered 'normal'")
 64 | 
 65 | if is_raining and is_sunny:
 66 |     print("Is there a rainbow?")
 67 | 
 68 | if (not unsubscribed) and (location == "USA" or location == "CAN"):
 69 |     print("send email")
 70 | ```
 71 | 
 72 | However simple or complex, the condition in an **if** statement must be a boolean expression that evaluates to either True or False and it is this value that decides whether the indented block in an if statement executes or not.
 73 | 
 74 | ## Good and Bad Examples 
 75 | 
 76 | **Don't use**: `if True:` or `if False:`. Bad example:
 77 | 
 78 | ```
 79 | if True:
 80 |     print("This indented code will always get run.")
 81 | ```    
 82 | While `True` is a valid boolean expression, it's not useful as a condition since it always evaluates to True, so the indented code will always get run. Similarly, `if False` is not a condition you should use either - the statement following this `if` statement would never be executed.
 83 | 
 84 | 
 85 | **Be careful** writing expression that use **logical operators**: `and`, `or`, `not`:
 86 | 
 87 | Bad example:
 88 | ```
 89 | if weather == "snow" or "rain":
 90 |     print("Wear boots!")
 91 | ```
 92 | This code is valid in Python, but it is not a boolean expression, although it reads like one. The reason is that the expression to the right of the or operator, "rain", is not a boolean expression - it's a string! Later we'll discuss what happens when you use non-boolean-type objects in place of booleans.
 93 | 
 94 | 
 95 | **Don't evaluate** the truth of a boolean variable with `== True` or `== False`:
 96 | 
 97 | Bad example:
 98 | This comparison isn’t necessary, since the boolean variable itself is a boolean expression.
 99 | ```
100 | if is_cold == True:
101 |     print("The weather is cold!")
102 | ```
103 | This is a valid condition, but we can make the code more readable by using the variable itself as the condition instead, as below.
104 | 
105 | Good example:
106 | ```
107 | if is_cold:
108 |     print("The weather is cold!")
109 | ```    
110 | 
111 | If you want to check whether a boolean is False, you can use the **not** operator.
112 | 
113 | ## Truth Value Testing
114 | If we use a **non-boolean object** as a condition in an if statement in place of the boolean expression, Python will check for its truth value and use that to decide whether or not to run the indented code. By default, the truth value of an object in Python is considered True unless specified as False in the documentation.
115 | 
116 | Here are most of the built-in objects that are considered False in Python:
117 | 
118 | * constants defined to be false: `None` and `False`
119 | 
120 | * zero of any numeric type: `0`, `0.0`, `0j`, `Decimal(0)`, `Fraction(0, 1)`
121 | 
122 | * empty sequences and collections: `""`, `()`, `[]`, `{}`, `set()`, `range(0)`
123 | 
124 | Example:
125 | ```
126 | errors = 3
127 | if errors:
128 |     print("You have {} errors to fix!".format(errors))
129 | else:
130 |     print("No errors to fix!")
131 | ```
132 | In this code, errors has the truth value True because it's a non-zero number, so the error message is printed.
133 | 
134 | ## Quiz: Boolean Expressions for Conditions
135 | 
136 | Imagine an air traffic control program that tracks three variables, altitude,
137 | speed, and propulsion which for a particular airplane have the values
138 | specified below:
139 | ```
140 | altitude = 10000
141 | speed = 250
142 | propulsion = "Propeller"
143 | ```
144 | Expressions: 
145 | 
146 | 1.`altitude < 1000 and speed > 100` 
147 | 
148 |     `altitude < 1000` is False, so we don't even need to check the second condition - the whole expression
149 |      is False.
150 | 
151 | 
152 | 2.`(propulsion == "Jet" or propulsion == "Turboprop") and speed < 300 and altitude > 20000 `
153 | 
154 |     `propulsion == "Jet"` is False, and `propulsion == "Turboprop"` is False, so the whole expression inside
155 |      the parentheses is False.
156 | 
157 | 
158 | 3.`not (speed > 400 and propulsion == "Propeller") `
159 | 
160 |      To work this one out, we need to look at the inside of the parentheses first, then apply not to that.
161 |      `speed > 400` is False, and because we are using and this makes the whole of the expression inside the
162 |       parentheses False. Applying not reverses this, so this expression is True.
163 | 
164 | 
165 | 
166 | 4.`(altitude > 500 and speed > 100) or not propulsion == "Propeller" `
167 | 
168 |      `altitude > 500` is True, and speed is greater than 100, so the expression inside the parenthesis is True.
169 |      Whatever the value of the other expression, because they are connected by or, the whole expression will
170 |      evaluate to True.
171 | 
172 | 
173 | # Break and Continue:
174 | 
175 | `for` loops iterate over every element in a sequence.
176 | `while`  loops iterate until their stopping condition is met.
177 | 
178 | `break` terminates a loop (for or while) immediately when the break statement is reached.
179 | 
180 | `continue` terminates the current iteration of a `for` or `while` loop and moves on to the next one.
181 | 
182 | 


--------------------------------------------------------------------------------
/sql_subqueries_temporary_table_lesson31/subqueries_temporary_tables.md:
--------------------------------------------------------------------------------
  1 | ## Intro to subqueries
  2 | 
  3 | Both **subqueries** and table expressions are methods for being able to write a query that creates a table, and then write a query that interacts with this newly created table. Sometimes the question you are trying to answer doesn't have an answer when working directly with existing tables in database.
  4 | 
  5 | However, if we were able to create new tables from the existing tables, we know we could query these new tables to answer our question
  6 | 
  7 | Whenever we need to use existing tables to create a new table that we then want to query again, this is an indication that we will need to use some sort of subquery.
  8 | 
  9 | **Subqueries** also known as **inner queries** and **nested queries** - allow you to answer more complex questions than you can with a single DB table. 
 10 | 
 11 | ## Write your first subquery
 12 | We want to find the average number of events for each day for each channel. The first table will provide us the number of events for each day and channel, and then we will need to average these values together using a second query.
 13 | 
 14 | 1. Start by querying table to check the data.
 15 | 
 16 | ```
 17 | SELECT *
 18 | FROM web_events;
 19 | ``` 
 20 | 
 21 | 2. Count up all the events in each channel, in each day.
 22 | 
 23 | ```
 24 | SELECT DATE_TRUNC('day', occurred_at) as day,
 25 | 	   channel,
 26 |        COUNT(*) as event_count
 27 | FROM web_events
 28 | GROUP BY 1, 2
 29 | ORDER BY 1;
 30 | ```
 31 | 
 32 | 3. Average across the events column we've created. In order to do this, we query the result of the previous query. We can do it by wrapping the previous query in parentheses and using it in the FROM clause of the next query, which we write above it.
 33 | 
 34 | Query within a query also known as a subquery:
 35 | ```
 36 | SELECT *
 37 | FROM 
 38 | (SELECT DATE_TRUNC('day', occurred_at) as day,
 39 | 	   channel,
 40 |        COUNT(*) as event_count
 41 | FROM web_events
 42 | GROUP BY 1, 2
 43 | ORDER BY 1) sub
 44 | ```
 45 | **Subqueries** are required to have aliases, which are added after the parentheses: `(...) sub`.
 46 | 
 47 | 4. Average events for each channel. The subquery acts like one table in the FROM clause, so put the GROUP BY clause after the subquery. 
 48 | Since we are reordering based on this new aggregation, you don't need the ORDER BY statement in the subquery.
 49 | ```
 50 | SELECT channel,
 51 | 	   AVG(event_count) AS avg_event_count
 52 | FROM 
 53 | (SELECT DATE_TRUNC('day', occurred_at) as day,
 54 | 	   channel,
 55 |        COUNT(*) as event_count
 56 | FROM web_events
 57 | GROUP BY 1, 2) sub
 58 | 	GROUP BY channel
 59 |     ORDER BY 2 DESC;
 60 | ```
 61 | 
 62 | #### How this query runs: 
 63 | 
 64 | 1. Inner query will run. DB will treat it as an independent query
 65 | ```
 66 | SELECT DATE_TRUNC('day', occurred_at) as day,
 67 | 	   channel,
 68 |        COUNT(*) as event_count
 69 | FROM web_events
 70 | GROUP BY 1, 2
 71 | ```
 72 | 2. The outer query will run across the result set created by the inner query:
 73 | ```
 74 | SELECT channel,
 75 | 	   AVG(event_count) AS avg_event_count
 76 | FROM 
 77 | (SELECT DATE_TRUNC('day', occurred_at) as day,
 78 | 	   channel,
 79 |        COUNT(*) as event_count
 80 | FROM web_events
 81 | GROUP BY 1, 2) sub
 82 | 	GROUP BY channel
 83 |     ORDER BY 2 DESC;
 84 | ```
 85 | 
 86 | ## Subquery Formatting
 87 | 
 88 | #### Badly formatted queries
 89 | 
 90 | ```
 91 | SELECT * FROM (SELECT DATE_TRUNC('day',occurred_at) AS day, channel, COUNT(*) as events FROM web_events GROUP BY 1,2 ORDER BY 3 DESC) sub;
 92 | ```
 93 | 
 94 | This second version, which includes some helpful line breaks, is easier to read than that previous version, but it is still not as easy to read as the queries in the Well Formatted Query section.
 95 | 
 96 | ```
 97 | SELECT *
 98 | FROM (
 99 | SELECT DATE_TRUNC('day',occurred_at) AS day,
100 | channel, COUNT(*) as events
101 | FROM web_events 
102 | GROUP BY 1,2
103 | ORDER BY 3 DESC) sub;
104 | ```
105 | 
106 | #### Well Formatted Query
107 | 
108 | If we have a GROUP BY, ORDER BY, WHERE, HAVING, or any other statement following our subquery, we would then indent it at the same level as our outer query.
109 | 
110 | ```
111 | SELECT *
112 | FROM (SELECT DATE_TRUNC('day',occurred_at) AS day,
113 |                 channel, COUNT(*) as events
114 |       FROM web_events 
115 |       GROUP BY 1,2
116 |       ORDER BY 3 DESC) sub;
117 | ```
118 | 
119 | The inner query GROUP BY and ORDER BY statements are indented to match the inner table. 
120 | ```
121 | SELECT channel, AVG(events) AS avg_events  -- with GROUP BY, select only the grouped column and aggregates
122 | FROM (SELECT DATE_TRUNC('day',occurred_at) AS day,
123 |                 channel, COUNT(*) as events
124 |       FROM web_events 
125 |       GROUP BY 1,2
126 |       ORDER BY 3 DESC) sub
127 | GROUP BY channel
128 | ORDER BY 2 DESC;
129 | ```
130 | 
131 | ## More on Subqueries
132 | 
133 | If you are only returning a single value, you might use that value in a logical statement like WHERE, HAVING, or even SELECT - the value could be nested within a CASE statement. Most conditional logic will work with subqueries containing **one-cell results**. BUT `IN` is the only type of conditional logic that will work when the inner query contains multiple results. 
134 | 
135 | 
136 | 
137 | **Expert Tip**
138 | 
139 | Note that you should not include an alias when you write a subquery in a conditional statement. This is because the subquery is treated as an individual value (or set of values in the IN case) rather than as a table.
140 | 
141 | Also, notice the query here compared a single value. If we returned an entire column IN would need to be used to perform a logical argument. If we are returning an entire table, then we must use an ALIAS for the table, and perform additional logic on the entire table.
142 | 
143 | ### MORE on sub queries
144 | 
145 | 1. Subquery table
146 | ```
147 | SELECT a.id, a.name, we.channel, COUNT(*) as ct
148 | FROM accounts a
149 | JOIN web_events we 
150 | ON a.id = we.account_id
151 | GROUP BY a.id, a.name, channel
152 | ORDER BY a.id;
153 | ```
154 | 
155 | 2. Find the max from all data:
156 | ```
157 | SELECT MAX(ct)
158 | 
159 | FROM (SELECT a.id, a.name, we.channel, COUNT(*) as ct
160 | 	FROM accounts a
161 | 	JOIN web_events we 
162 | 	ON a.id = we.account_id
163 | 	GROUP BY a.id, a.name, channel
164 | 	ORDER BY a.id) table1
165 | ```
166 | 
167 | 3. Max for every accounts:
168 | 
169 | ```
170 | SELECT t1.id, t1.name, MAX(ct)
171 | FROM (SELECT a.id, a.name, we.channel, COUNT(*) as ct
172 | 	FROM accounts a
173 | 	JOIN web_events we 
174 | 	ON a.id = we.account_id
175 | 	GROUP BY a.id, a.name, channel) t1
176 | GROUP BY t1.id, t1.name
177 | ORDER BY t1.id;
178 | ```
179 | 
180 | 4. Final table:
181 | 
182 | ```
183 | SELECT t3.id, t3.name, t3.channel, t3.ct
184 | FROM (SELECT a.id, a.name, we.channel, COUNT(*) as ct
185 | 	FROM accounts a
186 | 	JOIN web_events we 
187 | 	ON a.id = we.account_id
188 | 	GROUP BY a.id, a.name, channel) t3
189 | 
190 | JOIN (SELECT t1.id, t1.name, MAX(ct) max_chan
191 | FROM (SELECT a.id, a.name, we.channel, COUNT(*) as ct
192 | 	FROM accounts a
193 | 	JOIN web_events we 
194 | 	ON a.id = we.account_id
195 | 	GROUP BY a.id, a.name, channel) t1
196 | GROUP BY t1.id, t1.name) t2
197 | ON t2.id = t3.id AND t2.max_chan = t3.ct
198 | ORDER BY t3.id, t3.ct;
199 | ```
200 | 
201 | ## WITH 
202 | 
203 | The `WITH` statement is often called a Common Table Expression or CTE. Though these expressions serve the exact same purpose as subqueries, they are more common in practice, as they tend to be cleaner for a future reader to follow the logic.
204 | 
205 | Subqueries can make queries lengthy and difficult to read. Common Table Expressions or CTEs can help break your query into separate components so that the logic is easier to read.
206 | 
207 | * When creating multiple tables using `WITH`, add a comma after every table except the last table leading to the final query.
208 | * The new table name is always aliased using `table_name AS`, which is followed by your query nested between parentheses.


--------------------------------------------------------------------------------
/sql_joins_lesson_29/join_quizzes.sql:
--------------------------------------------------------------------------------
  1 | -- JOIN practice
  2 | 
  3 | /* Pull all the data from the accounts table and all the data from the orders table. Orders reference their account through the orders.account_id foreign key, so the join uses that column rather than the orders' own primary key. */
  4 | SELECT accounts.*, orders.*
  5 | FROM accounts
  6 | JOIN orders
  7 | ON accounts.id = orders.account_id;
  8 | 
  9 | /* Pull standard_qty, gloss_qty, and poster_qty from the orders table, and the website and the primary_poc from the accounts table. */
 10 | -- Each order points at its account through orders.account_id, so that is the join column (not orders.id).
 11 | SELECT orders.standard_qty, orders.gloss_qty, orders.poster_qty,
 12 |        accounts.website, accounts.primary_poc
 13 | FROM orders
 14 | JOIN accounts
 15 | ON orders.account_id = accounts.id;
 16 | 
 17 | -- JOIN QUESTIONS PART 1
 18 | 
 19 | /* 1. Provide a table for all web_events associated with the account name of Walmart. There should be three columns: the primary_poc, the time of the event,
 20 | and the channel for each event. Additionally, you might choose to add a fourth column to assure only Walmart events were chosen. The name is matched exactly so accounts whose names merely contain "Walmart" are excluded. */
 21 | 
 22 | SELECT web_events.occurred_at, accounts.primary_poc, web_events.channel
 23 | FROM web_events
 24 | JOIN accounts
 25 | ON web_events.account_id = accounts.id
 26 | WHERE accounts.name = 'Walmart';
 27 | 
 28 | /* 2. Provide a table that provides the region for each sales_rep along with their associated accounts. The final table includes three
 29 | columns: the region name, the sales rep name, and the account name. Accounts are sorted alphabetically (A-Z) according to account name. */
 30 | 
 31 | SELECT region.name AS region_name, sales_reps.name AS sales_rep_name, accounts.name AS account_name
 32 | FROM accounts
 33 | JOIN sales_reps ON accounts.sales_rep_id = sales_reps.id
 34 | JOIN region ON sales_reps.region_id = region.id
 35 | -- A-Z by account name, as the task requires.
 36 | ORDER BY account_name;
 37 | 
 38 | /* 3. For every order, report the name of its region, the name of the account, and the unit price paid (total_amt_usd/total).
 39 | The final table has 3 columns: region name, account name, and unit price. A few accounts have 0 for total, so the denominator
 40 | is (total + 0.01) to avoid dividing by zero. */
 41 | 
 42 | SELECT r.name AS region_name, a.name AS account_name, o.total_amt_usd / (o.total + 0.01) AS unit_price
 43 | FROM orders AS o
 44 | JOIN accounts AS a ON a.id = o.account_id
 45 | JOIN sales_reps AS s ON s.id = a.sales_rep_id
 46 | JOIN region AS r ON r.id = s.region_id;
 47 | 
 48 | -- JOINs and Filtering. Quiz: Last Check
 49 | 
 50 | /* 1. List the region for each sales rep together with their associated accounts, restricted to the Midwest region.
 51 | The final table has three columns: the region name, the sales rep name, and the account name. Accounts are sorted alphabetically
 52 | (A-Z) according to account name. */
 53 | 
 54 | SELECT r.name AS Region, s.name AS SalesRepName, a.name AS AcountName
 55 | FROM accounts AS a
 56 | JOIN sales_reps AS s ON s.id = a.sales_rep_id
 57 | JOIN region AS r
 58 | ON r.id = s.region_id
 59 | WHERE r.name = 'Midwest'
 60 | ORDER BY AcountName;
 61 | 
 62 | /* 2. List the region for each sales rep together with their associated accounts, restricted to accounts where the sales rep has a first
 63 | name starting with S and is in the Midwest region. The final table has three columns: the region name, the sales rep name, and the account name.
 64 | Accounts are sorted alphabetically (A-Z) according to account name. */
 65 | 
 66 | SELECT r.name, s.name AS SalesRepName, a.name AS AcountName
 67 | FROM accounts AS a
 68 | JOIN sales_reps AS s
 69 | ON s.id = a.sales_rep_id
 70 | JOIN region AS r
 71 | ON r.id = s.region_id
 72 | WHERE s.name LIKE 'S%' AND r.name = 'Midwest'
 73 | ORDER BY AcountName;
 74 | 
 75 | /* 3. List the region for each sales rep together with their associated accounts, restricted to accounts where the sales rep has a last name
 76 | starting with K and is in the Midwest region. The final table has three columns: the region name, the sales rep name, and the account name.
 77 | Accounts are sorted alphabetically (A-Z) according to account name. */
 78 | 
 79 | SELECT r.name, s.name AS SalesRepName, a.name AS AcountName
 80 | FROM accounts AS a
 81 | JOIN sales_reps AS s
 82 | ON s.id = a.sales_rep_id
 83 | JOIN region AS r
 84 | ON r.id = s.region_id
 85 | WHERE s.name LIKE '% K%' AND r.name = 'Midwest'
 86 | ORDER BY AcountName;
 87 | 
 88 | /* 4. For every order, report the name of its region, the account name, and the unit price paid (total_amt_usd/total), but only
 89 | when the standard order quantity exceeds 100. The final table has 3 columns: region name, account name, and unit price. To avoid a
 90 | division by zero error, .01 is added to the denominator: total_amt_usd/(total+0.01). */
 91 | 
 92 | SELECT r.name, a.name AS AcountName, o.total_amt_usd / (o.total + 0.01) AS unit_price
 93 | FROM orders AS o
 94 | JOIN accounts AS a
 95 | ON a.id = o.account_id
 96 | JOIN sales_reps AS s
 97 | ON s.id = a.sales_rep_id
 98 | JOIN region AS r
 99 | ON r.id = s.region_id
100 | WHERE o.standard_qty > 100;
101 | 
102 | /* 5. For every order, report the name of its region, the account name, and the unit price paid (total_amt_usd/total), but only when
103 | the standard order quantity exceeds 100 and the poster order quantity exceeds 50. The final table has 3 columns: region name, account name, and unit price,
104 | sorted with the smallest unit price first. To avoid a division by zero error, .01 is added to the denominator: total_amt_usd/(total+0.01). */
105 | 
106 | SELECT r.name, a.name AS AcountName, o.total_amt_usd / (o.total + 0.01) AS unit_price
107 | FROM orders AS o
108 | JOIN accounts AS a
109 | ON a.id = o.account_id
110 | JOIN sales_reps AS s
111 | ON s.id = a.sales_rep_id
112 | JOIN region AS r
113 | ON r.id = s.region_id
114 | WHERE poster_qty > 50 AND o.standard_qty > 100
115 | ORDER BY unit_price;
116 | 
117 | /* 6. For every order, report the name of its region, the account name, and the unit price paid (total_amt_usd/total), but only when
118 | the standard order quantity exceeds 100 and the poster order quantity exceeds 50. The final table has 3 columns: region name, account name, and unit price,
119 | sorted with the largest unit price first. To avoid a division by zero error, .01 is added to the denominator: total_amt_usd/(total+0.01). */
120 | 
121 | SELECT r.name, a.name AS AcountName, o.total_amt_usd / (o.total + 0.01) AS unit_price
122 | FROM orders AS o
123 | JOIN accounts AS a
124 | ON a.id = o.account_id
125 | JOIN sales_reps AS s
126 | ON s.id = a.sales_rep_id
127 | JOIN region AS r
128 | ON r.id = s.region_id
129 | WHERE poster_qty > 50 AND o.standard_qty > 100
130 | ORDER BY unit_price DESC;
131 | 
132 | /* 7. Which channels are used by account id 1001? The final table has only 2 columns: the account name and the different channels.
133 | SELECT DISTINCT narrows the results down to only the unique values. */
134 | 
135 | SELECT DISTINCT we.channel, a.name
136 | FROM web_events AS we
137 | JOIN accounts AS a
138 | ON we.account_id = a.id
139 | WHERE a.id = '1001';
140 | 
141 | /* 8. Find all the orders that occurred in 2015. The final table has 4 columns: occurred_at, account name, order total, and order total_amt_usd. The year is selected with a half-open range on ISO-format dates (>= 2015-01-01 and < 2016-01-01) so an order stamped exactly at midnight on 2016-01-01 is not counted. */
142 | 
143 | SELECT orders.occurred_at, accounts.name, orders.total,
144 |        orders.total_amt_usd
145 | FROM orders 
146 | JOIN accounts
147 | ON accounts.id = orders.account_id
148 | WHERE orders.occurred_at >= '2015-01-01' AND orders.occurred_at < '2016-01-01'
149 | ORDER BY orders.occurred_at DESC;
150 | 
151 | 


--------------------------------------------------------------------------------
/functions_lesson_26/functions.md:
--------------------------------------------------------------------------------
  1 | # Functions
  2 | 
  3 | **Functions** are useful chunks of code that allow you to encapsulate a task. 
  4 | **Encapsulation** is a way to carry out a whole series of steps with one command.
  5 | 
  6 | Functions are used to help organize and optimize code.
  7 | 
  8 | # Defining function
  9 | 
 10 | When you define a function you specify the name and the sequence of statements.
 11 | 
 12 | This function calculates the volume of a cylinder. The formula for this is the cylinder's height, multiplied by the square of its radius, multiplied by pi.
 13 | ```
 14 | def cylinder_volume(height, radius):  # function header # (height, radius) are arguments
 15 |     pi = 3.14159  # body of the function
 16 |     return height * pi * radius ** 2
 17 | 
 18 | cylinder_volume(10, 3)  # function call statement
 19 | ```
 20 | 
 21 | **Function Header**
 22 | The function header, which is the first line of a function definition.
 23 | 
 24 | 1. The function header always starts with the `def` keyword, which indicates that this is a function definition.
 25 | 2. Then comes the function name (here, `cylinder_volume`), which follows the same naming conventions as variables. You can revisit the naming conventions below.
 26 | 3. Immediately after the name are parentheses that may include arguments separated by commas (here, height and radius). Arguments, or parameters, are values that are passed in as inputs when the function is called, and are used in the function body. If a function doesn't take arguments, these parentheses are left empty.
 27 | 4. The header always ends with a colon `:`.
 28 | 
 29 | 
 30 | **Function Body**
 31 | The rest of the function is contained in the body, which is where the function does its work.
 32 | 
 33 | 1. The body of a function is the code indented after the header line. Here, it's the two lines that define `pi` and `return` the volume.
 34 | 2. Within this body, we can refer to the argument variables and define new variables, which can only be used within these indented lines.
 35 | 3. The body will often include a return statement, which is used to send back an output value from the function to the statement that called the function. A return statement consists of the return keyword followed by an expression that is evaluated to get the output value for the function. If there is no return statement, the function simply returns `None`.
 36 | 
 37 | `print` provides output to the console, while `return` provides a value that you can store and work with in code later. 
 38 | 
 39 | 
 40 | ## Default Arguments
 41 | 
 42 | Default arguments allow functions to use default values when those arguments are omitted.
 43 | 
 44 | We can add default arguments in a function to have default values for parameters that are unspecified in a function call.
 45 | 
 46 | ```
 47 | def cylinder_volume(height, radius=5):
 48 |     pi = 3.14159
 49 |     return height * pi * radius ** 2
 50 | 
 51 | cylinder_volume(10)  # radius is default avlue in argument
 52 | cylinder_volume(10, 7)  # pass in arguments by position, overwrite the default value of 5.
 53 | cylinder_volume(height=10, radius=7)  # pass in arguments by name
 54 | ```
 55 | 
 56 | ## Variable scope
 57 | 
 58 | **Variable scope** refers to the parts of a program that a variable can be referenced, or used, from.
 59 | If variable is created inside a function, it can only be used within that function. Accessing it outside that function is not possible. 
 60 | 
 61 | ```
 62 | # This will result in an error
 63 | def some_function():
 64 |     word = "hello"
 65 | 
 66 | print(word)
 67 | ```
 68 | 
 69 | `word` is said to have scope that is only local to each function. This means you can use the same name for different variables that are used in different functions.
 70 | ```
 71 | # This works fine
 72 | def some_function():
 73 |     word = "hello"
 74 | 
 75 | def another_function():
 76 |     word = "goodbye"
 77 | ```
 78 | 
 79 | We can define a variable outside the function and it can still be accessed within a function.
 80 | 
 81 | ```
 82 | word = "hello"
 83 | 
 84 | def some_function():
 85 |     print(word)
 86 | 
 87 | some_function()
 88 | ```
 89 | 
 90 | **Scope** is essential to understanding how information is passed throughout programs in any language.
 91 | 
 92 | ## Documentation
 93 | 
 94 | **Docstring** is a type of comment used to explain the purpose of a function and how it should be used.
 95 | Docstrings are surrounded by triple quotes.
 96 |   [PEP 257 -- Docstring Conventions](https://www.python.org/dev/peps/pep-0257/)
 97 | 
 98 | 
 99 | ## Lambda Expressions
100 | 
101 | In Python, you can use **lambda expressions** to create anonymous functions, that is, functions that don't have a name. They're helpful for creating quick functions that aren't really needed later in your code.
102 | If you want to specify multiple arguments in a **lambda function**, include them before the colon, separated by commas.
103 | 
104 | ```
105 | def multiply(x, y):
106 |     return x * y
107 | ``` 
108 | With a lambda expression:
109 | 
110 | ```
111 | multiply = lambda x, y: x * y
112 | ```
113 | 
114 | Both of these functions are used in the same way. In either case, we can call multiply like this:
115 | `multiply(4, 7)`
116 | 
117 | **Components of a Lambda Function**
118 | 1. The `lambda` keyword is used to indicate that this is a lambda expression.
119 | 2. Following lambda are one or more arguments for the anonymous function separated by commas, followed by a colon :. Similar to functions, the way the arguments are named in a lambda expression is arbitrary.
120 | 3. Last is an expression that is evaluated and returned in this function.
121 | 
122 | With this structure, lambda expressions aren’t ideal for complex functions, but can be very useful for short, simple functions.
123 | 
124 | #### Quiz: Lambda with Map
125 | `map()` is a higher-order built-in function that takes a function and iterable as inputs, and returns an iterator that applies the function to each element of the iterable. The code below uses map() to find the mean of each list in numbers to create the list averages. Test run it to see what happens.
126 | 
127 | Rewrite this code to be more concise by replacing the mean function with a lambda expression defined within the call to `map()`.
128 | 
129 | ```
130 | numbers = [
131 |               [34, 63, 88, 71, 29],
132 |               [90, 78, 51, 27, 45],
133 |               [63, 37, 85, 46, 22],
134 |               [51, 22, 34, 11, 18]
135 |            ]
136 | 
137 | def mean(num_list):
138 |     return sum(num_list) / len(num_list)
139 | 
140 | averages = list(map(mean, numbers))
141 | print(averages)
142 | ```
143 | 
144 | #### Lambda with Filter
145 | `filter()` is a higher-order built-in function that takes a function and iterable as inputs and returns an iterator with the elements from the iterable for which the function returns True.
146 | 
147 | [More about map(), filter()](https://www.programiz.com/python-programming/anonymous-function)
148 | 
149 | 
150 | ## Iterators and Generators
151 | 
152 | **Iterables** are objects that can return one of their elements at a time. A list is one of the most common iterables. Many of the built-in functions we’ve used so far, like 'enumerate,' return an iterator.
153 | 
154 | **An iterator** is an object that represents a stream of data. This is different from a list, which is also an iterable, but not an iterator because it is not a stream of data.
155 | 
156 | **Generators** are a simple way to create iterators using functions. They are not the only way to create iterators: you can also define iterators using classes, which you can read more about [here](https://docs.python.org/3/tutorial/classes.html#iterators).
157 | 
158 | Here is an example of a generator function called my_range, which produces an iterator that is a stream of numbers from 0 to (x - 1).
159 | ```
160 | def my_range(x):
161 |     i = 0
162 |     while i < x:
163 |         yield i
164 |         i += 1
165 | 
166 | # since this returns an iterator, we can convert it to a list or iterate through it in a loop to view 
167 | # its  contents. For example, this code:
168 | 
169 | for x in my_range(5):
170 |     print(x)
171 | ```
172 | Output:
173 | ```
174 | 0
175 | 1
176 | 2
177 | 3
178 | 4
179 | ```
180 | 
181 | Notice that instead of using the return keyword, it uses `yield`. This allows the function to return values one at a time, and start where it left off each time it’s called. This `yield` keyword is what differentiates a generator from a typical function.
182 | 
183 | 


--------------------------------------------------------------------------------
/control_flow_lesson_25/loops.md:
--------------------------------------------------------------------------------
  1 | ## Loops
  2 | 
  3 | There are two types of loops in Python: `for` and `while`.
  4 | 
  5 | A for loop is used to "iterate", or do something repeatedly, over an **iterable**.
  6 | 
  7 | An **iterable** is an object that can return one of its elements at a time. This can include **sequence types**, such as strings, lists, and tuples, as well as **non-sequence types**, such as dictionaries and files.
  8 | 
  9 | Example:
 10 | ```
 11 | cities = ['new york city', 'mountain view', 'chicago', 'los angeles']
 12 | for city in cities:
 13 |     print(city)
 14 | print("Done!")
 15 | ```
 16 | Output: 
 17 | ```
 18 | new york city
 19 | mountain view
 20 | chicago
 21 | los angeles
 22 | Done!
 23 | ```
 24 | 
 25 | ## Built-in function **range()**
 26 | 
 27 | The built-in function range() is the function to iterate over a sequence of numbers. It generates an iterator of arithmetic progressions.
 28 | 
 29 | Example: 
 30 | ```
 31 | # Prints out the numbers 0,1,2,3,4
 32 | for x in range(5):
 33 |     print(x)
 34 | ```
 35 | 
 36 | `range()` is a built-in function used to create an iterable sequence of numbers. You will frequently use `range()` with a `for` loop to repeat an action a certain number of times, as in this example:
 37 | ```
 38 | for i in range(3):
 39 |     print("Hello!")
 40 | ```
 41 | **range(start=0, stop, step=1)**
 42 | The `range()` function takes three integer arguments, the first and third of which are optional:
 43 | 
 44 | * The 'start' argument is the first number of the sequence. If unspecified, 'start' defaults to 0.
 45 | * The 'stop' argument is 1 more than the last number of the sequence. This argument must be specified.
 46 | * The 'step' argument is the difference between each number in the sequence. If unspecified, 'step' defaults to 1.
 47 | 
 48 | Notes on using `range()`:
 49 | 
 50 | If you specify one integer inside the parentheses with `range()`, it's used as the value for 'stop,' and the defaults are used for the other two.
 51 | * e.g. - `range(4)` returns 0, 1, 2, 3
 52 |    If you specify two integers inside the parentheses with `range()`, they're used for 'start' and 'stop,' and the default is used for 'step.'
 53 | * e.g. - `range(2, 6)` returns 2, 3, 4, 5
 54 |    Or you can specify all three integers for 'start', 'stop', and 'step.'
 55 | * e.g. - `range(1, 10, 2)` returns 1, 3, 5, 7, 9
 56 | 
 57 | * e.g. - `range(0, -5)` returns []
 58 | 
 59 | ## Creating and Modifying Lists
 60 | You can create a list by appending to a new list at each iteration of the for loop like this:
 61 | 
 62 | Creating a new list:
 63 | ```
 64 | cities = ['new york city', 'mountain view', 'chicago', 'los angeles']
 65 | capitalized_cities = []
 66 | 
 67 | for city in cities:
 68 |     capitalized_cities.append(city.title())
 69 | ```
 70 | 
 71 | **Modifying** a list is a bit more involved, and requires the use of the range() function.
 72 | 
 73 | We can use the range() function to generate the indices for each value in the cities list. This lets us access the elements of the list with cities[index] so that we can modify the values in the cities list in place.
 74 | ```
 75 | cities = ['new york city', 'mountain view', 'chicago', 'los angeles']
 76 | 
 77 | for index in range(len(cities)):
 78 |     cities[index] = cities[index].title()
 79 | ```
 80 | 
 81 | ## Iterating Through Dictionaries with For Loops
 82 | 
 83 | When you iterate through a dictionary using a for loop, doing it the normal way (for n in some_dict) will only give you access to the keys in the dictionary - which is what you'd want in some situations. In other cases, you'd want to iterate through both the keys and values in the dictionary. Let's see how this is done in an example. Consider this dictionary that uses names of actors as keys and their characters as values.
 84 | 
 85 | ```
 86 | cast = {
 87 |            "Jerry Seinfeld": "Jerry Seinfeld",
 88 |            "Julia Louis-Dreyfus": "Elaine Benes",
 89 |            "Jason Alexander": "George Costanza",
 90 |            "Michael Richards": "Cosmo Kramer"
 91 |        }
 92 | for key in cast:
 93 |     print(key)
 94 | ```
 95 | The output:
 96 | ```
 97 | Jerry Seinfeld
 98 | Julia Louis-Dreyfus
 99 | Jason Alexander
100 | Michael Richards
101 | ```
102 | 
103 | The method **items()** returns a view of the dictionary's (key, value) tuple pairs.
104 | ```
105 | cast = {
106 |            "Jerry Seinfeld": "Jerry Seinfeld",
107 |            "Julia Louis-Dreyfus": "Elaine Benes",
108 |            "Jason Alexander": "George Costanza",
109 |            "Michael Richards": "Cosmo Kramer"
110 |        }
111 | 
112 | for key, value in cast.items():
113 |     print("Actor: {}    Role: {}".format(key, value))
114 | ```
115 | 
116 | The output:
117 | ```
118 | Actor: Jerry Seinfeld    Role: Jerry Seinfeld
119 | Actor: Julia Louis-Dreyfus    Role: Elaine Benes
120 | Actor: Jason Alexander    Role: George Costanza
121 | Actor: Michael Richards    Role: Cosmo Kramer
122 | ```
123 | Example:
124 | ```
125 | cast = {
126 |            "Jerry Seinfeld": "Jerry Seinfeld",
127 |            "Julia Louis-Dreyfus": "Elaine Benes",
128 |            "Jason Alexander": "George Costanza",
129 |            "Michael Richards": "Cosmo Kramer"
130 |        }
131 | 
132 | print("Iterating through keys:")
133 | for key in cast:
134 |     print(key)
135 | 
136 | print("\nIterating through keys and values:")
137 | for key, value in cast.items():
138 |     print("Actor: {}    Role: {}".format(key, value))
139 | ```
140 | The output:
141 | ```
142 | Iterating through keys:
143 | Jason Alexander
144 | Michael Richards
145 | Jerry Seinfeld
146 | Julia Louis-Dreyfus
147 | 
148 | Iterating through keys and values:
149 | Actor: Jason Alexander    Role: George Costanza
150 | Actor: Michael Richards    Role: Cosmo Kramer
151 | Actor: Jerry Seinfeld    Role: Jerry Seinfeld
152 | Actor: Julia Louis-Dreyfus    Role: Elaine Benes
153 | ```
154 | 
155 | ## **zip** and **enumerate**
156 | 
157 | `zip` is a built-in function that returns an iterator combining multiple iterables into one sequence of tuples. A tuple is a sequence of values. The values can be any type and they're indexed by integers. Tuples are immutable.
158 | For example:
159 | 
160 | `list(zip(['a', 'b', 'c'], [1, 2, 3]))` would output: `[('a', 1), ('b', 2), ('c', 3)]`
161 | 
162 | Like we did for range() we need to convert it to a list or iterate through it with a loop to see the elements.
163 | 
164 | You could unpack each tuple in a for loop like this.
165 | ```
166 | letters = ['a', 'b', 'c']
167 | nums = [1, 2, 3]
168 | 
169 | for letter, num in zip(letters, nums):
170 |     print("{}: {}".format(letter, num))
171 | ```
172 | 
173 | To unzip a list into tuples using an asterisk:
174 | ```
175 | some_list = [('a', 1), ('b', 2), ('c', 3)]
176 | letters, nums = zip(*some_list)
177 | ```
178 | 
179 | # enumerate
180 | 
181 | `enumerate()` is a built-in function that returns an enumerate object: an iterator of pairs. The first element of each pair is an index and the second is the sequence's value at that index.
182 | 
183 | Example:
184 | 
185 | ```
186 | letters = ['a', 'b', 'c', 'd', 'e']
187 | for i, letter in enumerate(letters):
188 |     print(i, letter)
189 | ```
190 | 
191 | Output: 
192 | 
193 | ```
194 | 0 a
195 | 1 b
196 | 2 c
197 | 3 d
198 | 4 e
199 | ```
200 | 
201 | ## List comprehensions
202 | 
203 | List comprehension is an easy way to define and create lists based on existing lists.
204 | 
205 | List comprehensions can identify when it receives a string or a tuple and work on it like a list.
206 | 
207 | You want to separate the letters of the word hand and add the letters as items of a list.
208 | Example with for loop:
209 | 
210 | ```
211 | h_letters = []
212 | 
213 | for letter in 'hand':
214 |     h_letters.append(letter)
215 | 
216 | print(h_letters)
217 | ```
218 | 
219 | List comprehensions:
220 | 
221 | ```
222 | h_letters = [ letter for letter in 'hand' ]
223 | print( h_letters)
224 | ```
225 | **Syntax of List Comprehension**
226 | `[expression for item in list]` => `[letter for letter in 'hand']`
227 | 
228 | 
229 | ### Conditionals in List Comprehension
230 | 
231 | We will create list that uses mathematical operators, integers, and range().
232 | 
233 | ```
234 | number_list = [ x for x in range(20) if x % 2 == 0]
235 | print(number_list)
236 | ```
237 | Output:
238 | `[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]`
239 | The list, `number_list`, will be populated by the items in the range 0-19 whose value is divisible by 2.
240 | 
241 | `squares = [x**2 for x in range(9) if x % 2 == 0]`
242 | The code above sets squares equal to the list [0, 4, 16, 36, 64], as x to the power of 2 is only evaluated if x is even.
243 | 
244 | If you would like to add else, you have to move the conditionals to the beginning of the listcomp, right after the expression, like this.
245 | `squares = [x**2 if x % 2 == 0 else x + 3 for x in range(9)]`
246 | List comprehensions are not found in many other languages, but are very common in Python.


--------------------------------------------------------------------------------
/aggregations_lesson_30/aggregations.md:
--------------------------------------------------------------------------------
  1 | # Aggregations.
  2 | 
  3 | ## NULLs
  4 | 
  5 | `NULL`s are a datatype that specifies where no data exists in SQL. A NULL means "no data". It's different from a zero or a space (a space is a value).
  6 | 
  7 | NULLs are different than a zero (zero is a value) - they are cells where data does not exist. When identifying NULLs in a WHERE clause, we write IS NULL or IS NOT NULL. We don't use =, because NULL isn't considered a value in SQL. Rather, it is a property of the data.
  8 | 
  9 | ## NULLs - Expert Tip
 10 | There are two common ways in which you are likely to encounter NULLs:
 11 | 
 12 | + **NULL**s frequently occur when performing a LEFT or RIGHT JOIN. When some rows in the left table of a left join are not matched with rows in the right table, those rows will contain some NULL values in the result set.
 13 | 
 14 | + **NULL**s can also occur from simply missing data in our database.
 15 | 
 16 | ## NULLs and COUNT
 17 | 
 18 | The `count()` function returns the count of all the rows that contain some non-null data.
 19 | 
 20 | `count()` can also be used to count the number of non-null records in an individual column or any column in a table.
 21 | 
 22 | Notice that COUNT does not consider rows that have NULL values. Therefore, this can be useful for quickly identifying which rows have missing data.
 23 | 
 24 | ## SUM
 25 | 
 26 | **SUM** works similarly to **COUNT** except you'll want to specify column names rather than using star.
 27 | 
 28 | You can't use `SUM(*)` the way you can use `COUNT(*)`. Unlike COUNT, you can only use SUM on numeric columns. However, SUM will ignore NULL values, which in effect treats NULLs as zero!
 29 | 
 30 | Aggregation Reminder
 31 | An important thing to remember: **aggregators only aggregate vertically - the values of a column**. If you want to perform a calculation across rows, you would do this with [simple arithmetic](https://community.modeanalytics.com/sql/tutorial/sql-operators/#arithmetic-in-sql).
 32 | 
 33 | ## MIN and MAX
 34 | 
 35 | The syntax for MIN and MAX is similar to SUM and COUNT.  MIN and MAX ignore NULL values.
 36 | 
 37 | ##### Expert Tip:
 38 | Functionally, MIN and MAX are similar to COUNT in that they can be used on non-numerical columns. Depending on the column type, MIN will return the lowest number, earliest date, or non-numerical value as early in the alphabet as possible. As you might suspect, MAX does the opposite—it returns the highest number, the latest date, or the non-numerical value closest alphabetically to “Z.”
 39 | 
 40 | 
 41 | ## AVG
 42 | 
 43 | `AVG` is a SQL aggregate function that calculates the average of a selected group of values. AVG has similar syntax to all of the other aggregation functions. AVG can be only used on numerical columns, it ignores nulls completely!! 
 44 | 
 45 | If you want to count NULLs as zero, you will need to use SUM and COUNT. However, this is probably not a good idea if the NULL values truly just represent unknown values for a cell.
 46 | 
 47 | ##### MEDIAN - Expert Tip
 48 | One quick note that a median might be a more appropriate measure of center for this data, but finding the median happens to be a pretty difficult thing to get using SQL alone — so difficult that finding a median is occasionally asked as an interview question.
 49 | 
 50 | ## MEDIAN
 51 | 
 52 | "Calculates a percentile based on a continuous distribution of the column value in SQL Server. The result is interpolated and might not be equal to any of the specific values in the column."
 53 | 
 54 | ```
 55 | PERCENTILE_CONT ( numeric_literal )   
 56 |     
 57 |     WITHIN GROUP ( ORDER BY order_by_expression [ ASC | DESC ] )  
 58 |     
 59 |     OVER ( [ <partition_by_clause> ] )
 60 | ```
 61 | 
 62 | [Median: PERCENTILE_CONT](https://docs.microsoft.com/en-us/sql/t-sql/functions/percentile-cont-transact-sql?view=sql-server-2017)
 63 | 
 64 | 
 65 | ## GROUP BY 
 66 | 
 67 | `GROUP BY` allows you to take the sum of data limited to each account rather than across the entire dataset.
 68 | 
 69 | + **GROUP BY** can be used to aggregate data within subsets of the data. For example, grouping for different accounts, different regions, or different sales representatives.
 70 | 
 71 | + The GROUP BY always goes between WHERE and ORDER BY
 72 | 
 73 | + ORDER BY works like SORT in spreadsheet software
 74 | 
 75 | + Any column in the SELECT statement that is not within an aggregator must be in the GROUP BY clause.
 76 | 
 77 | Example:
 78 | 
 79 | ```
 80 | SELECT account_id,
 81 | 	SUM(standard_qty) as standard_sum,
 82 | 	SUM(gloss_qty) as gloss_sum,
 83 | 	SUM(poster_qty) as poster_sum
 84 | FROM demo.orders
 85 | GROUP BY account_id
 86 | ORDER BY account_id;
 87 | ```
 88 | 
 89 | ##### GROUP BY - Expert Tip
 90 | it is worth noting that SQL evaluates the aggregations before the LIMIT clause. If you don’t group by any columns, you’ll get a 1-row result—no problem there. If you group by a column with enough unique values that it exceeds the LIMIT number, the aggregates will be calculated, and then some rows will simply be omitted from the results.
 91 | 
 92 | This is actually a nice way to do things because you know you’re going to get the correct aggregates. If SQL cut the table down to 100 rows and then performed the aggregations, your results would be substantially different. The above query’s results exceed 100 rows, so it’s a perfect example.
 93 | 
 94 | You can GROUP BY multiple columns at once. This is often useful to aggregate across a number of different segments.
 95 | 
 96 | Example:
 97 | ```
 98 | SELECT account_id,
 99 | 	   channel,
100 | 	   COUNT(id) as events
101 | FROM demo.web_events_full
102 | GROUP BY account_id, channel
103 | ORDER BY account_id, events DESC;
104 | ```
105 | The order in the `ORDER BY` determines which column is ordered on first.
106 | You can order `DESC` for any column in `ORDER BY`.
107 | 
108 | ##### GROUP BY - Expert Tips
109 | + The order of column names in your `GROUP BY` clause doesn’t matter—the results will be the same regardless. If we run the same query and reverse the order in the GROUP BY clause, you can see we get the same results.
110 | 
111 | 
112 | + As with ORDER BY, you can substitute numbers for column names in the `GROUP BY` clause. It’s generally recommended to do this only when you’re grouping many columns, or if something else is causing the text in the GROUP BY clause to be excessively long.
113 | 
114 | 
115 | + A reminder here that any column that is not within an aggregation must show up in your GROUP BY statement. If you forget, you will likely get an error.
116 | 
117 | 
118 | ## DISTINCT
119 | 
120 | If you want to group by some columns but you don't want to include any aggregations you can use `DISTINCT`. 
121 | 
122 | `DISTINCT` is always used in SELECT statements, and it provides the unique rows for all columns written in the SELECT statement. Therefore, you only use DISTINCT once in any particular SELECT statement.
123 | 
124 | ```
125 | SELECT DISTINCT column1, column2, column3
126 | FROM table1;
127 | ```
128 | 
129 | **DISTINCT - Expert Tip**
130 | It’s worth noting that using `DISTINCT`, particularly in aggregations, can slow your queries down quite a bit.
131 | 
132 | 
133 | ## HAVING
134 | **HAVING - Expert Tip**
135 | 
136 | `HAVING` is the “clean” way to filter a query that has been aggregated, but this is also commonly done using a subquery. Essentially, any time you want to perform a WHERE on an element of your query that was created by an aggregate, you need to use HAVING instead.
137 | 
138 | **WHERE** subsets the returned data based on a logical condition.
139 | **WHERE** appears after the FROM, JOIN, ON clauses, but before GROUP BY.
140 | **HAVING** appears after the GROUP BY clause but before the ORDER BY.
141 | **HAVING** is like **WHERE**, but it works on logical statements involving aggregations.
142 | 
143 | **Query clause order**
144 | 1. `SELECT`
145 | 2. `FROM`
146 | 3. `WHERE`
147 | 4. `GROUP BY`
148 | 5. `HAVING`
149 | 6. `ORDER BY` 
150 | 
151 | 
152 | ## DATE Functions
153 | 
154 | GROUPing BY a date column is not usually very useful in SQL, as these columns tend to have transaction data down to a second.
155 | There are a number of built in SQL functions that are aimed at helping us improve our experience in working with dates.
156 | 
157 | `DATE_TRUNC` allows you to truncate your date to a particular part of your date-time column. Common truncations are day, month, and year. Here is a great blog post by Mode Analytics on the power of this function.
158 | 
159 | `DATE_PART` can be useful for pulling a specific portion of a date, but notice pulling month or day of the week (dow) means that you are no longer keeping the years in order. Rather you are grouping for certain components regardless of which year they belonged in.
160 | 
161 | You can reference the columns in your select statement in GROUP BY and ORDER BY clauses with numbers that follow the order they appear in the select statement. For example
162 | 
163 | ```
164 | SELECT standard_qty, COUNT(*)
165 | 
166 | FROM orders
167 | 
168 | GROUP BY 1 -- this 1 refers to standard_qty since it is the first of the columns included in the select statement
169 | 
170 | ORDER BY 1 -- this 1 refers to standard_qty since it is the first of the columns included in the select statement
171 | ```
172 | 
173 | `DATE_PART('dow')` pulls the day of the week and returns a value from 0 to 6 (0 is Sunday, 6 is Saturday).


--------------------------------------------------------------------------------
/control_flow_lesson_25/loops_quizzes.py:
--------------------------------------------------------------------------------
  1 | # Quiz 1: Create Usernames
  2 | 
  3 | #Write a for loop that iterates over the names list to create a usernames list.
  4 | #To create a username for each name, make everything lowercase and replace
  5 | #spaces with underscores. Running your for loop over the list.
  6 | 
  7 | 
  8 | names = ["Joey Tribbiani", "Monica Geller", "Chandler Bing", "Phoebe Buffay"]
  9 | usernames = []
 10 | 
 11 | for name in names:
 12 |     name = name.lower()
 13 |     name = name.replace(' ', '_')
 14 |     usernames.append(name)
 15 |     
 16 |     # or shorter variant:
 17 |     # usernames.append(name.lower().replace(' ', '_'))
 18 | 
 19 | print(usernames)
 20 | 
 21 | 
 22 | # Quiz 2: Modify Usernames with Range
 23 | # Write a for loop that uses range() to iterate over the positions in usernames
 24 | # to modify the list. Like you did in the previous quiz, change each name to be
 25 | # lowercase and replace spaces with underscores.
 26 | 
 27 | usernames = ["Joey Tribbiani", "Monica Geller", "Chandler Bing",
 28 |              "Phoebe Buffay"]
 29 | 
 30 | for index in range(len(usernames)):
 31 |     usernames[index] = usernames[index].lower().replace(' ', '_')
 32 | print(usernames)
 33 | 
 34 | 
 35 | # Quiz 3: Tag Counter
 36 | # Write a for loop that iterates over a list of strings, tokens, and counts how
 37 | # many of them are XML tags. XML is a data language similar to HTML. You can
 38 | # tell if a string is an XML tag if it begins with a left angle bracket "<" and
 39 | # ends with a right angle bracket ">". Keep track of the number of tags using
 40 | # the variable count.
 41 | 
 42 | tokens = ['<greeting>', 'Hello World!', '</greeting>']
 43 | count = 0
 44 | 
 45 | for token in tokens:
 46 |     if token[0] == '<' and token[-1] == '>':
 47 |         count = count + 1
 48 |     
 49 | print(count)
 50 | 
 51 | 
 52 | # Quiz 4: Create an HTML List
 53 | # Write some code, including a for loop, that iterates over a list of strings
 54 | # and creates a single string, html_str, which is an HTML list. For example,
 55 | # should output:
 56 | # <ul>
 57 | # <li>first string</li>
 58 | # <li>second string</li>
 59 | # </ul>
 60 | 
 61 | items = ['first string', 'second string']
 62 | html_str = "<ul>\n"  # "\ n" is the character that marks the end of the line,
 63 |                      # it makes the characters that come after it in html_str
 64 |                      # appear on the next line
 65 | 
 66 | for item in items:
 67 |     html_str = html_str + "<li>" + str(item) + "</li>" "\n"
 68 | 
 69 | html_str = html_str + "</ul>"
 70 | 
 71 | print(html_str)
 72 | 
 73 | 
 74 | # Quiz 5: Lower
 75 | # If you want to create a new list called lower_colors, where each color
 76 | # in colors is lower cased, which code would do this?
 77 | 
 78 | colors = ['Red', 'Blue', 'Green', 'Purple']
 79 | lower_colors = []
 80 | 
 81 | for color in colors:
 82 |     lower_colors.append(color.lower())
 83 | 
 84 | print(lower_colors)
 85 | 
 86 | 
 87 | # Quizzes: Iterating Through Dictionaries
 88 | 
 89 | # Quiz 1: Fruit Basket - Task 1
 90 | """
 91 | You would like to count the number of fruits in your basket. In order to do
 92 | this, you have the following dictionary and list of fruits. Use the dictionary
 93 | and list to count the total number of fruits, but you do not want to count the
 94 | other items in your basket.
 95 | """
 96 | 
 97 | result = 0
 98 | basket_items = {'apples': 4, 'oranges': 19, 'kites': 3, 'sandwiches': 8}
 99 | fruits = ['apples', 'oranges', 'pears', 'peaches', 'grapes', 'bananas']
100 | 
101 | #Iterate through the dictionary
102 | for key, value in basket_items.items():
103 |     for item in fruits:
104 |         #if the key is in the list of fruits, add the value (number of fruits)
105 |         #to result
106 |         if item == key:
107 |             result = result + value
108 | 
109 | print(result)
110 | 
111 | 
112 | # Quiz: Fruit Basket - Task 2
113 | """
114 | If your solution is robust, you should be able to use it with any dictionary of
115 | items to count the number of fruits in the basket. Try the loop for each of
116 | the dictionaries below to make sure it always works.
117 | """
118 | 
119 | #Example 1
120 | 
121 | result = 0
122 | basket_items = {'pears': 5, 'grapes': 19, 'kites': 3, 'sandwiches': 8, 'bananas': 4}
123 | fruits = ['apples', 'oranges', 'pears', 'peaches', 'grapes', 'bananas']
124 | 
125 | # Your previous solution here
126 | 
127 | for key, value in basket_items.items():
128 |     for item in fruits:
129 |         
130 |         #if the key is in the list of fruits, add the value (number of fruits) 
131 |         #to result
132 |         if item == key:
133 |             result = result + value
134 | 
135 | print(result)
136 | 
137 | #Example 2
138 | 
139 | result = 0
140 | basket_items = {'peaches': 5, 'lettuce': 2, 'kites': 3, 'sandwiches': 8, 'pears': 4}
141 | fruits = ['apples', 'oranges', 'pears', 'peaches', 'grapes', 'bananas']
142 | 
143 | # Your previous solution here
144 | 
145 | for key, value in basket_items.items():
146 |     for item in fruits:
147 |         
148 |         #if the key is in the list of fruits, add the value (number of fruits)
149 |         #to result
150 |         if item == key:
151 |             result = result + value
152 | 
153 | print(result)
154 | 
155 | #Example 3
156 | 
157 | result = 0
158 | basket_items = {'lettuce': 2, 'kites': 3, 'sandwiches': 8, 'pears': 4, 'bears': 10}
159 | fruits = ['apples', 'oranges', 'pears', 'peaches', 'grapes', 'bananas']
160 | 
161 | # Your previous solution here
162 | 
163 | for key, value in basket_items.items():
164 |     for item in fruits:
165 |         #if the key is in the list of fruits, add the value (number of fruits)
166 |         #to result
167 |         if item == key:
168 |             result = result + value
169 | 
170 | print("I count {} fruits in the busket".format(result))
171 | 
172 | 
173 | # Quiz: Fruit Basket - Task 3
174 | 
175 | # You would like to count the number of fruits in your basket. 
176 | # In order to do this, you have the following dictionary and list of
177 | # fruits.  Use the dictionary and list to count the total number
178 | # of fruits and not_fruits.
179 | 
180 | fruit_count, not_fruit_count = 0, 0
181 | basket_items = {'apples': 4, 'oranges': 19, 'kites': 3, 'sandwiches': 8}
182 | fruits = ['apples', 'oranges', 'pears', 'peaches', 'grapes', 'bananas']
183 | 
184 | #Iterate through the dictionary
185 | for key, value in basket_items.items():
186 | 
187 |     #if the key is in the list of fruits, add to fruit_count.
188 |     if key in fruits:
189 |         fruit_count = fruit_count + value
190 | 
191 |     #if the key is not in the list, then add to the not_fruit_count
192 |     else:
193 |         not_fruit_count = not_fruit_count + value
194 | 
195 | print("There are {} fruits and {} not fruits".format(fruit_count, not_fruit_count))
196 | 
197 | # Quiz: Break the String
198 | #
199 | # Write a loop with a break statement to create a string, news_ticker, that
200 | # is exactly 140 characters long. You should create the news ticker by adding
201 | # headlines from the headlines list, inserting a space in between each headline.
202 | 
203 | headlines = ["Local Bear Eaten by Man",
204 |              "Legislature Announces New Laws",
205 |              "Peasant Discovers Violence Inherent in System",
206 |              "Cat Rescues Fireman Stuck in Tree",
207 |              "Brave Knight Runs Away",
208 |              "Papperbok Review: Totally Triffic"]
209 | 
210 | news_ticker = ""
211 | 
212 | headlines = " ".join(headlines)
213 | 
214 | for letter in headlines:
215 |     news_ticker = news_ticker + letter
216 |     if len(news_ticker) == 140:
217 |         break
218 | 
219 | print(news_ticker)
220 | 
221 | # Udacity solution
222 | 
headlines = ["Local Bear Eaten by Man",
             "Legislature Announces New Laws",
             "Peasant Discovers Violence Inherent in System",
             "Cat Rescues Fireman Stuck in Tree",
             "Brave Knight Runs Away",
             "Papperbok Review: Totally Triffic"]

news_ticker = ""
for headline in headlines:
    # Append the whole headline plus a separating space.
    news_ticker = "{}{} ".format(news_ticker, headline)

    # Keep adding headlines until at least 140 characters are available.
    if len(news_ticker) < 140:
        continue

    # Trim the overshoot so the ticker is exactly 140 characters, then stop.
    news_ticker = news_ticker[:140]
    break

print(news_ticker)
238 | 
239 | 
240 | # Quiz 1: zip() and enumerate()
241 | #
242 | # Zip Coordinates
243 | 
244 | 
x_coord = [23, 53, 2, -12, 95, 103, 14, -5]
y_coord = [677, 233, 405, 433, 905, 376, 432, 445]
z_coord = [4, 16, -6, -42, 3, -6, 23, -1]
labels = ["F", "J", "A", "Q", "Y", "B", "W", "X"]

# Walk the four lists in lockstep and render each point as
# "label: x, y, z".
points = [
    "{}: {}, {}, {}".format(label, x, y, z)
    for label, x, y, z in zip(labels, x_coord, y_coord, z_coord)
]

print(points)
256 | 
257 | 
258 | # Udacity solution:
x_coord = [23, 53, 2, -12, 95, 103, 14, -5]
y_coord = [677, 233, 405, 433, 905, 376, 432, 445]
z_coord = [4, 16, -6, -42, 3, -6, 23, -1]
labels = ["F", "J", "A", "Q", "Y", "B", "W", "X"]

points = []
# zip() yields (label, x, y, z) tuples; unpack them into named parts
# before formatting.
for label, x, y, z in zip(labels, x_coord, y_coord, z_coord):
    points.append("{}: {}, {}, {}".format(label, x, y, z))

# Emit one formatted point per line.
print(*points, sep="\n")
270 | 
271 | 
272 | # Quiz 2: zip() and enumerate()
273 | #
274 | # Zip Lists to a Dictionary
275 | 
cast_names = ["Barney", "Robin", "Ted", "Lily", "Marshall"]
cast_heights = [72, 68, 72, 66, 76]

# Pair each name with the height at the same position and build a
# name -> height mapping.
cast = {name: height for name, height in zip(cast_names, cast_heights)}

print(cast)
282 | 
283 | 
284 | # Quiz 3: unzip
285 | #
286 | # Unzip the cast tuple into two names and heights tuples.
287 | 
288 | 
cast = (("Barney", 72), ("Robin", 68), ("Ted", 72), ("Lily", 66), ("Marshall", 76))

# zip(*cast) regroups the (name, height) pairs column-wise: the first column
# holds every name, the second every height.
names, heights = (tuple(column) for column in zip(*cast))

print(names)
print(heights)
297 | 
298 | 
299 | # Quiz 4: zip() and enumerate()
300 | #
301 | # Quiz: Transpose with Zip
302 | # Use zip to transpose data from a 4-by-3 matrix to a 3-by-4 matrix
303 | 
data = ((0, 1, 2), (3, 4, 5), (6, 7, 8), (9, 10, 11))

# Unpacking the rows into zip() groups the i-th element of every row,
# i.e. produces the columns of the original matrix as rows.
data_transpose = tuple(map(tuple, zip(*data)))

print(data_transpose)
309 | 
310 | 
311 | # Quiz 5: Quiz: Enumerate
312 | #
313 | # Use enumerate to modify the cast list so that each element contains the name
314 | # followed by the character's corresponding height. For example, the first
315 | # element of cast should change from "Barney Stinson" to "Barney Stinson 72".
316 | 
cast = ["Barney Stinson", "Robin Scherbatsky", "Ted Mosby", "Lily Aldrin", "Marshall Eriksen"]
heights = [72, 68, 72, 66, 76]

# Rewrite each cast entry in place as "<name> <height>", using the index
# from enumerate() to look up the matching height.
for index, name in enumerate(cast):
    cast[index] = name + " " + str(heights[index])

print(cast)
325 | 
326 | 


--------------------------------------------------------------------------------
/aggregations_lesson_30/aggregations.sql:
--------------------------------------------------------------------------------
  1 | # Aggregation Questions
  2 | #
  3 | #
  4 | # 1.Find the total amount of poster_qty paper ordered in the orders table.
  5 | 
  6 | SELECT SUM(poster_qty) as total_poster_sales
  7 | FROM orders;
  8 | 
  9 | # 2.Find the total amount of standard_qty paper ordered in the orders table.
 10 | 
 11 | SELECT COUNT(standard_qty) as total_standard_sales
 12 | FROM orders;
 13 | 
 14 | # 3.Find the total dollar amount of sales using the total_amt_usd in the orders table.
 15 | 
 16 | SELECT COUNT(total_amt_usd) as total_dollar_sales
 17 | FROM orders;
 18 | 
 19 | # 4.Find the total amount spent on standard_amt_usd and gloss_amt_usd paper for each
 20 | # order in the orders table. This should give a dollar amount for each order in the table.
 21 | 
 22 | SELECT standard_amt_usd + gloss_amt_usd as total
 23 | FROM orders;
 24 | 
 25 | # 5. Find the standard_amt_usd per unit of standard_qty paper. Your solution should use
 26 | # both an aggregation and a mathematical operator.
 27 | 
 28 | SELECT SUM(standard_amt_usd)/SUM(standard_qty) as unit_price_standard_qty
 29 | FROM orders;
 30 | 
 31 | 
 32 | # MAX, MIN, AVG
 33 | #
 34 | #
 35 | # 1.When was the earliest order ever placed? You only need to return the date.
 36 | 
 37 | SELECT MIN(occurred_at) as earliest_order
 38 | FROM orders;
 39 | 
 40 | # 2.Try performing the same query as in question 1 without using an aggregation function. 
 41 | 
 42 | SELECT occurred_at as earliest_order
 43 | FROM orders
 44 | ORDER BY occurred_at ASC
 45 | LIMIT 1;
 46 | 
 47 | # 3.When did the most recent (latest) web_event occur?
 48 | 
 49 | SELECT MAX(occurred_at) as latest_web_event
 50 | FROM web_events;
 51 | 
 52 | # 4.Try to perform the result of the previous query without using an aggregation function.
 53 | 
 54 | SELECT occurred_at as latest_web_event
 55 | FROM web_events
 56 | ORDER BY occurred_at DESC
 57 | LIMIT 1;
 58 | 
 59 | # 5.Find the mean (AVERAGE) amount spent per order on each paper type, as well as the mean
 60 | # amount of each paper type purchased per order. Your final answer should have 6 values -
 61 | # one for each paper type for the average number of sales, as well as the average amount.
 62 | 
 63 | SELECT AVG(standard_qty) as avg_standard,
 64 |        AVG(gloss_qty) as avg_gloss,
 65 |        AVG(poster_qty) as avg_poster,
 66 |        AVG(standard_amt_usd) as avg_standart_usd,
 67 |        AVG(gloss_amt_usd) as avg_gloss_usd,
 68 | 	 AVG(poster_amt_usd) as avg_poster_usd
 69 | FROM orders;
 70 | 
 71 | # 6.What is the MEDIAN total_usd
 72 | # spent on all orders?
 73 | 
 74 | /*
 75 | PERCENTILE_CONT interpolates the appropriate value, whether or not it exists in the data set,
 76 | while PERCENTILE_DISC always returns an actual value from the set.
 77 | */
 78 | 
 79 | SELECT PERCENTILE_CONT(0.5)
 80 | WITHIN GROUP (ORDER BY total_amt_usd) as median_total_usd
 81 | FROM orders;
 82 | 
 83 | 
 84 | # Udacity solution:
 85 | 
 86 | /*Since there are 6912 orders - we want the average of the 3457 and 3456 order amounts when ordered.
 87 | This is the average of 2483.16 and 2482.55. This gives the median of 2482.855. This obviously isn't
 88 | an ideal way to compute. If we obtain new orders, we would have to change the limit. SQL didn't even
 89 | calculate the median for us. The query below uses a SUBQUERY, but you could use any method to find the two
 90 | necessary values, and then you just need the average of them.
 91 | */
 92 | 
 93 | SELECT *
 94 | FROM (SELECT total_amt_usd
 95 |       FROM orders
 96 |       ORDER BY total_amt_usd
 97 |       LIMIT 3457) AS Table1
 98 | ORDER BY total_amt_usd DESC
 99 | LIMIT 2;
100 | 
101 | 
102 | 
103 | # GROUP BY
104 | 
105 | # 1.
106 | # Which account (by name) placed the earliest order? Your solution should have the account name
107 | # and the date of the order.
108 | 
-- Account name and date of the earliest order.
-- Rows must be sorted by the order date; sorting by account name returned
-- the alphabetically first account instead of the earliest order.
SELECT
    accounts.name      AS account_name,
    orders.occurred_at AS order_date
FROM accounts
JOIN orders
    ON accounts.id = orders.account_id
ORDER BY orders.occurred_at
LIMIT 1;
116 | 
117 | # or
118 | 
-- Alternative: earliest order date per account, then keep the account whose
-- first order is the oldest.
-- The tables are joined on account_id (the comma join had no condition and
-- produced a Cartesian product, giving every account the same global MIN),
-- and the result is sorted by the date rather than by the name.
SELECT
    accounts.name           AS account_name,
    MIN(orders.occurred_at) AS order_date
FROM orders
JOIN accounts
    ON accounts.id = orders.account_id
GROUP BY accounts.name
ORDER BY order_date
LIMIT 1;
125 | 
126 | # 2.
127 | # Find the total sales in usd for each account. You should include two columns - the total sales
128 | # for each company's orders in usd and the company name.
129 | 
130 | 
-- Total USD sales for each account (one row per account name).
-- The sum column is aliased total_sales_usd: the previous alias
-- (total_sales_per_oder) was misspelled and described a per-order value,
-- whereas this is the sum over all of an account's orders.
SELECT
    accounts.name             AS account_name,
    SUM(orders.total_amt_usd) AS total_sales_usd
FROM orders
JOIN accounts
    ON accounts.id = orders.account_id
GROUP BY accounts.name;
137 | 
138 | # 3.
139 | # Via what channel did the most recent (latest) web_event occur, which account was associated
140 | # with this web_event? Your query should return only three values - the date, channel, and account name.
141 | 
-- Date, account name and channel of the most recent web event.
SELECT
    w.occurred_at AS latest_web_events,
    a.name        AS account_name,
    w.channel     AS channel_name
FROM web_events AS w
INNER JOIN accounts AS a
    ON a.id = w.account_id
ORDER BY w.occurred_at DESC
LIMIT 1;
150 | 
151 | # 4.
152 | # Find the total number of times each type of channel from the web_events was used. Your final
153 | # table should have two columns - the channel and the number of times the channel was used.
154 | 
-- Number of web events (rows with a non-NULL occurred_at) per channel.
SELECT
    COUNT(w.occurred_at) AS use_web_events,
    w.channel            AS channel_name
FROM web_events AS w
GROUP BY w.channel;
159 | 
160 | # 5.Who was the primary contact associated with the earliest web_event? 
161 | 
-- Primary contact of the account tied to the oldest web event.
SELECT
    a.primary_poc
FROM web_events AS w
INNER JOIN accounts AS a
    ON a.id = w.account_id
ORDER BY w.occurred_at
LIMIT 1;
168 | 
169 | # 6.
170 | # What was the smallest order placed by each account in terms of
171 | # total usd. Provide only two columns - the account name and the total usd. Order from smallest
172 | # dollar amounts to largest.
173 | 
-- Smallest order total per account name, listed from smallest to largest.
SELECT
    MIN(o.total_amt_usd) AS smallest_order,
    a.name               AS account_name
FROM accounts AS a
INNER JOIN orders AS o
    ON o.account_id = a.id
GROUP BY a.name
ORDER BY smallest_order;
181 | 
182 | # 7.
183 | # Find the number of sales reps in each region. Your final table should have two columns -
184 | # the region and the number of sales_reps. Order from fewest reps to most reps.
185 | 
-- Count of sales reps per region name, fewest first.
SELECT
    r.name   AS region_name,
    COUNT(*) AS number_sales_reps
FROM region AS r
INNER JOIN sales_reps AS s
    ON s.region_id = r.id
GROUP BY r.name
ORDER BY number_sales_reps;
193 | 
194 | 
195 | # GROUP BY Part 2
196 | #
197 | 
198 | # 1.
199 | # For each account, determine the average amount of each type of paper they purchased
200 | # across their orders. Your result should have four columns - one for the account name and one
201 | # for the average quantity purchased for each of the paper types for each account.
202 | 
-- Average quantity of each paper type per account name.
-- Grouping by a.name is the same grouping the account_name alias referred to.
SELECT
    a.name              AS account_name,
    AVG(o.standard_qty) AS avg_standard,
    AVG(o.gloss_qty)    AS avg_gloss,
    AVG(o.poster_qty)   AS avg_poster
FROM accounts AS a
INNER JOIN orders AS o
    ON a.id = o.account_id
GROUP BY a.name;
210 | 
211 | # 2. 
212 | # For each account, determine the average amount spent per order on each paper type.
213 | # Your result should have four columns - one for the account name and one for the average
214 | # amount spent on each paper type.
215 | 
-- Average USD amount spent on each paper type per account name.
SELECT
    a.name                  AS account_name,
    AVG(o.standard_amt_usd) AS avg_standard,
    AVG(o.gloss_amt_usd)    AS avg_gloss,
    AVG(o.poster_amt_usd)   AS avg_poster
FROM accounts AS a
INNER JOIN orders AS o
    ON a.id = o.account_id
GROUP BY a.name;
223 | 
224 | 
225 | # 3.
226 | # Determine the number of times a particular channel was used in the web_events table for each sales rep.
227 | # Your final table should have three columns - the name of the sales rep, the channel, and the number of occurrences.
228 | # Order your table with the highest number of occurrences first.
229 |  
-- Events per (sales rep name, channel), most frequent first.
-- sales_reps -> accounts -> web_events links each event to its rep.
SELECT
    s.name           AS name,
    w.channel        AS channel,
    COUNT(w.channel) AS num_events
FROM sales_reps AS s
INNER JOIN accounts AS a
    ON s.id = a.sales_rep_id
INNER JOIN web_events AS w
    ON a.id = w.account_id
GROUP BY s.name, w.channel
ORDER BY num_events DESC;
237 | 
238 | # or
239 | 
-- Alternative formulation starting from accounts and counting rows with
-- COUNT(*) instead of counting the channel column.
SELECT
    s.name,
    w.channel,
    COUNT(*) AS num_events
FROM accounts AS a
INNER JOIN web_events AS w
    ON w.account_id = a.id
INNER JOIN sales_reps AS s
    ON a.sales_rep_id = s.id
GROUP BY s.name, w.channel
ORDER BY num_events DESC;
248 | 
249 | # 4. 
250 | # Determine the number of times a particular channel was used in the web_events table for each region. Your final
251 | # table should have three columns - the region name, the channel, and the number of occurrences. Order your table
252 | # with the highest number of occurrences first.
253 | 
-- Events per (region name, channel), most frequent first.
-- web_events -> accounts -> sales_reps -> region links each event to a region.
SELECT
    COUNT(w.channel) AS num_occurences,
    r.name           AS name,
    w.channel        AS channel
FROM web_events AS w
INNER JOIN accounts AS a
    ON w.account_id = a.id
INNER JOIN sales_reps AS s
    ON a.sales_rep_id = s.id
INNER JOIN region AS r
    ON s.region_id = r.id
GROUP BY r.name, w.channel
ORDER BY num_occurences DESC;
261 | 
262 | 
263 | # DISTINCT
264 | #
265 | # 1.Use DISTINCT to test if there are any accounts associated with more than one region.
266 | 
-- Unique (id, name) pairs from accounts.
SELECT DISTINCT
    a.id,
    a.name
FROM accounts AS a;
269 | 
270 | # Solution with JOIN
271 | 
-- One row per (account name, region name) pair with the number of joined
-- rows for that pair, sorted by account name.
SELECT
    a.name AS account_name,
    r.name AS region_name,
    COUNT(r.name)
FROM accounts AS a
INNER JOIN sales_reps AS s
    ON s.id = a.sales_rep_id
INNER JOIN region AS r
    ON r.id = s.region_id
GROUP BY a.name, r.name
ORDER BY a.name;
280 | 
281 | # Solution with COUNT and DISTINCT to count unique and all data
-- Compare the number of account-to-region rows with the number of distinct
-- accounts; the two counts differ only if some account maps to more than
-- one region.
-- The tables are joined on their keys: the previous comma join of
-- sales_reps and region had no condition, so it counted a Cartesian product
-- and never looked at accounts.
SELECT
    COUNT(*)             AS all_records,
    COUNT(DISTINCT a.id) AS unique_records
FROM accounts a
JOIN sales_reps s
    ON s.id = a.sales_rep_id
JOIN region r
    ON r.id = s.region_id;
284 | 
285 | # Udacity solution
286 | # If each account was associated with more than one region, the first query should
287 | # have returned more rows than the second query.
288 | 
-- Every account paired with the region of its sales rep.
SELECT
    a.id   AS "account id",
    r.id   AS "region id",
    a.name AS "account name",
    r.name AS "region name"
FROM accounts AS a
INNER JOIN sales_reps AS s
    ON a.sales_rep_id = s.id
INNER JOIN region AS r
    ON s.region_id = r.id;
296 | 
297 | #and 
-- Unique (id, name) pairs from accounts, for comparing row counts with the
-- joined account/region query.
SELECT DISTINCT
    a.id,
    a.name
FROM accounts AS a;
300 | 
301 | # 2. Have any sales reps worked on more than one account?
302 | 
-- Unique (id, name) pairs from sales_reps.
SELECT DISTINCT
    s.id,
    s.name
FROM sales_reps AS s;
305 | 
306 | # or
307 | 
-- Number of accounts handled by each sales rep, fewest first.
SELECT
    s.name,
    COUNT(*) AS num_accounts,
    s.id
FROM accounts AS a
INNER JOIN sales_reps AS s
    ON s.id = a.sales_rep_id
GROUP BY s.id, s.name
ORDER BY num_accounts;
315 | 
316 | 
317 | # HAVING
318 | # 1.How many of the sales reps have more than 5 accounts that they manage?
319 | #
320 | 
-- Sales reps that manage more than 5 accounts, with their account counts.
SELECT
    s.id,
    s.name,
    COUNT(*) AS num_accounts
FROM sales_reps AS s
INNER JOIN accounts AS a
    ON a.sales_rep_id = s.id
GROUP BY s.id, s.name
HAVING COUNT(*) > 5
ORDER BY num_accounts;
327 | 
328 | # Using SUBQUERY
329 | 
-- Number of sales reps that manage more than 5 accounts.
-- The derived table lists the qualifying reps; the outer query counts them.
-- No ORDER BY inside the derived table: its rows are only counted, so
-- sorting them was wasted work.
SELECT COUNT(*) num_reps_above5
FROM (SELECT s.id, s.name, COUNT(*) num_accounts
      FROM accounts a
      JOIN sales_reps s
      ON s.id = a.sales_rep_id
      GROUP BY s.id, s.name
      HAVING COUNT(*) > 5) AS Table1;
338 | 
339 | 
340 | # 2. How many accounts have more than 20 orders?
-- Accounts with more than 20 orders, fewest orders first.
SELECT
    a.id,
    a.name,
    COUNT(*) AS num_orders
FROM accounts AS a
INNER JOIN orders AS o
    ON o.account_id = a.id
GROUP BY a.id, a.name
HAVING COUNT(*) > 20
ORDER BY num_orders;
348 | 
349 | # 3. Which account has the most orders?
350 | 
-- Account with the highest order count.
SELECT
    a.id,
    a.name,
    COUNT(*) AS num_orders
FROM accounts AS a
INNER JOIN orders AS o
    ON o.account_id = a.id
GROUP BY a.id, a.name
ORDER BY num_orders DESC
LIMIT 1;
358 | 
359 | # 4. How many accounts spent more than 30,000 usd total across all orders?
360 | 
-- Accounts whose summed order totals exceed 30,000 USD.
SELECT
    a.id,
    a.name,
    SUM(o.total_amt_usd) AS total_spent
FROM accounts AS a
INNER JOIN orders AS o
    ON o.account_id = a.id
GROUP BY a.id, a.name
HAVING SUM(o.total_amt_usd) > 30000
ORDER BY total_spent;
368 | 
369 | # 5. How many accounts spent less than 1,000 usd total across all orders?
370 | 
-- Accounts whose summed order totals are below 1,000 USD.
SELECT
    a.id,
    a.name,
    SUM(o.total_amt_usd) AS total_spent
FROM accounts AS a
INNER JOIN orders AS o
    ON o.account_id = a.id
GROUP BY a.id, a.name
HAVING SUM(o.total_amt_usd) < 1000
ORDER BY total_spent;
378 | 
379 | # 6. Which account has spent the most with us?
380 | 
-- Account with the largest summed order total.
SELECT
    a.id,
    a.name,
    SUM(o.total_amt_usd) AS total_spent
FROM accounts AS a
INNER JOIN orders AS o
    ON o.account_id = a.id
GROUP BY a.id, a.name
ORDER BY total_spent DESC
LIMIT 1;
388 | 
389 | # 7. Which account has spent the least with us?
390 | 
-- Account with the smallest summed order total.
SELECT
    a.id,
    a.name,
    SUM(o.total_amt_usd) AS total_spent
FROM accounts AS a
INNER JOIN orders AS o
    ON o.account_id = a.id
GROUP BY a.id, a.name
ORDER BY total_spent
LIMIT 1;
398 | 
399 | # 8. Which accounts used facebook as a channel to contact customers more than 6 times?
400 | 
-- Accounts with more than 6 web events on the facebook channel.
-- The WHERE clause restricts rows to facebook before grouping; without it
-- the query returned every (account, channel) pair with more than 6 events.
SELECT a.id, a.name, w.channel, COUNT(*) count_channel
FROM accounts a
JOIN web_events w ON a.id = w.account_id
WHERE w.channel = 'facebook'
GROUP BY a.id, a.name, w.channel
HAVING COUNT(*) > 6
ORDER BY count_channel;
407 | 
408 | # 9. Which account used facebook most as a channel?
409 | 
-- Account with the most facebook web events.
SELECT
    a.id,
    a.name,
    w.channel,
    COUNT(*) AS channel_use
FROM accounts AS a
INNER JOIN web_events AS w
    ON w.account_id = a.id
WHERE w.channel = 'facebook'
GROUP BY a.id, a.name, w.channel
ORDER BY channel_use DESC
LIMIT 1;
417 | 
418 | 
419 | # 10. Which channel was most frequently used by most accounts?
420 | 
-- The ten (account, channel) pairs with the highest event counts.
SELECT
    a.id,
    a.name,
    w.channel,
    COUNT(*) AS channel_use
FROM accounts AS a
INNER JOIN web_events AS w
    ON w.account_id = a.id
GROUP BY a.id, a.name, w.channel
ORDER BY channel_use DESC
LIMIT 10;


--------------------------------------------------------------------------------