├── variability_lesson_13 ├── variability.md ├── avg_abs_dev_formula.png ├── avg_squared_deviation.png ├── sum_squared_deviation_formula.png └── quizzes_lesson13.md ├── .gitignore ├── basic_sql_lesson28 ├── table.png ├── parch_posey_db.png ├── notes.md ├── syntax_sql.md └── quizzes.sql ├── sql_joins_lesson_29 ├── join_sql.png ├── venn_diagram.png ├── primary_foreign_key.png ├── interchangeable_result.png ├── entity_relationship_diagram.png ├── recap_joins.md ├── sql_joins_lesson_29.md └── join_quizzes.sql ├── data_types_and_operators_lesson_24 ├── slicing.png ├── slicing_start.png ├── identity_operators.png ├── membership_operators.png ├── quizzes_lesson_24.py ├── string_methods_lists.md ├── dictionaries.md └── list_methods_tuples_sets.md ├── intro_to_research_methods_lessons_1_5 ├── sampling_error.png ├── population_sample.png └── terminology_intro_to_research_methods.md ├── scripting_lesson_27 └── scripting.md ├── sql_data_cleaning_lesson32 ├── sql_data_cleaning_lesson32.md └── data_cleaning.sql ├── control_flow_lesson_25 ├── while_loops.md ├── control_flow_quizzes.py ├── list_comprehensions.py ├── control_flow_practice.py ├── while_loops.py ├── control_flow.md ├── loops.md └── loops_quizzes.py ├── functions_lesson_26 ├── generator.py ├── generator_quizzes.py ├── functions.py └── functions.md ├── visualizing_data_lesson_6 └── visualizing_data.md ├── aggregations_lesson_30 ├── date_functions_quizzes.sql ├── case_statement.md ├── cas_statement.sql ├── aggregations.md └── aggregations.sql ├── README.md └── sql_subqueries_temporary_table_lesson31 ├── subqueries_tasks.sql ├── with_vs_subquery.sql └── subqueries_temporary_tables.md /variability_lesson_13/variability.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .idea 3 | 
sql_subqueries_temporary_table_lesson31/subquery_mani_u_solutions.py -------------------------------------------------------------------------------- /basic_sql_lesson28/table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/basic_sql_lesson28/table.png -------------------------------------------------------------------------------- /sql_joins_lesson_29/join_sql.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/sql_joins_lesson_29/join_sql.png -------------------------------------------------------------------------------- /sql_joins_lesson_29/venn_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/sql_joins_lesson_29/venn_diagram.png -------------------------------------------------------------------------------- /basic_sql_lesson28/parch_posey_db.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/basic_sql_lesson28/parch_posey_db.png -------------------------------------------------------------------------------- /sql_joins_lesson_29/primary_foreign_key.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/sql_joins_lesson_29/primary_foreign_key.png -------------------------------------------------------------------------------- /data_types_and_operators_lesson_24/slicing.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/data_types_and_operators_lesson_24/slicing.png -------------------------------------------------------------------------------- /sql_joins_lesson_29/interchangeable_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/sql_joins_lesson_29/interchangeable_result.png -------------------------------------------------------------------------------- /variability_lesson_13/avg_abs_dev_formula.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/variability_lesson_13/avg_abs_dev_formula.png -------------------------------------------------------------------------------- /variability_lesson_13/avg_squared_deviation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/variability_lesson_13/avg_squared_deviation.png -------------------------------------------------------------------------------- /sql_joins_lesson_29/entity_relationship_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/sql_joins_lesson_29/entity_relationship_diagram.png -------------------------------------------------------------------------------- /data_types_and_operators_lesson_24/slicing_start.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/data_types_and_operators_lesson_24/slicing_start.png 
-------------------------------------------------------------------------------- /intro_to_research_methods_lessons_1_5/sampling_error.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/intro_to_research_methods_lessons_1_5/sampling_error.png -------------------------------------------------------------------------------- /variability_lesson_13/sum_squared_deviation_formula.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/variability_lesson_13/sum_squared_deviation_formula.png -------------------------------------------------------------------------------- /data_types_and_operators_lesson_24/identity_operators.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/data_types_and_operators_lesson_24/identity_operators.png -------------------------------------------------------------------------------- /data_types_and_operators_lesson_24/membership_operators.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/data_types_and_operators_lesson_24/membership_operators.png -------------------------------------------------------------------------------- /intro_to_research_methods_lessons_1_5/population_sample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/HEAD/intro_to_research_methods_lessons_1_5/population_sample.png 
-------------------------------------------------------------------------------- /scripting_lesson_27/scripting.md: -------------------------------------------------------------------------------- 1 | ## SCRIPTING 2 | 3 | * Python Installation and Environment Setup 4 | * Running and Editing Python Scripts 5 | * Interacting with User Input 6 | * Handling Exceptions 7 | * Reading and Writing Files 8 | * Importing Local, Standard, and Third-Party Modules 9 | * Experimenting with an Interpreter 10 | -------------------------------------------------------------------------------- /sql_data_cleaning_lesson32/sql_data_cleaning_lesson32.md: -------------------------------------------------------------------------------- 1 | ## LEFT and RIGHT 2 | 3 | LEFT pulls a specified number of characters for each row in a specified column starting at the beginning (or from the left). As you saw here, you can pull the first three digits of a phone number using LEFT(phone_number, 3). 4 | 5 | 6 | RIGHT pulls a specified number of characters for each row in a specified column starting at the end (or from the right). As you saw here, you can pull the last eight digits of a phone number using RIGHT(phone_number, 8). 7 | 8 | 9 | LENGTH provides the number of characters for each row of a specified column. Here, you saw that we could use this to get the length of each phone number as LENGTH(phone_number). -------------------------------------------------------------------------------- /control_flow_lesson_25/while_loops.md: -------------------------------------------------------------------------------- 1 | ## while Loops 2 | 3 | A `for` loop is useful if you know how many iterations of the loop you need (this is called "definite iteration"). There are situations where it's impossible to know in advance 4 | how many times you will want the loop body executed ("indefinite iteration"). That's what a `while` loop is used for. 
5 | 6 | `while` loops are sometimes called **conditional** loops because they keep iterating as long as some condition is true. 7 | Example: 8 | ``` 9 | card_deck = [4, 11, 8, 5, 13, 2, 8, 10] 10 | hand = [] 11 | 12 | # adds the last element of the card_deck list to the hand list 13 | # until the values in hand add up to 17 or more 14 | while sum(hand) < 17: 15 | hand.append(card_deck.pop()) 16 | ``` 17 | 18 | ## sum() and pop() 19 | 20 | `sum()` returns the sum of the elements in a list. 21 | 22 | `pop()` is the opposite (or inverse) of the append method: it removes the last element from a list and returns it. -------------------------------------------------------------------------------- /variability_lesson_13/quizzes_lesson13.md: -------------------------------------------------------------------------------- 1 | ### 24. Which formula describes Average Absolute Deviations: 2 | 3 | ![Formula for absolute Deviation](avg_abs_dev_formula.png) 4 | 5 | ### 26. Sum of Squares 6 | 7 | Another way to get rid of negative values is to square each one, that is, to multiply each value by itself. 8 | 9 | ![Formula squared deviations](sum_squared_deviation_formula.png) 10 | The last correct formula is called **SS - sum of squares**. 11 | 12 | ### 27. Average Squared Deviation 13 | 14 | ![Average squared deviation](avg_squared_deviation.png) 15 | 16 | The average squared deviation is 291,622,740. There's a special name for this number: it's called the **variance**. 17 | 18 | How can we put **variance** in words? 19 | * Mean of squared deviations. (add all squared deviations and divide by n) 20 | * Sum of squared deviations divided by n. 21 | 22 | ### 33. Quiz: Standard Deviation in Words. 23 | 24 | What is a way to put the Standard Deviation in words? 
25 | 26 | * Square root of average squared deviation -------------------------------------------------------------------------------- /sql_joins_lesson_29/recap_joins.md: -------------------------------------------------------------------------------- 1 | Primary and Foreign Keys 2 | You learned a key element for JOINing tables in a database has to do with primary and foreign keys: 3 | 4 | * primary keys - are unique for every row in a table. These are generally the first column in each table (like you saw with the id column for every table in the Parch & Posey database). 5 | 6 | * foreign keys - are the primary key appearing in another table, which allows the rows to be non-unique. 7 | 8 | Choosing the set up of data in our database is very important, but not usually the job of a data analyst. This process is known as Database Normalization. 9 | 10 | JOINs 11 | In this lesson, you learned how to combine data from multiple tables using JOINs. The three JOIN statements you are most likely to use are: 12 | 13 | 1. JOIN - an INNER JOIN that only pulls data that exists in both tables. 14 | 15 | 2. LEFT JOIN - a way to pull all of the rows from the table in the FROM even if they do not exist in the JOIN statement. 16 | 17 | 3. RIGHT JOIN - a way to pull all of the rows from the table in the JOIN even if they do not exist in the FROM statement. 
-------------------------------------------------------------------------------- /functions_lesson_26/generator.py: -------------------------------------------------------------------------------- 1 | # square_number function returns a list of squared numbers 2 | 3 | def square_numbers(nums): 4 | result = [] 5 | for i in nums: 6 | result.append(i * i) 7 | return result 8 | 9 | 10 | my_nums = square_numbers([1, 2, 3, 4, 5]) 11 | 12 | print(my_nums) 13 | 14 | print('\n') 15 | 16 | # generator 17 | 18 | """ 19 | generator don't hold the entire result in memory it yields one result 20 | at a time 21 | """ 22 | 23 | def square_numbers(nums): 24 | for i in nums: 25 | yield (i * i) 26 | 27 | my_nums = square_numbers([1, 2, 3, 4, 5]) # my_ nums is generator 28 | 29 | for num in my_nums: 30 | print(num) 31 | 32 | print('\n') 33 | 34 | # next(my_nums) the output is 1, the first value in a list and first 35 | # squared number 36 | #print next(my_nums) # 1 37 | #print next(my_nums) # 4 38 | #print next(my_nums) # 9 39 | #print next(my_nums) # 16 40 | #print next(my_nums) 25 41 | 42 | 43 | # generator with list coprehension 44 | 45 | my_nums = (x*x for x in [1, 2, 3, 4, 5]) 46 | 47 | for num in my_nums: 48 | print(num) 49 | 50 | print('\n') 51 | 52 | # generator, convert data in a list 53 | 54 | my_nums = (x*x for x in [1, 2, 3, 4, 5]) 55 | 56 | print list(my_nums) -------------------------------------------------------------------------------- /control_flow_lesson_25/control_flow_quizzes.py: -------------------------------------------------------------------------------- 1 | # Conditional Statements 2 | 3 | # First Example - try changing the value of phone_balance 4 | phone_balance = 1 5 | bank_balance = 50 6 | 7 | if phone_balance < 10: 8 | phone_balance += 10 9 | bank_balance -= 10 10 | 11 | print(phone_balance) 12 | print(bank_balance) 13 | 14 | # Second Example - try changing the value of number 15 | 16 | number = 140 17 | if number % 2 == 0: 18 | print("Number " + 
str(number) + " is even.") 19 | else: 20 | print("Number " + str(number) + " is odd.") 21 | 22 | # Third Example - try to change the value of age 23 | age = 3 24 | 25 | # Here are the age limits for bus fares 26 | free_up_to_age = 4 27 | child_up_to_age = 18 28 | senior_from_age = 65 29 | 30 | # These lines determine the bus fare prices 31 | concession_ticket = 1.25 32 | adult_ticket = 2.50 33 | 34 | # Here is the logic for bus fare prices 35 | if age <= free_up_to_age: 36 | ticket_price = 0 37 | elif age <= child_up_to_age: 38 | ticket_price = concession_ticket 39 | elif age >= senior_from_age: 40 | ticket_price = concession_ticket 41 | else: 42 | ticket_price = adult_ticket 43 | 44 | message = "Somebody who is {} years old will pay ${} to ride the bus.".format(age, ticket_price) 45 | print(message) 46 | -------------------------------------------------------------------------------- /visualizing_data_lesson_6/visualizing_data.md: -------------------------------------------------------------------------------- 1 | # Frequency table. 2 | 3 | The number of times a certain value appears in a row/set of data is called the **frequency**. Frequency is a better word for number. 4 | For example, in the following list of numbers, the frequency of the number 3 is 6 (because it occurs 6 times): 5 | `1, 4, 3, 9, 11, 3, 3, 5, 77, 3, 88, 3, 3` 6 | 7 | A frequency table is a simple way to display the number of occurrences of a particular value or characteristic. 8 | 9 | A frequency distribution is a table showing each distinct value of some variable and the number of times it occurs in some dataset. 10 | 11 | Frequency distribution helps us: 12 | * to analyze the data 13 | * to estimate the frequencies of the population on the basis of the ample 14 | * to facilitate the computation of various statistical measures 15 | 16 | A **relative frequency distribution**s is a distribution in which relative frequencies are recorded against each class interval. 17 | 18 | # Tables. 
19 | 20 | Tables can show either **categorical variables** (sometimes called qualitative variables) or **quantitative variables** (sometimes called numeric variables). You 21 | can think of categorical variables as being categories (like eye color or brand of dog food) and quantitative variables as being numbers. 22 | 23 | # Histogram and Bar graph. 24 | 25 | -------------------------------------------------------------------------------- /control_flow_lesson_25/list_comprehensions.py: -------------------------------------------------------------------------------- 1 | # Quiz: Extract First Names 2 | # 3 | # Use a list comprehension to create a new list first_names containing just 4 | # the first names in names in lowercase. 5 | 6 | names = ["Rick Sanchez", "Morty Smith", "Summer Smith", "Jerry Smith", "Beth Smith"] 7 | 8 | first_names = [name.lower().split()[0] for name in names] 9 | 10 | print(first_names) 11 | 12 | 13 | # Quiz: Multiples of Three 14 | # Use a list comprehension to create a list multiples_3 containing the first 15 | # 20 multiples of 3. 16 | 17 | multiples_3 = [ x for x in range(3, 60+1) if x % 3 == 0] 18 | print(multiples_3) 19 | 20 | # Second solution: 21 | 22 | multiples_3 = [x * 3 for x in range(1, 21)] 23 | print(multiples_3) 24 | 25 | 26 | # Quiz: Filter Names by Scores 27 | # Use a list comprehension to create a list of names passed that only include 28 | # those that scored at least 65. 
29 | # 30 | 31 | 32 | scores = { 33 | "Rick Sanchez": 70, 34 | "Morty Smith": 35, 35 | "Summer Smith": 82, 36 | "Jerry Smith": 23, 37 | "Beth Smith": 98 38 | } 39 | 40 | 41 | passed = [key for key, value in scores.items() if value >= 65] 42 | print(passed) 43 | 44 | 45 | # Udacity solution: 46 | 47 | passed = [name for name, score in scores.items() if score >= 65] 48 | print(passed) -------------------------------------------------------------------------------- /data_types_and_operators_lesson_24/quizzes_lesson_24.py: -------------------------------------------------------------------------------- 1 | # 22. Quiz: Slicing Lists 2 | 3 | eclipse_dates = ['June 21, 2001', 'December 4, 2002', 'November 23, 2003', 4 | 'March 29, 2006', 'August 1, 2008', 'July 22, 2009', 5 | 'July 11, 2010', 'November 13, 2012', 'March 20, 2015', 6 | 'March 9, 2016'] 7 | # TODO: Modify this line so it prints the last three elements of the list 8 | print(eclipse_dates[-3:]) 9 | 10 | # 24. Quiz: List Methods 11 | 12 | names = ["Carol", "Albert", "Ben", "Donna"] 13 | names.append("Eugenia") 14 | print(sorted(names)) 15 | 16 | ['Albert', 'Ben', 'Carol', 'Donna', 'Eugenia'] 17 | 18 | # 30. Quiz: Dictionaries 19 | 20 | a = [1, 2, 3] 21 | b = a 22 | c = [1, 2, 3] 23 | 24 | print(a == b) # True 25 | print(a is b) # True 26 | print(a == c) # True 27 | print(a is c) # False 28 | 29 | # 34. 
Quiz: Compound Data Structures 30 | 31 | elements = {'hydrogen': {'number': 1, 'weight': 1.00794, 'symbol': 'H'}, 32 | 'helium': {'number': 2, 'weight': 4.002602, 'symbol': 'He'}} 33 | 34 | # todo: Add an 'is_noble_gas' entry to the hydrogen and helium dictionaries 35 | # hint: helium is a noble gas, hydrogen isn't 36 | 37 | elements['hydrogen']['is_noble_gas'] = False 38 | elements['helium']['is_noble_gas'] = True 39 | 40 | print(elements['hydrogen']['is_noble_gas']) 41 | print(elements['helium']['is_noble_gas']) 42 | 43 | -------------------------------------------------------------------------------- /aggregations_lesson_30/date_functions_quizzes.sql: -------------------------------------------------------------------------------- 1 | # 1. Find the sales in terms of total dollars for all orders in each year. 2 | 3 | SELECT DATE_TRUNC('year', occurred_at) as year, 4 | SUM(total) as total 5 | FROM orders 6 | GROUP BY 1 7 | ORDER BY 2 DESC; 8 | 9 | # 2. Which month did Parch & Posey have the greatest sales in terms of 10 | # total dollars? Are all months evenly represented by the dataset? 11 | 12 | SELECT DATE_TRUNC('month', occurred_at) as month, 13 | SUM(total_amt_usd) as total 14 | FROM orders 15 | WHERE occurred_at BETWEEN '2014-01-01' AND '2017-01-01' # remove the sales from 2013 and 2017 16 | GROUP BY 1 17 | ORDER BY 2 DESC; 18 | 19 | # 3. Which year did Parch & Posey have the greatest sales in terms 20 | # of total number of orders? Are all years evenly represented by the dataset? 21 | 22 | SELECT DATE_TRUNC('year', occurred_at) as year, 23 | COUNT(*) as total_sales 24 | FROM orders 25 | GROUP BY 1 26 | ORDER BY 2 DESC; 27 | 28 | 29 | # 4. Which month did Parch & Posey have the greatest sales in terms of total 30 | # number of orders? Are all months evenly represented by the dataset? 
31 | 32 | SELECT DATE_TRUNC('month', occurred_at) as month, 33 | COUNT(*) as total_sales 34 | FROM orders 35 | WHERE occurred_at BETWEEN '2014-01-01' AND '2017-01-01' 36 | GROUP BY 1 37 | ORDER BY 2 DESC; 38 | 39 | # 5. In which month of which year did Walmart spend the most on gloss paper in terms of dollars? 40 | 41 | SELECT DATE_TRUNC('month', occurred_at) as month, 42 | SUM(o.gloss_amt_usd) as gloss_paper_usd 43 | 44 | FROM orders o 45 | JOIN accounts a 46 | ON a.id = o.account_id 47 | WHERE a.name = 'Walmart' 48 | GROUP BY 1 49 | ORDER BY 2 DESC; -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## About 2 | 3 | ##### This repository is for my study notes, exercises, and quizzes during the Udacity Bertelsmann Scholarship Challenge. 4 | 5 | Notes are sorted by lessons. 6 | 7 | ## Lessons: 8 | 9 | - [Lessons 1-5](https://github.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/blob/master/intro_to_research_methods_lessons_1_5/terminology_intro_to_research_methods.md) 10 | - [Lesson 6](https://github.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/tree/master/visualizing_data_lesson_6) 11 | - [Lesson 13](https://github.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/tree/master/variability_lesson_13) 12 | - [Lesson 24](https://github.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/tree/master/data_types_and_operators_lesson_24) 13 | - [Lesson 25](https://github.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/tree/master/control_flow_lesson_25) 14 | - [Lesson 26](https://github.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/tree/master/functions_lesson_26) 15 | - [Lesson 28](https://github.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/tree/master/basic_sql_lesson28) 16 | - [Lesson 
29](https://github.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/tree/master/sql_joins_lesson_29) 17 | - [Lesson 30](https://github.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/tree/master/aggregations_lesson_30) 18 | - [Lesson 31](https://github.com/irsol/udacity-bertelsmann-data-science-challenge-scholarship-2018/tree/master/sql_subqueries_temporary_table_lesson31) 19 | - [Lesson 32]() 20 | -------------------------------------------------------------------------------- /data_types_and_operators_lesson_24/string_methods_lists.md: -------------------------------------------------------------------------------- 1 | # String Methods. 2 | 3 | * String is a sequences of letters. 4 | * Using methods is almost the same as using function: it takes arguments and returns a value. 5 | * To call method use dot notation. For example `sample_string.lower()`, methods could receive additional arguments, which are passed inside the parentheses. 6 | * Methods are specific to the data type for a particular variable. So there are some built-in methods that are available for all strings, different methods that are available for all integers, etc. 7 | 8 | Links: 9 | 10 | * [String Methods Documentation](https://docs.python.org/3/library/stdtypes.html#string-methods) 11 | 12 | # Lists! 13 | 14 | A list is a sequence of values. The values in a list are called **elements** (sometimes **items**) and elements can be any type of data. For example: 15 | 16 | `random_list = ['Gauda is a cheese?', 32, True]` 17 | 18 | `random_list[-1]` # True 19 | 20 | `random_list[0]` # Gauda is a cheese? 21 | 22 | A list within another list is **nested** list. A list that contains no elements inside is called an **empty** list, for example: [] 23 | Lists are mutable (their content can be modified). 24 | 25 | # Slicing, in or not in. 26 | 27 | Slicing is used to create new lists that have the same values or parts of the values of the originals. 
28 | 29 | When using slicing, it is important to remember that the lower index is `inclusive` and the upper index is 30 | `exclusive`. 31 | 32 | ![Slicing](slicing.png) 33 | ![Slicing start](slicing_start.png) 34 | 35 | ![Membership operators](membership_operators.png) 36 | 37 | # Mutability and order. 38 | 39 | Lists are mutable and can be modified, but strings cannot (a string is an immutable data type). Both strings and lists are ordered. -------------------------------------------------------------------------------- /data_types_and_operators_lesson_24/dictionaries.md: -------------------------------------------------------------------------------- 1 | # Dictionary. 2 | 3 | A dictionary is a mutable data type. In a list, the indices have to be integers; in a dictionary they can be (almost) any type. 4 | 5 | A dictionary stores pairs of elements: **keys** and **values**. 6 | We can check whether a key is in a dictionary with the `in` keyword, the same way we check whether a value is in a list or set. 7 | `get` is a related method: it looks up a key in a dictionary and returns its value, or `None` (or a default value, if one is given) if the key is not found. 8 | ```python 9 | food_bill = {"milk": 2, "bread": 1.23, "apples": 4} 10 | 11 | food_bill ["cucumber"] = 1.25 # add element 12 | 13 | print(food_bill) 14 | print("tomatoes" in food_bill) 15 | print(food_bill.get("pear")) # return None 16 | 17 | # use is not to check if a key return None 18 | vegetables = food_bill.get("carrots") 19 | is_null = vegetables is None # or use: vegetables is not None 20 | print(is_null) 21 | ``` 22 | ![Identity operators](identity_operators.png) 23 | 24 | 25 | ```python 26 | elements.get('dilithium') 27 | None 28 | 29 | elements['dilithium'] 30 | KeyError: 'dilithium' 31 | 32 | elements.get('kryptonite', 'There\'s no such element!') 33 | "There's no such element!" 34 | ``` 35 | 36 | # Compound Data Structure. 37 | 38 | We can include containers in other containers to create compound data structures. 
39 | Nested dictionary: 40 | ```python 41 | elements = {"hydrogen": {"number": 1, 42 | "weight": 1.00794, 43 | "symbol": "H"}, 44 | "helium": {"number": 2, 45 | "weight": 4.002602, 46 | "symbol": "He"}} 47 | print(elements['hydrogen']) 48 | print(elements['hydrogen']['number']) 49 | print(elements.get('zink', 'There is no such element!')) 50 | 51 | ``` 52 | Python practice links: 53 | [More practice](https://www.hackerrank.com/domains/python/py-introduction) 54 | [Python practice](https://www.codewars.com/users/sign_in) -------------------------------------------------------------------------------- /aggregations_lesson_30/case_statement.md: -------------------------------------------------------------------------------- 1 | Derive column take data from existing colimns and modify them. 2 | 3 | "CASE" statement handles "if", "Then" logic, is follwed by at least one pair of "When" and "Then" statements. Must end with the world "END". 4 | 5 | **CASE - Expert Tip** 6 | + The CASE statement always goes in the SELECT clause. 7 | 8 | + CASE must include the following components: WHEN, THEN, and END. ELSE is an optional component to catch cases that didn’t meet any of the other previous CASE conditions. 9 | 10 | + You can make any conditional statement using any conditional operator (like WHERE) between WHEN and THEN. This includes stringing together multiple conditional statements using AND and OR. 11 | 12 | + You can include multiple WHEN statements, as well as an ELSE statement again, to deal with any unaddressed conditions. 13 | 14 | Example 15 | In a quiz question in the previous Basic SQL lesson, you saw this question: 16 | 17 | Create a column that divides the standard_amt_usd by the standard_qty to find the unit price for standard paper for each order. Limit the results to the first 10 orders, and include the id and account_id fields. NOTE - you will be thrown an error with the correct solution to this question. This is for a division by zero. 
You will learn how to get a solution without an error to this query when you learn about CASE statements in a later section. 18 | 19 | Let's see how we can use the CASE statement to get around this error. 20 | 21 | ``` 22 | SELECT id, account_id, standard_amt_usd/standard_qty AS unit_price 23 | FROM orders 24 | LIMIT 10; 25 | ``` 26 | 27 | Now, let's use a CASE statement. This way any time the standard_qty is zero, we will return 0, and otherwise we will return the unit_price. 28 | ``` 29 | SELECT account_id, CASE WHEN standard_qty = 0 OR standard_qty IS NULL THEN 0 30 | ELSE standard_amt_usd/standard_qty END AS unit_price 31 | FROM orders 32 | LIMIT 10; 33 | ``` 34 | 35 | Example: 36 | ``` 37 | SELECT CASE WHEN total > 500 THEN 'Over 500' 38 | ELSE '500 or under' END as total_group, 39 | COUNT(*) as order_count 40 | FROM orders 41 | GROUP BY 1; 42 | ``` 43 | 44 | Using `WHERE` clause means only being able to get one set of data at a time. 45 | 46 | ``` 47 | SELECT COUNT(1) as oredrs_ver_500_units 48 | FROM orders 49 | WHERE total > 500; 50 | ``` -------------------------------------------------------------------------------- /sql_data_cleaning_lesson32/data_cleaning.sql: -------------------------------------------------------------------------------- 1 | # LEFT and RIGHT QUIZZES. 2 | 3 | # 1. In the accounts table, there is a column holding the website for each company. 4 | # The last three digits specify what type of web address they are using. 5 | # Pull these extensions and provide how many of each website type exist in the 6 | # accounts table. 7 | 8 | 9 | SELECT RIGHT(website, 3) AS web_address, COUNT(*) num_companies 10 | FROM accounts 11 | GROUP BY 1 12 | ORDER BY 2 DESC; 13 | 14 | 15 | # 2. 16 | /* 17 | There is much debate about how much the name (or even the first letter of a company name) 18 | matters. 
Use the accounts table to pull the first letter of each company name to see the 19 | distribution of company names that begin with each letter (or number). 20 | */ 21 | 22 | SELECT LEFT(UPPER(name), 1) AS first_char, COUNT(*) num_companies 23 | FROM accounts 24 | GROUP BY 1 25 | ORDER BY 2 DESC; 26 | 27 | # 3. Use the accounts table and a CASE statement to create two groups: one group 28 | # of company names that start with a number and a second group of those company names that 29 | # start with a letter. What proportion of company names start with a letter? 30 | 31 | SELECT SUM(num) nums, SUM(letter) letters 32 | FROM (SELECT name, CASE WHEN LEFT(UPPER(name), 1) IN ('0','1','2','3','4','5','6','7','8','9') 33 | THEN 1 ELSE 0 END AS num, 34 | CASE WHEN LEFT(UPPER(name), 1) IN ('0','1','2','3','4','5','6','7','8','9') 35 | THEN 0 ELSE 1 END AS letter 36 | FROM accounts) t1; 37 | 38 | # or 39 | 40 | SELECT SUM(CASE WHEN LEFT(name, 1) LIKE '^[0-9]' THEN 1 ELSE 0 END) AS num, 41 | SUM(CASE WHEN LEFT(name, 1) LIKE '^[0-9]' THEN 0 ELSE 1 END) AS letter 42 | FROM accounts; 43 | 44 | 45 | # 4. Consider vowels as a, e, i, o, and u. What proportion of company names start with a vowel, 46 | # and what percent start with anything else? 47 | 48 | #There are 80 company names that start with a vowel and 271 that start with other characters. 49 | #Therefore 80/351 are vowels or 22.8%. Therefore, 77.2% of company names do not start with vowels. 50 | SELECT SUM(vowels) vowels, SUM(other) other 51 | FROM (SELECT name, CASE WHEN LEFT(UPPER(name), 1) IN ('A','E','I','O','U') 52 | THEN 1 ELSE 0 END AS vowels, 53 | CASE WHEN LEFT(UPPER(name), 1) IN ('A','E','I','O','U') 54 | THEN 0 ELSE 1 END AS other 55 | FROM accounts) t1; 56 | -------------------------------------------------------------------------------- /basic_sql_lesson28/notes.md: -------------------------------------------------------------------------------- 1 | # Basic SQL. 
2 | 3 | One way to store data is to use spreadsheets. To visualize the relationships between spreadsheets we use an **ERD** (Entity Relationship Diagram). Each spreadsheet is represented as a table. At the top is the name of the table; below it each column name is listed. For example: 4 | ![table, columns](table.png) 5 | 6 | SQL is a language used to interact with a database. It can query one table or across multiple tables. 7 | 8 | A database is a collection of tables that share connected data stored in a computer. 9 | 10 | Below is the ERD for the database we will use from Parch & Posey. These diagrams help you visualize the data you are analyzing including: 11 | 12 | 1. The names of the tables. 13 | 2. The columns in each table. 14 | 3. The way the tables work together. 15 | 4. You can think of each of the boxes below as a spreadsheet. 16 | ![Parch and Posey DB](parch_posey_db.png) 17 | Note: glossy_qty is incorrect, it is actually gloss_qty in the database 18 | 19 | # Why SQL? 20 | 21 | **SQL** has a variety of functions that allow you to read, manipulate and change data. Why **SQL** is so popular for data analysis: 22 | 23 | 1. **SQL** is easy to understand and learn. 24 | 2. Access data directly. 25 | 3. Easy to audit and copy data. 26 | 4. **SQL** can run queries on multiple tables at once, across large datasets. 27 | 5. You can do: sum, count, max, min... 28 | 6. **SQL** is flexible compared to Google Analytics and Excel. 29 | 30 | **NoSQL** stands for not only **SQL**. **NoSQL** environments are popular for web based data, but less popular for data that lives in spreadsheets. 31 | 32 | One of the most popular **NoSQL** databases is **MongoDB**. Instead of storing data in tables made out of individual rows, like a relational database does, it stores data in collections made out of individual documents. 33 | 34 | ## Why Businesses like Databases? 35 | 36 | 1.
Data integrity is ensured - only the data you want entered is entered, and only certain users are able to enter data into the database. 37 | 2. Data can be accessed quickly - SQL allows you to obtain results very quickly from the data stored in a database. 38 | 3. Data is easily shared - multiple individuals can access data stored in a database, and the data is the same for all users allowing for consistent results for anyone with access to your database. 39 | 40 | ## How do DBs store data? 41 | 42 | Data in a DB is stored in tables. DB tables are organized by column, and each column must have a `unique name`. All data in a column must be of the same type (don't mix string, text or numbers). 43 | 44 | Consistent column types are one of the main reasons working with a db is fast. 45 | 46 | [Comparison of Relational 47 | Database](https://www.digitalocean.com/community/tutorials/sqlite-vs-mysql-vs-postgresql-a-comparison-of-relational-database-management-systems) -------------------------------------------------------------------------------- /functions_lesson_26/generator_quizzes.py: -------------------------------------------------------------------------------- 1 | # Quiz: Implement my_enumerate 2 | """ 3 | Write your own generator function that works like the built-in function enumerate.
4 | 5 | Calling the function like this: 6 | 7 | lessons = ["Why Python Programming", "Data Types and Operators", "Control Flow", 8 | "Functions", "Scripting"] 9 | 10 | for i, lesson in my_enumerate(lessons, 1): 11 | print("Lesson {}: {}".format(i, lesson)) 12 | 13 | should output: 14 | 15 | Lesson 1: Why Python Programming 16 | Lesson 2: Data Types and Operators 17 | Lesson 3: Control Flow 18 | Lesson 4: Functions 19 | Lesson 5: Scripting 20 | """ 21 | 22 | 23 | lessons = ["Why Python Programming", "Data Types and Operators", "Control Flow", "Functions", "Scripting"] 24 | 25 | 26 | def my_enumerate(iterable, start=0): 27 | # Index-based version: yields (number, item) pairs; it calls len() and indexes the argument, so it needs a sequence 28 | for i in range(start, len(iterable) + start): 29 | yield(i, iterable[i-start]) 30 | 31 | for i, lesson in my_enumerate(lessons, 1): 32 | print("Lesson {}: {}".format(i, lesson)) 33 | 34 | 35 | # print 5 lessons 36 | 37 | lessons = ["Why Python Programming", "Data Types and Operators", "Control Flow", "Functions", "Scripting"] 38 | 39 | # Counter-based version: loops over the argument directly and keeps its own counter, so no len() or indexing is needed 40 | def my_enumerate(iterable, start=0): 41 | i = start 42 | for num in iterable: 43 | yield i, num 44 | i += 1 45 | 46 | for i, lesson in my_enumerate(lessons, 1): 47 | print("Lesson {}: {}".format(i, lesson)) 48 | 49 | 50 | # Quiz: Chunker 51 | 52 | """ 53 | If you have an iterable that is too large to fit in memory in full (e.g., 54 | when dealing with large files), being able to take and use chunks of it at a 55 | time can be very valuable. 56 | 57 | Implement a generator function, chunker, that takes in an iterable and yields 58 | a chunk of a specified size at a time.
59 | 60 | should output: 61 | 62 | [0, 1, 2, 3] 63 | [4, 5, 6, 7] 64 | [8, 9, 10, 11] 65 | [12, 13, 14, 15] 66 | [16, 17, 18, 19] 67 | [20, 21, 22, 23] 68 | [24] 69 | """ 70 | 71 | 72 | def chunker(iterable, size): 73 | for i in range(0, len(iterable), size): 74 | index = i + size 75 | lst = iterable[i:index] 76 | yield lst 77 | 78 | 79 | for chunk in chunker(range(25), 4): 80 | print(list(chunk)) 81 | 82 | 83 | # Udacity solution 84 | 85 | def chunker(iterable, size): 86 | """Yield successive chunks from iterable of length size.""" 87 | for i in range(0, len(iterable), size): 88 | yield iterable[i:i + size] 89 | 90 | for chunk in chunker(range(25), 4): 91 | print(list(chunk)) 92 | 93 | 94 | # Generator Expressions 95 | 96 | 97 | sq_list = [x**2 for x in range(10)] # this produces a list of squares 98 | 99 | sq_iterator = (x**2 for x in range(10)) # this produces an iterator of squares 100 | 101 | 102 | -------------------------------------------------------------------------------- /data_types_and_operators_lesson_24/list_methods_tuples_sets.md: -------------------------------------------------------------------------------- 1 | # List Methods. 2 | 3 | Python provides methods that operate on lists. Methods such as `append`, `insert` and `reverse` modify the list in place instead of creating a new list, while functions such as `len`, `max` and `sorted` leave the list unchanged. 4 | 5 | **Useful** methods and functions for lists: 6 | 7 | 1. `l.append(v)` appends value v to the end of list l. 8 | 2. `l.insert(i, v)` inserts value v at index i in list l. 9 | 3. `l.reverse()` reverses the order of the values in list l. 10 | 4. `len()` returns how many elements are in a list. 11 | 5. `max()` returns the greatest element of the list. 12 | 6. `sorted()` returns a copy of a list in order from smallest to largest, leaving the list unchanged. 13 | 14 | 15 | # Join Method. 16 | 17 | `join` takes a list as an argument and returns a string consisting of the list elements joined by a separator string. `\n` is a separator for a new line between elements.
18 | ```python 19 | new_str = "\n".join(["ann", "get", "an", "umbrella"]) 20 | print(new_str) 21 | 22 | Output: 23 | 24 | ann 25 | get 26 | an 27 | umbrella 28 | ``` 29 | It is important to remember to separate each of the items in the list you are joining with a comma (,). Forgetting to do so will not trigger an error, but will give you unexpected results, because Python silently concatenates adjacent string literals. 30 | 31 | # Tuples. 32 | 33 | A tuple is a sequence of values. These values can be of any type; they are indexed by integers and can be accessed by their indices. Tuples are immutable: you can't add or remove items from tuples, or sort them in place. 34 | 35 | They are often used to store related pieces of information (for example: latitude and longitude coordinates). Tuples are also used to assign multiple variables in a compact way. 36 | 37 | Tuple unpacking is used for assigning information from a tuple into multiple variables without having to access them one by one and write multiple assignment statements. 38 | ```python 39 | dimensions = 52, 40, 100 40 | length, width, height = dimensions # tuple unpacking 41 | print("The dimensions are {} x {} x {}".format(length, width, height)) 42 | ``` 43 | 44 | ```python 45 | tuple_a = 1, 2 46 | tuple_b = (1, 2) 47 | 48 | print(tuple_a == tuple_b) 49 | print(tuple_a[1]) 50 | 51 | Output: 52 | True #Parentheses are optional when making a tuple. 53 | 2 54 | ``` 55 | 56 | # Sets. 57 | 58 | A set is an unordered collection of unique elements; any element appears in a set at most once, there are no **duplicates**. Unordered means that elements are not sorted in any order. 59 | We can create a set from a list: 60 | ```python 61 | apples_set = set(apples) 62 | print(len(apples_set)) 63 | ``` 64 | 65 | Sets support the `in` operator the same as lists do. 66 | Set operations: 67 | `add` adds an element to a `set`. 68 | `pop` removes a random element.
69 | 70 | ```python 71 | fruit = {"apple", "banana", "orange", "grapefruit"} # define a set 72 | 73 | print("watermelon" in fruit) # check for element 74 | 75 | fruit.add("watermelon") # add an element 76 | print(fruit) 77 | 78 | print(fruit.pop()) # remove a random element 79 | print(fruit) 80 | 81 | Output: 82 | False 83 | {'grapefruit', 'orange', 'watermelon', 'banana', 'apple'} 84 | grapefruit 85 | {'orange', 'watermelon', 'banana', 'apple'} 86 | ``` -------------------------------------------------------------------------------- /control_flow_lesson_25/control_flow_practice.py: -------------------------------------------------------------------------------- 1 | # Practice: Conditional Statement 2 | # Prize bands: up to 50 points a wooden rabbit, 51-150 no prize, 151-180 a wafer-thin mint, anything above 180 a penguin. 3 | points = 174 # use this input to make your submission 4 | 5 | if points <= 50: 6 | result = "Congratulations! You won a wooden rabbit!" 7 | 8 | elif 51 <= points <= 150: 9 | result = "Oh dear, no prize this time." 10 | 11 | elif 151 <= points <= 180: 12 | result = "Congratulations! You won a wafer-thin mint!" 13 | 14 | else: 15 | result = "Congratulations! You won a penguin!" 16 | 17 | print(result) 18 | 19 | 20 | # Quiz: Guess My Number 21 | 22 | # You decide you want to play a game where you are hiding 23 | # a number from someone. Store this number in a variable 24 | # called 'answer'. Another user provides a number called 25 | # 'guess'. By comparing guess to answer, you inform the user 26 | # if their guess is too high or too low. 27 | 28 | answer = 10 # provide answer 29 | guess = 5 # provide guess 30 | 31 | if guess < answer: 32 | result = "Oops! Your guess was too low." 33 | elif guess > answer: 34 | result = "Oops! Your guess was too high." 35 | elif guess == answer: 36 | result = "Nice! Your guess matched the answer!" 37 | 38 | print(result) 39 | 40 | 41 | # Quiz: Tax Purchase 42 | 43 | # Depending on where an individual is from we need to tax them 44 | # appropriately.
The states of CA, MN, and 45 | # NY have taxes of 7.5%, 9.5%, and 8.9% respectively. 46 | # Use this information to take the amount of a purchase and 47 | # the corresponding state to assure that they are taxed by the right 48 | # amount. 49 | 50 | state = 'CA' # Either CA, MN, or NY 51 | purchase_amount = 21 # amount of purchase 52 | 53 | if state == 'CA': 54 | tax_amount = .075 55 | total_cost = purchase_amount*(1+tax_amount) 56 | result = "Since you're from {}, your total cost is {}.".format(state, total_cost) 57 | 58 | elif state == 'MN': 59 | tax_amount = .095 60 | total_cost = purchase_amount*(1+tax_amount) 61 | result = "Since you're from {}, your total cost is {}.".format(state, total_cost) 62 | 63 | elif state == 'NY': 64 | tax_amount = .089 65 | total_cost = purchase_amount*(1+tax_amount) 66 | result = "Since you're from {}, your total cost is {}.".format(state, total_cost) 67 | 68 | print(result) 69 | 70 | 71 | # Quiz: Boolean Expressions for Conditions 72 | 73 | #You will use a new variable prize to store a prize name if one was won, and 74 | #then use the truth value of this variable to compose the result message. This 75 | #will involve two if statements. 76 | 77 | #1st conditional statement: update prize to the correct prize name based 78 | #on points. 79 | #2nd conditional statement: set result to the correct phrase based on whether 80 | #prize is evaluated as True or False. 81 | 82 | 83 | points = 174 84 | 85 | # establish the default prize value to None 86 | prize = None 87 | 88 | # use the points value to assign prizes to the correct prize names 89 | if points <= 50: 90 | prize = "wooden rabbit" 91 | elif 151 <= points <= 180: 92 | prize = "wafer-thin mint" 93 | elif 181 <= points <= 200: 94 | prize = "penguin" 95 | 96 | # use the truth value of prize to assign result to the correct prize 97 | if prize: 98 | result = "Congratulations! You won a {}!".format(prize) 99 | else: 100 | result = "Oh dear, no prize this time." 
101 | 102 | print(result) 103 | -------------------------------------------------------------------------------- /functions_lesson_26/functions.py: -------------------------------------------------------------------------------- 1 | # Print vs. Return in Functions 2 | 3 | 4 | # this prints num + 10, but has no return statement, so the caller gets None back 5 | def show_plus_ten(num): 6 | print(num + 10) 7 | 8 | 9 | # this returns num + 10 to the caller without printing anything 10 | def add_ten(num): 11 | return(num + 10) 12 | 13 | print('Calling show_plus_ten...') 14 | return_value_1 = show_plus_ten(5) 15 | print('Done calling') 16 | print('This function returned: {}'.format(return_value_1)) 17 | 18 | 19 | print('\nCalling add_ten...') 20 | return_value_2 = add_ten(10) 21 | print('Done calling') 22 | print('This function returned: {}'.format(return_value_2)) 23 | 24 | 25 | # Quiz: Population Density Function 26 | # density is the population divided by the land area 27 | def population_density(population, land_area): 28 | return population / land_area 29 | 30 | # test cases for your function 31 | test1 = population_density(10, 1) 32 | expected_result1 = 10 33 | print("expected result: {}, actual result: {}".format(expected_result1, test1)) 34 | 35 | test2 = population_density(864816, 121.4) 36 | expected_result2 = 7123.6902801 37 | print("expected result: {}, actual result: {}".format(expected_result2, test2)) 38 | 39 | 40 | # Quiz: readable_timedelta 41 | 42 | def readable_timedelta(days): 43 | """ 44 | Return a string of the number of weeks and days included in days.
45 | 46 | Parameters: 47 | days -- number of days to convert (int) 48 | 49 | Returns: 50 | string of the number of weeks and days included in days 51 | """ 52 | # // to get the number of whole weeks 53 | week = days // 7 54 | # % to get the number of days that remain 55 | day = days % 7 56 | return "{} week(s) and {} day(s).".format(week, day) 57 | 58 | print(readable_timedelta(6)) 59 | 60 | # Variable scope 61 | 62 | egg_count = 0 63 | # the count is passed in and the new value returned, so the function never assigns to the module-level egg_count itself 64 | def buy_eggs(count): 65 | return count + 12 # purchase a dozen eggs 66 | 67 | egg_count = buy_eggs(egg_count) 68 | 69 | 70 | # Quiz: Lambda with Map 71 | # Rewrite this code to be more concise by replacing the mean function with a 72 | # lambda expression defined within the call to map(). 73 | 74 | numbers = [ 75 | [34, 63, 88, 71, 29], 76 | [90, 78, 51, 27, 45], 77 | [63, 37, 85, 46, 22], 78 | [51, 22, 34, 11, 18] 79 | ] 80 | 81 | # arithmetic mean of one list of numbers 82 | def mean(num_list): 83 | return sum(num_list) / len(num_list) 84 | 85 | averages = list(map(mean, numbers)) 86 | print(averages) 87 | 88 | # With lambda: 89 | 90 | numbers = [ 91 | [34, 63, 88, 71, 29], 92 | [90, 78, 51, 27, 45], 93 | [63, 37, 85, 46, 22], 94 | [51, 22, 34, 11, 18] 95 | ] 96 | 97 | averages = list(map(lambda x: sum(x) / len(x), numbers)) 98 | 99 | print(averages) 100 | 101 | 102 | # Quiz: Lambda with Filter 103 | # Rewrite this code to be more concise by replacing the is_short function with 104 | # a lambda expression defined within the call to filter() 105 | 106 | cities = ["New York City", "Los Angeles", "Chicago", "Mountain View", "Denver", "Boston"] 107 | # True for names shorter than 10 characters 108 | def is_short(name): 109 | return len(name) < 10 110 | 111 | short_cities = list(filter(is_short, cities)) 112 | print(short_cities) 113 | 114 | 115 | # With lambda 116 | cities = ["New York City", "Los Angeles", "Chicago", "Mountain View", "Denver", "Boston"] 117 | 118 | short_cities = list(filter(lambda city: len(city) < 10, cities)) 119 | 120 | print(short_cities) --------------------------------------------------------------------------------
/intro_to_research_methods_lessons_1_5/terminology_intro_to_research_methods.md: -------------------------------------------------------------------------------- 1 | # Constructs, Variables, Operational definition. 2 | 3 | **Construct** is a variable that is not directly observable or measurable. But once a construct has been operationally 4 | defined, variables are created. Examples of Construct: effort, itchiness, hunger, maturity, wisdom... 5 | 6 | 7 | |Construct | Operational definition | 8 | | :---: | :---: | 9 | |`Stress` | Level of cortisol (stress hormone) | 10 | |`Hunger` | Grams of food consumed | 11 | |`Effort` | Minutes spent studying for an exam | 12 | 13 | **Operational definition** describes how researchers decide to measure the variables (in our case constructs) in a study. It also 14 | helps you to measure constructs in the real world by turning them into measurable variables. 15 | 16 | **Hypothesis** is a statement about the relationship between the variables. 17 | 18 | All experiments and research studies examine some kind of variable(s). A variable is not only something that we measure, but also something that we can manipulate and something we can control for. 19 | 20 | 1. Dependent Variable or Outcome, or y-variable. 21 | - Is a variable that is dependent on an independent variable(s). 22 | 23 | 2. Independent Variable sometimes called Experimental Variable or Manipulated Variable, or Predictor, or x-variable. 24 | - Is a variable that is being manipulated in an experiment in order to observe the effect on a Dependent Variable, sometimes called an Outcome Variable. 25 | 26 | 3. Lurking Variables or Extraneous factors are variables/factors that can impact the Outcome/Dependent Variable. 27 | 28 | 29 | # Sample, population. 30 | 31 | **Population** is the entire group of elements being studied; the values that describe it are called parameters (for example `mu`, the population mean). 32 | A `parameter` is any numerical quantity that characterizes a given population or some aspect of it.
This means the parameter tells us something about the whole population. Examples of parameters: standard deviation, population mean (average). 33 | `N` is the population size. 34 | `mu` is the average (or mean) of the entire population. 35 | 36 | **Sample** is a portion of a population selected for the study; its average is written `X-bar`. A measurable characteristic of a sample is called a `statistic`. 37 | `n` is the sample size. 38 | `X-bar` is the sample average (or mean), used to estimate the population average. 39 | 40 | ![population vs sample](population_sample.png) 41 | 42 | # Sampling designs. 43 | 44 | **Random sample** means that each element in the population has an equal chance of being included in the sample. 45 | 46 | **Random selection (or sampling)** is randomly choosing a sample from a population. 47 | 48 | **Convenience selection (or sampling)** is selection based on easy availability/accessibility of elements; it doesn't represent the entire population. 49 | 50 | 51 | # Sampling error. 52 | 53 | **Sampling error** is the difference between a population parameter and a sample statistic used to estimate it. Sampling error occurs because a portion, and not the entire population, is surveyed. 54 | 55 | Sampling error formula: 56 | - `mu - X-bar` or `X-bar - mu` where `mu` is a population average and `X-bar` is a sample average 57 | 58 | ![sampling_error](sampling_error.png) 59 | 60 | 61 | # Bias. 62 | 63 | Bias - any systematic failure of a sample to represent its population. 64 | The most common is called a **simple random bias**. The best way to avoid random bias is to select elements for the sample at random. 65 | **Non-response bias** occurs when individuals randomly sampled for a survey fail to respond, cannot respond or decline to participate. 66 | 67 | 68 | Links: 69 | - [Sampling error][1] 70 | - [Estimation of a population][2] 71 | - [OpenIntro Statistics Second Edition by Christopher D. Barr, David M.
Diez, and Mine Çetinkaya-Rundel][3] 72 | 73 | [1]: https://www.britannica.com/science/sampling-error 74 | [2]: https://www.britannica.com/science/statistics/Estimation-of-a-population-mean#ref367452 75 | [3]: https://www.openintro.org/stat/textbook.php?stat_book=os -------------------------------------------------------------------------------- /aggregations_lesson_30/cas_statement.sql: -------------------------------------------------------------------------------- 1 | # 1. Quiz: CASE 2 | 3 | /* 4 | We would like to understand 3 different levels of customers based on the amount associated with their purchases. 5 | The top branch includes anyone with a Lifetime Value (total sales of all orders) greater than 200,000 usd. 6 | The second branch is between 200,000 and 100,000 usd. The lowest branch is anyone under 100,000 usd. 7 | Provide a table that includes the level associated with each account. You should provide the account name, 8 | the total sales of all orders for the customer, and the level. Order with the top spending customers listed first. 9 | */ 10 | 11 | SELECT a.name, 12 | SUM(o.total_amt_usd) as total_spent, 13 | CASE WHEN SUM(o.total_amt_usd) > 200000 THEN 'Over 200,000' 14 | WHEN SUM(o.total_amt_usd) > 100000 THEN 'Over 100,000' 15 | ELSE 'Less 100,000' END as total_level 16 | FROM orders o 17 | JOIN accounts a on a.id = o.account_id 18 | GROUP BY a.name 19 | ORDER BY total_spent DESC; 20 | 21 | 22 | # 2. Quiz: CASE 23 | 24 | /* 25 | We would now like to perform a similar calculation to the first, but we want to obtain the total amount spent 26 | by customers only in 2016 and 2017. Keep the same levels as in the previous question. Order with the top spending 27 | customers listed first.
28 | */ 29 | 30 | SELECT a.name, 31 | SUM(o.total_amt_usd) as total_spent, 32 | CASE WHEN SUM(o.total_amt_usd) > 200000 THEN 'Over 200,000' 33 | WHEN SUM(o.total_amt_usd) > 100000 THEN 'Over 100,000' 34 | ELSE 'Less 100,000' END as total_level 35 | FROM orders o 36 | JOIN accounts a on a.id = o.account_id 37 | /* half-open range so that orders placed later in the day on 2017-12-31 are kept; one total per customer across both years */ 38 | WHERE o.occurred_at >= '2016-01-01' and o.occurred_at < '2018-01-01' 39 | GROUP BY a.name 40 | ORDER BY total_spent DESC; 41 | 42 | # 2. Udacity solution 43 | 44 | SELECT a.name, SUM(total_amt_usd) total_spent, 45 | CASE WHEN SUM(total_amt_usd) > 200000 THEN 'top' 46 | WHEN SUM(total_amt_usd) > 100000 THEN 'middle' 47 | ELSE 'low' END AS customer_level 48 | FROM orders o 49 | JOIN accounts a 50 | ON o.account_id = a.id 51 | WHERE occurred_at > '2015-12-31' 52 | GROUP BY 1 53 | ORDER BY 2 DESC; 54 | 55 | # 3. Quiz: CASE 56 | 57 | /* 58 | We would like to identify top performing sales reps, which are sales reps associated with more than 200 orders. 59 | Create a table with the sales rep name, the total number of orders, and a column with top or not depending on if 60 | they have more than 200 orders. Place the top sales people first in your final table. 61 | */ 62 | 63 | SELECT s.name, 64 | COUNT(*) as number_of_orders, 65 | CASE WHEN COUNT(*) > 200 THEN 'top' 66 | ELSE 'not' END as sales_level 67 | FROM orders o 68 | JOIN accounts a on o.account_id = a.id 69 | JOIN sales_reps s on a.sales_rep_id = s.id 70 | GROUP BY s.name 71 | ORDER BY 2 DESC; 72 | 73 | # 4. Quiz: CASE 74 | 75 | /* 76 | The previous didn't account for the middle, nor the dollar amount associated with the sales. Management 77 | decides they want to see these characteristics represented as well. We would like to identify top performing 78 | sales reps, which are sales reps associated with more than 200 orders or more than 750000 in total sales. 79 | The middle group has any rep with more than 150 orders or 500000 in sales.
Create a table with the sales rep name, 80 | the total number of orders, total sales across all orders, and a column with top, middle, or low depending on this 81 | criteria. Place the top sales people based on dollar amount of sales first in your final table. 82 | */ 83 | 84 | SELECT s.name, 85 | COUNT(*) as number_of_orders, 86 | SUM(total_amt_usd) as total_usd, 87 | CASE WHEN COUNT(*) > 200 or SUM(total_amt_usd) > 750000 THEN 'top' 88 | WHEN COUNT(*) > 150 or SUM(total_amt_usd) > 500000 THEN 'middle' 89 | ELSE 'low' END as sales_level 90 | FROM orders o 91 | JOIN accounts a on o.account_id = a.id 92 | JOIN sales_reps s on a.sales_rep_id = s.id 93 | GROUP BY s.name 94 | ORDER BY total_usd DESC; 95 | 96 | # 4. Udacity Solution 97 | 98 | SELECT s.name, COUNT(*), SUM(o.total_amt_usd) total_spent, 99 | CASE WHEN COUNT(*) > 200 OR SUM(o.total_amt_usd) > 750000 THEN 'top' 100 | WHEN COUNT(*) > 150 OR SUM(o.total_amt_usd) > 500000 THEN 'middle' 101 | ELSE 'low' END AS sales_rep_level 102 | FROM orders o 103 | JOIN accounts a 104 | ON o.account_id = a.id 105 | JOIN sales_reps s 106 | ON s.id = a.sales_rep_id 107 | GROUP BY s.name 108 | ORDER BY 3 DESC; -------------------------------------------------------------------------------- /sql_subqueries_temporary_table_lesson31/subqueries_tasks.sql: -------------------------------------------------------------------------------- 1 | # 1. Quiz 2 | # Find the number of events that occur for each day for each channel 3 | 4 | SELECT DATE_TRUNC('day', occurred_at) as day, 5 | channel, 6 | COUNT(*) as events_count 7 | FROM web_events 8 | GROUP BY day, channel 9 | ORDER BY events_count DESC; 10 | 11 | 12 | # 2. Quiz 13 | # Create a subquery that provides all of the data from your first query. 14 | 15 | SELECT * 16 | FROM 17 | (SELECT DATE_TRUNC('day', occurred_at) as day, 18 | channel, 19 | COUNT(*) as events_count 20 | FROM web_events 21 | GROUP BY day, channel 22 | ORDER BY events_count DESC) sub; 23 | 24 | 25 | # 3.
Quiz 26 | # Find the average number of events for each channel. 27 | 28 | SELECT channel, 29 | AVG(events_count) as avg_events_count 30 | FROM 31 | (SELECT DATE_TRUNC('day', occurred_at) as day, 32 | channel, 33 | COUNT(*) as events_count 34 | FROM web_events 35 | GROUP BY day, channel) sub 36 | GROUP BY channel 37 | ORDER BY avg_events_count DESC; 38 | 39 | # More on subqueries: 40 | 41 | # pull the first month/year combo from the orders table 42 | 43 | SELECT DATE_TRUNC('month', MIN(occurred_at)) AS min_month 44 | FROM orders; 45 | 46 | # total amount spent (total_amt_usd) on all orders placed in that first month 47 | 48 | SELECT SUM(total_amt_usd) 49 | FROM orders 50 | WHERE DATE_TRUNC('month', occurred_at) = 51 | (SELECT DATE_TRUNC('month', MIN(occurred_at)) FROM orders); 52 | 53 | # average quantity of each kind of paper ordered in that first month 54 | 55 | SELECT AVG(standard_qty) as avg_standard, 56 | AVG(gloss_qty) as avg_gloss, 57 | AVG(poster_qty) as avg_poster 58 | FROM orders 59 | WHERE DATE_TRUNC('month', occurred_at) = 60 | (SELECT DATE_TRUNC('month', MIN(occurred_at)) AS min 61 | FROM orders); 62 | 63 | 64 | # QUIZ: Subquery Mania 65 | # 1. Provide the name of the sales_rep in each region with the largest amount of total_amt_usd sales. 66 | /* sum the sales per rep, rank the reps inside their region by that sum, keep rank 1 of every region */ 67 | SELECT name, region_id, total_amt 68 | FROM (SELECT s.name, s.region_id, SUM(o.total_amt_usd) as total_amt, RANK() OVER (PARTITION BY s.region_id ORDER BY SUM(o.total_amt_usd) DESC) as region_rank 69 | FROM sales_reps s JOIN accounts a ON s.id = a.sales_rep_id JOIN orders o ON a.id = o.account_id 70 | GROUP BY s.id, s.name, s.region_id) ranked 71 | WHERE region_rank = 1 72 | ORDER BY total_amt DESC; 73 | 74 | # 2. For the region with the largest (sum) of sales total_amt_usd, how many total (count) orders were placed? 75 | /* one row per region, largest sales total first; LIMIT 1 keeps only the top region and its order count */ 76 | SELECT r.name, SUM(o.total_amt_usd) as total_sales, COUNT(*) total_orders 77 | FROM region r 78 | JOIN sales_reps s ON r.id = s.region_id 79 | JOIN accounts a ON s.id = a.sales_rep_id 80 | JOIN orders o ON a.id = o.account_id 81 | GROUP BY r.name 82 | ORDER BY total_sales DESC LIMIT 1; 83 | 84 | # 3.
For the name of the account that purchased the most (in total over their lifetime as a customer) 85 | # standard_qty paper, how many accounts still had more in total purchases? 86 | /* innermost query: the account with the most standard_qty and its total purchases; outer queries count the accounts whose total purchases exceed that */ 87 | SELECT COUNT(*) as accounts_with_more_purchases 88 | FROM (SELECT a.name 89 | FROM orders o 90 | JOIN accounts a ON a.id = o.account_id 91 | GROUP BY 1 92 | HAVING SUM(o.total) > (SELECT total_purchases 93 | FROM (SELECT a.name AS customer, SUM(o.standard_qty) AS total_standard, SUM(o.total) AS total_purchases 94 | FROM accounts a 95 | JOIN orders o ON a.id = o.account_id 96 | GROUP BY 1 97 | ORDER BY 2 DESC 98 | LIMIT 1) top_standard)) more_purchases; 99 | 100 | # 4. For the customer that spent the most (in total over their lifetime as 101 | # a customer) total_amt_usd, how many web_events did they have for each channel? 102 | 103 | SELECT * 104 | FROM (SELECT a.name, w.channel, COUNT(w.channel) 105 | FROM web_events w 106 | JOIN accounts a ON a.id = w.account_id 107 | GROUP BY a.name, w.channel) t1 108 | JOIN (SELECT a.name, sum(o.total_amt_usd) total_usd 109 | FROM accounts a 110 | JOIN orders o ON a.id = o.account_id 111 | GROUP BY a.name 112 | ORDER BY total_usd DESC 113 | LIMIT 1) t2 114 | ON t1.name = t2.name; 115 | 116 | 117 | # 5. What is the lifetime average amount spent in terms 118 | # of total_amt_usd for the top 10 total spending accounts? 119 | 120 | SELECT a.id, a.name, SUM(o.total_amt_usd) total_spent 121 | FROM orders o 122 | JOIN accounts a 123 | ON a.id = o.account_id 124 | GROUP BY a.id, a.name 125 | ORDER BY 3 DESC 126 | LIMIT 10; 127 | 128 | # average of 10 amounts 129 | 130 | SELECT AVG(total_spent) 131 | FROM (SELECT a.id, a.name, SUM(o.total_amt_usd) total_spent 132 | FROM orders o 133 | JOIN accounts a 134 | ON a.id = o.account_id 135 | GROUP BY a.id, a.name 136 | ORDER BY 3 DESC 137 | LIMIT 10) temp; 138 | 139 | # 6. What is the lifetime average amount spent in terms 140 | # of total_amt_usd for only the companies that spent more 141 | # than the average of all orders.
142 | 143 | SELECT AVG(avg_amt_usd) 144 | FROM (SELECT o.account_id, AVG(o.total_amt_usd) as avg_amt_usd 145 | FROM orders o 146 | GROUP BY 1 147 | HAVING AVG(o.total_amt_usd) > 148 | (SELECT AVG(total_amt_usd) 149 | FROM orders)) temp_table; 150 | -------------------------------------------------------------------------------- /control_flow_lesson_25/while_loops.py: -------------------------------------------------------------------------------- 1 | # while Loops 2 | 3 | # 1.Practice: Water Falls 4 | 5 | # Print the string vertically, one character per line. 6 | 7 | print_str = "Water falls" 8 | 9 | # initialize a counting variable "i" to 0 10 | i = 0 11 | 12 | # write your while header line, comparing "i" to the length of the string 13 | while i < len(print_str): 14 | #print out the current character from the string 15 | print(print_str[i]) 16 | 17 | #increment counter variable in the body of the loop 18 | i = i + 1 19 | 20 | #print(print_str) 21 | 22 | 23 | # 2.Practice: Factorials with While Loops 24 | 25 | """ 26 | Find the Factorial of a Number, using While Loop. 27 | 28 | A factorial of a whole number is that number multiplied by every whole number 29 | between itself and 1. For example, 6 factorial (written "6!") 30 | equals 6 x 5 x 4 x 3 x 2 x 1 = 720. So 6! = 720. 31 | 32 | We can write a while loop to take any given number, and figure out what its 33 | factorial is. 34 | 35 | Example: If number is 6, your code should compute and print the product of 720: 36 | """ 37 | 38 | number = 6 39 | product = number 40 | # count number down to 1, multiplying product by each new value on the way 41 | while number > 1: 42 | number = number - 1 43 | product = product * number 44 | 45 | print(product) 46 | 47 | 48 | 49 | 50 | # 3.Practice: Factorials with For Loops 51 | # Now use a For Loop to Find the Factorial!
52 | 53 | number = 6 54 | # We'll start with the product equal to the number 55 | product = number 56 | 57 | # Write a for loop that calculates the factorial of our number 58 | for num in range(1, number): 59 | if num > 1: 60 | number = number - 1 61 | product = product * number 62 | 63 | print(product) 64 | 65 | # another solution without if statement 66 | for num in range(1, number): 67 | product *= num 68 | 69 | print(product) 70 | 71 | 72 | # Quiz: Count by 73 | # 74 | # 1. Suppose you want to count from some number start_num by another number 75 | # count_by until you hit a final number end_num. Use break_num as the variable 76 | # that you'll change each time through the loop. 77 | 78 | start_num = 2 # start number 79 | end_num = 66 # end number that you stop when you hit 80 | count_by = 3 # some number to count by 81 | 82 | break_num = start_num 83 | while break_num < end_num: 84 | break_num = break_num + count_by 85 | 86 | print(break_num) 87 | 88 | 89 | # 2. Now in addition, address what would happen if someone gives a start_num 90 | # that is greater than end_num. If this is the case, set result to "Oops! Looks 91 | # like your start value is greater than the end value. Please try again." 92 | # Otherwise, set result to the value of break_num. 93 | 94 | start_num = 2 # some start number 95 | end_num = 22 # some end number that you stop when you hit 96 | count_by = 3 # some number to count by 97 | 98 | # condition to check that end_num is larger than start_num before looping 99 | 100 | if start_num > end_num: 101 | result = "Oops! Looks like your start value is greater than the end value. Please try again." 102 | 103 | else: 104 | break_num = start_num 105 | while break_num < end_num: 106 | break_num += count_by 107 | result = break_num 108 | 109 | print(result) 110 | 111 | 112 | # 3. 
Write a while loop that finds the largest square number less than an 113 | # integerlimit and stores it in a variable nearest_square 114 | 115 | limit = 40 116 | 117 | count = 1 118 | nearest_square = 1 119 | 120 | while (count + 1) ** 2 < limit: 121 | count = count + 1 122 | nearest_square = count ** 2 123 | #print(nearest_square) # to print all possible nearest square 124 | 125 | print(nearest_square) 126 | 127 | # Break and Continue 128 | 129 | manifest = [("bananas", 15), ("mattresses", 24), ("dog kennels", 42), ("machine", 120), ("cheeses", 5)] 130 | 131 | # the code breaks the loop when weight exceeds or reaches the limit 132 | print("METHOD 1") 133 | weight = 0 134 | items = [] 135 | for cargo_name, cargo_weight in manifest: 136 | print("current weight: {}".format(weight)) 137 | if weight >= 100: 138 | print(" breaking loop now!") 139 | break 140 | else: 141 | print(" adding {} ({})".format(cargo_name, cargo_weight)) 142 | items.append(cargo_name) 143 | weight += cargo_weight 144 | 145 | print("\nFinal Weight: {}".format(weight)) 146 | print("Final Items: {}".format(items)) 147 | 148 | # skips an iteration when adding an item would exceed the limit 149 | # breaks the loop if weight is exactly the value of the limit 150 | print("\nMETHOD 2") 151 | weight = 0 152 | items = [] 153 | for cargo_name, cargo_weight in manifest: 154 | print("current weight: {}".format(weight)) 155 | if weight >= 100: 156 | print(" breaking from the loop now!") 157 | break 158 | elif weight + cargo_weight > 100: 159 | print(" skipping {} ({})".format(cargo_name, cargo_weight)) 160 | continue 161 | else: 162 | print(" adding {} ({})".format(cargo_name, cargo_weight)) 163 | items.append(cargo_name) 164 | weight += cargo_weight 165 | 166 | print("\nFinal Weight: {}".format(weight)) 167 | print("Final Items: {}".format(items)) 168 | 169 | 170 | -------------------------------------------------------------------------------- /basic_sql_lesson28/syntax_sql.md: 
-------------------------------------------------------------------------------- 1 | ## Types of Statements. 2 | 3 | The SQL language has a few different elements, the most basic of which is a statement. `Statements` tell the db what you'd like to do with the data. 4 | 5 | `CREATE TABLE` is a statement that creates a new table in a db, changes the data in a db. 6 | 7 | `DROP TABLE` removes a table in a db, changes the data in a db. 8 | 9 | `SELECT` allows you to read data and display it. Select statements are commonly referred to as **queries**. 10 | 11 | ## SELECT and FROM 12 | 13 | In order to generate the list of all orders, write a SELECT statement. 14 | 15 | `SELECT` is where you tell the query what columns you want back. Column names are separated by commas with no comma after the last column name. 16 | 17 | `SELECT *` select with asterisk means select all columns. 18 | 19 | `FROM` is where you tell the query what table you are querying from. Notice the columns need to exist in this table. 20 | 21 | Both SELECT and FROM clauses are mandatory. 22 | 23 | ## Formatting. 24 | 25 | It is common practice to capitalize commands (SELECT, FROM). This makes queries easier to read, which will matter more as you write more complex queries. 26 | 27 | It is common to use underscores and avoid spaces in column names. It is a bit annoying to work with spaces in SQL. 28 | 29 | SQL is not case sensitive. But it's a good habit to capitalize commands. 30 | 31 | Depending on your SQL environment, your query may need a semicolon at the end to execute. Other environments are more flexible in terms of this being a "requirement." 32 | 33 | Best practice: 34 | 35 | ``` 36 | SELECT column 37 | 38 | FROM table; 39 | ``` 40 | 41 | ## LIMIT 42 | 43 | The LIMIT clause is used when retrieving records from one or more tables in a database to limit the number of records returned based on a limit value.
44 | 45 | ``` 46 | SELECT * 47 | FROM table 48 | LIMIT 10; 49 | ``` 50 | 51 | ## ORDER BY 52 | 53 | The ORDER BY clause allows you to order the results by any column. It goes between the FROM and LIMIT clauses. By default ORDER BY goes from `a to z`, lowest to highest or earliest to latest if working with dates. This is referred to as ascending order. 54 | 55 | To sort in descending order, add DESC (from highest to lowest) after the column in the ORDER BY clause. 56 | 57 | ## WHERE 58 | 59 | The WHERE clause allows you to filter a set of results based on specific criteria. The WHERE clause goes after FROM but before ORDER BY or LIMIT. 60 | 61 | Comparison operators: 62 | ``` 63 | > (greater than) 64 | 65 | < (less than) 66 | 67 | >= (greater than or equal to) 68 | 69 | <= (less than or equal to) 70 | 71 | = (equal to) 72 | 73 | != (not equal to) 74 | ``` 75 | 76 | ## WHERE with Non-Numerical Data. 77 | 78 | Comparison operators can work with non-numerical data as well. If you're using an operator with values that are non-numerical you'll need to put the value in single quotes. 79 | 80 | ## Arithmetic Operators 81 | 82 | **Derived Column** a new column that is a manipulation of the existing columns in your db. 83 | Can include simple arithmetic or any number of advanced calculations. 84 | ``` 85 | * (Multiplication) 86 | 87 | + (Addition) 88 | 89 | - (Subtraction) 90 | 91 | / (Division) 92 | ``` 93 | 94 | To rename a derived column: add AS to the end of the line that produced the derived column 95 | and then give it a name: 96 | 97 | ``` 98 | gloss_qty + poster_qty AS nonstandard_qty 99 | ``` 100 | 101 | ## Logical Operators 102 | 103 | 1. LIKE 104 | This allows you to perform operations similar to using WHERE and =, but for cases when you might not know exactly what you are looking for. 105 | 106 | 2. IN 107 | This allows you to perform operations similar to using WHERE and =, but for more than one condition. 108 | 109 | 3.
NOT 110 | This is used with IN and LIKE to select all of the rows NOT LIKE or NOT IN a certain condition. 111 | 112 | 4. AND & BETWEEN 113 | These allow you to combine operations where all combined conditions must be true. 114 | 115 | 5. OR 116 | This allows you to combine operations where at least one of the combined conditions must be true. 117 | 118 | ## LIKE 119 | 120 | The `LIKE` operator is extremely useful when working with text. Use LIKE within a WHERE clause. 121 | The LIKE operator is frequently used with '%Example%' or 'S%' or '%s'. 122 | 123 | ## IN 124 | 125 | The `IN` operator is useful for working with both numeric and text columns. This operator allows you to use `=` but for more than one item 126 | of that particular column and all within the same query. 127 | 128 | `IN` requires single quotation marks around **non-numerical data**; **numerical data** can be entered directly. 129 | 130 | ## NOT 131 | 132 | The `NOT` operator is useful for working with the `IN` and `LIKE` operators. By specifying `NOT IN` and `NOT LIKE` we can grab all of the rows 133 | that don't meet a particular criterion. 134 | 135 | `NOT` provides the inverse results for IN, LIKE and similar operators. 136 | 137 | ## AND and BETWEEN 138 | The `AND` operator is used within a WHERE statement to consider more than one logical clause at a time. Each time you link a new statement with an AND, you will need to specify the column you are interested in looking at. You may link as many statements as you would like to consider at the same time. This operator works with all of the operations we have seen so far including arithmetic operators (+, *, -, /). LIKE, IN, and NOT logic can also be linked together using the AND operator.
139 | The `BETWEEN` operator: 140 | ``` 141 | WHERE column BETWEEN 6 AND 10 142 | ``` 143 | The same as: 144 | ``` 145 | WHERE column >= 6 AND column <= 10 146 | ``` 147 | 148 | 149 | ## OR 150 | 151 | `OR` is a logical operator in SQL that allows you to select rows that satisfy either of two conditions. It works similarly to `AND`, which selects the rows that satisfy both of two conditions. `OR` works with all the operations including arithmetic operators (+, -, *, /). When combining multiple of these operations, you might need to use **parentheses** to ensure that the logic you want to perform is being executed correctly. -------------------------------------------------------------------------------- /sql_subqueries_temporary_table_lesson31/with_vs_subquery.sql: -------------------------------------------------------------------------------- 1 | # You need to find the average number of events for each channel per day. 2 | 3 | 4 | SELECT channel, AVG(events) AS average_events 5 | FROM (SELECT DATE_TRUNC('day',occurred_at) AS day, 6 | channel, COUNT(*) as events 7 | FROM web_events 8 | GROUP BY 1,2) sub 9 | GROUP BY channel 10 | ORDER BY 2 DESC; 11 | 12 | 13 | # Using with 14 | 15 | # Notice, you can pull the inner query: 16 | 17 | SELECT DATE_TRUNC('day',occurred_at) AS day, 18 | channel, COUNT(*) as events 19 | FROM web_events 20 | GROUP BY 1,2 21 | 22 | # This is the part we put in the WITH statement.
-- Notice, we are aliasing the table as events below.
-- The fragment is kept inside a block comment because a WITH clause without
-- a final SELECT is not a complete statement and cannot be executed alone.

/*
WITH events AS (
    SELECT DATE_TRUNC('day', occurred_at) AS day,
           channel,
           COUNT(*) AS events
    FROM web_events
    GROUP BY 1, 2)
*/

-- Now, we can use this newly created events table as if it is any
-- other table in our database:

WITH events AS (
    SELECT DATE_TRUNC('day', occurred_at) AS day,
           channel,
           COUNT(*) AS events
    FROM web_events
    GROUP BY 1, 2)

SELECT channel, AVG(events) AS average_events
FROM events
GROUP BY channel
ORDER BY 2 DESC;


-- For the above example, we don't need anymore than the one additional table,
-- but imagine we needed to create a second table to pull from. We can create
-- an additional table to pull from in the following way:

WITH table1 AS (
    SELECT *
    FROM web_events),

table2 AS (
    SELECT *
    FROM accounts)


SELECT *
FROM table1
JOIN table2
ON table1.account_id = table2.id;


-- QUIZ: WITH

-- Provide the name of the sales_rep in each region with the largest amount of
-- total_amt_usd sales.

-- t1: total sales per (rep, region). No ORDER BY here: ordering inside a CTE
--     does not carry over to the outer query.
-- t2: best total per region; joined back to t1 to recover the rep name.
WITH t1 AS (
    SELECT s.name rep_name, r.name region_name, SUM(o.total_amt_usd) total_amt
    FROM sales_reps s
    JOIN accounts a
    ON a.sales_rep_id = s.id
    JOIN orders o
    ON o.account_id = a.id
    JOIN region r
    ON r.id = s.region_id
    GROUP BY 1, 2),
t2 AS (
    SELECT region_name, MAX(total_amt) total_amt
    FROM t1
    GROUP BY 1)
SELECT t1.rep_name, t1.region_name, t1.total_amt
FROM t1
JOIN t2
ON t1.region_name = t2.region_name AND t1.total_amt = t2.total_amt;

-- For the region with the largest sales total_amt_usd, how many total orders were placed?

-- t1: total sales per region; t2: the largest of those totals.
-- The final query counts the orders of the region whose total equals it.
WITH t1 AS (
    SELECT r.name region_name, SUM(o.total_amt_usd) total_amt
    FROM sales_reps s
    JOIN accounts a
    ON a.sales_rep_id = s.id
    JOIN orders o
    ON o.account_id = a.id
    JOIN region r
    ON r.id = s.region_id
    GROUP BY r.name),
t2 AS (
    SELECT MAX(total_amt) max_total_amt
    FROM t1)
-- COUNT(*) counts every order row; COUNT(o.total) would skip NULL totals.
SELECT r.name, COUNT(*) total_orders
FROM sales_reps s
JOIN accounts a
ON a.sales_rep_id = s.id
JOIN orders o
ON o.account_id = a.id
JOIN region r
ON r.id = s.region_id
GROUP BY r.name
HAVING SUM(o.total_amt_usd) = (SELECT max_total_amt FROM t2);

-- For the account that purchased the most (in total over their lifetime as a
-- customer) standard_qty paper, how many accounts still had more in total
-- purchases?

-- Grouping includes a.id so two accounts that share a name are not merged.
WITH t1 AS (
    SELECT a.name account_name, SUM(o.standard_qty) total_std, SUM(o.total) total
    FROM accounts a
    JOIN orders o
    ON o.account_id = a.id
    GROUP BY a.id, a.name
    ORDER BY 2 DESC
    LIMIT 1),
t2 AS (
    SELECT a.name
    FROM orders o
    JOIN accounts a
    ON a.id = o.account_id
    GROUP BY a.id, a.name
    HAVING SUM(o.total) > (SELECT total FROM t1))
SELECT COUNT(*)
FROM t2;


-- For the customer that spent the most (in total over their lifetime as a
-- customer) total_amt_usd, how many web_events did they have for each channel?

WITH t1 AS (
    SELECT a.id, a.name, SUM(o.total_amt_usd) tot_spent
    FROM orders o
    JOIN accounts a
    ON a.id = o.account_id
    GROUP BY a.id, a.name
    ORDER BY 3 DESC
    LIMIT 1)
SELECT a.name, w.channel, COUNT(*)
FROM accounts a
JOIN web_events w
ON a.id = w.account_id AND a.id = (SELECT id FROM t1)
GROUP BY 1, 2
ORDER BY 3 DESC;

-- What is the lifetime average amount spent in terms of total_amt_usd for the
-- top 10 total spending accounts?

-- t1: the ten accounts with the highest lifetime spend.
WITH t1 AS (
    SELECT a.id, a.name, SUM(o.total_amt_usd) tot_spent
    FROM orders o
    JOIN accounts a
    ON a.id = o.account_id
    GROUP BY a.id, a.name
    ORDER BY 3 DESC
    LIMIT 10)
SELECT AVG(tot_spent)
FROM t1;


-- 6. What is the lifetime average amount spent in terms of total_amt_usd for
-- only the companies that spent more than the average of all accounts.

-- Step 1: average order amount over all orders
SELECT AVG(o.total_amt_usd) avg_all
FROM orders o
JOIN accounts a
ON a.id = o.account_id;

-- Step 2: per-account average, keeping only accounts above the overall average
SELECT o.account_id, AVG(o.total_amt_usd) avg_amt
FROM orders o
GROUP BY 1
HAVING AVG(o.total_amt_usd) > (SELECT AVG(o.total_amt_usd) avg_all
                               FROM orders o
                               JOIN accounts a
                               ON a.id = o.account_id);

-- Step 3: lifetime avg = average of the per-account averages from step 2

SELECT AVG(avg_amt)
FROM (SELECT o.account_id, AVG(o.total_amt_usd) avg_amt
      FROM orders o
      GROUP BY 1
      HAVING AVG(o.total_amt_usd) > (SELECT AVG(o.total_amt_usd) avg_all
                                     FROM orders o
                                     JOIN accounts a
                                     ON a.id = o.account_id)) t1;
--------------------------------------------------------------------------------
/basic_sql_lesson28/quizzes.sql:
--------------------------------------------------------------------------------
-- Limits

SELECT occurred_at, account_id, channel
FROM web_events
LIMIT 15;

-- ORDER BY

/*1.Write a query to return the 10 earliest orders in the orders table.
Include the id, occurred_at, and total_amt_usd.*/

-- Without ORDER BY, LIMIT returns 10 arbitrary rows, not the earliest ones.
SELECT id, occurred_at, total_amt_usd
FROM orders
ORDER BY occurred_at
LIMIT 10;

/*2.Write a query to return the top 5 orders in terms of largest total_amt_usd.
Include the id, account_id, and total_amt_usd.*/

SELECT id, account_id, total_amt_usd
FROM orders
ORDER BY total_amt_usd DESC
LIMIT 5;

/*3.Write a query to return the bottom 20 orders in terms of least total.
Include the id, account_id, and total.*/

SELECT id, account_id, total
FROM orders
ORDER BY total
LIMIT 20;


-- ORDER BY Part 2

/*Write a query that returns the top 5 rows from orders ordered according to newest to oldest,
but with the largest total_amt_usd for each date listed first for each date.*/

-- Sort by date first (newest first); total_amt_usd only breaks ties within a date.
SELECT *
FROM orders
ORDER BY occurred_at DESC, total_amt_usd DESC
LIMIT 5;

/*Write a query that returns the top 10 rows from orders ordered according to oldest
to newest, but with the smallest total_amt_usd for each date listed first for each date.*/

SELECT *
FROM orders
ORDER BY occurred_at, total_amt_usd
LIMIT 10;

-- WHERE

/*Pull the first 5 rows and all columns from the orders table that have
a dollar amount of gloss_amt_usd greater than or equal to 1000.*/

SELECT *
FROM orders
WHERE gloss_amt_usd >= 1000
LIMIT 5;

/*Pull the first 10 rows and all columns from the orders table that
have a total_amt_usd less than 500.*/

SELECT *
FROM orders
WHERE total_amt_usd < 500
LIMIT 10;


-- WHERE with Non-Numeric Data

/*Filter the accounts table to include the company name, website, and the
primary point of contact (primary_poc) for Exxon Mobil in the accounts table.*/

-- 'Exxon Mobil' is the company name, so the filter belongs on name;
-- primary_poc is one of the requested output columns.
SELECT name, website, primary_poc
FROM accounts
WHERE name = 'Exxon Mobil';

-- Arithmetic Operators

/*Using the orders table:

Create a column that divides the standard_amt_usd by the standard_qty to find the
unit price for standard paper for each order.
Limit the results to the first 10 orders, and include the id and account_id fields.*/

-- NULLIF turns a zero quantity into NULL so the division yields NULL
-- instead of raising a division-by-zero error.
SELECT standard_amt_usd,
       standard_qty,
       id,
       account_id,
       standard_amt_usd / NULLIF(standard_qty, 0) AS unit_cost
FROM orders
LIMIT 10;

/*Write a query that finds the percentage of revenue that comes from poster paper for each order. You will need to use only the columns
that end with _usd. (Try to do this without using the total column). Include the id and account_id fields.*/

-- Result is a fraction between 0 and 1; NULL for orders with zero revenue.
SELECT id,
       account_id,
       poster_amt_usd / NULLIF(standard_amt_usd + gloss_amt_usd + poster_amt_usd, 0) AS poster_paper
FROM orders;

-- LIKE

/*All the companies whose names start with 'C'. */

-- 'C%' anchors the match at the start; '%C%' would match a C anywhere.
SELECT *
FROM accounts
WHERE name LIKE 'C%';

/*All companies whose names contain the string 'one' somewhere in the name.*/

SELECT *
FROM accounts
WHERE name LIKE '%one%';

/*All companies whose names end with 's'.*/

-- '%s' anchors the match at the end; '%s%' would match an s anywhere.
SELECT *
FROM accounts
WHERE name LIKE '%s';

-- IN
/*Use the accounts table to find the account name, primary_poc, and sales_rep_id for Walmart, Target, and Nordstrom.*/

SELECT name, primary_poc, sales_rep_id
FROM accounts
WHERE name IN ('Walmart', 'Target', 'Nordstrom');

/*Use the web_events table to find all information regarding individuals who were contacted via the channel of organic or adwords.*/

SELECT *
FROM web_events
WHERE channel IN ('organic', 'adwords');

-- NOT

/*Use the accounts table to find the account name, primary poc, and sales rep id for all stores except Walmart, Target, and Nordstrom.*/

-- IN compares for exact equality: '%' is not a wildcard here, so the list
-- must hold the plain names.
SELECT name, primary_poc, sales_rep_id
FROM accounts
WHERE name NOT IN ('Walmart', 'Target', 'Nordstrom');

/*Use the web_events table to find all information regarding individuals
who were contacted via any method except using organic or adwords methods.*/

-- IN compares for exact equality, so the values carry no '%' wildcards.
SELECT *
FROM web_events
WHERE channel NOT IN ('organic', 'adwords');

/*All the companies whose names do not start with 'C'.*/

SELECT name
FROM accounts
WHERE name NOT LIKE 'C%';

/*All companies whose names do not contain the string 'one' somewhere in the name*/

SELECT name
FROM accounts
WHERE name NOT LIKE '%one%';

/*All companies whose names do not end with 's'.*/

SELECT name
FROM accounts
WHERE name NOT LIKE '%s';

-- AND and BETWEEN

/* 1. Write a query that returns all the orders where the standard_qty is over 1000, the poster_qty is 0, and the gloss_qty is 0.*/

SELECT standard_qty, poster_qty, gloss_qty
FROM orders
WHERE standard_qty > 1000 AND poster_qty = 0 AND gloss_qty = 0;

/* 2. Using the accounts table find all the companies whose names do not start with 'C' and end with 's'.*/

SELECT name
FROM accounts
WHERE name NOT LIKE 'C%' AND name LIKE '%s';

/* 3. Use the web_events table to find all information regarding individuals who were contacted via organic or adwords and started their
account at any point in 2016 sorted from newest to oldest.
*/

-- Half-open range covers all of 2016; BETWEEN ... AND '2017-01-01' would also
-- include events stamped exactly at midnight on 2017-01-01.
-- "Newest to oldest" means ordering by the timestamp, not by channel.
SELECT *
FROM web_events
WHERE channel IN ('organic', 'adwords')
AND occurred_at >= '2016-01-01' AND occurred_at < '2017-01-01'
ORDER BY occurred_at DESC;

-- OR

/*1.Find list of orders ids where either gloss_qty or poster_qty is greater than 4000.
Only include the id field in the resulting table.*/

-- "greater than 4000" is a strict inequality, and only id is requested.
SELECT id
FROM orders
WHERE gloss_qty > 4000 OR poster_qty > 4000
ORDER BY id;

/*2.Write a query that returns a list of orders where the standard_qty is zero and either the gloss_qty or poster_qty is over 1000.*/

-- The parentheses are required: AND binds tighter than OR.
SELECT *
FROM orders
WHERE standard_qty = 0
AND (gloss_qty > 1000 OR poster_qty > 1000);

/*3.Find all the company names that start with a 'C' or 'W', and the primary contact contains 'ana' or 'Ana', but it doesn't contain 'eana'.*/

SELECT *
FROM accounts
WHERE (name LIKE 'C%' OR name LIKE 'W%')
AND ((primary_poc LIKE '%ana%' OR primary_poc LIKE '%Ana%')
AND primary_poc NOT LIKE '%eana%');
--------------------------------------------------------------------------------
/sql_joins_lesson_29/sql_joins_lesson_29.md:
--------------------------------------------------------------------------------
## Relational DB

The term **relational database** refers to the fact that tables within it relate to one another. They contain common identifiers that allow information from
multiple tables to be easily combined.

When you write a query its execution speed depends on the amount of data you're asking the db to read and the number and type of calculations you're
asking it to make.

## DB normalization.

When creating a db, it's really important to think about how data will be stored. This is known as **normalization**.
There are essentially three ideas that are aimed at database normalization:

1. Are the tables storing logical groupings of the data?
2. Can I make changes in a single location, rather than in many tables for the same information?
3. Can I access and manipulate data quickly and efficiently?
17 | 18 | [Why You Need Database Normalization link](http://www.itprotoday.com/microsoft-sql-server/sql-design-why-you-need-database-normalization) 19 | 20 | Example: 21 | Here we are only pulling data from the orders table since in the SELECT statement we only reference columns from the orders table. 22 | The ON statement holds the two columns that get linked across the two tables. 23 | 24 | ![inner join](join_sql.png) 25 | 26 | To specify tables and columns in the SELECT statement: 27 | 28 | 1. The table name is always before the period. 29 | 2. The column you want from that table is always after the period. 30 | 31 | For example, if we want to pull only the account name: 32 | 33 | ``` 34 | SELECT accounts.name, orders.occurred_at 35 | FROM orders 36 | JOIN accounts 37 | ON orders.account_id = accounts.id; 38 | ``` 39 | This query only pulls two columns, not all the information in these two tables. 40 | 41 | ## ERD reminder. 42 | 43 | ERD or entity relationship diagram is a common way to view data in a database. 44 | 45 | ![ERD diagram](entity_relationship_diagram.png) 46 | The PK here stands for primary key. A primary key exists in every table, and it is a column that has a unique value for every row. 47 | If you look at the first few rows of any of the tables in our database, you will notice that this first, PK, column is always unique. For this database it is always called id, but that is not true of all databases. 48 | 49 | ## Primary and Foreign Keys. 50 | 51 | `Primary Key (PK)` 52 | A primary key is a unique column in a particular table. This is the first column in each of **our tables**. Here, those columns are all called id, but that doesn't necessarily have to be the name. It is common that the primary key is the first column in our tables in most databases. 53 | 54 | The primary key is a single column that must exist in each table of a database. Again, these rules are true for most major databases, but some databases may not enforce these rules. 
55 | 56 | `Foreign Key (FK)` 57 | A foreign key is when we see a primary key in another table. 58 | 59 | Foreign keys are always associated with a primary key, and they are associated with the crow-foot notation above to show they can appear multiple times in a particular table. 60 | 61 | ![primary and foreign key](primary_foreign_key.png) 62 | 63 | ## JOIN more than two tables. 64 | 65 | ``` 66 | SELECT * 67 | FROM web_events 68 | JOIN accounts 69 | ON web_events.account_id = accounts.id 70 | JOIN orders 71 | ON accounts.id = orders.account_id; 72 | ``` 73 | 74 | ## ALIAS 75 | 76 | When we `JOIN` tables together it's easiest to give your table names **aliases**. The `ALIAS` for a table will be created in the `FROM` or `JOIN` clauses. 77 | Best practice: to use all lower case letters and underscores instead of spaces. 78 | Example: 79 | ``` 80 | FROM tablename AS t1 81 | JOIN tablename2 AS t2 82 | ``` 83 | Or without the AS statement: 84 | ``` 85 | FROM tablename t1 86 | JOIN tablename2 t2 87 | ``` 88 | 89 | We can simply write our alias directly after the column name (in the SELECT) or table name (in the FROM or JOIN) by writing the alias directly following the column or table we would like to alias. 90 | ``` 91 | SELECT col1 + col2 total, col3 92 | ``` 93 | 94 | ``` 95 | Select t1.column1 aliasname, t2.column2 aliasname2 96 | FROM tablename AS t1 97 | JOIN tablename2 AS t2 98 | ``` 99 | 100 | ## Many-to-many relationships 101 | 102 | [Why no many-to-many relationships?](https://stackoverflow.com/questions/7339143/why-no-many-to-many-relationships) 103 | 104 | ## LEFT and RIGHT JOIN 105 | 106 | INNER JOIN will return only rows that appear in **both tables**. 107 | 108 | This Inner Join will return only rows at the intersection of these two circles. 109 | If want to show accounts that don't appear in the orders table we need to use OUTER Join. 
110 | ``` 111 | SELECT accounts.id, accounts.name, orders.total 112 | FROM orders 113 | JOIN accounts 114 | ON orders.account_id = accounts.id 115 | ``` 116 | 117 | Venn Diagram is a common way to visualize JOINs. Each circle in the diagram represents a table. The left circle includes all rows of data in the table in **FROM** clause. The right circle represents all rows of data in the table in **JOIN** clause. The overlapping middle section represents all rows for which the ON clause is **true**. 118 | ![Venn Diagram](venn_diagram.png) 119 | 120 | There are three types of joins we might use if we want to include data that doesn't exist in both tables (only in one of the two tables): LEFT JOIN, RIGHT JOIN, FULL OUTER JOIN. 121 | 122 | LEFT JOIN produces a complete set of records from the left table regardless if any of those records have a match in the right table. It will also return any results that are in the left table that didn't match. 123 | 124 | RIGHT JOIN will return all of the records in the right table regardless if any of those records have a match in the left table. 125 | Left and Right joins are somewhat interchangeable: 126 | ![left and right](interchangeable_result.png) 127 | 128 | If there is no matching information in the JOINed table, then you will have columns with empty cells. These empty cells introduce a new data type called NULL. 129 | 130 | ## OUTER JOIN 131 | 132 | OUTER JOIN will return the inner join result set, as well as any unmatched rows from either of the two tables being joined. 133 | 134 | Again this returns rows that do not match one another from the two tables. The use cases for a full outer join are very rare. 135 | [When is a good situation to use a full outer join?](https://stackoverflow.com/questions/2094793/when-is-a-good-situation-to-use-a-full-outer-join) 136 | 137 | FULL OUTER JOIN is the same as OUTER JOIN. LEFT OUTER JOIN and RIGHT OUTER JOIN are the same as LEFT JOIN and RIGHT JOIN.
138 | 139 | ## JOINs and Filtering 140 | 141 | `ON` logic in the on clause reduces the rows **before combining the tables**. 142 | 143 | `WHERE` logic in the where clause occurs **after the join occurs**. 144 | 145 | When the database executes the query, it executes the join and everything in the **ON clause first**. Think of this as building the new result set. That result set is then filtered using the WHERE clause. 146 | 147 | INNER JOINs only return the rows for which the two tables match, so moving a filter to the ON clause of an inner join will produce the same result as keeping it in the WHERE clause. -------------------------------------------------------------------------------- /control_flow_lesson_25/control_flow.md: -------------------------------------------------------------------------------- 1 | # Control flow 2 | 3 | We'll learn: 4 | * conditional statements 5 | * **for** and **while** loop 6 | * exit or skip loops with **break** and **continue** 7 | * use **built-in functions**: **zip** and **enumerate** 8 | * list comprehensions 9 | 10 | ## **if** statement 11 | 12 | An **if** statement is a conditional statement that runs or skips code based on whether a condition is true or false. Example: 13 | 14 | ``` 15 | if phone_balance < 5: 16 | phone_balance += 10 17 | bank_balance -= 10 18 | ``` 19 | 20 | ## Comparison Operators in Conditional Statements 21 | 22 | `=` assignment operator that assigns the value on the right to the name on the left 23 | 24 | `==` comparison operator that evaluates whether objects on both sides are equal 25 | 26 | ## **if**, **elif**, **else** 27 | 28 | **if**: An if statement must always start with an if clause, which contains the first condition that is checked. If this evaluates to True, Python runs the code indented in this if block and then skips to the rest of the code after the if statement. 29 | 30 | **elif**: elif is short for "else if."
An elif clause is used to check for an additional condition if the conditions in the previous clauses in the if statement evaluate to False. 31 | 32 | **else**: Last is the else clause, which must come at the end of an if statement if used. This clause doesn't require a condition. The code in an else block is run if all conditions above that in the if statement evaluate to False. 33 | 34 | ``` 35 | if season == 'spring': 36 | print('plant the garden!') 37 | elif season == 'summer': 38 | print('water the garden!') 39 | elif season == 'fall': 40 | print('harvest the garden!') 41 | elif season == 'winter': 42 | print('stay indoors!') 43 | else: 44 | print('unrecognized season') 45 | ``` 46 | 47 | ## Indentation 48 | 49 | In Python, indents conventionally come in multiples of four spaces. Be strict about following this convention, because changing the indentation can completely change the meaning of the code. 50 | 51 | The [Python Style Guide](https://www.python.org/dev/peps/pep-0008/#tabs-or-spaces) recommends using 4 spaces to indent, rather than using a tab. Whichever you use, be aware that "Python 3 disallows mixing the use of tabs and spaces for indentation." 52 | 53 | ## Boolean expressions 54 | 55 | A **boolean expression** is an expression that is either True or False. 56 | 57 | There are three **logical operators**: and, or, not. Use parentheses if you need to make the combinations clear. 58 | 59 | **if** statements sometimes use more complicated boolean expressions for their conditions. They may contain multiple comparison operators, logical operators, and even calculations.
Examples: 60 | 61 | ``` 62 | if 18.5 <= weight / height**2 < 25: 63 | print("BMI is considered 'normal'") 64 | 65 | if is_raining and is_sunny: 66 | print("Is there a rainbow?") 67 | 68 | if (not unsubscribed) and (location == "USA" or location == "CAN"): 69 | print("send email") 70 | ``` 71 | 72 | However simple or complex, the condition in an **if** statement must be a boolean expression that evaluates to either True or False and it is this value that decides whether the indented block in an if statement executes or not. 73 | 74 | ## Good and Bad Examples 75 | 76 | **Don't use**: `if True:` or `if False:` 77 | 78 | Bad example: 79 | ```if True: 80 | print("This indented code will always get run.") 81 | ``` 82 | While `True` is a valid boolean expression, it's not useful as a condition since it always evaluates to True, so the indented code will always get run. Similarly, if `False` is not a condition you should use either - the statement following this `if` statement would never be executed. 83 | 84 | 85 | **Be careful** writing expression that use **logical operators**: `and`, `or`, `not`: 86 | 87 | Bad example: 88 | ``` 89 | if weather == "snow" or "rain": 90 | print("Wear boots!") 91 | ``` 92 | This code is valid in Python, but it is not a boolean expression, although it reads like one. The reason is that the expression to the right of the or operator, "rain", is not a boolean expression - it's a string! Later we'll discuss what happens when you use non-boolean-type objects in place of booleans. 93 | 94 | 95 | **Don't evaluate** the truth of a boolean variable with `== True` or `== False`: 96 | 97 | Bad example: 98 | This comparison isn’t necessary, since the boolean variable itself is a boolean expression. 99 | ``` 100 | if is_cold == True: 101 | print("The weather is cold!") 102 | ``` 103 | This is a valid condition, but we can make the code more readable by using the variable itself as the condition instead, as below. 
104 | 105 | Good example: 106 | ``` 107 | if is_cold: 108 | print("The weather is cold!") 109 | ``` 110 | 111 | If you want to check whether a boolean is False, you can use the **not** operator. 112 | 113 | ## Truth Value Testing 114 | If we use a **non-boolean object** as a condition in an if statement in place of the boolean expression, Python will check for its truth value and use that to decide whether or not to run the indented code. By default, the truth value of an object in Python is considered True unless specified as False in the documentation. 115 | 116 | Here are most of the built-in objects that are considered False in Python: 117 | 118 | * constants defined to be false: `None` and `False` 119 | 120 | * zero of any numeric type: `0`, `0.0`, `0j`, `Decimal(0)`, `Fraction(0, 1)` 121 | 122 | * empty sequences and collections: `""`, `()`, `[]`, `{}`, `set()`, `range(0)` 123 | 124 | Example: 125 | ``` 126 | errors = 3 127 | if errors: 128 | print("You have {} errors to fix!".format(errors)) 129 | else: 130 | print("No errors to fix!") 131 | ``` 132 | In this code, errors has the truth value True because it's a non-zero number, so the error message is printed. 133 | 134 | ## Quiz: Boolean Expressions for Conditions 135 | 136 | Imagine an air traffic control program that tracks three variables, altitude, 137 | speed, and propulsion which for a particular airplane have the values 138 | specified below: 139 | ``` 140 | altitude = 10000 141 | speed = 250 142 | propulsion = "Propeller" 143 | ``` 144 | Expressions: 145 | 146 | 1.`altitude < 1000 and speed > 100` 147 | 148 | `altitude < 1000` is False, so we don't even need to check the second condition - the whole expression 149 | is False. 150 | 151 | 152 | 2.`(propulsion == "Jet" or propulsion == "Turboprop") and speed < 300 and altitude > 20000 ` 153 | 154 | `propulsion == "Jet"` is False, and `propulsion == "Turboprop"` is False, so the whole expression inside 155 | the parentheses is False. 
156 | 157 | 158 | 3.`not (speed > 400 and propulsion == "Propeller") ` 159 | 160 | To work this one out, we need to look at the inside of the parentheses first, then apply not to that. 161 | `speed > 400` is False, and because we are using and this makes the whole of the expression inside the 162 | parentheses False. Applying not reverses this, so this expression is True. 163 | 164 | 165 | 166 | 4.`(altitude > 500 and speed > 100) or not propulsion == "Propeller" ` 167 | 168 | `altitude > 500` is True, and speed is greater than 100, so the expression inside the parentheses is True. 169 | Whatever the value of the other expression, because they are connected by or, the whole expression will 170 | evaluate to True. 171 | 172 | 173 | # Break and Continue: 174 | 175 | `for` loops iterate over every element in a sequence. 176 | `while` loops iterate until their stopping condition is met. 177 | 178 | `break` terminates a loop (`for` or `while`) immediately when the `break` statement is reached. 179 | 180 | `continue` terminates one iteration of a `for` or `while` loop. 181 | 182 | -------------------------------------------------------------------------------- /sql_subqueries_temporary_table_lesson31/subqueries_temporary_tables.md: -------------------------------------------------------------------------------- 1 | ## Intro to subqueries 2 | 3 | Both **subqueries** and table expressions are methods for being able to write a query that creates a table, and then write a query that interacts with this newly created table. Sometimes the question you are trying to answer doesn't have an answer when working directly with existing tables in the database. 4 | 5 | However, if we were able to create new tables from the existing tables, we know we could query these new tables to answer our question. 6 | 7 | Whenever we need to use existing tables to create a new table that we then want to query again, this is an indication that we will need to use some sort of subquery. 
8 | 9 | **Subqueries** - also known as **inner queries** and **nested queries** - allow you to answer more complex questions than you can with a single DB table. 10 | 11 | ## Write your first subquery 12 | We want to find the average number of events for each day for each channel. The first table will provide us the number of events for each day and channel, and then we will need to average these values together using a second query. 13 | 14 | 1. Start by querying the table to check the data. 15 | 16 | ``` 17 | SELECT * 18 | FROM web_events; 19 | ``` 20 | 21 | 2. Count up all the events in each channel, in each day. 22 | 23 | ``` 24 | SELECT DATE_TRUNC('day', occurred_at) as day, 25 | channel, 26 | COUNT(*) as event_count 27 | FROM web_events 28 | GROUP BY 1, 2 29 | ORDER BY 1; 30 | ``` 31 | 32 | 3. Average across the events column we've created. In order to do this, we query the result of the previous query. We can do it by wrapping the query in parentheses and using it in the FROM clause of the next query that we write. 33 | 34 | A query within a query is also known as a subquery: 35 | ``` 36 | SELECT * 37 | FROM 38 | (SELECT DATE_TRUNC('day', occurred_at) as day, 39 | channel, 40 | COUNT(*) as event_count 41 | FROM web_events 42 | GROUP BY 1, 2 43 | ORDER BY 1) sub 44 | ``` 45 | **Subqueries** are required to have aliases, which are added after the parentheses: `()sub`. 46 | 47 | 4. Average events for each channel. The subquery acts like one table in the FROM clause, so put the GROUP BY clause after the subquery. 48 | Since we are reordering based on this new aggregation, you don't need an ORDER BY statement in the subquery. 49 | ``` 50 | SELECT channel, 51 | AVG(event_count) AS avg_event_count 52 | FROM 53 | (SELECT DATE_TRUNC('day', occurred_at) as day, 54 | channel, 55 | COUNT(*) as event_count 56 | FROM web_events 57 | GROUP BY 1, 2) sub 58 | GROUP BY channel 59 | ORDER BY 2 DESC; 60 | ``` 61 | 62 | #### How this query runs: 63 | 64 | 1. Inner query will run. 
DB will treat it as an independent query. 65 | ``` 66 | SELECT DATE_TRUNC('day', occurred_at) as day, 67 | channel, 68 | COUNT(*) as event_count 69 | FROM web_events 70 | GROUP BY 1, 2 71 | ``` 72 | 2. The outer query will run across the result set created by the inner query: 73 | ``` 74 | SELECT channel, 75 | AVG(event_count) AS avg_event_count 76 | FROM 77 | (SELECT DATE_TRUNC('day', occurred_at) as day, 78 | channel, 79 | COUNT(*) as event_count 80 | FROM web_events 81 | GROUP BY 1, 2) sub 82 | GROUP BY channel 83 | ORDER BY 2 DESC; 84 | ``` 85 | 86 | ## Subquery Formatting 87 | 88 | #### Badly formatted queries 89 | 90 | ``` 91 | SELECT * FROM (SELECT DATE_TRUNC('day',occurred_at) AS day, channel, COUNT(*) as events FROM web_events GROUP BY 1,2 ORDER BY 3 DESC) sub; 92 | ``` 93 | 94 | This second version, which includes some helpful line breaks, is easier to read than the previous version, but it is still not as easy to read as the queries in the Well Formatted Query section. 95 | 96 | ``` 97 | SELECT * 98 | FROM ( 99 | SELECT DATE_TRUNC('day',occurred_at) AS day, 100 | channel, COUNT(*) as events 101 | FROM web_events 102 | GROUP BY 1,2 103 | ORDER BY 3 DESC) sub; 104 | ``` 105 | 106 | #### Well Formatted Query 107 | 108 | If we have a GROUP BY, ORDER BY, WHERE, HAVING, or any other statement following our subquery, we would then indent it at the same level as our outer query. 109 | 110 | ``` 111 | SELECT * 112 | FROM (SELECT DATE_TRUNC('day',occurred_at) AS day, 113 | channel, COUNT(*) as events 114 | FROM web_events 115 | GROUP BY 1,2 116 | ORDER BY 3 DESC) sub; 117 | ``` 118 | 119 | The inner query GROUP BY and ORDER BY statements are indented to match the inner table. 
120 | ``` 121 | SELECT * 122 | FROM (SELECT DATE_TRUNC('day',occurred_at) AS day, 123 | channel, COUNT(*) as events 124 | FROM web_events 125 | GROUP BY 1,2 126 | ORDER BY 3 DESC) sub 127 | GROUP BY channel 128 | ORDER BY 2 DESC; 129 | ``` 130 | 131 | ## More on Subqueries 132 | 133 | If you are only returning a single value, you might use that value in a logical statement like WHERE, HAVING, or even SELECT - the value could be nested within a CASE statement. Most conditional logic will work with subqueries containing **one-cell results**. But `IN` is the only type of conditional logic that will work when the inner query contains multiple results. 134 | 135 | 136 | 137 | **Expert Tip** 138 | 139 | Note that you should not include an alias when you write a subquery in a conditional statement. This is because the subquery is treated as an individual value (or set of values in the IN case) rather than as a table. 140 | 141 | Also, notice the query here compared a single value. If we returned an entire column, IN would need to be used to perform a logical argument. If we are returning an entire table, then we must use an ALIAS for the table, and perform additional logic on the entire table. 142 | 143 | ### MORE on sub queries 144 | 145 | 1. Subquery table 146 | ``` 147 | SELECT a.id, a.name, we.channel, COUNT(*) as ct 148 | FROM accounts a 149 | JOIN web_events we 150 | ON a.id = we.account_id 151 | GROUP BY a.id, a.name, channel 152 | ORDER BY a.id; 153 | ``` 154 | 155 | 2. Find the max from all data: 156 | ``` 157 | SELECT MAX(ct) 158 | 159 | FROM (SELECT a.id, a.name, we.channel, COUNT(*) as ct 160 | FROM accounts a 161 | JOIN web_events we 162 | ON a.id = we.account_id 163 | GROUP BY a.id, a.name, channel 164 | ORDER BY a.id) table1 165 | ``` 166 | 167 | 3. 
Max for every account: 168 | 169 | ``` 170 | SELECT t1.id, t1.name, MAX(ct) 171 | FROM (SELECT a.id, a.name, we.channel, COUNT(*) as ct 172 | FROM accounts a 173 | JOIN web_events we 174 | ON a.id = we.account_id 175 | GROUP BY a.id, a.name, channel) t1 176 | GROUP BY t1.id, t1.name 177 | ORDER BY t1.id; 178 | ``` 179 | 180 | 4. Final table: 181 | 182 | ``` 183 | SELECT t3.id, t3.name, t3.channel, t3.ct 184 | FROM (SELECT a.id, a.name, we.channel, COUNT(*) as ct 185 | FROM accounts a 186 | JOIN web_events we 187 | ON a.id = we.account_id 188 | GROUP BY a.id, a.name, channel) t3 189 | 190 | JOIN (SELECT t1.id, t1.name, MAX(ct) max_chan 191 | FROM (SELECT a.id, a.name, we.channel, COUNT(*) as ct 192 | FROM accounts a 193 | JOIN web_events we 194 | ON a.id = we.account_id 195 | GROUP BY a.id, a.name, channel) t1 196 | GROUP BY t1.id, t1.name) t2 197 | ON t2.id = t3.id AND t2.max_chan = t3.ct 198 | ORDER BY t3.id, t3.ct; 199 | ``` 200 | 201 | ## WITH 202 | 203 | The `WITH` statement is often called a Common Table Expression or CTE. Though these expressions serve the exact same purpose as subqueries, they are more common in practice, as they tend to be cleaner for a future reader to follow the logic. 204 | 205 | Subqueries can make queries lengthy and difficult to read. Common Table Expressions or CTEs can help break your query into separate components so that the logic is easier to read. 206 | 207 | * When creating multiple tables using `WITH`, add a comma after every table except the last table leading to the final query. 208 | * The new table name is always aliased using `table_name AS`, which is followed by your query nested between parentheses. 
-------------------------------------------------------------------------------- /sql_joins_lesson_29/join_quizzes.sql: -------------------------------------------------------------------------------- 1 | /* JOIN practice */ 2 | 3 | /*Try pulling all the data from the accounts table, and all the data from the orders table.*/ 4 | SELECT accounts.*, orders.* 5 | FROM accounts 6 | JOIN orders 7 | ON accounts.id = orders.account_id; /* join on the foreign key orders.account_id, not on the order's own id */ 8 | 9 | /*Try pulling standard_qty, gloss_qty, and poster_qty from the orders table, and the website and the primary_poc from the accounts table.*/ 10 | 11 | SELECT orders.standard_qty, orders.gloss_qty, orders.poster_qty, 12 | accounts.website, accounts.primary_poc 13 | FROM orders 14 | JOIN accounts 15 | ON orders.account_id = accounts.id; 16 | 17 | /* JOIN QUESTIONS PART 1 */ 18 | 19 | /*1.Provide a table for all web_events associated with account name of Walmart. There should be three columns. Be sure to include the primary_poc, 20 | time of the event, and the channel for each event. Additionally, you might choose to add a fourth column to assure only Walmart events were chosen. */ 21 | 22 | SELECT web_events.occurred_at, accounts.primary_poc, web_events.channel 23 | FROM web_events 24 | JOIN accounts 25 | ON web_events.account_id = accounts.id 26 | WHERE accounts.name = 'Walmart'; 27 | 28 | /*2.Provide a table that provides the region for each sales_rep along with their associated accounts. Your final table should include three 29 | columns: the region name, the sales rep name, and the account name. Sort the accounts alphabetically (A-Z) according to account name.*/ 30 | 31 | SELECT region.name AS region_name, sales_reps.name AS sales_rep_name, accounts.name AS account_name 32 | FROM accounts 33 | JOIN sales_reps 34 | ON accounts.sales_rep_id = sales_reps.id 35 | JOIN region ON sales_reps.region_id = region.id 36 | ORDER BY account_name; 37 | 38 | /*3.Provide the name for each region for every order, as well as the account name and the unit price they paid (total_amt_usd/total) for the order. 
Your 39 | final table should have 3 columns: region name, account name, and unit price. A few accounts have 0 for total, so I divided by (total + 0.01) to assure 40 | not dividing by zero.*/ 41 | 42 | SELECT region.name AS region_name, accounts.name AS account_name, orders.total_amt_usd/(orders.total + 0.01) AS unit_price 43 | FROM orders 44 | JOIN accounts ON orders.account_id = accounts.id 45 | JOIN sales_reps ON accounts.sales_rep_id = sales_reps.id 46 | JOIN region ON sales_reps.region_id = region.id; 47 | 48 | /* JOINs and Filtering. Quiz: Last Check */ 49 | 50 | /*1.Provide a table that provides the region for each sales_rep along with their associated accounts. This time only for the Midwest region. 51 | Your final table should include three columns: the region name, the sales rep name, and the account name. Sort the accounts alphabetically 52 | (A-Z) according to account name.*/ 53 | 54 | SELECT region.name AS region_name, sales_reps.name AS sales_rep_name, accounts.name AS account_name 55 | FROM accounts 56 | JOIN sales_reps 57 | ON accounts.sales_rep_id = sales_reps.id 58 | JOIN region ON sales_reps.region_id = region.id 59 | WHERE region.name = 'Midwest' 60 | ORDER BY account_name; 61 | 62 | /*2.Provide a table that provides the region for each sales_rep along with their associated accounts. This time only for accounts where the sales rep has a first 63 | name starting with S and in the Midwest region. Your final table should include three columns: the region name, the sales rep name, and the account name. 
Sort the 64 | accounts alphabetically (A-Z) according to account name.*/ 65 | 66 | SELECT region.name AS region_name, sales_reps.name AS sales_rep_name, accounts.name AS account_name 67 | FROM accounts 68 | JOIN sales_reps 69 | ON accounts.sales_rep_id = sales_reps.id 70 | JOIN region 71 | ON sales_reps.region_id = region.id 72 | WHERE region.name = 'Midwest' AND sales_reps.name LIKE 'S%' 73 | ORDER BY account_name; 74 | 75 | /*3.Provide a table that provides the region for each sales_rep along with their associated accounts. This time only for accounts where the sales rep has a last name 76 | starting with K and in the Midwest region. Your final table should include three columns: the region name, the sales rep name, and the account name. Sort the accounts 77 | alphabetically (A-Z) according to account name.*/ 78 | 79 | SELECT region.name AS region_name, sales_reps.name AS sales_rep_name, accounts.name AS account_name 80 | FROM accounts 81 | JOIN sales_reps 82 | ON accounts.sales_rep_id = sales_reps.id 83 | JOIN region 84 | ON sales_reps.region_id = region.id 85 | WHERE region.name = 'Midwest' AND sales_reps.name LIKE '% K%' /* a space followed by K; assumes names are stored as "first last" */ 86 | ORDER BY account_name; 87 | 88 | /*4.Provide the name for each region for every order, as well as the account name and the unit price they paid (total_amt_usd/total) for the order. However, you should 89 | only provide the results if the standard order quantity exceeds 100. Your final table should have 3 columns: region name, account name, and unit price. 
In order to avoid a 90 | division by zero error, adding .01 to the denominator here is helpful total_amt_usd/(total+0.01).*/ 91 | 92 | SELECT region.name AS region_name, accounts.name AS account_name, orders.total_amt_usd/(orders.total + 0.01) AS unit_price 93 | FROM orders 94 | JOIN accounts 95 | ON orders.account_id = accounts.id 96 | JOIN sales_reps 97 | ON accounts.sales_rep_id = sales_reps.id 98 | JOIN region 99 | ON sales_reps.region_id = region.id 100 | WHERE orders.standard_qty > 100; 101 | 102 | /*5.Provide the name for each region for every order, as well as the account name and the unit price they paid (total_amt_usd/total) for the order. However, you should only provide 103 | the results if the standard order quantity exceeds 100 and the poster order quantity exceeds 50. Your final table should have 3 columns: region name, account name, and unit price. 104 | Sort for the smallest unit price first. In order to avoid a division by zero error, adding .01 to the denominator here is helpful (total_amt_usd/(total+0.01)).*/ 105 | 106 | SELECT region.name AS region_name, accounts.name AS account_name, orders.total_amt_usd/(orders.total + 0.01) AS unit_price 107 | FROM orders 108 | JOIN accounts 109 | ON orders.account_id = accounts.id 110 | JOIN sales_reps 111 | ON accounts.sales_rep_id = sales_reps.id 112 | JOIN region 113 | ON sales_reps.region_id = region.id 114 | WHERE orders.standard_qty > 100 AND orders.poster_qty > 50 115 | ORDER BY unit_price ASC; 116 | 117 | /*6.Provide the name for each region for every order, as well as the account name and the unit price they paid (total_amt_usd/total) for the order. However, you should only provide 118 | the results if the standard order quantity exceeds 100 and the poster order quantity exceeds 50. Your final table should have 3 columns: region name, account name, and unit price. 119 | Sort for the largest unit price first. In order to avoid a division by zero error, adding .01 to the denominator here is helpful (total_amt_usd/(total+0.01)). 
*/ 120 | 121 | SELECT region.name AS region_name, accounts.name AS account_name, orders.total_amt_usd/(orders.total + 0.01) AS unit_price 122 | FROM orders 123 | JOIN accounts 124 | ON orders.account_id = accounts.id 125 | JOIN sales_reps 126 | ON accounts.sales_rep_id = sales_reps.id 127 | JOIN region 128 | ON sales_reps.region_id = region.id 129 | WHERE orders.standard_qty > 100 AND orders.poster_qty > 50 130 | ORDER BY unit_price DESC; 131 | 132 | /*7.What are the different channels used by account id 1001? Your final table should have only 2 columns: account name and the different channels. You can try SELECT DISTINCT to narrow 133 | down the results to only the unique values.*/ 134 | 135 | SELECT DISTINCT web_events.channel, accounts.name 136 | FROM web_events 137 | JOIN accounts 138 | ON accounts.id = web_events.account_id 139 | WHERE accounts.id = 1001; 140 | 141 | /*8.Find all the orders that occurred in 2015. Your final table should have 4 columns: occurred_at, account name, order total, and order total_amt_usd.*/ 142 | 143 | SELECT orders.occurred_at, accounts.name, orders.total, 144 | orders.total_amt_usd 145 | FROM orders 146 | JOIN accounts 147 | ON accounts.id = orders.account_id 148 | WHERE orders.occurred_at >= '2015-01-01' AND orders.occurred_at < '2016-01-01' /* half-open range: excludes midnight of 2016-01-01 */ 149 | ORDER BY orders.occurred_at DESC; 150 | 151 | -------------------------------------------------------------------------------- /functions_lesson_26/functions.md: -------------------------------------------------------------------------------- 1 | # Functions 2 | 3 | **Functions** are useful chunks of code that allow you to encapsulate a task. 4 | **Encapsulation** is a way to carry out a whole series of steps with one command. 5 | 6 | Functions are used to help organize and optimize code. 7 | 8 | # Defining function 9 | 10 | When you define a function you specify the name and the sequence of statements. 11 | 12 | This function calculates the volume of a cylinder. 
The formula for this is the cylinder's height, multiplied by the square of its radius multiplied by pi. 13 | ``` 14 | def cylinder_volume(height, radius): # function header # (height, radius) are arguments 15 | pi = 3.14159 # body of the function 16 | return height * pi * radius ** 2 17 | 18 | cylinder_volume(10, 3) # function call statement 19 | ``` 20 | 21 | **Function Header** 22 | The function header is the first line of a function definition. 23 | 24 | 1. The function header always starts with the `def` keyword, which indicates that this is a function definition. 25 | 2. Then comes the function name (here, `cylinder_volume`), which follows the same naming conventions as variables. You can revisit the naming conventions below. 26 | 3. Immediately after the name are parentheses that may include arguments separated by commas (here, height and radius). Arguments, or parameters, are values that are passed in as inputs when the function is called, and are used in the function body. If a function doesn't take arguments, these parentheses are left empty. 27 | 4. The header always ends with a colon `:`. 28 | 29 | 30 | **Function Body** 31 | The rest of the function is contained in the body, which is where the function does its work. 32 | 33 | 1. The body of a function is the code indented after the header line. Here, it's the two lines that define `pi` and `return` the volume. 34 | 2. Within this body, we can refer to the argument variables and define new variables, which can only be used within these indented lines. 35 | 3. The body will often include a return statement, which is used to send back an output value from the function to the statement that called the function. A return statement consists of the return keyword followed by an expression that is evaluated to get the output value for the function. If there is no return statement, the function simply returns `None`. 
36 | 37 | `print` provides output to the console while `return` provides a value that you can store and work with in code later. 38 | 39 | 40 | ## Default Arguments 41 | 42 | Default arguments allow functions to use default values when those arguments are omitted. 43 | 44 | We can add default arguments in a function to have default values for parameters that are unspecified in a function call. 45 | 46 | ``` 47 | def cylinder_volume(height, radius=5): 48 | pi = 3.14159 49 | return height * pi * radius ** 2 50 | 51 | cylinder_volume(10) # radius takes its default value of 5 52 | cylinder_volume(10, 7) # pass in arguments by position, overwrite the default value of 5. 53 | cylinder_volume(height=10, radius=7) # pass in arguments by name 54 | ``` 55 | 56 | ## Variable scope 57 | 58 | **Variable scope** refers to the parts of a program from which a variable can be referenced or used. 59 | If a variable is created inside a function, it can only be used within that function. Accessing it outside that function is not possible. 60 | 61 | ``` 62 | # This will result in an error 63 | def some_function(): 64 | word = "hello" 65 | 66 | print(word) 67 | ``` 68 | 69 | `word` is said to have scope that is only local to each function. This means you can use the same name for different variables that are used in different functions. 70 | ``` 71 | # This works fine 72 | def some_function(): 73 | word = "hello" 74 | 75 | def another_function(): 76 | word = "goodbye" 77 | ``` 78 | 79 | We can define a variable outside the function and it can still be accessed within a function. 80 | 81 | ``` 82 | word = "hello" 83 | 84 | def some_function(): 85 | print(word) 86 | 87 | some_function() 88 | ``` 89 | 90 | **Scope** is essential to understanding how information is passed throughout programs in any language. 91 | 92 | ## Documentation 93 | 94 | **Docstring**: a type of comment used to explain the purpose of a function and how it should be used. 95 | Docstrings are surrounded by triple quotes. 
96 | [PEP 257 -- Docstring Conventions](https://www.python.org/dev/peps/pep-0257/) 97 | 98 | 99 | ## Lambda Expressions 100 | 101 | In Python, you can use **lambda expressions** to create anonymous functions, that is, functions that don't have a name. They're helpful to create quick functions that aren't really needed later in your code. 102 | If you want to specify multiple arguments in a **lambda function**, include them before the colon, separated by commas. 103 | 104 | ``` 105 | def multiply(x, y): 106 | return x * y 107 | ``` 108 | With a lambda expression: 109 | 110 | ``` 111 | multiply = lambda x, y: x * y 112 | ``` 113 | 114 | Both of these functions are used in the same way. In either case, we can call multiply like this: 115 | `multiply(4, 7)` 116 | 117 | **Components of a Lambda Function** 118 | 1. The `lambda` keyword is used to indicate that this is a lambda expression. 119 | 2. Following lambda are one or more arguments for the anonymous function separated by commas, followed by a colon :. Similar to functions, the way the arguments are named in a lambda expression is arbitrary. 120 | 3. Last is an expression that is evaluated and returned in this function. 121 | 122 | With this structure, lambda expressions aren’t ideal for complex functions, but can be very useful for short, simple functions. 123 | 124 | #### Quiz: Lambda with Map 125 | `map()` is a higher-order built-in function that takes a function and iterable as inputs, and returns an iterator that applies the function to each element of the iterable. The code below uses map() to find the mean of each list in numbers to create the list averages. Test run it to see what happens. 126 | 127 | Rewrite this code to be more concise by replacing the mean function with a lambda expression defined within the call to `map()`. 
128 | 129 | ``` 130 | numbers = [ 131 | [34, 63, 88, 71, 29], 132 | [90, 78, 51, 27, 45], 133 | [63, 37, 85, 46, 22], 134 | [51, 22, 34, 11, 18] 135 | ] 136 | 137 | def mean(num_list): 138 | return sum(num_list) / len(num_list) 139 | 140 | averages = list(map(mean, numbers)) 141 | print(averages) 142 | ``` 143 | 144 | #### Lambda with Filter 145 | `filter()` is a higher-order built-in function that takes a function and iterable as inputs and returns an iterator with the elements from the iterable for which the function returns True. 146 | 147 | [More about map(), filter()](https://www.programiz.com/python-programming/anonymous-function) 148 | 149 | 150 | ## Iterators and Generators 151 | 152 | **Iterables** are objects that can return one of their elements at a time. List is one of the common iterables. Many of the built-in functions we’ve used so far, like 'enumerate,' return an iterator. 153 | 154 | **An iterator** is an object that represents a stream of data. This is different from a list, which is also an iterable, but not an iterator because it is not a stream of data. 155 | 156 | **Generators** are a simple way to create iterators using functions. It's not the only way to create an iterator. You can also define iterators using classes, which you can read more about [here](https://docs.python.org/3/tutorial/classes.html#iterators) 157 | 158 | Here is an example of a generator function called my_range, which produces an iterator that is a stream of numbers from 0 to (x - 1). 159 | ``` 160 | def my_range(x): 161 | i = 0 162 | while i < x: 163 | yield i 164 | i += 1 165 | 166 | # since this returns an iterator, we can convert it to a list or iterate through it in a loop to view 167 | # its contents. For example, this code: 168 | 169 | for x in my_range(5): 170 | print(x) 171 | ``` 172 | Output: 173 | ``` 174 | 0 175 | 1 176 | 2 177 | 3 178 | 4 179 | ``` 180 | 181 | Notice that instead of using the return keyword, it uses `yield`. 
This allows the function to return values one at a time, and start where it left off each time it’s called. This `yield` keyword is what differentiates a generator from a typical function. 182 | 183 | -------------------------------------------------------------------------------- /control_flow_lesson_25/loops.md: -------------------------------------------------------------------------------- 1 | ## Loops 2 | 3 | There are two types of loops in Python: `for` and `while`. 4 | 5 | A for loop is used to "iterate", or do something repeatedly, over an **iterable**. 6 | 7 | An **iterable** is an object that can return one of its elements at a time. This can include **sequence types**, such as strings, lists, and tuples, as well as **non-sequence types**, such as dictionaries and files. 8 | 9 | Example: 10 | ``` 11 | cities = ['new york city', 'mountain view', 'chicago', 'los angeles'] 12 | for city in cities: 13 | print(city) 14 | print("Done!") 15 | ``` 16 | Output: 17 | ``` 18 | new york city 19 | mountain view 20 | chicago 21 | los angeles 22 | Done! 23 | ``` 24 | 25 | ## Built-in function **range()** 26 | 27 | The built-in function range() is the function to iterate over a sequence of numbers. It generates an iterator of arithmetic progressions. 28 | 29 | Example: 30 | ``` 31 | # Prints out the numbers 0,1,2,3,4 32 | for x in range(5): 33 | print(x) 34 | ``` 35 | 36 | `range()` is a built-in function used to create an iterable sequence of numbers. You will frequently use `range()` with a `for` loop to repeat an action a certain number of times, as in this example: 37 | ``` 38 | for i in range(3): 39 | print("Hello!") 40 | ``` 41 | **range(start=0, stop, step=1)** 42 | The `range()` function takes three integer arguments, the first and third of which are optional: 43 | 44 | * The 'start' argument is the first number of the sequence. If unspecified, 'start' defaults to 0. 45 | * The 'stop' argument is 1 more than the last number of the sequence. 
This argument must be specified. 46 | * The 'step' argument is the difference between each number in the sequence. If unspecified, 'step' defaults to 1. 47 | 48 | Notes on using `range()`: 49 | 50 | If you specify one integer inside the parentheses with `range()`, it's used as the value for 'stop,' and the defaults are used for the other two. 51 | * e.g. - `range(4)` returns 0, 1, 2, 3 52 | If you specify two integers inside the parentheses with `range()`, they're used for 'start' and 'stop,' and the default is used for 'step.' 53 | * e.g. - `range(2, 6)` returns 2, 3, 4, 5 54 | Or you can specify all three integers for 'start', 'stop', and 'step.' 55 | * e.g. - `range(1, 10, 2)` returns 1, 3, 5, 7, 9 56 | 57 | * e.g. - `range(0, -5)` returns [] 58 | 59 | ## Creating and Modifying Lists 60 | You can create a list by appending to a new list at each iteration of the for loop like this: 61 | 62 | Creating a new list: 63 | ``` 64 | cities = ['new york city', 'mountain view', 'chicago', 'los angeles'] 65 | capitalized_cities = [] 66 | 67 | for city in cities: 68 | capitalized_cities.append(city.title()) 69 | ``` 70 | 71 | **Modifying** a list is a bit more involved, and requires the use of the range() function. 72 | 73 | We can use the range() function to generate the indices for each value in the cities list. This lets us access the elements of the list with cities[index] so that we can modify the values in the cities list in place. 74 | ``` 75 | cities = ['new york city', 'mountain view', 'chicago', 'los angeles'] 76 | 77 | for index in range(len(cities)): 78 | cities[index] = cities[index].title() 79 | ``` 80 | 81 | ## Iterating Through Dictionaries with For Loops 82 | 83 | When you iterate through a dictionary using a for loop, doing it the normal way (for n in some_dict) will only give you access to the keys in the dictionary - which is what you'd want in some situations. In other cases, you'd want to iterate through both the keys and values in the dictionary. 
Let's see how this is done in an example. Consider this dictionary that uses names of actors as keys and their characters as values. 84 | 85 | ``` 86 | cast = { 87 | "Jerry Seinfeld": "Jerry Seinfeld", 88 | "Julia Louis-Dreyfus": "Elaine Benes", 89 | "Jason Alexander": "George Costanza", 90 | "Michael Richards": "Cosmo Kramer" 91 | } 92 | for key in cast: 93 | print(key) 94 | ``` 95 | The output: 96 | ``` 97 | Jerry Seinfeld 98 | Julia Louis-Dreyfus 99 | Jason Alexander 100 | Michael Richards 101 | ``` 102 | 103 | The method **items()** returns a view of the dict's (key, value) tuple pairs. 104 | ``` 105 | cast = { 106 | "Jerry Seinfeld": "Jerry Seinfeld", 107 | "Julia Louis-Dreyfus": "Elaine Benes", 108 | "Jason Alexander": "George Costanza", 109 | "Michael Richards": "Cosmo Kramer" 110 | } 111 | 112 | for key, value in cast.items(): 113 | print("Actor: {} Role: {}".format(key, value)) 114 | ``` 115 | 116 | The output: 117 | ``` 118 | Actor: Jerry Seinfeld Role: Jerry Seinfeld 119 | Actor: Julia Louis-Dreyfus Role: Elaine Benes 120 | Actor: Jason Alexander Role: George Costanza 121 | Actor: Michael Richards Role: Cosmo Kramer 122 | ``` 123 | Example: 124 | ``` 125 | cast = { 126 | "Jerry Seinfeld": "Jerry Seinfeld", 127 | "Julia Louis-Dreyfus": "Elaine Benes", 128 | "Jason Alexander": "George Costanza", 129 | "Michael Richards": "Cosmo Kramer" 130 | } 131 | 132 | print("Iterating through keys:") 133 | for key in cast: 134 | print(key) 135 | 136 | print("\nIterating through keys and values:") 137 | for key, value in cast.items(): 138 | print("Actor: {} Role: {}".format(key, value)) 139 | ``` 140 | The output: 141 | ``` 142 | Iterating through keys: 143 | Jason Alexander 144 | Michael Richards 145 | Jerry Seinfeld 146 | Julia Louis-Dreyfus 147 | 148 | Iterating through keys and values: 149 | Actor: Jason Alexander Role: George Costanza 150 | Actor: Michael Richards Role: Cosmo Kramer 151 | Actor: Jerry Seinfeld Role: Jerry Seinfeld 152 | Actor: Julia Louis-Dreyfus 
Role: Elaine Benes 153 | ``` 154 | 155 | ## **zip** and **enumerate** 156 | 157 | `zip` is a built-in function that returns an iterator combining multiple iterables into one sequence of tuples. A tuple is a sequence of values. The values can be any type and they're indexed by integers. Tuples are immutable. 158 | For example: 159 | 160 | `list(zip(['a', 'b', 'c'], [1, 2, 3]))` would output: `[('a', 1), ('b', 2), ('c', 3)]` 161 | 162 | Like we did for range() we need to convert it to a list or iterate through it with a loop to see the elements. 163 | 164 | You could unpack each tuple in a for loop like this. 165 | ``` 166 | letters = ['a', 'b', 'c'] 167 | nums = [1, 2, 3] 168 | 169 | for letter, num in zip(letters, nums): 170 | print("{}: {}".format(letter, num)) 171 | ``` 172 | 173 | You can unzip a list of tuples into separate tuples using an asterisk: 174 | ``` 175 | some_list = [('a', 1), ('b', 2), ('c', 3)] 176 | letters, nums = zip(*some_list) 177 | ``` 178 | 179 | ### enumerate 180 | 181 | `enumerate()` is a built-in function that returns an enumerate object, an iterator of (index, value) pairs. The first element of each pair is an index and the second is the sequence's value at that index. 182 | 183 | Example: 184 | 185 | ``` 186 | letters = ['a', 'b', 'c', 'd', 'e'] 187 | for i, letter in enumerate(letters): 188 | print(i, letter) 189 | ``` 190 | 191 | Output: 192 | 193 | ``` 194 | 0 a 195 | 1 b 196 | 2 c 197 | 3 d 198 | 4 e 199 | ``` 200 | 201 | ## List comprehensions 202 | 203 | List comprehension is an easy way to define and create lists based on existing lists. 204 | 205 | A list comprehension can also iterate over a string or a tuple and work on it like a list. 206 | 207 | You want to separate the letters of the word hand and add the letters as items of a list.
208 | Example with for loop: 209 | 210 | ``` 211 | h_letters = [] 212 | 213 | for letter in 'hand': 214 | h_letters.append(letter) 215 | 216 | print(h_letters) 217 | ``` 218 | 219 | List comprehensions: 220 | 221 | ``` 222 | h_letters = [ letter for letter in 'hand' ] 223 | print( h_letters) 224 | ``` 225 | **Syntax of List Comprehension** 226 | `[expression for item in list]` => `[letter for letter in 'hand']` 227 | 228 | 229 | ### Conditionals in List Comprehension 230 | 231 | We will create a list that uses mathematical operators, integers, and range(). 232 | 233 | ``` 234 | number_list = [ x for x in range(20) if x % 2 == 0] 235 | print(number_list) 236 | ``` 237 | Output: 238 | `[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]` 239 | The list, number_list, will be populated by the items in range from 0-19 if the item's value is divisible by 2. 240 | 241 | `squares = [x**2 for x in range(9) if x % 2 == 0]` 242 | The code above sets squares equal to the list [0, 4, 16, 36, 64], as x to the power of 2 is only evaluated if x is even. 243 | 244 | If you would like to add else, you have to move the conditionals to the beginning of the listcomp, right after the expression, like this. 245 | `squares = [x**2 if x % 2 == 0 else x + 3 for x in range(9)]` 246 | List comprehensions are not found in every language, but are very common in Python. -------------------------------------------------------------------------------- /aggregations_lesson_30/aggregations.md: -------------------------------------------------------------------------------- 1 | # Aggregations. 2 | 3 | ## NULLs 4 | 5 | `NULL` is a marker that specifies where no data exists in SQL. It is different from a zero or a space (both of those are values). 6 | 7 | NULLs are different than a zero (zero is a value) - they are cells where data does not exist. When identifying NULLs in a WHERE clause, we write IS NULL or IS NOT NULL. We don't use =, because NULL isn't considered a value in SQL.
Rather, it is a property of the data. 8 | 9 | ## NULLs - Expert Tip 10 | There are two common ways in which you are likely to encounter NULLs: 11 | 12 | + **NULL**s frequently occur when performing a LEFT or RIGHT JOIN. When some rows in the left table of a left join are not matched with rows in the right table, those rows will contain some NULL values in the result set. 13 | 14 | + **NULL**s can also occur from simply missing data in our database. 15 | 16 | ## NULLs and COUNT 17 | 18 | The `COUNT()` function returns a count of all the rows that contain some non-null data. 19 | 20 | `COUNT()` can also be used to count the number of non-null records in an individual column or any column in a table. 21 | 22 | Notice that `COUNT(column)` does not count rows where that column is NULL. Therefore, this can be useful for quickly identifying which rows have missing data. 23 | 24 | ## SUM 25 | 26 | **SUM** works similarly to **COUNT** except you'll want to specify column names rather than using star. 27 | 28 | You can't use `SUM(*)` the way you can use `COUNT(*)`. Unlike COUNT, you can only use SUM on numeric columns. However, SUM will ignore NULL values and treat NULLs as zero! 29 | 30 | Aggregation Reminder 31 | An important thing to remember: **aggregators only aggregate vertically - the values of a column**. If you want to perform a calculation across rows, you would do this with [simple arithmetic](https://community.modeanalytics.com/sql/tutorial/sql-operators/#arithmetic-in-sql). 32 | 33 | ## MIN and MAX 34 | 35 | The syntax for MIN and MAX is similar to SUM and COUNT. MIN and MAX ignore NULL values. 36 | 37 | ##### Expert Tip: 38 | Functionally, MIN and MAX are similar to COUNT in that they can be used on non-numerical columns. Depending on the column type, MIN will return the lowest number, earliest date, or non-numerical value as early in the alphabet as possible.
As you might suspect, MAX does the opposite—it returns the highest number, the latest date, or the non-numerical value closest alphabetically to “Z.” 39 | 40 | 41 | ## AVG 42 | 43 | `AVG` is a SQL aggregate function that calculates the average of a selected group of values. AVG has similar syntax to all of the other aggregation functions. AVG can only be used on numerical columns, and it ignores NULLs completely. 44 | 45 | If you want to count NULLs as zero, you will need to use SUM and COUNT. However, this is probably not a good idea if the NULL values truly just represent unknown values for a cell. 46 | 47 | ##### MEDIAN - Expert Tip 48 | One quick note that a median might be a more appropriate measure of center for this data, but finding the median happens to be a pretty difficult thing to get using SQL alone — so difficult that finding a median is occasionally asked as an interview question. 49 | 50 | ## MEDIAN 51 | 52 | "Calculates a percentile based on a continuous distribution of the column value in SQL Server. The result is interpolated and might not be equal to any of the specific values in the column." 53 | 54 | ``` 55 | PERCENTILE_CONT ( numeric_literal ) 56 | 57 | WITHIN GROUP ( ORDER BY order_by_expression [ ASC | DESC ] ) 58 | 59 | OVER ( [ <partition_by_clause> ] ) 60 | ``` 61 | 62 | [Median: PERCENTILE_CONT](https://docs.microsoft.com/en-us/sql/t-sql/functions/percentile-cont-transact-sql?view=sql-server-2017) 63 | 64 | 65 | ## GROUP BY 66 | 67 | `GROUP BY` allows you to take the sum of data limited to each account rather than across the entire dataset. 68 | 69 | + **GROUP BY** can be used to aggregate data within subsets of the data. For example, grouping for different accounts, different regions, or different sales representatives. 70 | 71 | + The GROUP BY always goes between WHERE and ORDER BY 72 | 73 | + ORDER BY works like SORT in spreadsheet software 74 | 75 | + Any column in the SELECT statement that is not within an aggregator must be in the GROUP BY clause.
76 | 77 | Example: 78 | 79 | ``` 80 | SELECT account_id, 81 | SUM(standard_qty) as standard_sum, 82 | SUM(gloss_qty) as gloss_sum, 83 | SUM(poster_qty) as poster_sum 84 | FROM demo.orders 85 | GROUP BY account_id 86 | ORDER BY account_id; 87 | ``` 88 | 89 | ##### GROUP BY - Expert Tip 90 | it is worth noting that SQL evaluates the aggregations before the LIMIT clause. If you don’t group by any columns, you’ll get a 1-row result—no problem there. If you group by a column with enough unique values that it exceeds the LIMIT number, the aggregates will be calculated, and then some rows will simply be omitted from the results. 91 | 92 | This is actually a nice way to do things because you know you’re going to get the correct aggregates. If SQL cuts the table down to 100 rows, then performed the aggregations, your results would be substantially different. The above query’s results exceed 100 rows, so it’s a perfect example. 93 | 94 | You can GROUP BY multiple columns at once. This is often useful to aggregate across a number of different segments. 95 | 96 | Example: 97 | ``` 98 | SELECT account_id, 99 | channel, 100 | COUNT(id) as events 101 | FROM demo.web_events_full 102 | GROUP BY account_id, channel 103 | ORDER BY account_id, events DESC; 104 | ``` 105 | The order in the `ORDER BY` determines which column is ordered on first. 106 | You can order `DESC` for any column in `ORDER BY`. 107 | 108 | #####GROUP BY - Expert Tips 109 | + The order of column names in your `GROUP BY` clause doesn’t matter—the results will be the same regardless. If we run the same query and reverse the order in the GROUP BY clause, you can see we get the same results. 110 | 111 | 112 | + As with ORDER BY, you can substitute numbers for column names in the `GROUP BY` clause. It’s generally recommended to do this only when you’re grouping many columns, or if something else is causing the text in the GROUP BY clause to be excessively long. 
113 | 114 | 115 | + A reminder here that any column that is not within an aggregation must show up in your GROUP BY statement. If you forget, you will likely get an error. 116 | 117 | 118 | ## DISTINCT 119 | 120 | If you want to group by some columns but you don't want to include any aggregations you can use `DISTINCT`. 121 | 122 | `DISTINCT` is always used in SELECT statements, and it provides the unique rows for all columns written in the SELECT statement. Therefore, you only use DISTINCT once in any particular SELECT statement. 123 | 124 | ``` 125 | SELECT DISTINCT column1, column2, column3 126 | FROM table1; 127 | ``` 128 | 129 | **DISTINCT - Expert Tip** 130 | It’s worth noting that using `DISTINCT`, particularly in aggregations, can slow your queries down quite a bit. 131 | 132 | 133 | ## HAVING 134 | **HAVING - Expert Tip** 135 | 136 | `HAVING` is the “clean” way to filter a query that has been aggregated, but this is also commonly done using a subquery. Essentially, any time you want to perform a WHERE on an element of your query that was created by an aggregate, you need to use HAVING instead. 137 | 138 | **WHERE** subsets the returned data based on a logical condition. 139 | **WHERE** appears after the FROM, JOIN, ON clauses, but before GROUP BY. 140 | **HAVING** appears after the GROUP BY clause but before the ORDER BY. 141 | **HAVING** is like **WHERE**, but it works on logical statements involving aggregations. 142 | 143 | **Query clause order** 144 | 1. `SELECT` 145 | 2. `FROM` (together with any `JOIN` ... `ON`) 146 | 3. `WHERE` 147 | 4. `GROUP BY` 148 | 5. `HAVING` 149 | 6. `ORDER BY` 150 | 151 | 152 | ## DATE Functions 153 | 154 | GROUPing BY a date column is not usually very useful in SQL, as these columns tend to have transaction data down to a second. 155 | There are a number of built in SQL functions that are aimed at helping us improve our experience in working with dates.
156 | 157 | `DATE_TRUNC` allows you to truncate your date to a particular part of your date-time column. Common truncations are day, month, and year. Here is a great blog post by Mode Analytics on the power of this function. 158 | 159 | `DATE_PART` can be useful for pulling a specific portion of a date, but notice pulling month or day of the week (dow) means that you are no longer keeping the years in order. Rather you are grouping for certain components regardless of which year they belonged in. 160 | 161 | You can reference the columns in your select statement in GROUP BY and ORDER BY clauses with numbers that follow the order they appear in the select statement. For example 162 | 163 | ``` 164 | SELECT standard_qty, COUNT(*) 165 | 166 | FROM orders 167 | 168 | GROUP BY 1 -- this 1 refers to standard_qty since it is the first of the columns included in the select statement 169 | 170 | ORDER BY 1 -- this 1 refers to standard_qty since it is the first of the columns included in the select statement 171 | ``` 172 | 173 | `DATE_PART('dow')` pulls day of the week and returns a value from 0 to 6 (0 is Sunday, 6 is Saturday). -------------------------------------------------------------------------------- /control_flow_lesson_25/loops_quizzes.py: -------------------------------------------------------------------------------- 1 | # Quiz 1: Create Usernames 2 | 3 | #Write a for loop that iterates over the names list to create a usernames list. 4 | #To create a username for each name, make everything lowercase and replace 5 | #spaces with underscores. Running your for loop over the list.
names = ["Joey Tribbiani", "Monica Geller", "Chandler Bing", "Phoebe Buffay"]
usernames = []

# Build a brand-new list: lowercase every name and swap spaces for underscores.
for name in names:
    usernames.append(name.lower().replace(' ', '_'))

print(usernames)


# Quiz 2: Modify Usernames with Range
# Write a for loop that uses range() to iterate over the positions in usernames
# to modify the list. Like you did in the previous quiz, change each name to be
# lowercase and replace spaces with underscores.

usernames = ["Joey Tribbiani", "Monica Geller", "Chandler Bing",
             "Phoebe Buffay"]

# Iterating over the indices lets us overwrite each element in place.
for index in range(len(usernames)):
    usernames[index] = usernames[index].lower().replace(' ', '_')
print(usernames)


# Quiz 3: Tag Counter
# Write a for loop that iterates over a list of strings, tokens, and counts how
# many of them are XML tags. XML is a data language similar to HTML. You can
# tell if a string is an XML tag if it begins with a left angle bracket "<" and
# ends with a right angle bracket ">". Keep track of the number of tags using
# the variable count.

tokens = ['<greeting>', 'Hello World!', '</greeting>']
count = 0

# startswith()/endswith() are safe on empty strings, whereas indexing with
# token[0] raises IndexError as soon as a token is ''.
for token in tokens:
    if token.startswith('<') and token.endswith('>'):
        count += 1

print(count)


# Quiz 4: Create an HTML List
# Write some code, including a for loop, that iterates over a list of strings
# and creates a single string, html_str, which is an HTML list. For example,
# the items list below should output:
# <ul>
# <li>first string</li>
# <li>second string</li>
# </ul>

items = ['first string', 'second string']
html_str = "<ul>\n"  # "\n" ends the line, so every list item starts on its own line

# Wrap each item in <li> tags, one per line, then close the list.
for item in items:
    html_str += "<li>{}</li>\n".format(item)
html_str += "</ul>"

print(html_str)


# Quiz 5: Lower
# If you want to create a new list called lower_colors, where each color
# in colors is lower cased, which code would do this?
colors = ['Red', 'Blue', 'Green', 'Purple']
lower_colors = []

for color in colors:
    lower_colors.append(color.lower())

print(lower_colors)


# Quizzes: Iterating Through Dictionaries

def count_fruits(basket, fruit_names):
    """Return the total quantity of the basket entries that are fruits.

    basket maps an item name to its quantity; fruit_names is the collection of
    names that count as fruit. Items missing from fruit_names are ignored, so
    an empty basket, or one without any fruit, yields 0.
    """
    total = 0
    for item, quantity in basket.items():
        # A membership test replaces the inner loop over every fruit name.
        if item in fruit_names:
            total += quantity
    return total


# Quiz 1: Fruit Basket - Task 1
"""
You would like to count the number of fruits in your basket. In order to do
this, you have the following dictionary and list of fruits. Use the dictionary
and list to count the total number of fruits, but you do not want to count the
other items in your basket.
"""

basket_items = {'apples': 4, 'oranges': 19, 'kites': 3, 'sandwiches': 8}
fruits = ['apples', 'oranges', 'pears', 'peaches', 'grapes', 'bananas']

result = count_fruits(basket_items, fruits)

print(result)


# Quiz: Fruit Basket - Task 2
"""
If your solution is robust, you should be able to use it with any dictionary of
items to count the number of fruits in the basket. Try the loop for each of
the dictionaries below to make sure it always works.
"""

#Example 1

basket_items = {'pears': 5, 'grapes': 19, 'kites': 3, 'sandwiches': 8, 'bananas': 4}
fruits = ['apples', 'oranges', 'pears', 'peaches', 'grapes', 'bananas']

result = count_fruits(basket_items, fruits)

print(result)

#Example 2

basket_items = {'peaches': 5, 'lettuce': 2, 'kites': 3, 'sandwiches': 8, 'pears': 4}
fruits = ['apples', 'oranges', 'pears', 'peaches', 'grapes', 'bananas']

result = count_fruits(basket_items, fruits)

print(result)

#Example 3

basket_items = {'lettuce': 2, 'kites': 3, 'sandwiches': 8, 'pears': 4, 'bears': 10}
fruits = ['apples', 'oranges', 'pears', 'peaches', 'grapes', 'bananas']

result = count_fruits(basket_items, fruits)

# The original call was missing its closing parenthesis (a SyntaxError).
print("I count {} fruits in the busket".format(result))


# Quiz: Fruit Basket - Task 3

# You would like to count the number of fruits in your basket.
# In order to do this, you have the following dictionary and list of
# fruits. Use the dictionary and list to count the total number
# of fruits and not_fruits.
fruit_count = 0
not_fruit_count = 0
basket_items = {'apples': 4, 'oranges': 19, 'kites': 3, 'sandwiches': 8}
fruits = ['apples', 'oranges', 'pears', 'peaches', 'grapes', 'bananas']

# Walk the basket once and route every quantity to the matching tally.
for key, value in basket_items.items():
    if key not in fruits:
        # Anything missing from the fruit list counts as "not fruit".
        not_fruit_count += value
    else:
        fruit_count += value

print("There are {} fruits and {} not fruits".format(fruit_count, not_fruit_count))

# Quiz: Break the String
#
# Write a loop with a break statement to create a string, news_ticker, that
# is exactly 140 characters long. You should create the news ticker by adding
# headlines from the headlines list, inserting a space in between each headline.

headlines = ["Local Bear Eaten by Man",
             "Legislature Announces New Laws",
             "Peasant Discovers Violence Inherent in System",
             "Cat Rescues Fireman Stuck in Tree",
             "Brave Knight Runs Away",
             "Papperbok Review: Totally Triffic"]

news_ticker = ""

# Flatten the headlines into one space-separated string, then copy it one
# character at a time and stop once the ticker holds 140 characters.
headlines = " ".join(headlines)

for position, letter in enumerate(headlines, start=1):
    news_ticker += letter
    if position == 140:
        break

print(news_ticker)

# Udacity solution

headlines = ["Local Bear Eaten by Man",
             "Legislature Announces New Laws",
             "Peasant Discovers Violence Inherent in System",
             "Cat Rescues Fireman Stuck in Tree",
             "Brave Knight Runs Away",
             "Papperbok Review: Totally Triffic"]

news_ticker = ""
for headline in headlines:
    news_ticker = news_ticker + headline + " "
    if len(news_ticker) < 140:
        continue
    # Long enough: trim the overshoot and stop adding headlines.
    news_ticker = news_ticker[:140]
    break

print(news_ticker)


# Quiz 1: zip() and enumerate()
#
# Zip Coordinates


x_coord = [23, 53, 2, -12, 95, 103, 14, -5]
y_coord = [677, 233, 405, 433, 905, 376, 432, 445]
z_coord = [4, 16, -6, -42, 3, -6, 23, -1]
labels = ["F", "J", "A", "Q", "Y", "B", "W", "X"]

# zip() lines the four lists up element by element; each tuple becomes one
# "label: x, y, z" string.
points = [
    "{}: {}, {}, {}".format(label, x, y, z)
    for x, y, z, label in zip(x_coord, y_coord, z_coord, labels)
]

print(points)


# Udacity solution:
x_coord = [23, 53, 2, -12, 95, 103, 14, -5]
y_coord = [677, 233, 405, 433, 905, 376, 432, 445]
z_coord = [4, 16, -6, -42, 3, -6, 23, -1]
labels = ["F", "J", "A", "Q", "Y", "B", "W", "X"]

# Zipping with the label first means the tuple can be unpacked straight into
# the format string.
points = ["{}: {}, {}, {}".format(*row) for row in zip(labels, x_coord, y_coord, z_coord)]

for point in points:
    print(point)


# Quiz 2: zip() and enumerate()
#
# Zip Lists to a Dictionary

cast_names = ["Barney", "Robin", "Ted", "Lily", "Marshall"]
cast_heights = [72, 68, 72, 66, 76]

# Pair each name with the height at the same position.
cast = {member: height for member, height in zip(cast_names, cast_heights)}

print(cast)


# Quiz 3: unzip
#
# Unzip the cast tuple into two names and heights tuples.
cast = (("Barney", 72), ("Robin", 68), ("Ted", 72), ("Lily", 66), ("Marshall", 76))

# define names and heights here

# zip(*cast) regroups the (name, height) pairs into one tuple of names and one
# tuple of heights.
names, heights = zip(*cast)

print(names)
print(heights)


# Quiz 4: zip() and enumerate()
#
# Quiz: Transpose with Zip
# Use zip to transpose data from a 4-by-3 matrix to a 3-by-4 matrix

data = ((0, 1, 2), (3, 4, 5), (6, 7, 8), (9, 10, 11))

# Unpacking the rows into zip() yields the columns.
data_transpose = tuple(zip(*data))

print(data_transpose)


# Quiz 5: Quiz: Enumerate
#
# Use enumerate to modify the cast list so that each element contains the name
# followed by the character's corresponding height. For example, the first
# element of cast should change from "Barney Stinson" to "Barney Stinson 72".

cast = ["Barney Stinson", "Robin Scherbatsky", "Ted Mosby", "Lily Aldrin", "Marshall Eriksen"]
heights = [72, 68, 72, 66, 76]

# enumerate() supplies the index needed to overwrite each cast entry in place.
for index, height in enumerate(heights):
    cast[index] = "{} {}".format(cast[index], height)

print(cast)

--------------------------------------------------------------------------------
/aggregations_lesson_30/aggregations.sql:
--------------------------------------------------------------------------------
-- Aggregation Questions
--
--
-- 1.Find the total amount of poster_qty paper ordered in the orders table.

SELECT SUM(poster_qty) AS total_poster_sales
FROM orders;

-- 2.Find the total amount of standard_qty paper ordered in the orders table.

-- SUM adds up the quantities; COUNT would only report how many rows have a
-- non-NULL standard_qty.
SELECT SUM(standard_qty) AS total_standard_sales
FROM orders;

-- 3.Find the total dollar amount of sales using the total_amt_usd in the orders table.
-- SUM adds up the dollar values; COUNT would only report how many orders have
-- a non-NULL total_amt_usd.
SELECT SUM(total_amt_usd) AS total_dollar_sales
FROM orders;

-- 4.Find the total amount spent on standard_amt_usd and gloss_amt_usd paper for each
-- order in the orders table. This should give a dollar amount for each order in the table.

-- Row-level arithmetic: one result row per order, no aggregation.
SELECT standard_amt_usd + gloss_amt_usd AS total
FROM orders;

-- 5. Find the standard_amt_usd per unit of standard_qty paper. Your solution should use
-- both an aggregation and a mathematical operator.

-- NULLIF turns a zero total quantity into NULL, so the division returns NULL
-- instead of raising a division-by-zero error.
SELECT SUM(standard_amt_usd) / NULLIF(SUM(standard_qty), 0) AS unit_price_standard_qty
FROM orders;


-- MAX, MIN, AVG
--
--
-- 1.When was the earliest order ever placed? You only need to return the date.

SELECT MIN(occurred_at) AS earliest_order
FROM orders;

-- 2.Try performing the same query as in question 1 without using an aggregation function.

SELECT occurred_at AS earliest_order
FROM orders
ORDER BY occurred_at ASC
LIMIT 1;

-- 3.When did the most recent (latest) web_event occur?

SELECT MAX(occurred_at) AS latest_web_event
FROM web_events;

-- 4.Try to perform the result of the previous query without using an aggregation function.

SELECT occurred_at AS latest_web_event
FROM web_events
ORDER BY occurred_at DESC
LIMIT 1;

-- 5.Find the mean (AVERAGE) amount spent per order on each paper type, as well as the mean
-- amount of each paper type purchased per order. Your final answer should have 6 values -
-- one for each paper type for the average number of sales, as well as the average amount.
SELECT
    AVG(standard_qty) AS avg_standard,
    AVG(gloss_qty) AS avg_gloss,
    AVG(poster_qty) AS avg_poster,
    AVG(standard_amt_usd) AS avg_standart_usd,
    AVG(gloss_amt_usd) AS avg_gloss_usd,
    AVG(poster_amt_usd) AS avg_poster_usd
FROM orders;

-- 6.What is the MEDIAN total_usd
-- spent on all orders?

/*
PERCENTILE_CONT interpolates the appropriate value, whether or not it exists in the data set,
while PERCENTILE_DISC always returns an actual value from the set.
*/

SELECT PERCENTILE_CONT(0.5)
    WITHIN GROUP (ORDER BY total_amt_usd) AS median_total_usd
FROM orders;


-- Udacity solution:

/*Since there are 6912 orders - we want the average of the 3457 and 3456 order amounts when ordered.
This is the average of 2483.16 and 2482.55. This gives the median of 2482.855. This obviously isn't
an ideal way to compute. If we obtain new orders, we would have to change the limit. SQL didn't even
calculate the median for us. The query below uses a SUBQUERY, but you could use any method to find the two
necessary values, and then you just need the average of them.
*/

SELECT lower_half.total_amt_usd
FROM (
    SELECT total_amt_usd
    FROM orders
    ORDER BY total_amt_usd
    LIMIT 3457
) AS lower_half
ORDER BY lower_half.total_amt_usd DESC
LIMIT 2;



-- GROUP BY

-- 1.
-- Which account (by name) placed the earliest order? Your solution should have the account name
-- and the date of the order.

-- Sort by the order date, not the account name, so that LIMIT 1 really picks
-- the earliest order.
SELECT
    accounts.name AS account_name,
    orders.occurred_at AS order_date
FROM accounts
INNER JOIN orders
    ON accounts.id = orders.account_id
ORDER BY orders.occurred_at
LIMIT 1;

-- or

-- The tables must be joined on the account key; a comma join without a
-- condition pairs every order with every account (a cross join).
SELECT
    accounts.name AS account_name,
    MIN(orders.occurred_at) AS order_date
FROM orders
INNER JOIN accounts
    ON accounts.id = orders.account_id
GROUP BY accounts.name
ORDER BY order_date
LIMIT 1;

-- 2.
-- Find the total sales in usd for each account. You should include two columns - the total sales
-- for each company's orders in usd and the company name.


SELECT
    accounts.name AS account_name,
    SUM(orders.total_amt_usd) AS total_sales_per_oder
FROM orders
INNER JOIN accounts
    ON accounts.id = orders.account_id
GROUP BY accounts.name;

-- 3.
-- Via what channel did the most recent (latest) web_event occur, which account was associated
-- with this web_event? Your query should return only three values - the date, channel, and account name.

SELECT
    web_events.occurred_at AS latest_web_events,
    accounts.name AS account_name,
    web_events.channel AS channel_name
FROM web_events
INNER JOIN accounts
    ON web_events.account_id = accounts.id
ORDER BY web_events.occurred_at DESC
LIMIT 1;

-- 4.
-- Find the total number of times each type of channel from the web_events was used. Your final
-- table should have two columns - the channel and the number of times the channel was used.

SELECT
    COUNT(web_events.occurred_at) AS use_web_events,
    web_events.channel AS channel_name
FROM web_events
GROUP BY web_events.channel;

-- 5.Who was the primary contact associated with the earliest web_event?

SELECT accounts.primary_poc
FROM web_events
INNER JOIN accounts
    ON web_events.account_id = accounts.id
ORDER BY web_events.occurred_at
LIMIT 1;

-- 6.
-- What was the smallest order placed by each account in terms of
-- total usd. Provide only two columns - the account name and the total usd. Order from smallest
-- dollar amounts to largest.

SELECT
    MIN(orders.total_amt_usd) AS smallest_order,
    accounts.name AS account_name
FROM accounts
INNER JOIN orders
    ON orders.account_id = accounts.id
GROUP BY accounts.name
ORDER BY smallest_order;

-- 7.
-- Find the number of sales reps in each region. Your final table should have two columns -
-- the region and the number of sales_reps. Order from fewest reps to most reps.

SELECT
    region.name AS region_name,
    COUNT(*) AS number_sales_reps
FROM region
INNER JOIN sales_reps
    ON sales_reps.region_id = region.id
GROUP BY region.name
ORDER BY number_sales_reps;


-- GROUP BY Part 2
--

-- 1.
-- For each account, determine the average amount of each type of paper they purchased
-- across their orders. Your result should have four columns - one for the account name and one
-- for the average quantity purchased for each of the paper types for each account.

-- Grouping on the source column rather than the output alias keeps the query
-- portable across engines.
SELECT
    accounts.name AS account_name,
    AVG(orders.standard_qty) AS avg_standard,
    AVG(orders.gloss_qty) AS avg_gloss,
    AVG(orders.poster_qty) AS avg_poster
FROM accounts
INNER JOIN orders
    ON accounts.id = orders.account_id
GROUP BY accounts.name;

-- 2.
-- For each account, determine the average amount spent per order on each paper type.
-- Your result should have four columns - one for the account name and one for the average
-- amount spent on each paper type.

SELECT
    accounts.name AS account_name,
    AVG(orders.standard_amt_usd) AS avg_standard,
    AVG(orders.gloss_amt_usd) AS avg_gloss,
    AVG(orders.poster_amt_usd) AS avg_poster
FROM accounts
INNER JOIN orders
    ON accounts.id = orders.account_id
GROUP BY accounts.name;


-- 3.
-- Determine the number of times a particular channel was used in the web_events table for each sales rep.
-- Your final table should have three columns - the name of the sales rep, the channel, and the number of occurrences.
-- Order your table with the highest number of occurrences first.
SELECT
    sales_reps.name AS name,
    web_events.channel AS channel,
    COUNT(web_events.channel) AS num_events
FROM sales_reps
INNER JOIN accounts
    ON sales_reps.id = accounts.sales_rep_id
INNER JOIN web_events
    ON accounts.id = web_events.account_id
GROUP BY sales_reps.name, web_events.channel
ORDER BY num_events DESC;

-- or

SELECT
    s.name,
    w.channel,
    COUNT(*) AS num_events
FROM accounts AS a
INNER JOIN web_events AS w
    ON a.id = w.account_id
INNER JOIN sales_reps AS s
    ON s.id = a.sales_rep_id
GROUP BY s.name, w.channel
ORDER BY num_events DESC;

-- 4.
-- Determine the number of times a particular channel was used in the web_events table for each region. Your final
-- table should have three columns - the region name, the channel, and the number of occurrences. Order your table
-- with the highest number of occurrences first.

-- web_events reach a region through the account and its sales rep.
SELECT
    COUNT(web_events.channel) AS num_occurences,
    region.name AS name,
    web_events.channel AS channel
FROM web_events
INNER JOIN accounts
    ON web_events.account_id = accounts.id
INNER JOIN sales_reps
    ON accounts.sales_rep_id = sales_reps.id
INNER JOIN region
    ON sales_reps.region_id = region.id
GROUP BY region.name, web_events.channel
ORDER BY num_occurences DESC;


-- DISTINCT
--
-- 1.Use DISTINCT to test if there are any accounts associated with more than one region.
# Baseline: one row per distinct account.
SELECT DISTINCT id, name
FROM accounts;

# Solution with JOIN
# One row per (account, region) pair; an account linked to more than one region
# would show up on several consecutive rows once sorted by account name.

SELECT a.name AS account_name,
       r.name AS region_name,
       COUNT(r.name)
FROM accounts a
JOIN sales_reps s
    ON a.sales_rep_id = s.id
JOIN region r
    ON s.region_id = r.id
GROUP BY a.name, r.name
ORDER BY a.name;

# Solution with COUNT and DISTINCT to count unique and all data
# The tables must be joined on their keys: a comma join without a condition is a
# Cartesian product, so its row count says nothing about accounts.
# all_records counts account-region pairings, unique_records counts distinct accounts;
# the two numbers differ only if some account is linked to more than one region.
SELECT COUNT(*)             AS all_records,
       COUNT(DISTINCT a.id) AS unique_records
FROM accounts a
JOIN sales_reps s
    ON s.id = a.sales_rep_id
JOIN region r
    ON r.id = s.region_id;

# Udacity solution
# If any account were associated with more than one region, the first query would
# return more rows than the second query.

SELECT a.id   AS "account id",
       r.id   AS "region id",
       a.name AS "account name",
       r.name AS "region name"
FROM accounts a
JOIN sales_reps s
    ON s.id = a.sales_rep_id
JOIN region r
    ON r.id = s.region_id;

# and
SELECT DISTINCT id, name
FROM accounts;

# 2. Have any sales reps worked on more than one account?

# Baseline: one row per distinct sales rep.
SELECT DISTINCT id, name
FROM sales_reps;

# or

# Number of accounts per sales rep, smallest first; any num_accounts above 1
# means that rep has worked on more than one account.
SELECT sales_reps.name,
       COUNT(*) num_accounts,
       sales_reps.id
FROM accounts
JOIN sales_reps
    ON sales_reps.id = accounts.sales_rep_id
GROUP BY sales_reps.id, sales_reps.name
ORDER BY num_accounts;


# HAVING
# 1. How many of the sales reps have more than 5 accounts that they manage?
# Lists every sales rep managing more than 5 accounts; the number of rows returned is the answer.

SELECT s.id, s.name, COUNT(*) num_accounts
FROM sales_reps s
JOIN accounts a
    ON s.id = a.sales_rep_id
GROUP BY s.id, s.name
HAVING COUNT(*) > 5
ORDER BY num_accounts;

# Using SUBQUERY
# Counts the qualifying reps directly. The inner query needs no ORDER BY because
# only its row count is used.

SELECT COUNT(*) num_reps_above5
FROM (SELECT s.id, s.name, COUNT(*) num_accounts
      FROM accounts a
      JOIN sales_reps s
          ON s.id = a.sales_rep_id
      GROUP BY s.id, s.name
      HAVING COUNT(*) > 5) AS reps_above5;


# 2. How many accounts have more than 20 orders?
# Lists the qualifying accounts; the number of rows returned is the answer.
SELECT a.id, a.name, COUNT(*) num_orders
FROM accounts a
JOIN orders o
    ON a.id = o.account_id
GROUP BY a.id, a.name
HAVING COUNT(*) > 20
ORDER BY num_orders;

# 3. Which account has the most orders?

SELECT a.id, a.name, COUNT(*) num_orders
FROM accounts a
JOIN orders o
    ON a.id = o.account_id
GROUP BY a.id, a.name
ORDER BY num_orders DESC
LIMIT 1;

# 4. How many accounts spent more than 30,000 usd total across all orders?

SELECT a.id, a.name, SUM(o.total_amt_usd) total_spent
FROM accounts a
JOIN orders o
    ON a.id = o.account_id
GROUP BY a.id, a.name
HAVING SUM(o.total_amt_usd) > 30000
ORDER BY total_spent;

# 5. How many accounts spent less than 1,000 usd total across all orders?

SELECT a.id, a.name, SUM(o.total_amt_usd) total_spent
FROM accounts a
JOIN orders o
    ON a.id = o.account_id
GROUP BY a.id, a.name
HAVING SUM(o.total_amt_usd) < 1000
ORDER BY total_spent;

# 6. Which account has spent the most with us?

SELECT a.id, a.name, SUM(o.total_amt_usd) total_spent
FROM accounts a
JOIN orders o
    ON a.id = o.account_id
GROUP BY a.id, a.name
ORDER BY total_spent DESC
LIMIT 1;

# 7. Which account has spent the least with us?

SELECT a.id, a.name, SUM(o.total_amt_usd) total_spent
FROM accounts a
JOIN orders o
    ON a.id = o.account_id
GROUP BY a.id, a.name
ORDER BY total_spent
LIMIT 1;

# 8. Which accounts used facebook as a channel to contact customers more than 6 times?
# The channel filter goes in WHERE (a row-level condition applied before grouping);
# without it every channel used more than 6 times would be returned, not just facebook.

SELECT a.id, a.name, w.channel, COUNT(*) count_channel
FROM accounts a
JOIN web_events w
    ON a.id = w.account_id
WHERE w.channel = 'facebook'
GROUP BY a.id, a.name, w.channel
HAVING COUNT(*) > 6
ORDER BY count_channel;

# 9. Which account used facebook most as a channel?

SELECT a.id, a.name, w.channel, COUNT(*) channel_use
FROM accounts a
JOIN web_events w
    ON a.id = w.account_id
WHERE w.channel = 'facebook'
GROUP BY a.id, a.name, w.channel
ORDER BY channel_use DESC
LIMIT 1;


# 10. Which channel was most frequently used by most accounts?
# Shows the ten highest (account, channel) usage counts so the dominant channel
# can be read off the top rows.

SELECT a.id, a.name, w.channel, COUNT(*) channel_use
FROM accounts a
JOIN web_events w
    ON a.id = w.account_id
GROUP BY a.id, a.name, w.channel
ORDER BY channel_use DESC
LIMIT 10;
--------------------------------------------------------------------------------