-- Repository contents:
--   ├── Advanced - CTEs.sql
--   ├── Advanced - Stored Procedures.sql
--   ├── Advanced - Temp Tables.sql
--   ├── Advanced - Triggers and Events.sql
--   ├── Beginner - Group By + Order By.sql
--   ├── Beginner - Having vs Where.sql
--   ├── Beginner - Limit and Aliasing.sql
--   ├── Beginner - Parks_and_Rec_Create_db.sql
--   ├── Beginner - Select Statement.sql
--   ├── Beginner - Where Statement.sql
--   ├── Intermediate - Case Statements.sql
--   ├── Intermediate - Joins.sql
--   ├── Intermediate - String Functions.sql
--   ├── Intermediate - Subqueries.sql
--   ├── Intermediate - Unions.sql
--   ├── Intermediate - Window Functions.sql
--   ├── Portfolio Project - Data Cleaning.sql
--   ├── Portfolio Project - EDA.sql
--   ├── README.md
--   └── layoffs.csv

-- ============================================================================
-- File: /Advanced - CTEs.sql
-- ============================================================================

-- Using Common Table Expressions (CTEs)
-- A CTE allows you to define a named subquery block that can be referenced within the main query.
-- It is particularly useful for recursive queries or queries that require referencing a higher level;
-- this is something we will look at in the next lesson.

-- Let's take a look at the basics of writing a CTE:

-- First, a CTE starts with the WITH keyword, followed by any name we want to give it.
-- Then we say AS and, within the parentheses, build the subquery/table we want.
WITH CTE_Example AS
(
    SELECT
        gender,
        SUM(salary),
        MIN(salary),
        MAX(salary),
        COUNT(salary),
        AVG(salary)
    FROM employee_demographics AS dem
    INNER JOIN employee_salary AS sal
        ON dem.employee_id = sal.employee_id
    GROUP BY gender
)
-- directly after defining it we can query the CTE
SELECT *
FROM CTE_Example;

-- A CTE only exists for the single statement it is attached to. Querying it again
-- in a separate statement does not work: MySQL reports that the table does not exist.
-- The statement is left commented out so the rest of this script can run end-to-end;
-- uncomment it to see the error for yourself.
-- SELECT *
-- FROM CTE_Example;

-- Now we can use the columns within this CTE to do calculations on this data that
-- we couldn't have done without it.
-- Aggregate per gender inside the CTE, then compute on those aggregates outside it.
WITH CTE_Example AS
(
    SELECT
        gender,
        SUM(salary),
        MIN(salary),
        MAX(salary),
        COUNT(salary)
    FROM employee_demographics AS dem
    INNER JOIN employee_salary AS sal
        ON dem.employee_id = sal.employee_id
    GROUP BY gender
)
-- The CTE's columns are named after the raw expressions (e.g. `SUM(salary)`), so the
-- column names must be wrapped in back ticks - without them the query doesn't work.
SELECT
    gender,
    ROUND(AVG(`SUM(salary)`/`COUNT(salary)`),2)
FROM CTE_Example
GROUP BY gender;


-- A single WITH can define several CTEs; separate them with a comma.
WITH CTE_Example AS
(
    SELECT
        employee_id,
        gender,
        birth_date
    FROM employee_demographics AS dem
    WHERE birth_date > '1985-01-01'
),
CTE_Example2 AS
(
    SELECT
        employee_id,
        salary
    FROM parks_and_recreation.employee_salary
    WHERE salary >= 50000
)
-- Both CTEs are visible to the final query, so they can be joined together.
SELECT *
FROM CTE_Example AS cte1
LEFT JOIN CTE_Example2 AS cte2
    ON cte1.employee_id = cte2.employee_id;


-- the last thing I wanted to show you is that we can actually make our life easier by renaming the columns in the CTE
-- let's take our very first CTE we made.
-- We had to use tick marks because of the column names.

-- We can rename the columns by listing the new names right after the CTE name:
WITH CTE_Example (gender, sum_salary, min_salary, max_salary, count_salary) AS
(
    SELECT
        gender,
        SUM(salary),
        MIN(salary),
        MAX(salary),
        COUNT(salary)
    FROM employee_demographics AS dem
    INNER JOIN employee_salary AS sal
        ON dem.employee_id = sal.employee_id
    GROUP BY gender
)
-- With the columns renamed, back ticks are no longer needed to reference them.
SELECT
    gender,
    ROUND(AVG(sum_salary/count_salary),2)
FROM CTE_Example
GROUP BY gender;

-- ============================================================================
-- File: /Advanced - Stored Procedures.sql
-- ============================================================================

-- So let's look at how we can create a stored procedure

-- First let's just write a super simple query
SELECT *
FROM employee_salary
WHERE salary >= 60000;

-- Now let's put this into a stored procedure.
-- Drop any earlier copy first so this script can be re-run without an
-- "already exists" error.
DROP PROCEDURE IF EXISTS large_salaries;
CREATE PROCEDURE large_salaries()
SELECT *
FROM employee_salary
WHERE salary >= 60000;

-- Now if we run this it will work and create the stored procedure
-- we can click refresh and see that it is there

-- notice it did not give us an output: CREATE PROCEDURE only defines the
-- procedure, it does not run the query inside it

-- If we want to call it and use it we can call it by saying:
CALL large_salaries();

-- as you can see it ran the query inside the stored procedure we created


-- Now how we have written it is not actually best practice.
-- Usually when writing a stored procedure you don't have a simple query like that. It's usually more complex

-- if we tried to add another query to this stored procedure it wouldn't work.
-- It's a separate query:
-- Without BEGIN ... END, the procedure body ends at the first semicolon, so only the
-- first SELECT becomes part of the procedure; the second SELECT runs on its own.
DROP PROCEDURE IF EXISTS large_salaries2;
CREATE PROCEDURE large_salaries2()
SELECT *
FROM employee_salary
WHERE salary >= 60000;
SELECT *
FROM employee_salary
WHERE salary >= 50000;


-- Best practice is to use a delimiter and a BEGIN and END to really control what's in the stored procedure
-- let's see how we can do this.
-- the delimiter is what separates the queries by default, we can change this to something like two $$
-- in my career this is what I've seen a lot of people who work in SQL use so I've picked it up as well

-- When we change this delimiter it now reads in everything as one whole unit or query instead of stopping
-- after the first semicolon

-- The single-statement version created above has the same name, so it must be
-- dropped first; otherwise the CREATE below fails because the procedure already exists.
DROP PROCEDURE IF EXISTS large_salaries2;
DELIMITER $$
CREATE PROCEDURE large_salaries2()
BEGIN
    SELECT *
    FROM employee_salary
    WHERE salary >= 60000;

    SELECT *
    FROM employee_salary
    WHERE salary >= 50000;
END $$

-- now we change the delimiter back after we use it to make it default again
DELIMITER ;

-- let's refresh to see the SP
-- now we can run this stored procedure
CALL large_salaries2();

-- as you can see we have 2 outputs which are the 2 queries we had in our stored procedure



-- we can also create a stored procedure by right clicking on Stored Procedures and creating one:

-- it's going to drop the procedure if it already exists.
USE `parks_and_recreation`;
DROP PROCEDURE IF EXISTS `large_salaries3`;
-- it automatically adds the delimiter for us
DELIMITER $$
CREATE PROCEDURE large_salaries3()
BEGIN
    SELECT *
    FROM employee_salary
    WHERE salary >= 60000;

    SELECT *
    FROM employee_salary
    WHERE salary >= 50000;
END $$

DELIMITER ;

-- and changes it back at the end

-- this can be a genuinely good option to help you write your Stored Procedures faster, although either way
-- works

-- if we click finish you can see it is created the same and if we run it
-- (the call must use the name the procedure was created with: large_salaries3)

CALL large_salaries3();

-- we get our results



-- -------------------------------------------------------------------------

-- we can also add parameters
-- This replaces the parameterless large_salaries3 above with a version that
-- takes an employee id and only returns that employee's row (if salary >= 60000).
USE `parks_and_recreation`;
DROP PROCEDURE IF EXISTS `large_salaries3`;
DELIMITER $$
CREATE PROCEDURE large_salaries3(employee_id_param INT)
BEGIN
    SELECT *
    FROM employee_salary
    WHERE salary >= 60000
      AND employee_id_param = employee_id;
END $$

DELIMITER ;



CALL large_salaries3(1);

-- ============================================================================
-- File: /Advanced - Temp Tables.sql
-- ============================================================================

-- Using Temporary Tables
-- Temporary tables are tables that are only visible to the session that created them.
-- They can be used to store intermediate results for complex queries or to manipulate data before inserting it into a permanent table.

-- There's 2 ways to create temp tables:
-- 1.
-- This is the less commonly used way - which is to build it exactly like a real table and insert data into it

CREATE TEMPORARY TABLE temp_table
(
    first_name     VARCHAR(50),
    last_name      VARCHAR(50),
    favorite_movie VARCHAR(100)
);

-- if we execute this it gets created and we can actually query it.

SELECT *
FROM temp_table;
-- notice that if we refresh our tables it isn't there. It isn't a permanent table;
-- it only exists for the session that created it.

-- now obviously it's blank so we would need to insert data into it like this
-- (naming the columns keeps the insert correct even if the column order changes):

INSERT INTO temp_table (first_name, last_name, favorite_movie)
VALUES ('Alex','Freberg','Lord of the Rings: The Twin Towers');

-- now when we run it and execute it again we have our data
SELECT *
FROM temp_table;

-- the second way is much faster and my preferred method
-- 2. Build it by inserting data into it - easier and faster

CREATE TEMPORARY TABLE salary_over_50k
SELECT *
FROM employee_salary
WHERE salary > 50000;

-- if we run this query we get our output
-- (the temp table created above is salary_over_50k, so that is the name to query)
SELECT *
FROM salary_over_50k;

-- this is the primary way I've used temp tables especially if I'm just querying data and have some complex data I want to put into boxes or these temp tables to use later
-- it helps me kind of categorize and separate it out

-- In the next lesson we will look at the Temp Tables vs CTEs

-- ============================================================================
-- File: /Advanced - Triggers and Events.sql
-- ============================================================================

-- Triggers
-- a Trigger is a block of code that executes automatically when an event takes place in a table.

-- for example we have these 2 tables, employee_salary and employee_demographics - when a new
-- employee is inserted into employee_salary we want a matching row to be created in
-- employee_demographics automatically


SELECT * FROM employee_salary;

SELECT * FROM employee_demographics;

-- so really when a new row is inserted into the employee_salary table we want a trigger to
-- insert the same employee (id, first name, last name) into employee_demographics
-- so let's write this out
USE parks_and_recreation;

-- Drop any earlier copy so this script can be re-run without an "already exists" error.
DROP TRIGGER IF EXISTS employee_insert2;

DELIMITER $$

CREATE TRIGGER employee_insert2
    -- we can also do BEFORE, but for this lesson we have to do after
    AFTER INSERT ON employee_salary
    -- now this means this trigger gets activated for each row that is inserted. Some sql databases like MSSQL have batch triggers or table level triggers that
    -- only trigger once, but MySQL doesn't have this functionality unfortunately
    FOR EACH ROW

-- now we can write our block of code that we want to run when this is triggered
BEGIN
    -- we want to add the new employee to the employee_demographics table
    -- NEW refers to the row that was just inserted. There is also OLD, which refers to
    -- rows that were deleted or updated, but for us we want NEW
    INSERT INTO employee_demographics (employee_id, first_name, last_name)
    VALUES (NEW.employee_id, NEW.first_name, NEW.last_name);
END $$

DELIMITER ;

-- Now let's run it and create it


-- Now that it's created let's test it out.

-- Let's insert a row into the employee_salary table and see if it shows up in the employee_demographics table.
-- so let's put in the values that we want to insert - a brand new employee with id 13
INSERT INTO employee_salary (employee_id, first_name, last_name, occupation, salary, dept_id)
VALUES (13, 'Jean-Ralphio', 'Saperstein', 'Entertainment 720 CEO', 1000000, NULL);
-- now the row was inserted into employee_salary, the trigger fired, and the matching
-- row was inserted into employee_demographics

-- Clean up the test row again.
DELETE FROM employee_salary
WHERE employee_id = 13;

-- The trigger also created a row in employee_demographics. Remove it as well;
-- otherwise re-running the INSERT above fails, because the trigger would try to
-- insert a second row with the same primary key (employee_id = 13).
DELETE FROM employee_demographics
WHERE employee_id = 13;



-- -------------------------------------------------------------------------

-- now let's look at Events

-- Now I usually call these "Jobs" because I called them that for years in MSSQL, but in MySQL they're called Events

-- Events are tasks or blocks of code that get executed according to a schedule. These are fantastic for so many reasons. Importing data on a schedule.
-- Scheduling reports to be exported to files and so many other things
-- you can schedule all of this to happen every day, every monday, every first of the month at 10am.
-- Really whenever you want

-- This really helps with automation in MySQL

-- let's say Parks and Rec has a policy that anyone aged 60 or older is immediately retired with lifetime pay
-- All we have to do is delete them from the demographics table

SELECT *
FROM parks_and_recreation.employee_demographics;

SHOW EVENTS;

-- we can drop or alter these events like this:
DROP EVENT IF EXISTS delete_retirees;

-- NOTE: events only fire while the MySQL event scheduler is running;
-- check with: SHOW VARIABLES LIKE 'event_scheduler';
DELIMITER $$
CREATE EVENT delete_retirees
ON SCHEDULE EVERY 30 SECOND
DO
BEGIN
    DELETE
    FROM parks_and_recreation.employee_demographics
    WHERE age >= 60;
END $$

-- change the delimiter back; without this the statements below would never be
-- terminated, because ";" would no longer end a statement
DELIMITER ;


-- if we run it again you can see Jerry is now fired -- or I mean retired
SELECT *
FROM parks_and_recreation.employee_demographics;

-- ============================================================================
-- File: /Beginner - Group By + Order By.sql
-- ============================================================================

-- Group By
-- When you use the GROUP BY clause in a MySQL query, it groups together rows that have the same values in the specified column or columns.
-- GROUP BY is going to allow us to group rows that have the same data and run aggregate functions on them

SELECT *
FROM employee_demographics;

-- when you use group by, the non-aggregated columns you select have to be the columns you're grouping on
SELECT gender
FROM employee_demographics
GROUP BY gender
;

-- This one selects a column that is not in the GROUP BY, so MySQL rejects it when
-- ONLY_FULL_GROUP_BY is enabled. It is left commented out so the rest of the script
-- can run end-to-end; uncomment it to see the error.
-- SELECT first_name
-- FROM employee_demographics
-- GROUP BY gender
-- ;


SELECT occupation
FROM employee_salary
GROUP BY occupation
;

-- notice there is only one office manager row

-- when we group by 2 columns we now have a row for each occupation and salary combination because salary is different
SELECT occupation, salary
FROM employee_salary
GROUP BY occupation, salary
;

-- now the most useful reason we use group by is so we can perform our aggregate functions on them
SELECT gender, AVG(age)
FROM employee_demographics
GROUP BY gender
;

SELECT gender, MIN(age), MAX(age), COUNT(age),AVG(age)
FROM employee_demographics
GROUP BY gender
;



-- 10 - The ORDER BY clause:
-- -------------------------
-- The ORDER BY keyword is used to sort the result-set in ascending or descending order.

-- The ORDER BY keyword sorts the records in ascending order by default. To sort the records in descending order, use the DESC keyword.


-- So let's try it out with our employee_demographics table
-- First let's start simple with just ordering by one column
SELECT *
FROM employee_demographics
ORDER BY first_name;

-- You can see that first name is ordered from a - z or Ascending.
-- We can change that by specifying DESC after it
SELECT *
FROM employee_demographics;

-- ORDER BY sorts a to z (ascending) unless told otherwise
SELECT *
FROM employee_demographics
ORDER BY first_name ASC;

-- DESC reverses the sort order
SELECT *
FROM employee_demographics
ORDER BY first_name DESC;

-- Sorting on several columns: rows are ordered by the first column,
-- and ties are broken by the next one
SELECT *
FROM employee_demographics
ORDER BY
    gender ASC,
    age ASC;

SELECT *
FROM employee_demographics
ORDER BY
    gender DESC,
    age DESC;


-- now we don't actually have to spell out the column names. We can actually just use their column position

-- In employee_demographics, gender is in position 5 and age is in position 4, so we can use those as well.
SELECT *
FROM employee_demographics
ORDER BY
    5 DESC,
    4 DESC;

-- Now best practice is to use the column names as it's more overt and if columns are added or replaced or something in this table it will still use the right columns to order on.
-- So that's all there is to order by - fairly straight forward, but something I use for most queries I use in SQL

-- ============================================================================
-- File: /Beginner - Having vs Where.sql
-- ============================================================================

-- Having vs Where

-- Both were created to filter rows of data, but they filter 2 separate things
-- WHERE filters rows based off columns of data
-- HAVING filters rows based off aggregated columns when grouped

SELECT gender, AVG(age)
FROM employee_demographics
GROUP BY gender
;


-- let's try to filter on the avg age using where
-- (this statement is always an error - an aggregate function cannot be used in WHERE -
-- so it is left commented out to let the rest of the script run; uncomment to try it)

-- SELECT gender, AVG(age)
-- FROM employee_demographics
-- WHERE AVG(age) > 40
-- GROUP BY gender
-- ;

-- this doesn't work because of order of operations. On the backend WHERE comes before the GROUP BY.
-- So you can't filter on data that hasn't been grouped yet
-- this is why HAVING was created

SELECT
    gender,
    AVG(age)
FROM employee_demographics
GROUP BY gender
HAVING AVG(age) > 40;

-- MySQL also lets HAVING refer to the alias defined in the SELECT list
SELECT
    gender,
    AVG(age) as AVG_age
FROM employee_demographics
GROUP BY gender
HAVING AVG_age > 40;

-- ============================================================================
-- File: /Beginner - Limit and Aliasing.sql
-- ============================================================================

-- LIMIT and ALIASING

-- LIMIT specifies how many rows you want in the output

SELECT *
FROM employee_demographics
LIMIT 3;

-- changing the order (or adding a GROUP BY) changes which rows come back

SELECT *
FROM employee_demographics
ORDER BY first_name ASC
LIMIT 3;

-- LIMIT takes an optional extra parameter, separated by a comma:
-- the first number is how many rows to skip, the second is how many rows to return

SELECT *
FROM employee_demographics
ORDER BY first_name ASC;

SELECT *
FROM employee_demographics
ORDER BY first_name ASC
LIMIT 3,2;

-- this says: skip the first 3 rows and take the 2 rows after that
-- this is not used a lot in my opinion

-- you could use it if you wanted to select the third oldest person by doing this:
SELECT *
FROM employee_demographics
ORDER BY age DESC;
-- with the unmodified seed data the third row is Leslie (44), after Jerry (61)
-- and Donna (46) - let's try to select her
SELECT *
FROM employee_demographics
ORDER BY age DESC
LIMIT 2,1;


-- ALIASING

-- aliasing is just a way to change the name of the column (for the most part)
-- it can also be used in joins, but we will look at that in the intermediate series
SELECT gender, AVG(age)
FROM employee_demographics
GROUP BY gender
;
-- we can use the keyword AS to specify we are using an Alias
SELECT gender, AVG(age) AS Avg_age
FROM employee_demographics
GROUP BY gender
;

-- we don't actually need the AS keyword, but it's more explicit which I usually like
SELECT gender, AVG(age) Avg_age
FROM employee_demographics
GROUP BY gender
;

-- ============================================================================
-- File: /Beginner - Parks_and_Rec_Create_db.sql
-- ============================================================================

-- Creates the tutorial database from scratch and loads the seed data.
-- The database name is all lowercase because the other lesson scripts refer to it
-- as parks_and_recreation, and database names are case-sensitive on servers whose
-- file system is case-sensitive (typically Linux).
-- The mixed-case name used by earlier versions of this script is dropped as well,
-- so no stale copy is left behind on such servers.
DROP DATABASE IF EXISTS `Parks_and_Recreation`;
DROP DATABASE IF EXISTS `parks_and_recreation`;
CREATE DATABASE `parks_and_recreation`;
USE `parks_and_recreation`;


CREATE TABLE employee_demographics (
    employee_id INT NOT NULL,
    first_name  VARCHAR(50),
    last_name   VARCHAR(50),
    age         INT,
    gender      VARCHAR(10),
    birth_date  DATE,
    PRIMARY KEY (employee_id)
);

CREATE TABLE employee_salary (
    employee_id INT NOT NULL,
    first_name  VARCHAR(50) NOT NULL,
    last_name   VARCHAR(50) NOT NULL,
    occupation  VARCHAR(50),
    salary      INT,
    dept_id     INT
);


-- Note: there is no demographics row for employee_id 2 (present in employee_salary below).
INSERT INTO employee_demographics (employee_id, first_name, last_name, age, gender, birth_date)
VALUES
    (1,  'Leslie', 'Knope',        44, 'Female', '1979-09-25'),
    (3,  'Tom',    'Haverford',    36, 'Male',   '1987-03-04'),
    (4,  'April',  'Ludgate',      29, 'Female', '1994-03-27'),
    (5,  'Jerry',  'Gergich',      61, 'Male',   '1962-08-28'),
    (6,  'Donna',  'Meagle',       46, 'Female', '1977-07-30'),
    (7,  'Ann',    'Perkins',      35, 'Female', '1988-12-01'),
    (8,  'Chris',  'Traeger',      43, 'Male',   '1980-11-11'),
    (9,  'Ben',    'Wyatt',        38, 'Male',   '1985-07-26'),
    (10, 'Andy',   'Dwyer',        34, 'Male',   '1989-03-25'),
    (11, 'Mark',   'Brendanawicz', 40, 'Male',   '1983-06-14'),
    (12, 'Craig',  'Middlebrooks', 37, 'Male',   '1986-07-27');


-- dept_id refers to parks_departments.department_id (NULL = no department).
INSERT INTO employee_salary (employee_id, first_name, last_name, occupation, salary, dept_id)
VALUES
    (1,  'Leslie', 'Knope',        'Deputy Director of Parks and Recreation',           75000, 1),
    (2,  'Ron',    'Swanson',      'Director of Parks and Recreation',                  70000, 1),
    (3,  'Tom',    'Haverford',    'Entrepreneur',                                      50000, 1),
    (4,  'April',  'Ludgate',      'Assistant to the Director of Parks and Recreation', 25000, 1),
    (5,  'Jerry',  'Gergich',      'Office Manager',                                    50000, 1),
    (6,  'Donna',  'Meagle',       'Office Manager',                                    60000, 1),
    (7,  'Ann',    'Perkins',      'Nurse',                                             55000, 4),
    (8,  'Chris',  'Traeger',      'City Manager',                                      90000, 3),
    (9,  'Ben',    'Wyatt',        'State Auditor',                                     70000, 6),
    (10, 'Andy',   'Dwyer',        'Shoe Shiner and Musician',                          20000, NULL),
    (11, 'Mark',   'Brendanawicz', 'City Planner',                                      57000, 3),
    (12, 'Craig',  'Middlebrooks', 'Parks Director',                                    65000, 1);



CREATE TABLE parks_departments (
    department_id   INT NOT NULL AUTO_INCREMENT,
    department_name VARCHAR(50) NOT NULL,
    PRIMARY KEY (department_id)
);

-- department_id is assigned by AUTO_INCREMENT in insertion order:
-- 1 = Parks and Recreation ... 6 = Finance.
INSERT INTO parks_departments (department_name)
VALUES
    ('Parks and Recreation'),
    ('Animal Control'),
    ('Public Works'),
    ('Healthcare'),
    ('Library'),
    ('Finance');

-- ============================================================================
-- File: /Beginner - Select Statement.sql
-- ============================================================================

-- SELECT STATEMENT

-- the SELECT statement is used to work with columns and specify what columns you want to see in your output. There are a few other things as well that
-- we will discuss throughout this video

-- We can also select a specific number of columns based on our requirement.
-- Now remember we can just select everything by saying:
SELECT *
FROM parks_and_recreation.employee_demographics;


-- Let's try selecting a specific column
SELECT first_name
FROM employee_demographics;

-- As you can see from the output, we only have the one column here now and don't see the others

-- Now let's add some more columns, we just need to separate the columns with commas
SELECT first_name, last_name
FROM employee_demographics;

-- Now the order doesn't normally matter when selecting your columns.
-- There are some use cases we will look at in later modules where the order of the column
-- names in the select statement will matter, but for this you can put them in any order

SELECT last_name, first_name, gender, age
FROM employee_demographics;

-- You'll also often see SQL queries formatted like this.
SELECT last_name,
       first_name,
       gender,
       age
FROM employee_demographics;

-- The query still runs the exact same, but it is easier to read and pick out the columns
-- being selected and what you're doing with them.

-- For example let's take a look at using a calculation in the select statement

-- You can see here we have the salary column - we can perform calculations on this
-- (this database has no customers table, so the example uses employee_salary)
SELECT first_name,
       last_name,
       salary,
       salary + 100
FROM employee_salary;

-- See how it's pretty easy to read and to see which columns we are using.
-- Math in SQL does follow PEMDAS which stands for Parenthesis, Exponent, Multiplication,
-- Division, Addition, Subtraction - it's the order of operations for math

-- For example - What will the output be?:
SELECT first_name,
       last_name,
       salary,
       salary + 100 * 10
FROM employee_salary;
-- Multiplication comes first: this does 100 * 10, which is 1000, and then adds the original salary

-- Now what will the output be when we do this?
-- The parentheses come first: 100 is added to the salary and the result is multiplied by 10
SELECT first_name,
       last_name,
       salary,
       (salary + 100) * 10
FROM employee_salary;


-- PEMDAS

-- One thing I wanted to show you about the select statement in this lesson is the DISTINCT statement - this will return only unique values in
-- the output - and you won't have any duplicates
-- (the department column in employee_salary is named dept_id)

SELECT dept_id
FROM employee_salary;

SELECT DISTINCT dept_id
FROM employee_salary;

-- Now a lot happens in the select statement. We have an entire module dedicated to just the
-- select statement so this is kind of just an introduction to the select statement.

-- ============================================================================
-- File: /Beginner - Where Statement.sql
-- ============================================================================

-- WHERE Clause:
-- -------------
-- The WHERE clause is used to filter records (rows of data)

-- It's going to extract only those records that fulfill a specified condition.
-- So basically if we say WHERE name = 'Alex', only rows where the name = 'Alex' will return
-- So this is only affecting the rows, not the columns


-- Let's take a look at how this looks
SELECT *
FROM employee_salary
WHERE salary > 50000;

SELECT *
FROM employee_salary
WHERE salary >= 50000;

SELECT *
FROM employee_demographics
WHERE gender = 'Female';


-- We can also return rows where gender is not 'Female'
SELECT *
FROM employee_demographics
WHERE gender != 'Female';


-- We can use the WHERE clause with date values also
SELECT *
FROM employee_demographics
WHERE birth_date > '1985-01-01';

-- Here '1985-01-01' is written in the default date format in MySQL (YYYY-MM-DD).
-- There are other date formats as well that we will talk about in a later lesson.


-- LIKE STATEMENT

-- two special characters: a % and a _

-- % means anything (any number of characters, including none)
SELECT *
FROM employee_demographics
WHERE first_name LIKE 'a%';

-- _ means exactly one character: 'a__' is an "a" followed by exactly two characters
SELECT *
FROM employee_demographics
WHERE first_name LIKE 'a__';

-- the two can be combined: an "a", then exactly three characters, then anything
SELECT *
FROM employee_demographics
WHERE first_name LIKE 'a___%';

-- ============================================================================
-- File: /Intermediate - Case Statements.sql
-- ============================================================================

-- Case Statements

-- A Case Statement allows you to add logic to your Select Statement, sort of like an if else statement in other programming languages or even things like Excel

SELECT *
FROM employee_demographics;


-- Rows that match no WHEN branch (and have no ELSE) get NULL
SELECT first_name,
       last_name,
       CASE
           WHEN age <= 30 THEN 'Young'
       END
FROM employee_demographics;


-- The branches are checked in order and the first match wins.
-- The ranges are written so they do not overlap: 31-50 is 'Old', above 50 is the last label.
-- The apostrophe inside the single-quoted string is escaped by doubling it.
SELECT first_name,
       last_name,
       CASE
           WHEN age <= 30 THEN 'Young'
           WHEN age BETWEEN 31 AND 50 THEN 'Old'
           WHEN age > 50 THEN 'On Death''s Door'
       END
FROM employee_demographics;

-- Poor Jerry

-- Now we don't just have to do simple labels like we did, we can also perform calculations

-- Let's look at giving bonuses to employees

SELECT *
FROM employee_salary;

-- Pawnee Council sent out a memo of their bonus and pay increase structure so we need to follow it
-- Basically if they make less than 45k then they get a 5% raise - very generous
-- if they make more than 45k they get a 7% raise
-- they get a bonus of 10% if they work for the Finance Department

SELECT first_name, last_name, salary,
       CASE
           WHEN salary < 45000 THEN salary + (salary * 0.05)
           WHEN salary > 45000 THEN salary + (salary * 0.07)
       END AS new_salary
FROM employee_salary;

-- Pawnee Council was extremely specific in their wording: someone making exactly 45k matches
-- neither branch and would get NULL (no pay increase). With the seed data nobody makes
-- exactly 45k, so everyone - even Jerry - is included.
-- Now we need to also account for Bonuses, let's make a new column
-- Pay increase rule being applied: less than 45k gets a 5% raise, more than 45k gets a 7% raise.
-- Bonus rule: 10% of salary for the Finance Department, which is department_id 6
-- in parks_departments (the sixth department inserted by the create script).
SELECT first_name, last_name, salary,
       CASE
           WHEN salary < 45000 THEN salary + (salary * 0.05)
           WHEN salary > 45000 THEN salary + (salary * 0.07)
       END AS new_salary,
       CASE
           WHEN dept_id = 6 THEN salary * .10
       END AS Bonus
FROM employee_salary;

-- as you can see Ben is the only one who gets a bonus

-- ============================================================================
-- File: /Intermediate - Joins.sql
-- ============================================================================

-- Joins

-- joins allow you to combine 2 tables together (or more) if they have a common column.
-- doesn't mean they need the same column name, but the data in it are the same and can be used to join the tables together
-- there are several joins we will look at today, inner joins, outer joins, and self joins


-- here are the first 2 tables - let's see what columns and data in the rows we have in common that we can join on
SELECT *
FROM employee_demographics;

SELECT *
FROM employee_salary;

-- let's start with an inner join -- inner joins return only the rows that have a match in both tables

-- since both tables have an employee_id column we need to specify which table each one is coming from
SELECT *
FROM employee_demographics
INNER JOIN employee_salary
    ON employee_demographics.employee_id = employee_salary.employee_id;

-- notice Ron Swanson isn't in the results? This is because he doesn't have an employee id in the demographics table. He refused to give his birth date or age or gender

-- use aliasing!
SELECT *
FROM employee_demographics AS dem
INNER JOIN employee_salary AS sal
    ON dem.employee_id = sal.employee_id
;


-- OUTER JOINS

-- outer joins come in two flavours: left and right
-- a left join keeps every row from the left table even when nothing matches, and only the matching rows from the right table
-- a right join is the exact opposite

SELECT *
FROM employee_salary AS sal
LEFT JOIN employee_demographics AS dem
    ON dem.employee_id = sal.employee_id
;

-- so you'll notice we have everything from the left table (the salary table), even though there is no match for ron swanson.
-- Since there is no match on the right table, those columns are just all Nulls

-- if we switch this to a right join it basically just looks like an inner join
-- that's because we are taking everything from the demographics table and only matches from the left (salary) table.
-- Since they all have matches, it looks kind of like an inner join
SELECT *
FROM employee_salary AS sal
RIGHT JOIN employee_demographics AS dem
    ON dem.employee_id = sal.employee_id
;


-- Self Join

-- a self join is where you tie a table to itself

SELECT *
FROM employee_salary;

-- what we could do is a secret santa, where each employee is the secret santa for the person with the next-higher ID

-- joined on equal IDs, every employee is simply paired with themselves
SELECT *
FROM employee_salary AS emp1
INNER JOIN employee_salary AS emp2
    ON emp1.employee_id = emp2.employee_id
;

-- now let's change it to give them their secret santa: emp1 is the santa, emp2 (ID + 1) is the recipient
SELECT *
FROM employee_salary AS emp1
INNER JOIN employee_salary AS emp2
    ON emp1.employee_id + 1 = emp2.employee_id
;


-- same pairing, but only the columns we care about, with the santa's columns renamed
SELECT
    emp1.employee_id AS emp_santa,
    emp1.first_name AS santa_first_name,
    emp1.last_name AS santa_last_name,
    emp2.employee_id,
    emp2.first_name,
    emp2.last_name
FROM employee_salary AS emp1
INNER JOIN employee_salary AS emp2
    ON emp1.employee_id + 1 = emp2.employee_id
;

-- So leslie is Ron's secret santa and so on -- Mark Brandanowitz didn't get a secret santa, but he doesn't deserve one because he broke Ann's heart so it's all good


-- Joining multiple tables

-- now we have one other table we can join - let's take a look at it
SELECT *
FROM parks_and_recreation.parks_departments;


SELECT *
FROM employee_demographics AS dem
INNER JOIN employee_salary AS sal
    ON dem.employee_id = sal.employee_id
INNER JOIN parks_departments AS dept
    ON dept.department_id = sal.dept_id
;

-- now notice when we did that, since it's an inner join it got rid of andy because he wasn't a part of any department

-- if we do a left join we would still include him because we are taking everything from the left table which is the salary table in this instance
SELECT *
FROM employee_demographics AS dem
INNER JOIN employee_salary AS sal
    ON dem.employee_id = sal.employee_id
LEFT JOIN parks_departments AS dept
    ON dept.department_id = sal.dept_id
;


--------------------------------------------------------------------------------
/Intermediate - String Functions.sql:
--------------------------------------------------------------------------------
#Now let's look at string functions. These help us change and look at strings differently.
#Start by looking at the table the rest of this script works with.
#NOTE(review): this used to read bakery.customers, a schema nothing else in this series uses - confirm employee_demographics is the intended table.
SELECT *
FROM employee_demographics;


#Length will give us the length of each value
SELECT LENGTH('sky');

#Now we can see the length of each name
SELECT
    first_name,
    LENGTH(first_name)
FROM employee_demographics;

#Upper will change all the string characters to upper case
SELECT UPPER('sky');

SELECT
    first_name,
    UPPER(first_name)
FROM employee_demographics;

#Lower will change all the string characters to lower case
SELECT LOWER('sky');

SELECT
    first_name,
    LOWER(first_name)
FROM employee_demographics;

#Now if you have values that have white space on the front or end, we can get rid of that white space using TRIM
#(the padding has to be inside the quotes for there to be anything to trim)
SELECT TRIM('   sky   ');

#Now if we have white space in the middle it doesn't work - TRIM only touches the two ends
SELECT TRIM('   I love     SQL   ');

#There's also L trim for trimming just the left side
SELECT LTRIM(' I love SQL');


#There's also R trim for trimming just the Right side
SELECT RTRIM('I love SQL ');


#Now we have Left. Left is going to allow us to take a certain number of characters from the left hand side.
SELECT LEFT('Alexander', 4);

SELECT
    first_name,
    LEFT(first_name,4)
FROM employee_demographics;

#Right is basically the opposite - taking it starting from the right side
SELECT RIGHT('Alexander', 6);

SELECT
    first_name,
    RIGHT(first_name,4)
FROM employee_demographics;

#Now let's look at substring, this one I personally love and use a lot.
#Substring allows you to specify a starting point and how many characters you want so you can take characters from anywhere in the string.
SELECT SUBSTRING('Alexander', 2, 3);

#We could use this on phone numbers to get the area code at the beginning; here we use it to pull the year off the front of birth_date.
SELECT
    birth_date,
    SUBSTRING(birth_date,1,4) AS birth_year
FROM employee_demographics
;

#Replace swaps every occurrence of one substring for another
SELECT REPLACE(first_name,'a','z')
FROM employee_demographics
;

#Next up is locate - it takes 2 arguments: what we are searching for, and the string to search in
#It hands back the position of that character in the string.
SELECT LOCATE('x', 'Alexander');

#Alexander has 2 e's - what happens if we try to locate an e?
SELECT LOCATE('e', 'Alexander');
#Only the position of the first match comes back.

#Let's try it on our first name column
SELECT
    first_name,
    LOCATE('a',first_name)
FROM employee_demographics
;

#Longer search strings work too
SELECT
    first_name,
    LOCATE('Mic',first_name)
FROM employee_demographics
;

#Now concatenate - it glues the strings together into one
SELECT CONCAT('Alex', 'Freberg');

#Here the first and last name columns are combined, with a space in between
SELECT CONCAT(first_name, ' ', last_name) AS full_name
FROM employee_demographics
;


--------------------------------------------------------------------------------
/Intermediate - Subqueries.sql:
--------------------------------------------------------------------------------
# Subqueries

#So subqueries are queries within queries. Let's see how this looks.
SELECT *
FROM employee_demographics
;


#Say we want the employees who actually work in the Parks and Rec Department: we could join the tables together, or we could use a subquery
#The subquery version looks like this:

SELECT *
FROM employee_demographics
WHERE employee_id IN (
    SELECT employee_id
    FROM employee_salary
    WHERE dept_id = 1
)
;

#The subquery sits in the where statement; if we highlight just the subquery and run it, it's basically a list the outer query is selecting from

#This one fails on purpose: the subquery hands back two columns
SELECT *
FROM employee_demographics
WHERE employee_id IN (
    SELECT employee_id, salary
    FROM employee_salary
    WHERE dept_id = 1
)
;

# with more than 1 column in the subquery we get an error saying the operand should contain 1 column only

#Subqueries also work in the select and the from statements - let's see how

-- Say we want to look at the salaries and compare them to the average salary

SELECT
    first_name,
    salary,
    AVG(salary)
FROM employee_salary
;
-- running that is not going to work: plain columns are mixed with an aggregate function, so we need a group by
-- if we add one, though, we don't exactly get what we want
SELECT
    first_name,
    salary,
    AVG(salary)
FROM employee_salary
GROUP BY
    first_name,
    salary
;

-- that gives us the average PER GROUP, which we don't want
-- here's a good use for a subquery

SELECT
    first_name,
    salary,
    (SELECT AVG(salary)
     FROM employee_salary)
FROM employee_salary
;


-- A subquery can also go in the FROM statement
-- used there, it's almost like creating a small table that we then query off of
SELECT *
FROM (
    SELECT gender, MIN(age), MAX(age), COUNT(age),AVG(age)
    FROM employee_demographics
    GROUP BY gender
)
;
-- now this doesn't work
-- because we get an error saying we have to name it

-- with an alias on the derived table, and on its columns, the outer query can reference them by name
SELECT
    gender,
    AVG(Min_age)
FROM (
    SELECT
        gender,
        MIN(age) Min_age,
        MAX(age) Max_age,
        COUNT(age) Count_age,
        AVG(age) Avg_age
    FROM employee_demographics
    GROUP BY gender
) AS Agg_Table
GROUP BY gender
;


--------------------------------------------------------------------------------
/Intermediate - Unions.sql:
--------------------------------------------------------------------------------
#UNIONS


#A union is how you combine rows together - not columns, like we have been doing with joins, where one column is put next to another
#Unions let you stack the rows of one result on top of another

#You should keep it the same kind of data; if you start mixing tips with first_names it would be really confusing, but you can do it.
#Let's try it out and use Union to bring together some random data, then we will look at an actual use case

SELECT
    first_name,
    last_name
FROM employee_demographics
UNION
SELECT
    occupation,
    salary
FROM employee_salary
;

#So you can see the data got combined, not side by side in different columns, but one on top of the other in the same columns
#This obviously is not good since you're mixing data, but it can be done if you want.

SELECT
    first_name,
    last_name
FROM employee_demographics
UNION
SELECT
    first_name,
    last_name
FROM employee_salary
;

-- notice it gets rid of duplicates?
-- Union is actually shorthand for Union Distinct

SELECT
    first_name,
    last_name
FROM employee_demographics
UNION DISTINCT
SELECT
    first_name,
    last_name
FROM employee_salary
;

-- UNION ALL keeps every row, duplicates included

SELECT
    first_name,
    last_name
FROM employee_demographics
UNION ALL
SELECT
    first_name,
    last_name
FROM employee_salary
;


#Now let's actually put UNION to use
# The Parks department is trying to cut their budget and wants to identify older employees they can push out, or highly paid employees whose pay they can reduce or who they can push out
-- let's create some queries to help with this

SELECT
    first_name,
    last_name,
    'Old'
FROM employee_demographics
WHERE age > 50
;


-- three labelled lists stacked into one result; the ORDER BY at the end sorts the combined rows
SELECT
    first_name,
    last_name,
    'Old Lady' AS Label
FROM employee_demographics
WHERE age > 40
  AND gender = 'Female'
UNION
SELECT
    first_name,
    last_name,
    'Old Man'
FROM employee_demographics
WHERE age > 40
  AND gender = 'Male'
UNION
SELECT
    first_name,
    last_name,
    'Highly Paid Employee'
FROM employee_salary
WHERE salary >= 70000
ORDER BY first_name
;


--------------------------------------------------------------------------------
/Intermediate - Window Functions.sql:
--------------------------------------------------------------------------------
-- Window Functions

-- windows functions are really powerful and are somewhat like a group by - except they don't roll everything up into 1 row when grouping.
-- window functions allow us to look at a partition or a group, but each row keeps its own place in the output
-- we will also look at things like Row Numbers, rank, and dense rank

SELECT *
FROM employee_demographics;

-- first let's look at group by: one row per gender
SELECT
    gender,
    ROUND(AVG(salary),1)
FROM employee_demographics dem
INNER JOIN employee_salary sal
    ON dem.employee_id = sal.employee_id
GROUP BY gender
;

-- now let's try doing something similar with a window function
-- an empty OVER() makes the whole result set the window, so every row shows the overall average

SELECT
    dem.employee_id,
    dem.first_name,
    gender,
    salary,
    AVG(salary) OVER()
FROM employee_demographics dem
INNER JOIN employee_salary sal
    ON dem.employee_id = sal.employee_id
;

-- now we can add any columns and it works. We could get this exact same output with a subquery in the select statement,
-- but window functions have a lot more functionality, let's take a look


-- if we use partition it's kind of like the group by except it doesn't roll up - it just partitions or breaks based on a column when doing the calculation

SELECT
    dem.employee_id,
    dem.first_name,
    gender,
    salary,
    AVG(salary) OVER(PARTITION BY gender)
FROM employee_demographics dem
INNER JOIN employee_salary sal
    ON dem.employee_id = sal.employee_id
;


-- now if we wanted to see what the salaries were for genders we could do that by using sum, but also we could use order by to get a rolling total
-- employee_id exists in both joined tables, so the window ORDER BY names dem.employee_id explicitly
-- instead of relying on how the engine resolves the bare column name

SELECT
    dem.employee_id,
    dem.first_name,
    gender,
    salary,
    SUM(salary) OVER(PARTITION BY gender ORDER BY dem.employee_id)
FROM employee_demographics dem
INNER JOIN employee_salary sal
    ON dem.employee_id = sal.employee_id
;


-- Let's look at row_number rank and dense rank now

-- with no ORDER BY inside OVER(), the order in which rows get numbered within each gender is not guaranteed
SELECT
    dem.employee_id,
    dem.first_name,
    gender,
    salary,
    ROW_NUMBER() OVER(PARTITION BY gender)
FROM employee_demographics dem
INNER JOIN employee_salary sal
    ON dem.employee_id = sal.employee_id
;

-- let's try ordering by salary so we can see the order of highest paid employees by gender
SELECT
    dem.employee_id,
    dem.first_name,
    gender,
    salary,
    ROW_NUMBER() OVER(PARTITION BY gender ORDER BY salary DESC)
FROM employee_demographics dem
INNER JOIN employee_salary sal
    ON dem.employee_id = sal.employee_id
;

-- let's compare this to rank
SELECT
    dem.employee_id,
    dem.first_name,
    gender,
    salary,
    ROW_NUMBER() OVER(PARTITION BY gender ORDER BY salary DESC) AS row_num,
    RANK() OVER(PARTITION BY gender ORDER BY salary DESC) AS rank_1
FROM employee_demographics dem
INNER JOIN employee_salary sal
    ON dem.employee_id = sal.employee_id
;

-- notice rank repeats on tom and jerry at 5, but then skips 6 to go to 7 -- this goes based off positional rank


-- let's compare this to dense rank
SELECT
    dem.employee_id,
    dem.first_name,
    gender,
    salary,
    ROW_NUMBER() OVER(PARTITION BY gender ORDER BY salary DESC) AS row_num,
    RANK() OVER(PARTITION BY gender ORDER BY salary DESC) AS rank_1,
    DENSE_RANK() OVER(PARTITION BY gender ORDER BY salary DESC) AS dense_rank_2 -- this is numerically ordered instead of positional like rank
FROM employee_demographics dem
INNER JOIN employee_salary sal
    ON dem.employee_id = sal.employee_id
;


--------------------------------------------------------------------------------
/Portfolio Project - Data Cleaning.sql:
--------------------------------------------------------------------------------
-- SQL Project - Data Cleaning

-- https://www.kaggle.com/datasets/swaptr/layoffs-2022


SELECT *
FROM world_layoffs.layoffs;


-- first thing we want to do is create a staging table. This is the one we will work in and clean the data.
-- We want a table with the raw data in case something happens
CREATE TABLE world_layoffs.layoffs_staging
LIKE world_layoffs.layoffs;

-- Copy every raw row into the staging table. SELECT * is safe here because the staging table
-- was just created LIKE the source, so both have the same columns in the same order.
-- The target is schema-qualified like every other statement, so it does not depend on the default database.
INSERT INTO world_layoffs.layoffs_staging
SELECT *
FROM world_layoffs.layoffs;


-- now when we are data cleaning we usually follow a few steps
-- 1. check for duplicates and remove any
-- 2. standardize data and fix errors
-- 3. look at null values and decide what to do with them
-- 4. remove any columns and rows that are not necessary - few ways


-- 1. Remove Duplicates

# First let's check for duplicates

SELECT *
FROM world_layoffs.layoffs_staging;

-- number the rows inside each (company, industry, total_laid_off, date) group;
-- any row numbered above 1 repeats an earlier row on those four columns
SELECT
    company,
    industry,
    total_laid_off,
    `date`,
    ROW_NUMBER() OVER (
        PARTITION BY company, industry, total_laid_off, `date`
    ) AS row_num
FROM world_layoffs.layoffs_staging;


-- keep only the repeats
SELECT *
FROM (
    SELECT
        company,
        industry,
        total_laid_off,
        `date`,
        ROW_NUMBER() OVER (
            PARTITION BY company, industry, total_laid_off, `date`
        ) AS row_num
    FROM world_layoffs.layoffs_staging
) duplicates
WHERE row_num > 1;

-- let's just look at oda to confirm
SELECT *
FROM world_layoffs.layoffs_staging
WHERE company = 'Oda';
-- it looks like these are all legitimate entries and shouldn't be deleted.
-- We need to really look at every single row to be accurate

-- these are our real duplicates: rows that repeat on every column
SELECT *
FROM (
    SELECT
        company,
        location,
        industry,
        total_laid_off,
        percentage_laid_off,
        `date`,
        stage,
        country,
        funds_raised_millions,
        ROW_NUMBER() OVER (
            PARTITION BY
                company, location, industry, total_laid_off, percentage_laid_off,
                `date`, stage, country, funds_raised_millions
        ) AS row_num
    FROM world_layoffs.layoffs_staging
) duplicates
WHERE row_num > 1
;

-- these are the ones we want to delete: where the row number is > 1, or 2 or greater essentially

-- now you may want to write it like this:
-- NOTE(review): this first attempt deletes from the CTE itself; MySQL is expected to reject
-- a CTE as a DELETE target - confirm before relying on it.
WITH DELETE_CTE AS (
    SELECT *
    FROM (
        SELECT
            company,
            location,
            industry,
            total_laid_off,
            percentage_laid_off,
            `date`,
            stage,
            country,
            funds_raised_millions,
            ROW_NUMBER() OVER (
                PARTITION BY
                    company, location, industry, total_laid_off, percentage_laid_off,
                    `date`, stage, country, funds_raised_millions
            ) AS row_num
        FROM world_layoffs.layoffs_staging
    ) duplicates
    WHERE row_num > 1
)
DELETE
FROM DELETE_CTE
;


-- NOTE(review): second attempt; the outer DELETE filters on row_num, which at this point in the
-- script is only produced inside DELETE_CTE and is not yet a column of layoffs_staging,
-- so this is presumably expected to fail as well - confirm.
WITH DELETE_CTE AS (
    SELECT
        company,
        location,
        industry,
        total_laid_off,
        percentage_laid_off,
        `date`,
        stage,
        country,
        funds_raised_millions,
        ROW_NUMBER() OVER (
            PARTITION BY
                company, location, industry, total_laid_off, percentage_laid_off,
                `date`, stage, country, funds_raised_millions
        ) AS row_num
    FROM world_layoffs.layoffs_staging
)
DELETE FROM world_layoffs.layoffs_staging
WHERE (company, location, industry, total_laid_off, percentage_laid_off, `date`, stage, country, funds_raised_millions, row_num) IN (
        SELECT
            company,
            location,
            industry,
            total_laid_off,
            percentage_laid_off,
            `date`,
            stage,
            country,
            funds_raised_millions,
            row_num
        FROM DELETE_CTE
    )
  AND row_num > 1
;

-- one solution, which I think is a good one.
-- Is to create a new column and add those row numbers in. Then delete where the row number is 2 or greater, then delete that column
-- so let's do it!!

ALTER TABLE world_layoffs.layoffs_staging
    ADD COLUMN row_num INT;


SELECT *
FROM world_layoffs.layoffs_staging;

-- second staging table: the same columns as the data, plus row_num to hold the duplicate counter
CREATE TABLE `world_layoffs`.`layoffs_staging2` (
    `company` TEXT,
    `location` TEXT,
    `industry` TEXT,
    `total_laid_off` INT,
    `percentage_laid_off` TEXT,
    `date` TEXT,
    `stage` TEXT,
    `country` TEXT,
    `funds_raised_millions` INT,
    `row_num` INT
);

-- copy every row across, numbering the rows within each group of fully identical rows
INSERT INTO `world_layoffs`.`layoffs_staging2` (
    `company`,
    `location`,
    `industry`,
    `total_laid_off`,
    `percentage_laid_off`,
    `date`,
    `stage`,
    `country`,
    `funds_raised_millions`,
    `row_num`
)
SELECT
    `company`,
    `location`,
    `industry`,
    `total_laid_off`,
    `percentage_laid_off`,
    `date`,
    `stage`,
    `country`,
    `funds_raised_millions`,
    ROW_NUMBER() OVER (
        PARTITION BY
            company, location, industry, total_laid_off, percentage_laid_off,
            `date`, stage, country, funds_raised_millions
    ) AS row_num
FROM world_layoffs.layoffs_staging
;

-- now that we have this we can delete the rows where row_num is 2 or greater, i.e. every repeat after the first copy

DELETE FROM world_layoffs.layoffs_staging2
WHERE row_num > 1
;


-- 2.
-- Standardize Data

SELECT *
FROM world_layoffs.layoffs_staging2;

-- if we look at industry it looks like we have some null and empty rows, let's take a look at these
SELECT DISTINCT industry
FROM world_layoffs.layoffs_staging2
ORDER BY industry;

SELECT *
FROM world_layoffs.layoffs_staging2
WHERE industry IS NULL
   OR industry = ''
ORDER BY industry;

-- let's take a look at these
SELECT *
FROM world_layoffs.layoffs_staging2
WHERE company LIKE 'Bally%';
-- nothing wrong here
SELECT *
FROM world_layoffs.layoffs_staging2
WHERE company LIKE 'airbnb%';

-- it looks like airbnb is a Travel company, but this one row just isn't populated.
-- I'm sure it's the same for the others. What we can do is
-- write a query that, if there is another row with the same company name, updates the blank row to that non-null industry value
-- makes it easy so if there were thousands we wouldn't have to manually check them all

-- we should set the blanks to nulls since those are typically easier to work with
UPDATE world_layoffs.layoffs_staging2
SET industry = NULL
WHERE industry = '';

-- now if we check, those are all null

SELECT *
FROM world_layoffs.layoffs_staging2
WHERE industry IS NULL
   OR industry = ''
ORDER BY industry;

-- now we need to populate those nulls if possible
-- self-join on company: "missing" is a row with no industry, "filled" is another row
-- for the same company that has one. Both sides are schema-qualified like the rest of the script.

UPDATE world_layoffs.layoffs_staging2 AS missing
INNER JOIN world_layoffs.layoffs_staging2 AS filled
    ON missing.company = filled.company
SET missing.industry = filled.industry
WHERE missing.industry IS NULL
  AND filled.industry IS NOT NULL;

-- and if we check, it looks like Bally's was the only one without a populated row to fill in its null value
SELECT *
FROM world_layoffs.layoffs_staging2
WHERE industry IS NULL
   OR industry = ''
ORDER BY industry;

--
-- ---------------------------------------------------

-- I also noticed that Crypto has multiple different variations. We need to standardize that - let's set them all to Crypto
SELECT DISTINCT industry
FROM world_layoffs.layoffs_staging2
ORDER BY industry;

UPDATE world_layoffs.layoffs_staging2
SET industry = 'Crypto'
WHERE industry IN ('Crypto Currency', 'CryptoCurrency');

-- now that's taken care of:
SELECT DISTINCT industry
FROM world_layoffs.layoffs_staging2
ORDER BY industry;

-- --------------------------------------------------
-- we also need to look at country

SELECT *
FROM world_layoffs.layoffs_staging2;

-- everything looks good except apparently we have some "United States" and some "United States." with a period at the end. Let's standardize this.
SELECT DISTINCT country
FROM world_layoffs.layoffs_staging2
ORDER BY country;

-- only rows that actually end in a period need rewriting
UPDATE world_layoffs.layoffs_staging2
SET country = TRIM(TRAILING '.' FROM country)
WHERE country LIKE '%.';

-- now if we run this again it is fixed
SELECT DISTINCT country
FROM world_layoffs.layoffs_staging2
ORDER BY country;


-- Let's also fix the date column:
SELECT *
FROM world_layoffs.layoffs_staging2;

-- we can use STR_TO_DATE to update this field
-- (no WHERE on purpose: every row's text date is converted from month/day/year format)
UPDATE world_layoffs.layoffs_staging2
SET `date` = STR_TO_DATE(`date`, '%m/%d/%Y');

-- now we can convert the data type properly
ALTER TABLE world_layoffs.layoffs_staging2
    MODIFY COLUMN `date` DATE;


SELECT *
FROM world_layoffs.layoffs_staging2;


-- 3. Look at Null Values

-- the null values in total_laid_off, percentage_laid_off, and funds_raised_millions all look normal.
-- I don't think I want to change that
-- I like having them null because it makes it easier for calculations during the EDA phase

-- so there isn't anything I want to change with the null values


-- 4. remove any columns and rows we need to

SELECT *
FROM world_layoffs.layoffs_staging2
WHERE total_laid_off IS NULL;


-- rows with neither a head count nor a percentage
SELECT *
FROM world_layoffs.layoffs_staging2
WHERE total_laid_off IS NULL
  AND percentage_laid_off IS NULL;

-- Delete useless data we can't really use
DELETE FROM world_layoffs.layoffs_staging2
WHERE total_laid_off IS NULL
  AND percentage_laid_off IS NULL;

SELECT *
FROM world_layoffs.layoffs_staging2;

-- row_num was only there for the duplicate removal, so drop it
-- (schema-qualified like every other statement, so it does not depend on the default database)
ALTER TABLE world_layoffs.layoffs_staging2
    DROP COLUMN row_num;


SELECT *
FROM world_layoffs.layoffs_staging2;


--------------------------------------------------------------------------------
/Portfolio Project - EDA.sql:
--------------------------------------------------------------------------------
-- EDA

-- Here we are just going to explore the data and find trends or patterns or anything interesting like outliers

-- normally when you start the EDA process you have some idea of what you're looking for

-- with this info we are just going to look around and see what we find!
SELECT *
FROM world_layoffs.layoffs_staging2;

-- EASIER QUERIES

SELECT MAX(total_laid_off)
FROM world_layoffs.layoffs_staging2;


-- Looking at Percentage to see how big these layoffs were
-- NOTE(review): percentage_laid_off is declared as text in layoffs_staging2, so MAX/MIN compare
-- it as strings and the "= 1" filters below rely on implicit string-to-number conversion.
SELECT MAX(percentage_laid_off), MIN(percentage_laid_off)
FROM world_layoffs.layoffs_staging2
WHERE percentage_laid_off IS NOT NULL;

-- Which companies had 1, which is basically 100 percent of the company laid off
SELECT *
FROM world_layoffs.layoffs_staging2
WHERE percentage_laid_off = 1;
-- these are mostly startups it looks like who all went out of business during this time

-- if we order by funds_raised_millions we can see how big some of these companies were
SELECT *
FROM world_layoffs.layoffs_staging2
WHERE percentage_laid_off = 1
ORDER BY funds_raised_millions DESC;
-- BritishVolt looks like an EV company, Quibi! I recognize that company - wow raised like 2 billion dollars and went under - ouch


-- SOMEWHAT TOUGHER AND MOSTLY USING GROUP BY--------------------------------------------------------------------------------------------------

-- Companies with the biggest single Layoff
-- (reads the cleaned layoffs_staging2 table like every other query here, not the raw staging copy)

SELECT company, total_laid_off
FROM world_layoffs.layoffs_staging2
ORDER BY total_laid_off DESC
LIMIT 5;
-- now that's just on a single day

-- Companies with the most Total Layoffs
SELECT company, SUM(total_laid_off)
FROM world_layoffs.layoffs_staging2
GROUP BY company
ORDER BY SUM(total_laid_off) DESC
LIMIT 10;


-- by location
SELECT location, SUM(total_laid_off)
FROM world_layoffs.layoffs_staging2
GROUP BY location
ORDER BY SUM(total_laid_off) DESC
LIMIT 10;

-- this is the total in the past 3 years or in the dataset

SELECT country, SUM(total_laid_off)
FROM world_layoffs.layoffs_staging2
GROUP BY country
ORDER BY SUM(total_laid_off) DESC;

SELECT YEAR(date), SUM(total_laid_off)
FROM world_layoffs.layoffs_staging2
GROUP BY YEAR(date)
ORDER BY YEAR(date) ASC;


SELECT industry, SUM(total_laid_off)
FROM world_layoffs.layoffs_staging2
GROUP BY industry
ORDER BY SUM(total_laid_off) DESC;


SELECT stage, SUM(total_laid_off)
FROM world_layoffs.layoffs_staging2
GROUP BY stage
ORDER BY SUM(total_laid_off) DESC;


-- TOUGHER QUERIES------------------------------------------------------------------------------------------------------------------------------------

-- Earlier we looked at Companies with the most Layoffs. Now let's look at that per year. It's a little more difficult.
-- I want to look at the top 3 companies by layoffs within each year

-- Company_Year: total layoffs per company per year
-- Company_Year_Rank: ranks the companies inside each year, biggest total first
WITH Company_Year AS
(
    SELECT company, YEAR(date) AS years, SUM(total_laid_off) AS total_laid_off
    FROM world_layoffs.layoffs_staging2
    GROUP BY company, YEAR(date)
)
, Company_Year_Rank AS (
    SELECT
        company,
        years,
        total_laid_off,
        DENSE_RANK() OVER (PARTITION BY years ORDER BY total_laid_off DESC) AS ranking
    FROM Company_Year
)
SELECT company, years, total_laid_off, ranking
FROM Company_Year_Rank
WHERE ranking <= 3
  AND years IS NOT NULL
ORDER BY years ASC, total_laid_off DESC;


-- Rolling Total of Layoffs Per Month
-- the first 7 characters of the date are the year and month (YYYY-MM)
SELECT SUBSTRING(date,1,7) AS dates, SUM(total_laid_off) AS total_laid_off
FROM world_layoffs.layoffs_staging2
GROUP BY dates
ORDER BY dates ASC;

-- now use it in a CTE so we can query off of it
-- (the CTE itself is not ordered; the window ORDER BY and the outer ORDER BY set the row order)
WITH DATE_CTE AS
(
    SELECT SUBSTRING(date,1,7) AS dates, SUM(total_laid_off) AS total_laid_off
    FROM world_layoffs.layoffs_staging2
    GROUP BY dates
)
SELECT dates, SUM(total_laid_off) OVER (ORDER BY dates ASC) AS rolling_total_layoffs
FROM DATE_CTE
ORDER BY dates ASC;


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# MySQL-YouTube-Series
--------------------------------------------------------------------------------