├── Advanced - CTEs.sql
├── Advanced - Stored Procedures.sql
├── Advanced - Temp Tables.sql
├── Advanced - Triggers and Events.sql
├── Beginner - Group By + Order By.sql
├── Beginner - Having vs Where.sql
├── Beginner - Limit and Aliasing.sql
├── Beginner - Parks_and_Rec_Create_db.sql
├── Beginner - Select Statement.sql
├── Beginner - Where Statement.sql
├── Intermediate - Case Statements.sql
├── Intermediate - Joins.sql
├── Intermediate - String Functions.sql
├── Intermediate - Subqueries.sql
├── Intermediate - Unions.sql
├── Intermediate - Window Functions.sql
├── Portfolio Project - Data Cleaning.sql
├── Portfolio Project - EDA.sql
├── README.md
└── layoffs.csv


/Advanced - CTEs.sql:
--------------------------------------------------------------------------------
  1 | -- Using Common Table Expressions (CTE)
  2 | -- A CTE allows you to define a subquery block that can be referenced within the main query. 
  3 | -- It is particularly useful for recursive queries or queries that require referencing a higher level
  4 | -- this is something we will look at in the next lesson.
  5 | 
  6 | -- Let's take a look at the basics of writing a CTE:
  7 | 
  8 | 
  9 | -- A CTE always opens with the WITH keyword, followed by whatever name we want to give it.
 10 | -- After AS, the parentheses hold the query that produces the CTE's rows.
 11 | WITH CTE_Example AS
 12 | (
 13 |     SELECT gender, SUM(salary), MIN(salary), MAX(salary), COUNT(salary), AVG(salary)
 14 |     FROM employee_demographics AS dem
 15 |     INNER JOIN employee_salary AS sal
 16 |         ON sal.employee_id = dem.employee_id
 17 |     GROUP BY gender
 18 | )
 19 | -- the statement written directly after the CTE definition is the one that queries it
 20 | SELECT *
 21 | FROM CTE_Example;
 22 | 
 23 | 
 24 | -- Now if I come down here and run this on its own, it won't work: a CTE only exists for the one statement that directly follows its WITH definition
 25 | SELECT *
 26 | FROM CTE_Example;
 27 | 
 28 | 
 29 | 
 30 | -- Now we can use the columns within this CTE to do calculations on this data that
 31 | -- we couldn't have done without it.
 32 | 
 33 | WITH CTE_Example AS 
 34 | (
 35 | SELECT gender, SUM(salary), MIN(salary), MAX(salary), COUNT(salary)
 36 | FROM employee_demographics dem
 37 | JOIN employee_salary sal
 38 | 	ON dem.employee_id = sal.employee_id
 39 | GROUP BY gender
 40 | )
 41 | -- notice here I have to use back ticks to reference the column names: the CTE's columns are literally named `SUM(salary)` and `COUNT(salary)`, so without the back ticks it doesn't work
 42 | SELECT gender, ROUND(AVG(`SUM(salary)`/`COUNT(salary)`),2)
 43 | FROM CTE_Example
 44 | GROUP BY gender;
 45 | 
 46 | 
 47 | 
 48 | -- we also have the ability to create multiple CTEs with just one With Expression
 49 | 
 50 | WITH CTE_Example AS
 51 | (
 52 |     SELECT employee_id, gender, birth_date
 53 |     FROM employee_demographics AS dem
 54 |     WHERE birth_date > '1985-01-01'
 55 | ), -- a comma is all that separates one CTE from the next
 56 | CTE_Example2 AS
 57 | (
 58 |     SELECT employee_id, salary
 59 |     FROM parks_and_recreation.employee_salary
 60 |     WHERE salary >= 50000
 61 | )
 62 | -- both CTEs are in scope here, so they can be joined to each other like ordinary tables
 63 | SELECT *
 64 | FROM CTE_Example AS cte1
 65 | LEFT JOIN CTE_Example2 AS cte2
 66 |     ON cte1.employee_id = cte2.employee_id;
 67 | 
 68 | 
 69 | -- the last thing I wanted to show you is that we can actually make our life easier by renaming the columns in the CTE
 70 | -- let's take our very first CTE we made. We had to use tick marks because of the column names
 71 | 
 72 | -- we can rename them like this
 73 | WITH CTE_Example (gender, sum_salary, min_salary, max_salary, count_salary) AS 
 74 | (
 75 | SELECT gender, SUM(salary), MIN(salary), MAX(salary), COUNT(salary)
 76 | FROM employee_demographics dem
 77 | JOIN employee_salary sal
 78 | 	ON dem.employee_id = sal.employee_id
 79 | GROUP BY gender
 80 | )
 81 | -- notice the column list after the CTE name renamed the columns, so here we can use sum_salary and count_salary directly - no back ticks needed
 82 | SELECT gender, ROUND(AVG(sum_salary/count_salary),2)
 83 | FROM CTE_Example
 84 | GROUP BY gender;
 85 | 
 86 | 
 87 | 
 88 | 
 89 | 
 90 | 
 91 | 
 92 | 
 93 | 
 94 | 
 95 | 
 96 | 
 97 | 
 98 | 
 99 | 
100 | 
101 | 
102 | 
103 | 
104 | 
105 | 
106 | 
107 | 
108 | 
109 | 
110 | 
111 | 
112 | 


--------------------------------------------------------------------------------
/Advanced - Stored Procedures.sql:
--------------------------------------------------------------------------------
  1 | -- So let's look at how we can create a stored procedure
  2 | 
  3 | -- First let's just write a super simple query
  4 | SELECT *
  5 | FROM employee_salary
  6 | WHERE salary >= 60000;
  7 | 
  8 | -- Wrapping that same query in a stored procedure saves it under a name we can call later.
  9 | CREATE PROCEDURE large_salaries()
 10 | SELECT es.*
 11 | FROM employee_salary AS es
 12 | WHERE es.salary >= 60000;
 13 | 
 14 | -- Now if we run this it will work and create the stored procedure
 15 | -- we can click refresh and see that it is there
 16 | 
 17 | -- notice it did not give us an output, that's because we only created the procedure - we haven't called it yet
 18 | 
 19 | -- If we want to call it and use it we can call it by saying:
 20 | CALL large_salaries();
 21 | 
 22 | -- as you can see it ran the query inside the stored procedure we created
 23 | 
 24 | 
 25 | -- Now how we have written it is not actually best practice.
 26 | -- Usually when writing a stored procedure you don't have a simple query like that. It's usually more complex
 27 | 
 28 | -- if we tried to add another query to this stored procedure it wouldn't work. It's a separate query:
 29 | CREATE PROCEDURE large_salaries2()
 30 | SELECT *
 31 | FROM employee_salary
 32 | WHERE salary >= 60000;
 33 | SELECT *
 34 | FROM employee_salary
 35 | WHERE salary >= 50000;
 36 | 
 37 | 
 38 | -- Best practice is to use a delimiter and a Begin and End to really control what's in the stored procedure
 39 | -- let's see how we can do this.
 40 | -- the delimiter is what separates the queries by default, we can change this to something like two $$
 41 | -- in my career this is what I've seen a lot of people who work in SQL use so I've picked it up as well
 42 | DROP PROCEDURE IF EXISTS large_salaries2; -- the earlier attempt already created a procedure with this name, so CREATE would fail without this
 43 | -- When we change this delimiter it now reads in everything as one whole unit or query instead of stopping
 44 | -- after the first semi colon
 45 | DELIMITER $$
 46 | CREATE PROCEDURE large_salaries2()
 47 | BEGIN
 48 | 	-- first result set: salaries of 60k and up
 49 | 	SELECT *
 50 | 	FROM employee_salary WHERE salary >= 60000;
 51 | 	-- second result set: salaries of 50k and up
 52 | 	SELECT *
 53 | 	FROM employee_salary WHERE salary >= 50000;
 54 | END $$
 55 | 
 56 | -- now we change the delimiter back after we use it to make it default again
 57 | DELIMITER ;
 58 | 
 59 | -- let's refresh to see the SP
 60 | -- now we can run this stored procedure
 61 | CALL large_salaries2();
 62 | 
 63 | -- as you can see we have 2 outputs which are the 2 queries we had in our stored procedure
 64 | 
 65 | 
 66 | 
 67 | -- we can also create a stored procedure by right clicking on Stored Procedures and creating one:
 68 | 
 69 | -- the generated script first drops the procedure if it already exists.
 70 | USE `parks_and_recreation`;
 71 | DROP PROCEDURE IF EXISTS `large_salaries3`;
 72 | -- it automatically adds the delimiter for us
 73 | DELIMITER $$
 74 | CREATE PROCEDURE `large_salaries3`()
 75 | BEGIN
 76 |     SELECT es.*
 77 |     FROM employee_salary AS es
 78 |     WHERE es.salary >= 60000;
 79 |     SELECT es.*
 80 |     FROM employee_salary AS es
 81 |     WHERE es.salary >= 50000;
 82 | END $$
 83 | 
 84 | DELIMITER ;
 85 | 
 86 | -- and changes it back at the end
 87 | 
 88 | -- this can be a genuinely good option to help you write your Stored Procedures faster, although either way
 89 | -- works
 90 | 
 91 | -- if we click finish you can see it is created the same and if we run it
 92 | -- (the procedure created above is named large_salaries3)
 93 | CALL large_salaries3();
 94 | 
 95 | -- we get our results
 96 | 
 97 | 
 98 | 
 99 | -- -------------------------------------------------------------------------
100 | 
101 | -- a procedure can also take parameters
102 | USE `parks_and_recreation`;
103 | DROP PROCEDURE IF EXISTS `large_salaries3`;
104 | -- it automatically adds the delimiter for us
105 | DELIMITER $$
106 | CREATE PROCEDURE large_salaries3(IN employee_id_param INT)
107 | BEGIN
108 |     -- only return the row for the requested employee, and only if they make 60k or more
109 |     SELECT es.*
110 |     FROM employee_salary AS es
111 |     WHERE es.employee_id = employee_id_param AND es.salary >= 60000;
112 | END $$
113 | 
114 | DELIMITER ;
115 | 
116 | 
117 | -- the value in the parentheses is passed in as employee_id_param
118 | CALL large_salaries3(1);
119 | 
120 | 
121 | 
122 | 
123 | 
124 | 
125 | 
126 | 
127 | 
128 | 
129 | 
130 | 
131 | 
132 | 
133 | 
134 | 
135 | 
136 | 
137 | 
138 | 
139 | 
140 | 
141 | 
142 | 
143 | 
144 | 
145 | 
146 | 
147 | 
148 | 
149 | 
150 | 
151 | 


--------------------------------------------------------------------------------
/Advanced - Temp Tables.sql:
--------------------------------------------------------------------------------
  1 | -- Using Temporary Tables
  2 | -- Temporary tables are tables that are only visible to the session that created them. 
  3 | -- They can be used to store intermediate results for complex queries or to manipulate data before inserting it into a permanent table.
  4 | 
  5 | -- There's 2 ways to create temp tables:
  6 | -- 1. This is the less commonly used way - which is to build it exactly like a real table and insert data into it
  7 | 
  8 | CREATE TEMPORARY TABLE temp_table ( -- defined column by column, just like a permanent table
  9 |     first_name     VARCHAR(50),
 10 |     last_name      VARCHAR(50),
 11 |     favorite_movie VARCHAR(100)
 12 | );
 13 | 
 14 | -- if we execute this it gets created and we can actually query it.
 15 | 
 16 | SELECT *
 17 | FROM temp_table;
 18 | -- notice that if we refresh our tables it isn't there. It isn't a permanent table. It only exists for this session.
 19 | 
 20 | -- now obviously it's blank so we would need to insert data into it like this:
 21 | 
 22 | INSERT INTO temp_table (first_name, last_name, favorite_movie) -- naming the columns keeps the insert correct even if the table's column order changes
 23 | VALUES ('Alex','Freberg','Lord of the Rings: The Twin Towers');
 24 | 
 25 | -- now when we run it and execute it again we have our data
 26 | SELECT *
 27 | FROM temp_table;
 28 | 
 29 | -- the second way is much faster and my preferred method
 30 | -- 2. Build it by inserting data into it - easier and faster
 31 | 
 32 | CREATE TEMPORARY TABLE salary_over_50k
 33 | SELECT *
 34 | FROM employee_salary
 35 | WHERE salary > 50000;
 36 | 
 37 | -- if we query the temp table we just created (salary_over_50k) we get our output
 38 | SELECT *
 39 | FROM salary_over_50k;
 40 | 
 41 | -- this is the primary way I've used temp tables especially if I'm just querying data and have some complex data I want to put into boxes or these temp tables to use later
 42 | -- it helps me kind of categorize and separate it out
 43 | 
 44 | -- In the next lesson we will look at the Temp Tables vs CTEs
 45 | 
 46 | 
 47 | 
 48 | 
 49 | 
 50 | 
 51 | 
 52 | 
 53 | 
 54 | 
 55 | 
 56 | 
 57 | 
 58 | 
 59 | 
 60 | 
 61 | 
 62 | 
 63 | 
 64 | 
 65 | 
 66 | 
 67 | 
 68 | 
 69 | 
 70 | 
 71 | 
 72 | 
 73 | 
 74 | 
 75 | 
 76 | 
 77 | 
 78 | 
 79 | 
 80 | 
 81 | 
 82 | 
 83 | 
 84 | 
 85 | 
 86 | 
 87 | 
 88 | 
 89 | 
 90 | 
 91 | 
 92 | 
 93 | 
 94 | 
 95 | 
 96 | 
 97 | 
 98 | 
 99 | 
100 | 
101 | 
102 | 
103 | 
104 | 
105 | 
106 | 
107 | 
108 | 
109 | 
110 | 
111 | 
112 | 


--------------------------------------------------------------------------------
/Advanced - Triggers and Events.sql:
--------------------------------------------------------------------------------
 1 | -- Triggers
 2 | 
 3 | -- a Trigger is a block of code that executes automatically when an event takes place in a table.
 4 | 
 5 | -- for example we have these 2 tables, employee_salary and employee_demographics - when a new employee is added to the salary table we want them
 6 | -- to be added to the demographics table automatically as well
 7 | 
 8 | 
 9 | SELECT * FROM employee_salary;
10 | 
11 | SELECT * FROM employee_demographics;
12 | 
13 | -- so really when we get a new row or data is inserted into the employee_salary table we want a trigger to insert the matching employee
14 | -- into the employee_demographics table
15 | -- so let's write this out
16 | USE parks_and_recreation;
17 | DELIMITER $$
18 | 
19 | CREATE TRIGGER employee_insert2
20 |     -- we can also do BEFORE, but for this lesson we have to do after
21 | 	AFTER INSERT ON employee_salary
22 |     -- now this means this trigger gets activated for each row that is inserted. Some sql databases like MSSQL have batch triggers or table level triggers that
23 |     -- only trigger once, but MySQL doesn't have this functionality unfortunately
24 |     FOR EACH ROW
25 |     
26 |     -- now we can write our block of code that we want to run when this is triggered
27 | BEGIN
28 | -- we want to add the newly inserted employee to the employee_demographics table
29 | -- copying employee_id, first_name and last_name from the new row; age, gender and birth_date are not listed, so they are left NULL
30 | -- NEW says only from the new rows that were inserted. There is also OLD which is rows that were deleted or updated, but for us we want NEW
31 |     INSERT INTO employee_demographics (employee_id, first_name, last_name) VALUES (NEW.employee_id,NEW.first_name,NEW.last_name);
32 | END $$
33 | 
34 | DELIMITER ; 
35 | 
36 | -- Now let's run it and create it
37 | 
38 | 
39 | -- Now that it's created let's test it out.
40 | 
41 | -- Let's insert a new employee into the employee_salary table and see if the trigger adds them to the employee_demographics table.
42 | 
43 | -- so let's put the values that we want to insert - let's add Jean-Ralphio as employee 13
44 | INSERT INTO employee_salary (employee_id, first_name, last_name, occupation, salary, dept_id)
45 | VALUES(13, 'Jean-Ralphio', 'Saperstein', 'Entertainment 720 CEO', 1000000, NULL);
46 | -- now the row was inserted into employee_salary, the trigger was triggered, and the matching row was inserted into employee_demographics
47 | -- clean up the test row (this only deletes from employee_salary; the trigger fires on INSERT only, so the demographics row is not removed)
48 | DELETE FROM employee_salary
49 | WHERE employee_id = 13;
50 | 
51 | 
52 | 
53 | -- -------------------------------------------------------------------------
54 | 
55 | -- now let's look at Events
56 | 
57 | -- Now I usually call these "Jobs" because I called them that for years in MSSQL, but in MySQL they're called Events
58 | 
59 | -- Events are task or block of code that gets executed according to a schedule. These are fantastic for so many reasons. Importing data on a schedule. 
60 | -- Scheduling reports to be exported to files and so many other things
61 | -- you can schedule all of this to happen every day, every monday, every first of the month at 10am. Really whenever you want
62 | 
63 | -- This really helps with automation in MySQL
64 | 
65 | -- let's say Parks and Rec has a policy that anyone over the age of 60 is immediately retired with lifetime pay
66 | -- All we have to do is delete them from the demographics table
67 | 
68 | SELECT * 
69 | FROM parks_and_recreation.employee_demographics;
70 | 
71 | SHOW EVENTS;
72 | 
73 | -- we can drop or alter these events like this (the delimiter is switched back to ; once the event body ends, so later statements run normally):
74 | DROP EVENT IF EXISTS delete_retirees;
75 | DELIMITER $$
76 | CREATE EVENT delete_retirees
77 | ON SCHEDULE EVERY 30 SECOND
78 | DO BEGIN
79 |     -- every 30 seconds, remove anyone aged 60 or older from the demographics table
80 |     DELETE FROM parks_and_recreation.employee_demographics
81 |     WHERE age >= 60;
82 | END $$
83 | DELIMITER ;
84 | 
85 | -- if we run it again you can see Jerry is now fired -- or I mean retired
86 | SELECT * 
87 | FROM parks_and_recreation.employee_demographics;
88 | 
89 | 
90 | 
91 | 
92 | 
93 | 
94 | 
95 | 


--------------------------------------------------------------------------------
/Beginner - Group By + Order By.sql:
--------------------------------------------------------------------------------
  1 | -- Group By
  2 | -- When you use the GROUP BY clause in a MySQL query, it groups together rows that have the same values in the specified column or columns.
  3 | -- GROUP BY is going to allow us to group rows that have the same data and run aggregate functions on them
  4 | 
  5 | SELECT *
  6 | FROM employee_demographics;
  7 | 
  8 | -- when you use group by  you have to have the same columns you're grouping on in the group by statement
  9 | SELECT gender
 10 | FROM employee_demographics
 11 | GROUP BY gender
 12 | ;
 13 | 
 14 | -- first_name is neither in the GROUP BY nor inside an aggregate function, so this query is rejected when ONLY_FULL_GROUP_BY is enabled
 15 | SELECT first_name
 16 | FROM employee_demographics
 17 | GROUP BY gender
 18 | ;
 19 | 
 20 | 
 21 | 
 22 | 
 23 | 
 24 | SELECT occupation
 25 | FROM employee_salary
 26 | GROUP BY occupation
 27 | ;
 28 | 
 29 | -- notice there is only one office manager row
 30 | 
 31 | -- when we group by 2 columns we now have a row for both occupation and salary because salary is different
 32 | SELECT occupation, salary
 33 | FROM employee_salary
 34 | GROUP BY occupation, salary
 35 | ;
 36 | 
 37 | -- now the most useful reason we use group by is so we can perform our aggregate functions on them
 38 | SELECT gender, AVG(age)
 39 | FROM employee_demographics
 40 | GROUP BY gender
 41 | ;
 42 | 
 43 | SELECT gender, MIN(age), MAX(age), COUNT(age),AVG(age)
 44 | FROM employee_demographics
 45 | GROUP BY gender
 46 | ;
 47 | 
 48 | 
 49 | 
 50 | #10 - The ORDER BY clause:
 51 | #-------------------------
 52 | #The ORDER BY keyword is used to sort the result-set in ascending or descending order.
 53 | 
 54 | #The ORDER BY keyword sorts the records in ascending order by default. To sort the records in descending order, use the DESC keyword.
 55 | 
 56 | 
 57 | #So let's try it out with our employee_demographics table
 58 | #First let's start simple with just ordering by one column
 59 | SELECT *
 60 | FROM employee_demographics
 61 | ORDER BY first_name;
 62 | 
 63 | #You can see that first name is ordered from a - z or Ascending.
 64 | 
 65 | #We can change that by specifying DESC after it
 66 | SELECT *
 67 | FROM employee_demographics;
 68 | 
 69 | -- if we use order by it goes a to z by default (ascending order)
 70 | SELECT *
 71 | FROM employee_demographics
 72 | ORDER BY first_name;
 73 | 
 74 | -- we can manually change the order by saying desc
 75 | SELECT *
 76 | FROM employee_demographics
 77 | ORDER BY first_name DESC;
 78 | 
 79 | #Now we can also do multiple columns like this:
 80 | 
 81 | SELECT *
 82 | FROM employee_demographics
 83 | ORDER BY gender, age;
 84 | 
 85 | SELECT *
 86 | FROM employee_demographics
 87 | ORDER BY gender DESC, age DESC;
 88 | 
 89 | 
 90 | 
 91 | #now we don't actually have to spell out the column names. We can actually just use their column position
 92 | 
 93 | #Gender is in position 5 and age is in position 4, so we can use those positions instead of the names.
 94 | SELECT *
 95 | FROM employee_demographics
 96 | ORDER BY 5 DESC, 4 DESC;
 97 | 
 98 | #Now best practice is to use the column names as it's more overt and if columns are added or replaced or something in this table it will still use the right columns to order on.
 99 | 
100 | #So that's all there is to order by - fairly straight forward, but something I use for most queries I use in SQL
101 | 
102 | 
103 | 
104 | 
105 | 
106 | 
107 | 
108 | 
109 | 
110 | 
111 | 


--------------------------------------------------------------------------------
/Beginner - Having vs Where.sql:
--------------------------------------------------------------------------------
 1 | -- Having vs Where
 2 | 
 3 | -- Both were created to filter rows of data, but they filter 2 separate things
 4 | -- Where is going to filter rows based off columns of data
 5 | -- Having is going to filter rows based off aggregated columns when grouped
 6 | 
 7 | SELECT gender, AVG(age)
 8 | FROM employee_demographics
 9 | GROUP BY gender
10 | ;
11 | 
12 | 
13 | -- let's try to filter on the avg age using where
14 | 
15 | SELECT gender, AVG(age)
16 | FROM employee_demographics
17 | WHERE AVG(age) > 40
18 | GROUP BY gender
19 | ;
20 | -- this doesn't work because of order of operations. On the backend Where comes before the group by. So you can't filter on data that hasn't been grouped yet
21 | -- this is why Having was created
22 | 
23 | SELECT gender, AVG(age)
24 | FROM employee_demographics
25 | GROUP BY gender
26 | HAVING AVG(age) > 40
27 | ;
28 | 
29 | SELECT gender, AVG(age) as AVG_age
30 | FROM employee_demographics
31 | GROUP BY gender
32 | HAVING AVG_age > 40
33 | ;
34 | 
35 | 
36 | 
37 | 
38 | 
39 | 
40 | 
41 | 
42 | 
43 | 
44 | 
45 | 
46 | 
47 | 
48 | 
49 | 
50 | 
51 | 
52 | 
53 | 
54 | 
55 | 
56 | 
57 | 
58 | 
59 | 
60 | 
61 | 
62 | 
63 | 
64 | 
65 | 
66 | 


--------------------------------------------------------------------------------
/Beginner - Limit and Aliasing.sql:
--------------------------------------------------------------------------------
 1 | -- LIMIT and ALIASING
 2 | 
 3 | -- Limit is just going to specify how many rows you want in the output
 4 | 
 5 | 
 6 | SELECT *
 7 | FROM employee_demographics
 8 | LIMIT 3;
 9 | 
10 | -- if we change something like the order or use a group by it would change the output
11 | 
12 | SELECT *
13 | FROM employee_demographics
14 | ORDER BY first_name
15 | LIMIT 3;
16 | 
17 | -- now there is an additional parameter in limit which we can access using a comma that specifies the starting place
18 | 
19 | SELECT *
20 | FROM employee_demographics
21 | ORDER BY first_name;
22 | 
23 | SELECT *
24 | FROM employee_demographics
25 | ORDER BY first_name
26 | LIMIT 3,2;
27 | 
28 | -- this now says skip the first 3 rows and take the 2 rows after that
29 | -- this is not used a lot in my opinion
30 | 
31 | -- you could use it if you wanted to select the third oldest person by doing this:
32 | SELECT *
33 | FROM employee_demographics
34 | ORDER BY age desc;
35 | -- with the seed data the third oldest is Leslie (after Jerry and Donna) - let's try to select her
36 | SELECT *
37 | FROM employee_demographics
38 | ORDER BY age desc
39 | LIMIT 2,1;
40 | 
41 | 
42 | -- ALIASING
43 | 
44 | -- aliasing is just a way to change the name of the column (for the most part)
45 | -- it can also be used in joins, but we will look at that in the intermediate series
46 | 
47 | 
48 | SELECT gender, AVG(age)
49 | FROM employee_demographics
50 | GROUP BY gender
51 | ;
52 | -- we can use the keyword AS to specify we are using an Alias
53 | SELECT gender, AVG(age) AS Avg_age
54 | FROM employee_demographics
55 | GROUP BY gender
56 | ;
57 | 
58 | -- although we don't actually need it, but it's more explicit which I usually like
59 | SELECT gender, AVG(age) Avg_age
60 | FROM employee_demographics
61 | GROUP BY gender
62 | ;
63 | 
64 | 
65 | 
66 | 
67 | 
68 | 
69 | 
70 | 
71 | 
72 | 


--------------------------------------------------------------------------------
/Beginner - Parks_and_Rec_Create_db.sql:
--------------------------------------------------------------------------------
 1 | DROP DATABASE IF EXISTS `Parks_and_Recreation`;
 2 | CREATE DATABASE `Parks_and_Recreation`;
 3 | USE `Parks_and_Recreation`;
 4 | 
 5 | 
 6 | 
 7 | 
 8 | 
 9 | 
10 | CREATE TABLE employee_demographics (
11 |   employee_id INT NOT NULL,
12 |   first_name VARCHAR(50),
13 |   last_name VARCHAR(50),
14 |   age INT,
15 |   gender VARCHAR(10),
16 |   birth_date DATE,
17 |   PRIMARY KEY (employee_id)
18 | );
19 | 
20 | CREATE TABLE employee_salary (
21 |   employee_id INT NOT NULL,
22 |   first_name VARCHAR(50) NOT NULL,
23 |   last_name VARCHAR(50) NOT NULL,
24 |   occupation VARCHAR(50),
25 |   salary INT,
26 |   dept_id INT
27 | );
28 | 
29 | -- NOTE: employee_id 2 is skipped here while employee_salary does have an id 2 (Ron Swanson); the Joins lesson points out this gap
30 | INSERT INTO employee_demographics (employee_id, first_name, last_name, age, gender, birth_date)
31 | VALUES
32 | (1,'Leslie', 'Knope', 44, 'Female','1979-09-25'),
33 | (3,'Tom', 'Haverford', 36, 'Male', '1987-03-04'),
34 | (4, 'April', 'Ludgate', 29, 'Female', '1994-03-27'),
35 | (5, 'Jerry', 'Gergich', 61, 'Male', '1962-08-28'),
36 | (6, 'Donna', 'Meagle', 46, 'Female', '1977-07-30'),
37 | (7, 'Ann', 'Perkins', 35, 'Female', '1988-12-01'),
38 | (8, 'Chris', 'Traeger', 43, 'Male', '1980-11-11'),
39 | (9, 'Ben', 'Wyatt', 38, 'Male', '1985-07-26'),
40 | (10, 'Andy', 'Dwyer', 34, 'Male', '1989-03-25'),
41 | (11, 'Mark', 'Brendanawicz', 40, 'Male', '1983-06-14'),
42 | (12, 'Craig', 'Middlebrooks', 37, 'Male', '1986-07-27');
43 | 
44 | 
45 | INSERT INTO employee_salary (employee_id, first_name, last_name, occupation, salary, dept_id)
46 | VALUES
47 | (1, 'Leslie', 'Knope', 'Deputy Director of Parks and Recreation', 75000,1),
48 | (2, 'Ron', 'Swanson', 'Director of Parks and Recreation', 70000,1),
49 | (3, 'Tom', 'Haverford', 'Entrepreneur', 50000,1),
50 | (4, 'April', 'Ludgate', 'Assistant to the Director of Parks and Recreation', 25000,1),
51 | (5, 'Jerry', 'Gergich', 'Office Manager', 50000,1),
52 | (6, 'Donna', 'Meagle', 'Office Manager', 60000,1),
53 | (7, 'Ann', 'Perkins', 'Nurse', 55000,4),
54 | (8, 'Chris', 'Traeger', 'City Manager', 90000,3),
55 | (9, 'Ben', 'Wyatt', 'State Auditor', 70000,6),
56 | (10, 'Andy', 'Dwyer', 'Shoe Shiner and Musician', 20000, NULL),
57 | (11, 'Mark', 'Brendanawicz', 'City Planner', 57000, 3),
58 | (12, 'Craig', 'Middlebrooks', 'Parks Director', 65000,1);
59 | 
60 | 
61 | 
62 | CREATE TABLE parks_departments (
63 |   department_id INT NOT NULL AUTO_INCREMENT,
64 |   department_name varchar(50) NOT NULL,
65 |   PRIMARY KEY (department_id)
66 | );
67 | 
68 | INSERT INTO parks_departments (department_name)
69 | VALUES
70 | ('Parks and Recreation'),
71 | ('Animal Control'),
72 | ('Public Works'),
73 | ('Healthcare'),
74 | ('Library'),
75 | ('Finance');
76 | 
77 | 
78 | 
79 | 
80 | 
81 | 
82 | 
83 | 
84 | 
85 | 
86 | 
87 | 
88 | 
89 | 


--------------------------------------------------------------------------------
/Beginner - Select Statement.sql:
--------------------------------------------------------------------------------
 1 | -- SELECT STATEMENT
 2 | 
 3 | -- the SELECT statement is used to work with columns and specify what columns you want to see in your output. There are a few other things as well that
 4 | -- we will discuss throughout this video
 5 | 
 6 | #We can also select a specific number of columns based on our requirement. 
 7 | 
 8 | #Now remember we can just select everything by saying:
 9 | SELECT * 
10 | FROM parks_and_recreation.employee_demographics;
11 | 
12 | 
13 | #Let's try selecting a specific column
14 | SELECT first_name
15 | FROM employee_demographics;
16 | 
17 | #As you can see from the output, we only have the one column here now and don't see the others
18 | 
19 | #Now let's add some more columns, we just need to separate the columns with commas
20 | SELECT first_name, last_name
21 | FROM employee_demographics;
22 | 
23 | #Now the order doesn't normally matter when selecting your columns.
24 | #There are some use cases we will look at in later modules where the order of the column
25 | #Names in the select statement will matter, but for this you can put them in any order
26 | 
27 | SELECT last_name, first_name, gender, age
28 | FROM employee_demographics;
29 | 
30 | #You'll also often see SQL queries formatted like this.
31 | SELECT last_name, 
32 | first_name, 
33 | gender, 
34 | age
35 | FROM employee_demographics;
36 | 
37 | #The query still runs the exact same, but it is easier to read and pick out the columns
38 | #being selected and what you're doing with them.
39 | 
40 | #For example let's take a look at using a calculation in the select statement
41 | 
42 | #You can see here we have the age column - we can perform calculations on this
43 | SELECT first_name,
44 |  last_name,
45 |  age,
46 |  age + 10
47 | FROM employee_demographics;
48 | 
49 | #See how it's pretty easy to read and to see which columns we are using.
50 | 
51 | #Math in SQL does follow PEMDAS which stands for Parenthesis, Exponent, Multiplication,
52 | #Division, Addition, subtraction - it's the order of operation for math
53 | 
54 | #For example - What will the output be?:
55 | SELECT first_name, 
56 | last_name,
57 | salary,
58 | salary + 100 * 10
59 | FROM employee_salary;
60 | #This is going to do 100 * 10 first, which is 1000, and then add that to the original salary
61 | 
62 | #Now what will the output be when we do this?
63 | SELECT first_name, 
64 | last_name,
65 | salary,
66 | (salary + 100) * 10
67 | FROM employee_salary;
68 | 
69 | 
70 | # Pemdas
71 | 
72 | #One thing I wanted to show you about the select statement in this lesson is the DISTINCT Statement - this will return only unique values in
73 | #The output - and you won't have any duplicates
74 | 
75 | SELECT dept_id -- employee_salary names this column dept_id (department_id belongs to parks_departments)
76 | FROM employee_salary;
77 | -- DISTINCT returns each dept_id value only once
78 | SELECT DISTINCT dept_id
79 | FROM employee_salary;
80 | 
81 | #Now a lot happens in the select statement. We have an entire module dedicated to just the 
82 | #select statement so this is kind of just an introduction to the select statement.


--------------------------------------------------------------------------------
/Beginner - Where Statement.sql:
--------------------------------------------------------------------------------
 1 | #WHERE Clause:
 2 | #-------------
 3 | #The WHERE clause is used to filter records (rows of data)
 4 | 
 5 | #It's going to extract only those records that fulfill a specified condition.
 6 | 
 7 | # So basically if we say "Where name is = 'Alex'" - only rows where the name = 'Alex' will return
 8 | # So this is only affecting the rows, not the columns
 9 | 
10 | 
11 | #Let's take a look at how this looks
12 | SELECT *
13 | FROM employee_salary
14 | WHERE salary > 50000;
15 | 
16 | SELECT *
17 | FROM employee_salary
18 | WHERE salary >= 50000;
19 | 
20 | SELECT *
21 | FROM employee_demographics
22 | WHERE gender = 'Female';
23 | 
24 | 
25 | #We can also return rows where the gender is not 'Female'
26 | SELECT *
27 | FROM employee_demographics
28 | WHERE gender != 'Female';
29 | 
30 | 
31 | #We can use WHERE clause with date value also
32 | SELECT *
33 | FROM employee_demographics
34 | WHERE birth_date > '1985-01-01';
35 | 
36 | -- Here '1985-01-01' uses YYYY-MM-DD, which is the default date format in MySQL.
37 | -- There are other date formats as well that we will talk about in a later lesson.
38 | 
39 | 
40 | # LIKE STATEMENT
41 | 
42 | -- two special characters a % and a _
43 | 
44 | -- % means anything
45 | SELECT *
46 | FROM employee_demographics
47 | WHERE first_name LIKE 'a%';
48 | 
 49 | -- _ means exactly one character (so 'a__' is an a followed by exactly two more characters)
50 | SELECT *
51 | FROM employee_demographics
52 | WHERE first_name LIKE 'a__';
53 | 
54 | 
55 | SELECT *
56 | FROM employee_demographics
57 | WHERE first_name LIKE 'a___%';
58 | 
59 | 
60 | 
61 | 
62 | 
63 | 
64 | 
65 | 
66 | 
67 | 
68 | 
69 | 
70 | 
71 | 


--------------------------------------------------------------------------------
/Intermediate - Case Statements.sql:
--------------------------------------------------------------------------------
  1 | -- Case Statements
  2 | 
  3 | -- A Case Statement allows you to add logic to your Select Statement, sort of like an if else statement in other programming languages or even things like Excel
  4 | 
  5 | 
  6 | 
  7 | SELECT * 
  8 | FROM employee_demographics;
  9 | 
 10 | 
 11 | SELECT first_name, 
 12 | last_name, 
 13 | CASE
 14 | 	WHEN age <= 30 THEN 'Young'
 15 | END
 16 | FROM employee_demographics;
 17 | 
 18 | 
 19 | --
 20 | 
 21 | SELECT first_name, 
 22 | last_name, 
 23 | CASE -- branches are checked top to bottom; the ranges below do not overlap
 24 |     WHEN age <= 30 THEN 'Young'
 25 |     WHEN age BETWEEN 31 AND 50 THEN 'Old'
 26 |     WHEN age > 50 THEN "On Death's Door"
 27 | END
 28 | FROM employee_demographics;
 29 | 
 30 | -- Poor Jerry
 31 | 
 32 | -- Now we don't just have to do simple labels like we did, we can also perform calculations
 33 | 
 34 | -- Let's look at giving bonuses to employees
 35 | 
 36 | SELECT * 
 37 | FROM employee_salary;
 38 | 
 39 | -- Pawnee Council sent out a memo of their bonus and pay increase structure so we need to follow it
 40 | -- Basically if they make less than 45k then they get a 5% raise - very generous
 41 | -- if they make more than 45k they get a 7% raise
 42 | -- they get a bonus of 10% if they work for the Finance Department
 43 | 
 44 | SELECT first_name, last_name, salary,
 45 | CASE
 46 |     WHEN salary < 45000 THEN salary + (salary * 0.05) -- under 45k: 5% raise
 47 |     WHEN salary > 45000 THEN salary + (salary * 0.07) -- over 45k: 7% raise
 48 | END AS new_salary -- exactly 45k matches neither branch, so new_salary is NULL
 49 | FROM employee_salary;
 50 | 
 51 | -- Unfortunately Pawnee Council was extremely specific in their wording: anyone making exactly 45k matches neither condition and is not included in the pay increases. Maybe Next Year.
 52 | 
 53 | -- Now we need to also account for Bonuses, let's make a new column
 54 | SELECT first_name, last_name, salary,
 55 | CASE
 56 |     WHEN salary < 45000 THEN salary + (salary * 0.05) -- under 45k: 5% raise
 57 |     WHEN salary > 45000 THEN salary + (salary * 0.07) -- over 45k: 7% raise
 58 | END AS new_salary,
 59 | CASE
 60 |     WHEN dept_id = 6 THEN salary * .10 -- 10% bonus for dept_id 6; every other row gets NULL
 61 | END AS Bonus
 62 | FROM employee_salary;
 63 | 
 64 | -- as you can see Ben is the only one who get's a bonus
 65 | 
 66 | 
 67 | 
 68 | 
 69 | 
 70 | 
 71 | 
 72 | 
 73 | 
 74 | 
 75 | 
 76 | 
 77 | 
 78 | 
 79 | 
 80 | 
 81 | 
 82 | 
 83 | 
 84 | 
 85 | 
 86 | 
 87 | 
 88 | 
 89 | 
 90 | 
 91 | 
 92 | 
 93 | 
 94 | 
 95 | 
 96 | 
 97 | 
 98 | 
 99 | 
100 | 
101 | 
102 | 
103 | 
104 | 
105 | 
106 | 
107 | 
108 | 
109 | 
110 | 


--------------------------------------------------------------------------------
/Intermediate - Joins.sql:
--------------------------------------------------------------------------------
  1 | -- Joins
  2 | 
  3 | -- joins allow you to combine 2 tables together (or more) if they have a common column.
  4 | -- doesn't mean they need the same column name, but the data in it are the same and can be used to join the tables together
  5 | -- there are several joins we will look at today, inner joins, outer joins, and self joins
  6 | 
  7 | 
  8 | -- here are the first 2 tables - let's see what columns and data in the rows we have in common that we can join on
  9 | SELECT *
 10 | FROM employee_demographics;
 11 | 
 12 | SELECT *
 13 | FROM employee_salary;
 14 | 
 15 | -- let's start with an inner join -- inner joins return rows that are the same in both columns
 16 | 
 17 | -- since we have the same columns we need to specify which table they're coming from
 18 | SELECT *
 19 | FROM employee_demographics
 20 | JOIN employee_salary
 21 | 	ON employee_demographics.employee_id = employee_salary.employee_id;
 22 | 
 23 | -- notice Ron Swanson isn't in the results? This is because he doesn't have an employee id in the demographics table. He refused to give his birth date or age or gender
 24 | 
 25 | -- use aliasing!
 26 | SELECT *
 27 | FROM employee_demographics dem
 28 | INNER JOIN employee_salary sal
 29 | 	ON dem.employee_id = sal.employee_id;
 30 | 
 31 | 
 32 | -- OUTER JOINS
 33 | 
 34 | -- for outer joins we have a left and a right join
 35 | -- a left join will take everything from the left table even if there is no match in the join, but will only return matches from the right table
 36 | -- the exact opposite is true for a right join
 37 | 
 38 | SELECT *
 39 | FROM employee_salary sal
 40 | LEFT JOIN employee_demographics dem
 41 | 	ON dem.employee_id = sal.employee_id;
 42 | 
 43 | -- so you'll notice we have everything from the left table or the salary table. Even though there is no match to ron swanson. 
 44 | -- Since there is not match on the right table it's just all Nulls
 45 | 
 46 | -- if we just switch this to a right join it basically just looks like an inner join
 47 | -- that's because we are taking everything from the demographics table and only matches from the left or salary table. Since they have all the matches
 48 | -- it looks kind of like an inner join
 49 | SELECT *
 50 | FROM employee_salary sal
 51 | RIGHT JOIN employee_demographics dem
 52 | 	ON dem.employee_id = sal.employee_id;
 53 | 
 54 | 
 55 | 
 56 | -- Self Join
 57 | 
 58 | -- a self join is where you tie a table to itself
 59 | 
 60 | SELECT *
 61 | FROM employee_salary;
 62 | 
 63 | -- what we could do is a secret santa where each person is the secret santa of the person with the next higher ID
 64 | 
 65 | 
 66 | SELECT *
 67 | FROM employee_salary emp1
 68 | JOIN employee_salary emp2
 69 | 	ON emp1.employee_id = emp2.employee_id
 70 |     ;
 71 | 
 72 | -- now let's change it to give them their secret santa
 73 | SELECT *
 74 | FROM employee_salary emp1
 75 | JOIN employee_salary emp2
 76 | 	ON emp1.employee_id + 1  = emp2.employee_id
 77 |     ;
 78 | 
 79 | 
 80 | 
 81 | SELECT santa.employee_id AS emp_santa, santa.first_name AS santa_first_name, santa.last_name AS santa_last_name, recipient.employee_id, recipient.first_name, recipient.last_name
 82 | FROM employee_salary AS santa -- the gift giver
 83 | INNER JOIN employee_salary AS recipient -- the same table again: the employee whose id is one higher
 84 | 	ON recipient.employee_id = santa.employee_id + 1
 85 |     ;
 86 | 
 87 | -- So leslie is Ron's secret santa and so on -- Mark Brandanowitz didn't get a secret santa, but he doesn't deserve one because he broke Ann's heart so it's all good
 88 | 
 89 | 
 90 | 
 91 | 
 92 | 
 93 | 
 94 | -- Joining multiple tables
 95 | 
 96 | -- now we have on other table we can join - let's take a look at it
 97 | SELECT * 
 98 | FROM parks_and_recreation.parks_departments;
 99 | 
100 | 
101 | SELECT *
102 | FROM employee_demographics AS dem
103 | INNER JOIN employee_salary AS sal
104 | 	ON sal.employee_id = dem.employee_id
105 | INNER JOIN parks_departments AS dept -- inner join: rows whose dept_id has no matching department are dropped
106 | 	ON sal.dept_id = dept.department_id;
107 | 
108 | -- now notice when we did that, since it's an inner join it got rid of andy because he wasn't a part of any department
109 | 
110 | -- if we do a left join we would still include him because we are taking everything from the left table which is the salary table in this instance
111 | SELECT *
112 | FROM employee_demographics dem
113 | INNER JOIN employee_salary sal
114 | 	ON dem.employee_id = sal.employee_id
115 | LEFT JOIN parks_departments dept
116 | 	ON dept.department_id = sal.dept_id;
117 | 
118 | 
119 | 
120 | 
121 | 
122 | 
123 | 
124 | 
125 | 
126 | 
127 | 
128 | 
129 | 


--------------------------------------------------------------------------------
/Intermediate - String Functions.sql:
--------------------------------------------------------------------------------
 1 | #Now let's look at string functions. These help us change and look at strings differently.
 2 | 
 3 | SELECT * 
 4 | FROM bakery.customers;
 5 | 
 6 | 
 7 | #Length will give us the length of each value
 8 | SELECT LENGTH('sky');
 9 | 
10 | #Now we can see the length of each name
11 | SELECT first_name, LENGTH(first_name) 
12 | FROM employee_demographics;
13 | 
14 | #Upper will change all the string characters to upper case
15 | SELECT UPPER('sky');
16 | 
17 | SELECT first_name, UPPER(first_name) 
18 | FROM employee_demographics;
19 | 
20 | #lower will change all the string characters to lower case
21 | SELECT LOWER('sky');
22 | 
23 | SELECT first_name, LOWER(first_name) 
24 | FROM employee_demographics;
25 | 
26 | #TRIM removes white space from the front and the end of a value - note the spaces have to be inside the quotes to be part of the string
27 | SELECT TRIM('   sky   ');
28 | 
29 | #TRIM only strips the two ends - white space in the middle of the string is left untouched
30 | SELECT TRIM('     I           love          SQL');
31 | 
32 | #There's also L trim for trimming just the left side
33 | SELECT LTRIM('     I love SQL');
34 | 
35 | 
36 | #There's also R trim for trimming just the Right side
37 | SELECT RTRIM('I love SQL    ');
38 | 
39 | 
40 | #Now we have Left. Left is going to allow us to take a certain number of characters from the left hand side.
41 | SELECT LEFT('Alexander', 4);
42 | 
43 | SELECT first_name, LEFT(first_name,4) 
44 | FROM employee_demographics;
45 | 
46 | #Right is basically the opposite - taking it starting from the right side
47 | SELECT RIGHT('Alexander', 6);
48 | 
49 | SELECT first_name, RIGHT(first_name,4) 
50 | FROM employee_demographics;
51 | 
52 | #Now let's look at substring, this one I personally love and use a lot.
53 | #Substring allows you to specify a starting point and how many characters you want so you can take characters from anywhere in the string. 
54 | SELECT SUBSTRING('Alexander', 2, 3);
55 | 
56 | #We could use this on phone numbers to get the area code at the beginning. Here we use it on birth_date to pull out the birth year.
57 | SELECT birth_date, SUBSTRING(birth_date,1,4) as birth_year
58 | FROM employee_demographics;
59 | 
60 | #We can also use replace
61 | SELECT REPLACE(first_name,'a','z')
62 | FROM employee_demographics;
63 | 
64 | #Next we have locate - we have 2 arguments we can use here: we can specify what we are searching for and where to search
65 | #It will return the position of that character in the string.
66 | SELECT LOCATE('x', 'Alexander');
67 | 
68 | #Now Alexander has 2 e's - what will happen if we try to locate it
69 | SELECT LOCATE('e', 'Alexander');
70 | #It will return the location of just the first position.
71 | 
72 | #Let's try it on our first name
73 | SELECT first_name, LOCATE('a',first_name) 
74 | FROM employee_demographics;
75 | 
76 | #You can also locate longer strings
77 | SELECT first_name, LOCATE('Mic',first_name) 
78 | FROM employee_demographics;
79 | 
80 | #Now let's look at concatenate - it will combine the strings together
81 | SELECT CONCAT('Alex', 'Freberg');
82 | 
83 | #Here we can combine the first and the last name columns together
84 | SELECT CONCAT(first_name, ' ', last_name) AS full_name
85 | FROM employee_demographics;
86 | 
87 | 


--------------------------------------------------------------------------------
/Intermediate - Subqueries.sql:
--------------------------------------------------------------------------------
 1 | # Subqueries
 2 | 
 3 | #So subqueries are queries within queries. Let's see how this looks.
 4 | 
 5 | SELECT *
 6 | FROM employee_demographics;
 7 | 
 8 | 
 9 | #Now let's say we wanted to look at employees who actually work in the Parks and Rec Department, we could join tables together or we could use a subquery
10 | #We can do that like this:
11 | 
12 | SELECT *
13 | FROM employee_demographics
14 | WHERE employee_id IN 
15 | 			(SELECT employee_id
16 | 				FROM employee_salary
17 |                 WHERE dept_id = 1);
18 |                 
19 | #So we are using that subquery in the where statement, and if we just highlight the subquery and run it, it's basically a list we are selecting from in the outer query
20 | 
21 | SELECT *
22 | FROM employee_demographics
23 | WHERE employee_id IN 
24 | 			(SELECT employee_id, salary
25 | 				FROM employee_salary
26 |                 WHERE dept_id = 1);
27 | 
28 | # now if we try to have more than 1 column in the subquery we get an error saying the operand should contain 1 column only 
29 | 
30 | #We can also use subqueries in the select and the from statements - let's see how we can do this
31 | 
32 | -- Let's say we want to look at the salaries and compare them to the average salary
33 | 
34 | SELECT first_name, salary, AVG(salary)
35 | FROM employee_salary;
36 | -- if we run this it's not going to work, we are using columns with an aggregate function so we need to use group by
37 | -- if we do that though we don't exactly get what we want
38 | SELECT first_name, salary, AVG(salary)
39 | FROM employee_salary
40 | GROUP BY first_name, salary;
41 | 
42 | -- it's giving us the average PER GROUP which we don't want
43 | -- here's a good use for a subquery
44 | 
45 | SELECT first_name, 
46 | salary, 
47 | (SELECT AVG(salary) 
48 | 	FROM employee_salary)
49 | FROM employee_salary;
50 | 
51 | 
52 | -- We can also use it in the FROM Statement
53 | -- when we use it here it's almost like we are creating a small table we are querying off of
54 | SELECT *
55 | FROM (SELECT gender, MIN(age), MAX(age), COUNT(age),AVG(age)
56 | FROM employee_demographics
57 | GROUP BY gender) 
58 | ;
59 | -- now this doesn't work because we get an error saying we have to name it
60 | 
61 | SELECT gender, AVG(Min_age) -- averages the per-gender minimum produced by the derived table
62 | FROM (SELECT gender, MIN(age) AS Min_age, MAX(age) AS Max_age, COUNT(age) AS Count_age, AVG(age) AS Avg_age
63 | 	FROM employee_demographics
64 | 	GROUP BY gender) AS Agg_Table -- the derived table is given a name so the outer query can select from it
65 | GROUP BY gender
66 | ;
67 | 
68 | 
69 | 
70 | 
71 | 
72 | 
73 | 
74 | 
75 | 
76 | 
77 | 
78 | 
79 | 
80 | 
81 | 
82 | 
83 | 
84 | 
85 | 
86 | 
87 | 
88 | 
89 | 
90 | 
91 | 
92 | 
93 | 
94 | 


--------------------------------------------------------------------------------
/Intermediate - Unions.sql:
--------------------------------------------------------------------------------
 1 | #UNIONS
 2 | 
 3 | 
 4 | #A union is how you can combine rows together - not columns like we have been doing with joins, where one column is put next to another
 5 | #Unions allow you to stack the rows of one query on top of the rows of another
 6 | 
 7 | #Now you should keep it the same kind of data otherwise if you start mixing tips with first_names it would be really confusing, but you can do it.
 8 | #Let's try it out and use Union to bring together some random data, then we will look at an actual use case
 9 | 
10 | SELECT first_name, last_name
11 | FROM employee_demographics
12 | UNION
13 | SELECT occupation, salary
14 | FROM employee_salary;
15 | 
16 | #So you can see we basically combined the data together, but not side by side in different columns, but one on top of the other in the same columns
17 | #This obviously is not good since you're mixing data, but it can be done if you want.
18 | 
19 | SELECT first_name, last_name
20 | FROM employee_demographics
21 | UNION
22 | SELECT first_name, last_name
23 | FROM employee_salary;
24 | 
25 | -- notice it gets rid of duplicates? Union is actually shorthand for Union Distinct
26 | 
27 | SELECT first_name, last_name
28 | FROM employee_demographics
29 | UNION DISTINCT
30 | SELECT first_name, last_name
31 | FROM employee_salary;
32 | 
33 | -- we can use UNION ALL to show all values
34 | 
35 | SELECT first_name, last_name
36 | FROM employee_demographics
37 | UNION ALL
38 | SELECT first_name, last_name
39 | FROM employee_salary;
40 | 
41 | 
42 | 
43 | #Now Let's actually try to use UNION
44 | # The Parks department is trying to cut their budget and wants to identify older employees they can push out or high paid employees who they can reduce pay or push out
45 | -- let's create some queries to help with this
46 | 
47 | SELECT first_name, last_name, 'Old'
48 | FROM employee_demographics
49 | WHERE age > 50;
50 | 
51 | 
52 | 
53 | SELECT first_name, last_name, 'Old Lady' AS Label -- the column names of a UNION come from its first SELECT
54 | FROM employee_demographics
55 | WHERE gender = 'Female' AND age > 40
56 | UNION
57 | SELECT first_name, last_name, 'Old Man' AS Label
58 | FROM employee_demographics
59 | WHERE gender = 'Male' AND age > 40
60 | UNION
61 | SELECT first_name, last_name, 'Highly Paid Employee' AS Label
62 | FROM employee_salary
63 | WHERE salary >= 70000
64 | ORDER BY first_name -- sorts the combined result, not just the last SELECT
65 | ;
66 | 
67 | 
68 | 
69 | 
70 | 
71 | 
72 | 
73 | 
74 | 
75 | 
76 | 
77 | 
78 | 


--------------------------------------------------------------------------------
/Intermediate - Window Functions.sql:
--------------------------------------------------------------------------------
  1 | -- Window Functions
  2 | 
  3 | -- window functions are really powerful and are somewhat like a group by - except they don't roll everything up into 1 row when grouping. 
  4 | -- window functions allow us to look at a partition or a group, but each row stays its own row in the output
  5 | -- we will also look at things like Row Numbers, rank, and dense rank
  6 | 
  7 | SELECT * 
  8 | FROM employee_demographics;
  9 | 
 10 | -- first let's look at group by
 11 | SELECT gender, ROUND(AVG(salary),1)
 12 | FROM employee_demographics dem
 13 | JOIN employee_salary sal
 14 | 	ON dem.employee_id = sal.employee_id
 15 | GROUP BY gender
 16 | ;
 17 | 
 18 | -- now let's try doing something similar with a window function
 19 | 
 20 | SELECT dem.employee_id, dem.first_name, gender, salary,
 21 | AVG(salary) OVER()
 22 | FROM employee_demographics dem
 23 | JOIN employee_salary sal
 24 | 	ON dem.employee_id = sal.employee_id
 25 | ;
 26 | 
 27 | -- now we can add any columns and it works. We could get this exact same output with a subquery in the select statement, 
 28 | -- but window functions have a lot more functionality, let's take a look
 29 | 
 30 | 
 31 | -- if we use partition it's kind of like the group by except it doesn't roll up - it just partitions or breaks based on a column when doing the calculation
 32 | 
 33 | SELECT dem.employee_id, dem.first_name, gender, salary,
 34 | AVG(salary) OVER(PARTITION BY gender)
 35 | FROM employee_demographics dem
 36 | JOIN employee_salary sal
 37 | 	ON dem.employee_id = sal.employee_id
 38 | ;
 39 | 
 40 | 
 41 | -- now if we wanted to see what the salaries were for genders we could do that by using sum, but also we could use order by to get a rolling total
 42 | 
 43 | SELECT dem.employee_id, dem.first_name, gender, salary,
 44 | SUM(salary) OVER(PARTITION BY gender ORDER BY dem.employee_id) AS rolling_total -- employee_id exists in both joined tables, so it has to be qualified
 45 | FROM employee_demographics dem
 46 | JOIN employee_salary sal
 47 | 	ON dem.employee_id = sal.employee_id
 48 | ;
 49 | 
 50 | 
 51 | -- Let's look at row_number rank and dense rank now
 52 | 
 53 | 
 54 | SELECT dem.employee_id, dem.first_name, gender, salary,
 55 | ROW_NUMBER() OVER(PARTITION BY gender)
 56 | FROM employee_demographics dem
 57 | JOIN employee_salary sal
 58 | 	ON dem.employee_id = sal.employee_id
 59 | ;
 60 | 
 61 | -- let's  try ordering by salary so we can see the order of highest paid employees by gender
 62 | SELECT dem.employee_id, dem.first_name, gender, salary,
 63 | ROW_NUMBER() OVER(PARTITION BY gender ORDER BY salary desc)
 64 | FROM employee_demographics dem
 65 | JOIN employee_salary sal
 66 | 	ON dem.employee_id = sal.employee_id
 67 | ;
 68 | 
 69 | -- let's compare this to rank
 70 | SELECT dem.employee_id, dem.first_name, gender, salary,
 71 | ROW_NUMBER() OVER(PARTITION BY gender ORDER BY salary desc) row_num,
 72 | Rank() OVER(PARTITION BY gender ORDER BY salary desc) rank_1 
 73 | FROM employee_demographics dem
 74 | JOIN employee_salary sal
 75 | 	ON dem.employee_id = sal.employee_id
 76 | ;
 77 | 
 78 | -- notice rank repeats on tom and jerry at 5, but then skips 6 to go to 7 -- this goes based off positional rank
 79 | 
 80 | 
 81 | -- let's compare this to dense rank
 82 | SELECT dem.employee_id, dem.first_name, gender, salary,
 83 | ROW_NUMBER() OVER by_gender_salary AS row_num, -- a distinct number for every row in the partition
 84 | RANK() OVER by_gender_salary AS rank_1, -- ties share a rank and the following rank is skipped (positional)
 85 | DENSE_RANK() OVER by_gender_salary AS dense_rank_2 -- ties share a rank and no rank is skipped (numerical)
 86 | FROM employee_demographics AS dem
 87 | INNER JOIN employee_salary AS sal
 88 | 	ON sal.employee_id = dem.employee_id
 89 | WINDOW by_gender_salary AS (PARTITION BY gender ORDER BY salary DESC);
 90 | 
 91 | 
 92 | 
 93 | 
 94 | 
 95 | 
 96 | 
 97 | 
 98 | 
 99 | 
100 | 
101 | 
102 | 
103 | 
104 | 
105 | 
106 | 
107 | 


--------------------------------------------------------------------------------
/Portfolio Project - Data Cleaning.sql:
--------------------------------------------------------------------------------
  1 | -- SQL Project - Data Cleaning
  2 | 
  3 | -- https://www.kaggle.com/datasets/swaptr/layoffs-2022
  4 | 
  5 | 
  6 | 
  7 | 
  8 | 
  9 | 
 10 | SELECT * 
 11 | FROM world_layoffs.layoffs;
 12 | 
 13 | 
 14 | 
 15 | -- first thing we want to do is create a staging table. This is the one we will work in and clean the data. We want a table with the raw data in case something happens
 16 | CREATE TABLE world_layoffs.layoffs_staging 
 17 | LIKE world_layoffs.layoffs;
 18 | 
 19 | INSERT INTO world_layoffs.layoffs_staging -- schema-qualified like the CREATE TABLE, so it does not depend on the default schema
 20 | SELECT * FROM world_layoffs.layoffs;
 21 | 
 22 | 
 23 | -- now when we are data cleaning we usually follow a few steps
 24 | -- 1. check for duplicates and remove any
 25 | -- 2. standardize data and fix errors
 26 | -- 3. Look at null values and see what, if anything, should be changed
 27 | -- 4. remove any columns and rows that are not necessary - few ways
 28 | 
 29 | 
 30 | 
 31 | -- 1. Remove Duplicates
 32 | 
 33 | # First let's check for duplicates
 34 | 
 35 | 
 36 | 
 37 | SELECT *
 38 | FROM world_layoffs.layoffs_staging
 39 | ;
 40 | 
 41 | SELECT company, industry, total_laid_off,`date`,
 42 | 		ROW_NUMBER() OVER (
 43 | 			PARTITION BY company, industry, total_laid_off,`date`) AS row_num
 44 | 	FROM 
 45 | 		world_layoffs.layoffs_staging;
 46 | 
 47 | 
 48 | 
 49 | SELECT *
 50 | FROM (
 51 | 	SELECT company, industry, total_laid_off,`date`,
 52 | 		ROW_NUMBER() OVER (
 53 | 			PARTITION BY company, industry, total_laid_off,`date`
 54 | 			) AS row_num
 55 | 	FROM 
 56 | 		world_layoffs.layoffs_staging
 57 | ) duplicates
 58 | WHERE 
 59 | 	row_num > 1;
 60 |     
 61 | -- let's just look at oda to confirm
 62 | SELECT *
 63 | FROM world_layoffs.layoffs_staging
 64 | WHERE company = 'Oda'
 65 | ;
 66 | -- it looks like these are all legitimate entries and shouldn't be deleted. We need to really look at every single row to be accurate
 67 | 
 68 | -- these are our real duplicates 
 69 | SELECT *
 70 | FROM (
 71 | 	SELECT company, location, industry, total_laid_off,percentage_laid_off,`date`, stage, country, funds_raised_millions,
 72 | 		ROW_NUMBER() OVER (
 73 | 			PARTITION BY company, location, industry, total_laid_off,percentage_laid_off,`date`, stage, country, funds_raised_millions
 74 | 			) AS row_num
 75 | 	FROM 
 76 | 		world_layoffs.layoffs_staging
 77 | ) duplicates
 78 | WHERE 
 79 | 	row_num > 1;
 80 | 
 81 | -- these are the ones we want to delete: the rows where the row number is > 1 (2 or greater)
 82 | 
 83 | -- now you may want to write it like this (in MySQL this fails, because a CTE cannot be the target of a DELETE):
 84 | WITH DELETE_CTE AS 
 85 | (
 86 | SELECT *
 87 | FROM (
 88 | 	SELECT company, location, industry, total_laid_off,percentage_laid_off,`date`, stage, country, funds_raised_millions,
 89 | 		ROW_NUMBER() OVER (
 90 | 			PARTITION BY company, location, industry, total_laid_off,percentage_laid_off,`date`, stage, country, funds_raised_millions
 91 | 			) AS row_num
 92 | 	FROM 
 93 | 		world_layoffs.layoffs_staging
 94 | ) duplicates
 95 | WHERE 
 96 | 	row_num > 1
 97 | )
 98 | DELETE
 99 | FROM DELETE_CTE
100 | ;
101 | 
102 | 
103 | WITH DELETE_CTE AS ( -- second attempt: number the rows inside each group of fully identical rows
104 | 	SELECT company, location, industry, total_laid_off, percentage_laid_off, `date`, stage, country, funds_raised_millions, 
105 |     ROW_NUMBER() OVER (PARTITION BY company, location, industry, total_laid_off, percentage_laid_off, `date`, stage, country, funds_raised_millions) AS row_num
106 | 	FROM world_layoffs.layoffs_staging
107 | )
108 | DELETE FROM world_layoffs.layoffs_staging -- this time the DELETE targets the base table rather than the CTE
109 | WHERE (company, location, industry, total_laid_off, percentage_laid_off, `date`, stage, country, funds_raised_millions, row_num) IN ( -- NOTE(review): row_num is referenced as a column of layoffs_staging here, but it is only added by the ALTER TABLE later in this script - confirm this is meant as a non-working example
110 | 	SELECT company, location, industry, total_laid_off, percentage_laid_off, `date`, stage, country, funds_raised_millions, row_num
111 | 	FROM DELETE_CTE
112 | ) AND row_num > 1;
113 | 
114 | -- one solution, which I think is a good one, is to create a new column and add those row numbers in. Then delete where the row number is 2 or greater, then delete that column
115 | -- so let's do it!!
116 | 
117 | ALTER TABLE world_layoffs.layoffs_staging ADD row_num INT;
118 | 
119 | 
120 | SELECT *
121 | FROM world_layoffs.layoffs_staging
122 | ;
123 | 
124 | CREATE TABLE `world_layoffs`.`layoffs_staging2` (
125 | `company` text,
126 | `location`text,
127 | `industry`text,
128 | `total_laid_off` INT,
129 | `percentage_laid_off` text,
130 | `date` text,
131 | `stage`text,
132 | `country` text,
133 | `funds_raised_millions` int,
134 | row_num INT
135 | );
136 | 
137 | INSERT INTO `world_layoffs`.`layoffs_staging2`
138 | (`company`,
139 | `location`,
140 | `industry`,
141 | `total_laid_off`,
142 | `percentage_laid_off`,
143 | `date`,
144 | `stage`,
145 | `country`,
146 | `funds_raised_millions`,
147 | `row_num`)
148 | SELECT `company`,
149 | `location`,
150 | `industry`,
151 | `total_laid_off`,
152 | `percentage_laid_off`,
153 | `date`,
154 | `stage`,
155 | `country`,
156 | `funds_raised_millions`,
157 | 		ROW_NUMBER() OVER (
158 | 			PARTITION BY company, location, industry, total_laid_off,percentage_laid_off,`date`, stage, country, funds_raised_millions
159 | 			) AS row_num
160 | 	FROM 
161 | 		world_layoffs.layoffs_staging;
162 | 
163 | -- now that we have this we can delete rows where row_num is 2 or greater
164 | 
165 | DELETE FROM world_layoffs.layoffs_staging2
166 | WHERE row_num >= 2;
167 | 
168 | 
169 | 
170 | 
171 | 
172 | 
173 | 
174 | -- 2. Standardize Data
175 | 
176 | SELECT * 
177 | FROM world_layoffs.layoffs_staging2;
178 | 
179 | -- if we look at industry it looks like we have some null and empty rows, let's take a look at these
180 | SELECT DISTINCT industry
181 | FROM world_layoffs.layoffs_staging2
182 | ORDER BY industry;
183 | 
184 | SELECT *
185 | FROM world_layoffs.layoffs_staging2
186 | WHERE industry IS NULL 
187 | OR industry = ''
188 | ORDER BY industry;
189 | 
190 | -- let's take a look at these
191 | SELECT *
192 | FROM world_layoffs.layoffs_staging2
193 | WHERE company LIKE 'Bally%';
194 | -- nothing wrong here
195 | SELECT *
196 | FROM world_layoffs.layoffs_staging2
197 | WHERE company LIKE 'airbnb%';
198 | 
199 | -- it looks like airbnb is a travel, but this one just isn't populated.
200 | -- I'm sure it's the same for the others. What we can do is
201 | -- write a query that if there is another row with the same company name, it will update it to the non-null industry values
202 | -- makes it easy so if there were thousands we wouldn't have to manually check them all
203 | 
204 | -- we should set the blanks to nulls since those are typically easier to work with
205 | UPDATE world_layoffs.layoffs_staging2
206 | SET industry = NULL
207 | WHERE industry = '';
208 | 
209 | -- now if we check those are all null
210 | 
211 | SELECT *
212 | FROM world_layoffs.layoffs_staging2
213 | WHERE industry IS NULL 
214 | OR industry = ''
215 | ORDER BY industry;
216 | 
217 | -- now we need to populate those nulls if possible
218 | 
219 | UPDATE world_layoffs.layoffs_staging2 AS missing -- rows whose industry is still NULL
220 | INNER JOIN world_layoffs.layoffs_staging2 AS known -- rows of the same company that do have an industry
221 | ON missing.company = known.company
222 | SET missing.industry = known.industry
223 | WHERE missing.industry IS NULL
224 | AND known.industry IS NOT NULL;
225 | 
226 | -- and if we check it looks like Bally's was the only one without a populated row to populate this null values
227 | SELECT *
228 | FROM world_layoffs.layoffs_staging2
229 | WHERE industry IS NULL 
230 | OR industry = ''
231 | ORDER BY industry;
232 | 
233 | -- ---------------------------------------------------
234 | 
235 | -- I also noticed the Crypto has multiple different variations. We need to standardize that - let's say all to Crypto
236 | SELECT DISTINCT industry
237 | FROM world_layoffs.layoffs_staging2
238 | ORDER BY industry;
239 | 
240 | UPDATE world_layoffs.layoffs_staging2 -- schema-qualified so it does not depend on the default schema
241 | SET industry = 'Crypto'
242 | WHERE industry IN ('Crypto Currency', 'CryptoCurrency');
243 | 
244 | -- now that's taken care of:
245 | SELECT DISTINCT industry
246 | FROM world_layoffs.layoffs_staging2
247 | ORDER BY industry;
248 | 
249 | -- --------------------------------------------------
250 | -- we also need to look at the country values
251 | 
252 | SELECT *
253 | FROM world_layoffs.layoffs_staging2;
254 | 
255 | -- everything looks good except apparently we have some "United States" and some "United States." with a period at the end. Let's standardize this.
256 | SELECT DISTINCT country
257 | FROM world_layoffs.layoffs_staging2
258 | ORDER BY country;
259 | 
260 | UPDATE world_layoffs.layoffs_staging2 SET country = TRIM(TRAILING '.' FROM country)
261 | WHERE country LIKE '%.'; -- only touch the rows that actually end in a period
262 | 
263 | -- now if we run this again it is fixed
264 | SELECT DISTINCT country
265 | FROM world_layoffs.layoffs_staging2
266 | ORDER BY country;
267 | 
268 | 
269 | -- Let's also fix the date columns:
270 | SELECT *
271 | FROM world_layoffs.layoffs_staging2;
272 | 
273 | -- we can use str to date to update this field
274 | UPDATE world_layoffs.layoffs_staging2 -- intentionally no WHERE: every row's m/d/Y text date is converted
275 | SET `date` = STR_TO_DATE(`date`, '%m/%d/%Y');
276 | 
277 | -- now we can convert the data type properly
278 | ALTER TABLE world_layoffs.layoffs_staging2 -- schema-qualified so it does not depend on the default schema
279 | MODIFY COLUMN `date` DATE;
280 | 
281 | 
282 | SELECT *
283 | FROM world_layoffs.layoffs_staging2;
284 | 
285 | 
286 | 
287 | 
288 | 
289 | -- 3. Look at Null Values
290 | 
291 | -- the null values in total_laid_off, percentage_laid_off, and funds_raised_millions all look normal. I don't think I want to change that
292 | -- I like having them null because it makes it easier for calculations during the EDA phase
293 | 
294 | -- so there isn't anything I want to change with the null values
295 | 
296 | 
297 | 
298 | 
299 | -- 4. remove any columns and rows we need to
300 | 
301 | SELECT *
302 | FROM world_layoffs.layoffs_staging2
303 | WHERE total_laid_off IS NULL;
304 | 
305 | 
306 | SELECT *
307 | FROM world_layoffs.layoffs_staging2
308 | WHERE total_laid_off IS NULL
309 | AND percentage_laid_off IS NULL;
310 | 
311 | -- Delete Useless data we can't really use
312 | DELETE FROM world_layoffs.layoffs_staging2
313 | WHERE total_laid_off IS NULL
314 | AND percentage_laid_off IS NULL;
315 | 
316 | SELECT * 
317 | FROM world_layoffs.layoffs_staging2;
318 | 
319 | ALTER TABLE world_layoffs.layoffs_staging2 -- row_num was only a helper for removing duplicates
320 | DROP COLUMN row_num;
321 | 
322 | 
323 | SELECT * 
324 | FROM world_layoffs.layoffs_staging2;
325 | 
326 | 
327 | 
328 | 
329 | 
330 | 
331 | 
332 | 
333 | 
334 | 
335 | 
336 | 
337 | 
338 | 
339 | 
340 | 
341 | 
342 | 
343 | 
344 | 
345 | 
346 | 
347 | 
348 | 
349 | 
350 | 
351 | 
352 | 
353 | 
354 | 
355 | 
356 | 
357 | 
358 | 
359 | 


--------------------------------------------------------------------------------
/Portfolio Project - EDA.sql:
--------------------------------------------------------------------------------
  1 | -- EDA
  2 | 
  3 | -- Here we are just going to explore the data and find trends or patterns or anything interesting like outliers
  4 | 
  5 | -- normally when you start the EDA process you have some idea of what you're looking for
  6 | 
  7 | -- with this info we are just going to look around and see what we find!
  8 | 
  9 | SELECT * 
 10 | FROM world_layoffs.layoffs_staging2;
 11 | 
 12 | -- EASIER QUERIES
 13 | 
 14 | SELECT MAX(total_laid_off)
 15 | FROM world_layoffs.layoffs_staging2;
 16 | 
 17 | 
 18 | 
 19 | 
 20 | 
 21 | 
 22 | -- Looking at Percentage to see how big these layoffs were
 23 | SELECT MAX(percentage_laid_off),  MIN(percentage_laid_off)
 24 | FROM world_layoffs.layoffs_staging2
 25 | WHERE  percentage_laid_off IS NOT NULL;
 26 | 
 27 | -- Which companies had 1, which is basically 100 percent of the company laid off
 28 | SELECT *
 29 | FROM world_layoffs.layoffs_staging2
 30 | WHERE  percentage_laid_off = 1;
 31 | -- these are mostly startups it looks like who all went out of business during this time
 32 | 
 33 | -- if we order by funds_raised_millions we can see how big some of these companies were
 34 | SELECT *
 35 | FROM world_layoffs.layoffs_staging2
 36 | WHERE  percentage_laid_off = 1
 37 | ORDER BY funds_raised_millions DESC;
 38 | -- BritishVolt looks like an EV company, Quibi! I recognize that company - wow raised like 2 billion dollars and went under - ouch
 39 | 
 40 | 
 41 | 
 42 | 
 43 | 
 44 | 
 45 | 
 46 | 
 47 | 
 48 | 
 49 | 
 50 | 
 51 | 
 52 | 
 53 | 
 54 | 
 55 | -- SOMEWHAT TOUGHER AND MOSTLY USING GROUP BY--------------------------------------------------------------------------------------------------
 56 | 
 57 | -- Companies with the biggest single Layoff
 58 | 
 59 | SELECT company, total_laid_off
 60 | FROM world_layoffs.layoffs_staging2 -- the cleaned table, like every other query in this file
 61 | ORDER BY total_laid_off DESC
 62 | LIMIT 5;
 63 | -- now that's just on a single day
 64 | 
 65 | -- Companies with the most Total Layoffs
 66 | SELECT company, SUM(total_laid_off)
 67 | FROM world_layoffs.layoffs_staging2
 68 | GROUP BY company
 69 | ORDER BY 2 DESC
 70 | LIMIT 10;
 71 | 
 72 | 
 73 | 
 74 | -- by location
 75 | SELECT location, SUM(total_laid_off)
 76 | FROM world_layoffs.layoffs_staging2
 77 | GROUP BY location
 78 | ORDER BY 2 DESC
 79 | LIMIT 10;
 80 | 
 81 | -- this is the total in the past 3 years or in the dataset
 82 | 
 83 | SELECT country, SUM(total_laid_off)
 84 | FROM world_layoffs.layoffs_staging2
 85 | GROUP BY country
 86 | ORDER BY 2 DESC;
 87 | 
 88 | SELECT YEAR(date), SUM(total_laid_off)
 89 | FROM world_layoffs.layoffs_staging2
 90 | GROUP BY YEAR(date)
 91 | ORDER BY 1 ASC;
 92 | 
 93 | 
 94 | SELECT industry, SUM(total_laid_off)
 95 | FROM world_layoffs.layoffs_staging2
 96 | GROUP BY industry
 97 | ORDER BY 2 DESC;
 98 | 
 99 | 
100 | SELECT stage, SUM(total_laid_off)
101 | FROM world_layoffs.layoffs_staging2
102 | GROUP BY stage
103 | ORDER BY 2 DESC;
104 | 
105 | 
106 | 
107 | 
108 | 
109 | 
110 | -- TOUGHER QUERIES------------------------------------------------------------------------------------------------------------------------------------
111 | 
112 | -- Earlier we looked at Companies with the most Layoffs. Now let's look at that per year. It's a little more difficult.
113 | -- I want to look at the top 3 companies by total layoffs in each year
114 | 
115 | WITH Company_Year AS -- total layoffs per company per calendar year
116 | (
117 |   SELECT company, YEAR(`date`) AS years, SUM(total_laid_off) AS total_laid_off
118 |   FROM world_layoffs.layoffs_staging2
119 |   GROUP BY company, YEAR(`date`)
120 | )
121 | , Company_Year_Rank AS ( -- rank the companies within each year, biggest total first
122 |   SELECT company, years, total_laid_off, DENSE_RANK() OVER (PARTITION BY years ORDER BY total_laid_off DESC) AS ranking
123 |   FROM Company_Year
124 | )
125 | SELECT company, years, total_laid_off, ranking
126 | FROM Company_Year_Rank
127 | WHERE ranking <= 3 -- DENSE_RANK keeps ties, so a year can return more than 3 rows
128 | AND years IS NOT NULL -- rows without a date have no year to report under
129 | ORDER BY years ASC, total_laid_off DESC;
130 | 
131 | 
132 | 
133 | 
134 | -- Rolling Total of Layoffs Per Month
135 | SELECT SUBSTRING(`date`,1,7) AS dates, SUM(total_laid_off) AS total_laid_off -- the first 7 characters of the date are the year and month
136 | FROM world_layoffs.layoffs_staging2
137 | GROUP BY dates
138 | ORDER BY dates ASC;
139 | 
140 | -- now use it in a CTE so we can query off of it
141 | WITH DATE_CTE AS -- layoffs per month; the sorting is done by the outer query
142 | (
143 | SELECT SUBSTRING(`date`,1,7) AS dates, SUM(total_laid_off) AS total_laid_off
144 | FROM world_layoffs.layoffs_staging2
145 | WHERE `date` IS NOT NULL -- undated rows would otherwise form a NULL month that sorts first and is added into every running total
146 | GROUP BY dates
147 | )
148 | SELECT dates, SUM(total_laid_off) OVER (ORDER BY dates ASC) AS rolling_total_layoffs
149 | FROM DATE_CTE
150 | ORDER BY dates ASC;
151 | 
152 | 
153 | 
154 | 
155 | 
156 | 
157 | 
158 | 
159 | 
160 | 
161 | 
162 | 
163 | 
164 | 
165 | 
166 | 
167 | 
168 | 
169 | 
170 | 
171 | 
172 | 
173 | 
174 | 
175 | 
176 | 
177 | 
178 | 
179 | 
180 | 
181 | 
182 | 
183 | 
184 | 
185 | 
186 | 
187 | 
188 | 
189 | 
190 | 
191 | 
192 | 
193 | 
194 | 
195 | 
196 | 
197 | 
198 | 
199 | 
200 | 
201 | 
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # MySQL-YouTube-Series


--------------------------------------------------------------------------------