├── example_queries ├── phone_eda_query.sql ├── update_sid_michaels.sql └── requested_query.sql ├── normal_forms ├── 2nf_queries.sql ├── 1nf_queries.sql └── 3nf_queries.sql ├── dataset_synthesis ├── table_creation.sql └── fill_tables.sql └── README.md /example_queries/phone_eda_query.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | td.phone, 3 | h.hospital_name, 4 | h.city, 5 | COUNT(td.phone) AS therapists_at_hospital 6 | FROM therapist_directory td 7 | JOIN hospitals h 8 | ON td.therapist_id = h.therapist_id 9 | GROUP BY 10 | td.phone, 11 | h.hospital_name, 12 | h.city 13 | ORDER BY 14 | COUNT(td.phone) DESC; -------------------------------------------------------------------------------- /normal_forms/2nf_queries.sql: -------------------------------------------------------------------------------- 1 | /* SECOND NORMAL FORM QUERIES */ 2 | 3 | /* creating visit_specifications table */ 4 | CREATE TABLE IF NOT EXISTS visit_specifications( 5 | therapist_id INTEGER PRIMARY KEY, 6 | insurance VARCHAR(3) CHECK(insurance IN ('Yes', 'No')), 7 | new_patients VARCHAR(3) CHECK(insurance IN ('Yes', 'No')), 8 | CONSTRAINT fk_visits 9 | FOREIGN KEY(therapist_id) 10 | REFERENCES therapist_directory(therapist_id)); 11 | 12 | /* inserting data into new table */ 13 | INSERT INTO visit_specifications(therapist_id, insurance, new_patients) 14 | SELECT therapist_id, insurance, new_patients 15 | FROM therapist_directory; -------------------------------------------------------------------------------- /dataset_synthesis/table_creation.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE IF NOT EXISTS therapist_directory( 2 | therapist_id serial PRIMARY KEY, 3 | name VARCHAR(100), 4 | gender VARCHAR(10), 5 | insurance VARCHAR(3) CHECK(insurance IN ('Yes', 'No')), 6 | new_patients VARCHAR(3) CHECK(insurance IN ('Yes', 'No')), 7 | speciality_one VARCHAR(100), 8 | speciality_two VARCHAR(100), 9 | speciality_three VARCHAR(100), 10 | license VARCHAR(5) CHECK(license IN ('MFT', 'PhD', 'MD')), 11 | phone CHAR(10) 12 | ); 13 | 14 | CREATE TABLE IF NOT EXISTS hospitals( 15 | hospital_name VARCHAR(50), 16 | state CHAR(2), 17 | city VARCHAR(20), 18 | therapist VARCHAR(100) 19 | ); -------------------------------------------------------------------------------- /example_queries/update_sid_michaels.sql: -------------------------------------------------------------------------------- 1 | /*updating sid michael's city*/ 2 | UPDATE 3 | therapist_location 4 | SET 5 | hospital_id = (SELECT hospital_id 6 | FROM locations 7 | WHERE hospital_name = 'Open Clinic' 8 | AND city = 'San Jose') 9 | WHERE 10 | therapist_id = 15; 11 | 12 | /*checking that update ran correctly*/ 13 | SELECT 14 | td.first_name, 15 | td.last_name, 16 | l.hospital_name, 17 | l.city 18 | pn.phone_number 19 | FROM 20 | therapist_directory td 21 | JOIN 22 | therapist_location tl 23 | ON td.therapist_id = tl.therapist_id 24 | JOIN 25 | locations l 26 | ON l.hospital_id = tl.hospital_id 27 | JOIN 28 | phone_numbers pn 29 | ON tl.hospital_id = pn.hospital_id 30 | WHERE 31 | td.therapist_id = 15; -------------------------------------------------------------------------------- /example_queries/requested_query.sql: -------------------------------------------------------------------------------- 1 | /*Determine the number of therapists in Northern California that specialize in mood disorders 2 | and, of these therapists, how many are currently accepting new patients.*/ 3 | 4 | SELECT 5 | sub.new_patients, 6 | COUNT(therapist_id) AS norcal_therapists 7 | FROM 8 | (SELECT s.therapist_id, s.speciality_one, s.speciality_two, s.speciality_three, td.new_patients 9 | FROM specialties s 10 | JOIN therapist_directory td ON s.therapist_id = td.therapist_id 11 | JOIN therapist_location tl ON td.therapist_id = tl.therapist_id 12 | JOIN locations l ON tl.hospital_id = l.hospital_id 13 | WHERE l.city ~ '(San Francisco|Oakland|San Jose|Sacramento|Auburn)') sub 14 | WHERE 15 | speciality_one ~ '(Anxiety|Depression|Bipolar)' 16 | OR speciality_two ~ '(Anxiety|Depression|Bipolar)' 17 | OR speciality_three ~ '(Anxiety|Depression|Bipolar)' 18 | GROUP BY sub.new_patients; -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | A Complete Guide to Database Normalization in SQL 2 | ================ 3 | 4 | ### Tackle messy tables head-on with PostgreSQL. 5 | 6 | #### Overview 7 | 8 | Repo for Towards Data Science article [“A Complete Guide to Database 9 | Normalization in SQL”](https://towardsdatascience.com/a-complete-guide-to-database-normalization-in-sql-6b16544deb0) by Emily Burns. Includes SQL files written 10 | to demonstrate facets of relational database normalization with the 11 | synthetic ‘Northern\_California\_Therapists’ dataset. 12 | 13 | #### Files 14 | 15 | **/dataset\_synthesis folder/** 16 | 17 | **table\_creation.sql**- synthesizing ‘Northern\_California\_Therapists’ 18 | database 19 | **fill\_tables.sql**- filling ‘Northern\_California\_Therapists’ 20 | database tables with mock data 21 | 22 | **/normal\_forms folder/** 23 | 24 | **1nf\_queries.sql**- SQL used to update tables into first normal form 25 | **2nf\_queries.sql**- SQL used to update tables into second normal 26 | form 27 | **3nf\_queries.sql**- SQL used to update tables into third normal form 28 | 29 | **/example\_queries folder/** 30 | 31 | **phone\_eda\_query.sql**- SQL used to look at distribution of phone 32 | numbers across each hospital and therapist 33 | **update\_sid\_michaels.sql**- SQL used to update therapist’s location 34 | **requested_query.sql**- SQL used to "complete company's requested data" 35 | 36 | #### Tools 37 | 38 | - [PostgreSQL server](https://www.postgresql.org/) 39 | - [pgAdmin development platform](https://www.pgadmin.org/) 40 | 41 | *NOTE: normalization is applicable to all SQL servers. Chosen tools are 42 | indicative of author’s personal preference only. For help installing 43 | either PostgreSQL or pgAdmin on your own local device, please refer to 44 | the information posted on the provided tool’s respective website)* 45 | -------------------------------------------------------------------------------- /normal_forms/1nf_queries.sql: -------------------------------------------------------------------------------- 1 | /* NORMAL FORM ONE QUERIES */ 2 | 3 | /* adding primary key */ 4 | ALTER TABLE hospitals ADD COLUMN hospital_id SERIAL PRIMARY KEY; 5 | ALTER TABLE hospitals ADD COLUMN therapist_id INTEGER; 6 | 7 | /* adding foreign key */ 8 | UPDATE hospitals h 9 | SET therapist_id = td.therapist_id 10 | FROM therapist_directory td 11 | WHERE td.name = h.therapist; 12 | 13 | ALTER TABLE hospitals 14 | ADD CONSTRAINT fk_therapist_directory 15 | FOREIGN KEY (therapist_id) 16 | REFERENCES therapist_directory(therapist_id); 17 | 18 | /*dropping therapist column- inconsistent with data group */ 19 | ALTER TABLE hospitals 20 | DROP COLUMN therapist; 21 | 22 | 23 | 24 | /* make new last_name column */ 25 | ALTER TABLE therapist_directory ADD COLUMN last_name VARCHAR(30); 26 | 27 | /* add last name values to last_name */ 28 | UPDATE therapist_directory 29 | SET last_name = SUBSTRING(name, POSITION(' ' IN name)+1, LENGTH(name)); 30 | 31 | /* update name column to first_name */ 32 | ALTER TABLE therapist_directory 33 | RENAME COLUMN name TO first_name; 34 | 35 | /* remove last name substring from first_name */ 36 | UPDATE therapist_directory 37 | SET first_name = SUBSTRING(first_name, 1, POSITION(' ' IN first_name)-1); 38 | 39 | 40 | 41 | /* creating new table for specialties */ 42 | CREATE TABLE IF NOT EXISTS specialties( 43 | specialties_key serial PRIMARY KEY, 44 | speciality_one VARCHAR(100), 45 | speciality_two VARCHAR(100), 46 | speciality_three VARCHAR(100), 47 | therapist_id INTEGER, 48 | CONSTRAINT fk_therapist 49 | FOREIGN KEY(therapist_id) 50 | REFERENCES therapist_directory(therapist_id)); 51 | 52 | /* inserting data into new specialities table */ 53 | INSERT INTO specialties(therapist_id, speciality_one, speciality_two, speciality_three) 54 | SELECT therapist_id, speciality_one, speciality_two, speciality_three 55 | FROM therapist_directory; 56 | 57 | /* dropping speciality columns from therapist_directory */ 58 | ALTER TABLE therapist_directory 59 | DROP COLUMN speciality_one, 60 | DROP COLUMN speciality_two, 61 | DROP COLUMN speciality_three; 62 | 63 | -------------------------------------------------------------------------------- /normal_forms/3nf_queries.sql: -------------------------------------------------------------------------------- 1 | /* THIRD NORMAL FORM QUERIES */ 2 | 3 | /* creating locations table */ 4 | CREATE TABLE IF NOT EXISTS locations( 5 | hospital_id SERIAL PRIMARY KEY, 6 | hospital_name VARCHAR(50), 7 | state CHAR(2), 8 | city VARCHAR(20)); 9 | 10 | INSERT INTO 11 | locations(hospital_name, state, city) 12 | VALUES 13 | ('Van Holsen Community Hospital', 'CA', 'San Francisco'), 14 | ('Clear Water Services', 'CA', 'San Diego'), 15 | ('Imagery Health', 'CA', 'Sacramento'), 16 | ('Blue Cross Clinic', 'CA', 'Los Angeles'), 17 | ('Van Holsen Community Hospital', 'CA', 'Long Beach'), 18 | ('Blue Cross Clinic', 'CA', 'Santa Barbara'), 19 | ('Blue Cross Clinic', 'CA', 'San Francisco'), 20 | ('Imagery Health', 'CA', 'Auburn'), 21 | ('Holistic Health Services', 'CA', 'Santa Barbara'), 22 | ('Open Clinic', 'CA', 'San Jose'), 23 | ('Clark Jamison Hospitals', 'CA', 'Fresno'), 24 | ('Open Clinic', 'CA', 'Oakland'), 25 | ('Clear Minds Community', 'CA', 'Sacramento'); 26 | 27 | /* creating new table for therapist_location */ 28 | CREATE TABLE IF NOT EXISTS therapist_location( 29 | therapist_id INTEGER PRIMARY KEY, 30 | hospital_id INTEGER, 31 | CONSTRAINT fk_therapist_hospital 32 | FOREIGN KEY(hospital_id) 33 | REFERENCES locations(hospital_id)); 34 | 35 | /* inserting data into new therapist_location table */ 36 | INSERT INTO therapist_location(therapist_id, hospital_id) 37 | SELECT DISTINCT td.therapist_id, l.hospital_id 38 | FROM therapist_directory td 39 | JOIN hospitals h ON td.therapist_id = h.therapist_id 40 | JOIN locations l ON h.hospital_name = l.hospital_name AND h.city = l.city; 41 | 42 | /* creating phone numbers table */ 43 | CREATE TABLE IF NOT EXISTS phone_numbers( 44 | hospital_id INTEGER PRIMARY KEY, 45 | phone_number CHAR(10), 46 | CONSTRAINT fk_phones 47 | FOREIGN KEY(hospital_id) 48 | REFERENCES locations(hospital_id)); 49 | 50 | /*inserting data into phone numbers table */ 51 | INSERT INTO phone_numbers(hospital_id, phone_number) 52 | SELECT DISTINCT tl.hospital_id, td.phone 53 | FROM therapist_directory td 54 | JOIN therapist_location tl ON td.therapist_id = tl.therapist_id; 55 | 56 | /* dropping hospitals table */ 57 | DROP TABLE hospitals -------------------------------------------------------------------------------- /dataset_synthesis/fill_tables.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO 2 | therapist_directory(name, gender, insurance, new_patients, speciality_one, 3 | speciality_two, speciality_three, license, phone) 4 | VALUES 5 | ('Flora Martinez', 'Female', 'Yes', 'Yes', 'OCD', 'Phobias', 'Anxiety', 'MD', '8495776489'), 6 | ('Andy James', 'Male', 'Yes', 'No', 'Depression', 'Anxiety', 'PTSD', 'PhD', '2340894766'), 7 | ('Hannah Myers', 'Female', 'No', 'Yes', 'Anxiety', 'Schizophrenia', 'Bipolar', 'MD', '9907846574'), 8 | ('Jane Huang', 'Female', 'Yes', 'Yes', 'Depression', 'Anxiety', 'Bipolar', 'MD', '4507856797'), 9 | ('April Adams', 'Female', 'No', 'Yes', 'OCD', 'Anxiety', 'PTSD', 'MFT', '4507856797'), 10 | ('Jon Schaffer', 'Male', 'Yes', 'No', 'BPD', 'Bipolar', 'Depression', 'PhD', '9907846574'), 11 | ('Shauna West','Female', 'Yes', 'Yes', 'ADHD', 'Anxiety', 'OCD', 'MD', '8495776480'), 12 | ('Juan Angelo', 'Male', 'No', 'Yes', 'Schizophrenia', 'Bipolar', 'Depression', 'MD', '4507856797'), 13 | ('Christie Yang', 'Female', 'Yes', 'Yes', 'Autism', 'ADHD', 'OCD', 'PhD', '4507856796'), 14 | ('Annika Neusler', 'Female', 'Yes', 'No', 'Addiction', 'Depression', 'PTSD', 'MFT', '9907846575'), 15 | ('Simone Anderson', 'Female', 'No', 'No', 'Schizophrenia', 'Depression', 'PTSD', 'MD', '8304498765'), 16 | ('Ted Nyguen', 'Male', 'Yes', 'Yes', 'ADHD', 'Anxiety', 'Phobias', 'PhD', '4301239990'), 17 | ('Valentino Rossi', 'Male', 'Yes', 'Yes', 'Autism', 'Anxiety', 'Depression', 'MD', '8304498765'), 18 | ('Jessica Armer', 'Female', 'No', 'Yes', 'PTSD', 'Bipolar', 'Depression', 'MD', '3330456612'), 19 | ('Sid Michaels', 'Female', 'Yes', 'Yes', 'OCD', 'Phobia', 'Anxiety', 'MFT', '4301239997'), 20 | ('Yen Waters', 'Male', 'Yes', 'Yes', 'Anxiety', 'Depression', 'ADHD', 'PhD', '4507856796'), 21 | ('Ru Izaelia', 'Female', 'No', 'Yes', 'Bipolar', 'BPD', 'Phobias', 'MD', '4301239990'), 22 | ('Vishal Rao', 'Male', 'Yes', 'Yes', 'Depression', 'Schizophrenia', 'Anxiety', 'MD', '7305557894'), 23 | ('Lana John', 'Female', 'Yes', 'Yes', 'Anxiety', 'Phobias', 'OCD', 'MFT', '7305557894'), 24 | ('Izzie Geralt', 'Female', 'Yes', 'Yes', 'Depression', 'Addiction', 'Anxiety', 'MD', '4301239990'); 25 | 26 | INSERT INTO 27 | hospitals(hospital_name, state, city, therapist) 28 | VALUES 29 | ('Van Holsen Community Hospital', 'CA', 'San Francisco', 'Flora Martinez'), 30 | ('Clear Water Services', 'CA', 'San Diego', 'Andy James'), 31 | ('Imagery Health', 'CA', 'Sacramento', 'Hannah Myers'), 32 | ('Blue Cross Clinic', 'CA', 'Los Angeles', 'Jane Huang'), 33 | ('Blue Cross Clinic', 'CA', 'Los Angeles', 'April Adams'), 34 | ('Imagery Health', 'CA', 'Sacramento', 'Jon Schaffer'), 35 | ('Van Holsen Community Hospital', 'CA', 'Long Beach', 'Shauna West'), 36 | ('Blue Cross Clinic', 'CA', 'Santa Barbara', 'Juan Angelo'), 37 | ('Blue Cross Clinic', 'CA', 'San Francisco', 'Christie Yang'), 38 | ('Imagery Health', 'CA', 'Auburn', 'Annika Neusler'), 39 | ('Holistic Health Services', 'CA', 'Santa Barbara', 'Simone Anderson'), 40 | ('Open Clinic', 'CA', 'San Jose', 'Ted Nyguen'), 41 | ('Holistic Health Services', 'CA', 'Santa Barbara', 'Valentino Rossi'), 42 | ('Clark Jamison Hospitals', 'CA', 'Fresno', 'Jessica Armer'), 43 | ('Open Clinic', 'CA', 'Oakland', 'Sid Michaels'), 44 | ('Blue Cross Clinic', 'CA', 'San Francisco', 'Yen Waters'), 45 | ('Open Clinic', 'CA', 'San Jose', 'Ru Izaelia'), 46 | ('Clear Minds Community', 'CA', 'Sacramento', 'Vishal Rao'), 47 | ('Clear Minds Community', 'CA', 'Sacramento', 'Lana John'), 48 | ('Open Clinic', 'CA', 'San Jose', 'Izzie Geralt'); --------------------------------------------------------------------------------