├── 1. Basics_of_R.R ├── parks_and_rec_dataset.csv ├── 2. Operators.R ├── parks_and_rec_dataset_output.csv ├── 3. Working_with_Files.R ├── 7. Parsing and Converting Dates.R ├── 5. Grouping_and_Aggregating.R ├── 8. Removing Duplicates.R ├── Messy_Dataset.csv ├── 6. Handling_Missing_Data.R ├── 4. Selecting_and_Ordering.R ├── 9. Data Visualization and Presentation.R └── parks_and_rec_budget.csv /1. Basics_of_R.R: -------------------------------------------------------------------------------- 1 | # Variables 2 | # Numeric value 3 | num_var <- 42 4 | 5 | print(num_var) 6 | # class() returns the class of the stored object 7 | class(num_var) 8 | 9 | # Character string 10 | str_var <- "I like R" 11 | 12 | # Numeric vector built with c() 13 | vec_var <- c(10,20,50,100,1000) 14 | 15 | # Named list mixing a string, a number, and a numeric vector 16 | list_var <- list(name = "Alex", age = 30, scores = c(90,50,24)) 17 | # Extract a single list element by name with $ 18 | list_var$name 19 | 20 | # Data frame with three columns of three values each 21 | df <- data.frame( 22 | name = c("Alex", "Sally", "John"), 23 | age = c(30, 50, 99), 24 | scores = c(90,50,24) 25 | ) 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /parks_and_rec_dataset.csv: -------------------------------------------------------------------------------- 1 | Character,Department,Role,Annual_Salary,Dogs_Rescued_With_3_Legs 2 | Leslie Knope,Parks,Deputy Director,70000,0 3 | Ron Swanson,Parks,Director,90000,0 4 | Tom Haverford,Parks,Administrator,55000,0 5 | April Ludgate,Parks,Assistant,40000,1 6 | Andy Dwyer,Parks,Shoe Shiner / Musician,25000,1 7 | Ben Wyatt,City Management,City Manager,85000,0 8 | Ann Perkins,Health,Nurse,60000,0 9 | Chris Traeger,City Management,Auditor,95000,0 10 | Donna Meagle,Parks,Office Manager,52000,0 11 | Jerry Gergich,Parks,Office Worker,50000,0 12 | -------------------------------------------------------------------------------- /2. 
Operators.R: -------------------------------------------------------------------------------- 1 | # Assignment Operator 2 | 3 | var <- 42 4 | 5 | # Arithmetic Operator 6 | 7 | x <- 10 8 | 9 | y <- 3 10 | 11 | a <- x + y 12 | 13 | x - y 14 | 15 | x * y 16 | 17 | x / y 18 | 19 | x^y 20 | 21 | x %% y 22 | 23 | # PEMDAS (order of operations): parentheses first, then division, then addition -> 31 24 | (5*10) / 2 + 6 25 | 26 | 27 | # Comparison Operators 28 | 29 | x >= y 30 | x <= y 31 | x == y 32 | x == x 33 | x != y 34 | 35 | 36 | 37 | # Logical Operators 38 | 39 | # & is element-wise AND: TRUE only when both comparisons are TRUE 40 | x >= y & x == y 41 | 42 | # | is element-wise OR: TRUE when at least one comparison is TRUE 43 | x >= y | x == y 44 | 45 | # ! has lower precedence than ==, so this negates the comparison: !(x == y) 46 | !x == y 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /parks_and_rec_dataset_output.csv: -------------------------------------------------------------------------------- 1 | "Character","Department","Role","Annual_Salary","Dogs_Rescued_With_3_Legs" 2 | "Leslie Knope","Parks","Deputy Director",70000,0 3 | "Ron Swanson","Parks","Director",90000,0 4 | "Tom Haverford","Parks","Administrator",55000,0 5 | "April Ludgate","Parks","Assistant",40000,1 6 | "Andy Dwyer","Parks","Shoe Shiner / Musician",25000,1 7 | "Ben Wyatt","City Management","City Manager",85000,0 8 | "Ann Perkins","Health","Nurse",60000,0 9 | "Chris Traeger","City Management","Auditor",95000,0 10 | "Donna Meagle","Parks","Office Manager",52000,0 11 | "Jerry Gergich","Parks","Office Worker",50000,0 12 | -------------------------------------------------------------------------------- /3. Working_with_Files.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | df <- read.csv("D:\\YouTube\\1. Raw Videos\\R Series\\Files\\parks_and_rec_dataset.csv") 4 | 5 | 6 | 7 | head(df) 8 | 9 | str(df) 10 | 11 | summary(df) 12 | 13 | df2 <- read.csv("D:\\YouTube\\1. Raw Videos\\R Series\\Files\\parks_and_rec_dataset.csv", header = TRUE, sep = ",") 14 | 15 | 16 | 17 | write.csv(df2, "D:\\YouTube\\1. 
Raw Videos\\R Series\\Files\\parks_and_rec_dataset_output.csv", row.names = FALSE) 18 | 19 | 20 | 21 | df3 <- read.csv("D:\\YouTube\\1. Raw Videos\\R Series\\Files\\parks_and_rec_dataset_output.csv", header = TRUE, sep = ",") 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /7. Parsing and Converting Dates.R: -------------------------------------------------------------------------------- 1 | # Parsing and Converting Dates 2 | 3 | library(dplyr) 4 | library(lubridate) 5 | 6 | df <- read.csv("D:\\YouTube\\1. Raw Videos\\R Series\\Files\\Messy_Dataset.csv") 7 | # Keep an untouched copy of the data as it was read 8 | df_raw <- df 9 | 10 | # Parse the mixed-format date strings, trying the orders listed below 11 | df$Transaction_Date <- parse_date_time(df$Transaction_Date, 12 | orders = c("Y-m-d", "m/d/Y", "Y/m/d", "d-m-Y")) 13 | # Split the parsed date into separate year, month, and day columns 14 | df$Transaction_Date_Year <- year(df$Transaction_Date) 15 | 16 | df$Transaction_Date_month <- month(df$Transaction_Date) 17 | 18 | df$Transaction_Date_day <- day(df$Transaction_Date) 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /5. Grouping_and_Aggregating.R: -------------------------------------------------------------------------------- 1 | # GROUPING AND AGGREGATING DATA 2 | library(dplyr) 3 | 4 | df <- read.csv("D:\\YouTube\\1. Raw Videos\\R Series\\Files\\parks_and_rec_dataset.csv") 5 | 6 | 7 | # Number of rows in each department 8 | df %>% 9 | group_by(Department) %>% 10 | summarize(Count = n()) 11 | 12 | 13 | # Several salary statistics per department; every output column is named 14 | # explicitly so it can be referenced later without backticks 15 | agg_df <- df %>% 16 | group_by(Department) %>% 17 | summarize(AVG_Salary = mean(Annual_Salary), 18 | Count = n(), 19 | Min_Salary = min(Annual_Salary), 20 | Max_Salary = max(Annual_Salary), 21 | Median_Salary = median(Annual_Salary)) 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /8. 
Removing Duplicates.R: -------------------------------------------------------------------------------- 1 | # Removing Duplicates 2 | 3 | library(dplyr) 4 | library(lubridate) # provides parse_date_time(), used below 5 | df <- read.csv("D:\\YouTube\\1. Raw Videos\\R Series\\Files\\Messy_Dataset.csv") 6 | 7 | # Drop rows that are identical in every column 8 | df_no_duplicates <- df %>% 9 | distinct() 10 | 11 | # Keep only the first row encountered for each Customer_ID 12 | df_no_duplicates2 <- df %>% 13 | distinct(Customer_ID, .keep_all = TRUE) 14 | 15 | 16 | # Parse the mixed-format date strings so rows can be ordered by date 17 | df$Transaction_Date <- parse_date_time(df$Transaction_Date, 18 | orders = c("Y-m-d", "m/d/Y", "Y/m/d", "d-m-Y")) 19 | 20 | # Sort newest-first within each customer, then keep that first (most recent) row 21 | df_no_duplicates3 <- df %>% 22 | arrange(Customer_ID, desc(Transaction_Date)) %>% 23 | distinct(Customer_ID, .keep_all = TRUE) 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /Messy_Dataset.csv: -------------------------------------------------------------------------------- 1 | Customer_ID,Customer_Name,Email,Transaction_Amount,Transaction_Date,Category 2 | 101,Alice Johnson,alice.j@example.com,150.75,2024-01-10,Electronics 3 | 102,Bob Smith,bob_smith@example.com,200.5,01/15/2024,Groceries 4 | 103,Charlie Brown,charlie.b@example,,2024-02-20,Electronics 5 | 104,,emma@example.com,99.99,"March 5, 2024",electronics 6 | 105,Emma Wilson,emma.wilson@example.com,250.0,2024-03-10,Groceries 7 | 106,David Lee,david.lee@example.com,175.2,10-04-2024,groceries 8 | 107,Frank White,frankwhite@example.com,300.0,2024/05/15,Furniture 9 | 108,Grace Adams,grace.a@example.com,120.0,15-06-2024,Furnitures 10 | 109,Helen Carter,,210.75,07/20/2024,Clothing 11 | 110,Ian Brooks,ian.b@example.com,,"August 25, 2024",Clothing 12 | 101,Alice Johnson,alice.j@example.com,150.75,2024-01-10,Electronics 13 | 104,David Lee,david.lee@example.com,175.2,10-04-2024,groceries 14 | -------------------------------------------------------------------------------- /6. 
Handling_Missing_Data.R: -------------------------------------------------------------------------------- 1 | # Cleaning Messy Data 2 | 3 | 4 | library(dplyr) 5 | library(tidyr) 6 | 7 | # Install if you haven't already 8 | # install.packages("tidyverse") 9 | # Read empty strings and the text "NA" as missing values 10 | df <- read.csv("D:\\YouTube\\1. Raw Videos\\R Series\\Files\\Messy_Dataset.csv", 11 | na.strings = c("", "NA")) 12 | 13 | 14 | # Count the missing values in each column 15 | colSums(is.na(df)) 16 | 17 | # Remove rows when no email is present 18 | df_cleaned <- df %>% drop_na("Email") 19 | 20 | # Populate missing numeric values (use ONE of the two options below) 21 | df_cleaned$Transaction_Amount[is.na(df_cleaned$Transaction_Amount)] <- 0 22 | 23 | # Alternative: fill with the column mean. Kept commented out because it has no effect once the NAs have been replaced by 0 above. 24 | # df_cleaned$Transaction_Amount[is.na(df_cleaned$Transaction_Amount)] <- mean(df_cleaned$Transaction_Amount, na.rm = TRUE) 25 | 26 | # Populating Character Columns 27 | df_cleaned$Customer_Name[is.na(df_cleaned$Customer_Name)] <- "Unknown" 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /4. Selecting_and_Ordering.R: -------------------------------------------------------------------------------- 1 | # SELECTING AND ORDERING DATA 2 | 3 | df <- read.csv("D:\\YouTube\\1. 
Raw Videos\\R Series\\Files\\parks_and_rec_dataset.csv") 4 | # Install dplyr only when it is missing, so re-running the script does not reinstall it 5 | if (!requireNamespace("dplyr", quietly = TRUE)) install.packages("dplyr") 6 | 7 | library(dplyr) 8 | # Keep only the named columns 9 | df_characters <- select(df, Character, Role) 10 | # A leading minus drops the column 11 | select(df, -Dogs_Rescued_With_3_Legs) 12 | # a:b selects the range of columns from a through b 13 | select(df, Character:Annual_Salary) 14 | 15 | # Filtering 16 | # Exact match on Role 17 | filter(df, Role == "Director") 18 | 19 | # Pattern match: any Role containing "Director" 20 | filter(df, grepl("Director", Role)) 21 | 22 | 23 | # Both conditions must hold for a row to be kept 24 | filter(df, Annual_Salary > 50000 & Department == "Parks") 25 | 26 | # ORDERING 27 | # Ascending by salary 28 | arrange(df, Annual_Salary) 29 | 30 | # Descending by salary 31 | arrange(df, desc(Annual_Salary)) 32 | 33 | 34 | # Pipe Operator 35 | df %>% 36 | select(Character:Annual_Salary) %>% 37 | filter(Annual_Salary > 50000 & Department == "Parks") %>% 38 | arrange(desc(Annual_Salary)) 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /9. Data Visualization and Presentation.R: -------------------------------------------------------------------------------- 1 | # Data Visualization and Presentation 2 | 3 | library(dplyr) 4 | library(ggplot2) 5 | 6 | df <- read.csv("D:\\YouTube\\1. 
Raw Videos\\R Series\\Files\\parks_and_rec_budget.csv") 7 | 8 | # BAR Charts 9 | 10 | # Total budget per department, bars ordered from largest to smallest total 11 | df %>% 12 | group_by(Department) %>% 13 | summarise(Total_Budget = sum(Budget_in_Thousands)) %>% 14 | ggplot(aes(x = reorder(Department, -Total_Budget), y = Total_Budget, fill = Department)) + 15 | geom_bar(stat = "identity") + 16 | ggtitle("Total Budget by Department") + 17 | theme(axis.text.x = element_text(angle = 45, hjust = 1)) 18 | 19 | 20 | 21 | # Line Charts 22 | 23 | # Combined budget of all departments for each year; every layer, including the theme, must be joined with + 24 | df %>% 25 | group_by(Year) %>% 26 | summarise(Annual_Budget = sum(Budget_in_Thousands)) %>% 27 | ggplot(aes(x = Year, y = Annual_Budget)) + 28 | geom_line() + 29 | geom_point() + 30 | ggtitle("Annual Budget for all Departments") + 31 | theme_minimal() 32 | 33 | #Break out by Departments 34 | 35 | df %>% 36 | ggplot(aes(x = Year, y = Budget_in_Thousands, color = Department)) + 37 | geom_line() + 38 | ggtitle("Annual Budget per Department") + 39 | theme_minimal() 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /parks_and_rec_budget.csv: -------------------------------------------------------------------------------- 1 | Year,Department,Budget_in_Thousands 2 | 2005,Parks,561 3 | 2006,Parks,520 4 | 2007,Parks,574 5 | 2008,Parks,607 6 | 2009,Parks,598 7 | 2010,Parks,552 8 | 2011,Parks,568 9 | 2012,Parks,636 10 | 2013,Parks,631 11 | 2014,Parks,617 12 | 2015,Parks,630 13 | 2016,Parks,596 14 | 2017,Parks,618 15 | 2018,Parks,748 16 | 2019,Parks,626 17 | 2020,Parks,712 18 | 2005,Health,321 19 | 2006,Health,336 20 | 2007,Health,337 21 | 2008,Health,365 22 | 2009,Health,272 23 | 2010,Health,328 24 | 2011,Health,353 25 | 2012,Health,355 26 | 2013,Health,316 27 | 2014,Health,371 28 | 2015,Health,305 29 | 2016,Health,334 30 | 2017,Health,436 31 | 2018,Health,378 32 | 
2019,Health,412 33 | 2020,Health,494 34 | 2005,Sanitation,491 35 | 2006,Sanitation,529 36 | 2007,Sanitation,513 37 | 2008,Sanitation,529 38 | 2009,Sanitation,557 39 | 2010,Sanitation,568 40 | 2011,Sanitation,569 41 | 2012,Sanitation,615 42 | 2013,Sanitation,604 43 | 2014,Sanitation,525 44 | 2015,Sanitation,646 45 | 2016,Sanitation,583 46 | 2017,Sanitation,588 47 | 2018,Sanitation,707 48 | 2019,Sanitation,666 49 | 2020,Sanitation,634 50 | 2005,Public Works,592 51 | 2006,Public Works,636 52 | 2007,Public Works,588 53 | 2008,Public Works,575 54 | 2009,Public Works,669 55 | 2010,Public Works,586 56 | 2011,Public Works,579 57 | 2012,Public Works,602 58 | 2013,Public Works,686 59 | 2014,Public Works,669 60 | 2015,Public Works,734 61 | 2016,Public Works,657 62 | 2017,Public Works,703 63 | 2018,Public Works,679 64 | 2019,Public Works,692 65 | 2020,Public Works,784 66 | 2005,City Management,299 67 | 2006,City Management,319 68 | 2007,City Management,395 69 | 2008,City Management,350 70 | 2009,City Management,386 71 | 2010,City Management,404 72 | 2011,City Management,402 73 | 2012,City Management,395 74 | 2013,City Management,457 75 | 2014,City Management,332 76 | 2015,City Management,442 77 | 2016,City Management,424 78 | 2017,City Management,346 79 | 2018,City Management,515 80 | 2019,City Management,424 81 | 2020,City Management,471 82 | 2005,Education,278 83 | 2006,Education,302 84 | 2007,Education,373 85 | 2008,Education,351 86 | 2009,Education,354 87 | 2010,Education,336 88 | 2011,Education,383 89 | 2012,Education,389 90 | 2013,Education,397 91 | 2014,Education,335 92 | 2015,Education,438 93 | 2016,Education,440 94 | 2017,Education,464 95 | 2018,Education,503 96 | 2019,Education,404 97 | 2020,Education,526 98 | 2005,Transportation,357 99 | 2006,Transportation,340 100 | 2007,Transportation,352 101 | 2008,Transportation,342 102 | 2009,Transportation,375 103 | 2010,Transportation,475 104 | 2011,Transportation,418 105 | 2012,Transportation,457 106 | 
2013,Transportation,521 107 | 2014,Transportation,404 108 | 2015,Transportation,499 109 | 2016,Transportation,574 110 | 2017,Transportation,519 111 | 2018,Transportation,545 112 | 2019,Transportation,506 113 | 2020,Transportation,601 114 | 2005,Housing,388 115 | 2006,Housing,372 116 | 2007,Housing,442 117 | 2008,Housing,413 118 | 2009,Housing,436 119 | 2010,Housing,434 120 | 2011,Housing,469 121 | 2012,Housing,502 122 | 2013,Housing,460 123 | 2014,Housing,502 124 | 2015,Housing,480 125 | 2016,Housing,440 126 | 2017,Housing,538 127 | 2018,Housing,570 128 | 2019,Housing,494 129 | 2020,Housing,562 130 | 2005,Recreation,613 131 | 2006,Recreation,617 132 | 2007,Recreation,572 133 | 2008,Recreation,555 134 | 2009,Recreation,591 135 | 2010,Recreation,615 136 | 2011,Recreation,652 137 | 2012,Recreation,624 138 | 2013,Recreation,598 139 | 2014,Recreation,706 140 | 2015,Recreation,674 141 | 2016,Recreation,719 142 | 2017,Recreation,689 143 | 2018,Recreation,630 144 | 2019,Recreation,716 145 | 2020,Recreation,716 146 | 2005,Animal Control,232 147 | 2006,Animal Control,311 148 | 2007,Animal Control,277 149 | 2008,Animal Control,243 150 | 2009,Animal Control,285 151 | 2010,Animal Control,327 152 | 2011,Animal Control,289 153 | 2012,Animal Control,389 154 | 2013,Animal Control,292 155 | 2014,Animal Control,252 156 | 2015,Animal Control,319 157 | 2016,Animal Control,398 158 | 2017,Animal Control,408 159 | 2018,Animal Control,405 160 | 2019,Animal Control,419 161 | 2020,Animal Control,477 162 | --------------------------------------------------------------------------------