├── 1. Basics_of_R.R
├── parks_and_rec_dataset.csv
├── 2. Operators.R
├── parks_and_rec_dataset_output.csv
├── 3. Working_with_Files.R
├── 7. Parsing and Converting Dates.R
├── 5. Grouping_and_Aggregating.R
├── 8. Removing Duplicates.R
├── Messy_Dataset.csv
├── 6. Handling_Missing_Data.R
├── 4. Selecting_and_Ordering.R
├── 9. Data Visualization and Presentation.R
└── parks_and_rec_budget.csv


/1. Basics_of_R.R:
--------------------------------------------------------------------------------
 1 | # Variables: one example of each basic R data structure.
 2 | # A single numeric value, assigned with `<-`.
 3 | num_var <- 42
 4 | # Display the value explicitly.
 5 | print(num_var)
 6 | # Inspect the type of the value; a plain number literal such as 42 is "numeric".
 7 | class(num_var)
 8 | 
 9 | # A character string.
10 | str_var <- "I like R"
11 | 
12 | # A numeric vector built with c().
13 | vec_var <- c(10,20,50,100,1000)
14 | 
15 | # A named list; its elements differ in type and length.
16 | list_var <- list(name = "Alex", age = 30, scores = c(90,50,24))
17 | # `$` extracts one list element by name.
18 | list_var$name
19 | 
20 | # A data frame with three equal-length columns, one row per person.
21 | df <- data.frame(
22 |   name = c("Alex", "Sally", "John"),
23 |   age = c(30, 50, 99),
24 |   scores = c(90,50,24)
25 | )
26 | 
27 | 
28 | 
29 | 
30 | 
31 | 
32 | 
33 | 
34 | 
35 | 
36 | 
37 | 


--------------------------------------------------------------------------------
/parks_and_rec_dataset.csv:
--------------------------------------------------------------------------------
 1 | Character,Department,Role,Annual_Salary,Dogs_Rescued_With_3_Legs
 2 | Leslie Knope,Parks,Deputy Director,70000,0
 3 | Ron Swanson,Parks,Director,90000,0
 4 | Tom Haverford,Parks,Administrator,55000,0
 5 | April Ludgate,Parks,Assistant,40000,1
 6 | Andy Dwyer,Parks,Shoe Shiner / Musician,25000,1
 7 | Ben Wyatt,City Management,City Manager,85000,0
 8 | Ann Perkins,Health,Nurse,60000,0
 9 | Chris Traeger,City Management,Auditor,95000,0
10 | Donna Meagle,Parks,Office Manager,52000,0
11 | Jerry Gergich,Parks,Office Worker,50000,0
12 | 


--------------------------------------------------------------------------------
/2. Operators.R:
--------------------------------------------------------------------------------
 1 | # Assignment Operator
 2 | # `<-` binds a value to a name.
 3 | var <- 42
 4 | 
 5 | # Arithmetic Operators
 6 | 
 7 | x <- 10
 8 | 
 9 | y <- 3
10 | # The sum is stored in `a`; the remaining expressions are only printed.
11 | a <- x + y
12 | 
13 | x - y   # subtraction: 7
14 | 
15 | x * y   # multiplication: 30
16 | 
17 | x / y   # division: 3.333...
18 | 
19 | x^y     # exponentiation: 1000
20 | 
21 | x %% y  # modulo (remainder): 1
22 | 
23 | # PEMDAS: parentheses first, then division, then addition -> 31
24 | (5 * 10) / 2 + 6
25 | 
26 | 
27 | # Comparison Operators
28 | # Each comparison yields TRUE or FALSE.
29 | x >= y  # TRUE
30 | x <= y  # FALSE
31 | x == y  # FALSE
32 | x == x  # TRUE
33 | x != y  # TRUE
34 | 
35 | 
36 | 
37 | # Logical Operators
38 | 
39 | # AND: TRUE only when both sides are TRUE -> FALSE here.
40 | x >= y & x == y
41 | 
42 | # OR: TRUE when at least one side is TRUE -> TRUE here.
43 | x >= y | x == y
44 | 
45 | # NOT: `!` binds more loosely than `==`, so the parentheses only make the grouping explicit.
46 | !(x == y)
47 | 
48 | 
49 | 
50 | 
51 | 
52 | 
53 | 
54 | 
55 | 
56 | 
57 | 
58 | 


--------------------------------------------------------------------------------
/parks_and_rec_dataset_output.csv:
--------------------------------------------------------------------------------
 1 | "Character","Department","Role","Annual_Salary","Dogs_Rescued_With_3_Legs"
 2 | "Leslie Knope","Parks","Deputy Director",70000,0
 3 | "Ron Swanson","Parks","Director",90000,0
 4 | "Tom Haverford","Parks","Administrator",55000,0
 5 | "April Ludgate","Parks","Assistant",40000,1
 6 | "Andy Dwyer","Parks","Shoe Shiner / Musician",25000,1
 7 | "Ben Wyatt","City Management","City Manager",85000,0
 8 | "Ann Perkins","Health","Nurse",60000,0
 9 | "Chris Traeger","City Management","Auditor",95000,0
10 | "Donna Meagle","Parks","Office Manager",52000,0
11 | "Jerry Gergich","Parks","Office Worker",50000,0
12 | 


--------------------------------------------------------------------------------
/3. Working_with_Files.R:
--------------------------------------------------------------------------------
 1 | # Working with files: read a CSV, inspect it, write it back out, then re-read the copy.
 2 | # Backslashes in the Windows paths are doubled because "\" is the escape character in R strings.
 3 | df <- read.csv("D:\\YouTube\\1. Raw Videos\\R Series\\Files\\parks_and_rec_dataset.csv")
 4 | 
 5 | # Three quick looks at the loaded data frame:
 6 | # the first rows, the column structure/types, and per-column summary statistics.
 7 | head(df)
 8 | 
 9 | str(df)
10 | 
11 | summary(df)
12 | # Same read as above, with the header flag and the separator spelled out explicitly.
13 | df2 <- read.csv("D:\\YouTube\\1. Raw Videos\\R Series\\Files\\parks_and_rec_dataset.csv", header = TRUE, sep = ",")
14 | 
15 | 
16 | # row.names = FALSE keeps R's row index out of the written file.
17 | write.csv(df2, "D:\\YouTube\\1. Raw Videos\\R Series\\Files\\parks_and_rec_dataset_output.csv", row.names = FALSE)
18 | 
19 | 
20 | # Read the file that was just written back in.
21 | df3 <- read.csv("D:\\YouTube\\1. Raw Videos\\R Series\\Files\\parks_and_rec_dataset_output.csv", header = TRUE, sep = ",")
22 | 
23 | 
24 | 
25 | 
26 | 
27 | 
28 | 
29 | 
30 | 
31 | 
32 | 
33 | 
34 | 
35 | 
36 | 


--------------------------------------------------------------------------------
/7. Parsing and Converting Dates.R:
--------------------------------------------------------------------------------
 1 | # Parsing and Converting Dates
 2 | # parse_date_time(), year(), month() and day() below come from lubridate.
 3 | library(dplyr)
 4 | library(lubridate)
 5 | 
 6 | df <- read.csv("D:\\YouTube\\1. Raw Videos\\R Series\\Files\\Messy_Dataset.csv")
 7 | # Keep an untouched copy so the parsed column can be compared with the raw text.
 8 | df_raw <- df
 9 | # Replace the text column with parsed date-times, trying each listed order.
10 | # NOTE(review): the CSV also holds values like "March 5, 2024"; confirm they parse with these orders.
11 | df$Transaction_Date <- parse_date_time(df$Transaction_Date, 
12 |                                        orders = c("Y-m-d", "m/d/Y", "Y/m/d", "d-m-Y"))
13 | # Split the parsed date into separate year, month and day columns.
14 | df$Transaction_Date_Year <- year(df$Transaction_Date)
15 | 
16 | df$Transaction_Date_month <- month(df$Transaction_Date)
17 | 
18 | df$Transaction_Date_day <- day(df$Transaction_Date)
19 | 
20 | 
21 | 
22 | 
23 | 
24 | 
25 | 
26 | 
27 | 
28 | 
29 | 
30 | 
31 | 
32 | 
33 | 
34 | 
35 | 
36 | 
37 | 


--------------------------------------------------------------------------------
/5. Grouping_and_Aggregating.R:
--------------------------------------------------------------------------------
 1 | # GROUPING AND AGGREGATING DATA
 2 | library(dplyr)
 3 | 
 4 | df <- read.csv("D:\\YouTube\\1. Raw Videos\\R Series\\Files\\parks_and_rec_dataset.csv")
 5 | 
 6 | 
 7 | # Number of rows (characters) in each department; printed, not stored.
 8 | df %>%
 9 |   group_by(Department) %>%
10 |   summarize(Count = n())
11 | 
12 | 
13 | # Several salary statistics per department, stored in agg_df.
14 | # Every summary is named so the result has usable column names.
15 | agg_df <- df %>%
16 |   group_by(Department) %>%
17 |   summarize(AVG_Salary = mean(Annual_Salary),
18 |             Count = n(),
19 |             Min_Salary = min(Annual_Salary),
20 |             Max_Salary = max(Annual_Salary),
21 |             Median_Salary = median(Annual_Salary))
22 | 
23 | 
24 | 
25 | 
26 | 
27 | 
28 | 
29 | 
30 | 
31 | 
32 | 
33 | 
34 | 
35 | 
36 | 
37 | 
38 | 
39 | 
40 | 
41 | 
42 | 
43 | 
44 | 
45 | 
46 | 
47 | 


--------------------------------------------------------------------------------
/8. Removing Duplicates.R:
--------------------------------------------------------------------------------
 1 | # Removing Duplicates
 2 | 
 3 | library(dplyr)
 4 | library(lubridate)  # provides parse_date_time(), used below
 5 | df <- read.csv("D:\\YouTube\\1. Raw Videos\\R Series\\Files\\Messy_Dataset.csv")
 6 | 
 7 | # Drop rows that are identical in every column.
 8 | df_no_duplicates <- df %>%
 9 |   distinct()
10 | 
11 | # Keep the first row seen for each Customer_ID, retaining all columns.
12 | df_no_duplicates2 <- df %>%
13 |   distinct(Customer_ID, .keep_all = TRUE)
14 | 
15 | 
16 | # Parse the text dates so they can be sorted chronologically.
17 | df$Transaction_Date <- parse_date_time(df$Transaction_Date,
18 |                                        orders = c("Y-m-d", "m/d/Y", "Y/m/d", "d-m-Y"))
19 | 
20 | # Sort newest first within each customer, then keep the first row per Customer_ID.
21 | df_no_duplicates3 <- df %>%
22 |   arrange(Customer_ID, desc(Transaction_Date)) %>%
23 |   distinct(Customer_ID, .keep_all = TRUE)
24 | 
25 | 
26 | 
27 | 
28 | 
29 | 
30 | 
31 | 
32 | 
33 | 
34 | 
35 | 
36 | 
37 | 
38 | 
39 | 
40 | 
41 | 
42 | 
43 | 
44 | 
45 | 


--------------------------------------------------------------------------------
/Messy_Dataset.csv:
--------------------------------------------------------------------------------
 1 | Customer_ID,Customer_Name,Email,Transaction_Amount,Transaction_Date,Category
 2 | 101,Alice Johnson,alice.j@example.com,150.75,2024-01-10,Electronics
 3 | 102,Bob Smith,bob_smith@example.com,200.5,01/15/2024,Groceries
 4 | 103,Charlie Brown,charlie.b@example,,2024-02-20,Electronics
 5 | 104,,emma@example.com,99.99,"March 5, 2024",electronics
 6 | 105,Emma Wilson,emma.wilson@example.com,250.0,2024-03-10,Groceries
 7 | 106,David Lee,david.lee@example.com,175.2,10-04-2024,groceries
 8 | 107,Frank White,frankwhite@example.com,300.0,2024/05/15,Furniture
 9 | 108,Grace Adams,grace.a@example.com,120.0,15-06-2024,Furnitures
10 | 109,Helen Carter,,210.75,07/20/2024,Clothing
11 | 110,Ian Brooks,ian.b@example.com,,"August 25, 2024",Clothing
12 | 101,Alice Johnson,alice.j@example.com,150.75,2024-01-10,Electronics
13 | 104,David Lee,david.lee@example.com,175.2,10-04-2024,groceries
14 | 


--------------------------------------------------------------------------------
/6. Handling_Missing_Data.R:
--------------------------------------------------------------------------------
 1 | # Cleaning Messy Data
 2 | # Finds missing values, drops rows without an email, and fills the remaining gaps.
 3 | 
 4 | library(dplyr)
 5 | library(tidyr)
 6 | 
 7 | # Install if you haven't already
 8 | # install.packages("tidyverse")
 9 | # na.strings makes empty fields (and the literal text "NA") load as NA.
10 | df <- read.csv("D:\\YouTube\\1. Raw Videos\\R Series\\Files\\Messy_Dataset.csv",
11 |                na.strings = c("", "NA"))
12 | 
13 | 
14 | # Count of missing values in each column.
15 | colSums(is.na(df))
16 | 
17 | # Remove rows when no email is present
18 | df_cleaned <- df %>% drop_na("Email")
19 | 
20 | # Populate null numeric values with 0
21 | df_cleaned$Transaction_Amount[is.na(df_cleaned$Transaction_Amount)] <- 0
22 | # NOTE(review): the assignment above already replaced every NA, so the mean
23 | # imputation below selects no rows. Keep only one of the two strategies.
24 | df_cleaned$Transaction_Amount[is.na(df_cleaned$Transaction_Amount)] <- mean(df_cleaned$Transaction_Amount, na.rm = TRUE)
25 | 
26 | # Populating character columns with a placeholder
27 | df_cleaned$Customer_Name[is.na(df_cleaned$Customer_Name)] <- "Unknown"
28 | 
29 | 
30 | 
31 | 
32 | 
33 | 
34 | 
35 | 
36 | 
37 | 
38 | 
39 | 
40 | 


--------------------------------------------------------------------------------
/4. Selecting_and_Ordering.R:
--------------------------------------------------------------------------------
 1 | # SELECTING AND ORDERING DATA
 2 | 
 3 | df <- read.csv("D:\\YouTube\\1. Raw Videos\\R Series\\Files\\parks_and_rec_dataset.csv")
 4 | # Install dplyr only when it is missing, so re-running the script does not reinstall it.
 5 | if (!requireNamespace("dplyr", quietly = TRUE)) install.packages("dplyr")
 6 | 
 7 | library(dplyr)
 8 | # Keep only the named columns.
 9 | df_characters <- select(df, Character, Role)
10 | # A leading minus drops a column instead.
11 | select(df, -Dogs_Rescued_With_3_Legs)
12 | # `a:b` selects the contiguous range of columns from a to b.
13 | select(df, Character:Annual_Salary)
14 | 
15 | # Filtering
16 | # Exact match on the Role column.
17 | filter(df, Role == "Director")
18 | 
19 | # Pattern match: also keeps roles that merely contain "Director", e.g. "Deputy Director".
20 | filter(df, grepl("Director", Role))
21 | 
22 | 
23 | # Both conditions must hold for a row to be kept.
24 | filter(df, Annual_Salary > 50000 & Department == "Parks")
25 | 
26 | # ORDERING
27 | # Ascending by default.
28 | arrange(df, Annual_Salary)
29 | 
30 | # desc() reverses the order.
31 | arrange(df, desc(Annual_Salary))
32 | 
33 | 
34 | # Pipe Operator: chain the same verbs, each step feeding the next.
35 | df %>%
36 |   select(Character:Annual_Salary) %>%
37 |   filter(Annual_Salary > 50000 & Department == "Parks") %>%
38 |   arrange(desc(Annual_Salary))
39 | 
40 | 
41 | 
42 | 
43 | 
44 | 
45 | 
46 | 
47 | 
48 | 
49 | 
50 | 
51 | 
52 | 
53 | 
54 | 
55 | 
56 | 
57 | 
58 | 
59 | 
60 | 
61 | 
62 | 
63 | 
64 | 
65 | 
66 | 
67 | 
68 | 


--------------------------------------------------------------------------------
/9. Data Visualization and Presentation.R:
--------------------------------------------------------------------------------
 1 | # Data Visualization and Presentation
 2 | 
 3 | library(dplyr)
 4 | library(ggplot2)
 5 | 
 6 | df <- read.csv("D:\\YouTube\\1. Raw Videos\\R Series\\Files\\parks_and_rec_budget.csv")
 7 | 
 8 | # BAR Charts
 9 | # Total budget per department across all years, bars ordered largest first.
10 | 
11 | df %>%
12 |   group_by(Department) %>%
13 |   summarise(Total_Budget = sum(Budget_in_Thousands)) %>%
14 |   ggplot(aes(x = reorder(Department, -Total_Budget), y = Total_Budget, fill = Department)) +
15 |   geom_bar(stat = "identity") +
16 |   ggtitle("Total Budget by Department") +
17 |   theme(axis.text.x = element_text(angle = 45, hjust = 1))
18 | 
19 | 
20 | 
21 | # Line Charts
22 | # Combined budget of all departments for each year.
23 | 
24 | df %>%
25 |   group_by(Year) %>%
26 |   summarise(Annual_Budget = sum(Budget_in_Thousands)) %>%
27 |   ggplot(aes(x = Year, y = Annual_Budget)) +
28 |   geom_line() +
29 |   geom_point() +
30 |   ggtitle("Annual Budget for all Departments") +
31 |   theme_minimal()
32 | 
33 | # Break out by Departments: one coloured line per department.
34 | 
35 | df %>%
36 |   ggplot(aes(x = Year, y = Budget_in_Thousands, color = Department)) +
37 |   geom_line() +
38 |   ggtitle("Annual Budget per Department") +
39 |   theme_minimal()
40 |   
41 | 
42 | 
43 | 
44 | 
45 | 
46 | 
47 | 
48 | 
49 | 
50 | 
51 | 
52 | 
53 | 
54 | 
55 | 
56 | 
57 | 
58 | 
59 | 
60 | 
61 | 
62 | 
63 | 
64 | 
65 | 
66 | 
67 | 
68 | 
69 | 
70 | 
71 | 
72 | 
73 | 
74 | 
75 | 
76 | 
77 | 
78 | 
79 | 
80 | 


--------------------------------------------------------------------------------
/parks_and_rec_budget.csv:
--------------------------------------------------------------------------------
  1 | Year,Department,Budget_in_Thousands
  2 | 2005,Parks,561
  3 | 2006,Parks,520
  4 | 2007,Parks,574
  5 | 2008,Parks,607
  6 | 2009,Parks,598
  7 | 2010,Parks,552
  8 | 2011,Parks,568
  9 | 2012,Parks,636
 10 | 2013,Parks,631
 11 | 2014,Parks,617
 12 | 2015,Parks,630
 13 | 2016,Parks,596
 14 | 2017,Parks,618
 15 | 2018,Parks,748
 16 | 2019,Parks,626
 17 | 2020,Parks,712
 18 | 2005,Health,321
 19 | 2006,Health,336
 20 | 2007,Health,337
 21 | 2008,Health,365
 22 | 2009,Health,272
 23 | 2010,Health,328
 24 | 2011,Health,353
 25 | 2012,Health,355
 26 | 2013,Health,316
 27 | 2014,Health,371
 28 | 2015,Health,305
 29 | 2016,Health,334
 30 | 2017,Health,436
 31 | 2018,Health,378
 32 | 2019,Health,412
 33 | 2020,Health,494
 34 | 2005,Sanitation,491
 35 | 2006,Sanitation,529
 36 | 2007,Sanitation,513
 37 | 2008,Sanitation,529
 38 | 2009,Sanitation,557
 39 | 2010,Sanitation,568
 40 | 2011,Sanitation,569
 41 | 2012,Sanitation,615
 42 | 2013,Sanitation,604
 43 | 2014,Sanitation,525
 44 | 2015,Sanitation,646
 45 | 2016,Sanitation,583
 46 | 2017,Sanitation,588
 47 | 2018,Sanitation,707
 48 | 2019,Sanitation,666
 49 | 2020,Sanitation,634
 50 | 2005,Public Works,592
 51 | 2006,Public Works,636
 52 | 2007,Public Works,588
 53 | 2008,Public Works,575
 54 | 2009,Public Works,669
 55 | 2010,Public Works,586
 56 | 2011,Public Works,579
 57 | 2012,Public Works,602
 58 | 2013,Public Works,686
 59 | 2014,Public Works,669
 60 | 2015,Public Works,734
 61 | 2016,Public Works,657
 62 | 2017,Public Works,703
 63 | 2018,Public Works,679
 64 | 2019,Public Works,692
 65 | 2020,Public Works,784
 66 | 2005,City Management,299
 67 | 2006,City Management,319
 68 | 2007,City Management,395
 69 | 2008,City Management,350
 70 | 2009,City Management,386
 71 | 2010,City Management,404
 72 | 2011,City Management,402
 73 | 2012,City Management,395
 74 | 2013,City Management,457
 75 | 2014,City Management,332
 76 | 2015,City Management,442
 77 | 2016,City Management,424
 78 | 2017,City Management,346
 79 | 2018,City Management,515
 80 | 2019,City Management,424
 81 | 2020,City Management,471
 82 | 2005,Education,278
 83 | 2006,Education,302
 84 | 2007,Education,373
 85 | 2008,Education,351
 86 | 2009,Education,354
 87 | 2010,Education,336
 88 | 2011,Education,383
 89 | 2012,Education,389
 90 | 2013,Education,397
 91 | 2014,Education,335
 92 | 2015,Education,438
 93 | 2016,Education,440
 94 | 2017,Education,464
 95 | 2018,Education,503
 96 | 2019,Education,404
 97 | 2020,Education,526
 98 | 2005,Transportation,357
 99 | 2006,Transportation,340
100 | 2007,Transportation,352
101 | 2008,Transportation,342
102 | 2009,Transportation,375
103 | 2010,Transportation,475
104 | 2011,Transportation,418
105 | 2012,Transportation,457
106 | 2013,Transportation,521
107 | 2014,Transportation,404
108 | 2015,Transportation,499
109 | 2016,Transportation,574
110 | 2017,Transportation,519
111 | 2018,Transportation,545
112 | 2019,Transportation,506
113 | 2020,Transportation,601
114 | 2005,Housing,388
115 | 2006,Housing,372
116 | 2007,Housing,442
117 | 2008,Housing,413
118 | 2009,Housing,436
119 | 2010,Housing,434
120 | 2011,Housing,469
121 | 2012,Housing,502
122 | 2013,Housing,460
123 | 2014,Housing,502
124 | 2015,Housing,480
125 | 2016,Housing,440
126 | 2017,Housing,538
127 | 2018,Housing,570
128 | 2019,Housing,494
129 | 2020,Housing,562
130 | 2005,Recreation,613
131 | 2006,Recreation,617
132 | 2007,Recreation,572
133 | 2008,Recreation,555
134 | 2009,Recreation,591
135 | 2010,Recreation,615
136 | 2011,Recreation,652
137 | 2012,Recreation,624
138 | 2013,Recreation,598
139 | 2014,Recreation,706
140 | 2015,Recreation,674
141 | 2016,Recreation,719
142 | 2017,Recreation,689
143 | 2018,Recreation,630
144 | 2019,Recreation,716
145 | 2020,Recreation,716
146 | 2005,Animal Control,232
147 | 2006,Animal Control,311
148 | 2007,Animal Control,277
149 | 2008,Animal Control,243
150 | 2009,Animal Control,285
151 | 2010,Animal Control,327
152 | 2011,Animal Control,289
153 | 2012,Animal Control,389
154 | 2013,Animal Control,292
155 | 2014,Animal Control,252
156 | 2015,Animal Control,319
157 | 2016,Animal Control,398
158 | 2017,Animal Control,408
159 | 2018,Animal Control,405
160 | 2019,Animal Control,419
161 | 2020,Animal Control,477
162 | 


--------------------------------------------------------------------------------