├── 911 Calls Data Capstone Project Walkthrough.ipynb
├── Affordable Care Act Data.R
├── Airbnb Seattle
└── listings.csv
├── Airbnb Texas
├── Airbnb Texas.R
└── Airbnb_Texas_Rentals.csv
├── Airplane Crash
├── Airplane Crash.ipynb
└── Airplane_Crashes_and_Fatalities_Since_1908.csv
├── Amazon Sentiment Analysis (In Progress).ipynb
├── Barbeque
├── Barbeque.R
├── Barbeque.ipynb
├── contest_data.csv
├── results_brisket.csv
├── results_chicken.csv
├── results_overall.csv
├── results_pork.csv
└── results_ribs.csv
├── Bikeshare Bay Area.Rmd
├── Caravan
├── Caravan Insurance.R
├── Caravan Insurance.ipynb
└── caravan-insurance-challenge.csv
├── Carbon Dioxide Analysis.ipynb
├── Celebrity Deaths.ipynb
├── College Salaries
├── College Salaries.R
├── College Salaries.ipynb
├── degrees-that-pay-back.csv
├── salaries-by-college-type.csv
└── salaries-by-region.csv
├── Columbus First
├── Columbus First Trip.R
└── Columbus.csv
├── Credit Card Fradulent Modeling 25% Test.ipynb
├── Credit Card Modeling 65%.R
├── Diamond Prices.R
├── Diamond Prices.ipynb
├── Edudata.R
├── Edudata.Rmd
├── Edudata.csv
├── Edudata.html
├── Edudata.pdf
├── El Nino.ipynb
├── Fake News.R
├── Fake News.ipynb
├── Finance Walkthrough.ipynb
├── Glass Classification.ipynb
├── Health Insurance Coverage.ipynb
├── Heart Disease Decision Trees.R
├── Heart Disease Decision Trees.Rmd
├── Housing Index Zillow.R
├── Human Resources Analytics.ipynb
├── Illegal Immigration.R
├── Iris.R
├── Kickstarter.ipynb
├── Mass Shootings
├── Mass Shootings Dataset.csv
├── Mass Shootings.R
└── Mass Shootings.ipynb
├── McDonald's.ipynb
├── Melbourne Housing Market.R
├── Model-Based Feature Selection.ipynb
├── NFL Arrests.csv
├── NFL Draft.R
├── NFL Draft.Rmd
├── NFL_Draft.pdf
├── NYSE
├── FB as Example.ipynb
├── NYSE.R
├── Ralph Lauren.ipynb
├── fundamentals.csv
└── securities.csv
├── Norwegian Development Funds.R
├── Norwegian Development Funds.Rmd
├── Norwegian Development Funds.ipynb
├── NorwegianDevelopmentFunds.pdf
├── Norwegian_Development_Funds.html
├── Pokemon Mining
├── Pokemon Data Mining.R
├── Pokemon Using ML.ipynb
├── final_model.pkl
├── pokemon_alopez247.csv
└── report_Pokemon.pdf
├── Pokemon.R
├── README.md
├── Recent Rent Analysis Based on Prices From Zillow.ipynb
├── Speed Dating Data.csv
├── Video Game Sales.ipynb
├── Welfare
├── SNAPerror.csv
├── UIerror.csv
└── Welfare.R
├── World Food Facts.R
├── Y Combinator.csv
├── Y Combinator.ipynb
├── contest_data.csv
├── diamonds.csv
├── harmit.csv
└── mcdonalds.csv
/Affordable Care Act Data.R:
--------------------------------------------------------------------------------
1 | # Affordable Health Care Act Data
2 |
3 |
4 | ### Loading the Libraries
5 |
6 | library(ggplot2)
7 | library(maps)
8 | library(zipcode)
9 | library(ggmap)
10 | library(choroplethrMaps)
11 | library(choroplethr)
12 | data("state.map")
13 |
14 |
15 | ### Changing the Working Directory
16 |
17 | setwd('./Kaggle')
18 |
19 |
20 | ### Reading the Data
21 |
22 | acadat <- read.csv(file='./states.csv', header = TRUE, sep = ",", strip.white = TRUE )
23 | m <- colnames(acadat)
24 | length(m)
25 | acadat <- read.csv(file='./states.csv', header = FALSE, skip = 1, sep = ",", strip.white = TRUE )
26 | n <- colnames(acadat)
27 |
28 | head(acadat)
29 |
30 | ### Reading the Dictionary and Cleaning the Data
31 |
32 | any(is.na(acadat))
33 |
34 | dictionary <- cbind(n, m)
35 |
36 | str(acadat)
37 | # Strip percentages and dollars first
38 | acadat[,c("V2", "V3", "V4")] <- (sapply(acadat[, c("V2", "V3", "V4")], function(x) as.numeric(gsub("%","", x))))
39 | acadat$V9 <- gsub("\\$","", acadat$V9)
40 | acadat$region = tolower(acadat$V1)
41 |
42 | # merge() returns rows sorted on `region`, not in polygon drawing order, so the
43 | # vertices are re-sorted by the `order` column before plotting.
44 | us_state_map <- map_data("state")
45 | map_data <- merge(acadat, us_state_map, by = "region")
46 | map_data <- map_data[order(map_data$order), ]  # base R; avoids arrange() (dplyr/plyr), which no library() call above loads
47 |
48 | ### Maps for the United States
49 |
50 | ggplot(map_data, aes(x = long, y = lat, group = group)) +
51 | geom_polygon(aes(fill = cut_number(V2, 6))) +
52 | geom_path(colour = 'red') + labs(title = "State level insurance coverage in 2010") +
53 | scale_fill_brewer('Uninsured Percent, 2010') + coord_map()
54 |
55 | # Uninsured percent in 2015
56 | ggplot(map_data, aes(x = long, y = lat, group = group)) +
57 | geom_polygon(aes(fill = cut_number(V3, 6))) +
58 | geom_path(colour = 'black', alpha = 0.5) + labs(title = "State level insurance coverage in 2015") +
59 | scale_fill_brewer('Uninsured Percent, 2015') + coord_map()
60 |
61 | # Uninsured rate change, 2010-15
62 | ggplot(map_data, aes(x = long, y = lat, group = group)) +
63 | geom_polygon(aes(fill = cut_number(V4, 6))) +
64 | geom_path(colour = 'orange') + labs(title = "State level change in insurance coverage, 2010-15") +
65 | scale_fill_brewer('Uninsured rate change, 2010-15') + coord_map()
66 |
--------------------------------------------------------------------------------
/Airbnb Texas/Airbnb Texas.R:
--------------------------------------------------------------------------------
1 | # Airbnb
2 |
3 |
4 | ## Loading the Libraries
5 |
6 | library(data.table)
7 | library(ggplot2)
8 | library(lubridate)
9 | library(wordcloud)
10 | library(tm)
11 | library(SnowballC)
12 | library(RSentiment)
13 | library(caTools)
14 | library(randomForest)
15 | library(rpart)
16 | library(rpart.plot)
17 |
18 |
19 | ## Reading the Dataset and Changing Working Directory
20 |
21 | setwd('./Kaggle/Airbnb Texas')
22 | airbnb <- read.csv('./Airbnb_Texas_Rentals.csv')
23 |
24 | ## Converting Factors to Integers
25 | # as.integer() on a factor returns level codes, so convert via as.character(); "$" and "," are stripped first and non-numeric labels become NA (with a warning).
26 | airbnb$average_rate_per_night <- as.integer(gsub("[$,]", "", as.character(airbnb$average_rate_per_night)))
27 | airbnb$bedrooms_count <- as.integer(as.character(airbnb$bedrooms_count))
28 |
29 | ## Changing the Description
30 |
31 | airbnb$description <- as.character(airbnb$description)
32 |
33 | ## Checking for Changes being Made
34 |
35 | str(airbnb)
36 | summary(airbnb)
37 |
38 | ## Using the tapply Function
39 |
40 | tapply(airbnb$average_rate_per_night, airbnb$city, mean)
41 |
42 |
43 | ## Subsetting on Dallas
44 |
45 | dallas <- subset(airbnb, airbnb$city == "Dallas")
46 |
47 | ## Visualization on Dallas
48 |
49 | dallaslocation <- as.data.frame(table(round(dallas$latitude,2),round(dallas$longitude,2)))
50 | dallaslocation$Var1 <- as.numeric(as.character(dallaslocation$Var1))
51 | dallaslocation$Var2 <- as.numeric(as.character(dallaslocation$Var2))
52 | ggplot(dallaslocation, aes(x=Var1, y=Var2))+geom_tile(aes(fill=Freq))+scale_fill_gradient(low="green", high="blue")
53 |
54 | ### Only a few areas in Dallas have more than 30 open listings.
55 |
56 |
57 | ## Creating a Linear Regression Model
58 |
59 | set.seed(13265)
60 | spl = sample.split(dallas$average_rate_per_night, 0.8) # Train at 80%
61 | train <- subset(dallas, spl == TRUE)
62 | test <- subset(dallas, spl == FALSE)
63 | dal_model <- lm(average_rate_per_night ~ bedrooms_count + latitude + longitude, data=train)
64 | summary(dal_model)
65 |
66 |
67 |
68 | ## Calculating the MSE (Mean Square Error)
69 |
70 | predictlm <- predict(dal_model, newdata = test)
71 | mselm <- mean((predictlm-test$average_rate_per_night)^2)
72 | mselm
73 |
74 | ## Regression Tree Model
75 |
76 | cart1 <- rpart(average_rate_per_night ~ bedrooms_count + latitude + longitude, data=train, method = "anova")
77 | predictcart <- predict(cart1, newdata = test)
78 | msecart <- mean((predictcart-test$average_rate_per_night)^2)
79 | msecart
80 |
81 |
82 | prp(cart1)
83 |
84 | ## Random Forest Model
85 |
86 | forest <- randomForest(average_rate_per_night ~ bedrooms_count + latitude + longitude, data=train)
87 | predictrf <- predict(forest, newdata=test)
88 | mserf <- mean((predictrf-test$average_rate_per_night)^2)
89 | mserf
90 |
91 |
92 | ## Cleaning the Words
93 |
94 | corpus <- Corpus(VectorSource(dallas$description))
95 | corpus <- tm_map(corpus, tolower)
96 | corpus <- tm_map(corpus, removeWords, stopwords("english"))
97 | corpus <- tm_map(corpus, removePunctuation)
98 | corpus <- tm_map(corpus, stemDocument)
99 |
100 |
101 | freq <- DocumentTermMatrix(corpus)
102 | freq
103 |
104 | ## Removing Sparse Terms
105 |
106 | freq <- removeSparseTerms(freq, 0.995)
107 |
108 | dallas_sparse <- as.data.frame(as.matrix(freq))
109 | colnames(dallas_sparse) <- make.names(colnames(dallas_sparse))
110 | dallas_sparse$average_rate <- dallas$average_rate_per_night
111 |
112 | ## Performing Random Forest Model Again
113 |
114 | set.seed(2562)
115 | spl2 <- sample.split(dallas_sparse$average_rate, SplitRatio = 0.8) # Training Set at 80%
116 | train2 <- subset(dallas_sparse, spl2==TRUE)
117 | test2 <- subset(dallas_sparse, spl2==FALSE)
118 |
119 | newforest <- randomForest(average_rate~., data=train2)
120 | summary(newforest)
121 | predictrf2 <- predict(newforest, newdata=test2)
122 | mserf2 <- mean((predictrf2-test2$average_rate)^2)
123 | mserf2
124 |
125 |
126 | ## Combining to One Model
127 | # Add the structural predictors to the word-frequency table, then refit a random forest on an 80/20 split.
128 | dallas_sparse$bedroom <- dallas$bedrooms_count
129 | dallas_sparse$latitude <- dallas$latitude
130 | dallas_sparse$longitude <- dallas$longitude
131 | # NOTE: `subset(x, spl3=TRUE)` passes a stray named argument and keeps every row (test rows leak into training); `==` is required.
132 | set.seed(10241)
133 | spl3 <- sample.split(dallas_sparse$average_rate, SplitRatio = 0.8) # Training at 80%
134 | train3 <- subset(dallas_sparse, spl3 == TRUE)
135 | test3 <- subset(dallas_sparse, spl3 == FALSE)
136 | allforest <- randomForest(average_rate ~ ., data = train3)
137 | predictrf3 <- predict(allforest, newdata = test3)
138 | mse_all <- mean((predictrf3 - test3$average_rate)^2)
139 | mse_all
140 |
141 | ## Wording Visualization
142 |
143 | vu <- varUsed(allforest, count=TRUE)
144 | vusorted <- sort(vu, decreasing=FALSE, index.return=TRUE)
145 | dotchart(vusorted$x, names(allforest$forest$xlevels[vusorted$ix]))
146 |
147 |
148 |
--------------------------------------------------------------------------------
/Barbeque/Barbeque.R:
--------------------------------------------------------------------------------
1 | # Barbeque Wordcloud
2 |
3 | ## Loading the Libraries
4 |
5 | library(readr)
6 | library(data.table)
7 | library(dplyr)
8 | library(stringr)
9 | library(ggplot2)
10 | library(knitr)
11 | library(DT)
12 | library(tm)
13 | library(wordcloud)
14 |
15 |
16 | ## Changing the Working Directory
17 |
18 | setwd('./Kaggle/Barbeque')
19 |
20 | ## Reading the Dataset
21 |
22 | contest <- read_csv("./contest_data.csv")
23 | results_brisket <- read_csv("./results_brisket.csv")
24 | results_chicken <- read_csv("./results_chicken.csv")
25 | results_pork <- read_csv("./results_pork.csv")
26 | results_ribs <- read_csv("./results_ribs.csv")
27 |
28 |
29 | ## Where Do Competitions Happen More Frequently?
30 |
31 | most_happening <- contest %>%
32 | group_by(state_full) %>%
33 | summarise(count=n()) %>%
34 | arrange(desc(count))
35 |
36 | datatable(most_happening, class="table-condensed", style="bootstrap", options = list(dom = 'tp'))
37 |
38 |
39 | ## Where Is the Big Prize Money?
40 |
41 | big_prize_money <- contest %>%
42 | group_by(state_full) %>%
43 | summarise(avg_prize= mean(prize, na.rm = TRUE)) %>%
44 | arrange(desc(avg_prize))
45 |
46 | datatable(big_prize_money, class="table-condensed", style="bootstrap", options = list(dom = 'tp'))
47 |
48 |
49 | ## Combining the Results
50 | combined_result <- rbind(results_brisket,results_chicken,results_pork,results_ribs)
51 |
52 | ## Total Score
53 | Total_score<-combined_result %>%
54 | group_by(contest_key, team_name) %>%
55 | summarise(tscore =sum(score)) %>%
56 | data.frame() %>%
57 | arrange(contest_key, desc(tscore))
58 |
59 | datatable(Total_score, class="table-condensed", style="bootstrap", options = list(dom = 'tp'))
60 |
61 |
62 | ## Number of Participants
63 |
64 | participants <- Total_score %>%
65 | group_by(contest_key)%>%
66 | summarise(No_of_teams = n())
67 |
68 | datatable(participants, class="table-condensed", style="bootstrap", options = list(dom = 'tp'))
69 |
70 | rm(participants)
71 | gc()
72 |
73 |
74 | ## Number of Appearances by a Team
75 |
76 | number_of_apperance <- Total_score %>%
77 | group_by(team_name)%>%
78 | summarise(appearances = n())%>%
79 | arrange(desc(appearances))
80 | datatable(number_of_apperance, class="table-condensed", style="bootstrap", options = list(dom = 'tp'))
81 |
82 |
83 | ## Who's the Best Team Out There?
84 |
85 | avg_total_team_score <- Total_score %>%
86 | group_by(team_name)%>%
87 | summarise(appearances = n(), avg_total_score = mean(tscore))%>%
88 | arrange(desc(avg_total_score))
89 | datatable(avg_total_team_score, class="table-condensed", style="bootstrap", options = list(dom = 'tp'))
90 |
91 |
92 | ## Total Score Out of 800
93 |
94 | library(tm)
95 | library(wordcloud)
96 | # Draw a word cloud from the character vector `documents`, keeping terms at least as frequent as the 100th most common one.
97 | makeWordCloud <- function(documents) {
98 |   corpus <- Corpus(VectorSource(tolower(documents)))
99 |   corpus <- tm_map(corpus, removePunctuation)
100 |   corpus <- tm_map(corpus, removeWords, stopwords("english"))
101 |   frequencies <- DocumentTermMatrix(corpus)
102 |   word_frequencies <- as.data.frame(as.matrix(frequencies))
103 |   words <- colnames(word_frequencies)
104 |   freq <- colSums(word_frequencies)
105 |   if (length(freq) == 0) return(invisible(NULL))  # no terms survived cleaning: nothing to plot
106 |   # Cut-off = count of the 100th most frequent term; with fewer than 100 terms use the rarest one instead of failing on `[[100]]`.
107 |   cutoff <- sort(freq, decreasing = TRUE)[[min(100, length(freq))]]
108 |   wordcloud(words, freq, min.freq = cutoff,
109 |             colors = brewer.pal(8, "Dark2"), random.color = TRUE)
110 | }
111 |
112 |
113 | # Contest entries with a total score above 600 (Total_score already holds one row per contest/team, so no grouping is needed).
114 | top_score <- Total_score %>%
115 |   filter(tscore > 600)
116 | # head() caps the input at 2000 names; `[1:2000]` would pad shorter input with NA, which tolower() turns into the term "na".
117 | makeWordCloud(head(top_score[["team_name"]], 2000))
118 |
--------------------------------------------------------------------------------
/Bikeshare Bay Area.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Bikeshare Bay Area"
3 | # Only the first front-matter block of an R Markdown file is read, so the
4 | # dashboard settings that used to sit in a second block (titled
5 | # "Bike Shares Daily") are merged here; the chunks below need them because
6 | # they use flexdashboard layout plus shiny inputs and render*() calls.
7 | output:
8 |   flexdashboard::flex_dashboard:
9 |     orientation: columns
10 |     vertical_layout: fill
11 | runtime: shiny
12 | ---
13 |
14 |
15 | ```{r global, include=FALSE}
16 | library(flexdashboard)
17 | library(readr)
18 | library(leaflet)
19 | library(DT)
20 | library(tidyverse)
21 | library(lubridate)
22 | library(plotly)
23 |
24 | trips_df <- read_csv('https://assets.datacamp.com/production/repositories/1448/datasets/1f12031000b09ad096880bceb61f6ca2fd95e2eb/sanfran_bikeshare_joined_oneday.csv') %>%
25 | mutate(duration_min = duration_sec / 60)
26 | ```
27 |
28 | Sidebar {.sidebar}
29 | ====================
30 |
31 | ```{r}
32 |
33 | sliderInput("duration_slider", label = "Select maximum trip duration to display (in minutes):",
34 | min = 0, max = 120, value = 15, step = 5, dragRange = TRUE)
35 |
36 | sliderInput("duration_bin", label = "Select # of minutes to bin trip durations:",
37 | min = 1, max = 15, value = 1, step = 1)
38 |
39 | show_trips_df <- reactive({
40 |
41 | trips_df %>%
42 | filter(duration_sec <= input$duration_slider * 60)
43 |
44 | })
45 | ```
46 |
47 | Overview
48 | ====================
49 |
50 | Column {data-width=450}
51 | -----------------------------------------------------------------------
52 |
53 | ### Origins
54 |
55 | ```{r}
56 |
57 | renderLeaflet({
58 | show_trips_df() %>%
59 | rename(latitude = start_latitude,
60 | longitude = start_longitude) %>%
61 | group_by(start_station_id, latitude, longitude) %>%
62 | count() %>%
63 | leaflet() %>%
64 | addTiles() %>%
65 | addCircles(radius = ~n)
66 | })
67 |
68 | ```
69 |
70 | Column {data-width=350}
71 | -----------------------------------------------------------------------
72 |
73 | ### Total Trips
74 |
75 | ```{r}
76 |
77 | renderValueBox({
78 | valueBox(prettyNum(show_trips_df() %>%
79 | nrow(), big.mark = ','),
80 | icon = 'fa-bicycle')
81 | })
82 |
83 | ```
84 |
85 | ### Trips by Start Time
86 |
87 | ```{r}
88 |
89 | renderPlot({show_trips_df() %>%
90 | mutate(hour = hour(start_date)) %>%
91 | group_by(hour) %>%
92 | summarize(`Trips Started` = n()) %>%
93 | ggplot(aes(x = hour, y = `Trips Started`)) +
94 | theme_bw() +
95 | ylab('Trips Started \n') +
96 | geom_bar(stat = 'identity')
97 | })
98 |
99 |
100 | ```
101 |
102 | Duration
103 | ====================
104 |
105 | ### Trip Durations
106 |
107 | ```{r}
108 |
109 | renderPlot({show_trips_df() %>%
110 | mutate(`Trip Duration (min)` = duration_sec / 60) %>%
111 | ggplot(aes(x = `Trip Duration (min)`)) +
112 | theme_bw() +
113 | geom_histogram(binwidth = input$duration_bin) +
114 | ylab('# Trips')
115 | })
116 |
117 |
118 | ```
119 |
--------------------------------------------------------------------------------
/Caravan/Caravan Insurance.R:
--------------------------------------------------------------------------------
1 | # Caravan Analysis
2 |
3 | ## Loading the Libraries
4 |
5 | library(ggplot2)
6 | library(readr)
7 | library(Amelia)
8 | library(data.table)
9 | library(RColorBrewer)
10 | library(mlbench)
11 | library(DMwR)
12 | library(caret)
13 | library(pROC)
14 |
15 |
16 |
17 | ## Changing the Working Directory
18 |
19 | setwd('./Kaggle/Caravan')
20 |
21 | ## Reading the Dataset
22 |
23 | cvan <- fread("./caravan-insurance-challenge.csv")
24 | dim(cvan)
25 | head(cvan)
26 |
27 | ## Creating the Train Test Split Dataset
28 |
29 | train <- cvan[ORIGIN == "train", ]
30 | test <- cvan[ORIGIN != "train", ]
31 |
32 | ## Putting Them on Tables
33 |
34 | ftable(train[, CARAVAN])
35 | prop.table(ftable(train[, CARAVAN]))
36 |
37 |
38 | tr_outcome <- train[, CARAVAN]
39 |
40 | tr_feat <- train[, !"CARAVAN"]
41 |
42 | test_outcome <- test[, CARAVAN]
43 | test_features <- test[, !"CARAVAN"]
44 |
45 | ## Looking at the Missing Map
46 |
47 | missmap(train)
48 |
49 |
50 | ## Looking at the Plots Between MGODRK and Caravan
51 |
52 | ggplot(train[, .N, by = list(MGODRK, CARAVAN)],
53 | aes(x = MGODRK, y = N, fill = factor(CARAVAN),
54 | color = factor(CARAVAN), alpha = .3 )) +
55 | geom_bar(stat = "identity")
56 |
57 |
58 | ## Looking at the Plots Between MGODOV and Caravan
59 |
60 | ggplot(train[, .N, by = list(MGODOV, CARAVAN)],
61 | aes(x = MGODOV, y = N, fill = factor(CARAVAN),
62 | color = factor(CARAVAN), alpha = .3 )) +
63 | geom_bar(stat = "identity")
64 |
65 |
66 | ## Gathering the Train Table
67 |
68 | train <- train[, Religion := ifelse(MGODRK == 1, "R",
69 | (ifelse(MGODPR == 1, "P",
70 | ifelse(MGODOV == 1, "O","N"))))]
71 |
72 | ftable(train[, MRELGE])
73 |
74 |
75 | ## MRELGE Plot
76 |
77 | ggplot(train[, .N, by = MRELGE],
78 | aes(x= factor(MRELGE), y = N,
79 | color = factor(MRELGE), fill = factor(MRELGE),
80 | alpha = 0.3, size = N)) +
81 | geom_point()
82 |
83 |
84 | ## Looking At Variables Close to Zero
85 |
86 | nearzero_train <- nearZeroVar(train, saveMetrics = TRUE,
87 | freqCut= 95/5)
88 | nearzero_train
89 |
90 |
91 | ## Looking at the Origin Variable
92 |
93 | train <- train[, ORIGIN := NULL]
94 | test <- test[, ORIGIN := NULL]
95 |
96 |
97 | ## Outcomes and Features of Origin
98 |
99 | # Split each partition into its outcome vector and its predictor table.
100 | outcome.train <- train[, CARAVAN]
101 | features.train <- train[, !"CARAVAN"]
102 | outcome.test <- test[, CARAVAN]
103 | # The quoted form `!"CARAVAN"` drops the column; the unquoted `!CARAVAN` used before negated its values instead.
104 | features.test <- test[, !"CARAVAN"]
105 |
106 |
107 | train <- train[, CARAVAN := ifelse(CARAVAN == 0, "No", "Yes")]
108 | test <- test[, CARAVAN := ifelse(CARAVAN == 0, "No", "Yes")]
109 |
110 |
111 | ## Train Table for Caravan
112 | ftable(train[, CARAVAN])
113 |
114 | ## Test Table for Caravan
115 | ftable(test[, CARAVAN])
116 |
117 | ## Gathering the Smote Information
118 |
119 | train <- train[, CARAVAN := factor(CARAVAN)]
120 | train <- train[, Religion := NULL]
121 | trsmote <- SMOTE(CARAVAN ~ MHKOOP , perc.over = 200, perc.under = 150,
122 | data = train,
123 | k = 10)
124 |
125 |
126 | dim(trsmote)
127 |
128 | ## Comparing Smote to Original
129 |
130 | ftable(train[, CARAVAN])
131 |
132 | ftable(trsmote[, CARAVAN])
133 |
134 | ## Running Train Control
135 |
136 | trCtrl <- trainControl(method = "repeatedcv", repeats = 3,
137 | summaryFunction=twoClassSummary , classProbs = TRUE, verbose = 1)
138 |
139 | control <- rfeControl(functions=rfFuncs, method="cv", number=10)
140 |
141 | ## Boosted Trees
142 |
143 | bst.grid <- expand.grid(mstop = 50,
144 | maxdepth = 12)
145 |
146 | bstFit <- train(CARAVAN~., data = trsmote,
147 | trControl = trCtrl,
148 | method = "blackboost",
149 | tuneGrid = bst.grid,
150 | metric = "ROC")
151 |
152 | ## Creating the Best Fit
153 |
154 | top <- varImp(bstFit, scale = FALSE)
155 | print(top)
156 |
157 |
158 | plot(top)
159 |
160 | ## Predicted Values Boosted Trees
161 |
162 | bstFit
163 |
164 |
165 | ## Blackboost Predictions for Train
166 |
167 | blackboost_pred <- data.frame(predict(bstFit))
168 |
169 | blackboost_pred_prob <- predict(bstFit, type = "prob")
170 |
171 |
172 | ggplot(blackboost_pred, aes(x = blackboost_pred_prob$Yes,
173 | fill = trsmote[, CARAVAN],
174 | colour = trsmote[, CARAVAN],
175 | alpha = 0.3)) + geom_density()
176 |
177 | ## Blackboost Predictions for Test
178 |
179 | blackboost_pred_test <- data.frame(predict(bstFit, newdata = test))
180 |
181 | blackboost_pred_prob_test <- predict(bstFit, newdata = test, type = "prob")
182 |
183 | ggplot(blackboost_pred_test, aes(x = blackboost_pred_prob_test$Yes,
184 | fill = test[, CARAVAN],
185 | colour = test[, CARAVAN],
186 | alpha = 0.3)) + geom_histogram()
187 |
188 |
189 | ## Confusion Matrix
190 | # test$CARAVAN is still a character column ("No"/"Yes"; only train was converted to a factor), and confusionMatrix() requires factors.
191 | confusionMatrix(blackboost_pred_test$predict.bstFit..newdata...test., factor(test[, CARAVAN], levels = c("No", "Yes")))
192 |
193 | ## Test Class
194 |
195 | bst_test_class <- ifelse(blackboost_pred_test$predict.bstFit..newdata...test.== "Yes", 1, 0)
196 | test_class <- ifelse(test[, CARAVAN] == "Yes", 1, 0)
197 |
198 | roc(test_class, bst_test_class)
199 |
200 | ## Using XG Boost
201 |
202 | xgb.grid <- expand.grid(nrounds = 3,
203 | max_depth = 12,
204 | eta = 0.1,
205 | gamma = 1,
206 | colsample_bytree = 1,
207 | min_child_weight = 1,
208 | subsample = 0.75)
209 | xgbFit <- train(CARAVAN ~.,
210 | data = trsmote,
211 | method = "xgbTree",
212 | metric = "ROC",
213 | trControl = trCtrl,
214 | tuneGrid = xgb.grid)
215 |
216 |
217 | ## Getting the Results and Plotting It
218 |
219 | xgbFit$results
220 |
221 |
222 | ggplot(xgbFit$results, aes(x = eta, y = ROC,
223 | fill = ROC, size = ROC,
224 | color = factor(gamma))) + geom_point()
225 |
226 |
227 | ## Predicted Values
228 |
229 | xgbFit.pred <- predict(xgbFit)
230 | xgbFit.pred.prob <- predict(xgbFit, type = "prob")
231 |
232 |
233 | xgbFit.pred <- data.frame(xgbFit.pred)
234 |
235 |
236 | xgbFit.pred.test <- predict(xgbFit, newdata = test)
237 | xgbFit.pred.test.prob <- predict(xgbFit, newdata = test, type = "prob")
238 |
239 |
240 | xgbFit.pred.test <- data.frame(xgbFit.pred.test)
241 | colnames(xgbFit.pred.test)
242 |
243 |
244 | ## Plotting the Values
245 |
246 | ggplot(xgbFit.pred.test, aes(x = xgbFit.pred.test.prob$Yes,
247 | fill = test[, CARAVAN],
248 | color = test[, CARAVAN],
249 | alpha = 0.3)) + geom_histogram()
250 |
251 |
252 | ## Gathering the Confusion Matrix
253 |
254 | xgb_test_class <- ifelse(xgbFit.pred.test$xgbFit.pred.test == "Yes", 1, 0)
255 | # confusionMatrix() expects factors sharing one level set, not bare 0/1 numerics.
256 | confusionMatrix(factor(xgb_test_class, levels = c(0, 1)), factor(test_class, levels = c(0, 1)))
257 |
--------------------------------------------------------------------------------
/College Salaries/College Salaries.R:
--------------------------------------------------------------------------------
1 | # College Salaries
2 |
3 | ## Loading the Libraries
4 |
5 | library(tidyverse)
6 | library(stringr)
7 | library(gridExtra)
8 | library(plotly)
9 | library(readr)
10 |
11 | ## Changing the Working Directory
12 |
13 | setwd('./Kaggle/College Salaries')
14 |
15 | ## Reading the First Dataset
16 |
17 | type <- read_csv("./salaries-by-college-type.csv")
18 |
19 |
20 | ## Reformat the Salary
21 |
22 | # Turn salary strings such as "$72,200.00" into whole-dollar integers.
23 | # `salary` may be a whole column; missing entries come back as NA.
24 | salary_reform <- function(salary) {
25 |   # The former scalar guard `if (is.na(salary)) return(NA)` breaks on vectors
26 |   # (an error since R 4.2); NA already passes through gsub()/as.integer() as NA.
27 |   cleaned <- gsub("\\$|,", "", salary)
28 |   as.integer(cleaned)
29 | }
30 |
31 |
32 | ## Fixing Up the Dataset
33 |
34 | type <- type %>%
35 | select(1:4) %>%
36 | mutate(
37 | `Starting Median Salary` = salary_reform(`Starting Median Salary`),
38 | `Mid-Career Median Salary` = salary_reform(`Mid-Career Median Salary`)
39 | )
40 |
41 |
42 | ## Calculating the Mid-Career
43 |
44 | type <- type %>%
45 | mutate(
46 | `Percentage Change` = round((`Mid-Career Median Salary`-`Starting Median Salary`)/`Starting Median Salary`,3)*100
47 | )
48 |
49 | knitr::kable(head(type))
50 |
51 |
52 | ## Visualization of the Salary Distribution
53 |
54 | type %>%
55 | ggplot(aes(`Starting Median Salary`)) +
56 | geom_histogram(fill="indianred", color="lightgrey", binwidth=2000) +
57 | geom_histogram(aes(`Mid-Career Median Salary`),
58 | fill="navyblue", color="lightgrey", binwidth=2000, alpha=0.6) +
59 | ggtitle("Broader Distribution for Mid-Career Salaries") +
60 | xlab("Salary") + ylab("Count")
61 |
62 | ## Boxplots of the Starting and Median Salaries
63 |
64 |
65 | # Median values for starting and mid-career salaries
66 | median_start <- median(type$`Starting Median Salary`)
67 | mid <- median(type$`Mid-Career Median Salary`)
68 |
69 | # Box Plot for Starting Salaries by School Type
70 | school_type <- type %>%
71 | ggplot(aes(`School Type`, `Starting Median Salary`, fill=`School Type`)) +
72 | geom_jitter(color="darkgrey", alpha=0.8) +
73 | geom_boxplot(alpha=0.6) +
74 | geom_abline(slope=0, intercept=median_start, color="red", linetype=2, alpha=0.5) +
75 | ggtitle("Engineering and Ivy League Lead the Way in Starting Salaries") +
76 | xlab("") + ylab("Starting Salary") +
77 | theme_bw() +
78 | theme(legend.position = "none")
79 |
80 | # Box Plot for Mid-Career Salaries by School Type
81 | mid_school <- type %>%
82 | ggplot(aes(`School Type`, `Mid-Career Median Salary`, fill=`School Type`)) +
83 | geom_jitter(colour="darkgrey", alpha=0.8) +
84 | geom_boxplot(alpha=0.6) +
85 | geom_abline(slope=0, intercept=mid, colour="red", linetype=2, alpha=0.5) +
86 | ggtitle("Higher Upward Mobility for Ivy League Over Engineering Schools Over Time") +
87 | xlab("") + ylab("Mid-Career Salary") +
88 | theme_bw() +
89 | theme(legend.position = "none")
90 |
91 | grid.arrange(school_type, mid_school, ncol=1)
92 |
93 |
94 | ## Looking at the Top 10 Salaries
95 | # Starting vs. mid-career salary for the 10 schools with the highest mid-career median.
96 | type %>%
97 |   top_n(10, wt = `Mid-Career Median Salary`) %>%
98 |   gather("Career", "Salary", 3:4) %>%
99 |   mutate(Career = factor(Career, levels = c("Starting Median Salary", "Mid-Career Median Salary"))) %>%
100 |   plot_ly(
101 |     x = ~Career, y = ~Salary, color = ~`School Name`, type = "scatter", mode = "lines+markers",
102 |     # plotly hover text takes "<br>" as its line break (the strings previously contained raw newlines).
103 |     text = ~paste(`School Name`, "<br>", `School Type`, "<br>Change:", `Percentage Change`, "%"),
104 |     colors = "Paired"
105 |   ) %>%
106 |   layout(
107 |     title = "Dartmouth with the Largest Salary Increase from Number 10 to Number 1",
108 |     showlegend = FALSE,
109 |     xaxis = list(showticklabels = FALSE, title = "Universities with the Top Median Salaries"),
110 |     yaxis = list(title = "")
111 |   )
112 |
113 | # Focusing on the Region
114 |
115 | ## Loading the Dataset
116 |
117 | region <- read_csv("./salaries-by-region.csv")
118 |
119 |
120 | ## Reformat Salary by Region
121 |
122 | region <- region %>%
123 | mutate(
124 | `Starting Median Salary` = salary_reform(`Starting Median Salary`),
125 | `Mid-Career Median Salary` = salary_reform(`Mid-Career Median Salary`)
126 | )
127 |
128 | ## Barplot of the Region
129 | # Per region, keep the 7 schools with the highest combined salary and draw their mid-career medians as bars coloured by in-region rank.
130 | region %>%
131 |   group_by(Region) %>%
132 |   ### rank by mid-career and starting salary combined to break ties
133 |   top_n(7, wt = `Mid-Career Median Salary` + `Starting Median Salary`) %>%
134 |   mutate(Rank = rank(desc(`Mid-Career Median Salary`), ties.method = "first")) %>%
135 |   plot_ly(
136 |     x = ~Region, y = ~`Mid-Career Median Salary`, color = ~factor(Rank), type = "bar", colors = "Set3",
137 |     text = ~paste(`School Name`, "<br>Rank:", Rank)  # "<br>" is plotly's hover-text line break (was a raw newline)
138 |   ) %>%
139 |   layout(
140 |     showlegend = FALSE, title = "Universities with the Highest Mid-Career Salaries by Region",
141 |     yaxis = list(title = "Mid-Career Median Salary"), xaxis = list(title = ""))
142 |
143 |
--------------------------------------------------------------------------------
/College Salaries/degrees-that-pay-back.csv:
--------------------------------------------------------------------------------
1 | Undergraduate Major,Starting Median Salary,Mid-Career Median Salary,Percent change from Starting to Mid-Career Salary,Mid-Career 10th Percentile Salary,Mid-Career 25th Percentile Salary,Mid-Career 75th Percentile Salary,Mid-Career 90th Percentile Salary
2 | Accounting,"$46,000.00","$77,100.00",67.6,"$42,200.00","$56,100.00","$108,000.00","$152,000.00"
3 | Aerospace Engineering,"$57,700.00","$101,000.00",75,"$64,300.00","$82,100.00","$127,000.00","$161,000.00"
4 | Agriculture,"$42,600.00","$71,900.00",68.8,"$36,300.00","$52,100.00","$96,300.00","$150,000.00"
5 | Anthropology,"$36,800.00","$61,500.00",67.1,"$33,800.00","$45,500.00","$89,300.00","$138,000.00"
6 | Architecture,"$41,600.00","$76,800.00",84.6,"$50,600.00","$62,200.00","$97,000.00","$136,000.00"
7 | Art History,"$35,800.00","$64,900.00",81.3,"$28,800.00","$42,200.00","$87,400.00","$125,000.00"
8 | Biology,"$38,800.00","$64,800.00",67,"$36,900.00","$47,400.00","$94,500.00","$135,000.00"
9 | Business Management,"$43,000.00","$72,100.00",67.7,"$38,800.00","$51,500.00","$102,000.00","$147,000.00"
10 | Chemical Engineering,"$63,200.00","$107,000.00",69.3,"$71,900.00","$87,300.00","$143,000.00","$194,000.00"
11 | Chemistry,"$42,600.00","$79,900.00",87.6,"$45,300.00","$60,700.00","$108,000.00","$148,000.00"
12 | Civil Engineering,"$53,900.00","$90,500.00",67.9,"$63,400.00","$75,100.00","$115,000.00","$148,000.00"
13 | Communications,"$38,100.00","$70,000.00",83.7,"$37,500.00","$49,700.00","$98,800.00","$143,000.00"
14 | Computer Engineering,"$61,400.00","$105,000.00",71,"$66,100.00","$84,100.00","$135,000.00","$162,000.00"
15 | Computer Science,"$55,900.00","$95,500.00",70.8,"$56,000.00","$74,900.00","$122,000.00","$154,000.00"
16 | Construction,"$53,700.00","$88,900.00",65.5,"$56,300.00","$68,100.00","$118,000.00","$171,000.00"
17 | Criminal Justice,"$35,000.00","$56,300.00",60.9,"$32,200.00","$41,600.00","$80,700.00","$107,000.00"
18 | Drama,"$35,900.00","$56,900.00",58.5,"$36,700.00","$41,300.00","$79,100.00","$153,000.00"
19 | Economics,"$50,100.00","$98,600.00",96.8,"$50,600.00","$70,600.00","$145,000.00","$210,000.00"
20 | Education,"$34,900.00","$52,000.00",49,"$29,300.00","$37,900.00","$73,400.00","$102,000.00"
21 | Electrical Engineering,"$60,900.00","$103,000.00",69.1,"$69,300.00","$83,800.00","$130,000.00","$168,000.00"
22 | English,"$38,000.00","$64,700.00",70.3,"$33,400.00","$44,800.00","$93,200.00","$133,000.00"
23 | Film,"$37,900.00","$68,500.00",80.7,"$33,900.00","$45,500.00","$100,000.00","$136,000.00"
24 | Finance,"$47,900.00","$88,300.00",84.3,"$47,200.00","$62,100.00","$128,000.00","$195,000.00"
25 | Forestry,"$39,100.00","$62,600.00",60.1,"$41,000.00","$49,300.00","$78,200.00","$111,000.00"
26 | Geography,"$41,200.00","$65,500.00",59,"$40,000.00","$50,000.00","$90,800.00","$132,000.00"
27 | Geology,"$43,500.00","$79,500.00",82.8,"$45,000.00","$59,600.00","$101,000.00","$156,000.00"
28 | Graphic Design,"$35,700.00","$59,800.00",67.5,"$36,000.00","$45,500.00","$80,800.00","$112,000.00"
29 | Health Care Administration,"$38,800.00","$60,600.00",56.2,"$34,600.00","$45,600.00","$78,800.00","$101,000.00"
30 | History,"$39,200.00","$71,000.00",81.1,"$37,000.00","$49,200.00","$103,000.00","$149,000.00"
31 | Hospitality & Tourism,"$37,800.00","$57,500.00",52.1,"$35,500.00","$43,600.00","$81,900.00","$124,000.00"
32 | Industrial Engineering,"$57,700.00","$94,700.00",64.1,"$57,100.00","$72,300.00","$132,000.00","$173,000.00"
33 | Information Technology (IT),"$49,100.00","$74,800.00",52.3,"$44,500.00","$56,700.00","$96,700.00","$129,000.00"
34 | Interior Design,"$36,100.00","$53,200.00",47.4,"$35,700.00","$42,600.00","$72,500.00","$107,000.00"
35 | International Relations,"$40,900.00","$80,900.00",97.8,"$38,200.00","$56,000.00","$111,000.00","$157,000.00"
36 | Journalism,"$35,600.00","$66,700.00",87.4,"$38,400.00","$48,300.00","$97,700.00","$145,000.00"
37 | Management Information Systems (MIS),"$49,200.00","$82,300.00",67.3,"$45,300.00","$60,500.00","$108,000.00","$146,000.00"
38 | Marketing,"$40,800.00","$79,600.00",95.1,"$42,100.00","$55,600.00","$119,000.00","$175,000.00"
39 | Math,"$45,400.00","$92,400.00",103.5,"$45,200.00","$64,200.00","$128,000.00","$183,000.00"
40 | Mechanical Engineering,"$57,900.00","$93,600.00",61.7,"$63,700.00","$76,200.00","$120,000.00","$163,000.00"
41 | Music,"$35,900.00","$55,000.00",53.2,"$26,700.00","$40,200.00","$88,000.00","$134,000.00"
42 | Nursing,"$54,200.00","$67,000.00",23.6,"$47,600.00","$56,400.00","$80,900.00","$98,300.00"
43 | Nutrition,"$39,900.00","$55,300.00",38.6,"$33,900.00","$44,500.00","$70,500.00","$99,200.00"
44 | Philosophy,"$39,900.00","$81,200.00",103.5,"$35,500.00","$52,800.00","$127,000.00","$168,000.00"
45 | Physician Assistant,"$74,300.00","$91,700.00",23.4,"$66,400.00","$75,200.00","$108,000.00","$124,000.00"
46 | Physics,"$50,300.00","$97,300.00",93.4,"$56,000.00","$74,200.00","$132,000.00","$178,000.00"
47 | Political Science,"$40,800.00","$78,200.00",91.7,"$41,200.00","$55,300.00","$114,000.00","$168,000.00"
48 | Psychology,"$35,900.00","$60,400.00",68.2,"$31,600.00","$42,100.00","$87,500.00","$127,000.00"
49 | Religion,"$34,100.00","$52,000.00",52.5,"$29,700.00","$36,500.00","$70,900.00","$96,400.00"
50 | Sociology,"$36,500.00","$58,200.00",59.5,"$30,700.00","$40,400.00","$81,200.00","$118,000.00"
51 | Spanish,"$34,000.00","$53,100.00",56.2,"$31,000.00","$40,000.00","$76,800.00","$96,400.00"
52 |
--------------------------------------------------------------------------------
/College Salaries/salaries-by-college-type.csv:
--------------------------------------------------------------------------------
1 | School Name,School Type,Starting Median Salary,Mid-Career Median Salary,Mid-Career 10th Percentile Salary,Mid-Career 25th Percentile Salary,Mid-Career 75th Percentile Salary,Mid-Career 90th Percentile Salary
2 | Massachusetts Institute of Technology (MIT),Engineering,"$72,200.00","$126,000.00","$76,800.00","$99,200.00","$168,000.00","$220,000.00"
3 | California Institute of Technology (CIT),Engineering,"$75,500.00","$123,000.00",N/A,"$104,000.00","$161,000.00",N/A
4 | Harvey Mudd College,Engineering,"$71,800.00","$122,000.00",N/A,"$96,000.00","$180,000.00",N/A
5 | "Polytechnic University of New York, Brooklyn",Engineering,"$62,400.00","$114,000.00","$66,800.00","$94,300.00","$143,000.00","$190,000.00"
6 | Cooper Union,Engineering,"$62,200.00","$114,000.00",N/A,"$80,200.00","$142,000.00",N/A
7 | Worcester Polytechnic Institute (WPI),Engineering,"$61,000.00","$114,000.00","$80,000.00","$91,200.00","$137,000.00","$180,000.00"
8 | Carnegie Mellon University (CMU),Engineering,"$61,800.00","$111,000.00","$63,300.00","$80,100.00","$150,000.00","$209,000.00"
9 | Rensselaer Polytechnic Institute (RPI),Engineering,"$61,100.00","$110,000.00","$71,600.00","$85,500.00","$140,000.00","$182,000.00"
10 | Georgia Institute of Technology,Engineering,"$58,300.00","$106,000.00","$67,200.00","$85,200.00","$137,000.00","$183,000.00"
11 | Colorado School of Mines,Engineering,"$58,100.00","$106,000.00","$62,200.00","$87,900.00","$142,000.00","$201,000.00"
12 | Stevens Institute of Technology,Engineering,"$60,600.00","$105,000.00","$68,700.00","$81,900.00","$138,000.00","$185,000.00"
13 | Illinois Institute of Technology (IIT),Engineering,"$56,000.00","$97,800.00","$56,100.00","$77,400.00","$121,000.00","$165,000.00"
14 | Wentworth Institute of Technology,Engineering,"$53,000.00","$96,700.00","$55,200.00","$74,000.00","$117,000.00","$153,000.00"
15 | Virginia Polytechnic Institute and State University (Virginia Tech),Engineering,"$53,500.00","$95,400.00","$50,600.00","$71,400.00","$124,000.00","$163,000.00"
16 | South Dakota School of Mines & Technology,Engineering,"$55,800.00","$93,400.00","$71,500.00","$81,900.00","$122,000.00","$147,000.00"
17 | New Mexico Institute of Mining and Technology (New Mexico Tech),Engineering,"$51,000.00","$93,400.00",N/A,"$67,400.00","$123,000.00",N/A
18 | Rochester Institute of Technology (RIT),Engineering,"$48,900.00","$84,600.00","$45,000.00","$62,100.00","$112,000.00","$159,000.00"
19 | Embry-Riddle Aeronautical University (ERAU),Engineering,"$52,700.00","$80,700.00","$49,800.00","$64,000.00","$106,000.00","$142,000.00"
20 | Tennessee Technological University,Engineering,"$46,200.00","$80,000.00","$42,100.00","$62,600.00","$99,500.00","$121,000.00"
21 | University of Illinois at Urbana-Champaign (UIUC),Party,"$52,900.00","$96,100.00","$48,200.00","$68,900.00","$132,000.00","$177,000.00"
22 | "University of Maryland, College Park",Party,"$52,000.00","$95,000.00","$50,400.00","$68,300.00","$126,000.00","$166,000.00"
23 | "University of California, Santa Barbara (UCSB)",Party,"$50,500.00","$95,000.00","$51,300.00","$71,200.00","$129,000.00","$173,000.00"
24 | University of Texas (UT) - Austin,Party,"$49,700.00","$93,900.00","$50,100.00","$67,400.00","$129,000.00","$188,000.00"
25 | State University of New York (SUNY) at Albany,Party,"$44,500.00","$92,200.00","$47,000.00","$63,100.00","$135,000.00","$209,000.00"
26 | University of Florida (UF),Party,"$47,100.00","$87,900.00","$45,400.00","$62,900.00","$120,000.00","$172,000.00"
27 | Louisiana State University (LSU),Party,"$46,900.00","$87,800.00","$43,700.00","$61,300.00","$120,000.00","$165,000.00"
28 | University of Georgia (UGA),Party,"$44,100.00","$86,000.00","$43,100.00","$57,800.00","$118,000.00","$164,000.00"
29 | Pennsylvania State University (PSU),Party,"$49,900.00","$85,700.00","$46,300.00","$62,000.00","$117,000.00","$160,000.00"
30 | Arizona State University (ASU),Party,"$47,400.00","$84,100.00","$44,600.00","$60,700.00","$114,000.00","$163,000.00"
31 | "Indiana University (IU), Bloomington",Party,"$46,300.00","$84,000.00","$43,600.00","$60,400.00","$119,000.00","$178,000.00"
32 | University of Iowa (UI),Party,"$44,700.00","$83,900.00","$43,300.00","$61,100.00","$116,000.00","$163,000.00"
33 | Randolph-Macon College,Party,"$42,600.00","$83,600.00",N/A,"$54,100.00","$123,000.00",N/A
34 | "University of Alabama, Tuscaloosa",Party,"$41,300.00","$81,400.00","$40,100.00","$56,500.00","$117,000.00","$161,000.00"
35 | University of Mississippi,Party,"$41,400.00","$79,700.00","$40,400.00","$53,500.00","$108,000.00","$186,000.00"
36 | University of New Hampshire (UNH),Party,"$41,800.00","$78,300.00","$41,700.00","$56,400.00","$114,000.00","$147,000.00"
37 | West Virginia University (WVU),Party,"$43,100.00","$78,100.00","$39,700.00","$55,700.00","$106,000.00","$141,000.00"
38 | University of Tennessee,Party,"$43,800.00","$74,600.00","$41,900.00","$53,200.00","$106,000.00","$153,000.00"
39 | Ohio University,Party,"$42,200.00","$73,400.00","$36,600.00","$52,800.00","$106,000.00","$150,000.00"
40 | Florida State University (FSU),Party,"$42,100.00","$73,000.00","$39,600.00","$52,800.00","$107,000.00","$156,000.00"
41 | Bucknell University,Liberal Arts,"$54,100.00","$110,000.00","$62,800.00","$80,600.00","$156,000.00","$251,000.00"
42 | Colgate University,Liberal Arts,"$52,800.00","$108,000.00","$60,000.00","$76,700.00","$167,000.00","$265,000.00"
43 | Amherst College,Liberal Arts,"$54,500.00","$107,000.00",N/A,"$84,900.00","$162,000.00",N/A
44 | Lafayette College,Liberal Arts,"$53,900.00","$107,000.00","$70,600.00","$79,300.00","$144,000.00","$204,000.00"
45 | Bowdoin College,Liberal Arts,"$48,100.00","$107,000.00",N/A,"$74,600.00","$146,000.00",N/A
46 | College of the Holy Cross,Liberal Arts,"$50,200.00","$106,000.00",N/A,"$65,600.00","$143,000.00",N/A
47 | Occidental College,Liberal Arts,"$51,900.00","$105,000.00",N/A,"$54,800.00","$157,000.00",N/A
48 | Washington and Lee University,Liberal Arts,"$53,600.00","$104,000.00",N/A,"$82,800.00","$146,000.00",N/A
49 | Swarthmore College,Liberal Arts,"$49,700.00","$104,000.00",N/A,"$67,200.00","$167,000.00",N/A
50 | Davidson College,Liberal Arts,"$46,100.00","$104,000.00",N/A,"$70,500.00","$146,000.00",N/A
51 | Carleton College,Liberal Arts,"$47,500.00","$103,000.00",N/A,"$69,400.00","$141,000.00",N/A
52 | Williams College,Liberal Arts,"$51,700.00","$102,000.00",N/A,"$76,400.00","$143,000.00",N/A
53 | Pomona College,Liberal Arts,"$48,600.00","$101,000.00",N/A,"$63,300.00","$161,000.00",N/A
54 | "Wesleyan University (Middletown, Connecticut)",Liberal Arts,"$46,500.00","$97,900.00","$42,000.00","$62,500.00","$126,000.00","$215,000.00"
55 | Bates College,Liberal Arts,"$47,300.00","$96,500.00",N/A,"$60,700.00","$162,000.00",N/A
56 | Union College,Liberal Arts,"$47,200.00","$95,800.00","$48,700.00","$75,200.00","$135,000.00","$230,000.00"
57 | University of Richmond,Liberal Arts,"$48,600.00","$94,600.00","$44,500.00","$59,400.00","$151,000.00","$211,000.00"
58 | Vassar College,Liberal Arts,"$46,000.00","$94,600.00",N/A,"$60,600.00","$123,000.00",N/A
59 | Middlebury College,Liberal Arts,"$47,700.00","$94,200.00",N/A,"$69,100.00","$129,000.00",N/A
60 | Mount Holyoke College,Liberal Arts,"$42,400.00","$94,100.00",N/A,"$57,100.00","$131,000.00",N/A
61 | Franklin and Marshall College,Liberal Arts,"$49,100.00","$92,800.00",N/A,"$55,800.00","$185,000.00",N/A
62 | DePauw University,Liberal Arts,"$41,400.00","$88,300.00","$49,500.00","$57,400.00","$133,000.00","$185,000.00"
63 | St. Olaf College,Liberal Arts,"$45,300.00","$86,200.00","$41,300.00","$61,000.00","$120,000.00","$185,000.00"
64 | Colby College,Liberal Arts,"$46,400.00","$85,800.00",N/A,"$63,500.00","$129,000.00",N/A
65 | Gettysburg College,Liberal Arts,"$44,700.00","$85,800.00",N/A,"$66,300.00","$132,000.00",N/A
66 | Siena College,Liberal Arts,"$45,500.00","$85,200.00","$38,700.00","$58,400.00","$129,000.00","$189,000.00"
67 | Smith College,Liberal Arts,"$44,000.00","$83,900.00","$45,100.00","$59,800.00","$129,000.00","$184,000.00"
68 | Hamilton College,Liberal Arts,"$49,200.00","$83,700.00",N/A,"$51,900.00","$123,000.00",N/A
69 | Randolph-Macon College,Liberal Arts,"$42,600.00","$83,600.00",N/A,"$54,100.00","$123,000.00",N/A
70 | Wellesley College,Liberal Arts,"$42,800.00","$83,500.00",N/A,"$58,600.00","$125,000.00",N/A
71 | Denison University,Liberal Arts,"$42,000.00","$83,500.00",N/A,"$62,100.00","$122,000.00",N/A
72 | Oberlin College,Liberal Arts,"$43,400.00","$81,600.00",N/A,"$46,400.00","$128,000.00",N/A
73 | University of Puget Sound,Liberal Arts,"$46,600.00","$81,500.00","$48,900.00","$60,100.00","$104,000.00","$137,000.00"
74 | Colorado College (CC),Liberal Arts,"$38,500.00","$81,400.00",N/A,"$43,000.00","$148,000.00",N/A
75 | Reed College,Liberal Arts,"$40,500.00","$81,100.00",N/A,"$67,400.00","$101,000.00",N/A
76 | Gustavus Adolphus College,Liberal Arts,"$44,500.00","$80,600.00",N/A,"$49,300.00","$101,000.00",N/A
77 | Whitman College,Liberal Arts,"$43,500.00","$80,100.00",N/A,"$64,800.00","$111,000.00",N/A
78 | Ursinus College,Liberal Arts,"$42,100.00","$80,000.00","$35,600.00","$54,300.00","$100,000.00","$160,000.00"
79 | Juniata College,Liberal Arts,"$41,800.00","$78,900.00",N/A,"$67,200.00","$110,000.00",N/A
80 | Wittenberg University,Liberal Arts,"$39,200.00","$78,200.00",N/A,"$54,100.00","$131,000.00",N/A
81 | Grinnell College,Liberal Arts,"$42,600.00","$76,600.00",N/A,"$65,100.00","$116,000.00",N/A
82 | Skidmore College,Liberal Arts,"$41,600.00","$74,600.00",N/A,"$42,800.00","$147,000.00",N/A
83 | Moravian College,Liberal Arts,"$42,500.00","$74,400.00",N/A,"$56,700.00","$94,900.00",N/A
84 | Lewis & Clark College,Liberal Arts,"$38,900.00","$72,600.00","$38,200.00","$53,400.00","$104,000.00","$140,000.00"
85 | Fort Lewis College,Liberal Arts,"$42,000.00","$69,800.00",N/A,"$55,000.00","$94,000.00",N/A
86 | Thomas Aquinas College,Liberal Arts,"$41,500.00","$67,500.00",N/A,"$44,600.00","$93,100.00",N/A
87 | Evergreen State College,Liberal Arts,"$39,500.00","$63,900.00","$38,800.00","$47,200.00","$91,600.00","$120,000.00"
88 | Dartmouth College,Ivy League,"$58,000.00","$134,000.00","$63,100.00","$90,200.00","$234,000.00","$321,000.00"
89 | Princeton University,Ivy League,"$66,500.00","$131,000.00","$68,900.00","$100,000.00","$190,000.00","$261,000.00"
90 | Yale University,Ivy League,"$59,100.00","$126,000.00","$58,000.00","$80,600.00","$198,000.00","$326,000.00"
91 | Harvard University,Ivy League,"$63,400.00","$124,000.00","$54,800.00","$86,200.00","$179,000.00","$288,000.00"
92 | University of Pennsylvania,Ivy League,"$60,900.00","$120,000.00","$55,900.00","$79,200.00","$192,000.00","$282,000.00"
93 | Cornell University,Ivy League,"$60,300.00","$110,000.00","$56,800.00","$79,800.00","$160,000.00","$210,000.00"
94 | Brown University,Ivy League,"$56,200.00","$109,000.00","$55,400.00","$74,400.00","$159,000.00","$228,000.00"
95 | Columbia University,Ivy League,"$59,400.00","$107,000.00","$50,300.00","$71,900.00","$161,000.00","$241,000.00"
96 | "University of California, Berkeley",State,"$59,900.00","$112,000.00","$59,500.00","$81,000.00","$149,000.00","$201,000.00"
97 | University of Virginia (UVA),State,"$52,700.00","$103,000.00","$52,200.00","$71,800.00","$146,000.00","$215,000.00"
98 | Cal Poly San Luis Obispo,State,"$57,200.00","$101,000.00","$55,000.00","$74,700.00","$133,000.00","$178,000.00"
99 | University of California at Los Angeles (UCLA),State,"$52,600.00","$101,000.00","$51,300.00","$72,500.00","$139,000.00","$193,000.00"
100 | "University of California, San Diego (UCSD)",State,"$51,100.00","$101,000.00","$51,700.00","$75,400.00","$131,000.00","$177,000.00"
101 | "University of California, Davis",State,"$52,300.00","$99,600.00","$52,000.00","$71,600.00","$135,000.00","$202,000.00"
102 | University of Colorado - Boulder (UCB),State,"$47,100.00","$97,600.00","$51,600.00","$69,000.00","$128,000.00","$187,000.00"
103 | "University of California, Irvine (UCI)",State,"$48,300.00","$96,700.00","$47,800.00","$66,000.00","$123,000.00","$172,000.00"
104 | University of Illinois at Urbana-Champaign (UIUC),State,"$52,900.00","$96,100.00","$48,200.00","$68,900.00","$132,000.00","$177,000.00"
105 | Texas A&M University,State,"$49,700.00","$96,100.00","$51,100.00","$71,300.00","$131,000.00","$171,000.00"
106 | Binghamton University,State,"$53,600.00","$95,900.00","$50,900.00","$71,200.00","$146,000.00","$201,000.00"
107 | University of Missouri - Rolla (UMR),State,"$57,100.00","$95,800.00","$67,600.00","$80,400.00","$122,000.00","$166,000.00"
108 | San Jose State University (SJSU),State,"$53,500.00","$95,600.00","$50,700.00","$70,500.00","$122,000.00","$156,000.00"
109 | "University of Maryland, College Park",State,"$52,000.00","$95,000.00","$50,400.00","$68,300.00","$126,000.00","$166,000.00"
110 | "University of California, Santa Barbara (UCSB)",State,"$50,500.00","$95,000.00","$51,300.00","$71,200.00","$129,000.00","$173,000.00"
111 | University of Texas (UT) - Austin,State,"$49,700.00","$93,900.00","$50,100.00","$67,400.00","$129,000.00","$188,000.00"
112 | University of Michigan,State,"$52,700.00","$93,000.00","$50,900.00","$69,400.00","$128,000.00","$182,000.00"
113 | Stony Brook University,State,"$49,500.00","$93,000.00","$47,200.00","$67,100.00","$129,000.00","$181,000.00"
114 | State University of New York (SUNY) at Albany,State,"$44,500.00","$92,200.00","$47,000.00","$63,100.00","$135,000.00","$209,000.00"
115 | Rutgers University,State,"$50,300.00","$91,800.00","$48,100.00","$65,100.00","$128,000.00","$176,000.00"
116 | Purdue University,State,"$51,400.00","$90,500.00","$49,900.00","$67,400.00","$121,000.00","$168,000.00"
117 | University of Connecticut (UConn),State,"$48,000.00","$88,800.00","$46,100.00","$66,400.00","$120,000.00","$162,000.00"
118 | University of Massachusetts (UMass) - Amherst,State,"$46,600.00","$88,200.00","$43,100.00","$61,300.00","$122,000.00","$168,000.00"
119 | "California State University (CSU), Chico",State,"$47,400.00","$88,100.00","$46,800.00","$62,800.00","$122,000.00","$154,000.00"
120 | University of Florida (UF),State,"$47,100.00","$87,900.00","$45,400.00","$62,900.00","$120,000.00","$172,000.00"
121 | University of Wisconsin (UW) - Madison,State,"$48,900.00","$87,800.00","$47,400.00","$62,400.00","$118,000.00","$170,000.00"
122 | Louisiana State University (LSU),State,"$46,900.00","$87,800.00","$43,700.00","$61,300.00","$120,000.00","$165,000.00"
123 | "California State University, Fullerton (CSUF)",State,"$45,700.00","$87,000.00","$45,400.00","$62,500.00","$119,000.00","$158,000.00"
124 | George Mason University,State,"$47,800.00","$86,900.00","$51,300.00","$67,200.00","$114,000.00","$150,000.00"
125 | University of Massachusetts (UMass) - Lowell,State,"$45,400.00","$86,600.00","$50,900.00","$65,000.00","$113,000.00","$158,000.00"
126 | San Francisco State University (SFSU),State,"$47,300.00","$86,400.00","$45,100.00","$62,700.00","$114,000.00","$150,000.00"
127 | University of Arizona,State,"$47,500.00","$86,100.00","$44,800.00","$61,700.00","$117,000.00","$160,000.00"
128 | Clemson University,State,"$48,400.00","$86,000.00","$50,500.00","$61,800.00","$111,000.00","$150,000.00"
129 | University of Georgia (UGA),State,"$44,100.00","$86,000.00","$43,100.00","$57,800.00","$118,000.00","$164,000.00"
130 | Pennsylvania State University (PSU),State,"$49,900.00","$85,700.00","$46,300.00","$62,000.00","$117,000.00","$160,000.00"
131 | University of Washington (UW),State,"$48,800.00","$85,300.00","$47,000.00","$59,800.00","$115,000.00","$149,000.00"
132 | Michigan State University (MSU),State,"$46,300.00","$85,300.00","$44,200.00","$61,500.00","$119,000.00","$170,000.00"
133 | University of Rhode Island (URI),State,"$43,900.00","$85,300.00","$45,400.00","$60,100.00","$112,000.00","$157,000.00"
134 | San Diego State University (SDSU),State,"$46,200.00","$85,200.00","$45,500.00","$61,800.00","$116,000.00","$158,000.00"
135 | Auburn University,State,"$45,400.00","$84,700.00","$45,400.00","$62,700.00","$109,000.00","$145,000.00"
136 | Washington State University (WSU),State,"$45,300.00","$84,700.00","$43,600.00","$59,000.00","$113,000.00","$162,000.00"
137 | "California State University, Long Beach (CSULB)",State,"$45,100.00","$84,700.00","$47,400.00","$62,500.00","$113,000.00","$154,000.00"
138 | Iowa State University,State,"$45,400.00","$84,600.00","$44,400.00","$60,000.00","$109,000.00","$147,000.00"
139 | University of Delaware,State,"$45,900.00","$84,500.00","$44,500.00","$64,000.00","$119,000.00","$165,000.00"
140 | University of Colorado - Denver,State,"$46,100.00","$84,400.00","$46,400.00","$58,600.00","$105,000.00","$144,000.00"
141 | "California State University, East Bay (CSUEB)",State,"$49,200.00","$84,300.00","$46,000.00","$62,400.00","$115,000.00","$155,000.00"
142 | State University of New York (SUNY) at Farmingdale,State,"$47,300.00","$84,200.00","$50,200.00","$59,800.00","$110,000.00","$162,000.00"
143 | University of Minnesota,State,"$46,200.00","$84,200.00","$49,000.00","$63,200.00","$112,000.00","$148,000.00"
144 | Arizona State University (ASU),State,"$47,400.00","$84,100.00","$44,600.00","$60,700.00","$114,000.00","$163,000.00"
145 | "University of California, Santa Cruz (UCSC)",State,"$44,700.00","$84,100.00","$46,100.00","$62,000.00","$121,000.00","$165,000.00"
146 | "Indiana University (IU), Bloomington",State,"$46,300.00","$84,000.00","$43,600.00","$60,400.00","$119,000.00","$178,000.00"
147 | University of Iowa (UI),State,"$44,700.00","$83,900.00","$43,300.00","$61,100.00","$116,000.00","$163,000.00"
148 | Ohio State University (OSU),State,"$44,900.00","$83,700.00","$45,500.00","$60,700.00","$116,000.00","$162,000.00"
149 | North Carolina State University (NCSU),State,"$47,200.00","$83,300.00","$49,200.00","$64,800.00","$112,000.00","$153,000.00"
150 | Oregon State University (OSU),State,"$45,100.00","$83,300.00","$46,900.00","$64,000.00","$113,000.00","$146,000.00"
151 | University of Utah,State,"$45,400.00","$83,200.00","$43,000.00","$58,400.00","$116,000.00","$148,000.00"
152 | "University of Nevada, Reno (UNR)",State,"$46,500.00","$82,900.00","$41,900.00","$54,600.00","$113,000.00","$143,000.00"
153 | University of Oklahoma,State,"$44,700.00","$82,900.00","$41,200.00","$60,300.00","$114,000.00","$167,000.00"
154 | University of Arkansas,State,"$44,100.00","$82,800.00","$43,200.00","$60,700.00","$113,000.00","$160,000.00"
155 | University of Vermont (UVM),State,"$44,800.00","$82,700.00","$44,700.00","$58,000.00","$122,000.00","$194,000.00"
156 | University of Alabama at Huntsville (UAH),State,"$43,100.00","$82,700.00","$46,100.00","$67,800.00","$106,000.00","$132,000.00"
157 | "California State University, Sacramento (CSUS)",State,"$47,800.00","$82,400.00","$42,900.00","$59,600.00","$111,000.00","$154,000.00"
158 | University of Idaho,State,"$44,900.00","$82,000.00","$43,000.00","$56,700.00","$104,000.00","$142,000.00"
159 | University of Illinois at Chicago,State,"$47,500.00","$81,700.00","$44,700.00","$58,800.00","$110,000.00","$146,000.00"
160 | State University of New York (SUNY) at Buffalo,State,"$46,200.00","$81,700.00","$45,900.00","$61,400.00","$110,000.00","$147,000.00"
161 | University of Kansas,State,"$42,400.00","$81,600.00","$44,800.00","$57,200.00","$115,000.00","$156,000.00"
162 | University of New Mexico (UNM),State,"$41,600.00","$81,600.00","$41,800.00","$59,100.00","$105,000.00","$141,000.00"
163 | University of North Carolina at Chapel Hill (UNCH),State,"$42,900.00","$81,500.00","$43,400.00","$57,500.00","$117,000.00","$155,000.00"
164 | "University of Alabama, Tuscaloosa",State,"$41,300.00","$81,400.00","$40,100.00","$56,500.00","$117,000.00","$161,000.00"
165 | "University of California, Riverside (UCR)",State,"$46,800.00","$81,300.00","$37,200.00","$59,900.00","$109,000.00","$134,000.00"
166 | State University of New York (SUNY) at Geneseo,State,"$42,300.00","$81,300.00","$39,300.00","$47,600.00","$117,000.00","$173,000.00"
167 | University of Missouri - Columbia,State,"$41,700.00","$81,000.00","$43,500.00","$57,100.00","$111,000.00","$156,000.00"
168 | University of Nebraska,State,"$45,700.00","$80,900.00","$42,200.00","$56,600.00","$113,000.00","$156,000.00"
169 | University of Texas at Arlington (UTA),State,"$45,400.00","$80,800.00","$46,400.00","$61,200.00","$106,000.00","$138,000.00"
170 | Northern Illinois University (NIU),State,"$43,600.00","$80,800.00","$43,900.00","$60,200.00","$111,000.00","$161,000.00"
171 | Oklahoma State University,State,"$42,800.00","$80,700.00","$40,100.00","$56,500.00","$114,000.00","$151,000.00"
172 | University of North Dakota,State,"$44,000.00","$80,600.00","$43,400.00","$56,400.00","$111,000.00","$157,000.00"
173 | "California State University, Northridge (CSUN)",State,"$45,500.00","$80,400.00","$44,500.00","$57,800.00","$108,000.00","$153,000.00"
174 | University of Houston (UH),State,"$46,000.00","$79,900.00","$42,000.00","$56,200.00","$106,000.00","$141,000.00"
175 | University of Mississippi,State,"$41,400.00","$79,700.00","$40,400.00","$53,500.00","$108,000.00","$186,000.00"
176 | New Mexico State University,State,"$44,300.00","$79,500.00","$37,400.00","$53,800.00","$102,000.00","$131,000.00"
177 | Lamar University,State,"$46,500.00","$79,400.00","$38,700.00","$51,600.00","$114,000.00","$158,000.00"
178 | Mississippi State University (MSU),State,"$44,500.00","$79,300.00","$43,300.00","$58,800.00","$108,000.00","$151,000.00"
179 | Colorado State University (CSU),State,"$44,800.00","$79,000.00","$43,800.00","$57,100.00","$112,000.00","$150,000.00"
180 | Kansas State University (KSU),State,"$43,300.00","$79,000.00","$37,200.00","$54,100.00","$106,000.00","$138,000.00"
181 | University of Wyoming (UW),State,"$44,500.00","$78,700.00","$41,500.00","$54,000.00","$105,000.00","$145,000.00"
182 | Utah State University,State,"$43,800.00","$78,700.00","$41,600.00","$55,400.00","$101,000.00","$132,000.00"
183 | University of Wisconsin (UW) - Platteville,State,"$45,800.00","$78,500.00","$48,400.00","$61,200.00","$100,000.00","$139,000.00"
184 | University of Oregon,State,"$42,200.00","$78,400.00","$38,100.00","$56,200.00","$117,000.00","$186,000.00"
185 | University of Kentucky (UK),State,"$42,800.00","$78,300.00","$43,000.00","$57,300.00","$107,000.00","$149,000.00"
186 | University of New Hampshire (UNH),State,"$41,800.00","$78,300.00","$41,700.00","$56,400.00","$114,000.00","$147,000.00"
187 | University of Massachusetts (UMass) - Boston,State,"$45,600.00","$78,200.00","$36,300.00","$53,800.00","$109,000.00","$151,000.00"
188 | West Virginia University (WVU),State,"$43,100.00","$78,100.00","$39,700.00","$55,700.00","$106,000.00","$141,000.00"
189 | University of Maryland Baltimore County (UMBC),State,"$47,000.00","$77,800.00","$46,900.00","$59,100.00","$105,000.00","$130,000.00"
190 | North Dakota State University (NDSU),State,"$45,100.00","$77,800.00","$39,000.00","$55,800.00","$100,000.00","$123,000.00"
191 | State University of New York (SUNY) at Oswego,State,"$38,000.00","$77,800.00","$40,400.00","$53,000.00","$115,000.00","$169,000.00"
192 | University of Massachusetts (UMass) - Dartmouth,State,"$43,200.00","$77,700.00","$43,300.00","$56,200.00","$107,000.00","$132,000.00"
193 | Montana State University - Bozeman,State,"$46,600.00","$77,500.00","$40,200.00","$58,100.00","$111,000.00","$151,000.00"
194 | State University of New York (SUNY) at Oneonta,State,"$37,500.00","$76,700.00","$40,000.00","$54,300.00","$97,700.00","$155,000.00"
195 | University of Louisiana (UL) at Lafayette,State,"$41,100.00","$76,300.00","$42,000.00","$54,500.00","$107,000.00","$163,000.00"
196 | State University of New York (SUNY) at Plattsburgh,State,"$40,800.00","$76,200.00","$38,400.00","$54,100.00","$105,000.00","$136,000.00"
197 | Wayne State University,State,"$42,800.00","$76,100.00","$40,100.00","$56,200.00","$101,000.00","$139,000.00"
198 | University of Hawaii,State,"$43,800.00","$76,000.00","$40,400.00","$56,300.00","$104,000.00","$128,000.00"
199 | University of Toledo,State,"$43,100.00","$75,900.00","$40,100.00","$54,100.00","$100,000.00","$133,000.00"
200 | Florida International University (FIU),State,"$43,200.00","$75,500.00","$40,500.00","$55,800.00","$98,200.00","$136,000.00"
201 | University of Wisconsin (UW) - Whitewater,State,"$40,800.00","$75,500.00","$38,200.00","$53,500.00","$99,300.00","$150,000.00"
202 | Western Washington University,State,"$42,700.00","$75,400.00","$41,300.00","$56,700.00","$99,200.00","$119,000.00"
203 | Minnesota State University - Mankato,State,"$43,300.00","$74,700.00","$39,500.00","$53,800.00","$95,700.00","$140,000.00"
204 | University of Tennessee,State,"$43,800.00","$74,600.00","$41,900.00","$53,200.00","$106,000.00","$153,000.00"
205 | University of Wisconsin (UW) - Milwaukee,State,"$42,300.00","$74,600.00","$40,600.00","$54,000.00","$93,700.00","$123,000.00"
206 | University of Arkansas - Monticello (UAM),State,"$39,200.00","$74,500.00","$32,800.00","$46,100.00","$110,000.00","$161,000.00"
207 | Penn State - Harrisburg,State,"$45,700.00","$74,000.00","$44,000.00","$53,100.00","$104,000.00","$150,000.00"
208 | University of North Carolina at Charlotte (UNCC),State,"$43,100.00","$74,000.00","$38,200.00","$53,200.00","$99,500.00","$133,000.00"
209 | Georgia State University,State,"$41,800.00","$74,000.00","$43,000.00","$55,300.00","$99,900.00","$145,000.00"
210 | Western Michigan University (WMU),State,"$42,300.00","$73,800.00","$40,100.00","$52,500.00","$103,000.00","$135,000.00"
211 | South Dakota State University (SDSU),State,"$41,100.00","$73,500.00","$34,100.00","$49,900.00","$99,400.00","$129,000.00"
212 | Idaho State University,State,"$44,900.00","$73,400.00","$35,400.00","$49,600.00","$101,000.00","$143,000.00"
213 | Ohio University,State,"$42,200.00","$73,400.00","$36,600.00","$52,800.00","$106,000.00","$150,000.00"
214 | Illinois State University,State,"$42,000.00","$73,400.00","$39,100.00","$55,200.00","$105,000.00","$142,000.00"
215 | Cleveland State University,State,"$43,500.00","$73,100.00","$39,500.00","$51,600.00","$97,000.00","$137,000.00"
216 | Florida State University (FSU),State,"$42,100.00","$73,000.00","$39,600.00","$52,800.00","$107,000.00","$156,000.00"
217 | "University of Alaska, Anchorage",State,"$45,900.00","$72,600.00","$39,800.00","$56,600.00","$99,300.00","$137,000.00"
218 | Fitchburg State College,State,"$42,400.00","$72,600.00","$43,300.00","$56,100.00","$99,600.00","$151,000.00"
219 | University of Nebraska at Omaha,State,"$41,500.00","$72,600.00","$39,500.00","$54,400.00","$97,400.00","$126,000.00"
220 | Southern Illinois University Carbondale,State,"$43,000.00","$72,500.00","$38,300.00","$51,300.00","$99,300.00","$139,000.00"
221 | University of Texas at El Paso (UTEP),State,"$43,400.00","$72,100.00","$37,700.00","$50,400.00","$99,500.00","$133,000.00"
222 | "California State University, Dominguez Hills (CSUDH)",State,"$42,700.00","$72,100.00","$30,800.00","$47,000.00","$92,200.00","$132,000.00"
223 | University Of Maine,State,"$41,200.00","$72,100.00","$41,700.00","$55,600.00","$99,300.00","$141,000.00"
224 | Eastern Michigan University,State,"$40,300.00","$72,100.00","$37,900.00","$52,800.00","$95,400.00","$135,000.00"
225 | Bowling Green State University,State,"$39,800.00","$72,100.00","$38,200.00","$51,800.00","$101,000.00","$146,000.00"
226 | University of Montana,State,"$37,300.00","$71,900.00","$37,000.00","$51,500.00","$96,400.00","$138,000.00"
227 | University of Central Florida (UCF),State,"$42,600.00","$71,700.00","$39,500.00","$51,500.00","$98,400.00","$125,000.00"
228 | University of South Carolina,State,"$40,000.00","$71,700.00","$36,300.00","$49,900.00","$98,400.00","$131,000.00"
229 | "University of Nevada, Las Vegas (UNLV)",State,"$45,200.00","$71,600.00","$39,000.00","$52,400.00","$100,000.00","$128,000.00"
230 | St. Cloud State University,State,"$41,800.00","$71,400.00","$38,700.00","$49,400.00","$101,000.00","$126,000.00"
231 | University of Wisconsin (UW) - Parkside,State,"$40,700.00","$71,400.00","$40,900.00","$53,100.00","$84,900.00","$119,000.00"
232 | "California State University (CSU), Stanislaus",State,"$38,000.00","$71,400.00","$33,700.00","$50,500.00","$94,100.00","$121,000.00"
233 | Humboldt State University,State,"$42,600.00","$71,300.00","$36,000.00","$56,300.00","$94,400.00","$117,000.00"
234 | Florida Atlantic University (FAU),State,"$42,600.00","$71,100.00","$40,700.00","$53,000.00","$99,500.00","$137,000.00"
235 | University of South Florida (USF),State,"$41,100.00","$71,100.00","$39,600.00","$51,500.00","$98,100.00","$131,000.00"
236 | Portland State University (PSU),State,"$42,600.00","$70,900.00","$40,700.00","$52,300.00","$94,400.00","$123,000.00"
237 | Eastern Washington University,State,"$38,600.00","$70,900.00","$36,000.00","$50,500.00","$93,100.00","$117,000.00"
238 | University of Texas at San Antonio (UTSA),State,"$42,500.00","$70,700.00","$39,100.00","$49,800.00","$92,700.00","$121,000.00"
239 | University of Akron,State,"$41,100.00","$70,300.00","$40,600.00","$53,300.00","$95,200.00","$127,000.00"
240 | State University of New York (SUNY) at Potsdam,State,"$38,000.00","$70,300.00","$35,100.00","$51,200.00","$100,000.00","$179,000.00"
241 | University of Alabama at Birmingham (UAB),State,"$39,200.00","$70,100.00","$43,000.00","$53,400.00","$91,400.00","$125,000.00"
242 | University of Memphis (U of M),State,"$41,400.00","$69,700.00","$36,100.00","$49,100.00","$93,500.00","$127,000.00"
243 | Boise State University (BSU),State,"$40,800.00","$69,500.00","$37,400.00","$48,700.00","$87,500.00","$110,000.00"
244 | Missouri State University (MSU),State,"$36,100.00","$69,500.00","$33,300.00","$46,900.00","$102,000.00","$134,000.00"
245 | University of Wisconsin (UW) - La Crosse,State,"$42,200.00","$69,300.00","$37,500.00","$47,200.00","$93,100.00","$133,000.00"
246 | Appalachian State University,State,"$40,400.00","$69,100.00","$37,200.00","$50,400.00","$90,800.00","$115,000.00"
247 | Virginia Commonwealth University (VCU),State,"$42,000.00","$68,400.00","$37,400.00","$51,900.00","$100,000.00","$123,000.00"
248 | University of Wisconsin (UW) - Stout,State,"$43,600.00","$68,300.00","$40,900.00","$50,600.00","$91,600.00","$136,000.00"
249 | East Carolina University (ECU),State,"$40,200.00","$67,500.00","$38,400.00","$52,000.00","$98,700.00","$151,000.00"
250 | Utah Valley State College,State,"$42,400.00","$67,100.00","$27,000.00","$44,100.00","$84,900.00","$110,000.00"
251 | University of Missouri - St. Louis (UMSL),State,"$41,400.00","$67,100.00","$36,800.00","$49,600.00","$97,600.00","$144,000.00"
252 | Western Carolina University,State,"$36,900.00","$66,600.00","$39,000.00","$49,500.00","$94,400.00","$133,000.00"
253 | University of Wisconsin (UW) - Oshkosh,State,"$39,300.00","$66,400.00","$37,700.00","$49,700.00","$90,100.00","$138,000.00"
254 | State University of New York (SUNY) at Fredonia,State,"$37,800.00","$66,200.00","$32,800.00","$44,200.00","$93,300.00","$181,000.00"
255 | University of Missouri - Kansas City (UMKC),State,"$38,900.00","$65,800.00","$36,300.00","$48,100.00","$95,800.00","$124,000.00"
256 | University of Wisconsin (UW) - Eau Claire,State,"$41,400.00","$64,800.00","$35,000.00","$47,300.00","$93,100.00","$125,000.00"
257 | Ball State University (BSU),State,"$39,100.00","$64,500.00","$35,500.00","$48,200.00","$89,300.00","$128,000.00"
258 | University of North Carolina at Wilmington (UNCW),State,"$37,500.00","$64,400.00","$32,100.00","$46,600.00","$97,100.00","$129,000.00"
259 | University of Wisconsin (UW) - Stevens Point,State,"$39,800.00","$64,000.00","$38,400.00","$45,100.00","$95,400.00","$128,000.00"
260 | University of Southern Maine,State,"$39,400.00","$63,600.00","$40,400.00","$47,900.00","$85,700.00","$117,000.00"
261 | Arkansas State University (ASU),State,"$38,700.00","$63,300.00","$33,600.00","$45,300.00","$83,900.00","$118,000.00"
262 | Kent State University,State,"$38,700.00","$62,600.00","$36,100.00","$45,800.00","$87,000.00","$124,000.00"
263 | Tarleton State University (TSU),State,"$40,800.00","$62,400.00","$32,100.00","$47,400.00","$80,400.00","$126,000.00"
264 | University of Wisconsin (UW) - Green Bay,State,"$35,800.00","$60,600.00","$35,500.00","$46,800.00","$81,800.00","$102,000.00"
265 | Morehead State University,State,"$34,800.00","$60,600.00","$34,300.00","$46,500.00","$72,000.00","$91,300.00"
266 | Austin Peay State University,State,"$37,700.00","$59,200.00","$32,200.00","$40,500.00","$73,900.00","$96,200.00"
267 | Pittsburg State University,State,"$40,400.00","$58,200.00","$25,600.00","$46,000.00","$84,600.00","$117,000.00"
268 | Southern Utah University,State,"$41,900.00","$56,500.00","$30,700.00","$39,700.00","$78,400.00","$116,000.00"
269 | Montana State University - Billings,State,"$37,900.00","$50,600.00","$22,600.00","$31,800.00","$78,500.00","$98,900.00"
270 | Black Hills State University,State,"$35,300.00","$43,900.00","$27,000.00","$32,200.00","$60,900.00","$87,600.00"
271 |
--------------------------------------------------------------------------------
/Columbus First/Columbus First Trip.R:
--------------------------------------------------------------------------------
1 | # Columbus First Trip
2 |
3 |
4 | ## Loading the Libraries
5 |
6 | library(data.table)
7 | library(ggplot2)
8 | library(lubridate)
9 | library(wordcloud)
10 | library(tm)
11 | library(SnowballC)
12 | library(RSentiment)
13 | library(stringr)
14 | library(SnowballC)
15 | library(RWeka)
16 | library(DT)
17 | library(gdata)
18 |
19 | ## Reading the Dataset and Changing Working Directory
20 |
21 | setwd('./Kaggle/Columbus First')
22 |
23 | columbus <- read.csv('./Columbus.csv', encoding = "UTF-8")
24 | str(columbus)
25 | summary(columbus)
26 |
27 | ## Polishing Up
28 |
29 | # Reorder the eight month levels (as.factor sorts them alphabetically) to line up with the 08..03 codes below.
30 | columbus$month <- as.factor(columbus$month)
31 | columbus$month <- factor(columbus$month, levels(columbus$month)[c(1, 8, 7, 6, 2, 4, 3, 5)])
32 | columbus$nmonth <- columbus$month
33 | levels(columbus$nmonth) <- c("08", "09", "10", "11", "12", "01", "02", "03")
34 | columbus$day <- as.factor(columbus$day)
35 | columbus$year <- as.factor(columbus$year)
36 | columbus$nwords <- str_count(as.character(columbus$text), boundary("word")) # Number of words in the text
37 | columbus$date <- paste(columbus$day, columbus$nmonth, columbus$year, sep = "-") # day-month-year string
38 | columbus$date <- as.POSIXct(strptime(columbus$date, format = "%e-%m-%Y"))
39 |
40 | ## Calculate Sentiments
41 |
42 | corpus <- Corpus(VectorSource(list(columbus$text)))
43 | corpus <- tm_map(corpus, removePunctuation)
44 | corpus <- tm_map(corpus, content_transformer(tolower))
45 | corpus <- tm_map(corpus, removeNumbers)
46 | corpus <- tm_map(corpus, stripWhitespace)
47 | corpus <- tm_map(corpus, removeWords, stopwords("en"))
48 |
49 | # Term counts summed over all cleaned entries.
50 | dtm_colon <- DocumentTermMatrix(VCorpus(VectorSource(corpus[[1]]$content)))
51 | freq_colon <- colSums(as.matrix(dtm_colon))
52 | # Score each distinct term, then attach how often it occurs.
53 | sentiments_colon <- calculate_sentiment(names(freq_colon))
54 | sentiments_colon <- cbind(sentiments_colon, as.data.frame(freq_colon))
55 | # Split the scored terms by their sentiment label.
56 | sent_pos_colon <- sentiments_colon[sentiments_colon$sentiment == "Positive", ]
57 | sent_neg_colon <- sentiments_colon[sentiments_colon$sentiment == "Negative", ]
58 | sent_neu_colon <- sentiments_colon[sentiments_colon$sentiment == "Neutral", ]
59 |
60 | cat("We have more positive Sentiments: ", sum(sent_pos_colon$freq_colon), " than negative: ", sum(sent_neg_colon$freq_colon))
61 |
--------------------------------------------------------------------------------
/Credit Card Modeling 65%.R:
--------------------------------------------------------------------------------
1 | # Credit Card Predictive Modeling
2 |
3 | ### Load Libraries
4 |
5 | library(randomForest)
6 | library(e1071)
7 | library(rpart)
8 | library(rpart.plot)
9 | library(caTools)
10 | library(readr)
11 | library(caret)
12 |
13 | # Receiving Dataset and Change Working Directory
14 | setwd('./Kaggle')
15 | ccard <- read_csv("./creditcard.csv")
16 | head(ccard)
17 | ccard$Class <- factor(ccard$Class) # the 0/1 label must be a factor for classification and confusionMatrix()
18 | str(ccard)
19 |
20 | ## Predictive Modeling
21 |
22 | ### Set Data 65:35
23 | # sample.split() is given the label, so the 65:35 split is drawn with respect to Class.
24 | set.seed(22540)
25 | split <- sample.split(ccard$Class, SplitRatio = 0.65)
26 | train <- subset(ccard, split == TRUE)
27 | cv <- subset(ccard, split == FALSE)
28 |
29 | ### Check output Class distribution
30 | table(cv$Class)
31 |
32 | # Logistic Regression
33 | # The table has the true class in rows and "predicted probability above 0.5" in columns.
34 | glm.model <- glm(Class ~ ., data = train, family = "binomial")
35 | glm.predict <- predict(glm.model, cv, type = "response")
36 | table(cv$Class, glm.predict > 0.5)
37 |
38 | # Decision Tree Model
39 |
40 | tree.model <- rpart(Class ~ ., data = train, method = "class", minbucket = 50)
41 | prp(tree.model)
42 |
43 | tree.predict <- predict(tree.model, cv, type = "class")
44 | confusionMatrix(tree.predict, cv$Class, positive = "1") # data = predictions, reference = truth
45 |
46 |
47 | # Keeping 10,000 Rows with Class=0
48 |
49 | # Separate the two classes so the Class == 0 rows can be cut down.
50 | data.class.0 <- subset(ccard, Class == 0)
51 | data.class.1 <- subset(ccard, Class == 1)
52 | nrow(data.class.0)
53 | nrow(data.class.1)
54 |
55 | # Keep the first 10,000 Class == 0 rows in file order (this is not a random sample).
56 | data.class.0 <- data.class.0[seq_len(10000), ]
57 | nrow(data.class.0)
58 | data <- rbind(data.class.0, data.class.1)
59 | nrow(data)
60 |
61 | # Split Data 65:35
62 |
63 | data$Class <- factor(data$Class) # a numeric label would make svm() and randomForest() fit regressions
64 | set.seed(205)
65 | split <- sample.split(data$Class, SplitRatio = 0.65)
66 | train <- subset(data, split == TRUE)
67 | cv <- subset(data, split == FALSE)
68 | table(cv$Class)
69 |
70 | # Logistic Regression for Split
71 |
72 | glm.model <- glm(Class ~ ., data = train, family = "binomial", control = list(maxit = 50))
73 | glm.predict <- predict(glm.model, cv, type = "response")
74 | table(cv$Class, glm.predict > 0.5)
75 |
76 | # SVM Model
77 |
78 | svm.model <- svm(Class ~ ., data = train, kernel = "radial", cost = 1, gamma = 0.3)
79 | svm.predict <- predict(svm.model, cv)
80 | confusionMatrix(svm.predict, cv$Class, positive = "1") # data = predictions, reference = truth
81 |
82 | # Decision Tree Split
83 |
84 | tree.model <- rpart(Class ~ ., data = train, method = "class", minbucket = 10)
85 | prp(tree.model)
86 |
87 | tree.predict <- predict(tree.model, cv, type = "class")
88 | confusionMatrix(tree.predict, cv$Class, positive = "1")
89 |
90 |
91 | # Random Forest Predictions
92 |
93 | set.seed(10)
94 | rf.model <- randomForest(Class ~ ., data = train,
95 |                          ntree = 2000, nodesize = 20)
96 |
97 | rf.predict <- predict(rf.model, cv)
98 | confusionMatrix(rf.predict, cv$Class, positive = "1")
99 |
100 |
101 | varImpPlot(rf.model)
102 |
--------------------------------------------------------------------------------
/Diamond Prices.R:
--------------------------------------------------------------------------------
1 | # Diamond Prices
2 |
3 | ### Load Libraries
4 |
5 | library(ggplot2) # Data visualization
6 | library(readr) # CSV file I/O, e.g. the read_csv function
7 | library(magrittr)
8 | library(caret)
9 | library(plotly)
10 | library(corrplot)
11 |
12 |
13 | ### Receiving Dataset and Change Working Directory
14 | setwd('./Kaggle')
15 |
16 | diamond <- read.csv('diamonds.csv', stringsAsFactors = TRUE) # cut/color/clarity must be factors for levels() below
17 | head(diamond)
18 |
19 | ### Class and Missing Variables for Diamonds
20 |
21 | data.frame(VarType = sapply(diamond, class), Total_Missing = vapply(diamond, function(x) sum(is.na(x)), integer(1)))
22 |
23 |
24 | ## See Different Levels in Factor Variables
25 | print("Cut Levels")
26 | levels(diamond$cut)
27 |
28 | print("Color Levels")
29 | levels(diamond$color)
30 |
31 | print("Clarity Levels")
32 | levels(diamond$clarity)
33 |
34 |
35 |
36 | ### Density Plots
37 | # Price density for each level of cut, color and clarity; alpha = 0.5 keeps overlapping curves visible.
38 | ggplot(diamond, aes(x = price, fill = cut)) + geom_density(alpha = 0.5) +
39 |   labs(title = "Distribution of Price by Cut", x = "Price", y = "Density") +
40 |   theme_minimal()
41 |
42 | ggplot(diamond, aes(x = price, fill = color)) + geom_density(alpha = 0.5) +
43 |   labs(title = "Distribution of Price by Color", x = "Price", y = "Density") +
44 |   theme_minimal()
45 |
46 | ggplot(diamond, aes(x = price, fill = clarity)) + geom_density(alpha = 0.5) +
47 |   labs(title = "Distribution of Price by Clarity", x = "Price", y = "Density") +
48 |   theme_minimal()
49 |
50 |
51 | ### More Plots
52 |
53 | ggplot(data = diamond, aes(x = cut)) + geom_bar(fill = "green") + theme_minimal() + ylab("Total Count") + ggtitle("Distribution of Diamonds by Cut Type")
54 | ggplot(data = diamond, aes(x = color)) + geom_bar(fill = "khaki") + theme_minimal() + ylab("Total Count") + ggtitle("Distribution of Diamonds by Color Type")
55 | ggplot(data = diamond, aes(x = clarity)) + geom_bar(fill = "violet") + theme_minimal() + ylab("Total Count") + ggtitle("Distribution of Diamonds by Clarity Type")
56 |
57 |
58 | ### Encoding
59 |
60 | ohe_features <- c("cut", "color", "clarity")
61 | dummies <- dummyVars(~ cut + color + clarity, data = diamond)
62 |
63 | # Expand the three categorical columns into indicator columns, append them, then drop the originals.
64 | diamond_ohe <- as.data.frame(predict(dummies, newdata = diamond))
65 | diamond_combined <- cbind(diamond, diamond_ohe)
66 | newdiamond <- diamond_combined[, !(names(diamond_combined) %in% ohe_features)]
67 |
68 | rm(diamond_combined, diamond_ohe)
69 |
70 |
71 |
72 | ### Looking at the New Data
73 | head(newdiamond)
74 |
75 | ### Set aside X and the price target, then remove both from the predictor table
76 |
77 | x.label <- newdiamond$X
78 | y.label <- as.numeric(newdiamond$price)
79 |
80 | newdiamond$X <- NULL
81 | newdiamond$price <- NULL
82 |
83 | ## Correlation plot
84 | corrplot(cor(cbind(newdiamond, Price = y.label)), type = "upper")
85 |
86 |
--------------------------------------------------------------------------------
/Edudata.R:
--------------------------------------------------------------------------------
1 |
2 | # --- (R Markdown delimiter kept only as a comment; a bare --- is parsed as unary minus signs)
3 | ## Students' Academic Performance
4 |
5 | # Change Directory
6 | setwd("./Kaggle")
7 |
8 | # Load Libraries
9 | library(ggplot2)
10 | library(dplyr)
11 | library(randomForest)
12 | library(class)
13 | library(rpart)
14 | library(rpart.plot)
15 | library(e1071)
16 | library(caret)
17 | library(caTools)
18 | library(party)
19 |
20 |
21 | # Reading the Data
22 |
23 | edu <- read.csv('./Edudata.csv', stringsAsFactors = TRUE) # keep text columns as factors (not the default on R >= 4.0)
24 | str(edu)
25 | summary(edu)
26 |
27 | ### Exploratory Data Analysis
28 |
29 | # Raised Hands
30 |
31 | ggplot(edu, aes(raisedhands)) +
32 |   geom_histogram(bins = 50, colour = "red", fill = "blue", alpha = 0.2) +
33 |   scale_x_continuous(breaks = seq(0, 100, by = 5)) +
34 |   labs(x = "Raised Hands", y = "Student Count")
35 |
36 | # Visited Resources
37 |
38 | ggplot(edu, aes(VisITedResources)) +
39 |   geom_histogram(bins = 50, colour = "orange", fill = "orange", alpha = 0.4) +
40 |   scale_x_continuous(breaks = seq(0, 100, by = 5)) +
41 |   labs(x = "Visited Resources", y = "Student Count")
42 |
43 | # Announcements
44 |
45 | ggplot(edu, aes(AnnouncementsView)) +
46 |   geom_histogram(bins = 50, colour = "black", fill = "red", alpha = 0.5) +
47 |   scale_x_continuous(breaks = seq(0, 100, by = 5)) +
48 |   labs(x = "Announcements View", y = "Student Count")
49 |
50 | # Discussion
51 |
52 | ggplot(edu, aes(Discussion)) +
53 |   geom_histogram(bins = 50, colour = "black", fill = "grey") +
54 |   scale_x_continuous(breaks = seq(0, 100, by = 5)) +
55 |   labs(x = "Discussion Participation", y = "Student Count")
56 |
57 | ### Barplots
58 |
59 | ggplot(edu, aes(x = gender, fill = gender)) +
60 |   geom_bar() +
61 |   labs(x = "Gender", y = "Student Count") +
62 |   scale_y_continuous(breaks = seq(0, 300, by = 30)) +
63 |   coord_flip()
64 |
65 | ggplot(edu, aes(x = NationalITy, fill = NationalITy)) +
66 |   geom_bar() +
67 |   labs(x = "Nationality", y = "Student Count") +
68 |   scale_y_continuous(breaks = seq(0, 200, by = 20)) +
69 |   coord_flip()
70 |
71 | # usa is a mix of nationalities
72 | ggplot(edu, aes(x = PlaceofBirth, fill = NationalITy)) +
73 |   geom_bar() +
74 |   labs(x = "Birth Place", y = "Student Count") + coord_flip()
75 |
76 | # g-06 has students with only low grades
77 | ggplot(edu, aes(x = GradeID)) +
78 |   geom_bar(aes(fill = Class)) +
79 |   labs(x = "Grade ID", y = "Student Count") + coord_flip()
80 |
81 | # g-10 has no females
82 | ggplot(edu, aes(x = GradeID)) +
83 |   geom_bar(aes(fill = gender)) +
84 |   labs(x = "Grade ID", y = "Student Count") + coord_flip()
85 |
86 | ggplot(edu, aes(x = SectionID, fill = Topic)) + geom_bar(alpha = 0.1) +
87 |   labs(x = "Section ID", y = "Student Count") +
88 |   coord_flip()
89 |
90 | # alpha is a fixed setting, so it is passed to geom_bar(); a constant inside aes() is
91 | # mapped like data and produces a legend entry instead of the requested transparency.
92 | ggplot(edu, aes(x = Topic, fill = gender)) + geom_bar(alpha = 0.2) +
93 |   labs(x = "Topic", y = "Student Count") +
94 |   scale_y_continuous(breaks = seq(0, 100, 4)) + coord_flip()
95 |
96 |
97 |
98 | ggplot(edu, aes(x = Topic, fill = NationalITy)) + geom_bar() +
99 |   labs(x = "Topic", y = "Student Count") + coord_flip() +
100 |   scale_y_continuous(breaks = seq(0, 100, 4))
101 |
102 |
103 | ggplot(edu, aes(x = Topic, fill = SectionID)) + geom_bar(alpha = 0.3) +
104 |   labs(x = "Topic", y = "Student Count") + coord_flip() +
105 |   scale_y_continuous(breaks = seq(0, 100, 4))
106 |
107 |
108 |
109 | # Section C for Mostly Spanish Students
110 |
111 | ggplot(edu, aes(x = Topic, fill = Semester)) + geom_bar() +
112 |   labs(x = "Topic", y = "Student Count") + coord_flip() +
113 |   scale_y_continuous(breaks = seq(0, 100, 4))
114 |
115 |
116 |
117 | # IT Students Are Mostly in 1st Semester
118 | # alpha is set on the layer; a constant inside aes() is mapped like data and only adds a legend.
119 | ggplot(edu, aes(x = Topic, fill = Relation)) + geom_bar(alpha = 0.5) +
120 |   labs(x = "Topic", y = "Student Count") + coord_flip() +
121 |   scale_y_continuous(breaks = seq(0, 100, 4))
122 |
123 |
124 | # Most French Students have Mom as Guardian in Comparison to Father
125 |
126 |
127 | ggplot(edu, aes(x = Topic, fill = Class)) + geom_bar() +
128 |   labs(x = "Topic", y = "Student Count") + coord_flip() +
129 |   scale_y_continuous(breaks = seq(0, 100, 4))
130 |
131 | # position = "fill" rescales every bar to height 1, so the axis shows shares (0-1), not counts.
132 | ggplot(edu, aes(x = Topic, fill = Class)) + geom_bar(position = "fill") +
133 |   labs(x = "Topic", y = "Share of Students") + coord_flip() +
134 |   scale_y_continuous(breaks = seq(0, 1, 0.1))
135 |
136 |
137 | # Geology has no low class students
138 |
139 |
140 | ggplot(edu, aes(x = Semester, fill = Semester)) + geom_bar() +
141 |   labs(x = "Semester", y = "Student Count")
142 |
143 | ggplot(edu, aes(x = Relation)) + geom_bar(aes(fill = Semester)) +
144 |   labs(x = "Guardian", y = "Student Count")
145 |
146 | ggplot(edu, aes(x = ParentAnsweringSurvey)) +
147 |   geom_bar(aes(fill = ParentschoolSatisfaction)) +
148 |   labs(x = "Does parents answer surveys ?", y = "Student Count")
149 |
150 | # Parent Satisfaction
151 |
152 | ggplot(edu, aes(x = ParentschoolSatisfaction, fill = ParentschoolSatisfaction)) +
153 |   geom_bar() +
154 |   labs(x = "Are the Parents Satisfied With the School ?", y = "Student Count")
155 |
156 |
157 |
158 | ggplot(edu, aes(x = StudentAbsenceDays, fill = StudentAbsenceDays)) + geom_bar() +
159 |   labs(x = "Is the student absent for more than seven days", y = "Student Count")
160 |
161 | ggplot(edu, aes(x = Class)) + geom_bar(aes(fill = gender)) +
162 |   labs(x = "Class", y = "Student Count")
163 |
164 | # Few Girls in the Low Class
165 |
166 | ggplot(edu, aes(x = Class)) + geom_bar(aes(fill = Relation)) +
167 |   labs(x = "Class", y = "Student Count")
168 |
169 |
170 | ggplot(edu, aes(x = Class)) + geom_bar(aes(fill = ParentAnsweringSurvey)) +
171 |   labs(x = "Class", y = "Student Count")
172 |
173 |
174 | ggplot(edu, aes(x = Class)) + geom_bar(aes(fill = StudentAbsenceDays)) +
175 |   labs(x = "Class", y = "Student Count")
176 |
177 |
178 | ### Boxplots
179 |
180 |
181 | ggplot(edu, aes(x = gender, y = raisedhands, fill = gender)) + geom_boxplot()
182 |
183 |
184 |
185 | ggplot(edu, aes(x = gender, y = VisITedResources, fill = gender)) + geom_boxplot()
186 |
187 |
188 | # Girls Use More Resources
189 |
190 | ggplot(edu, aes(x = NationalITy, y = raisedhands, fill = NationalITy)) + geom_boxplot()
191 |
192 | ggplot(edu, aes(x = StageID, y = raisedhands, fill = StageID)) + geom_boxplot()
193 |
194 |
195 | ggplot(edu, aes(x = StageID, y = Discussion, fill = StageID)) + geom_boxplot()
196 |
197 | ggplot(edu, aes(x = GradeID, y = raisedhands, fill = GradeID)) + geom_boxplot()
198 |
199 | ggplot(edu, aes(x = SectionID, y = Discussion, fill = SectionID)) + geom_boxplot()
200 |
201 | ggplot(edu, aes(x = Topic, y = raisedhands, fill = Topic)) + geom_boxplot()
202 |
203 | ggplot(edu, aes(x = Semester, y = raisedhands, fill = Semester)) + geom_boxplot()
204 |
205 | ggplot(edu, aes(x = Relation, y = raisedhands, fill = Relation)) + geom_boxplot()
206 |
207 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = raisedhands, fill = ParentAnsweringSurvey)) + geom_boxplot()
208 |
209 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = VisITedResources, fill = ParentAnsweringSurvey)) + geom_boxplot()
210 |
211 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = AnnouncementsView, fill = ParentAnsweringSurvey)) + geom_boxplot()
212 |
213 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = Discussion, fill = ParentAnsweringSurvey)) + geom_boxplot()
214 |
215 | ggplot(edu, aes(x = ParentschoolSatisfaction, y = raisedhands, fill = ParentschoolSatisfaction)) + geom_boxplot()
216 |
217 | ggplot(edu, aes(x = ParentschoolSatisfaction, y = VisITedResources, fill = ParentschoolSatisfaction)) + geom_boxplot()
218 |
219 | ggplot(edu, aes(x = ParentschoolSatisfaction, y = AnnouncementsView, fill = ParentschoolSatisfaction)) + geom_boxplot()
220 |
221 | ggplot(edu, aes(x = ParentschoolSatisfaction, y = Discussion, fill = ParentschoolSatisfaction)) + geom_boxplot()
222 |
223 | ggplot(edu, aes(x = StudentAbsenceDays, y = raisedhands, fill = StudentAbsenceDays)) + geom_boxplot()
224 |
225 | # More Students Leave = Less Hand Raises
226 |
227 |
228 | ggplot(edu, aes(x = StudentAbsenceDays, y = VisITedResources, fill = StudentAbsenceDays)) + geom_boxplot()
229 |
230 | ggplot(edu, aes(x = StudentAbsenceDays, y = AnnouncementsView, fill = StudentAbsenceDays)) + geom_boxplot()
231 |
232 | ggplot(edu, aes(x = StudentAbsenceDays, y = Discussion, fill = StudentAbsenceDays)) + geom_boxplot()
233 |
234 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = raisedhands, fill = ParentAnsweringSurvey)) + geom_boxplot()
235 |
236 | # Yes Answers to Surveys = More Raised hands
237 |
238 |
239 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = VisITedResources, fill = ParentAnsweringSurvey)) + geom_boxplot()
240 |
241 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = AnnouncementsView, fill = ParentAnsweringSurvey)) + geom_boxplot()
242 |
243 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = Discussion, fill = ParentAnsweringSurvey)) + geom_boxplot()
244 |
245 |
246 | ### Class-Wise Boxplots
247 |
248 | ggplot(edu, aes(x = Class, y = raisedhands, fill = Class)) + geom_boxplot()
249 |
250 | # High Marks = Active Participation
251 |
252 |
253 | ggplot(edu, aes(x = Class, y = VisITedResources, fill = Class)) + geom_boxplot()
254 |
255 |
256 | # High Marks by visited Resources
257 |
258 |
259 | ggplot(edu, aes(x = Class, y = AnnouncementsView, fill = Class)) + geom_boxplot()
260 |
261 |
262 | # More Marks More Announcements
263 |
264 |
265 | ggplot(edu, aes(x = Class, y = Discussion, fill = Class)) + geom_boxplot()
266 |
267 |
268 | ### Scatterplots
269 |
270 |
271 | ggplot(edu, aes(raisedhands, VisITedResources)) +
272 |   geom_point() + geom_smooth(method = "lm", colour = "green")
273 |
274 | ggplot(edu, aes(raisedhands, AnnouncementsView)) +
275 |   geom_point() + geom_smooth(method = "lm", colour = "red")
276 |
277 | ggplot(edu, aes(raisedhands, Discussion)) +
278 |   geom_point() + geom_smooth(method = "lm", colour = "purple")
279 |
280 | ggplot(edu, aes(VisITedResources, AnnouncementsView)) +
281 |   geom_point() + geom_smooth(method = "lm", colour = "cyan")
282 |
283 | ggplot(edu, aes(VisITedResources, Discussion)) +
284 |   geom_point() + geom_smooth(method = "lm", colour = "firebrick")
285 |
286 | ggplot(edu, aes(AnnouncementsView, Discussion)) +
287 |   geom_point() + geom_smooth(method = "lm", colour = "hotpink")
288 |
289 |
290 | ### Density Plots
291 |
292 |
293 | ggplot(edu, aes(raisedhands, colour = gender)) + geom_density()
294 |
295 |
296 | ggplot(edu, aes(raisedhands, colour = Topic)) + geom_density()
297 |
298 |
299 |
300 | ggplot(edu, aes(raisedhands, colour = SectionID)) + geom_density()
301 |
302 | ggplot(edu, aes(raisedhands, colour = Semester)) + geom_density()
303 |
304 | ggplot(edu, aes(raisedhands, colour = Class)) + geom_density()
305 |
306 |
307 | ### Tile Map
308 |
309 | tile.map <- edu %>% group_by(gender, NationalITy) %>%
310 |   summarise(Count = n()) %>% ungroup() %>% arrange(desc(Count)) # ungroup so the sort covers the whole table
311 |
312 | ggplot(tile.map, aes(x = gender, y = NationalITy, fill = Count)) + geom_tile()
313 |
314 |
315 |
316 | ### Predictive Modeling
317 |
318 |
319 |
320 | # Splitting data into train and cross-validation sets by using a different sample.
321 |
322 | set.seed(23210)
323 | split <- sample.split(edu$Class, SplitRatio = 0.75)
324 | train <- subset(edu, split == TRUE)
325 | cv <- subset(edu, split == FALSE)
326 |
327 |
328 |
329 | # Decision Tree
330 | # Fit a classification tree on the training rows and plot it.
331 |
332 | tree.model <- rpart(Class ~ ., data = train, method = "class", minbucket = 1)
333 | prp(tree.model)
334 |
335 | # Cross-tabulate the true class (rows) against the tree's prediction (columns).
336 | tree.predict <- predict(tree.model, cv, type = "class")
337 | table(cv$Class, tree.predict)
338 |
339 |
340 | # Decision Tree Using Caret Package
341 |
342 |
343 | # 10-fold cross-validation repeated 3 times, tuning cp over 0.01, 0.03, ..., 0.49.
344 | rpart.control <- trainControl(method = "repeatedcv", number = 10, repeats = 3)
345 | rpart.grid <- expand.grid(.cp = seq(0.01, 0.5, 0.02))
346 | rpart.model.caret <- train(Class ~ ., data = train, method = "rpart", preProcess = "scale",
347 |                            trControl = rpart.control, tuneGrid = rpart.grid)
348 |
349 |
350 | rpart.predict.caret <- predict(rpart.model.caret, cv)
351 | confusionMatrix(rpart.predict.caret, cv$Class)
352 |
353 |
354 |
355 | # Accuracy -> 0.7355
356 |
357 | # Random Forest
358 |
359 |
360 | set.seed(10005)
361 | # SectionID is excluded from the predictors.
362 | rf.model <- randomForest(Class ~ . - SectionID, data = train, importance = TRUE,
363 |                          ntree = 2000, nodesize = 20)
364 |
365 | rf.predict <- predict(rf.model, cv)
366 | confusionMatrix(rf.predict, cv$Class) # data = predictions, reference = truth
367 |
368 |
369 |
370 | varImpPlot(rf.model)
371 |
372 |
373 | # Accuracy -> 0.6777
374 |
375 | # C-Forest Utilizing Party
376 |
377 |
378 | cforest.model <- cforest(Class ~ . - SectionID, data = train,
379 |                          controls = cforest_unbiased(ntree = 2000, mtry = 3))
380 |
381 |
382 | # NOTE(review): OOB = TRUE is passed together with new data (cv) - confirm this is intended.
383 | cforest.prediction <- predict(cforest.model, cv, OOB = TRUE, type = "response")
384 | confusionMatrix(cforest.prediction, cv$Class) # data = predictions, reference = truth
385 |
386 |
387 | # Accuracy -> 0.7438
388 |
389 | # Support Vector Machines
390 |
391 |
392 | svm.model <- svm(Class ~ ., data = train, kernel = "radial", cost = 10, gamma = 0.15)
393 | svm.predict <- predict(svm.model, cv)
394 | confusionMatrix(svm.predict, cv$Class) # data = predictions, reference = truth
395 |
396 |
397 | # Accuracy -> 0.777
398 |
399 | # Ensemble Model
400 |
401 |
402 | # One row per cv observation: each model's prediction, the true class, and a "-" placeholder column.
403 | results <- data.frame(
404 |   tree = tree.predict, rpart = rpart.predict.caret, rf = rf.predict, cforest = cforest.prediction,
405 |   svm = svm.predict, actual.class = cv$Class, final.prediction = rep("-", nrow(cv))
406 | )
407 | results
408 |
409 | # Return the most frequent value of x; on a tie, the value that appears first in x wins.
410 | getmode <- function(x) {
411 |   candidates <- unique(x)
412 |   counts <- tabulate(match(x, candidates))
413 |   candidates[which.max(counts)]
414 | }
415 |
416 |
417 | # Majority vote over the five model columns only; actual.class and the "-" placeholder must not vote.
418 | model.votes <- results[, c("tree", "rpart", "rf", "cforest", "svm")]
419 | results$final.prediction <- factor(apply(model.votes, 1, getmode), levels = levels(results$actual.class))
420 | confusionMatrix(results$final.prediction, results$actual.class) # data = predictions, reference = truth
421 |
422 | # Accuracy -> 0.810 (best) -- NOTE(review): recorded while the true class was among the votes; re-run to confirm
423 |
--------------------------------------------------------------------------------
/Edudata.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | output:
3 | word_document: default
4 | pdf_document: default
5 | html_document: default
6 | ---
7 | ## Students' Academic Performance
8 |
9 | # Load Libraries:
10 | ```{r}
11 | library(ggplot2)
12 | library(dplyr)
13 | library(randomForest)
14 | library(class)
15 | library(rpart)
16 | library(rpart.plot)
17 | library(e1071)
18 | library(caret)
19 | library(caTools)
20 | library(party)
21 | ```
22 |
23 | # Reading the Data
24 | ```{r}
25 | edu <- read.csv('./Edudata.csv', stringsAsFactors = TRUE) # keep text columns as factors (not the default on R >= 4.0)
26 | str(edu)
27 | ```
28 |
29 | ```{r}
30 | summary(edu)
31 | ```
32 | ### Exploratory Data Analysis
33 |
34 | # Raised Hands
35 | ```{r}
36 | ggplot(edu, aes(raisedhands)) +
37 |   geom_histogram(bins = 50, colour = "red", fill = "blue", alpha = 0.2) +
38 |   scale_x_continuous(breaks = seq(0, 100, by = 5)) + labs(x = "Raised Hands", y = "Student Count")
39 | ```
40 |
41 | # Visited Resources
42 | ```{r}
43 | ggplot(edu, aes(VisITedResources)) +
44 |   geom_histogram(bins = 50, colour = "orange", fill = "orange", alpha = 0.4) +
45 |   scale_x_continuous(breaks = seq(0, 100, by = 5)) + labs(x = "Visited Resources", y = "Student Count")
46 | ```
47 |
48 | # Announcements
49 | ```{r}
50 | ggplot(edu, aes(AnnouncementsView)) +
51 |   geom_histogram(bins = 50, colour = "black", fill = "red", alpha = 0.5) +
52 |   scale_x_continuous(breaks = seq(0, 100, by = 5)) + labs(x = "Announcements View", y = "Student Count")
53 | ```
54 |
55 | # Discussion
56 | ```{r}
57 | ggplot(edu, aes(Discussion)) +
58 |   geom_histogram(bins = 50, colour = "black", fill = "grey") +
59 |   scale_x_continuous(breaks = seq(0, 100, by = 5)) + labs(x = "Discussion Participation", y = "Student Count")
60 | ```
61 |
62 | ### Barplots
63 | ```{r}
64 | ggplot(edu, aes(x = gender, fill = gender)) + geom_bar() +
65 |   labs(x = "Gender", y = "Student Count") +
66 |   scale_y_continuous(breaks = seq(0, 300, by = 30)) + coord_flip()
67 | ```
68 |
69 | ```{r}
70 | ggplot(edu, aes(x = NationalITy, fill = NationalITy)) + geom_bar() +
71 |   labs(x = "Nationality", y = "Student Count") +
72 |   scale_y_continuous(breaks = seq(0, 200, by = 20)) + coord_flip()
73 | ```
74 |
75 | ```{r}
76 | ggplot(edu, aes(x = PlaceofBirth, fill = NationalITy)) + geom_bar() +
77 |   labs(x = "Birth Place", y = "Student Count") + coord_flip() # usa is a mix of nationalities
78 | ```
79 |
80 | ```{r}
81 | ggplot(edu, aes(x = GradeID)) + geom_bar(aes(fill = Class)) +
82 |   labs(x = "Grade ID", y = "Student Count") + coord_flip() # g-06 has students with only low grades
83 | ```
84 |
85 | ```{r}
86 | ggplot(edu, aes(x = GradeID)) + geom_bar(aes(fill = gender)) +
87 |   labs(x = "Grade ID", y = "Student Count") + coord_flip() # g-10 has no females
88 | ```
89 |
90 | ```{r}
91 | ggplot(edu, aes(x = SectionID, fill = Topic)) + geom_bar(alpha = 0.1) + # fixed alpha is set on the layer, not in aes()
92 |   labs(x = "Section ID", y = "Student Count") +
93 |   coord_flip()
94 | ```
95 |
96 | ```{r}
97 | ggplot(edu, aes(x = Topic, fill = gender)) + geom_bar(alpha = 0.2) + # fixed alpha is set on the layer, not in aes()
98 |   labs(x = "Topic", y = "Student Count") +
99 |   scale_y_continuous(breaks = seq(0, 100, 4)) + coord_flip()
100 | ```
101 |
102 | ```{r}
103 | ggplot(edu, aes(x = Topic, fill = NationalITy)) + geom_bar() +
104 |   labs(x = "Topic", y = "Student Count") + coord_flip() +
105 |   scale_y_continuous(breaks = seq(0, 100, 4))
106 | ```
107 |
108 | ```{r}
109 | ggplot(edu, aes(x = Topic, fill = SectionID)) + geom_bar(alpha = 0.3) + # fixed alpha is set on the layer, not in aes()
110 |   labs(x = "Topic", y = "Student Count") + coord_flip() +
111 |   scale_y_continuous(breaks = seq(0, 100, 4))
112 | ```
113 |
114 |
115 | # Section C for Mostly Spanish Students
116 | ```{r}
117 | ggplot(edu, aes(x = Topic, fill = Semester)) + geom_bar() +
118 |   labs(x = "Topic", y = "Student Count") + coord_flip() +
119 |   scale_y_continuous(breaks = seq(0, 100, 4))
120 | ```
121 |
122 |
123 | # IT Students Are Mostly in 1st Semester
124 | ```{r}
125 | ggplot(edu, aes(x = Topic, fill = Relation)) + geom_bar(alpha = 0.5) + # fixed alpha is set on the layer, not in aes()
126 |   labs(x = "Topic", y = "Student Count") + coord_flip() +
127 |   scale_y_continuous(breaks = seq(0, 100, 4))
128 | ```
129 |
130 | # Most French Students have Mom as Guardian in Comparison to Father
131 |
132 | ```{r}
133 | ggplot(edu, aes(x = Topic, fill = Class)) + geom_bar() +
134 |   labs(x = "Topic", y = "Student Count") + coord_flip() +
135 |   scale_y_continuous(breaks = seq(0, 100, 4))
136 | ```
137 |
138 | ```{r}
139 | ggplot(edu, aes(x = Topic, fill = Class)) + geom_bar(position = "fill") + # bars are rescaled to height 1
140 |   labs(x = "Topic", y = "Share of Students") + coord_flip() +
141 |   scale_y_continuous(breaks = seq(0, 1, 0.1))
142 | ```
143 |
144 | # Geology has no low class students
145 |
146 | ```{r}
147 | ggplot(edu, aes(x = Semester, fill = Semester)) + geom_bar() +
148 |   labs(x = "Semester", y = "Student Count")
149 | ```
150 |
151 | ```{r}
152 | ggplot(edu, aes(x = Relation)) + geom_bar(aes(fill = Semester)) +
153 |   labs(x = "Guardian", y = "Student Count")
154 | ```
155 |
156 | ```{r}
157 | ggplot(edu, aes(x = ParentAnsweringSurvey)) +
158 |   geom_bar(aes(fill = ParentschoolSatisfaction)) +
159 |   labs(x = "Does parents answer surveys ?", y = "Student Count")
160 | ```
161 |
162 | # Parent Satisfaction
163 | ```{r}
164 | ggplot(edu, aes(x = ParentschoolSatisfaction, fill = ParentschoolSatisfaction)) +
165 |   geom_bar() +
166 |   labs(x = "Are the Parents Satisfied With the School ?", y = "Student Count")
167 | ```
168 |
169 | ```{r}
170 | ggplot(edu, aes(x = StudentAbsenceDays, fill = StudentAbsenceDays)) + geom_bar() +
171 |   labs(x = "Is the student absent for more than seven days", y = "Student Count")
172 | ```
173 |
174 | ```{r}
175 | ggplot(edu, aes(x = Class)) + geom_bar(aes(fill = gender)) +
176 |   labs(x = "Class", y = "Student Count")
177 | ```
178 |
179 | # Few Girls in the Low Class
180 | ```{r}
181 | ggplot(edu, aes(x = Class)) + geom_bar(aes(fill = Relation)) +
182 |   labs(x = "Class", y = "Student Count")
183 | ```
184 |
185 | ```{r}
186 | ggplot(edu, aes(x = Class)) + geom_bar(aes(fill = ParentAnsweringSurvey)) +
187 |   labs(x = "Class", y = "Student Count")
188 | ```
189 |
190 | ```{r}
191 | ggplot(edu, aes(x = Class)) + geom_bar(aes(fill = StudentAbsenceDays)) +
192 |   labs(x = "Class", y = "Student Count")
193 | ```
194 |
195 |
196 | ### Boxplots
197 |
198 | ```{r}
199 | ggplot(edu, aes(x = gender, y = raisedhands)) + geom_boxplot(aes(fill=gender))
200 | ```
201 |
202 | ```{r}
203 | ggplot(edu, aes(x = gender, y = VisITedResources)) + geom_boxplot(aes(fill=gender))
204 | ```
205 |
206 | # Girls Use More Resources
207 | ```{r}
208 | ggplot(edu, aes(x = NationalITy, y = raisedhands)) + geom_boxplot(aes(fill=NationalITy))
209 | ```
210 |
211 | ```{r}
212 | ggplot(edu, aes(x = StageID, y = raisedhands)) + geom_boxplot(aes(fill=StageID))
213 | ```
214 |
215 | ```{r}
216 | ggplot(edu, aes(x = StageID, y = Discussion)) + geom_boxplot(aes(fill=StageID))
217 | ```
218 |
219 | ```{r}
220 | ggplot(edu, aes(x = GradeID, y = raisedhands)) + geom_boxplot(aes(fill=GradeID))
221 | ```
222 |
223 | ```{r}
224 | ggplot(edu, aes(x = SectionID, y = Discussion)) + geom_boxplot(aes(fill=SectionID))
225 | ```
226 |
227 | ```{r}
228 | ggplot(edu, aes(x = Topic, y = raisedhands)) + geom_boxplot(aes(fill=Topic))
229 | ```
230 |
231 | ```{r}
232 | ggplot(edu, aes(x = Semester, y = raisedhands)) + geom_boxplot(aes(fill=Semester))
233 | ```
234 |
235 | ```{r}
236 | ggplot(edu, aes(x = Relation, y = raisedhands)) + geom_boxplot(aes(fill=Relation))
237 | ```
238 |
239 | ```{r}
240 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = raisedhands)) + geom_boxplot(aes(fill=ParentAnsweringSurvey))
241 | ```
242 |
243 | ```{r}
244 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = VisITedResources)) + geom_boxplot(aes(fill=ParentAnsweringSurvey))
245 | ```
246 |
247 | ```{r}
248 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = AnnouncementsView)) + geom_boxplot(aes(fill=ParentAnsweringSurvey))
249 | ```
250 |
251 | ```{r}
252 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = Discussion)) + geom_boxplot(aes(fill=ParentAnsweringSurvey))
253 | ```
254 |
255 | ```{r}
256 | ggplot(edu, aes(x = ParentschoolSatisfaction, y = raisedhands)) + geom_boxplot(aes(fill=ParentschoolSatisfaction))
257 | ```
258 |
259 | ```{r}
260 | ggplot(edu, aes(x = ParentschoolSatisfaction, y = VisITedResources)) + geom_boxplot(aes(fill=ParentschoolSatisfaction))
261 | ```
262 |
263 | ```{r}
264 | ggplot(edu, aes(x = ParentschoolSatisfaction, y = AnnouncementsView)) + geom_boxplot(aes(fill=ParentschoolSatisfaction))
265 | ```
266 |
267 | ```{r}
268 | ggplot(edu, aes(x = ParentschoolSatisfaction, y = Discussion)) + geom_boxplot(aes(fill=ParentschoolSatisfaction))
269 | ```
270 |
271 | ```{r}
272 | ggplot(edu, aes(x = StudentAbsenceDays, y = raisedhands)) + geom_boxplot(aes(fill=StudentAbsenceDays))
273 | ```
274 |
275 | # More Absence Days = Fewer Hand Raises
276 |
277 | ```{r}
278 | ggplot(edu, aes(x = StudentAbsenceDays, y = VisITedResources)) + geom_boxplot(aes(fill=StudentAbsenceDays))
279 | ```
280 |
281 | ```{r}
282 | ggplot(edu, aes(x = StudentAbsenceDays, y = AnnouncementsView)) + geom_boxplot(aes(fill=StudentAbsenceDays))
283 | ```
284 |
285 | ```{r}
286 | ggplot(edu, aes(x = StudentAbsenceDays, y = Discussion)) + geom_boxplot(aes(fill=StudentAbsenceDays))
287 | ```
288 |
289 | ```{r}
290 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = raisedhands)) + geom_boxplot(aes(fill=ParentAnsweringSurvey))
291 | ```
292 |
293 | # Yes Answers to Surveys = More Raised hands
294 |
295 | ```{r}
296 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = VisITedResources)) + geom_boxplot(aes(fill=ParentAnsweringSurvey))
297 | ```
298 |
299 | ```{r}
300 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = AnnouncementsView)) + geom_boxplot(aes(fill=ParentAnsweringSurvey))
301 | ```
302 |
303 | ```{r}
304 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = Discussion)) + geom_boxplot(aes(fill=ParentAnsweringSurvey))
305 | ```
306 |
307 |
308 | ### Class-Wise Boxplots
309 |
310 | ```{r}
311 | ggplot(edu, aes(x = Class, y = raisedhands)) + geom_boxplot(aes(fill=Class))
312 | ```
313 |
314 | # High Marks = Active Participation
315 |
316 | ```{r}
317 | ggplot(edu, aes(x = Class, y = VisITedResources)) + geom_boxplot(aes(fill=Class))
318 | ```
319 |
320 | # High Marks by visited Resources
321 |
322 | ```{r}
323 | ggplot(edu, aes(x = Class, y = AnnouncementsView)) + geom_boxplot(aes(fill=Class))
324 | ```
325 |
326 | # More Marks More Announcements
327 |
328 | ```{r}
329 | ggplot(edu, aes(x = Class, y = Discussion)) + geom_boxplot(aes(fill=Class))
330 | ```
331 |
332 | ### Scatterplots
333 |
334 | ```{r}
335 | ggplot(edu, aes(x = raisedhands, y = VisITedResources)) + geom_point() +
336 | geom_smooth(method = "lm",color='green')
337 | ```
338 |
339 | ```{r}
340 | ggplot(edu, aes(x = raisedhands, y = AnnouncementsView)) + geom_point() +
341 | geom_smooth(method = "lm",color='red')
342 | ```
343 |
344 | ```{r}
345 | ggplot(edu, aes(x = raisedhands, y = Discussion)) + geom_point() +
346 | geom_smooth(method = "lm",color='purple')
347 | ```
348 |
349 | ```{r}
350 | ggplot(edu, aes(x = VisITedResources, y = AnnouncementsView)) + geom_point() +
351 | geom_smooth(method = "lm",color='cyan')
352 | ```
353 |
354 | ```{r}
355 | ggplot(edu, aes(x = VisITedResources, y = Discussion)) + geom_point() +
356 | geom_smooth(method = "lm",color='firebrick')
357 | ```
358 |
359 | ```{r}
360 | ggplot(edu, aes(x = AnnouncementsView, y = Discussion)) + geom_point() +
361 | geom_smooth(method = "lm",color='hotpink')
362 | ```
363 |
364 |
365 | ### Density Plots
366 |
367 | ```{r}
368 | ggplot(edu, aes(x = raisedhands, color = gender)) + geom_density()
369 | ```
370 |
371 | ```{r}
372 | ggplot(edu, aes(x = raisedhands, color = Topic)) + geom_density()
373 | ```
374 |
375 | ```{r}
376 | ggplot(edu, aes(x = raisedhands, color = SectionID)) + geom_density()
377 | ```
378 |
379 | ```{r}
380 | ggplot(edu, aes(x = raisedhands, color = Semester)) + geom_density()
381 | ```
382 |
383 | ```{r}
384 | ggplot(edu, aes(x = raisedhands, color = Class)) + geom_density()
385 | ```
386 |
387 |
388 | ### Tile Map
389 | ```{r}
390 | tile.map <- edu %>% group_by(gender, NationalITy) %>%
391 | summarise(Count = n()) %>% arrange(desc(Count))
392 | ```
393 |
394 | ```{r}
395 | ggplot(tile.map, aes(x = gender, NationalITy, fill = Count)) + geom_tile()
396 | ```
397 |
398 |
399 | ### Predictive Modeling
400 |
401 |
402 |
403 | # Splitting data into train and cross-validation sets.
404 | ```{r}
405 | set.seed(23210)
406 | split <- sample.split(edu$Class, SplitRatio = 0.75)
407 | train <- subset(edu, split == T)
408 | cv <- subset(edu, split == F)
409 | ```
410 |
411 |
412 | # Decision Tree
413 |
414 | ```{r}
415 | tree.model <- rpart(Class ~ ., data = train, method = "class", minbucket = 1)
416 | prp(tree.model)
417 | ```
418 |
419 | ```{r}
420 | tree.predict <- predict(tree.model, cv, type = "class")
421 | table(cv$Class, tree.predict)
422 | ```
423 |
424 |
425 |
426 | # Decision Tree Using Caret Package
427 |
428 | ```{r}
429 | rpart.control = trainControl(method = "repeatedcv", number = 10, repeats = 3)
430 | rpart.grid = expand.grid(.cp = seq(0.01, 0.5, 0.02))
431 | rpart.model.caret <-train(Class ~ ., data = train, method = "rpart", preProcess = "scale",
432 | trControl = rpart.control, tuneGrid = rpart.grid)
433 | ```
434 |
435 | ```{r}
436 | rpart.predict.caret <- predict.train(rpart.model.caret, cv)
437 | confusionMatrix(rpart.predict.caret, cv$Class)
438 | ```
439 |
440 |
441 | Accuracy -> 0.7355
442 |
443 | # Random Forest
444 |
445 | ```{r}
446 | set.seed(10005)
447 | # Random forest on every predictor except SectionID; importance = TRUE stores the scores that varImpPlot() draws.
448 | rf.model <- randomForest(Class ~ . - SectionID, data = train, importance = TRUE,
449 |                          ntree = 2000, nodesize = 20)
450 | # Score on the hold-out set. caret::confusionMatrix() takes (predicted, reference); swapping them leaves accuracy unchanged but mislabels the per-class statistics.
451 | rf.predict <- predict(rf.model, cv)
452 | confusionMatrix(rf.predict, cv$Class)
453 | ```
454 |
455 | ```{r}
456 | varImpPlot(rf.model)
457 | ```
458 |
459 | Accuracy -> 0.6777
460 |
461 | # C-Forest Utilizing Party
462 |
463 | ```{r}
464 | cforest.model = cforest(Class ~ .-SectionID , data = train,
465 | controls=cforest_unbiased(ntree=2000, mtry = 3))
466 | ```
467 |
468 | ```{r}
469 | cforest.prediction <- predict(cforest.model, cv, OOB = TRUE, type = "response")
470 | confusionMatrix(cforest.prediction, cv$Class) # caret expects (predicted, reference), as in the caret rpart chunk
471 | ```
472 |
473 | Accuracy -> 0.7438
474 |
475 | # Support Vector Machines
476 |
477 | ```{r}
478 | svm.model <- svm(Class ~ ., data = train, kernel = "radial", cost = 10, gamma = 0.15)
479 | svm.predict <- predict(svm.model, cv)
480 | confusionMatrix(svm.predict, cv$Class) # caret expects (predicted, reference); the swapped order mislabels per-class statistics
481 | ```
482 |
483 | Accuracy -> 0.777
484 |
485 | # Ensemble Model
486 |
487 |
488 | ```{r}
489 | results <- data.frame(tree = tree.predict, rpart = rpart.predict.caret, rf = rf.predict,
490 | cforest = cforest.prediction, svm = svm.predict,
491 | actual.class = cv$Class, final.prediction = rep("-",nrow(cv)))
492 |
493 | results
494 | ```
495 |
496 |
497 | ```{r}
498 | getmode <- function(x) { # Return the most frequent value in `x` (its statistical mode).
499 | unique.x <- unique(x) # distinct values, in order of first appearance
500 | unique.x[which.max(tabulate(match(x, unique.x)))] # count each distinct value; which.max() keeps the first maximum, so ties go to the earliest-seen value
501 | }
502 | ```
503 |
504 | ```{r}
505 | results$final.prediction <- factor(apply(results[c("tree", "rpart", "rf", "cforest", "svm")], 1, getmode), levels = levels(cv$Class)) # majority vote over the five model columns only; voting over every column also counted actual.class (label leakage) and the "-" placeholder, so the accuracy must be re-measured
506 | confusionMatrix(results$final.prediction, results$actual.class) # (predicted, reference), both factors sharing the levels of cv$Class
507 | ```
508 |
509 | Accuracy -> 0.810 (best)
510 |
--------------------------------------------------------------------------------
/Edudata.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atse0612/Side-Projects-For-Fun/b2f21804b95d678df6553493d3cb6fc9635bc682/Edudata.pdf
--------------------------------------------------------------------------------
/Fake News.R:
--------------------------------------------------------------------------------
1 | # Fake News
2 |
3 | ## Loading the Libraries
4 |
5 | library(rpart)
6 | library(rpart.plot)
7 |
8 |
9 | setwd('./Kaggle')
10 |
11 |
12 | ### Reading the Fake News Dataset
13 |
14 | fake <- read.csv('fake.csv')
15 | print(table(fake$type))
16 | head(fake)
17 |
18 | ### Checking for NA Values
19 |
20 | any(is.na(fake))
21 |
22 | ### Running the Analysis
23 |
24 |
25 | mini_model <- rpart(formula = type ~ ord_in_thread + language + country + spam_score + replies_count + participants_count + likes + comments + shares,
26 | data = fake,
27 | method = "class", # Classification tree
28 | parms = list(split = "information"), # Use information gain as the splitting criterion
29 | control = rpart.control(cp = 0.01, # Complexity parameter: a split must improve the fit by at least this factor to be attempted
30 | maxcompete = 3, # Number of competitor splits kept in the output (useful when inspecting the tree)
31 | maxsurrogate = 3, # Number of surrogate splits kept in the output (useful when inspecting the tree)
32 | xval = 20, # 20-fold cross-validation
33 | maxdepth = 4)) # Cap the tree depth at 4 so it stays easy to interpret
34 | plotcp(mini_model)
35 |
36 |
37 |
38 | ### Decision Tree
39 | rpart.plot(mini_model, main = "Decision Tree", box.palette = list("Gy", "Gn", "Bu", "Bn", "Or", "Rd", "Gy", "Pu"))
40 |
41 |
42 | print(summary(mini_model))
43 |
--------------------------------------------------------------------------------
/Heart Disease Decision Trees.R:
--------------------------------------------------------------------------------
1 | ## Heart Disease Decision Trees
2 | # Loading the data
3 |
4 | library(FFTrees)
5 | FFTrees.guide()
6 | data(heartdisease)
7 | head(heartdisease)
8 | summary(heartdisease)
9 |
10 | # Heart Disease Data
11 | set.seed(100) # fixed seed so the random train/test split is reproducible
12 | samples <- sample(c(TRUE, FALSE), size = nrow(heartdisease), replace = TRUE) # TRUE marks a training row; T/F are ordinary variables that can be reassigned, so spell them out
13 | heartdisease.train <- heartdisease[samples, ]
14 | heartdisease.test <- heartdisease[!samples, ] # complement of the training mask
15 | heart.FFTrees <- FFTrees(formula = diagnosis ~., data = heartdisease.train,data.test = heartdisease.test)
16 | print(heart.FFTrees)
17 | class(heart.FFTrees)
18 | names(heart.FFTrees)
19 | heart.FFTrees$cue.accuracies
20 |
21 | # ROC Plot
22 | showcues(heart.FFTrees, main = "Heartdisease Cue Accuracy")
23 |
24 | # Stats
25 | heart.FFTrees$FFTrees.stats
26 | summary(heart.FFTrees)
27 |
28 | # Area Under the Curve
29 | heart.FFTrees$auc
30 |
31 | # Train Decision DF
32 |
33 | heart.FFTrees$decision.train[1:5,]
34 | heart.FFTrees$levelout.train[1:5,]
35 |
36 | # Selecting Cues and Plotting Trees
37 | heart.as.FFTrees <- FFTrees(formula = diagnosis ~ age + sex, data = heartdisease)
38 | plot(heart.FFTrees, main = "Heart Disease",decision.names = c("Healthy", "Disease"))
39 | plot(heart.FFTrees, main = "Heart Disease",decision.names = c("Healthy", "Disease"), train.p = 5)
40 |
--------------------------------------------------------------------------------
/Heart Disease Decision Trees.Rmd:
--------------------------------------------------------------------------------
1 | ## Heart Disease Decision Trees
2 | # Loading the data
3 |
4 | ```{r}
5 | library(FFTrees)
6 | FFTrees.guide()
7 | data(heartdisease)
8 | head(heartdisease)
9 | summary(heartdisease)
10 | ```
11 |
12 | # Heart Disease Data
13 |
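14 | The `head()` and `summary()` output above gives a first look at the data and at how common a heart disease diagnosis is among the patients. The chunk below draws a random train/test split, fits the trees on the training rows, and prints the fitted object together with its cue accuracies.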
15 | ```{r}
16 | set.seed(100) # fixed seed so the random train/test split is reproducible
17 | samples <- sample(c(TRUE, FALSE), size = nrow(heartdisease), replace = TRUE) # TRUE marks a training row; T/F are ordinary variables that can be reassigned, so spell them out
18 | heartdisease.train <- heartdisease[samples, ]
19 | heartdisease.test <- heartdisease[!samples, ] # complement of the training mask
20 | heart.FFTrees <- FFTrees(formula = diagnosis ~., data = heartdisease.train,data.test = heartdisease.test)
21 | print(heart.FFTrees)
22 | class(heart.FFTrees)
23 | names(heart.FFTrees)
24 | heart.FFTrees$cue.accuracies
25 | ```
26 |
27 | # ROC Plot
28 | ```{r}
29 | showcues(heart.FFTrees, main = "Heartdisease Cue Accuracy")
30 | ```
31 | # Stats
32 |
33 | ```{r}
34 | heart.FFTrees$FFTrees.stats
35 | summary(heart.FFTrees)
36 | ```
37 |
38 | # Area Under the Curve
39 | ```{r}
40 | heart.FFTrees$auc
41 | ```
42 | # Train Decision DF
43 |
44 | ```{r}
45 | heart.FFTrees$decision.train[1:5,]
46 | heart.FFTrees$levelout.train[1:5,]
47 | ```
48 |
49 | # Selecting Cues and Plotting Trees
50 |
51 | ```{r, echo=FALSE}
52 | heart.as.FFTrees <- FFTrees(formula = diagnosis ~ age + sex, data = heartdisease)
53 | plot(heart.FFTrees, main = "Heart Disease",decision.names = c("Healthy", "Disease"))
54 | plot(heart.FFTrees, main = "Heart Disease",decision.names = c("Healthy", "Disease"), train.p = 5)
55 | ```
56 |
--------------------------------------------------------------------------------
/Housing Index Zillow.R:
--------------------------------------------------------------------------------
1 | # Zillow Housing Index From 2010
2 |
3 | ### Changing the Working Directory
4 |
5 | setwd('./Kaggle/Zillow')
6 |
7 | ### Loading the Libraries
8 |
9 | library(forecast)
10 | library(zoo)
11 | library(ggplot2)
12 | library(ggthemes)
13 | library(tidyr)
14 | library(dplyr)
15 | library(readr)
16 |
17 | ### Reading the Datasets
18 |
19 | rent <- read.csv('./price.csv')
20 | sqft <- read.csv('./pricepersqft.csv')
21 |
22 | ### Looking at the Top 10
23 |
24 | values=head(rent,10)
25 | values=data.frame(t(as.matrix(values[,7:81])))
26 | colnames(values)=rent[1:10,2]
27 |
28 |
29 | ### Monthly Percentage Change (Seattle)
30 |
31 | suppressMessages(library(quantmod))
32 |
33 | pct_change <- function(rent) {
34 | # Period-over-period change of every data column. `rent` holds 6 descriptive columns first, then one column per period.
35 | nc <- ncol(rent)
36 | ln <- colnames(rent)
37 | # Split the descriptive columns (1:6) from the data columns (7 onward).
38 | meta <- rent[c(1:6)]
39 | data <- rent[c(7:nc)]
40 | # Delt() (quantmod) is applied to each row; apply() stacks the per-row results as columns, hence the transpose.
41 | data <- t(apply(data, 1, Delt))
42 | # Reassemble the table and restore the original column names.
43 | rv <- cbind(meta, data)
44 | colnames(rv) <- ln
45 | # Drop column 7: the first period has no predecessor to be compared with.
46 | rv[-7]
47 | }
48 |
49 | # Select data for the Seattle, WA metro region.
50 | # Total of 98 places.
51 | pc <- subset(rent, rent$Metro == 'Seattle')
52 | pc <- pct_change(pc)
53 |
54 | last = ncol(pc)
55 |
56 | pc <- pc[order(pc[last], decreasing = TRUE),]
57 | pc <- cbind(pc[c(2,5)], round(pc[(last-3):last], 3))
58 |
59 | # Top 10 places in the Seattle region with the
60 | # highest most recent monthly percentage change.
61 | head(pc, n=10)
62 |
63 |
64 | ### Monthly Percentage Change (San Francisco)
65 |
66 | pct_change <- function(rent) {
67 | # NOTE(review): byte-for-byte duplicate of the pct_change() defined earlier in this script; one definition would suffice.
68 | nc <- ncol(rent)
69 | ln <- colnames(rent)
70 | # Split the descriptive columns (1:6) from the data columns (7 onward).
71 | meta <- rent[c(1:6)]
72 | data <- rent[c(7:nc)]
73 | # Delt() (quantmod) is applied to each row; apply() stacks the per-row results as columns, hence the transpose.
74 | data <- t(apply(data, 1, Delt))
75 | # Reassemble the table and restore the original column names.
76 | rv <- cbind(meta, data)
77 | colnames(rv) <- ln
78 | # Drop column 7: the first period has no predecessor to be compared with.
79 | rv[-7]
80 | }
81 |
82 | # Select data for the San Francisco, CA metro region.
83 |
84 | pc <- subset(rent, rent$Metro == 'San Francisco')
85 | pc <- pct_change(pc)
86 |
87 | last = ncol(pc)
88 |
89 | pc <- pc[order(pc[last], decreasing = TRUE),]
90 | pc <- cbind(pc[c(2,5)], round(pc[(last-3):last], 3))
91 |
92 | # Top 10 places in the San Francisco region with the
93 | # highest most recent monthly percentage change.
94 | head(pc, n=10)
95 |
96 |
97 |
98 | ### Yearly Percentage Change (Sacramento)
99 |
100 | get_range <- function(rent) { # indices of every 12th data column, counted back from the last column
101 |   last <- ncol(rent)
102 |   stopifnot(last >= 7) # columns 1:6 are descriptive; the series starts at column 7
103 |   n_data <- last - 6
104 |   # Whole 12-column steps that stay inside the data columns. round(n_data / 12)
105 |   # could step back into columns 1:6 (e.g. with 72 or 79 data columns).
106 |   seq(last - ((n_data - 1) %/% 12) * 12, last, by = 12)
107 | }
108 |
109 | # Select data for the Sacramento, CA metro region.
110 | # Total of 55 places.
111 | pc <- subset(rent, rent$Metro == 'Sacramento')
112 |
113 | years <- get_range(pc)
114 | pc <- cbind(pc[1:6], pc[years])
115 |
116 | # use function defined above
117 | pc <- pct_change(pc)
118 |
119 | last = ncol(pc)
120 |
121 | pc <- pc[order(pc[last], decreasing = TRUE),]
122 | pc <- cbind(pc[c(2,5)], round(pc[(last-3):last], 2))
123 |
124 | # Top 10 places in the Sacramento region with the
125 | # highest most recent yearly percentage change.
126 | head(pc, n=10)
127 |
128 |
129 | ### Yearly Percentage Change (San Francisco)
130 |
131 | get_range <- function(rent) { # indices of every 12th data column, counted back from the last column
132 |   last <- ncol(rent)
133 |   stopifnot(last >= 7) # columns 1:6 are descriptive; the series starts at column 7
134 |   n_data <- last - 6
135 |   # Whole 12-column steps that stay inside the data columns. round(n_data / 12)
136 |   # could step back into columns 1:6 (e.g. with 72 or 79 data columns).
137 |   seq(last - ((n_data - 1) %/% 12) * 12, last, by = 12)
138 | }
139 |
140 | # Select data for the San Francisco, CA metro region.
141 | pc <- subset(rent, rent$Metro == 'San Francisco')
142 |
143 | years <- get_range(pc)
144 | pc <- cbind(pc[1:6], pc[years])
145 |
146 | # use function defined above
147 | pc <- pct_change(pc)
148 |
149 | last = ncol(pc)
150 |
151 | pc <- pc[order(pc[last], decreasing = TRUE),]
152 | pc <- cbind(pc[c(2,5)], round(pc[(last-3):last], 2))
153 |
154 | # Top 10 places in the San Francisco region with the
155 | # highest most recent yearly percentage change.
156 | head(pc, n=10)
157 |
158 |
159 |
160 | ### Index Numbers (Los Angeles)
161 |
162 | index_base_100 <- function(rent) {
163 |   # Rebase every data column so that the first data column (column 7) equals 100.
164 |   #
165 |   # rent: data frame with 6 descriptive columns followed by numeric data
166 |   #       columns; column 7 is the base period.
167 |   # Returns a data frame with the same columns and column names, where each
168 |   # data column holds round(value / base * 100).
169 |   #
170 |   # Dividing inside apply() by a one-column data frame produced a list of
171 |   # data frames that only worked through implicit coercion; dividing the
172 |   # data columns by the base vector gives the same numbers directly.
173 |   nc <- ncol(rent)
174 |   stopifnot(nc >= 7)
175 |   ln <- colnames(rent)
176 |
177 |   meta <- rent[, 1:6]
178 |   base <- rent[[7]]
179 |
180 |   # A vector of length nrow(rent) is recycled down each column, i.e. row by row.
181 |   data <- round(rent[7:nc] / base * 100)
182 |
183 |   rv <- cbind(meta, data)
184 |   colnames(rv) <- ln
185 |   rv }
186 |
187 | # Select data for the Los Angeles, CA metro region.
188 | # Total of 148 places.
189 | # Base: November 2010 = 100
190 | idx <- subset(rent, rent$Metro == 'Los Angeles')
191 | idx <- index_base_100(idx)
192 |
193 | last = ncol(idx)
194 | s <- seq(last-36, last, 12)
195 |
196 | idx <- idx[order(idx[last], decreasing = TRUE),]
197 | idx <- cbind(idx[c(2,5)], idx[s])
198 |
199 | # The top 10 places in the Los Angeles metro region
200 | # with the largest index change over the base period.
201 | head(idx, n=10)
202 |
203 | #### San Francisco Region for Index Numbers
204 |
205 | index_base_100 <- function(rent) {
206 |   # Rebase every data column so that the first data column (column 7) equals 100.
207 |   #
208 |   # rent: data frame with 6 descriptive columns followed by numeric data
209 |   #       columns; column 7 is the base period.
210 |   # Returns a data frame with the same columns and column names, where each
211 |   # data column holds round(value / base * 100).
212 |   #
213 |   # Dividing inside apply() by a one-column data frame produced a list of
214 |   # data frames that only worked through implicit coercion; dividing the
215 |   # data columns by the base vector gives the same numbers directly.
216 |   nc <- ncol(rent)
217 |   stopifnot(nc >= 7)
218 |   ln <- colnames(rent)
219 |
220 |   meta <- rent[, 1:6]
221 |   base <- rent[[7]]
222 |
223 |   # A vector of length nrow(rent) is recycled down each column, i.e. row by row.
224 |   data <- round(rent[7:nc] / base * 100)
225 |
226 |   rv <- cbind(meta, data)
227 |   colnames(rv) <- ln
228 |   rv }
229 |
230 | # Select data for the San Francisco, CA metro region.
231 | # Base: November 2010 = 100
232 | idx <- subset(rent, rent$Metro == 'San Francisco')
233 | idx <- index_base_100(idx)
234 |
235 | last = ncol(idx)
236 | s <- seq(last-36, last, 12)
237 |
238 | idx <- idx[order(idx[last], decreasing = TRUE),]
239 | idx <- cbind(idx[c(2,5)], idx[s])
240 |
241 | # The top 10 places in the San Francisco metro region
242 | # with the largest index change over the base period.
243 | head(idx, n=10)
244 |
245 |
246 |
247 |
248 | ### Top 10 Cities By Population Using Time-Series Analysis
249 |
250 | date <- seq(as.Date("2010/11/01"), as.Date("2017/01/31"),"month")
251 | date <- as.yearmon(date)
252 | ts=zoo(values,order.by = date)
253 | values=fortify(ts)
254 | values$Index=as.Date(values$Index)
255 |
256 | autoplot(ts,facet=NULL)+
257 | theme_minimal()+
258 | labs(x="Time",y="Price")
259 |
260 | forecasts=matrix(,ncol=10,nrow=11)
261 |
262 | for(i in 1:10){
263 |
264 | forecasts[,i]=forecast(auto.arima(ts[,i],lambda = 0,stepwise = F),h=11)$mean
265 |
266 | }
267 |
268 | colnames(forecasts) = rent[1:10,2]
269 | results=rbind(values[,2:11],forecasts)
270 | date_2 <- seq(as.Date("2010/11/01"), as.Date("2017/12/31"),"month")
271 | date_2 <- as.yearmon(date_2)
272 | results=zoo(results,order.by = date_2)
273 | autoplot(results,facet=NULL)+
274 | theme_minimal()+
275 | labs(x="Time",y="Price")+
276 | geom_vline(aes(xintercept=2017),size=0.2)
277 |
--------------------------------------------------------------------------------
/Illegal Immigration.R:
--------------------------------------------------------------------------------
1 | ## Illegal Immigration
2 |
3 | ## Loading the Libraries and Changing the Working Directory
4 | library(tidyverse)
5 | library(d3Network)
6 | setwd('./Kaggle')
7 |
8 | ## Reading the Dataset & Rearranging Them
9 |
10 | arrests <- read.csv('./illegal immigration.csv')
11 | names(arrests) <- gsub("[.]", " ", names(arrests))
12 | arrests.clns <-
13 | arrests %>% gather(key, value, -Border, -Sector, -`State Territory`) %>% separate(key,
14 | into = c("Year", "Type"),
15 | sep = " ",
16 | extra = "merge")%>%na.omit()
17 |
18 | arrests.clns$Year <- gsub("X", "", arrests.clns$Year)
19 | arrests.clns$Type<-trimws(tolower(arrests.clns$Type))
20 |
21 | arrests.net <- arrests.clns %>% # links for a type -> sector network: mean value per pair, ten highest targets per source
22 |   filter(Border != "United States" & Sector != "All") %>% # presumably the aggregate rows; confirm against the CSV
23 |   select(source = Type, target = Sector, value) %>%
24 |   group_by(source, target) %>%
25 |   summarize(value = mean(value)) %>% na.omit() %>% # summarize() drops the last grouping level, so the data stays grouped by source
26 |   mutate(rank = rank(desc(value))) %>% arrange(rank) %>% filter(rank <= 10) # ranks are computed within each source; the stray `source` argument had been passed as rank()'s na.last
27 |
28 | Nodes <-
29 | rbind(data.frame(name = unique(arrests.net$source)), data.frame(name = unique(arrests.net$target)))
30 | Links <- arrests.net
31 | Links$source <- match(Links$source, Nodes$name) - 1
32 | Links$target <- match(Links$target, Nodes$name) - 1
33 |
34 | # Graphs
35 | arrests.plot<-arrests.clns%>%group_by(Year,Type)%>%summarize(avg.value=mean(value))
36 |
37 |
38 | ggplot(arrests.plot)+aes(Year,avg.value,color=Type)+
39 | geom_point(size=2)+theme(plot.title = element_text(hjust = 0.5, face = "bold"),
40 | axis.text.y=element_blank(),axis.text.x=element_blank())+
41 | labs(title="Average arrests over the years",y="Average Arrests")
42 |
43 | ## Based on the information that is given in the graph, the arrests
44 | ## of Mexicans have greatly reduced in comparison to all illegal immigrants. It
45 | ## has been considered that illegal immigration with Mexicans is not the biggest factor
46 | ## that is contributing to the problems in the United States.
47 |
--------------------------------------------------------------------------------
/Iris.R:
--------------------------------------------------------------------------------
1 | # Iris
2 |
3 | # Getting the Data
4 | data("iris")
5 | head(iris)
6 |
7 | # Loading the Libraries
8 | library(ggplot2)
9 | library(dplyr)
10 | library(tidyr)
11 |
12 | # Summary of the Data
13 | summary(iris)
14 |
15 | head(iris, n = 10)
16 |
17 |
18 | # Forming to Long Iris
19 | long_iris <- iris%>%
20 | gather(part,value,Sepal.Length,Sepal.Width,Petal.Length ,Petal.Width)%>%
21 | separate(part, c('part', 'measure'), sep = '\\.')
22 | head(long_iris, n=10)
23 |
24 |
25 | sapply(long_iris, class)
26 |
27 | fcts <- c('part', 'measure')
28 | long_iris[fcts] <- lapply(long_iris[fcts], as.factor)
29 | sapply(long_iris, class)
30 |
31 |
32 | Missing_d <- function(x){sum(is.na(x))/length(x)*100}
33 |
34 |
35 | apply(long_iris, 2, Missing_d)
36 |
37 |
38 |
39 | is_special <- function(x) { # TRUE where a value is unusable: NA/NaN/Inf for numeric input, NA for anything else
40 |   if (!is.numeric(x)) is.na(x) else !is.finite(x)
41 | }
42 |
43 |
44 | sapply(long_iris, is_special)
45 |
46 |
47 |
48 | # Checking for NA Values
49 |
50 | sum(is.na(long_iris$value))
51 |
52 |
53 | ## Exploratory Data Analysis
54 | p <- ggplot(long_iris, aes(x = Species, y = value, col = part))
55 | p + geom_jitter(alpha = 0.4, size = 0.8) + facet_grid(.~ measure)
56 | p + geom_jitter(alpha = 0.3, size = 0.8) + stat_boxplot(alpha = 0.5) + facet_grid(.~ measure)
57 | p + geom_jitter(alpha = 0.5, size = 0.8) + stat_boxplot(alpha = 0.5) + facet_grid(.~ part)
58 |
59 |
60 |
61 | iris$Flower <- 1:nrow(iris)
62 |
63 | #create wide_iris
64 | wide_iris <- iris %>%
65 | gather(key, value, -Species, -Flower) %>%
66 | separate(key, c("Part", "Measure"),sep = "\\.") %>%
67 | spread(Measure, value)
68 |
69 | head(wide_iris, n=10)
70 |
71 |
72 |
73 | q <- ggplot(wide_iris, aes(x = Width, y = Length, col = Species))
74 | q + geom_jitter(alpha = 0.4, size = 0.8) + facet_grid(. ~ Species) +
75 | stat_smooth(method = 'lm', se = F)
76 | q + geom_jitter(alpha = 0.4, size = 0.8) + facet_grid(. ~ Part)
77 | q + geom_point(alpha = 0.4, size = 0.8) + stat_smooth(method = 'lm', fullrange = T, size = 0.5)
--------------------------------------------------------------------------------
/Mass Shootings/Mass Shootings Dataset.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atse0612/Side-Projects-For-Fun/b2f21804b95d678df6553493d3cb6fc9635bc682/Mass Shootings/Mass Shootings Dataset.csv
--------------------------------------------------------------------------------
/Mass Shootings/Mass Shootings.R:
--------------------------------------------------------------------------------
1 | # Mass Shootings
2 |
3 | ## Libraries
4 |
5 | library(data.table) # A faster way to handle data frames in R
6 | library(ggplot2) # For more control on plots
7 | library(ggthemes) # For prettier ggplot2 plot aesthetics and acessibility for color-blind palettes
8 | library(knitr) # For pretty tables
9 | library(lubridate) # For easy handling dates
10 | library(scales) # To add more ticks and facilitate plot interpretation
11 | library(lattice)
12 | library(chron)
13 | library(grid)
14 |
15 | ## Changing Working Directory
16 |
17 | setwd('./Kaggle/Mass Shootings')
18 |
19 | ## Loading the Data
20 |
21 | shooters_hooters <- fread("./Mass Shootings Dataset.csv")
22 | kable(head(shooters_hooters))
23 |
24 | ## How many people got killed/injured per year?
25 |
26 | yearvitm <- shooters_hooters[,.(Date,Fatalities,Injured)]
27 | kable(head(yearvitm, 10))
28 |
29 | ## Conversion to Dates
30 | yearvitm$Date <- mdy(yearvitm$Date)
31 |
32 | ## Checking for Missing Data
33 |
34 | nrow(yearvitm[is.na(Date) | is.na(Fatalities) | is.na(Injured)])
35 |
36 |
37 | ## Creating a Time-Series Plot of Fatalities and Injuries
38 |
39 | vitmyear_long <- melt(yearvitm,id.vars="Date",measure.vars = c("Fatalities","Injured"),
40 | variable.name="TypeOfVictim",value.name = "Number")
41 | kable(head(vitmyear_long))
42 |
43 |
44 | ggplot(vitmyear_long, aes(x=Date, y=Number, color=TypeOfVictim)) +
45 | geom_line() +
46 | xlab("") +
47 | ylab("Number of Victims") +
48 | theme_minimal() +
49 | scale_x_date(breaks=pretty_breaks(n=10)) +
50 | labs(title = "Number of Victims per Year",
51 | subtitle = "The number of injured people in October 2, 2017 is much higher than ever seen before.") +
52 | scale_color_colorblind(name = "Type of Victim")
53 |
54 | ## Before the October 2 Fatality
55 |
56 | ggplot(vitmyear_long[Number < 100], aes(x=Date, y=Number, color=TypeOfVictim)) +
57 | geom_line() +
58 | xlab("") +
59 | ylab("Number of Victims") +
60 | theme_minimal() +
61 | scale_x_date(breaks=pretty_breaks(n=10)) +
62 | labs(title = "Number of Victims per Year Without October 2, 2017 Injuries") +
63 | scale_color_colorblind(name = "Type of Victim")
64 |
65 |
66 | ## Geom Plot
67 |
68 | ggplot(vitmyear_long[Number < 100], aes(x=Date, y=Number, color=TypeOfVictim)) +
69 | geom_point(alpha=0.4) + # Modified to point and added alpha for transparency
70 | xlab("") +
71 | ylab("Number of Victims") +
72 | theme_minimal() +
73 | scale_x_date(breaks=pretty_breaks(n=10)) +
74 | labs(title = "Number of Victims per Year Without October 2, 2017 Injuries",
75 | subtitle = "The number of shooting events itensified over the last years.") +
76 | scale_color_colorblind(name = "Type of Victim")
77 |
78 |
79 | ## Looking at the Calendar Days
80 |
81 | calendar_dead <- shooters_hooters[,.(Date,Victims=`Total victims`)]
82 | calendar_dead$Date <- mdy(calendar_dead$Date)
83 | kable(head(calendar_dead))
84 |
85 |
86 | ## Missing Data
87 |
88 | nrow(calendar_dead[is.na(Date) | is.na(Victims)])
89 |
90 |
91 | ## Creating a Calendar Heatmap
92 |
93 | heatmap_calendar <- function(dates,
94 | values,
95 | ncolors=99,
96 | color="r2g",
97 | varname="Values",
98 | date.form = "%Y-%m-%d", ...) {
99 | if (class(dates) == "character" | class(dates) == "factor" ) {
100 | dates <- strptime(dates, date.form)
101 | }
102 | caldat <- data.frame(value = values, dates = dates)
103 | min.date <- as.Date(paste(format(min(dates), "%Y"),
104 | "-1-1",sep = ""))
105 | max.date <- as.Date(paste(format(max(dates), "%Y"),
106 | "-12-31", sep = ""))
107 | dates.f <- data.frame(date.seq = seq(min.date, max.date, by="days"))
108 |
109 | # Merge moves data by one day, avoid
110 | caldat <- data.frame(date.seq = seq(min.date, max.date, by="days"), value = NA)
111 | dates <- as.Date(dates)
112 | caldat$value[match(dates, caldat$date.seq)] <- values
113 |
114 | caldat$dotw <- as.numeric(format(caldat$date.seq, "%w"))
115 | caldat$woty <- as.numeric(format(caldat$date.seq, "%U")) + 1
116 | caldat$yr <- as.factor(format(caldat$date.seq, "%Y"))
117 | caldat$month <- as.numeric(format(caldat$date.seq, "%m"))
118 | yrs <- as.character(unique(caldat$yr))
119 | d.loc <- as.numeric()
120 | for (m in min(yrs):max(yrs)) {
121 | d.subset <- which(caldat$yr == m)
122 | sub.seq <- seq(1,length(d.subset))
123 | d.loc <- c(d.loc, sub.seq)
124 | }
125 | caldat <- cbind(caldat, seq=d.loc)
126 |
127 | #color styles
128 | r2b <- c("#0571B0", "#92C5DE", "#F7F7F7", "#F4A582", "#CA0020") #red to blue
129 | r2g <- c("#D61818", "#FFAE63", "#FFFFBD", "#B5E384") #red to green
130 | w2b <- c("#045A8D", "#2B8CBE", "#74A9CF", "#BDC9E1", "#F1EEF6") #white to blue
131 |
132 | assign("col.sty", get(color))
133 | calendar.pal <- colorRampPalette((col.sty), space = "Lab")
134 | def.theme <- lattice.getOption("default.theme")
135 | cal.theme <-
136 | function() {
137 | theme <-
138 | list(
139 | strip.background = list(col = "transparent"),
140 | strip.border = list(col = "transparent"),
141 | axis.line = list(col="transparent"),
142 | par.strip.text=list(cex=0.8))
143 | }
144 | lattice.options(default.theme = cal.theme)
145 | yrs <- (unique(caldat$yr))
146 | nyr <- length(yrs)
147 | print(cal.plot <- levelplot(value~woty*dotw | yr, data=caldat,
148 | as.table=TRUE,
149 | aspect=.12,
150 | layout = c(1, nyr%%7),
151 | between = list(x=0, y=c(1,1)),
152 | strip=TRUE,
153 | main = paste("Calendar Heat Map of ", varname, sep = ""),
154 | scales = list(
155 | x = list(
156 | at= c(seq(2.9, 52, by=4.42)),
157 | labels = month.abb,
158 | alternating = c(1, rep(0, (nyr-1))),
159 | tck=0,
160 | cex = 0.7),
161 | y=list(
162 | at = c(0, 1, 2, 3, 4, 5, 6),
163 | labels = c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday",
164 | "Friday", "Saturday"),
165 | alternating = 1,
166 | cex = 0.6,
167 | tck=0)),
168 | xlim =c(0.4, 54.6),
169 | ylim=c(6.6,-0.6),
170 | cuts= ncolors - 1,
171 | col.regions = (calendar.pal(ncolors)),
172 | xlab="" ,
173 | ylab="",
174 | colorkey= list(col = calendar.pal(ncolors), width = 0.6, height = 0.5),
175 | subscripts=TRUE
176 | ) )
177 | panel.locs <- trellis.currentLayout()
178 | for (row in 1:nrow(panel.locs)) {
179 | for (column in 1:ncol(panel.locs)) {
180 | if (panel.locs[row, column] > 0)
181 | {
182 | trellis.focus("panel", row = row, column = column,
183 | highlight = FALSE)
184 | xyetc <- trellis.panelArgs()
185 | subs <- caldat[xyetc$subscripts,]
186 | dates.fsubs <- caldat[caldat$yr == unique(subs$yr),]
187 | y.start <- dates.fsubs$dotw[1]
188 | y.end <- dates.fsubs$dotw[nrow(dates.fsubs)]
189 | dates.len <- nrow(dates.fsubs)
190 | adj.start <- dates.fsubs$woty[1]
191 |
192 | for (k in 0:6) {
193 | if (k < y.start) {
194 | x.start <- adj.start + 0.5
195 | } else {
196 | x.start <- adj.start - 0.5
197 | }
198 | if (k > y.end) {
199 | x.finis <- dates.fsubs$woty[nrow(dates.fsubs)] - 0.5
200 | } else {
201 | x.finis <- dates.fsubs$woty[nrow(dates.fsubs)] + 0.5
202 | }
203 | grid.lines(x = c(x.start, x.finis), y = c(k -0.5, k - 0.5),
204 | default.units = "native", gp=gpar(col = "grey", lwd = 1))
205 | }
206 | if (adj.start < 2) {
207 | grid.lines(x = c( 0.5, 0.5), y = c(6.5, y.start-0.5),
208 | default.units = "native", gp=gpar(col = "grey", lwd = 1))
209 | grid.lines(x = c(1.5, 1.5), y = c(6.5, -0.5), default.units = "native",
210 | gp=gpar(col = "grey", lwd = 1))
211 | grid.lines(x = c(x.finis, x.finis),
212 | y = c(dates.fsubs$dotw[dates.len] -0.5, -0.5), default.units = "native",
213 | gp=gpar(col = "grey", lwd = 1))
214 | if (dates.fsubs$dotw[dates.len] != 6) {
215 | grid.lines(x = c(x.finis + 1, x.finis + 1),
216 | y = c(dates.fsubs$dotw[dates.len] -0.5, -0.5), default.units = "native",
217 | gp=gpar(col = "grey", lwd = 1))
218 | }
219 | grid.lines(x = c(x.finis, x.finis),
220 | y = c(dates.fsubs$dotw[dates.len] -0.5, -0.5), default.units = "native",
221 | gp=gpar(col = "grey", lwd = 1))
222 | }
223 | for (n in 1:51) {
224 | grid.lines(x = c(n + 1.5, n + 1.5),
225 | y = c(-0.5, 6.5), default.units = "native", gp=gpar(col = "grey", lwd = 1))
226 | }
227 | x.start <- adj.start - 0.5
228 |
229 | if (y.start > 0) {
230 | grid.lines(x = c(x.start, x.start + 1),
231 | y = c(y.start - 0.5, y.start - 0.5), default.units = "native",
232 | gp=gpar(col = "black", lwd = 1.75))
233 | grid.lines(x = c(x.start + 1, x.start + 1),
234 | y = c(y.start - 0.5 , -0.5), default.units = "native",
235 | gp=gpar(col = "black", lwd = 1.75))
236 | grid.lines(x = c(x.start, x.start),
237 | y = c(y.start - 0.5, 6.5), default.units = "native",
238 | gp=gpar(col = "black", lwd = 1.75))
239 | if (y.end < 6 ) {
240 | grid.lines(x = c(x.start + 1, x.finis + 1),
241 | y = c(-0.5, -0.5), default.units = "native",
242 | gp=gpar(col = "black", lwd = 1.75))
243 | grid.lines(x = c(x.start, x.finis),
244 | y = c(6.5, 6.5), default.units = "native",
245 | gp=gpar(col = "black", lwd = 1.75))
246 | } else {
247 | grid.lines(x = c(x.start + 1, x.finis),
248 | y = c(-0.5, -0.5), default.units = "native",
249 | gp=gpar(col = "black", lwd = 1.75))
250 | grid.lines(x = c(x.start, x.finis),
251 | y = c(6.5, 6.5), default.units = "native",
252 | gp=gpar(col = "black", lwd = 1.75))
253 | }
254 | } else {
255 | grid.lines(x = c(x.start, x.start),
256 | y = c( - 0.5, 6.5), default.units = "native",
257 | gp=gpar(col = "black", lwd = 1.75))
258 | }
259 |
260 | if (y.start == 0 ) {
261 | if (y.end < 6 ) {
262 | grid.lines(x = c(x.start, x.finis + 1),
263 | y = c(-0.5, -0.5), default.units = "native",
264 | gp=gpar(col = "black", lwd = 1.75))
265 | grid.lines(x = c(x.start, x.finis),
266 | y = c(6.5, 6.5), default.units = "native",
267 | gp=gpar(col = "black", lwd = 1.75))
268 | } else {
269 | grid.lines(x = c(x.start + 1, x.finis),
270 | y = c(-0.5, -0.5), default.units = "native",
271 | gp=gpar(col = "black", lwd = 1.75))
272 | grid.lines(x = c(x.start, x.finis),
273 | y = c(6.5, 6.5), default.units = "native",
274 | gp=gpar(col = "black", lwd = 1.75))
275 | }
276 | }
277 | for (j in 1:12) {
278 | last.month <- max(dates.fsubs$seq[dates.fsubs$month == j])
279 | x.last.m <- dates.fsubs$woty[last.month] + 0.5
280 | y.last.m <- dates.fsubs$dotw[last.month] + 0.5
281 | grid.lines(x = c(x.last.m, x.last.m), y = c(-0.5, y.last.m),
282 | default.units = "native", gp=gpar(col = "black", lwd = 1.75))
283 | if ((y.last.m) < 6) {
284 | grid.lines(x = c(x.last.m, x.last.m - 1), y = c(y.last.m, y.last.m),
285 | default.units = "native", gp=gpar(col = "black", lwd = 1.75))
286 | grid.lines(x = c(x.last.m - 1, x.last.m - 1), y = c(y.last.m, 6.5),
287 | default.units = "native", gp=gpar(col = "black", lwd = 1.75))
288 | } else {
289 | grid.lines(x = c(x.last.m, x.last.m), y = c(- 0.5, 6.5),
290 | default.units = "native", gp=gpar(col = "black", lwd = 1.75))
291 | }
292 | }
293 | }
294 | }
295 | trellis.unfocus()
296 | }
297 | lattice.options(default.theme = def.theme)
298 | }
299 |
300 |
301 | ## Calendar heatmaps of Victims for two consecutive six-year windows
302 |
303 | # Window 1: rows whose Date falls in 2012 through 2017 (inclusive).
304 | subset_dc <- calendar_dead[year(Date) %in% 2012:2017]
305 | heatmap_calendar(subset_dc$Date, subset_dc$Victims, varname = "Victims")
306 |
307 | # Window 2: `2012:2017-6` parses as (2012:2017) - 6, i.e. 2006 through 2011.
308 | calendar_deadly <- calendar_dead[year(Date) %in% 2006:2011]
309 | heatmap_calendar(calendar_deadly$Date, calendar_deadly$Victims, varname = "Victims")
310 |
--------------------------------------------------------------------------------
/Melbourne Housing Market.R:
--------------------------------------------------------------------------------
1 | # Melbourne Housing Market
2 |
3 | ## Importing the Libraries
4 | library(ggplot2)
5 | library(plyr) # attached BEFORE dplyr so dplyr's verbs are not masked by plyr
6 | library(dplyr)
7 | library(scales)
8 | library(readr)
9 |
10 | ## Getting the Dataset
11 | housing <- read.csv("./Melbourne Housing.csv")
12 | head(housing)
13 | summary(housing)
14 |
15 | ## Checking for NA Values
16 | any(is.na(housing))
17 |
18 | ## Keep rows with a known Price, then isolate the 10 highest / 10 lowest prices
19 | housing_filter <- filter(housing, !is.na(Price)) # explicit NA test instead of comparing with the string "NA"
20 | n <- length(housing_filter$Price)
21 | maxprice <- sort(housing_filter$Price, partial = n - 9)[n - 9] # 10th largest price
22 | minprice <- sort(housing_filter$Price, partial = 10)[10] # 10th smallest price
23 | housing_maxfilter <- filter(housing_filter, Price >= maxprice)
24 | housing_minfilter <- filter(housing_filter, Price <= minprice)
25 |
26 | ## Exploratory Data Analysis
27 | ggplot(housing_maxfilter, aes(Suburb, Price)) + geom_bar(stat = "identity") + scale_y_continuous(labels = comma)
28 | ggplot(housing_minfilter, aes(Suburb, Price)) + geom_bar(stat = "identity") + scale_y_continuous(labels = comma)
29 | ggplot(housing_filter, aes(Price)) + geom_freqpoly(bins = 50) + scale_x_continuous(labels = comma) # qplot() is deprecated
30 |
--------------------------------------------------------------------------------
/NFL Draft.R:
--------------------------------------------------------------------------------
1 | ### NFL Draft
2 |
3 | # Load Libraries
4 | # Hide only package start-up chatter; genuine warnings from the fits stay visible.
5 | suppressPackageStartupMessages(library(dplyr))
6 | library(ggplot2)
7 | library(repr)
8 |
9 | setwd("./Kaggle") # NOTE(review): relative path, only resolves from the directory that contains Kaggle/
10 |
11 |
12 | # Read the Data
13 | draft <- read.csv("./NFL Draft.csv")
14 | head(draft)
15 | str(draft)
16 |
17 | # Checking for the NA Values
18 |
19 | any(is.na(draft))
20 |
21 | # Polynomial fits of DrAV on Pick (degree 1 to 7), using picks below 257 with complete rows
22 | draft <- draft %>% select(Pick, DrAV) %>% filter(Pick < 257) %>% na.omit()
23 | fit.1 <- lm(DrAV ~ Pick, data = draft)
24 | fit.2 <- lm(DrAV ~ poly(Pick, 2), data = draft)
25 | fit.3 <- lm(DrAV ~ poly(Pick, 3), data = draft)
26 | fit.4 <- lm(DrAV ~ poly(Pick, 4), data = draft)
27 | fit.5 <- lm(DrAV ~ poly(Pick, 5), data = draft)
28 | fit.6 <- lm(DrAV ~ poly(Pick, 6), data = draft)
29 | fit.7 <- lm(DrAV ~ poly(Pick, 7), data = draft)
30 | # Compare the nested fits in order of increasing degree
31 | anova(fit.1, fit.2, fit.3, fit.4, fit.5, fit.6, fit.7)
32 |
33 | # Draft Pick Exploratory Analysis
34 | # In-sample fitted values of the degree-5 model, averaged per pick and plotted
35 | draft$y_hat <- predict(fit.5)
36 | group_by_pick <- draft %>% group_by(Pick) %>% summarise(predicted_av = mean(y_hat)) %>% data.frame()
37 | options(repr.plot.width = 4, repr.plot.height = 3)
38 | ggplot(group_by_pick, aes(Pick, predicted_av)) + geom_point(color = "blue")
39 |
40 |
41 | # Quarterback Draft Analysis
42 |
43 | qb <- read.csv("./NFL Draft.csv") # same spelling as the first read; "Nfl" fails on case-sensitive file systems
44 | qb <- qb %>% filter(Position.Standard == "QB") %>% select(Pick, DrAV) %>% filter(Pick < 257) %>% na.omit()
45 | fit.5 <- lm(DrAV ~ poly(Pick, 5), data = qb)
46 | new <- data.frame(Pick = seq_len(256))
47 | y_hat <- predict(fit.5, new, se.fit = TRUE)
48 | df <- data.frame(Pick = new$Pick, y_hat = unname(y_hat$fit)) # one prediction per pick, keyed by Pick
49 | qb <- inner_join(qb, df, by = "Pick") # the old `by=` had no shared column to join on
50 | group_by_pick <- qb %>% group_by(Pick) %>% summarise(predicted_av = mean(y_hat)) %>% data.frame()
51 | options(repr.plot.width = 4, repr.plot.height = 3)
52 | ggplot(group_by_pick, aes(Pick, predicted_av)) + geom_point()
53 |
54 |
55 | qb <- read.csv("./NFL Draft.csv") # same spelling as the first read; "Nfl" fails on case-sensitive file systems
56 | qb <- filter(qb, Position.Standard == "QB")
57 | qb <- qb %>% select(Pick, DrAV) %>% filter(Pick < 257) %>% na.omit()
58 | fit.5 <- lm(DrAV ~ poly(Pick, 5), data = qb)
59 | new <- data.frame(Pick = seq_len(256))
60 | y_hat <- predict(fit.5, new, se.fit = TRUE)
61 | # With se.fit = TRUE predict() returns a list; take its `fit` element instead of slicing unlist()
62 | Predicted_AV <- unname(y_hat$fit)
63 | df <- data.frame(Pick = new$Pick, Predicted_AV)
64 | ggplot(df, aes(Pick, Predicted_AV)) + geom_point(color = "orange")
65 |
66 |
67 |
68 | # Running Backs in Draft
69 |
70 | rb <- read.csv("./NFL Draft.csv") # same spelling as the first read; "Nfl" fails on case-sensitive file systems
71 | rb <- filter(rb, Position.Standard == "RB")
72 | rb <- rb %>% select(Pick, DrAV) %>% filter(Pick < 257) %>% na.omit() # was `qb_df`, which is never defined
73 | fit.5 <- lm(DrAV ~ poly(Pick, 5), data = rb)
74 | new <- data.frame(Pick = seq_len(256))
75 | y_hat <- predict(fit.5, new, se.fit = TRUE)
76 | # With se.fit = TRUE predict() returns a list; take its `fit` element instead of slicing unlist()
77 | Predicted_AV <- unname(y_hat$fit)
78 | df <- data.frame(Pick = new$Pick, Predicted_AV)
79 | ggplot(df, aes(Pick, Predicted_AV)) + geom_point(color = "red")
80 | head(df)
81 |
82 |
83 |
--------------------------------------------------------------------------------
/NFL Draft.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | output:
3 | word_document: default
4 | html_document: default
5 | ---
6 | ### NFL Draft
7 |
8 | # Load Libraries
9 | ```{r}
10 | options(warn=-1)
11 | library(dplyr)
12 | library(ggplot2)
13 | library(repr)
14 | ```
15 |
16 | # Read the Data
17 | ```{r}
18 | draft <- read.csv('./NFL Draft.csv')
19 | head(draft)
20 | str(draft)
21 | ```
22 | # Checking for the NA Values
23 |
24 | ```{r}
25 | any(is.na(draft))
26 | ```
27 |
28 | # Creating the Linear Regression
29 |
30 | ```{r}
31 | draft <- draft %>% select(Pick, DrAV) %>% filter(Pick<257) %>% na.omit()
32 | fit.1 <- lm(DrAV ~ Pick, data=draft)
33 | fit.2 <- lm(DrAV ~ poly(Pick,2), data=draft)
34 | fit.3 <- lm(DrAV ~ poly(Pick,3), data=draft)
35 | fit.4 <- lm(DrAV ~ poly(Pick,4), data=draft)
36 | fit.5 <- lm(DrAV ~ poly(Pick,5), data=draft)
37 | fit.6 <- lm(DrAV ~ poly(Pick,6), data=draft)
38 | fit.7 <- lm(DrAV ~ poly(Pick,7), data=draft)
39 |
40 | anova(fit.1, fit.2, fit.3, fit.4, fit.5, fit.6, fit.7)
41 | ```
42 |
43 | Created a linear model to fit all seven rounds of the NFL Draft for predictions with the modern draft.
44 |
45 | # Draft Pick Exploratory Analysis
46 |
47 |
48 | ```{r}
49 | draft$y_hat <- predict(fit.5)
50 | group_by_pick <- draft %>% group_by(Pick) %>% summarise(predicted_av = mean(y_hat)) %>% data.frame()
51 | options(repr.plot.width=4, repr.plot.height=3)
52 | ggplot(group_by_pick, aes(Pick, predicted_av)) + geom_point(color='blue')
53 | ```
54 |
55 | # Quarterback Draft Analysis
56 |
57 |
58 |
59 | ```{r}
60 | qb <- read.csv("./NFL Draft.csv") # same spelling as the first chunk; "Nfl" fails on case-sensitive file systems
61 | qb <- filter(qb, Position.Standard == "QB")
62 | qb <- qb %>%
63 |   select(Pick, DrAV) %>%
64 |   filter(Pick < 257) %>%
65 |   na.omit()
66 | fit.5 <- lm(DrAV ~ poly(Pick, 5), data = qb)
67 | new <- data.frame(Pick = seq_len(256))
68 | y_hat <- predict(fit.5, new, se.fit = TRUE)
69 | # With se.fit = TRUE predict() returns a list; take its `fit` element instead of slicing unlist()
70 | Predicted_AV <- unname(y_hat$fit)
71 | df <- data.frame(Pick = new$Pick, Predicted_AV)
72 | ggplot(df, aes(Pick, Predicted_AV)) + geom_point(color = "orange")
73 | ```
74 |
75 |
76 | # Running Backs in Draft
77 |
78 | ```{r}
79 | rb <- read.csv("./NFL Draft.csv") # same spelling as the first chunk; "Nfl" fails on case-sensitive file systems
80 | rb <- filter(rb, Position.Standard == "RB")
81 | rb <- rb %>%
82 |   select(Pick, DrAV) %>%
83 |   filter(Pick < 257) %>%
84 |   na.omit()
85 | fit.5 <- lm(DrAV ~ poly(Pick, 5), data = rb)
86 | new <- data.frame(Pick = seq_len(256))
87 | y_hat <- predict(fit.5, new, se.fit = TRUE)
88 | # With se.fit = TRUE predict() returns a list; take its `fit` element instead of slicing unlist()
89 | Predicted_AV <- unname(y_hat$fit)
90 | df <- data.frame(Pick = new$Pick, Predicted_AV)
91 | ggplot(df, aes(Pick, Predicted_AV)) + geom_point(color = "red")
92 | head(df)
93 | ```
94 |
95 |
--------------------------------------------------------------------------------
/NFL_Draft.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atse0612/Side-Projects-For-Fun/b2f21804b95d678df6553493d3cb6fc9635bc682/NFL_Draft.pdf
--------------------------------------------------------------------------------
/NYSE/NYSE.R:
--------------------------------------------------------------------------------
1 | # NYSE Analysis
2 |
3 | ##Changing the Working Directory
4 |
5 | setwd('./Kaggle/NYSE')
6 |
7 | ## Loading the Libraries
8 |
9 | library(Quandl)
10 | library(ggplot2)
11 | library(readr)
12 | library(dplyr)
13 | library(quantmod)
14 |
15 | ## Reading the Data
16 |
17 | nyse<-read.table("./prices.csv",header = TRUE,sep=",") # load the comma-separated price file; the first row supplies the column names
18 | head(nyse) # preview the first rows to see the columns
19 | unique(nyse$symbol) # list the distinct ticker symbols (author's note: 501 when this was written)
20 |
21 |
22 | ## Picking Apple
23 |
24 | aapl <- subset(nyse, symbol == "AAPL") # rows for Apple only
25 |
26 | aapl[1:3, ]
27 | close <- aapl$close # vector of closing prices
28 | close[1:3]
29 | plot(close, type = "l", xlab = "days", ylab = "Closing Price", main = "Plotting Line Chart of Apple Stock") # line chart of closes
30 | returns <- diff(close) / head(close, -1) # arithmetic daily return (P[t] - P[t-1]) / P[t-1]; assumes rows are oldest-first
31 | plot(returns, type = "l", xlab = "AAPL", main = "Plotting daily returns of AAPL Stock") # daily return chart
32 | macd <- MACD(close, nFast = 12, nSlow = 26, nSig = 9, maType = SMA, percent = FALSE) # MACD calculation
33 | ohlc <- aapl[c("date", "open", "high", "low", "close", "volume")] # date/OHLC/volume columns, e.g. for a candlestick chart
34 | head(ohlc)
35 |
36 |
37 |
38 | ## Picking RL
39 |
40 | RL <- subset(nyse, symbol == "RL") # rows for Ralph Lauren only
41 |
42 | RL[1:3, ]
43 | close <- RL$close # vector of closing prices
44 | close[1:3]
45 | plot(close, type = "l", xlab = "days", ylab = "Closing Price", main = "Plotting Line Chart of RL Stock") # line chart of closes
46 | returns <- diff(close) / head(close, -1) # arithmetic daily return (P[t] - P[t-1]) / P[t-1]; assumes rows are oldest-first
47 | plot(returns, type = "l", xlab = "RL", main = "Plotting daily returns of RL Stock") # daily return chart
48 | macd <- MACD(close, nFast = 12, nSlow = 26, nSig = 9, maType = SMA, percent = FALSE) # MACD calculation
49 | ohlc <- RL[c("date", "open", "high", "low", "close", "volume")] # date/OHLC/volume columns, e.g. for a candlestick chart
50 | head(ohlc)
51 |
52 |
53 | ## Picking FB
54 |
55 |
56 | FB <- subset(nyse, symbol == "FB") # rows for Facebook only
57 |
58 | FB[1:3, ]
59 | close <- FB$close # vector of closing prices
60 | close[1:3]
61 | plot(close, type = "l", xlab = "days", ylab = "Closing Price", main = "Plotting Line Chart of FB Stock") # line chart of closes
62 | returns <- diff(close) / head(close, -1) # arithmetic daily return (P[t] - P[t-1]) / P[t-1]; assumes rows are oldest-first
63 | plot(returns, type = "l", xlab = "FB", main = "Plotting daily returns of FB Stock") # axis label was a copy-pasted "RL"
64 | macd <- MACD(close, nFast = 12, nSlow = 26, nSig = 9, maType = SMA, percent = FALSE) # MACD calculation
65 | ohlc <- FB[c("date", "open", "high", "low", "close", "volume")] # date/OHLC/volume columns, e.g. for a candlestick chart
66 | head(ohlc)
67 |
68 |
69 | ## Picking PG
70 |
71 |
72 | PG <- subset(nyse, symbol == "PG") # rows for PG only
73 |
74 | PG[1:3, ]
75 | close <- PG$close # vector of closing prices
76 | close[1:3]
77 | plot(close, type = "l", xlab = "days", ylab = "Closing Price", main = "Plotting Line Chart of PG Stock") # line chart of closes
78 | returns <- diff(close) / head(close, -1) # arithmetic daily return (P[t] - P[t-1]) / P[t-1]; assumes rows are oldest-first
79 | plot(returns, type = "l", xlab = "PG", main = "Plotting daily returns of PG Stock") # axis label was a copy-pasted "RL"
80 | macd <- MACD(close, nFast = 12, nSlow = 26, nSig = 9, maType = SMA, percent = FALSE) # MACD calculation
81 | ohlc <- PG[c("date", "open", "high", "low", "close", "volume")] # date/OHLC/volume columns, e.g. for a candlestick chart
82 | head(ohlc)
83 |
84 |
85 |
--------------------------------------------------------------------------------
/Norwegian Development Funds.R:
--------------------------------------------------------------------------------
1 | #### Norwegian Development Funds
2 |
3 |
4 | # Change Working Directory
5 | setwd('./Kaggle')
6 |
7 |
8 | # Load the Libraries
9 |
10 | library(readr)
11 | library(ggplot2)
12 | library(tidyverse)
13 | library(dplyr)
14 |
15 |
16 | # Getting the Data
17 | ndf <- read_csv("./funds.csv") # load the funds table with readr
18 | head(ndf) # preview the first rows
19 | summary(ndf) # per-column summary
20 |
21 | # Checking for NA Values
22 | any(is.na(ndf)) # TRUE if any cell in the table is NA
23 |
24 | # Cleaning the Data
25 | names(ndf) <- make.names(names(ndf)) # turn the headers into syntactically valid R names
26 | ndf[, grep("NA.", colnames(ndf))] <- NULL # drop columns whose name matches "NA." -- NOTE(review): the dot is an unescaped wildcard (any character after "NA"); confirm this matches only the intended columns
27 |
28 | # Exploratory Data Analysis
29 |
30 | ndf %>%
31 |   group_by(Recipient.Region, Year) %>%
32 |   summarise(Disbursements = sum(Disbursements..1000...) / 1000) %>%
33 |   ungroup() %>%
34 |   ggplot(aes(x = Year, y = Disbursements)) +
35 |   geom_col(aes(fill = Recipient.Region)) + facet_wrap(~Recipient.Region)
36 | # The Middle East is slowly picking up, while Asia is in decline.
37 | # It is interesting to see the same chart in terms of the size of the contract.
38 | # Are certain regions getting bigger "support packages" than others?
39 | # Are there material differences in geographical vs non-geographical contracts?
40 | # Let's look at the mean disbursement per region and year.
41 |
42 | ndf %>%
43 |   group_by(Recipient.Region, Year) %>%
44 |   summarise(Mean_Disbursement = mean(Disbursements..1000...) / 1000) %>%
45 |   ungroup() %>%
46 |   ggplot(aes(x = Year, y = Mean_Disbursement)) +
47 |   geom_col(aes(fill = Year)) + facet_wrap(~Recipient.Region)
48 |
49 | # Non Geographical-Projects: total disbursements per main sector
50 | ndf %>%
51 |   dplyr::filter(Recipient.Region == "Not geographically allocated") %>%
52 |   group_by(Main.Sector) %>%
53 |   summarise(Disbursements = sum(Disbursements..1000...) / 1000) %>%
54 |   ungroup() %>%
55 |   ggplot(aes(x = Main.Sector, y = Disbursements)) +
56 |   geom_col(aes(fill = Disbursements)) + coord_flip()
57 | # Administration costs make up a very large share of these disbursements,
58 | # so the next chart breaks the "910 - Administration" sector down by budget chapter.
59 |
60 |
61 | ndf %>%
62 |   dplyr::filter(grepl("910 - Administration", Main.Sector)) %>%
63 |   group_by(Budget.Post..Chapter) %>%
64 |   summarise(Disbursements = sum(Disbursements..1000...) / 1000) %>%
65 |   ungroup() %>%
66 |   ggplot(aes(x = Budget.Post..Chapter, y = Disbursements)) +
67 |   geom_col(aes(fill = Disbursements)) + coord_flip()
68 |
69 |
--------------------------------------------------------------------------------
/Norwegian Development Funds.Rmd:
--------------------------------------------------------------------------------
1 | #### Norwegian Development Funds
2 |
3 |
4 | # Load the Libraries
5 |
6 | ```{r}
7 | library(readr)
8 | library(ggplot2)
9 | library(tidyverse)
10 | library(dplyr)
11 | ```
12 |
13 | # Getting the Data
14 |
15 | ```{r}
16 | ndf <- read_csv("./funds.csv")
17 | head(ndf)
18 | summary(ndf)
19 | ```
20 |
21 |
22 | # Checking for NA Values
23 |
24 | ```{r}
25 | any(is.na(ndf))
26 | ```
27 |
28 | # Cleaning the Data
29 |
30 | ```{r}
31 | names(ndf) <- make.names(names(ndf)) # turn the headers into syntactically valid R names
32 | ndf[, grep("NA.", colnames(ndf))] <- NULL # drop columns whose name matches "NA." -- NOTE(review): the dot is an unescaped wildcard (any character after "NA"); confirm this matches only the intended columns
33 | ```
34 |
35 |
36 | # Exploratory Data Analysis
37 |
38 |
39 | ```{r}
40 | ndf %>% group_by(Recipient.Region, Year) %>%
41 | summarise(Disbursements=sum(Disbursements..1000...)/1000) %>% ungroup() %>%
42 | ggplot(aes(x=Year, y=Disbursements)) +
43 | geom_bar(stat = "identity",aes(fill=Recipient.Region)) +
44 | facet_wrap(~Recipient.Region)
45 | ```
46 |
47 | The Middle East is slowly picking up, while Asia is in decline. It is interesting to see the same chart in terms of the size of the contract. Are certain regions getting bigger "support packages" than others? Are there material differences in geographical vs non-geographical contracts? Let's look at the mean disbursement per region and year.
48 |
49 | ```{r}
50 | ndf%>% group_by(Recipient.Region, Year) %>%
51 | summarise(Mean_Disbursement=mean(Disbursements..1000...)/1000) %>%
52 | ungroup() %>%
53 | ggplot(aes(x=Year, y=Mean_Disbursement)) +
54 | geom_bar(stat = "identity",aes(fill=Year)) + facet_wrap(~Recipient.Region)
55 | ```
56 |
57 | # Non Geographical-Projects
58 |
59 | ```{r}
60 | ndf%>% dplyr::filter(Recipient.Region=="Not geographically allocated") %>%
61 | group_by(Main.Sector) %>%
62 | summarise(Disbursements=sum(Disbursements..1000...)/1000) %>%
63 | ungroup() %>%
64 | ggplot(aes(x=Main.Sector, y=Disbursements)) +
65 | geom_bar(stat = "identity",aes(fill=Disbursements)) + coord_flip()
66 | ```
67 |
68 | It has been determined that administration costs are very high up for the disbursements.
69 |
70 | ```{r}
71 | ndf %>%
72 | dplyr::filter(grepl("910 - Administration", Main.Sector)) %>%
73 | group_by(Budget.Post..Chapter) %>%
74 | summarise(Disbursements=sum(Disbursements..1000...)/1000) %>%
75 | ungroup() %>%
76 | ggplot(aes(x=Budget.Post..Chapter, y=Disbursements)) +
77 | geom_bar(stat = "identity",aes(fill=Disbursements)) + coord_flip()
78 | ```
79 |
--------------------------------------------------------------------------------
/NorwegianDevelopmentFunds.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atse0612/Side-Projects-For-Fun/b2f21804b95d678df6553493d3cb6fc9635bc682/NorwegianDevelopmentFunds.pdf
--------------------------------------------------------------------------------
/Pokemon Mining/Pokemon Data Mining.R:
--------------------------------------------------------------------------------
1 | # Pokemon Data Mining
2 |
3 | ## Loading the Libraries
4 |
5 | library(ggplot2)
6 | library(dplyr)
7 | library(gridExtra)
8 | library(fmsb)
9 | library(corrplot)
10 | library(corrgram)
11 | library(caTools)
12 | library(gplots)
13 | library(RColorBrewer)
14 |
15 | ## Changing the Working Directory
16 |
17 | setwd('./Kaggle/Pokemon Mining')
18 |
19 | ## Reading the Dataset and Grouping the Data
20 |
21 | pokmon <- read.csv("./pokemon_alopez247.csv", sep = ",")
22 |
23 | ## Per-body-style means for rows where hasGender is "True"
24 | # The first summary is the mean of Attack, so it is named avgAttack (it was mislabelled avgHP).
25 | group <- pokmon %>%
26 |   filter(hasGender == "True") %>%
27 |   group_by(Body_Style) %>%
28 |   select(HP, Attack, Defense, Sp_Atk, Sp_Def, Speed, Pr_Male, Height_m, Weight_kg, Catch_Rate) %>%
29 |   summarise(avgAttack = mean(Attack), avgDefense = mean(Defense), avgSPAttack = mean(Sp_Atk), avgSPDef = mean(Sp_Def), avgProbMale = mean(Pr_Male), avgHeight = mean(Height_m), avgWeight = mean(Weight_kg), avgCatch = mean(Catch_Rate))
30 |
31 |
32 | ## Creating the Radar Plots
33 |
34 | # Axis bounds for the eight summary columns, in column order. NOTE: these names shadow base max()/min(); kept because later code uses them.
35 | max <- c(100, 100, 100, 100, 1, 3, 100, 200)
36 | min <- rep(0, 8)
37 | # One radar chart per body style on a 4 x 4 grid; rows passed as max, min, then the data row
38 | old_par <- par(mfrow = c(4, 4), mar = c(1, 1, 1, 1))
39 | for (i in seq_len(nrow(group))) { # seq_len() is safe when `group` has no rows
40 |   radarchart(rbind(max, min, group[i, 2:9]), axistype = 2,
41 |     pcol = rgb(0.2, 0.5, 0.5, 0.9), pfcol = rgb(0.2, 0.5, 0.5, 0.5),
42 |     plwd = 4, cglcol = "grey", cglty = 1, axislabcol = "grey", caxislabels = seq(0, 2000, 5),
43 |     cglwd = 0.8, vlcex = 0.6, title = as.character(group$Body_Style[i]))
44 | }
45 | par(old_par) # restore the previous layout and margins
46 |
47 |
48 | ## Creating a Heatmap
49 | # Plain data frame with body styles as row names, cut down to the eight summary columns
50 | grouped <- as.data.frame(group)
51 | rownames(grouped) <- grouped$Body_Style
52 | grouped <- grouped[, 2:9]
53 | group_matrix <- data.matrix(grouped)
54 | heatmap.2(group_matrix, Rowv = FALSE, Colv = FALSE, dendrogram = "none", cellnote = round(group_matrix, digits = 2), notecol = "black", trace = "none", key = FALSE, lwid = c(.01, .99), lhei = c(.01, .99), margins = c(8, 16))
55 |
56 |
57 | ## Racetrack Plot
58 |
59 | #select the average of Total (sum of all characteristics) and reorder the result
60 |
61 | # Mean of Total per body style, for rows where hasGender is "True"
62 | first <- pokmon %>%
63 |   filter(hasGender == "True") %>%
64 |   group_by(Body_Style) %>%
65 |   select(Total) %>%
66 |   summarise(avgTotal = mean(Total))
67 |
68 | # Print the ranking; `first` itself is left unsorted
69 | arrange(first, desc(avgTotal))
70 |
71 | # Factor whose levels follow avgTotal, used as the bar axis
72 | first$ReorderedBody <- reorder(first$Body_Style, first$avgTotal)
73 |
74 | ## Creating a Color Palette
75 | colorCount <- length(unique(group$Body_Style))
76 | getPalette <- colorRampPalette(brewer.pal(9, "Set1"))
77 |
78 | ## Defining Each Color Label for Each Bar
79 | first$LABEL <- paste0(round(first$avgTotal))
80 | ggplot(first, aes(x = ReorderedBody, y = avgTotal, fill = factor(ReorderedBody))) +
81 |   geom_bar(width = 0.9, stat = "identity") +
82 |   scale_fill_manual(values = getPalette(colorCount)) +
83 |   coord_polar(theta = "y") +
84 |   xlab("") +
85 |   ylab("") +
86 |   ylim(c(0, max(first$avgTotal))) +
87 |   ggtitle("Average Total per Body Style") +
88 |   geom_text(data = first, hjust = 1, size = 3, aes(x = Body_Style, y = 0, label = LABEL)) +
89 |   theme_minimal() +
90 |   guides(fill = guide_legend(title = NULL, reverse = TRUE)) +
91 |   theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_blank(),
92 |     axis.text.y = element_blank(), axis.text.x = element_blank(), axis.ticks = element_blank(), plot.title = element_text(hjust = 0.5))
93 |
94 |
95 |
96 | ## Mean/Median Comparison
97 |
98 | # Per-body-style medians in the same column order as `group`; the first column is the median of Attack (was mislabelled medHP)
99 | Median <- pokmon %>%
100 |   filter(hasGender == "True") %>%
101 |   group_by(Body_Style) %>%
102 |   select(HP, Attack, Defense, Sp_Atk, Sp_Def, Speed, Pr_Male, Height_m, Weight_kg, Catch_Rate) %>%
103 |   summarise(medAttack = median(Attack), medDefense = median(Defense), medSPAttack = median(Sp_Atk), medSPDef = median(Sp_Def), medProbMale = median(Pr_Male), medHeight = median(Height_m), medWeight = median(Weight_kg), medCatch = median(Catch_Rate))
104 |
105 | # Row 1 of each summary is charted under the "Bipedal_tailed" title (max row, min row, data row)
106 | Mean <- rbind(max, min, group[1, 2:9])
107 | byMedian <- rbind(max, min, Median[1, 2:9])
108 | op <- par(mar = c(1, 2, 2, 1), mfrow = c(1, 2))
109 | radarchart(Mean, vlcex = .6, title = "Bipedal_tailed : mean")
110 | radarchart(byMedian, vlcex = .6, title = "Bipedal_tailed : median")
111 | par(op) # restore the graphics settings saved in `op`
112 |
113 | # Histograms of three variables for the bipedal_tailed rows (filter computed once)
114 | bipedal_tailed <- filter(pokmon, hasGender == "True" & Body_Style == "bipedal_tailed")
115 | g1 <- ggplot(data = bipedal_tailed, aes(x = Pr_Male)) + geom_histogram(bins = 100)
116 | g2 <- ggplot(data = bipedal_tailed, aes(x = Catch_Rate)) + geom_histogram(bins = 100)
117 | g3 <- ggplot(data = bipedal_tailed, aes(x = Attack)) + geom_histogram(bins = 100)
118 | grid.arrange(g1, g2, g3, ncol = 3)
119 |
120 |
121 | ## Creating a Linear Model (Correlation with Numeric Variables)
122 | # Correlation matrix of the numeric columns for bipedal_tailed rows (Number dropped), drawn as ellipses
123 | pokemon <- pokmon %>%
124 |   filter(hasGender == "True" & Body_Style == "bipedal_tailed") %>%
125 |   select(-Number)
126 | num.cols <- vapply(pokemon, is.numeric, logical(1))
127 | cor.data <- cor(pokemon[, num.cols])
128 | corrPLOT <- corrplot(cor.data, method = "ellipse")
129 |
130 |
131 | ## Linear Model (3)
132 |
133 | pokemon1 <- pokmon %>%
134 |   filter(hasGender == "True") %>%
135 |   select(-Generation)
136 |
137 | ## Split data into Training/Testing
138 | # Fixed seed, then a 0.7 split ratio; rows flagged TRUE go to train, the rest to test
139 | set.seed(1562)
140 | split <- sample.split(pokemon1$Number, SplitRatio = .7)
141 | train <- subset(pokemon1, split == TRUE)
142 | test <- subset(pokemon1, split == FALSE)
143 |
144 | ## Summarize a Given Model
145 | # Prints `mod` and its summary, draws three diagnostic plots of predictions against the
146 | # observed Catch_Rate in `newdata`, and returns "MSE / RMSE / R2" as a single string.
147 | # `newdata` defaults to the global `test` split, so existing plotRes(mod) calls still work.
148 | plotRes <- function(mod, newdata = test) {
149 |   print(mod)
150 |   print(summary(mod)) # a bare summary() is not auto-printed inside a function
151 |   # predictions next to the observed values
152 |   mod.res <- data.frame(pred = predict(mod, newdata), real = newdata$Catch_Rate)
153 |   # predicted vs observed (red identity line), residual histogram, residuals vs predicted
154 |   g1 <- ggplot(data = mod.res, aes(x = pred, y = real)) + geom_point() + geom_abline(intercept = 0, slope = 1, color = "red")
155 |   g2 <- ggplot(data = mod.res, aes(x = real - pred)) + geom_histogram(bins = 100)
156 |   g3 <- ggplot(data = mod.res, aes(x = pred, y = real - pred)) + geom_point()
157 |   grid.arrange(g1, g2, g3, nrow = 2, ncol = 2)
158 |   # error metrics on `newdata`
159 |   resid <- mod.res$real - mod.res$pred
160 |   mse <- mean(resid^2)
161 |   rmse <- mse^0.5
162 |   SSE <- sum(resid^2)
163 |   SST <- sum((mean(newdata$Catch_Rate) - mod.res$real)^2)
164 |   R2 <- 1 - SSE / SST
165 |   sprintf("MSE: %f RMSE : %f R2 :%f", mse, rmse, R2)
166 | }
167 |
168 | # Linear Model
169 | # Catch_Rate regressed on nine predictors, fitted on `train` and evaluated by plotRes()
170 | linModel <- lm(Catch_Rate ~ HP + Attack + Defense + Sp_Atk + Sp_Def + Speed + Pr_Male + Height_m + Weight_kg, data = train)
171 | plotRes(linModel)
172 |
173 |
--------------------------------------------------------------------------------
/Pokemon Mining/final_model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atse0612/Side-Projects-For-Fun/b2f21804b95d678df6553493d3cb6fc9635bc682/Pokemon Mining/final_model.pkl
--------------------------------------------------------------------------------
/Pokemon Mining/report_Pokemon.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atse0612/Side-Projects-For-Fun/b2f21804b95d678df6553493d3cb6fc9635bc682/Pokemon Mining/report_Pokemon.pdf
--------------------------------------------------------------------------------
/Pokemon.R:
--------------------------------------------------------------------------------
1 | # Pokemon
2 |
3 | ## In this report, we are going to analyze the different types of Pokemon to check on types and frequencies.
4 |
5 |
6 |
7 | # Checking Frequencies
8 | setwd("./Kaggle")
9 | pokemon <- read.csv("./Pokemon.csv", header = TRUE)
10 | pokemon$Name <- as.character(pokemon$Name)
11 | rev(sort(table(pokemon$Type.1))) # Type.1 counts, largest first
12 | rev(sort(table(pokemon$Type.2))) # Type.2 counts, largest first
13 |
14 | library(ggplot2)
15 | library(ggthemes)
16 | library(corrplot)
17 | library(reshape2)
18 |
19 | # In this section, we fit a linear model of `total` on the six stat columns and plot its diagnostics.
20 | # NOTE(review): if `total` is simply the sum of those six columns the fit is exact -- confirm the intent.
21 | # Rename the 13 columns to short lower-case names first.
22 | names(pokemon) <- c("number", "name", "type1", "type2", "total", "hp",
23 |   "attack", "defense", "sp.atk", "sp.def", "speed",
24 |   "generation", "legendary")
25 | head(pokemon)
26 | poke <- lm(total ~ hp + attack + defense + sp.atk + sp.def + speed, data = pokemon)
27 | par(mfrow = c(2, 2))
28 | plot(poke)
29 |
30 | # Names (column 2) of rows 1, 3 and 6
31 | pokemon[c(1, 3, 6), 2]
32 |
33 |
34 |
35 |
36 |
37 |
38 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Side-Projects-For-Fun
2 |
3 | The purpose of this repository is to analyze a variety of datasets and draw fact-based conclusions that support better decision-making.
4 |
5 | The datasets vary and come from a number of different websites.
6 |
7 | It will consist of different programming languages.
8 |
9 | This repository consists of data that analyzes the following:
10 |
11 | Affordable Care Act
12 |
13 | Analyzes which US states had uninsured populations in the years 2010 and 2015.
14 |
15 | Education Data
16 |
17 | Performing data analyses in order to determine which gender performs well, takes advantage of resources, making predictions on which
18 | courses are the most common, and utilize models to predict which one is more accurate based on decision trees, SVM, or confusion matrix.
19 |
20 | Fake News
21 |
22 | Uses both Python and R to analyze fake news and to predict which type of news an item is by
23 | running a decision tree model in R.
24 | The analysis is also done in Python to measure the accuracy of a random forest model.
25 |
26 | Global Land Temperatures in Oakland and San Francisco
27 |
28 | Uses R to compare the temperatures around these two cities with the data recorded early in their
29 | history. The analysis involves converting the latitude and longitude, graphing the average monthly temperature in degrees
30 | Celsius, comparing the uncertainty between today and 100 years ago, and creating a random forest model for both cities.
31 |
32 | NFL Draft
33 |
34 | Uses R to predict the performance of draft picks over the last 30 years in order to see which rounds and picks have performed
35 | the best in their careers. Separate analyses are done for quarterbacks and running backs.
36 |
37 | Y Combinator
38 |
39 | Y Combinator is a venture capital organization that provides seed money for potential start-ups. Uses Python to predict which
40 | organizations and industries it would invest in.
41 |
--------------------------------------------------------------------------------
/Speed Dating Data.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/atse0612/Side-Projects-For-Fun/b2f21804b95d678df6553493d3cb6fc9635bc682/Speed Dating Data.csv
--------------------------------------------------------------------------------
/Welfare/SNAPerror.csv:
--------------------------------------------------------------------------------
1 | ST,Error
2 | CT,5.84
3 | ME,2.52
4 | MA,5.09
5 | NH,4.81
6 | NY,5.23
7 | RI,5.97
8 | VT,2.76
9 | DE,2.78
10 | MD,3.41
11 | NJ,1.43
12 | PA,4.27
13 | VA,4.73
14 | WV,4.9
15 | AL,2.03
16 | FL,0.42
17 | GA,6.49
18 | KY,6
19 | MS,1.16
20 | NC,4.98
21 | SC,1.09
22 | TN,1.08
23 | IL,5.27
24 | IN,4.76
25 | MI,2.99
26 | MN,6.87
27 | OH,4.67
28 | WI,2.55
29 | AR,5.58
30 | LA,1.55
31 | NM,6.22
32 | OK,5.58
33 | TX,0.63
34 | CO,4.26
35 | IA,4.6
36 | KS,0.75
37 | MO,1.5
38 | MT,7.25
39 | NE,2.98
40 | ND,1.73
41 | SD,1.26
42 | UT,2.79
43 | WY,5.19
44 | AK,0.89
45 | AZ,5.18
46 | CA,5.13
47 | HI,4.13
48 | ID,2.74
49 | NV,7.61
50 | OR,5.11
51 | WA,0.77
52 |
--------------------------------------------------------------------------------
/Welfare/UIerror.csv:
--------------------------------------------------------------------------------
1 | ST,Sample,Amount,Over,Under,Improper,BYE,Fraud,Agency,Work
2 | CA,"1,715","$16,956,798,074 ",6.93%,0.39%,7.32%,3.91%,4.91%,0.86%,0.13%
3 | TX,"1,454","$7,498,430,914 ",10.11%,0.24%,10.35%,2.03%,0.73%,1.56%,5.28%
4 | PA,"1,538","$7,133,177,891 ",9.23%,0.28%,9.51%,5.40%,6.19%,1.40%,0.00%
5 | NY,"1,445","$7,117,763,161 ",8.44%,0.32%,8.76%,1.98%,4.72%,1.81%,0.90%
6 | NJ,"1,452","$6,421,190,104 ",14.91%,1.59%,16.50%,3.34%,0.63%,1.98%,5.90%
7 | IL,"1,468","$5,584,275,026 ",14.09%,0.52%,14.61%,4.94%,1.53%,0.85%,7.60%
8 | MA,"1,540","$4,186,212,294 ",11.25%,0.56%,11.81%,4.94%,2.76%,2.57%,4.40%
9 | OH,"1,445","$3,024,921,930 ",8.91%,0.66%,9.57%,2.30%,1.59%,2.22%,3.46%
10 | WA,"1,248","$3,017,584,189 ",13.33%,0.23%,13.56%,1.98%,1.54%,0.60%,8.61%
11 | MI,"1,440","$2,500,031,867 ",22.60%,0.45%,23.05%,3.18%,2.42%,5.69%,17.23%
12 | MN,"1,463","$2,403,053,981 ",7.49%,0.34%,7.82%,5.08%,3.08%,0.60%,0.00%
13 | CT,"1,469","$2,102,220,297 ",6.48%,0.17%,6.65%,1.75%,2.06%,1.27%,2.62%
14 | MD,"1,491","$1,951,584,741 ",18.81%,0.19%,19.00%,5.01%,2.80%,1.59%,11.60%
15 | WI,"1,521","$1,907,839,572 ",19.96%,0.40%,20.35%,4.92%,3.83%,2.24%,12.31%
16 | OR,"1,448","$1,628,033,954 ",11.52%,0.67%,12.18%,2.22%,7.01%,2.20%,5.67%
17 | CO,"1,460","$1,563,741,964 ",10.81%,0.69%,11.50%,3.75%,1.48%,2.11%,2.55%
18 | GA,"1,440","$1,451,640,663 ",7.85%,0.00%,7.85%,2.08%,1.55%,1.01%,4.97%
19 | VA,"1,464","$1,429,554,781 ",7.46%,0.21%,7.67%,2.45%,1.32%,1.48%,2.60%
20 | NC,"1,558","$1,303,128,966 ",16.21%,0.62%,16.83%,4.07%,3.35%,1.71%,10.19%
21 | IA,"1,440","$1,219,787,168 ",8.96%,0.50%,9.46%,3.86%,0.93%,1.69%,0.28%
22 | IN,"1,446","$1,156,473,354 ",9.37%,0.10%,9.47%,3.62%,1.01%,2.62%,0.15%
23 | KY,"1,525","$1,095,889,549 ",11.26%,0.36%,11.63%,3.58%,3.39%,4.20%,3.14%
24 | MO,"1,440","$1,075,249,585 ",7.20%,0.11%,7.31%,3.41%,3.37%,0.67%,1.83%
25 | NV,"1,474","$1,046,096,955 ",24.63%,0.41%,25.04%,5.63%,3.92%,1.73%,15.07%
26 | OK,"1,449","$896,136,361 ",3.96%,0.23%,4.19%,2.00%,0.76%,0.98%,0.67%
27 | AZ,"1,441","$885,731,513 ",11.13%,0.06%,11.19%,3.92%,4.56%,2.35%,2.59%
28 | TN,"1,437","$868,021,609 ",22.32%,0.32%,22.64%,3.81%,3.05%,6.12%,12.88%
29 | KS,"1,442","$738,353,820 ",17.00%,0.13%,17.13%,3.56%,2.60%,2.58%,8.51%
30 | AR,"1,441","$736,933,973 ",9.73%,0.46%,10.19%,4.82%,5.28%,1.51%,0.00%
31 | AL,"1,443","$697,834,597 ",6.99%,0.13%,7.13%,3.22%,2.16%,0.77%,2.03%
32 | WV,"1,448","$685,370,018 ",3.65%,0.43%,4.08%,2.09%,1.08%,0.50%,0.20%
33 | FL,600,"$681,007,973 ",6.41%,0.03%,6.44%,2.33%,1.26%,1.95%,1.52%
34 | SC,"1,524","$601,798,820 ",14.20%,0.24%,14.44%,6.46%,4.84%,1.36%,7.94%
35 | NM,"1,484","$560,943,955 ",18.79%,0.35%,19.13%,3.85%,2.72%,3.05%,13.56%
36 | LA,"1,449","$540,247,945 ",9.83%,0.27%,10.10%,5.37%,4.07%,3.26%,0.10%
37 | UT,"1,445","$539,019,184 ",6.50%,0.24%,6.74%,1.58%,1.48%,0.62%,2.65%
38 | HI,"1,084","$531,658,743 ",3.48%,0.15%,3.63%,1.46%,0.96%,0.32%,0.49%
39 | RI,"1,446","$523,060,078 ",12.70%,0.43%,13.12%,2.66%,5.28%,1.94%,7.43%
40 | PR,"1,456","$494,880,795 ",8.07%,0.68%,8.74%,4.40%,2.68%,3.39%,0.10%
41 | AK,"1,444","$411,994,099 ",9.14%,0.35%,9.49%,3.00%,1.87%,0.84%,2.72%
42 | ND,"1,082","$407,266,907 ",15.45%,0.26%,15.71%,1.73%,0.38%,1.07%,10.01%
43 | ME,"1,447","$396,544,911 ",18.06%,0.34%,18.40%,2.56%,1.09%,1.97%,13.65%
44 | DC,"1,092","$392,138,796 ",14.04%,0.38%,14.41%,9.71%,3.83%,2.35%,1.69%
45 | MS,"1,505","$359,396,023 ",9.17%,0.28%,9.45%,4.46%,4.62%,2.25%,1.06%
46 | MT,"1,082","$324,791,812 ",8.41%,0.39%,8.80%,2.26%,1.63%,2.28%,3.44%
47 | ID,"1,457","$293,251,194 ",12.26%,0.32%,12.59%,2.95%,4.83%,1.48%,5.75%
48 | DE,"1,080","$255,174,354 ",12.60%,0.58%,13.17%,2.73%,2.67%,3.54%,5.29%
49 | NE,"1,081","$254,806,809 ",14.31%,0.30%,14.61%,3.62%,0.95%,2.36%,6.22%
50 | WY,"1,080","$251,902,817 ",11.82%,0.06%,11.88%,1.35%,1.62%,2.01%,5.12%
51 | VT,"1,081","$227,535,295 ",6.44%,0.47%,6.90%,2.19%,3.68%,0.36%,1.47%
52 | NH,"1,087","$200,260,946 ",6.17%,0.53%,6.70%,2.18%,0.90%,1.28%,0.74%
53 | SD,"1,080","$81,500,590 ",9.86%,0.12%,9.97%,1.65%,3.66%,2.05%,6.76%
54 |
--------------------------------------------------------------------------------
/Welfare/Welfare.R:
--------------------------------------------------------------------------------
1 | # Welfare
2 | #
3 | # Draws two US state choropleth maps with highcharter: the Fraud rate from
4 | # UIerror.csv and the payment Error rate from SNAPerror.csv.
5 |
6 |
7 | ### Loading the Libraries
8 | library(tidyverse)
9 | library(readr)
10 | library(ggmap)
11 | library(highcharter)
12 | data(usgeojson)
13 |
14 |
15 | ### Setting the Working Directory
16 |
17 | setwd('./Kaggle/Welfare')
18 |
19 | ## Reading the Data
20 | welfare <- read.csv('./UIerror.csv', stringsAsFactors = FALSE)
21 | snap <- read.csv('./SNAPerror.csv', stringsAsFactors = FALSE)
22 |
23 | # Fraud is stored as text such as "4.91%"; strip the percent sign so the map
24 | # receives a numeric value. (Written as `welfare< - welfare`, R parses a
25 | # comparison against a negated data frame instead of an assignment.)
26 | welfare <- welfare %>%
27 |   mutate(Fraud = as.numeric(sub("%", "", Fraud, fixed = TRUE)))
28 |
29 | ## Reading NA
30 |
31 | any(is.na(welfare))
32 | any(is.na(snap))
33 |
34 | ## Map helper
35 |
36 | # Build a US state map coloured by column `value` of `df`.
37 | #   df         data frame with a state postal code column named ST
38 | #   value      name of the column of `df` to plot
39 | #   title      chart title (right aligned)
40 | #   source_url shown as the credits text and used as the credits link
41 | # Rows are joined to the usgeojson states by postal code.
42 | state_map <- function(df, value, title, source_url) {
43 |   highchart() %>%
44 |     hc_title(text = title, align = "right") %>%
45 |     hc_add_series_map(usgeojson, df = df,
46 |                       value = value, joinBy = c("postalcode", "ST")) %>%
47 |     hc_mapNavigation(enabled = TRUE) %>%
48 |     hc_add_theme(hc_theme_538()) %>%
49 |     hc_credits(enabled = TRUE, text = source_url, href = source_url)
50 | }
51 |
52 | ## Welfare Fraud By State
53 |
54 | state_map(welfare, "Fraud", "Welfare Fraud Rate by State",
55 |           "https://www.dol.gov/general/maps/data")
56 |
57 | ## SNAP Payment Error Rate By State
58 |
59 | state_map(snap, "Error", "SNAP Payment Error Rate",
60 |           "https://www.fns.usda.gov/sites/default/files/snap/2014-rates.pdf")
--------------------------------------------------------------------------------
/World Food Facts.R:
--------------------------------------------------------------------------------
1 | ### World Food Facts
2 | #
3 | # For the ten countries with the most products in FoodFacts.csv, compares the
4 | # share of products categorised as meat and the share labelled vegan.
5 |
6 |
7 | setwd('./Kaggle')
8 |
9 | # Load Libraries:
10 |
11 | library(ggplot2)
12 | library(dplyr)
13 | library(data.table)
14 | library(caret)
15 | library(corrplot)
16 |
17 | # Reading the Data
18 | food <- read.csv('./FoodFacts.csv')
19 | head(food)
20 | summary(food)
21 |
22 | # Checking for NA Values
23 | any(is.na(food))
24 |
25 | ## Helpers
26 |
27 | # Tabulate a vector of country strings into a data frame with columns
28 | # Var1 (country string) and Freq (number of products).
29 | # NOTE(review): the first row is dropped exactly as before -- presumably it is
30 | # the blank country entry, which sorts first; confirm against the data.
31 | count_countries <- function(countries) {
32 |   counts <- as.data.frame(table(countries))
33 |   names(counts) <- c("Var1", "Freq")
34 |   counts[-1, ]
35 | }
36 |
37 | # Total number of products whose country string contains `country`, so that
38 | # multi-country entries (e.g. "France,Germany") count towards each country.
39 | # fixed = TRUE matches the name literally instead of as a regular expression.
40 | country_total <- function(country, counts) {
41 |   sum(counts$Freq[grepl(country, counts$Var1, fixed = TRUE)])
42 | }
43 |
44 | # The (up to) `n` most frequent rows of `counts`, with Freq replaced by the
45 | # total from country_total(). With drop_combined = TRUE, rows naming several
46 | # countries (they contain a comma) are removed after taking the top `n`;
47 | # a logical mask is used because `x[-grep(...), ]` would drop every row when
48 | # nothing matches.
49 | top_countries <- function(counts, n = 10, drop_combined = FALSE) {
50 |   top <- head(counts[order(-counts$Freq), ], n)
51 |   if (drop_combined) {
52 |     top <- top[!grepl(",", top$Var1, fixed = TRUE), ]
53 |   }
54 |   top <- droplevels(top)
55 |   top$Freq <- vapply(
56 |     as.character(top$Var1), country_total, numeric(1),
57 |     counts = counts, USE.NAMES = FALSE
58 |   )
59 |   top
60 | }
61 |
62 | ## Cleaning the Data
63 |
64 | # Meat and Vegan: row indices of products whose category mentions meat and
65 | # of products whose labels mention vegan.
66 |
67 | meat <- grep("meat|Meat", food$categories_en)
68 | vegan <- grep("vegan|Vegan", food$labels)
69 |
70 |
71 | # Countries With the Highest Number of Products
72 |
73 | Countries <- count_countries(food$countries_en)
74 | Top <- top_countries(Countries)
75 |
76 | # Plotting the Number of Products Per Country
77 | barplot(Top$Freq, names.arg = Top$Var1, col = "lavender",
78 |         main = "Number of Products by Country", ylab = "Counts", las = 1)
79 |
80 | # Countries With the Highest Meat Products
81 | # Each row is totalled under its own country name (not the name found at the
82 | # same position in Top).
83 |
84 | Countries_meat <- count_countries(food$countries_en[meat])
85 | Top_meat <- top_countries(Countries_meat, drop_combined = TRUE)
86 |
87 | # Countries With the Highest Vegan Products
88 |
89 | Countries_vegan <- count_countries(food$countries_en[vegan])
90 | Top_vegan <- top_countries(Countries_vegan, drop_combined = TRUE)
91 |
92 |
93 | rm(food)
94 |
95 |
96 | # Change column names
97 | colnames(Top) <- c("Country", "Count")
98 | colnames(Top_meat) <- c("Country", "Count")
99 | colnames(Top_vegan) <- c("Country", "Count")
100 |
101 |
102 | # Merge to get overall results; only countries present in both tables remain
103 | Results_meat <- merge(Top, Top_meat, by = "Country")
104 | Results_vegan <- merge(Top, Top_vegan, by = "Country")
105 | colnames(Results_meat) <- c("Country", "Total no. of products", "No. of meat products")
106 | colnames(Results_vegan) <- c("Country", "Total no. of products", "No. of vegan products")
107 |
108 | # % of Meat / Vegan Products for Each Country
109 | Results_meat$MeatPerc <- Results_meat$`No. of meat products` /
110 |   Results_meat$`Total no. of products` * 100
111 | Results_vegan$VeganPerc <- Results_vegan$`No. of vegan products` /
112 |   Results_vegan$`Total no. of products` * 100
113 |
114 |
115 | ## Exploratory Data Analysis
116 |
117 | # Axis, theme and legend settings shared by the three bar charts below.
118 | share_layers <- list(
119 |   ylab("Percentage %"),
120 |   theme_classic(),
121 |   theme(legend.position = "none"),
122 |   theme(axis.text.x = element_text(size = 15, angle = 90)),
123 |   scale_x_discrete(name = "")
124 | )
125 |
126 | # Meat
127 | m <- ggplot(Results_meat, aes(x = reorder(Country, -MeatPerc), y = MeatPerc))
128 |
129 | m + geom_bar(stat = "identity", fill = "yellow", colour = "red") +
130 |   ggtitle("Countries with Highest % of Meat Products \n (in terms of no. of products submitted)") +
131 |   share_layers
132 |
133 |
134 | # Vegan
135 |
136 | v <- ggplot(Results_vegan, aes(x = reorder(Country, -VeganPerc), y = VeganPerc))
137 |
138 | v + geom_bar(stat = "identity", fill = "dark blue", colour = "green") +
139 |   ggtitle("Countries with Highest % of Vegan-labelled Products \n (in terms of no. of products submitted)") +
140 |   share_layers
141 |
142 |
143 | ### Removing Spain
144 | # A logical mask keeps all rows when no country matches "Spain".
145 |
146 | Results_vegan <- Results_vegan[!grepl("Spain", Results_vegan$Country), ]
147 |
148 |
149 | v <- ggplot(Results_vegan, aes(x = reorder(Country, -VeganPerc), y = VeganPerc))
150 |
151 | v + geom_bar(stat = "identity", fill = "orange", colour = "red", alpha = 0.5) +
152 |   ggtitle("Countries with Highest % of Vegan-labelled Products - Spain omitted \n (in terms of no. of products submitted)") +
153 |   share_layers
154 |
--------------------------------------------------------------------------------
/mcdonalds.csv:
--------------------------------------------------------------------------------
1 | Category,Item,Serving Size,Calories,Calories from Fat,Total Fat,Total Fat (% Daily Value),Saturated Fat,Saturated Fat (% Daily Value),Trans Fat,Cholesterol,Cholesterol (% Daily Value),Sodium,Sodium (% Daily Value),Carbohydrates,Carbohydrates (% Daily Value),Dietary Fiber,Dietary Fiber (% Daily Value),Sugars,Protein,Vitamin A (% Daily Value),Vitamin C (% Daily Value),Calcium (% Daily Value),Iron (% Daily Value)
2 | Breakfast,Egg McMuffin,4.8 oz (136 g),300,120,13,20,5,25,0,260,87,750,31,31,10,4,17,3,17,10,0,25,15
3 | Breakfast,Egg White Delight,4.8 oz (135 g),250,70,8,12,3,15,0,25,8,770,32,30,10,4,17,3,18,6,0,25,8
4 | Breakfast,Sausage McMuffin,3.9 oz (111 g),370,200,23,35,8,42,0,45,15,780,33,29,10,4,17,2,14,8,0,25,10
5 | Breakfast,Sausage McMuffin with Egg,5.7 oz (161 g),450,250,28,43,10,52,0,285,95,860,36,30,10,4,17,2,21,15,0,30,15
6 | Breakfast,Sausage McMuffin with Egg Whites,5.7 oz (161 g),400,210,23,35,8,42,0,50,16,880,37,30,10,4,17,2,21,6,0,25,10
7 | Breakfast,Steak & Egg McMuffin,6.5 oz (185 g),430,210,23,36,9,46,1,300,100,960,40,31,10,4,18,3,26,15,2,30,20
8 | Breakfast,"Bacon, Egg & Cheese Biscuit (Regular Biscuit)",5.3 oz (150 g),460,230,26,40,13,65,0,250,83,1300,54,38,13,2,7,3,19,10,8,15,15
9 | Breakfast,"Bacon, Egg & Cheese Biscuit (Large Biscuit)",5.8 oz (164 g),520,270,30,47,14,68,0,250,83,1410,59,43,14,3,12,4,19,15,8,20,20
10 | Breakfast,"Bacon, Egg & Cheese Biscuit with Egg Whites (Regular Biscuit)",5.4 oz (153 g),410,180,20,32,11,56,0,35,11,1300,54,36,12,2,7,3,20,2,8,15,10
11 | Breakfast,"Bacon, Egg & Cheese Biscuit with Egg Whites (Large Biscuit)",5.9 oz (167 g),470,220,25,38,12,59,0,35,11,1420,59,42,14,3,12,4,20,6,8,15,15
12 | Breakfast,Sausage Biscuit (Regular Biscuit),4.1 oz (117 g),430,240,27,42,12,62,0,30,10,1080,45,34,11,2,6,2,11,0,0,6,15
13 | Breakfast,Sausage Biscuit (Large Biscuit),4.6 oz (131 g),480,280,31,48,13,65,0,30,10,1190,50,39,13,3,11,3,11,4,0,8,15
14 | Breakfast,Sausage Biscuit with Egg (Regular Biscuit),5.7 oz (163 g),510,290,33,50,14,71,0,250,83,1170,49,36,12,2,6,2,18,6,0,10,20
15 | Breakfast,Sausage Biscuit with Egg (Large Biscuit),6.2 oz (177 g),570,330,37,57,15,74,0,250,83,1280,53,42,14,3,11,3,18,10,0,10,20
16 | Breakfast,Sausage Biscuit with Egg Whites (Regular Biscuit),5.9 oz (167 g),460,250,27,42,12,62,0,35,11,1180,49,34,11,2,6,3,18,0,0,8,15
17 | Breakfast,Sausage Biscuit with Egg Whites (Large Biscuit),6.4 oz (181 g),520,280,32,49,13,65,0,35,11,1290,54,40,13,3,11,3,18,4,0,8,15
18 | Breakfast,Southern Style Chicken Biscuit (Regular Biscuit),5 oz (143 g),410,180,20,31,8,41,0,30,10,1180,49,41,14,2,6,3,17,0,2,6,15
19 | Breakfast,Southern Style Chicken Biscuit (Large Biscuit),5.5 oz (157 g),470,220,24,37,9,45,0,30,10,1290,54,46,15,3,11,4,17,4,2,8,15
20 | Breakfast,Steak & Egg Biscuit (Regular Biscuit),7.1 oz (201 g),540,290,32,49,16,78,1,280,93,1470,61,38,13,2,8,3,25,10,2,20,25
21 | Breakfast,"Bacon, Egg & Cheese McGriddles",6.1 oz (174 g),460,190,21,32,9,44,0,250,84,1250,52,48,16,2,9,15,19,10,10,20,15
22 | Breakfast,"Bacon, Egg & Cheese McGriddles with Egg Whites",6.3 oz (178 g),400,140,15,24,7,34,0,35,11,1250,52,47,16,2,9,16,20,2,10,15,10
23 | Breakfast,Sausage McGriddles,5 oz (141 g),420,200,22,34,8,40,0,35,11,1030,43,44,15,2,8,15,11,0,0,8,10
24 | Breakfast,"Sausage, Egg & Cheese McGriddles",7.1 oz (201 g),550,280,31,48,12,61,0,265,89,1320,55,48,16,2,9,15,20,10,0,20,15
25 | Breakfast,"Sausage, Egg & Cheese McGriddles with Egg Whites",7.2 oz (205 g),500,230,26,40,10,52,0,50,17,1320,55,46,15,2,9,15,21,2,0,20,10
26 | Breakfast,"Bacon, Egg & Cheese Bagel",6.9 oz (197 g),620,280,31,48,11,56,0.5,275,92,1480,62,57,19,3,11,7,30,20,15,20,20
27 | Breakfast,"Bacon, Egg & Cheese Bagel with Egg Whites",7.1 oz (201 g),570,230,25,39,9,45,0.5,60,20,1480,62,55,18,3,12,8,30,10,15,20,15
28 | Breakfast,"Steak, Egg & Cheese Bagel",8.5 oz (241 g),670,310,35,53,13,63,1.5,295,99,1510,63,56,19,3,12,7,33,20,4,25,25
29 | Breakfast,Big Breakfast (Regular Biscuit),9.5 oz (269 g),740,430,48,73,17,87,0,555,185,1560,65,51,17,3,12,3,28,15,2,15,25
30 | Breakfast,Big Breakfast (Large Biscuit),10 oz (283 g),800,470,52,80,18,90,0,555,185,1680,70,56,19,4,17,3,28,15,2,15,30
31 | Breakfast,Big Breakfast with Egg Whites (Regular Biscuit),9.6 oz (272 g),640,330,37,57,14,69,0,35,12,1590,66,50,17,3,12,3,26,0,2,10,15
32 | Breakfast,Big Breakfast with Egg Whites (Large Biscuit),10.1 oz (286 g),690,370,41,63,14,72,0,35,12,1700,71,55,18,4,17,4,26,4,2,10,15
33 | Breakfast,Big Breakfast with Hotcakes (Regular Biscuit),14.8 oz (420 g),1090,510,56,87,19,96,0,575,192,2150,90,111,37,6,23,17,36,15,2,25,40
34 | Breakfast,Big Breakfast with Hotcakes (Large Biscuit),15.3 oz (434 g),1150,540,60,93,20,100,0,575,192,2260,94,116,39,7,28,17,36,15,2,30,40
35 | Breakfast,Big Breakfast with Hotcakes and Egg Whites (Regular Biscuit),14.9 oz (423 g),990,410,46,70,16,78,0,55,19,2170,91,110,37,6,23,17,35,0,2,25,30
36 | Breakfast,Big Breakfast with Hotcakes and Egg Whites (Large Biscuit),15.4 oz (437 g),1050,450,50,77,16,81,0,55,19,2290,95,115,38,7,28,18,35,4,2,25,30
37 | Breakfast,Hotcakes,5.3 oz (151 g),350,80,9,13,2,9,0,20,7,590,24,60,20,3,10,14,8,0,0,15,15
38 | Breakfast,Hotcakes and Sausage,6.8 oz (192 g),520,210,24,37,7,36,0,50,17,930,39,61,20,3,10,14,15,0,0,15,15
39 | Breakfast,Sausage Burrito,3.9 oz (111 g),300,150,16,25,7,33,0,115,38,790,33,26,9,1,5,2,12,10,2,15,15
40 | Breakfast,Hash Brown,2 oz (56 g),150,80,9,14,1.5,6,0,0,0,310,13,15,5,2,6,0,1,0,2,0,2
41 | Breakfast,Cinnamon Melts,4 oz (114 g),460,170,19,30,9,43,0,15,5,370,15,66,22,3,11,32,6,4,0,6,15
42 | Breakfast,Fruit & Maple Oatmeal,9.6 oz (251 g),290,35,4,6,1.5,8,0,5,2,160,7,58,19,5,19,32,5,2,130,10,10
43 | Breakfast,Fruit & Maple Oatmeal without Brown Sugar,9.6 oz (251 g),260,40,4,6,1.5,8,0,5,2,115,5,49,16,5,22,18,5,2,130,6,10
44 | Beef & Pork,Big Mac,7.4 oz (211 g),530,240,27,42,10,48,1,85,28,960,40,47,16,3,13,9,24,6,2,25,25
45 | Beef & Pork,Quarter Pounder with Cheese,7.1 oz (202 g),520,240,26,41,12,61,1.5,95,31,1100,46,41,14,3,11,10,30,10,2,30,25
46 | Beef & Pork,Quarter Pounder with Bacon & Cheese,8 oz (227 g),600,260,29,45,13,63,1.5,105,34,1440,60,48,16,3,12,12,37,6,15,25,30
47 | Beef & Pork,Quarter Pounder with Bacon Habanero Ranch,8.3 oz (235 g),610,280,31,48,13,64,1.5,105,35,1180,49,46,15,3,14,10,37,8,20,25,30
48 | Beef & Pork,Quarter Pounder Deluxe,8.6 oz (244 g),540,250,27,42,11,54,1.5,85,28,960,40,45,15,3,13,9,29,10,8,25,30
49 | Beef & Pork,Double Quarter Pounder with Cheese,10 oz (283 g),750,380,43,66,19,96,2.5,160,53,1280,53,42,14,3,11,10,48,10,2,30,35
50 | Beef & Pork,Hamburger,3.5 oz (98 g),240,70,8,12,3,15,0,30,10,480,20,32,11,1,6,6,12,2,2,10,15
51 | Beef & Pork,Cheeseburger,4 oz (113 g),290,100,11,18,5,27,0.5,45,15,680,28,33,11,2,7,7,15,6,2,20,15
52 | Beef & Pork,Double Cheeseburger,5.7 oz (161 g),430,190,21,32,10,52,1,90,30,1040,43,35,12,2,8,7,24,10,2,30,20
53 | Beef & Pork,Bacon Clubhouse Burger,9.5 oz (270 g),720,360,40,62,15,75,1.5,115,38,1470,61,51,17,4,14,14,39,8,25,30,25
54 | Beef & Pork,McDouble,5.2 oz (147 g),380,150,17,26,8,40,1,75,25,840,35,34,11,2,7,7,22,6,2,20,20
55 | Beef & Pork,Bacon McDouble,5.7 oz (161 g),440,200,22,34,10,49,1,90,30,1110,46,35,12,2,7,7,27,6,10,20,20
56 | Beef & Pork,Daily Double,6.7 oz (190 g),430,200,22,35,9,44,1,80,27,760,32,34,11,2,8,7,22,8,8,20,20
57 | Beef & Pork,Jalapeño Double,5.6 oz (159 g),430,210,23,36,9,44,1,80,27,1030,43,35,12,2,7,6,22,6,8,20,20
58 | Beef & Pork,McRib,7.3 oz (208 g),500,240,26,40,10,48,0,70,23,980,41,44,15,3,10,11,22,2,2,15,20
59 | Chicken & Fish,Premium Crispy Chicken Classic Sandwich,7.5 oz (213 g),510,200,22,33,3.5,18,0,45,16,990,41,55,18,3,13,10,24,4,6,15,20
60 | Chicken & Fish,Premium Grilled Chicken Classic Sandwich,7 oz (200 g),350,80,9,13,2,9,0,65,22,820,34,42,14,3,13,8,28,4,8,15,20
61 | Chicken & Fish,Premium Crispy Chicken Club Sandwich,8.8 oz (249 g),670,300,33,51,9,44,0,85,29,1410,59,58,19,3,14,11,36,8,20,30,20
62 | Chicken & Fish,Premium Grilled Chicken Club Sandwich,8.3 oz (235 g),510,180,20,31,7,36,0,105,35,1250,52,44,15,3,13,9,40,8,20,30,20
63 | Chicken & Fish,Premium Crispy Chicken Ranch BLT Sandwich,8.1 oz (230 g),610,250,28,43,6,31,0,70,24,1400,58,57,19,3,13,11,32,4,20,15,20
64 | Chicken & Fish,Premium Grilled Chicken Ranch BLT Sandwich,7.6 oz (217 g),450,130,15,23,4.5,22,0,90,30,1230,51,43,14,3,13,9,36,4,20,15,20
65 | Chicken & Fish,Bacon Clubhouse Crispy Chicken Sandwich,10 oz (284 g),750,340,38,59,10,51,0.5,90,31,1720,72,65,22,4,15,16,36,8,25,30,15
66 | Chicken & Fish,Bacon Clubhouse Grilled Chicken Sandwich,9.5 oz (270 g),590,230,25,39,8,42,0,110,37,1560,65,51,17,4,15,14,40,8,30,30,15
67 | Chicken & Fish,Southern Style Crispy Chicken Sandwich,5.6 oz (160 g),430,170,19,29,3,15,0,45,14,910,38,43,14,2,7,7,21,4,2,15,15
68 | Chicken & Fish,McChicken,5.1 oz (143 g),360,140,16,25,3,15,0,35,11,800,33,40,13,2,7,5,14,0,2,10,15
69 | Chicken & Fish,Bacon Cheddar McChicken,6 oz (171 g),480,220,24,38,7,35,0,65,21,1260,53,43,14,2,8,6,22,4,10,20,15
70 | Chicken & Fish,Bacon Buffalo Ranch McChicken,5.7 oz (161 g),430,190,21,32,5,25,0,50,17,1260,53,41,14,2,7,6,20,2,10,15,15
71 | Chicken & Fish,Buffalo Ranch McChicken,5.2 oz (148 g),360,150,16,25,3,16,0,35,11,990,41,40,13,2,7,5,14,2,2,15,15
72 | Chicken & Fish,Premium McWrap Chicken & Bacon (Crispy Chicken),11.1 oz (316 g),630,280,32,49,9,45,0.5,80,26,1540,64,56,19,3,13,7,32,60,20,20,20
73 | Chicken & Fish,Premium McWrap Chicken & Bacon (Grilled Chicken),10.7 oz (302 g),480,170,19,28,7,36,0,95,32,1370,57,42,14,3,13,6,36,60,25,20,20
74 | Chicken & Fish,Premium McWrap Chicken & Ranch (Crispy Chicken),10.9 oz (310 g),610,280,31,47,8,40,0.5,65,21,1340,56,56,19,3,14,8,27,60,15,20,20
75 | Chicken & Fish,Premium McWrap Chicken & Ranch (Grilled Chicken),10.5 oz (297 g),450,160,18,27,6,31,0.5,80,27,1170,49,42,14,3,14,6,30,60,15,15,20
76 | Chicken & Fish,Premium McWrap Southwest Chicken (Crispy Chicken),11.1 oz (314 g),670,300,33,51,8,40,0.5,60,21,1480,62,68,23,5,19,12,27,60,15,20,20
77 | Chicken & Fish,Premium McWrap Southwest Chicken (Grilled Chicken),11.2 oz (318 g),520,180,20,31,6,32,0,80,27,1320,55,55,18,5,20,10,31,70,15,20,20
78 | Chicken & Fish,Premium McWrap Chicken Sweet Chili (Crispy Chicken),10.7 oz (304 g),540,200,23,35,4.5,23,0,50,16,1260,52,61,20,3,13,14,23,60,15,8,20
79 | Chicken & Fish,Premium McWrap Chicken Sweet Chili (Grilled Chicken),10.3 oz (291 g),380,90,10,15,3,14,0,65,22,1090,45,47,16,3,13,12,27,60,15,8,20
80 | Chicken & Fish,Chicken McNuggets (4 piece),2.3 oz (65 g),190,110,12,18,2,10,0,25,9,360,15,12,4,1,2,0,9,0,2,0,2
81 | Chicken & Fish,Chicken McNuggets (6 piece),3.4 oz (97 g),280,160,18,27,3,15,0,40,13,540,22,18,6,1,4,0,13,0,2,2,4
82 | Chicken & Fish,Chicken McNuggets (10 piece),5.7 oz (162 g),470,270,30,45,5,25,0,65,22,900,37,30,10,2,6,0,22,0,4,2,6
83 | Chicken & Fish,Chicken McNuggets (20 piece),11.4 oz (323 g),940,530,59,91,10,50,0,135,44,1800,75,59,20,3,12,0,44,0,8,4,10
84 | Chicken & Fish,Chicken McNuggets (40 piece),22.8 oz (646 g),1880,1060,118,182,20,101,1,265,89,3600,150,118,39,6,24,1,87,0,15,8,25
85 | Chicken & Fish,Filet-O-Fish,5 oz (142 g),390,170,19,29,4,19,0,40,14,590,24,39,13,2,7,5,15,2,0,15,10
86 | Salads,Premium Bacon Ranch Salad (without Chicken),7.9 oz (223 g),140,70,7,11,3.5,18,0,25,9,300,13,10,3,3,12,4,9,170,30,15,6
87 | Salads,Premium Bacon Ranch Salad with Crispy Chicken,9 oz (255 g),380,190,21,33,6,29,0,70,23,860,36,22,7,2,10,5,25,100,25,15,8
88 | Salads,Premium Bacon Ranch Salad with Grilled Chicken,8.5 oz (241 g),220,80,8,13,4,20,0,85,29,690,29,8,3,2,10,4,29,110,30,15,8
89 | Salads,Premium Southwest Salad (without Chicken),8.1 oz (230 g),140,40,4.5,7,2,9,0,10,3,150,6,20,7,6,23,6,6,160,25,15,10
90 | Salads,Premium Southwest Salad with Crispy Chicken,12.3 oz (348 g),450,190,22,33,4.5,22,0,50,17,850,35,42,14,7,28,12,23,170,30,15,15
91 | Salads,Premium Southwest Salad with Grilled Chicken,11.8 oz (335 g),290,80,8,13,2.5,13,0,70,23,680,28,28,9,7,28,10,27,170,30,15,15
92 | Snacks & Sides,Chipotle BBQ Snack Wrap (Crispy Chicken),4.6 oz (130 g),340,130,15,23,4.5,22,0,30,11,780,33,37,12,1,6,8,14,4,0,10,10
93 | Snacks & Sides,Chipotle BBQ Snack Wrap (Grilled Chicken),4.3 oz (123 g),260,70,8,13,3.5,18,0,40,14,700,29,30,10,1,6,7,16,4,2,10,10
94 | Snacks & Sides,Honey Mustard Snack Wrap (Crispy Chicken),4.3 oz (123 g),330,130,15,23,4.5,22,0,35,11,730,30,34,11,1,5,3,14,2,0,10,10
95 | Snacks & Sides,Honey Mustard Snack Wrap (Grilled Chicken),4.1 oz (116 g),250,70,8,13,3.5,18,0,45,14,650,27,27,9,1,5,2,16,2,2,10,10
96 | Snacks & Sides,Ranch Snack Wrap (Crispy Chicken),4.5 oz (128 g),360,180,20,30,5,27,0,40,13,810,34,32,11,1,5,3,15,2,0,10,10
97 | Snacks & Sides,Ranch Snack Wrap (Grilled Chicken),4.3 oz (121 g),280,120,13,20,4.5,22,0,45,16,720,30,25,8,1,5,2,16,2,2,10,10
98 | Snacks & Sides,Small French Fries,2.6 oz (75 g),230,100,11,17,1.5,8,0,0,0,130,5,30,10,2,10,0,2,0,30,0,4
99 | Snacks & Sides,Medium French Fries,3.9 oz (111 g),340,140,16,24,2.5,11,0,0,0,190,8,44,15,4,14,0,4,0,45,2,4
100 | Snacks & Sides,Large French Fries,5.9 oz (168 g),510,220,24,37,3.5,17,0,0,0,290,12,67,22,5,22,0,6,0,70,2,8
101 | Snacks & Sides,Kids French Fries,1.3 oz (38 g),110,50,5,8,1,4,0,0,0,65,3,15,5,1,5,0,1,0,15,0,2
102 | Snacks & Sides,Side Salad,3.1 oz (87 g),20,0,0,0,0,0,0,0,0,10,0,4,1,1,6,2,1,45,25,2,4
103 | Snacks & Sides,Apple Slices,1.2 oz (34 g),15,0,0,0,0,0,0,0,0,0,0,4,1,0,0,3,0,0,160,2,0
104 | Snacks & Sides,Fruit 'n Yogurt Parfait,5.2 oz (149 g),150,20,2,3,1,5,0,5,2,70,3,30,10,1,3,23,4,2,15,10,4
105 | Desserts,Baked Apple Pie,2.7 oz (77 g),250,110,13,19,7,35,0,0,0,170,7,32,11,4,15,13,2,4,25,2,6
106 | Desserts,Chocolate Chip Cookie,1 cookie (33 g),160,70,8,12,3.5,19,0,10,3,90,4,21,7,1,3,15,2,2,0,2,8
107 | Desserts,Oatmeal Raisin Cookie,1 cookie (33 g),150,50,6,9,2.5,13,0,10,3,135,6,22,7,1,3,13,2,2,0,2,6
108 | Desserts,Kids Ice Cream Cone,1 oz (29 g),45,10,1.5,2,1,4,0,5,2,20,1,7,2,0,0,6,1,2,0,4,0
109 | Desserts,Hot Fudge Sundae,6.3 oz (179 g),330,80,9,14,7,34,0,25,8,170,7,53,18,1,3,48,8,8,0,25,8
110 | Desserts,Hot Caramel Sundae,6.4 oz (182 g),340,70,8,12,5,24,0,30,10,150,6,60,20,0,0,43,7,10,0,25,0
111 | Desserts,Strawberry Sundae,6.3 oz (178 g),280,60,6,10,4,20,0,25,8,85,4,49,16,0,0,45,6,8,4,20,0
112 | Beverages,Coca-Cola Classic (Small),16 fl oz cup,140,0,0,0,0,0,0,0,0,0,0,39,13,0,0,39,0,0,0,0,0
113 | Beverages,Coca-Cola Classic (Medium),21 fl oz cup,200,0,0,0,0,0,0,0,0,5,0,55,18,0,0,55,0,0,0,0,0
114 | Beverages,Coca-Cola Classic (Large),30 fl oz cup,280,0,0,0,0,0,0,0,0,5,0,76,25,0,0,76,0,0,0,0,0
115 | Beverages,Coca-Cola Classic (Child),12 fl oz cup,100,0,0,0,0,0,0,0,0,0,0,28,9,0,0,28,0,0,0,0,0
116 | Beverages,Diet Coke (Small),16 fl oz cup,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,0
117 | Beverages,Diet Coke (Medium),21 fl oz cup,0,0,0,0,0,0,0,0,0,20,1,0,0,0,0,0,0,0,0,0,0
118 | Beverages,Diet Coke (Large),30 fl oz cup,0,0,0,0,0,0,0,0,0,35,1,0,0,0,0,0,0,0,0,0,0
119 | Beverages,Diet Coke (Child),12 fl oz cup,0,0,0,0,0,0,0,0,0,15,1,0,0,0,0,0,0,0,0,0,0
120 | Beverages,Dr Pepper (Small),16 fl oz cup,140,0,0,0,0,0,0,0,0,45,2,37,12,0,0,35,0,0,0,0,0
121 | Beverages,Dr Pepper (Medium),21 fl oz cup,190,0,0,0,0,0,0,0,0,65,3,53,18,0,0,51,0,0,0,0,0
122 | Beverages,Dr Pepper (Large),30 fl oz cup,270,0,0,0,0,0,0,0,0,90,4,72,24,0,0,70,0,0,0,0,0
123 | Beverages,Dr Pepper (Child),12 fl oz cup,100,0,0,0,0,0,0,0,0,30,1,27,9,0,0,26,0,0,0,0,0
124 | Beverages,Diet Dr Pepper (Small),16 fl oz cup,0,0,0,0,0,0,0,0,0,70,3,0,0,0,0,0,2,0,0,0,0
125 | Beverages,Diet Dr Pepper (Medium),21 fl oz cup,0,0,0,0,0,0,0,0,0,100,4,0,0,0,0,0,3,0,0,0,0
126 | Beverages,Diet Dr Pepper (Large),30 fl oz cup,0,0,0,0,0,0,0,0,0,140,6,0,0,0,0,0,4,0,0,0,0
127 | Beverages,Diet Dr Pepper (Child),12 fl oz cup,0,0,0,0,0,0,0,0,0,50,2,0,0,0,0,0,1,0,0,0,0
128 | Beverages,Sprite (Small),16 fl oz cup,140,0,0,0,0,0,0,0,0,30,1,37,12,0,0,37,0,0,0,0,0
129 | Beverages,Sprite (Medium),21 fl oz cup,200,0,0,0,0,0,0,0,0,45,2,54,18,0,0,54,0,0,0,0,0
130 | Beverages,Sprite (Large),30 fl oz cup,280,0,0,0,0,0,0,0,0,60,3,74,25,0,0,74,0,0,0,0,0
131 | Beverages,Sprite (Child),12 fl oz cup,100,0,0,0,0,0,0,0,0,25,1,27,9,0,0,27,0,0,0,0,0
132 | Beverages,1% Low Fat Milk Jug,1 carton (236 ml),100,20,2.5,4,1.5,8,0,10,3,125,5,12,4,0,0,12,8,10,4,30,0
133 | Beverages,Fat Free Chocolate Milk Jug,1 carton (236 ml),130,0,0,0,0,0,0,5,2,135,6,23,8,1,2,22,9,10,0,30,8
134 | Beverages,Minute Maid 100% Apple Juice Box,6 fl oz (177 ml),80,0,0,0,0,0,0,0,0,15,1,21,7,0,0,19,0,0,100,10,0
135 | Beverages,Minute Maid Orange Juice (Small),12 fl oz cup,150,0,0,0,0,0,0,0,0,0,0,34,11,0,0,30,2,0,130,2,0
136 | Beverages,Minute Maid Orange Juice (Medium),16 fl oz cup,190,0,0,0,0,0,0,0,0,0,0,44,15,0,0,39,3,0,160,4,0
137 | Beverages,Minute Maid Orange Juice (Large),22 fl oz cup,280,0,0,0,0,0,0,0,0,5,0,65,22,0,0,58,4,0,240,4,0
138 | Beverages,Dasani Water Bottle,16.9 fl oz,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
139 | Coffee & Tea,Iced Tea (Small),16 fl oz cup,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,0
140 | Coffee & Tea,Iced Tea (Medium),21 fl oz cup,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,0
141 | Coffee & Tea,Iced Tea (Large),30 fl oz cup,0,0,0,0,0,0,0,0,0,15,1,0,0,0,0,0,0,0,0,0,0
142 | Coffee & Tea,Iced Tea (Child),12 fl oz cup,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0
143 | Coffee & Tea,Sweet Tea (Small),16 fl oz cup,150,0,0,0,0,0,0,0,0,10,0,36,12,0,0,36,1,0,0,0,0
144 | Coffee & Tea,Sweet Tea (Medium),21 fl oz cup,180,0,0,0,0,0,0,0,0,10,0,45,15,0,0,45,1,0,0,0,0
145 | Coffee & Tea,Sweet Tea (Large),30 fl oz cup,220,0,0,0,0,0,0,0,0,10,1,54,18,0,0,54,1,0,0,0,0
146 | Coffee & Tea,Sweet Tea (Child),12 fl oz cup,110,0,0,0,0,0,0,0,0,5,0,27,9,0,0,27,0,0,0,0,0
147 | Coffee & Tea,Coffee (Small),12 fl oz cup,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
148 | Coffee & Tea,Coffee (Medium),16 fl oz cup,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
149 | Coffee & Tea,Coffee (Large),16 fl oz cup,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
150 | Coffee & Tea,Latte (Small),12 fl oz cup,170,80,9,13,5,24,0,25,9,115,5,15,5,1,3,12,9,8,0,30,0
151 | Coffee & Tea,Latte (Medium),16 fl oz cup,210,90,10,16,6,30,0,30,11,140,6,18,6,1,4,15,11,10,0,35,0
152 | Coffee & Tea,Latte (Large),20 fl oz cup,280,120,14,21,8,39,0,40,14,180,8,24,8,1,6,20,15,15,0,50,2
153 | Coffee & Tea,Caramel Latte (Small),12 fl oz cup,270,80,9,13,5,24,0,25,9,115,5,40,13,1,3,38,9,8,0,30,0
154 | Coffee & Tea,Caramel Latte (Medium),16 fl oz cup,340,90,10,16,6,30,0,30,11,140,6,50,17,1,4,48,11,10,0,35,0
155 | Coffee & Tea,Caramel Latte (Large),20 fl oz cup,430,120,14,21,8,39,0,40,14,180,8,62,21,1,6,59,15,15,0,50,2
156 | Coffee & Tea,Hazelnut Latte (Small),12 fl oz cup,270,80,9,13,5,24,0,25,9,115,5,40,13,1,3,38,9,8,0,30,0
157 | Coffee & Tea,Hazelnut Latte (Medium),16 fl oz cup,330,90,10,16,6,30,0,30,11,140,6,50,17,1,4,47,11,10,0,35,0
158 | Coffee & Tea,Hazelnut Latte (Large),20 fl oz cup,430,120,14,21,8,39,0,40,14,180,8,62,21,1,6,58,15,15,0,50,2
159 | Coffee & Tea,French Vanilla Latte (Small),12 fl oz cup,260,80,9,13,5,24,0,25,9,115,5,38,13,1,3,36,9,8,0,30,0
160 | Coffee & Tea,French Vanilla Latte (Medium),16 fl oz cup,330,90,10,16,6,30,0,30,11,140,6,48,16,1,4,45,11,10,0,35,2
161 | Coffee & Tea,French Vanilla Latte (Large),20 fl oz cup,420,120,14,21,8,39,0,40,14,190,8,60,20,1,6,56,15,15,0,50,2
162 | Coffee & Tea,Latte with Sugar Free French Vanilla Syrup (Small),12 fl oz cup,210,80,9,13,5,24,0,25,9,150,6,24,8,1,4,12,9,8,0,30,0
163 | Coffee & Tea,Latte with Sugar Free French Vanilla Syrup (Medium),16 fl oz cup,260,90,10,16,6,30,0,30,11,190,8,29,10,1,5,15,12,10,0,35,0
164 | Coffee & Tea,Latte with Sugar Free French Vanilla Syrup (Large),20 fl oz cup,330,120,14,21,8,39,0,40,14,240,10,37,12,2,7,20,15,15,0,50,2
165 | Coffee & Tea,Nonfat Latte (Small),12 fl oz cup,100,0,0,0,0,0,0,5,2,110,5,15,5,1,3,13,10,10,0,30,0
166 | Coffee & Tea,Nonfat Latte (Medium),16 fl oz cup,130,0,0,0,0,0,0,5,2,135,6,19,6,1,4,16,12,15,0,40,0
167 | Coffee & Tea,Nonfat Latte (Large),20 fl oz cup,170,0,0.5,1,0,0,0,10,3,180,7,25,8,1,6,21,16,15,0,50,2
168 | Coffee & Tea,Nonfat Caramel Latte (Small),12 fl oz cup,200,0,0,0,0,0,0,5,2,110,5,41,14,1,3,39,10,10,0,30,0
169 | Coffee & Tea,Nonfat Caramel Latte (Medium),16 fl oz cup,250,0,0,0,0,0,0,5,2,135,6,51,17,1,4,48,12,15,0,40,0
170 | Coffee & Tea,Nonfat Caramel Latte (Large),20 fl oz cup,310,0,0.5,1,0,0,0,10,3,180,7,63,21,1,6,59,16,15,0,50,2
171 | Coffee & Tea,Nonfat Hazelnut Latte (Small),12 fl oz cup,200,0,0,0,0,0,0,5,2,110,5,40,13,1,3,38,10,10,0,30,0
172 | Coffee & Tea,Nonfat Hazelnut Latte (Medium),16 fl oz cup,250,0,0,0,0,0,0,5,2,135,6,51,17,1,4,48,12,15,0,40,0
173 | Coffee & Tea,Nonfat Hazelnut Latte (Large),20 fl oz cup,310,0,0.5,1,0,0,0,10,3,180,7,63,21,1,6,59,16,15,0,50,2
174 | Coffee & Tea,Nonfat French Vanilla Latte (Small),12 fl oz cup,190,0,0,0,0,0,0,5,2,115,5,39,13,1,3,37,10,10,0,30,0
175 | Coffee & Tea,Nonfat French Vanilla Latte (Medium),16 fl oz cup,240,0,0,0,0,0,0,5,2,140,6,49,16,1,4,46,12,15,0,40,2
176 | Coffee & Tea,Nonfat French Vanilla Latte (Large),20 fl oz cup,300,0,0.5,1,0,0,0,10,3,180,8,60,20,1,6,56,16,15,0,50,2
177 | Coffee & Tea,Nonfat Latte with Sugar Free French Vanilla Syrup (Small),12 fl oz cup,140,0,0,0,0,0,0,5,2,150,6,24,8,1,4,13,10,10,0,30,0
178 | Coffee & Tea,Nonfat Latte with Sugar Free French Vanilla Syrup (Medium),16 fl oz cup,170,0,0,0,0,0,0,5,2,180,8,30,10,1,5,16,12,15,0,40,0
179 | Coffee & Tea,Nonfat Latte with Sugar Free French Vanilla Syrup (Large),20 fl oz cup,220,0,0.5,1,0,0,0,10,3,240,10,38,13,2,7,21,16,15,0,50,2
180 | Coffee & Tea,Mocha (Small),12 fl oz cup,340,100,11,18,7,34,0,35,12,150,6,49,16,2,6,42,10,10,0,30,6
181 | Coffee & Tea,Mocha (Medium),16 fl oz cup,410,120,14,21,8,40,0,40,14,190,8,60,20,2,8,53,13,10,0,40,6
182 | Coffee & Tea,Mocha (Large),20 fl oz cup,500,150,17,26,10,49,0.5,50,17,240,10,72,24,2,10,63,16,15,0,50,8
183 | Coffee & Tea,Mocha with Nonfat Milk (Small),12 fl oz cup,270,30,3.5,5,2,11,0,15,5,150,6,49,16,2,6,43,11,10,0,35,6
184 | Coffee & Tea,Mocha with Nonfat Milk (Medium),16 fl oz cup,330,30,3.5,6,2,11,0,15,5,190,8,60,20,2,8,53,13,15,0,40,6
185 | Coffee & Tea,Mocha with Nonfat Milk (Large),20 fl oz cup,390,35,4,6,2.5,12,0,20,6,240,10,73,24,2,10,64,17,20,0,50,8
186 | Coffee & Tea,Caramel Mocha (Small),12 fl oz cup,320,100,11,17,7,33,0,35,12,170,7,45,15,1,3,40,10,10,0,30,2
187 | Coffee & Tea,Caramel Mocha (Medium),16 fl oz cup,390,120,14,21,8,40,0.5,40,14,220,9,55,18,1,4,50,12,15,0,40,2
188 | Coffee & Tea,Caramel Mocha (Large),20 fl oz cup,480,150,17,26,10,49,0.5,50,17,270,11,66,22,1,5,60,16,15,0,50,4
189 | Coffee & Tea,Nonfat Caramel Mocha (Small),12 fl oz cup,250,30,3.5,5,2,10,0,15,5,170,7,45,15,1,3,41,10,10,0,35,2
190 | Coffee & Tea,Nonfat Caramel Mocha (Medium),16 fl oz cup,310,30,3.5,5,2,11,0,15,5,210,9,56,19,1,4,51,13,15,0,40,2
191 | Coffee & Tea,Nonfat Caramel Mocha (Large),20 fl oz cup,370,35,3.5,6,2.5,11,0,20,6,270,11,67,22,1,5,61,17,20,0,50,4
192 | Coffee & Tea,Hot Chocolate (Small),12 fl oz cup,360,120,13,21,8,39,0,40,14,180,8,50,17,1,3,45,11,10,0,40,6
193 | Coffee & Tea,Hot Chocolate (Medium),16 fl oz cup,440,140,16,25,9,47,0.5,50,16,220,9,61,20,1,4,56,14,15,0,45,6
194 | Coffee & Tea,Hot Chocolate (Large),20 fl oz cup,540,180,20,31,12,58,0.5,60,20,280,12,73,24,1,5,68,17,20,0,60,8
195 | Coffee & Tea,Hot Chocolate with Nonfat Milk (Small),12 fl oz cup,280,30,3.5,5,2,11,0,15,5,180,7,50,17,1,3,46,12,15,0,40,6
196 | Coffee & Tea,Hot Chocolate with Nonfat Milk (Medium),16 fl oz cup,340,30,3.5,5,2,11,0,15,6,220,9,61,20,1,4,57,14,20,0,50,6
197 | Coffee & Tea,Hot Chocolate with Nonfat Milk (Large),20 fl oz cup,400,35,3.5,6,2.5,12,0,20,7,280,12,74,25,1,5,69,19,25,0,60,8
198 | Coffee & Tea,Regular Iced Coffee (Small),16 fl oz cup,140,40,4.5,7,3,15,0,15,6,35,1,23,8,0,0,22,1,4,0,4,0
199 | Coffee & Tea,Regular Iced Coffee (Medium),22 fl oz cup,190,60,7,11,4.5,22,0,25,9,50,2,31,10,0,0,30,1,4,0,4,0
200 | Coffee & Tea,Regular Iced Coffee (Large),32 fl oz cup,270,80,9,14,6,29,0,35,12,75,3,47,16,0,0,45,2,6,0,8,0
201 | Coffee & Tea,Caramel Iced Coffee (Small),16 fl oz cup,130,40,4.5,7,3,15,0,15,6,35,2,22,7,0,0,21,1,4,0,4,0
202 | Coffee & Tea,Caramel Iced Coffee (Medium),22 fl oz cup,180,60,7,11,4.5,22,0,25,9,50,2,29,10,0,0,28,1,4,0,4,0
203 | Coffee & Tea,Caramel Iced Coffee (Large),32 fl oz cup,260,80,9,14,6,29,0,35,12,65,3,43,14,0,0,42,2,6,0,6,0
204 | Coffee & Tea,Hazelnut Iced Coffee (Small),16 fl oz cup,130,40,4.5,7,3,15,0,15,6,35,1,21,7,0,0,20,1,4,0,4,0
205 | Coffee & Tea,Hazelnut Iced Coffee (Medium),22 fl oz cup,180,60,7,11,4.5,22,0,25,9,50,2,29,10,0,0,28,1,4,0,4,0
206 | Coffee & Tea,Hazelnut Iced Coffee (Large),32 fl oz cup,250,80,9,14,6,29,0,35,12,75,3,43,14,0,0,41,2,6,0,8,0
207 | Coffee & Tea,French Vanilla Iced Coffee (Small),16 fl oz cup,120,40,4.5,7,3,15,0,15,6,40,2,20,7,0,0,19,1,4,0,4,0
208 | Coffee & Tea,French Vanilla Iced Coffee (Medium),22 fl oz cup,170,60,7,11,4.5,22,0,25,9,55,2,27,9,0,0,26,1,4,0,4,0
209 | Coffee & Tea,French Vanilla Iced Coffee (Large),32 fl oz cup,240,80,9,14,6,29,0,35,12,80,3,41,14,0,0,39,2,6,0,8,0
210 | Coffee & Tea,Iced Coffee with Sugar Free French Vanilla Syrup (Small),16 fl oz cup,80,40,4.5,7,3,15,0,15,6,65,3,9,3,0,0,1,1,4,0,4,0
211 | Coffee & Tea,Iced Coffee with Sugar Free French Vanilla Syrup (Medium),22 fl oz cup,120,60,7,11,4.5,22,0,25,9,90,4,12,4,0,0,2,1,4,0,4,0
212 | Coffee & Tea,Iced Coffee with Sugar Free French Vanilla Syrup (Large),32 fl oz cup,160,80,9,14,6,29,0,35,12,135,6,18,6,0,0,2,2,6,0,8,0
213 | Coffee & Tea,Iced Mocha (Small),12 fl oz cup,290,100,11,17,7,33,0,35,12,125,5,41,14,1,4,34,8,10,0,25,4
214 | Coffee & Tea,Iced Mocha (Medium),16 fl oz cup,350,110,13,19,8,38,0,40,13,150,6,50,17,1,5,43,9,10,0,30,6
215 | Coffee & Tea,Iced Mocha (Large),22 fl oz cup,480,150,16,25,10,49,0.5,50,17,220,9,70,23,2,8,62,14,15,0,40,8
216 | Coffee & Tea,Iced Mocha with Nonfat Milk (Small),12 fl oz cup,240,45,5,8,3,16,0,20,7,125,5,41,14,1,4,35,8,10,0,25,4
217 | Coffee & Tea,Iced Mocha with Nonfat Milk (Medium),16 fl oz cup,290,45,5,8,3.5,17,0,20,7,150,6,50,17,1,5,43,10,15,0,30,6
218 | Coffee & Tea,Iced Mocha with Nonfat Milk (Large),22 fl oz cup,390,50,6,9,3.5,18,0,25,8,220,9,71,24,2,8,62,14,20,0,45,8
219 | Coffee & Tea,Iced Caramel Mocha (Small),12 fl oz cup,280,100,11,17,7,33,0,35,12,140,6,38,13,0,0,33,8,10,0,25,2
220 | Coffee & Tea,Iced Caramel Mocha (Medium),16 fl oz cup,340,110,13,19,7,37,0,40,13,170,7,46,15,1,2,41,9,10,0,30,2
221 | Coffee & Tea,Iced Caramel Mocha (Large),22 fl oz cup,460,150,16,25,10,48,0.5,50,17,250,10,65,22,1,3,59,13,15,0,40,4
222 | Coffee & Tea,Iced Nonfat Caramel Mocha (Small),12 fl oz cup,230,45,5,8,3,16,0,20,7,140,6,38,13,0,0,33,8,10,0,25,2
223 | Coffee & Tea,Iced Nonfat Caramel Mocha (Medium),16 fl oz cup,270,45,5,8,3,16,0,20,7,170,7,47,16,1,2,41,10,15,0,30,2
224 | Coffee & Tea,Iced Nonfat Caramel Mocha (Large),22 fl oz cup,370,50,6,8,3.5,17,0,25,8,250,10,65,22,1,3,59,14,20,0,45,4
225 | Coffee & Tea,Frappé Mocha (Small),12 fl oz cup,450,160,18,28,12,59,1,65,21,125,5,65,22,1,3,57,7,15,0,20,4
226 | Coffee & Tea,Frappé Mocha (Medium),16 fl oz cup,550,200,22,34,14,71,1,75,25,160,7,80,27,1,4,71,9,15,0,25,4
227 | Coffee & Tea,Frappé Mocha (Large),22 fl oz cup,670,240,26,41,17,85,1,90,30,190,8,98,33,1,4,88,11,20,0,35,4
228 | Coffee & Tea,Frappé Caramel (Small),12 fl oz cup,450,170,19,29,12,60,1,65,22,125,5,64,21,0,0,57,7,15,0,25,2
229 | Coffee & Tea,Frappé Caramel (Medium),16 fl oz cup,550,200,23,35,15,73,1,80,27,160,7,79,26,0,0,71,9,20,0,30,2
230 | Coffee & Tea,Frappé Caramel (Large),22 fl oz cup,670,250,27,42,17,87,1.5,95,32,190,8,96,32,0,0,88,11,20,0,35,2
231 | Coffee & Tea,Frappé Chocolate Chip (Small),12 fl oz cup,530,200,23,35,14,72,1,65,22,135,6,76,25,1,5,67,8,15,0,25,4
232 | Coffee & Tea,Frappé Chocolate Chip (Medium),16 fl oz cup,630,240,26,41,17,85,1,80,26,160,7,91,30,1,5,81,9,15,0,30,4
233 | Coffee & Tea,Frappé Chocolate Chip (Large),22 fl oz cup,760,280,31,48,20,101,1.5,95,32,200,8,111,37,1,5,99,12,20,0,35,6
234 | Smoothies & Shakes,Blueberry Pomegranate Smoothie (Small),12 fl oz cup,220,5,0.5,1,0,0,0,5,1,40,2,50,17,3,12,44,2,0,2,6,2
235 | Smoothies & Shakes,Blueberry Pomegranate Smoothie (Medium),16 fl oz cup,260,5,1,1,0,0,0,5,1,50,2,62,21,4,15,54,3,0,4,8,2
236 | Smoothies & Shakes,Blueberry Pomegranate Smoothie (Large),22 fl oz cup,340,10,1,2,0.5,3,0,5,2,65,3,79,26,5,19,70,4,0,4,10,2
237 | Smoothies & Shakes,Strawberry Banana Smoothie (Small),12 fl oz cup,210,5,0.5,1,0,0,0,5,1,50,2,47,16,3,10,44,3,0,30,8,2
238 | Smoothies & Shakes,Strawberry Banana Smoothie (Medium),16 fl oz cup,250,5,1,1,0,0,0,5,1,60,2,58,19,3,13,54,4,0,35,8,4
239 | Smoothies & Shakes,Strawberry Banana Smoothie (Large),22 fl oz cup,330,10,1,2,0.5,3,0,5,2,80,3,74,25,4,16,70,5,0,45,10,4
240 | Smoothies & Shakes,Mango Pineapple Smoothie (Small),12 fl oz cup,210,5,0.5,1,0,0,0,5,1,40,2,50,17,1,4,46,2,30,20,8,2
241 | Smoothies & Shakes,Mango Pineapple Smoothie (Medium),16 fl oz cup,260,10,1,1,0,0,0,5,1,45,2,61,20,1,5,56,3,40,25,8,2
242 | Smoothies & Shakes,Mango Pineapple Smoothie (Large),22 fl oz cup,340,10,1,2,0.5,3,0,5,2,60,3,78,26,2,6,72,4,50,30,10,2
243 | Smoothies & Shakes,Vanilla Shake (Small),12 fl oz cup,530,140,15,24,10,49,1,60,20,160,7,86,29,0,0,63,11,20,0,40,0
244 | Smoothies & Shakes,Vanilla Shake (Medium),16 fl oz cup,660,170,19,29,12,61,1,75,24,200,9,109,36,0,0,81,14,25,0,50,0
245 | Smoothies & Shakes,Vanilla Shake (Large),22 fl oz cup,820,210,23,35,15,73,1,90,29,260,11,135,45,0,0,101,18,30,0,60,0
246 | Smoothies & Shakes,Strawberry Shake (Small),12 fl oz cup,550,150,16,25,10,52,1,60,21,160,7,90,30,0,0,79,12,20,0,40,0
247 | Smoothies & Shakes,Strawberry Shake (Medium),16 fl oz cup,690,180,20,30,13,63,1,75,25,210,9,114,38,0,0,100,15,25,0,50,0
248 | Smoothies & Shakes,Strawberry Shake (Large),22 fl oz cup,850,210,24,36,15,75,1,90,30,260,11,140,47,0,0,123,18,30,0,70,0
249 | Smoothies & Shakes,Chocolate Shake (Small),12 fl oz cup,560,150,16,25,10,51,1,60,20,240,10,91,30,1,5,77,12,20,0,40,8
250 | Smoothies & Shakes,Chocolate Shake (Medium),16 fl oz cup,700,180,20,30,12,62,1,75,24,300,13,114,38,2,6,97,15,25,0,50,10
251 | Smoothies & Shakes,Chocolate Shake (Large),22 fl oz cup,850,210,23,36,15,74,1,85,29,380,16,141,47,2,8,120,19,30,0,60,15
252 | Smoothies & Shakes,Shamrock Shake (Medium),16 fl oz cup,660,170,19,29,12,61,1,75,24,210,9,109,36,0,0,93,14,25,0,50,0
253 | Smoothies & Shakes,Shamrock Shake (Large),22 fl oz cup,820,210,23,35,15,73,1,90,29,260,11,135,45,0,0,115,18,30,0,60,0
254 | Smoothies & Shakes,McFlurry with M&M’s Candies (Small),10.9 oz (310 g),650,210,23,35,14,72,0.5,50,17,180,7,96,32,1,6,89,13,15,0,45,8
255 | Smoothies & Shakes,McFlurry with M&M’s Candies (Medium),16.2 oz (460 g),930,290,33,50,20,102,1,75,25,260,11,139,46,2,7,128,20,25,0,70,10
256 | Smoothies & Shakes,McFlurry with M&M’s Candies (Snack),7.3 oz (207 g),430,140,15,24,10,48,0,35,11,120,5,64,21,1,4,59,9,10,0,30,4
257 | Smoothies & Shakes,McFlurry with Oreo Cookies (Small),10.1 oz (285 g),510,150,17,26,9,44,0.5,45,14,280,12,80,27,1,4,64,12,15,0,40,8
258 | Smoothies & Shakes,McFlurry with Oreo Cookies (Medium),13.4 oz (381 g),690,200,23,35,12,58,1,55,19,380,16,106,35,1,5,85,15,20,0,50,10
259 | Smoothies & Shakes,McFlurry with Oreo Cookies (Snack),6.7 oz (190 g),340,100,11,17,6,29,0,30,9,190,8,53,18,1,2,43,8,10,0,25,6
260 | Smoothies & Shakes,McFlurry with Reese's Peanut Butter Cups (Medium),14.2 oz (403 g),810,290,32,50,15,76,1,60,20,400,17,114,38,2,9,103,21,20,0,60,6
261 | Smoothies & Shakes,McFlurry with Reese's Peanut Butter Cups (Snack),7.1 oz (202 g),410,150,16,25,8,38,0,30,10,200,8,57,19,1,5,51,10,10,0,30,4
262 |
--------------------------------------------------------------------------------