├── 911 Calls Data Capstone Project Walkthrough.ipynb ├── Affordable Care Act Data.R ├── Airbnb Seattle └── listings.csv ├── Airbnb Texas ├── Airbnb Texas.R └── Airbnb_Texas_Rentals.csv ├── Airplane Crash ├── Airplane Crash.ipynb └── Airplane_Crashes_and_Fatalities_Since_1908.csv ├── Amazon Sentiment Analysis (In Progress).ipynb ├── Barbeque ├── Barbeque.R ├── Barbeque.ipynb ├── contest_data.csv ├── results_brisket.csv ├── results_chicken.csv ├── results_overall.csv ├── results_pork.csv └── results_ribs.csv ├── Bikeshare Bay Area.Rmd ├── Caravan ├── Caravan Insurance.R ├── Caravan Insurance.ipynb └── caravan-insurance-challenge.csv ├── Carbon Dioxide Analysis.ipynb ├── Celebrity Deaths.ipynb ├── College Salaries ├── College Salaries.R ├── College Salaries.ipynb ├── degrees-that-pay-back.csv ├── salaries-by-college-type.csv └── salaries-by-region.csv ├── Columbus First ├── Columbus First Trip.R └── Columbus.csv ├── Credit Card Fradulent Modeling 25% Test.ipynb ├── Credit Card Modeling 65%.R ├── Diamond Prices.R ├── Diamond Prices.ipynb ├── Edudata.R ├── Edudata.Rmd ├── Edudata.csv ├── Edudata.html ├── Edudata.pdf ├── El Nino.ipynb ├── Fake News.R ├── Fake News.ipynb ├── Finance Walkthrough.ipynb ├── Glass Classification.ipynb ├── Health Insurance Coverage.ipynb ├── Heart Disease Decision Trees.R ├── Heart Disease Decision Trees.Rmd ├── Housing Index Zillow.R ├── Human Resources Analytics.ipynb ├── Illegal Immigration.R ├── Iris.R ├── Kickstarter.ipynb ├── Mass Shootings ├── Mass Shootings Dataset.csv ├── Mass Shootings.R └── Mass Shootings.ipynb ├── McDonald's.ipynb ├── Melbourne Housing Market.R ├── Model-Based Feature Selection.ipynb ├── NFL Arrests.csv ├── NFL Draft.R ├── NFL Draft.Rmd ├── NFL_Draft.pdf ├── NYSE ├── FB as Example.ipynb ├── NYSE.R ├── Ralph Lauren.ipynb ├── fundamentals.csv └── securities.csv ├── Norwegian Development Funds.R ├── Norwegian Development Funds.Rmd ├── Norwegian Development Funds.ipynb ├── NorwegianDevelopmentFunds.pdf ├── 
Norwegian_Development_Funds.html ├── Pokemon Mining ├── Pokemon Data Mining.R ├── Pokemon Using ML.ipynb ├── final_model.pkl ├── pokemon_alopez247.csv └── report_Pokemon.pdf ├── Pokemon.R ├── README.md ├── Recent Rent Analysis Based on Prices From Zillow.ipynb ├── Speed Dating Data.csv ├── Video Game Sales.ipynb ├── Welfare ├── SNAPerror.csv ├── UIerror.csv └── Welfare.R ├── World Food Facts.R ├── Y Combinator.csv ├── Y Combinator.ipynb ├── contest_data.csv ├── diamonds.csv ├── harmit.csv └── mcdonalds.csv

/Affordable Care Act Data.R:
--------------------------------------------------------------------------------
# Affordable Health Care Act Data
#
# Reads a state-level CSV, strips "%" and "$" formatting from selected
# columns, joins the result to the US state polygons and draws three
# choropleth maps (columns V2, V3 and V4).


### Loading the Libraries

library(ggplot2)
library(maps)
library(zipcode)
library(ggmap)
library(choroplethrMaps)
library(choroplethr)
data("state.map")


### Changing the Working Directory

setwd('./Kaggle')


### Reading the Data

# First pass: keep the header row so the original column names are captured.
acadat <- read.csv(file = './states.csv', header = TRUE, sep = ",",
                   strip.white = TRUE)
m <- colnames(acadat)
length(m)

# Second pass: skip the header row so the columns get the positional names
# V1, V2, ... that the rest of the script refers to.
acadat <- read.csv(file = './states.csv', header = FALSE, skip = 1, sep = ",",
                   strip.white = TRUE)
n <- colnames(acadat)

head(acadat)

### Reading the Dictionary and Cleaning the Data

any(is.na(acadat))

# Lookup table: positional name (V1, V2, ...) next to the original header.
dictionary <- cbind(n, m)

str(acadat)
# Strip percentages and dollars first
pct_cols <- c("V2", "V3", "V4")
acadat[pct_cols] <- lapply(acadat[pct_cols],
                           function(x) as.numeric(gsub("%", "", x)))
acadat$V9 <- gsub("\\$", "", acadat$V9)
# map_data('state') names regions in lower case, so the join key must match.
acadat$region <- tolower(acadat$V1)



us_state_map <- map_data('state')
aca_map <- merge(acadat, us_state_map, by = 'region')
# merge() re-sorts the rows by the join key; put the polygon vertices back in
# drawing order using the `order` column that map_data() supplies. Base R
# ordering is used because this script never attaches dplyr/plyr, so
# arrange() is not guaranteed to be available.
aca_map <- aca_map[order(aca_map$order), ]

### Maps for the United States

# Column V2, binned into 6 equal-count groups
ggplot(aca_map, aes(x = long, y = lat, group = group)) +
  geom_polygon(aes(fill = cut_number(V2, 6))) +
  geom_path(colour = 'red') +
  labs(title = "State level insurance coverage in 2010") +
  scale_fill_brewer('Uninsured Percent, 2010') +
  coord_map()

# Uninsured percent in 2015
ggplot(aca_map, aes(x = long, y = lat, group = group)) +
  geom_polygon(aes(fill = cut_number(V3, 6))) +
  geom_path(colour = 'black', alpha = 0.5) +
  labs(title = "State level insurance coverage in 2015") +
  scale_fill_brewer('Uninsured Percent, 2015') +
  coord_map()

# Uninsured rate change, 2010-15
ggplot(aca_map, aes(x = long, y = lat, group = group)) +
  geom_polygon(aes(fill = cut_number(V4, 6))) +
  geom_path(colour = 'orange') +
  labs(title = "State level change in insurance coverage, 2010-15") +
  scale_fill_brewer('Uninsured rate change, 2010-15') +
  coord_map()

--------------------------------------------------------------------------------
/Airbnb Texas/Airbnb Texas.R:
--------------------------------------------------------------------------------
# Airbnb
#
# Models the nightly rate of Dallas listings three ways (linear model,
# regression tree, random forest), then adds bag-of-words features built from
# the listing descriptions and compares test-set mean squared errors.


## Loading the Libraries

library(data.table)
library(ggplot2)
library(lubridate)
library(wordcloud)
library(tm)
library(SnowballC)
library(RSentiment)
library(caTools)
library(randomForest)
library(rpart)
library(rpart.plot)


## Reading the Dataset and Changing Working Directory

setwd('./Kaggle/Airbnb Texas')
airbnb <- read.csv('./Airbnb_Texas_Rentals.csv')

## Converting Factors to Integers

# as.integer() on a factor returns the internal level codes, not the numbers
# printed in the column, so go through the text instead: drop everything that
# is not a digit or a decimal point, then convert. Entries with no digits at
# all become NA.
# NOTE(review): assumes the rate column holds values such as "$27" and that
# non-numeric bedroom labels should be treated as missing -- confirm against
# the CSV.
text_to_integer <- function(x) {
  as.integer(gsub("[^0-9.]", "", as.character(x)))
}

airbnb$average_rate_per_night <- text_to_integer(airbnb$average_rate_per_night)
airbnb$bedrooms_count <- text_to_integer(airbnb$bedrooms_count)

# randomForest() refuses NA values and the MSE computations below would turn
# into NA, so keep only rows where every modelling column is present.
model_cols <- c("average_rate_per_night", "bedrooms_count",
                "latitude", "longitude")
airbnb <- airbnb[complete.cases(airbnb[, model_cols]), ]

## Changing the Description

airbnb$description <- as.character(airbnb$description)

## Checking for Changes being Made

str(airbnb)
summary(airbnb)

## Using the tapply Function

# Mean nightly rate per city
tapply(airbnb$average_rate_per_night, airbnb$city, mean)


## Subsetting on Dallas

dallas <- subset(airbnb, city == "Dallas")

## Visualization on Dallas

# Count listings per (latitude, longitude) cell after rounding to 2 decimals.
# table() turns the coordinates into factor labels, so they are converted
# back to numbers via as.character() before plotting.
dallaslocation <- as.data.frame(table(round(dallas$latitude, 2),
                                      round(dallas$longitude, 2)))
dallaslocation$Var1 <- as.numeric(as.character(dallaslocation$Var1))
dallaslocation$Var2 <- as.numeric(as.character(dallaslocation$Var2))
ggplot(dallaslocation, aes(x = Var1, y = Var2)) +
  geom_tile(aes(fill = Freq)) +
  scale_fill_gradient(low = "green", high = "blue")

### There are only a few areas in Dallas that have more than 30 open listings.


## Creating a Linear Regression Model

set.seed(13265)
spl <- sample.split(dallas$average_rate_per_night, 0.8)  # Train at 80%
train <- subset(dallas, spl == TRUE)
test <- subset(dallas, spl == FALSE)
dal_model <- lm(average_rate_per_night ~ bedrooms_count + latitude + longitude,
                data = train)
summary(dal_model)



## Calculating the MSE (Mean Square Error)

predictlm <- predict(dal_model, newdata = test)
mselm <- mean((predictlm - test$average_rate_per_night)^2)
mselm

## Regression Tree Model

cart1 <- rpart(average_rate_per_night ~ bedrooms_count + latitude + longitude,
               data = train, method = "anova")
predictcart <- predict(cart1, newdata = test)
msecart <- mean((predictcart - test$average_rate_per_night)^2)
msecart


prp(cart1)

## Random Forest Model

forest <- randomForest(
  average_rate_per_night ~ bedrooms_count + latitude + longitude,
  data = train
)
predictrf <- predict(forest, newdata = test)
mserf <- mean((predictrf - test$average_rate_per_night)^2)
mserf


## Cleaning the Words

corpus <- Corpus(VectorSource(dallas$description))
# Wrap the base function in content_transformer() so the corpus keeps its
# document structure whichever corpus class Corpus() returns.
corpus <- tm_map(corpus, content_transformer(tolower))
corpus <- tm_map(corpus, removeWords, stopwords("english"))
corpus <- tm_map(corpus, removePunctuation)
corpus <- tm_map(corpus, stemDocument)


freq <- DocumentTermMatrix(corpus)
freq

## Removing Sparse Terms

freq <- removeSparseTerms(freq, 0.995)

# One row per Dallas listing, one column per remaining term.
dallas_sparse <- as.data.frame(as.matrix(freq))
colnames(dallas_sparse) <- make.names(colnames(dallas_sparse))
dallas_sparse$average_rate <- dallas$average_rate_per_night

## Performing Random Forest Model Again

set.seed(2562)
spl2 <- sample.split(dallas_sparse$average_rate, SplitRatio = 0.8)  # Training Set at 80%
train2 <- subset(dallas_sparse, spl2 == TRUE)
test2 <- subset(dallas_sparse, spl2 == FALSE)

newforest <- randomForest(average_rate ~ ., data = train2)
summary(newforest)
predictrf2 <- predict(newforest, newdata = test2)
mserf2 <- mean((predictrf2 - test2$average_rate)^2)
mserf2


## Combining to One Model

dallas_sparse$bedroom <- dallas$bedrooms_count
dallas_sparse$latitude <- dallas$latitude
dallas_sparse$longitude <- dallas$longitude

set.seed(10241)
spl3 <- sample.split(dallas_sparse$average_rate, SplitRatio = 0.8)  # Training at 80%
# The comparison must be `==`. Writing `spl3=TRUE` hands subset() a named
# argument rather than a row filter, which keeps every row and leaks the
# test rows into the training set.
train3 <- subset(dallas_sparse, spl3 == TRUE)
test3 <- subset(dallas_sparse, spl3 == FALSE)
allforest <- randomForest(average_rate ~ ., data = train3)
predictrf3 <- predict(allforest, newdata = test3)
mse_all <- mean((predictrf3 - test3$average_rate)^2)
mse_all

## Wording Visualization

# How often each predictor is used for a split across the forest
vu <- varUsed(allforest, count = TRUE)
vusorted <- sort(vu, decreasing = FALSE, index.return = TRUE)
dotchart(vusorted$x, names(allforest$forest$xlevels[vusorted$ix]))



--------------------------------------------------------------------------------
/Barbeque/Barbeque.R:
--------------------------------------------------------------------------------
# Barbeque Wordcloud

## Loading the Libraries

library(readr)
library(data.table)
library(dplyr)
library(stringr)
library(ggplot2)
library(knitr)
library(DT)
library(tm)
library(wordcloud)


## Changing the Working Directory

setwd('./Kaggle/Barbeque')

## Reading the Dataset

contest <- read_csv("./contest_data.csv")
results_brisket <- read_csv("./results_brisket.csv")
results_chicken <- read_csv("./results_chicken.csv")
results_pork <- read_csv("./results_pork.csv")
results_ribs <- read_csv("./results_ribs.csv")

# Every summary below is shown with the same DT widget settings, so they are
# kept in one place. Called at top level, the returned widget prints itself.
show_table <- function(tbl) {
  datatable(tbl, class = "table-condensed", style = "bootstrap",
            options = list(dom = 'tp'))
}


## Where Do Competitions Happen More Frequently?

most_happening <- contest %>%
  group_by(state_full) %>%
  summarise(count = n()) %>%
  arrange(desc(count))

show_table(most_happening)


## Where Is the Big Prize Money?

big_prize_money <- contest %>%
  group_by(state_full) %>%
  summarise(avg_prize = mean(prize, na.rm = TRUE)) %>%
  arrange(desc(avg_prize))

show_table(big_prize_money)


## Combining the Results
combined_result <- rbind(results_brisket, results_chicken,
                         results_pork, results_ribs)

## Total Score
# Sum of a team's category scores within one contest; data.frame() drops the
# grouping that summarise() leaves behind.
Total_score <- combined_result %>%
  group_by(contest_key, team_name) %>%
  summarise(tscore = sum(score)) %>%
  data.frame() %>%
  arrange(contest_key, desc(tscore))

show_table(Total_score)


## Number of Participants

participants <- Total_score %>%
  group_by(contest_key) %>%
  summarise(No_of_teams = n())

show_table(participants)

rm(participants)
gc()


## Number of Appearances by a Team

number_of_apperance <- Total_score %>%
  group_by(team_name) %>%
  summarise(appearances = n()) %>%
  arrange(desc(appearances))
show_table(number_of_apperance)


## Who's the Best Team Out There?

avg_total_team_score <- Total_score %>%
  group_by(team_name) %>%
  summarise(appearances = n(), avg_total_score = mean(tscore)) %>%
  arrange(desc(avg_total_score))
show_table(avg_total_team_score)


## Word Cloud of High-Scoring Team Names (total score above 600)

# Draw a word cloud of the most frequent terms in `documents`.
#
# documents: character vector; NA entries are ignored.
# max_words: number of top-ranked terms whose frequency sets the display
#            cut-off (default 100, the value this script always used).
#            Terms tied with the cut-off frequency are shown as well.
#
# Returns whatever wordcloud() returns, or invisible NULL (with a warning)
# when no term is left after cleaning.
makeWordCloud <- function(documents, max_words = 100) {
  documents <- documents[!is.na(documents)]

  corpus <- Corpus(VectorSource(tolower(documents)))
  corpus <- tm_map(corpus, removePunctuation)
  corpus <- tm_map(corpus, removeWords, stopwords("english"))

  term_matrix <- as.matrix(DocumentTermMatrix(corpus))
  freq <- colSums(term_matrix)

  if (length(freq) == 0) {
    warning("makeWordCloud(): no terms left to plot", call. = FALSE)
    return(invisible(NULL))
  }

  # Clamp the rank so that inputs with fewer than `max_words` distinct terms
  # no longer fail with "subscript out of bounds".
  cutoff_rank <- min(max_words, length(freq))
  cutoff_freq <- sort(freq, decreasing = TRUE)[[cutoff_rank]]

  wordcloud(names(freq), freq,
            min.freq = cutoff_freq,
            colors = brewer.pal(8, "Dark2"),
            random.color = TRUE)
}


top_score <- Total_score %>%
  group_by(contest_key, team_name) %>%
  filter(tscore > 600)

# head() caps the input at 2000 names without padding shorter vectors with
# NA the way `[1:2000]` does.
makeWordCloud(head(top_score[["team_name"]], 2000))

--------------------------------------------------------------------------------
/Bikeshare Bay Area.Rmd:
--------------------------------------------------------------------------------
---
# Single front-matter block: the document previously opened with a separate
# stock `html_document` header (title "Bikeshare Bay Area"). Only the leading
# block is expected to choose the output format and runtime, so the dashboard
# settings now live there.
title: "Bike Shares Daily"
output:
  flexdashboard::flex_dashboard:
    orientation: columns
    vertical_layout: fill
runtime: shiny
---

```{r global, include=FALSE}
library(flexdashboard)
library(readr)
library(leaflet)
library(DT)
library(tidyverse)
library(lubridate)
library(plotly)

# Loaded once in the `global` chunk; every reactive below filters this table.
trips_df <- read_csv('https://assets.datacamp.com/production/repositories/1448/datasets/1f12031000b09ad096880bceb61f6ca2fd95e2eb/sanfran_bikeshare_joined_oneday.csv') %>%
  mutate(duration_min = duration_sec / 60)
```

Sidebar {.sidebar}
====================

```{r}

sliderInput("duration_slider",
            label = "Select maximum trip duration to display (in minutes):",
            min = 0, max = 120, value = 15, step = 5, dragRange = TRUE)

sliderInput("duration_bin",
            label = "Select # of minutes to bin trip durations:",
            min = 1, max = 15, value = 1, step = 1)

# Trips no longer than the slider value; the slider is in minutes while
# duration_sec is in seconds, hence the factor of 60.
show_trips_df <- reactive({

  trips_df %>%
    filter(duration_sec <= input$duration_slider * 60)

})
```

Overview
====================

Column {data-width=450}
-----------------------------------------------------------------------

### Origins

```{r}

# One circle per start station, with radius set to its trip count.
renderLeaflet({
  show_trips_df() %>%
    rename(latitude = start_latitude,
           longitude = start_longitude) %>%
    group_by(start_station_id, latitude, longitude) %>%
    count() %>%
    leaflet() %>%
    addTiles() %>%
    addCircles(radius = ~n)
})

```

Column {data-width=350}
-----------------------------------------------------------------------

### Total Trips

```{r}

renderValueBox({
  trip_count <- nrow(show_trips_df())
  valueBox(prettyNum(trip_count, big.mark = ','),
           icon = 'fa-bicycle')
})

```

### Trips by Start Time

```{r}

renderPlot({
  show_trips_df() %>%
    mutate(hour = hour(start_date)) %>%
    group_by(hour) %>%
    summarize(`Trips Started` = n()) %>%
    ggplot(aes(x = hour, y = `Trips Started`)) +
    theme_bw() +
    ylab('Trips Started \n') +
    geom_bar(stat = 'identity')
})


```

Duration
====================

### Trip Durations

```{r}

renderPlot({
  # Histogram of trip length in minutes; bin width comes from the sidebar.
  show_trips_df() %>%
    mutate(`Trip Duration (min)` = duration_sec / 60) %>%
    ggplot(aes(x = `Trip Duration (min)`)) +
    theme_bw() +
    geom_histogram(binwidth = input$duration_bin) +
    ylab('# Trips')
})


```

--------------------------------------------------------------------------------
/Caravan/Caravan Insurance.R:
--------------------------------------------------------------------------------
# Caravan Analysis
#
# Splits the challenge data on its ORIGIN column, rebalances the training
# rows with SMOTE, fits a boosted-tree ("blackboost") and an xgboost model
# through caret, and evaluates both on the held-out rows.

## Loading the Libraries

library(ggplot2)
library(readr)
library(Amelia)
library(data.table)
library(RColorBrewer)
library(mlbench)
library(DMwR)
library(caret)
library(pROC)



## Changing the Working Directory

setwd('./Kaggle/Caravan')

## Reading the Dataset

cvan <- fread("./caravan-insurance-challenge.csv")
dim(cvan)
head(cvan)

## Creating the Train Test Split Dataset

train <- cvan[ORIGIN == "train", ]
test <- cvan[ORIGIN != "train", ]

## Putting Them on Tables

# Class counts and class proportions of the outcome
ftable(train[, CARAVAN])
prop.table(ftable(train[, CARAVAN]))


tr_outcome <- train[, CARAVAN]

tr_feat <- train[, !"CARAVAN"]

test_outcome <- test[, CARAVAN]
test_features <- test[, !"CARAVAN"]

## Looking at the Missing Map

missmap(train)


## Looking at the Plots Between MGODRK and Caravan

ggplot(train[, .N, by = list(MGODRK, CARAVAN)],
       aes(x = MGODRK, y = N, fill = factor(CARAVAN),
           color = factor(CARAVAN), alpha = .3)) +
  geom_bar(stat = "identity")


## Looking at the Plots Between MGODOV and Caravan

ggplot(train[, .N, by = list(MGODOV, CARAVAN)],
       aes(x = MGODOV, y = N, fill = factor(CARAVAN),
           color = factor(CARAVAN), alpha = .3)) +
  geom_bar(stat = "identity")


## Deriving a Religion Label

# First matching rule wins: MGODRK == 1 -> "R", MGODPR == 1 -> "P",
# MGODOV == 1 -> "O", otherwise "N". `:=` adds the column by reference.
train <- train[, Religion := ifelse(MGODRK == 1, "R",
                             ifelse(MGODPR == 1, "P",
                             ifelse(MGODOV == 1, "O", "N")))]

ftable(train[, MRELGE])


## MRELGE Plot

ggplot(train[, .N, by = MRELGE],
       aes(x = factor(MRELGE), y = N,
           color = factor(MRELGE), fill = factor(MRELGE),
           alpha = 0.3, size = N)) +
  geom_point()


## Looking At Variables Close to Zero

nearzero_train <- nearZeroVar(train, saveMetrics = TRUE,
                              freqCut = 95 / 5)
nearzero_train


## Dropping the Origin Variable

# ORIGIN was only needed to separate train from test.
train <- train[, ORIGIN := NULL]
test <- test[, ORIGIN := NULL]


## Outcomes and Features After Dropping Origin

outcome.train <- train[, CARAVAN]

features.train <- train[, !"CARAVAN"]

outcome.test <- test[, CARAVAN]
# The column name must be quoted: a bare `!CARAVAN` is evaluated as an
# expression and returns the logical negation of the outcome column rather
# than the table without that column.
features.test <- test[, !"CARAVAN"]


# Relabel the outcome; the class-probability summaries requested below
# (classProbs = TRUE) need class labels that are valid R names.
train <- train[, CARAVAN := ifelse(CARAVAN == 0, "No", "Yes")]
test <- test[, CARAVAN := ifelse(CARAVAN == 0, "No", "Yes")]


## Train Table for Caravan
ftable(train[, CARAVAN])

## Test Table for Caravan
ftable(test[, CARAVAN])

## Gathering the Smote Information

train <- train[, CARAVAN := factor(CARAVAN)]
train <- train[, Religion := NULL]
trsmote <- SMOTE(CARAVAN ~ MHKOOP, perc.over = 200, perc.under = 150,
                 data = train,
                 k = 10)


dim(trsmote)

## Comparing Smote to Original

ftable(train[, CARAVAN])

ftable(trsmote[, CARAVAN])

## Running Train Control

# `verboseIter` is spelled out in full; the previous `verbose = 1` only
# worked through partial argument matching.
trCtrl <- trainControl(method = "repeatedcv", repeats = 3,
                       summaryFunction = twoClassSummary,
                       classProbs = TRUE, verboseIter = TRUE)

control <- rfeControl(functions = rfFuncs, method = "cv", number = 10)

## Boosted Trees

bst.grid <- expand.grid(mstop = 50,
                        maxdepth = 12)

# `train` is also the name of the training table above; in call position R
# skips non-function bindings, so this still resolves to caret's train().
bstFit <- train(CARAVAN ~ ., data = trsmote,
                trControl = trCtrl,
                method = "blackboost",
                tuneGrid = bst.grid,
                metric = "ROC")

## Variable Importance for the Boosted-Tree Fit

top <- varImp(bstFit, scale = FALSE)
print(top)


plot(top)

## Predicted Values Boosted Trees

bstFit


## Blackboost Predictions for Train

blackboost_pred <- data.frame(predict(bstFit))

blackboost_pred_prob <- predict(bstFit, type = "prob")


ggplot(blackboost_pred, aes(x = blackboost_pred_prob$Yes,
                            fill = trsmote[, CARAVAN],
                            colour = trsmote[, CARAVAN],
                            alpha = 0.3)) + geom_density()

## Blackboost Predictions for Test

# Name the column explicitly instead of relying on the name data.frame()
# derives from the call text.
blackboost_pred_test <- data.frame(pred = predict(bstFit, newdata = test))

blackboost_pred_prob_test <- predict(bstFit, newdata = test, type = "prob")

ggplot(blackboost_pred_test, aes(x = blackboost_pred_prob_test$Yes,
                                 fill = test[, CARAVAN],
                                 colour = test[, CARAVAN],
                                 alpha = 0.3)) + geom_histogram()


## Confusion Matrix

# test$CARAVAN is still a character vector (only the training copy was made
# a factor); give it the same levels as the predictions before comparing.
test_truth <- factor(test[, CARAVAN],
                     levels = levels(blackboost_pred_test$pred))
confusionMatrix(blackboost_pred_test$pred, test_truth)

## Test Class

# 0/1 encodings of predicted and true class for the ROC computation
bst_test_class <- ifelse(blackboost_pred_test$pred == "Yes", 1, 0)
test_class <- ifelse(test[, CARAVAN] == "Yes", 1, 0)

roc(test_class, bst_test_class)

## Using XG Boost

xgb.grid <- expand.grid(nrounds = 3,
                        max_depth = 12,
                        eta = 0.1,
                        gamma = 1,
                        colsample_bytree = 1,
                        min_child_weight = 1,
                        subsample = 0.75)
xgbFit <- train(CARAVAN ~ .,
                data = trsmote,
                method = "xgbTree",
                metric = "ROC",
                trControl = trCtrl,
                tuneGrid = xgb.grid)


## Getting the Results and Plotting It

xgbFit$results


ggplot(xgbFit$results, aes(x = eta, y = ROC,
                           fill = ROC, size = ROC,
                           color = factor(gamma))) + geom_point()


## Predicted Values

xgbFit.pred <- predict(xgbFit)
xgbFit.pred.prob <- predict(xgbFit, type = "prob")


xgbFit.pred <- data.frame(xgbFit.pred)


xgbFit.pred.test <- predict(xgbFit, newdata = test)
xgbFit.pred.test.prob <- predict(xgbFit, newdata = test, type = "prob")


xgbFit.pred.test <- data.frame(xgbFit.pred.test)
colnames(xgbFit.pred.test)


## Plotting the Values

ggplot(xgbFit.pred.test, aes(x = xgbFit.pred.test.prob$Yes,
                             fill = test[, CARAVAN],
                             color = test[, CARAVAN],
                             alpha = 0.3)) + geom_histogram()


## Gathering the Confusion Matrix

xgb_test_class <- ifelse(xgbFit.pred.test$xgbFit.pred.test == "Yes", 1, 0)

# confusionMatrix() compares factors, so wrap both 0/1 vectors with one
# shared level set; fixing the levels also keeps the table 2 x 2 when a
# class is never predicted.
binary_levels <- c(0, 1)
confusionMatrix(factor(xgb_test_class, levels = binary_levels),
                factor(test_class, levels = binary_levels))

--------------------------------------------------------------------------------
/College Salaries/College Salaries.R:
--------------------------------------------------------------------------------
# College Salaries

## Loading the Libraries

library(tidyverse)
library(stringr)
library(gridExtra)
library(plotly)
library(readr)

## Changing the Working Directory

setwd('./Kaggle/College Salaries')

## Reading the First Dataset

type <- read_csv("./salaries-by-college-type.csv")


## Reformat the Salary

# Convert salary text such as "$72,200.00" to an integer number of dollars.
#
# salary: character vector. NA stays NA; the placeholder "N/A" and empty
#         strings are mapped to NA.
#
# Returns an integer vector of the same length as `salary`. The function is
# vectorised because it is applied to whole columns inside mutate(); the
# earlier scalar `if (is.na(salary))` guard is an error on a vector in
# R >= 4.2.
salary_reform <- function(salary) {
  cleaned <- str_replace_all(salary, "\\$|,", "")
  cleaned[cleaned %in% c("N/A", "")] <- NA
  as.integer(cleaned)
}


## Fixing Up the Dataset

# Keep the first four columns (school name, school type and the two median
# salaries) and turn the salary text into numbers.
type <- type %>%
  select(1:4) %>%
  mutate(
    `Starting Median Salary` = salary_reform(`Starting Median Salary`),
    `Mid-Career Median Salary` = salary_reform(`Mid-Career Median Salary`)
  )


## Calculating the Percentage Change from Starting to Mid-Career

type <- type %>%
  mutate(
    `Percentage Change` = round((`Mid-Career Median Salary` - `Starting Median Salary`) / `Starting Median Salary`, 3) * 100
  )

knitr::kable(head(type))


## Visualization of the Salary Distribution

# Two overlaid histograms: starting salaries (red), mid-career (blue).
type %>%
  ggplot(aes(`Starting Median Salary`)) +
  geom_histogram(fill = "indianred", color = "lightgrey", binwidth = 2000) +
  geom_histogram(aes(`Mid-Career Median Salary`),
                 fill = "navyblue", color = "lightgrey", binwidth = 2000,
                 alpha = 0.6) +
  ggtitle("Broader Distribution for Mid-Career Salaries") +
  xlab("Salary") + ylab("Count")

## Boxplots of the Starting and Median Salaries


# Median values for starting and mid-career salaries
median_start <- median(type$`Starting Median Salary`)
mid <- median(type$`Mid-Career Median Salary`)

# Box Plot for Starting Salaries by School Type
# (dashed red line = overall median across all schools)
school_type <- type %>%
  ggplot(aes(`School Type`, `Starting Median Salary`, fill = `School Type`)) +
  geom_jitter(color = "darkgrey", alpha = 0.8) +
  geom_boxplot(alpha = 0.6) +
  geom_abline(slope = 0, intercept = median_start, color = "red",
              linetype = 2, alpha = 0.5) +
  ggtitle("Engineering and Ivy League Lead the Way in Starting Salaries") +
  xlab("") + ylab("Starting Salary") +
  theme_bw() +
  theme(legend.position = "none")

# Box Plot for Mid-Career Salaries by School Type
mid_school <- type %>%
  ggplot(aes(`School Type`, `Mid-Career Median Salary`, fill = `School Type`)) +
  geom_jitter(colour = "darkgrey", alpha = 0.8) +
  geom_boxplot(alpha = 0.6) +
  geom_abline(slope = 0, intercept = mid, colour = "red",
              linetype = 2, alpha = 0.5) +
  ggtitle("Higher Upward Mobility for Ivy League Over Engineering Schools Over Time") +
  xlab("") + ylab("Mid-Career Salary") +
  theme_bw() +
  theme(legend.position = "none")

grid.arrange(school_type, mid_school, ncol = 1)


## Looking at the Top 10 Salaries

# Columns 3:4 are the starting and mid-career medians; they are stacked into
# Career/Salary pairs so each school becomes one line between two points.
# The hover text uses "<br>", the line-break tag plotly understands.
type %>%
  top_n(10, wt = `Mid-Career Median Salary`) %>%
  gather("Career", "Salary", 3:4) %>%
  mutate(Career = factor(Career, levels = c("Starting Median Salary", "Mid-Career Median Salary"))) %>%
  plot_ly(
    x = ~Career, y = ~Salary, color = ~`School Name`, type = "scatter",
    mode = "lines+markers",
    text = ~paste(`School Name`, "<br>", `School Type`, "<br>Change:", `Percentage Change`, "%"),
    colors = "Paired"
  ) %>%
  layout(
    title = "Dartmouth with the Largest Salary Increase from Number 10 to Number 1",
    showlegend = FALSE,
    xaxis = list(showticklabels = FALSE,
                 title = "Universities with the Top Median Salaries"),
    yaxis = list(title = "")
  )

# Focusing on the Region

## Loading the Dataset

region <- read_csv("./salaries-by-region.csv")


## Reformat Salary by Region

region <- region %>%
  mutate(
    `Starting Median Salary` = salary_reform(`Starting Median Salary`),
    `Mid-Career Median Salary` = salary_reform(`Mid-Career Median Salary`)
  )

## Barplot of the Region

region %>%
  group_by(Region) %>%
  ### rank by mid-career and starting salary combined to break ties
  top_n(7, wt = `Mid-Career Median Salary` + `Starting Median Salary`) %>%
  mutate(Rank = rank(desc(`Mid-Career Median Salary`), ties.method = "first")) %>%
  plot_ly(x = ~Region, y = ~`Mid-Career Median Salary`, color = ~factor(Rank),
          type = "bar", colors = "Set3",
          text = ~paste(`School Name`, "<br>Rank:", Rank)) %>%
  layout(showlegend = FALSE,
         title = "Universities with the Highest Mid-Career Salaries by Region",
         yaxis = list(title = "Mid-Career Median Salary"),
         xaxis = list(title = ""))

-------------------------------------------------------------------------------- /College Salaries/degrees-that-pay-back.csv: -------------------------------------------------------------------------------- 1 | Undergraduate Major,Starting Median Salary,Mid-Career Median Salary,Percent change from Starting to Mid-Career Salary,Mid-Career 10th Percentile Salary,Mid-Career 25th Percentile Salary,Mid-Career 75th Percentile Salary,Mid-Career 90th Percentile Salary 2 | Accounting,"$46,000.00","$77,100.00",67.6,"$42,200.00","$56,100.00","$108,000.00","$152,000.00" 3 | Aerospace Engineering,"$57,700.00","$101,000.00",75,"$64,300.00","$82,100.00","$127,000.00","$161,000.00" 4 | Agriculture,"$42,600.00","$71,900.00",68.8,"$36,300.00","$52,100.00","$96,300.00","$150,000.00" 5 | Anthropology,"$36,800.00","$61,500.00",67.1,"$33,800.00","$45,500.00","$89,300.00","$138,000.00" 6 | Architecture,"$41,600.00","$76,800.00",84.6,"$50,600.00","$62,200.00","$97,000.00","$136,000.00" 7 | Art History,"$35,800.00","$64,900.00",81.3,"$28,800.00","$42,200.00","$87,400.00","$125,000.00" 8 | Biology,"$38,800.00","$64,800.00",67,"$36,900.00","$47,400.00","$94,500.00","$135,000.00" 9 | Business Management,"$43,000.00","$72,100.00",67.7,"$38,800.00","$51,500.00","$102,000.00","$147,000.00" 10 | Chemical Engineering,"$63,200.00","$107,000.00",69.3,"$71,900.00","$87,300.00","$143,000.00","$194,000.00" 11 | Chemistry,"$42,600.00","$79,900.00",87.6,"$45,300.00","$60,700.00","$108,000.00","$148,000.00" 12 | Civil Engineering,"$53,900.00","$90,500.00",67.9,"$63,400.00","$75,100.00","$115,000.00","$148,000.00" 13 | Communications,"$38,100.00","$70,000.00",83.7,"$37,500.00","$49,700.00","$98,800.00","$143,000.00" 14 | Computer 
Engineering,"$61,400.00","$105,000.00",71,"$66,100.00","$84,100.00","$135,000.00","$162,000.00" 15 | Computer Science,"$55,900.00","$95,500.00",70.8,"$56,000.00","$74,900.00","$122,000.00","$154,000.00" 16 | Construction,"$53,700.00","$88,900.00",65.5,"$56,300.00","$68,100.00","$118,000.00","$171,000.00" 17 | Criminal Justice,"$35,000.00","$56,300.00",60.9,"$32,200.00","$41,600.00","$80,700.00","$107,000.00" 18 | Drama,"$35,900.00","$56,900.00",58.5,"$36,700.00","$41,300.00","$79,100.00","$153,000.00" 19 | Economics,"$50,100.00","$98,600.00",96.8,"$50,600.00","$70,600.00","$145,000.00","$210,000.00" 20 | Education,"$34,900.00","$52,000.00",49,"$29,300.00","$37,900.00","$73,400.00","$102,000.00" 21 | Electrical Engineering,"$60,900.00","$103,000.00",69.1,"$69,300.00","$83,800.00","$130,000.00","$168,000.00" 22 | English,"$38,000.00","$64,700.00",70.3,"$33,400.00","$44,800.00","$93,200.00","$133,000.00" 23 | Film,"$37,900.00","$68,500.00",80.7,"$33,900.00","$45,500.00","$100,000.00","$136,000.00" 24 | Finance,"$47,900.00","$88,300.00",84.3,"$47,200.00","$62,100.00","$128,000.00","$195,000.00" 25 | Forestry,"$39,100.00","$62,600.00",60.1,"$41,000.00","$49,300.00","$78,200.00","$111,000.00" 26 | Geography,"$41,200.00","$65,500.00",59,"$40,000.00","$50,000.00","$90,800.00","$132,000.00" 27 | Geology,"$43,500.00","$79,500.00",82.8,"$45,000.00","$59,600.00","$101,000.00","$156,000.00" 28 | Graphic Design,"$35,700.00","$59,800.00",67.5,"$36,000.00","$45,500.00","$80,800.00","$112,000.00" 29 | Health Care Administration,"$38,800.00","$60,600.00",56.2,"$34,600.00","$45,600.00","$78,800.00","$101,000.00" 30 | History,"$39,200.00","$71,000.00",81.1,"$37,000.00","$49,200.00","$103,000.00","$149,000.00" 31 | Hospitality & Tourism,"$37,800.00","$57,500.00",52.1,"$35,500.00","$43,600.00","$81,900.00","$124,000.00" 32 | Industrial Engineering,"$57,700.00","$94,700.00",64.1,"$57,100.00","$72,300.00","$132,000.00","$173,000.00" 33 | Information Technology 
(IT),"$49,100.00","$74,800.00",52.3,"$44,500.00","$56,700.00","$96,700.00","$129,000.00" 34 | Interior Design,"$36,100.00","$53,200.00",47.4,"$35,700.00","$42,600.00","$72,500.00","$107,000.00" 35 | International Relations,"$40,900.00","$80,900.00",97.8,"$38,200.00","$56,000.00","$111,000.00","$157,000.00" 36 | Journalism,"$35,600.00","$66,700.00",87.4,"$38,400.00","$48,300.00","$97,700.00","$145,000.00" 37 | Management Information Systems (MIS),"$49,200.00","$82,300.00",67.3,"$45,300.00","$60,500.00","$108,000.00","$146,000.00" 38 | Marketing,"$40,800.00","$79,600.00",95.1,"$42,100.00","$55,600.00","$119,000.00","$175,000.00" 39 | Math,"$45,400.00","$92,400.00",103.5,"$45,200.00","$64,200.00","$128,000.00","$183,000.00" 40 | Mechanical Engineering,"$57,900.00","$93,600.00",61.7,"$63,700.00","$76,200.00","$120,000.00","$163,000.00" 41 | Music,"$35,900.00","$55,000.00",53.2,"$26,700.00","$40,200.00","$88,000.00","$134,000.00" 42 | Nursing,"$54,200.00","$67,000.00",23.6,"$47,600.00","$56,400.00","$80,900.00","$98,300.00" 43 | Nutrition,"$39,900.00","$55,300.00",38.6,"$33,900.00","$44,500.00","$70,500.00","$99,200.00" 44 | Philosophy,"$39,900.00","$81,200.00",103.5,"$35,500.00","$52,800.00","$127,000.00","$168,000.00" 45 | Physician Assistant,"$74,300.00","$91,700.00",23.4,"$66,400.00","$75,200.00","$108,000.00","$124,000.00" 46 | Physics,"$50,300.00","$97,300.00",93.4,"$56,000.00","$74,200.00","$132,000.00","$178,000.00" 47 | Political Science,"$40,800.00","$78,200.00",91.7,"$41,200.00","$55,300.00","$114,000.00","$168,000.00" 48 | Psychology,"$35,900.00","$60,400.00",68.2,"$31,600.00","$42,100.00","$87,500.00","$127,000.00" 49 | Religion,"$34,100.00","$52,000.00",52.5,"$29,700.00","$36,500.00","$70,900.00","$96,400.00" 50 | Sociology,"$36,500.00","$58,200.00",59.5,"$30,700.00","$40,400.00","$81,200.00","$118,000.00" 51 | Spanish,"$34,000.00","$53,100.00",56.2,"$31,000.00","$40,000.00","$76,800.00","$96,400.00" 52 | 
-------------------------------------------------------------------------------- /College Salaries/salaries-by-college-type.csv: -------------------------------------------------------------------------------- 1 | School Name,School Type,Starting Median Salary,Mid-Career Median Salary,Mid-Career 10th Percentile Salary,Mid-Career 25th Percentile Salary,Mid-Career 75th Percentile Salary,Mid-Career 90th Percentile Salary 2 | Massachusetts Institute of Technology (MIT),Engineering,"$72,200.00","$126,000.00","$76,800.00","$99,200.00","$168,000.00","$220,000.00" 3 | California Institute of Technology (CIT),Engineering,"$75,500.00","$123,000.00",N/A,"$104,000.00","$161,000.00",N/A 4 | Harvey Mudd College,Engineering,"$71,800.00","$122,000.00",N/A,"$96,000.00","$180,000.00",N/A 5 | "Polytechnic University of New York, Brooklyn",Engineering,"$62,400.00","$114,000.00","$66,800.00","$94,300.00","$143,000.00","$190,000.00" 6 | Cooper Union,Engineering,"$62,200.00","$114,000.00",N/A,"$80,200.00","$142,000.00",N/A 7 | Worcester Polytechnic Institute (WPI),Engineering,"$61,000.00","$114,000.00","$80,000.00","$91,200.00","$137,000.00","$180,000.00" 8 | Carnegie Mellon University (CMU),Engineering,"$61,800.00","$111,000.00","$63,300.00","$80,100.00","$150,000.00","$209,000.00" 9 | Rensselaer Polytechnic Institute (RPI),Engineering,"$61,100.00","$110,000.00","$71,600.00","$85,500.00","$140,000.00","$182,000.00" 10 | Georgia Institute of Technology,Engineering,"$58,300.00","$106,000.00","$67,200.00","$85,200.00","$137,000.00","$183,000.00" 11 | Colorado School of Mines,Engineering,"$58,100.00","$106,000.00","$62,200.00","$87,900.00","$142,000.00","$201,000.00" 12 | Stevens Institute of Technology,Engineering,"$60,600.00","$105,000.00","$68,700.00","$81,900.00","$138,000.00","$185,000.00" 13 | Illinois Institute of Technology (IIT),Engineering,"$56,000.00","$97,800.00","$56,100.00","$77,400.00","$121,000.00","$165,000.00" 14 | Wentworth Institute of 
Technology,Engineering,"$53,000.00","$96,700.00","$55,200.00","$74,000.00","$117,000.00","$153,000.00" 15 | Virginia Polytechnic Institute and State University (Virginia Tech),Engineering,"$53,500.00","$95,400.00","$50,600.00","$71,400.00","$124,000.00","$163,000.00" 16 | South Dakota School of Mines & Technology,Engineering,"$55,800.00","$93,400.00","$71,500.00","$81,900.00","$122,000.00","$147,000.00" 17 | New Mexico Institute of Mining and Technology (New Mexico Tech),Engineering,"$51,000.00","$93,400.00",N/A,"$67,400.00","$123,000.00",N/A 18 | Rochester Institute of Technology (RIT),Engineering,"$48,900.00","$84,600.00","$45,000.00","$62,100.00","$112,000.00","$159,000.00" 19 | Embry-Riddle Aeronautical University (ERAU),Engineering,"$52,700.00","$80,700.00","$49,800.00","$64,000.00","$106,000.00","$142,000.00" 20 | Tennessee Technological University,Engineering,"$46,200.00","$80,000.00","$42,100.00","$62,600.00","$99,500.00","$121,000.00" 21 | University of Illinois at Urbana-Champaign (UIUC),Party,"$52,900.00","$96,100.00","$48,200.00","$68,900.00","$132,000.00","$177,000.00" 22 | "University of Maryland, College Park",Party,"$52,000.00","$95,000.00","$50,400.00","$68,300.00","$126,000.00","$166,000.00" 23 | "University of California, Santa Barbara (UCSB)",Party,"$50,500.00","$95,000.00","$51,300.00","$71,200.00","$129,000.00","$173,000.00" 24 | University of Texas (UT) - Austin,Party,"$49,700.00","$93,900.00","$50,100.00","$67,400.00","$129,000.00","$188,000.00" 25 | State University of New York (SUNY) at Albany,Party,"$44,500.00","$92,200.00","$47,000.00","$63,100.00","$135,000.00","$209,000.00" 26 | University of Florida (UF),Party,"$47,100.00","$87,900.00","$45,400.00","$62,900.00","$120,000.00","$172,000.00" 27 | Louisiana State University (LSU),Party,"$46,900.00","$87,800.00","$43,700.00","$61,300.00","$120,000.00","$165,000.00" 28 | University of Georgia (UGA),Party,"$44,100.00","$86,000.00","$43,100.00","$57,800.00","$118,000.00","$164,000.00" 29 | 
Pennsylvania State University (PSU),Party,"$49,900.00","$85,700.00","$46,300.00","$62,000.00","$117,000.00","$160,000.00" 30 | Arizona State University (ASU),Party,"$47,400.00","$84,100.00","$44,600.00","$60,700.00","$114,000.00","$163,000.00" 31 | "Indiana University (IU), Bloomington",Party,"$46,300.00","$84,000.00","$43,600.00","$60,400.00","$119,000.00","$178,000.00" 32 | University of Iowa (UI),Party,"$44,700.00","$83,900.00","$43,300.00","$61,100.00","$116,000.00","$163,000.00" 33 | Randolph-Macon College,Party,"$42,600.00","$83,600.00",N/A,"$54,100.00","$123,000.00",N/A 34 | "University of Alabama, Tuscaloosa",Party,"$41,300.00","$81,400.00","$40,100.00","$56,500.00","$117,000.00","$161,000.00" 35 | University of Mississippi,Party,"$41,400.00","$79,700.00","$40,400.00","$53,500.00","$108,000.00","$186,000.00" 36 | University of New Hampshire (UNH),Party,"$41,800.00","$78,300.00","$41,700.00","$56,400.00","$114,000.00","$147,000.00" 37 | West Virginia University (WVU),Party,"$43,100.00","$78,100.00","$39,700.00","$55,700.00","$106,000.00","$141,000.00" 38 | University of Tennessee,Party,"$43,800.00","$74,600.00","$41,900.00","$53,200.00","$106,000.00","$153,000.00" 39 | Ohio University,Party,"$42,200.00","$73,400.00","$36,600.00","$52,800.00","$106,000.00","$150,000.00" 40 | Florida State University (FSU),Party,"$42,100.00","$73,000.00","$39,600.00","$52,800.00","$107,000.00","$156,000.00" 41 | Bucknell University,Liberal Arts,"$54,100.00","$110,000.00","$62,800.00","$80,600.00","$156,000.00","$251,000.00" 42 | Colgate University,Liberal Arts,"$52,800.00","$108,000.00","$60,000.00","$76,700.00","$167,000.00","$265,000.00" 43 | Amherst College,Liberal Arts,"$54,500.00","$107,000.00",N/A,"$84,900.00","$162,000.00",N/A 44 | Lafayette College,Liberal Arts,"$53,900.00","$107,000.00","$70,600.00","$79,300.00","$144,000.00","$204,000.00" 45 | Bowdoin College,Liberal Arts,"$48,100.00","$107,000.00",N/A,"$74,600.00","$146,000.00",N/A 46 | College of the Holy 
Cross,Liberal Arts,"$50,200.00","$106,000.00",N/A,"$65,600.00","$143,000.00",N/A 47 | Occidental College,Liberal Arts,"$51,900.00","$105,000.00",N/A,"$54,800.00","$157,000.00",N/A 48 | Washington and Lee University,Liberal Arts,"$53,600.00","$104,000.00",N/A,"$82,800.00","$146,000.00",N/A 49 | Swarthmore College,Liberal Arts,"$49,700.00","$104,000.00",N/A,"$67,200.00","$167,000.00",N/A 50 | Davidson College,Liberal Arts,"$46,100.00","$104,000.00",N/A,"$70,500.00","$146,000.00",N/A 51 | Carleton College,Liberal Arts,"$47,500.00","$103,000.00",N/A,"$69,400.00","$141,000.00",N/A 52 | Williams College,Liberal Arts,"$51,700.00","$102,000.00",N/A,"$76,400.00","$143,000.00",N/A 53 | Pomona College,Liberal Arts,"$48,600.00","$101,000.00",N/A,"$63,300.00","$161,000.00",N/A 54 | "Wesleyan University (Middletown, Connecticut)",Liberal Arts,"$46,500.00","$97,900.00","$42,000.00","$62,500.00","$126,000.00","$215,000.00" 55 | Bates College,Liberal Arts,"$47,300.00","$96,500.00",N/A,"$60,700.00","$162,000.00",N/A 56 | Union College,Liberal Arts,"$47,200.00","$95,800.00","$48,700.00","$75,200.00","$135,000.00","$230,000.00" 57 | University of Richmond,Liberal Arts,"$48,600.00","$94,600.00","$44,500.00","$59,400.00","$151,000.00","$211,000.00" 58 | Vassar College,Liberal Arts,"$46,000.00","$94,600.00",N/A,"$60,600.00","$123,000.00",N/A 59 | Middlebury College,Liberal Arts,"$47,700.00","$94,200.00",N/A,"$69,100.00","$129,000.00",N/A 60 | Mount Holyoke College,Liberal Arts,"$42,400.00","$94,100.00",N/A,"$57,100.00","$131,000.00",N/A 61 | Franklin and Marshall College,Liberal Arts,"$49,100.00","$92,800.00",N/A,"$55,800.00","$185,000.00",N/A 62 | DePauw University,Liberal Arts,"$41,400.00","$88,300.00","$49,500.00","$57,400.00","$133,000.00","$185,000.00" 63 | St. 
Olaf College,Liberal Arts,"$45,300.00","$86,200.00","$41,300.00","$61,000.00","$120,000.00","$185,000.00" 64 | Colby College,Liberal Arts,"$46,400.00","$85,800.00",N/A,"$63,500.00","$129,000.00",N/A 65 | Gettysburg College,Liberal Arts,"$44,700.00","$85,800.00",N/A,"$66,300.00","$132,000.00",N/A 66 | Siena College,Liberal Arts,"$45,500.00","$85,200.00","$38,700.00","$58,400.00","$129,000.00","$189,000.00" 67 | Smith College,Liberal Arts,"$44,000.00","$83,900.00","$45,100.00","$59,800.00","$129,000.00","$184,000.00" 68 | Hamilton College,Liberal Arts,"$49,200.00","$83,700.00",N/A,"$51,900.00","$123,000.00",N/A 69 | Randolph-Macon College,Liberal Arts,"$42,600.00","$83,600.00",N/A,"$54,100.00","$123,000.00",N/A 70 | Wellesley College,Liberal Arts,"$42,800.00","$83,500.00",N/A,"$58,600.00","$125,000.00",N/A 71 | Denison University,Liberal Arts,"$42,000.00","$83,500.00",N/A,"$62,100.00","$122,000.00",N/A 72 | Oberlin College,Liberal Arts,"$43,400.00","$81,600.00",N/A,"$46,400.00","$128,000.00",N/A 73 | University of Puget Sound,Liberal Arts,"$46,600.00","$81,500.00","$48,900.00","$60,100.00","$104,000.00","$137,000.00" 74 | Colorado College (CC),Liberal Arts,"$38,500.00","$81,400.00",N/A,"$43,000.00","$148,000.00",N/A 75 | Reed College,Liberal Arts,"$40,500.00","$81,100.00",N/A,"$67,400.00","$101,000.00",N/A 76 | Gustavus Adolphus College,Liberal Arts,"$44,500.00","$80,600.00",N/A,"$49,300.00","$101,000.00",N/A 77 | Whitman College,Liberal Arts,"$43,500.00","$80,100.00",N/A,"$64,800.00","$111,000.00",N/A 78 | Ursinus College,Liberal Arts,"$42,100.00","$80,000.00","$35,600.00","$54,300.00","$100,000.00","$160,000.00" 79 | Juniata College,Liberal Arts,"$41,800.00","$78,900.00",N/A,"$67,200.00","$110,000.00",N/A 80 | Wittenberg University,Liberal Arts,"$39,200.00","$78,200.00",N/A,"$54,100.00","$131,000.00",N/A 81 | Grinnell College,Liberal Arts,"$42,600.00","$76,600.00",N/A,"$65,100.00","$116,000.00",N/A 82 | Skidmore College,Liberal 
Arts,"$41,600.00","$74,600.00",N/A,"$42,800.00","$147,000.00",N/A 83 | Moravian College,Liberal Arts,"$42,500.00","$74,400.00",N/A,"$56,700.00","$94,900.00",N/A 84 | Lewis & Clark College,Liberal Arts,"$38,900.00","$72,600.00","$38,200.00","$53,400.00","$104,000.00","$140,000.00" 85 | Fort Lewis College,Liberal Arts,"$42,000.00","$69,800.00",N/A,"$55,000.00","$94,000.00",N/A 86 | Thomas Aquinas College,Liberal Arts,"$41,500.00","$67,500.00",N/A,"$44,600.00","$93,100.00",N/A 87 | Evergreen State College,Liberal Arts,"$39,500.00","$63,900.00","$38,800.00","$47,200.00","$91,600.00","$120,000.00" 88 | Dartmouth College,Ivy League,"$58,000.00","$134,000.00","$63,100.00","$90,200.00","$234,000.00","$321,000.00" 89 | Princeton University,Ivy League,"$66,500.00","$131,000.00","$68,900.00","$100,000.00","$190,000.00","$261,000.00" 90 | Yale University,Ivy League,"$59,100.00","$126,000.00","$58,000.00","$80,600.00","$198,000.00","$326,000.00" 91 | Harvard University,Ivy League,"$63,400.00","$124,000.00","$54,800.00","$86,200.00","$179,000.00","$288,000.00" 92 | University of Pennsylvania,Ivy League,"$60,900.00","$120,000.00","$55,900.00","$79,200.00","$192,000.00","$282,000.00" 93 | Cornell University,Ivy League,"$60,300.00","$110,000.00","$56,800.00","$79,800.00","$160,000.00","$210,000.00" 94 | Brown University,Ivy League,"$56,200.00","$109,000.00","$55,400.00","$74,400.00","$159,000.00","$228,000.00" 95 | Columbia University,Ivy League,"$59,400.00","$107,000.00","$50,300.00","$71,900.00","$161,000.00","$241,000.00" 96 | "University of California, Berkeley",State,"$59,900.00","$112,000.00","$59,500.00","$81,000.00","$149,000.00","$201,000.00" 97 | University of Virginia (UVA),State,"$52,700.00","$103,000.00","$52,200.00","$71,800.00","$146,000.00","$215,000.00" 98 | Cal Poly San Luis Obispo,State,"$57,200.00","$101,000.00","$55,000.00","$74,700.00","$133,000.00","$178,000.00" 99 | University of California at Los Angeles 
(UCLA),State,"$52,600.00","$101,000.00","$51,300.00","$72,500.00","$139,000.00","$193,000.00" 100 | "University of California, San Diego (UCSD)",State,"$51,100.00","$101,000.00","$51,700.00","$75,400.00","$131,000.00","$177,000.00" 101 | "University of California, Davis",State,"$52,300.00","$99,600.00","$52,000.00","$71,600.00","$135,000.00","$202,000.00" 102 | University of Colorado - Boulder (UCB),State,"$47,100.00","$97,600.00","$51,600.00","$69,000.00","$128,000.00","$187,000.00" 103 | "University of California, Irvine (UCI)",State,"$48,300.00","$96,700.00","$47,800.00","$66,000.00","$123,000.00","$172,000.00" 104 | University of Illinois at Urbana-Champaign (UIUC),State,"$52,900.00","$96,100.00","$48,200.00","$68,900.00","$132,000.00","$177,000.00" 105 | Texas A&M University,State,"$49,700.00","$96,100.00","$51,100.00","$71,300.00","$131,000.00","$171,000.00" 106 | Binghamton University,State,"$53,600.00","$95,900.00","$50,900.00","$71,200.00","$146,000.00","$201,000.00" 107 | University of Missouri - Rolla (UMR),State,"$57,100.00","$95,800.00","$67,600.00","$80,400.00","$122,000.00","$166,000.00" 108 | San Jose State University (SJSU),State,"$53,500.00","$95,600.00","$50,700.00","$70,500.00","$122,000.00","$156,000.00" 109 | "University of Maryland, College Park",State,"$52,000.00","$95,000.00","$50,400.00","$68,300.00","$126,000.00","$166,000.00" 110 | "University of California, Santa Barbara (UCSB)",State,"$50,500.00","$95,000.00","$51,300.00","$71,200.00","$129,000.00","$173,000.00" 111 | University of Texas (UT) - Austin,State,"$49,700.00","$93,900.00","$50,100.00","$67,400.00","$129,000.00","$188,000.00" 112 | University of Michigan,State,"$52,700.00","$93,000.00","$50,900.00","$69,400.00","$128,000.00","$182,000.00" 113 | Stony Brook University,State,"$49,500.00","$93,000.00","$47,200.00","$67,100.00","$129,000.00","$181,000.00" 114 | State University of New York (SUNY) at 
Albany,State,"$44,500.00","$92,200.00","$47,000.00","$63,100.00","$135,000.00","$209,000.00" 115 | Rutgers University,State,"$50,300.00","$91,800.00","$48,100.00","$65,100.00","$128,000.00","$176,000.00" 116 | Purdue University,State,"$51,400.00","$90,500.00","$49,900.00","$67,400.00","$121,000.00","$168,000.00" 117 | University of Connecticut (UConn),State,"$48,000.00","$88,800.00","$46,100.00","$66,400.00","$120,000.00","$162,000.00" 118 | University of Massachusetts (UMass) - Amherst,State,"$46,600.00","$88,200.00","$43,100.00","$61,300.00","$122,000.00","$168,000.00" 119 | "California State University (CSU), Chico",State,"$47,400.00","$88,100.00","$46,800.00","$62,800.00","$122,000.00","$154,000.00" 120 | University of Florida (UF),State,"$47,100.00","$87,900.00","$45,400.00","$62,900.00","$120,000.00","$172,000.00" 121 | University of Wisconsin (UW) - Madison,State,"$48,900.00","$87,800.00","$47,400.00","$62,400.00","$118,000.00","$170,000.00" 122 | Louisiana State University (LSU),State,"$46,900.00","$87,800.00","$43,700.00","$61,300.00","$120,000.00","$165,000.00" 123 | "California State University, Fullerton (CSUF)",State,"$45,700.00","$87,000.00","$45,400.00","$62,500.00","$119,000.00","$158,000.00" 124 | George Mason University,State,"$47,800.00","$86,900.00","$51,300.00","$67,200.00","$114,000.00","$150,000.00" 125 | University of Massachusetts (UMass) - Lowell,State,"$45,400.00","$86,600.00","$50,900.00","$65,000.00","$113,000.00","$158,000.00" 126 | San Francisco State University (SFSU),State,"$47,300.00","$86,400.00","$45,100.00","$62,700.00","$114,000.00","$150,000.00" 127 | University of Arizona,State,"$47,500.00","$86,100.00","$44,800.00","$61,700.00","$117,000.00","$160,000.00" 128 | Clemson University,State,"$48,400.00","$86,000.00","$50,500.00","$61,800.00","$111,000.00","$150,000.00" 129 | University of Georgia (UGA),State,"$44,100.00","$86,000.00","$43,100.00","$57,800.00","$118,000.00","$164,000.00" 130 | Pennsylvania State University 
(PSU),State,"$49,900.00","$85,700.00","$46,300.00","$62,000.00","$117,000.00","$160,000.00" 131 | University of Washington (UW),State,"$48,800.00","$85,300.00","$47,000.00","$59,800.00","$115,000.00","$149,000.00" 132 | Michigan State University (MSU),State,"$46,300.00","$85,300.00","$44,200.00","$61,500.00","$119,000.00","$170,000.00" 133 | University of Rhode Island (URI),State,"$43,900.00","$85,300.00","$45,400.00","$60,100.00","$112,000.00","$157,000.00" 134 | San Diego State University (SDSU),State,"$46,200.00","$85,200.00","$45,500.00","$61,800.00","$116,000.00","$158,000.00" 135 | Auburn University,State,"$45,400.00","$84,700.00","$45,400.00","$62,700.00","$109,000.00","$145,000.00" 136 | Washington State University (WSU),State,"$45,300.00","$84,700.00","$43,600.00","$59,000.00","$113,000.00","$162,000.00" 137 | "California State University, Long Beach (CSULB)",State,"$45,100.00","$84,700.00","$47,400.00","$62,500.00","$113,000.00","$154,000.00" 138 | Iowa State University,State,"$45,400.00","$84,600.00","$44,400.00","$60,000.00","$109,000.00","$147,000.00" 139 | University of Delaware,State,"$45,900.00","$84,500.00","$44,500.00","$64,000.00","$119,000.00","$165,000.00" 140 | University of Colorado - Denver,State,"$46,100.00","$84,400.00","$46,400.00","$58,600.00","$105,000.00","$144,000.00" 141 | "California State University, East Bay (CSUEB)",State,"$49,200.00","$84,300.00","$46,000.00","$62,400.00","$115,000.00","$155,000.00" 142 | State University of New York (SUNY) at Farmingdale,State,"$47,300.00","$84,200.00","$50,200.00","$59,800.00","$110,000.00","$162,000.00" 143 | University of Minnesota,State,"$46,200.00","$84,200.00","$49,000.00","$63,200.00","$112,000.00","$148,000.00" 144 | Arizona State University (ASU),State,"$47,400.00","$84,100.00","$44,600.00","$60,700.00","$114,000.00","$163,000.00" 145 | "University of California, Santa Cruz (UCSC)",State,"$44,700.00","$84,100.00","$46,100.00","$62,000.00","$121,000.00","$165,000.00" 146 | "Indiana 
University (IU), Bloomington",State,"$46,300.00","$84,000.00","$43,600.00","$60,400.00","$119,000.00","$178,000.00" 147 | University of Iowa (UI),State,"$44,700.00","$83,900.00","$43,300.00","$61,100.00","$116,000.00","$163,000.00" 148 | Ohio State University (OSU),State,"$44,900.00","$83,700.00","$45,500.00","$60,700.00","$116,000.00","$162,000.00" 149 | North Carolina State University (NCSU),State,"$47,200.00","$83,300.00","$49,200.00","$64,800.00","$112,000.00","$153,000.00" 150 | Oregon State University (OSU),State,"$45,100.00","$83,300.00","$46,900.00","$64,000.00","$113,000.00","$146,000.00" 151 | University of Utah,State,"$45,400.00","$83,200.00","$43,000.00","$58,400.00","$116,000.00","$148,000.00" 152 | "University of Nevada, Reno (UNR)",State,"$46,500.00","$82,900.00","$41,900.00","$54,600.00","$113,000.00","$143,000.00" 153 | University of Oklahoma,State,"$44,700.00","$82,900.00","$41,200.00","$60,300.00","$114,000.00","$167,000.00" 154 | University of Arkansas,State,"$44,100.00","$82,800.00","$43,200.00","$60,700.00","$113,000.00","$160,000.00" 155 | University of Vermont (UVM),State,"$44,800.00","$82,700.00","$44,700.00","$58,000.00","$122,000.00","$194,000.00" 156 | University of Alabama at Huntsville (UAH),State,"$43,100.00","$82,700.00","$46,100.00","$67,800.00","$106,000.00","$132,000.00" 157 | "California State University, Sacramento (CSUS)",State,"$47,800.00","$82,400.00","$42,900.00","$59,600.00","$111,000.00","$154,000.00" 158 | University of Idaho,State,"$44,900.00","$82,000.00","$43,000.00","$56,700.00","$104,000.00","$142,000.00" 159 | University of Illinois at Chicago,State,"$47,500.00","$81,700.00","$44,700.00","$58,800.00","$110,000.00","$146,000.00" 160 | State University of New York (SUNY) at Buffalo,State,"$46,200.00","$81,700.00","$45,900.00","$61,400.00","$110,000.00","$147,000.00" 161 | University of Kansas,State,"$42,400.00","$81,600.00","$44,800.00","$57,200.00","$115,000.00","$156,000.00" 162 | University of New Mexico 
(UNM),State,"$41,600.00","$81,600.00","$41,800.00","$59,100.00","$105,000.00","$141,000.00" 163 | University of North Carolina at Chapel Hill (UNCH),State,"$42,900.00","$81,500.00","$43,400.00","$57,500.00","$117,000.00","$155,000.00" 164 | "University of Alabama, Tuscaloosa",State,"$41,300.00","$81,400.00","$40,100.00","$56,500.00","$117,000.00","$161,000.00" 165 | "University of California, Riverside (UCR)",State,"$46,800.00","$81,300.00","$37,200.00","$59,900.00","$109,000.00","$134,000.00" 166 | State University of New York (SUNY) at Geneseo,State,"$42,300.00","$81,300.00","$39,300.00","$47,600.00","$117,000.00","$173,000.00" 167 | University of Missouri - Columbia,State,"$41,700.00","$81,000.00","$43,500.00","$57,100.00","$111,000.00","$156,000.00" 168 | University of Nebraska,State,"$45,700.00","$80,900.00","$42,200.00","$56,600.00","$113,000.00","$156,000.00" 169 | University of Texas at Arlington (UTA),State,"$45,400.00","$80,800.00","$46,400.00","$61,200.00","$106,000.00","$138,000.00" 170 | Northern Illinois University (NIU),State,"$43,600.00","$80,800.00","$43,900.00","$60,200.00","$111,000.00","$161,000.00" 171 | Oklahoma State University,State,"$42,800.00","$80,700.00","$40,100.00","$56,500.00","$114,000.00","$151,000.00" 172 | University of North Dakota,State,"$44,000.00","$80,600.00","$43,400.00","$56,400.00","$111,000.00","$157,000.00" 173 | "California State University, Northridge (CSUN)",State,"$45,500.00","$80,400.00","$44,500.00","$57,800.00","$108,000.00","$153,000.00" 174 | University of Houston (UH),State,"$46,000.00","$79,900.00","$42,000.00","$56,200.00","$106,000.00","$141,000.00" 175 | University of Mississippi,State,"$41,400.00","$79,700.00","$40,400.00","$53,500.00","$108,000.00","$186,000.00" 176 | New Mexico State University,State,"$44,300.00","$79,500.00","$37,400.00","$53,800.00","$102,000.00","$131,000.00" 177 | Lamar University,State,"$46,500.00","$79,400.00","$38,700.00","$51,600.00","$114,000.00","$158,000.00" 178 | Mississippi 
State University (MSU),State,"$44,500.00","$79,300.00","$43,300.00","$58,800.00","$108,000.00","$151,000.00" 179 | Colorado State University (CSU),State,"$44,800.00","$79,000.00","$43,800.00","$57,100.00","$112,000.00","$150,000.00" 180 | Kansas State University (KSU),State,"$43,300.00","$79,000.00","$37,200.00","$54,100.00","$106,000.00","$138,000.00" 181 | University of Wyoming (UW),State,"$44,500.00","$78,700.00","$41,500.00","$54,000.00","$105,000.00","$145,000.00" 182 | Utah State University,State,"$43,800.00","$78,700.00","$41,600.00","$55,400.00","$101,000.00","$132,000.00" 183 | University of Wisconsin (UW) - Platteville,State,"$45,800.00","$78,500.00","$48,400.00","$61,200.00","$100,000.00","$139,000.00" 184 | University of Oregon,State,"$42,200.00","$78,400.00","$38,100.00","$56,200.00","$117,000.00","$186,000.00" 185 | University of Kentucky (UK),State,"$42,800.00","$78,300.00","$43,000.00","$57,300.00","$107,000.00","$149,000.00" 186 | University of New Hampshire (UNH),State,"$41,800.00","$78,300.00","$41,700.00","$56,400.00","$114,000.00","$147,000.00" 187 | University of Massachusetts (UMass) - Boston,State,"$45,600.00","$78,200.00","$36,300.00","$53,800.00","$109,000.00","$151,000.00" 188 | West Virginia University (WVU),State,"$43,100.00","$78,100.00","$39,700.00","$55,700.00","$106,000.00","$141,000.00" 189 | University of Maryland Baltimore County (UMBC),State,"$47,000.00","$77,800.00","$46,900.00","$59,100.00","$105,000.00","$130,000.00" 190 | North Dakota State University (NDSU),State,"$45,100.00","$77,800.00","$39,000.00","$55,800.00","$100,000.00","$123,000.00" 191 | State University of New York (SUNY) at Oswego,State,"$38,000.00","$77,800.00","$40,400.00","$53,000.00","$115,000.00","$169,000.00" 192 | University of Massachusetts (UMass) - Dartmouth,State,"$43,200.00","$77,700.00","$43,300.00","$56,200.00","$107,000.00","$132,000.00" 193 | Montana State University - 
Bozeman,State,"$46,600.00","$77,500.00","$40,200.00","$58,100.00","$111,000.00","$151,000.00" 194 | State University of New York (SUNY) at Oneonta,State,"$37,500.00","$76,700.00","$40,000.00","$54,300.00","$97,700.00","$155,000.00" 195 | University of Louisiana (UL) at Lafayette,State,"$41,100.00","$76,300.00","$42,000.00","$54,500.00","$107,000.00","$163,000.00" 196 | State University of New York (SUNY) at Plattsburgh,State,"$40,800.00","$76,200.00","$38,400.00","$54,100.00","$105,000.00","$136,000.00" 197 | Wayne State University,State,"$42,800.00","$76,100.00","$40,100.00","$56,200.00","$101,000.00","$139,000.00" 198 | University of Hawaii,State,"$43,800.00","$76,000.00","$40,400.00","$56,300.00","$104,000.00","$128,000.00" 199 | University of Toledo,State,"$43,100.00","$75,900.00","$40,100.00","$54,100.00","$100,000.00","$133,000.00" 200 | Florida International University (FIU),State,"$43,200.00","$75,500.00","$40,500.00","$55,800.00","$98,200.00","$136,000.00" 201 | University of Wisconsin (UW) - Whitewater,State,"$40,800.00","$75,500.00","$38,200.00","$53,500.00","$99,300.00","$150,000.00" 202 | Western Washington University,State,"$42,700.00","$75,400.00","$41,300.00","$56,700.00","$99,200.00","$119,000.00" 203 | Minnesota State University - Mankato,State,"$43,300.00","$74,700.00","$39,500.00","$53,800.00","$95,700.00","$140,000.00" 204 | University of Tennessee,State,"$43,800.00","$74,600.00","$41,900.00","$53,200.00","$106,000.00","$153,000.00" 205 | University of Wisconsin (UW) - Milwaukee,State,"$42,300.00","$74,600.00","$40,600.00","$54,000.00","$93,700.00","$123,000.00" 206 | University of Arkansas - Monticello (UAM),State,"$39,200.00","$74,500.00","$32,800.00","$46,100.00","$110,000.00","$161,000.00" 207 | Penn State - Harrisburg,State,"$45,700.00","$74,000.00","$44,000.00","$53,100.00","$104,000.00","$150,000.00" 208 | University of North Carolina at Charlotte (UNCC),State,"$43,100.00","$74,000.00","$38,200.00","$53,200.00","$99,500.00","$133,000.00" 
209 | Georgia State University,State,"$41,800.00","$74,000.00","$43,000.00","$55,300.00","$99,900.00","$145,000.00" 210 | Western Michigan University (WMU),State,"$42,300.00","$73,800.00","$40,100.00","$52,500.00","$103,000.00","$135,000.00" 211 | South Dakota State University (SDSU),State,"$41,100.00","$73,500.00","$34,100.00","$49,900.00","$99,400.00","$129,000.00" 212 | Idaho State University,State,"$44,900.00","$73,400.00","$35,400.00","$49,600.00","$101,000.00","$143,000.00" 213 | Ohio University,State,"$42,200.00","$73,400.00","$36,600.00","$52,800.00","$106,000.00","$150,000.00" 214 | Illinois State University,State,"$42,000.00","$73,400.00","$39,100.00","$55,200.00","$105,000.00","$142,000.00" 215 | Cleveland State University,State,"$43,500.00","$73,100.00","$39,500.00","$51,600.00","$97,000.00","$137,000.00" 216 | Florida State University (FSU),State,"$42,100.00","$73,000.00","$39,600.00","$52,800.00","$107,000.00","$156,000.00" 217 | "University of Alaska, Anchorage",State,"$45,900.00","$72,600.00","$39,800.00","$56,600.00","$99,300.00","$137,000.00" 218 | Fitchburg State College,State,"$42,400.00","$72,600.00","$43,300.00","$56,100.00","$99,600.00","$151,000.00" 219 | University of Nebraska at Omaha,State,"$41,500.00","$72,600.00","$39,500.00","$54,400.00","$97,400.00","$126,000.00" 220 | Southern Illinois University Carbondale,State,"$43,000.00","$72,500.00","$38,300.00","$51,300.00","$99,300.00","$139,000.00" 221 | University of Texas at El Paso (UTEP),State,"$43,400.00","$72,100.00","$37,700.00","$50,400.00","$99,500.00","$133,000.00" 222 | "California State University, Dominguez Hills (CSUDH)",State,"$42,700.00","$72,100.00","$30,800.00","$47,000.00","$92,200.00","$132,000.00" 223 | University Of Maine,State,"$41,200.00","$72,100.00","$41,700.00","$55,600.00","$99,300.00","$141,000.00" 224 | Eastern Michigan University,State,"$40,300.00","$72,100.00","$37,900.00","$52,800.00","$95,400.00","$135,000.00" 225 | Bowling Green State 
University,State,"$39,800.00","$72,100.00","$38,200.00","$51,800.00","$101,000.00","$146,000.00" 226 | University of Montana,State,"$37,300.00","$71,900.00","$37,000.00","$51,500.00","$96,400.00","$138,000.00" 227 | University of Central Florida (UCF),State,"$42,600.00","$71,700.00","$39,500.00","$51,500.00","$98,400.00","$125,000.00" 228 | University of South Carolina,State,"$40,000.00","$71,700.00","$36,300.00","$49,900.00","$98,400.00","$131,000.00" 229 | "University of Nevada, Las Vegas (UNLV)",State,"$45,200.00","$71,600.00","$39,000.00","$52,400.00","$100,000.00","$128,000.00" 230 | St. Cloud State University,State,"$41,800.00","$71,400.00","$38,700.00","$49,400.00","$101,000.00","$126,000.00" 231 | University of Wisconsin (UW) - Parkside,State,"$40,700.00","$71,400.00","$40,900.00","$53,100.00","$84,900.00","$119,000.00" 232 | "California State University (CSU), Stanislaus",State,"$38,000.00","$71,400.00","$33,700.00","$50,500.00","$94,100.00","$121,000.00" 233 | Humboldt State University,State,"$42,600.00","$71,300.00","$36,000.00","$56,300.00","$94,400.00","$117,000.00" 234 | Florida Atlantic University (FAU),State,"$42,600.00","$71,100.00","$40,700.00","$53,000.00","$99,500.00","$137,000.00" 235 | University of South Florida (USF),State,"$41,100.00","$71,100.00","$39,600.00","$51,500.00","$98,100.00","$131,000.00" 236 | Portland State University (PSU),State,"$42,600.00","$70,900.00","$40,700.00","$52,300.00","$94,400.00","$123,000.00" 237 | Eastern Washington University,State,"$38,600.00","$70,900.00","$36,000.00","$50,500.00","$93,100.00","$117,000.00" 238 | University of Texas at San Antonio (UTSA),State,"$42,500.00","$70,700.00","$39,100.00","$49,800.00","$92,700.00","$121,000.00" 239 | University of Akron,State,"$41,100.00","$70,300.00","$40,600.00","$53,300.00","$95,200.00","$127,000.00" 240 | State University of New York (SUNY) at Potsdam,State,"$38,000.00","$70,300.00","$35,100.00","$51,200.00","$100,000.00","$179,000.00" 241 | University of 
Alabama at Birmingham (UAB),State,"$39,200.00","$70,100.00","$43,000.00","$53,400.00","$91,400.00","$125,000.00" 242 | University of Memphis (U of M),State,"$41,400.00","$69,700.00","$36,100.00","$49,100.00","$93,500.00","$127,000.00" 243 | Boise State University (BSU),State,"$40,800.00","$69,500.00","$37,400.00","$48,700.00","$87,500.00","$110,000.00" 244 | Missouri State University (MSU),State,"$36,100.00","$69,500.00","$33,300.00","$46,900.00","$102,000.00","$134,000.00" 245 | University of Wisconsin (UW) - La Crosse,State,"$42,200.00","$69,300.00","$37,500.00","$47,200.00","$93,100.00","$133,000.00" 246 | Appalachian State University,State,"$40,400.00","$69,100.00","$37,200.00","$50,400.00","$90,800.00","$115,000.00" 247 | Virginia Commonwealth University (VCU),State,"$42,000.00","$68,400.00","$37,400.00","$51,900.00","$100,000.00","$123,000.00" 248 | University of Wisconsin (UW) - Stout,State,"$43,600.00","$68,300.00","$40,900.00","$50,600.00","$91,600.00","$136,000.00" 249 | East Carolina University (ECU),State,"$40,200.00","$67,500.00","$38,400.00","$52,000.00","$98,700.00","$151,000.00" 250 | Utah Valley State College,State,"$42,400.00","$67,100.00","$27,000.00","$44,100.00","$84,900.00","$110,000.00" 251 | University of Missouri - St. 
Louis (UMSL),State,"$41,400.00","$67,100.00","$36,800.00","$49,600.00","$97,600.00","$144,000.00" 252 | Western Carolina University,State,"$36,900.00","$66,600.00","$39,000.00","$49,500.00","$94,400.00","$133,000.00" 253 | University of Wisconsin (UW) - Oshkosh,State,"$39,300.00","$66,400.00","$37,700.00","$49,700.00","$90,100.00","$138,000.00" 254 | State University of New York (SUNY) at Fredonia,State,"$37,800.00","$66,200.00","$32,800.00","$44,200.00","$93,300.00","$181,000.00" 255 | University of Missouri - Kansas City (UMKC),State,"$38,900.00","$65,800.00","$36,300.00","$48,100.00","$95,800.00","$124,000.00" 256 | University of Wisconsin (UW) - Eau Claire,State,"$41,400.00","$64,800.00","$35,000.00","$47,300.00","$93,100.00","$125,000.00" 257 | Ball State University (BSU),State,"$39,100.00","$64,500.00","$35,500.00","$48,200.00","$89,300.00","$128,000.00" 258 | University of North Carolina at Wilmington (UNCW),State,"$37,500.00","$64,400.00","$32,100.00","$46,600.00","$97,100.00","$129,000.00" 259 | University of Wisconsin (UW) - Stevens Point,State,"$39,800.00","$64,000.00","$38,400.00","$45,100.00","$95,400.00","$128,000.00" 260 | University of Southern Maine,State,"$39,400.00","$63,600.00","$40,400.00","$47,900.00","$85,700.00","$117,000.00" 261 | Arkansas State University (ASU),State,"$38,700.00","$63,300.00","$33,600.00","$45,300.00","$83,900.00","$118,000.00" 262 | Kent State University,State,"$38,700.00","$62,600.00","$36,100.00","$45,800.00","$87,000.00","$124,000.00" 263 | Tarleton State University (TSU),State,"$40,800.00","$62,400.00","$32,100.00","$47,400.00","$80,400.00","$126,000.00" 264 | University of Wisconsin (UW) - Green Bay,State,"$35,800.00","$60,600.00","$35,500.00","$46,800.00","$81,800.00","$102,000.00" 265 | Morehead State University,State,"$34,800.00","$60,600.00","$34,300.00","$46,500.00","$72,000.00","$91,300.00" 266 | Austin Peay State University,State,"$37,700.00","$59,200.00","$32,200.00","$40,500.00","$73,900.00","$96,200.00" 267 
| Pittsburg State University,State,"$40,400.00","$58,200.00","$25,600.00","$46,000.00","$84,600.00","$117,000.00" 268 | Southern Utah University,State,"$41,900.00","$56,500.00","$30,700.00","$39,700.00","$78,400.00","$116,000.00" 269 | Montana State University - Billings,State,"$37,900.00","$50,600.00","$22,600.00","$31,800.00","$78,500.00","$98,900.00" 270 | Black Hills State University,State,"$35,300.00","$43,900.00","$27,000.00","$32,200.00","$60,900.00","$87,600.00" 271 | -------------------------------------------------------------------------------- /Columbus First/Columbus First Trip.R: -------------------------------------------------------------------------------- 1 | # Columbus First Trip 2 | 3 | 4 | ## Loading the Libraries 5 | 6 | library(data.table) 7 | library(ggplot2) 8 | library(lubridate) 9 | library(wordcloud) 10 | library(tm) 11 | library(SnowballC) 12 | library(RSentiment) 13 | library(stringr) 14 | library(SnowballC) 15 | library(RWeka) 16 | library(DT) 17 | library(gdata) 18 | 19 | ## Reading the Dataset and Changing Working Directory 20 | 21 | setwd('./Kaggle/Columbus First') 22 | 23 | columbus <- read.csv('./Columbus.csv', encoding = "UTF-8") 24 | str(columbus) 25 | summary(columbus) 26 | 27 | ## Polishing Up 28 | 29 | # Month levels are reordered by position and then relabelled with two-digit month numbers so a date can be assembled below. 30 | columbus$month <- as.factor(columbus$month) 31 | columbus$month <- factor(columbus$month,levels(columbus$month)[c(1,8,7,6,2,4,3,5)]) #Reorder Levels 32 | columbus$nmonth <- columbus$month 33 | levels(columbus$nmonth) <- c("08","09","10","11","12","01","02","03") 34 | columbus$day <- as.factor(columbus$day) 35 | columbus$year <- as.factor(columbus$year) 36 | columbus$nwords <- vapply(gregexpr("[A-Za-z]\\W+", columbus$text), function(m) sum(m > 0L), integer(1)) + 1L # Number of words in the text: letter-to-separator transitions + 1. [A-Za-z] replaces [A-z], which also matched [ \ ] ^ _ and backtick; sum(m > 0L) keeps gregexpr's -1 "no match" marker from counting as a match. 37 | columbus$date <- paste(columbus$day, columbus$nmonth, columbus$year, sep = "-") 38 | columbus$date <- as.POSIXct(strptime(columbus$date,format = "%e-%m-%Y")) 39 | 40 | ## Calculate Sentiments 41 | 42 | corpus = Corpus(VectorSource(list(columbus$text))) 43
| corpus = tm_map(corpus, removePunctuation) 44 | corpus = tm_map(corpus, content_transformer(tolower)) 45 | corpus = tm_map(corpus, removeNumbers) 46 | corpus = tm_map(corpus, stripWhitespace) 47 | corpus = tm_map(corpus, removeWords, stopwords('en')) 48 | 49 | 50 | dtm_colon = DocumentTermMatrix(VCorpus(VectorSource(corpus[[1]]$content))) 51 | freq_colon <- colSums(as.matrix(dtm_colon)) 52 | 53 | sentiments_colon = calculate_sentiment(names(freq_colon)) 54 | sentiments_colon = cbind(sentiments_colon, as.data.frame(freq_colon)) 55 | 56 | sent_pos_colon = sentiments_colon[sentiments_colon$sentiment == 'Positive',] 57 | sent_neg_colon = sentiments_colon[sentiments_colon$sentiment == 'Negative',] 58 | sent_neu_colon = sentiments_colon[sentiments_colon$sentiment == 'Neutral',] 59 | 60 | cat("We have more positive Sentiments: ",sum(sent_pos_colon$freq_colon)," than negative: ",sum(sent_neg_colon$freq_colon)) 61 | -------------------------------------------------------------------------------- /Credit Card Modeling 65%.R: -------------------------------------------------------------------------------- 1 | # Credit Card Predictive Modeling 2 | 3 | ### Load Libraries 4 | 5 | library(randomForest) 6 | library(e1071) 7 | library(rpart) 8 | library(rpart.plot) 9 | library(caTools) 10 | library(readr) 11 | library(caret) 12 | 13 | # Receiving Dataset and Change Working Directory 14 | setwd('./Kaggle') 15 | ccard <- read_csv("./creditcard.csv") 16 | head(ccard) 17 | ccard$Class <- factor(ccard$Class) # Class is the classification target: with a numeric response svm() and randomForest() fit regressions, and confusionMatrix() expects factors 18 | str(ccard) 19 | 20 | ## Predictive Modeling 21 | 22 | ### Set Data 65:35 23 | 24 | set.seed(22540) 25 | split <- sample.split(ccard$Class, SplitRatio = 0.65) 26 | train <- subset(ccard, split == TRUE) 27 | cv <- subset(ccard, split == FALSE) 28 | 29 | ### check output Class distribution 30 | table(cv$Class) 31 | 32 | # Logistic Regression 33 | 34 | glm.model <- glm(Class ~ ., data = train, family = "binomial") 35 | glm.predict <- predict(glm.model, cv, type = "response") 36 | table(cv$Class, glm.predict > 0.5) 37
38 | # Decision Tree Model 39 | 40 | tree.model <- rpart(Class ~ ., data = train, method = "class", minbucket = 50) 41 | prp(tree.model) 42 | 43 | tree.predict <- predict(tree.model, cv, type = "class") 44 | confusionMatrix(tree.predict, factor(cv$Class, levels = levels(tree.predict))) # confusionMatrix(data = predictions, reference = truth); both must be factors with the same levels 45 | 46 | 47 | # Keeping 10,000 Rows with Class=0 48 | 49 | data.class.0 <- subset(ccard, ccard$Class == 0) 50 | data.class.1 <- subset(ccard, ccard$Class == 1) 51 | nrow(data.class.0) 52 | 53 | nrow(data.class.1) 54 | 55 | data.class.0 <- data.class.0[1:10000, ] # NOTE(review): keeps the first 10,000 rows in file order, not a random sample 56 | nrow(data.class.0) 57 | 58 | data <- rbind(data.class.0, data.class.1) 59 | nrow(data) 60 | data$Class <- factor(data$Class) # classification target: with a numeric response svm() and randomForest() below would fit regressions 61 | # Split Data 65:35 62 | 63 | set.seed(205) 64 | split <- sample.split(data$Class, SplitRatio = 0.65) 65 | train <- subset(data, split == TRUE) 66 | cv <- subset(data, split == FALSE) 67 | 68 | table(cv$Class) 69 | 70 | # Logistic Regression for Split 71 | 72 | glm.model <- glm(Class ~ ., data = train, family = "binomial", control = list(maxit = 50)) 73 | glm.predict <- predict(glm.model, cv, type = "response") 74 | table(cv$Class, glm.predict > 0.5) 75 | 76 | # SVM Model 77 | 78 | svm.model <- svm(Class ~ ., data = train, kernel = "radial", cost = 1, gamma = 0.3) 79 | svm.predict <- predict(svm.model, cv) 80 | confusionMatrix(svm.predict, cv$Class) 81 | 82 | # Decision Tree Split 83 | 84 | tree.model <- rpart(Class ~ ., data = train, method = "class", minbucket = 10) 85 | prp(tree.model) 86 | 87 | tree.predict <- predict(tree.model, cv, type = "class") 88 | confusionMatrix(tree.predict, cv$Class) 89 | 90 | 91 | # Random Forest Predictions 92 | 93 | set.seed(10) 94 | rf.model <- randomForest(Class ~ ., data = train, 95 | ntree = 2000, nodesize = 20) 96 | 97 | rf.predict <- predict(rf.model, cv) 98 | confusionMatrix(rf.predict, cv$Class) 99 | 100 | 101 | varImpPlot(rf.model) 102 | -------------------------------------------------------------------------------- /Diamond Prices.R: -------------------------------------------------------------------------------- 1 | # Diamond
Prices 2 | 3 | ### Load Libraries 4 | 5 | library(ggplot2) # Data visualization 6 | library(readr) # CSV file I/O, e.g. the read_csv function 7 | library(magrittr) 8 | library(caret) 9 | library(plotly) 10 | library(corrplot) 11 | 12 | 13 | ### Receiving Dataset and Change Working Directory 14 | setwd('./Kaggle') 15 | 16 | diamond <- read.csv('diamonds.csv', stringsAsFactors = TRUE) # factors are needed for levels() below; R >= 4.0 reads strings as character by default 17 | head(diamond) 18 | 19 | ### Class and Missing Variables for Diamonds 20 | 21 | data.frame(cbind(data.frame(VarType=sapply(diamond,class)),data.frame(Total_Missing=sapply(diamond,function(x){sum(is.na(x))})))) 22 | 23 | 24 | ## See Different Levels in Factor Variations 25 | print("Cut Levels") 26 | levels(diamond$cut) 27 | 28 | print("Color Levels") 29 | levels(diamond$color) 30 | 31 | print("Clarity Levels") 32 | levels(diamond$clarity) 33 | 34 | 35 | 36 | ### Density Plots 37 | # Each plot puts price on the x axis; cut, color or clarity only sets the fill, so titles and axis labels name price. 38 | qplot(price, data=diamond, geom="density", fill=cut, alpha=I(.5), 39 | main="Distribution of Price by Cut", xlab="Price", 40 | ylab="Density") + theme_minimal() 41 | 42 | qplot(price, data=diamond, geom="density", fill=color, alpha=I(.5), 43 | main="Distribution of Price by Color", xlab="Price", 44 | ylab="Density") + theme_minimal() 45 | 46 | qplot(price, data=diamond, geom="density", fill=clarity, alpha=I(.5), 47 | main="Distribution of Price by Clarity", xlab="Price", 48 | ylab="Density") + theme_minimal() 49 | 50 | 51 | ### More Plots 52 | 53 | ggplot(data=diamond,aes(x=cut))+geom_bar(fill="green")+theme_minimal()+ylab("Total Count")+ggtitle("Distribution of Diamonds by Cut Type") 54 | ggplot(data=diamond,aes(x=color))+geom_bar(fill="khaki")+theme_minimal()+ylab("Total Count")+ggtitle("Distribution of Diamonds by Color Type") 55 | ggplot(data=diamond,aes(x=clarity))+geom_bar(fill="violet")+theme_minimal()+ylab("Total Count")+ggtitle("Distribution of Diamonds by Clarity Type") 56 | 57 | 58 | ### Encoding 59 | 60 | ohe_features<-c("cut","color","clarity") 61 | dummies<-dummyVars(~cut + color + clarity
,data=diamond) 62 | 63 | diamond_ohe<-as.data.frame(predict(dummies,newdata=diamond)) 64 | diamond_combined<-cbind(diamond,diamond_ohe) 65 | 66 | newdiamond<-diamond_combined[,!names(diamond_combined)%in%ohe_features] 67 | 68 | rm(diamond_combined) 69 | rm(diamond_ohe) 70 | 71 | 72 | ### Looking at the New Data 73 | head(newdiamond) 74 | 75 | ### Separating the Row Index (X) and the Price from the Features 76 | 77 | x.label<-newdiamond$X 78 | y.label <-as.numeric(newdiamond$price) 79 | 80 | newdiamond$X<-NULL 81 | newdiamond$price<-NULL 82 | 83 | ## Correlation plot 84 | corrplot(cor(cbind(newdiamond,Price=y.label)),type="upper") 85 | 86 | -------------------------------------------------------------------------------- /Edudata.R: -------------------------------------------------------------------------------- 1 | 2 | # --- 3 | ## Students' Academic Performance 4 | 5 | # Change Directory 6 | setwd("./Kaggle") 7 | 8 | # Load Libraries 9 | library(ggplot2) 10 | library(dplyr) 11 | library(randomForest) 12 | library(class) 13 | library(rpart) 14 | library(rpart.plot) 15 | library(e1071) 16 | library(caret) 17 | library(caTools) 18 | library(party) 19 | 20 | 21 | # Reading the Data 22 | 23 | edu <- read.csv('./Edudata.csv', stringsAsFactors = TRUE) # the classifiers below need Class and the categorical predictors as factors; R >= 4.0 reads strings as character by default 24 | str(edu) 25 | summary(edu) 26 | 27 | ### Exploratory Data Analysis 28 | 29 | # Raised Hands 30 | 31 | ggplot(edu, aes(x = raisedhands)) + geom_histogram(bins=50, color = "red",fill="blue",alpha=0.2) + 32 | scale_x_continuous(breaks = seq(0,100,5)) + 33 | labs(x = "Raised Hands", y = "Student Count") 34 | 35 | 36 | # Visited Resources 37 | 38 | ggplot(edu, aes(x = VisITedResources)) + geom_histogram(bins=50, color = "orange",fill="orange",alpha=0.4) + 39 | scale_x_continuous(breaks = seq(0,100,5)) + 40 | labs(x = "Visited Resources", y = "Student Count") 41 | 42 | 43 | # Announcements 44 | 45 | ggplot(edu, aes(x = AnnouncementsView)) + geom_histogram(bins = 50,color = "black",fill="red",alpha=0.5) + 46 | scale_x_continuous(breaks = seq(0,100,5)) + 47 | labs(x = "Announcements
View", y = "Student Count") 48 | 49 | 50 | # Discussion 51 | 52 | ggplot(edu, aes(x = Discussion)) + geom_histogram(bins=50,color = "black",fill="grey") + 53 | scale_x_continuous(breaks = seq(0,100,5)) + 54 | labs(x = "Discussion Participation", y = "Student Count") 55 | 56 | 57 | ### Barplots 58 | 59 | ggplot(edu, aes(x = gender)) + geom_bar(aes(fill=gender)) + 60 | labs(x = "Gender", y = "Student Count") + 61 | scale_y_continuous(breaks = seq(0,300,30)) + coord_flip() 62 | 63 | 64 | 65 | ggplot(edu, aes(x = NationalITy)) + geom_bar(aes(fill=NationalITy)) + 66 | labs(x = "Nationality", y = "Student Count") + 67 | scale_y_continuous(breaks = seq(0,200,20)) + coord_flip() 68 | 69 | 70 | 71 | ggplot(edu, aes(x = PlaceofBirth)) + geom_bar(aes(fill = NationalITy)) + 72 | labs(x = "Birth Place", y = "Student Count") + coord_flip() # usa is a mix of nationalities 73 | 74 | 75 | 76 | ggplot(edu, aes(x = GradeID, fill = Class)) + geom_bar() + 77 | labs(x = "Grade ID", y = "Student Count") + coord_flip() # g-06 has students with only low grades 78 | 79 | 80 | 81 | ggplot(edu, aes(x = GradeID, fill = gender)) + geom_bar() + 82 | labs(x = "Grade ID", y = "Student Count") + coord_flip() # g-10 has no females 83 | 84 | 85 | 86 | ggplot(edu, aes(x = SectionID, fill = Topic,alpha=0.1)) + geom_bar() + 87 | labs(x = "Section ID", y = "Student Count") + 88 | coord_flip() 89 | 90 | 91 | 92 | ggplot(edu, aes(x = Topic, fill = gender,alpha=0.2)) + geom_bar() + 93 | labs(x = "Topic", y = "Student Count") + 94 | scale_y_continuous(breaks = seq(0,100,4)) + coord_flip() 95 | 96 | 97 | 98 | ggplot(edu, aes(x = Topic, fill = NationalITy)) + geom_bar() + 99 | labs(x = "Topic", y = "Student Count") + coord_flip() + 100 | scale_y_continuous(breaks = seq(0,100,4)) + coord_flip() 101 | 102 | 103 | ggplot(edu, aes(x = Topic, fill = SectionID,alpha=0.3)) + geom_bar() + 104 | labs(x = "Topic", y = "Student Count") + coord_flip() + 105 | scale_y_continuous(breaks = seq(0,100,4)) 106 | 107 | 108 | 109 
| # Section C for Mostly Spanish Students 110 | 111 | ggplot(edu, aes(x = Topic, fill = Semester)) + geom_bar() + 112 | labs(x = "Topic", y = "Student Count") + coord_flip() + 113 | scale_y_continuous(breaks = seq(0,100,4)) 114 | 115 | 116 | 117 | # IT Students Are Mostly in 1st Semester 118 | 119 | ggplot(edu, aes(x = Topic, fill = Relation,alpha=0.5)) + geom_bar() + 120 | labs(x = "Topic", y = "Student Count") + coord_flip() + 121 | scale_y_continuous(breaks = seq(0,100,4)) 122 | 123 | 124 | # Most French Students have Mom as Guardian in Comparison to Father 125 | 126 | 127 | ggplot(edu, aes(x = Topic, fill = Class)) + geom_bar() + 128 | labs(x = "Topic", y = "Student Count") + coord_flip() + 129 | scale_y_continuous(breaks = seq(0,100,4)) 130 | 131 | # Same breakdown as shares: position = "fill" stacks every bar to 1, so the axis is a proportion (0-1), not a count. 132 | ggplot(edu, aes(x = Topic, fill = Class)) + geom_bar(position = "fill") + 133 | labs(x = "Topic", y = "Proportion of Students") + coord_flip() + 134 | scale_y_continuous(breaks = seq(0, 1, 0.1)) 135 | 136 | 137 | # Geology has no low class students 138 | 139 | 140 | ggplot(edu, aes(x = Semester)) + geom_bar(aes(fill=Semester)) + 141 | labs(x = "Semester", y = "Student Count") 142 | 143 | ggplot(edu, aes(x = Relation, fill = Semester)) + geom_bar() + 144 | labs(x = "Guardian", y = "Student Count") 145 | 146 | ggplot(edu, aes(x = ParentAnsweringSurvey, fill = ParentschoolSatisfaction)) + 147 | geom_bar() + 148 | labs(x = "Does parents answer surveys ?", y = "Student Count") 149 | 150 | # Parent Satisfaction 151 | 152 | ggplot(edu, aes(x = ParentschoolSatisfaction)) + 153 | geom_bar(aes(fill=ParentschoolSatisfaction)) + 154 | labs(x = "Are the Parents Satisfied With the School ?", y = "Student Count") 155 | 156 | 157 | 158 | ggplot(edu, aes(x = StudentAbsenceDays)) + geom_bar(aes(fill=StudentAbsenceDays)) + 159 | labs(x = "Is the student absent for more than seven days", y = "Student Count") 160 | 161 | ggplot(edu, aes(x = Class, fill = gender)) + geom_bar() + 162 | labs(x = "Class", y = "Student Count") 163 | 164 | # Few
Girls in the Low Class 165 | 166 | ggplot(edu, aes(x = Class, fill = Relation)) + geom_bar() + 167 | labs(x = "Class", y = "Student Count") 168 | 169 | 170 | ggplot(edu, aes(x = Class, fill = ParentAnsweringSurvey)) + geom_bar() + 171 | labs(x = "Class", y = "Student Count") 172 | 173 | 174 | ggplot(edu, aes(x = Class, fill = StudentAbsenceDays)) + geom_bar() + 175 | labs(x = "Class", y = "Student Count") 176 | 177 | 178 | ### Boxplots 179 | 180 | 181 | ggplot(edu, aes(x = gender, y = raisedhands)) + geom_boxplot(aes(fill=gender)) 182 | 183 | 184 | 185 | ggplot(edu, aes(x = gender, y = VisITedResources)) + geom_boxplot(aes(fill=gender)) 186 | 187 | 188 | # Girls Use More Resources 189 | 190 | ggplot(edu, aes(x = NationalITy, y = raisedhands)) + geom_boxplot(aes(fill=NationalITy)) 191 | 192 | ggplot(edu, aes(x = StageID, y = raisedhands)) + geom_boxplot(aes(fill=StageID)) 193 | 194 | 195 | ggplot(edu, aes(x = StageID, y = Discussion)) + geom_boxplot(aes(fill=StageID)) 196 | 197 | ggplot(edu, aes(x = GradeID, y = raisedhands)) + geom_boxplot(aes(fill=GradeID)) 198 | 199 | ggplot(edu, aes(x = SectionID, y = Discussion)) + geom_boxplot(aes(fill=SectionID)) 200 | 201 | ggplot(edu, aes(x = Topic, y = raisedhands)) + geom_boxplot(aes(fill=Topic)) 202 | 203 | ggplot(edu, aes(x = Semester, y = raisedhands)) + geom_boxplot(aes(fill=Semester)) 204 | 205 | ggplot(edu, aes(x = Relation, y = raisedhands)) + geom_boxplot(aes(fill=Relation)) 206 | 207 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = raisedhands)) + geom_boxplot(aes(fill=ParentAnsweringSurvey)) 208 | 209 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = VisITedResources)) + geom_boxplot(aes(fill=ParentAnsweringSurvey)) 210 | 211 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = AnnouncementsView)) + geom_boxplot(aes(fill=ParentAnsweringSurvey)) 212 | 213 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = Discussion)) + geom_boxplot(aes(fill=ParentAnsweringSurvey)) 214 | 215 | ggplot(edu, aes(x = 
ParentschoolSatisfaction, y = raisedhands)) + geom_boxplot(aes(fill=ParentschoolSatisfaction)) 216 | 217 | ggplot(edu, aes(x = ParentschoolSatisfaction, y = VisITedResources)) + geom_boxplot(aes(fill=ParentschoolSatisfaction)) 218 | 219 | ggplot(edu, aes(x = ParentschoolSatisfaction, y = AnnouncementsView)) + geom_boxplot(aes(fill=ParentschoolSatisfaction)) 220 | 221 | ggplot(edu, aes(x = ParentschoolSatisfaction, y = Discussion)) + geom_boxplot(aes(fill=ParentschoolSatisfaction)) 222 | 223 | ggplot(edu, aes(x = StudentAbsenceDays, y = raisedhands)) + geom_boxplot(aes(fill=StudentAbsenceDays)) 224 | 225 | # More Students Leave = Less Hand Raises 226 | 227 | 228 | ggplot(edu, aes(x = StudentAbsenceDays, y = VisITedResources)) + geom_boxplot(aes(fill=StudentAbsenceDays)) 229 | 230 | ggplot(edu, aes(x = StudentAbsenceDays, y = AnnouncementsView)) + geom_boxplot(aes(fill=StudentAbsenceDays)) 231 | 232 | ggplot(edu, aes(x = StudentAbsenceDays, y = Discussion)) + geom_boxplot(aes(fill=StudentAbsenceDays)) 233 | 234 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = raisedhands)) + geom_boxplot(aes(fill=ParentAnsweringSurvey)) 235 | 236 | # Yes Answers to Surveys = More Raised hands 237 | 238 | 239 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = VisITedResources)) + geom_boxplot(aes(fill=ParentAnsweringSurvey)) 240 | 241 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = AnnouncementsView)) + geom_boxplot(aes(fill=ParentAnsweringSurvey)) 242 | 243 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = Discussion)) + geom_boxplot(aes(fill=ParentAnsweringSurvey)) 244 | 245 | 246 | ###Class-Wise Boxplots 247 | 248 | ggplot(edu, aes(x = Class, y = raisedhands)) + geom_boxplot(aes(fill=Class)) 249 | 250 | # High Marks = Active Participation 251 | 252 | 253 | ggplot(edu, aes(x = Class, y = VisITedResources)) + geom_boxplot(aes(fill=Class)) 254 | 255 | 256 | # High Marks by visited Resources 257 | 258 | 259 | ggplot(edu, aes(x = Class, y = AnnouncementsView)) + 
geom_boxplot(aes(fill=Class)) 260 | 261 | 262 | # More Marks More Announcements 263 | 264 | 265 | ggplot(edu, aes(x = Class, y = Discussion)) + geom_boxplot(aes(fill=Class)) 266 | 267 | 268 | ### Scatterplots 269 | 270 | 271 | ggplot(edu, aes(x = raisedhands, y = VisITedResources)) + geom_point() + 272 | geom_smooth(method = "lm",color='green') 273 | 274 | ggplot(edu, aes(x = raisedhands, y = AnnouncementsView)) + geom_point() + 275 | geom_smooth(method = "lm",color='red') 276 | 277 | ggplot(edu, aes(x = raisedhands, y = Discussion)) + geom_point() + 278 | geom_smooth(method = "lm",color='purple') 279 | 280 | ggplot(edu, aes(x = VisITedResources, y = AnnouncementsView)) + geom_point() + 281 | geom_smooth(method = "lm",color='cyan') 282 | 283 | ggplot(edu, aes(x = VisITedResources, y = Discussion)) + geom_point() + 284 | geom_smooth(method = "lm",color='firebrick') 285 | 286 | ggplot(edu, aes(x = AnnouncementsView, y = Discussion)) + geom_point() + 287 | geom_smooth(method = "lm",color='hotpink') 288 | 289 | 290 | ### Density Plots 291 | 292 | 293 | ggplot(edu, aes(x = raisedhands, color = gender)) + geom_density() 294 | 295 | 296 | ggplot(edu, aes(x = raisedhands, color = Topic)) + geom_density() 297 | 298 | 299 | 300 | ggplot(edu, aes(x = raisedhands, color = SectionID)) + geom_density() 301 | 302 | ggplot(edu, aes(x = raisedhands, color = Semester)) + geom_density() 303 | 304 | ggplot(edu, aes(x = raisedhands, color = Class)) + geom_density() 305 | 306 | 307 | ### Tile Map 308 | 309 | tile.map <- edu %>% group_by(gender, NationalITy) %>% 310 | summarise(Count = n()) %>% arrange(desc(Count)) 311 | 312 | ggplot(tile.map, aes(x = gender, NationalITy, fill = Count)) + geom_tile() 313 | 314 | 315 | 316 | ### Predictive Modeling 317 | 318 | 319 | 320 | # Splitting data into train and cross-validation sets by using a different sample. 
321 | 322 | set.seed(23210) 323 | split <- sample.split(edu$Class, SplitRatio = 0.75) 324 | train <- subset(edu, split == TRUE) 325 | cv <- subset(edu, split == FALSE) 326 | 327 | 328 | 329 | # Decision Tree 330 | 331 | 332 | tree.model <- rpart(Class ~ ., data = train, method = "class", minbucket = 1) 333 | prp(tree.model) 334 | 335 | 336 | tree.predict <- predict(tree.model, cv, type = "class") 337 | table(cv$Class, tree.predict) 338 | 339 | 340 | # Decision Tree Using Caret Package 341 | 342 | 343 | rpart.control = trainControl(method = "repeatedcv", number = 10, repeats = 3) 344 | rpart.grid = expand.grid(.cp = seq(0.01, 0.5, 0.02)) 345 | rpart.model.caret <-train(Class ~ ., data = train, method = "rpart", preProcess = "scale", 346 | trControl = rpart.control, tuneGrid = rpart.grid) 347 | 348 | 349 | # confusionMatrix() takes predictions first and the true classes second; every call below follows that order. 350 | rpart.predict.caret <- predict.train(rpart.model.caret, cv) 351 | confusionMatrix(rpart.predict.caret, cv$Class) 352 | 353 | 354 | 355 | # Accuracy -> 0.7355 356 | 357 | # Random Forest 358 | 359 | 360 | set.seed(10005) 361 | 362 | rf.model <- randomForest(Class ~ .- SectionID , data = train, importance = TRUE, 363 | ntree = 2000, nodesize = 20) 364 | 365 | rf.predict <- predict(rf.model, cv) 366 | confusionMatrix(rf.predict, cv$Class) 367 | 368 | 369 | 370 | varImpPlot(rf.model) 371 | 372 | 373 | # Accuracy -> 0.6777 374 | 375 | # C-Forest Utilizing Party 376 | 377 | 378 | cforest.model = cforest(Class ~ .-SectionID , data = train, 379 | controls=cforest_unbiased(ntree=2000, mtry = 3)) 380 | 381 | 382 | 383 | cforest.prediction = predict(cforest.model, cv, OOB = TRUE, type = "response") 384 | confusionMatrix(cforest.prediction, cv$Class) 385 | 386 | 387 | # Accuracy -> 0.7438 388 | 389 | # Support Vector Machines 390 | 391 | 392 | svm.model <- svm(Class ~ ., data = train, kernel = "radial", cost = 10, gamma = 0.15) 393 | svm.predict <- predict(svm.model, cv) 394 | confusionMatrix(svm.predict, cv$Class) 395 | 396 | 397 | # Accuracy -> 0.777 398 | 399 | # Ensemble Model 400
| 401 | 402 | results <- data.frame(tree = tree.predict, rpart = rpart.predict.caret, rf = rf.predict, 403 | cforest = cforest.prediction, svm = svm.predict, 404 | actual.class = cv$Class, final.prediction = rep("-",nrow(cv))) 405 | 406 | results 407 | 408 | 409 | 410 | # getmode: most frequent value in x; on a tie, the value that appears first in x wins. 411 | getmode <- function(x) { 412 | unique.x <- unique(x) 413 | unique.x[which.max(tabulate(match(x, unique.x)))] 414 | } 415 | 416 | 417 | # Majority vote over the model columns only. Voting over every column of results would let actual.class (the true label) and the "-" placeholder take part in the prediction. 418 | results$final.prediction <- apply(results[, setdiff(names(results), c("actual.class", "final.prediction"))], 1, getmode) 419 | confusionMatrix(factor(results$final.prediction, levels = levels(results$actual.class)), results$actual.class) 420 | 421 | 422 | # Accuracy -> 0.810 (best) -- recorded when the vote still included actual.class; re-run to refresh 423 | -------------------------------------------------------------------------------- /Edudata.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: 3 | word_document: default 4 | pdf_document: default 5 | html_document: default 6 | --- 7 | ## Students' Academic Performance 8 | 9 | # Load Libraries: 10 | ```{r} 11 | library(ggplot2) 12 | library(dplyr) 13 | library(randomForest) 14 | library(class) 15 | library(rpart) 16 | library(rpart.plot) 17 | library(e1071) 18 | library(caret) 19 | library(caTools) 20 | library(party) 21 | ``` 22 | 23 | # Reading the Data 24 | ```{r} 25 | edu <- read.csv('./Edudata.csv', stringsAsFactors = TRUE) # factors are needed by the classifiers later on; R >= 4.0 reads strings as character by default 26 | str(edu) 27 | ``` 28 | 29 | ```{r} 30 | summary(edu) 31 | ``` 32 | ### Exploratory Data Analysis 33 | 34 | # Raised Hands 35 | ```{r} 36 | ggplot(edu, aes(x = raisedhands)) + geom_histogram(bins=50, color = "red",fill="blue",alpha=0.2) + 37 | scale_x_continuous(breaks = seq(0,100,5)) + 38 | labs(x = "Raised Hands", y = "Student Count") 39 | ``` 40 | 41 | # Visited Resources 42 | ```{r} 43 | ggplot(edu, aes(x = VisITedResources)) + geom_histogram(bins=50, color = "orange",fill="orange",alpha=0.4) + 44 | scale_x_continuous(breaks = seq(0,100,5)) + 45 | labs(x = "Visited Resources", y = "Student Count") 46 | ``` 47 | 48 | # Announcements 49 | ```{r} 50 | ggplot(edu, aes(x = AnnouncementsView)) + geom_histogram(bins = 50,color = 
"black",fill="red",alpha=0.5) + 51 | scale_x_continuous(breaks = seq(0,100,5)) + 52 | labs(x = "Announcements View", y = "Student Count") 53 | ``` 54 | 55 | # Discussion 56 | ```{r} 57 | ggplot(edu, aes(x = Discussion)) + geom_histogram(bins=50,color = "black",fill="grey") + 58 | scale_x_continuous(breaks = seq(0,100,5)) + 59 | labs(x = "Discussion Participation", y = "Student Count") 60 | ``` 61 | 62 | ### Barplots 63 | ```{r} 64 | ggplot(edu, aes(x = gender)) + geom_bar(aes(fill=gender)) + 65 | labs(x = "Gender", y = "Student Count") + 66 | scale_y_continuous(breaks = seq(0,300,30)) + coord_flip() 67 | ``` 68 | 69 | ```{r} 70 | ggplot(edu, aes(x = NationalITy)) + geom_bar(aes(fill=NationalITy)) + 71 | labs(x = "Nationality", y = "Student Count") + 72 | scale_y_continuous(breaks = seq(0,200,20)) + coord_flip() 73 | ``` 74 | 75 | ```{r} 76 | ggplot(edu, aes(x = PlaceofBirth)) + geom_bar(aes(fill = NationalITy)) + 77 | labs(x = "Birth Place", y = "Student Count") + coord_flip() # usa is a mix of nationalities 78 | ``` 79 | 80 | ```{r} 81 | ggplot(edu, aes(x = GradeID, fill = Class)) + geom_bar() + 82 | labs(x = "Grade ID", y = "Student Count") + coord_flip() # g-06 has students with only low grades 83 | ``` 84 | 85 | ```{r} 86 | ggplot(edu, aes(x = GradeID, fill = gender)) + geom_bar() + 87 | labs(x = "Grade ID", y = "Student Count") + coord_flip() # g-10 has no females 88 | ``` 89 | 90 | ```{r} 91 | ggplot(edu, aes(x = SectionID, fill = Topic,alpha=0.1)) + geom_bar() + 92 | labs(x = "Section ID", y = "Student Count") + 93 | coord_flip() 94 | ``` 95 | 96 | ```{r} 97 | ggplot(edu, aes(x = Topic, fill = gender,alpha=0.2)) + geom_bar() + 98 | labs(x = "Topic", y = "Student Count") + 99 | scale_y_continuous(breaks = seq(0,100,4)) + coord_flip() 100 | ``` 101 | 102 | ```{r} 103 | ggplot(edu, aes(x = Topic, fill = NationalITy)) + geom_bar() + 104 | labs(x = "Topic", y = "Student Count") + coord_flip() + 105 | scale_y_continuous(breaks = seq(0,100,4)) 106 | ``` 107 | 108 | 
```{r} 109 | ggplot(edu, aes(x = Topic, fill = SectionID,alpha=0.3)) + geom_bar() + 110 | labs(x = "Topic", y = "Student Count") + coord_flip() + 111 | scale_y_continuous(breaks = seq(0,100,4)) 112 | ``` 113 | 114 | 115 | # Section C for Mostly Spanish Students 116 | ```{r} 117 | ggplot(edu, aes(x = Topic, fill = Semester)) + geom_bar() + 118 | labs(x = "Topic", y = "Student Count") + coord_flip() + 119 | scale_y_continuous(breaks = seq(0,100,4)) 120 | ``` 121 | 122 | 123 | # IT Students Are Mostly in 1st Semester 124 | ```{r} 125 | ggplot(edu, aes(x = Topic, fill = Relation,alpha=0.5)) + geom_bar() + 126 | labs(x = "Topic", y = "Student Count") + coord_flip() + 127 | scale_y_continuous(breaks = seq(0,100,4)) 128 | ``` 129 | 130 | # Most French Students have Mom as Guardian in Comparison to Father 131 | 132 | ```{r} 133 | ggplot(edu, aes(x = Topic, fill = Class)) + geom_bar() + 134 | labs(x = "Topic", y = "Student Count") + coord_flip() + 135 | scale_y_continuous(breaks = seq(0,100,4)) 136 | ``` 137 | 138 | ```{r} 139 | ggplot(edu, aes(x = Topic, fill = Class)) + geom_bar(position = "fill") + # position = "fill" stacks every bar to 1, so the axis is a proportion (0-1), not a count 140 | labs(x = "Topic", y = "Proportion of Students") + coord_flip() + 141 | scale_y_continuous(breaks = seq(0, 1, 0.1)) 142 | ``` 143 | 144 | # Geology has no low class students 145 | 146 | ```{r} 147 | ggplot(edu, aes(x = Semester)) + geom_bar(aes(fill=Semester)) + 148 | labs(x = "Semester", y = "Student Count") 149 | ``` 150 | 151 | ```{r} 152 | ggplot(edu, aes(x = Relation, fill = Semester)) + geom_bar() + 153 | labs(x = "Guardian", y = "Student Count") 154 | ``` 155 | 156 | ```{r} 157 | ggplot(edu, aes(x = ParentAnsweringSurvey, fill = ParentschoolSatisfaction)) + 158 | geom_bar() + 159 | labs(x = "Does parents answer surveys ?", y = "Student Count") 160 | ``` 161 | 162 | # Parent Satisfaction 163 | ```{r} 164 | ggplot(edu, aes(x = ParentschoolSatisfaction)) + 165 | geom_bar(aes(fill=ParentschoolSatisfaction)) + 166 | labs(x = "Are the Parents Satisfied With the School ?", y = 
"Student Count") 167 | ``` 168 | 169 | ```{r} 170 | ggplot(edu, aes(x = StudentAbsenceDays)) + geom_bar(aes(fill=StudentAbsenceDays)) + 171 | labs(x = "Is the student absent for more than seven days", y = "Student Count") 172 | ``` 173 | 174 | ```{r} 175 | ggplot(edu, aes(x = Class, fill = gender)) + geom_bar() + 176 | labs(x = "Class", y = "Student Count") 177 | ``` 178 | 179 | # Few Girls in the Low Class 180 | ```{r} 181 | ggplot(edu, aes(x = Class, fill = Relation)) + geom_bar() + 182 | labs(x = "Class", y = "Student Count") 183 | ``` 184 | 185 | ```{r} 186 | ggplot(edu, aes(x = Class, fill = ParentAnsweringSurvey)) + geom_bar() + 187 | labs(x = "Class", y = "Student Count") 188 | ``` 189 | 190 | ```{r} 191 | ggplot(edu, aes(x = Class, fill = StudentAbsenceDays)) + geom_bar() + 192 | labs(x = "Class", y = "Student Count") 193 | ``` 194 | 195 | 196 | ### Boxplots 197 | 198 | ```{r} 199 | ggplot(edu, aes(x = gender, y = raisedhands)) + geom_boxplot(aes(fill=gender)) 200 | ``` 201 | 202 | ```{r} 203 | ggplot(edu, aes(x = gender, y = VisITedResources)) + geom_boxplot(aes(fill=gender)) 204 | ``` 205 | 206 | # Girls Use More Resources 207 | ```{r} 208 | ggplot(edu, aes(x = NationalITy, y = raisedhands)) + geom_boxplot(aes(fill=NationalITy)) 209 | ``` 210 | 211 | ```{r} 212 | ggplot(edu, aes(x = StageID, y = raisedhands)) + geom_boxplot(aes(fill=StageID)) 213 | ``` 214 | 215 | ```{r} 216 | ggplot(edu, aes(x = StageID, y = Discussion)) + geom_boxplot(aes(fill=StageID)) 217 | ``` 218 | 219 | ```{r} 220 | ggplot(edu, aes(x = GradeID, y = raisedhands)) + geom_boxplot(aes(fill=GradeID)) 221 | ``` 222 | 223 | ```{r} 224 | ggplot(edu, aes(x = SectionID, y = Discussion)) + geom_boxplot(aes(fill=SectionID)) 225 | ``` 226 | 227 | ```{r} 228 | ggplot(edu, aes(x = Topic, y = raisedhands)) + geom_boxplot(aes(fill=Topic)) 229 | ``` 230 | 231 | ```{r} 232 | ggplot(edu, aes(x = Semester, y = raisedhands)) + geom_boxplot(aes(fill=Semester)) 233 | ``` 234 | 235 | ```{r} 236 | 
ggplot(edu, aes(x = Relation, y = raisedhands)) + geom_boxplot(aes(fill=Relation)) 237 | ``` 238 | 239 | ```{r} 240 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = raisedhands)) + geom_boxplot(aes(fill=ParentAnsweringSurvey)) 241 | ``` 242 | 243 | ```{r} 244 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = VisITedResources)) + geom_boxplot(aes(fill=ParentAnsweringSurvey)) 245 | ``` 246 | 247 | ```{r} 248 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = AnnouncementsView)) + geom_boxplot(aes(fill=ParentAnsweringSurvey)) 249 | ``` 250 | 251 | ```{r} 252 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = Discussion)) + geom_boxplot(aes(fill=ParentAnsweringSurvey)) 253 | ``` 254 | 255 | ```{r} 256 | ggplot(edu, aes(x = ParentschoolSatisfaction, y = raisedhands)) + geom_boxplot(aes(fill=ParentschoolSatisfaction)) 257 | ``` 258 | 259 | ```{r} 260 | ggplot(edu, aes(x = ParentschoolSatisfaction, y = VisITedResources)) + geom_boxplot(aes(fill=ParentschoolSatisfaction)) 261 | ``` 262 | 263 | ```{r} 264 | ggplot(edu, aes(x = ParentschoolSatisfaction, y = AnnouncementsView)) + geom_boxplot(aes(fill=ParentschoolSatisfaction)) 265 | ``` 266 | 267 | ```{r} 268 | ggplot(edu, aes(x = ParentschoolSatisfaction, y = Discussion)) + geom_boxplot(aes(fill=ParentschoolSatisfaction)) 269 | ``` 270 | 271 | ```{r} 272 | ggplot(edu, aes(x = StudentAbsenceDays, y = raisedhands)) + geom_boxplot(aes(fill=StudentAbsenceDays)) 273 | ``` 274 | 275 | # More Students Leave = Less Hand Raises 276 | 277 | ```{r} 278 | ggplot(edu, aes(x = StudentAbsenceDays, y = VisITedResources)) + geom_boxplot(aes(fill=StudentAbsenceDays)) 279 | ``` 280 | 281 | ```{r} 282 | ggplot(edu, aes(x = StudentAbsenceDays, y = AnnouncementsView)) + geom_boxplot(aes(fill=StudentAbsenceDays)) 283 | ``` 284 | 285 | ```{r} 286 | ggplot(edu, aes(x = StudentAbsenceDays, y = Discussion)) + geom_boxplot(aes(fill=StudentAbsenceDays)) 287 | ``` 288 | 289 | ```{r} 290 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = raisedhands)) + 
geom_boxplot(aes(fill=ParentAnsweringSurvey)) 291 | ``` 292 | 293 | # Yes Answers to Surveys = More Raised hands 294 | 295 | ```{r} 296 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = VisITedResources)) + geom_boxplot(aes(fill=ParentAnsweringSurvey)) 297 | ``` 298 | 299 | ```{r} 300 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = AnnouncementsView)) + geom_boxplot(aes(fill=ParentAnsweringSurvey)) 301 | ``` 302 | 303 | ```{r} 304 | ggplot(edu, aes(x = ParentAnsweringSurvey, y = Discussion)) + geom_boxplot(aes(fill=ParentAnsweringSurvey)) 305 | ``` 306 | 307 | 308 | ###Class-Wise Boxplots 309 | 310 | ```{r} 311 | ggplot(edu, aes(x = Class, y = raisedhands)) + geom_boxplot(aes(fill=Class)) 312 | ``` 313 | 314 | # High Marks = Active Participation 315 | 316 | ```{r} 317 | ggplot(edu, aes(x = Class, y = VisITedResources)) + geom_boxplot(aes(fill=Class)) 318 | ``` 319 | 320 | # High Marks by visited Resources 321 | 322 | ```{r} 323 | ggplot(edu, aes(x = Class, y = AnnouncementsView)) + geom_boxplot(aes(fill=Class)) 324 | ``` 325 | 326 | # More Marks More Announcements 327 | 328 | ```{r} 329 | ggplot(edu, aes(x = Class, y = Discussion)) + geom_boxplot(aes(fill=Class)) 330 | ``` 331 | 332 | ### Scatterplots 333 | 334 | ```{r} 335 | ggplot(edu, aes(x = raisedhands, y = VisITedResources)) + geom_point() + 336 | geom_smooth(method = "lm",color='green') 337 | ``` 338 | 339 | ```{r} 340 | ggplot(edu, aes(x = raisedhands, y = AnnouncementsView)) + geom_point() + 341 | geom_smooth(method = "lm",color='red') 342 | ``` 343 | 344 | ```{r} 345 | ggplot(edu, aes(x = raisedhands, y = Discussion)) + geom_point() + 346 | geom_smooth(method = "lm",color='purple') 347 | ``` 348 | 349 | ```{r} 350 | ggplot(edu, aes(x = VisITedResources, y = AnnouncementsView)) + geom_point() + 351 | geom_smooth(method = "lm",color='cyan') 352 | ``` 353 | 354 | ```{r} 355 | ggplot(edu, aes(x = VisITedResources, y = Discussion)) + geom_point() + 356 | geom_smooth(method = "lm",color='firebrick') 357 | ``` 
358 | 359 | ```{r} 360 | ggplot(edu, aes(x = AnnouncementsView, y = Discussion)) + geom_point() + 361 | geom_smooth(method = "lm",color='hotpink') 362 | ``` 363 | 364 | 365 | ### Density Plots 366 | 367 | ```{r} 368 | ggplot(edu, aes(x = raisedhands, color = gender)) + geom_density() 369 | ``` 370 | 371 | ```{r} 372 | ggplot(edu, aes(x = raisedhands, color = Topic)) + geom_density() 373 | ``` 374 | 375 | ```{r} 376 | ggplot(edu, aes(x = raisedhands, color = SectionID)) + geom_density() 377 | ``` 378 | 379 | ```{r} 380 | ggplot(edu, aes(x = raisedhands, color = Semester)) + geom_density() 381 | ``` 382 | 383 | ```{r} 384 | ggplot(edu, aes(x = raisedhands, color = Class)) + geom_density() 385 | ``` 386 | 387 | 388 | ### Tile Map 389 | ```{r} 390 | tile.map <- edu %>% group_by(gender, NationalITy) %>% 391 | summarise(Count = n()) %>% arrange(desc(Count)) 392 | ``` 393 | 394 | ```{r} 395 | ggplot(tile.map, aes(x = gender, NationalITy, fill = Count)) + geom_tile() 396 | ``` 397 | 398 | 399 | ### Predictive Modeling 400 | 401 | 402 | 403 | # Splitting data into train and cross-validation sets. 
404 | ```{r} 405 | set.seed(23210) 406 | split <- sample.split(edu$Class, SplitRatio = 0.75) 407 | train <- subset(edu, split == TRUE) 408 | cv <- subset(edu, split == FALSE) 409 | ``` 410 | 411 | 412 | # Decision Tree 413 | 414 | ```{r} 415 | tree.model <- rpart(Class ~ ., data = train, method = "class", minbucket = 1) 416 | prp(tree.model) 417 | ``` 418 | 419 | ```{r} 420 | tree.predict <- predict(tree.model, cv, type = "class") 421 | table(cv$Class, tree.predict) 422 | ``` 423 | 424 | 425 | 426 | # Decision Tree Using Caret Package 427 | 428 | ```{r} 429 | rpart.control = trainControl(method = "repeatedcv", number = 10, repeats = 3) 430 | rpart.grid = expand.grid(.cp = seq(0.01, 0.5, 0.02)) 431 | rpart.model.caret <-train(Class ~ ., data = train, method = "rpart", preProcess = "scale", 432 | trControl = rpart.control, tuneGrid = rpart.grid) 433 | ``` 434 | 435 | ```{r} 436 | rpart.predict.caret <- predict.train(rpart.model.caret, cv) 437 | confusionMatrix(rpart.predict.caret, cv$Class) 438 | ``` 439 | 440 | 441 | Accuracy -> 0.7355 442 | 443 | # Random Forest 444 | 445 | ```{r} 446 | set.seed(10005) 447 | 448 | rf.model <- randomForest(Class ~ .- SectionID , data = train, importance = TRUE, 449 | ntree = 2000, nodesize = 20) 450 | 451 | rf.predict <- predict(rf.model, cv) 452 | confusionMatrix(rf.predict, cv$Class) # predictions first, true classes second (same order as the caret-rpart chunk) 453 | ``` 454 | 455 | ```{r} 456 | varImpPlot(rf.model) 457 | ``` 458 | 459 | Accuracy -> 0.6777 460 | 461 | # C-Forest Utilizing Party 462 | 463 | ```{r} 464 | cforest.model = cforest(Class ~ .-SectionID , data = train, 465 | controls=cforest_unbiased(ntree=2000, mtry = 3)) 466 | ``` 467 | 468 | ```{r} 469 | cforest.prediction = predict(cforest.model, cv, OOB = TRUE, type = "response") 470 | confusionMatrix(cforest.prediction, cv$Class) # predictions first, true classes second 471 | ``` 472 | 473 | Accuracy -> 0.7438 474 | 475 | # Support Vector Machines 476 | 477 | ```{r} 478 | svm.model <- svm(Class ~ ., data = train, kernel = "radial", cost = 10, gamma = 0.15) 479 | svm.predict <-
predict(svm.model, cv) 480 | confusionMatrix(svm.predict, cv$Class) # predictions first, true classes second 481 | ``` 482 | 483 | Accuracy -> 0.777 484 | 485 | # Ensemble Model 486 | 487 | 488 | ```{r} 489 | results <- data.frame(tree = tree.predict, rpart = rpart.predict.caret, rf = rf.predict, 490 | cforest = cforest.prediction, svm = svm.predict, 491 | actual.class = cv$Class, final.prediction = rep("-",nrow(cv))) 492 | 493 | results 494 | ``` 495 | 496 | 497 | ```{r} 498 | getmode <- function(x) { 499 | unique.x <- unique(x) 500 | unique.x[which.max(tabulate(match(x, unique.x)))] 501 | } 502 | ``` 503 | 504 | ```{r} 505 | results$final.prediction <- apply(results[, c("tree", "rpart", "rf", "cforest", "svm")], 1, getmode) # majority vote over the five model columns only; actual.class and the "-" placeholder must not vote 506 | confusionMatrix(factor(results$final.prediction, levels = levels(results$actual.class)), results$actual.class) # apply() returns character, so rebuild the factor with the reference levels 507 | ``` 508 | 509 | Accuracy -> 0.810 (best; this figure predates removing actual.class from the vote, so re-run to refresh it) 510 | -------------------------------------------------------------------------------- /Edudata.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atse0612/Side-Projects-For-Fun/b2f21804b95d678df6553493d3cb6fc9635bc682/Edudata.pdf -------------------------------------------------------------------------------- /Fake News.R: -------------------------------------------------------------------------------- 1 | # Fake News 2 | 3 | ## Loading the Libraries 4 | 5 | library(rpart) 6 | library(rpart.plot) 7 | 8 | 9 | setwd('./Kaggle') 10 | 11 | 12 | ### Reading the Fake News Dataset 13 | 14 | fake <- read.csv('fake.csv') 15 | print(table(fake$type)) 16 | head(fake) 17 | 18 | ### Checking for NA Values 19 | 20 | any(is.na(fake)) 21 | 22 | ### Running the Analysis 23 | 24 | 25 | mini_model <- rpart(formula = type ~ ord_in_thread + language + country + spam_score + replies_count + participants_count + likes + comments + shares, 26 | data = fake, 27 | method = "class", # Classification 28 | parms = list(split = "information"), # Use Information Gain as splitting criterion 29 | control = rpart.control(cp = 0.01, # Minimum loss decrease complexity param 30 |
maxcompete = 3, # Competition by split for debugging 31 | maxsurrogate = 3, # Competition per surrogate for debugging 32 | xval = 20, # 20-fold cross-validation 33 | maxdepth = 4)) # Maximum Depth for easy interpretation 34 | plotcp(mini_model) 35 | 36 | 37 | 38 | ### Decision Tree 39 | rpart.plot(mini_model, main = "Decision Tree", box.palette = list("Gy", "Gn", "Bu", "Bn", "Or", "Rd", "Gy", "Pu")) 40 | 41 | 42 | print(summary(mini_model)) 43 | -------------------------------------------------------------------------------- /Heart Disease Decision Trees.R: -------------------------------------------------------------------------------- 1 | ## Heart Disease Decision Trees 2 | # Loading the data 3 | 4 | library(FFTrees) 5 | FFTrees.guide() 6 | data(heartdisease) 7 | head(heartdisease) 8 | summary(heartdisease) 9 | 10 | # Heart Disease Data 11 | set.seed(100) 12 | samples <- sample(c(TRUE, FALSE), size = nrow(heartdisease), replace = TRUE) 13 | heartdisease.train <- heartdisease[samples,] 14 | heartdisease.test <- heartdisease[!samples,] # rows not drawn for training 15 | heart.FFTrees <- FFTrees(formula = diagnosis ~., data = heartdisease.train,data.test = heartdisease.test) 16 | print(heart.FFTrees) 17 | class(heart.FFTrees) 18 | names(heart.FFTrees) 19 | heart.FFTrees$cue.accuracies 20 | 21 | # ROC Plot 22 | showcues(heart.FFTrees, main = "Heartdisease Cue Accuracy") 23 | 24 | # Stats 25 | heart.FFTrees$FFTrees.stats 26 | summary(heart.FFTrees) 27 | 28 | # Area Under the Curve 29 | heart.FFTrees$auc 30 | 31 | # Train Decision DF 32 | 33 | heart.FFTrees$decision.train[1:5,] 34 | heart.FFTrees$levelout.train[1:5,] 35 | 36 | # Selecting Cues and Plotting Trees 37 | heart.as.FFTrees <- FFTrees(formula = diagnosis ~ age + sex, data = heartdisease) 38 | plot(heart.FFTrees, main = "Heart Disease",decision.names = c("Healthy", "Disease")) 39 | plot(heart.FFTrees, main = "Heart Disease",decision.names = c("Healthy", "Disease"), train.p = 5) 40 |
-------------------------------------------------------------------------------- /Heart Disease Decision Trees.Rmd: -------------------------------------------------------------------------------- 1 | ## Heart Disease Decision Trees 2 | # Loading the data 3 | 4 | ```{r} 5 | library(FFTrees) 6 | FFTrees.guide() 7 | data(heartdisease) 8 | head(heartdisease) 9 | summary(heartdisease) 10 | ``` 11 | 12 | # Heart Disease Data 13 | 14 | For this dataset, there will be information on the first head and summary sets to determine the overall number of heart disease for patients. 15 | ```{r} 16 | set.seed(100) 17 | samples <- sample(c(TRUE, FALSE), size = nrow(heartdisease), replace = TRUE) 18 | heartdisease.train <- heartdisease[samples,] 19 | heartdisease.test <- heartdisease[!samples,] # rows not drawn for training 20 | heart.FFTrees <- FFTrees(formula = diagnosis ~., data = heartdisease.train,data.test = heartdisease.test) 21 | print(heart.FFTrees) 22 | class(heart.FFTrees) 23 | names(heart.FFTrees) 24 | heart.FFTrees$cue.accuracies 25 | ``` 26 | 27 | # ROC Plot 28 | ```{r} 29 | showcues(heart.FFTrees, main = "Heartdisease Cue Accuracy") 30 | ``` 31 | # Stats 32 | 33 | ```{r} 34 | heart.FFTrees$FFTrees.stats 35 | summary(heart.FFTrees) 36 | ``` 37 | 38 | # Area Under the Curve 39 | ```{r} 40 | heart.FFTrees$auc 41 | ``` 42 | # Train Decision DF 43 | 44 | ```{r} 45 | heart.FFTrees$decision.train[1:5,] 46 | heart.FFTrees$levelout.train[1:5,] 47 | ``` 48 | 49 | # Selecting Cues and Plotting Trees 50 | 51 | ```{r, echo=FALSE} 52 | heart.as.FFTrees <- FFTrees(formula = diagnosis ~ age + sex, data = heartdisease) 53 | plot(heart.FFTrees, main = "Heart Disease",decision.names = c("Healthy", "Disease")) 54 | plot(heart.FFTrees, main = "Heart Disease",decision.names = c("Healthy", "Disease"), train.p = 5) 55 | ``` 56 | -------------------------------------------------------------------------------- /Housing Index Zillow.R: -------------------------------------------------------------------------------- 1 | # Zillow
Housing Index From 2010 2 | 3 | ### Changing the Working Directory 4 | 5 | setwd('./Kaggle/Zillow') 6 | 7 | ### Loading the Libraries 8 | 9 | library(forecast) 10 | library(zoo) 11 | library(ggplot2) 12 | library(ggthemes) 13 | library(tidyr) 14 | library(dplyr) 15 | library(readr) 16 | 17 | ### Reading the Datasets 18 | 19 | rent <- read.csv('./price.csv') 20 | sqft <- read.csv('./pricepersqft.csv') 21 | 22 | ### Looking at the Top 10 23 | 24 | values=head(rent,10) 25 | values=data.frame(t(as.matrix(values[,7:81]))) 26 | colnames(values)=rent[1:10,2] 27 | 28 | 29 | ### Monthly Percentage Change (Seattle) 30 | 31 | suppressMessages(library(quantmod)) 32 | 33 | pct_change <- function(rent) { 34 | 35 | nc <- ncol(rent) 36 | ln <- colnames(rent) 37 | 38 | meta <- rent[c(1:6)] 39 | data <- rent[c(7:nc)] 40 | 41 | data <- t(apply(data, 1, Delt)) 42 | 43 | rv <- cbind(meta, data) 44 | colnames(rv) <- ln 45 | 46 | rv[-7] 47 | } 48 | 49 | # Select data for the Seattle, WA metro region. 50 | # Total of 98 places. 51 | pc <- subset(rent, rent$Metro == 'Seattle') 52 | pc <- pct_change(pc) 53 | 54 | last = ncol(pc) 55 | 56 | pc <- pc[order(pc[last], decreasing = TRUE),] 57 | pc <- cbind(pc[c(2,5)], round(pc[(last-3):last], 3)) 58 | 59 | # Top 10 places in the Seattle region with the 60 | # highest most recent monthly percentage change. 61 | head(pc, n=10) 62 | 63 | 64 | ### Monthly Percentage Change (San Francisco) 65 | 66 | pct_change <- function(rent) { 67 | 68 | nc <- ncol(rent) 69 | ln <- colnames(rent) 70 | 71 | meta <- rent[c(1:6)] 72 | data <- rent[c(7:nc)] 73 | 74 | data <- t(apply(data, 1, Delt)) 75 | 76 | rv <- cbind(meta, data) 77 | colnames(rv) <- ln 78 | 79 | rv[-7] 80 | } 81 | 82 | # Select data for the San Francisco, CA metro region. 
83 | 84 | pc <- subset(rent, rent$Metro == 'San Francisco') 85 | pc <- pct_change(pc) 86 | 87 | last = ncol(pc) 88 | 89 | pc <- pc[order(pc[last], decreasing = TRUE),] 90 | pc <- cbind(pc[c(2,5)], round(pc[(last-3):last], 3)) 91 | 92 | # Top 10 places in the San Francisco region with the 93 | # highest most recent monthly percentage change. 94 | head(pc, n=10) 95 | 96 | 97 | 98 | ### Yearly Percentage Change (Sacramento) 99 | 100 | get_range <- function(rent) { # column indices, 12 apart, ending at the last column 101 | last <- ncol(rent) 102 | n <- colnames(rent) 103 | val <- length(n[7:last]) 104 | val <- (val - 1) %/% 12 # whole 12-month steps that fit inside the data columns 7..last, so the first index never drops below column 7 105 | rv <- seq(last - val * 12, last, 12) 106 | rv 107 | } 108 | 109 | # Select data for the Sacramento, CA metro region. 110 | # Total of 55 places. 111 | pc <- subset(rent, rent$Metro == 'Sacramento') 112 | 113 | years <- get_range(pc) 114 | pc <- cbind(pc[1:6], pc[years]) 115 | 116 | # use function defined above 117 | pc <- pct_change(pc) 118 | 119 | last = ncol(pc) 120 | 121 | pc <- pc[order(pc[last], decreasing = TRUE),] 122 | pc <- cbind(pc[c(2,5)], round(pc[(last-3):last], 2)) 123 | 124 | # Top 10 places in the Sacramento region with the 125 | # highest most recent yearly percentage change. 126 | head(pc, n=10) 127 | 128 | 129 | ### Yearly Percentage Change (San Francisco) 130 | 131 | get_range <- function(rent) { # same helper as above, re-declared for this section 132 | last <- ncol(rent) 133 | n <- colnames(rent) 134 | val <- length(n[7:last]) 135 | val <- (val - 1) %/% 12 # whole 12-month steps that fit inside the data columns 7..last 136 | rv <- seq(last - val * 12, last, 12) 137 | rv 138 | } 139 | 140 | # Select data for the San Francisco, CA metro region.
141 | pc <- subset(rent, rent$Metro == 'San Francisco') 142 | 143 | years <- get_range(pc) 144 | pc <- cbind(pc[1:6], pc[years]) 145 | 146 | # use function defined above 147 | pc <- pct_change(pc) 148 | 149 | last = ncol(pc) 150 | 151 | pc <- pc[order(pc[last], decreasing = TRUE),] 152 | pc <- cbind(pc[c(2,5)], round(pc[(last-3):last], 2)) 153 | 154 | # Top 10 places in the San Francisco region with the 155 | # highest most recent yearly percentage change. 156 | head(pc, n=10) 157 | 158 | 159 | 160 | ### Index Numbers (Los Angeles) 161 | 162 | index_base_100 <- function(rent) { 163 | 164 | nc <- ncol(rent) 165 | ln <- colnames(rent) 166 | 167 | meta <- rent[, c(1:6)] 168 | data <- rent[7:nc] 169 | base <- rent[7] 170 | 171 | index <- function(x) { 172 | x / base 173 | } 174 | 175 | data <- apply(data, 2, index) 176 | data <- data.frame(data) 177 | 178 | data <- data * 100 179 | data <- round(data) 180 | 181 | rv <- cbind(meta, data) 182 | colnames(rv) <- ln 183 | 184 | rv 185 | } 186 | 187 | # Select data for the Los Angeles, CA metro region. 188 | # Total of 148 places. 189 | # Base: November 2010 = 100 190 | idx <- subset(rent, rent$Metro == 'Los Angeles') 191 | idx <- index_base_100(idx) 192 | 193 | last = ncol(idx) 194 | s <- seq(last-36, last, 12) 195 | 196 | idx <- idx[order(idx[last], decreasing = TRUE),] 197 | idx <- cbind(idx[c(2,5)], idx[s]) 198 | 199 | # The top 10 places in the Los Angeles metro region 200 | # with the largest index change over the base period. 
201 | head(idx, n=10) 202 | 203 | #### San Francisco Region for Index Numbers 204 | 205 | index_base_100 <- function(rent) { 206 | 207 | nc <- ncol(rent) 208 | ln <- colnames(rent) 209 | 210 | meta <- rent[, c(1:6)] 211 | data <- rent[7:nc] 212 | base <- rent[7] 213 | 214 | index <- function(x) { 215 | x / base 216 | } 217 | 218 | data <- apply(data, 2, index) 219 | data <- data.frame(data) 220 | 221 | data <- data * 100 222 | data <- round(data) 223 | 224 | rv <- cbind(meta, data) 225 | colnames(rv) <- ln 226 | 227 | rv 228 | } 229 | 230 | # Select data for the San Francisco, CA metro region. 231 | # Base: November 2010 = 100 232 | idx <- subset(rent, rent$Metro == 'San Francisco') 233 | idx <- index_base_100(idx) 234 | 235 | last = ncol(idx) 236 | s <- seq(last-36, last, 12) 237 | 238 | idx <- idx[order(idx[last], decreasing = TRUE),] 239 | idx <- cbind(idx[c(2,5)], idx[s]) 240 | 241 | # The top 10 places in the San Francisco metro region 242 | # with the largest index change over the base period. 
243 | head(idx, n=10) 244 | 245 | 246 | 247 | 248 | ### Top 10 Cities By Population Using Time-Series Analysis 249 | 250 | date <- seq(as.Date("2010/11/01"), as.Date("2017/01/31"),"month") 251 | date <- as.yearmon(date) 252 | ts=zoo(values,order.by = date) 253 | values=fortify(ts) 254 | values$Index=as.Date(values$Index) 255 | 256 | autoplot(ts,facet=NULL)+ 257 | theme_minimal()+ 258 | labs(x="Time",y="Price") 259 | 260 | forecasts=matrix(,ncol=10,nrow=11) 261 | 262 | for(i in 1:10){ 263 | 264 | forecasts[,i]=forecast(auto.arima(ts[,i],lambda = 0,stepwise = F),h=11)$mean 265 | 266 | } 267 | 268 | colnames(forecasts) = rent[1:10,2] 269 | results=rbind(values[,2:11],forecasts) 270 | date_2 <- seq(as.Date("2010/11/01"), as.Date("2017/12/31"),"month") 271 | date_2 <- as.yearmon(date_2) 272 | results=zoo(results,order.by = date_2) 273 | autoplot(results,facet=NULL)+ 274 | theme_minimal()+ 275 | labs(x="Time",y="Price")+ 276 | geom_vline(aes(xintercept=2017),size=0.2) 277 | -------------------------------------------------------------------------------- /Illegal Immigration.R: -------------------------------------------------------------------------------- 1 | ## Illegal Immigration 2 | 3 | ## Loading the Libraries and Changing the Working Directory 4 | library(tidyverse) 5 | library(d3Network) 6 | setwd('./Kaggle') 7 | 8 | ## Reading the Dataset & Rearranging Them 9 | 10 | arrests <- read.csv('./illegal immigration.csv') 11 | names(arrests) <- gsub("[.]", " ", names(arrests)) 12 | arrests.clns <- 13 | arrests %>% gather(key, value, -Border, -Sector, -`State Territory`) %>% separate(key, 14 | into = c("Year", "Type"), 15 | sep = " ", 16 | extra = "merge")%>%na.omit() 17 | 18 | arrests.clns$Year <- gsub("X", "", arrests.clns$Year) 19 | arrests.clns$Type<-trimws(tolower(arrests.clns$Type)) 20 | 21 | arrests.net <- 22 | arrests.clns %>% filter(Border != "United States" & 23 | Sector != "All") %>% select(Type, Sector, value) %>% rename(source = 24 | Type, target = Sector) 
%>% group_by(source, target) %>% 25 | summarize(value = mean(value)) %>% na.omit() %>% mutate(rank = rank(desc(value), source)) %>% 26 | arrange(rank)%>%filter(rank<=10) 27 | 28 | Nodes <- 29 | rbind(data.frame(name = unique(arrests.net$source)), data.frame(name = unique(arrests.net$target))) 30 | Links <- arrests.net 31 | Links$source <- match(Links$source, Nodes$name) - 1 32 | Links$target <- match(Links$target, Nodes$name) - 1 33 | 34 | # Graphs 35 | arrests.plot<-arrests.clns%>%group_by(Year,Type)%>%summarize(avg.value=mean(value)) 36 | 37 | 38 | ggplot(arrests.plot)+aes(Year,avg.value,color=Type)+ 39 | geom_point(size=2)+theme(plot.title = element_text(hjust = 0.5, face = "bold"), 40 | axis.text.y=element_blank(),axis.text.x=element_blank())+ 41 | labs(title="Average arrests over the years",y="Average Arrests") 42 | 43 | ## Based on the information that is given in the graph, the arrests 44 | ## of Mexicans have greatly reduced in comparison to all illegal immigrants. It 45 | ## has been considered that illegal immigration with Mexicans is not the biggest factor 46 | ## that is contributing to the problems in the United States. 
47 | -------------------------------------------------------------------------------- /Iris.R: -------------------------------------------------------------------------------- 1 | # Iris 2 | 3 | # Getting the Data 4 | data("iris") 5 | head(iris) 6 | 7 | # Loading the Libraries 8 | library(ggplot2) 9 | library(dplyr) 10 | library(tidyr) 11 | 12 | # Summary of the Data 13 | summary(iris) 14 | 15 | head(iris, n = 10) 16 | 17 | 18 | # Forming to Long Iris 19 | long_iris <- iris%>% 20 | gather(part,value,Sepal.Length,Sepal.Width,Petal.Length ,Petal.Width)%>% 21 | separate(part, c('part', 'measure'), sep = '\\.') 22 | head(long_iris, n=10) 23 | 24 | 25 | sapply(long_iris, class) 26 | 27 | fcts <- c('part', 'measure') 28 | long_iris[fcts] <- lapply(long_iris[fcts], as.factor) 29 | sapply(long_iris, class) 30 | 31 | 32 | Missing_d <- function(x){sum(is.na(x))/length(x)*100} 33 | 34 | 35 | apply(long_iris, 2, Missing_d) 36 | 37 | 38 | 39 | is_special <- function(x){ 40 | if(is.numeric(x)) !is.finite(x) else is.na(x) 41 | } 42 | 43 | 44 | sapply(long_iris, is_special) 45 | 46 | 47 | 48 | # Checking for NA Values 49 | 50 | sum(is.na(long_iris$value)) 51 | 52 | 53 | ## Exploratory Data Analysis 54 | p <- ggplot(long_iris, aes(x = Species, y = value, col = part)) 55 | p + geom_jitter(alpha = 0.4, size = 0.8) + facet_grid(.~ measure) 56 | p + geom_jitter(alpha = 0.3, size = 0.8) + stat_boxplot(alpha = 0.5) + facet_grid(.~ measure) 57 | p + geom_jitter(alpha = 0.5, size = 0.8) + stat_boxplot(alpha = 0.5) + facet_grid(.~ part) 58 | 59 | 60 | 61 | iris$Flower <- 1:nrow(iris) 62 | 63 | #create wide_iris 64 | wide_iris <- iris %>% 65 | gather(key, value, -Species, -Flower) %>% 66 | separate(key, c("Part", "Measure"),sep = "\\.") %>% 67 | spread(Measure, value) 68 | 69 | head(wide_iris, n=10) 70 | 71 | 72 | 73 | q <- ggplot(wide_iris, aes(x = Width, y = Length, col = Species)) 74 | q + geom_jitter(alpha = 0.4, size = 0.8) + facet_grid(. 
~ Species) + 75 | stat_smooth(method = 'lm', se = F) 76 | q + geom_jitter(alpha = 0.4, size = 0.8) + facet_grid(. ~ Part) 77 | q + geom_point(alpha = 0.4, size = 0.8) + stat_smooth(method = 'lm', fullrange = T, size = 0.5) -------------------------------------------------------------------------------- /Mass Shootings/Mass Shootings Dataset.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atse0612/Side-Projects-For-Fun/b2f21804b95d678df6553493d3cb6fc9635bc682/Mass Shootings/Mass Shootings Dataset.csv -------------------------------------------------------------------------------- /Mass Shootings/Mass Shootings.R: -------------------------------------------------------------------------------- 1 | # Mass Shootings 2 | 3 | ## Libraries 4 | 5 | library(data.table) # A faster way to handle data frames in R 6 | library(ggplot2) # For more control on plots 7 | library(ggthemes) # For prettier ggplot2 plot aesthetics and acessibility for color-blind palettes 8 | library(knitr) # For pretty tables 9 | library(lubridate) # For easy handling dates 10 | library(scales) # To add more ticks and facilitate plot interpretation 11 | library(lattice) 12 | library(chron) 13 | library(grid) 14 | 15 | ## Changing Working Directory 16 | 17 | setwd('./Kaggle/Mass Shootings') 18 | 19 | ## Loading the Data 20 | 21 | shooters_hooters <- fread("./Mass Shootings Dataset.csv") 22 | kable(head(shooters_hooters)) 23 | 24 | ## How many people got killed/injured per year? 
25 | 26 | yearvitm <- shooters_hooters[,.(Date,Fatalities,Injured)] 27 | kable(head(yearvitm, 10)) 28 | 29 | ## Converstion to Dates 30 | yearvitm$Date <- mdy(yearvitm$Date) 31 | 32 | ## Checking for Missing Data 33 | 34 | nrow(yearvitm[is.na(Date) | is.na(Fatalities) | is.na(Injured)]) 35 | 36 | 37 | ## Creating a Time-Series Plot of Fatalities and Injuries 38 | 39 | vitmyear_long <- melt(yearvitm,id.vars="Date",measure.vars = c("Fatalities","Injured"), 40 | variable.name="TypeOfVictim",value.name = "Number") 41 | kable(head(vitmyear_long)) 42 | 43 | 44 | ggplot(vitmyear_long, aes(x=Date, y=Number, color=TypeOfVictim)) + 45 | geom_line() + 46 | xlab("") + 47 | ylab("Number of Victims") + 48 | theme_minimal() + 49 | scale_x_date(breaks=pretty_breaks(n=10)) + 50 | labs(title = "Number of Victims per Year", 51 | subtitle = "The number of injured people in October 2, 2017 is much higher than ever seen before.") + 52 | scale_color_colorblind(name = "Type of Victim") 53 | 54 | ## Before the October 2 Fatality 55 | 56 | ggplot(vitmyear_long[Number < 100], aes(x=Date, y=Number, color=TypeOfVictim)) + 57 | geom_line() + 58 | xlab("") + 59 | ylab("Number of Victims") + 60 | theme_minimal() + 61 | scale_x_date(breaks=pretty_breaks(n=10)) + 62 | labs(title = "Number of Victims per Year Without October 2, 2017 Injuries") + 63 | scale_color_colorblind(name = "Type of Victim") 64 | 65 | 66 | ## Geom Plot 67 | 68 | ggplot(vitmyear_long[Number < 100], aes(x=Date, y=Number, color=TypeOfVictim)) + 69 | geom_point(alpha=0.4) + # Modified to point and added alpha for transparency 70 | xlab("") + 71 | ylab("Number of Victims") + 72 | theme_minimal() + 73 | scale_x_date(breaks=pretty_breaks(n=10)) + 74 | labs(title = "Number of Victims per Year Without October 2, 2017 Injuries", 75 | subtitle = "The number of shooting events itensified over the last years.") + 76 | scale_color_colorblind(name = "Type of Victim") 77 | 78 | 79 | ## Looking at the Calendar Days 80 | 81 | calendar_dead <- 
shooters_hooters[,.(Date,Victims=`Total victims`)] 82 | calendar_dead$Date <- mdy(calendar_dead$Date) 83 | kable(head(calendar_dead)) 84 | 85 | 86 | ## Missing Data 87 | 88 | nrow(calendar_dead[is.na(Date) | is.na(Victims)]) 89 | 90 | 91 | ## Creating a Calendar Heatmap 92 | 93 | heatmap_calendar <- function(dates, 94 | values, 95 | ncolors=99, 96 | color="r2g", 97 | varname="Values", 98 | date.form = "%Y-%m-%d", ...) { 99 | if (class(dates) == "character" | class(dates) == "factor" ) { 100 | dates <- strptime(dates, date.form) 101 | } 102 | caldat <- data.frame(value = values, dates = dates) 103 | min.date <- as.Date(paste(format(min(dates), "%Y"), 104 | "-1-1",sep = "")) 105 | max.date <- as.Date(paste(format(max(dates), "%Y"), 106 | "-12-31", sep = "")) 107 | dates.f <- data.frame(date.seq = seq(min.date, max.date, by="days")) 108 | 109 | # Merge moves data by one day, avoid 110 | caldat <- data.frame(date.seq = seq(min.date, max.date, by="days"), value = NA) 111 | dates <- as.Date(dates) 112 | caldat$value[match(dates, caldat$date.seq)] <- values 113 | 114 | caldat$dotw <- as.numeric(format(caldat$date.seq, "%w")) 115 | caldat$woty <- as.numeric(format(caldat$date.seq, "%U")) + 1 116 | caldat$yr <- as.factor(format(caldat$date.seq, "%Y")) 117 | caldat$month <- as.numeric(format(caldat$date.seq, "%m")) 118 | yrs <- as.character(unique(caldat$yr)) 119 | d.loc <- as.numeric() 120 | for (m in min(yrs):max(yrs)) { 121 | d.subset <- which(caldat$yr == m) 122 | sub.seq <- seq(1,length(d.subset)) 123 | d.loc <- c(d.loc, sub.seq) 124 | } 125 | caldat <- cbind(caldat, seq=d.loc) 126 | 127 | #color styles 128 | r2b <- c("#0571B0", "#92C5DE", "#F7F7F7", "#F4A582", "#CA0020") #red to blue 129 | r2g <- c("#D61818", "#FFAE63", "#FFFFBD", "#B5E384") #red to green 130 | w2b <- c("#045A8D", "#2B8CBE", "#74A9CF", "#BDC9E1", "#F1EEF6") #white to blue 131 | 132 | assign("col.sty", get(color)) 133 | calendar.pal <- colorRampPalette((col.sty), space = "Lab") 134 | def.theme <- 
lattice.getOption("default.theme") 135 | cal.theme <- 136 | function() { 137 | theme <- 138 | list( 139 | strip.background = list(col = "transparent"), 140 | strip.border = list(col = "transparent"), 141 | axis.line = list(col="transparent"), 142 | par.strip.text=list(cex=0.8)) 143 | } 144 | lattice.options(default.theme = cal.theme) 145 | yrs <- (unique(caldat$yr)) 146 | nyr <- length(yrs) 147 | print(cal.plot <- levelplot(value~woty*dotw | yr, data=caldat, 148 | as.table=TRUE, 149 | aspect=.12, 150 | layout = c(1, nyr%%7), 151 | between = list(x=0, y=c(1,1)), 152 | strip=TRUE, 153 | main = paste("Calendar Heat Map of ", varname, sep = ""), 154 | scales = list( 155 | x = list( 156 | at= c(seq(2.9, 52, by=4.42)), 157 | labels = month.abb, 158 | alternating = c(1, rep(0, (nyr-1))), 159 | tck=0, 160 | cex = 0.7), 161 | y=list( 162 | at = c(0, 1, 2, 3, 4, 5, 6), 163 | labels = c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", 164 | "Friday", "Saturday"), 165 | alternating = 1, 166 | cex = 0.6, 167 | tck=0)), 168 | xlim =c(0.4, 54.6), 169 | ylim=c(6.6,-0.6), 170 | cuts= ncolors - 1, 171 | col.regions = (calendar.pal(ncolors)), 172 | xlab="" , 173 | ylab="", 174 | colorkey= list(col = calendar.pal(ncolors), width = 0.6, height = 0.5), 175 | subscripts=TRUE 176 | ) ) 177 | panel.locs <- trellis.currentLayout() 178 | for (row in 1:nrow(panel.locs)) { 179 | for (column in 1:ncol(panel.locs)) { 180 | if (panel.locs[row, column] > 0) 181 | { 182 | trellis.focus("panel", row = row, column = column, 183 | highlight = FALSE) 184 | xyetc <- trellis.panelArgs() 185 | subs <- caldat[xyetc$subscripts,] 186 | dates.fsubs <- caldat[caldat$yr == unique(subs$yr),] 187 | y.start <- dates.fsubs$dotw[1] 188 | y.end <- dates.fsubs$dotw[nrow(dates.fsubs)] 189 | dates.len <- nrow(dates.fsubs) 190 | adj.start <- dates.fsubs$woty[1] 191 | 192 | for (k in 0:6) { 193 | if (k < y.start) { 194 | x.start <- adj.start + 0.5 195 | } else { 196 | x.start <- adj.start - 0.5 197 | } 198 | if (k 
> y.end) { 199 | x.finis <- dates.fsubs$woty[nrow(dates.fsubs)] - 0.5 200 | } else { 201 | x.finis <- dates.fsubs$woty[nrow(dates.fsubs)] + 0.5 202 | } 203 | grid.lines(x = c(x.start, x.finis), y = c(k -0.5, k - 0.5), 204 | default.units = "native", gp=gpar(col = "grey", lwd = 1)) 205 | } 206 | if (adj.start < 2) { 207 | grid.lines(x = c( 0.5, 0.5), y = c(6.5, y.start-0.5), 208 | default.units = "native", gp=gpar(col = "grey", lwd = 1)) 209 | grid.lines(x = c(1.5, 1.5), y = c(6.5, -0.5), default.units = "native", 210 | gp=gpar(col = "grey", lwd = 1)) 211 | grid.lines(x = c(x.finis, x.finis), 212 | y = c(dates.fsubs$dotw[dates.len] -0.5, -0.5), default.units = "native", 213 | gp=gpar(col = "grey", lwd = 1)) 214 | if (dates.fsubs$dotw[dates.len] != 6) { 215 | grid.lines(x = c(x.finis + 1, x.finis + 1), 216 | y = c(dates.fsubs$dotw[dates.len] -0.5, -0.5), default.units = "native", 217 | gp=gpar(col = "grey", lwd = 1)) 218 | } 219 | grid.lines(x = c(x.finis, x.finis), 220 | y = c(dates.fsubs$dotw[dates.len] -0.5, -0.5), default.units = "native", 221 | gp=gpar(col = "grey", lwd = 1)) 222 | } 223 | for (n in 1:51) { 224 | grid.lines(x = c(n + 1.5, n + 1.5), 225 | y = c(-0.5, 6.5), default.units = "native", gp=gpar(col = "grey", lwd = 1)) 226 | } 227 | x.start <- adj.start - 0.5 228 | 229 | if (y.start > 0) { 230 | grid.lines(x = c(x.start, x.start + 1), 231 | y = c(y.start - 0.5, y.start - 0.5), default.units = "native", 232 | gp=gpar(col = "black", lwd = 1.75)) 233 | grid.lines(x = c(x.start + 1, x.start + 1), 234 | y = c(y.start - 0.5 , -0.5), default.units = "native", 235 | gp=gpar(col = "black", lwd = 1.75)) 236 | grid.lines(x = c(x.start, x.start), 237 | y = c(y.start - 0.5, 6.5), default.units = "native", 238 | gp=gpar(col = "black", lwd = 1.75)) 239 | if (y.end < 6 ) { 240 | grid.lines(x = c(x.start + 1, x.finis + 1), 241 | y = c(-0.5, -0.5), default.units = "native", 242 | gp=gpar(col = "black", lwd = 1.75)) 243 | grid.lines(x = c(x.start, x.finis), 244 | y = 
c(6.5, 6.5), default.units = "native", 245 | gp=gpar(col = "black", lwd = 1.75)) 246 | } else { 247 | grid.lines(x = c(x.start + 1, x.finis), 248 | y = c(-0.5, -0.5), default.units = "native", 249 | gp=gpar(col = "black", lwd = 1.75)) 250 | grid.lines(x = c(x.start, x.finis), 251 | y = c(6.5, 6.5), default.units = "native", 252 | gp=gpar(col = "black", lwd = 1.75)) 253 | } 254 | } else { 255 | grid.lines(x = c(x.start, x.start), 256 | y = c( - 0.5, 6.5), default.units = "native", 257 | gp=gpar(col = "black", lwd = 1.75)) 258 | } 259 | 260 | if (y.start == 0 ) { 261 | if (y.end < 6 ) { 262 | grid.lines(x = c(x.start, x.finis + 1), 263 | y = c(-0.5, -0.5), default.units = "native", 264 | gp=gpar(col = "black", lwd = 1.75)) 265 | grid.lines(x = c(x.start, x.finis), 266 | y = c(6.5, 6.5), default.units = "native", 267 | gp=gpar(col = "black", lwd = 1.75)) 268 | } else { 269 | grid.lines(x = c(x.start + 1, x.finis), 270 | y = c(-0.5, -0.5), default.units = "native", 271 | gp=gpar(col = "black", lwd = 1.75)) 272 | grid.lines(x = c(x.start, x.finis), 273 | y = c(6.5, 6.5), default.units = "native", 274 | gp=gpar(col = "black", lwd = 1.75)) 275 | } 276 | } 277 | for (j in 1:12) { 278 | last.month <- max(dates.fsubs$seq[dates.fsubs$month == j]) 279 | x.last.m <- dates.fsubs$woty[last.month] + 0.5 280 | y.last.m <- dates.fsubs$dotw[last.month] + 0.5 281 | grid.lines(x = c(x.last.m, x.last.m), y = c(-0.5, y.last.m), 282 | default.units = "native", gp=gpar(col = "black", lwd = 1.75)) 283 | if ((y.last.m) < 6) { 284 | grid.lines(x = c(x.last.m, x.last.m - 1), y = c(y.last.m, y.last.m), 285 | default.units = "native", gp=gpar(col = "black", lwd = 1.75)) 286 | grid.lines(x = c(x.last.m - 1, x.last.m - 1), y = c(y.last.m, 6.5), 287 | default.units = "native", gp=gpar(col = "black", lwd = 1.75)) 288 | } else { 289 | grid.lines(x = c(x.last.m, x.last.m), y = c(- 0.5, 6.5), 290 | default.units = "native", gp=gpar(col = "black", lwd = 1.75)) 291 | } 292 | } 293 | } 294 | } 295 | 
trellis.unfocus() 296 | } 297 | lattice.options(default.theme = def.theme) 298 | } 299 | 300 | 301 | ## Analysis for 2012-2017 302 | 303 | subset_dc <- calendar_dead[year(Date) %in% 2012:2017] 304 | heatmap_calendar(subset_dc$Date, subset_dc$Victims, varname="Victims") 305 | 306 | 307 | 308 | calendar_deadly <- calendar_dead[year(Date) %in% 2006:2011] # the six years before 2012-2017 (was written as 2012:2017-6) 309 | heatmap_calendar(calendar_deadly$Date, calendar_deadly$Victims, varname="Victims") 310 | -------------------------------------------------------------------------------- /Melbourne Housing Market.R: -------------------------------------------------------------------------------- 1 | # Melbourne Housing Market 2 | 3 | ## Importing the Libraries 4 | library(ggplot2) 5 | library(plyr) # load plyr before dplyr so dplyr's verbs are not masked 6 | library(dplyr) 7 | library(scales) 8 | library(readr) 9 | 10 | ## Getting the Dataset 11 | housing <- read.csv('./Melbourne Housing.csv') 12 | head(housing) 13 | summary(housing) 14 | 15 | ## Checking for NA Values 16 | any(is.na(housing)) 17 | 18 | 19 | housing_filter <- filter(housing, !is.na(Price)) # drop rows with a missing Price 20 | n <- length(housing_filter$Price) 21 | maxprice<-sort(housing_filter$Price,partial=n-9)[n-9] 22 | minprice<-sort(housing_filter$Price,partial=10)[10] 23 | housing_maxfilter <- filter(housing_filter,Price>=maxprice) 24 | housing_minfilter <- filter(housing_filter,Price<=minprice) 25 | 26 | ## Exploratory Data Analysis 27 | ggplot(housing_maxfilter,aes(Suburb,Price))+geom_bar(stat = "identity") + scale_y_continuous(labels=comma) 28 | ggplot(housing_minfilter,aes(Suburb,Price))+geom_bar(stat = "identity") + scale_y_continuous(labels=comma) 29 | qplot(Price,data=housing_filter,geom = "freqpoly",bins=50) + scale_x_continuous(labels=comma) 30 | -------------------------------------------------------------------------------- /NFL Draft.R: -------------------------------------------------------------------------------- 1 | ### NFL Draft 2 | 3 | # Load Libraries 4 | options(warn=-1) 5 | library(dplyr) 6 | library(ggplot2) 7 |
library(repr) 8 | 9 | setwd('./Kaggle') 10 | 11 | 12 | # Read the Data 13 | draft <- read.csv('./NFL Draft.csv') 14 | head(draft) 15 | str(draft) 16 | 17 | # Checking for the NA Values 18 | 19 | any(is.na(draft)) 20 | 21 | # Creating the Linear Regression 22 | draft <- draft %>% select(Pick, DrAV) %>% filter(Pick<257) %>% na.omit() 23 | fit.1 <- lm(DrAV ~ Pick, data=draft) 24 | fit.2 <- lm(DrAV ~ poly(Pick,2), data=draft) 25 | fit.3 <- lm(DrAV ~ poly(Pick,3), data=draft) 26 | fit.4 <- lm(DrAV ~ poly(Pick,4), data=draft) 27 | fit.5 <- lm(DrAV ~ poly(Pick,5), data=draft) 28 | fit.6 <- lm(DrAV ~ poly(Pick,6), data=draft) 29 | fit.7 <- lm(DrAV ~ poly(Pick,7), data=draft) 30 | 31 | anova(fit.1, fit.2, fit.3, fit.4, fit.5, fit.6, fit.7) 32 | 33 | # Draft Pick Exploratory Analysis 34 | 35 | draft$y_hat <- predict(fit.5) 36 | group_by_pick <- draft %>% group_by(Pick) %>% summarise(predicted_av = mean(y_hat)) %>% data.frame() 37 | options(repr.plot.width=4, repr.plot.height=3) 38 | ggplot(group_by_pick, aes(Pick, predicted_av)) + geom_point(color='blue') 39 | 40 | 41 | # Quarterback Draft Analysis 42 | 43 | qb <- read.csv("./NFL Draft.csv") 44 | qb <- qb %>% filter(Position.Standard=='QB') %>% select(Pick, DrAV) %>% filter(Pick<257) %>% na.omit() 45 | fit.5 <- lm(DrAV ~ poly(Pick,5), data=qb) 46 | new <- data.frame(Pick = seq_len(256)) 47 | y_hat <- predict(fit.5, new, se.fit = TRUE) 48 | df <- data.frame(Pick = new$Pick, y_hat = y_hat$fit) # keep only the fitted values, keyed by Pick so the join below has a key 49 | qb <- inner_join(qb, df, by = "Pick") 50 | group_by_pick <- qb %>% group_by(Pick) %>% summarise(predicted_av = mean(y_hat)) %>% data.frame() 51 | options(repr.plot.width=4, repr.plot.height=3) 52 | ggplot(group_by_pick, aes(Pick, predicted_av)) + geom_point() 53 | 54 | 55 | qb <- read.csv("./NFL Draft.csv") 56 | qb <- filter(qb, Position.Standard=="QB") 57 | qb <- qb %>% select(Pick, DrAV) %>% filter(Pick<257) %>% na.omit() 58 | fit.5 <- lm(DrAV ~ poly(Pick,5), data=qb) 59 | new <- data.frame(Pick = seq_len(256)) 60 | y_hat <- predict(fit.5, 
new, se.fit = TRUE) 61 | df <- data.frame(y_hat = matrix(unlist(y_hat))) 62 | Predicted_AV <- df[seq(1,256),] 63 | df <- data.frame(Pick = new$Pick, Predicted_AV) 64 | ggplot(df, aes(Pick, Predicted_AV)) + geom_point(color='orange') 65 | 66 | 67 | 68 | # Running Backs in Draft 69 | 70 | rb <- read.csv("./NFL Draft.csv") 71 | rb <- filter(rb, Position.Standard=="RB") 72 | rb <- rb %>% select(Pick, DrAV) %>% filter(Pick<257) %>% na.omit() # pipe from the RB subset filtered above 73 | fit.5 <- lm(DrAV ~ poly(Pick,5), data=rb) 74 | new <- data.frame(Pick = seq_len(256)) 75 | y_hat <- predict(fit.5, new, se.fit = TRUE) 76 | df <- data.frame(y_hat = matrix(unlist(y_hat))) 77 | Predicted_AV <- df[seq(1,256),] 78 | df <- data.frame(Pick = new$Pick, Predicted_AV) 79 | ggplot(df, aes(Pick, Predicted_AV)) + geom_point(color='red') 80 | head(df) 81 | 82 | 83 | -------------------------------------------------------------------------------- /NFL Draft.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: 3 | word_document: default 4 | html_document: default 5 | --- 6 | ### NFL Draft 7 | 8 | # Load Libraries 9 | ```{r} 10 | options(warn=-1) 11 | library(dplyr) 12 | library(ggplot2) 13 | library(repr) 14 | ``` 15 | 16 | # Read the Data 17 | ```{r} 18 | draft <- read.csv('./NFL Draft.csv') 19 | head(draft) 20 | str(draft) 21 | ``` 22 | # Checking for the NA Values 23 | 24 | ```{r} 25 | any(is.na(draft)) 26 | ``` 27 | 28 | # Creating the Linear Regression 29 | 30 | ```{r} 31 | draft <- draft %>% select(Pick, DrAV) %>% filter(Pick<257) %>% na.omit() 32 | fit.1 <- lm(DrAV ~ Pick, data=draft) 33 | fit.2 <- lm(DrAV ~ poly(Pick,2), data=draft) 34 | fit.3 <- lm(DrAV ~ poly(Pick,3), data=draft) 35 | fit.4 <- lm(DrAV ~ poly(Pick,4), data=draft) 36 | fit.5 <- lm(DrAV ~ poly(Pick,5), data=draft) 37 | fit.6 <- lm(DrAV ~ poly(Pick,6), data=draft) 38 | fit.7 <- lm(DrAV ~ poly(Pick,7), data=draft) 39 | 40 | anova(fit.1, fit.2, fit.3, fit.4, fit.5, fit.6, fit.7) 41 | ``` 42 | 43 | 
Created a linear model to fit all seven rounds of the NFL Draft for predictions with the modern draft. 44 | 45 | # Draft Pick Exploratory Analysis 46 | 47 | 48 | ```{r} 49 | draft$y_hat <- predict(fit.5) 50 | group_by_pick <- draft %>% group_by(Pick) %>% summarise(predicted_av = mean(y_hat)) %>% data.frame() 51 | options(repr.plot.width=4, repr.plot.height=3) 52 | ggplot(group_by_pick, aes(Pick, predicted_av)) + geom_point(color='blue') 53 | ``` 54 | 55 | # Quarterback Draft Analysis 56 | 57 | 58 | 59 | ```{r} 60 | qb <- read.csv("./Nfl Draft.csv") 61 | qb <- filter(qb, Position.Standard=="QB") 62 | qb <- qb %>% 63 | select(Pick, DrAV) %>% 64 | filter(Pick<257) %>% 65 | na.omit() 66 | fit.5 <- lm(DrAV ~ poly(Pick,5), data=qb) 67 | new <- data.frame(Pick = seq_len(256)) 68 | y_hat <- predict(fit.5, new, se.fit = TRUE) 69 | df <- data.frame(y_hat = matrix(unlist(y_hat))) 70 | Predicted_AV <- df[seq(1,256),] 71 | df <- data.frame(Pick = new$Pick, Predicted_AV) 72 | ggplot(df, aes(Pick, Predicted_AV)) + geom_point(color='orange') 73 | ``` 74 | 75 | 76 | # Running Backs in Draft 77 | 78 | ```{r} 79 | rb <- read.csv("./Nfl Draft.csv") 80 | rb <- filter(rb, Position.Standard=="RB") 81 | rb <- rb %>% 82 | select(Pick, DrAV) %>% 83 | filter(Pick<257) %>% 84 | na.omit() 85 | fit.5 <- lm(DrAV ~ poly(Pick,5), data=rb) 86 | new <- data.frame(Pick = seq_len(256)) 87 | y_hat <- predict(fit.5, new, se.fit = TRUE) 88 | df <- data.frame(y_hat = matrix(unlist(y_hat))) 89 | Predicted_AV <- df[seq(1,256),] 90 | df <- data.frame(Pick = new$Pick, Predicted_AV) 91 | ggplot(df, aes(Pick, Predicted_AV)) + geom_point(color='red') 92 | head(df) 93 | ``` 94 | 95 | -------------------------------------------------------------------------------- /NFL_Draft.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atse0612/Side-Projects-For-Fun/b2f21804b95d678df6553493d3cb6fc9635bc682/NFL_Draft.pdf 
-------------------------------------------------------------------------------- /NYSE/NYSE.R: -------------------------------------------------------------------------------- 1 | # NYSE Analysis 2 | 3 | ##Changing the Working Directory 4 | 5 | setwd('./Kaggle/NYSE') 6 | 7 | ## Loading the Libraries 8 | 9 | library(Quandl) 10 | library(ggplot2) 11 | library(readr) 12 | library(dplyr) 13 | library(quantmod) 14 | 15 | ## Reading the Data 16 | 17 | nyse<-read.table("./prices.csv",header = TRUE,sep=",") #importing data into R 18 | head(nyse) #finding structure of the stock 19 | unique(nyse$symbol) #found it has 501 unique stocks 20 | 21 | 22 | ## Picking Apple 23 | 24 | aapl <- subset(nyse,symbol=="AAPL") #extracting only Apple 25 | 26 | aapl[1:3,] 27 | close<-aapl$close #creating vector called close 28 | close[1:3] 29 | plot(close, type="l",xlab = "days",ylab = "Closing Price",main="Plotting Line Chart of Apple Stock") #plotting a line chart 30 | returns <- (close[1:(length(close)-1)]-close[2:length(close)])/close[2:length(close)] #calculating arthmatic daily return 31 | plot(returns,type = "l",xlab = "AAPL",main="Plotting daily returns of AAPL Stock") #plotting daily return chart 32 | macd <- MACD(close,nFast=12,nSlow=26,nSig = 9,maType = SMA,percent = FALSE) #MACD calculation 33 | ohlc <- aapl[c("date","open","high","low","close","volume")] #OHLC dataframe as the excel data cant be used to create a candlestick chart 34 | head(ohlc) 35 | 36 | 37 | 38 | ## Picking RL 39 | 40 | RL <- subset(nyse,symbol=="RL") #extracting only Ralph Lauren 41 | 42 | RL[1:3,] 43 | close <- RL$close #creating vector called close 44 | close[1:3] 45 | plot(close, type="l",xlab = "days",ylab = "Closing Price",main="Plotting Line Chart of RL Stock") #plotting a line chart 46 | returns <- (close[1:(length(close)-1)]-close[2:length(close)])/close[2:length(close)] #calculating arthmatic daily return 47 | plot(returns,type = "l",xlab = "RL",main="Plotting daily returns of RL Stock") #plotting 
daily return chart 48 | macd <- MACD(close,nFast=12,nSlow=26,nSig = 9,maType = SMA,percent = FALSE) #MACD calculation 49 | ohlc <- RL[c("date","open","high","low","close","volume")] #OHLC dataframe as the excel data can't be used to create a candlestick chart 50 | head(ohlc) 51 | 52 | 53 | ## Picking FB 54 | 55 | 56 | FB <- subset(nyse,symbol=="FB") #extracting only Facebook 57 | 58 | FB[1:3,] 59 | close <- FB$close #creating vector called close 60 | close[1:3] 61 | plot(close, type="l",xlab = "days",ylab = "Closing Price",main="Plotting Line Chart of FB Stock") #plotting a line chart 62 | returns <- (close[1:(length(close)-1)]-close[2:length(close)])/close[2:length(close)] #calculating arithmetic daily return; NOTE(review): this is (P[i]-P[i+1])/P[i+1], confirm the row order of prices.csv before reading the sign 63 | plot(returns,type = "l",xlab = "FB",main="Plotting daily returns of FB Stock") #plotting daily return chart 64 | macd <- MACD(close,nFast=12,nSlow=26,nSig = 9,maType = SMA,percent = FALSE) #MACD calculation 65 | ohlc <- FB[c("date","open","high","low","close","volume")] #OHLC dataframe as the excel data can't be used to create a candlestick chart 66 | head(ohlc) 67 | 68 | 69 | ## Picking PG 70 | 71 | 72 | PG <- subset(nyse,symbol=="PG") #extracting only PG 73 | 74 | PG[1:3,] 75 | close <- PG$close #creating vector called close 76 | close[1:3] 77 | plot(close, type="l",xlab = "days",ylab = "Closing Price",main="Plotting Line Chart of PG Stock") #plotting a line chart 78 | returns <- (close[1:(length(close)-1)]-close[2:length(close)])/close[2:length(close)] #calculating arithmetic daily return; NOTE(review): this is (P[i]-P[i+1])/P[i+1], confirm the row order of prices.csv before reading the sign 79 | plot(returns,type = "l",xlab = "PG",main="Plotting daily returns of PG Stock") #plotting daily return chart 80 | macd <- MACD(close,nFast=12,nSlow=26,nSig = 9,maType = SMA,percent = FALSE) #MACD calculation 81 | ohlc <- PG[c("date","open","high","low","close","volume")] #OHLC dataframe as the excel data can't be used to create a candlestick chart 82 | head(ohlc) 83 | 84 | 85 | -------------------------------------------------------------------------------- /Norwegian 
Development Funds.R: -------------------------------------------------------------------------------- 1 | #### Norweigian Development Funds 2 | 3 | 4 | # Change Working Directory 5 | setwd('./Kaggle') 6 | 7 | 8 | # Load the Libraries 9 | 10 | library(readr) 11 | library(ggplot2) 12 | library(tidyverse) 13 | library(dplyr) 14 | 15 | 16 | # Getting the Data 17 | ndf <- read_csv("./funds.csv") 18 | head(ndf) 19 | summary(ndf) 20 | 21 | # Checking for NA Values 22 | any(is.na(ndf)) 23 | 24 | # Cleaning the Data 25 | names(ndf) <- make.names(names(ndf)) 26 | ndf[, grep("NA.", colnames(ndf))] <- NULL 27 | 28 | # Exploratory Data Analysis 29 | 30 | ndf %>% group_by(Recipient.Region, Year) %>% 31 | summarise(Disbursements=sum(Disbursements..1000...)/1000) %>% ungroup() %>% 32 | ggplot(aes(x=Year, y=Disbursements)) + 33 | geom_bar(stat = "identity",aes(fill=Recipient.Region)) + 34 | facet_wrap(~Recipient.Region) 35 | 36 | # Middle East is slowly picking up, Asia is on decline. 37 | # Interesting to see the same chart in terms of size of the contract. 38 | # Are certain regions getting bigger "support packages" then others? 39 | # Are there material differences in geographical vs non-geographical contracts? 40 | # Lets look at the quantity of those contracts by country. 
41 | 42 | ndf%>% group_by(Recipient.Region, Year) %>% 43 | summarise(Mean_Disbursement=mean(Disbursements..1000...)/1000) %>% 44 | ungroup() %>% 45 | ggplot(aes(x=Year, y=Mean_Disbursement)) + 46 | geom_bar(stat = "identity",aes(fill=Year)) + facet_wrap(~Recipient.Region) 47 | 48 | 49 | # Non Geographical-Projects 50 | ndf%>% dplyr::filter(Recipient.Region=="Not geographically allocated") %>% 51 | group_by(Main.Sector) %>% 52 | summarise(Disbursements=sum(Disbursements..1000...)/1000) %>% 53 | ungroup() %>% 54 | ggplot(aes(x=Main.Sector, y=Disbursements)) + 55 | geom_bar(stat = "identity",aes(fill=Disbursements)) + coord_flip() 56 | 57 | # It has been determined that administration costs are very high up for the 58 | # disbursements. 59 | 60 | 61 | ndf %>% 62 | dplyr::filter(grepl("910 - Administration", Main.Sector)) %>% 63 | group_by(Budget.Post..Chapter) %>% 64 | summarise(Disbursements=sum(Disbursements..1000...)/1000) %>% 65 | ungroup() %>% 66 | ggplot(aes(x=Budget.Post..Chapter, y=Disbursements)) + 67 | geom_bar(stat = "identity",aes(fill=Disbursements)) + coord_flip() 68 | 69 | -------------------------------------------------------------------------------- /Norwegian Development Funds.Rmd: -------------------------------------------------------------------------------- 1 | #### Norweigian Development Funds 2 | 3 | 4 | # Load the Libraries 5 | 6 | ```{r} 7 | library(readr) 8 | library(ggplot2) 9 | library(tidyverse) 10 | library(dplyr) 11 | ``` 12 | 13 | # Getting the Data 14 | 15 | ```{r} 16 | ndf <- read_csv("./funds.csv") 17 | head(ndf) 18 | summary(ndf) 19 | ``` 20 | 21 | 22 | # Checking for NA Values 23 | 24 | ```{r} 25 | any(is.na(ndf)) 26 | ``` 27 | 28 | # Cleaning the Data 29 | 30 | ```{r} 31 | names(ndf) <- make.names(names(ndf)) 32 | ndf[, grep("NA.", colnames(ndf))] <- NULL 33 | ``` 34 | 35 | 36 | # Exploratory Data Analysis 37 | 38 | 39 | ```{r} 40 | ndf %>% group_by(Recipient.Region, Year) %>% 41 | 
summarise(Disbursements=sum(Disbursements..1000...)/1000) %>% ungroup() %>% 42 | ggplot(aes(x=Year, y=Disbursements)) + 43 | geom_bar(stat = "identity",aes(fill=Recipient.Region)) + 44 | facet_wrap(~Recipient.Region) 45 | ``` 46 | 47 | Middle East is slowly picking up, Asia is on decline. Interesting to see the same chart in terms of size of the contract. Are certain regions getting bigger "support packages" then others? Are there material differences in geographical vs non-geographical contracts? Lets look at the quantity of those contracts by country. 48 | 49 | ```{r} 50 | ndf%>% group_by(Recipient.Region, Year) %>% 51 | summarise(Mean_Disbursement=mean(Disbursements..1000...)/1000) %>% 52 | ungroup() %>% 53 | ggplot(aes(x=Year, y=Mean_Disbursement)) + 54 | geom_bar(stat = "identity",aes(fill=Year)) + facet_wrap(~Recipient.Region) 55 | ``` 56 | 57 | # Non Geographical-Projects 58 | 59 | ```{r} 60 | ndf%>% dplyr::filter(Recipient.Region=="Not geographically allocated") %>% 61 | group_by(Main.Sector) %>% 62 | summarise(Disbursements=sum(Disbursements..1000...)/1000) %>% 63 | ungroup() %>% 64 | ggplot(aes(x=Main.Sector, y=Disbursements)) + 65 | geom_bar(stat = "identity",aes(fill=Disbursements)) + coord_flip() 66 | ``` 67 | 68 | It has been determined that administration costs are very high up for the disbursements. 
69 | 70 | ```{r} 71 | ndf %>% 72 | dplyr::filter(grepl("910 - Administration", Main.Sector)) %>% 73 | group_by(Budget.Post..Chapter) %>% 74 | summarise(Disbursements=sum(Disbursements..1000...)/1000) %>% 75 | ungroup() %>% 76 | ggplot(aes(x=Budget.Post..Chapter, y=Disbursements)) + 77 | geom_bar(stat = "identity",aes(fill=Disbursements)) + coord_flip() 78 | ``` 79 | -------------------------------------------------------------------------------- /NorwegianDevelopmentFunds.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atse0612/Side-Projects-For-Fun/b2f21804b95d678df6553493d3cb6fc9635bc682/NorwegianDevelopmentFunds.pdf -------------------------------------------------------------------------------- /Pokemon Mining/Pokemon Data Mining.R: -------------------------------------------------------------------------------- 1 | # Pokemon Data Mining 2 | 3 | ## Loading the Libraries 4 | 5 | library(ggplot2) 6 | library(dplyr) 7 | library(gridExtra) 8 | library(fmsb) 9 | library(corrplot) 10 | library(corrgram) 11 | library(caTools) 12 | library(gplots) 13 | library(RColorBrewer) 14 | 15 | ## Changing the Working Directory 16 | 17 | setwd('./Kaggle/Pokemon Mining') 18 | 19 | ## Reading the Dataset and Grouping the Data 20 | 21 | pokmon <-read.csv('./pokemon_alopez247.csv',sep=',') 22 | 23 | ## Filter by Type and Grouping the Data Together 24 | 25 | group <- pokmon %>% 26 | filter(hasGender=='True') %>% 27 | group_by(Body_Style) %>% 28 | select(HP,Attack,Defense,Sp_Atk,Sp_Def,Speed,Pr_Male,Height_m, Weight_kg, Catch_Rate) %>% 29 | summarise(avgHP = mean(Attack),avgDefense = mean(Defense),avgSPAttack = mean(Sp_Atk),avgSPDef = mean(Sp_Def),avgProbMale = mean(Pr_Male),avgHeight = mean(Height_m),avgWeight = mean(Weight_kg),avgCatch = mean(Catch_Rate)) 30 | 31 | 32 | ## Creating the Radar Plots 33 | 34 | max <- c(100,100,100,100,1,3,100,200) 35 | min <- rep(0,8) 36 | 37 | 38 | par(mfrow=c(4,4)) 39 | 
par(mar=c(1,1,1,1)) 40 | for(i in 1:nrow(group)){ 41 | radarchart(rbind(max,min,group[i,2:9]), axistype=2 , 42 | pcol=rgb(0.2,0.5,0.5,0.9) , pfcol=rgb(0.2,0.5,0.5,0.5) , 43 | plwd=4 , cglcol="grey", cglty=1, axislabcol="grey", caxislabels=seq(0,2000,5), 44 | cglwd=0.8, vlcex=0.6 ,title=as.character(group$Body_Style[i])) 45 | } 46 | 47 | 48 | ## Creating a Heatmap 49 | 50 | grouped <-as.data.frame(group) 51 | row.names(grouped) <- grouped$Body_Style 52 | grouped <- grouped[,2:9] 53 | group_matrix <- data.matrix(grouped) 54 | heatmap.2(group_matrix, Rowv=FALSE, Colv=FALSE, dendrogram='none', cellnote=round(group_matrix,digits=2), notecol="black", trace='none', key=FALSE,lwid = c(.01,.99),lhei = c(.01,.99),margins = c(8,16)) 55 | 56 | 57 | ## Racetrack Plot 58 | 59 | #select the average of Total (sum of all characteristics) and reorder the result 60 | 61 | first <- pokmon %>% 62 | filter(hasGender=='True') %>% 63 | group_by(Body_Style) %>% 64 | select(Total) %>% 65 | summarise(avgTotal = mean(Total)) 66 | 67 | first%>% 68 | arrange(desc(avgTotal)) 69 | 70 | 71 | first$ReorderedBody <- reorder(first$Body_Style, first$avgTotal) 72 | 73 | ## Creating a Color Palette 74 | 75 | colorCount = length(unique(group$Body_Style)) 76 | getPalette = colorRampPalette(brewer.pal(9, "Set1")) 77 | 78 | ## Defining Each Color Label for Each Bar 79 | 80 | first$LABEL <-paste0(round(first$avgTotal)) 81 | ggplot(first, aes(x=ReorderedBody, y=avgTotal, fill=factor(ReorderedBody))) + 82 | geom_bar(width = 0.9, stat="identity") + 83 | scale_fill_manual(values = getPalette(colorCount)) + 84 | coord_polar(theta = "y") + 85 | xlab("") + 86 | ylab("") + 87 | ylim(c(0,max(first$avgTotal))) + 88 | ggtitle("Average Total per Body Style") + 89 | geom_text(data = first, hjust = 1, size = 3, aes(x = Body_Style, y = 0, label = LABEL)) + 90 | theme_minimal() + 91 | guides(fill=guide_legend(title=NULL,reverse=TRUE)) + 92 | theme(panel.grid.major = element_blank(),panel.grid.minor = 
element_blank(),axis.line = element_blank(),axis.text.y = element_blank(),axis.text.x = element_blank(),axis.ticks = element_blank(),plot.title = element_text(hjust = 0.5)) 93 | 94 | 95 | 96 | ## Mean/Median Comparison 97 | 98 | Median <- pokmon %>% 99 | filter(hasGender=='True') %>% 100 | group_by(Body_Style) %>% 101 | select(HP,Attack,Defense,Sp_Atk,Sp_Def,Speed,Pr_Male,Height_m, Weight_kg, Catch_Rate) %>% 102 | summarise(medHP = median(Attack),medDefense = median(Defense),medSPAttack = median(Sp_Atk),medSPDef = median(Sp_Def),medProbMale = median(Pr_Male),medHeight = median(Height_m),medWeight = median(Weight_kg),medCatch = median(Catch_Rate)) 103 | 104 | 105 | Mean<-rbind(max,min,group[1,2:9]) 106 | byMedian<-rbind(max,min,Median[1,2:9]) 107 | op <- par(mar=c(1, 2, 2, 1),mfrow=c(1, 2)) 108 | radarchart(Mean ,vlcex=.6, title = "Bipedal_tailed : mean") 109 | radarchart(byMedian ,vlcex=.6,title = "Bipedal_tailed : median") 110 | 111 | 112 | g1<-ggplot(data=filter(pokmon,hasGender=='True' & Body_Style=='bipedal_tailed'),aes(x=Pr_Male)) + geom_histogram(bins=100) 113 | 114 | g2<-ggplot(data=filter(pokmon,hasGender=='True' & Body_Style=='bipedal_tailed'),aes(x=Catch_Rate)) + geom_histogram(bins=100) 115 | 116 | g3<-ggplot(data=filter(pokmon,hasGender=='True' & Body_Style=='bipedal_tailed'),aes(x=Attack)) + geom_histogram(bins=100) 117 | 118 | grid.arrange(g1,g2,g3,ncol=3) 119 | 120 | 121 | ## Creating a Linear Model (Correlation with Numeric Variables) 122 | 123 | pokemon <-pokmon %>% 124 | filter(hasGender=='True' & Body_Style=='bipedal_tailed') %>% 125 | select(-Number) 126 | num.cols <- sapply(pokemon, is.numeric) 127 | cor.data <- cor(pokemon[,num.cols]) 128 | corrPLOT<-corrplot(cor.data,method='ellipse') 129 | 130 | 131 | ## Linear Model (3) 132 | 133 | pokemon1 <-pokmon %>% 134 | filter(hasGender=='True') %>% 135 | select(-Generation) 136 | 137 | ## Split data into Training/Testing 138 | 139 | set.seed(1562) 140 | 
split<-sample.split(pokemon1$Number,SplitRatio=.7) 141 | train<-subset(pokemon1,split==TRUE) 142 | test<-subset(pokemon1,split==FALSE) 143 | 144 | ## Summarize a Given Model 145 | 146 | plotRes <- function(mod){ # prints the model and its summary, plots predictions vs. the global `test` set, returns a formatted MSE/RMSE/R2 string 147 | print(mod) 148 | print(summary(mod)) # a bare summary() inside a function is discarded; print it explicitly 149 | #create DF with prediction and real values 150 | mod.predictions <- predict(mod,test) 151 | mod.res<- cbind(mod.predictions,test$Catch_Rate) 152 | colnames(mod.res) <- c('pred','real') 153 | mod.res <- as.data.frame(mod.res) 154 | #make plots of residuals,etc... 155 | g1 <- ggplot(data=mod.res,aes(x=pred,y=real)) + geom_point() + geom_abline(intercept = 0, slope = 1, color="red") 156 | g2 <- ggplot(data=mod.res,aes(x=real-pred)) + geom_histogram(bins=100) 157 | g3 <- ggplot(data=mod.res,aes(x=pred,y=real-pred)) + geom_point() 158 | grid.arrange(g1,g2,g3,nrow=2, ncol=2) 159 | #calculate metrics 160 | mse <- mean((mod.res$real-mod.res$pred)^2) 161 | rmse<-mse^0.5 162 | SSE = sum((mod.res$pred - mod.res$real)^2) 163 | SST = sum( (mean(test$Catch_Rate) - mod.res$real)^2) 164 | R2 = 1 - SSE/SST 165 | sprintf("MSE: %f RMSE : %f R2 :%f", mse,rmse,R2) 166 | } 167 | 168 | # Linear Model 169 | 170 | linModel<-lm(Catch_Rate ~ HP + Attack + Defense + Sp_Atk + Sp_Def + Speed + Pr_Male + Height_m + Weight_kg, train) 171 | plotRes(linModel) 172 | 173 | -------------------------------------------------------------------------------- /Pokemon Mining/final_model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atse0612/Side-Projects-For-Fun/b2f21804b95d678df6553493d3cb6fc9635bc682/Pokemon Mining/final_model.pkl -------------------------------------------------------------------------------- /Pokemon Mining/report_Pokemon.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atse0612/Side-Projects-For-Fun/b2f21804b95d678df6553493d3cb6fc9635bc682/Pokemon Mining/report_Pokemon.pdf 
-------------------------------------------------------------------------------- /Pokemon.R: -------------------------------------------------------------------------------- 1 | # Pokemon 2 | 3 | ## In this report, we are going to analyze the different types of Pokemon to check on types and frequencies. 4 | 5 | 6 | 7 | # Checking Frequencies 8 | setwd("./Kaggle") 9 | pokemon <- read.csv('./Pokemon.csv', header = T) 10 | pokemon$Name <- as.character(pokemon$Name) 11 | rev(sort(table(pokemon$Type.1))) 12 | rev(sort(table(pokemon$Type.2))) 13 | 14 | library(ggplot2) 15 | library(ggthemes) 16 | library(corrplot) 17 | library(reshape2) 18 | 19 | # In this section, we are going to generate a linear model to determine which Pokemon is the strongest in combination. 20 | 21 | 22 | colnames(pokemon) <- c("number", "name", "type1", "type2", "total", "hp", 23 | "attack", "defense", "sp.atk", "sp.def", "speed", 24 | "generation", "legendary") 25 | head(pokemon) 26 | poke <- lm(total ~ hp + attack + defense + sp.atk + sp.def + speed, pokemon) 27 | par(mfrow = c(2,2)) 28 | plot(poke) 29 | 30 | 31 | pokemon[c(1,3,6), 2] 32 | 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Side-Projects-For-Fun 2 | 3 | The purpose is to draw conclusions or analyses in order to utilize better decision-making or analyses based on facts pulled from the datasets. 4 | 5 | The datasets are going to be varied from different websites that is given. 6 | 7 | It will consist of different programming languages. 8 | 9 | This repository consists of data that analyzes the following: 10 | 11 | Affordable Care Act 12 | 13 | To perform analyses on which US states that are uninsured during the years of 2010 and 2015. 
14 | 15 | Education Data 16 | 17 | Data analyses to determine which gender performs well and takes advantage of resources, to predict which 18 | courses are the most common, and to determine which model is more accurate using decision trees, SVM, and confusion matrices. 19 | 20 | Fake News 21 | 22 | Uses both Python and R to analyze Fake News and predict which type of news an article is by 23 | running a decision tree model in R. 24 | The analysis is also done in Python to measure the accuracy of a random forest model. 25 | 26 | Global Land Temperatures in Oakland and San Francisco 27 | 28 | Uses R to predict how temperatures around these two cities differ in comparison to the data recorded early in their 29 | history. The analysis involves converting the latitude and longitude, graphing the average monthly temperature in degrees 30 | Celsius, comparing the uncertainty between today and 100 years ago, and creating a random forest model for both cities. 31 | 32 | NFL Draft 33 | 34 | Uses R to predict the performance of draft picks over the last 30 years in order to see which rounds and picks have performed 35 | the best in their careers. The analysis is also done separately for Quarterbacks and Running Backs. 36 | 37 | Y Combinator 38 | 39 | Y Combinator is a venture capital organization that provides seed money for potential start-ups. Uses Python to predict which 40 | organizations and industries it would invest in. 
41 | -------------------------------------------------------------------------------- /Speed Dating Data.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/atse0612/Side-Projects-For-Fun/b2f21804b95d678df6553493d3cb6fc9635bc682/Speed Dating Data.csv -------------------------------------------------------------------------------- /Welfare/SNAPerror.csv: -------------------------------------------------------------------------------- 1 | ST,Error 2 | CT,5.84 3 | ME,2.52 4 | MA,5.09 5 | NH,4.81 6 | NY,5.23 7 | RI,5.97 8 | VT,2.76 9 | DE,2.78 10 | MD,3.41 11 | NJ,1.43 12 | PA,4.27 13 | VA,4.73 14 | WV,4.9 15 | AL,2.03 16 | FL,0.42 17 | GA,6.49 18 | KY,6 19 | MI,1.16 20 | NC,4.98 21 | SC,1.09 22 | TN,1.08 23 | IL,5.27 24 | IN,4.76 25 | MI,2.99 26 | MN,6.87 27 | OH,4.67 28 | WI,2.55 29 | AR,5.58 30 | LA,1.55 31 | NM,6.22 32 | OK,5.58 33 | TX,0.63 34 | CO,4.26 35 | IA,4.6 36 | KS,0.75 37 | MO,1.5 38 | MT,7.25 39 | NB,2.98 40 | ND,1.73 41 | SD,1.26 42 | UT,2.79 43 | WY,5.19 44 | AK,0.89 45 | AZ,5.18 46 | CA,5.13 47 | HI,4.13 48 | ID,2.74 49 | NV,7.61 50 | OR,5.11 51 | WA,0.77 52 | -------------------------------------------------------------------------------- /Welfare/UIerror.csv: -------------------------------------------------------------------------------- 1 | ST,Sample,Amount,Over,Under,Improper,BYE,Fraud,Agency,Work 2 | CA,"1,715","$16,956,798,074 ",6.93%,0.39%,7.32%,3.91%,4.91%,0.86%,0.13% 3 | TX,"1,454","$7,498,430,914 ",10.11%,0.24%,10.35%,2.03%,0.73%,1.56%,5.28% 4 | PA,"1,538","$7,133,177,891 ",9.23%,0.28%,9.51%,5.40%,6.19%,1.40%,0.00% 5 | NY,"1,445","$7,117,763,161 ",8.44%,0.32%,8.76%,1.98%,4.72%,1.81%,0.90% 6 | NJ,"1,452","$6,421,190,104 ",14.91%,1.59%,16.50%,3.34%,0.63%,1.98%,5.90% 7 | IL,"1,468","$5,584,275,026 ",14.09%,0.52%,14.61%,4.94%,1.53%,0.85%,7.60% 8 | MA,"1,540","$4,186,212,294 ",11.25%,0.56%,11.81%,4.94%,2.76%,2.57%,4.40% 9 | OH,"1,445","$3,024,921,930 
",8.91%,0.66%,9.57%,2.30%,1.59%,2.22%,3.46% 10 | WA,"1,248","$3,017,584,189 ",13.33%,0.23%,13.56%,1.98%,1.54%,0.60%,8.61% 11 | MI,"1,440","$2,500,031,867 ",22.60%,0.45%,23.05%,3.18%,2.42%,5.69%,17.23% 12 | MN,"1,463","$2,403,053,981 ",7.49%,0.34%,7.82%,5.08%,3.08%,0.60%,0.00% 13 | CT,"1,469","$2,102,220,297 ",6.48%,0.17%,6.65%,1.75%,2.06%,1.27%,2.62% 14 | MD,"1,491","$1,951,584,741 ",18.81%,0.19%,19.00%,5.01%,2.80%,1.59%,11.60% 15 | WI,"1,521","$1,907,839,572 ",19.96%,0.40%,20.35%,4.92%,3.83%,2.24%,12.31% 16 | OR,"1,448","$1,628,033,954 ",11.52%,0.67%,12.18%,2.22%,7.01%,2.20%,5.67% 17 | CO,"1,460","$1,563,741,964 ",10.81%,0.69%,11.50%,3.75%,1.48%,2.11%,2.55% 18 | GA,"1,440","$1,451,640,663 ",7.85%,0.00%,7.85%,2.08%,1.55%,1.01%,4.97% 19 | VA,"1,464","$1,429,554,781 ",7.46%,0.21%,7.67%,2.45%,1.32%,1.48%,2.60% 20 | NC,"1,558","$1,303,128,966 ",16.21%,0.62%,16.83%,4.07%,3.35%,1.71%,10.19% 21 | IA,"1,440","$1,219,787,168 ",8.96%,0.50%,9.46%,3.86%,0.93%,1.69%,0.28% 22 | IN,"1,446","$1,156,473,354 ",9.37%,0.10%,9.47%,3.62%,1.01%,2.62%,0.15% 23 | KY,"1,525","$1,095,889,549 ",11.26%,0.36%,11.63%,3.58%,3.39%,4.20%,3.14% 24 | MO,"1,440","$1,075,249,585 ",7.20%,0.11%,7.31%,3.41%,3.37%,0.67%,1.83% 25 | NV,"1,474","$1,046,096,955 ",24.63%,0.41%,25.04%,5.63%,3.92%,1.73%,15.07% 26 | OK,"1,449","$896,136,361 ",3.96%,0.23%,4.19%,2.00%,0.76%,0.98%,0.67% 27 | AZ,"1,441","$885,731,513 ",11.13%,0.06%,11.19%,3.92%,4.56%,2.35%,2.59% 28 | TN,"1,437","$868,021,609 ",22.32%,0.32%,22.64%,3.81%,3.05%,6.12%,12.88% 29 | KS,"1,442","$738,353,820 ",17.00%,0.13%,17.13%,3.56%,2.60%,2.58%,8.51% 30 | AR,"1,441","$736,933,973 ",9.73%,0.46%,10.19%,4.82%,5.28%,1.51%,0.00% 31 | AL,"1,443","$697,834,597 ",6.99%,0.13%,7.13%,3.22%,2.16%,0.77%,2.03% 32 | WV,"1,448","$685,370,018 ",3.65%,0.43%,4.08%,2.09%,1.08%,0.50%,0.20% 33 | FL,600,"$681,007,973 ",6.41%,0.03%,6.44%,2.33%,1.26%,1.95%,1.52% 34 | SC,"1,524","$601,798,820 ",14.20%,0.24%,14.44%,6.46%,4.84%,1.36%,7.94% 35 | NM,"1,484","$560,943,955 
",18.79%,0.35%,19.13%,3.85%,2.72%,3.05%,13.56% 36 | LA,"1,449","$540,247,945 ",9.83%,0.27%,10.10%,5.37%,4.07%,3.26%,0.10% 37 | UT,"1,445","$539,019,184 ",6.50%,0.24%,6.74%,1.58%,1.48%,0.62%,2.65% 38 | HI,"1,084","$531,658,743 ",3.48%,0.15%,3.63%,1.46%,0.96%,0.32%,0.49% 39 | RI,"1,446","$523,060,078 ",12.70%,0.43%,13.12%,2.66%,5.28%,1.94%,7.43% 40 | PR,"1,456","$494,880,795 ",8.07%,0.68%,8.74%,4.40%,2.68%,3.39%,0.10% 41 | AK,"1,444","$411,994,099 ",9.14%,0.35%,9.49%,3.00%,1.87%,0.84%,2.72% 42 | ND,"1,082","$407,266,907 ",15.45%,0.26%,15.71%,1.73%,0.38%,1.07%,10.01% 43 | ME,"1,447","$396,544,911 ",18.06%,0.34%,18.40%,2.56%,1.09%,1.97%,13.65% 44 | DC,"1,092","$392,138,796 ",14.04%,0.38%,14.41%,9.71%,3.83%,2.35%,1.69% 45 | MS,"1,505","$359,396,023 ",9.17%,0.28%,9.45%,4.46%,4.62%,2.25%,1.06% 46 | MT,"1,082","$324,791,812 ",8.41%,0.39%,8.80%,2.26%,1.63%,2.28%,3.44% 47 | ID,"1,457","$293,251,194 ",12.26%,0.32%,12.59%,2.95%,4.83%,1.48%,5.75% 48 | DE,"1,080","$255,174,354 ",12.60%,0.58%,13.17%,2.73%,2.67%,3.54%,5.29% 49 | NE,"1,081","$254,806,809 ",14.31%,0.30%,14.61%,3.62%,0.95%,2.36%,6.22% 50 | WY,"1,080","$251,902,817 ",11.82%,0.06%,11.88%,1.35%,1.62%,2.01%,5.12% 51 | VT,"1,081","$227,535,295 ",6.44%,0.47%,6.90%,2.19%,3.68%,0.36%,1.47% 52 | NH,"1,087","$200,260,946 ",6.17%,0.53%,6.70%,2.18%,0.90%,1.28%,0.74% 53 | SD,"1,080","$81,500,590 ",9.86%,0.12%,9.97%,1.65%,3.66%,2.05%,6.76% 54 | -------------------------------------------------------------------------------- /Welfare/Welfare.R: -------------------------------------------------------------------------------- 1 | # Welfare 2 | 3 | 4 | ### Loading the Libraries 5 | library(tidyverse) 6 | library(readr) 7 | library(ggmap) 8 | library(highcharter) 9 | data(usgeojson) 10 | 11 | 12 | ### Setting the Working Directory 13 | 14 | setwd('./Kaggle/Welfare') 15 | 16 | ## Reading the Data 17 | welfare <- read.csv('./UIerror.csv') 18 | snap <- read.csv('./SNAPerror.csv') 19 | welfare <- welfare %>%mutate(Fraud= 
as.numeric(unlist(strsplit(welfare$Fraud,'%')))) 20 | 21 | ## Reading NA 22 | 23 | any(is.na(welfare)) 24 | any(is.na(snap)) 25 | 26 | ## Welfare Fraud By State 27 | 28 | highchart() %>% 29 | hc_title(text = "Welfare Fraud Rate by State", align= "right") %>% 30 | hc_add_series_map(usgeojson, df = welfare, 31 | value = "Fraud", joinBy =c("postalcode","ST")) %>% 32 | hc_mapNavigation(enabled = TRUE) %>% 33 | hc_add_theme(hc_theme_538())%>% 34 | hc_credits(enabled = TRUE, text = "https://www.dol.gov/general/maps/data", 35 | href = "https://www.dol.gov/general/maps/data") 36 | 37 | 38 | highchart() %>% 39 | hc_title(text = "SNAP Payment Error Rate", align="right") %>% 40 | hc_add_series_map(usgeojson, df =snap, 41 | value = "Error", joinBy =c("postalcode","ST")) %>% 42 | hc_mapNavigation(enabled = TRUE) %>% 43 | hc_add_theme(hc_theme_538()) %>% 44 | hc_credits(enabled = TRUE, text = "https://www.fns.usda.gov/sites/default/files/snap/2014-rates.pdf", 45 | href = "https://www.fns.usda.gov/sites/default/files/snap/2014-rates.pdf") -------------------------------------------------------------------------------- /World Food Facts.R: -------------------------------------------------------------------------------- 1 | ### World Food Facts 2 | 3 | 4 | setwd('./Kaggle') 5 | 6 | # Load Libraries: 7 | 8 | library(ggplot2) 9 | library(dplyr) 10 | library(data.table) 11 | library(caret) 12 | library(corrplot) 13 | 14 | # Reading the Data 15 | food = read.csv('./FoodFacts.csv') 16 | head(food) 17 | summary(food) 18 | 19 | # Checking for NA Values 20 | any(is.na(food)) 21 | 22 | ## Cleaning the Data 23 | 24 | # Meat and Vegan 25 | 26 | meat = grep("meat|Meat", food$categories_en) 27 | vegan = grep("vegan|Vegan", food$labels) 28 | 29 | 30 | # New Dataframe for Countries and Products 31 | 32 | Countries = as.data.frame(table(food$countries_en)) 33 | Countries = Countries[-1,] 34 | 35 | # Countries With the Highest Value 36 | 37 | Top = head(Countries[order(-Countries$Freq),],10) 38 | 
Top <- droplevels(Top)


# Adding Values to the Countries

# For each country name, sum `Freq` over every row of `counts` whose `Var1`
# label contains that name, so multi-country labels such as "France,Germany"
# are credited to each listed country. Names are matched literally
# (`fixed = TRUE`), not as regular expressions.
sum_country_counts <- function(countries, counts) {
  vapply(
    as.character(countries),
    function(country) {
      sum(counts$Freq[grepl(country, counts$Var1, fixed = TRUE)])
    },
    numeric(1),
    USE.NAMES = FALSE
  )
}

Top$Freq <- sum_country_counts(Top$Var1, Countries)

# Plotting the Number of Products Per Country
barplot(Top$Freq, names.arg = Top$Var1, col = "lavender",
        main = "Number of Products by Country",
        ylab = "Counts", las = 1)

# New Dataframe for the Meat

Countries_meat <- as.data.frame(table(food$countries_en[meat]))
# NOTE(review): drops the first table entry -- presumably the blank country
# label; confirm this still holds for the meat subset.
Countries_meat <- Countries_meat[-1, ]

# Countries With the Highest Meat Products
Top_meat <- head(Countries_meat[order(-Countries_meat$Freq), ], 10)
# Get rid of any rows that are a combination of countries by looking for a
# comma. A logical mask is used because `x[-grep(...), ]` would drop every
# row when no label contains a comma.
Top_meat <- Top_meat[!grepl(",", Top_meat$Var1, fixed = TRUE), ]
Top_meat <- droplevels(Top_meat)


## Combining the Dataset

# The original loop looked up `Top[i, 1]` here, so each row of Top_meat
# received the total of a different country; the names must come from
# Top_meat itself.
Top_meat$Freq <- sum_country_counts(Top_meat$Var1, Countries_meat)

# New Dataframe for Vegan Products

Countries_vegan <- as.data.frame(table(food$countries_en[vegan]))
# NOTE(review): same first-entry assumption as for Countries_meat.
Countries_vegan <- Countries_vegan[-1, ]


Top_vegan <- head(Countries_vegan[order(-Countries_vegan$Freq), ], 10)
# Get rid of any rows that are a combination of countries by looking for a comma
Top_vegan <- Top_vegan[!grepl(",", Top_vegan$Var1, fixed = TRUE), ]
Top_vegan <- droplevels(Top_vegan)

Top_vegan$Freq <- sum_country_counts(Top_vegan$Var1, Countries_vegan)


# The raw data is no longer needed once the per-country tables exist.
rm(food)


# Change column names
colnames(Top) <- c("Country", "Count")
colnames(Top_meat) <- c("Country", "Count")
colnames(Top_vegan) <- c("Country", "Count")


# Do some merging to get overall results; only countries present in both
# tables survive the merge.
Results_meat <- merge(Top, Top_meat, by = "Country")
Results_vegan <- merge(Top, Top_vegan, by = "Country")
colnames(Results_meat) <- c("Country", "Total no. of products", "No. of meat products")
colnames(Results_vegan) <- c("Country", "Total no. of products", "No. of vegan products")

# % for Each Country for Meat and Vegan Products
Results_meat$MeatPerc <- Results_meat$`No. of meat products` /
  Results_meat$`Total no. of products` * 100
Results_vegan$VeganPerc <- Results_vegan$`No. of vegan products` /
  Results_vegan$`Total no. of products` * 100


## Exploratory Data Analysis in Meat

# Meat
m <- ggplot(Results_meat, aes(x = reorder(Country, -MeatPerc), y = MeatPerc))

m + geom_bar(stat = "identity", fill = "yellow", colour = "red") +
  ggtitle("Countries with Highest % of Meat Products \n (in terms of no. of products submitted)") +
  ylab("Percentage %") +
  theme_classic() +
  theme(legend.position = "none") +
  theme(axis.text.x = element_text(size = 15, angle = 90)) +
  scale_x_discrete(name = "")


# Vegan

v <- ggplot(Results_vegan, aes(x = reorder(Country, -VeganPerc), y = VeganPerc))

v + geom_bar(stat = "identity", fill = "dark blue", colour = "green") +
  ggtitle("Countries with Highest % of Vegan-labelled Products \n (in terms of no. of products submitted)") +
  ylab("Percentage %") +
  theme_classic() +
  theme(legend.position = "none") +
  theme(axis.text.x = element_text(size = 15, angle = 90)) +
  scale_x_discrete(name = "")



### Removing Spain

# Logical mask so the table is left intact when Spain is not present.
Results_vegan <- Results_vegan[!grepl("Spain", Results_vegan$Country, fixed = TRUE), ]



v <- ggplot(Results_vegan, aes(x = reorder(Country, -VeganPerc), y = VeganPerc))

v + geom_bar(stat = "identity", fill = "orange", colour = "red", alpha = 0.5) +
  ggtitle("Countries with Highest % of Vegan-labelled Products - Spain omitted \n (in terms of no. of products submitted)") +
  ylab("Percentage %") +
  theme_classic() +
  theme(legend.position = "none") +
  theme(axis.text.x = element_text(size = 15, angle = 90)) +
  scale_x_discrete(name = "")
-------------------------------------------------------------------------------- /mcdonalds.csv: -------------------------------------------------------------------------------- 1 | Category,Item,Serving Size,Calories,Calories from Fat,Total Fat,Total Fat (% Daily Value),Saturated Fat,Saturated Fat (% Daily Value),Trans Fat,Cholesterol,Cholesterol (% Daily Value),Sodium,Sodium (% Daily Value),Carbohydrates,Carbohydrates (% Daily Value),Dietary Fiber,Dietary Fiber (% Daily Value),Sugars,Protein,Vitamin A (% Daily Value),Vitamin C (% Daily Value),Calcium (% Daily Value),Iron (% Daily Value) 2 | Breakfast,Egg McMuffin,4.8 oz (136 g),300,120,13,20,5,25,0,260,87,750,31,31,10,4,17,3,17,10,0,25,15 3 | Breakfast,Egg White Delight,4.8 oz (135 g),250,70,8,12,3,15,0,25,8,770,32,30,10,4,17,3,18,6,0,25,8 4 | Breakfast,Sausage McMuffin,3.9 oz (111 g),370,200,23,35,8,42,0,45,15,780,33,29,10,4,17,2,14,8,0,25,10 5 | Breakfast,Sausage McMuffin with Egg,5.7 oz (161 g),450,250,28,43,10,52,0,285,95,860,36,30,10,4,17,2,21,15,0,30,15 6 | Breakfast,Sausage McMuffin with Egg Whites,5.7 oz (161
g),400,210,23,35,8,42,0,50,16,880,37,30,10,4,17,2,21,6,0,25,10 7 | Breakfast,Steak & Egg McMuffin,6.5 oz (185 g),430,210,23,36,9,46,1,300,100,960,40,31,10,4,18,3,26,15,2,30,20 8 | Breakfast,"Bacon, Egg & Cheese Biscuit (Regular Biscuit)",5.3 oz (150 g),460,230,26,40,13,65,0,250,83,1300,54,38,13,2,7,3,19,10,8,15,15 9 | Breakfast,"Bacon, Egg & Cheese Biscuit (Large Biscuit)",5.8 oz (164 g),520,270,30,47,14,68,0,250,83,1410,59,43,14,3,12,4,19,15,8,20,20 10 | Breakfast,"Bacon, Egg & Cheese Biscuit with Egg Whites (Regular Biscuit)",5.4 oz (153 g),410,180,20,32,11,56,0,35,11,1300,54,36,12,2,7,3,20,2,8,15,10 11 | Breakfast,"Bacon, Egg & Cheese Biscuit with Egg Whites (Large Biscuit)",5.9 oz (167 g),470,220,25,38,12,59,0,35,11,1420,59,42,14,3,12,4,20,6,8,15,15 12 | Breakfast,Sausage Biscuit (Regular Biscuit),4.1 oz (117 g),430,240,27,42,12,62,0,30,10,1080,45,34,11,2,6,2,11,0,0,6,15 13 | Breakfast,Sausage Biscuit (Large Biscuit),4.6 oz (131 g),480,280,31,48,13,65,0,30,10,1190,50,39,13,3,11,3,11,4,0,8,15 14 | Breakfast,Sausage Biscuit with Egg (Regular Biscuit),5.7 oz (163 g),510,290,33,50,14,71,0,250,83,1170,49,36,12,2,6,2,18,6,0,10,20 15 | Breakfast,Sausage Biscuit with Egg (Large Biscuit),6.2 oz (177 g),570,330,37,57,15,74,0,250,83,1280,53,42,14,3,11,3,18,10,0,10,20 16 | Breakfast,Sausage Biscuit with Egg Whites (Regular Biscuit),5.9 oz (167 g),460,250,27,42,12,62,0,35,11,1180,49,34,11,2,6,3,18,0,0,8,15 17 | Breakfast,Sausage Biscuit with Egg Whites (Large Biscuit),6.4 oz (181 g),520,280,32,49,13,65,0,35,11,1290,54,40,13,3,11,3,18,4,0,8,15 18 | Breakfast,Southern Style Chicken Biscuit (Regular Biscuit),5 oz (143 g),410,180,20,31,8,41,0,30,10,1180,49,41,14,2,6,3,17,0,2,6,15 19 | Breakfast,Southern Style Chicken Biscuit (Large Biscuit),5.5 oz (157 g),470,220,24,37,9,45,0,30,10,1290,54,46,15,3,11,4,17,4,2,8,15 20 | Breakfast,Steak & Egg Biscuit (Regular Biscuit),7.1 oz (201 g),540,290,32,49,16,78,1,280,93,1470,61,38,13,2,8,3,25,10,2,20,25 21 | Breakfast,"Bacon, Egg & Cheese 
McGriddles",6.1 oz (174 g),460,190,21,32,9,44,0,250,84,1250,52,48,16,2,9,15,19,10,10,20,15 22 | Breakfast,"Bacon, Egg & Cheese McGriddles with Egg Whites",6.3 oz (178 g),400,140,15,24,7,34,0,35,11,1250,52,47,16,2,9,16,20,2,10,15,10 23 | Breakfast,Sausage McGriddles,5 oz (141 g),420,200,22,34,8,40,0,35,11,1030,43,44,15,2,8,15,11,0,0,8,10 24 | Breakfast,"Sausage, Egg & Cheese McGriddles",7.1 oz (201 g),550,280,31,48,12,61,0,265,89,1320,55,48,16,2,9,15,20,10,0,20,15 25 | Breakfast,"Sausage, Egg & Cheese McGriddles with Egg Whites",7.2 oz (205 g),500,230,26,40,10,52,0,50,17,1320,55,46,15,2,9,15,21,2,0,20,10 26 | Breakfast,"Bacon, Egg & Cheese Bagel",6.9 oz (197 g),620,280,31,48,11,56,0.5,275,92,1480,62,57,19,3,11,7,30,20,15,20,20 27 | Breakfast,"Bacon, Egg & Cheese Bagel with Egg Whites",7.1 oz (201 g),570,230,25,39,9,45,0.5,60,20,1480,62,55,18,3,12,8,30,10,15,20,15 28 | Breakfast,"Steak, Egg & Cheese Bagel",8.5 oz (241 g),670,310,35,53,13,63,1.5,295,99,1510,63,56,19,3,12,7,33,20,4,25,25 29 | Breakfast,Big Breakfast (Regular Biscuit),9.5 oz (269 g),740,430,48,73,17,87,0,555,185,1560,65,51,17,3,12,3,28,15,2,15,25 30 | Breakfast,Big Breakfast (Large Biscuit),10 oz (283 g),800,470,52,80,18,90,0,555,185,1680,70,56,19,4,17,3,28,15,2,15,30 31 | Breakfast,Big Breakfast with Egg Whites (Regular Biscuit),9.6 oz (272 g),640,330,37,57,14,69,0,35,12,1590,66,50,17,3,12,3,26,0,2,10,15 32 | Breakfast,Big Breakfast with Egg Whites (Large Biscuit),10.1 oz (286 g),690,370,41,63,14,72,0,35,12,1700,71,55,18,4,17,4,26,4,2,10,15 33 | Breakfast,Big Breakfast with Hotcakes (Regular Biscuit),14.8 oz (420 g),1090,510,56,87,19,96,0,575,192,2150,90,111,37,6,23,17,36,15,2,25,40 34 | Breakfast,Big Breakfast with Hotcakes (Large Biscuit),15.3 oz (434 g),1150,540,60,93,20,100,0,575,192,2260,94,116,39,7,28,17,36,15,2,30,40 35 | Breakfast,Big Breakfast with Hotcakes and Egg Whites (Regular Biscuit),14.9 oz (423 g),990,410,46,70,16,78,0,55,19,2170,91,110,37,6,23,17,35,0,2,25,30 36 | Breakfast,Big 
Breakfast with Hotcakes and Egg Whites (Large Biscuit),15.4 oz (437 g),1050,450,50,77,16,81,0,55,19,2290,95,115,38,7,28,18,35,4,2,25,30 37 | Breakfast,Hotcakes,5.3 oz (151 g),350,80,9,13,2,9,0,20,7,590,24,60,20,3,10,14,8,0,0,15,15 38 | Breakfast,Hotcakes and Sausage,6.8 oz (192 g),520,210,24,37,7,36,0,50,17,930,39,61,20,3,10,14,15,0,0,15,15 39 | Breakfast,Sausage Burrito,3.9 oz (111 g),300,150,16,25,7,33,0,115,38,790,33,26,9,1,5,2,12,10,2,15,15 40 | Breakfast,Hash Brown,2 oz (56 g),150,80,9,14,1.5,6,0,0,0,310,13,15,5,2,6,0,1,0,2,0,2 41 | Breakfast,Cinnamon Melts,4 oz (114 g),460,170,19,30,9,43,0,15,5,370,15,66,22,3,11,32,6,4,0,6,15 42 | Breakfast,Fruit & Maple Oatmeal,9.6 oz (251 g),290,35,4,6,1.5,8,0,5,2,160,7,58,19,5,19,32,5,2,130,10,10 43 | Breakfast,Fruit & Maple Oatmeal without Brown Sugar,9.6 oz (251 g),260,40,4,6,1.5,8,0,5,2,115,5,49,16,5,22,18,5,2,130,6,10 44 | Beef & Pork,Big Mac,7.4 oz (211 g),530,240,27,42,10,48,1,85,28,960,40,47,16,3,13,9,24,6,2,25,25 45 | Beef & Pork,Quarter Pounder with Cheese,7.1 oz (202 g),520,240,26,41,12,61,1.5,95,31,1100,46,41,14,3,11,10,30,10,2,30,25 46 | Beef & Pork,Quarter Pounder with Bacon & Cheese,8 oz (227 g),600,260,29,45,13,63,1.5,105,34,1440,60,48,16,3,12,12,37,6,15,25,30 47 | Beef & Pork,Quarter Pounder with Bacon Habanero Ranch,8.3 oz (235 g),610,280,31,48,13,64,1.5,105,35,1180,49,46,15,3,14,10,37,8,20,25,30 48 | Beef & Pork,Quarter Pounder Deluxe,8.6 oz (244 g),540,250,27,42,11,54,1.5,85,28,960,40,45,15,3,13,9,29,10,8,25,30 49 | Beef & Pork,Double Quarter Pounder with Cheese,10 oz (283 g),750,380,43,66,19,96,2.5,160,53,1280,53,42,14,3,11,10,48,10,2,30,35 50 | Beef & Pork,Hamburger,3.5 oz (98 g),240,70,8,12,3,15,0,30,10,480,20,32,11,1,6,6,12,2,2,10,15 51 | Beef & Pork,Cheeseburger,4 oz (113 g),290,100,11,18,5,27,0.5,45,15,680,28,33,11,2,7,7,15,6,2,20,15 52 | Beef & Pork,Double Cheeseburger,5.7 oz (161 g),430,190,21,32,10,52,1,90,30,1040,43,35,12,2,8,7,24,10,2,30,20 53 | Beef & Pork,Bacon Clubhouse Burger,9.5 oz (270 
g),720,360,40,62,15,75,1.5,115,38,1470,61,51,17,4,14,14,39,8,25,30,25 54 | Beef & Pork,McDouble,5.2 oz (147 g),380,150,17,26,8,40,1,75,25,840,35,34,11,2,7,7,22,6,2,20,20 55 | Beef & Pork,Bacon McDouble,5.7 oz (161 g),440,200,22,34,10,49,1,90,30,1110,46,35,12,2,7,7,27,6,10,20,20 56 | Beef & Pork,Daily Double,6.7 oz (190 g),430,200,22,35,9,44,1,80,27,760,32,34,11,2,8,7,22,8,8,20,20 57 | Beef & Pork,Jalapeño Double,5.6 oz (159 g),430,210,23,36,9,44,1,80,27,1030,43,35,12,2,7,6,22,6,8,20,20 58 | Beef & Pork,McRib,7.3 oz (208 g),500,240,26,40,10,48,0,70,23,980,41,44,15,3,10,11,22,2,2,15,20 59 | Chicken & Fish,Premium Crispy Chicken Classic Sandwich,7.5 oz (213 g),510,200,22,33,3.5,18,0,45,16,990,41,55,18,3,13,10,24,4,6,15,20 60 | Chicken & Fish,Premium Grilled Chicken Classic Sandwich,7 oz (200 g),350,80,9,13,2,9,0,65,22,820,34,42,14,3,13,8,28,4,8,15,20 61 | Chicken & Fish,Premium Crispy Chicken Club Sandwich,8.8 oz (249 g),670,300,33,51,9,44,0,85,29,1410,59,58,19,3,14,11,36,8,20,30,20 62 | Chicken & Fish,Premium Grilled Chicken Club Sandwich,8.3 oz (235 g),510,180,20,31,7,36,0,105,35,1250,52,44,15,3,13,9,40,8,20,30,20 63 | Chicken & Fish,Premium Crispy Chicken Ranch BLT Sandwich,8.1 oz (230 g),610,250,28,43,6,31,0,70,24,1400,58,57,19,3,13,11,32,4,20,15,20 64 | Chicken & Fish,Premium Grilled Chicken Ranch BLT Sandwich,7.6 oz (217 g),450,130,15,23,4.5,22,0,90,30,1230,51,43,14,3,13,9,36,4,20,15,20 65 | Chicken & Fish,Bacon Clubhouse Crispy Chicken Sandwich,10 oz (284 g),750,340,38,59,10,51,0.5,90,31,1720,72,65,22,4,15,16,36,8,25,30,15 66 | Chicken & Fish,Bacon Clubhouse Grilled Chicken Sandwich,9.5 oz (270 g),590,230,25,39,8,42,0,110,37,1560,65,51,17,4,15,14,40,8,30,30,15 67 | Chicken & Fish,Southern Style Crispy Chicken Sandwich,5.6 oz (160 g),430,170,19,29,3,15,0,45,14,910,38,43,14,2,7,7,21,4,2,15,15 68 | Chicken & Fish,McChicken,5.1 oz (143 g),360,140,16,25,3,15,0,35,11,800,33,40,13,2,7,5,14,0,2,10,15 69 | Chicken & Fish,Bacon Cheddar McChicken,6 oz (171 
g),480,220,24,38,7,35,0,65,21,1260,53,43,14,2,8,6,22,4,10,20,15 70 | Chicken & Fish,Bacon Buffalo Ranch McChicken,5.7 oz (161 g),430,190,21,32,5,25,0,50,17,1260,53,41,14,2,7,6,20,2,10,15,15 71 | Chicken & Fish,Buffalo Ranch McChicken,5.2 oz (148 g),360,150,16,25,3,16,0,35,11,990,41,40,13,2,7,5,14,2,2,15,15 72 | Chicken & Fish,Premium McWrap Chicken & Bacon (Crispy Chicken),11.1 oz (316 g),630,280,32,49,9,45,0.5,80,26,1540,64,56,19,3,13,7,32,60,20,20,20 73 | Chicken & Fish,Premium McWrap Chicken & Bacon (Grilled Chicken),10.7 oz (302 g),480,170,19,28,7,36,0,95,32,1370,57,42,14,3,13,6,36,60,25,20,20 74 | Chicken & Fish,Premium McWrap Chicken & Ranch (Crispy Chicken),10.9 oz (310 g),610,280,31,47,8,40,0.5,65,21,1340,56,56,19,3,14,8,27,60,15,20,20 75 | Chicken & Fish,Premium McWrap Chicken & Ranch (Grilled Chicken),10.5 oz (297 g),450,160,18,27,6,31,0.5,80,27,1170,49,42,14,3,14,6,30,60,15,15,20 76 | Chicken & Fish,Premium McWrap Southwest Chicken (Crispy Chicken),11.1 oz (314 g),670,300,33,51,8,40,0.5,60,21,1480,62,68,23,5,19,12,27,60,15,20,20 77 | Chicken & Fish,Premium McWrap Southwest Chicken (Grilled Chicken),11.2 oz (318 g),520,180,20,31,6,32,0,80,27,1320,55,55,18,5,20,10,31,70,15,20,20 78 | Chicken & Fish,Premium McWrap Chicken Sweet Chili (Crispy Chicken),10.7 oz (304 g),540,200,23,35,4.5,23,0,50,16,1260,52,61,20,3,13,14,23,60,15,8,20 79 | Chicken & Fish,Premium McWrap Chicken Sweet Chili (Grilled Chicken),10.3 oz (291 g),380,90,10,15,3,14,0,65,22,1090,45,47,16,3,13,12,27,60,15,8,20 80 | Chicken & Fish,Chicken McNuggets (4 piece),2.3 oz (65 g),190,110,12,18,2,10,0,25,9,360,15,12,4,1,2,0,9,0,2,0,2 81 | Chicken & Fish,Chicken McNuggets (6 piece),3.4 oz (97 g),280,160,18,27,3,15,0,40,13,540,22,18,6,1,4,0,13,0,2,2,4 82 | Chicken & Fish,Chicken McNuggets (10 piece),5.7 oz (162 g),470,270,30,45,5,25,0,65,22,900,37,30,10,2,6,0,22,0,4,2,6 83 | Chicken & Fish,Chicken McNuggets (20 piece),11.4 oz (323 g),940,530,59,91,10,50,0,135,44,1800,75,59,20,3,12,0,44,0,8,4,10 84 | 
Chicken & Fish,Chicken McNuggets (40 piece),22.8 oz (646 g),1880,1060,118,182,20,101,1,265,89,3600,150,118,39,6,24,1,87,0,15,8,25 85 | Chicken & Fish,Filet-O-Fish,5 oz (142 g),390,170,19,29,4,19,0,40,14,590,24,39,13,2,7,5,15,2,0,15,10 86 | Salads,Premium Bacon Ranch Salad (without Chicken),7.9 oz (223 g),140,70,7,11,3.5,18,0,25,9,300,13,10,3,3,12,4,9,170,30,15,6 87 | Salads,Premium Bacon Ranch Salad with Crispy Chicken,9 oz (255 g),380,190,21,33,6,29,0,70,23,860,36,22,7,2,10,5,25,100,25,15,8 88 | Salads,Premium Bacon Ranch Salad with Grilled Chicken,8.5 oz (241 g),220,80,8,13,4,20,0,85,29,690,29,8,3,2,10,4,29,110,30,15,8 89 | Salads,Premium Southwest Salad (without Chicken),8.1 oz (230 g),140,40,4.5,7,2,9,0,10,3,150,6,20,7,6,23,6,6,160,25,15,10 90 | Salads,Premium Southwest Salad with Crispy Chicken,12.3 oz (348 g),450,190,22,33,4.5,22,0,50,17,850,35,42,14,7,28,12,23,170,30,15,15 91 | Salads,Premium Southwest Salad with Grilled Chicken,11.8 oz (335 g),290,80,8,13,2.5,13,0,70,23,680,28,28,9,7,28,10,27,170,30,15,15 92 | Snacks & Sides,Chipotle BBQ Snack Wrap (Crispy Chicken),4.6 oz (130 g),340,130,15,23,4.5,22,0,30,11,780,33,37,12,1,6,8,14,4,0,10,10 93 | Snacks & Sides,Chipotle BBQ Snack Wrap (Grilled Chicken),4.3 oz (123 g),260,70,8,13,3.5,18,0,40,14,700,29,30,10,1,6,7,16,4,2,10,10 94 | Snacks & Sides,Honey Mustard Snack Wrap (Crispy Chicken),4.3 oz (123 g),330,130,15,23,4.5,22,0,35,11,730,30,34,11,1,5,3,14,2,0,10,10 95 | Snacks & Sides,Honey Mustard Snack Wrap (Grilled Chicken),4.1 oz (116 g),250,70,8,13,3.5,18,0,45,14,650,27,27,9,1,5,2,16,2,2,10,10 96 | Snacks & Sides,Ranch Snack Wrap (Crispy Chicken),4.5 oz (128 g),360,180,20,30,5,27,0,40,13,810,34,32,11,1,5,3,15,2,0,10,10 97 | Snacks & Sides,Ranch Snack Wrap (Grilled Chicken),4.3 oz (121 g),280,120,13,20,4.5,22,0,45,16,720,30,25,8,1,5,2,16,2,2,10,10 98 | Snacks & Sides,Small French Fries,2.6 oz (75 g),230,100,11,17,1.5,8,0,0,0,130,5,30,10,2,10,0,2,0,30,0,4 99 | Snacks & Sides,Medium French Fries,3.9 oz (111 
g),340,140,16,24,2.5,11,0,0,0,190,8,44,15,4,14,0,4,0,45,2,4 100 | Snacks & Sides,Large French Fries,5.9 oz (168 g),510,220,24,37,3.5,17,0,0,0,290,12,67,22,5,22,0,6,0,70,2,8 101 | Snacks & Sides,Kids French Fries,1.3 oz (38 g),110,50,5,8,1,4,0,0,0,65,3,15,5,1,5,0,1,0,15,0,2 102 | Snacks & Sides,Side Salad,3.1 oz (87 g),20,0,0,0,0,0,0,0,0,10,0,4,1,1,6,2,1,45,25,2,4 103 | Snacks & Sides,Apple Slices,1.2 oz (34 g),15,0,0,0,0,0,0,0,0,0,0,4,1,0,0,3,0,0,160,2,0 104 | Snacks & Sides,Fruit 'n Yogurt Parfait,5.2 oz (149 g),150,20,2,3,1,5,0,5,2,70,3,30,10,1,3,23,4,2,15,10,4 105 | Desserts,Baked Apple Pie,2.7 oz (77 g),250,110,13,19,7,35,0,0,0,170,7,32,11,4,15,13,2,4,25,2,6 106 | Desserts,Chocolate Chip Cookie,1 cookie (33 g),160,70,8,12,3.5,19,0,10,3,90,4,21,7,1,3,15,2,2,0,2,8 107 | Desserts,Oatmeal Raisin Cookie,1 cookie (33 g),150,50,6,9,2.5,13,0,10,3,135,6,22,7,1,3,13,2,2,0,2,6 108 | Desserts,Kids Ice Cream Cone,1 oz (29 g),45,10,1.5,2,1,4,0,5,2,20,1,7,2,0,0,6,1,2,0,4,0 109 | Desserts,Hot Fudge Sundae,6.3 oz (179 g),330,80,9,14,7,34,0,25,8,170,7,53,18,1,3,48,8,8,0,25,8 110 | Desserts,Hot Caramel Sundae,6.4 oz (182 g),340,70,8,12,5,24,0,30,10,150,6,60,20,0,0,43,7,10,0,25,0 111 | Desserts,Strawberry Sundae,6.3 oz (178 g),280,60,6,10,4,20,0,25,8,85,4,49,16,0,0,45,6,8,4,20,0 112 | Beverages,Coca-Cola Classic (Small),16 fl oz cup,140,0,0,0,0,0,0,0,0,0,0,39,13,0,0,39,0,0,0,0,0 113 | Beverages,Coca-Cola Classic (Medium),21 fl oz cup,200,0,0,0,0,0,0,0,0,5,0,55,18,0,0,55,0,0,0,0,0 114 | Beverages,Coca-Cola Classic (Large),30 fl oz cup,280,0,0,0,0,0,0,0,0,5,0,76,25,0,0,76,0,0,0,0,0 115 | Beverages,Coca-Cola Classic (Child),12 fl oz cup,100,0,0,0,0,0,0,0,0,0,0,28,9,0,0,28,0,0,0,0,0 116 | Beverages,Diet Coke (Small),16 fl oz cup,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,0 117 | Beverages,Diet Coke (Medium),21 fl oz cup,0,0,0,0,0,0,0,0,0,20,1,0,0,0,0,0,0,0,0,0,0 118 | Beverages,Diet Coke (Large),30 fl oz cup,0,0,0,0,0,0,0,0,0,35,1,0,0,0,0,0,0,0,0,0,0 119 | Beverages,Diet Coke (Child),12 
fl oz cup,0,0,0,0,0,0,0,0,0,15,1,0,0,0,0,0,0,0,0,0,0 120 | Beverages,Dr Pepper (Small),16 fl oz cup,140,0,0,0,0,0,0,0,0,45,2,37,12,0,0,35,0,0,0,0,0 121 | Beverages,Dr Pepper (Medium),21 fl oz cup,190,0,0,0,0,0,0,0,0,65,3,53,18,0,0,51,0,0,0,0,0 122 | Beverages,Dr Pepper (Large),30 fl oz cup,270,0,0,0,0,0,0,0,0,90,4,72,24,0,0,70,0,0,0,0,0 123 | Beverages,Dr Pepper (Child),12 fl oz cup,100,0,0,0,0,0,0,0,0,30,1,27,9,0,0,26,0,0,0,0,0 124 | Beverages,Diet Dr Pepper (Small),16 fl oz cup,0,0,0,0,0,0,0,0,0,70,3,0,0,0,0,0,2,0,0,0,0 125 | Beverages,Diet Dr Pepper (Medium),21 fl oz cup,0,0,0,0,0,0,0,0,0,100,4,0,0,0,0,0,3,0,0,0,0 126 | Beverages,Diet Dr Pepper (Large),30 fl oz cup,0,0,0,0,0,0,0,0,0,140,6,0,0,0,0,0,4,0,0,0,0 127 | Beverages,Diet Dr Pepper (Child),12 fl oz cup,0,0,0,0,0,0,0,0,0,50,2,0,0,0,0,0,1,0,0,0,0 128 | Beverages,Sprite (Small),16 fl oz cup,140,0,0,0,0,0,0,0,0,30,1,37,12,0,0,37,0,0,0,0,0 129 | Beverages,Sprite (Medium),21 fl oz cup,200,0,0,0,0,0,0,0,0,45,2,54,18,0,0,54,0,0,0,0,0 130 | Beverages,Sprite (Large),30 fl oz cup,280,0,0,0,0,0,0,0,0,60,3,74,25,0,0,74,0,0,0,0,0 131 | Beverages,Sprite (Child),12 fl oz cup,100,0,0,0,0,0,0,0,0,25,1,27,9,0,0,27,0,0,0,0,0 132 | Beverages,1% Low Fat Milk Jug,1 carton (236 ml),100,20,2.5,4,1.5,8,0,10,3,125,5,12,4,0,0,12,8,10,4,30,0 133 | Beverages,Fat Free Chocolate Milk Jug,1 carton (236 ml),130,0,0,0,0,0,0,5,2,135,6,23,8,1,2,22,9,10,0,30,8 134 | Beverages,Minute Maid 100% Apple Juice Box,6 fl oz (177 ml),80,0,0,0,0,0,0,0,0,15,1,21,7,0,0,19,0,0,100,10,0 135 | Beverages,Minute Maid Orange Juice (Small),12 fl oz cup,150,0,0,0,0,0,0,0,0,0,0,34,11,0,0,30,2,0,130,2,0 136 | Beverages,Minute Maid Orange Juice (Medium),16 fl oz cup,190,0,0,0,0,0,0,0,0,0,0,44,15,0,0,39,3,0,160,4,0 137 | Beverages,Minute Maid Orange Juice (Large),22 fl oz cup,280,0,0,0,0,0,0,0,0,5,0,65,22,0,0,58,4,0,240,4,0 138 | Beverages,Dasani Water Bottle,16.9 fl oz,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 139 | Coffee & Tea,Iced Tea (Small),16 fl oz 
cup,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,0 140 | Coffee & Tea,Iced Tea (Medium),21 fl oz cup,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,0 141 | Coffee & Tea,Iced Tea (Large),30 fl oz cup,0,0,0,0,0,0,0,0,0,15,1,0,0,0,0,0,0,0,0,0,0 142 | Coffee & Tea,Iced Tea (Child),12 fl oz cup,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0 143 | Coffee & Tea,Sweet Tea (Small),16 fl oz cup,150,0,0,0,0,0,0,0,0,10,0,36,12,0,0,36,1,0,0,0,0 144 | Coffee & Tea,Sweet Tea (Medium),21 fl oz cup,180,0,0,0,0,0,0,0,0,10,0,45,15,0,0,45,1,0,0,0,0 145 | Coffee & Tea,Sweet Tea (Large),30 fl oz cup,220,0,0,0,0,0,0,0,0,10,1,54,18,0,0,54,1,0,0,0,0 146 | Coffee & Tea,Sweet Tea (Child),12 fl oz cup,110,0,0,0,0,0,0,0,0,5,0,27,9,0,0,27,0,0,0,0,0 147 | Coffee & Tea,Coffee (Small),12 fl oz cup,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 148 | Coffee & Tea,Coffee (Medium),16 fl oz cup,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 149 | Coffee & Tea,Coffee (Large),16 fl oz cup,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 150 | Coffee & Tea,Latte (Small),12 fl oz cup,170,80,9,13,5,24,0,25,9,115,5,15,5,1,3,12,9,8,0,30,0 151 | Coffee & Tea,Latte (Medium),16 fl oz cup,210,90,10,16,6,30,0,30,11,140,6,18,6,1,4,15,11,10,0,35,0 152 | Coffee & Tea,Latte (Large),20 fl oz cup,280,120,14,21,8,39,0,40,14,180,8,24,8,1,6,20,15,15,0,50,2 153 | Coffee & Tea,Caramel Latte (Small),12 fl oz cup,270,80,9,13,5,24,0,25,9,115,5,40,13,1,3,38,9,8,0,30,0 154 | Coffee & Tea,Caramel Latte (Medium),16 fl oz cup,340,90,10,16,6,30,0,30,11,140,6,50,17,1,4,48,11,10,0,35,0 155 | Coffee & Tea,Caramel Latte (Large),20 fl oz cup,430,120,14,21,8,39,0,40,14,180,8,62,21,1,6,59,15,15,0,50,2 156 | Coffee & Tea,Hazelnut Latte (Small),12 fl oz cup,270,80,9,13,5,24,0,25,9,115,5,40,13,1,3,38,9,8,0,30,0 157 | Coffee & Tea,Hazelnut Latte (Medium),16 fl oz cup,330,90,10,16,6,30,0,30,11,140,6,50,17,1,4,47,11,10,0,35,0 158 | Coffee & Tea,Hazelnut Latte (Large),20 fl oz cup,430,120,14,21,8,39,0,40,14,180,8,62,21,1,6,58,15,15,0,50,2 159 | Coffee & Tea,French Vanilla Latte 
(Small),12 fl oz cup,260,80,9,13,5,24,0,25,9,115,5,38,13,1,3,36,9,8,0,30,0 160 | Coffee & Tea,French Vanilla Latte (Medium),16 fl oz cup,330,90,10,16,6,30,0,30,11,140,6,48,16,1,4,45,11,10,0,35,2 161 | Coffee & Tea,French Vanilla Latte (Large),20 fl oz cup,420,120,14,21,8,39,0,40,14,190,8,60,20,1,6,56,15,15,0,50,2 162 | Coffee & Tea,Latte with Sugar Free French Vanilla Syrup (Small),12 fl oz cup,210,80,9,13,5,24,0,25,9,150,6,24,8,1,4,12,9,8,0,30,0 163 | Coffee & Tea,Latte with Sugar Free French Vanilla Syrup (Medium),16 fl oz cup,260,90,10,16,6,30,0,30,11,190,8,29,10,1,5,15,12,10,0,35,0 164 | Coffee & Tea,Latte with Sugar Free French Vanilla Syrup (Large),20 fl oz cup,330,120,14,21,8,39,0,40,14,240,10,37,12,2,7,20,15,15,0,50,2 165 | Coffee & Tea,Nonfat Latte (Small),12 fl oz cup,100,0,0,0,0,0,0,5,2,110,5,15,5,1,3,13,10,10,0,30,0 166 | Coffee & Tea,Nonfat Latte (Medium),16 fl oz cup,130,0,0,0,0,0,0,5,2,135,6,19,6,1,4,16,12,15,0,40,0 167 | Coffee & Tea,Nonfat Latte (Large),20 fl oz cup,170,0,0.5,1,0,0,0,10,3,180,7,25,8,1,6,21,16,15,0,50,2 168 | Coffee & Tea,Nonfat Caramel Latte (Small),12 fl oz cup,200,0,0,0,0,0,0,5,2,110,5,41,14,1,3,39,10,10,0,30,0 169 | Coffee & Tea,Nonfat Caramel Latte (Medium),16 fl oz cup,250,0,0,0,0,0,0,5,2,135,6,51,17,1,4,48,12,15,0,40,0 170 | Coffee & Tea,Nonfat Caramel Latte (Large),20 fl oz cup,310,0,0.5,1,0,0,0,10,3,180,7,63,21,1,6,59,16,15,0,50,2 171 | Coffee & Tea,Nonfat Hazelnut Latte (Small),12 fl oz cup,200,0,0,0,0,0,0,5,2,110,5,40,13,1,3,38,10,10,0,30,0 172 | Coffee & Tea,Nonfat Hazelnut Latte (Medium),16 fl oz cup,250,0,0,0,0,0,0,5,2,135,6,51,17,1,4,48,12,15,0,40,0 173 | Coffee & Tea,Nonfat Hazelnut Latte (Large),20 fl oz cup,310,0,0.5,1,0,0,0,10,3,180,7,63,21,1,6,59,16,15,0,50,2 174 | Coffee & Tea,Nonfat French Vanilla Latte (Small),12 fl oz cup,190,0,0,0,0,0,0,5,2,115,5,39,13,1,3,37,10,10,0,30,0 175 | Coffee & Tea,Nonfat French Vanilla Latte (Medium),16 fl oz cup,240,0,0,0,0,0,0,5,2,140,6,49,16,1,4,46,12,15,0,40,2 176 | Coffee & 
Tea,Nonfat French Vanilla Latte (Large),20 fl oz cup,300,0,0.5,1,0,0,0,10,3,180,8,60,20,1,6,56,16,15,0,50,2 177 | Coffee & Tea,Nonfat Latte with Sugar Free French Vanilla Syrup (Small),12 fl oz cup,140,0,0,0,0,0,0,5,2,150,6,24,8,1,4,13,10,10,0,30,0 178 | Coffee & Tea,Nonfat Latte with Sugar Free French Vanilla Syrup (Medium),16 fl oz cup,170,0,0,0,0,0,0,5,2,180,8,30,10,1,5,16,12,15,0,40,0 179 | Coffee & Tea,Nonfat Latte with Sugar Free French Vanilla Syrup (Large),20 fl oz cup,220,0,0.5,1,0,0,0,10,3,240,10,38,13,2,7,21,16,15,0,50,2 180 | Coffee & Tea,Mocha (Small),12 fl oz cup,340,100,11,18,7,34,0,35,12,150,6,49,16,2,6,42,10,10,0,30,6 181 | Coffee & Tea,Mocha (Medium),16 fl oz cup,410,120,14,21,8,40,0,40,14,190,8,60,20,2,8,53,13,10,0,40,6 182 | Coffee & Tea,Mocha (Large),20 fl oz cup,500,150,17,26,10,49,0.5,50,17,240,10,72,24,2,10,63,16,15,0,50,8 183 | Coffee & Tea,Mocha with Nonfat Milk (Small),12 fl oz cup,270,30,3.5,5,2,11,0,15,5,150,6,49,16,2,6,43,11,10,0,35,6 184 | Coffee & Tea,Mocha with Nonfat Milk (Medium),16 fl oz cup,330,30,3.5,6,2,11,0,15,5,190,8,60,20,2,8,53,13,15,0,40,6 185 | Coffee & Tea,Mocha with Nonfat Milk (Large),20 fl oz cup,390,35,4,6,2.5,12,0,20,6,240,10,73,24,2,10,64,17,20,0,50,8 186 | Coffee & Tea,Caramel Mocha (Small),12 fl oz cup,320,100,11,17,7,33,0,35,12,170,7,45,15,1,3,40,10,10,0,30,2 187 | Coffee & Tea,Caramel Mocha (Medium),16 fl oz cup,390,120,14,21,8,40,0.5,40,14,220,9,55,18,1,4,50,12,15,0,40,2 188 | Coffee & Tea,Caramel Mocha (Large),20 fl oz cup,480,150,17,26,10,49,0.5,50,17,270,11,66,22,1,5,60,16,15,0,50,4 189 | Coffee & Tea,Nonfat Caramel Mocha (Small),12 fl oz cup,250,30,3.5,5,2,10,0,15,5,170,7,45,15,1,3,41,10,10,0,35,2 190 | Coffee & Tea,Nonfat Caramel Mocha (Medium),16 fl oz cup,310,30,3.5,5,2,11,0,15,5,210,9,56,19,1,4,51,13,15,0,40,2 191 | Coffee & Tea,Nonfat Caramel Mocha (Large),20 fl oz cup,370,35,3.5,6,2.5,11,0,20,6,270,11,67,22,1,5,61,17,20,0,50,4 192 | Coffee & Tea,Hot Chocolate (Small),12 fl oz 
cup,360,120,13,21,8,39,0,40,14,180,8,50,17,1,3,45,11,10,0,40,6 193 | Coffee & Tea,Hot Chocolate (Medium),16 fl oz cup,440,140,16,25,9,47,0.5,50,16,220,9,61,20,1,4,56,14,15,0,45,6 194 | Coffee & Tea,Hot Chocolate (Large),20 fl oz cup,540,180,20,31,12,58,0.5,60,20,280,12,73,24,1,5,68,17,20,0,60,8 195 | Coffee & Tea,Hot Chocolate with Nonfat Milk (Small),12 fl oz cup,280,30,3.5,5,2,11,0,15,5,180,7,50,17,1,3,46,12,15,0,40,6 196 | Coffee & Tea,Hot Chocolate with Nonfat Milk (Medium),16 fl oz cup,340,30,3.5,5,2,11,0,15,6,220,9,61,20,1,4,57,14,20,0,50,6 197 | Coffee & Tea,Hot Chocolate with Nonfat Milk (Large),20 fl oz cup,400,35,3.5,6,2.5,12,0,20,7,280,12,74,25,1,5,69,19,25,0,60,8 198 | Coffee & Tea,Regular Iced Coffee (Small),16 fl oz cup,140,40,4.5,7,3,15,0,15,6,35,1,23,8,0,0,22,1,4,0,4,0 199 | Coffee & Tea,Regular Iced Coffee (Medium),22 fl oz cup,190,60,7,11,4.5,22,0,25,9,50,2,31,10,0,0,30,1,4,0,4,0 200 | Coffee & Tea,Regular Iced Coffee (Large),32 fl oz cup,270,80,9,14,6,29,0,35,12,75,3,47,16,0,0,45,2,6,0,8,0 201 | Coffee & Tea,Caramel Iced Coffee (Small),16 fl oz cup,130,40,4.5,7,3,15,0,15,6,35,2,22,7,0,0,21,1,4,0,4,0 202 | Coffee & Tea,Caramel Iced Coffee (Medium),22 fl oz cup,180,60,7,11,4.5,22,0,25,9,50,2,29,10,0,0,28,1,4,0,4,0 203 | Coffee & Tea,Caramel Iced Coffee (Large),32 fl oz cup,260,80,9,14,6,29,0,35,12,65,3,43,14,0,0,42,2,6,0,6,0 204 | Coffee & Tea,Hazelnut Iced Coffee (Small),16 fl oz cup,130,40,4.5,7,3,15,0,15,6,35,1,21,7,0,0,20,1,4,0,4,0 205 | Coffee & Tea,Hazelnut Iced Coffee (Medium),22 fl oz cup,180,60,7,11,4.5,22,0,25,9,50,2,29,10,0,0,28,1,4,0,4,0 206 | Coffee & Tea,Hazelnut Iced Coffee (Large),32 fl oz cup,250,80,9,14,6,29,0,35,12,75,3,43,14,0,0,41,2,6,0,8,0 207 | Coffee & Tea,French Vanilla Iced Coffee (Small),16 fl oz cup,120,40,4.5,7,3,15,0,15,6,40,2,20,7,0,0,19,1,4,0,4,0 208 | Coffee & Tea,French Vanilla Iced Coffee (Medium),22 fl oz cup,170,60,7,11,4.5,22,0,25,9,55,2,27,9,0,0,26,1,4,0,4,0 209 | Coffee & Tea,French Vanilla Iced Coffee 
(Large),32 fl oz cup,240,80,9,14,6,29,0,35,12,80,3,41,14,0,0,39,2,6,0,8,0 210 | Coffee & Tea,Iced Coffee with Sugar Free French Vanilla Syrup (Small),16 fl oz cup,80,40,4.5,7,3,15,0,15,6,65,3,9,3,0,0,1,1,4,0,4,0 211 | Coffee & Tea,Iced Coffee with Sugar Free French Vanilla Syrup (Medium),22 fl oz cup,120,60,7,11,4.5,22,0,25,9,90,4,12,4,0,0,2,1,4,0,4,0 212 | Coffee & Tea,Iced Coffee with Sugar Free French Vanilla Syrup (Large),32 fl oz cup,160,80,9,14,6,29,0,35,12,135,6,18,6,0,0,2,2,6,0,8,0 213 | Coffee & Tea,Iced Mocha (Small),12 fl oz cup,290,100,11,17,7,33,0,35,12,125,5,41,14,1,4,34,8,10,0,25,4 214 | Coffee & Tea,Iced Mocha (Medium),16 fl oz cup,350,110,13,19,8,38,0,40,13,150,6,50,17,1,5,43,9,10,0,30,6 215 | Coffee & Tea,Iced Mocha (Large),22 fl oz cup,480,150,16,25,10,49,0.5,50,17,220,9,70,23,2,8,62,14,15,0,40,8 216 | Coffee & Tea,Iced Mocha with Nonfat Milk (Small),12 fl oz cup,240,45,5,8,3,16,0,20,7,125,5,41,14,1,4,35,8,10,0,25,4 217 | Coffee & Tea,Iced Mocha with Nonfat Milk (Medium),16 fl oz cup,290,45,5,8,3.5,17,0,20,7,150,6,50,17,1,5,43,10,15,0,30,6 218 | Coffee & Tea,Iced Mocha with Nonfat Milk (Large),22 fl oz cup,390,50,6,9,3.5,18,0,25,8,220,9,71,24,2,8,62,14,20,0,45,8 219 | Coffee & Tea,Iced Caramel Mocha (Small),12 fl oz cup,280,100,11,17,7,33,0,35,12,140,6,38,13,0,0,33,8,10,0,25,2 220 | Coffee & Tea,Iced Caramel Mocha (Medium),16 fl oz cup,340,110,13,19,7,37,0,40,13,170,7,46,15,1,2,41,9,10,0,30,2 221 | Coffee & Tea,Iced Caramel Mocha (Large),22 fl oz cup,460,150,16,25,10,48,0.5,50,17,250,10,65,22,1,3,59,13,15,0,40,4 222 | Coffee & Tea,Iced Nonfat Caramel Mocha (Small),12 fl oz cup,230,45,5,8,3,16,0,20,7,140,6,38,13,0,0,33,8,10,0,25,2 223 | Coffee & Tea,Iced Nonfat Caramel Mocha (Medium),16 fl oz cup,270,45,5,8,3,16,0,20,7,170,7,47,16,1,2,41,10,15,0,30,2 224 | Coffee & Tea,Iced Nonfat Caramel Mocha (Large),22 fl oz cup,370,50,6,8,3.5,17,0,25,8,250,10,65,22,1,3,59,14,20,0,45,4 225 | Coffee & Tea,Frappé Mocha (Small),12 fl oz 
cup,450,160,18,28,12,59,1,65,21,125,5,65,22,1,3,57,7,15,0,20,4 226 | Coffee & Tea,Frappé Mocha (Medium),16 fl oz cup,550,200,22,34,14,71,1,75,25,160,7,80,27,1,4,71,9,15,0,25,4 227 | Coffee & Tea,Frappé Mocha (Large),22 fl oz cup,670,240,26,41,17,85,1,90,30,190,8,98,33,1,4,88,11,20,0,35,4 228 | Coffee & Tea,Frappé Caramel (Small),12 fl oz cup,450,170,19,29,12,60,1,65,22,125,5,64,21,0,0,57,7,15,0,25,2 229 | Coffee & Tea,Frappé Caramel (Medium),16 fl oz cup,550,200,23,35,15,73,1,80,27,160,7,79,26,0,0,71,9,20,0,30,2 230 | Coffee & Tea,Frappé Caramel (Large),22 fl oz cup,670,250,27,42,17,87,1.5,95,32,190,8,96,32,0,0,88,11,20,0,35,2 231 | Coffee & Tea,Frappé Chocolate Chip (Small),12 fl oz cup,530,200,23,35,14,72,1,65,22,135,6,76,25,1,5,67,8,15,0,25,4 232 | Coffee & Tea,Frappé Chocolate Chip (Medium),16 fl oz cup,630,240,26,41,17,85,1,80,26,160,7,91,30,1,5,81,9,15,0,30,4 233 | Coffee & Tea,Frappé Chocolate Chip (Large),22 fl oz cup,760,280,31,48,20,101,1.5,95,32,200,8,111,37,1,5,99,12,20,0,35,6 234 | Smoothies & Shakes,Blueberry Pomegranate Smoothie (Small),12 fl oz cup,220,5,0.5,1,0,0,0,5,1,40,2,50,17,3,12,44,2,0,2,6,2 235 | Smoothies & Shakes,Blueberry Pomegranate Smoothie (Medium),16 fl oz cup,260,5,1,1,0,0,0,5,1,50,2,62,21,4,15,54,3,0,4,8,2 236 | Smoothies & Shakes,Blueberry Pomegranate Smoothie (Large),22 fl oz cup,340,10,1,2,0.5,3,0,5,2,65,3,79,26,5,19,70,4,0,4,10,2 237 | Smoothies & Shakes,Strawberry Banana Smoothie (Small),12 fl oz cup,210,5,0.5,1,0,0,0,5,1,50,2,47,16,3,10,44,3,0,30,8,2 238 | Smoothies & Shakes,Strawberry Banana Smoothie (Medium),16 fl oz cup,250,5,1,1,0,0,0,5,1,60,2,58,19,3,13,54,4,0,35,8,4 239 | Smoothies & Shakes,Strawberry Banana Smoothie (Large),22 fl oz cup,330,10,1,2,0.5,3,0,5,2,80,3,74,25,4,16,70,5,0,45,10,4 240 | Smoothies & Shakes,Mango Pineapple Smoothie (Small),12 fl oz cup,210,5,0.5,1,0,0,0,5,1,40,2,50,17,1,4,46,2,30,20,8,2 241 | Smoothies & Shakes,Mango Pineapple Smoothie (Medium),16 fl oz 
cup,260,10,1,1,0,0,0,5,1,45,2,61,20,1,5,56,3,40,25,8,2 242 | Smoothies & Shakes,Mango Pineapple Smoothie (Large),22 fl oz cup,340,10,1,2,0.5,3,0,5,2,60,3,78,26,2,6,72,4,50,30,10,2 243 | Smoothies & Shakes,Vanilla Shake (Small),12 fl oz cup,530,140,15,24,10,49,1,60,20,160,7,86,29,0,0,63,11,20,0,40,0 244 | Smoothies & Shakes,Vanilla Shake (Medium),16 fl oz cup,660,170,19,29,12,61,1,75,24,200,9,109,36,0,0,81,14,25,0,50,0 245 | Smoothies & Shakes,Vanilla Shake (Large),22 fl oz cup,820,210,23,35,15,73,1,90,29,260,11,135,45,0,0,101,18,30,0,60,0 246 | Smoothies & Shakes,Strawberry Shake (Small),12 fl oz cup,550,150,16,25,10,52,1,60,21,160,7,90,30,0,0,79,12,20,0,40,0 247 | Smoothies & Shakes,Strawberry Shake (Medium),16 fl oz cup,690,180,20,30,13,63,1,75,25,210,9,114,38,0,0,100,15,25,0,50,0 248 | Smoothies & Shakes,Strawberry Shake (Large),22 fl oz cup,850,210,24,36,15,75,1,90,30,260,11,140,47,0,0,123,18,30,0,70,0 249 | Smoothies & Shakes,Chocolate Shake (Small),12 fl oz cup,560,150,16,25,10,51,1,60,20,240,10,91,30,1,5,77,12,20,0,40,8 250 | Smoothies & Shakes,Chocolate Shake (Medium),16 fl oz cup,700,180,20,30,12,62,1,75,24,300,13,114,38,2,6,97,15,25,0,50,10 251 | Smoothies & Shakes,Chocolate Shake (Large),22 fl oz cup,850,210,23,36,15,74,1,85,29,380,16,141,47,2,8,120,19,30,0,60,15 252 | Smoothies & Shakes,Shamrock Shake (Medium),16 fl oz cup,660,170,19,29,12,61,1,75,24,210,9,109,36,0,0,93,14,25,0,50,0 253 | Smoothies & Shakes,Shamrock Shake (Large),22 fl oz cup,820,210,23,35,15,73,1,90,29,260,11,135,45,0,0,115,18,30,0,60,0 254 | Smoothies & Shakes,McFlurry with M&M’s Candies (Small),10.9 oz (310 g),650,210,23,35,14,72,0.5,50,17,180,7,96,32,1,6,89,13,15,0,45,8 255 | Smoothies & Shakes,McFlurry with M&M’s Candies (Medium),16.2 oz (460 g),930,290,33,50,20,102,1,75,25,260,11,139,46,2,7,128,20,25,0,70,10 256 | Smoothies & Shakes,McFlurry with M&M’s Candies (Snack),7.3 oz (207 g),430,140,15,24,10,48,0,35,11,120,5,64,21,1,4,59,9,10,0,30,4 257 | Smoothies & Shakes,McFlurry with 
Oreo Cookies (Small),10.1 oz (285 g),510,150,17,26,9,44,0.5,45,14,280,12,80,27,1,4,64,12,15,0,40,8 258 | Smoothies & Shakes,McFlurry with Oreo Cookies (Medium),13.4 oz (381 g),690,200,23,35,12,58,1,55,19,380,16,106,35,1,5,85,15,20,0,50,10 259 | Smoothies & Shakes,McFlurry with Oreo Cookies (Snack),6.7 oz (190 g),340,100,11,17,6,29,0,30,9,190,8,53,18,1,2,43,8,10,0,25,6 260 | Smoothies & Shakes,McFlurry with Reese's Peanut Butter Cups (Medium),14.2 oz (403 g),810,290,32,50,15,76,1,60,20,400,17,114,38,2,9,103,21,20,0,60,6 261 | Smoothies & Shakes,McFlurry with Reese's Peanut Butter Cups (Snack),7.1 oz (202 g),410,150,16,25,8,38,0,30,10,200,8,57,19,1,5,51,10,10,0,30,4 262 | --------------------------------------------------------------------------------